diff --git a/kernel/src/driver/base/kobject.rs b/kernel/src/driver/base/kobject.rs index 7b8ca3cf..bb23b889 100644 --- a/kernel/src/driver/base/kobject.rs +++ b/kernel/src/driver/base/kobject.rs @@ -333,11 +333,9 @@ impl KObjectManager { length -= cur; let parent_name = parent.name(); let name = parent_name.as_bytes(); - for i in 0..cur { - path[length + i] = name[i]; - } + path[length..(cur + length)].copy_from_slice(&name[..cur]); length -= 1; - path[length] = '/' as u8; + path[length] = b'/'; if let Some(weak_parent) = parent.parent() { if let Some(upgraded_parent) = weak_parent.upgrade() { parent = upgraded_parent; diff --git a/kernel/src/driver/base/uevent/kobject_uevent.rs b/kernel/src/driver/base/uevent/kobject_uevent.rs index 2167ad74..b55e3e3b 100644 --- a/kernel/src/driver/base/uevent/kobject_uevent.rs +++ b/kernel/src/driver/base/uevent/kobject_uevent.rs @@ -10,10 +10,10 @@ use crate::libs::rwlock::RwLock; use crate::net::socket::netlink::af_netlink::netlink_has_listeners; use crate::net::socket::netlink::af_netlink::NetlinkSocket; use crate::net::socket::netlink::af_netlink::{netlink_broadcast, NetlinkSock}; -use crate::net::socket::netlink::netlink::{ +use crate::net::socket::netlink::skbuff::SkBuff; +use crate::net::socket::netlink::{ netlink_kernel_create, NetlinkKernelCfg, NETLINK_KOBJECT_UEVENT, NL_CFG_F_NONROOT_RECV, }; -use crate::net::socket::netlink::skbuff::SkBuff; use alloc::boxed::Box; use alloc::collections::LinkedList; use alloc::string::{String, ToString}; @@ -86,46 +86,26 @@ fn uevent_net_exit() { // /* This lock protects uevent_seqnum and uevent_sock_list */ // static DEFINE_MUTEX(uevent_sock_mutex); -// to be adjust -pub const BUFFERSIZE: usize = 666; - /* - kobject_uevent_env,以envp为环境变量,上报一个指定action的uevent。环境变量的作用是为执行用户空间程序指定运行环境。具体动作如下: - 查找kobj本身或者其parent是否从属于某个kset,如果不是,则报错返回(注2:由此可以说明,如果一个kobject没有加入kset,是不允许上报uevent的) - 查看kobj->uevent_suppress是否设置,如果设置,则忽略所有的uevent上报并返回(注3:由此可知,可以通过Kobject的uevent_suppress标志,管控Kobject的uevent的上报) - 如果所属的kset有kset->filter函数,则调用该函数,过滤此次上报(注4:这佐证了3.2小节有关filter接口的说明,kset可以通过filter接口过滤不希望上报的event,从而达到整体的管理效果) - 判断所属的kset是否有合法的名称(称作subsystem,和前期的内核版本有区别),否则不允许上报uevent - 分配一个用于此次上报的、存储环境变量的buffer(结果保存在env指针中),并获得该Kobject在sysfs中路径信息(用户空间软件需要依据该路径信息在sysfs中访问它) - 调用add_uevent_var接口(下面会介绍),将Action、路径信息、subsystem等信息,添加到env指针中 - 如果传入的envp不空,则解析传入的环境变量中,同样调用add_uevent_var接口,添加到env指针中 - 如果所属的kset存在kset->uevent接口,调用该接口,添加kset统一的环境变量到env指针 - 根据ACTION的类型,设置kobj->state_add_uevent_sent和kobj->state_remove_uevent_sent变量,以记录正确的状态 - 调用add_uevent_var接口,添加格式为"SEQNUM=%llu”的序列号 - 如果定义了"CONFIG_NET”,则使用netlink发送该uevent - 以uevent_helper、subsystem以及添加了标准环境变量(HOME=/,PATH=/sbin:/bin:/usr/sbin:/usr/bin)的env指针为参数,调用kmod模块提供的call_usermodehelper函数,上报uevent。 - 其中uevent_helper的内容是由内核配置项CONFIG_UEVENT_HELPER_PATH(位于./drivers/base/Kconfig)决定的(可参考lib/kobject_uevent.c, line 32),该配置项指定了一个用户空间程序(或者脚本),用于解析上报的uevent,例如"/sbin/hotplug”。 - call_usermodehelper的作用,就是fork一个进程,以uevent为参数,执行uevent_helper。 -kobject_uevent,和kobject_uevent_env功能一样,只是没有指定任何的环境变量。 -add_uevent_var,以格式化字符的形式(类似printf、printk等),将环境变量copy到env指针中。 - -kobject_action_type,将enum kobject_action类型的Action,转换为字符串 */ -//kobject_uevent->kobject_uevent_env +/// kobject_uevent,和kobject_uevent_env功能一样,只是没有指定任何的环境变量 pub fn kobject_uevent(kobj: Arc, action: KobjectAction) -> Result<(), SystemError> { // kobject_uevent和kobject_uevent_env功能一样,只是没有指定任何的环境变量 - match kobject_uevent_env(kobj, action, None) { + match kobject_uevent_env(kobj, action, Vec::new()) { Ok(_) => Ok(()), Err(e) => Err(e), } } + +/// kobject_uevent_env,以envp为环境变量,上报一个指定action的uevent。环境变量的作用是为执行用户空间程序指定运行环境。 pub fn kobject_uevent_env( kobj: Arc, action: KobjectAction, - envp_ext: Option>, + envp_ext: Vec, ) -> Result { log::info!("kobject_uevent_env: kobj: {:?}, action: {:?}", kobj, action); let mut state = KObjectState::empty(); @@ -247,15 +227,14 @@ pub fn kobject_uevent_env( }; /* keys passed in from the caller */ - if let Some(env_ext) = envp_ext { - for var in env_ext { - let retval = add_uevent_var(&mut env, "%s", &var).unwrap(); - if !retval.is_zero() { - drop(devpath); - drop(env); - log::info!("add_uevent_var failed"); - return Ok(retval); - } + + for var in envp_ext { + let retval = add_uevent_var(&mut env, "%s", &var).unwrap(); + if !retval.is_zero() { + drop(devpath); + drop(env); + log::info!("add_uevent_var failed"); + return Ok(retval); } } if let Some(kset_ref) = kset.as_ref() { @@ -335,6 +314,7 @@ pub fn kobject_uevent_env( return Ok(retval); } +/// 以格式化字符的形式,将环境变量copy到env指针中。 pub fn add_uevent_var( env: &mut Box, format: &str, @@ -403,7 +383,6 @@ pub fn kobject_uevent_net_broadcast( action_string: &str, devpath: &str, ) -> i32 { - let mut ret = 0; // let net:Net = None; // let mut ops = kobj_ns_ops(kobj); @@ -428,7 +407,7 @@ pub fn kobject_uevent_net_broadcast( // if !net.is_none() { // ret = uevent_net_broadcast_tagged(net.unwrap(), env, action_string, devpath); // } else { - ret = uevent_net_broadcast_untagged(env, action_string, devpath); + let ret = uevent_net_broadcast_untagged(env, action_string, devpath); // } log::info!("kobject_uevent_net_broadcast finish. ret: {}", ret); ret diff --git a/kernel/src/driver/base/uevent/mod.rs b/kernel/src/driver/base/uevent/mod.rs index 53f1a1e4..ea14f077 100644 --- a/kernel/src/driver/base/uevent/mod.rs +++ b/kernel/src/driver/base/uevent/mod.rs @@ -1,26 +1,7 @@ -// include/linux/kobject.h -// lib/kobject_uevent.c - +use crate::driver::base::uevent::kobject_uevent::kobject_uevent_env; +use core::fmt::Write; /* - UEVENT_HELPER_PATH_LEN - UEVENT_NUM_ENVP - _KOBJECT_H_ - -Variable - - __randomize_layout - -Enum - - kobject_action - Struct - - kobj_attribute - kobj_type - kobj_uevent_env - kobject - kset kset_uevent_ops Function @@ -32,56 +13,83 @@ Function to_kset */ use crate::driver::base::kobject::KObject; -use alloc::string::String; +use crate::driver::net::Iface; +use crate::filesystem::sysfs::{Attribute, SysFSOpsSupport, SYSFS_ATTR_MODE_RW}; +use alloc::string::{String, ToString}; +use alloc::sync::Arc; use alloc::vec::Vec; +use intertrait::cast::CastArc; +use system_error::SystemError; + +use super::block::block_device::{BlockDevice, BlockDeviceOps}; +use super::char::CharDevice; +use super::device::{Device, DeviceType}; pub mod kobject_uevent; // https://code.dragonos.org.cn/xref/linux-6.1.9/lib/kobject_uevent.c?fi=kobject_uevent#457 -// kobject_action #[derive(Debug)] pub enum KobjectAction { KOBJADD, - KOBJREMOVE, //Kobject(或上层数据结构)的添加/移除事件 - KOBJCHANGE, //Kobject(或上层数据结构)的状态或者内容发生改变; 如果设备驱动需要上报的事件不再上面事件的范围内,或者是自定义的事件,可以使用该event,并携带相应的参数。 - KOBJMOVE, //Kobject(或上层数据结构)更改名称或者更改Parent(意味着在sysfs中更改了目录结构) + KOBJREMOVE, // Kobject(或上层数据结构)的添加/移除事件 + KOBJCHANGE, // Kobject(或上层数据结构)的状态或者内容发生改变; 如果设备驱动需要上报的事件不再上面事件的范围内,或者是自定义的事件,可以使用该event,并携带相应的参数。 + KOBJMOVE, // Kobject(或上层数据结构)更改名称或者更改Parent(意味着在sysfs中更改了目录结构) KOBJONLINE, - KOBJOFFLINE, //Kobject(或上层数据结构)的上线/下线事件,其实是是否使能 + KOBJOFFLINE, // Kobject(或上层数据结构)的上线/下线事件,其实是是否使能 KOBJBIND, KOBJUNBIND, } -/* - @parament: +/// 解析一个字符串,以确定它代表的是哪个 kobject_action,并提取出随后的参数(如果有的话) +fn kobject_action_type(buf: &[u8]) -> Result<(KobjectAction, Vec), SystemError> { + let mut action = KobjectAction::KOBJCHANGE; + let mut action_args: Vec = Vec::new(); + let mut count = buf.len(); + if count != 0 && (buf[count - 1] == b'\n' || buf[count - 1] == b'\0') { + count -= 1; + } + if count == 0 { + return Err(SystemError::EINVAL); + } - envp,指针数组,用于保存每个环境变量的地址,最多可支持的环境变量数量为UEVENT_NUM_ENVP。 + let arg_start = buf.iter().position(|&c| c == b' ').unwrap_or(count); + let count_first = arg_start; + let args_start = arg_start + 1; - envp_idx,用于访问环境变量指针数组的index。 + // 匹配KobjectAction + match &buf[..count_first] { + b"add" => action = KobjectAction::KOBJADD, + b"remove" => action = KobjectAction::KOBJREMOVE, + b"change" => action = KobjectAction::KOBJCHANGE, + b"move" => action = KobjectAction::KOBJMOVE, + b"online" => action = KobjectAction::KOBJONLINE, + b"offline" => action = KobjectAction::KOBJOFFLINE, + b"bind" => action = KobjectAction::KOBJBIND, + b"unbind" => action = KobjectAction::KOBJUNBIND, + _ => return Err(SystemError::EINVAL), + } - buf,保存环境变量的buffer,最大为UEVENT_BUFFER_SIZE。 + // 如果有参数,提取参数 + if count - args_start > 0 { + action_args = buf[args_start..] + .split(|&c| c == b' ') + .map(|s| String::from_utf8_lossy(s).to_string()) + .collect::>(); + } - buflen,访问buf的变量。 - -*/ - -//https://code.dragonos.org.cn/xref/linux-6.1.9/include/linux/kobject.h#31 + Ok((action, action_args)) +} pub const UEVENT_NUM_ENVP: usize = 64; pub const UEVENT_BUFFER_SIZE: usize = 2048; pub const UEVENT_HELPER_PATH_LEN: usize = 256; -/// Represents the environment for handling kernel object uevents. -/* - envp,指针数组,用于保存每个环境变量的地址,最多可支持的环境变量数量为UEVENT_NUM_ENVP。 - - envp_idx,用于访问环境变量指针数组的index。 - - buf,保存环境变量的buffer,最大为UEVENT_BUFFER_SIZE。 - - buflen,访问buf的变量。 - -*/ -// 表示一个待发送的uevent +/// 表示处理内核对象 uevents 的环境 +/// - envp,指针数组,用于保存每个环境变量的地址,最多可支持的环境变量数量为UEVENT_NUM_ENVP。 +/// - envp_idx,用于访问环境变量指针数组的index。 +/// - buf,保存环境变量的buffer,最大为UEVENT_BUFFER_SIZE。 +/// - buflen,访问buf的变量。 +// https://code.dragonos.org.cn/xref/linux-6.1.9/include/linux/kobject.h#31 #[derive(Debug)] pub struct KobjUeventEnv { argv: Vec, @@ -100,3 +108,113 @@ pub struct KobjUeventEnv { uevent,当任何Kobject需要上报uevent时,它所属的kset可以通过该接口统一为这些event添加环境变量。因为很多时候上报uevent时的环境变量都是相同的,因此可以由kset统一处理,就不需要让每个Kobject独自添加了。 */ + +/// 设备文件夹下的`uevent`文件的属性 +#[derive(Debug, Clone, Copy)] +pub struct UeventAttr; + +impl Attribute for UeventAttr { + fn name(&self) -> &str { + "uevent" + } + + fn mode(&self) -> crate::filesystem::vfs::syscall::ModeType { + SYSFS_ATTR_MODE_RW + } + + fn support(&self) -> crate::filesystem::sysfs::SysFSOpsSupport { + SysFSOpsSupport::ATTR_SHOW | SysFSOpsSupport::ATTR_STORE + } + + /// 用户空间读取 uevent 文件,返回 uevent 信息 + fn show(&self, _kobj: Arc, _buf: &mut [u8]) -> Result { + let device: Arc = _kobj + .parent() + .and_then(|x| x.upgrade()) + .ok_or(SystemError::ENODEV)?; + let device = kobj2device(device).ok_or(SystemError::EINVAL)?; + let device_type = device.dev_type(); + let mut uevent_content = String::new(); + match device_type { + DeviceType::Block => { + let block_device = device + .cast::() + .ok() + .ok_or(SystemError::EINVAL)?; + let major = block_device.id_table().device_number().major().data(); + let minor = block_device.id_table().device_number().minor(); + let device_name = block_device.id_table().name(); + writeln!(&mut uevent_content, "MAJOR={:?}", major).unwrap(); + writeln!(&mut uevent_content, "MINOR={:?}", minor).unwrap(); + writeln!(&mut uevent_content, "DEVNAME={}", device_name).unwrap(); + writeln!(&mut uevent_content, "DEVTYPE=disk").unwrap(); + } + DeviceType::Char => { + let char_device = device + .cast::() + .ok() + .ok_or(SystemError::EINVAL)?; + let major = char_device.id_table().device_number().major().data(); + let minor = char_device.id_table().device_number().minor(); + let device_name = char_device.id_table().name(); + writeln!(&mut uevent_content, "MAJOR={}", major).unwrap(); + writeln!(&mut uevent_content, "MINOR={}", minor).unwrap(); + writeln!(&mut uevent_content, "DEVNAME={}", device_name).unwrap(); + writeln!(&mut uevent_content, "DEVTYPE=char").unwrap(); + } + DeviceType::Net => { + let net_device = device.cast::().ok().ok_or(SystemError::EINVAL)?; + // let ifindex = net_device.ifindex().expect("Find ifindex error.\n"); + let device_name = net_device.iface_name(); + writeln!(&mut uevent_content, "INTERFACE={}", device_name).unwrap(); + // writeln!(&mut uevent_content, "IFINDEX={}", ifindex).unwrap(); + } + _ => { + // 处理其他设备类型 + let device_name = device.name(); + writeln!(&mut uevent_content, "DEVNAME={}", device_name).unwrap(); + writeln!(&mut uevent_content, "DEVTYPE={:?}", device_type).unwrap(); + } + } + sysfs_emit_str(_buf, &uevent_content) + } + /// 捕获来自用户空间对 uevent 文件的写操作,触发uevent事件 + fn store(&self, _kobj: Arc, _buf: &[u8]) -> Result { + return kobject_synth_uevent(_buf, _kobj); + } +} + +/// 将 kobject 转换为 device +fn kobj2device(kobj: Arc) -> Option> { + kobj.cast::().ok() +} + +/// 将设备的基本信息写入 uevent 文件 +fn sysfs_emit_str(buf: &mut [u8], content: &str) -> Result { + let bytes = content.as_bytes(); + if buf.len() < bytes.len() { + return Err(SystemError::ENOMEM); + } + buf[..bytes.len()].copy_from_slice(bytes); + Ok(bytes.len()) +} + +/// 解析用户空间写入的 uevent 信息,触发 uevent 事件 +fn kobject_synth_uevent(buf: &[u8], kobj: Arc) -> Result { + let no_uuid_envp = vec!["SYNTH_UUID=0".to_string()]; + let (action, action_args) = kobject_action_type(buf)?; + + let result = if action_args.is_empty() { + kobject_uevent_env(kobj.clone(), action, no_uuid_envp) + } else { + kobject_uevent_env(kobj.clone(), action, action_args) + }; + + if let Err(e) = result { + let device = kobj2device(kobj).ok_or(SystemError::EINVAL)?; + let devname = device.name(); + log::error!("synth uevent: {}: {:?}", devname, e); + return Err(SystemError::EINVAL); + } + Ok(buf.len()) +} diff --git a/kernel/src/net/socket/netlink/af_netlink.rs b/kernel/src/net/socket/netlink/af_netlink.rs index 74461089..bed04672 100644 --- a/kernel/src/net/socket/netlink/af_netlink.rs +++ b/kernel/src/net/socket/netlink/af_netlink.rs @@ -1,48 +1,42 @@ // 参考https://code.dragonos.org.cn/xref/linux-6.1.9/net/netlink/af_netlink.c -use core::cmp::{max, min}; -use core::ops::{Deref, DerefMut}; -use core::{any::Any, fmt::Debug, hash::Hash}; -use core::{mem, slice}; - -use alloc::string::String; -use alloc::sync::{Arc, Weak}; - -use hashbrown::HashMap; -use intertrait::cast::CastBox; -use intertrait::CastFromSync; -use log::warn; -use netlink::callback; -use netlink::netlink::{sk_data_ready, NetlinkKernelCfg}; -use num::Zero; -use system_error::SystemError; -use unified_init::macros::unified_init; - use crate::filesystem::vfs::{FilePrivateData, FileSystem, IndexNode}; -use crate::libs::mutex::Mutex; +use crate::libs::mutex::{Mutex, MutexGuard}; use crate::libs::rwlock::RwLockWriteGuard; -use crate::libs::spinlock::{SpinLock, SpinLockGuard}; -use crate::net::event_poll::{EPollEventType, EPollItem, EventPoll}; +use crate::libs::spinlock::SpinLockGuard; use crate::net::socket::netlink::skbuff::SkBuff; use crate::net::socket::*; -use crate::net::syscall::{MsgHdr, SockAddr, SockAddrNl}; +use crate::net::syscall::SockAddrNl; use crate::time::timer::schedule_timeout; use crate::{libs::rwlock::RwLock, syscall::Syscall}; +use alloc::string::String; +use alloc::sync::Arc; use alloc::{boxed::Box, vec::Vec}; +use core::mem; +use core::ops::Deref; +use core::ptr::copy_nonoverlapping; +use core::{any::Any, fmt::Debug, hash::Hash}; +use hashbrown::HashMap; +use intertrait::CastFromSync; +use netlink::{ + sk_data_ready, NetlinkKernelCfg, NETLINK_ADD_MEMBERSHIP, NETLINK_DROP_MEMBERSHIP, + NETLINK_PKTINFO, +}; +use num::Zero; +use system_error::SystemError; use system_error::SystemError::ECONNREFUSED; +use unified_init::macros::unified_init; use crate::net::socket::{AddressFamily, Endpoint, Inode, MessageFlag, Socket}; use lazy_static::lazy_static; use super::callback::NetlinkCallback; use super::endpoint::NetlinkEndpoint; -use super::netlink::{ - NLmsgFlags, NLmsgType, NLmsghdr, VecExt, NETLINK_USERSOCK, NL_CFG_F_NONROOT_SEND, -}; -use super::netlink_proto::{proto_register, Proto, NETLINK_PROTO}; +use super::netlink_proto::{proto_register, NETLINK_PROTO}; use super::skbuff::{netlink_overrun, skb_orphan, skb_shared}; use super::sock::SockFlags; +use super::{NLmsgFlags, NLmsgType, NLmsghdr, VecExt, NETLINK_USERSOCK, NL_CFG_F_NONROOT_SEND}; use crate::init::initcall::INITCALL_CORE; -use crate::net::socket::netlink::netlink::NetlinkState; +use crate::net::socket::netlink::NetlinkState; // Flags constants bitflags! { pub struct NetlinkFlags: u32 { @@ -91,6 +85,7 @@ impl<'a> Iterator for HListHeadIter<'a> { } } } +type NetlinkSockComparator = Arc bool + Send + Sync>; /// 每一个netlink协议族都有一个NetlinkTable,用于保存该协议族的所有netlink套接字 pub struct NetlinkTable { hash: HashMap>>>, @@ -101,24 +96,26 @@ pub struct NetlinkTable { mc_list: HListHead, pub bind: Option i32 + Send + Sync>>, pub unbind: Option i32 + Send + Sync>>, - pub compare: Option bool + Send + Sync>>, + pub compare: Option, } -impl<'a> NetlinkTable { +impl NetlinkTable { fn new() -> NetlinkTable { NetlinkTable { hash: HashMap::new(), - listeners: Some(Listeners { masks: Vec::new() }), + listeners: Some(Listeners { + masks: Vec::with_capacity(32), + }), registered: 0, flags: 0, - groups: 0, + groups: 32, mc_list: HListHead { first: None }, bind: None, unbind: None, compare: None, } } - fn listeners(&self) -> RCuListeners { - RCuListeners::new() + fn listeners(&self) -> Listeners { + Listeners::new() } fn flags(&self) -> u32 { 0 @@ -145,40 +142,6 @@ impl<'a> NetlinkTable { } } -pub struct LockedNetlinkTable(RwLock); - -impl LockedNetlinkTable { - pub fn new(netlinktable: NetlinkTable) -> LockedNetlinkTable { - LockedNetlinkTable(RwLock::new(netlinktable)) - } -} -// You would need to implement the actual methods for the traits and the bind/unbind functions. -trait NetlinkMessageHandler { - fn handle_message(&mut self, msg: &[u8]) { - // Implementation of message handling - } -} - -struct RCuListeners { - list: Vec>, -} - -impl RCuListeners { - fn new() -> Self { - Self { list: Vec::new() } - } - - fn register(&mut self, listener: Box) { - self.list.push(listener); - } - - fn handle_message(&mut self, msg: &[u8]) { - for listener in &mut self.list { - listener.handle_message(msg); - } - } -} - // https://code.dragonos.org.cn/xref/linux-6.1.9/net/netlink/af_netlink.c#2916 /// netlink 协议的最大数量 const MAX_LINKS: usize = 32; @@ -254,6 +217,10 @@ pub fn netlink_add_usersock_entry(nl_table: &mut RwLockWriteGuard>>, portid: u32, ) -> Result<(), SystemError> { - let mut nl_table = NL_TABLE.write(); + let mut nl_table: RwLockWriteGuard> = NL_TABLE.write(); let index = sk.lock().sk_protocol(); @@ -299,46 +266,57 @@ pub fn netlink_insert( Ok(()) } -/// + fn netlink_bind( sock: Arc>>, addr: &SockAddrNl, ) -> Result<(), SystemError> { log::info!("netlink_bind here!"); - let sk = Arc::clone(&sock); + let sk: Arc>> = Arc::clone(&sock); // todo: net namespace支持 // let net = sock_net(sk); - let nlk: Arc = Arc::clone(&sk) - .arc_any() - .downcast() - .map_err(|_| SystemError::EINVAL)?; - let nladdr = addr; - let mut err = 0; - let mut groups: u32; - let mut bound: bool; + log::info!("netlink_bind: nl_family: {:?}", addr.nl_family); + // let nlk: Arc> = sock + // .clone() + // .arc_any() + // .downcast() + // .map_err(|_| SystemError::EINVAL)?; + let nlk = Arc::new(Mutex::new( + sock.lock() + .deref() + .as_any() + .downcast_ref::() + .ok_or(SystemError::EINVAL)? + .clone(), + )); + let nladdr = addr; + let mut groups: u32; + log::info!("netlink_bind: nl_family: {:?}", nladdr.nl_family); if nladdr.nl_family != AddressFamily::Netlink { + log::warn!("netlink_bind: nl_family != AF_NETLINK"); return Err(SystemError::EINVAL); } groups = nladdr.nl_groups; - + log::info!("netlink_bind: groups: {}", groups); + let mut nlk = nlk.lock(); // Only superuser is allowed to listen multicasts - // if groups != 0 { - // if !netlink_allowed(sock, NL_CFG_F_NONROOT_RECV) { - // return Err(-EPERM); - // } - // err = netlink_realloc_groups(sk); - // if err != 0 { - // return Err(err); - // } - // } + if groups != 0 { + let group_count = addr.nl_groups.count_ones(); // 计算多播组数量 + nlk.ngroups = group_count; + // if !netlink_allowed(sock, NL_CFG_F_NONROOT_RECV) { + // return Err(-EPERM); + // } + let _ = netlink_realloc_groups(&mut nlk); + } // BITS_PER_LONG = __WORDSIZE = 64 - if nlk.ngroups < 64 as u64 { + if nlk.ngroups < 64 { groups &= (1 << nlk.ngroups) - 1; } - bound = nlk.bound; + let bound = nlk.bound; + log::info!("netlink_bind: bound: {}", bound); if bound { // Ensure nlk.portid is up-to-date. if nladdr.nl_pid != nlk.portid { @@ -348,15 +326,16 @@ fn netlink_bind( if groups != 0 { for group in 0..(mem::size_of::() * 8) as u32 { - if group == groups { + if group != groups { continue; } + // 尝试绑定到第 group + 1 个组播组。如果绑定成功(错误码err为0),则继续绑定下一个组播组。 // err = nlk.bind().unwrap()(group + 1); - if err == 0 { - continue; - } + // if err == 0 { + // continue; + // } // netlink_undo_bind(group, groups, sk); - return Err(SystemError::EINVAL); + // return Err(SystemError::EINVAL); } } @@ -364,46 +343,70 @@ fn netlink_bind( // using any of the bound attributes. if !bound { if nladdr.nl_pid != 0 { + log::info!("netlink_bind: insert"); let _ = netlink_insert(sk, nladdr.nl_pid); } else { - // todo - // netlink_autobind(sock) + log::info!("netlink_bind: autobind"); + netlink_autobind(sock, &mut nlk.portid); }; - if err != 0 { - // BITS_PER_TYPE = SIZEOF TYPE * BITS PER BYTES - // todo - // netlink_undo_bind(mem::size_of::() * 8, groups, sk); - // netlink_unlock_table(); - return Err(SystemError::EINVAL); - } + // if err != 0 { + // BITS_PER_TYPE = SIZEOF TYPE * BITS PER BYTES + // todo + // netlink_undo_bind(mem::size_of::() * 8, groups, sk); + // netlink_unlock_table(); + // return Err(SystemError::EINVAL); + // } } - // todo // netlink_update_subscriptions(sk, nlk.subscriptions + hweight32(groups) - hweight32(nlk.groups.unwrap()[0])); - // nlk.groups.unwrap()[0] = (nlk.groups.unwrap()[0] & !0xffffffff) | groups; - // netlink_update_listeners(sk); + log::info!("netlink_bind: nlk.groups: {:?}", nlk.groups); + nlk.groups[0] = groups; + log::info!("netlink_bind: nlk.groups: {:?}", nlk.groups); + netlink_update_listeners(nlk); Ok(()) } +/// 自动为netlink套接字选择一个端口号,并在netlink table 中插入这个端口。如果端口已经被使用,它会尝试使用不同的端口号直到找到一个可用的端口。如果有多个线程同时尝试绑定,则认为是正常情况,并成功返回. +fn netlink_autobind(sk: Arc>>, portid: &mut u32) { + let mut rover: u32 = 0; + loop { + // 假设 netlink_lookup 是一个函数,返回一个 Option>>> 类型 + let ret = netlink_lookup(sk.lock().sk_protocol(), *portid); + + // 如果查询成功 + if ret.is_some() { + // 如果 rover 是 0,重置为 1 + if rover == 0 { + // todo:随机 + rover = 1; // 在 Rust 中不能有 -4096 这样的u32值,因此我们从 1 开始递减 + } else { + // 否则递减 rover + rover -= 1; + } + *portid = rover; + } else { + // 如果查询失败,增加 rover + rover += 1; + *portid = rover; + break; + } + } + let _ = netlink_insert(sk, *portid); +} // TODO: net namespace支持 // https://code.dragonos.org.cn/xref/linux-6.1.9/net/netlink/af_netlink.c#532 /// 在 netlink_table 中查找 netlink 套接字 -fn netlink_lookup(protocol: usize, portid: u32) -> Arc>> { +fn netlink_lookup(protocol: usize, portid: u32) -> Option>>> { // todo: net 支持 let nl_table = NL_TABLE.read(); let index = protocol; let sk = nl_table[index].hash.get(&portid).unwrap(); - Arc::clone(sk) + Some(Arc::clone(sk)) } // https://code.dragonos.org.cn/xref/linux-6.1.9/net/netlink/af_netlink.c#672 -pub enum Error { - SocketTypeNotSupported, - ProtocolNotSupported, -} - // netlink机制特定的内核抽象,不同于标准的trait Socket pub trait NetlinkSocket: Socket + Any { // fn sk_prot(&self) -> &dyn proto; @@ -448,8 +451,8 @@ pub struct NetlinkSock { dst_group: u32, pub flags: u32, subscriptions: u32, - ngroups: u64, - groups: Vec, + ngroups: u32, + groups: Vec, pub protocol: usize, bound: bool, state: NetlinkState, @@ -476,13 +479,12 @@ impl Socket for NetlinkSock { let addr = netlinkendpoint.addr; let sock: Arc>> = Arc::new(Mutex::new(Box::new(self.clone()))); - let _ = netlink_bind(sock, &addr); + return netlink_bind(sock, &addr); } _ => { return Err(SystemError::EINVAL); } } - Ok(()) } fn close(&self) -> Result<(), SystemError> { Ok(()) @@ -529,6 +531,10 @@ impl Socket for NetlinkSock { log::warn!("recv_buffer_size is implemented to 0"); 0 } + + fn set_option(&self, level: OptionsLevel, name: usize, val: &[u8]) -> Result<(), SystemError> { + return netlink_setsockopt(self, level, name, val); + } } impl IndexNode for NetlinkSock { fn read_at( @@ -606,10 +612,10 @@ impl NetlinkSocket for NetlinkSock { Vec::new() } fn flags(&self) -> Option { - Some(SockFlags::SockDead) + Some(SockFlags::Dead) } fn sock_sndtimeo(&self, noblock: bool) -> i64 { - if noblock == true { + if noblock { return 0; } else { return self.sk_sndtimeo; @@ -643,7 +649,7 @@ impl NetlinkSock { flags: 0, subscriptions: 0, ngroups: 0, - groups: Vec::new(), + groups: vec![0; 32], bound: false, state: NetlinkState::NetlinkUnconnected, protocol: 1, @@ -657,14 +663,7 @@ impl NetlinkSock { callback: None, } } - fn register(&self, listener: Box) { - // Implementation of the function - } - fn unregister(&self, listener: Box) { - // Implementation of the function - } // https://code.dragonos.org.cn/xref/linux-6.1.9/net/netlink/af_netlink.c#1078 - /// fn netlink_connect(&self, _endpoint: Endpoint) -> Result<(), SystemError> { Ok(()) } @@ -686,7 +685,7 @@ impl NetlinkSock { } #[allow(unsafe_code)] let header = unsafe { &*(data.as_ptr() as *const NLmsghdr) }; - if header.nlmsg_len as usize > data.len() { + if header.nlmsg_len > data.len() { log::warn!( "netlink_send: data too short, nlmsg_len: {}", header.nlmsg_len @@ -700,7 +699,7 @@ impl NetlinkSock { let mut msg = Vec::new(); let new_header = NLmsghdr { nlmsg_len: 0, // to be determined later - nlmsg_type: NLmsgType::NLMSG_DONE.into(), + nlmsg_type: NLmsgType::NLMSG_DONE, nlmsg_flags: NLmsgFlags::NLM_F_MULTI, nlmsg_seq: header.nlmsg_seq, nlmsg_pid: header.nlmsg_pid, @@ -768,15 +767,15 @@ impl NetlinkSock { #[derive(Clone)] pub struct Listeners { - // Recursive Wakeup Unlocking? + // todo: rcu + // 动态位图,每一位代表一个组播组,如果对应位为 1,表示有监听 masks: Vec, } impl Listeners { + /// 创建一个新的 `Listeners` 实例,并将 `masks` 的所有位初始化为 0 pub fn new() -> Listeners { - Listeners { masks: Vec::new() } - } - fn masks(&self) -> Vec { - Vec::new() + let masks = vec![0u64; 32]; + Listeners { masks } } } @@ -785,6 +784,7 @@ fn initialize_netlink_table() -> RwLock> { for _ in 0..MAX_LINKS { tables.push(NetlinkTable::new()); } + log::info!("initialize_netlink_table,len:{}", tables.len()); RwLock::new(tables) } @@ -792,7 +792,6 @@ lazy_static! { /// 一个维护全局的 NetlinkTable 向量,每一个元素代表一个 netlink 协议类型,最大数量为 MAX_LINKS pub static ref NL_TABLE: RwLock> = initialize_netlink_table(); } - pub fn netlink_has_listeners(sk: &NetlinkSock, group: u32) -> i32 { log::info!("netlink_has_listeners"); let mut res = 0; @@ -817,10 +816,15 @@ pub fn netlink_has_listeners(sk: &NetlinkSock, group: u32) -> i32 { // 检查 listeners 是否存在 if let Some(listeners) = &netlink_table.listeners { // 检查 group 是否在范围内 + log::info!("listeners.masks:{:?}", listeners.masks); if group > 0 && (group as usize - 1) < listeners.masks.len() { res = listeners.masks[group as usize - 1] as i32; } else { - log::error!("Group {} is out of bounds", group); + log::error!( + "Group {} is out of bounds, len is {}", + group, + listeners.masks.len() + ); } } else { log::error!("Listeners for protocol {} are None", protocol); @@ -907,7 +911,7 @@ fn do_one_broadcast( if info.skb_2.read().is_empty() { netlink_overrun(&sk); info.failure = 1; - if !sk.lock().flags().is_none() & !NetlinkFlags::BROADCAST_SEND_ERROR.bits().is_zero() { + if sk.lock().flags().is_some() & !NetlinkFlags::BROADCAST_SEND_ERROR.bits().is_zero() { info.delivery_failure = 1; } return Err(SystemError::EINVAL); @@ -925,7 +929,7 @@ fn do_one_broadcast( // 如果将承载了组播消息的 skb 发送到该用户进程 netlink 套接字失败 if ret < 0 { netlink_overrun(&sk); - if !sk.lock().flags().is_none() & !NetlinkFlags::BROADCAST_SEND_ERROR.bits().is_zero() { + if sk.lock().flags().is_some() & !NetlinkFlags::BROADCAST_SEND_ERROR.bits().is_zero() { info.delivery_failure = 1; } } else { @@ -948,8 +952,8 @@ fn do_one_broadcast( /// [1]. 用户进程 --组播--> 用户进程 /// [2]. kernel --组播--> 用户进程 /// -pub fn netlink_broadcast<'a>( - ssk: &'a Arc, +pub fn netlink_broadcast( + ssk: &Arc, skb: Arc>, portid: u32, group: u64, @@ -1036,7 +1040,7 @@ fn netlink_broadcast_deliver( // 将 skb 发送到该 netlink 套接字,实际也就是将该 skb 放入了该套接字的接收队列中 let _ = netlink_sendskb(sk.clone(), skb); // 如果套接字的接收缓冲区已经接收但尚未处理数据长度大于其接收缓冲的1/2,则返回1 - if &sk.lock().sk_rmem_alloc() > &(sk.lock().sk_rcvbuf() >> 1) { + if sk.lock().sk_rmem_alloc() > (sk.lock().sk_rcvbuf() >> 1) { return 1; } else { return 0; @@ -1074,9 +1078,9 @@ fn netlink_sendskb(sk: Arc>>, skb: &Arc Result { let mut err: i32; - let timeo: i64; // todo:重新调整skb的大小 // skb = netlink_trim(skb, gfp_any()); // 计算发送超时时间(如果是非阻塞调用,则返回 0) - timeo = ssk.lock().sock_sndtimeo(nonblock); + let timeo: i64 = ssk.lock().sock_sndtimeo(nonblock); loop { // 根据源sock结构和目的单播地址,得到目的sock结构 let sk = netlink_getsockbyportid(ssk.clone(), portid); @@ -1150,7 +1153,7 @@ fn netlink_unicast_kernel( // ret = ECONNREFUSED = 111; ret = 111; // 检查内核netlink套接字是否注册了netlink_rcv回调(就是各个协议在创建内核netlink套接字时通常会传入的input函数) - if !nlk_guard.callback.is_none() { + if nlk_guard.callback.is_some() { ret = skb.read().len; netlink_skb_set_owner_r(&skb, sk); // todo: netlink_deliver_tap_kernel(sk, ssk, skb); @@ -1218,7 +1221,7 @@ fn netlink_attachskb( if (sk.lock().sk_rmem_alloc() > sk.lock().sk_rcvbuf() || nlk_guard.state == NetlinkState::NETLINK_S_CONGESTED) && // todo: sock_flag - sk.lock().flags() != Some(SockFlags::SockDead) + sk.lock().flags() != Some(SockFlags::Dead) { timeo = schedule_timeout(timeo)?; } @@ -1242,7 +1245,8 @@ fn netlink_getsockbyportid( ssk: Arc>>, portid: u32, ) -> Result>>, SystemError> { - let sock: Arc>> = netlink_lookup(ssk.lock().sk_protocol(), portid); + let sock: Arc>> = + netlink_lookup(ssk.lock().sk_protocol(), portid).unwrap(); if Some(sock.clone()).is_none() { return Err(SystemError::ECONNREFUSED); } @@ -1266,3 +1270,112 @@ fn netlink_getsockbyportid( } return Ok(sock); } + +/// 设置 netlink 套接字的选项 +fn netlink_setsockopt( + nlk: &NetlinkSock, + level: OptionsLevel, + optname: usize, + optval: &[u8], +) -> Result<(), SystemError> { + if level != OptionsLevel::NETLINK { + return Err(SystemError::ENOPROTOOPT); + } + let optlen = optval.len(); + let mut val: usize = 0; + if optlen >= size_of::() { + unsafe { + if optval.len() >= size_of::() { + // 将 optval 中的数据拷贝到 val 中 + copy_nonoverlapping( + optval.as_ptr(), + &mut val as *mut usize as *mut u8, + size_of::(), + ); + } else { + return Err(SystemError::EFAULT); + } + } + } else { + return Err(SystemError::EINVAL); + } + match optname { + // add 和 drop 对应同一段代码 + NETLINK_ADD_MEMBERSHIP | NETLINK_DROP_MEMBERSHIP => { + let group = val as u64; + let mut nl_table = NL_TABLE.write(); + let netlink_table = &mut nl_table[nlk.protocol]; + let listeners = netlink_table.listeners.as_mut().unwrap(); + let group = group - 1; + let mask = 1 << (group % 64); + let idx = group / 64; + if optname == NETLINK_ADD_MEMBERSHIP { + listeners.masks[idx as usize] |= mask; + } else { + listeners.masks[idx as usize] &= !mask; + } + } + NETLINK_PKTINFO => { + // if val != 0 { + // nlk.flags |= NetlinkFlags::RECV_PKTINFO.bits(); + // } else { + // nlk.flags &= !NetlinkFlags::RECV_PKTINFO.bits(); + // } + } + _ => { + return Err(SystemError::ENOPROTOOPT); + } + } + Ok(()) +} + +fn netlink_update_listeners(nlk: MutexGuard) { + log::info!("netlink_update_listeners"); + let mut nl_table = NL_TABLE.write(); + let netlink_table = &mut nl_table[nlk.protocol]; + let listeners = netlink_table.listeners.as_mut().unwrap(); + listeners.masks.clear(); + log::info!("nlk.ngroups:{}", nlk.ngroups); + listeners.masks.resize(nlk.ngroups as usize, 0); + log::info!("nlk.groups:{:?}", nlk.groups); + for group in &nlk.groups { + let mask = 1 << (group % 64); + let idx = group / 64; + + listeners.masks[idx as usize] |= mask; + log::info!( + "group:{},mask:{},idx:{},masks:{:?}", + group, + mask, + idx, + listeners.masks + ); + } +} + +/// 重新分配 netlink 套接字的组 +fn netlink_realloc_groups(nlk: &mut MutexGuard) -> Result<(), SystemError> { + let nl_table = NL_TABLE.write(); + let groups = nl_table[nlk.protocol].groups; + if nl_table[nlk.protocol].registered == 0 { + // 没有注册任何组 + log::warn!("netlink_realloc_groups: not registered"); + return Err(SystemError::ENOENT); + } + if nlk.ngroups >= groups { + // 当前已分配的组数量 大于或等于 groups(当前协议的组数量),则没有必要重新分配\ + log::info!("netlink_realloc_groups: no need to realloc"); + return Ok(()); + } + log::info!("nlk.ngroups:{},groups:{}", nlk.ngroups, groups); + let mut new_groups = vec![0u32; groups as usize]; + log::info!("nlk.groups:{:?}", nlk.groups); + // 当 nlk.ngroups 大于 0 时复制数据 + if nlk.ngroups > 0 { + new_groups[..nlk.ngroups as usize].copy_from_slice(&nlk.groups); + } + nlk.groups = new_groups; + nlk.ngroups = groups; + log::info!("nlk.groups:{:?}", nlk.groups); + Ok(()) +} diff --git a/kernel/src/net/socket/netlink/mod.rs b/kernel/src/net/socket/netlink/mod.rs index 117d9410..8bcc16d8 100644 --- a/kernel/src/net/socket/netlink/mod.rs +++ b/kernel/src/net/socket/netlink/mod.rs @@ -1,31 +1,330 @@ -use alloc::sync::Arc; -use netlink::NETLINK_KOBJECT_UEVENT; -use system_error::SystemError; - -use crate::driver::base::uevent::KobjUeventEnv; - -use super::{family, inet::datagram, Inode, Socket, Type}; - -//https://code.dragonos.org.cn/xref/linux-6.1.9/net/netlink/ -/* -.. - - -Kconfig -Makefile -af_netlink.c -af_netlink.h -diag.c Netlink 套接字的诊断功能,主要用于查询内核中存在的 Netlink 套接字信息 -genetlink.c -policy.c -*/ -// Top-level module defining the public API for Netlink pub mod af_netlink; pub mod callback; pub mod endpoint; -pub mod netlink; pub mod netlink_proto; pub mod skbuff; pub mod sock; +use super::{family, inet::datagram, Inode, Socket, Type}; +use crate::driver::base::uevent::KobjUeventEnv; +use alloc::sync::Arc; +use alloc::{boxed::Box, slice, vec::Vec}; +use system_error::SystemError; + +// https://code.dragonos.org.cn/xref/linux-6.1.9/net/netlink/ + +use crate::libs::mutex::Mutex; +use core::mem; + +use af_netlink::{netlink_insert, Listeners, NetlinkFlags, NetlinkSock, NetlinkSocket, NL_TABLE}; +// 监听事件类型 +pub const NETLINK_ADD_MEMBERSHIP: usize = 1; +pub const NETLINK_DROP_MEMBERSHIP: usize = 2; +pub const NETLINK_PKTINFO: usize = 3; // 接收包信息。如果设置了这个选项,套接字将接收包含发送者信息(如发送者的端口号和地址)的消息 + // Netlink protocol family +pub const NETLINK_ROUTE: usize = 0; +pub const NETLINK_UNUSED: usize = 1; +pub const NETLINK_USERSOCK: usize = 2; +pub const NETLINK_FIREWALL: usize = 3; +pub const NETLINK_SOCK_DIAG: usize = 4; +pub const NETLINK_NFLOG: usize = 5; +pub const NETLINK_XFRM: usize = 6; +pub const NETLINK_SELINUX: usize = 7; +pub const NETLINK_ISCSI: usize = 8; +pub const NETLINK_AUDIT: usize = 9; +pub const NETLINK_FIB_LOOKUP: usize = 10; +pub const NETLINK_CONNECTOR: usize = 11; +pub const NETLINK_NETFILTER: usize = 12; +pub const NETLINK_IP6_FW: usize = 13; +pub const NETLINK_DNRTMSG: usize = 14; +// implemente uevent needed +pub const NETLINK_KOBJECT_UEVENT: usize = 15; +pub const NETLINK_GENERIC: usize = 16; +// pub const NETLINK_DM : usize = 17; // Assuming DM Events is unused, not defined +pub const NETLINK_SCSITRANSPORT: usize = 18; +pub const NETLINK_ECRYPTFS: usize = 19; +pub const NETLINK_RDMA: usize = 20; +pub const NETLINK_CRYPTO: usize = 21; +pub const NETLINK_SMC: usize = 22; + +//pub const NETLINK_INET_DIAG = NETLINK_SOCK_DIAG; +pub const NETLINK_INET_DIAG: usize = 4; + +pub const MAX_LINKS: usize = 32; + +pub const NL_CFG_F_NONROOT_RECV: u32 = 1 << 0; +pub const NL_CFG_F_NONROOT_SEND: u32 = 1 << 1; + +bitflags! { +/// 四种通用的消息类型 nlmsg_type +pub struct NLmsgType: u8 { + /* Nothing. */ + const NLMSG_NOOP = 0x1; + /* Error */ + const NLMSG_ERROR = 0x2; + /* End of a dump */ + const NLMSG_DONE = 0x3; + /* Data lost */ + const NLMSG_OVERRUN = 0x4; +} + +//消息标记 nlmsg_flags +// const NLM_F_REQUEST = 1; /* It is request message. */ +// const NLM_F_MULTI = 2; /* Multipart message, terminated by NLMSG_DONE */ +// const NLM_F_ACK = 4; /* Reply with ack, with zero or error code */ +// const NLM_F_ECHO = 8; /* Echo this request */ +// const NLM_F_DUMP_INTR = 16; /* Dump was inconsistent due to sequence change */ +pub struct NLmsgFlags: u16 { + /* Flags values */ + const NLM_F_REQUEST = 0x01; + const NLM_F_MULTI = 0x02; + const NLM_F_ACK = 0x04; + const NLM_F_ECHO = 0x08; + const NLM_F_DUMP_INTR = 0x10; + const NLM_F_DUMP_FILTERED = 0x20; + + /* Modifiers to GET request */ + const NLM_F_ROOT = 0x100; /* specify tree root */ + const NLM_F_MATCH = 0x200; /* return all matching */ + const NLM_F_ATOMIC = 0x400; /* atomic GET */ + //const NLM_F_DUMP = NLM_F_ROOT | NLM_F_MATCH; + const NLM_F_DUMP = 0x100 | 0x200; + + /* Modifiers to NEW request */ + const NLM_F_REPLACE = 0x100; /* Override existing */ + const NLM_F_EXCL = 0x200; /* Do not touch, if it exists */ + const NLM_F_CREATE = 0x400; /* Create, if it does not exist */ + const NLM_F_APPEND = 0x800; /* Add to end of list */ + + /* Modifiers to DELETE request */ + const NLM_F_NONREC = 0x100; /* Do not delete recursively */ + + /* Flags for ACK message */ + const NLM_F_CAPPED = 0x100; /* request was capped */ + const NLM_F_ACK_TLVS = 0x200; /* extended ACK TVLs were included */ +} +} +// 定义Netlink消息的结构体,如NLmsghdr和geNLmsghdr(拓展的netlink消息头),以及用于封包和解包消息的函数。 +// 参考 https://code.dragonos.org.cn/xref/linux-6.1.9/include/linux/netlink.h +/// netlink消息报头 +/** + * struct NLmsghdr - fixed format metadata header of Netlink messages + * @nlmsg_len: Length of message including header + * @nlmsg_type: Message content type + * @nlmsg_flags: Additional flags + * @nlmsg_seq: Sequence number + * @nlmsg_pid: Sending process port ID + */ +pub struct NLmsghdr { + pub nlmsg_len: usize, + pub nlmsg_type: NLmsgType, + pub nlmsg_flags: NLmsgFlags, + pub nlmsg_seq: u32, + pub nlmsg_pid: u32, +} + +const NLMSG_ALIGNTO: usize = 4; +#[derive(Debug, PartialEq, Copy, Clone)] +pub enum NetlinkState { + NetlinkUnconnected = 0, + NetlinkConnected, + NETLINK_S_CONGESTED = 2, +} + +fn nlmsg_align(len: usize) -> usize { + (len + NLMSG_ALIGNTO - 1) & !(NLMSG_ALIGNTO - 1) +} + +fn nlmsg_hdrlen() -> usize { + nlmsg_align(mem::size_of::()) +} + +fn nlmsg_length(len: usize) -> usize { + len + nlmsg_hdrlen() +} + +fn nlmsg_space(len: usize) -> usize { + nlmsg_align(nlmsg_length(len)) +} + +unsafe fn nlmsg_data(nlh: &NLmsghdr) -> *mut u8 { + ((nlh as *const NLmsghdr) as *mut u8).add(nlmsg_length(0)) +} + +unsafe fn nlmsg_next(nlh: *mut NLmsghdr, len: usize) -> *mut NLmsghdr { + let nlmsg_len = (*nlh).nlmsg_len; + let new_len = len - nlmsg_align(nlmsg_len); + nlh.add(nlmsg_align(nlmsg_len)) +} + +fn nlmsg_ok(nlh: &NLmsghdr, len: usize) -> bool { + len >= nlmsg_hdrlen() && nlh.nlmsg_len >= nlmsg_hdrlen() && nlh.nlmsg_len <= len +} + +fn nlmsg_payload(nlh: &NLmsghdr, len: usize) -> usize { + nlh.nlmsg_len - nlmsg_space(len) +} +// 定义类型别名来简化闭包类型的定义 +type InputCallback = Arc; +type BindCallback = Arc i32 + Send + Sync>; +type UnbindCallback = Arc i32 + Send + Sync>; +type CompareCallback = Arc bool + Send + Sync>; +/// 该结构包含了内核netlink的可选参数: +#[derive(Default)] +pub struct NetlinkKernelCfg { + pub groups: u32, + pub flags: u32, + pub input: Option, + pub bind: Option, + pub unbind: Option, + pub compare: Option, +} + +impl NetlinkKernelCfg { + pub fn new() -> Self { + NetlinkKernelCfg { + groups: 32, + flags: 0, + input: None, + bind: None, + unbind: None, + compare: None, + } + } + + pub fn set_input(&mut self, callback: F) + where + F: FnMut() + Send + Sync + 'static, + { + self.input = Some(Arc::new(callback)); + } + + pub fn set_bind(&mut self, callback: F) + where + F: Fn(i32) -> i32 + Send + Sync + 'static, + { + self.bind = Some(Arc::new(callback)); + } + + pub fn set_unbind(&mut self, callback: F) + where + F: Fn(i32) -> i32 + Send + Sync + 'static, + { + self.unbind = Some(Arc::new(callback)); + } + + pub fn set_compare(&mut self, callback: F) + where + F: Fn(&NetlinkSock) -> bool + Send + Sync + 'static, + { + self.compare = Some(Arc::new(callback)); + } +} +// https://code.dragonos.org.cn/xref/linux-6.1.9/include/linux/netlink.h#229 +// netlink属性头 +struct NLattr { + nla_len: u16, + nla_type: u16, +} + +pub trait VecExt { + fn align4(&mut self); + fn push_ext(&mut self, data: T); + fn set_ext(&mut self, offset: usize, data: T); +} + +impl VecExt for Vec { + fn align4(&mut self) { + let len = (self.len() + 3) & !3; + if len > self.len() { + self.resize(len, 0); + } + } + + fn push_ext(&mut self, data: T) { + #[allow(unsafe_code)] + let bytes = + unsafe { slice::from_raw_parts(&data as *const T as *const u8, size_of::()) }; + for byte in bytes { + self.push(*byte); + } + } + + fn set_ext(&mut self, offset: usize, data: T) { + if self.len() < offset + size_of::() { + self.resize(offset + size_of::(), 0); + } + #[allow(unsafe_code)] + let bytes = + unsafe { slice::from_raw_parts(&data as *const T as *const u8, size_of::()) }; + self[offset..(bytes.len() + offset)].copy_from_slice(bytes); + } +} + +// todo: net namespace +pub fn netlink_kernel_create( + unit: usize, + cfg: Option, +) -> Result { + // THIS_MODULE + let mut nlk: NetlinkSock = NetlinkSock::new(); + let sk: Arc>> = Arc::new(Mutex::new(Box::new(nlk.clone()))); + let groups: u32; + if unit >= MAX_LINKS { + return Err(SystemError::EINVAL); + } + __netlink_create(&mut nlk, unit, 1).expect("__netlink_create failed"); + + if let Some(cfg) = cfg.as_ref() { + if cfg.groups < 32 { + groups = 32; + } else { + groups = cfg.groups; + } + } else { + groups = 32; + } + let listeners = Listeners::new(); + // todo:设计和实现回调函数 + // sk.sk_data_read = netlink_data_ready; + // if cfg.is_some() && cfg.unwrap().input.is_some(){ + // nlk.netlink_rcv = cfg.unwrap().input; + // } + netlink_insert(sk, 0).expect("netlink_insert failed"); + nlk.flags |= NetlinkFlags::NETLINK_F_KERNEL_SOCKET.bits(); + + let mut nl_table = NL_TABLE.write(); + if nl_table[unit].get_registered() == 0 { + nl_table[unit].set_groups(groups); + if let Some(cfg) = cfg.as_ref() { + nl_table[unit].bind = cfg.bind.clone(); + nl_table[unit].unbind = cfg.unbind.clone(); + nl_table[unit].set_flags(cfg.flags); + if cfg.compare.is_some() { + nl_table[unit].compare = cfg.compare.clone(); + } + nl_table[unit].set_registered(1); + } else { + drop(listeners); + let registered = nl_table[unit].get_registered(); + nl_table[unit].set_registered(registered + 1); + } + } + return Ok(nlk); +} + +fn __netlink_create(nlk: &mut NetlinkSock, unit: usize, kern: usize) -> Result { + // 其他的初始化配置参数 + nlk.flags = kern as u32; + nlk.protocol = unit; + return Ok(0); +} + +pub fn sk_data_ready(nlk: Arc) -> Result<(), SystemError> { + // 唤醒 + return Ok(()); +} + pub struct Netlink; impl family::Family for Netlink { diff --git a/kernel/src/net/socket/netlink/netlink.rs b/kernel/src/net/socket/netlink/netlink.rs index 790bba25..e69de29b 100644 --- a/kernel/src/net/socket/netlink/netlink.rs +++ b/kernel/src/net/socket/netlink/netlink.rs @@ -1,319 +0,0 @@ -use alloc::{ - boxed::Box, - slice, - sync::{Arc, Weak}, - vec::Vec, -}; -use system_error::SystemError; - -//定义Netlink消息的结构体,如NLmsghdr和geNLmsghdr(拓展的netlink消息头),以及用于封包和解包消息的函数。 -//参考 https://code.dragonos.org.cn/xref/linux-6.1.9/include/linux/netlink.h -// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note -// Ensure the header is only included once -use crate::libs::mutex::Mutex; -use core::mem; - -use super::af_netlink::{ - netlink_insert, Listeners, NetlinkFlags, NetlinkSock, NetlinkSocket, NL_TABLE, -}; -// Netlink protocol family -pub const NETLINK_ROUTE: usize = 0; -pub const NETLINK_UNUSED: usize = 1; -pub const NETLINK_USERSOCK: usize = 2; -pub const NETLINK_FIREWALL: usize = 3; -pub const NETLINK_SOCK_DIAG: usize = 4; -pub const NETLINK_NFLOG: usize = 5; -pub const NETLINK_XFRM: usize = 6; -pub const NETLINK_SELINUX: usize = 7; -pub const NETLINK_ISCSI: usize = 8; -pub const NETLINK_AUDIT: usize = 9; -pub const NETLINK_FIB_LOOKUP: usize = 10; -pub const NETLINK_CONNECTOR: usize = 11; -pub const NETLINK_NETFILTER: usize = 12; -pub const NETLINK_IP6_FW: usize = 13; -pub const NETLINK_DNRTMSG: usize = 14; -// implemente uevent needed -pub const NETLINK_KOBJECT_UEVENT: usize = 15; -pub const NETLINK_GENERIC: usize = 16; -// pub const NETLINK_DM : usize = 17; // Assuming DM Events is unused, not defined -pub const NETLINK_SCSITRANSPORT: usize = 18; -pub const NETLINK_ECRYPTFS: usize = 19; -pub const NETLINK_RDMA: usize = 20; -pub const NETLINK_CRYPTO: usize = 21; -pub const NETLINK_SMC: usize = 22; - -//pub const NETLINK_INET_DIAG = NETLINK_SOCK_DIAG; -pub const NETLINK_INET_DIAG: usize = 4; - -pub const MAX_LINKS: usize = 32; - -pub const NL_CFG_F_NONROOT_RECV: u32 = 1 << 0; -pub const NL_CFG_F_NONROOT_SEND: u32 = 1 << 1; - -bitflags! { -/// 四种通用的消息类型 nlmsg_type -pub struct NLmsgType: u8 { - /* Nothing. */ - const NLMSG_NOOP = 0x1; - /* Error */ - const NLMSG_ERROR = 0x2; - /* End of a dump */ - const NLMSG_DONE = 0x3; - /* Data lost */ - const NLMSG_OVERRUN = 0x4; -} - -//消息标记 nlmsg_flags -// const NLM_F_REQUEST = 1; /* It is request message. */ -// const NLM_F_MULTI = 2; /* Multipart message, terminated by NLMSG_DONE */ -// const NLM_F_ACK = 4; /* Reply with ack, with zero or error code */ -// const NLM_F_ECHO = 8; /* Echo this request */ -// const NLM_F_DUMP_INTR = 16; /* Dump was inconsistent due to sequence change */ -pub struct NLmsgFlags: u16 { - /* Flags values */ - const NLM_F_REQUEST = 0x01; - const NLM_F_MULTI = 0x02; - const NLM_F_ACK = 0x04; - const NLM_F_ECHO = 0x08; - const NLM_F_DUMP_INTR = 0x10; - const NLM_F_DUMP_FILTERED = 0x20; - - /* Modifiers to GET request */ - const NLM_F_ROOT = 0x100; /* specify tree root */ - const NLM_F_MATCH = 0x200; /* return all matching */ - const NLM_F_ATOMIC = 0x400; /* atomic GET */ - //const NLM_F_DUMP = NLM_F_ROOT | NLM_F_MATCH; - const NLM_F_DUMP = 0x100 | 0x200; - - /* Modifiers to NEW request */ - const NLM_F_REPLACE = 0x100; /* Override existing */ - const NLM_F_EXCL = 0x200; /* Do not touch, if it exists */ - const NLM_F_CREATE = 0x400; /* Create, if it does not exist */ - const NLM_F_APPEND = 0x800; /* Add to end of list */ - - /* Modifiers to DELETE request */ - const NLM_F_NONREC = 0x100; /* Do not delete recursively */ - - /* Flags for ACK message */ - const NLM_F_CAPPED = 0x100; /* request was capped */ - const NLM_F_ACK_TLVS = 0x200; /* extended ACK TVLs were included */ -} -} -/// netlink消息报头 -/** - * struct NLmsghdr - fixed format metadata header of Netlink messages - * @nlmsg_len: Length of message including header - * @nlmsg_type: Message content type - * @nlmsg_flags: Additional flags - * @nlmsg_seq: Sequence number - * @nlmsg_pid: Sending process port ID - */ -pub struct NLmsghdr { - pub nlmsg_len: usize, - pub nlmsg_type: NLmsgType, - pub nlmsg_flags: NLmsgFlags, - pub nlmsg_seq: u32, - pub nlmsg_pid: u32, -} - -const NLMSG_ALIGNTO: usize = 4; -#[derive(Debug, PartialEq, Copy, Clone)] -pub enum NetlinkState { - NetlinkUnconnected = 0, - NetlinkConnected, - NETLINK_S_CONGESTED = 2, -} - -fn nlmsg_align(len: usize) -> usize { - (len + NLMSG_ALIGNTO - 1) & !(NLMSG_ALIGNTO - 1) -} - -fn nlmsg_hdrlen() -> usize { - nlmsg_align(mem::size_of::()) -} - -fn nlmsg_length(len: usize) -> usize { - len + nlmsg_hdrlen() -} - -fn nlmsg_space(len: usize) -> usize { - nlmsg_align(nlmsg_length(len)) -} - -unsafe fn nlmsg_data(nlh: &NLmsghdr) -> *mut u8 { - ((nlh as *const NLmsghdr) as *mut u8).add(nlmsg_length(0)) -} - -unsafe fn nlmsg_next(nlh: *mut NLmsghdr, len: usize) -> *mut NLmsghdr { - let nlmsg_len = (*nlh).nlmsg_len; - let new_len = len - nlmsg_align(nlmsg_len); - nlh.add(nlmsg_align(nlmsg_len)) -} - -fn nlmsg_ok(nlh: &NLmsghdr, len: usize) -> bool { - len >= nlmsg_hdrlen() && nlh.nlmsg_len >= nlmsg_hdrlen() && nlh.nlmsg_len <= len -} - -fn nlmsg_payload(nlh: &NLmsghdr, len: usize) -> usize { - nlh.nlmsg_len - nlmsg_space(len) -} -// 定义类型别名来简化闭包类型的定义 -type InputCallback = Arc; -type BindCallback = Arc i32 + Send + Sync>; -type UnbindCallback = Arc i32 + Send + Sync>; -type CompareCallback = Arc bool + Send + Sync>; -/// 该结构包含了内核netlink的可选参数: -#[derive(Default)] -pub struct NetlinkKernelCfg { - pub groups: u32, - pub flags: u32, - pub input: Option, - pub bind: Option, - pub unbind: Option, - pub compare: Option, -} - -impl NetlinkKernelCfg { - pub fn new() -> Self { - NetlinkKernelCfg { - groups: 32, - flags: 0, - input: None, - bind: None, - unbind: None, - compare: None, - } - } - - pub fn set_input(&mut self, callback: F) - where - F: FnMut() + Send + Sync + 'static, - { - self.input = Some(Arc::new(callback)); - } - - pub fn set_bind(&mut self, callback: F) - where - F: Fn(i32) -> i32 + Send + Sync + 'static, - { - self.bind = Some(Arc::new(callback)); - } - - pub fn set_unbind(&mut self, callback: F) - where - F: Fn(i32) -> i32 + Send + Sync + 'static, - { - self.unbind = Some(Arc::new(callback)); - } - - pub fn set_compare(&mut self, callback: F) - where - F: Fn(&NetlinkSock) -> bool + Send + Sync + 'static, - { - self.compare = Some(Arc::new(callback)); - } -} -//https://code.dragonos.org.cn/xref/linux-6.1.9/include/linux/netlink.h#229 -//netlink属性头 -struct NLattr { - nla_len: u16, - nla_type: u16, -} - -pub trait VecExt { - fn align4(&mut self); - fn push_ext(&mut self, data: T); - fn set_ext(&mut self, offset: usize, data: T); -} - -impl VecExt for Vec { - fn align4(&mut self) { - let len = (self.len() + 3) & !3; - if len > self.len() { - self.resize(len, 0); - } - } - - fn push_ext(&mut self, data: T) { - #[allow(unsafe_code)] - let bytes = - unsafe { slice::from_raw_parts(&data as *const T as *const u8, size_of::()) }; - for byte in bytes { - self.push(*byte); - } - } - - fn set_ext(&mut self, offset: usize, data: T) { - if self.len() < offset + size_of::() { - self.resize(offset + size_of::(), 0); - } - #[allow(unsafe_code)] - let bytes = - unsafe { slice::from_raw_parts(&data as *const T as *const u8, size_of::()) }; - self[offset..(bytes.len() + offset)].copy_from_slice(bytes); - } -} - -// todo: net namespace -pub fn netlink_kernel_create( - unit: usize, - cfg: Option, -) -> Result { - // THIS_MODULE - let mut nlk: NetlinkSock = NetlinkSock::new(); - let sk: Arc>> = Arc::new(Mutex::new(Box::new(nlk.clone()))); - let groups: u32; - if unit >= MAX_LINKS { - return Err(SystemError::EINVAL); - } - __netlink_create(&mut nlk, unit, 1).expect("__netlink_create failed"); - - if let Some(cfg) = cfg.as_ref() { - if cfg.groups < 32 { - groups = 32; - } else { - groups = cfg.groups; - } - } else { - groups = 32; - } - let listeners = Listeners::new(); - // todo:设计和实现回调函数 - // sk.sk_data_read = netlink_data_ready; - // if cfg.is_some() && cfg.unwrap().input.is_some(){ - // nlk.netlink_rcv = cfg.unwrap().input; - // } - netlink_insert(sk, 0).expect("netlink_insert failed"); - nlk.flags |= NetlinkFlags::NETLINK_F_KERNEL_SOCKET.bits(); - - let mut nl_table = NL_TABLE.write(); - if nl_table[unit].get_registered() == 0 { - nl_table[unit].set_groups(groups); - if let Some(cfg) = cfg.as_ref() { - nl_table[unit].bind = cfg.bind.clone(); - nl_table[unit].unbind = cfg.unbind.clone(); - nl_table[unit].set_flags(cfg.flags); - if cfg.compare.is_some() { - nl_table[unit].compare = cfg.compare.clone(); - } - nl_table[unit].set_registered(1); - } else { - drop(listeners); - let registered = nl_table[unit].get_registered(); - nl_table[unit].set_registered(registered + 1); - } - } - return Ok(nlk); -} - -fn __netlink_create(nlk: &mut NetlinkSock, unit: usize, kern: usize) -> Result { - // 其他的初始化配置参数 - nlk.flags = kern as u32; - nlk.protocol = unit; - return Ok(0); -} - -pub fn sk_data_ready(nlk: Arc) -> Result<(), SystemError> { - // 唤醒 - return Ok(()); -} diff --git a/kernel/src/net/socket/netlink/sock.rs b/kernel/src/net/socket/netlink/sock.rs index 258416d3..915b118c 100644 --- a/kernel/src/net/socket/netlink/sock.rs +++ b/kernel/src/net/socket/netlink/sock.rs @@ -1,34 +1,33 @@ -// Sock flags in Rust #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub enum SockFlags { - SockDead, - SockDone, - SockUrginline, - SockKeepopen, - SockLinger, - SockDestroy, - SockBroadcast, - SockTimestamp, - SockZapped, - SockUseWriteQueue, // whether to call sk->sk_write_space in sock_wfree - SockDbg, // %SO_DEBUG setting - SockRcvtstamp, // %SO_TIMESTAMP setting - SockRcvtstampns, // %SO_TIMESTAMPNS setting - SockLocalroute, // route locally only, %SO_DONTROUTE setting - SockMemalloc, // VM depends on this socket for swapping - SockTimestampingRxSoftware, // %SOF_TIMESTAMPING_RX_SOFTWARE - SockFasync, // fasync() active - SockRxqOvfl, - SockZerocopy, // buffers from userspace - SockWifiStatus, // push wifi status to userspace - SockNofcs, // Tell NIC not to do the Ethernet FCS. + Dead, + Done, + Urginline, + Keepopen, + Linger, + Destroy, + Broadcast, + Timestamp, + Zapped, + UseWriteQueue, // whether to call sk->sk_write_space in _wfree + Dbg, // %SO_DEBUG setting + Rcvtstamp, // %SO_TIMESTAMP setting + Rcvtstampns, // %SO_TIMESTAMPNS setting + Localroute, // route locally only, %SO_DONTROUTE setting + Memalloc, // VM depends on this et for swapping + TimestampingRxSoftware, // %SOF_TIMESTAMPING_RX_SOFTWARE + Fasync, // fasync() active + RxqOvfl, + Zerocopy, // buffers from userspace + WifiStatus, // push wifi status to userspace + Nofcs, // Tell NIC not to do the Ethernet FCS. // Will use last 4 bytes of packet sent from // user-space instead. - SockFilterLocked, // Filter cannot be changed anymore - SockSelectErrQueue, // Wake select on error queue - SockRcuFree, // wait rcu grace period in sk_destruct() - SockTxtime, - SockXdp, // XDP is attached - SockTstampNew, // Indicates 64 bit timestamps always - SockRcvmark, // Receive SO_MARK ancillary data with packet + FilterLocked, // Filter cannot be changed anymore + SelectErrQueue, // Wake select on error queue + RcuFree, // wait rcu grace period in sk_destruct() + Txtime, + Xdp, // XDP is attached + TstampNew, // Indicates 64 bit timestamps always + Rcvmark, // Receive SO_MARK ancillary data with packet } diff --git a/kernel/src/net/socket/unix/stream/mod.rs b/kernel/src/net/socket/unix/stream/mod.rs index 1cedb422..2978a0b1 100644 --- a/kernel/src/net/socket/unix/stream/mod.rs +++ b/kernel/src/net/socket/unix/stream/mod.rs @@ -231,9 +231,10 @@ impl Socket for StreamSocket { //目前只实现了阻塞式实现 loop { wq_wait_event_interruptible!(self.wait_queue, self.is_acceptable(), {})?; - match self.try_accept().map(|(stream_socket, remote_endpoint)| { - (stream_socket, remote_endpoint) - }) { + match self + .try_accept() + .map(|(stream_socket, remote_endpoint)| (stream_socket, remote_endpoint)) + { Ok((socket, endpoint)) => { debug!("server accept!:{:?}", endpoint); return Ok((socket, endpoint));