diff --git a/kernel/src/context.rs b/kernel/src/context.rs
index 42e1f1f10..85959dfaf 100644
--- a/kernel/src/context.rs
+++ b/kernel/src/context.rs
@@ -166,24 +166,31 @@ impl<'a> CurrentUserSpace<'a> {
 }
 
 /// A trait providing the ability to read a C string from the user space.
-///
-/// The user space should be of the current process. The implemented method
-/// should read the bytes iteratively in the reader ([`VmReader`]) until
-/// encountering the end of the reader or reading a `\0` (which is also
-/// included in the final C String).
 pub trait ReadCString {
+    /// Reads a C string from `self`.
+    ///
+    /// This method should read the bytes iteratively in `self` until
+    /// encountering the end of the reader or reading a `\0` (which is also
+    /// included in the final C String).
     fn read_cstring(&mut self) -> Result<CString>;
+
+    /// Reads a C string from `self` with a maximum length of `max_len`.
+    ///
+    /// This method functions similarly to [`ReadCString::read_cstring`],
+    /// but imposes an additional limit on the length of the C string.
+    fn read_cstring_with_max_len(&mut self, max_len: usize) -> Result<CString>;
 }
 
 impl ReadCString for VmReader<'_, Fallible> {
-    /// Reads a C string from the user space.
-    ///
-    /// This implementation is inspired by
-    /// the `do_strncpy_from_user` function in Linux kernel.
-    /// The original Linux implementation can be found at:
-    ///
     fn read_cstring(&mut self) -> Result<CString> {
-        let max_len = self.remain();
+        self.read_cstring_with_max_len(self.remain())
+    }
+
+    fn read_cstring_with_max_len(&mut self, max_len: usize) -> Result<CString> {
+        // This implementation is inspired by
+        // the `do_strncpy_from_user` function in Linux kernel.
+        // The original Linux implementation can be found in
+        // `lib/strncpy_from_user.c` of the Linux source tree.
         let mut buffer: Vec<u8> = Vec::with_capacity(max_len);
 
         if read_until_nul_byte(self, &mut buffer, max_len)? {
@@ -199,7 +206,10 @@ impl ReadCString for VmReader<'_, Fallible> {
 
 impl ReadCString for VmReaderArray<'_> {
     fn read_cstring(&mut self) -> Result<CString> {
-        let max_len = self.sum_lens();
+        self.read_cstring_with_max_len(self.sum_lens())
+    }
+
+    fn read_cstring_with_max_len(&mut self, max_len: usize) -> Result<CString> {
         let mut buffer: Vec<u8> = Vec::with_capacity(max_len);
 
         for reader in self.readers_mut() {
diff --git a/kernel/src/lib.rs b/kernel/src/lib.rs
index 8355ee8f2..21ce519c6 100644
--- a/kernel/src/lib.rs
+++ b/kernel/src/lib.rs
@@ -28,6 +28,7 @@
 #![feature(step_trait)]
 #![feature(trait_alias)]
 #![feature(trait_upcasting)]
+#![feature(associated_type_defaults)]
 #![register_tool(component_access_control)]
 
 use kcmdline::KCmdlineArg;
diff --git a/kernel/src/net/socket/netlink/message/attr/mod.rs b/kernel/src/net/socket/netlink/message/attr/mod.rs
new file mode 100644
index 000000000..9ee41cb53
--- /dev/null
+++ b/kernel/src/net/socket/netlink/message/attr/mod.rs
@@ -0,0 +1,138 @@
+// SPDX-License-Identifier: MPL-2.0
+
+//! Netlink attributes.
+//!
+//! Netlink attributes provide additional information for each [`segment`].
+//! Each netlink attribute consists of two components:
+//! 1. Header: The attribute header is of type [`CAttrHeader`],
+//!    which specifies the type and length of the attribute. The attribute
+//!    type belongs to different classes, which rely on the segment type.
+//! 2. Payload: The attribute's payload, which can vary in type.
+//!    Currently, payload types include primitive types, C string, and binary.
+//!    The payload can also include one or multiple other attributes,
+//!    known as nested attributes.
+//!
+//! Similar to [`super::segment::common::SegmentCommon`], attributes have alignment requirements;
+//! both the header and payload must be aligned to [`super::NLMSG_ALIGN`]
+//! when being transferred to and from user space.
+//!
+//! The layout of a netlink attribute is depicted as follows:
+//!
+//! ┌────────┬─────────┬─────────┬─────────┐
+//! │ Header │ Padding │ Payload │ Padding │
+//! └────────┴─────────┴─────────┴─────────┘
+//!
+//! [`segment`]: super::segment
+
+use align_ext::AlignExt;
+
+use super::NLMSG_ALIGN;
+use crate::{
+    prelude::*,
+    util::{MultiRead, MultiWrite},
+};
+
+pub mod noattr;
+
+/// Netlink attribute header.
+///
+/// Reference: `struct nlattr` in Linux's `include/uapi/linux/netlink.h`.
+//
+// The layout of the `type_` field is structured as follows:
+// ┌────────┬───────────────┬──────────┐
+// │ Nested │ Net Byteorder │ Payload  │
+// └────────┴───────────────┴──────────┘
+//   bit 15      bit 14       bits 13-0
+#[derive(Debug, Clone, Copy, Pod)]
+#[repr(C)]
+pub struct CAttrHeader {
+    len: u16,
+    type_: u16,
+}
+
+impl CAttrHeader {
+    /// Returns the attribute type with the nested and byte-order flag bits cleared.
+    pub fn type_(&self) -> u16 {
+        self.type_ & ATTRIBUTE_TYPE_MASK
+    }
+}
+
+const IS_NESTED_MASK: u16 = 1u16 << 15;
+const IS_NET_BYTEORDER_MASK: u16 = 1u16 << 14;
+const ATTRIBUTE_TYPE_MASK: u16 = !(IS_NESTED_MASK | IS_NET_BYTEORDER_MASK);
+
+/// Netlink Attribute.
+pub trait Attribute: Debug + Send + Sync {
+    /// Returns the type of the attribute.
+    fn type_(&self) -> u16;
+
+    /// Returns the byte representation of the payload.
+    fn payload_as_bytes(&self) -> &[u8];
+
+    /// Returns the payload length (excluding padding).
+    fn payload_len(&self) -> usize {
+        self.payload_as_bytes().len()
+    }
+
+    /// Returns the total length of the attribute (header + payload, excluding padding).
+    fn total_len(&self) -> usize {
+        core::mem::size_of::<CAttrHeader>() + self.payload_len()
+    }
+
+    /// Returns the total length of the attribute (header + payload, including padding).
+    fn total_len_with_padding(&self) -> usize {
+        self.total_len().align_up(NLMSG_ALIGN)
+    }
+
+    /// Returns the length of the padding bytes.
+    fn padding_len(&self) -> usize {
+        self.total_len_with_padding() - self.total_len()
+    }
+
+    /// Reads the attribute from the `reader`.
+    fn read_from(reader: &mut dyn MultiRead) -> Result<Self>
+    where
+        Self: Sized;
+
+    /// Reads all attributes from the reader.
+    ///
+    /// The cumulative length of the read attributes must not exceed `total_len`;
+    /// otherwise, this method fails with `EINVAL`.
+    fn read_all_from(reader: &mut dyn MultiRead, mut total_len: usize) -> Result<Vec<Self>>
+    where
+        Self: Sized,
+    {
+        let mut res = Vec::new();
+
+        while total_len > 0 {
+            let attr = Self::read_from(reader)?;
+            total_len = total_len.checked_sub(attr.total_len()).ok_or_else(|| {
+                Error::with_message(Errno::EINVAL, "the attribute exceeds the remaining length")
+            })?;
+
+            let padding_len = attr.padding_len().min(total_len);
+            reader.skip(padding_len);
+            total_len -= padding_len;
+
+            res.push(attr);
+        }
+
+        Ok(res)
+    }
+
+    /// Writes the attribute to the `writer`.
+    fn write_to(&self, writer: &mut dyn MultiWrite) -> Result<()> {
+        let header = CAttrHeader {
+            type_: self.type_(),
+            len: self.total_len() as u16,
+        };
+
+        writer.write_val(&header)?;
+        writer.write(&mut VmReader::from(self.payload_as_bytes()))?;
+
+        let padding_len = self.padding_len();
+        writer.skip(padding_len);
+
+        Ok(())
+    }
+}
diff --git a/kernel/src/net/socket/netlink/message/attr/noattr.rs b/kernel/src/net/socket/netlink/message/attr/noattr.rs
new file mode 100644
index 000000000..6b83bcb87
--- /dev/null
+++ b/kernel/src/net/socket/netlink/message/attr/noattr.rs
@@ -0,0 +1,32 @@
+// SPDX-License-Identifier: MPL-2.0
+
+use super::Attribute;
+use crate::{prelude::*, util::MultiRead};
+
+/// A special type indicating that a segment cannot have attributes.
+#[derive(Debug)]
+pub enum NoAttr {}
+
+impl Attribute for NoAttr {
+    fn type_(&self) -> u16 {
+        match *self {}
+    }
+
+    fn payload_as_bytes(&self) -> &[u8] {
+        match *self {}
+    }
+
+    fn read_from(_reader: &mut dyn MultiRead) -> Result<Self>
+    where
+        Self: Sized,
+    {
+        return_errno_with_message!(Errno::EINVAL, "`NoAttr` cannot be read");
+    }
+
+    fn read_all_from(_reader: &mut dyn MultiRead, _total_len: usize) -> Result<Vec<Self>>
+    where
+        Self: Sized,
+    {
+        Ok(Vec::new())
+    }
+}
diff --git a/kernel/src/net/socket/netlink/message/mod.rs b/kernel/src/net/socket/netlink/message/mod.rs
new file mode 100644
index 000000000..cd354fabd
--- /dev/null
+++ b/kernel/src/net/socket/netlink/message/mod.rs
@@ -0,0 +1,85 @@
+// SPDX-License-Identifier: MPL-2.0
+
+//! Netlink message types for all netlink protocols.
+//!
+//! This module defines how to interpret messages sent from user space and how to write
+//! kernel messages back to user space.
+
+mod attr;
+mod segment;
+
+pub(super) use attr::{noattr::NoAttr, Attribute, CAttrHeader};
+pub(super) use segment::{
+    ack::{DoneSegment, ErrorSegment},
+    common::SegmentCommon,
+    header::{CMsgSegHdr, GetRequestFlags, SegHdrCommonFlags},
+    CSegmentType, SegmentBody,
+};
+
+use crate::{
+    prelude::*,
+    util::{MultiRead, MultiWrite},
+};
+
+/// A netlink message.
+///
+/// A netlink message can be transmitted to and from user space using a single send/receive syscall.
+/// It consists of one or more [`ProtocolSegment`]s.
+#[derive(Debug)]
+pub(super) struct Message<T: ProtocolSegment> {
+    segments: Vec<T>,
+}
+
+impl<T: ProtocolSegment> Message<T> {
+    pub(super) const fn new(segments: Vec<T>) -> Self {
+        Self { segments }
+    }
+
+    pub(super) fn segments(&self) -> &[T] {
+        &self.segments
+    }
+
+    pub(super) fn segments_mut(&mut self) -> &mut [T] {
+        &mut self.segments
+    }
+
+    pub(super) fn read_from(reader: &mut dyn MultiRead) -> Result<Self> {
+        // FIXME: Does a request contain only one segment? We need to investigate further.
+        let segments = {
+            let segment = T::read_from(reader)?;
+            vec![segment]
+        };
+
+        Ok(Self { segments })
+    }
+
+    pub(super) fn write_to(&self, writer: &mut dyn MultiWrite) -> Result<()> {
+        for segment in self.segments.iter() {
+            segment.write_to(writer)?;
+        }
+
+        Ok(())
+    }
+
+    pub(super) fn total_len(&self) -> usize {
+        self.segments
+            .iter()
+            .map(|segment| segment.header().len as usize)
+            .sum()
+    }
+}
+
+/// A segment that can be carried in a [`Message`].
+pub(super) trait ProtocolSegment: Sized {
+    /// Returns the segment header.
+    fn header(&self) -> &CMsgSegHdr;
+    /// Returns a mutable reference to the segment header.
+    fn header_mut(&mut self) -> &mut CMsgSegHdr;
+    /// Reads a segment from the `reader`.
+    fn read_from(reader: &mut dyn MultiRead) -> Result<Self>;
+    /// Writes the segment to the `writer`.
+    fn write_to(&self, writer: &mut dyn MultiWrite) -> Result<()>;
+}
+
+/// The alignment (in bytes) required for netlink headers, bodies, and attributes.
+pub(super) const NLMSG_ALIGN: usize = 4;
diff --git a/kernel/src/net/socket/netlink/message/segment/ack.rs b/kernel/src/net/socket/netlink/message/segment/ack.rs
new file mode 100644
index 000000000..f1e289b82
--- /dev/null
+++ b/kernel/src/net/socket/netlink/message/segment/ack.rs
@@ -0,0 +1,90 @@
+// SPDX-License-Identifier: MPL-2.0
+
+//! This module defines segments that only appear in acknowledgment messages.
+//!
+//! An acknowledgment segment appears as the final segment in a response message from the kernel.
+//! Netlink utilizes two classes of acknowledgment segments:
+//! 1. Done Segment: Indicates the conclusion of a message comprised of multiple segments.
+//! 2. Error Segment: Indicates that an error occurred while the kernel processed the user space request.
+//!
+
+use super::{
+    common::SegmentCommon,
+    header::{CMsgSegHdr, SegHdrCommonFlags},
+    CSegmentType, SegmentBody,
+};
+use crate::{net::socket::netlink::message::NoAttr, prelude::*};
+
+pub type DoneSegment = SegmentCommon<DoneSegmentBody, NoAttr>;
+
+#[repr(C)]
+#[derive(Debug, Clone, Copy, Pod)]
+pub struct DoneSegmentBody {
+    error_code: i32,
+}
+
+impl SegmentBody for DoneSegmentBody {
+    type CType = DoneSegmentBody;
+}
+
+impl DoneSegment {
+    pub fn new_from_request(request_header: &CMsgSegHdr, error: Option<Error>) -> Self {
+        let header = CMsgSegHdr {
+            len: 0,
+            type_: CSegmentType::DONE as _,
+            flags: SegHdrCommonFlags::empty().bits(),
+            seq: request_header.seq,
+            pid: request_header.pid,
+        };
+
+        let body = {
+            let error_code = error_to_error_code(error);
+            DoneSegmentBody { error_code }
+        };
+
+        Self::new(header, body, Vec::new())
+    }
+}
+
+pub type ErrorSegment = SegmentCommon<ErrorSegmentBody, NoAttr>;
+
+#[derive(Debug, Pod, Clone, Copy)]
+#[repr(C)]
+pub struct ErrorSegmentBody {
+    error_code: i32,
+    request_header: CMsgSegHdr,
+}
+
+impl SegmentBody for ErrorSegmentBody {
+    type CType = ErrorSegmentBody;
+}
+
+impl ErrorSegment {
+    pub fn new_from_request(request_header: &CMsgSegHdr, error: Option<Error>) -> Self {
+        let header = CMsgSegHdr {
+            len: 0,
+            type_: CSegmentType::ERROR as _,
+            flags: SegHdrCommonFlags::empty().bits(),
+            seq: request_header.seq,
+            pid: request_header.pid,
+        };
+
+        let body = {
+            let error_code = error_to_error_code(error);
+            ErrorSegmentBody {
+                error_code,
+                request_header: *request_header,
+            }
+        };
+
+        Self::new(header, body, Vec::new())
+    }
+}
+
+const fn error_to_error_code(error: Option<Error>) -> i32 {
+    if let Some(error) = error {
+        -(error.error() as i32)
+    } else {
+        0
+    }
+}
diff --git a/kernel/src/net/socket/netlink/message/segment/common.rs b/kernel/src/net/socket/netlink/message/segment/common.rs
new file mode 100644
index 000000000..155a0c4e3
--- /dev/null
+++ b/kernel/src/net/socket/netlink/message/segment/common.rs
@@ -0,0 +1,110 @@
+// SPDX-License-Identifier: MPL-2.0
+
+use align_ext::AlignExt;
+
+use super::{header::CMsgSegHdr, SegmentBody};
+use crate::{
+    net::socket::netlink::message::{attr::Attribute, NLMSG_ALIGN},
+    prelude::*,
+    util::{MultiRead, MultiWrite},
+};
+
+/// A segment made up of a header, a body, and a list of attributes.
+#[derive(Debug)]
+pub struct SegmentCommon<Body, Attr> {
+    header: CMsgSegHdr,
+    body: Body,
+    attrs: Vec<Attr>,
+}
+
+impl<Body, Attr> SegmentCommon<Body, Attr> {
+    /// The size of the segment header in bytes.
+    pub const HEADER_LEN: usize = size_of::<CMsgSegHdr>();
+
+    /// Returns the segment header.
+    pub fn header(&self) -> &CMsgSegHdr {
+        &self.header
+    }
+
+    /// Returns a mutable reference to the segment header.
+    pub fn header_mut(&mut self) -> &mut CMsgSegHdr {
+        &mut self.header
+    }
+
+    /// Returns the segment body.
+    pub fn body(&self) -> &Body {
+        &self.body
+    }
+
+    /// Returns the attributes of the segment.
+    pub fn attrs(&self) -> &Vec<Attr> {
+        &self.attrs
+    }
+}
+
+impl<Body: SegmentBody, Attr: Attribute> SegmentCommon<Body, Attr> {
+    /// The size of the C representation of the body in bytes (excluding padding).
+    pub const BODY_LEN: usize = size_of::<Body::CType>();
+
+    /// Creates a segment, overwriting `header.len` with the computed total length.
+    pub fn new(header: CMsgSegHdr, body: Body, attrs: Vec<Attr>) -> Self {
+        let mut res = Self {
+            header,
+            body,
+            attrs,
+        };
+        res.header.len = res.total_len() as u32;
+        res
+    }
+
+    /// Reads the body and the attributes that follow the already-read `header`.
+    pub fn read_from(header: CMsgSegHdr, reader: &mut dyn MultiRead) -> Result<Self>
+    where
+        Error: From<<Body::CType as TryInto<Body>>::Error>,
+    {
+        // The request comes from user space, so a malformed body must be reported
+        // as an error instead of panicking the kernel.
+        let (body, remain_len) = Body::read_from(&header, reader)?;
+
+        let attrs = Attr::read_all_from(reader, remain_len)?;
+
+        Ok(Self {
+            header,
+            body,
+            attrs,
+        })
+    }
+
+    /// Writes the header, the body, and all attributes to the `writer`.
+    pub fn write_to(&self, writer: &mut dyn MultiWrite) -> Result<()> {
+        // FIXME: If the message can be truncated, we should avoid returning an error.
+        // Furthermore, we need to check the Linux behavior to determine whether to return an error
+        // if the writer is not large enough to accommodate the final padding bytes.
+        if writer.sum_lens() < (self.header.len as usize).align_up(NLMSG_ALIGN) {
+            return_errno_with_message!(Errno::EFAULT, "the writer length is too small");
+        }
+
+        writer.write_val(&self.header)?;
+        self.body.write_to(writer)?;
+        for attr in self.attrs.iter() {
+            attr.write_to(writer)?;
+        }
+
+        Ok(())
+    }
+
+    /// Returns the total length of the segment (header + body + padded attributes).
+    pub fn total_len(&self) -> usize {
+        Self::HEADER_LEN + Self::BODY_LEN + self.attrs_len()
+    }
+}
+
+impl<Body, Attr: Attribute> SegmentCommon<Body, Attr> {
+    /// Returns the total length of all attributes, including their padding.
+    pub fn attrs_len(&self) -> usize {
+        self.attrs
+            .iter()
+            .map(|attr| attr.total_len_with_padding())
+            .sum()
+    }
+}
diff --git a/kernel/src/net/socket/netlink/message/segment/header.rs b/kernel/src/net/socket/netlink/message/segment/header.rs
new file mode 100644
index 000000000..2e442d737
--- /dev/null
+++ b/kernel/src/net/socket/netlink/message/segment/header.rs
@@ -0,0 +1,97 @@
+// SPDX-License-Identifier: MPL-2.0
+
+//! General netlink message types for all netlink protocols.
+
+use crate::prelude::*;
+
+/// `nlmsghdr` in Linux.
+///
+/// Reference: `struct nlmsghdr` in Linux's `include/uapi/linux/netlink.h`.
+#[repr(C)]
+#[derive(Debug, Clone, Copy, Pod)]
+pub struct CMsgSegHdr {
+    /// Length of the message, including the header
+    pub len: u32,
+    /// Type of message content
+    pub type_: u16,
+    /// Additional flags
+    pub flags: u16,
+    /// Sequence number
+    pub seq: u32,
+    /// Sending process port ID
+    pub pid: u32,
+}
+
+bitflags! {
+    /// Common flags used in [`CMsgSegHdr`].
+    ///
+    /// Reference: the `NLM_F_*` flags in Linux's `include/uapi/linux/netlink.h`.
+    pub struct SegHdrCommonFlags: u16 {
+        /// Indicates a request message
+        const REQUEST = 0x01;
+        /// Multipart message, terminated by NLMSG_DONE
+        const MULTI = 0x02;
+        /// Reply with an acknowledgment, with zero or an error code
+        const ACK = 0x04;
+        /// Echo this request
+        const ECHO = 0x08;
+        /// Dump was inconsistent due to sequence change
+        const DUMP_INTR = 0x10;
+        /// Dump was filtered as requested
+        const DUMP_FILTERED = 0x20;
+    }
+}
+
+bitflags! {
+    /// Modifiers for GET requests.
+    ///
+    /// Reference: the `NLM_F_*` flags in Linux's `include/uapi/linux/netlink.h`.
+    pub struct GetRequestFlags: u16 {
+        /// Specify the tree root
+        const ROOT = 0x100;
+        /// Return all matching results
+        const MATCH = 0x200;
+        /// Atomic get request
+        const ATOMIC = 0x400;
+        /// Combination flag for root and match
+        const DUMP = Self::ROOT.bits | Self::MATCH.bits;
+    }
+}
+
+bitflags! {
+    /// Modifiers for NEW requests.
+    ///
+    /// Reference: the `NLM_F_*` flags in Linux's `include/uapi/linux/netlink.h`.
+    pub struct NewRequestFlags: u16 {
+        /// Override existing entries
+        const REPLACE = 0x100;
+        /// Do not modify if it exists
+        const EXCL = 0x200;
+        /// Create if it does not exist
+        const CREATE = 0x400;
+        /// Add to the end of the list
+        const APPEND = 0x800;
+    }
+}
+
+bitflags! {
+    /// Modifiers for DELETE requests.
+    ///
+    /// Reference: the `NLM_F_*` flags in Linux's `include/uapi/linux/netlink.h`.
+    pub struct DeleteRequestFlags: u16 {
+        /// Do not delete recursively
+        const NONREC = 0x100;
+        /// Delete multiple objects
+        const BULK = 0x200;
+    }
+}
+
+bitflags! {
+    /// Flags for ACK messages.
+    ///
+    /// Reference: the `NLM_F_*` flags in Linux's `include/uapi/linux/netlink.h`.
+    pub struct AckFlags: u16 {
+        const CAPPED = 0x100;
+        const ACK_TLVS = 0x200;
+    }
+}
diff --git a/kernel/src/net/socket/netlink/message/segment/mod.rs b/kernel/src/net/socket/netlink/message/segment/mod.rs
new file mode 100644
index 000000000..e787ab62d
--- /dev/null
+++ b/kernel/src/net/socket/netlink/message/segment/mod.rs
@@ -0,0 +1,118 @@
+// SPDX-License-Identifier: MPL-2.0
+
+use align_ext::AlignExt;
+use header::CMsgSegHdr;
+
+use super::NLMSG_ALIGN;
+use crate::{
+    prelude::*,
+    util::{MultiRead, MultiWrite},
+};
+
+pub mod ack;
+pub mod common;
+pub mod header;
+
+/// The body of a segment, which is converted to and from its C representation.
+pub trait SegmentBody: Sized + Clone + Copy {
+    // The actual message body should be `Self::CType`,
+    // but older versions of Linux use a legacy type (usually `CRtGenMsg` here).
+    // Additionally, some software, like iproute2, also uses this legacy type.
+    // Therefore, we need to handle both cases.
+    // Reference: .
+    // FIXME: Verify whether the legacy type includes any types other than `CRtGenMsg`.
+    type CLegacyType: Pod = Self::CType;
+    type CType: Pod + From<Self::CLegacyType> + TryInto<Self> + From<Self>;
+
+    /// Reads the segment body from the `reader`.
+    ///
+    /// This method returns the body and the remaining length to be read from the `reader`.
+    fn read_from(header: &CMsgSegHdr, reader: &mut dyn MultiRead) -> Result<(Self, usize)>
+    where
+        Error: From<<Self::CType as TryInto<Self>>::Error>,
+    {
+        let mut remaining_len = (header.len as usize)
+            .checked_sub(size_of_val(header))
+            .ok_or_else(|| Error::with_message(Errno::EINVAL, "the message length is too small"))?;
+
+        // Align `remaining_len` up to `NLMSG_ALIGN`.
+        let reader_len = reader.sum_lens();
+        if reader_len < remaining_len {
+            return_errno_with_message!(Errno::EINVAL, "the reader length is too small");
+        }
+        remaining_len = remaining_len.align_up(NLMSG_ALIGN).min(reader_len);
+
+        // Read the body.
+        let (c_type, padding_len) = if remaining_len >= size_of::<Self::CType>() {
+            let c_type = reader.read_val::<Self::CType>()?;
+            remaining_len -= size_of_val(&c_type);
+
+            (c_type, Self::padding_len())
+        } else if remaining_len >= size_of::<Self::CLegacyType>() {
+            let legacy = reader.read_val::<Self::CLegacyType>()?;
+            remaining_len -= size_of_val(&legacy);
+
+            (Self::CType::from(legacy), Self::legacy_padding_len())
+        } else {
+            return_errno_with_message!(Errno::EINVAL, "the message length is too small");
+        };
+
+        // Skip the padding bytes.
+        let padding_len = padding_len.min(remaining_len);
+        reader.skip(padding_len);
+        remaining_len -= padding_len;
+
+        let body = c_type.try_into()?;
+        Ok((body, remaining_len))
+    }
+
+    /// Writes the C representation of the body to the `writer` and skips the padding bytes.
+    fn write_to(&self, writer: &mut dyn MultiWrite) -> Result<()> {
+        // Write the body.
+        let c_body = Self::CType::from(*self);
+        writer.write_val(&c_body)?;
+
+        // Skip the padding bytes.
+        let padding_len = Self::padding_len();
+        writer.skip(padding_len);
+        Ok(())
+    }
+
+    /// Returns the number of padding bytes that follow a `Self::CType` body.
+    fn padding_len() -> usize {
+        let payload_len = size_of::<Self::CType>();
+        payload_len.align_up(NLMSG_ALIGN) - payload_len
+    }
+
+    /// Returns the number of padding bytes that follow a `Self::CLegacyType` body.
+    fn legacy_padding_len() -> usize {
+        let payload_len = size_of::<Self::CLegacyType>();
+        payload_len.align_up(NLMSG_ALIGN) - payload_len
+    }
+}
+
+/// The segment types stored in the `type_` field of [`CMsgSegHdr`].
+#[repr(u16)]
+#[derive(Debug, Clone, Copy, TryFromInt, PartialEq, Eq, PartialOrd, Ord)]
+pub enum CSegmentType {
+    // Standard netlink message types
+    NOOP = 1,
+    ERROR = 2,
+    DONE = 3,
+    OVERRUN = 4,
+
+    // protocol-level types
+    NEWLINK = 16,
+    DELLINK = 17,
+    GETLINK = 18,
+    SETLINK = 19,
+
+    NEWADDR = 20,
+    DELADDR = 21,
+    GETADDR = 22,
+
+    NEWROUTE = 24,
+    DELROUTE = 25,
+    GETROUTE = 26,
+    // TODO: The list is not exhaustive.
+}
diff --git a/kernel/src/net/socket/netlink/route/message/attr/addr.rs b/kernel/src/net/socket/netlink/route/message/attr/addr.rs
new file mode 100644
index 000000000..a6abe5049
--- /dev/null
+++ b/kernel/src/net/socket/netlink/route/message/attr/addr.rs
@@ -0,0 +1,80 @@
+// SPDX-License-Identifier: MPL-2.0
+
+use super::IFNAME_SIZE;
+use crate::{
+    net::socket::netlink::message::{Attribute, CAttrHeader},
+    prelude::*,
+    util::MultiRead,
+};
+
+/// Address-related attributes.
+///
+/// Reference: the `IFA_*` attribute types in Linux's `include/uapi/linux/if_addr.h`.
+#[derive(Debug, Clone, Copy, TryFromInt)]
+#[repr(u16)]
+#[expect(non_camel_case_types)]
+enum AddrAttrClass {
+    UNSPEC = 0,
+    ADDRESS = 1,
+    LOCAL = 2,
+    LABEL = 3,
+    BROADCAST = 4,
+    ANYCAST = 5,
+    CACHEINFO = 6,
+    MULTICAST = 7,
+    FLAGS = 8,
+    RT_PRIORITY = 9,
+    TARGET_NETNSID = 10,
+}
+
+#[derive(Debug)]
+pub enum AddrAttr {
+    Address([u8; 4]),
+    Local([u8; 4]),
+    Label(CString),
+}
+
+impl AddrAttr {
+    fn class(&self) -> AddrAttrClass {
+        match self {
+            AddrAttr::Address(_) => AddrAttrClass::ADDRESS,
+            AddrAttr::Local(_) => AddrAttrClass::LOCAL,
+            AddrAttr::Label(_) => AddrAttrClass::LABEL,
+        }
+    }
+}
+
+impl Attribute for AddrAttr {
+    fn type_(&self) -> u16 {
+        self.class() as u16
+    }
+
+    fn payload_as_bytes(&self) -> &[u8] {
+        match self {
+            AddrAttr::Address(address) => address,
+            AddrAttr::Local(local) => local,
+            AddrAttr::Label(label) => label.as_bytes_with_nul(),
+        }
+    }
+
+    fn read_from(reader: &mut dyn MultiRead) -> Result<Self>
+    where
+        Self: Sized,
+    {
+        let header = reader.read_val::<CAttrHeader>()?;
+        // TODO: Currently, `IS_NET_BYTEORDER_MASK` and `IS_NESTED_MASK` are ignored.
+        let res = match AddrAttrClass::try_from(header.type_())? {
+            AddrAttrClass::ADDRESS => Self::Address(reader.read_val()?),
+            AddrAttrClass::LOCAL => Self::Local(reader.read_val()?),
+            AddrAttrClass::LABEL => Self::Label(reader.read_cstring_with_max_len(IFNAME_SIZE)?),
+            class => {
+                // FIXME: Netlink should ignore all unknown attributes.
+                // See the reference in `LinkAttr::read_from`.
+                warn!("address attribute `{:?}` is not supported", class);
+                return_errno_with_message!(Errno::EINVAL, "unsupported address attribute");
+            }
+        };
+
+        Ok(res)
+    }
+}
diff --git a/kernel/src/net/socket/netlink/route/message/attr/link.rs b/kernel/src/net/socket/netlink/route/message/attr/link.rs
new file mode 100644
index 000000000..0a4d0e8a8
--- /dev/null
+++ b/kernel/src/net/socket/netlink/route/message/attr/link.rs
@@ -0,0 +1,161 @@
+// SPDX-License-Identifier: MPL-2.0
+
+use super::IFNAME_SIZE;
+use crate::{
+    net::socket::netlink::message::{Attribute, CAttrHeader},
+    prelude::*,
+    util::MultiRead,
+};
+
+/// Link-level attributes.
+///
+/// Reference: the `IFLA_*` attribute types in Linux's `include/uapi/linux/if_link.h`.
+#[derive(Debug, Clone, Copy, TryFromInt)]
+#[repr(u16)]
+#[expect(non_camel_case_types)]
+enum LinkAttrClass {
+    UNSPEC = 0,
+    ADDRESS = 1,
+    BROADCAST = 2,
+    IFNAME = 3,
+    MTU = 4,
+    LINK = 5,
+    QDISC = 6,
+    STATS = 7,
+    COST = 8,
+    PRIORITY = 9,
+    MASTER = 10,
+    /// Wireless Extension event
+    WIRELESS = 11,
+    /// Protocol specific information for a link
+    PROTINFO = 12,
+    TXQLEN = 13,
+    MAP = 14,
+    WEIGHT = 15,
+    OPERSTATE = 16,
+    LINKMODE = 17,
+    LINKINFO = 18,
+    NET_NS_PID = 19,
+    IFALIAS = 20,
+    /// Number of VFs if device is SR-IOV PF
+    NUM_VF = 21,
+    VFINFO_LIST = 22,
+    STATS64 = 23,
+    VF_PORTS = 24,
+    PORT_SELF = 25,
+    AF_SPEC = 26,
+    /// Group the device belongs to
+    GROUP = 27,
+    NET_NS_FD = 28,
+    /// Extended info mask, VFs, etc.
+    EXT_MASK = 29,
+    /// Promiscuity count: > 0 means acts PROMISC
+    PROMISCUITY = 30,
+    NUM_TX_QUEUES = 31,
+    NUM_RX_QUEUES = 32,
+    CARRIER = 33,
+    PHYS_PORT_ID = 34,
+    CARRIER_CHANGES = 35,
+    PHYS_SWITCH_ID = 36,
+    LINK_NETNSID = 37,
+    PHYS_PORT_NAME = 38,
+    PROTO_DOWN = 39,
+    GSO_MAX_SEGS = 40,
+    GSO_MAX_SIZE = 41,
+    PAD = 42,
+    XDP = 43,
+    EVENT = 44,
+    NEW_NETNSID = 45,
+    IF_NETNSID = 46,
+    CARRIER_UP_COUNT = 47,
+    CARRIER_DOWN_COUNT = 48,
+    NEW_IFINDEX = 49,
+    MIN_MTU = 50,
+    MAX_MTU = 51,
+    PROP_LIST = 52,
+    /// Alternative ifname
+    ALT_IFNAME = 53,
+    PERM_ADDRESS = 54,
+    PROTO_DOWN_REASON = 55,
+    PARENT_DEV_NAME = 56,
+    PARENT_DEV_BUS_NAME = 57,
+}
+
+#[derive(Debug)]
+pub enum LinkAttr {
+    Name(CString),
+    Mtu(u32),
+    TxqLen(u32),
+    LinkMode(u8),
+    ExtMask(RtExtFilter),
+}
+
+impl LinkAttr {
+    fn class(&self) -> LinkAttrClass {
+        match self {
+            LinkAttr::Name(_) => LinkAttrClass::IFNAME,
+            LinkAttr::Mtu(_) => LinkAttrClass::MTU,
+            LinkAttr::TxqLen(_) => LinkAttrClass::TXQLEN,
+            LinkAttr::LinkMode(_) => LinkAttrClass::LINKMODE,
+            LinkAttr::ExtMask(_) => LinkAttrClass::EXT_MASK,
+        }
+    }
+}
+
+impl Attribute for LinkAttr {
+    fn type_(&self) -> u16 {
+        self.class() as u16
+    }
+
+    fn payload_as_bytes(&self) -> &[u8] {
+        match self {
+            LinkAttr::Name(name) => name.as_bytes_with_nul(),
+            LinkAttr::Mtu(mtu) => mtu.as_bytes(),
+            LinkAttr::TxqLen(txq_len) => txq_len.as_bytes(),
+            LinkAttr::LinkMode(link_mode) => link_mode.as_bytes(),
+            LinkAttr::ExtMask(ext_filter) => ext_filter.as_bytes(),
+        }
+    }
+
+    fn read_from(reader: &mut dyn MultiRead) -> Result<Self>
+    where
+        Self: Sized,
+    {
+        let header = reader.read_val::<CAttrHeader>()?;
+        // TODO: Currently, `IS_NET_BYTEORDER_MASK` and `IS_NESTED_MASK` are ignored.
+        let res = match LinkAttrClass::try_from(header.type_())? {
+            LinkAttrClass::IFNAME => Self::Name(reader.read_cstring_with_max_len(IFNAME_SIZE)?),
+            LinkAttrClass::MTU => Self::Mtu(reader.read_val()?),
+            LinkAttrClass::TXQLEN => Self::TxqLen(reader.read_val()?),
+            LinkAttrClass::LINKMODE => Self::LinkMode(reader.read_val()?),
+            LinkAttrClass::EXT_MASK => Self::ExtMask(reader.read_val()?),
+            class => {
+                // FIXME: Netlink should ignore all unknown attributes.
+                // But how to decide the payload type if the class is unknown?
+                // Reference: https://docs.kernel.org/userspace-api/netlink/intro.html#unknown-attributes
+                warn!("link attribute `{:?}` is not supported", class);
+                return_errno_with_message!(Errno::EINVAL, "unsupported link attribute");
+            }
+        };
+
+        Ok(res)
+    }
+}
+
+bitflags! {
+    /// New extended info filters for [`LinkAttr::ExtMask`].
+    ///
+    /// Reference: the `RTEXT_FILTER_*` flags in Linux's `include/uapi/linux/rtnetlink.h`.
+    #[repr(C)]
+    #[derive(Pod)]
+    pub struct RtExtFilter: u32 {
+        const VF = 1 << 0;
+        const BRVLAN = 1 << 1;
+        const BRVLAN_COMPRESSED = 1 << 2;
+        const SKIP_STATS = 1 << 3;
+        const MRP = 1 << 4;
+        const CFM_CONFIG = 1 << 5;
+        const CFM_STATUS = 1 << 6;
+        const MST = 1 << 7;
+    }
+}
diff --git a/kernel/src/net/socket/netlink/route/message/attr/mod.rs b/kernel/src/net/socket/netlink/route/message/attr/mod.rs
new file mode 100644
index 000000000..18783030b
--- /dev/null
+++ b/kernel/src/net/socket/netlink/route/message/attr/mod.rs
@@ -0,0 +1,7 @@
+// SPDX-License-Identifier: MPL-2.0
+
+pub mod addr;
+pub mod link;
+
+/// The size limit for interface names.
+const IFNAME_SIZE: usize = 16;
diff --git a/kernel/src/net/socket/netlink/route/message/mod.rs b/kernel/src/net/socket/netlink/route/message/mod.rs
new file mode 100644
index 000000000..8b3d0f39e
--- /dev/null
+++ b/kernel/src/net/socket/netlink/route/message/mod.rs
@@ -0,0 +1,21 @@
+// SPDX-License-Identifier: MPL-2.0
+
+//! Netlink message types for the netlink route protocol.
+//!
+//! This module defines how to interpret messages sent from user space and how to write
+//! kernel messages back to user space.
+
+mod attr;
+mod segment;
+
+pub(super) use attr::{addr::AddrAttr, link::LinkAttr};
+pub(super) use segment::{
+    addr::{AddrMessageFlags, AddrSegment, AddrSegmentBody, RtScope},
+    link::{LinkSegment, LinkSegmentBody},
+    RtnlSegment,
+};
+
+use crate::net::socket::netlink::message::Message;
+
+/// A netlink route message.
+pub(super) type RtnlMessage = Message<RtnlSegment>;
diff --git a/kernel/src/net/socket/netlink/route/message/segment/addr.rs b/kernel/src/net/socket/netlink/route/message/segment/addr.rs
new file mode 100644
index 000000000..cdad13674
--- /dev/null
+++ b/kernel/src/net/socket/netlink/route/message/segment/addr.rs
@@ -0,0 +1,111 @@
+// SPDX-License-Identifier: MPL-2.0
+
+use core::num::NonZeroU32;
+
+use super::legacy::CRtGenMsg;
+use crate::{
+    net::socket::netlink::{
+        message::{SegmentBody, SegmentCommon},
+        route::message::attr::addr::AddrAttr,
+    },
+    prelude::*,
+};
+
+pub type AddrSegment = SegmentCommon<AddrSegmentBody, AddrAttr>;
+
+impl SegmentBody for AddrSegmentBody {
+    type CLegacyType = CRtGenMsg;
+    type CType = CIfaddrMsg;
+}
+
+/// `ifaddrmsg` in Linux.
+///
+/// Reference: `struct ifaddrmsg` in Linux's `include/uapi/linux/if_addr.h`.
+#[repr(C)]
+#[derive(Debug, Clone, Copy, Pod)]
+pub struct CIfaddrMsg {
+    pub family: u8,
+    /// The prefix length
+    pub prefix_len: u8,
+    /// Flags
+    pub flags: u8,
+    /// Address scope
+    pub scope: u8,
+    /// Link index
+    pub index: u32,
+}
+
+#[derive(Debug, Clone, Copy)]
+pub struct AddrSegmentBody {
+    pub family: i32,
+    pub prefix_len: u8,
+    pub flags: AddrMessageFlags,
+    pub scope: RtScope,
+    pub index: Option<NonZeroU32>,
+}
+
+impl TryFrom<CIfaddrMsg> for AddrSegmentBody {
+    type Error = Error;
+
+    fn try_from(value: CIfaddrMsg) -> Result<Self> {
+        // TODO: If the attribute IFA_FLAGS exists, the flags in header should be ignored.
+        let flags = AddrMessageFlags::from_bits_truncate(value.flags as u32);
+        let scope = RtScope::try_from(value.scope)?;
+        let index = NonZeroU32::new(value.index);
+
+        Ok(Self {
+            family: value.family as i32,
+            prefix_len: value.prefix_len,
+            flags,
+            scope,
+            index,
+        })
+    }
+}
+
+impl From<AddrSegmentBody> for CIfaddrMsg {
+    fn from(value: AddrSegmentBody) -> Self {
+        let index = value.index.map(NonZeroU32::get).unwrap_or(0);
+        CIfaddrMsg {
+            family: value.family as u8,
+            prefix_len: value.prefix_len,
+            flags: value.flags.bits() as u8,
+            scope: value.scope as _,
+            index,
+        }
+    }
+}
+
+bitflags! {
+    /// Flags in [`CIfaddrMsg`].
+    ///
+    /// Reference: the `IFA_F_*` flags in Linux's `include/uapi/linux/if_addr.h`.
+    pub struct AddrMessageFlags: u32 {
+        const SECONDARY = 0x01;
+        const NODAD = 0x02;
+        const OPTIMISTIC = 0x04;
+        const DADFAILED = 0x08;
+        const HOMEADDRESS = 0x10;
+        const DEPRECATED = 0x20;
+        const TENTATIVE = 0x40;
+        const PERMANENT = 0x80;
+        const MANAGETEMPADDR = 0x100;
+        const NOPREFIXROUTE = 0x200;
+        const MCAUTOJOIN = 0x400;
+        const STABLE_PRIVACY = 0x800;
+    }
+}
+
+/// `rt_scope_t` in Linux.
+///
+/// Reference: `enum rt_scope_t` in Linux's `include/uapi/linux/rtnetlink.h`.
+#[repr(u8)]
+#[derive(Debug, Clone, Copy, TryFromInt)]
+pub enum RtScope {
+    UNIVERSE = 0,
+    // User defined values
+    SITE = 200,
+    LINK = 253,
+    HOST = 254,
+    NOWHERE = 255,
+}
diff --git a/kernel/src/net/socket/netlink/route/message/segment/legacy.rs b/kernel/src/net/socket/netlink/route/message/segment/legacy.rs
new file mode 100644
index 000000000..973bfd63f
--- /dev/null
+++ b/kernel/src/net/socket/netlink/route/message/segment/legacy.rs
@@ -0,0 +1,38 @@
+// SPDX-License-Identifier: MPL-2.0
+
+use super::{addr::CIfaddrMsg, link::CIfinfoMsg};
+use crate::prelude::*;
+
+/// `rtgenmsg` in Linux.
+///
+/// Reference: `struct rtgenmsg` in Linux's `include/uapi/linux/rtnetlink.h`.
+#[derive(Debug, Clone, Copy, Pod)]
+#[repr(C)]
+pub struct CRtGenMsg {
+    pub family: u8,
+}
+
+impl From<CRtGenMsg> for CIfinfoMsg {
+    fn from(value: CRtGenMsg) -> Self {
+        Self {
+            family: value.family,
+            _pad: 0,
+            type_: 0,
+            index: 0,
+            flags: 0,
+            change: 0,
+        }
+    }
+}
+
+impl From<CRtGenMsg> for CIfaddrMsg {
+    fn from(value: CRtGenMsg) -> Self {
+        Self {
+            family: value.family,
+            prefix_len: 0,
+            flags: 0,
+            scope: 0,
+            index: 0,
+        }
+    }
+}
diff --git a/kernel/src/net/socket/netlink/route/message/segment/link.rs b/kernel/src/net/socket/netlink/route/message/segment/link.rs
new file mode 100644
index 000000000..a3e0339dd
--- /dev/null
+++ b/kernel/src/net/socket/netlink/route/message/segment/link.rs
@@ -0,0 +1,81 @@
+// SPDX-License-Identifier: MPL-2.0
+
+use core::num::NonZeroU32;
+
+use aster_bigtcp::iface::{InterfaceFlags, InterfaceType};
+
+use super::legacy::CRtGenMsg;
+use crate::{
+    net::socket::netlink::{
+        message::{SegmentBody, SegmentCommon},
+        route::message::attr::link::LinkAttr,
+    },
+    prelude::*,
+    util::net::CSocketAddrFamily,
+};
+
+pub type LinkSegment = SegmentCommon<LinkSegmentBody, LinkAttr>;
+
+impl SegmentBody for LinkSegmentBody {
+    type CLegacyType = CRtGenMsg;
+    type CType = CIfinfoMsg;
+}
+
+/// `ifinfomsg` in Linux.
+///
+/// Reference: `struct ifinfomsg` in Linux's `include/uapi/linux/rtnetlink.h`.
+#[repr(C)]
+#[derive(Debug, Clone, Copy, Pod)]
+pub struct CIfinfoMsg {
+    /// AF_UNSPEC
+    pub family: u8,
+    /// Padding byte
+    pub _pad: u8,
+    /// Device type
+    pub type_: u16,
+    /// Interface index
+    pub index: u32,
+    /// Device flags
+    pub flags: u32,
+    /// Change mask
+    pub change: u32,
+}
+
+#[derive(Debug, Clone, Copy)]
+pub struct LinkSegmentBody {
+    pub family: CSocketAddrFamily,
+    pub type_: InterfaceType,
+    pub index: Option<NonZeroU32>,
+    pub flags: InterfaceFlags,
+}
+
+impl TryFrom<CIfinfoMsg> for LinkSegmentBody {
+    type Error = Error;
+
+    fn try_from(value: CIfinfoMsg) -> Result<Self> {
+        let family = CSocketAddrFamily::try_from(value.family as i32)?;
+        let type_ = InterfaceType::try_from(value.type_)?;
+        let index = NonZeroU32::new(value.index);
+        let flags = InterfaceFlags::from_bits_truncate(value.flags);
+
+        Ok(Self {
+            family,
+            type_,
+            index,
+            flags,
+        })
+    }
+}
+
+impl From<LinkSegmentBody> for CIfinfoMsg {
+    fn from(value: LinkSegmentBody) -> Self {
+        CIfinfoMsg {
+            family: value.family as _,
+            _pad: 0,
+            type_: value.type_ as _,
+            index: value.index.map_or(0, NonZeroU32::get),
+            flags: value.flags.bits(),
+            change: 0,
+        }
+    }
+}
diff --git a/kernel/src/net/socket/netlink/route/message/segment/mod.rs b/kernel/src/net/socket/netlink/route/message/segment/mod.rs
new file mode 100644
index 000000000..40ad20267
--- /dev/null
+++ b/kernel/src/net/socket/netlink/route/message/segment/mod.rs
@@ -0,0 +1,110 @@
+// SPDX-License-Identifier: MPL-2.0
+
+//! This module defines the message segment,
+//! which is the basic unit of a netlink message.
+//!
+//! Typically, a segment will consist of three parts:
+//!
+//! 1. Header: The headers of all segments are of type [`CMsgSegHdr`],
+//!    which indicate the type and total length of the segment.
+//!
+//! 2. Body: The body is the main component of a segment.
+//!    Each segment will have one and only one body.
+//!    The body type is defined by the `type_` field of the header.
+//!
+//! 3. Attributes: Attributes are optional.
+//!    A segment can have zero or multiple attributes.
+//!    Attributes belong to different classes,
+//!    with the class defined by the `type_` field of the header.
+//!    The total number of attributes is controlled by the `len` field of the header.
+//!
+//! Note that all headers, bodies, and attributes require
+//! their starting address in memory to be aligned to [`super::NLMSG_ALIGN`]
+//! when copying to and from user space.
+//! Therefore, necessary padding must be added to ensure alignment.
+//!
+//! The layout of a segment in memory is shown below:
+//!
+//! ┌────────┬─────────┬──────┬─────────┬──────┬──────┬──────┐
+//! │ Header │ Padding │ Body │ Padding │ Attr │ Attr │ Attr │
+//! └────────┴─────────┴──────┴─────────┴──────┴──────┴──────┘
+
+pub mod addr;
+mod legacy;
+pub mod link;
+pub mod route;
+
+use addr::AddrSegment;
+use link::LinkSegment;
+
+use crate::{
+    net::socket::netlink::message::{
+        CMsgSegHdr, CSegmentType, DoneSegment, ErrorSegment, ProtocolSegment,
+    },
+    prelude::*,
+    util::{MultiRead, MultiWrite},
+};
+
+/// The netlink route segment, which is the basic unit of a netlink route message.
+#[derive(Debug)]
+pub enum RtnlSegment {
+    NewLink(LinkSegment),
+    GetLink(LinkSegment),
+    NewAddr(AddrSegment),
+    GetAddr(AddrSegment),
+    Done(DoneSegment),
+    Error(ErrorSegment),
+}
+
+impl ProtocolSegment for RtnlSegment {
+    fn header(&self) -> &CMsgSegHdr {
+        match self {
+            RtnlSegment::NewLink(link_segment) | RtnlSegment::GetLink(link_segment) => {
+                link_segment.header()
+            }
+            RtnlSegment::NewAddr(addr_segment) | RtnlSegment::GetAddr(addr_segment) => {
+                addr_segment.header()
+            }
+            RtnlSegment::Done(done_segment) => done_segment.header(),
+            RtnlSegment::Error(error_segment) => error_segment.header(),
+        }
+    }
+
+    fn header_mut(&mut self) -> &mut CMsgSegHdr {
+        match self {
+            RtnlSegment::NewLink(link_segment) | RtnlSegment::GetLink(link_segment) => {
+                link_segment.header_mut()
+            }
+            RtnlSegment::NewAddr(addr_segment) | RtnlSegment::GetAddr(addr_segment) => {
+                addr_segment.header_mut()
+            }
+            RtnlSegment::Done(done_segment) => done_segment.header_mut(),
+            RtnlSegment::Error(error_segment) => error_segment.header_mut(),
+        }
+    }
+
+    fn read_from(reader: &mut dyn MultiRead) -> Result<Self> {
+        let header = reader.read_val::<CMsgSegHdr>()?;
+
+        let segment = match CSegmentType::try_from(header.type_)? {
+            CSegmentType::GETLINK => RtnlSegment::GetLink(LinkSegment::read_from(header, reader)?),
+            CSegmentType::GETADDR => RtnlSegment::GetAddr(AddrSegment::read_from(header, reader)?),
+            _ => return_errno_with_message!(Errno::EINVAL, "unsupported segment type"),
+        };
+
+        Ok(segment)
+    }
+
+    fn write_to(&self, writer: &mut dyn MultiWrite) -> Result<()> {
+        match self {
+            RtnlSegment::NewLink(link_segment) => link_segment.write_to(writer)?,
+            RtnlSegment::NewAddr(addr_segment) => addr_segment.write_to(writer)?,
+            RtnlSegment::Done(done_segment) => done_segment.write_to(writer)?,
+            RtnlSegment::Error(error_segment) => error_segment.write_to(writer)?,
+            RtnlSegment::GetAddr(_) | RtnlSegment::GetLink(_) => {
+                unreachable!("kernel should not write get requests to user space");
+            }
+        }
+        Ok(())
+    }
+}
diff --git a/kernel/src/net/socket/netlink/route/message/segment/route.rs b/kernel/src/net/socket/netlink/route/message/segment/route.rs
new file mode 100644
index 000000000..a6d333656
--- /dev/null
+++ b/kernel/src/net/socket/netlink/route/message/segment/route.rs
@@ -0,0 +1,21 @@
+// SPDX-License-Identifier: MPL-2.0
+
+use crate::prelude::*;
+
+/// `rtmsg` in Linux.
+///
+/// Reference: `struct rtmsg` in Linux's `include/uapi/linux/rtnetlink.h`.
+#[repr(C)]
+#[derive(Debug, Clone, Copy, Pod)]
+#[expect(unused)]
+pub(super) struct CRtMsg {
+    family: u8,
+    dst_len: u8,
+    src_len: u8,
+    tos: u8,
+    table: u8,
+    protocol: u8,
+    scope: u8,
+    type_: u8,
+    flags: u32,
+}
diff --git a/kernel/src/util/iovec.rs b/kernel/src/util/iovec.rs
index 9192534bd..cdfb5df66 100644
--- a/kernel/src/util/iovec.rs
+++ b/kernel/src/util/iovec.rs
@@ -154,6 +154,13 @@ pub trait MultiRead: ReadCString {
     fn is_empty(&self) -> bool {
         self.sum_lens() == 0
     }
+
+    /// Skips the first `nbytes` bytes of data.
+    ///
+    /// # Panics
+    ///
+    /// If `nbytes` is greater than [`MultiRead::sum_lens`], this method will panic.
+    fn skip(&mut self, nbytes: usize);
 }
 
 /// Trait defining the write behavior for a collection of [`VmWriter`]s.
@@ -177,6 +184,13 @@ pub trait MultiWrite {
     fn is_empty(&self) -> bool {
         self.sum_lens() == 0
     }
+
+    /// Skips the first `nbytes` bytes of space.
+    ///
+    /// # Panics
+    ///
+    /// If `nbytes` is greater than [`MultiWrite::sum_lens`], this method will panic.
+    fn skip(&mut self, nbytes: usize);
 }
 
 impl MultiRead for VmReaderArray<'_> {
@@ -196,6 +210,26 @@ impl MultiRead for VmReaderArray<'_> {
     fn sum_lens(&self) -> usize {
         self.0.iter().map(|vm_reader| vm_reader.remain()).sum()
     }
+
+    fn skip(&mut self, mut nbytes: usize) {
+        // Consume the readers front to back until the request is satisfied.
+        for reader in &mut self.0 {
+            if nbytes == 0 {
+                break;
+            }
+            let bytes_to_skip = reader.remain().min(nbytes);
+            reader.skip(bytes_to_skip);
+            nbytes -= bytes_to_skip;
+        }
+
+        // Checking after the loop (instead of returning from inside it) makes
+        // `skip(0)` a no-op even when the array contains no readers at all.
+        assert!(
+            nbytes == 0,
+            "the readers are exhausted but there are {} bytes remaining to skip",
+            nbytes
+        );
+    }
 }
 
 impl MultiRead for VmReader<'_> {
@@ -206,6 +240,23 @@ impl MultiRead for VmReader<'_> {
     fn sum_lens(&self) -> usize {
         self.remain()
     }
+
+    fn skip(&mut self, nbytes: usize) {
+        VmReader::skip(self, nbytes);
+    }
+}
+
+impl dyn MultiRead + '_ {
+    /// Reads a value of type `T` by filling a zero-initialized `T` from `self`.
+    ///
+    /// NOTE(review): the byte count returned by `read` is discarded, so a
+    /// short read would presumably leave the tail of the value zeroed.
+    /// Confirm that callers check the remaining length beforehand.
+    pub fn read_val<T: Pod>(&mut self) -> Result<T> {
+        let mut val = T::new_zeroed();
+        self.read(&mut VmWriter::from(val.as_bytes_mut()))?;
+        Ok(val)
+    }
 }
 
 impl MultiWrite for VmWriterArray<'_> {
@@ -225,6 +276,26 @@ impl MultiWrite for VmWriterArray<'_> {
     fn sum_lens(&self) -> usize {
         self.0.iter().map(|vm_writer| vm_writer.avail()).sum()
     }
+
+    fn skip(&mut self, mut nbytes: usize) {
+        // Consume the writers front to back until the request is satisfied.
+        for writer in &mut self.0 {
+            if nbytes == 0 {
+                break;
+            }
+            let bytes_to_skip = writer.avail().min(nbytes);
+            writer.skip(bytes_to_skip);
+            nbytes -= bytes_to_skip;
+        }
+
+        // Checking after the loop (instead of returning from inside it) makes
+        // `skip(0)` a no-op even when the array contains no writers at all.
+        assert!(
+            nbytes == 0,
+            "the writers are exhausted but there are {} bytes remaining to skip",
+            nbytes
+        );
+    }
 }
 
 impl MultiWrite for VmWriter<'_> {
@@ -235,4 +306,19 @@ impl MultiWrite for VmWriter<'_> {
     fn sum_lens(&self) -> usize {
         self.avail()
     }
+
+    fn skip(&mut self, nbytes: usize) {
+        VmWriter::skip(self, nbytes);
+    }
+}
+
+impl dyn MultiWrite + '_ {
+    /// Writes the bytes of `val` to `self`.
+    ///
+    /// NOTE(review): the byte count returned by `write` is discarded;
+    /// confirm that a short write cannot go unnoticed by callers.
+    pub fn write_val<T: Pod>(&mut self, val: &T) -> Result<()> {
+        self.write(&mut VmReader::from(val.as_bytes()))?;
+        Ok(())
+    }
 }