Create backlog sockets on demand

This commit is contained in:
Ruihan Li
2024-12-02 23:11:43 +08:00
committed by Tate, Hongliang Tian
parent a739848464
commit 776fd6a892
24 changed files with 947 additions and 781 deletions

View File

@ -9,5 +9,8 @@ pub use init::{init, IFACES};
pub use poll::lazy_init;
pub type Iface = dyn aster_bigtcp::iface::Iface<ext::BigtcpExt>;
pub type BoundTcpSocket = aster_bigtcp::socket::BoundTcpSocket<ext::BigtcpExt>;
pub type BoundUdpSocket = aster_bigtcp::socket::BoundUdpSocket<ext::BigtcpExt>;
pub type BoundPort = aster_bigtcp::iface::BoundPort<ext::BigtcpExt>;
pub type TcpConnection = aster_bigtcp::socket::TcpConnection<ext::BigtcpExt>;
pub type TcpListener = aster_bigtcp::socket::TcpListener<ext::BigtcpExt>;
pub type UdpSocket = aster_bigtcp::socket::UdpSocket<ext::BigtcpExt>;

View File

@ -7,7 +7,7 @@ use aster_bigtcp::{
};
use crate::{
net::iface::{Iface, IFACES},
net::iface::{BoundPort, Iface, IFACES},
prelude::*,
};
@ -45,30 +45,20 @@ fn get_ephemeral_iface(remote_ip_addr: &IpAddress) -> Arc<Iface> {
ifaces[0].clone()
}
pub(super) fn bind_socket<S, T>(
unbound_socket: Box<S>,
endpoint: &IpEndpoint,
can_reuse: bool,
bind: impl FnOnce(
Arc<Iface>,
Box<S>,
BindPortConfig,
) -> core::result::Result<T, (BindError, Box<S>)>,
) -> core::result::Result<T, (Error, Box<S>)> {
pub(super) fn bind_port(endpoint: &IpEndpoint, can_reuse: bool) -> Result<BoundPort> {
let iface = match get_iface_to_bind(&endpoint.addr) {
Some(iface) => iface,
None => {
let err = Error::with_message(
return_errno_with_message!(
Errno::EADDRNOTAVAIL,
"the address is not available from the local machine",
"the address is not available from the local machine"
);
return Err((err, unbound_socket));
}
};
let bind_port_config = BindPortConfig::new(endpoint.port, can_reuse);
bind(iface, unbound_socket, bind_port_config).map_err(|(err, unbound)| (err.into(), unbound))
Ok(iface.bind(bind_port_config)?)
}
impl From<BindError> for Error {

View File

@ -8,7 +8,7 @@ use aster_bigtcp::{
use crate::{
events::IoEvents,
net::{
iface::{BoundUdpSocket, Iface},
iface::{Iface, UdpSocket},
socket::util::send_recv_flags::SendRecvFlags,
},
prelude::*,
@ -16,12 +16,12 @@ use crate::{
};
pub struct BoundDatagram {
bound_socket: BoundUdpSocket,
bound_socket: UdpSocket,
remote_endpoint: Option<IpEndpoint>,
}
impl BoundDatagram {
pub fn new(bound_socket: BoundUdpSocket) -> Self {
pub fn new(bound_socket: UdpSocket) -> Self {
Self {
bound_socket,
remote_endpoint: None,

View File

@ -1,19 +1,17 @@
// SPDX-License-Identifier: MPL-2.0
use aster_bigtcp::{socket::UnboundUdpSocket, wire::IpEndpoint};
use aster_bigtcp::{socket::UdpSocket, wire::IpEndpoint};
use super::{bound::BoundDatagram, DatagramObserver};
use crate::{events::IoEvents, net::socket::ip::common::bind_socket, prelude::*};
use crate::{events::IoEvents, net::socket::ip::common::bind_port, prelude::*};
pub struct UnboundDatagram {
unbound_socket: Box<UnboundUdpSocket>,
_private: (),
}
impl UnboundDatagram {
pub fn new() -> Self {
Self {
unbound_socket: Box::new(UnboundUdpSocket::new()),
}
Self { _private: () }
}
pub fn bind(
@ -22,18 +20,17 @@ impl UnboundDatagram {
can_reuse: bool,
observer: DatagramObserver,
) -> core::result::Result<BoundDatagram, (Error, Self)> {
let bound_socket = match bind_socket(
self.unbound_socket,
endpoint,
can_reuse,
|iface, socket, config| iface.bind_udp(socket, observer, config),
) {
Ok(bound_socket) => bound_socket,
Err((err, unbound_socket)) => return Err((err, Self { unbound_socket })),
let bound_port = match bind_port(endpoint, can_reuse) {
Ok(bound_port) => bound_port,
Err(err) => return Err((err, self)),
};
let bound_endpoint = bound_socket.local_endpoint().unwrap();
bound_socket.bind(bound_endpoint).unwrap();
// The port has already been bound above, so a failure of `new_bind` here is treated as a
// bug (hence `unreachable!`) rather than as a recoverable error.
let bound_socket = match UdpSocket::new_bind(bound_port, observer) {
    Ok(bound_socket) => bound_socket,
    Err((_, err)) => {
        unreachable!("`new_bind` fails with {:?}, which should not happen", err)
    }
};
Ok(BoundDatagram::new(bound_socket))
}

View File

@ -12,7 +12,7 @@ use super::StreamObserver;
use crate::{
events::IoEvents,
net::{
iface::{BoundTcpSocket, Iface},
iface::{Iface, TcpConnection},
socket::util::{send_recv_flags::SendRecvFlags, shutdown_cmd::SockShutdownCmd},
},
prelude::*,
@ -21,7 +21,7 @@ use crate::{
};
pub struct ConnectedStream {
bound_socket: BoundTcpSocket,
tcp_conn: TcpConnection,
remote_endpoint: IpEndpoint,
/// Indicates whether this connection is "new" in a `connect()` system call.
///
@ -47,12 +47,12 @@ pub struct ConnectedStream {
impl ConnectedStream {
pub fn new(
bound_socket: BoundTcpSocket,
tcp_conn: TcpConnection,
remote_endpoint: IpEndpoint,
is_new_connection: bool,
) -> Self {
Self {
bound_socket,
tcp_conn,
remote_endpoint,
is_new_connection,
is_receiving_closed: AtomicBool::new(false),
@ -70,7 +70,7 @@ impl ConnectedStream {
if cmd.shut_write() {
self.is_sending_closed.store(true, Ordering::Relaxed);
self.bound_socket.close();
self.tcp_conn.close();
events |= IoEvents::OUT | IoEvents::HUP;
}
@ -84,7 +84,7 @@ impl ConnectedStream {
writer: &mut dyn MultiWrite,
_flags: SendRecvFlags,
) -> Result<(usize, NeedIfacePoll)> {
let result = self.bound_socket.recv(|socket_buffer| {
let result = self.tcp_conn.recv(|socket_buffer| {
match writer.write(&mut VmReader::from(&*socket_buffer)) {
Ok(len) => (len, Ok(len)),
Err(e) => (0, Err(e)),
@ -116,7 +116,7 @@ impl ConnectedStream {
reader: &mut dyn MultiRead,
_flags: SendRecvFlags,
) -> Result<(usize, NeedIfacePoll)> {
let result = self.bound_socket.send(|socket_buffer| {
let result = self.tcp_conn.send(|socket_buffer| {
match reader.read(&mut VmWriter::from(socket_buffer)) {
Ok(len) => (len, Ok(len)),
Err(e) => (0, Err(e)),
@ -143,7 +143,7 @@ impl ConnectedStream {
}
pub fn local_endpoint(&self) -> IpEndpoint {
self.bound_socket.local_endpoint().unwrap()
self.tcp_conn.local_endpoint().unwrap()
}
pub fn remote_endpoint(&self) -> IpEndpoint {
@ -151,7 +151,7 @@ impl ConnectedStream {
}
pub fn iface(&self) -> &Arc<Iface> {
self.bound_socket.iface()
self.tcp_conn.iface()
}
pub fn check_new(&mut self) -> Result<()> {
@ -163,8 +163,12 @@ impl ConnectedStream {
Ok(())
}
/// Forwards `observer` to `init_observer` on the underlying TCP connection.
pub(super) fn init_observer(&self, observer: StreamObserver) {
self.tcp_conn.init_observer(observer);
}
pub(super) fn check_io_events(&self) -> IoEvents {
self.bound_socket.raw_with(|socket| {
self.tcp_conn.raw_with(|socket| {
if socket.is_peer_closed() {
// Only the sending side of peer socket is closed
self.is_receiving_closed.store(true, Ordering::Relaxed);
@ -202,18 +206,14 @@ impl ConnectedStream {
})
}
pub(super) fn set_observer(&self, observer: StreamObserver) {
self.bound_socket.set_observer(observer)
}
pub(super) fn set_raw_option<R>(
&mut self,
set_option: impl Fn(&mut dyn RawTcpSetOption) -> R,
&self,
set_option: impl FnOnce(&dyn RawTcpSetOption) -> R,
) -> R {
set_option(&mut self.bound_socket)
set_option(&self.tcp_conn)
}
pub(super) fn raw_with<R>(&self, f: impl FnOnce(&RawTcpSocket) -> R) -> R {
self.bound_socket.raw_with(f)
self.tcp_conn.raw_with(f)
}
}

View File

@ -1,19 +1,19 @@
// SPDX-License-Identifier: MPL-2.0
use aster_bigtcp::{
socket::{ConnectState, RawTcpSetOption},
socket::{ConnectState, RawTcpOption, RawTcpSetOption},
wire::IpEndpoint,
};
use super::{connected::ConnectedStream, init::InitStream};
use super::{connected::ConnectedStream, init::InitStream, StreamObserver};
use crate::{
events::IoEvents,
net::iface::{BoundTcpSocket, Iface},
net::iface::{BoundPort, Iface, TcpConnection},
prelude::*,
};
pub struct ConnectingStream {
bound_socket: BoundTcpSocket,
tcp_conn: TcpConnection,
remote_endpoint: IpEndpoint,
}
@ -25,32 +25,38 @@ pub enum ConnResult {
impl ConnectingStream {
pub fn new(
bound_socket: BoundTcpSocket,
bound_port: BoundPort,
remote_endpoint: IpEndpoint,
) -> core::result::Result<Self, (Error, BoundTcpSocket)> {
option: &RawTcpOption,
observer: StreamObserver,
) -> core::result::Result<Self, (Error, BoundPort)> {
// The only reason this method might fail is because we're trying to connect to an
// unspecified address (i.e. 0.0.0.0). We currently have no support for binding to,
// listening on, or connecting to the unspecified address.
//
// We assume the remote will just refuse to connect, so we return `ECONNREFUSED`.
if bound_socket.connect(remote_endpoint).is_err() {
return Err((
Error::with_message(
Errno::ECONNREFUSED,
"connecting to an unspecified address is not supported",
),
bound_socket,
));
}
let tcp_conn =
match TcpConnection::new_connect(bound_port, remote_endpoint, option, observer) {
Ok(tcp_conn) => tcp_conn,
Err((bound_port, _)) => {
return Err((
Error::with_message(
Errno::ECONNREFUSED,
"connecting to an unspecified address is not supported",
),
bound_port,
))
}
};
Ok(Self {
bound_socket,
tcp_conn,
remote_endpoint,
})
}
pub fn has_result(&self) -> bool {
match self.bound_socket.connect_state() {
match self.tcp_conn.connect_state() {
ConnectState::Connecting => false,
ConnectState::Connected => true,
ConnectState::Refused => true,
@ -58,21 +64,23 @@ impl ConnectingStream {
}
pub fn into_result(self) -> ConnResult {
let next_state = self.bound_socket.connect_state();
let next_state = self.tcp_conn.connect_state();
match next_state {
ConnectState::Connecting => ConnResult::Connecting(self),
ConnectState::Connected => ConnResult::Connected(ConnectedStream::new(
self.bound_socket,
self.tcp_conn,
self.remote_endpoint,
true,
)),
ConnectState::Refused => ConnResult::Refused(InitStream::new_bound(self.bound_socket)),
ConnectState::Refused => ConnResult::Refused(InitStream::new_bound(
self.tcp_conn.into_bound_port().unwrap(),
)),
}
}
pub fn local_endpoint(&self) -> IpEndpoint {
self.bound_socket.local_endpoint().unwrap()
self.tcp_conn.local_endpoint().unwrap()
}
pub fn remote_endpoint(&self) -> IpEndpoint {
@ -80,7 +88,7 @@ impl ConnectingStream {
}
pub fn iface(&self) -> &Arc<Iface> {
self.bound_socket.iface()
self.tcp_conn.iface()
}
pub(super) fn check_io_events(&self) -> IoEvents {
@ -88,9 +96,9 @@ impl ConnectingStream {
}
pub(super) fn set_raw_option<R>(
&mut self,
set_option: impl Fn(&mut dyn RawTcpSetOption) -> R,
&self,
set_option: impl FnOnce(&dyn RawTcpSetOption) -> R,
) -> R {
set_option(&mut self.bound_socket)
set_option(&self.tcp_conn)
}
}

View File

@ -1,43 +1,38 @@
// SPDX-License-Identifier: MPL-2.0
use aster_bigtcp::{
socket::{RawTcpSetOption, UnboundTcpSocket},
wire::IpEndpoint,
};
use aster_bigtcp::{socket::RawTcpOption, wire::IpEndpoint};
use super::{connecting::ConnectingStream, listen::ListenStream, StreamObserver};
use crate::{
events::IoEvents,
net::{
iface::BoundTcpSocket,
socket::ip::common::{bind_socket, get_ephemeral_endpoint},
iface::BoundPort,
socket::ip::common::{bind_port, get_ephemeral_endpoint},
},
prelude::*,
process::signal::Pollee,
};
pub enum InitStream {
Unbound(Box<UnboundTcpSocket>),
Bound(BoundTcpSocket),
Unbound,
Bound(BoundPort),
}
impl InitStream {
pub fn new() -> Self {
InitStream::Unbound(Box::new(UnboundTcpSocket::new()))
InitStream::Unbound
}
pub fn new_bound(bound_socket: BoundTcpSocket) -> Self {
InitStream::Bound(bound_socket)
pub fn new_bound(bound_port: BoundPort) -> Self {
InitStream::Bound(bound_port)
}
pub fn bind(
self,
endpoint: &IpEndpoint,
can_reuse: bool,
observer: StreamObserver,
) -> core::result::Result<BoundTcpSocket, (Error, Self)> {
let unbound_socket = match self {
InitStream::Unbound(unbound_socket) => unbound_socket,
) -> core::result::Result<BoundPort, (Error, Self)> {
match self {
InitStream::Unbound => (),
InitStream::Bound(bound_socket) => {
return Err((
Error::with_message(Errno::EINVAL, "the socket is already bound to an address"),
@ -45,48 +40,45 @@ impl InitStream {
));
}
};
let bound_socket = match bind_socket(
unbound_socket,
endpoint,
can_reuse,
|iface, socket, config| iface.bind_tcp(socket, observer, config),
) {
Ok(bound_socket) => bound_socket,
Err((err, unbound_socket)) => return Err((err, InitStream::Unbound(unbound_socket))),
let bound_port = match bind_port(endpoint, can_reuse) {
Ok(bound_port) => bound_port,
Err(err) => return Err((err, Self::Unbound)),
};
Ok(bound_socket)
Ok(bound_port)
}
fn bind_to_ephemeral_endpoint(
self,
remote_endpoint: &IpEndpoint,
observer: StreamObserver,
) -> core::result::Result<BoundTcpSocket, (Error, Self)> {
) -> core::result::Result<BoundPort, (Error, Self)> {
let endpoint = get_ephemeral_endpoint(remote_endpoint);
self.bind(&endpoint, false, observer)
self.bind(&endpoint, false)
}
pub fn connect(
self,
remote_endpoint: &IpEndpoint,
pollee: &Pollee,
option: &RawTcpOption,
observer: StreamObserver,
) -> core::result::Result<ConnectingStream, (Error, Self)> {
let bound_socket = match self {
InitStream::Bound(bound_socket) => bound_socket,
InitStream::Unbound(_) => self
.bind_to_ephemeral_endpoint(remote_endpoint, StreamObserver::new(pollee.clone()))?,
let bound_port = match self {
InitStream::Bound(bound_port) => bound_port,
InitStream::Unbound => self.bind_to_ephemeral_endpoint(remote_endpoint)?,
};
ConnectingStream::new(bound_socket, *remote_endpoint)
.map_err(|(err, bound_socket)| (err, InitStream::Bound(bound_socket)))
ConnectingStream::new(bound_port, *remote_endpoint, option, observer)
.map_err(|(err, bound_port)| (err, InitStream::Bound(bound_port)))
}
pub fn listen(
self,
backlog: usize,
pollee: &Pollee,
option: &RawTcpOption,
observer: StreamObserver,
) -> core::result::Result<ListenStream, (Error, Self)> {
let InitStream::Bound(bound_socket) = self else {
let InitStream::Bound(bound_port) = self else {
// FIXME: The socket should be bound to INADDR_ANY (i.e., 0.0.0.0) with an ephemeral
// port. However, INADDR_ANY is not yet supported, so we need to return an error first.
debug_assert!(false, "listen() without bind() is not implemented");
@ -96,14 +88,13 @@ impl InitStream {
));
};
ListenStream::new(bound_socket, backlog, pollee)
.map_err(|(err, bound_socket)| (err, InitStream::Bound(bound_socket)))
Ok(ListenStream::new(bound_port, backlog, option, observer))
}
pub fn local_endpoint(&self) -> Option<IpEndpoint> {
match self {
InitStream::Unbound(_) => None,
InitStream::Bound(bound_socket) => Some(bound_socket.local_endpoint().unwrap()),
InitStream::Unbound => None,
InitStream::Bound(bound_port) => Some(bound_port.endpoint().unwrap()),
}
}
@ -111,14 +102,4 @@ impl InitStream {
// Linux adds OUT and HUP events for a newly created socket
IoEvents::OUT | IoEvents::HUP
}
pub(super) fn set_raw_option<R>(
&mut self,
set_option: impl Fn(&mut dyn RawTcpSetOption) -> R,
) -> R {
match self {
InitStream::Unbound(unbound_socket) => set_option(unbound_socket.as_mut()),
InitStream::Bound(bound_socket) => set_option(bound_socket),
}
}
}

View File

@ -1,103 +1,59 @@
// SPDX-License-Identifier: MPL-2.0
use aster_bigtcp::{
errors::tcp::ListenError,
iface::BindPortConfig,
socket::{RawTcpSetOption, TcpState, UnboundTcpSocket},
socket::{RawTcpOption, RawTcpSetOption},
wire::IpEndpoint,
};
use ostd::sync::PreemptDisabled;
use super::{connected::ConnectedStream, StreamObserver};
use crate::{
events::IoEvents,
net::iface::{BoundTcpSocket, Iface},
net::iface::{BoundPort, Iface, TcpListener},
prelude::*,
process::signal::Pollee,
};
pub struct ListenStream {
backlog: usize,
/// A bound socket held to ensure the TCP port cannot be released
bound_socket: BoundTcpSocket,
/// Backlog sockets listening at the local endpoint
backlog_sockets: RwLock<Vec<BacklogSocket>, PreemptDisabled>,
tcp_listener: TcpListener,
}
impl ListenStream {
pub fn new(
bound_socket: BoundTcpSocket,
bound_port: BoundPort,
backlog: usize,
pollee: &Pollee,
) -> core::result::Result<Self, (Error, BoundTcpSocket)> {
option: &RawTcpOption,
observer: StreamObserver,
) -> Self {
const SOMAXCONN: usize = 4096;
let somaxconn = SOMAXCONN.min(backlog);
let max_conn = SOMAXCONN.min(backlog);
let listen_stream = Self {
backlog: somaxconn,
bound_socket,
backlog_sockets: RwLock::new(Vec::new()),
let tcp_listener = match TcpListener::new_listen(bound_port, max_conn, option, observer) {
Ok(tcp_listener) => tcp_listener,
Err((_, err)) => {
unreachable!("`new_listen` fails with {:?}, which should not happen", err)
}
};
if let Err(err) = listen_stream.fill_backlog_sockets(pollee) {
return Err((err, listen_stream.bound_socket));
}
Ok(listen_stream)
Self { tcp_listener }
}
/// Append sockets listening at LocalEndPoint to support backlog
fn fill_backlog_sockets(&self, pollee: &Pollee) -> Result<()> {
let mut backlog_sockets = self.backlog_sockets.write();
pub fn try_accept(&self) -> Result<ConnectedStream> {
let (new_conn, remote_endpoint) = self.tcp_listener.accept().ok_or_else(|| {
Error::with_message(Errno::EAGAIN, "no pending connection is available")
})?;
let backlog = self.backlog;
let current_backlog_len = backlog_sockets.len();
debug_assert!(backlog >= current_backlog_len);
if backlog == current_backlog_len {
return Ok(());
}
for _ in current_backlog_len..backlog {
let backlog_socket = BacklogSocket::new(&self.bound_socket, pollee)?;
backlog_sockets.push(backlog_socket);
}
Ok(())
}
pub fn try_accept(&self, pollee: &Pollee) -> Result<ConnectedStream> {
let mut backlog_sockets = self.backlog_sockets.write();
let index = backlog_sockets
.iter()
.position(|backlog_socket| backlog_socket.can_accept())
.ok_or_else(|| {
Error::with_message(Errno::EAGAIN, "no pending connection is available")
})?;
let active_backlog_socket = backlog_sockets.remove(index);
if let Ok(backlog_socket) = BacklogSocket::new(&self.bound_socket, pollee) {
backlog_sockets.push(backlog_socket);
}
let remote_endpoint = active_backlog_socket.remote_endpoint().unwrap();
Ok(ConnectedStream::new(
active_backlog_socket.into_bound_socket(),
remote_endpoint,
false,
))
Ok(ConnectedStream::new(new_conn, remote_endpoint, false))
}
pub fn local_endpoint(&self) -> IpEndpoint {
self.bound_socket.local_endpoint().unwrap()
self.tcp_listener.local_endpoint().unwrap()
}
pub fn iface(&self) -> &Arc<Iface> {
self.bound_socket.iface()
self.tcp_listener.iface()
}
pub(super) fn check_io_events(&self) -> IoEvents {
let backlog_sockets = self.backlog_sockets.read();
let can_accept = backlog_sockets.iter().any(|socket| socket.can_accept());
let can_accept = self.tcp_listener.can_accept();
// If network packets come in simultaneously, the socket state may change in the middle.
// However, the current pollee implementation should be able to handle this race condition.
@ -108,97 +64,10 @@ impl ListenStream {
}
}
/// Calls `f` to set socket option on raw socket.
///
/// This method will call `f` on the bound socket and each backlog socket that is in `Listen` state .
pub(super) fn set_raw_option<R>(
&mut self,
set_option: impl Fn(&mut dyn RawTcpSetOption) -> R,
&self,
set_option: impl FnOnce(&dyn RawTcpSetOption) -> R,
) -> R {
self.backlog_sockets.write().iter_mut().for_each(|socket| {
if socket
.bound_socket
.raw_with(|raw_tcp_socket| raw_tcp_socket.state() != TcpState::Listen)
{
return;
}
// If the socket receives SYN after above check,
// we will also set keep alive on the socket that is not in `Listen` state.
// But such a race doesn't matter, we just let it happen.
set_option(&mut socket.bound_socket);
});
set_option(&mut self.bound_socket)
}
}
struct BacklogSocket {
bound_socket: BoundTcpSocket,
}
impl BacklogSocket {
// FIXME: All of the error codes below seem to have no Linux equivalents, and I see no reason
// why the error may occur. Perhaps it is better to call `unwrap()` directly?
fn new(bound_socket: &BoundTcpSocket, pollee: &Pollee) -> Result<Self> {
let local_endpoint = bound_socket.local_endpoint().ok_or(Error::with_message(
Errno::EINVAL,
"the socket is not bound",
))?;
let unbound_socket = {
let mut unbound = UnboundTcpSocket::new();
unbound.set_keep_alive(bound_socket.raw_with(|socket| socket.keep_alive()));
unbound.set_nagle_enabled(bound_socket.raw_with(|socket| socket.nagle_enabled()));
// TODO: Inherit other options that can be set via `setsockopt` from bound socket
Box::new(unbound)
};
let bound_socket = {
let iface = bound_socket.iface();
let bind_port_config = BindPortConfig::new(local_endpoint.port, true);
iface
.bind_tcp(
unbound_socket,
StreamObserver::new(pollee.clone()),
bind_port_config,
)
.map_err(|(err, _)| err)?
};
match bound_socket.listen(local_endpoint) {
Ok(()) => Ok(Self { bound_socket }),
Err(ListenError::Unaddressable) => {
return_errno_with_message!(Errno::EINVAL, "the listening address is invalid")
}
Err(ListenError::InvalidState) => {
return_errno_with_message!(Errno::EINVAL, "the listening socket is invalid")
}
}
}
/// Returns whether the backlog socket can be `accept`ed.
///
/// According to the Linux implementation, assuming the TCP Fast Open mechanism is off, a
/// backlog socket becomes ready to be returned in the `accept` system call when the 3-way
/// handshake is complete (i.e., when it enters the ESTABLISHED state).
///
/// The Linux kernel implementation can be found at
/// <https://elixir.bootlin.com/linux/v6.11.8/source/net/ipv4/tcp_input.c#L7304>.
//
// FIMXE: Some sockets may be dead (e.g., RSTed), and such sockets can never become alive
// again. We need to remove them from the backlog sockets.
fn can_accept(&self) -> bool {
self.bound_socket.raw_with(|socket| socket.may_send())
}
fn remote_endpoint(&self) -> Option<IpEndpoint> {
self.bound_socket
.raw_with(|socket| socket.remote_endpoint())
}
fn into_bound_socket(self) -> BoundTcpSocket {
self.bound_socket
set_option(&self.tcp_listener)
}
}

View File

@ -3,14 +3,14 @@
use core::sync::atomic::{AtomicBool, Ordering};
use aster_bigtcp::{
socket::{NeedIfacePoll, RawTcpSetOption},
socket::{NeedIfacePoll, RawTcpOption, RawTcpSetOption},
wire::IpEndpoint,
};
use connected::ConnectedStream;
use connecting::{ConnResult, ConnectingStream};
use init::InitStream;
use listen::ListenStream;
use options::{Congestion, MaxSegment, NoDelay, WindowClamp};
use options::{Congestion, MaxSegment, NoDelay, WindowClamp, KEEPALIVE_INTERVAL};
use ostd::sync::{PreemptDisabled, RwLockReadGuard, RwLockWriteGuard};
use takeable::Takeable;
use util::TcpOptionSet;
@ -83,6 +83,13 @@ impl OptionSet {
let tcp = TcpOptionSet::new();
OptionSet { socket, tcp }
}
/// Derives the raw TCP options that correspond to the current option set.
///
/// Keepalive uses `KEEPALIVE_INTERVAL` when the socket-level keepalive option is on and is
/// disabled otherwise; Nagle's algorithm is enabled exactly when the TCP no-delay option is
/// off.
fn raw(&self) -> RawTcpOption {
    let keep_alive = if self.socket.keep_alive() {
        Some(KEEPALIVE_INTERVAL)
    } else {
        None
    };
    let is_nagle_enabled = !self.tcp.no_delay();

    RawTcpOption {
        keep_alive,
        is_nagle_enabled,
    }
}
}
impl StreamSocket {
@ -114,7 +121,7 @@ impl StreamSocket {
});
let pollee = Pollee::new();
connected_stream.set_observer(StreamObserver::new(pollee.clone()));
connected_stream.init_observer(StreamObserver::new(pollee.clone()));
Arc::new(Self {
options: RwLock::new(options),
@ -207,7 +214,9 @@ impl StreamSocket {
// `Some(_)` if blocking is not necessary or not allowed.
fn start_connect(&self, remote_endpoint: &IpEndpoint) -> Option<Result<()>> {
let is_nonblocking = self.is_nonblocking();
let mut state = self.write_updated_state();
let (options, mut state) = self.update_connecting();
let raw_option = options.raw();
let (result_or_block, iface_to_poll) = state.borrow_result(|mut owned_state| {
let init_stream = match owned_state {
@ -243,7 +252,11 @@ impl StreamSocket {
}
};
let connecting_stream = match init_stream.connect(remote_endpoint, &self.pollee) {
let connecting_stream = match init_stream.connect(
remote_endpoint,
&raw_option,
StreamObserver::new(self.pollee.clone()),
) {
Ok(connecting_stream) => connecting_stream,
Err((err, init_stream)) => {
return (State::Init(init_stream), (Some(Err(err)), None));
@ -298,13 +311,11 @@ impl StreamSocket {
return_errno_with_message!(Errno::EINVAL, "the socket is not listening");
};
let accepted = listen_stream
.try_accept(&self.pollee)
.map(|connected_stream| {
let remote_endpoint = connected_stream.remote_endpoint();
let accepted_socket = Self::new_accepted(connected_stream);
(accepted_socket as _, remote_endpoint.into())
});
let accepted = listen_stream.try_accept().map(|connected_stream| {
let remote_endpoint = connected_stream.remote_endpoint();
let accepted_socket = Self::new_accepted(connected_stream);
(accepted_socket as _, remote_endpoint.into())
});
let iface_to_poll = listen_stream.iface().clone();
drop(state);
@ -475,18 +486,14 @@ impl Socket for StreamSocket {
);
};
let bound_socket = match init_stream.bind(
&endpoint,
can_reuse,
StreamObserver::new(self.pollee.clone()),
) {
Ok(bound_socket) => bound_socket,
let bound_port = match init_stream.bind(&endpoint, can_reuse) {
Ok(bound_port) => bound_port,
Err((err, init_stream)) => {
return (State::Init(init_stream), Err(err));
}
};
(State::Init(InitStream::new_bound(bound_socket)), Ok(()))
(State::Init(InitStream::new_bound(bound_port)), Ok(()))
})
}
@ -501,7 +508,9 @@ impl Socket for StreamSocket {
}
fn listen(&self, backlog: usize) -> Result<()> {
let mut state = self.write_updated_state();
let (options, mut state) = self.update_connecting();
let raw_option = options.raw();
state.borrow_result(|owned_state| {
let init_stream = match owned_state {
@ -520,7 +529,11 @@ impl Socket for StreamSocket {
}
};
let listen_stream = match init_stream.listen(backlog, &self.pollee) {
let listen_stream = match init_stream.listen(
backlog,
&raw_option,
StreamObserver::new(self.pollee.clone()),
) {
Ok(listen_stream) => listen_stream,
Err((err, init_stream)) => {
return (State::Init(init_stream), Err(err));
@ -701,7 +714,7 @@ impl Socket for StreamSocket {
tcp_no_delay: NoDelay => {
let no_delay = tcp_no_delay.get().unwrap();
options.tcp.set_no_delay(*no_delay);
state.set_raw_option(|raw_socket: &mut dyn RawTcpSetOption| raw_socket.set_nagle_enabled(!no_delay));
state.set_raw_option(|raw_socket: &dyn RawTcpSetOption| raw_socket.set_nagle_enabled(!no_delay));
},
tcp_congestion: Congestion => {
let congestion = tcp_congestion.get().unwrap();
@ -736,14 +749,16 @@ impl Socket for StreamSocket {
impl State {
/// Calls `f` to set raw socket option.
///
/// Note that for listening socket, `f` is called on all backlog sockets in `Listen` State.
/// That is to say, `f` won't be called on backlog sockets in `SynReceived` or `Established` state.
fn set_raw_option<R>(&mut self, set_option: impl Fn(&mut dyn RawTcpSetOption) -> R) -> R {
/// For listening sockets, socket options are inherited by new connections. However, they are
/// not updated for connections in the backlog queue.
fn set_raw_option<R>(&self, set_option: impl FnOnce(&dyn RawTcpSetOption) -> R) -> Option<R> {
match self {
State::Init(init_stream) => init_stream.set_raw_option(set_option),
State::Connecting(connecting_stream) => connecting_stream.set_raw_option(set_option),
State::Connected(connected_stream) => connected_stream.set_raw_option(set_option),
State::Listen(listen_stream) => listen_stream.set_raw_option(set_option),
State::Init(_) => None,
State::Connecting(connecting_stream) => {
Some(connecting_stream.set_raw_option(set_option))
}
State::Connected(connected_stream) => Some(connected_stream.set_raw_option(set_option)),
State::Listen(listen_stream) => Some(listen_stream.set_raw_option(set_option)),
}
}
@ -758,24 +773,17 @@ impl State {
}
impl SetSocketLevelOption for State {
fn set_keep_alive(&mut self, keep_alive: bool) -> NeedIfacePoll {
/// The keepalive interval.
///
/// The linux value can be found at `/proc/sys/net/ipv4/tcp_keepalive_intvl`,
/// which is by default 75 seconds for most Linux distributions.
const KEEPALIVE_INTERVAL: aster_bigtcp::time::Duration =
aster_bigtcp::time::Duration::from_secs(75);
fn set_keep_alive(&self, keep_alive: bool) -> NeedIfacePoll {
let interval = if keep_alive {
Some(KEEPALIVE_INTERVAL)
} else {
None
};
let set_keepalive =
|raw_socket: &mut dyn RawTcpSetOption| raw_socket.set_keep_alive(interval);
let set_keepalive = |raw_socket: &dyn RawTcpSetOption| raw_socket.set_keep_alive(interval);
self.set_raw_option(set_keepalive)
.unwrap_or(NeedIfacePoll::FALSE)
}
}

View File

@ -4,6 +4,7 @@ use aster_bigtcp::socket::{SocketEventObserver, SocketEvents};
use crate::{events::IoEvents, process::signal::Pollee};
/// A newtype over a `Pollee` that is passed as the observer when TCP connections and
/// listeners are created.
#[derive(Clone)]
pub struct StreamObserver(Pollee);
impl StreamObserver {

View File

@ -9,3 +9,10 @@ impl_socket_options!(
pub struct MaxSegment(u32);
pub struct WindowClamp(u32);
);
/// The keepalive interval.
///
/// The Linux value can be found at `/proc/sys/net/ipv4/tcp_keepalive_intvl`,
/// which is by default 75 seconds for most Linux distributions.
pub(super) const KEEPALIVE_INTERVAL: aster_bigtcp::time::Duration =
aster_bigtcp::time::Duration::from_secs(75);

View File

@ -173,7 +173,7 @@ impl LingerOption {
/// A trait used for setting socket level options on actual sockets.
pub(in crate::net) trait SetSocketLevelOption {
/// Sets whether keepalive messages are enabled.
fn set_keep_alive(&mut self, _keep_alive: bool) -> NeedIfacePoll {
fn set_keep_alive(&self, _keep_alive: bool) -> NeedIfacePoll {
NeedIfacePoll::FALSE
}
}