// SPDX-License-Identifier: MPL-2.0 #![expect(dead_code)] use intrusive_collections::{intrusive_adapter, LinkedList, LinkedListAtomicLink}; use ostd::{ cpu::num_cpus, sync::{Waiter, Waker}, }; use spin::Once; use crate::{prelude::*, process::Pid, time::wait::ManagedTimeout}; type FutexBitSet = u32; type FutexBucketRef = Arc>; const FUTEX_OP_MASK: u32 = 0x0000_000F; const FUTEX_FLAGS_MASK: u32 = 0xFFFF_FFF0; const FUTEX_BITSET_MATCH_ANY: FutexBitSet = 0xFFFF_FFFF; /// do futex wait pub fn futex_wait( futex_addr: u64, futex_val: i32, timeout: Option, ctx: &Context, pid: Option, ) -> Result<()> { futex_wait_bitset( futex_addr as _, futex_val, timeout, FUTEX_BITSET_MATCH_ANY, ctx, pid, ) } /// Does futex wait bitset pub fn futex_wait_bitset( futex_addr: Vaddr, futex_val: i32, timeout: Option, bitset: FutexBitSet, ctx: &Context, pid: Option, ) -> Result<()> { debug!( "futex_wait_bitset addr: {:#x}, val: {}, bitset: {:#x}", futex_addr, futex_val, bitset ); if bitset == 0 { return_errno_with_message!(Errno::EINVAL, "at least one bit should be set"); } let futex_key = FutexKey::new(futex_addr, bitset, pid); let (futex_item, waiter) = FutexItem::create(futex_key); let (_, futex_bucket_ref) = get_futex_bucket(futex_key); // lock futex bucket ref here to avoid data race let mut futex_bucket = futex_bucket_ref.lock(); if !futex_key.load_val(ctx).is_ok_and(|val| val == futex_val) { return_errno_with_message!( Errno::EAGAIN, "futex value does not match or load_val failed" ); } futex_bucket.add_item(futex_item); // drop lock drop(futex_bucket); waiter.pause_timeout(timeout) // TODO: Ensure the futex item is dequeued and dropped. // // The enqueued futex item remain undequeued // if the futex wait operation is interrupted by a signal or times out. // In such cases, the `Box` would persist in memory, // leaving our implementation vulnerable to exploitation by user programs // that could repeatedly issue futex wait operations // to exhaust kernel memory. } /// Does futex wake pub fn futex_wake(futex_addr: Vaddr, max_count: usize, pid: Option) -> Result { futex_wake_bitset(futex_addr, max_count, FUTEX_BITSET_MATCH_ANY, pid) } /// Does futex wake with bitset pub fn futex_wake_bitset( futex_addr: Vaddr, max_count: usize, bitset: FutexBitSet, pid: Option, ) -> Result { debug!( "futex_wake_bitset addr: {:#x}, max_count: {}, bitset: {:#x}", futex_addr, max_count, bitset ); if bitset == 0 { return_errno_with_message!(Errno::EINVAL, "at least one bit should be set"); } let futex_key = FutexKey::new(futex_addr, bitset, pid); let (_, futex_bucket_ref) = get_futex_bucket(futex_key); let mut futex_bucket = futex_bucket_ref.lock(); let res = futex_bucket.remove_and_wake_items(futex_key, max_count); drop(futex_bucket); Ok(res) } /// Does futex requeue pub fn futex_requeue( futex_addr: Vaddr, max_nwakes: usize, max_nrequeues: usize, futex_new_addr: Vaddr, pid: Option, ) -> Result { if futex_new_addr == futex_addr { return futex_wake(futex_addr, max_nwakes, pid); } let futex_key = FutexKey::new(futex_addr, FUTEX_BITSET_MATCH_ANY, pid); let futex_new_key = FutexKey::new(futex_new_addr, FUTEX_BITSET_MATCH_ANY, pid); let (bucket_idx, futex_bucket_ref) = get_futex_bucket(futex_key); let (new_bucket_idx, futex_new_bucket_ref) = get_futex_bucket(futex_new_key); let nwakes = { if bucket_idx == new_bucket_idx { let mut futex_bucket = futex_bucket_ref.lock(); let nwakes = futex_bucket.remove_and_wake_items(futex_key, max_nwakes); futex_bucket.update_item_keys(futex_key, futex_new_key, max_nrequeues); drop(futex_bucket); nwakes } else { let (mut futex_bucket, mut futex_new_bucket) = { if bucket_idx < new_bucket_idx { let futex_bucket = futex_bucket_ref.lock(); let futext_new_bucket = futex_new_bucket_ref.lock(); (futex_bucket, futext_new_bucket) } else { // bucket_idx > new_bucket_idx let futex_new_bucket = futex_new_bucket_ref.lock(); let futex_bucket = futex_bucket_ref.lock(); (futex_bucket, futex_new_bucket) } }; let nwakes = futex_bucket.remove_and_wake_items(futex_key, max_nwakes); futex_bucket.requeue_items_to_another_bucket( futex_key, &mut futex_new_bucket, futex_new_key, max_nrequeues, ); nwakes } }; Ok(nwakes) } static FUTEX_BUCKETS: Once = Once::new(); /// Get the futex hash bucket count. /// /// This number is calculated the same way as Linux's: /// fn get_bucket_count() -> usize { ((1 << 8) * num_cpus()).next_power_of_two() } fn get_futex_bucket(key: FutexKey) -> (usize, FutexBucketRef) { FUTEX_BUCKETS.get().unwrap().get_bucket(key) } /// Initialize the futex system. pub fn init() { FUTEX_BUCKETS.call_once(|| FutexBucketVec::new(get_bucket_count())); } struct FutexBucketVec { vec: Vec, } impl FutexBucketVec { pub fn new(size: usize) -> FutexBucketVec { let mut buckets = FutexBucketVec { vec: Vec::with_capacity(size), }; for _ in 0..size { let bucket = Arc::new(Mutex::new(FutexBucket::new())); buckets.vec.push(bucket); } buckets } pub fn get_bucket(&self, key: FutexKey) -> (usize, FutexBucketRef) { let index = (self.vec.len() - 1) & { // The addr is the multiples of 4, so we ignore the last 2 bits let addr = key.addr() >> 2; // simple hash addr / self.size() }; (index, self.vec[index].clone()) } fn size(&self) -> usize { self.vec.len() } } struct FutexBucket { items: LinkedList, } intrusive_adapter!(FutexItemAdapter = Box: FutexItem { link: LinkedListAtomicLink }); impl FutexBucket { pub fn new() -> FutexBucket { FutexBucket { items: LinkedList::new(FutexItemAdapter::new()), } } pub fn add_item(&mut self, item: Box) { self.items.push_back(item); } pub fn remove_item(&mut self, item: &FutexItem) { let mut item_cursor = self.items.front_mut(); while !item_cursor.is_null() { // The item_cursor has been checked not null. let futex_item = item_cursor.get().unwrap(); if !futex_item.match_up(item) { item_cursor.move_next(); continue; } else { let _ = item_cursor.remove(); break; } } } pub fn remove_and_wake_items(&mut self, key: FutexKey, max_count: usize) -> usize { let mut count = 0; let mut item_cursor = self.items.front_mut(); while !item_cursor.is_null() && count < max_count { // The item_cursor has been checked not null. let item = item_cursor.get().unwrap(); if !item.key.match_up(&key) { item_cursor.move_next(); continue; } let item = item_cursor.remove().unwrap(); if !item.wake() { continue; } count += 1; } count } pub fn update_item_keys(&mut self, key: FutexKey, new_key: FutexKey, max_count: usize) { let mut count = 0; let mut item_cursor = self.items.front_mut(); while !item_cursor.is_null() && count < max_count { // The item_cursor has been checked not null. let item = item_cursor.get().unwrap(); if !item.key.match_up(&key) { item_cursor.move_next(); continue; } let mut item = item_cursor.remove().unwrap(); item.key = new_key; item_cursor.insert_before(item); count += 1; } } pub fn requeue_items_to_another_bucket( &mut self, key: FutexKey, another: &mut Self, new_key: FutexKey, max_nrequeues: usize, ) { let mut count = 0; let mut item_cursor = self.items.front_mut(); while !item_cursor.is_null() && count < max_nrequeues { // The item_cursor has been checked not null. let item = item_cursor.get().unwrap(); if !item.key.match_up(&key) { item_cursor.move_next(); continue; } let mut item = item_cursor.remove().unwrap(); item.key = new_key; another.add_item(item); count += 1; } } } struct FutexItem { key: FutexKey, waker: Arc, link: LinkedListAtomicLink, } impl FutexItem { pub fn create(key: FutexKey) -> (Box, Waiter) { let (waiter, waker) = Waiter::new_pair(); let futex_item = Box::new(FutexItem { key, waker, link: LinkedListAtomicLink::new(), }); (futex_item, waiter) } #[must_use] pub fn wake(&self) -> bool { self.waker.wake_up() } pub fn match_up(&self, another: &Self) -> bool { self.key.match_up(&another.key) } } // The addr of a futex, it should be used to mark different futex word #[derive(Debug, Clone, Copy)] struct FutexKey { addr: Vaddr, bitset: FutexBitSet, /// Specify whether this `FutexKey` is process private or shared. If `pid` is /// None, then this `FutexKey` is shared. pid: Option, } impl FutexKey { pub fn new(addr: Vaddr, bitset: FutexBitSet, pid: Option) -> Self { Self { addr, bitset, pid } } pub fn load_val(&self, ctx: &Context) -> Result { // FIXME: how to implement a atomic load? warn!("implement an atomic load"); ctx.user_space().read_val(self.addr) } pub fn addr(&self) -> Vaddr { self.addr } pub fn bitset(&self) -> FutexBitSet { self.bitset } pub fn match_up(&self, another: &Self) -> bool { // TODO: Use hash value to do match_up self.addr == another.addr && (self.bitset & another.bitset) != 0 && self.pid == another.pid } } // The implementation is from occlum #[derive(PartialEq, Debug, Clone, Copy)] #[expect(non_camel_case_types)] pub enum FutexOp { FUTEX_WAIT = 0, FUTEX_WAKE = 1, FUTEX_FD = 2, FUTEX_REQUEUE = 3, FUTEX_CMP_REQUEUE = 4, FUTEX_WAKE_OP = 5, FUTEX_LOCK_PI = 6, FUTEX_UNLOCK_PI = 7, FUTEX_TRYLOCK_PI = 8, FUTEX_WAIT_BITSET = 9, FUTEX_WAKE_BITSET = 10, } impl FutexOp { pub fn from_u32(bits: u32) -> Result { match bits { 0 => Ok(FutexOp::FUTEX_WAIT), 1 => Ok(FutexOp::FUTEX_WAKE), 2 => Ok(FutexOp::FUTEX_FD), 3 => Ok(FutexOp::FUTEX_REQUEUE), 4 => Ok(FutexOp::FUTEX_CMP_REQUEUE), 5 => Ok(FutexOp::FUTEX_WAKE_OP), 6 => Ok(FutexOp::FUTEX_LOCK_PI), 7 => Ok(FutexOp::FUTEX_UNLOCK_PI), 8 => Ok(FutexOp::FUTEX_TRYLOCK_PI), 9 => Ok(FutexOp::FUTEX_WAIT_BITSET), 10 => Ok(FutexOp::FUTEX_WAKE_BITSET), _ => return_errno_with_message!(Errno::EINVAL, "Unknown futex op"), } } } bitflags! { pub struct FutexFlags : u32 { const FUTEX_PRIVATE = 128; const FUTEX_CLOCK_REALTIME = 256; } } impl FutexFlags { pub fn from_u32(bits: u32) -> Result { FutexFlags::from_bits(bits) .ok_or_else(|| Error::with_message(Errno::EINVAL, "unknown futex flags")) } } pub fn futex_op_and_flags_from_u32(bits: u32) -> Result<(FutexOp, FutexFlags)> { let op = { let op_bits = bits & FUTEX_OP_MASK; FutexOp::from_u32(op_bits)? }; let flags = { let flags_bits = bits & FUTEX_FLAGS_MASK; FutexFlags::from_u32(flags_bits)? }; Ok((op, flags)) }