Implement system call sched_get/set_affinity

Committed by: Tate, Hongliang Tian
Parent: 3468ec213b
Commit: e319641b4d
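For context, here is a minimal user-space sketch of how the two syscalls added by this commit are typically exercised through the libc wrappers. It is not part of the commit; the `libc` crate dependency and the choice of CPU 0 are illustrative assumptions.

// Query and then restrict the calling thread's CPU affinity via libc.
fn main() {
    unsafe {
        // Query the affinity of the calling thread (pid/tid 0 means "current").
        let mut set: libc::cpu_set_t = std::mem::zeroed();
        if libc::sched_getaffinity(0, std::mem::size_of::<libc::cpu_set_t>(), &mut set) != 0 {
            panic!("sched_getaffinity failed");
        }
        println!("CPU 0 allowed: {}", libc::CPU_ISSET(0, &set));

        // Restrict the calling thread to CPU 0 only.
        let mut new_set: libc::cpu_set_t = std::mem::zeroed();
        libc::CPU_ZERO(&mut new_set);
        libc::CPU_SET(0, &mut new_set);
        if libc::sched_setaffinity(0, std::mem::size_of::<libc::cpu_set_t>(), &new_set) != 0 {
            panic!("sched_setaffinity failed");
        }
    }
}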
@@ -1,8 +1,9 @@
 // SPDX-License-Identifier: MPL-2.0
 
+use core::sync::atomic::Ordering;
+
 use ostd::{
     cpu::{num_cpus, CpuId, CpuSet, PinCurrentCpu},
-    sync::PreemptDisabled,
     task::{
         scheduler::{
             info::CommonSchedInfo, inject_scheduler, EnqueueFlags, LocalRunQueue, Scheduler,
@@ -254,11 +255,11 @@ impl PreemptSchedInfo for Thread {
     const LOWEST_TASK_PRIORITY: Priority = Priority::new(PriorityRange::new(PriorityRange::MAX));
 
     fn priority(&self) -> Priority {
-        self.priority()
+        self.atomic_priority().load(Ordering::Relaxed)
     }
 
-    fn cpu_affinity(&self) -> SpinLockGuard<CpuSet, PreemptDisabled> {
-        self.lock_cpu_affinity()
+    fn cpu_affinity(&self) -> CpuSet {
+        self.atomic_cpu_affinity().load()
     }
 }
 
@@ -268,7 +269,7 @@ trait PreemptSchedInfo {
 
     fn priority(&self) -> Priority;
 
-    fn cpu_affinity(&self) -> SpinLockGuard<CpuSet, PreemptDisabled>;
+    fn cpu_affinity(&self) -> CpuSet;
 
     fn is_real_time(&self) -> bool {
         self.priority() < Self::REAL_TIME_TASK_PRIORITY
@@ -79,7 +79,7 @@ use crate::syscall::{
     rt_sigpending::sys_rt_sigpending,
     rt_sigprocmask::sys_rt_sigprocmask,
     rt_sigsuspend::sys_rt_sigsuspend,
-    sched_getaffinity::sys_sched_getaffinity,
+    sched_affinity::{sys_sched_getaffinity, sys_sched_setaffinity},
     sched_yield::sys_sched_yield,
     semctl::sys_semctl,
     semget::sys_semget,
@@ -191,6 +191,7 @@ impl_syscall_nums_and_dispatch_fn! {
     SYS_SETITIMER = 103 => sys_setitimer(args[..3]);
     SYS_TIMER_CREATE = 107 => sys_timer_create(args[..3]);
     SYS_TIMER_DELETE = 111 => sys_timer_delete(args[..1]);
+    SYS_SCHED_SETAFFINITY = 122 => sys_sched_setaffinity(args[..3]);
     SYS_SCHED_GETAFFINITY = 123 => sys_sched_getaffinity(args[..3]);
     SYS_SCHED_YIELD = 124 => sys_sched_yield(args[..0]);
     SYS_KILL = 129 => sys_kill(args[..2]);
@@ -86,7 +86,7 @@ use crate::syscall::{
     rt_sigprocmask::sys_rt_sigprocmask,
     rt_sigreturn::sys_rt_sigreturn,
     rt_sigsuspend::sys_rt_sigsuspend,
-    sched_getaffinity::sys_sched_getaffinity,
+    sched_affinity::{sys_sched_getaffinity, sys_sched_setaffinity},
    sched_yield::sys_sched_yield,
     select::sys_select,
     semctl::sys_semctl,
@@ -263,6 +263,7 @@ impl_syscall_nums_and_dispatch_fn! {
     SYS_GETTID = 186 => sys_gettid(args[..0]);
     SYS_TIME = 201 => sys_time(args[..1]);
     SYS_FUTEX = 202 => sys_futex(args[..6]);
+    SYS_SCHED_SETAFFINITY = 203 => sys_sched_setaffinity(args[..3]);
     SYS_SCHED_GETAFFINITY = 204 => sys_sched_getaffinity(args[..3]);
     SYS_EPOLL_CREATE = 213 => sys_epoll_create(args[..1]);
     SYS_GETDENTS64 = 217 => sys_getdents64(args[..3]);
@@ -93,7 +93,7 @@ mod rt_sigpending;
 mod rt_sigprocmask;
 mod rt_sigreturn;
 mod rt_sigsuspend;
-mod sched_getaffinity;
+mod sched_affinity;
 mod sched_yield;
 mod select;
 mod semctl;
kernel/src/syscall/sched_affinity.rs (new file, 130 lines)
@@ -0,0 +1,130 @@
+// SPDX-License-Identifier: MPL-2.0
+
+use core::{cmp, mem};
+
+use ostd::cpu::{num_cpus, CpuId, CpuSet};
+
+use super::SyscallReturn;
+use crate::{prelude::*, process::posix_thread::thread_table, thread::Tid};
+
+pub fn sys_sched_getaffinity(
+    tid: Tid,
+    cpuset_size: usize,
+    cpu_set_ptr: Vaddr,
+    ctx: &Context,
+) -> Result<SyscallReturn> {
+    let cpu_set = match tid {
+        0 => ctx.thread.atomic_cpu_affinity().load(),
+        _ => match thread_table::get_thread(tid) {
+            Some(thread) => thread.atomic_cpu_affinity().load(),
+            None => return Err(Error::with_message(Errno::ESRCH, "thread does not exist")),
+        },
+    };
+
+    let bytes_written = write_cpu_set_to(ctx.get_user_space(), &cpu_set, cpuset_size, cpu_set_ptr)?;
+
+    Ok(SyscallReturn::Return(bytes_written as isize))
+}
+
+// TODO: The manual page of `sched_setaffinity` says that if the thread is not
+// running on the CPU specified in the affinity mask, it would be migrated to
+// one of the CPUs specified in the mask. We currently do not support this
+// feature as the scheduler is not ready for migration yet.
+pub fn sys_sched_setaffinity(
+    tid: Tid,
+    cpuset_size: usize,
+    cpu_set_ptr: Vaddr,
+    ctx: &Context,
+) -> Result<SyscallReturn> {
+    let user_cpu_set = read_cpu_set_from(ctx.get_user_space(), cpuset_size, cpu_set_ptr)?;
+
+    match tid {
+        0 => ctx.thread.atomic_cpu_affinity().store(&user_cpu_set),
+        _ => match thread_table::get_thread(tid) {
+            Some(thread) => {
+                thread.atomic_cpu_affinity().store(&user_cpu_set);
+            }
+            None => return Err(Error::with_message(Errno::ESRCH, "thread does not exist")),
+        },
+    }
+
+    Ok(SyscallReturn::Return(0))
+}
+
+// Linux uses `DECLARE_BITMAP` for `cpu_set_t`, inside which each part is a
+// `long`. We use the same scheme to ensure byte endianness compatibility.
+type Part = u64;
+const SIZE_OF_PART: usize = mem::size_of::<Part>();
+const CPUS_IN_PART: usize = SIZE_OF_PART * 8;
+
+fn read_cpu_set_from(
+    uspace: CurrentUserSpace,
+    cpuset_size: usize,
+    cpu_set_ptr: Vaddr,
+) -> Result<CpuSet> {
+    if cpuset_size == 0 {
+        return Err(Error::with_message(Errno::EINVAL, "invalid cpuset size"));
+    }
+
+    let num_cpus = num_cpus();
+
+    let mut ret_set = CpuSet::new_empty();
+
+    let nr_parts_to_read = cmp::min(cpuset_size / SIZE_OF_PART, num_cpus.div_ceil(CPUS_IN_PART));
+    for part_id in 0..nr_parts_to_read {
+        let user_part: Part = uspace.read_val(cpu_set_ptr + part_id * SIZE_OF_PART)?;
+        for bit_id in 0..CPUS_IN_PART {
+            if user_part & (1 << bit_id) != 0 {
+                // If the CPU ID is invalid, just ignore it.
+                let Ok(cpu_id) = CpuId::try_from(part_id * CPUS_IN_PART + bit_id) else {
+                    continue;
+                };
+                ret_set.add(cpu_id);
+            }
+        }
+    }
+
+    if ret_set.is_empty() {
+        return Err(Error::with_message(Errno::EINVAL, "empty cpuset"));
+    }
+
+    Ok(ret_set)
+}
+
+// Returns the number of bytes written.
+fn write_cpu_set_to(
+    uspace: CurrentUserSpace,
+    cpu_set: &CpuSet,
+    cpuset_size: usize,
+    cpu_set_ptr: Vaddr,
+) -> Result<usize> {
+    if cpuset_size == 0 {
+        return Err(Error::with_message(Errno::EINVAL, "invalid cpuset size"));
+    }
+
+    let num_cpus = num_cpus();
+
+    let nr_parts_to_write = cmp::min(cpuset_size / SIZE_OF_PART, num_cpus.div_ceil(CPUS_IN_PART));
+    let mut user_part: Part = 0;
+    let mut part_idx = 0;
+    for cpu_id in cpu_set.iter() {
+        let id = cpu_id.as_usize();
+        while part_idx < cmp::min(id / CPUS_IN_PART, nr_parts_to_write) {
+            uspace.write_val(cpu_set_ptr + part_idx * SIZE_OF_PART, &user_part)?;
+            user_part = 0;
+            part_idx += 1;
+        }
+        if part_idx >= nr_parts_to_write {
+            break;
+        }
+        user_part |= 1 << (id % CPUS_IN_PART);
+    }
+
+    while part_idx < nr_parts_to_write {
+        uspace.write_val(cpu_set_ptr + part_idx * SIZE_OF_PART, &user_part)?;
+        user_part = 0;
+        part_idx += 1;
+    }
+
+    Ok(nr_parts_to_write * SIZE_OF_PART)
+}
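The helpers above mirror Linux's `cpu_set_t` layout: the user-space mask is an array of 64-bit parts, with CPU n stored in part n / 64 at bit n % 64. A standalone sketch of that arithmetic, for illustration only (plain Rust, not kernel code):

// Show which part and bit each CPU ID maps to in the 64-bit-part bitmask.
fn main() {
    const CPUS_IN_PART: usize = 64;
    for cpu in [0usize, 1, 63, 64, 70, 129] {
        let part_id = cpu / CPUS_IN_PART;
        let bit_id = cpu % CPUS_IN_PART;
        let mask: u64 = 1 << bit_id;
        println!("CPU {cpu:>3} -> part {part_id}, bit {bit_id}, mask {mask:#018x}");
    }
}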
kernel/src/syscall/sched_getaffinity.rs (deleted file, 70 lines)
@@ -1,70 +0,0 @@
-// SPDX-License-Identifier: MPL-2.0
-
-use core::{cmp, mem};
-
-use super::SyscallReturn;
-use crate::{
-    prelude::*,
-    process::{process_table, Pid},
-};
-
-fn get_num_cpus() -> usize {
-    // TODO: Properly determine the number of available CPUs
-    // This could be through a system configuration query.
-    1
-}
-
-pub fn sys_sched_getaffinity(
-    pid: Pid,
-    cpuset_size: usize,
-    cpu_set_ptr: Vaddr,
-    ctx: &Context,
-) -> Result<SyscallReturn> {
-    let num_cpus = get_num_cpus();
-
-    if cpuset_size < core::mem::size_of::<cpu_set_t>() {
-        return Err(Error::with_message(Errno::EINVAL, "invalid cpuset size"));
-    }
-
-    match pid {
-        0 => {
-            // TODO: Get the current thread's CPU affinity
-            // Placeholder for future implementation.
-        }
-        _ => {
-            match process_table::get_process(pid) {
-                Some(_process) => { /* Placeholder if process-specific logic needed */ }
-                None => return Err(Error::with_message(Errno::ESRCH, "process does not exist")),
-            }
-        }
-    }
-
-    let dummy_cpu_set = cpu_set_t::new(num_cpus);
-
-    ctx.get_user_space()
-        .write_val(cpu_set_ptr, &dummy_cpu_set)?;
-
-    Ok(SyscallReturn::Return(0))
-}
-
-const CPU_SETSIZE: usize = 1024; // Max number of CPU bits.
-const __NCPUBITS: usize = 8 * mem::size_of::<usize>();
-
-#[derive(Debug, Clone, Copy, Pod)]
-#[repr(C, packed)]
-struct cpu_set_t {
-    __bits: [usize; CPU_SETSIZE / __NCPUBITS],
-}
-
-impl cpu_set_t {
-    /// Creates a new cpu_set_t representing available CPUs.
-    fn new(num_cpus: usize) -> Self {
-        let mut bits = [0usize; CPU_SETSIZE / __NCPUBITS];
-
-        for cpu in 0..cmp::min(num_cpus, CPU_SETSIZE) {
-            bits[cpu / __NCPUBITS] |= 1 << (cpu % __NCPUBITS);
-        }
-
-        Self { __bits: bits }
-    }
-}
@@ -4,7 +4,10 @@
 
 use core::sync::atomic::Ordering;
 
-use ostd::{cpu::CpuSet, sync::PreemptDisabled, task::Task};
+use ostd::{
+    cpu::{AtomicCpuSet, CpuSet},
+    task::Task,
+};
 
 use self::status::{AtomicThreadStatus, ThreadStatus};
 use crate::{
@@ -34,8 +37,8 @@ pub struct Thread {
     status: AtomicThreadStatus,
     /// Thread priority
     priority: AtomicPriority,
-    /// Thread cpu affinity
-    cpu_affinity: SpinLock<CpuSet>,
+    /// Thread CPU affinity
+    cpu_affinity: AtomicCpuSet,
 }
 
 impl Thread {
@@ -52,7 +55,7 @@ impl Thread {
             data: Box::new(data),
             status: AtomicThreadStatus::new(status),
             priority: AtomicPriority::new(priority),
-            cpu_affinity: SpinLock::new(cpu_affinity),
+            cpu_affinity: AtomicCpuSet::new(cpu_affinity),
         }
     }
 
@@ -67,6 +70,11 @@ impl Thread {
            .cloned()
     }
 
+    /// Returns the task associated with this thread.
+    pub fn task(&self) -> Arc<Task> {
+        self.task.upgrade().unwrap()
+    }
+
     /// Gets the Thread from task's data.
     ///
     /// # Panics
@@ -116,14 +124,9 @@ impl Thread {
         self.priority.store(new_priority, Ordering::Relaxed)
     }
 
-    /// Acquires the lock of cpu affinity.
-    pub fn lock_cpu_affinity(&self) -> SpinLockGuard<CpuSet, PreemptDisabled> {
-        self.cpu_affinity.lock()
-    }
-
-    /// Updates the cpu affinity with the new value.
-    pub fn set_cpu_affinity(&self, new_cpu_affinity: CpuSet) {
-        *self.cpu_affinity.lock() = new_cpu_affinity;
+    /// Returns the reference to the atomic CPU affinity.
+    pub fn atomic_cpu_affinity(&self) -> &AtomicCpuSet {
+        &self.cpu_affinity
     }
 
     pub fn yield_now() {
@@ -80,6 +80,11 @@ impl CpuSet {
             .sum()
     }
 
+    /// Returns true if the set is empty.
+    pub fn is_empty(&self) -> bool {
+        self.bits.iter().all(|part| *part == 0)
+    }
+
     /// Adds all CPUs to the set.
     pub fn add_all(&mut self) {
         self.bits.fill(!0);
@@ -92,6 +97,8 @@ impl CpuSet {
     }
 
     /// Iterates over the CPUs in the set.
+    ///
+    /// The order of the iteration is guaranteed to be in ascending order.
     pub fn iter(&self) -> impl Iterator<Item = CpuId> + '_ {
         self.bits.iter().enumerate().flat_map(|(part_idx, &part)| {
             (0..BITS_PER_PART).filter_map(move |bit_idx| {
@@ -165,9 +172,9 @@ impl AtomicCpuSet {
     ///
     /// This operation can only be done in the [`Ordering::Relaxed`] memory
     /// order. It cannot be used to synchronize anything between CPUs.
-    pub fn store(&self, value: CpuSet) {
-        for (part, new_part) in self.bits.iter().zip(value.bits) {
-            part.store(new_part, Ordering::Relaxed);
+    pub fn store(&self, value: &CpuSet) {
+        for (part, new_part) in self.bits.iter().zip(value.bits.iter()) {
+            part.store(*new_part, Ordering::Relaxed);
         }
     }
 
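As the hunk above shows, `AtomicCpuSet::store` now borrows the new set and overwrites each 64-bit part with a relaxed atomic store: concurrent readers may observe a mix of old and new parts, but no individual part is ever torn. A minimal standalone sketch of that per-part scheme, with a hypothetical `SketchAtomicBitSet` type that is not the ostd implementation:

use std::sync::atomic::{AtomicU64, Ordering};

// Each part is its own atomic word; load/store touch parts independently.
struct SketchAtomicBitSet {
    bits: Vec<AtomicU64>,
}

impl SketchAtomicBitSet {
    fn store_relaxed(&self, new_parts: &[u64]) {
        for (part, new_part) in self.bits.iter().zip(new_parts.iter()) {
            part.store(*new_part, Ordering::Relaxed);
        }
    }

    fn load_relaxed(&self) -> Vec<u64> {
        self.bits.iter().map(|p| p.load(Ordering::Relaxed)).collect()
    }
}

fn main() {
    let set = SketchAtomicBitSet {
        bits: (0..2).map(|_| AtomicU64::new(0)).collect(),
    };
    set.store_relaxed(&[0b1011, 0]);
    assert_eq!(set.load_relaxed(), vec![0b1011u64, 0]);
}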
@@ -260,7 +267,7 @@ mod test {
            }
        }

        atomic_set.store(&CpuSet::with_capacity_val(test_num_cpus, 0));

        for cpu_id in test_all_iter() {
            assert!(!atomic_set.contains(cpu_id, Ordering::Relaxed));