Re-enable CFS

2025-06-21 08:26:30 +00:00 · 2025-01-22 14:08:35 +08:00
parent c2f48a41db
commit d71771e49a
17 changed files with 102 additions and 512 deletions
--- a/kernel/src/lib.rs
+++ b/kernel/src/lib.rs
@ -37,10 +37,10 @@ use ostd::{
    cpu::{CpuId, CpuSet, PinCurrentCpu},
 };
 use process::Process;
 use sched::SchedPolicy;
 use crate::{
    prelude::*,
    sched::priority::Priority,
    thread::{kernel_thread::ThreadOptions, Thread},
 };
@ -86,8 +86,8 @@ pub fn main() {
    let mut affinity = CpuSet::new_empty();
    affinity.add(CpuId::bsp());
    ThreadOptions::new(init_thread)
        .priority(Priority::idle())
        .cpu_affinity(affinity)
        .sched_policy(SchedPolicy::Idle)
        .spawn();
 }
@ -121,7 +121,7 @@ fn ap_init() {
    ThreadOptions::new(ap_idle_thread)
        .cpu_affinity(cpu_id.into())
-        .priority(Priority::idle())
+        .sched_policy(SchedPolicy::Idle)
        .spawn();
 }
--- a/kernel/src/net/iface/poll.rs
+++ b/kernel/src/net/iface/poll.rs
@ -7,7 +7,11 @@ use log::trace;
 use ostd::timer::Jiffies;
 use super::{Iface, IFACES};
-use crate::{sched::priority::Priority, thread::kernel_thread::ThreadOptions, WaitTimeout};
+use crate::{
    sched::{Nice, SchedPolicy},
    thread::kernel_thread::ThreadOptions,
    WaitTimeout,
 };
 pub fn lazy_init() {
    for iface in IFACES.get().unwrap() {
@ -62,8 +66,7 @@ fn spawn_background_poll_thread(iface: Arc<Iface>) {
        }
    };
    // FIXME: remove the use of real-time priority.
    ThreadOptions::new(task_fn)
-        .priority(Priority::default_real_time())
+        .sched_policy(SchedPolicy::Fair(Nice::MIN))
        .spawn();
 }
--- a/kernel/src/process/posix_thread/builder.rs
+++ b/kernel/src/process/posix_thread/builder.rs
@ -13,7 +13,7 @@ use crate::{
        signal::{sig_mask::AtomicSigMask, sig_queues::SigQueues},
        Credentials, Process,
    },
-    sched::priority::Priority,
+    sched::{Nice, SchedPolicy},
    thread::{task, Thread, Tid},
    time::{clocks::ProfClock, TimerManager},
 };
@ -34,7 +34,7 @@ pub struct PosixThreadBuilder {
    fs: Option<Arc<ThreadFsInfo>>,
    sig_mask: AtomicSigMask,
    sig_queues: SigQueues,
-    priority: Priority,
+    sched_policy: SchedPolicy,
 }
 impl PosixThreadBuilder {
@ -51,7 +51,7 @@ impl PosixThreadBuilder {
            fs: None,
            sig_mask: AtomicSigMask::new_empty(),
            sig_queues: SigQueues::new(),
-            priority: Priority::default(),
+            sched_policy: SchedPolicy::Fair(Nice::default()),
        }
    }
@ -90,8 +90,8 @@ impl PosixThreadBuilder {
        self
    }
-    pub fn priority(mut self, priority: Priority) -> Self {
+    pub fn sched_policy(mut self, sched_policy: SchedPolicy) -> Self {
-        self.priority = priority;
+        self.sched_policy = sched_policy;
        self
    }
@ -108,7 +108,7 @@ impl PosixThreadBuilder {
            fs,
            sig_mask,
            sig_queues,
-            priority,
+            sched_policy,
        } = self;
        let file_table = file_table.unwrap_or_else(|| RwArc::new(FileTable::new_with_stdio()));
@ -141,8 +141,8 @@ impl PosixThreadBuilder {
            let thread = Arc::new(Thread::new(
                weak_task.clone(),
                posix_thread,
                priority,
                cpu_affinity,
                sched_policy,
            ));
            let thread_local = ThreadLocal::new(set_child_tid, clear_child_tid, file_table);
--- a/kernel/src/process/process/builder.rs
+++ b/kernel/src/process/process/builder.rs
@ -12,7 +12,7 @@ use crate::{
        signal::sig_disposition::SigDispositions,
        Credentials,
    },
-    sched::priority::Nice,
+    sched::Nice,
 };
 pub struct ProcessBuilder<'a> {
--- a/kernel/src/process/process/mod.rs
+++ b/kernel/src/process/process/mod.rs
@ -20,7 +20,7 @@ use super::{
 use crate::{
    device::tty::open_ntty_as_controlling_terminal,
    prelude::*,
-    sched::priority::{AtomicNice, Nice},
+    sched::{AtomicNice, Nice},
    thread::{AsThread, Thread},
    time::clocks::ProfClock,
    vm::vmar::Vmar,
--- a/kernel/src/sched/mod.rs
+++ b/kernel/src/sched/mod.rs
@ -1,16 +1,11 @@
 // SPDX-License-Identifier: MPL-2.0
-pub mod priority;
+mod nice;
 // TODO: Remove this out-dated module once the `sched_class` module is stable.
 mod priority_scheduler;
 mod sched_class;
 mod stats;
-// Export the stats getter functions.
+pub use self::{
-pub use stats::{loadavg, nr_queued_and_running};
+    nice::{AtomicNice, Nice},
-
+    sched_class::{init, SchedAttr, SchedPolicy},
-// TODO: Use `sched_class::init` instead after the completion of #1676.
+    stats::{loadavg, nr_queued_and_running},
-pub use self::priority_scheduler::init;
+};
 // There may be multiple scheduling policies in the system,
 // and subsequent schedulers can be placed under this module.
 pub use self::sched_class::SchedAttr;
--- a/kernel/src/sched/priority.rs
+++ b/kernel/src/sched/priority.rs
@ -1,6 +1,6 @@
 // SPDX-License-Identifier: MPL-2.0
-use core::sync::atomic::{AtomicI8, AtomicU8};
+use core::sync::atomic::AtomicI8;
 use atomic_integer_wrapper::define_atomic_version_of_integer_like_type;
@ -9,9 +9,9 @@ use atomic_integer_wrapper::define_atomic_version_of_integer_like_type;
 /// It is an integer in the range of [-20, 19]. Process with a smaller nice
 /// value is more favorable in scheduling.
 #[derive(Clone, Copy, Debug, Eq, PartialEq, PartialOrd, Ord)]
-pub struct Nice(NiceRange);
+pub struct Nice(NiceValue);
-pub type NiceRange = RangedI8<-20, 19>;
+pub type NiceValue = RangedI8<-20, 19>;
 define_atomic_version_of_integer_like_type!(Nice, try_from = true, {
    #[derive(Debug)]
@ -19,22 +19,25 @@ define_atomic_version_of_integer_like_type!(Nice, try_from = true, {
 });
 impl Nice {
-    pub const fn new(range: NiceRange) -> Self {
+    pub const MIN: Self = Nice::new(NiceValue::MIN);
    pub const MAX: Self = Nice::new(NiceValue::MAX);
    pub const fn new(range: NiceValue) -> Self {
        Self(range)
    }
-    pub const fn range(&self) -> &NiceRange {
+    pub const fn value(&self) -> &NiceValue {
        &self.0
    }
-    pub fn range_mut(&mut self) -> &mut NiceRange {
+    pub fn value_mut(&mut self) -> &mut NiceValue {
        &mut self.0
    }
 }
 impl Default for Nice {
    fn default() -> Self {
-        Self::new(NiceRange::new(0))
+        Self::new(NiceValue::new(0))
    }
 }
@ -45,7 +48,7 @@ impl From<Nice> for i8 {
 }
 impl TryFrom<i8> for Nice {
-    type Error = <NiceRange as TryFrom<i8>>::Error;
+    type Error = <NiceValue as TryFrom<i8>>::Error;
    fn try_from(value: i8) -> Result<Self, Self::Error> {
        let range = value.try_into()?;
@ -53,79 +56,14 @@ impl TryFrom<i8> for Nice {
    }
 }
 /// The thread scheduling priority value.
 ///
 /// It is an integer in the range of [0, 139]. Here we follow the Linux
 /// priority mappings: the relation between [`Priority`] and [`Nice`] is
 /// as such - prio = nice + 120 while the priority of [0, 99] are
 /// reserved for real-time tasks.
 #[derive(Clone, Copy, Debug, Eq, PartialEq, PartialOrd, Ord)]
 pub struct Priority(PriorityRange);
 pub type PriorityRange = RangedU8<0, 139>;
 define_atomic_version_of_integer_like_type!(Priority, try_from = true, {
    #[derive(Debug)]
    pub struct AtomicPriority(AtomicU8);
 });
 impl Priority {
    pub const MIN_NORMAL: Self = Self::new(PriorityRange::new(100));
    pub const fn new(range: PriorityRange) -> Self {
        Self(range)
    }
    pub const fn range(&self) -> &PriorityRange {
        &self.0
    }
    pub fn range_mut(&mut self) -> &mut PriorityRange {
        &mut self.0
    }
 }
 impl From<Nice> for Priority {
    fn from(nice: Nice) -> Self {
        Self::new(PriorityRange::new((nice.range().get() as i16 + 120) as u8))
    }
 }
 impl From<Priority> for Nice {
    fn from(priority: Priority) -> Self {
        Self::new(NiceRange::new(((priority.range().get() as i16 - 100) - 20) as i8))
    }
 }
 impl Default for Priority {
    fn default() -> Self {
        Nice::default().into()
    }
 }
 impl From<Priority> for u8 {
    fn from(value: Priority) -> Self {
        value.0.into()
    }
 }
 impl TryFrom<u8> for Priority {
    type Error = <PriorityRange as TryFrom<u8>>::Error;
    fn try_from(value: u8) -> Result<Self, Self::Error> {
        let range = value.try_into()?;
        Ok(Self::new(range))
    }
 }
 macro_rules! define_ranged_integer {
    ($visibility: vis, $name: ident, $type: ty) => {
        #[derive(Copy, Clone, Debug, Eq, PartialEq, PartialOrd, Ord)]
        $visibility struct $name<const MIN: $type, const MAX: $type>($type);
        impl<const MIN: $type, const MAX: $type> $name<MIN, MAX> {
-            $visibility const MIN: $type = MIN as $type;
+            $visibility const MIN: Self = Self::new(MIN);
-            $visibility const MAX: $type = MAX as $type;
+            $visibility const MAX: Self = Self::new(MAX);
            $visibility const fn new(val: $type) -> Self {
                assert!(val >= MIN && val <= MAX);
@ -152,7 +90,7 @@ macro_rules! define_ranged_integer {
            type Error = &'static str;
            fn try_from(value: $type) -> Result<Self, Self::Error> {
-                if value < Self::MIN || value > Self::MAX {
+                if value < MIN || value > MAX {
                    Err("Initialized with out-of-range value.")
                } else {
                    Ok(Self(value))
--- a/kernel/src/sched/priority_scheduler.rs
+++ b/kernel/src/sched/priority_scheduler.rs
@ -1,338 +0,0 @@
 // SPDX-License-Identifier: MPL-2.0
 use core::sync::atomic::Ordering;
 use ostd::{
    cpu::{num_cpus, CpuId, CpuSet, PinCurrentCpu},
    task::{
        disable_preempt,
        scheduler::{
            info::CommonSchedInfo, inject_scheduler, EnqueueFlags, LocalRunQueue, Scheduler,
            UpdateFlags,
        },
        Task,
    },
    trap::disable_local,
 };
 use super::{
    priority::Priority,
    stats::{set_stats_from_scheduler, SchedulerStats},
 };
 use crate::{prelude::*, thread::Thread};
 pub fn init() {
    let preempt_scheduler = Box::new(PreemptScheduler::default());
    let scheduler = Box::<PreemptScheduler<Thread, Task>>::leak(preempt_scheduler);
    // Inject the scheduler into the ostd for actual scheduling work.
    inject_scheduler(scheduler);
    // Set the scheduler into the system for statistics.
    // We set this after injecting the scheduler into ostd,
    // so that the loadavg statistics are updated after the scheduler is used.
    set_stats_from_scheduler(scheduler);
 }
 /// The preempt scheduler.
 ///
 /// Real-time tasks are placed in the `real_time_entities` queue and
 /// are always prioritized during scheduling.
 /// Normal tasks are placed in the `normal_entities` queue and are only
 /// scheduled for execution when there are no real-time tasks.
 struct PreemptScheduler<T: PreemptSchedInfo + FromTask<U>, U: CommonSchedInfo> {
    rq: Vec<SpinLock<PreemptRunQueue<T, U>>>,
 }
 impl<T: PreemptSchedInfo + FromTask<U>, U: CommonSchedInfo> PreemptScheduler<T, U> {
    fn new(nr_cpus: usize) -> Self {
        let mut rq = Vec::with_capacity(nr_cpus);
        for _ in 0..nr_cpus {
            rq.push(SpinLock::new(PreemptRunQueue::new()));
        }
        Self { rq }
    }
    /// Selects a CPU for task to run on for the first time.
    fn select_cpu(&self, entity: &PreemptSchedEntity<T, U>) -> CpuId {
        // If the CPU of a runnable task has been set before, keep scheduling
        // the task to that one.
        // TODO: Consider migrating tasks between CPUs for load balancing.
        if let Some(cpu_id) = entity.task.cpu().get() {
            return cpu_id;
        }
        let irq_guard = disable_local();
        let mut selected = irq_guard.current_cpu();
        let mut minimum_load = usize::MAX;
        for candidate in entity.thread.cpu_affinity().iter() {
            let rq = self.rq[candidate.as_usize()].lock();
            // A wild guess measuring the load of a runqueue. We assume that
            // real-time tasks are 4-times as important as normal tasks.
            let load = rq.real_time_entities.len() * 8
                + rq.normal_entities.len() * 2
                + rq.lowest_entities.len();
            if load < minimum_load {
                selected = candidate;
                minimum_load = load;
            }
        }
        selected
    }
 }
 impl<T: Sync + Send + PreemptSchedInfo + FromTask<U>, U: Sync + Send + CommonSchedInfo> Scheduler<U>
    for PreemptScheduler<T, U>
 {
    fn enqueue(&self, task: Arc<U>, flags: EnqueueFlags) -> Option<CpuId> {
        let entity = PreemptSchedEntity::new(task);
        let (still_in_rq, target_cpu) = {
            let selected_cpu_id = self.select_cpu(&entity);
            if let Err(task_cpu_id) = entity.task.cpu().set_if_is_none(selected_cpu_id) {
                debug_assert!(flags != EnqueueFlags::Spawn);
                (true, task_cpu_id)
            } else {
                (false, selected_cpu_id)
            }
        };
        let mut rq = self.rq[target_cpu.as_usize()].disable_irq().lock();
        if still_in_rq && let Err(_) = entity.task.cpu().set_if_is_none(target_cpu) {
            return None;
        }
        let new_priority = entity.thread.priority();
        if entity.thread.is_real_time() {
            rq.real_time_entities.push_back(entity);
        } else if entity.thread.is_lowest() {
            rq.lowest_entities.push_back(entity);
        } else {
            rq.normal_entities.push_back(entity);
        }
        // Preempt the current task, but only if the newly queued task has a strictly higher
        // priority (i.e., a lower value returned by the `priority` method) than the current task.
        if rq
            .current
            .as_ref()
            .is_some_and(|current| new_priority < current.thread.priority())
        {
            Some(target_cpu)
        } else {
            None
        }
    }
    fn local_rq_with(&self, f: &mut dyn FnMut(&dyn LocalRunQueue<U>)) {
        let irq_guard = disable_local();
        let local_rq: &PreemptRunQueue<T, U> = &self.rq[irq_guard.current_cpu().as_usize()].lock();
        f(local_rq);
    }
    fn local_mut_rq_with(&self, f: &mut dyn FnMut(&mut dyn LocalRunQueue<U>)) {
        let irq_guard = disable_local();
        let local_rq: &mut PreemptRunQueue<T, U> =
            &mut self.rq[irq_guard.current_cpu().as_usize()].lock();
        f(local_rq);
    }
 }
 impl<T: Sync + Send + PreemptSchedInfo + FromTask<U>, U: Sync + Send + CommonSchedInfo>
    SchedulerStats for PreemptScheduler<T, U>
 {
    fn nr_queued_and_running(&self) -> (u32, u32) {
        let _preempt_guard = disable_preempt();
        let mut nr_queued = 0;
        let mut nr_running = 0;
        for rq in self.rq.iter() {
            let rq = rq.lock();
            nr_queued +=
                rq.real_time_entities.len() + rq.normal_entities.len() + rq.lowest_entities.len();
            if rq.current.is_some() {
                nr_running += 1;
            }
        }
        (nr_queued as u32, nr_running)
    }
 }
 impl Default for PreemptScheduler<Thread, Task> {
    fn default() -> Self {
        Self::new(num_cpus())
    }
 }
 struct PreemptRunQueue<T: PreemptSchedInfo + FromTask<U>, U: CommonSchedInfo> {
    current: Option<PreemptSchedEntity<T, U>>,
    real_time_entities: VecDeque<PreemptSchedEntity<T, U>>,
    normal_entities: VecDeque<PreemptSchedEntity<T, U>>,
    lowest_entities: VecDeque<PreemptSchedEntity<T, U>>,
 }
 impl<T: PreemptSchedInfo + FromTask<U>, U: CommonSchedInfo> PreemptRunQueue<T, U> {
    pub fn new() -> Self {
        Self {
            current: None,
            real_time_entities: VecDeque::new(),
            normal_entities: VecDeque::new(),
            lowest_entities: VecDeque::new(),
        }
    }
 }
 impl<T: PreemptSchedInfo + FromTask<U>, U: CommonSchedInfo> LocalRunQueue<U>
    for PreemptRunQueue<T, U>
 {
    fn current(&self) -> Option<&Arc<U>> {
        self.current.as_ref().map(|entity| &entity.task)
    }
    fn update_current(&mut self, flags: UpdateFlags) -> bool {
        match flags {
            UpdateFlags::Tick => {
                let Some(ref mut current_entity) = self.current else {
                    return false;
                };
                current_entity.tick()
                    || (!current_entity.thread.is_real_time()
                        && !self.real_time_entities.is_empty())
            }
            _ => true,
        }
    }
    fn pick_next_current(&mut self) -> Option<&Arc<U>> {
        let next_entity = if !self.real_time_entities.is_empty() {
            self.real_time_entities.pop_front()
        } else if !self.normal_entities.is_empty() {
            self.normal_entities.pop_front()
        } else {
            self.lowest_entities.pop_front()
        }?;
        if let Some(prev_entity) = self.current.replace(next_entity) {
            if prev_entity.thread.is_real_time() {
                self.real_time_entities.push_back(prev_entity);
            } else if prev_entity.thread.is_lowest() {
                self.lowest_entities.push_back(prev_entity);
            } else {
                self.normal_entities.push_back(prev_entity);
            }
        }
        Some(&self.current.as_ref().unwrap().task)
    }
    fn dequeue_current(&mut self) -> Option<Arc<U>> {
        self.current.take().map(|entity| {
            let runnable = entity.task;
            runnable.cpu().set_to_none();
            runnable
        })
    }
 }
 struct PreemptSchedEntity<T: PreemptSchedInfo + FromTask<U>, U: CommonSchedInfo> {
    task: Arc<U>,
    thread: Arc<T>,
    time_slice: TimeSlice,
 }
 impl<T: PreemptSchedInfo + FromTask<U>, U: CommonSchedInfo> PreemptSchedEntity<T, U> {
    fn new(task: Arc<U>) -> Self {
        let thread = T::from_task(&task);
        let time_slice = TimeSlice::default();
        Self {
            task,
            thread,
            time_slice,
        }
    }
    fn tick(&mut self) -> bool {
        self.time_slice.elapse()
    }
 }
 impl<T: PreemptSchedInfo + FromTask<U>, U: CommonSchedInfo> Clone for PreemptSchedEntity<T, U> {
    fn clone(&self) -> Self {
        Self {
            task: self.task.clone(),
            thread: self.thread.clone(),
            time_slice: self.time_slice,
        }
    }
 }
 #[derive(Clone, Copy)]
 pub struct TimeSlice {
    elapsed_ticks: u32,
 }
 impl TimeSlice {
    const DEFAULT_TIME_SLICE: u32 = 100;
    pub const fn new() -> Self {
        TimeSlice { elapsed_ticks: 0 }
    }
    pub fn elapse(&mut self) -> bool {
        self.elapsed_ticks = (self.elapsed_ticks + 1) % Self::DEFAULT_TIME_SLICE;
        self.elapsed_ticks == 0
    }
 }
 impl Default for TimeSlice {
    fn default() -> Self {
        Self::new()
    }
 }
 impl PreemptSchedInfo for Thread {
    const REAL_TIME_TASK_PRIORITY: Priority = Priority::default_real_time();
    const LOWEST_TASK_PRIORITY: Priority = Priority::idle();
    fn priority(&self) -> Priority {
        self.atomic_priority().load(Ordering::Relaxed)
    }
    fn cpu_affinity(&self) -> CpuSet {
        self.atomic_cpu_affinity().load()
    }
 }
 trait PreemptSchedInfo {
    const REAL_TIME_TASK_PRIORITY: Priority;
    const LOWEST_TASK_PRIORITY: Priority;
    fn priority(&self) -> Priority;
    fn cpu_affinity(&self) -> CpuSet;
    fn is_real_time(&self) -> bool {
        self.priority() < Self::REAL_TIME_TASK_PRIORITY
    }
    fn is_lowest(&self) -> bool {
        self.priority() == Self::LOWEST_TASK_PRIORITY
    }
 }
 impl FromTask<Task> for Thread {
    fn from_task(task: &Arc<Task>) -> Arc<Self> {
        task.data().downcast_ref::<Arc<Self>>().unwrap().clone()
    }
 }
 trait FromTask<U> {
    fn from_task(task: &Arc<U>) -> Arc<Self>;
 }
--- a/kernel/src/sched/sched_class/fair.rs
+++ b/kernel/src/sched/sched_class/fair.rs
@ -19,7 +19,7 @@ use super::{
    CurrentRuntime, SchedAttr, SchedClassRq,
 };
 use crate::{
-    sched::priority::{Nice, NiceRange},
+    sched::nice::{Nice, NiceValue},
    thread::AsThread,
 };
@ -38,8 +38,8 @@ pub const fn nice_to_weight(nice: Nice) -> u64 {
        let mut ret = [0; 40];
        let mut index = 0;
-        let mut nice = NiceRange::MIN;
+        let mut nice = NiceValue::MIN.get();
-        while nice <= NiceRange::MAX {
+        while nice <= NiceValue::MAX.get() {
            ret[index] = match nice {
                0 => WEIGHT_0,
                nice @ 1.. => {
@ -60,7 +60,7 @@ pub const fn nice_to_weight(nice: Nice) -> u64 {
        ret
    };
-    NICE_TO_WEIGHT[(nice.range().get() + 20) as usize]
+    NICE_TO_WEIGHT[(nice.value().get() + 20) as usize]
 }
 /// The scheduling entity for the FAIR scheduling class.
--- a/kernel/src/sched/sched_class/mod.rs
+++ b/kernel/src/sched/sched_class/mod.rs
@ -19,7 +19,10 @@ use ostd::{
    trap::disable_local,
 };
-use super::{priority::Nice, stats::SchedulerStats};
+use super::{
    nice::Nice,
    stats::{set_stats_from_scheduler, SchedulerStats},
 };
 use crate::thread::{AsThread, Thread};
 mod policy;
@ -30,13 +33,21 @@ mod idle;
 mod real_time;
 mod stop;
-use self::policy::{SchedPolicy, SchedPolicyKind, SchedPolicyState};
+pub use self::policy::SchedPolicy;
 use self::policy::{SchedPolicyKind, SchedPolicyState};
 type SchedEntity = (Arc<Task>, Arc<Thread>);
 #[expect(unused)]
 pub fn init() {
-    inject_scheduler(Box::leak(Box::new(ClassScheduler::new())));
+    let scheduler = Box::leak(Box::new(ClassScheduler::new()));
    // Inject the scheduler into the ostd for actual scheduling work.
    inject_scheduler(scheduler);
    // Set the scheduler into the system for statistics.
    // We set this after injecting the scheduler into ostd,
    // so that the loadavg statistics are updated after the scheduler is used.
    set_stats_from_scheduler(scheduler);
 }
 /// Represents the middle layer between scheduling classes and generic scheduler
@ -128,7 +139,7 @@ impl SchedAttr {
            real_time: {
                let (prio, policy) = match policy {
                    SchedPolicy::RealTime { rt_prio, rt_policy } => (rt_prio.get(), rt_policy),
-                    _ => (real_time::RtPrio::MAX, Default::default()),
+                    _ => (real_time::RtPrio::MAX.get(), Default::default()),
                };
                real_time::RealTimeAttr::new(prio, policy)
            },
--- a/kernel/src/sched/sched_class/policy.rs
+++ b/kernel/src/sched/sched_class/policy.rs
@ -7,7 +7,7 @@ use int_to_c_enum::TryFromInt;
 use ostd::sync::SpinLock;
 pub use super::real_time::{RealTimePolicy, RtPrio};
-use crate::sched::priority::{Nice, Priority};
+use crate::sched::nice::Nice;
 /// The User-chosen scheduling policy.
 ///
@ -16,7 +16,7 @@ use crate::sched::priority::{Nice, Priority};
 pub enum SchedPolicy {
    Stop,
    RealTime {
-        rt_prio: super::real_time::RtPrio,
+        rt_prio: RtPrio,
        rt_policy: RealTimePolicy,
    },
    Fair(Nice),
@ -32,19 +32,6 @@ pub(super) enum SchedPolicyKind {
    Idle = 3,
 }
 impl From<Priority> for SchedPolicy {
    fn from(priority: Priority) -> Self {
        match priority.range().get() {
            rt @ 0..=99 => SchedPolicy::RealTime {
                rt_prio: RtPrio::new(rt as u8),
                rt_policy: Default::default(),
            },
            100..=139 => SchedPolicy::Fair(priority.into()),
            _ => unreachable!(),
        }
    }
 }
 impl SchedPolicy {
    pub(super) fn kind(&self) -> SchedPolicyKind {
        match self {
--- a/kernel/src/sched/sched_class/real_time.rs
+++ b/kernel/src/sched/sched_class/real_time.rs
@ -17,7 +17,7 @@ use ostd::{
 };
 use super::{time::base_slice_clocks, CurrentRuntime, SchedAttr, SchedClassRq};
-use crate::{sched::priority::RangedU8, thread::AsThread};
+use crate::{sched::nice::RangedU8, thread::AsThread};
 pub type RtPrio = RangedU8<0, 99>;
--- a/kernel/src/syscall/set_get_priority.rs
+++ b/kernel/src/syscall/set_get_priority.rs
@ -6,14 +6,17 @@ use super::SyscallReturn;
 use crate::{
    prelude::*,
    process::{posix_thread::AsPosixThread, process_table, Pgid, Pid, Process, Uid},
-    sched::priority::{Nice, NiceRange},
+    sched::Nice,
 };
 pub fn sys_set_priority(which: i32, who: u32, prio: i32, ctx: &Context) -> Result<SyscallReturn> {
    let prio_target = PriorityTarget::new(which, who, ctx)?;
-    let new_nice = {
+    let new_nice: Nice = {
-        let nice_raw = prio.clamp(NiceRange::MIN as i32, NiceRange::MAX as i32) as i8;
+        let nice_raw = prio.clamp(
-        Nice::new(NiceRange::new(nice_raw))
+            Nice::MIN.value().get() as i32,
            Nice::MAX.value().get() as i32,
        ) as i8;
        nice_raw.try_into().unwrap()
    };
    debug!(
@ -35,9 +38,9 @@ pub fn sys_get_priority(which: i32, who: u32, ctx: &Context) -> Result<SyscallRe
    let processes = get_processes(prio_target)?;
    let highest_prio = {
-        let mut nice = NiceRange::MAX;
+        let mut nice = Nice::MAX.value().get();
        for process in processes.iter() {
-            let proc_nice = process.nice().load(Ordering::Relaxed).range().get();
+            let proc_nice = process.nice().load(Ordering::Relaxed).value().get();
            // Returns the highest priority enjoyed by the processes
            if proc_nice < nice {
                nice = proc_nice;
--- a/kernel/src/thread/kernel_thread.rs
+++ b/kernel/src/thread/kernel_thread.rs
@ -6,7 +6,10 @@ use ostd::{
 };
 use super::{oops, AsThread, Thread};
-use crate::{prelude::*, sched::priority::Priority};
+use crate::{
    prelude::*,
    sched::{Nice, SchedPolicy},
 };
 /// The inner data of a kernel thread.
 struct KernelThread;
@ -14,8 +17,8 @@ struct KernelThread;
 /// Options to create or spawn a new kernel thread.
 pub struct ThreadOptions {
    func: Option<Box<dyn FnOnce() + Send>>,
    priority: Priority,
    cpu_affinity: CpuSet,
    sched_policy: SchedPolicy,
 }
 impl ThreadOptions {
@ -25,24 +28,25 @@ impl ThreadOptions {
        F: FnOnce() + Send + 'static,
    {
        let cpu_affinity = CpuSet::new_full();
        let sched_policy = SchedPolicy::Fair(Nice::default());
        Self {
            func: Some(Box::new(func)),
            priority: Priority::default(),
            cpu_affinity,
            sched_policy,
        }
    }
    /// Sets the priority of the new thread.
    pub fn priority(mut self, priority: Priority) -> Self {
        self.priority = priority;
        self
    }
    /// Sets the CPU affinity of the new thread.
    pub fn cpu_affinity(mut self, cpu_affinity: CpuSet) -> Self {
        self.cpu_affinity = cpu_affinity;
        self
    }
    /// Sets the scheduling policy.
    pub fn sched_policy(mut self, sched_policy: SchedPolicy) -> Self {
        self.sched_policy = sched_policy;
        self
    }
 }
 impl ThreadOptions {
@ -58,13 +62,13 @@ impl ThreadOptions {
        Arc::new_cyclic(|weak_task| {
            let thread = {
                let kernel_thread = KernelThread;
                let priority = self.priority;
                let cpu_affinity = self.cpu_affinity;
                let sched_policy = self.sched_policy;
                Arc::new(Thread::new(
                    weak_task.clone(),
                    kernel_thread,
                    priority,
                    cpu_affinity,
                    sched_policy,
                ))
            };
--- a/kernel/src/thread/mod.rs
+++ b/kernel/src/thread/mod.rs
@ -12,10 +12,7 @@ use ostd::{
 use self::status::{AtomicThreadStatus, ThreadStatus};
 use crate::{
    prelude::*,
-    sched::{
+    sched::{SchedAttr, SchedPolicy},
        priority::{AtomicPriority, Priority},
        SchedAttr,
    },
 };
 pub mod exception;
@ -39,8 +36,6 @@ pub struct Thread {
    // mutable part
    /// Thread status
    status: AtomicThreadStatus,
    /// Thread priority
    priority: AtomicPriority,
    /// Thread CPU affinity
    cpu_affinity: AtomicCpuSet,
    sched_attr: SchedAttr,
@ -51,16 +46,15 @@ impl Thread {
    pub fn new(
        task: Weak<Task>,
        data: impl Send + Sync + Any,
        priority: Priority,
        cpu_affinity: CpuSet,
        sched_policy: SchedPolicy,
    ) -> Self {
        Thread {
            task,
            data: Box::new(data),
            status: AtomicThreadStatus::new(ThreadStatus::Init),
            priority: AtomicPriority::new(priority),
            cpu_affinity: AtomicCpuSet::new(cpu_affinity),
-            sched_attr: SchedAttr::new(priority.into()),
+            sched_attr: SchedAttr::new(sched_policy),
        }
    }
@ -135,11 +129,6 @@ impl Thread {
        self.status.store(ThreadStatus::Exited, Ordering::Release);
    }
    /// Returns the reference to the atomic priority.
    pub fn atomic_priority(&self) -> &AtomicPriority {
        &self.priority
    }
    /// Returns the reference to the atomic CPU affinity.
    pub fn atomic_cpu_affinity(&self) -> &AtomicCpuSet {
        &self.cpu_affinity
--- a/kernel/src/thread/work_queue/worker.rs
+++ b/kernel/src/thread/work_queue/worker.rs
@ -10,7 +10,7 @@ use ostd::{
 use super::worker_pool::WorkerPool;
 use crate::{
    prelude::*,
-    sched::priority::Priority,
+    sched::{Nice, SchedPolicy},
    thread::{kernel_thread::ThreadOptions, AsThread},
 };
@ -50,13 +50,15 @@ impl Worker {
            });
            let mut cpu_affinity = CpuSet::new_empty();
            cpu_affinity.add(bound_cpu);
-            let mut priority = Priority::default();
+            let sched_policy =
-            if worker_pool.upgrade().unwrap().is_high_priority() {
+                SchedPolicy::Fair(if worker_pool.upgrade().unwrap().is_high_priority() {
-                priority = Priority::default_real_time();
+                    Nice::MIN
-            }
+                } else {
                    Nice::default()
                });
            let bound_task = ThreadOptions::new(task_fn)
                .cpu_affinity(cpu_affinity)
-                .priority(priority)
+                .sched_policy(sched_policy)
                .build();
            Self {
                worker_pool,
--- a/kernel/src/thread/work_queue/worker_pool.rs
+++ b/kernel/src/thread/work_queue/worker_pool.rs
@ -16,7 +16,7 @@ use ostd::{
 use super::{simple_scheduler::SimpleScheduler, worker::Worker, WorkItem, WorkPriority, WorkQueue};
 use crate::{
    prelude::*,
-    sched::priority::Priority,
+    sched::{Nice, SchedPolicy},
    thread::{kernel_thread::ThreadOptions, AsThread},
 };
@ -237,17 +237,13 @@ impl Monitor {
                current_monitor.run_monitor_loop();
            });
            let cpu_affinity = CpuSet::new_full();
-            // FIXME: remove the use of real-time priority.
+            let sched_policy = SchedPolicy::Fair(match priority {
-            // Logically all monitors should be of default normal priority.
+                WorkPriority::High => Nice::MIN,
-            // This workaround is to make the monitor of high-priority worker pool
+                WorkPriority::Normal => Nice::default(),
-            // starvation-free under the current scheduling policy.
+            });
            let priority = match priority {
                WorkPriority::High => Priority::default_real_time(),
                WorkPriority::Normal => Priority::default(),
            };
            let bound_task = ThreadOptions::new(task_fn)
                .cpu_affinity(cpu_affinity)
-                .priority(priority)
+                .sched_policy(sched_policy)
                .build();
            Self {
                worker_pool,