mirror of
https://github.com/asterinas/asterinas.git
synced 2025-06-26 10:53:25 +00:00
276 lines
8.3 KiB
Rust
276 lines
8.3 KiB
Rust
// SPDX-License-Identifier: MPL-2.0
|
|
|
|
use alloc::{collections::BinaryHeap, sync::Arc};
|
|
use core::{
|
|
cmp::{self, Reverse},
|
|
sync::atomic::{AtomicU64, Ordering::Relaxed},
|
|
};
|
|
|
|
use ostd::{
|
|
cpu::{num_cpus, CpuId},
|
|
task::{
|
|
scheduler::{EnqueueFlags, UpdateFlags},
|
|
Task,
|
|
},
|
|
};
|
|
|
|
use super::{
|
|
time::{base_slice_clocks, min_period_clocks},
|
|
CurrentRuntime, SchedAttr, SchedClassRq,
|
|
};
|
|
use crate::{
|
|
sched::nice::{Nice, NiceValue},
|
|
thread::AsThread,
|
|
};
|
|
|
|
/// The load weight of a thread whose nice value is 0.
///
/// The weights of all other nice values are derived from it, and vruntime
/// deltas are scaled by `WEIGHT_0 / weight`.
const WEIGHT_0: u64 = 1024;
|
|
pub const fn nice_to_weight(nice: Nice) -> u64 {
|
|
// Calculated by the formula below:
|
|
//
|
|
// weight = 1024 * 1.25^(-nice)
|
|
//
|
|
// We propose that every increment of the nice value results
|
|
// in 12.5% change of the CPU load weight.
|
|
const FACTOR_NUMERATOR: u64 = 5;
|
|
const FACTOR_DENOMINATOR: u64 = 4;
|
|
|
|
const NICE_TO_WEIGHT: [u64; 40] = const {
|
|
let mut ret = [0; 40];
|
|
|
|
let mut index = 0;
|
|
let mut nice = NiceValue::MIN.get();
|
|
while nice <= NiceValue::MAX.get() {
|
|
ret[index] = match nice {
|
|
0 => WEIGHT_0,
|
|
nice @ 1.. => {
|
|
let numerator = FACTOR_DENOMINATOR.pow(nice as u32);
|
|
let denominator = FACTOR_NUMERATOR.pow(nice as u32);
|
|
WEIGHT_0 * numerator / denominator
|
|
}
|
|
nice => {
|
|
let numerator = FACTOR_NUMERATOR.pow((-nice) as u32);
|
|
let denominator = FACTOR_DENOMINATOR.pow((-nice) as u32);
|
|
WEIGHT_0 * numerator / denominator
|
|
}
|
|
};
|
|
|
|
index += 1;
|
|
nice += 1;
|
|
}
|
|
ret
|
|
};
|
|
|
|
NICE_TO_WEIGHT[(nice.value().get() + 20) as usize]
|
|
}
|
|
|
|
/// The per-thread scheduling state used by the FAIR scheduling class.
///
/// Its central piece is the virtual runtime, `vruntime`.
///
/// # `vruntime`
///
/// Whenever the thread has run for `runtime_delta` clocks, its vruntime
/// advances by that amount scaled inversely to the thread's weight:
///
/// ```text
/// vruntime += runtime_delta * WEIGHT_0 / weight
/// ```
///
/// The thread with the lowest vruntime is the next one to be scheduled, which
/// keeps the run queue balanced on vruntime (hence FAIR).
///
/// # Scheduling periods
///
/// A scheduling period is used to derive the time slice of each thread:
///
/// ```text
/// time_slice = period * weight / total_weight
/// ```
///
/// Here `total_weight` is the sum of the weights of all threads in the run
/// queue plus the weight of the current thread, and
/// [`period`](FairClassRq::period) depends on the number of runnable threads.
///
/// The current thread is preempted back to the run queue once the condition
/// below holds. See [`FairClassRq::update_current`] for more details.
///
/// ```text
/// period_delta > time_slice
///     || vruntime > rq_min_vruntime + normalized_time_slice
/// ```
#[derive(Debug)]
pub struct FairAttr {
    /// The load weight, derived from the nice value by [`nice_to_weight`].
    weight: AtomicU64,
    /// The accumulated virtual runtime.
    vruntime: AtomicU64,
}
|
|
|
|
impl FairAttr {
|
|
pub fn new(nice: Nice) -> Self {
|
|
FairAttr {
|
|
weight: nice_to_weight(nice).into(),
|
|
vruntime: Default::default(),
|
|
}
|
|
}
|
|
|
|
pub fn update(&self, nice: Nice) {
|
|
self.weight.store(nice_to_weight(nice), Relaxed);
|
|
}
|
|
|
|
fn update_vruntime(&self, delta: u64) -> (u64, u64) {
|
|
let weight = self.weight.load(Relaxed);
|
|
let delta = delta * WEIGHT_0 / weight;
|
|
let vruntime = self.vruntime.fetch_add(delta, Relaxed) + delta;
|
|
(vruntime, weight)
|
|
}
|
|
}
|
|
|
|
/// The wrapper for threads in the FAIR run queue.
|
|
///
|
|
/// This structure is used to provide the capability for keying in the
|
|
/// run queue implemented by `BTreeSet` in the `FairClassRq`.
|
|
struct FairQueueItem(Arc<Task>, u64);
|
|
|
|
impl core::fmt::Debug for FairQueueItem {
|
|
fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
|
|
write!(f, "{:?}", self.key())
|
|
}
|
|
}
|
|
|
|
impl FairQueueItem {
|
|
fn key(&self) -> u64 {
|
|
self.1
|
|
}
|
|
}
|
|
|
|
impl PartialEq for FairQueueItem {
|
|
fn eq(&self, other: &Self) -> bool {
|
|
self.key().eq(&other.key())
|
|
}
|
|
}
|
|
|
|
// Equality is decided by the `u64` key alone, so it is a full equivalence
// relation and `Eq` can be implemented as a marker.
impl Eq for FairQueueItem {}
|
|
|
|
impl PartialOrd for FairQueueItem {
|
|
fn partial_cmp(&self, other: &Self) -> Option<cmp::Ordering> {
|
|
Some(self.cmp(other))
|
|
}
|
|
}
|
|
|
|
impl Ord for FairQueueItem {
|
|
fn cmp(&self, other: &Self) -> cmp::Ordering {
|
|
self.key().cmp(&other.key())
|
|
}
|
|
}
|
|
|
|
/// The per-cpu run queue for the FAIR scheduling class.
|
|
///
|
|
/// See [`FairAttr`] for the explanation of vruntimes and scheduling periods.
|
|
///
|
|
/// The structure contains a `BTreeSet` to store the threads in the run queue to
|
|
/// ensure the efficiency for finding next-to-run threads.
|
|
#[derive(Debug)]
|
|
pub(super) struct FairClassRq {
|
|
#[expect(unused)]
|
|
cpu: CpuId,
|
|
/// The ready-to-run threads.
|
|
entities: BinaryHeap<Reverse<FairQueueItem>>,
|
|
/// The minimum of vruntime in the run queue. Serves as the initial
|
|
/// value of newly-enqueued threads.
|
|
min_vruntime: u64,
|
|
total_weight: u64,
|
|
}
|
|
|
|
impl FairClassRq {
|
|
pub fn new(cpu: CpuId) -> Self {
|
|
Self {
|
|
cpu,
|
|
entities: BinaryHeap::new(),
|
|
min_vruntime: 0,
|
|
total_weight: 0,
|
|
}
|
|
}
|
|
|
|
/// The scheduling period is calculated as the maximum of the following two values:
|
|
///
|
|
/// 1. The minimum period value, defined by [`min_period_clocks`].
|
|
/// 2. `period = min_granularity * n` where
|
|
/// `min_granularity = log2(1 + num_cpus) * base_slice_clocks`, and `n` is the number of
|
|
/// runnable threads (including the current running thread).
|
|
///
|
|
/// The formula is chosen by 3 principles:
|
|
///
|
|
/// 1. The scheduling period should reflect the running threads and CPUs;
|
|
/// 2. The scheduling period should not be too low to limit the overhead of context switching;
|
|
/// 3. The scheduling period should not be too high to ensure the scheduling latency
|
|
/// & responsiveness.
|
|
fn period(&self) -> u64 {
|
|
let base_slice_clks = base_slice_clocks();
|
|
let min_period_clks = min_period_clocks();
|
|
|
|
// `+ 1` means including the current running thread.
|
|
let period_single_cpu =
|
|
(base_slice_clks * (self.entities.len() + 1) as u64).max(min_period_clks);
|
|
period_single_cpu * u64::from((1 + num_cpus()).ilog2())
|
|
}
|
|
|
|
/// The virtual time slice for each thread in the run queue, measured in vruntime clocks.
|
|
fn vtime_slice(&self) -> u64 {
|
|
self.period() / (self.entities.len() + 1) as u64
|
|
}
|
|
|
|
/// The time slice for each thread in the run queue, measured in sched clocks.
|
|
fn time_slice(&self, cur_weight: u64) -> u64 {
|
|
self.period() * cur_weight / (self.total_weight + cur_weight)
|
|
}
|
|
}
|
|
|
|
impl SchedClassRq for FairClassRq {
|
|
fn enqueue(&mut self, entity: Arc<Task>, flags: Option<EnqueueFlags>) {
|
|
let fair_attr = &entity.as_thread().unwrap().sched_attr().fair;
|
|
let vruntime = match flags {
|
|
Some(EnqueueFlags::Spawn) => self.min_vruntime + self.vtime_slice(),
|
|
_ => self.min_vruntime,
|
|
};
|
|
let vruntime = fair_attr
|
|
.vruntime
|
|
.fetch_max(vruntime, Relaxed)
|
|
.max(vruntime);
|
|
|
|
self.total_weight += fair_attr.weight.load(Relaxed);
|
|
self.entities.push(Reverse(FairQueueItem(entity, vruntime)));
|
|
}
|
|
|
|
fn len(&self) -> usize {
|
|
self.entities.len()
|
|
}
|
|
|
|
fn is_empty(&self) -> bool {
|
|
self.entities.is_empty()
|
|
}
|
|
|
|
fn pick_next(&mut self) -> Option<Arc<Task>> {
|
|
let Reverse(FairQueueItem(entity, _)) = self.entities.pop()?;
|
|
|
|
let sched_attr = entity.as_thread().unwrap().sched_attr();
|
|
self.total_weight -= sched_attr.fair.weight.load(Relaxed);
|
|
|
|
Some(entity)
|
|
}
|
|
|
|
fn update_current(
|
|
&mut self,
|
|
rt: &CurrentRuntime,
|
|
attr: &SchedAttr,
|
|
flags: UpdateFlags,
|
|
) -> bool {
|
|
match flags {
|
|
UpdateFlags::Yield => true,
|
|
UpdateFlags::Tick | UpdateFlags::Wait => {
|
|
let (vruntime, weight) = attr.fair.update_vruntime(rt.delta);
|
|
self.min_vruntime = match self.entities.peek() {
|
|
Some(Reverse(leftmost)) => vruntime.min(leftmost.key()),
|
|
None => vruntime,
|
|
};
|
|
|
|
rt.period_delta > self.time_slice(weight)
|
|
|| vruntime > self.min_vruntime + self.vtime_slice()
|
|
}
|
|
}
|
|
}
|
|
}
|