991 lines
31 KiB
Rust
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

pub mod clock;
pub mod completion;
pub mod cputime;
pub mod fair;
pub mod idle;
pub mod pelt;
pub mod prio;
use core::{
intrinsics::{likely, unlikely},
sync::atomic::{compiler_fence, fence, AtomicUsize, Ordering},
};
use alloc::{
boxed::Box,
collections::LinkedList,
sync::{Arc, Weak},
vec::Vec,
};
use system_error::SystemError;
use crate::{
arch::{interrupt::ipi::send_ipi, CurrentIrqArch},
exception::{
ipi::{IpiKind, IpiTarget},
InterruptArch,
},
libs::{
lazy_init::Lazy,
spinlock::{SpinLock, SpinLockGuard},
},
mm::percpu::{PerCpu, PerCpuVar},
process::{ProcessControlBlock, ProcessFlags, ProcessManager, ProcessState, SchedInfo},
sched::idle::IdleScheduler,
smp::{core::smp_get_processor_id, cpu::ProcessorId},
time::{clocksource::HZ, timer::clock},
};
use self::{
clock::{ClockUpdataFlag, SchedClock},
cputime::{irq_time_read, CpuTimeFunc, IrqTime},
fair::{CfsRunQueue, CompletelyFairScheduler, FairSchedEntity},
prio::PrioUtil,
};
// Per-CPU IRQ-time accounting slots, indexed by CPU id; populated with leaked
// boxes in `sched_init`, so the `'static` references are genuinely immortal.
static mut CPU_IRQ_TIME: Option<Vec<&'static mut IrqTime>> = None;
// Although the rq is per-CPU, load balancing needs to modify a peer CPU's rq,
// so a lock is still required.
static CPU_RUNQUEUE: Lazy<PerCpuVar<Arc<CpuRunQueue>>> = PerCpuVar::define_lazy();
/// Sum of the runnable-task counts of all CPUs in the system.
static CALCULATE_LOAD_TASKS: AtomicUsize = AtomicUsize::new(0);
/// Interval between global load updates: 5 seconds' worth of ticks plus one.
const LOAD_FREQ: usize = HZ as usize * 5 + 1;
/// Fixed-point shift used by scheduler weight arithmetic.
pub const SCHED_FIXEDPOINT_SHIFT: u64 = 10;
#[allow(dead_code)]
pub const SCHED_FIXEDPOINT_SCALE: u64 = 1 << SCHED_FIXEDPOINT_SHIFT;
#[allow(dead_code)]
pub const SCHED_CAPACITY_SHIFT: u64 = SCHED_FIXEDPOINT_SHIFT;
#[allow(dead_code)]
pub const SCHED_CAPACITY_SCALE: u64 = 1 << SCHED_CAPACITY_SHIFT;
/// Returns the IRQ-time accounting structure for `cpu`.
///
/// Panics (via `unwrap`) if called before `sched_init` has populated
/// `CPU_IRQ_TIME`.
// SAFETY-NOTE(review): this hands out a `&'static mut` from a `static mut`
// on every call, so two callers can hold aliasing mutable references to the
// same `IrqTime`. Soundness presumably relies on each CPU only touching its
// own slot — TODO confirm.
#[inline]
pub fn cpu_irq_time(cpu: usize) -> &'static mut IrqTime {
    unsafe { CPU_IRQ_TIME.as_mut().unwrap()[cpu] }
}
/// Returns (a clone of the `Arc` handle to) the run queue of CPU `cpu`.
#[inline]
pub fn cpu_rq(cpu: usize) -> Arc<CpuRunQueue> {
    // Make sure the per-CPU run-queue table has been initialized.
    CPU_RUNQUEUE.ensure();
    let target = ProcessorId::new(cpu as u32);
    // Cloning only bumps the refcount; the queue itself stays shared.
    unsafe { CPU_RUNQUEUE.get().force_get(target).clone() }
}
lazy_static! {
    /// Default set of scheduler feature flags enabled at boot; see the
    /// `SchedFeature` bitflags for the meaning of each bit.
    pub static ref SCHED_FEATURES: SchedFeature = SchedFeature::GENTLE_FAIR_SLEEPERS
        | SchedFeature::START_DEBIT
        | SchedFeature::LAST_BUDDY
        | SchedFeature::CACHE_HOT_BUDDY
        | SchedFeature::WAKEUP_PREEMPTION
        | SchedFeature::NONTASK_CAPACITY
        | SchedFeature::TTWU_QUEUE
        | SchedFeature::SIS_UTIL
        | SchedFeature::RT_PUSH_IPI
        | SchedFeature::ALT_PERIOD
        | SchedFeature::BASE_SLICE
        | SchedFeature::UTIL_EST
        | SchedFeature::UTIL_EST_FASTUP;
}
/// Common interface implemented by every scheduling class (CFS, idle, ...).
///
/// All methods operate on a per-CPU run queue; callers are expected to hold
/// the queue's lock (see `CpuRunQueue::self_lock`).
pub trait Scheduler {
    /// ## Called when a task enters the runnable state. Puts the scheduling
    /// entity into the red-black tree and increments `nr_running`.
    fn enqueue(rq: &mut CpuRunQueue, pcb: Arc<ProcessControlBlock>, flags: EnqueueFlag);
    /// ## Called when a task is no longer runnable; the corresponding
    /// scheduling entity is removed from the red-black tree and `nr_running`
    /// is decremented.
    fn dequeue(rq: &mut CpuRunQueue, pcb: Arc<ProcessControlBlock>, flags: DequeueFlag);
    /// ## Voluntarily yields the CPU; essentially a dequeue immediately
    /// followed by an enqueue.
    fn yield_task(rq: &mut CpuRunQueue);
    /// ## Checks whether a task that just became runnable can preempt the
    /// currently running task.
    // NOTE(review): "currnet" is a typo for "current"; kept because callers
    // throughout the codebase use this name.
    fn check_preempt_currnet(
        rq: &mut CpuRunQueue,
        pcb: &Arc<ProcessControlBlock>,
        flags: WakeupFlags,
    );
    /// ## Picks the most suitable task to run next.
    fn pick_task(rq: &mut CpuRunQueue) -> Option<Arc<ProcessControlBlock>>;
    /// ## Picks the most suitable task to run next, given the task being
    /// switched away from.
    fn pick_next_task(
        rq: &mut CpuRunQueue,
        pcb: Option<Arc<ProcessControlBlock>>,
    ) -> Option<Arc<ProcessControlBlock>>;
    /// ## Called from the timer tick; may lead to a task switch and thus
    /// drives run-time preemption.
    fn tick(rq: &mut CpuRunQueue, pcb: Arc<ProcessControlBlock>, queued: bool);
    /// ## Called at fork time when the new task should join this scheduler
    /// (e.g. CFS).
    fn task_fork(pcb: Arc<ProcessControlBlock>);
    /// Returns the previously running task to the run queue's bookkeeping.
    fn put_prev_task(rq: &mut CpuRunQueue, prev: Arc<ProcessControlBlock>);
}
/// Scheduling policy of a task.
///
/// NOTE: declaration order matters — the derived `Ord` makes
/// `RT < FIFO < CFS < IDLE`, and a *smaller* value means a higher-priority
/// scheduling class (see `CpuRunQueue::check_preempt_currnet` and
/// `CpuRunQueue::pick_next_task`).
#[allow(dead_code)]
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
pub enum SchedPolicy {
    /// Real-time tasks
    RT,
    /// First-in-first-out scheduling
    FIFO,
    /// Completely Fair Scheduling
    CFS,
    /// IDLE
    IDLE,
}
/// A group of tasks scheduled together (group scheduling — not wired up yet;
/// cf. the "group sched" TODO in `__set_task_cpu`).
#[allow(dead_code)]
pub struct TaskGroup {
    /// Scheduling entities managed by CFS, one per CPU.
    entitys: Vec<Arc<FairSchedEntity>>,
    /// Per-CPU CFS run queues.
    cfs: Vec<Arc<CfsRunQueue>>,
    /// Parent group.
    parent: Option<Arc<TaskGroup>>,
    // Relative CPU share of this group; presumably mirrors Linux's
    // `tg->shares` — TODO confirm once group scheduling is implemented.
    shares: u64,
}
/// Load weight of a scheduling entity, with a cached inverse so repeated
/// divisions can be done as multiply-and-shift (see `calculate_delta`).
#[derive(Debug, Default)]
pub struct LoadWeight {
    /// The load weight itself.
    pub weight: u64,
    /// Cached inverse of `weight` for cheap division; 0 means "not yet
    /// computed" (recomputed lazily by `update_inv_weight`).
    pub inv_weight: u32,
}
impl LoadWeight {
    /// Fixed-point shift used to keep weights within a convenient range.
    pub const SCHED_FIXEDPOINT_SHIFT: u32 = 10;
    /// Shift applied after multiplying by the cached inverse weight.
    pub const WMULT_SHIFT: u32 = 32;
    /// Maximum inverse weight (all bits set, i.e. `u32::MAX`).
    pub const WMULT_CONST: u32 = !0;
    /// Shift of a nice-0 task's load (twice the fixed-point shift).
    pub const NICE_0_LOAD_SHIFT: u32 = Self::SCHED_FIXEDPOINT_SHIFT + Self::SCHED_FIXEDPOINT_SHIFT;

    /// Adds `inc` to the weight and invalidates the cached inverse.
    pub fn update_load_add(&mut self, inc: u64) {
        self.weight += inc;
        self.inv_weight = 0;
    }

    /// Subtracts `dec` from the weight and invalidates the cached inverse.
    // NOTE(review): underflows (panics in debug builds) when `dec > weight`;
    // callers must uphold `dec <= weight`.
    pub fn update_load_sub(&mut self, dec: u64) {
        self.weight -= dec;
        self.inv_weight = 0;
    }

    /// Sets the weight and invalidates the cached inverse.
    pub fn update_load_set(&mut self, weight: u64) {
        self.weight = weight;
        self.inv_weight = 0;
    }

    /// ## Recomputes the cached inverse of the load weight.
    ///
    /// `inv_weight` approximates `WMULT_CONST / scale_load_down(weight)`,
    /// letting `calculate_delta` replace a division with multiply-and-shift.
    pub fn update_inv_weight(&mut self) {
        // Already up to date.
        if likely(self.inv_weight != 0) {
            return;
        }
        let w = Self::scale_load_down(self.weight);
        if unlikely(w >= Self::WMULT_CONST as u64) {
            // Weight does not fit in 32 bits: the inverse rounds down to 1.
            self.inv_weight = 1;
        } else if unlikely(w == 0) {
            // Zero weight: take the maximum inverse.
            self.inv_weight = Self::WMULT_CONST;
        } else {
            // Normal case: compute the inverse.
            self.inv_weight = Self::WMULT_CONST / w as u32;
        }
    }

    /// ## Computes the task's weighted execution-time delta.
    ///
    /// Formula: `(delta_exec * (weight * self.inv_weight)) >> WMULT_SHIFT`
    ///
    /// Whenever the intermediate `fact` has non-zero high 32 bits it is
    /// shifted down, and `shift` is reduced by the same amount so the final
    /// result is unchanged while the products stay in range.
    pub fn calculate_delta(&mut self, delta_exec: u64, weight: u64) -> u64 {
        // Reduce the precision of the incoming weight.
        let mut fact = Self::scale_load_down(weight);
        // Record the high 32 bits of `fact`.
        let mut fact_hi = (fact >> 32) as u32;
        // Shift to apply at the end; reduced as `fact` is normalized.
        let mut shift = Self::WMULT_SHIFT;
        self.update_inv_weight();
        if unlikely(fact_hi != 0) {
            // The high 32 bits are populated:
            // find the highest set bit and shrink `fact` accordingly.
            let fs = 32 - fact_hi.leading_zeros();
            shift -= fs;
            // Ensure the high 32 bits are all zero.
            fact >>= fs;
        }
        // `fact` is now guaranteed to fit in 32 bits.
        fact *= self.inv_weight as u64;
        fact_hi = (fact >> 32) as u32;
        if fact_hi != 0 {
            // The product spilled into the high 32 bits:
            // normalize `fact` again.
            let fs = 32 - fact_hi.leading_zeros();
            shift -= fs;
            // Ensure the high 32 bits are all zero.
            fact >>= fs;
        }
        return ((delta_exec as u128 * fact as u128) >> shift) as u64;
    }

    /// ## Scales the load weight down into a smaller range (i.e. computes at
    /// reduced precision), clamping non-zero weights to a minimum of 2.
    pub const fn scale_load_down(mut weight: u64) -> u64 {
        if weight != 0 {
            weight >>= Self::SCHED_FIXEDPOINT_SHIFT;
            if weight < 2 {
                weight = 2;
            }
        }
        weight
    }

    /// Scales a weight back up by the fixed-point shift (inverse of
    /// `scale_load_down`, minus its clamping).
    #[allow(dead_code)]
    pub const fn scale_load(weight: u64) -> u64 {
        weight << Self::SCHED_FIXEDPOINT_SHIFT
    }
}
/// Architecture-specific hooks for turning scheduling on and off per core.
pub trait SchedArch {
    /// Enables scheduling on the current core.
    fn enable_sched_local();
    /// Disables scheduling on the current core.
    fn disable_sched_local();
    /// One-time setup performed before scheduling is first enabled.
    ///
    /// Unlike `sched_init`, this function only does work such as initializing
    /// the clock.
    fn initial_setup_sched_local() {}
}
/// ## Per-CPU run queue, holding the sub-queues of each scheduling class.
///
/// Although the queue is per-CPU, load balancing has to modify remote CPUs'
/// queues, hence the internal spinlock.
#[allow(dead_code)]
#[derive(Debug)]
pub struct CpuRunQueue {
    lock: SpinLock<()>,
    /// CPU id that last acquired `lock` (initialized to `usize::MAX`); lets
    /// `self_lock` detect same-CPU re-entry.
    lock_on_who: AtomicUsize,
    /// Index of the CPU this queue belongs to.
    cpu: usize,
    /// Task clock: the rq clock minus time consumed by IRQ processing.
    clock_task: u64,
    /// Raw run-queue clock, advanced from `SchedClock::sched_clock_cpu`.
    clock: u64,
    /// IRQ time already folded into the `clock_task` accounting.
    prev_irq_time: u64,
    // NOTE(review): "updata" is a typo for "update"; kept to match the
    // `ClockUpdataFlag` type name.
    clock_updata_flags: ClockUpdataFlag,
    /// Overloaded: set once the queue has reached >= 2 runnable tasks
    /// (never cleared in this file).
    overload: bool,
    next_balance: u64,
    /// Number of runnable tasks.
    nr_running: usize,
    /// Number of blocked (uninterruptible) tasks.
    nr_uninterruptible: usize,
    /// Time of the last global-load update (see `calculate_global_load_tick`).
    cala_load_update: usize,
    /// Active task count recorded at the last global-load update.
    cala_load_active: usize,
    /// CFS scheduler run queue.
    cfs: Arc<CfsRunQueue>,
    clock_pelt: u64,
    lost_idle_time: u64,
    clock_idle: u64,
    cfs_tasks: LinkedList<Arc<FairSchedEntity>>,
    /// Most recent scheduling statistics for this queue.
    sched_info: SchedInfo,
    /// Task currently executing on this run queue.
    current: Weak<ProcessControlBlock>,
    /// This CPU's idle task.
    idle: Weak<ProcessControlBlock>,
}
impl CpuRunQueue {
    /// Creates a run queue for CPU `cpu` with all clocks and counters zeroed
    /// and no current/idle task attached yet.
    pub fn new(cpu: usize) -> Self {
        Self {
            lock: SpinLock::new(()),
            lock_on_who: AtomicUsize::new(usize::MAX),
            cpu,
            clock_task: 0,
            clock: 0,
            prev_irq_time: 0,
            clock_updata_flags: ClockUpdataFlag::empty(),
            overload: false,
            next_balance: 0,
            nr_running: 0,
            nr_uninterruptible: 0,
            // First global-load update is due 5 seconds (plus one tick) from now.
            cala_load_update: (clock() + (5 * HZ + 1)) as usize,
            cala_load_active: 0,
            cfs: Arc::new(CfsRunQueue::new()),
            clock_pelt: 0,
            lost_idle_time: 0,
            clock_idle: 0,
            cfs_tasks: LinkedList::new(),
            sched_info: SchedInfo::default(),
            current: Weak::new(),
            idle: Weak::new(),
        }
    }

    /// Returns a mutable reference to this queue together with the guard that
    /// protects it.
    ///
    /// This function may only be used with interrupts disabled!
    /// The returned `&mut Self` aliases `self`, so the caller must keep the
    /// second return value (the guard, when `Some`) alive for as long as the
    /// mutable reference is used — in that sense this function is effectively
    /// `unsafe`. In interrupt context, with interrupts off, it is safe.
    pub fn self_lock(&self) -> (&mut Self, Option<SpinLockGuard<()>>) {
        if self.lock.is_locked()
            && smp_get_processor_id().data() as usize == self.lock_on_who.load(Ordering::SeqCst)
        {
            // Already locked by this CPU: re-entrant access, hand out the
            // reference without a new guard.
            (
                unsafe { &mut *(self as *const Self as usize as *mut Self) },
                None,
            )
        } else {
            // Otherwise acquire the lock first.
            let guard = self.lock();
            (
                unsafe { &mut *(self as *const Self as usize as *mut Self) },
                Some(guard),
            )
        }
    }

    /// Takes the queue lock (IRQ-save) and records the owning CPU so that
    /// `self_lock` can detect same-CPU re-entry.
    fn lock(&self) -> SpinLockGuard<()> {
        let guard = self.lock.lock_irqsave();
        // Record which CPU took the lock.
        self.lock_on_who
            .store(smp_get_processor_id().data() as usize, Ordering::SeqCst);
        guard
    }

    /// Enqueues `pcb` via its scheduling class, first updating the rq clock
    /// and the task's queue-wait statistics unless the flags opt out.
    pub fn enqueue_task(&mut self, pcb: Arc<ProcessControlBlock>, flags: EnqueueFlag) {
        if !flags.contains(EnqueueFlag::ENQUEUE_NOCLOCK) {
            self.update_rq_clock();
        }
        if !flags.contains(EnqueueFlag::ENQUEUE_RESTORE) {
            let sched_info = pcb.sched_info().sched_stat.upgradeable_read_irqsave();
            if sched_info.last_queued == 0 {
                // Start measuring queue-wait time from the current rq clock.
                sched_info.upgrade().last_queued = self.clock;
            }
        }
        match pcb.sched_info().policy() {
            SchedPolicy::CFS => CompletelyFairScheduler::enqueue(self, pcb, flags),
            SchedPolicy::FIFO => todo!(),
            SchedPolicy::RT => todo!(),
            SchedPolicy::IDLE => IdleScheduler::enqueue(self, pcb, flags),
        }
        // TODO:https://code.dragonos.org.cn/xref/linux-6.6.21/kernel/sched/core.c#239
    }

    /// Dequeues `pcb` via its scheduling class, folding the time it spent
    /// waiting on the queue into the run-delay statistics.
    pub fn dequeue_task(&mut self, pcb: Arc<ProcessControlBlock>, flags: DequeueFlag) {
        // TODO:sched_core
        if !flags.contains(DequeueFlag::DEQUEUE_NOCLOCK) {
            self.update_rq_clock()
        }
        if !flags.contains(DequeueFlag::DEQUEUE_SAVE) {
            let sched_info = pcb.sched_info().sched_stat.upgradeable_read_irqsave();
            if sched_info.last_queued > 0 {
                // Account the runnable-but-waiting time to both the task and
                // this run queue.
                let delta = self.clock - sched_info.last_queued;
                let mut sched_info = sched_info.upgrade();
                sched_info.last_queued = 0;
                sched_info.run_delay += delta as usize;
                self.sched_info.run_delay += delta as usize;
            }
        }
        match pcb.sched_info().policy() {
            SchedPolicy::CFS => CompletelyFairScheduler::dequeue(self, pcb, flags),
            SchedPolicy::FIFO => todo!(),
            SchedPolicy::RT => todo!(),
            SchedPolicy::IDLE => todo!(),
        }
    }

    /// Activates a task: enqueues it and marks it as queued on this rq.
    pub fn activate_task(&mut self, pcb: &Arc<ProcessControlBlock>, mut flags: EnqueueFlag) {
        if *pcb.sched_info().on_rq.lock_irqsave() == OnRq::Migrating {
            flags |= EnqueueFlag::ENQUEUE_MIGRATED;
        }
        if flags.contains(EnqueueFlag::ENQUEUE_MIGRATED) {
            todo!()
        }
        self.enqueue_task(pcb.clone(), flags);
        *pcb.sched_info().on_rq.lock_irqsave() = OnRq::Queued;
    }

    /// Checks whether `pcb` may preempt the task currently running here.
    ///
    /// (The "currnet" spelling is kept for consistency with the rest of the
    /// codebase.)
    #[allow(clippy::comparison_chain)]
    pub fn check_preempt_currnet(&mut self, pcb: &Arc<ProcessControlBlock>, flags: WakeupFlags) {
        if pcb.sched_info().policy() == self.current().sched_info().policy() {
            // Same scheduling class: let the class decide.
            match self.current().sched_info().policy() {
                SchedPolicy::CFS => {
                    CompletelyFairScheduler::check_preempt_currnet(self, pcb, flags)
                }
                SchedPolicy::FIFO => todo!(),
                SchedPolicy::RT => todo!(),
                SchedPolicy::IDLE => IdleScheduler::check_preempt_currnet(self, pcb, flags),
            }
        } else if pcb.sched_info().policy() < self.current().sched_info().policy() {
            // The waking task's class has higher priority (smaller value):
            // force a reschedule.
            self.resched_current();
        }
        if *self.current().sched_info().on_rq.lock_irqsave() == OnRq::Queued
            && self.current().flags().contains(ProcessFlags::NEED_SCHEDULE)
        {
            // A reschedule is already pending: request skipping the next
            // rq-clock update.
            self.clock_updata_flags
                .insert(ClockUpdataFlag::RQCF_REQ_SKIP);
        }
    }

    /// Deactivates a task: marks it off-queue (or migrating) and dequeues it.
    pub fn deactivate_task(&mut self, pcb: Arc<ProcessControlBlock>, flags: DequeueFlag) {
        *pcb.sched_info().on_rq.lock_irqsave() = if flags.contains(DequeueFlag::DEQUEUE_SLEEP) {
            OnRq::None
        } else {
            OnRq::Migrating
        };
        self.dequeue_task(pcb, flags);
    }

    /// Returns this queue's CFS sub-queue.
    #[inline]
    pub fn cfs_rq(&self) -> Arc<CfsRunQueue> {
        self.cfs.clone()
    }

    /// Advances the rq clock from the per-CPU scheduler clock.
    pub fn update_rq_clock(&mut self) {
        // This clock update was requested to be skipped.
        if self
            .clock_updata_flags
            .contains(ClockUpdataFlag::RQCF_ACT_SKIP)
        {
            return;
        }
        let clock = SchedClock::sched_clock_cpu(self.cpu);
        if clock < self.clock {
            // Never move the rq clock backwards.
            return;
        }
        let delta = clock - self.clock;
        self.clock += delta;
        self.update_rq_clock_task(delta);
    }

    /// Advances the task clock: the rq-clock delta minus the time consumed by
    /// IRQ processing since the last update.
    pub fn update_rq_clock_task(&mut self, mut delta: u64) {
        let mut irq_delta = irq_time_read(self.cpu) - self.prev_irq_time;
        compiler_fence(Ordering::SeqCst);
        if irq_delta > delta {
            // Clamp: never account more IRQ time than wall time elapsed.
            irq_delta = delta;
        }
        self.prev_irq_time += irq_delta;
        delta -= irq_delta;
        // todo: psi?
        compiler_fence(Ordering::SeqCst);
        self.clock_task += delta;
        compiler_fence(Ordering::SeqCst);
        // todo: pelt?
    }

    /// Folds this queue's active-task count (runnable + uninterruptible,
    /// minus `adjust`) into a delta against the last recorded value.
    ///
    /// Linux's `calc_load_fold_active()` returns a *signed* delta; with
    /// `usize` we use `wrapping_sub` so that a shrinking count yields the
    /// two's-complement encoding of the negative difference, which the
    /// caller's (wrapping) atomic `fetch_add` on `CALCULATE_LOAD_TASKS` then
    /// applies correctly. (Fix: the previous plain `-` panicked in debug
    /// builds whenever the active count dropped.)
    fn calculate_load_fold_active(&mut self, adjust: usize) -> usize {
        let mut nr_active = self.nr_running - adjust;
        nr_active += self.nr_uninterruptible;
        let mut delta = 0;
        if nr_active != self.cala_load_active {
            delta = nr_active.wrapping_sub(self.cala_load_active);
            self.cala_load_active = nr_active;
        }
        delta
    }

    /// ## Tick-driven global load accounting.
    pub fn calculate_global_load_tick(&mut self) {
        if clock() < self.cala_load_update as u64 {
            // Not yet due for an update.
            return;
        }
        let delta = self.calculate_load_fold_active(0);
        if delta != 0 {
            CALCULATE_LOAD_TASKS.fetch_add(delta, Ordering::SeqCst);
        }
        self.cala_load_update += LOAD_FREQ;
    }

    /// Increases `nr_running`, flagging the queue as overloaded once it
    /// crosses the two-task threshold.
    pub fn add_nr_running(&mut self, nr_running: usize) {
        let prev = self.nr_running;
        self.nr_running = prev + nr_running;
        if prev < 2 && self.nr_running >= 2 && !self.overload {
            self.overload = true;
        }
    }

    /// Decreases `nr_running`; callers must not remove more tasks than were
    /// accounted, or this underflows.
    pub fn sub_nr_running(&mut self, count: usize) {
        self.nr_running -= count;
    }

    /// Returns true when every runnable task on this queue is an idle-class
    /// CFS task (i.e. the queue is effectively idle).
    pub fn sched_idle_rq(&self) -> bool {
        return unlikely(
            self.nr_running == self.cfs.idle_h_nr_running as usize && self.nr_running > 0,
        );
    }

    /// Returns the task currently running on this queue; panics if none.
    #[inline]
    pub fn current(&self) -> Arc<ProcessControlBlock> {
        self.current.upgrade().unwrap()
    }

    #[inline]
    pub fn set_current(&mut self, pcb: Weak<ProcessControlBlock>) {
        self.current = pcb;
    }

    #[inline]
    pub fn set_idle(&mut self, pcb: Weak<ProcessControlBlock>) {
        self.idle = pcb;
    }

    #[inline]
    pub fn clock_task(&self) -> u64 {
        self.clock_task
    }

    /// Requests a reschedule of the task currently running on this queue.
    pub fn resched_current(&self) {
        let current = self.current();
        // Already marked for rescheduling?
        if unlikely(current.flags().contains(ProcessFlags::NEED_SCHEDULE)) {
            return;
        }
        let cpu = self.cpu;
        if cpu == smp_get_processor_id().data() as usize {
            // Local CPU: just set the need-resched flag.
            ProcessManager::current_pcb()
                .flags()
                .insert(ProcessFlags::NEED_SCHEDULE);
            return;
        }
        // Remote CPU: kick it with a reschedule IPI.
        send_resched_ipi(ProcessorId::new(cpu as u32));
    }

    /// Picks the next task to run, falling back to the idle task when CFS has
    /// nothing runnable.
    pub fn pick_next_task(&mut self, prev: Arc<ProcessControlBlock>) -> Arc<ProcessControlBlock> {
        if likely(prev.sched_info().policy() >= SchedPolicy::CFS)
            && self.nr_running == self.cfs.h_nr_running as usize
        {
            // Fast path: every runnable task belongs to CFS.
            let p = CompletelyFairScheduler::pick_next_task(self, Some(prev.clone()));
            if let Some(pcb) = p.as_ref() {
                return pcb.clone();
            } else {
                // CFS found nothing: retire `prev` via its class, then run
                // the idle task.
                match prev.sched_info().policy() {
                    SchedPolicy::FIFO => todo!(),
                    SchedPolicy::RT => todo!(),
                    SchedPolicy::CFS => CompletelyFairScheduler::put_prev_task(self, prev),
                    SchedPolicy::IDLE => IdleScheduler::put_prev_task(self, prev),
                }
                // Fall back to idle.
                return self.idle.upgrade().unwrap();
            }
        }
        todo!()
    }
}
bitflags! {
    pub struct SchedFeature:u32 {
        /// Give sleeping tasks only 50% of their service deficit, so a woken
        /// sleeper gets some service but cannot hog the CPU.
        const GENTLE_FAIR_SLEEPERS = 1 << 0;
        /// Place new tasks behind, so already-running tasks are not starved.
        const START_DEBIT = 1 << 1;
        /// Prefer the last-woken task on schedule, since it may access data
        /// the previously woken task used, improving cache locality.
        const NEXT_BUDDY = 1 << 2;
        /// Prefer the last-run task on schedule, since it may access the same
        /// data the previous run touched, improving cache locality.
        const LAST_BUDDY = 1 << 3;
        /// Consider a task's buddy cache-hot and resist migrating buddies,
        /// improving cache locality.
        const CACHE_HOT_BUDDY = 1 << 4;
        /// Allow wakeups to preempt the current task.
        const WAKEUP_PREEMPTION = 1 << 5;
        /// Reduce CPU capacity based on time the CPU spent not running tasks.
        const NONTASK_CAPACITY = 1 << 6;
        /// Queue remote wakeups on the target CPU and process them with a
        /// scheduler IPI, reducing run-queue lock contention.
        const TTWU_QUEUE = 1 << 7;
        /// On wakeup, try to limit pointless scanning of the last-level-cache
        /// (LLC) domain.
        const SIS_UTIL = 1 << 8;
        /// On real-time (RT) task migration, send IPIs to reduce cross-CPU
        /// lock contention.
        const RT_PUSH_IPI = 1 << 9;
        /// Use estimated CPU utilization in scheduling decisions.
        const UTIL_EST = 1 << 10;
        const UTIL_EST_FASTUP = 1 << 11;
        /// Enable the alternative scheduling period.
        const ALT_PERIOD = 1 << 12;
        /// Enable the base time slice.
        const BASE_SLICE = 1 << 13;
    }
    pub struct EnqueueFlag: u8 {
        const ENQUEUE_WAKEUP = 0x01;
        const ENQUEUE_RESTORE = 0x02;
        const ENQUEUE_MOVE = 0x04;
        const ENQUEUE_NOCLOCK = 0x08;
        const ENQUEUE_MIGRATED = 0x40;
        const ENQUEUE_INITIAL = 0x80;
    }
    pub struct DequeueFlag: u8 {
        const DEQUEUE_SLEEP = 0x01;
        const DEQUEUE_SAVE = 0x02; /* Matches ENQUEUE_RESTORE */
        const DEQUEUE_MOVE = 0x04; /* Matches ENQUEUE_MOVE */
        const DEQUEUE_NOCLOCK = 0x08; /* Matches ENQUEUE_NOCLOCK */
    }
    pub struct WakeupFlags: u8 {
        /* Wake flags. The first three directly map to some SD flag value */
        const WF_EXEC = 0x02; /* Wakeup after exec; maps to SD_BALANCE_EXEC */
        const WF_FORK = 0x04; /* Wakeup after fork; maps to SD_BALANCE_FORK */
        const WF_TTWU = 0x08; /* Wakeup; maps to SD_BALANCE_WAKE */
        const WF_SYNC = 0x10; /* Waker goes to sleep after wakeup */
        const WF_MIGRATED = 0x20; /* Internal use, task got migrated */
        const WF_CURRENT_CPU = 0x40; /* Prefer to move the wakee to the current CPU. */
    }
    pub struct SchedMode: u8 {
        /*
         * Constants for the sched_mode argument of __schedule().
         *
         * The mode argument allows RT enabled kernels to differentiate a
         * preemption from blocking on an 'sleeping' spin/rwlock. Note that
         * SM_MASK_PREEMPT for !RT has all bits set, which allows the compiler to
         * optimize the AND operation out and just check for zero.
         */
        /// The task will not be re-queued during this schedule; it must be
        /// woken up manually afterwards.
        const SM_NONE = 0x0;
        /// Re-queue the task: the current task was preempted (e.g. by the
        /// tick) and stays runnable.
        const SM_PREEMPT = 0x1;
        /// RT-lock related.
        const SM_RTLOCK_WAIT = 0x2;
        /// By default identical to SM_PREEMPT.
        const SM_MASK_PREEMPT = Self::SM_PREEMPT.bits;
    }
}
/// A task's relationship to a run queue.
///
/// (Fix: derive `Eq` alongside `PartialEq` — the equality here is total, and
/// clippy's `derive_partial_eq_without_eq` flags the omission; the addition is
/// fully backward compatible.)
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub enum OnRq {
    /// The task is queued on a run queue.
    Queued,
    /// The task is being migrated between run queues.
    Migrating,
    /// The task is not on any run queue.
    None,
}
impl ProcessManager {
    /// Timer-tick hook: accounts one tick of CPU time to the current task and
    /// then runs the scheduler tick for this CPU.
    ///
    /// `user_tick`: whether the tick interrupted user mode (`true`) or kernel
    /// mode (`false`).
    pub fn update_process_times(user_tick: bool) {
        let pcb = Self::current_pcb();
        CpuTimeFunc::irqtime_account_process_tick(&pcb, user_tick, 1);
        scheduler_tick();
    }
}
/// ## Called on every timer tick.
pub fn scheduler_tick() {
    fence(Ordering::SeqCst);
    // Index of the CPU that took the tick.
    let cpu_idx = smp_get_processor_id().data() as usize;
    // This CPU's run queue.
    let rq = cpu_rq(cpu_idx);
    let (rq, guard) = rq.self_lock();
    // Task currently running on this queue.
    let current = rq.current();
    // Advance the run-queue clock.
    rq.update_rq_clock();
    // Let the task's scheduling class handle the tick (may request preemption).
    match current.sched_info().policy() {
        SchedPolicy::CFS => CompletelyFairScheduler::tick(rq, current, false),
        SchedPolicy::FIFO => todo!(),
        SchedPolicy::RT => todo!(),
        SchedPolicy::IDLE => IdleScheduler::tick(rq, current, false),
    }
    rq.calculate_global_load_tick();
    drop(guard);
    // TODO: load balancing.
}
/// ## Performs a reschedule.
///
/// Panics (asserts) if `preempt_count` is non-zero, since scheduling while
/// preemption is disabled would be a bug.
#[inline]
pub fn schedule(sched_mod: SchedMode) {
    // Interrupts stay disabled for the whole scheduling operation.
    let _guard = unsafe { CurrentIrqArch::save_and_disable_irq() };
    assert_eq!(ProcessManager::current_pcb().preempt_count(), 0);
    __schedule(sched_mod);
}
/// ## Performs a reschedule.
///
/// Unlike `schedule`, this does not check `preempt_count`, which makes it
/// suitable for contexts such as the timer interrupt.
pub fn __schedule(sched_mod: SchedMode) {
    let cpu = smp_get_processor_id().data() as usize;
    let rq = cpu_rq(cpu);
    let mut prev = rq.current();
    if let ProcessState::Exited(_) = prev.clone().sched_info().inner_lock_read_irqsave().state() {
        // We entered the scheduler from exit(): the rq's `current` is the
        // exited task, so use the real current PCB instead.
        prev = ProcessManager::current_pcb();
    }
    // TODO: hrtick_clear(rq);
    let (rq, _guard) = rq.self_lock();
    // Shift the clock flags left by one so RQCF_REQ_SKIP becomes
    // RQCF_ACT_SKIP (mirrors Linux's `rq->clock_update_flags <<= 1`).
    rq.clock_updata_flags = ClockUpdataFlag::from_bits_truncate(rq.clock_updata_flags.bits() << 1);
    rq.update_rq_clock();
    rq.clock_updata_flags = ClockUpdataFlag::RQCF_UPDATE;
    if !sched_mod.contains(SchedMode::SM_MASK_PREEMPT)
        && prev.sched_info().policy() != SchedPolicy::IDLE
        && prev.sched_info().inner_lock_read_irqsave().is_mark_sleep()
    {
        // Voluntary sleep (not a preemption): take the previous task off the
        // run queue.
        // TODO: signal handling belongs here, see
        // https://code.dragonos.org.cn/xref/linux-6.6.21/kernel/sched/core.c?r=&mo=172979&fi=6578#6630
        rq.deactivate_task(
            prev.clone(),
            DequeueFlag::DEQUEUE_SLEEP | DequeueFlag::DEQUEUE_NOCLOCK,
        );
    }
    let next = rq.pick_next_task(prev.clone());
    prev.flags().remove(ProcessFlags::NEED_SCHEDULE);
    fence(Ordering::SeqCst);
    if likely(!Arc::ptr_eq(&prev, &next)) {
        rq.set_current(Arc::downgrade(&next));
        compiler_fence(Ordering::SeqCst);
        // NOTE(review): `_guard` (the rq lock) is still held across the
        // context switch — presumably released on the far side of the switch;
        // TODO confirm.
        unsafe { ProcessManager::switch_process(prev, next) };
    } else {
        // No switch needed; sanity-check that the rq's notion of "current"
        // agrees with the actual current PCB.
        assert!(
            Arc::ptr_eq(&ProcessManager::current_pcb(), &prev),
            "{}",
            ProcessManager::current_pcb().basic().name()
        );
    }
}
/// Scheduler-side setup for a freshly forked task.
///
/// The child inherits the parent's normal priority. Deadline priorities make
/// the fork fail with EAGAIN; real-time priorities select the RT policy;
/// everything else gets CFS. Finally the child's scheduling entity's
/// load-average state is initialized.
pub fn sched_fork(pcb: &Arc<ProcessControlBlock>) -> Result<(), SystemError> {
    // The child inherits the parent's normal priority.
    let inherited_prio = ProcessManager::current_pcb()
        .sched_info()
        .prio_data
        .read_irqsave()
        .normal_prio;
    let mut prio_guard = pcb.sched_info().prio_data.write_irqsave();
    prio_guard.prio = inherited_prio;
    if PrioUtil::dl_prio(prio_guard.prio) {
        // Deadline tasks may not fork.
        return Err(SystemError::EAGAIN_OR_EWOULDBLOCK);
    }
    let chosen_policy = if PrioUtil::rt_prio(prio_guard.prio) {
        SchedPolicy::RT
    } else {
        SchedPolicy::CFS
    };
    *pcb.sched_info().sched_policy.write_irqsave() = chosen_policy;
    pcb.sched_info()
        .sched_entity()
        .force_mut()
        .init_entity_runnable_average();
    Ok(())
}
/// Second stage of scheduler fork setup: binds the new task to the current
/// CPU's run queue and lets its scheduling class do fork-time placement.
pub fn sched_cgroup_fork(pcb: &Arc<ProcessControlBlock>) {
    __set_task_cpu(pcb, smp_get_processor_id());
    match pcb.sched_info().policy() {
        SchedPolicy::RT => todo!(),
        SchedPolicy::FIFO => todo!(),
        SchedPolicy::CFS => CompletelyFairScheduler::task_fork(pcb.clone()),
        SchedPolicy::IDLE => todo!(),
    }
}
/// Attaches the task's scheduling entity to `cpu`'s CFS run queue.
fn __set_task_cpu(pcb: &Arc<ProcessControlBlock>, cpu: ProcessorId) {
    // TODO: Fixme There is not implement group sched;
    let se = pcb.sched_info().sched_entity();
    let rq = cpu_rq(cpu.data() as usize);
    se.force_mut().set_cfs(Arc::downgrade(&rq.cfs));
}
/// Initializes scheduler state: per-CPU IRQ-time accounting slots and the
/// per-CPU run queues.
///
/// Must run exactly once during early boot, before scheduling starts on any
/// CPU — the unsafe writes to `CPU_IRQ_TIME` rely on that single-threaded
/// context.
#[inline(never)]
pub fn sched_init() {
    // Initialize the per-CPU variables.
    unsafe {
        CPU_IRQ_TIME = Some(Vec::with_capacity(PerCpu::MAX_CPU_NUM as usize));
        // One leaked (and thus 'static) IrqTime per possible CPU.
        CPU_IRQ_TIME
            .as_mut()
            .unwrap()
            .resize_with(PerCpu::MAX_CPU_NUM as usize, || Box::leak(Box::default()));
        let mut cpu_runqueue = Vec::with_capacity(PerCpu::MAX_CPU_NUM as usize);
        for cpu in 0..PerCpu::MAX_CPU_NUM as usize {
            let rq = Arc::new(CpuRunQueue::new(cpu));
            // Give the CFS sub-queue a back-pointer to its owning run queue.
            rq.cfs.force_mut().set_rq(Arc::downgrade(&rq));
            cpu_runqueue.push(rq);
        }
        CPU_RUNQUEUE.init(PerCpuVar::new(cpu_runqueue).unwrap());
    };
}
/// Sends a "kick" IPI to `cpu`, asking it to re-run its scheduler.
#[inline]
pub fn send_resched_ipi(cpu: ProcessorId) {
    let target = IpiTarget::Specified(cpu);
    send_ipi(IpiKind::KickCpu, target);
}