/*
 * __schedule() - choose the next task for this CPU's runqueue and switch to it.
 * @preempt: true when the caller is preempting the current task. In that
 *           case prev stays on the runqueue even if prev->state is non-zero,
 *           and any resulting switch is counted in prev->nivcsw.
 *
 * Looks up the runqueue of the current CPU, disables local interrupts and
 * takes rq->lock, optionally dequeues the outgoing task, asks
 * pick_next_task() for a successor and, when that differs from the outgoing
 * task, calls context_switch(). The runqueue lock is released on both paths
 * (explicitly in the "same task" branch, or by context_switch() according to
 * the inline note) before balance_callback() is invoked.
 */
static void __sched notrace __schedule(bool preempt)
{
	struct task_struct *prev, *next;
	unsigned long *switch_count;
	struct rq *rq;
	int cpu;

	/*
	 * Step 1: find the runqueue of the current CPU and remember the task
	 * that is currently running on it as prev.
	 */
	cpu = smp_processor_id();
	rq = cpu_rq(cpu);
	prev = rq->curr;

	/*
	 * do_exit() calls schedule() with preemption disabled as an exception;
	 * however we must fix that up, otherwise the next task will see an
	 * inconsistent (higher) preempt count.
	 *
	 * It also avoids the below schedule_debug() test from complaining
	 * about this.
	 */
	if (unlikely(prev->state == TASK_DEAD))
		preempt_enable_no_resched_notrace();

	/*
	 * Debug/sanity check on prev. NOTE(review): the original annotation
	 * says this catches scheduling attempts made while kernel preemption
	 * is disabled; schedule_debug() is defined elsewhere - confirm there.
	 */
	schedule_debug(prev);

	if (sched_feat(HRTICK))
		hrtick_clear(rq);

	/* Disable interrupts on the local CPU. */
	local_irq_disable();

	/* Report to RCU that this CPU is going through a context switch. */
	rcu_note_context_switch();

	/*
	 * Make sure that signal_pending_state()->signal_pending() below
	 * can't be reordered with __set_current_state(TASK_INTERRUPTIBLE)
	 * done by the caller to avoid the race with signal_wake_up().
	 */
	smp_mb__before_spinlock();

	/* Lock the runqueue. */
	raw_spin_lock(&rq->lock);
	lockdep_pin_lock(&rq->lock);

	rq->clock_skip_update <<= 1; /* promote REQ to ACT */

	/*
	 * Switch accounting: start with the involuntary counter (the
	 * preemption case); it is replaced by the voluntary counter below
	 * when prev gave up the CPU itself.
	 */
	switch_count = &prev->nivcsw;

	/*
	 * Decide whether prev has to leave the runqueue.
	 *
	 * Looking at the preemption indication alone is not enough: a task
	 * may also call schedule() directly, so prev->state has to be
	 * consulted as well; otherwise a task that is going to sleep could be
	 * mistaken for a runnable one. We get here either through a voluntary
	 * schedule() or through preemption. (Per the original annotation,
	 * when a time slice expires the tick only marks the task as needing a
	 * reschedule and the actual preemption happens at the end of the timer
	 * interrupt via preempt_schedule_irq(); none of that is visible in
	 * this function.)
	 *
	 * Normally a prev that is no longer runnable is removed from the
	 * runqueue. The exception is a prev for which signal_pending_state()
	 * reports a pending signal for its current state (e.g. a
	 * TASK_INTERRUPTIBLE sleeper, per the original annotation): it is set
	 * back to TASK_RUNNING and left on the runqueue.
	 *
	 * The branch below is taken only when both conditions hold:
	 *   1. prev->state is non-zero, i.e. prev is not in the running state;
	 *   2. prev is not being preempted (preempt is false).
	 */
	if (!preempt && prev->state) {
		/* A signal is pending for prev's current sleep state. */
		if (unlikely(signal_pending_state(prev->state, prev))) {
			/* Make prev runnable again; it stays on the runqueue. */
			prev->state = TASK_RUNNING;
		} else {
			/* Otherwise prev has to be taken off the runqueue. */
			deactivate_task(rq, prev, DEQUEUE_SLEEP);
			/* Record that prev is no longer on a runqueue. */
			prev->on_rq = 0;

			/*
			 * If a worker went to sleep, notify and ask workqueue
			 * whether it wants to wake up a task to maintain
			 * concurrency.
			 */
			if (prev->flags & PF_WQ_WORKER) {
				struct task_struct *to_wakeup;

				to_wakeup = wq_worker_sleeping(prev);
				if (to_wakeup)
					try_to_wake_up_local(to_wakeup);
			}
		}
		/* Not a preemption: account a voluntary context switch. */
		switch_count = &prev->nvcsw;
	}

	/* prev is still queued on the runqueue: update the runqueue clock. */
	if (task_on_rq_queued(prev))
		update_rq_clock(rq);

	/* Select the task that should run next on this runqueue. */
	next = pick_next_task(rq, prev);
	/* Clear the reschedule request on prev (TIF_NEED_RESCHED). */
	clear_tsk_need_resched(prev);
	/* Clear the preemption need-resched indication as well. */
	clear_preempt_need_resched();
	rq->clock_skip_update = 0;

	/* A different task was selected: perform the switch. */
	if (likely(prev != next)) {
		rq->nr_switches++;	/* per-runqueue switch counter */
		rq->curr = next;	/* next becomes the runqueue's current task */
		++*switch_count;	/* prev's counter selected above */

		trace_sched_switch(preempt, prev, next);
		/* Switch context from prev to next. */
		rq = context_switch(rq, prev, next); /* unlocks the rq */
	} else {
		/* prev was selected again: no switch, only release the lock. */
		lockdep_unpin_lock(&rq->lock);
		raw_spin_unlock_irq(&rq->lock);
	}

	balance_callback(rq);
}
/*
 * schedule() is the main scheduler function.
 *
 * Takes no arguments and returns nothing. With preemption disabled it locks
 * the current CPU's runqueue, optionally dequeues the outgoing task, picks a
 * successor and switches to it if it is a different task. After preemption
 * has been re-enabled the whole sequence is repeated for as long as
 * need_resched() is true.
 */
asmlinkage void __sched schedule(void)
{
	struct task_struct *prev, *next;
	unsigned long *switch_count;
	struct rq *rq;
	int cpu;

	/* Restart point, re-entered from the bottom while need_resched() holds. */
need_resched:
	preempt_disable();
	/* Locate this CPU's runqueue; its current task becomes prev. */
	cpu = smp_processor_id();
	rq = cpu_rq(cpu);
	/* Presumably reports the context switch on this CPU to RCU - confirm. */
	rcu_note_context_switch(cpu);
	prev = rq->curr;

	/* Debug/sanity check on prev; schedule_debug() is defined elsewhere. */
	schedule_debug(prev);

	if (sched_feat(HRTICK))
		hrtick_clear(rq);

	/*
	 * Lock the runqueue. The matching release is raw_spin_unlock_irq()
	 * in the "same task" branch below, or happens in context_switch().
	 */
	raw_spin_lock_irq(&rq->lock);

	/*
	 * Start with the involuntary switch counter; replaced by the voluntary
	 * one below when prev is giving up the CPU on its own.
	 */
	switch_count = &prev->nivcsw;
	/*
	 * prev is not in the running state (non-zero prev->state) and
	 * PREEMPT_ACTIVE is not set in the preempt count.
	 * NOTE(review): PREEMPT_ACTIVE presumably marks an in-progress kernel
	 * preemption, in which case prev must stay queued - confirm.
	 */
	if (prev->state && !(preempt_count() & PREEMPT_ACTIVE)) {
		if (unlikely(signal_pending_state(prev->state, prev))) {
			/* Signal pending: keep prev runnable and on the runqueue. */
			prev->state = TASK_RUNNING;
		} else {
			/*
			 * If a worker is going to sleep, notify and
			 * ask workqueue whether it wants to wake up a
			 * task to maintain concurrency. If so, wake
			 * up the task.
			 */
			if (prev->flags & PF_WQ_WORKER) {
				struct task_struct *to_wakeup;

				to_wakeup = wq_worker_sleeping(prev, cpu);
				if (to_wakeup)
					try_to_wake_up_local(to_wakeup);
			}
			/* Take prev off the runqueue. */
			deactivate_task(rq, prev, DEQUEUE_SLEEP);

			/*
			 * If we are going to sleep and we have plugged IO queued, make
			 * sure to submit it to avoid deadlocks.
			 *
			 * The runqueue lock is dropped around the flush and
			 * re-taken afterwards.
			 */
			if (blk_needs_flush_plug(prev)) {
				raw_spin_unlock(&rq->lock);
				blk_schedule_flush_plug(prev);
				raw_spin_lock(&rq->lock);
			}
		}
		/* Count this as a voluntary context switch. */
		switch_count = &prev->nvcsw;
	}

	/* Hook called before the next task is chosen (defined elsewhere). */
	pre_schedule(rq, prev);

	/*
	 * No runnable task on this runqueue: call idle_balance(), presumably
	 * to pull work over from another runqueue - TODO confirm.
	 */
	if (unlikely(!rq->nr_running))
		idle_balance(cpu, rq);

	/* Hand prev back and select the task that should run next. */
	put_prev_task(rq, prev);
	next = pick_next_task(rq);
	/* Clear the reschedule request on prev. */
	clear_tsk_need_resched(prev);
	rq->skip_clock_update = 0;

	/* A different task was selected: account for and perform the switch. */
	if (likely(prev != next)) {
		rq->nr_switches++;
		rq->curr = next;
		++*switch_count;

		context_switch(rq, prev, next); /* unlocks the rq */
		/*
		 * The context switch have flipped the stack from under us
		 * and restored the local variables which were saved when
		 * this task called schedule() in the past. prev == current
		 * is still correct, but it can be moved to another cpu/rq.
		 */
		cpu = smp_processor_id();
		rq = cpu_rq(cpu);
	} else
		raw_spin_unlock_irq(&rq->lock);	/* same task: just unlock */

	/* Hook called once the switch decision has been carried out. */
	post_schedule(rq);

	/*
	 * Re-enable preemption without rescheduling from inside the enable,
	 * then loop if a reschedule has been requested in the meantime.
	 */
	preempt_enable_no_resched();
	if (need_resched())
		goto need_resched;
}