/*
 * schedule() is the main scheduler function.
 */
asmlinkage void __sched schedule(void)
{
    struct task_struct *prev, *next;
    unsigned long *switch_count;
    struct rq *rq;
    int cpu;

need_resched:
    preempt_disable();
    cpu = smp_processor_id();
    rq = cpu_rq(cpu);
    rcu_sched_qs(cpu);
    prev = rq->curr;
    switch_count = &prev->nivcsw;

    release_kernel_lock(prev);
need_resched_nonpreemptible:

    schedule_debug(prev);

    if (sched_feat(HRTICK))
        hrtick_clear(rq);

    raw_spin_lock_irq(&rq->lock);
    update_rq_clock(rq);
    clear_tsk_need_resched(prev);

    if (prev->state && !(preempt_count() & PREEMPT_ACTIVE)) {
        if (unlikely(signal_pending_state(prev->state, prev)))
            prev->state = TASK_RUNNING;
        else
            deactivate_task(rq, prev, 1);
        switch_count = &prev->nvcsw;
    }

    pre_schedule(rq, prev);

    if (unlikely(!rq->nr_running))
        idle_balance(cpu, rq);

    put_prev_task(rq, prev);
    next = pick_next_task(rq);

    if (likely(prev != next)) {
        sched_info_switch(prev, next);
        perf_event_task_sched_out(prev, next);

        rq->nr_switches++;
        rq->curr = next;
        ++*switch_count;

        context_switch(rq, prev, next); /* unlocks the rq */
        /*
         * The context switch might have flipped the stack from under
         * us, hence refresh the local variables.
         */
        cpu = smp_processor_id();
        rq = cpu_rq(cpu);
    } else
        raw_spin_unlock_irq(&rq->lock);

    post_schedule(rq);

    if (unlikely(reacquire_kernel_lock(current) < 0))
        goto need_resched_nonpreemptible;

    preempt_enable_no_resched();
    if (need_resched())
        goto need_resched;
}
/*
 * Push a pjob onto the global RTWS runqueue. A task that was just
 * preempted is still on its local runqueue, so it must be dequeued
 * there before being enqueued globally.
 */
static void push_task_rtws(struct rq *rq, struct task_struct *p, int preempted)
{
    struct global_rq *global_rq = rq->rtws.global_rq;

    if (preempted)
        deactivate_task(rq, p, 0);

    printk(KERN_INFO "****global enqueue, task %d on cpu %d *******\n",
           p->pid, rq->cpu);

    raw_spin_lock(&global_rq->lock);
    __enqueue_task_rtws(global_rq, &p->rtws);
    raw_spin_unlock(&global_rq->lock);
}
void ActionWithVessel::runAllTasks(){
  if( getExchangeStep() && nactive_tasks!=fullTaskList.size() ) error("contributors must be unlocked during exchange steps");
  plumed_massert( functions.size()>0, "you must have a call to readVesselKeywords somewhere" );

  unsigned stride=comm.Get_size();
  unsigned rank=comm.Get_rank();
  if(serial){ stride=1; rank=0; }

  // Make sure jobs are done
  doJobsRequiredBeforeTaskList();

  for(unsigned i=rank;i<nactive_tasks;i+=stride){
      // The index of the task in the full list
      task_index=indexOfTaskInFullList[i];
      // Store the task we are currently working on
      current=partialTaskList[i];
      // Calculate the stuff in the loop for this action
      performTask();
      // Weight should be between zero and one
      plumed_dbg_assert( thisval[1]>=0 && thisval[1]<=1.0 );

      // Check for the condition that allows us to skip the calculation:
      // the weight of this contribution is low.
      // N.B. Here weights are assumed to be between zero and one.
      if( thisval[1]<tolerance ){
         // Clear the derivatives
         clearAfterTask();
         // Deactivate the task if its weight is less than the neighbour list tolerance
         if( thisval[1]<nl_tolerance && contributorsAreUnlocked ) deactivate_task();
         continue;
      }

      // Now calculate all the functions.
      // If the contribution of this quantity is very small at neighbour list
      // time, ignore it until the next neighbour list update.
      if( !calculateAllVessels() && contributorsAreUnlocked ) deactivate_task();
  }
  finishComputations();
}
static int steal_pjob_rtws(struct rq *this_rq)
{
    int ret = 0, this_cpu = this_rq->cpu, target_cpu;
    struct task_struct *p;
    struct rq *target_rq;
    struct global_rq *global_rq = this_rq->rtws.global_rq;

    if (global_rq->random) {
        /*
         * Pseudo-random selection of our victim rq, among the
         * rqs that currently hold stealable pjobs.
         */
        target_cpu = find_random_stealable_cpu_rtws(&this_rq->rd->rtwss_cpudl, this_rq->cpu);
    } else {
        /*
         * When not in random mode, find the rq holding the
         * stealable pjob with the earliest deadline.
         */
        target_cpu = find_earliest_stealable_cpu_rtws(&this_rq->rd->rtwss_cpudl);
    }

    if (target_cpu == -1)
        return 0;

    printk(KERN_INFO "stealable cpu %d\n", target_cpu);

    target_rq = cpu_rq(target_cpu);

    /*
     * We can potentially drop this_rq's lock in
     * double_lock_balance, and another CPU could alter this_rq.
     */
    double_lock_balance(this_rq, target_rq);

    if (unlikely(target_rq->rtws.nr_running <= 1))
        goto unlock;

    if (unlikely(this_rq->rtws.nr_running))
        goto unlock;

    p = pick_next_stealable_pjob_rtws(&target_rq->rtws);

    if (p) {
        WARN_ON(p == target_rq->curr);
        WARN_ON(!p->se.on_rq);
        WARN_ON(!rtws_task(p));

        /* Migrate the stolen pjob from the victim rq to this rq. */
        deactivate_task(target_rq, p, 0);
        p->rtws.stolen = target_cpu;
        set_task_cpu(p, this_cpu);
        activate_task(this_rq, p, 0);
        this_rq->rtws.tot_steals++;

        printk(KERN_INFO "=task %d STOLEN by cpu %d from cpu %d!\n",
               p->pid, this_cpu, target_cpu);
        ret = 1;
    }

unlock:
    double_unlock_balance(this_rq, target_rq);

    return ret;
}
void ActionWithVessel::runAllTasks(){
  if( getExchangeStep() && nactive_tasks!=fullTaskList.size() ) error("contributors must be unlocked during exchange steps");
  plumed_massert( functions.size()>0, "you must have a call to readVesselKeywords somewhere" );
  unsigned stride=comm.Get_size();
  unsigned rank=comm.Get_rank();
  if(serial){ stride=1; rank=0; }

  // Make sure jobs are done
  if(timers) stopwatch.start("1 Prepare Tasks");
  doJobsRequiredBeforeTaskList();
  if(timers) stopwatch.stop("1 Prepare Tasks");

  // Get number of threads for OpenMP
  unsigned nt=OpenMP::getNumThreads();
  if( nt*stride*10>nactive_tasks ) nt=nactive_tasks/stride/10;
  if( nt==0 ) nt=1;

  // Get size for buffer
  unsigned bsize=0, bufsize=getSizeOfBuffer( bsize );
  // Clear buffer
  buffer.assign( buffer.size(), 0.0 );
  // Switch off calculation of derivatives in main loop
  if( dertime_can_be_off ) dertime=false;

  // Build a list to store the indices of the active derivatives
  std::vector<unsigned> der_list;
  if( mydata ) der_list.resize( mydata->getSizeOfDerivativeList(), 0 );

  // Build storage stuff for loop
  // std::vector<double> buffer( bufsize, 0.0 );
  if(timers) stopwatch.start("2 Loop over tasks");
#pragma omp parallel num_threads(nt)
  {
    std::vector<double> omp_buffer;
    if( nt>1 ) omp_buffer.resize( bufsize, 0.0 );
    MultiValue myvals( getNumberOfQuantities(), getNumberOfDerivatives() );
    MultiValue bvals( getNumberOfQuantities(), getNumberOfDerivatives() );
    myvals.clearAll(); bvals.clearAll();

#pragma omp for nowait
    for(unsigned i=rank;i<nactive_tasks;i+=stride){
        // Calculate the stuff in the loop for this action
        performTask( indexOfTaskInFullList[i], partialTaskList[i], myvals );

        // Weight should be between zero and one
        plumed_dbg_assert( myvals.get(0)>=0 && myvals.get(0)<=1.0 );

        // Check for the condition that allows us to skip the calculation:
        // the weight of this contribution is low.
        // N.B. Here weights are assumed to be between zero and one.
        if( myvals.get(0)<tolerance ){
           // Deactivate the task if its weight is less than the neighbour list tolerance
           if( myvals.get(0)<nl_tolerance && contributorsAreUnlocked ) deactivate_task( indexOfTaskInFullList[i] );
           // Clear the derivatives
           myvals.clearAll();
           continue;
        }

        // Now calculate all the functions.
        // If the contribution of this quantity is very small at neighbour list
        // time, ignore it until the next neighbour list update.
        if( nt>1 ){
            if( !calculateAllVessels( indexOfTaskInFullList[i], myvals, bvals, omp_buffer, der_list ) && contributorsAreUnlocked ) deactivate_task( indexOfTaskInFullList[i] );
        } else {
            if( !calculateAllVessels( indexOfTaskInFullList[i], myvals, bvals, buffer, der_list ) && contributorsAreUnlocked ) deactivate_task( indexOfTaskInFullList[i] );
        }

        // Clear the value
        myvals.clearAll();
    }
#pragma omp critical
    if(nt>1) for(unsigned i=0;i<bufsize;++i) buffer[i]+=omp_buffer[i];
  }
  if(timers) stopwatch.stop("2 Loop over tasks");

  // Turn back on derivative calculation
  dertime=true;

  if(timers) stopwatch.start("3 MPI gather");
  // MPI Gather everything
  if( !serial && buffer.size()>0 ) comm.Sum( buffer );
  // MPI Gather index stores
  if( mydata && !lowmem && !noderiv ){
      comm.Sum( der_list );
      mydata->setActiveValsAndDerivatives( der_list );
  }
  // Update the elements that are making contributions to the sum here;
  // this causes problems if we do it in prepare
  if( !serial && contributorsAreUnlocked ) comm.Sum( taskFlags );
  if(timers) stopwatch.stop("3 MPI gather");

  if(timers) stopwatch.start("4 Finishing computations");
  finishComputations( buffer );
  if(timers) stopwatch.stop("4 Finishing computations");
}
static void __sched notrace __schedule(bool preempt)
{
    struct task_struct *prev, *next;
    unsigned long *switch_count;
    struct rq *rq;
    int cpu;

    /* ==1== Find the runqueue rq of the current CPU and save the
     * currently running task rq->curr into prev. */
    cpu = smp_processor_id();
    rq = cpu_rq(cpu);
    prev = rq->curr;

    /*
     * do_exit() calls schedule() with preemption disabled as an exception;
     * however we must fix that up, otherwise the next task will see an
     * inconsistent (higher) preempt count.
     *
     * It also avoids the below schedule_debug() test from complaining
     * about this.
     */
    if (unlikely(prev->state == TASK_DEAD))
        preempt_enable_no_resched_notrace();

    /* Calling cond_resched() while kernel preemption is disabled is a
     * bug; schedule_debug() is what catches that kind of error. */
    schedule_debug(prev);

    if (sched_feat(HRTICK))
        hrtick_clear(rq);

    /* Disable local interrupts */
    local_irq_disable();

    /* Update the global RCU state, noting that this CPU is about to
     * switch context. */
    rcu_note_context_switch();

    /*
     * Make sure that signal_pending_state()->signal_pending() below
     * can't be reordered with __set_current_state(TASK_INTERRUPTIBLE)
     * done by the caller to avoid the race with signal_wake_up().
     */
    smp_mb__before_spinlock();
    /* Lock this runqueue */
    raw_spin_lock(&rq->lock);
    lockdep_pin_lock(&rq->lock);

    rq->clock_skip_update <<= 1; /* promote REQ to ACT */

    /* Switch-count bookkeeping; by default assume an involuntary
     * context switch (preemption). */
    switch_count = &prev->nivcsw;

    /*
     * The scheduler checks prev's state together with the kernel
     * preemption flag: prev is only dequeued if it is not runnable AND
     * was not preempted in kernel mode.
     *
     * Checking the preempt count alone is not enough, because some code
     * (e.g. a NIC polling loop) calls schedule() directly; without
     * looking at prev->state we could mistake such a task for
     * TASK_RUNNING.
     *
     * We can get here in two ways: a voluntary schedule(), or
     * preemption. Preemption itself has two flavours: the time slice
     * has expired, or it has not. When the slice expires, the tick
     * handler merely sets the task's need_resched flag, and
     * preempt_schedule_irq() performs the actual preemption once the
     * timer interrupt returns.
     *
     * Normally we would simply remove prev from the runqueue rq, but if
     * prev has a pending signal and its state is TASK_INTERRUPTIBLE we
     * must not dequeue it; instead we set it back to TASK_RUNNING and
     * leave it on the runqueue.
     */
    /* Dequeue only if both conditions hold:
     *  1. the task is no longer runnable (prev->state != 0), and
     *  2. it was not preempted in kernel mode.
     */
    if (!preempt && prev->state) {
        /* prev has a pending signal and is in an interruptible sleep */
        if (unlikely(signal_pending_state(prev->state, prev))) {
            /* Set its state back to TASK_RUNNING */
            prev->state = TASK_RUNNING;
        } else { /* otherwise prev must come off the runqueue */
            /* Remove prev from the runqueue */
            deactivate_task(rq, prev, DEQUEUE_SLEEP);
            /* Mark the task as no longer on a runqueue */
            prev->on_rq = 0;

            /*
             * If a worker went to sleep, notify and ask workqueue
             * whether it wants to wake up a task to maintain
             * concurrency.
             */
            if (prev->flags & PF_WQ_WORKER) {
                struct task_struct *to_wakeup;

                to_wakeup = wq_worker_sleeping(prev);
                if (to_wakeup)
                    try_to_wake_up_local(to_wakeup);
            }
        }
        /* Not a preemption, so account a voluntary context switch */
        switch_count = &prev->nvcsw;
    }

    /* If prev is still queued on the runqueue */
    if (task_on_rq_queued(prev))
        update_rq_clock(rq); /* update the runqueue clock */

    /* Pick the highest-priority task to run next */
    next = pick_next_task(rq, prev);
    /* Clear prev's TIF_NEED_RESCHED flag */
    clear_tsk_need_resched(prev);
    /* Clear the preempt-need-resched flag */
    clear_preempt_need_resched();
    rq->clock_skip_update = 0;

    /* prev and next are different tasks */
    if (likely(prev != next)) {
        rq->nr_switches++; /* update the runqueue's switch count */
        rq->curr = next;   /* mark next as the runqueue's current task */
        ++*switch_count;   /* update the task's switch count */

        trace_sched_switch(preempt, prev, next);
        /* Context-switch between the two tasks */
        rq = context_switch(rq, prev, next); /* unlocks the rq */
    } else { /* prev == next: no switch is performed */
        lockdep_unpin_lock(&rq->lock);
        raw_spin_unlock_irq(&rq->lock);
    }

    balance_callback(rq);
}
/*
 * schedule() is the main scheduler function.
 */
asmlinkage void __sched schedule(void)
{
    struct task_struct *prev, *next;
    unsigned long *switch_count;
    struct rq *rq;
    int cpu;

need_resched:
    preempt_disable();
    cpu = smp_processor_id();
    rq = cpu_rq(cpu);
    rcu_note_context_switch(cpu);
    prev = rq->curr;

    schedule_debug(prev);

    if (sched_feat(HRTICK))
        hrtick_clear(rq);

    raw_spin_lock_irq(&rq->lock);

    switch_count = &prev->nivcsw;
    if (prev->state && !(preempt_count() & PREEMPT_ACTIVE)) {
        if (unlikely(signal_pending_state(prev->state, prev))) {
            prev->state = TASK_RUNNING;
        } else {
            /*
             * If a worker is going to sleep, notify and
             * ask workqueue whether it wants to wake up a
             * task to maintain concurrency.  If so, wake
             * up the task.
             */
            if (prev->flags & PF_WQ_WORKER) {
                struct task_struct *to_wakeup;

                to_wakeup = wq_worker_sleeping(prev, cpu);
                if (to_wakeup)
                    try_to_wake_up_local(to_wakeup);
            }

            deactivate_task(rq, prev, DEQUEUE_SLEEP);

            /*
             * If we are going to sleep and we have plugged IO
             * queued, make sure to submit it to avoid deadlocks.
             */
            if (blk_needs_flush_plug(prev)) {
                raw_spin_unlock(&rq->lock);
                blk_schedule_flush_plug(prev);
                raw_spin_lock(&rq->lock);
            }
        }
        switch_count = &prev->nvcsw;
    }

    pre_schedule(rq, prev);

    if (unlikely(!rq->nr_running))
        idle_balance(cpu, rq);

    put_prev_task(rq, prev);
    next = pick_next_task(rq);
    clear_tsk_need_resched(prev);
    rq->skip_clock_update = 0;

    if (likely(prev != next)) {
        rq->nr_switches++;
        rq->curr = next;
        ++*switch_count;

        context_switch(rq, prev, next); /* unlocks the rq */
        /*
         * The context switch has flipped the stack from under us
         * and restored the local variables which were saved when
         * this task called schedule() in the past. prev == current
         * is still correct, but it can be moved to another cpu/rq.
         */
        cpu = smp_processor_id();
        rq = cpu_rq(cpu);
    } else
        raw_spin_unlock_irq(&rq->lock);

    post_schedule(rq);

    preempt_enable_no_resched();
    if (need_resched())
        goto need_resched;
}