/*
 * Account @ticks ticks on this CPU's runqueue, one at a time, by calling
 * irqtime_account_process_tick() for 'current' with user_tick == 0.
 * A non-positive @ticks accounts nothing.
 */
static void irqtime_account_idle_ticks(int ticks)
{
	struct rq *rq = this_rq();
	int remaining;

	for (remaining = ticks; remaining > 0; remaining--)
		irqtime_account_process_tick(current, 0, rq);
}
/*
 * Account the steal time accumulated since the last call, capped at
 * @maxtime, and return the amount that was accounted.
 *
 * When a guest is interrupted for a longer amount of time, missed clock
 * ticks are not redelivered later. Due to that, this function may on
 * occasion account more time than the calling functions think elapsed.
 *
 * Returns 0 when CONFIG_PARAVIRT is off or the steal clock is disabled.
 */
static __always_inline u64 steal_account_process_time(u64 maxtime)
{
#ifdef CONFIG_PARAVIRT
	if (static_key_false(&paravirt_steal_enabled)) {
		u64 steal;

		/* Delta between the steal clock and what was already accounted. */
		steal = paravirt_steal_clock(smp_processor_id());
		steal -= this_rq()->prev_steal_time;

		/* Never account more than the caller allows in one go. */
		steal = min(steal, maxtime);
		account_steal_time(steal);

		/*
		 * Advance only by the accounted amount, so anything clipped
		 * by @maxtime is still pending on the next call.
		 */
		this_rq()->prev_steal_time += steal;

		return steal;
	}
#endif
	return 0;
}
/*
 * Account pending steal time in tick-sized units.
 *
 * Returns true when a non-zero amount was accounted as steal time, false
 * otherwise (including when CONFIG_PARAVIRT is off or the steal clock is
 * disabled).
 */
static __always_inline bool steal_account_process_tick(void)
{
#ifdef CONFIG_PARAVIRT
	if (static_key_false(&paravirt_steal_enabled)) {
		u64 steal, st;

		/* Delta between the steal clock and what was already accounted. */
		steal = paravirt_steal_clock(smp_processor_id());
		steal -= this_rq()->prev_steal_time;

		/* NOTE(review): steal_ticks() presumably rounds down to whole ticks. */
		st = steal_ticks(steal);

		/*
		 * Advance by the accounted ticks only; any remainder below
		 * one tick stays pending for a later call.
		 */
		this_rq()->prev_steal_time += st * TICK_NSEC;

		account_steal_time(st);
		return st;
	}
#endif
	return false;
}
/*
 * Charge idle cpu time to this CPU's statistics.
 * @cputime: the cpu time spent in idle wait
 *
 * The time lands in the IOWAIT bucket while this runqueue has tasks
 * waiting on I/O (nr_iowait > 0), and in the IDLE bucket otherwise.
 */
void account_idle_time(cputime_t cputime)
{
	struct rq *rq = this_rq();
	u64 *cpustat = kcpustat_this_cpu->cpustat;
	int bucket;

	bucket = atomic_read(&rq->nr_iowait) > 0 ? CPUTIME_IOWAIT : CPUTIME_IDLE;
	cpustat[bucket] += (__force u64) cputime;
}
/*
 * this_rq_lock - disable interrupts, then lock this CPU's runqueue.
 *
 * Returns the locked runqueue. Interrupts are switched off before the
 * runqueue is looked up and its lock taken.
 */
static struct rq *this_rq_lock(void)
{
	struct rq *locked_rq;

	local_irq_disable();

	locked_rq = this_rq();
	raw_spin_lock(&locked_rq->lock);

	return locked_rq;
}
/*
 * this_rq_lock - disable interrupts, then lock this CPU's runqueue.
 *
 * Returns the locked runqueue. Interrupts are switched off before the
 * runqueue is looked up and its lock taken.
 */
static inline runqueue_t *this_rq_lock(void)
{
	runqueue_t *locked_rq;

	local_irq_disable();

	locked_rq = this_rq();
	spin_lock(&locked_rq->lock);

	return locked_rq;
}
/*
 * Account the steal time accumulated since the last call, capped at
 * @maxtime, and return the amount that was accounted (in cputime units).
 *
 * When a guest is interrupted for a longer amount of time, missed clock
 * ticks are not redelivered later. Due to that, this function may on
 * occasion account more time than the calling functions think elapsed.
 *
 * Returns 0 when CONFIG_PARAVIRT is off or the steal clock is disabled.
 */
static __always_inline cputime_t steal_account_process_time(cputime_t maxtime)
{
#ifdef CONFIG_PARAVIRT
	if (static_key_false(&paravirt_steal_enabled)) {
		cputime_t steal_cputime;
		u64 steal;

		/* Nanosecond delta between the steal clock and what was accounted. */
		steal = paravirt_steal_clock(smp_processor_id());
		steal -= this_rq()->prev_steal_time;

		/* Convert to cputime and never exceed what the caller allows. */
		steal_cputime = min(nsecs_to_cputime(steal), maxtime);
		account_steal_time(steal_cputime);

		/*
		 * Advance by the accounted amount converted back to
		 * nanoseconds, so anything not accounted here is still
		 * pending on the next call.
		 */
		this_rq()->prev_steal_time += cputime_to_nsecs(steal_cputime);

		return steal_cputime;
	}
#endif
	return 0;
}
/* * Called to set the hrtick timer state. * * called with rq->lock held and irqs disabled */ void hrtick_start(struct rq *rq, int delay) { struct hrtimer *timer = &rq->hrtick_timer; int time = ktime_add_ns(timer->base->get_time(), delay); hrtimer_set_expires(timer, time); if (rq == this_rq()) { hrtimer_restart(timer); } else if (!rq->hrtick_csd_pending) { smp_call_function_single(cpu_of(rq), &rq->hrtick_csd, 0); rq->hrtick_csd_pending = 1; } }
/*
 * context_switch - switch from @prev to @next on runqueue @rq.
 * @rq: runqueue the switch happens on (as seen before the switch)
 * @prev: task being switched out
 * @next: task being switched in
 *
 * Prepares the switch, switches (or borrows) the address space, switches
 * the register state and stack, and finally finishes the switch on
 * whatever runqueue this_rq() yields afterwards.
 */
static inline void context_switch(struct rq *rq, struct task_struct *prev, struct task_struct *next)
{
	struct mm_struct *mm, *oldmm;

	prepare_task_switch(rq, prev, next);

	mm = next->mm;
	oldmm = prev->active_mm;
	/*
	 * For paravirt, this is coupled with an exit in switch_to to
	 * combine the page table reload and the switch backend into
	 * one hypercall.
	 */
	arch_start_context_switch(prev);

	if (!mm) {
		/*
		 * next has no mm of its own: let it borrow oldmm, taking a
		 * reference on it via mm_count.
		 */
		next->active_mm = oldmm;
		atomic_inc(&oldmm->mm_count);
		enter_lazy_tlb(oldmm, next);
	} else
		switch_mm(oldmm, mm, next);

	if (!prev->mm) {
		/*
		 * prev was itself running on a borrowed mm: detach it and
		 * stash oldmm in rq->prev_mm (presumably so the borrowed
		 * reference can be dropped once the switch has completed;
		 * the consumer is not visible here).
		 */
		prev->active_mm = NULL;
		rq->prev_mm = oldmm;
	}
	/*
	 * The runqueue lock will be released by the next task. That is
	 * normally an invalid locking operation, but for the scheduler it
	 * is an obvious special case, so do an early lockdep release here:
	 */
#ifndef __ARCH_WANT_UNLOCKED_CTXSW
	spin_release(&rq->lock.dep_map, 1, _THIS_IP_);
#endif

	/* Here we just switch the register state and the stack. */
	switch_to(prev, next, prev);

	barrier();
	/*
	 * this_rq must be evaluated again because prev may have moved
	 * CPUs since it called schedule(), thus the 'rq' on its stack
	 * frame will be invalid.
	 */
	finish_task_switch(this_rq(), prev);
}
/*
 * Called on the way into NOHZ mode: if this runqueue has a pending
 * active-task delta, fold it into the pending idle delta slot selected
 * by calc_load_write_idx().
 */
void calc_load_enter_idle(void)
{
	struct rq *rq = this_rq();
	long delta = calc_load_fold_active(rq);

	/* Nothing pending, nothing to fold. */
	if (!delta)
		return;

	atomic_long_add(delta, &calc_load_idle[calc_load_write_idx()]);
}
/*
 * Called on the way into NO_HZ mode: if this runqueue has a pending
 * active-task delta, fold it into the pending NO_HZ delta slot selected
 * by calc_load_write_idx().
 */
void calc_load_nohz_start(void)
{
	struct rq *rq = this_rq();
	long delta = calc_load_fold_active(rq, 0);

	/* Nothing pending, nothing to fold. */
	if (!delta)
		return;

	atomic_long_add(delta, &calc_load_nohz[calc_load_write_idx()]);
}
/*
 * Called when leaving NOHZ idle: resynchronise this runqueue's
 * calc_load_update with the global sample window if we slept past it.
 */
void calc_load_exit_idle(void)
{
	struct rq *rq = this_rq();

	/*
	 * Only act once the sample window has been reached; while we are
	 * still before it there is nothing to do.
	 */
	if (!time_before(jiffies, rq->calc_load_update)) {
		/*
		 * We woke inside or after the sample window, so we were
		 * already accounted through the nohz accounting. Skip the
		 * whole deal and sync up for the next window.
		 */
		rq->calc_load_update = calc_load_update;

		if (time_before(jiffies, rq->calc_load_update + 10))
			rq->calc_load_update += LOAD_FREQ;
	}
}
/* * Account a single tick of cpu time. * @p: the process that the cpu time gets accounted to * @user_tick: indicates if the tick is a user or a system tick */ void account_process_tick(struct task_struct *p, int user_tick) { cputime_t one_jiffy_scaled = cputime_to_scaled(cputime_one_jiffy); struct rq *rq = this_rq(); if (sched_clock_irqtime) { irqtime_account_process_tick(p, user_tick, rq); return; } if (steal_account_process_tick()) return; if (user_tick) account_user_time(p, cputime_one_jiffy, one_jiffy_scaled); else if ((p != rq->idle) || (irq_count() != HARDIRQ_OFFSET)) account_system_time(p, HARDIRQ_OFFSET, cputime_one_jiffy, one_jiffy_scaled); else account_idle_time(cputime_one_jiffy); }
/**
 * cpuidle_idle_call - the main idle function
 *
 * NOTE: no locks or semaphores should be used here
 *
 * On archs that support TIF_POLLING_NRFLAG, is called with polling
 * set, and it returns with polling set. If it ever stops polling, it
 * must clear the polling bit.
 *
 * Flow: bail out early if a reschedule is pending; otherwise ask the
 * cpuidle governor for a state, enter it, and let the governor reflect
 * on the result. Falls back to default_idle_call() when no state can be
 * selected or when entering the state reports -EBUSY.
 */
static void cpuidle_idle_call(void)
{
	struct cpuidle_device *dev = __this_cpu_read(cpuidle_devices);
	struct cpuidle_driver *drv = cpuidle_get_cpu_driver(dev);
	int next_state, entered_state;

	/*
	 * Check if the idle task must be rescheduled. If it is the
	 * case, exit the function after re-enabling the local irq.
	 */
	if (need_resched()) {
		local_irq_enable();
		return;
	}

	/*
	 * During the idle period, stop measuring the disabled irqs
	 * critical sections latencies
	 */
	stop_critical_timings();

	/*
	 * Tell the RCU framework we are entering an idle section,
	 * so no more rcu read side critical sections and one more
	 * step to the grace period
	 */
	rcu_idle_enter();

	/*
	 * Check if the cpuidle framework is ready, otherwise fallback
	 * to the default arch specific idle method
	 */
	next_state = cpuidle_select(drv, dev);
	if (next_state < 0) {
		default_idle_call();
		goto exit_idle;
	}

	/*
	 * The idle task must be scheduled, so it is pointless to go
	 * idle: record a zero idle residency and get out of this
	 * function. (entered_state is assigned here but not read again
	 * on this path.)
	 */
	if (current_clr_polling_and_test()) {
		dev->last_residency = 0;
		entered_state = next_state;
		local_irq_enable();
		goto exit_idle;
	}

	/* Take note of the planned idle state. */
	idle_set_state(this_rq(), &drv->states[next_state]);

	/*
	 * Enter the idle state previously returned by the governor decision.
	 * This function will block until an interrupt occurs and will take
	 * care of re-enabling the local interrupts
	 */
	entered_state = cpuidle_enter(drv, dev, next_state);

	/* The cpu is no longer idle or about to enter idle. */
	idle_set_state(this_rq(), NULL);

	/* Entering the selected state failed with -EBUSY: use the default idle. */
	if (entered_state == -EBUSY) {
		default_idle_call();
		goto exit_idle;
	}

	/*
	 * Give the governor an opportunity to reflect on the outcome
	 */
	cpuidle_reflect(dev, entered_state);

	/* Common exit: restore polling, then undo the RCU/timing bracketing. */
exit_idle:
	__current_set_polling();

	/*
	 * It is up to the idle functions to reenable local interrupts
	 */
	if (WARN_ON_ONCE(irqs_disabled()))
		local_irq_enable();

	rcu_idle_exit();
	start_critical_timings();
}
/**
 * cpuidle_idle_call - the main idle function
 *
 * NOTE: no locks or semaphores should be used here
 *
 * On archs that support TIF_POLLING_NRFLAG, is called with polling
 * set, and it returns with polling set. If it ever stops polling, it
 * must clear the polling bit.
 *
 * Flow: bail out early if a reschedule is pending; otherwise ask the
 * cpuidle governor for a state, switch to a broadcast timer if that
 * state stops the local timer, enter the state, and let the governor
 * reflect on the result. Falls back to arch_cpu_idle() when no state
 * can be selected or the broadcast switch fails.
 */
static void cpuidle_idle_call(void)
{
	struct cpuidle_device *dev = __this_cpu_read(cpuidle_devices);
	struct cpuidle_driver *drv = cpuidle_get_cpu_driver(dev);
	int next_state, entered_state;
	bool broadcast;

	/*
	 * Check if the idle task must be rescheduled. If it is the
	 * case, exit the function after re-enabling the local irq.
	 */
	if (need_resched()) {
		local_irq_enable();
		return;
	}

	/*
	 * During the idle period, stop measuring the disabled irqs
	 * critical sections latencies
	 */
	stop_critical_timings();

	/*
	 * Tell the RCU framework we are entering an idle section,
	 * so no more rcu read side critical sections and one more
	 * step to the grace period
	 */
	rcu_idle_enter();

	/*
	 * Ask the cpuidle framework to choose a convenient idle state.
	 * Fall back to the default arch specific idle method on errors.
	 */
	next_state = cpuidle_select(drv, dev);
	if (next_state < 0) {
		/*
		 * This label sits inside the error branch; the broadcast
		 * failure path further down jumps back into it.
		 */
use_default:
		/*
		 * We can't use the cpuidle framework, let's use the default
		 * idle routine.
		 */
		if (current_clr_polling_and_test())
			local_irq_enable();
		else
			arch_cpu_idle();

		goto exit_idle;
	}

	/*
	 * The idle task must be scheduled, so it is pointless to go
	 * idle: record a zero idle residency and get out of this
	 * function. (entered_state is assigned here but not read again
	 * on this path.)
	 */
	if (current_clr_polling_and_test()) {
		dev->last_residency = 0;
		entered_state = next_state;
		local_irq_enable();
		goto exit_idle;
	}

	/* True when the chosen state is flagged CPUIDLE_FLAG_TIMER_STOP. */
	broadcast = !!(drv->states[next_state].flags & CPUIDLE_FLAG_TIMER_STOP);

	/*
	 * Tell the time framework to switch to a broadcast timer
	 * because our local timer will be shutdown. If a local timer
	 * is used from another cpu as a broadcast timer, this call may
	 * fail if it is not available
	 */
	if (broadcast &&
	    clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_ENTER, &dev->cpu))
		goto use_default;

	/* Take note of the planned idle state. */
	idle_set_state(this_rq(), &drv->states[next_state]);

	/*
	 * Enter the idle state previously returned by the governor decision.
	 * This function will block until an interrupt occurs and will take
	 * care of re-enabling the local interrupts
	 */
	entered_state = cpuidle_enter(drv, dev, next_state);

	/* The cpu is no longer idle or about to enter idle. */
	idle_set_state(this_rq(), NULL);

	/* Leave broadcast mode again if we entered it above. */
	if (broadcast)
		clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_EXIT, &dev->cpu);

	/*
	 * Give the governor an opportunity to reflect on the outcome
	 */
	cpuidle_reflect(dev, entered_state);

	/* Common exit: restore polling, then undo the RCU/timing bracketing. */
exit_idle:
	__current_set_polling();

	/*
	 * It is up to the idle functions to reenable local interrupts
	 */
	if (WARN_ON_ONCE(irqs_disabled()))
		local_irq_enable();

	rcu_idle_exit();
	start_critical_timings();
}
unsigned long this_cpu_load(void) { struct rq *this = this_rq(); return this->cpu_load[0]; }
/**
 * cpuidle_idle_call - the main idle function
 *
 * NOTE: no locks or semaphores should be used here
 *
 * On archs that support TIF_POLLING_NRFLAG, is called with polling
 * set, and it returns with polling set. If it ever stops polling, it
 * must clear the polling bit.
 *
 * Flow: bail out early if a reschedule is pending; use the default arch
 * idle routine if cpuidle is not available; otherwise pick a state
 * (deepest state for suspend-to-idle, governor's choice normally),
 * switch to a broadcast timer if the state stops the local timer, enter
 * the state, and let the governor reflect on the result unless the
 * governor was bypassed.
 */
static void cpuidle_idle_call(void)
{
	struct cpuidle_device *dev = __this_cpu_read(cpuidle_devices);
	struct cpuidle_driver *drv = cpuidle_get_cpu_driver(dev);
	int next_state, entered_state;
	unsigned int broadcast;
	bool reflect;

	/*
	 * Check if the idle task must be rescheduled. If it is the
	 * case, exit the function after re-enabling the local irq.
	 */
	if (need_resched()) {
		local_irq_enable();
		return;
	}

	/*
	 * During the idle period, stop measuring the disabled irqs
	 * critical sections latencies
	 */
	stop_critical_timings();

	/*
	 * Tell the RCU framework we are entering an idle section,
	 * so no more rcu read side critical sections and one more
	 * step to the grace period
	 */
	rcu_idle_enter();

	if (cpuidle_not_available(drv, dev))
		goto use_default;

	/*
	 * Suspend-to-idle ("freeze") is a system state in which all user space
	 * has been frozen, all I/O devices have been suspended and the only
	 * activity happens here and in interrupts (if any). In that case bypass
	 * the cpuidle governor and go straight for the deepest idle state
	 * available. Possibly also suspend the local tick and the entire
	 * timekeeping to prevent timer interrupts from kicking us out of idle
	 * until a proper wakeup interrupt happens.
	 */
	if (idle_should_freeze()) {
		entered_state = cpuidle_enter_freeze(drv, dev);
		if (entered_state >= 0) {
			local_irq_enable();
			goto exit_idle;
		}

		/*
		 * The freeze entry did not succeed: fall through to a regular
		 * entry of the deepest state, but do not feed the result to
		 * the governor since it was not consulted.
		 */
		reflect = false;
		next_state = cpuidle_find_deepest_state(drv, dev);
	} else {
		reflect = true;
		/*
		 * Ask the cpuidle framework to choose a convenient idle state.
		 */
		next_state = cpuidle_select(drv, dev);
	}
	/* Fall back to the default arch idle method on errors. */
	if (next_state < 0)
		goto use_default;

	/*
	 * The idle task must be scheduled, so it is pointless to go
	 * idle: record a zero idle residency and get out of this
	 * function. (entered_state is assigned here but not read again
	 * on this path.)
	 */
	if (current_clr_polling_and_test()) {
		dev->last_residency = 0;
		entered_state = next_state;
		local_irq_enable();
		goto exit_idle;
	}

	/* Non-zero when the chosen state is flagged CPUIDLE_FLAG_TIMER_STOP. */
	broadcast = drv->states[next_state].flags & CPUIDLE_FLAG_TIMER_STOP;

	/*
	 * Tell the time framework to switch to a broadcast timer
	 * because our local timer will be shutdown. If a local timer
	 * is used from another cpu as a broadcast timer, this call may
	 * fail if it is not available
	 */
	if (broadcast && tick_broadcast_enter())
		goto use_default;

	/* Take note of the planned idle state. */
	idle_set_state(this_rq(), &drv->states[next_state]);

	/*
	 * Enter the idle state previously returned by the governor decision.
	 * This function will block until an interrupt occurs and will take
	 * care of re-enabling the local interrupts
	 */
	entered_state = cpuidle_enter(drv, dev, next_state);

	/* The cpu is no longer idle or about to enter idle. */
	idle_set_state(this_rq(), NULL);

	/* Leave broadcast mode again if we entered it above. */
	if (broadcast)
		tick_broadcast_exit();

	/*
	 * Give the governor an opportunity to reflect on the outcome
	 */
	if (reflect)
		cpuidle_reflect(dev, entered_state);

	/* Common exit: restore polling, then undo the RCU/timing bracketing. */
exit_idle:
	__current_set_polling();

	/*
	 * It is up to the idle functions to reenable local interrupts
	 */
	if (WARN_ON_ONCE(irqs_disabled()))
		local_irq_enable();

	rcu_idle_exit();
	start_critical_timings();
	return;

use_default:
	/*
	 * We can't use the cpuidle framework, let's use the default
	 * idle routine.
	 */
	if (current_clr_polling_and_test())
		local_irq_enable();
	else
		arch_cpu_idle();

	goto exit_idle;
}
/**
 * sched_idle_set_state - Record idle state for the current CPU.
 * @idle_state: State to record.
 *
 * Forwards @idle_state to idle_set_state() for this CPU's runqueue.
 */
void sched_idle_set_state(struct cpuidle_state *idle_state)
{
	struct rq *rq = this_rq();

	idle_set_state(rq, idle_state);
}