static int __cpuinit blk_cpu_notify(struct notifier_block *self,
				    unsigned long action, void *hcpu)
{
	/*
	 * If a CPU goes away, splice its entries to the current CPU
	 * and trigger a run of the softirq
	 */
	if (action == CPU_DEAD || action == CPU_DEAD_FROZEN) {
		int cpu = (unsigned long) hcpu;

		local_irq_disable();
		list_splice_init(&per_cpu(blk_cpu_done, cpu),
				 &__get_cpu_var(blk_cpu_done));
		raise_softirq_irqoff(BLOCK_SOFTIRQ);
		local_irq_enable();
	}

	return NOTIFY_OK;
}
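For reference, this notifier and the per-CPU completion lists are wired up at boot. A minimal sketch of that registration path, modeled on block/blk-softirq.c of the same era (the blk_done_softirq consumer is shown after the completion paths below):

static struct notifier_block __cpuinitdata blk_cpu_notifier = {
	.notifier_call	= blk_cpu_notify,
};

static __init int blk_softirq_init(void)
{
	int i;

	/* Every CPU gets an (initially empty) completion list. */
	for_each_possible_cpu(i)
		INIT_LIST_HEAD(&per_cpu(blk_cpu_done, i));

	/* The BLOCK_SOFTIRQ handler drains blk_cpu_done. */
	open_softirq(BLOCK_SOFTIRQ, blk_done_softirq);
	register_hotcpu_notifier(&blk_cpu_notifier);
	return 0;
}
subsys_initcall(blk_softirq_init);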
void __blk_complete_request(struct request *req)
{
	int ccpu, cpu, group_cpu = NR_CPUS;
	struct request_queue *q = req->q;
	unsigned long flags;

	BUG_ON(!q->softirq_done_fn);

	local_irq_save(flags);
	cpu = smp_processor_id();

	/*
	 * Select completion CPU
	 */
	if (test_bit(QUEUE_FLAG_SAME_COMP, &q->queue_flags) && req->cpu != -1) {
		ccpu = req->cpu;
		if (!test_bit(QUEUE_FLAG_SAME_FORCE, &q->queue_flags)) {
			ccpu = blk_cpu_to_group(ccpu);
			group_cpu = blk_cpu_to_group(cpu);
		}
	} else
		ccpu = cpu;

	if (ccpu == cpu || ccpu == group_cpu) {
		struct list_head *list;
do_local:
		list = &__get_cpu_var(blk_cpu_done);
		list_add_tail(&req->csd.list, list);

		/*
		 * if the list only contains our just added request,
		 * signal a raise of the softirq. If there are already
		 * entries there, someone already raised the irq but it
		 * hasn't run yet.
		 */
		if (list->next == &req->csd.list)
			raise_softirq_irqoff(BLOCK_SOFTIRQ);
	} else if (raise_blk_irq(ccpu, req))
		goto do_local;

	local_irq_restore(flags);
}
void __blk_complete_request(struct request *req)
{
	struct request_queue *q = req->q;
	unsigned long flags;
	int ccpu, cpu, group_cpu;

	BUG_ON(!q->softirq_done_fn);

	/* Determine the CPU that received the interrupt */
	local_irq_save(flags);
	cpu = smp_processor_id();
	group_cpu = blk_cpu_to_group(cpu);

	/*
	 * Select completion CPU: find the CPU that should handle the
	 * completed request. QUEUE_FLAG_SAME_COMP asks that the CPU
	 * which submitted the request also handle its completion.
	 */
	if (test_bit(QUEUE_FLAG_SAME_COMP, &q->queue_flags) && req->cpu != -1)
		ccpu = req->cpu;
	else
		ccpu = cpu;

	if (ccpu == cpu || ccpu == group_cpu) {
		/* The completing CPU and the receiving CPU are the same */
		struct list_head *list;
do_local:
		list = &__get_cpu_var(blk_cpu_done);
		/* Queue the completed request on this CPU's list */
		list_add_tail(&req->csd.list, list);

		/*
		 * if the list only contains our just added request,
		 * signal a raise of the softirq. If there are already
		 * entries there, someone already raised the irq but it
		 * hasn't run yet.
		 */
		if (list->next == &req->csd.list)
			raise_softirq_irqoff(BLOCK_SOFTIRQ);
	} else if (raise_blk_irq(ccpu, req))
		/* Route the interrupt to ccpu; returns 0 on success */
		goto do_local;

	local_irq_restore(flags);
}
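Both variants only queue the request on a per-CPU list and raise BLOCK_SOFTIRQ; the driver's completion callback runs later in softirq context. A sketch of that consumer side, modeled on blk_done_softirq() from the same kernels:

static void blk_done_softirq(struct softirq_action *h)
{
	struct list_head *cpu_list, local_list;

	/* Atomically steal this CPU's completion list. */
	local_irq_disable();
	cpu_list = &__get_cpu_var(blk_cpu_done);
	list_replace_init(cpu_list, &local_list);
	local_irq_enable();

	/* Run each driver's completion callback with irqs enabled. */
	while (!list_empty(&local_list)) {
		struct request *rq;

		rq = list_entry(local_list.next, struct request, csd.list);
		list_del_init(&rq->csd.list);
		rq->q->softirq_done_fn(rq);
	}
}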
int __hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim,
			     unsigned long delta_ns,
			     const enum hrtimer_mode mode, int wakeup)
{
	struct hrtimer_clock_base *base, *new_base;
	unsigned long flags;
	int ret, leftmost;

	base = lock_hrtimer_base(timer, &flags);

	/* Remove an active timer from the queue: */
	ret = remove_hrtimer(timer, base);

	if (mode & HRTIMER_MODE_REL) {
		tim = ktime_add_safe(tim, base->get_time());
		/*
		 * CONFIG_TIME_LOW_RES is a temporary way for architectures
		 * to signal that they simply return xtime in
		 * do_gettimeoffset(). In this case we want to round up by
		 * resolution when starting a relative timer, to avoid short
		 * timeouts. This will go away with the GTOD framework.
		 */
#ifdef CONFIG_TIME_LOW_RES
		tim = ktime_add_safe(tim, base->resolution);
#endif
	}

	hrtimer_set_expires_range_ns(timer, tim, delta_ns);

	/* Switch the timer base, if necessary: */
	new_base = switch_hrtimer_base(timer, base, mode & HRTIMER_MODE_PINNED);

	timer_stats_hrtimer_set_start_info(timer);

	leftmost = enqueue_hrtimer(timer, new_base);

	/*
	 * Only allow reprogramming if the new base is on this CPU.
	 * (it might still be on another CPU if the timer was pending)
	 *
	 * XXX send_remote_softirq() ?
	 */
	if (leftmost && new_base->cpu_base == &__get_cpu_var(hrtimer_bases)
		&& hrtimer_enqueue_reprogram(timer, new_base)) {
		if (wakeup) {
			/*
			 * We need to drop cpu_base->lock to avoid a
			 * lock ordering issue vs. rq->lock.
			 */
			raw_spin_unlock(&new_base->cpu_base->lock);
			raise_softirq_irqoff(HRTIMER_SOFTIRQ);
			local_irq_restore(flags);
			return ret;
		} else {
			__raise_softirq_irqoff(HRTIMER_SOFTIRQ);
		}
	}

	unlock_hrtimer_base(timer, &flags);

	return ret;
}
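__hrtimer_start_range_ns() sits below the public hrtimer API; drivers normally reach it through hrtimer_start() or hrtimer_start_range_ns(). A minimal, self-contained usage sketch (my_timer, my_timer_fn and my_timer_setup are hypothetical names for illustration):

#include <linux/hrtimer.h>
#include <linux/ktime.h>

static struct hrtimer my_timer;

static enum hrtimer_restart my_timer_fn(struct hrtimer *t)
{
	/* Runs in hrtimer context once the timer fires. */
	return HRTIMER_NORESTART;	/* one-shot: do not re-arm */
}

static void my_timer_setup(void)
{
	hrtimer_init(&my_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
	my_timer.function = my_timer_fn;
	/* Fire 100 ms from now, relative to the current time. */
	hrtimer_start(&my_timer, ktime_set(0, 100 * NSEC_PER_MSEC),
		      HRTIMER_MODE_REL);
}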
/**
 * tick_nohz_stop_sched_tick - stop the idle tick from the idle task
 *
 * When the next event is more than a tick into the future, stop the idle tick
 * Called either from the idle loop or from irq_exit() when an idle period was
 * just interrupted by an interrupt which did not cause a reschedule.
 */
void tick_nohz_stop_sched_tick(int inidle)
{
	unsigned long seq, last_jiffies, next_jiffies, delta_jiffies, flags;
	struct tick_sched *ts;
	ktime_t last_update, expires, now;
	struct clock_event_device *dev = __get_cpu_var(tick_cpu_device).evtdev;
	u64 time_delta;
	int cpu;

	local_irq_save(flags);

	cpu = smp_processor_id();
	ts = &per_cpu(tick_cpu_sched, cpu);

	/*
	 * Call to tick_nohz_start_idle stops the last_update_time from being
	 * updated. Thus, it must not be called in the event we are called from
	 * irq_exit() with the prior state different than idle.
	 */
	if (!inidle && !ts->inidle)
		goto end;

	/*
	 * Set ts->inidle unconditionally. Even if the system did not
	 * switch to NOHZ mode the cpu frequency governors rely on the
	 * update of the idle time accounting in tick_nohz_start_idle().
	 */
	ts->inidle = 1;

	now = tick_nohz_start_idle(ts);

	/*
	 * If this cpu is offline and it is the one which updates
	 * jiffies, then give up the assignment and let it be taken by
	 * the cpu which runs the tick timer next. If we don't drop
	 * this here the jiffies might be stale and do_timer() never
	 * invoked.
	 */
	if (unlikely(!cpu_online(cpu))) {
		if (cpu == tick_do_timer_cpu)
			tick_do_timer_cpu = TICK_DO_TIMER_NONE;
	}

	if (unlikely(ts->nohz_mode == NOHZ_MODE_INACTIVE))
		goto end;

	if (need_resched())
		goto end;

	if (unlikely(local_softirq_pending() && cpu_online(cpu))) {
		static int ratelimit;

		if (ratelimit < 10) {
			printk(KERN_ERR "NOHZ: local_softirq_pending %02x\n",
			       local_softirq_pending());
			ratelimit++;
		}
		goto end;
	}

	ts->idle_calls++;
	/* Read jiffies and the time when jiffies were updated last */
	do {
		seq = read_seqbegin(&xtime_lock);
		last_update = last_jiffies_update;
		last_jiffies = jiffies;

		/*
		 * On SMP we really should only care for the CPU which
		 * has the do_timer duty assigned. All other CPUs can
		 * sleep as long as they want.
		 */
		if (cpu == tick_do_timer_cpu ||
		    tick_do_timer_cpu == TICK_DO_TIMER_NONE)
			time_delta = timekeeping_max_deferment();
		else
			time_delta = KTIME_MAX;
	} while (read_seqretry(&xtime_lock, seq));

	if (rcu_needs_cpu(cpu) || printk_needs_cpu(cpu) ||
	    arch_needs_cpu(cpu)) {
		next_jiffies = last_jiffies + 1;
		delta_jiffies = 1;
	} else {
		/* Get the next timer wheel timer */
		next_jiffies = get_next_timer_interrupt(last_jiffies);
		delta_jiffies = next_jiffies - last_jiffies;
	}
	/*
	 * Do not stop the tick, if we are only one off
	 * or if the cpu is required for rcu
	 */
	if (!ts->tick_stopped && delta_jiffies == 1)
		goto out;

	/* Schedule the tick, if we are at least one jiffie off */
	if ((long)delta_jiffies >= 1) {

		/*
		 * calculate the expiry time for the next timer wheel
		 * timer. delta_jiffies >= NEXT_TIMER_MAX_DELTA signals
		 * that there is no timer pending or at least extremely
		 * far into the future (12 days for HZ=1000). In this
		 * case we set the expiry to the end of time.
		 */
		if (likely(delta_jiffies < NEXT_TIMER_MAX_DELTA)) {
			/*
			 * Calculate the time delta for the next timer event.
			 * If the time delta exceeds the maximum time delta
			 * permitted by the current clocksource then adjust
			 * the time delta accordingly to ensure the
			 * clocksource does not wrap.
			 */
			time_delta = min_t(u64, time_delta,
					   tick_period.tv64 * delta_jiffies);
			expires = ktime_add_ns(last_update, time_delta);
		} else {
			expires.tv64 = KTIME_MAX;
		}

		/*
		 * If this cpu is the one which updates jiffies, then
		 * give up the assignment and let it be taken by the
		 * cpu which runs the tick timer next, which might be
		 * this cpu as well. If we don't drop this here the
		 * jiffies might be stale and do_timer() never
		 * invoked.
		 */
		if (cpu == tick_do_timer_cpu)
			tick_do_timer_cpu = TICK_DO_TIMER_NONE;

		if (delta_jiffies > 1)
			cpumask_set_cpu(cpu, nohz_cpu_mask);

		/* Skip reprogram of event if it's not changed */
		if (ts->tick_stopped && ktime_equal(expires, dev->next_event))
			goto out;

		/*
		 * nohz_stop_sched_tick can be called several times before
		 * the nohz_restart_sched_tick is called. This happens when
		 * interrupts arrive which do not cause a reschedule. In the
		 * first call we save the current tick time, so we can restart
		 * the scheduler tick in nohz_restart_sched_tick.
		 */
		if (!ts->tick_stopped) {
			if (select_nohz_load_balancer(1)) {
				/*
				 * sched tick not stopped!
				 */
				cpumask_clear_cpu(cpu, nohz_cpu_mask);
				goto out;
			}

			ts->idle_tick = hrtimer_get_expires(&ts->sched_timer);
			ts->tick_stopped = 1;
			ts->idle_jiffies = last_jiffies;
			rcu_enter_nohz();
		}

		ts->idle_sleeps++;

		/* Mark expires */
		ts->idle_expires = expires;

		/*
		 * If the expiration time == KTIME_MAX, then
		 * in this case we simply stop the tick timer.
		 */
		if (unlikely(expires.tv64 == KTIME_MAX)) {
			if (ts->nohz_mode == NOHZ_MODE_HIGHRES)
				hrtimer_cancel(&ts->sched_timer);
			goto out;
		}

		if (ts->nohz_mode == NOHZ_MODE_HIGHRES) {
			hrtimer_start(&ts->sched_timer, expires,
				      HRTIMER_MODE_ABS_PINNED);
			/* Check, if the timer was already in the past */
			if (hrtimer_active(&ts->sched_timer))
				goto out;
		} else if (!tick_program_event(expires, 0))
			goto out;
		/*
		 * We are past the event already. So we crossed a
		 * jiffie boundary. Update jiffies and raise the
		 * softirq.
		 */
		tick_do_update_jiffies64(ktime_get());
		cpumask_clear_cpu(cpu, nohz_cpu_mask);
	}
	raise_softirq_irqoff(TIMER_SOFTIRQ);
out:
	ts->next_jiffies = next_jiffies;
	ts->last_jiffies = last_jiffies;
	ts->sleep_length = ktime_sub(dev->next_event, now);
end:
	local_irq_restore(flags);
}
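A side note on the locking idiom: every version of this function snapshots jiffies and last_jiffies_update with a seqlock reader loop, retrying whenever a writer touched xtime_lock in between. A self-contained sketch of the pattern (my_lock, shared_a, shared_b and reader are hypothetical names):

#include <linux/seqlock.h>

static DEFINE_SEQLOCK(my_lock);		/* hypothetical lock */
static u64 shared_a, shared_b;		/* pair updated together by a writer */

static void reader(u64 *a, u64 *b)
{
	unsigned long seq;

	do {
		seq = read_seqbegin(&my_lock);	/* sample write sequence */
		*a = shared_a;			/* read a consistent pair */
		*b = shared_b;
	} while (read_seqretry(&my_lock, seq));	/* retry if a write raced */
}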
static void tick_nohz_stop_sched_tick(struct tick_sched *ts)
{
	unsigned long seq, last_jiffies, next_jiffies, delta_jiffies;
	unsigned long rcu_delta_jiffies;
	ktime_t last_update, expires, now;
	struct clock_event_device *dev = __get_cpu_var(tick_cpu_device).evtdev;
	u64 time_delta;
	int cpu;

	cpu = smp_processor_id();
	ts = &per_cpu(tick_cpu_sched, cpu);

	now = tick_nohz_start_idle(cpu, ts);

	/* An offline cpu must hand off the do_timer duty. */
	if (unlikely(!cpu_online(cpu))) {
		if (cpu == tick_do_timer_cpu)
			tick_do_timer_cpu = TICK_DO_TIMER_NONE;
	}

	if (unlikely(ts->nohz_mode == NOHZ_MODE_INACTIVE))
		return;

	if (need_resched())
		return;

	if (unlikely(local_softirq_pending() && cpu_online(cpu))) {
		static int ratelimit;

		if (ratelimit < 10) {
			printk(KERN_ERR "NOHZ: local_softirq_pending %02x\n",
			       (unsigned int) local_softirq_pending());
			ratelimit++;
		}
		return;
	}

	ts->idle_calls++;
	/* Read jiffies and the time when jiffies were updated last */
	do {
		seq = read_seqbegin(&xtime_lock);
		last_update = last_jiffies_update;
		last_jiffies = jiffies;
		time_delta = timekeeping_max_deferment();
	} while (read_seqretry(&xtime_lock, seq));

	if (rcu_needs_cpu(cpu, &rcu_delta_jiffies) || printk_needs_cpu(cpu) ||
	    arch_needs_cpu(cpu)) {
		next_jiffies = last_jiffies + 1;
		delta_jiffies = 1;
	} else {
		/* Get the next timer wheel timer */
		next_jiffies = get_next_timer_interrupt(last_jiffies);
		delta_jiffies = next_jiffies - last_jiffies;
		/* RCU may need this cpu sooner than the timer wheel does. */
		if (rcu_delta_jiffies < delta_jiffies) {
			next_jiffies = last_jiffies + rcu_delta_jiffies;
			delta_jiffies = rcu_delta_jiffies;
		}
	}

	/* Do not stop the tick if the next event is only one jiffy away. */
	if (!ts->tick_stopped && delta_jiffies == 1)
		goto out;

	if ((long)delta_jiffies >= 1) {

		/*
		 * Only the cpu which held the do_timer duty last must
		 * honour the timekeeping max deferment; all others may
		 * sleep as long as they want.
		 */
		if (cpu == tick_do_timer_cpu) {
			tick_do_timer_cpu = TICK_DO_TIMER_NONE;
			ts->do_timer_last = 1;
		} else if (tick_do_timer_cpu != TICK_DO_TIMER_NONE) {
			time_delta = KTIME_MAX;
			ts->do_timer_last = 0;
		} else if (!ts->do_timer_last) {
			time_delta = KTIME_MAX;
		}

		if (likely(delta_jiffies < NEXT_TIMER_MAX_DELTA)) {
			time_delta = min_t(u64, time_delta,
					   tick_period.tv64 * delta_jiffies);
		}

		if (time_delta < KTIME_MAX)
			expires = ktime_add_ns(last_update, time_delta);
		else
			expires.tv64 = KTIME_MAX;

		/* Skip reprogramming if the expiry time did not change. */
		if (ts->tick_stopped && ktime_equal(expires, dev->next_event))
			goto out;

		if (!ts->tick_stopped) {
			select_nohz_load_balancer(1);

			ts->idle_tick = hrtimer_get_expires(&ts->sched_timer);
			ts->tick_stopped = 1;
			ts->idle_jiffies = last_jiffies;
		}

		ts->idle_sleeps++;
		ts->idle_expires = expires;

		/* expires == KTIME_MAX: no pending timer, just stop the tick. */
		if (unlikely(expires.tv64 == KTIME_MAX)) {
			if (ts->nohz_mode == NOHZ_MODE_HIGHRES)
				hrtimer_cancel(&ts->sched_timer);
			goto out;
		}

		if (ts->nohz_mode == NOHZ_MODE_HIGHRES) {
			hrtimer_start(&ts->sched_timer, expires,
				      HRTIMER_MODE_ABS_PINNED);
			/* Check, if the timer was already in the past */
			if (hrtimer_active(&ts->sched_timer))
				goto out;
		} else if (!tick_program_event(expires, 0))
			goto out;

		/* Already past the event: catch jiffies up, fall through. */
		tick_do_update_jiffies64(ktime_get());
	}
	raise_softirq_irqoff(TIMER_SOFTIRQ);
out:
	ts->next_jiffies = next_jiffies;
	ts->last_jiffies = last_jiffies;
}
/**
 * tick_nohz_stop_sched_tick - stop the idle tick from the idle task
 *
 * When the next event is more than a tick into the future, stop the idle tick
 * Called either from the idle loop or from irq_exit() when an idle period was
 * just interrupted by an interrupt which did not cause a reschedule.
 */
void tick_nohz_stop_sched_tick(void)
{
	unsigned long seq, last_jiffies, next_jiffies, delta_jiffies, flags;
	struct tick_sched *ts;
	ktime_t last_update, expires, now, delta;
	int cpu;

	local_irq_save(flags);

	cpu = smp_processor_id();
	ts = &per_cpu(tick_cpu_sched, cpu);

	if (unlikely(ts->nohz_mode == NOHZ_MODE_INACTIVE))
		goto end;

	if (need_resched())
		goto end;

	cpu = smp_processor_id();
	if (unlikely(local_softirq_pending()))
		printk(KERN_ERR "NOHZ: local_softirq_pending %02x\n",
		       local_softirq_pending());

	now = ktime_get();
	/*
	 * When called from irq_exit we need to account the idle sleep time
	 * correctly.
	 */
	if (ts->tick_stopped) {
		delta = ktime_sub(now, ts->idle_entrytime);
		ts->idle_sleeptime = ktime_add(ts->idle_sleeptime, delta);
	}

	ts->idle_entrytime = now;
	ts->idle_calls++;

	/* Read jiffies and the time when jiffies were updated last */
	do {
		seq = read_seqbegin(&xtime_lock);
		last_update = last_jiffies_update;
		last_jiffies = jiffies;
	} while (read_seqretry(&xtime_lock, seq));

	/* Get the next timer wheel timer */
	next_jiffies = get_next_timer_interrupt(last_jiffies);
	delta_jiffies = next_jiffies - last_jiffies;

	if (rcu_needs_cpu(cpu))
		delta_jiffies = 1;
	/*
	 * Do not stop the tick, if we are only one off
	 * or if the cpu is required for rcu
	 */
	if (!ts->tick_stopped && delta_jiffies == 1)
		goto out;

	/* Schedule the tick, if we are at least one jiffie off */
	if ((long)delta_jiffies >= 1) {

		if (delta_jiffies > 1)
			cpu_set(cpu, nohz_cpu_mask);
		/*
		 * nohz_stop_sched_tick can be called several times before
		 * the nohz_restart_sched_tick is called. This happens when
		 * interrupts arrive which do not cause a reschedule. In the
		 * first call we save the current tick time, so we can restart
		 * the scheduler tick in nohz_restart_sched_tick.
		 */
		if (!ts->tick_stopped) {
			ts->idle_tick = ts->sched_timer.expires;
			ts->tick_stopped = 1;
			ts->idle_jiffies = last_jiffies;
		}

		/*
		 * If this cpu is the one which updates jiffies, then
		 * give up the assignment and let it be taken by the
		 * cpu which runs the tick timer next, which might be
		 * this cpu as well. If we don't drop this here the
		 * jiffies might be stale and do_timer() never
		 * invoked.
		 */
		if (cpu == tick_do_timer_cpu)
			tick_do_timer_cpu = -1;

		/*
		 * calculate the expiry time for the next timer wheel
		 * timer
		 */
		expires = ktime_add_ns(last_update, tick_period.tv64 *
				       delta_jiffies);
		ts->idle_expires = expires;
		ts->idle_sleeps++;

		if (ts->nohz_mode == NOHZ_MODE_HIGHRES) {
			hrtimer_start(&ts->sched_timer, expires,
				      HRTIMER_MODE_ABS);
			/* Check, if the timer was already in the past */
			if (hrtimer_active(&ts->sched_timer))
				goto out;
		} else if (!tick_program_event(expires, 0))
			goto out;
		/*
		 * We are past the event already. So we crossed a
		 * jiffie boundary. Update jiffies and raise the
		 * softirq.
		 */
		tick_do_update_jiffies64(ktime_get());
		cpu_clear(cpu, nohz_cpu_mask);
	}
	raise_softirq_irqoff(TIMER_SOFTIRQ);
out:
	ts->next_jiffies = next_jiffies;
	ts->last_jiffies = last_jiffies;
end:
	local_irq_restore(flags);
}
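In every variant the fall-through path raises TIMER_SOFTIRQ once jiffies have been caught up. For reference, a sketch of the consumer registered for that softirq, modeled on kernel/timer.c of roughly the same era (details such as hrtimer_run_pending() vary between the kernel versions quoted here):

static void run_timer_softirq(struct softirq_action *h)
{
	struct tvec_base *base = __get_cpu_var(tvec_bases);

	hrtimer_run_pending();

	/* Expire all timer wheel timers that are due by now. */
	if (time_after_eq(jiffies, base->timer_jiffies))
		__run_timers(base);
}

void __init init_timers(void)
{
	/* ... */
	open_softirq(TIMER_SOFTIRQ, run_timer_softirq);
}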
static void tick_nohz_stop_sched_tick(struct tick_sched *ts)
{
	unsigned long seq, last_jiffies, next_jiffies, delta_jiffies;
	ktime_t last_update, expires, now;
	struct clock_event_device *dev = __get_cpu_var(tick_cpu_device).evtdev;
	u64 time_delta;
	int cpu;

	cpu = smp_processor_id();
	ts = &per_cpu(tick_cpu_sched, cpu);

	now = tick_nohz_start_idle(cpu, ts);

	/*
	 * If this cpu is offline and it is the one which updates
	 * jiffies, then give up the assignment and let it be taken by
	 * the cpu which runs the tick timer next. If we don't drop
	 * this here the jiffies might be stale and do_timer() never
	 * invoked.
	 */
	if (unlikely(!cpu_online(cpu))) {
		if (cpu == tick_do_timer_cpu)
			tick_do_timer_cpu = TICK_DO_TIMER_NONE;
	}

	if (unlikely(ts->nohz_mode == NOHZ_MODE_INACTIVE))
		return;

	if (need_resched())
		return;

	if (unlikely(local_softirq_pending() && cpu_online(cpu))) {
		static int ratelimit;

		if (ratelimit < 10) {
			printk(KERN_ERR "NOHZ: local_softirq_pending %02x\n",
			       (unsigned int) local_softirq_pending());
			ratelimit++;
		}
		return;
	}

	ts->idle_calls++;
	/* Read jiffies and the time when jiffies were updated last */
	do {
		seq = read_seqbegin(&xtime_lock);
		last_update = last_jiffies_update;
		last_jiffies = jiffies;
		time_delta = timekeeping_max_deferment();
	} while (read_seqretry(&xtime_lock, seq));

	if (rcu_needs_cpu(cpu) || printk_needs_cpu(cpu) ||
	    arch_needs_cpu(cpu)) {
		next_jiffies = last_jiffies + 1;
		delta_jiffies = 1;
	} else {
		/* Get the next timer wheel timer */
		next_jiffies = get_next_timer_interrupt(last_jiffies);
		delta_jiffies = next_jiffies - last_jiffies;
	}
	/*
	 * Do not stop the tick, if we are only one off
	 * or if the cpu is required for rcu
	 */
	if (!ts->tick_stopped && delta_jiffies == 1)
		goto out;

	/* Schedule the tick, if we are at least one jiffie off */
	if ((long)delta_jiffies >= 1) {

		/*
		 * If this cpu is the one which updates jiffies, then
		 * give up the assignment and let it be taken by the
		 * cpu which runs the tick timer next, which might be
		 * this cpu as well. If we don't drop this here the
		 * jiffies might be stale and do_timer() never
		 * invoked. Keep track of the fact that it was the one
		 * which had the do_timer() duty last. If this cpu is
		 * the one which had the do_timer() duty last, we
		 * limit the sleep time to the timekeeping
		 * max_deferment value which we retrieved
		 * above. Otherwise we can sleep as long as we want.
		 */
		if (cpu == tick_do_timer_cpu) {
			tick_do_timer_cpu = TICK_DO_TIMER_NONE;
			ts->do_timer_last = 1;
		} else if (tick_do_timer_cpu != TICK_DO_TIMER_NONE) {
			time_delta = KTIME_MAX;
			ts->do_timer_last = 0;
		} else if (!ts->do_timer_last) {
			time_delta = KTIME_MAX;
		}

		/*
		 * calculate the expiry time for the next timer wheel
		 * timer. delta_jiffies >= NEXT_TIMER_MAX_DELTA signals
		 * that there is no timer pending or at least extremely
		 * far into the future (12 days for HZ=1000). In this
		 * case we set the expiry to the end of time.
		 */
		if (likely(delta_jiffies < NEXT_TIMER_MAX_DELTA)) {
			/*
			 * Calculate the time delta for the next timer event.
			 * If the time delta exceeds the maximum time delta
			 * permitted by the current clocksource then adjust
			 * the time delta accordingly to ensure the
			 * clocksource does not wrap.
			 */
			time_delta = min_t(u64, time_delta,
					   tick_period.tv64 * delta_jiffies);
		}

		if (time_delta < KTIME_MAX)
			expires = ktime_add_ns(last_update, time_delta);
		else
			expires.tv64 = KTIME_MAX;

		/* Skip reprogram of event if it's not changed */
		if (ts->tick_stopped && ktime_equal(expires, dev->next_event))
			goto out;

		/*
		 * nohz_stop_sched_tick can be called several times before
		 * the nohz_restart_sched_tick is called. This happens when
		 * interrupts arrive which do not cause a reschedule. In the
		 * first call we save the current tick time, so we can restart
		 * the scheduler tick in nohz_restart_sched_tick.
		 */
		if (!ts->tick_stopped) {
			select_nohz_load_balancer(1);
			calc_load_enter_idle();

			ts->idle_tick = hrtimer_get_expires(&ts->sched_timer);
			ts->tick_stopped = 1;
			ts->idle_jiffies = last_jiffies;
		}

		ts->idle_sleeps++;

		/* Mark expires */
		ts->idle_expires = expires;

		/*
		 * If the expiration time == KTIME_MAX, then
		 * in this case we simply stop the tick timer.
		 */
		if (unlikely(expires.tv64 == KTIME_MAX)) {
			if (ts->nohz_mode == NOHZ_MODE_HIGHRES)
				hrtimer_cancel(&ts->sched_timer);
			goto out;
		}

		if (ts->nohz_mode == NOHZ_MODE_HIGHRES) {
			hrtimer_start(&ts->sched_timer, expires,
				      HRTIMER_MODE_ABS_PINNED);
			/* Check, if the timer was already in the past */
			if (hrtimer_active(&ts->sched_timer))
				goto out;
		} else if (!tick_program_event(expires, 0))
			goto out;
		/*
		 * We are past the event already. So we crossed a
		 * jiffie boundary. Update jiffies and raise the
		 * softirq.
		 */
		tick_do_update_jiffies64(ktime_get());
	}
	raise_softirq_irqoff(TIMER_SOFTIRQ);
out:
	ts->next_jiffies = next_jiffies;
	ts->last_jiffies = last_jiffies;
}
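Finally, each version ends the slow path by calling tick_do_update_jiffies64(ktime_get()) to catch jiffies up before raising TIMER_SOFTIRQ. A sketch of that helper, modeled on kernel/time/tick-sched.c of the same era:

static void tick_do_update_jiffies64(ktime_t now)
{
	unsigned long ticks = 0;
	ktime_t delta;

	/* Quick check without the lock: has a full tick period elapsed? */
	delta = ktime_sub(now, last_jiffies_update);
	if (delta.tv64 < tick_period.tv64)
		return;

	/* Reevaluate with xtime_lock held */
	write_seqlock(&xtime_lock);

	delta = ktime_sub(now, last_jiffies_update);
	if (delta.tv64 >= tick_period.tv64) {
		delta = ktime_sub(delta, tick_period);
		last_jiffies_update = ktime_add(last_jiffies_update,
						tick_period);

		/* Slow path for long idle sleeps: catch up many ticks. */
		if (unlikely(delta.tv64 >= tick_period.tv64)) {
			s64 incr = ktime_to_ns(tick_period);

			ticks = ktime_divns(delta, incr);
			last_jiffies_update = ktime_add_ns(last_jiffies_update,
							   incr * ticks);
		}
		do_timer(++ticks);
	}
	write_sequnlock(&xtime_lock);
}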