/*
 * Run the callback of a single timer.
 *
 * Expects interrupts to be disabled (checked with WARN_ON). The cpu base
 * lock is released around the callback and taken again before returning.
 */
static void __run_hrtimer(struct hrtimer *timer, ktime_t *now)
{
	struct hrtimer_clock_base *clock_base = timer->base;
	struct hrtimer_cpu_base *percpu = clock_base->cpu_base;
	enum hrtimer_restart (*callback)(struct hrtimer *);
	int verdict;

	WARN_ON(!irqs_disabled());

	/* Dequeue the timer, passing HRTIMER_STATE_CALLBACK as its new state. */
	debug_deactivate(timer);
	__remove_hrtimer(timer, clock_base, HRTIMER_STATE_CALLBACK, 0);
	timer_stats_account_hrtimer(timer);
	callback = timer->function;

	/*
	 * Timers run from hardirq context, so they cannot be migrated to
	 * another CPU while the callback runs; dropping the base lock here
	 * is therefore safe.
	 */
	raw_spin_unlock(&percpu->lock);
	trace_hrtimer_expire_entry(timer, now);
	verdict = callback(timer);
	trace_hrtimer_expire_exit(timer);
	raw_spin_lock(&percpu->lock);

	/*
	 * The CALLBACK bit is cleared only after a possible re-enqueue, and
	 * the event hardware is not reprogrammed here. That happens either
	 * in hrtimer_start_range_ns() or in hrtimer_interrupt().
	 */
	if (verdict != HRTIMER_NORESTART) {
		BUG_ON(timer->state != HRTIMER_STATE_CALLBACK);
		enqueue_hrtimer(timer, clock_base);
	}

	WARN_ON_ONCE(!(timer->state & HRTIMER_STATE_CALLBACK));

	timer->state &= ~HRTIMER_STATE_CALLBACK;
}
/* * Retrigger next event is called after clock was set * * Called with interrupts disabled via on_each_cpu() */ static void retrigger_next_event(void *arg) { struct hrtimer_cpu_base *base = &__get_cpu_var(hrtimer_bases); struct timespec realtime_offset, xtim, wtm, sleep; if (!hrtimer_hres_active()) return; /* Optimized out for !HIGH_RES */ get_xtime_and_monotonic_and_sleep_offset(&xtim, &wtm, &sleep); set_normalized_timespec(&realtime_offset, -wtm.tv_sec, -wtm.tv_nsec); /* Adjust CLOCK_REALTIME offset */ raw_spin_lock(&base->lock); base->clock_base[HRTIMER_BASE_REALTIME].offset = timespec_to_ktime(realtime_offset); base->clock_base[HRTIMER_BASE_BOOTTIME].offset = timespec_to_ktime(sleep); hrtimer_force_reprogram(base, 0); raw_spin_unlock(&base->lock); }
static int spear13xx_boot_secondary(unsigned int cpu, struct task_struct *idle) { unsigned long timeout; /* * set synchronisation state between this boot processor * and the secondary one */ raw_spin_lock(&boot_lock); /* * The secondary processor is waiting to be released from * the holding pen - release it, then wait for it to flag * that it has been released by resetting pen_release. * * Note that "pen_release" is the hardware CPU ID, whereas * "cpu" is Linux's internal ID. */ pen_release = cpu; flush_cache_all(); outer_flush_all(); timeout = jiffies + (1 * HZ); while (time_before(jiffies, timeout)) { smp_rmb(); if (pen_release == -1) break; udelay(10); } /* * now the secondary core is starting up let it run its * calibrations, then wait for it to finish */ raw_spin_unlock(&boot_lock); return pen_release != -1 ? -ENOSYS : 0; }
/*
 * The current CPU has been marked offline. Migrate IRQs off this CPU.
 * If the affinity settings do not allow other CPUs, force them onto any
 * available CPU.
 *
 * Note: we must iterate over all IRQs, whether they have an attached
 * action structure or not, as we need to get chained interrupts too.
 *
 * Local interrupts are disabled with local_irq_save() for the walk, and
 * each descriptor is migrated with its own desc->lock held.
 *
 * NOTE(review): this excerpt ends right after the loop; the matching
 * local_irq_restore() and the closing brace of the function are not
 * visible here - confirm against the full file.
 */
void migrate_irqs(void)
{
	unsigned int i;
	struct irq_desc *desc;
	unsigned long flags;

	local_irq_save(flags);

	for_each_irq_desc(i, desc) {
		bool affinity_broken = false;

		/* Skip IRQ numbers that have no descriptor. */
		if (!desc)
			continue;

		raw_spin_lock(&desc->lock);
		affinity_broken = migrate_one_irq(desc);
		raw_spin_unlock(&desc->lock);

		/* Rate-limited warning when migrate_one_irq() reports a break. */
		if (affinity_broken && printk_ratelimit())
			pr_warning("IRQ%u no longer affine to CPU%u\n", i,
				smp_processor_id());
	}
void handle_level_irq(unsigned int irq, struct irq_desc *desc) { raw_spin_lock(&desc->lock); mask_ack_irq(desc); if (irqd_irq_inprogress(&desc->irq_data)) if (!irq_check_poll(desc)) goto out_unlock; desc->istate &= ~(IRQS_REPLAY | IRQS_WAITING); /* kstat_incr_irqs_this_cpu(irq, desc); */ if (!desc->action || irqd_irq_disabled(&desc->irq_data)) goto out_unlock; handle_irq_event(desc); if (!irqd_irq_disabled(&desc->irq_data) && !(desc->istate & IRQS_ONESHOT)) unmask_irq(desc); out_unlock: raw_spin_unlock(&desc->lock); }
/** * handle_fasteoi_irq - irq handler for transparent controllers * @irq: the interrupt number * @desc: the interrupt description structure for this irq * * Only a single callback will be issued to the chip: an ->eoi() * call when the interrupt has been serviced. This enables support * for modern forms of interrupt handlers, which handle the flow * details in hardware, transparently. */ void handle_fasteoi_irq(unsigned int irq, struct irq_desc *desc) { raw_spin_lock(&desc->lock); if (unlikely(irqd_irq_inprogress(&desc->irq_data))) if (!irq_check_poll(desc)) goto out; desc->istate &= ~(IRQS_REPLAY | IRQS_WAITING); kstat_incr_irqs_this_cpu(irq, desc); /* * If its disabled or no action available * then mask it and get out of here: */ if (unlikely(!desc->action || irqd_irq_disabled(&desc->irq_data))) { if (!irq_settings_is_level(desc)) desc->istate |= IRQS_PENDING; mask_irq(desc); goto out; } if (desc->istate & IRQS_ONESHOT) mask_irq(desc); preflow_handler(desc); handle_irq_event(desc); out_eoi: desc->irq_data.chip->irq_eoi(&desc->irq_data); out_unlock: raw_spin_unlock(&desc->lock); return; out: if (!(desc->irq_data.chip->flags & IRQCHIP_EOI_IF_HANDLED)) goto out_eoi; goto out_unlock; }
/** * handle_simple_irq - Simple and software-decoded IRQs. * @irq: the interrupt number * @desc: the interrupt description structure for this irq * * Simple interrupts are either sent from a demultiplexing interrupt * handler or come from hardware, where no interrupt hardware control * is necessary. * * Note: The caller is expected to handle the ack, clear, mask and * unmask issues if necessary. */ void handle_simple_irq(unsigned int irq, struct irq_desc *desc) { raw_spin_lock(&desc->lock); if (unlikely(irqd_irq_inprogress(&desc->irq_data))) if (!irq_check_poll(desc)) goto out_unlock; desc->istate &= ~(IRQS_REPLAY | IRQS_WAITING); kstat_incr_irqs_this_cpu(irq, desc); if (unlikely(!desc->action || irqd_irq_disabled(&desc->irq_data))) { desc->istate |= IRQS_PENDING; goto out_unlock; } handle_irq_event(desc); out_unlock: raw_spin_unlock(&desc->lock); }
static unsigned int steal_context_smp(unsigned int id) { struct mm_struct *mm; unsigned int cpu, max, i; max = last_context - first_context; while (max--) { mm = context_mm[id]; if (mm->context.active) { id++; if (id > last_context) id = first_context; continue; } pr_hardcont(" | steal %d from 0x%p", id, mm); mm->context.id = MMU_NO_CONTEXT; for_each_cpu(cpu, mm_cpumask(mm)) { for (i = cpu_first_thread_sibling(cpu); i <= cpu_last_thread_sibling(cpu); i++) __set_bit(id, stale_map[i]); cpu = i - 1; } return id; } raw_spin_unlock(&context_lock); cpu_relax(); raw_spin_lock(&context_lock); return MMU_NO_CONTEXT; }
/*
 * Format a message and store it in the ring buffer.
 *
 * @fmt:  printf-style format string
 * @args: arguments for @fmt
 *
 * The message is formatted into a static line buffer of RINGBUF_LINE_MAX
 * bytes while ringbuf_lock is held with local interrupts disabled. A single
 * trailing newline is stripped and recorded as RINGBUF_NEWLINE instead.
 *
 * Returns the number of characters stored (excluding a stripped newline).
 */
asmlinkage int vstlog(const char *fmt, va_list args)
{
	static char textbuf[RINGBUF_LINE_MAX];
	char *text = textbuf;
	size_t text_len;
	enum ringbuf_flags lflags = 0;
	unsigned long flags;
	int this_cpu;
	int printed_len = 0;

	local_irq_save(flags);
	this_cpu = smp_processor_id();

	lockdep_off();
	raw_spin_lock(&ringbuf_lock);
	ringbuf_cpu = this_cpu;

	text_len = vscnprintf(text, sizeof(textbuf), fmt, args);

	/* mark and strip a trailing newline */
	if (text_len && text[text_len - 1] == '\n') {
		text_len--;
		lflags |= RINGBUF_NEWLINE;
	}

	ringbuf_store(lflags, text, text_len, ringbuf_cpu, current);
	printed_len += text_len;

	raw_spin_unlock(&ringbuf_lock);
	/*
	 * Balance the lockdep_off() above; without this, lock validation
	 * stays switched off for the calling task after the first message.
	 */
	lockdep_on();
	local_irq_restore(flags);

	return printed_len;
}
/**
 * __rt_mutex_slowlock() - Perform the wait-wake-try-to-take loop
 * @lock:		 the rt_mutex to take
 * @state:		 the state the task should block in (TASK_INTERRUPTIBLE
 *			 or TASK_UNINTERRUPTIBLE)
 * @timeout:		 the pre-initialized and started timer, or NULL for none
 * @waiter:		 the pre-initialized rt_mutex_waiter
 *
 * lock->wait_lock must be held by the caller. It is dropped while the task
 * is scheduled out and is held again on return.
 *
 * Returns 0 when the lock was taken, -EINTR on a pending signal, or
 * -ETIMEDOUT when the timeout fired (the timeout wins if both apply).
 */
static int __sched
__rt_mutex_slowlock(struct rt_mutex *lock, int state,
		    struct hrtimer_sleeper *timeout,
		    struct rt_mutex_waiter *waiter)
{
	while (!try_to_take_rt_mutex(lock, current, waiter)) {
		/*
		 * Signals and timeouts are only honoured for
		 * TASK_INTERRUPTIBLE sleeps.
		 */
		if (unlikely(state == TASK_INTERRUPTIBLE)) {
			int err = 0;

			if (signal_pending(current))
				err = -EINTR;
			if (timeout && !timeout->task)
				err = -ETIMEDOUT;
			if (err)
				return err;
		}

		raw_spin_unlock(&lock->wait_lock);

		debug_rt_mutex_print_deadlock(waiter);

		schedule_rt_mutex(lock);

		raw_spin_lock(&lock->wait_lock);
		set_current_state(state);
	}

	return 0;
}
/**
 * irq_migrate_all_off_this_cpu - Migrate irqs away from offline cpu
 *
 * The current CPU has been marked offline.  Migrate IRQs off this CPU.
 * If the affinity settings do not allow other CPUs, force them onto any
 * available CPU.
 *
 * Note: we must iterate over all IRQs, whether they have an attached
 * action structure or not, as we need to get chained interrupts too.
 */
void irq_migrate_all_off_this_cpu(void)
{
	unsigned long irqflags;
	unsigned int irq;

	local_irq_save(irqflags);

	for_each_active_irq(irq) {
		struct irq_desc *desc = irq_to_desc(irq);
		bool broken;

		raw_spin_lock(&desc->lock);
		broken = migrate_one_irq(desc);
		raw_spin_unlock(&desc->lock);

		if (!broken)
			continue;

		pr_warn_ratelimited("IRQ%u no longer affine to CPU%u\n",
				    irq, smp_processor_id());
	}

	local_irq_restore(irqflags);
}
void radix__flush_tlb_page_psize(struct mm_struct *mm, unsigned long vmaddr, int psize) { unsigned long pid; unsigned long ap = mmu_get_ap(psize); preempt_disable(); pid = mm ? mm->context.id : 0; if (unlikely(pid == MMU_NO_CONTEXT)) goto bail; if (!mm_is_thread_local(mm)) { int lock_tlbie = !mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE); if (lock_tlbie) raw_spin_lock(&native_tlbie_lock); _tlbie_va(vmaddr, pid, ap, RIC_FLUSH_TLB); if (lock_tlbie) raw_spin_unlock(&native_tlbie_lock); } else _tlbiel_va(vmaddr, pid, ap, RIC_FLUSH_TLB); bail: preempt_enable(); }
/* * Try to get console ownership to actually show the kernel * messages from a 'printk'. Return true (and with the * console_mutex held, and 'console_locked' set) if it * is successful, false otherwise. * * This gets called with the 'logbuf_lock' spinlock held and * interrupts disabled. It should return with 'lockbuf_lock' * released but interrupts still disabled. */ static int acquire_console_mutex_for_printk(unsigned int cpu) { int retval = 0; if (!try_acquire_console_mutex()) { retval = 1; /* * If we can't use the console, we need to release * the console mutex by hand to avoid flushing * the buffer. We need to hold the console mutex * in order to do this test safely. */ if (!can_use_console(cpu)) { console_locked = 0; mutex_unlock(&console_mutex); retval = 0; } } printk_cpu = UINT_MAX; raw_spin_unlock(&logbuf_lock); return retval; }
/** * rt_mutex_finish_proxy_lock() - Complete lock acquisition * @lock: the rt_mutex we were woken on * @to: the timeout, null if none. hrtimer should already have * been started. * @waiter: the pre-initialized rt_mutex_waiter * @detect_deadlock: perform deadlock detection (1) or not (0) * * Complete the lock acquisition started our behalf by another thread. * * Returns: * 0 - success * <0 - error, one of -EINTR, -ETIMEDOUT, or -EDEADLK * * Special API call for PI-futex requeue support */ int rt_mutex_finish_proxy_lock(struct rt_mutex *lock, struct hrtimer_sleeper *to, struct rt_mutex_waiter *waiter, int detect_deadlock) { int ret; raw_spin_lock(&lock->wait_lock); set_current_state(TASK_INTERRUPTIBLE); ret = __rt_mutex_slowlock(lock, TASK_INTERRUPTIBLE, to, waiter, detect_deadlock); set_current_state(TASK_RUNNING); if (unlikely(waiter->task)) remove_waiter(lock, waiter); /* * try_to_take_rt_mutex() sets the waiter bit unconditionally. We might * have to fix that up. */ fixup_rt_mutex_waiters(lock); raw_spin_unlock(&lock->wait_lock); /* * Readjust priority, when we did not get the lock. We might have been * the pending owner and boosted. Since we did not take the lock, the * PI boost has to go. */ if (unlikely(ret)) rt_mutex_adjust_prio(current); return ret; }
static int panic_flush(struct notifier_block *nb, unsigned long l, void *buf) { int i; raw_spin_lock(&panic_lock); pr_emerg("EMMD: ready to perform memory dump\n"); for (i = 0; i < nr_cpu_ids; i++) coresight_dump_pcsr(i); set_emmd_indicator(); ramtag_setup(); kmsg_dump(KMSG_DUMP_PANIC); dump_task_info(); #ifdef CONFIG_PXA_RAMDUMP ramdump_panic(); #endif #ifdef CONFIG_REGDUMP dump_reg_to_console(); #endif pr_emerg("EMMD: done\n"); arm_machine_flush_console(); flush_cache_all(); #ifdef CONFIG_ARM outer_flush_all(); #endif drain_mc_buffer(); raw_spin_unlock(&panic_lock); return NOTIFY_DONE; }
/** * handle_edge_eoi_irq - edge eoi type IRQ handler * @irq: the interrupt number * @desc: the interrupt description structure for this irq * * Similar as the above handle_edge_irq, but using eoi and w/o the * mask/unmask logic. */ bool handle_edge_eoi_irq(unsigned int irq, struct irq_desc *desc) { bool handled = false; struct irq_chip *chip = irq_desc_get_chip(desc); raw_spin_lock(&desc->lock); desc->istate &= ~(IRQS_REPLAY | IRQS_WAITING); /* * If we're currently running this IRQ, or its disabled, * we shouldn't process the IRQ. Mark it pending, handle * the necessary masking and go out */ if (unlikely(irqd_irq_disabled(&desc->irq_data) || irqd_irq_inprogress(&desc->irq_data) || !desc->action)) { if (!irq_check_poll(desc)) { desc->istate |= IRQS_PENDING; goto out_eoi; } } kstat_incr_irqs_this_cpu(irq, desc); do { if (unlikely(!desc->action)) goto out_eoi; handle_irq_event(desc); handled = true; } while ((desc->istate & IRQS_PENDING) && !irqd_irq_disabled(&desc->irq_data)); out_eoi: chip->irq_eoi(&desc->irq_data); raw_spin_unlock(&desc->lock); return handled; }
/** * handle_edge_eoi_irq - edge eoi type IRQ handler * @desc: the interrupt description structure for this irq * * Similar as the above handle_edge_irq, but using eoi and w/o the * mask/unmask logic. */ void handle_edge_eoi_irq(struct irq_desc *desc) { struct irq_chip *chip = irq_desc_get_chip(desc); raw_spin_lock(&desc->lock); desc->istate &= ~(IRQS_REPLAY | IRQS_WAITING); if (!irq_may_run(desc)) { desc->istate |= IRQS_PENDING; goto out_eoi; } /* * If its disabled or no action available then mask it and get * out of here. */ if (irqd_irq_disabled(&desc->irq_data) || !desc->action) { desc->istate |= IRQS_PENDING; goto out_eoi; } kstat_incr_irqs_this_cpu(desc); do { if (unlikely(!desc->action)) goto out_eoi; handle_irq_event(desc); } while ((desc->istate & IRQS_PENDING) && !irqd_irq_disabled(&desc->irq_data)); out_eoi: chip->irq_eoi(&desc->irq_data); raw_spin_unlock(&desc->lock); }
/*
 * Reserve a free entry in cb->irq_map for @hwirq and allocate the matching
 * interrupt in the parent domain.
 *
 * The map is searched from the highest index downwards under cb->lock.
 * Returns -EINVAL when the parent domain has no OF node, -ENODEV when no
 * entry is free, otherwise the result of irq_domain_alloc_irqs_parent().
 * On a parent failure the reserved entry is released again; on success
 * cb->write() is called for the entry.
 */
static int allocate_gic_irq(struct irq_domain *domain, unsigned virq,
			    irq_hw_number_t hwirq)
{
	struct irq_fwspec fwspec;
	int slot;
	int err;

	if (!irq_domain_get_of_node(domain->parent))
		return -EINVAL;

	raw_spin_lock(&cb->lock);
	slot = cb->int_max - 1;
	while (slot >= 0 && cb->irq_map[slot] != IRQ_FREE)
		slot--;
	if (slot >= 0)
		cb->irq_map[slot] = hwirq;
	raw_spin_unlock(&cb->lock);

	if (slot < 0)
		return -ENODEV;

	fwspec.fwnode = domain->parent->fwnode;
	fwspec.param_count = 3;
	fwspec.param[0] = 0;	/* SPI */
	fwspec.param[1] = slot;
	fwspec.param[2] = IRQ_TYPE_LEVEL_HIGH;

	err = irq_domain_alloc_irqs_parent(domain, virq, 1, &fwspec);
	if (!err)
		cb->write(slot, hwirq);
	else
		cb->irq_map[slot] = IRQ_FREE;

	return err;
}
void radix__flush_tlb_pwc(struct mmu_gather *tlb, unsigned long addr) { unsigned long pid; struct mm_struct *mm = tlb->mm; preempt_disable(); pid = mm->context.id; if (unlikely(pid == MMU_NO_CONTEXT)) goto no_context; if (!mm_is_thread_local(mm)) { int lock_tlbie = !mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE); if (lock_tlbie) raw_spin_lock(&native_tlbie_lock); _tlbie_pid(pid, RIC_FLUSH_PWC); if (lock_tlbie) raw_spin_unlock(&native_tlbie_lock); } else _tlbiel_pid(pid, RIC_FLUSH_PWC); no_context: preempt_enable(); }
/** * rt_mutex_start_proxy_lock() - Start lock acquisition for another task * @lock: the rt_mutex to take * @waiter: the pre-initialized rt_mutex_waiter * @task: the task to prepare * @detect_deadlock: perform deadlock detection (1) or not (0) * * Returns: * 0 - task blocked on lock * 1 - acquired the lock for task, caller should wake it up * <0 - error * * Special API call for FUTEX_REQUEUE_PI support. */ int rt_mutex_start_proxy_lock(struct rt_mutex *lock, struct rt_mutex_waiter *waiter, struct task_struct *task, int detect_deadlock) { unsigned long flags; int ret; raw_spin_lock_irqsave(&lock->wait_lock, flags); if (try_to_take_rt_mutex(lock, task, NULL)) { raw_spin_unlock(&lock->wait_lock); return 1; } ret = task_blocks_on_rt_mutex(lock, waiter, task, detect_deadlock, flags, 0); if (ret == -EDEADLK && !rt_mutex_owner(lock)) { /* * Reset the return value. We might have * returned with -EDEADLK and the owner * released the lock while we were walking the * pi chain. Let the waiter sort it out. */ ret = 0; } if (unlikely(ret)) remove_waiter(lock, waiter, flags); raw_spin_unlock_irqrestore(&lock->wait_lock, flags); debug_rt_mutex_print_deadlock(waiter); return ret; }
/* * Called from hardirq context every jiffy */ void hrtimer_run_queues(void) { struct rb_node *node; struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases); struct hrtimer_clock_base *base; int index, gettime = 1; if (hrtimer_hres_active()) return; for (index = 0; index < HRTIMER_MAX_CLOCK_BASES; index++) { base = &cpu_base->clock_base[index]; if (!base->first) continue; if (gettime) { hrtimer_get_softirq_time(cpu_base); gettime = 0; } raw_spin_lock(&cpu_base->lock); while ((node = base->first)) { struct hrtimer *timer; timer = rb_entry(node, struct hrtimer, node); if (base->softirq_time.tv64 <= hrtimer_get_expires_tv64(timer)) break; __run_hrtimer(timer, &base->softirq_time); } raw_spin_unlock(&cpu_base->lock); } }
/* applies to both peripheral and syswake interrupts */ static int pdc_irq_set_wake(struct irq_data *data, unsigned int on) { struct pdc_intc_priv *priv = irqd_to_priv(data); irq_hw_number_t hw = data->hwirq; unsigned int mask = (1 << 16) << hw; unsigned int dst_irq; raw_spin_lock(&priv->lock); if (on) priv->irq_route |= mask; else priv->irq_route &= ~mask; pdc_write(priv, PDC_IRQ_ROUTE, priv->irq_route); raw_spin_unlock(&priv->lock); /* control the destination IRQ wakeup too for standby mode */ if (hwirq_is_syswake(hw)) dst_irq = priv->syswake_irq; else dst_irq = priv->perip_irqs[hw]; irq_set_irq_wake(dst_irq, on); return 0; }
/*
 * (Re)start a timer with an expiry time and a slack range.
 *
 * @timer:    the timer to be started
 * @tim:      expiry time (relative when HRTIMER_MODE_REL is set in @mode)
 * @delta_ns: range passed to hrtimer_set_expires_range_ns()
 * @mode:     HRTIMER_MODE_* flags
 * @wakeup:   non-zero selects raise_softirq_irqoff() with the base lock
 *            dropped; zero selects __raise_softirq_irqoff() under the lock
 *
 * Returns the result of remove_hrtimer() for the timer.
 */
int __hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim,
		unsigned long delta_ns, const enum hrtimer_mode mode,
		int wakeup)
{
	struct hrtimer_clock_base *old_base, *target_base;
	unsigned long irqflags;
	int was_active, is_first;

	old_base = lock_hrtimer_base(timer, &irqflags);

	/* Remove an active timer from the queue: */
	was_active = remove_hrtimer(timer, old_base);

	if (mode & HRTIMER_MODE_REL) {
		tim = ktime_add_safe(tim, old_base->get_time());
		/*
		 * CONFIG_TIME_LOW_RES is a temporary way for architectures
		 * to signal that they simply return xtime in
		 * do_gettimeoffset(). In this case we want to round up by
		 * resolution when starting a relative timer, to avoid short
		 * timeouts. This will go away with the GTOD framework.
		 */
#ifdef CONFIG_TIME_LOW_RES
		tim = ktime_add_safe(tim, old_base->resolution);
#endif
	}

	hrtimer_set_expires_range_ns(timer, tim, delta_ns);

	/* Switch the timer base, if necessary: */
	target_base = switch_hrtimer_base(timer, old_base,
					  mode & HRTIMER_MODE_PINNED);

	timer_stats_hrtimer_set_start_info(timer);

	is_first = enqueue_hrtimer(timer, target_base);

	/*
	 * Only allow reprogramming if the new base is on this CPU.
	 * (it might still be on another CPU if the timer was pending)
	 *
	 * XXX send_remote_softirq() ?
	 */
	if (!is_first ||
	    target_base->cpu_base != &__get_cpu_var(hrtimer_bases) ||
	    !hrtimer_enqueue_reprogram(timer, target_base))
		goto unlock;

	if (!wakeup) {
		__raise_softirq_irqoff(HRTIMER_SOFTIRQ);
		goto unlock;
	}

	/*
	 * We need to drop cpu_base->lock to avoid a
	 * lock ordering issue vs. rq->lock.
	 */
	raw_spin_unlock(&target_base->cpu_base->lock);
	raise_softirq_irqoff(HRTIMER_SOFTIRQ);
	local_irq_restore(irqflags);
	return was_active;

unlock:
	unlock_hrtimer_base(timer, &irqflags);

	return was_active;
}
/*
 * High resolution timer interrupt
 * Called with interrupts disabled
 *
 * Runs every timer whose soft expiry time has been reached on each active
 * clock base of this CPU, then programs the event device for the earliest
 * remaining expiry. If programming fails, the scan is repeated; after
 * three passes the CPU is flagged as hung and the next event is pushed
 * into the future.
 */
void hrtimer_interrupt(struct clock_event_device *dev)
{
	struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases);
	ktime_t expires_next, now, entry_time, delta;
	int i, retries = 0;

	BUG_ON(!cpu_base->hres_active);
	cpu_base->nr_events++;
	dev->next_event.tv64 = KTIME_MAX;

	raw_spin_lock(&cpu_base->lock);
	/* entry_time is kept so the total time spent here can be computed. */
	entry_time = now = hrtimer_update_base(cpu_base);
retry:
	expires_next.tv64 = KTIME_MAX;
	/*
	 * We set expires_next to KTIME_MAX here with cpu_base->lock
	 * held to prevent that a timer is enqueued in our queue via
	 * the migration code. This does not affect enqueueing of
	 * timers which run their callback and need to be requeued on
	 * this CPU.
	 */
	cpu_base->expires_next.tv64 = KTIME_MAX;

	for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) {
		struct hrtimer_clock_base *base;
		struct timerqueue_node *node;
		ktime_t basenow;

		/* Skip clock bases that have no active timers. */
		if (!(cpu_base->active_bases & (1 << i)))
			continue;

		base = cpu_base->clock_base + i;
		/* Current time expressed in this base's clock. */
		basenow = ktime_add(now, base->offset);

		while ((node = timerqueue_getnext(&base->active))) {
			struct hrtimer *timer;

			timer = container_of(node, struct hrtimer, node);

			/*
			 * The immediate goal for using the softexpires is
			 * minimizing wakeups, not running timers at the
			 * earliest interrupt after their soft expiration.
			 * This allows us to avoid using a Priority Search
			 * Tree, which can answer a stabbing query for
			 * overlapping intervals and instead use the simple
			 * BST we already have.
			 * We don't add extra wakeups by delaying timers that
			 * are right-of a not yet expired timer, because that
			 * timer will have to trigger a wakeup anyway.
			 */

			if (basenow.tv64 < hrtimer_get_softexpires_tv64(timer)) {
				ktime_t expires;

				/* Convert the expiry back by removing the base offset. */
				expires = ktime_sub(hrtimer_get_expires(timer),
						    base->offset);
				/* A negative result is treated as "no expiry". */
				if (expires.tv64 < 0)
					expires.tv64 = KTIME_MAX;
				if (expires.tv64 < expires_next.tv64)
					expires_next = expires;
				break;
			}

			__run_hrtimer(timer, &basenow);
		}
	}

	/*
	 * Store the new expiry value so the migration code can verify
	 * against it.
	 */
	cpu_base->expires_next = expires_next;
	raw_spin_unlock(&cpu_base->lock);

	/* Reprogramming necessary ? */
	if (expires_next.tv64 == KTIME_MAX ||
	    !tick_program_event(expires_next, 0)) {
		cpu_base->hang_detected = 0;
		return;
	}

	/*
	 * The next timer was already expired due to:
	 * - tracing
	 * - long lasting callbacks
	 * - being scheduled away when running in a VM
	 *
	 * We need to prevent that we loop forever in the hrtimer
	 * interrupt routine. We give it 3 attempts to avoid
	 * overreacting on some spurious event.
	 *
	 * Acquire base lock for updating the offsets and retrieving
	 * the current time.
	 */
	raw_spin_lock(&cpu_base->lock);
	now = hrtimer_update_base(cpu_base);
	cpu_base->nr_retries++;
	if (++retries < 3)
		goto retry;
	/*
	 * Give the system a chance to do something else than looping
	 * here. We stored the entry time, so we know exactly how long
	 * we spent here. We schedule the next event this amount of
	 * time away.
	 */
	cpu_base->nr_hangs++;
	cpu_base->hang_detected = 1;
	raw_spin_unlock(&cpu_base->lock);
	delta = ktime_sub(now, entry_time);
	/* Track the longest time spent in this handler. */
	if (delta.tv64 > cpu_base->max_hang_time.tv64)
		cpu_base->max_hang_time = delta;
	/*
	 * Limit it to a sensible value as we enforce a longer
	 * delay. Give the CPU at least 100ms to catch up.
	 */
	if (delta.tv64 > 100 * NSEC_PER_MSEC)
		expires_next = ktime_add_ns(now, 100 * NSEC_PER_MSEC);
	else
		expires_next = ktime_add(now, delta);
	/* Second argument 1, in contrast to the 0 used above. */
	tick_program_event(expires_next, 1);
	printk_once(KERN_WARNING "hrtimer: interrupt took %llu ns\n",
		    ktime_to_ns(delta));
}
static void dec_pjobs_rtws(struct rtws_rq *rtws_rq, struct sched_rtws_entity *rtws_se) { struct rq *rq = rq_of_rtws_rq(rtws_rq); struct task_struct *p = task_of_rtws_se(rtws_se); struct sched_rtws_entity *stealable; struct global_rq * global_rq = rtws_rq->global_rq; int ret = 0; WARN_ON(!rtws_prio(p->prio)); WARN_ON(!rtws_rq->nr_running); rtws_rq->nr_running--; if (!rtws_rq->nr_running) { /* If there are no more pjobs to run, we declare this rq idle */ rtws_rq->earliest_dl = 0; cpudl_set(&rq->rd->rtwsc_cpudl, rq->cpu, 0, 0, 0); smp_wmb(); } else { if (!RB_EMPTY_NODE(&rtws_se->stealable_pjob_node)) return; if (rtws_rq->earliest_dl != rtws_se->job.deadline) return; if (!has_stealable_pjobs(rtws_rq)) return; /* The leftmost stealable pjob and our next pjob share the same deadline */ stealable = rb_entry(rtws_rq->leftmost_stealable_pjob, struct sched_rtws_entity, stealable_pjob_node); if (stealable->job.deadline > rtws_rq->earliest_dl) { /* * If the next pjob has lower priority than the highest priority task on * global rq, we try to pull that task. * Clearing the earlieast deadline value (earliest_dl=0) on the rq is * a trick to allow next's rq statistics update, keeping the current ones. */ if (priority_inversion_rtws(rtws_rq, stealable)) { rtws_rq->earliest_dl = 0; raw_spin_lock(&global_rq->lock); ret = pull_task_rtws(rq); raw_spin_unlock(&global_rq->lock); if (ret) return; } } /* * We update statistics about this rq when next * pjob has a different deadline than the dequeueing one. */ if (rtws_rq->earliest_dl != stealable->job.deadline) { rtws_rq->earliest_dl = stealable->job.deadline; cpudl_set(&rq->rd->rtwsc_cpudl, rq->cpu, rtws_rq->earliest_dl, 0, 1); smp_wmb(); } } printk(KERN_INFO "dequeing task %d on cpu %d, current deadline %llu, remaining tasks %lu\n", p->pid, rq->cpu, rtws_rq->earliest_dl, rtws_rq->nr_running); }
/* * Task blocks on lock. * * Prepare waiter and propagate pi chain * * This must be called with lock->wait_lock held. */ static int task_blocks_on_rt_mutex(struct rt_mutex *lock, struct rt_mutex_waiter *waiter, struct task_struct *task, int detect_deadlock) { struct task_struct *owner = rt_mutex_owner(lock); struct rt_mutex_waiter *top_waiter = waiter; unsigned long flags; int chain_walk = 0, res; raw_spin_lock_irqsave(&task->pi_lock, flags); __rt_mutex_adjust_prio(task); waiter->task = task; waiter->lock = lock; plist_node_init(&waiter->list_entry, task->prio); plist_node_init(&waiter->pi_list_entry, task->prio); /* Get the top priority waiter on the lock */ if (rt_mutex_has_waiters(lock)) top_waiter = rt_mutex_top_waiter(lock); plist_add(&waiter->list_entry, &lock->wait_list); task->pi_blocked_on = waiter; raw_spin_unlock_irqrestore(&task->pi_lock, flags); if (!owner) return 0; if (waiter == rt_mutex_top_waiter(lock)) { raw_spin_lock_irqsave(&owner->pi_lock, flags); plist_del(&top_waiter->pi_list_entry, &owner->pi_waiters); plist_add(&waiter->pi_list_entry, &owner->pi_waiters); __rt_mutex_adjust_prio(owner); if (owner->pi_blocked_on) chain_walk = 1; raw_spin_unlock_irqrestore(&owner->pi_lock, flags); } else if (debug_rt_mutex_detect_deadlock(waiter, detect_deadlock)) chain_walk = 1; if (!chain_walk) return 0; /* * The owner can't disappear while holding a lock, * so the owner struct is protected by wait_lock. * Gets dropped in rt_mutex_adjust_prio_chain()! */ get_task_struct(owner); raw_spin_unlock(&lock->wait_lock); res = rt_mutex_adjust_prio_chain(owner, detect_deadlock, lock, waiter, task); raw_spin_lock(&lock->wait_lock); return res; }
/*
 * Adjust the priority chain. Also used for deadlock detection.
 * Decreases task's usage by one - may thus free the task.
 * Returns 0 or -EDEADLK.
 *
 * @task:            task to start the walk at; its reference is dropped
 *                   on every exit path
 * @deadlock_detect: when non-zero, a detected deadlock or an exceeded
 *                   walk depth is reported as -EDEADLK
 * @orig_lock:       the lock the walk started from
 * @orig_waiter:     the waiter the walk started from (tested against NULL
 *                   below)
 * @top_task:        the task named in the depth warning and compared
 *                   against lock owners for deadlock detection
 */
static int rt_mutex_adjust_prio_chain(struct task_struct *task,
				      int deadlock_detect,
				      struct rt_mutex *orig_lock,
				      struct rt_mutex_waiter *orig_waiter,
				      struct task_struct *top_task)
{
	struct rt_mutex *lock;
	struct rt_mutex_waiter *waiter, *top_waiter = orig_waiter;
	int detect_deadlock, ret = 0, depth = 0;
	unsigned long flags;

	detect_deadlock = debug_rt_mutex_detect_deadlock(orig_waiter,
							 deadlock_detect);

	/*
	 * The (de)boosting is a step by step approach with a lot of
	 * pitfalls. We want this to be preemptible and we want hold a
	 * maximum of two locks per step. So we have to check
	 * carefully whether things change under us.
	 */
 again:
	if (++depth > max_lock_depth) {
		static int prev_max;

		/*
		 * Print this only once. If the admin changes the limit,
		 * print a new message when reaching the limit again.
		 */
		if (prev_max != max_lock_depth) {
			prev_max = max_lock_depth;
			printk(KERN_WARNING "Maximum lock depth %d reached "
			       "task: %s (%d)\n", max_lock_depth,
			       top_task->comm, task_pid_nr(top_task));
		}
		put_task_struct(task);

		return deadlock_detect ? -EDEADLK : 0;
	}
 retry:
	/*
	 * Task can not go away as we did a get_task() before !
	 */
	raw_spin_lock_irqsave(&task->pi_lock, flags);

	waiter = task->pi_blocked_on;
	/*
	 * Check whether the end of the boosting chain has been
	 * reached or the state of the chain has changed while we
	 * dropped the locks.
	 */
	if (!waiter)
		goto out_unlock_pi;

	/*
	 * Check the orig_waiter state. After we dropped the locks,
	 * the previous owner of the lock might have released the lock.
	 */
	if (orig_waiter && !rt_mutex_owner(orig_lock))
		goto out_unlock_pi;

	/*
	 * Drop out, when the task has no waiters. Note,
	 * top_waiter can be NULL, when we are in the deboosting
	 * mode!
	 */
	if (top_waiter && (!task_has_pi_waiters(task) ||
			   top_waiter != task_top_pi_waiter(task)))
		goto out_unlock_pi;

	/*
	 * When deadlock detection is off then we check, if further
	 * priority adjustment is necessary.
	 */
	if (!detect_deadlock && waiter->list_entry.prio == task->prio)
		goto out_unlock_pi;

	lock = waiter->lock;
	/*
	 * Only a trylock is attempted on wait_lock while pi_lock is held;
	 * on failure pi_lock is dropped and the step is retried.
	 */
	if (!raw_spin_trylock(&lock->wait_lock)) {
		raw_spin_unlock_irqrestore(&task->pi_lock, flags);
		cpu_relax();
		goto retry;
	}

	/* Deadlock detection */
	if (lock == orig_lock || rt_mutex_owner(lock) == top_task) {
		debug_rt_mutex_deadlock(deadlock_detect, orig_waiter, lock);
		raw_spin_unlock(&lock->wait_lock);
		ret = deadlock_detect ? -EDEADLK : 0;
		goto out_unlock_pi;
	}

	/* Remember the top waiter before the requeue for the checks below. */
	top_waiter = rt_mutex_top_waiter(lock);

	/* Requeue the waiter */
	plist_del(&waiter->list_entry, &lock->wait_list);
	waiter->list_entry.prio = task->prio;
	plist_add(&waiter->list_entry, &lock->wait_list);

	/* Release the task */
	raw_spin_unlock_irqrestore(&task->pi_lock, flags);
	if (!rt_mutex_owner(lock)) {
		/*
		 * If the requeue above changed the top waiter, then we need
		 * to wake the new top waiter up to try to get the lock.
		 */

		if (top_waiter != rt_mutex_top_waiter(lock))
			wake_up_process(rt_mutex_top_waiter(lock)->task);
		raw_spin_unlock(&lock->wait_lock);
		goto out_put_task;
	}
	put_task_struct(task);

	/* Grab the next task */
	task = rt_mutex_owner(lock);
	get_task_struct(task);
	raw_spin_lock_irqsave(&task->pi_lock, flags);

	if (waiter == rt_mutex_top_waiter(lock)) {
		/* Boost the owner */
		plist_del(&top_waiter->pi_list_entry, &task->pi_waiters);
		waiter->pi_list_entry.prio = waiter->list_entry.prio;
		plist_add(&waiter->pi_list_entry, &task->pi_waiters);
		__rt_mutex_adjust_prio(task);

	} else if (top_waiter == waiter) {
		/* Deboost the owner */
		plist_del(&waiter->pi_list_entry, &task->pi_waiters);
		waiter = rt_mutex_top_waiter(lock);
		waiter->pi_list_entry.prio = waiter->list_entry.prio;
		plist_add(&waiter->pi_list_entry, &task->pi_waiters);
		__rt_mutex_adjust_prio(task);
	}

	raw_spin_unlock_irqrestore(&task->pi_lock, flags);

	top_waiter = rt_mutex_top_waiter(lock);
	raw_spin_unlock(&lock->wait_lock);

	/* Without deadlock detection, stop once our waiter is not on top. */
	if (!detect_deadlock && waiter != top_waiter)
		goto out_put_task;

	goto again;

 out_unlock_pi:
	raw_spin_unlock_irqrestore(&task->pi_lock, flags);
 out_put_task:
	put_task_struct(task);

	return ret;
}
/*
 * A cpu has been removed from cpu_online_mask. Reset irq affinities.
 *
 * For every interrupt that has an action, is not per-CPU and whose
 * affinity is not a subset of the online CPUs, the affinity is set again
 * through the chip's irq_set_affinity callback. If no online CPU is left
 * in the affinity mask, cpu_online_mask is used instead.
 *
 * NOTE(review): this excerpt ends right after the loop; the remainder of
 * the function (including its closing brace) is not visible here, and
 * 'vector' is not used in the visible part - confirm against the full file.
 */
void fixup_irqs(void)
{
	unsigned int irq, vector;
	static int warned;
	struct irq_desc *desc;
	struct irq_data *data;
	struct irq_chip *chip;

	for_each_irq_desc(irq, desc) {
		int break_affinity = 0;
		int set_affinity = 1;
		const struct cpumask *affinity;

		if (!desc)
			continue;
		/* IRQ 2 is always skipped. */
		if (irq == 2)
			continue;

		/* Interrupts are disabled at this point */
		raw_spin_lock(&desc->lock);

		data = irq_desc_get_irq_data(desc);
		affinity = data->affinity;
		/* Nothing to fix up for this interrupt. */
		if (!irq_has_action(irq) || irqd_is_per_cpu(data) ||
		    cpumask_subset(affinity, cpu_online_mask)) {
			raw_spin_unlock(&desc->lock);
			continue;
		}

		/*
		 * Complete the irq move. This cpu is going down and for
		 * non intr-remapping case, we can't wait till this interrupt
		 * arrives at this cpu before completing the irq move.
		 */
		irq_force_complete_move(irq);

		/* No online CPU left in the mask: fall back to all online CPUs. */
		if (cpumask_any_and(affinity, cpu_online_mask) >= nr_cpu_ids) {
			break_affinity = 1;
			affinity = cpu_online_mask;
		}

		chip = irq_data_get_irq_chip(data);
		/* Mask around the affinity change when required and possible. */
		if (!irqd_can_move_in_process_context(data) && chip->irq_mask)
			chip->irq_mask(data);

		/*
		 * Without an irq_set_affinity callback, set_affinity is
		 * cleared only the first time this is hit (static 'warned').
		 */
		if (chip->irq_set_affinity)
			chip->irq_set_affinity(data, affinity, true);
		else if (!(warned++))
			set_affinity = 0;

		/*
		 * We unmask if the irq was not marked masked by the
		 * core code. That respects the lazy irq disable
		 * behaviour.
		 */
		if (!irqd_can_move_in_process_context(data) &&
		    !irqd_irq_masked(data) && chip->irq_unmask)
			chip->irq_unmask(data);

		raw_spin_unlock(&desc->lock);

		if (break_affinity && set_affinity)
			pr_notice("Broke affinity for irq %i\n", irq);
		else if (!set_affinity)
			pr_notice("Cannot set affinity for irq %i\n", irq);
	}
int __tick_broadcast_oneshot_control(enum tick_broadcast_state state) { struct clock_event_device *bc, *dev; int cpu, ret = 0; ktime_t now; /* * If there is no broadcast device, tell the caller not to go * into deep idle. */ if (!tick_broadcast_device.evtdev) return -EBUSY; dev = this_cpu_ptr(&tick_cpu_device)->evtdev; raw_spin_lock(&tick_broadcast_lock); bc = tick_broadcast_device.evtdev; cpu = smp_processor_id(); if (state == TICK_BROADCAST_ENTER) { /* * If the current CPU owns the hrtimer broadcast * mechanism, it cannot go deep idle and we do not add * the CPU to the broadcast mask. We don't have to go * through the EXIT path as the local timer is not * shutdown. */ ret = broadcast_needs_cpu(bc, cpu); if (ret) goto out; /* * If the broadcast device is in periodic mode, we * return. */ if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC) { /* If it is a hrtimer based broadcast, return busy */ if (bc->features & CLOCK_EVT_FEAT_HRTIMER) ret = -EBUSY; goto out; } if (!cpumask_test_and_set_cpu(cpu, tick_broadcast_oneshot_mask)) { WARN_ON_ONCE(cpumask_test_cpu(cpu, tick_broadcast_pending_mask)); /* Conditionally shut down the local timer. */ broadcast_shutdown_local(bc, dev); /* * We only reprogram the broadcast timer if we * did not mark ourself in the force mask and * if the cpu local event is earlier than the * broadcast event. If the current CPU is in * the force mask, then we are going to be * woken by the IPI right away; we return * busy, so the CPU does not try to go deep * idle. */ if (cpumask_test_cpu(cpu, tick_broadcast_force_mask)) { ret = -EBUSY; } else if (dev->next_event < bc->next_event) { tick_broadcast_set_event(bc, cpu, dev->next_event); /* * In case of hrtimer broadcasts the * programming might have moved the * timer to this cpu. If yes, remove * us from the broadcast mask and * return busy. 
*/ ret = broadcast_needs_cpu(bc, cpu); if (ret) { cpumask_clear_cpu(cpu, tick_broadcast_oneshot_mask); } } } } else { if (cpumask_test_and_clear_cpu(cpu, tick_broadcast_oneshot_mask)) { clockevents_switch_state(dev, CLOCK_EVT_STATE_ONESHOT); /* * The cpu which was handling the broadcast * timer marked this cpu in the broadcast * pending mask and fired the broadcast * IPI. So we are going to handle the expired * event anyway via the broadcast IPI * handler. No need to reprogram the timer * with an already expired event. */ if (cpumask_test_and_clear_cpu(cpu, tick_broadcast_pending_mask)) goto out; /* * Bail out if there is no next event. */ if (dev->next_event == KTIME_MAX) goto out; /* * If the pending bit is not set, then we are * either the CPU handling the broadcast * interrupt or we got woken by something else. * * We are not longer in the broadcast mask, so * if the cpu local expiry time is already * reached, we would reprogram the cpu local * timer with an already expired event. * * This can lead to a ping-pong when we return * to idle and therefor rearm the broadcast * timer before the cpu local timer was able * to fire. This happens because the forced * reprogramming makes sure that the event * will happen in the future and depending on * the min_delta setting this might be far * enough out that the ping-pong starts. * * If the cpu local next_event has expired * then we know that the broadcast timer * next_event has expired as well and * broadcast is about to be handled. So we * avoid reprogramming and enforce that the * broadcast handler, which did not run yet, * will invoke the cpu local handler. * * We cannot call the handler directly from * here, because we might be in a NOHZ phase * and we did not go through the irq_enter() * nohz fixups. */ now = ktime_get(); if (dev->next_event <= now) { cpumask_set_cpu(cpu, tick_broadcast_force_mask); goto out; } /* * We got woken by something else. Reprogram * the cpu local timer device. 
*/ tick_program_event(dev->next_event, 1); } } out: raw_spin_unlock(&tick_broadcast_lock); return ret; }
/* * Handle oneshot mode broadcasting */ static void tick_handle_oneshot_broadcast(struct clock_event_device *dev) { struct tick_device *td; ktime_t now, next_event; int cpu, next_cpu = 0; bool bc_local; raw_spin_lock(&tick_broadcast_lock); dev->next_event = KTIME_MAX; next_event = KTIME_MAX; cpumask_clear(tmpmask); now = ktime_get(); /* Find all expired events */ for_each_cpu(cpu, tick_broadcast_oneshot_mask) { /* * Required for !SMP because for_each_cpu() reports * unconditionally CPU0 as set on UP kernels. */ if (!IS_ENABLED(CONFIG_SMP) && cpumask_empty(tick_broadcast_oneshot_mask)) break; td = &per_cpu(tick_cpu_device, cpu); if (td->evtdev->next_event <= now) { cpumask_set_cpu(cpu, tmpmask); /* * Mark the remote cpu in the pending mask, so * it can avoid reprogramming the cpu local * timer in tick_broadcast_oneshot_control(). */ cpumask_set_cpu(cpu, tick_broadcast_pending_mask); } else if (td->evtdev->next_event < next_event) { next_event = td->evtdev->next_event; next_cpu = cpu; } } /* * Remove the current cpu from the pending mask. The event is * delivered immediately in tick_do_broadcast() ! */ cpumask_clear_cpu(smp_processor_id(), tick_broadcast_pending_mask); /* Take care of enforced broadcast requests */ cpumask_or(tmpmask, tmpmask, tick_broadcast_force_mask); cpumask_clear(tick_broadcast_force_mask); /* * Sanity check. Catch the case where we try to broadcast to * offline cpus. */ if (WARN_ON_ONCE(!cpumask_subset(tmpmask, cpu_online_mask))) cpumask_and(tmpmask, tmpmask, cpu_online_mask); /* * Wakeup the cpus which have an expired event. */ bc_local = tick_do_broadcast(tmpmask); /* * Two reasons for reprogram: * * - The global event did not expire any CPU local * events. This happens in dyntick mode, as the maximum PIT * delta is quite small. 
* * - There are pending events on sleeping CPUs which were not * in the event mask */ if (next_event != KTIME_MAX) tick_broadcast_set_event(dev, next_cpu, next_event); raw_spin_unlock(&tick_broadcast_lock); if (bc_local) { td = this_cpu_ptr(&tick_cpu_device); td->evtdev->event_handler(td->evtdev); } }