/* Common code for rcu_idle_exit() and rcu_irq_enter(), see kernel/rcu/tree.c. */
static void rcu_idle_exit_common(long long oldval)
{
	if (oldval) {
		RCU_TRACE(trace_rcu_dyntick(TPS("++="),
					    oldval, rcu_dynticks_nesting));
		return;
	}
	RCU_TRACE(trace_rcu_dyntick(TPS("End"), oldval, rcu_dynticks_nesting));
	if (IS_ENABLED(CONFIG_RCU_TRACE) && !is_idle_task(current)) {
		struct task_struct *idle __maybe_unused =
			idle_task(smp_processor_id());

		RCU_TRACE(trace_rcu_dyntick(TPS("Exit error: not idle task"),
					    oldval, rcu_dynticks_nesting));
		ftrace_dump(DUMP_ALL);
		WARN_ONCE(1, "Current pid: %d comm: %s / Idle pid: %d comm: %s",
			  current->pid, current->comm,
			  idle->pid, idle->comm); /* must be idle task! */
	}
}
/* Common code for rcu_idle_exit() and rcu_irq_enter(), see kernel/rcutree.c. */
static void rcu_idle_exit_common(long long oldval)
{
	if (oldval) {
		RCU_TRACE(trace_rcu_dyntick("++=", oldval, rcu_dynticks_nesting));
		return;
	}
	RCU_TRACE(trace_rcu_dyntick("End", oldval, rcu_dynticks_nesting));
	if (!is_idle_task(current)) {
		struct task_struct *idle = idle_task(smp_processor_id());

		RCU_TRACE(trace_rcu_dyntick("Error on exit: not idle task",
					    oldval, rcu_dynticks_nesting));
		ftrace_dump(DUMP_ALL);
		WARN_ONCE(1, "Current pid: %d comm: %s / Idle pid: %d comm: %s",
			  current->pid, current->comm,
			  idle->pid, idle->comm); /* must be idle task! */
	}
}
/* Common code for rcu_idle_enter() and rcu_irq_exit(), see kernel/rcutree.c. */
static void rcu_idle_enter_common(long long oldval)
{
	if (rcu_dynticks_nesting) {
		RCU_TRACE(trace_rcu_dyntick("--=", oldval, rcu_dynticks_nesting));
		return;
	}
	RCU_TRACE(trace_rcu_dyntick("Start", oldval, rcu_dynticks_nesting));
	if (!is_idle_task(current)) {
		struct task_struct *idle = idle_task(smp_processor_id());

		RCU_TRACE(trace_rcu_dyntick("Error on entry: not idle task",
					    oldval, rcu_dynticks_nesting));
		ftrace_dump(DUMP_ALL);
		WARN_ONCE(1, "Current pid: %d comm: %s / Idle pid: %d comm: %s",
			  current->pid, current->comm,
			  idle->pid, idle->comm); /* must be idle task! */
	}
	rcu_sched_qs(0); /* implies rcu_bh_qsctr_inc(0) */
}
static int irq_notifier(struct notifier_block *self, unsigned long cmd, void *v)
{
	switch (cmd) {
	case CPU_CLUSTER_PM_ENTER:
		if (omap_type() == OMAP2_DEVICE_TYPE_GP)
			irq_save_context();
		else
			irq_save_secure_context();
		break;
	case CPU_CLUSTER_PM_EXIT:
		if (omap_type() == OMAP2_DEVICE_TYPE_GP)
			irq_sar_clear();
		break;
	case CPU_PM_EXIT:
		if (!is_idle_task(current))
			omap_wakeupgen_check_interrupts("At Resume");
		break;
	}
	return NOTIFY_OK;
}
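/*
 * For context: a minimal sketch of how the callback above is hooked into
 * the CPU PM notifier chain via cpu_pm_register_notifier().  The
 * irq_notifier_block/irq_pm_init names below are illustrative, not
 * necessarily the exact in-tree identifiers.
 */
static struct notifier_block irq_notifier_block = {
	.notifier_call = irq_notifier,
};

static void __init irq_pm_init(void)
{
	cpu_pm_register_notifier(&irq_notifier_block);
}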
static void rcu_idle_exit_common(struct rcu_dynticks *rdtp, long long oldval)
{
	smp_mb__before_atomic_inc();  /* Force ordering w/previous sojourn. */
	atomic_inc(&rdtp->dynticks);
	smp_mb__after_atomic_inc();
	WARN_ON_ONCE(!(atomic_read(&rdtp->dynticks) & 0x1));
	rcu_cleanup_after_idle(smp_processor_id());
	trace_rcu_dyntick("End", oldval, rdtp->dynticks_nesting);
	if (!is_idle_task(current)) {
		struct task_struct *idle = idle_task(smp_processor_id());

		trace_rcu_dyntick("Error on exit: not idle task",
				  oldval, rdtp->dynticks_nesting);
		ftrace_dump(DUMP_ALL);
		WARN_ONCE(1, "Current pid: %d comm: %s / Idle pid: %d comm: %s",
			  current->pid, current->comm,
			  idle->pid, idle->comm);
	}
}
/*
 * Invoke the RCU callbacks on the specified rcu_ctrlblk structure
 * whose grace period has elapsed.
 */
static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp)
{
	const char *rn = NULL;
	struct rcu_head *next, *list;
	unsigned long flags;
	RCU_TRACE(int cb_count = 0);

	/* Move the ready-to-invoke callbacks to a local list. */
	local_irq_save(flags);
	if (rcp->donetail == &rcp->rcucblist) {
		/* No callbacks ready, so just leave. */
		local_irq_restore(flags);
		return;
	}
	RCU_TRACE(trace_rcu_batch_start(rcp->name, 0, rcp->qlen, -1));
	list = rcp->rcucblist;
	rcp->rcucblist = *rcp->donetail;
	*rcp->donetail = NULL;
	if (rcp->curtail == rcp->donetail)
		rcp->curtail = &rcp->rcucblist;
	rcp->donetail = &rcp->rcucblist;
	local_irq_restore(flags);

	/* Invoke the callbacks on the local list. */
	RCU_TRACE(rn = rcp->name);
	while (list) {
		next = list->next;
		prefetch(next);
		debug_rcu_head_unqueue(list);
		local_bh_disable();
		__rcu_reclaim(rn, list);
		local_bh_enable();
		list = next;
		RCU_TRACE(cb_count++);
	}
	RCU_TRACE(rcu_trace_sub_qlen(rcp, cb_count));
	RCU_TRACE(trace_rcu_batch_end(rcp->name, cb_count, 0, need_resched(),
				      is_idle_task(current), false));
}
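/*
 * For context: a minimal sketch of the Tiny RCU softirq handler that
 * drives __rcu_process_callbacks() for both flavors.  Control-block
 * names follow kernel/rcu/tiny.c; treat the exact wiring as an
 * illustrative assumption rather than the verbatim in-tree code.
 */
static void rcu_process_callbacks(struct softirq_action *unused)
{
	__rcu_process_callbacks(&rcu_sched_ctrlblk);
	__rcu_process_callbacks(&rcu_bh_ctrlblk);
}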
/*
 * Helper function for call_rcu() and call_rcu_bh().
 */
static void __call_rcu(struct rcu_head *head,
		       rcu_callback_t func,
		       struct rcu_ctrlblk *rcp)
{
	unsigned long flags;

	debug_rcu_head_queue(head);
	head->func = func;
	head->next = NULL;

	local_irq_save(flags);
	*rcp->curtail = head;
	rcp->curtail = &head->next;
	RCU_TRACE(rcp->qlen++);
	local_irq_restore(flags);

	if (unlikely(is_idle_task(current))) {
		/* force scheduling for rcu_sched_qs() */
		resched_cpu(0);
	}
}
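/*
 * A minimal sketch of how the public Tiny RCU entry points delegate to
 * __call_rcu() with their per-flavor control block.  The names
 * rcu_sched_ctrlblk and rcu_bh_ctrlblk are assumed from
 * kernel/rcu/tiny.c; this is illustrative, not the exact in-tree code.
 */
void call_rcu_sched(struct rcu_head *head, rcu_callback_t func)
{
	__call_rcu(head, func, &rcu_sched_ctrlblk);
}

void call_rcu_bh(struct rcu_head *head, rcu_callback_t func)
{
	__call_rcu(head, func, &rcu_bh_ctrlblk);
}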
static noinline void force_sig_info_fault(const char *type, int si_signo,
					  int si_code, unsigned long address,
					  int fault_num,
					  struct task_struct *tsk,
					  struct pt_regs *regs)
{
	siginfo_t info;

	if (unlikely(tsk->pid < 2)) {
		panic("Signal %d (code %d) at %#lx sent to %s!",
		      si_signo, si_code & 0xffff, address,
		      is_idle_task(tsk) ? "the idle task" : "init");
	}

	info.si_signo = si_signo;
	info.si_errno = 0;
	info.si_code = si_code;
	info.si_addr = (void __user *)address;
	info.si_trapno = fault_num;
	trace_unhandled_signal(type, regs, address, si_signo);
	force_sig_info(si_signo, &info, tsk);
}
/*
 * Fetch cputime raw values from fields of task_struct and
 * add up the pending nohz execution time since the last
 * cputime snapshot.
 */
static void
fetch_task_cputime(struct task_struct *t,
		   cputime_t *u_dst, cputime_t *s_dst,
		   cputime_t *u_src, cputime_t *s_src,
		   cputime_t *udelta, cputime_t *sdelta)
{
	unsigned int seq;
	unsigned long long delta;

	do {
		*udelta = 0;
		*sdelta = 0;

		seq = read_seqbegin(&t->vtime_seqlock);

		if (u_dst)
			*u_dst = *u_src;
		if (s_dst)
			*s_dst = *s_src;

		/* Task is sleeping, nothing to add */
		if (t->vtime_snap_whence == VTIME_SLEEPING ||
		    is_idle_task(t))
			continue;

		delta = vtime_delta(t);

		/*
		 * Task runs either in user or kernel space, add pending nohz
		 * time to the right place.
		 */
		if (t->vtime_snap_whence == VTIME_USER || t->flags & PF_VCPU) {
			*udelta = delta;
		} else {
			if (t->vtime_snap_whence == VTIME_SYS)
				*sdelta = delta;
		}
	} while (read_seqretry(&t->vtime_seqlock, seq));
}
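/*
 * A minimal sketch of a caller that folds the returned deltas back into
 * the snapshotted values, in the style of task_cputime().  The t->utime
 * and t->stime field names are assumed from the same era as the helper
 * above; treat this as illustrative rather than the exact in-tree code.
 */
void task_cputime(struct task_struct *t, cputime_t *utime, cputime_t *stime)
{
	cputime_t udelta, sdelta;

	fetch_task_cputime(t, utime, stime,
			   &t->utime, &t->stime, &udelta, &sdelta);
	if (utime)
		*utime += udelta;
	if (stime)
		*stime += sdelta;
}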
/* Common code for rcu_idle_enter() and rcu_irq_exit(), see kernel/rcu/tree.c. */
static void rcu_idle_enter_common(long long newval)
{
	if (newval) {
		RCU_TRACE(trace_rcu_dyntick(TPS("--="),
					    rcu_dynticks_nesting, newval));
		rcu_dynticks_nesting = newval;
		return;
	}
	RCU_TRACE(trace_rcu_dyntick(TPS("Start"),
				    rcu_dynticks_nesting, newval));
	if (IS_ENABLED(CONFIG_RCU_TRACE) && !is_idle_task(current)) {
		struct task_struct *idle __maybe_unused =
			idle_task(smp_processor_id());

		RCU_TRACE(trace_rcu_dyntick(TPS("Entry error: not idle task"),
					    rcu_dynticks_nesting, newval));
		ftrace_dump(DUMP_ALL);
		WARN_ONCE(1, "Current pid: %d comm: %s / Idle pid: %d comm: %s",
			  current->pid, current->comm,
			  idle->pid, idle->comm); /* must be idle task! */
	}
	rcu_sched_qs(); /* implies rcu_bh_inc() */
	barrier();
	rcu_dynticks_nesting = newval;
}
void vtime_account_irq_enter(struct task_struct *tsk)
{
	if (!vtime_accounting_enabled())
		return;

	if (!in_interrupt()) {
		/*
		 * If we interrupted user, context_tracking_in_user()
		 * is 1 because context tracking doesn't hook on
		 * irq entry/exit. This way we know if we need to
		 * flush user time on kernel entry.
		 */
		if (context_tracking_in_user()) {
			vtime_account_user(tsk);
			return;
		}

		if (is_idle_task(tsk)) {
			vtime_account_idle(tsk);
			return;
		}
	}
	vtime_account_system(tsk);
}
/*
 * This routine is responsible for faulting in user pages.
 * It passes the work off to one of the appropriate routines.
 * It returns true if the fault was successfully handled.
 */
static int handle_page_fault(struct pt_regs *regs,
			     int fault_num,
			     int is_page_fault,
			     unsigned long address,
			     int write)
{
	struct task_struct *tsk;
	struct mm_struct *mm;
	struct vm_area_struct *vma;
	unsigned long stack_offset;
	int fault;
	int si_code;
	int is_kernel_mode;
	pgd_t *pgd;
	unsigned int flags;

	/* on TILE, protection faults are always writes */
	if (!is_page_fault)
		write = 1;

	flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;

	is_kernel_mode = (EX1_PL(regs->ex1) != USER_PL);

	tsk = validate_current();

	/*
	 * Check to see if we might be overwriting the stack, and bail
	 * out if so.  The page fault code is a relatively likely
	 * place to get trapped in an infinite regress, and once we
	 * overwrite the whole stack, it becomes very hard to recover.
	 */
	stack_offset = stack_pointer & (THREAD_SIZE-1);
	if (stack_offset < THREAD_SIZE / 8) {
		pr_alert("Potential stack overrun: sp %#lx\n", stack_pointer);
		show_regs(regs);
		pr_alert("Killing current process %d/%s\n",
			 tsk->pid, tsk->comm);
		do_group_exit(SIGKILL);
	}

	/*
	 * Early on, we need to check for migrating PTE entries;
	 * see homecache.c.  If we find a migrating PTE, we wait until
	 * the backing page claims to be done migrating, then we proceed.
	 * For kernel PTEs, we rewrite the PTE and return and retry.
	 * Otherwise, we treat the fault like a normal "no PTE" fault,
	 * rather than trying to patch up the existing PTE.
	 */
	pgd = get_current_pgd();
	if (handle_migrating_pte(pgd, fault_num, address, regs->pc,
				 is_kernel_mode, write))
		return 1;

	si_code = SEGV_MAPERR;

	/*
	 * We fault-in kernel-space virtual memory on-demand. The
	 * 'reference' page table is init_mm.pgd.
	 *
	 * NOTE! We MUST NOT take any locks for this case. We may
	 * be in an interrupt or a critical region, and should
	 * only copy the information from the master page table,
	 * nothing more.
	 *
	 * This verifies that the fault happens in kernel space
	 * and that the fault was not a protection fault.
	 */
	if (unlikely(address >= TASK_SIZE &&
		     !is_arch_mappable_range(address, 0))) {
		if (is_kernel_mode && is_page_fault &&
		    vmalloc_fault(pgd, address) >= 0)
			return 1;
		/*
		 * Don't take the mm semaphore here. If we fixup a prefetch
		 * fault we could otherwise deadlock.
		 */
		mm = NULL;  /* happy compiler */
		vma = NULL;
		goto bad_area_nosemaphore;
	}

	/*
	 * If we're trying to touch user-space addresses, we must
	 * be either at PL0, or else with interrupts enabled in the
	 * kernel, so either way we can re-enable interrupts here
	 * unless we are doing atomic access to user space with
	 * interrupts disabled.
	 */
	if (!(regs->flags & PT_FLAGS_DISABLE_IRQ))
		local_irq_enable();

	mm = tsk->mm;

	/*
	 * If we're in an interrupt, have no user context or are running in an
	 * atomic region then we must not take the fault.
	 */
	if (in_atomic() || !mm) {
		vma = NULL;  /* happy compiler */
		goto bad_area_nosemaphore;
	}

	if (!is_kernel_mode)
		flags |= FAULT_FLAG_USER;

	/*
	 * When running in the kernel we expect faults to occur only to
	 * addresses in user space.  All other faults represent errors in the
	 * kernel and should generate an OOPS.  Unfortunately, in the case of an
	 * erroneous fault occurring in a code path which already holds mmap_sem
	 * we will deadlock attempting to validate the fault against the
	 * address space.  Luckily the kernel only validly references user
	 * space from well defined areas of code, which are listed in the
	 * exceptions table.
	 *
	 * As the vast majority of faults will be valid we will only perform
	 * the source reference check when there is a possibility of a deadlock.
	 * Attempt to lock the address space, if we cannot we then validate the
	 * source.  If this is invalid we can skip the address space check,
	 * thus avoiding the deadlock.
	 */
	if (!down_read_trylock(&mm->mmap_sem)) {
		if (is_kernel_mode &&
		    !search_exception_tables(regs->pc)) {
			vma = NULL;  /* happy compiler */
			goto bad_area_nosemaphore;
		}
		down_read(&mm->mmap_sem);
	}

	vma = find_vma(mm, address);
	if (!vma)
		goto bad_area;
	if (vma->vm_start <= address)
		goto good_area;
	if (!(vma->vm_flags & VM_GROWSDOWN))
		goto bad_area;
	if (regs->sp < PAGE_OFFSET) {
		/*
		 * accessing the stack below sp is always a bug.
		 */
		if (address < regs->sp)
			goto bad_area;
	}
	if (expand_stack(vma, address))
		goto bad_area;

/*
 * Ok, we have a good vm_area for this memory access, so
 * we can handle it..
 */
good_area:
	si_code = SEGV_ACCERR;
	if (fault_num == INT_ITLB_MISS) {
		if (!(vma->vm_flags & VM_EXEC))
			goto bad_area;
	} else if (write) {
#ifdef TEST_VERIFY_AREA
		if (!is_page_fault && regs->cs == KERNEL_CS)
			pr_err("WP fault at "REGFMT"\n", regs->eip);
#endif
		if (!(vma->vm_flags & VM_WRITE))
			goto bad_area;
		flags |= FAULT_FLAG_WRITE;
	} else {
		if (!is_page_fault || !(vma->vm_flags & VM_READ))
			goto bad_area;
	}

	/*
	 * If for any reason at all we couldn't handle the fault,
	 * make sure we exit gracefully rather than endlessly redo
	 * the fault.
	 */
	fault = handle_mm_fault(mm, vma, address, write);
	if (unlikely(fault & VM_FAULT_ERROR)) {
		if (fault & VM_FAULT_OOM)
			goto out_of_memory;
		else if (fault & VM_FAULT_SIGBUS)
			goto do_sigbus;
		BUG();
	}
	if (fault & VM_FAULT_MAJOR)
		tsk->maj_flt++;
	else
		tsk->min_flt++;

#if CHIP_HAS_TILE_DMA() || CHIP_HAS_SN_PROC()
	/*
	 * If this was an asynchronous fault,
	 * restart the appropriate engine.
	 */
	switch (fault_num) {
#if CHIP_HAS_TILE_DMA()
	case INT_DMATLB_MISS:
	case INT_DMATLB_MISS_DWNCL:
	case INT_DMATLB_ACCESS:
	case INT_DMATLB_ACCESS_DWNCL:
		__insn_mtspr(SPR_DMA_CTR, SPR_DMA_CTR__REQUEST_MASK);
		break;
#endif
#if CHIP_HAS_SN_PROC()
	case INT_SNITLB_MISS:
	case INT_SNITLB_MISS_DWNCL:
		__insn_mtspr(SPR_SNCTL,
			     __insn_mfspr(SPR_SNCTL) &
			     ~SPR_SNCTL__FRZPROC_MASK);
		break;
#endif
	}
#endif

	up_read(&mm->mmap_sem);
	return 1;

/*
 * Something tried to access memory that isn't in our memory map..
 * Fix it, but check if it's kernel or user first..
 */
bad_area:
	up_read(&mm->mmap_sem);

bad_area_nosemaphore:
	/* User mode accesses just cause a SIGSEGV */
	if (!is_kernel_mode) {
		/*
		 * It's possible to have interrupts off here.
		 */
		local_irq_enable();

		force_sig_info_fault("segfault", SIGSEGV, si_code, address,
				     fault_num, tsk, regs);
		return 0;
	}

no_context:
	/* Are we prepared to handle this kernel fault?  */
	if (fixup_exception(regs))
		return 0;

	/*
	 * Oops. The kernel tried to access some bad page. We'll have to
	 * terminate things with extreme prejudice.
	 */

	bust_spinlocks(1);

	/* FIXME: no lookup_address() yet */
#ifdef SUPPORT_LOOKUP_ADDRESS
	if (fault_num == INT_ITLB_MISS) {
		pte_t *pte = lookup_address(address);

		if (pte && pte_present(*pte) && !pte_exec_kernel(*pte))
			pr_crit("kernel tried to execute"
				" non-executable page - exploit attempt?"
				" (uid: %d)\n", current->uid);
	}
#endif
	if (address < PAGE_SIZE)
		pr_alert("Unable to handle kernel NULL pointer dereference\n");
	else
		pr_alert("Unable to handle kernel paging request\n");
	pr_alert(" at virtual address "REGFMT", pc "REGFMT"\n",
		 address, regs->pc);

	show_regs(regs);

	if (unlikely(tsk->pid < 2)) {
		panic("Kernel page fault running %s!",
		      is_idle_task(tsk) ? "the idle task" : "init");
	}

	/*
	 * More FIXME: we should probably copy the i386 here and
	 * implement a generic die() routine.  Not today.
	 */
#ifdef SUPPORT_DIE
	die("Oops", regs);
#endif
	bust_spinlocks(1);

	do_group_exit(SIGKILL);

/*
 * We ran out of memory, or some other thing happened to us that made
 * us unable to handle the page fault gracefully.
 */
out_of_memory:
	up_read(&mm->mmap_sem);
	if (is_kernel_mode)
		goto no_context;
	pagefault_out_of_memory();
	return 0;

do_sigbus:
	up_read(&mm->mmap_sem);

	/* Kernel mode? Handle exceptions or die */
	if (is_kernel_mode)
		goto no_context;

	force_sig_info_fault("bus error", SIGBUS, BUS_ADRERR, address,
			     fault_num, tsk, regs);
	return 0;
}
/*
 * We rearm the timer until we get disabled by the idle code.
 * Called with interrupts disabled and timer->base->cpu_base->lock held.
 */
static enum hrtimer_restart tick_sched_timer(struct hrtimer *timer)
{
	struct tick_sched *ts =
		container_of(timer, struct tick_sched, sched_timer);
	struct pt_regs *regs = get_irq_regs();
	ktime_t now = ktime_get();
	int cpu = smp_processor_id();

#ifdef CONFIG_NO_HZ
	/*
	 * Check if the do_timer duty was dropped. We don't care about
	 * concurrency: This happens only when the cpu in charge went
	 * into a long sleep. If two cpus happen to assign themselves to
	 * this duty, then the jiffies update is still serialized by
	 * xtime_lock.
	 */
	if (unlikely(tick_do_timer_cpu == TICK_DO_TIMER_NONE))
		tick_do_timer_cpu = cpu;
#endif

	/* Check, if the jiffies need an update */
	if (tick_do_timer_cpu == cpu)
		tick_do_update_jiffies64(now);

	/*
	 * Do not call, when we are not in irq context and have
	 * no valid regs pointer
	 */
	if (regs) {
		/*
		 * When we are idle and the tick is stopped, we have to touch
		 * the watchdog as we might not schedule for a really long
		 * time. This happens on complete idle SMP systems while
		 * waiting on the login prompt. We also increment the "start of
		 * idle" jiffy stamp so the idle accounting adjustment we do
		 * when we go busy again does not account too many ticks.
		 */
		if (ts->tick_stopped) {
			touch_softlockup_watchdog();
			if (is_idle_task(current))
				ts->idle_jiffies++;
		}
		update_process_times(user_mode(regs));
		profile_tick(CPU_PROFILING);

		if ((rq_info.init == 1) && (tick_do_timer_cpu == cpu)) {
			/*
			 * update run queue statistics
			 */
			update_rq_stats();

			/*
			 * wakeup user if needed
			 */
			wakeup_user();
		}
	}

	hrtimer_forward(timer, now, tick_period);

	return HRTIMER_RESTART;
}
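/*
 * A minimal, abridged sketch of how the per-CPU sched_timer is armed
 * with tick_sched_timer() as its callback.  The helper name and the
 * first-expiry calculation below are illustrative assumptions; the real
 * tick_setup_sched_timer() does considerably more during high-resolution
 * bring-up.
 */
static void sched_timer_setup_sketch(struct tick_sched *ts)
{
	hrtimer_init(&ts->sched_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
	ts->sched_timer.function = tick_sched_timer;

	/* Arm the timer one tick period from now. */
	hrtimer_start(&ts->sched_timer,
		      ktime_add(ktime_get(), tick_period),
		      HRTIMER_MODE_ABS_PINNED);
}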
/**
 * sr_classp5_disable() - disable SR for a voltage domain
 * @sr: SmartReflex module which needs to be disabled
 * @is_volt_reset: reset the voltage?
 *
 * Depending on the is_volt_reset parameter, this function either disables
 * SR alone OR disables SR and resets the voltage to an appropriate level.
 *
 * NOTE: Appropriate locks must be held by the calling path to ensure mutual
 * exclusivity.
 */
static int sr_classp5_disable(struct omap_sr *sr, int is_volt_reset)
{
	struct voltagedomain *voltdm = NULL;
	struct omap_volt_data *volt_data = NULL;
	struct sr_classp5_calib_data *work_data = NULL;

	if (IS_ERR_OR_NULL(sr) || IS_ERR_OR_NULL(sr->voltdm)) {
		pr_err("%s: bad parameters!\n", __func__);
		return -EINVAL;
	}

	work_data = (struct sr_classp5_calib_data *)sr->voltdm_cdata;
	if (IS_ERR_OR_NULL(work_data)) {
		pr_err("%s: bad work data %s\n", __func__, sr->name);
		return -EINVAL;
	}

	if (is_idle_task(current)) {
		/*
		 * We should not have seen this path if calibration is not
		 * complete; the pm_qos constraint is already released after
		 * the voltage calibration work is finished.
		 */
		WARN_ON(work_data->work_active);

		return 0;
	}

	/* Rest is regular DVFS path */

	voltdm = sr->voltdm;
	volt_data = omap_voltage_get_curr_vdata(voltdm);
	if (IS_ERR_OR_NULL(volt_data)) {
		pr_warning("%s: Voltage data is NULL. Cannot disable %s\n",
			   __func__, sr->name);
		return -ENODATA;
	}

	/* need to do rest of code ONLY if required */
	if (volt_data->volt_calibrated && !work_data->work_active) {
		/*
		 * We are going OFF - disable clocks manually to allow OFF-mode.
		 */
		if (sr->suspended)
			sr->ops->put(sr);
		return 0;
	}

	if (work_data->work_active) {
		/* flag work is dead and remove the old work */
		work_data->work_active = false;
		cancel_delayed_work_sync(&work_data->work);
		sr_notifier_control(sr, false);
	}

	sr_classp5_stop_hw_loop(sr);

	if (is_volt_reset)
		voltdm_reset(sr->voltdm);

	/* Canceled SR, so no more need to keep request */
	pm_qos_update_request(&work_data->qos, PM_QOS_DEFAULT_VALUE);

	/*
	 * We are going OFF - disable clocks manually to allow OFF-mode.
	 */
	if (sr->suspended) {
		/* !!! Should never ever be here - no guarantee to recover !!! */
		WARN(true, "Trying to go OFF with invalid AVS state\n");
		sr->ops->put(sr);
	}

	return 0;
}
int save_stack_trace_tsk_reliable(struct task_struct *tsk,
				  struct stack_trace *trace)
{
	unsigned long sp;
	unsigned long stack_page = (unsigned long)task_stack_page(tsk);
	unsigned long stack_end;
	int graph_idx = 0;

	/*
	 * The last frame (unwinding first) may not yet have saved
	 * its LR onto the stack.
	 */
	int firstframe = 1;

	if (tsk == current)
		sp = current_stack_pointer();
	else
		sp = tsk->thread.ksp;

	stack_end = stack_page + THREAD_SIZE;
	if (!is_idle_task(tsk)) {
		/*
		 * For user tasks, this is the SP value loaded on
		 * kernel entry, see "PACAKSAVE(r13)" in _switch() and
		 * system_call_common()/EXCEPTION_PROLOG_COMMON().
		 *
		 * Likewise for non-swapper kernel threads,
		 * this also happens to be the top of the stack
		 * as setup by copy_thread().
		 *
		 * Note that stack backlinks are not properly setup by
		 * copy_thread() and thus, a forked task() will have
		 * an unreliable stack trace until it's been
		 * _switch()'ed to for the first time.
		 */
		stack_end -= STACK_FRAME_OVERHEAD + sizeof(struct pt_regs);
	} else {
		/*
		 * idle tasks have a custom stack layout,
		 * c.f. cpu_idle_thread_init().
		 */
		stack_end -= STACK_FRAME_OVERHEAD;
	}

	if (sp < stack_page + sizeof(struct thread_struct) ||
	    sp > stack_end - STACK_FRAME_MIN_SIZE) {
		return 1;
	}

	for (;;) {
		unsigned long *stack = (unsigned long *) sp;
		unsigned long newsp, ip;

		/* sanity check: ABI requires SP to be aligned 16 bytes. */
		if (sp & 0xF)
			return 1;

		/* Mark stacktraces with exception frames as unreliable. */
		if (sp <= stack_end - STACK_INT_FRAME_SIZE &&
		    stack[STACK_FRAME_MARKER] == STACK_FRAME_REGS_MARKER) {
			return 1;
		}

		newsp = stack[0];
		/* Stack grows downwards; unwinder may only go up. */
		if (newsp <= sp)
			return 1;

		if (newsp != stack_end &&
		    newsp > stack_end - STACK_FRAME_MIN_SIZE) {
			return 1; /* invalid backlink, too far up. */
		}

		/* Examine the saved LR: it must point into kernel code. */
		ip = stack[STACK_FRAME_LR_SAVE];
		if (!firstframe && !__kernel_text_address(ip))
			return 1;
		firstframe = 0;

		/*
		 * FIXME: IMHO these tests do not belong in
		 * arch-dependent code, they are generic.
		 */
		ip = ftrace_graph_ret_addr(tsk, &graph_idx, ip, NULL);
#ifdef CONFIG_KPROBES
		/*
		 * Mark stacktraces with kretprobed functions on them
		 * as unreliable.
		 */
		if (ip == (unsigned long)kretprobe_trampoline)
			return 1;
#endif

		if (!trace->skip)
			trace->entries[trace->nr_entries++] = ip;
		else
			trace->skip--;

		if (newsp == stack_end)
			break;

		if (trace->nr_entries >= trace->max_entries)
			return -E2BIG;

		sp = newsp;
	}
	return 0;
}
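/*
 * A minimal usage sketch: fill a struct stack_trace and treat any
 * nonzero return as "trace unreliable".  The wrapper name and buffer
 * size below are illustrative assumptions.
 */
static int show_reliable_stack(struct task_struct *tsk)
{
	unsigned long entries[64];
	struct stack_trace trace = {
		.entries	= entries,
		.max_entries	= ARRAY_SIZE(entries),
	};

	if (save_stack_trace_tsk_reliable(tsk, &trace))
		return -EINVAL;		/* trace deemed unreliable */

	print_stack_trace(&trace, 0);
	return 0;
}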
static void smp_callback(void *v)
{
	/* we already woke the CPU up, nothing more to do */
	if (is_idle_task(current))
		set_tsk_need_resched(current);
}
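/*
 * For context: a sketch of how smp_callback() is typically driven from a
 * PM-QoS latency notifier, kicking every CPU so that idle CPUs re-evaluate
 * their sleep state.  The notifier function name is an assumption; only
 * smp_callback() comes from the snippet above.
 */
static int cpuidle_latency_notify(struct notifier_block *b,
				  unsigned long l, void *v)
{
	smp_call_function(smp_callback, NULL, 1);
	return NOTIFY_OK;
}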