void show_registers(struct pt_regs * regs) { /* We either use rdusp() - the USP register, which might not correspond to the current process for all cases we're called, or we use the current->thread.usp, which is not up to date for the current process. Experience shows we want the USP register. */ unsigned long usp = rdusp(); raw_printk("IRP: %08lx SRP: %08lx DCCR: %08lx USP: %08lx MOF: %08lx\n", regs->irp, regs->srp, regs->dccr, usp, regs->mof ); raw_printk(" r0: %08lx r1: %08lx r2: %08lx r3: %08lx\n", regs->r0, regs->r1, regs->r2, regs->r3); raw_printk(" r4: %08lx r5: %08lx r6: %08lx r7: %08lx\n", regs->r4, regs->r5, regs->r6, regs->r7); raw_printk(" r8: %08lx r9: %08lx r10: %08lx r11: %08lx\n", regs->r8, regs->r9, regs->r10, regs->r11); raw_printk("r12: %08lx r13: %08lx oR10: %08lx sp: %08lx\n", regs->r12, regs->r13, regs->orig_r10, regs); raw_printk("R_MMU_CAUSE: %08lx\n", (unsigned long)*R_MMU_CAUSE); raw_printk("Process %s (pid: %d, stackpage=%08lx)\n", current->comm, current->pid, (unsigned long)current); /* * When in-kernel, we also print out the stack and code at the * time of the fault.. */ if (! user_mode(regs)) { int i; show_stack(NULL, (unsigned long*)usp); /* Dump kernel stack if the previous dump wasn't one. */ if (usp != 0) show_stack (NULL, NULL); raw_printk("\nCode: "); if(regs->irp < PAGE_OFFSET) goto bad; /* Often enough the value at regs->irp does not point to the interesting instruction, which is most often the _previous_ instruction. So we dump at an offset large enough that instruction decoding should be in sync at the interesting point, but small enough to fit on a row (sort of). We point out the regs->irp location in a ksymoops-friendly way by wrapping the byte for that address in parentheses. */ for(i = -12; i < 12; i++) { unsigned char c; if(__get_user(c, &((unsigned char*)regs->irp)[i])) { bad: raw_printk(" Bad IP value."); break; } if (i == 0) raw_printk("(%02x) ", c); else raw_printk("%02x ", c); } raw_printk("\n"); } }
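/* A small user-space sketch (not part of the kernel code above) of the dump format used for the "Code:" line: print the bytes around a pointer and wrap the byte at offset 0 in parentheses so the point of interest stands out. The -12..+11 window mirrors the kernel loop; dump_window() and the buffer are made up for the example. */
#include <stdio.h>

static void dump_window(const unsigned char *ip)
{
	int i;

	printf("Code: ");
	for (i = -12; i < 12; i++) {
		if (i == 0)
			printf("(%02x) ", ip[i]);	/* the "interesting" byte */
		else
			printf("%02x ", ip[i]);
	}
	printf("\n");
}

int main(void)
{
	unsigned char buf[32];
	int i;

	for (i = 0; i < 32; i++)
		buf[i] = (unsigned char)i;

	dump_window(buf + 16);	/* marks buf[16] with parentheses */
	return 0;
}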
static irqreturn_t timer_interrupt (int irq, void *dev_id) { unsigned long new_itm; if (cpu_is_offline(smp_processor_id())) { return IRQ_HANDLED; } platform_timer_interrupt(irq, dev_id); new_itm = local_cpu_data->itm_next; if (!time_after(ia64_get_itc(), new_itm)) printk(KERN_ERR "Oops: timer tick before it's due (itc=%lx,itm=%lx)\n", ia64_get_itc(), new_itm); profile_tick(CPU_PROFILING); if (paravirt_do_steal_accounting(&new_itm)) goto skip_process_time_accounting; while (1) { update_process_times(user_mode(get_irq_regs())); new_itm += local_cpu_data->itm_delta; if (smp_processor_id() == time_keeper_id) xtime_update(1); local_cpu_data->itm_next = new_itm; if (time_after(new_itm, ia64_get_itc())) break; /* * Allow IPIs to interrupt the timer loop. */ local_irq_enable(); local_irq_disable(); } skip_process_time_accounting: do { /* * If we're too close to the next clock tick for * comfort, we increase the safety margin by * intentionally dropping the next tick(s). We do NOT * update itm.next because that would force us to call * xtime_update() which in turn would let our clock run * too fast (with the potentially devastating effect * of losing monotony of time). */ while (!time_after(new_itm, ia64_get_itc() + local_cpu_data->itm_delta/2)) new_itm += local_cpu_data->itm_delta; ia64_set_itm(new_itm); /* double check, in case we got hit by a (slow) PMI: */ } while (time_after_eq(ia64_get_itc(), new_itm)); return IRQ_HANDLED; }
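/* The catch-up loop at the end of the handler above can be shown in isolation: the next deadline is pushed forward in whole periods until it sits at least half a period in the future, and the skipped expirations are simply dropped. A standalone, hypothetical sketch; 'counter' stands in for ia64_get_itc(), 'deadline' for itm_next, 'period' for itm_delta, and plain comparisons are used instead of time_after(), so counter wrap-around is ignored. */
#include <stdio.h>

static unsigned long advance_deadline(unsigned long counter,
				      unsigned long deadline,
				      unsigned long period)
{
	/* Skip whole periods until the deadline is comfortably ahead; the
	 * skipped ticks are intentionally lost, as the comment above explains,
	 * so nothing here pushes wall time forward. */
	while (deadline <= counter + period / 2)
		deadline += period;
	return deadline;
}

int main(void)
{
	/* Example: we are several periods late. */
	printf("next deadline: %lu\n", advance_deadline(1000, 400, 100));
	return 0;
}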
static int watchpoint_handler(unsigned long addr, unsigned int esr, struct pt_regs *regs) { int i, step = 0, *kernel_step, access; u32 ctrl_reg; u64 val, alignment_mask; struct perf_event *wp, **slots; struct debug_info *debug_info; struct arch_hw_breakpoint *info; struct arch_hw_breakpoint_ctrl ctrl; slots = (struct perf_event **)__get_cpu_var(wp_on_reg); debug_info = &current->thread.debug; for (i = 0; i < core_num_wrps; ++i) { rcu_read_lock(); wp = slots[i]; if (wp == NULL) goto unlock; info = counter_arch_bp(wp); /* AArch32 watchpoints are either 4 or 8 bytes aligned. */ if (is_compat_task()) { if (info->ctrl.len == ARM_BREAKPOINT_LEN_8) alignment_mask = 0x7; else alignment_mask = 0x3; } else { alignment_mask = 0x7; } /* Check if the watchpoint value matches. */ val = read_wb_reg(AARCH64_DBG_REG_WVR, i); if (val != (addr & ~alignment_mask)) goto unlock; /* Possible match, check the byte address select to confirm. */ ctrl_reg = read_wb_reg(AARCH64_DBG_REG_WCR, i); decode_ctrl_reg(ctrl_reg, &ctrl); if (!((1 << (addr & alignment_mask)) & ctrl.len)) goto unlock; /* * Check that the access type matches. * 0 => load, otherwise => store */ access = (esr & AARCH64_ESR_ACCESS_MASK) ? HW_BREAKPOINT_W : HW_BREAKPOINT_R; if (!(access & hw_breakpoint_type(wp))) goto unlock; info->trigger = addr; perf_bp_event(wp, regs); /* Do we need to handle the stepping? */ if (!wp->overflow_handler) step = 1; unlock: rcu_read_unlock(); } if (!step) return 0; /* * We always disable EL0 watchpoints because the kernel can * cause these to fire via an unprivileged access. */ toggle_bp_registers(AARCH64_DBG_REG_WCR, DBG_ACTIVE_EL0, 0); if (user_mode(regs)) { debug_info->wps_disabled = 1; /* If we're already stepping a breakpoint, just return. */ if (debug_info->bps_disabled) return 0; if (test_thread_flag(TIF_SINGLESTEP)) debug_info->suspended_step = 1; else user_enable_single_step(current); } else { toggle_bp_registers(AARCH64_DBG_REG_WCR, DBG_ACTIVE_EL1, 0); kernel_step = &__get_cpu_var(stepping_kernel_bp); if (*kernel_step != ARM_KERNEL_STEP_NONE) return 0; if (kernel_active_single_step()) { *kernel_step = ARM_KERNEL_STEP_SUSPEND; } else { *kernel_step = ARM_KERNEL_STEP_ACTIVE; kernel_enable_single_step(regs); } } return 0; }
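/* From user space, a watchpoint that ends up in one of the wp_on_reg slots checked above is normally installed through the perf_event interface. A minimal sketch, assuming the uapi perf_event/hw_breakpoint headers; error handling is mostly omitted and the watched variable is made up for the example. */
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/perf_event.h>
#include <linux/hw_breakpoint.h>

static int wp_value;	/* the word we want to watch for writes */

int main(void)
{
	struct perf_event_attr attr;
	int fd;

	memset(&attr, 0, sizeof(attr));
	attr.type = PERF_TYPE_BREAKPOINT;
	attr.size = sizeof(attr);
	attr.bp_type = HW_BREAKPOINT_W;		/* matches the access check above */
	attr.bp_addr = (unsigned long)&wp_value;
	attr.bp_len = HW_BREAKPOINT_LEN_4;
	attr.sample_period = 1;
	attr.exclude_kernel = 1;

	/* No glibc wrapper; the raw syscall attaches the event to this thread. */
	fd = syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
	if (fd < 0) {
		perror("perf_event_open");
		return 1;
	}

	wp_value = 42;	/* this store fires the hardware watchpoint */
	close(fd);
	return 0;
}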
/* * Ok, this is the main fork-routine. * * It copies the process, and if successful kick-starts * it and waits for it to finish using the VM if required. */ long do_fork(unsigned long clone_flags, unsigned long stack_start, struct pt_regs *regs, unsigned long stack_size, int __user *parent_tidptr, int __user *child_tidptr) { struct task_struct *p; int trace = 0; long nr; /* * Do some preliminary argument and permissions checking before we * actually start allocating stuff */ if (clone_flags & CLONE_NEWUSER) { if (clone_flags & CLONE_THREAD) return -EINVAL; /* hopefully this check will go away when userns support is * complete */ if (!capable(CAP_SYS_ADMIN) || !capable(CAP_SETUID) || !capable(CAP_SETGID)) return -EPERM; } /* * We hope to recycle these flags after 2.6.26 */ if (unlikely(clone_flags & CLONE_STOPPED)) { static int __read_mostly count = 100; if (count > 0 && printk_ratelimit()) { char comm[TASK_COMM_LEN]; count--; printk(KERN_INFO "fork(): process `%s' used deprecated " "clone flags 0x%lx\n", get_task_comm(comm, current), clone_flags & CLONE_STOPPED); } } /* * When called from kernel_thread, don't do user tracing stuff. */ if (likely(user_mode(regs))) trace = tracehook_prepare_clone(clone_flags); p = copy_process(clone_flags, stack_start, regs, stack_size, child_tidptr, NULL, trace); /* * Do this prior waking up the new thread - the thread pointer * might get invalid after that point, if the thread exits quickly. */ if (!IS_ERR(p)) { struct completion vfork; trace_sched_process_fork(current, p); nr = task_pid_vnr(p); if (clone_flags & CLONE_PARENT_SETTID) put_user(nr, parent_tidptr); if (clone_flags & CLONE_VFORK) { p->vfork_done = &vfork; init_completion(&vfork); } audit_finish_fork(p); tracehook_report_clone(regs, clone_flags, nr, p); /* * We set PF_STARTING at creation in case tracing wants to * use this to distinguish a fully live task from one that * hasn't gotten to tracehook_report_clone() yet. Now we * clear it and set the child going. */ p->flags &= ~PF_STARTING; if (unlikely(clone_flags & CLONE_STOPPED)) { /* * We'll start up with an immediate SIGSTOP. */ sigaddset(&p->pending.signal, SIGSTOP); set_tsk_thread_flag(p, TIF_SIGPENDING); __set_task_state(p, TASK_STOPPED); } else { wake_up_new_task(p, clone_flags); } tracehook_report_clone_complete(trace, regs, clone_flags, nr, p); if (clone_flags & CLONE_VFORK) { freezer_do_not_count(); wait_for_completion(&vfork); freezer_count(); tracehook_report_vfork_done(p, nr); } } else { nr = PTR_ERR(p); } return nr; }
/* * Note that 'init' is a special process: it doesn't get signals it doesn't * want to handle. Thus you cannot kill init even with a SIGKILL even by * mistake. * * Note that we go through the signals twice: once to check the signals that * the kernel can handle, and then we build all the user-level signal handling * stack-frames in one go after that. */ static int do_signal(sigset_t *oldset, struct pt_regs *regs, int syscall) { struct k_sigaction ka; siginfo_t info; int signr; #ifdef CONFIG_PREEMPT_RT /* * Fully-preemptible kernel does not need interrupts disabled: */ local_irq_enable(); preempt_check_resched(); #endif /* * We want the common case to go fast, which * is why we may in certain cases get here from * kernel mode. Just return without doing anything * if so. */ if (!user_mode(regs)) return 0; if (try_to_freeze()) goto no_signal; if (current->ptrace & PT_SINGLESTEP) ptrace_cancel_bpt(current); signr = get_signal_to_deliver(&info, &ka, regs, NULL); if (signr > 0) { handle_signal(signr, &ka, &info, oldset, regs, syscall); if (current->ptrace & PT_SINGLESTEP) ptrace_set_bpt(current); return 1; } no_signal: /* * No signal to deliver to the process - restart the syscall. */ if (syscall) { if (regs->ARM_r0 == -ERESTART_RESTARTBLOCK) { if (thumb_mode(regs)) { regs->ARM_r7 = __NR_restart_syscall - __NR_SYSCALL_BASE; regs->ARM_pc -= 2; } else { #if defined(CONFIG_AEABI) && !defined(CONFIG_OABI_COMPAT) regs->ARM_r7 = __NR_restart_syscall; regs->ARM_pc -= 4; #else u32 __user *usp; u32 swival = __NR_restart_syscall; regs->ARM_sp -= 12; usp = (u32 __user *)regs->ARM_sp; /* * Either we support OABI only, or we have * EABI with the OABI compat layer enabled. * In the latter case we don't know if user * space is EABI or not, and if not we must * not clobber r7. Always using the OABI * syscall solves that issue and works for * all those cases. */ swival = swival - __NR_SYSCALL_BASE + __NR_OABI_SYSCALL_BASE; put_user(regs->ARM_pc, &usp[0]); /* swi __NR_restart_syscall */ put_user(0xef000000 | swival, &usp[1]); /* ldr pc, [sp], #12 */ put_user(0xe49df00c, &usp[2]); flush_icache_range((unsigned long)usp, (unsigned long)(usp + 3)); regs->ARM_pc = regs->ARM_sp + 4; #endif } } if (regs->ARM_r0 == -ERESTARTNOHAND || regs->ARM_r0 == -ERESTARTSYS || regs->ARM_r0 == -ERESTARTNOINTR) { restart_syscall(regs); } } if (current->ptrace & PT_SINGLESTEP) ptrace_set_bpt(current); return 0; }
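/* The restart decisions above have a user-visible counterpart: a slow system call interrupted by a handled signal either fails with EINTR or is transparently restarted, depending on SA_RESTART. A small sketch (run with a terminal on stdin to see the interruption): */
#include <stdio.h>
#include <string.h>
#include <signal.h>
#include <unistd.h>
#include <errno.h>

static void on_alarm(int sig)
{
	(void)sig;	/* only here so the signal interrupts read() */
}

int main(void)
{
	struct sigaction sa;
	char buf[16];
	ssize_t n;

	memset(&sa, 0, sizeof(sa));
	sa.sa_handler = on_alarm;
	sa.sa_flags = 0;	/* set SA_RESTART here to see the syscall restarted instead */
	sigaction(SIGALRM, &sa, NULL);

	alarm(1);
	n = read(STDIN_FILENO, buf, sizeof(buf));
	if (n < 0 && errno == EINTR)
		printf("read() was interrupted and not restarted\n");
	else
		printf("read() returned %zd\n", n);
	return 0;
}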
/* * Canonical page fault handler */ void do_page_fault(unsigned long address, long cause, struct pt_regs *regs) { struct vm_area_struct *vma; struct mm_struct *mm = current->mm; siginfo_t info; int si_code = SEGV_MAPERR; int fault; const struct exception_table_entry *fixup; unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE | (cause > 0 ? FAULT_FLAG_WRITE : 0); /* * If we're in an interrupt or have no user context, * then must not take the fault. */ if (unlikely(in_interrupt() || !mm)) goto no_context; local_irq_enable(); retry: down_read(&mm->mmap_sem); vma = find_vma(mm, address); if (!vma) goto bad_area; if (vma->vm_start <= address) goto good_area; if (!(vma->vm_flags & VM_GROWSDOWN)) goto bad_area; if (expand_stack(vma, address)) goto bad_area; good_area: /* Address space is OK. Now check access rights. */ si_code = SEGV_ACCERR; switch (cause) { case FLT_IFETCH: if (!(vma->vm_flags & VM_EXEC)) goto bad_area; break; case FLT_LOAD: if (!(vma->vm_flags & VM_READ)) goto bad_area; break; case FLT_STORE: if (!(vma->vm_flags & VM_WRITE)) goto bad_area; break; } fault = handle_mm_fault(mm, vma, address, flags); if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(current)) return; /* The most common case -- we are done. */ if (likely(!(fault & VM_FAULT_ERROR))) { if (flags & FAULT_FLAG_ALLOW_RETRY) { if (fault & VM_FAULT_MAJOR) current->maj_flt++; else current->min_flt++; if (fault & VM_FAULT_RETRY) { flags &= ~FAULT_FLAG_ALLOW_RETRY; flags |= FAULT_FLAG_TRIED; goto retry; } } up_read(&mm->mmap_sem); return; } up_read(&mm->mmap_sem); /* Handle copyin/out exception cases */ if (!user_mode(regs)) goto no_context; if (fault & VM_FAULT_OOM) { pagefault_out_of_memory(); return; } /* User-mode address is in the memory map, but we are * unable to fix up the page fault. */ if (fault & VM_FAULT_SIGBUS) { info.si_signo = SIGBUS; info.si_code = BUS_ADRERR; } /* Address is not in the memory map */ else { info.si_signo = SIGSEGV; info.si_code = SEGV_ACCERR; } info.si_errno = 0; info.si_addr = (void __user *)address; force_sig_info(info.si_signo, &info, current); return; bad_area: up_read(&mm->mmap_sem); if (user_mode(regs)) { info.si_signo = SIGSEGV; info.si_errno = 0; info.si_code = si_code; info.si_addr = (void *)address; force_sig_info(SIGSEGV, &info, current); return; } /* Kernel-mode fault falls through */ no_context: fixup = search_exception_tables(pt_elr(regs)); if (fixup) { pt_set_elr(regs, fixup->fixup); return; } /* Things are looking very, very bad now */ bust_spinlocks(1); printk(KERN_EMERG "Unable to handle kernel paging request at " "virtual address 0x%08lx, regs %p\n", address, regs); die("Bad Kernel VA", regs, SIGKILL); }
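/* The SIGSEGV/SIGBUS raised by fault handlers like the one above carries the faulting address and a code that user space can inspect with SA_SIGINFO. A minimal sketch; the deliberately bad address is arbitrary. */
#include <stdio.h>
#include <string.h>
#include <signal.h>
#include <unistd.h>

static void on_segv(int sig, siginfo_t *info, void *ctx)
{
	(void)sig;
	(void)ctx;
	/* printf() is not async-signal-safe; fine for a throwaway demo. */
	printf("SIGSEGV at %p, si_code=%d (%s)\n", info->si_addr, info->si_code,
	       info->si_code == SEGV_ACCERR ? "SEGV_ACCERR" : "SEGV_MAPERR");
	_exit(1);	/* returning would simply re-execute the faulting store */
}

int main(void)
{
	struct sigaction sa;

	memset(&sa, 0, sizeof(sa));
	sa.sa_sigaction = on_segv;
	sa.sa_flags = SA_SIGINFO;
	sigaction(SIGSEGV, &sa, NULL);

	*(volatile int *)0x10 = 1;	/* unmapped address: expect SEGV_MAPERR */
	return 0;
}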
asmlinkage void do_page_fault(unsigned long address, struct pt_regs *regs, int protection, int writeaccess) { struct task_struct *tsk; struct mm_struct *mm; struct vm_area_struct * vma; siginfo_t info; int fault; D(printk(KERN_DEBUG "Page fault for %lX on %X at %lX, prot %d write %d\n", address, smp_processor_id(), instruction_pointer(regs), protection, writeaccess)); tsk = current; /* * We fault-in kernel-space virtual memory on-demand. The * 'reference' page table is init_mm.pgd. * * NOTE! We MUST NOT take any locks for this case. We may * be in an interrupt or a critical region, and should * only copy the information from the master page table, * nothing more. * * NOTE2: This is done so that, when updating the vmalloc * mappings we don't have to walk all processes pgdirs and * add the high mappings all at once. Instead we do it as they * are used. However vmalloc'ed page entries have the PAGE_GLOBAL * bit set so sometimes the TLB can use a lingering entry. * * This verifies that the fault happens in kernel space * and that the fault was not a protection error (error_code & 1). */ if (address >= VMALLOC_START && !protection && !user_mode(regs)) goto vmalloc_fault; /* When stack execution is not allowed we store the signal * trampolines in the reserved cris_signal_return_page. * Handle this in the exact same way as vmalloc (we know * that the mapping is there and is valid so no need to * call handle_mm_fault). */ if (cris_signal_return_page && address == cris_signal_return_page && !protection && user_mode(regs)) goto vmalloc_fault; /* we can and should enable interrupts at this point */ local_irq_enable(); mm = tsk->mm; info.si_code = SEGV_MAPERR; /* * If we're in an interrupt or have no user * context, we must not take the fault.. */ if (in_interrupt() || !mm) goto no_context; down_read(&mm->mmap_sem); vma = find_vma(mm, address); if (!vma) goto bad_area; if (vma->vm_start <= address) goto good_area; if (!(vma->vm_flags & VM_GROWSDOWN)) goto bad_area; if (user_mode(regs)) { /* * accessing the stack below usp is always a bug. * we get page-aligned addresses so we can only check * if we're within a page from usp, but that might be * enough to catch brutal errors at least. */ if (address + PAGE_SIZE < rdusp()) goto bad_area; } if (expand_stack(vma, address)) goto bad_area; /* * Ok, we have a good vm_area for this memory access, so * we can handle it.. */ good_area: info.si_code = SEGV_ACCERR; /* first do some preliminary protection checks */ if (writeaccess == 2){ if (!(vma->vm_flags & VM_EXEC)) goto bad_area; } else if (writeaccess == 1) { if (!(vma->vm_flags & VM_WRITE)) goto bad_area; } else { if (!(vma->vm_flags & (VM_READ | VM_EXEC))) goto bad_area; } /* * If for any reason at all we couldn't handle the fault, * make sure we exit gracefully rather than endlessly redo * the fault. */ fault = handle_mm_fault(mm, vma, address, writeaccess & 1); if (unlikely(fault & VM_FAULT_ERROR)) { if (fault & VM_FAULT_OOM) goto out_of_memory; else if (fault & VM_FAULT_SIGBUS) goto do_sigbus; BUG(); } if (fault & VM_FAULT_MAJOR) tsk->maj_flt++; else tsk->min_flt++; up_read(&mm->mmap_sem); return; /* * Something tried to access memory that isn't in our memory map.. * Fix it, but check if it's kernel or user first.. 
*/ bad_area: up_read(&mm->mmap_sem); bad_area_nosemaphore: DPG(show_registers(regs)); /* User mode accesses just cause a SIGSEGV */ if (user_mode(regs)) { info.si_signo = SIGSEGV; info.si_errno = 0; /* info.si_code has been set above */ info.si_addr = (void *)address; force_sig_info(SIGSEGV, &info, tsk); printk(KERN_NOTICE "%s (pid %d) segfaults for page " "address %08lx at pc %08lx\n", tsk->comm, tsk->pid, address, instruction_pointer(regs)); return; } no_context: /* Are we prepared to handle this kernel fault? * * (The kernel has valid exception-points in the source * when it accesses user-memory. When it fails in one * of those points, we find it in a table and do a jump * to some fixup code that loads an appropriate error * code) */ if (find_fixup_code(regs)) return; /* * Oops. The kernel tried to access some bad page. We'll have to * terminate things with extreme prejudice. */ if (!oops_in_progress) { oops_in_progress = 1; if ((unsigned long) (address) < PAGE_SIZE) printk(KERN_ALERT "Unable to handle kernel NULL " "pointer dereference"); else printk(KERN_ALERT "Unable to handle kernel access" " at virtual address %08lx\n", address); die_if_kernel("Oops", regs, (writeaccess << 1) | protection); oops_in_progress = 0; } do_exit(SIGKILL); /* * We ran out of memory, or some other thing happened to us that made * us unable to handle the page fault gracefully. */ out_of_memory: up_read(&mm->mmap_sem); printk("VM: killing process %s\n", tsk->comm); if (user_mode(regs)) do_exit(SIGKILL); goto no_context; do_sigbus: up_read(&mm->mmap_sem); /* * Send a sigbus, regardless of whether we were in kernel * or user mode. */ info.si_signo = SIGBUS; info.si_errno = 0; info.si_code = BUS_ADRERR; info.si_addr = (void *)address; force_sig_info(SIGBUS, &info, tsk); /* Kernel mode? Handle exceptions or die */ if (!user_mode(regs)) goto no_context; return; vmalloc_fault: { /* * Synchronize this task's top level page-table * with the 'reference' page table. * * Use current_pgd instead of tsk->active_mm->pgd * since the latter might be unavailable if this * code is executed in a misfortunately run irq * (like inside schedule() between switch_mm and * switch_to...). */ int offset = pgd_index(address); pgd_t *pgd, *pgd_k; pud_t *pud, *pud_k; pmd_t *pmd, *pmd_k; pte_t *pte_k; pgd = (pgd_t *)per_cpu(current_pgd, smp_processor_id()) + offset; pgd_k = init_mm.pgd + offset; /* Since we're two-level, we don't need to do both * set_pgd and set_pmd (they do the same thing). If * we go three-level at some point, do the right thing * with pgd_present and set_pgd here. * * Also, since the vmalloc area is global, we don't * need to copy individual PTE's, it is enough to * copy the pgd pointer into the pte page of the * root task. If that is there, we'll find our pte if * it exists. */ pud = pud_offset(pgd, address); pud_k = pud_offset(pgd_k, address); if (!pud_present(*pud_k)) goto no_context; pmd = pmd_offset(pud, address); pmd_k = pmd_offset(pud_k, address); if (!pmd_present(*pmd_k)) goto bad_area_nosemaphore; set_pmd(pmd, *pmd_k); /* Make sure the actual PTE exists as well to * catch kernel vmalloc-area accesses to non-mapped * addresses. If we don't do this, this will just * silently loop forever. */ pte_k = pte_offset_kernel(pmd_k, address); if (!pte_present(*pte_k)) goto no_context; return; } }
/* * This routine handles page faults. It determines the problem, and * then passes it off to one of the appropriate routines. * * error_code: * bit 0 == 0 means no page found, 1 means protection fault * bit 1 == 0 means read, 1 means write * * If this routine detects a bad access, it returns 1, otherwise it * returns 0. */ int do_page_fault(struct pt_regs *regs, unsigned long address, unsigned long error_code) { struct mm_struct *mm = current->mm; struct vm_area_struct * vma; int write, fault; #ifdef DEBUG printk ("do page fault:\nregs->sr=%#x, regs->pc=%#lx, address=%#lx, %ld, %p\n", regs->sr, regs->pc, address, error_code, current->mm->pgd); #endif /* * If we're in an interrupt or have no user * context, we must not take the fault.. */ if (in_interrupt() || !mm) goto no_context; down(&mm->mmap_sem); vma = find_vma(mm, address); if (!vma) goto map_err; if (vma->vm_flags & VM_IO) goto acc_err; if (vma->vm_start <= address) goto good_area; if (!(vma->vm_flags & VM_GROWSDOWN)) goto map_err; if (user_mode(regs)) { /* Accessing the stack below usp is always a bug. The "+ 256" is there due to some instructions doing pre-decrement on the stack and that doesn't show up until later. */ if (address + 256 < rdusp()) goto map_err; } if (expand_stack(vma, address)) goto map_err; /* * Ok, we have a good vm_area for this memory access, so * we can handle it.. */ good_area: #ifdef DEBUG printk("do_page_fault: good_area\n"); #endif write = 0; switch (error_code & 3) { default: /* 3: write, present */ /* fall through */ case 2: /* write, not present */ if (!(vma->vm_flags & VM_WRITE)) goto acc_err; write++; break; case 1: /* read, present */ goto acc_err; case 0: /* read, not present */ if (!(vma->vm_flags & (VM_READ | VM_EXEC))) goto acc_err; } /* * If for any reason at all we couldn't handle the fault, * make sure we exit gracefully rather than endlessly redo * the fault. */ fault = handle_mm_fault(mm, vma, address, write); #ifdef DEBUG printk("handle_mm_fault returns %d\n",fault); #endif if (fault < 0) goto out_of_memory; if (!fault) goto bus_err; /* There seems to be a missing invalidate somewhere in do_no_page. * Until I found it, this one cures the problem and makes * 1.2 run on the 68040 (Martin Apel). */ #warning should be obsolete now... if (CPU_IS_040_OR_060) flush_tlb_page(vma, address); up(&mm->mmap_sem); return 0; /* * We ran out of memory, or some other thing happened to us that made * us unable to handle the page fault gracefully. */ out_of_memory: printk("VM: killing process %s\n", current->comm); if (user_mode(regs)) do_exit(SIGKILL); no_context: current->thread.signo = SIGBUS; current->thread.faddr = address; return send_fault_sig(regs); bus_err: current->thread.signo = SIGBUS; current->thread.code = BUS_ADRERR; current->thread.faddr = address; goto send_sig; map_err: current->thread.signo = SIGSEGV; current->thread.code = SEGV_MAPERR; current->thread.faddr = address; goto send_sig; acc_err: current->thread.signo = SIGSEGV; current->thread.code = SEGV_ACCERR; current->thread.faddr = address; send_sig: up(&mm->mmap_sem); return send_fault_sig(regs); }
long do_fork(unsigned long clone_flags, unsigned long stack_start, struct pt_regs *regs, unsigned long stack_size, int __user *parent_tidptr, int __user *child_tidptr) { struct task_struct *p; int trace = 0; long nr; if (clone_flags & CLONE_NEWUSER) { if (clone_flags & CLONE_THREAD) return -EINVAL; if (!capable(CAP_SYS_ADMIN) || !capable(CAP_SETUID) || !capable(CAP_SETGID)) return -EPERM; } if (likely(user_mode(regs)) && !(clone_flags & CLONE_UNTRACED)) { if (clone_flags & CLONE_VFORK) trace = PTRACE_EVENT_VFORK; else if ((clone_flags & CSIGNAL) != SIGCHLD) trace = PTRACE_EVENT_CLONE; else trace = PTRACE_EVENT_FORK; if (likely(!ptrace_event_enabled(current, trace))) trace = 0; } p = copy_process(clone_flags, stack_start, regs, stack_size, child_tidptr, NULL, trace); if (!IS_ERR(p)) { struct completion vfork; trace_sched_process_fork(current, p); atomic_notifier_call_chain(&task_fork_notifier, 0, p); nr = task_pid_vnr(p); if (clone_flags & CLONE_PARENT_SETTID) put_user(nr, parent_tidptr); if (clone_flags & CLONE_VFORK) { p->vfork_done = &vfork; init_completion(&vfork); get_task_struct(p); } wake_up_new_task(p); if (unlikely(trace)) ptrace_event(trace, nr); if (clone_flags & CLONE_VFORK) { if (!wait_for_vfork_done(p, &vfork)) ptrace_event(PTRACE_EVENT_VFORK_DONE, nr); } } else { nr = PTR_ERR(p); } return nr; }
/* * Ok, this is the main fork-routine. * * It copies the process, and if successful kick-starts * it and waits for it to finish using the VM if required. */ long do_fork(unsigned long clone_flags, unsigned long stack_start, struct pt_regs *regs, unsigned long stack_size, int __user *parent_tidptr, int __user *child_tidptr) { struct task_struct *p; int trace = 0; long nr; /* * Do some preliminary argument and permissions checking before we * actually start allocating stuff */ if (clone_flags & CLONE_NEWUSER) { if (clone_flags & CLONE_THREAD) return -EINVAL; /* hopefully this check will go away when userns support is * complete */ if (!capable(CAP_SYS_ADMIN) || !capable(CAP_SETUID) || !capable(CAP_SETGID)) return -EPERM; } /* * Determine whether and which event to report to ptracer. When * called from kernel_thread or CLONE_UNTRACED is explicitly * requested, no event is reported; otherwise, report if the event * for the type of forking is enabled. */ if (likely(user_mode(regs)) && !(clone_flags & CLONE_UNTRACED)) { if (clone_flags & CLONE_VFORK) trace = PTRACE_EVENT_VFORK; else if ((clone_flags & CSIGNAL) != SIGCHLD) trace = PTRACE_EVENT_CLONE; else trace = PTRACE_EVENT_FORK; if (likely(!ptrace_event_enabled(current, trace))) trace = 0; } p = copy_process(clone_flags, stack_start, regs, stack_size, child_tidptr, NULL, trace); /* * Do this prior waking up the new thread - the thread pointer * might get invalid after that point, if the thread exits quickly. */ if (!IS_ERR(p)) { struct completion vfork; trace_sched_process_fork(current, p); nr = task_pid_vnr(p); if (clone_flags & CLONE_PARENT_SETTID) put_user(nr, parent_tidptr); if (clone_flags & CLONE_VFORK) { p->vfork_done = &vfork; init_completion(&vfork); get_task_struct(p); } wake_up_new_task(p); /* forking complete and child started to run, tell ptracer */ if (unlikely(trace)) ptrace_event(trace, nr); if (clone_flags & CLONE_VFORK) { if (!wait_for_vfork_done(p, &vfork)) ptrace_event(PTRACE_EVENT_VFORK_DONE, nr); } } else { nr = PTR_ERR(p); } return nr; }
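/* The ptrace_event() report issued above is what a tracer observes after enabling PTRACE_O_TRACEFORK; the new child's pid arrives via PTRACE_GETEVENTMSG. A rough tracer-side sketch with most error handling omitted: */
#include <stdio.h>
#include <signal.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/ptrace.h>
#include <sys/wait.h>

int main(void)
{
	pid_t child = fork();
	int status;

	if (child == 0) {
		/* Tracee: hand control to the parent, then fork. */
		ptrace(PTRACE_TRACEME, 0, NULL, NULL);
		raise(SIGSTOP);
		if (fork() == 0)
			_exit(0);
		_exit(0);
	}

	waitpid(child, &status, 0);			/* initial SIGSTOP */
	ptrace(PTRACE_SETOPTIONS, child, NULL,
	       (void *)(long)PTRACE_O_TRACEFORK);	/* ask for fork events */
	ptrace(PTRACE_CONT, child, NULL, NULL);

	waitpid(child, &status, 0);
	if (status >> 8 == (SIGTRAP | (PTRACE_EVENT_FORK << 8))) {
		unsigned long grandchild;

		ptrace(PTRACE_GETEVENTMSG, child, NULL, &grandchild);
		printf("traced a fork, new pid %lu\n", grandchild);
	}
	ptrace(PTRACE_CONT, child, NULL, NULL);
	waitpid(child, &status, 0);
	return 0;
}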
void do_page_fault(struct pt_regs *regs, unsigned long code, unsigned long address) { struct vm_area_struct * vma; struct task_struct *tsk = current; struct mm_struct *mm = tsk->mm; const struct exception_table_entry *fix; unsigned long acc_type; if (in_interrupt() || !mm) goto no_context; down_read(&mm->mmap_sem); vma = pa_find_vma(mm, address); if (!vma) goto bad_area; if (address < vma->vm_end) goto good_area; if (!(vma->vm_flags & VM_GROWSUP) || expand_stackup(vma, address)) goto bad_area; /* * Ok, we have a good vm_area for this memory access. We still need to * check the access permissions. */ good_area: acc_type = parisc_acctyp(code,regs->iir); if ((vma->vm_flags & acc_type) != acc_type) goto bad_area; /* * If for any reason at all we couldn't handle the fault, make * sure we exit gracefully rather than endlessly redo the * fault. */ switch (handle_mm_fault(mm, vma, address, (acc_type & VM_WRITE) != 0)) { case 1: ++current->min_flt; break; case 2: ++current->maj_flt; break; case 0: /* * We ran out of memory, or some other thing happened * to us that made us unable to handle the page fault * gracefully. */ goto bad_area; default: goto out_of_memory; } up_read(&mm->mmap_sem); return; /* * Something tried to access memory that isn't in our memory map.. */ bad_area: up_read(&mm->mmap_sem); if (user_mode(regs)) { struct siginfo si; printk("\ndo_page_fault() pid=%d command='%s'\n", tsk->pid, tsk->comm); show_regs(regs); /* FIXME: actually we need to get the signo and code correct */ si.si_signo = SIGSEGV; si.si_errno = 0; si.si_code = SEGV_MAPERR; si.si_addr = (void *) address; force_sig_info(SIGSEGV, &si, current); return; } no_context: if (!user_mode(regs)) { fix = search_exception_table(regs->iaoq[0]); if (fix) { if (fix->skip & 1) regs->gr[8] = -EFAULT; if (fix->skip & 2) regs->gr[9] = 0; regs->iaoq[0] += ((fix->skip) & ~3); /* * NOTE: In some cases the faulting instruction * may be in the delay slot of a branch. We * don't want to take the branch, so we don't * increment iaoq[1], instead we set it to be * iaoq[0]+4, and clear the B bit in the PSW */ regs->iaoq[1] = regs->iaoq[0] + 4; regs->gr[0] &= ~PSW_B; /* IPSW in gr[0] */ return; } } parisc_terminate("Bad Address (null pointer deref?)",regs,code,address); out_of_memory: up_read(&mm->mmap_sem); printk("VM: killing process %s\n", current->comm); if (user_mode(regs)) do_exit(SIGKILL); goto no_context; }
/* * Note that 'init' is a special process: it doesn't get signals it doesn't * want to handle. Thus you cannot kill init even with a SIGKILL even by * mistake. * * Note that we go through the signals twice: once to check the signals that * the kernel can handle, and then we build all the user-level signal handling * stack-frames in one go after that. */ int do_signal(struct pt_regs *regs, sigset_t *oldset) { siginfo_t info; struct k_sigaction *ka; /* * We want the common case to go fast, which * is why we may in certain cases get here from * kernel mode. Just return without doing anything * if so. */ if (!user_mode(regs)) return 1; if (!oldset) oldset = &current->blocked; for (;;) { unsigned long signr; spin_lock_irq(&current->sigmask_lock); signr = dequeue_signal(&current->blocked, &info); spin_unlock_irq(&current->sigmask_lock); if (!signr) break; if ((current->ptrace & PT_PTRACED) && signr != SIGKILL) { /* Let the debugger run. */ current->exit_code = signr; current->state = TASK_STOPPED; notify_parent(current, SIGCHLD); schedule(); /* We're back. Did the debugger cancel the sig? */ if (!(signr = current->exit_code)) continue; current->exit_code = 0; /* The debugger continued. Ignore SIGSTOP. */ if (signr == SIGSTOP) continue; /* Update the siginfo structure. Is this good? */ if (signr != info.si_signo) { info.si_signo = signr; info.si_errno = 0; info.si_code = SI_USER; info.si_pid = current->p_pptr->pid; info.si_uid = current->p_pptr->uid; } /* If the (new) signal is now blocked, requeue it. */ if (sigismember(&current->blocked, signr)) { send_sig_info(signr, &info, current); continue; } } ka = &current->sig->action[signr-1]; if (ka->sa.sa_handler == SIG_IGN) { if (signr != SIGCHLD) continue; /* Check for SIGCHLD: it's special. */ while (sys_wait4(-1, NULL, WNOHANG, NULL) > 0) /* nothing */; continue; } if (ka->sa.sa_handler == SIG_DFL) { int exit_code = signr; /* Init gets no signals it doesn't want. */ if (current->pid == 1) continue; switch (signr) { case SIGCONT: case SIGCHLD: case SIGWINCH: continue; case SIGTSTP: case SIGTTIN: case SIGTTOU: if (is_orphaned_pgrp(current->pgrp)) continue; /* FALLTHRU */ case SIGSTOP: current->state = TASK_STOPPED; current->exit_code = signr; if (!(current->p_pptr->sig->action[SIGCHLD-1].sa.sa_flags & SA_NOCLDSTOP)) notify_parent(current, SIGCHLD); schedule(); continue; case SIGQUIT: case SIGILL: case SIGTRAP: case SIGABRT: case SIGFPE: case SIGSEGV: case SIGBUS: case SIGSYS: case SIGXCPU: case SIGXFSZ: if (do_coredump(signr, regs)) exit_code |= 0x80; /* FALLTHRU */ default: sigaddset(&current->pending.signal, signr); recalc_sigpending(current); current->flags |= PF_SIGNALED; do_exit(exit_code); /* NOTREACHED */ } } /* Whee! Actually deliver the signal. */ handle_signal(signr, ka, &info, oldset, regs); return 1; } /* Did we come from a system call? */ if (PT_REGS_SYSCALL (regs)) { /* Restart the system call - no handlers present */ if (regs->gpr[GPR_RVAL] == -ERESTARTNOHAND || regs->gpr[GPR_RVAL] == -ERESTARTSYS || regs->gpr[GPR_RVAL] == -ERESTARTNOINTR) { regs->gpr[12] = PT_REGS_SYSCALL (regs); regs->pc -= 12; /* 4 + 8 for microblaze return offset weirdness */ } } return 0; }
/* * Ok, this is the main fork-routine. * * It copies the process, and if successful kick-starts * it and waits for it to finish using the VM if required. */ long do_fork(unsigned long clone_flags, unsigned long stack_start, struct pt_regs *regs, unsigned long stack_size, int __user *parent_tidptr, int __user *child_tidptr) { struct task_struct *p; int trace = 0; long nr; /* * Do some preliminary argument and permissions checking before we * actually start allocating stuff */ if (clone_flags & CLONE_NEWUSER) { if (clone_flags & CLONE_THREAD) return -EINVAL; /* hopefully this check will go away when userns support is * complete */ if (!capable(CAP_SYS_ADMIN) || !capable(CAP_SETUID) || !capable(CAP_SETGID)) return -EPERM; } /* * When called from kernel_thread, don't do user tracing stuff. */ if (likely(user_mode(regs))) trace = tracehook_prepare_clone(clone_flags); p = copy_process(clone_flags, stack_start, regs, stack_size, child_tidptr, NULL, trace); /* * Do this prior waking up the new thread - the thread pointer * might get invalid after that point, if the thread exits quickly. */ if (!IS_ERR(p)) { struct completion vfork; trace_sched_process_fork(current, p); nr = task_pid_vnr(p); if (clone_flags & CLONE_PARENT_SETTID) put_user(nr, parent_tidptr); if (clone_flags & CLONE_VFORK) { p->vfork_done = &vfork; init_completion(&vfork); } audit_finish_fork(p); tracehook_report_clone(regs, clone_flags, nr, p); /* * We set PF_STARTING at creation in case tracing wants to * use this to distinguish a fully live task from one that * hasn't gotten to tracehook_report_clone() yet. Now we * clear it and set the child going. */ p->flags &= ~PF_STARTING; wake_up_new_task(p); tracehook_report_clone_complete(trace, regs, clone_flags, nr, p); if (clone_flags & CLONE_VFORK) { freezer_do_not_count(); wait_for_completion(&vfork); freezer_count(); tracehook_report_vfork_done(p, nr); } } else { nr = PTR_ERR(p); } return nr; }
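/* The CLONE_VFORK completion that do_fork() waits on above is what gives vfork() its user-visible behaviour: the parent does not run again until the child calls _exit() or exec. A small sketch: */
#include <stdio.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/wait.h>

int main(void)
{
	pid_t pid;
	int status;

	printf("parent: before vfork\n");

	/* The parent blocks here - the wait_for_completion(&vfork) above -
	 * until the child calls _exit() or exec. */
	pid = vfork();
	if (pid == 0) {
		/* Child: only _exit()/exec are safe after vfork(). */
		_exit(7);
	}

	/* Only reached once the child has released the vfork completion. */
	waitpid(pid, &status, 0);
	printf("parent: child %d exited with status %d\n", (int)pid, WEXITSTATUS(status));
	return 0;
}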
static void timer_notify(struct pt_regs *regs, int cpu) { struct trace_array_cpu *data; struct stack_frame frame; struct trace_array *tr; const void __user *fp; int is_user; int i; if (!regs) return; tr = sysprof_trace; data = tr->data[cpu]; is_user = user_mode(regs); if (!current || current->pid == 0) return; if (is_user && current->state != TASK_RUNNING) return; __trace_special(tr, data, 0, 0, current->pid); if (!is_user) i = trace_kernel(regs, tr, data); else i = 0; /* * Trace user stack if we are not a kernel thread */ if (current->mm && i < sample_max_depth) { regs = (struct pt_regs *)current->thread.sp0 - 1; fp = (void __user *)regs->bp; __trace_special(tr, data, 2, regs->ip, 0); while (i < sample_max_depth) { frame.next_fp = NULL; frame.return_address = 0; if (!copy_stack_frame(fp, &frame)) break; if ((unsigned long)fp < regs->sp) break; __trace_special(tr, data, 2, frame.return_address, (unsigned long)fp); fp = frame.next_fp; i++; } } /* * Special trace entry if we overflow the max depth: */ if (i == sample_max_depth) __trace_special(tr, data, -1, -1, -1); __trace_special(tr, data, 3, current->pid, i); }
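/* The user-stack walk above follows the classic frame-pointer chain: each frame begins with the saved frame pointer followed by the return address. A hypothetical user-space version of the same walk; it assumes frame pointers are kept (e.g. built with -fno-omit-frame-pointer) and an x86-64-like frame layout. */
#include <stdio.h>

struct frame {
	struct frame *next_fp;		/* saved frame pointer of the caller */
	void *return_address;
};

static void __attribute__((noinline)) dump_backtrace(void)
{
	struct frame *fp = __builtin_frame_address(0);
	int depth = 0;

	while (fp && depth < 16) {
		printf("#%d  return=%p  fp=%p\n", depth, fp->return_address, (void *)fp);
		/* A sane chain only grows toward higher addresses. */
		if (fp->next_fp <= fp)
			break;
		fp = fp->next_fp;
		depth++;
	}
}

static void __attribute__((noinline)) level2(void) { dump_backtrace(); }
static void __attribute__((noinline)) level1(void) { level2(); }

int main(void)
{
	level1();
	return 0;
}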
/* * Copy architecture-specific thread state */ int copy_thread(unsigned long clone_flags, unsigned long usp, unsigned long unused, struct task_struct *p, struct pt_regs *regs) { struct thread_info *ti = task_thread_info(p); struct hexagon_switch_stack *ss; struct pt_regs *childregs; asmlinkage void ret_from_fork(void); childregs = (struct pt_regs *) (((unsigned long) ti + THREAD_SIZE) - sizeof(*childregs)); memcpy(childregs, regs, sizeof(*childregs)); ti->regs = childregs; /* * Establish kernel stack pointer and initial PC for new thread */ ss = (struct hexagon_switch_stack *) ((unsigned long) childregs - sizeof(*ss)); ss->lr = (unsigned long)ret_from_fork; p->thread.switch_sp = ss; /* If User mode thread, set pt_reg stack pointer as per parameter */ if (user_mode(childregs)) { pt_set_rte_sp(childregs, usp); /* Child sees zero return value */ childregs->r00 = 0; /* * The clone syscall has the C signature: * int [r0] clone(int flags [r0], * void *child_frame [r1], * void *parent_tid [r2], * void *child_tid [r3], * void *thread_control_block [r4]); * ugp is used to provide TLS support. */ if (clone_flags & CLONE_SETTLS) childregs->ugp = childregs->r04; /* * Parent sees new pid -- not necessary, not even possible at * this point in the fork process * Might also want to set things like ti->addr_limit */ } else { /* * If kernel thread, resume stack is kernel stack base. * Note that this is pointer arithmetic on pt_regs * */ pt_set_rte_sp(childregs, (unsigned long)(childregs + 1)); /* * We need the current thread_info fast path pointer * set up in pt_regs. The register to be used is * parametric for assembler code, but the mechanism * doesn't drop neatly into C. Needs to be fixed. */ childregs->THREADINFO_REG = (unsigned long) ti; } /* * thread_info pointer is pulled out of task_struct "stack" * field on switch_to. */ p->stack = (void *)ti; return 0; }
asmlinkage void atomic_operation(unsigned long esfr1, unsigned long epcr0, unsigned long esr0) { static DEFINE_SPINLOCK(atomic_op_lock); unsigned long x, y, z; unsigned long __user *p; mm_segment_t oldfs; siginfo_t info; int ret; y = 0; z = 0; oldfs = get_fs(); if (!user_mode(__frame)) set_fs(KERNEL_DS); switch (__frame->tbr & TBR_TT) { /* */ case TBR_TT_ATOMIC_CMPXCHG32: p = (unsigned long __user *) __frame->gr8; x = __frame->gr9; y = __frame->gr10; for (;;) { ret = get_user(z, p); if (ret < 0) goto error; if (z != x) goto done; spin_lock_irq(&atomic_op_lock); if (__get_user(z, p) == 0) { if (z != x) goto done2; if (__put_user(y, p) == 0) goto done2; goto error2; } spin_unlock_irq(&atomic_op_lock); } /* */ case TBR_TT_ATOMIC_XCHG32: p = (unsigned long __user *) __frame->gr8; y = __frame->gr9; for (;;) { ret = get_user(z, p); if (ret < 0) goto error; spin_lock_irq(&atomic_op_lock); if (__get_user(z, p) == 0) { if (__put_user(y, p) == 0) goto done2; goto error2; } spin_unlock_irq(&atomic_op_lock); } /* */ case TBR_TT_ATOMIC_XOR: p = (unsigned long __user *) __frame->gr8; x = __frame->gr9; for (;;) { ret = get_user(z, p); if (ret < 0) goto error; spin_lock_irq(&atomic_op_lock); if (__get_user(z, p) == 0) { y = x ^ z; if (__put_user(y, p) == 0) goto done2; goto error2; } spin_unlock_irq(&atomic_op_lock); } /* */ case TBR_TT_ATOMIC_OR: p = (unsigned long __user *) __frame->gr8; x = __frame->gr9; for (;;) { ret = get_user(z, p); if (ret < 0) goto error; spin_lock_irq(&atomic_op_lock); if (__get_user(z, p) == 0) { y = x | z; if (__put_user(y, p) == 0) goto done2; goto error2; } spin_unlock_irq(&atomic_op_lock); } /* */ case TBR_TT_ATOMIC_AND: p = (unsigned long __user *) __frame->gr8; x = __frame->gr9; for (;;) { ret = get_user(z, p); if (ret < 0) goto error; spin_lock_irq(&atomic_op_lock); if (__get_user(z, p) == 0) { y = x & z; if (__put_user(y, p) == 0) goto done2; goto error2; } spin_unlock_irq(&atomic_op_lock); } /* */ case TBR_TT_ATOMIC_SUB: p = (unsigned long __user *) __frame->gr8; x = __frame->gr9; for (;;) { ret = get_user(z, p); if (ret < 0) goto error; spin_lock_irq(&atomic_op_lock); if (__get_user(z, p) == 0) { y = z - x; if (__put_user(y, p) == 0) goto done2; goto error2; } spin_unlock_irq(&atomic_op_lock); } /* */ case TBR_TT_ATOMIC_ADD: p = (unsigned long __user *) __frame->gr8; x = __frame->gr9; for (;;) { ret = get_user(z, p); if (ret < 0) goto error; spin_lock_irq(&atomic_op_lock); if (__get_user(z, p) == 0) { y = z + x; if (__put_user(y, p) == 0) goto done2; goto error2; } spin_unlock_irq(&atomic_op_lock); } default: BUG(); } done2: spin_unlock_irq(&atomic_op_lock); done: if (!user_mode(__frame)) set_fs(oldfs); __frame->gr5 = z; __frame->gr9 = y; return; error2: spin_unlock_irq(&atomic_op_lock); error: if (!user_mode(__frame)) set_fs(oldfs); __frame->pc -= 4; die_if_kernel("-- Atomic Op Error --\n"); info.si_signo = SIGSEGV; info.si_code = SEGV_ACCERR; info.si_errno = 0; info.si_addr = (void __user *) __frame->pc; force_sig_info(info.si_signo, &info, current); }
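/* On hardware with real atomic instructions, the compare-and-exchange path emulated above collapses to a single operation. A user-space sketch of the CMPXCHG32 semantics (old value handed back, store only if it matched the expected value), using the GCC __atomic builtin rather than anything from the kernel code above: */
#include <stdio.h>

static unsigned int cmpxchg32(unsigned int *p, unsigned int expected,
			      unsigned int desired)
{
	unsigned int observed = expected;

	/* Store 'desired' only if *p still holds 'expected'; either way,
	 * return the value that was actually seen, much as the trap above
	 * hands the old value back in gr5. */
	__atomic_compare_exchange_n(p, &observed, desired, 0,
				    __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST);
	return observed;
}

int main(void)
{
	unsigned int v = 1;

	printf("old=%u (compare succeeds, v becomes 2)\n", cmpxchg32(&v, 1, 2));
	printf("old=%u (compare fails, v stays 2)\n", cmpxchg32(&v, 1, 3));
	return 0;
}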
/* * The main body of the fork system call, invoked by the fork, vfork and clone * family of functions (process.c). They differ only in the clone_flags each * caller passes to do_fork. * It copies the process, and if successful kick-starts * it and waits for it to finish using the VM if required. */ long do_fork(unsigned long clone_flags, unsigned long stack_start, struct pt_regs *regs, unsigned long stack_size, int __user *parent_tidptr, int __user *child_tidptr) { struct task_struct *p; int trace = 0; long nr; /* * Do some preliminary argument and permissions checking before we * actually start allocating stuff */ if (clone_flags & CLONE_NEWUSER) { if (clone_flags & CLONE_THREAD) return -EINVAL; /* hopefully this check will go away when userns support is * complete */ if (!capable(CAP_SYS_ADMIN) || !capable(CAP_SETUID) || !capable(CAP_SETGID)) return -EPERM; } /* * When called from kernel_thread, don't do user tracing stuff. */ if (likely(user_mode(regs))) trace = tracehook_prepare_clone(clone_flags); /* copy_process() does the bulk of the work for do_fork() */ p = copy_process(clone_flags, stack_start, regs, stack_size, child_tidptr, NULL, trace); /* * Do this prior to waking up the new thread - the thread pointer * might become invalid after that point, if the thread exits quickly. */ if (!IS_ERR(p)) { struct completion vfork; trace_sched_process_fork(current, p); nr = task_pid_vnr(p); if (clone_flags & CLONE_PARENT_SETTID) put_user(nr, parent_tidptr); if (clone_flags & CLONE_VFORK) { p->vfork_done = &vfork; init_completion(&vfork); } audit_finish_fork(p); tracehook_report_clone(regs, clone_flags, nr, p); /* * We set PF_STARTING at creation in case tracing wants to * use this to distinguish a fully live task from one that * hasn't gotten to tracehook_report_clone() yet. Now we * clear it and set the child going. */ p->flags &= ~PF_STARTING; wake_up_new_task(p, clone_flags); tracehook_report_clone_complete(trace, regs, clone_flags, nr, p); if (clone_flags & CLONE_VFORK) { freezer_do_not_count(); wait_for_completion(&vfork); freezer_count(); tracehook_report_vfork_done(p, nr); } } else { nr = PTR_ERR(p); } return nr; }
static void __kprobes do_trap(int trapnr, int signr, char *str, struct pt_regs *regs, long error_code, siginfo_t *info) { struct task_struct *tsk = current; #ifdef CONFIG_X86_32 if (regs->flags & X86_VM_MASK) { /* * traps 0, 1, 3, 4, and 5 should be forwarded to vm86. * On nmi (interrupt 2), do_trap should not be called. */ if (trapnr < 6) goto vm86_trap; goto trap_signal; } #endif if (!user_mode(regs)) goto kernel_trap; #ifdef CONFIG_X86_32 trap_signal: #endif /* * We want error_code and trap_no set for userspace faults and * kernelspace faults which result in die(), but not * kernelspace faults which are fixed up. die() gives the * process no chance to handle the signal and notice the * kernel fault information, so that won't result in polluting * the information about previously queued, but not yet * delivered, faults. See also do_general_protection below. */ tsk->thread.error_code = error_code; tsk->thread.trap_no = trapnr; #ifdef CONFIG_X86_64 if (show_unhandled_signals && unhandled_signal(tsk, signr) && printk_ratelimit()) { printk(KERN_INFO "%s[%d] trap %s ip:%lx sp:%lx error:%lx", tsk->comm, tsk->pid, str, regs->ip, regs->sp, error_code); print_vma_addr(" in ", regs->ip); printk("\n"); } #endif if (info) force_sig_info(signr, info, tsk); else force_sig(signr, tsk); return; kernel_trap: if (!fixup_exception(regs)) { tsk->thread.error_code = error_code; tsk->thread.trap_no = trapnr; die(str, regs, error_code); } return; #ifdef CONFIG_X86_32 vm86_trap: if (handle_vm86_trap((struct kernel_vm86_regs *) regs, error_code, trapnr)) goto trap_signal; return; #endif }
/* * We rearm the timer until we get disabled by the idle code. * Called with interrupts disabled and timer->base->cpu_base->lock held. */ static enum hrtimer_restart tick_sched_timer(struct hrtimer *timer) { struct tick_sched *ts = container_of(timer, struct tick_sched, sched_timer); struct pt_regs *regs = get_irq_regs(); ktime_t now = ktime_get(); int cpu = smp_processor_id(); #ifdef CONFIG_NO_HZ /* * Check if the do_timer duty was dropped. We don't care about * concurrency: This happens only when the cpu in charge went * into a long sleep. If two cpus happen to assign themself to * this duty, then the jiffies update is still serialized by * xtime_lock. */ if (unlikely(tick_do_timer_cpu == TICK_DO_TIMER_NONE)) tick_do_timer_cpu = cpu; #endif /* Check, if the jiffies need an update */ if (tick_do_timer_cpu == cpu) tick_do_update_jiffies64(now); /* * Do not call, when we are not in irq context and have * no valid regs pointer */ if (regs) { /* * When we are idle and the tick is stopped, we have to touch * the watchdog as we might not schedule for a really long * time. This happens on complete idle SMP systems while * waiting on the login prompt. We also increment the "start of * idle" jiffy stamp so the idle accounting adjustment we do * when we go busy again does not account too much ticks. */ if (ts->tick_stopped) { touch_softlockup_watchdog(); ts->idle_jiffies++; } update_process_times(user_mode(regs)); profile_tick(CPU_PROFILING); if ((rq_info.init == 1) && (tick_do_timer_cpu == cpu)) { /* * update run queue statistics */ update_rq_stats(); /* * wakeup user if needed */ wakeup_user(); } } hrtimer_forward(timer, now, tick_period); return HRTIMER_RESTART; }
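/* A user-space analogue of the missed-tick accounting above: a periodic POSIX timer keeps its own schedule, and timer_getoverrun() reports how many expirations were coalesced into the last delivered signal. A rough sketch (exact counts vary; older glibc may need linking with -lrt): */
#include <stdio.h>
#include <string.h>
#include <signal.h>
#include <time.h>
#include <unistd.h>

static volatile sig_atomic_t deliveries;

static void on_tick(int sig)
{
	(void)sig;
	deliveries++;
}

int main(void)
{
	struct sigevent sev;
	struct itimerspec its;
	sigset_t block;
	timer_t timer;

	signal(SIGALRM, on_tick);
	memset(&sev, 0, sizeof(sev));
	sev.sigev_notify = SIGEV_SIGNAL;
	sev.sigev_signo = SIGALRM;
	timer_create(CLOCK_MONOTONIC, &sev, &timer);

	/* 10ms period, first expiry in 10ms. */
	memset(&its, 0, sizeof(its));
	its.it_value.tv_nsec = 10 * 1000 * 1000;
	its.it_interval.tv_nsec = 10 * 1000 * 1000;
	timer_settime(timer, 0, &its, NULL);

	/* Block SIGALRM for a second so expirations pile up... */
	sigemptyset(&block);
	sigaddset(&block, SIGALRM);
	sigprocmask(SIG_BLOCK, &block, NULL);
	sleep(1);
	sigprocmask(SIG_UNBLOCK, &block, NULL);

	/* ...one merged signal is delivered; the rest show up as overruns. */
	printf("deliveries=%d, overrun on last delivery=%d\n",
	       (int)deliveries, timer_getoverrun(timer));
	timer_delete(timer);
	return 0;
}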
/* * Our handling of the processor debug registers is non-trivial. * We do not clear them on entry and exit from the kernel. Therefore * it is possible to get a watchpoint trap here from inside the kernel. * However, the code in ./ptrace.c has ensured that the user can * only set watchpoints on userspace addresses. Therefore the in-kernel * watchpoint trap can only occur in code which is reading/writing * from user space. Such code must not hold kernel locks (since it * can equally take a page fault), therefore it is safe to call * force_sig_info even though that claims and releases locks. * * Code in ./signal.c ensures that the debug control register * is restored before we deliver any signal, and therefore that * user code runs with the correct debug control register even though * we clear it here. * * Being careful here means that we don't have to be as careful in a * lot of more complicated places (task switching can be a bit lazy * about restoring all the debug state, and ptrace doesn't have to * find every occurrence of the TF bit that could be saved away even * by user code) * * May run on IST stack. */ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code) { struct task_struct *tsk = current; unsigned long condition; int si_code; get_debugreg(condition, 6); /* Catch kmemcheck conditions first of all! */ if (condition & DR_STEP && kmemcheck_trap(regs)) return; /* * The processor cleared BTF, so don't mark that we need it set. */ clear_tsk_thread_flag(tsk, TIF_DEBUGCTLMSR); tsk->thread.debugctlmsr = 0; #ifdef CONFIG_KDB if (kdb(KDB_REASON_DEBUG, error_code, regs)) return; #endif /* CONFIG_KDB */ if (notify_die(DIE_DEBUG, "debug", regs, condition, error_code, SIGTRAP) == NOTIFY_STOP) return; /* It's safe to allow irq's after DR6 has been saved */ preempt_conditional_sti(regs); /* Mask out spurious debug traps due to lazy DR7 setting */ if (condition & (DR_TRAP0|DR_TRAP1|DR_TRAP2|DR_TRAP3)) { if (!tsk->thread.debugreg7) goto clear_dr7; } #ifdef CONFIG_X86_32 if (regs->flags & X86_VM_MASK) goto debug_vm86; #endif /* Save debug status register where ptrace can see it */ tsk->thread.debugreg6 = condition; /* * Single-stepping through TF: make sure we ignore any events in * kernel space (but re-enable TF when returning to user mode). */ if (condition & DR_STEP) { if (!user_mode(regs)) goto clear_TF_reenable; } si_code = get_si_code(condition); /* Ok, finally something we can handle */ send_sigtrap(tsk, regs, error_code, si_code); /* * Disable additional traps. They'll be re-enabled when * the signal is delivered. */ clear_dr7: set_debugreg(0, 7); preempt_conditional_cli(regs); return; #ifdef CONFIG_X86_32 debug_vm86: /* reenable preemption: handle_vm86_trap() might sleep */ dec_preempt_count(); handle_vm86_trap((struct kernel_vm86_regs *) regs, error_code, 1); conditional_cli(regs); return; #endif clear_TF_reenable: set_tsk_thread_flag(tsk, TIF_SINGLESTEP); regs->flags &= ~X86_EFLAGS_TF; preempt_conditional_cli(regs); return; }
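/* The DR_STEP handling above is what ultimately backs PTRACE_SINGLESTEP: the tracer asks for TF-driven stepping and gets one SIGTRAP stop per instruction. A minimal tracer sketch with error handling omitted: */
#include <stdio.h>
#include <signal.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/ptrace.h>
#include <sys/wait.h>

int main(void)
{
	pid_t child = fork();
	int status, steps = 0;

	if (child == 0) {
		ptrace(PTRACE_TRACEME, 0, NULL, NULL);
		raise(SIGSTOP);		/* let the parent take control */
		for (volatile int i = 0; i < 10; i++)
			;
		_exit(0);
	}

	waitpid(child, &status, 0);	/* initial SIGSTOP */

	/* Each PTRACE_SINGLESTEP sets TF for the tracee; the resulting debug
	 * trap (the DR_STEP case above) comes back to us as a SIGTRAP stop. */
	while (!WIFEXITED(status) && steps < 10000) {
		ptrace(PTRACE_SINGLESTEP, child, NULL, NULL);
		waitpid(child, &status, 0);
		steps++;
	}
	printf("traced %d single steps\n", steps);
	return 0;
}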
/* * timer_interrupt - gets called when the decrementer overflows, * with interrupts disabled. * We set it up to overflow again in 1/HZ seconds. */ void timer_interrupt(struct pt_regs * regs) { int next_dec; unsigned long cpu = smp_processor_id(); unsigned jiffy_stamp = last_jiffy_stamp(cpu); extern void do_IRQ(struct pt_regs *); if (atomic_read(&ppc_n_lost_interrupts) != 0) do_IRQ(regs); #if 0 /* added by 2.6.17 lttng patch */ trace_kernel_trap_entry(regs->trap, instruction_pointer(regs)); #endif irq_enter(); while ((next_dec = tb_ticks_per_jiffy - tb_delta(&jiffy_stamp)) <= 0) { jiffy_stamp += tb_ticks_per_jiffy; profile_tick(CPU_PROFILING, regs); update_process_times(user_mode(regs)); if (smp_processor_id()) continue; /* We are in an interrupt, no need to save/restore flags */ write_seqlock(&xtime_lock); tb_last_stamp = jiffy_stamp; #ifdef CONFIG_LTT ltt_reset_timestamp(); #endif //CONFIG_LTT do_timer(regs); /* * update the rtc when needed, this should be performed on the * right fraction of a second. Half or full second ? * Full second works on mk48t59 clocks, others need testing. * Note that this update is basically only used through * the adjtimex system calls. Setting the HW clock in * any other way is a /dev/rtc and userland business. * This is still wrong by -0.5/+1.5 jiffies because of the * timer interrupt resolution and possible delay, but here we * hit a quantization limit which can only be solved by higher * resolution timers and decoupling time management from timer * interrupts. This is also wrong on the clocks * which require being written at the half second boundary. * We should have an rtc call that only sets the minutes and * seconds like on Intel to avoid problems with non UTC clocks. */ if ( ppc_md.set_rtc_time && ntp_synced() && xtime.tv_sec - last_rtc_update >= 659 && abs((xtime.tv_nsec / 1000) - (1000000-1000000/HZ)) < 500000/HZ && jiffies - wall_jiffies == 1) { if (ppc_md.set_rtc_time(xtime.tv_sec+1 + timezone_offset) == 0) last_rtc_update = xtime.tv_sec+1; else /* Try again one minute later */ last_rtc_update += 60; } write_sequnlock(&xtime_lock); } if ( !disarm_decr[smp_processor_id()] ) set_dec(next_dec); last_jiffy_stamp(cpu) = jiffy_stamp; if (ppc_md.heartbeat && !ppc_md.heartbeat_count--) ppc_md.heartbeat(); irq_exit(); #if 0 /* added by 2.6.17 lttng patch */ trace_kernel_trap_exit(); #endif }
static int do_grant(struct Client *source_p, struct Client *target_p, const char *new_privset, int deoper) { int dooper = 0, dodeoper = 0; struct PrivilegeSet *privset = 0; if (deoper) { if (!IsOper(target_p)) { sendto_one_notice(source_p, ":You can't deoper someone who isn't an oper."); return 0; } new_privset = "default"; dodeoper = 1; sendto_one_notice(target_p, ":%s is deopering you.", source_p->name); sendto_realops_snomask(SNO_GENERAL, L_NETWIDE, "%s is deopering %s.", get_oper_name(source_p), target_p->name); } else { if (!(privset = privilegeset_get(new_privset))) { sendto_one_notice(source_p, ":There is no privilege set named '%s'.", new_privset); return 0; } if (privset == target_p->localClient->privset) { sendto_one_notice(source_p, ":%s already has privilege set %s.", target_p->name, target_p->localClient->privset->name); return 0; } } if (!dodeoper) { if (!IsOper(target_p)) { sendto_one_notice(target_p, ":%s is opering you with privilege set %s", source_p->name, privset->name); sendto_realops_snomask(SNO_GENERAL, L_NETWIDE, "%s is opering %s with privilege set %s", get_oper_name(source_p), target_p->name, privset->name); dooper = 1; } else { sendto_one_notice(target_p, ":%s is changing your privilege set to %s", source_p->name, privset->name); sendto_realops_snomask(SNO_GENERAL, L_NETWIDE, "%s is changing the privilege set of %s to %s", get_oper_name(source_p), target_p->name, privset->name); } } if (dodeoper) { const char *modeparv[4]; modeparv[0] = modeparv[1] = target_p->name; modeparv[2] = "-o"; modeparv[3] = NULL; user_mode(target_p, target_p, 3, modeparv); } if (dooper) { struct oper_conf oper; memset(&oper, '\0', sizeof(oper)); oper.name = "<grant>"; oper.umodes = 0; oper.snomask = 0; oper.privset = privset; oper_up(target_p, &oper); } target_p->localClient->privset = privset; const char *modeparv[4]; modeparv[0] = modeparv[1] = target_p->name; modeparv[2] = "+"; modeparv[3] = NULL; user_mode(target_p, target_p, 3, modeparv); return 0; }
static int __kprobes do_page_fault(unsigned long addr, unsigned int esr, struct pt_regs *regs) { struct task_struct *tsk; struct mm_struct *mm; int fault, sig, code; unsigned long vm_flags = VM_READ | VM_WRITE; unsigned int mm_flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE; tsk = current; mm = tsk->mm; if (interrupts_enabled(regs)) local_irq_enable(); if (in_atomic() || !mm) goto no_context; if (user_mode(regs)) mm_flags |= FAULT_FLAG_USER; if (esr & ESR_LNX_EXEC) { vm_flags = VM_EXEC; } else if (esr & ESR_EL1_WRITE) { vm_flags = VM_WRITE; mm_flags |= FAULT_FLAG_WRITE; } if (!down_read_trylock(&mm->mmap_sem)) { if (!user_mode(regs) && !search_exception_tables(regs->pc)) goto no_context; retry: down_read(&mm->mmap_sem); } else { might_sleep(); #ifdef CONFIG_DEBUG_VM if (!user_mode(regs) && !search_exception_tables(regs->pc)) goto no_context; #endif } fault = __do_page_fault(mm, addr, mm_flags, vm_flags, tsk); if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(current)) return 0; perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, addr); if (mm_flags & FAULT_FLAG_ALLOW_RETRY) { if (fault & VM_FAULT_MAJOR) { tsk->maj_flt++; perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, regs, addr); } else { tsk->min_flt++; perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, regs, addr); } if (fault & VM_FAULT_RETRY) { mm_flags &= ~FAULT_FLAG_ALLOW_RETRY; goto retry; } } up_read(&mm->mmap_sem); if (likely(!(fault & (VM_FAULT_ERROR | VM_FAULT_BADMAP | VM_FAULT_BADACCESS)))) return 0; if (!user_mode(regs)) goto no_context; if (fault & VM_FAULT_OOM) { pagefault_out_of_memory(); return 0; } if (fault & VM_FAULT_SIGBUS) { sig = SIGBUS; code = BUS_ADRERR; } else { sig = SIGSEGV; code = fault == VM_FAULT_BADACCESS ? SEGV_ACCERR : SEGV_MAPERR; } __do_user_fault(tsk, addr, esr, sig, code, regs); return 0; no_context: __do_kernel_fault(mm, addr, esr, regs); return 0; }
/* * Note that `init' is a special process: it doesn't get signals it doesn't want to * handle. Thus you cannot kill init even with a SIGKILL even by mistake. */ void ia64_do_signal (struct sigscratch *scr, long in_syscall) { struct k_sigaction ka; sigset_t *oldset; siginfo_t info; long restart = in_syscall; long errno = scr->pt.r8; # define ERR_CODE(c) (IS_IA32_PROCESS(&scr->pt) ? -(c) : (c)) /* * In the ia64_leave_kernel code path, we want the common case to go fast, which * is why we may in certain cases get here from kernel mode. Just return without * doing anything if so. */ if (!user_mode(&scr->pt)) return; if (test_thread_flag(TIF_RESTORE_SIGMASK)) oldset = &current->saved_sigmask; else oldset = &current->blocked; /* * This only loops in the rare cases of handle_signal() failing, in which case we * need to push through a forced SIGSEGV. */ while (1) { int signr = get_signal_to_deliver(&info, &ka, &scr->pt, NULL); /* * get_signal_to_deliver() may have run a debugger (via notify_parent()) * and the debugger may have modified the state (e.g., to arrange for an * inferior call), thus it's important to check for restarting _after_ * get_signal_to_deliver(). */ if (IS_IA32_PROCESS(&scr->pt)) { if (in_syscall) { if (errno >= 0) restart = 0; else errno = -errno; } } else if ((long) scr->pt.r10 != -1) /* * A system call has to be restarted only if one of the error codes * ERESTARTNOHAND, ERESTARTSYS, or ERESTARTNOINTR is returned. If r10 * isn't -1 then r8 doesn't hold an error code and we don't need to * restart the syscall, so we can clear the "restart" flag here. */ restart = 0; if (signr <= 0) break; if (unlikely(restart)) { switch (errno) { case ERESTART_RESTARTBLOCK: case ERESTARTNOHAND: scr->pt.r8 = ERR_CODE(EINTR); /* note: scr->pt.r10 is already -1 */ break; case ERESTARTSYS: if ((ka.sa.sa_flags & SA_RESTART) == 0) { scr->pt.r8 = ERR_CODE(EINTR); /* note: scr->pt.r10 is already -1 */ break; } case ERESTARTNOINTR: if (IS_IA32_PROCESS(&scr->pt)) { scr->pt.r8 = scr->pt.r1; scr->pt.cr_iip -= 2; } else ia64_decrement_ip(&scr->pt); restart = 0; /* don't restart twice if handle_signal() fails... */ } } /* * Whee! Actually deliver the signal. If the delivery failed, we need to * continue to iterate in this loop so we can deliver the SIGSEGV... */ if (handle_signal(signr, &ka, &info, oldset, scr)) { /* a signal was successfully delivered; the saved * sigmask will have been stored in the signal frame, * and will be restored by sigreturn, so we can simply * clear the TIF_RESTORE_SIGMASK flag */ if (test_thread_flag(TIF_RESTORE_SIGMASK)) clear_thread_flag(TIF_RESTORE_SIGMASK); return; } } /* Did we come from a system call? */ if (restart) { /* Restart the system call - no handlers present */ if (errno == ERESTARTNOHAND || errno == ERESTARTSYS || errno == ERESTARTNOINTR || errno == ERESTART_RESTARTBLOCK) { if (IS_IA32_PROCESS(&scr->pt)) { scr->pt.r8 = scr->pt.r1; scr->pt.cr_iip -= 2; if (errno == ERESTART_RESTARTBLOCK) scr->pt.r8 = 0; /* x86 version of __NR_restart_syscall */ } else { /* * Note: the syscall number is in r15 which is saved in * pt_regs so all we need to do here is adjust ip so that * the "break" instruction gets re-executed. */ ia64_decrement_ip(&scr->pt); if (errno == ERESTART_RESTARTBLOCK) scr->pt.r15 = __NR_restart_syscall; } } } /* if there's no signal to deliver, we just put the saved sigmask * back */ if (test_thread_flag(TIF_RESTORE_SIGMASK)) { clear_thread_flag(TIF_RESTORE_SIGMASK); sigprocmask(SIG_SETMASK, &current->saved_sigmask, NULL); } }
/* * Note that 'init' is a special process: it doesn't get signals it doesn't * want to handle. Thus you cannot kill init even with a SIGKILL even by * mistake. * * Note that we go through the signals twice: once to check the signals that * the kernel can handle, and then we build all the user-level signal handling * stack-frames in one go after that. */ int do_signal(struct pt_regs *regs, sigset_t *oldset) { unsigned long retval = 0, continue_addr = 0, restart_addr = 0; siginfo_t info; int signr; struct k_sigaction ka; /* * We want the common case to go fast, which * is why we may in certain cases get here from * kernel mode. Just return without doing anything * if so. */ if (!user_mode(regs)) return 1; if (!oldset) oldset = &current->blocked; /* Are we from a system call? */ if (regs->trap == __LC_SVC_OLD_PSW) { continue_addr = regs->psw.addr; restart_addr = continue_addr - regs->ilc; retval = regs->gprs[2]; /* Prepare for system call restart. We do this here so that a debugger will see the already changed PSW. */ if (retval == -ERESTARTNOHAND || retval == -ERESTARTSYS || retval == -ERESTARTNOINTR) { regs->gprs[2] = regs->orig_gpr2; regs->psw.addr = restart_addr; } else if (retval == -ERESTART_RESTARTBLOCK) { regs->gprs[2] = -EINTR; } } /* Get signal to deliver. When running under ptrace, at this point the debugger may change all our registers ... */ signr = get_signal_to_deliver(&info, &ka, regs, NULL); /* Depending on the signal settings we may need to revert the decision to restart the system call. */ if (signr > 0 && regs->psw.addr == restart_addr) { if (retval == -ERESTARTNOHAND || (retval == -ERESTARTSYS && !(current->sighand->action[signr-1].sa.sa_flags & SA_RESTART))) { regs->gprs[2] = -EINTR; regs->psw.addr = continue_addr; } } if (signr > 0) { /* Whee! Actually deliver the signal. */ #ifdef CONFIG_S390_SUPPORT if (test_thread_flag(TIF_31BIT)) { extern void handle_signal32(unsigned long sig, struct k_sigaction *ka, siginfo_t *info, sigset_t *oldset, struct pt_regs *regs); handle_signal32(signr, &ka, &info, oldset, regs); return 1; } #endif handle_signal(signr, &ka, &info, oldset, regs); return 1; } /* Restart a different system call. */ if (retval == -ERESTART_RESTARTBLOCK && regs->psw.addr == continue_addr) { regs->gprs[2] = __NR_restart_syscall; set_thread_flag(TIF_RESTART_SVC); } return 0; }
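/*
 * Illustrative user-space sketch (not from the kernel tree): the restart
 * logic above only re-arms a system call that failed with -ERESTARTSYS when
 * the handler was installed with SA_RESTART.  Assuming a typical Linux/glibc
 * environment, that difference is visible from user space: a blocking read()
 * interrupted by SIGALRM fails with EINTR without SA_RESTART, and is
 * transparently restarted (and keeps blocking) with it.
 */
#include <errno.h>
#include <signal.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

static void on_alarm(int sig)
{
	(void)sig;	/* nothing to do; the point is to interrupt the syscall */
}

int main(void)
{
	int fds[2];
	char c;
	ssize_t n;
	struct sigaction sa;

	if (pipe(fds) < 0)
		return 1;

	memset(&sa, 0, sizeof(sa));
	sa.sa_handler = on_alarm;
	sa.sa_flags = 0;	/* set SA_RESTART here to see read() restarted instead */
	sigaction(SIGALRM, &sa, NULL);

	alarm(1);
	n = read(fds[0], &c, 1);	/* blocks; nothing is ever written */
	if (n < 0 && errno == EINTR)
		printf("read() interrupted: EINTR (no SA_RESTART)\n");
	else
		printf("read() returned %zd\n", n);
	return 0;
}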
/* * This routine handles page faults. It determines the address, * and the problem, and then passes it off to one of the appropriate * routines. */ asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long write, unsigned long address) { struct vm_area_struct *vma = NULL; struct task_struct *tsk = current; struct mm_struct *mm = tsk->mm; const int field = sizeof(unsigned long) * 2; unsigned long flags = 0; siginfo_t info; int fault; info.si_code = SEGV_MAPERR; /* * We fault-in kernel-space virtual memory on-demand. The * 'reference' page table is init_mm.pgd. * * NOTE! We MUST NOT take any locks for this case. We may * be in an interrupt or a critical region, and should * only copy the information from the master page table, * nothing more. */ if (unlikely(address >= VMALLOC_START && address <= VMALLOC_END)) goto vmalloc_fault; #ifdef MODULE_START if (unlikely(address >= MODULE_START && address < MODULE_END)) goto vmalloc_fault; #endif /* * If we're in an interrupt or have no user * context, we must not take the fault.. */ if (in_atomic() || !mm) goto bad_area_nosemaphore; if (user_mode(regs)) flags |= FAULT_FLAG_USER; down_read(&mm->mmap_sem); vma = find_vma(mm, address); if (!vma) goto bad_area; if (vma->vm_start <= address) goto good_area; if (!(vma->vm_flags & VM_GROWSDOWN)) goto bad_area; if (expand_stack(vma, address)) goto bad_area; /* * Ok, we have a good vm_area for this memory access, so * we can handle it.. */ good_area: info.si_code = SEGV_ACCERR; if (write) { if (!(vma->vm_flags & VM_WRITE)) goto bad_area; flags |= FAULT_FLAG_WRITE; } else { if (!(vma->vm_flags & (VM_READ | VM_WRITE | VM_EXEC))) goto bad_area; } survive: /* * If for any reason at all we couldn't handle the fault, * make sure we exit gracefully rather than endlessly redo * the fault. */ fault = handle_mm_fault(mm, vma, address, flags); if (unlikely(fault & VM_FAULT_ERROR)) { if (fault & VM_FAULT_OOM) goto out_of_memory; else if (fault & VM_FAULT_SIGBUS) goto do_sigbus; BUG(); } if (fault & VM_FAULT_MAJOR) tsk->maj_flt++; else tsk->min_flt++; up_read(&mm->mmap_sem); return; /* * Something tried to access memory that isn't in our memory map.. * Fix it, but check if it's kernel or user first.. */ bad_area: up_read(&mm->mmap_sem); bad_area_nosemaphore: /* User mode accesses just cause a SIGSEGV */ if (user_mode(regs)) { tsk->thread.cp0_badvaddr = address; tsk->thread.error_code = write; info.si_signo = SIGSEGV; info.si_errno = 0; /* info.si_code has been set above */ info.si_addr = (void __user *) address; force_sig_info(SIGSEGV, &info, tsk); return; } no_context: /* Are we prepared to handle this kernel fault? */ if (fixup_exception(regs)) { current->thread.cp0_baduaddr = address; return; } /* * Oops. The kernel tried to access some bad page. We'll have to * terminate things with extreme prejudice. */ bust_spinlocks(1); printk(KERN_ALERT "CPU %d Unable to handle kernel paging request at " "virtual address %0*lx, epc == %0*lx, ra == %0*lx\n", 0, field, address, field, regs->cp0_epc, field, regs->regs[31]); die("Oops", regs); /* * We ran out of memory, or some other thing happened to us that made * us unable to handle the page fault gracefully. */ out_of_memory: up_read(&mm->mmap_sem); if (is_global_init(tsk)) { yield(); down_read(&mm->mmap_sem); goto survive; } if (!user_mode(regs)) goto no_context; pagefault_out_of_memory(); return; do_sigbus: up_read(&mm->mmap_sem); /* Kernel mode?
Handle exceptions or die */ if (!user_mode(regs)) goto no_context; else /* * Send a sigbus, regardless of whether we were in kernel * or user mode. */ tsk->thread.cp0_badvaddr = address; info.si_signo = SIGBUS; info.si_errno = 0; info.si_code = BUS_ADRERR; info.si_addr = (void __user *) address; force_sig_info(SIGBUS, &info, tsk); return; vmalloc_fault: { /* * Synchronize this task's top level page-table * with the 'reference' page table. * * Do _not_ use "tsk" here. We might be inside * an interrupt in the middle of a task switch.. */ int offset = __pgd_offset(address); pgd_t *pgd, *pgd_k; pud_t *pud, *pud_k; pmd_t *pmd, *pmd_k; pte_t *pte_k; pgd = (pgd_t *) pgd_current + offset; pgd_k = init_mm.pgd + offset; if (!pgd_present(*pgd_k)) goto no_context; set_pgd(pgd, *pgd_k); pud = pud_offset(pgd, address); pud_k = pud_offset(pgd_k, address); if (!pud_present(*pud_k)) goto no_context; pmd = pmd_offset(pud, address); pmd_k = pmd_offset(pud_k, address); if (!pmd_present(*pmd_k)) goto no_context; set_pmd(pmd, *pmd_k); pte_k = pte_offset_kernel(pmd_k, address); if (!pte_present(*pte_k)) goto no_context; return; } }
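/*
 * Illustrative user-space sketch (not from the kernel tree): the do_sigbus
 * path above is reached when handle_mm_fault() returns VM_FAULT_SIGBUS, the
 * classic case being a file-backed mapping touched beyond the end of the
 * file.  Assuming a typical Linux/glibc environment, the program below maps
 * two pages of a one-byte file and reads from the second page, which is
 * normally delivered as SIGBUS/BUS_ADRERR at the faulting address.
 */
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/mman.h>
#include <unistd.h>

static void bus_handler(int sig, siginfo_t *info, void *ctx)
{
	char buf[96];
	int n = snprintf(buf, sizeof(buf), "SIGBUS at %p, si_code=%d (BUS_ADRERR=%d)\n",
			 info->si_addr, info->si_code, BUS_ADRERR);

	(void)sig;
	(void)ctx;
	write(STDOUT_FILENO, buf, n);
	_exit(0);
}

int main(void)
{
	struct sigaction sa;
	char path[] = "/tmp/sigbus-XXXXXX";
	int fd = mkstemp(path);
	long page = sysconf(_SC_PAGESIZE);
	char *p;

	if (fd < 0)
		return 1;
	unlink(path);

	memset(&sa, 0, sizeof(sa));
	sa.sa_sigaction = bus_handler;
	sa.sa_flags = SA_SIGINFO;
	sigaction(SIGBUS, &sa, NULL);

	ftruncate(fd, 1);			/* the file backs only the first page */
	p = mmap(NULL, 2 * page, PROT_READ, MAP_PRIVATE, fd, 0);
	if (p == MAP_FAILED)
		return 1;
	(void)*(volatile char *)(p + page);	/* page beyond EOF: SIGBUS/BUS_ADRERR */
	return 0;
}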
/* * Debug exception handlers. */ static int breakpoint_handler(unsigned long unused, unsigned int esr, struct pt_regs *regs) { int i, step = 0, *kernel_step; u32 ctrl_reg; u64 addr, val; struct perf_event *bp, **slots; struct debug_info *debug_info; struct arch_hw_breakpoint_ctrl ctrl; slots = (struct perf_event **)__get_cpu_var(bp_on_reg); addr = instruction_pointer(regs); debug_info = &current->thread.debug; for (i = 0; i < core_num_brps; ++i) { rcu_read_lock(); bp = slots[i]; if (bp == NULL) goto unlock; /* Check if the breakpoint value matches. */ val = read_wb_reg(AARCH64_DBG_REG_BVR, i); if (val != (addr & ~0x3)) goto unlock; /* Possible match, check the byte address select to confirm. */ ctrl_reg = read_wb_reg(AARCH64_DBG_REG_BCR, i); decode_ctrl_reg(ctrl_reg, &ctrl); if (!((1 << (addr & 0x3)) & ctrl.len)) goto unlock; counter_arch_bp(bp)->trigger = addr; perf_bp_event(bp, regs); /* Do we need to handle the stepping? */ if (!bp->overflow_handler) step = 1; unlock: rcu_read_unlock(); } if (!step) return 0; if (user_mode(regs)) { debug_info->bps_disabled = 1; toggle_bp_registers(AARCH64_DBG_REG_BCR, DBG_ACTIVE_EL0, 0); /* If we're already stepping a watchpoint, just return. */ if (debug_info->wps_disabled) return 0; if (test_thread_flag(TIF_SINGLESTEP)) debug_info->suspended_step = 1; else user_enable_single_step(current); } else { toggle_bp_registers(AARCH64_DBG_REG_BCR, DBG_ACTIVE_EL1, 0); kernel_step = &__get_cpu_var(stepping_kernel_bp); if (*kernel_step != ARM_KERNEL_STEP_NONE) return 0; if (kernel_active_single_step()) { *kernel_step = ARM_KERNEL_STEP_SUSPEND; } else { *kernel_step = ARM_KERNEL_STEP_ACTIVE; kernel_enable_single_step(regs); } } return 0; }
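/*
 * Illustrative user-space sketch (not from the kernel tree): the
 * hw_breakpoint/perf_bp_event machinery driven by the handlers above is
 * exposed to user space through perf_event_open() with PERF_TYPE_BREAKPOINT.
 * Assuming a Linux system with hardware debug registers available (and a
 * perf_event_paranoid setting that permits it), the program below arms a
 * 4-byte write watchpoint on a global variable for the current task and
 * reads back how many times it fired.
 */
#include <linux/perf_event.h>
#include <linux/hw_breakpoint.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/syscall.h>
#include <unistd.h>

static volatile int watched;	/* the data the watchpoint covers */

int main(void)
{
	struct perf_event_attr attr;
	long long count = 0;
	int fd, i;

	memset(&attr, 0, sizeof(attr));
	attr.type = PERF_TYPE_BREAKPOINT;
	attr.size = sizeof(attr);
	attr.bp_type = HW_BREAKPOINT_W;		/* fire on writes */
	attr.bp_addr = (uintptr_t)&watched;
	attr.bp_len = HW_BREAKPOINT_LEN_4;
	attr.disabled = 1;
	attr.exclude_kernel = 1;
	attr.exclude_hv = 1;

	fd = syscall(SYS_perf_event_open, &attr, 0, -1, -1, 0);
	if (fd < 0) {
		perror("perf_event_open");
		return 1;
	}

	ioctl(fd, PERF_EVENT_IOC_ENABLE, 0);
	for (i = 0; i < 5; i++)
		watched = i;			/* each store should be counted */
	ioctl(fd, PERF_EVENT_IOC_DISABLE, 0);

	read(fd, &count, sizeof(count));
	printf("watchpoint fired %lld times\n", count);
	close(fd);
	return 0;
}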
/** * __do_IRQ - original all in one highlevel IRQ handler * @irq: the interrupt number * * __do_IRQ handles all normal device IRQ's (the special * SMP cross-CPU interrupts have their own specific * handlers). * * This is the original x86 implementation which is used for every * interrupt type. */ fastcall notrace unsigned int __do_IRQ(unsigned int irq) { struct irq_desc *desc = irq_desc + irq; struct irqaction *action; unsigned int status; kstat_this_cpu.irqs[irq]++; if (CHECK_IRQ_PER_CPU(desc->status)) { irqreturn_t action_ret; /* * No locking required for CPU-local interrupts: */ if (desc->chip->ack) desc->chip->ack(irq); action_ret = handle_IRQ_event(irq, desc->action); desc->chip->end(irq); return 1; } /* * If the task is currently running in user mode, don't * detect soft lockups. If CONFIG_DETECT_SOFTLOCKUP is not * configured, this should be optimized out. */ if (user_mode(get_irq_regs())) touch_softlockup_watchdog(); spin_lock(&desc->lock); if (desc->chip->ack) desc->chip->ack(irq); /* * REPLAY is when Linux resends an IRQ that was dropped earlier * WAITING is used by probe to mark irqs that are being tested */ status = desc->status & ~(IRQ_REPLAY | IRQ_WAITING); status |= IRQ_PENDING; /* we _want_ to handle it */ /* * If the IRQ is disabled for whatever reason, we cannot * use the action we have. */ action = NULL; if (likely(!(status & (IRQ_DISABLED | IRQ_INPROGRESS)))) { action = desc->action; status &= ~IRQ_PENDING; /* we commit to handling */ status |= IRQ_INPROGRESS; /* we are handling it */ } desc->status = status; /* * If there is no IRQ handler or it was disabled, exit early. * Since we set PENDING, if another processor is handling * a different instance of this same irq, the other processor * will take care of it. */ if (unlikely(!action)) goto out; /* * Edge triggered interrupts need to remember * pending events. * This applies to any hw interrupts that allow a second * instance of the same irq to arrive while we are in do_IRQ * or in the handler. But the code here only handles the _second_ * instance of the irq, not the third or fourth. So it is mostly * useful for irq hardware that does not mask cleanly in an * SMP environment. */ for (;;) { irqreturn_t action_ret; spin_unlock(&desc->lock); action_ret = handle_IRQ_event(irq, action); if (!noirqdebug) note_interrupt(irq, desc, action_ret); spin_lock(&desc->lock); if (likely(!(desc->status & IRQ_PENDING))) break; desc->status &= ~IRQ_PENDING; } desc->status &= ~IRQ_INPROGRESS; out: /* * The ->end() handler has to deal with interrupts which got * disabled while the handler was running. */ desc->chip->end(irq); spin_unlock(&desc->lock); return 1; }
/* * Note that 'init' is a special process: it doesn't get signals it doesn't * want to handle. Thus you cannot kill init even with a SIGKILL even by * mistake. */ static void do_signal(void) { struct k_sigaction ka; siginfo_t info; sigset_t *oldset; int signr; /* * We want the common case to go fast, which * is why we may in certain cases get here from * kernel mode. Just return without doing anything * if so. */ if (!user_mode(__frame)) return; if (try_to_freeze()) goto no_signal; if (test_thread_flag(TIF_RESTORE_SIGMASK)) oldset = &current->saved_sigmask; else oldset = &current->blocked; signr = get_signal_to_deliver(&info, &ka, __frame, NULL); if (signr > 0) { if (handle_signal(signr, &info, &ka, oldset) == 0) { /* a signal was successfully delivered; the saved * sigmask will have been stored in the signal frame, * and will be restored by sigreturn, so we can simply * clear the TIF_RESTORE_SIGMASK flag */ if (test_thread_flag(TIF_RESTORE_SIGMASK)) clear_thread_flag(TIF_RESTORE_SIGMASK); } return; } no_signal: /* Did we come from a system call? */ if (__frame->syscallno >= 0) { /* Restart the system call - no handlers present */ switch (__frame->gr8) { case -ERESTARTNOHAND: case -ERESTARTSYS: case -ERESTARTNOINTR: __frame->gr8 = __frame->orig_gr8; __frame->pc -= 4; break; case -ERESTART_RESTARTBLOCK: __frame->gr8 = __NR_restart_syscall; __frame->pc -= 4; break; } } /* if there's no signal to deliver, we just put the saved sigmask * back */ if (test_thread_flag(TIF_RESTORE_SIGMASK)) { clear_thread_flag(TIF_RESTORE_SIGMASK); sigprocmask(SIG_SETMASK, &current->saved_sigmask, NULL); } } /* end do_signal() */
asmlinkage unsigned int do_IRQ(int irq, struct pt_regs *regs) #endif { /* * We ack quickly, we don't want the irq controller * thinking we're snobs just because some other CPU has * disabled global interrupts (we have already done the * INT_ACK cycles, it's too late to try to pretend to the * controller that we aren't taking the interrupt). * * 0 return value means that this irq is already being * handled by some other CPU. (or is disabled) */ int cpu = smp_processor_id(); irq_desc_t *desc = irq_desc + irq; struct irqaction * action; unsigned int status; ILATENCY_INTERRUPT_OVERHEAD_START(); TRACE_IRQ_ENTRY(irq, !user_mode(regs)); kstat.irqs[cpu][irq]++; spin_lock(&desc->lock); desc->handler->ack(irq); /* REPLAY is when Linux resends an IRQ that was dropped earlier WAITING is used by probe to mark irqs that are being tested */ status = desc->status & ~(IRQ_REPLAY | IRQ_WAITING); status |= IRQ_PENDING; /* we _want_ to handle it */ /* * If the IRQ is disabled for whatever reason, we cannot * use the action we have. */ action = NULL; if (!(status & (IRQ_DISABLED | IRQ_INPROGRESS))) { action = desc->action; status &= ~IRQ_PENDING; /* we commit to handling */ status |= IRQ_INPROGRESS; /* we are handling it */ } desc->status = status; /* * If there is no IRQ handler or it was disabled, exit early. Since we set PENDING, if another processor is handling a different instance of this same irq, the other processor will take care of it. */ if (!action) goto out; ILATENCY_INTERRUPT_OVERHEAD_STOP(); /* * Edge triggered interrupts need to remember * pending events. * This applies to any hw interrupts that allow a second * instance of the same irq to arrive while we are in do_IRQ * or in the handler. But the code here only handles the _second_ * instance of the irq, not the third or fourth. So it is mostly * useful for irq hardware that does not mask cleanly in an * SMP environment. */ for (;;) { spin_unlock(&desc->lock); handle_IRQ_event(irq, regs, action); spin_lock(&desc->lock); if (!(desc->status & IRQ_PENDING)) break; desc->status &= ~IRQ_PENDING; } desc->status &= ~IRQ_INPROGRESS; out: /* * The ->end() handler has to deal with interrupts which got * disabled while the handler was running. */ desc->handler->end(irq); spin_unlock(&desc->lock); TRACE_IRQ_EXIT(); if (softirq_pending(cpu)) do_softirq(); ILATENCY_RET_FROM_EXCEPTION(); return 1; }