void do_page_fault(struct pt_regs *regs)
{
	struct vm_area_struct *vma;
	struct mm_struct *mm = current->mm;
	unsigned int exccause = regs->exccause;
	unsigned int address = regs->excvaddr;
	siginfo_t info;

	int is_write, is_exec;
	int fault;

	info.si_code = SEGV_MAPERR;

	/* We fault-in kernel-space virtual memory on-demand. The
	 * 'reference' page table is init_mm.pgd.
	 */
	if (address >= TASK_SIZE && !user_mode(regs))
		goto vmalloc_fault;

	/* If we're in an interrupt or have no user
	 * context, we must not take the fault..
	 */
	if (in_atomic() || !mm) {
		bad_page_fault(regs, address, SIGSEGV);
		return;
	}

	is_write = (exccause == EXCCAUSE_STORE_CACHE_ATTRIBUTE) ? 1 : 0;
	is_exec = (exccause == EXCCAUSE_ITLB_PRIVILEGE ||
		   exccause == EXCCAUSE_ITLB_MISS ||
		   exccause == EXCCAUSE_FETCH_CACHE_ATTRIBUTE) ? 1 : 0;

#ifdef DEBUG_PAGE_FAULT
	printk("[%s:%d:%08x:%d:%08x:%s%s]\n", current->comm, current->pid,
	       address, exccause, regs->pc, is_write ? "w" : "",
	       is_exec ? "x" : "");
#endif

	down_read(&mm->mmap_sem);
	vma = find_vma(mm, address);

	if (!vma)
		goto bad_area;
	if (vma->vm_start <= address)
		goto good_area;
	if (!(vma->vm_flags & VM_GROWSDOWN))
		goto bad_area;
	if (expand_stack(vma, address))
		goto bad_area;

	/* Ok, we have a good vm_area for this memory access, so
	 * we can handle it..
	 */

good_area:
	info.si_code = SEGV_ACCERR;

	if (is_write) {
		if (!(vma->vm_flags & VM_WRITE))
			goto bad_area;
	} else if (is_exec) {
		if (!(vma->vm_flags & VM_EXEC))
			goto bad_area;
	} else	/* Allow read even from write-only pages. */
		if (!(vma->vm_flags & (VM_READ | VM_WRITE)))
			goto bad_area;

	/* If for any reason at all we couldn't handle the fault,
	 * make sure we exit gracefully rather than endlessly redo
	 * the fault.
	 */
survive:
	fault = handle_mm_fault(mm, vma, address, is_write);
	if (unlikely(fault & VM_FAULT_ERROR)) {
		if (fault & VM_FAULT_OOM)
			goto out_of_memory;
		else if (fault & VM_FAULT_SIGBUS)
			goto do_sigbus;
		BUG();
	}
	if (fault & VM_FAULT_MAJOR)
		current->maj_flt++;
	else
		current->min_flt++;

	up_read(&mm->mmap_sem);
	return;

	/* Something tried to access memory that isn't in our memory map..
	 * Fix it, but check if it's kernel or user first..
	 */
bad_area:
	up_read(&mm->mmap_sem);
	if (user_mode(regs)) {
		current->thread.bad_vaddr = address;
		current->thread.error_code = is_write;
		info.si_signo = SIGSEGV;
		info.si_errno = 0;
		/* info.si_code has been set above */
		info.si_addr = (void *) address;
		force_sig_info(SIGSEGV, &info, current);
		return;
	}
	bad_page_fault(regs, address, SIGSEGV);
	return;

	/* We ran out of memory, or some other thing happened to us that made
	 * us unable to handle the page fault gracefully.
	 */
out_of_memory:
	up_read(&mm->mmap_sem);
	if (is_global_init(current)) {
		yield();
		down_read(&mm->mmap_sem);
		goto survive;
	}
	printk("VM: killing process %s\n", current->comm);
	if (user_mode(regs))
		do_group_exit(SIGKILL);
	bad_page_fault(regs, address, SIGKILL);
	return;

do_sigbus:
	up_read(&mm->mmap_sem);

	/* Send a sigbus, regardless of whether we were in kernel
	 * or user mode.
	 */
	current->thread.bad_vaddr = address;
	info.si_signo = SIGBUS;
	info.si_errno = 0;
	info.si_code = BUS_ADRERR;
	info.si_addr = (void *) address;
	force_sig_info(SIGBUS, &info, current);

	/* Kernel mode? Handle exceptions or die */
	if (!user_mode(regs))
		bad_page_fault(regs, address, SIGBUS);
	return;

vmalloc_fault:
	{
		/* Synchronize this task's top level page-table
		 * with the 'reference' page table.
		 */
		struct mm_struct *act_mm = current->active_mm;
		int index = pgd_index(address);
		pgd_t *pgd, *pgd_k;
		pmd_t *pmd, *pmd_k;
		pte_t *pte_k;

		if (act_mm == NULL)
			goto bad_page_fault;

		pgd = act_mm->pgd + index;
		pgd_k = init_mm.pgd + index;

		if (!pgd_present(*pgd_k))
			goto bad_page_fault;

		pgd_val(*pgd) = pgd_val(*pgd_k);

		pmd = pmd_offset(pgd, address);
		pmd_k = pmd_offset(pgd_k, address);
		if (!pmd_present(*pmd) || !pmd_present(*pmd_k))
			goto bad_page_fault;

		pmd_val(*pmd) = pmd_val(*pmd_k);
		pte_k = pte_offset_kernel(pmd_k, address);

		if (!pte_present(*pte_k))
			goto bad_page_fault;
		return;
	}
bad_page_fault:
	bad_page_fault(regs, address, SIGKILL);
	return;
}
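/*
 * Illustrative sketch (not part of the Xtensa source above): the
 * vmalloc_fault path works because all kernel mappings live in
 * init_mm.pgd, so a top-level entry that is missing from the faulting
 * mm can simply be copied from the reference table, after which the
 * lower-level kernel page tables are shared.  The helper below isolates
 * that idea using the same macros the function above uses, and assumes
 * the file's usual headers are in scope.
 */
static int sketch_sync_kernel_pgd(struct mm_struct *act_mm, unsigned long address)
{
	unsigned int index = pgd_index(address);
	pgd_t *pgd = act_mm->pgd + index;
	pgd_t *pgd_k = init_mm.pgd + index;

	if (!pgd_present(*pgd_k))
		return -1;	/* not a valid kernel mapping either */

	/* Copy the top-level entry; the kernel tables below it are shared. */
	pgd_val(*pgd) = pgd_val(*pgd_k);
	return 0;
}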
/*
 * This routine handles page faults.  It determines the address,
 * and the problem, and then passes it off to one of the appropriate
 * routines.
 */
asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long write,
				unsigned long address)
{
	struct vm_area_struct *vma = NULL;
	struct task_struct *tsk = current;
	struct mm_struct *mm = tsk->mm;
	const int field = sizeof(unsigned long) * 2;
	siginfo_t info;
	int fault;

	info.si_code = SEGV_MAPERR;

	/*
	 * We fault-in kernel-space virtual memory on-demand. The
	 * 'reference' page table is init_mm.pgd.
	 *
	 * NOTE! We MUST NOT take any locks for this case. We may
	 * be in an interrupt or a critical region, and should
	 * only copy the information from the master page table,
	 * nothing more.
	 */
	if (unlikely(address >= VMALLOC_START && address <= VMALLOC_END))
		goto vmalloc_fault;
#ifdef MODULE_START
	if (unlikely(address >= MODULE_START && address < MODULE_END))
		goto vmalloc_fault;
#endif

	/*
	 * If we're in an interrupt or have no user
	 * context, we must not take the fault..
	 */
	if (in_atomic() || !mm)
		goto bad_area_nosemaphore;

	down_read(&mm->mmap_sem);
	vma = find_vma(mm, address);
	if (!vma)
		goto bad_area;
	if (vma->vm_start <= address)
		goto good_area;
	if (!(vma->vm_flags & VM_GROWSDOWN))
		goto bad_area;
	if (expand_stack(vma, address))
		goto bad_area;

	/*
	 * Ok, we have a good vm_area for this memory access, so
	 * we can handle it..
	 */
good_area:
	info.si_code = SEGV_ACCERR;

	if (write) {
		if (!(vma->vm_flags & VM_WRITE))
			goto bad_area;
	} else {
		if (!(vma->vm_flags & (VM_READ | VM_WRITE | VM_EXEC)))
			goto bad_area;
	}

survive:
	/*
	 * If for any reason at all we couldn't handle the fault,
	 * make sure we exit gracefully rather than endlessly redo
	 * the fault.
	 */
	fault = handle_mm_fault(mm, vma, address, write);
	if (unlikely(fault & VM_FAULT_ERROR)) {
		if (fault & VM_FAULT_OOM)
			goto out_of_memory;
		else if (fault & VM_FAULT_SIGSEGV)
			goto bad_area;
		else if (fault & VM_FAULT_SIGBUS)
			goto do_sigbus;
		BUG();
	}
	if (fault & VM_FAULT_MAJOR)
		tsk->maj_flt++;
	else
		tsk->min_flt++;

	up_read(&mm->mmap_sem);
	return;

	/*
	 * Something tried to access memory that isn't in our memory map..
	 * Fix it, but check if it's kernel or user first..
	 */
bad_area:
	up_read(&mm->mmap_sem);

bad_area_nosemaphore:
	/* User mode accesses just cause a SIGSEGV */
	if (user_mode(regs)) {
		tsk->thread.cp0_badvaddr = address;
		tsk->thread.error_code = write;
		info.si_signo = SIGSEGV;
		info.si_errno = 0;
		/* info.si_code has been set above */
		info.si_addr = (void __user *) address;
		force_sig_info(SIGSEGV, &info, tsk);
		return;
	}

no_context:
	/* Are we prepared to handle this kernel fault? */
	if (fixup_exception(regs)) {
		current->thread.cp0_baduaddr = address;
		return;
	}

	/*
	 * Oops. The kernel tried to access some bad page. We'll have to
	 * terminate things with extreme prejudice.
	 */
	bust_spinlocks(1);

	printk(KERN_ALERT "CPU %d Unable to handle kernel paging request at "
			"virtual address %0*lx, epc == %0*lx, ra == %0*lx\n",
			0, field, address, field, regs->cp0_epc,
			field, regs->regs[3]);
	die("Oops", regs);

	/*
	 * We ran out of memory, or some other thing happened to us that made
	 * us unable to handle the page fault gracefully.
	 */
out_of_memory:
	up_read(&mm->mmap_sem);
	if (is_global_init(tsk)) {
		yield();
		down_read(&mm->mmap_sem);
		goto survive;
	}
	printk("VM: killing process %s\n", tsk->comm);
	if (user_mode(regs))
		do_group_exit(SIGKILL);
	goto no_context;

do_sigbus:
	up_read(&mm->mmap_sem);

	/* Kernel mode? Handle exceptions or die */
	if (!user_mode(regs))
		goto no_context;
	else
		/*
		 * Send a sigbus, regardless of whether we were in kernel
		 * or user mode.
		 */
		tsk->thread.cp0_badvaddr = address;
	info.si_signo = SIGBUS;
	info.si_errno = 0;
	info.si_code = BUS_ADRERR;
	info.si_addr = (void __user *) address;
	force_sig_info(SIGBUS, &info, tsk);
	return;

vmalloc_fault:
	{
		/*
		 * Synchronize this task's top level page-table
		 * with the 'reference' page table.
		 *
		 * Do _not_ use "tsk" here. We might be inside
		 * an interrupt in the middle of a task switch..
		 */
		int offset = __pgd_offset(address);
		pgd_t *pgd, *pgd_k;
		pud_t *pud, *pud_k;
		pmd_t *pmd, *pmd_k;
		pte_t *pte_k;

		pgd = (pgd_t *) pgd_current + offset;
		pgd_k = init_mm.pgd + offset;

		if (!pgd_present(*pgd_k))
			goto no_context;
		set_pgd(pgd, *pgd_k);

		pud = pud_offset(pgd, address);
		pud_k = pud_offset(pgd_k, address);
		if (!pud_present(*pud_k))
			goto no_context;
		pmd = pmd_offset(pud, address);
		pmd_k = pmd_offset(pud_k, address);
		if (!pmd_present(*pmd_k))
			goto no_context;
		set_pmd(pmd, *pmd_k);

		pte_k = pte_offset_kernel(pmd_k, address);
		if (!pte_present(*pte_k))
			goto no_context;
		return;
	}
}
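/*
 * Hedged sketch of what the fixup_exception() call in the no_context path
 * above typically does on architectures of this vintage: look the faulting
 * pc up in the kernel exception table (entries are emitted alongside every
 * get_user/put_user access) and, if an entry exists, resume at its fixup
 * stub instead of oopsing.  The entry layout and the pc setter below are
 * placeholders; the real names and layout are architecture specific.
 */
void sketch_set_pc(struct pt_regs *regs, unsigned long pc);	/* hypothetical arch helper */

static int sketch_fixup_exception(struct pt_regs *regs)
{
	const struct exception_table_entry *e;

	e = search_exception_tables(instruction_pointer(regs));
	if (!e)
		return 0;	/* genuine kernel bug: caller will print an oops */

	sketch_set_pc(regs, e->fixup);	/* classic insn/fixup table layout assumed */
	return 1;
}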
static void jit_bundle_gen(struct pt_regs *regs, tilegx_bundle_bits bundle,
			   int align_ctl)
{
	struct thread_info *info = current_thread_info();
	struct unaligned_jit_fragment frag;
	struct unaligned_jit_fragment *jit_code_area;
	tilegx_bundle_bits bundle_2 = 0;
	/* If bundle_2_enable = false, bundle_2 is fnop/nop operation. */
	bool bundle_2_enable = true;
	uint64_t ra = -1, rb = -1, rd = -1, clob1 = -1, clob2 = -1, clob3 = -1;
	/*
	 * Indicate if the unalign access
	 * instruction's registers hit with
	 * others in the same bundle.
	 */
	bool alias = false;
	bool load_n_store = true;
	bool load_store_signed = false;
	unsigned int load_store_size = 8;
	bool y1_br = false;	/* True, for a branch in same bundle at Y1.*/
	int y1_br_reg = 0;
	/* True for link operation. i.e. jalr or lnk at Y1 */
	bool y1_lr = false;
	int y1_lr_reg = 0;
	bool x1_add = false;	/* True, for load/store ADD instruction at X1*/
	int x1_add_imm8 = 0;
	bool unexpected = false;
	int n = 0, k;

	jit_code_area =
		(struct unaligned_jit_fragment *)(info->unalign_jit_base);

	memset((void *)&frag, 0, sizeof(frag));

	/* 0: X mode, Otherwise: Y mode. */
	if (bundle & TILEGX_BUNDLE_MODE_MASK) {
		unsigned int mod, opcode;

		if (get_Opcode_Y1(bundle) == RRR_1_OPCODE_Y1 &&
		    get_RRROpcodeExtension_Y1(bundle) ==
		    UNARY_RRR_1_OPCODE_Y1) {

			opcode = get_UnaryOpcodeExtension_Y1(bundle);

			/*
			 * Test "jalr", "jalrp", "jr", "jrp" instruction at Y1
			 * pipeline.
			 */
			switch (opcode) {
			case JALR_UNARY_OPCODE_Y1:
			case JALRP_UNARY_OPCODE_Y1:
				y1_lr = true;
				y1_lr_reg = 55; /* Link register. */
				/* FALLTHROUGH */
			case JR_UNARY_OPCODE_Y1:
			case JRP_UNARY_OPCODE_Y1:
				y1_br = true;
				y1_br_reg = get_SrcA_Y1(bundle);
				break;
			case LNK_UNARY_OPCODE_Y1:
				/* "lnk" at Y1 pipeline. */
				y1_lr = true;
				y1_lr_reg = get_Dest_Y1(bundle);
				break;
			}
		}

		opcode = get_Opcode_Y2(bundle);
		mod = get_Mode(bundle);

		/*
		 * bundle_2 is bundle after making Y2 as a dummy operation
		 * - ld zero, sp
		 */
		bundle_2 = (bundle & (~GX_INSN_Y2_MASK)) | jit_y2_dummy();

		/* Make Y1 as fnop if Y1 is a branch or lnk operation. */
		if (y1_br || y1_lr) {
			bundle_2 &= ~(GX_INSN_Y1_MASK);
			bundle_2 |= jit_y1_fnop();
		}

		if (is_y0_y1_nop(bundle_2))
			bundle_2_enable = false;

		if (mod == MODE_OPCODE_YC2) {
			/* Store. */
			load_n_store = false;
			load_store_size = 1 << opcode;
			load_store_signed = false;
			find_regs(bundle, 0, &ra, &rb, &clob1, &clob2,
				  &clob3, &alias);
			if (load_store_size > 8)
				unexpected = true;
		} else {
			/* Load. */
			load_n_store = true;
			if (mod == MODE_OPCODE_YB2) {
				switch (opcode) {
				case LD_OPCODE_Y2:
					load_store_signed = false;
					load_store_size = 8;
					break;
				case LD4S_OPCODE_Y2:
					load_store_signed = true;
					load_store_size = 4;
					break;
				case LD4U_OPCODE_Y2:
					load_store_signed = false;
					load_store_size = 4;
					break;
				default:
					unexpected = true;
				}
			} else if (mod == MODE_OPCODE_YA2) {
				if (opcode == LD2S_OPCODE_Y2) {
					load_store_signed = true;
					load_store_size = 2;
				} else if (opcode == LD2U_OPCODE_Y2) {
					load_store_signed = false;
					load_store_size = 2;
				} else
					unexpected = true;
			} else
				unexpected = true;
			find_regs(bundle, &rd, &ra, &rb, &clob1, &clob2,
				  &clob3, &alias);
		}
	} else {
		unsigned int opcode;

		/* bundle_2 is bundle after making X1 as "fnop". */
		bundle_2 = (bundle & (~GX_INSN_X1_MASK)) | jit_x1_fnop();

		if (is_x0_x1_nop(bundle_2))
			bundle_2_enable = false;

		if (get_Opcode_X1(bundle) == RRR_0_OPCODE_X1) {
			opcode = get_UnaryOpcodeExtension_X1(bundle);

			if (get_RRROpcodeExtension_X1(bundle) ==
			    UNARY_RRR_0_OPCODE_X1) {
				load_n_store = true;
				find_regs(bundle, &rd, &ra, &rb, &clob1,
					  &clob2, &clob3, &alias);

				switch (opcode) {
				case LD_UNARY_OPCODE_X1:
					load_store_signed = false;
					load_store_size = 8;
					break;
				case LD4S_UNARY_OPCODE_X1:
					load_store_signed = true;
					/* FALLTHROUGH */
				case LD4U_UNARY_OPCODE_X1:
					load_store_size = 4;
					break;
				case LD2S_UNARY_OPCODE_X1:
					load_store_signed = true;
					/* FALLTHROUGH */
				case LD2U_UNARY_OPCODE_X1:
					load_store_size = 2;
					break;
				default:
					unexpected = true;
				}
			} else {
				load_n_store = false;
				load_store_signed = false;
				find_regs(bundle, 0, &ra, &rb, &clob1,
					  &clob2, &clob3, &alias);

				opcode = get_RRROpcodeExtension_X1(bundle);
				switch (opcode) {
				case ST_RRR_0_OPCODE_X1:
					load_store_size = 8;
					break;
				case ST4_RRR_0_OPCODE_X1:
					load_store_size = 4;
					break;
				case ST2_RRR_0_OPCODE_X1:
					load_store_size = 2;
					break;
				default:
					unexpected = true;
				}
			}
		} else if (get_Opcode_X1(bundle) == IMM8_OPCODE_X1) {
			load_n_store = true;
			opcode = get_Imm8OpcodeExtension_X1(bundle);
			switch (opcode) {
			case LD_ADD_IMM8_OPCODE_X1:
				load_store_size = 8;
				break;
			case LD4S_ADD_IMM8_OPCODE_X1:
				load_store_signed = true;
				/* FALLTHROUGH */
			case LD4U_ADD_IMM8_OPCODE_X1:
				load_store_size = 4;
				break;
			case LD2S_ADD_IMM8_OPCODE_X1:
				load_store_signed = true;
				/* FALLTHROUGH */
			case LD2U_ADD_IMM8_OPCODE_X1:
				load_store_size = 2;
				break;
			case ST_ADD_IMM8_OPCODE_X1:
				load_n_store = false;
				load_store_size = 8;
				break;
			case ST4_ADD_IMM8_OPCODE_X1:
				load_n_store = false;
				load_store_size = 4;
				break;
			case ST2_ADD_IMM8_OPCODE_X1:
				load_n_store = false;
				load_store_size = 2;
				break;
			default:
				unexpected = true;
			}

			if (!unexpected) {
				x1_add = true;
				if (load_n_store)
					x1_add_imm8 = get_Imm8_X1(bundle);
				else
					x1_add_imm8 = get_Dest_Imm8_X1(bundle);
			}

			find_regs(bundle, load_n_store ? (&rd) : NULL,
				  &ra, &rb, &clob1, &clob2, &clob3, &alias);
		} else
			unexpected = true;
	}

	/*
	 * Some sanity check for register numbers extracted from fault bundle.
	 */
	if (check_regs(rd, ra, rb, clob1, clob2, clob3) == true)
		unexpected = true;

	/* Give warning if register ra has an aligned address. */
	if (!unexpected)
		WARN_ON(!((load_store_size - 1) & (regs->regs[ra])));

	/*
	 * Fault came from kernel space, here we only need take care of
	 * unaligned "get_user/put_user" macros defined in "uaccess.h".
	 * Basically, we will handle bundle like this:
	 * {ld/2u/4s rd, ra; movei rx, 0} or {st/2/4 ra, rb; movei rx, 0}
	 * (Refer to file "arch/tile/include/asm/uaccess.h" for details).
	 * For either load or store, byte-wise operation is performed by calling
	 * get_user() or put_user(). If the macro returns non-zero value,
	 * set the value to rx, otherwise set zero to rx. Finally make pc point
	 * to next bundle and return.
	 */

	if (EX1_PL(regs->ex1) != USER_PL) {

		unsigned long rx = 0;
		unsigned long x = 0, ret = 0;

		if (y1_br || y1_lr || x1_add ||
		    (load_store_signed !=
		     (load_n_store && load_store_size == 4))) {
			/* No branch, link, wrong sign-ext or load/store add. */
			unexpected = true;
		} else if (!unexpected) {
			if (bundle & TILEGX_BUNDLE_MODE_MASK) {
				/*
				 * Fault bundle is Y mode.
				 * Check if the Y1 and Y0 is the form of
				 * { movei rx, 0; nop/fnop }, if yes,
				 * find the rx.
				 */
				if ((get_Opcode_Y1(bundle) == ADDI_OPCODE_Y1)
				    && (get_SrcA_Y1(bundle) == TREG_ZERO) &&
				    (get_Imm8_Y1(bundle) == 0) &&
				    is_bundle_y0_nop(bundle)) {
					rx = get_Dest_Y1(bundle);
				} else if ((get_Opcode_Y0(bundle) ==
					    ADDI_OPCODE_Y0) &&
					   (get_SrcA_Y0(bundle) == TREG_ZERO)
					   && (get_Imm8_Y0(bundle) == 0) &&
					   is_bundle_y1_nop(bundle)) {
					rx = get_Dest_Y0(bundle);
				} else {
					unexpected = true;
				}
			} else {
				/*
				 * Fault bundle is X mode.
				 * Check if the X0 is 'movei rx, 0',
				 * if yes, find the rx.
				 */
				if ((get_Opcode_X0(bundle) == IMM8_OPCODE_X0)
				    && (get_Imm8OpcodeExtension_X0(bundle) ==
					ADDI_IMM8_OPCODE_X0) &&
				    (get_SrcA_X0(bundle) == TREG_ZERO) &&
				    (get_Imm8_X0(bundle) == 0)) {
					rx = get_Dest_X0(bundle);
				} else {
					unexpected = true;
				}
			}

			/* rx should be less than 56. */
			if (!unexpected && (rx >= 56))
				unexpected = true;
		}

		if (!search_exception_tables(regs->pc)) {
			/* No fixup in the exception tables for the pc. */
			unexpected = true;
		}

		if (unexpected) {
			/* Unexpected unalign kernel fault. */
			struct task_struct *tsk = validate_current();

			bust_spinlocks(1);

			show_regs(regs);

			if (unlikely(tsk->pid < 2)) {
				panic("Kernel unalign fault running %s!",
				      tsk->pid ? "init" : "the idle task");
			}
#ifdef SUPPORT_DIE
			die("Oops", regs);
#endif
			bust_spinlocks(1);

			do_group_exit(SIGKILL);

		} else {
			unsigned long i, b = 0;
			unsigned char *ptr =
				(unsigned char *)regs->regs[ra];
			if (load_n_store) {
				/* handle get_user(x, ptr) */
				for (i = 0; i < load_store_size; i++) {
					ret = get_user(b, ptr++);
					if (!ret) {
						/* Success! update x. */
#ifdef __LITTLE_ENDIAN
						x |= (b << (8 * i));
#else
						x <<= 8;
						x |= b;
#endif /* __LITTLE_ENDIAN */
					} else {
						x = 0;
						break;
					}
				}

				/* Sign-extend 4-byte loads. */
				if (load_store_size == 4)
					x = (long)(int)x;

				/* Set register rd. */
				regs->regs[rd] = x;

				/* Set register rx. */
				regs->regs[rx] = ret;

				/* Bump pc. */
				regs->pc += 8;
			} else {
				/* Handle put_user(x, ptr) */
				x = regs->regs[rb];
#ifdef __LITTLE_ENDIAN
				b = x;
#else
				/*
				 * Swap x in order to store x from low
				 * to high memory same as the
				 * little-endian case.
				 */
				switch (load_store_size) {
				case 8:
					b = swab64(x);
					break;
				case 4:
					b = swab32(x);
					break;
				case 2:
					b = swab16(x);
					break;
				}
#endif /* __LITTLE_ENDIAN */
				for (i = 0; i < load_store_size; i++) {
					ret = put_user(b, ptr++);
					if (ret)
						break;
					/* Success! shift 1 byte. */
					b >>= 8;
				}

				/* Set register rx. */
				regs->regs[rx] = ret;

				/* Bump pc. */
				regs->pc += 8;
			}
		}

		unaligned_fixup_count++;

		if (unaligned_printk) {
			pr_info("%s/%d - Unalign fixup for kernel access to userspace %lx\n",
				current->comm, current->pid, regs->regs[ra]);
		}

		/* Done! Return to the exception handler. */
		return;
	}
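/*
 * Side note, not from the kernel source: the "x = (long)(int)x" step above
 * is what turns the byte-assembled 4-byte value into the sign-extended
 * 64-bit register image that an aligned ld4s would have produced (tilegx
 * registers are 64 bits wide, so unsigned long is assumed to be 64 bits).
 */
static unsigned long sketch_widen_4byte(unsigned long x)
{
	/* e.g. 0x00000000ffffffffUL becomes 0xffffffffffffffffUL, i.e. -1 */
	return (unsigned long)(long)(int)x;
}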
/*
 * This routine is responsible for faulting in user pages.
 * It passes the work off to one of the appropriate routines.
 * It returns true if the fault was successfully handled.
 */
static int handle_page_fault(struct pt_regs *regs,
			     int fault_num,
			     int is_page_fault,
			     unsigned long address,
			     int write)
{
	struct task_struct *tsk;
	struct mm_struct *mm;
	struct vm_area_struct *vma;
	unsigned long stack_offset;
	int fault;
	int si_code;
	int is_kernel_mode;
	pgd_t *pgd;

	/* on TILE, protection faults are always writes */
	if (!is_page_fault)
		write = 1;

	is_kernel_mode = (EX1_PL(regs->ex1) != USER_PL);

	tsk = validate_current();

	/*
	 * Check to see if we might be overwriting the stack, and bail
	 * out if so.  The page fault code is a relatively likely
	 * place to get trapped in an infinite regress, and once we
	 * overwrite the whole stack, it becomes very hard to recover.
	 */
	stack_offset = stack_pointer & (THREAD_SIZE-1);
	if (stack_offset < THREAD_SIZE / 8) {
		pr_alert("Potential stack overrun: sp %#lx\n", stack_pointer);
		show_regs(regs);
		pr_alert("Killing current process %d/%s\n",
			 tsk->pid, tsk->comm);
		do_group_exit(SIGKILL);
	}

	/*
	 * Early on, we need to check for migrating PTE entries;
	 * see homecache.c.  If we find a migrating PTE, we wait until
	 * the backing page claims to be done migrating, then we proceed.
	 * For kernel PTEs, we rewrite the PTE and return and retry.
	 * Otherwise, we treat the fault like a normal "no PTE" fault,
	 * rather than trying to patch up the existing PTE.
	 */
	pgd = get_current_pgd();
	if (handle_migrating_pte(pgd, fault_num, address, regs->pc,
				 is_kernel_mode, write))
		return 1;

	si_code = SEGV_MAPERR;

	/*
	 * We fault-in kernel-space virtual memory on-demand. The
	 * 'reference' page table is init_mm.pgd.
	 *
	 * NOTE! We MUST NOT take any locks for this case. We may
	 * be in an interrupt or a critical region, and should
	 * only copy the information from the master page table,
	 * nothing more.
	 *
	 * This verifies that the fault happens in kernel space
	 * and that the fault was not a protection fault.
	 */
	if (unlikely(address >= TASK_SIZE &&
		     !is_arch_mappable_range(address, 0))) {
		if (is_kernel_mode && is_page_fault &&
		    vmalloc_fault(pgd, address) >= 0)
			return 1;
		/*
		 * Don't take the mm semaphore here. If we fixup a prefetch
		 * fault we could otherwise deadlock.
		 */
		mm = NULL;	/* happy compiler */
		vma = NULL;
		goto bad_area_nosemaphore;
	}

	/*
	 * If we're trying to touch user-space addresses, we must
	 * be either at PL0, or else with interrupts enabled in the
	 * kernel, so either way we can re-enable interrupts here
	 * unless we are doing atomic access to user space with
	 * interrupts disabled.
	 */
	if (!(regs->flags & PT_FLAGS_DISABLE_IRQ))
		local_irq_enable();

	mm = tsk->mm;

	/*
	 * If we're in an interrupt, have no user context or are running in an
	 * atomic region then we must not take the fault.
	 */
	if (in_atomic() || !mm) {
		vma = NULL;	/* happy compiler */
		goto bad_area_nosemaphore;
	}

	/*
	 * When running in the kernel we expect faults to occur only to
	 * addresses in user space.  All other faults represent errors in the
	 * kernel and should generate an OOPS.  Unfortunately, in the case of an
	 * erroneous fault occurring in a code path which already holds mmap_sem
	 * we will deadlock attempting to validate the fault against the
	 * address space.  Luckily the kernel only validly references user
	 * space from well defined areas of code, which are listed in the
	 * exceptions table.
	 *
	 * As the vast majority of faults will be valid we will only perform
	 * the source reference check when there is a possibility of a deadlock.
	 * Attempt to lock the address space, if we cannot we then validate the
	 * source.  If this is invalid we can skip the address space check,
	 * thus avoiding the deadlock.
	 */
	if (!down_read_trylock(&mm->mmap_sem)) {
		if (is_kernel_mode &&
		    !search_exception_tables(regs->pc)) {
			vma = NULL;	/* happy compiler */
			goto bad_area_nosemaphore;
		}
		down_read(&mm->mmap_sem);
	}

	vma = find_vma(mm, address);
	if (!vma)
		goto bad_area;
	if (vma->vm_start <= address)
		goto good_area;
	if (!(vma->vm_flags & VM_GROWSDOWN))
		goto bad_area;
	if (regs->sp < PAGE_OFFSET) {
		/*
		 * accessing the stack below sp is always a bug.
		 */
		if (address < regs->sp)
			goto bad_area;
	}
	if (expand_stack(vma, address))
		goto bad_area;

/*
 * Ok, we have a good vm_area for this memory access, so
 * we can handle it..
 */
good_area:
	si_code = SEGV_ACCERR;
	if (fault_num == INT_ITLB_MISS) {
		if (!(vma->vm_flags & VM_EXEC))
			goto bad_area;
	} else if (write) {
#ifdef TEST_VERIFY_AREA
		if (!is_page_fault && regs->cs == KERNEL_CS)
			pr_err("WP fault at "REGFMT"\n", regs->eip);
#endif
		if (!(vma->vm_flags & VM_WRITE))
			goto bad_area;
	} else {
		if (!is_page_fault || !(vma->vm_flags & VM_READ))
			goto bad_area;
	}

survive:
	/*
	 * If for any reason at all we couldn't handle the fault,
	 * make sure we exit gracefully rather than endlessly redo
	 * the fault.
	 */
	fault = handle_mm_fault(mm, vma, address, write);
	if (unlikely(fault & VM_FAULT_ERROR)) {
		if (fault & VM_FAULT_OOM)
			goto out_of_memory;
		else if (fault & VM_FAULT_SIGSEGV)
			goto bad_area;
		else if (fault & VM_FAULT_SIGBUS)
			goto do_sigbus;
		BUG();
	}
	if (fault & VM_FAULT_MAJOR)
		tsk->maj_flt++;
	else
		tsk->min_flt++;

#if CHIP_HAS_TILE_DMA() || CHIP_HAS_SN_PROC()
	/*
	 * If this was an asynchronous fault,
	 * restart the appropriate engine.
	 */
	switch (fault_num) {
#if CHIP_HAS_TILE_DMA()
	case INT_DMATLB_MISS:
	case INT_DMATLB_MISS_DWNCL:
	case INT_DMATLB_ACCESS:
	case INT_DMATLB_ACCESS_DWNCL:
		__insn_mtspr(SPR_DMA_CTR, SPR_DMA_CTR__REQUEST_MASK);
		break;
#endif
#if CHIP_HAS_SN_PROC()
	case INT_SNITLB_MISS:
	case INT_SNITLB_MISS_DWNCL:
		__insn_mtspr(SPR_SNCTL,
			     __insn_mfspr(SPR_SNCTL) &
			     ~SPR_SNCTL__FRZPROC_MASK);
		break;
#endif
	}
#endif

	up_read(&mm->mmap_sem);
	return 1;

/*
 * Something tried to access memory that isn't in our memory map..
 * Fix it, but check if it's kernel or user first..
 */
bad_area:
	up_read(&mm->mmap_sem);

bad_area_nosemaphore:
	/* User mode accesses just cause a SIGSEGV */
	if (!is_kernel_mode) {
		/*
		 * It's possible to have interrupts off here.
		 */
		local_irq_enable();

		force_sig_info_fault("segfault", SIGSEGV, si_code, address,
				     fault_num, tsk, regs);
		return 0;
	}

no_context:
	/* Are we prepared to handle this kernel fault? */
	if (fixup_exception(regs))
		return 0;

	/*
	 * Oops. The kernel tried to access some bad page. We'll have to
	 * terminate things with extreme prejudice.
	 */
	bust_spinlocks(1);

	/* FIXME: no lookup_address() yet */
#ifdef SUPPORT_LOOKUP_ADDRESS
	if (fault_num == INT_ITLB_MISS) {
		pte_t *pte = lookup_address(address);

		if (pte && pte_present(*pte) && !pte_exec_kernel(*pte))
			pr_crit("kernel tried to execute"
				" non-executable page - exploit attempt?"
				" (uid: %d)\n", current->uid);
	}
#endif
	if (address < PAGE_SIZE)
		pr_alert("Unable to handle kernel NULL pointer dereference\n");
	else
		pr_alert("Unable to handle kernel paging request\n");
	pr_alert(" at virtual address "REGFMT", pc "REGFMT"\n",
		 address, regs->pc);

	show_regs(regs);

	if (unlikely(tsk->pid < 2)) {
		panic("Kernel page fault running %s!",
		      is_idle_task(tsk) ? "the idle task" : "init");
	}

	/*
	 * More FIXME: we should probably copy the i386 here and
	 * implement a generic die() routine.  Not today.
	 */
#ifdef SUPPORT_DIE
	die("Oops", regs);
#endif
	bust_spinlocks(1);

	do_group_exit(SIGKILL);

/*
 * We ran out of memory, or some other thing happened to us that made
 * us unable to handle the page fault gracefully.
 */
out_of_memory:
	up_read(&mm->mmap_sem);
	if (is_global_init(tsk)) {
		yield();
		down_read(&mm->mmap_sem);
		goto survive;
	}
	pr_alert("VM: killing process %s\n", tsk->comm);
	if (!is_kernel_mode)
		do_group_exit(SIGKILL);
	goto no_context;

do_sigbus:
	up_read(&mm->mmap_sem);

	/* Kernel mode? Handle exceptions or die */
	if (is_kernel_mode)
		goto no_context;

	force_sig_info_fault("bus error", SIGBUS, BUS_ADRERR, address,
			     fault_num, tsk, regs);
	return 0;
}
/*
 * This routine handles page faults.  It determines the address, and the
 * problem, and then passes it handle_page_fault() for normal DTLB and
 * ITLB issues, and for DMA or SN processor faults when we are in user
 * space.  For the latter, if we're in kernel mode, we just save the
 * interrupt away appropriately and return immediately.  We can't do
 * page faults for user code while in kernel mode.
 */
void do_page_fault(struct pt_regs *regs, int fault_num,
		   unsigned long address, unsigned long write)
{
	int is_page_fault;

#ifdef CONFIG_KPROBES
	/*
	 * This is to notify the fault handler of the kprobes.  The
	 * exception code is redundant as it is also carried in REGS,
	 * but we pass it anyhow.
	 */
	if (notify_die(DIE_PAGE_FAULT, "page fault", regs, -1,
		       regs->faultnum, SIGSEGV) == NOTIFY_STOP)
		return;
#endif

#ifdef __tilegx__
	/*
	 * We don't need early do_page_fault_ics() support, since unlike
	 * Pro we don't need to worry about unlocking the atomic locks.
	 * There is only one current case in GX where we touch any memory
	 * under ICS other than our own kernel stack, and we handle that
	 * here.  (If we crash due to trying to touch our own stack,
	 * we're in too much trouble for C code to help out anyway.)
	 */
	if (write & ~1) {
		unsigned long pc = write & ~1;
		if (pc >= (unsigned long) __start_unalign_asm_code &&
		    pc < (unsigned long) __end_unalign_asm_code) {
			struct thread_info *ti = current_thread_info();
			/*
			 * Our EX_CONTEXT is still what it was from the
			 * initial unalign exception, but now we've faulted
			 * on the JIT page.  We would like to complete the
			 * page fault however is appropriate, and then retry
			 * the instruction that caused the unalign exception.
			 * Our state has been "corrupted" by setting the low
			 * bit in "sp", and stashing r0..r3 in the
			 * thread_info area, so we revert all of that, then
			 * continue as if this were a normal page fault.
			 */
			regs->sp &= ~1UL;
			regs->regs[0] = ti->unalign_jit_tmp[0];
			regs->regs[1] = ti->unalign_jit_tmp[1];
			regs->regs[2] = ti->unalign_jit_tmp[2];
			regs->regs[3] = ti->unalign_jit_tmp[3];
			write &= 1;
		} else {
			pr_alert("%s/%d: ICS set at page fault at %#lx: %#lx\n",
				 current->comm, current->pid, pc, address);
			show_regs(regs);
			do_group_exit(SIGKILL);
			return;
		}
	}
#else
	/* This case should have been handled by do_page_fault_ics(). */
	BUG_ON(write & ~1);
#endif

#if CHIP_HAS_TILE_DMA()
	/*
	 * If it's a DMA fault, suspend the transfer while we're
	 * handling the miss; we'll restart after it's handled.  If we
	 * don't suspend, it's possible that this process could swap
	 * out and back in, and restart the engine since the DMA is
	 * still 'running'.
	 */
	if (fault_num == INT_DMATLB_MISS ||
	    fault_num == INT_DMATLB_ACCESS ||
	    fault_num == INT_DMATLB_MISS_DWNCL ||
	    fault_num == INT_DMATLB_ACCESS_DWNCL) {
		__insn_mtspr(SPR_DMA_CTR, SPR_DMA_CTR__SUSPEND_MASK);
		while (__insn_mfspr(SPR_DMA_USER_STATUS) &
		       SPR_DMA_STATUS__BUSY_MASK)
			;
	}
#endif

	/* Validate fault num and decide if this is a first-time page fault. */
	switch (fault_num) {
	case INT_ITLB_MISS:
	case INT_DTLB_MISS:
#if CHIP_HAS_TILE_DMA()
	case INT_DMATLB_MISS:
	case INT_DMATLB_MISS_DWNCL:
#endif
		is_page_fault = 1;
		break;

	case INT_DTLB_ACCESS:
#if CHIP_HAS_TILE_DMA()
	case INT_DMATLB_ACCESS:
	case INT_DMATLB_ACCESS_DWNCL:
#endif
		is_page_fault = 0;
		break;

	default:
		panic("Bad fault number %d in do_page_fault", fault_num);
	}

#if CHIP_HAS_TILE_DMA()
	if (!user_mode(regs)) {
		struct async_tlb *async;
		switch (fault_num) {
#if CHIP_HAS_TILE_DMA()
		case INT_DMATLB_MISS:
		case INT_DMATLB_ACCESS:
		case INT_DMATLB_MISS_DWNCL:
		case INT_DMATLB_ACCESS_DWNCL:
			async = &current->thread.dma_async_tlb;
			break;
#endif
		default:
			async = NULL;
		}
		if (async) {

			/*
			 * No vmalloc check required, so we can allow
			 * interrupts immediately at this point.
			 */
			local_irq_enable();

			set_thread_flag(TIF_ASYNC_TLB);
			if (async->fault_num != 0) {
				panic("Second async fault %d;"
				      " old fault was %d (%#lx/%ld)",
				      fault_num, async->fault_num,
				      address, write);
			}
			BUG_ON(fault_num == 0);
			async->fault_num = fault_num;
			async->is_fault = is_page_fault;
			async->is_write = write;
			async->address = address;
			return;
		}
	}
#endif

	handle_page_fault(regs, fault_num, is_page_fault, address, write);
}
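/*
 * Hedged sketch of the consumer side of the async path above: once the
 * kernel is back in a context that may sleep, the saved fault can be
 * replayed through handle_page_fault() and the slot cleared.  This helper
 * is only an illustration built from the fields the function above fills
 * in; the real TILE code drives this from its TIF_ASYNC_TLB work path.
 */
static void sketch_replay_async_tlb(struct pt_regs *regs, struct async_tlb *async)
{
	if (async->fault_num == 0)
		return;		/* nothing pending */

	handle_page_fault(regs, async->fault_num, async->is_fault,
			  async->address, async->is_write);
	async->fault_num = 0;	/* mark the slot free again */
}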
/*
 * This routine handles page faults.  It determines the address,
 * and the problem, and then passes it off to one of the appropriate
 * routines.
 */
asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long cause,
				unsigned long address)
{
	struct vm_area_struct *vma = NULL;
	struct task_struct *tsk = current;
	struct mm_struct *mm = tsk->mm;
	int code = SEGV_MAPERR;
	int fault;
	unsigned int flags = 0;

	cause >>= 2;

	/* Restart the instruction */
	regs->ea -= 4;

	/*
	 * We fault-in kernel-space virtual memory on-demand. The
	 * 'reference' page table is init_mm.pgd.
	 *
	 * NOTE! We MUST NOT take any locks for this case. We may
	 * be in an interrupt or a critical region, and should
	 * only copy the information from the master page table,
	 * nothing more.
	 */
	if (unlikely(address >= VMALLOC_START && address <= VMALLOC_END)) {
		if (user_mode(regs))
			goto bad_area_nosemaphore;
		else
			goto vmalloc_fault;
	}

	if (unlikely(address >= TASK_SIZE))
		goto bad_area_nosemaphore;

	/*
	 * If we're in an interrupt or have no user
	 * context, we must not take the fault..
	 */
	if (in_atomic() || !mm)
		goto bad_area_nosemaphore;

	down_read(&mm->mmap_sem);
	vma = find_vma(mm, address);
	if (!vma)
		goto bad_area;
	if (vma->vm_start <= address)
		goto good_area;
	if (!(vma->vm_flags & VM_GROWSDOWN))
		goto bad_area;
	if (expand_stack(vma, address))
		goto bad_area;
/*
 * Ok, we have a good vm_area for this memory access, so
 * we can handle it..
 */
good_area:
	code = SEGV_ACCERR;

	switch (cause) {
	case EXC_SUPERV_INSN_ACCESS:
		goto bad_area;
	case EXC_SUPERV_DATA_ACCESS:
		goto bad_area;
	case EXC_X_PROTECTION_FAULT:
		if (!(vma->vm_flags & VM_EXEC))
			goto bad_area;
		break;
	case EXC_R_PROTECTION_FAULT:
		if (!(vma->vm_flags & VM_READ))
			goto bad_area;
		break;
	case EXC_W_PROTECTION_FAULT:
		flags = FAULT_FLAG_WRITE;
		if (!(vma->vm_flags & VM_WRITE))
			goto bad_area;
		break;
	}

survive:
	/*
	 * If for any reason at all we couldn't handle the fault,
	 * make sure we exit gracefully rather than endlessly redo
	 * the fault.
	 */
	fault = handle_mm_fault(mm, vma, address, flags);
	if (unlikely(fault & VM_FAULT_ERROR)) {
		if (fault & VM_FAULT_OOM)
			goto out_of_memory;
		else if (fault & VM_FAULT_SIGBUS)
			goto do_sigbus;
		BUG();
	}
	if (fault & VM_FAULT_MAJOR)
		tsk->maj_flt++;
	else
		tsk->min_flt++;

	up_read(&mm->mmap_sem);
	return;

/*
 * Something tried to access memory that isn't in our memory map..
 * Fix it, but check if it's kernel or user first..
 */
bad_area:
	up_read(&mm->mmap_sem);

bad_area_nosemaphore:
	/* User mode accesses just cause a SIGSEGV */
	if (user_mode(regs)) {
		_exception(SIGSEGV, regs, code, address);
		return;
	}

no_context:
	/* Are we prepared to handle this kernel fault? */
	if (fixup_exception(regs))
		return;

	/*
	 * Oops. The kernel tried to access some bad page. We'll have to
	 * terminate things with extreme prejudice.
	 */
	bust_spinlocks(1);

	pr_alert("Unable to handle kernel %s at virtual address %08lx",
		address < PAGE_SIZE ? "NULL pointer dereference" :
		"paging request", address);
	pr_alert("ea = %08lx, ra = %08lx, cause = %ld\n", regs->ea, regs->ra,
		cause);
	panic("Oops");
	return;

/*
 * We ran out of memory, or some other thing happened to us that made
 * us unable to handle the page fault gracefully.
 */
out_of_memory:
	up_read(&mm->mmap_sem);
	if (is_global_init(tsk)) {
		yield();
		down_read(&mm->mmap_sem);
		goto survive;
	}
	pr_info("VM: killing process %s\n", tsk->comm);
	if (user_mode(regs))
		do_group_exit(SIGKILL);
	goto no_context;

do_sigbus:
	up_read(&mm->mmap_sem);

	/* Kernel mode? Handle exceptions or die */
	if (!user_mode(regs))
		goto no_context;

	_exception(SIGBUS, regs, BUS_ADRERR, address);
	return;

vmalloc_fault:
	{
		/*
		 * Synchronize this task's top level page-table
		 * with the 'reference' page table.
		 *
		 * Do _not_ use "tsk" here. We might be inside
		 * an interrupt in the middle of a task switch..
		 */
		int offset = pgd_index(address);
		pgd_t *pgd, *pgd_k;
		pud_t *pud, *pud_k;
		pmd_t *pmd, *pmd_k;
		pte_t *pte_k;

#if 1 /* FIXME: Is this entirely correct ? */
		pgd = pgd_current + offset;
#else
		pgd = &current->mm->pgd[offset];
#endif
		pgd_k = init_mm.pgd + offset;

		if (!pgd_present(*pgd_k))
			goto no_context;
		set_pgd(pgd, *pgd_k);

		pud = pud_offset(pgd, address);
		pud_k = pud_offset(pgd_k, address);
		if (!pud_present(*pud_k))
			goto no_context;
		pmd = pmd_offset(pud, address);
		pmd_k = pmd_offset(pud_k, address);

		if (!pmd_present(*pmd_k))
			goto no_context;
		set_pmd(pmd, *pmd_k);

		pte_k = pte_offset_kernel(pmd_k, address);
		if (!pte_present(*pte_k))
			goto no_context;

		flush_tlb_one(address);
		return;
	}
}
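/*
 * Illustration only (not part of the Nios II source): the switch in the
 * good_area block above can be read as a table from the right-shifted
 * cause code to the single VMA permission bit it requires; returning 0
 * marks causes that a user mapping can never satisfy.
 */
static unsigned long sketch_cause_to_vm_flag(unsigned long cause)
{
	switch (cause) {
	case EXC_X_PROTECTION_FAULT:
		return VM_EXEC;
	case EXC_R_PROTECTION_FAULT:
		return VM_READ;
	case EXC_W_PROTECTION_FAULT:
		return VM_WRITE;
	default:
		/* e.g. EXC_SUPERV_INSN_ACCESS, EXC_SUPERV_DATA_ACCESS */
		return 0;
	}
}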
void do_page_fault(struct pt_regs *regs, int write, unsigned long address,
		   unsigned long cause_code)
{
	struct vm_area_struct *vma = NULL;
	struct task_struct *tsk = current;
	struct mm_struct *mm = tsk->mm;
	siginfo_t info;
	int fault, ret;
	unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE |
				(write ? FAULT_FLAG_WRITE : 0);

	/*
	 * We fault-in kernel-space virtual memory on-demand. The
	 * 'reference' page table is init_mm.pgd.
	 *
	 * NOTE! We MUST NOT take any locks for this case. We may
	 * be in an interrupt or a critical region, and should
	 * only copy the information from the master page table,
	 * nothing more.
	 */
	if (address >= VMALLOC_START && address <= VMALLOC_END) {
		ret = handle_vmalloc_fault(mm, address);
		if (unlikely(ret))
			goto bad_area_nosemaphore;
		else
			return;
	}

	info.si_code = SEGV_MAPERR;

	/*
	 * If we're in an interrupt or have no user
	 * context, we must not take the fault..
	 */
	if (in_atomic() || !mm)
		goto no_context;

retry:
	down_read(&mm->mmap_sem);
	vma = find_vma(mm, address);
	if (!vma)
		goto bad_area;
	if (vma->vm_start <= address)
		goto good_area;
	if (!(vma->vm_flags & VM_GROWSDOWN))
		goto bad_area;
	if (expand_stack(vma, address))
		goto bad_area;

	/*
	 * Ok, we have a good vm_area for this memory access, so
	 * we can handle it..
	 */
good_area:
	info.si_code = SEGV_ACCERR;

	/* Handle protection violation, execute on heap or stack */
	if (cause_code == ((ECR_V_PROTV << 16) | ECR_C_PROTV_INST_FETCH))
		goto bad_area;

	if (write) {
		if (!(vma->vm_flags & VM_WRITE))
			goto bad_area;
	} else {
		if (!(vma->vm_flags & (VM_READ | VM_EXEC)))
			goto bad_area;
	}

survive:
	/*
	 * If for any reason at all we couldn't handle the fault,
	 * make sure we exit gracefully rather than endlessly redo
	 * the fault.
	 */
	fault = handle_mm_fault(mm, vma, address, flags);

	/* If Pagefault was interrupted by SIGKILL, exit page fault "early" */
	if (unlikely(fatal_signal_pending(current))) {
		if ((fault & VM_FAULT_ERROR) && !(fault & VM_FAULT_RETRY))
			up_read(&mm->mmap_sem);
		if (user_mode(regs))
			return;
	}

	if (likely(!(fault & VM_FAULT_ERROR))) {
		if (flags & FAULT_FLAG_ALLOW_RETRY) {
			/* To avoid updating stats twice for retry case */
			if (fault & VM_FAULT_MAJOR)
				tsk->maj_flt++;
			else
				tsk->min_flt++;

			if (fault & VM_FAULT_RETRY) {
				flags &= ~FAULT_FLAG_ALLOW_RETRY;
				flags |= FAULT_FLAG_TRIED;
				goto retry;
			}
		}

		/* Fault Handled Gracefully */
		up_read(&mm->mmap_sem);
		return;
	}

	/* TBD: switch to pagefault_out_of_memory() */
	if (fault & VM_FAULT_OOM)
		goto out_of_memory;
	else if (fault & VM_FAULT_SIGBUS)
		goto do_sigbus;

	/* no man's land */
	BUG();

	/*
	 * Something tried to access memory that isn't in our memory map..
	 * Fix it, but check if it's kernel or user first..
	 */
bad_area:
	up_read(&mm->mmap_sem);

bad_area_nosemaphore:
	/* User mode accesses just cause a SIGSEGV */
	if (user_mode(regs)) {
		tsk->thread.fault_address = address;
		tsk->thread.cause_code = cause_code;
		info.si_signo = SIGSEGV;
		info.si_errno = 0;
		/* info.si_code has been set above */
		info.si_addr = (void __user *)address;
		force_sig_info(SIGSEGV, &info, tsk);
		return;
	}

no_context:
	/*
	 * Are we prepared to handle this kernel fault?
	 *
	 * (The kernel has valid exception-points in the source
	 *  when it accesses user-memory. When it fails in one
	 *  of those points, we find it in a table and do a jump
	 *  to some fixup code that loads an appropriate error
	 *  code)
	 */
	if (fixup_exception(regs))
		return;

	die("Oops", regs, address, cause_code);

out_of_memory:
	if (is_global_init(tsk)) {
		yield();
		goto survive;
	}
	up_read(&mm->mmap_sem);

	if (user_mode(regs))
		do_group_exit(SIGKILL);	/* This will never return */

	goto no_context;

do_sigbus:
	up_read(&mm->mmap_sem);

	if (!user_mode(regs))
		goto no_context;

	tsk->thread.fault_address = address;
	tsk->thread.cause_code = cause_code;
	info.si_signo = SIGBUS;
	info.si_errno = 0;
	info.si_code = BUS_ADRERR;
	info.si_addr = (void __user *)address;
	force_sig_info(SIGBUS, &info, tsk);
}
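/*
 * Hedged sketch: the goto-based retry above, rewritten as a loop to make
 * the FAULT_FLAG_ALLOW_RETRY/FAULT_FLAG_TRIED handshake easier to follow.
 * When handle_mm_fault() returns VM_FAULT_RETRY it has dropped mmap_sem,
 * so the real code re-takes the lock and re-looks-up the VMA before the
 * second attempt; that bookkeeping is omitted here for clarity.
 */
static int sketch_fault_with_retry(struct mm_struct *mm,
				   struct vm_area_struct *vma,
				   unsigned long address, int write)
{
	unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE |
			     (write ? FAULT_FLAG_WRITE : 0);
	int fault;

	for (;;) {
		fault = handle_mm_fault(mm, vma, address, flags);
		if (!(fault & VM_FAULT_RETRY) || !(flags & FAULT_FLAG_ALLOW_RETRY))
			return fault;

		/* Only one retry: drop ALLOW_RETRY, mark that we already tried. */
		flags &= ~FAULT_FLAG_ALLOW_RETRY;
		flags |= FAULT_FLAG_TRIED;
	}
}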