/* * swp_handler logs the id of calling process, dissects the instruction, sanity * checks the memory location, calls emulate_swpX for the actual operation and * deals with fixup/error handling before returning */ static int swp_handler(struct pt_regs *regs, unsigned int instr) { unsigned int address, destreg, data, type; unsigned int res = 0; perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, 1, 0, regs, regs->ARM_pc); if (current->pid != previous_pid) { pr_debug("\"%s\" (%ld) uses deprecated SWP{B} instruction\n", current->comm, (unsigned long)current->pid); previous_pid = current->pid; } /* Ignore the instruction if it fails its condition code check */ if (!check_condition(regs, instr)) { regs->ARM_pc += 4; return 0; } address = regs->uregs[EXTRACT_REG_NUM(instr, RN_OFFSET)]; data = regs->uregs[EXTRACT_REG_NUM(instr, RT2_OFFSET)]; destreg = EXTRACT_REG_NUM(instr, RT_OFFSET); type = instr & TYPE_SWPB; pr_debug("addr in r%d->0x%08x, dest is r%d, source in r%d->0x%08x)\n", EXTRACT_REG_NUM(instr, RN_OFFSET), address, destreg, EXTRACT_REG_NUM(instr, RT2_OFFSET), data); /* Check access in reasonable access range for both SWP and SWPB */ if (!access_ok(VERIFY_WRITE, (address & ~3), 4)) { pr_debug("SWP{B} emulation: access to %p not allowed!\n", (void *)address); res = -EFAULT; } else { res = emulate_swpX(address, &data, type); } if (res == 0) { /* * On successful emulation, revert the adjustment to the PC * made in kernel/traps.c in order to resume execution at the * instruction following the SWP{B}. */ regs->ARM_pc += 4; regs->uregs[destreg] = data; } else if (res == -EFAULT) { /* * Memory errors do not mean emulation failed. * Set up signal info to return SEGV, then return OK */ set_segfault(regs, address); } return 0; }
static int __kprobes do_page_fault(unsigned long addr, unsigned int esr, struct pt_regs *regs) { struct task_struct *tsk; struct mm_struct *mm; int fault, sig, code; unsigned long vm_flags = VM_READ | VM_WRITE | VM_EXEC; unsigned int mm_flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE; tsk = current; mm = tsk->mm; /* Enable interrupts if they were enabled in the parent context. */ if (interrupts_enabled(regs)) local_irq_enable(); /* * If we're in an interrupt or have no user context, we must not take * the fault. */ if (in_atomic() || !mm) goto no_context; if (user_mode(regs)) mm_flags |= FAULT_FLAG_USER; if (esr & ESR_LNX_EXEC) { vm_flags = VM_EXEC; } else if ((esr & ESR_EL1_WRITE) && !(esr & ESR_EL1_CM)) { vm_flags = VM_WRITE; mm_flags |= FAULT_FLAG_WRITE; } /* * As per x86, we may deadlock here. However, since the kernel only * validly references user space from well defined areas of the code, * we can bug out early if this is from code which shouldn't. */ if (!down_read_trylock(&mm->mmap_sem)) { if (!user_mode(regs) && !search_exception_tables(regs->pc)) goto no_context; retry: down_read(&mm->mmap_sem); } else { /* * The above down_read_trylock() might have succeeded in which * case, we'll have missed the might_sleep() from down_read(). */ might_sleep(); #ifdef CONFIG_DEBUG_VM if (!user_mode(regs) && !search_exception_tables(regs->pc)) goto no_context; #endif } fault = __do_page_fault(mm, addr, mm_flags, vm_flags, tsk); /* * If we need to retry but a fatal signal is pending, handle the * signal first. We do not need to release the mmap_sem because it * would already be released in __lock_page_or_retry in mm/filemap.c. */ if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(current)) return 0; /* * Major/minor page fault accounting is only done on the initial * attempt. If we go through a retry, it is extremely likely that the * page will be found in page cache at that point. */ perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, addr); if (mm_flags & FAULT_FLAG_ALLOW_RETRY) { if (fault & VM_FAULT_MAJOR) { tsk->maj_flt++; perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, regs, addr); } else { tsk->min_flt++; perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, regs, addr); } if (fault & VM_FAULT_RETRY) { /* * Clear FAULT_FLAG_ALLOW_RETRY to avoid any risk of * starvation. */ mm_flags &= ~FAULT_FLAG_ALLOW_RETRY; mm_flags |= FAULT_FLAG_TRIED; goto retry; } } up_read(&mm->mmap_sem); /* * Handle the "normal" case first - VM_FAULT_MAJOR / VM_FAULT_MINOR */ if (likely(!(fault & (VM_FAULT_ERROR | VM_FAULT_BADMAP | VM_FAULT_BADACCESS)))) return 0; /* * If we are in kernel mode at this point, we have no context to * handle this fault with. */ if (!user_mode(regs)) goto no_context; if (fault & VM_FAULT_OOM) { /* * We ran out of memory, call the OOM killer, and return to * userspace (which will retry the fault, or kill us if we got * oom-killed). */ pagefault_out_of_memory(); return 0; } if (fault & VM_FAULT_SIGBUS) { /* * We had some memory, but were unable to successfully fix up * this page fault. */ sig = SIGBUS; code = BUS_ADRERR; } else { /* * Something tried to access memory that isn't in our memory * map. */ sig = SIGSEGV; code = fault == VM_FAULT_BADACCESS ? SEGV_ACCERR : SEGV_MAPERR; } __do_user_fault(tsk, addr, esr, sig, code, regs); return 0; no_context: __do_kernel_fault(mm, addr, esr, regs); return 0; }
/* * This routine handles page faults. It determines the address, * and the problem, and then passes it off to one of the appropriate * routines. * * interruption code (int_code): * 04 Protection -> Write-Protection (suprression) * 10 Segment translation -> Not present (nullification) * 11 Page translation -> Not present (nullification) * 3b Region third trans. -> Not present (nullification) */ static inline int do_exception(struct pt_regs *regs, int access) { struct task_struct *tsk; struct mm_struct *mm; struct vm_area_struct *vma; unsigned long trans_exc_code; unsigned long address; unsigned int flags; int fault; if (notify_page_fault(regs)) return 0; tsk = current; mm = tsk->mm; trans_exc_code = regs->int_parm_long; /* * Verify that the fault happened in user space, that * we are not in an interrupt and that there is a * user context. */ fault = VM_FAULT_BADCONTEXT; if (unlikely(!user_space_fault(trans_exc_code) || in_atomic() || !mm)) goto out; address = trans_exc_code & __FAIL_ADDR_MASK; perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address); flags = FAULT_FLAG_ALLOW_RETRY; if (access == VM_WRITE || (trans_exc_code & store_indication) == 0x400) flags |= FAULT_FLAG_WRITE; down_read(&mm->mmap_sem); #ifdef CONFIG_PGSTE if (test_tsk_thread_flag(current, TIF_SIE) && S390_lowcore.gmap) { address = __gmap_fault(address, (struct gmap *) S390_lowcore.gmap); if (address == -EFAULT) { fault = VM_FAULT_BADMAP; goto out_up; } if (address == -ENOMEM) { fault = VM_FAULT_OOM; goto out_up; } } #endif retry: fault = VM_FAULT_BADMAP; vma = find_vma(mm, address); if (!vma) goto out_up; if (unlikely(vma->vm_start > address)) { if (!(vma->vm_flags & VM_GROWSDOWN)) goto out_up; if (expand_stack(vma, address)) goto out_up; } /* * Ok, we have a good vm_area for this memory access, so * we can handle it.. */ fault = VM_FAULT_BADACCESS; if (unlikely(!(vma->vm_flags & access))) goto out_up; if (is_vm_hugetlb_page(vma)) address &= HPAGE_MASK; /* * If for any reason at all we couldn't handle the fault, * make sure we exit gracefully rather than endlessly redo * the fault. */ fault = handle_mm_fault(mm, vma, address, flags); if (unlikely(fault & VM_FAULT_ERROR)) goto out_up; /* * Major/minor page fault accounting is only done on the * initial attempt. If we go through a retry, it is extremely * likely that the page will be found in page cache at that point. */ if (flags & FAULT_FLAG_ALLOW_RETRY) { if (fault & VM_FAULT_MAJOR) { tsk->maj_flt++; perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, regs, address); } else { tsk->min_flt++; perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, regs, address); } if (fault & VM_FAULT_RETRY) { /* Clear FAULT_FLAG_ALLOW_RETRY to avoid any risk * of starvation. */ flags &= ~FAULT_FLAG_ALLOW_RETRY; flags |= FAULT_FLAG_TRIED; down_read(&mm->mmap_sem); goto retry; } } /* * The instruction that caused the program check will * be repeated. Don't signal single step via SIGTRAP. */ clear_tsk_thread_flag(tsk, TIF_PER_TRAP); fault = 0; out_up: up_read(&mm->mmap_sem); out: return fault; }
/* * This routine handles page faults. It determines the address and the * problem, and then passes it off to one of the appropriate routines. */ asmlinkage void do_page_fault(struct pt_regs *regs) { struct task_struct *tsk; struct vm_area_struct *vma; struct mm_struct *mm; unsigned long addr, cause; unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE; int fault, code = SEGV_MAPERR; cause = regs->scause; addr = regs->sbadaddr; tsk = current; mm = tsk->mm; /* * Fault-in kernel-space virtual memory on-demand. * The 'reference' page table is init_mm.pgd. * * NOTE! We MUST NOT take any locks for this case. We may * be in an interrupt or a critical region, and should * only copy the information from the master page table, * nothing more. */ if (unlikely((addr >= VMALLOC_START) && (addr <= VMALLOC_END))) goto vmalloc_fault; /* Enable interrupts if they were enabled in the parent context. */ if (likely(regs->sstatus & SR_PIE)) local_irq_enable(); /* * If we're in an interrupt, have no user context, or are running * in an atomic region, then we must not take the fault. */ if (unlikely(in_atomic() || !mm)) goto no_context; if (user_mode(regs)) flags |= FAULT_FLAG_USER; perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, addr); retry: down_read(&mm->mmap_sem); vma = find_vma(mm, addr); if (unlikely(!vma)) goto bad_area; if (likely(vma->vm_start <= addr)) goto good_area; if (unlikely(!(vma->vm_flags & VM_GROWSDOWN))) goto bad_area; if (unlikely(expand_stack(vma, addr))) goto bad_area; /* * Ok, we have a good vm_area for this memory access, so * we can handle it. */ good_area: code = SEGV_ACCERR; switch (cause) { case EXC_INST_ACCESS: if (!(vma->vm_flags & VM_EXEC)) goto bad_area; break; case EXC_LOAD_ACCESS: if (!(vma->vm_flags & VM_READ)) goto bad_area; break; case EXC_STORE_ACCESS: if (!(vma->vm_flags & VM_WRITE)) goto bad_area; flags |= FAULT_FLAG_WRITE; break; default: panic("%s: unhandled cause %lu", __func__, cause); } /* * If for any reason at all we could not handle the fault, * make sure we exit gracefully rather than endlessly redo * the fault. */ fault = handle_mm_fault(mm, vma, addr, flags); /* * If we need to retry but a fatal signal is pending, handle the * signal first. We do not need to release the mmap_sem because it * would already be released in __lock_page_or_retry in mm/filemap.c. */ if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(tsk)) return; if (unlikely(fault & VM_FAULT_ERROR)) { if (fault & VM_FAULT_OOM) goto out_of_memory; else if (fault & VM_FAULT_SIGBUS) goto do_sigbus; BUG(); } /* * Major/minor page fault accounting is only done on the * initial attempt. If we go through a retry, it is extremely * likely that the page will be found in page cache at that point. */ if (flags & FAULT_FLAG_ALLOW_RETRY) { if (fault & VM_FAULT_MAJOR) { tsk->maj_flt++; perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, regs, addr); } else { tsk->min_flt++; perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, regs, addr); } if (fault & VM_FAULT_RETRY) { /* * Clear FAULT_FLAG_ALLOW_RETRY to avoid any risk * of starvation. */ flags &= ~(FAULT_FLAG_ALLOW_RETRY); flags |= FAULT_FLAG_TRIED; /* * No need to up_read(&mm->mmap_sem) as we would * have already released it in __lock_page_or_retry * in mm/filemap.c. */ goto retry; } } up_read(&mm->mmap_sem); return; /* * Something tried to access memory that isn't in our memory map. * Fix it, but check if it's kernel or user first. */ bad_area: up_read(&mm->mmap_sem); /* User mode accesses just cause a SIGSEGV */ if (user_mode(regs)) { do_trap(regs, SIGSEGV, code, addr, tsk); return; } no_context: /* Are we prepared to handle this kernel fault? */ if (fixup_exception(regs)) { return; } /* * Oops. The kernel tried to access some bad page. We'll have to * terminate things with extreme prejudice. */ bust_spinlocks(1); pr_alert("Unable to handle kernel %s at virtual address " REG_FMT "\n", (addr < PAGE_SIZE) ? "NULL pointer dereference" : "paging request", addr); die(regs, "Oops"); do_exit(SIGKILL); /* * We ran out of memory, call the OOM killer, and return the userspace * (which will retry the fault, or kill us if we got oom-killed). */ out_of_memory: up_read(&mm->mmap_sem); if (!user_mode(regs)) goto no_context; pagefault_out_of_memory(); return; do_sigbus: up_read(&mm->mmap_sem); /* Kernel mode? Handle exceptions or die */ if (!user_mode(regs)) goto no_context; do_trap(regs, SIGBUS, BUS_ADRERR, addr, tsk); return; vmalloc_fault: { pgd_t *pgd, *pgd_k; pud_t *pud, *pud_k; pmd_t *pmd, *pmd_k; pte_t *pte_k; int index; if (user_mode(regs)) goto bad_area; /* * Synchronize this task's top level page-table * with the 'reference' page table. * * Do _not_ use "tsk->active_mm->pgd" here. * We might be inside an interrupt in the middle * of a task switch. */ index = pgd_index(addr); pgd = (pgd_t *)pfn_to_virt(csr_read(sptbr)) + index; pgd_k = init_mm.pgd + index; if (!pgd_present(*pgd_k)) goto no_context; set_pgd(pgd, *pgd_k); pud = pud_offset(pgd, addr); pud_k = pud_offset(pgd_k, addr); if (!pud_present(*pud_k)) goto no_context; /* Since the vmalloc area is global, it is unnecessary to copy individual PTEs */ pmd = pmd_offset(pud, addr); pmd_k = pmd_offset(pud_k, addr); if (!pmd_present(*pmd_k)) goto no_context; set_pmd(pmd, *pmd_k); /* Make sure the actual PTE exists as well to * catch kernel vmalloc-area accesses to non-mapped * addresses. If we don't do this, this will just * silently loop forever. */ pte_k = pte_offset_kernel(pmd_k, addr); if (!pte_present(*pte_k)) goto no_context; return; } }
static inline int do_exception(struct pt_regs *regs, int access) { struct task_struct *tsk; struct mm_struct *mm; struct vm_area_struct *vma; unsigned long trans_exc_code; unsigned long address; unsigned int flags; int fault; if (notify_page_fault(regs)) return 0; tsk = current; mm = tsk->mm; trans_exc_code = regs->int_parm_long; fault = VM_FAULT_BADCONTEXT; if (unlikely(!user_space_fault(trans_exc_code) || in_atomic() || !mm)) goto out; address = trans_exc_code & __FAIL_ADDR_MASK; perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address); flags = FAULT_FLAG_ALLOW_RETRY; if (access == VM_WRITE || (trans_exc_code & store_indication) == 0x400) flags |= FAULT_FLAG_WRITE; down_read(&mm->mmap_sem); #ifdef CONFIG_PGSTE if (test_tsk_thread_flag(current, TIF_SIE) && S390_lowcore.gmap) { address = __gmap_fault(address, (struct gmap *) S390_lowcore.gmap); if (address == -EFAULT) { fault = VM_FAULT_BADMAP; goto out_up; } if (address == -ENOMEM) { fault = VM_FAULT_OOM; goto out_up; } } #endif retry: fault = VM_FAULT_BADMAP; vma = find_vma(mm, address); if (!vma) goto out_up; if (unlikely(vma->vm_start > address)) { if (!(vma->vm_flags & VM_GROWSDOWN)) goto out_up; if (expand_stack(vma, address)) goto out_up; } fault = VM_FAULT_BADACCESS; if (unlikely(!(vma->vm_flags & access))) goto out_up; if (is_vm_hugetlb_page(vma)) address &= HPAGE_MASK; fault = handle_mm_fault(mm, vma, address, flags); if (unlikely(fault & VM_FAULT_ERROR)) goto out_up; if (flags & FAULT_FLAG_ALLOW_RETRY) { if (fault & VM_FAULT_MAJOR) { tsk->maj_flt++; perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, regs, address); } else { tsk->min_flt++; perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, regs, address); } if (fault & VM_FAULT_RETRY) { flags &= ~FAULT_FLAG_ALLOW_RETRY; down_read(&mm->mmap_sem); goto retry; } } clear_tsk_thread_flag(tsk, TIF_PER_TRAP); fault = 0; out_up: up_read(&mm->mmap_sem); out: return fault; }
/* * Handle an unaligned access * Returns 0 if successfully handled, 1 if some error happened */ int misaligned_fixup(unsigned long address, struct pt_regs *regs, struct callee_regs *cregs) { struct disasm_state state; char buf[TASK_COMM_LEN]; /* handle user mode only and only if enabled by sysadmin */ if (!user_mode(regs) || !unaligned_enabled) return 1; if (no_unaligned_warning) { pr_warn_once("%s(%d) made unaligned access which was emulated" " by kernel assist\n. This can degrade application" " performance significantly\n. To enable further" " logging of such instances, please \n" " echo 0 > /proc/sys/kernel/ignore-unaligned-usertrap\n", get_task_comm(buf, current), task_pid_nr(current)); } else { /* Add rate limiting if it gets down to it */ pr_warn("%s(%d): unaligned access to/from 0x%lx by PC: 0x%lx\n", get_task_comm(buf, current), task_pid_nr(current), address, regs->ret); } disasm_instr(regs->ret, &state, 1, regs, cregs); if (state.fault) goto fault; /* ldb/stb should not have unaligned exception */ if ((state.zz == 1) || (state.di)) goto fault; if (!state.write) fixup_load(&state, regs, cregs); else fixup_store(&state, regs, cregs); if (state.fault) goto fault; /* clear any remanants of delay slot */ if (delay_mode(regs)) { regs->ret = regs->bta ~1U; regs->status32 &= ~STATUS_DE_MASK; } else { regs->ret += state.instr_len; /* handle zero-overhead-loop */ if ((regs->ret == regs->lp_end) && (regs->lp_count)) { regs->ret = regs->lp_start; regs->lp_count--; } } perf_sw_event(PERF_COUNT_SW_ALIGNMENT_FAULTS, 1, regs, address); return 0; fault: pr_err("Alignment trap: fault in fix-up %08lx at [<%08lx>]\n", state.words[0], address); return 1; }
void do_page_fault(struct pt_regs *regs) { struct vm_area_struct * vma; struct mm_struct *mm = current->mm; unsigned int exccause = regs->exccause; unsigned int address = regs->excvaddr; siginfo_t info; int is_write, is_exec; int fault; unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE; info.si_code = SEGV_MAPERR; /* We fault-in kernel-space virtual memory on-demand. The * 'reference' page table is init_mm.pgd. */ if (address >= TASK_SIZE && !user_mode(regs)) goto vmalloc_fault; /* If we're in an interrupt or have no user * context, we must not take the fault.. */ if (faulthandler_disabled() || !mm) { bad_page_fault(regs, address, SIGSEGV); return; } is_write = (exccause == EXCCAUSE_STORE_CACHE_ATTRIBUTE) ? 1 : 0; is_exec = (exccause == EXCCAUSE_ITLB_PRIVILEGE || exccause == EXCCAUSE_ITLB_MISS || exccause == EXCCAUSE_FETCH_CACHE_ATTRIBUTE) ? 1 : 0; #ifdef DEBUG_PAGE_FAULT printk("[%s:%d:%08x:%d:%08x:%s%s]\n", current->comm, current->pid, address, exccause, regs->pc, is_write? "w":"", is_exec? "x":""); #endif if (user_mode(regs)) flags |= FAULT_FLAG_USER; retry: down_read(&mm->mmap_sem); vma = find_vma(mm, address); if (!vma) goto bad_area; if (vma->vm_start <= address) goto good_area; if (!(vma->vm_flags & VM_GROWSDOWN)) goto bad_area; if (expand_stack(vma, address)) goto bad_area; /* Ok, we have a good vm_area for this memory access, so * we can handle it.. */ good_area: info.si_code = SEGV_ACCERR; if (is_write) { if (!(vma->vm_flags & VM_WRITE)) goto bad_area; flags |= FAULT_FLAG_WRITE; } else if (is_exec) { if (!(vma->vm_flags & VM_EXEC)) goto bad_area; } else /* Allow read even from write-only pages. */ if (!(vma->vm_flags & (VM_READ | VM_WRITE))) goto bad_area; /* If for any reason at all we couldn't handle the fault, * make sure we exit gracefully rather than endlessly redo * the fault. */ fault = handle_mm_fault(vma, address, flags); if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(current)) return; if (unlikely(fault & VM_FAULT_ERROR)) { if (fault & VM_FAULT_OOM) goto out_of_memory; else if (fault & VM_FAULT_SIGSEGV) goto bad_area; else if (fault & VM_FAULT_SIGBUS) goto do_sigbus; BUG(); } if (flags & FAULT_FLAG_ALLOW_RETRY) { if (fault & VM_FAULT_MAJOR) current->maj_flt++; else current->min_flt++; if (fault & VM_FAULT_RETRY) { flags &= ~FAULT_FLAG_ALLOW_RETRY; flags |= FAULT_FLAG_TRIED; /* No need to up_read(&mm->mmap_sem) as we would * have already released it in __lock_page_or_retry * in mm/filemap.c. */ goto retry; } } up_read(&mm->mmap_sem); perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address); if (flags & VM_FAULT_MAJOR) perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, regs, address); else perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, regs, address); return; /* Something tried to access memory that isn't in our memory map.. * Fix it, but check if it's kernel or user first.. */ bad_area: up_read(&mm->mmap_sem); if (user_mode(regs)) { current->thread.bad_vaddr = address; current->thread.error_code = is_write; info.si_signo = SIGSEGV; info.si_errno = 0; /* info.si_code has been set above */ info.si_addr = (void *) address; force_sig_info(SIGSEGV, &info, current); return; } bad_page_fault(regs, address, SIGSEGV); return; /* We ran out of memory, or some other thing happened to us that made * us unable to handle the page fault gracefully. */ out_of_memory: up_read(&mm->mmap_sem); if (!user_mode(regs)) bad_page_fault(regs, address, SIGKILL); else pagefault_out_of_memory(); return; do_sigbus: up_read(&mm->mmap_sem); /* Send a sigbus, regardless of whether we were in kernel * or user mode. */ current->thread.bad_vaddr = address; info.si_code = SIGBUS; info.si_errno = 0; info.si_code = BUS_ADRERR; info.si_addr = (void *) address; force_sig_info(SIGBUS, &info, current); /* Kernel mode? Handle exceptions or die */ if (!user_mode(regs)) bad_page_fault(regs, address, SIGBUS); return; vmalloc_fault: { /* Synchronize this task's top level page-table * with the 'reference' page table. */ struct mm_struct *act_mm = current->active_mm; int index = pgd_index(address); pgd_t *pgd, *pgd_k; pmd_t *pmd, *pmd_k; pte_t *pte_k; if (act_mm == NULL) goto bad_page_fault; pgd = act_mm->pgd + index; pgd_k = init_mm.pgd + index; if (!pgd_present(*pgd_k)) goto bad_page_fault; pgd_val(*pgd) = pgd_val(*pgd_k); pmd = pmd_offset(pgd, address); pmd_k = pmd_offset(pgd_k, address); if (!pmd_present(*pmd) || !pmd_present(*pmd_k)) goto bad_page_fault; pmd_val(*pmd) = pmd_val(*pmd_k); pte_k = pte_offset_kernel(pmd_k, address); if (!pte_present(*pte_k)) goto bad_page_fault; return; } bad_page_fault: bad_page_fault(regs, address, SIGKILL); return; }
/* Unlike the Sparc64 version (which has a struct fpustate), we * pass the taskstruct corresponding to the task which currently owns the * FPU. This is partly because we don't have the fpustate struct and * partly because the task owning the FPU isn't always current (as is * the case for the Sparc64 port). This is probably SMP-related... * This function returns 1 if all queued insns were emulated successfully. * The test for unimplemented FPop in kernel mode has been moved into * kernel/traps.c for simplicity. */ int do_mathemu(struct pt_regs *regs, struct task_struct *fpt) { /* regs->pc isn't necessarily the PC at which the offending insn is sitting. * The FPU maintains a queue of FPops which cause traps. * When it hits an instruction that requires that the trapped op succeeded * (usually because it reads a reg. that the trapped op wrote) then it * causes this exception. We need to emulate all the insns on the queue * and then allow the op to proceed. * This code should also handle the case where the trap was precise, * in which case the queue length is zero and regs->pc points at the * single FPop to be emulated. (this case is untested, though :->) * You'll need this case if you want to be able to emulate all FPops * because the FPU either doesn't exist or has been software-disabled. * [The UltraSPARC makes FP a precise trap; this isn't as stupid as it * might sound because the Ultra does funky things with a superscalar * architecture.] */ /* You wouldn't believe how often I typed 'ftp' when I meant 'fpt' :-> */ int i; int retcode = 0; /* assume all succeed */ unsigned long insn; perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, 1, 0, regs, 0); #ifdef DEBUG_MATHEMU printk("In do_mathemu()... pc is %08lx\n", regs->pc); printk("fpqdepth is %ld\n", fpt->thread.fpqdepth); for (i = 0; i < fpt->thread.fpqdepth; i++) printk("%d: %08lx at %08lx\n", i, fpt->thread.fpqueue[i].insn, (unsigned long)fpt->thread.fpqueue[i].insn_addr); #endif if (fpt->thread.fpqdepth == 0) { /* no queue, guilty insn is at regs->pc */ #ifdef DEBUG_MATHEMU printk("precise trap at %08lx\n", regs->pc); #endif if (!get_user(insn, (u32 __user *) regs->pc)) { retcode = do_one_mathemu(insn, &fpt->thread.fsr, fpt->thread.float_regs); if (retcode) { /* in this case we need to fix up PC & nPC */ regs->pc = regs->npc; regs->npc += 4; } } return retcode; } /* Normal case: need to empty the queue... */ for (i = 0; i < fpt->thread.fpqdepth; i++) { retcode = do_one_mathemu(fpt->thread.fpqueue[i].insn, &(fpt->thread.fsr), fpt->thread.float_regs); if (!retcode) /* insn failed, no point doing any more */ break; } /* Now empty the queue and clear the queue_not_empty flag */ if (retcode) fpt->thread.fsr &= ~(0x3000 | FSR_CEXC_MASK); else fpt->thread.fsr &= ~0x3000; fpt->thread.fpqdepth = 0; return retcode; }
static void emulate_load_store_insn(struct pt_regs *regs, void __user *addr, unsigned int __user *pc) { union mips_instruction insn; unsigned long value; unsigned int res; unsigned long origpc; unsigned long orig31; void __user *fault_addr = NULL; origpc = (unsigned long)pc; orig31 = regs->regs[31]; perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, 1, regs, 0); /* * This load never faults. */ __get_user(insn.word, pc); switch (insn.i_format.opcode) { /* * These are instructions that a compiler doesn't generate. We * can assume therefore that the code is MIPS-aware and * really buggy. Emulating these instructions would break the * semantics anyway. */ case ll_op: case lld_op: case sc_op: case scd_op: /* * For these instructions the only way to create an address * error is an attempted access to kernel/supervisor address * space. */ case ldl_op: case ldr_op: case lwl_op: case lwr_op: case sdl_op: case sdr_op: case swl_op: case swr_op: case lb_op: case lbu_op: case sb_op: goto sigbus; case spec3_op: if (insn.r_format.func != lx_op) goto sigill; switch (insn.r_format.re) { case lwx_op: if (!access_ok(VERIFY_READ, addr, 4)) goto sigbus; LoadW(addr, value, res); if (res) goto fault; compute_return_epc(regs); regs->regs[insn.r_format.rd] = value; break; case lhx_op: if (!access_ok(VERIFY_READ, addr, 2)) goto sigbus; LoadHW(addr, value, res); if (res) goto fault; compute_return_epc(regs); regs->regs[insn.r_format.rd] = value; break; #ifdef CONFIG_64BIT case ldx_op: if (!access_ok(VERIFY_READ, addr, 8)) goto sigbus; LoadDW(addr, value, res); if (res) goto fault; compute_return_epc(regs); regs->regs[insn.r_format.rd] = value; break; case lwux_op: if (!access_ok(VERIFY_READ, addr, 4)) goto sigbus; LoadWU(addr, value, res); if (res) goto fault; compute_return_epc(regs); regs->regs[insn.r_format.rd] = value; break; #endif /* CONFIG_64BIT */ case lhux_op: if (!access_ok(VERIFY_READ, addr, 2)) goto sigbus; LoadHWU(addr, value, res); if (res) goto fault; compute_return_epc(regs); regs->regs[insn.r_format.rd] = value; break; case lbux_op: case lbx_op: goto sigbus; default: goto sigill; } break; /* * The remaining opcodes are the ones that are really of * interest. */ case lh_op: if (!access_ok(VERIFY_READ, addr, 2)) goto sigbus; LoadHW(addr, value, res); if (res) goto fault; compute_return_epc(regs); regs->regs[insn.i_format.rt] = value; break; case lw_op: if (!access_ok(VERIFY_READ, addr, 4)) goto sigbus; LoadW(addr, value, res); if (res) goto fault; compute_return_epc(regs); regs->regs[insn.i_format.rt] = value; break; case lhu_op: if (!access_ok(VERIFY_READ, addr, 2)) goto sigbus; LoadHWU(addr, value, res); if (res) goto fault; compute_return_epc(regs); regs->regs[insn.i_format.rt] = value; break; case lwu_op: #ifdef CONFIG_64BIT /* * A 32-bit kernel might be running on a 64-bit processor. But * if we're on a 32-bit processor and an i-cache incoherency * or race makes us see a 64-bit instruction here the sdl/sdr * would blow up, so for now we don't handle unaligned 64-bit * instructions on 32-bit kernels. */ if (!access_ok(VERIFY_READ, addr, 4)) goto sigbus; LoadWU(addr, value, res); if (res) goto fault; compute_return_epc(regs); regs->regs[insn.i_format.rt] = value; break; #endif /* CONFIG_64BIT */ /* Cannot handle 64-bit instructions in 32-bit kernel */ goto sigill; case ld_op: #ifdef CONFIG_64BIT /* * A 32-bit kernel might be running on a 64-bit processor. But * if we're on a 32-bit processor and an i-cache incoherency * or race makes us see a 64-bit instruction here the sdl/sdr * would blow up, so for now we don't handle unaligned 64-bit * instructions on 32-bit kernels. */ if (!access_ok(VERIFY_READ, addr, 8)) goto sigbus; LoadDW(addr, value, res); if (res) goto fault; compute_return_epc(regs); regs->regs[insn.i_format.rt] = value; break; #endif /* CONFIG_64BIT */ /* Cannot handle 64-bit instructions in 32-bit kernel */ goto sigill; case sh_op: if (!access_ok(VERIFY_WRITE, addr, 2)) goto sigbus; compute_return_epc(regs); value = regs->regs[insn.i_format.rt]; StoreHW(addr, value, res); if (res) goto fault; break; case sw_op: if (!access_ok(VERIFY_WRITE, addr, 4)) goto sigbus; compute_return_epc(regs); value = regs->regs[insn.i_format.rt]; StoreW(addr, value, res); if (res) goto fault; break; case sd_op: #ifdef CONFIG_64BIT /* * A 32-bit kernel might be running on a 64-bit processor. But * if we're on a 32-bit processor and an i-cache incoherency * or race makes us see a 64-bit instruction here the sdl/sdr * would blow up, so for now we don't handle unaligned 64-bit * instructions on 32-bit kernels. */ if (!access_ok(VERIFY_WRITE, addr, 8)) goto sigbus; compute_return_epc(regs); value = regs->regs[insn.i_format.rt]; StoreDW(addr, value, res); if (res) goto fault; break; #endif /* CONFIG_64BIT */ /* Cannot handle 64-bit instructions in 32-bit kernel */ goto sigill; case lwc1_op: case ldc1_op: case swc1_op: case sdc1_op: die_if_kernel("Unaligned FP access in kernel code", regs); BUG_ON(!used_math()); BUG_ON(!is_fpu_owner()); lose_fpu(1); /* Save FPU state for the emulator. */ res = fpu_emulator_cop1Handler(regs, ¤t->thread.fpu, 1, &fault_addr); own_fpu(1); /* Restore FPU state. */ /* Signal if something went wrong. */ process_fpemu_return(res, fault_addr); if (res == 0) break; return; /* * COP2 is available to implementor for application specific use. * It's up to applications to register a notifier chain and do * whatever they have to do, including possible sending of signals. */ case lwc2_op: cu2_notifier_call_chain(CU2_LWC2_OP, regs); break; case ldc2_op: cu2_notifier_call_chain(CU2_LDC2_OP, regs); break; case swc2_op: cu2_notifier_call_chain(CU2_SWC2_OP, regs); break; case sdc2_op: cu2_notifier_call_chain(CU2_SDC2_OP, regs); break; default: /* * Pheeee... We encountered an yet unknown instruction or * cache coherence problem. Die sucker, die ... */ goto sigill; } #ifdef CONFIG_DEBUG_FS unaligned_instructions++; #endif return; fault: /* roll back jump/branch */ regs->cp0_epc = origpc; regs->regs[31] = orig31; /* Did we have an exception handler installed? */ if (fixup_exception(regs)) return; die_if_kernel("Unhandled kernel unaligned access", regs); force_sig(SIGSEGV, current); return; sigbus: die_if_kernel("Unhandled kernel unaligned access", regs); force_sig(SIGBUS, current); return; sigill: die_if_kernel ("Unhandled kernel unaligned access or invalid instruction", regs); force_sig(SIGILL, current); }
void do_page_fault(unsigned long address, struct pt_regs *regs) { struct vm_area_struct *vma = NULL; struct task_struct *tsk = current; struct mm_struct *mm = tsk->mm; siginfo_t info; int ret; vm_fault_t fault; int write = regs->ecr_cause & ECR_C_PROTV_STORE; /* ST/EX */ unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE; clear_siginfo(&info); /* * We fault-in kernel-space virtual memory on-demand. The * 'reference' page table is init_mm.pgd. * * NOTE! We MUST NOT take any locks for this case. We may * be in an interrupt or a critical region, and should * only copy the information from the master page table, * nothing more. */ if (address >= VMALLOC_START) { ret = handle_kernel_vaddr_fault(address); if (unlikely(ret)) goto bad_area_nosemaphore; else return; } info.si_code = SEGV_MAPERR; /* * If we're in an interrupt or have no user * context, we must not take the fault.. */ if (faulthandler_disabled() || !mm) goto no_context; if (user_mode(regs)) flags |= FAULT_FLAG_USER; retry: down_read(&mm->mmap_sem); vma = find_vma(mm, address); if (!vma) goto bad_area; if (vma->vm_start <= address) goto good_area; if (!(vma->vm_flags & VM_GROWSDOWN)) goto bad_area; if (expand_stack(vma, address)) goto bad_area; /* * Ok, we have a good vm_area for this memory access, so * we can handle it.. */ good_area: info.si_code = SEGV_ACCERR; /* Handle protection violation, execute on heap or stack */ if ((regs->ecr_vec == ECR_V_PROTV) && (regs->ecr_cause == ECR_C_PROTV_INST_FETCH)) goto bad_area; if (write) { if (!(vma->vm_flags & VM_WRITE)) goto bad_area; flags |= FAULT_FLAG_WRITE; } else { if (!(vma->vm_flags & (VM_READ | VM_EXEC))) goto bad_area; } /* * If for any reason at all we couldn't handle the fault, * make sure we exit gracefully rather than endlessly redo * the fault. */ fault = handle_mm_fault(vma, address, flags); /* If Pagefault was interrupted by SIGKILL, exit page fault "early" */ if (unlikely(fatal_signal_pending(current))) { if ((fault & VM_FAULT_ERROR) && !(fault & VM_FAULT_RETRY)) up_read(&mm->mmap_sem); if (user_mode(regs)) return; } perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address); if (likely(!(fault & VM_FAULT_ERROR))) { if (flags & FAULT_FLAG_ALLOW_RETRY) { /* To avoid updating stats twice for retry case */ if (fault & VM_FAULT_MAJOR) { tsk->maj_flt++; perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, regs, address); } else { tsk->min_flt++; perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, regs, address); } if (fault & VM_FAULT_RETRY) { flags &= ~FAULT_FLAG_ALLOW_RETRY; flags |= FAULT_FLAG_TRIED; goto retry; } } /* Fault Handled Gracefully */ up_read(&mm->mmap_sem); return; } if (fault & VM_FAULT_OOM) goto out_of_memory; else if (fault & VM_FAULT_SIGSEGV) goto bad_area; else if (fault & VM_FAULT_SIGBUS) goto do_sigbus; /* no man's land */ BUG(); /* * Something tried to access memory that isn't in our memory map.. * Fix it, but check if it's kernel or user first.. */ bad_area: up_read(&mm->mmap_sem); bad_area_nosemaphore: /* User mode accesses just cause a SIGSEGV */ if (user_mode(regs)) { tsk->thread.fault_address = address; info.si_signo = SIGSEGV; info.si_errno = 0; /* info.si_code has been set above */ info.si_addr = (void __user *)address; force_sig_info(SIGSEGV, &info, tsk); return; } no_context: /* Are we prepared to handle this kernel fault? * * (The kernel has valid exception-points in the source * when it accesses user-memory. When it fails in one * of those points, we find it in a table and do a jump * to some fixup code that loads an appropriate error * code) */ if (fixup_exception(regs)) return; die("Oops", regs, address); out_of_memory: up_read(&mm->mmap_sem); if (user_mode(regs)) { pagefault_out_of_memory(); return; } goto no_context; do_sigbus: up_read(&mm->mmap_sem); if (!user_mode(regs)) goto no_context; tsk->thread.fault_address = address; info.si_signo = SIGBUS; info.si_errno = 0; info.si_code = BUS_ADRERR; info.si_addr = (void __user *)address; force_sig_info(SIGBUS, &info, tsk); }
asmlinkage void do_ade(struct pt_regs *regs) { enum ctx_state prev_state; unsigned int __user *pc; mm_segment_t seg; prev_state = exception_enter(); perf_sw_event(PERF_COUNT_SW_ALIGNMENT_FAULTS, 1, regs, regs->cp0_badvaddr); /* * Did we catch a fault trying to load an instruction? */ if (regs->cp0_badvaddr == regs->cp0_epc) goto sigbus; if (user_mode(regs) && !test_thread_flag(TIF_FIXADE)) goto sigbus; if (unaligned_action == UNALIGNED_ACTION_SIGNAL) goto sigbus; /* * Do branch emulation only if we didn't forward the exception. * This is all so but ugly ... */ /* * Are we running in microMIPS mode? */ if (get_isa16_mode(regs->cp0_epc)) { /* * Did we catch a fault trying to load an instruction in * 16-bit mode? */ if (regs->cp0_badvaddr == msk_isa16_mode(regs->cp0_epc)) goto sigbus; if (unaligned_action == UNALIGNED_ACTION_SHOW) show_registers(regs); if (cpu_has_mmips) { seg = get_fs(); if (!user_mode(regs)) set_fs(KERNEL_DS); emulate_load_store_microMIPS(regs, (void __user *)regs->cp0_badvaddr); set_fs(seg); return; } if (cpu_has_mips16) { seg = get_fs(); if (!user_mode(regs)) set_fs(KERNEL_DS); emulate_load_store_MIPS16e(regs, (void __user *)regs->cp0_badvaddr); set_fs(seg); return; } goto sigbus; } if (unaligned_action == UNALIGNED_ACTION_SHOW) show_registers(regs); pc = (unsigned int __user *)exception_epc(regs); seg = get_fs(); if (!user_mode(regs)) set_fs(KERNEL_DS); emulate_load_store_insn(regs, (void __user *)regs->cp0_badvaddr, pc); set_fs(seg); return; sigbus: die_if_kernel("Kernel unaligned instruction access", regs); force_sig(SIGBUS, current); /* * XXX On return from the signal handler we should advance the epc */ exception_exit(prev_state); }
static int __kprobes do_page_fault(unsigned long addr, unsigned int esr, struct pt_regs *regs) { struct task_struct *tsk; struct mm_struct *mm; int fault, sig, code; unsigned long vm_flags = VM_READ | VM_WRITE | VM_EXEC; unsigned int mm_flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE; tsk = current; mm = tsk->mm; if (interrupts_enabled(regs)) local_irq_enable(); if (in_atomic() || !mm) goto no_context; if (user_mode(regs)) mm_flags |= FAULT_FLAG_USER; if (esr & ESR_LNX_EXEC) { vm_flags = VM_EXEC; } else if (esr & ESR_EL1_WRITE) { vm_flags = VM_WRITE; mm_flags |= FAULT_FLAG_WRITE; } if (!down_read_trylock(&mm->mmap_sem)) { if (!user_mode(regs) && !search_exception_tables(regs->pc)) goto no_context; retry: down_read(&mm->mmap_sem); } else { might_sleep(); #ifdef CONFIG_DEBUG_VM if (!user_mode(regs) && !search_exception_tables(regs->pc)) goto no_context; #endif } fault = __do_page_fault(mm, addr, mm_flags, vm_flags, tsk); if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(current)) return 0; perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, addr); if (mm_flags & FAULT_FLAG_ALLOW_RETRY) { if (fault & VM_FAULT_MAJOR) { tsk->maj_flt++; perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, regs, addr); } else { tsk->min_flt++; perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, regs, addr); } if (fault & VM_FAULT_RETRY) { mm_flags &= ~FAULT_FLAG_ALLOW_RETRY; mm_flags |= FAULT_FLAG_TRIED; goto retry; } } up_read(&mm->mmap_sem); if (likely(!(fault & (VM_FAULT_ERROR | VM_FAULT_BADMAP | VM_FAULT_BADACCESS)))) return 0; if (!user_mode(regs)) goto no_context; if (fault & VM_FAULT_OOM) { pagefault_out_of_memory(); return 0; } if (fault & VM_FAULT_SIGBUS) { sig = SIGBUS; code = BUS_ADRERR; } else { sig = SIGSEGV; code = fault == VM_FAULT_BADACCESS ? SEGV_ACCERR : SEGV_MAPERR; } __do_user_fault(tsk, addr, esr, sig, code, regs); return 0; no_context: __do_kernel_fault(mm, addr, esr, regs); return 0; }
/* * This routine handles page faults. It determines the address, * and the problem, and then passes it off to one of the appropriate * routines. * * interruption code (int_code): * 04 Protection -> Write-Protection (suprression) * 10 Segment translation -> Not present (nullification) * 11 Page translation -> Not present (nullification) * 3b Region third trans. -> Not present (nullification) */ static inline int do_exception(struct pt_regs *regs, int access) { struct gmap *gmap; struct task_struct *tsk; struct mm_struct *mm; struct vm_area_struct *vma; enum fault_type type; unsigned long trans_exc_code; unsigned long address; unsigned int flags; int fault; tsk = current; /* * The instruction that caused the program check has * been nullified. Don't signal single step via SIGTRAP. */ clear_pt_regs_flag(regs, PIF_PER_TRAP); if (notify_page_fault(regs)) return 0; mm = tsk->mm; trans_exc_code = regs->int_parm_long; /* * Verify that the fault happened in user space, that * we are not in an interrupt and that there is a * user context. */ fault = VM_FAULT_BADCONTEXT; type = get_fault_type(regs); switch (type) { case KERNEL_FAULT: goto out; case VDSO_FAULT: fault = VM_FAULT_BADMAP; goto out; case USER_FAULT: case GMAP_FAULT: if (faulthandler_disabled() || !mm) goto out; break; } address = trans_exc_code & __FAIL_ADDR_MASK; perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address); flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE; if (user_mode(regs)) flags |= FAULT_FLAG_USER; if (access == VM_WRITE || (trans_exc_code & store_indication) == 0x400) flags |= FAULT_FLAG_WRITE; down_read(&mm->mmap_sem); gmap = NULL; if (IS_ENABLED(CONFIG_PGSTE) && type == GMAP_FAULT) { gmap = (struct gmap *) S390_lowcore.gmap; current->thread.gmap_addr = address; current->thread.gmap_write_flag = !!(flags & FAULT_FLAG_WRITE); current->thread.gmap_int_code = regs->int_code & 0xffff; address = __gmap_translate(gmap, address); if (address == -EFAULT) { fault = VM_FAULT_BADMAP; goto out_up; } if (gmap->pfault_enabled) flags |= FAULT_FLAG_RETRY_NOWAIT; } retry: fault = VM_FAULT_BADMAP; vma = find_vma(mm, address); if (!vma) goto out_up; if (unlikely(vma->vm_start > address)) { if (!(vma->vm_flags & VM_GROWSDOWN)) goto out_up; if (expand_stack(vma, address)) goto out_up; } /* * Ok, we have a good vm_area for this memory access, so * we can handle it.. */ fault = VM_FAULT_BADACCESS; if (unlikely(!(vma->vm_flags & access))) goto out_up; if (is_vm_hugetlb_page(vma)) address &= HPAGE_MASK; /* * If for any reason at all we couldn't handle the fault, * make sure we exit gracefully rather than endlessly redo * the fault. */ fault = handle_mm_fault(vma, address, flags); /* No reason to continue if interrupted by SIGKILL. */ if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(current)) { fault = VM_FAULT_SIGNAL; goto out; } if (unlikely(fault & VM_FAULT_ERROR)) goto out_up; /* * Major/minor page fault accounting is only done on the * initial attempt. If we go through a retry, it is extremely * likely that the page will be found in page cache at that point. */ if (flags & FAULT_FLAG_ALLOW_RETRY) { if (fault & VM_FAULT_MAJOR) { tsk->maj_flt++; perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, regs, address); } else { tsk->min_flt++; perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, regs, address); } if (fault & VM_FAULT_RETRY) { if (IS_ENABLED(CONFIG_PGSTE) && gmap && (flags & FAULT_FLAG_RETRY_NOWAIT)) { /* FAULT_FLAG_RETRY_NOWAIT has been set, * mmap_sem has not been released */ current->thread.gmap_pfault = 1; fault = VM_FAULT_PFAULT; goto out_up; } /* Clear FAULT_FLAG_ALLOW_RETRY to avoid any risk * of starvation. */ flags &= ~(FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_RETRY_NOWAIT); flags |= FAULT_FLAG_TRIED; down_read(&mm->mmap_sem); goto retry; } } if (IS_ENABLED(CONFIG_PGSTE) && gmap) { address = __gmap_link(gmap, current->thread.gmap_addr, address); if (address == -EFAULT) { fault = VM_FAULT_BADMAP; goto out_up; } if (address == -ENOMEM) { fault = VM_FAULT_OOM; goto out_up; } } fault = 0; out_up: up_read(&mm->mmap_sem); out: return fault; }