/*
 * Populate (allocating where missing) the page-table pages needed to
 * cover the kernel virtual range [start, end) under pgd_base, without
 * installing any leaf ptes.
 */
void __init page_table_range_init(unsigned long start, unsigned long end,
				  pgd_t *pgd_base)
{
	pgd_t *pgd;
	pud_t *pud;
	pmd_t *pmd;
	pte_t *pte = NULL;
	int i, j, k;
	unsigned long vaddr;

	vaddr = start;
	i = __pgd_offset(vaddr);
	j = __pud_offset(vaddr);
	k = __pmd_offset(vaddr);
	pgd = pgd_base + i;

	for ( ; (i < PTRS_PER_PGD) && (vaddr != end); pgd++, i++) {
		pud = (pud_t *)pgd;
		for ( ; (j < PTRS_PER_PUD) && (vaddr != end); pud++, j++) {
			pmd = one_md_table_init(pud);
#ifndef __PAGETABLE_PMD_FOLDED
			pmd += k;
#endif
			for ( ; (k < PTRS_PER_PMD) && (vaddr != end); pmd++, k++) {
				pte = page_table_kmap_check(one_page_table_init(pmd),
							    pmd, vaddr, pte);
				vaddr += PMD_SIZE;
			}
			k = 0;
		}
		j = 0;
	}
}
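/*
 * [Illustrative sketch, not from the source] The walk above leans on the
 * __pgd_offset()/__pud_offset()/__pmd_offset() index helpers.  A minimal
 * standalone model of that arithmetic, assuming a classic i386-style
 * 32-bit layout (4 KiB pages, 1024-entry tables, pud/pmd folded) -- the
 * constants and macro names below are assumptions, not taken from the
 * code above:
 */
#include <stdio.h>

#define PAGE_SHIFT	12
#define PGDIR_SHIFT	22
#define PTRS_PER_PGD	1024
#define PTRS_PER_PTE	1024

/* Mirrors __pgd_offset(): which top-level slot covers addr? */
#define pgd_index(addr)	(((addr) >> PGDIR_SHIFT) & (PTRS_PER_PGD - 1))
/* Which slot within the leaf page table covers addr? */
#define pte_index(addr)	(((addr) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1))

int main(void)
{
	unsigned long addr = 0xc0101234UL;

	printf("addr 0x%08lx -> pgd index %lu, pte index %lu\n",
	       addr, pgd_index(addr), pte_index(addr));
	return 0;
}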
static int __do_vmalloc_fault(unsigned long addr, struct mm_struct *mm)
{
	/*
	 * Synchronise this task's top level page-table
	 * with the 'reference' page table.
	 */
	int offset = __pgd_offset(addr);
	pgd_t *pgd, *pgd_k;
	pmd_t *pmd, *pmd_k;

	pgd_k = init_mm.pgd + offset;
	if (!pgd_present(*pgd_k))
		goto bad_area;

	pgd = mm->pgd + offset;
#if 0
	/* note that we are two-level */
	if (!pgd_present(*pgd))
		set_pgd(pgd, *pgd_k);
#endif

	pmd_k = pmd_offset(pgd_k, addr);
	if (pmd_none(*pmd_k))
		goto bad_area;

	pmd = pmd_offset(pgd, addr);
	if (!pmd_none(*pmd))
		goto bad_area;

	set_pmd(pmd, *pmd_k);
	return 1;

bad_area:
	return -2;
}
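/*
 * [Illustrative sketch, not from the source] One plausible way a fault
 * entry point could drive __do_vmalloc_fault() above; the caller name
 * and the return-value policy here are assumptions:
 */
static int handle_kernel_fault(unsigned long addr, struct mm_struct *mm)
{
	if (addr >= VMALLOC_START && addr < VMALLOC_END) {
		/* 1 == entry copied from init_mm; negative == bad access. */
		if (__do_vmalloc_fault(addr, mm) == 1)
			return 0;	/* retry the faulting instruction */
	}
	return -EFAULT;			/* fall through to fixup/oops */
}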
/*
 * fault_is_priv()
 *	Return true if the fault is a privilege violation.
 */
STATIC int fault_is_priv(struct pt_regs *regs, unsigned long missqw0,
			 unsigned long missqw1)
{
	pgd_t *pgd;
	pmd_t *pmd;
	pte_t *ptep;
	unsigned long address;

	/*
	 * Check if this is user or kernel in pt_regs/CSR.
	 */
	pgd = (pgd_t *)(MMU_MISSQW0_PGD_GET(missqw0) << MMU_MISSQW0_PGD_SHIFT);
	address = (unsigned long)(MMU_MISSQW1_VPN_GET(missqw1) << MMU_VPN_SHIFT);

	pmd = (pmd_t *)__pgd_offset(pgd, address);
	if (unlikely(pmd_none(*pmd)) || unlikely(pmd_bad(*pmd))) {
		return 0;
	}

	ptep = pte_offset_map(pmd, address);
	if (unlikely(pte_none(*ptep)) || unlikely(pte_bad(*ptep))) {
		return 0;
	}

	/*
	 * If the PTE is a supervisory PTE and we are in user_mode(),
	 * declare this a privilege violation.
	 */
	if (user_mode(regs) && ((pte_val(*ptep) & L_PTE_USER) == 0)) {
		return 1;
	}
	return 0;
}
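/*
 * [Illustrative sketch, not from the source] The privilege test above
 * reduces to a single pte bit.  A standalone model, where the
 * L_PTE_USER bit position is an assumption:
 */
#include <stdio.h>

#define L_PTE_USER	(1UL << 4)	/* hypothetical "user may access" bit */

/* Mirrors the check in fault_is_priv(): a user-mode access to a
 * supervisor-only pte is a privilege violation. */
static int is_priv_violation(unsigned long pteval, int from_user)
{
	return from_user && (pteval & L_PTE_USER) == 0;
}

int main(void)
{
	printf("%d\n", is_priv_violation(0x0UL, 1));		/* 1: supervisor pte */
	printf("%d\n", is_priv_violation(L_PTE_USER, 1));	/* 0: user pte */
	return 0;
}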
/*
 * fault_is_resolved()
 *	Return true if the fault appears to be resolved.
 */
STATIC int fault_is_resolved(struct pt_regs *regs, unsigned long missqw0,
			     unsigned long missqw1)
{
	pgd_t *pgd;
	pmd_t *pmd;
	pte_t *ptep;
	unsigned long address;
	unsigned long src = MMU_MISSQW1_SRC_GET(missqw1);
	unsigned long op = MMU_MISSQW0_OP_GET(missqw0);

	/*
	 * Potential hardware bug: check if this is an ifetch with a write op.
	 * If so, we would be in an infinite loop.  Check here because this
	 * path is under debug.
	 */
	if ((src == 0) && (op == 1)) {
		printk(KERN_CRIT "ifetch/write: missqw0=%lx, missqw1=%lx\n",
		       missqw0, missqw1);
		return 0;
	}

	/*
	 * See if we now have a valid pte.
	 */
	pgd = (pgd_t *)(MMU_MISSQW0_PGD_GET(missqw0) << MMU_MISSQW0_PGD_SHIFT);
	address = (unsigned long)(MMU_MISSQW1_VPN_GET(missqw1) << MMU_VPN_SHIFT);

	pmd = (pmd_t *)__pgd_offset(pgd, address);
	if (unlikely(pmd_none(*pmd)) || unlikely(pmd_bad(*pmd))) {
		printk(KERN_CRIT "address[0x%lx] pgd[%p] pmd[%p] is empty\n",
		       address, pgd, pmd);
		return 0;
	}

	ptep = pte_offset_map(pmd, address);
	if (unlikely(pte_none(*ptep)) || unlikely(pte_bad(*ptep))) {
		printk(KERN_CRIT "address[0x%lx] pgd[%p] pmd[%p] pte[%p] is empty\n",
		       address, pgd, pmd, ptep);
		return 0;
	}

	if (unlikely(!pte_present(*ptep))) {
		printk(KERN_CRIT "address[0x%lx] pgd[%p] pmd[%p] pte[%p] is invalid: 0x%lx\n",
		       address, pgd, pmd, ptep, pte_val(*ptep));
		return 0;
	}

	if (MMU_MISSQW0_OP_GET(missqw0) && !pte_write(*ptep)) {
		printk(KERN_CRIT "address[0x%lx] pgd[%p] pmd[%p] pte[%p] write requested but not given: 0x%lx\n",
		       address, pgd, pmd, ptep, pte_val(*ptep));
		/* Fall through, not as critical. */
	}

	fault_printk(FAULT_DBG_TRACE,
		     "FAULT[%d]: ti[%p], missqw0=%08lx, missqw1=%08lx, resolved!\n",
		     raw_smp_processor_id(), (void *)current_thread_info(),
		     missqw0, missqw1);
	return 1;
}
/*
 * fault_pte()
 *	Obtain the pte corresponding to the fault.
 */
STATIC pte_t *fault_pte(pgd_t *pgd, unsigned long address)
{
	pmd_t *pmd = (pmd_t *)__pgd_offset(pgd, address);

	if (unlikely(pmd_none(*pmd)) || unlikely(pmd_bad(*pmd)) ||
	    unlikely(pmd_present(*pmd) == 0)) {
		return NULL;
	}

	return pte_offset_map(pmd, address);
}
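/*
 * [Illustrative sketch, not from the source] A hypothetical consumer of
 * fault_pte(): bail out to the generic fault path unless a present pte
 * already exists.  The caller name and the TLB-fill step are assumptions:
 */
STATIC int fault_try_fill_tlb(pgd_t *pgd, unsigned long address)
{
	pte_t *ptep = fault_pte(pgd, address);

	if (!ptep || !pte_present(*ptep))
		return 0;	/* no valid translation yet */

	/* ... write *ptep into the TLB here ... */
	return 1;
}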
/*
 * First Level Translation Fault Handler
 *
 * We enter here because the first level page table doesn't contain
 * a valid entry for the address.
 *
 * If the address is in kernel space (>= TASK_SIZE), then we are
 * probably faulting in the vmalloc() area.
 *
 * If the init_task's first level page table contains the relevant
 * entry, we copy it to this task.  If not, we send the process
 * a signal, fix up the exception, or oops the kernel.
 *
 * NOTE! We MUST NOT take any locks for this case. We may be in an
 * interrupt or a critical region, and should only copy the information
 * from the master page table, nothing more.
 */
int do_translation_fault(unsigned long addr, unsigned int fsr,
			 struct pt_regs *regs)
{
	struct task_struct *tsk;
	int offset;
	pgd_t *pgd, *pgd_k;
	pmd_t *pmd, *pmd_k;

	if (addr < TASK_SIZE)
		return do_page_fault(addr, fsr, regs);

	offset = __pgd_offset(addr);

	/*
	 * FIXME: CP15 C1 is write only on ARMv3 architectures.
	 * You really need to read the value in the page table
	 * register, not a copy.
	 */
	pgd = cpu_get_pgd() + offset;
	pgd_k = init_mm.pgd + offset;

	if (pgd_none(*pgd_k))
		goto bad_area;

#if 0	/* note that we are two-level */
	if (!pgd_present(*pgd))
		set_pgd(pgd, *pgd_k);
#endif

	pmd_k = pmd_offset(pgd_k, addr);
	pmd = pmd_offset(pgd, addr);

	if (pmd_none(*pmd_k))
		goto bad_area;

	set_pmd(pmd, *pmd_k);
	return 0;

bad_area:
	tsk = current;
	do_bad_area(tsk, tsk->active_mm, addr, fsr, regs);
	return 0;
}
/*
 * First Level Translation Fault Handler
 *
 * We enter here because the first level page table doesn't contain
 * a valid entry for the address.
 *
 * If the address is in kernel space (>= TASK_SIZE), then we are
 * probably faulting in the vmalloc() area.
 *
 * If the init_task's first level page table contains the relevant
 * entry, we copy it to this task.  If not, we send the process
 * a signal, fix up the exception, or oops the kernel.
 *
 * NOTE! We MUST NOT take any locks for this case. We may be in an
 * interrupt or a critical region, and should only copy the information
 * from the master page table, nothing more.
 */
int do_translation_fault(unsigned long addr, int error_code,
			 struct pt_regs *regs)
{
	struct task_struct *tsk;
	struct mm_struct *mm;
	int offset;
	pgd_t *pgd, *pgd_k;
	pmd_t *pmd, *pmd_k;

	if (addr < TASK_SIZE)
		return do_page_fault(addr, error_code, regs);

	offset = __pgd_offset(addr);

	/*
	 * FIXME: CP15 C1 is write only on ARMv3 architectures.
	 */
	pgd = cpu_get_pgd() + offset;
	pgd_k = init_mm.pgd + offset;

	if (pgd_none(*pgd_k))
		goto bad_area;

#if 0	/* note that we are two-level */
	if (!pgd_present(*pgd))
		set_pgd(pgd, *pgd_k);
#endif

	pmd_k = pmd_offset(pgd_k, addr);
	pmd = pmd_offset(pgd, addr);

	if (pmd_none(*pmd_k))
		goto bad_area;

	set_pmd(pmd, *pmd_k);
	return 0;

bad_area:
	tsk = current;
	mm = tsk->active_mm;
	do_bad_area(tsk, mm, addr, error_code, regs);
	return 0;
}
void __init pagetable_init(void)
{
	unsigned long vaddr;
	pgd_t *pgd_base;
#ifdef CONFIG_HIGHMEM
	pgd_t *pgd;
	pud_t *pud;
	pmd_t *pmd;
	pte_t *pte;
#endif

	/* Initialize the entire pgd. */
	pgd_init((unsigned long)swapper_pg_dir);
	pgd_init((unsigned long)swapper_pg_dir +
		 sizeof(pgd_t) * USER_PTRS_PER_PGD);

	pgd_base = swapper_pg_dir;

	/*
	 * Fixed mappings:
	 */
#ifdef CONFIG_BCM53000_HIGHMEM
	vaddr = __fix_to_virt(VALIAS_IDX(__end_of_fixed_addresses - 1)) & PMD_MASK;
#else
	vaddr = __fix_to_virt(__end_of_fixed_addresses - 1) & PMD_MASK;
#endif
	fixrange_init(vaddr, 0, pgd_base);

#ifdef CONFIG_HIGHMEM
	/*
	 * Permanent kmaps:
	 */
	vaddr = PKMAP_BASE;
	fixrange_init(vaddr, vaddr + PAGE_SIZE * LAST_PKMAP, pgd_base);

	pgd = swapper_pg_dir + __pgd_offset(vaddr);
	pud = pud_offset(pgd, vaddr);
	pmd = pmd_offset(pud, vaddr);
	pte = pte_offset_kernel(pmd, vaddr);
	pkmap_page_table = pte;
#endif
}
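/*
 * [Illustrative sketch, not from the source] __fix_to_virt() conventionally
 * hands out fixmap slots downward from FIXADDR_TOP, which is why the code
 * above takes the *last* index and rounds down to a PMD boundary.  A
 * standalone model; the FIXADDR_TOP value and the slot count are
 * assumptions:
 */
#include <stdio.h>

#define PAGE_SHIFT	12
#define PMD_SHIFT	22
#define PMD_MASK	(~((1UL << PMD_SHIFT) - 1))
#define FIXADDR_TOP	0xfffe0000UL	/* hypothetical value */
#define __fix_to_virt(x)	(FIXADDR_TOP - ((x) << PAGE_SHIFT))

int main(void)
{
	unsigned long end_of_fixed = 64;	/* hypothetical slot count */
	unsigned long vaddr = __fix_to_virt(end_of_fixed - 1) & PMD_MASK;

	printf("fixmap range begins (PMD-aligned) at 0x%08lx\n", vaddr);
	return 0;
}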
/*
 * This routine handles page faults.  It determines the address,
 * and the problem, and then passes it off to one of the appropriate
 * routines.
 */
asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long write,
			      unsigned long address)
{
	struct vm_area_struct *vma = NULL;
	struct task_struct *tsk = current;
	struct mm_struct *mm = tsk->mm;
	const int field = sizeof(unsigned long) * 2;
	siginfo_t info;

#if 0
	printk("Cpu%d[%s:%d:%0*lx:%ld:%0*lx]\n", smp_processor_id(),
	       current->comm, current->pid, field, address, write,
	       field, regs->cp0_epc);
#endif

	info.si_code = SEGV_MAPERR;

	/*
	 * We fault-in kernel-space virtual memory on-demand. The
	 * 'reference' page table is init_mm.pgd.
	 *
	 * NOTE! We MUST NOT take any locks for this case. We may
	 * be in an interrupt or a critical region, and should
	 * only copy the information from the master page table,
	 * nothing more.
	 */
	if (unlikely(address >= VMALLOC_START && address <= VMALLOC_END))
		goto vmalloc_fault;

	/*
	 * If we're in an interrupt or have no user
	 * context, we must not take the fault..
	 */
	if (in_atomic() || !mm)
		goto bad_area_nosemaphore;

	down_read(&mm->mmap_sem);
	vma = find_vma(mm, address);
	if (!vma)
		goto bad_area;
	if (vma->vm_start <= address)
		goto good_area;
	if (!(vma->vm_flags & VM_GROWSDOWN))
		goto bad_area;
	if (expand_stack(vma, address))
		goto bad_area;

/*
 * Ok, we have a good vm_area for this memory access, so
 * we can handle it..
 */
good_area:
	info.si_code = SEGV_ACCERR;

	if (write) {
		if (!(vma->vm_flags & VM_WRITE))
			goto bad_area;
	} else {
		if (!(vma->vm_flags & (VM_READ | VM_WRITE | VM_EXEC)))
			goto bad_area;
	}

survive:
	/*
	 * If for any reason at all we couldn't handle the fault,
	 * make sure we exit gracefully rather than endlessly redo
	 * the fault.
	 */
	switch (handle_mm_fault(mm, vma, address, write)) {
	case VM_FAULT_MINOR:
		tsk->min_flt++;
		break;
	case VM_FAULT_MAJOR:
		tsk->maj_flt++;
		break;
	case VM_FAULT_SIGBUS:
		goto do_sigbus;
	case VM_FAULT_OOM:
		goto out_of_memory;
	default:
		BUG();
	}

	up_read(&mm->mmap_sem);
	return;

/*
 * Something tried to access memory that isn't in our memory map..
 * Fix it, but check if it's kernel or user first..
 */
bad_area:
	up_read(&mm->mmap_sem);

bad_area_nosemaphore:
	/* User mode accesses just cause a SIGSEGV */
	if (user_mode(regs)) {
		tsk->thread.cp0_badvaddr = address;
		tsk->thread.error_code = write;
#if 0
		printk("do_page_fault() #2: sending SIGSEGV to %s for "
		       "invalid %s\n%0*lx (epc == %0*lx, ra == %0*lx)\n",
		       tsk->comm,
		       write ? "write access to" : "read access from",
		       field, address,
		       field, (unsigned long) regs->cp0_epc,
		       field, (unsigned long) regs->regs[31]);
#endif
		info.si_signo = SIGSEGV;
		info.si_errno = 0;
		/* info.si_code has been set above */
		info.si_addr = (void __user *) address;
		force_sig_info(SIGSEGV, &info, tsk);
		return;
	}

no_context:
	/* Are we prepared to handle this kernel fault? */
	if (fixup_exception(regs)) {
		current->thread.cp0_baduaddr = address;
		return;
	}

	/*
	 * Oops. The kernel tried to access some bad page. We'll have to
	 * terminate things with extreme prejudice.
	 */
	bust_spinlocks(1);

	printk(KERN_ALERT "CPU %d Unable to handle kernel paging request at "
	       "virtual address %0*lx, epc == %0*lx, ra == %0*lx\n",
	       smp_processor_id(), field, address, field, regs->cp0_epc,
	       field, regs->regs[31]);
	die("Oops", regs);

/*
 * We ran out of memory, or some other thing happened to us that made
 * us unable to handle the page fault gracefully.
 */
out_of_memory:
	up_read(&mm->mmap_sem);
	if (is_init(tsk)) {
		yield();
		down_read(&mm->mmap_sem);
		goto survive;
	}
	printk("VM: killing process %s\n", tsk->comm);
	if (user_mode(regs))
		do_exit(SIGKILL);
	goto no_context;

do_sigbus:
	up_read(&mm->mmap_sem);

	/* Kernel mode? Handle exceptions or die */
	if (!user_mode(regs))
		goto no_context;

	/*
	 * Send a sigbus, regardless of whether we were in kernel
	 * or user mode.
	 */
#if 0
	printk("do_page_fault() #3: sending SIGBUS to %s for "
	       "invalid %s\n%0*lx (epc == %0*lx, ra == %0*lx)\n",
	       tsk->comm,
	       write ? "write access to" : "read access from",
	       field, address,
	       field, (unsigned long) regs->cp0_epc,
	       field, (unsigned long) regs->regs[31]);
#endif
	tsk->thread.cp0_badvaddr = address;
	info.si_signo = SIGBUS;
	info.si_errno = 0;
	info.si_code = BUS_ADRERR;
	info.si_addr = (void __user *) address;
	force_sig_info(SIGBUS, &info, tsk);
	return;

vmalloc_fault:
	{
		/*
		 * Synchronize this task's top level page-table
		 * with the 'reference' page table.
		 *
		 * Do _not_ use "tsk" here. We might be inside
		 * an interrupt in the middle of a task switch..
		 */
		int offset = __pgd_offset(address);
		pgd_t *pgd, *pgd_k;
		pud_t *pud, *pud_k;
		pmd_t *pmd, *pmd_k;
		pte_t *pte_k;

		pgd = (pgd_t *) pgd_current[smp_processor_id()] + offset;
		pgd_k = init_mm.pgd + offset;

		if (!pgd_present(*pgd_k))
			goto no_context;
		set_pgd(pgd, *pgd_k);

		pud = pud_offset(pgd, address);
		pud_k = pud_offset(pgd_k, address);
		if (!pud_present(*pud_k))
			goto no_context;

		pmd = pmd_offset(pud, address);
		pmd_k = pmd_offset(pud_k, address);
		if (!pmd_present(*pmd_k))
			goto no_context;
		set_pmd(pmd, *pmd_k);

		pte_k = pte_offset_kernel(pmd_k, address);
		if (!pte_present(*pte_k))
			goto no_context;
		return;
	}
}
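/*
 * [Illustrative sketch, not from the source] The vmalloc_fault path above
 * only ever copies *top-level* entries because the lower-level tables are
 * physically shared with init_mm.  A toy userspace model of that sharing;
 * every name here is hypothetical:
 */
#include <stdio.h>

#define ENTRIES 4

static unsigned long kernel_pte_page[ENTRIES];	/* shared leaf table */
static unsigned long *init_pgd[ENTRIES];	/* reference (init_mm) pgd */
static unsigned long *task_pgd[ENTRIES];	/* per-task pgd, lazily synced */

int main(void)
{
	/* vmalloc() populates only the reference pgd ... */
	init_pgd[2] = kernel_pte_page;

	/* ... so the task faults once, and the handler copies one entry
	 * (the analogue of set_pgd(pgd, *pgd_k) above). */
	if (!task_pgd[2] && init_pgd[2])
		task_pgd[2] = init_pgd[2];

	/* Both pgds now reach the same lower-level table. */
	printf("shared: %s\n", task_pgd[2] == init_pgd[2] ? "yes" : "no");
	return 0;
}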
/*
 * This routine handles page faults.  It determines the address,
 * and the problem, and then passes it off to one of the appropriate
 * routines.
 */
asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long write,
			      unsigned long mmu_meh)
{
	struct vm_area_struct *vma = NULL;
	struct task_struct *tsk = current;
	struct mm_struct *mm = tsk->mm;
	int si_code;
	int fault;
	unsigned long address = mmu_meh & PAGE_MASK;

	si_code = SEGV_MAPERR;

#ifndef CONFIG_CPU_HAS_TLBI
	/*
	 * We fault-in kernel-space virtual memory on-demand. The
	 * 'reference' page table is init_mm.pgd.
	 *
	 * NOTE! We MUST NOT take any locks for this case. We may
	 * be in an interrupt or a critical region, and should
	 * only copy the information from the master page table,
	 * nothing more.
	 */
	if (unlikely(address >= VMALLOC_START) &&
	    unlikely(address <= VMALLOC_END)) {
		/*
		 * Synchronize this task's top level page-table
		 * with the 'reference' page table.
		 *
		 * Do _not_ use "tsk" here. We might be inside
		 * an interrupt in the middle of a task switch..
		 */
		int offset = __pgd_offset(address);
		pgd_t *pgd, *pgd_k;
		pud_t *pud, *pud_k;
		pmd_t *pmd, *pmd_k;
		pte_t *pte_k;
		unsigned long pgd_base;

		pgd_base = tlb_get_pgd();
		pgd = (pgd_t *)pgd_base + offset;
		pgd_k = init_mm.pgd + offset;

		if (!pgd_present(*pgd_k))
			goto no_context;
		set_pgd(pgd, *pgd_k);

		pud = (pud_t *)pgd;
		pud_k = (pud_t *)pgd_k;
		if (!pud_present(*pud_k))
			goto no_context;

		pmd = pmd_offset(pud, address);
		pmd_k = pmd_offset(pud_k, address);
		if (!pmd_present(*pmd_k))
			goto no_context;
		set_pmd(pmd, *pmd_k);

		pte_k = pte_offset_kernel(pmd_k, address);
		if (!pte_present(*pte_k))
			goto no_context;
		return;
	}
#endif

	/*
	 * If we're in an interrupt or have no user
	 * context, we must not take the fault..
	 */
	if (in_atomic() || !mm)
		goto bad_area_nosemaphore;

	down_read(&mm->mmap_sem);
	vma = find_vma(mm, address);
	if (!vma)
		goto bad_area;
	if (vma->vm_start <= address)
		goto good_area;
	if (!(vma->vm_flags & VM_GROWSDOWN))
		goto bad_area;
	if (expand_stack(vma, address))
		goto bad_area;

/*
 * Ok, we have a good vm_area for this memory access, so
 * we can handle it..
 */
good_area:
	si_code = SEGV_ACCERR;

	if (write) {
		if (!(vma->vm_flags & VM_WRITE))
			goto bad_area;
	} else {
		if (!(vma->vm_flags & (VM_READ | VM_WRITE | VM_EXEC)))
			goto bad_area;
	}

	/*
	 * If for any reason at all we couldn't handle the fault,
	 * make sure we exit gracefully rather than endlessly redo
	 * the fault.
	 */
	fault = handle_mm_fault(vma, address, write ? FAULT_FLAG_WRITE : 0);
	if (unlikely(fault & VM_FAULT_ERROR)) {
		if (fault & VM_FAULT_OOM)
			goto out_of_memory;
		else if (fault & VM_FAULT_SIGBUS)
			goto do_sigbus;
		else if (fault & VM_FAULT_SIGSEGV)
			goto bad_area;
		BUG();
	}
	if (fault & VM_FAULT_MAJOR)
		tsk->maj_flt++;
	else
		tsk->min_flt++;

	up_read(&mm->mmap_sem);
	return;

/*
 * Something tried to access memory that isn't in our memory map..
 * Fix it, but check if it's kernel or user first..
 */
bad_area:
	up_read(&mm->mmap_sem);

bad_area_nosemaphore:
	/* User mode accesses just cause a SIGSEGV */
	if (user_mode(regs)) {
		force_sig_fault(SIGSEGV, si_code, (void __user *)address,
				current);
		return;
	}

no_context:
	/* Are we prepared to handle this kernel fault? */
	if (fixup_exception(regs))
		return;

	/*
	 * Oops. The kernel tried to access some bad page. We'll have to
	 * terminate things with extreme prejudice.
	 */
	bust_spinlocks(1);
	pr_alert("Unable to handle kernel paging request at virtual "
		 "address 0x%08lx, pc: 0x%08lx\n", address, regs->pc);
	die_if_kernel("Oops", regs, write);

out_of_memory:
	up_read(&mm->mmap_sem);

	/*
	 * We ran out of memory, call the OOM killer, and return to userspace
	 * (which will retry the fault, or kill us if we got oom-killed).
	 */
	pagefault_out_of_memory();
	return;

do_sigbus:
	up_read(&mm->mmap_sem);

	/* Kernel mode? Handle exceptions or die */
	if (!user_mode(regs))
		goto no_context;

	force_sig_fault(SIGBUS, BUS_ADRERR, (void __user *)address, current);
}