static int pSeries_lpar_hpte_removebolted(unsigned long ea,
					  int psize, int ssize)
{
	unsigned long vpn;
	unsigned long slot, vsid;

	vsid = get_kernel_vsid(ea, ssize);
	vpn = hpt_vpn(ea, vsid, ssize);

	slot = pSeries_lpar_hpte_find(vpn, psize, ssize);
	if (slot == -1)
		return -ENOENT;

	/*
	 * lpar doesn't use the passed actual page size
	 */
	pSeries_lpar_hpte_invalidate(slot, vpn, psize, 0, ssize, 0);

	return 0;
}
/*
 * Build an entry for the base kernel segment and put it into
 * the segment table or SLB.  All other segment table or SLB
 * entries are faulted in.
 */
void stab_initialize(unsigned long stab)
{
	unsigned long esid, vsid;

	esid = GET_ESID(KERNELBASE);
	vsid = get_kernel_vsid(esid << SID_SHIFT);

	if (cur_cpu_spec->cpu_features & CPU_FTR_SLB) {
		/* Invalidate the entire SLB & all the ERATS */
#ifdef CONFIG_PPC_ISERIES
		asm volatile("isync; slbia; isync":::"memory");
#else
		asm volatile("isync":::"memory");
		asm volatile("slbmte	%0,%0"::"r" (0) : "memory");
		asm volatile("isync; slbia; isync":::"memory");
		make_slbe(esid, vsid, 0, 1);
		asm volatile("isync":::"memory");
#endif
	} else {
static void beat_lpar_hpte_updateboltedpp(unsigned long newpp,
					  unsigned long ea,
					  int psize, int ssize)
{
	unsigned long lpar_rc, slot, vsid, va, dummy0, dummy1;

	vsid = get_kernel_vsid(ea, MMU_SEGSIZE_256M);
	va = (vsid << 28) | (ea & 0x0fffffff);

	spin_lock(&beat_htab_lock);
	slot = beat_lpar_hpte_find(va, psize);
	BUG_ON(slot == -1);

	lpar_rc = beat_write_htab_entry(0, slot, 0, newpp, 0, 7,
					&dummy0, &dummy1);
	spin_unlock(&beat_htab_lock);

	BUG_ON(lpar_rc != 0);
}
static inline void create_pte_mapping(unsigned long start, unsigned long end,
				      unsigned long mode, unsigned long mask,
				      int large)
{
	unsigned long addr;
	HPTE *htab = (HPTE *)__v2a(htab_data.htab);
	unsigned int step;

	if (large)
		step = 16*MB;
	else
		step = 4*KB;

	for (addr = start; addr < end; addr += step) {
		unsigned long vsid = get_kernel_vsid(addr);
		unsigned long va = (vsid << 28) | (addr & 0xfffffff);

		make_pte(htab, va, (unsigned long)__v2a(addr), mode, mask,
			 large);
	}
}
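/*
 * The (vsid << 28) | (ea & 0x0fffffff) construction above recurs in most
 * of these call sites: each 256MB segment contributes a VSID, and the low
 * 28 bits of the effective address carry over into the virtual address
 * unchanged.  A minimal standalone sketch of that address split follows;
 * demo_vsid() and the sample address are assumptions for illustration,
 * not the kernel's actual VSID scramble.
 */
#include <stdio.h>

#define SID_SHIFT	28			/* 256MB segments: 2^28 bytes */
#define SID_MASK	0x0fffffffUL		/* offset within one segment */

/* Stand-in for get_kernel_vsid(): any per-segment mapping works here. */
static unsigned long demo_vsid(unsigned long ea)
{
	return (ea >> SID_SHIFT) ^ 0x5deadbeefUL;	/* toy scramble */
}

int main(void)
{
	unsigned long ea = 0xc0000000003a5000UL;	/* sample kernel EA */
	unsigned long vsid = demo_vsid(ea);
	unsigned long va = (vsid << SID_SHIFT) | (ea & SID_MASK);

	printf("ea   = 0x%016lx\n", ea);
	printf("vsid = 0x%016lx\n", vsid);
	printf("va   = 0x%016lx (low 28 bits preserved)\n", va);
	return 0;
}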
static void beat_lpar_hpte_updateboltedpp(unsigned long newpp,
					  unsigned long ea,
					  int psize, int ssize)
{
	unsigned long vpn;
	unsigned long lpar_rc, slot, vsid;
	u64 dummy0, dummy1;

	vsid = get_kernel_vsid(ea, MMU_SEGSIZE_256M);
	vpn = hpt_vpn(ea, vsid, MMU_SEGSIZE_256M);

	raw_spin_lock(&beat_htab_lock);
	slot = beat_lpar_hpte_find(vpn, psize);
	BUG_ON(slot == -1);

	lpar_rc = beat_write_htab_entry(0, slot, 0, newpp, 0, 7,
					&dummy0, &dummy1);
	raw_spin_unlock(&beat_htab_lock);

	BUG_ON(lpar_rc != 0);
}
static long map_to_linear(ulong paddr)
{
	unsigned long vaddr;
	int psize;
	unsigned long mode;
	int slot;
	uint shift;
	unsigned long tmp_mode;

	psize = MMU_PAGE_4K;
	shift = mmu_psize_defs[psize].shift;
	mode = _PAGE_ACCESSED | _PAGE_DIRTY | _PAGE_COHERENT | PP_RWXX;
	vaddr = (ulong)__va(paddr);

	{
		unsigned long vpn, hash, hpteg;
		unsigned long vsid = get_kernel_vsid(vaddr);
		unsigned long va = (vsid << 28) | (vaddr & 0x0fffffff);

		vpn = va >> shift;
		tmp_mode = mode;

		/* Make non-kernel text non-executable */
		if (!in_kernel_text(vaddr))
			tmp_mode = mode | HPTE_R_N;

		hash = hpt_hash(va, shift);
		hpteg = ((hash & htab_hash_mask) * HPTES_PER_GROUP);

		BUG_ON(!ppc_md.hpte_insert);
		slot = ppc_md.hpte_insert(hpteg, va, paddr, tmp_mode,
					  HPTE_V_BOLTED, psize);
		if (slot < 0)
			printk(KERN_EMERG "%s: no more bolted entries "
			       "HTAB[0x%lx]: 0x%lx\n", __func__, hpteg, paddr);
	}

	return slot;
}
static void pSeries_lpar_hpte_updateboltedpp(unsigned long newpp,
					     unsigned long ea,
					     int psize, int ssize)
{
	unsigned long vpn;
	unsigned long lpar_rc, slot, vsid, flags;

	vsid = get_kernel_vsid(ea, ssize);
	vpn = hpt_vpn(ea, vsid, ssize);

	slot = pSeries_lpar_hpte_find(vpn, psize, ssize);
	BUG_ON(slot == -1);

	flags = newpp & 7;
	if (mmu_has_feature(MMU_FTR_KERNEL_RO))
		/* Move pp0 into bit 8 (IBM 55) */
		flags |= (newpp & HPTE_R_PP0) >> 55;

	lpar_rc = plpar_pte_protect(flags, slot, 0);

	BUG_ON(lpar_rc != H_SUCCESS);
}
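/*
 * The ">> 55" in the MMU_FTR_KERNEL_RO path above converts between bit
 * numbering conventions: HPTE_R_PP0 occupies the most significant bit of
 * the HPTE second doubleword (bit 63 in LSB-0 terms), and shifting it
 * right by 55 lands it in bit 8, i.e. IBM bit 55 in the MSB-0 numbering
 * the architecture documents use.  A standalone sketch of just that bit
 * move; the HPTE_R_PP0 value here mirrors the kernel constant, but treat
 * it as an assumption.
 */
#include <stdio.h>

#define HPTE_R_PP0	0x8000000000000000UL	/* MSB = IBM bit 0 */

int main(void)
{
	unsigned long newpp = HPTE_R_PP0 | 0x3;	/* pp0 set, low pp bits 0b11 */
	unsigned long flags = newpp & 7;	/* keep the low protection bits */

	/* Move pp0 from bit 63 down to bit 8 (IBM bit 55). */
	flags |= (newpp & HPTE_R_PP0) >> 55;

	printf("flags = 0x%lx (bit 8 %s)\n", flags,
	       (flags & 0x100) ? "set" : "clear");
	return 0;
}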
/*
 * Bolt the kernel addr space into the HPT
 */
static void __init iSeries_bolt_kernel(unsigned long saddr, unsigned long eaddr)
{
	unsigned long pa;
	unsigned long mode_rw = _PAGE_ACCESSED | _PAGE_COHERENT | PP_RWXX;
	HPTE hpte;

	for (pa = saddr; pa < eaddr; pa += PAGE_SIZE) {
		unsigned long ea = (unsigned long)__va(pa);
		unsigned long vsid = get_kernel_vsid(ea);
		unsigned long va = (vsid << 28) | (pa & 0xfffffff);
		unsigned long vpn = va >> PAGE_SHIFT;
		unsigned long slot = HvCallHpt_findValid(&hpte, vpn);

		if (hpte.dw0.dw0.v) {
			/* HPTE exists, so just bolt it */
			HvCallHpt_setSwBits(slot, 0x10, 0);
		} else {
			/* No HPTE exists, so create a new bolted one */
			make_pte(NULL, va, (unsigned long)__v2a(ea),
				 mode_rw, 0, 0);
		}
	}
}
int htab_bolt_mapping(unsigned long vstart, unsigned long vend,
		      unsigned long pstart, unsigned long mode, int psize)
{
	unsigned long vaddr, paddr;
	unsigned int step, shift;
	unsigned long tmp_mode;
	int ret = 0;

	shift = mmu_psize_defs[psize].shift;
	step = 1 << shift;

	for (vaddr = vstart, paddr = pstart; vaddr < vend;
	     vaddr += step, paddr += step) {
		unsigned long vpn, hash, hpteg;
		unsigned long vsid = get_kernel_vsid(vaddr);
		unsigned long va = (vsid << 28) | (vaddr & 0x0fffffff);

		vpn = va >> shift;
		tmp_mode = mode;

		/* Make non-kernel text non-executable */
		if (!in_kernel_text(vaddr))
			tmp_mode = mode | HPTE_R_N;

		hash = hpt_hash(va, shift);
		hpteg = ((hash & htab_hash_mask) * HPTES_PER_GROUP);

		DBG("htab_bolt_mapping: calling %p\n", ppc_md.hpte_insert);
		BUG_ON(!ppc_md.hpte_insert);
		ret = ppc_md.hpte_insert(hpteg, va, paddr, tmp_mode,
					 HPTE_V_BOLTED, psize);

		if (ret < 0)
			break;
	}
	return ret < 0 ? ret : 0;
}
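/*
 * map_to_linear() and htab_bolt_mapping() reduce the full hash value to a
 * slot group the same way: mask it down to the number of PTE groups, then
 * scale by HPTES_PER_GROUP (8 slots per group) to get the index of the
 * group's first slot.  A standalone sketch of that indexing; the toy hash
 * and the 1024-group table size are assumptions standing in for
 * hpt_hash() and the firmware-provided htab_hash_mask.
 */
#include <stdio.h>

#define HPTES_PER_GROUP	8

int main(void)
{
	unsigned long htab_hash_mask = 1024 - 1;	/* toy: 1024 PTE groups */
	unsigned long va = 0xdeadbeef0000UL;
	unsigned int shift = 12;			/* 4K pages */

	/* Toy hash: mix the segment bits with the page index, roughly
	 * what hpt_hash() does for 256MB segments. */
	unsigned long hash = (va >> 28) ^ ((va & 0x0fffffffUL) >> shift);
	unsigned long hpteg = (hash & htab_hash_mask) * HPTES_PER_GROUP;

	printf("hash  = 0x%lx\n", hash);
	printf("group = %lu, slots %lu..%lu\n", hash & htab_hash_mask,
	       hpteg, hpteg + HPTES_PER_GROUP - 1);
	return 0;
}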
/*
 * A linux PTE was changed and the corresponding hash table entry
 * needs to be flushed. This function will either perform the flush
 * immediately or will batch it up if the current CPU has an active
 * batch on it.
 */
void hpte_need_flush(struct mm_struct *mm, unsigned long addr,
		     pte_t *ptep, unsigned long pte, int huge)
{
	unsigned long vpn;
	struct ppc64_tlb_batch *batch = &get_cpu_var(ppc64_tlb_batch);
	unsigned long vsid;
	unsigned int psize;
	int ssize;
	real_pte_t rpte;
	int i;

	i = batch->index;

	/* Get page size (maybe move back to caller).
	 *
	 * NOTE: when using special 64K mappings in 4K environment like
	 * for SPEs, we obtain the page size from the slice, which thus
	 * must still exist (and thus the VMA not reused) at the time
	 * of this call
	 */
	if (huge) {
#ifdef CONFIG_HUGETLB_PAGE
		psize = get_slice_psize(mm, addr);
		/* Mask the address for the correct page size */
		addr &= ~((1UL << mmu_psize_defs[psize].shift) - 1);
#else
		BUG();
		psize = pte_pagesize_index(mm, addr, pte); /* shutup gcc */
#endif
	} else {
		psize = pte_pagesize_index(mm, addr, pte);
		/* Mask the address for the standard page size.  If we
		 * have a 64k page kernel, but the hardware does not
		 * support 64k pages, this might be different from the
		 * hardware page size encoded in the slice table. */
		addr &= PAGE_MASK;
	}

	/* Build full vaddr */
	if (!is_kernel_addr(addr)) {
		ssize = user_segment_size(addr);
		vsid = get_vsid(mm->context.id, addr, ssize);
	} else {
		vsid = get_kernel_vsid(addr, mmu_kernel_ssize);
		ssize = mmu_kernel_ssize;
	}
	WARN_ON(vsid == 0);
	vpn = hpt_vpn(addr, vsid, ssize);
	rpte = __real_pte(__pte(pte), ptep);

	/*
	 * Check if we have an active batch on this CPU. If not, just
	 * flush now and return. For now, we do global invalidates
	 * in that case, might be worth testing the mm cpu mask though
	 * and decide to use local invalidates instead...
	 */
	if (!batch->active) {
		flush_hash_page(vpn, rpte, psize, ssize, 0);
		put_cpu_var(ppc64_tlb_batch);
		return;
	}

	/*
	 * This can happen when we are in the middle of a TLB batch and
	 * we encounter memory pressure (eg copy_page_range when it tries
	 * to allocate a new pte). If we have to reclaim memory and end
	 * up scanning and resetting referenced bits then our batch context
	 * will change mid stream.
	 *
	 * We also need to ensure only one page size is present in a given
	 * batch
	 */
	if (i != 0 && (mm != batch->mm || batch->psize != psize ||
		       batch->ssize != ssize)) {
		__flush_tlb_pending(batch);
		i = 0;
	}
	if (i == 0) {
		batch->mm = mm;
		batch->psize = psize;
		batch->ssize = ssize;
	}
	batch->pte[i] = rpte;
	batch->vpn[i] = vpn;
	batch->index = ++i;
	if (i >= PPC64_TLB_BATCH_NR)
		__flush_tlb_pending(batch);
	put_cpu_var(ppc64_tlb_batch);
}
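/*
 * The control flow in hpte_need_flush() is a generic batching pattern:
 * invalidate immediately when no batch is active, otherwise accumulate
 * entries and drain early whenever the batch context (mm, psize, ssize)
 * changes or the batch fills up.  A stripped-down, runnable sketch of
 * just that pattern; the struct layout, BATCH_NR value and the
 * flush_one()/flush_all() helpers are invented for illustration.
 */
#include <stdio.h>

#define BATCH_NR 4	/* toy capacity; the kernel uses PPC64_TLB_BATCH_NR */

struct batch {
	int active;
	int ctx;	/* stands in for the (mm, psize, ssize) triple */
	int n;
	unsigned long vpn[BATCH_NR];
};

static void flush_one(unsigned long vpn) { printf("flush vpn %lu\n", vpn); }

static void flush_all(struct batch *b)
{
	for (int i = 0; i < b->n; i++)
		flush_one(b->vpn[i]);
	b->n = 0;
}

static void need_flush(struct batch *b, int ctx, unsigned long vpn)
{
	if (!b->active) {		/* no active batch: flush now */
		flush_one(vpn);
		return;
	}
	if (b->n && b->ctx != ctx)	/* context changed mid-batch: drain */
		flush_all(b);
	if (b->n == 0)
		b->ctx = ctx;
	b->vpn[b->n++] = vpn;
	if (b->n >= BATCH_NR)		/* batch full: drain */
		flush_all(b);
}

int main(void)
{
	struct batch b = { .active = 1 };

	for (unsigned long vpn = 0; vpn < 6; vpn++)
		need_flush(&b, vpn < 3 ? 1 : 2, vpn);
	flush_all(&b);			/* drain the remainder */
	return 0;
}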
		pgdp = pgd_offset_i(ea);
		pmdp = pmd_alloc(&ioremap_mm, pgdp, ea);
		ptep = pte_alloc_kernel(&ioremap_mm, pmdp, ea);

		pa = abs_to_phys(pa);
		set_pte(ptep, pfn_pte(pa >> PAGE_SHIFT, __pgprot(flags)));
		spin_unlock(&ioremap_mm.page_table_lock);
	} else {
		unsigned long va, vpn, hash, hpteg;

		/*
		 * If the mm subsystem is not fully up, we cannot create a
		 * linux page table entry for this mapping.  Simply bolt an
		 * entry in the hardware page table.
		 */
		vsid = get_kernel_vsid(ea);
		va = (vsid << 28) | (ea & 0xFFFFFFF);
		vpn = va >> PAGE_SHIFT;
		hash = hpt_hash(vpn, 0);
		hpteg = ((hash & htab_data.htab_hash_mask) * HPTES_PER_GROUP);

		/* Panic if a pte group is full */
		if (ppc_md.hpte_insert(hpteg, va, pa >> PAGE_SHIFT, 0,
				       _PAGE_NO_CACHE | _PAGE_GUARDED | PP_RWXX,
				       1, 0) == -1) {
			panic("map_io_page: could not insert mapping");
		}
	}
}
/* Result code is:
 *  0 - handled
 *  1 - normal page fault
 * -1 - critical hash insertion error
 */
int hash_page(unsigned long ea, unsigned long access, unsigned long trap)
{
	void *pgdir;
	unsigned long vsid;
	struct mm_struct *mm;
	pte_t *ptep;
	cpumask_t tmp;
	int rc, user_region = 0, local = 0;
	int psize;

	DBG_LOW("hash_page(ea=%016lx, access=%lx, trap=%lx\n",
		ea, access, trap);

	if ((ea & ~REGION_MASK) >= PGTABLE_RANGE) {
		DBG_LOW(" out of pgtable range !\n");
		return 1;
	}

	/* Get region & vsid */
	switch (REGION_ID(ea)) {
	case USER_REGION_ID:
		user_region = 1;
		mm = current->mm;
		if (!mm) {
			DBG_LOW(" user region with no mm !\n");
			return 1;
		}
		vsid = get_vsid(mm->context.id, ea);
		psize = mm->context.user_psize;
		break;
	case VMALLOC_REGION_ID:
		mm = &init_mm;
		vsid = get_kernel_vsid(ea);
		if (ea < VMALLOC_END)
			psize = mmu_vmalloc_psize;
		else
			psize = mmu_io_psize;
		break;
	default:
		/* Not a valid range
		 * Send the problem up to do_page_fault
		 */
		return 1;
	}
	DBG_LOW(" mm=%p, mm->pgdir=%p, vsid=%016lx\n", mm, mm->pgd, vsid);

	/* Get pgdir */
	pgdir = mm->pgd;
	if (pgdir == NULL)
		return 1;

	/* Check CPU locality */
	tmp = cpumask_of_cpu(smp_processor_id());
	if (user_region && cpus_equal(mm->cpu_vm_mask, tmp))
		local = 1;

	/* Handle hugepage regions */
	if (unlikely(in_hugepage_area(mm->context, ea))) {
		DBG_LOW(" -> huge page !\n");
		return hash_huge_page(mm, access, ea, vsid, local, trap);
	}

	/* Get PTE and page size from page tables */
	ptep = find_linux_pte(pgdir, ea);
	if (ptep == NULL || !pte_present(*ptep)) {
		DBG_LOW(" no PTE !\n");
		return 1;
	}

#ifndef CONFIG_PPC_64K_PAGES
	DBG_LOW(" i-pte: %016lx\n", pte_val(*ptep));
#else
	DBG_LOW(" i-pte: %016lx %016lx\n", pte_val(*ptep),
		pte_val(*(ptep + PTRS_PER_PTE)));
#endif
	/* Pre-check access permissions (will be re-checked atomically
	 * in __hash_page_XX but this pre-check is a fast path)
	 */
	if (access & ~pte_val(*ptep)) {
		DBG_LOW(" no access !\n");
		return 1;
	}

	/* Do actual hashing */
#ifndef CONFIG_PPC_64K_PAGES
	rc = __hash_page_4K(ea, access, vsid, ptep, trap, local);
#else
	if (mmu_ci_restrictions) {
		/* If this PTE is non-cacheable, switch to 4k */
		if (psize == MMU_PAGE_64K &&
		    (pte_val(*ptep) & _PAGE_NO_CACHE)) {
			if (user_region) {
				psize = MMU_PAGE_4K;
				mm->context.user_psize = MMU_PAGE_4K;
				mm->context.sllp = SLB_VSID_USER |
					mmu_psize_defs[MMU_PAGE_4K].sllp;
			} else if (ea < VMALLOC_END) {
				/*
				 * some driver did a non-cacheable mapping
				 * in vmalloc space, so switch vmalloc
				 * to 4k pages
				 */
				printk(KERN_ALERT "Reducing vmalloc segment "
				       "to 4kB pages because of "
				       "non-cacheable mapping\n");
				psize = mmu_vmalloc_psize = MMU_PAGE_4K;
			}
		}
		if (user_region) {
			if (psize != get_paca()->context.user_psize) {
				get_paca()->context = mm->context;
				slb_flush_and_rebolt();
			}
		} else if (get_paca()->vmalloc_sllp !=
			   mmu_psize_defs[mmu_vmalloc_psize].sllp) {
			get_paca()->vmalloc_sllp =
				mmu_psize_defs[mmu_vmalloc_psize].sllp;
			slb_flush_and_rebolt();
		}
	}
	if (psize == MMU_PAGE_64K)
		rc = __hash_page_64K(ea, access, vsid, ptep, trap, local);
	else
		rc = __hash_page_4K(ea, access, vsid, ptep, trap, local);
#endif /* CONFIG_PPC_64K_PAGES */

#ifndef CONFIG_PPC_64K_PAGES
	DBG_LOW(" o-pte: %016lx\n", pte_val(*ptep));
#else
	DBG_LOW(" o-pte: %016lx %016lx\n", pte_val(*ptep),
		pte_val(*(ptep + PTRS_PER_PTE)));
#endif
	DBG_LOW(" -> rc=%d\n", rc);
	return rc;
}
/*
 * A linux PTE was changed and the corresponding hash table entry
 * needs to be flushed. This function will either perform the flush
 * immediately or will batch it up if the current CPU has an active
 * batch on it.
 *
 * Must be called from within some kind of spinlock/non-preempt region...
 */
void hpte_need_flush(struct mm_struct *mm, unsigned long addr,
		     pte_t *ptep, unsigned long pte, int huge)
{
	struct ppc64_tlb_batch *batch = &__get_cpu_var(ppc64_tlb_batch);
	unsigned long vsid, vaddr;
	unsigned int psize;
	real_pte_t rpte;
	int i;

	i = batch->index;

	/* We mask the address for the base page size. Huge pages will
	 * have applied their own masking already
	 */
	addr &= PAGE_MASK;

	/* Get page size (maybe move back to caller).
	 *
	 * NOTE: when using special 64K mappings in 4K environment like
	 * for SPEs, we obtain the page size from the slice, which thus
	 * must still exist (and thus the VMA not reused) at the time
	 * of this call
	 */
	if (huge) {
#ifdef CONFIG_HUGETLB_PAGE
		psize = mmu_huge_psize;
#else
		BUG();
		psize = pte_pagesize_index(mm, addr, pte); /* shutup gcc */
#endif
	} else
		psize = pte_pagesize_index(mm, addr, pte);

	/* Build full vaddr */
	if (!is_kernel_addr(addr)) {
		vsid = get_vsid(mm->context.id, addr);
		WARN_ON(vsid == 0);
	} else
		vsid = get_kernel_vsid(addr);
	vaddr = (vsid << 28) | (addr & 0x0fffffff);
	rpte = __real_pte(__pte(pte), ptep);

	/*
	 * Check if we have an active batch on this CPU. If not, just
	 * flush now and return. For now, we do global invalidates
	 * in that case, might be worth testing the mm cpu mask though
	 * and decide to use local invalidates instead...
	 */
	if (!batch->active) {
		flush_hash_page(vaddr, rpte, psize, 0);
		return;
	}

	/*
	 * This can happen when we are in the middle of a TLB batch and
	 * we encounter memory pressure (eg copy_page_range when it tries
	 * to allocate a new pte). If we have to reclaim memory and end
	 * up scanning and resetting referenced bits then our batch context
	 * will change mid stream.
	 *
	 * We also need to ensure only one page size is present in a given
	 * batch
	 */
	if (i != 0 && (mm != batch->mm || batch->psize != psize)) {
		__flush_tlb_pending(batch);
		i = 0;
	}
	if (i == 0) {
		batch->mm = mm;
		batch->psize = psize;
	}
	batch->pte[i] = rpte;
	batch->vaddr[i] = vaddr;
	batch->index = ++i;
	if (i >= PPC64_TLB_BATCH_NR)
		__flush_tlb_pending(batch);
}