/*
 * Set up the MMU context of a freshly created mm.
 *
 * The context id is obtained from the radix or the hash backend, depending
 * on radix_enabled().  On success the id is stored in mm->context and the
 * remaining per-context state visible here is reset.
 *
 * @tsk: not used by this function.
 * @mm:  address space being initialised.
 *
 * Returns 0 on success, or the negative value handed back by the backend.
 */
int init_new_context(struct task_struct *tsk, struct mm_struct *mm)
{
	int ctx_id = radix_enabled() ? radix__init_new_context(mm)
				     : hash__init_new_context(mm);

	/* A negative id is an error from the backend; pass it through. */
	if (ctx_id < 0)
		return ctx_id;

	mm->context.id = ctx_id;

#ifdef CONFIG_PPC_64K_PAGES
	mm->context.pte_frag = NULL;
#endif
#ifdef CONFIG_SPAPR_TCE_IOMMU
	mm_iommu_init(mm);
#endif

	/* Both counters start from zero for a new context. */
	atomic_set(&mm->context.active_cpus, 0);
	atomic_set(&mm->context.copros, 0);

	return 0;
}
/*
 * Remove the mapping for the range [start, end) by dispatching to the radix
 * or hash implementation, whichever radix_enabled() selects.
 *
 * Returns whatever the selected implementation returns.
 */
int __meminit remove_section_mapping(unsigned long start, unsigned long end)
{
	return radix_enabled() ? radix__remove_section_mapping(start, end)
			       : hash__remove_section_mapping(start, end);
}
/*
 * Create the mapping for the range [start, end) on node @nid by dispatching
 * to the radix or hash implementation, whichever radix_enabled() selects.
 *
 * Returns whatever the selected implementation returns.
 */
int __meminit create_section_mapping(unsigned long start, unsigned long end,
				     int nid)
{
	return radix_enabled() ? radix__create_section_mapping(start, end, nid)
			       : hash__create_section_mapping(start, end, nid);
}
/*
 * For use by kexec.
 *
 * On radix, hand over to radix__mmu_cleanup_all().  On hash, invoke the
 * hpte_clear_all hook if one has been installed; otherwise do nothing.
 */
void mmu_cleanup_all(void)
{
	if (radix_enabled()) {
		radix__mmu_cleanup_all();
		return;
	}

	/* The hook is optional. */
	if (mmu_hash_ops.hpte_clear_all)
		mmu_hash_ops.hpte_clear_all();
}
/*
 * Program the address-translation related window context registers.
 *
 * These registers are shared by send and receive windows, but their values
 * differ between user and kernel windows.  Once the outstanding TODOs are
 * resolved, some of this may move into vas_winctx and
 * init_vas_winctx_regs().
 *
 * @window:   window whose registers are written.
 * @user_win: true for a user window, false for a kernel window.
 */
static void init_xlate_regs(struct vas_window *window, bool user_win)
{
	u64 xlate_msr = 0ULL;
	u64 xlate_lpcr = 0ULL;
	u64 xlate_ctl = 0ULL;
	u64 amr = 0ULL;
	u64 seidr = 0ULL;
	u64 host_lpcr;

	/*
	 * XLATE_MSR: HV and SF are always set.  MSR_TA and MSR_US stay clear
	 * for both kinds of window; MSR_DR and MSR_PR are set for user
	 * windows only.
	 */
	xlate_msr = SET_FIELD(VAS_XLATE_MSR_HV, xlate_msr, 1);
	xlate_msr = SET_FIELD(VAS_XLATE_MSR_SF, xlate_msr, 1);
	if (user_win) {
		xlate_msr = SET_FIELD(VAS_XLATE_MSR_DR, xlate_msr, 1);
		xlate_msr = SET_FIELD(VAS_XLATE_MSR_PR, xlate_msr, 1);
	}
	write_hvwc_reg(window, VREG(XLATE_MSR), xlate_msr);

	/*
	 * XLATE_LPCR, derived from the current LPCR.
	 *
	 * Page size encoding is 0 = 4KB, 5 = 64KB (Power ISA v3.0B,
	 * Section 5.7.8.1 Segment Lookaside Buffer).
	 *
	 * LPCR_SC should be 0 for Power9 (Nest MMU Workbook, Section 1.3.1
	 * Address Translation Context).
	 *
	 * NOTE(review): ISL and TC are passed as the masked LPCR bits rather
	 * than as 0/1 like the literal field values elsewhere in this
	 * function - confirm SET_FIELD() treats them as intended.
	 */
	host_lpcr = mfspr(SPRN_LPCR);
	xlate_lpcr = SET_FIELD(VAS_XLATE_LPCR_PAGE_SIZE, xlate_lpcr, 5);
	xlate_lpcr = SET_FIELD(VAS_XLATE_LPCR_ISL, xlate_lpcr, host_lpcr & LPCR_ISL);
	xlate_lpcr = SET_FIELD(VAS_XLATE_LPCR_TC, xlate_lpcr, host_lpcr & LPCR_TC);
	xlate_lpcr = SET_FIELD(VAS_XLATE_LPCR_SC, xlate_lpcr, 0);
	write_hvwc_reg(window, VREG(XLATE_LPCR), xlate_lpcr);

	/*
	 * XLATE_CTL translation mode (NMMU workbook, Section 1.3.1 Address
	 * translation Context):
	 *	0b00	Hashed Page Table mode
	 *	0b01	Reserved
	 *	0b10	Radix on HPT
	 *	0b11	Radix on Radix
	 */
	xlate_ctl = SET_FIELD(VAS_XLATE_MODE, xlate_ctl, radix_enabled() ? 3 : 2);
	write_hvwc_reg(window, VREG(XLATE_CTL), xlate_ctl);

	/*
	 * TODO: Can we mfspr(AMR) even for user windows?
	 */
	amr = SET_FIELD(VAS_AMR, amr, mfspr(SPRN_AMR));
	write_hvwc_reg(window, VREG(AMR), amr);

	/* SEIDR is programmed with a zero field. */
	seidr = SET_FIELD(VAS_SEIDR, seidr, 0);
	write_hvwc_reg(window, VREG(SEIDR), seidr);
}
void destroy_context(struct mm_struct *mm) { #ifdef CONFIG_SPAPR_TCE_IOMMU WARN_ON_ONCE(!list_empty(&mm->context.iommu_group_mem_list)); #endif if (radix_enabled()) WARN_ON(process_tb[mm->context.id].prtb0 != 0); else subpage_prot_free(mm); destroy_contexts(&mm->context); mm->context.id = MMU_NO_CONTEXT; }
int init_new_context(struct task_struct *tsk, struct mm_struct *mm) { int index; index = __init_new_context(); if (index < 0) return index; if (radix_enabled()) { radix__init_new_context(mm, index); } else { /* The old code would re-promote on fork, we don't do that * when using slices as it could cause problem promoting slices * that have been forced down to 4K * * For book3s we have MMU_NO_CONTEXT set to be ~0. Hence check * explicitly against context.id == 0. This ensures that we * properly initialize context slice details for newly allocated * mm's (which will have id == 0) and don't alter context slice * inherited via fork (which will have id != 0). * * We should not be calling init_new_context() on init_mm. Hence a * check against 0 is ok. */ if (mm->context.id == 0) slice_set_user_psize(mm, mmu_virtual_psize); subpage_prot_init_new_context(mm); } mm->context.id = index; #ifdef CONFIG_PPC_ICSWX mm->context.cop_lockp = kmalloc(sizeof(spinlock_t), GFP_KERNEL); if (!mm->context.cop_lockp) { __destroy_context(index); subpage_prot_free(mm); mm->context.id = MMU_NO_CONTEXT; return -ENOMEM; } spin_lock_init(mm->context.cop_lockp); #endif /* CONFIG_PPC_ICSWX */ #ifdef CONFIG_PPC_64K_PAGES mm->context.pte_frag = NULL; #endif #ifdef CONFIG_SPAPR_TCE_IOMMU mm_iommu_init(&mm->context); #endif return 0; }
/*
 * Filter a PTE before it is installed.
 *
 * On radix the PTE is returned untouched.  Otherwise the _PAGE_HPTEFLAGS
 * bits are stripped and, when the PTE looks normal and the CPU has neither
 * CPU_FTR_COHERENT_ICACHE nor CPU_FTR_NOEXECUTE, the backing page (if any)
 * gets a dcache/icache flush the first time it is seen; PG_arch_1 records
 * that the flush has been done.
 *
 * Returns the (possibly modified) PTE.
 */
static pte_t set_pte_filter(pte_t pte)
{
	struct page *page;

	if (radix_enabled())
		return pte;

	pte = __pte(pte_val(pte) & ~_PAGE_HPTEFLAGS);

	if (!pte_looks_normal(pte))
		return pte;

	/* No flush needed when either feature is present. */
	if (cpu_has_feature(CPU_FTR_COHERENT_ICACHE) ||
	    cpu_has_feature(CPU_FTR_NOEXECUTE))
		return pte;

	page = maybe_pte_to_page(pte);
	if (page && !test_bit(PG_arch_1, &page->flags)) {
		flush_dcache_icache_page(page);
		set_bit(PG_arch_1, &page->flags);
	}

	return pte;
}
/*
 * Emit the platform description lines into @m: the machine model taken from
 * the "model" property of the device tree root, the firmware flavour (OPAL
 * or BML) and the MMU mode (Radix or Hash).
 *
 * If the root node cannot be found, or it carries no "model" property, the
 * model is printed as an empty string.
 */
static void pnv_show_cpuinfo(struct seq_file *m)
{
	struct device_node *root;
	const char *model = "";

	root = of_find_node_by_path("/");
	if (root) {
		const char *prop = of_get_property(root, "model", NULL);

		/*
		 * Only replace the "" fallback when the property exists, so
		 * a NULL pointer is never handed to the %s format below.
		 */
		if (prop)
			model = prop;
	}

	/* model may point into the node, so print before dropping the ref. */
	seq_printf(m, "machine\t\t: PowerNV %s\n", model);

	if (firmware_has_feature(FW_FEATURE_OPAL))
		seq_printf(m, "firmware\t: OPAL\n");
	else
		seq_printf(m, "firmware\t: BML\n");

	of_node_put(root);

	if (radix_enabled())
		seq_printf(m, "MMU\t\t: Radix\n");
	else
		seq_printf(m, "MMU\t\t: Hash\n");
}
/*
 * Architecture hook run while an address space is being torn down.
 * Only radix needs any work here.
 */
void arch_exit_mmap(struct mm_struct *mm)
{
	if (!radix_enabled())
		return;

	/*
	 * Radix process table entries have no valid bit.  At least the P9
	 * implementation will not cache an entry whose RTS field is invalid,
	 * and 0 is invalid, so clearing the first word is sufficient.
	 *
	 * This happens before the "fullmm" TLB flush in exit_mmap, which
	 * issues a RIC=2 tlbie that clears the process table entry; see the
	 * "fullmm" comments in tlb-radix.c.
	 *
	 * No barrier is needed after the store: this process performs the
	 * invalidate itself, and that begins with a ptesync.
	 */
	process_tb[mm->context.id].prtb0 = 0;
}
void destroy_context(struct mm_struct *mm) { #ifdef CONFIG_SPAPR_TCE_IOMMU mm_iommu_cleanup(&mm->context); #endif #ifdef CONFIG_PPC_ICSWX drop_cop(mm->context.acop, mm); kfree(mm->context.cop_lockp); mm->context.cop_lockp = NULL; #endif /* CONFIG_PPC_ICSWX */ if (radix_enabled()) process_tb[mm->context.id].prtb1 = 0; else subpage_prot_free(mm); destroy_pagetable_page(mm); __destroy_context(mm->context.id); mm->context.id = MMU_NO_CONTEXT; }
extern void radix_kvm_prefetch_workaround(struct mm_struct *mm) { unsigned int pid = mm->context.id; if (unlikely(pid == MMU_NO_CONTEXT)) return; /* * If this context hasn't run on that CPU before and KVM is * around, there's a slim chance that the guest on another * CPU just brought in obsolete translation into the TLB of * this CPU due to a bad prefetch using the guest PID on * the way into the hypervisor. * * We work around this here. If KVM is possible, we check if * any sibling thread is in KVM. If it is, the window may exist * and thus we flush that PID from the core. * * A potential future improvement would be to mark which PIDs * have never been used on the system and avoid it if the PID * is new and the process has no other cpumask bit set. */ if (cpu_has_feature(CPU_FTR_HVMODE) && radix_enabled()) { int cpu = smp_processor_id(); int sib = cpu_first_thread_sibling(cpu); bool flush = false; for (; sib <= cpu_last_thread_sibling(cpu) && !flush; sib++) { if (sib == cpu) continue; if (paca[sib].kvm_hstate.kvm_vcpu) flush = true; } if (flush) _tlbiel_pid(pid, RIC_FLUSH_ALL); } }
void vpa_init(int cpu) { int hwcpu = get_hard_smp_processor_id(cpu); unsigned long addr; long ret; struct paca_struct *pp; struct dtl_entry *dtl; /* * The spec says it "may be problematic" if CPU x registers the VPA of * CPU y. We should never do that, but wail if we ever do. */ WARN_ON(cpu != smp_processor_id()); if (cpu_has_feature(CPU_FTR_ALTIVEC)) lppaca_of(cpu).vmxregs_in_use = 1; if (cpu_has_feature(CPU_FTR_ARCH_207S)) lppaca_of(cpu).ebb_regs_in_use = 1; addr = __pa(&lppaca_of(cpu)); ret = register_vpa(hwcpu, addr); if (ret) { pr_err("WARNING: VPA registration for cpu %d (hw %d) of area " "%lx failed with %ld\n", cpu, hwcpu, addr, ret); return; } #ifdef CONFIG_PPC_BOOK3S_64 /* * PAPR says this feature is SLB-Buffer but firmware never * reports that. All SPLPAR support SLB shadow buffer. */ if (!radix_enabled() && firmware_has_feature(FW_FEATURE_SPLPAR)) { addr = __pa(paca_ptrs[cpu]->slb_shadow_ptr); ret = register_slb_shadow(hwcpu, addr); if (ret) pr_err("WARNING: SLB shadow buffer registration for " "cpu %d (hw %d) of area %lx failed with %ld\n", cpu, hwcpu, addr, ret); } #endif /* CONFIG_PPC_BOOK3S_64 */ /* * Register dispatch trace log, if one has been allocated. */ pp = paca_ptrs[cpu]; dtl = pp->dispatch_log; if (dtl) { pp->dtl_ridx = 0; pp->dtl_curr = dtl; lppaca_of(cpu).dtl_idx = 0; /* hypervisor reads buffer length from this field */ dtl->enqueue_to_dispatch_time = cpu_to_be32(DISPATCH_LOG_BYTES); ret = register_dtl(hwcpu, __pa(dtl)); if (ret) pr_err("WARNING: DTL registration of cpu %d (hw %d) " "failed with %ld\n", smp_processor_id(), hwcpu, ret); lppaca_of(cpu).dtl_enable_mask = 2; } }
/* * This ought to be kept in sync with the powerpc specific do_page_fault * function. Currently, there are a few corner cases that we haven't had * to handle fortunately. */ int copro_handle_mm_fault(struct mm_struct *mm, unsigned long ea, unsigned long dsisr, unsigned *flt) { struct vm_area_struct *vma; unsigned long is_write; int ret; if (mm == NULL) return -EFAULT; if (mm->pgd == NULL) return -EFAULT; down_read(&mm->mmap_sem); ret = -EFAULT; vma = find_vma(mm, ea); if (!vma) goto out_unlock; if (ea < vma->vm_start) { if (!(vma->vm_flags & VM_GROWSDOWN)) goto out_unlock; if (expand_stack(vma, ea)) goto out_unlock; } is_write = dsisr & DSISR_ISSTORE; if (is_write) { if (!(vma->vm_flags & VM_WRITE)) goto out_unlock; } else { if (!(vma->vm_flags & (VM_READ | VM_EXEC))) goto out_unlock; /* * PROT_NONE is covered by the VMA check above. * and hash should get a NOHPTE fault instead of * a PROTFAULT in case fixup is needed for things * like autonuma. */ if (!radix_enabled()) WARN_ON_ONCE(dsisr & DSISR_PROTFAULT); } ret = 0; *flt = handle_mm_fault(vma, ea, is_write ? FAULT_FLAG_WRITE : 0); if (unlikely(*flt & VM_FAULT_ERROR)) { if (*flt & VM_FAULT_OOM) { ret = -ENOMEM; goto out_unlock; } else if (*flt & (VM_FAULT_SIGBUS | VM_FAULT_SIGSEGV)) { ret = -EFAULT; goto out_unlock; } BUG(); } if (*flt & VM_FAULT_MAJOR) current->maj_flt++; else current->min_flt++; out_unlock: up_read(&mm->mmap_sem); return ret; }
/*
 * Called at the end of handling a user page fault, once the fault has been
 * resolved by updating a HUGE PMD entry in the linux page tables.
 *
 * As written here, the only action is a prefetch of @addr when radix is
 * enabled; nothing is done otherwise, and @vma and @pmd are not used.
 *
 * NOTE(review): earlier wording described preloading an HPTE into the hash
 * table for the updated PMD - that is not what the visible body does;
 * confirm which is intended.
 */
void update_mmu_cache_pmd(struct vm_area_struct *vma, unsigned long addr,
			  pmd_t *pmd)
{
	if (!radix_enabled())
		return;

	prefetch((void *)addr);
}
/*
 * Report whether, or to what extent, capability @ext is supported.
 *
 * @kvm: VM being queried, or NULL.  With a VM, its type decides whether HV
 *       mode is in effect; without one, HV mode is assumed whenever
 *       kvmppc_hv_ops is set.
 * @ext: capability number (KVM_CAP_*).
 *
 * Returns 0 for an unsupported or unknown capability.  For supported ones
 * the value is either a plain 1/boolean or a capability-specific number
 * (e.g. an SMT mode, a bitmask of possible SMT modes, a CPU or slot count).
 *
 * Which case labels exist depends on the kernel configuration; labels that
 * are compiled out fall to the default and report 0.
 */
int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
{
	int r;
	/* Assume we're using HV mode when the HV module is loaded */
	int hv_enabled = kvmppc_hv_ops ? 1 : 0;

	if (kvm) {
		/*
		 * Hooray - we know which VM type we're running on. Depend on
		 * that rather than the guess above.
		 */
		hv_enabled = is_kvmppc_hv_enabled(kvm);
	}

	switch (ext) {
	/* Capabilities reported as present regardless of HV/PR. */
#ifdef CONFIG_BOOKE
	case KVM_CAP_PPC_BOOKE_SREGS:
	case KVM_CAP_PPC_BOOKE_WATCHDOG:
	case KVM_CAP_PPC_EPR:
#else
	case KVM_CAP_PPC_SEGSTATE:
	case KVM_CAP_PPC_HIOR:
	case KVM_CAP_PPC_PAPR:
#endif
	case KVM_CAP_PPC_UNSET_IRQ:
	case KVM_CAP_PPC_IRQ_LEVEL:
	case KVM_CAP_ENABLE_CAP:
	case KVM_CAP_ENABLE_CAP_VM:
	case KVM_CAP_ONE_REG:
	case KVM_CAP_IOEVENTFD:
	case KVM_CAP_DEVICE_CTRL:
	case KVM_CAP_IMMEDIATE_EXIT:
		r = 1;
		break;
	case KVM_CAP_PPC_PAIRED_SINGLES:
	case KVM_CAP_PPC_OSI:
	case KVM_CAP_PPC_GET_PVINFO:
#if defined(CONFIG_KVM_E500V2) || defined(CONFIG_KVM_E500MC)
	case KVM_CAP_SW_TLB:
#endif
		/* We support this only for PR */
		r = !hv_enabled;
		break;
#ifdef CONFIG_KVM_MPIC
	case KVM_CAP_IRQ_MPIC:
		r = 1;
		break;
#endif

#ifdef CONFIG_PPC_BOOK3S_64
	case KVM_CAP_SPAPR_TCE:
	case KVM_CAP_SPAPR_TCE_64:
		/* fallthrough */
	case KVM_CAP_SPAPR_TCE_VFIO:
	case KVM_CAP_PPC_RTAS:
	case KVM_CAP_PPC_FIXUP_HCALL:
	case KVM_CAP_PPC_ENABLE_HCALL:
#ifdef CONFIG_KVM_XICS
	case KVM_CAP_IRQ_XICS:
#endif
	case KVM_CAP_PPC_GET_CPU_CHAR:
		r = 1;
		break;

	case KVM_CAP_PPC_ALLOC_HTAB:
		r = hv_enabled;
		break;
#endif /* CONFIG_PPC_BOOK3S_64 */
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
	case KVM_CAP_PPC_SMT:
		/*
		 * With a VM: its emulated SMT mode if that is above 1, else
		 * its real SMT mode.  Without a VM but with HV: 1 on ISA 3.00
		 * CPUs, threads_per_subcore otherwise.  Else 0.
		 */
		r = 0;
		if (kvm) {
			if (kvm->arch.emul_smt_mode > 1)
				r = kvm->arch.emul_smt_mode;
			else
				r = kvm->arch.smt_mode;
		} else if (hv_enabled) {
			if (cpu_has_feature(CPU_FTR_ARCH_300))
				r = 1;
			else
				r = threads_per_subcore;
		}
		break;
	case KVM_CAP_PPC_SMT_POSSIBLE:
		/* Result is a bitmask of SMT modes; only mode 1 without HV. */
		r = 1;
		if (hv_enabled) {
			if (!cpu_has_feature(CPU_FTR_ARCH_300))
				r = ((threads_per_subcore << 1) - 1);
			else
				/* P9 can emulate dbells, so allow any mode */
				r = 8 | 4 | 2 | 1;
		}
		break;
	case KVM_CAP_PPC_RMA:
		r = 0;
		break;
	case KVM_CAP_PPC_HWRNG:
		r = kvmppc_hwrng_present();
		break;
	case KVM_CAP_PPC_MMU_RADIX:
		r = !!(hv_enabled && radix_enabled());
		break;
	case KVM_CAP_PPC_MMU_HASH_V3:
		r = !!(hv_enabled && cpu_has_feature(CPU_FTR_ARCH_300));
		break;
#endif
	case KVM_CAP_SYNC_MMU:
		/* The answer is chosen at build time by the config below. */
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
		r = hv_enabled;
#elif defined(KVM_ARCH_WANT_MMU_NOTIFIER)
		r = 1;
#else
		r = 0;
#endif
		break;
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
	case KVM_CAP_PPC_HTAB_FD:
		r = hv_enabled;
		break;
#endif
	case KVM_CAP_NR_VCPUS:
		/*
		 * Recommending a number of CPUs is somewhat arbitrary; we
		 * return the number of present CPUs for -HV (since a host
		 * will have secondary threads "offline"), and for other KVM
		 * implementations just count online CPUs.
		 */
		if (hv_enabled)
			r = num_present_cpus();
		else
			r = num_online_cpus();
		break;
	case KVM_CAP_NR_MEMSLOTS:
		r = KVM_USER_MEM_SLOTS;
		break;
	case KVM_CAP_MAX_VCPUS:
		r = KVM_MAX_VCPUS;
		break;
#ifdef CONFIG_PPC_BOOK3S_64
	case KVM_CAP_PPC_GET_SMMU_INFO:
		r = 1;
		break;
	case KVM_CAP_SPAPR_MULTITCE:
		r = 1;
		break;
	case KVM_CAP_SPAPR_RESIZE_HPT:
		r = !!hv_enabled;
		break;
#endif
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
	case KVM_CAP_PPC_FWNMI:
		r = hv_enabled;
		break;
#endif
	case KVM_CAP_PPC_HTM:
		/* Needs HV and the PPC_FEATURE2_HTM_COMP user feature bit. */
		r = hv_enabled &&
		    (cur_cpu_spec->cpu_user_features2 & PPC_FEATURE2_HTM_COMP);
		break;
	default:
		/* Unknown or compiled-out capability. */
		r = 0;
		break;
	}

	return r;
}