/*
 * Build a cache-aware pgprot from a generic pgprot_t and a cache home.
 *
 * The home is applied first via pte_set_home().  For PAGE_HOME_IMMUTABLE
 * the caching mode is then overridden: hash-for-home L3 when ktext_hash
 * is set, otherwise no-L3 caching.  Any other home is returned exactly
 * as pte_set_home() produced it.
 */
static pgprot_t __init construct_pgprot(pgprot_t prot, int home)
{
	pgprot_t homed = pte_set_home(prot, home);

	if (home != PAGE_HOME_IMMUTABLE)
		return homed;

	return hv_pte_set_mode(homed, ktext_hash ? HV_PTE_MODE_CACHE_HASH_L3
						 : HV_PTE_MODE_CACHE_NO_L3);
}
/*
 * Map an arbitrary MMIO address, homed according to pgprot, into VA space.
 *
 * @phys_addr: physical start address of the region; need not be
 *             page-aligned.
 * @size:      length of the region in bytes; must be non-zero.
 * @home:      pgprot whose LOTAR field selects the home of the mapping;
 *             only hv_pte_get_lotar(home) is consulted.
 *
 * Returns a pointer to the byte of the new mapping that corresponds to
 * @phys_addr, or NULL if @size is zero, the range wraps around, no VM
 * area could be obtained, or the page range could not be mapped.
 */
void __iomem *ioremap_prot(resource_size_t phys_addr, unsigned long size,
			   pgprot_t home)
{
	void *addr;
	struct vm_struct *area;
	unsigned long offset;
	resource_size_t last_addr;
	pgprot_t pgprot;

	/*
	 * Don't allow wraparound or zero size.  The end address is kept
	 * in resource_size_t so that a physical address wider than
	 * unsigned long is not truncated before the comparison.
	 */
	last_addr = phys_addr + size - 1;
	if (!size || last_addr < phys_addr)
		return NULL;

	/* Create a read/write, MMIO VA mapping homed at the requested shim. */
	pgprot = PAGE_KERNEL;
	pgprot = hv_pte_set_mode(pgprot, HV_PTE_MODE_MMIO);
	pgprot = hv_pte_set_lotar(pgprot, hv_pte_get_lotar(home));

	/*
	 * Mappings have to be page-aligned.  Subtract the in-page offset
	 * instead of masking with PAGE_MASK: the result is the same, but
	 * it cannot clear high address bits if PAGE_MASK is narrower than
	 * resource_size_t.
	 */
	offset = phys_addr & ~PAGE_MASK;
	phys_addr -= offset;
	size = PAGE_ALIGN(last_addr + 1) - phys_addr;

	/*
	 * Ok, go for it..
	 */
	area = get_vm_area(size, VM_IOREMAP /* | other flags? */);
	if (!area)
		return NULL;
	area->phys_addr = phys_addr;
	addr = area->addr;
	if (ioremap_page_range((unsigned long)addr, (unsigned long)addr + size,
			       phys_addr, pgprot)) {
		/*
		 * remove_vm_area() only unlinks the area and hands the
		 * vm_struct back to the caller; free_vm_area() removes it
		 * and releases the vm_struct too, so nothing is leaked.
		 */
		free_vm_area(area);
		return NULL;
	}

	/* Re-apply the sub-page offset that was stripped for alignment. */
	return (__force void __iomem *) (offset + (char *)addr);
}
/*
 * Copy memory by briefly enabling incoherent cacheline-at-a-time mode.
 *
 * We set up our own source and destination PTEs that we fully control.
 * This is the only way to guarantee that we don't race with another
 * thread that is modifying the PTE; we can't afford to try the
 * copy_{to,from}_user() technique of catching the interrupt, since
 * we must run with interrupts disabled to avoid the risk of some
 * other code seeing the incoherent data in our cache.  (Recall that
 * our cache is indexed by PA, so even if the other code doesn't use
 * our kmap_atomic virtual addresses, they'll still hit in cache using
 * the normal VAs that aren't supposed to hit in cache.)
 *
 * @dest:    destination address; only its offset within the page is used.
 * @source:  source address; only its offset within the page is used.
 * @dst_pte: PTE installed for the temporary destination mapping.
 * @src_pte: PTE for the source page; it is made non-coherent and
 *           read-only for the copy.
 * @len:     number of bytes handed to __memcpy_asm() and __inv_buffer().
 */
static void memcpy_multicache(void *dest, const void *source,
			      pte_t dst_pte, pte_t src_pte, int len)
{
	int idx;
	unsigned long flags, newsrc, newdst;
	pmd_t *pmdp;
	pte_t *ptep;
	int type0, type1;
	int cpu = get_cpu();

	/*
	 * Disable interrupts so that we don't recurse into memcpy()
	 * in an interrupt handler, nor accidentally reference
	 * the PA of the source from an interrupt routine.  Also
	 * notify the simulator that we're playing games so we don't
	 * generate spurious coherency warnings.
	 */
	local_irq_save(flags);
	sim_allow_multiple_caching(1);

	/* Set up the new dest mapping */
	type0 = kmap_atomic_idx_push();
	idx = FIX_KMAP_BEGIN + (KM_TYPE_NR * cpu) + type0;
	newdst = __fix_to_virt(idx) + ((unsigned long)dest & (PAGE_SIZE-1));
	pmdp = pmd_offset(pud_offset(pgd_offset_k(newdst), newdst), newdst);
	ptep = pte_offset_kernel(pmdp, newdst);
	if (pte_val(*ptep) != pte_val(dst_pte)) {
		set_pte(ptep, dst_pte);
		local_flush_tlb_page(NULL, newdst, PAGE_SIZE);
	}

	/*
	 * Set up the new source mapping.
	 *
	 * Move idx from the dest slot to the slot that was just pushed
	 * for the source.  A slot index is
	 * FIX_KMAP_BEGIN + KM_TYPE_NR * cpu + type, exactly as computed
	 * for the dest above, so the delta is (type1 - type0).  The
	 * opposite sign would pick a slot this function never reserved.
	 */
	type1 = kmap_atomic_idx_push();
	idx += (type1 - type0);
	src_pte = hv_pte_set_nc(src_pte);
	src_pte = hv_pte_clear_writable(src_pte);  /* be paranoid */
	newsrc = __fix_to_virt(idx) + ((unsigned long)source & (PAGE_SIZE-1));
	pmdp = pmd_offset(pud_offset(pgd_offset_k(newsrc), newsrc), newsrc);
	ptep = pte_offset_kernel(pmdp, newsrc);
	__set_pte(ptep, src_pte);   /* set_pte() would be confused by this */
	local_flush_tlb_page(NULL, newsrc, PAGE_SIZE);

	/* Actually move the data. */
	__memcpy_asm((void *)newdst, (const void *)newsrc, len);

	/*
	 * Remap the source as locally-cached and not OLOC'ed so that
	 * we can inval without also invaling the remote cpu's cache.
	 * This also avoids known errata with inv'ing cacheable oloc data.
	 */
	src_pte = hv_pte_set_mode(src_pte, HV_PTE_MODE_CACHE_NO_L3);
	src_pte = hv_pte_set_writable(src_pte); /* need write access for inv */
	__set_pte(ptep, src_pte);   /* set_pte() would be confused by this */
	local_flush_tlb_page(NULL, newsrc, PAGE_SIZE);

	/*
	 * Do the actual invalidation, covering the full L2 cache line
	 * at the end since __memcpy_asm() is somewhat aggressive.
	 */
	__inv_buffer((void *)newsrc, len);

	/*
	 * We're done: notify the simulator that all is back to normal,
	 * and re-enable interrupts and pre-emption.  The two pops undo
	 * the two kmap_atomic_idx_push() calls above.
	 */
	kmap_atomic_idx_pop();
	kmap_atomic_idx_pop();
	sim_allow_multiple_caching(0);
	local_irq_restore(flags);
	put_cpu();
}
/* Update the home of a PTE if necessary (can also be used for a pgprot_t). */ pte_t pte_set_home(pte_t pte, int home) { /* Check for non-linear file mapping "PTEs" and pass them through. */ if (pte_file(pte)) return pte; #if CHIP_HAS_MMIO() /* Check for MMIO mappings and pass them through. */ if (hv_pte_get_mode(pte) == HV_PTE_MODE_MMIO) return pte; #endif /* * Only immutable pages get NC mappings. If we have a * non-coherent PTE, but the underlying page is not * immutable, it's likely the result of a forced * caching setting running up against ptrace setting * the page to be writable underneath. In this case, * just keep the PTE coherent. */ if (hv_pte_get_nc(pte) && home != PAGE_HOME_IMMUTABLE) { pte = hv_pte_clear_nc(pte); pr_err("non-immutable page incoherently referenced: %#llx\n", pte.val); } switch (home) { case PAGE_HOME_UNCACHED: pte = hv_pte_set_mode(pte, HV_PTE_MODE_UNCACHED); break; case PAGE_HOME_INCOHERENT: pte = hv_pte_set_mode(pte, HV_PTE_MODE_CACHE_NO_L3); break; case PAGE_HOME_IMMUTABLE: /* * We could home this page anywhere, since it's immutable, * but by default just home it to follow "hash_default". */ BUG_ON(hv_pte_get_writable(pte)); if (pte_get_forcecache(pte)) { /* Upgrade "force any cpu" to "No L3" for immutable. 
*/ if (hv_pte_get_mode(pte) == HV_PTE_MODE_CACHE_TILE_L3 && pte_get_anyhome(pte)) { pte = hv_pte_set_mode(pte, HV_PTE_MODE_CACHE_NO_L3); } } else #if CHIP_HAS_CBOX_HOME_MAP() if (hash_default) pte = hv_pte_set_mode(pte, HV_PTE_MODE_CACHE_HASH_L3); else #endif pte = hv_pte_set_mode(pte, HV_PTE_MODE_CACHE_NO_L3); pte = hv_pte_set_nc(pte); break; #if CHIP_HAS_CBOX_HOME_MAP() case PAGE_HOME_HASH: pte = hv_pte_set_mode(pte, HV_PTE_MODE_CACHE_HASH_L3); break; #endif default: BUG_ON(home < 0 || home >= NR_CPUS || !cpu_is_valid_lotar(home)); pte = hv_pte_set_mode(pte, HV_PTE_MODE_CACHE_TILE_L3); pte = set_remote_cache_cpu(pte, home); break; } #if CHIP_HAS_NC_AND_NOALLOC_BITS() if (noallocl2) pte = hv_pte_set_no_alloc_l2(pte); /* Simplify "no local and no l3" to "uncached" */ if (hv_pte_get_no_alloc_l2(pte) && hv_pte_get_no_alloc_l1(pte) && hv_pte_get_mode(pte) == HV_PTE_MODE_CACHE_NO_L3) { pte = hv_pte_set_mode(pte, HV_PTE_MODE_UNCACHED); } #endif /* Checking this case here gives a better panic than from the hv. */ BUG_ON(hv_pte_get_mode(pte) == 0); return pte; }
/*
 * Copy len bytes through two temporary per-cpu fixmap mappings, one for
 * the destination page and one (non-coherent, read-only) for the source
 * page, then invalidate the source range from the local cache.
 *
 * @dest:    destination address; only its offset within the page is used.
 * @source:  source address; only its offset within the page is used.
 * @dst_pte: PTE installed for the temporary destination mapping.
 * @src_pte: PTE for the source page, modified locally before it is
 *           installed.
 * @len:     number of bytes handed to __memcpy_asm() and __inv_buffer().
 *
 * Runs with interrupts disabled and the cpu pinned (get_cpu()/put_cpu())
 * for the whole operation.
 */
static void memcpy_multicache(void *dest, const void *source,
			      pte_t dst_pte, pte_t src_pte, int len)
{
	int idx;
	unsigned long flags, newsrc, newdst;
	pmd_t *pmdp;
	pte_t *ptep;
	int type0, type1;
	int cpu = get_cpu();

	/* Block interrupts for the duration of the copy. */
	local_irq_save(flags);
	sim_allow_multiple_caching(1);

	/* Map the destination page at this cpu's first freshly pushed slot. */
	type0 = kmap_atomic_idx_push();
	idx = FIX_KMAP_BEGIN + (KM_TYPE_NR * cpu) + type0;
	newdst = __fix_to_virt(idx) + ((unsigned long)dest & (PAGE_SIZE-1));
	pmdp = pmd_offset(pud_offset(pgd_offset_k(newdst), newdst), newdst);
	ptep = pte_offset_kernel(pmdp, newdst);
	if (pte_val(*ptep) != pte_val(dst_pte)) {
		/* Only rewrite the PTE and flush when it actually changes. */
		set_pte(ptep, dst_pte);
		local_flush_tlb_page(NULL, newdst, PAGE_SIZE);
	}

	/*
	 * Map the source page at the second pushed slot.  The index is
	 * built from type1 the same way the dest index was built from
	 * type0; deriving it as "idx + (type0 - type1)" would step to the
	 * slot below the dest, which this function never reserved.
	 */
	type1 = kmap_atomic_idx_push();
	idx = FIX_KMAP_BEGIN + (KM_TYPE_NR * cpu) + type1;
	src_pte = hv_pte_set_nc(src_pte);
	src_pte = hv_pte_clear_writable(src_pte);  /* source is only read */
	newsrc = __fix_to_virt(idx) + ((unsigned long)source & (PAGE_SIZE-1));
	pmdp = pmd_offset(pud_offset(pgd_offset_k(newsrc), newsrc), newsrc);
	ptep = pte_offset_kernel(pmdp, newsrc);
	__set_pte(ptep, src_pte);   /* raw store; bypasses set_pte() */
	local_flush_tlb_page(NULL, newsrc, PAGE_SIZE);

	/* Perform the copy through the two temporary mappings. */
	__memcpy_asm((void *)newdst, (const void *)newsrc, len);

	/*
	 * Switch the source mapping to no-L3 caching and make it writable
	 * again before invalidating through it.
	 */
	src_pte = hv_pte_set_mode(src_pte, HV_PTE_MODE_CACHE_NO_L3);
	src_pte = hv_pte_set_writable(src_pte);
	__set_pte(ptep, src_pte);   /* raw store; bypasses set_pte() */
	local_flush_tlb_page(NULL, newsrc, PAGE_SIZE);

	/* Invalidate the source range that the copy pulled in. */
	__inv_buffer((void *)newsrc, len);

	/* Unwind in reverse: both kmap slots, simulator, irqs, preemption. */
	kmap_atomic_idx_pop();
	kmap_atomic_idx_pop();
	sim_allow_multiple_caching(0);
	local_irq_restore(flags);
	put_cpu();
}
/*
 * This maps the physical memory to kernel virtual address space, a total
 * of max_low_pfn pages, by creating page tables starting from address
 * PAGE_OFFSET.
 *
 * This routine transitions us from using a set of compiled-in large
 * pages to using some more precise caching, including removing access
 * to code pages mapped at PAGE_OFFSET (executed only at MEM_SV_START)
 * marking read-only data as locally cacheable, striping the remaining
 * .data and .bss across all the available tiles, and removing access
 * to pages above the top of RAM (thus ensuring a page fault from a bad
 * virtual address rather than a hypervisor shoot down for accessing
 * memory outside the assigned limits).
 *
 * The work is done in this order:
 *   1. reconcile the ktext/kdata boot options with hash-for-home;
 *   2. fill in the L2 page tables for every node's memory in pgtables[];
 *   3. settle ktext_mask and map the kernel text starting at
 *      MEM_SV_INTRPT, with small or huge pages depending on ktext_small;
 *   4. flush, install pgtables[], then copy it into pgd_base and
 *      install that instead.
 *
 * @pgd_base: page directory that receives a copy of pgtables[] and is
 *            installed as the page table at the end.
 */
static void __init kernel_physical_mapping_init(pgd_t *pgd_base)
{
	unsigned long long irqmask;
	unsigned long address, pfn;
	pmd_t *pmd;
	pte_t *pte;
	int pte_ofs;
	const struct cpumask *my_cpu_mask = cpumask_of(smp_processor_id());
	struct cpumask kstripe_mask;
	int rc, i;

#if CHIP_HAS_CBOX_HOME_MAP()
	/* Hash-for-home text overrides "ktext"; fall back to huge text pages. */
	if (ktext_arg_seen && ktext_hash) {
		pr_warning("warning: \"ktext\" boot argument ignored"
			   " if \"kcache_hash\" sets up text hash-for-home\n");
		ktext_small = 0;
	}

	/* Warn only; no setting is changed for the kdata case here. */
	if (kdata_arg_seen && kdata_hash) {
		pr_warning("warning: \"kdata\" boot argument ignored"
			   " if \"kcache_hash\" sets up data hash-for-home\n");
	}

	/* "kdata=huge" is dropped unless hash_default is set. */
	if (kdata_huge && !hash_default) {
		pr_warning("warning: disabling \"kdata=huge\"; requires"
			  " kcache_hash=all or =allbutstack\n");
		kdata_huge = 0;
	}
#endif

	/*
	 * Set up a mask for cpus to use for kernel striping.
	 * As written here, this is simply a copy of cpu_possible_mask;
	 * it also becomes kdata_mask when no "kdata" argument was seen.
	 */
	cpumask_copy(&kstripe_mask, cpu_possible_mask);
	if (!kdata_arg_seen)
		kdata_mask = kstripe_mask;

	/* Allocate and fill in L2 page tables */
	for (i = 0; i < MAX_NUMNODES; ++i) {
#ifdef CONFIG_HIGHMEM
		unsigned long end_pfn = node_lowmem_end_pfn[i];
#else
		unsigned long end_pfn = node_end_pfn[i];
#endif
		/* Zero means "no huge pages": every pfn takes the small path. */
		unsigned long end_huge_pfn = 0;

		/* Pre-shatter the last huge page to allow per-cpu pages. */
		if (kdata_huge)
			end_huge_pfn = end_pfn - (HPAGE_SIZE >> PAGE_SHIFT);

		pfn = node_start_pfn[i];

		/* Allocate enough memory to hold L2 page tables for node. */
		init_prealloc_ptes(i, end_pfn - pfn);

		address = (unsigned long) pfn_to_kaddr(pfn);
		/* Each pass of this loop covers PTRS_PER_PTE pages. */
		while (pfn < end_pfn) {
			BUG_ON(address & (HPAGE_SIZE-1));
			pmd = get_pmd(pgtables, address);
			pte = get_prealloc_pte(pfn);
			if (pfn < end_huge_pfn) {
				/*
				 * Huge mapping: one pgprot for the whole
				 * range.  The pmd holds the huge PTE; the
				 * small PTEs are filled in too but not
				 * hooked up via assign_pte().
				 */
				pgprot_t prot = init_pgprot(address);
				*(pte_t *)pmd = pte_mkhuge(pfn_pte(pfn, prot));
				for (pte_ofs = 0; pte_ofs < PTRS_PER_PTE;
				     pfn++, pte_ofs++, address += PAGE_SIZE)
					pte[pte_ofs] = pfn_pte(pfn, prot);
			} else {
				if (kdata_huge)
					printk(KERN_DEBUG "pre-shattered huge"
					       " page at %#lx\n", address);
				/* Small pages: pgprot is chosen per page. */
				for (pte_ofs = 0; pte_ofs < PTRS_PER_PTE;
				     pfn++, pte_ofs++, address += PAGE_SIZE) {
					pgprot_t prot = init_pgprot(address);
					pte[pte_ofs] = pfn_pte(pfn, prot);
				}
				assign_pte(pmd, pte);
			}
		}
	}

	/*
	 * Set or check ktext_mask now that we have cpu_possible_mask
	 * and kstripe_mask to work with.
	 */
	if (ktext_all)
		cpumask_copy(&ktext_mask, cpu_possible_mask);
	else if (ktext_nondataplane)
		ktext_mask = kstripe_mask;
	else if (!cpumask_empty(&ktext_mask)) {
		/* Sanity-check any mask that was requested */
		struct cpumask bad;
		cpumask_andnot(&bad, &ktext_mask, cpu_possible_mask);
		cpumask_and(&ktext_mask, &ktext_mask, cpu_possible_mask);
		if (!cpumask_empty(&bad)) {
			char buf[NR_CPUS * 5];
			cpulist_scnprintf(buf, sizeof(buf), &bad);
			pr_info("ktext: not using unavailable cpus %s\n", buf);
		}
		/* Nothing usable left: fall back to the current cpu alone. */
		if (cpumask_empty(&ktext_mask)) {
			pr_warning("ktext: no valid cpus; caching on %d.\n",
				   smp_processor_id());
			cpumask_copy(&ktext_mask,
				     cpumask_of(smp_processor_id()));
		}
	}

	address = MEM_SV_INTRPT;
	pmd = get_pmd(pgtables, address);
	pfn = 0;  /* code starts at PA 0 */
	if (ktext_small) {
		/* Allocate an L2 PTE for the kernel text */
		int cpu = 0;
		pgprot_t prot = construct_pgprot(PAGE_KERNEL_EXEC,
						 PAGE_HOME_IMMUTABLE);

		if (ktext_local) {
			if (ktext_nocache)
				prot = hv_pte_set_mode(prot,
						       HV_PTE_MODE_UNCACHED);
			else
				prot = hv_pte_set_mode(prot,
						       HV_PTE_MODE_CACHE_NO_L3);
		} else {
			/* Text is homed on the cpus of ktext_mask in turn. */
			prot = hv_pte_set_mode(prot,
					       HV_PTE_MODE_CACHE_TILE_L3);
			cpu = cpumask_first(&ktext_mask);

			prot = ktext_set_nocache(prot);
		}

		BUG_ON(address != (unsigned long)_stext);
		pte = NULL;
		for (; address < (unsigned long)_einittext;
		     pfn++, address += PAGE_SIZE) {
			pte_ofs = pte_index(address);
			/*
			 * Index 0 starts a new L2 table: hook up the one
			 * just finished (if any) and allocate the next.
			 */
			if (pte_ofs == 0) {
				if (pte)
					assign_pte(pmd++, pte);
				pte = alloc_pte();
			}
			if (!ktext_local) {
				/* Round-robin over ktext_mask, wrapping. */
				prot = set_remote_cache_cpu(prot, cpu);
				cpu = cpumask_next(cpu, &ktext_mask);
				/*
				 * NOTE(review): the generic cpumask API
				 * documents the end-of-mask result as
				 * ">= nr_cpu_ids"; this test relies on it
				 * being exactly NR_CPUS here -- confirm.
				 */
				if (cpu == NR_CPUS)
					cpu = cpumask_first(&ktext_mask);
			}
			pte[pte_ofs] = pfn_pte(pfn, prot);
		}
		/* Hook up the final, possibly partly filled, L2 table. */
		if (pte)
			assign_pte(pmd, pte);
	} else {
		/* Huge-page text: build one template PTE, reused per pmd. */
		pte_t pteval = pfn_pte(0, PAGE_KERNEL_EXEC);
		pteval = pte_mkhuge(pteval);
#if CHIP_HAS_CBOX_HOME_MAP()
		if (ktext_hash) {
			pteval = hv_pte_set_mode(pteval,
						 HV_PTE_MODE_CACHE_HASH_L3);
			pteval = ktext_set_nocache(pteval);
		} else
#endif /* CHIP_HAS_CBOX_HOME_MAP() */
		if (cpumask_weight(&ktext_mask) == 1) {
			/* Exactly one cpu requested: home all text there. */
			pteval = set_remote_cache_cpu(pteval,
					      cpumask_first(&ktext_mask));
			pteval = hv_pte_set_mode(pteval,
						 HV_PTE_MODE_CACHE_TILE_L3);
			pteval = ktext_set_nocache(pteval);
		} else if (ktext_nocache)
			pteval = hv_pte_set_mode(pteval,
						 HV_PTE_MODE_UNCACHED);
		else
			pteval = hv_pte_set_mode(pteval,
						 HV_PTE_MODE_CACHE_NO_L3);
		/* pteval is passed as the protection argument of pfn_pte(). */
		for (; address < (unsigned long)_einittext;
		     pfn += PFN_DOWN(HPAGE_SIZE), address += HPAGE_SIZE)
			*(pte_t *)(pmd++) = pfn_pte(pfn, pteval);
	}

	/* Set swapper_pgprot here so it is flushed to memory right away. */
	swapper_pgprot = init_pgprot((unsigned long)swapper_pg_dir);

	/*
	 * Since we may be changing the caching of the stack and page
	 * table itself, we invoke an assembly helper to do the
	 * following steps:
	 *
	 *  - flush the cache so we start with an empty slate
	 *  - install pgtables[] as the real page table
	 *  - flush the TLB so the new page table takes effect
	 *
	 * All interrupts are masked around the call and the previous
	 * mask is restored afterwards.
	 */
	irqmask = interrupt_mask_save_mask();
	interrupt_mask_set_mask(-1ULL);
	rc = flush_and_install_context(__pa(pgtables),
				       init_pgprot((unsigned long)pgtables),
				       __get_cpu_var(current_asid),
				       cpumask_bits(my_cpu_mask));
	interrupt_mask_restore_mask(irqmask);
	BUG_ON(rc != 0);

	/* Copy the page table back to the normal swapper_pg_dir. */
	memcpy(pgd_base, pgtables, sizeof(pgtables));
	__install_page_table(pgd_base, __get_cpu_var(current_asid),
			     swapper_pgprot);

	/*
	 * We just read swapper_pgprot and thus brought it into the cache,
	 * with its new home & caching mode.  When we start the other CPUs,
	 * they're going to reference swapper_pgprot via their initial fake
	 * VA-is-PA mappings, which cache everything locally.  At that
	 * time, if it's in our cache with a conflicting home, the
	 * simulator's coherence checker will complain.  So, flush it out
	 * of our cache; we're not going to ever use it again anyway.
	 */
	__insn_finv(&swapper_pgprot);
}
int arch_vm_area_flags(struct mm_struct *mm, unsigned long flags, unsigned long vm_flags, pid_t *pid_ptr, pgprot_t *prot_ptr) { pgprot_t prot = __pgprot(0); pid_t pid = 0; #if CHIP_HAS_NC_AND_NOALLOC_BITS() if (flags & MAP_CACHE_NO_L1) prot = hv_pte_set_no_alloc_l1(prot); if (flags & MAP_CACHE_NO_L2) prot = hv_pte_set_no_alloc_l2(prot); #endif #if CHIP_HAS_CBOX_HOME_MAP() /* Certain types of mapping have standard hash-for-home defaults. */ if (!(flags & _MAP_CACHE_HOME)) { if ((flags & (MAP_GROWSDOWN | MAP_ANONYMOUS)) == (MAP_GROWSDOWN | MAP_ANONYMOUS)) flags |= ucache_flags(STACK); else if ((flags & MAP_ANONYMOUS) == MAP_ANONYMOUS) flags |= ucache_flags(HEAP); else if ((flags & (MAP_ANONYMOUS | MAP_PRIVATE)) == MAP_PRIVATE) flags |= (vm_flags & PROT_WRITE) ? ucache_flags(DATA) : ucache_flags(TEXT); } #endif /* * If the only request is for what the kernel does naturally, * remove it, to avoid unnecessary use of VM_DONTMERGE. */ if (flags & MAP_ANONYMOUS) { switch (flags & _MAP_CACHE_MKHOME(_MAP_CACHE_HOME_MASK)) { #if CHIP_HAS_CBOX_HOME_MAP() case MAP_CACHE_HOME_HASH: if (hash_default) flags &= ~MAP_CACHE_HOME_HASH; break; #endif case MAP_CACHE_HOME_SINGLE: if (!hash_default) flags &= ~MAP_CACHE_HOME_SINGLE; break; } } if (flags & _MAP_CACHE_HOME) prot = pte_set_forcecache(prot); if ((flags & _MAP_CACHE_MKHOME(_MAP_CACHE_HOME_MASK)) == MAP_CACHE_HOME_NONE) { /* * We special-case setting the home cache to "none". * If the user isn't indicating willingness to tolerate * incoherence, and is caching locally on the cpu, we * fail a writable mapping, or enforce a readonly mapping. */ if (!(flags & _MAP_CACHE_INCOHERENT) && (flags & MAP_CACHE_NO_LOCAL) != MAP_CACHE_NO_LOCAL) { if (vm_flags & VM_WRITE) return -EINVAL; } if ((flags & MAP_CACHE_NO_LOCAL) == MAP_CACHE_NO_LOCAL) prot = hv_pte_set_mode(prot, HV_PTE_MODE_UNCACHED); else prot = hv_pte_set_mode(prot, HV_PTE_MODE_CACHE_NO_L3); } else if (flags & _MAP_CACHE_HOME) { /* Extract the cpu (or magic cookie). 
*/ int cpu = (flags >> _MAP_CACHE_HOME_SHIFT) & _MAP_CACHE_HOME_MASK; switch (cpu) { case _MAP_CACHE_HOME_SINGLE: /* * This is the default case; we set "anyhome" * and the OS will pick the cpu for us in pfn_pte() * by examining the page_home() of the page. */ prot = hv_pte_set_mode(prot, HV_PTE_MODE_CACHE_TILE_L3); prot = pte_set_anyhome(prot); break; #if CHIP_HAS_CBOX_HOME_MAP() case _MAP_CACHE_HOME_HASH: /* Mark this page for home-map hash caching. */ prot = hv_pte_set_mode(prot, HV_PTE_MODE_CACHE_HASH_L3); break; #endif case _MAP_CACHE_HOME_TASK: pid = current->pid; /*FALLTHROUGH*/ case _MAP_CACHE_HOME_HERE: cpu = smp_processor_id(); /*FALLTHROUGH*/ default: if (cpu < 0 || cpu >= nr_cpu_ids || !cpu_is_valid_lotar(cpu)) return -EINVAL; prot = hv_pte_set_mode(prot, HV_PTE_MODE_CACHE_TILE_L3); prot = set_remote_cache_cpu(prot, cpu); } }
/*
 * Return PAGE_KERNEL with its caching mode forced to "uncached".
 *
 * set_pte() insists on some caching mode being present.  Which one is
 * used is irrelevant: the PFN is illegal, so any access takes a page
 * fault regardless.
 */
static inline pgprot_t io_prot(void)
{
	pgprot_t prot = PAGE_KERNEL;

	prot = hv_pte_set_mode(prot, HV_PTE_MODE_UNCACHED);
	return prot;
}