static int set_up_temporary_mappings(void) { unsigned long start, end, next; int error; temp_level4_pgt = (pgd_t *)get_safe_page(GFP_ATOMIC); if (!temp_level4_pgt) return -ENOMEM; /* It is safe to reuse the original kernel mapping */ set_pgd(temp_level4_pgt + pgd_index(__START_KERNEL_map), init_level4_pgt[pgd_index(__START_KERNEL_map)]); /* Set up the direct mapping from scratch */ start = (unsigned long)pfn_to_kaddr(0); end = (unsigned long)pfn_to_kaddr(max_pfn); for (; start < end; start = next) { pud_t *pud = (pud_t *)get_safe_page(GFP_ATOMIC); if (!pud) return -ENOMEM; next = start + PGDIR_SIZE; if (next > end) next = end; if ((error = res_phys_pud_init(pud, __pa(start), __pa(next)))) return error; set_pgd(temp_level4_pgt + pgd_index(start), mk_kernel_pgd(__pa(pud))); } return 0; }
/*
 * Run x86_numa_init(), record where high memory starts (the highmem pfn
 * range too when CONFIG_HIGHMEM is set), log the resulting layout, and
 * hand over to setup_bootmem_allocator().
 */
void __init initmem_init(void)
{
	x86_numa_init();

#ifdef CONFIG_HIGHMEM
	highend_pfn = max_pfn;
	/* Highmem is empty unless there are pages above max_low_pfn. */
	highstart_pfn = (max_pfn > max_low_pfn) ? max_low_pfn : max_pfn;

	printk(KERN_NOTICE "%ldMB HIGHMEM available.\n",
	       pages_to_mb(highend_pfn - highstart_pfn));
	high_memory = (void *) __va(highstart_pfn * PAGE_SIZE - 1) + 1;
#else
	high_memory = (void *) __va(max_low_pfn * PAGE_SIZE - 1) + 1;
#endif

	printk(KERN_NOTICE "%ldMB LOWMEM available.\n",
	       pages_to_mb(max_low_pfn));
	printk(KERN_DEBUG "max_low_pfn = %lx, highstart_pfn = %lx\n",
	       max_low_pfn, highstart_pfn);

	printk(KERN_DEBUG "Low memory ends at vaddr %08lx\n",
	       (ulong) pfn_to_kaddr(max_low_pfn));
	printk(KERN_DEBUG "High memory starts at vaddr %08lx\n",
	       (ulong) pfn_to_kaddr(highstart_pfn));

	__vmalloc_start_set = true;

	setup_bootmem_allocator();
}
/*
 * Populate the KASAN shadow that corresponds to the pfn range described
 * by @range, using the node id looked up for the first pfn of the range.
 */
static void __init map_range(struct range *range)
{
	void *first_kaddr = pfn_to_kaddr(range->start);
	void *last_kaddr = pfn_to_kaddr(range->end);
	unsigned long shadow_lo = (unsigned long)kasan_mem_to_shadow(first_kaddr);
	unsigned long shadow_hi = (unsigned long)kasan_mem_to_shadow(last_kaddr);

	kasan_populate_shadow(shadow_lo, shadow_hi,
			      early_pfn_to_nid(range->start));
}
/* Is address valid for reading? */ static int valid_address(struct KBacktraceIterator *kbt, unsigned long address) { HV_PTE *l1_pgtable = kbt->pgtable; HV_PTE *l2_pgtable; unsigned long pfn; HV_PTE pte; struct page *page; if (l1_pgtable == NULL) return 0; /* can't read user space in other tasks */ #ifdef CONFIG_64BIT /* Find the real l1_pgtable by looking in the l0_pgtable. */ pte = l1_pgtable[HV_L0_INDEX(address)]; if (!hv_pte_get_present(pte)) return 0; pfn = hv_pte_get_pfn(pte); if (pte_huge(pte)) { if (!pfn_valid(pfn)) { pr_err("L0 huge page has bad pfn %#lx\n", pfn); return 0; } return hv_pte_get_present(pte) && hv_pte_get_readable(pte); } page = pfn_to_page(pfn); BUG_ON(PageHighMem(page)); /* No HIGHMEM on 64-bit. */ l1_pgtable = (HV_PTE *)pfn_to_kaddr(pfn); #endif pte = l1_pgtable[HV_L1_INDEX(address)]; if (!hv_pte_get_present(pte)) return 0; pfn = hv_pte_get_pfn(pte); if (pte_huge(pte)) { if (!pfn_valid(pfn)) { pr_err("huge page has bad pfn %#lx\n", pfn); return 0; } return hv_pte_get_present(pte) && hv_pte_get_readable(pte); } page = pfn_to_page(pfn); if (PageHighMem(page)) { pr_err("L2 page table not in LOWMEM (%#llx)\n", HV_PFN_TO_CPA(pfn)); return 0; } l2_pgtable = (HV_PTE *)pfn_to_kaddr(pfn); pte = l2_pgtable[HV_L2_INDEX(address)]; return hv_pte_get_present(pte) && hv_pte_get_readable(pte); }
/*
 * Back the KASAN shadow of the pfn range in @range with memory via
 * vmemmap_populate(). Returns whatever vmemmap_populate() returns.
 */
static int __init map_range(struct range *range)
{
	void *first_kaddr = pfn_to_kaddr(range->start);
	void *last_kaddr = pfn_to_kaddr(range->end);
	unsigned long shadow_lo = (unsigned long)kasan_mem_to_shadow(first_kaddr);
	unsigned long shadow_hi = (unsigned long)kasan_mem_to_shadow(last_kaddr);

	/*
	 * Mapping one byte past shadow_hi is intentional. Several shadow
	 * bytes are checked in advance to slightly speed up the fastpath,
	 * which in rare cases can cross the boundary of the mapped shadow,
	 * so a little more is mapped here.
	 */
	return vmemmap_populate(shadow_lo, shadow_hi + 1, NUMA_NO_NODE);
}
/*
 * Translate a vmalloc address to a physical address: look up the pfn
 * behind @va, convert that page to a physical base, and add the offset
 * of @va within its page. BUGs if no pfn is found.
 */
unsigned long vmalloc_to_phys(void *va)
{
	unsigned long frame = vmalloc_to_pfn(va);
	unsigned long page_base;

	BUG_ON(!frame);

	page_base = __pa(pfn_to_kaddr(frame));
	return page_base + offset_in_page(va);
}
/*
 * Is address valid for reading?
 *
 * Walks the two-level page table recorded in @kbt by hand and returns
 * non-zero only when the final entry is both present and readable.
 * Returns 0 when @kbt has no page table, when an entry is not present,
 * or when the L2 table page cannot be reached.
 */
static int valid_address(struct KBacktraceIterator *kbt, VirtualAddress address)
{
	HV_PTE *level1 = kbt->pgtable;
	HV_PTE *level2;
	HV_PTE entry;
	unsigned long frame;
	struct page *pg;

	if (level1 == NULL)
		return 0;	/* can't read user space in other tasks */

	entry = level1[HV_L1_INDEX(address)];
	if (!hv_pte_get_present(entry))
		return 0;

	frame = hv_pte_get_pfn(entry);
	if (pte_huge(entry)) {
		if (pfn_valid(frame))
			return hv_pte_get_present(entry) &&
				hv_pte_get_readable(entry);
		pr_err("huge page has bad pfn %#lx\n", frame);
		return 0;
	}

	/* The L2 table must be directly addressable to be read here. */
	pg = pfn_to_page(frame);
	if (PageHighMem(pg)) {
		pr_err("L2 page table not in LOWMEM (%#llx)\n",
		       HV_PFN_TO_CPA(frame));
		return 0;
	}

	level2 = (HV_PTE *)pfn_to_kaddr(frame);
	entry = level2[HV_L2_INDEX(address)];
	return hv_pte_get_present(entry) && hv_pte_get_readable(entry);
}
/*
 * Bring up bootmem for a secondary memory node.
 *
 * @nid:   node id; must satisfy 0 < nid < MAX_NUMNODES
 * @start: physical start address of the node's memory
 * @end:   physical end address of the node's memory
 *
 * The node's pglist_data is placed at the first page of the range and the
 * bootmem bitmap directly after it. Both are then reserved with the
 * bootmem allocator, the node is marked online and sparsemem is told
 * about it.
 */
void __init setup_bootmem_node(int nid, unsigned long start, unsigned long end)
{
	unsigned long bootmap_pages;
	unsigned long start_pfn, free_pfn, end_pfn;

	/*
	 * Don't allow bogus node assignment. nid indexes
	 * bootmem_node_data[] below, so it must be strictly below
	 * MAX_NUMNODES (the array is presumably sized MAX_NUMNODES) and
	 * must not be negative. Node 0 is also rejected here.
	 */
	BUG_ON(nid >= MAX_NUMNODES || nid <= 0);

	/*
	 * The free pfn starts at the beginning of the range, and is
	 * advanced as necessary for pgdat and node map allocations.
	 */
	free_pfn = start_pfn = start >> PAGE_SHIFT;
	end_pfn = end >> PAGE_SHIFT;

	__add_active_range(nid, start_pfn, end_pfn);

	/* Node-local pgdat */
	NODE_DATA(nid) = pfn_to_kaddr(free_pfn);
	free_pfn += PFN_UP(sizeof(struct pglist_data));
	memset(NODE_DATA(nid), 0, sizeof(struct pglist_data));

	NODE_DATA(nid)->bdata = &bootmem_node_data[nid];
	NODE_DATA(nid)->node_start_pfn = start_pfn;
	NODE_DATA(nid)->node_spanned_pages = end_pfn - start_pfn;

	/* Node-local bootmap, placed right after the pgdat */
	bootmap_pages = bootmem_bootmap_pages(end_pfn - start_pfn);
	init_bootmem_node(NODE_DATA(nid), free_pfn, start_pfn, end_pfn);

	free_bootmem_with_active_regions(nid, end_pfn);

	/* Reserve the pgdat and bootmap space with the bootmem allocator */
	reserve_bootmem_node(NODE_DATA(nid), start_pfn << PAGE_SHIFT,
			     sizeof(struct pglist_data), BOOTMEM_DEFAULT);
	reserve_bootmem_node(NODE_DATA(nid), free_pfn << PAGE_SHIFT,
			     bootmap_pages << PAGE_SHIFT, BOOTMEM_DEFAULT);

	/* It's up */
	node_set_online(nid);

	/* Kick sparsemem */
	sparse_memory_present_with_active_regions(nid);
}
void __init mem_init(void) { int i; #ifndef __tilegx__ void *last; #endif #ifdef CONFIG_FLATMEM BUG_ON(!mem_map); #endif #ifdef CONFIG_HIGHMEM /* check that fixmap and pkmap do not overlap */ if (PKMAP_ADDR(LAST_PKMAP-1) >= FIXADDR_START) { pr_err("fixmap and kmap areas overlap - this will crash\n"); pr_err("pkstart: %lxh pkend: %lxh fixstart %lxh\n", PKMAP_BASE, PKMAP_ADDR(LAST_PKMAP-1), FIXADDR_START); BUG(); } #endif set_max_mapnr_init(); /* this will put all bootmem onto the freelists */ free_all_bootmem(); #ifndef CONFIG_64BIT /* count all remaining LOWMEM and give all HIGHMEM to page allocator */ set_non_bootmem_pages_init(); #endif mem_init_print_info(NULL); /* * In debug mode, dump some interesting memory mappings. */ #ifdef CONFIG_HIGHMEM printk(KERN_DEBUG " KMAP %#lx - %#lx\n", FIXADDR_START, FIXADDR_TOP + PAGE_SIZE - 1); printk(KERN_DEBUG " PKMAP %#lx - %#lx\n", PKMAP_BASE, PKMAP_ADDR(LAST_PKMAP) - 1); #endif printk(KERN_DEBUG " VMALLOC %#lx - %#lx\n", _VMALLOC_START, _VMALLOC_END - 1); #ifdef __tilegx__ for (i = MAX_NUMNODES-1; i >= 0; --i) { struct pglist_data *node = &node_data[i]; if (node->node_present_pages) { unsigned long start = (unsigned long) pfn_to_kaddr(node->node_start_pfn); unsigned long end = start + (node->node_present_pages << PAGE_SHIFT); printk(KERN_DEBUG " MEM%d %#lx - %#lx\n", i, start, end - 1); } } #else last =
/*
 * On SH machines the conventional approach is to stash system RAM
 * in node 0, and other memory blocks in to node 1 and up, ordered by
 * latency. Each node's pgdat is node-local at the beginning of the node,
 * immediately followed by the node mem map.
 */
void __init setup_memory(void)
{
	/* First page frame past the end of the kernel image. */
	unsigned long next_pfn = PFN_UP(__pa(_end));

	/*
	 * Node 0's pgdat lives at the first available pfn. Step past it
	 * before the bootmem allocator is told where free memory begins.
	 */
	NODE_DATA(0) = pfn_to_kaddr(next_pfn);
	next_pfn += PFN_UP(sizeof(struct pglist_data));

	memset(NODE_DATA(0), 0, sizeof(struct pglist_data));
	NODE_DATA(0)->bdata = &bootmem_node_data[0];

	/* Set up node 0 */
	setup_bootmem_allocator(next_pfn);

	/* Give the platforms a chance to hook up their nodes */
	plat_mem_setup();
}
struct vm_struct *alloc_vm_area(unsigned long size) { struct vm_struct *area; struct page *page; page = alloc_foreign_page(); if (page == NULL) { BUG(); return NULL; } area = kmalloc(sizeof(*area), GFP_KERNEL); if (area != NULL) { area->flags = VM_MAP;//XXX area->addr = pfn_to_kaddr(page_to_pfn(page)); area->size = size; area->pages = NULL; //XXX area->nr_pages = size >> PAGE_SHIFT; area->phys_addr = 0; }
/*
 * Allocate a zeroed array of @nr_pages struct page entries.
 *
 * The page allocator is tried first (without allocation-failure
 * warnings), with vmalloc() as the fallback. Returns NULL when both fail.
 */
static struct page *__kmalloc_section_memmap(unsigned long nr_pages)
{
	unsigned long bytes = sizeof(struct page) * nr_pages;
	struct page *backing;
	struct page *memmap;

	backing = alloc_pages(GFP_KERNEL|__GFP_NOWARN, get_order(bytes));
	if (backing) {
		memmap = (struct page *)pfn_to_kaddr(page_to_pfn(backing));
	} else {
		memmap = vmalloc(bytes);
		if (!memmap)
			return NULL;
	}

	memset(memmap, 0, bytes);
	return memmap;
}
/** * cmm_count_pages - Count the number of pages loaned in a particular range. * * @arg: memory_isolate_notify structure with address range and count * * Return value: * 0 on success **/ static unsigned long cmm_count_pages(void *arg) { struct memory_isolate_notify *marg = arg; struct cmm_page_array *pa; unsigned long start = (unsigned long)pfn_to_kaddr(marg->start_pfn); unsigned long end = start + (marg->nr_pages << PAGE_SHIFT); unsigned long idx; spin_lock(&cmm_lock); pa = cmm_page_list; while (pa) { if ((unsigned long)pa >= start && (unsigned long)pa < end) marg->pages_found++; for (idx = 0; idx < pa->index; idx++) if (pa->page[idx] >= start && pa->page[idx] < end) marg->pages_found++; pa = pa->next; } spin_unlock(&cmm_lock); return 0; }
/*
 * On 64-bit we don't want to invoke hash_page on user addresses from
 * interrupt context, so if the access faults, we read the page tables
 * to find which page (if any) is mapped and access it directly.
 *
 * Copies @nb bytes from user address @ptr into @buf. Returns 0 on
 * success and -EFAULT when no present, user-accessible RAM page backs
 * @ptr. The lookup and the copy run with local interrupts disabled.
 */
static int read_user_stack_slow(void __user *ptr, void *buf, int nb)
{
	unsigned long uaddr = (unsigned long) ptr;
	unsigned long in_page, frame, irqflags;
	unsigned shift;
	pgd_t *pgdir;
	pte_t *ptep, pte;
	int ret = -EFAULT;

	pgdir = current->mm->pgd;
	if (!pgdir)
		return -EFAULT;

	local_irq_save(irqflags);

	ptep = find_linux_pte_or_hugepte(pgdir, uaddr, NULL, &shift);
	if (ptep) {
		if (!shift)
			shift = PAGE_SHIFT;

		/* offset of the address inside its (possibly huge) page */
		in_page = uaddr & ((1UL << shift) - 1);

		pte = READ_ONCE(*ptep);
		if (pte_present(pte) && pte_user(pte)) {
			frame = pte_pfn(pte);
			if (page_is_ram(frame)) {
				/* no highmem to worry about here */
				memcpy(buf, pfn_to_kaddr(frame) + in_page, nb);
				ret = 0;
			}
		}
	}

	local_irq_restore(irqflags);
	return ret;
}
/*
 * On 64-bit we don't want to invoke hash_page on user addresses from
 * interrupt context, so if the access faults, we read the page tables
 * to find which page (if any) is mapped and access it directly.
 *
 * Copies @nb bytes from user address @ptr into @ret. Returns 0 on
 * success and -EFAULT when no present, user-accessible RAM page backs
 * @ptr.
 */
static int read_user_stack_slow(void __user *ptr, void *ret, int nb)
{
	unsigned long uaddr = (unsigned long) ptr;
	unsigned long in_page, frame;
	int psize;
	pgd_t *pgdir;
	pte_t *ptep, pte;

	pgdir = current->mm->pgd;
	if (!pgdir)
		return -EFAULT;

	psize = get_slice_psize(current->mm, uaddr);

	/* align address to page boundary */
	in_page = uaddr & ((1ul << mmu_psize_defs[psize].shift) - 1);
	uaddr -= in_page;

	ptep = is_huge_psize(psize) ? huge_pte_offset(current->mm, uaddr)
				    : find_linux_pte(pgdir, uaddr);
	if (ptep == NULL)
		return -EFAULT;

	pte = *ptep;
	if (!(pte_present(pte) && (pte_val(pte) & _PAGE_USER)))
		return -EFAULT;

	frame = pte_pfn(pte);
	if (!page_is_ram(frame))
		return -EFAULT;

	/* no highmem to worry about here */
	memcpy(ret, pfn_to_kaddr(frame) + in_page, nb);
	return 0;
}
/*
 * Memory hotplug notifier for KASAN shadow.
 *
 * On MEM_GOING_ONLINE the shadow for the incoming range is allocated at
 * its fixed shadow address; on MEM_OFFLINE that shadow is freed. Other
 * actions are ignored. Returns NOTIFY_BAD if the range is not aligned to
 * the shadow scale or the shadow cannot be allocated, NOTIFY_OK otherwise.
 */
static int __meminit kasan_mem_notifier(struct notifier_block *nb,
			unsigned long action, void *data)
{
	struct memory_notify *info = data;
	unsigned long first_kaddr, shadow_base, shadow_limit, shadow_bytes;
	void *shadow;

	first_kaddr = (unsigned long)pfn_to_kaddr(info->start_pfn);
	shadow_base = (unsigned long)kasan_mem_to_shadow((void *)first_kaddr);
	shadow_bytes = (info->nr_pages >> KASAN_SHADOW_SCALE_SHIFT) << PAGE_SHIFT;
	shadow_limit = shadow_base + shadow_bytes;

	/* Both the size and the start must map onto whole shadow pages. */
	if (WARN_ON(info->nr_pages % KASAN_SHADOW_SCALE_SIZE) ||
		WARN_ON(first_kaddr % (KASAN_SHADOW_SCALE_SIZE << PAGE_SHIFT)))
		return NOTIFY_BAD;

	if (action == MEM_GOING_ONLINE) {
		shadow = __vmalloc_node_range(shadow_bytes, PAGE_SIZE,
					shadow_base, shadow_limit,
					GFP_KERNEL, PAGE_KERNEL,
					VM_NO_GUARD,
					pfn_to_nid(info->start_pfn),
					__builtin_return_address(0));
		if (!shadow)
			return NOTIFY_BAD;

		kmemleak_ignore(shadow);
	} else if (action == MEM_OFFLINE) {
		vfree((void *)shadow_base);
	}

	return NOTIFY_OK;
}
/*
 * On SH machines the conventional approach is to stash system RAM
 * in node 0, and other memory blocks in to node 1 and up, ordered by
 * latency. Each node's pgdat is node-local at the beginning of the node,
 * immediately followed by the node mem map.
 */
void __init setup_memory(void)
{
	/* First page frame past the end of the kernel image. */
	unsigned long next_pfn = PFN_UP(__pa(_end));
	u64 lowmem_base = min_low_pfn << PAGE_SHIFT;
	u64 lowmem_size = (max_low_pfn << PAGE_SHIFT) - lowmem_base;

	lmb_add(lowmem_base, lowmem_size);

	/* Reserve the LMB regions used by the kernel, initrd, etc.. */
	lmb_reserve(__MEMORY_START + CONFIG_ZERO_PAGE_OFFSET,
		    (PFN_PHYS(next_pfn) + PAGE_SIZE - 1) -
		    (__MEMORY_START + CONFIG_ZERO_PAGE_OFFSET));

	/*
	 * Reserve physical pages below CONFIG_ZERO_PAGE_OFFSET.
	 */
	if (CONFIG_ZERO_PAGE_OFFSET != 0)
		lmb_reserve(__MEMORY_START, CONFIG_ZERO_PAGE_OFFSET);

	lmb_analyze();
	lmb_dump_all();

	/*
	 * Node 0's pgdat lives at the first available pfn. Step past it
	 * before the bootmem allocator is told where free memory begins.
	 */
	NODE_DATA(0) = pfn_to_kaddr(next_pfn);
	next_pfn += PFN_UP(sizeof(struct pglist_data));

	memset(NODE_DATA(0), 0, sizeof(struct pglist_data));
	NODE_DATA(0)->bdata = &bootmem_node_data[0];

	/* Set up node 0 */
	setup_bootmem_allocator(next_pfn);

	/* Give the platforms a chance to hook up their nodes */
	plat_mem_setup();
}
/*
 * On 64-bit we don't want to invoke hash_page on user addresses from
 * interrupt context, so if the access faults, we read the page tables
 * to find which page (if any) is mapped and access it directly.
 *
 * Copies @nb bytes from user address @ptr into @ret. Returns 0 on
 * success and -EFAULT when no present, user-accessible RAM page backs
 * @ptr.
 */
static int read_user_stack_slow(void __user *ptr, void *ret, int nb)
{
	unsigned long uaddr = (unsigned long) ptr;
	unsigned long in_page, frame;
	unsigned shift;
	pgd_t *pgdir;
	pte_t *ptep, pte;

	pgdir = current->mm->pgd;
	if (!pgdir)
		return -EFAULT;

	ptep = find_linux_pte_or_hugepte(pgdir, uaddr, &shift);
	if (!shift)
		shift = PAGE_SHIFT;

	/* offset of the address inside its (possibly huge) page */
	in_page = uaddr & ((1UL << shift) - 1);

	if (ptep == NULL)
		return -EFAULT;

	pte = *ptep;
	if (!(pte_present(pte) && (pte_val(pte) & _PAGE_USER)))
		return -EFAULT;

	frame = pte_pfn(pte);
	if (!page_is_ram(frame))
		return -EFAULT;

	/* no highmem to worry about here */
	memcpy(ret, pfn_to_kaddr(frame) + in_page, nb);
	return 0;
}
/*
 * Kernel virtual address of the page backing segment @seg of pending
 * request @req.
 */
static inline unsigned long vaddr(struct pending_req *req, int seg)
{
	struct page *backing = blkbk->pending_page(req, seg);

	return (unsigned long)pfn_to_kaddr(page_to_pfn(backing));
}
/*
 * Late memory initialisation: sanity-check the fixmap/pkmap layout,
 * release bootmem to the page allocator, print the memory summary and,
 * at debug level, the interesting virtual address ranges.
 */
void __init mem_init(void)
{
	int text_bytes, data_bytes, init_bytes;
	int nid;
#ifndef __tilegx__
	void *upper;
#endif

#ifdef CONFIG_FLATMEM
	BUG_ON(!mem_map);
#endif

#ifdef CONFIG_HIGHMEM
	/* check that fixmap and pkmap do not overlap */
	if (PKMAP_ADDR(LAST_PKMAP-1) >= FIXADDR_START) {
		pr_err("fixmap and kmap areas overlap"
		       " - this will crash\n");
		pr_err("pkstart: %lxh pkend: %lxh fixstart %lxh\n",
		       PKMAP_BASE, PKMAP_ADDR(LAST_PKMAP-1),
		       FIXADDR_START);
		BUG();
	}
#endif

	set_max_mapnr_init();

	/* this will put all bootmem onto the freelists */
	totalram_pages += free_all_bootmem();

#ifndef CONFIG_64BIT
	/* count all remaining LOWMEM and give all HIGHMEM to page allocator */
	set_non_bootmem_pages_init();
#endif

	text_bytes = (unsigned long)&_etext - (unsigned long)&_text;
	data_bytes = (unsigned long)&_end - (unsigned long)&_sdata;
	init_bytes = ((unsigned long)&_einittext - (unsigned long)&_sinittext) +
		     ((unsigned long)&_einitdata - (unsigned long)&_sinitdata);

	pr_info("Memory: %luk/%luk available (%dk kernel code, %dk data, %dk init, %ldk highmem)\n",
		(unsigned long) nr_free_pages() << (PAGE_SHIFT-10),
		num_physpages << (PAGE_SHIFT-10),
		text_bytes >> 10,
		data_bytes >> 10,
		init_bytes >> 10,
		(unsigned long) (totalhigh_pages << (PAGE_SHIFT-10))
	       );

	/*
	 * In debug mode, dump some interesting memory mappings.
	 */
#ifdef CONFIG_HIGHMEM
	printk(KERN_DEBUG "  KMAP    %#lx - %#lx\n",
	       FIXADDR_START, FIXADDR_TOP + PAGE_SIZE - 1);
	printk(KERN_DEBUG "  PKMAP   %#lx - %#lx\n",
	       PKMAP_BASE, PKMAP_ADDR(LAST_PKMAP) - 1);
#endif
#ifdef CONFIG_HUGEVMAP
	printk(KERN_DEBUG "  HUGEMAP %#lx - %#lx\n",
	       HUGE_VMAP_BASE, HUGE_VMAP_END - 1);
#endif
	printk(KERN_DEBUG "  VMALLOC %#lx - %#lx\n",
	       _VMALLOC_START, _VMALLOC_END - 1);

#ifdef __tilegx__
	/* Highest node first. */
	nid = MAX_NUMNODES;
	while (nid-- > 0) {
		struct pglist_data *pgdat = &node_data[nid];
		unsigned long lo, hi;

		if (!pgdat->node_present_pages)
			continue;

		lo = (unsigned long) pfn_to_kaddr(pgdat->node_start_pfn);
		hi = lo + (pgdat->node_present_pages << PAGE_SHIFT);
		printk(KERN_DEBUG "  MEM%d    %#lx - %#lx\n",
		       nid, lo, hi - 1);
	}
#else
	/*
	 * Highest node first: each populated node's range ends where the
	 * previously printed one began, starting from high_memory.
	 */
	upper = high_memory;
	nid = MAX_NUMNODES;
	while (nid-- > 0) {
		if ((unsigned long)vbase_map[nid] == -1UL)
			continue;

		printk(KERN_DEBUG "  LOWMEM%d %#lx - %#lx\n",
		       nid, (unsigned long) (vbase_map[nid]),
		       (unsigned long) (upper-1));
		upper = vbase_map[nid];
	}
#endif

#ifndef __tilegx__
	/*
	 * Convert from using one lock for all atomic operations to
	 * one per cpu.
	 */
	__init_atomic_per_cpu();
#endif
}
/*
 * This maps the physical memory to kernel virtual address space, a total
 * of max_low_pfn pages, by creating page tables starting from address
 * PAGE_OFFSET.
 *
 * This routine transitions us from using a set of compiled-in large
 * pages to using some more precise caching, including removing access
 * to code pages mapped at PAGE_OFFSET (executed only at MEM_SV_START)
 * marking read-only data as locally cacheable, striping the remaining
 * .data and .bss across all the available tiles, and removing access
 * to pages above the top of RAM (thus ensuring a page fault from a bad
 * virtual address rather than a hypervisor shoot down for accessing
 * memory outside the assigned limits).
 *
 * The tables are built in pgtables[], installed through
 * flush_and_install_context(), and finally copied into @pgd_base, which
 * is then installed with __install_page_table().
 *
 * @pgd_base: destination page directory that receives a copy of
 *            pgtables[] once the new mapping is live.
 */
static void __init kernel_physical_mapping_init(pgd_t *pgd_base)
{
	unsigned long long irqmask;
	unsigned long address, pfn;
	pmd_t *pmd;
	pte_t *pte;
	int pte_ofs;
	const struct cpumask *my_cpu_mask = cpumask_of(smp_processor_id());
	struct cpumask kstripe_mask;
	int rc, i;

#if CHIP_HAS_CBOX_HOME_MAP()
	/* Resolve conflicts between the ktext/kdata and kcache_hash options. */
	if (ktext_arg_seen && ktext_hash) {
		pr_warning("warning: \"ktext\" boot argument ignored"
			   " if \"kcache_hash\" sets up text hash-for-home\n");
		ktext_small = 0;
	}

	if (kdata_arg_seen && kdata_hash) {
		pr_warning("warning: \"kdata\" boot argument ignored"
			   " if \"kcache_hash\" sets up data hash-for-home\n");
	}

	if (kdata_huge && !hash_default) {
		pr_warning("warning: disabling \"kdata=huge\"; requires"
			  " kcache_hash=all or =allbutstack\n");
		kdata_huge = 0;
	}
#endif

	/*
	 * Set up a mask for cpus to use for kernel striping.
	 * This is normally all cpus, but minus dataplane cpus if any.
	 * If the dataplane covers the whole chip, we stripe over
	 * the whole chip too.
	 */
	cpumask_copy(&kstripe_mask, cpu_possible_mask);
	if (!kdata_arg_seen)
		kdata_mask = kstripe_mask;

	/* Allocate and fill in L2 page tables */
	for (i = 0; i < MAX_NUMNODES; ++i) {
#ifdef CONFIG_HIGHMEM
		unsigned long end_pfn = node_lowmem_end_pfn[i];
#else
		unsigned long end_pfn = node_end_pfn[i];
#endif
		unsigned long end_huge_pfn = 0;

		/* Pre-shatter the last huge page to allow per-cpu pages. */
		if (kdata_huge)
			end_huge_pfn = end_pfn - (HPAGE_SIZE >> PAGE_SHIFT);

		pfn = node_start_pfn[i];

		/* Allocate enough memory to hold L2 page tables for node. */
		init_prealloc_ptes(i, end_pfn - pfn);

		address = (unsigned long) pfn_to_kaddr(pfn);
		/* Each pass of this loop fills one L2 table (PTRS_PER_PTE pages). */
		while (pfn < end_pfn) {
			BUG_ON(address & (HPAGE_SIZE-1));
			pmd = get_pmd(pgtables, address);
			pte = get_prealloc_pte(pfn);
			if (pfn < end_huge_pfn) {
				/*
				 * Huge mapping: the pmd slot holds a huge PTE,
				 * and the L2 table is filled in as well with
				 * the same protection for every page.
				 */
				pgprot_t prot = init_pgprot(address);
				*(pte_t *)pmd = pte_mkhuge(pfn_pte(pfn, prot));
				for (pte_ofs = 0; pte_ofs < PTRS_PER_PTE;
				     pfn++, pte_ofs++, address += PAGE_SIZE)
					pte[pte_ofs] = pfn_pte(pfn, prot);
			} else {
				/* Small pages: protection is chosen per page. */
				if (kdata_huge)
					printk(KERN_DEBUG "pre-shattered huge"
					       " page at %#lx\n", address);
				for (pte_ofs = 0; pte_ofs < PTRS_PER_PTE;
				     pfn++, pte_ofs++, address += PAGE_SIZE) {
					pgprot_t prot = init_pgprot(address);
					pte[pte_ofs] = pfn_pte(pfn, prot);
				}
				assign_pte(pmd, pte);
			}
		}
	}

	/*
	 * Set or check ktext_map now that we have cpu_possible_mask
	 * and kstripe_mask to work with.
	 */
	if (ktext_all)
		cpumask_copy(&ktext_mask, cpu_possible_mask);
	else if (ktext_nondataplane)
		ktext_mask = kstripe_mask;
	else if (!cpumask_empty(&ktext_mask)) {
		/* Sanity-check any mask that was requested */
		struct cpumask bad;
		cpumask_andnot(&bad, &ktext_mask, cpu_possible_mask);
		cpumask_and(&ktext_mask, &ktext_mask, cpu_possible_mask);
		if (!cpumask_empty(&bad)) {
			char buf[NR_CPUS * 5];
			cpulist_scnprintf(buf, sizeof(buf), &bad);
			pr_info("ktext: not using unavailable cpus %s\n", buf);
		}
		/* Nothing usable left: fall back to the current cpu. */
		if (cpumask_empty(&ktext_mask)) {
			pr_warning("ktext: no valid cpus; caching on %d.\n",
				   smp_processor_id());
			cpumask_copy(&ktext_mask,
				     cpumask_of(smp_processor_id()));
		}
	}

	/* Map the kernel text, starting at MEM_SV_INTRPT. */
	address = MEM_SV_INTRPT;
	pmd = get_pmd(pgtables, address);
	pfn = 0;  /* code starts at PA 0 */
	if (ktext_small) {
		/* Allocate an L2 PTE for the kernel text */
		int cpu = 0;
		pgprot_t prot = construct_pgprot(PAGE_KERNEL_EXEC,
						 PAGE_HOME_IMMUTABLE);

		if (ktext_local) {
			if (ktext_nocache)
				prot = hv_pte_set_mode(prot,
						       HV_PTE_MODE_UNCACHED);
			else
				prot = hv_pte_set_mode(prot,
						       HV_PTE_MODE_CACHE_NO_L3);
		} else {
			prot = hv_pte_set_mode(prot,
					       HV_PTE_MODE_CACHE_TILE_L3);
			cpu = cpumask_first(&ktext_mask);

			prot = ktext_set_nocache(prot);
		}

		BUG_ON(address != (unsigned long)_stext);
		pte = NULL;
		for (; address < (unsigned long)_einittext;
		     pfn++, address += PAGE_SIZE) {
			pte_ofs = pte_index(address);
			/* Start of a new L2 table: publish the previous one. */
			if (pte_ofs == 0) {
				if (pte)
					assign_pte(pmd++, pte);
				pte = alloc_pte();
			}
			/* Rotate through the cpus in ktext_mask, page by page. */
			if (!ktext_local) {
				prot = set_remote_cache_cpu(prot, cpu);
				cpu = cpumask_next(cpu, &ktext_mask);
				if (cpu == NR_CPUS)
					cpu = cpumask_first(&ktext_mask);
			}
			pte[pte_ofs] = pfn_pte(pfn, prot);
		}
		/* Publish the last, possibly partially filled, L2 table. */
		if (pte)
			assign_pte(pmd, pte);
	} else {
		/* Huge-page text mapping: one pmd slot per HPAGE_SIZE. */
		pte_t pteval = pfn_pte(0, PAGE_KERNEL_EXEC);
		pteval = pte_mkhuge(pteval);
#if CHIP_HAS_CBOX_HOME_MAP()
		if (ktext_hash) {
			pteval = hv_pte_set_mode(pteval,
						 HV_PTE_MODE_CACHE_HASH_L3);
			pteval = ktext_set_nocache(pteval);
		} else
#endif /* CHIP_HAS_CBOX_HOME_MAP() */
		if (cpumask_weight(&ktext_mask) == 1) {
			pteval = set_remote_cache_cpu(pteval,
					      cpumask_first(&ktext_mask));
			pteval = hv_pte_set_mode(pteval,
						 HV_PTE_MODE_CACHE_TILE_L3);
			pteval = ktext_set_nocache(pteval);
		} else if (ktext_nocache)
			pteval = hv_pte_set_mode(pteval,
						 HV_PTE_MODE_UNCACHED);
		else
			pteval = hv_pte_set_mode(pteval,
						 HV_PTE_MODE_CACHE_NO_L3);
		for (; address < (unsigned long)_einittext;
		     pfn += PFN_DOWN(HPAGE_SIZE), address += HPAGE_SIZE)
			*(pte_t *)(pmd++) = pfn_pte(pfn, pteval);
	}

	/* Set swapper_pgprot here so it is flushed to memory right away. */
	swapper_pgprot = init_pgprot((unsigned long)swapper_pg_dir);

	/*
	 * Since we may be changing the caching of the stack and page
	 * table itself, we invoke an assembly helper to do the
	 * following steps:
	 *
	 *  - flush the cache so we start with an empty slate
	 *  - install pgtables[] as the real page table
	 *  - flush the TLB so the new page table takes effect
	 */
	/* All interrupts are masked around the switch and restored after. */
	irqmask = interrupt_mask_save_mask();
	interrupt_mask_set_mask(-1ULL);
	rc = flush_and_install_context(__pa(pgtables),
				       init_pgprot((unsigned long)pgtables),
				       __get_cpu_var(current_asid),
				       cpumask_bits(my_cpu_mask));
	interrupt_mask_restore_mask(irqmask);
	BUG_ON(rc != 0);

	/* Copy the page table back to the normal swapper_pg_dir. */
	memcpy(pgd_base, pgtables, sizeof(pgtables));
	__install_page_table(pgd_base, __get_cpu_var(current_asid),
			     swapper_pgprot);

	/*
	 * We just read swapper_pgprot and thus brought it into the cache,
	 * with its new home & caching mode.  When we start the other CPUs,
	 * they're going to reference swapper_pgprot via their initial fake
	 * VA-is-PA mappings, which cache everything locally.  At that
	 * time, if it's in our cache with a conflicting home, the
	 * simulator's coherence checker will complain.  So, flush it out
	 * of our cache; we're not going to ever use it again anyway.
	 */
	__insn_finv(&swapper_pgprot);
}
static int omx_xen_accept_gref_list(omx_xenif_t * omx_xenif, struct omx_xen_user_region_segment *seg, uint32_t gref, void **vaddr, uint8_t part) { int ret = 0; struct backend_info *be = omx_xenif->be; struct vm_struct *area; pte_t *pte; struct gnttab_map_grant_ref ops = { .flags = GNTMAP_host_map | GNTMAP_contains_pte, //.flags = GNTMAP_host_map, .ref = gref, .dom = be->remoteDomain, }; dprintk_in(); area = alloc_vm_area(PAGE_SIZE, &pte); if (!area) { ret = -ENOMEM; goto out; } seg->vm_gref[part] = area; ops.host_addr = arbitrary_virt_to_machine(pte).maddr; if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &ops, 1)) { printk_err("HYPERVISOR map grant ref failed"); ret = -ENOSYS; goto out; } dprintk_deb("addr=%#lx, mfn=%#lx, kaddr=%#lx\n", (unsigned long)area->addr, ops.dev_bus_addr >> PAGE_SHIFT, ops.host_addr); if (ops.status) { printk_err("HYPERVISOR map grant ref failed status = %d", ops.status); ret = ops.status; goto out; } dprintk_deb("gref_offset = %#x\n", seg->gref_offset); *vaddr = (area->addr + seg->gref_offset); ret = ops.handle; #if 0 for (i = 0; i < (size + 2); i++) { dprintk_deb("gref_list[%d] = %u\n", i, *(((uint32_t *) * vaddr) + i)); } #endif seg->all_handle[part] = ops.handle; dprintk_deb("vaddr = %p, area->addr=%p, handle[%d]=%d\n", vaddr, area->addr, part, seg->all_handle[part]); out: dprintk_out(); return ret; } int omx_xen_register_user_segment(omx_xenif_t * omx_xenif, struct omx_ring_msg_register_user_segment *req) { struct backend_info *be = omx_xenif->be; void *vaddr = NULL; uint32_t **gref_list; struct page **page_list; struct omxback_dev *omxdev = be->omxdev; struct omx_endpoint *endpoint; struct omx_xen_user_region *region; struct omx_xen_user_region_segment *seg; int ret = 0; int i = 0, k = 0; uint8_t eid, nr_parts; uint16_t first_page_offset, gref_offset; uint32_t sid, id, nr_grefs, nr_pages, length, gref[OMX_XEN_GRANT_PAGES_MAX]; uint64_t domU_vaddr; int idx = 0, sidx = 0; struct gnttab_map_grant_ref *map; struct 
gnttab_unmap_grant_ref *unmap; dprintk_in(); TIMER_START(&t_reg_seg); sid = req->sid; id = req->rid; eid = req->eid; domU_vaddr = req->aligned_vaddr; nr_grefs = req->nr_grefs; nr_pages = req->nr_pages; nr_parts = req->nr_parts; length = req->length; dprintk_deb("nr_parts = %#x\n", nr_parts); for (k = 0; k < nr_parts; k++) { gref[k] = req->gref[k]; dprintk_deb("printing gref = %lu\n", gref[k]); } gref_offset = req->gref_offset; first_page_offset = req->first_page_offset; endpoint = omxdev->endpoints[eid]; region = rcu_dereference_protected(endpoint->xen_regions[id], 1); if (unlikely(!region)) { printk_err(KERN_ERR "Cannot access non-existing region %d\n", id); ret = -EINVAL; goto out; } dprintk_deb("Got region @%#lx id=%u\n", (unsigned long)region, id); seg = ®ion->segments[sid]; if (unlikely(!seg)) { printk(KERN_ERR "Cannot access non-existing segment %d\n", sid); ret = -EINVAL; goto out; } dprintk_deb("Got segment @%#lx id=%u\n", (unsigned long)seg, sid); seg->gref_offset = gref_offset; dprintk_deb ("Offset of actual list of grant references (in the frontend) = %#x\n", gref_offset); for (k = 0; k < nr_parts; k++) { seg->all_gref[k] = gref[k]; dprintk_deb("grant reference for list of grefs = %#x\n", gref[k]); } seg->nr_parts = nr_parts; dprintk_deb("parts of gref list = %#x\n", nr_parts); TIMER_START(&t_alloc_pages); gref_list = kzalloc(sizeof(uint32_t *) * nr_parts, GFP_ATOMIC); if (!gref_list) { ret = -ENOMEM; printk_err("gref list is NULL, ENOMEM!!!\n"); goto out; } map = kzalloc(sizeof(struct gnttab_map_grant_ref) * nr_pages, GFP_ATOMIC); if (!map) { ret = -ENOMEM; printk_err(" map is NULL, ENOMEM!!!\n"); goto out; } unmap = kzalloc(sizeof(struct gnttab_unmap_grant_ref) * nr_pages, GFP_ATOMIC); if (!unmap) { ret = -ENOMEM; printk_err(" unmap is NULL, ENOMEM!!!\n"); goto out; } #ifdef OMX_XEN_COOKIES seg->cookie = omx_xen_page_get_cookie(omx_xenif, nr_pages); if (!seg->cookie) { printk_err("cannot get cookie\n"); goto out; } page_list = seg->cookie->pages; #else 
page_list = kzalloc(sizeof(struct page *) * nr_pages, GFP_ATOMIC); if (!page_list) { ret = -ENOMEM; printk_err(" page list is NULL, ENOMEM!!!\n"); goto out; } ret = alloc_xenballooned_pages(nr_pages, page_list, false /* lowmem */); if (ret) { printk_err("cannot allocate xenballooned_pages\n"); goto out; } #endif TIMER_STOP(&t_alloc_pages); TIMER_START(&t_accept_gref_list); for (k = 0; k < nr_parts; k++) { ret = omx_xen_accept_gref_list(omx_xenif, seg, gref[k], &vaddr, k); if (ret < 0) { printk_err("Cannot accept gref list, = %d\n", ret); goto out; } gref_list[k] = (uint32_t *) vaddr; if (!gref_list) { printk_err("gref_list is NULL!!!, = %p\n", gref_list); ret = -ENOSYS; goto out; } } TIMER_STOP(&t_accept_gref_list); seg->gref_list = gref_list; seg->nr_pages = nr_pages; seg->first_page_offset = first_page_offset; i = 0; idx = 0; sidx = 0; seg->map = map; seg->unmap = unmap; while (i < nr_pages) { void *tmp_vaddr; unsigned long addr = (unsigned long)pfn_to_kaddr(page_to_pfn(page_list[i])); if (sidx % 256 == 0) dprintk_deb("gref_list[%d][%d] = %#x\n", idx, sidx, gref_list[idx][sidx]); gnttab_set_map_op(&map[i], addr, GNTMAP_host_map, gref_list[idx][sidx], be->remoteDomain); gnttab_set_unmap_op(&unmap[i], addr, GNTMAP_host_map, -1 /* handle */ ); i++; if ((unlikely(i % nr_grefs == 0))) { idx++; sidx = 0; } else { sidx++; } //printk(KERN_INFO "idx=%d, i=%d, sidx=%d\n", idx, i, sidx); } TIMER_START(&t_accept_grants); ret = gnttab_map_refs(map, NULL, page_list, nr_pages); if (ret) { printk_err("Error mapping, ret= %d\n", ret); goto out; } TIMER_STOP(&t_accept_grants); for (i = 0; i < nr_pages; i++) { if (map[i].status) { ret = -EINVAL; printk_err("idx %d, status =%d\n", i, map[i].status); goto out; } else { //BUG_ON(map->map_ops[i].handle == -1); unmap[i].handle = map[i].handle; dprintk_deb("map handle=%d\n", map[i].handle); } } seg->pages = page_list; seg->nr_pages = nr_pages; seg->length = length; region->total_length += length; dprintk_deb("total_length = %#lx, 
nrpages=%lu, pages = %#lx\n", region->total_length, seg->nr_pages, (unsigned long)seg->pages); goto all_ok; out: printk_err("error registering, try to debug MORE!!!!\n"); all_ok: TIMER_STOP(&t_reg_seg); dprintk_out(); return ret; } int omx_xen_create_user_region(omx_xenif_t * omx_xenif, uint32_t id, uint64_t vaddr, uint32_t nr_segments, uint32_t nr_pages, uint32_t nr_grefs, uint8_t eid) { struct backend_info *be = omx_xenif->be; struct omxback_dev *omxdev = be->omxdev; struct omx_endpoint *endpoint = omxdev->endpoints[eid]; struct omx_xen_user_region *region; int ret = 0; dprintk_in(); TIMER_START(&t_create_reg); //udelay(1000); /* allocate the relevant region */ region = kzalloc(sizeof(struct omx_xen_user_region) + nr_segments * sizeof(struct omx_xen_user_region_segment), GFP_KERNEL); if (!region) { printk_err ("No memory to allocate the region/segment buffers\n"); ret = -ENOMEM; goto out; } /* init stuff needed :S */ kref_init(®ion->refcount); region->total_length = 0; region->nr_vmalloc_segments = 0; region->total_registered_length = 0; region->id = id; region->nr_segments = nr_segments; region->eid = eid; region->endpoint = endpoint; region->dirty = 0; if (unlikely(rcu_access_pointer(endpoint->xen_regions[id]) != NULL)) { printk(KERN_ERR "Cannot create busy region %d\n", id); ret = -EBUSY; goto out; } rcu_assign_pointer(endpoint->xen_regions[id], region); out: TIMER_STOP(&t_create_reg); dprintk_out(); return ret; } /* Various region/segment handler functions */ void omx_xen_user_region_destroy_segments(struct omx_xen_user_region *region, struct omx_endpoint *endpoint) { int i; dprintk_in(); if (!endpoint) { printk_err("endpoint is null!!\n"); return; } for (i = 0; i < region->nr_segments; i++) omx_xen_deregister_user_segment(endpoint->be->omx_xenif, region->id, i, endpoint->endpoint_index); dprintk_out(); }
/* Kernel virtual address of the page behind pending index @idx of @netbk. */
static inline unsigned long idx_to_kaddr(struct xen_netbk *netbk, u16 idx)
{
	void *kaddr = pfn_to_kaddr(idx_to_pfn(netbk, idx));

	return (unsigned long)kaddr;
}
/* Kernel virtual address of the page behind pending index @idx of @vif. */
static inline unsigned long idx_to_kaddr(struct xenvif *vif, u16 idx)
{
	void *kaddr = pfn_to_kaddr(idx_to_pfn(vif, idx));

	return (unsigned long)kaddr;
}
/* Kernel virtual address of the page stored in mmap_pages[@idx]. */
static inline unsigned long idx_to_kaddr(unsigned int idx)
{
	void *kaddr = pfn_to_kaddr(page_to_pfn(mmap_pages[idx]));

	return (unsigned long)kaddr;
}
/** * cmm_mem_going_offline - Unloan pages where memory is to be removed * @arg: memory_notify structure with page range to be offlined * * Return value: * 0 on success **/ static int cmm_mem_going_offline(void *arg) { struct memory_notify *marg = arg; unsigned long start_page = (unsigned long)pfn_to_kaddr(marg->start_pfn); unsigned long end_page = start_page + (marg->nr_pages << PAGE_SHIFT); struct cmm_page_array *pa_curr, *pa_last, *npa; unsigned long idx; unsigned long freed = 0; cmm_dbg("Memory going offline, searching 0x%lx (%ld pages).\n", start_page, marg->nr_pages); spin_lock(&cmm_lock); /* Search the page list for pages in the range to be offlined */ pa_last = pa_curr = cmm_page_list; while (pa_curr) { for (idx = (pa_curr->index - 1); (idx + 1) > 0; idx--) { if ((pa_curr->page[idx] < start_page) || (pa_curr->page[idx] >= end_page)) continue; plpar_page_set_active(__pa(pa_curr->page[idx])); free_page(pa_curr->page[idx]); freed++; loaned_pages--; totalram_pages++; pa_curr->page[idx] = pa_last->page[--pa_last->index]; if (pa_last->index == 0) { if (pa_curr == pa_last) pa_curr = pa_last->next; pa_last = pa_last->next; free_page((unsigned long)cmm_page_list); cmm_page_list = pa_last; continue; } } pa_curr = pa_curr->next; } /* Search for page list structures in the range to be offlined */ pa_last = NULL; pa_curr = cmm_page_list; while (pa_curr) { if (((unsigned long)pa_curr >= start_page) && ((unsigned long)pa_curr < end_page)) { npa = (struct cmm_page_array *)__get_free_page( GFP_NOIO | __GFP_NOWARN | __GFP_NORETRY | __GFP_NOMEMALLOC); if (!npa) { spin_unlock(&cmm_lock); cmm_dbg("Failed to allocate memory for list " "management. 
Memory hotplug " "failed.\n"); return ENOMEM; } memcpy(npa, pa_curr, PAGE_SIZE); if (pa_curr == cmm_page_list) cmm_page_list = npa; if (pa_last) pa_last->next = npa; free_page((unsigned long) pa_curr); freed++; pa_curr = npa; } pa_last = pa_curr; pa_curr = pa_curr->next; } spin_unlock(&cmm_lock); cmm_dbg("Released %ld pages in the search range.\n", freed); return 0; }