static unsigned long msync_page_range(struct vm_area_struct *vma,
				unsigned long addr, unsigned long end)
{
	pgd_t *pgd;
	unsigned long next;
	unsigned long ret = 0;

	/* For hugepages we can't go walking the page table normally,
	 * but that's ok, hugetlbfs is memory based, so we don't need
	 * to do anything more on an msync(). */
	if (vma->vm_flags & VM_HUGETLB)
		return 0;

	BUG_ON(addr >= end);
	pgd = pgd_offset(vma->vm_mm, addr);
	flush_cache_range(vma, addr, end);
	do {
		next = pgd_addr_end(addr, end);
		if (pgd_none_or_clear_bad(pgd))
			continue;
		ret += msync_pud_range(vma, pgd, addr, next);
	} while (pgd++, addr = next, addr != end);
	return ret;
}
static int unuse_vma(struct vm_area_struct *vma,
				swp_entry_t entry, struct page *page)
{
	pgd_t *pgd;
	unsigned long addr, end, next;

	if (page->mapping) {
		addr = page_address_in_vma(page, vma);
		if (addr == -EFAULT)
			return 0;
		else
			end = addr + PAGE_SIZE;
	} else {
		addr = vma->vm_start;
		end = vma->vm_end;
	}

	pgd = pgd_offset(vma->vm_mm, addr);
	do {
		next = pgd_addr_end(addr, end);
		if (pgd_none_or_clear_bad(pgd))
			continue;
		if (unuse_pud_range(vma, pgd, addr, next, entry, page))
			return 1;
	} while (pgd++, addr = next, addr != end);
	return 0;
}
static int walk_pgd_range(unsigned long addr, unsigned long end,
			  struct mm_walk *walk)
{
	pgd_t *pgd;
	unsigned long next;
	int err = 0;

	pgd = pgd_offset(walk->mm, addr);
	do {
		next = pgd_addr_end(addr, end);
		if (pgd_none_or_clear_bad(pgd)) {
			if (walk->pte_hole)
				err = walk->pte_hole(addr, next, walk);
			if (err)
				break;
			continue;
		}
		if (walk->pmd_entry || walk->pte_entry)
			err = walk_p4d_range(pgd, addr, next, walk);
		if (err)
			break;
	} while (pgd++, addr = next, addr != end);

	return err;
}
int ioremap_page_range(unsigned long addr, unsigned long end,
		       phys_addr_t phys_addr, pgprot_t prot)
{
	pgd_t *pgd;
	unsigned long start;
	unsigned long next;
	int err;

	might_sleep();
	BUG_ON(addr >= end);

	start = addr;
	phys_addr -= addr;
	pgd = pgd_offset_k(addr);
	do {
		next = pgd_addr_end(addr, end);
		err = ioremap_p4d_range(pgd, addr, next, phys_addr + addr, prot);
		if (err)
			break;
	} while (pgd++, addr = next, addr != end);

	flush_cache_vmap(start, end);

	return err;
}
/**
 * walk_page_range - walk a memory map's page tables with a callback
 * @mm: memory map to walk
 * @addr: starting address
 * @end: ending address
 * @walk: set of callbacks to invoke for each level of the tree
 *
 * Recursively walk the page table for the memory area in a VMA,
 * calling supplied callbacks. Callbacks are called in-order (first
 * PGD, first PUD, first PMD, first PTE, second PTE... second PMD,
 * etc.). If lower-level callbacks are omitted, walking depth is reduced.
 *
 * Each callback receives an entry pointer and the start and end of the
 * associated range, and a copy of the original mm_walk for access to
 * the ->private or ->mm fields.
 *
 * No locks are taken, but the bottom level iterator will map PTE
 * directories from highmem if necessary.
 *
 * If any callback returns a non-zero value, the walk is aborted and
 * the return value is propagated back to the caller. Otherwise 0 is returned.
 */
int walk_page_range(unsigned long addr, unsigned long end,
		    struct mm_walk *walk)
{
	pgd_t *pgd;
	unsigned long next;
	int err = 0;

	if (addr >= end)
		return err;

	if (!walk->mm)
		return -EINVAL;

	pgd = pgd_offset(walk->mm, addr);
	do {
		struct vm_area_struct *uninitialized_var(vma);

		next = pgd_addr_end(addr, end);

#ifdef CONFIG_HUGETLB_PAGE
		/*
		 * handle hugetlb vma individually because pagetable walk for
		 * the hugetlb page is dependent on the architecture and
		 * we can't handle it in the same manner as non-huge pages.
		 */
		vma = find_vma(walk->mm, addr);
		if (vma && is_vm_hugetlb_page(vma)) {
			if (vma->vm_end < next)
				next = vma->vm_end;
			/*
			 * Hugepage is very tightly coupled with vma, so
			 * walk through hugetlb entries within a given vma.
			 */
			err = walk_hugetlb_range(vma, addr, next, walk);
			if (err)
				break;
			pgd = pgd_offset(walk->mm, next);
			continue;
		}
#endif
		if (pgd_none_or_clear_bad(pgd)) {
			if (walk->pte_hole)
				err = walk->pte_hole(addr, next, walk);
			if (err)
				break;
			pgd++;
			continue;
		}
		if (walk->pgd_entry)
			err = walk->pgd_entry(pgd, addr, next, walk);
		if (!err &&
		    (walk->pud_entry || walk->pmd_entry || walk->pte_entry))
			err = walk_pud_range(pgd, addr, next, walk);
		if (err)
			break;
		pgd++;
	} while (addr = next, addr != end);

	return err;
}
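/*
 * Illustrative sketch only, not taken from the source above: a minimal
 * walk_page_range() caller that counts present PTEs, assuming the mm_walk
 * layout used by the version of the function above (.pte_entry, .mm,
 * .private). The helper names count_pte() and count_present_ptes() are
 * hypothetical; only the callbacks a caller cares about need to be set,
 * as the comment block above describes.
 */
static int count_pte(pte_t *pte, unsigned long addr,
		     unsigned long next, struct mm_walk *walk)
{
	unsigned long *count = walk->private;

	if (pte_present(*pte))
		(*count)++;
	return 0;			/* non-zero would abort the walk */
}

static unsigned long count_present_ptes(struct mm_struct *mm,
					unsigned long start, unsigned long end)
{
	unsigned long count = 0;
	struct mm_walk walk = {
		.pte_entry	= count_pte,
		.mm		= mm,
		.private	= &count,
	};

	/* Hold mmap_sem across the walk so the VMAs and tables are stable. */
	down_read(&mm->mmap_sem);
	walk_page_range(start, end, &walk);
	up_read(&mm->mmap_sem);

	return count;
}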
/**
 * get_user_pages_fast() - pin user pages in memory
 * @start:	starting user address
 * @nr_pages:	number of pages from start to pin
 * @write:	whether pages will be written to
 * @pages:	array that receives pointers to the pages pinned.
 *		Should be at least nr_pages long.
 *
 * Attempt to pin user pages in memory without taking mm->mmap_sem.
 * If not successful, it will fall back to taking the lock and
 * calling get_user_pages().
 *
 * Returns number of pages pinned. This may be fewer than the number
 * requested. If nr_pages is 0 or negative, returns 0. If no pages
 * were pinned, returns -errno.
 */
int get_user_pages_fast(unsigned long start, int nr_pages, int write,
			struct page **pages)
{
	struct mm_struct *mm = current->mm;
	unsigned long addr, len, end;
	unsigned long next;
	pgd_t *pgdp;
	int nr = 0;

	start &= PAGE_MASK;
	addr = start;
	len = (unsigned long) nr_pages << PAGE_SHIFT;
	end = start + len;
	if (end < start)
		goto slow_irqon;

	local_irq_disable();
	pgdp = pgd_offset(mm, addr);
	do {
		pgd_t pgd = *pgdp;

		next = pgd_addr_end(addr, end);
		if (pgd_none(pgd))
			goto slow;
		if (!gup_pud_range(pgd, addr, next, write, pages, &nr))
			goto slow;
	} while (pgdp++, addr = next, addr != end);
	local_irq_enable();

	VM_BUG_ON(nr != (end - start) >> PAGE_SHIFT);
	return nr;

	{
		int ret;

slow:
		local_irq_enable();
slow_irqon:
		/* Try to get the remaining pages with get_user_pages */
		start += nr << PAGE_SHIFT;
		pages += nr;

		down_read(&mm->mmap_sem);
		ret = get_user_pages(current, mm, start,
			(end - start) >> PAGE_SHIFT, write, 0, pages, NULL);
		up_read(&mm->mmap_sem);

		/* Have to be a bit careful with return values */
		if (nr > 0) {
			if (ret < 0)
				ret = nr;
			else
				ret += nr;
		}

		return ret;
	}
}
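/*
 * Illustrative sketch only: a typical caller following the contract in the
 * kernel-doc above. Pin the pages backing a user buffer, use them, then drop
 * the references with put_page(). The function pin_user_buffer() and its
 * error handling are hypothetical, not part of the source above.
 */
static int pin_user_buffer(unsigned long uaddr, int nr_pages,
			   struct page **pages)
{
	int pinned, i;

	pinned = get_user_pages_fast(uaddr, nr_pages, 1 /* write */, pages);
	if (pinned < 0)
		return pinned;		/* nothing was pinned: -errno */

	/* ... access pages[0..pinned-1] here ... */

	for (i = 0; i < pinned; i++)
		put_page(pages[i]);

	return pinned;			/* may be fewer than nr_pages */
}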
void identity_mapping_del(pgd_t *pgd, unsigned long addr, unsigned long end)
{
	unsigned long next;

	pgd += pgd_index(addr);
	do {
		next = pgd_addr_end(addr, end);
		idmap_del_pud(pgd, addr, next);
	} while (pgd++, addr = next, addr != end);
}
static void __init kasan_pgd_populate(unsigned long addr, unsigned long end,
				      int node, bool early)
{
	unsigned long next;
	pgd_t *pgdp;

	pgdp = pgd_offset_k(addr);
	do {
		next = pgd_addr_end(addr, end);
		kasan_pud_populate(pgdp, addr, next, node, early);
	} while (pgdp++, addr = next, addr != end);
}
static void __init kasan_map_early_shadow(void)
{
	unsigned long addr = KASAN_SHADOW_START;
	unsigned long end = KASAN_SHADOW_END;
	unsigned long next;
	pgd_t *pgd;

	pgd = pgd_offset_k(addr);
	do {
		next = pgd_addr_end(addr, end);
		kasan_early_pud_populate(pgd, addr, next);
	} while (pgd++, addr = next, addr != end);
}
static void __init kasan_map_early_shadow(pgd_t *pgd)
{
	/* See comment in kasan_init() */
	unsigned long addr = KASAN_SHADOW_START & PGDIR_MASK;
	unsigned long end = KASAN_SHADOW_END;
	unsigned long next;

	pgd += pgd_index(addr);
	do {
		next = pgd_addr_end(addr, end);
		kasan_early_p4d_populate(pgd, addr, next);
	} while (pgd++, addr = next, addr != end);
}
static void vunmap_page_range(unsigned long addr, unsigned long end)
{
	pgd_t *pgd;
	unsigned long next;

	BUG_ON(addr >= end);
	pgd = pgd_offset_k(addr);
	do {
		next = pgd_addr_end(addr, end);
		if (pgd_none_or_clear_bad(pgd))
			continue;
		vunmap_pud_range(pgd, addr, next);
	} while (pgd++, addr = next, addr != end);
}
static void identity_mapping_add(pgd_t *pgd, unsigned long addr,
				 unsigned long end)
{
	unsigned long prot, next;

	prot = PMD_TYPE_SECT | PMD_SECT_AP_WRITE | PMD_SECT_AF;
	if (cpu_architecture() <= CPU_ARCH_ARMv5TEJ && !cpu_is_xscale())
		prot |= PMD_BIT4;

	pgd += pgd_index(addr);
	do {
		next = pgd_addr_end(addr, end);
		idmap_add_pud(pgd, addr, next, prot);
	} while (pgd++, addr = next, addr != end);
}
static void __init kasan_populate_shadow(unsigned long addr, unsigned long end,
					 int nid)
{
	pgd_t *pgd;
	unsigned long next;

	addr = addr & PAGE_MASK;
	end = round_up(end, PAGE_SIZE);
	pgd = pgd_offset_k(addr);
	do {
		next = pgd_addr_end(addr, end);
		kasan_populate_pgd(pgd, addr, next, nid);
	} while (pgd++, addr = next, addr != end);
}
/*
 * Walking through page table.
 */
static void clear_page_range(struct vm_area_struct *vma)
{
	pgd_t *pgd;
	unsigned long next, addr, end;

	addr = vma->vm_start;
	end = vma->vm_end;
	pgd = pgd_offset(vma->vm_mm, addr);
	do {
		next = pgd_addr_end(addr, end);
		if (pgd_none(*pgd))
			continue;
		next = clear_pud_range(vma, pgd, addr, next);
	} while (pgd++, addr = next, addr != end);
}
static inline int check_pgd_range(struct mm_struct *mm, unsigned long addr,
				  unsigned long end, unsigned long *nodes)
{
	pgd_t *pgd;
	unsigned long next;

	pgd = pgd_offset(mm, addr);
	do {
		next = pgd_addr_end(addr, end);
		if (pgd_none_or_clear_bad(pgd))
			continue;
		if (check_pud_range(mm, pgd, addr, next, nodes))
			return -EIO;
	} while (pgd++, addr = next, addr != end);
	return 0;
}
static void mincore_page_range(struct vm_area_struct *vma,
			       unsigned long addr, unsigned long end,
			       unsigned char *vec)
{
	unsigned long next;
	pgd_t *pgd;

	pgd = pgd_offset(vma->vm_mm, addr);
	do {
		next = pgd_addr_end(addr, end);
		if (pgd_none_or_clear_bad(pgd))
			mincore_unmapped_range(vma, addr, next, vec);
		else
			mincore_pud_range(vma, pgd, addr, next, vec);
		vec += (next - addr) >> PAGE_SHIFT;
	} while (pgd++, addr = next, addr != end);
}
/*
 * Create the page directory entries and any necessary page tables for the
 * mapping specified by 'md'.
 */
static void __create_mapping(struct mm_struct *mm, pgd_t *pgd,
			     phys_addr_t phys, unsigned long virt,
			     phys_addr_t size, pgprot_t prot,
			     void *(*alloc)(unsigned long size))
{
	unsigned long addr, length, end, next;

	addr = virt & PAGE_MASK;
	length = PAGE_ALIGN(size + (virt & ~PAGE_MASK));

	end = addr + length;
	do {
		next = pgd_addr_end(addr, end);
		alloc_init_pud(mm, pgd, addr, next, phys, prot, alloc);
		phys += next - addr;
	} while (pgd++, addr = next, addr != end);
}
static int copy_page_tables(pgd_t *dst_pgdp, unsigned long start,
			    unsigned long end)
{
	unsigned long next;
	unsigned long addr = start;
	pgd_t *src_pgdp = pgd_offset_k(start);

	dst_pgdp = pgd_offset_raw(dst_pgdp, start);
	do {
		next = pgd_addr_end(addr, end);
		if (pgd_none(READ_ONCE(*src_pgdp)))
			continue;
		if (copy_pud(dst_pgdp, src_pgdp, addr, next))
			return -ENOMEM;
	} while (dst_pgdp++, src_pgdp++, addr = next, addr != end);

	return 0;
}
static int vmap_page_range_noflush(unsigned long start, unsigned long end,
				   pgprot_t prot, struct page **pages)
{
	pgd_t *pgd;
	unsigned long next;
	unsigned long addr = start;
	int err = 0;
	int nr = 0;

	BUG_ON(addr >= end);
	pgd = pgd_offset_k(addr);
	do {
		next = pgd_addr_end(addr, end);
		err = vmap_pud_range(pgd, addr, next, prot, pages, &nr);
		if (err)
			return err;
	} while (pgd++, addr = next, addr != end);

	return nr;
}
static void identity_mapping_add(pgd_t *pgd, const char *text_start,
				 const char *text_end, unsigned long prot)
{
	unsigned long addr, end;
	unsigned long next;

	addr = virt_to_phys(text_start);
	end = virt_to_phys(text_end);

	prot |= PMD_TYPE_SECT | PMD_SECT_AP_WRITE | PMD_SECT_AF;

	if (cpu_architecture() <= CPU_ARCH_ARMv5TEJ && !cpu_is_xscale())
		prot |= PMD_BIT4;

	pgd += pgd_index(addr);
	do {
		next = pgd_addr_end(addr, end);
		idmap_add_pud(pgd, addr, next, prot);
	} while (pgd++, addr = next, addr != end);
}
static int remap_area_pages(unsigned long start, unsigned long pfn,
			    size_t size, const struct mem_type *type)
{
	unsigned long addr = start;
	unsigned long next, end = start + size;
	unsigned long phys_addr = __pfn_to_phys(pfn);
	pgd_t *pgd;
	int err = 0;

	BUG_ON(addr >= end);
	pgd = pgd_offset_k(addr);
	do {
		next = pgd_addr_end(addr, end);
		err = remap_area_pmd(pgd, addr, next, phys_addr, type);
		if (err)
			break;
		phys_addr += next - addr;
	} while (pgd++, addr = next, addr != end);

	return err;
}
static int __create_hyp_mappings(pgd_t *pgdp,
				 unsigned long start, unsigned long end,
				 unsigned long pfn, pgprot_t prot)
{
	pgd_t *pgd;
	pud_t *pud;
	pmd_t *pmd;
	unsigned long addr, next;
	int err = 0;

	mutex_lock(&kvm_hyp_pgd_mutex);
	addr = start & PAGE_MASK;
	end = PAGE_ALIGN(end);
	do {
		pgd = pgdp + pgd_index(addr);
		pud = pud_offset(pgd, addr);

		if (pud_none_or_clear_bad(pud)) {
			pmd = pmd_alloc_one(NULL, addr);
			if (!pmd) {
				kvm_err("Cannot allocate Hyp pmd\n");
				err = -ENOMEM;
				goto out;
			}
			pud_populate(NULL, pud, pmd);
			get_page(virt_to_page(pud));
			kvm_flush_dcache_to_poc(pud, sizeof(*pud));
		}

		next = pgd_addr_end(addr, end);
		err = create_hyp_pmd_mappings(pud, addr, next, pfn, prot);
		if (err)
			goto out;
		pfn += (next - addr) >> PAGE_SHIFT;
	} while (addr = next, addr != end);
out:
	mutex_unlock(&kvm_hyp_pgd_mutex);
	return err;
}
/**
 * kasan_populate_zero_shadow - populate shadow memory region with
 *                              kasan_zero_page
 * @shadow_start - start of the memory range to populate
 * @shadow_end - end of the memory range to populate
 */
void __init kasan_populate_zero_shadow(const void *shadow_start,
				       const void *shadow_end)
{
	unsigned long addr = (unsigned long)shadow_start;
	unsigned long end = (unsigned long)shadow_end;
	pgd_t *pgd = pgd_offset_k(addr);
	unsigned long next;

	do {
		next = pgd_addr_end(addr, end);

		if (IS_ALIGNED(addr, PGDIR_SIZE) && end - addr >= PGDIR_SIZE) {
			pud_t *pud;
			pmd_t *pmd;

			/*
			 * kasan_zero_pud should be populated with pmds
			 * at this moment.
			 * [pud,pmd]_populate*() below needed only for
			 * 3,2 - level page tables where we don't have
			 * puds,pmds, so pgd_populate(), pud_populate()
			 * are no-ops.
			 */
			pgd_populate(&init_mm, pgd, kasan_zero_pud);
			pud = pud_offset(pgd, addr);
			pud_populate(&init_mm, pud, kasan_zero_pmd);
			pmd = pmd_offset(pud, addr);
			pmd_populate_kernel(&init_mm, pmd, kasan_zero_pte);
			continue;
		}

		if (pgd_none(*pgd)) {
			pgd_populate(&init_mm, pgd,
				early_alloc(PAGE_SIZE, NUMA_NO_NODE));
		}
		zero_pud_populate(pgd, addr, next);
	} while (pgd++, addr = next, addr != end);
}
static int remap_area_pages(unsigned long start, unsigned long pfn,
			    unsigned long size, unsigned long flags)
{
	unsigned long addr = start;
	unsigned long next, end = start + size;
	unsigned long phys_addr = __pfn_to_phys(pfn);
	pgprot_t prot = __pgprot(L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY |
				 L_PTE_WRITE | flags);
	pgd_t *pgd;
	int err = 0;

	BUG_ON(addr >= end);
	pgd = pgd_offset_k(addr);
	do {
		next = pgd_addr_end(addr, end);
		err = remap_area_pmd(pgd, addr, next, phys_addr, prot);
		if (err)
			break;
		phys_addr += next - addr;
	} while (pgd++, addr = next, addr != end);

	return err;
}
/*
 * Like get_user_pages_fast() except it's IRQ-safe in that it won't fall
 * back to the regular GUP.
 */
int __get_user_pages_fast(unsigned long start, int nr_pages, int write,
			  struct page **pages)
{
	struct mm_struct *mm = current->mm;
	unsigned long addr, len, end;
	unsigned long next;
	unsigned long flags;
	pgd_t *pgdp;
	int nr = 0;

	start &= PAGE_MASK;
	addr = start;
	len = (unsigned long) nr_pages << PAGE_SHIFT;
	end = start + len;

	if (unlikely(!access_ok(write ? VERIFY_WRITE : VERIFY_READ,
					(void __user *)start, len)))
		return 0;

	/*
	 * This doesn't prevent pagetable teardown, but does prevent
	 * the pagetables and pages from being freed.
	 */
	local_irq_save(flags);
	pgdp = pgd_offset(mm, addr);
	do {
		pgd_t pgd = *pgdp;

		next = pgd_addr_end(addr, end);
		if (pgd_none(pgd))
			break;
		if (!gup_pud_range(pgd, addr, next, write, pages, &nr))
			break;
	} while (pgdp++, addr = next, addr != end);
	local_irq_restore(flags);

	return nr;
}
/*
 * Create the page directory entries and any necessary page tables for the
 * mapping specified by 'md'.
 */
static void init_pgd(pgd_t *pgd, phys_addr_t phys, unsigned long virt,
		     phys_addr_t size, pgprot_t prot,
		     phys_addr_t (*pgtable_alloc)(void))
{
	unsigned long addr, length, end, next;

	/*
	 * If the virtual and physical address don't have the same offset
	 * within a page, we cannot map the region as the caller expects.
	 */
	if (WARN_ON((phys ^ virt) & ~PAGE_MASK))
		return;

	phys &= PAGE_MASK;
	addr = virt & PAGE_MASK;
	length = PAGE_ALIGN(size + (virt & ~PAGE_MASK));

	end = addr + length;
	do {
		next = pgd_addr_end(addr, end);
		alloc_init_pud(pgd, addr, next, phys, prot,
			       pgtable_alloc);
		phys += next - addr;
	} while (pgd++, addr = next, addr != end);
}
int __get_user_pages_fast(unsigned long start, int nr_pages, int write,
			  struct page **pages)
{
	struct mm_struct *mm = current->mm;
	unsigned long addr, len, end;
	unsigned long next;
	unsigned long flags;
	pgd_t *pgdp;
	int nr = 0;

	pr_devel("%s(%lx,%x,%s)\n", __func__, start, nr_pages,
		 write ? "write" : "read");

	start &= PAGE_MASK;
	addr = start;
	len = (unsigned long) nr_pages << PAGE_SHIFT;
	end = start + len;

	if (unlikely(!access_ok(write ? VERIFY_WRITE : VERIFY_READ,
					start, len)))
		return 0;

	pr_devel(" aligned: %lx .. %lx\n", start, end);

	/*
	 * XXX: batch / limit 'nr', to avoid large irq off latency
	 * needs some instrumenting to determine the common sizes used by
	 * important workloads (eg. DB2), and whether limiting the batch size
	 * will decrease performance.
	 *
	 * It seems like we're in the clear for the moment. Direct-IO is
	 * the main guy that batches up lots of get_user_pages, and even
	 * they are limited to 64-at-a-time which is not so many.
	 */
	/*
	 * This doesn't prevent pagetable teardown, but does prevent
	 * the pagetables from being freed on powerpc.
	 *
	 * So long as we atomically load page table pointers versus teardown,
	 * we can follow the address down to the page and take a ref on it.
	 */
	local_irq_save(flags);

	pgdp = pgd_offset(mm, addr);
	do {
		pgd_t pgd = READ_ONCE(*pgdp);

		pr_devel(" %016lx: normal pgd %p\n", addr,
			 (void *)pgd_val(pgd));
		next = pgd_addr_end(addr, end);
		if (pgd_none(pgd))
			break;
		if (pgd_huge(pgd)) {
			if (!gup_hugepte((pte_t *)pgdp, PGDIR_SIZE, addr, next,
					 write, pages, &nr))
				break;
		} else if (is_hugepd(pgdp)) {
			if (!gup_hugepd((hugepd_t *)pgdp, PGDIR_SHIFT,
					addr, next, write, pages, &nr))
				break;
		} else if (!gup_pud_range(pgd, addr, next, write, pages, &nr))
			break;
	} while (pgdp++, addr = next, addr != end);

	local_irq_restore(flags);

	return nr;
}
int get_user_pages_fast(unsigned long start, int nr_pages, int write,
			struct page **pages)
{
	struct mm_struct *mm = current->mm;
	unsigned long addr, len, end;
	unsigned long next;
	pgd_t *pgdp;
	int nr = 0;

	start &= PAGE_MASK;
	addr = start;
	len = (unsigned long) nr_pages << PAGE_SHIFT;
	end = start + len;

	/*
	 * XXX: batch / limit 'nr', to avoid large irq off latency
	 * needs some instrumenting to determine the common sizes used by
	 * important workloads (eg. DB2), and whether limiting the batch size
	 * will decrease performance.
	 *
	 * It seems like we're in the clear for the moment. Direct-IO is
	 * the main guy that batches up lots of get_user_pages, and even
	 * they are limited to 64-at-a-time which is not so many.
	 */
	/*
	 * This doesn't prevent pagetable teardown, but does prevent
	 * the pagetables from being freed on sparc.
	 *
	 * So long as we atomically load page table pointers versus teardown,
	 * we can follow the address down to the page and take a ref on it.
	 */
	local_irq_disable();

	pgdp = pgd_offset(mm, addr);
	do {
		pgd_t pgd = *pgdp;

		next = pgd_addr_end(addr, end);
		if (pgd_none(pgd))
			goto slow;
		if (!gup_pud_range(pgd, addr, next, write, pages, &nr))
			goto slow;
	} while (pgdp++, addr = next, addr != end);

	local_irq_enable();

	VM_BUG_ON(nr != (end - start) >> PAGE_SHIFT);
	return nr;

	{
		int ret;

slow:
		local_irq_enable();

		/* Try to get the remaining pages with get_user_pages */
		start += nr << PAGE_SHIFT;
		pages += nr;

		down_read(&mm->mmap_sem);
		ret = get_user_pages(current, mm, start,
			(end - start) >> PAGE_SHIFT, write, 0, pages, NULL);
		up_read(&mm->mmap_sem);

		/* Have to be a bit careful with return values */
		if (nr > 0) {
			if (ret < 0)
				ret = nr;
			else
				ret += nr;
		}

		return ret;
	}
}
/**
 * walk_page_range - walk a memory map's page tables with a callback
 * @mm: memory map to walk
 * @addr: starting address
 * @end: ending address
 * @walk: set of callbacks to invoke for each level of the tree
 *
 * Recursively walk the page table for the memory area in a VMA,
 * calling supplied callbacks. Callbacks are called in-order (first
 * PGD, first PUD, first PMD, first PTE, second PTE... second PMD,
 * etc.). If lower-level callbacks are omitted, walking depth is reduced.
 *
 * Each callback receives an entry pointer and the start and end of the
 * associated range, and a copy of the original mm_walk for access to
 * the ->private or ->mm fields.
 *
 * Usually no locks are taken, but splitting transparent huge page may
 * take page table lock. And the bottom level iterator will map PTE
 * directories from highmem if necessary.
 *
 * If any callback returns a non-zero value, the walk is aborted and
 * the return value is propagated back to the caller. Otherwise 0 is returned.
 *
 * walk->mm->mmap_sem must be held for at least read if walk->hugetlb_entry
 * is !NULL.
 */
int walk_page_range(unsigned long addr, unsigned long end,
		    struct mm_walk *walk)
{
	pgd_t *pgd;
	unsigned long next;
	int err = 0;

	if (addr >= end)
		return err;

	if (!walk->mm)
		return -EINVAL;

	VM_BUG_ON(!rwsem_is_locked(&walk->mm->mmap_sem));

	pgd = pgd_offset(walk->mm, addr);
	do {
		struct vm_area_struct *vma = NULL;

		next = pgd_addr_end(addr, end);

		/*
		 * This function was not intended to be vma based.
		 * But there are vma special cases to be handled:
		 * - hugetlb vma's
		 * - VM_PFNMAP vma's
		 */
		vma = find_vma(walk->mm, addr);
		if (vma) {
			/*
			 * There are no page structures backing a VM_PFNMAP
			 * range, so do not allow split_huge_page_pmd().
			 */
			if ((vma->vm_start <= addr) &&
			    (vma->vm_flags & VM_PFNMAP)) {
				if (walk->pte_hole)
					err = walk->pte_hole(addr, next, walk);
				if (err)
					break;
				pgd = pgd_offset(walk->mm, next);
				continue;
			}
			/*
			 * Handle hugetlb vma individually because pagetable
			 * walk for the hugetlb page is dependent on the
			 * architecture and we can't handle it in the same
			 * manner as non-huge pages.
			 */
			if (walk->hugetlb_entry && (vma->vm_start <= addr) &&
			    is_vm_hugetlb_page(vma)) {
				if (vma->vm_end < next)
					next = vma->vm_end;
				/*
				 * Hugepage is very tightly coupled with vma,
				 * so walk through hugetlb entries within a
				 * given vma.
				 */
				err = walk_hugetlb_range(vma, addr, next, walk);
				if (err)
					break;
				pgd = pgd_offset(walk->mm, next);
				continue;
			}
		}

		if (pgd_none_or_clear_bad(pgd)) {
			if (walk->pte_hole)
				err = walk->pte_hole(addr, next, walk);
			if (err)
				break;
			pgd++;
			continue;
		}
		if (walk->pgd_entry)
			err = walk->pgd_entry(pgd, addr, next, walk);
		if (!err &&
		    (walk->pud_entry || walk->pmd_entry || walk->pte_entry))
			err = walk_pud_range(pgd, addr, next, walk);
		if (err)
			break;
		pgd++;
	} while (addr = next, addr != end);

	return err;
}
int get_user_pages_fast(unsigned long start, int nr_pages, int write,
			struct page **pages)
{
	struct mm_struct *mm = current->mm;
	unsigned long addr, len, end;
	unsigned long next;
	pgd_t *pgdp;
	int nr = 0;

	pr_devel("%s(%lx,%x,%s)\n", __func__, start, nr_pages,
		 write ? "write" : "read");

	start &= PAGE_MASK;
	addr = start;
	len = (unsigned long) nr_pages << PAGE_SHIFT;
	end = start + len;

	if (unlikely(!access_ok(write ? VERIFY_WRITE : VERIFY_READ,
					start, len)))
		goto slow_irqon;

	pr_devel(" aligned: %lx .. %lx\n", start, end);

	/*
	 * XXX: batch / limit 'nr', to avoid large irq off latency
	 * needs some instrumenting to determine the common sizes used by
	 * important workloads (eg. DB2), and whether limiting the batch size
	 * will decrease performance.
	 *
	 * It seems like we're in the clear for the moment. Direct-IO is
	 * the main guy that batches up lots of get_user_pages, and even
	 * they are limited to 64-at-a-time which is not so many.
	 */
	/*
	 * This doesn't prevent pagetable teardown, but does prevent
	 * the pagetables from being freed on powerpc.
	 *
	 * So long as we atomically load page table pointers versus teardown,
	 * we can follow the address down to the page and take a ref on it.
	 */
	local_irq_disable();

	pgdp = pgd_offset(mm, addr);
	do {
		pgd_t pgd = *pgdp;

		pr_devel(" %016lx: normal pgd %p\n", addr,
			 (void *)pgd_val(pgd));
		next = pgd_addr_end(addr, end);
		if (pgd_none(pgd))
			goto slow;
		if (pgd_huge(pgd)) {
			if (!gup_hugepte((pte_t *)pgdp, PGDIR_SIZE, addr, next,
					 write, pages, &nr))
				goto slow;
		} else if (is_hugepd(pgdp)) {
			if (!gup_hugepd((hugepd_t *)pgdp, PGDIR_SHIFT,
					addr, next, write, pages, &nr))
				goto slow;
		} else if (!gup_pud_range(pgd, addr, next, write, pages, &nr))
			goto slow;
	} while (pgdp++, addr = next, addr != end);

	local_irq_enable();

	VM_BUG_ON(nr != (end - start) >> PAGE_SHIFT);
	return nr;

	{
		int ret;

slow:
		local_irq_enable();
slow_irqon:
		pr_devel(" slow path ! nr = %d\n", nr);

		/* Try to get the remaining pages with get_user_pages */
		start += nr << PAGE_SHIFT;
		pages += nr;

		down_read(&mm->mmap_sem);
		ret = get_user_pages(current, mm, start,
			(end - start) >> PAGE_SHIFT, write, 0, pages, NULL);
		up_read(&mm->mmap_sem);

		/* Have to be a bit careful with return values */
		if (nr > 0) {
			if (ret < 0)
				ret = nr;
			else
				ret += nr;
		}

		return ret;
	}
}