int __m4u_get_user_pages(int eModuleID, struct task_struct *tsk,
			 struct mm_struct *mm, unsigned long start,
			 int nr_pages, unsigned int gup_flags,
			 struct page **pages, struct vm_area_struct **vmas)
{
	int i;
	unsigned long vm_flags;
	int trycnt;

	if (nr_pages <= 0)
		return 0;

	/* VM_BUG_ON(!!pages != !!(gup_flags & FOLL_GET)); */
	if (!!pages != !!(gup_flags & FOLL_GET)) {
		M4UMSG("error: __m4u_get_user_pages !!pages != !!(gup_flags & FOLL_GET), pages=0x%x, gup_flags & FOLL_GET=0x%x\n",
		       (unsigned int)pages, gup_flags & FOLL_GET);
	}

	/*
	 * Require read or write permissions.
	 * If FOLL_FORCE is set, we only require the "MAY" flags.
	 */
	vm_flags  = (gup_flags & FOLL_WRITE) ? (VM_WRITE | VM_MAYWRITE) : (VM_READ | VM_MAYREAD);
	vm_flags &= (gup_flags & FOLL_FORCE) ? (VM_MAYREAD | VM_MAYWRITE) : (VM_READ | VM_WRITE);

	i = 0;
	M4UDBG("Trying to get_user_pages from start vaddr 0x%08x with %d pages\n",
	       (unsigned int)start, nr_pages);

	do {
		struct vm_area_struct *vma;

		M4UDBG("For a new vma area from 0x%08x\n", (unsigned int)start);
		vma = find_extend_vma(mm, start);
		if (!vma) {
			M4UMSG("error: the vma is not found, start=0x%x, module=%d\n",
			       (unsigned int)start, eModuleID);
			return i ? i : -EFAULT;
		}
		if (((~vma->vm_flags) & (VM_IO | VM_PFNMAP | VM_SHARED | VM_WRITE)) == 0) {
			M4UMSG("error: m4u_get_pages(): bypass pmem garbage pages! vma->vm_flags=0x%x, start=0x%x, module=%d\n",
			       (unsigned int)(vma->vm_flags), (unsigned int)start, eModuleID);
			return i ? i : -EFAULT;
		}
		if (vma->vm_flags & VM_IO)
			M4UDBG("warning: vma is marked as VM_IO\n");
		if (vma->vm_flags & VM_PFNMAP) {
			M4UMSG("error: vma permission is not correct, vma->vm_flags=0x%x, start=0x%x, module=%d\n",
			       (unsigned int)(vma->vm_flags), (unsigned int)start, eModuleID);
			M4UMSG("hint: maybe the memory is remapped with un-permitted vma->vm_flags!\n");
			/* m4u_dump_maps(start); */
			return i ? i : -EFAULT;
		}
		if (!(vm_flags & vma->vm_flags)) {
			M4UMSG("error: vm_flags invalid, vm_flags=0x%x, vma->vm_flags=0x%x, start=0x%x, module=%d\n",
			       (unsigned int)vm_flags, (unsigned int)(vma->vm_flags),
			       (unsigned int)start, eModuleID);
			/* m4u_dump_maps(start); */
			return i ? i : -EFAULT;
		}

		do {
			struct page *page;
			unsigned int foll_flags = gup_flags;

			/*
			 * If we have a pending SIGKILL, don't keep faulting
			 * pages and potentially allocating memory.
			 */
			if (unlikely(fatal_signal_pending(current)))
				return i ? i : -ERESTARTSYS;

			MMProfileLogEx(M4U_MMP_Events[PROFILE_FOLLOW_PAGE], MMProfileFlagStart,
				       eModuleID, start & (~0xFFF));
			page = follow_page(vma, start, foll_flags);
			MMProfileLogEx(M4U_MMP_Events[PROFILE_FOLLOW_PAGE], MMProfileFlagEnd,
				       eModuleID, 0x1000);

			while (!page) {
				int ret;

				M4UDBG("Trying to allocate for %dth page(vaddr: 0x%08x)\n",
				       i, (unsigned int)start);
				MMProfileLogEx(M4U_MMP_Events[PROFILE_FORCE_PAGING], MMProfileFlagStart,
					       eModuleID, start & (~0xFFF));
				ret = handle_mm_fault(mm, vma, start,
						      (foll_flags & FOLL_WRITE) ? FAULT_FLAG_WRITE : 0);
				MMProfileLogEx(M4U_MMP_Events[PROFILE_FORCE_PAGING], MMProfileFlagEnd,
					       eModuleID, 0x1000);
				if (ret & VM_FAULT_ERROR) {
					if (ret & VM_FAULT_OOM) {
						M4UMSG("handle_mm_fault() error: no memory, vaddr:0x%08lx (%d pages are allocated), module=%d\n",
						       start, i, eModuleID);
						/* m4u_dump_maps(start); */
						return i ? i : -ENOMEM;
					}
					if (ret & (VM_FAULT_HWPOISON | VM_FAULT_SIGBUS)) {
						M4UMSG("handle_mm_fault() error: invalid memory address, vaddr:0x%lx (%d pages are allocated), module=%d\n",
						       start, i, eModuleID);
						/* m4u_dump_maps(start); */
						return i ? i : -EFAULT;
					}
					BUG();
				}
				if (ret & VM_FAULT_MAJOR)
					tsk->maj_flt++;
				else
					tsk->min_flt++;

				/*
				 * The VM_FAULT_WRITE bit tells us that
				 * do_wp_page has broken COW when necessary,
				 * even if maybe_mkwrite decided not to set
				 * pte_write. We can thus safely do subsequent
				 * page lookups as if they were reads. But only
				 * do so when looping for pte_write is futile:
				 * in some cases userspace may also be wanting
				 * to write to the gotten user page, which a
				 * read fault here might prevent (a readonly
				 * page might get reCOWed by userspace write).
				 */
				if ((ret & VM_FAULT_WRITE) && !(vma->vm_flags & VM_WRITE))
					foll_flags &= ~FOLL_WRITE;

				MMProfileLogEx(M4U_MMP_Events[PROFILE_FOLLOW_PAGE], MMProfileFlagStart,
					       eModuleID, start & (~0xFFF));
				page = follow_page(vma, start, foll_flags);
				MMProfileLogEx(M4U_MMP_Events[PROFILE_FOLLOW_PAGE], MMProfileFlagEnd,
					       eModuleID, 0x1000);
			}

			if (IS_ERR(page)) {
				M4UMSG("handle_mm_fault() error: faulty page is returned, vaddr:0x%lx (%d pages are allocated), module=%d\n",
				       start, i, eModuleID);
				/* m4u_dump_maps(start); */
				return i ? i : PTR_ERR(page);
			}

			if (pages) {
				pages[i] = page;
				MMProfileLogEx(M4U_MMP_Events[PROFILE_MLOCK], MMProfileFlagStart,
					       eModuleID, start & (~0xFFF));
				/* Retry so the mlock is not skipped just because the page is transiently locked */
				trycnt = 3000;
				do {
					if (trylock_page(page)) {
						mlock_vma_page(page);
						unlock_page(page);

						/* make sure the hw pte is not 0 */
						{
							int retry;

							for (retry = 0; retry < 3000; retry++) {
								if (!m4u_user_v2p(start)) {
									handle_mm_fault(mm, vma, start,
											(foll_flags & FOLL_WRITE) ? FAULT_FLAG_WRITE : 0);
									cond_resched();
								} else {
									break;
								}
							}
							if (retry == 3000)
								M4UMSG("error: cannot handle_mm_fault to get hw pte: va=0x%x\n",
								       (unsigned int)start);
						}
						break;
					}
				} while (trycnt-- > 0);

				if (!PageMlocked(page)) {
					M4UMSG("Can't mlock page\n");
					dump_page(page);
				} else {
					unsigned int pfn = page_to_pfn(page);

					if (pfn < mlock_cnt_size) {
						pMlock_cnt[pfn]++;
					} else {
						M4UERR("mlock_cnt_size is too small: pfn=%d, size=%d\n",
						       pfn, mlock_cnt_size);
					}
					/* M4UMSG("lock page:\n"); */
					/* dump_page(page); */
				}
				MMProfileLogEx(M4U_MMP_Events[PROFILE_MLOCK], MMProfileFlagEnd,
					       eModuleID, 0x1000);
			}

			if (vmas)
				vmas[i] = vma;

			i++;
			start += PAGE_SIZE;
			nr_pages--;
		} while (nr_pages && start < vma->vm_end);
	} while (nr_pages);

	/* all requested pages were pinned */
	return i;
}
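/*
 * Usage sketch (not part of the original driver): one way a caller could pin
 * a user buffer with __m4u_get_user_pages() and release it again. The helper
 * name m4u_pin_user_buffer() and the flag combination are illustrative only;
 * only the __m4u_get_user_pages() signature above comes from the code itself,
 * and the mm->mmap_sem name assumes the same kernel generation as this driver.
 */
static int m4u_pin_user_buffer(int eModuleID, unsigned long va, int nr_pages,
			       struct page **pages)
{
	int got, i;

	down_read(&current->mm->mmap_sem);
	got = __m4u_get_user_pages(eModuleID, current, current->mm, va, nr_pages,
				   FOLL_GET | FOLL_WRITE, pages, NULL);
	up_read(&current->mm->mmap_sem);

	if (got < nr_pages) {
		/*
		 * Partial pin: drop the references we did take and fail.
		 * A full release path would also undo the mlock accounting
		 * (pMlock_cnt / munlock), which is omitted in this sketch.
		 */
		for (i = 0; i < got; i++)
			put_page(pages[i]);
		return (got < 0) ? got : -EFAULT;
	}
	return 0;	/* caller owns one reference per page until put_page() */
}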
static struct page *follow_page_pte(struct vm_area_struct *vma,
		unsigned long address, pmd_t *pmd, unsigned int flags)
{
	struct mm_struct *mm = vma->vm_mm;
	struct dev_pagemap *pgmap = NULL;
	struct page *page;
	spinlock_t *ptl;
	pte_t *ptep, pte;

retry:
	if (unlikely(pmd_bad(*pmd)))
		return no_page_table(vma, flags);

	ptep = pte_offset_map_lock(mm, pmd, address, &ptl);
	pte = *ptep;
	if (!pte_present(pte)) {
		swp_entry_t entry;
		/*
		 * KSM's break_ksm() relies upon recognizing a ksm page
		 * even while it is being migrated, so for that case we
		 * need migration_entry_wait().
		 */
		if (likely(!(flags & FOLL_MIGRATION)))
			goto no_page;
		if (pte_none(pte))
			goto no_page;
		entry = pte_to_swp_entry(pte);
		if (!is_migration_entry(entry))
			goto no_page;
		pte_unmap_unlock(ptep, ptl);
		migration_entry_wait(mm, pmd, address);
		goto retry;
	}
	if ((flags & FOLL_NUMA) && pte_protnone(pte))
		goto no_page;
	if ((flags & FOLL_WRITE) && !can_follow_write_pte(pte, flags)) {
		pte_unmap_unlock(ptep, ptl);
		return NULL;
	}

	page = vm_normal_page(vma, address, pte);
	if (!page && pte_devmap(pte) && (flags & FOLL_GET)) {
		/*
		 * Only return device mapping pages in the FOLL_GET case since
		 * they are only valid while holding the pgmap reference.
		 */
		pgmap = get_dev_pagemap(pte_pfn(pte), NULL);
		if (pgmap)
			page = pte_page(pte);
		else
			goto no_page;
	} else if (unlikely(!page)) {
		if (flags & FOLL_DUMP) {
			/* Avoid special (like zero) pages in core dumps */
			page = ERR_PTR(-EFAULT);
			goto out;
		}

		if (is_zero_pfn(pte_pfn(pte))) {
			page = pte_page(pte);
		} else {
			int ret;

			ret = follow_pfn_pte(vma, address, ptep, flags);
			page = ERR_PTR(ret);
			goto out;
		}
	}

	if (flags & FOLL_SPLIT && PageTransCompound(page)) {
		int ret;

		get_page(page);
		pte_unmap_unlock(ptep, ptl);
		lock_page(page);
		ret = split_huge_page(page);
		unlock_page(page);
		put_page(page);
		if (ret)
			return ERR_PTR(ret);
		goto retry;
	}

	if (flags & FOLL_GET) {
		get_page(page);

		/* drop the pgmap reference now that we hold the page */
		if (pgmap) {
			put_dev_pagemap(pgmap);
			pgmap = NULL;
		}
	}
	if (flags & FOLL_TOUCH) {
		if ((flags & FOLL_WRITE) &&
		    !pte_dirty(pte) && !PageDirty(page))
			set_page_dirty(page);
		/*
		 * pte_mkyoung() would be more correct here, but atomic care
		 * is needed to avoid losing the dirty bit: it is easier to use
		 * mark_page_accessed().
		 */
		mark_page_accessed(page);
	}
	if ((flags & FOLL_MLOCK) && (vma->vm_flags & VM_LOCKED)) {
		/* Do not mlock pte-mapped THP */
		if (PageTransCompound(page))
			goto out;

		/*
		 * The preliminary mapping check is mainly to avoid the
		 * pointless overhead of lock_page on the ZERO_PAGE
		 * which might bounce very badly if there is contention.
		 *
		 * If the page is already locked, we don't need to
		 * handle it now - vmscan will handle it later if and
		 * when it attempts to reclaim the page.
		 */
		if (page->mapping && trylock_page(page)) {
			lru_add_drain();	/* push cached pages to LRU */
			/*
			 * Because we lock page here, and migration is
			 * blocked by the pte's page reference, and we
			 * know the page is still mapped, we don't even
			 * need to check for file-cache page truncation.
			 */
			mlock_vma_page(page);
			unlock_page(page);
		}
	}
out:
	pte_unmap_unlock(ptep, ptl);
	return page;
no_page:
	pte_unmap_unlock(ptep, ptl);
	if (!pte_none(pte))
		return NULL;
	return no_page_table(vma, flags);
}
static struct page *follow_page_pte(struct vm_area_struct *vma,
		unsigned long address, pmd_t *pmd, unsigned int flags)
{
	struct mm_struct *mm = vma->vm_mm;
	struct page *page;
	spinlock_t *ptl;
	pte_t *ptep, pte;

retry:
	if (unlikely(pmd_bad(*pmd)))
		return no_page_table(vma, flags);

	ptep = pte_offset_map_lock(mm, pmd, address, &ptl);
	pte = *ptep;
	if (!pte_present(pte)) {
		swp_entry_t entry;
		/*
		 * KSM's break_ksm() relies upon recognizing a ksm page
		 * even while it is being migrated, so for that case we
		 * need migration_entry_wait().
		 */
		if (likely(!(flags & FOLL_MIGRATION)))
			goto no_page;
		if (pte_none(pte) || pte_file(pte))
			goto no_page;
		entry = pte_to_swp_entry(pte);
		if (!is_migration_entry(entry))
			goto no_page;
		pte_unmap_unlock(ptep, ptl);
		migration_entry_wait(mm, pmd, address);
		goto retry;
	}
	if ((flags & FOLL_NUMA) && pte_numa(pte))
		goto no_page;
	if ((flags & FOLL_WRITE) && !pte_write(pte)) {
		pte_unmap_unlock(ptep, ptl);
		return NULL;
	}

	page = vm_normal_page(vma, address, pte);
	if (unlikely(!page)) {
		if ((flags & FOLL_DUMP) ||
		    !is_zero_pfn(pte_pfn(pte)))
			goto bad_page;
		page = pte_page(pte);
	}

	if (flags & FOLL_GET)
		get_page_foll(page);
	if (flags & FOLL_TOUCH) {
		if ((flags & FOLL_WRITE) &&
		    !pte_dirty(pte) && !PageDirty(page))
			set_page_dirty(page);
		/*
		 * pte_mkyoung() would be more correct here, but atomic care
		 * is needed to avoid losing the dirty bit: it is easier to use
		 * mark_page_accessed().
		 */
		mark_page_accessed(page);
	}
	if ((flags & FOLL_MLOCK) && (vma->vm_flags & VM_LOCKED)) {
		/*
		 * The preliminary mapping check is mainly to avoid the
		 * pointless overhead of lock_page on the ZERO_PAGE
		 * which might bounce very badly if there is contention.
		 *
		 * If the page is already locked, we don't need to
		 * handle it now - vmscan will handle it later if and
		 * when it attempts to reclaim the page.
		 */
		if (page->mapping && trylock_page(page)) {
			lru_add_drain();	/* push cached pages to LRU */
			/*
			 * Because we lock page here, and migration is
			 * blocked by the pte's page reference, and we
			 * know the page is still mapped, we don't even
			 * need to check for file-cache page truncation.
			 */
			mlock_vma_page(page);
			unlock_page(page);
		}
	}
	pte_unmap_unlock(ptep, ptl);
	return page;
bad_page:
	pte_unmap_unlock(ptep, ptl);
	return ERR_PTR(-EFAULT);
no_page:
	pte_unmap_unlock(ptep, ptl);
	if (!pte_none(pte))
		return NULL;
	return no_page_table(vma, flags);
}
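/*
 * Context sketch (simplified, not copied from any single kernel version):
 * follow_page(), which __m4u_get_user_pages() calls, reaches follow_page_pte()
 * through a page-table walk roughly like the one below. The real
 * follow_page_mask() also handles huge PUD/PMD mappings, transparent huge
 * pages and, on newer kernels, 5-level page tables, all omitted here; the
 * function name follow_page_walk_sketch() is illustrative only. It is shown
 * just to make clear where the pmd argument of follow_page_pte() comes from.
 */
static struct page *follow_page_walk_sketch(struct vm_area_struct *vma,
					    unsigned long address,
					    unsigned int flags)
{
	struct mm_struct *mm = vma->vm_mm;
	pgd_t *pgd;
	pud_t *pud;
	pmd_t *pmd;

	pgd = pgd_offset(mm, address);
	if (pgd_none(*pgd) || unlikely(pgd_bad(*pgd)))
		return no_page_table(vma, flags);

	pud = pud_offset(pgd, address);
	if (pud_none(*pud) || unlikely(pud_bad(*pud)))
		return no_page_table(vma, flags);

	pmd = pmd_offset(pud, address);
	if (pmd_none(*pmd))
		return no_page_table(vma, flags);

	/* leaf level: the PTE handling shown in follow_page_pte() above */
	return follow_page_pte(vma, address, pmd, flags);
}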