Example #1
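What appears to be __m4u_get_user_pages() from the MediaTek M4U (multimedia IOMMU) driver: a driver-local copy of get_user_pages() that walks the user range VMA by VMA, faults missing pages in with handle_mm_fault(), resolves them with follow_page(), then mlocks each page and bumps a per-PFN lock counter.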
int __m4u_get_user_pages(int eModuleID, struct task_struct *tsk, struct mm_struct *mm, 
                     unsigned long start, int nr_pages, unsigned int gup_flags,
                     struct page **pages, struct vm_area_struct **vmas)
{
        int i;
        unsigned long vm_flags;
        int trycnt;

        if (nr_pages <= 0)
                return 0;

        //VM_BUG_ON(!!pages != !!(gup_flags & FOLL_GET));
        if(!!pages != !!(gup_flags & FOLL_GET)) {
            M4UMSG(" error: __m4u_get_user_pages !!pages != !!(gup_flags & FOLL_GET), pages=0x%x, gup_flags & FOLL_GET=0x%x \n",
                    (unsigned int)pages, gup_flags & FOLL_GET);
        }

        /*   
         * Require read or write permissions.
         * If FOLL_FORCE is set, we only require the "MAY" flags.
         */
        vm_flags  = (gup_flags & FOLL_WRITE) ?
                        (VM_WRITE | VM_MAYWRITE) : (VM_READ | VM_MAYREAD);
        vm_flags &= (gup_flags & FOLL_FORCE) ?
                        (VM_MAYREAD | VM_MAYWRITE) : (VM_READ | VM_WRITE);
        i = 0; 

        M4UDBG("Trying to get_user_pages from start vaddr 0x%08x with %d pages\n", start, nr_pages);

        do { 
                struct vm_area_struct *vma;
                M4UDBG("For a new vma area from 0x%08x\n", start);
                vma = find_extend_vma(mm, start);

                if (!vma)
                {
                    M4UMSG("error: the vma is not found, start=0x%x, module=%d \n", 
                           (unsigned int)start, eModuleID);
                    return i ? i : -EFAULT;
                } 
                if (((~vma->vm_flags) & (VM_IO|VM_PFNMAP|VM_SHARED|VM_WRITE)) == 0)
                {
                    M4UMSG("error: m4u_get_pages(): bypass pmem garbage pages! vma->vm_flags=0x%x, start=0x%x, module=%d \n",
                            (unsigned int)(vma->vm_flags), (unsigned int)start, eModuleID);
                    return i ? i : -EFAULT;
                }
                if (vma->vm_flags & VM_IO)
                {
                    M4UDBG("warning: vma is marked as VM_IO \n");
                }
                if (vma->vm_flags & VM_PFNMAP)
                {
                    M4UMSG("error: vma permission is not correct, vma->vm_flags=0x%x, start=0x%x, module=%d \n",
                            (unsigned int)(vma->vm_flags), (unsigned int)start, eModuleID);
                    M4UMSG("hint: maybe the memory is remapped with un-permitted vma->vm_flags! \n");
                    //m4u_dump_maps(start);
                    return i ? i : -EFAULT;
                }
                if (!(vm_flags & vma->vm_flags))
                {
                    M4UMSG("error: vm_flags invalid, vm_flags=0x%x, vma->vm_flags=0x%x, start=0x%x, module=%d \n",
                           (unsigned int)vm_flags,
                           (unsigned int)(vma->vm_flags),
                           (unsigned int)start,
                            eModuleID);
                    //m4u_dump_maps(start);
                    return i ? i : -EFAULT;
                }

                do {
                        struct page *page;
                        unsigned int foll_flags = gup_flags;
                        /*
                         * If we have a pending SIGKILL, don't keep faulting
                         * pages and potentially allocating memory.
                         */
                        if (unlikely(fatal_signal_pending(current)))
                                return i ? i : -ERESTARTSYS;
                        MMProfileLogEx(M4U_MMP_Events[PROFILE_FOLLOW_PAGE], MMProfileFlagStart, eModuleID, start&(~0xFFF));
                        page = follow_page(vma, start, foll_flags);
                        MMProfileLogEx(M4U_MMP_Events[PROFILE_FOLLOW_PAGE], MMProfileFlagEnd, eModuleID, 0x1000);
                        while (!page) {
                                int ret;

                                M4UDBG("Trying to allocate for %dth page(vaddr: 0x%08x)\n", i, start);
                                MMProfileLogEx(M4U_MMP_Events[PROFILE_FORCE_PAGING], MMProfileFlagStart, eModuleID, start&(~0xFFF));
                                ret = handle_mm_fault(mm, vma, start,
                                        (foll_flags & FOLL_WRITE) ?
                                        FAULT_FLAG_WRITE : 0);
                                MMProfileLogEx(M4U_MMP_Events[PROFILE_FORCE_PAGING], MMProfileFlagEnd, eModuleID, 0x1000);
                                if (ret & VM_FAULT_ERROR) {
                                        if (ret & VM_FAULT_OOM) {
                                                M4UMSG("handle_mm_fault() error: no memory, vaddr:0x%08lx (%d pages are allocated), module=%d\n",
                                                start, i, eModuleID);
                                                //m4u_dump_maps(start);
                                                return i ? i : -ENOMEM;
                                        }
                                        if (ret &
                                            (VM_FAULT_HWPOISON|VM_FAULT_SIGBUS)) {
                                                M4UMSG("handle_mm_fault() error: invalid memory address, vaddr:0x%lx (%d pages are allocated), module=%d\n",
                                                start, i, eModuleID);
                                                //m4u_dump_maps(start);
                                                return i ? i : -EFAULT;
                                        }
                                        BUG();
                                }
                                if (ret & VM_FAULT_MAJOR)
                                        tsk->maj_flt++;
                                else
                                        tsk->min_flt++;

                                /*
                                 * The VM_FAULT_WRITE bit tells us that
                                 * do_wp_page has broken COW when necessary,
                                 * even if maybe_mkwrite decided not to set
                                 * pte_write. We can thus safely do subsequent
                                 * page lookups as if they were reads. But only
                                 * do so when looping for pte_write is futile:
                                 * in some cases userspace may also be wanting
                                 * to write to the gotten user page, which a
                                 * read fault here might prevent (a readonly
                                 * page might get reCOWed by userspace write).
                                 */
                                if ((ret & VM_FAULT_WRITE) &&
                                    !(vma->vm_flags & VM_WRITE))
                                        foll_flags &= ~FOLL_WRITE;
                                MMProfileLogEx(M4U_MMP_Events[PROFILE_FOLLOW_PAGE], MMProfileFlagStart, eModuleID, start&(~0xFFF));
                                page = follow_page(vma, start, foll_flags);
                                MMProfileLogEx(M4U_MMP_Events[PROFILE_FOLLOW_PAGE], MMProfileFlagEnd, eModuleID, 0x1000);
                        }
                        if (IS_ERR(page)) {
                                M4UMSG("handle_mm_fault() error: faulty page is returned, vaddr:0x%lx (%d pages are allocated), module=%d \n",
                                        start, i, eModuleID);
                                //m4u_dump_maps(start);
                                return i ? i : PTR_ERR(page);
                        }
                        if (pages) {
                                pages[i] = page;
                                MMProfileLogEx(M4U_MMP_Events[PROFILE_MLOCK], MMProfileFlagStart, eModuleID, start&(~0xFFF));

                                /* Use the retry version to guarantee it will succeed in getting the lock */
                                trycnt = 3000;
                                do {
                                        if (trylock_page(page)) {
                                                mlock_vma_page(page);
                                                unlock_page(page);

                                                /* make sure the hw pte is not 0 */
                                                {
                                                        int j;
                                                        for (j = 0; j < 3000; j++) {
                                                                if (!m4u_user_v2p(start)) {
                                                                        handle_mm_fault(mm, vma, start,
                                                                                (foll_flags & FOLL_WRITE) ? FAULT_FLAG_WRITE : 0);
                                                                        cond_resched();
                                                                } else
                                                                        break;
                                                        }
                                                        if (j == 3000)
                                                                M4UMSG("error: cannot handle_mm_fault to get hw pte: va=0x%x\n",
                                                                       (unsigned int)start);
                                                }

                                                break;
                                        }
                                } while (trycnt-- > 0);

                                if (PageMlocked(page) == 0) {
                                        M4UMSG("Can't mlock page\n");
                                        dump_page(page);
                                } else {
                                        unsigned int pfn = page_to_pfn(page);
                                        if (pfn < mlock_cnt_size)
                                                pMlock_cnt[pfn]++;
                                        else
                                                M4UERR("mlock_cnt_size is too small: pfn=%d, size=%d\n", pfn, mlock_cnt_size);

                                        //M4UMSG("lock page:\n");
                                        //dump_page(page);
                                }
                                MMProfileLogEx(M4U_MMP_Events[PROFILE_MLOCK], MMProfileFlagEnd, eModuleID, 0x1000);
                        }
                        if (vmas)
                                vmas[i] = vma;
                        i++;
                        start += PAGE_SIZE;
                        nr_pages--;
                } while (nr_pages && start < vma->vm_end);
        } while (nr_pages);

        /* return the number of pages successfully pinned */
        return i;
}
Example #2
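follow_page_pte() as it appears in a newer mainline kernel: it resolves a single PTE under the page-table lock, handling migration entries, FOLL_NUMA, device-mapped PTEs (pgmap), FOLL_SPLIT for transparent huge pages, FOLL_GET references, FOLL_TOUCH accessed/dirty updates, and FOLL_MLOCK.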
static struct page *follow_page_pte(struct vm_area_struct *vma,
		unsigned long address, pmd_t *pmd, unsigned int flags)
{
	struct mm_struct *mm = vma->vm_mm;
	struct dev_pagemap *pgmap = NULL;
	struct page *page;
	spinlock_t *ptl;
	pte_t *ptep, pte;

retry:
	if (unlikely(pmd_bad(*pmd)))
		return no_page_table(vma, flags);

	ptep = pte_offset_map_lock(mm, pmd, address, &ptl);
	pte = *ptep;
	if (!pte_present(pte)) {
		swp_entry_t entry;
		/*
		 * KSM's break_ksm() relies upon recognizing a ksm page
		 * even while it is being migrated, so for that case we
		 * need migration_entry_wait().
		 */
		if (likely(!(flags & FOLL_MIGRATION)))
			goto no_page;
		if (pte_none(pte))
			goto no_page;
		entry = pte_to_swp_entry(pte);
		if (!is_migration_entry(entry))
			goto no_page;
		pte_unmap_unlock(ptep, ptl);
		migration_entry_wait(mm, pmd, address);
		goto retry;
	}
	if ((flags & FOLL_NUMA) && pte_protnone(pte))
		goto no_page;
	if ((flags & FOLL_WRITE) && !can_follow_write_pte(pte, flags)) {
		pte_unmap_unlock(ptep, ptl);
		return NULL;
	}

	page = vm_normal_page(vma, address, pte);
	if (!page && pte_devmap(pte) && (flags & FOLL_GET)) {
		/*
		 * Only return device mapping pages in the FOLL_GET case since
		 * they are only valid while holding the pgmap reference.
		 */
		pgmap = get_dev_pagemap(pte_pfn(pte), NULL);
		if (pgmap)
			page = pte_page(pte);
		else
			goto no_page;
	} else if (unlikely(!page)) {
		if (flags & FOLL_DUMP) {
			/* Avoid special (like zero) pages in core dumps */
			page = ERR_PTR(-EFAULT);
			goto out;
		}

		if (is_zero_pfn(pte_pfn(pte))) {
			page = pte_page(pte);
		} else {
			int ret;

			ret = follow_pfn_pte(vma, address, ptep, flags);
			page = ERR_PTR(ret);
			goto out;
		}
	}

	if (flags & FOLL_SPLIT && PageTransCompound(page)) {
		int ret;
		get_page(page);
		pte_unmap_unlock(ptep, ptl);
		lock_page(page);
		ret = split_huge_page(page);
		unlock_page(page);
		put_page(page);
		if (ret)
			return ERR_PTR(ret);
		goto retry;
	}

	if (flags & FOLL_GET) {
		get_page(page);

		/* drop the pgmap reference now that we hold the page */
		if (pgmap) {
			put_dev_pagemap(pgmap);
			pgmap = NULL;
		}
	}
	if (flags & FOLL_TOUCH) {
		if ((flags & FOLL_WRITE) &&
		    !pte_dirty(pte) && !PageDirty(page))
			set_page_dirty(page);
		/*
		 * pte_mkyoung() would be more correct here, but atomic care
		 * is needed to avoid losing the dirty bit: it is easier to use
		 * mark_page_accessed().
		 */
		mark_page_accessed(page);
	}
	if ((flags & FOLL_MLOCK) && (vma->vm_flags & VM_LOCKED)) {
		/* Do not mlock pte-mapped THP */
		if (PageTransCompound(page))
			goto out;

		/*
		 * The preliminary mapping check is mainly to avoid the
		 * pointless overhead of lock_page on the ZERO_PAGE
		 * which might bounce very badly if there is contention.
		 *
		 * If the page is already locked, we don't need to
		 * handle it now - vmscan will handle it later if and
		 * when it attempts to reclaim the page.
		 */
		if (page->mapping && trylock_page(page)) {
			lru_add_drain();  /* push cached pages to LRU */
			/*
			 * Because we lock page here, and migration is
			 * blocked by the pte's page reference, and we
			 * know the page is still mapped, we don't even
			 * need to check for file-cache page truncation.
			 */
			mlock_vma_page(page);
			unlock_page(page);
		}
	}
out:
	pte_unmap_unlock(ptep, ptl);
	return page;
no_page:
	pte_unmap_unlock(ptep, ptl);
	if (!pte_none(pte))
		return NULL;
	return no_page_table(vma, flags);
}
Example #3
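An older revision of follow_page_pte() (still using pte_file(), pte_numa(), and get_page_foll()): the same PTE lookup path, but without the devmap and FOLL_SPLIT handling of the newer version above.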
static struct page *follow_page_pte(struct vm_area_struct *vma,
		unsigned long address, pmd_t *pmd, unsigned int flags)
{
	struct mm_struct *mm = vma->vm_mm;
	struct page *page;
	spinlock_t *ptl;
	pte_t *ptep, pte;

retry:
	if (unlikely(pmd_bad(*pmd)))
		return no_page_table(vma, flags);

	ptep = pte_offset_map_lock(mm, pmd, address, &ptl);
	pte = *ptep;
	if (!pte_present(pte)) {
		swp_entry_t entry;
		/*
		 * KSM's break_ksm() relies upon recognizing a ksm page
		 * even while it is being migrated, so for that case we
		 * need migration_entry_wait().
		 */
		if (likely(!(flags & FOLL_MIGRATION)))
			goto no_page;
		if (pte_none(pte) || pte_file(pte))
			goto no_page;
		entry = pte_to_swp_entry(pte);
		if (!is_migration_entry(entry))
			goto no_page;
		pte_unmap_unlock(ptep, ptl);
		migration_entry_wait(mm, pmd, address);
		goto retry;
	}
	if ((flags & FOLL_NUMA) && pte_numa(pte))
		goto no_page;
	if ((flags & FOLL_WRITE) && !pte_write(pte)) {
		pte_unmap_unlock(ptep, ptl);
		return NULL;
	}

	page = vm_normal_page(vma, address, pte);
	if (unlikely(!page)) {
		if ((flags & FOLL_DUMP) ||
		    !is_zero_pfn(pte_pfn(pte)))
			goto bad_page;
		page = pte_page(pte);
	}

	if (flags & FOLL_GET)
		get_page_foll(page);
	if (flags & FOLL_TOUCH) {
		if ((flags & FOLL_WRITE) &&
		    !pte_dirty(pte) && !PageDirty(page))
			set_page_dirty(page);
		/*
		 * pte_mkyoung() would be more correct here, but atomic care
		 * is needed to avoid losing the dirty bit: it is easier to use
		 * mark_page_accessed().
		 */
		mark_page_accessed(page);
	}
	if ((flags & FOLL_MLOCK) && (vma->vm_flags & VM_LOCKED)) {
		/*
		 * The preliminary mapping check is mainly to avoid the
		 * pointless overhead of lock_page on the ZERO_PAGE
		 * which might bounce very badly if there is contention.
		 *
		 * If the page is already locked, we don't need to
		 * handle it now - vmscan will handle it later if and
		 * when it attempts to reclaim the page.
		 */
		if (page->mapping && trylock_page(page)) {
			lru_add_drain();  /* push cached pages to LRU */
			/*
			 * Because we lock page here, and migration is
			 * blocked by the pte's page reference, and we
			 * know the page is still mapped, we don't even
			 * need to check for file-cache page truncation.
			 */
			mlock_vma_page(page);
			unlock_page(page);
		}
	}
	pte_unmap_unlock(ptep, ptl);
	return page;
bad_page:
	pte_unmap_unlock(ptep, ptl);
	return ERR_PTR(-EFAULT);

no_page:
	pte_unmap_unlock(ptep, ptl);
	if (!pte_none(pte))
		return NULL;
	return no_page_table(vma, flags);
}