static int smaps_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
                           struct mm_walk *walk)
{
    struct vm_area_struct *vma = walk->vma;
    pte_t *pte;
    spinlock_t *ptl;

    if (pmd_trans_huge_lock(pmd, vma, &ptl) == 1) {
        smaps_pmd_entry(pmd, addr, walk);
        spin_unlock(ptl);
        return 0;
    }

    if (pmd_trans_unstable(pmd))
        return 0;
    /*
     * The mmap_sem held all the way back in m_start() is what
     * keeps khugepaged out of here and from collapsing things
     * in here.
     */
    pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
    for (; addr != end; pte++, addr += PAGE_SIZE)
        smaps_pte_entry(pte, addr, walk);
    pte_unmap_unlock(pte - 1, ptl);
    cond_resched();
    return 0;
}
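Most of the examples on this page share one shape: walk pgd/pud/pmd, call pte_offset_map_lock() to map the PTE page and take its page-table lock, work on the PTE, then release with pte_unmap_unlock(). The following is a minimal hypothetical sketch of that pattern for a single address, not taken from any kernel source; it assumes the pre-p4d four-level layout used by the snippets here (pud_offset() taking a pgd_t *) and that the caller already holds mmap_sem for the target mm.

/*
 * Hypothetical helper, for illustration only: probe whether the PTE
 * mapping @addr in @mm is present, using the lock/inspect/unlock
 * pattern common to the examples on this page.
 */
static int probe_one_pte(struct mm_struct *mm, unsigned long addr)
{
    pgd_t *pgd;
    pud_t *pud;
    pmd_t *pmd;
    pte_t *pte;
    spinlock_t *ptl;
    int present;

    pgd = pgd_offset(mm, addr);
    if (pgd_none(*pgd) || unlikely(pgd_bad(*pgd)))
        return 0;

    pud = pud_offset(pgd, addr);
    if (pud_none(*pud) || unlikely(pud_bad(*pud)))
        return 0;

    pmd = pmd_offset(pud, addr);
    if (pmd_none(*pmd) || unlikely(pmd_bad(*pmd)))
        return 0;

    /* Map the PTE page and take the matching page-table lock. */
    pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
    present = pte_present(*pte);
    /* Inspect or modify *pte only while ptl is held. */
    pte_unmap_unlock(pte, ptl);

    return present;
}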
static int pin_page_for_write(const void __user *_addr, pte_t **ptep,
                              spinlock_t **ptlp)
{
    unsigned long addr = (unsigned long)_addr;
    pgd_t *pgd;
    pmd_t *pmd;
    pte_t *pte;
    pud_t *pud;
    spinlock_t *ptl;

    pgd = pgd_offset(current->mm, addr);
    if (unlikely(pgd_none(*pgd) || pgd_bad(*pgd)))
        return 0;

    pud = pud_offset(pgd, addr);
    if (unlikely(pud_none(*pud) || pud_bad(*pud)))
        return 0;

    pmd = pmd_offset(pud, addr);
    if (unlikely(pmd_none(*pmd) || pmd_bad(*pmd)))
        return 0;

    pte = pte_offset_map_lock(current->mm, pmd, addr, &ptl);
    if (unlikely(!pte_present(*pte) || !pte_young(*pte) ||
        !pte_write(*pte) || !pte_dirty(*pte))) {
        pte_unmap_unlock(pte, ptl);
        return 0;
    }

    *ptep = pte;
    *ptlp = ptl;

    return 1;
}
/*
 * No need to decide whether this PTE shares the swap entry with others,
 * just let do_wp_page work it out if a write is requested later - to
 * force COW, vm_page_prot omits write permission from any private vma.
 */
static int unuse_pte(struct vm_area_struct *vma, pmd_t *pmd,
                     unsigned long addr, swp_entry_t entry, struct page *page)
{
    spinlock_t *ptl;
    pte_t *pte;
    int ret = 1;

    if (mem_cgroup_charge(page, vma->vm_mm, GFP_KERNEL))
        ret = -ENOMEM;

    pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
    if (unlikely(!pte_same(*pte, swp_entry_to_pte(entry)))) {
        if (ret > 0)
            mem_cgroup_uncharge_page(page);
        ret = 0;
        goto out;
    }

    inc_mm_counter(vma->vm_mm, anon_rss);
    get_page(page);
    set_pte_at(vma->vm_mm, addr, pte,
               pte_mkold(mk_pte(page, vma->vm_page_prot)));
    page_add_anon_rmap(page, vma, addr);
    swap_free(entry);
    /*
     * Move the page to the active list so it is not
     * immediately swapped out again after swapon.
     */
    activate_page(page);
out:
    pte_unmap_unlock(pte, ptl);
    return ret;
}
static struct page *my_follow_page(struct vm_area_struct *vma, unsigned long addr)
{
    pud_t *pud = NULL;
    pmd_t *pmd = NULL;
    pgd_t *pgd = NULL;
    pte_t *pte = NULL;
    spinlock_t *ptl = NULL;
    struct page *page = NULL;
    struct mm_struct *mm = current->mm;

    pgd = pgd_offset(current->mm, addr);
    if (pgd_none(*pgd) || unlikely(pgd_bad(*pgd))) {
        goto out;
    }

    pud = pud_offset(pgd, addr);
    if (pud_none(*pud) || unlikely(pud_bad(*pud))) {
        goto out;
    }
    printk("aaaa\n");

    pmd = pmd_offset(pud, addr);
    if (pmd_none(*pmd) || unlikely(pmd_bad(*pmd))) {
        goto out;
    }

    pte = pte_offset_map_lock(current->mm, pmd, addr, &ptl);
    printk("bbbb\n");
    if (!pte)
        goto out;
    printk("cccc\n");

    if (!pte_present(*pte))
        goto unlock;

    page = pfn_to_page(pte_pfn(*pte));
    if (!page)
        goto unlock;
    get_page(page);
unlock:
    pte_unmap_unlock(pte, ptl);
out:
    return page;
}
static int mfill_zeropage_pte(struct mm_struct *dst_mm,
                              pmd_t *dst_pmd,
                              struct vm_area_struct *dst_vma,
                              unsigned long dst_addr)
{
    pte_t _dst_pte, *dst_pte;
    spinlock_t *ptl;
    int ret;
    pgoff_t offset, max_off;
    struct inode *inode;

    _dst_pte = pte_mkspecial(pfn_pte(my_zero_pfn(dst_addr),
                                     dst_vma->vm_page_prot));
    dst_pte = pte_offset_map_lock(dst_mm, dst_pmd, dst_addr, &ptl);
    if (dst_vma->vm_file) {
        /* the shmem MAP_PRIVATE case requires checking the i_size */
        inode = dst_vma->vm_file->f_inode;
        offset = linear_page_index(dst_vma, dst_addr);
        max_off = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE);
        ret = -EFAULT;
        if (unlikely(offset >= max_off))
            goto out_unlock;
    }
    ret = -EEXIST;
    if (!pte_none(*dst_pte))
        goto out_unlock;
    set_pte_at(dst_mm, dst_addr, dst_pte, _dst_pte);
    /* No need to invalidate - it was non-present before */
    update_mmu_cache(dst_vma, dst_addr, dst_pte);
    ret = 0;
out_unlock:
    pte_unmap_unlock(dst_pte, ptl);
    return ret;
}
/**
 * __replace_page - replace page in vma by new page.
 * based on replace_page in mm/ksm.c
 *
 * @vma:   vma that holds the pte pointing to page
 * @addr:  address the old @page is mapped at
 * @page:  the cowed page we are replacing by kpage
 * @kpage: the modified page we replace page by
 *
 * Returns 0 on success, -EFAULT on failure.
 */
static int __replace_page(struct vm_area_struct *vma, unsigned long addr,
                          struct page *old_page, struct page *new_page)
{
    struct mm_struct *mm = vma->vm_mm;
    spinlock_t *ptl;
    pte_t *ptep;
    int err;
    /* For mmu_notifiers */
    const unsigned long mmun_start = addr;
    const unsigned long mmun_end   = addr + PAGE_SIZE;
    struct mem_cgroup *memcg;

    err = mem_cgroup_try_charge(new_page, vma->vm_mm, GFP_KERNEL, &memcg,
                                false);
    if (err)
        return err;

    /* For try_to_free_swap() and munlock_vma_page() below */
    lock_page(old_page);

    mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end);
    err = -EAGAIN;
    ptep = page_check_address(old_page, mm, addr, &ptl, 0);
    if (!ptep) {
        mem_cgroup_cancel_charge(new_page, memcg, false);
        goto unlock;
    }

    get_page(new_page);
    page_add_new_anon_rmap(new_page, vma, addr, false);
    mem_cgroup_commit_charge(new_page, memcg, false, false);
    lru_cache_add_active_or_unevictable(new_page, vma);

    if (!PageAnon(old_page)) {
        dec_mm_counter(mm, mm_counter_file(old_page));
        inc_mm_counter(mm, MM_ANONPAGES);
    }

    flush_cache_page(vma, addr, pte_pfn(*ptep));
    ptep_clear_flush_notify(vma, addr, ptep);
    set_pte_at_notify(mm, addr, ptep, mk_pte(new_page, vma->vm_page_prot));

    page_remove_rmap(old_page, false);
    if (!page_mapped(old_page))
        try_to_free_swap(old_page);
    pte_unmap_unlock(ptep, ptl);

    if (vma->vm_flags & VM_LOCKED)
        munlock_vma_page(old_page);
    put_page(old_page);

    err = 0;
unlock:
    mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
    unlock_page(old_page);
    return err;
}
unsigned long noinline
__copy_to_user_memcpy(void __user *to, const void *from, unsigned long n)
{
    unsigned long ua_flags;
    int atomic;

    if (unlikely(segment_eq(get_fs(), KERNEL_DS))) {
        memcpy((void *)to, from, n);
        return 0;
    }

    /* the mmap semaphore is taken only if not in an atomic context */
    atomic = faulthandler_disabled();

    if (!atomic)
        down_read(&current->mm->mmap_sem);
    while (n) {
        pte_t *pte;
        spinlock_t *ptl;
        int tocopy;

        while (!pin_page_for_write(to, &pte, &ptl)) {
            if (!atomic)
                up_read(&current->mm->mmap_sem);
            if (__put_user(0, (char __user *)to))
                goto out;
            if (!atomic)
                down_read(&current->mm->mmap_sem);
        }

        tocopy = (~(unsigned long)to & ~PAGE_MASK) + 1;
        if (tocopy > n)
            tocopy = n;

        ua_flags = uaccess_save_and_enable();
        memcpy((void *)to, from, tocopy);
        uaccess_restore(ua_flags);
        to += tocopy;
        from += tocopy;
        n -= tocopy;

        if (pte)
            pte_unmap_unlock(pte, ptl);
        else
            spin_unlock(ptl);
    }
    if (!atomic)
        up_read(&current->mm->mmap_sem);

out:
    return n;
}
static int clear_refs_pte_range(pmd_t *pmd, unsigned long addr,
                                unsigned long end, struct mm_walk *walk)
{
    struct clear_refs_private *cp = walk->private;
    struct vm_area_struct *vma = walk->vma;
    pte_t *pte, ptent;
    spinlock_t *ptl;
    struct page *page;

    if (pmd_trans_huge_lock(pmd, vma, &ptl) == 1) {
        if (cp->type == CLEAR_REFS_SOFT_DIRTY) {
            clear_soft_dirty_pmd(vma, addr, pmd);
            goto out;
        }

        page = pmd_page(*pmd);

        /* Clear accessed and referenced bits. */
        pmdp_test_and_clear_young(vma, addr, pmd);
        ClearPageReferenced(page);
out:
        spin_unlock(ptl);
        return 0;
    }

    if (pmd_trans_unstable(pmd))
        return 0;

    pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
    for (; addr != end; pte++, addr += PAGE_SIZE) {
        ptent = *pte;

        if (cp->type == CLEAR_REFS_SOFT_DIRTY) {
            clear_soft_dirty(vma, addr, pte);
            continue;
        }

        if (!pte_present(ptent))
            continue;

        page = vm_normal_page(vma, addr, ptent);
        if (!page)
            continue;

        /* Clear accessed and referenced bits. */
        ptep_test_and_clear_young(vma, addr, pte);
        ClearPageReferenced(page);
    }
    pte_unmap_unlock(pte - 1, ptl);
    cond_resched();
    return 0;
}
unsigned long noinline
__copy_from_user_memcpy(void *to, const void __user *from, unsigned long n)
{
    unsigned long ua_flags;
    int atomic;

    if (unlikely(segment_eq(get_fs(), KERNEL_DS))) {
        memcpy(to, (const void *)from, n);
        return 0;
    }

    /* the mmap semaphore is taken only if not in an atomic context */
    atomic = in_atomic();

    if (!atomic)
        down_read(&current->mm->mmap_sem);
    while (n) {
        pte_t *pte;
        spinlock_t *ptl;
        int tocopy;

        while (!pin_page_for_read(from, &pte, &ptl)) {
            char temp;

            if (!atomic)
                up_read(&current->mm->mmap_sem);
            if (__get_user(temp, (char __user *)from))
                goto out;
            if (!atomic)
                down_read(&current->mm->mmap_sem);
        }

        tocopy = (~(unsigned long)from & ~PAGE_MASK) + 1;
        if (tocopy > n)
            tocopy = n;

        ua_flags = uaccess_save_and_enable();
        memcpy(to, (const void *)from, tocopy);
        uaccess_restore(ua_flags);
        to += tocopy;
        from += tocopy;
        n -= tocopy;

        pte_unmap_unlock(pte, ptl);
    }
    if (!atomic)
        up_read(&current->mm->mmap_sem);

out:
    return n;
}
static struct page *mtest_seek_page(struct vm_area_struct *vma, unsigned long addr)
{
    pgd_t *pgd;        // top level page table
    pud_t *pud;        // second level page table
    pmd_t *pmd;        // third level page table
    pte_t *pte;        // last level page table
    spinlock_t *ptl;
    struct page *page = NULL;
    struct mm_struct *mm = vma->vm_mm;

    pgd = pgd_offset(mm, addr);
    if (pgd_none(*pgd) || unlikely(pgd_bad(*pgd)))
        return NULL;

    pud = pud_offset(pgd, addr);
    if (pud_none(*pud) || unlikely(pud_bad(*pud)))
        return NULL;

    pmd = pmd_offset(pud, addr);
    if (pmd_none(*pmd) || unlikely(pmd_bad(*pmd)))
        return NULL;

    pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
    if (!pte)
        return NULL;
    if (!pte_present(*pte)) {
        pte_unmap_unlock(pte, ptl);
        return NULL;
    }

    page = pfn_to_page(pte_pfn(*pte));
    if (!page) {
        pte_unmap_unlock(pte, ptl);
        return NULL;
    }
    get_page(page);
    pte_unmap_unlock(pte, ptl);
    return page;
}
static unsigned long noinline
__clear_user_memset(void __user *addr, unsigned long n)
{
    unsigned long ua_flags;

    if (unlikely(segment_eq(get_fs(), KERNEL_DS))) {
        memset((void *)addr, 0, n);
        return 0;
    }

    down_read(&current->mm->mmap_sem);
    while (n) {
        pte_t *pte;
        spinlock_t *ptl;
        int tocopy;

        while (!pin_page_for_write(addr, &pte, &ptl)) {
            up_read(&current->mm->mmap_sem);
            if (__put_user(0, (char __user *)addr))
                goto out;
            down_read(&current->mm->mmap_sem);
        }

        tocopy = (~(unsigned long)addr & ~PAGE_MASK) + 1;
        if (tocopy > n)
            tocopy = n;

        ua_flags = uaccess_save_and_enable();
        memset((void *)addr, 0, tocopy);
        uaccess_restore(ua_flags);
        addr += tocopy;
        n -= tocopy;

        if (pte)
            pte_unmap_unlock(pte, ptl);
        else
            spin_unlock(ptl);
    }
    up_read(&current->mm->mmap_sem);

out:
    return n;
}
static void mincore_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
                              unsigned long addr, unsigned long end,
                              unsigned char *vec)
{
    unsigned long next;
    spinlock_t *ptl;
    pte_t *ptep;

    ptep = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
    do {
        pte_t pte = *ptep;
        pgoff_t pgoff;

        next = addr + PAGE_SIZE;
        if (pte_none(pte))
            mincore_unmapped_range(vma, addr, next, vec);
        else if (pte_present(pte))
            *vec = 1;
        else if (pte_file(pte)) {
            pgoff = pte_to_pgoff(pte);
            *vec = mincore_page(vma->vm_file->f_mapping, pgoff);
        } else { /* pte is a swap entry */
            swp_entry_t entry = pte_to_swp_entry(pte);

            if (is_migration_entry(entry)) {
                /* migration entries are always uptodate */
                *vec = 1;
            } else {
#ifdef CONFIG_SWAP
                pgoff = entry.val;
                *vec = mincore_page(&swapper_space, pgoff);
#else
                WARN_ON(1);
                *vec = 1;
#endif
            }
        }
        vec++;
    } while (ptep++, addr = next, addr != end);
    pte_unmap_unlock(ptep - 1, ptl);
}
unsigned long ___copy_to_user(void *to, const void *from, unsigned long n)
{
    if (get_fs() == KERNEL_DS) {
        memcpy(to, from, n);
        return 0;
    }

    if (n < 256)
        return __arch_copy_to_user(to, from, n);

    down_read(&current->mm->mmap_sem);
    while (n) {
        pte_t *pte;
        spinlock_t *ptl;
        int tocopy;

        while (unlikely(!pin_page_for_write(to, &pte, &ptl))) {
            up_read(&current->mm->mmap_sem);
            if (put_user(*((u8 *)from), (u8 *)to))
                goto out;
            down_read(&current->mm->mmap_sem);
        }

        tocopy = ((~((unsigned long)to)) & (PAGE_SIZE - 1)) + 1;
        if (tocopy > n)
            tocopy = n;

        memcpy(to, from, tocopy);
        to += tocopy;
        from += tocopy;
        n -= tocopy;

        pte_unmap_unlock(pte, ptl);
    }

out:
    up_read(&current->mm->mmap_sem);
    return n;
}
/*
 * Check that @page is mapped at @address into @mm.
 *
 * If @sync is false, page_check_address may perform a racy check to avoid
 * the page table lock when the pte is not present (helpful when reclaiming
 * highly shared pages).
 *
 * On success returns with pte mapped and locked.
 */
static pte_t *mr__page_check_address(struct page *page, struct mm_struct *mm,
                                     unsigned long address, spinlock_t **ptlp,
                                     int sync)
{
    pgd_t *pgd;
    pud_t *pud;
    pmd_t *pmd;
    pte_t *pte;
    spinlock_t *ptl;

    pgd = pgd_offset(mm, address);
    if (!pgd_present(*pgd))
        return NULL;

    pud = pud_offset(pgd, address);
    if (!pud_present(*pud))
        return NULL;

    pmd = pmd_offset(pud, address);
    if (!pmd_present(*pmd))
        return NULL;
    if (pmd_trans_huge(*pmd))
        return NULL;

    pte = pte_offset_map(pmd, address);
    /* Make a quick check before getting the lock */
    if (!sync && !pte_present(*pte)) {
        pte_unmap(pte);
        return NULL;
    }

    ptl = pte_lockptr(mm, pmd);
    spin_lock(ptl);
    if (pte_present(*pte) && page_to_pfn(page) == pte_pfn(*pte)) {
        *ptlp = ptl;
        return pte;
    }
    pte_unmap_unlock(pte, ptl);
    return NULL;
}
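The helper above returns with the pte mapped and its page-table lock held, so the caller owns the unlock. A hypothetical caller sketch (not from the original source) makes that contract explicit; the function name is illustrative only.

static int check_page_mapped_at(struct page *page, struct mm_struct *mm,
                                unsigned long address)
{
    spinlock_t *ptl;
    pte_t *pte;

    pte = mr__page_check_address(page, mm, address, &ptl, 0);
    if (!pte)
        return 0;

    /* *pte may be inspected or modified here, under the PTE lock. */
    pte_unmap_unlock(pte, ptl);
    return 1;
}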
static int mfill_zeropage_pte(struct mm_struct *dst_mm,
                              pmd_t *dst_pmd,
                              struct vm_area_struct *dst_vma,
                              unsigned long dst_addr)
{
    pte_t _dst_pte, *dst_pte;
    spinlock_t *ptl;
    int ret;

    _dst_pte = pte_mkspecial(pfn_pte(my_zero_pfn(dst_addr),
                                     dst_vma->vm_page_prot));
    ret = -EEXIST;
    dst_pte = pte_offset_map_lock(dst_mm, dst_pmd, dst_addr, &ptl);
    if (!pte_none(*dst_pte))
        goto out_unlock;
    set_pte_at(dst_mm, dst_addr, dst_pte, _dst_pte);
    /* No need to invalidate - it was non-present before */
    update_mmu_cache(dst_vma, dst_addr, dst_pte);
    ret = 0;
out_unlock:
    pte_unmap_unlock(dst_pte, ptl);
    return ret;
}
static unsigned long msync_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
                                     unsigned long addr, unsigned long end)
{
    pte_t *pte;
    spinlock_t *ptl;
    int progress = 0;
    unsigned long ret = 0;

again:
    pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
    do {
        struct page *page;

        if (progress >= 64) {
            progress = 0;
            if (need_resched() || need_lockbreak(ptl))
                break;
        }
        progress++;
        if (!pte_present(*pte))
            continue;
        if (!pte_maybe_dirty(*pte))
            continue;
        page = vm_normal_page(vma, addr, *pte);
        if (!page)
            continue;
        if (ptep_clear_flush_dirty(vma, addr, pte) ||
            page_test_and_clear_dirty(page))
            ret += set_page_dirty(page);
        progress += 3;
    } while (pte++, addr += PAGE_SIZE, addr != end);
    pte_unmap_unlock(pte - 1, ptl);
    cond_resched();
    if (addr != end)
        goto again;
    return ret;
}
static int unuse_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
                           unsigned long addr, unsigned long end,
                           swp_entry_t entry, struct page *page)
{
    pte_t swp_pte = swp_entry_to_pte(entry);
    pte_t *pte;
    spinlock_t *ptl;
    int found = 0;

    pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
    do {
        /*
         * swapoff spends a _lot_ of time in this loop!
         * Test inline before going to call unuse_pte.
         */
        if (unlikely(pte_same(*pte, swp_pte))) {
            unuse_pte(vma, pte++, addr, entry, page);
            found = 1;
            break;
        }
    } while (pte++, addr += PAGE_SIZE, addr != end);
    pte_unmap_unlock(pte - 1, ptl);
    return found;
}
static struct page *follow_page_pte(struct vm_area_struct *vma,
        unsigned long address, pmd_t *pmd, unsigned int flags)
{
    struct mm_struct *mm = vma->vm_mm;
    struct dev_pagemap *pgmap = NULL;
    struct page *page;
    spinlock_t *ptl;
    pte_t *ptep, pte;

retry:
    if (unlikely(pmd_bad(*pmd)))
        return no_page_table(vma, flags);

    ptep = pte_offset_map_lock(mm, pmd, address, &ptl);
    pte = *ptep;
    if (!pte_present(pte)) {
        swp_entry_t entry;
        /*
         * KSM's break_ksm() relies upon recognizing a ksm page
         * even while it is being migrated, so for that case we
         * need migration_entry_wait().
         */
        if (likely(!(flags & FOLL_MIGRATION)))
            goto no_page;
        if (pte_none(pte))
            goto no_page;
        entry = pte_to_swp_entry(pte);
        if (!is_migration_entry(entry))
            goto no_page;
        pte_unmap_unlock(ptep, ptl);
        migration_entry_wait(mm, pmd, address);
        goto retry;
    }
    if ((flags & FOLL_NUMA) && pte_protnone(pte))
        goto no_page;
    if ((flags & FOLL_WRITE) && !can_follow_write_pte(pte, flags)) {
        pte_unmap_unlock(ptep, ptl);
        return NULL;
    }

    page = vm_normal_page(vma, address, pte);
    if (!page && pte_devmap(pte) && (flags & FOLL_GET)) {
        /*
         * Only return device mapping pages in the FOLL_GET case since
         * they are only valid while holding the pgmap reference.
         */
        pgmap = get_dev_pagemap(pte_pfn(pte), NULL);
        if (pgmap)
            page = pte_page(pte);
        else
            goto no_page;
    } else if (unlikely(!page)) {
        if (flags & FOLL_DUMP) {
            /* Avoid special (like zero) pages in core dumps */
            page = ERR_PTR(-EFAULT);
            goto out;
        }

        if (is_zero_pfn(pte_pfn(pte))) {
            page = pte_page(pte);
        } else {
            int ret;

            ret = follow_pfn_pte(vma, address, ptep, flags);
            page = ERR_PTR(ret);
            goto out;
        }
    }

    if (flags & FOLL_SPLIT && PageTransCompound(page)) {
        int ret;

        get_page(page);
        pte_unmap_unlock(ptep, ptl);
        lock_page(page);
        ret = split_huge_page(page);
        unlock_page(page);
        put_page(page);
        if (ret)
            return ERR_PTR(ret);
        goto retry;
    }

    if (flags & FOLL_GET) {
        get_page(page);

        /* drop the pgmap reference now that we hold the page */
        if (pgmap) {
            put_dev_pagemap(pgmap);
            pgmap = NULL;
        }
    }
    if (flags & FOLL_TOUCH) {
        if ((flags & FOLL_WRITE) &&
            !pte_dirty(pte) && !PageDirty(page))
            set_page_dirty(page);
        /*
         * pte_mkyoung() would be more correct here, but atomic care
         * is needed to avoid losing the dirty bit: it is easier to use
         * mark_page_accessed().
         */
        mark_page_accessed(page);
    }
    if ((flags & FOLL_MLOCK) && (vma->vm_flags & VM_LOCKED)) {
        /* Do not mlock pte-mapped THP */
        if (PageTransCompound(page))
            goto out;

        /*
         * The preliminary mapping check is mainly to avoid the
         * pointless overhead of lock_page on the ZERO_PAGE
         * which might bounce very badly if there is contention.
         *
         * If the page is already locked, we don't need to
         * handle it now - vmscan will handle it later if and
         * when it attempts to reclaim the page.
         */
        if (page->mapping && trylock_page(page)) {
            lru_add_drain();  /* push cached pages to LRU */
            /*
             * Because we lock page here, and migration is
             * blocked by the pte's page reference, and we
             * know the page is still mapped, we don't even
             * need to check for file-cache page truncation.
             */
            mlock_vma_page(page);
            unlock_page(page);
        }
    }
out:
    pte_unmap_unlock(ptep, ptl);
    return page;
no_page:
    pte_unmap_unlock(ptep, ptl);
    if (!pte_none(pte))
        return NULL;
    return no_page_table(vma, flags);
}
static int pin_page_for_write(const void __user *_addr, pte_t **ptep,
                              spinlock_t **ptlp)
{
    unsigned long addr = (unsigned long)_addr;
    pgd_t *pgd;
    pmd_t *pmd;
    pte_t *pte;
    pud_t *pud;
    spinlock_t *ptl;

    pgd = pgd_offset(current->mm, addr);
    if (unlikely(pgd_none(*pgd) || pgd_bad(*pgd)))
        return 0;

    pud = pud_offset(pgd, addr);
    if (unlikely(pud_none(*pud) || pud_bad(*pud)))
        return 0;

    pmd = pmd_offset(pud, addr);
    if (unlikely(pmd_none(*pmd)))
        return 0;

    /*
     * A pmd can be bad if it refers to a HugeTLB or THP page.
     *
     * Both THP and HugeTLB pages have the same pmd layout
     * and should not be manipulated by the pte functions.
     *
     * Lock the page table for the destination and check
     * to see that it's still huge and whether or not we will
     * need to fault on write, or if we have a splitting THP.
     */
    if (unlikely(pmd_thp_or_huge(*pmd))) {
        ptl = &current->mm->page_table_lock;
        spin_lock(ptl);
        if (unlikely(!pmd_thp_or_huge(*pmd) ||
                     pmd_hugewillfault(*pmd) ||
                     pmd_trans_splitting(*pmd))) {
            spin_unlock(ptl);
            return 0;
        }

        *ptep = NULL;
        *ptlp = ptl;
        return 1;
    }

    if (unlikely(pmd_bad(*pmd)))
        return 0;

    pte = pte_offset_map_lock(current->mm, pmd, addr, &ptl);
    if (unlikely(!pte_present(*pte) || !pte_young(*pte) ||
        !pte_write(*pte) || !pte_dirty(*pte))) {
        pte_unmap_unlock(pte, ptl);
        return 0;
    }

    *ptep = pte;
    *ptlp = ptl;

    return 1;
}
/* pgtable sequential scan and count for __access_bits. */
static int scan_pgtable(void)
{
    pgd_t *pgd = NULL;
    pud_t *pud = NULL;
    pmd_t *pmd = NULL;
    pte_t *ptep, pte;
    spinlock_t *ptl;

    struct mm_struct *mm;
    struct vm_area_struct *vma;

    unsigned long start = 0;    /* the start of address. */
    unsigned long end = 0;      /* the end of address. */
    unsigned long address = 0;  /* the address of vma. */
    int number_hotpages = 0;    /* the number of hot pages */
    int number_vpages = 0;
    int cycle_index = 0;        /* the loop counter, which denotes ITERATIONS. */
    /* the array that records the number of hot page in every cycle */
    int hot_page[ITERATIONS];
    int number_current_pg = 0;
    int pg_count = 0;
    int j = 0;
    int times = 0;              /* records reuse time */

    /* some variables that describe page "heat" */
    int hig = 0;
    int mid = 0;
    int low = 0;
    int llow = 0;
    int lllow = 0;
    int llllow = 0;
    int all_pages = 0;          /* the total number of pages */
    /* the average number of hot pages in each iteration. */
    long avg_hotpage = 0;
    /* the total number of memory accesses across all pages */
    long num_access = 0;
    /* avg utilization of each page */
    int avg_page_utilization = 0;

    /* get the handle of current running benchmark. */
    struct task_struct *bench_process = get_current_process();

    if (bench_process == NULL) {
        printk("sysmon: get no process handle in scan_pgtable function...exit&trying again...\n");
        return 0;
    } else /* get the process */
        mm = bench_process->mm;
    if (mm == NULL) {
        printk("sysmon: error mm is NULL, return back & trying...\n");
        return 0;
    }

    for (j = 0; j < PAGE_ALL; j++)
        page_heat[j] = -1;
    for (j = 0; j < ITERATIONS; j++) {
        hot_page[j] = 0;
        reuse_time[j] = 0;
        dirty_page[j] = 0;
    }

    /* yanghao */
    times = 0;
    for (cycle_index = 0; cycle_index < ITERATIONS; cycle_index++) {
        number_hotpages = 0;
        /* scan each vma */
        for (vma = mm->mmap; vma; vma = vma->vm_next) {
            start = vma->vm_start;
            end = vma->vm_end;
            mm = vma->vm_mm;
            /* in each vma, we check all pages */
            for (address = start; address < end; address += PAGE_SIZE) {
                /* scan page table for each page in this VMA */
                pgd = pgd_offset(mm, address);
                if (pgd_none(*pgd) || unlikely(pgd_bad(*pgd)))
                    continue;
                pud = pud_offset(pgd, address);
                if (pud_none(*pud) || unlikely(pud_bad(*pud)))
                    continue;
                pmd = pmd_offset(pud, address);
                if (pmd_none(*pmd) || unlikely(pmd_bad(*pmd)))
                    continue;
                ptep = pte_offset_map_lock(mm, pmd, address, &ptl);
                pte = *ptep;
                if (pte_present(pte)) {
                    if (pte_young(pte)) { /* hot page */
                        /* re-set and clear _access_bits to 0 */
                        pte = pte_mkold(pte);
                        set_pte_at(mm, address, ptep, pte);
                        /* yanghao: re-set and clear _dirty_bits to 0 */
                        pte = pte_mkclean(pte);
                        set_pte_at(mm, address, ptep, pte);
                    }
                } else { /* no page, pte_none */
                    pte_unmap_unlock(ptep, ptl);
                    continue;
                }
                pte_unmap_unlock(ptep, ptl);
                page_counts++;
            }
        }

        /* count the number of hot pages */
        if (bench_process == NULL) {
            printk("sysmon: get no process handle in scan_pgtable function...exit&trying again...\n");
            return 0;
        } else /* get the process */
            mm = bench_process->mm;
        if (mm == NULL) {
            printk("sysmon: error mm is NULL, return back & trying...\n");
            return 0;
        }

        number_vpages = 0;
        sampling_interval = page_counts / 250; /* yanghao: */
        page_counts = 0;

        for (vma = mm->mmap; vma; vma = vma->vm_next) {
            start = vma->vm_start;
            end = vma->vm_end;
            /* scan each page in this VMA */
            mm = vma->vm_mm;
            pg_count = 0;
            for (address = start; address < end; address += PAGE_SIZE) {
                /* scan page table for each page in this VMA */
                pgd = pgd_offset(mm, address);
                if (pgd_none(*pgd) || unlikely(pgd_bad(*pgd)))
                    continue;
                pud = pud_offset(pgd, address);
                if (pud_none(*pud) || unlikely(pud_bad(*pud)))
                    continue;
                pmd = pmd_offset(pud, address);
                if (pmd_none(*pmd) || unlikely(pmd_bad(*pmd)))
                    continue;
                ptep = pte_offset_map_lock(mm, pmd, address, &ptl);
                pte = *ptep;
                if (pte_present(pte)) {
                    if (pte_young(pte)) { /* hot pages */
                        number_current_pg = pg_count + number_vpages;
                        page_heat[number_current_pg]++;
                        hot_page[cycle_index]++;
                        /* yanghao: */
                        if (page_counts == random_page) {
                            times++;
                            if (pte_dirty(pte))
                                dirty_page[cycle_index] = 1;
                        }
                    } else {
                        if (page_counts == random_page)
                            reuse_time[times]++;
                    }
                }
                pg_count++;
                pte_unmap_unlock(ptep, ptl);
                page_counts++;
            }
            number_vpages += (int)(end - start) / PAGE_SIZE;
        }
    }

    /* yanghao: cal. the No. of random_page */
    random_page += sampling_interval;
    if (random_page >= page_counts)
        random_page = page_counts / 300;

    /***************************** OUTPUT ************************************/
    for (j = 0; j < PAGE_ALL; j++) {
        if (page_heat[j] < VH && page_heat[j] > H)
            hig++;
        if (page_heat[j] > M && page_heat[j] <= H)
            mid++;
        if (page_heat[j] <= M && page_heat[j] > L)
            low++;
        if (page_heat[j] > VL_MAX && page_heat[j] <= L)
            llow++;
        if (page_heat[j] > VL_MIN && page_heat[j] <= VL_MAX)
            lllow++;
        if (page_heat[j] >= 0 && page_heat[j] <= VL_MIN)
            llllow++;
        if (page_heat[j] > -1)
            all_pages++;
    }

    /* the values reflect the accessing frequency of each physical page. */
    printk("[LOG: after sampling (%d loops) ...] ", ITERATIONS);
    printk("the values denote the physical page accessing frequence.\n");
    printk("-->hig (150,200) is %d. Indicating the number of re-used pages is high.\n", hig);
    printk("-->mid (100,150] is %d.\n", mid);
    printk("-->low (64,100] is %d.\n", low);
    printk("-->llow (10,64] is %d. In locality,no too many re-used pages.\n", llow);
    printk("-->lllow (5,10] is %d.\n", lllow);
    printk("-->llllow [1,5] is %d.\n", llllow);

    for (j = 0; j < ITERATIONS; j++)
        avg_hotpage += hot_page[j];
    avg_hotpage /= (j + 1);

    /*
     * new step@20140704
     * (1) the different phases of memory utilization
     * (2) the avg. page accessing utilization
     * (3) memory pages layout and spectrum
     */
    for (j = 0; j < PAGE_ALL; j++)
        if (page_heat[j] > -1) /* the page that is accessed at least once. */
            num_access += (page_heat[j] + 1);

    printk("the total number of memory accesses is %ld, the average is %ld\n",
           num_access, num_access / ITERATIONS);
    avg_page_utilization = num_access / all_pages;
    printk("Avg hot pages num is %ld, all used pages num is %d, avg utilization of each page is %d\n",
           avg_hotpage, all_pages, avg_page_utilization);

    /* yanghao: print the information about reuse-distance */
    if ((times == 0) && (reuse_time[0] == 0))
        printk("the page No.%d is not available.", random_page);
    else {
        if ((times == 0) && (reuse_time[0] == 0))
            printk("the page No.%d was not used in this 200 loops.", random_page);
        else {
            if (times < ITERATIONS)
                times++;
            printk("the reusetime of page No.%d is:", random_page);
            for (j = 0; j < times; j++)
                printk("%d ", reuse_time[j]);
            printk("\n");
            printk("the total number of the digit above denotes the sum that page NO.%d be accessd in %d loops.\n",
                   random_page, ITERATIONS);
            printk("each digit means the sum loops that between current loop and the last loop.\n");
        }
    }
    printk("\n\n");
    return 1;
}
static struct page *follow_page_pte(struct vm_area_struct *vma,
        unsigned long address, pmd_t *pmd, unsigned int flags)
{
    struct mm_struct *mm = vma->vm_mm;
    struct page *page;
    spinlock_t *ptl;
    pte_t *ptep, pte;

retry:
    if (unlikely(pmd_bad(*pmd)))
        return no_page_table(vma, flags);

    ptep = pte_offset_map_lock(mm, pmd, address, &ptl);
    pte = *ptep;
    if (!pte_present(pte)) {
        swp_entry_t entry;
        /*
         * KSM's break_ksm() relies upon recognizing a ksm page
         * even while it is being migrated, so for that case we
         * need migration_entry_wait().
         */
        if (likely(!(flags & FOLL_MIGRATION)))
            goto no_page;
        if (pte_none(pte) || pte_file(pte))
            goto no_page;
        entry = pte_to_swp_entry(pte);
        if (!is_migration_entry(entry))
            goto no_page;
        pte_unmap_unlock(ptep, ptl);
        migration_entry_wait(mm, pmd, address);
        goto retry;
    }
    if ((flags & FOLL_NUMA) && pte_numa(pte))
        goto no_page;
    if ((flags & FOLL_WRITE) && !pte_write(pte)) {
        pte_unmap_unlock(ptep, ptl);
        return NULL;
    }

    page = vm_normal_page(vma, address, pte);
    if (unlikely(!page)) {
        if ((flags & FOLL_DUMP) ||
            !is_zero_pfn(pte_pfn(pte)))
            goto bad_page;
        page = pte_page(pte);
    }

    if (flags & FOLL_GET)
        get_page_foll(page);
    if (flags & FOLL_TOUCH) {
        if ((flags & FOLL_WRITE) &&
            !pte_dirty(pte) && !PageDirty(page))
            set_page_dirty(page);
        /*
         * pte_mkyoung() would be more correct here, but atomic care
         * is needed to avoid losing the dirty bit: it is easier to use
         * mark_page_accessed().
         */
        mark_page_accessed(page);
    }
    if ((flags & FOLL_MLOCK) && (vma->vm_flags & VM_LOCKED)) {
        /*
         * The preliminary mapping check is mainly to avoid the
         * pointless overhead of lock_page on the ZERO_PAGE
         * which might bounce very badly if there is contention.
         *
         * If the page is already locked, we don't need to
         * handle it now - vmscan will handle it later if and
         * when it attempts to reclaim the page.
         */
        if (page->mapping && trylock_page(page)) {
            lru_add_drain();  /* push cached pages to LRU */
            /*
             * Because we lock page here, and migration is
             * blocked by the pte's page reference, and we
             * know the page is still mapped, we don't even
             * need to check for file-cache page truncation.
             */
            mlock_vma_page(page);
            unlock_page(page);
        }
    }
    pte_unmap_unlock(ptep, ptl);
    return page;
bad_page:
    pte_unmap_unlock(ptep, ptl);
    return ERR_PTR(-EFAULT);
no_page:
    pte_unmap_unlock(ptep, ptl);
    if (!pte_none(pte))
        return NULL;
    return no_page_table(vma, flags);
}
static int mcopy_atomic_pte(struct mm_struct *dst_mm,
                            pmd_t *dst_pmd,
                            struct vm_area_struct *dst_vma,
                            unsigned long dst_addr,
                            unsigned long src_addr,
                            struct page **pagep)
{
    struct mem_cgroup *memcg;
    pte_t _dst_pte, *dst_pte;
    spinlock_t *ptl;
    void *page_kaddr;
    int ret;
    struct page *page;

    if (!*pagep) {
        ret = -ENOMEM;
        page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, dst_vma, dst_addr);
        if (!page)
            goto out;

        page_kaddr = kmap_atomic(page);
        ret = copy_from_user(page_kaddr,
                             (const void __user *) src_addr,
                             PAGE_SIZE);
        kunmap_atomic(page_kaddr);

        /* fallback to copy_from_user outside mmap_sem */
        if (unlikely(ret)) {
            ret = -EFAULT;
            *pagep = page;
            /* don't free the page */
            goto out;
        }
    } else {
        page = *pagep;
        *pagep = NULL;
    }

    /*
     * The memory barrier inside __SetPageUptodate makes sure that
     * preceding stores to the page contents become visible before
     * the set_pte_at() write.
     */
    __SetPageUptodate(page);

    ret = -ENOMEM;
    if (mem_cgroup_try_charge(page, dst_mm, GFP_KERNEL, &memcg, false))
        goto out_release;

    _dst_pte = mk_pte(page, dst_vma->vm_page_prot);
    if (dst_vma->vm_flags & VM_WRITE)
        _dst_pte = pte_mkwrite(pte_mkdirty(_dst_pte));

    ret = -EEXIST;
    dst_pte = pte_offset_map_lock(dst_mm, dst_pmd, dst_addr, &ptl);
    if (!pte_none(*dst_pte))
        goto out_release_uncharge_unlock;

    inc_mm_counter(dst_mm, MM_ANONPAGES);
    page_add_new_anon_rmap(page, dst_vma, dst_addr, false);
    mem_cgroup_commit_charge(page, memcg, false, false);
    lru_cache_add_active_or_unevictable(page, dst_vma);

    set_pte_at(dst_mm, dst_addr, dst_pte, _dst_pte);

    /* No need to invalidate - it was non-present before */
    update_mmu_cache(dst_vma, dst_addr, dst_pte);

    pte_unmap_unlock(dst_pte, ptl);
    ret = 0;
out:
    return ret;
out_release_uncharge_unlock:
    pte_unmap_unlock(dst_pte, ptl);
    mem_cgroup_cancel_charge(page, memcg, false);
out_release:
    page_cache_release(page);
    goto out;
}