unsigned long l4x_set_pte(struct mm_struct *mm, unsigned long addr,
                          pte_t old, pte_t pteval)
{
	/*
	 * Check if any invalidation is necessary
	 *
	 * Invalidation (flush) necessary if:
	 *    old page was present
	 *        new page is not present OR
	 *        new page has another physical address OR
	 *        new page has another protection OR
	 *        new page has other access attributes
	 */

	/* old was present && new not -> flush */
	int flush_rights = L4_FPAGE_RWX;

#if 0
	if ((pte_val(old) & PAGE_MASK) != (pte_val(pteval) & PAGE_MASK))
		printk("spte %x->%x\n", pte_val(old), pte_val(pteval));
#endif

	if (pte_present(pteval)) {
		/* new page is present,
		 * now we have to find out what has changed */
		if (((pte_val(old) ^ pte_val(pteval)) & PAGE_MASK)
		    || (pte_young(old) && !pte_young(pteval))) {
			/* physical page frame changed
			 * || access attribute changed -> flush */
			/* flush is the default */
			//pteval.pte_low &= ~_PAGE_MAPPED;
			pteval = __pte(pte_val(pteval) & ~_PAGE_MAPPED);
		} else if ((pte_write(old) && !pte_write(pteval))
		           || (pte_dirty(old) && !pte_dirty(pteval))) {
			/* Protection changed from r/w to ro
			 * or page now clean -> remap */
			flush_rights = L4_FPAGE_W;
			check_pte_mapped(old, pteval, "RW->RO");
		} else {
			/* nothing changed, simply return */
			check_pte_mapped(old, pteval, "NoChg");
			return pte_val(pteval);
		}
	}

	/* Ok, now actually flush or remap the page */
	L4XV_FN_v(l4x_flush_page(mm, pte_val(old), addr, PAGE_SHIFT,
	                         flush_rights));
	return pte_val(pteval);
}
/*
 * Changing some bits of contiguous entries requires us to follow a
 * Break-Before-Make approach, breaking the whole contiguous set
 * before we can change any entries.  See ARM DDI 0487A.k_iss10775,
 * "Misprogramming of the Contiguous bit", page D4-1762.
 *
 * This helper performs the break step.
 */
static pte_t get_clear_flush(struct mm_struct *mm,
			     unsigned long addr,
			     pte_t *ptep,
			     unsigned long pgsize,
			     unsigned long ncontig)
{
	pte_t orig_pte = huge_ptep_get(ptep);
	bool valid = pte_valid(orig_pte);
	unsigned long i, saddr = addr;

	for (i = 0; i < ncontig; i++, addr += pgsize, ptep++) {
		pte_t pte = ptep_get_and_clear(mm, addr, ptep);

		/*
		 * If HW_AFDBM is enabled, then the HW could turn on
		 * the dirty or accessed bit for any page in the set,
		 * so check them all.
		 */
		if (pte_dirty(pte))
			orig_pte = pte_mkdirty(orig_pte);

		if (pte_young(pte))
			orig_pte = pte_mkyoung(orig_pte);
	}

	if (valid) {
		struct vm_area_struct vma = TLB_FLUSH_VMA(mm, 0);
		flush_tlb_range(&vma, saddr, addr);
	}
	return orig_pte;
}
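get_clear_flush() only performs the "break" half of the Break-Before-Make sequence; the caller is expected to follow it with a "make" step that rewrites the whole contiguous set, as huge_ptep_set_access_flags() further below does. A minimal sketch of how the two halves pair up (the helper name cont_set_huge_pte and its argument list are made up here for illustration; real callers derive ncontig and pgsize with find_num_contig()):

/*
 * Sketch only, not the upstream helper: pair get_clear_flush() with the
 * "make" step that re-installs each entry of the contiguous set.
 */
static void cont_set_huge_pte(struct mm_struct *mm, unsigned long addr,
			      pte_t *ptep, pte_t pte,
			      unsigned long pgsize, unsigned long ncontig)
{
	unsigned long i, pfn = pte_pfn(pte), dpfn = pgsize >> PAGE_SHIFT;
	pgprot_t hugeprot = pte_pgprot(pte);
	pte_t orig_pte;

	/* break: clear and flush the whole contiguous range */
	orig_pte = get_clear_flush(mm, addr, ptep, pgsize, ncontig);

	/* carry over any dirty/young state the hardware may have set */
	if (pte_dirty(orig_pte))
		pte = pte_mkdirty(pte);
	if (pte_young(orig_pte))
		pte = pte_mkyoung(pte);

	/* make: write back every entry of the set with the new contents */
	for (i = 0; i < ncontig; i++, ptep++, addr += pgsize, pfn += dpfn)
		set_pte_at(mm, addr, ptep, pfn_pte(pfn, hugeprot));
}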
static unsigned long clear_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
				     unsigned long addr, unsigned long end)
{
	pte_t *start_pte;
	pte_t *pte;
	pte_t ptecont;

	/* Map the PTE page once and walk it entry by entry. */
	start_pte = pte = pte_offset_map(pmd, addr);
	do {
		ptecont = *pte;
		if (pte_none(ptecont))
			continue;

		/*
		 * pte_young() is a confusing name: it only tests
		 * _PAGE_ACCESSED, so pte_accessed() would describe it better.
		 */
		if (pte_present(ptecont) && pte_young(ptecont)) {
			/*
			 * The physical page this pte points to has been
			 * read or written during this time period.
			 */
			DEBUG_INFO("[%#016lx - %#016lx], pfn = %#013lx",
				   addr, end, pte_pfn(ptecont));
			collect_statistics(pte_pfn(ptecont));
			/*
			 * pte_clear_flags() only returns a modified copy of
			 * its argument, so write the cleared entry back;
			 * note this read-modify-write is not atomic with
			 * respect to hardware updates of the accessed bit.
			 */
			set_pte_at(vma->vm_mm, addr, pte,
				   pte_clear_flags(ptecont, _PAGE_ACCESSED));
		}
	} while (pte++, addr += PAGE_SIZE, addr != end);
	pte_unmap(start_pte);

	return addr;
}
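The walk above clears _PAGE_ACCESSED with a plain read-modify-write of a copied entry. The kernel also provides ptep_test_and_clear_young(), which tests and clears the accessed bit atomically and lets the architecture do any required bookkeeping. A minimal sketch of the inner test rewritten around that helper, keeping the DEBUG_INFO()/collect_statistics() calls from the snippet above (those are helpers specific to that snippet, not generic kernel API):

		/*
		 * Sketch: atomically test and clear _PAGE_ACCESSED.
		 * Returns non-zero if the entry was referenced since the
		 * bit was last cleared.
		 */
		if (pte_present(ptecont) &&
		    ptep_test_and_clear_young(vma, addr, pte)) {
			DEBUG_INFO("[%#016lx - %#016lx], pfn = %#013lx",
				   addr, end, pte_pfn(ptecont));
			collect_statistics(pte_pfn(ptecont));
		}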
int huge_ptep_set_access_flags(struct vm_area_struct *vma,
			       unsigned long addr, pte_t *ptep,
			       pte_t pte, int dirty)
{
	int ncontig, i;
	size_t pgsize = 0;
	unsigned long pfn = pte_pfn(pte), dpfn;
	pgprot_t hugeprot;
	pte_t orig_pte;

	if (!pte_cont(pte))
		return ptep_set_access_flags(vma, addr, ptep, pte, dirty);

	ncontig = find_num_contig(vma->vm_mm, addr, ptep, &pgsize);
	dpfn = pgsize >> PAGE_SHIFT;

	if (!__cont_access_flags_changed(ptep, pte, ncontig))
		return 0;

	orig_pte = get_clear_flush(vma->vm_mm, addr, ptep, pgsize, ncontig);

	/* Make sure we don't lose the dirty or young state */
	if (pte_dirty(orig_pte))
		pte = pte_mkdirty(pte);

	if (pte_young(orig_pte))
		pte = pte_mkyoung(pte);

	hugeprot = pte_pgprot(pte);
	for (i = 0; i < ncontig; i++, ptep++, addr += pgsize, pfn += dpfn)
		set_pte_at(vma->vm_mm, addr, ptep, pfn_pte(pfn, hugeprot));

	return 1;
}
static int
pin_page_for_write(const void __user *_addr, pte_t **ptep, spinlock_t **ptlp)
{
	unsigned long addr = (unsigned long)_addr;
	pgd_t *pgd;
	pmd_t *pmd;
	pte_t *pte;
	pud_t *pud;
	spinlock_t *ptl;

	pgd = pgd_offset(current->mm, addr);
	if (unlikely(pgd_none(*pgd) || pgd_bad(*pgd)))
		return 0;

	pud = pud_offset(pgd, addr);
	if (unlikely(pud_none(*pud) || pud_bad(*pud)))
		return 0;

	pmd = pmd_offset(pud, addr);
	if (unlikely(pmd_none(*pmd) || pmd_bad(*pmd)))
		return 0;

	pte = pte_offset_map_lock(current->mm, pmd, addr, &ptl);
	if (unlikely(!pte_present(*pte) || !pte_young(*pte) ||
	    !pte_write(*pte) || !pte_dirty(*pte))) {
		pte_unmap_unlock(pte, ptl);
		return 0;
	}

	*ptep = pte;
	*ptlp = ptl;

	return 1;
}
int kthread_wss(void *data)
{
	unsigned long va;
	int ret;
	int wss;
	pgd_t *pgd;
	pmd_t *pmd;
	pud_t *pud;
	pte_t *ptep;
	struct task_struct *task;

	while (!kthread_should_stop()) {
		printk(KERN_INFO "Checking process' WSS.\n");
		for_each_process(task) {
			wss = 0;
			if (task->mm != NULL) {
				struct vm_area_struct *temp = task->mm->mmap;

				while (temp) {
					if (!(temp->vm_flags & VM_IO)) {
						for (va = temp->vm_start; va < temp->vm_end; va += PAGE_SIZE) {
							pgd = pgd_offset(task->mm, va);
							if (pgd_none(*pgd))
								break;
							pud = pud_offset(pgd, va);
							if (pud_none(*pud))
								break;
							pmd = pmd_offset(pud, va);
							if (pmd_none(*pmd))
								break;
							ptep = pte_offset_map(pmd, va);
							ret = 0;
							if (pte_young(*ptep)) {
								ret = test_and_clear_bit(_PAGE_BIT_ACCESSED,
											 (unsigned long *)&ptep->pte);
								wss++;
							}
							if (ret)
								pte_update(task->mm, va, ptep);
							pte_unmap(ptep);
						}
					}
					temp = temp->vm_next;
				}
				printk(KERN_INFO "%i: %i\n", task->pid, wss);
			}
		}
		msleep(1000);
	}
	return 0;
}
/*
 * huge_ptep_set_access_flags will update access flags (dirty, accessed)
 * and write permission.
 *
 * For a contiguous huge pte range we need to check whether or not write
 * permission has to change only on the first pte in the set.  Then for
 * all the contiguous ptes we need to check whether or not there is a
 * discrepancy between dirty or young.
 */
static int __cont_access_flags_changed(pte_t *ptep, pte_t pte, int ncontig)
{
	int i;

	if (pte_write(pte) != pte_write(huge_ptep_get(ptep)))
		return 1;

	for (i = 0; i < ncontig; i++) {
		pte_t orig_pte = huge_ptep_get(ptep + i);

		if (pte_dirty(pte) != pte_dirty(orig_pte))
			return 1;

		if (pte_young(pte) != pte_young(orig_pte))
			return 1;
	}

	return 0;
}
/*
 * This is called at the end of handling a user page fault, when the
 * fault has been handled by updating a PTE in the linux page tables.
 * We use it to preload an HPTE into the hash table corresponding to
 * the updated linux PTE.
 *
 * This must always be called with the pte lock held.
 */
void update_mmu_cache(struct vm_area_struct *vma, unsigned long address,
		      pte_t pte)
{
#ifdef CONFIG_PPC_STD_MMU
	unsigned long access = 0, trap;
#endif
	unsigned long pfn = pte_pfn(pte);

	/* handle i-cache coherency */
	if (!cpu_has_feature(CPU_FTR_COHERENT_ICACHE) &&
	    !cpu_has_feature(CPU_FTR_NOEXECUTE) &&
	    pfn_valid(pfn)) {
		struct page *page = pfn_to_page(pfn);
#ifdef CONFIG_8xx
		/* On 8xx, cache control instructions (particularly
		 * "dcbst" from flush_dcache_icache) fault as write
		 * operation if there is an unpopulated TLB entry
		 * for the address in question.  To work around that,
		 * we invalidate the TLB here, thus avoiding dcbst
		 * misbehaviour.
		 */
		_tlbie(address);
#endif
		if (!PageReserved(page) &&
		    !test_bit(PG_arch_1, &page->flags)) {
			if (vma->vm_mm == current->active_mm) {
				__flush_dcache_icache((void *) address);
			} else
				flush_dcache_icache_page(page);
			set_bit(PG_arch_1, &page->flags);
		}
	}

#ifdef CONFIG_PPC_STD_MMU
	/* We only want HPTEs for linux PTEs that have _PAGE_ACCESSED set */
	if (!pte_young(pte) || address >= TASK_SIZE)
		return;

	/* We try to figure out if we are coming from an instruction
	 * access fault and pass that down to __hash_page so we avoid
	 * double-faulting on execution of fresh text.  We have to test
	 * for regs NULL since init will get here first thing at boot.
	 *
	 * We also avoid filling the hash if not coming from a fault.
	 */
	if (current->thread.regs == NULL)
		return;
	trap = TRAP(current->thread.regs);
	if (trap == 0x400)
		access |= _PAGE_EXEC;
	else if (trap != 0x300)
		return;
	hash_preload(vma->vm_mm, address, access, trap);
#endif /* CONFIG_PPC_STD_MMU */
}
unsigned long l4x_set_pte(struct mm_struct *mm, unsigned long addr,
                          pte_t old, pte_t pteval)
{
	/*
	 * Check if any invalidation is necessary
	 *
	 * Invalidation (flush) necessary if:
	 *    old page was present
	 *        new page is not present OR
	 *        new page has another physical address OR
	 *        new page has another protection OR
	 *        new page has other access attributes
	 */

	/* old was present && new not -> flush */
	int flush_rights = L4_FPAGE_RWX;

	if (pte_present(pteval)) {
		/* new page is present,
		 * now we have to find out what has changed */
		if (((pte_val(old) ^ pte_val(pteval)) & L4X_PHYSICAL_PAGE_MASK)
		    || (pte_young(old) && !pte_young(pteval))) {
			/* physical page frame changed
			 * || access attribute changed -> flush */
			/* flush is the default */
		} else if ((pte_write(old) && !pte_write(pteval))
		           || (pte_dirty(old) && !pte_dirty(pteval))) {
			/* Protection changed from r/w to ro
			 * or page now clean -> remap */
			flush_rights = L4_FPAGE_W;
		} else {
			/* nothing changed, simply return */
			return pte_val(pteval);
		}
	}

	/* Ok, now actually flush or remap the page */
	l4x_flush_page(mm, pte_val(old) & L4X_PHYSICAL_PAGE_MASK, addr,
	               PAGE_SHIFT, flush_rights, _RET_IP_);
	return pte_val(pteval);
}
// Return true (!= 0) if any referenced bits are set.
static int ref_bits_set(int exclude_irqhandler)
{
	void *cur_addr;
	pte_t *pte;
	int i;
	int ret_val = 0;

	for (i = 0; i < cr_num_drivers; i++) {
		if (exclude_irqhandler)
			uprintk("i %d: ", i);
		for (cur_addr = cr_base_address[i];
		     cur_addr < cr_base_address[i] + cr_module_size[i];
		     cur_addr += PAGE_SIZE) {
			pte = virt_to_pte(cur_addr);
			if (pte != NULL) {
				// See if we're excluding the interrupt handler
				// from this check.
				if (exclude_irqhandler &&
				    addr_contains_irq_handler(cur_addr)) {
					pte_unmap(pte);
					if (exclude_irqhandler)
						uprintk("X");
					continue;
				}

				// See if the page was referenced lately.
				if (pte_young(*pte) != 0) {
					// kunmap_atomic (page, KM_IRQ1);
					pte_unmap(pte);
					if (exclude_irqhandler)
						uprintk("1");
					ret_val = 1;
					continue;
				}

				if (exclude_irqhandler)
					uprintk("0");
				// kunmap_atomic (page, KM_IRQ1);
				pte_unmap(pte);
			}
		}
		if (exclude_irqhandler)
			uprintk("\n");
	}
	return ret_val;
}
static void smaps_pte_entry(pte_t *pte, unsigned long addr,
			    struct mm_walk *walk)
{
	struct mem_size_stats *mss = walk->private;
	struct vm_area_struct *vma = walk->vma;
	struct page *page = NULL;

	if (pte_present(*pte)) {
		page = vm_normal_page(vma, addr, *pte);
	} else if (is_swap_pte(*pte)) {
		swp_entry_t swpent = pte_to_swp_entry(*pte);

		if (!non_swap_entry(swpent))
			mss->swap += PAGE_SIZE;
		else if (is_migration_entry(swpent))
			page = migration_entry_to_page(swpent);
	}

	if (!page)
		return;

	smaps_account(mss, page, PAGE_SIZE, pte_young(*pte), pte_dirty(*pte));
}
static int
pin_page_for_write(const void __user *_addr, pte_t **ptep, spinlock_t **ptlp)
{
	unsigned long addr = (unsigned long)_addr;
	pgd_t *pgd;
	pmd_t *pmd;
	pte_t *pte;
	pud_t *pud;
	spinlock_t *ptl;

	pgd = pgd_offset(current->mm, addr);
	if (unlikely(pgd_none(*pgd) || pgd_bad(*pgd)))
		return 0;

	pud = pud_offset(pgd, addr);
	if (unlikely(pud_none(*pud) || pud_bad(*pud)))
		return 0;

	pmd = pmd_offset(pud, addr);
	if (unlikely(pmd_none(*pmd)))
		return 0;

	/*
	 * A pmd can be bad if it refers to a HugeTLB or THP page.
	 *
	 * Both THP and HugeTLB pages have the same pmd layout
	 * and should not be manipulated by the pte functions.
	 *
	 * Lock the page table for the destination and check
	 * to see that it's still huge and whether or not we will
	 * need to fault on write, or if we have a splitting THP.
	 */
	if (unlikely(pmd_thp_or_huge(*pmd))) {
		ptl = &current->mm->page_table_lock;
		spin_lock(ptl);
		if (unlikely(!pmd_thp_or_huge(*pmd) ||
			     pmd_hugewillfault(*pmd) ||
			     pmd_trans_splitting(*pmd))) {
			spin_unlock(ptl);
			return 0;
		}

		*ptep = NULL;
		*ptlp = ptl;
		return 1;
	}

	if (unlikely(pmd_bad(*pmd)))
		return 0;

	pte = pte_offset_map_lock(current->mm, pmd, addr, &ptl);
	if (unlikely(!pte_present(*pte) || !pte_young(*pte) ||
	    !pte_write(*pte) || !pte_dirty(*pte))) {
		pte_unmap_unlock(pte, ptl);
		return 0;
	}

	*ptep = pte;
	*ptlp = ptl;

	return 1;
}
/*
 * Note this is constrained to return 0, -EFAULT, -EACCES, -ENOMEM by
 * segv().
 */
int handle_page_fault(unsigned long address, unsigned long ip,
		      int is_write, int is_user, int *code_out)
{
	struct mm_struct *mm = current->mm;
	struct vm_area_struct *vma;
	pgd_t *pgd;
	pud_t *pud;
	pmd_t *pmd;
	pte_t *pte;
	int err = -EFAULT;

	*code_out = SEGV_MAPERR;

	/*
	 * If the fault was during atomic operation, don't take the fault, just
	 * fail.
	 */
	if (in_atomic())
		goto out_nosemaphore;

	down_read(&mm->mmap_sem);
	vma = find_vma(mm, address);
	if (!vma)
		goto out;
	else if (vma->vm_start <= address)
		goto good_area;
	else if (!(vma->vm_flags & VM_GROWSDOWN))
		goto out;
	else if (is_user && !ARCH_IS_STACKGROW(address))
		goto out;
	else if (expand_stack(vma, address))
		goto out;

good_area:
	*code_out = SEGV_ACCERR;
	if (is_write && !(vma->vm_flags & VM_WRITE))
		goto out;

	/* Don't require VM_READ|VM_EXEC for write faults! */
	if (!is_write && !(vma->vm_flags & (VM_READ | VM_EXEC)))
		goto out;

	do {
		int fault;
survive:
		fault = handle_mm_fault(mm, vma, address, is_write);
		if (unlikely(fault & VM_FAULT_ERROR)) {
			if (fault & VM_FAULT_OOM) {
				err = -ENOMEM;
				goto out_of_memory;
			} else if (fault & VM_FAULT_SIGBUS) {
				err = -EACCES;
				goto out;
			}
			BUG();
		}
		if (fault & VM_FAULT_MAJOR)
			current->maj_flt++;
		else
			current->min_flt++;

		pgd = pgd_offset(mm, address);
		pud = pud_offset(pgd, address);
		pmd = pmd_offset(pud, address);
		pte = pte_offset_kernel(pmd, address);
	} while (!pte_present(*pte));
	err = 0;

	/*
	 * The below warning was added in place of
	 *	pte_mkyoung(); if (is_write) pte_mkdirty();
	 * If it's triggered, we'd see normally a hang here (a clean pte is
	 * marked read-only to emulate the dirty bit).
	 * However, the generic code can mark a PTE writable but clean on a
	 * concurrent read fault, triggering this harmlessly.  So comment it out.
	 */
#if 0
	WARN_ON(!pte_young(*pte) || (is_write && !pte_dirty(*pte)));
#endif
	flush_tlb_page(vma, address);
out:
	up_read(&mm->mmap_sem);
out_nosemaphore:
	return err;

	/*
	 * We ran out of memory, or some other thing happened to us that made
	 * us unable to handle the page fault gracefully.
	 */
out_of_memory:
	if (is_global_init(current)) {
		up_read(&mm->mmap_sem);
		yield();
		down_read(&mm->mmap_sem);
		goto survive;
	}
	goto out;
}
/*
 * Note this is constrained to return 0, -EFAULT, -EACCES, -ENOMEM by
 * segv().
 */
int handle_page_fault(unsigned long address, unsigned long ip,
		      int is_write, int is_user, int *code_out)
{
	struct mm_struct *mm = current->mm;
	struct vm_area_struct *vma;
	pgd_t *pgd;
	pud_t *pud;
	pmd_t *pmd;
	pte_t *pte;
	int err = -EFAULT;
	unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE |
			     (is_write ? FAULT_FLAG_WRITE : 0);

	*code_out = SEGV_MAPERR;

	/*
	 * If the fault was during atomic operation, don't take the fault, just
	 * fail.
	 */
	if (in_atomic())
		goto out_nosemaphore;

retry:
	down_read(&mm->mmap_sem);
	vma = find_vma(mm, address);
	if (!vma)
		goto out;
	else if (vma->vm_start <= address)
		goto good_area;
	else if (!(vma->vm_flags & VM_GROWSDOWN))
		goto out;
	else if (is_user && !ARCH_IS_STACKGROW(address))
		goto out;
	else if (expand_stack(vma, address))
		goto out;

good_area:
	*code_out = SEGV_ACCERR;
	if (is_write && !(vma->vm_flags & VM_WRITE))
		goto out;

	/* Don't require VM_READ|VM_EXEC for write faults! */
	if (!is_write && !(vma->vm_flags & (VM_READ | VM_EXEC)))
		goto out;

	do {
		int fault;

		fault = handle_mm_fault(mm, vma, address, flags);

		if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(current))
			goto out_nosemaphore;

		if (unlikely(fault & VM_FAULT_ERROR)) {
			if (fault & VM_FAULT_OOM) {
				goto out_of_memory;
			} else if (fault & VM_FAULT_SIGBUS) {
				err = -EACCES;
				goto out;
			}
			BUG();
		}
		if (flags & FAULT_FLAG_ALLOW_RETRY) {
			if (fault & VM_FAULT_MAJOR)
				current->maj_flt++;
			else
				current->min_flt++;
			if (fault & VM_FAULT_RETRY) {
				flags &= ~FAULT_FLAG_ALLOW_RETRY;
				flags |= FAULT_FLAG_TRIED;

				goto retry;
			}
		}

		pgd = pgd_offset(mm, address);
		pud = pud_offset(pgd, address);
		pmd = pmd_offset(pud, address);
		pte = pte_offset_kernel(pmd, address);
	} while (!pte_present(*pte));
	err = 0;

	/*
	 * The below warning was added in place of
	 *	pte_mkyoung(); if (is_write) pte_mkdirty();
	 * If it's triggered, we'd see normally a hang here (a clean pte is
	 * marked read-only to emulate the dirty bit).
	 * However, the generic code can mark a PTE writable but clean on a
	 * concurrent read fault, triggering this harmlessly.  So comment it out.
	 */
#if 0
	WARN_ON(!pte_young(*pte) || (is_write && !pte_dirty(*pte)));
#endif
	flush_tlb_page(vma, address);
out:
	up_read(&mm->mmap_sem);
out_nosemaphore:
	return err;

out_of_memory:
	/*
	 * We ran out of memory, call the OOM killer, and return to userspace
	 * (which will retry the fault, or kill us if we got oom-killed).
	 */
	up_read(&mm->mmap_sem);
	pagefault_out_of_memory();
	return 0;
}
static void fix_range(struct mm_struct *mm, unsigned long start_addr,
		      unsigned long end_addr, int force)
{
	pgd_t *npgd;
	pud_t *npud;
	pmd_t *npmd;
	pte_t *npte;
	unsigned long addr, end;
	int r, w, x, err, fd;

	if (mm == NULL)
		return;
	fd = mm->context.skas.mm_fd;
	for (addr = start_addr; addr < end_addr;) {
		npgd = pgd_offset(mm, addr);
		if (!pgd_present(*npgd)) {
			if (force || pgd_newpage(*npgd)) {
				end = addr + PGDIR_SIZE;
				if (end > end_addr)
					end = end_addr;
				err = unmap(fd, (void *) addr, end - addr);
				if (err < 0)
					panic("munmap failed, errno = %d\n",
					      -err);
				pgd_mkuptodate(*npgd);
			}
			addr += PGDIR_SIZE;
			continue;
		}

		npud = pud_offset(npgd, addr);
		if (!pud_present(*npud)) {
			if (force || pud_newpage(*npud)) {
				end = addr + PUD_SIZE;
				if (end > end_addr)
					end = end_addr;
				err = unmap(fd, (void *) addr, end - addr);
				if (err < 0)
					panic("munmap failed, errno = %d\n",
					      -err);
				pud_mkuptodate(*npud);
			}
			addr += PUD_SIZE;
			continue;
		}

		npmd = pmd_offset(npud, addr);
		if (!pmd_present(*npmd)) {
			if (force || pmd_newpage(*npmd)) {
				end = addr + PMD_SIZE;
				if (end > end_addr)
					end = end_addr;
				err = unmap(fd, (void *) addr, end - addr);
				if (err < 0)
					panic("munmap failed, errno = %d\n",
					      -err);
				pmd_mkuptodate(*npmd);
			}
			addr += PMD_SIZE;
			continue;
		}

		npte = pte_offset_kernel(npmd, addr);
		r = pte_read(*npte);
		w = pte_write(*npte);
		x = pte_exec(*npte);
		if (!pte_dirty(*npte))
			w = 0;
		if (!pte_young(*npte)) {
			r = 0;
			w = 0;
		}
		if (force || pte_newpage(*npte)) {
			err = unmap(fd, (void *) addr, PAGE_SIZE);
			if (err < 0)
				panic("munmap failed, errno = %d\n", -err);
			if (pte_present(*npte))
				map(fd, addr, pte_val(*npte) & PAGE_MASK,
				    PAGE_SIZE, r, w, x);
		} else if (pte_newprot(*npte))
			protect(fd, addr, PAGE_SIZE, r, w, x, 1);
		*npte = pte_mkuptodate(*npte);
		addr += PAGE_SIZE;
	}
}
/* Sequentially scan the page tables and count pages via the accessed bit. */
static int scan_pgtable(void)
{
	pgd_t *pgd = NULL;
	pud_t *pud = NULL;
	pmd_t *pmd = NULL;
	pte_t *ptep, pte;
	spinlock_t *ptl;
	struct mm_struct *mm;
	struct vm_area_struct *vma;

	unsigned long start = 0;	/* start address of a VMA */
	unsigned long end = 0;		/* end address of a VMA */
	unsigned long address = 0;	/* address currently being scanned */

	int number_hotpages = 0;	/* number of hot pages */
	int number_vpages = 0;
	int cycle_index = 0;		/* loop counter over ITERATIONS */
	/* records the number of hot pages found in every cycle */
	int hot_page[ITERATIONS];
	int number_current_pg = 0;
	int pg_count = 0;
	int j = 0;
	int times = 0;			/* records reuse time */

	/* variables that describe page "heat" */
	int hig = 0;
	int mid = 0;
	int low = 0;
	int llow = 0;
	int lllow = 0;
	int llllow = 0;
	int all_pages = 0;		/* total number of pages */
	/* average number of hot pages in each iteration */
	long avg_hotpage = 0;
	/* total number of memory accesses across all pages */
	long num_access = 0;
	/* average utilization of each page */
	int avg_page_utilization = 0;

	/* get the handle of the currently running benchmark */
	struct task_struct *bench_process = get_current_process();

	if (bench_process == NULL) {
		printk("sysmon: no process handle in scan_pgtable(), exiting and retrying...\n");
		return 0;
	}
	mm = bench_process->mm;
	if (mm == NULL) {
		printk("sysmon: error, mm is NULL, returning and retrying...\n");
		return 0;
	}

	for (j = 0; j < PAGE_ALL; j++)
		page_heat[j] = -1;
	for (j = 0; j < ITERATIONS; j++) {
		hot_page[j] = 0;
		reuse_time[j] = 0;
		dirty_page[j] = 0;
	}

	times = 0;
	for (cycle_index = 0; cycle_index < ITERATIONS; cycle_index++) {
		number_hotpages = 0;
		/* first pass: scan each VMA and clear the accessed/dirty bits */
		for (vma = mm->mmap; vma; vma = vma->vm_next) {
			start = vma->vm_start;
			end = vma->vm_end;
			mm = vma->vm_mm;
			/* in each VMA, we check all pages */
			for (address = start; address < end; address += PAGE_SIZE) {
				/* walk the page table for each page in this VMA */
				pgd = pgd_offset(mm, address);
				if (pgd_none(*pgd) || unlikely(pgd_bad(*pgd)))
					continue;
				pud = pud_offset(pgd, address);
				if (pud_none(*pud) || unlikely(pud_bad(*pud)))
					continue;
				pmd = pmd_offset(pud, address);
				if (pmd_none(*pmd) || unlikely(pmd_bad(*pmd)))
					continue;
				ptep = pte_offset_map_lock(mm, pmd, address, &ptl);
				pte = *ptep;
				if (pte_present(pte)) {
					if (pte_young(pte)) { /* hot page */
						/* reset _PAGE_ACCESSED to 0 */
						pte = pte_mkold(pte);
						set_pte_at(mm, address, ptep, pte);
						/* yanghao: reset _PAGE_DIRTY to 0 */
						pte = pte_mkclean(pte);
						set_pte_at(mm, address, ptep, pte);
					}
				} else { /* no page (pte_none) */
					pte_unmap_unlock(ptep, ptl);
					continue;
				}
				pte_unmap_unlock(ptep, ptl);
				page_counts++;
			}
		}

		/* second pass: count the number of hot pages */
		if (bench_process == NULL) {
			printk("sysmon: no process handle in scan_pgtable(), exiting and retrying...\n");
			return 0;
		}
		mm = bench_process->mm;
		if (mm == NULL) {
			printk("sysmon: error, mm is NULL, returning and retrying...\n");
			return 0;
		}

		number_vpages = 0;
		sampling_interval = page_counts / 250;
		page_counts = 0;

		for (vma = mm->mmap; vma; vma = vma->vm_next) {
			start = vma->vm_start;
			end = vma->vm_end;
			/* scan each page in this VMA */
			mm = vma->vm_mm;
			pg_count = 0;
			for (address = start; address < end; address += PAGE_SIZE) {
				/* walk the page table for each page in this VMA */
				pgd = pgd_offset(mm, address);
				if (pgd_none(*pgd) || unlikely(pgd_bad(*pgd)))
					continue;
				pud = pud_offset(pgd, address);
				if (pud_none(*pud) || unlikely(pud_bad(*pud)))
					continue;
				pmd = pmd_offset(pud, address);
				if (pmd_none(*pmd) || unlikely(pmd_bad(*pmd)))
					continue;
				ptep = pte_offset_map_lock(mm, pmd, address, &ptl);
				pte = *ptep;
				if (pte_present(pte)) {
					if (pte_young(pte)) { /* hot page */
						number_current_pg = pg_count + number_vpages;
						page_heat[number_current_pg]++;
						hot_page[cycle_index]++;
						if (page_counts == random_page) {
							times++;
							if (pte_dirty(pte))
								dirty_page[cycle_index] = 1;
						}
					} else {
						if (page_counts == random_page)
							reuse_time[times]++;
					}
				}
				pg_count++;
				pte_unmap_unlock(ptep, ptl);
				page_counts++;
			}
			number_vpages += (int)(end - start) / PAGE_SIZE;
		}
	}

	/* yanghao: compute the number of the next sampled page */
	random_page += sampling_interval;
	if (random_page >= page_counts)
		random_page = page_counts / 300;

	/*****************************OUTPUT************************************/
	for (j = 0; j < PAGE_ALL; j++) {
		if (page_heat[j] < VH && page_heat[j] > H)
			hig++;
		if (page_heat[j] > M && page_heat[j] <= H)
			mid++;
		if (page_heat[j] <= M && page_heat[j] > L)
			low++;
		if (page_heat[j] > VL_MAX && page_heat[j] <= L)
			llow++;
		if (page_heat[j] > VL_MIN && page_heat[j] <= VL_MAX)
			lllow++;
		if (page_heat[j] >= 0 && page_heat[j] <= VL_MIN)
			llllow++;
		if (page_heat[j] > -1)
			all_pages++;
	}

	/* these values reflect the access frequency of each physical page */
	printk("[LOG: after sampling (%d loops) ...] ", ITERATIONS);
	printk("the values denote the physical page access frequency.\n");
	printk("-->hig (150,200) is %d. The number of re-used pages is high.\n", hig);
	printk("-->mid (100,150] is %d.\n", mid);
	printk("-->low (64,100] is %d.\n", low);
	printk("-->llow (10,64] is %d. Locality is poor: few re-used pages.\n", llow);
	printk("-->lllow (5,10] is %d.\n", lllow);
	printk("-->llllow [1,5] is %d.\n", llllow);

	for (j = 0; j < ITERATIONS; j++)
		avg_hotpage += hot_page[j];
	avg_hotpage /= (j + 1);

	/*
	 * new step@20140704
	 * (1) the different phases of memory utilization
	 * (2) the average page access utilization
	 * (3) memory page layout and spectrum
	 */
	for (j = 0; j < PAGE_ALL; j++)
		if (page_heat[j] > -1)	/* pages accessed at least once */
			num_access += (page_heat[j] + 1);
	printk("the total number of memory accesses is %ld, the average is %ld\n",
	       num_access, num_access / ITERATIONS);
	avg_page_utilization = num_access / all_pages;
	printk("Avg hot pages num is %ld, all used pages num is %d, avg utilization of each page is %d\n",
	       avg_hotpage, all_pages, avg_page_utilization);

	/* yanghao: print the reuse-distance information */
	if ((times == 0) && (reuse_time[0] == 0))
		printk("page No.%d is not available.", random_page);
	else {
		if ((times == 0) && (reuse_time[0] == 0))
			printk("page No.%d was not used in these 200 loops.", random_page);
		else {
			if (times < ITERATIONS)
				times++;
			printk("the reuse time of page No.%d is:", random_page);
			for (j = 0; j < times; j++)
				printk("%d ", reuse_time[j]);
			printk("\n");
			printk("the digits above sum to the number of times page No.%d was accessed in %d loops.\n",
			       random_page, ITERATIONS);
			printk("each digit is the number of loops between the current access and the previous one.\n");
		}
	}
	printk("\n\n");
	return 1;
}
static void fix_range(struct mm_struct *mm, unsigned long start_addr,
		      unsigned long end_addr, int force)
{
	pgd_t *npgd;
	pmd_t *npmd;
	pte_t *npte;
	unsigned long addr;
	int r, w, x, err;

	if ((current->thread.mode.tt.extern_pid != -1) &&
	    (current->thread.mode.tt.extern_pid != os_getpid()))
		panic("fix_range fixing wrong address space, current = 0x%p",
		      current);
	if (mm == NULL)
		return;
	for (addr = start_addr; addr < end_addr;) {
		if (addr == TASK_SIZE) {
			/* Skip over kernel text, kernel data, and physical
			 * memory, which don't have ptes, plus kernel virtual
			 * memory, which is flushed separately, and remap
			 * the process stack.  The only way to get here is
			 * if (end_addr == STACK_TOP) > TASK_SIZE, which is
			 * only true in the honeypot case.
			 */
			addr = STACK_TOP - ABOVE_KMEM;
			continue;
		}
		npgd = pgd_offset(mm, addr);
		npmd = pmd_offset(npgd, addr);
		if (pmd_present(*npmd)) {
			npte = pte_offset_kernel(npmd, addr);
			r = pte_read(*npte);
			w = pte_write(*npte);
			x = pte_exec(*npte);
			if (!pte_dirty(*npte))
				w = 0;
			if (!pte_young(*npte)) {
				r = 0;
				w = 0;
			}
			if (force || pte_newpage(*npte)) {
				err = os_unmap_memory((void *) addr, PAGE_SIZE);
				if (err < 0)
					panic("munmap failed, errno = %d\n",
					      -err);
				if (pte_present(*npte))
					map_memory(addr,
						   pte_val(*npte) & PAGE_MASK,
						   PAGE_SIZE, r, w, x);
			} else if (pte_newprot(*npte)) {
				protect_memory(addr, PAGE_SIZE, r, w, x, 1);
			}
			*npte = pte_mkuptodate(*npte);
			addr += PAGE_SIZE;
		} else {
			if (force || pmd_newpage(*npmd)) {
				err = os_unmap_memory((void *) addr, PMD_SIZE);
				if (err < 0)
					panic("munmap failed, errno = %d\n",
					      -err);
				pmd_mkuptodate(*npmd);
			}
			addr += PMD_SIZE;
		}
	}
}