Example No. 1
unsigned long l4x_set_pte(struct mm_struct *mm,
                          unsigned long addr,
                          pte_t old, pte_t pteval)
{
	/*
	 * Check if any invalidation is necessary
	 *
	 * Invalidation (flush) necessary if:
	 *   old page was present
	 *       new page is not present OR
	 *       new page has another physical address OR
	 *       new page has another protection OR
	 *       new page has other access attributes
	 */

	/* old was present && new not -> flush */
	int flush_rights = L4_FPAGE_RWX;
#if 0
	if ((pte_val(old) & PAGE_MASK) != (pte_val(pteval) & PAGE_MASK))
		printk("spte %x->%x\n", pte_val(old), pte_val(pteval));
#endif
	if (pte_present(pteval)) {
		/* new page is present,
		 * now we have to find out what has changed */
		if (((pte_val(old) ^ pte_val(pteval)) & PAGE_MASK)
		    || (pte_young(old) && !pte_young(pteval))) {
			/* physical page frame changed
			 * || access attribute changed -> flush */
			/* flush is the default */
			//pteval.pte_low &= ~_PAGE_MAPPED;
			pteval = __pte(pte_val(pteval) & ~_PAGE_MAPPED);

		} else if ((pte_write(old) && !pte_write(pteval))
		           || (pte_dirty(old) && !pte_dirty(pteval))) {
			/* Protection changed from r/w to ro
			 * or page now clean -> remap */
			flush_rights = L4_FPAGE_W;
			check_pte_mapped(old, pteval, "RW->RO");
		} else {
			/* nothing changed, simply return */
			check_pte_mapped(old, pteval, "NoChg");
			return pte_val(pteval);
		}
	}

	/* Ok, now actually flush or remap the page */
	L4XV_FN_v(l4x_flush_page(mm, pte_val(old), addr, PAGE_SHIFT, flush_rights));
	return pte_val(pteval);
}
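The comment block at the top of this example boils down to a three-way decision: flush the whole mapping, revoke only the write right, or do nothing. The fragment below restates that decision as a self-contained user-space C sketch. The flag bits, the enum and pte_transition() are invented for the illustration and are not L4Linux or Linux definitions.

#include <stdio.h>

/* Illustrative flag bits only -- not the real pte layout. */
#define P_PRESENT  0x01u
#define P_WRITE    0x02u
#define P_DIRTY    0x04u
#define P_ACCESSED 0x08u
#define P_FRAME    0xfffff000u   /* pretend page-frame field */

enum pte_action { ACT_NONE, ACT_REVOKE_WRITE, ACT_FLUSH_ALL };

static enum pte_action pte_transition(unsigned int old_pte, unsigned int new_pte)
{
	if (new_pte & P_PRESENT) {
		/* Frame changed, or the accessed bit was taken away: full flush. */
		if (((old_pte ^ new_pte) & P_FRAME) ||
		    ((old_pte & P_ACCESSED) && !(new_pte & P_ACCESSED)))
			return ACT_FLUSH_ALL;
		/* Write permission or dirty bit dropped: revoke write rights only. */
		if (((old_pte & P_WRITE) && !(new_pte & P_WRITE)) ||
		    ((old_pte & P_DIRTY) && !(new_pte & P_DIRTY)))
			return ACT_REVOKE_WRITE;
		/* Nothing relevant changed. */
		return ACT_NONE;
	}
	/* New pte not present: the old mapping has to go away entirely. */
	return ACT_FLUSH_ALL;
}

int main(void)
{
	unsigned int old_pte = 0x1000u | P_PRESENT | P_WRITE | P_DIRTY | P_ACCESSED;

	printf("%d\n", pte_transition(old_pte, old_pte));                        /* 0: ACT_NONE */
	printf("%d\n", pte_transition(old_pte, old_pte & ~(P_WRITE | P_DIRTY))); /* 1: ACT_REVOKE_WRITE */
	printf("%d\n", pte_transition(old_pte, 0x2000u | P_PRESENT));            /* 2: ACT_FLUSH_ALL */
	return 0;
}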
Example No. 2
/*
 * Changing some bits of contiguous entries requires us to follow a
 * Break-Before-Make approach, breaking the whole contiguous set
 * before we can change any entries. See ARM DDI 0487A.k_iss10775,
 * "Misprogramming of the Contiguous bit", page D4-1762.
 *
 * This helper performs the break step.
 */
static pte_t get_clear_flush(struct mm_struct *mm,
			     unsigned long addr,
			     pte_t *ptep,
			     unsigned long pgsize,
			     unsigned long ncontig)
{
	pte_t orig_pte = huge_ptep_get(ptep);
	bool valid = pte_valid(orig_pte);
	unsigned long i, saddr = addr;

	for (i = 0; i < ncontig; i++, addr += pgsize, ptep++) {
		pte_t pte = ptep_get_and_clear(mm, addr, ptep);

		/*
		 * If HW_AFDBM is enabled, then the HW could turn on
		 * the dirty or accessed bit for any page in the set,
		 * so check them all.
		 */
		if (pte_dirty(pte))
			orig_pte = pte_mkdirty(orig_pte);

		if (pte_young(pte))
			orig_pte = pte_mkyoung(orig_pte);
	}

	if (valid) {
		struct vm_area_struct vma = TLB_FLUSH_VMA(mm, 0);
		flush_tlb_range(&vma, saddr, addr);
	}
	return orig_pte;
}
Example No. 3
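/*
 * Walk the ptes covering [addr, end) under the given pmd, record (via
 * collect_statistics()) every present page whose accessed bit is set,
 * and clear the accessed bit so that the next scan only sees references
 * made after this one.
 */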
static unsigned long clear_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
				     unsigned long addr, unsigned long end)
{
	pte_t *pte;
	pte_t ptecont;

	pte = pte_offset_map(pmd, addr);
	do {
		ptecont = *pte;

		if (pte_none(ptecont))
			continue;

		/*
		 * pte_young() is a confusing name: it simply tests the
		 * _PAGE_ACCESSED bit. pte_accessed() would be a better name.
		 */
		if (pte_present(ptecont) && pte_young(ptecont)) {
			/*
			 * The physical page, which this pte points to, has
			 * been read or written to during this time period.
			 */
			DEBUG_INFO("[%#016lx - %#016lx], pfn = %#013lx", addr, end, pte_pfn(ptecont));
			collect_statistics(pte_pfn(ptecont));
			/*
			 * pte_clear_flags() only returns a modified copy, so the
			 * result has to be written back for the clear to take effect.
			 */
			ptecont = pte_clear_flags(ptecont, _PAGE_ACCESSED);
			set_pte_at(vma->vm_mm, addr, pte, ptecont);
		}
	} while (pte++, addr += PAGE_SIZE, addr != end);
	pte_unmap(pte - 1);

	return addr;
}
Example No. 4
int huge_ptep_set_access_flags(struct vm_area_struct *vma,
			       unsigned long addr, pte_t *ptep,
			       pte_t pte, int dirty)
{
	int ncontig, i;
	size_t pgsize = 0;
	unsigned long pfn = pte_pfn(pte), dpfn;
	pgprot_t hugeprot;
	pte_t orig_pte;

	if (!pte_cont(pte))
		return ptep_set_access_flags(vma, addr, ptep, pte, dirty);

	ncontig = find_num_contig(vma->vm_mm, addr, ptep, &pgsize);
	dpfn = pgsize >> PAGE_SHIFT;

	if (!__cont_access_flags_changed(ptep, pte, ncontig))
		return 0;

	orig_pte = get_clear_flush(vma->vm_mm, addr, ptep, pgsize, ncontig);

	/* Make sure we don't lose the dirty or young state */
	if (pte_dirty(orig_pte))
		pte = pte_mkdirty(pte);

	if (pte_young(orig_pte))
		pte = pte_mkyoung(pte);

	hugeprot = pte_pgprot(pte);
	for (i = 0; i < ncontig; i++, ptep++, addr += pgsize, pfn += dpfn)
		set_pte_at(vma->vm_mm, addr, ptep, pfn_pte(pfn, hugeprot));

	return 1;
}
Example No. 5

static int
pin_page_for_write(const void __user *_addr, pte_t **ptep, spinlock_t **ptlp)
{
	unsigned long addr = (unsigned long)_addr;
	pgd_t *pgd;
	pmd_t *pmd;
	pte_t *pte;
	pud_t *pud;
	spinlock_t *ptl;

	pgd = pgd_offset(current->mm, addr);
	if (unlikely(pgd_none(*pgd) || pgd_bad(*pgd)))
		return 0;

	pud = pud_offset(pgd, addr);
	if (unlikely(pud_none(*pud) || pud_bad(*pud)))
		return 0;

	pmd = pmd_offset(pud, addr);
	if (unlikely(pmd_none(*pmd) || pmd_bad(*pmd)))
		return 0;

	pte = pte_offset_map_lock(current->mm, pmd, addr, &ptl);
	if (unlikely(!pte_present(*pte) || !pte_young(*pte) ||
	    !pte_write(*pte) || !pte_dirty(*pte))) {
		pte_unmap_unlock(pte, ptl);
		return 0;
	}

	*ptep = pte;
	*ptlp = ptl;

	return 1;
}
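The pgd -> pud -> pmd -> pte descent with its none/bad checks recurs throughout these examples (see also Examples No. 6, 12, 13 and 16). For reference, the sketch below factors that walk into one helper, written against the same 4-level page-table API the examples use (newer kernels add a p4d_offset() level in between); walk_to_pte_locked() is a made-up name for this sketch, not an existing kernel function.

/* Sketch only: return the mapped and locked pte for addr, or NULL if any
 * intermediate level is missing or bad. The caller must pte_unmap_unlock()
 * the returned pte. */
static pte_t *walk_to_pte_locked(struct mm_struct *mm, unsigned long addr,
				 spinlock_t **ptlp)
{
	pgd_t *pgd;
	pud_t *pud;
	pmd_t *pmd;

	pgd = pgd_offset(mm, addr);
	if (pgd_none(*pgd) || pgd_bad(*pgd))
		return NULL;

	pud = pud_offset(pgd, addr);
	if (pud_none(*pud) || pud_bad(*pud))
		return NULL;

	pmd = pmd_offset(pud, addr);
	if (pmd_none(*pmd) || pmd_bad(*pmd))
		return NULL;

	return pte_offset_map_lock(mm, pmd, addr, ptlp);
}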
Example No. 6
File: wss.c Project: dpwong/cse430
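/*
 * Kernel thread: once per second, walk the page tables of every process
 * (skipping VM_IO vmas), count the pages whose accessed bit is set as that
 * process' working-set size, clear those bits for the next interval, and
 * print the result per pid.
 */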
int kthread_wss(void *data)
{
	unsigned long va;
	int ret;
	int wss;

	pgd_t *pgd;
	pmd_t *pmd;
	pud_t *pud;
	pte_t *ptep;
	
	struct task_struct *task;
	while(!kthread_should_stop())
	{
		printk(KERN_INFO "Checking process' WSS.\n");
		for_each_process(task)
		{
			wss = 0;
			if(task->mm != NULL)
			{
				struct vm_area_struct *temp = task->mm->mmap;
				while(temp)
				{
					if(!(temp->vm_flags & VM_IO))
					{
						for(va = temp->vm_start; va < temp->vm_end; va+=PAGE_SIZE)
						{
				  			pgd = pgd_offset(task->mm,va);
			 		  		if(pgd_none(*pgd))
								break;
							pud = pud_offset(pgd,va);
							if(pud_none(*pud))
								break;
							pmd = pmd_offset(pud,va);
							if(pmd_none(*pmd))
								break;
							ptep = pte_offset_map(pmd,va);
							ret = 0;
							if(pte_young(*ptep))
							{
								ret = test_and_clear_bit(_PAGE_BIT_ACCESSED,
											 (unsigned long *) &ptep->pte);
								wss++;
							}
							if(ret)
							{
								pte_update(task->mm, va, ptep);
							}
							pte_unmap(ptep);
						}
					}
					temp = temp->vm_next;
				}
				printk(KERN_INFO "%i: %i\n", task->pid, wss);
			}
		}
	msleep(1000);
	}
	return 0;
}
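kthread_wss() estimates a working-set size the same way Example No. 3 does: pages whose accessed bit is set are counted, and the bit is cleared so that only references made after this scan show up in the next one. The toy user-space model below illustrates that sampling scheme; accessed[], touch() and sample() are inventions for the illustration, standing in for the hardware-maintained _PAGE_ACCESSED bit, a memory reference, and one scan period.

#include <stdio.h>

#define NPAGES 8

static unsigned char accessed[NPAGES];	/* stands in for _PAGE_ACCESSED */

static void touch(int page)		/* the MMU would set the bit on a reference */
{
	accessed[page] = 1;
}

static int sample(void)			/* one scan period: count and clear */
{
	int wss = 0;
	int i;

	for (i = 0; i < NPAGES; i++) {
		if (accessed[i]) {
			wss++;
			accessed[i] = 0;	/* clear so the next period starts fresh */
		}
	}
	return wss;
}

int main(void)
{
	touch(0); touch(1); touch(5);
	printf("period 1 wss = %d\n", sample());	/* 3 */
	touch(1);
	printf("period 2 wss = %d\n", sample());	/* 1 */
	printf("period 3 wss = %d\n", sample());	/* 0 */
	return 0;
}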
Example No. 7
/*
 * huge_ptep_set_access_flags will update access flags (dirty, accessed)
 * and write permission.
 *
 * For a contiguous huge pte range, whether write permission has to change
 * only needs to be checked on the first pte in the set. Then, for all the
 * contiguous ptes, we need to check whether the dirty or young state
 * differs.
 */
static int __cont_access_flags_changed(pte_t *ptep, pte_t pte, int ncontig)
{
	int i;

	if (pte_write(pte) != pte_write(huge_ptep_get(ptep)))
		return 1;

	for (i = 0; i < ncontig; i++) {
		pte_t orig_pte = huge_ptep_get(ptep + i);

		if (pte_dirty(pte) != pte_dirty(orig_pte))
			return 1;

		if (pte_young(pte) != pte_young(orig_pte))
			return 1;
	}

	return 0;
}
Example No. 8
/*
 * This is called at the end of handling a user page fault, when the
 * fault has been handled by updating a PTE in the linux page tables.
 * We use it to preload an HPTE into the hash table corresponding to
 * the updated linux PTE.
 * 
 * This must always be called with the pte lock held.
 */
void update_mmu_cache(struct vm_area_struct *vma, unsigned long address,
		      pte_t pte)
{
#ifdef CONFIG_PPC_STD_MMU
	unsigned long access = 0, trap;
#endif
	unsigned long pfn = pte_pfn(pte);

	/* handle i-cache coherency */
	if (!cpu_has_feature(CPU_FTR_COHERENT_ICACHE) &&
	    !cpu_has_feature(CPU_FTR_NOEXECUTE) &&
	    pfn_valid(pfn)) {
		struct page *page = pfn_to_page(pfn);
#ifdef CONFIG_8xx
		/* On 8xx, cache control instructions (particularly
		 * "dcbst" from flush_dcache_icache) fault as write
		 * operation if there is an unpopulated TLB entry
		 * for the address in question. To workaround that,
		 * we invalidate the TLB here, thus avoiding dcbst
		 * misbehaviour.
		 */
		_tlbie(address);
#endif
		if (!PageReserved(page)
		    && !test_bit(PG_arch_1, &page->flags)) {
			if (vma->vm_mm == current->active_mm) {
				__flush_dcache_icache((void *) address);
			} else
				flush_dcache_icache_page(page);
			set_bit(PG_arch_1, &page->flags);
		}
	}

#ifdef CONFIG_PPC_STD_MMU
	/* We only want HPTEs for linux PTEs that have _PAGE_ACCESSED set */
	if (!pte_young(pte) || address >= TASK_SIZE)
		return;

	/* We try to figure out if we are coming from an instruction
	 * access fault and pass that down to __hash_page so we avoid
	 * double-faulting on execution of fresh text. We have to test
	 * for regs NULL since init will get here first thing at boot
	 *
	 * We also avoid filling the hash if not coming from a fault
	 */
	if (current->thread.regs == NULL)
		return;
	trap = TRAP(current->thread.regs);
	if (trap == 0x400)
		access |= _PAGE_EXEC;
	else if (trap != 0x300)
		return;
	hash_preload(vma->vm_mm, address, access, trap);
#endif /* CONFIG_PPC_STD_MMU */
}
Example No. 9
unsigned long l4x_set_pte(struct mm_struct *mm,
                          unsigned long addr,
                          pte_t old, pte_t pteval)
{
	/*
	 * Check if any invalidation is necessary
	 *
	 * Invalidation (flush) necessary if:
	 *   old page was present
	 *       new page is not present OR
	 *       new page has another physical address OR
	 *       new page has another protection OR
	 *       new page has other access attributes
	 */

	/* old was present && new not -> flush */
	int flush_rights = L4_FPAGE_RWX;

	if (pte_present(pteval)) {
		/* new page is present,
		 * now we have to find out what has changed */
		if (((pte_val(old) ^ pte_val(pteval)) & L4X_PHYSICAL_PAGE_MASK)
		    || (pte_young(old) && !pte_young(pteval))) {
			/* physical page frame changed
			 * || access attribute changed -> flush */
			/* flush is the default */
		} else if ((pte_write(old) && !pte_write(pteval))
		           || (pte_dirty(old) && !pte_dirty(pteval))) {
			/* Protection changed from r/w to ro
			 * or page now clean -> remap */
			flush_rights = L4_FPAGE_W;
		} else {
			/* nothing changed, simply return */
			return pte_val(pteval);
		}
	}

	/* Ok, now actually flush or remap the page */
	l4x_flush_page(mm, pte_val(old) & L4X_PHYSICAL_PAGE_MASK,
	               addr, PAGE_SHIFT, flush_rights, _RET_IP_);
	return pte_val(pteval);
}
Example No. 10
// Return true (!= 0) if any referenced bits are set.
static int ref_bits_set (int exclude_irqhandler) {
    void *cur_addr;
    pte_t *pte;
    int i;
    int ret_val = 0;

    for (i = 0; i < cr_num_drivers; i++) {
        if (exclude_irqhandler) uprintk ("i %d: ", i);
        for (cur_addr = cr_base_address[i];
             cur_addr < cr_base_address[i] + cr_module_size[i];
             cur_addr += PAGE_SIZE) {
            
            pte = virt_to_pte (cur_addr);
            if (pte != NULL) {
                // See if we're excluding the interrupt handler
                // from this check.
                if (exclude_irqhandler &&
                    addr_contains_irq_handler (cur_addr)) {
                    pte_unmap(pte);
                    if (exclude_irqhandler) uprintk ("X");
                    continue;
                }

                // See if the page was referenced lately.
                if (pte_young(*pte) != 0) {
                    // kunmap_atomic (page, KM_IRQ1);
                    pte_unmap(pte);
                    if (exclude_irqhandler) uprintk ("1");
                    ret_val = 1;
                    continue;
                }

                if (exclude_irqhandler) uprintk ("0");
                
                // kunmap_atomic (page, KM_IRQ1);
                pte_unmap(pte);
            }
        }

        if (exclude_irqhandler) uprintk ("\n");
    }

    return ret_val;
}
Example No. 11
static void smaps_pte_entry(pte_t *pte, unsigned long addr,
		struct mm_walk *walk)
{
	struct mem_size_stats *mss = walk->private;
	struct vm_area_struct *vma = walk->vma;
	struct page *page = NULL;

	if (pte_present(*pte)) {
		page = vm_normal_page(vma, addr, *pte);
	} else if (is_swap_pte(*pte)) {
		swp_entry_t swpent = pte_to_swp_entry(*pte);

		if (!non_swap_entry(swpent))
			mss->swap += PAGE_SIZE;
		else if (is_migration_entry(swpent))
			page = migration_entry_to_page(swpent);
	}

	if (!page)
		return;
	smaps_account(mss, page, PAGE_SIZE, pte_young(*pte), pte_dirty(*pte));
}
Example No. 12
static int
pin_page_for_write(const void __user *_addr, pte_t **ptep, spinlock_t **ptlp)
{
	unsigned long addr = (unsigned long)_addr;
	pgd_t *pgd;
	pmd_t *pmd;
	pte_t *pte;
	pud_t *pud;
	spinlock_t *ptl;

	pgd = pgd_offset(current->mm, addr);
	if (unlikely(pgd_none(*pgd) || pgd_bad(*pgd)))
		return 0;

	pud = pud_offset(pgd, addr);
	if (unlikely(pud_none(*pud) || pud_bad(*pud)))
		return 0;

	pmd = pmd_offset(pud, addr);
	if (unlikely(pmd_none(*pmd)))
		return 0;

	/*
	 * A pmd can be bad if it refers to a HugeTLB or THP page.
	 *
	 * Both THP and HugeTLB pages have the same pmd layout
	 * and should not be manipulated by the pte functions.
	 *
	 * Lock the page table for the destination and check
	 * to see that it's still huge and whether or not we will
	 * need to fault on write, or if we have a splitting THP.
	 */
	if (unlikely(pmd_thp_or_huge(*pmd))) {
		ptl = &current->mm->page_table_lock;
		spin_lock(ptl);
		if (unlikely(!pmd_thp_or_huge(*pmd)
			|| pmd_hugewillfault(*pmd)
			|| pmd_trans_splitting(*pmd))) {
			spin_unlock(ptl);
			return 0;
		}

		*ptep = NULL;
		*ptlp = ptl;
		return 1;
	}

	if (unlikely(pmd_bad(*pmd)))
		return 0;

	pte = pte_offset_map_lock(current->mm, pmd, addr, &ptl);
	if (unlikely(!pte_present(*pte) || !pte_young(*pte) ||
	    !pte_write(*pte) || !pte_dirty(*pte))) {
		pte_unmap_unlock(pte, ptl);
		return 0;
	}

	*ptep = pte;
	*ptlp = ptl;

	return 1;
}
Example No. 13
/*
 * Note this is constrained to return 0, -EFAULT, -EACCES, -ENOMEM by
 * segv().
 */
int handle_page_fault(unsigned long address, unsigned long ip,
                      int is_write, int is_user, int *code_out)
{
    struct mm_struct *mm = current->mm;
    struct vm_area_struct *vma;
    pgd_t *pgd;
    pud_t *pud;
    pmd_t *pmd;
    pte_t *pte;
    int err = -EFAULT;

    *code_out = SEGV_MAPERR;

    /*
     * If the fault was during atomic operation, don't take the fault, just
     * fail.
     */
    if (in_atomic())
        goto out_nosemaphore;

    down_read(&mm->mmap_sem);
    vma = find_vma(mm, address);
    if (!vma)
        goto out;
    else if (vma->vm_start <= address)
        goto good_area;
    else if (!(vma->vm_flags & VM_GROWSDOWN))
        goto out;
    else if (is_user && !ARCH_IS_STACKGROW(address))
        goto out;
    else if (expand_stack(vma, address))
        goto out;

good_area:
    *code_out = SEGV_ACCERR;
    if (is_write && !(vma->vm_flags & VM_WRITE))
        goto out;

    /* Don't require VM_READ|VM_EXEC for write faults! */
    if (!is_write && !(vma->vm_flags & (VM_READ | VM_EXEC)))
        goto out;

    do {
        int fault;
survive:
        fault = handle_mm_fault(mm, vma, address, is_write);
        if (unlikely(fault & VM_FAULT_ERROR)) {
            if (fault & VM_FAULT_OOM) {
                err = -ENOMEM;
                goto out_of_memory;
            } else if (fault & VM_FAULT_SIGBUS) {
                err = -EACCES;
                goto out;
            }
            BUG();
        }
        if (fault & VM_FAULT_MAJOR)
            current->maj_flt++;
        else
            current->min_flt++;

        pgd = pgd_offset(mm, address);
        pud = pud_offset(pgd, address);
        pmd = pmd_offset(pud, address);
        pte = pte_offset_kernel(pmd, address);
    } while (!pte_present(*pte));
    err = 0;
    /*
     * The below warning was added in place of
     *	pte_mkyoung(); if (is_write) pte_mkdirty();
     * If it's triggered, we'd see normally a hang here (a clean pte is
     * marked read-only to emulate the dirty bit).
     * However, the generic code can mark a PTE writable but clean on a
     * concurrent read fault, triggering this harmlessly. So comment it out.
     */
#if 0
    WARN_ON(!pte_young(*pte) || (is_write && !pte_dirty(*pte)));
#endif
    flush_tlb_page(vma, address);
out:
    up_read(&mm->mmap_sem);
out_nosemaphore:
    return err;

    /*
     * We ran out of memory, or some other thing happened to us that made
     * us unable to handle the page fault gracefully.
     */
out_of_memory:
    if (is_global_init(current)) {
        up_read(&mm->mmap_sem);
        yield();
        down_read(&mm->mmap_sem);
        goto survive;
    }
    goto out;
}
Example No. 14

/*
 * Note this is constrained to return 0, -EFAULT, -EACCES, -ENOMEM by
 * segv().
 */
int handle_page_fault(unsigned long address, unsigned long ip,
		      int is_write, int is_user, int *code_out)
{
	struct mm_struct *mm = current->mm;
	struct vm_area_struct *vma;
	pgd_t *pgd;
	pud_t *pud;
	pmd_t *pmd;
	pte_t *pte;
	int err = -EFAULT;
	unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE |
				 (is_write ? FAULT_FLAG_WRITE : 0);

	*code_out = SEGV_MAPERR;

	/*
	 * If the fault was during atomic operation, don't take the fault, just
	 * fail.
	 */
	if (in_atomic())
		goto out_nosemaphore;

retry:
	down_read(&mm->mmap_sem);
	vma = find_vma(mm, address);
	if (!vma)
		goto out;
	else if (vma->vm_start <= address)
		goto good_area;
	else if (!(vma->vm_flags & VM_GROWSDOWN))
		goto out;
	else if (is_user && !ARCH_IS_STACKGROW(address))
		goto out;
	else if (expand_stack(vma, address))
		goto out;

good_area:
	*code_out = SEGV_ACCERR;
	if (is_write && !(vma->vm_flags & VM_WRITE))
		goto out;

	/* Don't require VM_READ|VM_EXEC for write faults! */
	if (!is_write && !(vma->vm_flags & (VM_READ | VM_EXEC)))
		goto out;

	do {
		int fault;

		fault = handle_mm_fault(mm, vma, address, flags);

		if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(current))
			goto out_nosemaphore;

		if (unlikely(fault & VM_FAULT_ERROR)) {
			if (fault & VM_FAULT_OOM) {
				goto out_of_memory;
			} else if (fault & VM_FAULT_SIGBUS) {
				err = -EACCES;
				goto out;
			}
			BUG();
		}
		if (flags & FAULT_FLAG_ALLOW_RETRY) {
			if (fault & VM_FAULT_MAJOR)
				current->maj_flt++;
			else
				current->min_flt++;
			if (fault & VM_FAULT_RETRY) {
				flags &= ~FAULT_FLAG_ALLOW_RETRY;
				flags |= FAULT_FLAG_TRIED;

				goto retry;
			}
		}

		pgd = pgd_offset(mm, address);
		pud = pud_offset(pgd, address);
		pmd = pmd_offset(pud, address);
		pte = pte_offset_kernel(pmd, address);
	} while (!pte_present(*pte));
	err = 0;
	/*
	 * The below warning was added in place of
	 *	pte_mkyoung(); if (is_write) pte_mkdirty();
	 * If it's triggered, we'd see normally a hang here (a clean pte is
	 * marked read-only to emulate the dirty bit).
	 * However, the generic code can mark a PTE writable but clean on a
	 * concurrent read fault, triggering this harmlessly. So comment it out.
	 */
#if 0
	WARN_ON(!pte_young(*pte) || (is_write && !pte_dirty(*pte)));
#endif
	flush_tlb_page(vma, address);
out:
	up_read(&mm->mmap_sem);
out_nosemaphore:
	return err;

out_of_memory:
	/*
	 * We ran out of memory, call the OOM killer, and return the userspace
	 * (which will retry the fault, or kill us if we got oom-killed).
	 */
	up_read(&mm->mmap_sem);
	pagefault_out_of_memory();
	return 0;
}
Example No. 15
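/*
 * Bring the host address space (reached through the skas mm file
 * descriptor) back in sync with the kernel page tables for
 * [start_addr, end_addr): unmap ranges whose pgd/pud/pmd/pte entries are
 * absent or newly created, remap new pages, and adjust protections using
 * the pte's read/write/exec bits (downgraded when the pte is clean or old).
 */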
static void fix_range(struct mm_struct *mm, unsigned long start_addr,
		      unsigned long end_addr, int force)
{
	pgd_t *npgd;
	pud_t *npud;
	pmd_t *npmd;
	pte_t *npte;
	unsigned long addr,  end;
	int r, w, x, err, fd;

	if(mm == NULL) return;
	fd = mm->context.skas.mm_fd;
	for(addr = start_addr; addr < end_addr;){
		npgd = pgd_offset(mm, addr);
		if(!pgd_present(*npgd)){
			if(force || pgd_newpage(*npgd)){
				end = addr + PGDIR_SIZE;
				if(end > end_addr)
					end = end_addr;
				err = unmap(fd, (void *) addr, end - addr);
				if(err < 0)
					panic("munmap failed, errno = %d\n",
					      -err);
				pgd_mkuptodate(*npgd);
			}
			addr += PGDIR_SIZE;
			continue;
		}

		npud = pud_offset(npgd, addr);
		if(!pud_present(*npud)){
			if(force || pud_newpage(*npud)){
				end = addr + PUD_SIZE;
				if(end > end_addr)
					end = end_addr;
				err = unmap(fd, (void *) addr, end - addr);
				if(err < 0)
					panic("munmap failed, errno = %d\n",
					      -err);
				pud_mkuptodate(*npud);
			}
			addr += PUD_SIZE;
			continue;
		}

		npmd = pmd_offset(npud, addr);
		if(!pmd_present(*npmd)){
			if(force || pmd_newpage(*npmd)){
				end = addr + PMD_SIZE;
				if(end > end_addr)
					end = end_addr;
				err = unmap(fd, (void *) addr, end - addr);
				if(err < 0)
					panic("munmap failed, errno = %d\n",
					      -err);
				pmd_mkuptodate(*npmd);
			}
			addr += PMD_SIZE;
			continue;
		}

		npte = pte_offset_kernel(npmd, addr);
		r = pte_read(*npte);
		w = pte_write(*npte);
		x = pte_exec(*npte);
		if(!pte_dirty(*npte))
			w = 0;
		if(!pte_young(*npte)){
			r = 0;
			w = 0;
		}
		if(force || pte_newpage(*npte)){
			err = unmap(fd, (void *) addr, PAGE_SIZE);
			if(err < 0)
				panic("munmap failed, errno = %d\n", -err);
			if(pte_present(*npte))
				map(fd, addr, pte_val(*npte) & PAGE_MASK,
				    PAGE_SIZE, r, w, x);
		}
		else if(pte_newprot(*npte))
			protect(fd, addr, PAGE_SIZE, r, w, x, 1);

		*npte = pte_mkuptodate(*npte);
		addr += PAGE_SIZE;
	}
}
Example No. 16
/*pgtable sequential scan and count for __access_bits.*/
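/*
 * For the monitored benchmark process, repeat ITERATIONS scans over all of
 * its vmas: each iteration first clears the accessed and dirty bits, then
 * counts the pages whose accessed bit has come back on (page_heat[],
 * hot_page[]), while one sampled page (random_page) has its reuse distance
 * and dirtiness tracked in reuse_time[]/dirty_page[]. Afterwards a heat
 * histogram and page-utilization statistics are printed.
 */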
static int scan_pgtable(void)
{
	pgd_t *pgd = NULL;
	pud_t *pud = NULL;
	pmd_t *pmd = NULL;
	pte_t *ptep, pte;
	spinlock_t *ptl;

	struct mm_struct *mm;
	struct vm_area_struct *vma;
	unsigned long start = 0;   /* start address of the current vma */
	unsigned long end = 0;     /* end address of the current vma */
	unsigned long address = 0; /* address being scanned within the vma */
	int number_hotpages = 0; /* the number of hot pages */
	int number_vpages = 0;
	int cycle_index = 0; /* the loop counter, which denotes ITERATIONS. */
	/* the array that records the number of hot pages in every cycle */
	int hot_page[ITERATIONS];
	int number_current_pg = 0;
	int pg_count = 0;
	int j = 0;
	int times = 0; /* records reuse time*/

	/* some variables that describe page "heat" */
	int hig = 0;
	int mid = 0;
	int low = 0;
	int llow = 0;
	int lllow = 0;
	int llllow = 0;
	int all_pages = 0;/* the total number of pages */
	/*the average number of hot pages in each iteration.*/
	long avg_hotpage=0;
	/*the total number of memory accesses across all pages*/
	long num_access=0;
	/* avg utilization of each page */
	int avg_page_utilization = 0;

	/*get the handle of current running benchmark.*/
	struct task_struct *bench_process = get_current_process();
	if(bench_process == NULL)
	{
		printk("sysmon: get no process handle in scan_pgtable function...exit&trying again...\n");
		return 0;
	}
	else /* get the process*/
		mm = bench_process->mm;
	if(mm == NULL)
	{
		printk("sysmon: error mm is NULL, return back & trying...\n");
		return 0;
	}

	for(j = 0; j < PAGE_ALL; j++)
		page_heat[j] = -1;

	for(j = 0; j < ITERATIONS; j++)
	{
		hot_page[j] = 0;
		reuse_time[j] = 0;
		dirty_page[j] = 0;
	}

	/*yanghao*/
	times = 0;
	for(cycle_index = 0; cycle_index < ITERATIONS; cycle_index++)
	{
		number_hotpages = 0;
		/*scan each vma*/
		for(vma = mm->mmap; vma; vma = vma->vm_next)
		{
			start = vma->vm_start;
			end = vma->vm_end;
			mm = vma->vm_mm;
			/*in each vma, we check all pages*/
			for(address = start; address < end; address += PAGE_SIZE)
			{
				/*scan page table for each page in this VMA*/
				pgd = pgd_offset(mm, address);
				if (pgd_none(*pgd) || unlikely(pgd_bad(*pgd)))
					continue;
				pud = pud_offset(pgd, address);
				if (pud_none(*pud) || unlikely(pud_bad(*pud)))
					continue;
				pmd = pmd_offset(pud, address);
				if (pmd_none(*pmd) || unlikely(pmd_bad(*pmd)))
					continue;
				ptep = pte_offset_map_lock(mm, pmd, address, &ptl);
				pte = *ptep;
				if(pte_present(pte))
				{
					if(pte_young(pte)) /*hot page*/
					{
						/*re-set and clear  _access_bits to 0*/
						pte = pte_mkold(pte);
						set_pte_at(mm, address, ptep, pte);
						/*yanghao:re-set and clear _dirty_bits to 0*/
						pte = pte_mkclean(pte);
						set_pte_at(mm, address, ptep, pte);
					}
				}
				else /*no page pte_none*/
				{
					pte_unmap_unlock(ptep, ptl);
					continue;
				}
				pte_unmap_unlock(ptep, ptl);
				page_counts++;
			}
		}
		/*count the number of hot pages*/
		if(bench_process == NULL)
		{
			printk("sysmon: get no process handle in scan_pgtable function...exit&trying again...\n");
			return 0;
		}
		else /*get the process*/
			mm = bench_process->mm;
		if(mm == NULL)
		{
			printk("sysmon: error mm is NULL, return back & trying...\n");
			return 0;
		}
		number_vpages = 0;

		sampling_interval = page_counts / 250; /*yanghao:*/
		page_counts = 0;

		for(vma = mm->mmap; vma; vma = vma->vm_next)
		{
			start = vma->vm_start;
			end = vma->vm_end;
			/*scan each page in this VMA*/
			mm = vma->vm_mm;
			pg_count = 0;
			for(address = start; address < end; address += PAGE_SIZE)
			{
				/*scan page table for each page in this VMA*/
				pgd = pgd_offset(mm, address);
				if (pgd_none(*pgd) || unlikely(pgd_bad(*pgd)))
					continue;
				pud = pud_offset(pgd, address);
				if (pud_none(*pud) || unlikely(pud_bad(*pud)))
					continue;
				pmd = pmd_offset(pud, address);
				if (pmd_none(*pmd) || unlikely(pmd_bad(*pmd)))
					continue;
				ptep = pte_offset_map_lock(mm, pmd, address, &ptl);
				pte = *ptep;
				if(pte_present(pte))
				{
					if(pte_young(pte)) /* hot pages*/
					{
						number_current_pg = pg_count + number_vpages;
						page_heat[number_current_pg]++;
						hot_page[cycle_index]++;
						/*yanghao:*/
						if (page_counts == random_page)
						{
							times++;
							if (pte_dirty(pte))
								dirty_page[cycle_index] = 1;
						}
					}
					else
					{
						if (page_counts == random_page)
							reuse_time[times]++;
					}
				}
				pg_count++;
				pte_unmap_unlock(ptep, ptl);
				page_counts++;
			}
			number_vpages += (int)(end - start)/PAGE_SIZE;
		}
	}
	/*yanghao:cal. the No. of random_page*/
	random_page += sampling_interval;
	if(random_page >= page_counts)
		random_page=page_counts / 300;
	/*****************************OUTPUT************************************/
	for(j = 0; j < PAGE_ALL; j++)
	{
		if(page_heat[j] < VH && page_heat[j] > H)
			hig++;
		if(page_heat[j] > M && page_heat[j] <= H)
			mid++;
		if(page_heat[j] <= M && page_heat[j] > L)
			low++;
		if(page_heat[j] > VL_MAX && page_heat[j] <= L)
			llow++;
		if(page_heat[j] > VL_MIN && page_heat[j] <= VL_MAX)
			lllow++;
		if(page_heat[j] >= 0 && page_heat[j] <= VL_MIN)
			llllow++;
		if(page_heat[j] > -1)
			all_pages++;
	}

	/*the values reflect the accessing frequency of each physical page.*/
	printk("[LOG: after sampling (%d loops) ...] ",ITERATIONS);
	printk("the values denote the physical page accessing frequence.\n"); 
	printk("-->hig (150,200) is %d. Indicating the number of re-used pages is high.\n",hig);
	printk("-->mid (100,150] is %d.\n",mid);
	printk("-->low (64,100] is %d.\n",low);
	printk("-->llow (10,64] is %d. In locality,no too many re-used pages.\n",llow);
	printk("-->lllow (5,10] is %d.\n",lllow);
	printk("-->llllow [1,5] is %d.\n",llllow);

	for(j = 0;j < ITERATIONS; j++)
		avg_hotpage += hot_page[j];
	avg_hotpage /= ITERATIONS; /* j == ITERATIONS here; dividing by (j+1) would be off by one */

	/*
	 * new step@20140704
	 * (1)the different phases of memory utilization
	 * (2)the avg. page accessing utilization
	 * (3)memory pages layout and spectrum
	 */
	for(j = 0; j < PAGE_ALL; j++)
		if(page_heat[j] > -1) /*the page that is accessed at least once.*/
			num_access += (page_heat[j] + 1);

	printk("the total number of memory accesses is %ld, the average is %ld\n",
			num_access, num_access / ITERATIONS);
	avg_page_utilization = num_access / all_pages;
	printk("Avg hot pages num is %ld, all used pages num is %d, avg utilization of each page is %d\n", 
			avg_hotpage, all_pages, avg_page_utilization);
	/*yanghao:print the information about reuse-distance*/
	if ((times == 0) && (reuse_time[0] ==0))
		printk("the page No.%d is not available.",random_page);
	else
	{
		if ((times == 0) && (reuse_time[0] == 0))
			printk("the page No.%d was not used in this 200 loops.",random_page);
		else
		{
			if (times < ITERATIONS)
				times++;
			printk("the reusetime of page No.%d is:",random_page);
			for (j = 0; j < times; j++)
				printk("%d ",reuse_time[j]);
			printk("\n");
			printk("the total number of the digit above denotes the sum that page NO.%d be accessd in %d loops.\n",
					random_page,ITERATIONS);
			printk("each digit means the sum loops that between current loop and the last loop.\n");
		}
	}
	printk("\n\n");
	return 1;
}
Example No. 17
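/*
 * tt-mode counterpart of the skas fix_range() above: walk the kernel page
 * tables for [start_addr, end_addr) and apply them to the host address
 * space directly with os_unmap_memory()/map_memory()/protect_memory(),
 * using the same dirty/young-based downgrading of the r/w/x permissions.
 */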
static void fix_range(struct mm_struct *mm, unsigned long start_addr, 
		      unsigned long end_addr, int force)
{
	pgd_t *npgd;
	pmd_t *npmd;
	pte_t *npte;
	unsigned long addr;
	int r, w, x, err;

	if((current->thread.mode.tt.extern_pid != -1) && 
	   (current->thread.mode.tt.extern_pid != os_getpid()))
		panic("fix_range fixing wrong address space, current = 0x%p",
		      current);
	if(mm == NULL) return;
	for(addr=start_addr;addr<end_addr;){
		if(addr == TASK_SIZE){
			/* Skip over kernel text, kernel data, and physical
			 * memory, which don't have ptes, plus kernel virtual
			 * memory, which is flushed separately, and remap
			 * the process stack.  The only way to get here is
			 * if (end_addr == STACK_TOP) > TASK_SIZE, which is
			 * only true in the honeypot case.
			 */
			addr = STACK_TOP - ABOVE_KMEM;
			continue;
		}
		npgd = pgd_offset(mm, addr);
		npmd = pmd_offset(npgd, addr);
		if(pmd_present(*npmd)){
			npte = pte_offset_kernel(npmd, addr);
			r = pte_read(*npte);
			w = pte_write(*npte);
			x = pte_exec(*npte);
			if(!pte_dirty(*npte)) w = 0;
			if(!pte_young(*npte)){
				r = 0;
				w = 0;
			}
			if(force || pte_newpage(*npte)){
				err = os_unmap_memory((void *) addr, 
						      PAGE_SIZE);
				if(err < 0)
					panic("munmap failed, errno = %d\n",
					      -err);
				if(pte_present(*npte))
					map_memory(addr, 
						   pte_val(*npte) & PAGE_MASK,
						   PAGE_SIZE, r, w, x);
			}
			else if(pte_newprot(*npte)){
				protect_memory(addr, PAGE_SIZE, r, w, x, 1);
			}
			*npte = pte_mkuptodate(*npte);
			addr += PAGE_SIZE;
		}
		else {
			if(force || pmd_newpage(*npmd)){
				err = os_unmap_memory((void *) addr, PMD_SIZE);
				if(err < 0)
					panic("munmap failed, errno = %d\n",
					      -err);
				pmd_mkuptodate(*npmd);
			}
			addr += PMD_SIZE;
		}
	}
}