Example #1
/*
 * Like get_user_pages_fast() except it's IRQ-safe in that it won't fall
 * back to the regular GUP.
 */
int __get_user_pages_fast(unsigned long start, int nr_pages, int write,
			  struct page **pages)
{
	struct mm_struct *mm = current->mm;
	unsigned long addr, len, end;
	unsigned long next;
	unsigned long flags;
	pgd_t *pgdp;
	int nr = 0;

	start &= PAGE_MASK;
	addr = start;
	len = (unsigned long) nr_pages << PAGE_SHIFT;
	end = start + len;
	if (unlikely(!access_ok(write ? VERIFY_WRITE : VERIFY_READ,
					(void __user *)start, len)))
		return 0;

	/*
	 * This doesn't prevent pagetable teardown, but does prevent
	 * the pagetables and pages from being freed.
	 */
	local_irq_save(flags);
	pgdp = pgd_offset(mm, addr);
	do {
		pgd_t pgd = *pgdp;

		next = pgd_addr_end(addr, end);
		if (pgd_none(pgd))
			break;
		if (!gup_pud_range(pgd, addr, next, write, pages, &nr))
			break;
	} while (pgdp++, addr = next, addr != end);
	local_irq_restore(flags);

	return nr;
}
Example #2
static inline unsigned long uvirt_to_kva(pgd_t *pgd, unsigned long adr)
{
	if (!pgd_none(*pgd) && !pgd_bad(*pgd)) {
		pmd_t *pmd;
		//#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,11)
		//	pmd = pmd_offset(pgd, adr);
		//#else /* >= 2.6.11 */
		pmd = pmd_offset(pud_offset(pgd, adr), adr);
		//#endif /* < 2.6.11 */
		if (!pmd_none(*pmd)) {
			pte_t *ptep, pte;
			//#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0)
			//	ptep = pte_offset(pmd, adr);
			//#else /* >= 2.6.0 */
			ptep = pte_offset_kernel(pmd, adr);
			//#endif /* < 2.6.0 */
			pte = *ptep;
			if (pte_present(pte)) {
				return (((unsigned long)page_address(pte_page(pte))) |
					(adr & (PAGE_SIZE - 1)));
			}
		}
	}
	return 0UL;
}
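The helper above leaves locating the PGD entry to its caller. Below is a minimal caller sketch, assuming the usual pgd_offset()-on-current->mm pattern seen in the other examples here; the wrapper name uvirt_to_kva_current is made up for illustration, and the caller is expected to keep the mapping stable (e.g. hold mmap_sem) while the result is used.

/* Hypothetical wrapper: resolve a user VA of the current task to a kernel VA. */
static unsigned long uvirt_to_kva_current(unsigned long adr)
{
	pgd_t *pgd = pgd_offset(current->mm, adr);

	return uvirt_to_kva(pgd, adr);
}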
Example #3
struct page *virt2page(struct mm_struct *mm, unsigned long addr)
{
    pgd_t *pgd;
    pud_t *pud;
    pmd_t *pmd;
    pte_t *pte;
    struct page *page = NULL;

    pgd = pgd_offset(mm, addr);
    if (pgd_none(*pgd) || pgd_bad(*pgd))
        return NULL;

    pud = pud_offset(pgd, addr);
    if (pud_none(*pud) || pud_bad(*pud))
        return NULL;

    pmd = pmd_offset(pud, addr);
    if (pmd_none(*pmd) || pmd_bad(*pmd))
        return NULL;

    if (!(pte = pte_offset_map(pmd, addr)))
        return NULL;
    if (pte_none(*pte))
        goto out;

    if (!pte_present(*pte))
        goto out;

    if (!(page = pte_page(*pte)))
        goto out;

out:
    pte_unmap(pte);

    return page;
}
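A hedged caller sketch for virt2page() above, assuming a kernel of the same vintage as these examples (the semaphore is still named mm->mmap_sem). The walk takes no reference on the page, so the result is only safe to use while mmap_sem is held; the function name dump_one_mapping is made up for illustration.

#include <linux/mm.h>
#include <linux/sched.h>

/* Hypothetical caller: report where one user address of the current task maps. */
static void dump_one_mapping(unsigned long addr)
{
	struct mm_struct *mm = current->mm;
	struct page *page;

	if (!mm)
		return;

	down_read(&mm->mmap_sem);	/* keep the page tables stable */
	page = virt2page(mm, addr);
	if (page)
		printk(KERN_INFO "va %#lx -> pfn %#lx\n", addr, page_to_pfn(page));
	else
		printk(KERN_INFO "va %#lx is not mapped\n", addr);
	up_read(&mm->mmap_sem);		/* page must not be used after this */
}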
Example #4
static struct page *  
my_follow_page(struct vm_area_struct *vma, unsigned long addr)  
{  
  
  pud_t *pud;  
  pmd_t *pmd;  
  pgd_t *pgd;            
  pte_t *pte;  
  spinlock_t *ptl;  
  struct page *page = NULL;  
  struct mm_struct *mm = vma->vm_mm;  
  pgd = pgd_offset(mm, addr);  
  if (pgd_none(*pgd) || unlikely(pgd_bad(*pgd))) {  
    goto out;  
  }  
  pud = pud_offset(pgd, addr);  
  if (pud_none(*pud) || unlikely(pud_bad(*pud)))  
    goto out;  
  pmd = pmd_offset(pud, addr);  
  if (pmd_none(*pmd) || unlikely(pmd_bad(*pmd))) {  
    goto out;  
  }  
  pte = pte_offset_map_lock(mm, pmd, addr, &ptl);  
  if (!pte)  
    goto out;  
  if (!pte_present(*pte))  
    goto unlock;  
  page = pfn_to_page(pte_pfn(*pte));  
  if (!page)  
    goto unlock;  
  get_page(page);  
 unlock:  
  pte_unmap_unlock(pte, ptl);  
 out:  
  return page;  
}  
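Unlike the previous example, my_follow_page() above takes a reference with get_page() under the PTE lock, so every successful lookup has to be balanced with put_page(). Below is a hedged sketch of that pairing, assuming the caller already resolved the VMA (for instance with find_vma() under mmap_sem); peek_user_byte is a made-up name for illustration.

#include <linux/highmem.h>
#include <linux/errno.h>

/* Hypothetical caller: read one byte from a user page through its struct page. */
static int peek_user_byte(struct vm_area_struct *vma, unsigned long addr,
			  unsigned char *out)
{
	struct page *page;
	unsigned char *kaddr;

	page = my_follow_page(vma, addr);
	if (!page)
		return -EFAULT;

	kaddr = kmap(page);			/* temporary kernel mapping */
	*out = kaddr[addr & ~PAGE_MASK];	/* offset within the page */
	kunmap(page);

	put_page(page);				/* balance get_page() in the helper */
	return 0;
}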
Example #5
static inline void statm_pmd_range(pgd_t * pgd, unsigned long address, unsigned long size,
                                   int * pages, int * shared, int * dirty, int * total)
{
    pmd_t * pmd;
    unsigned long end;

    if (pgd_none(*pgd))
        return;
    if (pgd_bad(*pgd)) {
        printk("statm_pmd_range: bad pgd (%08lx)\n", pgd_val(*pgd));
        pgd_clear(pgd);
        return;
    }
    pmd = pmd_offset(pgd, address);
    address &= ~PGDIR_MASK;
    end = address + size;
    if (end > PGDIR_SIZE)
        end = PGDIR_SIZE;
    do {
        statm_pte_range(pmd, address, end - address, pages, shared, dirty, total);
        address = (address + PMD_SIZE) & PMD_MASK;
        pmd++;
    } while (address < end);
}
Example #6
static int
pin_page_for_read(const void __user *_addr, pte_t **ptep, spinlock_t **ptlp)
{
	unsigned long addr = (unsigned long)_addr;
	pgd_t *pgd;
	pmd_t *pmd;
	pte_t *pte;
	pud_t *pud;
	spinlock_t *ptl;

	pgd = pgd_offset(current->mm, addr);
	if (unlikely(pgd_none(*pgd) || pgd_bad(*pgd)))
	{
		return 0;
	}
	pud = pud_offset(pgd, addr);
	if (unlikely(pud_none(*pud) || pud_bad(*pud)))
	{
		return 0;
	}

	pmd = pmd_offset(pud, addr);
	if (unlikely(pmd_none(*pmd) || pmd_bad(*pmd)))
		return 0;

	pte = pte_offset_map_lock(current->mm, pmd, addr, &ptl);
	if (unlikely(!pte_present(*pte) || !pte_young(*pte))) {
		pte_unmap_unlock(pte, ptl);
		return 0;
	}

	*ptep = pte;
	*ptlp = ptl;

	return 1;
}
Example #7
pte_t *huge_pte_offset(struct mm_struct *mm,
		       unsigned long addr, unsigned long sz)
{
	pgd_t *pgd;
	pud_t *pud;
	pmd_t *pmd;
	pte_t *pte = NULL;

	pgd = pgd_offset(mm, addr);
	if (!pgd_none(*pgd)) {
		pud = pud_offset(pgd, addr);
		if (!pud_none(*pud)) {
			pmd = pmd_offset(pud, addr);
			if (!pmd_none(*pmd)) {
				if (is_hugetlb_pmd(*pmd))
					pte = (pte_t *)pmd;
				else
					pte = pte_offset_map(pmd, addr);
			}
		}
	}

	return pte;
}
Example #8
/*
 * Dump out the page tables associated with 'addr' in mm 'mm'.
 */
void show_pte(struct mm_struct *mm, unsigned long addr)
{
	pgd_t *pgd;

	if (!mm)
		mm = &init_mm;

	pr_alert("pgd = %p\n", mm->pgd);
	pgd = pgd_offset(mm, addr);
	pr_alert("[%08lx] *pgd=%016llx", addr, pgd_val(*pgd));

	do {
		pud_t *pud;
		pmd_t *pmd;
		pte_t *pte;

		if (pgd_none(*pgd) || pgd_bad(*pgd))
			break;

		pud = pud_offset(pgd, addr);
		printk(", *pud=%016llx", pud_val(*pud));
		if (pud_none(*pud) || pud_bad(*pud))
			break;

		pmd = pmd_offset(pud, addr);
		printk(", *pmd=%016llx", pmd_val(*pmd));
		if (pmd_none(*pmd) || pmd_bad(*pmd))
			break;

		pte = pte_offset_map(pmd, addr);
		printk(", *pte=%016llx", pte_val(*pte));
		pte_unmap(pte);
	} while(0);

	printk("\n");
}
Example #9
static void
count_pgd_pages(struct mm_struct * mm, struct vm_area_struct * vma,
                pgd_t *dir, unsigned long address, unsigned long end,
                signed char *data_buf, int node_map)
{
    pmd_t * pmd;
    unsigned long pgd_end, next_addr;

    if (pgd_none(*dir))
        return;
    pmd = pmd_offset(dir, address);
    pgd_end = (address + PGDIR_SIZE) & PGDIR_MASK;
    if (pgd_end && (end > pgd_end))
        end = pgd_end;
    do {
        count_pmd_pages(mm, vma, pmd, address, end, data_buf,
                        node_map);
        next_addr = (address + PMD_SIZE) & PMD_MASK;
        data_buf += ((next_addr - address) >> PAGE_SHIFT);
        address = next_addr;
        pmd++;
    } while (address && (address < end));
    return;
}
Example #10
void set_pmd_pfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags)
{
	pgd_t *pgd;
	pud_t *pud;
	pmd_t *pmd;

	if (vaddr & (PMD_SIZE-1)) {		
		printk(KERN_ERR "set_pmd_pfn: vaddr misaligned\n");
		return; 
	}
	if (pfn & (PTRS_PER_PTE-1)) {		
		printk(KERN_ERR "set_pmd_pfn: pfn misaligned\n");
		return; 
	}
	pgd = swapper_pg_dir + pgd_index(vaddr);
	if (pgd_none(*pgd)) {
		printk(KERN_ERR "set_pmd_pfn: pgd_none\n");
		return; 
	}
	pud = pud_offset(pgd, vaddr);
	pmd = pmd_offset(pud, vaddr);
	set_pmd(pmd, pfn_pmd(pfn, flags));
	local_flush_tlb_one(vaddr);
}
Example #11
void assert_pte_locked(struct mm_struct *mm, unsigned long addr)
{
	pgd_t *pgd;
	pud_t *pud;
	pmd_t *pmd;

	if (mm == &init_mm)
		return;
	pgd = mm->pgd + pgd_index(addr);
	BUG_ON(pgd_none(*pgd));
	pud = pud_offset(pgd, addr);
	BUG_ON(pud_none(*pud));
	pmd = pmd_offset(pud, addr);
	/*
	 * To collapse normal pages into a hugepage, khugepaged first sets
	 * the pmd to none, forcing page fault/gup to take mmap_sem. After
	 * the pmd is set to none, a pte_clear is done, which runs this
	 * assertion; so if we find the pmd none, just return.
	 */
	if (pmd_none(*pmd))
		return;
	BUG_ON(!pmd_present(*pmd));
	assert_spin_locked(pte_lockptr(mm, pmd));
}
Example #12
static enum gcerror virt2phys(unsigned int logical, pte_t *physical)
{
	pgd_t *pgd;	/* Page Global Directory (PGD). */
	pmd_t *pmd;	/* Page Middle Directory (PMD). */
	pte_t *pte;	/* Page Table Entry (PTE). */

	/* Get the pointer to the entry in PGD for the address. */
	pgd = pgd_offset(current->mm, logical);
	if (pgd_none(*pgd) || pgd_bad(*pgd))
		return GCERR_MMU_PAGE_BAD;

	/* Get the pointer to the entry in PMD for the address. */
	pmd = pmd_offset(pgd, logical);
	if (pmd_none(*pmd) || pmd_bad(*pmd))
		return GCERR_MMU_PAGE_BAD;

	/* Get the pointer to the entry in PTE for the address. */
	pte = pte_offset_map(pmd, logical);
	if ((pte == NULL) || !pte_present(*pte)) {
		if (pte != NULL)
			pte_unmap(pte);
		return GCERR_MMU_PAGE_BAD;
	}

	*physical = (*pte & PAGE_MASK) | (logical & ~PAGE_MASK);
	pte_unmap(pte);
	return GCERR_NONE;
}
Example #13
static void set_pte_phys(unsigned long addr, unsigned long phys, pgprot_t prot)
{
	pgd_t *pgd;
	pud_t *pud;
	pmd_t *pmd;
	pte_t *pte;

	pgd = pgd_offset_k(addr);
	if (pgd_none(*pgd)) {
		pgd_ERROR(*pgd);
		return;
	}

	pud = pud_alloc(NULL, pgd, addr);
	if (unlikely(!pud)) {
		pud_ERROR(*pud);
		return;
	}

	pmd = pmd_alloc(NULL, pud, addr);
	if (unlikely(!pmd)) {
		pmd_ERROR(*pmd);
		return;
	}

	pte = pte_offset_kernel(pmd, addr);
	if (!pte_none(*pte)) {
		pte_ERROR(*pte);
		return;
	}

	set_pte(pte, pfn_pte(phys >> PAGE_SHIFT, prot));

	if (cached_to_uncached)
		flush_tlb_one(get_asid(), addr);
}
Example #14
static unsigned long uva_to_pa(struct mm_struct *mm, unsigned long addr)
{
        unsigned long ret = 0UL;
        pgd_t *pgd;
        pud_t *pud;
        pmd_t *pmd;
        pte_t *pte;

        pgd = pgd_offset(mm, addr);
        if (!pgd_none(*pgd)) {
                pud = pud_offset(pgd, addr);
                if (!pud_none(*pud)) {
                        pmd = pmd_offset(pud, addr);
                        if (!pmd_none(*pmd)) {
                                pte = pte_offset_map(pmd, addr);
                                if (!pte_none(*pte) && pte_present(*pte)) {
#ifdef BMM_HAS_PTE_PAGE
					/* Use page struct */
					struct page *page = pte_page(*pte);
					if(page) {
	                                        ret = page_to_phys(page);
        	                                ret |= (addr & (PAGE_SIZE-1));
					}
#else
					/* Use hard PTE */
					pte = (pte_t *)((u32)pte - 2048);
					if(pte)
						ret = (*pte & 0xfffff000)
							| (addr & 0xfff);
#endif
                                }
                        }
                }
        }
        return ret;
}
Example #15
int get_user_pages_fast(unsigned long start, int nr_pages, int write,
			struct page **pages)
{
	struct mm_struct *mm = current->mm;
	unsigned long addr, len, end;
	unsigned long next;
	pgd_t *pgdp;
	int nr = 0;

	pr_devel("%s(%lx,%x,%s)\n", __func__, start, nr_pages, write ? "write" : "read");

	start &= PAGE_MASK;
	addr = start;
	len = (unsigned long) nr_pages << PAGE_SHIFT;
	end = start + len;

	if (unlikely(!access_ok(write ? VERIFY_WRITE : VERIFY_READ,
					start, len)))
		goto slow_irqon;

	pr_devel("  aligned: %lx .. %lx\n", start, end);

	/*
	 * XXX: batch / limit 'nr', to avoid large irq off latency
	 * needs some instrumenting to determine the common sizes used by
	 * important workloads (eg. DB2), and whether limiting the batch size
	 * will decrease performance.
	 *
	 * It seems like we're in the clear for the moment. Direct-IO is
	 * the main guy that batches up lots of get_user_pages, and even
	 * they are limited to 64-at-a-time which is not so many.
	 */
	/*
	 * This doesn't prevent pagetable teardown, but does prevent
	 * the pagetables from being freed on powerpc.
	 *
	 * So long as we atomically load page table pointers versus teardown,
	 * we can follow the address down to the page and take a ref on it.
	 */
	local_irq_disable();

	pgdp = pgd_offset(mm, addr);
	do {
		pgd_t pgd = *pgdp;

		pr_devel("  %016lx: normal pgd %p\n", addr,
			 (void *)pgd_val(pgd));
		next = pgd_addr_end(addr, end);
		if (pgd_none(pgd))
			goto slow;
		if (pgd_huge(pgd)) {
			if (!gup_hugepte((pte_t *)pgdp, PGDIR_SIZE, addr, next,
					 write, pages, &nr))
				goto slow;
		} else if (is_hugepd(pgdp)) {
			if (!gup_hugepd((hugepd_t *)pgdp, PGDIR_SHIFT,
					addr, next, write, pages, &nr))
				goto slow;
		} else if (!gup_pud_range(pgd, addr, next, write, pages, &nr))
			goto slow;
	} while (pgdp++, addr = next, addr != end);

	local_irq_enable();

	VM_BUG_ON(nr != (end - start) >> PAGE_SHIFT);
	return nr;

	{
		int ret;

slow:
		local_irq_enable();
slow_irqon:
		pr_devel("  slow path ! nr = %d\n", nr);

		/* Try to get the remaining pages with get_user_pages */
		start += nr << PAGE_SHIFT;
		pages += nr;

		down_read(&mm->mmap_sem);
		ret = get_user_pages(current, mm, start,
			(end - start) >> PAGE_SHIFT, write, 0, pages, NULL);
		up_read(&mm->mmap_sem);

		/* Have to be a bit careful with return values */
		if (nr > 0) {
			if (ret < 0)
				ret = nr;
			else
				ret += nr;
		}

		return ret;
	}
}
Example #16
/* Pretty sick eh? */
int prom_callback(long *args)
{
	struct console *cons, *saved_console = NULL;
	unsigned long flags;
	char *cmd;

	if (!args)
		return -1;
	if (!(cmd = (char *)args[0]))
		return -1;

	save_and_cli(flags);
	cons = console_drivers;
	while (cons) {
		unregister_console(cons);
		cons->flags &= ~(CON_PRINTBUFFER);
		cons->next = saved_console;
		saved_console = cons;
		cons = console_drivers;
	}
	register_console(&prom_console);
	if (!strcmp(cmd, "sync")) {
		prom_printf("PROM `%s' command...\n", cmd);
		show_free_areas();
		if(current->pid != 0) {
			sti();
			sys_sync();
			cli();
		}
		args[2] = 0;
		args[args[1] + 3] = -1;
		prom_printf("Returning to PROM\n");
	} else if (!strcmp(cmd, "va>tte-data")) {
		unsigned long ctx, va;
		unsigned long tte = 0;
		long res = PROM_FALSE;

		ctx = args[3];
		va = args[4];
		if (ctx) {
			/*
			 * Find process owning ctx, lookup mapping.
			 */
			struct task_struct *p;
			struct mm_struct *mm = NULL;
			pgd_t *pgdp;
			pmd_t *pmdp;
			pte_t *ptep;

			for_each_task(p) {
				mm = p->mm;
				if (CTX_HWBITS(mm->context) == ctx)
					break;
			}
			if (!mm ||
			    CTX_HWBITS(mm->context) != ctx)
				goto done;

			pgdp = pgd_offset(mm, va);
			if (pgd_none(*pgdp))
				goto done;
			pmdp = pmd_offset(pgdp, va);
			if (pmd_none(*pmdp))
				goto done;
			ptep = pte_offset(pmdp, va);
			if (!pte_present(*ptep))
				goto done;
			tte = pte_val(*ptep);
			res = PROM_TRUE;
			goto done;
		}

		if ((va >= KERNBASE) && (va < (KERNBASE + (4 * 1024 * 1024)))) {
			/* Spitfire Errata #32 workaround */
			__asm__ __volatile__("stxa	%0, [%1] %2\n\t"
					     "flush	%%g6"
					     : /* No outputs */
					     : "r" (0),
					     "r" (PRIMARY_CONTEXT), "i" (ASI_DMMU));

			/*
			 * Locked down tlb entry 63.
			 */

			tte = spitfire_get_dtlb_data(63);
			res = PROM_TRUE;
			goto done;
		}
Example #17
/*pgtable sequential scan and count for __access_bits.*/
static int scan_pgtable(void)
{
	pgd_t *pgd = NULL;
	pud_t *pud = NULL;
	pmd_t *pmd = NULL;
	pte_t *ptep, pte;
	spinlock_t *ptl;

	struct mm_struct *mm;
	struct vm_area_struct *vma;
	unsigned long start = 0; /*the start of address.*/
	unsigned long end = 0;   /*the end of address.*/
	unsigned long address = 0; /* the address of vma.*/
	int number_hotpages = 0; /* the number of hot pages */
	int number_vpages = 0;
	int cycle_index = 0; /* the loop counter, which denotes ITERATIONS. */
	/* the array that records the number of hot page in every cycle */
	int hot_page[ITERATIONS];
	int number_current_pg = 0;
	int pg_count = 0;
	int j = 0;
	int times = 0; /* records reuse time*/

	/* some variables that describe page "heat" */
	int hig = 0;
	int mid = 0;
	int low = 0;
	int llow = 0;
	int lllow = 0;
	int llllow = 0;
	int all_pages = 0;/* the total number of pages */
	/*the average number of hot pages in each iteration.*/
	long avg_hotpage=0;
	/*the total number of memory accesses across all pages*/
	long num_access=0;
	/* avg utilization of each page */
	int avg_page_utilization = 0;

	/*get the handle of current running benchmark.*/
	struct task_struct *bench_process = get_current_process();
	if(bench_process == NULL)
	{
		printk("sysmon: get no process handle in scan_pgtable function...exit&trying again...\n");
		return 0;
	}
	else /* get the process*/
		mm = bench_process->mm;
	if(mm == NULL)
	{
		printk("sysmon: error mm is NULL, return back & trying...\n");
		return 0;
	}

	for(j = 0; j < PAGE_ALL; j++)
		page_heat[j] = -1;

	for(j = 0; j < ITERATIONS; j++)
	{
		hot_page[j] = 0;
		reuse_time[j] = 0;
		dirty_page[j] = 0;
	}

	/*yanghao*/
	times = 0;
	for(cycle_index = 0; cycle_index < ITERATIONS; cycle_index++)
	{
		number_hotpages = 0;
		/*scan each vma*/
		for(vma = mm->mmap; vma; vma = vma->vm_next)
		{
			start = vma->vm_start;
			end = vma->vm_end;
			mm = vma->vm_mm;
			/*in each vma, we check all pages*/
			for(address = start; address < end; address += PAGE_SIZE)
			{
				/*scan page table for each page in this VMA*/
				pgd = pgd_offset(mm, address);
				if (pgd_none(*pgd) || unlikely(pgd_bad(*pgd)))
					continue;
				pud = pud_offset(pgd, address);
				if (pud_none(*pud) || unlikely(pud_bad(*pud)))
					continue;
				pmd = pmd_offset(pud, address);
				if (pmd_none(*pmd) || unlikely(pmd_bad(*pmd)))
					continue;
				ptep = pte_offset_map_lock(mm, pmd, address, &ptl);
				pte = *ptep;
				if(pte_present(pte))
				{
					if(pte_young(pte)) /*hot page*/
					{
						/*re-set and clear  _access_bits to 0*/
						pte = pte_mkold(pte);
						set_pte_at(mm, address, ptep, pte);
						/*yanghao:re-set and clear _dirty_bits to 0*/
						pte = pte_mkclean(pte);
						set_pte_at(mm, address, ptep, pte);
					}
				}
				else /*no page pte_none*/
				{
					pte_unmap_unlock(ptep, ptl);
					continue;
				}
				pte_unmap_unlock(ptep, ptl);
				page_counts++;
			}
		}
		/*count the number of hot pages*/
		if(bench_process == NULL)
		{
			printk("sysmon: get no process handle in scan_pgtable function...exit&trying again...\n");
			return 0;
		}
		else /*get the process*/
			mm = bench_process->mm;
		if(mm == NULL)
		{
			printk("sysmon: error mm is NULL, return back & trying...\n");
			return 0;
		}
		number_vpages = 0;

		sampling_interval = page_counts / 250; /*yanghao:*/
		page_counts = 0;

		for(vma = mm->mmap; vma; vma = vma->vm_next)
		{
			start = vma->vm_start;
			end = vma->vm_end;
			/*scan each page in this VMA*/
			mm = vma->vm_mm;
			pg_count = 0;
			for(address = start; address < end; address += PAGE_SIZE)
			{
				/*scan page table for each page in this VMA*/
				pgd = pgd_offset(mm, address);
				if (pgd_none(*pgd) || unlikely(pgd_bad(*pgd)))
					continue;
				pud = pud_offset(pgd, address);
				if (pud_none(*pud) || unlikely(pud_bad(*pud)))
					continue;
				pmd = pmd_offset(pud, address);
				if (pmd_none(*pmd) || unlikely(pmd_bad(*pmd)))
					continue;
				ptep = pte_offset_map_lock(mm, pmd, address, &ptl);
				pte = *ptep;
				if(pte_present(pte))
				{
					if(pte_young(pte)) /* hot pages*/
					{
						number_current_pg = pg_count + number_vpages;
						page_heat[number_current_pg]++;
						hot_page[cycle_index]++;
						/*yanghao:*/
						if (page_counts == random_page)
						{
							times++;
							if (pte_dirty(pte))
								dirty_page[cycle_index] = 1;
						}
					}
					else
					{
						if (page_counts == random_page)
							reuse_time[times]++;
					}
				}
				pg_count++;
				pte_unmap_unlock(ptep, ptl);
				page_counts++;
			}
			number_vpages += (int)(end - start)/PAGE_SIZE;
		}
	}
	/*yanghao:cal. the No. of random_page*/
	random_page += sampling_interval;
	if(random_page >= page_counts)
		random_page=page_counts / 300;
	/*****************************OUTPUT************************************/
	for(j = 0; j < PAGE_ALL; j++)
	{
		if(page_heat[j] < VH && page_heat[j] > H)
			hig++;
		if(page_heat[j] > M && page_heat[j] <= H)
			mid++;
		if(page_heat[j] <= M && page_heat[j] > L)
			low++;
		if(page_heat[j] > VL_MAX && page_heat[j] <= L)
			llow++;
		if(page_heat[j] > VL_MIN && page_heat[j] <= VL_MAX)
			lllow++;
		if(page_heat[j] >= 0 && page_heat[j] <= VL_MIN)
			llllow++;
		if(page_heat[j] > -1)
			all_pages++;
	}

	/*the values reflect the accessing frequency of each physical page.*/
	printk("[LOG: after sampling (%d loops) ...] ",ITERATIONS);
	printk("the values denote the physical page accessing frequence.\n"); 
	printk("-->hig (150,200) is %d. Indicating the number of re-used pages is high.\n",hig);
	printk("-->mid (100,150] is %d.\n",mid);
	printk("-->low (64,100] is %d.\n",low);
	printk("-->llow (10,64] is %d. In locality,no too many re-used pages.\n",llow);
	printk("-->lllow (5,10] is %d.\n",lllow);
	printk("-->llllow [1,5] is %d.\n",llllow);

	for(j = 0;j < ITERATIONS; j++)
		avg_hotpage += hot_page[j];
	avg_hotpage /= ITERATIONS;

	/*
	 * new step@20140704
	 * (1)the different phases of memory utilization
	 * (2)the avg. page accessing utilization
	 * (3)memory pages layout and spectrum
	 */
	for(j = 0; j < PAGE_ALL; j++)
		if(page_heat[j] > -1) /*the page that is accessed at least once.*/
			num_access += (page_heat[j] + 1);

	printk("the total number of memory accesses is %ld, the average is %ld\n",
			num_access, num_access / ITERATIONS);
	avg_page_utilization = num_access / all_pages;
	printk("Avg hot pages num is %ld, all used pages num is %d, avg utilization of each page is %d\n", 
			avg_hotpage, all_pages, avg_page_utilization);
	/*yanghao:print the information about reuse-distance*/
	if ((times == 0) && (reuse_time[0] ==0))
		printk("the page No.%d is not available.",random_page);
	else
	{
		if ((times == 0) && (reuse_time[0] == 0))
			printk("the page No.%d was not used in this 200 loops.",random_page);
		else
		{
			if (times < ITERATIONS)
				times++;
			printk("the reusetime of page No.%d is:",random_page);
			for (j = 0; j < times; j++)
				printk("%d ",reuse_time[j]);
			printk("\n");
			printk("the total number of the digit above denotes the sum that page NO.%d be accessd in %d loops.\n",
					random_page,ITERATIONS);
			printk("each digit means the sum loops that between current loop and the last loop.\n");
		}
	}
	printk("\n\n");
	return 1;
}
Example #18
int __get_user_pages_fast(unsigned long start, int nr_pages, int write,
			  struct page **pages)
{
	struct mm_struct *mm = current->mm;
	unsigned long addr, len, end;
	unsigned long next;
	unsigned long flags;
	pgd_t *pgdp;
	int nr = 0;

	pr_devel("%s(%lx,%x,%s)\n", __func__, start, nr_pages, write ? "write" : "read");

	start &= PAGE_MASK;
	addr = start;
	len = (unsigned long) nr_pages << PAGE_SHIFT;
	end = start + len;

	if (unlikely(!access_ok(write ? VERIFY_WRITE : VERIFY_READ,
					start, len)))
		return 0;

	pr_devel("  aligned: %lx .. %lx\n", start, end);

	/*
	 * XXX: batch / limit 'nr', to avoid large irq off latency
	 * needs some instrumenting to determine the common sizes used by
	 * important workloads (eg. DB2), and whether limiting the batch size
	 * will decrease performance.
	 *
	 * It seems like we're in the clear for the moment. Direct-IO is
	 * the main guy that batches up lots of get_user_pages, and even
	 * they are limited to 64-at-a-time which is not so many.
	 */
	/*
	 * This doesn't prevent pagetable teardown, but does prevent
	 * the pagetables from being freed on powerpc.
	 *
	 * So long as we atomically load page table pointers versus teardown,
	 * we can follow the address down to the page and take a ref on it.
	 */
	local_irq_save(flags);

	pgdp = pgd_offset(mm, addr);
	do {
		pgd_t pgd = READ_ONCE(*pgdp);

		pr_devel("  %016lx: normal pgd %p\n", addr,
			 (void *)pgd_val(pgd));
		next = pgd_addr_end(addr, end);
		if (pgd_none(pgd))
			break;
		if (pgd_huge(pgd)) {
			if (!gup_hugepte((pte_t *)pgdp, PGDIR_SIZE, addr, next,
					 write, pages, &nr))
				break;
		} else if (is_hugepd(pgdp)) {
			if (!gup_hugepd((hugepd_t *)pgdp, PGDIR_SHIFT,
					addr, next, write, pages, &nr))
				break;
		} else if (!gup_pud_range(pgd, addr, next, write, pages, &nr))
			break;
	} while (pgdp++, addr = next, addr != end);

	local_irq_restore(flags);

	return nr;
}
Example #19
/**
 * follow_page_mask - look up a page descriptor from a user-virtual address
 * @vma: vm_area_struct mapping @address
 * @address: virtual address to look up
 * @flags: flags modifying lookup behaviour
 * @page_mask: on output, *page_mask is set according to the size of the page
 *
 * @flags can have FOLL_ flags set, defined in <linux/mm.h>
 *
 * Returns the mapped (struct page *), %NULL if no mapping exists, or
 * an error pointer if there is a mapping to something not represented
 * by a page descriptor (see also vm_normal_page()).
 */
struct page *follow_page_mask(struct vm_area_struct *vma,
			      unsigned long address, unsigned int flags,
			      unsigned int *page_mask)
{
	pgd_t *pgd;
	p4d_t *p4d;
	pud_t *pud;
	pmd_t *pmd;
	spinlock_t *ptl;
	struct page *page;
	struct mm_struct *mm = vma->vm_mm;

	*page_mask = 0;

	page = follow_huge_addr(mm, address, flags & FOLL_WRITE);
	if (!IS_ERR(page)) {
		BUG_ON(flags & FOLL_GET);
		return page;
	}

	pgd = pgd_offset(mm, address);
	if (pgd_none(*pgd) || unlikely(pgd_bad(*pgd)))
		return no_page_table(vma, flags);
	p4d = p4d_offset(pgd, address);
	if (p4d_none(*p4d))
		return no_page_table(vma, flags);
	BUILD_BUG_ON(p4d_huge(*p4d));
	if (unlikely(p4d_bad(*p4d)))
		return no_page_table(vma, flags);
	pud = pud_offset(p4d, address);
	if (pud_none(*pud))
		return no_page_table(vma, flags);
	if (pud_huge(*pud) && vma->vm_flags & VM_HUGETLB) {
		page = follow_huge_pud(mm, address, pud, flags);
		if (page)
			return page;
		return no_page_table(vma, flags);
	}
	if (pud_devmap(*pud)) {
		ptl = pud_lock(mm, pud);
		page = follow_devmap_pud(vma, address, pud, flags);
		spin_unlock(ptl);
		if (page)
			return page;
	}
	if (unlikely(pud_bad(*pud)))
		return no_page_table(vma, flags);

	pmd = pmd_offset(pud, address);
	if (pmd_none(*pmd))
		return no_page_table(vma, flags);
	if (pmd_huge(*pmd) && vma->vm_flags & VM_HUGETLB) {
		page = follow_huge_pmd(mm, address, pmd, flags);
		if (page)
			return page;
		return no_page_table(vma, flags);
	}
	if (pmd_devmap(*pmd)) {
		ptl = pmd_lock(mm, pmd);
		page = follow_devmap_pmd(vma, address, pmd, flags);
		spin_unlock(ptl);
		if (page)
			return page;
	}
	if (likely(!pmd_trans_huge(*pmd)))
		return follow_page_pte(vma, address, pmd, flags);

	if ((flags & FOLL_NUMA) && pmd_protnone(*pmd))
		return no_page_table(vma, flags);

	ptl = pmd_lock(mm, pmd);
	if (unlikely(!pmd_trans_huge(*pmd))) {
		spin_unlock(ptl);
		return follow_page_pte(vma, address, pmd, flags);
	}
	if (flags & FOLL_SPLIT) {
		int ret;
		page = pmd_page(*pmd);
		if (is_huge_zero_page(page)) {
			spin_unlock(ptl);
			ret = 0;
			split_huge_pmd(vma, pmd, address);
			if (pmd_trans_unstable(pmd))
				ret = -EBUSY;
		} else {
			get_page(page);
			spin_unlock(ptl);
			lock_page(page);
			ret = split_huge_page(page);
			unlock_page(page);
			put_page(page);
			if (pmd_none(*pmd))
				return no_page_table(vma, flags);
		}

		return ret ? ERR_PTR(ret) :
			follow_page_pte(vma, address, pmd, flags);
	}

	page = follow_trans_huge_pmd(vma, address, pmd, flags);
	spin_unlock(ptl);
	*page_mask = HPAGE_PMD_NR - 1;
	return page;
}
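Most callers in kernels of this vintage do not use follow_page_mask() directly; as far as I recall, <linux/mm.h> wraps it in a follow_page() helper that simply discards the page_mask output. A hedged sketch of such a wrapper:

static inline struct page *follow_page(struct vm_area_struct *vma,
				       unsigned long address,
				       unsigned int foll_flags)
{
	unsigned int unused_page_mask;	/* huge-page mask is ignored here */

	return follow_page_mask(vma, address, foll_flags, &unused_page_mask);
}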
Example #20
unsigned int m4u_user_v2p(unsigned int va)
{
    unsigned int pmdOffset = (va & (PMD_SIZE - 1));
    unsigned int pageOffset = (va & (PAGE_SIZE - 1));
    pgd_t *pgd;
    pmd_t *pmd;
    pte_t *pte;
    unsigned int pa;
    
    if(NULL==current)
    {
    	  M4UMSG("error: m4u_user_v2p, current is NULL! \n");
    	  return 0;
    }
    if(NULL==current->mm)
    {
    	  M4UMSG("error: m4u_user_v2p, current->mm is NULL! tgid=0x%x, name=%s \n", current->tgid, current->comm);
    	  return 0;
    }
        
    pgd = pgd_offset(current->mm, va); /* what is tsk->mm */
    M4UDBG("m4u_user_v2p(), pgd 0x%x\n", pgd);    
    M4UDBG("pgd_none=%d, pgd_bad=%d\n", pgd_none(*pgd), pgd_bad(*pgd));
    
    if(pgd_none(*pgd)||pgd_bad(*pgd))
    {
        M4UMSG("warning: m4u_user_v2p(), va=0x%x, pgd invalid! \n", va);
        return 0;
    }
    
    pmd = pmd_offset(pgd, va);
    M4UDBG("m4u_user_v2p(), pmd 0x%x\n", pmd);
    M4UDBG("pmd_none=%d, pmd_bad=%d, pmd_val=0x%x\n", pmd_none(*pmd), pmd_bad(*pmd), pmd_val(*pmd));
   
    /* If this is a page table entry, keep on walking to the next level */ 
    if (( (unsigned int)pmd_val(*pmd) & PMD_TYPE_MASK) == PMD_TYPE_TABLE)
    {
        if(pmd_none(*pmd)||pmd_bad(*pmd))
        {
            M4UDBG("warning: m4u_user_v2p(), va=0x%x, pmd invalid! \n", va);
            return 0;
        }
        
        // we encounter some pte not present issue, do not know why
        pte = pte_offset_map(pmd, va);
        if(pte_present(*pte))
        {
            pa=(pte_val(*pte) & (PAGE_MASK)) | pageOffset;
            pte_unmap(pte);
            M4UDBG("PA = 0x%8x\n", pa);
            return pa;
        }
        pte_unmap(pte);
    }
    else /* Only 1 level page table */
    {
       if(pmd_none(*pmd))
       {
          M4UDBG("Error: m4u_user_v2p(), virtual addr 0x%x, pmd invalid! \n", va);
          return 0;
       }
       pa=(pmd_val(*pmd) & (PMD_MASK)) | pmdOffset;
       M4UDBG("PA = 0x%8x\n", pa);
       return pa;    
    }
    
    M4UDBG("warning: m4u_user_v2p(), pte invalid! \n");
    // m4u_dump_maps(va);
    
    return 0;
}
Example #21
int memory_remove_pte(struct local_page *lp) {

	unsigned long del_user_va = lp->user_va;
	unsigned long del_slab_va = lp->slab_va;
	unsigned long del_pfn = page_to_pfn(virt_to_page(del_slab_va));
	struct vm_area_struct *del_vma = lp->vma;
	struct mm_struct *del_mm = del_vma->vm_mm;

#if(DEBUG)
	printk("[%s]\n", __FUNCTION__);
	printk("del_user_va: %p\n", del_user_va);
	printk("del_slab_va: %p\n", del_slab_va);
	printk("del_pfn: %p\n", del_pfn);
	printk("del_vma: %p\n", del_vma);
	printk("del_mm: %p\n", del_mm);
#endif

	// TODO: find PTE (need to be changed for x86)
	pgd_t *pgd;
	pud_t *pud;
	pmd_t *pmd;
	pte_t *ptep;

	pgd = pgd_offset(del_mm, del_user_va);
	if (pgd_none(*pgd) || pgd_bad(*pgd)) {
		printk("<error> invalid pgd\n");
		return -1;
	}

	pud = pud_offset(pgd, del_user_va);
	if (pud_none(*pud) || pud_bad(*pud)) {
		printk("<error> invalid pud\n");
		return -1;
	}

	pmd = pmd_offset(pud, del_user_va);
	if (pmd_none(*pmd) || pmd_bad(*pmd)) {
		printk("<error> invalid pmd\n");
		return -1;
	}

	ptep = pte_offset_kernel(pmd, del_user_va);
	if (!ptep) {
		printk("<error> invalid pte\n");
		return -1;
	}

#if(DEBUG)
	printk("ptep: %p\n", ptep);
	printk("pte: %p\n", *ptep);
	printk("pfn: %p\n", pte_pfn(*ptep));
#endif

	// flush cache
	flush_cache_page(del_vma, del_user_va, del_pfn);

	// clear PTE
	pte_clear(del_mm, del_user_va, ptep);

	// flush TLB
	flush_tlb_page(del_vma, del_user_va);

	return 0;
}
Example #22
/**
 * follow_page_mask - look up a page descriptor from a user-virtual address
 * @vma: vm_area_struct mapping @address
 * @address: virtual address to look up
 * @flags: flags modifying lookup behaviour
 * @page_mask: on output, *page_mask is set according to the size of the page
 *
 * @flags can have FOLL_ flags set, defined in <linux/mm.h>
 *
 * Returns the mapped (struct page *), %NULL if no mapping exists, or
 * an error pointer if there is a mapping to something not represented
 * by a page descriptor (see also vm_normal_page()).
 */
struct page *follow_page_mask(struct vm_area_struct *vma,
			      unsigned long address, unsigned int flags,
			      unsigned int *page_mask)
{
	pgd_t *pgd;
	pud_t *pud;
	pmd_t *pmd;
	spinlock_t *ptl;
	struct page *page;
	struct mm_struct *mm = vma->vm_mm;

	*page_mask = 0;

	page = follow_huge_addr(mm, address, flags & FOLL_WRITE);
	if (!IS_ERR(page)) {
		BUG_ON(flags & FOLL_GET);
		return page;
	}

	pgd = pgd_offset(mm, address);
	if (pgd_none(*pgd) || unlikely(pgd_bad(*pgd)))
		return no_page_table(vma, flags);

	pud = pud_offset(pgd, address);
	if (pud_none(*pud))
		return no_page_table(vma, flags);
	if (pud_huge(*pud) && vma->vm_flags & VM_HUGETLB) {
		if (flags & FOLL_GET)
			return NULL;
		page = follow_huge_pud(mm, address, pud, flags & FOLL_WRITE);
		return page;
	}
	if (unlikely(pud_bad(*pud)))
		return no_page_table(vma, flags);

	pmd = pmd_offset(pud, address);
	if (pmd_none(*pmd))
		return no_page_table(vma, flags);
	if (pmd_huge(*pmd) && vma->vm_flags & VM_HUGETLB) {
		page = follow_huge_pmd(mm, address, pmd, flags & FOLL_WRITE);
		if (flags & FOLL_GET) {
			/*
			 * Refcount on tail pages are not well-defined and
			 * shouldn't be taken. The caller should handle a NULL
			 * return when trying to follow tail pages.
			 */
			if (PageHead(page))
				get_page(page);
			else
				page = NULL;
		}
		return page;
	}
	if ((flags & FOLL_NUMA) && pmd_numa(*pmd))
		return no_page_table(vma, flags);
	if (pmd_trans_huge(*pmd)) {
		if (flags & FOLL_SPLIT) {
			split_huge_page_pmd(vma, address, pmd);
			return follow_page_pte(vma, address, pmd, flags);
		}
		ptl = pmd_lock(mm, pmd);
		if (likely(pmd_trans_huge(*pmd))) {
			if (unlikely(pmd_trans_splitting(*pmd))) {
				spin_unlock(ptl);
				wait_split_huge_page(vma->anon_vma, pmd);
			} else {
				page = follow_trans_huge_pmd(vma, address,
							     pmd, flags);
				spin_unlock(ptl);
				*page_mask = HPAGE_PMD_NR - 1;
				return page;
			}
		} else
			spin_unlock(ptl);
	}
	return follow_page_pte(vma, address, pmd, flags);
}
Example #23
/* Pretty sick eh? */
int prom_callback(long *args)
{
	struct console *cons, *saved_console = NULL;
	unsigned long flags;
	char *cmd;
	extern spinlock_t prom_entry_lock;

	if (!args)
		return -1;
	if (!(cmd = (char *)args[0]))
		return -1;

	/*
	 * The callback can be invoked on the cpu that first dropped 
	 * into prom_cmdline after taking the serial interrupt, or on 
	 * a slave processor that was smp_captured() if the 
	 * administrator has done a switch-cpu inside obp. In either 
	 * case, the cpu is marked as in-interrupt. Drop IRQ locks.
	 */
	irq_exit();

	/* XXX Revisit the locking here someday.  This is a debugging
	 * XXX feature so it isn't all that critical.  -DaveM
	 */
	local_irq_save(flags);

	spin_unlock(&prom_entry_lock);
	cons = console_drivers;
	while (cons) {
		unregister_console(cons);
		cons->flags &= ~(CON_PRINTBUFFER);
		cons->next = saved_console;
		saved_console = cons;
		cons = console_drivers;
	}
	register_console(&prom_console);
	if (!strcmp(cmd, "sync")) {
		prom_printf("PROM `%s' command...\n", cmd);
		show_free_areas();
		if (current->pid != 0) {
			local_irq_enable();
			sys_sync();
			local_irq_disable();
		}
		args[2] = 0;
		args[args[1] + 3] = -1;
		prom_printf("Returning to PROM\n");
	} else if (!strcmp(cmd, "va>tte-data")) {
		unsigned long ctx, va;
		unsigned long tte = 0;
		long res = PROM_FALSE;

		ctx = args[3];
		va = args[4];
		if (ctx) {
			/*
			 * Find process owning ctx, lookup mapping.
			 */
			struct task_struct *p;
			struct mm_struct *mm = NULL;
			pgd_t *pgdp;
			pmd_t *pmdp;
			pte_t *ptep;

			for_each_process(p) {
				mm = p->mm;
				if (CTX_HWBITS(mm->context) == ctx)
					break;
			}
			if (!mm ||
			    CTX_HWBITS(mm->context) != ctx)
				goto done;

			pgdp = pgd_offset(mm, va);
			if (pgd_none(*pgdp))
				goto done;
			pmdp = pmd_offset(pgdp, va);
			if (pmd_none(*pmdp))
				goto done;

			/* Preemption implicitly disabled by virtue of
			 * being called from inside OBP.
			 */
			ptep = pte_offset_map(pmdp, va);
			if (pte_present(*ptep)) {
				tte = pte_val(*ptep);
				res = PROM_TRUE;
			}
			pte_unmap(ptep);
			goto done;
		}

		if ((va >= KERNBASE) && (va < (KERNBASE + (4 * 1024 * 1024)))) {
			/* Spitfire Errata #32 workaround */
			__asm__ __volatile__("stxa	%0, [%1] %2\n\t"
					     "flush	%%g6"
					     : /* No outputs */
					     : "r" (0),
					     "r" (PRIMARY_CONTEXT), "i" (ASI_DMMU));

			/*
			 * Locked down tlb entry.
			 */

			if (tlb_type == spitfire)
				tte = spitfire_get_dtlb_data(SPITFIRE_HIGHEST_LOCKED_TLBENT);
			else if (tlb_type == cheetah || tlb_type == cheetah_plus)
				tte = cheetah_get_ldtlb_data(CHEETAH_HIGHEST_LOCKED_TLBENT);

			res = PROM_TRUE;
			goto done;
		}
Example #24
static int
pin_page_for_write(const void __user *_addr, pte_t **ptep, spinlock_t **ptlp)
{
	unsigned long addr = (unsigned long)_addr;
	pgd_t *pgd;
	pmd_t *pmd;
	pte_t *pte;
	pud_t *pud;
	spinlock_t *ptl;

	pgd = pgd_offset(current->mm, addr);
	if (unlikely(pgd_none(*pgd) || pgd_bad(*pgd)))
		return 0;

	pud = pud_offset(pgd, addr);
	if (unlikely(pud_none(*pud) || pud_bad(*pud)))
		return 0;

	pmd = pmd_offset(pud, addr);
	if (unlikely(pmd_none(*pmd)))
		return 0;

	/*
	 * A pmd can be bad if it refers to a HugeTLB or THP page.
	 *
	 * Both THP and HugeTLB pages have the same pmd layout
	 * and should not be manipulated by the pte functions.
	 *
	 * Lock the page table for the destination and check
	 * to see that it's still huge and whether or not we will
	 * need to fault on write, or if we have a splitting THP.
	 */
	if (unlikely(pmd_thp_or_huge(*pmd))) {
		ptl = &current->mm->page_table_lock;
		spin_lock(ptl);
		if (unlikely(!pmd_thp_or_huge(*pmd)
			|| pmd_hugewillfault(*pmd)
			|| pmd_trans_splitting(*pmd))) {
			spin_unlock(ptl);
			return 0;
		}

		*ptep = NULL;
		*ptlp = ptl;
		return 1;
	}

	if (unlikely(pmd_bad(*pmd)))
		return 0;

	pte = pte_offset_map_lock(current->mm, pmd, addr, &ptl);
	if (unlikely(!pte_present(*pte) || !pte_young(*pte) ||
	    !pte_write(*pte) || !pte_dirty(*pte))) {
		pte_unmap_unlock(pte, ptl);
		return 0;
	}

	*ptep = pte;
	*ptlp = ptl;

	return 1;
}
Example #25
/*
 * copy one vm_area from one task to the other. Assumes the page tables
 * already present in the new task to be cleared in the whole range
 * covered by this vma.
 *
 * 08Jan98 Merged into one routine from several inline routines to reduce
 *         variable count and make things faster. -jj
 */
int copy_page_range(struct mm_struct *dst, struct mm_struct *src,
			struct vm_area_struct *vma)
{
	pgd_t * src_pgd, * dst_pgd;
	unsigned long address = vma->vm_start;
	unsigned long end = vma->vm_end;
	unsigned long cow = (vma->vm_flags & (VM_SHARED | VM_MAYWRITE)) == VM_MAYWRITE;

	src_pgd = pgd_offset(src, address)-1;
	dst_pgd = pgd_offset(dst, address)-1;
	
	for (;;) {
		pmd_t * src_pmd, * dst_pmd;

		src_pgd++; dst_pgd++;
		
		/* copy_pmd_range */
		
		if (pgd_none(*src_pgd))
			goto skip_copy_pmd_range;
		if (pgd_bad(*src_pgd)) {
			pgd_ERROR(*src_pgd);
			pgd_clear(src_pgd);
skip_copy_pmd_range:	address = (address + PGDIR_SIZE) & PGDIR_MASK;
			if (!address || (address >= end))
				goto out;
			continue;
		}
		if (pgd_none(*dst_pgd)) {
			if (!pmd_alloc(dst_pgd, 0))
				goto nomem;
		}
		
		src_pmd = pmd_offset(src_pgd, address);
		dst_pmd = pmd_offset(dst_pgd, address);

		do {
			pte_t * src_pte, * dst_pte;
		
			/* copy_pte_range */
		
			if (pmd_none(*src_pmd))
				goto skip_copy_pte_range;
			if (pmd_bad(*src_pmd)) {
				pmd_ERROR(*src_pmd);
				pmd_clear(src_pmd);
skip_copy_pte_range:		address = (address + PMD_SIZE) & PMD_MASK;
				if (address >= end)
					goto out;
				goto cont_copy_pmd_range;
			}
			if (pmd_none(*dst_pmd)) {
				if (!pte_alloc(dst_pmd, 0))
					goto nomem;
			}
			
			src_pte = pte_offset(src_pmd, address);
			dst_pte = pte_offset(dst_pmd, address);
			
			do {
				pte_t pte = *src_pte;
				struct page *ptepage;
				
				/* copy_one_pte */

				if (pte_none(pte))
					goto cont_copy_pte_range_noset;
				if (!pte_present(pte)) {
					swap_duplicate(pte_to_swp_entry(pte));
					goto cont_copy_pte_range;
				}
				ptepage = pte_page(pte);
				if ((!VALID_PAGE(ptepage)) || 
				    PageReserved(ptepage))
					goto cont_copy_pte_range;

				/* If it's a COW mapping, write protect it both in the parent and the child */
				if (cow) {
					ptep_set_wrprotect(src_pte);
					pte = *src_pte;
				}

				/* If it's a shared mapping, mark it clean in the child */
				if (vma->vm_flags & VM_SHARED)
					pte = pte_mkclean(pte);
				pte = pte_mkold(pte);
				get_page(ptepage);

cont_copy_pte_range:		set_pte(dst_pte, pte);
cont_copy_pte_range_noset:	address += PAGE_SIZE;
				if (address >= end)
					goto out;
				src_pte++;
				dst_pte++;
			} while ((unsigned long)src_pte & PTE_TABLE_MASK);
		
cont_copy_pmd_range:	src_pmd++;
			dst_pmd++;
		} while ((unsigned long)src_pmd & PMD_TABLE_MASK);
	}
out:
	return 0;

nomem:
	return -ENOMEM;
}
Example #26
/*
 * This is useful to dump out the page tables associated with
 * 'addr' in mm 'mm'.
 */
void show_pte(struct mm_struct *mm, unsigned long addr)
{
	mm_segment_t fs;

	if (!mm)
		mm = &init_mm;

	printk(KERN_ALERT "mm = %p pgd = %p\n", mm, mm->pgd);

	fs = get_fs();
	set_fs(get_ds());
	do {
		pgd_t pg, *pgd = pgd_offset(mm, addr);
		pmd_t pm, *pmd;
		pte_t pt, *pte;

		printk(KERN_ALERT "*pgd = ");

		if (__get_user(pgd_val(pg), (unsigned long *)pgd)) {
			printk("(faulted)");
			break;
		}

		printk("%08lx", pgd_val(pg));

		if (pgd_none(pg))
			break;

		if (pgd_bad(pg)) {
			printk("(bad)");
			break;
		}

		pmd = pmd_offset(pgd, addr);

		printk(", *pmd = ");

		if (__get_user(pmd_val(pm), (unsigned long *)pmd)) {
			printk("(faulted)");
			break;
		}

		printk("%08lx", pmd_val(pm));

		if (pmd_none(pm))
			break;

		if (pmd_bad(pm)) {
			printk("(bad)");
			break;
		}

		pte = pte_offset(pmd, addr);

		printk(", *pte = ");

		if (__get_user(pte_val(pt), (unsigned long *)pte)) {
			printk("(faulted)");
			break;
		}

		printk("%08lx", pte_val(pt));
#ifdef CONFIG_CPU_32
		printk(", *ppte = %08lx", pte_val(pte[-PTRS_PER_PTE]));
#endif
	} while(0);
	set_fs(fs);

	printk("\n");
}
Example #27
int VirtToPhys(void *vaddr, int *paddrp)
{
#if defined(__KERNEL__)

    unsigned long addr = (unsigned long) vaddr;

    if (addr < P1SEG || ((addr >= VMALLOC_START) && (addr < VMALLOC_END)))
    {
	/*
	 * Find the virtual address of either a user page (<P1SEG) or VMALLOC (P3SEG)
	 *
	 * This code is based on vmalloc_to_page() in mm/memory.c
	 */
	struct mm_struct *mm;

	pgd_t *pgd;
#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,10)
	pud_t *pud;
#endif
	pmd_t *pmd;
	pte_t *ptep, pte;
	
	/* Must use the correct mm based on whether this is a kernel or a userspace address */
	if (addr >= VMALLOC_START)
	    mm = &init_mm;
	else
	    mm = current->mm;

	/* Safety first! */
	if (mm == NULL)
	    return VTOP_INVALID_ARG;

	spin_lock(&mm->page_table_lock);
	
	pgd = pgd_offset(mm, addr);
	if (pgd_none(*pgd) || pgd_bad(*pgd))
	    goto out;

#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,10)
	pud = pud_offset(pgd, addr);
	if (pud_none(*pud) || pud_bad(*pud))
	    goto out;
	
	pmd = pmd_offset(pud, addr);
#else
	pmd = pmd_offset(pgd, addr);
#endif
	if (pmd_none(*pmd) || pmd_bad(*pmd))
	    goto out;
	
#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,9)
	ptep = pte_offset(pmd, addr);
#else
	ptep = pte_offset_map(pmd, addr);
#endif
	if (!ptep) 
	    goto out;

	pte = *ptep;
	if (pte_present(pte)) {
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,9)
	    pte_unmap(ptep);
#endif
	    spin_unlock(&mm->page_table_lock);
	    
	    /* pte_page() macro is broken for SH in linux 2.6.20 and later */
	    *paddrp = page_to_phys(pfn_to_page(pte_pfn(pte))) | (addr & (PAGE_SIZE-1));
	    
	    /* INSbl28636: P3 segment pages cannot be looked up with pmb_virt_to_phys()
	     * instead we need to examine the _PAGE_CACHABLE bit in the pte
	     */
	    return ((pte_val(pte) & _PAGE_CACHABLE) ? VTOP_INCOHERENT_MEM : VTOP_SUCCESS);
	}
	
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,9)
	pte_unmap(ptep);
#endif
	
    out:
	spin_unlock(&mm->page_table_lock);

	/* Failed to find a pte */
	return VTOP_INVALID_ARG;
    }
    else
#if defined(CONFIG_32BIT)
    {
	unsigned long flags;
	
	/* Try looking for an ioremap() via the PMB */
	if (pmb_virt_to_phys(vaddr, (unsigned long *)paddrp, &flags) == 0)
	{
	    /* Success: Test the returned PMB flags */
	    return ((flags & PMB_C) ? VTOP_INCOHERENT_MEM : VTOP_SUCCESS);
	}

	/* Failed to find a mapping */
	return VTOP_INVALID_ARG;
    }
#else
    {
	unsigned long addr = (unsigned long) vaddr;
	
	/* Assume 29-bit SH4 Linux */
	*(paddrp)= PHYSADDR(addr);
	
	/* only the P2SEG is uncached (doubt we will see P4SEG addresses) */
	return ((PXSEG(addr) == P2SEG) ? VTOP_SUCCESS: VTOP_INCOHERENT_MEM);
    }
#endif /* CONFIG_32BIT */

#endif /* __KERNEL__ */

    /* Not implemented */
    return VTOP_INVALID_ARG;
}
Example #28
void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
			struct task_struct *tsk)
{
	struct mm_struct *real_prev = this_cpu_read(cpu_tlbstate.loaded_mm);
	u16 prev_asid = this_cpu_read(cpu_tlbstate.loaded_mm_asid);
	unsigned cpu = smp_processor_id();
	u64 next_tlb_gen;

	/*
	 * NB: The scheduler will call us with prev == next when switching
	 * from lazy TLB mode to normal mode if active_mm isn't changing.
	 * When this happens, we don't assume that CR3 (and hence
	 * cpu_tlbstate.loaded_mm) matches next.
	 *
	 * NB: leave_mm() calls us with prev == NULL and tsk == NULL.
	 */

	/* We don't want flush_tlb_func_* to run concurrently with us. */
	if (IS_ENABLED(CONFIG_PROVE_LOCKING))
		WARN_ON_ONCE(!irqs_disabled());

	/*
	 * Verify that CR3 is what we think it is.  This will catch
	 * hypothetical buggy code that directly switches to swapper_pg_dir
	 * without going through leave_mm() / switch_mm_irqs_off() or that
	 * does something like write_cr3(read_cr3_pa()).
	 *
	 * Only do this check if CONFIG_DEBUG_VM=y because __read_cr3()
	 * isn't free.
	 */
#ifdef CONFIG_DEBUG_VM
	if (WARN_ON_ONCE(__read_cr3() != build_cr3(real_prev, prev_asid))) {
		/*
		 * If we were to BUG here, we'd be very likely to kill
		 * the system so hard that we don't see the call trace.
		 * Try to recover instead by ignoring the error and doing
		 * a global flush to minimize the chance of corruption.
		 *
		 * (This is far from being a fully correct recovery.
		 *  Architecturally, the CPU could prefetch something
		 *  back into an incorrect ASID slot and leave it there
		 *  to cause trouble down the road.  It's better than
		 *  nothing, though.)
		 */
		__flush_tlb_all();
	}
#endif
	this_cpu_write(cpu_tlbstate.is_lazy, false);

	if (real_prev == next) {
		VM_BUG_ON(this_cpu_read(cpu_tlbstate.ctxs[prev_asid].ctx_id) !=
			  next->context.ctx_id);

		/*
		 * We don't currently support having a real mm loaded without
		 * our cpu set in mm_cpumask().  We have all the bookkeeping
		 * in place to figure out whether we would need to flush
		 * if our cpu were cleared in mm_cpumask(), but we don't
		 * currently use it.
		 */
		if (WARN_ON_ONCE(real_prev != &init_mm &&
				 !cpumask_test_cpu(cpu, mm_cpumask(next))))
			cpumask_set_cpu(cpu, mm_cpumask(next));

		return;
	} else {
		u16 new_asid;
		bool need_flush;

		if (IS_ENABLED(CONFIG_VMAP_STACK)) {
			/*
			 * If our current stack is in vmalloc space and isn't
			 * mapped in the new pgd, we'll double-fault.  Forcibly
			 * map it.
			 */
			unsigned int index = pgd_index(current_stack_pointer);
			pgd_t *pgd = next->pgd + index;

			if (unlikely(pgd_none(*pgd)))
				set_pgd(pgd, init_mm.pgd[index]);
		}

		/* Stop remote flushes for the previous mm */
		VM_WARN_ON_ONCE(!cpumask_test_cpu(cpu, mm_cpumask(real_prev)) &&
				real_prev != &init_mm);
		cpumask_clear_cpu(cpu, mm_cpumask(real_prev));

		/*
		 * Start remote flushes and then read tlb_gen.
		 */
		cpumask_set_cpu(cpu, mm_cpumask(next));
		next_tlb_gen = atomic64_read(&next->context.tlb_gen);

		choose_new_asid(next, next_tlb_gen, &new_asid, &need_flush);

		if (need_flush) {
			this_cpu_write(cpu_tlbstate.ctxs[new_asid].ctx_id, next->context.ctx_id);
			this_cpu_write(cpu_tlbstate.ctxs[new_asid].tlb_gen, next_tlb_gen);
			write_cr3(build_cr3(next, new_asid));
			trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH,
					TLB_FLUSH_ALL);
		} else {
			/* The new ASID is already up to date. */
			write_cr3(build_cr3_noflush(next, new_asid));
			trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, 0);
		}

		this_cpu_write(cpu_tlbstate.loaded_mm, next);
		this_cpu_write(cpu_tlbstate.loaded_mm_asid, new_asid);
	}

	load_mm_cr4(next);
	switch_ldt(real_prev, next);
}
Example #29
unsigned int m4u_user_v2p(unsigned int va)
{
    unsigned int pageOffset = (va & (PAGE_SIZE - 1));
    pgd_t *pgd;
    pud_t *pud;
    pmd_t *pmd;
    pte_t *pte;
    unsigned int pa;
    //M4UMSG("Enter m4u_user_v2p()! \n", va);

    if(NULL==current)
    {
    	  M4UMSG("warning: m4u_user_v2p, current is NULL! \n");
    	  return 0;
    }
    if(NULL==current->mm)
    {
    	  M4UMSG("warning: m4u_user_v2p, current->mm is NULL! tgid=0x%x, name=%s \n", current->tgid, current->comm);
    	  return 0;
    }
        
    pgd = pgd_offset(current->mm, va); /* what is tsk->mm */
    if(pgd_none(*pgd)||pgd_bad(*pgd))
    {
        M4UMSG("m4u_user_v2p(), va=0x%x, pgd invalid! \n", va);
        return 0;
    }

    pud = pud_offset(pgd, va);
    if(pud_none(*pud)||pud_bad(*pud))
    {
        M4UDBG("m4u_user_v2p(), va=0x%x, pud invalid! \n", va);
        return 0;
    }
    
    pmd = pmd_offset(pud, va);
    if(pmd_none(*pmd)||pmd_bad(*pmd))
    {
        M4UDBG("m4u_user_v2p(), va=0x%x, pmd invalid! \n", va);
        return 0;
    }
        
    pte = pte_offset_map(pmd, va);
    if(pte_present(*pte)) 
    { 
/*
        if((long long)pte_val(pte[PTE_HWTABLE_PTRS]) == (long long)0)
        {
        	M4UMSG("user_v2p, va=0x%x, *ppte=%08llx", va,
        	       (long long)pte_val(pte[PTE_HWTABLE_PTRS]));
            pte_unmap(pte);
            return 0;
        }
*/        
        pa=(pte_val(*pte) & (PAGE_MASK)) | pageOffset; 
        pte_unmap(pte);
        return pa; 
    }   

    pte_unmap(pte);


    M4UDBG("m4u_user_v2p(), va=0x%x, pte invalid! \n", va);
    // m4u_dump_maps(va);
    
    return 0;
}
Example #30
int get_user_pages_fast(unsigned long start, int nr_pages, int write,
			struct page **pages)
{
	struct mm_struct *mm = current->mm;
	unsigned long addr, len, end;
	unsigned long next;
	pgd_t *pgdp;
	int nr = 0;

	start &= PAGE_MASK;
	addr = start;
	len = (unsigned long) nr_pages << PAGE_SHIFT;
	end = start + len;

	/*
	 * XXX: batch / limit 'nr', to avoid large irq off latency
	 * needs some instrumenting to determine the common sizes used by
	 * important workloads (eg. DB2), and whether limiting the batch size
	 * will decrease performance.
	 *
	 * It seems like we're in the clear for the moment. Direct-IO is
	 * the main guy that batches up lots of get_user_pages, and even
	 * they are limited to 64-at-a-time which is not so many.
	 */
	/*
	 * This doesn't prevent pagetable teardown, but does prevent
	 * the pagetables from being freed on sparc.
	 *
	 * So long as we atomically load page table pointers versus teardown,
	 * we can follow the address down to the page and take a ref on it.
	 */
	local_irq_disable();

	pgdp = pgd_offset(mm, addr);
	do {
		pgd_t pgd = *pgdp;

		next = pgd_addr_end(addr, end);
		if (pgd_none(pgd))
			goto slow;
		if (!gup_pud_range(pgd, addr, next, write, pages, &nr))
			goto slow;
	} while (pgdp++, addr = next, addr != end);

	local_irq_enable();

	VM_BUG_ON(nr != (end - start) >> PAGE_SHIFT);
	return nr;

	{
		int ret;

slow:
		local_irq_enable();

		/* Try to get the remaining pages with get_user_pages */
		start += nr << PAGE_SHIFT;
		pages += nr;

		down_read(&mm->mmap_sem);
		ret = get_user_pages(current, mm, start,
			(end - start) >> PAGE_SHIFT, write, 0, pages, NULL);
		up_read(&mm->mmap_sem);

		/* Have to be a bit careful with return values */
		if (nr > 0) {
			if (ret < 0)
				ret = nr;
			else
				ret += nr;
		}

		return ret;
	}
}