Пример #1
0
static int __do_vmalloc_fault(unsigned long addr, struct mm_struct *mm)
{
	/* Synchronise this task's top level page-table
	 * with the 'reference' page table.
	 */
	int offset = __pgd_offset(addr);
	pgd_t *pgd, *pgd_k;
	pmd_t *pmd, *pmd_k;

	pgd_k = init_mm.pgd + offset;
	if (!pgd_present(*pgd_k))
		goto bad_area;

	pgd = mm->pgd + offset;
#if 0	/* note that we are two-level */
	if (!pgd_present(*pgd))
		set_pgd(pgd, *pgd_k);
#endif

	pmd_k = pmd_offset(pgd_k, addr);
	if (pmd_none(*pmd_k))
		goto bad_area;

	pmd = pmd_offset(pgd, addr);
	if (!pmd_none(*pmd))
		goto bad_area;
	set_pmd(pmd, *pmd_k);
	return 1;

bad_area:
	return -2;
}
Пример #2
0
static int find_num_contig(struct mm_struct *mm, unsigned long addr,
			   pte_t *ptep, pte_t pte, size_t *pgsize)
{
	pgd_t *pgd = pgd_offset(mm, addr);
	pud_t *pud;
	pmd_t *pmd;

	*pgsize = PAGE_SIZE;
	if (!pte_cont(pte))
		return 1;
	if (!pgd_present(*pgd)) {
		VM_BUG_ON(!pgd_present(*pgd));
		return 1;
	}
	pud = pud_offset(pgd, addr);
	if (!pud_present(*pud)) {
		VM_BUG_ON(!pud_present(*pud));
		return 1;
	}
	pmd = pmd_offset(pud, addr);
	if (!pmd_present(*pmd)) {
		VM_BUG_ON(!pmd_present(*pmd));
		return 1;
	}
	if ((pte_t *)pmd == ptep) {
		*pgsize = PMD_SIZE;
		return CONT_PMDS;
	}
	return CONT_PTES;
}
Пример #3
0
static int handle_tlbmiss(struct mm_struct *mm,
              unsigned long long protection_flags,
              unsigned long long textaccess,
              unsigned long address)
{
    pgd_t *dir;
    pud_t *pud;
    pmd_t *pmd;
    pte_t *pte;
    pte_t entry;

    /* NB. The PGD currently only contains a single entry - there is no
       page table tree stored for the top half of the address space since
       virtual pages in that region should never be mapped in user mode.
       (In kernel mode, the only things in that region are the 512Mb super
       page (locked in), and vmalloc (modules) +  I/O device pages (handled
       by handle_vmalloc_fault), so no PGD for the upper half is required
       by kernel mode either).

       See how mm->pgd is allocated and initialised in pgd_alloc to see why
       the next test is necessary.  - RPC */
    if (address >= (unsigned long) TASK_SIZE)
        /* upper half - never has page table entries. */
        return 0;

    dir = pgd_offset(mm, address);
    if (pgd_none(*dir) || !pgd_present(*dir))
        return 0;
    if (!pgd_present(*dir))
        return 0;

    pud = pud_offset(dir, address);
    if (pud_none(*pud) || !pud_present(*pud))
        return 0;

    pmd = pmd_offset(pud, address);
    if (pmd_none(*pmd) || !pmd_present(*pmd))
        return 0;

    pte = pte_offset_kernel(pmd, address);
    entry = *pte;

    if (pte_none(entry) || !pte_present(entry))
        return 0;

    /*
     * If the page doesn't have sufficient protection bits set to
     * service the kind of fault being handled, there's not much
     * point doing the TLB refill.  Punt the fault to the general
     * handler.
     */
    if ((pte_val(entry) & protection_flags) != protection_flags)
        return 0;

        __do_tlb_refill(address, textaccess, pte);

    return 1;
}
Пример #4
0
asmlinkage long sys_my_syscall( int pid, unsigned long address)
{
   
   struct task_struct* task;
   struct mm_struct* mm;
   pgd_t* pgd;
   pud_t* pud;
   pmd_t* pmd;
   pte_t* pte;
   unsigned long pte_val ;
   printk(KERN_INFO "PID: %d, VIRTUAL_ADDR: 0x%lx\n", pid, address);
   for_each_process(task)
   {
     if(task->pid == pid)
     {           
      printk(KERN_INFO "Task %d found\n", task->pid);
      mm = task->mm;
      
      pgd = pgd_offset(mm, address);
      printk(KERN_INFO "PGD INFO: PRESENT: %d, BAD: %d, NONE: %d\n", pgd_present(*pgd), pgd_bad(*pgd), pgd_none(*pgd));
      if(!(pgd_none(*pgd) || pgd_bad(*pgd)) && pgd_present(*pgd))
      {
        printk(KERN_INFO "PGD INFO: PRESENT: %d, BAD: %d, NONE: %d\n", pgd_present(*pgd), pgd_bad(*pgd), pgd_none(*pgd));
        pud = pud_offset(pgd, address);
        printk(KERN_INFO "PUD INFO: PRESENT: %d, BAD: %d, NONE: %d\n", pud_present(*pud), pud_bad(*pud), pud_none(*pud));
           
        if(!(pud_none(*pud) || pud_bad(*pud)) && pud_present(*pud))
        {
          printk(KERN_INFO "PUD INFO: PRESENT: %d, BAD: %d, NONE: %d\n", pud_present(*pud), pud_bad(*pud), pud_none(*pud));
          pmd = pmd_offset(pud, address);
          printk(KERN_INFO "PMD INFO: PRESENT: %d, BAD: %d, NONE: %d\n", pmd_present(*pmd), pmd_bad(*pmd), pmd_none(*pmd));
           
          if(!(pmd_none(*pmd) || pmd_bad(*pmd)) && pmd_present(*pmd))
          {
            printk(KERN_INFO "PMD INFO: PRESENT: %d, BAD: %d, NONE: %d\n", pmd_present(*pmd), pmd_bad(*pmd), pmd_none(*pmd));
            pte = pte_offset_map(pmd, address);
            printk(KERN_INFO "PTE INFO: PRESENT: %d PTE: 0x%lx \n ", pte_present(*pte), pte->pte);


            pte_val = pte->pte;
            
            if(pte_val == 0)
                pte_val = __pte_to_swp_entry(*pte).val;
            
            pte_unmap(pte);
            printk(KERN_INFO "pte_val: %lx\n" , pte_val);
            return pte_val;
            
            }
        }
      }
    }
  }
  printk(KERN_INFO "Data not found!\n");
  return 0;
}
Пример #5
0
static void walk_pgd_level(struct seq_file *m)
{
#ifdef CONFIG_X86_64
	pgd_t *start = (pgd_t *) &init_level4_pgt;
#else
	pgd_t *start = swapper_pg_dir;
#endif
	int i;
	struct pg_state st;

	memset(&st, 0, sizeof(st));

	for (i = 0; i < PTRS_PER_PGD; i++) {
		st.current_address = normalize_addr(i * PGD_LEVEL_MULT);
		if (!pgd_none(*start)) {
			pgprotval_t prot = pgd_val(*start) & PTE_FLAGS_MASK;

			if (pgd_large(*start) || !pgd_present(*start))
				note_page(m, &st, __pgprot(prot), 1);
			else
				walk_pud_level(m, &st, *start,
					       i * PGD_LEVEL_MULT);
		} else
			note_page(m, &st, __pgprot(0), 1);

		start++;
	}

	/* Flush out the last page */
	st.current_address = normalize_addr(PTRS_PER_PGD*PGD_LEVEL_MULT);
	note_page(m, &st, __pgprot(0), 0);
}
Пример #6
0
static inline pmd_t *vmalloc_sync_one(pgd_t *pgd, unsigned long address)
{
	unsigned index = pgd_index(address);
	pgd_t *pgd_k;
	pud_t *pud, *pud_k;
	pmd_t *pmd, *pmd_k;

	pgd += index;
	pgd_k = init_mm.pgd + index;

	if (!pgd_present(*pgd_k))
		return NULL;

	pud = pud_offset(pgd, address);
	pud_k = pud_offset(pgd_k, address);
	if (!pud_present(*pud_k))
		return NULL;

	pmd = pmd_offset(pud, address);
	pmd_k = pmd_offset(pud_k, address);
	if (!pmd_present(*pmd_k))
		return NULL;
	if (!pmd_present(*pmd)) {
		set_pmd(pmd, *pmd_k);
		arch_flush_lazy_mmu_mode();
	} else
		BUG_ON(pmd_ptfn(*pmd) != pmd_ptfn(*pmd_k));
	return pmd_k;
}
Пример #7
0
pte_t *
lookup_pte(pgd_t *page_dir, unsigned long pf_address)
{
	/*
	 * find the page table entry within the page table hierarchy
	 */
	pte_t *pte = NULL;
	pgd_t *pgd = page_dir + pgd_index(pf_address);

#ifdef DEBUG_LOOKUP_PTABS
	if ((int)page_dir < 0x1000) {
		printk("%s: page_dir=%x\n", __func__, (int)page_dir);
		enter_kdebug("page_dir<4096");
	}
	printk("%s: %lx pdir = %p\n", __func__, pf_address, pgd);
#endif
	if (pgd_present(*pgd)) {
		pmd_t *pmd = pmd_offset(pgd, pf_address);
#ifdef DEBUG_LOOKUP_PTABS
		printk("pgd_present(*%x) is true\n", pgd);
		printk(" pmd = %p\n", pmd);
#endif
		if (pmd_present(*pmd)) {
#ifdef DEBUG_LOOKUP_PTABS
			printk("pmd_present(*%x) is true\n", pmd);
#endif
			pte = pte_offset_map(pmd, pf_address);
		}
	}
#ifdef DEBUG_LOOKUP_PTABS
	printk("returning:  pte = %p\n", pte);
#endif
	return pte;
}
Пример #8
0
static bool kvm_mips_flush_gpa_pgd(pgd_t *pgd, unsigned long start_gpa,
				   unsigned long end_gpa)
{
	pud_t *pud;
	unsigned long end = ~0ul;
	int i_min = pgd_index(start_gpa);
	int i_max = pgd_index(end_gpa);
	bool safe_to_remove = (i_min == 0 && i_max == PTRS_PER_PGD - 1);
	int i;

	for (i = i_min; i <= i_max; ++i, start_gpa = 0) {
		if (!pgd_present(pgd[i]))
			continue;

		pud = pud_offset(pgd + i, 0);
		if (i == i_max)
			end = end_gpa;

		if (kvm_mips_flush_gpa_pud(pud, start_gpa, end)) {
			pgd_clear(pgd + i);
			pud_free(NULL, pud);
		} else {
			safe_to_remove = false;
		}
	}
	return safe_to_remove;
}
Пример #9
0
static int handle_vmalloc_fault(unsigned long address)
{
	/*
	 * Synchronize this task's top level page-table
	 * with the 'reference' page table.
	 */
	pgd_t *pgd, *pgd_k;
	pud_t *pud, *pud_k;
	pmd_t *pmd, *pmd_k;

	pgd = pgd_offset_fast(current->active_mm, address);
	pgd_k = pgd_offset_k(address);

	if (!pgd_present(*pgd_k))
		goto bad_area;

	pud = pud_offset(pgd, address);
	pud_k = pud_offset(pgd_k, address);
	if (!pud_present(*pud_k))
		goto bad_area;

	pmd = pmd_offset(pud, address);
	pmd_k = pmd_offset(pud_k, address);
	if (!pmd_present(*pmd_k))
		goto bad_area;

	set_pmd(pmd, *pmd_k);

	/* XXX: create the TLB entry here */
	return 0;

bad_area:
	return 1;
}
Пример #10
0
int kernel_ident_mapping_init(struct x86_mapping_info *info, pgd_t *pgd_page,
			      unsigned long pstart, unsigned long pend)
{
	unsigned long addr = pstart + info->offset;
	unsigned long end = pend + info->offset;
	unsigned long next;
	int result;

	for (; addr < end; addr = next) {
		pgd_t *pgd = pgd_page + pgd_index(addr);
		pud_t *pud;

		next = (addr & PGDIR_MASK) + PGDIR_SIZE;
		if (next > end)
			next = end;

		if (pgd_present(*pgd)) {
			pud = pud_offset(pgd, 0);
			result = ident_pud_init(info, pud, addr, next);
			if (result)
				return result;
			continue;
		}

		pud = (pud_t *)info->alloc_pgt_page(info->context);
		if (!pud)
			return -ENOMEM;
		result = ident_pud_init(info, pud, addr, next);
		if (result)
			return result;
		set_pgd(pgd, __pgd(__pa(pud) | _KERNPG_TABLE));
	}

	return 0;
}
Пример #11
0
int oleole_flush_guest_virt_memory(oleole_guest_system_t *gsys)
{
	unsigned long flags;
	unsigned long start, end;
	struct vm_area_struct	*vma;
	struct mm_struct *mm;
	pgd_t *pgd;

	spin_lock_irqsave(&gsys->lock, flags);
	vma = gsys->vma;
	spin_unlock_irqrestore(&gsys->lock, flags);

	if (!vma)
		return -1;

	mm = vma->vm_mm;

	if (!mm)
		return -1;

	start = vma->vm_start + OLEOLE_GUSET_VIRT_SPACE_OFFSET;
	end   = start         + 0x100000000UL;

	down_write(&mm->mmap_sem);

	pgd = pgd_offset(mm, start);
	if (!pgd_present(*pgd))
		goto miss;

	for (; start < end ; start += PUD_SIZE) {
		pud_t *pud;
		pmd_t *pmd;
		struct page *page;

		pud = pud_offset(pgd, start);
		if (!pud_present(*pud))
			goto miss;

		free_pmd_range(pud);

		pmd = pmd_offset(pud, 0);
		page = virt_to_page(pmd);
		__free_page(page);
		pud_clear(pud);
	}
miss:

	up_write(&mm->mmap_sem);

	__flush_tlb();

	return 0;
}
Пример #12
0
static int page_present(struct mm_struct *mm, void __user *uptr, int wr)
{
	unsigned long addr = (unsigned long)uptr;
	pgd_t *pgd = pgd_offset(mm, addr);
	if (pgd_present(*pgd)) {
		pmd_t *pmd = pmd_offset(pgd, addr);
		if (pmd_present(*pmd)) {
			pte_t *pte = pte_offset_map(pmd, addr);
			return (pte_present(*pte) && (!wr || pte_write(*pte)));
		}
	}
	return 0;
}
Пример #13
0
pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
{
	pgd_t *pgdp;
	pud_t *pudp;
	pmd_t *pmdp = NULL;

	pgdp = pgd_offset(mm, addr);
	if (pgd_present(*pgdp)) {
		pudp = pud_offset(pgdp, addr);
		if (pud_present(*pudp))
			pmdp = pmd_offset(pudp, addr);
	}
	return (pte_t *) pmdp;
}
Пример #14
0
/*
 * This routine puts a long into any process space by following the page
 * tables. NOTE! You should check that the long isn't on a page boundary,
 * and that it is in the task area before calling this: this routine does
 * no checking.
 *
 * Now keeps R/W state of page so that a text page stays readonly
 * even if a debugger scribbles breakpoints into it.  -M.U-
 */
static void put_long(struct task_struct * tsk, struct vm_area_struct * vma, unsigned long addr,
	unsigned long data)
{
	pgd_t *pgdir;
	pmd_t *pgmiddle;
	pte_t *pgtable;
	unsigned long page;
		
repeat:
	pgdir = pgd_offset(vma->vm_mm, addr);
	if (!pgd_present(*pgdir)) {
		do_no_page(tsk, vma, addr, 1);
		goto repeat;
	}
	if (pgd_bad(*pgdir)) {
		printk("ptrace: bad page directory %08lx\n", pgd_val(*pgdir));
		pgd_clear(pgdir);
		return;
	}
	pgmiddle = pmd_offset(pgdir,addr);
	if (pmd_none(*pgmiddle)) {
		do_no_page(tsk, vma, addr, 1);
		goto repeat;
	}
	if (pmd_bad(*pgmiddle)) {
		printk("ptrace: bad page directory %08lx\n",
		       pmd_val(*pgmiddle));
		pmd_clear(pgmiddle);
		return;
	}
	pgtable = pte_offset(pgmiddle, addr);
	if (!pte_present(*pgtable)) {
		do_no_page(tsk, vma, addr, 1);
		goto repeat;
	}
	page = pte_page(*pgtable);
	if (!pte_write(*pgtable)) {
		do_wp_page(tsk, vma, addr, 2);
		goto repeat;
	}
/* this is a hack for non-kernel-mapped video buffers and similar */
	if (page < high_memory) {
		*(unsigned long *) (page + (addr & ~PAGE_MASK)) = data;
		flush_page_to_ram (page);
	}
/* we're bypassing pagetables, so we have to set the dirty bit ourselves */
/* this should also re-instate whatever read-only mode there was before */
	*pgtable = pte_mkdirty(mk_pte(page, vma->vm_page_prot));
	flush_tlb_all();
}
pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
{
    pgd_t *pgd;
    pud_t *pud;
    pmd_t *pmd = NULL;

    pgd = pgd_offset(mm, addr);
    if (pgd_present(*pgd)) {
        pud = pud_offset(pgd, addr);
        if (pud_present(*pud))
            pmd = pmd_offset(pud, addr);
    }
    return (pte_t *) pmd;
}
Пример #16
0
static void shmedia_mapioaddr(unsigned long pa, unsigned long va,
			      unsigned long flags)
{
	pgd_t *pgdp;
	pud_t *pudp;
	pmd_t *pmdp;
	pte_t *ptep, pte;
	pgprot_t prot;

	pr_debug("shmedia_mapiopage pa %08lx va %08lx\n",  pa, va);

	if (!flags)
		flags = 1; /* 1 = CB0-1 device */

	pgdp = pgd_offset_k(va);
	if (pgd_none(*pgdp) || !pgd_present(*pgdp)) {
		pudp = (pud_t *)sh64_get_page();
		set_pgd(pgdp, __pgd((unsigned long)pudp | _KERNPG_TABLE));
	}

	pudp = pud_offset(pgdp, va);
	if (pud_none(*pudp) || !pud_present(*pudp)) {
		pmdp = (pmd_t *)sh64_get_page();
		set_pud(pudp, __pud((unsigned long)pmdp | _KERNPG_TABLE));
	}

	pmdp = pmd_offset(pudp, va);
	if (pmd_none(*pmdp) || !pmd_present(*pmdp)) {
		ptep = (pte_t *)sh64_get_page();
		set_pmd(pmdp, __pmd((unsigned long)ptep + _PAGE_TABLE));
	}

	prot = __pgprot(_PAGE_PRESENT | _PAGE_READ     | _PAGE_WRITE  |
			_PAGE_DIRTY   | _PAGE_ACCESSED | _PAGE_SHARED | flags);

	pte = pfn_pte(pa >> PAGE_SHIFT, prot);
	ptep = pte_offset_kernel(pmdp, va);

	if (!pte_none(*ptep) &&
	    pte_val(*ptep) != pte_val(pte))
		pte_ERROR(*ptep);

	set_pte(ptep, pte);

	flush_tlb_kernel_range(va, PAGE_SIZE);
}
/*
 * First Level Translation Fault Handler
 *
 * We enter here because the first level page table doesn't contain
 * a valid entry for the address.
 *
 * If the address is in kernel space (>= TASK_SIZE), then we are
 * probably faulting in the vmalloc() area.
 *
 * If the init_task's first level page tables contains the relevant
 * entry, we copy the it to this task.  If not, we send the process
 * a signal, fixup the exception, or oops the kernel.
 *
 * NOTE! We MUST NOT take any locks for this case. We may be in an
 * interrupt or a critical region, and should only copy the information
 * from the master page table, nothing more.
 */
int do_translation_fault(unsigned long addr, unsigned int fsr,
			 struct pt_regs *regs)
{
	struct task_struct *tsk;
	int offset;
	pgd_t *pgd, *pgd_k;
	pmd_t *pmd, *pmd_k;

	if (addr < TASK_SIZE)
		return do_page_fault(addr, fsr, regs);

	offset = __pgd_offset(addr);

	/*
	 * FIXME: CP15 C1 is write only on ARMv3 architectures.
	 * You really need to read the value in the page table
	 * register, not a copy.
	 */
	pgd = cpu_get_pgd() + offset;
	pgd_k = init_mm.pgd + offset;

	if (pgd_none(*pgd_k))
		goto bad_area;

#if 0	/* note that we are two-level */
	if (!pgd_present(*pgd))
		set_pgd(pgd, *pgd_k);
#endif

	pmd_k = pmd_offset(pgd_k, addr);
	pmd   = pmd_offset(pgd, addr);

	if (pmd_none(*pmd_k))
		goto bad_area;

	set_pmd(pmd, *pmd_k);
	return 0;

bad_area:
	tsk = current;

	do_bad_area(tsk, tsk->active_mm, addr, fsr, regs);
	return 0;
}
Пример #18
0
/*
 * First Level Translation Fault Handler
 *
 * We enter here because the first level page table doesn't contain
 * a valid entry for the address.
 *
 * If the address is in kernel space (>= TASK_SIZE), then we are
 * probably faulting in the vmalloc() area.
 *
 * If the init_task's first level page tables contains the relevant
 * entry, we copy the it to this task.  If not, we send the process
 * a signal, fixup the exception, or oops the kernel.
 *
 * NOTE! We MUST NOT take any locks for this case. We may be in an
 * interrupt or a critical region, and should only copy the information
 * from the master page table, nothing more.
 */
int do_translation_fault(unsigned long addr, int error_code, struct pt_regs *regs)
{
	struct task_struct *tsk;
	struct mm_struct *mm;
	int offset;
	pgd_t *pgd, *pgd_k;
	pmd_t *pmd, *pmd_k;

	if (addr < TASK_SIZE)
		return do_page_fault(addr, error_code, regs);

	offset = __pgd_offset(addr);

	/*
	 * FIXME: CP15 C1 is write only on ARMv3 architectures.
	 */
	pgd = cpu_get_pgd() + offset;
	pgd_k = init_mm.pgd + offset;

	if (pgd_none(*pgd_k))
		goto bad_area;

#if 0	/* note that we are two-level */
	if (!pgd_present(*pgd))
		set_pgd(pgd, *pgd_k);
#endif

	pmd_k = pmd_offset(pgd_k, addr);
	pmd   = pmd_offset(pgd, addr);

	if (pmd_none(*pmd_k))
		goto bad_area;

	set_pmd(pmd, *pmd_k);
	return 0;

bad_area:
	tsk = current;
	mm  = tsk->active_mm;

	do_bad_area(tsk, mm, addr, error_code, regs);
	return 0;
}
Пример #19
0
int exit_mm(struct mm_struct *mm)
{
	pmd_t *pmd;
	pgd_t *pgd;
	uint32_t pgdno, pmdno;
	physaddr_t pa;

	struct vm_area_struct* vma = mm->mmap;
	struct page *page;
	
	if(!mm || !mm->mm_pgd)
		return 0;

	if(!atomic_dec_and_test(&mm->mm_count))
		return 0;

	delete_all_vma(mm);

	for (pgdno = 0; pgdno < pgd_index(KERNEL_BASE_ADDR); pgdno++) {
		pgd = mm->mm_pgd + pgdno;
		if(!pgd_present(*pgd) || pgd_none(*pgd))
			continue;
		pmd_t* tmp = (pmd_t *)pgd_page_vaddr(*pgd);
		
		for (pmdno = 0; pmdno < PTRS_PER_PMD; pmdno++) {
			pmd = tmp +  pmdno;
			if(!pmd_present(*pmd) || pmd_none(*pmd))
				continue;
			struct page* p = virt2page(pmd_page_vaddr(*pmd));
			page_decref(p);
			pmd_set(pmd,0,0);
		}
		struct page* p = virt2page(pgd_page_vaddr(*pgd));
		page_decref(p);
		pgd_set(pgd,0,0);
	}

	page = virt2page((viraddr_t)mm->mm_pgd);
	page_free(page);
	kfree(mm);

	return 0;
}
Пример #20
0
/*
 * Insert the gateway page into a set of page tables, creating the
 * page tables if necessary.
 */
static void insert_gateway_page(pgd_t *pgd, unsigned long address)
{
	pud_t *pud;
	pmd_t *pmd;
	pte_t *pte;

	BUG_ON(!pgd_present(*pgd));

	pud = pud_offset(pgd, address);
	BUG_ON(!pud_present(*pud));

	pmd = pmd_offset(pud, address);
	if (!pmd_present(*pmd)) {
		pte = alloc_bootmem_pages(PAGE_SIZE);
		set_pmd(pmd, __pmd(_PAGE_TABLE | __pa(pte)));
	}

	pte = pte_offset_kernel(pmd, address);
	set_pte(pte, pfn_pte(__pa(gateway_page) >> PAGE_SHIFT, PAGE_READONLY));
}
pte_t *
huge_pte_offset (struct mm_struct *mm, unsigned long addr)
{
	unsigned long taddr = htlbpage_to_page(addr);
	pgd_t *pgd;
	pud_t *pud;
	pmd_t *pmd;
	pte_t *pte = NULL;

	pgd = pgd_offset(mm, taddr);
	if (pgd_present(*pgd)) {
		pud = pud_offset(pgd, taddr);
		if (pud_present(*pud)) {
			pmd = pmd_offset(pud, taddr);
			if (pmd_present(*pmd))
				pte = pte_offset_map(pmd, taddr);
		}
	}

	return pte;
}
Пример #22
0
/*
 * Check that @page is mapped at @address into @mm.
 *
 * If @sync is false, page_check_address may perform a racy check to avoid
 * the page table lock when the pte is not present (helpful when reclaiming
 * highly shared pages).
 *
 * On success returns with pte mapped and locked.
 */
static pte_t *mr__page_check_address(struct page *page, struct mm_struct *mm,
			  unsigned long address, spinlock_t **ptlp, int sync)
{
	pgd_t *pgd;
	pud_t *pud;
	pmd_t *pmd;
	pte_t *pte;
	spinlock_t *ptl;

	pgd = pgd_offset(mm, address);
	if (!pgd_present(*pgd))
		return NULL;

	pud = pud_offset(pgd, address);
	if (!pud_present(*pud))
		return NULL;

	pmd = pmd_offset(pud, address);
	if (!pmd_present(*pmd))
		return NULL;
	if (pmd_trans_huge(*pmd))
		return NULL;

	pte = pte_offset_map(pmd, address);
	/* Make a quick check before getting the lock */
	if (!sync && !pte_present(*pte)) {
		pte_unmap(pte);
		return NULL;
	}

	ptl = pte_lockptr(mm, pmd);
	spin_lock(ptl);
	if (pte_present(*pte) && page_to_pfn(page) == pte_pfn(*pte)) {
		*ptlp = ptl;
		return pte;
	}
	pte_unmap_unlock(pte, ptl);
	return NULL;
}
Пример #23
0
/**
 * stage2_wp_range() - write protect stage2 memory region range
 * @kvm:	The KVM pointer
 * @addr:	Start address of range
 * @end:	End address of range
 */
static void stage2_wp_range(struct kvm *kvm, phys_addr_t addr, phys_addr_t end)
{
	pgd_t *pgd;
	phys_addr_t next;

	pgd = kvm->arch.pgd + kvm_pgd_index(addr);
	do {
		/*
		 * Release kvm_mmu_lock periodically if the memory region is
		 * large. Otherwise, we may see kernel panics with
		 * CONFIG_DETECT_HUNG_TASK, CONFIG_LOCKUP_DETECTOR,
		 * CONFIG_LOCKDEP. Additionally, holding the lock too long
		 * will also starve other vCPUs.
		 */
		if (need_resched() || spin_needbreak(&kvm->mmu_lock))
			cond_resched_lock(&kvm->mmu_lock);

		next = kvm_pgd_addr_end(addr, end);
		if (pgd_present(*pgd))
			stage2_wp_puds(pgd, addr, next);
	} while (pgd++, addr = next, addr != end);
}
Пример #24
0
/*
 * This routine puts a long into any process space by following the page
 * tables. NOTE! You should check that the long isn't on a page boundary,
 * and that it is in the task area before calling this: this routine does
 * no checking.
 *
 * Now keeps R/W state of page so that a text page stays readonly
 * even if a debugger scribbles breakpoints into it.  -M.U-
 */
static void put_long(struct vm_area_struct * vma, unsigned long addr,
	unsigned long data)
{
	pgd_t *pgdir;
	pte_t *pgtable;
	unsigned long page;

repeat:
	pgdir = PAGE_DIR_OFFSET(vma->vm_task, addr);
	if (!pgd_present(*pgdir)) {
		do_no_page(vma, addr, 1);
		goto repeat;
	}
	if (pgd_bad(*pgdir)) {
		printk("ptrace: bad page directory %08lx\n", pgd_val(*pgdir));
		pgd_clear(pgdir);
		return;
	}
	pgtable = (pte_t *) (PAGE_PTR(addr) + pgd_page(*pgdir));
	if (!pte_present(*pgtable)) {
		do_no_page(vma, addr, 1);
		goto repeat;
	}
	page = pte_page(*pgtable);
	if (!pte_write(*pgtable)) {
		do_wp_page(vma, addr, 1);
		goto repeat;
	}
/* this is a hack for non-kernel-mapped video buffers and similar */
	if (page < high_memory) {
		page += addr & ~PAGE_MASK;
		*(unsigned long *) page = data;
	}
/* we're bypassing pagetables, so we have to set the dirty bit ourselves */
/* this should also re-instate whatever read-only mode there was before */
	*pgtable = pte_mkdirty(mk_pte(page, vma->vm_page_prot));
	invalidate();
}
Пример #25
0
pte_t *virt_to_pte(struct mm_struct *mm, unsigned long addr)
{
	pgd_t *pgd;
	pud_t *pud;
	pmd_t *pmd;

	if (mm == NULL)
		return NULL;

	pgd = pgd_offset(mm, addr);
	if (!pgd_present(*pgd))
		return NULL;

	pud = pud_offset(pgd, addr);
	if (!pud_present(*pud))
		return NULL;

	pmd = pmd_offset(pud, addr);
	if (!pmd_present(*pmd))
		return NULL;

	return pte_offset_kernel(pmd, addr);
}
Пример #26
0
static pmd_t * __init kernel_ptr_table(void)
{
	if (!last_pgtable) {
		unsigned long pmd, last;
		int i;

		/* Find the last ptr table that was used in head.S and
		 * reuse the remaining space in that page for further
		 * ptr tables.
		 */
		last = (unsigned long)kernel_pg_dir;
		for (i = 0; i < PTRS_PER_PGD; i++) {
			if (!pgd_present(kernel_pg_dir[i]))
				continue;
			pmd = __pgd_page(kernel_pg_dir[i]);
			if (pmd > last)
				last = pmd;
		}

		last_pgtable = (pmd_t *)last;
#ifdef DEBUG
		printk("kernel_ptr_init: %p\n", last_pgtable);
#endif
	}

	last_pgtable += PTRS_PER_PMD;
	if (((unsigned long)last_pgtable & ~PAGE_MASK) == 0) {
		last_pgtable = (pmd_t *)alloc_bootmem_low_pages(PAGE_SIZE);

		clear_page(last_pgtable);
		__flush_page_to_ram(last_pgtable);
		flush_tlb_kernel_page(last_pgtable);
		nocache_page(last_pgtable);
	}

	return last_pgtable;
}
Пример #27
0
pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
{
	pgd_t *pgd;
	pud_t *pud;
	pmd_t *pmd = NULL;
	pte_t *pte = NULL;

	pgd = pgd_offset(mm, addr);
	pr_debug("%s: addr:0x%lx pgd:%p\n", __func__, addr, pgd);
	if (!pgd_present(*pgd))
		return NULL;
	pud = pud_offset(pgd, addr);
	if (!pud_present(*pud))
		return NULL;

	if (pud_huge(*pud))
		return (pte_t *)pud;
	pmd = pmd_offset(pud, addr);
	if (!pmd_present(*pmd))
		return NULL;

	if (pte_cont(pmd_pte(*pmd))) {
		pmd = pmd_offset(
			pud, (addr & CONT_PMD_MASK));
		return (pte_t *)pmd;
	}
	if (pmd_huge(*pmd))
		return (pte_t *)pmd;
	pte = pte_offset_kernel(pmd, addr);
	if (pte_present(*pte) && pte_cont(*pte)) {
		pte = pte_offset_kernel(
			pmd, (addr & CONT_PTE_MASK));
		return pte;
	}
	return NULL;
}
Пример #28
0
pte_t *huge_pte_offset(struct mm_struct *mm,
		       unsigned long addr, unsigned long sz)
{
	pgd_t *pgdp;
	pud_t *pudp, pud;
	pmd_t *pmdp, pmd;

	pgdp = pgd_offset(mm, addr);
	if (!pgd_present(READ_ONCE(*pgdp)))
		return NULL;

	pudp = pud_offset(pgdp, addr);
	pud = READ_ONCE(*pudp);
	if (sz != PUD_SIZE && pud_none(pud))
		return NULL;
	/* hugepage or swap? */
	if (pud_huge(pud) || !pud_present(pud))
		return (pte_t *)pudp;
	/* table; check the next level */

	if (sz == CONT_PMD_SIZE)
		addr &= CONT_PMD_MASK;

	pmdp = pmd_offset(pudp, addr);
	pmd = READ_ONCE(*pmdp);
	if (!(sz == PMD_SIZE || sz == CONT_PMD_SIZE) &&
	    pmd_none(pmd))
		return NULL;
	if (pmd_huge(pmd) || !pmd_present(pmd))
		return (pte_t *)pmdp;

	if (sz == CONT_PTE_SIZE)
		return pte_offset_kernel(pmdp, (addr & CONT_PTE_MASK));

	return NULL;
}
Пример #29
0
/*
 * This routine handles page faults.  It determines the address,
 * and the problem, and then passes it off to one of the appropriate
 * routines.
 */
asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long write,
			      unsigned long address)
{
	struct vm_area_struct * vma = NULL;
	struct task_struct *tsk = current;
	struct mm_struct *mm = tsk->mm;
	const int field = sizeof(unsigned long) * 2;
	siginfo_t info;

#if 0
	printk("Cpu%d[%s:%d:%0*lx:%ld:%0*lx]\n", smp_processor_id(),
	       current->comm, current->pid, field, address, write,
	       field, regs->cp0_epc);
#endif

	info.si_code = SEGV_MAPERR;

	/*
	 * We fault-in kernel-space virtual memory on-demand. The
	 * 'reference' page table is init_mm.pgd.
	 *
	 * NOTE! We MUST NOT take any locks for this case. We may
	 * be in an interrupt or a critical region, and should
	 * only copy the information from the master page table,
	 * nothing more.
	 */
	if (unlikely(address >= VMALLOC_START && address <= VMALLOC_END))
		goto vmalloc_fault;

	/*
	 * If we're in an interrupt or have no user
	 * context, we must not take the fault..
	 */
	if (in_atomic() || !mm)
		goto bad_area_nosemaphore;

	down_read(&mm->mmap_sem);
	vma = find_vma(mm, address);
	if (!vma)
		goto bad_area;
	if (vma->vm_start <= address)
		goto good_area;
	if (!(vma->vm_flags & VM_GROWSDOWN))
		goto bad_area;
	if (expand_stack(vma, address))
		goto bad_area;
/*
 * Ok, we have a good vm_area for this memory access, so
 * we can handle it..
 */
good_area:
	info.si_code = SEGV_ACCERR;

	if (write) {
		if (!(vma->vm_flags & VM_WRITE))
			goto bad_area;
	} else {
		if (!(vma->vm_flags & (VM_READ | VM_WRITE | VM_EXEC)))
			goto bad_area;
	}

survive:
	/*
	 * If for any reason at all we couldn't handle the fault,
	 * make sure we exit gracefully rather than endlessly redo
	 * the fault.
	 */
	switch (handle_mm_fault(mm, vma, address, write)) {
	case VM_FAULT_MINOR:
		tsk->min_flt++;
		break;
	case VM_FAULT_MAJOR:
		tsk->maj_flt++;
		break;
	case VM_FAULT_SIGBUS:
		goto do_sigbus;
	case VM_FAULT_OOM:
		goto out_of_memory;
	default:
		BUG();
	}

	up_read(&mm->mmap_sem);
	return;

/*
 * Something tried to access memory that isn't in our memory map..
 * Fix it, but check if it's kernel or user first..
 */
bad_area:
	up_read(&mm->mmap_sem);

bad_area_nosemaphore:
	/* User mode accesses just cause a SIGSEGV */
	if (user_mode(regs)) {
		tsk->thread.cp0_badvaddr = address;
		tsk->thread.error_code = write;
#if 0
		printk("do_page_fault() #2: sending SIGSEGV to %s for "
		       "invalid %s\n%0*lx (epc == %0*lx, ra == %0*lx)\n",
		       tsk->comm,
		       write ? "write access to" : "read access from",
		       field, address,
		       field, (unsigned long) regs->cp0_epc,
		       field, (unsigned long) regs->regs[31]);
#endif
		info.si_signo = SIGSEGV;
		info.si_errno = 0;
		/* info.si_code has been set above */
		info.si_addr = (void __user *) address;
		force_sig_info(SIGSEGV, &info, tsk);
		return;
	}

no_context:
	/* Are we prepared to handle this kernel fault?  */
	if (fixup_exception(regs)) {
		current->thread.cp0_baduaddr = address;
		return;
	}

	/*
	 * Oops. The kernel tried to access some bad page. We'll have to
	 * terminate things with extreme prejudice.
	 */
	bust_spinlocks(1);

	printk(KERN_ALERT "CPU %d Unable to handle kernel paging request at "
	       "virtual address %0*lx, epc == %0*lx, ra == %0*lx\n",
	       smp_processor_id(), field, address, field, regs->cp0_epc,
	       field,  regs->regs[31]);
	die("Oops", regs);

/*
 * We ran out of memory, or some other thing happened to us that made
 * us unable to handle the page fault gracefully.
 */
out_of_memory:
	up_read(&mm->mmap_sem);
	if (is_init(tsk)) {
		yield();
		down_read(&mm->mmap_sem);
		goto survive;
	}
	printk("VM: killing process %s\n", tsk->comm);
	if (user_mode(regs))
		do_exit(SIGKILL);
	goto no_context;

do_sigbus:
	up_read(&mm->mmap_sem);

	/* Kernel mode? Handle exceptions or die */
	if (!user_mode(regs))
		goto no_context;
	else
	/*
	 * Send a sigbus, regardless of whether we were in kernel
	 * or user mode.
	 */
#if 0
		printk("do_page_fault() #3: sending SIGBUS to %s for "
		       "invalid %s\n%0*lx (epc == %0*lx, ra == %0*lx)\n",
		       tsk->comm,
		       write ? "write access to" : "read access from",
		       field, address,
		       field, (unsigned long) regs->cp0_epc,
		       field, (unsigned long) regs->regs[31]);
#endif
	tsk->thread.cp0_badvaddr = address;
	info.si_signo = SIGBUS;
	info.si_errno = 0;
	info.si_code = BUS_ADRERR;
	info.si_addr = (void __user *) address;
	force_sig_info(SIGBUS, &info, tsk);

	return;
vmalloc_fault:
	{
		/*
		 * Synchronize this task's top level page-table
		 * with the 'reference' page table.
		 *
		 * Do _not_ use "tsk" here. We might be inside
		 * an interrupt in the middle of a task switch..
		 */
		int offset = __pgd_offset(address);
		pgd_t *pgd, *pgd_k;
		pud_t *pud, *pud_k;
		pmd_t *pmd, *pmd_k;
		pte_t *pte_k;

		pgd = (pgd_t *) pgd_current[smp_processor_id()] + offset;
		pgd_k = init_mm.pgd + offset;

		if (!pgd_present(*pgd_k))
			goto no_context;
		set_pgd(pgd, *pgd_k);

		pud = pud_offset(pgd, address);
		pud_k = pud_offset(pgd_k, address);
		if (!pud_present(*pud_k))
			goto no_context;

		pmd = pmd_offset(pud, address);
		pmd_k = pmd_offset(pud_k, address);
		if (!pmd_present(*pmd_k))
			goto no_context;
		set_pmd(pmd, *pmd_k);

		pte_k = pte_offset_kernel(pmd_k, address);
		if (!pte_present(*pte_k))
			goto no_context;
		return;
	}
}
Пример #30
0
/*
 * This routine handles page faults.  It determines the address and the
 * problem, and then passes it off to one of the appropriate routines.
 */
asmlinkage void do_page_fault(struct pt_regs *regs)
{
	struct task_struct *tsk;
	struct vm_area_struct *vma;
	struct mm_struct *mm;
	unsigned long addr, cause;
	unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
	int fault, code = SEGV_MAPERR;

	cause = regs->scause;
	addr = regs->sbadaddr;

	tsk = current;
	mm = tsk->mm;

	/*
	 * Fault-in kernel-space virtual memory on-demand.
	 * The 'reference' page table is init_mm.pgd.
	 *
	 * NOTE! We MUST NOT take any locks for this case. We may
	 * be in an interrupt or a critical region, and should
	 * only copy the information from the master page table,
	 * nothing more.
	 */
	if (unlikely((addr >= VMALLOC_START) && (addr <= VMALLOC_END)))
		goto vmalloc_fault;

	/* Enable interrupts if they were enabled in the parent context. */
	if (likely(regs->sstatus & SR_PIE))
		local_irq_enable();

	/*
	 * If we're in an interrupt, have no user context, or are running
	 * in an atomic region, then we must not take the fault.
	 */
	if (unlikely(in_atomic() || !mm))
		goto no_context;

	if (user_mode(regs))
		flags |= FAULT_FLAG_USER;

	perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, addr);

retry:
	down_read(&mm->mmap_sem);
	vma = find_vma(mm, addr);
	if (unlikely(!vma))
		goto bad_area;
	if (likely(vma->vm_start <= addr))
		goto good_area;
	if (unlikely(!(vma->vm_flags & VM_GROWSDOWN)))
		goto bad_area;
	if (unlikely(expand_stack(vma, addr)))
		goto bad_area;

	/*
	 * Ok, we have a good vm_area for this memory access, so
	 * we can handle it.
	 */
good_area:
	code = SEGV_ACCERR;

	switch (cause) {
	case EXC_INST_ACCESS:
		if (!(vma->vm_flags & VM_EXEC))
			goto bad_area;
		break;
	case EXC_LOAD_ACCESS:
		if (!(vma->vm_flags & VM_READ))
			goto bad_area;
		break;
	case EXC_STORE_ACCESS:
		if (!(vma->vm_flags & VM_WRITE))
			goto bad_area;
		flags |= FAULT_FLAG_WRITE;
		break;
	default:
		panic("%s: unhandled cause %lu", __func__, cause);
	}

	/*
	 * If for any reason at all we could not handle the fault,
	 * make sure we exit gracefully rather than endlessly redo
	 * the fault.
	 */
	fault = handle_mm_fault(mm, vma, addr, flags);

	/*
	 * If we need to retry but a fatal signal is pending, handle the
	 * signal first. We do not need to release the mmap_sem because it
	 * would already be released in __lock_page_or_retry in mm/filemap.c.
	 */
	if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(tsk))
		return;

	if (unlikely(fault & VM_FAULT_ERROR)) {
		if (fault & VM_FAULT_OOM)
			goto out_of_memory;
		else if (fault & VM_FAULT_SIGBUS)
			goto do_sigbus;
		BUG();
	}

	/*
	 * Major/minor page fault accounting is only done on the
	 * initial attempt. If we go through a retry, it is extremely
	 * likely that the page will be found in page cache at that point.
	 */
	if (flags & FAULT_FLAG_ALLOW_RETRY) {
		if (fault & VM_FAULT_MAJOR) {
			tsk->maj_flt++;
			perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, regs, addr);
		} else {
			tsk->min_flt++;
			perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, regs, addr);
		}
		if (fault & VM_FAULT_RETRY) {
			/*
			 * Clear FAULT_FLAG_ALLOW_RETRY to avoid any risk
			 * of starvation.
			 */
			flags &= ~(FAULT_FLAG_ALLOW_RETRY);
			flags |= FAULT_FLAG_TRIED;

			/*
			 * No need to up_read(&mm->mmap_sem) as we would
			 * have already released it in __lock_page_or_retry
			 * in mm/filemap.c.
			 */
			goto retry;
		}
	}

	up_read(&mm->mmap_sem);
	return;

	/*
	 * Something tried to access memory that isn't in our memory map.
	 * Fix it, but check if it's kernel or user first.
	 */
bad_area:
	up_read(&mm->mmap_sem);
	/* User mode accesses just cause a SIGSEGV */
	if (user_mode(regs)) {
		do_trap(regs, SIGSEGV, code, addr, tsk);
		return;
	}

no_context:
	/* Are we prepared to handle this kernel fault? */
	if (fixup_exception(regs)) {
		return;
	}

	/*
	 * Oops. The kernel tried to access some bad page. We'll have to
	 * terminate things with extreme prejudice.
	 */
	bust_spinlocks(1);
	pr_alert("Unable to handle kernel %s at virtual address " REG_FMT "\n",
		(addr < PAGE_SIZE) ? "NULL pointer dereference" :
		"paging request", addr);
	die(regs, "Oops");
	do_exit(SIGKILL);

	/*
	 * We ran out of memory, call the OOM killer, and return the userspace
	 * (which will retry the fault, or kill us if we got oom-killed).
	 */
out_of_memory:
	up_read(&mm->mmap_sem);
	if (!user_mode(regs))
		goto no_context;
	pagefault_out_of_memory();
	return;

do_sigbus:
	up_read(&mm->mmap_sem);
	/* Kernel mode? Handle exceptions or die */
	if (!user_mode(regs))
		goto no_context;
	do_trap(regs, SIGBUS, BUS_ADRERR, addr, tsk);
	return;

vmalloc_fault:
	{
		pgd_t *pgd, *pgd_k;
		pud_t *pud, *pud_k;
		pmd_t *pmd, *pmd_k;
		pte_t *pte_k;
		int index;

	        if (user_mode(regs))
			goto bad_area;

		/*
		 * Synchronize this task's top level page-table
		 * with the 'reference' page table.
		 *
		 * Do _not_ use "tsk->active_mm->pgd" here.
		 * We might be inside an interrupt in the middle
		 * of a task switch.
		 */
		index = pgd_index(addr);
		pgd = (pgd_t *)pfn_to_virt(csr_read(sptbr)) + index;
		pgd_k = init_mm.pgd + index;

		if (!pgd_present(*pgd_k))
			goto no_context;
		set_pgd(pgd, *pgd_k);

		pud = pud_offset(pgd, addr);
		pud_k = pud_offset(pgd_k, addr);
		if (!pud_present(*pud_k))
			goto no_context;

		/* Since the vmalloc area is global, it is unnecessary
		   to copy individual PTEs */
		pmd = pmd_offset(pud, addr);
		pmd_k = pmd_offset(pud_k, addr);
		if (!pmd_present(*pmd_k))
			goto no_context;
		set_pmd(pmd, *pmd_k);

		/* Make sure the actual PTE exists as well to
		 * catch kernel vmalloc-area accesses to non-mapped
		 * addresses. If we don't do this, this will just
		 * silently loop forever.
		 */
		pte_k = pte_offset_kernel(pmd_k, addr);
		if (!pte_present(*pte_k))
			goto no_context;
		return;
	}
}