Example #1
unsigned long l4x_set_pte(struct mm_struct *mm,
                          unsigned long addr,
                          pte_t old, pte_t pteval)
{
	/*
	 * Check whether any invalidation is necessary.
	 *
	 * Invalidation (flush) is necessary if the old page was present
	 * and:
	 *   the new page is not present, OR
	 *   the new page has a different physical address, OR
	 *   the new page has different protection, OR
	 *   the new page has different access attributes
	 */

	/* old was present && new not -> flush */
	int flush_rights = L4_FPAGE_RWX;
#if 0
	if ((pte_val(old) & PAGE_MASK) != (pte_val(pteval) & PAGE_MASK))
		printk("spte %lx->%lx\n", pte_val(old), pte_val(pteval));
#endif
	if (pte_present(pteval)) {
		/* new page is present,
		 * now we have to find out what has changed */
		if (((pte_val(old) ^ pte_val(pteval)) & PAGE_MASK)
		    || (pte_young(old) && !pte_young(pteval))) {
			/* physical page frame changed
			 * || access attribute changed -> flush */
			/* flush is the default */
			//pteval.pte_low &= ~_PAGE_MAPPED;
			pteval = __pte(pte_val(pteval) & ~_PAGE_MAPPED);
		} else if ((pte_write(old) && !pte_write(pteval))
		           || (pte_dirty(old) && !pte_dirty(pteval))) {
			/* Protection changed from r/w to ro
			 * or page now clean -> remap */
			flush_rights = L4_FPAGE_W;
			check_pte_mapped(old, pteval, "RW->RO");
		} else {
			/* nothing changed, simply return */
			check_pte_mapped(old, pteval, "NoChg");
			return pte_val(pteval);
		}
	}

	/* Ok, now actually flush or remap the page */
	L4XV_FN_v(l4x_flush_page(mm, pte_val(old), addr, PAGE_SHIFT, flush_rights));
	return pte_val(pteval);
}
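
A note on the example above: the whole decision reduces to comparing bits of
the old and new pte values, with the XOR trick detecting a changed page
frame. Below is a minimal standalone sketch of the same three-way
classification, runnable in user space; the pte layout, masks, and names are
illustrative stand-ins, not the kernel's.

#include <stdio.h>

/* Mock pte layout for illustration only: low 12 bits are flags,
 * the rest is the page frame. */
#define MOCK_PAGE_MASK	(~0xfffUL)
#define MOCK_WRITE	0x002UL
#define MOCK_YOUNG	0x020UL
#define MOCK_DIRTY	0x040UL

enum action { ACT_NONE, ACT_REMAP_RO, ACT_FLUSH };

/* Same decision structure as l4x_set_pte() above, reduced to plain
 * unsigned longs. */
static enum action classify(unsigned long old, unsigned long new)
{
	if ((old ^ new) & MOCK_PAGE_MASK)	/* page frame changed */
		return ACT_FLUSH;
	if ((old & MOCK_YOUNG) && !(new & MOCK_YOUNG))
		return ACT_FLUSH;		/* access attribute lost */
	if (((old & MOCK_WRITE) && !(new & MOCK_WRITE)) ||
	    ((old & MOCK_DIRTY) && !(new & MOCK_DIRTY)))
		return ACT_REMAP_RO;		/* only rights shrank */
	return ACT_NONE;
}

int main(void)
{
	printf("%d\n", classify(0x1000 | MOCK_WRITE, 0x1000)); /* 1: remap ro */
	printf("%d\n", classify(0x1000, 0x2000));              /* 2: flush */
	printf("%d\n", classify(0x1000, 0x1000));              /* 0: nothing */
	return 0;
}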
Example #2
static int
pin_page_for_write(const void __user *_addr, pte_t **ptep, spinlock_t **ptlp)
{
	unsigned long addr = (unsigned long)_addr;
	pgd_t *pgd;
	pmd_t *pmd;
	pte_t *pte;
	pud_t *pud;
	spinlock_t *ptl;

	pgd = pgd_offset(current->mm, addr);
	if (unlikely(pgd_none(*pgd) || pgd_bad(*pgd)))
		return 0;

	pud = pud_offset(pgd, addr);
	if (unlikely(pud_none(*pud) || pud_bad(*pud)))
		return 0;

	pmd = pmd_offset(pud, addr);
	if (unlikely(pmd_none(*pmd) || pmd_bad(*pmd)))
		return 0;

	pte = pte_offset_map_lock(current->mm, pmd, addr, &ptl);
	if (unlikely(!pte_present(*pte) || !pte_young(*pte) ||
	    !pte_write(*pte) || !pte_dirty(*pte))) {
		pte_unmap_unlock(pte, ptl);
		return 0;
	}

	*ptep = pte;
	*ptlp = ptl;

	return 1;
}
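
The pgd/pud/pmd/pte descent above, with a none/bad early-out at each level,
is the standard page-table walk shape. A self-contained sketch of the same
pattern over a mock two-level table follows; the types, index splits, and
helper names are invented for illustration.

#include <stdio.h>
#include <stdlib.h>

/* Mock two-level table: the top level indexes address bits 21..30,
 * the bottom level bits 12..20. */
#define TOP_BITS	10
#define BOT_BITS	9
#define TOP_IDX(a)	(((a) >> 21) & ((1u << TOP_BITS) - 1))
#define BOT_IDX(a)	(((a) >> 12) & ((1u << BOT_BITS) - 1))

struct toplevel { void **slots; };

/* Return the leaf entry for addr, or NULL at the first absent level --
 * the same early-out structure as pin_page_for_write(). */
static void *walk(struct toplevel *top, unsigned long addr)
{
	void **bot;

	if (!top->slots)
		return NULL;			/* like pgd_none() */
	bot = top->slots[TOP_IDX(addr)];
	if (!bot)
		return NULL;			/* like pmd_none() */
	return bot[BOT_IDX(addr)];		/* leaf; may be NULL too */
}

int main(void)
{
	struct toplevel top = { calloc(1u << TOP_BITS, sizeof(void *)) };
	void **leaf = calloc(1u << BOT_BITS, sizeof(void *));
	unsigned long addr = 0x00203000UL;

	top.slots[TOP_IDX(addr)] = leaf;
	leaf[BOT_IDX(addr)] = "entry";
	printf("%s\n", (char *)walk(&top, addr));	/* prints "entry" */
	return 0;
}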
Example #3
/*
 * These routines also need to handle stuff like marking pages dirty
 * and/or accessed for architectures that don't do it in hardware (most
 * RISC architectures).  The early dirtying is also good on the i386.
 *
 * There is also a hook called "update_mmu_cache()" that architectures
 * with external mmu caches can use to update those (ie the Sparc or
 * PowerPC hashed page tables that act as extended TLBs).
 *
 * Note the "page_table_lock". It is to protect against kswapd removing
 * pages from under us. Note that kswapd only ever _removes_ pages, never
 * adds them. As such, once we have noticed that the page is not present,
 * we can drop the lock early.
 *
 * The adding of pages is protected by the MM semaphore (which we hold),
 * so we don't need to worry about a page suddenly being added into
 * our VM.
 */
static inline int handle_pte_fault(struct mm_struct *mm,
	struct vm_area_struct * vma, unsigned long address,
	int write_access, pte_t * pte)
{
	pte_t entry;

	/*
	 * We need the page table lock to synchronize with kswapd
	 * and the SMP-safe atomic PTE updates.
	 */
	spin_lock(&mm->page_table_lock);
	entry = *pte;
	if (!pte_present(entry)) {
		/*
		 * If it truly wasn't present, we know that kswapd
		 * and the PTE updates will not touch it later. So
		 * drop the lock.
		 */
		spin_unlock(&mm->page_table_lock);
		if (pte_none(entry))
			return do_no_page(mm, vma, address, write_access, pte);
		return do_swap_page(mm, vma, address, pte, pte_to_swp_entry(entry), write_access);
	}

	if (write_access) {
		if (!pte_write(entry))
			return do_wp_page(mm, vma, address, pte, entry);

		entry = pte_mkdirty(entry);
	}
	entry = pte_mkyoung(entry);
	establish_pte(vma, address, pte, entry);
	spin_unlock(&mm->page_table_lock);
	return 1;
}
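
The dispatch in handle_pte_fault() reduces to a small decision table:
absent-and-none means a fresh page, absent-but-set means swapped out, and
present-but-unwritable on a write means copy-on-write. A standalone sketch,
assuming plain int flags in place of the pte accessors:

#include <stdio.h>

enum fault_kind { FAULT_NO_PAGE, FAULT_SWAP, FAULT_WP, FAULT_MINOR };

/* Mirrors the order of checks in handle_pte_fault() above. */
static enum fault_kind classify_fault(int present, int none,
				      int write_access, int writable)
{
	if (!present)
		return none ? FAULT_NO_PAGE : FAULT_SWAP;
	if (write_access && !writable)
		return FAULT_WP;
	return FAULT_MINOR;	/* just mark young (and dirty on write) */
}

int main(void)
{
	printf("%d\n", classify_fault(0, 1, 0, 0));	/* 0: no page */
	printf("%d\n", classify_fault(0, 0, 0, 0));	/* 1: swap */
	printf("%d\n", classify_fault(1, 0, 1, 0));	/* 2: wp/COW */
	printf("%d\n", classify_fault(1, 0, 1, 1));	/* 3: minor */
	return 0;
}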
Example #4
void l4x_vmalloc_map_vm_area(unsigned long address, unsigned long end)
{
	if (address & ~PAGE_MASK)
		enter_kdebug("map_vm_area: Unaligned address!");

	for (; address < end; address += PAGE_SIZE) {
		pte_t *ptep;

#ifdef ARCH_arm
		unsigned long o;
		if ((o = l4x_arm_is_selfmapped_addr(address))) {
			address += o - PAGE_SIZE;
			continue;
		}
#endif

		ptep = lookup_pte(swapper_pg_dir, address);

		if (!ptep || !pte_present(*ptep)) {
			if (0)
				printk("%s: No (valid) PTE for %08lx?!"
			               " (ptep: %p, pte: %08"
#ifndef ARCH_arm
				       "l"
#endif
				       "x)\n",
			               __func__, address,
			               ptep, pte_val(*ptep));
			continue;
		}
		l4x_virtual_mem_register(address, pte_val(*ptep));
		l4lx_memory_map_virtual_page(address, pte_val(*ptep),
		                             pte_write(*ptep));
	}
}
Example #5
/*
 * Do a quick page-table lookup for a single page. 
 */
static struct page * follow_page(struct mm_struct *mm, unsigned long address, int write) 
{
	pgd_t *pgd;
	pmd_t *pmd;
	pte_t *ptep, pte;

	pgd = pgd_offset(mm, address);
	if (pgd_none(*pgd) || pgd_bad(*pgd))
		goto out;

	pmd = pmd_offset(pgd, address);
	if (pmd_none(*pmd) || pmd_bad(*pmd))
		goto out;

	ptep = pte_offset(pmd, address);
	if (!ptep)
		goto out;

	pte = *ptep;
	if (pte_present(pte)) {
		if (!write ||
		    (pte_write(pte) && pte_dirty(pte)))
			return pte_page(pte);
	}

out:
	return 0;
}
Example #6
/* this routine handles present pages, when users try to write
   to a shared page.
   */
void do_wp_page(struct vm_area_struct *vma, unsigned long address, int write_access)
{
    pgd_t *pgd;
    pmd_t *pmd;
    pte_t *page_table,pte;
    unsigned long old_page, new_page;

    new_page = get_free_page(GFP_KERNEL);

    pgd = pgd_offset(vma->vm_task, address);
    if(pgd_none(*pgd))
        goto end_wp_page;
    if(pgd_bad(*pgd))
        goto bad_wp_page;
    pmd = pmd_offset(pgd,address);
    if(pmd_none(*pmd))
        goto end_wp_page;
    if(pmd_bad(*pmd))
        goto bad_wp_page;
    page_table = pte_offset(pmd,address);
    pte = *page_table;
    if(!pte_present(pte))
        goto end_wp_page;
    if(pte_write(pte))
        goto end_wp_page;
    old_page = pte_page(pte);
    if(old_page >= main_memory_end)
        goto bad_wp_page;

    (vma->vm_task->mm->min_flt)++;

    if(mem_map[MAP_NR(old_page)].flags & PAGE_PRESENT)
    {
        if(new_page)
        {
            if(mem_map[MAP_NR(old_page)].flags & MAP_PAGE_RESERVED)
                ++(vma->vm_task->mm->rss);
            copy_page(old_page, new_page);
            *page_table = pte_mkwrite(pte_mkdirty(mk_pte(new_page, vma->vm_page_prot)));
            free_page(old_page);
            return;
        }
        pte_val(*page_table) &= PAGE_BAD;
        free_page(old_page);
        oom();
        return;
    }
    *page_table = pte_mkdirty(pte_mkwrite(pte));
    if(new_page)
        free_page(new_page);
    return;
bad_wp_page:
    printk("do_wp_page: bogus page at address %08lx (%08lx)\n",address,old_page);
    goto end_wp_page;
end_wp_page:
    if(new_page)
        free_page(new_page);
    return;
}
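
A user-space simulation may make the copy-on-write step clearer: a sole
owner regains write access in place, while a shared page is copied before
the write proceeds. The mock_page type and refcount below are stand-ins for
the mem_map[] bookkeeping, not kernel structures.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct mock_page {
	int refcount;
	char data[64];
};

/* On a write fault: reuse the page if we are the sole owner, otherwise
 * copy it and drop our reference on the original. */
static struct mock_page *cow_break(struct mock_page *old)
{
	struct mock_page *copy;

	if (old->refcount == 1)
		return old;			/* sole owner: just reuse */

	copy = malloc(sizeof(*copy));
	if (!copy)
		return NULL;			/* the oom() case above */
	memcpy(copy->data, old->data, sizeof(copy->data));
	copy->refcount = 1;
	old->refcount--;			/* "free" our reference */
	return copy;
}

int main(void)
{
	struct mock_page shared = { .refcount = 2, .data = "hello" };
	struct mock_page *priv = cow_break(&shared);

	strcpy(priv->data, "scribble");		/* writer sees the copy */
	printf("%s / %s\n", shared.data, priv->data);	/* hello / scribble */
	free(priv);
	return 0;
}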
Example #7
/* Remap IO memory, the same way as remap_pfn_range(), but use
 * the obio memory space.
 *
 * They use a pgprot that sets PAGE_IO and does not check the
 * mem_map table as this is independent of normal memory.
 */
static inline void io_remap_pte_range(struct mm_struct *mm, pte_t * pte,
				      unsigned long address,
				      unsigned long size,
				      unsigned long offset, pgprot_t prot,
				      int space)
{
	unsigned long end;

	/* clear hack bit that was used as a write_combine side-effect flag */
	offset &= ~0x1UL;
	address &= ~PMD_MASK;
	end = address + size;
	if (end > PMD_SIZE)
		end = PMD_SIZE;
	do {
		pte_t entry;
		unsigned long curend = address + PAGE_SIZE;
		
		entry = mk_pte_io(offset, prot, space, PAGE_SIZE);
		if (!(address & 0xffff)) {
			if (PAGE_SIZE < (4 * 1024 * 1024) &&
			    !(address & 0x3fffff) &&
			    !(offset & 0x3ffffe) &&
			    end >= address + 0x400000) {
				entry = mk_pte_io(offset, prot, space,
						  4 * 1024 * 1024);
				curend = address + 0x400000;
				offset += 0x400000;
			} else if (PAGE_SIZE < (512 * 1024) &&
				   !(address & 0x7ffff) &&
				   !(offset & 0x7fffe) &&
				   end >= address + 0x80000) {
				entry = mk_pte_io(offset, prot, space,
						  512 * 1024);
				curend = address + 0x80000;
				offset += 0x80000;
			} else if (PAGE_SIZE < (64 * 1024) &&
				   !(offset & 0xfffe) &&
				   end >= address + 0x10000) {
				entry = mk_pte_io(offset, prot, space,
						  64 * 1024);
				curend = address + 0x10000;
				offset += 0x10000;
			} else
				offset += PAGE_SIZE;
		} else
			offset += PAGE_SIZE;

		if (pte_write(entry))
			entry = pte_mkdirty(entry);
		do {
			BUG_ON(!pte_none(*pte));
			set_pte_at(mm, address, pte, entry);
			address += PAGE_SIZE;
			pte_val(entry) += PAGE_SIZE;
			pte++;
		} while (address < curend);
	} while (address < end);
}
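
The granule selection above follows a common pattern: map with the largest
page size whose alignment (of both virtual address and offset) and remaining
length permit it, falling back to the base page size. A standalone sketch of
just that test; the granule list is illustrative and no pte encoding is
attempted.

#include <stdio.h>

static unsigned long pick_granule(unsigned long address,
				  unsigned long offset, unsigned long end)
{
	static const unsigned long granules[] = {
		4UL * 1024 * 1024,	/* 4 MB */
		512UL * 1024,		/* 512 KB */
		64UL * 1024,		/* 64 KB */
		4096UL,			/* base page */
	};
	unsigned int i;

	for (i = 0; i < sizeof(granules) / sizeof(granules[0]); i++) {
		unsigned long g = granules[i];

		/* address and offset aligned, and enough room left? */
		if (!(address & (g - 1)) && !(offset & (g - 1)) &&
		    end >= address + g)
			return g;
	}
	return 0;
}

int main(void)
{
	/* 512 KB aligned with exactly 512 KB of room: picks 512 KB */
	printf("%lu\n", pick_granule(0x80000, 0x80000, 0x100000));
	/* 4 MB aligned with plenty of room: picks 4 MB */
	printf("%lu\n", pick_granule(0x400000, 0x0, 0x1000000));
	return 0;
}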
Example #8
static int mem_write(struct inode * inode, struct file * file,char * buf, int count)
{
	pgd_t *page_dir;
	pmd_t *page_middle;
	pte_t pte;
	char * page;
	struct task_struct * tsk;
	unsigned long addr;
	char *tmp;
	int i;

	if (count < 0)
		return -EINVAL;
	addr = file->f_pos;
	tsk = get_task(inode->i_ino >> 16);
	if (!tsk)
		return -ESRCH;
	tmp = buf;
	while (count > 0) {
		if (current->signal & ~current->blocked)
			break;
		page_dir = pgd_offset(tsk,addr);
		if (pgd_none(*page_dir))
			break;
		if (pgd_bad(*page_dir)) {
			printk("Bad page dir entry %08lx\n", pgd_val(*page_dir));
			pgd_clear(page_dir);
			break;
		}
		page_middle = pmd_offset(page_dir,addr);
		if (pmd_none(*page_middle))
			break;
		if (pmd_bad(*page_middle)) {
			printk("Bad page middle entry %08lx\n", pmd_val(*page_middle));
			pmd_clear(page_middle);
			break;
		}
		pte = *pte_offset(page_middle,addr);
		if (!pte_present(pte))
			break;
		if (!pte_write(pte))
			break;
		page = (char *) pte_page(pte) + (addr & ~PAGE_MASK);
		i = PAGE_SIZE-(addr & ~PAGE_MASK);
		if (i > count)
			i = count;
		memcpy_fromfs(page, tmp, i);
		addr += i;
		tmp += i;
		count -= i;
	}
	file->f_pos = addr;
	if (tmp != buf)
		return tmp-buf;
	if (current->signal & ~current->blocked)
		return -ERESTARTSYS;
	return 0;
}
Example #9
/*
 * fault_is_resolved()
 *	Return true if the fault appears to be resolved.
 */
STATIC int fault_is_resolved(struct pt_regs *regs,
			 unsigned long missqw0,
			 unsigned long missqw1)
{
	pgd_t *pgd;
	pmd_t *pmd;
	pte_t *ptep;
	unsigned long address;
	unsigned long src = MMU_MISSQW1_SRC_GET(missqw1);
	unsigned long op = MMU_MISSQW0_OP_GET(missqw0);

	/*
	 * Potential hardware bug: check if this is an ifetch with a write
	 * op.  If so, we would spin in an infinite loop.  We check here
	 * because this path is under debug.
	 */
	if ((src == 0) && (op == 1)) {
		printk(KERN_CRIT "ifetch/write: missqw0=%lx, missqw1=%lx\n",
		       missqw0, missqw1);
		return 0;
	}

	/*
	 * See if we now have a valid pte?
	 */
	pgd = (pgd_t *)(MMU_MISSQW0_PGD_GET(missqw0) << MMU_MISSQW0_PGD_SHIFT);
	address = (unsigned long)(MMU_MISSQW1_VPN_GET(missqw1) << MMU_VPN_SHIFT);
	pmd = (pmd_t *)__pgd_offset(pgd, address);
	if (unlikely(pmd_none(*pmd)) || (unlikely(pmd_bad(*pmd)))) {
		printk(KERN_CRIT "address[0x%lx] pgd[%p] pmd[%p] is empty\n",
		       address, pgd, pmd);
		return 0;
	}

	ptep = pte_offset_map(pmd, address);
	if (unlikely(pte_none(*ptep)) || (unlikely(pte_bad(*ptep)))) {
		printk(KERN_CRIT "address[0x%lx] pgd[%p] pmd[%p] pte[%p] is empty\n",
		       address, pgd, pmd, ptep);
		return 0;
	}

	if (unlikely(!pte_present(*ptep))) {
		printk(KERN_CRIT "address[0x%lx] pgd[%p] pmd[%p] pte[%p] is invalid: 0x%lx\n",
		       address, pgd, pmd, ptep, pte_val(*ptep));
		return 0;
	}

	if (MMU_MISSQW0_OP_GET(missqw0) && !pte_write(*ptep)) {
		printk(KERN_CRIT "address[0x%lx] pgd[%p] pmd[%p] pte[%p] write requested but not given: 0x%lx\n",
		       address, pgd, pmd, ptep, pte_val(*ptep));
		/* Fall through, not as critical */
	}

	fault_printk(FAULT_DBG_TRACE, "FAULT[%d]: ti[%p], missqw0=%08lx, missqw1=%08lx, resolved!\n", 
	       raw_smp_processor_id(), (void *)current_thread_info(), missqw0, missqw1);
	return 1;
}
Example #10
/*
 * huge_ptep_set_access_flags will update access flags (dirty, accessed)
 * and write permission.
 *
 * For a contiguous huge pte range we need to check whether or not write
 * permission has to change only on the first pte in the set. Then for
 * all the contiguous ptes we need to check whether or not there is a
 * discrepancy between dirty or young.
 */
static int __cont_access_flags_changed(pte_t *ptep, pte_t pte, int ncontig)
{
	int i;

	if (pte_write(pte) != pte_write(huge_ptep_get(ptep)))
		return 1;

	for (i = 0; i < ncontig; i++) {
		pte_t orig_pte = huge_ptep_get(ptep + i);

		if (pte_dirty(pte) != pte_dirty(orig_pte))
			return 1;

		if (pte_young(pte) != pte_young(orig_pte))
			return 1;
	}

	return 0;
}
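
A standalone sketch of the same shape, with the pte reduced to the three
bits the function actually consults (mock types, not the kernel's): write
permission is compared once against the head entry, dirty and young against
every entry of the contiguous set.

#include <stdio.h>

struct mock_pte { int write, dirty, young; };

static int cont_flags_changed(const struct mock_pte *set,
			      struct mock_pte pte, int ncontig)
{
	int i;

	if (pte.write != set[0].write)
		return 1;		/* write checked on head pte only */
	for (i = 0; i < ncontig; i++)
		if (pte.dirty != set[i].dirty || pte.young != set[i].young)
			return 1;	/* dirty/young checked on all */
	return 0;
}

int main(void)
{
	struct mock_pte set[2] = { { 1, 1, 1 }, { 1, 0, 1 } };
	struct mock_pte pte = { 1, 1, 1 };

	printf("%d\n", cont_flags_changed(set, pte, 2));  /* 1: dirty differs */
	return 0;
}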
Example #11
int get_user_page(struct proc *p, unsigned long uvastart, int write, int force,
    struct page **plist)
{
	pte_t		pte;
	int		ret = -1;
	struct page	*pp;

	spin_lock(&p->pte_lock);

	pte = pgdir_walk(p->env_pgdir, (void*)uvastart, TRUE);

	if (!pte_walk_okay(pte))
		goto err1;

	if (!pte_is_present(pte)) {
		unsigned long prot = PTE_P | PTE_U | PTE_A | PTE_W | PTE_D;
#if 0
		printk("[akaros]: get_user_page() uva=0x%llx pte absent\n",
		    uvastart);
#endif
		/*
		 * TODO: ok to allocate with pte_lock? "prot" needs to be
		 * based on VMR writability, refer to pgprot_noncached().
		 */
		if (upage_alloc(p, &pp, 0))
			goto err1;
		pte_write(pte, page2pa(pp), prot);
	} else {
		pp = pa2page(pte_get_paddr(pte));

		/* __vmr_free_pgs() refcnt's pagemap pages differently */
		if (atomic_read(&pp->pg_flags) & PG_PAGEMAP) {
			printk("[akaros]: get_user_page(): uva=0x%llx\n",
			    uvastart);
			goto err1;
		}
	}

	if (write && (!pte_has_perm_urw(pte))) {
		/* TODO: how does Linux use the "force" parameter? */
		printk("[akaros]: get_user_page() uva=0x%llx pte ro\n",
		    uvastart);
		goto err1;
	}

	/* TODO (GUP): change the interface such that devices provide the memory and
	 * the user mmaps it, instead of trying to pin arbitrary user memory. */
	warn_once("Extremely unsafe, unpinned memory mapped!  If your process dies, you might scribble on RAM!");

	plist[0] = pp;
	ret = 1;
err1:
	spin_unlock(&p->pte_lock);
	return ret;
}
Example #12
unsigned long l4x_set_pte(struct mm_struct *mm,
                          unsigned long addr,
                          pte_t old, pte_t pteval)
{
	/*
	 * Check whether any invalidation is necessary.
	 *
	 * Invalidation (flush) is necessary if the old page was present
	 * and:
	 *   the new page is not present, OR
	 *   the new page has a different physical address, OR
	 *   the new page has different protection, OR
	 *   the new page has different access attributes
	 */

	/* old was present && new not -> flush */
	int flush_rights = L4_FPAGE_RWX;

	if (pte_present(pteval)) {
		/* new page is present,
		 * now we have to find out what has changed */
		if (((pte_val(old) ^ pte_val(pteval)) & L4X_PHYSICAL_PAGE_MASK)
		    || (pte_young(old) && !pte_young(pteval))) {
			/* physical page frame changed
			 * || access attribute changed -> flush */
			/* flush is the default */
		} else if ((pte_write(old) && !pte_write(pteval))
		           || (pte_dirty(old) && !pte_dirty(pteval))) {
			/* Protection changed from r/w to ro
			 * or page now clean -> remap */
			flush_rights = L4_FPAGE_W;
		} else {
			/* nothing changed, simply return */
			return pte_val(pteval);
		}
	}

	/* Ok, now actually flush or remap the page */
	l4x_flush_page(mm, pte_val(old) & L4X_PHYSICAL_PAGE_MASK,
	               addr, PAGE_SHIFT, flush_rights, _RET_IP_);
	return pte_val(pteval);
}
Example #13
void pte_free(struct page *pte)
{
	unsigned long va = (unsigned long)__va(page_to_pfn(pte)<<PAGE_SHIFT);

	if (!pte_write(*virt_to_ptep(va)))
		BUG_ON(HYPERVISOR_update_va_mapping(
			va, pfn_pte(page_to_pfn(pte), PAGE_KERNEL), 0));

	ClearPageForeign(pte);
	init_page_count(pte);

	__free_page(pte);
}
Example #14
static int page_present(struct mm_struct *mm, void __user *uptr, int wr)
{
	unsigned long addr = (unsigned long)uptr;
	pgd_t *pgd = pgd_offset(mm, addr);
	if (pgd_present(*pgd)) {
		pmd_t *pmd = pmd_offset(pgd, addr);
		if (pmd_present(*pmd)) {
			pte_t *pte = pte_offset_map(pmd, addr);
			return (pte_present(*pte) && (!wr || pte_write(*pte)));
		}
	}
	return 0;
}
Example #15
/*
 * This routine puts a long into any process space by following the page
 * tables. NOTE! You should check that the long isn't on a page boundary,
 * and that it is in the task area before calling this: this routine does
 * no checking.
 *
 * Now keeps R/W state of page so that a text page stays readonly
 * even if a debugger scribbles breakpoints into it.  -M.U-
 */
static void put_long(struct task_struct * tsk, struct vm_area_struct * vma, unsigned long addr,
	unsigned long data)
{
	pgd_t *pgdir;
	pmd_t *pgmiddle;
	pte_t *pgtable;
	unsigned long page;
		
repeat:
	pgdir = pgd_offset(vma->vm_mm, addr);
	if (!pgd_present(*pgdir)) {
		do_no_page(tsk, vma, addr, 1);
		goto repeat;
	}
	if (pgd_bad(*pgdir)) {
		printk("ptrace: bad page directory %08lx\n", pgd_val(*pgdir));
		pgd_clear(pgdir);
		return;
	}
	pgmiddle = pmd_offset(pgdir,addr);
	if (pmd_none(*pgmiddle)) {
		do_no_page(tsk, vma, addr, 1);
		goto repeat;
	}
	if (pmd_bad(*pgmiddle)) {
		printk("ptrace: bad page directory %08lx\n",
		       pmd_val(*pgmiddle));
		pmd_clear(pgmiddle);
		return;
	}
	pgtable = pte_offset(pgmiddle, addr);
	if (!pte_present(*pgtable)) {
		do_no_page(tsk, vma, addr, 1);
		goto repeat;
	}
	page = pte_page(*pgtable);
	if (!pte_write(*pgtable)) {
		do_wp_page(tsk, vma, addr, 2);
		goto repeat;
	}
/* this is a hack for non-kernel-mapped video buffers and similar */
	if (page < high_memory) {
		*(unsigned long *) (page + (addr & ~PAGE_MASK)) = data;
		flush_page_to_ram (page);
	}
/* we're bypassing pagetables, so we have to set the dirty bit ourselves */
/* this should also re-instate whatever read-only mode there was before */
	*pgtable = pte_mkdirty(mk_pte(page, vma->vm_page_prot));
	flush_tlb_all();
}
Example #16
void __iomem *
ioremap_prot(phys_addr_t addr, unsigned long size, unsigned long flags)
{
	pte_t pte = __pte(flags);

	/* writeable implies dirty for kernel addresses */
	if (pte_write(pte))
		pte = pte_mkdirty(pte);

	/* we don't want to let _PAGE_USER and _PAGE_EXEC leak out */
	pte = pte_exprotect(pte);
	pte = pte_mkprivileged(pte);

	return __ioremap_caller(addr, size, pte_pgprot(pte), __builtin_return_address(0));
}
Example #17
static unsigned long maybe_map(unsigned long virt, int is_write)
{
    pte_t pte;
    int err;

    void *phys = um_virt_to_phys(current, virt, &pte);
    int dummy_code;

    if(IS_ERR(phys) || (is_write && !pte_write(pte))) {
        err = handle_page_fault(virt, 0, is_write, 1, &dummy_code);
        if(err)
            return(0);
        phys = um_virt_to_phys(current, virt, NULL);
    }
    return((unsigned long) phys);
}
Example #18
static pte_t *maybe_map(unsigned long virt, int is_write)
{
	pte_t *pte = virt_to_pte(current->mm, virt);
	int err, dummy_code;

	if ((pte == NULL) || !pte_present(*pte) ||
	    (is_write && !pte_write(*pte))) {
		err = handle_page_fault(virt, 0, is_write, 1, &dummy_code);
		if (err)
			return NULL;
		pte = virt_to_pte(current->mm, virt);
	}
	if (!pte_present(*pte))
		pte = NULL;

	return pte;
}
Example #19
static void write_addr(u128 * target, u128 * inject)
{
    pte_t * ppt, pt;
    unsigned int level;

    if(pte_write(*lookup_address((unsigned long)target, &level)) == 0) {
        ppt = lookup_address((unsigned long)target, &level);
        pt = pte_mkwrite(*ppt);
        set_pte(ppt, pt);
        *target = *inject;
        ppt = lookup_address((unsigned long)target, &level);
        pt = pte_wrprotect(*ppt);
        set_pte(ppt, pt);
    } else {
        *target = *inject;
    }
}
Example #20
static __always_inline size_t __user_copy_pt(unsigned long uaddr, void *kptr,
					     size_t n, int write_user)
{
	struct mm_struct *mm = current->mm;
	unsigned long offset, pfn, done, size;
	pte_t *pte;
	void *from, *to;

	done = 0;
retry:
	spin_lock(&mm->page_table_lock);
	do {
		pte = follow_table(mm, uaddr);
		if ((unsigned long) pte < 0x1000)
			goto fault;
		if (!pte_present(*pte)) {
			pte = (pte_t *) 0x11;
			goto fault;
		} else if (write_user && !pte_write(*pte)) {
			pte = (pte_t *) 0x04;
			goto fault;
		}

		pfn = pte_pfn(*pte);
		offset = uaddr & (PAGE_SIZE - 1);
		size = min(n - done, PAGE_SIZE - offset);
		if (write_user) {
			to = (void *)((pfn << PAGE_SHIFT) + offset);
			from = kptr + done;
		} else {
			from = (void *)((pfn << PAGE_SHIFT) + offset);
			to = kptr + done;
		}
		memcpy(to, from, size);
		done += size;
		uaddr += size;
	} while (done < n);
	spin_unlock(&mm->page_table_lock);
	return n - done;
fault:
	spin_unlock(&mm->page_table_lock);
	if (__handle_fault(uaddr, (unsigned long) pte, write_user))
		return n - done;
	goto retry;
}
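
The chunking arithmetic in the loop above guarantees that no memcpy() ever
crosses a page boundary: each iteration copies at most the remainder of the
current page. Extracted into standalone form (MOCK_PAGE_SIZE stands in for
PAGE_SIZE):

#include <stdio.h>

#define MOCK_PAGE_SIZE	4096UL

static void show_chunks(unsigned long uaddr, unsigned long n)
{
	unsigned long done = 0;

	while (done < n) {
		unsigned long offset = uaddr & (MOCK_PAGE_SIZE - 1);
		unsigned long size = n - done;

		if (size > MOCK_PAGE_SIZE - offset)
			size = MOCK_PAGE_SIZE - offset;	/* cap at page end */
		printf("copy %4lu bytes at 0x%lx\n", size, uaddr);
		done += size;
		uaddr += size;
	}
}

int main(void)
{
	/* 16 bytes to the page end, two full pages, then 16 bytes */
	show_chunks(0x1ff0, 0x2020);
	return 0;
}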
Example #21
/*
 * The above separate functions for the no-page and wp-page
 * cases will go away (they mostly do the same thing anyway),
 * and we'll instead use only a general "handle_mm_fault()".
 *
 * These routines also need to handle stuff like marking pages dirty
 * and/or accessed for architectures that don't do it in hardware (most
 * RISC architectures).  The early dirtying is also good on the i386.
 *
 * There is also a hook called "update_mmu_cache()" that architectures
 * with external mmu caches can use to update those (ie the Sparc or
 * PowerPC hashed page tables that act as extended TLBs).
 */
static inline void handle_pte_fault(struct vm_area_struct * vma, unsigned long address,
	int write_access, pte_t * pte)
{
	if (!pte_present(*pte)) {
		do_no_page(current, vma, address, write_access);
		return;
	}
	set_pte(pte, pte_mkyoung(*pte));
	flush_tlb_page(vma, address);
	if (!write_access)
		return;
	if (pte_write(*pte)) {
		set_pte(pte, pte_mkdirty(*pte));
		flush_tlb_page(vma, address);
		return;
	}
	do_wp_page(current, vma, address, write_access);
}
Example #22
/*
 * We can receive a page fault from a migrating PTE at any time.
 * Handle it by just waiting until the fault resolves.
 *
 * It's also possible to get a migrating kernel PTE that resolves
 * itself during the downcall from hypervisor to Linux.  We just check
 * here to see if the PTE seems valid, and if so we retry it.
 *
 * NOTE! We MUST NOT take any locks for this case.  We may be in an
 * interrupt or a critical region, and must do as little as possible.
 * Similarly, we can't use atomic ops here, since we may be handling a
 * fault caused by an atomic op access.
 *
 * If we find a migrating PTE while we're in an NMI context, and we're
 * at a PC that has a registered exception handler, we don't wait,
 * since this thread may (e.g.) have been interrupted while migrating
 * its own stack, which would then cause us to self-deadlock.
 */
static int handle_migrating_pte(pgd_t *pgd, int fault_num,
				unsigned long address, unsigned long pc,
				int is_kernel_mode, int write)
{
	pud_t *pud;
	pmd_t *pmd;
	pte_t *pte;
	pte_t pteval;

	if (pgd_addr_invalid(address))
		return 0;

	pgd += pgd_index(address);
	pud = pud_offset(pgd, address);
	if (!pud || !pud_present(*pud))
		return 0;
	pmd = pmd_offset(pud, address);
	if (!pmd || !pmd_present(*pmd))
		return 0;
	pte = pmd_huge_page(*pmd) ? ((pte_t *)pmd) :
		pte_offset_kernel(pmd, address);
	pteval = *pte;
	if (pte_migrating(pteval)) {
		if (in_nmi() && search_exception_tables(pc))
			return 0;
		wait_for_migration(pte);
		return 1;
	}

	if (!is_kernel_mode || !pte_present(pteval))
		return 0;
	if (fault_num == INT_ITLB_MISS) {
		if (pte_exec(pteval))
			return 1;
	} else if (write) {
		if (pte_write(pteval))
			return 1;
	} else {
		if (pte_read(pteval))
			return 1;
	}

	return 0;
}
Example #23
struct page *kmem_vm_nopage(struct vm_area_struct *vma, unsigned long address, int write)
{
	unsigned long offset = vma->vm_pgoff << PAGE_SHIFT;
	unsigned long kaddr;
	pgd_t *pgd;
	pmd_t *pmd;
	pte_t *ptep, pte;
	struct page *page = NULL;

	/* address is user VA; convert to kernel VA of desired page */
	kaddr = (address - vma->vm_start) + offset;
	kaddr = VMALLOC_VMADDR(kaddr);

	spin_lock(&init_mm.page_table_lock);

	/* Lookup page structure for kernel VA */
	pgd = pgd_offset(&init_mm, kaddr);
	if (pgd_none(*pgd) || pgd_bad(*pgd))
		goto out;
	pmd = pmd_offset(pgd, kaddr);
	if (pmd_none(*pmd) || pmd_bad(*pmd))
		goto out;
	ptep = pte_offset(pmd, kaddr);
	if (!ptep)
		goto out;
	pte = *ptep;
	if (!pte_present(pte))
		goto out;
	if (write && !pte_write(pte))
		goto out;
	page = pte_page(pte);
	if (!VALID_PAGE(page)) {
		page = NULL;
		goto out;
	}

	/* Increment reference count on page */
	get_page(page);

out:
	spin_unlock(&init_mm.page_table_lock);

	return page;
}
Example #24
File: exmap.c Project: jbert/exmap
/* Add the output line for the given page to the buffer, returning
 * number of chars added. Returns 0 if it can't fit into the
 * buffer. */
static int show_one_page(pte_t pte,
			 char *buffer,
			 int buflen)
{
	int len;
	unsigned long pfn = 0UL;
	swp_entry_t swap_entry;
	int present;
	int writable;
	unsigned long cookie;

	swap_entry.val = 0UL; /* All zeros not a valid entry */

	present = pte_present(pte);
	writable = pte_write(pte) ? 1 : 0;
	if (present) {
		pfn = pte_pfn(pte);
		if (!pfn_valid(pfn)) {
			pfn = 0;
		}
		cookie = pfn;
	}
	else {
		swap_entry = pte_to_swp_entry(pte);
		cookie = swap_entry.val;
	}
	
	len = snprintf (buffer,
			buflen,
			"%d %d %lx\n",
			present,
			writable,
			cookie);
	
	if (len >= buflen)
		goto ETOOLONG;
	
	return len;
 ETOOLONG:
	buffer[0] = '\0';
	return 0;
}
Example #25
/*
 * This routine puts a long into any process space by following the page
 * tables. NOTE! You should check that the long isn't on a page boundary,
 * and that it is in the task area before calling this: this routine does
 * no checking.
 *
 * Now keeps R/W state of page so that a text page stays readonly
 * even if a debugger scribbles breakpoints into it.  -M.U-
 */
static void put_long(struct vm_area_struct * vma, unsigned long addr,
	unsigned long data)
{
	pgd_t *pgdir;
	pte_t *pgtable;
	unsigned long page;

repeat:
	pgdir = PAGE_DIR_OFFSET(vma->vm_task, addr);
	if (!pgd_present(*pgdir)) {
		do_no_page(vma, addr, 1);
		goto repeat;
	}
	if (pgd_bad(*pgdir)) {
		printk("ptrace: bad page directory %08lx\n", pgd_val(*pgdir));
		pgd_clear(pgdir);
		return;
	}
	pgtable = (pte_t *) (PAGE_PTR(addr) + pgd_page(*pgdir));
	if (!pte_present(*pgtable)) {
		do_no_page(vma, addr, 1);
		goto repeat;
	}
	page = pte_page(*pgtable);
	if (!pte_write(*pgtable)) {
		do_wp_page(vma, addr, 1);
		goto repeat;
	}
/* this is a hack for non-kernel-mapped video buffers and similar */
	if (page < high_memory) {
		page += addr & ~PAGE_MASK;
		*(unsigned long *) page = data;
	}
/* we're bypassing pagetables, so we have to set the dirty bit ourselves */
/* this should also re-instate whatever read-only mode there was before */
	*pgtable = pte_mkdirty(mk_pte(page, vma->vm_page_prot));
	invalidate();
}
Example #26
static int
pin_page_for_write(const void __user *_addr, pte_t **ptep, spinlock_t **ptlp)
{
	unsigned long addr = (unsigned long)_addr;
	pgd_t *pgd;
	pmd_t *pmd;
	pte_t *pte;
	pud_t *pud;
	spinlock_t *ptl;

	pgd = pgd_offset(current->mm, addr);
	if (unlikely(pgd_none(*pgd) || pgd_bad(*pgd)))
		return 0;

	pud = pud_offset(pgd, addr);
	if (unlikely(pud_none(*pud) || pud_bad(*pud)))
		return 0;

	pmd = pmd_offset(pud, addr);
	if (unlikely(pmd_none(*pmd)))
		return 0;

	/*
	 * A pmd can be bad if it refers to a HugeTLB or THP page.
	 *
	 * Both THP and HugeTLB pages have the same pmd layout
	 * and should not be manipulated by the pte functions.
	 *
	 * Lock the page table for the destination and check
	 * to see that it's still huge and whether or not we will
	 * need to fault on write, or if we have a splitting THP.
	 */
	if (unlikely(pmd_thp_or_huge(*pmd))) {
		ptl = &current->mm->page_table_lock;
		spin_lock(ptl);
		if (unlikely(!pmd_thp_or_huge(*pmd)
			|| pmd_hugewillfault(*pmd)
			|| pmd_trans_splitting(*pmd))) {
			spin_unlock(ptl);
			return 0;
		}

		*ptep = NULL;
		*ptlp = ptl;
		return 1;
	}

	if (unlikely(pmd_bad(*pmd)))
		return 0;

	pte = pte_offset_map_lock(current->mm, pmd, addr, &ptl);
	if (unlikely(!pte_present(*pte) || !pte_young(*pte) ||
	    !pte_write(*pte) || !pte_dirty(*pte))) {
		pte_unmap_unlock(pte, ptl);
		return 0;
	}

	*ptep = pte;
	*ptlp = ptl;

	return 1;
}
Example #27
/*
 * FOLL_FORCE can write to even unwritable pte's, but only
 * after we've gone through a COW cycle and they are dirty.
 */
static inline bool can_follow_write_pte(pte_t pte, unsigned int flags)
{
	return pte_write(pte) ||
		((flags & FOLL_FORCE) && (flags & FOLL_COW) && pte_dirty(pte));
}
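
A hedged sketch of how a get_user_pages()-style caller drives this check:
the first forced write to a read-only pte fails, the caller breaks COW via a
write fault (which leaves the pte dirty), records that with FOLL_COW, and
retries. The flag values and the mock pte below are for illustration; this
is a simplification, not the kernel's gup.c.

#include <stdio.h>

#define FOLL_WRITE	0x01
#define FOLL_FORCE	0x10
#define FOLL_COW	0x4000

/* Mock pte: only the two bits the check consults. */
struct mock_pte { int write; int dirty; };

static int can_follow_write(struct mock_pte pte, unsigned int flags)
{
	return pte.write ||
	       ((flags & FOLL_FORCE) && (flags & FOLL_COW) && pte.dirty);
}

int main(void)
{
	/* Forced write to a read-only mapping: first attempt fails... */
	struct mock_pte pte = { .write = 0, .dirty = 0 };
	unsigned int flags = FOLL_WRITE | FOLL_FORCE;

	printf("before COW: %d\n", can_follow_write(pte, flags));  /* 0 */

	/* ...breaking COW dirties the pte; the caller sets FOLL_COW
	 * and retries, and the forced write is now allowed. */
	pte.dirty = 1;
	flags |= FOLL_COW;
	printf("after COW:  %d\n", can_follow_write(pte, flags));  /* 1 */
	return 0;
}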
Example #28
/* check the permissions of an address and return its type */
static int memory_check_addr_perm_task(const void *addr, word *size, int write, byte *read_only, byte *executable, struct task_struct *task)
{
	struct vm_area_struct *vma;
	word start = ROUNDDOWN((word)addr, PAGE_SIZE);
	word end = ROUNDUP((word)addr + *size, PAGE_SIZE);
	word total_size = 0;
	byte local_read_only = 0;
	byte local_executable = 0;
	int ret = ADDR_UNDEF;
	int atomic;
#ifdef HAS_LOOKUP_ADDRESS
	pte_t *pte;
	unsigned int level;
#endif

	if (NULL == read_only) {
		read_only = &local_read_only;
	}

	if (NULL == executable) {
		executable = &local_executable;
	}

	*read_only = 0;
	*executable = 0;

	atomic = in_atomic();

	if (!atomic) {
		down_read(&task->mm->mmap_sem);
	}

	while (start < end) {
		if (task && task->mm) {
			/* check if it's a user address */
			vma = find_vma(task->mm, start);
			if (vma && vma->vm_start <= start) {
				if (ret != ADDR_UNDEF && ret != ADDR_OUTSIDE) {
					goto end;
				}

				if (!(vma->vm_flags & VM_READ)) {
					goto end;
				}

				if (!(vma->vm_flags & VM_WRITE)) {
					if (write) {
						/* no more writable bytes */
						goto end;

					} else if (ret != ADDR_UNDEF && !(*read_only)) {
						/* the permissions have changed; this is where we stop the buffer */
						goto end;
					}

					*read_only = 1;
				}

				start = vma->vm_end;
				total_size = start - (word)addr;
				ret = ADDR_OUTSIDE;
				continue;
			}
		}

		/* check if it's a kernel virtual address */

#ifdef HAS_LOOKUP_ADDRESS
		pte = lookup_address((unsigned long)addr, &level);
		if (NULL == pte) {
			goto end;
		}

		if (ret == ADDR_UNDEF) {
			*executable = pte_exec(*pte);
		}

		if (pte_present(*pte)) {
			if (ret != ADDR_UNDEF && ret != ADDR_INSIDE) {
				goto end;
			}

			if (!pte_write(*pte)) {
				if (write) {
					/* no more writable bytes */
					goto end;

				} else if (ret != ADDR_UNDEF && !(*read_only)) {
				} else if (ret != ADDR_UNDEF && !(*read_only)) {
					/* the permissions have changed; this is where we stop the buffer */
					goto end;
				}

				*read_only = 1;
			}

			start += PAGE_SIZE;
			total_size = start - (word)addr;
			ret = ADDR_INSIDE;
			continue;
		}
		goto end;
#else
		if (ret != ADDR_UNDEF && ret != ADDR_INSIDE) {
			goto end;
		}

		if (	start >= PAGE_OFFSET ||
			(start >= MODULES_VADDR && start < MODULES_END) ||
			(start >= VMALLOC_START && start < VMALLOC_END)) {
			/* This is not totally safe, but it's enough for now. */
			*executable = 1;
			start += PAGE_SIZE;
			total_size = start - (word)addr;
			ret = ADDR_INSIDE;
			continue;
		}
		goto end;
#endif
	}

end:
	if (!atomic) {
		up_read(&task->mm->mmap_sem);
	}
	if (total_size) {
		if (total_size < *size) {
			*size = total_size;
		}
		return ret;
	} else {
		return ADDR_UNDEF;
	}
}
Example #29
static size_t copy_in_user_pt(size_t n, void __user *to,
			      const void __user *from)
{
	struct mm_struct *mm = current->mm;
	unsigned long offset_from, offset_to, offset_max, pfn_from, pfn_to,
		      uaddr, done, size, error_code;
	unsigned long uaddr_from = (unsigned long) from;
	unsigned long uaddr_to = (unsigned long) to;
	pte_t *pte_from, *pte_to;
	int write_user;

	if (segment_eq(get_fs(), KERNEL_DS)) {
		memcpy((void __force *) to, (void __force *) from, n);
		return 0;
	}
	done = 0;
retry:
	spin_lock(&mm->page_table_lock);
	do {
		write_user = 0;
		uaddr = uaddr_from;
		pte_from = follow_table(mm, uaddr_from);
		error_code = (unsigned long) pte_from;
		if (error_code < 0x1000)
			goto fault;
		if (!pte_present(*pte_from)) {
			error_code = 0x11;
			goto fault;
		}

		write_user = 1;
		uaddr = uaddr_to;
		pte_to = follow_table(mm, uaddr_to);
		error_code = (unsigned long) pte_to;
		if (error_code < 0x1000)
			goto fault;
		if (!pte_present(*pte_to)) {
			error_code = 0x11;
			goto fault;
		} else if (!pte_write(*pte_to)) {
			error_code = 0x04;
			goto fault;
		}

		pfn_from = pte_pfn(*pte_from);
		pfn_to = pte_pfn(*pte_to);
		offset_from = uaddr_from & (PAGE_SIZE-1);
		offset_to = uaddr_to & (PAGE_SIZE-1);
		offset_max = max(offset_from, offset_to);
		size = min(n - done, PAGE_SIZE - offset_max);

		memcpy((void *)(pfn_to << PAGE_SHIFT) + offset_to,
		       (void *)(pfn_from << PAGE_SHIFT) + offset_from, size);
		done += size;
		uaddr_from += size;
		uaddr_to += size;
	} while (done < n);
	spin_unlock(&mm->page_table_lock);
	return n - done;
fault:
	spin_unlock(&mm->page_table_lock);
	if (__handle_fault(uaddr, error_code, write_user))
		return n - done;
	goto retry;
}
Example #30
static void fix_range(struct mm_struct *mm, unsigned long start_addr,
		      unsigned long end_addr, int force)
{
	pgd_t *npgd;
	pud_t *npud;
	pmd_t *npmd;
	pte_t *npte;
	unsigned long addr,  end;
	int r, w, x, err, fd;

	if(mm == NULL) return;
	fd = mm->context.skas.mm_fd;
	for(addr = start_addr; addr < end_addr;){
		npgd = pgd_offset(mm, addr);
		if(!pgd_present(*npgd)){
			if(force || pgd_newpage(*npgd)){
				end = addr + PGDIR_SIZE;
				if(end > end_addr)
					end = end_addr;
				err = unmap(fd, (void *) addr, end - addr);
				if(err < 0)
					panic("munmap failed, errno = %d\n",
					      -err);
				pgd_mkuptodate(*npgd);
			}
			addr += PGDIR_SIZE;
			continue;
		}

		npud = pud_offset(npgd, addr);
		if(!pud_present(*npud)){
			if(force || pud_newpage(*npud)){
				end = addr + PUD_SIZE;
				if(end > end_addr)
					end = end_addr;
				err = unmap(fd, (void *) addr, end - addr);
				if(err < 0)
					panic("munmap failed, errno = %d\n",
					      -err);
				pud_mkuptodate(*npud);
			}
			addr += PUD_SIZE;
			continue;
		}

		npmd = pmd_offset(npud, addr);
		if(!pmd_present(*npmd)){
			if(force || pmd_newpage(*npmd)){
				end = addr + PMD_SIZE;
				if(end > end_addr)
					end = end_addr;
				err = unmap(fd, (void *) addr, end - addr);
				if(err < 0)
					panic("munmap failed, errno = %d\n",
					      -err);
				pmd_mkuptodate(*npmd);
			}
			addr += PMD_SIZE;
			continue;
		}

		npte = pte_offset_kernel(npmd, addr);
		r = pte_read(*npte);
		w = pte_write(*npte);
		x = pte_exec(*npte);
		if(!pte_dirty(*npte))
			w = 0;
		if(!pte_young(*npte)){
			r = 0;
			w = 0;
		}
		if(force || pte_newpage(*npte)){
			err = unmap(fd, (void *) addr, PAGE_SIZE);
			if(err < 0)
				panic("munmap failed, errno = %d\n", -err);
			if(pte_present(*npte))
				map(fd, addr, pte_val(*npte) & PAGE_MASK,
				    PAGE_SIZE, r, w, x);
		}
		else if(pte_newprot(*npte))
			protect(fd, addr, PAGE_SIZE, r, w, x, 1);

		*npte = pte_mkuptodate(*npte);
		addr += PAGE_SIZE;
	}
}
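
The permission downgrade near the top of the pte handling in fix_range()
implements software dirty/young tracking: a clean page must not be mapped
writable (the first write must fault so the dirty bit can be set), and an
unreferenced page must not be mapped at all (the first access must fault so
the young bit can be set). A standalone sketch of that rule, with plain ints
in place of the pte accessors:

#include <stdio.h>

/* Derive the effective mapping rights from the nominal rights plus the
 * software dirty/young bits, as fix_range() does above. */
static void effective_perms(int r, int w, int x, int dirty, int young,
			    int *er, int *ew, int *ex)
{
	*er = r;
	*ew = w;
	*ex = x;
	if (!dirty)
		*ew = 0;	/* clean: write must fault */
	if (!young) {
		*er = 0;	/* unreferenced: any access must fault */
		*ew = 0;
	}
}

int main(void)
{
	int r, w, x;

	effective_perms(1, 1, 0, /* dirty */ 0, /* young */ 1, &r, &w, &x);
	printf("r=%d w=%d x=%d\n", r, w, x);	/* r=1 w=0 x=0 */
	return 0;
}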