Example #1
/*
 * This routine handles present pages, when users try to write
 * to a shared page. It is done by copying the page to a new address
 * and decrementing the shared-page counter for the old page.
 *
 * Goto-purists beware: the only reason for goto's here is that it results
 * in better assembly code.. The "default" path will see no jumps at all.
 *
 * Note that this routine assumes that the protection checks have been
 * done by the caller (the low-level page fault routine in most cases).
 * Thus we can safely just mark it writable once we've done any necessary
 * COW.
 *
 * We also mark the page dirty at this point even though the page will
 * change only once the write actually happens. This avoids a few races,
 * and potentially makes it more efficient.
 *
 * We hold the mm semaphore and the page_table_lock on entry; we exit
 * with the page_table_lock released.
 */
static int do_wp_page(struct mm_struct *mm, struct vm_area_struct * vma,
	unsigned long address, pte_t *page_table, pte_t pte)
{
	struct page *old_page, *new_page;

	old_page = pte_page(pte);
	if (!VALID_PAGE(old_page))
		goto bad_wp_page;

	if (!TryLockPage(old_page)) {
		int reuse = can_share_swap_page(old_page);
		unlock_page(old_page);
		if (reuse) {
#ifndef CONFIG_SUPERH
			/* Not needed for VIPT cache */
			flush_cache_page(vma, address);
#endif
			establish_pte(vma, address, page_table, pte_mkyoung(pte_mkdirty(pte_mkwrite(pte))));
			spin_unlock(&mm->page_table_lock);
			return 1;	/* Minor fault */
		}
	}

	/*
	 * Ok, we need to copy. Oh, well..
	 */
	page_cache_get(old_page);
	spin_unlock(&mm->page_table_lock);

	new_page = alloc_page(GFP_HIGHUSER);
	if (!new_page)
		goto no_mem;
	copy_cow_page(old_page,new_page,address);

	/*
	 * Re-check the pte - we dropped the lock
	 */
	spin_lock(&mm->page_table_lock);
	if (pte_same(*page_table, pte)) {
		if (PageReserved(old_page))
			++mm->rss;
		break_cow(vma, new_page, address, page_table);
		lru_cache_add(new_page);

		/* Free the old page.. */
		new_page = old_page;
	}
	spin_unlock(&mm->page_table_lock);
	page_cache_release(new_page);
	page_cache_release(old_page);
	return 1;	/* Minor fault */

bad_wp_page:
	spin_unlock(&mm->page_table_lock);
	printk("do_wp_page: bogus page at address %08lx (page 0x%lx)\n",address,(unsigned long)old_page);
	return -1;
no_mem:
	page_cache_release(old_page);
	return -1;
}
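
A note on the API seen above: the examples on this page mix two generations of flush_cache_page(). Older kernels (as in this example) pass only the VMA and the faulting user virtual address, while later kernels also pass the page frame number, as in Examples #5, #9, #13 and #15. A rough sketch of the two prototypes (check asm/cacheflush.h in your tree for the exact form):

/* older kernels: flush by user virtual address only */
void flush_cache_page(struct vm_area_struct *vma, unsigned long addr);

/* later kernels: the backing page frame number is passed as well */
void flush_cache_page(struct vm_area_struct *vma, unsigned long addr, unsigned long pfn);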
Example #2
/*
 * We hold the mm semaphore for reading and vma->vm_mm->page_table_lock
 */
static inline void break_cow(struct vm_area_struct * vma, struct page * new_page, unsigned long address, 
		pte_t *page_table)
{
	flush_page_to_ram(new_page);
	flush_cache_page(vma, address);
	establish_pte(vma, address, page_table, pte_mkwrite(pte_mkdirty(mk_pte(new_page, vma->vm_page_prot))));
}
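
For reference, establish_pte(), which break_cow() and do_wp_page() rely on but which is not reproduced on this page, is a small helper in 2.4's mm/memory.c; roughly (paraphrased, not a verbatim quote):

static inline void establish_pte(struct vm_area_struct *vma, unsigned long address,
		pte_t *page_table, pte_t entry)
{
	set_pte(page_table, entry);		/* install the new pte value */
	flush_tlb_page(vma, address);		/* drop any stale TLB entry */
	update_mmu_cache(vma, address, entry);	/* arch hook for software-managed MMUs/caches */
}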
Example #3
/*
 * This function zeroes out partial mmap'ed pages at truncation time..
 */
static void partial_clear(struct vm_area_struct *vma, unsigned long address)
{
	pgd_t *page_dir;
	pmd_t *page_middle;
	pte_t *page_table, pte;

	page_dir = pgd_offset(vma->vm_mm, address);
	if (pgd_none(*page_dir))
		return;
	if (pgd_bad(*page_dir)) {
		printk("bad page table directory entry %p:[%lx]\n", page_dir, pgd_val(*page_dir));
		pgd_clear(page_dir);
		return;
	}
	page_middle = pmd_offset(page_dir, address);
	if (pmd_none(*page_middle))
		return;
	if (pmd_bad(*page_middle)) {
		printk("bad page table directory entry %p:[%lx]\n", page_dir, pgd_val(*page_dir));
		pmd_clear(page_middle);
		return;
	}
	page_table = pte_offset(page_middle, address);
	pte = *page_table;
	if (!pte_present(pte))
		return;
	flush_cache_page(vma, address);
	address &= ~PAGE_MASK;
	address += pte_page(pte);
	if (address >= high_memory)
		return;
	memset((void *) address, 0, PAGE_SIZE - (address & ~PAGE_MASK));
	flush_page_to_ram(pte_page(pte));
}
Example #4
int access_process_vm(struct task_struct *tsk, unsigned long addr, void *buf, int len, int write)
{
    struct mm_struct *mm;
    struct vm_area_struct *vma;
    struct page *page;
    void *old_buf = buf;

    /* Worry about races with exit() */
    task_lock(tsk);
    mm = tsk->mm;
    if (!tsk->task_dumpable || (&init_mm == mm))
        mm = NULL;
    if (mm)
        atomic_inc(&mm->mm_users);
    task_unlock(tsk);
    if (!mm)
        return 0;

    down_read(&mm->mmap_sem);
    /* ignore errors, just check how much was successfully transferred */
    while (len) {
        int bytes, ret, offset;
        void *maddr;

        ret = get_user_pages(current, mm, addr, 1,
                             write, 1, &page, &vma);
        if (ret <= 0)
            break;

        bytes = len;
        offset = addr & (PAGE_SIZE-1);
        if (bytes > PAGE_SIZE-offset)
            bytes = PAGE_SIZE-offset;

        flush_cache_page(vma, addr);

        maddr = kmap(page);
        if (write) {
            memcpy(maddr + offset, buf, bytes);
#ifdef CONFIG_SUPERH
            flush_dcache_page(page);
#endif
            flush_page_to_ram(page);
            flush_icache_user_range(vma, page, addr, len);
        } else {
            memcpy(buf, maddr + offset, bytes);
            flush_page_to_ram(page);
        }
        kunmap(page);
        put_page(page);
        len -= bytes;
        buf += bytes;
        addr += bytes;
    }
    up_read(&mm->mmap_sem);
    mmput(mm);

    return buf - old_buf;
}
Example #5
/**
 * __replace_page - replace page in vma by new page.
 * based on replace_page in mm/ksm.c
 *
 * @vma:      vma that holds the pte pointing to page
 * @addr:     address the old @old_page is mapped at
 * @old_page: the COWed page we are replacing by @new_page
 * @new_page: the modified page we replace @old_page by
 *
 * Returns 0 on success, -EFAULT on failure.
 */
static int __replace_page(struct vm_area_struct *vma, unsigned long addr,
				struct page *old_page, struct page *new_page)
{
	struct mm_struct *mm = vma->vm_mm;
	spinlock_t *ptl;
	pte_t *ptep;
	int err;
	/* For mmu_notifiers */
	const unsigned long mmun_start = addr;
	const unsigned long mmun_end   = addr + PAGE_SIZE;
	struct mem_cgroup *memcg;

	err = mem_cgroup_try_charge(new_page, vma->vm_mm, GFP_KERNEL, &memcg,
			false);
	if (err)
		return err;

	/* For try_to_free_swap() and munlock_vma_page() below */
	lock_page(old_page);

	mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end);
	err = -EAGAIN;
	ptep = page_check_address(old_page, mm, addr, &ptl, 0);
	if (!ptep) {
		mem_cgroup_cancel_charge(new_page, memcg, false);
		goto unlock;
	}

	get_page(new_page);
	page_add_new_anon_rmap(new_page, vma, addr, false);
	mem_cgroup_commit_charge(new_page, memcg, false, false);
	lru_cache_add_active_or_unevictable(new_page, vma);

	if (!PageAnon(old_page)) {
		dec_mm_counter(mm, mm_counter_file(old_page));
		inc_mm_counter(mm, MM_ANONPAGES);
	}

	flush_cache_page(vma, addr, pte_pfn(*ptep));
	ptep_clear_flush_notify(vma, addr, ptep);
	set_pte_at_notify(mm, addr, ptep, mk_pte(new_page, vma->vm_page_prot));

	page_remove_rmap(old_page, false);
	if (!page_mapped(old_page))
		try_to_free_swap(old_page);
	pte_unmap_unlock(ptep, ptl);

	if (vma->vm_flags & VM_LOCKED)
		munlock_vma_page(old_page);
	put_page(old_page);

	err = 0;
 unlock:
	mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
	unlock_page(old_page);
	return err;
}
Example #6
/*
 * We hold the mm semaphore for reading and vma->vm_mm->page_table_lock
 */
static inline void break_cow(struct vm_area_struct * vma, struct page * new_page, unsigned long address, 
		pte_t *page_table)
{
	flush_page_to_ram(new_page);
#ifndef CONFIG_SUPERH
	/* Not needed for VIPT cache (need better API for caches) */
	flush_cache_page(vma, address);
#endif
	establish_pte(vma, address, page_table, pte_mkwrite(pte_mkdirty(mk_pte(new_page, vma->vm_page_prot))));
}
Example #7
/**
 * \<\<private\>\> Writes a specified memory area into the checkpoint
 * file - heavy version.  It fills out the rest of the header, sets the
 * pathname size to 0 (not used), and writes the header into the
 * checkpoint file.
 *
 * It is necessary to align the file position to the page size; otherwise
 * we wouldn't be able to mmap pages from the file when restoring the
 * checkpoint.
 *
 * The following algorithm dumps the pages; the idea comes from a regular
 * core dump handler (e.g. the one in fs/binfmt_elf.c):
 *
 \verbatim
  For each page in the region do:
    - get the page - might trigger pagefaults to load the pages
    - map the page descriptor to a valid linear address (user pages might
    be in high memory)
    - if the page is zero or marked untouched, advance the offset in the
    checkpoint only - there is no need to write 0's to disk, just create
    the gap for them.
    - otherwise write the entire page into the checkpoint file.
 \endverbatim
 *
 * @param *ckpt - checkpoint file where the area is to be stored
 * @param *vma - the actual VM area that is being processed
 * @param *hdr - partially filled VM area header
 * @return 0 upon success.
 */
static int tcmi_ckpt_vm_area_write_h(struct tcmi_ckpt *ckpt, 
				     struct vm_area_struct *vma, 
				     struct tcmi_ckpt_vm_area_hdr *hdr)
{
	/* current address within the dumped VM area */
	unsigned long addr;

	/* finish the header */
	hdr->type = TCMI_CKPT_VM_AREA_HEAVY;
	hdr->pathname_size = 0;
	/* write the header into the checkpoint */
	if (tcmi_ckpt_write(ckpt, hdr, sizeof(*hdr)) < 0) {
		mdbg(ERR3, "Error writing VM area header chunk");
		goto exit0;
	}
	/* align the current file position to page size boundary */
	tcmi_ckpt_page_align(ckpt);
	for (addr = vma->vm_start; addr < vma->vm_end; addr += PAGE_SIZE) {
		struct page *page;
		struct vm_area_struct *vma;
		void *kaddr;
		if (get_user_pages(current, current->mm, addr, 1, 0, 1,
				   &page, &vma) <= 0) {
			mdbg(INFO4, "Skipping untouched page at %08lx", addr);
			tcmi_ckpt_seek(ckpt, PAGE_SIZE, 1);
			continue;
		} 
		/*if (page == ZERO_PAGE(addr)) {
			mdbg(INFO4, "Skipping zero page at %08lx", addr);
			tcmi_ckpt_seek(ckpt, PAGE_SIZE, 1);
			continue;
		}
		*/
		/* actual page, that needs to be written. */
		flush_cache_page(vma, addr,  page_to_pfn(page));  /* TODO: check this fix is correct */
		kaddr = tcmi_ckpt_vm_area_kmap(page);
		/* write the page into the checkpoint */
		if (tcmi_ckpt_write(ckpt, kaddr, PAGE_SIZE) < 0) {
			mdbg(ERR3, "Error writing page at %08lx", addr);
			goto exit0;
		}
		tcmi_ckpt_vm_area_kunmap(page);
	}
	return 0;

	/* error handling */
 exit0:
	return -EINVAL;
}
Example #8
int access_process_vm(struct task_struct *tsk, unsigned long addr, void *buf, int len, int write)
{
   struct mm_struct *mm;
   struct vm_area_struct *vma;
   struct page *page;
   void *old_buf = buf;

   mm = get_task_mm(tsk);
   if (!mm)
      return 0;

   down_read(&mm->mmap_sem);
   /* ignore errors, just check how much was successfully transferred */
   while (len) {
      int bytes, ret, offset;
      void *maddr;

      ret = get_user_pages(tsk, mm, addr, 1,
            write, 1, &page, &vma);
      if (ret <= 0)
         break;

      bytes = len;
      offset = addr & (PAGE_SIZE-1);
      if (bytes > PAGE_SIZE-offset)
         bytes = PAGE_SIZE-offset;

      flush_cache_page(vma, addr);

      maddr = kmap(page);
      if (write) {
         copy_to_user_page(vma, page, addr,
                 maddr + offset, buf, bytes);
         set_page_dirty_lock(page);
      } else {
         copy_from_user_page(vma, page, addr,
                   buf, maddr + offset, bytes);
      }
      kunmap(page);
      page_cache_release(page);
      len -= bytes;
      buf += bytes;
      addr += bytes;
   }
   up_read(&mm->mmap_sem);
   mmput(mm);

   return buf - old_buf;
}
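
A typical caller of access_process_vm() is ptrace. A minimal, hypothetical helper in the spirit of PTRACE_PEEKDATA (the helper name and error code are illustrative, not from the kernel sources):

static int peek_word(struct task_struct *child, unsigned long addr, unsigned long *val)
{
	int copied;

	/* read sizeof(*val) bytes from the traced task's address space */
	copied = access_process_vm(child, addr, val, sizeof(*val), 0);
	return copied == sizeof(*val) ? 0 : -EIO;
}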
Example #9
void copy_to_user_page(struct vm_area_struct *vma, struct page *page,
		       unsigned long vaddr, void *dst, const void *src,
		       unsigned long len)
{
	if (boot_cpu_data.dcache.n_aliases && page_mapped(page) &&
	    test_bit(PG_dcache_clean, &page->flags)) {
		void *vto = kmap_coherent(page, vaddr) + (vaddr & ~PAGE_MASK);
		memcpy(vto, src, len);
		kunmap_coherent(vto);
	} else {
		memcpy(dst, src, len);
		if (boot_cpu_data.dcache.n_aliases)
			clear_bit(PG_dcache_clean, &page->flags);
	}

	if (vma->vm_flags & VM_EXEC)
		flush_cache_page(vma, vaddr, page_to_pfn(page));
}
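
For comparison with this SH-specific implementation, the generic fallback used by architectures without cache aliasing problems is essentially a memcpy plus an icache flush; approximately (modeled on asm-generic/cacheflush.h, so treat the exact body as an assumption):

#define copy_to_user_page(vma, page, vaddr, dst, src, len)		\
	do {								\
		memcpy(dst, src, len);					\
		flush_icache_user_range(vma, page, vaddr, len);		\
	} while (0)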
Example #10
/*
 * Actual dumper
 *
 * This is a two-pass process; first we find the offsets of the bits,
 * and then they are actually written out.  If we run out of core limit
 * we just truncate.
 */
static int elf_core_dump(long signr, struct pt_regs * regs, struct file * file)
{
	int has_dumped = 0;
	mm_segment_t fs;
	int segs;
	size_t size = 0;
	int i;
	struct vm_area_struct *vma;
	struct elfhdr elf;
	off_t offset = 0, dataoff;
	unsigned long limit = current->rlim[RLIMIT_CORE].rlim_cur;
	int numnote = 4;
	struct memelfnote notes[4];
	struct elf_prstatus prstatus;	/* NT_PRSTATUS */
	elf_fpregset_t fpu;		/* NT_PRFPREG */
	struct elf_prpsinfo psinfo;	/* NT_PRPSINFO */

	/* first copy the parameters from user space */
	memset(&psinfo, 0, sizeof(psinfo));
	{
		unsigned int i, len;

		len = current->mm->arg_end - current->mm->arg_start;
		if (len >= ELF_PRARGSZ)
			len = ELF_PRARGSZ-1;
		copy_from_user(&psinfo.pr_psargs,
			      (const char *)current->mm->arg_start, len);
		for(i = 0; i < len; i++)
			if (psinfo.pr_psargs[i] == 0)
				psinfo.pr_psargs[i] = ' ';
		psinfo.pr_psargs[len] = 0;

	}

	memset(&prstatus, 0, sizeof(prstatus));
	/*
	 * This transfers the registers from regs into the standard
	 * coredump arrangement, whatever that is.
	 */
#ifdef ELF_CORE_COPY_REGS
	ELF_CORE_COPY_REGS(prstatus.pr_reg, regs)
#else
	if (sizeof(elf_gregset_t) != sizeof(struct pt_regs))
	{
		printk("sizeof(elf_gregset_t) (%ld) != sizeof(struct pt_regs) (%ld)\n",
			(long)sizeof(elf_gregset_t), (long)sizeof(struct pt_regs));
	}
	else
		*(struct pt_regs *)&prstatus.pr_reg = *regs;
#endif

	/* now stop all vm operations */
	down_write(&current->mm->mmap_sem);
	segs = current->mm->map_count;

#ifdef DEBUG
	printk("elf_core_dump: %d segs %lu limit\n", segs, limit);
#endif

	/* Set up header */
	memcpy(elf.e_ident, ELFMAG, SELFMAG);
	elf.e_ident[EI_CLASS] = ELF_CLASS;
	elf.e_ident[EI_DATA] = ELF_DATA;
	elf.e_ident[EI_VERSION] = EV_CURRENT;
	memset(elf.e_ident+EI_PAD, 0, EI_NIDENT-EI_PAD);

	elf.e_type = ET_CORE;
	elf.e_machine = ELF_ARCH;
	elf.e_version = EV_CURRENT;
	elf.e_entry = 0;
	elf.e_phoff = sizeof(elf);
	elf.e_shoff = 0;
#ifdef ELF_CORE_EFLAGS
	elf.e_flags = ELF_CORE_EFLAGS;
#else
	elf.e_flags = 0;
#endif
	elf.e_ehsize = sizeof(elf);
	elf.e_phentsize = sizeof(struct elf_phdr);
	elf.e_phnum = segs+1;		/* Include notes */
	elf.e_shentsize = 0;
	elf.e_shnum = 0;
	elf.e_shstrndx = 0;

	fs = get_fs();
	set_fs(KERNEL_DS);

	has_dumped = 1;
	current->flags |= PF_DUMPCORE;

	DUMP_WRITE(&elf, sizeof(elf));
	offset += sizeof(elf);				/* Elf header */
	offset += (segs+1) * sizeof(struct elf_phdr);	/* Program headers */

	/*
	 * Set up the notes in similar form to SVR4 core dumps made
	 * with info from their /proc.
	 */

	notes[0].name = "CORE";
	notes[0].type = NT_PRSTATUS;
	notes[0].datasz = sizeof(prstatus);
	notes[0].data = &prstatus;
	prstatus.pr_info.si_signo = prstatus.pr_cursig = signr;
	prstatus.pr_sigpend = current->pending.signal.sig[0];
	prstatus.pr_sighold = current->blocked.sig[0];
	psinfo.pr_pid = prstatus.pr_pid = current->pid;
	psinfo.pr_ppid = prstatus.pr_ppid = current->p_pptr->pid;
	psinfo.pr_pgrp = prstatus.pr_pgrp = current->pgrp;
	psinfo.pr_sid = prstatus.pr_sid = current->session;
	prstatus.pr_utime.tv_sec = CT_TO_SECS(current->times.tms_utime);
	prstatus.pr_utime.tv_usec = CT_TO_USECS(current->times.tms_utime);
	prstatus.pr_stime.tv_sec = CT_TO_SECS(current->times.tms_stime);
	prstatus.pr_stime.tv_usec = CT_TO_USECS(current->times.tms_stime);
	prstatus.pr_cutime.tv_sec = CT_TO_SECS(current->times.tms_cutime);
	prstatus.pr_cutime.tv_usec = CT_TO_USECS(current->times.tms_cutime);
	prstatus.pr_cstime.tv_sec = CT_TO_SECS(current->times.tms_cstime);
	prstatus.pr_cstime.tv_usec = CT_TO_USECS(current->times.tms_cstime);

#ifdef DEBUG
	dump_regs("Passed in regs", (elf_greg_t *)regs);
	dump_regs("prstatus regs", (elf_greg_t *)&prstatus.pr_reg);
#endif

	notes[1].name = "CORE";
	notes[1].type = NT_PRPSINFO;
	notes[1].datasz = sizeof(psinfo);
	notes[1].data = &psinfo;
	i = current->state ? ffz(~current->state) + 1 : 0;
	psinfo.pr_state = i;
	psinfo.pr_sname = (i < 0 || i > 5) ? '.' : "RSDZTD"[i];
	psinfo.pr_zomb = psinfo.pr_sname == 'Z';
	psinfo.pr_nice = task_nice(current);
	psinfo.pr_flag = current->flags;
	psinfo.pr_uid = NEW_TO_OLD_UID(current->uid);
	psinfo.pr_gid = NEW_TO_OLD_GID(current->gid);
	strncpy(psinfo.pr_fname, current->comm, sizeof(psinfo.pr_fname));

	notes[2].name = "CORE";
	notes[2].type = NT_TASKSTRUCT;
	notes[2].datasz = sizeof(*current);
	notes[2].data = current;

	/* Try to dump the FPU. */
	prstatus.pr_fpvalid = dump_fpu (regs, &fpu);
	if (!prstatus.pr_fpvalid)
	{
		numnote--;
	}
	else
	{
		notes[3].name = "CORE";
		notes[3].type = NT_PRFPREG;
		notes[3].datasz = sizeof(fpu);
		notes[3].data = &fpu;
	}
	
	/* Write notes phdr entry */
	{
		struct elf_phdr phdr;
		int sz = 0;

		for(i = 0; i < numnote; i++)
			sz += notesize(&notes[i]);

		phdr.p_type = PT_NOTE;
		phdr.p_offset = offset;
		phdr.p_vaddr = 0;
		phdr.p_paddr = 0;
		phdr.p_filesz = sz;
		phdr.p_memsz = 0;
		phdr.p_flags = 0;
		phdr.p_align = 0;

		offset += phdr.p_filesz;
		DUMP_WRITE(&phdr, sizeof(phdr));
	}

	/* Page-align dumped data */
	dataoff = offset = roundup(offset, ELF_EXEC_PAGESIZE);

	/* Write program headers for segments dump */
	for(vma = current->mm->mmap; vma != NULL; vma = vma->vm_next) {
		struct elf_phdr phdr;
		size_t sz;

		sz = vma->vm_end - vma->vm_start;

		phdr.p_type = PT_LOAD;
		phdr.p_offset = offset;
		phdr.p_vaddr = vma->vm_start;
		phdr.p_paddr = 0;
		phdr.p_filesz = maydump(vma) ? sz : 0;
		phdr.p_memsz = sz;
		offset += phdr.p_filesz;
		phdr.p_flags = vma->vm_flags & VM_READ ? PF_R : 0;
		if (vma->vm_flags & VM_WRITE) phdr.p_flags |= PF_W;
		if (vma->vm_flags & VM_EXEC) phdr.p_flags |= PF_X;
		phdr.p_align = ELF_EXEC_PAGESIZE;

		DUMP_WRITE(&phdr, sizeof(phdr));
	}

	for(i = 0; i < numnote; i++)
		if (!writenote(&notes[i], file))
			goto end_coredump;

	DUMP_SEEK(dataoff);

	for(vma = current->mm->mmap; vma != NULL; vma = vma->vm_next) {
		unsigned long addr;

		if (!maydump(vma))
			continue;

#ifdef DEBUG
		printk("elf_core_dump: writing %08lx-%08lx\n", vma->vm_start, vma->vm_end);
#endif

		for (addr = vma->vm_start;
		     addr < vma->vm_end;
		     addr += PAGE_SIZE) {
			struct page* page;
			struct vm_area_struct *vma;

			if (get_user_pages(current, current->mm, addr, 1, 0, 1,
						&page, &vma) <= 0) {
				DUMP_SEEK (file->f_pos + PAGE_SIZE);
			} else {
				if (page == ZERO_PAGE(addr)) {
					DUMP_SEEK (file->f_pos + PAGE_SIZE);
				} else {
					void *kaddr;
					flush_cache_page(vma, addr);
					kaddr = kmap(page);
					DUMP_WRITE(kaddr, PAGE_SIZE);
					flush_page_to_ram(page);
					kunmap(page);
				}
				put_page(page);
			}
		}
	}

	if ((off_t) file->f_pos != offset) {
		/* Sanity check */
		printk("elf_core_dump: file->f_pos (%ld) != offset (%ld)\n",
		       (off_t) file->f_pos, offset);
	}

 end_coredump:
	set_fs(fs);
	up_write(&current->mm->mmap_sem);
	return has_dumped;
}
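
DUMP_WRITE() and DUMP_SEEK() used above are local macros in fs/binfmt_elf.c; they enforce the core file size limit and jump to end_coredump on failure. Roughly (paraphrased from the 2.4 sources, so treat the exact bodies as an approximation):

#define DUMP_WRITE(addr, nr)	\
	if ((size += (nr)) > limit || !dump_write(file, (addr), (nr))) \
		goto end_coredump;
#define DUMP_SEEK(off)	\
	if (!dump_seek(file, (off))) \
		goto end_coredump;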
Example #11
/* mm->page_table_lock is held. mmap_sem is not held */
static inline int try_to_swap_out(struct mm_struct * mm, struct vm_area_struct* vma, unsigned long address, pte_t * page_table, struct page *page, zone_t * classzone)
{
	pte_t pte;
	swp_entry_t entry;

	/* Don't look at this pte if it's been accessed recently. */
	if ((vma->vm_flags & VM_LOCKED) || ptep_test_and_clear_young(page_table)) {
		mark_page_accessed(page);
		return 0;
	}

	/* Don't bother unmapping pages that are active */
	if (PageActive(page))
		return 0;

	/* Don't bother replenishing zones not under pressure.. */
	if (!memclass(page->zone, classzone))
		return 0;

	if (TryLockPage(page))
		return 0;

	/* From this point on, the odds are that we're going to
	 * nuke this pte, so read and clear the pte.  This hook
	 * is needed on CPUs which update the accessed and dirty
	 * bits in hardware.
	 */
	flush_cache_page(vma, address);
	pte = ptep_get_and_clear(page_table);
	flush_tlb_page(vma, address);

	if (pte_dirty(pte))
		set_page_dirty(page);

	/*
	 * Is the page already in the swap cache? If so, then
	 * we can just drop our reference to it without doing
	 * any IO - it's already up-to-date on disk.
	 */
	if (PageSwapCache(page)) {
		entry.val = page->index;
		swap_duplicate(entry);
set_swap_pte:
		set_pte(page_table, swp_entry_to_pte(entry));
drop_pte:
		mm->rss--;
		UnlockPage(page);
		{
			int freeable = page_count(page) - !!page->buffers <= 2;
			page_cache_release(page);
			return freeable;
		}
	}

	/*
	 * Is it a clean page? Then it must be recoverable
	 * by just paging it in again, and we can just drop
	 * it..  or if it's dirty but has backing store,
	 * just mark the page dirty and drop it.
	 *
	 * However, this won't actually free any real
	 * memory, as the page will just be in the page cache
	 * somewhere, and as such we should just continue
	 * our scan.
	 *
	 * Basically, this just makes it possible for us to do
	 * some real work in the future in "refill_inactive()".
	 */
	if (page->mapping)
		goto drop_pte;
	if (!PageDirty(page))
		goto drop_pte;

	/*
	 * Anonymous buffercache pages can be left behind by
	 * concurrent truncate and pagefault.
	 */
	if (page->buffers)
		goto preserve;

	/*
	 * This is a dirty, swappable page.  First of all,
	 * get a suitable swap entry for it, and make sure
	 * we have the swap cache set up to associate the
	 * page with that swap entry.
	 */
	for (;;) {
		entry = get_swap_page();
		if (!entry.val)
			break;
		/* Add it to the swap cache and mark it dirty
		 * (adding to the page cache will clear the dirty
		 * and uptodate bits, so we need to do it again)
		 */
		if (add_to_swap_cache(page, entry) == 0) {
			SetPageUptodate(page);
			set_page_dirty(page);
			goto set_swap_pte;
		}
		/* Raced with "speculative" read_swap_cache_async */
		swap_free(entry);
	}

	/* No swap space left */
preserve:
	set_pte(page_table, pte);
	UnlockPage(page);
	return 0;
}
Example #12
/*
 * This routine handles present pages, when users try to write
 * to a shared page. It is done by copying the page to a new address
 * and decrementing the shared-page counter for the old page.
 *
 * Goto-purists beware: the only reason for goto's here is that it results
 * in better assembly code.. The "default" path will see no jumps at all.
 *
 * Note that this routine assumes that the protection checks have been
 * done by the caller (the low-level page fault routine in most cases).
 * Thus we can safely just mark it writable once we've done any necessary
 * COW.
 *
 * We also mark the page dirty at this point even though the page will
 * change only once the write actually happens. This avoids a few races,
 * and potentially makes it more efficient.
 *
 * We enter with the page table read-lock held, and need to exit without
 * it.
 */
static int do_wp_page(struct mm_struct *mm, struct vm_area_struct * vma,
	unsigned long address, pte_t *page_table, pte_t pte)
{
	struct page *old_page, *new_page;

	old_page = pte_page(pte);
	if (!VALID_PAGE(old_page))
		goto bad_wp_page;
	
	/*
	 * We can avoid the copy if:
	 * - we're the only user (count == 1)
	 * - the only other user is the swap cache,
	 *   and the only swap cache user is itself,
	 *   in which case we can just continue to
	 *   use the same swap cache (it will be
	 *   marked dirty).
	 */
	switch (page_count(old_page)) {
	case 2:
		/*
		 * Lock the page so that no one can look it up from
		 * the swap cache, grab a reference and start using it.
		 * Can not do lock_page, holding page_table_lock.
		 */
		if (!PageSwapCache(old_page) || TryLockPage(old_page))
			break;
		if (is_page_shared(old_page)) {
			UnlockPage(old_page);
			break;
		}
		UnlockPage(old_page);
		/* FallThrough */
	case 1:
		flush_cache_page(vma, address);
		establish_pte(vma, address, page_table, pte_mkyoung(pte_mkdirty(pte_mkwrite(pte))));
		spin_unlock(&mm->page_table_lock);
		return 1;	/* Minor fault */
	}

	/*
	 * Ok, we need to copy. Oh, well..
	 */
	spin_unlock(&mm->page_table_lock);
	new_page = page_cache_alloc();
	if (!new_page)
		return -1;
	spin_lock(&mm->page_table_lock);

	/*
	 * Re-check the pte - we dropped the lock
	 */
	if (pte_same(*page_table, pte)) {
		if (PageReserved(old_page))
			++mm->rss;
		break_cow(vma, old_page, new_page, address, page_table);

		/* Free the old page.. */
		new_page = old_page;
	}
	spin_unlock(&mm->page_table_lock);
	page_cache_release(new_page);
	return 1;	/* Minor fault */

bad_wp_page:
	spin_unlock(&mm->page_table_lock);
	printk("do_wp_page: bogus page at address %08lx (page 0x%lx)\n",address,(unsigned long)old_page);
	return -1;
}
Example #13
int memory_remove_pte(struct local_page *lp) {

	unsigned long del_user_va = lp->user_va;
	unsigned long del_slab_va = lp->slab_va;
	unsigned long del_pfn = page_to_pfn(virt_to_page(del_slab_va));
	struct vm_area_struct *del_vma = lp->vma;
	struct mm_struct *del_mm = del_vma->vm_mm;

#if(DEBUG)
	printk("[%s]\n", __FUNCTION__);
	printk("del_user_va: %p\n", del_user_va);
	printk("del_slab_va: %p\n", del_slab_va);
	printk("del_pfn: %p\n", del_pfn);
	printk("del_vma: %p\n", del_vma);
	printk("del_mm: %p\n", del_mm);
#endif

	// TODO: find PTE (need to be changed for x86)
	pgd_t *pgd;
	pud_t *pud;
	pmd_t *pmd;
	pte_t *ptep;

	pgd = pgd_offset(del_mm, del_user_va);
	if (pgd_none(*pgd) || pgd_bad(*pgd)) {
		printk("<error> invalid pgd\n");
		return -1;
	}

	pud = pud_offset(pgd, del_user_va);
	if (pud_none(*pud) || pud_bad(*pud)) {
		printk("<error> invalid pud\n");
		return -1;
	}

	pmd = pmd_offset(pud, del_user_va);
	if (pmd_none(*pmd) || pmd_bad(*pmd)) {
		printk("<error> invalid pmd\n");
		return -1;
	}

	ptep = pte_offset_kernel(pmd, del_user_va);
	if (!ptep) {
		printk("<error> invalid pte\n");
		return -1;
	}

#if(DEBUG)
	printk("ptep: %p\n", ptep);
	printk("pte: %p\n", *ptep);
	printk("pfn: %p\n", pte_pfn(*ptep));
#endif

	// flush cache
	flush_cache_page(del_vma, del_user_va, del_pfn);

	// clear PTE
	pte_clear(del_mm, del_user_va, ptep);

	// flush TLB
	flush_tlb_page(del_vma, del_user_va);

	return 0;
}
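
On kernels with five-level page tables (v4.12 and later), an extra p4d level sits between pgd and pud, so the walk above needs one more lookup; a hedged sketch of the added step:

	p4d_t *p4d;

	p4d = p4d_offset(pgd, del_user_va);
	if (p4d_none(*p4d) || p4d_bad(*p4d)) {
		printk("<error> invalid p4d\n");
		return -1;
	}
	/* pud is then derived from the p4d entry instead of the pgd */
	pud = pud_offset(p4d, del_user_va);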
Example #14
/*
 * The swap-out functions return 1 if they successfully
 * threw something out, and we got a free page. It returns
 * zero if it couldn't do anything, and any other value
 * indicates it decreased rss, but the page was shared.
 *
 * NOTE! If it sleeps, it *must* return 1 to make sure we
 * don't continue with the swap-out. Otherwise we may be
 * using a process that no longer actually exists (it might
 * have died while we slept).
 */
static int try_to_swap_out(struct mm_struct * mm, struct vm_area_struct* vma, unsigned long address, pte_t * page_table, int gfp_mask)
{
	pte_t pte;
	swp_entry_t entry;
	struct page * page;
	int onlist;

	pte = *page_table;
	if (!pte_present(pte))
		goto out_failed;
	page = pte_page(pte);
	if ((!VALID_PAGE(page)) || PageReserved(page))
		goto out_failed;

	if (mm->swap_cnt)
		mm->swap_cnt--;

	onlist = PageActive(page);
	/* Don't look at this pte if it's been accessed recently. */
	if (ptep_test_and_clear_young(page_table)) {
		age_page_up(page);
		goto out_failed;
	}
	if (!onlist)
		/* The page is still mapped, so it can't be freeable... */
		age_page_down_ageonly(page);

	/*
	 * If the page is in active use by us, or if the page
	 * is in active use by others, don't unmap it or
	 * (worse) start unneeded IO.
	 */
	if (page->age > 0)
		goto out_failed;

	if (TryLockPage(page))
		goto out_failed;

	/* From this point on, the odds are that we're going to
	 * nuke this pte, so read and clear the pte.  This hook
	 * is needed on CPUs which update the accessed and dirty
	 * bits in hardware.
	 */
	pte = ptep_get_and_clear(page_table);

	/*
	 * Is the page already in the swap cache? If so, then
	 * we can just drop our reference to it without doing
	 * any IO - it's already up-to-date on disk.
	 *
	 * Return 0, as we didn't actually free any real
	 * memory, and we should just continue our scan.
	 */
	if (PageSwapCache(page)) {
		entry.val = page->index;
		if (pte_dirty(pte))
			set_page_dirty(page);
set_swap_pte:
		swap_duplicate(entry);
		set_pte(page_table, swp_entry_to_pte(entry));
drop_pte:
		UnlockPage(page);
		mm->rss--;
		flush_tlb_page(vma, address);
		deactivate_page(page);
		page_cache_release(page);
out_failed:
		return 0;
	}

	/*
	 * Is it a clean page? Then it must be recoverable
	 * by just paging it in again, and we can just drop
	 * it..
	 *
	 * However, this won't actually free any real
	 * memory, as the page will just be in the page cache
	 * somewhere, and as such we should just continue
	 * our scan.
	 *
	 * Basically, this just makes it possible for us to do
	 * some real work in the future in "refill_inactive()".
	 */
	flush_cache_page(vma, address);
	if (!pte_dirty(pte))
		goto drop_pte;

	/*
	 * Ok, it's really dirty. That means that
	 * we should either create a new swap cache
	 * entry for it, or we should write it back
	 * to its own backing store.
	 */
	if (page->mapping) {
		set_page_dirty(page);
		goto drop_pte;
	}

	/*
	 * This is a dirty, swappable page.  First of all,
	 * get a suitable swap entry for it, and make sure
	 * we have the swap cache set up to associate the
	 * page with that swap entry.
	 */
	entry = get_swap_page();
	if (!entry.val)
		goto out_unlock_restore; /* No swap space left */

	/* Add it to the swap cache and mark it dirty */
	add_to_swap_cache(page, entry);
	set_page_dirty(page);
	goto set_swap_pte;

out_unlock_restore:
	set_pte(page_table, pte);
	UnlockPage(page);
	return 0;
}
Example #15
static void flush_arg_page(struct linux_binprm *bprm, unsigned long pos,
		struct page *page)
{
	flush_cache_page(bprm->vma, pos, page_to_pfn(page));
}
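
flush_arg_page() is called from copy_strings() in fs/exec.c after argument bytes have been written through a kernel mapping of the new stack page. A simplified, hypothetical sketch of such a call site (names condensed for illustration, not a verbatim quote):

static void copy_arg_chunk(struct linux_binprm *bprm, struct page *page,
		unsigned long pos, const char *src, int bytes)
{
	char *kaddr = kmap(page);

	/* copy the argument bytes into the kernel mapping of the page */
	memcpy(kaddr + (pos & ~PAGE_MASK), src, bytes);
	kunmap(page);
	/* make the data visible at the user address this page will be mapped at */
	flush_arg_page(bprm, pos & PAGE_MASK, page);
}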
Example #16
/*
 * This routine handles present pages, when users try to write
 * to a shared page. It is done by copying the page to a new address
 * and decrementing the shared-page counter for the old page.
 *
 * Goto-purists beware: the only reason for goto's here is that it results
 * in better assembly code.. The "default" path will see no jumps at all.
 *
 * Note that this routine assumes that the protection checks have been
 * done by the caller (the low-level page fault routine in most cases).
 * Thus we can safely just mark it writable once we've done any necessary
 * COW.
 *
 * We also mark the page dirty at this point even though the page will
 * change only once the write actually happens. This avoids a few races,
 * and potentially makes it more efficient.
 */
void do_wp_page(struct task_struct * tsk, struct vm_area_struct * vma,
	unsigned long address, int write_access)
{
	pgd_t *page_dir;
	pmd_t *page_middle;
	pte_t *page_table, pte;
	unsigned long old_page, new_page;

	new_page = __get_free_page(GFP_KERNEL);
	page_dir = pgd_offset(vma->vm_mm, address);
	if (pgd_none(*page_dir))
		goto end_wp_page;
	if (pgd_bad(*page_dir))
		goto bad_wp_pagedir;
	page_middle = pmd_offset(page_dir, address);
	if (pmd_none(*page_middle))
		goto end_wp_page;
	if (pmd_bad(*page_middle))
		goto bad_wp_pagemiddle;
	page_table = pte_offset(page_middle, address);
	pte = *page_table;
	if (!pte_present(pte))
		goto end_wp_page;
	if (pte_write(pte))
		goto end_wp_page;
	old_page = pte_page(pte);
	if (old_page >= high_memory)
		goto bad_wp_page;
	tsk->min_flt++;
	/*
	 * Do we need to copy?
	 */
	if (mem_map[MAP_NR(old_page)].count != 1) {
		if (new_page) {
			if (PageReserved(mem_map + MAP_NR(old_page)))
				++vma->vm_mm->rss;
			copy_page(old_page,new_page);
			flush_page_to_ram(old_page);
			flush_page_to_ram(new_page);
			flush_cache_page(vma, address);
			set_pte(page_table, pte_mkwrite(pte_mkdirty(mk_pte(new_page, vma->vm_page_prot))));
			free_page(old_page);
			flush_tlb_page(vma, address);
			return;
		}
		flush_cache_page(vma, address);
		set_pte(page_table, BAD_PAGE);
		flush_tlb_page(vma, address);
		free_page(old_page);
		oom(tsk);
		return;
	}
	flush_cache_page(vma, address);
	set_pte(page_table, pte_mkdirty(pte_mkwrite(pte)));
	flush_tlb_page(vma, address);
	if (new_page)
		free_page(new_page);
	return;
bad_wp_page:
	printk("do_wp_page: bogus page at address %08lx (%08lx)\n",address,old_page);
	send_sig(SIGKILL, tsk, 1);
	goto end_wp_page;
bad_wp_pagemiddle:
	printk("do_wp_page: bogus page-middle at address %08lx (%08lx)\n", address, pmd_val(*page_middle));
	send_sig(SIGKILL, tsk, 1);
	goto end_wp_page;
bad_wp_pagedir:
	printk("do_wp_page: bogus page-dir entry at address %08lx (%08lx)\n", address, pgd_val(*page_dir));
	send_sig(SIGKILL, tsk, 1);
end_wp_page:
	if (new_page)
		free_page(new_page);
	return;
}