Example #1
0
/*
 * check that an address falls within the bounds of the target process's memory mappings
 */
static inline int is_user_addr_valid(struct task_struct *child,
				     unsigned long start, unsigned long len)
{
	struct vm_area_struct *vma;
	struct sram_list_struct *sraml;

	/* overflow */
	if (start + len < start)
		return -EIO;

	vma = find_vma(child->mm, start);
	if (vma && start >= vma->vm_start && start + len <= vma->vm_end)
			return 0;

	for (sraml = child->mm->context.sram_list; sraml; sraml = sraml->next)
		if (start >= (unsigned long)sraml->addr
		    && start + len < (unsigned long)sraml->addr + sraml->length)
			return 0;

	if (start >= FIXED_CODE_START && start + len < FIXED_CODE_END)
		return 0;

	return -EIO;
}
static int check_vaddr_bounds(unsigned long start, unsigned long end)
{
	struct mm_struct *mm = current->active_mm;
	struct vm_area_struct *vma;
	int ret = 1;

	if (end < start)
		goto out;

	down_read(&mm->mmap_sem);
	vma = find_vma(mm, start);
	if (vma && vma->vm_start < end) {
		if (start < vma->vm_start)
			goto out_up;
		if (end > vma->vm_end)
			goto out_up;
		ret = 0;
	}

out_up:
	up_read(&mm->mmap_sem);
out:
	return ret;
}
Example #3
0
unsigned long
hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
		unsigned long len, unsigned long pgoff, unsigned long flags)
{
	struct mm_struct *mm = current->mm;
	struct vm_area_struct *vma;
	unsigned long task_size = TASK_SIZE;

	if (test_thread_flag(TIF_32BIT))
		task_size = STACK_TOP32;

	if (len & ~HPAGE_MASK)
		return -EINVAL;
	if (len > task_size)
		return -ENOMEM;

	if (flags & MAP_FIXED) {
		if (prepare_hugepage_range(file, addr, len))
			return -EINVAL;
		return addr;
	}

	if (addr) {
		addr = ALIGN(addr, HPAGE_SIZE);
		vma = find_vma(mm, addr);
		if (task_size - len >= addr &&
		    (!vma || addr + len <= vma->vm_start))
			return addr;
	}
	if (mm->get_unmapped_area == arch_get_unmapped_area)
		return hugetlb_get_unmapped_area_bottomup(file, addr, len,
				pgoff, flags);
	else
		return hugetlb_get_unmapped_area_topdown(file, addr, len,
				pgoff, flags);
}
Example #4
0
/*
 * IA-32 Huge TLB Page Support for Kernel.
 *
 * Copyright (C) 2002, Rohit Seth <rohit.seth@intel.com>
 */

#include <linux/init.h>
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/hugetlb.h>
#include <linux/pagemap.h>
#include <linux/err.h>
#include <linux/sysctl.h>
#include <asm/mman.h>
#include <asm/tlb.h>
#include <asm/tlbflush.h>
#include <asm/pgalloc.h>

#if 0	/* This is just for testing */
struct page *
follow_huge_addr(struct mm_struct *mm, unsigned long address, int write)
{
	unsigned long start = address;
	int length = 1;
	int nr;
	struct page *page;
	struct vm_area_struct *vma;

	vma = find_vma(mm, addr);
	if (!vma || !is_vm_hugetlb_page(vma))
		return ERR_PTR(-EINVAL);

	pte = huge_pte_offset(mm, address);

	/* hugetlb should be locked, and hence, prefaulted */
	WARN_ON(!pte || pte_none(*pte));

	page = &pte_page(*pte)[vpfn % (HPAGE_SIZE/PAGE_SIZE)];

	WARN_ON(!PageHead(page));

	return page;
}
Example #5
0
int mm_brk(struct mm_struct *mm, uintptr_t addr, size_t len)
{
	uintptr_t start = ROUNDDOWN(addr, PGSIZE), end =
	    ROUNDUP(addr + len, PGSIZE);
	if (!USER_ACCESS(start, end)) {
		return -E_INVAL;
	}

	int ret;
	if ((ret = mm_unmap(mm, start, end - start)) != 0) {
		return ret;
	}
	uint32_t vm_flags = VM_READ | VM_WRITE;
	struct vma_struct *vma = find_vma(mm, start - 1);
	if (vma != NULL && vma->vm_end == start && vma->vm_flags == vm_flags) {
		vma->vm_end = end;
		return 0;
	}
	if ((vma = vma_create(start, end, vm_flags)) == NULL) {
		return -E_NO_MEM;
	}
	insert_vma_struct(mm, vma);
	return 0;
}
Example #6
0
static void
check_vma_struct(void) {
    size_t nr_free_pages_store = nr_free_pages();

    struct mm_struct *mm = mm_create();
    assert(mm != NULL);

    int step1 = 10, step2 = step1 * 10;

    int i;
    for (i = step1; i >= 1; i --) {
        struct vma_struct *vma = vma_create(i * 5, i * 5 + 2, 0);
        assert(vma != NULL);
        insert_vma_struct(mm, vma);
    }

    for (i = step1 + 1; i <= step2; i ++) {
        struct vma_struct *vma = vma_create(i * 5, i * 5 + 2, 0);
        assert(vma != NULL);
        insert_vma_struct(mm, vma);
    }

    list_entry_t *le = list_next(&(mm->mmap_list));

    for (i = 1; i <= step2; i ++) {
        assert(le != &(mm->mmap_list));
        struct vma_struct *mmap = le2vma(le, list_link);
        assert(mmap->vm_start == i * 5 && mmap->vm_end == i * 5 + 2);
        le = list_next(le);
    }

    for (i = 5; i <= 5 * step2; i +=5) {
        struct vma_struct *vma1 = find_vma(mm, i);
        assert(vma1 != NULL);
        struct vma_struct *vma2 = find_vma(mm, i+1);
        assert(vma2 != NULL);
        struct vma_struct *vma3 = find_vma(mm, i+2);
        assert(vma3 == NULL);
        struct vma_struct *vma4 = find_vma(mm, i+3);
        assert(vma4 == NULL);
        struct vma_struct *vma5 = find_vma(mm, i+4);
        assert(vma5 == NULL);

        assert(vma1->vm_start == i  && vma1->vm_end == i  + 2);
        assert(vma2->vm_start == i  && vma2->vm_end == i  + 2);
    }

    for (i =4; i>=0; i--) {
        struct vma_struct *vma_below_5= find_vma(mm,i);
        if (vma_below_5 != NULL ) {
           cprintf("vma_below_5: i %x, start %x, end %x\n",i, vma_below_5->vm_start, vma_below_5->vm_end); 
        }
        assert(vma_below_5 == NULL);
    }

    mm_destroy(mm);

    assert(nr_free_pages_store == nr_free_pages());

    cprintf("check_vma_struct() succeeded!\n");
}
/*
 * Hacked from kernel function __get_user_pages in mm/memory.c
 *
 * Handle buffers allocated by other kernel space driver and mmaped into user
 * space, function Ignore the VM_PFNMAP and VM_IO flag in VMA structure
 *
 * Get physical pages from user space virtual address and update into page list
 */
static int __get_pfnmap_pages(struct task_struct *tsk, struct mm_struct *mm,
			      unsigned long start, int nr_pages,
			      unsigned int gup_flags, struct page **pages,
			      struct vm_area_struct **vmas)
{
	int i, ret;
	unsigned long vm_flags;

	if (nr_pages <= 0)
		return 0;

	VM_BUG_ON(!!pages != !!(gup_flags & FOLL_GET));

	/*
	 * Require read or write permissions.
	 * If FOLL_FORCE is set, we only require the "MAY" flags.
	 */
	vm_flags  = (gup_flags & FOLL_WRITE) ?
			(VM_WRITE | VM_MAYWRITE) : (VM_READ | VM_MAYREAD);
	vm_flags &= (gup_flags & FOLL_FORCE) ?
			(VM_MAYREAD | VM_MAYWRITE) : (VM_READ | VM_WRITE);
	i = 0;

	do {
		struct vm_area_struct *vma;

		vma = find_vma(mm, start);
		if (!vma) {
			dev_err(atomisp_dev, "find_vma failed\n");
			return i ? : -EFAULT;
		}

		if (is_vm_hugetlb_page(vma)) {
			/*
			i = follow_hugetlb_page(mm, vma, pages, vmas,
					&start, &nr_pages, i, gup_flags);
			*/
			continue;
		}

		do {
			struct page *page;
			unsigned long pfn;

			/*
			 * If we have a pending SIGKILL, don't keep faulting
			 * pages and potentially allocating memory.
			 */
			if (unlikely(fatal_signal_pending(current))) {
				dev_err(atomisp_dev,
					"fatal_signal_pending in %s\n",
					__func__);
				return i ? i : -ERESTARTSYS;
			}

			ret = follow_pfn(vma, start, &pfn);
			if (ret) {
				dev_err(atomisp_dev, "follow_pfn() failed\n");
				return i ? : -EFAULT;
			}

			page = pfn_to_page(pfn);
			if (IS_ERR(page))
				return i ? i : PTR_ERR(page);
			if (pages) {
				pages[i] = page;
				get_page(page);
				flush_anon_page(vma, page, start);
				flush_dcache_page(page);
			}
			if (vmas)
				vmas[i] = vma;
			i++;
			start += PAGE_SIZE;
			nr_pages--;
		} while (nr_pages && start < vma->vm_end);
	} while (nr_pages);
Example #8
0
/*
 * This ought to be kept in sync with the powerpc specific do_page_fault
 * function. Currently, there are a few corner cases that we haven't had
 * to handle fortunately.
 */
int copro_handle_mm_fault(struct mm_struct *mm, unsigned long ea,
		unsigned long dsisr, unsigned *flt)
{
	struct vm_area_struct *vma;
	unsigned long is_write;
	int ret;

	if (mm == NULL)
		return -EFAULT;

	if (mm->pgd == NULL)
		return -EFAULT;

	down_read(&mm->mmap_sem);
	ret = -EFAULT;
	vma = find_vma(mm, ea);
	if (!vma)
		goto out_unlock;

	if (ea < vma->vm_start) {
		if (!(vma->vm_flags & VM_GROWSDOWN))
			goto out_unlock;
		if (expand_stack(vma, ea))
			goto out_unlock;
	}

	is_write = dsisr & DSISR_ISSTORE;
	if (is_write) {
		if (!(vma->vm_flags & VM_WRITE))
			goto out_unlock;
	} else {
		if (!(vma->vm_flags & (VM_READ | VM_EXEC)))
			goto out_unlock;
		/*
		 * PROT_NONE is covered by the VMA check above.
		 * and hash should get a NOHPTE fault instead of
		 * a PROTFAULT in case fixup is needed for things
		 * like autonuma.
		 */
		if (!radix_enabled())
			WARN_ON_ONCE(dsisr & DSISR_PROTFAULT);
	}

	ret = 0;
	*flt = handle_mm_fault(vma, ea, is_write ? FAULT_FLAG_WRITE : 0);
	if (unlikely(*flt & VM_FAULT_ERROR)) {
		if (*flt & VM_FAULT_OOM) {
			ret = -ENOMEM;
			goto out_unlock;
		} else if (*flt & (VM_FAULT_SIGBUS | VM_FAULT_SIGSEGV)) {
			ret = -EFAULT;
			goto out_unlock;
		}
		BUG();
	}

	if (*flt & VM_FAULT_MAJOR)
		current->maj_flt++;
	else
		current->min_flt++;

out_unlock:
	up_read(&mm->mmap_sem);
	return ret;
}
Example #9
0
void
dtrace_getupcstack(uint64_t *pcstack, int pcstack_limit)
{   uint64_t *pcstack_end = pcstack + pcstack_limit;
    volatile uint8_t *flags =
        (volatile uint8_t *)&cpu_core[cpu_get_id()].cpuc_dtrace_flags;
    unsigned long *sp;
    unsigned long *bos;

    if (*flags & CPU_DTRACE_FAULT)
        return;

    if (pcstack_limit <= 0)
        return;

    *pcstack++ = (uint64_t)current->pid;

    if (pcstack >= pcstack_end)
        return;

    /***********************************************/
    /*   Linux provides a built in function which  */
    /*   is  good  because  stack walking is arch  */
    /*   dependent.            (save_stack_trace)  */
    /*   					       */
    /*   Unfortunately  this is options dependent  */
    /*   (CONFIG_STACKTRACE) so we cannot use it.  */
    /*   And  its  GPL  anyhow, so we cannot copy  */
    /*   it.				       */
    /*   					       */
    /*   Whats worse is that we might be compiled  */
    /*   with a frame pointer (only on x86-32) so  */
    /*   we have three scenarios to handle.	       */
    /***********************************************/

    /***********************************************/
    /*   Ye  gods! The world is an awful place to  */
    /*   live. The target process, may or may not  */
    /*   have   frame  pointers.  In  fact,  some  */
    /*   frames  may have it and some may not (eg  */
    /*   different   libraries  may  be  compiled  */
    /*   differently).			       */
    /*   					       */
    /*   Looks like distro owners dont care about  */
    /*   debuggabiity,   and  give  us  no  frame  */
    /*   pointers.				       */
    /*   					       */
    /*   This  function  is  really important and  */
    /*   useful.  On  modern  Linux  systems, gdb  */
    /*   (and  pstack) contain all the smarts. In  */
    /*   fact,  pstack  is often a wrapper around  */
    /*   gdb  -  i.e. its so complex we cannot do  */
    /*   this.				       */
    /***********************************************/

    /***********************************************/
    /*   Bear  in  mind  that  user stacks can be  */
    /*   megabytes  in  size,  vs  kernel  stacks  */
    /*   which  are  limited  to a few K (4 or 8K  */
    /*   typically).			       */
    /***********************************************/

//	sp = current->thread.rsp;
# if defined(__i386)
    bos = sp = KSTK_ESP(current);
#	define	ALIGN_MASK	3
# else
    /***********************************************/
    /*   KSTK_ESP()  doesnt exist for x86_64 (its  */
    /*   set to -1).			       */
    /***********************************************/
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 25)
#  if defined(KSTK_EIP)
    /***********************************************/
    /*   Handle  ARM and more kernel independent,  */
    /*   but might not exist.		       */
    /***********************************************/
    bos = sp = (unsigned long *) KSTK_EIP(current);
#  else
    bos = sp = (unsigned long *) task_pt_regs(current)->sp;
#  endif
#else
    bos = sp = task_pt_regs(current)->rsp;
#endif
#	define	ALIGN_MASK	7
#endif

    /***********************************************/
    /*   Walk  the  stack.  We  cannot  rely on a  */
    /*   frame  pointer  at  each  level,  and we  */
    /*   really  want to avoid probing every word  */
    /*   in  the  stack  - a large stack will eat  */
    /*   cpu  looking at thousands of entries. So  */
    /*   try  and  heuristically see if we have a  */
    /*   likely  frame  pointer  to jump over the  */
    /*   frame,  but, if not, just go one word at  */
    /*   a time.				       */
    /*   					       */
    /*   Try  and be careful we dont walk outside  */
    /*   the  stack  or  walk  backwards  in  the  */
    /*   stack, too.			       */
    /***********************************************/
    {   uintptr_t *spend = sp + 1024;
        struct vm_area_struct *vma = find_vma(current->mm, (unsigned long) sp);
        if (vma)
            spend = (uintptr_t *) (vma->vm_end - sizeof(int *));

        /*printk("xbos=%p %p\n", bos, spend);*/

        /***********************************************/
        /*   Have  you ever looked at the output from  */
        /*   GCC in amd64 mode? Things like:	       */
        /*   					       */
        /*   push %r12				       */
        /*   push %rbp				       */
        /*   					       */
        /*   will make you come out in a cold sweat -  */
        /*   no   way  to  find  the  frame  pointer,  */
        /*   without doing what GDB does (ie read the  */
        /*   DWARF  stack  unwind info). So, for now,  */
        /*   you  get  some  false  positives  in the  */
        /*   output - but we try to be conservative.   */
        /***********************************************/
        while (sp >= bos && sp < spend && validate_ptr(sp)) {
            /*printk("  %p %d: %p %d\n", sp, validate_ptr(sp), sp[0], validate_ptr(sp[0]));*/
            if (validate_ptr((void *) sp[0])) {
                uintptr_t p = sp[-1];
                /***********************************************/
                /*   Try  and  avoid false positives in stack  */
                /*   entries   -   we  want  this  to  be  an  */
                /*   executable instruction.		       */
                /***********************************************/
                if (((unsigned long *) sp[0] < bos || (unsigned long *) sp[0] > spend) &&
                        (vma = find_vma(current->mm, sp[0])) != NULL &&
                        vma->vm_flags & VM_EXEC) {
                    *pcstack++ = sp[0];
                    if (pcstack >= pcstack_end)
                        break;
                }
                if (((int) p & ALIGN_MASK) == 0 && p > (uintptr_t) sp && p < (uintptr_t) spend)
                    sp = (unsigned long *) p;
            }
            sp++;
        }
    }

    /***********************************************/
    /*   Erase  anything  else  in  the buffer to  */
    /*   avoid confusion.			       */
    /***********************************************/
    while (pcstack < pcstack_end)
        *pcstack++ = (pc_t) NULL;
}
static void *vb2_dma_sg_get_userptr(void *alloc_ctx, unsigned long vaddr,
				    unsigned long size,
				    enum dma_data_direction dma_dir)
{
	struct vb2_dma_sg_conf *conf = alloc_ctx;
	struct vb2_dma_sg_buf *buf;
	unsigned long first, last;
	int num_pages_from_user;
	struct vm_area_struct *vma;
	struct sg_table *sgt;
	DEFINE_DMA_ATTRS(attrs);

#if LINUX_VERSION_CODE >= KERNEL_VERSION(3,6,0)
	dma_set_attr(DMA_ATTR_SKIP_CPU_SYNC, &attrs);
#endif

	buf = kzalloc(sizeof *buf, GFP_KERNEL);
	if (!buf)
		return NULL;

	buf->vaddr = NULL;
	buf->dev = conf->dev;
	buf->dma_dir = dma_dir;
	buf->offset = vaddr & ~PAGE_MASK;
	buf->size = size;
	buf->dma_sgt = &buf->sg_table;

	first = (vaddr           & PAGE_MASK) >> PAGE_SHIFT;
	last  = ((vaddr + size - 1) & PAGE_MASK) >> PAGE_SHIFT;
	buf->num_pages = last - first + 1;

	buf->pages = kzalloc(buf->num_pages * sizeof(struct page *),
			     GFP_KERNEL);
	if (!buf->pages)
		goto userptr_fail_alloc_pages;

	vma = find_vma(current->mm, vaddr);
	if (!vma) {
		dprintk(1, "no vma for address %lu\n", vaddr);
		goto userptr_fail_find_vma;
	}

	if (vma->vm_end < vaddr + size) {
		dprintk(1, "vma at %lu is too small for %lu bytes\n",
			vaddr, size);
		goto userptr_fail_find_vma;
	}

	buf->vma = vb2_get_vma(vma);
	if (!buf->vma) {
		dprintk(1, "failed to copy vma\n");
		goto userptr_fail_find_vma;
	}

	if (vma_is_io(buf->vma)) {
		for (num_pages_from_user = 0;
		     num_pages_from_user < buf->num_pages;
		     ++num_pages_from_user, vaddr += PAGE_SIZE) {
			unsigned long pfn;

			if (follow_pfn(vma, vaddr, &pfn)) {
				dprintk(1, "no page for address %lu\n", vaddr);
				break;
			}
			buf->pages[num_pages_from_user] = pfn_to_page(pfn);
		}
	} else
		num_pages_from_user = get_user_pages(current, current->mm,
					     vaddr & PAGE_MASK,
					     buf->num_pages,
					     buf->dma_dir == DMA_FROM_DEVICE,
					     1, /* force */
					     buf->pages,
					     NULL);

	if (num_pages_from_user != buf->num_pages)
		goto userptr_fail_get_user_pages;

	if (sg_alloc_table_from_pages(buf->dma_sgt, buf->pages,
			buf->num_pages, buf->offset, size, 0))
		goto userptr_fail_alloc_table_from_pages;

	sgt = &buf->sg_table;
	/*
	 * No need to sync to the device, this will happen later when the
	 * prepare() memop is called.
	 */
	sgt->nents = dma_map_sg_attrs(buf->dev, sgt->sgl, sgt->orig_nents,
				      buf->dma_dir, &attrs);
	if (!sgt->nents)
		goto userptr_fail_map;

	return buf;

userptr_fail_map:
	sg_free_table(&buf->sg_table);
userptr_fail_alloc_table_from_pages:
userptr_fail_get_user_pages:
	dprintk(1, "get_user_pages requested/got: %d/%d]\n",
		buf->num_pages, num_pages_from_user);
	if (!vma_is_io(buf->vma))
		while (--num_pages_from_user >= 0)
			put_page(buf->pages[num_pages_from_user]);
	vb2_put_vma(buf->vma);
userptr_fail_find_vma:
	kfree(buf->pages);
userptr_fail_alloc_pages:
	kfree(buf);
	return NULL;
}
long s3c_mem_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
{
#ifdef USE_DMA_ALLOC
	unsigned long virt_addr;
#else
	unsigned long *virt_addr;
#endif

	struct mm_struct *mm = current->mm;
	struct s3c_mem_alloc param;
	struct vm_area_struct *vma;
	unsigned long start, this_pfn;
#ifdef CONFIG_S3C_DMA_MEM
	struct s3c_mem_dma_param dma_param;
#endif

	switch (cmd) {
	case S3C_MEM_ALLOC:
		mutex_lock(&mem_alloc_lock);
		if (copy_from_user(&param, (struct s3c_mem_alloc *)arg,
					sizeof(struct s3c_mem_alloc))) {
			mutex_unlock(&mem_alloc_lock);
			return -EFAULT;
		}
		flag = MEM_ALLOC;
		param.vir_addr = do_mmap(file, 0, param.size,
				(PROT_READ|PROT_WRITE), MAP_SHARED, 0);
		DEBUG("param.vir_addr = %08x, %d\n",
						param.vir_addr, __LINE__);
		if (param.vir_addr == -EINVAL) {
			printk(KERN_INFO "S3C_MEM_ALLOC FAILED\n");
			flag = 0;
			mutex_unlock(&mem_alloc_lock);
			return -EFAULT;
		}
		param.phy_addr = physical_address;
#ifdef USE_DMA_ALLOC
		param.kvir_addr = virtual_address;
#endif

		DEBUG("KERNEL MALLOC : param.phy_addr = 0x%X \t "
				"size = %d \t param.vir_addr = 0x%X, %d\n",
				param.phy_addr, param.size, param.vir_addr,
				__LINE__);

		if (copy_to_user((struct s3c_mem_alloc *)arg, &param,
					sizeof(struct s3c_mem_alloc))) {
			flag = 0;
			mutex_unlock(&mem_alloc_lock);
			return -EFAULT;
		}
		flag = 0;
		mutex_unlock(&mem_alloc_lock);

		break;

	case S3C_MEM_CACHEABLE_ALLOC:
		mutex_lock(&mem_cacheable_alloc_lock);
		if (copy_from_user(&param, (struct s3c_mem_alloc *)arg,
					sizeof(struct s3c_mem_alloc))) {
			mutex_unlock(&mem_cacheable_alloc_lock);
			return -EFAULT;
		}
		flag = MEM_ALLOC_CACHEABLE;
		param.vir_addr = do_mmap(file, 0, param.size,
				(PROT_READ|PROT_WRITE), MAP_SHARED, 0);
		DEBUG("param.vir_addr = %08x, %d\n",
				param.vir_addr, __LINE__);
		if (param.vir_addr == -EINVAL) {
			printk(KERN_INFO "S3C_MEM_ALLOC FAILED\n");
			flag = 0;
			mutex_unlock(&mem_cacheable_alloc_lock);
			return -EFAULT;
		}
		param.phy_addr = physical_address;
		DEBUG("KERNEL MALLOC : param.phy_addr = 0x%X"
				" \t size = %d \t param.vir_addr = 0x%X, %d\n",
				param.phy_addr, param.size, param.vir_addr,
				__LINE__);

		if (copy_to_user((struct s3c_mem_alloc *)arg, &param,
					sizeof(struct s3c_mem_alloc))) {
			flag = 0;
			mutex_unlock(&mem_cacheable_alloc_lock);
			return -EFAULT;
		}
		flag = 0;
		mutex_unlock(&mem_cacheable_alloc_lock);

		break;

	case S3C_MEM_SHARE_ALLOC:
		mutex_lock(&mem_share_alloc_lock);
		if (copy_from_user(&param, (struct s3c_mem_alloc *)arg,
					sizeof(struct s3c_mem_alloc))) {
			mutex_unlock(&mem_share_alloc_lock);
			return -EFAULT;
		}
		flag = MEM_ALLOC_SHARE;
		physical_address = param.phy_addr;
		DEBUG("param.phy_addr = %08x, %d\n",
				physical_address, __LINE__);
		param.vir_addr = do_mmap(file, 0, param.size,
				(PROT_READ|PROT_WRITE), MAP_SHARED, 0);
		DEBUG("param.vir_addr = %08x, %d\n",
				param.vir_addr, __LINE__);
		if (param.vir_addr == -EINVAL) {
			printk(KERN_INFO "S3C_MEM_SHARE_ALLOC FAILED\n");
			flag = 0;
			mutex_unlock(&mem_share_alloc_lock);
			return -EFAULT;
		}
		DEBUG("MALLOC_SHARE : param.phy_addr = 0x%X \t "
				"size = %d \t param.vir_addr = 0x%X, %d\n",
				param.phy_addr, param.size, param.vir_addr,
				__LINE__);

		if (copy_to_user((struct s3c_mem_alloc *)arg, &param,
					sizeof(struct s3c_mem_alloc))) {
			flag = 0;
			mutex_unlock(&mem_share_alloc_lock);
			return -EFAULT;
		}
		flag = 0;
		mutex_unlock(&mem_share_alloc_lock);

		break;

	case S3C_MEM_CACHEABLE_SHARE_ALLOC:
		mutex_lock(&mem_cacheable_share_alloc_lock);
		if (copy_from_user(&param, (struct s3c_mem_alloc *)arg,
					sizeof(struct s3c_mem_alloc))) {
			mutex_unlock(&mem_cacheable_share_alloc_lock);
			return -EFAULT;
		}
		flag = MEM_ALLOC_CACHEABLE_SHARE;
		physical_address = param.phy_addr;
		DEBUG("param.phy_addr = %08x, %d\n",
				physical_address, __LINE__);
		param.vir_addr = do_mmap(file, 0, param.size,
				(PROT_READ|PROT_WRITE), MAP_SHARED, 0);
		DEBUG("param.vir_addr = %08x, %d\n",
				param.vir_addr, __LINE__);
		if (param.vir_addr == -EINVAL) {
			printk(KERN_INFO "S3C_MEM_SHARE_ALLOC FAILED\n");
			flag = 0;
			mutex_unlock(&mem_cacheable_share_alloc_lock);
			return -EFAULT;
		}
		DEBUG("MALLOC_SHARE : param.phy_addr = 0x%X \t "
				"size = %d \t param.vir_addr = 0x%X, %d\n",
				param.phy_addr, param.size, param.vir_addr,
				__LINE__);

		if (copy_to_user((struct s3c_mem_alloc *)arg, &param,
					sizeof(struct s3c_mem_alloc))) {
			flag = 0;
			mutex_unlock(&mem_cacheable_share_alloc_lock);
			return -EFAULT;
		}
		flag = 0;
		mutex_unlock(&mem_cacheable_share_alloc_lock);

		break;

	case S3C_MEM_FREE:
		mutex_lock(&mem_free_lock);
		if (copy_from_user(&param, (struct s3c_mem_alloc *)arg,
					sizeof(struct s3c_mem_alloc))) {
			mutex_unlock(&mem_free_lock);
			return -EFAULT;
		}

		DEBUG("KERNEL FREE : param.phy_addr = 0x%X \t "
				"size = %d \t param.vir_addr = 0x%X, %d\n",
				param.phy_addr, param.size, param.vir_addr,
				__LINE__);

		if (do_munmap(mm, param.vir_addr, param.size) < 0) {
			printk(KERN_INFO "do_munmap() failed !!\n");
			mutex_unlock(&mem_free_lock);
			return -EINVAL;
		}

#ifdef USE_DMA_ALLOC
		virt_addr = param.kvir_addr;
		dma_free_writecombine(NULL, param.size,
				(unsigned int *) virt_addr, param.phy_addr);
#else
		virt_addr = (unsigned long *)phys_to_virt(param.phy_addr);
		kfree(virt_addr);
#endif
		param.size = 0;
		DEBUG("do_munmap() succeed !!\n");

		if (copy_to_user((struct s3c_mem_alloc *)arg, &param,
					sizeof(struct s3c_mem_alloc))) {
			mutex_unlock(&mem_free_lock);
			return -EFAULT;
		}

		mutex_unlock(&mem_free_lock);

		break;

	case S3C_MEM_SHARE_FREE:
		mutex_lock(&mem_share_free_lock);
		if (copy_from_user(&param, (struct s3c_mem_alloc *)arg,
					sizeof(struct s3c_mem_alloc))) {
			mutex_unlock(&mem_share_free_lock);
			return -EFAULT; }

		DEBUG("MEM_SHARE_FREE : param.phy_addr = 0x%X \t "
				"size = %d \t param.vir_addr = 0x%X, %d\n",
				param.phy_addr, param.size, param.vir_addr,
				__LINE__);

		if (do_munmap(mm, param.vir_addr, param.size) < 0) {
			printk(KERN_INFO "do_munmap() failed - MEM_SHARE_FREE!!\n");
			mutex_unlock(&mem_share_free_lock);
			return -EINVAL;
		}

		param.vir_addr = 0;
		DEBUG("do_munmap() succeed !! - MEM_SHARE_FREE\n");

		if (copy_to_user((struct s3c_mem_alloc *)arg, &param,
					sizeof(struct s3c_mem_alloc))) {
			mutex_unlock(&mem_share_free_lock);
			return -EFAULT;
		}

		mutex_unlock(&mem_share_free_lock);

		break;

#ifdef CONFIG_S3C_DMA_MEM
	case S3C_MEM_DMA_COPY:
		if (copy_from_user(&dma_param, (struct s3c_mem_dma_param *)arg,
				sizeof(struct s3c_mem_dma_param))) {
			return -EFAULT;
		}
		if (s3c_dma_mem_start(current->mm, &dma_param,
				S3C_DMA_MEM2MEM)) {
			return -EINVAL;
		}
		if (copy_to_user((struct s3c_mem_dma_param *)arg, &dma_param,
				sizeof(struct s3c_mem_dma_param))) {
			return -EFAULT;
		}
		break;
#endif

	case S3C_MEM_GET_PADDR:
		if (copy_from_user(&param, (struct s3c_mem_alloc *)arg,
					sizeof(struct s3c_mem_alloc))) {
			return -EFAULT;
		}
		start = param.vir_addr;
		down_read(&mm->mmap_sem);
		vma = find_vma(mm, start);

		if (vma == NULL) {
			up_read(&mm->mmap_sem);
			return -EINVAL;
		}

		if (follow_pfn(vma, start, &this_pfn)) {
			up_read(&mm->mmap_sem);
			return -EINVAL;
		}

		param.phy_addr = this_pfn << PAGE_SHIFT;
		up_read(&mm->mmap_sem);

		if (copy_to_user((struct s3c_mem_alloc *)arg, &param,
					sizeof(struct s3c_mem_alloc))) {
			return -EFAULT;
		}
		break;

	default:
		DEBUG("s3c_mem_ioctl() : default !!\n");
		return -EINVAL;
	}

	return 0;
}
Example #12
0
int psb_get_vaddr_pages(u32 vaddr, u32 size, u32 **pfn_list, int *page_count)
{
	u32 num_pages;
	struct page **pages = 0;
	struct task_struct *task = current;
	struct mm_struct *mm = task->mm;
	struct vm_area_struct *vma;
	u32 *pfns = 0;
	int ret;
	int i;

	if (unlikely(!pfn_list || !page_count || !vaddr || !size))
		return -EINVAL;

	num_pages = (size + PAGE_SIZE - 1) >> PAGE_SHIFT;

	pages = kzalloc(num_pages * sizeof(struct page *), GFP_KERNEL);
	if (unlikely(!pages)) {
		DRM_ERROR("Failed to allocate page list\n");
		return -ENOMEM;
	}

	down_read(&mm->mmap_sem);
	ret = get_user_pages(task, mm, vaddr, num_pages, 0, 0, pages, NULL);
	up_read(&mm->mmap_sem);

	if (ret <= 0) {
		DRM_DEBUG("failed to get user pages\n");
		kfree(pages);
		pages = 0;
	} else {
		DRM_DEBUG("num_pages %d, ret %d\n", num_pages, ret);
		num_pages = ret;
	}

	/*allocate page list*/
	pfns = kzalloc(num_pages * sizeof(u32), GFP_KERNEL);
	if (!pfns) {
		DRM_ERROR("No memory\n");
		goto get_page_err;
	}

	if (!pages) {
		DRM_ERROR("No pages found, trying to follow pfn\n");
		for (i = 0; i < num_pages; i++) {
			vma = find_vma(mm, vaddr + i * PAGE_SIZE);
			if (!vma) {
				DRM_ERROR("failed to find vma\n");
				goto find_vma_err;
			}

			ret = follow_pfn(vma,
				(unsigned long)(vaddr + i * PAGE_SIZE),
				(unsigned long *)&pfns[i]);
			if (ret) {
				DRM_ERROR("failed to follow pfn\n");
				goto follow_pfn_err;
			}
		}
	} else {
		DRM_ERROR("Found pages\n");
		for (i = 0; i < num_pages; i++)
			pfns[i] = page_to_pfn(pages[i]);
	}

	*pfn_list = pfns;
	*page_count = num_pages;

	kfree(pages);

	return 0;
find_vma_err:
follow_pfn_err:
	kfree(pfns);
get_page_err:
	if (pages) {
		for (i = 0; i < num_pages; i++)
			put_page(pages[i]);
		kfree(pages);
	}
	return -EINVAL;
}
int ttm_pl_ub_create_ioctl(struct ttm_object_file *tfile,
			   struct ttm_bo_device *bdev,
			   struct ttm_lock *lock, void *data)
{
	union ttm_pl_create_ub_arg *arg = data;
	struct ttm_pl_create_ub_req *req = &arg->req;
	struct ttm_pl_rep *rep = &arg->rep;
	struct ttm_buffer_object *bo;
	struct ttm_buffer_object *tmp;
	struct ttm_bo_user_object *user_bo;
	uint32_t flags;
	int ret = 0;
	struct ttm_mem_global *mem_glob = bdev->glob->mem_glob;
	struct ttm_placement placement = default_placement;

#if (LINUX_VERSION_CODE < KERNEL_VERSION(3,3,0))
	size_t acc_size =
		ttm_pl_size(bdev, (req->size + PAGE_SIZE - 1) >> PAGE_SHIFT);
#else
	size_t acc_size = ttm_bo_acc_size(bdev, req->size,
		sizeof(struct ttm_buffer_object));
#endif
	if (req->user_address & ~PAGE_MASK) {
		printk(KERN_ERR "User pointer buffer need page alignment\n");
		return -EFAULT;
	}

	ret = ttm_mem_global_alloc(mem_glob, acc_size, false, false);
	if (unlikely(ret != 0))
		return ret;

	flags = req->placement;
	user_bo = kzalloc(sizeof(*user_bo), GFP_KERNEL);
	if (unlikely(user_bo == NULL)) {
		ttm_mem_global_free(mem_glob, acc_size);
		return -ENOMEM;
	}
	ret = ttm_read_lock(lock, true);
	if (unlikely(ret != 0)) {
		ttm_mem_global_free(mem_glob, acc_size);
		kfree(user_bo);
		return ret;
	}
	bo = &user_bo->bo;

	placement.num_placement = 1;
	placement.placement = &flags;

#if (LINUX_VERSION_CODE < KERNEL_VERSION(3,3,0))

/*  For kernel 3.0, use the desired type. */
#define TTM_HACK_WORKAROUND_ttm_bo_type_user ttm_bo_type_user

#else
/*  TTM_HACK_WORKAROUND_ttm_bo_type_user -- Hack for porting,
    as ttm_bo_type_user is no longer implemented.
    This will not result in working code.
    FIXME - to be removed. */

#warning warning: ttm_bo_type_user no longer supported

/*  For kernel 3.3+, use the wrong type, which will compile but not work. */
#define TTM_HACK_WORKAROUND_ttm_bo_type_user ttm_bo_type_kernel

#endif

#if (LINUX_VERSION_CODE > KERNEL_VERSION(3, 3, 0))
		/* Handle frame buffer allocated in user space, Convert
		  user space virtual address into pages list */
		unsigned int page_nr = 0;
		struct vm_area_struct *vma = NULL;
		struct sg_table *sg = NULL;
		unsigned long num_pages = 0;
		struct page **pages = 0;

		num_pages = (req->size + PAGE_SIZE - 1) >> PAGE_SHIFT;
		pages = kzalloc(num_pages * sizeof(struct page *), GFP_KERNEL);
		if (unlikely(pages == NULL)) {
			printk(KERN_ERR "kzalloc pages failed\n");
			return -ENOMEM;
		}

		down_read(&current->mm->mmap_sem);
		vma = find_vma(current->mm, req->user_address);
		if (unlikely(vma == NULL)) {
			up_read(&current->mm->mmap_sem);
			kfree(pages);
			printk(KERN_ERR "find_vma failed\n");
			return -EFAULT;
		}
		unsigned long before_flags = vma->vm_flags;
		if (vma->vm_flags & (VM_IO | VM_PFNMAP))
			vma->vm_flags = vma->vm_flags & ((~VM_IO) & (~VM_PFNMAP));
		page_nr = get_user_pages(current, current->mm,
					 req->user_address,
					 (int)(num_pages), 1, 0, pages,
					 NULL);
		vma->vm_flags = before_flags;
		up_read(&current->mm->mmap_sem);

		/* can be written by caller, not forced */
		if (unlikely(page_nr < num_pages)) {
			kfree(pages);
			pages = 0;
			printk(KERN_ERR "get_user_pages err.\n");
			return -ENOMEM;
		}
		sg = drm_prime_pages_to_sg(pages, num_pages);
		if (unlikely(sg == NULL)) {
			kfree(pages);
			printk(KERN_ERR "drm_prime_pages_to_sg err.\n");
			return -ENOMEM;
		}
		kfree(pages);
#endif

#if (LINUX_VERSION_CODE < KERNEL_VERSION(3, 3, 0))
	ret = ttm_bo_init(bdev,
			  bo,
			  req->size,
			  TTM_HACK_WORKAROUND_ttm_bo_type_user,
			  &placement,
			  req->page_alignment,
			  req->user_address,
			  true,
			  NULL,
			  acc_size,
			  NULL,
			  &ttm_bo_user_destroy);
#elif (LINUX_VERSION_CODE < KERNEL_VERSION(3, 8, 0))
	ret = ttm_bo_init(bdev,
			  bo,
			  req->size,
			  ttm_bo_type_sg,
			  &placement,
			  req->page_alignment,
			  req->user_address,
			  true,
			  NULL,
			  acc_size,
			  sg,
			  &ttm_ub_bo_user_destroy);
#else
	ret = ttm_bo_init(bdev,
			  bo,
			  req->size,
			  ttm_bo_type_sg,
			  &placement,
			  req->page_alignment,
			  true,
			  NULL,
			  acc_size,
			  sg,
			  &ttm_ub_bo_user_destroy);
#endif

	/*
	 * Note that the ttm_buffer_object_init function
	 * would've called the destroy function on failure!!
	 */
	ttm_read_unlock(lock);
	if (unlikely(ret != 0))
		goto out;

	tmp = ttm_bo_reference(bo);
	ret = ttm_base_object_init(tfile, &user_bo->base,
				   flags & TTM_PL_FLAG_SHARED,
				   ttm_buffer_type,
				   &ttm_bo_user_release,
				   &ttm_bo_user_ref_release);
	if (unlikely(ret != 0))
		goto out_err;

	ret = ttm_bo_reserve(bo, true, false, false, 0);
	if (unlikely(ret != 0))
		goto out_err;
	ttm_pl_fill_rep(bo, rep);
	ttm_bo_unreserve(bo);
	ttm_bo_unref(&bo);
out:
	return 0;
out_err:
	ttm_bo_unref(&tmp);
	ttm_bo_unref(&bo);
	return ret;
}
Example #14
0
unsigned long
arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
			  const unsigned long len, const unsigned long pgoff,
			  const unsigned long flags)
{
	struct vm_area_struct *vma;
	struct mm_struct *mm = current->mm;
	unsigned long addr = addr0;
	int do_color_align, last_mmap;
	struct vm_unmapped_area_info info;

#ifdef CONFIG_64BIT
	/* This should only ever run for 32-bit processes.  */
	BUG_ON(!test_thread_flag(TIF_32BIT));
#endif

	/* requested length too big for entire address space */
	if (len > TASK_SIZE)
		return -ENOMEM;

	do_color_align = 0;
	if (filp || (flags & MAP_SHARED))
		do_color_align = 1;
	last_mmap = GET_LAST_MMAP(filp);

	if (flags & MAP_FIXED) {
		if ((flags & MAP_SHARED) && last_mmap &&
		    (addr - shared_align_offset(last_mmap, pgoff))
			& (SHM_COLOUR - 1))
			return -EINVAL;
		goto found_addr;
	}

	/* requesting a specific address */
	if (addr) {
		if (do_color_align && last_mmap)
			addr = COLOR_ALIGN(addr, last_mmap, pgoff);
		else
			addr = PAGE_ALIGN(addr);
		vma = find_vma(mm, addr);
		if (TASK_SIZE - len >= addr &&
		    (!vma || addr + len <= vma->vm_start))
			goto found_addr;
	}

	info.flags = VM_UNMAPPED_AREA_TOPDOWN;
	info.length = len;
	info.low_limit = PAGE_SIZE;
	info.high_limit = mm->mmap_base;
	info.align_mask = last_mmap ? (PAGE_MASK & (SHM_COLOUR - 1)) : 0;
	info.align_offset = shared_align_offset(last_mmap, pgoff);
	addr = vm_unmapped_area(&info);
	if (!(addr & ~PAGE_MASK))
		goto found_addr;
	VM_BUG_ON(addr != -ENOMEM);

	/*
	 * A failed mmap() very likely causes application failure,
	 * so fall back to the bottom-up function here. This scenario
	 * can happen with large stack limits and large mmap()
	 * allocations.
	 */
	return arch_get_unmapped_area(filp, addr0, len, pgoff, flags);

found_addr:
	if (do_color_align && !last_mmap && !(addr & ~PAGE_MASK))
		SET_LAST_MMAP(filp, addr - (pgoff << PAGE_SHIFT));

	return addr;
}
Example #15
0
asmlinkage void
do_page_fault(unsigned long address, struct pt_regs *regs,
	      int protection, int writeaccess)
{
	struct task_struct *tsk;
	struct mm_struct *mm;
	struct vm_area_struct * vma;
	siginfo_t info;
	int fault;
	unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;

	D(printk(KERN_DEBUG
		 "Page fault for %lX on %X at %lX, prot %d write %d\n",
		 address, smp_processor_id(), instruction_pointer(regs),
		 protection, writeaccess));

	tsk = current;

	/*
	 * We fault-in kernel-space virtual memory on-demand. The
	 * 'reference' page table is init_mm.pgd.
	 *
	 * NOTE! We MUST NOT take any locks for this case. We may
	 * be in an interrupt or a critical region, and should
	 * only copy the information from the master page table,
	 * nothing more.
	 *
	 * NOTE2: This is done so that, when updating the vmalloc
	 * mappings we don't have to walk all processes pgdirs and
	 * add the high mappings all at once. Instead we do it as they
	 * are used. However vmalloc'ed page entries have the PAGE_GLOBAL
	 * bit set so sometimes the TLB can use a lingering entry.
	 *
	 * This verifies that the fault happens in kernel space
	 * and that the fault was not a protection error (error_code & 1).
	 */

	if (address >= VMALLOC_START &&
	    !protection &&
	    !user_mode(regs))
		goto vmalloc_fault;

	/* When stack execution is not allowed we store the signal
	 * trampolines in the reserved cris_signal_return_page.
	 * Handle this in the exact same way as vmalloc (we know
	 * that the mapping is there and is valid so no need to
	 * call handle_mm_fault).
	 */
	if (cris_signal_return_page &&
	    address == cris_signal_return_page &&
	    !protection && user_mode(regs))
		goto vmalloc_fault;

	/* we can and should enable interrupts at this point */
	local_irq_enable();

	mm = tsk->mm;
	info.si_code = SEGV_MAPERR;

	/*
	 * If we're in an interrupt or "atomic" operation or have no
	 * user context, we must not take the fault.
	 */

	if (!mm || pagefault_disabled())
		goto no_context;

	if (user_mode(regs))
		flags |= FAULT_FLAG_USER;
retry:
	down_read(&mm->mmap_sem);
	vma = find_vma(mm, address);
	if (!vma)
		goto bad_area;
	if (vma->vm_start <= address)
		goto good_area;
	if (!(vma->vm_flags & VM_GROWSDOWN))
		goto bad_area;
	if (user_mode(regs)) {
		/*
		 * accessing the stack below usp is always a bug.
		 * we get page-aligned addresses so we can only check
		 * if we're within a page from usp, but that might be
		 * enough to catch brutal errors at least.
		 */
		if (address + PAGE_SIZE < rdusp())
			goto bad_area;
	}
	if (expand_stack(vma, address))
		goto bad_area;

	/*
	 * Ok, we have a good vm_area for this memory access, so
	 * we can handle it..
	 */

 good_area:
	info.si_code = SEGV_ACCERR;

	/* first do some preliminary protection checks */

	if (writeaccess == 2){
		if (!(vma->vm_flags & VM_EXEC))
			goto bad_area;
	} else if (writeaccess == 1) {
		if (!(vma->vm_flags & VM_WRITE))
			goto bad_area;
		flags |= FAULT_FLAG_WRITE;
	} else {
		if (!(vma->vm_flags & (VM_READ | VM_EXEC)))
			goto bad_area;
	}

	/*
	 * If for any reason at all we couldn't handle the fault,
	 * make sure we exit gracefully rather than endlessly redo
	 * the fault.
	 */

	fault = handle_mm_fault(mm, vma, address, flags);

	if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(current))
		return;

	if (unlikely(fault & VM_FAULT_ERROR)) {
		if (fault & VM_FAULT_OOM)
			goto out_of_memory;
		else if (fault & VM_FAULT_SIGBUS)
			goto do_sigbus;
		BUG();
	}

	if (flags & FAULT_FLAG_ALLOW_RETRY) {
		if (fault & VM_FAULT_MAJOR)
			tsk->maj_flt++;
		else
			tsk->min_flt++;
		if (fault & VM_FAULT_RETRY) {
			flags &= ~FAULT_FLAG_ALLOW_RETRY;
			flags |= FAULT_FLAG_TRIED;

			/*
			 * No need to up_read(&mm->mmap_sem) as we would
			 * have already released it in __lock_page_or_retry
			 * in mm/filemap.c.
			 */

			goto retry;
		}
	}

	up_read(&mm->mmap_sem);
	return;

	/*
	 * Something tried to access memory that isn't in our memory map..
	 * Fix it, but check if it's kernel or user first..
	 */

 bad_area:
	up_read(&mm->mmap_sem);

 bad_area_nosemaphore:
	DPG(show_registers(regs));

	/* User mode accesses just cause a SIGSEGV */

	if (user_mode(regs)) {
		printk(KERN_NOTICE "%s (pid %d) segfaults for page "
			"address %08lx at pc %08lx\n",
			tsk->comm, tsk->pid,
			address, instruction_pointer(regs));

		/* With DPG on, we've already dumped registers above.  */
		DPG(if (0))
			show_registers(regs);

#ifdef CONFIG_NO_SEGFAULT_TERMINATION
		DECLARE_WAIT_QUEUE_HEAD(wq);
		wait_event_interruptible(wq, 0 == 1);
#else
		info.si_signo = SIGSEGV;
		info.si_errno = 0;
		/* info.si_code has been set above */
		info.si_addr = (void *)address;
		force_sig_info(SIGSEGV, &info, tsk);
#endif
		return;
	}

 no_context:

	/* Are we prepared to handle this kernel fault?
	 *
	 * (The kernel has valid exception-points in the source
	 *  when it accesses user-memory. When it fails in one
	 *  of those points, we find it in a table and do a jump
	 *  to some fixup code that loads an appropriate error
	 *  code)
	 */

	if (find_fixup_code(regs))
		return;

	/*
	 * Oops. The kernel tried to access some bad page. We'll have to
	 * terminate things with extreme prejudice.
	 */

	if (!oops_in_progress) {
		oops_in_progress = 1;
		if ((unsigned long) (address) < PAGE_SIZE)
			printk(KERN_ALERT "Unable to handle kernel NULL "
				"pointer dereference");
		else
			printk(KERN_ALERT "Unable to handle kernel access"
				" at virtual address %08lx\n", address);

		die_if_kernel("Oops", regs, (writeaccess << 1) | protection);
		oops_in_progress = 0;
	}

	do_exit(SIGKILL);

	/*
	 * We ran out of memory, or some other thing happened to us that made
	 * us unable to handle the page fault gracefully.
	 */

 out_of_memory:
	up_read(&mm->mmap_sem);
	if (!user_mode(regs))
		goto no_context;
	pagefault_out_of_memory();
	return;

 do_sigbus:
	up_read(&mm->mmap_sem);

	/*
	 * Send a sigbus, regardless of whether we were in kernel
	 * or user mode.
	 */
	info.si_signo = SIGBUS;
	info.si_errno = 0;
	info.si_code = BUS_ADRERR;
	info.si_addr = (void *)address;
	force_sig_info(SIGBUS, &info, tsk);

	/* Kernel mode? Handle exceptions or die */
	if (!user_mode(regs))
		goto no_context;
	return;

vmalloc_fault:
	{
		/*
		 * Synchronize this task's top level page-table
		 * with the 'reference' page table.
		 *
		 * Use current_pgd instead of tsk->active_mm->pgd
		 * since the latter might be unavailable if this
		 * code is executed in a misfortunately run irq
		 * (like inside schedule() between switch_mm and
		 *  switch_to...).
		 */

		int offset = pgd_index(address);
		pgd_t *pgd, *pgd_k;
		pud_t *pud, *pud_k;
		pmd_t *pmd, *pmd_k;
		pte_t *pte_k;

		pgd = (pgd_t *)per_cpu(current_pgd, smp_processor_id()) + offset;
		pgd_k = init_mm.pgd + offset;

		/* Since we're two-level, we don't need to do both
		 * set_pgd and set_pmd (they do the same thing). If
		 * we go three-level at some point, do the right thing
		 * with pgd_present and set_pgd here.
		 *
		 * Also, since the vmalloc area is global, we don't
		 * need to copy individual PTE's, it is enough to
		 * copy the pgd pointer into the pte page of the
		 * root task. If that is there, we'll find our pte if
		 * it exists.
		 */

		pud = pud_offset(pgd, address);
		pud_k = pud_offset(pgd_k, address);
		if (!pud_present(*pud_k))
			goto no_context;

		pmd = pmd_offset(pud, address);
		pmd_k = pmd_offset(pud_k, address);

		if (!pmd_present(*pmd_k))
			goto bad_area_nosemaphore;

		set_pmd(pmd, *pmd_k);

		/* Make sure the actual PTE exists as well to
		 * catch kernel vmalloc-area accesses to non-mapped
		 * addresses. If we don't do this, this will just
		 * silently loop forever.
		 */

		pte_k = pte_offset_kernel(pmd_k, address);
		if (!pte_present(*pte_k))
			goto no_context;

		return;
	}
}
Example #16
0
/*
 * This routine handles page faults.  It determines the address,
 * and the problem, and then passes it off to one of the appropriate
 * routines.
 *
 * error_code:
 *             ****0004       Protection           ->  Write-Protection  (suprression)
 *             ****0010       Segment translation  ->  Not present       (nullification)
 *             ****0011       Page translation     ->  Not present       (nullification)
 *             ****003B       Region third exception ->  Not present       (nullification)
 */
asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long error_code)
{
        struct task_struct *tsk;
        struct mm_struct *mm;
        struct vm_area_struct * vma;
        unsigned long address;
        unsigned long fixup;
        int write;
	int si_code = SEGV_MAPERR;
	int kernel_address = 0;

        tsk = current;
        mm = tsk->mm;
	
	/* 
         * Check for low-address protection.  This needs to be treated
	 * as a special case because the translation exception code 
	 * field is not guaranteed to contain valid data in this case.
	 */
	if ((error_code & 0xff) == 4 && !(S390_lowcore.trans_exc_code & 4)) {

		/* Low-address protection hit in kernel mode means 
		   NULL pointer write access in kernel mode.  */
 		if (!(regs->psw.mask & PSW_PROBLEM_STATE)) {
			address = 0;
			kernel_address = 1;
			goto no_context;
		}

		/* Low-address protection hit in user mode 'cannot happen'.  */
		die ("Low-address protection", regs, error_code);
        	do_exit(SIGKILL);
	}

        /* 
         * get the failing address 
         * more specific the segment and page table portion of 
         * the address 
         */

        address = S390_lowcore.trans_exc_code&-4096L;


	/*
	 * Check which address space the address belongs to
	 */
	switch (S390_lowcore.trans_exc_code & 3)
	{
	case 0: /* Primary Segment Table Descriptor */
		kernel_address = 1;
		goto no_context;

	case 1: /* STD determined via access register */
		if (S390_lowcore.exc_access_id == 0)
		{
			kernel_address = 1;
			goto no_context;
		}
		if (regs && S390_lowcore.exc_access_id < NUM_ACRS)
		{
			if (regs->acrs[S390_lowcore.exc_access_id] == 0)
			{
				kernel_address = 1;
				goto no_context;
			}
			if (regs->acrs[S390_lowcore.exc_access_id] == 1)
			{
				/* user space address */
				break;
			}
		}
		die("page fault via unknown access register", regs, error_code);
        	do_exit(SIGKILL);
		break;

	case 2: /* Secondary Segment Table Descriptor */
	case 3: /* Home Segment Table Descriptor */
		/* user space address */
		break;
	}

	/*
	 * Check whether we have a user MM in the first place.
	 */
        if (in_interrupt() || !mm || !(regs->psw.mask & _PSW_IO_MASK_BIT))
                goto no_context;

	/*
	 * When we get here, the fault happened in the current
	 * task's user address space, so we can switch on the
	 * interrupts again and then search the VMAs
	 */

	__sti();

        down_read(&mm->mmap_sem);

        vma = find_vma(mm, address);
        if (!vma)
                goto bad_area;
        if (vma->vm_start <= address) 
                goto good_area;
        if (!(vma->vm_flags & VM_GROWSDOWN))
                goto bad_area;
        if (expand_stack(vma, address))
                goto bad_area;
/*
 * Ok, we have a good vm_area for this memory access, so
 * we can handle it..
 */
good_area:
        write = 0;
	si_code = SEGV_ACCERR;

        switch (error_code & 0xFF) {
                case 0x04:                                /* write, present*/
                        write = 1;
                        break;
                case 0x10:                                   /* not present*/
                case 0x11:                                   /* not present*/
                case 0x3B:                                   /* not present*/
                        if (!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE)))
                                goto bad_area;
                        break;
                default:
                       printk("code should be 4, 10 or 11 (%lX) \n",error_code&0xFF);  
                       goto bad_area;
        }

 survive:
	/*
	 * If for any reason at all we couldn't handle the fault,
	 * make sure we exit gracefully rather than endlessly redo
	 * the fault.
	 */
	switch (handle_mm_fault(mm, vma, address, write)) {
	case 1:
		tsk->min_flt++;
		break;
	case 2:
		tsk->maj_flt++;
		break;
	case 0:
		goto do_sigbus;
	default:
		goto out_of_memory;
	}

        up_read(&mm->mmap_sem);
        return;

/*
 * Something tried to access memory that isn't in our memory map..
 * Fix it, but check if it's kernel or user first..
 */
bad_area:
        up_read(&mm->mmap_sem);

        /* User mode accesses just cause a SIGSEGV */
        if (regs->psw.mask & PSW_PROBLEM_STATE) {
                tsk->thread.prot_addr = address;
                tsk->thread.trap_no = error_code;
#ifndef CONFIG_SYSCTL
#ifdef CONFIG_PROCESS_DEBUG
                printk("User process fault: interruption code 0x%lX\n",error_code);
                printk("failing address: %lX\n",address);
		show_regs(regs);
#endif
#else
		if (sysctl_userprocess_debug) {
			printk("User process fault: interruption code 0x%lX\n",
			       error_code);
			printk("failing address: %lX\n", address);
			show_regs(regs);
		}
#endif

		force_sigsegv(tsk, si_code, (void *)address);
                return;
	}

no_context:
        /* Are we prepared to handle this kernel fault?  */
        if ((fixup = search_exception_table(regs->psw.addr)) != 0) {
                regs->psw.addr = fixup;
                return;
        }

/*
 * Oops. The kernel tried to access some bad page. We'll have to
 * terminate things with extreme prejudice.
 */

        if (kernel_address)
                printk(KERN_ALERT "Unable to handle kernel pointer dereference"
        	       " at virtual kernel address %016lx\n", address);
        else
                printk(KERN_ALERT "Unable to handle kernel paging request"
		       " at virtual user address %016lx\n", address);

        die("Oops", regs, error_code);
        do_exit(SIGKILL);


/*
 * We ran out of memory, or some other thing happened to us that made
 * us unable to handle the page fault gracefully.
*/
out_of_memory:
	up_read(&mm->mmap_sem);
	if (tsk->pid == 1) {
		tsk->policy |= SCHED_YIELD;
		schedule();
		down_read(&mm->mmap_sem);
		goto survive;
	}
	printk("VM: killing process %s\n", tsk->comm);
	if (regs->psw.mask & PSW_PROBLEM_STATE)
		do_exit(SIGKILL);
	goto no_context;

do_sigbus:
	up_read(&mm->mmap_sem);

	/*
	 * Send a sigbus, regardless of whether we were in kernel
	 * or user mode.
	 */
        tsk->thread.prot_addr = address;
        tsk->thread.trap_no = error_code;
	force_sig(SIGBUS, tsk);

	/* Kernel mode? Handle exceptions or die */
	if (!(regs->psw.mask & PSW_PROBLEM_STATE))
		goto no_context;
}
Example #17
0
/*
 * Canonical page fault handler
 */
void do_page_fault(unsigned long address, long cause, struct pt_regs *regs)
{
	struct vm_area_struct *vma;
	struct mm_struct *mm = current->mm;
	siginfo_t info;
	int si_code = SEGV_MAPERR;
	int fault;
	const struct exception_table_entry *fixup;
	unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;

	/*
	 * If we're in an interrupt or have no user context,
	 * then must not take the fault.
	 */
	if (unlikely(in_interrupt() || !mm))
		goto no_context;

	local_irq_enable();

	if (user_mode(regs))
		flags |= FAULT_FLAG_USER;
retry:
	down_read(&mm->mmap_sem);
	vma = find_vma(mm, address);
	if (!vma)
		goto bad_area;

	if (vma->vm_start <= address)
		goto good_area;

	if (!(vma->vm_flags & VM_GROWSDOWN))
		goto bad_area;

	if (expand_stack(vma, address))
		goto bad_area;

good_area:
	/* Address space is OK.  Now check access rights. */
	si_code = SEGV_ACCERR;

	switch (cause) {
	case FLT_IFETCH:
		if (!(vma->vm_flags & VM_EXEC))
			goto bad_area;
		break;
	case FLT_LOAD:
		if (!(vma->vm_flags & VM_READ))
			goto bad_area;
		break;
	case FLT_STORE:
		if (!(vma->vm_flags & VM_WRITE))
			goto bad_area;
		flags |= FAULT_FLAG_WRITE;
		break;
	}

	fault = handle_mm_fault(mm, vma, address, flags);

	if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(current))
		return;

	/* The most common case -- we are done. */
	if (likely(!(fault & VM_FAULT_ERROR))) {
		if (flags & FAULT_FLAG_ALLOW_RETRY) {
			if (fault & VM_FAULT_MAJOR)
				current->maj_flt++;
			else
				current->min_flt++;
			if (fault & VM_FAULT_RETRY) {
				flags &= ~FAULT_FLAG_ALLOW_RETRY;
				flags |= FAULT_FLAG_TRIED;
				goto retry;
			}
		}

		up_read(&mm->mmap_sem);
		return;
	}

	up_read(&mm->mmap_sem);

	/* Handle copyin/out exception cases */
	if (!user_mode(regs))
		goto no_context;

	if (fault & VM_FAULT_OOM) {
		pagefault_out_of_memory();
		return;
	}

	/* User-mode address is in the memory map, but we are
	 * unable to fix up the page fault.
	 */
	if (fault & VM_FAULT_SIGBUS) {
		info.si_signo = SIGBUS;
		info.si_code = BUS_ADRERR;
	}
	/* Address is not in the memory map */
	else {
		info.si_signo = SIGSEGV;
		info.si_code = SEGV_ACCERR;
	}
	info.si_errno = 0;
	info.si_addr = (void __user *)address;
	force_sig_info(info.si_signo, &info, current);
	return;

bad_area:
	up_read(&mm->mmap_sem);

	if (user_mode(regs)) {
		info.si_signo = SIGSEGV;
		info.si_errno = 0;
		info.si_code = si_code;
		info.si_addr = (void *)address;
		force_sig_info(info.si_signo, &info, current);
		return;
	}
	/* Kernel-mode fault falls through */

no_context:
	fixup = search_exception_tables(pt_elr(regs));
	if (fixup) {
		pt_set_elr(regs, fixup->fixup);
		return;
	}

	/* Things are looking very, very bad now */
	bust_spinlocks(1);
	printk(KERN_EMERG "Unable to handle kernel paging request at "
		"virtual address 0x%08lx, regs %p\n", address, regs);
	die("Bad Kernel VA", regs, SIGKILL);
}
Example #18
0
int map_user_kiobuf(int rw, struct kiobuf *iobuf, unsigned long va, size_t len)
{
	unsigned long		ptr, end;
	int			err;
	struct mm_struct *	mm;
	struct vm_area_struct *	vma = 0;
	struct page *		map;
	int			i;
	int			datain = (rw == READ);
	
	/* Make sure the iobuf is not already mapped somewhere. */
	if (iobuf->nr_pages)
		return -EINVAL;

	mm = current->mm;
	dprintk ("map_user_kiobuf: begin\n");
	
	ptr = va & PAGE_MASK;
	end = (va + len + PAGE_SIZE - 1) & PAGE_MASK;
	err = expand_kiobuf(iobuf, (end - ptr) >> PAGE_SHIFT);
	if (err)
		return err;

	down(&mm->mmap_sem);

	err = -EFAULT;
	iobuf->locked = 0;
	iobuf->offset = va & ~PAGE_MASK;
	iobuf->length = len;
	
	i = 0;
	
	/* 
	 * First of all, try to fault in all of the necessary pages
	 */
	while (ptr < end) {
		if (!vma || ptr >= vma->vm_end) {
			vma = find_vma(current->mm, ptr);
			if (!vma) 
				goto out_unlock;
			if (vma->vm_start > ptr) {
				if (!(vma->vm_flags & VM_GROWSDOWN))
					goto out_unlock;
				if (expand_stack(vma, ptr))
					goto out_unlock;
			}
			if (((datain) && (!(vma->vm_flags & VM_WRITE))) ||
					(!(vma->vm_flags & VM_READ))) {
				err = -EACCES;
				goto out_unlock;
			}
		}
		if (handle_mm_fault(current->mm, vma, ptr, datain) <= 0) 
			goto out_unlock;
		spin_lock(&mm->page_table_lock);
		map = follow_page(ptr);
		if (!map) {
			spin_unlock(&mm->page_table_lock);
			dprintk (KERN_ERR "Missing page in map_user_kiobuf\n");
			goto out_unlock;
		}
		map = get_page_map(map);
		if (map)
			atomic_inc(&map->count);
		else
			printk (KERN_INFO "Mapped page missing [%d]\n", i);
		spin_unlock(&mm->page_table_lock);
		iobuf->maplist[i] = map;
		iobuf->nr_pages = ++i;
		
		ptr += PAGE_SIZE;
	}

	up(&mm->mmap_sem);
	dprintk ("map_user_kiobuf: end OK\n");
	return 0;

 out_unlock:
	up(&mm->mmap_sem);
	unmap_kiobuf(iobuf);
	dprintk ("map_user_kiobuf: end %d\n", err);
	return err;
}
Example #19
0
static unsigned long arch_get_unmapped_area_common(struct file *filp,
	unsigned long addr0, unsigned long len, unsigned long pgoff,
	unsigned long flags, enum mmap_allocation_direction dir)
{
	struct mm_struct *mm = current->mm;
	struct vm_area_struct *vma;
	unsigned long addr = addr0;
	int do_color_align;

	if (unlikely(len > TASK_SIZE))
		return -ENOMEM;

	if (flags & MAP_FIXED) {
		
		if (TASK_SIZE - len < addr)
			return -EINVAL;

		if ((flags & MAP_SHARED) &&
		    ((addr - (pgoff << PAGE_SHIFT)) & shm_align_mask))
			return -EINVAL;
		return addr;
	}

	do_color_align = 0;
	if (filp || (flags & MAP_SHARED))
		do_color_align = 1;

	
	if (addr) {
		if (do_color_align)
			addr = COLOUR_ALIGN(addr, pgoff);
		else
			addr = PAGE_ALIGN(addr);

		vma = find_vma(mm, addr);
		if (TASK_SIZE - len >= addr &&
		    (!vma || addr + len <= vma->vm_start))
			return addr;
	}

	if (dir == UP) {
		addr = mm->mmap_base;
		if (do_color_align)
			addr = COLOUR_ALIGN(addr, pgoff);
		else
			addr = PAGE_ALIGN(addr);

		for (vma = find_vma(current->mm, addr); ; vma = vma->vm_next) {
			
			if (TASK_SIZE - len < addr)
				return -ENOMEM;
			if (!vma || addr + len <= vma->vm_start)
				return addr;
			addr = vma->vm_end;
			if (do_color_align)
				addr = COLOUR_ALIGN(addr, pgoff);
		 }
	 } else {
		
		if (len <= mm->cached_hole_size) {
			mm->cached_hole_size = 0;
			mm->free_area_cache = mm->mmap_base;
		}

		addr = mm->free_area_cache;
		if (do_color_align) {
			unsigned long base =
				COLOUR_ALIGN_DOWN(addr - len, pgoff);
			addr = base + len;
		}

		
		if (likely(addr > len)) {
			vma = find_vma(mm, addr - len);
			if (!vma || addr <= vma->vm_start) {
				
				return mm->free_area_cache = addr - len;
			}
		}

		if (unlikely(mm->mmap_base < len))
			goto bottomup;

		addr = mm->mmap_base - len;
		if (do_color_align)
			addr = COLOUR_ALIGN_DOWN(addr, pgoff);

		do {
			vma = find_vma(mm, addr);
			if (likely(!vma || addr + len <= vma->vm_start)) {
				
				return mm->free_area_cache = addr;
			}

			
			if (addr + mm->cached_hole_size < vma->vm_start)
				mm->cached_hole_size = vma->vm_start - addr;

			
			addr = vma->vm_start - len;
			if (do_color_align)
				addr = COLOUR_ALIGN_DOWN(addr, pgoff);
		} while (likely(len < vma->vm_start));

bottomup:
		mm->cached_hole_size = ~0UL;
		mm->free_area_cache = TASK_UNMAPPED_BASE;
		addr = arch_get_unmapped_area(filp, addr0, len, pgoff, flags);
		mm->free_area_cache = mm->mmap_base;
		mm->cached_hole_size = ~0UL;

		return addr;
	}
}
Example #20
0
/*
 * This routine handles page faults.  It determines the address and the
 * problem, and then passes it off to one of the appropriate routines.
 */
asmlinkage void do_page_fault(struct pt_regs *regs)
{
	struct task_struct *tsk;
	struct vm_area_struct *vma;
	struct mm_struct *mm;
	unsigned long addr, cause;
	unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
	int fault, code = SEGV_MAPERR;

	cause = regs->scause;
	addr = regs->sbadaddr;

	tsk = current;
	mm = tsk->mm;

	/*
	 * Fault-in kernel-space virtual memory on-demand.
	 * The 'reference' page table is init_mm.pgd.
	 *
	 * NOTE! We MUST NOT take any locks for this case. We may
	 * be in an interrupt or a critical region, and should
	 * only copy the information from the master page table,
	 * nothing more.
	 */
	if (unlikely((addr >= VMALLOC_START) && (addr <= VMALLOC_END)))
		goto vmalloc_fault;

	/* Enable interrupts if they were enabled in the parent context. */
	if (likely(regs->sstatus & SR_PIE))
		local_irq_enable();

	/*
	 * If we're in an interrupt, have no user context, or are running
	 * in an atomic region, then we must not take the fault.
	 */
	if (unlikely(in_atomic() || !mm))
		goto no_context;

	if (user_mode(regs))
		flags |= FAULT_FLAG_USER;

	perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, addr);

retry:
	down_read(&mm->mmap_sem);
	vma = find_vma(mm, addr);
	if (unlikely(!vma))
		goto bad_area;
	if (likely(vma->vm_start <= addr))
		goto good_area;
	if (unlikely(!(vma->vm_flags & VM_GROWSDOWN)))
		goto bad_area;
	if (unlikely(expand_stack(vma, addr)))
		goto bad_area;

	/*
	 * Ok, we have a good vm_area for this memory access, so
	 * we can handle it.
	 */
good_area:
	code = SEGV_ACCERR;

	switch (cause) {
	case EXC_INST_ACCESS:
		if (!(vma->vm_flags & VM_EXEC))
			goto bad_area;
		break;
	case EXC_LOAD_ACCESS:
		if (!(vma->vm_flags & VM_READ))
			goto bad_area;
		break;
	case EXC_STORE_ACCESS:
		if (!(vma->vm_flags & VM_WRITE))
			goto bad_area;
		flags |= FAULT_FLAG_WRITE;
		break;
	default:
		panic("%s: unhandled cause %lu", __func__, cause);
	}

	/*
	 * If for any reason at all we could not handle the fault,
	 * make sure we exit gracefully rather than endlessly redo
	 * the fault.
	 */
	fault = handle_mm_fault(mm, vma, addr, flags);

	/*
	 * If we need to retry but a fatal signal is pending, handle the
	 * signal first. We do not need to release the mmap_sem because it
	 * would already be released in __lock_page_or_retry in mm/filemap.c.
	 */
	if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(tsk))
		return;

	if (unlikely(fault & VM_FAULT_ERROR)) {
		if (fault & VM_FAULT_OOM)
			goto out_of_memory;
		else if (fault & VM_FAULT_SIGBUS)
			goto do_sigbus;
		BUG();
	}

	/*
	 * Major/minor page fault accounting is only done on the
	 * initial attempt. If we go through a retry, it is extremely
	 * likely that the page will be found in page cache at that point.
	 */
	if (flags & FAULT_FLAG_ALLOW_RETRY) {
		if (fault & VM_FAULT_MAJOR) {
			tsk->maj_flt++;
			perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, regs, addr);
		} else {
			tsk->min_flt++;
			perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, regs, addr);
		}
		if (fault & VM_FAULT_RETRY) {
			/*
			 * Clear FAULT_FLAG_ALLOW_RETRY to avoid any risk
			 * of starvation.
			 */
			flags &= ~(FAULT_FLAG_ALLOW_RETRY);
			flags |= FAULT_FLAG_TRIED;

			/*
			 * No need to up_read(&mm->mmap_sem) as we would
			 * have already released it in __lock_page_or_retry
			 * in mm/filemap.c.
			 */
			goto retry;
		}
	}

	up_read(&mm->mmap_sem);
	return;

	/*
	 * Something tried to access memory that isn't in our memory map.
	 * Fix it, but check if it's kernel or user first.
	 */
bad_area:
	up_read(&mm->mmap_sem);
	/* User mode accesses just cause a SIGSEGV */
	if (user_mode(regs)) {
		do_trap(regs, SIGSEGV, code, addr, tsk);
		return;
	}

no_context:
	/* Are we prepared to handle this kernel fault? */
	if (fixup_exception(regs)) {
		return;
	}

	/*
	 * Oops. The kernel tried to access some bad page. We'll have to
	 * terminate things with extreme prejudice.
	 */
	bust_spinlocks(1);
	pr_alert("Unable to handle kernel %s at virtual address " REG_FMT "\n",
		(addr < PAGE_SIZE) ? "NULL pointer dereference" :
		"paging request", addr);
	die(regs, "Oops");
	do_exit(SIGKILL);

	/*
	 * We ran out of memory, call the OOM killer, and return the userspace
	 * (which will retry the fault, or kill us if we got oom-killed).
	 */
out_of_memory:
	up_read(&mm->mmap_sem);
	if (!user_mode(regs))
		goto no_context;
	pagefault_out_of_memory();
	return;

do_sigbus:
	up_read(&mm->mmap_sem);
	/* Kernel mode? Handle exceptions or die */
	if (!user_mode(regs))
		goto no_context;
	do_trap(regs, SIGBUS, BUS_ADRERR, addr, tsk);
	return;

vmalloc_fault:
	{
		pgd_t *pgd, *pgd_k;
		pud_t *pud, *pud_k;
		pmd_t *pmd, *pmd_k;
		pte_t *pte_k;
		int index;

	        if (user_mode(regs))
			goto bad_area;

		/*
		 * Synchronize this task's top level page-table
		 * with the 'reference' page table.
		 *
		 * Do _not_ use "tsk->active_mm->pgd" here.
		 * We might be inside an interrupt in the middle
		 * of a task switch.
		 */
		index = pgd_index(addr);
		pgd = (pgd_t *)pfn_to_virt(csr_read(sptbr)) + index;
		pgd_k = init_mm.pgd + index;

		if (!pgd_present(*pgd_k))
			goto no_context;
		set_pgd(pgd, *pgd_k);

		pud = pud_offset(pgd, addr);
		pud_k = pud_offset(pgd_k, addr);
		if (!pud_present(*pud_k))
			goto no_context;

		/* Since the vmalloc area is global, it is unnecessary
		   to copy individual PTEs */
		pmd = pmd_offset(pud, addr);
		pmd_k = pmd_offset(pud_k, addr);
		if (!pmd_present(*pmd_k))
			goto no_context;
		set_pmd(pmd, *pmd_k);

		/* Make sure the actual PTE exists as well to
		 * catch kernel vmalloc-area accesses to non-mapped
		 * addresses. If we don't do this, this will just
		 * silently loop forever.
		 */
		pte_k = pte_offset_kernel(pmd_k, addr);
		if (!pte_present(*pte_k))
			goto no_context;
		return;
	}
}
Example #21
0
static unsigned long arch_get_unmapped_area_topdown_sz(struct file *file,
		unsigned long addr0, unsigned long len, unsigned long align_size,
		unsigned long pgoff, unsigned long flags)
{
	struct mm_struct *mm = current->mm;
	struct vm_area_struct *vma, *prev_vma;
	unsigned long base = mm->mmap_base, addr = addr0;
	unsigned long largest_hole = mm->cached_hole_size;
	unsigned long align_mask = ~(align_size - 1);
	int first_time = 1;

	/* don't allow allocations above current base */
	if (mm->free_area_cache > base)
		mm->free_area_cache = base;

	if (len <= largest_hole) {
	        largest_hole = 0;
		mm->free_area_cache  = base;
	}
try_again:
	/* make sure it can fit in the remaining address space */
	if (mm->free_area_cache < len)
		goto fail;

	/* either no address requested or can't fit in requested address hole */
	addr = (mm->free_area_cache - len) & align_mask;
	do {
		/*
		 * Lookup failure means no vma is above this address,
		 * i.e. return with success:
		 */
		vma = find_vma(mm, addr);
		if (!vma)
			return addr;

		/*
		 * new region fits between prev_vma->vm_end and
		 * vma->vm_start, use it:
		 */
		prev_vma = vma->vm_prev;
		if (addr + len <= vma->vm_start &&
		            (!prev_vma || (addr >= prev_vma->vm_end))) {
			/* remember the address as a hint for next time */
		        mm->cached_hole_size = largest_hole;
		        return (mm->free_area_cache = addr);
		} else {
			/* pull free_area_cache down to the first hole */
		        if (mm->free_area_cache == vma->vm_end) {
				mm->free_area_cache = vma->vm_start;
				mm->cached_hole_size = largest_hole;
			}
		}

		/* remember the largest hole we saw so far */
		if (addr + largest_hole < vma->vm_start)
		        largest_hole = vma->vm_start - addr;

		/* try just below the current vma->vm_start */
		addr = (vma->vm_start - len) & align_mask;
	} while (len <= vma->vm_start);

fail:
	/*
	 * if hint left us with no space for the requested
	 * mapping then try again:
	 */
	if (first_time) {
		mm->free_area_cache = base;
		largest_hole = 0;
		first_time = 0;
		goto try_again;
	}
	/*
	 * A failed mmap() very likely causes application failure,
	 * so fall back to the bottom-up function here. This scenario
	 * can happen with large stack limits and large mmap()
	 * allocations.
	 */
	mm->free_area_cache = TASK_UNMAPPED_BASE;
	mm->cached_hole_size = ~0UL;
	addr = arch_get_unmapped_area_bottomup_sz(file, addr0, len, align_size,
																pgoff, flags);

	/*
	 * Restore the topdown base:
	 */
	mm->free_area_cache = base;
	mm->cached_hole_size = ~0UL;

	return addr;
}
/*
 * Note this is constrained to return 0, -EFAULT, -EACCESS, -ENOMEM by
 * segv().
 */
int handle_page_fault(unsigned long address, unsigned long ip,
		      int is_write, int is_user, int *code_out)
{
	struct mm_struct *mm = current->mm;
	struct vm_area_struct *vma;
	pgd_t *pgd;
	pud_t *pud;
	pmd_t *pmd;
	pte_t *pte;
	int err = -EFAULT;
	unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE |
				 (is_write ? FAULT_FLAG_WRITE : 0);

	*code_out = SEGV_MAPERR;

	/*
	 * If the fault was during atomic operation, don't take the fault, just
	 * fail.
	 */
	if (in_atomic())
		goto out_nosemaphore;

retry:
	down_read(&mm->mmap_sem);
	vma = find_vma(mm, address);
	if (!vma)
		goto out;
	else if (vma->vm_start <= address)
		goto good_area;
	else if (!(vma->vm_flags & VM_GROWSDOWN))
		goto out;
	else if (is_user && !ARCH_IS_STACKGROW(address))
		goto out;
	else if (expand_stack(vma, address))
		goto out;

good_area:
	*code_out = SEGV_ACCERR;
	if (is_write && !(vma->vm_flags & VM_WRITE))
		goto out;

	/* Don't require VM_READ|VM_EXEC for write faults! */
	if (!is_write && !(vma->vm_flags & (VM_READ | VM_EXEC)))
		goto out;

	do {
		int fault;

		fault = handle_mm_fault(mm, vma, address, flags);

		if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(current))
			goto out_nosemaphore;

		if (unlikely(fault & VM_FAULT_ERROR)) {
			if (fault & VM_FAULT_OOM) {
				goto out_of_memory;
			} else if (fault & VM_FAULT_SIGBUS) {
				err = -EACCES;
				goto out;
			}
			BUG();
		}
		if (flags & FAULT_FLAG_ALLOW_RETRY) {
			if (fault & VM_FAULT_MAJOR)
				current->maj_flt++;
			else
				current->min_flt++;
			if (fault & VM_FAULT_RETRY) {
				flags &= ~FAULT_FLAG_ALLOW_RETRY;
				flags |= FAULT_FLAG_TRIED;

				goto retry;
			}
		}

		pgd = pgd_offset(mm, address);
		pud = pud_offset(pgd, address);
		pmd = pmd_offset(pud, address);
		pte = pte_offset_kernel(pmd, address);
	} while (!pte_present(*pte));
	err = 0;
	/*
	 * The below warning was added in place of
	 *	pte_mkyoung(); if (is_write) pte_mkdirty();
	 * If it's triggered, we'd see normally a hang here (a clean pte is
	 * marked read-only to emulate the dirty bit).
	 * However, the generic code can mark a PTE writable but clean on a
	 * concurrent read fault, triggering this harmlessly. So comment it out.
	 */
#if 0
	WARN_ON(!pte_young(*pte) || (is_write && !pte_dirty(*pte)));
#endif
	flush_tlb_page(vma, address);
out:
	up_read(&mm->mmap_sem);
out_nosemaphore:
	return err;

out_of_memory:
	/*
	 * We ran out of memory, call the OOM killer, and return the userspace
	 * (which will retry the fault, or kill us if we got oom-killed).
	 */
	up_read(&mm->mmap_sem);
	pagefault_out_of_memory();
	return 0;
}
/*
 * This routine handles page faults.  It determines the address,
 * and the problem, and then passes it off to one of the appropriate
 * routines.
 *
 * interruption code (int_code):
 *   04       Protection           ->  Write-Protection  (suprression)
 *   10       Segment translation  ->  Not present       (nullification)
 *   11       Page translation     ->  Not present       (nullification)
 *   3b       Region third trans.  ->  Not present       (nullification)
 */
static inline int do_exception(struct pt_regs *regs, int access)
{
	struct task_struct *tsk;
	struct mm_struct *mm;
	struct vm_area_struct *vma;
	unsigned long trans_exc_code;
	unsigned long address;
	unsigned int flags;
	int fault;

	if (notify_page_fault(regs))
		return 0;

	tsk = current;
	mm = tsk->mm;
	trans_exc_code = regs->int_parm_long;

	/*
	 * Verify that the fault happened in user space, that
	 * we are not in an interrupt and that there is a 
	 * user context.
	 */
	fault = VM_FAULT_BADCONTEXT;
	if (unlikely(!user_space_fault(trans_exc_code) || in_atomic() || !mm))
		goto out;

	address = trans_exc_code & __FAIL_ADDR_MASK;
	perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
	flags = FAULT_FLAG_ALLOW_RETRY;
	if (access == VM_WRITE || (trans_exc_code & store_indication) == 0x400)
		flags |= FAULT_FLAG_WRITE;
	down_read(&mm->mmap_sem);

#ifdef CONFIG_PGSTE
	if (test_tsk_thread_flag(current, TIF_SIE) && S390_lowcore.gmap) {
		address = __gmap_fault(address,
				     (struct gmap *) S390_lowcore.gmap);
		if (address == -EFAULT) {
			fault = VM_FAULT_BADMAP;
			goto out_up;
		}
		if (address == -ENOMEM) {
			fault = VM_FAULT_OOM;
			goto out_up;
		}
	}
#endif

retry:
	fault = VM_FAULT_BADMAP;
	vma = find_vma(mm, address);
	if (!vma)
		goto out_up;

	if (unlikely(vma->vm_start > address)) {
		if (!(vma->vm_flags & VM_GROWSDOWN))
			goto out_up;
		if (expand_stack(vma, address))
			goto out_up;
	}

	/*
	 * Ok, we have a good vm_area for this memory access, so
	 * we can handle it..
	 */
	fault = VM_FAULT_BADACCESS;
	if (unlikely(!(vma->vm_flags & access)))
		goto out_up;

	if (is_vm_hugetlb_page(vma))
		address &= HPAGE_MASK;
	/*
	 * If for any reason at all we couldn't handle the fault,
	 * make sure we exit gracefully rather than endlessly redo
	 * the fault.
	 */
	fault = handle_mm_fault(mm, vma, address, flags);
	if (unlikely(fault & VM_FAULT_ERROR))
		goto out_up;

	/*
	 * Major/minor page fault accounting is only done on the
	 * initial attempt. If we go through a retry, it is extremely
	 * likely that the page will be found in page cache at that point.
	 */
	if (flags & FAULT_FLAG_ALLOW_RETRY) {
		if (fault & VM_FAULT_MAJOR) {
			tsk->maj_flt++;
			perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1,
				      regs, address);
		} else {
			tsk->min_flt++;
			perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1,
				      regs, address);
		}
		if (fault & VM_FAULT_RETRY) {
			/* Clear FAULT_FLAG_ALLOW_RETRY to avoid any risk
			 * of starvation. */
			flags &= ~FAULT_FLAG_ALLOW_RETRY;
			flags |= FAULT_FLAG_TRIED;
			down_read(&mm->mmap_sem);
			goto retry;
		}
	}
	/*
	 * The instruction that caused the program check will
	 * be repeated. Don't signal single step via SIGTRAP.
	 */
	clear_tsk_thread_flag(tsk, TIF_PER_TRAP);
	fault = 0;
out_up:
	up_read(&mm->mmap_sem);
out:
	return fault;
}
Example #24
0
unsigned long
arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
			  const unsigned long len, const unsigned long pgoff,
			  const unsigned long flags)
{
	struct vm_area_struct *vma, *prev_vma;
	struct mm_struct *mm = current->mm;
	unsigned long base = mm->mmap_base, addr = addr0;
	int first_time = 1;
	unsigned long begin, end;
	
	find_start_end(flags, &begin, &end); 

	/* requested length too big for entire address space */
	if (len > end)
		return -ENOMEM;

	/* dont allow allocations above current base */
	if (mm->free_area_cache > base)
		mm->free_area_cache = base;

	/* requesting a specific address */
	if (addr) {
		addr = PAGE_ALIGN(addr);
		vma = find_vma(mm, addr);
		if (end - len >= addr &&
		    (!vma || addr + len <= vma->vm_start))
			return addr;
	}
	/* free_area_cache is not really optimized for 32 bit apps */
	if (sysctl_legacy_va_layout && ((flags & MAP_32BIT) || test_thread_flag(TIF_IA32)))
		goto fail;

try_again:
	/* make sure it can fit in the remaining address space */
	if (mm->free_area_cache < len)
		goto fail;

	/* either no address requested or cant fit in requested address hole */
	addr = (mm->free_area_cache - len) & PAGE_MASK;
	do {
		/*
		 * Lookup failure means no vma is above this address,
		 * i.e. return with success:
		 */
 	 	if (!(vma = find_vma_prev(mm, addr, &prev_vma)))
			return addr;

		/*
		 * new region fits between prev_vma->vm_end and
		 * vma->vm_start, use it:
		 */
		if (addr && addr+len <= vma->vm_start &&
				(!prev_vma || (addr >= prev_vma->vm_end)))
			/* remember the address as a hint for next time */
			return (mm->free_area_cache = addr);
		else
			/* pull free_area_cache down to the first hole */
			if (mm->free_area_cache == vma->vm_end)
				mm->free_area_cache = vma->vm_start;

		/* try just below the current vma->vm_start */
		addr = vma->vm_start-len;
	} while (len < vma->vm_start);

fail:
	/*
	 * if hint left us with no space for the requested
	 * mapping then try again:
	 */
	if (first_time) {
		mm->free_area_cache = base;
		first_time = 0;
		goto try_again;
	}
	/*
	 * A failed mmap() very likely causes application failure,
	 * so fall back to the bottom-up function here. This scenario
	 * can happen with large stack limits and large mmap()
	 * allocations.
	 */
	mm->free_area_cache = begin;
	addr = arch_get_unmapped_area(filp, addr0, len, pgoff, flags);
	/*
	 * Restore the topdown base:
	 */
	mm->free_area_cache = base;

	return addr;
}
Example #25
0
/*
 * Note this is constrained to return 0, -EFAULT, -EACCESS, -ENOMEM by
 * segv().
 */
int handle_page_fault(unsigned long address, unsigned long ip,
                      int is_write, int is_user, int *code_out)
{
    struct mm_struct *mm = current->mm;
    struct vm_area_struct *vma;
    pgd_t *pgd;
    pud_t *pud;
    pmd_t *pmd;
    pte_t *pte;
    int err = -EFAULT;

    *code_out = SEGV_MAPERR;

    /*
     * If the fault was during atomic operation, don't take the fault, just
     * fail.
     */
    if (in_atomic())
        goto out_nosemaphore;

    down_read(&mm->mmap_sem);
    vma = find_vma(mm, address);
    if (!vma)
        goto out;
    else if (vma->vm_start <= address)
        goto good_area;
    else if (!(vma->vm_flags & VM_GROWSDOWN))
        goto out;
    else if (is_user && !ARCH_IS_STACKGROW(address))
        goto out;
    else if (expand_stack(vma, address))
        goto out;

good_area:
    *code_out = SEGV_ACCERR;
    if (is_write && !(vma->vm_flags & VM_WRITE))
        goto out;

    /* Don't require VM_READ|VM_EXEC for write faults! */
    if (!is_write && !(vma->vm_flags & (VM_READ | VM_EXEC)))
        goto out;

    do {
        int fault;
survive:
        fault = handle_mm_fault(mm, vma, address, is_write);
        if (unlikely(fault & VM_FAULT_ERROR)) {
            if (fault & VM_FAULT_OOM) {
                err = -ENOMEM;
                goto out_of_memory;
            } else if (fault & VM_FAULT_SIGBUS) {
                err = -EACCES;
                goto out;
            }
            BUG();
        }
        if (fault & VM_FAULT_MAJOR)
            current->maj_flt++;
        else
            current->min_flt++;

        pgd = pgd_offset(mm, address);
        pud = pud_offset(pgd, address);
        pmd = pmd_offset(pud, address);
        pte = pte_offset_kernel(pmd, address);
    } while (!pte_present(*pte));
    err = 0;
    /*
     * The below warning was added in place of
     *	pte_mkyoung(); if (is_write) pte_mkdirty();
     * If it's triggered, we'd see normally a hang here (a clean pte is
     * marked read-only to emulate the dirty bit).
     * However, the generic code can mark a PTE writable but clean on a
     * concurrent read fault, triggering this harmlessly. So comment it out.
     */
#if 0
    WARN_ON(!pte_young(*pte) || (is_write && !pte_dirty(*pte)));
#endif
    flush_tlb_page(vma, address);
out:
    up_read(&mm->mmap_sem);
out_nosemaphore:
    return err;

    /*
     * We ran out of memory, or some other thing happened to us that made
     * us unable to handle the page fault gracefully.
     */
out_of_memory:
    if (is_global_init(current)) {
        up_read(&mm->mmap_sem);
        yield();
        down_read(&mm->mmap_sem);
        goto survive;
    }
    goto out;
}
Example #26
0
/*
 * This routine handles page faults.  It determines the problem, and
 * then passes it off to one of the appropriate routines.
 *
 * error_code:
 *	bit 0 == 0 means no page found, 1 means protection fault
 *	bit 1 == 0 means read, 1 means write
 *
 * If this routine detects a bad access, it returns 1, otherwise it
 * returns 0.
 */
int do_page_fault(struct pt_regs *regs, unsigned long address,
			      unsigned long error_code)
{
	struct mm_struct *mm = current->mm;
	struct vm_area_struct * vma;
	int fault;
	unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;

	pr_debug("do page fault:\nregs->sr=%#x, regs->pc=%#lx, address=%#lx, %ld, %p\n",
		regs->sr, regs->pc, address, error_code, mm ? mm->pgd : NULL);

	/*
	 * If we're in an interrupt or have no user
	 * context, we must not take the fault..
	 */
	if (faulthandler_disabled() || !mm)
		goto no_context;

	if (user_mode(regs))
		flags |= FAULT_FLAG_USER;
retry:
	down_read(&mm->mmap_sem);

	vma = find_vma(mm, address);
	if (!vma)
		goto map_err;
	if (vma->vm_flags & VM_IO)
		goto acc_err;
	if (vma->vm_start <= address)
		goto good_area;
	if (!(vma->vm_flags & VM_GROWSDOWN))
		goto map_err;
	if (user_mode(regs)) {
		/* Accessing the stack below usp is always a bug.  The
		   "+ 256" is there due to some instructions doing
		   pre-decrement on the stack and that doesn't show up
		   until later.  */
		if (address + 256 < rdusp())
			goto map_err;
	}
	if (expand_stack(vma, address))
		goto map_err;

/*
 * Ok, we have a good vm_area for this memory access, so
 * we can handle it..
 */
good_area:
	pr_debug("do_page_fault: good_area\n");
	switch (error_code & 3) {
		default:	/* 3: write, present */
			/* fall through */
		case 2:		/* write, not present */
			if (!(vma->vm_flags & VM_WRITE))
				goto acc_err;
			flags |= FAULT_FLAG_WRITE;
			break;
		case 1:		/* read, present */
			goto acc_err;
		case 0:		/* read, not present */
			if (!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE)))
				goto acc_err;
	}

	/*
	 * If for any reason at all we couldn't handle the fault,
	 * make sure we exit gracefully rather than endlessly redo
	 * the fault.
	 */

	fault = handle_mm_fault(vma, address, flags);
	pr_debug("handle_mm_fault returns %d\n", fault);

	if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(current))
		return 0;

	if (unlikely(fault & VM_FAULT_ERROR)) {
		if (fault & VM_FAULT_OOM)
			goto out_of_memory;
		else if (fault & VM_FAULT_SIGSEGV)
			goto map_err;
		else if (fault & VM_FAULT_SIGBUS)
			goto bus_err;
		BUG();
	}

	/*
	 * Major/minor page fault accounting is only done on the
	 * initial attempt. If we go through a retry, it is extremely
	 * likely that the page will be found in page cache at that point.
	 */
	if (flags & FAULT_FLAG_ALLOW_RETRY) {
		if (fault & VM_FAULT_MAJOR)
			current->maj_flt++;
		else
			current->min_flt++;
		if (fault & VM_FAULT_RETRY) {
			/* Clear FAULT_FLAG_ALLOW_RETRY to avoid any risk
			 * of starvation. */
			flags &= ~FAULT_FLAG_ALLOW_RETRY;
			flags |= FAULT_FLAG_TRIED;

			/*
			 * No need to up_read(&mm->mmap_sem) as we would
			 * have already released it in __lock_page_or_retry
			 * in mm/filemap.c.
			 */

			goto retry;
		}
	}

	up_read(&mm->mmap_sem);
	return 0;

/*
 * We ran out of memory, or some other thing happened to us that made
 * us unable to handle the page fault gracefully.
 */
out_of_memory:
	up_read(&mm->mmap_sem);
	if (!user_mode(regs))
		goto no_context;
	pagefault_out_of_memory();
	return 0;

no_context:
	current->thread.signo = SIGBUS;
	current->thread.faddr = address;
	return send_fault_sig(regs);

bus_err:
	current->thread.signo = SIGBUS;
	current->thread.code = BUS_ADRERR;
	current->thread.faddr = address;
	goto send_sig;

map_err:
	current->thread.signo = SIGSEGV;
	current->thread.code = SEGV_MAPERR;
	current->thread.faddr = address;
	goto send_sig;

acc_err:
	current->thread.signo = SIGSEGV;
	current->thread.code = SEGV_ACCERR;
	current->thread.faddr = address;

send_sig:
	up_read(&mm->mmap_sem);
	return send_fault_sig(regs);
}
Example #27
0
/*
 * This routine handles page faults.  It determines the address,
 * and the problem, and then passes it off to one of the appropriate
 * routines.
 *
 * error_code:
 *	bit 0 == 0 means no page found, 1 means protection fault
 *	bit 1 == 0 means read, 1 means write
 *	bit 2 == 0 means kernel, 1 means user-mode
 */
asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long error_code)
{
	struct task_struct *tsk;
	struct mm_struct *mm;
	struct vm_area_struct * vma;
	unsigned long address;
	unsigned long page;
	unsigned long fixup;
	int write;
	siginfo_t info;

	/* get the address */
	__asm__("movl %%cr2,%0":"=r" (address));
#if CONFIG_X86_SWITCH_PAGETABLES
	if (!user_mode(regs))
		goto oops;
#endif

	/* It's safe to allow irq's after cr2 has been saved */
	if (regs->eflags & X86_EFLAGS_IF)
		local_irq_enable();

	tsk = current;

	/*
	 * We fault-in kernel-space virtual memory on-demand. The
	 * 'reference' page table is init_mm.pgd.
	 *
	 * NOTE! We MUST NOT take any locks for this case. We may
	 * be in an interrupt or a critical region, and should
	 * only copy the information from the master page table,
	 * nothing more.
	 *
	 * This verifies that the fault happens in kernel space
	 * (error_code & 4) == 0, and that the fault was not a
	 * protection error (error_code & 1) == 0.
	 */
	if (address >= TASK_SIZE && !(error_code & 5))
		goto vmalloc_fault;

	mm = tsk->mm;
	info.si_code = SEGV_MAPERR;

	/*
	 * If we're in an interrupt or have no user
	 * context, we must not take the fault..
	 */
	if (in_interrupt() || !mm)
		goto no_context;

	down_read(&mm->mmap_sem);

	vma = find_vma(mm, address);
	if (!vma)
		goto bad_area;
	if (vma->vm_start <= address)
		goto good_area;
	if (!(vma->vm_flags & VM_GROWSDOWN))
		goto bad_area;
	if (error_code & 4) {
		/*
		 * accessing the stack below %esp is always a bug.
		 * The "+ 32" is there due to some instructions (like
		 * pusha) doing post-decrement on the stack and that
		 * doesn't show up until later..
		 */
		if (address + 32 < regs->esp)
			goto bad_area;
	}
	if (expand_stack(vma, address))
		goto bad_area;
/*
 * Ok, we have a good vm_area for this memory access, so
 * we can handle it..
 */
good_area:
	info.si_code = SEGV_ACCERR;
	write = 0;
	switch (error_code & 3) {
		default:	/* 3: write, present */
#ifdef TEST_VERIFY_AREA
			if (regs->cs == KERNEL_CS)
				printk("WP fault at %08lx\n", regs->eip);
#endif
			/* fall through */
		case 2:		/* write, not present */
			if (!(vma->vm_flags & VM_WRITE))
				goto bad_area;
			write++;
			break;
		case 1:		/* read, present */
			goto bad_area;
		case 0:		/* read, not present */
			if (!(vma->vm_flags & (VM_READ | VM_EXEC)))
				goto bad_area;
	}

 survive:
	/*
	 * If for any reason at all we couldn't handle the fault,
	 * make sure we exit gracefully rather than endlessly redo
	 * the fault.
	 */
	switch (handle_mm_fault(mm, vma, address, write)) {
	case 1:
		tsk->min_flt++;
		break;
	case 2:
		tsk->maj_flt++;
		break;
	case 0:
		goto do_sigbus;
	default:
		goto out_of_memory;
	}

	/*
	 * Did it hit the DOS screen memory VA from vm86 mode?
	 */
	if (regs->eflags & VM_MASK) {
		unsigned long bit = (address - 0xA0000) >> PAGE_SHIFT;
		if (bit < 32)
			tsk->thread.screen_bitmap |= 1 << bit;
	}
Example #28
0
/*
 * This routine handles page faults.  It determines the address,
 * and the problem, and then passes it off to one of the appropriate
 * routines.
 */
asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long write,
			      unsigned long address)
{
	struct vm_area_struct * vma = NULL;
	struct task_struct *tsk = current;
	struct mm_struct *mm = tsk->mm;
	const int field = sizeof(unsigned long) * 2;
	siginfo_t info;

#if 0
	printk("Cpu%d[%s:%d:%0*lx:%ld:%0*lx]\n", smp_processor_id(),
	       current->comm, current->pid, field, address, write,
	       field, regs->cp0_epc);
#endif

	info.si_code = SEGV_MAPERR;

	/*
	 * We fault-in kernel-space virtual memory on-demand. The
	 * 'reference' page table is init_mm.pgd.
	 *
	 * NOTE! We MUST NOT take any locks for this case. We may
	 * be in an interrupt or a critical region, and should
	 * only copy the information from the master page table,
	 * nothing more.
	 */
	if (unlikely(address >= VMALLOC_START && address <= VMALLOC_END))
		goto vmalloc_fault;

	/*
	 * If we're in an interrupt or have no user
	 * context, we must not take the fault..
	 */
	if (in_atomic() || !mm)
		goto bad_area_nosemaphore;

	down_read(&mm->mmap_sem);
	vma = find_vma(mm, address);
	if (!vma)
		goto bad_area;
	if (vma->vm_start <= address)
		goto good_area;
	if (!(vma->vm_flags & VM_GROWSDOWN))
		goto bad_area;
	if (expand_stack(vma, address))
		goto bad_area;
/*
 * Ok, we have a good vm_area for this memory access, so
 * we can handle it..
 */
good_area:
	info.si_code = SEGV_ACCERR;

	if (write) {
		if (!(vma->vm_flags & VM_WRITE))
			goto bad_area;
	} else {
		if (!(vma->vm_flags & (VM_READ | VM_WRITE | VM_EXEC)))
			goto bad_area;
	}

survive:
	/*
	 * If for any reason at all we couldn't handle the fault,
	 * make sure we exit gracefully rather than endlessly redo
	 * the fault.
	 */
	switch (handle_mm_fault(mm, vma, address, write)) {
	case VM_FAULT_MINOR:
		tsk->min_flt++;
		break;
	case VM_FAULT_MAJOR:
		tsk->maj_flt++;
		break;
	case VM_FAULT_SIGBUS:
		goto do_sigbus;
	case VM_FAULT_OOM:
		goto out_of_memory;
	default:
		BUG();
	}

	up_read(&mm->mmap_sem);
	return;

/*
 * Something tried to access memory that isn't in our memory map..
 * Fix it, but check if it's kernel or user first..
 */
bad_area:
	up_read(&mm->mmap_sem);

bad_area_nosemaphore:
	/* User mode accesses just cause a SIGSEGV */
	if (user_mode(regs)) {
		tsk->thread.cp0_badvaddr = address;
		tsk->thread.error_code = write;
#if 0
		printk("do_page_fault() #2: sending SIGSEGV to %s for "
		       "invalid %s\n%0*lx (epc == %0*lx, ra == %0*lx)\n",
		       tsk->comm,
		       write ? "write access to" : "read access from",
		       field, address,
		       field, (unsigned long) regs->cp0_epc,
		       field, (unsigned long) regs->regs[31]);
#endif
		info.si_signo = SIGSEGV;
		info.si_errno = 0;
		/* info.si_code has been set above */
		info.si_addr = (void __user *) address;
		force_sig_info(SIGSEGV, &info, tsk);
		return;
	}

no_context:
	/* Are we prepared to handle this kernel fault?  */
	if (fixup_exception(regs)) {
		current->thread.cp0_baduaddr = address;
		return;
	}

	/*
	 * Oops. The kernel tried to access some bad page. We'll have to
	 * terminate things with extreme prejudice.
	 */
	bust_spinlocks(1);

	printk(KERN_ALERT "CPU %d Unable to handle kernel paging request at "
	       "virtual address %0*lx, epc == %0*lx, ra == %0*lx\n",
	       smp_processor_id(), field, address, field, regs->cp0_epc,
	       field,  regs->regs[31]);
	die("Oops", regs);

/*
 * We ran out of memory, or some other thing happened to us that made
 * us unable to handle the page fault gracefully.
 */
out_of_memory:
	up_read(&mm->mmap_sem);
	if (is_init(tsk)) {
		yield();
		down_read(&mm->mmap_sem);
		goto survive;
	}
	printk("VM: killing process %s\n", tsk->comm);
	if (user_mode(regs))
		do_exit(SIGKILL);
	goto no_context;

do_sigbus:
	up_read(&mm->mmap_sem);

	/* Kernel mode? Handle exceptions or die */
	if (!user_mode(regs))
		goto no_context;
	else
	/*
	 * Send a sigbus, regardless of whether we were in kernel
	 * or user mode.
	 */
#if 0
		printk("do_page_fault() #3: sending SIGBUS to %s for "
		       "invalid %s\n%0*lx (epc == %0*lx, ra == %0*lx)\n",
		       tsk->comm,
		       write ? "write access to" : "read access from",
		       field, address,
		       field, (unsigned long) regs->cp0_epc,
		       field, (unsigned long) regs->regs[31]);
#endif
	tsk->thread.cp0_badvaddr = address;
	info.si_signo = SIGBUS;
	info.si_errno = 0;
	info.si_code = BUS_ADRERR;
	info.si_addr = (void __user *) address;
	force_sig_info(SIGBUS, &info, tsk);

	return;
vmalloc_fault:
	{
		/*
		 * Synchronize this task's top level page-table
		 * with the 'reference' page table.
		 *
		 * Do _not_ use "tsk" here. We might be inside
		 * an interrupt in the middle of a task switch..
		 */
		int offset = __pgd_offset(address);
		pgd_t *pgd, *pgd_k;
		pud_t *pud, *pud_k;
		pmd_t *pmd, *pmd_k;
		pte_t *pte_k;

		pgd = (pgd_t *) pgd_current[smp_processor_id()] + offset;
		pgd_k = init_mm.pgd + offset;

		if (!pgd_present(*pgd_k))
			goto no_context;
		set_pgd(pgd, *pgd_k);

		pud = pud_offset(pgd, address);
		pud_k = pud_offset(pgd_k, address);
		if (!pud_present(*pud_k))
			goto no_context;

		pmd = pmd_offset(pud, address);
		pmd_k = pmd_offset(pud_k, address);
		if (!pmd_present(*pmd_k))
			goto no_context;
		set_pmd(pmd, *pmd_k);

		pte_k = pte_offset_kernel(pmd_k, address);
		if (!pte_present(*pte_k))
			goto no_context;
		return;
	}
}
Example #29
0
/* do_pgfault - interrupt handler to process the page fault execption
 * @mm         : the control struct for a set of vma using the same PDT
 * @error_code : the error code recorded in trapframe->tf_err which is setted by x86 hardware
 * @addr       : the addr which causes a memory access exception, (the contents of the CR2 register)
 *
 * CALL GRAPH: trap--> trap_dispatch-->pgfault_handler-->do_pgfault
 * The processor provides ucore's do_pgfault function with two items of information to aid in diagnosing
 * the exception and recovering from it.
 *   (1) The contents of the CR2 register. The processor loads the CR2 register with the
 *       32-bit linear address that generated the exception. The do_pgfault fun can
 *       use this address to locate the corresponding page directory and page-table
 *       entries.
 *   (2) An error code on the kernel stack. The error code for a page fault has a format different from
 *       that for other exceptions. The error code tells the exception handler three things:
 *         -- The P flag   (bit 0) indicates whether the exception was due to a not-present page (0)
 *            or to either an access rights violation or the use of a reserved bit (1).
 *         -- The W/R flag (bit 1) indicates whether the memory access that caused the exception
 *            was a read (0) or write (1).
 *         -- The U/S flag (bit 2) indicates whether the processor was executing at user mode (1)
 *            or supervisor mode (0) at the time of the exception.
 */
int
do_pgfault(struct mm_struct *mm, uint32_t error_code, uintptr_t addr) {
    int ret = -E_INVAL;
    //try to find a vma which include addr
    struct vma_struct *vma = find_vma(mm, addr);

    pgfault_num++;
    //If the addr is in the range of a mm's vma?
    if (vma == NULL || vma->vm_start > addr) {
        cprintf("not valid addr %x, and  can not find it in vma\n", addr);
        goto failed;
    }
    //check the error_code
    switch (error_code & 3) {
    default:
            /* error code flag : default is 3 ( W/R=1, P=1): write, present */
    case 2: /* error code flag : (W/R=1, P=0): write, not present */
        if (!(vma->vm_flags & VM_WRITE)) {
            cprintf("do_pgfault failed: error code flag = write AND not present, but the addr's vma cannot write\n");
            goto failed;
        }
        break;
    case 1: /* error code flag : (W/R=0, P=1): read, present */
        cprintf("do_pgfault failed: error code flag = read AND present\n");
        goto failed;
    case 0: /* error code flag : (W/R=0, P=0): read, not present */
        if (!(vma->vm_flags & (VM_READ | VM_EXEC))) {
            cprintf("do_pgfault failed: error code flag = read AND not present, but the addr's vma cannot read or exec\n");
            goto failed;
        }
    }
    /* IF (write an existed addr ) OR
     *    (write an non_existed addr && addr is writable) OR
     *    (read  an non_existed addr && addr is readable)
     * THEN
     *    continue process
     */
    
    uint32_t perm = PTE_U;
    if (vma->vm_flags & VM_WRITE) {
        perm |= PTE_W;
    }
    addr = ROUNDDOWN(addr, PGSIZE);

    ret = -E_NO_MEM;

    pte_t *ptep=NULL;
    /*LAB3 EXERCISE 1: YOUR CODE
    * Maybe you want help comment, BELOW comments can help you finish the code
    *
    * Some Useful MACROs and DEFINEs, you can use them in below implementation.
    * MACROs or Functions:
    *   get_pte : get an pte and return the kernel virtual address of this pte for la
    *             if the PT contians this pte didn't exist, alloc a page for PT (notice the 3th parameter '1')
    *   pgdir_alloc_page : call alloc_page & page_insert functions to allocate a page size memory & setup
    *             an addr map pa<--->la with linear address la and the PDT pgdir
    * DEFINES:
    *   VM_WRITE  : If vma->vm_flags & VM_WRITE == 1/0, then the vma is writable/non writable
    *   PTE_W           0x002                   // page table/directory entry flags bit : Writeable
    *   PTE_U           0x004                   // page table/directory entry flags bit : User can access
    * VARIABLES:
    *   mm->pgdir : the PDT of these vma
    *
    */
//#if 0
    /*LAB3 EXERCISE 1: YOUR CODE*/
    ptep = get_pte(mm->pgdir, addr, 1);              //(1) try to find a pte, if pte's PT(Page Table) isn't existed, then create a PT.
    if (ptep == NULL) {
        cprintf("get_pte failed in do_pgfault \n");
        goto failed;
    }
    //(2) if the phy addr isn't exist, then alloc a page & map the phy addr with logical addr
    if (*ptep == 0) {
        if (pgdir_alloc_page(mm->pgdir, addr, perm) == NULL) {
            cprintf("pgdir_alloc_page failed in do_pgfault \n");
            goto failed;
        }
    }
    else {
    /*LAB3 EXERCISE 2: YOUR CODE
    * Now we think this pte is a  swap entry, we should load data from disk to a page with phy addr,
    * and map the phy addr with logical addr, trigger swap manager to record the access situation of this page.
    *
    *  Some Useful MACROs and DEFINEs, you can use them in below implementation.
    *  MACROs or Functions:
    *    swap_in(mm, addr, &page) : alloc a memory page, then according to the swap entry in PTE for addr,
    *                               find the addr of disk page, read the content of disk page into this memroy page
    *    page_insert : build the map of phy addr of an Page with the linear addr la
    *    swap_map_swappable : set the page swappable
    */
        if(swap_init_ok) {
            struct Page *page=NULL;
                                    //(1)According to the mm AND addr, try to load the content of right disk page
                                    //    into the memory which page managed.
            int r;
            r = swap_in(mm, addr, &page);
            if (r != 0) {
                cprintf("swap_in failed in do_pgfault \n");
                goto failed;
            }
                                    //(2) According to the mm, addr AND page, setup the map of phy addr <---> logical addr
            page_insert(mm->pgdir, page, addr, perm);
                                    //(3) make the page swappable.
            swap_map_swappable(mm, addr, page, 1);
            page->pra_vaddr = addr;
        }
        else {
            cprintf("no swap_init_ok but ptep is %x, failed\n",*ptep);
            goto failed;
        }
   }
//#endif
   ret = 0;
failed:
    return ret;
}
Example #30
0
asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long address,
			      unsigned long vector, int write_acc)
{
	struct task_struct *tsk;
	struct mm_struct *mm;
	struct vm_area_struct *vma;
	siginfo_t info;
	int fault;
	unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;

	tsk = current;

	/*
	 * We fault-in kernel-space virtual memory on-demand. The
	 * 'reference' page table is init_mm.pgd.
	 *
	 * NOTE! We MUST NOT take any locks for this case. We may
	 * be in an interrupt or a critical region, and should
	 * only copy the information from the master page table,
	 * nothing more.
	 *
	 * NOTE2: This is done so that, when updating the vmalloc
	 * mappings we don't have to walk all processes pgdirs and
	 * add the high mappings all at once. Instead we do it as they
	 * are used. However vmalloc'ed page entries have the PAGE_GLOBAL
	 * bit set so sometimes the TLB can use a lingering entry.
	 *
	 * This verifies that the fault happens in kernel space
	 * and that the fault was not a protection error.
	 */

	if (address >= VMALLOC_START &&
	    (vector != 0x300 && vector != 0x400) &&
	    !user_mode(regs))
		goto vmalloc_fault;

	/* If exceptions were enabled, we can reenable them here */
	if (user_mode(regs)) {
		/* Exception was in userspace: reenable interrupts */
		local_irq_enable();
	} else {
		/* If exception was in a syscall, then IRQ's may have
		 * been enabled or disabled.  If they were enabled,
		 * reenable them.
		 */
		if (regs->sr && (SPR_SR_IEE | SPR_SR_TEE))
			local_irq_enable();
	}

	mm = tsk->mm;
	info.si_code = SEGV_MAPERR;

	/*
	 * If we're in an interrupt or have no user
	 * context, we must not take the fault..
	 */

	if (in_interrupt() || !mm)
		goto no_context;

retry:
	down_read(&mm->mmap_sem);
	vma = find_vma(mm, address);

	if (!vma)
		goto bad_area;

	if (vma->vm_start <= address)
		goto good_area;

	if (!(vma->vm_flags & VM_GROWSDOWN))
		goto bad_area;

	if (user_mode(regs)) {
		/*
		 * accessing the stack below usp is always a bug.
		 * we get page-aligned addresses so we can only check
		 * if we're within a page from usp, but that might be
		 * enough to catch brutal errors at least.
		 */
		if (address + PAGE_SIZE < regs->sp)
			goto bad_area;
	}
	if (expand_stack(vma, address))
		goto bad_area;

	/*
	 * Ok, we have a good vm_area for this memory access, so
	 * we can handle it..
	 */

good_area:
	info.si_code = SEGV_ACCERR;

	/* first do some preliminary protection checks */

	if (write_acc) {
		if (!(vma->vm_flags & VM_WRITE))
			goto bad_area;
		flags |= FAULT_FLAG_WRITE;
	} else {
		/* not present */
		if (!(vma->vm_flags & (VM_READ | VM_EXEC)))
			goto bad_area;
	}

	/* are we trying to execute nonexecutable area */
	if ((vector == 0x400) && !(vma->vm_page_prot.pgprot & _PAGE_EXEC))
		goto bad_area;

	/*
	 * If for any reason at all we couldn't handle the fault,
	 * make sure we exit gracefully rather than endlessly redo
	 * the fault.
	 */

	fault = handle_mm_fault(mm, vma, address, flags);

	if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(current))
		return;

	if (unlikely(fault & VM_FAULT_ERROR)) {
		if (fault & VM_FAULT_OOM)
			goto out_of_memory;
		else if (fault & VM_FAULT_SIGBUS)
			goto do_sigbus;
		BUG();
	}

	if (flags & FAULT_FLAG_ALLOW_RETRY) {
		/*RGD modeled on Cris */
		if (fault & VM_FAULT_MAJOR)
			tsk->maj_flt++;
		else
			tsk->min_flt++;
		if (fault & VM_FAULT_RETRY) {
			flags &= ~FAULT_FLAG_ALLOW_RETRY;
			flags |= FAULT_FLAG_TRIED;

			 /* No need to up_read(&mm->mmap_sem) as we would
			 * have already released it in __lock_page_or_retry
			 * in mm/filemap.c.
			 */

			goto retry;
		}
	}

	up_read(&mm->mmap_sem);
	return;

	/*
	 * Something tried to access memory that isn't in our memory map..
	 * Fix it, but check if it's kernel or user first..
	 */

bad_area:
	up_read(&mm->mmap_sem);

bad_area_nosemaphore:

	/* User mode accesses just cause a SIGSEGV */

	if (user_mode(regs)) {
		info.si_signo = SIGSEGV;
		info.si_errno = 0;
		/* info.si_code has been set above */
		info.si_addr = (void *)address;
		force_sig_info(SIGSEGV, &info, tsk);
		return;
	}

no_context:

	/* Are we prepared to handle this kernel fault?
	 *
	 * (The kernel has valid exception-points in the source
	 *  when it acesses user-memory. When it fails in one
	 *  of those points, we find it in a table and do a jump
	 *  to some fixup code that loads an appropriate error
	 *  code)
	 */

	{
		const struct exception_table_entry *entry;

		__asm__ __volatile__("l.nop 42");

		if ((entry = search_exception_tables(regs->pc)) != NULL) {
			/* Adjust the instruction pointer in the stackframe */
			regs->pc = entry->fixup;
			return;
		}
	}

	/*
	 * Oops. The kernel tried to access some bad page. We'll have to
	 * terminate things with extreme prejudice.
	 */

	if ((unsigned long)(address) < PAGE_SIZE)
		printk(KERN_ALERT
		       "Unable to handle kernel NULL pointer dereference");
	else
		printk(KERN_ALERT "Unable to handle kernel access");
	printk(" at virtual address 0x%08lx\n", address);

	die("Oops", regs, write_acc);

	do_exit(SIGKILL);

	/*
	 * We ran out of memory, or some other thing happened to us that made
	 * us unable to handle the page fault gracefully.
	 */

out_of_memory:
	__asm__ __volatile__("l.nop 42");
	__asm__ __volatile__("l.nop 1");

	up_read(&mm->mmap_sem);
	if (!user_mode(regs))
		goto no_context;
	pagefault_out_of_memory();
	return;

do_sigbus:
	up_read(&mm->mmap_sem);

	/*
	 * Send a sigbus, regardless of whether we were in kernel
	 * or user mode.
	 */
	info.si_signo = SIGBUS;
	info.si_errno = 0;
	info.si_code = BUS_ADRERR;
	info.si_addr = (void *)address;
	force_sig_info(SIGBUS, &info, tsk);

	/* Kernel mode? Handle exceptions or die */
	if (!user_mode(regs))
		goto no_context;
	return;

vmalloc_fault:
	{
		/*
		 * Synchronize this task's top level page-table
		 * with the 'reference' page table.
		 *
		 * Use current_pgd instead of tsk->active_mm->pgd
		 * since the latter might be unavailable if this
		 * code is executed in a misfortunately run irq
		 * (like inside schedule() between switch_mm and
		 *  switch_to...).
		 */

		int offset = pgd_index(address);
		pgd_t *pgd, *pgd_k;
		pud_t *pud, *pud_k;
		pmd_t *pmd, *pmd_k;
		pte_t *pte_k;

/*
		phx_warn("do_page_fault(): vmalloc_fault will not work, "
			 "since current_pgd assign a proper value somewhere\n"
			 "anyhow we don't need this at the moment\n");

		phx_mmu("vmalloc_fault");
*/
		pgd = (pgd_t *)current_pgd + offset;
		pgd_k = init_mm.pgd + offset;

		/* Since we're two-level, we don't need to do both
		 * set_pgd and set_pmd (they do the same thing). If
		 * we go three-level at some point, do the right thing
		 * with pgd_present and set_pgd here.
		 *
		 * Also, since the vmalloc area is global, we don't
		 * need to copy individual PTE's, it is enough to
		 * copy the pgd pointer into the pte page of the
		 * root task. If that is there, we'll find our pte if
		 * it exists.
		 */

		pud = pud_offset(pgd, address);
		pud_k = pud_offset(pgd_k, address);
		if (!pud_present(*pud_k))
			goto no_context;

		pmd = pmd_offset(pud, address);
		pmd_k = pmd_offset(pud_k, address);

		if (!pmd_present(*pmd_k))
			goto bad_area_nosemaphore;

		set_pmd(pmd, *pmd_k);

		/* Make sure the actual PTE exists as well to
		 * catch kernel vmalloc-area accesses to non-mapped
		 * addresses. If we don't do this, this will just
		 * silently loop forever.
		 */

		pte_k = pte_offset_kernel(pmd_k, address);
		if (!pte_present(*pte_k))
			goto no_context;

		return;
	}
}