/* * check that an address falls within the bounds of the target process's memory mappings */ static inline int is_user_addr_valid(struct task_struct *child, unsigned long start, unsigned long len) { struct vm_area_struct *vma; struct sram_list_struct *sraml; /* overflow */ if (start + len < start) return -EIO; vma = find_vma(child->mm, start); if (vma && start >= vma->vm_start && start + len <= vma->vm_end) return 0; for (sraml = child->mm->context.sram_list; sraml; sraml = sraml->next) if (start >= (unsigned long)sraml->addr && start + len < (unsigned long)sraml->addr + sraml->length) return 0; if (start >= FIXED_CODE_START && start + len < FIXED_CODE_END) return 0; return -EIO; }
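A minimal caller sketch, not taken from the source: how a ptrace-style peek helper might gate an access with is_user_addr_valid() before reading the traced child's memory. The function name peek_user_word() and the use of access_process_vm() here are assumptions for illustration only.

static int peek_user_word(struct task_struct *child, unsigned long addr, unsigned long *val)
{
	/* Reject wrapping or unmapped ranges before touching anything. */
	if (is_user_addr_valid(child, addr, sizeof(*val)) < 0)
		return -EIO;

	/* Hypothetical transfer; access_process_vm() returns bytes copied. */
	if (access_process_vm(child, addr, val, sizeof(*val), 0) != sizeof(*val))
		return -EIO;

	return 0;
}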
static int check_vaddr_bounds(unsigned long start, unsigned long end) { struct mm_struct *mm = current->active_mm; struct vm_area_struct *vma; int ret = 1; if (end < start) goto out; down_read(&mm->mmap_sem); vma = find_vma(mm, start); if (vma && vma->vm_start < end) { if (start < vma->vm_start) goto out_up; if (end > vma->vm_end) goto out_up; ret = 0; } out_up: up_read(&mm->mmap_sem); out: return ret; }
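check_vaddr_bounds() returns 0 only when [start, end) lies inside a single VMA of the current mm, and nonzero otherwise. A hedged usage sketch follows; the cache-maintenance entry point below is hypothetical.

static long do_user_cache_flush(unsigned long start, unsigned long len)
{
	unsigned long end = start + len;

	/* Refuse the operation if the range is not fully mapped. */
	if (check_vaddr_bounds(start, end))
		return -EINVAL;

	/* The architecture-specific flush of [start, end) would go here. */
	return 0;
}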
unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr, unsigned long len, unsigned long pgoff, unsigned long flags) { struct mm_struct *mm = current->mm; struct vm_area_struct *vma; unsigned long task_size = TASK_SIZE; if (test_thread_flag(TIF_32BIT)) task_size = STACK_TOP32; if (len & ~HPAGE_MASK) return -EINVAL; if (len > task_size) return -ENOMEM; if (flags & MAP_FIXED) { if (prepare_hugepage_range(file, addr, len)) return -EINVAL; return addr; } if (addr) { addr = ALIGN(addr, HPAGE_SIZE); vma = find_vma(mm, addr); if (task_size - len >= addr && (!vma || addr + len <= vma->vm_start)) return addr; } if (mm->get_unmapped_area == arch_get_unmapped_area) return hugetlb_get_unmapped_area_bottomup(file, addr, len, pgoff, flags); else return hugetlb_get_unmapped_area_topdown(file, addr, len, pgoff, flags); }
/* * IA-32 Huge TLB Page Support for Kernel. * * Copyright (C) 2002, Rohit Seth <*****@*****.**> */ #include <linux/init.h> #include <linux/fs.h> #include <linux/mm.h> #include <linux/hugetlb.h> #include <linux/pagemap.h> #include <linux/err.h> #include <linux/sysctl.h> #include <asm/mman.h> #include <asm/tlb.h> #include <asm/tlbflush.h> #include <asm/pgalloc.h> #if 0 /* This is just for testing */ struct page * follow_huge_addr(struct mm_struct *mm, unsigned long address, int write) { unsigned long start = address; unsigned long vpfn = address >> PAGE_SHIFT; int length = 1; int nr; struct page *page; struct vm_area_struct *vma; pte_t *pte; vma = find_vma(mm, address); if (!vma || !is_vm_hugetlb_page(vma)) return ERR_PTR(-EINVAL); pte = huge_pte_offset(mm, address); /* hugetlb should be locked, and hence, prefaulted */ WARN_ON(!pte || pte_none(*pte)); page = &pte_page(*pte)[vpfn % (HPAGE_SIZE/PAGE_SIZE)]; WARN_ON(!PageHead(page)); return page; }
int mm_brk(struct mm_struct *mm, uintptr_t addr, size_t len) { uintptr_t start = ROUNDDOWN(addr, PGSIZE), end = ROUNDUP(addr + len, PGSIZE); if (!USER_ACCESS(start, end)) { return -E_INVAL; } int ret; if ((ret = mm_unmap(mm, start, end - start)) != 0) { return ret; } uint32_t vm_flags = VM_READ | VM_WRITE; struct vma_struct *vma = find_vma(mm, start - 1); if (vma != NULL && vma->vm_end == start && vma->vm_flags == vm_flags) { vma->vm_end = end; return 0; } if ((vma = vma_create(start, end, vm_flags)) == NULL) { return -E_NO_MEM; } insert_vma_struct(mm, vma); return 0; }
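A hypothetical check routine in the spirit of the self-tests elsewhere in this file (the USERBASE and PGSIZE constants are assumed from the surrounding headers): two back-to-back mm_brk() calls on adjacent, page-aligned ranges, where the second call is expected to extend the VMA created by the first because the end address and flags match.

static void check_mm_brk(struct mm_struct *mm)
{
	assert(mm_brk(mm, USERBASE, PGSIZE) == 0);
	assert(mm_brk(mm, USERBASE + PGSIZE, PGSIZE) == 0);
	/* The two requests should have merged into one VMA. */
	assert(find_vma(mm, USERBASE)->vm_end == USERBASE + 2 * PGSIZE);
}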
static void check_vma_struct(void) { size_t nr_free_pages_store = nr_free_pages(); struct mm_struct *mm = mm_create(); assert(mm != NULL); int step1 = 10, step2 = step1 * 10; int i; for (i = step1; i >= 1; i --) { struct vma_struct *vma = vma_create(i * 5, i * 5 + 2, 0); assert(vma != NULL); insert_vma_struct(mm, vma); } for (i = step1 + 1; i <= step2; i ++) { struct vma_struct *vma = vma_create(i * 5, i * 5 + 2, 0); assert(vma != NULL); insert_vma_struct(mm, vma); } list_entry_t *le = list_next(&(mm->mmap_list)); for (i = 1; i <= step2; i ++) { assert(le != &(mm->mmap_list)); struct vma_struct *mmap = le2vma(le, list_link); assert(mmap->vm_start == i * 5 && mmap->vm_end == i * 5 + 2); le = list_next(le); } for (i = 5; i <= 5 * step2; i +=5) { struct vma_struct *vma1 = find_vma(mm, i); assert(vma1 != NULL); struct vma_struct *vma2 = find_vma(mm, i+1); assert(vma2 != NULL); struct vma_struct *vma3 = find_vma(mm, i+2); assert(vma3 == NULL); struct vma_struct *vma4 = find_vma(mm, i+3); assert(vma4 == NULL); struct vma_struct *vma5 = find_vma(mm, i+4); assert(vma5 == NULL); assert(vma1->vm_start == i && vma1->vm_end == i + 2); assert(vma2->vm_start == i && vma2->vm_end == i + 2); } for (i =4; i>=0; i--) { struct vma_struct *vma_below_5= find_vma(mm,i); if (vma_below_5 != NULL ) { cprintf("vma_below_5: i %x, start %x, end %x\n",i, vma_below_5->vm_start, vma_below_5->vm_end); } assert(vma_below_5 == NULL); } mm_destroy(mm); assert(nr_free_pages_store == nr_free_pages()); cprintf("check_vma_struct() succeeded!\n"); }
/* * Hacked from kernel function __get_user_pages in mm/memory.c * * Handle buffers allocated by other kernel space driver and mmaped into user * space, function Ignore the VM_PFNMAP and VM_IO flag in VMA structure * * Get physical pages from user space virtual address and update into page list */ static int __get_pfnmap_pages(struct task_struct *tsk, struct mm_struct *mm, unsigned long start, int nr_pages, unsigned int gup_flags, struct page **pages, struct vm_area_struct **vmas) { int i, ret; unsigned long vm_flags; if (nr_pages <= 0) return 0; VM_BUG_ON(!!pages != !!(gup_flags & FOLL_GET)); /* * Require read or write permissions. * If FOLL_FORCE is set, we only require the "MAY" flags. */ vm_flags = (gup_flags & FOLL_WRITE) ? (VM_WRITE | VM_MAYWRITE) : (VM_READ | VM_MAYREAD); vm_flags &= (gup_flags & FOLL_FORCE) ? (VM_MAYREAD | VM_MAYWRITE) : (VM_READ | VM_WRITE); i = 0; do { struct vm_area_struct *vma; vma = find_vma(mm, start); if (!vma) { dev_err(atomisp_dev, "find_vma failed\n"); return i ? : -EFAULT; } if (is_vm_hugetlb_page(vma)) { /* i = follow_hugetlb_page(mm, vma, pages, vmas, &start, &nr_pages, i, gup_flags); */ continue; } do { struct page *page; unsigned long pfn; /* * If we have a pending SIGKILL, don't keep faulting * pages and potentially allocating memory. */ if (unlikely(fatal_signal_pending(current))) { dev_err(atomisp_dev, "fatal_signal_pending in %s\n", __func__); return i ? i : -ERESTARTSYS; } ret = follow_pfn(vma, start, &pfn); if (ret) { dev_err(atomisp_dev, "follow_pfn() failed\n"); return i ? : -EFAULT; } page = pfn_to_page(pfn); if (IS_ERR(page)) return i ? i : PTR_ERR(page); if (pages) { pages[i] = page; get_page(page); flush_anon_page(vma, page, start); flush_dcache_page(page); } if (vmas) vmas[i] = vma; i++; start += PAGE_SIZE; nr_pages--; } while (nr_pages && start < vma->vm_end); } while (nr_pages);
return i; }
/* * This ought to be kept in sync with the powerpc specific do_page_fault * function. Currently, there are a few corner cases that we haven't had * to handle fortunately. */ int copro_handle_mm_fault(struct mm_struct *mm, unsigned long ea, unsigned long dsisr, unsigned *flt) { struct vm_area_struct *vma; unsigned long is_write; int ret; if (mm == NULL) return -EFAULT; if (mm->pgd == NULL) return -EFAULT; down_read(&mm->mmap_sem); ret = -EFAULT; vma = find_vma(mm, ea); if (!vma) goto out_unlock; if (ea < vma->vm_start) { if (!(vma->vm_flags & VM_GROWSDOWN)) goto out_unlock; if (expand_stack(vma, ea)) goto out_unlock; } is_write = dsisr & DSISR_ISSTORE; if (is_write) { if (!(vma->vm_flags & VM_WRITE)) goto out_unlock; } else { if (!(vma->vm_flags & (VM_READ | VM_EXEC))) goto out_unlock; /* * PROT_NONE is covered by the VMA check above. * and hash should get a NOHPTE fault instead of * a PROTFAULT in case fixup is needed for things * like autonuma. */ if (!radix_enabled()) WARN_ON_ONCE(dsisr & DSISR_PROTFAULT); } ret = 0; *flt = handle_mm_fault(vma, ea, is_write ? FAULT_FLAG_WRITE : 0); if (unlikely(*flt & VM_FAULT_ERROR)) { if (*flt & VM_FAULT_OOM) { ret = -ENOMEM; goto out_unlock; } else if (*flt & (VM_FAULT_SIGBUS | VM_FAULT_SIGSEGV)) { ret = -EFAULT; goto out_unlock; } BUG(); } if (*flt & VM_FAULT_MAJOR) current->maj_flt++; else current->min_flt++; out_unlock: up_read(&mm->mmap_sem); return ret; }
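A hedged caller sketch, not from the source: how a coprocessor context might drive copro_handle_mm_fault() and decide between restarting the faulting access and reporting an error back to the device layer. The wrapper name is hypothetical.

static int copro_demo_fault(struct mm_struct *mm, unsigned long ea, unsigned long dsisr)
{
	unsigned flt = 0;
	int ret = copro_handle_mm_fault(mm, ea, dsisr, &flt);

	if (ret)
		return ret;	/* -EFAULT or -ENOMEM: fail or signal the context */

	/* Fault resolved (flt carries the VM_FAULT_* bits); retry the access. */
	return 0;
}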
void dtrace_getupcstack(uint64_t *pcstack, int pcstack_limit) { uint64_t *pcstack_end = pcstack + pcstack_limit; volatile uint8_t *flags = (volatile uint8_t *)&cpu_core[cpu_get_id()].cpuc_dtrace_flags; unsigned long *sp; unsigned long *bos; if (*flags & CPU_DTRACE_FAULT) return; if (pcstack_limit <= 0) return; *pcstack++ = (uint64_t)current->pid; if (pcstack >= pcstack_end) return; /***********************************************/ /* Linux provides a built in function which */ /* is good because stack walking is arch */ /* dependent. (save_stack_trace) */ /* */ /* Unfortunately this is options dependent */ /* (CONFIG_STACKTRACE) so we cannot use it. */ /* And its GPL anyhow, so we cannot copy */ /* it. */ /* */ /* Whats worse is that we might be compiled */ /* with a frame pointer (only on x86-32) so */ /* we have three scenarios to handle. */ /***********************************************/ /***********************************************/ /* Ye gods! The world is an awful place to */ /* live. The target process, may or may not */ /* have frame pointers. In fact, some */ /* frames may have it and some may not (eg */ /* different libraries may be compiled */ /* differently). */ /* */ /* Looks like distro owners dont care about */ /* debuggabiity, and give us no frame */ /* pointers. */ /* */ /* This function is really important and */ /* useful. On modern Linux systems, gdb */ /* (and pstack) contain all the smarts. In */ /* fact, pstack is often a wrapper around */ /* gdb - i.e. its so complex we cannot do */ /* this. */ /***********************************************/ /***********************************************/ /* Bear in mind that user stacks can be */ /* megabytes in size, vs kernel stacks */ /* which are limited to a few K (4 or 8K */ /* typically). */ /***********************************************/ // sp = current->thread.rsp; # if defined(__i386) bos = sp = KSTK_ESP(current); # define ALIGN_MASK 3 # else /***********************************************/ /* KSTK_ESP() doesnt exist for x86_64 (its */ /* set to -1). */ /***********************************************/ #if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 25) # if defined(KSTK_EIP) /***********************************************/ /* Handle ARM and more kernel independent, */ /* but might not exist. */ /***********************************************/ bos = sp = (unsigned long *) KSTK_EIP(current); # else bos = sp = (unsigned long *) task_pt_regs(current)->sp; # endif #else bos = sp = task_pt_regs(current)->rsp; #endif # define ALIGN_MASK 7 #endif /***********************************************/ /* Walk the stack. We cannot rely on a */ /* frame pointer at each level, and we */ /* really want to avoid probing every word */ /* in the stack - a large stack will eat */ /* cpu looking at thousands of entries. So */ /* try and heuristically see if we have a */ /* likely frame pointer to jump over the */ /* frame, but, if not, just go one word at */ /* a time. */ /* */ /* Try and be careful we dont walk outside */ /* the stack or walk backwards in the */ /* stack, too. */ /***********************************************/ { uintptr_t *spend = sp + 1024; struct vm_area_struct *vma = find_vma(current->mm, (unsigned long) sp); if (vma) spend = (uintptr_t *) (vma->vm_end - sizeof(int *)); /*printk("xbos=%p %p\n", bos, spend);*/ /***********************************************/ /* Have you ever looked at the output from */ /* GCC in amd64 mode? 
Things like: */ /* */ /* push %r12 */ /* push %rbp */ /* */ /* will make you come out in a cold sweat - */ /* no way to find the frame pointer, */ /* without doing what GDB does (ie read the */ /* DWARF stack unwind info). So, for now, */ /* you get some false positives in the */ /* output - but we try to be conservative. */ /***********************************************/ while (sp >= bos && sp < spend && validate_ptr(sp)) { /*printk(" %p %d: %p %d\n", sp, validate_ptr(sp), sp[0], validate_ptr(sp[0]));*/ if (validate_ptr((void *) sp[0])) { uintptr_t p = sp[-1]; /***********************************************/ /* Try and avoid false positives in stack */ /* entries - we want this to be an */ /* executable instruction. */ /***********************************************/ if (((unsigned long *) sp[0] < bos || (unsigned long *) sp[0] > spend) && (vma = find_vma(current->mm, sp[0])) != NULL && vma->vm_flags & VM_EXEC) { *pcstack++ = sp[0]; if (pcstack >= pcstack_end) break; } if (((int) p & ALIGN_MASK) == 0 && p > (uintptr_t) sp && p < (uintptr_t) spend) sp = (unsigned long *) p; } sp++; } } /***********************************************/ /* Erase anything else in the buffer to */ /* avoid confusion. */ /***********************************************/ while (pcstack < pcstack_end) *pcstack++ = (pc_t) NULL; }
static void *vb2_dma_sg_get_userptr(void *alloc_ctx, unsigned long vaddr, unsigned long size, enum dma_data_direction dma_dir) { struct vb2_dma_sg_conf *conf = alloc_ctx; struct vb2_dma_sg_buf *buf; unsigned long first, last; int num_pages_from_user; struct vm_area_struct *vma; struct sg_table *sgt; DEFINE_DMA_ATTRS(attrs); #if LINUX_VERSION_CODE >= KERNEL_VERSION(3,6,0) dma_set_attr(DMA_ATTR_SKIP_CPU_SYNC, &attrs); #endif buf = kzalloc(sizeof *buf, GFP_KERNEL); if (!buf) return NULL; buf->vaddr = NULL; buf->dev = conf->dev; buf->dma_dir = dma_dir; buf->offset = vaddr & ~PAGE_MASK; buf->size = size; buf->dma_sgt = &buf->sg_table; first = (vaddr & PAGE_MASK) >> PAGE_SHIFT; last = ((vaddr + size - 1) & PAGE_MASK) >> PAGE_SHIFT; buf->num_pages = last - first + 1; buf->pages = kzalloc(buf->num_pages * sizeof(struct page *), GFP_KERNEL); if (!buf->pages) goto userptr_fail_alloc_pages; vma = find_vma(current->mm, vaddr); if (!vma) { dprintk(1, "no vma for address %lu\n", vaddr); goto userptr_fail_find_vma; } if (vma->vm_end < vaddr + size) { dprintk(1, "vma at %lu is too small for %lu bytes\n", vaddr, size); goto userptr_fail_find_vma; } buf->vma = vb2_get_vma(vma); if (!buf->vma) { dprintk(1, "failed to copy vma\n"); goto userptr_fail_find_vma; } if (vma_is_io(buf->vma)) { for (num_pages_from_user = 0; num_pages_from_user < buf->num_pages; ++num_pages_from_user, vaddr += PAGE_SIZE) { unsigned long pfn; if (follow_pfn(vma, vaddr, &pfn)) { dprintk(1, "no page for address %lu\n", vaddr); break; } buf->pages[num_pages_from_user] = pfn_to_page(pfn); } } else num_pages_from_user = get_user_pages(current, current->mm, vaddr & PAGE_MASK, buf->num_pages, buf->dma_dir == DMA_FROM_DEVICE, 1, /* force */ buf->pages, NULL); if (num_pages_from_user != buf->num_pages) goto userptr_fail_get_user_pages; if (sg_alloc_table_from_pages(buf->dma_sgt, buf->pages, buf->num_pages, buf->offset, size, 0)) goto userptr_fail_alloc_table_from_pages; sgt = &buf->sg_table; /* * No need to sync to the device, this will happen later when the * prepare() memop is called. */ sgt->nents = dma_map_sg_attrs(buf->dev, sgt->sgl, sgt->orig_nents, buf->dma_dir, &attrs); if (!sgt->nents) goto userptr_fail_map; return buf; userptr_fail_map: sg_free_table(&buf->sg_table); userptr_fail_alloc_table_from_pages: userptr_fail_get_user_pages: dprintk(1, "get_user_pages requested/got: %d/%d]\n", buf->num_pages, num_pages_from_user); if (!vma_is_io(buf->vma)) while (--num_pages_from_user >= 0) put_page(buf->pages[num_pages_from_user]); vb2_put_vma(buf->vma); userptr_fail_find_vma: kfree(buf->pages); userptr_fail_alloc_pages: kfree(buf); return NULL; }
long s3c_mem_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { #ifdef USE_DMA_ALLOC unsigned long virt_addr; #else unsigned long *virt_addr; #endif struct mm_struct *mm = current->mm; struct s3c_mem_alloc param; struct vm_area_struct *vma; unsigned long start, this_pfn; #ifdef CONFIG_S3C_DMA_MEM struct s3c_mem_dma_param dma_param; #endif switch (cmd) { case S3C_MEM_ALLOC: mutex_lock(&mem_alloc_lock); if (copy_from_user(&param, (struct s3c_mem_alloc *)arg, sizeof(struct s3c_mem_alloc))) { mutex_unlock(&mem_alloc_lock); return -EFAULT; } flag = MEM_ALLOC; param.vir_addr = do_mmap(file, 0, param.size, (PROT_READ|PROT_WRITE), MAP_SHARED, 0); DEBUG("param.vir_addr = %08x, %d\n", param.vir_addr, __LINE__); if (param.vir_addr == -EINVAL) { printk(KERN_INFO "S3C_MEM_ALLOC FAILED\n"); flag = 0; mutex_unlock(&mem_alloc_lock); return -EFAULT; } param.phy_addr = physical_address; #ifdef USE_DMA_ALLOC param.kvir_addr = virtual_address; #endif DEBUG("KERNEL MALLOC : param.phy_addr = 0x%X \t " "size = %d \t param.vir_addr = 0x%X, %d\n", param.phy_addr, param.size, param.vir_addr, __LINE__); if (copy_to_user((struct s3c_mem_alloc *)arg, &param, sizeof(struct s3c_mem_alloc))) { flag = 0; mutex_unlock(&mem_alloc_lock); return -EFAULT; } flag = 0; mutex_unlock(&mem_alloc_lock); break; case S3C_MEM_CACHEABLE_ALLOC: mutex_lock(&mem_cacheable_alloc_lock); if (copy_from_user(&param, (struct s3c_mem_alloc *)arg, sizeof(struct s3c_mem_alloc))) { mutex_unlock(&mem_cacheable_alloc_lock); return -EFAULT; } flag = MEM_ALLOC_CACHEABLE; param.vir_addr = do_mmap(file, 0, param.size, (PROT_READ|PROT_WRITE), MAP_SHARED, 0); DEBUG("param.vir_addr = %08x, %d\n", param.vir_addr, __LINE__); if (param.vir_addr == -EINVAL) { printk(KERN_INFO "S3C_MEM_ALLOC FAILED\n"); flag = 0; mutex_unlock(&mem_cacheable_alloc_lock); return -EFAULT; } param.phy_addr = physical_address; DEBUG("KERNEL MALLOC : param.phy_addr = 0x%X" " \t size = %d \t param.vir_addr = 0x%X, %d\n", param.phy_addr, param.size, param.vir_addr, __LINE__); if (copy_to_user((struct s3c_mem_alloc *)arg, &param, sizeof(struct s3c_mem_alloc))) { flag = 0; mutex_unlock(&mem_cacheable_alloc_lock); return -EFAULT; } flag = 0; mutex_unlock(&mem_cacheable_alloc_lock); break; case S3C_MEM_SHARE_ALLOC: mutex_lock(&mem_share_alloc_lock); if (copy_from_user(&param, (struct s3c_mem_alloc *)arg, sizeof(struct s3c_mem_alloc))) { mutex_unlock(&mem_share_alloc_lock); return -EFAULT; } flag = MEM_ALLOC_SHARE; physical_address = param.phy_addr; DEBUG("param.phy_addr = %08x, %d\n", physical_address, __LINE__); param.vir_addr = do_mmap(file, 0, param.size, (PROT_READ|PROT_WRITE), MAP_SHARED, 0); DEBUG("param.vir_addr = %08x, %d\n", param.vir_addr, __LINE__); if (param.vir_addr == -EINVAL) { printk(KERN_INFO "S3C_MEM_SHARE_ALLOC FAILED\n"); flag = 0; mutex_unlock(&mem_share_alloc_lock); return -EFAULT; } DEBUG("MALLOC_SHARE : param.phy_addr = 0x%X \t " "size = %d \t param.vir_addr = 0x%X, %d\n", param.phy_addr, param.size, param.vir_addr, __LINE__); if (copy_to_user((struct s3c_mem_alloc *)arg, &param, sizeof(struct s3c_mem_alloc))) { flag = 0; mutex_unlock(&mem_share_alloc_lock); return -EFAULT; } flag = 0; mutex_unlock(&mem_share_alloc_lock); break; case S3C_MEM_CACHEABLE_SHARE_ALLOC: mutex_lock(&mem_cacheable_share_alloc_lock); if (copy_from_user(&param, (struct s3c_mem_alloc *)arg, sizeof(struct s3c_mem_alloc))) { mutex_unlock(&mem_cacheable_share_alloc_lock); return -EFAULT; } flag = MEM_ALLOC_CACHEABLE_SHARE; physical_address = param.phy_addr; DEBUG("param.phy_addr = %08x, %d\n", physical_address, 
__LINE__); param.vir_addr = do_mmap(file, 0, param.size, (PROT_READ|PROT_WRITE), MAP_SHARED, 0); DEBUG("param.vir_addr = %08x, %d\n", param.vir_addr, __LINE__); if (param.vir_addr == -EINVAL) { printk(KERN_INFO "S3C_MEM_SHARE_ALLOC FAILED\n"); flag = 0; mutex_unlock(&mem_cacheable_share_alloc_lock); return -EFAULT; } DEBUG("MALLOC_SHARE : param.phy_addr = 0x%X \t " "size = %d \t param.vir_addr = 0x%X, %d\n", param.phy_addr, param.size, param.vir_addr, __LINE__); if (copy_to_user((struct s3c_mem_alloc *)arg, &param, sizeof(struct s3c_mem_alloc))) { flag = 0; mutex_unlock(&mem_cacheable_share_alloc_lock); return -EFAULT; } flag = 0; mutex_unlock(&mem_cacheable_share_alloc_lock); break; case S3C_MEM_FREE: mutex_lock(&mem_free_lock); if (copy_from_user(&param, (struct s3c_mem_alloc *)arg, sizeof(struct s3c_mem_alloc))) { mutex_unlock(&mem_free_lock); return -EFAULT; } DEBUG("KERNEL FREE : param.phy_addr = 0x%X \t " "size = %d \t param.vir_addr = 0x%X, %d\n", param.phy_addr, param.size, param.vir_addr, __LINE__); if (do_munmap(mm, param.vir_addr, param.size) < 0) { printk(KERN_INFO "do_munmap() failed !!\n"); mutex_unlock(&mem_free_lock); return -EINVAL; } #ifdef USE_DMA_ALLOC virt_addr = param.kvir_addr; dma_free_writecombine(NULL, param.size, (unsigned int *) virt_addr, param.phy_addr); #else virt_addr = (unsigned long *)phys_to_virt(param.phy_addr); kfree(virt_addr); #endif param.size = 0; DEBUG("do_munmap() succeed !!\n"); if (copy_to_user((struct s3c_mem_alloc *)arg, &param, sizeof(struct s3c_mem_alloc))) { mutex_unlock(&mem_free_lock); return -EFAULT; } mutex_unlock(&mem_free_lock); break; case S3C_MEM_SHARE_FREE: mutex_lock(&mem_share_free_lock); if (copy_from_user(&param, (struct s3c_mem_alloc *)arg, sizeof(struct s3c_mem_alloc))) { mutex_unlock(&mem_share_free_lock); return -EFAULT; } DEBUG("MEM_SHARE_FREE : param.phy_addr = 0x%X \t " "size = %d \t param.vir_addr = 0x%X, %d\n", param.phy_addr, param.size, param.vir_addr, __LINE__); if (do_munmap(mm, param.vir_addr, param.size) < 0) { printk(KERN_INFO "do_munmap() failed - MEM_SHARE_FREE!!\n"); mutex_unlock(&mem_share_free_lock); return -EINVAL; } param.vir_addr = 0; DEBUG("do_munmap() succeed !! - MEM_SHARE_FREE\n"); if (copy_to_user((struct s3c_mem_alloc *)arg, &param, sizeof(struct s3c_mem_alloc))) { mutex_unlock(&mem_share_free_lock); return -EFAULT; } mutex_unlock(&mem_share_free_lock); break; #ifdef CONFIG_S3C_DMA_MEM case S3C_MEM_DMA_COPY: if (copy_from_user(&dma_param, (struct s3c_mem_dma_param *)arg, sizeof(struct s3c_mem_dma_param))) { return -EFAULT; } if (s3c_dma_mem_start(current->mm, &dma_param, S3C_DMA_MEM2MEM)) { return -EINVAL; } if (copy_to_user((struct s3c_mem_dma_param *)arg, &dma_param, sizeof(struct s3c_mem_dma_param))) { return -EFAULT; } break; #endif case S3C_MEM_GET_PADDR: if (copy_from_user(&param, (struct s3c_mem_alloc *)arg, sizeof(struct s3c_mem_alloc))) { return -EFAULT; } start = param.vir_addr; down_read(&mm->mmap_sem); vma = find_vma(mm, start); if (vma == NULL) { up_read(&mm->mmap_sem); return -EINVAL; } if (follow_pfn(vma, start, &this_pfn)) { up_read(&mm->mmap_sem); return -EINVAL; } param.phy_addr = this_pfn << PAGE_SHIFT; up_read(&mm->mmap_sem); if (copy_to_user((struct s3c_mem_alloc *)arg, &param, sizeof(struct s3c_mem_alloc))) { return -EFAULT; } break; default: DEBUG("s3c_mem_ioctl() : default !!\n"); return -EINVAL; } return 0; }
int psb_get_vaddr_pages(u32 vaddr, u32 size, u32 **pfn_list, int *page_count) { u32 num_pages; struct page **pages = 0; struct task_struct *task = current; struct mm_struct *mm = task->mm; struct vm_area_struct *vma; u32 *pfns = 0; int ret; int i; if (unlikely(!pfn_list || !page_count || !vaddr || !size)) return -EINVAL; num_pages = (size + PAGE_SIZE - 1) >> PAGE_SHIFT; pages = kzalloc(num_pages * sizeof(struct page *), GFP_KERNEL); if (unlikely(!pages)) { DRM_ERROR("Failed to allocate page list\n"); return -ENOMEM; } down_read(&mm->mmap_sem); ret = get_user_pages(task, mm, vaddr, num_pages, 0, 0, pages, NULL); up_read(&mm->mmap_sem); if (ret <= 0) { DRM_DEBUG("failed to get user pages\n"); kfree(pages); pages = 0; } else { DRM_DEBUG("num_pages %d, ret %d\n", num_pages, ret); num_pages = ret; } /*allocate page list*/ pfns = kzalloc(num_pages * sizeof(u32), GFP_KERNEL); if (!pfns) { DRM_ERROR("No memory\n"); goto get_page_err; } if (!pages) { DRM_ERROR("No pages found, trying to follow pfn\n"); for (i = 0; i < num_pages; i++) { vma = find_vma(mm, vaddr + i * PAGE_SIZE); if (!vma) { DRM_ERROR("failed to find vma\n"); goto find_vma_err; } ret = follow_pfn(vma, (unsigned long)(vaddr + i * PAGE_SIZE), (unsigned long *)&pfns[i]); if (ret) { DRM_ERROR("failed to follow pfn\n"); goto follow_pfn_err; } } } else { DRM_ERROR("Found pages\n"); for (i = 0; i < num_pages; i++) pfns[i] = page_to_pfn(pages[i]); } *pfn_list = pfns; *page_count = num_pages; kfree(pages); return 0; find_vma_err: follow_pfn_err: kfree(pfns); get_page_err: if (pages) { for (i = 0; i < num_pages; i++) put_page(pages[i]); kfree(pages); } return -EINVAL; }
int ttm_pl_ub_create_ioctl(struct ttm_object_file *tfile, struct ttm_bo_device *bdev, struct ttm_lock *lock, void *data) { union ttm_pl_create_ub_arg *arg = data; struct ttm_pl_create_ub_req *req = &arg->req; struct ttm_pl_rep *rep = &arg->rep; struct ttm_buffer_object *bo; struct ttm_buffer_object *tmp; struct ttm_bo_user_object *user_bo; uint32_t flags; int ret = 0; struct ttm_mem_global *mem_glob = bdev->glob->mem_glob; struct ttm_placement placement = default_placement; #if (LINUX_VERSION_CODE < KERNEL_VERSION(3,3,0)) size_t acc_size = ttm_pl_size(bdev, (req->size + PAGE_SIZE - 1) >> PAGE_SHIFT); #else size_t acc_size = ttm_bo_acc_size(bdev, req->size, sizeof(struct ttm_buffer_object)); #endif if (req->user_address & ~PAGE_MASK) { printk(KERN_ERR "User pointer buffer need page alignment\n"); return -EFAULT; } ret = ttm_mem_global_alloc(mem_glob, acc_size, false, false); if (unlikely(ret != 0)) return ret; flags = req->placement; user_bo = kzalloc(sizeof(*user_bo), GFP_KERNEL); if (unlikely(user_bo == NULL)) { ttm_mem_global_free(mem_glob, acc_size); return -ENOMEM; } ret = ttm_read_lock(lock, true); if (unlikely(ret != 0)) { ttm_mem_global_free(mem_glob, acc_size); kfree(user_bo); return ret; } bo = &user_bo->bo; placement.num_placement = 1; placement.placement = &flags; #if (LINUX_VERSION_CODE < KERNEL_VERSION(3,3,0)) /* For kernel 3.0, use the desired type. */ #define TTM_HACK_WORKAROUND_ttm_bo_type_user ttm_bo_type_user #else /* TTM_HACK_WORKAROUND_ttm_bo_type_user -- Hack for porting, as ttm_bo_type_user is no longer implemented. This will not result in working code. FIXME - to be removed. */ #warning warning: ttm_bo_type_user no longer supported /* For kernel 3.3+, use the wrong type, which will compile but not work. */ #define TTM_HACK_WORKAROUND_ttm_bo_type_user ttm_bo_type_kernel #endif #if (LINUX_VERSION_CODE > KERNEL_VERSION(3, 3, 0)) /* Handle frame buffer allocated in user space, Convert user space virtual address into pages list */ unsigned int page_nr = 0; struct vm_area_struct *vma = NULL; struct sg_table *sg = NULL; unsigned long num_pages = 0; struct page **pages = 0; num_pages = (req->size + PAGE_SIZE - 1) >> PAGE_SHIFT; pages = kzalloc(num_pages * sizeof(struct page *), GFP_KERNEL); if (unlikely(pages == NULL)) { printk(KERN_ERR "kzalloc pages failed\n"); return -ENOMEM; } down_read(&current->mm->mmap_sem); vma = find_vma(current->mm, req->user_address); if (unlikely(vma == NULL)) { up_read(&current->mm->mmap_sem); kfree(pages); printk(KERN_ERR "find_vma failed\n"); return -EFAULT; } unsigned long before_flags = vma->vm_flags; if (vma->vm_flags & (VM_IO | VM_PFNMAP)) vma->vm_flags = vma->vm_flags & ((~VM_IO) & (~VM_PFNMAP)); page_nr = get_user_pages(current, current->mm, req->user_address, (int)(num_pages), 1, 0, pages, NULL); vma->vm_flags = before_flags; up_read(&current->mm->mmap_sem); /* can be written by caller, not forced */ if (unlikely(page_nr < num_pages)) { kfree(pages); pages = 0; printk(KERN_ERR "get_user_pages err.\n"); return -ENOMEM; } sg = drm_prime_pages_to_sg(pages, num_pages); if (unlikely(sg == NULL)) { kfree(pages); printk(KERN_ERR "drm_prime_pages_to_sg err.\n"); return -ENOMEM; } kfree(pages); #endif #if (LINUX_VERSION_CODE < KERNEL_VERSION(3, 3, 0)) ret = ttm_bo_init(bdev, bo, req->size, TTM_HACK_WORKAROUND_ttm_bo_type_user, &placement, req->page_alignment, req->user_address, true, NULL, acc_size, NULL, &ttm_bo_user_destroy); #elif (LINUX_VERSION_CODE < KERNEL_VERSION(3, 8, 0)) ret = ttm_bo_init(bdev, bo, req->size, ttm_bo_type_sg, &placement, 
req->page_alignment, req->user_address, true, NULL, acc_size, sg, &ttm_ub_bo_user_destroy); #else ret = ttm_bo_init(bdev, bo, req->size, ttm_bo_type_sg, &placement, req->page_alignment, true, NULL, acc_size, sg, &ttm_ub_bo_user_destroy); #endif /* * Note that the ttm_buffer_object_init function * would've called the destroy function on failure!! */ ttm_read_unlock(lock); if (unlikely(ret != 0)) goto out; tmp = ttm_bo_reference(bo); ret = ttm_base_object_init(tfile, &user_bo->base, flags & TTM_PL_FLAG_SHARED, ttm_buffer_type, &ttm_bo_user_release, &ttm_bo_user_ref_release); if (unlikely(ret != 0)) goto out_err; ret = ttm_bo_reserve(bo, true, false, false, 0); if (unlikely(ret != 0)) goto out_err; ttm_pl_fill_rep(bo, rep); ttm_bo_unreserve(bo); ttm_bo_unref(&bo); out: return 0; out_err: ttm_bo_unref(&tmp); ttm_bo_unref(&bo); return ret; }
unsigned long arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0, const unsigned long len, const unsigned long pgoff, const unsigned long flags) { struct vm_area_struct *vma; struct mm_struct *mm = current->mm; unsigned long addr = addr0; int do_color_align, last_mmap; struct vm_unmapped_area_info info; #ifdef CONFIG_64BIT /* This should only ever run for 32-bit processes. */ BUG_ON(!test_thread_flag(TIF_32BIT)); #endif /* requested length too big for entire address space */ if (len > TASK_SIZE) return -ENOMEM; do_color_align = 0; if (filp || (flags & MAP_SHARED)) do_color_align = 1; last_mmap = GET_LAST_MMAP(filp); if (flags & MAP_FIXED) { if ((flags & MAP_SHARED) && last_mmap && (addr - shared_align_offset(last_mmap, pgoff)) & (SHM_COLOUR - 1)) return -EINVAL; goto found_addr; } /* requesting a specific address */ if (addr) { if (do_color_align && last_mmap) addr = COLOR_ALIGN(addr, last_mmap, pgoff); else addr = PAGE_ALIGN(addr); vma = find_vma(mm, addr); if (TASK_SIZE - len >= addr && (!vma || addr + len <= vma->vm_start)) goto found_addr; } info.flags = VM_UNMAPPED_AREA_TOPDOWN; info.length = len; info.low_limit = PAGE_SIZE; info.high_limit = mm->mmap_base; info.align_mask = last_mmap ? (PAGE_MASK & (SHM_COLOUR - 1)) : 0; info.align_offset = shared_align_offset(last_mmap, pgoff); addr = vm_unmapped_area(&info); if (!(addr & ~PAGE_MASK)) goto found_addr; VM_BUG_ON(addr != -ENOMEM); /* * A failed mmap() very likely causes application failure, * so fall back to the bottom-up function here. This scenario * can happen with large stack limits and large mmap() * allocations. */ return arch_get_unmapped_area(filp, addr0, len, pgoff, flags); found_addr: if (do_color_align && !last_mmap && !(addr & ~PAGE_MASK)) SET_LAST_MMAP(filp, addr - (pgoff << PAGE_SHIFT)); return addr; }
asmlinkage void do_page_fault(unsigned long address, struct pt_regs *regs, int protection, int writeaccess) { struct task_struct *tsk; struct mm_struct *mm; struct vm_area_struct * vma; siginfo_t info; int fault; unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE; D(printk(KERN_DEBUG "Page fault for %lX on %X at %lX, prot %d write %d\n", address, smp_processor_id(), instruction_pointer(regs), protection, writeaccess)); tsk = current; /* * We fault-in kernel-space virtual memory on-demand. The * 'reference' page table is init_mm.pgd. * * NOTE! We MUST NOT take any locks for this case. We may * be in an interrupt or a critical region, and should * only copy the information from the master page table, * nothing more. * * NOTE2: This is done so that, when updating the vmalloc * mappings we don't have to walk all processes pgdirs and * add the high mappings all at once. Instead we do it as they * are used. However vmalloc'ed page entries have the PAGE_GLOBAL * bit set so sometimes the TLB can use a lingering entry. * * This verifies that the fault happens in kernel space * and that the fault was not a protection error (error_code & 1). */ if (address >= VMALLOC_START && !protection && !user_mode(regs)) goto vmalloc_fault; /* When stack execution is not allowed we store the signal * trampolines in the reserved cris_signal_return_page. * Handle this in the exact same way as vmalloc (we know * that the mapping is there and is valid so no need to * call handle_mm_fault). */ if (cris_signal_return_page && address == cris_signal_return_page && !protection && user_mode(regs)) goto vmalloc_fault; /* we can and should enable interrupts at this point */ local_irq_enable(); mm = tsk->mm; info.si_code = SEGV_MAPERR; /* * If we're in an interrupt or "atomic" operation or have no * user context, we must not take the fault. */ if (!mm || pagefault_disabled()) goto no_context; if (user_mode(regs)) flags |= FAULT_FLAG_USER; retry: down_read(&mm->mmap_sem); vma = find_vma(mm, address); if (!vma) goto bad_area; if (vma->vm_start <= address) goto good_area; if (!(vma->vm_flags & VM_GROWSDOWN)) goto bad_area; if (user_mode(regs)) { /* * accessing the stack below usp is always a bug. * we get page-aligned addresses so we can only check * if we're within a page from usp, but that might be * enough to catch brutal errors at least. */ if (address + PAGE_SIZE < rdusp()) goto bad_area; } if (expand_stack(vma, address)) goto bad_area; /* * Ok, we have a good vm_area for this memory access, so * we can handle it.. */ good_area: info.si_code = SEGV_ACCERR; /* first do some preliminary protection checks */ if (writeaccess == 2){ if (!(vma->vm_flags & VM_EXEC)) goto bad_area; } else if (writeaccess == 1) { if (!(vma->vm_flags & VM_WRITE)) goto bad_area; flags |= FAULT_FLAG_WRITE; } else { if (!(vma->vm_flags & (VM_READ | VM_EXEC))) goto bad_area; } /* * If for any reason at all we couldn't handle the fault, * make sure we exit gracefully rather than endlessly redo * the fault. 
*/ fault = handle_mm_fault(mm, vma, address, flags); if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(current)) return; if (unlikely(fault & VM_FAULT_ERROR)) { if (fault & VM_FAULT_OOM) goto out_of_memory; else if (fault & VM_FAULT_SIGBUS) goto do_sigbus; BUG(); } if (flags & FAULT_FLAG_ALLOW_RETRY) { if (fault & VM_FAULT_MAJOR) tsk->maj_flt++; else tsk->min_flt++; if (fault & VM_FAULT_RETRY) { flags &= ~FAULT_FLAG_ALLOW_RETRY; flags |= FAULT_FLAG_TRIED; /* * No need to up_read(&mm->mmap_sem) as we would * have already released it in __lock_page_or_retry * in mm/filemap.c. */ goto retry; } } up_read(&mm->mmap_sem); return; /* * Something tried to access memory that isn't in our memory map.. * Fix it, but check if it's kernel or user first.. */ bad_area: up_read(&mm->mmap_sem); bad_area_nosemaphore: DPG(show_registers(regs)); /* User mode accesses just cause a SIGSEGV */ if (user_mode(regs)) { printk(KERN_NOTICE "%s (pid %d) segfaults for page " "address %08lx at pc %08lx\n", tsk->comm, tsk->pid, address, instruction_pointer(regs)); /* With DPG on, we've already dumped registers above. */ DPG(if (0)) show_registers(regs); #ifdef CONFIG_NO_SEGFAULT_TERMINATION DECLARE_WAIT_QUEUE_HEAD(wq); wait_event_interruptible(wq, 0 == 1); #else info.si_signo = SIGSEGV; info.si_errno = 0; /* info.si_code has been set above */ info.si_addr = (void *)address; force_sig_info(SIGSEGV, &info, tsk); #endif return; } no_context: /* Are we prepared to handle this kernel fault? * * (The kernel has valid exception-points in the source * when it accesses user-memory. When it fails in one * of those points, we find it in a table and do a jump * to some fixup code that loads an appropriate error * code) */ if (find_fixup_code(regs)) return; /* * Oops. The kernel tried to access some bad page. We'll have to * terminate things with extreme prejudice. */ if (!oops_in_progress) { oops_in_progress = 1; if ((unsigned long) (address) < PAGE_SIZE) printk(KERN_ALERT "Unable to handle kernel NULL " "pointer dereference"); else printk(KERN_ALERT "Unable to handle kernel access" " at virtual address %08lx\n", address); die_if_kernel("Oops", regs, (writeaccess << 1) | protection); oops_in_progress = 0; } do_exit(SIGKILL); /* * We ran out of memory, or some other thing happened to us that made * us unable to handle the page fault gracefully. */ out_of_memory: up_read(&mm->mmap_sem); if (!user_mode(regs)) goto no_context; pagefault_out_of_memory(); return; do_sigbus: up_read(&mm->mmap_sem); /* * Send a sigbus, regardless of whether we were in kernel * or user mode. */ info.si_signo = SIGBUS; info.si_errno = 0; info.si_code = BUS_ADRERR; info.si_addr = (void *)address; force_sig_info(SIGBUS, &info, tsk); /* Kernel mode? Handle exceptions or die */ if (!user_mode(regs)) goto no_context; return; vmalloc_fault: { /* * Synchronize this task's top level page-table * with the 'reference' page table. * * Use current_pgd instead of tsk->active_mm->pgd * since the latter might be unavailable if this * code is executed in a misfortunately run irq * (like inside schedule() between switch_mm and * switch_to...). */ int offset = pgd_index(address); pgd_t *pgd, *pgd_k; pud_t *pud, *pud_k; pmd_t *pmd, *pmd_k; pte_t *pte_k; pgd = (pgd_t *)per_cpu(current_pgd, smp_processor_id()) + offset; pgd_k = init_mm.pgd + offset; /* Since we're two-level, we don't need to do both * set_pgd and set_pmd (they do the same thing). If * we go three-level at some point, do the right thing * with pgd_present and set_pgd here. 
* * Also, since the vmalloc area is global, we don't * need to copy individual PTE's, it is enough to * copy the pgd pointer into the pte page of the * root task. If that is there, we'll find our pte if * it exists. */ pud = pud_offset(pgd, address); pud_k = pud_offset(pgd_k, address); if (!pud_present(*pud_k)) goto no_context; pmd = pmd_offset(pud, address); pmd_k = pmd_offset(pud_k, address); if (!pmd_present(*pmd_k)) goto bad_area_nosemaphore; set_pmd(pmd, *pmd_k); /* Make sure the actual PTE exists as well to * catch kernel vmalloc-area accesses to non-mapped * addresses. If we don't do this, this will just * silently loop forever. */ pte_k = pte_offset_kernel(pmd_k, address); if (!pte_present(*pte_k)) goto no_context; return; } }
/* * This routine handles page faults. It determines the address, * and the problem, and then passes it off to one of the appropriate * routines. * * error_code: * ****0004 Protection -> Write-Protection (suprression) * ****0010 Segment translation -> Not present (nullification) * ****0011 Page translation -> Not present (nullification) * ****003B Region third exception -> Not present (nullification) */ asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long error_code) { struct task_struct *tsk; struct mm_struct *mm; struct vm_area_struct * vma; unsigned long address; unsigned long fixup; int write; int si_code = SEGV_MAPERR; int kernel_address = 0; tsk = current; mm = tsk->mm; /* * Check for low-address protection. This needs to be treated * as a special case because the translation exception code * field is not guaranteed to contain valid data in this case. */ if ((error_code & 0xff) == 4 && !(S390_lowcore.trans_exc_code & 4)) { /* Low-address protection hit in kernel mode means NULL pointer write access in kernel mode. */ if (!(regs->psw.mask & PSW_PROBLEM_STATE)) { address = 0; kernel_address = 1; goto no_context; } /* Low-address protection hit in user mode 'cannot happen'. */ die ("Low-address protection", regs, error_code); do_exit(SIGKILL); } /* * get the failing address * more specific the segment and page table portion of * the address */ address = S390_lowcore.trans_exc_code&-4096L; /* * Check which address space the address belongs to */ switch (S390_lowcore.trans_exc_code & 3) { case 0: /* Primary Segment Table Descriptor */ kernel_address = 1; goto no_context; case 1: /* STD determined via access register */ if (S390_lowcore.exc_access_id == 0) { kernel_address = 1; goto no_context; } if (regs && S390_lowcore.exc_access_id < NUM_ACRS) { if (regs->acrs[S390_lowcore.exc_access_id] == 0) { kernel_address = 1; goto no_context; } if (regs->acrs[S390_lowcore.exc_access_id] == 1) { /* user space address */ break; } } die("page fault via unknown access register", regs, error_code); do_exit(SIGKILL); break; case 2: /* Secondary Segment Table Descriptor */ case 3: /* Home Segment Table Descriptor */ /* user space address */ break; } /* * Check whether we have a user MM in the first place. */ if (in_interrupt() || !mm || !(regs->psw.mask & _PSW_IO_MASK_BIT)) goto no_context; /* * When we get here, the fault happened in the current * task's user address space, so we can switch on the * interrupts again and then search the VMAs */ __sti(); down_read(&mm->mmap_sem); vma = find_vma(mm, address); if (!vma) goto bad_area; if (vma->vm_start <= address) goto good_area; if (!(vma->vm_flags & VM_GROWSDOWN)) goto bad_area; if (expand_stack(vma, address)) goto bad_area; /* * Ok, we have a good vm_area for this memory access, so * we can handle it.. */ good_area: write = 0; si_code = SEGV_ACCERR; switch (error_code & 0xFF) { case 0x04: /* write, present*/ write = 1; break; case 0x10: /* not present*/ case 0x11: /* not present*/ case 0x3B: /* not present*/ if (!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE))) goto bad_area; break; default: printk("code should be 4, 10 or 11 (%lX) \n",error_code&0xFF); goto bad_area; } survive: /* * If for any reason at all we couldn't handle the fault, * make sure we exit gracefully rather than endlessly redo * the fault. 
*/ switch (handle_mm_fault(mm, vma, address, write)) { case 1: tsk->min_flt++; break; case 2: tsk->maj_flt++; break; case 0: goto do_sigbus; default: goto out_of_memory; } up_read(&mm->mmap_sem); return; /* * Something tried to access memory that isn't in our memory map.. * Fix it, but check if it's kernel or user first.. */ bad_area: up_read(&mm->mmap_sem); /* User mode accesses just cause a SIGSEGV */ if (regs->psw.mask & PSW_PROBLEM_STATE) { tsk->thread.prot_addr = address; tsk->thread.trap_no = error_code; #ifndef CONFIG_SYSCTL #ifdef CONFIG_PROCESS_DEBUG printk("User process fault: interruption code 0x%lX\n",error_code); printk("failing address: %lX\n",address); show_regs(regs); #endif #else if (sysctl_userprocess_debug) { printk("User process fault: interruption code 0x%lX\n", error_code); printk("failing address: %lX\n", address); show_regs(regs); } #endif force_sigsegv(tsk, si_code, (void *)address); return; } no_context: /* Are we prepared to handle this kernel fault? */ if ((fixup = search_exception_table(regs->psw.addr)) != 0) { regs->psw.addr = fixup; return; } /* * Oops. The kernel tried to access some bad page. We'll have to * terminate things with extreme prejudice. */ if (kernel_address) printk(KERN_ALERT "Unable to handle kernel pointer dereference" " at virtual kernel address %016lx\n", address); else printk(KERN_ALERT "Unable to handle kernel paging request" " at virtual user address %016lx\n", address); die("Oops", regs, error_code); do_exit(SIGKILL); /* * We ran out of memory, or some other thing happened to us that made * us unable to handle the page fault gracefully. */ out_of_memory: up_read(&mm->mmap_sem); if (tsk->pid == 1) { tsk->policy |= SCHED_YIELD; schedule(); down_read(&mm->mmap_sem); goto survive; } printk("VM: killing process %s\n", tsk->comm); if (regs->psw.mask & PSW_PROBLEM_STATE) do_exit(SIGKILL); goto no_context; do_sigbus: up_read(&mm->mmap_sem); /* * Send a sigbus, regardless of whether we were in kernel * or user mode. */ tsk->thread.prot_addr = address; tsk->thread.trap_no = error_code; force_sig(SIGBUS, tsk); /* Kernel mode? Handle exceptions or die */ if (!(regs->psw.mask & PSW_PROBLEM_STATE)) goto no_context; }
/* * Canonical page fault handler */ void do_page_fault(unsigned long address, long cause, struct pt_regs *regs) { struct vm_area_struct *vma; struct mm_struct *mm = current->mm; siginfo_t info; int si_code = SEGV_MAPERR; int fault; const struct exception_table_entry *fixup; unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE; /* * If we're in an interrupt or have no user context, * then must not take the fault. */ if (unlikely(in_interrupt() || !mm)) goto no_context; local_irq_enable(); if (user_mode(regs)) flags |= FAULT_FLAG_USER; retry: down_read(&mm->mmap_sem); vma = find_vma(mm, address); if (!vma) goto bad_area; if (vma->vm_start <= address) goto good_area; if (!(vma->vm_flags & VM_GROWSDOWN)) goto bad_area; if (expand_stack(vma, address)) goto bad_area; good_area: /* Address space is OK. Now check access rights. */ si_code = SEGV_ACCERR; switch (cause) { case FLT_IFETCH: if (!(vma->vm_flags & VM_EXEC)) goto bad_area; break; case FLT_LOAD: if (!(vma->vm_flags & VM_READ)) goto bad_area; break; case FLT_STORE: if (!(vma->vm_flags & VM_WRITE)) goto bad_area; flags |= FAULT_FLAG_WRITE; break; } fault = handle_mm_fault(mm, vma, address, flags); if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(current)) return; /* The most common case -- we are done. */ if (likely(!(fault & VM_FAULT_ERROR))) { if (flags & FAULT_FLAG_ALLOW_RETRY) { if (fault & VM_FAULT_MAJOR) current->maj_flt++; else current->min_flt++; if (fault & VM_FAULT_RETRY) { flags &= ~FAULT_FLAG_ALLOW_RETRY; flags |= FAULT_FLAG_TRIED; goto retry; } } up_read(&mm->mmap_sem); return; } up_read(&mm->mmap_sem); /* Handle copyin/out exception cases */ if (!user_mode(regs)) goto no_context; if (fault & VM_FAULT_OOM) { pagefault_out_of_memory(); return; } /* User-mode address is in the memory map, but we are * unable to fix up the page fault. */ if (fault & VM_FAULT_SIGBUS) { info.si_signo = SIGBUS; info.si_code = BUS_ADRERR; } /* Address is not in the memory map */ else { info.si_signo = SIGSEGV; info.si_code = SEGV_ACCERR; } info.si_errno = 0; info.si_addr = (void __user *)address; force_sig_info(info.si_signo, &info, current); return; bad_area: up_read(&mm->mmap_sem); if (user_mode(regs)) { info.si_signo = SIGSEGV; info.si_errno = 0; info.si_code = si_code; info.si_addr = (void *)address; force_sig_info(info.si_signo, &info, current); return; } /* Kernel-mode fault falls through */ no_context: fixup = search_exception_tables(pt_elr(regs)); if (fixup) { pt_set_elr(regs, fixup->fixup); return; } /* Things are looking very, very bad now */ bust_spinlocks(1); printk(KERN_EMERG "Unable to handle kernel paging request at " "virtual address 0x%08lx, regs %p\n", address, regs); die("Bad Kernel VA", regs, SIGKILL); }
int map_user_kiobuf(int rw, struct kiobuf *iobuf, unsigned long va, size_t len) { unsigned long ptr, end; int err; struct mm_struct * mm; struct vm_area_struct * vma = 0; struct page * map; int i; int datain = (rw == READ); /* Make sure the iobuf is not already mapped somewhere. */ if (iobuf->nr_pages) return -EINVAL; mm = current->mm; dprintk ("map_user_kiobuf: begin\n"); ptr = va & PAGE_MASK; end = (va + len + PAGE_SIZE - 1) & PAGE_MASK; err = expand_kiobuf(iobuf, (end - ptr) >> PAGE_SHIFT); if (err) return err; down(&mm->mmap_sem); err = -EFAULT; iobuf->locked = 0; iobuf->offset = va & ~PAGE_MASK; iobuf->length = len; i = 0; /* * First of all, try to fault in all of the necessary pages */ while (ptr < end) { if (!vma || ptr >= vma->vm_end) { vma = find_vma(current->mm, ptr); if (!vma) goto out_unlock; if (vma->vm_start > ptr) { if (!(vma->vm_flags & VM_GROWSDOWN)) goto out_unlock; if (expand_stack(vma, ptr)) goto out_unlock; } if (((datain) && (!(vma->vm_flags & VM_WRITE))) || (!(vma->vm_flags & VM_READ))) { err = -EACCES; goto out_unlock; } } if (handle_mm_fault(current->mm, vma, ptr, datain) <= 0) goto out_unlock; spin_lock(&mm->page_table_lock); map = follow_page(ptr); if (!map) { spin_unlock(&mm->page_table_lock); dprintk (KERN_ERR "Missing page in map_user_kiobuf\n"); goto out_unlock; } map = get_page_map(map); if (map) atomic_inc(&map->count); else printk (KERN_INFO "Mapped page missing [%d]\n", i); spin_unlock(&mm->page_table_lock); iobuf->maplist[i] = map; iobuf->nr_pages = ++i; ptr += PAGE_SIZE; } up(&mm->mmap_sem); dprintk ("map_user_kiobuf: end OK\n"); return 0; out_unlock: up(&mm->mmap_sem); unmap_kiobuf(iobuf); dprintk ("map_user_kiobuf: end %d\n", err); return err; }
static unsigned long arch_get_unmapped_area_common(struct file *filp, unsigned long addr0, unsigned long len, unsigned long pgoff, unsigned long flags, enum mmap_allocation_direction dir) { struct mm_struct *mm = current->mm; struct vm_area_struct *vma; unsigned long addr = addr0; int do_color_align; if (unlikely(len > TASK_SIZE)) return -ENOMEM; if (flags & MAP_FIXED) { if (TASK_SIZE - len < addr) return -EINVAL; if ((flags & MAP_SHARED) && ((addr - (pgoff << PAGE_SHIFT)) & shm_align_mask)) return -EINVAL; return addr; } do_color_align = 0; if (filp || (flags & MAP_SHARED)) do_color_align = 1; if (addr) { if (do_color_align) addr = COLOUR_ALIGN(addr, pgoff); else addr = PAGE_ALIGN(addr); vma = find_vma(mm, addr); if (TASK_SIZE - len >= addr && (!vma || addr + len <= vma->vm_start)) return addr; } if (dir == UP) { addr = mm->mmap_base; if (do_color_align) addr = COLOUR_ALIGN(addr, pgoff); else addr = PAGE_ALIGN(addr); for (vma = find_vma(current->mm, addr); ; vma = vma->vm_next) { if (TASK_SIZE - len < addr) return -ENOMEM; if (!vma || addr + len <= vma->vm_start) return addr; addr = vma->vm_end; if (do_color_align) addr = COLOUR_ALIGN(addr, pgoff); } } else { if (len <= mm->cached_hole_size) { mm->cached_hole_size = 0; mm->free_area_cache = mm->mmap_base; } addr = mm->free_area_cache; if (do_color_align) { unsigned long base = COLOUR_ALIGN_DOWN(addr - len, pgoff); addr = base + len; } if (likely(addr > len)) { vma = find_vma(mm, addr - len); if (!vma || addr <= vma->vm_start) { return mm->free_area_cache = addr - len; } } if (unlikely(mm->mmap_base < len)) goto bottomup; addr = mm->mmap_base - len; if (do_color_align) addr = COLOUR_ALIGN_DOWN(addr, pgoff); do { vma = find_vma(mm, addr); if (likely(!vma || addr + len <= vma->vm_start)) { return mm->free_area_cache = addr; } if (addr + mm->cached_hole_size < vma->vm_start) mm->cached_hole_size = vma->vm_start - addr; addr = vma->vm_start - len; if (do_color_align) addr = COLOUR_ALIGN_DOWN(addr, pgoff); } while (likely(len < vma->vm_start)); bottomup: mm->cached_hole_size = ~0UL; mm->free_area_cache = TASK_UNMAPPED_BASE; addr = arch_get_unmapped_area(filp, addr0, len, pgoff, flags); mm->free_area_cache = mm->mmap_base; mm->cached_hole_size = ~0UL; return addr; } }
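In ports that use a combined helper like this, the exported entry points are usually thin wrappers that only select the search direction. A sketch under that assumption, reusing the UP/DOWN values implied by the dir parameter above:

unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr0,
	unsigned long len, unsigned long pgoff, unsigned long flags)
{
	return arch_get_unmapped_area_common(filp, addr0, len, pgoff, flags, UP);
}

unsigned long arch_get_unmapped_area_topdown(struct file *filp, unsigned long addr0,
	unsigned long len, unsigned long pgoff, unsigned long flags)
{
	return arch_get_unmapped_area_common(filp, addr0, len, pgoff, flags, DOWN);
}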
/* * This routine handles page faults. It determines the address and the * problem, and then passes it off to one of the appropriate routines. */ asmlinkage void do_page_fault(struct pt_regs *regs) { struct task_struct *tsk; struct vm_area_struct *vma; struct mm_struct *mm; unsigned long addr, cause; unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE; int fault, code = SEGV_MAPERR; cause = regs->scause; addr = regs->sbadaddr; tsk = current; mm = tsk->mm; /* * Fault-in kernel-space virtual memory on-demand. * The 'reference' page table is init_mm.pgd. * * NOTE! We MUST NOT take any locks for this case. We may * be in an interrupt or a critical region, and should * only copy the information from the master page table, * nothing more. */ if (unlikely((addr >= VMALLOC_START) && (addr <= VMALLOC_END))) goto vmalloc_fault; /* Enable interrupts if they were enabled in the parent context. */ if (likely(regs->sstatus & SR_PIE)) local_irq_enable(); /* * If we're in an interrupt, have no user context, or are running * in an atomic region, then we must not take the fault. */ if (unlikely(in_atomic() || !mm)) goto no_context; if (user_mode(regs)) flags |= FAULT_FLAG_USER; perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, addr); retry: down_read(&mm->mmap_sem); vma = find_vma(mm, addr); if (unlikely(!vma)) goto bad_area; if (likely(vma->vm_start <= addr)) goto good_area; if (unlikely(!(vma->vm_flags & VM_GROWSDOWN))) goto bad_area; if (unlikely(expand_stack(vma, addr))) goto bad_area; /* * Ok, we have a good vm_area for this memory access, so * we can handle it. */ good_area: code = SEGV_ACCERR; switch (cause) { case EXC_INST_ACCESS: if (!(vma->vm_flags & VM_EXEC)) goto bad_area; break; case EXC_LOAD_ACCESS: if (!(vma->vm_flags & VM_READ)) goto bad_area; break; case EXC_STORE_ACCESS: if (!(vma->vm_flags & VM_WRITE)) goto bad_area; flags |= FAULT_FLAG_WRITE; break; default: panic("%s: unhandled cause %lu", __func__, cause); } /* * If for any reason at all we could not handle the fault, * make sure we exit gracefully rather than endlessly redo * the fault. */ fault = handle_mm_fault(mm, vma, addr, flags); /* * If we need to retry but a fatal signal is pending, handle the * signal first. We do not need to release the mmap_sem because it * would already be released in __lock_page_or_retry in mm/filemap.c. */ if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(tsk)) return; if (unlikely(fault & VM_FAULT_ERROR)) { if (fault & VM_FAULT_OOM) goto out_of_memory; else if (fault & VM_FAULT_SIGBUS) goto do_sigbus; BUG(); } /* * Major/minor page fault accounting is only done on the * initial attempt. If we go through a retry, it is extremely * likely that the page will be found in page cache at that point. */ if (flags & FAULT_FLAG_ALLOW_RETRY) { if (fault & VM_FAULT_MAJOR) { tsk->maj_flt++; perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, regs, addr); } else { tsk->min_flt++; perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, regs, addr); } if (fault & VM_FAULT_RETRY) { /* * Clear FAULT_FLAG_ALLOW_RETRY to avoid any risk * of starvation. */ flags &= ~(FAULT_FLAG_ALLOW_RETRY); flags |= FAULT_FLAG_TRIED; /* * No need to up_read(&mm->mmap_sem) as we would * have already released it in __lock_page_or_retry * in mm/filemap.c. */ goto retry; } } up_read(&mm->mmap_sem); return; /* * Something tried to access memory that isn't in our memory map. * Fix it, but check if it's kernel or user first. 
*/ bad_area: up_read(&mm->mmap_sem); /* User mode accesses just cause a SIGSEGV */ if (user_mode(regs)) { do_trap(regs, SIGSEGV, code, addr, tsk); return; } no_context: /* Are we prepared to handle this kernel fault? */ if (fixup_exception(regs)) { return; } /* * Oops. The kernel tried to access some bad page. We'll have to * terminate things with extreme prejudice. */ bust_spinlocks(1); pr_alert("Unable to handle kernel %s at virtual address " REG_FMT "\n", (addr < PAGE_SIZE) ? "NULL pointer dereference" : "paging request", addr); die(regs, "Oops"); do_exit(SIGKILL); /* * We ran out of memory, call the OOM killer, and return the userspace * (which will retry the fault, or kill us if we got oom-killed). */ out_of_memory: up_read(&mm->mmap_sem); if (!user_mode(regs)) goto no_context; pagefault_out_of_memory(); return; do_sigbus: up_read(&mm->mmap_sem); /* Kernel mode? Handle exceptions or die */ if (!user_mode(regs)) goto no_context; do_trap(regs, SIGBUS, BUS_ADRERR, addr, tsk); return; vmalloc_fault: { pgd_t *pgd, *pgd_k; pud_t *pud, *pud_k; pmd_t *pmd, *pmd_k; pte_t *pte_k; int index; if (user_mode(regs)) goto bad_area; /* * Synchronize this task's top level page-table * with the 'reference' page table. * * Do _not_ use "tsk->active_mm->pgd" here. * We might be inside an interrupt in the middle * of a task switch. */ index = pgd_index(addr); pgd = (pgd_t *)pfn_to_virt(csr_read(sptbr)) + index; pgd_k = init_mm.pgd + index; if (!pgd_present(*pgd_k)) goto no_context; set_pgd(pgd, *pgd_k); pud = pud_offset(pgd, addr); pud_k = pud_offset(pgd_k, addr); if (!pud_present(*pud_k)) goto no_context; /* Since the vmalloc area is global, it is unnecessary to copy individual PTEs */ pmd = pmd_offset(pud, addr); pmd_k = pmd_offset(pud_k, addr); if (!pmd_present(*pmd_k)) goto no_context; set_pmd(pmd, *pmd_k); /* Make sure the actual PTE exists as well to * catch kernel vmalloc-area accesses to non-mapped * addresses. If we don't do this, this will just * silently loop forever. */ pte_k = pte_offset_kernel(pmd_k, addr); if (!pte_present(*pte_k)) goto no_context; return; } }
static unsigned long arch_get_unmapped_area_topdown_sz(struct file *file, unsigned long addr0, unsigned long len, unsigned long align_size, unsigned long pgoff, unsigned long flags) { struct mm_struct *mm = current->mm; struct vm_area_struct *vma, *prev_vma; unsigned long base = mm->mmap_base, addr = addr0; unsigned long largest_hole = mm->cached_hole_size; unsigned long align_mask = ~(align_size - 1); int first_time = 1; /* don't allow allocations above current base */ if (mm->free_area_cache > base) mm->free_area_cache = base; if (len <= largest_hole) { largest_hole = 0; mm->free_area_cache = base; } try_again: /* make sure it can fit in the remaining address space */ if (mm->free_area_cache < len) goto fail; /* either no address requested or can't fit in requested address hole */ addr = (mm->free_area_cache - len) & align_mask; do { /* * Lookup failure means no vma is above this address, * i.e. return with success: */ vma = find_vma(mm, addr); if (!vma) return addr; /* * new region fits between prev_vma->vm_end and * vma->vm_start, use it: */ prev_vma = vma->vm_prev; if (addr + len <= vma->vm_start && (!prev_vma || (addr >= prev_vma->vm_end))) { /* remember the address as a hint for next time */ mm->cached_hole_size = largest_hole; return (mm->free_area_cache = addr); } else { /* pull free_area_cache down to the first hole */ if (mm->free_area_cache == vma->vm_end) { mm->free_area_cache = vma->vm_start; mm->cached_hole_size = largest_hole; } } /* remember the largest hole we saw so far */ if (addr + largest_hole < vma->vm_start) largest_hole = vma->vm_start - addr; /* try just below the current vma->vm_start */ addr = (vma->vm_start - len) & align_mask; } while (len <= vma->vm_start); fail: /* * if hint left us with no space for the requested * mapping then try again: */ if (first_time) { mm->free_area_cache = base; largest_hole = 0; first_time = 0; goto try_again; } /* * A failed mmap() very likely causes application failure, * so fall back to the bottom-up function here. This scenario * can happen with large stack limits and large mmap() * allocations. */ mm->free_area_cache = TASK_UNMAPPED_BASE; mm->cached_hole_size = ~0UL; addr = arch_get_unmapped_area_bottomup_sz(file, addr0, len, align_size, pgoff, flags); /* * Restore the topdown base: */ mm->free_area_cache = base; mm->cached_hole_size = ~0UL; return addr; }
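A hedged sketch of how a huge-page get_unmapped_area might drive the *_sz helpers with an explicit alignment, mirroring the bottom-up/top-down selection used in hugetlb_get_unmapped_area earlier in this section; the wrapper name is hypothetical and HPAGE_SIZE is assumed to be the desired alignment.

static unsigned long hugepage_unmapped_area(struct file *file, unsigned long addr,
	unsigned long len, unsigned long pgoff, unsigned long flags)
{
	struct mm_struct *mm = current->mm;

	if (mm->get_unmapped_area == arch_get_unmapped_area)
		return arch_get_unmapped_area_bottomup_sz(file, addr, len,
				HPAGE_SIZE, pgoff, flags);
	return arch_get_unmapped_area_topdown_sz(file, addr, len,
			HPAGE_SIZE, pgoff, flags);
}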
/* * Note this is constrained to return 0, -EFAULT, -EACCES, -ENOMEM by * segv(). */ int handle_page_fault(unsigned long address, unsigned long ip, int is_write, int is_user, int *code_out) { struct mm_struct *mm = current->mm; struct vm_area_struct *vma; pgd_t *pgd; pud_t *pud; pmd_t *pmd; pte_t *pte; int err = -EFAULT; unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE | (is_write ? FAULT_FLAG_WRITE : 0); *code_out = SEGV_MAPERR; /* * If the fault was during atomic operation, don't take the fault, just * fail. */ if (in_atomic()) goto out_nosemaphore; retry: down_read(&mm->mmap_sem); vma = find_vma(mm, address); if (!vma) goto out; else if (vma->vm_start <= address) goto good_area; else if (!(vma->vm_flags & VM_GROWSDOWN)) goto out; else if (is_user && !ARCH_IS_STACKGROW(address)) goto out; else if (expand_stack(vma, address)) goto out; good_area: *code_out = SEGV_ACCERR; if (is_write && !(vma->vm_flags & VM_WRITE)) goto out; /* Don't require VM_READ|VM_EXEC for write faults! */ if (!is_write && !(vma->vm_flags & (VM_READ | VM_EXEC))) goto out; do { int fault; fault = handle_mm_fault(mm, vma, address, flags); if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(current)) goto out_nosemaphore; if (unlikely(fault & VM_FAULT_ERROR)) { if (fault & VM_FAULT_OOM) { goto out_of_memory; } else if (fault & VM_FAULT_SIGBUS) { err = -EACCES; goto out; } BUG(); } if (flags & FAULT_FLAG_ALLOW_RETRY) { if (fault & VM_FAULT_MAJOR) current->maj_flt++; else current->min_flt++; if (fault & VM_FAULT_RETRY) { flags &= ~FAULT_FLAG_ALLOW_RETRY; flags |= FAULT_FLAG_TRIED; goto retry; } } pgd = pgd_offset(mm, address); pud = pud_offset(pgd, address); pmd = pmd_offset(pud, address); pte = pte_offset_kernel(pmd, address); } while (!pte_present(*pte)); err = 0; /* * The below warning was added in place of * pte_mkyoung(); if (is_write) pte_mkdirty(); * If it's triggered, we'd normally see a hang here (a clean pte is * marked read-only to emulate the dirty bit). * However, the generic code can mark a PTE writable but clean on a * concurrent read fault, triggering this harmlessly. So comment it out. */ #if 0 WARN_ON(!pte_young(*pte) || (is_write && !pte_dirty(*pte))); #endif flush_tlb_page(vma, address); out: up_read(&mm->mmap_sem); out_nosemaphore: return err; out_of_memory: /* * We ran out of memory, call the OOM killer, and return to userspace * (which will retry the fault, or kill us if we got oom-killed). */ up_read(&mm->mmap_sem); pagefault_out_of_memory(); return 0; }
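The retry path above is a small state machine: the first pass runs with FAULT_FLAG_ALLOW_RETRY set, and when the fault core reports VM_FAULT_RETRY the handler clears ALLOW_RETRY and sets FAULT_FLAG_TRIED before looping, so a second retry request cannot spin forever. Below is a standalone sketch of that flag transition; the flag values and the fake fault function are illustrative stand-ins, the real definitions live in the kernel headers.

#include <stdio.h>

/* Illustrative values only; the real flags are defined in <linux/mm.h>. */
#define FAULT_FLAG_ALLOW_RETRY  0x04
#define FAULT_FLAG_KILLABLE     0x08
#define FAULT_FLAG_TRIED        0x10

#define VM_FAULT_RETRY          0x0400

/* Pretend fault core: asks for a retry exactly once. */
static unsigned int fake_handle_mm_fault(unsigned int flags)
{
    return (flags & FAULT_FLAG_ALLOW_RETRY) ? VM_FAULT_RETRY : 0;
}

int main(void)
{
    unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
    int passes = 0;

    for (;;) {
        unsigned int fault = fake_handle_mm_fault(flags);
        passes++;
        if ((flags & FAULT_FLAG_ALLOW_RETRY) && (fault & VM_FAULT_RETRY)) {
            /* Same transition as the handler above: one retry, no starvation. */
            flags &= ~FAULT_FLAG_ALLOW_RETRY;
            flags |= FAULT_FLAG_TRIED;
            continue;
        }
        break;
    }
    printf("fault resolved after %d pass(es)\n", passes); /* prints 2 */
    return 0;
}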
/* * This routine handles page faults. It determines the address, * and the problem, and then passes it off to one of the appropriate * routines. * * interruption code (int_code): * 04 Protection -> Write-Protection (suppression) * 10 Segment translation -> Not present (nullification) * 11 Page translation -> Not present (nullification) * 3b Region third trans. -> Not present (nullification) */ static inline int do_exception(struct pt_regs *regs, int access) { struct task_struct *tsk; struct mm_struct *mm; struct vm_area_struct *vma; unsigned long trans_exc_code; unsigned long address; unsigned int flags; int fault; if (notify_page_fault(regs)) return 0; tsk = current; mm = tsk->mm; trans_exc_code = regs->int_parm_long; /* * Verify that the fault happened in user space, that * we are not in an interrupt and that there is a * user context. */ fault = VM_FAULT_BADCONTEXT; if (unlikely(!user_space_fault(trans_exc_code) || in_atomic() || !mm)) goto out; address = trans_exc_code & __FAIL_ADDR_MASK; perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address); flags = FAULT_FLAG_ALLOW_RETRY; if (access == VM_WRITE || (trans_exc_code & store_indication) == 0x400) flags |= FAULT_FLAG_WRITE; down_read(&mm->mmap_sem); #ifdef CONFIG_PGSTE if (test_tsk_thread_flag(current, TIF_SIE) && S390_lowcore.gmap) { address = __gmap_fault(address, (struct gmap *) S390_lowcore.gmap); if (address == -EFAULT) { fault = VM_FAULT_BADMAP; goto out_up; } if (address == -ENOMEM) { fault = VM_FAULT_OOM; goto out_up; } } #endif retry: fault = VM_FAULT_BADMAP; vma = find_vma(mm, address); if (!vma) goto out_up; if (unlikely(vma->vm_start > address)) { if (!(vma->vm_flags & VM_GROWSDOWN)) goto out_up; if (expand_stack(vma, address)) goto out_up; } /* * Ok, we have a good vm_area for this memory access, so * we can handle it.. */ fault = VM_FAULT_BADACCESS; if (unlikely(!(vma->vm_flags & access))) goto out_up; if (is_vm_hugetlb_page(vma)) address &= HPAGE_MASK; /* * If for any reason at all we couldn't handle the fault, * make sure we exit gracefully rather than endlessly redo * the fault. */ fault = handle_mm_fault(mm, vma, address, flags); if (unlikely(fault & VM_FAULT_ERROR)) goto out_up; /* * Major/minor page fault accounting is only done on the * initial attempt. If we go through a retry, it is extremely * likely that the page will be found in page cache at that point. */ if (flags & FAULT_FLAG_ALLOW_RETRY) { if (fault & VM_FAULT_MAJOR) { tsk->maj_flt++; perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, regs, address); } else { tsk->min_flt++; perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, regs, address); } if (fault & VM_FAULT_RETRY) { /* Clear FAULT_FLAG_ALLOW_RETRY to avoid any risk * of starvation. */ flags &= ~FAULT_FLAG_ALLOW_RETRY; flags |= FAULT_FLAG_TRIED; down_read(&mm->mmap_sem); goto retry; } } /* * The instruction that caused the program check will * be repeated. Don't signal single step via SIGTRAP. */ clear_tsk_thread_flag(tsk, TIF_PER_TRAP); fault = 0; out_up: up_read(&mm->mmap_sem); out: return fault; }
unsigned long arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0, const unsigned long len, const unsigned long pgoff, const unsigned long flags) { struct vm_area_struct *vma, *prev_vma; struct mm_struct *mm = current->mm; unsigned long base = mm->mmap_base, addr = addr0; int first_time = 1; unsigned long begin, end; find_start_end(flags, &begin, &end); /* requested length too big for entire address space */ if (len > end) return -ENOMEM; /* dont allow allocations above current base */ if (mm->free_area_cache > base) mm->free_area_cache = base; /* requesting a specific address */ if (addr) { addr = PAGE_ALIGN(addr); vma = find_vma(mm, addr); if (end - len >= addr && (!vma || addr + len <= vma->vm_start)) return addr; } /* free_area_cache is not really optimized for 32 bit apps */ if (sysctl_legacy_va_layout && ((flags & MAP_32BIT) || test_thread_flag(TIF_IA32))) goto fail; try_again: /* make sure it can fit in the remaining address space */ if (mm->free_area_cache < len) goto fail; /* either no address requested or cant fit in requested address hole */ addr = (mm->free_area_cache - len) & PAGE_MASK; do { /* * Lookup failure means no vma is above this address, * i.e. return with success: */ if (!(vma = find_vma_prev(mm, addr, &prev_vma))) return addr; /* * new region fits between prev_vma->vm_end and * vma->vm_start, use it: */ if (addr && addr+len <= vma->vm_start && (!prev_vma || (addr >= prev_vma->vm_end))) /* remember the address as a hint for next time */ return (mm->free_area_cache = addr); else /* pull free_area_cache down to the first hole */ if (mm->free_area_cache == vma->vm_end) mm->free_area_cache = vma->vm_start; /* try just below the current vma->vm_start */ addr = vma->vm_start-len; } while (len < vma->vm_start); fail: /* * if hint left us with no space for the requested * mapping then try again: */ if (first_time) { mm->free_area_cache = base; first_time = 0; goto try_again; } /* * A failed mmap() very likely causes application failure, * so fall back to the bottom-up function here. This scenario * can happen with large stack limits and large mmap() * allocations. */ mm->free_area_cache = begin; addr = arch_get_unmapped_area(filp, addr0, len, pgoff, flags); /* * Restore the topdown base: */ mm->free_area_cache = base; return addr; }
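When the caller passes an address hint, the check above is written as end - len >= addr rather than addr + len <= end: len has already been bounded by end, so the subtraction cannot wrap, whereas the addition can wrap for a bogus hint near the top of the address space and slip past the limit test. A small userspace demonstration, with a made-up limit and hint:

#include <stdio.h>

int main(void)
{
    unsigned long end  = 0x7ffffffff000UL;      /* hypothetical upper limit */
    unsigned long len  = 0x2000UL;
    unsigned long addr = 0xfffffffffffff000UL;  /* bogus hint near ULONG_MAX */

    /* Naive form: addr + len wraps to a tiny value, so the bogus hint
     * slips through the "<= end" test. */
    int naive = (addr + len <= end);

    /* Form used above: len was already checked against end, so end - len
     * cannot underflow, and an out-of-range hint is rejected. */
    int safe = (end - len >= addr);

    printf("naive check: %s\n", naive ? "accept (wrong)" : "reject");
    printf("safe  check: %s\n", safe ? "accept" : "reject (correct)");
    return 0;
}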
/* * Note this is constrained to return 0, -EFAULT, -EACCES, -ENOMEM by * segv(). */ int handle_page_fault(unsigned long address, unsigned long ip, int is_write, int is_user, int *code_out) { struct mm_struct *mm = current->mm; struct vm_area_struct *vma; pgd_t *pgd; pud_t *pud; pmd_t *pmd; pte_t *pte; int err = -EFAULT; *code_out = SEGV_MAPERR; /* * If the fault was during atomic operation, don't take the fault, just * fail. */ if (in_atomic()) goto out_nosemaphore; down_read(&mm->mmap_sem); vma = find_vma(mm, address); if (!vma) goto out; else if (vma->vm_start <= address) goto good_area; else if (!(vma->vm_flags & VM_GROWSDOWN)) goto out; else if (is_user && !ARCH_IS_STACKGROW(address)) goto out; else if (expand_stack(vma, address)) goto out; good_area: *code_out = SEGV_ACCERR; if (is_write && !(vma->vm_flags & VM_WRITE)) goto out; /* Don't require VM_READ|VM_EXEC for write faults! */ if (!is_write && !(vma->vm_flags & (VM_READ | VM_EXEC))) goto out; do { int fault; survive: fault = handle_mm_fault(mm, vma, address, is_write); if (unlikely(fault & VM_FAULT_ERROR)) { if (fault & VM_FAULT_OOM) { err = -ENOMEM; goto out_of_memory; } else if (fault & VM_FAULT_SIGBUS) { err = -EACCES; goto out; } BUG(); } if (fault & VM_FAULT_MAJOR) current->maj_flt++; else current->min_flt++; pgd = pgd_offset(mm, address); pud = pud_offset(pgd, address); pmd = pmd_offset(pud, address); pte = pte_offset_kernel(pmd, address); } while (!pte_present(*pte)); err = 0; /* * The below warning was added in place of * pte_mkyoung(); if (is_write) pte_mkdirty(); * If it's triggered, we'd normally see a hang here (a clean pte is * marked read-only to emulate the dirty bit). * However, the generic code can mark a PTE writable but clean on a * concurrent read fault, triggering this harmlessly. So comment it out. */ #if 0 WARN_ON(!pte_young(*pte) || (is_write && !pte_dirty(*pte))); #endif flush_tlb_page(vma, address); out: up_read(&mm->mmap_sem); out_nosemaphore: return err; /* * We ran out of memory, or some other thing happened to us that made * us unable to handle the page fault gracefully. */ out_of_memory: if (is_global_init(current)) { up_read(&mm->mmap_sem); yield(); down_read(&mm->mmap_sem); goto survive; } goto out; }
/* * This routine handles page faults. It determines the problem, and * then passes it off to one of the appropriate routines. * * error_code: * bit 0 == 0 means no page found, 1 means protection fault * bit 1 == 0 means read, 1 means write * * If this routine detects a bad access, it returns 1, otherwise it * returns 0. */ int do_page_fault(struct pt_regs *regs, unsigned long address, unsigned long error_code) { struct mm_struct *mm = current->mm; struct vm_area_struct * vma; int fault; unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE; pr_debug("do page fault:\nregs->sr=%#x, regs->pc=%#lx, address=%#lx, %ld, %p\n", regs->sr, regs->pc, address, error_code, mm ? mm->pgd : NULL); /* * If we're in an interrupt or have no user * context, we must not take the fault.. */ if (faulthandler_disabled() || !mm) goto no_context; if (user_mode(regs)) flags |= FAULT_FLAG_USER; retry: down_read(&mm->mmap_sem); vma = find_vma(mm, address); if (!vma) goto map_err; if (vma->vm_flags & VM_IO) goto acc_err; if (vma->vm_start <= address) goto good_area; if (!(vma->vm_flags & VM_GROWSDOWN)) goto map_err; if (user_mode(regs)) { /* Accessing the stack below usp is always a bug. The "+ 256" is there due to some instructions doing pre-decrement on the stack and that doesn't show up until later. */ if (address + 256 < rdusp()) goto map_err; } if (expand_stack(vma, address)) goto map_err; /* * Ok, we have a good vm_area for this memory access, so * we can handle it.. */ good_area: pr_debug("do_page_fault: good_area\n"); switch (error_code & 3) { default: /* 3: write, present */ /* fall through */ case 2: /* write, not present */ if (!(vma->vm_flags & VM_WRITE)) goto acc_err; flags |= FAULT_FLAG_WRITE; break; case 1: /* read, present */ goto acc_err; case 0: /* read, not present */ if (!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE))) goto acc_err; } /* * If for any reason at all we couldn't handle the fault, * make sure we exit gracefully rather than endlessly redo * the fault. */ fault = handle_mm_fault(vma, address, flags); pr_debug("handle_mm_fault returns %d\n", fault); if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(current)) return 0; if (unlikely(fault & VM_FAULT_ERROR)) { if (fault & VM_FAULT_OOM) goto out_of_memory; else if (fault & VM_FAULT_SIGSEGV) goto map_err; else if (fault & VM_FAULT_SIGBUS) goto bus_err; BUG(); } /* * Major/minor page fault accounting is only done on the * initial attempt. If we go through a retry, it is extremely * likely that the page will be found in page cache at that point. */ if (flags & FAULT_FLAG_ALLOW_RETRY) { if (fault & VM_FAULT_MAJOR) current->maj_flt++; else current->min_flt++; if (fault & VM_FAULT_RETRY) { /* Clear FAULT_FLAG_ALLOW_RETRY to avoid any risk * of starvation. */ flags &= ~FAULT_FLAG_ALLOW_RETRY; flags |= FAULT_FLAG_TRIED; /* * No need to up_read(&mm->mmap_sem) as we would * have already released it in __lock_page_or_retry * in mm/filemap.c. */ goto retry; } } up_read(&mm->mmap_sem); return 0; /* * We ran out of memory, or some other thing happened to us that made * us unable to handle the page fault gracefully. 
*/ out_of_memory: up_read(&mm->mmap_sem); if (!user_mode(regs)) goto no_context; pagefault_out_of_memory(); return 0; no_context: current->thread.signo = SIGBUS; current->thread.faddr = address; return send_fault_sig(regs); bus_err: current->thread.signo = SIGBUS; current->thread.code = BUS_ADRERR; current->thread.faddr = address; goto send_sig; map_err: current->thread.signo = SIGSEGV; current->thread.code = SEGV_MAPERR; current->thread.faddr = address; goto send_sig; acc_err: current->thread.signo = SIGSEGV; current->thread.code = SEGV_ACCERR; current->thread.faddr = address; send_sig: up_read(&mm->mmap_sem); return send_fault_sig(regs); }
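The switch (error_code & 3) in the handler above folds the two status bits documented at its top (bit 0: not-present vs. protection, bit 1: read vs. write) into four cases. The sketch below spells out that decoding on its own; the strings are illustrative labels, not kernel messages.

#include <stdio.h>

/* Decode the two low bits of the error code used by the handler above:
 *   bit 0: 0 = no page found, 1 = protection fault
 *   bit 1: 0 = read,          1 = write
 */
static const char *decode_fault(unsigned long error_code)
{
    switch (error_code & 3) {
    case 3: return "write, present (protection fault)";
    case 2: return "write, not present";
    case 1: return "read, present (protection fault)";
    case 0: return "read, not present";
    }
    return "unreachable";
}

int main(void)
{
    for (unsigned long ec = 0; ec < 4; ec++)
        printf("error_code %lu -> %s\n", ec, decode_fault(ec));
    return 0;
}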
/* * This routine handles page faults. It determines the address, * and the problem, and then passes it off to one of the appropriate * routines. * * error_code: * bit 0 == 0 means no page found, 1 means protection fault * bit 1 == 0 means read, 1 means write * bit 2 == 0 means kernel, 1 means user-mode */ asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long error_code) { struct task_struct *tsk; struct mm_struct *mm; struct vm_area_struct * vma; unsigned long address; unsigned long page; unsigned long fixup; int write; siginfo_t info; /* get the address */ __asm__("movl %%cr2,%0":"=r" (address)); #if CONFIG_X86_SWITCH_PAGETABLES if (!user_mode(regs)) goto oops; #endif /* It's safe to allow irq's after cr2 has been saved */ if (regs->eflags & X86_EFLAGS_IF) local_irq_enable(); tsk = current; /* * We fault-in kernel-space virtual memory on-demand. The * 'reference' page table is init_mm.pgd. * * NOTE! We MUST NOT take any locks for this case. We may * be in an interrupt or a critical region, and should * only copy the information from the master page table, * nothing more. * * This verifies that the fault happens in kernel space * (error_code & 4) == 0, and that the fault was not a * protection error (error_code & 1) == 0. */ if (address >= TASK_SIZE && !(error_code & 5)) goto vmalloc_fault; mm = tsk->mm; info.si_code = SEGV_MAPERR; /* * If we're in an interrupt or have no user * context, we must not take the fault.. */ if (in_interrupt() || !mm) goto no_context; down_read(&mm->mmap_sem); vma = find_vma(mm, address); if (!vma) goto bad_area; if (vma->vm_start <= address) goto good_area; if (!(vma->vm_flags & VM_GROWSDOWN)) goto bad_area; if (error_code & 4) { /* * accessing the stack below %esp is always a bug. * The "+ 32" is there due to some instructions (like * pusha) doing post-decrement on the stack and that * doesn't show up until later.. */ if (address + 32 < regs->esp) goto bad_area; } if (expand_stack(vma, address)) goto bad_area; /* * Ok, we have a good vm_area for this memory access, so * we can handle it.. */ good_area: info.si_code = SEGV_ACCERR; write = 0; switch (error_code & 3) { default: /* 3: write, present */ #ifdef TEST_VERIFY_AREA if (regs->cs == KERNEL_CS) printk("WP fault at %08lx\n", regs->eip); #endif /* fall through */ case 2: /* write, not present */ if (!(vma->vm_flags & VM_WRITE)) goto bad_area; write++; break; case 1: /* read, present */ goto bad_area; case 0: /* read, not present */ if (!(vma->vm_flags & (VM_READ | VM_EXEC))) goto bad_area; } survive: /* * If for any reason at all we couldn't handle the fault, * make sure we exit gracefully rather than endlessly redo * the fault. */ switch (handle_mm_fault(mm, vma, address, write)) { case 1: tsk->min_flt++; break; case 2: tsk->maj_flt++; break; case 0: goto do_sigbus; default: goto out_of_memory; } /* * Did it hit the DOS screen memory VA from vm86 mode? */ if (regs->eflags & VM_MASK) { unsigned long bit = (address - 0xA0000) >> PAGE_SHIFT; if (bit < 32) tsk->thread.screen_bitmap |= 1 << bit; }
/* * This routine handles page faults. It determines the address, * and the problem, and then passes it off to one of the appropriate * routines. */ asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long write, unsigned long address) { struct vm_area_struct * vma = NULL; struct task_struct *tsk = current; struct mm_struct *mm = tsk->mm; const int field = sizeof(unsigned long) * 2; siginfo_t info; #if 0 printk("Cpu%d[%s:%d:%0*lx:%ld:%0*lx]\n", smp_processor_id(), current->comm, current->pid, field, address, write, field, regs->cp0_epc); #endif info.si_code = SEGV_MAPERR; /* * We fault-in kernel-space virtual memory on-demand. The * 'reference' page table is init_mm.pgd. * * NOTE! We MUST NOT take any locks for this case. We may * be in an interrupt or a critical region, and should * only copy the information from the master page table, * nothing more. */ if (unlikely(address >= VMALLOC_START && address <= VMALLOC_END)) goto vmalloc_fault; /* * If we're in an interrupt or have no user * context, we must not take the fault.. */ if (in_atomic() || !mm) goto bad_area_nosemaphore; down_read(&mm->mmap_sem); vma = find_vma(mm, address); if (!vma) goto bad_area; if (vma->vm_start <= address) goto good_area; if (!(vma->vm_flags & VM_GROWSDOWN)) goto bad_area; if (expand_stack(vma, address)) goto bad_area; /* * Ok, we have a good vm_area for this memory access, so * we can handle it.. */ good_area: info.si_code = SEGV_ACCERR; if (write) { if (!(vma->vm_flags & VM_WRITE)) goto bad_area; } else { if (!(vma->vm_flags & (VM_READ | VM_WRITE | VM_EXEC))) goto bad_area; } survive: /* * If for any reason at all we couldn't handle the fault, * make sure we exit gracefully rather than endlessly redo * the fault. */ switch (handle_mm_fault(mm, vma, address, write)) { case VM_FAULT_MINOR: tsk->min_flt++; break; case VM_FAULT_MAJOR: tsk->maj_flt++; break; case VM_FAULT_SIGBUS: goto do_sigbus; case VM_FAULT_OOM: goto out_of_memory; default: BUG(); } up_read(&mm->mmap_sem); return; /* * Something tried to access memory that isn't in our memory map.. * Fix it, but check if it's kernel or user first.. */ bad_area: up_read(&mm->mmap_sem); bad_area_nosemaphore: /* User mode accesses just cause a SIGSEGV */ if (user_mode(regs)) { tsk->thread.cp0_badvaddr = address; tsk->thread.error_code = write; #if 0 printk("do_page_fault() #2: sending SIGSEGV to %s for " "invalid %s\n%0*lx (epc == %0*lx, ra == %0*lx)\n", tsk->comm, write ? "write access to" : "read access from", field, address, field, (unsigned long) regs->cp0_epc, field, (unsigned long) regs->regs[31]); #endif info.si_signo = SIGSEGV; info.si_errno = 0; /* info.si_code has been set above */ info.si_addr = (void __user *) address; force_sig_info(SIGSEGV, &info, tsk); return; } no_context: /* Are we prepared to handle this kernel fault? */ if (fixup_exception(regs)) { current->thread.cp0_baduaddr = address; return; } /* * Oops. The kernel tried to access some bad page. We'll have to * terminate things with extreme prejudice. */ bust_spinlocks(1); printk(KERN_ALERT "CPU %d Unable to handle kernel paging request at " "virtual address %0*lx, epc == %0*lx, ra == %0*lx\n", smp_processor_id(), field, address, field, regs->cp0_epc, field, regs->regs[31]); die("Oops", regs); /* * We ran out of memory, or some other thing happened to us that made * us unable to handle the page fault gracefully. 
*/ out_of_memory: up_read(&mm->mmap_sem); if (is_init(tsk)) { yield(); down_read(&mm->mmap_sem); goto survive; } printk("VM: killing process %s\n", tsk->comm); if (user_mode(regs)) do_exit(SIGKILL); goto no_context; do_sigbus: up_read(&mm->mmap_sem); /* Kernel mode? Handle exceptions or die */ if (!user_mode(regs)) goto no_context; else /* * Send a sigbus, regardless of whether we were in kernel * or user mode. */ #if 0 printk("do_page_fault() #3: sending SIGBUS to %s for " "invalid %s\n%0*lx (epc == %0*lx, ra == %0*lx)\n", tsk->comm, write ? "write access to" : "read access from", field, address, field, (unsigned long) regs->cp0_epc, field, (unsigned long) regs->regs[31]); #endif tsk->thread.cp0_badvaddr = address; info.si_signo = SIGBUS; info.si_errno = 0; info.si_code = BUS_ADRERR; info.si_addr = (void __user *) address; force_sig_info(SIGBUS, &info, tsk); return; vmalloc_fault: { /* * Synchronize this task's top level page-table * with the 'reference' page table. * * Do _not_ use "tsk" here. We might be inside * an interrupt in the middle of a task switch.. */ int offset = __pgd_offset(address); pgd_t *pgd, *pgd_k; pud_t *pud, *pud_k; pmd_t *pmd, *pmd_k; pte_t *pte_k; pgd = (pgd_t *) pgd_current[smp_processor_id()] + offset; pgd_k = init_mm.pgd + offset; if (!pgd_present(*pgd_k)) goto no_context; set_pgd(pgd, *pgd_k); pud = pud_offset(pgd, address); pud_k = pud_offset(pgd_k, address); if (!pud_present(*pud_k)) goto no_context; pmd = pmd_offset(pud, address); pmd_k = pmd_offset(pud_k, address); if (!pmd_present(*pmd_k)) goto no_context; set_pmd(pmd, *pmd_k); pte_k = pte_offset_kernel(pmd_k, address); if (!pte_present(*pte_k)) goto no_context; return; } }
/* do_pgfault - interrupt handler to process the page fault exception * @mm : the control struct for a set of vmas using the same PDT * @error_code : the error code recorded in trapframe->tf_err, which is set by the x86 hardware * @addr : the addr which caused the memory access exception (the contents of the CR2 register) * * CALL GRAPH: trap--> trap_dispatch-->pgfault_handler-->do_pgfault * The processor provides ucore's do_pgfault function with two items of information to aid in diagnosing * the exception and recovering from it. * (1) The contents of the CR2 register. The processor loads the CR2 register with the * 32-bit linear address that generated the exception. The do_pgfault function can * use this address to locate the corresponding page directory and page-table * entries. * (2) An error code on the kernel stack. The error code for a page fault has a format different from * that for other exceptions. The error code tells the exception handler three things: * -- The P flag (bit 0) indicates whether the exception was due to a not-present page (0) * or to either an access rights violation or the use of a reserved bit (1). * -- The W/R flag (bit 1) indicates whether the memory access that caused the exception * was a read (0) or write (1). * -- The U/S flag (bit 2) indicates whether the processor was executing at user mode (1) * or supervisor mode (0) at the time of the exception. */ int do_pgfault(struct mm_struct *mm, uint32_t error_code, uintptr_t addr) { int ret = -E_INVAL; //try to find a vma which includes addr struct vma_struct *vma = find_vma(mm, addr); pgfault_num++; //is addr within the range of one of mm's vmas? if (vma == NULL || vma->vm_start > addr) { cprintf("not valid addr %x, and can not find it in vma\n", addr); goto failed; } //check the error_code switch (error_code & 3) { default: /* error code flag : default is 3 ( W/R=1, P=1): write, present */ case 2: /* error code flag : (W/R=1, P=0): write, not present */ if (!(vma->vm_flags & VM_WRITE)) { cprintf("do_pgfault failed: error code flag = write AND not present, but the addr's vma cannot write\n"); goto failed; } break; case 1: /* error code flag : (W/R=0, P=1): read, present */ cprintf("do_pgfault failed: error code flag = read AND present\n"); goto failed; case 0: /* error code flag : (W/R=0, P=0): read, not present */ if (!(vma->vm_flags & (VM_READ | VM_EXEC))) { cprintf("do_pgfault failed: error code flag = read AND not present, but the addr's vma cannot read or exec\n"); goto failed; } } /* IF (writing to an existing addr) OR * (writing to a non-existent addr && the vma is writable) OR * (reading a non-existent addr && the vma is readable) * THEN * continue processing */ uint32_t perm = PTE_U; if (vma->vm_flags & VM_WRITE) { perm |= PTE_W; } addr = ROUNDDOWN(addr, PGSIZE); ret = -E_NO_MEM; pte_t *ptep=NULL; /*LAB3 EXERCISE 1: YOUR CODE * If you need help, the comments BELOW can help you finish the code * * Some useful MACROs and DEFINEs that you can use in the implementation below.
* MACROs or Functions: * get_pte : get a pte and return the kernel virtual address of this pte for la * if the PT containing this pte doesn't exist, allocate a page for the PT (note the 3rd parameter '1') * pgdir_alloc_page : call the alloc_page & page_insert functions to allocate a page-sized block of memory & set up * a pa<--->la mapping for linear address la in the PDT pgdir * DEFINES: * VM_WRITE : if (vma->vm_flags & VM_WRITE) is set, the vma is writable; otherwise it is not * PTE_W 0x002 // page table/directory entry flags bit : Writeable * PTE_U 0x004 // page table/directory entry flags bit : User can access * VARIABLES: * mm->pgdir : the PDT of these vmas * */ //#if 0 /*LAB3 EXERCISE 1: YOUR CODE*/ ptep = get_pte(mm->pgdir, addr, 1); //(1) try to find a pte; if the pte's PT (Page Table) doesn't exist, create one. if (ptep == NULL) { cprintf("get_pte failed in do_pgfault \n"); goto failed; } //(2) if the physical page doesn't exist yet, alloc a page & map the phy addr to the logical addr if (*ptep == 0) { if (pgdir_alloc_page(mm->pgdir, addr, perm) == NULL) { cprintf("pgdir_alloc_page failed in do_pgfault \n"); goto failed; } } else { /*LAB3 EXERCISE 2: YOUR CODE * Now we treat this pte as a swap entry: load the data from disk into a page with a phy addr, * map the phy addr to the logical addr, and tell the swap manager to record the access history of this page. * * Some useful MACROs and DEFINEs that you can use in the implementation below. * MACROs or Functions: * swap_in(mm, addr, &page) : alloc a memory page, then, according to the swap entry in the PTE for addr, * find the disk page and read its content into this memory page * page_insert : build the mapping between the phy addr of a Page and the linear addr la * swap_map_swappable : mark the page swappable */ if(swap_init_ok) { struct Page *page=NULL; //(1) According to mm AND addr, load the content of the right disk page // into the memory managed by page. int r; r = swap_in(mm, addr, &page); if (r != 0) { cprintf("swap_in failed in do_pgfault \n"); goto failed; } //(2) According to mm, addr AND page, set up the phy addr <---> logical addr mapping page_insert(mm->pgdir, page, addr, perm); //(3) make the page swappable. swap_map_swappable(mm, addr, page, 1); page->pra_vaddr = addr; } else { cprintf("no swap_init_ok but ptep is %x, failed\n",*ptep); goto failed; } } //#endif ret = 0; failed: return ret; }
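Before any mapping is installed, do_pgfault above derives the PTE permission bits from the vma flags and rounds the faulting address down to a page boundary, because pages, not individual addresses, are mapped. Here is a self-contained sketch of those two steps; PGSIZE, the PTE bits, and the VM_WRITE value are written out locally for illustration, and the real definitions come from ucore's headers.

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define PGSIZE   4096u
#define PTE_W    0x002   /* writeable       (see the comments above) */
#define PTE_U    0x004   /* user-accessible (see the comments above) */
#define VM_WRITE 0x002   /* illustrative vma flag value */

#define ROUNDDOWN(a, n) (((uintptr_t)(a)) & ~((uintptr_t)(n) - 1))

int main(void)
{
    uintptr_t fault_addr = 0x00803a2c;   /* hypothetical faulting address */
    uint32_t  vm_flags   = VM_WRITE;     /* the vma allows writes */

    /* Same derivation as do_pgfault: user bit always, write bit only if
     * the vma itself is writable. */
    uint32_t perm = PTE_U;
    if (vm_flags & VM_WRITE)
        perm |= PTE_W;

    uintptr_t page_va = ROUNDDOWN(fault_addr, PGSIZE);

    assert(page_va % PGSIZE == 0);
    printf("map page %#lx with perm %#x\n",
           (unsigned long)page_va, (unsigned int)perm);
    return 0;
}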
asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long address, unsigned long vector, int write_acc) { struct task_struct *tsk; struct mm_struct *mm; struct vm_area_struct *vma; siginfo_t info; int fault; unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE; tsk = current; /* * We fault-in kernel-space virtual memory on-demand. The * 'reference' page table is init_mm.pgd. * * NOTE! We MUST NOT take any locks for this case. We may * be in an interrupt or a critical region, and should * only copy the information from the master page table, * nothing more. * * NOTE2: This is done so that, when updating the vmalloc * mappings we don't have to walk all processes pgdirs and * add the high mappings all at once. Instead we do it as they * are used. However vmalloc'ed page entries have the PAGE_GLOBAL * bit set so sometimes the TLB can use a lingering entry. * * This verifies that the fault happens in kernel space * and that the fault was not a protection error. */ if (address >= VMALLOC_START && (vector != 0x300 && vector != 0x400) && !user_mode(regs)) goto vmalloc_fault; /* If exceptions were enabled, we can reenable them here */ if (user_mode(regs)) { /* Exception was in userspace: reenable interrupts */ local_irq_enable(); } else { /* If exception was in a syscall, then IRQ's may have * been enabled or disabled. If they were enabled, * reenable them. */ if (regs->sr & (SPR_SR_IEE | SPR_SR_TEE)) local_irq_enable(); } mm = tsk->mm; info.si_code = SEGV_MAPERR; /* * If we're in an interrupt or have no user * context, we must not take the fault.. */ if (in_interrupt() || !mm) goto no_context; retry: down_read(&mm->mmap_sem); vma = find_vma(mm, address); if (!vma) goto bad_area; if (vma->vm_start <= address) goto good_area; if (!(vma->vm_flags & VM_GROWSDOWN)) goto bad_area; if (user_mode(regs)) { /* * accessing the stack below usp is always a bug. * we get page-aligned addresses so we can only check * if we're within a page from usp, but that might be * enough to catch brutal errors at least. */ if (address + PAGE_SIZE < regs->sp) goto bad_area; } if (expand_stack(vma, address)) goto bad_area; /* * Ok, we have a good vm_area for this memory access, so * we can handle it.. */ good_area: info.si_code = SEGV_ACCERR; /* first do some preliminary protection checks */ if (write_acc) { if (!(vma->vm_flags & VM_WRITE)) goto bad_area; flags |= FAULT_FLAG_WRITE; } else { /* not present */ if (!(vma->vm_flags & (VM_READ | VM_EXEC))) goto bad_area; } /* are we trying to execute nonexecutable area */ if ((vector == 0x400) && !(vma->vm_page_prot.pgprot & _PAGE_EXEC)) goto bad_area; /* * If for any reason at all we couldn't handle the fault, * make sure we exit gracefully rather than endlessly redo * the fault. */ fault = handle_mm_fault(mm, vma, address, flags); if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(current)) return; if (unlikely(fault & VM_FAULT_ERROR)) { if (fault & VM_FAULT_OOM) goto out_of_memory; else if (fault & VM_FAULT_SIGBUS) goto do_sigbus; BUG(); } if (flags & FAULT_FLAG_ALLOW_RETRY) { /*RGD modeled on Cris */ if (fault & VM_FAULT_MAJOR) tsk->maj_flt++; else tsk->min_flt++; if (fault & VM_FAULT_RETRY) { flags &= ~FAULT_FLAG_ALLOW_RETRY; flags |= FAULT_FLAG_TRIED; /* No need to up_read(&mm->mmap_sem) as we would * have already released it in __lock_page_or_retry * in mm/filemap.c. */ goto retry; } } up_read(&mm->mmap_sem); return; /* * Something tried to access memory that isn't in our memory map.. * Fix it, but check if it's kernel or user first..
*/ bad_area: up_read(&mm->mmap_sem); bad_area_nosemaphore: /* User mode accesses just cause a SIGSEGV */ if (user_mode(regs)) { info.si_signo = SIGSEGV; info.si_errno = 0; /* info.si_code has been set above */ info.si_addr = (void *)address; force_sig_info(SIGSEGV, &info, tsk); return; } no_context: /* Are we prepared to handle this kernel fault? * * (The kernel has valid exception-points in the source * when it accesses user-memory. When it fails in one * of those points, we find it in a table and do a jump * to some fixup code that loads an appropriate error * code) */ { const struct exception_table_entry *entry; __asm__ __volatile__("l.nop 42"); if ((entry = search_exception_tables(regs->pc)) != NULL) { /* Adjust the instruction pointer in the stackframe */ regs->pc = entry->fixup; return; } } /* * Oops. The kernel tried to access some bad page. We'll have to * terminate things with extreme prejudice. */ if ((unsigned long)(address) < PAGE_SIZE) printk(KERN_ALERT "Unable to handle kernel NULL pointer dereference"); else printk(KERN_ALERT "Unable to handle kernel access"); printk(" at virtual address 0x%08lx\n", address); die("Oops", regs, write_acc); do_exit(SIGKILL); /* * We ran out of memory, or some other thing happened to us that made * us unable to handle the page fault gracefully. */ out_of_memory: __asm__ __volatile__("l.nop 42"); __asm__ __volatile__("l.nop 1"); up_read(&mm->mmap_sem); if (!user_mode(regs)) goto no_context; pagefault_out_of_memory(); return; do_sigbus: up_read(&mm->mmap_sem); /* * Send a sigbus, regardless of whether we were in kernel * or user mode. */ info.si_signo = SIGBUS; info.si_errno = 0; info.si_code = BUS_ADRERR; info.si_addr = (void *)address; force_sig_info(SIGBUS, &info, tsk); /* Kernel mode? Handle exceptions or die */ if (!user_mode(regs)) goto no_context; return; vmalloc_fault: { /* * Synchronize this task's top level page-table * with the 'reference' page table. * * Use current_pgd instead of tsk->active_mm->pgd * since the latter might be unavailable if this * code is executed in a misfortunately run irq * (like inside schedule() between switch_mm and * switch_to...). */ int offset = pgd_index(address); pgd_t *pgd, *pgd_k; pud_t *pud, *pud_k; pmd_t *pmd, *pmd_k; pte_t *pte_k; /* phx_warn("do_page_fault(): vmalloc_fault will not work, " "since current_pgd assign a proper value somewhere\n" "anyhow we don't need this at the moment\n"); phx_mmu("vmalloc_fault"); */ pgd = (pgd_t *)current_pgd + offset; pgd_k = init_mm.pgd + offset; /* Since we're two-level, we don't need to do both * set_pgd and set_pmd (they do the same thing). If * we go three-level at some point, do the right thing * with pgd_present and set_pgd here. * * Also, since the vmalloc area is global, we don't * need to copy individual PTE's, it is enough to * copy the pgd pointer into the pte page of the * root task. If that is there, we'll find our pte if * it exists. */ pud = pud_offset(pgd, address); pud_k = pud_offset(pgd_k, address); if (!pud_present(*pud_k)) goto no_context; pmd = pmd_offset(pud, address); pmd_k = pmd_offset(pud_k, address); if (!pmd_present(*pmd_k)) goto bad_area_nosemaphore; set_pmd(pmd, *pmd_k); /* Make sure the actual PTE exists as well to * catch kernel vmalloc-area accesses to non-mapped * addresses. If we don't do this, this will just * silently loop forever. */ pte_k = pte_offset_kernel(pmd_k, address); if (!pte_present(*pte_k)) goto no_context; return; } }