static u32 do_solaris_mmap(u32 addr, u32 len, u32 prot, u32 flags, u32 fd, u64 off)
{
	struct file *file = NULL;
	unsigned long retval, ret_type;

	lock_kernel();
	current->personality |= PER_SVR4;
	if (flags & MAP_NORESERVE) {
		static int cnt = 0;

		if (cnt < 5) {
			printk("%s: unimplemented Solaris MAP_NORESERVE mmap() flag\n",
			       current->comm);
			cnt++;
		}
		flags &= ~MAP_NORESERVE;
	}
	retval = -EBADF;
	if (!(flags & MAP_ANONYMOUS)) {
		if (fd >= SOLARIS_NR_OPEN)
			goto out;
		file = fget(fd);
		if (!file)
			goto out;
		if (file->f_dentry && file->f_dentry->d_inode) {
			struct inode *inode = file->f_dentry->d_inode;

			if (MAJOR(inode->i_rdev) == MEM_MAJOR &&
			    MINOR(inode->i_rdev) == 5) {
				flags |= MAP_ANONYMOUS;
				fput(file);
				file = NULL;
			}
		}
	}

	retval = -ENOMEM;
	if (!(flags & MAP_FIXED) && !addr) {
		unsigned long attempt = get_unmapped_area(addr, len);

		if (!attempt || (attempt >= 0xf0000000UL))
			goto out_putf;
		addr = (u32) attempt;
	}
	if (!(flags & MAP_FIXED))
		addr = 0;
	ret_type = flags & _MAP_NEW;
	flags &= ~_MAP_NEW;

	flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE);
	retval = do_mmap(file, (unsigned long) addr, (unsigned long) len,
			 (unsigned long) prot, (unsigned long) flags, off);
	if (!ret_type)
		retval = ((retval < 0xf0000000) ? 0 : retval);

out_putf:
	if (file)
		fput(file);
out:
	unlock_kernel();
	return (u32) retval;
}
int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
{
	struct mm_struct *mm = current->mm;
	unsigned long vdso_base, vdso_mapping_len;
	int ret;

	/* Be sure to map the data page */
	vdso_mapping_len = (vdso_pages + 1) << PAGE_SHIFT;

	down_write(&mm->mmap_sem);
	vdso_base = get_unmapped_area(NULL, 0, vdso_mapping_len, 0, 0);
	if (IS_ERR_VALUE(vdso_base)) {
		ret = vdso_base;
		goto up_fail;
	}
	mm->context.vdso = (void *)vdso_base;

	ret = install_special_mapping(mm, vdso_base, vdso_mapping_len,
				      VM_READ|VM_EXEC|
				      VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC,
				      vdso_pagelist);
	if (ret) {
		mm->context.vdso = NULL;
		goto up_fail;
	}

up_fail:
	up_write(&mm->mmap_sem);
	return ret;
}
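/*
 * Companion sketch (not from the snippet above): one plausible boot-time
 * initializer for the vdso_pages / vdso_pagelist globals that
 * arch_setup_additional_pages() consumes.  The linker symbols
 * vdso_start/vdso_end and the vdso_data pointer are assumptions; real
 * architectures differ in detail.
 */
extern char vdso_start[], vdso_end[];
extern struct vdso_data *vdso_data;

static unsigned long vdso_pages;
static struct page **vdso_pagelist;

static int __init vdso_init(void)
{
	int i;

	vdso_pages = (vdso_end - vdso_start) >> PAGE_SHIFT;

	/* One extra slot for the shared data page mapped after the text. */
	vdso_pagelist = kcalloc(vdso_pages + 1, sizeof(struct page *),
				GFP_KERNEL);
	if (vdso_pagelist == NULL)
		return -ENOMEM;

	for (i = 0; i < vdso_pages; i++)
		vdso_pagelist[i] = virt_to_page(vdso_start + i * PAGE_SIZE);
	vdso_pagelist[i] = virt_to_page(vdso_data);

	return 0;
}
arch_initcall(vdso_init);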
/*
 * Setup a VMA at program startup for the vsyscall page.
 * Not called for compat tasks.
 */
int arch_setup_additional_pages(struct linux_binprm *bprm, int exstack)
{
	struct mm_struct *mm = current->mm;
	unsigned long addr;
	int ret;
	unsigned len = round_up(vdso_end - vdso_start, PAGE_SIZE);

	if (!vdso_enabled)
		return 0;

	down_write(&mm->mmap_sem);
	addr = vdso_addr(mm->start_stack, len);
	addr = get_unmapped_area(NULL, addr, len, 0, 0);
	if (IS_ERR_VALUE(addr)) {
		ret = addr;
		goto up_fail;
	}

	ret = install_special_mapping(mm, addr, len,
				      VM_READ|VM_EXEC|
				      VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC|
				      VM_ALWAYSDUMP,
				      vdso_pages);
	if (ret)
		goto up_fail;

	current->mm->context.vdso = (void *)addr;
up_fail:
	up_write(&mm->mmap_sem);
	return ret;
}
int __do_linux_mmap(uintptr_t __user *addr_store, size_t len, uint32_t mmap_flags)
{
	struct mm_struct *mm = current->mm;

	if (mm == NULL) {
		panic("kernel thread call mmap!!.\n");
	}
	if (addr_store == NULL || len == 0) {
		return -E_INVAL;
	}

	int ret = -E_INVAL;
	uintptr_t addr;

	addr = *addr_store;

	uintptr_t start = ROUNDDOWN(addr, PGSIZE), end = ROUNDUP(addr + len, PGSIZE);
	addr = start, len = end - start;

	uint32_t vm_flags = VM_READ;
	if (mmap_flags & MMAP_WRITE)
		vm_flags |= VM_WRITE;
	if (mmap_flags & MMAP_STACK)
		vm_flags |= VM_STACK;

	ret = -E_NO_MEM;
	if (addr == 0) {
		if ((addr = get_unmapped_area(mm, len)) == 0) {
			goto out_unlock;
		}
	}
	if ((ret = mm_map(mm, addr, len, vm_flags, NULL)) == 0) {
		*addr_store = addr;
	}
out_unlock:
	return ret;
}
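/*
 * Companion sketch (not part of the snippet above): a first-fit
 * get_unmapped_area() of the kind ucore-style kernels use, scanning the
 * sorted VMA list for the first hole large enough.  USERBASE/USERTOP as
 * the user address-space bounds and the le2vma()/mmap_list accessors are
 * assumptions about the surrounding mm layer.
 */
uintptr_t get_unmapped_area(struct mm_struct *mm, size_t len)
{
	if (len == 0 || len > USERTOP - USERBASE)
		return 0;

	uintptr_t start = USERBASE;
	list_entry_t *list = &(mm->mmap_list), *le = list;

	while ((le = list_next(le)) != list) {
		struct vma_struct *vma = le2vma(le, list_link);

		if (start + len <= vma->vm_start)
			return start;		/* hole before this VMA fits */
		if (start < vma->vm_end)
			start = vma->vm_end;	/* skip past the mapped area */
	}
	return (start + len <= USERTOP) ? start : 0;
}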
static inline unsigned long move_vma(struct vm_area_struct * vma,
	unsigned long addr, unsigned long old_len, unsigned long new_len)
{
	struct vm_area_struct * new_vma;

	new_vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
	if (new_vma) {
		unsigned long new_addr = get_unmapped_area(addr, new_len);

		if (new_addr && !move_page_tables(current->mm, new_addr, addr, old_len)) {
			*new_vma = *vma;
			new_vma->vm_start = new_addr;
			new_vma->vm_end = new_addr + new_len;
			new_vma->vm_offset = vma->vm_offset + (addr - vma->vm_start);
			if (new_vma->vm_file)
				new_vma->vm_file->f_count++;
			if (new_vma->vm_ops && new_vma->vm_ops->open)
				new_vma->vm_ops->open(new_vma);
			insert_vm_struct(current->mm, new_vma);
			merge_segments(current->mm, new_vma->vm_start, new_vma->vm_end);
			do_munmap(addr, old_len);
			current->mm->total_vm += new_len >> PAGE_SHIFT;
			if (new_vma->vm_flags & VM_LOCKED) {
				current->mm->locked_vm += new_len >> PAGE_SHIFT;
				make_pages_present(new_vma->vm_start,
						   new_vma->vm_end);
			}
			return new_addr;
		}
		/* Move failed: give the unused descriptor back. */
		kmem_cache_free(vm_area_cachep, new_vma);
	}
	return -ENOMEM;
}
/* Setup a VMA at program startup for the vsyscall page */
int arch_setup_additional_pages(struct linux_binprm *bprm, int executable_stack)
{
	struct mm_struct *mm = current->mm;
	unsigned long addr;
	int ret;

	down_write(&mm->mmap_sem);
	addr = get_unmapped_area(NULL, 0, PAGE_SIZE, 0, 0);
	if (IS_ERR_VALUE(addr)) {
		ret = addr;
		goto up_fail;
	}

	ret = install_special_mapping(mm, addr, PAGE_SIZE,
				      VM_READ | VM_EXEC |
				      VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC |
				      VM_ALWAYSDUMP,
				      syscall_pages);
	if (unlikely(ret))
		goto up_fail;

	current->mm->context.vdso = (void *)addr;
up_fail:
	up_write(&mm->mmap_sem);
	return ret;
}
int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
{
	int ret;
	unsigned long addr;
	struct mm_struct *mm = current->mm;

	down_write(&mm->mmap_sem);
	addr = vdso_addr(mm->start_stack);
	addr = get_unmapped_area(NULL, addr, PAGE_SIZE, 0, 0);
	if (IS_ERR_VALUE(addr)) {
		ret = addr;
		goto up_fail;
	}

	ret = install_special_mapping(mm, addr, PAGE_SIZE,
				      VM_READ|VM_EXEC|
				      VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC,
				      &vdso_page);
	if (ret)
		goto up_fail;

	mm->context.vdso = (void *)addr;
up_fail:
	up_write(&mm->mmap_sem);
	return ret;
}
int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
{
	struct mm_struct *mm = current->mm;
	unsigned long addr;
	int ret;

	if (!vdso_enabled)
		return 0;

	down_write(&mm->mmap_sem);
	addr = vdso_addr(mm->start_stack, vdso_size);
	addr = get_unmapped_area(NULL, addr, vdso_size, 0, 0);
	if (IS_ERR_VALUE(addr)) {
		ret = addr;
		goto up_fail;
	}
	current->mm->context.vdso = (void *)addr;

	ret = install_special_mapping(mm, addr, vdso_size,
				      VM_READ|VM_EXEC|
				      VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC|
				      VM_ALWAYSDUMP,
				      vdso_pages);
	if (ret) {
		current->mm->context.vdso = NULL;
		goto up_fail;
	}

up_fail:
	up_write(&mm->mmap_sem);
	return ret;
}
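/*
 * Companion sketch: the vdso_addr() hint generator called by several
 * snippets above (two-argument form).  Modeled on the x86-64 version:
 * choose a random page-aligned slot between the stack top and the next
 * PMD boundary, so the vDSO lands near the stack without being
 * predictable.  The exact rounding and any cache-coloring alignment step
 * are elided; treat this as an approximation, not the exact code.
 */
static unsigned long vdso_addr(unsigned long start, unsigned long len)
{
	unsigned long addr, end;
	unsigned long offset;

	/* Round the stack top up to the enclosing PMD boundary... */
	end = (start + PMD_SIZE - 1) & PMD_MASK;
	if (end >= TASK_SIZE_MAX)
		end = TASK_SIZE_MAX;
	end -= len;

	/* ...then pick a random page-aligned address inside the gap. */
	if (end > start) {
		offset = get_random_int() % (((end - start) >> PAGE_SHIFT) + 1);
		addr = start + (offset << PAGE_SHIFT);
	} else {
		addr = start;
	}
	return addr;
}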
/*
 * Called from binfmt_elf. Create a VMA for the vDSO page.
 */
int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
{
	int ret;
	unsigned long vdso_base;
	struct mm_struct *mm = current->mm;

	if (down_write_killable(&mm->mmap_sem))
		return -EINTR;

	/* Try to get it loaded right near ld.so/glibc. */
	vdso_base = STACK_TOP;
	vdso_base = get_unmapped_area(NULL, vdso_base, PAGE_SIZE, 0, 0);
	if (IS_ERR_VALUE(vdso_base)) {
		ret = vdso_base;
		goto up_fail;
	}

	/* MAYWRITE to allow gdb to COW and set breakpoints. */
	ret = install_special_mapping(mm, vdso_base, PAGE_SIZE,
				      VM_READ|VM_EXEC|
				      VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC,
				      &vdso_page);
	if (ret)
		goto up_fail;

	mm->context.vdso = (void *)vdso_base;
up_fail:
	up_write(&mm->mmap_sem);
	return ret;
}
/* Setup a VMA at program startup for the vsyscall page */
int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
{
	struct mm_struct *mm = current->mm;
	unsigned long addr;
	int ret = 0;
	bool compat;

#ifdef CONFIG_X86_X32_ABI
	if (test_thread_flag(TIF_X32))
		return x32_setup_additional_pages(bprm, uses_interp);
#endif

	if (vdso_enabled == VDSO_DISABLED)
		return 0;

	down_write(&mm->mmap_sem);

	/* Test compat mode once here, in case someone changes it via sysctl */
	compat = (vdso_enabled == VDSO_COMPAT);

	map_compat_vdso(compat);

	if (compat)
		addr = VDSO_HIGH_BASE;
	else {
		addr = get_unmapped_area(NULL, 0, PAGE_SIZE, 0, 0);
		if (IS_ERR_VALUE(addr)) {
			ret = addr;
			goto up_fail;
		}
	}

	current->mm->context.vdso = (void *)addr;

	if (compat_uses_vma || !compat) {
		/*
		 * MAYWRITE to allow gdb to COW and set breakpoints
		 */
		ret = install_special_mapping(mm, addr, PAGE_SIZE,
					      VM_READ|VM_EXEC|
					      VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC,
					      vdso32_pages);
		if (ret)
			goto up_fail;
	}

	current_thread_info()->sysenter_return =
		VDSO32_SYMBOL(addr, SYSENTER_RETURN);

up_fail:
	if (ret)
		current->mm->context.vdso = NULL;

	up_write(&mm->mmap_sem);
	return ret;
}
/* Setup a VMA at program startup for the vsyscall page */
int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
{
	struct mm_struct *mm = current->mm;
	unsigned long addr;
	int ret = 0;
	bool compat;

	if (vdso_enabled == VDSO_DISABLED)
		return 0;

	down_write(&mm->mmap_sem);

	/* Test compat mode once here, in case someone changes it via sysctl */
	compat = (vdso_enabled == VDSO_COMPAT);

	map_compat_vdso(compat);

	if (compat)
		addr = VDSO_HIGH_BASE;
	else {
		addr = get_unmapped_area(NULL, 0, PAGE_SIZE, 0, 0);
		if (IS_ERR_VALUE(addr)) {
			ret = addr;
			goto up_fail;
		}
	}

	if (compat_uses_vma || !compat) {
		/*
		 * MAYWRITE to allow gdb to COW and set breakpoints
		 *
		 * Make sure the vDSO gets into every core dump.
		 * Dumping its contents makes post-mortem fully
		 * interpretable later without matching up the same
		 * kernel and hardware config to see what PC values
		 * meant.
		 */
		ret = install_special_mapping(mm, addr, PAGE_SIZE,
					      VM_READ|VM_EXEC|
					      VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC|
					      VM_ALWAYSDUMP,
					      vdso32_pages);
		if (ret)
			goto up_fail;
	}

	current->mm->context.vdso = (void *)addr;
	current_thread_info()->sysenter_return =
		VDSO32_SYMBOL(addr, SYSENTER_RETURN);

up_fail:
	up_write(&mm->mmap_sem);
	return ret;
}
/*
 * This is really a simplified "vm_mmap". it only handles MPX
 * bounds tables (the bounds directory is user-allocated).
 *
 * Later on, we use the vma->vm_ops to uniquely identify these
 * VMAs.
 */
static unsigned long mpx_mmap(unsigned long len)
{
	unsigned long ret;
	unsigned long addr, pgoff;
	struct mm_struct *mm = current->mm;
	vm_flags_t vm_flags;
	struct vm_area_struct *vma;

	/* Only bounds table and bounds directory can be allocated here */
	if (len != MPX_BD_SIZE_BYTES && len != MPX_BT_SIZE_BYTES)
		return -EINVAL;

	down_write(&mm->mmap_sem);

	/* Too many mappings? */
	if (mm->map_count > sysctl_max_map_count) {
		ret = -ENOMEM;
		goto out;
	}

	/*
	 * Obtain the address to map to. we verify (or select) it and ensure
	 * that it represents a valid section of the address space.
	 */
	addr = get_unmapped_area(NULL, 0, len, 0, MAP_ANONYMOUS | MAP_PRIVATE);
	if (addr & ~PAGE_MASK) {
		ret = addr;
		goto out;
	}

	vm_flags = VM_READ | VM_WRITE | VM_MPX |
			mm->def_flags | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC;

	/* Set pgoff according to addr for anon_vma */
	pgoff = addr >> PAGE_SHIFT;

	ret = mmap_region(NULL, addr, len, vm_flags, pgoff);
	if (IS_ERR_VALUE(ret))
		goto out;

	vma = find_vma(mm, ret);
	if (!vma) {
		ret = -ENOMEM;
		goto out;
	}
	vma->vm_ops = &mpx_vma_ops;

	if (vm_flags & VM_LOCKED) {
		up_write(&mm->mmap_sem);
		mm_populate(ret, len);
		return ret;
	}

out:
	up_write(&mm->mmap_sem);
	return ret;
}
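/*
 * Companion sketch: the vm_ops-based identity test that the comment on
 * mpx_mmap() alludes to.  This mirrors the check used alongside the MPX
 * code; shown here only to make the "uniquely identify these VMAs"
 * remark concrete.
 */
static int is_mpx_vma(struct vm_area_struct *vma)
{
	return vma->vm_ops == &mpx_vma_ops;
}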
asmlinkage unsigned long sparc_mremap(unsigned long addr,
	unsigned long old_len, unsigned long new_len,
	unsigned long flags, unsigned long new_addr)
{
	struct vm_area_struct *vma;
	unsigned long ret = -EINVAL;

	if (ARCH_SUN4C_SUN4) {
		if (old_len > 0x20000000 || new_len > 0x20000000)
			goto out;
		if (addr < 0xe0000000 && addr + old_len > 0x20000000)
			goto out;
	}
	if (old_len > TASK_SIZE - PAGE_SIZE ||
	    new_len > TASK_SIZE - PAGE_SIZE)
		goto out;
	down_write(&current->mm->mmap_sem);
	if (flags & MREMAP_FIXED) {
		if (ARCH_SUN4C_SUN4 &&
		    new_addr < 0xe0000000 &&
		    new_addr + new_len > 0x20000000)
			goto out_sem;
		if (new_addr + new_len > TASK_SIZE - PAGE_SIZE)
			goto out_sem;
	} else if ((ARCH_SUN4C_SUN4 &&
		    addr < 0xe0000000 &&
		    addr + new_len > 0x20000000) ||
		   addr + new_len > TASK_SIZE - PAGE_SIZE) {
		unsigned long map_flags = 0;
		struct file *file = NULL;

		ret = -ENOMEM;
		if (!(flags & MREMAP_MAYMOVE))
			goto out_sem;

		vma = find_vma(current->mm, addr);
		if (vma) {
			if (vma->vm_flags & VM_SHARED)
				map_flags |= MAP_SHARED;
			file = vma->vm_file;
		}

		new_addr = get_unmapped_area(file, addr, new_len,
					     vma ? vma->vm_pgoff : 0,
					     map_flags);
		ret = new_addr;
		if (new_addr & ~PAGE_MASK)
			goto out_sem;
		flags |= MREMAP_FIXED;
	}
	ret = do_mremap(addr, old_len, new_len, flags, new_addr);
out_sem:
	up_write(&current->mm->mmap_sem);
out:
	return ret;
}
/*
 * do_shmem - create a shared memory area at addr with len bytes and
 * flags (VM_READ/VM_WRITE/VM_STACK).
 */
int do_shmem(uintptr_t * addr_store, size_t len, uint32_t mmap_flags)
{
	struct mm_struct *mm = current->mm;

	if (mm == NULL) {
		panic("kernel thread call mmap!!.\n");
	}
	if (addr_store == NULL || len == 0) {
		return -E_INVAL;
	}

	int ret = -E_INVAL;
	uintptr_t addr;

	lock_mm(mm);
	if (!copy_from_user(mm, &addr, addr_store, sizeof(uintptr_t), 1)) {
		goto out_unlock;
	}

	uintptr_t start = ROUNDDOWN(addr, PGSIZE), end = ROUNDUP(addr + len, PGSIZE);
	addr = start, len = end - start;

	uint32_t vm_flags = VM_READ;
	if (mmap_flags & MMAP_WRITE)
		vm_flags |= VM_WRITE;
	if (mmap_flags & MMAP_STACK)
		vm_flags |= VM_STACK;

	ret = -E_NO_MEM;
	if (addr == 0) {
		if ((addr = get_unmapped_area(mm, len)) == 0) {
			goto out_unlock;
		}
	}

	struct shmem_struct *shmem;
	if ((shmem = shmem_create(len)) == NULL) {
		goto out_unlock;
	}
	if ((ret = mm_map_shmem(mm, addr, vm_flags, shmem, NULL)) != 0) {
		assert(shmem_ref(shmem) == 0);
		shmem_destroy(shmem);
		goto out_unlock;
	}
	copy_to_user(mm, addr_store, &addr, sizeof(uintptr_t));
out_unlock:
	unlock_mm(mm);
	return ret;
}
asmlinkage unsigned long sys32_mremap(unsigned long addr,
	unsigned long old_len, unsigned long new_len,
	unsigned long flags, u32 __new_addr)
{
	struct vm_area_struct *vma;
	unsigned long ret = -EINVAL;
	unsigned long new_addr = __new_addr;

	if (old_len > STACK_TOP32 || new_len > STACK_TOP32)
		goto out;
	if (addr > STACK_TOP32 - old_len)
		goto out;
	down_write(&current->mm->mmap_sem);
	if (flags & MREMAP_FIXED) {
		if (new_addr > STACK_TOP32 - new_len)
			goto out_sem;
	} else if (addr > STACK_TOP32 - new_len) {
		unsigned long map_flags = 0;
		struct file *file = NULL;

		ret = -ENOMEM;
		if (!(flags & MREMAP_MAYMOVE))
			goto out_sem;

		vma = find_vma(current->mm, addr);
		if (vma) {
			if (vma->vm_flags & VM_SHARED)
				map_flags |= MAP_SHARED;
			file = vma->vm_file;
		}

		/* MREMAP_FIXED checked above. */
		new_addr = get_unmapped_area(file, addr, new_len,
					     vma ? vma->vm_pgoff : 0,
					     map_flags);
		ret = new_addr;
		if (new_addr & ~PAGE_MASK)
			goto out_sem;
		flags |= MREMAP_FIXED;
	}
	ret = do_mremap(addr, old_len, new_len, flags, new_addr);
out_sem:
	up_write(&current->mm->mmap_sem);
out:
	return ret;
}
int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
{
	struct mm_struct *mm = current->mm;
	unsigned long vdso_base, vdso_text_len, vdso_mapping_len;
	void *ret;

	vdso_text_len = vdso_pages << PAGE_SHIFT;
	/* Be sure to map the data page */
	vdso_mapping_len = vdso_text_len + PAGE_SIZE;

	if (down_write_killable(&mm->mmap_sem))
		return -EINTR;
	vdso_base = get_unmapped_area(NULL, 0, vdso_mapping_len, 0, 0);
	if (IS_ERR_VALUE(vdso_base)) {
		ret = ERR_PTR(vdso_base);
		goto up_fail;
	}
	ret = _install_special_mapping(mm, vdso_base, PAGE_SIZE,
				       VM_READ|VM_MAYREAD,
				       &vdso_spec[0]);
	if (IS_ERR(ret))
		goto up_fail;
	vdso_base += PAGE_SIZE;
	mm->context.vdso = (void *)vdso_base;
	ret = _install_special_mapping(mm, vdso_base, vdso_text_len,
				       VM_READ|VM_EXEC|
				       VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC,
				       &vdso_spec[1]);
	if (IS_ERR(ret))
		goto up_fail;

	up_write(&mm->mmap_sem);
	return 0;

up_fail:
	mm->context.vdso = NULL;
	up_write(&mm->mmap_sem);
	return PTR_ERR(ret);
}
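/*
 * Companion sketch: a plausible layout for the vdso_spec[] pair consumed
 * above, following the arm64 convention of slot 0 for the data page
 * ("[vvar]") and slot 1 for the code pages ("[vdso]").  The page-list
 * names are assumptions; the real initializer fills these in at boot.
 */
static struct vm_special_mapping vdso_spec[2] = {
	{
		.name	= "[vvar]",
		.pages	= &vdso_data_page,	/* assumed: one data page */
	},
	{
		.name	= "[vdso]",
		.pages	= vdso_text_pages,	/* assumed: the code pages */
	},
};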
//#ifdef UCONFIG_BIONIC_LIBC
static int map_ph(int fd, struct proghdr *ph, struct mm_struct *mm,
		  uint32_t * pbias, uint32_t linker)
{
	int ret = 0;
	struct Page *page;
	uint32_t vm_flags = 0;
	uint32_t bias = 0;
	pte_perm_t perm = 0;

	ptep_set_u_read(&perm);
	if (ph->p_flags & ELF_PF_X)
		vm_flags |= VM_EXEC;
	if (ph->p_flags & ELF_PF_W)
		vm_flags |= VM_WRITE;
	if (ph->p_flags & ELF_PF_R)
		vm_flags |= VM_READ;
	if (vm_flags & VM_WRITE)
		ptep_set_u_write(&perm);

	if (pbias) {
		bias = *pbias;
	}
	if (!bias && !ph->p_va) {
		bias = get_unmapped_area(mm, ph->p_memsz + PGSIZE);
		bias = ROUNDUP(bias, PGSIZE);
		if (pbias)
			*pbias = bias;
	}

	if ((ret = mm_map(mm, ph->p_va + bias, ph->p_memsz,
			  vm_flags, NULL)) != 0) {
		goto bad_cleanup_mmap;
	}

	if (!linker && mm->brk_start < ph->p_va + bias + ph->p_memsz) {
		mm->brk_start = ph->p_va + bias + ph->p_memsz;
	}

	off_t offset = ph->p_offset;
	size_t off, size;
	uintptr_t start = ph->p_va + bias, end, la = ROUNDDOWN(start, PGSIZE);

	end = ph->p_va + bias + ph->p_filesz;
	while (start < end) {
		if ((page = pgdir_alloc_page(mm->pgdir, la, perm)) == NULL) {
			ret = -E_NO_MEM;
			goto bad_cleanup_mmap;
		}
		off = start - la, size = PGSIZE - off, la += PGSIZE;
		if (end < la) {
			size -= la - end;
		}
		if ((ret = load_icode_read(fd, page2kva(page) + off,
					   size, offset)) != 0) {
			goto bad_cleanup_mmap;
		}
		start += size, offset += size;
	}

	end = ph->p_va + bias + ph->p_memsz;

	if (start < la) {
		if (start == end) {
			goto normal_exit;
		}
		off = start + PGSIZE - la, size = PGSIZE - off;
		if (end < la) {
			size -= la - end;
		}
		memset(page2kva(page) + off, 0, size);
		start += size;
		assert((end < la && start == end) ||
		       (end >= la && start == la));
	}
	while (start < end) {
		if ((page = pgdir_alloc_page(mm->pgdir, la, perm)) == NULL) {
			ret = -E_NO_MEM;
			goto bad_cleanup_mmap;
		}
		off = start - la, size = PGSIZE - off, la += PGSIZE;
		if (end < la) {
			size -= la - end;
		}
		memset(page2kva(page) + off, 0, size);
		start += size;
	}
normal_exit:
	return 0;
bad_cleanup_mmap:
	return ret;
}
int setup_kernel_memory(uint64_t kernmem, uint64_t p_kern_start,
			uint64_t p_kern_end, uint64_t p_vdo_buff_start,
			uint32_t *modulep)
{
	struct kernel_mm_struct *mm = get_kernel_mm();

	// Set up VMAs
	// Kernel virtual memory space
	if (-1 == set_kernel_memory(kernmem,
				    kernmem - p_kern_start + p_kern_end)) {
		return -1;
	}

	// Video buffer memory
	// TODO: Check return value
	uint64_t vdo_start_addr = get_unmapped_area(&(mm->mmap),
						    kernmem + p_vdo_buff_start,
						    SIZEOF_PAGE);
	if (-1 == set_video_buffer_memory(vdo_start_addr,
					  vdo_start_addr + SIZEOF_PAGE)) {
		return -1;
	}

	// AHCI memory
	uint64_t ahci_start_addr = get_unmapped_area(&(mm->mmap), kernmem,
						     SIZEOF_PAGE);
	if (-1 == set_ahci_memory(ahci_start_addr,
				  ahci_start_addr + SIZEOF_PAGE)) {
		return -1;
	}

	// Scan physical pages
	struct smap_t {
		uint64_t base, length;
		uint32_t type;
	} __attribute__((packed)) *smap;

	uint64_t phys_end_addr = 0;
	int lower_chunk = 0;
	uint64_t lower_chunk_start = 0;
	uint64_t lower_chunk_end = 0;

	while (modulep[0] != 0x9001)
		modulep += modulep[1] + 2;
	for (smap = (struct smap_t *)(modulep + 2);
	     smap < (struct smap_t *)((char *)modulep + modulep[1] + 2 * 4);
	     ++smap) {
		if (smap->type == 1 && smap->length != 0) {
			if (phys_end_addr < smap->base + smap->length) {
				phys_end_addr = smap->base + smap->length;
			}
			if (!lower_chunk) {
				lower_chunk_start = smap->base;
				lower_chunk_end = smap->base + smap->length;
				lower_chunk++;
			}
			if (!new_chunk(smap->base, smap->base + smap->length)) {
				return -1;
			}
		}
	}

	// TODO: Check return value
	uint64_t phys_mem_offset = get_unmapped_area(&(mm->mmap), kernmem,
						     phys_end_addr);
	if (-1 == set_phys_memory(phys_mem_offset,
				  phys_mem_offset + phys_end_addr)) {
		return -1;
	}

	if (-1 == scan_all_chunks()) {
		return -1;
	}

	// Mark used physical pages
	// The first page - just like that
	if (0 > inc_ref_count_pages(0, SIZEOF_PAGE)) {
		return -1;
	}
	// Video buffer memory is not part of the chunks obtained from
	// modulep, so there is no need to mark it.
	// Kernel physical pages
	if (0 > inc_ref_count_pages(p_kern_start, p_kern_end)) {
		return -1;
	}
	// Ignore lower chunk
	if (0 > inc_ref_count_pages(lower_chunk_start, lower_chunk_end)) {
		return -1;
	}

	// Initialize free pages
	if (-1 == init_free_phys_page_manager()) {
		return -1;
	}

	/*
	printf("start kernel: %p\n", mm->start_kernel);
	printf("end kernel  : %p\n", mm->end_kernel);
	printf("start vdo   : %p\n", mm->start_vdo_buff);
	printf("end vdo     : %p\n", mm->end_vdo_buff);
	printf("start phys  : %p\n", mm->start_phys_mem);
	printf("end phys    : %p\n", mm->end_phys_mem);
	printf("start ahci  : %p\n", mm->start_ahci_mem);
	printf("end ahci    : %p\n", mm->end_ahci_mem);
	*/

	// Set up page tables
	uint64_t pml4_page = get_selfref_PML4(NULL);
	uint64_t paddr = p_kern_start;
	uint64_t vaddr = kernmem;

	while (paddr < p_kern_end) {
		update_page_table_idmap(pml4_page, paddr, vaddr,
					PAGE_TRANS_READ_WRITE);
		paddr += SIZEOF_PAGE;
		vaddr += SIZEOF_PAGE;
	}

	// TODO: Remove user supervisor permission from video buffer
	update_page_table_idmap(pml4_page, p_vdo_buff_start, vdo_start_addr,
				PAGE_TRANS_READ_WRITE |
				PAGE_TRANS_USER_SUPERVISOR);
	update_page_table_idmap(pml4_page, P_AHCI_START, ahci_start_addr,
				PAGE_TRANS_READ_WRITE |
				PAGE_TRANS_USER_SUPERVISOR);
	phys_mem_offset_map(pml4_page, phys_mem_offset);

	// Protect read-only pages from supervisor-level writes
	set_cr0(get_cr0() | CR0_WP);

	// Set cr3
	struct str_cr3 cr3 = get_default_cr3();
	cr3.p_PML4E_4Kb = pml4_page >> 12;
	set_cr3(cr3);

	// Indicate memory set up done
	kmDeviceMemorySetUpDone();

	global_video_vaddr = (void *)vdo_start_addr;
	set_phys_mem_virt_map_base(phys_mem_offset);

	return 0;
}
static int load_icode(int fd, int argc, char **kargv, int envc, char **kenvp)
{
	assert(argc >= 0 && argc <= EXEC_MAX_ARG_NUM);
	assert(envc >= 0 && envc <= EXEC_MAX_ENV_NUM);
	if (current->mm != NULL) {
		panic("load_icode: current->mm must be empty.\n");
	}

	int ret = -E_NO_MEM;
//#ifdef UCONFIG_BIONIC_LIBC
	uint32_t real_entry;
//#endif //UCONFIG_BIONIC_LIBC
	struct mm_struct *mm;

	if ((mm = mm_create()) == NULL) {
		goto bad_mm;
	}
	if (setup_pgdir(mm) != 0) {
		goto bad_pgdir_cleanup_mm;
	}
	mm->brk_start = 0;

	struct Page *page;
	struct elfhdr __elf, *elf = &__elf;

	if ((ret = load_icode_read(fd, elf, sizeof(struct elfhdr), 0)) != 0) {
		goto bad_elf_cleanup_pgdir;
	}
	if (elf->e_magic != ELF_MAGIC) {
		ret = -E_INVAL_ELF;
		goto bad_elf_cleanup_pgdir;
	}
//#ifdef UCONFIG_BIONIC_LIBC
	real_entry = elf->e_entry;

	uint32_t load_address, load_address_flag = 0;
//#endif //UCONFIG_BIONIC_LIBC

	struct proghdr __ph, *ph = &__ph;
	uint32_t vm_flags, phnum;
	pte_perm_t perm = 0;
//#ifdef UCONFIG_BIONIC_LIBC
	uint32_t is_dynamic = 0, interp_idx;
	uint32_t bias = 0;
//#endif //UCONFIG_BIONIC_LIBC

	for (phnum = 0; phnum < elf->e_phnum; phnum++) {
		off_t phoff = elf->e_phoff + sizeof(struct proghdr) * phnum;

		if ((ret = load_icode_read(fd, ph, sizeof(struct proghdr),
					   phoff)) != 0) {
			goto bad_cleanup_mmap;
		}
		if (ph->p_type == ELF_PT_INTERP) {
			is_dynamic = 1;
			interp_idx = phnum;
			continue;
		}
		if (ph->p_type != ELF_PT_LOAD) {
			continue;
		}
		if (ph->p_filesz > ph->p_memsz) {
			ret = -E_INVAL_ELF;
			goto bad_cleanup_mmap;
		}
		if (ph->p_va == 0 && !bias) {
			bias = 0x00008000;
		}
		if ((ret = map_ph(fd, ph, mm, &bias, 0)) != 0) {
			kprintf("load address: 0x%08x size: %d\n",
				ph->p_va, ph->p_memsz);
			goto bad_cleanup_mmap;
		}
		if (load_address_flag == 0)
			load_address = ph->p_va + bias;
		++load_address_flag;

		/*********************************************/
		/*
		vm_flags = 0;
		ptep_set_u_read(&perm);
		if (ph->p_flags & ELF_PF_X)
			vm_flags |= VM_EXEC;
		if (ph->p_flags & ELF_PF_W)
			vm_flags |= VM_WRITE;
		if (ph->p_flags & ELF_PF_R)
			vm_flags |= VM_READ;
		if (vm_flags & VM_WRITE)
			ptep_set_u_write(&perm);

		if ((ret = mm_map(mm, ph->p_va, ph->p_memsz, vm_flags, NULL)) != 0) {
			goto bad_cleanup_mmap;
		}

		if (mm->brk_start < ph->p_va + ph->p_memsz) {
			mm->brk_start = ph->p_va + ph->p_memsz;
		}

		off_t offset = ph->p_offset;
		size_t off, size;
		uintptr_t start = ph->p_va, end, la = ROUNDDOWN(start, PGSIZE);

		end = ph->p_va + ph->p_filesz;
		while (start < end) {
			if ((page = pgdir_alloc_page(mm->pgdir, la, perm)) == NULL) {
				ret = -E_NO_MEM;
				goto bad_cleanup_mmap;
			}
			off = start - la, size = PGSIZE - off, la += PGSIZE;
			if (end < la) {
				size -= la - end;
			}
			if ((ret = load_icode_read(fd, page2kva(page) + off, size, offset)) != 0) {
				goto bad_cleanup_mmap;
			}
			start += size, offset += size;
		}

		end = ph->p_va + ph->p_memsz;

		if (start < la) {
			// ph->p_memsz == ph->p_filesz
			if (start == end) {
				continue;
			}
			off = start + PGSIZE - la, size = PGSIZE - off;
			if (end < la) {
				size -= la - end;
			}
			memset(page2kva(page) + off, 0, size);
			start += size;
			assert((end < la && start == end) || (end >= la && start == la));
		}
		while (start < end) {
			if ((page = pgdir_alloc_page(mm->pgdir, la, perm)) == NULL) {
				ret = -E_NO_MEM;
				goto bad_cleanup_mmap;
			}
			off = start - la, size = PGSIZE - off, la += PGSIZE;
			if (end < la) {
				size -= la - end;
			}
			memset(page2kva(page) + off, 0, size);
			start += size;
		}
		*/
		/**************************************/
	}

	mm->brk_start = mm->brk = ROUNDUP(mm->brk_start, PGSIZE);

	/* setup user stack */
	vm_flags = VM_READ | VM_WRITE | VM_STACK;
	if ((ret = mm_map(mm, USTACKTOP - USTACKSIZE, USTACKSIZE,
			  vm_flags, NULL)) != 0) {
		goto bad_cleanup_mmap;
	}

	if (is_dynamic) {
		elf->e_entry += bias;
		bias = 0;

		off_t phoff = elf->e_phoff + sizeof(struct proghdr) * interp_idx;
		if ((ret = load_icode_read(fd, ph, sizeof(struct proghdr),
					   phoff)) != 0) {
			goto bad_cleanup_mmap;
		}

		char *interp_path = (char *)kmalloc(ph->p_filesz);
		load_icode_read(fd, interp_path, ph->p_filesz, ph->p_offset);

		int interp_fd = sysfile_open(interp_path, O_RDONLY);
		assert(interp_fd >= 0);
		struct elfhdr interp___elf, *interp_elf = &interp___elf;
		assert((ret = load_icode_read(interp_fd, interp_elf,
					      sizeof(struct elfhdr), 0)) == 0);
		assert(interp_elf->e_magic == ELF_MAGIC);

		struct proghdr interp___ph, *interp_ph = &interp___ph;
		uint32_t interp_phnum;
		uint32_t va_min = 0xffffffff, va_max = 0;

		for (interp_phnum = 0; interp_phnum < interp_elf->e_phnum;
		     ++interp_phnum) {
			off_t interp_phoff = interp_elf->e_phoff +
				sizeof(struct proghdr) * interp_phnum;
			assert((ret = load_icode_read(interp_fd, interp_ph,
						      sizeof(struct proghdr),
						      interp_phoff)) == 0);
			if (interp_ph->p_type != ELF_PT_LOAD) {
				continue;
			}
			if (va_min > interp_ph->p_va)
				va_min = interp_ph->p_va;
			if (va_max < interp_ph->p_va + interp_ph->p_memsz)
				va_max = interp_ph->p_va + interp_ph->p_memsz;
		}

		bias = get_unmapped_area(mm, va_max - va_min + 1 + PGSIZE);
		bias = ROUNDUP(bias, PGSIZE);

		for (interp_phnum = 0; interp_phnum < interp_elf->e_phnum;
		     ++interp_phnum) {
			off_t interp_phoff = interp_elf->e_phoff +
				sizeof(struct proghdr) * interp_phnum;
			assert((ret = load_icode_read(interp_fd, interp_ph,
						      sizeof(struct proghdr),
						      interp_phoff)) == 0);
			if (interp_ph->p_type != ELF_PT_LOAD) {
				continue;
			}
			assert((ret = map_ph(interp_fd, interp_ph, mm,
					     &bias, 1)) == 0);
		}

		real_entry = interp_elf->e_entry + bias;

		sysfile_close(interp_fd);
		kfree(interp_path);
	}

	sysfile_close(fd);

	bool intr_flag;
	local_intr_save(intr_flag);
	{
		list_add(&(proc_mm_list), &(mm->proc_mm_link));
	}
	local_intr_restore(intr_flag);
	mm_count_inc(mm);
	current->mm = mm;
	set_pgdir(current, mm->pgdir);
	mm->cpuid = myid();
	mp_set_mm_pagetable(mm);

	if (!is_dynamic) {
		real_entry += bias;
	}
#ifdef UCONFIG_BIONIC_LIBC
	if (init_new_context_dynamic(current, elf, argc, kargv, envc, kenvp,
				     is_dynamic, real_entry, load_address,
				     bias) < 0)
		goto bad_cleanup_mmap;
#else
	if (init_new_context(current, elf, argc, kargv, envc, kenvp) < 0)
		goto bad_cleanup_mmap;
#endif //UCONFIG_BIONIC_LIBC
	ret = 0;
out:
	return ret;
bad_cleanup_mmap:
	exit_mmap(mm);
bad_elf_cleanup_pgdir:
	put_pgdir(mm);
bad_pgdir_cleanup_mm:
	mm_destroy(mm);
bad_mm:
	goto out;
}
/* Setup a VMA at program startup for the vsyscall page */
int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
{
	struct mm_struct *mm = current->mm;
	unsigned long addr;
	int ret = 0;
	struct vm_area_struct *vma;
	static struct page *no_pages[] = {NULL};

#ifdef CONFIG_X86_X32_ABI
	if (test_thread_flag(TIF_X32))
		return x32_setup_additional_pages(bprm, uses_interp);
#endif

	if (vdso_enabled != 1)	/* Other values all mean "disabled" */
		return 0;

	down_write(&mm->mmap_sem);

	addr = get_unmapped_area(NULL, 0,
				 vdso32_size + VDSO_OFFSET(VDSO_PREV_PAGES),
				 0, 0);
	if (IS_ERR_VALUE(addr)) {
		ret = addr;
		goto up_fail;
	}

	addr += VDSO_OFFSET(VDSO_PREV_PAGES);

	current->mm->context.vdso = (void *)addr;

	/*
	 * MAYWRITE to allow gdb to COW and set breakpoints
	 */
	ret = install_special_mapping(mm, addr, vdso32_size,
				      VM_READ|VM_EXEC|
				      VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC,
				      vdso32_pages);
	if (ret)
		goto up_fail;

	vma = _install_special_mapping(mm,
				       addr - VDSO_OFFSET(VDSO_PREV_PAGES),
				       VDSO_OFFSET(VDSO_PREV_PAGES),
				       VM_READ,
				       no_pages);
	if (IS_ERR(vma)) {
		ret = PTR_ERR(vma);
		goto up_fail;
	}

	ret = remap_pfn_range(vma,
			      addr - VDSO_OFFSET(VDSO_VVAR_PAGE),
			      __pa_symbol(&__vvar_page) >> PAGE_SHIFT,
			      PAGE_SIZE,
			      PAGE_READONLY);
	if (ret)
		goto up_fail;

#ifdef CONFIG_HPET_TIMER
	if (hpet_address) {
		ret = io_remap_pfn_range(vma,
					 addr - VDSO_OFFSET(VDSO_HPET_PAGE),
					 hpet_address >> PAGE_SHIFT,
					 PAGE_SIZE,
					 pgprot_noncached(PAGE_READONLY));
		if (ret)
			goto up_fail;
	}
#endif

	current_thread_info()->sysenter_return =
		VDSO32_SYMBOL(addr, SYSENTER_RETURN);

up_fail:
	if (ret)
		current->mm->context.vdso = NULL;

	up_write(&mm->mmap_sem);
	return ret;
}
int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
{
	struct mips_vdso_image *image = current->thread.abi->vdso;
	struct mm_struct *mm = current->mm;
	unsigned long gic_size, vvar_size, size, base, data_addr, vdso_addr;
	struct vm_area_struct *vma;
	struct resource gic_res;
	int ret;

	if (down_write_killable(&mm->mmap_sem))
		return -EINTR;

	/*
	 * Determine total area size. This includes the VDSO data itself, the
	 * data page, and the GIC user page if present. Always create a mapping
	 * for the GIC user area if the GIC is present regardless of whether it
	 * is the current clocksource, in case it comes into use later on. We
	 * only map a page even though the total area is 64K, as we only need
	 * the counter registers at the start.
	 */
	gic_size = gic_present ? PAGE_SIZE : 0;
	vvar_size = gic_size + PAGE_SIZE;
	size = vvar_size + image->size;

	base = get_unmapped_area(NULL, 0, size, 0, 0);
	if (IS_ERR_VALUE(base)) {
		ret = base;
		goto out;
	}

	data_addr = base + gic_size;
	vdso_addr = data_addr + PAGE_SIZE;

	vma = _install_special_mapping(mm, base, vvar_size,
				       VM_READ | VM_MAYREAD,
				       &vdso_vvar_mapping);
	if (IS_ERR(vma)) {
		ret = PTR_ERR(vma);
		goto out;
	}

	/* Map GIC user page. */
	if (gic_size) {
		ret = gic_get_usm_range(&gic_res);
		if (ret)
			goto out;

		ret = io_remap_pfn_range(vma, base,
					 gic_res.start >> PAGE_SHIFT,
					 gic_size,
					 pgprot_noncached(PAGE_READONLY));
		if (ret)
			goto out;
	}

	/* Map data page. */
	ret = remap_pfn_range(vma, data_addr,
			      virt_to_phys(&vdso_data) >> PAGE_SHIFT,
			      PAGE_SIZE, PAGE_READONLY);
	if (ret)
		goto out;

	/* Map VDSO image. */
	vma = _install_special_mapping(mm, vdso_addr, image->size,
				       VM_READ | VM_EXEC |
				       VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC,
				       &image->mapping);
	if (IS_ERR(vma)) {
		ret = PTR_ERR(vma);
		goto out;
	}

	mm->context.vdso = (void *)vdso_addr;
	ret = 0;

out:
	up_write(&mm->mmap_sem);
	return ret;
}
static int map_vdso(const struct vdso_image *image, bool calculate_addr)
{
	struct mm_struct *mm = current->mm;
	struct vm_area_struct *vma;
	unsigned long addr;
	int ret = 0;
	static struct page *no_pages[] = {NULL};
	static struct vm_special_mapping vvar_mapping = {
		.name = "[vvar]",
		.pages = no_pages,
	};

	if (calculate_addr) {
		addr = vdso_addr(current->mm->start_stack,
				 image->sym_end_mapping);
	} else {
		addr = 0;
	}

	down_write(&mm->mmap_sem);

	addr = get_unmapped_area(NULL, addr, image->sym_end_mapping, 0, 0);
	if (IS_ERR_VALUE(addr)) {
		ret = addr;
		goto up_fail;
	}

	current->mm->context.vdso = (void __user *)addr;

	/*
	 * MAYWRITE to allow gdb to COW and set breakpoints
	 */
	vma = _install_special_mapping(mm, addr, image->size,
				       VM_READ|VM_EXEC|
				       VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC,
				       &image->text_mapping);
	if (IS_ERR(vma)) {
		ret = PTR_ERR(vma);
		goto up_fail;
	}

	vma = _install_special_mapping(mm, addr + image->size,
				       image->sym_end_mapping - image->size,
				       VM_READ,
				       &vvar_mapping);
	if (IS_ERR(vma)) {
		ret = PTR_ERR(vma);
		goto up_fail;
	}

	if (image->sym_vvar_page)
		ret = remap_pfn_range(vma,
				      addr + image->sym_vvar_page,
				      __pa_symbol(&__vvar_page) >> PAGE_SHIFT,
				      PAGE_SIZE,
				      PAGE_READONLY);
	if (ret)
		goto up_fail;

#ifdef CONFIG_HPET_TIMER
	if (hpet_address && image->sym_hpet_page) {
		ret = io_remap_pfn_range(vma,
					 addr + image->sym_hpet_page,
					 hpet_address >> PAGE_SHIFT,
					 PAGE_SIZE,
					 pgprot_noncached(PAGE_READONLY));
		if (ret)
			goto up_fail;
	}
#endif

up_fail:
	if (ret)
		current->mm->context.vdso = NULL;

	up_write(&mm->mmap_sem);
	return ret;
}
static int map_vdso(const struct vdso_image *image,
		    struct vm_special_mapping *vdso_mapping)
{
	struct mm_struct *mm = current->mm;
	struct vm_area_struct *vma;
	unsigned long text_start, addr = 0;
	int ret = 0;

	down_write(&mm->mmap_sem);

	/*
	 * First, get an unmapped region: then randomize it, and make sure that
	 * region is free.
	 */
	if (current->flags & PF_RANDOMIZE) {
		addr = get_unmapped_area(NULL, 0,
					 image->size - image->sym_vvar_start,
					 0, 0);
		if (IS_ERR_VALUE(addr)) {
			ret = addr;
			goto up_fail;
		}
		addr = vdso_addr(addr, image->size - image->sym_vvar_start);
	}
	addr = get_unmapped_area(NULL, addr,
				 image->size - image->sym_vvar_start, 0, 0);
	if (IS_ERR_VALUE(addr)) {
		ret = addr;
		goto up_fail;
	}

	text_start = addr - image->sym_vvar_start;
	current->mm->context.vdso = (void __user *)text_start;

	/*
	 * MAYWRITE to allow gdb to COW and set breakpoints
	 */
	vma = _install_special_mapping(mm, text_start, image->size,
				       VM_READ|VM_EXEC|
				       VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC,
				       vdso_mapping);
	if (IS_ERR(vma)) {
		ret = PTR_ERR(vma);
		goto up_fail;
	}

	vma = _install_special_mapping(mm, addr, -image->sym_vvar_start,
				       VM_READ|VM_MAYREAD,
				       &vvar_mapping);
	if (IS_ERR(vma)) {
		ret = PTR_ERR(vma);
		do_munmap(mm, text_start, image->size, NULL);
	}

up_fail:
	if (ret)
		current->mm->context.vdso = NULL;

	up_write(&mm->mmap_sem);
	return ret;
}
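/*
 * Companion sketch: a plausible definition of the vvar_mapping used by
 * map_vdso() above.  In mainline x86 this pairs a name with a .fault
 * handler that remaps the vvar/HPET/pvclock pages on demand; the handler
 * name here is illustrative, not a confirmed symbol.
 */
static const struct vm_special_mapping vvar_mapping = {
	.name = "[vvar]",
	.fault = vvar_fault,	/* assumed per-page fault callback */
};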
/*
 * Fix shmaddr, allocate descriptor, map shm, add attach descriptor to lists.
 */
asmlinkage int sys_shmat (int shmid, char *shmaddr, int shmflg, ulong *raddr)
{
	struct shmid_kernel *shp;
	struct vm_area_struct *shmd;
	int err = -EINVAL;
	unsigned int id;
	unsigned long addr;
	unsigned long len;

	down(&current->mm->mmap_sem);
	lock_kernel();
	if (shmid < 0) {
		/* printk("shmat() -> EINVAL because shmid = %d < 0\n",shmid); */
		goto out;
	}

	shp = shm_segs[id = (unsigned int) shmid % SHMMNI];
	if (shp == IPC_UNUSED || shp == IPC_NOID) {
		/* printk("shmat() -> EINVAL because shmid = %d is invalid\n",shmid); */
		goto out;
	}

	if (!(addr = (ulong) shmaddr)) {
		if (shmflg & SHM_REMAP)
			goto out;
		err = -ENOMEM;
		addr = 0;
	again:
		if (!(addr = get_unmapped_area(addr, shp->u.shm_segsz)))
			goto out;
		if (addr & (SHMLBA - 1)) {
			addr = (addr + (SHMLBA - 1)) & ~(SHMLBA - 1);
			goto again;
		}
	} else if (addr & (SHMLBA-1)) {
		if (shmflg & SHM_RND)
			addr &= ~(SHMLBA-1);	/* round down */
		else
			goto out;
	}

	/*
	 * Check if addr exceeds TASK_SIZE (from do_mmap)
	 */
	len = PAGE_SIZE * shp->shm_npages;
	err = -EINVAL;
	if (addr >= TASK_SIZE || len > TASK_SIZE || addr > TASK_SIZE - len)
		goto out;

	/*
	 * If shm segment goes below stack, make sure there is some
	 * space left for the stack to grow (presently 4 pages).
	 */
	if (addr < current->mm->start_stack &&
	    addr > current->mm->start_stack - PAGE_SIZE * (shp->shm_npages + 4)) {
		/* printk("shmat() -> EINVAL because segment intersects stack\n"); */
		goto out;
	}
	if (!(shmflg & SHM_REMAP))
		if ((shmd = find_vma_intersection(current->mm, addr,
						  addr + shp->u.shm_segsz))) {
			/* printk("shmat() -> EINVAL because the interval
			   [0x%lx,0x%lx) intersects an already mapped interval
			   [0x%lx,0x%lx).\n", addr, addr + shp->shm_segsz,
			   shmd->vm_start, shmd->vm_end); */
			goto out;
		}

	err = -EACCES;
	if (ipcperms(&shp->u.shm_perm,
		     shmflg & SHM_RDONLY ? S_IRUGO : S_IRUGO|S_IWUGO))
		goto out;
	err = -EIDRM;
	if (shp->u.shm_perm.seq != (unsigned int) shmid / SHMMNI)
		goto out;

	err = -ENOMEM;
	shmd = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
	if (!shmd)
		goto out;
	if ((shp != shm_segs[id]) ||
	    (shp->u.shm_perm.seq != (unsigned int) shmid / SHMMNI)) {
		kmem_cache_free(vm_area_cachep, shmd);
		err = -EIDRM;
		goto out;
	}

	shmd->vm_pte = SWP_ENTRY(SHM_SWP_TYPE, id);
	shmd->vm_start = addr;
	shmd->vm_end = addr + shp->shm_npages * PAGE_SIZE;
	shmd->vm_mm = current->mm;
	shmd->vm_page_prot = (shmflg & SHM_RDONLY) ? PAGE_READONLY : PAGE_SHARED;
	shmd->vm_flags = VM_SHM | VM_MAYSHARE | VM_SHARED
			 | VM_MAYREAD | VM_MAYEXEC | VM_READ | VM_EXEC
			 | ((shmflg & SHM_RDONLY) ? 0 : VM_MAYWRITE | VM_WRITE);
	shmd->vm_file = NULL;
	shmd->vm_offset = 0;
	shmd->vm_ops = &shm_vm_ops;

	shp->u.shm_nattch++;	/* prevent destruction */
	if (shp->u.shm_nattch > 0xffff - NR_TASKS || (err = shm_map (shmd))) {
		if (--shp->u.shm_nattch <= 0 &&
		    shp->u.shm_perm.mode & SHM_DEST)
			killseg(id);
		kmem_cache_free(vm_area_cachep, shmd);
		goto out;
	}

	insert_attach(shp, shmd);	/* insert shmd into shp->attaches */

	shp->u.shm_lpid = current->pid;
	shp->u.shm_atime = CURRENT_TIME;

	*raddr = addr;
	err = 0;
out:
	unlock_kernel();
	up(&current->mm->mmap_sem);
	return err;
}
int setup_vdso_pages(void)
{
	struct page **pagelist;
	unsigned long pages;
	struct mm_struct *mm = current->mm;
	unsigned long vdso_base = 0;
	int retval = 0;

	if (!vdso_ready)
		return 0;

	mm->context.vdso_base = 0;

	pagelist = vdso_pagelist;
	pages = vdso_pages;
#ifdef CONFIG_COMPAT
	if (is_compat_task()) {
		pagelist = vdso32_pagelist;
		pages = vdso32_pages;
	}
#endif

	/*
	 * vDSO has a problem and was disabled, just don't "enable" it for the
	 * process.
	 */
	if (pages == 0)
		return 0;

	vdso_base = get_unmapped_area(NULL, vdso_base,
				      (pages << PAGE_SHIFT) +
				      ((VDSO_ALIGNMENT - 1) & PAGE_MASK),
				      0, 0);
	if (IS_ERR_VALUE(vdso_base)) {
		retval = vdso_base;
		return retval;
	}

	/* Add required alignment. */
	vdso_base = ALIGN(vdso_base, VDSO_ALIGNMENT);

	/*
	 * Put vDSO base into mm struct. We need to do this before calling
	 * install_special_mapping or the perf counter mmap tracking code
	 * will fail to recognise it as a vDSO (since arch_vma_name fails).
	 */
	mm->context.vdso_base = vdso_base;

	/*
	 * Our vma flags don't have VM_WRITE so by default, the process isn't
	 * allowed to write those pages.
	 * gdb can break that with ptrace interface, and thus trigger COW on
	 * those pages but it's then your responsibility to never do that on
	 * the "data" page of the vDSO or you'll stop getting kernel updates
	 * and your nice userland gettimeofday will be totally dead.
	 * It's fine to use that for setting breakpoints in the vDSO code
	 * pages though.
	 */
	retval = install_special_mapping(mm, vdso_base, pages << PAGE_SHIFT,
					 VM_READ|VM_EXEC|
					 VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC,
					 pagelist);
	if (retval)
		mm->context.vdso_base = 0;

	return retval;
}
void *linux_regfile_mmap2(void *addr, size_t len, int prot, int flags,
			  int fd, size_t off)
{
	int subret = -E_INVAL;
	struct mm_struct *mm = current->mm;
	assert(mm != NULL);

	if (len == 0) {
		return (void *)-1;
	}
	lock_mm(mm);

	uintptr_t start = ROUNDDOWN((uintptr_t)addr, PGSIZE);
	len = ROUNDUP(len, PGSIZE);

	uint32_t vm_flags = VM_READ;
	if (prot & PROT_WRITE) {
		vm_flags |= VM_WRITE;
	}
	if (prot & PROT_EXEC) {
		vm_flags |= VM_EXEC;
	}
	if (flags & MAP_STACK) {
		vm_flags |= VM_STACK;
	}
	if (flags & MAP_ANONYMOUS) {
		vm_flags |= VM_ANONYMOUS;
	}

	subret = -E_NO_MEM;
	if (start == 0 && (start = get_unmapped_area(mm, len)) == 0) {
		goto out_unlock;
	}

	uintptr_t end = start + len;
	struct vma_struct *vma = find_vma(mm, start);
	if (vma == NULL || vma->vm_start >= end) {
		vma = NULL;
	} else if (!(flags & MAP_FIXED)) {
		start = get_unmapped_area(mm, len);
		vma = NULL;
	} else if (!(vma->vm_flags & VM_ANONYMOUS)) {
		goto out_unlock;
	} else if (vma->vm_start == start && end == vma->vm_end) {
		vma->vm_flags = vm_flags;
	} else {
		assert(vma->vm_start <= start && end <= vma->vm_end);
		if ((subret = mm_unmap_keep_pages(mm, start, len)) != 0) {
			goto out_unlock;
		}
		vma = NULL;
	}
	if (vma == NULL &&
	    (subret = mm_map(mm, start, len, vm_flags, &vma)) != 0) {
		goto out_unlock;
	}
	if (!(flags & MAP_ANONYMOUS)) {
		vma_mapfile(vma, fd, off << 12, NULL);
	}
	subret = 0;
out_unlock:
	unlock_mm(mm);
	return subret == 0 ? (void *)start : (void *)-1;
}
long device_ioctl(struct file *file, unsigned int ioctl_num,
		  unsigned long ioctl_param)
{
	struct task_struct *ptr;
	struct mm_struct *target_mm, *current_mm;
	struct cow_monitor *cow;
	struct vm_area_struct *target_vm, *tmp, *prev, **pprev;
	struct rb_node **rb_link, *rb_parent;
	unsigned long addr;
	unsigned long flags;
	long retval;

	retval = -EINVAL;
	switch (ioctl_num) {
	case IOCTL_COW_MONITOR:
		ptr = &init_task;
		target_mm = NULL;
		current_mm = current->active_mm;
		tmp = NULL;
		cow = (struct cow_monitor *) ioctl_param;

		/*** get process task_struct from pid ***/
		while (ptr->tasks.next != NULL &&
		       list_entry(ptr->tasks.next, struct task_struct, tasks) != &init_task) {
			if (ptr->pid == cow->pid) {
				target_mm = ptr->active_mm;
			}
			ptr = list_entry(ptr->tasks.next, struct task_struct, tasks);
		}

		if (target_mm == NULL) {
			printk(KERN_ALERT "no process found with pid %d\n",
			       (int) cow->pid);
			return -EINVAL;
		}

		// we got the target process
		printk("we got target process\n");

		// try to get the semaphores
		down_write(&target_mm->mmap_sem);
		down_write(&current_mm->mmap_sem);

		/*** check if the memory region specified by address is valid
		     and has len length ***/
		target_vm = find_vma(target_mm, (unsigned long) cow->addr);
		if (target_vm == NULL ||
		    (unsigned long)cow->addr != target_vm->vm_start) {
			printk(KERN_ALERT "no vm area found with addr == vm_start\n");
			goto free_out;
		}

		printk("we got target vm area\n");

		/*** check if current task can have another memory map ***/
		if (current_mm->map_count > sysctl_max_map_count) {
			printk(KERN_ALERT "no more region can be mapped to this process\n");
			goto free_out;
		}

		// calc MAP_XXX => VM_XXX flags for get_unmapped_area
		flags = calc_mmap_flag_bits(target_vm->vm_flags);
		cow->len = PAGE_ALIGN(cow->len);

		/*** search sequentially for an area which has enough size ***/
		addr = get_unmapped_area(NULL, (unsigned long)cow->addr,
					 cow->len, 0, flags);
		if (addr & ~PAGE_MASK) {	// addr is not page aligned
			printk(KERN_ALERT "no unmapped area\n");
			goto free_out;
		}

		tmp = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL);
		if (!tmp) {
			printk(KERN_ALERT "not enough memory to allocate vm_area_struct\n");
			goto oom_out;
		}

		if (target_vm->vm_flags & VM_ACCOUNT)
			printk("target region has VM_ACCOUNT flag\n");

		/*** copy and set tmp ***/
		*tmp = *target_vm;	// copy target_vm

		INIT_LIST_HEAD(&tmp->anon_vma_chain);

		// here, vma_dup_policy
		retval = vma_dup_policy(target_vm, tmp);
		if (retval)
			goto oom_policy_out;

		// here, anon_vma_fork
		if (anon_vma_fork(tmp, target_vm))
			goto oom_anon_vma_fork;

		tmp->vm_mm = current_mm;
		tmp->vm_flags &= ~VM_LOCKED;
		tmp->vm_next = tmp->vm_prev = NULL;
		rb_link = &current_mm->mm_rb.rb_node;
		rb_parent = NULL;
		/* pprev = &current_mm->mmap;
		   *pprev = tmp;
		   tmp->vm_prev = NULL;
		   prev = tmp; */

		// here, __vma_link_rb
		rb_link = &tmp->vm_rb.rb_right;
		rb_parent = &tmp->vm_rb;
		// current_mm->map_count++;

		up_write(&current_mm->mmap_sem);
		up_write(&target_mm->mmap_sem);	// release the semaphores

		kmem_cache_free(vm_area_cachep, tmp);
		return 0;

oom_anon_vma_fork:
		retval = -ENOMEM;
		mpol_put(vma_policy(tmp));
oom_policy_out:
oom_out:
		retval = -ENOMEM;
free_out:
		if (tmp)
			kmem_cache_free(vm_area_cachep, tmp);
		up_write(&current_mm->mmap_sem);
		up_write(&target_mm->mmap_sem);	// release the semaphores
		return retval;
	}
	return retval;
}
static unsigned long load_elf_interp(struct elfhdr * interp_elf_ex,
				     struct dentry * interpreter_dentry,
				     unsigned long *interp_load_addr)
{
	struct file * file;
	struct elf_phdr *elf_phdata;
	struct elf_phdr *eppnt;
	unsigned long load_addr = 0;
	int load_addr_set = 0;
	unsigned long last_bss = 0, elf_bss = 0;
	unsigned long error = ~0UL;
	int elf_exec_fileno;
	int retval, i, size;

	/* First of all, some simple consistency checks */
	if (interp_elf_ex->e_type != ET_EXEC &&
	    interp_elf_ex->e_type != ET_DYN)
		goto out;
	if (!elf_check_arch(interp_elf_ex->e_machine))
		goto out;
	if (!interpreter_dentry->d_inode->i_op ||
	    !interpreter_dentry->d_inode->i_op->default_file_ops->mmap)
		goto out;

	/*
	 * If the size of this structure has changed, then punt, since
	 * we will be doing the wrong thing.
	 */
	if (interp_elf_ex->e_phentsize != sizeof(struct elf_phdr))
		goto out;

	/* Now read in all of the header information */
	size = sizeof(struct elf_phdr) * interp_elf_ex->e_phnum;
	if (size > ELF_EXEC_PAGESIZE)
		goto out;
	elf_phdata = (struct elf_phdr *) kmalloc(size, GFP_KERNEL);
	if (!elf_phdata)
		goto out;

	retval = read_exec(interpreter_dentry, interp_elf_ex->e_phoff,
			   (char *) elf_phdata, size, 1);
	error = retval;
	if (retval < 0)
		goto out_free;

	error = ~0UL;
	elf_exec_fileno = open_dentry(interpreter_dentry, O_RDONLY);
	if (elf_exec_fileno < 0)
		goto out_free;
	file = fget(elf_exec_fileno);

	eppnt = elf_phdata;
	for (i = 0; i < interp_elf_ex->e_phnum; i++, eppnt++) {
		if (eppnt->p_type == PT_LOAD) {
			int elf_type = MAP_PRIVATE | MAP_DENYWRITE;
			int elf_prot = 0;
			unsigned long vaddr = 0;
			unsigned long k, map_addr;

			if (eppnt->p_flags & PF_R)
				elf_prot = PROT_READ;
			if (eppnt->p_flags & PF_W)
				elf_prot |= PROT_WRITE;
			if (eppnt->p_flags & PF_X)
				elf_prot |= PROT_EXEC;
			vaddr = eppnt->p_vaddr;
			if (interp_elf_ex->e_type == ET_EXEC || load_addr_set) {
				elf_type |= MAP_FIXED;
#ifdef __sparc__
			} else {
				load_addr = get_unmapped_area(0,
					eppnt->p_filesz + ELF_PAGEOFFSET(vaddr));
#endif
			}

			map_addr = do_mmap(file,
					   load_addr + ELF_PAGESTART(vaddr),
					   eppnt->p_filesz + ELF_PAGEOFFSET(eppnt->p_vaddr),
					   elf_prot,
					   elf_type,
					   eppnt->p_offset - ELF_PAGEOFFSET(eppnt->p_vaddr));
			if (map_addr > -1024UL) /* Real error */
				goto out_close;

			if (!load_addr_set &&
			    interp_elf_ex->e_type == ET_DYN) {
				load_addr = map_addr - ELF_PAGESTART(vaddr);
				load_addr_set = 1;
			}

			/*
			 * Find the end of the file mapping for this phdr, and
			 * keep track of the largest address we see for this.
			 */
			k = load_addr + eppnt->p_vaddr + eppnt->p_filesz;
			if (k > elf_bss)
				elf_bss = k;

			/*
			 * Do the same thing for the memory mapping - between
			 * elf_bss and last_bss is the bss section.
			 */
			k = load_addr + eppnt->p_memsz + eppnt->p_vaddr;
			if (k > last_bss)
				last_bss = k;
		}
	}

	/* Now use mmap to map the library into memory. */

	/*
	 * Now fill out the bss section. First pad the last page up
	 * to the page boundary, and then perform a mmap to make sure
	 * that there are zero-mapped pages up to and including the
	 * last bss page.
	 */
	padzero(elf_bss);
	elf_bss = ELF_PAGESTART(elf_bss + ELF_EXEC_PAGESIZE - 1); /* What we have mapped so far */

	/* Map the last of the bss segment */
	if (last_bss > elf_bss)
		do_mmap(NULL, elf_bss, last_bss - elf_bss,
			PROT_READ|PROT_WRITE|PROT_EXEC,
			MAP_FIXED|MAP_PRIVATE, 0);

	*interp_load_addr = load_addr;
	error = ((unsigned long) interp_elf_ex->e_entry) + load_addr;

out_close:
	fput(file);
	sys_close(elf_exec_fileno);
out_free:
	kfree(elf_phdata);
out:
	return error;
}
unsigned long do_mmap(unsigned long addr, unsigned long len,
		      unsigned long prot, unsigned long flags,
		      int fd, unsigned long off)
{
	int error;
	struct file *file = NULL;
	struct vm_area_struct *vma;

	if (flags & MAP_ANONYMOUS) {
		if (fd != (unsigned long)-1)
			return -EINVAL;
	} else {
		if (fd >= NR_OPEN || !(file = current->files->fd[fd]))
			return -EBADF;
	}

	if ((len = PAGE_ALIGN(len)) == 0)
		return -EINVAL;
	if (addr > KERNEL_BASE || len > KERNEL_BASE || addr > KERNEL_BASE - len)
		return -EINVAL;

	if (flags & MAP_FIXED) {
		if (addr & ~PAGE_MASK)
			return -EINVAL;
		if (len > KERNEL_BASE || addr > KERNEL_BASE - len)
			return -EINVAL;
	} else {
		addr = get_unmapped_area(addr, len);
		if (!addr)
			return -ENOMEM;
	}

	if (file) {
		if (!file->f_op || !file->f_op->mmap)
			return -ENODEV;
		if (off & ~PAGE_MASK)
			return -EINVAL;
		/* offset overflow? */
		if (off + len < off)
			return -EINVAL;
	}

	switch (flags & MAP_TYPE) {
	case MAP_SHARED:
		if (file && (prot & PROT_WRITE) && !(file->f_mode & 2))
			return -EACCES;
		/* fall through to the shared read check */
	case MAP_PRIVATE:
		if (file && !(file->f_mode & 1))
			return -EACCES;
		break;
	default:
		return -EINVAL;
	}

	vma = (struct vm_area_struct *)kmalloc(sizeof(*vma));
	if (!vma)
		return -ENOMEM;

	vma->vm_mm = current->mm;
	vma->vm_start = addr;
	vma->vm_end = addr + len;
	vma->vm_flags = prot & (VM_READ | VM_WRITE | VM_EXEC);
	if (flags & VM_GROWSDOWN)
		vma->vm_flags |= VM_GROWSDOWN;
	if ((flags & MAP_TYPE) == MAP_SHARED)
		vma->vm_flags |= VM_SHARED;
	/* initialize the share ring */
	vma->vm_next_share = vma->vm_prev_share = vma;
	vma->vm_page_prot = get_page_prot(vma->vm_flags);
	vma->vm_ops = NULL;
	if (file)
		vma->vm_offset = off;
	else
		vma->vm_offset = 0;
	vma->vm_inode = NULL;

	do_munmap(addr, len);	/* Clear old maps */

	if (file) {
		error = file->f_op->mmap(file->f_inode, file, vma);
		if (error) {
			kfree(vma);
			return error;
		}
	}

	insert_vm_struct(current->mm, vma);
	/* merge_segments(current->mm, vma->vm_start, vma->vm_end); */

	return addr;
}
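/*
 * Usage sketch for the teaching-kernel do_mmap() above, assuming it is
 * exposed directly as the mmap system call body and that PROT_READ /
 * PROT_WRITE line up with VM_READ / VM_WRITE as in historical Linux:
 * map one anonymous, private, readable and writable page at a
 * kernel-chosen address.
 */
static unsigned long map_scratch_page(void)
{
	/* fd must be -1 for MAP_ANONYMOUS; the offset is ignored. */
	return do_mmap(0, PAGE_SIZE,
		       PROT_READ | PROT_WRITE,
		       MAP_PRIVATE | MAP_ANONYMOUS,
		       -1, 0);
}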