/* Read the environment variable of current process specified by @key. */ int cfs_get_environ(const char *key, char *value, int *val_len) { struct mm_struct *mm; char *buffer; int buf_len = PAGE_CACHE_SIZE; int key_len = strlen(key); unsigned long addr; int rc; ENTRY; buffer = kmalloc(buf_len, GFP_USER); if (!buffer) RETURN(-ENOMEM); mm = get_task_mm(current); if (!mm) { kfree(buffer); RETURN(-EINVAL); } /* Avoid deadlocks on mmap_sem if called from sys_mmap_pgoff(), * which is already holding mmap_sem for writes. If some other * thread gets the write lock in the meantime, this thread will * block, but at least it won't deadlock on itself. LU-1735 */ if (down_read_trylock(&mm->mmap_sem) == 0) { kfree(buffer); return -EDEADLK; } up_read(&mm->mmap_sem); addr = mm->env_start; while (addr < mm->env_end) { int this_len, retval, scan_len; char *env_start, *env_end; memset(buffer, 0, buf_len); this_len = min_t(int, mm->env_end - addr, buf_len); retval = cfs_access_process_vm(current, addr, buffer, this_len, 0); if (retval != this_len) break; addr += retval; /* Parse the buffer to find out the specified key/value pair. * The "key=value" entries are separated by '\0'. */ env_start = buffer; scan_len = this_len; while (scan_len) { char *entry; int entry_len; env_end = memscan(env_start, '\0', scan_len); LASSERT(env_end >= env_start && env_end <= env_start + scan_len); /* The last entry of this buffer cross the buffer * boundary, reread it in next cycle. */ if (unlikely(env_end - env_start == scan_len)) { /* This entry is too large to fit in buffer */ if (unlikely(scan_len == this_len)) { CERROR("Too long env variable.\n"); GOTO(out, rc = -EINVAL); } addr -= scan_len; break; } entry = env_start; entry_len = env_end - env_start; /* Key length + length of '=' */ if (entry_len > key_len + 1 && !memcmp(entry, key, key_len)) { entry += key_len + 1; entry_len -= key_len + 1; /* The 'value' buffer passed in is too small.*/ if (entry_len >= *val_len) GOTO(out, rc = -EOVERFLOW); memcpy(value, entry, entry_len); *val_len = entry_len; GOTO(out, rc = 0); } scan_len -= (env_end - env_start + 1); env_start = env_end + 1; } } GOTO(out, rc = -ENOENT); out: mmput(mm); kfree((void *)buffer); return rc; }
/* * lmk_state_store() will called by framework, * the framework will send the pid of process that need to be swapped */ static ssize_t lmk_state_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t size) { sscanf(buf, "%d,%d", &lmk_kill_pid, &lmk_kill_ok); /* if the screen on, the optimized compcache will stop */ if (atomic_read(&optimize_comp_on) != 1) return size; if (lmk_kill_ok == 1) { struct task_struct *p; struct task_struct *selected = NULL; struct sysinfo ramzswap_info = { 0 }; struct mm_struct *mm_scan = NULL; /* * check the free RAM and swap area, * stop the optimized compcache in cpu idle case; * leave some swap area for using in low memory case */ si_swapinfo(&ramzswap_info); si_meminfo(&ramzswap_info); if ((ramzswap_info.freeswap < CHECK_FREE_SWAPSPACE) || (ramzswap_info.freeram < check_free_memory)) { #if SWAP_PROCESS_DEBUG_LOG > 0 printk(KERN_INFO "idletime compcache is ignored : free RAM %lu, free swap %lu\n", ramzswap_info.freeram, ramzswap_info.freeswap); #endif lmk_kill_ok = 0; return size; } read_lock(&tasklist_lock); for_each_process(p) { if ((p->pid == lmk_kill_pid) && (__task_cred(p)->uid > 10000)) { task_lock(p); selected = p; if (!selected->mm || !selected->signal) { task_unlock(p); selected = NULL; break; } mm_scan = selected->mm; if (mm_scan) { if (selected->flags & PF_KTHREAD) mm_scan = NULL; else atomic_inc(&mm_scan->mm_users); } task_unlock(selected); #if SWAP_PROCESS_DEBUG_LOG > 0 printk(KERN_INFO "idle time compcache: swap process pid %d, name %s, oom %d, task size %ld\n", p->pid, p->comm, p->signal->oom_adj, get_mm_rss(p->mm)); #endif break; } } read_unlock(&tasklist_lock); if (mm_scan) { LIST_HEAD(zone0_page_list); LIST_HEAD(zone1_page_list); int pages_tofree = 0, pages_freed = 0; down_read(&mm_scan->mmap_sem); pages_tofree = shrink_pages(mm_scan, &zone0_page_list, &zone1_page_list, 0x7FFFFFFF); up_read(&mm_scan->mmap_sem); mmput(mm_scan); pages_freed = swap_pages(&zone0_page_list, &zone1_page_list); lmk_kill_ok = 0; } } return size; }
/* * unshare allows a process to 'unshare' part of the process * context which was originally shared using clone. copy_* * functions used by do_fork() cannot be used here directly * because they modify an inactive task_struct that is being * constructed. Here we are modifying the current, active, * task_struct. */ SYSCALL_DEFINE1(unshare, unsigned long, unshare_flags) { int err = 0; struct fs_struct *fs, *new_fs = NULL; struct sighand_struct *new_sigh = NULL; struct mm_struct *mm, *new_mm = NULL, *active_mm = NULL; struct files_struct *fd, *new_fd = NULL; struct nsproxy *new_nsproxy = NULL; int do_sysvsem = 0; check_unshare_flags(&unshare_flags); /* Return -EINVAL for all unsupported flags */ err = -EINVAL; if (unshare_flags & ~(CLONE_THREAD|CLONE_FS|CLONE_NEWNS|CLONE_SIGHAND| CLONE_VM|CLONE_FILES|CLONE_SYSVSEM| CLONE_NEWUTS|CLONE_NEWIPC|CLONE_NEWNET)) goto bad_unshare_out; /* * CLONE_NEWIPC must also detach from the undolist: after switching * to a new ipc namespace, the semaphore arrays from the old * namespace are unreachable. */ if (unshare_flags & (CLONE_NEWIPC|CLONE_SYSVSEM)) do_sysvsem = 1; if ((err = unshare_thread(unshare_flags))) goto bad_unshare_out; if ((err = unshare_fs(unshare_flags, &new_fs))) goto bad_unshare_cleanup_thread; if ((err = unshare_sighand(unshare_flags, &new_sigh))) goto bad_unshare_cleanup_fs; if ((err = unshare_vm(unshare_flags, &new_mm))) goto bad_unshare_cleanup_sigh; if ((err = unshare_fd(unshare_flags, &new_fd))) goto bad_unshare_cleanup_vm; if ((err = unshare_nsproxy_namespaces(unshare_flags, &new_nsproxy, new_fs))) goto bad_unshare_cleanup_fd; if (new_fs || new_mm || new_fd || do_sysvsem || new_nsproxy) { if (do_sysvsem) { /* * CLONE_SYSVSEM is equivalent to sys_exit(). */ exit_sem(current); } if (new_nsproxy) { switch_task_namespaces(current, new_nsproxy); new_nsproxy = NULL; } task_lock(current); if (new_fs) { fs = current->fs; write_lock(&fs->lock); current->fs = new_fs; if (--fs->users) new_fs = NULL; else new_fs = fs; write_unlock(&fs->lock); } if (new_mm) { mm = current->mm; active_mm = current->active_mm; current->mm = new_mm; current->active_mm = new_mm; activate_mm(active_mm, new_mm); new_mm = mm; } if (new_fd) { fd = current->files; current->files = new_fd; new_fd = fd; } task_unlock(current); } if (new_nsproxy) put_nsproxy(new_nsproxy); bad_unshare_cleanup_fd: if (new_fd) put_files_struct(new_fd); bad_unshare_cleanup_vm: if (new_mm) mmput(new_mm); bad_unshare_cleanup_sigh: if (new_sigh) if (atomic_dec_and_test(&new_sigh->count)) kmem_cache_free(sighand_cachep, new_sigh); bad_unshare_cleanup_fs: if (new_fs) free_fs_struct(new_fs); bad_unshare_cleanup_thread: bad_unshare_out: return err; }
/* * We completely avoid races by reading each swap page in advance, * and then search for the process using it. All the necessary * page table adjustments can then be made atomically. */ static int try_to_unuse(unsigned int type) { struct swap_info_struct * si = &swap_info[type]; struct mm_struct *start_mm; unsigned short *swap_map; unsigned short swcount; struct page *page; swp_entry_t entry; int i = 0; int retval = 0; int reset_overflow = 0; /* * When searching mms for an entry, a good strategy is to * start at the first mm we freed the previous entry from * (though actually we don't notice whether we or coincidence * freed the entry). Initialize this start_mm with a hold. * * A simpler strategy would be to start at the last mm we * freed the previous entry from; but that would take less * advantage of mmlist ordering (now preserved by swap_out()), * which clusters forked address spaces together, most recent * child immediately after parent. If we race with dup_mmap(), * we very much want to resolve parent before child, otherwise * we may miss some entries: using last mm would invert that. */ start_mm = &init_mm; atomic_inc(&init_mm.mm_users); /* * Keep on scanning until all entries have gone. Usually, * one pass through swap_map is enough, but not necessarily: * mmput() removes mm from mmlist before exit_mmap() and its * zap_page_range(). That's not too bad, those entries are * on their way out, and handled faster there than here. * do_munmap() behaves similarly, taking the range out of mm's * vma list before zap_page_range(). But unfortunately, when * unmapping a part of a vma, it takes the whole out first, * then reinserts what's left after (might even reschedule if * open() method called) - so swap entries may be invisible * to swapoff for a while, then reappear - but that is rare. */ while ((i = find_next_to_unuse(si, i))) { /* * Get a page for the entry, using the existing swap * cache page if there is one. Otherwise, get a clean * page and read the swap into it. */ swap_map = &si->swap_map[i]; entry = SWP_ENTRY(type, i); page = read_swap_cache_async(entry); if (!page) { /* * Either swap_duplicate() failed because entry * has been freed independently, and will not be * reused since sys_swapoff() already disabled * allocation from here, or alloc_page() failed. */ if (!*swap_map) continue; retval = -ENOMEM; break; } /* * Don't hold on to start_mm if it looks like exiting. */ if (atomic_read(&start_mm->mm_users) == 1) { mmput(start_mm); start_mm = &init_mm; atomic_inc(&init_mm.mm_users); } /* * Wait for and lock page. When do_swap_page races with * try_to_unuse, do_swap_page can handle the fault much * faster than try_to_unuse can locate the entry. This * apparently redundant "wait_on_page" lets try_to_unuse * defer to do_swap_page in such a case - in some tests, * do_swap_page and try_to_unuse repeatedly compete. */ wait_on_page(page); lock_page(page); /* * Remove all references to entry, without blocking. * Whenever we reach init_mm, there's no address space * to search, but use it as a reminder to search shmem. */ swcount = *swap_map; if (swcount > 1) { flush_page_to_ram(page); if (start_mm == &init_mm) shmem_unuse(entry, page); else unuse_process(start_mm, entry, page); } if (*swap_map > 1) { int set_start_mm = (*swap_map >= swcount); struct list_head *p = &start_mm->mmlist; struct mm_struct *new_start_mm = start_mm; struct mm_struct *mm; spin_lock(&mmlist_lock); while (*swap_map > 1 && (p = p->next) != &start_mm->mmlist) { mm = list_entry(p, struct mm_struct, mmlist); swcount = *swap_map; if (mm == &init_mm) { set_start_mm = 1; shmem_unuse(entry, page); } else unuse_process(mm, entry, page); if (set_start_mm && *swap_map < swcount) { new_start_mm = mm; set_start_mm = 0; } } atomic_inc(&new_start_mm->mm_users); spin_unlock(&mmlist_lock); mmput(start_mm); start_mm = new_start_mm; } /* * How could swap count reach 0x7fff when the maximum * pid is 0x7fff, and there's no way to repeat a swap * page within an mm (except in shmem, where it's the * shared object which takes the reference count)? * We believe SWAP_MAP_MAX cannot occur in Linux 2.4. * * If that's wrong, then we should worry more about * exit_mmap() and do_munmap() cases described above: * we might be resetting SWAP_MAP_MAX too early here. * We know "Undead"s can happen, they're okay, so don't * report them; but do report if we reset SWAP_MAP_MAX. */ if (*swap_map == SWAP_MAP_MAX) { swap_list_lock(); swap_device_lock(si); nr_swap_pages++; *swap_map = 1; swap_device_unlock(si); swap_list_unlock(); reset_overflow = 1; } /* * If a reference remains (rare), we would like to leave * the page in the swap cache; but try_to_swap_out could * then re-duplicate the entry once we drop page lock, * so we might loop indefinitely; also, that page could * not be swapped out to other storage meanwhile. So: * delete from cache even if there's another reference, * after ensuring that the data has been saved to disk - * since if the reference remains (rarer), it will be * read from disk into another page. Splitting into two * pages would be incorrect if swap supported "shared * private" pages, but they are handled by tmpfs files. * Note shmem_unuse already deleted its from swap cache. */ swcount = *swap_map; if ((swcount > 0) != PageSwapCache(page)) BUG(); if ((swcount > 1) && PageDirty(page)) { rw_swap_page(WRITE, page); lock_page(page); } if (PageSwapCache(page)) delete_from_swap_cache(page); /* * So we could skip searching mms once swap count went * to 1, we did not mark any present ptes as dirty: must * mark page dirty so try_to_swap_out will preserve it. */ SetPageDirty(page); UnlockPage(page); page_cache_release(page); /* * Make sure that we aren't completely killing * interactive performance. Interruptible check on * signal_pending() would be nice, but changes the spec? */ if (current->need_resched) schedule(); }
/** * process_vm_rw_core - core of reading/writing pages from task specified * @pid: PID of process to read/write from/to * @lvec: iovec array specifying where to copy to/from locally * @liovcnt: size of lvec array * @rvec: iovec array specifying where to copy to/from in the other process * @riovcnt: size of rvec array * @flags: currently unused * @vm_write: 0 if reading from other process, 1 if writing to other process * Returns the number of bytes read/written or error code. May * return less bytes than expected if an error occurs during the copying * process. */ static ssize_t process_vm_rw_core(pid_t pid, const struct iovec *lvec, unsigned long liovcnt, const struct iovec *rvec, unsigned long riovcnt, unsigned long flags, int vm_write) { struct task_struct *task; struct page *pp_stack[PVM_MAX_PP_ARRAY_COUNT]; struct page **process_pages = pp_stack; struct mm_struct *mm; unsigned long i; ssize_t rc = 0; ssize_t bytes_copied_loop; ssize_t bytes_copied = 0; unsigned long nr_pages = 0; unsigned long nr_pages_iov; unsigned long iov_l_curr_idx = 0; size_t iov_l_curr_offset = 0; ssize_t iov_len; /* * Work out how many pages of struct pages we're going to need * when eventually calling get_user_pages */ for (i = 0; i < riovcnt; i++) { iov_len = rvec[i].iov_len; if (iov_len > 0) { nr_pages_iov = ((unsigned long)rvec[i].iov_base + iov_len) / PAGE_SIZE - (unsigned long)rvec[i].iov_base / PAGE_SIZE + 1; nr_pages = max(nr_pages, nr_pages_iov); } } if (nr_pages == 0) return 0; if (nr_pages > PVM_MAX_PP_ARRAY_COUNT) { /* For reliability don't try to kmalloc more than 2 pages worth */ process_pages = kmalloc(min_t(size_t, PVM_MAX_KMALLOC_PAGES, sizeof(struct pages *)*nr_pages), GFP_KERNEL); if (!process_pages) return -ENOMEM; } /* Get process information */ rcu_read_lock(); task = find_task_by_vpid(pid); if (task) get_task_struct(task); rcu_read_unlock(); if (!task) { rc = -ESRCH; goto free_proc_pages; } mm = mm_access(task, PTRACE_MODE_ATTACH); if (!mm || IS_ERR(mm)) { rc = IS_ERR(mm) ? PTR_ERR(mm) : -ESRCH; /* * Explicitly map EACCES to EPERM as EPERM is a more a * appropriate error code for process_vw_readv/writev */ if (rc == -EACCES) rc = -EPERM; goto put_task_struct; } for (i = 0; i < riovcnt && iov_l_curr_idx < liovcnt; i++) { rc = process_vm_rw_single_vec( (unsigned long)rvec[i].iov_base, rvec[i].iov_len, lvec, liovcnt, &iov_l_curr_idx, &iov_l_curr_offset, process_pages, mm, task, vm_write, &bytes_copied_loop); bytes_copied += bytes_copied_loop; if (rc != 0) { /* If we have managed to copy any data at all then we return the number of bytes copied. Otherwise we return the error code */ if (bytes_copied) rc = bytes_copied; goto put_mm; } } rc = bytes_copied; put_mm: mmput(mm); put_task_struct: put_task_struct(task); free_proc_pages: if (process_pages != pp_stack) kfree(process_pages); return rc; }
static long compat_fimg2d_ioctl32(struct file *file, unsigned int cmd, unsigned long arg) { switch (cmd) { case COMPAT_FIMG2D_BITBLT_BLIT: { struct compat_fimg2d_blit __user *data32; struct fimg2d_blit __user *data; struct mm_struct *mm; enum blit_op op; enum blit_sync sync; enum fimg2d_qos_level qos_lv; compat_uint_t seq_no; unsigned long stack_cursor = 0; int err; mm = get_task_mm(current); if (!mm) { fimg2d_err("no mm for ctx\n"); return -ENXIO; } data32 = compat_ptr(arg); data = compat_alloc_user_space(sizeof(*data)); if (!data) { fimg2d_err("failed to allocate user compat space\n"); mmput(mm); return -ENOMEM; } stack_cursor += sizeof(*data); memset(data, 0, sizeof(*data)); err = get_user(op, &data32->op); err |= put_user(op, &data->op); if (err) { fimg2d_err("failed to get compat data\n"); mmput(mm); return err; } err = compat_get_fimg2d_param(&data->param, &data32->param); if (err) { fimg2d_err("failed to get compat data\n"); mmput(mm); return err; } if (data32->src) { data->src = compat_alloc_user_space(sizeof(*data->src) + stack_cursor); if (!data->src) { fimg2d_err("failed to allocate user compat space\n"); mmput(mm); return -ENOMEM; } stack_cursor += sizeof(*data->src); err = compat_get_fimg2d_image(data->src, data32->src); if (err) { fimg2d_err("failed to get compat data\n"); mmput(mm); return err; } } if (data32->msk) { data->msk = compat_alloc_user_space(sizeof(*data->msk) + stack_cursor); if (!data->msk) { fimg2d_err("failed to allocate user compat space\n"); mmput(mm); return -ENOMEM; } stack_cursor += sizeof(*data->msk); err = compat_get_fimg2d_image(data->msk, data32->msk); if (err) { fimg2d_err("failed to get compat data\n"); mmput(mm); return err; } } if (data32->tmp) { data->tmp = compat_alloc_user_space(sizeof(*data->tmp) + stack_cursor); if (!data->tmp) { fimg2d_err("failed to allocate user compat space\n"); mmput(mm); return -ENOMEM; } stack_cursor += sizeof(*data->tmp); err = compat_get_fimg2d_image(data->tmp, data32->tmp); if (err) { fimg2d_err("failed to get compat data\n"); mmput(mm); return err; } } if (data32->dst) { data->dst = compat_alloc_user_space(sizeof(*data->dst) + stack_cursor); if (!data->dst) { fimg2d_err("failed to allocate user compat space\n"); mmput(mm); return -ENOMEM; } stack_cursor += sizeof(*data->dst); err = compat_get_fimg2d_image(data->dst, data32->dst); if (err) { fimg2d_err("failed to get compat data\n"); mmput(mm); return err; } } err = get_user(sync, &data32->sync); err |= put_user(sync, &data->sync); if (err) { fimg2d_err("failed to get compat data\n"); mmput(mm); return err; } err = get_user(seq_no, &data32->seq_no); err |= put_user(seq_no, &data->seq_no); if (err) { fimg2d_err("failed to get compat data\n"); mmput(mm); return err; } err = get_user(qos_lv, &data32->qos_lv); err |= put_user(qos_lv, &data->qos_lv); if (err) { fimg2d_err("failed to get compat data\n"); mmput(mm); return err; } err = file->f_op->unlocked_ioctl(file, FIMG2D_BITBLT_BLIT, (unsigned long)data); mmput(mm); return err; } case COMPAT_FIMG2D_BITBLT_VERSION: { struct compat_fimg2d_version __user *data32; struct fimg2d_version __user *data; compat_uint_t i; int err; data32 = compat_ptr(arg); data = compat_alloc_user_space(sizeof(*data)); if (!data) { fimg2d_err("failed to allocate user compat space\n"); return -ENOMEM; } err = get_user(i, &data32->hw); err |= put_user(i, &data->hw); err |= get_user(i, &data32->sw); err |= put_user(i, &data->sw); if (err) return err; return file->f_op->unlocked_ioctl(file, FIMG2D_BITBLT_VERSION, (unsigned long)data); } case FIMG2D_BITBLT_ACTIVATE: { return file->f_op->unlocked_ioctl(file, FIMG2D_BITBLT_ACTIVATE, arg); } default: fimg2d_err("unknown ioctl\n"); return -EINVAL; } }
static int copy_mm(unsigned long clone_flags, struct task_struct * tsk) { struct mm_struct * mm, *oldmm; int retval; tsk->min_flt = tsk->maj_flt = 0; tsk->nvcsw = tsk->nivcsw = 0; tsk->mm = NULL; tsk->active_mm = NULL; /* * Are we cloning a kernel thread? * * We need to steal a active VM for that.. */ oldmm = current->mm; if (!oldmm) return 0; if (clone_flags & CLONE_VM) { atomic_inc(&oldmm->mm_users); mm = oldmm; /* * There are cases where the PTL is held to ensure no * new threads start up in user mode using an mm, which * allows optimizing out ipis; the tlb_gather_mmu code * is an example. */ spin_unlock_wait(&oldmm->page_table_lock); goto good_mm; } retval = -ENOMEM; mm = allocate_mm(); if (!mm) goto fail_nomem; /* Copy the current MM stuff.. */ memcpy(mm, oldmm, sizeof(*mm)); if (!mm_init(mm)) goto fail_nomem; if (init_new_context(tsk,mm)) goto fail_nocontext; retval = dup_mmap(mm, oldmm); if (retval) goto free_pt; mm->hiwater_rss = get_mm_counter(mm,rss); mm->hiwater_vm = mm->total_vm; good_mm: tsk->mm = mm; tsk->active_mm = mm; return 0; free_pt: mmput(mm); fail_nomem: return retval; fail_nocontext: /* * If init_new_context() failed, we cannot use mmput() to free the mm * because it calls destroy_context() */ mm_free_pgd(mm); free_mm(mm); return retval; }
static int binder_update_page_range(struct binder_alloc *alloc, int allocate, void *start, void *end) { void *page_addr; unsigned long user_page_addr; struct binder_lru_page *page; struct vm_area_struct *vma = NULL; struct mm_struct *mm = NULL; bool need_mm = false; binder_alloc_debug(BINDER_DEBUG_BUFFER_ALLOC, "%d: %s pages %pK-%pK\n", alloc->pid, allocate ? "allocate" : "free", start, end); if (end <= start) return 0; trace_binder_update_page_range(alloc, allocate, start, end); if (allocate == 0) goto free_range; for (page_addr = start; page_addr < end; page_addr += PAGE_SIZE) { page = &alloc->pages[(page_addr - alloc->buffer) / PAGE_SIZE]; if (!page->page_ptr) { need_mm = true; break; } } if (need_mm && mmget_not_zero(alloc->vma_vm_mm)) mm = alloc->vma_vm_mm; if (mm) { down_read(&mm->mmap_sem); vma = alloc->vma; } if (!vma && need_mm) { pr_err("%d: binder_alloc_buf failed to map pages in userspace, no vma\n", alloc->pid); goto err_no_vma; } for (page_addr = start; page_addr < end; page_addr += PAGE_SIZE) { int ret; bool on_lru; size_t index; index = (page_addr - alloc->buffer) / PAGE_SIZE; page = &alloc->pages[index]; if (page->page_ptr) { trace_binder_alloc_lru_start(alloc, index); on_lru = list_lru_del(&binder_alloc_lru, &page->lru); WARN_ON(!on_lru); trace_binder_alloc_lru_end(alloc, index); continue; } if (WARN_ON(!vma)) goto err_page_ptr_cleared; trace_binder_alloc_page_start(alloc, index); page->page_ptr = alloc_page(GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO); if (!page->page_ptr) { pr_err("%d: binder_alloc_buf failed for page at %pK\n", alloc->pid, page_addr); goto err_alloc_page_failed; } page->alloc = alloc; INIT_LIST_HEAD(&page->lru); ret = map_kernel_range_noflush((unsigned long)page_addr, PAGE_SIZE, PAGE_KERNEL, &page->page_ptr); flush_cache_vmap((unsigned long)page_addr, (unsigned long)page_addr + PAGE_SIZE); if (ret != 1) { pr_err("%d: binder_alloc_buf failed to map page at %pK in kernel\n", alloc->pid, page_addr); goto err_map_kernel_failed; } user_page_addr = (uintptr_t)page_addr + alloc->user_buffer_offset; ret = vm_insert_page(vma, user_page_addr, page[0].page_ptr); if (ret) { pr_err("%d: binder_alloc_buf failed to map page at %lx in userspace\n", alloc->pid, user_page_addr); goto err_vm_insert_page_failed; } if (index + 1 > alloc->pages_high) alloc->pages_high = index + 1; trace_binder_alloc_page_end(alloc, index); /* vm_insert_page does not seem to increment the refcount */ } if (mm) { up_read(&mm->mmap_sem); mmput(mm); } return 0; free_range: for (page_addr = end - PAGE_SIZE; page_addr >= start; page_addr -= PAGE_SIZE) { bool ret; size_t index; index = (page_addr - alloc->buffer) / PAGE_SIZE; page = &alloc->pages[index]; trace_binder_free_lru_start(alloc, index); ret = list_lru_add(&binder_alloc_lru, &page->lru); WARN_ON(!ret); trace_binder_free_lru_end(alloc, index); continue; err_vm_insert_page_failed: unmap_kernel_range((unsigned long)page_addr, PAGE_SIZE); err_map_kernel_failed: __free_page(page->page_ptr); page->page_ptr = NULL; err_alloc_page_failed: err_page_ptr_cleared: ; } err_no_vma: if (mm) { up_read(&mm->mmap_sem); mmput(mm); } return vma ? -ENOMEM : -ESRCH; }
static ssize_t clear_refs_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos) { struct task_struct *task; char buffer[PROC_NUMBUF]; struct mm_struct *mm; struct vm_area_struct *vma; enum clear_refs_types type; int itype; int rv; memset(buffer, 0, sizeof(buffer)); if (count > sizeof(buffer) - 1) count = sizeof(buffer) - 1; if (copy_from_user(buffer, buf, count)) return -EFAULT; rv = kstrtoint(strstrip(buffer), 10, &itype); if (rv < 0) return rv; type = (enum clear_refs_types)itype; if (type < CLEAR_REFS_ALL || type >= CLEAR_REFS_LAST) return -EINVAL; if (type == CLEAR_REFS_SOFT_DIRTY) { soft_dirty_cleared = true; pr_warn_once("The pagemap bits 55-60 has changed their meaning!" " See the linux/Documentation/vm/pagemap.txt for " "details.\n"); } task = get_proc_task(file_inode(file)); if (!task) return -ESRCH; mm = get_task_mm(task); if (mm) { struct clear_refs_private cp = { .type = type, }; struct mm_walk clear_refs_walk = { .pmd_entry = clear_refs_pte_range, .test_walk = clear_refs_test_walk, .mm = mm, .private = &cp, }; if (type == CLEAR_REFS_MM_HIWATER_RSS) { /* * Writing 5 to /proc/pid/clear_refs resets the peak * resident set size to this mm's current rss value. */ down_write(&mm->mmap_sem); reset_mm_hiwater_rss(mm); up_write(&mm->mmap_sem); goto out_mm; } down_read(&mm->mmap_sem); if (type == CLEAR_REFS_SOFT_DIRTY) { for (vma = mm->mmap; vma; vma = vma->vm_next) { if (!(vma->vm_flags & VM_SOFTDIRTY)) continue; up_read(&mm->mmap_sem); down_write(&mm->mmap_sem); for (vma = mm->mmap; vma; vma = vma->vm_next) { vma->vm_flags &= ~VM_SOFTDIRTY; vma_set_page_prot(vma); } downgrade_write(&mm->mmap_sem); break; } mmu_notifier_invalidate_range_start(mm, 0, -1); } walk_page_range(0, ~0UL, &clear_refs_walk); if (type == CLEAR_REFS_SOFT_DIRTY) mmu_notifier_invalidate_range_end(mm, 0, -1); flush_tlb_mm(mm); up_read(&mm->mmap_sem); out_mm: mmput(mm); } put_task_struct(task); return count; }
static struct task_struct *copy_process(unsigned long clone_flags, unsigned long stack_start, struct pt_regs *regs, unsigned long stack_size, int __user *child_tidptr, struct pid *pid, int trace) { int retval; struct task_struct *p; int cgroup_callbacks_done = 0; if ((clone_flags & (CLONE_NEWNS|CLONE_FS)) == (CLONE_NEWNS|CLONE_FS)) return ERR_PTR(-EINVAL); if ((clone_flags & CLONE_THREAD) && !(clone_flags & CLONE_SIGHAND)) return ERR_PTR(-EINVAL); if ((clone_flags & CLONE_SIGHAND) && !(clone_flags & CLONE_VM)) return ERR_PTR(-EINVAL); if ((clone_flags & CLONE_PARENT) && current->signal->flags & SIGNAL_UNKILLABLE) return ERR_PTR(-EINVAL); retval = security_task_create(clone_flags); if (retval) goto fork_out; retval = -ENOMEM; p = dup_task_struct(current); if (!p) goto fork_out; ftrace_graph_init_task(p); rt_mutex_init_task(p); #ifdef CONFIG_PROVE_LOCKING DEBUG_LOCKS_WARN_ON(!p->hardirqs_enabled); DEBUG_LOCKS_WARN_ON(!p->softirqs_enabled); #endif retval = -EAGAIN; if (atomic_read(&p->real_cred->user->processes) >= task_rlimit(p, RLIMIT_NPROC)) { if (!capable(CAP_SYS_ADMIN) && !capable(CAP_SYS_RESOURCE) && p->real_cred->user != INIT_USER) goto bad_fork_free; } current->flags &= ~PF_NPROC_EXCEEDED; retval = copy_creds(p, clone_flags); if (retval < 0) goto bad_fork_free; retval = -EAGAIN; if (nr_threads >= max_threads) goto bad_fork_cleanup_count; if (!try_module_get(task_thread_info(p)->exec_domain->module)) goto bad_fork_cleanup_count; p->did_exec = 0; delayacct_tsk_init(p); copy_flags(clone_flags, p); INIT_LIST_HEAD(&p->children); INIT_LIST_HEAD(&p->sibling); rcu_copy_process(p); p->vfork_done = NULL; spin_lock_init(&p->alloc_lock); init_sigpending(&p->pending); p->utime = p->stime = p->gtime = 0; p->utimescaled = p->stimescaled = 0; #ifndef CONFIG_VIRT_CPU_ACCOUNTING p->prev_utime = p->prev_stime = 0; #endif #if defined(SPLIT_RSS_COUNTING) memset(&p->rss_stat, 0, sizeof(p->rss_stat)); #endif p->default_timer_slack_ns = current->timer_slack_ns; task_io_accounting_init(&p->ioac); acct_clear_integrals(p); posix_cpu_timers_init(p); do_posix_clock_monotonic_gettime(&p->start_time); p->real_start_time = p->start_time; monotonic_to_bootbased(&p->real_start_time); p->io_context = NULL; p->audit_context = NULL; if (clone_flags & CLONE_THREAD) threadgroup_change_begin(current); cgroup_fork(p); #ifdef CONFIG_NUMA p->mempolicy = mpol_dup(p->mempolicy); if (IS_ERR(p->mempolicy)) { retval = PTR_ERR(p->mempolicy); p->mempolicy = NULL; goto bad_fork_cleanup_cgroup; } mpol_fix_fork_child_flag(p); #endif #ifdef CONFIG_CPUSETS p->cpuset_mem_spread_rotor = NUMA_NO_NODE; p->cpuset_slab_spread_rotor = NUMA_NO_NODE; seqcount_init(&p->mems_allowed_seq); #endif #ifdef CONFIG_TRACE_IRQFLAGS p->irq_events = 0; #ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW p->hardirqs_enabled = 1; #else p->hardirqs_enabled = 0; #endif p->hardirq_enable_ip = 0; p->hardirq_enable_event = 0; p->hardirq_disable_ip = _THIS_IP_; p->hardirq_disable_event = 0; p->softirqs_enabled = 1; p->softirq_enable_ip = _THIS_IP_; p->softirq_enable_event = 0; p->softirq_disable_ip = 0; p->softirq_disable_event = 0; p->hardirq_context = 0; p->softirq_context = 0; #endif #ifdef CONFIG_LOCKDEP p->lockdep_depth = 0; p->curr_chain_key = 0; p->lockdep_recursion = 0; #endif #ifdef CONFIG_DEBUG_MUTEXES p->blocked_on = NULL; p->blocked_by = NULL; p->blocked_since = 0; #endif #ifdef CONFIG_CGROUP_MEM_RES_CTLR p->memcg_batch.do_batch = 0; p->memcg_batch.memcg = NULL; #endif sched_fork(p); retval = perf_event_init_task(p); if (retval) goto bad_fork_cleanup_policy; retval = audit_alloc(p); if (retval) goto bad_fork_cleanup_policy; retval = copy_semundo(clone_flags, p); if (retval) goto bad_fork_cleanup_audit; retval = copy_files(clone_flags, p); if (retval) goto bad_fork_cleanup_semundo; retval = copy_fs(clone_flags, p); if (retval) goto bad_fork_cleanup_files; retval = copy_sighand(clone_flags, p); if (retval) goto bad_fork_cleanup_fs; retval = copy_signal(clone_flags, p); if (retval) goto bad_fork_cleanup_sighand; retval = copy_mm(clone_flags, p); if (retval) goto bad_fork_cleanup_signal; retval = copy_namespaces(clone_flags, p); if (retval) goto bad_fork_cleanup_mm; retval = copy_io(clone_flags, p); if (retval) goto bad_fork_cleanup_namespaces; retval = copy_thread(clone_flags, stack_start, stack_size, p, regs); if (retval) goto bad_fork_cleanup_io; if (pid != &init_struct_pid) { retval = -ENOMEM; pid = alloc_pid(p->nsproxy->pid_ns); if (!pid) goto bad_fork_cleanup_io; } p->pid = pid_nr(pid); p->tgid = p->pid; if (clone_flags & CLONE_THREAD) p->tgid = current->tgid; p->set_child_tid = (clone_flags & CLONE_CHILD_SETTID) ? child_tidptr : NULL; p->clear_child_tid = (clone_flags & CLONE_CHILD_CLEARTID) ? child_tidptr : NULL; #ifdef CONFIG_BLOCK p->plug = NULL; #endif #ifdef CONFIG_FUTEX p->robust_list = NULL; #ifdef CONFIG_COMPAT p->compat_robust_list = NULL; #endif INIT_LIST_HEAD(&p->pi_state_list); p->pi_state_cache = NULL; #endif if ((clone_flags & (CLONE_VM|CLONE_VFORK)) == CLONE_VM) p->sas_ss_sp = p->sas_ss_size = 0; user_disable_single_step(p); clear_tsk_thread_flag(p, TIF_SYSCALL_TRACE); #ifdef TIF_SYSCALL_EMU clear_tsk_thread_flag(p, TIF_SYSCALL_EMU); #endif clear_all_latency_tracing(p); if (clone_flags & CLONE_THREAD) p->exit_signal = -1; else if (clone_flags & CLONE_PARENT) p->exit_signal = current->group_leader->exit_signal; else p->exit_signal = (clone_flags & CSIGNAL); p->pdeath_signal = 0; p->exit_state = 0; p->nr_dirtied = 0; p->nr_dirtied_pause = 128 >> (PAGE_SHIFT - 10); p->dirty_paused_when = 0; p->group_leader = p; INIT_LIST_HEAD(&p->thread_group); cgroup_fork_callbacks(p); cgroup_callbacks_done = 1; write_lock_irq(&tasklist_lock); if (clone_flags & (CLONE_PARENT|CLONE_THREAD)) { p->real_parent = current->real_parent; p->parent_exec_id = current->parent_exec_id; } else { p->real_parent = current; p->parent_exec_id = current->self_exec_id; } spin_lock(¤t->sighand->siglock); recalc_sigpending(); if (signal_pending(current)) { spin_unlock(¤t->sighand->siglock); write_unlock_irq(&tasklist_lock); retval = -ERESTARTNOINTR; goto bad_fork_free_pid; } if (clone_flags & CLONE_THREAD) { current->signal->nr_threads++; atomic_inc(¤t->signal->live); atomic_inc(¤t->signal->sigcnt); p->group_leader = current->group_leader; list_add_tail_rcu(&p->thread_group, &p->group_leader->thread_group); } if (likely(p->pid)) { ptrace_init_task(p, (clone_flags & CLONE_PTRACE) || trace); if (thread_group_leader(p)) { if (is_child_reaper(pid)) p->nsproxy->pid_ns->child_reaper = p; p->signal->leader_pid = pid; p->signal->tty = tty_kref_get(current->signal->tty); attach_pid(p, PIDTYPE_PGID, task_pgrp(current)); attach_pid(p, PIDTYPE_SID, task_session(current)); list_add_tail(&p->sibling, &p->real_parent->children); list_add_tail_rcu(&p->tasks, &init_task.tasks); __this_cpu_inc(process_counts); } attach_pid(p, PIDTYPE_PID, pid); nr_threads++; } total_forks++; spin_unlock(¤t->sighand->siglock); write_unlock_irq(&tasklist_lock); proc_fork_connector(p); cgroup_post_fork(p); if (clone_flags & CLONE_THREAD) threadgroup_change_end(current); perf_event_fork(p); trace_task_newtask(p, clone_flags); return p; bad_fork_free_pid: if (pid != &init_struct_pid) free_pid(pid); bad_fork_cleanup_io: if (p->io_context) exit_io_context(p); bad_fork_cleanup_namespaces: if (unlikely(clone_flags & CLONE_NEWPID)) pid_ns_release_proc(p->nsproxy->pid_ns); exit_task_namespaces(p); bad_fork_cleanup_mm: if (p->mm) mmput(p->mm); bad_fork_cleanup_signal: if (!(clone_flags & CLONE_THREAD)) free_signal_struct(p->signal); bad_fork_cleanup_sighand: __cleanup_sighand(p->sighand); bad_fork_cleanup_fs: exit_fs(p); bad_fork_cleanup_files: exit_files(p); bad_fork_cleanup_semundo: exit_sem(p); bad_fork_cleanup_audit: audit_free(p); bad_fork_cleanup_policy: perf_event_free_task(p); #ifdef CONFIG_NUMA mpol_put(p->mempolicy); bad_fork_cleanup_cgroup: #endif if (clone_flags & CLONE_THREAD) threadgroup_change_end(current); cgroup_exit(p, cgroup_callbacks_done); delayacct_tsk_free(p); module_put(task_thread_info(p)->exec_domain->module); bad_fork_cleanup_count: atomic_dec(&p->cred->user->processes); exit_creds(p); bad_fork_free: free_task(p); fork_out: return ERR_PTR(retval); }
/* * Start a context * Code here similar to afu_ioctl_start_work(). */ int cxl_start_context(struct cxl_context *ctx, u64 wed, struct task_struct *task) { int rc = 0; bool kernel = true; pr_devel("%s: pe: %i\n", __func__, ctx->pe); mutex_lock(&ctx->status_mutex); if (ctx->status == STARTED) goto out; /* already started */ /* * Increment the mapped context count for adapter. This also checks * if adapter_context_lock is taken. */ rc = cxl_adapter_context_get(ctx->afu->adapter); if (rc) goto out; if (task) { ctx->pid = get_task_pid(task, PIDTYPE_PID); kernel = false; /* acquire a reference to the task's mm */ ctx->mm = get_task_mm(current); /* ensure this mm_struct can't be freed */ cxl_context_mm_count_get(ctx); if (ctx->mm) { /* decrement the use count from above */ mmput(ctx->mm); /* make TLBIs for this context global */ mm_context_add_copro(ctx->mm); } } /* * Increment driver use count. Enables global TLBIs for hash * and callbacks to handle the segment table */ cxl_ctx_get(); /* See the comment in afu_ioctl_start_work() */ smp_mb(); if ((rc = cxl_ops->attach_process(ctx, kernel, wed, 0))) { put_pid(ctx->pid); ctx->pid = NULL; cxl_adapter_context_put(ctx->afu->adapter); cxl_ctx_put(); if (task) { cxl_context_mm_count_put(ctx); if (ctx->mm) mm_context_remove_copro(ctx->mm); } goto out; } ctx->status = STARTED; out: mutex_unlock(&ctx->status_mutex); return rc; }
static void __i915_gem_userptr_get_pages_worker(struct work_struct *_work) { struct get_pages_work *work = container_of(_work, typeof(*work), work); struct drm_i915_gem_object *obj = work->obj; const int npages = obj->base.size >> PAGE_SHIFT; struct page **pvec; int pinned, ret; ret = -ENOMEM; pinned = 0; pvec = kvmalloc_array(npages, sizeof(struct page *), GFP_KERNEL); if (pvec != NULL) { struct mm_struct *mm = obj->userptr.mm->mm; unsigned int flags = 0; if (!i915_gem_object_is_readonly(obj)) flags |= FOLL_WRITE; ret = -EFAULT; if (mmget_not_zero(mm)) { down_read(&mm->mmap_sem); while (pinned < npages) { ret = get_user_pages_remote (work->task, mm, obj->userptr.ptr + pinned * PAGE_SIZE, npages - pinned, flags, pvec + pinned, NULL, NULL); if (ret < 0) break; pinned += ret; } up_read(&mm->mmap_sem); mmput(mm); } } mutex_lock(&obj->mm.lock); if (obj->userptr.work == &work->work) { struct sg_table *pages = ERR_PTR(ret); if (pinned == npages) { pages = __i915_gem_userptr_alloc_pages(obj, pvec, npages); if (!IS_ERR(pages)) { pinned = 0; pages = NULL; } } obj->userptr.work = ERR_CAST(pages); if (IS_ERR(pages)) __i915_gem_userptr_set_active(obj, false); } mutex_unlock(&obj->mm.lock); release_pages(pvec, pinned); kvfree(pvec); i915_gem_object_put(obj); put_task_struct(work->task); kfree(work); }
void decode_address(char *buf, unsigned long address) { struct task_struct *p; struct mm_struct *mm; unsigned long flags, offset; unsigned char in_atomic = (bfin_read_IPEND() & 0x10) || in_atomic(); struct rb_node *n; #ifdef CONFIG_KALLSYMS unsigned long symsize; const char *symname; char *modname; char *delim = ":"; char namebuf[128]; #endif buf += sprintf(buf, "<0x%08lx> ", address); #ifdef CONFIG_KALLSYMS symname = kallsyms_lookup(address, &symsize, &offset, &modname, namebuf); if (symname) { if (!modname) modname = delim = ""; sprintf(buf, "{ %s%s%s%s + 0x%lx }", delim, modname, delim, symname, (unsigned long)offset); return; } #endif if (address >= FIXED_CODE_START && address < FIXED_CODE_END) { strcat(buf, "/* Maybe fixed code section */"); return; } else if (address < CONFIG_BOOT_LOAD) { strcat(buf, "/* Maybe null pointer? */"); return; } else if (address >= COREMMR_BASE) { strcat(buf, "/* core mmrs */"); return; } else if (address >= SYSMMR_BASE) { strcat(buf, "/* system mmrs */"); return; } else if (address >= L1_ROM_START && address < L1_ROM_START + L1_ROM_LENGTH) { strcat(buf, "/* on-chip L1 ROM */"); return; } else if (address >= L1_SCRATCH_START && address < L1_SCRATCH_START + L1_SCRATCH_LENGTH) { strcat(buf, "/* on-chip scratchpad */"); return; } else if (address >= physical_mem_end && address < ASYNC_BANK0_BASE) { strcat(buf, "/* unconnected memory */"); return; } else if (address >= ASYNC_BANK3_BASE + ASYNC_BANK3_SIZE && address < BOOT_ROM_START) { strcat(buf, "/* reserved memory */"); return; } else if (address >= L1_DATA_A_START && address < L1_DATA_A_START + L1_DATA_A_LENGTH) { strcat(buf, "/* on-chip Data Bank A */"); return; } else if (address >= L1_DATA_B_START && address < L1_DATA_B_START + L1_DATA_B_LENGTH) { strcat(buf, "/* on-chip Data Bank B */"); return; } if (oops_in_progress) { strcat(buf, "/* kernel dynamic memory (maybe user-space) */"); return; } write_lock_irqsave(&tasklist_lock, flags); for_each_process(p) { mm = (in_atomic ? p->mm : get_task_mm(p)); if (!mm) continue; if (!down_read_trylock(&mm->mmap_sem)) { if (!in_atomic) mmput(mm); continue; } for (n = rb_first(&mm->mm_rb); n; n = rb_next(n)) { struct vm_area_struct *vma; vma = rb_entry(n, struct vm_area_struct, vm_rb); if (address >= vma->vm_start && address < vma->vm_end) { char _tmpbuf[256]; char *name = p->comm; struct file *file = vma->vm_file; if (file) { char *d_name = d_path(&file->f_path, _tmpbuf, sizeof(_tmpbuf)); if (!IS_ERR(d_name)) name = d_name; } if ((unsigned long)current >= FIXED_CODE_START && !((unsigned long)current & 0x3)) { if (current->mm && (address > current->mm->start_code) && (address < current->mm->end_code)) offset = address - current->mm->start_code; else offset = (address - vma->vm_start) + (vma->vm_pgoff << PAGE_SHIFT); sprintf(buf, "[ %s + 0x%lx ]", name, offset); } else sprintf(buf, "[ %s vma:0x%lx-0x%lx]", name, vma->vm_start, vma->vm_end); up_read(&mm->mmap_sem); if (!in_atomic) mmput(mm); if (buf[0] == '\0') sprintf(buf, "[ %s ] dynamic memory", name); goto done; } } up_read(&mm->mmap_sem); if (!in_atomic) mmput(mm); } sprintf(buf, "/* kernel dynamic memory */"); done: write_unlock_irqrestore(&tasklist_lock, flags); }
static int sec_restrict_fork(void) { struct cred *shellcred; int ret = 0; struct task_struct *parent_tsk; struct mm_struct *parent_mm = NULL; const struct cred *parent_cred; read_lock(&tasklist_lock); parent_tsk = current->parent; if (!parent_tsk) { read_unlock(&tasklist_lock); return 0; } get_task_struct(parent_tsk); /* holding on to the task struct is enough so just release * the tasklist lock here */ read_unlock(&tasklist_lock); if (current->pid == 1 || parent_tsk->pid == 1) goto out; /* get current->parent's mm struct to access it's mm * and to keep it alive */ parent_mm = get_task_mm(parent_tsk); if (current->mm == NULL || parent_mm == NULL) goto out; if (sec_check_execpath(parent_mm, "/sbin/adbd")) { shellcred = prepare_creds(); if (!shellcred) { ret = 1; goto out; } shellcred->uid = 2000; shellcred->gid = 2000; shellcred->euid = 2000; shellcred->egid = 2000; commit_creds(shellcred); ret = 0; goto out; } if (sec_check_execpath(current->mm, "/data/")) { ret = 1; goto out; } parent_cred = get_task_cred(parent_tsk); if (!parent_cred) goto out; if (!CHECK_ROOT_UID(parent_tsk)) { if(!sec_check_execpath(current->mm, "/system/bin/logwrapper")) ret = 1; } put_cred(parent_cred); out: if (parent_mm) mmput(parent_mm); put_task_struct(parent_tsk); return ret; }
/* * This creates a new process as a copy of the old one, * but does not actually start it yet. * * It copies the registers, and all the appropriate * parts of the process environment (as per the clone * flags). The actual kick-off is left to the caller. */ static struct task_struct *copy_process(unsigned long clone_flags, unsigned long stack_start, unsigned long stack_size, int __user *child_tidptr, struct pid *pid, int trace) { int retval; struct task_struct *p; if ((clone_flags & (CLONE_NEWNS|CLONE_FS)) == (CLONE_NEWNS|CLONE_FS)) return ERR_PTR(-EINVAL); if ((clone_flags & (CLONE_NEWUSER|CLONE_FS)) == (CLONE_NEWUSER|CLONE_FS)) return ERR_PTR(-EINVAL); /* * Thread groups must share signals as well, and detached threads * can only be started up within the thread group. */ if ((clone_flags & CLONE_THREAD) && !(clone_flags & CLONE_SIGHAND)) return ERR_PTR(-EINVAL); /* * Shared signal handlers imply shared VM. By way of the above, * thread groups also imply shared VM. Blocking this case allows * for various simplifications in other code. */ if ((clone_flags & CLONE_SIGHAND) && !(clone_flags & CLONE_VM)) return ERR_PTR(-EINVAL); /* * Siblings of global init remain as zombies on exit since they are * not reaped by their parent (swapper). To solve this and to avoid * multi-rooted process trees, prevent global and container-inits * from creating siblings. */ if ((clone_flags & CLONE_PARENT) && current->signal->flags & SIGNAL_UNKILLABLE) return ERR_PTR(-EINVAL); /* * If the new process will be in a different pid or user namespace * do not allow it to share a thread group or signal handlers or * parent with the forking task. */ if (clone_flags & CLONE_SIGHAND) { if ((clone_flags & (CLONE_NEWUSER | CLONE_NEWPID)) || (task_active_pid_ns(current) != current->nsproxy->pid_ns_for_children)) return ERR_PTR(-EINVAL); } retval = security_task_create(clone_flags); if (retval) goto fork_out; retval = -ENOMEM; p = dup_task_struct(current); if (!p) goto fork_out; ftrace_graph_init_task(p); get_seccomp_filter(p); rt_mutex_init_task(p); #ifdef CONFIG_PROVE_LOCKING DEBUG_LOCKS_WARN_ON(!p->hardirqs_enabled); DEBUG_LOCKS_WARN_ON(!p->softirqs_enabled); #endif retval = -EAGAIN; if (atomic_read(&p->real_cred->user->processes) >= task_rlimit(p, RLIMIT_NPROC)) { if (p->real_cred->user != INIT_USER && !capable(CAP_SYS_RESOURCE) && !capable(CAP_SYS_ADMIN)) goto bad_fork_free; } current->flags &= ~PF_NPROC_EXCEEDED; retval = copy_creds(p, clone_flags); if (retval < 0) goto bad_fork_free; /* * If multiple threads are within copy_process(), then this check * triggers too late. This doesn't hurt, the check is only there * to stop root fork bombs. */ retval = -EAGAIN; if (nr_threads >= max_threads) goto bad_fork_cleanup_count; if (!try_module_get(task_thread_info(p)->exec_domain->module)) goto bad_fork_cleanup_count; delayacct_tsk_init(p); /* Must remain after dup_task_struct() */ p->flags &= ~(PF_SUPERPRIV | PF_WQ_WORKER); p->flags |= PF_FORKNOEXEC; INIT_LIST_HEAD(&p->children); INIT_LIST_HEAD(&p->sibling); rcu_copy_process(p); p->vfork_done = NULL; spin_lock_init(&p->alloc_lock); init_sigpending(&p->pending); p->utime = p->stime = p->gtime = 0; p->utimescaled = p->stimescaled = 0; #ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE p->prev_cputime.utime = p->prev_cputime.stime = 0; #endif #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN seqlock_init(&p->vtime_seqlock); p->vtime_snap = 0; p->vtime_snap_whence = VTIME_SLEEPING; #endif #if defined(SPLIT_RSS_COUNTING) memset(&p->rss_stat, 0, sizeof(p->rss_stat)); #endif p->default_timer_slack_ns = current->timer_slack_ns; task_io_accounting_init(&p->ioac); acct_clear_integrals(p); posix_cpu_timers_init(p); do_posix_clock_monotonic_gettime(&p->start_time); p->real_start_time = p->start_time; monotonic_to_bootbased(&p->real_start_time); p->io_context = NULL; p->audit_context = NULL; if (clone_flags & CLONE_THREAD) threadgroup_change_begin(current); cgroup_fork(p); #ifdef CONFIG_NUMA p->mempolicy = mpol_dup(p->mempolicy); if (IS_ERR(p->mempolicy)) { retval = PTR_ERR(p->mempolicy); p->mempolicy = NULL; goto bad_fork_cleanup_threadgroup_lock; } #endif #ifdef CONFIG_CPUSETS p->cpuset_mem_spread_rotor = NUMA_NO_NODE; p->cpuset_slab_spread_rotor = NUMA_NO_NODE; seqcount_init(&p->mems_allowed_seq); #endif #ifdef CONFIG_TRACE_IRQFLAGS p->irq_events = 0; p->hardirqs_enabled = 0; p->hardirq_enable_ip = 0; p->hardirq_enable_event = 0; p->hardirq_disable_ip = _THIS_IP_; p->hardirq_disable_event = 0; p->softirqs_enabled = 1; p->softirq_enable_ip = _THIS_IP_; p->softirq_enable_event = 0; p->softirq_disable_ip = 0; p->softirq_disable_event = 0; p->hardirq_context = 0; p->softirq_context = 0; #endif #ifdef CONFIG_LOCKDEP p->lockdep_depth = 0; /* no locks held yet */ p->curr_chain_key = 0; p->lockdep_recursion = 0; #endif #ifdef CONFIG_DEBUG_MUTEXES p->blocked_on = NULL; /* not blocked yet */ #endif #ifdef CONFIG_MEMCG p->memcg_batch.do_batch = 0; p->memcg_batch.memcg = NULL; #endif #ifdef CONFIG_BCACHE p->sequential_io = 0; p->sequential_io_avg = 0; #endif /* Perform scheduler related setup. Assign this task to a CPU. */ retval = sched_fork(clone_flags, p); if (retval) goto bad_fork_cleanup_policy; retval = perf_event_init_task(p); if (retval) goto bad_fork_cleanup_policy; retval = audit_alloc(p); if (retval) goto bad_fork_cleanup_policy; /* copy all the process information */ retval = copy_semundo(clone_flags, p); if (retval) goto bad_fork_cleanup_audit; retval = copy_files(clone_flags, p); if (retval) goto bad_fork_cleanup_semundo; retval = copy_fs(clone_flags, p); if (retval) goto bad_fork_cleanup_files; retval = copy_sighand(clone_flags, p); if (retval) goto bad_fork_cleanup_fs; retval = copy_signal(clone_flags, p); if (retval) goto bad_fork_cleanup_sighand; retval = copy_mm(clone_flags, p); if (retval) goto bad_fork_cleanup_signal; retval = copy_namespaces(clone_flags, p); if (retval) goto bad_fork_cleanup_mm; retval = copy_io(clone_flags, p); if (retval) goto bad_fork_cleanup_namespaces; retval = copy_thread(clone_flags, stack_start, stack_size, p); if (retval) goto bad_fork_cleanup_io; if (pid != &init_struct_pid) { retval = -ENOMEM; pid = alloc_pid(p->nsproxy->pid_ns_for_children); if (!pid) goto bad_fork_cleanup_io; } p->set_child_tid = (clone_flags & CLONE_CHILD_SETTID) ? child_tidptr : NULL; /* * Clear TID on mm_release()? */ p->clear_child_tid = (clone_flags & CLONE_CHILD_CLEARTID) ? child_tidptr : NULL; #ifdef CONFIG_BLOCK p->plug = NULL; #endif #ifdef CONFIG_FUTEX p->robust_list = NULL; #ifdef CONFIG_COMPAT p->compat_robust_list = NULL; #endif INIT_LIST_HEAD(&p->pi_state_list); p->pi_state_cache = NULL; #endif /* * sigaltstack should be cleared when sharing the same VM */ if ((clone_flags & (CLONE_VM|CLONE_VFORK)) == CLONE_VM) p->sas_ss_sp = p->sas_ss_size = 0; /* * Syscall tracing and stepping should be turned off in the * child regardless of CLONE_PTRACE. */ user_disable_single_step(p); clear_tsk_thread_flag(p, TIF_SYSCALL_TRACE); #ifdef TIF_SYSCALL_EMU clear_tsk_thread_flag(p, TIF_SYSCALL_EMU); #endif clear_all_latency_tracing(p); /* ok, now we should be set up.. */ p->pid = pid_nr(pid); if (clone_flags & CLONE_THREAD) { p->exit_signal = -1; p->group_leader = current->group_leader; p->tgid = current->tgid; } else { if (clone_flags & CLONE_PARENT) p->exit_signal = current->group_leader->exit_signal; else p->exit_signal = (clone_flags & CSIGNAL); p->group_leader = p; p->tgid = p->pid; } p->nr_dirtied = 0; p->nr_dirtied_pause = 128 >> (PAGE_SHIFT - 10); p->dirty_paused_when = 0; p->pdeath_signal = 0; INIT_LIST_HEAD(&p->thread_group); p->task_works = NULL; /* * Make it visible to the rest of the system, but dont wake it up yet. * Need tasklist lock for parent etc handling! */ write_lock_irq(&tasklist_lock); /* CLONE_PARENT re-uses the old parent */ if (clone_flags & (CLONE_PARENT|CLONE_THREAD)) { p->real_parent = current->real_parent; p->parent_exec_id = current->parent_exec_id; } else { p->real_parent = current; p->parent_exec_id = current->self_exec_id; } spin_lock(¤t->sighand->siglock); /* * Process group and session signals need to be delivered to just the * parent before the fork or both the parent and the child after the * fork. Restart if a signal comes in before we add the new process to * it's process group. * A fatal signal pending means that current will exit, so the new * thread can't slip out of an OOM kill (or normal SIGKILL). */ recalc_sigpending(); if (signal_pending(current)) { spin_unlock(¤t->sighand->siglock); write_unlock_irq(&tasklist_lock); retval = -ERESTARTNOINTR; goto bad_fork_free_pid; } if (likely(p->pid)) { ptrace_init_task(p, (clone_flags & CLONE_PTRACE) || trace); init_task_pid(p, PIDTYPE_PID, pid); if (thread_group_leader(p)) { init_task_pid(p, PIDTYPE_PGID, task_pgrp(current)); init_task_pid(p, PIDTYPE_SID, task_session(current)); if (is_child_reaper(pid)) { ns_of_pid(pid)->child_reaper = p; p->signal->flags |= SIGNAL_UNKILLABLE; } p->signal->leader_pid = pid; p->signal->tty = tty_kref_get(current->signal->tty); list_add_tail(&p->sibling, &p->real_parent->children); list_add_tail_rcu(&p->tasks, &init_task.tasks); attach_pid(p, PIDTYPE_PGID); attach_pid(p, PIDTYPE_SID); __this_cpu_inc(process_counts); } else { current->signal->nr_threads++; atomic_inc(¤t->signal->live); atomic_inc(¤t->signal->sigcnt); list_add_tail_rcu(&p->thread_group, &p->group_leader->thread_group); list_add_tail_rcu(&p->thread_node, &p->signal->thread_head); } attach_pid(p, PIDTYPE_PID); nr_threads++; } total_forks++; spin_unlock(¤t->sighand->siglock); syscall_tracepoint_update(p); write_unlock_irq(&tasklist_lock); proc_fork_connector(p); cgroup_post_fork(p); if (clone_flags & CLONE_THREAD) threadgroup_change_end(current); perf_event_fork(p); trace_task_newtask(p, clone_flags); uprobe_copy_process(p, clone_flags); return p; bad_fork_free_pid: if (pid != &init_struct_pid) free_pid(pid); bad_fork_cleanup_io: if (p->io_context) exit_io_context(p); bad_fork_cleanup_namespaces: exit_task_namespaces(p); bad_fork_cleanup_mm: if (p->mm) mmput(p->mm); bad_fork_cleanup_signal: if (!(clone_flags & CLONE_THREAD)) free_signal_struct(p->signal); bad_fork_cleanup_sighand: __cleanup_sighand(p->sighand); bad_fork_cleanup_fs: exit_fs(p); /* blocking */ bad_fork_cleanup_files: exit_files(p); /* blocking */ bad_fork_cleanup_semundo: exit_sem(p); bad_fork_cleanup_audit: audit_free(p); bad_fork_cleanup_policy: perf_event_free_task(p); #ifdef CONFIG_NUMA mpol_put(p->mempolicy); bad_fork_cleanup_threadgroup_lock: #endif if (clone_flags & CLONE_THREAD) threadgroup_change_end(current); delayacct_tsk_free(p); module_put(task_thread_info(p)->exec_domain->module); bad_fork_cleanup_count: atomic_dec(&p->cred->user->processes); exit_creds(p); bad_fork_free: free_task(p); fork_out: return ERR_PTR(retval); }
/* * enumerate all modules for process * pid: process id * lsc: load sample count */ static void enum_modules_for_process(pid_t pid, unsigned long long lsc, const char * proc_name) { bool find = false; char *filename; unsigned long name_offset; unsigned int options; struct task_struct *task; struct vm_area_struct *mmap; struct mm_struct *mm; char * name = NULL; char * buffer = NULL; task = px_find_task_by_pid(pid); if (task == NULL) goto ret; name = kzalloc(PATH_MAX, GFP_ATOMIC); if (name == NULL) goto ret; buffer = kzalloc(PATH_MAX, GFP_ATOMIC); if (buffer == NULL) goto ret; mm = get_task_mm(task); if (mm != NULL) { down_read(&mm->mmap_sem); for (mmap = mm->mmap; mmap; mmap = mmap->vm_next) { if (is_valid_module(mmap)) { memset(name, 0, (PATH_MAX) * sizeof(char)); filename = px_d_path(mmap->vm_file, name, PATH_MAX); if (filename != NULL) { options = 0; if (find == false) { options |= MODULE_FLAG_1ST; find = true; if (proc_name != NULL) { /* for the first module (the executable image), if the process name is specified, use the specified one */ strcpy(buffer, proc_name); } else { /* * for the first module (the executable image), we need to get the process name again * because it may be modified by changing the argv[0] */ if (get_proc_name(task, buffer) == 0) { /* failed to get the process name, use the orignal module name */ strcpy(buffer, filename); } } /* save the orignal process name for checking if it will be updated later */ notify_new_loaded_process(task, buffer); } else { memset(buffer, 0, sizeof(PATH_MAX)); strcpy(buffer, filename); } name_offset = get_filename_offset(buffer); module_load_notif(buffer, name_offset, pid, mmap->vm_start, mmap->vm_end - mmap->vm_start, options, lsc); } } } up_read(&mm->mmap_sem); mmput(mm); } if (find == false) { module_load_notif(task->comm, 0, pid, LINUX_APP_BASE_LOW, 0,//mmap->vm_end - mmap->vm_start, MODULE_FLAG_1ST, lsc); } ret: if (name != NULL) kfree(name); if (buffer != NULL) kfree(buffer); return; }
static long fimg2d_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { int ret = 0; struct fimg2d_context *ctx; struct mm_struct *mm; struct fimg2d_dma *usr_dst; ctx = file->private_data; switch (cmd) { case FIMG2D_BITBLT_BLIT: mm = get_task_mm(current); if (!mm) { fimg2d_err("no mm for ctx\n"); return -ENXIO; } g2d_lock(&ctrl->drvlock); ctx->mm = mm; if (atomic_read(&ctrl->drvact) || atomic_read(&ctrl->suspended)) { fimg2d_err("driver is unavailable, do sw fallback\n"); g2d_unlock(&ctrl->drvlock); mmput(mm); return -EPERM; } ret = fimg2d_add_command(ctrl, ctx, (struct fimg2d_blit __user *)arg); if (ret) { fimg2d_err("add command not allowed.\n"); g2d_unlock(&ctrl->drvlock); mmput(mm); return ret; } fimg2d_pm_qos_update(ctrl, FIMG2D_QOS_ON); usr_dst = kzalloc(sizeof(struct fimg2d_dma), GFP_KERNEL); if (!usr_dst) { fimg2d_err("failed to allocate memory for fimg2d_dma\n"); g2d_unlock(&ctrl->drvlock); mmput(mm); return -ENOMEM; } ret = store_user_dst((struct fimg2d_blit __user *)arg, usr_dst); if (ret) { fimg2d_err("store_user_dst() not allowed.\n"); g2d_unlock(&ctrl->drvlock); kfree(usr_dst); mmput(mm); return ret; } ret = fimg2d_request_bitblt(ctrl, ctx); if (ret) { fimg2d_info("request bitblit not allowed, " "so passing to s/w fallback.\n"); g2d_unlock(&ctrl->drvlock); kfree(usr_dst); mmput(mm); return -EBUSY; } g2d_unlock(&ctrl->drvlock); fimg2d_debug("addr : %p, size : %zd\n", (void *)usr_dst->addr, usr_dst->size); #ifndef CCI_SNOOP fimg2d_dma_unsync_inner(usr_dst->addr, usr_dst->size, DMA_FROM_DEVICE); #endif kfree(usr_dst); mmput(mm); break; case FIMG2D_BITBLT_VERSION: { struct fimg2d_version ver; struct fimg2d_platdata *pdata; #ifdef CONFIG_OF pdata = ctrl->pdata; #else pdata = to_fimg2d_plat(ctrl->dev); #endif ver.hw = pdata->hw_ver; ver.sw = 0; fimg2d_info("version info. hw(0x%x), sw(0x%x)\n", ver.hw, ver.sw); if (copy_to_user((void *)arg, &ver, sizeof(ver))) return -EFAULT; break; } case FIMG2D_BITBLT_ACTIVATE: { enum driver_act act; if (copy_from_user(&act, (void *)arg, sizeof(act))) return -EFAULT; g2d_lock(&ctrl->drvlock); atomic_set(&ctrl->drvact, act); if (act == DRV_ACT) { fimg2d_power_control(ctrl, FIMG2D_PW_OFF); fimg2d_info("fimg2d driver is activated\n"); } else { fimg2d_power_control(ctrl, FIMG2D_PW_ON); fimg2d_info("fimg2d driver is deactivated\n"); } g2d_unlock(&ctrl->drvlock); break; } default: fimg2d_err("unknown ioctl\n"); ret = -EFAULT; break; } return ret; }
/* Read the environment variable of current process specified by @key. */ int cfs_get_environ(const char *key, char *value, int *val_len) { struct mm_struct *mm; char *buffer; int buf_len = PAGE_CACHE_SIZE; int key_len = strlen(key); unsigned long addr; int rc; bool skip = false; ENTRY; buffer = kmalloc(buf_len, GFP_USER); if (!buffer) RETURN(-ENOMEM); mm = get_task_mm(current); if (!mm) { kfree(buffer); RETURN(-EINVAL); } addr = mm->env_start; while (addr < mm->env_end) { int this_len, retval, scan_len; char *env_start, *env_end; memset(buffer, 0, buf_len); this_len = min_t(int, mm->env_end - addr, buf_len); retval = cfs_access_process_vm(current, mm, addr, buffer, this_len, 0); if (retval < 0) GOTO(out, rc = retval); else if (retval != this_len) break; addr += retval; /* Parse the buffer to find out the specified key/value pair. * The "key=value" entries are separated by '\0'. */ env_start = buffer; scan_len = this_len; while (scan_len) { char *entry; int entry_len; env_end = memscan(env_start, '\0', scan_len); LASSERT(env_end >= env_start && env_end <= env_start + scan_len); /* The last entry of this buffer cross the buffer * boundary, reread it in next cycle. */ if (unlikely(env_end - env_start == scan_len)) { /* Just skip the entry larger than page size, * it can't be jobID env variable. */ if (unlikely(scan_len == this_len)) skip = true; else addr -= scan_len; break; } else if (unlikely(skip)) { skip = false; goto skip; } entry = env_start; entry_len = env_end - env_start; /* Key length + length of '=' */ if (entry_len > key_len + 1 && !memcmp(entry, key, key_len)) { entry += key_len + 1; entry_len -= key_len + 1; /* The 'value' buffer passed in is too small.*/ if (entry_len >= *val_len) GOTO(out, rc = -EOVERFLOW); memcpy(value, entry, entry_len); *val_len = entry_len; GOTO(out, rc = 0); } skip: scan_len -= (env_end - env_start + 1); env_start = env_end + 1; } } GOTO(out, rc = -ENOENT); out: mmput(mm); kfree((void *)buffer); return rc; }
int ptrace_request(struct task_struct *child, long request, unsigned long addr, unsigned long data) { int ret = -EIO; siginfo_t siginfo; void __user *datavp = (void __user *) data; unsigned long __user *datalp = datavp; switch (request) { case PTRACE_PEEKTEXT: case PTRACE_PEEKDATA: return generic_ptrace_peekdata(child, addr, data); case PTRACE_POKETEXT: case PTRACE_POKEDATA: return generic_ptrace_pokedata(child, addr, data); #ifdef PTRACE_OLDSETOPTIONS case PTRACE_OLDSETOPTIONS: #endif case PTRACE_SETOPTIONS: ret = ptrace_setoptions(child, data); break; case PTRACE_GETEVENTMSG: ret = put_user(child->ptrace_message, datalp); break; case PTRACE_GETSIGINFO: ret = ptrace_getsiginfo(child, &siginfo); if (!ret) ret = copy_siginfo_to_user(datavp, &siginfo); break; case PTRACE_SETSIGINFO: if (copy_from_user(&siginfo, datavp, sizeof siginfo)) ret = -EFAULT; else ret = ptrace_setsiginfo(child, &siginfo); break; case PTRACE_DETACH: /* detach a process that was attached. */ ret = ptrace_detach(child, data); break; #ifdef CONFIG_BINFMT_ELF_FDPIC case PTRACE_GETFDPIC: { struct mm_struct *mm = get_task_mm(child); unsigned long tmp = 0; ret = -ESRCH; if (!mm) break; switch (addr) { case PTRACE_GETFDPIC_EXEC: tmp = mm->context.exec_fdpic_loadmap; break; case PTRACE_GETFDPIC_INTERP: tmp = mm->context.interp_fdpic_loadmap; break; default: break; } mmput(mm); ret = put_user(tmp, datalp); break; } #endif #ifdef PTRACE_SINGLESTEP case PTRACE_SINGLESTEP: #endif #ifdef PTRACE_SINGLEBLOCK case PTRACE_SINGLEBLOCK: #endif #ifdef PTRACE_SYSEMU case PTRACE_SYSEMU: case PTRACE_SYSEMU_SINGLESTEP: #endif case PTRACE_SYSCALL: case PTRACE_CONT: return ptrace_resume(child, request, data); case PTRACE_KILL: if (child->exit_state) /* already dead */ return 0; return ptrace_resume(child, request, SIGKILL); #ifdef CONFIG_HAVE_ARCH_TRACEHOOK case PTRACE_GETREGSET: case PTRACE_SETREGSET: { struct iovec kiov; struct iovec __user *uiov = datavp; if (!access_ok(VERIFY_WRITE, uiov, sizeof(*uiov))) return -EFAULT; if (__get_user(kiov.iov_base, &uiov->iov_base) || __get_user(kiov.iov_len, &uiov->iov_len)) return -EFAULT; ret = ptrace_regset(child, request, addr, &kiov); if (!ret) ret = __put_user(kiov.iov_len, &uiov->iov_len); break; } #endif default: break; } return ret; }
/* * Allocate a new mm structure and copy contents from the * mm structure of the passed in task structure. */ struct mm_struct *dup_mm(struct task_struct *tsk) { struct mm_struct *mm, *oldmm = current->mm; int err; if (!oldmm) return NULL; mm = allocate_mm(); if (!mm) goto fail_nomem; memcpy(mm, oldmm, sizeof(*mm)); mm_init_cpumask(mm); /* Initializing for Swap token stuff */ mm->token_priority = 0; mm->last_interval = 0; #ifdef CONFIG_TRANSPARENT_HUGEPAGE mm->pmd_huge_pte = NULL; #endif uprobe_reset_state(mm); if (!mm_init(mm, tsk)) goto fail_nomem; if (init_new_context(tsk, mm)) goto fail_nocontext; dup_mm_exe_file(oldmm, mm); err = dup_mmap(mm, oldmm); if (err) goto free_pt; #ifdef CONFIG_HOMECACHE { /* Reset vm_pid on all vmas. In the new mm_struct, we * want to switch anything that was associated with * the parent to be associated with the child, and * clear everything else. */ struct vm_area_struct *mpnt; down_write_nested(&mm->mmap_sem, SINGLE_DEPTH_NESTING); for (mpnt = mm->mmap; mpnt; mpnt = mpnt->vm_next) { if (mpnt->vm_pid == current->pid) mpnt->vm_pid = tsk->pid; else mpnt->vm_pid = 0; } up_write(&mm->mmap_sem); } #endif mm->hiwater_rss = get_mm_rss(mm); mm->hiwater_vm = mm->total_vm; if (mm->binfmt && !try_module_get(mm->binfmt->module)) goto free_pt; return mm; free_pt: /* don't put binfmt in mmput, we haven't got module yet */ mm->binfmt = NULL; mmput(mm); fail_nomem: return NULL; fail_nocontext: /* * If init_new_context() failed, we cannot use mmput() to free the mm * because it calls destroy_context() */ mm_free_pgd(mm); free_mm(mm); return NULL; }
/* * This creates a new process as a copy of the old one, * but does not actually start it yet. * * It copies the registers, and all the appropriate * parts of the process environment (as per the clone * flags). The actual kick-off is left to the caller. */ static task_t *copy_process(unsigned long clone_flags, unsigned long stack_start, struct pt_regs *regs, unsigned long stack_size, int __user *parent_tidptr, int __user *child_tidptr, int pid) { int retval; struct task_struct *p = NULL; if ((clone_flags & (CLONE_NEWNS|CLONE_FS)) == (CLONE_NEWNS|CLONE_FS)) return ERR_PTR(-EINVAL); /* * Thread groups must share signals as well, and detached threads * can only be started up within the thread group. */ if ((clone_flags & CLONE_THREAD) && !(clone_flags & CLONE_SIGHAND)) return ERR_PTR(-EINVAL); /* * Shared signal handlers imply shared VM. By way of the above, * thread groups also imply shared VM. Blocking this case allows * for various simplifications in other code. */ if ((clone_flags & CLONE_SIGHAND) && !(clone_flags & CLONE_VM)) return ERR_PTR(-EINVAL); retval = security_task_create(clone_flags); if (retval) goto fork_out; retval = -ENOMEM; p = dup_task_struct(current); if (!p) goto fork_out; retval = -EAGAIN; if (atomic_read(&p->user->processes) >= p->signal->rlim[RLIMIT_NPROC].rlim_cur) { if (!capable(CAP_SYS_ADMIN) && !capable(CAP_SYS_RESOURCE) && p->user != &root_user) goto bad_fork_free; } atomic_inc(&p->user->__count); atomic_inc(&p->user->processes); get_group_info(p->group_info); /* * If multiple threads are within copy_process(), then this check * triggers too late. This doesn't hurt, the check is only there * to stop root fork bombs. */ if (nr_threads >= max_threads) goto bad_fork_cleanup_count; if (!try_module_get(p->thread_info->exec_domain->module)) goto bad_fork_cleanup_count; if (p->binfmt && !try_module_get(p->binfmt->module)) goto bad_fork_cleanup_put_domain; p->did_exec = 0; copy_flags(clone_flags, p); p->pid = pid; retval = -EFAULT; if (clone_flags & CLONE_PARENT_SETTID) if (put_user(p->pid, parent_tidptr)) goto bad_fork_cleanup; p->proc_dentry = NULL; INIT_LIST_HEAD(&p->children); INIT_LIST_HEAD(&p->sibling); p->vfork_done = NULL; spin_lock_init(&p->alloc_lock); spin_lock_init(&p->proc_lock); clear_tsk_thread_flag(p, TIF_SIGPENDING); init_sigpending(&p->pending); p->utime = cputime_zero; p->stime = cputime_zero; p->sched_time = 0; p->rchar = 0; /* I/O counter: bytes read */ p->wchar = 0; /* I/O counter: bytes written */ p->syscr = 0; /* I/O counter: read syscalls */ p->syscw = 0; /* I/O counter: write syscalls */ acct_clear_integrals(p); p->it_virt_expires = cputime_zero; p->it_prof_expires = cputime_zero; p->it_sched_expires = 0; INIT_LIST_HEAD(&p->cpu_timers[0]); INIT_LIST_HEAD(&p->cpu_timers[1]); INIT_LIST_HEAD(&p->cpu_timers[2]); p->lock_depth = -1; /* -1 = no lock */ do_posix_clock_monotonic_gettime(&p->start_time); p->security = NULL; p->io_context = NULL; p->io_wait = NULL; p->audit_context = NULL; #ifdef CONFIG_NUMA p->mempolicy = mpol_copy(p->mempolicy); if (IS_ERR(p->mempolicy)) { retval = PTR_ERR(p->mempolicy); p->mempolicy = NULL; goto bad_fork_cleanup; } #endif p->tgid = p->pid; if (clone_flags & CLONE_THREAD) p->tgid = current->tgid; if ((retval = security_task_alloc(p))) goto bad_fork_cleanup_policy; if ((retval = audit_alloc(p))) goto bad_fork_cleanup_security; /* copy all the process information */ if ((retval = copy_semundo(clone_flags, p))) goto bad_fork_cleanup_audit; if ((retval = copy_files(clone_flags, p))) goto bad_fork_cleanup_semundo; if ((retval = copy_fs(clone_flags, p))) goto bad_fork_cleanup_files; if ((retval = copy_sighand(clone_flags, p))) goto bad_fork_cleanup_fs; if ((retval = copy_signal(clone_flags, p))) goto bad_fork_cleanup_sighand; if ((retval = copy_mm(clone_flags, p))) goto bad_fork_cleanup_signal; if ((retval = copy_keys(clone_flags, p))) goto bad_fork_cleanup_mm; if ((retval = copy_namespace(clone_flags, p))) goto bad_fork_cleanup_keys; retval = copy_thread(0, clone_flags, stack_start, stack_size, p, regs); if (retval) goto bad_fork_cleanup_namespace; p->set_child_tid = (clone_flags & CLONE_CHILD_SETTID) ? child_tidptr : NULL; /* * Clear TID on mm_release()? */ p->clear_child_tid = (clone_flags & CLONE_CHILD_CLEARTID) ? child_tidptr: NULL; /* * Syscall tracing should be turned off in the child regardless * of CLONE_PTRACE. */ clear_tsk_thread_flag(p, TIF_SYSCALL_TRACE); #ifdef TIF_SYSCALL_EMU clear_tsk_thread_flag(p, TIF_SYSCALL_EMU); #endif /* Our parent execution domain becomes current domain These must match for thread signalling to apply */ p->parent_exec_id = p->self_exec_id; #ifdef CONFIG_LTT_USERSPACE_GENERIC if (clone_flags & CLONE_THREAD) memset(p->ltt_facilities, 0, sizeof(p->ltt_facilities)); else { int i; for(i=0; i<LTT_FAC_PER_PROCESS; i++) { p->ltt_facilities[i] = current->ltt_facilities[i]; if(p->ltt_facilities[i] != 0) ltt_facility_ref(p->ltt_facilities[i]); } } #endif //CONFIG_LTT_USERSPACE_GENERIC /* ok, now we should be set up.. */ p->exit_signal = (clone_flags & CLONE_THREAD) ? -1 : (clone_flags & CSIGNAL); p->pdeath_signal = 0; p->exit_state = 0; /* * Ok, make it visible to the rest of the system. * We dont wake it up yet. */ p->group_leader = p; INIT_LIST_HEAD(&p->ptrace_children); INIT_LIST_HEAD(&p->ptrace_list); /* Perform scheduler related setup. Assign this task to a CPU. */ sched_fork(p, clone_flags); /* Need tasklist lock for parent etc handling! */ write_lock_irq(&tasklist_lock); /* * The task hasn't been attached yet, so its cpus_allowed mask will * not be changed, nor will its assigned CPU. * * The cpus_allowed mask of the parent may have changed after it was * copied first time - so re-copy it here, then check the child's CPU * to ensure it is on a valid CPU (and if not, just force it back to * parent's CPU). This avoids alot of nasty races. */ p->cpus_allowed = current->cpus_allowed; if (unlikely(!cpu_isset(task_cpu(p), p->cpus_allowed) || !cpu_online(task_cpu(p)))) set_task_cpu(p, smp_processor_id()); /* * Check for pending SIGKILL! The new thread should not be allowed * to slip out of an OOM kill. (or normal SIGKILL.) */ if (sigismember(¤t->pending.signal, SIGKILL)) { write_unlock_irq(&tasklist_lock); retval = -EINTR; #if 1 /* put here by ltt patch */ goto bad_fork_cleanup_ltt_facilities; #else goto bad_fork_cleanup_namespace; #endif } /* CLONE_PARENT re-uses the old parent */ if (clone_flags & (CLONE_PARENT|CLONE_THREAD)) p->real_parent = current->real_parent; else p->real_parent = current; p->parent = p->real_parent; if (clone_flags & CLONE_THREAD) { spin_lock(¤t->sighand->siglock); /* * Important: if an exit-all has been started then * do not create this new thread - the whole thread * group is supposed to exit anyway. */ if (current->signal->flags & SIGNAL_GROUP_EXIT) { spin_unlock(¤t->sighand->siglock); write_unlock_irq(&tasklist_lock); retval = -EAGAIN; goto bad_fork_cleanup_ltt_facilities; } p->group_leader = current->group_leader; if (current->signal->group_stop_count > 0) { /* * There is an all-stop in progress for the group. * We ourselves will stop as soon as we check signals. * Make the new thread part of that group stop too. */ current->signal->group_stop_count++; set_tsk_thread_flag(p, TIF_SIGPENDING); } if (!cputime_eq(current->signal->it_virt_expires, cputime_zero) || !cputime_eq(current->signal->it_prof_expires, cputime_zero) || current->signal->rlim[RLIMIT_CPU].rlim_cur != RLIM_INFINITY || !list_empty(¤t->signal->cpu_timers[0]) || !list_empty(¤t->signal->cpu_timers[1]) || !list_empty(¤t->signal->cpu_timers[2])) { /* * Have child wake up on its first tick to check * for process CPU timers. */ p->it_prof_expires = jiffies_to_cputime(1); } spin_unlock(¤t->sighand->siglock); } /* * inherit ioprio */ p->ioprio = current->ioprio; SET_LINKS(p); if (unlikely(p->ptrace & PT_PTRACED)) __ptrace_link(p, current->parent); cpuset_fork(p); attach_pid(p, PIDTYPE_PID, p->pid); attach_pid(p, PIDTYPE_TGID, p->tgid); if (thread_group_leader(p)) { attach_pid(p, PIDTYPE_PGID, process_group(p)); attach_pid(p, PIDTYPE_SID, p->signal->session); if (p->pid) __get_cpu_var(process_counts)++; } if (!current->signal->tty && p->signal->tty) p->signal->tty = NULL; nr_threads++; total_forks++; write_unlock_irq(&tasklist_lock); retval = 0; fork_out: if (retval) return ERR_PTR(retval); return p; bad_fork_cleanup_ltt_facilities: #ifdef CONFIG_LTT_USERSPACE_GENERIC { int i; for(i=0; i<LTT_FAC_PER_PROCESS; i++) { if(p->ltt_facilities[i] == 0) break; WARN_ON(ltt_facility_unregister( p->ltt_facilities[i])); } } #endif //CONFIG_LTT_USERSPACE_GENERIC bad_fork_cleanup_namespace: exit_namespace(p); bad_fork_cleanup_keys: exit_keys(p); bad_fork_cleanup_mm: if (p->mm) mmput(p->mm); bad_fork_cleanup_signal: exit_signal(p); bad_fork_cleanup_sighand: exit_sighand(p); bad_fork_cleanup_fs: exit_fs(p); /* blocking */ bad_fork_cleanup_files: exit_files(p); /* blocking */ bad_fork_cleanup_semundo: exit_sem(p); bad_fork_cleanup_audit: audit_free(p); bad_fork_cleanup_security: security_task_free(p); bad_fork_cleanup_policy: #ifdef CONFIG_NUMA mpol_free(p->mempolicy); #endif bad_fork_cleanup: if (p->binfmt) module_put(p->binfmt->module); bad_fork_cleanup_put_domain: module_put(p->thread_info->exec_domain->module); bad_fork_cleanup_count: put_group_info(p->group_info); atomic_dec(&p->user->processes); free_uid(p->user); bad_fork_free: free_task(p); goto fork_out; }
/* * unshare allows a process to 'unshare' part of the process * context which was originally shared using clone. copy_* * functions used by do_fork() cannot be used here directly * because they modify an inactive task_struct that is being * constructed. Here we are modifying the current, active, * task_struct. */ asmlinkage long sys_unshare(unsigned long unshare_flags) { int err = 0; struct fs_struct *fs, *new_fs = NULL; struct sighand_struct *new_sigh = NULL; struct mm_struct *mm, *new_mm = NULL, *active_mm = NULL; struct files_struct *fd, *new_fd = NULL; struct sem_undo_list *new_ulist = NULL; struct nsproxy *new_nsproxy = NULL, *old_nsproxy = NULL; check_unshare_flags(&unshare_flags); /* Return -EINVAL for all unsupported flags */ err = -EINVAL; if (unshare_flags & ~(CLONE_THREAD|CLONE_FS|CLONE_NEWNS|CLONE_SIGHAND| CLONE_VM|CLONE_FILES|CLONE_SYSVSEM| CLONE_NEWUTS|CLONE_NEWIPC)) goto bad_unshare_out; if ((err = unshare_thread(unshare_flags))) goto bad_unshare_out; if ((err = unshare_fs(unshare_flags, &new_fs))) goto bad_unshare_cleanup_thread; if ((err = unshare_sighand(unshare_flags, &new_sigh))) goto bad_unshare_cleanup_fs; if ((err = unshare_vm(unshare_flags, &new_mm))) goto bad_unshare_cleanup_sigh; if ((err = unshare_fd(unshare_flags, &new_fd))) goto bad_unshare_cleanup_vm; if ((err = unshare_semundo(unshare_flags, &new_ulist))) goto bad_unshare_cleanup_fd; if ((err = unshare_nsproxy_namespaces(unshare_flags, &new_nsproxy, new_fs))) goto bad_unshare_cleanup_semundo; if (new_fs || new_mm || new_fd || new_ulist || new_nsproxy) { task_lock(current); if (new_nsproxy) { old_nsproxy = current->nsproxy; current->nsproxy = new_nsproxy; new_nsproxy = old_nsproxy; } if (new_fs) { fs = current->fs; current->fs = new_fs; new_fs = fs; } if (new_mm) { mm = current->mm; active_mm = current->active_mm; current->mm = new_mm; current->active_mm = new_mm; activate_mm(active_mm, new_mm); new_mm = mm; } if (new_fd) { fd = current->files; current->files = new_fd; new_fd = fd; } task_unlock(current); } if (new_nsproxy) put_nsproxy(new_nsproxy); bad_unshare_cleanup_semundo: bad_unshare_cleanup_fd: if (new_fd) put_files_struct(new_fd); bad_unshare_cleanup_vm: if (new_mm) mmput(new_mm); bad_unshare_cleanup_sigh: if (new_sigh) if (atomic_dec_and_test(&new_sigh->count)) kmem_cache_free(sighand_cachep, new_sigh); bad_unshare_cleanup_fs: if (new_fs) put_fs_struct(new_fs); bad_unshare_cleanup_thread: bad_unshare_out: return err; }
/* * This creates a new process as a copy of the old one, * but does not actually start it yet. * * It copies the registers, and all the appropriate * parts of the process environment (as per the clone * flags). The actual kick-off is left to the caller. */ static struct task_struct *copy_process(unsigned long clone_flags, unsigned long stack_start, struct pt_regs *regs, unsigned long stack_size, int __user *parent_tidptr, int __user *child_tidptr, struct pid *pid) { int retval; struct task_struct *p; if ((clone_flags & (CLONE_NEWNS|CLONE_FS)) == (CLONE_NEWNS|CLONE_FS)) return ERR_PTR(-EINVAL); /* * Thread groups must share signals as well, and detached threads * can only be started up within the thread group. */ if ((clone_flags & CLONE_THREAD) && !(clone_flags & CLONE_SIGHAND)) return ERR_PTR(-EINVAL); /* * Shared signal handlers imply shared VM. By way of the above, * thread groups also imply shared VM. Blocking this case allows * for various simplifications in other code. */ if ((clone_flags & CLONE_SIGHAND) && !(clone_flags & CLONE_VM)) return ERR_PTR(-EINVAL); retval = security_task_create(clone_flags); if (retval) goto fork_out; retval = -ENOMEM; p = dup_task_struct(current); if (!p) goto fork_out; rt_mutex_init_task(p); #ifdef CONFIG_PROVE_LOCKING DEBUG_LOCKS_WARN_ON(!p->hardirqs_enabled); DEBUG_LOCKS_WARN_ON(!p->softirqs_enabled); #endif retval = -EAGAIN; if (atomic_read(&p->user->processes) >= p->signal->rlim[RLIMIT_NPROC].rlim_cur) { if (!capable(CAP_SYS_ADMIN) && !capable(CAP_SYS_RESOURCE) && p->user != &root_user) goto bad_fork_free; } atomic_inc(&p->user->__count); atomic_inc(&p->user->processes); get_group_info(p->group_info); /* * If multiple threads are within copy_process(), then this check * triggers too late. This doesn't hurt, the check is only there * to stop root fork bombs. */ if (nr_threads >= max_threads) goto bad_fork_cleanup_count; if (!try_module_get(task_thread_info(p)->exec_domain->module)) goto bad_fork_cleanup_count; if (p->binfmt && !try_module_get(p->binfmt->module)) goto bad_fork_cleanup_put_domain; p->did_exec = 0; delayacct_tsk_init(p); /* Must remain after dup_task_struct() */ copy_flags(clone_flags, p); p->pid = pid_nr(pid); retval = -EFAULT; if (clone_flags & CLONE_PARENT_SETTID) if (put_user(p->pid, parent_tidptr)) goto bad_fork_cleanup_delays_binfmt; INIT_LIST_HEAD(&p->children); INIT_LIST_HEAD(&p->sibling); p->vfork_done = NULL; spin_lock_init(&p->alloc_lock); clear_tsk_thread_flag(p, TIF_SIGPENDING); init_sigpending(&p->pending); p->utime = cputime_zero; p->stime = cputime_zero; p->sched_time = 0; #ifdef CONFIG_DETECT_SOFTLOCKUP p->last_switch_count = 0; p->last_switch_timestamp = 0; #endif #ifdef CONFIG_TASK_XACCT p->rchar = 0; /* I/O counter: bytes read */ p->wchar = 0; /* I/O counter: bytes written */ p->syscr = 0; /* I/O counter: read syscalls */ p->syscw = 0; /* I/O counter: write syscalls */ #endif task_io_accounting_init(p); acct_clear_integrals(p); p->it_virt_expires = cputime_zero; p->it_prof_expires = cputime_zero; p->it_sched_expires = 0; INIT_LIST_HEAD(&p->cpu_timers[0]); INIT_LIST_HEAD(&p->cpu_timers[1]); INIT_LIST_HEAD(&p->cpu_timers[2]); p->lock_depth = -1; /* -1 = no lock */ do_posix_clock_monotonic_gettime(&p->start_time); #ifdef CONFIG_SECURITY p->security = NULL; #endif p->io_context = NULL; p->audit_context = NULL; cpuset_fork(p); #ifdef CONFIG_NUMA p->mempolicy = mpol_copy(p->mempolicy); if (IS_ERR(p->mempolicy)) { retval = PTR_ERR(p->mempolicy); p->mempolicy = NULL; goto bad_fork_cleanup_cpuset; } mpol_fix_fork_child_flag(p); #endif #ifdef CONFIG_TRACE_IRQFLAGS p->irq_events = 0; #ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW p->hardirqs_enabled = 1; #else p->hardirqs_enabled = 0; #endif p->hardirq_enable_ip = 0; p->hardirq_enable_event = 0; p->hardirq_disable_ip = _THIS_IP_; p->hardirq_disable_event = 0; p->softirqs_enabled = 1; p->softirq_enable_ip = _THIS_IP_; p->softirq_enable_event = 0; p->softirq_disable_ip = 0; p->softirq_disable_event = 0; p->hardirq_context = 0; p->softirq_context = 0; #endif #ifdef CONFIG_LOCKDEP p->lockdep_depth = 0; /* no locks held yet */ p->curr_chain_key = 0; p->lockdep_recursion = 0; #endif #ifdef CONFIG_DEBUG_MUTEXES p->blocked_on = NULL; /* not blocked yet */ #endif p->tgid = p->pid; if (clone_flags & CLONE_THREAD) p->tgid = current->tgid; if ((retval = security_task_alloc(p))) goto bad_fork_cleanup_policy; if ((retval = audit_alloc(p))) goto bad_fork_cleanup_security; /* copy all the process information */ if ((retval = copy_semundo(clone_flags, p))) goto bad_fork_cleanup_audit; if ((retval = copy_files(clone_flags, p))) goto bad_fork_cleanup_semundo; if ((retval = copy_fs(clone_flags, p))) goto bad_fork_cleanup_files; if ((retval = copy_sighand(clone_flags, p))) goto bad_fork_cleanup_fs; if ((retval = copy_signal(clone_flags, p))) goto bad_fork_cleanup_sighand; if ((retval = copy_mm(clone_flags, p))) goto bad_fork_cleanup_signal; if ((retval = copy_keys(clone_flags, p))) goto bad_fork_cleanup_mm; if ((retval = copy_namespaces(clone_flags, p))) goto bad_fork_cleanup_keys; retval = copy_thread(clone_flags, stack_start, stack_size, p, regs); if (retval) goto bad_fork_cleanup_namespaces; p->set_child_tid = (clone_flags & CLONE_CHILD_SETTID) ? child_tidptr : NULL; /* * Clear TID on mm_release()? */ p->clear_child_tid = (clone_flags & CLONE_CHILD_CLEARTID) ? child_tidptr: NULL; p->robust_list = NULL; #ifdef CONFIG_COMPAT p->compat_robust_list = NULL; #endif INIT_LIST_HEAD(&p->pi_state_list); p->pi_state_cache = NULL; /* * sigaltstack should be cleared when sharing the same VM */ if ((clone_flags & (CLONE_VM|CLONE_VFORK)) == CLONE_VM) p->sas_ss_sp = p->sas_ss_size = 0; /* * Syscall tracing should be turned off in the child regardless * of CLONE_PTRACE. */ clear_tsk_thread_flag(p, TIF_SYSCALL_TRACE); #ifdef TIF_SYSCALL_EMU clear_tsk_thread_flag(p, TIF_SYSCALL_EMU); #endif /* Our parent execution domain becomes current domain These must match for thread signalling to apply */ p->parent_exec_id = p->self_exec_id; /* ok, now we should be set up.. */ p->exit_signal = (clone_flags & CLONE_THREAD) ? -1 : (clone_flags & CLONE_PARENT) ? current->group_leader->exit_signal : (clone_flags & CSIGNAL); p->pdeath_signal = 0; p->exit_state = 0; /* * Ok, make it visible to the rest of the system. * We dont wake it up yet. */ p->group_leader = p; INIT_LIST_HEAD(&p->thread_group); INIT_LIST_HEAD(&p->ptrace_children); INIT_LIST_HEAD(&p->ptrace_list); /* Perform scheduler related setup. Assign this task to a CPU. */ sched_fork(p, clone_flags); /* Need tasklist lock for parent etc handling! */ write_lock_irq(&tasklist_lock); /* for sys_ioprio_set(IOPRIO_WHO_PGRP) */ p->ioprio = current->ioprio; /* * The task hasn't been attached yet, so its cpus_allowed mask will * not be changed, nor will its assigned CPU. * * The cpus_allowed mask of the parent may have changed after it was * copied first time - so re-copy it here, then check the child's CPU * to ensure it is on a valid CPU (and if not, just force it back to * parent's CPU). This avoids alot of nasty races. */ p->cpus_allowed = current->cpus_allowed; if (unlikely(!cpu_isset(task_cpu(p), p->cpus_allowed) || !cpu_online(task_cpu(p)))) set_task_cpu(p, smp_processor_id()); /* CLONE_PARENT re-uses the old parent */ if (clone_flags & (CLONE_PARENT|CLONE_THREAD)) p->real_parent = current->real_parent; else p->real_parent = current; p->parent = p->real_parent; spin_lock(¤t->sighand->siglock); /* * Process group and session signals need to be delivered to just the * parent before the fork or both the parent and the child after the * fork. Restart if a signal comes in before we add the new process to * it's process group. * A fatal signal pending means that current will exit, so the new * thread can't slip out of an OOM kill (or normal SIGKILL). */ recalc_sigpending(); if (signal_pending(current)) { spin_unlock(¤t->sighand->siglock); write_unlock_irq(&tasklist_lock); retval = -ERESTARTNOINTR; goto bad_fork_cleanup_namespaces; } if (clone_flags & CLONE_THREAD) { p->group_leader = current->group_leader; list_add_tail_rcu(&p->thread_group, &p->group_leader->thread_group); if (!cputime_eq(current->signal->it_virt_expires, cputime_zero) || !cputime_eq(current->signal->it_prof_expires, cputime_zero) || current->signal->rlim[RLIMIT_CPU].rlim_cur != RLIM_INFINITY || !list_empty(¤t->signal->cpu_timers[0]) || !list_empty(¤t->signal->cpu_timers[1]) || !list_empty(¤t->signal->cpu_timers[2])) { /* * Have child wake up on its first tick to check * for process CPU timers. */ p->it_prof_expires = jiffies_to_cputime(1); } } if (likely(p->pid)) { add_parent(p); if (unlikely(p->ptrace & PT_PTRACED)) __ptrace_link(p, current->parent); if (thread_group_leader(p)) { p->signal->tty = current->signal->tty; p->signal->pgrp = process_group(current); set_signal_session(p->signal, process_session(current)); attach_pid(p, PIDTYPE_PGID, task_pgrp(current)); attach_pid(p, PIDTYPE_SID, task_session(current)); list_add_tail_rcu(&p->tasks, &init_task.tasks); __get_cpu_var(process_counts)++; } attach_pid(p, PIDTYPE_PID, pid); nr_threads++; } total_forks++; spin_unlock(¤t->sighand->siglock); write_unlock_irq(&tasklist_lock); proc_fork_connector(p); return p; bad_fork_cleanup_namespaces: exit_task_namespaces(p); bad_fork_cleanup_keys: exit_keys(p); bad_fork_cleanup_mm: if (p->mm) mmput(p->mm); bad_fork_cleanup_signal: cleanup_signal(p); bad_fork_cleanup_sighand: __cleanup_sighand(p->sighand); bad_fork_cleanup_fs: exit_fs(p); /* blocking */ bad_fork_cleanup_files: exit_files(p); /* blocking */ bad_fork_cleanup_semundo: exit_sem(p); bad_fork_cleanup_audit: audit_free(p); bad_fork_cleanup_security: security_task_free(p); bad_fork_cleanup_policy: #ifdef CONFIG_NUMA mpol_free(p->mempolicy); bad_fork_cleanup_cpuset: #endif cpuset_exit(p); bad_fork_cleanup_delays_binfmt: delayacct_tsk_free(p); if (p->binfmt) module_put(p->binfmt->module); bad_fork_cleanup_put_domain: module_put(task_thread_info(p)->exec_domain->module); bad_fork_cleanup_count: put_group_info(p->group_info); atomic_dec(&p->user->processes); free_uid(p->user); bad_fork_free: free_task(p); fork_out: return ERR_PTR(retval); }
/* pvfs_bufmap_copy_to_user_task_iovec() * * copies data out of a mapped buffer to a vector of user space address * of a given task specified by the task structure argument (tsk) * This is used by the client-daemon for completing an aio * operation that was issued by an arbitrary user program. * Unfortunately, we cannot use a copy_to_user * in that case and need to map in the user pages before * attempting the copy! * * NOTE: There is no need for an analogous copy from user task since * the data buffers get copied in the context of the process initiating * the write system call! * * Returns number of bytes copied on success, -errno on failure. */ size_t pvfs_bufmap_copy_to_user_task_iovec( struct task_struct *tsk, struct iovec *iovec, unsigned long nr_segs, int buffer_index, size_t size_to_be_copied) { size_t ret = 0, amt_copied = 0, cur_copy_size = 0; int from_page_index = 0; void *from_kaddr = NULL; struct iovec *copied_iovec = NULL; struct pvfs_bufmap_desc *from = &desc_array[buffer_index]; struct mm_struct *mm = NULL; struct vm_area_struct *vma = NULL; struct page *page = NULL; unsigned long to_addr = 0; void *maddr = NULL; unsigned int to_offset = 0; unsigned int seg, from_page_offset = 0; gossip_debug(GOSSIP_BUFMAP_DEBUG, "pvfs_bufmap_copy_to_user_task_iovec: " " PID: %d, iovec %p, from %p, index %d, " " size %zd\n", tsk->pid, iovec, from, buffer_index, size_to_be_copied); down_read(&bufmap_init_sem); if (bufmap_init == 0) { gossip_err("pvfs2_bufmap_copy_to_user: not yet " "initialized.\n"); gossip_err("pvfs2: please confirm that pvfs2-client " "daemon is running.\n"); up_read(&bufmap_init_sem); return -EIO; } /* * copy the passed in iovec so that we can change some of its fields */ copied_iovec = kmalloc(nr_segs * sizeof(*copied_iovec), PVFS2_BUFMAP_GFP_FLAGS); if (copied_iovec == NULL) { gossip_err("pvfs_bufmap_copy_to_user_iovec: failed allocating memory\n"); up_read(&bufmap_init_sem); return -ENOMEM; } memcpy(copied_iovec, iovec, nr_segs * sizeof(*copied_iovec)); /* * Go through each segment in the iovec and make sure that * the summation of iov_len is greater than the given size. */ for (seg = 0, amt_copied = 0; seg < nr_segs; seg++) { amt_copied += copied_iovec[seg].iov_len; } if (amt_copied < size_to_be_copied) { gossip_err("pvfs_bufmap_copy_to_user_task_iovec: computed total (%zd) " "is less than (%zd)\n", amt_copied, size_to_be_copied); kfree(copied_iovec); up_read(&bufmap_init_sem); return -EINVAL; } mm = get_task_mm(tsk); if (!mm) { kfree(copied_iovec); up_read(&bufmap_init_sem); return -EIO; } from_page_index = 0; amt_copied = 0; seg = 0; from_page_offset = 0; /* * Go through each of the page in the specified process * address space and copy from the mapped * buffer, and make sure to do this one page at a time! */ down_read(&mm->mmap_sem); while (amt_copied < size_to_be_copied) { int inc_from_page_index = 0; struct iovec *iv = &copied_iovec[seg]; if (iv->iov_len < (PAGE_SIZE - from_page_offset)) { cur_copy_size = PVFS_util_min(iv->iov_len, size_to_be_copied - amt_copied); seg++; to_addr = (unsigned long) iv->iov_base; inc_from_page_index = 0; } else if (iv->iov_len == (PAGE_SIZE - from_page_offset)) { cur_copy_size = PVFS_util_min(iv->iov_len, size_to_be_copied - amt_copied); seg++; to_addr = (unsigned long) iv->iov_base; inc_from_page_index = 1; } else { cur_copy_size = PVFS_util_min(PAGE_SIZE - from_page_offset, size_to_be_copied - amt_copied); to_addr = (unsigned long) iv->iov_base; iv->iov_base += cur_copy_size; iv->iov_len -= cur_copy_size; inc_from_page_index = 1; } ret = get_user_pages(tsk, mm, to_addr, 1,/* count */ 1,/* write */ 1,/* force */ &page, &vma); if (ret <= 0) break; to_offset = to_addr & (PAGE_SIZE - 1); maddr = pvfs2_kmap(page); from_kaddr = pvfs2_kmap(from->page_array[from_page_index]); copy_to_user_page(vma, page, to_addr, maddr + to_offset /* dst */, from_kaddr + from_page_offset, /* src */ cur_copy_size /* len */); set_page_dirty_lock(page); pvfs2_kunmap(from->page_array[from_page_index]); pvfs2_kunmap(page); page_cache_release(page); amt_copied += cur_copy_size; if (inc_from_page_index) { from_page_offset = 0; from_page_index++; } else { from_page_offset += cur_copy_size; } } up_read(&mm->mmap_sem); mmput(mm); up_read(&bufmap_init_sem); kfree(copied_iovec); return (amt_copied < size_to_be_copied) ? -EFAULT: amt_copied; }
static int sec_restrict_fork(void) { struct cred *shellcred; int ret = 0; struct task_struct *parent_tsk; struct mm_struct *parent_mm = NULL; const struct cred *parent_cred; read_lock(&tasklist_lock); parent_tsk = current->parent; if (!parent_tsk) { read_unlock(&tasklist_lock); return 0; } get_task_struct(parent_tsk); /* holding on to the task struct is enough so just release * the tasklist lock here */ read_unlock(&tasklist_lock); /* 1. Allowed case - init process. */ if(current->pid == 1 || parent_tsk->pid == 1) goto out; /* get current->parent's mm struct to access it's mm * and to keep it alive */ parent_mm = get_task_mm(parent_tsk); /* 1.1 Skip for kernel tasks */ if(current->mm == NULL || parent_mm == NULL) goto out; /* 2. Restrict case - parent process is /sbin/adbd. */ if( sec_check_execpath(parent_mm, "/sbin/adbd") ) { shellcred = prepare_creds(); if(!shellcred) { ret = 1; goto out; } shellcred->uid = 2000; shellcred->gid = 2000; shellcred->euid = 2000; shellcred->egid = 2000; commit_creds(shellcred); ret = 0; goto out; } /* 3. Restrict case - execute file in /data directory. */ if( sec_check_execpath(current->mm, "/data/") ) { ret = 1; goto out; } /* 4. Restrict case - parent's privilege is not root. */ parent_cred = get_task_cred(parent_tsk); if (!parent_cred) goto out; if(!CHECK_ROOT_UID(parent_tsk)) ret = 1; put_cred(parent_cred); out: if (parent_mm) mmput(parent_mm); put_task_struct(parent_tsk); return ret; }
int access_process_vm(struct task_struct *tsk, unsigned long addr, void *buf, int len, int write) { struct mm_struct *mm; struct vm_area_struct *vma; struct page *page; void *old_buf = buf; mm = get_task_mm(tsk); if (!mm) return 0; down_read(&mm->mmap_sem); /* ignore errors, just check how much was sucessfully transfered */ while (len) { int bytes, ret, offset; void *maddr; unsigned long paddr; int xip = 0; #ifdef CONFIG_CRAMFS_XIP_DEBUGGABLE if (xip_enable_debug && !write) { vma = find_extend_vma(mm, addr); if (vma && (vma->vm_flags & VM_XIP)) xip = find_xip_untouched_entry(mm, addr, &paddr); } #endif if (xip) { maddr = ioremap(paddr, PAGE_SIZE); if (!maddr) break; page = NULL; } else { ret = get_user_pages(tsk, mm, addr, 1, write, 1, &page, &vma); if (ret <= 0) break; maddr = kmap(page); } bytes = len; offset = addr & (PAGE_SIZE-1); if (bytes > PAGE_SIZE-offset) bytes = PAGE_SIZE-offset; if (write) { copy_to_user_page(vma, page, addr, maddr + offset, buf, bytes); set_page_dirty_lock(page); } else { copy_from_user_page(vma, page, addr, buf, maddr + offset, bytes); } if (xip) iounmap(maddr); else { kunmap(page); page_cache_release(page); } len -= bytes; buf += bytes; addr += bytes; } up_read(&mm->mmap_sem); mmput(mm); return buf - old_buf; }
/* * We completely avoid races by reading each swap page in advance, * and then search for the process using it. All the necessary * page table adjustments can then be made atomically. */ static int try_to_unuse(unsigned int type) { struct swap_info_struct * si = &swap_info[type]; struct mm_struct *start_mm; unsigned short *swap_map; unsigned short swcount; struct page *page; swp_entry_t entry; unsigned int i = 0; int retval = 0; int reset_overflow = 0; int shmem; /* * When searching mms for an entry, a good strategy is to * start at the first mm we freed the previous entry from * (though actually we don't notice whether we or coincidence * freed the entry). Initialize this start_mm with a hold. * * A simpler strategy would be to start at the last mm we * freed the previous entry from; but that would take less * advantage of mmlist ordering, which clusters forked mms * together, child after parent. If we race with dup_mmap(), we * prefer to resolve parent before child, lest we miss entries * duplicated after we scanned child: using last mm would invert * that. Though it's only a serious concern when an overflowed * swap count is reset from SWAP_MAP_MAX, preventing a rescan. */ start_mm = &init_mm; atomic_inc(&init_mm.mm_users); /* * Keep on scanning until all entries have gone. Usually, * one pass through swap_map is enough, but not necessarily: * there are races when an instance of an entry might be missed. */ while ((i = find_next_to_unuse(si, i)) != 0) { if (signal_pending(current)) { retval = -EINTR; break; } /* * Get a page for the entry, using the existing swap * cache page if there is one. Otherwise, get a clean * page and read the swap into it. */ swap_map = &si->swap_map[i]; entry = swp_entry(type, i); page = read_swap_cache_async(entry, NULL, 0); if (!page) { /* * Either swap_duplicate() failed because entry * has been freed independently, and will not be * reused since sys_swapoff() already disabled * allocation from here, or alloc_page() failed. */ if (!*swap_map) continue; retval = -ENOMEM; break; } /* * Don't hold on to start_mm if it looks like exiting. */ if (atomic_read(&start_mm->mm_users) == 1) { mmput(start_mm); start_mm = &init_mm; atomic_inc(&init_mm.mm_users); } /* * Wait for and lock page. When do_swap_page races with * try_to_unuse, do_swap_page can handle the fault much * faster than try_to_unuse can locate the entry. This * apparently redundant "wait_on_page_locked" lets try_to_unuse * defer to do_swap_page in such a case - in some tests, * do_swap_page and try_to_unuse repeatedly compete. */ wait_on_page_locked(page); wait_on_page_writeback(page); lock_page(page); wait_on_page_writeback(page); /* * Remove all references to entry. * Whenever we reach init_mm, there's no address space * to search, but use it as a reminder to search shmem. */ shmem = 0; swcount = *swap_map; if (swcount > 1) { if (start_mm == &init_mm) shmem = shmem_unuse(entry, page); else retval = unuse_mm(start_mm, entry, page); } if (*swap_map > 1) { int set_start_mm = (*swap_map >= swcount); struct list_head *p = &start_mm->mmlist; struct mm_struct *new_start_mm = start_mm; struct mm_struct *prev_mm = start_mm; struct mm_struct *mm; atomic_inc(&new_start_mm->mm_users); atomic_inc(&prev_mm->mm_users); spin_lock(&mmlist_lock); while (*swap_map > 1 && !retval && (p = p->next) != &start_mm->mmlist) { mm = list_entry(p, struct mm_struct, mmlist); if (!atomic_inc_not_zero(&mm->mm_users)) continue; spin_unlock(&mmlist_lock); mmput(prev_mm); prev_mm = mm; cond_resched(); swcount = *swap_map; if (swcount <= 1) ; else if (mm == &init_mm) { set_start_mm = 1; shmem = shmem_unuse(entry, page); } else retval = unuse_mm(mm, entry, page); if (set_start_mm && *swap_map < swcount) { mmput(new_start_mm); atomic_inc(&mm->mm_users); new_start_mm = mm; set_start_mm = 0; } spin_lock(&mmlist_lock); } spin_unlock(&mmlist_lock); mmput(prev_mm); mmput(start_mm); start_mm = new_start_mm; } if (retval) { unlock_page(page); page_cache_release(page); break; } /* * How could swap count reach 0x7fff when the maximum * pid is 0x7fff, and there's no way to repeat a swap * page within an mm (except in shmem, where it's the * shared object which takes the reference count)? * We believe SWAP_MAP_MAX cannot occur in Linux 2.4. * * If that's wrong, then we should worry more about * exit_mmap() and do_munmap() cases described above: * we might be resetting SWAP_MAP_MAX too early here. * We know "Undead"s can happen, they're okay, so don't * report them; but do report if we reset SWAP_MAP_MAX. */ if (*swap_map == SWAP_MAP_MAX) { spin_lock(&swap_lock); *swap_map = 1; spin_unlock(&swap_lock); reset_overflow = 1; } /* * If a reference remains (rare), we would like to leave * the page in the swap cache; but try_to_unmap could * then re-duplicate the entry once we drop page lock, * so we might loop indefinitely; also, that page could * not be swapped out to other storage meanwhile. So: * delete from cache even if there's another reference, * after ensuring that the data has been saved to disk - * since if the reference remains (rarer), it will be * read from disk into another page. Splitting into two * pages would be incorrect if swap supported "shared * private" pages, but they are handled by tmpfs files. * * Note shmem_unuse already deleted a swappage from * the swap cache, unless the move to filepage failed: * in which case it left swappage in cache, lowered its * swap count to pass quickly through the loops above, * and now we must reincrement count to try again later. */ if ((*swap_map > 1) && PageDirty(page) && PageSwapCache(page)) { struct writeback_control wbc = { .sync_mode = WB_SYNC_NONE, }; swap_writepage(page, &wbc); lock_page(page); wait_on_page_writeback(page); } if (PageSwapCache(page)) { if (shmem) swap_duplicate(entry); else delete_from_swap_cache(page); } /* * So we could skip searching mms once swap count went * to 1, we did not mark any present ptes as dirty: must * mark page dirty so shrink_page_list will preserve it. */ SetPageDirty(page); unlock_page(page); page_cache_release(page); /* * Make sure that we aren't completely killing * interactive performance. */ cond_resched(); }
static void decode_address(char *buf, unsigned long address) { struct vm_list_struct *vml; struct task_struct *p; struct mm_struct *mm; unsigned long flags, offset; unsigned char in_atomic = (bfin_read_IPEND() & 0x10) || in_atomic(); #ifdef CONFIG_KALLSYMS unsigned long symsize; const char *symname; char *modname; char *delim = ":"; char namebuf[128]; /* look up the address and see if we are in kernel space */ symname = kallsyms_lookup(address, &symsize, &offset, &modname, namebuf); if (symname) { /* yeah! kernel space! */ if (!modname) modname = delim = ""; sprintf(buf, "<0x%p> { %s%s%s%s + 0x%lx }", (void *)address, delim, modname, delim, symname, (unsigned long)offset); return; } #endif /* Problem in fixed code section? */ if (address >= FIXED_CODE_START && address < FIXED_CODE_END) { sprintf(buf, "<0x%p> /* Maybe fixed code section */", (void *)address); return; } /* Problem somewhere before the kernel start address */ if (address < CONFIG_BOOT_LOAD) { sprintf(buf, "<0x%p> /* Maybe null pointer? */", (void *)address); return; } /* looks like we're off in user-land, so let's walk all the * mappings of all our processes and see if we can't be a whee * bit more specific */ write_lock_irqsave(&tasklist_lock, flags); for_each_process(p) { mm = (in_atomic ? p->mm : get_task_mm(p)); if (!mm) continue; vml = mm->context.vmlist; while (vml) { struct vm_area_struct *vma = vml->vma; if (address >= vma->vm_start && address < vma->vm_end) { char _tmpbuf[256]; char *name = p->comm; struct file *file = vma->vm_file; if (file) name = d_path(&file->f_path, _tmpbuf, sizeof(_tmpbuf)); /* FLAT does not have its text aligned to the start of * the map while FDPIC ELF does ... */ /* before we can check flat/fdpic, we need to * make sure current is valid */ if ((unsigned long)current >= FIXED_CODE_START && !((unsigned long)current & 0x3)) { if (current->mm && (address > current->mm->start_code) && (address < current->mm->end_code)) offset = address - current->mm->start_code; else offset = (address - vma->vm_start) + (vma->vm_pgoff << PAGE_SHIFT); sprintf(buf, "<0x%p> [ %s + 0x%lx ]", (void *)address, name, offset); } else sprintf(buf, "<0x%p> [ %s vma:0x%lx-0x%lx]", (void *)address, name, vma->vm_start, vma->vm_end); if (!in_atomic) mmput(mm); if (!strlen(buf)) sprintf(buf, "<0x%p> [ %s ] dynamic memory", (void *)address, name); goto done; } vml = vml->next; } if (!in_atomic) mmput(mm); } /* we were unable to find this address anywhere */ sprintf(buf, "<0x%p> /* kernel dynamic memory */", (void *)address); done: write_unlock_irqrestore(&tasklist_lock, flags); }
/* * This creates a new process as a copy of the old one, * but does not actually start it yet. * * It copies the registers, and all the appropriate * parts of the process environment (as per the clone * flags). The actual kick-off is left to the caller. */ static struct task_struct *copy_process(unsigned long clone_flags, unsigned long stack_start, struct pt_regs *regs, unsigned long stack_size, int __user *child_tidptr, struct pid *pid, int trace) { int retval; struct task_struct *p; int cgroup_callbacks_done = 0; if ((clone_flags & (CLONE_NEWNS|CLONE_FS)) == (CLONE_NEWNS|CLONE_FS)) return ERR_PTR(-EINVAL); /* * Thread groups must share signals as well, and detached threads * can only be started up within the thread group. */ if ((clone_flags & CLONE_THREAD) && !(clone_flags & CLONE_SIGHAND)) return ERR_PTR(-EINVAL); /* * Shared signal handlers imply shared VM. By way of the above, * thread groups also imply shared VM. Blocking this case allows * for various simplifications in other code. */ if ((clone_flags & CLONE_SIGHAND) && !(clone_flags & CLONE_VM)) return ERR_PTR(-EINVAL); /* * Siblings of global init remain as zombies on exit since they are * not reaped by their parent (swapper). To solve this and to avoid * multi-rooted process trees, prevent global and container-inits * from creating siblings. */ if ((clone_flags & CLONE_PARENT) && current->signal->flags & SIGNAL_UNKILLABLE) return ERR_PTR(-EINVAL); retval = security_task_create(clone_flags); if (retval) goto fork_out; retval = -ENOMEM; p = dup_task_struct(current); if (!p) goto fork_out; ftrace_graph_init_task(p); rt_mutex_init_task(p); #ifdef CONFIG_PROVE_LOCKING DEBUG_LOCKS_WARN_ON(!p->hardirqs_enabled); DEBUG_LOCKS_WARN_ON(!p->softirqs_enabled); #endif retval = -EAGAIN; if (atomic_read(&p->real_cred->user->processes) >= task_rlimit(p, RLIMIT_NPROC)) { if (!capable(CAP_SYS_ADMIN) && !capable(CAP_SYS_RESOURCE) && p->real_cred->user != INIT_USER) goto bad_fork_free; } retval = copy_creds(p, clone_flags); if (retval < 0) goto bad_fork_free; /* * If multiple threads are within copy_process(), then this check * triggers too late. This doesn't hurt, the check is only there * to stop root fork bombs. */ retval = -EAGAIN; if (nr_threads >= max_threads) goto bad_fork_cleanup_count; if (!try_module_get(task_thread_info(p)->exec_domain->module)) goto bad_fork_cleanup_count; p->did_exec = 0; delayacct_tsk_init(p); /* Must remain after dup_task_struct() */ copy_flags(clone_flags, p); INIT_LIST_HEAD(&p->children); INIT_LIST_HEAD(&p->sibling); rcu_copy_process(p); p->vfork_done = NULL; spin_lock_init(&p->alloc_lock); #ifdef CONFIG_MINI_CORE p->ptrace_attach_done = NULL; #endif clear_tsk_thread_flag(p, TIF_SIGPENDING); init_sigpending(&p->pending); p->utime = cputime_zero; p->stime = cputime_zero; p->gtime = cputime_zero; p->utimescaled = cputime_zero; p->stimescaled = cputime_zero; #ifndef CONFIG_VIRT_CPU_ACCOUNTING p->prev_utime = cputime_zero; p->prev_stime = cputime_zero; #endif #if defined(SPLIT_RSS_COUNTING) memset(&p->rss_stat, 0, sizeof(p->rss_stat)); #endif p->default_timer_slack_ns = current->timer_slack_ns; task_io_accounting_init(&p->ioac); acct_clear_integrals(p); posix_cpu_timers_init(p); p->lock_depth = -1; /* -1 = no lock */ do_posix_clock_monotonic_gettime(&p->start_time); p->real_start_time = p->start_time; monotonic_to_bootbased(&p->real_start_time); p->io_context = NULL; p->audit_context = NULL; cgroup_fork(p); #ifdef CONFIG_NUMA p->mempolicy = mpol_dup(p->mempolicy); if (IS_ERR(p->mempolicy)) { retval = PTR_ERR(p->mempolicy); p->mempolicy = NULL; goto bad_fork_cleanup_cgroup; } mpol_fix_fork_child_flag(p); #endif #ifdef CONFIG_TRACE_IRQFLAGS p->irq_events = 0; #ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW p->hardirqs_enabled = 1; #else p->hardirqs_enabled = 0; #endif p->hardirq_enable_ip = 0; p->hardirq_enable_event = 0; p->hardirq_disable_ip = _THIS_IP_; p->hardirq_disable_event = 0; p->softirqs_enabled = 1; p->softirq_enable_ip = _THIS_IP_; p->softirq_enable_event = 0; p->softirq_disable_ip = 0; p->softirq_disable_event = 0; p->hardirq_context = 0; p->softirq_context = 0; #endif #ifdef CONFIG_LOCKDEP p->lockdep_depth = 0; /* no locks held yet */ p->curr_chain_key = 0; p->lockdep_recursion = 0; #endif #ifdef CONFIG_DEBUG_MUTEXES p->blocked_on = NULL; /* not blocked yet */ #endif #ifdef CONFIG_CGROUP_MEM_RES_CTLR p->memcg_batch.do_batch = 0; p->memcg_batch.memcg = NULL; #endif /* Perform scheduler related setup. Assign this task to a CPU. */ sched_fork(p, clone_flags); retval = perf_event_init_task(p); if (retval) goto bad_fork_cleanup_policy; if ((retval = audit_alloc(p))) goto bad_fork_cleanup_policy; /* copy all the process information */ if ((retval = copy_semundo(clone_flags, p))) goto bad_fork_cleanup_audit; if ((retval = copy_files(clone_flags, p))) goto bad_fork_cleanup_semundo; if ((retval = copy_fs(clone_flags, p))) goto bad_fork_cleanup_files; if ((retval = copy_sighand(clone_flags, p))) goto bad_fork_cleanup_fs; if ((retval = copy_signal(clone_flags, p))) goto bad_fork_cleanup_sighand; if ((retval = copy_mm(clone_flags, p))) goto bad_fork_cleanup_signal; if ((retval = copy_namespaces(clone_flags, p))) goto bad_fork_cleanup_mm; if ((retval = copy_io(clone_flags, p))) goto bad_fork_cleanup_namespaces; retval = copy_thread(clone_flags, stack_start, stack_size, p, regs); if (retval) goto bad_fork_cleanup_io; if (pid != &init_struct_pid) { retval = -ENOMEM; pid = alloc_pid(p->nsproxy->pid_ns); if (!pid) goto bad_fork_cleanup_io; if (clone_flags & CLONE_NEWPID) { retval = pid_ns_prepare_proc(p->nsproxy->pid_ns); if (retval < 0) goto bad_fork_free_pid; } } p->pid = pid_nr(pid); p->tgid = p->pid; if (clone_flags & CLONE_THREAD) p->tgid = current->tgid; if (current->nsproxy != p->nsproxy) { retval = ns_cgroup_clone(p, pid); if (retval) goto bad_fork_free_pid; } p->set_child_tid = (clone_flags & CLONE_CHILD_SETTID) ? child_tidptr : NULL; /* * Clear TID on mm_release()? */ p->clear_child_tid = (clone_flags & CLONE_CHILD_CLEARTID) ? child_tidptr: NULL; #ifdef CONFIG_FUTEX p->robust_list = NULL; #ifdef CONFIG_COMPAT p->compat_robust_list = NULL; #endif INIT_LIST_HEAD(&p->pi_state_list); p->pi_state_cache = NULL; #endif /* * sigaltstack should be cleared when sharing the same VM */ if ((clone_flags & (CLONE_VM|CLONE_VFORK)) == CLONE_VM) p->sas_ss_sp = p->sas_ss_size = 0; /* * Syscall tracing and stepping should be turned off in the * child regardless of CLONE_PTRACE. */ user_disable_single_step(p); clear_tsk_thread_flag(p, TIF_SYSCALL_TRACE); #ifdef TIF_SYSCALL_EMU clear_tsk_thread_flag(p, TIF_SYSCALL_EMU); #endif clear_all_latency_tracing(p); /* ok, now we should be set up.. */ p->exit_signal = (clone_flags & CLONE_THREAD) ? -1 : (clone_flags & CSIGNAL); p->pdeath_signal = 0; p->exit_state = 0; /* * Ok, make it visible to the rest of the system. * We dont wake it up yet. */ p->group_leader = p; INIT_LIST_HEAD(&p->thread_group); /* Now that the task is set up, run cgroup callbacks if * necessary. We need to run them before the task is visible * on the tasklist. */ cgroup_fork_callbacks(p); cgroup_callbacks_done = 1; /* Need tasklist lock for parent etc handling! */ write_lock_irq(&tasklist_lock); /* CLONE_PARENT re-uses the old parent */ if (clone_flags & (CLONE_PARENT|CLONE_THREAD)) { p->real_parent = current->real_parent; p->parent_exec_id = current->parent_exec_id; } else { p->real_parent = current; p->parent_exec_id = current->self_exec_id; } spin_lock(¤t->sighand->siglock); /* * Process group and session signals need to be delivered to just the * parent before the fork or both the parent and the child after the * fork. Restart if a signal comes in before we add the new process to * it's process group. * A fatal signal pending means that current will exit, so the new * thread can't slip out of an OOM kill (or normal SIGKILL). */ recalc_sigpending(); if (signal_pending(current)) { spin_unlock(¤t->sighand->siglock); write_unlock_irq(&tasklist_lock); retval = -ERESTARTNOINTR; goto bad_fork_free_pid; } if (clone_flags & CLONE_THREAD) { current->signal->nr_threads++; atomic_inc(¤t->signal->live); atomic_inc(¤t->signal->sigcnt); p->group_leader = current->group_leader; list_add_tail_rcu(&p->thread_group, &p->group_leader->thread_group); } if (likely(p->pid)) { tracehook_finish_clone(p, clone_flags, trace); if (thread_group_leader(p)) { if (clone_flags & CLONE_NEWPID) p->nsproxy->pid_ns->child_reaper = p; p->signal->leader_pid = pid; p->signal->tty = tty_kref_get(current->signal->tty); attach_pid(p, PIDTYPE_PGID, task_pgrp(current)); attach_pid(p, PIDTYPE_SID, task_session(current)); list_add_tail(&p->sibling, &p->real_parent->children); list_add_tail_rcu(&p->tasks, &init_task.tasks); __get_cpu_var(process_counts)++; } attach_pid(p, PIDTYPE_PID, pid); nr_threads++; } total_forks++; spin_unlock(¤t->sighand->siglock); write_unlock_irq(&tasklist_lock); proc_fork_connector(p); cgroup_post_fork(p); perf_event_fork(p); return p; bad_fork_free_pid: if (pid != &init_struct_pid) free_pid(pid); bad_fork_cleanup_io: if (p->io_context) exit_io_context(p); bad_fork_cleanup_namespaces: exit_task_namespaces(p); bad_fork_cleanup_mm: if (p->mm) mmput(p->mm); bad_fork_cleanup_signal: if (!(clone_flags & CLONE_THREAD)) free_signal_struct(p->signal); bad_fork_cleanup_sighand: __cleanup_sighand(p->sighand); bad_fork_cleanup_fs: exit_fs(p); /* blocking */ bad_fork_cleanup_files: exit_files(p); /* blocking */ bad_fork_cleanup_semundo: exit_sem(p); bad_fork_cleanup_audit: audit_free(p); bad_fork_cleanup_policy: perf_event_free_task(p); #ifdef CONFIG_NUMA mpol_put(p->mempolicy); bad_fork_cleanup_cgroup: #endif cgroup_exit(p, cgroup_callbacks_done); delayacct_tsk_free(p); module_put(task_thread_info(p)->exec_domain->module); bad_fork_cleanup_count: atomic_dec(&p->cred->user->processes); exit_creds(p); bad_fork_free: free_task(p); fork_out: return ERR_PTR(retval); }
static long fimg2d_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { int ret = 0; struct fimg2d_context *ctx; struct mm_struct *mm; ctx = file->private_data; switch (cmd) { case FIMG2D_BITBLT_BLIT: mm = get_task_mm(current); if(!mm) { fimg2d_err("no mm for ctx\n"); return -ENXIO; } g2d_lock(&ctrl->drvlock); ctx->mm = mm; if (atomic_read(&ctrl->drvact) || atomic_read(&ctrl->suspended)) { fimg2d_err("driver is unavailable, do sw fallback\n"); g2d_unlock(&ctrl->drvlock); mmput(mm); return -EPERM; } ret = fimg2d_add_command(ctrl, ctx, (struct fimg2d_blit __user *)arg); if (ret) { fimg2d_err("add command not allowed.\n"); g2d_unlock(&ctrl->drvlock); mmput(mm); return ret; } ret = fimg2d_request_bitblt(ctrl, ctx); if (ret) { fimg2d_err("request bitblit not allowed.\n"); g2d_unlock(&ctrl->drvlock); mmput(mm); return -EBUSY; } g2d_unlock(&ctrl->drvlock); mmput(mm); break; case FIMG2D_BITBLT_VERSION: { struct fimg2d_version ver; struct fimg2d_platdata *pdata; pdata = to_fimg2d_plat(ctrl->dev); ver.hw = pdata->hw_ver; ver.sw = 0; fimg2d_info("version info. hw(0x%x), sw(0x%x)\n", ver.hw, ver.sw); if (copy_to_user((void *)arg, &ver, sizeof(ver))) return -EFAULT; break; } case FIMG2D_BITBLT_ACTIVATE: { enum driver_act act; if (copy_from_user(&act, (void *)arg, sizeof(act))) return -EFAULT; g2d_lock(&ctrl->drvlock); atomic_set(&ctrl->drvact, act); if (act == DRV_ACT) fimg2d_info("fimg2d driver is activated\n"); else fimg2d_info("fimg2d driver is deactivated\n"); g2d_unlock(&ctrl->drvlock); break; } default: fimg2d_err("unknown ioctl\n"); ret = -EFAULT; break; } return ret; }