/*
 * ptrace_attach - make the calling task the ptrace tracer of @task.
 *
 * Returns 0 on success.  Returns -EPERM when @task belongs to the caller's
 * own thread group, has no mm, or already has PT_PTRACED set.  Otherwise a
 * failure code from mutex_lock_interruptible() or __ptrace_may_access() is
 * passed straight back to the caller.
 */
int ptrace_attach(struct task_struct *task)
{
	int retval;
	unsigned long flags;

	audit_ptrace(task);

	/* Attaching to a member of our own thread group is refused. */
	retval = -EPERM;
	if (same_thread_group(task, current))
		goto out;

	/*
	 * Protect exec's credential calculations against our interference;
	 * SUID, SGID and LSM creds get determined differently under ptrace.
	 */
	retval = mutex_lock_interruptible(&task->cred_exec_mutex);
	if (retval < 0)
		goto out;

	/* Default result for the two rejection checks further down. */
	retval = -EPERM;
repeat:
	/*
	 * Nasty, nasty.
	 *
	 * We want to hold both the task-lock and the
	 * tasklist_lock for writing at the same time.
	 * But that's against the rules (tasklist_lock
	 * is taken for reading by interrupts on other
	 * cpu's that may have task_lock).
	 */
	task_lock(task);
	if (!write_trylock_irqsave(&tasklist_lock, flags)) {
		/*
		 * Could not get tasklist_lock: drop the task lock, spin
		 * until the write lock looks available, then start over.
		 */
		task_unlock(task);
		do {
			cpu_relax();
		} while (!write_can_lock(&tasklist_lock));
		goto repeat;
	}

	/* A task without an mm cannot be attached to. */
	if (!task->mm)
		goto bad;
	/* the same process cannot be attached many times */
	if (task->ptrace & PT_PTRACED)
		goto bad;
	retval = __ptrace_may_access(task, PTRACE_MODE_ATTACH);
	if (retval)
		goto bad;

	/* Go */
	task->ptrace |= PT_PTRACED;
	/* Record on the tracee that the tracer had CAP_SYS_PTRACE at attach. */
	if (capable(CAP_SYS_PTRACE))
		task->ptrace |= PT_PTRACE_CAP;

	__ptrace_link(task, current);

	send_sig_info(SIGSTOP, SEND_SIG_FORCED, task);
bad:
	/* Locks are released in the reverse order of acquisition. */
	write_unlock_irqrestore(&tasklist_lock, flags);
	task_unlock(task);
	mutex_unlock(&task->cred_exec_mutex);
out:
	return retval;
}
static int copy_signal(unsigned long clone_flags, struct task_struct *tsk) { struct signal_struct *sig; if (clone_flags & CLONE_THREAD) return 0; sig = kmem_cache_alloc(signal_cachep, GFP_KERNEL); tsk->signal = sig; if (!sig) return -ENOMEM; atomic_set(&sig->count, 1); atomic_set(&sig->live, 1); init_waitqueue_head(&sig->wait_chldexit); sig->flags = 0; if (clone_flags & CLONE_NEWPID) sig->flags |= SIGNAL_UNKILLABLE; sig->group_exit_code = 0; sig->group_exit_task = NULL; sig->group_stop_count = 0; sig->curr_target = tsk; init_sigpending(&sig->shared_pending); INIT_LIST_HEAD(&sig->posix_timers); hrtimer_init(&sig->real_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); sig->it_real_incr.tv64 = 0; sig->real_timer.function = it_real_fn; sig->leader = 0; /* session leadership doesn't inherit */ sig->tty_old_pgrp = NULL; sig->tty = NULL; sig->utime = sig->stime = sig->cutime = sig->cstime = cputime_zero; sig->gtime = cputime_zero; sig->cgtime = cputime_zero; sig->nvcsw = sig->nivcsw = sig->cnvcsw = sig->cnivcsw = 0; sig->min_flt = sig->maj_flt = sig->cmin_flt = sig->cmaj_flt = 0; sig->inblock = sig->oublock = sig->cinblock = sig->coublock = 0; sig->maxrss = sig->cmaxrss = 0; task_io_accounting_init(&sig->ioac); sig->sum_sched_runtime = 0; taskstats_tgid_init(sig); task_lock(current->group_leader); memcpy(sig->rlim, current->signal->rlim, sizeof sig->rlim); task_unlock(current->group_leader); posix_cpu_timers_init_group(sig); acct_init_pacct(&sig->pacct); tty_audit_fork(sig); #ifdef CONFIG_SCHED_AUTOGROUP sched_autogroup_fork(sig); #endif sig->oom_adj = current->signal->oom_adj; return 0; }
/*
 * unshare() lets a task stop sharing pieces of its execution context that
 * were originally shared through clone().  The copy_* helpers used by
 * do_fork() operate on a task_struct that is still under construction and
 * therefore cannot be reused directly: here the live, current task_struct
 * is the one being modified.
 *
 * Returns 0 on success or the error reported by the first helper that
 * fails; on failure everything prepared so far is released again.
 */
SYSCALL_DEFINE1(unshare, unsigned long, unshare_flags)
{
	struct fs_struct *old_fs, *new_fs = NULL;
	struct files_struct *old_files, *new_fd = NULL;
	struct nsproxy *new_nsproxy = NULL;
	int do_sysvsem;
	int err;

	err = check_unshare_flags(unshare_flags);
	if (err)
		goto bad_unshare_out;

	/*
	 * If unsharing namespace, must also unshare filesystem information.
	 */
	if (unshare_flags & CLONE_NEWNS)
		unshare_flags |= CLONE_FS;

	/*
	 * CLONE_NEWIPC must also detach from the undolist: after switching
	 * to a new ipc namespace, the semaphore arrays from the old
	 * namespace are unreachable.
	 */
	do_sysvsem = (unshare_flags & (CLONE_NEWIPC|CLONE_SYSVSEM)) ? 1 : 0;

	/* Prepare the replacement objects; nothing is installed yet. */
	err = unshare_fs(unshare_flags, &new_fs);
	if (err)
		goto bad_unshare_out;

	err = unshare_fd(unshare_flags, &new_fd);
	if (err)
		goto bad_unshare_cleanup_fs;

	err = unshare_nsproxy_namespaces(unshare_flags, &new_nsproxy, new_fs);
	if (err)
		goto bad_unshare_cleanup_fd;

	if (new_fs || new_fd || do_sysvsem || new_nsproxy) {
		/*
		 * CLONE_SYSVSEM is equivalent to sys_exit().
		 */
		if (do_sysvsem)
			exit_sem(current);

		if (new_nsproxy) {
			switch_task_namespaces(current, new_nsproxy);
			new_nsproxy = NULL;
		}

		task_lock(current);

		if (new_fs) {
			old_fs = current->fs;
			spin_lock(&old_fs->lock);
			current->fs = new_fs;
			/*
			 * Only hand the old fs_struct to the cleanup code
			 * below when we held its last user reference.
			 */
			new_fs = --old_fs->users ? NULL : old_fs;
			spin_unlock(&old_fs->lock);
		}

		if (new_fd) {
			/* Swap: the old table is dropped by the cleanup below. */
			old_files = current->files;
			current->files = new_fd;
			new_fd = old_files;
		}

		task_unlock(current);
	}

	if (new_nsproxy)
		put_nsproxy(new_nsproxy);

bad_unshare_cleanup_fd:
	if (new_fd)
		put_files_struct(new_fd);

bad_unshare_cleanup_fs:
	if (new_fs)
		free_fs_struct(new_fs);

bad_unshare_out:
	return err;
}
static int try_to_freeze_tasks(bool sig_only) { struct task_struct *g, *p; unsigned long end_time; unsigned int todo; bool wq_busy = false; struct timeval start, end; u64 elapsed_csecs64; unsigned int elapsed_csecs; bool wakeup = false; #ifdef CONFIG_SHSYS_CUST struct timespec tu; #endif do_gettimeofday(&start); end_time = jiffies + TIMEOUT; if (!sig_only) freeze_workqueues_begin(); while (true) { todo = 0; read_lock(&tasklist_lock); do_each_thread(g, p) { if (frozen(p) || !freezable(p)) continue; if (!freeze_task(p, sig_only)) continue; /* * Now that we've done set_freeze_flag, don't * perturb a task in TASK_STOPPED or TASK_TRACED. * It is "frozen enough". If the task does wake * up, it will immediately call try_to_freeze. * * Because freeze_task() goes through p's * scheduler lock after setting TIF_FREEZE, it's * guaranteed that either we see TASK_RUNNING or * try_to_stop() after schedule() in ptrace/signal * stop sees TIF_FREEZE. */ if (!task_is_stopped_or_traced(p) && !freezer_should_skip(p)) todo++; } while_each_thread(g, p); read_unlock(&tasklist_lock); if (!sig_only) { wq_busy = freeze_workqueues_busy(); todo += wq_busy; } if (todo && has_wake_lock(WAKE_LOCK_SUSPEND)) { wakeup = 1; break; } if (!todo || time_after(jiffies, end_time)) break; if (pm_wakeup_pending()) { wakeup = true; break; } /* * We need to retry, but first give the freezing tasks some * time to enter the regrigerator. */ #ifdef CONFIG_SHSYS_CUST tu.tv_sec = 0; tu.tv_nsec = 10000000; hrtimer_nanosleep(&tu, NULL, HRTIMER_MODE_REL, CLOCK_MONOTONIC); #else msleep(10); #endif } do_gettimeofday(&end); elapsed_csecs64 = timeval_to_ns(&end) - timeval_to_ns(&start); do_div(elapsed_csecs64, NSEC_PER_SEC / 100); elapsed_csecs = elapsed_csecs64; if (todo) { /* This does not unfreeze processes that are already frozen * (we have slightly ugly calling convention in that respect, * and caller must call thaw_processes() if something fails), * but it cleans up leftover PF_FREEZE requests. 
*/ if(wakeup) { printk("\n"); printk(KERN_ERR "Freezing of %s aborted\n", sig_only ? "user space " : "tasks "); } else { printk("\n"); printk(KERN_ERR "Freezing of tasks failed after %d.%02d seconds " "(%d tasks refusing to freeze, wq_busy=%d):\n", elapsed_csecs / 100, elapsed_csecs % 100, todo - wq_busy, wq_busy); } thaw_workqueues(); read_lock(&tasklist_lock); do_each_thread(g, p) { task_lock(p); if (freezing(p) && !freezer_should_skip(p) && elapsed_csecs > 100) sched_show_task(p); cancel_freezing(p); task_unlock(p); } while_each_thread(g, p); read_unlock(&tasklist_lock); } else {
int fill_procregioninfo(task_t task, uint64_t arg, struct proc_regioninfo_internal *pinfo, uintptr_t *vnodeaddr, uint32_t *vid) { vm_map_t map; vm_map_offset_t address = (vm_map_offset_t )arg; vm_map_entry_t tmp_entry; vm_map_entry_t entry; vm_map_offset_t start; vm_region_extended_info_data_t extended; vm_region_top_info_data_t top; task_lock(task); map = task->map; if (map == VM_MAP_NULL) { task_unlock(task); return(0); } vm_map_reference(map); task_unlock(task); vm_map_lock_read(map); start = address; if (!vm_map_lookup_entry(map, start, &tmp_entry)) { if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) { vm_map_unlock_read(map); vm_map_deallocate(map); return(0); } } else { entry = tmp_entry; } start = entry->vme_start; pinfo->pri_offset = entry->offset; pinfo->pri_protection = entry->protection; pinfo->pri_max_protection = entry->max_protection; pinfo->pri_inheritance = entry->inheritance; pinfo->pri_behavior = entry->behavior; pinfo->pri_user_wired_count = entry->user_wired_count; pinfo->pri_user_tag = entry->alias; if (entry->is_sub_map) { pinfo->pri_flags |= PROC_REGION_SUBMAP; } else { if (entry->is_shared) pinfo->pri_flags |= PROC_REGION_SHARED; } extended.protection = entry->protection; extended.user_tag = entry->alias; extended.pages_resident = 0; extended.pages_swapped_out = 0; extended.pages_shared_now_private = 0; extended.pages_dirtied = 0; extended.external_pager = 0; extended.shadow_depth = 0; vm_map_region_walk(map, start, entry, entry->offset, entry->vme_end - start, &extended); if (extended.external_pager && extended.ref_count == 2 && extended.share_mode == SM_SHARED) extended.share_mode = SM_PRIVATE; top.private_pages_resident = 0; top.shared_pages_resident = 0; vm_map_region_top_walk(entry, &top); pinfo->pri_pages_resident = extended.pages_resident; pinfo->pri_pages_shared_now_private = extended.pages_shared_now_private; pinfo->pri_pages_swapped_out = extended.pages_swapped_out; pinfo->pri_pages_dirtied = extended.pages_dirtied; 
pinfo->pri_ref_count = extended.ref_count; pinfo->pri_shadow_depth = extended.shadow_depth; pinfo->pri_share_mode = extended.share_mode; pinfo->pri_private_pages_resident = top.private_pages_resident; pinfo->pri_shared_pages_resident = top.shared_pages_resident; pinfo->pri_obj_id = top.obj_id; pinfo->pri_address = (uint64_t)start; pinfo->pri_size = (uint64_t)(entry->vme_end - start); pinfo->pri_depth = 0; if ((vnodeaddr != 0) && (entry->is_sub_map == 0)) { *vnodeaddr = (uintptr_t)0; if (fill_vnodeinfoforaddr(entry, vnodeaddr, vid) ==0) { vm_map_unlock_read(map); vm_map_deallocate(map); return(1); } } vm_map_unlock_read(map); vm_map_deallocate(map); return(1); }
/*
 * show_vfsmnt - emit one line describing @mnt into seq_file @m.
 *
 * The line is built from: the device name (the superblock's ->show_devname
 * hook when present, otherwise the mangled mnt_devname or "none"), the
 * mount path, the output of show_type(), " ro"/" rw", the output of
 * show_sb_opts() and show_mnt_opts(), the optional ->show_options hook and
 * a trailing " 0 0".
 *
 * Returns 0 or the first error from ->show_devname / show_sb_opts(); an
 * error from ->show_options is returned after the trailer was written.
 */
static int show_vfsmnt(struct seq_file *m, struct vfsmount *mnt)
{
	struct mount *r = real_mount(mnt);
	int err = 0;
	struct path mnt_path = { .dentry = mnt->mnt_root, .mnt = mnt };
	struct super_block *sb = mnt_path.dentry->d_sb;

	if (sb->s_op->show_devname) {
		err = sb->s_op->show_devname(m, mnt_path.dentry);
		if (err)
			goto out;
	} else {
		mangle(m, r->mnt_devname ? r->mnt_devname : "none");
	}
	seq_putc(m, ' ');
	/* Space, tab, newline and backslash are passed as the escape set. */
	seq_path(m, &mnt_path, " \t\n\\");
	seq_putc(m, ' ');
	show_type(m, sb);
	seq_puts(m, __mnt_is_readonly(mnt) ? " ro" : " rw");
	err = show_sb_opts(m, sb);
	if (err)
		goto out;
	show_mnt_opts(m, mnt);
	if (sb->s_op->show_options)
		err = sb->s_op->show_options(m, mnt_path.dentry);
	seq_puts(m, " 0 0\n");
out:
	return err;
}

/*
 * show_mountinfo - emit one extended line describing @mnt into @m.
 *
 * Output order: mount id, parent mount id, major:minor of the superblock
 * device, the root of the mount (->show_path hook or seq_dentry()), the
 * mount point relative to the reader's root, per-mount ro/rw and options,
 * the optional tagged propagation fields, a " - " separator, then the
 * filesystem type, device name, per-superblock ro/rw and options.
 *
 * Returns 0 or the first non-zero value produced by one of the hooks /
 * helpers whose result is checked below.
 */
static int show_mountinfo(struct seq_file *m, struct vfsmount *mnt)
{
	struct proc_mounts *p = proc_mounts(m);
	struct mount *r = real_mount(mnt);
	struct super_block *sb = mnt->mnt_sb;
	struct path mnt_path = { .dentry = mnt->mnt_root, .mnt = mnt };
	struct path root = p->root;
	int err = 0;

	seq_printf(m, "%i %i %u:%u ", r->mnt_id, r->mnt_parent->mnt_id,
		   MAJOR(sb->s_dev), MINOR(sb->s_dev));
	if (sb->s_op->show_path)
		err = sb->s_op->show_path(m, mnt->mnt_root);
	else
		seq_dentry(m, mnt->mnt_root, " \t\n\\");
	if (err)
		goto out;
	seq_putc(m, ' ');

	/* mountpoints outside of chroot jail will give SEQ_SKIP on this */
	err = seq_path_root(m, &mnt_path, &root, " \t\n\\");
	if (err)
		goto out;

	/* Per-mount read-only state (the superblock's state follows later). */
	seq_puts(m, mnt->mnt_flags & MNT_READONLY ? " ro" : " rw");
	show_mnt_opts(m, mnt);

	/* Tagged fields ("foo:X" or "bar") */
	if (IS_MNT_SHARED(r))
		seq_printf(m, " shared:%i", r->mnt_group_id);
	if (IS_MNT_SLAVE(r)) {
		int master = r->mnt_master->mnt_group_id;
		int dom = get_dominating_id(r, &p->root);
		seq_printf(m, " master:%i", master);
		/* Only printed when a dominating id exists and differs from master. */
		if (dom && dom != master)
			seq_printf(m, " propagate_from:%i", dom);
	}
	if (IS_MNT_UNBINDABLE(r))
		seq_puts(m, " unbindable");

	/* Filesystem specific data */
	seq_puts(m, " - ");
	show_type(m, sb);
	seq_putc(m, ' ');
	if (sb->s_op->show_devname)
		err = sb->s_op->show_devname(m, mnt->mnt_root);
	else
		mangle(m, r->mnt_devname ? r->mnt_devname : "none");
	if (err)
		goto out;
	seq_puts(m, sb->s_flags & MS_RDONLY ? " ro" : " rw");
	err = show_sb_opts(m, sb);
	if (err)
		goto out;
	if (sb->s_op->show_options)
		err = sb->s_op->show_options(m, mnt->mnt_root);
	seq_putc(m, '\n');
out:
	return err;
}

/*
 * show_vfsstat - emit the statistics line for @mnt into @m.
 *
 * Writes "device <name>" (or "no device"), " mounted on <path>",
 * "with fstype <type>" and, when the superblock provides ->show_stats and
 * no earlier error occurred, the output of that hook.  Unlike the two
 * functions above, an error from ->show_devname does not stop the rest of
 * the line from being written; it only suppresses the ->show_stats call.
 *
 * Returns 0 or the error from ->show_devname / ->show_stats.
 */
static int show_vfsstat(struct seq_file *m, struct vfsmount *mnt)
{
	struct mount *r = real_mount(mnt);
	struct path mnt_path = { .dentry = mnt->mnt_root, .mnt = mnt };
	struct super_block *sb = mnt_path.dentry->d_sb;
	int err = 0;

	/* device */
	if (sb->s_op->show_devname) {
		seq_puts(m, "device ");
		err = sb->s_op->show_devname(m, mnt_path.dentry);
	} else {
		if (r->mnt_devname) {
			seq_puts(m, "device ");
			mangle(m, r->mnt_devname);
		} else
			seq_puts(m, "no device");
	}

	/* mount point */
	seq_puts(m, " mounted on ");
	seq_path(m, &mnt_path, " \t\n\\");
	seq_putc(m, ' ');

	/* file system type */
	seq_puts(m, "with fstype ");
	show_type(m, sb);

	/* optional statistics */
	if (sb->s_op->show_stats) {
		seq_putc(m, ' ');
		if (!err)
			err = sb->s_op->show_stats(m, mnt_path.dentry);
	}

	seq_putc(m, '\n');
	return err;
}

/*
 * mounts_open_common - shared open() implementation for the mount files.
 * @inode: proc inode; get_proc_task() is used to find the task behind it
 * @file:  file being opened
 * @show:  per-entry formatter stored in the new proc_mounts
 *
 * Under task_lock() a reference to the task's mount namespace is taken and
 * the task's fs root is fetched with get_fs_root().  A struct proc_mounts
 * is then allocated, its embedded seq_file is published through
 * file->private_data and opened with mounts_op.
 *
 * Returns 0 on success, -EINVAL when there is no task or no nsproxy /
 * mount namespace, -ENOENT when the task has no fs, -ENOMEM when the
 * allocation fails, or the error returned by seq_open().
 */
static int mounts_open_common(struct inode *inode, struct file *file,
			      int (*show)(struct seq_file *, struct vfsmount *))
{
	struct task_struct *task = get_proc_task(inode);
	struct nsproxy *nsp;
	struct mnt_namespace *ns = NULL;
	struct path root;
	struct proc_mounts *p;
	int ret = -EINVAL;

	if (!task)
		goto err;

	task_lock(task);
	nsp = task->nsproxy;
	if (!nsp || !nsp->mnt_ns) {
		task_unlock(task);
		put_task_struct(task);
		goto err;
	}
	ns = nsp->mnt_ns;
	get_mnt_ns(ns);
	if (!task->fs) {
		task_unlock(task);
		put_task_struct(task);
		ret = -ENOENT;
		goto err_put_ns;
	}
	get_fs_root(task->fs, &root);
	task_unlock(task);
	/* The task itself is no longer needed once ns and root are held. */
	put_task_struct(task);

	ret = -ENOMEM;
	p = kmalloc(sizeof(struct proc_mounts), GFP_KERNEL);
	if (!p)
		goto err_put_path;

	/* Must be set before seq_open(): the embedded seq_file is handed over. */
	file->private_data = &p->m;
	ret = seq_open(file, &mounts_op);
	if (ret)
		goto err_free;

	p->ns = ns;
	p->root = root;
	p->m.poll_event = ns->event;
	p->show = show;
	p->cached_event = ~0ULL;

	return 0;

	/* Error unwinding, in reverse order of acquisition. */
 err_free:
	kfree(p);
 err_put_path:
	path_put(&root);
 err_put_ns:
	put_mnt_ns(ns);
 err:
	return ret;
}

/*
 * mounts_release - release() counterpart of mounts_open_common(): drops the
 * root path and mount namespace references, then calls seq_release().
 */
static int mounts_release(struct inode *inode, struct file *file)
{
	struct proc_mounts *p = proc_mounts(file->private_data);
	path_put(&p->root);
	put_mnt_ns(p->ns);
	return seq_release(inode, file);
}

/* open() using show_vfsmnt as the per-entry formatter. */
static int mounts_open(struct inode *inode, struct file *file)
{
	return mounts_open_common(inode, file, show_vfsmnt);
}

/* open() using show_mountinfo as the per-entry formatter. */
static int mountinfo_open(struct inode *inode, struct file *file)
{
	return mounts_open_common(inode, file, show_mountinfo);
}

/* open() using show_vfsstat as the per-entry formatter. */
static int mountstats_open(struct inode *inode, struct file *file)
{
	return mounts_open_common(inode, file, show_vfsstat);
}

/* File operations backed by mounts_open(); supports poll. */
const struct file_operations proc_mounts_operations = {
	.open		= mounts_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= mounts_release,
	.poll		= mounts_poll,
};

/* File operations backed by mountinfo_open(); supports poll. */
const struct file_operations proc_mountinfo_operations = {
	.open		= mountinfo_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= mounts_release,
	.poll		= mounts_poll,
};

/* File operations backed by mountstats_open(); no poll handler is set. */
const struct file_operations proc_mountstats_operations = {
	.open		= mountstats_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= mounts_release,
};
static int lowmem_shrink(struct shrinker *s, struct shrink_control *sc) { struct task_struct *p; struct task_struct *selected = NULL; int rem = 0; int tasksize; int i; int min_adj = OOM_ADJUST_MAX + 1; int selected_tasksize = 0; int selected_oom_adj; int array_size = ARRAY_SIZE(lowmem_adj); int other_free = global_page_state(NR_FREE_PAGES); int other_file = global_page_state(NR_FILE_PAGES) - global_page_state(NR_SHMEM) - global_page_state(NR_MLOCK); int fork_boost = 0; int *adj_array; size_t *min_array; struct zone *zone; if (offlining) { /* Discount all free space in the section being offlined */ for_each_zone(zone) { if (zone_idx(zone) == ZONE_MOVABLE) { other_free -= zone_page_state(zone, NR_FREE_PAGES); lowmem_print(4, "lowmem_shrink discounted " "%lu pages in movable zone\n", zone_page_state(zone, NR_FREE_PAGES)); } } } /* * If we already have a death outstanding, then * bail out right away; indicating to vmscan * that we have nothing further to offer on * this pass. * */ if (lowmem_deathpending && time_before_eq(jiffies, lowmem_deathpending_timeout)) return 0; if (lowmem_fork_boost && time_before_eq(jiffies, lowmem_fork_boost_timeout)) { for (i = 0; i < lowmem_minfree_size; i++) minfree_tmp[i] = lowmem_minfree[i] + lowmem_fork_boost_minfree[i] ; adj_array = fork_boost_adj; min_array = minfree_tmp; } else { adj_array = lowmem_adj; min_array = lowmem_minfree; } if (lowmem_adj_size < array_size) array_size = lowmem_adj_size; if (lowmem_minfree_size < array_size) array_size = lowmem_minfree_size; for (i = 0; i < array_size; i++) { if (other_free < min_array[i] && (other_file < min_array[i] || !shrink_cache_possible(sc->gfp_mask))) { min_adj = adj_array[i]; fork_boost = lowmem_fork_boost_minfree[i]; break; } } if (sc->nr_to_scan > 0) lowmem_print(3, "lowmem_shrink %lu, %x, ofree %d %d, ma %d\n", sc->nr_to_scan, sc->gfp_mask, other_free, other_file, min_adj); rem = global_page_state(NR_ACTIVE_ANON) + global_page_state(NR_ACTIVE_FILE) + 
global_page_state(NR_INACTIVE_ANON) + global_page_state(NR_INACTIVE_FILE); if (sc->nr_to_scan <= 0 || min_adj == OOM_ADJUST_MAX + 1) { lowmem_print(5, "lowmem_shrink %lu, %x, return %d\n", sc->nr_to_scan, sc->gfp_mask, rem); return rem; } selected_oom_adj = min_adj; read_lock(&tasklist_lock); for_each_process(p) { struct mm_struct *mm; struct signal_struct *sig; int oom_adj; task_lock(p); mm = p->mm; sig = p->signal; if (!mm || !sig) { task_unlock(p); continue; } oom_adj = sig->oom_adj; if (oom_adj < min_adj) { task_unlock(p); continue; } tasksize = get_mm_rss(mm); task_unlock(p); if (tasksize <= 0) continue; if (selected) { if (oom_adj < selected_oom_adj) continue; if (oom_adj == selected_oom_adj && tasksize <= selected_tasksize) continue; } selected = p; selected_tasksize = tasksize; selected_oom_adj = oom_adj; lowmem_print(2, "select %d (%s), adj %d, size %d, to kill\n", p->pid, p->comm, oom_adj, tasksize); } if (selected) { if (last_min_adj > selected_oom_adj && (selected_oom_adj == 12 || selected_oom_adj == 9 || selected_oom_adj == 7)) { last_min_adj = selected_oom_adj; lowmem_print(1, "lowmem_shrink: monitor memory status at selected_oom_adj=%d\n", selected_oom_adj); show_meminfo(); dump_tasks(); } lowmem_print(1, "[%s] send sigkill to %d (%s), adj %d, size %dK, min_adj=%d," " free=%dK, file=%dK, fork_boost=%d\n", current->comm, selected->pid, selected->comm, selected_oom_adj, selected_tasksize << 2, min_adj, other_free << 2, other_file << 2, fork_boost << 2); lowmem_deathpending = selected; lowmem_deathpending_timeout = jiffies + HZ; if (selected_oom_adj < 7) { show_meminfo(); dump_tasks(); } force_sig(SIGKILL, selected); rem -= selected_tasksize; } lowmem_print(4, "lowmem_shrink %lu, %x, return %d\n", sc->nr_to_scan, sc->gfp_mask, rem); read_unlock(&tasklist_lock); return rem; }
static int lowmem_shrink(struct shrinker *s, struct shrink_control *sc) { struct task_struct *p; struct task_struct *selected = NULL; int rem = 0; int tasksize; int i; int min_adj = OOM_ADJUST_MAX + 1; int selected_tasksize = 0; int selected_oom_adj; int array_size = ARRAY_SIZE(lowmem_adj); int other_free = global_page_state(NR_FREE_PAGES); int other_file = global_page_state(NR_FILE_PAGES) - global_page_state(NR_SHMEM); /* * If we already have a death outstanding, then * bail out right away; indicating to vmscan * that we have nothing further to offer on this pass. */ if (lowmem_deathpending && time_before_eq(jiffies, lowmem_deathpending_timeout)) return 0; if (lowmem_adj_size < array_size) array_size = lowmem_adj_size; if (lowmem_minfree_size < array_size) array_size = lowmem_minfree_size; for (i = 0; i < array_size; i++) { if (other_free < lowmem_minfree[i] && other_file < lowmem_minfree[i]) { min_adj = lowmem_adj[i]; break; } } rem = global_page_state(NR_ACTIVE_ANON) + global_page_state(NR_ACTIVE_FILE) + global_page_state(NR_INACTIVE_ANON) + global_page_state(NR_INACTIVE_FILE); if (sc->nr_to_scan <= 0 || min_adj == OOM_ADJUST_MAX + 1) { return rem; } selected_oom_adj = min_adj; read_lock(&tasklist_lock); for_each_process(p) { struct mm_struct *mm; struct signal_struct *sig; int oom_adj; task_lock(p); mm = p->mm; sig = p->signal; if (!mm || !sig) { task_unlock(p); continue; } oom_adj = sig->oom_adj; if (oom_adj < min_adj) { task_unlock(p); continue; } tasksize = get_mm_rss(mm); task_unlock(p); if (tasksize <= 0) continue; if (selected) { if (oom_adj < selected_oom_adj) continue; if (oom_adj == selected_oom_adj && tasksize <= selected_tasksize) continue; } selected = p; selected_tasksize = tasksize; selected_oom_adj = oom_adj; } if (selected) { lowmem_deathpending = selected; lowmem_deathpending_timeout = jiffies + HZ; force_sig(SIGKILL, selected); rem -= selected_tasksize; } read_unlock(&tasklist_lock); return rem; }
/*
 * lowmem_shrink - shrinker callback that kills one task when memory is low.
 * @s:          the shrinker (unused)
 * @nr_to_scan: a value <= 0 means "only report the page count"
 * @gfp_mask:   only used in the debug output
 *
 * The file-page count is optionally credited with free swap (bounded by
 * fudgeswap) when CONFIG_SWAP is set.  A minimum oom_adj is then taken from
 * the first table slot whose minfree threshold exceeds the free-page count
 * and whose minfree (or, with lowmem_check_filepages, minfile) threshold
 * exceeds the respective file-page count.  The process with the highest
 * oom_adj at or above that minimum, largest RSS on a tie, gets SIGKILL.
 *
 * Returns 0 while an earlier kill is still pending, otherwise the total of
 * the global active/inactive anon and file pages minus the RSS of the task
 * that was killed, if any.
 */
static int lowmem_shrink(struct shrinker *s, int nr_to_scan, gfp_t gfp_mask)
{
	struct task_struct *p;
	struct task_struct *victim = NULL;
	int rem = 0;
	int i;
	int min_adj = OOM_ADJUST_MAX + 1;
	int victim_size = 0;
	int victim_adj;
	int array_size = ARRAY_SIZE(lowmem_adj);
	int other_free = global_page_state(NR_FREE_PAGES);
	int other_file = global_page_state(NR_FILE_PAGES) -
		global_page_state(NR_SHMEM);
	int lru_file = global_page_state(NR_ACTIVE_FILE) +
		global_page_state(NR_INACTIVE_FILE);

	/*
	 * A kill issued earlier has not timed out yet: report the pending
	 * task and tell vmscan there is nothing more to offer on this pass.
	 */
	if (lowmem_deathpending &&
	    time_before_eq(jiffies, lowmem_deathpending_timeout)) {
		dump_deathpending(lowmem_deathpending);
		return 0;
	}

#ifdef CONFIG_SWAP
	if (fudgeswap != 0) {
		struct sysinfo si;

		si_swapinfo(&si);
		/* Credit free swap as file pages, capped at fudgeswap. */
		if (si.freeswap > 0) {
			if (fudgeswap > si.freeswap)
				other_file += si.freeswap;
			else
				other_file += fudgeswap;
		}
	}
#endif

	/* Never index past the shorter of the two parameter arrays. */
	if (lowmem_adj_size < array_size)
		array_size = lowmem_adj_size;
	if (lowmem_minfree_size < array_size)
		array_size = lowmem_minfree_size;

	for (i = 0; i < array_size; i++) {
		if (other_free < lowmem_minfree[i] &&
		    (other_file < lowmem_minfree[i] ||
		     (lowmem_check_filepages &&
		      (lru_file < lowmem_minfile[i])))) {
			min_adj = lowmem_adj[i];
			break;
		}
	}

	if (nr_to_scan > 0)
		lowmem_print(3, "lowmem_shrink %d, %x, ofree %d %d, ma %d\n",
			     nr_to_scan, gfp_mask, other_free, other_file,
			     min_adj);

	rem = global_page_state(NR_ACTIVE_ANON) +
		global_page_state(NR_ACTIVE_FILE) +
		global_page_state(NR_INACTIVE_ANON) +
		global_page_state(NR_INACTIVE_FILE);

	/* Query-only call, or no threshold crossed: report and leave. */
	if (nr_to_scan <= 0 || min_adj == OOM_ADJUST_MAX + 1) {
		lowmem_print(5, "lowmem_shrink %d, %x, return %d\n",
			     nr_to_scan, gfp_mask, rem);
		return rem;
	}

	victim_adj = min_adj;

	read_lock(&tasklist_lock);
	for_each_process(p) {
		struct mm_struct *mm;
		struct signal_struct *sig;
		int adj = 0;
		int rss = 0;
		int eligible = 0;

		/* Sample oom_adj and RSS under the task lock. */
		task_lock(p);
		mm = p->mm;
		sig = p->signal;
		if (mm && sig) {
			adj = sig->oom_adj;
			if (adj >= min_adj) {
				rss = get_mm_rss(mm);
				eligible = 1;
			}
		}
		task_unlock(p);

		if (!eligible || rss <= 0)
			continue;

		/* Keep the current pick unless this task ranks strictly higher. */
		if (victim &&
		    (adj < victim_adj ||
		     (adj == victim_adj && rss <= victim_size)))
			continue;

		victim = p;
		victim_size = rss;
		victim_adj = adj;
		lowmem_print(2, "select %d (%s), adj %d, size %d, to kill\n",
			     p->pid, p->comm, adj, rss);
	}

	if (victim) {
		lowmem_print(1, "send sigkill to %d (%s), adj %d, size %d\n",
			     victim->pid, victim->comm,
			     victim_adj, victim_size);
		lowmem_deathpending = victim;
		lowmem_deathpending_timeout = jiffies + HZ;
		force_sig(SIGKILL, victim);
		rem -= victim_size;
	}
	lowmem_print(4, "lowmem_shrink %d, %x, return %d\n",
		     nr_to_scan, gfp_mask, rem);
	read_unlock(&tasklist_lock);

	return rem;
}
static inline int copy_signal(unsigned long clone_flags, struct task_struct * tsk) { struct signal_struct *sig; int ret; if (clone_flags & CLONE_THREAD) { atomic_inc(¤t->signal->count); atomic_inc(¤t->signal->live); return 0; } sig = kmem_cache_alloc(signal_cachep, GFP_KERNEL); tsk->signal = sig; if (!sig) return -ENOMEM; ret = copy_thread_group_keys(tsk); if (ret < 0) { kmem_cache_free(signal_cachep, sig); return ret; } atomic_set(&sig->count, 1); atomic_set(&sig->live, 1); init_waitqueue_head(&sig->wait_chldexit); sig->flags = 0; sig->group_exit_code = 0; sig->group_exit_task = NULL; sig->group_stop_count = 0; sig->curr_target = NULL; init_sigpending(&sig->shared_pending); INIT_LIST_HEAD(&sig->posix_timers); hrtimer_init(&sig->real_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); sig->it_real_incr.tv64 = 0; sig->real_timer.function = it_real_fn; sig->tsk = tsk; sig->it_virt_expires = cputime_zero; sig->it_virt_incr = cputime_zero; sig->it_prof_expires = cputime_zero; sig->it_prof_incr = cputime_zero; sig->leader = 0; /* session leadership doesn't inherit */ sig->tty_old_pgrp = NULL; sig->utime = sig->stime = sig->cutime = sig->cstime = cputime_zero; sig->nvcsw = sig->nivcsw = sig->cnvcsw = sig->cnivcsw = 0; sig->min_flt = sig->maj_flt = sig->cmin_flt = sig->cmaj_flt = 0; sig->sum_sched_runtime = 0; INIT_LIST_HEAD(&sig->cpu_timers[0]); INIT_LIST_HEAD(&sig->cpu_timers[1]); INIT_LIST_HEAD(&sig->cpu_timers[2]); taskstats_tgid_init(sig); task_lock(current->group_leader); memcpy(sig->rlim, current->signal->rlim, sizeof sig->rlim); task_unlock(current->group_leader); if (sig->rlim[RLIMIT_CPU].rlim_cur != RLIM_INFINITY) { /* * New sole thread in the process gets an expiry time * of the whole CPU time limit. */ tsk->it_prof_expires = secs_to_cputime(sig->rlim[RLIMIT_CPU].rlim_cur); } acct_init_pacct(&sig->pacct); return 0; }
static int try_to_freeze_tasks(bool sig_only) { struct task_struct *g, *p; unsigned long end_time; unsigned int todo; struct timeval start, end; u64 elapsed_csecs64; unsigned int elapsed_csecs; unsigned int wakeup = 0; do_gettimeofday(&start); end_time = jiffies + TIMEOUT; do { long max_wait; todo = 0; read_lock(&tasklist_lock); do_each_thread(g, p) { if (frozen(p) || !freezeable(p)) continue; if (!freeze_task(p, sig_only)) continue; /* * Now that we've done set_freeze_flag, don't * perturb a task in TASK_STOPPED or TASK_TRACED. * It is "frozen enough". If the task does wake * up, it will immediately call try_to_freeze. */ if (!task_is_stopped_or_traced(p) && !freezer_should_skip(p)) todo++; } while_each_thread(g, p); read_unlock(&tasklist_lock); yield(); /* Yield is okay here */ max_wait = has_wake_lock(WAKE_LOCK_SUSPEND); if (todo && ((max_wait < 0) || time_after(jiffies + max_wait, end_time)) ) { wakeup = 1; break; } if (time_after(jiffies, end_time)) break; } while (todo); do_gettimeofday(&end); elapsed_csecs64 = timeval_to_ns(&end) - timeval_to_ns(&start); do_div(elapsed_csecs64, NSEC_PER_SEC / 100); elapsed_csecs = elapsed_csecs64; if (todo) { /* This does not unfreeze processes that are already frozen * (we have slightly ugly calling convention in that respect, * and caller must call thaw_processes() if something fails), * but it cleans up leftover PF_FREEZE requests. */ if(wakeup) { printk("\n"); printk(KERN_ERR "Freezing of %s aborted\n", sig_only ? "user space " : "tasks "); } else { printk("\n"); printk(KERN_ERR "Freezing of tasks failed after %d.%02d seconds " "(%d tasks refusing to freeze):\n", elapsed_csecs / 100, elapsed_csecs % 100, todo); show_state(); } read_lock(&tasklist_lock); do_each_thread(g, p) { task_lock(p); if (freezing(p) && !freezer_should_skip(p)) sched_show_task(p); cancel_freezing(p); task_unlock(p); } while_each_thread(g, p); read_unlock(&tasklist_lock); } else {
/* * unshare allows a process to 'unshare' part of the process * context which was originally shared using clone. copy_* * functions used by do_fork() cannot be used here directly * because they modify an inactive task_struct that is being * constructed. Here we are modifying the current, active, * task_struct. */ asmlinkage long sys_unshare(unsigned long unshare_flags) { int err = 0; struct fs_struct *fs, *new_fs = NULL; struct mnt_namespace *ns, *new_ns = NULL; struct sighand_struct *new_sigh = NULL; struct mm_struct *mm, *new_mm = NULL, *active_mm = NULL; struct files_struct *fd, *new_fd = NULL; struct sem_undo_list *new_ulist = NULL; struct nsproxy *new_nsproxy = NULL, *old_nsproxy = NULL; struct uts_namespace *uts, *new_uts = NULL; struct ipc_namespace *ipc, *new_ipc = NULL; check_unshare_flags(&unshare_flags); /* Return -EINVAL for all unsupported flags */ err = -EINVAL; if (unshare_flags & ~(CLONE_THREAD|CLONE_FS|CLONE_NEWNS|CLONE_SIGHAND| CLONE_VM|CLONE_FILES|CLONE_SYSVSEM| CLONE_NEWUTS|CLONE_NEWIPC)) goto bad_unshare_out; if ((err = unshare_thread(unshare_flags))) goto bad_unshare_out; if ((err = unshare_fs(unshare_flags, &new_fs))) goto bad_unshare_cleanup_thread; if ((err = unshare_mnt_namespace(unshare_flags, &new_ns, new_fs))) goto bad_unshare_cleanup_fs; if ((err = unshare_sighand(unshare_flags, &new_sigh))) goto bad_unshare_cleanup_ns; if ((err = unshare_vm(unshare_flags, &new_mm))) goto bad_unshare_cleanup_sigh; if ((err = unshare_fd(unshare_flags, &new_fd))) goto bad_unshare_cleanup_vm; if ((err = unshare_semundo(unshare_flags, &new_ulist))) goto bad_unshare_cleanup_fd; if ((err = unshare_utsname(unshare_flags, &new_uts))) goto bad_unshare_cleanup_semundo; if ((err = unshare_ipcs(unshare_flags, &new_ipc))) goto bad_unshare_cleanup_uts; if (new_ns || new_uts || new_ipc) { old_nsproxy = current->nsproxy; new_nsproxy = dup_namespaces(old_nsproxy); if (!new_nsproxy) { err = -ENOMEM; goto bad_unshare_cleanup_ipc; } } if (new_fs || new_ns || new_mm || 
new_fd || new_ulist || new_uts || new_ipc) { task_lock(current); if (new_nsproxy) { current->nsproxy = new_nsproxy; new_nsproxy = old_nsproxy; } if (new_fs) { fs = current->fs; current->fs = new_fs; new_fs = fs; } if (new_ns) { ns = current->nsproxy->mnt_ns; current->nsproxy->mnt_ns = new_ns; new_ns = ns; } if (new_mm) { mm = current->mm; active_mm = current->active_mm; current->mm = new_mm; current->active_mm = new_mm; activate_mm(active_mm, new_mm); new_mm = mm; } if (new_fd) { fd = current->files; current->files = new_fd; new_fd = fd; } if (new_uts) { uts = current->nsproxy->uts_ns; current->nsproxy->uts_ns = new_uts; new_uts = uts; } if (new_ipc) { ipc = current->nsproxy->ipc_ns; current->nsproxy->ipc_ns = new_ipc; new_ipc = ipc; } task_unlock(current); } if (new_nsproxy) put_nsproxy(new_nsproxy); bad_unshare_cleanup_ipc: if (new_ipc) put_ipc_ns(new_ipc); bad_unshare_cleanup_uts: if (new_uts) put_uts_ns(new_uts); bad_unshare_cleanup_semundo: bad_unshare_cleanup_fd: if (new_fd) put_files_struct(new_fd); bad_unshare_cleanup_vm: if (new_mm) mmput(new_mm); bad_unshare_cleanup_sigh: if (new_sigh) if (atomic_dec_and_test(&new_sigh->count)) kmem_cache_free(sighand_cachep, new_sigh); bad_unshare_cleanup_ns: if (new_ns) put_mnt_ns(new_ns); bad_unshare_cleanup_fs: if (new_fs) put_fs_struct(new_fs); bad_unshare_cleanup_thread: bad_unshare_out: return err; }
static int try_to_freeze_tasks(bool sig_only) { struct task_struct *g, *p; unsigned long end_time; unsigned int todo; bool wq_busy = false; struct timeval start, end; u64 elapsed_csecs64; unsigned int elapsed_csecs; unsigned int wakeup = 0; do_gettimeofday(&start); end_time = jiffies + TIMEOUT; if (!sig_only) freeze_workqueues_begin(); while (true) { todo = 0; read_lock(&tasklist_lock); do_each_thread(g, p) { if (frozen(p) || !freezeable(p)) continue; if (!freeze_task(p, sig_only)) continue; /* * Now that we've done set_freeze_flag, don't * perturb a task in TASK_STOPPED or TASK_TRACED. * It is "frozen enough". If the task does wake * up, it will immediately call try_to_freeze. */ if (!task_is_stopped_or_traced(p) && !freezer_should_skip(p)) todo++; } while_each_thread(g, p); read_unlock(&tasklist_lock); if (!sig_only) { wq_busy = freeze_workqueues_busy(); todo += wq_busy; } if (todo && has_wake_lock(WAKE_LOCK_SUSPEND)) { printk(KERN_ERR "Freezing aborted by %s\n", p->comm); wakeup = 1; break; } if (!todo || time_after(jiffies, end_time)) break; /* * We need to retry, but first give the freezing tasks some * time to enter the regrigerator. */ msleep(10); } do_gettimeofday(&end); elapsed_csecs64 = timeval_to_ns(&end) - timeval_to_ns(&start); do_div(elapsed_csecs64, NSEC_PER_SEC / 100); elapsed_csecs = elapsed_csecs64; if (todo) { /* This does not unfreeze processes that are already frozen * (we have slightly ugly calling convention in that respect, * and caller must call thaw_processes() if something fails), * but it cleans up leftover PF_FREEZE requests. */ if(wakeup) { printk("\n"); printk(KERN_ERR "Freezing of %s aborted\n", sig_only ? 
"user space " : "tasks "); } else { printk("\n"); printk(KERN_ERR "Freezing of tasks failed after %d.%02d seconds " "(%d tasks refusing to freeze, wq_busy=%d):\n", elapsed_csecs / 100, elapsed_csecs % 100, todo - wq_busy, wq_busy); } thaw_workqueues(); read_lock(&tasklist_lock); do_each_thread(g, p) { task_lock(p); if (freezing(p) && !freezer_should_skip(p) && elapsed_csecs > 100) sched_show_task(p); cancel_freezing(p); task_unlock(p); } while_each_thread(g, p); read_unlock(&tasklist_lock); } else {
/*
 * force_contiguous_lowmem_shrink - kill the process that accounts for the
 * most GPU video/contiguous memory in order to free some of it.
 *
 * @Kernel: kernel object handed to gckKERNEL_QueryProcessDB() to look up
 *          per-process memory counters.
 *
 * Walks every process, sums the page counts reported for
 * gcvDB_VIDEO_MEMORY and gcvDB_CONTIGUOUS, and picks the candidate with
 * the highest oom_score_adj (ties broken by larger size).  The chosen
 * process is sent SIGKILL and recorded in lowmem_deathpending.
 *
 * Returns 0 when a kill is already pending (and its timeout has not
 * expired) or when a process was killed; returns -1 when no candidate
 * was found.
 */
static int force_contiguous_lowmem_shrink(IN gckKERNEL Kernel)
{
	struct task_struct *task;
	struct task_struct *victim = NULL;
	int victim_size = 0;
	int victim_adj;
	int floor_adj = 0;
	int size;
	int status = -1;

	/*
	 * If a death is already outstanding, bail out right away and
	 * report that there is nothing further to offer on this pass.
	 */
	if (lowmem_deathpending &&
	    time_before_eq(jiffies, lowmem_deathpending_timeout))
		return 0;

	victim_adj = floor_adj;

	rcu_read_lock();
	for_each_process(task) {
		struct mm_struct *mm;
		struct signal_struct *sig;
		gcuDATABASE_INFO info;
		int adj;

		task_lock(task);
		mm = task->mm;
		sig = task->signal;
		if (!mm || !sig) {
			task_unlock(task);
			continue;
		}

		adj = sig->oom_score_adj;
		if (adj < floor_adj) {
			task_unlock(task);
			continue;
		}

		size = 0;
		task_unlock(task);

		/*
		 * NOTE(review): the RCU read lock is dropped around the
		 * database queries and re-taken afterwards while still
		 * inside for_each_process(); nothing visible here pins
		 * 'task' across that window - confirm this is safe.
		 */
		rcu_read_unlock();

		if (gckKERNEL_QueryProcessDB(Kernel, task->pid, gcvFALSE,
					     gcvDB_VIDEO_MEMORY,
					     &info) == gcvSTATUS_OK) {
			size += info.counters.bytes / PAGE_SIZE;
		}
		if (gckKERNEL_QueryProcessDB(Kernel, task->pid, gcvFALSE,
					     gcvDB_CONTIGUOUS,
					     &info) == gcvSTATUS_OK) {
			size += info.counters.bytes / PAGE_SIZE;
		}

		rcu_read_lock();

		if (size <= 0)
			continue;

		gckOS_Print("<gpu> pid %d (%s), adj %d, size %d \n",
			    task->pid, task->comm, adj, size);

		/* Keep the current victim unless this one ranks strictly higher. */
		if (victim &&
		    (adj < victim_adj ||
		     (adj == victim_adj && size <= victim_size)))
			continue;

		victim = task;
		victim_size = size;
		victim_adj = adj;
	}

	if (victim) {
		gckOS_Print("<gpu> send sigkill to %d (%s), adj %d, size %d\n",
			    victim->pid, victim->comm,
			    victim_adj, victim_size);
		lowmem_deathpending = victim;
		lowmem_deathpending_timeout = jiffies + HZ;
		force_sig(SIGKILL, victim);
		status = 0;
	}
	rcu_read_unlock();

	return status;
}
/*
 * ptrace_attach - make current the ptrace tracer of @task.
 *
 * @task: the task to attach to.
 *
 * Returns 0 on success.  Returns -EPERM when @task is a kernel thread,
 * belongs to the caller's own thread group, has a non-zero exit_state or
 * is already being ptraced; -ERESTARTNOINTR when taking
 * cred_guard_mutex is interrupted; otherwise the non-zero result of
 * __ptrace_may_access().
 *
 * Locks are taken in this order: cred_guard_mutex, then task_lock (only
 * around the access check), then tasklist_lock for writing, then the
 * target's siglock.  If the target was already stopped, the function
 * waits (after dropping all locks) until GROUP_STOP_TRAPPING is cleared.
 */
static int ptrace_attach(struct task_struct *task)
{
	bool wait_trap = false;
	int retval;

	audit_ptrace(task);

	retval = -EPERM;
	if (unlikely(task->flags & PF_KTHREAD))
		goto out;
	if (same_thread_group(task, current))
		goto out;

	/*
	 * Protect exec's credential calculations against our interference;
	 * SUID, SGID and LSM creds get determined differently
	 * under ptrace.
	 */
	retval = -ERESTARTNOINTR;
	if (mutex_lock_interruptible(&task->signal->cred_guard_mutex))
		goto out;

	/* The access check is done under task_lock only. */
	task_lock(task);
	retval = __ptrace_may_access(task, PTRACE_MODE_ATTACH);
	task_unlock(task);
	if (retval)
		goto unlock_creds;

	write_lock_irq(&tasklist_lock);
	retval = -EPERM;
	if (unlikely(task->exit_state))
		goto unlock_tasklist;
	/* Any existing ptrace flags mean the task is already traced. */
	if (task->ptrace)
		goto unlock_tasklist;

	task->ptrace = PT_PTRACED;
	if (task_ns_capable(task, CAP_SYS_PTRACE))
		task->ptrace |= PT_PTRACE_CAP;

	__ptrace_link(task, current);
	send_sig_info(SIGSTOP, SEND_SIG_FORCED, task);

	spin_lock(&task->sighand->siglock);

	/*
	 * If the task is already STOPPED, set GROUP_STOP_PENDING and
	 * TRAPPING, and kick it so that it transits to TRACED.  TRAPPING
	 * will be cleared if the child completes the transition or any
	 * event which clears the group stop states happens.  We'll wait
	 * for the transition to complete before returning from this
	 * function.
	 *
	 * This hides STOPPED -> RUNNING -> TRACED transition from the
	 * attaching thread but a different thread in the same group can
	 * still observe the transient RUNNING state.  IOW, if another
	 * thread's WNOHANG wait(2) on the stopped tracee races against
	 * ATTACH, the wait(2) may fail due to the transient RUNNING.
	 *
	 * The following task_is_stopped() test is safe as both transitions
	 * in and out of STOPPED are protected by siglock.
	 */
	if (task_is_stopped(task)) {
		task->group_stop |= GROUP_STOP_PENDING | GROUP_STOP_TRAPPING;
		signal_wake_up_state(task, __TASK_STOPPED);
		wait_trap = true;
	}

	spin_unlock(&task->sighand->siglock);

	retval = 0;
unlock_tasklist:
	write_unlock_irq(&tasklist_lock);
unlock_creds:
	mutex_unlock(&task->signal->cred_guard_mutex);
out:
	/* wait_trap is only set on the success path, after all locks are dropped. */
	if (wait_trap)
		wait_event(current->signal->wait_chldexit,
			   !(task->group_stop & GROUP_STOP_TRAPPING));
	return retval;
}
/*
 * lowmem_shrink - shrinker callback of the low-memory killer.
 *
 * @s:          the shrinker (unused here).
 * @nr_to_scan: scan request; values <= 0 only query the remaining count.
 * @gfp_mask:   allocation mask, used for logging only.
 *
 * Compares the free and file page counts against the lowmem_minfree[]
 * thresholds to pick the minimum oom_adj that is eligible for killing,
 * then walks the task list and sends SIGKILL to the process with the
 * highest oom_adj (ties broken by larger RSS).
 *
 * Returns 0 when a kill is still pending; otherwise the sum of the
 * active/inactive anon and file page counts, reduced by the RSS of the
 * killed process if one was killed.  With SEC_ADJUST_LMK defined it
 * returns 0 when no threshold is crossed and -1 when no victim is found.
 */
static int lowmem_shrink(struct shrinker *s, int nr_to_scan, gfp_t gfp_mask)
{
	struct task_struct *task;
	struct task_struct *victim = NULL;
	int victim_size = 0;
	int victim_adj;
	int threshold_adj = OOM_ADJUST_MAX + 1;
	int levels = ARRAY_SIZE(lowmem_adj);
	int free_pages = global_page_state(NR_FREE_PAGES);
#ifdef SEC_ADJUST_LMK
	int file_pages = global_page_state(NR_INACTIVE_FILE) +
			 global_page_state(NR_ACTIVE_FILE);
#else
	int file_pages = global_page_state(NR_FILE_PAGES) -
			 global_page_state(NR_SHMEM);
#endif
	int remaining;
	int idx;

	/*
	 * If a death is already outstanding, bail out right away and tell
	 * vmscan there is nothing further to offer on this pass.
	 */
	if (lowmem_deathpending &&
	    time_before_eq(jiffies, lowmem_deathpending_timeout))
		return 0;

	/* Only use as many levels as both parameter arrays provide. */
	if (levels > lowmem_adj_size)
		levels = lowmem_adj_size;
	if (levels > lowmem_minfree_size)
		levels = lowmem_minfree_size;

	for (idx = 0; idx < levels; idx++) {
		int below;

#ifdef SEC_ADJUST_LMK
		below = (free_pages + file_pages) < lowmem_minfree[idx];
#else
		below = free_pages < lowmem_minfree[idx] &&
			file_pages < lowmem_minfree[idx];
#endif
		if (below) {
			threshold_adj = lowmem_adj[idx];
			break;
		}
	}

#ifdef SEC_ADJUST_LMK
	if (threshold_adj == OOM_ADJUST_MAX + 1)
		return 0;
#endif

	if (nr_to_scan > 0)
		lowmem_print(3, "lowmem_shrink %d, %x, ofree %d %d, ma %d\n",
			     nr_to_scan, gfp_mask, free_pages, file_pages,
			     threshold_adj);

	remaining = global_page_state(NR_ACTIVE_ANON) +
		    global_page_state(NR_ACTIVE_FILE) +
		    global_page_state(NR_INACTIVE_ANON) +
		    global_page_state(NR_INACTIVE_FILE);

#ifdef SEC_ADJUST_LMK
	if (nr_to_scan <= 0)
#else
	if (nr_to_scan <= 0 || threshold_adj == OOM_ADJUST_MAX + 1)
#endif
	{
		lowmem_print(5, "lowmem_shrink %d, %x, return %d\n",
			     nr_to_scan, gfp_mask, remaining);
		return remaining;
	}

	victim_adj = threshold_adj;

	read_lock(&tasklist_lock);
	for_each_process(task) {
		struct mm_struct *mm;
		struct signal_struct *sig;
		int adj;
		int size;

		task_lock(task);
		mm = task->mm;
		sig = task->signal;
		if (!mm || !sig) {
			task_unlock(task);
			continue;
		}
		adj = sig->oom_adj;
		if (adj < threshold_adj) {
			task_unlock(task);
			continue;
		}
		size = get_mm_rss(mm);
		task_unlock(task);

		if (size <= 0)
			continue;

		/* Keep the current victim unless this one ranks strictly higher. */
		if (victim &&
		    (adj < victim_adj ||
		     (adj == victim_adj && size <= victim_size)))
			continue;

		victim = task;
		victim_size = size;
		victim_adj = adj;
		lowmem_print(2, "select %d (%s), adj %d, size %d, to kill\n",
			     task->pid, task->comm, adj, size);
	}

	if (!victim) {
#ifdef SEC_ADJUST_LMK
		remaining = -1;
#endif
		goto report;
	}

	/* The chosen task is already dying: do not signal it again. */
	if (fatal_signal_pending(victim)) {
		pr_warning("process %d is suffering a slow death\n",
			   victim->pid);
		goto unlock;
	}

	lowmem_print(1, "send sigkill to %d (%s), adj %d, size %d\n",
		     victim->pid, victim->comm, victim_adj, victim_size);
	lowmem_deathpending = victim;
	lowmem_deathpending_timeout = jiffies + HZ;
	force_sig(SIGKILL, victim);
	remaining -= victim_size;

report:
	lowmem_print(4, "lowmem_shrink %d, %x, return %d\n",
		     nr_to_scan, gfp_mask, remaining);
unlock:
	read_unlock(&tasklist_lock);
	return remaining;
}
/*
 * lowmem_shrink - shrinker callback of the low-memory killer.
 *
 * @nr_to_scan: scan request; values <= 0 only query the remaining count.
 * @gfp_mask:   allocation mask, used for logging only.
 *
 * A level of lowmem_minfree[] is crossed when both the free and the file
 * page counts are below it; the matching lowmem_adj[] entry becomes the
 * minimum oom_adj eligible for killing.  The process with the highest
 * oom_adj (ties broken by larger RSS) is sent SIGKILL, recorded in
 * lowmem_deathpending, and task_nb is registered via
 * task_free_register().
 *
 * Returns 0 while a previous kill is still pending; otherwise the sum of
 * the active/inactive anon and file page counts, reduced by the RSS of
 * the killed process if one was killed.
 */
static int lowmem_shrink(int nr_to_scan, gfp_t gfp_mask)
{
	struct task_struct *task;
	struct task_struct *victim = NULL;
	int victim_size = 0;
	int victim_adj;
	int threshold_adj = OOM_ADJUST_MAX + 1;
	int levels = ARRAY_SIZE(lowmem_adj);
	int free_pages = global_page_state(NR_FREE_PAGES);
	int file_pages = global_page_state(NR_FILE_PAGES);
	int remaining;
	int idx;

	/*
	 * If a death is already outstanding, bail out right away and tell
	 * vmscan there is nothing further to offer on this pass.
	 */
	if (lowmem_deathpending)
		return 0;

	/* Only use as many levels as both parameter arrays provide. */
	if (levels > lowmem_adj_size)
		levels = lowmem_adj_size;
	if (levels > lowmem_minfree_size)
		levels = lowmem_minfree_size;

	for (idx = 0; idx < levels; idx++) {
		if (free_pages >= lowmem_minfree[idx] ||
		    file_pages >= lowmem_minfree[idx])
			continue;
		threshold_adj = lowmem_adj[idx];
		break;
	}

	if (nr_to_scan > 0)
		lowmem_print(3, "lowmem_shrink %d, %x, ofree %d %d, ma %d\n",
			     nr_to_scan, gfp_mask, free_pages, file_pages,
			     threshold_adj);

	remaining = global_page_state(NR_ACTIVE_ANON) +
		    global_page_state(NR_ACTIVE_FILE) +
		    global_page_state(NR_INACTIVE_ANON) +
		    global_page_state(NR_INACTIVE_FILE);

	/* Query-only call, or no threshold crossed: just report the count. */
	if (nr_to_scan <= 0 || threshold_adj == OOM_ADJUST_MAX + 1) {
		lowmem_print(5, "lowmem_shrink %d, %x, return %d\n",
			     nr_to_scan, gfp_mask, remaining);
		return remaining;
	}

	victim_adj = threshold_adj;

	read_lock(&tasklist_lock);
	for_each_process(task) {
		struct mm_struct *mm;
		struct signal_struct *sig;
		int adj;
		int size;

		task_lock(task);
		mm = task->mm;
		sig = task->signal;
		if (!mm || !sig) {
			task_unlock(task);
			continue;
		}
		adj = sig->oom_adj;
		if (adj < threshold_adj) {
			task_unlock(task);
			continue;
		}
		size = get_mm_rss(mm);
		task_unlock(task);

		if (size <= 0)
			continue;

		/* Keep the current victim unless this one ranks strictly higher. */
		if (victim &&
		    (adj < victim_adj ||
		     (adj == victim_adj && size <= victim_size)))
			continue;

		victim = task;
		victim_size = size;
		victim_adj = adj;
		lowmem_print(2, "select %d (%s), adj %d, size %d, to kill\n",
			     task->pid, task->comm, adj, size);
	}

	if (victim) {
		lowmem_print(1, "send sigkill to %d (%s), adj %d, size %d\n",
			     victim->pid, victim->comm,
			     victim_adj, victim_size);
		lowmem_deathpending = victim;
		task_free_register(&task_nb);
		force_sig(SIGKILL, victim);
		remaining -= victim_size;
	}

	lowmem_print(4, "lowmem_shrink %d, %x, return %d\n",
		     nr_to_scan, gfp_mask, remaining);
	read_unlock(&tasklist_lock);
	return remaining;
}
/*
 * unshare - stop sharing parts of the caller's execution context.
 *
 * A process uses this to 'unshare' pieces of its context that were
 * originally shared through clone().  The copy_* helpers used by
 * do_fork() cannot be called directly here because they operate on an
 * inactive task_struct that is still being constructed, whereas this
 * code modifies the current, active task_struct.
 *
 * @unshare_flags: CLONE_* bits naming what to unshare.
 *
 * Returns 0 on success or the error reported by check_unshare_flags()
 * or by whichever unshare_*() helper failed.
 */
SYSCALL_DEFINE1(unshare, unsigned long, unshare_flags)
{
	struct fs_struct *new_fs = NULL, *prev_fs;
	struct files_struct *new_fd = NULL, *prev_fd;
	struct cred *new_cred = NULL;
	struct nsproxy *new_nsproxy = NULL;
	int do_sysvsem;
	int err;

	/*
	 * Some flags imply others.  The order matters because the
	 * implications chain: NEWUSER -> THREAD -> VM -> SIGHAND.
	 */

	/* If unsharing a user namespace must also unshare the thread. */
	if (unshare_flags & CLONE_NEWUSER)
		unshare_flags |= CLONE_THREAD | CLONE_FS;

	/* If unsharing a thread from a thread group, must also unshare vm. */
	if (unshare_flags & CLONE_THREAD)
		unshare_flags |= CLONE_VM;

	/* If unsharing vm, must also unshare signal handlers. */
	if (unshare_flags & CLONE_VM)
		unshare_flags |= CLONE_SIGHAND;

	/* If unsharing namespace, must also unshare filesystem information. */
	if (unshare_flags & CLONE_NEWNS)
		unshare_flags |= CLONE_FS;

	err = check_unshare_flags(unshare_flags);
	if (err)
		goto bad_unshare_out;

	/*
	 * CLONE_NEWIPC must also detach from the undolist: after switching
	 * to a new ipc namespace, the semaphore arrays from the old
	 * namespace are unreachable.
	 */
	do_sysvsem = (unshare_flags & (CLONE_NEWIPC|CLONE_SYSVSEM)) != 0;

	/* Prepare the replacements; nothing is installed until all succeed. */
	err = unshare_fs(unshare_flags, &new_fs);
	if (err)
		goto bad_unshare_out;

	err = unshare_fd(unshare_flags, &new_fd);
	if (err)
		goto bad_unshare_cleanup_fs;

	err = unshare_userns(unshare_flags, &new_cred);
	if (err)
		goto bad_unshare_cleanup_fd;

	err = unshare_nsproxy_namespaces(unshare_flags, &new_nsproxy,
					 new_cred, new_fs);
	if (err)
		goto bad_unshare_cleanup_cred;

	/* Nothing to install: fall straight into the (no-op) cleanup tail. */
	if (!(new_fs || new_fd || do_sysvsem || new_cred || new_nsproxy))
		goto bad_unshare_cleanup_cred;

	if (do_sysvsem) {
		/* CLONE_SYSVSEM is equivalent to sys_exit(). */
		exit_sem(current);
	}

	if (unshare_flags & CLONE_NEWIPC) {
		/* Orphan segments in old ns (see sem above). */
		exit_shm(current);
		shm_init_task(current);
	}

	if (new_nsproxy)
		switch_task_namespaces(current, new_nsproxy);

	task_lock(current);

	if (new_fs) {
		prev_fs = current->fs;
		spin_lock(&prev_fs->lock);
		current->fs = new_fs;
		/* Free the old fs_struct below only if we were its last user. */
		new_fs = (--prev_fs->users) ? NULL : prev_fs;
		spin_unlock(&prev_fs->lock);
	}

	if (new_fd) {
		prev_fd = current->files;
		current->files = new_fd;
		new_fd = prev_fd;
	}

	task_unlock(current);

	if (new_cred) {
		/* Install the new user namespace */
		commit_creds(new_cred);
		new_cred = NULL;
	}

bad_unshare_cleanup_cred:
	if (new_cred)
		put_cred(new_cred);

bad_unshare_cleanup_fd:
	if (new_fd)
		put_files_struct(new_fd);

bad_unshare_cleanup_fs:
	if (new_fs)
		free_fs_struct(new_fs);

bad_unshare_out:
	return err;
}
/*
 * ptrace_start - common front end that resolves the target of a ptrace
 * request and checks that it is in a state the request may act on.
 *
 * @pid:     pid of the target task.
 * @request: the PTRACE_* request code.
 * @childp:  out: the target task, with a reference held.
 * @enginep: out: the utrace engine attached with ptrace_utrace_ops.
 * @statep:  out: the engine's ptrace_state, with its refcnt raised.
 *
 * PTRACE_TRACEME and PTRACE_ATTACH are completed here and their result
 * is returned directly.  For every other request the out parameters are
 * filled in and -EIO is returned; presumably the caller treats -EIO as
 * "not handled yet, continue with the request" - confirm against the
 * caller.  Failures return -ESRCH (no such task, no matching engine, not
 * traced by current, not stopped, exiting or dumping core) or -EPERM
 * (pid 1), and the task reference is dropped.
 */
static int ptrace_start(long pid, long request,
			struct task_struct **childp,
			struct utrace_attached_engine **enginep,
			struct ptrace_state **statep)
{
	struct task_struct *child;
	struct utrace_attached_engine *engine;
	struct ptrace_state *state;
	int ret;

	NO_LOCKS;

	if (request == PTRACE_TRACEME)
		return ptrace_traceme();

	/* Look the target up and pin it before dropping tasklist_lock. */
	ret = -ESRCH;
	read_lock(&tasklist_lock);
	child = find_task_by_pid(pid);
	if (child)
		get_task_struct(child);
	read_unlock(&tasklist_lock);
	pr_debug("ptrace pid %ld => %p\n", pid, child);
	if (!child)
		goto out;

	ret = -EPERM;
	if (pid == 1)		/* you may not mess with init */
		goto out_tsk;

	if (request == PTRACE_ATTACH) {
		ret = ptrace_attach(child);
		goto out_tsk;
	}

	/* engine->data is read under RCU until state->refcnt is taken. */
	rcu_read_lock();
	engine = utrace_attach(child, UTRACE_ATTACH_MATCH_OPS,
			       &ptrace_utrace_ops, NULL);
	ret = -ESRCH;
	if (IS_ERR(engine) || engine == NULL)
		goto out_tsk_rcu;
	state = rcu_dereference(engine->data);
	/* Only the recorded tracer may issue requests. */
	if (state == NULL || state->parent != current)
		goto out_tsk_rcu;

	/*
	 * Traditional ptrace behavior demands that the target already be
	 * quiescent, but not dead.
	 */
	if (request != PTRACE_KILL
	    && !(engine->flags & UTRACE_ACTION_QUIESCE)) {
		/*
		 * If it's in job control stop, turn it into proper quiescence.
		 */
		struct sighand_struct *sighand;
		unsigned long flags;
		sighand = lock_task_sighand(child, &flags);
		if (likely(sighand != NULL)) {
			/* ret stays -ESRCH unless the child is TASK_STOPPED. */
			if (child->state == TASK_STOPPED)
				ret = 0;
			unlock_task_sighand(child, &flags);
		}
		if (ret == 0) {
			ret = ptrace_update(child, state,
					    UTRACE_ACTION_QUIESCE, 0);
			if (unlikely(ret == -EALREADY))
				ret = -ESRCH;
			if (unlikely(ret))
				BUG_ON(ret != -ESRCH);
		}

		if (ret) {
			pr_debug("%d not stopped (%lu)\n",
				 child->pid, child->state);
			goto out_tsk_rcu;
		}

		ret = -ESRCH;  /* Return value for exit_state bail-out.  */
	}

	/* Pin the state so it survives leaving the RCU read section. */
	atomic_inc(&state->refcnt);
	rcu_read_unlock();

	NO_LOCKS;

	/*
	 * We do this for all requests to match traditional ptrace behavior.
	 * If the machine state synchronization done at context switch time
	 * includes e.g. writing back to user memory, we want to make sure
	 * that has finished before a PTRACE_PEEKDATA can fetch the results.
	 * On most machines, only regset data is affected by context switch
	 * and calling utrace_regset later on will take care of that, so
	 * this is superfluous.
	 *
	 * To do this purely in utrace terms, we could do:
	 *  (void) utrace_regset(child, engine, utrace_native_view(child), 0);
	 */
	if (request != PTRACE_KILL) {
		wait_task_inactive(child);
		while (child->state != TASK_TRACED && child->state != TASK_STOPPED) {
			/* Give up (with ret == -ESRCH) if the child is exiting... */
			if (child->exit_state) {
				__ptrace_state_free(state);
				goto out_tsk;
			}

			/* ...or if its mm has core_waiters set. */
			task_lock(child);
			if (child->mm && child->mm->core_waiters) {
				task_unlock(child);
				__ptrace_state_free(state);
				goto out_tsk;
			}
			task_unlock(child);

			/*
			 * This is a dismal kludge, but it only comes up on ia64.
			 * It might be blocked inside regset->writeback() called
			 * from ptrace_report(), when it's on its way to quiescing
			 * in TASK_TRACED real soon now.  We actually need that
			 * writeback call to have finished, before a PTRACE_PEEKDATA
			 * here, for example.  So keep waiting until it's really there.
			 */
			yield();
			wait_task_inactive(child);
		}
	}
	wait_task_inactive(child);

	/* Hand the pinned task, engine and state to the caller. */
	*childp = child;
	*enginep = engine;
	*statep = state;
	return -EIO;

out_tsk_rcu:
	rcu_read_unlock();
out_tsk:
	NO_LOCKS;
	put_task_struct(child);
out:
	return ret;
}
struct ion_client *ion_client_create(struct ion_device *dev, unsigned int heap_mask, const char *name) { struct ion_client *client; struct task_struct *task; struct rb_node **p; struct rb_node *parent = NULL; struct ion_client *entry; pid_t pid; unsigned int name_len = strnlen(name, 64); get_task_struct(current->group_leader); task_lock(current->group_leader); pid = task_pid_nr(current->group_leader); /* don't bother to store task struct for kernel threads, they can't be killed anyway */ if (current->group_leader->flags & PF_KTHREAD) { put_task_struct(current->group_leader); task = NULL; } else { task = current->group_leader; } task_unlock(current->group_leader); /* if this isn't a kernel thread, see if a client already exists */ if (task) { client = ion_client_lookup(dev, task); if (!IS_ERR_OR_NULL(client)) { put_task_struct(current->group_leader); return client; } } client = kzalloc(sizeof(struct ion_client), GFP_KERNEL); if (!client) { put_task_struct(current->group_leader); return ERR_PTR(-ENOMEM); } client->dev = dev; client->handles = RB_ROOT; mutex_init(&client->lock); client->name = kzalloc(name_len+1, GFP_KERNEL); if (!client->name) { put_task_struct(current->group_leader); kfree(client); return ERR_PTR(-ENOMEM); } else { strlcpy(client->name, name, name_len+1); } client->heap_mask = heap_mask; client->task = task; client->pid = pid; kref_init(&client->ref); mutex_lock(&dev->lock); if (task) { p = &dev->user_clients.rb_node; while (*p) { parent = *p; entry = rb_entry(parent, struct ion_client, node); if (task < entry->task) p = &(*p)->rb_left; else if (task > entry->task) p = &(*p)->rb_right; } rb_link_node(&client->node, parent, p); rb_insert_color(&client->node, &dev->user_clients); } else {
/*
 * sys_setrlimit - set one resource limit of the calling process.
 *
 * @resource: index of the limit to change (must be < RLIM_NLIMITS).
 * @rlim:     user pointer to the new soft/hard limit pair.
 *
 * Returns 0 on success, -EINVAL for an out-of-range resource or a soft
 * limit above the hard limit, -EFAULT when @rlim cannot be read, -EPERM
 * when the hard limit is raised without CAP_SYS_RESOURCE or
 * RLIMIT_NOFILE exceeds NR_OPEN, or the error returned by
 * security_task_setrlimit().
 *
 * The new limit is stored under the group leader's task_lock.  For
 * RLIMIT_CPU the process-wide CPUCLOCK_PROF timer is additionally
 * re-armed when no expiry is set or the new soft limit is not later
 * than the current one.
 */
asmlinkage long sys_setrlimit(unsigned int resource, struct rlimit __user *rlim)
{
	struct rlimit new_rlim, *old_rlim;
	unsigned long it_prof_secs;
	int retval;

	if (resource >= RLIM_NLIMITS)
		return -EINVAL;
	if (copy_from_user(&new_rlim, rlim, sizeof(*rlim)))
		return -EFAULT;
	if (new_rlim.rlim_cur > new_rlim.rlim_max)
		return -EINVAL;
	old_rlim = current->signal->rlim + resource;
	/* Raising the hard limit is a privileged operation. */
	if ((new_rlim.rlim_max > old_rlim->rlim_max) &&
	    !capable(CAP_SYS_RESOURCE))
		return -EPERM;
	if (resource == RLIMIT_NOFILE && new_rlim.rlim_max > NR_OPEN)
		return -EPERM;

	retval = security_task_setrlimit(resource, &new_rlim);
	if (retval)
		return retval;

	if (resource == RLIMIT_CPU && new_rlim.rlim_cur == 0) {
		/*
		 * The caller is asking for an immediate RLIMIT_CPU
		 * expiry.  But we use the zero value to mean "it was
		 * never set".  So let's cheat and make it one second
		 * instead
		 */
		new_rlim.rlim_cur = 1;
	}

	task_lock(current->group_leader);
	*old_rlim = new_rlim;
	task_unlock(current->group_leader);

	if (resource != RLIMIT_CPU)
		goto out;

	/*
	 * RLIMIT_CPU handling.   Note that the kernel fails to return an error
	 * code if it rejected the user's attempt to set RLIMIT_CPU.  This is a
	 * very long-standing error, and fixing it now risks breakage of
	 * applications, so we live with it
	 */
	if (new_rlim.rlim_cur == RLIM_INFINITY)
		goto out;

	it_prof_secs = cputime_to_secs(current->signal->it_prof_expires);
	if (it_prof_secs == 0 || new_rlim.rlim_cur <= it_prof_secs) {
		unsigned long rlim_cur = new_rlim.rlim_cur;
		cputime_t cputime;

		cputime = secs_to_cputime(rlim_cur);
		/* The timer is updated under tasklist_lock and our own siglock. */
		read_lock(&tasklist_lock);
		spin_lock_irq(&current->sighand->siglock);
		set_process_cpu_timer(current, CPUCLOCK_PROF, &cputime, NULL);
		spin_unlock_irq(&current->sighand->siglock);
		read_unlock(&tasklist_lock);
	}
out:
	return 0;
}
static unsigned long lowmem_scan(struct shrinker *s, struct shrink_control *sc) { struct task_struct *tsk; struct task_struct *selected = NULL; unsigned long rem = 0; int tasksize; int i; short min_score_adj = OOM_SCORE_ADJ_MAX + 1; int minfree = 0; int selected_tasksize = 0; short selected_oom_score_adj; int array_size = ARRAY_SIZE(lowmem_adj); int other_free = global_page_state(NR_FREE_PAGES) - totalreserve_pages; int other_file = global_node_page_state(NR_FILE_PAGES) - global_node_page_state(NR_SHMEM) - total_swapcache_pages(); if (lowmem_adj_size < array_size) array_size = lowmem_adj_size; if (lowmem_minfree_size < array_size) array_size = lowmem_minfree_size; for (i = 0; i < array_size; i++) { minfree = lowmem_minfree[i]; if (other_free < minfree && other_file < minfree) { min_score_adj = lowmem_adj[i]; break; } } lowmem_print(3, "lowmem_scan %lu, %x, ofree %d %d, ma %hd\n", sc->nr_to_scan, sc->gfp_mask, other_free, other_file, min_score_adj); if (min_score_adj == OOM_SCORE_ADJ_MAX + 1) { lowmem_print(5, "lowmem_scan %lu, %x, return 0\n", sc->nr_to_scan, sc->gfp_mask); return 0; } selected_oom_score_adj = min_score_adj; rcu_read_lock(); for_each_process(tsk) { struct task_struct *p; short oom_score_adj; if (tsk->flags & PF_KTHREAD) continue; p = find_lock_task_mm(tsk); if (!p) continue; if (task_lmk_waiting(p) && time_before_eq(jiffies, lowmem_deathpending_timeout)) { task_unlock(p); rcu_read_unlock(); return 0; } oom_score_adj = p->signal->oom_score_adj; if (oom_score_adj < min_score_adj) { task_unlock(p); continue; } tasksize = get_mm_rss(p->mm); task_unlock(p); if (tasksize <= 0) continue; if (selected) { if (oom_score_adj < selected_oom_score_adj) continue; if (oom_score_adj == selected_oom_score_adj && tasksize <= selected_tasksize) continue; } selected = p; selected_tasksize = tasksize; selected_oom_score_adj = oom_score_adj; lowmem_print(2, "select '%s' (%d), adj %hd, size %d, to kill\n", p->comm, p->pid, oom_score_adj, tasksize); } if (selected) { 
task_lock(selected); send_sig(SIGKILL, selected, 0); if (selected->mm) task_set_lmk_waiting(selected); task_unlock(selected); lowmem_print(1, "Killing '%s' (%d), adj %hd,\n" " to free %ldkB on behalf of '%s' (%d) because\n" " cache %ldkB is below limit %ldkB for oom_score_adj %hd\n" " Free memory is %ldkB above reserved\n", selected->comm, selected->pid, selected_oom_score_adj, selected_tasksize * (long)(PAGE_SIZE / 1024), current->comm, current->pid, other_file * (long)(PAGE_SIZE / 1024), minfree * (long)(PAGE_SIZE / 1024), min_score_adj, other_free * (long)(PAGE_SIZE / 1024)); lowmem_deathpending_timeout = jiffies + HZ; rem += selected_tasksize; } lowmem_print(4, "lowmem_scan %lu, %x, return %lu\n", sc->nr_to_scan, sc->gfp_mask, rem); rcu_read_unlock(); return rem; }
/*
 * lowmem_shrink - shrinker callback of the low-memory killer.
 *
 * @nr_to_scan: scan request; values <= 0 only query the remaining count.
 * @gfp_mask:   allocation mask, used for logging only.
 *
 * A level of lowmem_minfree[] is crossed when both the free and the file
 * page counts are below it; the matching lowmem_adj[] entry becomes the
 * minimum oom_adj eligible for killing.  In this variant oom_adj is read
 * from the task's mm.  The process with the highest oom_adj (ties broken
 * by larger RSS) is sent SIGKILL.
 *
 * Returns the sum of the active/inactive anon and file page counts,
 * reduced by the RSS of the killed process if one was killed.
 */
static int lowmem_shrink(int nr_to_scan, gfp_t gfp_mask)
{
	struct task_struct *task;
	struct task_struct *victim = NULL;
	int victim_size = 0;
	int victim_adj;
	int threshold_adj = OOM_ADJUST_MAX + 1;
	int levels = ARRAY_SIZE(lowmem_adj);
	int free_pages = global_page_state(NR_FREE_PAGES);
	int file_pages = global_page_state(NR_FILE_PAGES);
	int remaining;
	int idx;

	/* Only use as many levels as both parameter arrays provide. */
	if (levels > lowmem_adj_size)
		levels = lowmem_adj_size;
	if (levels > lowmem_minfree_size)
		levels = lowmem_minfree_size;

	for (idx = 0; idx < levels; idx++) {
		if (free_pages >= lowmem_minfree[idx] ||
		    file_pages >= lowmem_minfree[idx])
			continue;
		threshold_adj = lowmem_adj[idx];
		break;
	}

	if (nr_to_scan > 0)
		lowmem_print(3, "lowmem_shrink %d, %x, ofree %d %d, ma %d\n",
			     nr_to_scan, gfp_mask, free_pages, file_pages,
			     threshold_adj);

	remaining = global_page_state(NR_ACTIVE_ANON) +
		    global_page_state(NR_ACTIVE_FILE) +
		    global_page_state(NR_INACTIVE_ANON) +
		    global_page_state(NR_INACTIVE_FILE);

	/* Query-only call, or no threshold crossed: just report the count. */
	if (nr_to_scan <= 0 || threshold_adj == OOM_ADJUST_MAX + 1) {
		lowmem_print(5, "lowmem_shrink %d, %x, return %d\n",
			     nr_to_scan, gfp_mask, remaining);
		return remaining;
	}

	victim_adj = threshold_adj;

	read_lock(&tasklist_lock);
	for_each_process(task) {
		struct mm_struct *mm;
		int adj;
		int size;

		task_lock(task);
		mm = task->mm;
		if (!mm) {
			task_unlock(task);
			continue;
		}
		adj = mm->oom_adj;
		if (adj < threshold_adj) {
			task_unlock(task);
			continue;
		}
		size = get_mm_rss(mm);
		task_unlock(task);

		if (size <= 0)
			continue;

		/* Keep the current victim unless this one ranks strictly higher. */
		if (victim &&
		    (adj < victim_adj ||
		     (adj == victim_adj && size <= victim_size)))
			continue;

		victim = task;
		victim_size = size;
		victim_adj = adj;
		lowmem_print(2, "select %d (%s), adj %d, size %d, to kill\n",
			     task->pid, task->comm, adj, size);
	}

	if (victim) {
		lowmem_print(1, "send sigkill to %d (%s), adj %d, size %d\n",
			     victim->pid, victim->comm,
			     victim_adj, victim_size);
		force_sig(SIGKILL, victim);
		remaining -= victim_size;
	}

	lowmem_print(4, "lowmem_shrink %d, %x, return %d\n",
		     nr_to_scan, gfp_mask, remaining);
	read_unlock(&tasklist_lock);
	return remaining;
}
static int lowmem_shrink(struct shrinker *s, struct shrink_control *sc) { struct task_struct *p; struct task_struct *selected = NULL; int rem = 0; int tasksize; int i; int min_adj = OOM_ADJUST_MAX + 1; int selected_tasksize = 0; int selected_oom_adj; int array_size = ARRAY_SIZE(lowmem_adj); int other_free = global_page_state(NR_FREE_PAGES); int other_file = global_page_state(NR_FILE_PAGES) - global_page_state(NR_SHMEM); struct zone *zone; unsigned long flags; if (offlining) { /* Discount all free space in the section being offlined */ for_each_zone(zone) { if (zone_idx(zone) == ZONE_MOVABLE) { other_free -= zone_page_state(zone, NR_FREE_PAGES); lowmem_print(4, "lowmem_shrink discounted " "%lu pages in movable zone\n", zone_page_state(zone, NR_FREE_PAGES)); } } } /* * If we already have a death outstanding, then * bail out right away; indicating to vmscan * that we have nothing further to offer on * this pass. * */ //if (lowmem_deathpending && // time_before_eq(jiffies, lowmem_deathpending_timeout)) if (lowmem_deathpending) return 0; if (lowmem_adj_size < array_size) array_size = lowmem_adj_size; if (lowmem_minfree_size < array_size) array_size = lowmem_minfree_size; for (i = 0; i < array_size; i++) { if (other_free < lowmem_minfree[i] && other_file < lowmem_minfree[i]) { min_adj = lowmem_adj[i]; break; } } if (sc->nr_to_scan > 0) lowmem_print(3, "lowmem_shrink %lu, %x, ofree %d %d, ma %d\n", sc->nr_to_scan, sc->gfp_mask, other_free, other_file, min_adj); rem = global_page_state(NR_ACTIVE_ANON) + global_page_state(NR_ACTIVE_FILE) + global_page_state(NR_INACTIVE_ANON) + global_page_state(NR_INACTIVE_FILE); if (sc->nr_to_scan <= 0 || min_adj == OOM_ADJUST_MAX + 1) { lowmem_print(5, "lowmem_shrink %lu, %x, return %d\n", sc->nr_to_scan, sc->gfp_mask, rem); return rem; } selected_oom_adj = min_adj; read_lock(&tasklist_lock); for_each_process(p) { struct mm_struct *mm; struct signal_struct *sig; int oom_adj; task_lock(p); mm = p->mm; sig = p->signal; if (!mm || !sig) { 
task_unlock(p); continue; } oom_adj = sig->oom_adj; if (oom_adj < min_adj) { task_unlock(p); continue; } tasksize = get_mm_rss(mm); task_unlock(p); if (tasksize <= 0) continue; if (selected) { if (oom_adj < selected_oom_adj) continue; if (oom_adj == selected_oom_adj && tasksize <= selected_tasksize) continue; } selected = p; selected_tasksize = tasksize; selected_oom_adj = oom_adj; lowmem_print(2, "select %d (%s), adj %d, size %d, to kill\n", p->pid, p->comm, oom_adj, tasksize); } if (selected) { spin_lock_irqsave(&lowmem_deathpending_lock, flags); if (!lowmem_deathpending) { lowmem_print(1, "send sigkill to %d (%s), adj %d, size %d\n", selected->pid, selected->comm, selected_oom_adj, selected_tasksize); lowmem_deathpending = selected; //lowmem_deathpending_timeout = jiffies + HZ; task_free_register(&task_nb); force_sig(SIGKILL, selected); rem -= selected_tasksize; } spin_unlock_irqrestore(&lowmem_deathpending_lock, flags); } lowmem_print(4, "lowmem_shrink %lu, %x, return %d\n", sc->nr_to_scan, sc->gfp_mask, rem); read_unlock(&tasklist_lock); return rem; }
/*
 * unshare - stop sharing parts of the caller's execution context.
 *
 * A process uses this to 'unshare' pieces of its context that were
 * originally shared through clone().  The copy_* helpers used by
 * do_fork() cannot be called directly here because they operate on an
 * inactive task_struct that is still being constructed, whereas this
 * code modifies the current, active task_struct.
 *
 * @unshare_flags: CLONE_* bits naming what to unshare.
 *
 * Returns 0 on success, -EINVAL when an unsupported flag is set, or the
 * error reported by whichever unshare_*() helper failed.
 *
 * Under task_lock() the new mm and files objects are swapped into
 * current, and the variables that held them end up holding the displaced
 * old objects, so the shared cleanup tail releases either the replaced
 * resources (success) or the unused new ones (failure).
 */
SYSCALL_DEFINE1(unshare, unsigned long, unshare_flags)
{
	struct fs_struct *new_fs = NULL, *prev_fs;
	struct sighand_struct *new_sigh = NULL;
	struct mm_struct *new_mm = NULL, *prev_mm, *prev_active_mm = NULL;
	struct files_struct *new_fd = NULL, *prev_fd;
	struct nsproxy *new_nsproxy = NULL;
	int do_sysvsem;
	int err;

	check_unshare_flags(&unshare_flags);

	/* Return -EINVAL for all unsupported flags */
	if (unshare_flags & ~(CLONE_THREAD|CLONE_FS|CLONE_NEWNS|CLONE_SIGHAND|
				CLONE_VM|CLONE_FILES|CLONE_SYSVSEM|
				CLONE_NEWUTS|CLONE_NEWIPC|CLONE_NEWNET)) {
		err = -EINVAL;
		goto bad_unshare_out;
	}

	/*
	 * CLONE_NEWIPC must also detach from the undolist: after switching
	 * to a new ipc namespace, the semaphore arrays from the old
	 * namespace are unreachable.
	 */
	do_sysvsem = (unshare_flags & (CLONE_NEWIPC|CLONE_SYSVSEM)) != 0;

	/* Prepare the replacements; nothing is installed until all succeed. */
	err = unshare_thread(unshare_flags);
	if (err)
		goto bad_unshare_out;

	err = unshare_fs(unshare_flags, &new_fs);
	if (err)
		goto bad_unshare_cleanup_thread;

	err = unshare_sighand(unshare_flags, &new_sigh);
	if (err)
		goto bad_unshare_cleanup_fs;

	err = unshare_vm(unshare_flags, &new_mm);
	if (err)
		goto bad_unshare_cleanup_sigh;

	err = unshare_fd(unshare_flags, &new_fd);
	if (err)
		goto bad_unshare_cleanup_vm;

	err = unshare_nsproxy_namespaces(unshare_flags, &new_nsproxy, new_fs);
	if (err)
		goto bad_unshare_cleanup_fd;

	if (new_fs || new_mm || new_fd || do_sysvsem || new_nsproxy) {
		if (do_sysvsem) {
			/* CLONE_SYSVSEM is equivalent to sys_exit(). */
			exit_sem(current);
		}

		if (new_nsproxy) {
			switch_task_namespaces(current, new_nsproxy);
			new_nsproxy = NULL;
		}

		task_lock(current);

		if (new_fs) {
			prev_fs = current->fs;
			write_lock(&prev_fs->lock);
			current->fs = new_fs;
			/* Free the old fs_struct below only if we were its last user. */
			new_fs = (--prev_fs->users) ? NULL : prev_fs;
			write_unlock(&prev_fs->lock);
		}

		if (new_mm) {
			prev_mm = current->mm;
			prev_active_mm = current->active_mm;
			current->mm = new_mm;
			current->active_mm = new_mm;
			activate_mm(prev_active_mm, new_mm);
			new_mm = prev_mm;
		}

		if (new_fd) {
			prev_fd = current->files;
			current->files = new_fd;
			new_fd = prev_fd;
		}

		task_unlock(current);
	}

	if (new_nsproxy)
		put_nsproxy(new_nsproxy);

bad_unshare_cleanup_fd:
	if (new_fd)
		put_files_struct(new_fd);

bad_unshare_cleanup_vm:
	if (new_mm)
		mmput(new_mm);

bad_unshare_cleanup_sigh:
	if (new_sigh && atomic_dec_and_test(&new_sigh->count)) {
		kmem_cache_free(sighand_cachep, new_sigh);
	}

bad_unshare_cleanup_fs:
	if (new_fs)
		free_fs_struct(new_fs);

bad_unshare_cleanup_thread:
bad_unshare_out:
	return err;
}
static void lowmem_vm_shrinker(int largest, int rss_threshold) { struct task_struct *p; struct task_struct *selected = NULL; int vmsize, rssize; int min_adj, min_large_adj; int selected_vmsize = 0; int selected_oom_adj; int array_size = ARRAY_SIZE(lowmem_adj); unsigned long flags; /* * If we already have a death outstanding, then * bail out right away; indicating to vmscan * that we have nothing further to offer on * this pass. * */ if (lowmem_deathpending) return; if (lowmem_adj_size < array_size) array_size = lowmem_adj_size; if (lowmem_minfree_size < array_size) array_size = lowmem_minfree_size; min_adj = lowmem_adj[array_size - 2]; /* lock onto cached processes only */ min_large_adj = lowmem_adj[array_size - 3]; /* Minimum priority for large processes */ lowmem_print(3, "lowmem_vm_shrink ma %d, large ma %d, largest %d, rss_threshold=%d\n", min_adj, min_large_adj, largest, rss_threshold); selected_oom_adj = min_adj; read_lock(&tasklist_lock); for_each_process(p) { struct mm_struct *mm; struct signal_struct *sig; int oom_adj; task_lock(p); mm = p->mm; sig = p->signal; if (!mm || !sig) { task_unlock(p); continue; } oom_adj = sig->oom_adj; vmsize = get_mm_hiwater_vm(mm); rssize = get_mm_rss(mm) * PAGE_SIZE; task_unlock(p); if (vmsize <= 0) continue; /* Only look at cached processes */ if (oom_adj < min_adj) { /* Is this a very large home process in the background? */ if ((oom_adj > min_large_adj) && (rssize >= rss_threshold)) { selected = p; selected_vmsize = vmsize; selected_oom_adj = oom_adj; lowmem_print(2, "lowmem_shrink override %d (%s), adj %d, vm size %d, rs size %d to kill\n" ,p->pid, p->comm, oom_adj, vmsize, rssize); break; } continue; } /* Is this process a better fit than last selected? 
*/ if (selected) { if (oom_adj < selected_oom_adj) continue; /* If looking for largest, ignore priority */ if ((largest || (oom_adj == selected_oom_adj)) && (vmsize <= selected_vmsize)) continue; } selected = p; selected_vmsize = vmsize; if (largest == 0) /* Do not filter by priority if searching for largest */ selected_oom_adj = oom_adj; lowmem_print(2, "lowmem_shrink select %d (%s), adj %d, vm size %d, rs size %d to kill\n", p->pid, p->comm, oom_adj, vmsize, rssize); } if (selected) { spin_lock_irqsave(&lowmem_deathpending_lock, flags); if (!lowmem_deathpending) { lowmem_print(1, "lowmem_shrink send sigkill to %d (%s), adj %d, vm size %d\n", selected->pid, selected->comm, selected_oom_adj, selected_vmsize); lowmem_deathpending = selected; task_free_register(&task_nb); force_sig(SIGKILL, selected); } spin_unlock_irqrestore(&lowmem_deathpending_lock, flags); } lowmem_print(4, "lowmem_vm_shrink, saved %d\n", selected_vmsize); read_unlock(&tasklist_lock); return; }
/*
 * This creates a new process as a copy of the old one,
 * but does not actually start it yet.
 *
 * It copies the registers, and all the appropriate
 * parts of the process environment (as per the clone
 * flags). The actual kick-off is left to the caller.
 *
 * clone_flags:   CLONE_* flags selecting what is shared with current; the
 *                low CSIGNAL bits become the child's exit signal unless
 *                CLONE_THREAD is set.
 * stack_start,
 * regs,
 * stack_size:    handed unchanged to copy_thread().
 * child_tidptr:  stored in set_child_tid / clear_child_tid when
 *                CLONE_CHILD_SETTID / CLONE_CHILD_CLEARTID are set.
 * pid:           used as the child's struct pid only when it is
 *                &init_struct_pid; for any other value a fresh pid is
 *                allocated from the child's pid namespace.
 * trace:         non-zero requests ptrace initialisation even without
 *                CLONE_PTRACE.
 *
 * Returns the new task_struct, or an ERR_PTR() carrying a negative errno.
 * On failure everything set up so far is torn down in reverse order through
 * the bad_fork_* labels.
 */
static struct task_struct *copy_process(unsigned long clone_flags,
					unsigned long stack_start,
					struct pt_regs *regs,
					unsigned long stack_size,
					int __user *child_tidptr,
					struct pid *pid,
					int trace)
{
	int retval;
	struct task_struct *p;
	int cgroup_callbacks_done = 0;

	if ((clone_flags & (CLONE_NEWNS|CLONE_FS)) == (CLONE_NEWNS|CLONE_FS))
		return ERR_PTR(-EINVAL);

	/*
	 * Thread groups must share signals as well, and detached threads
	 * can only be started up within the thread group.
	 */
	if ((clone_flags & CLONE_THREAD) && !(clone_flags & CLONE_SIGHAND))
		return ERR_PTR(-EINVAL);

	/*
	 * Shared signal handlers imply shared VM. By way of the above,
	 * thread groups also imply shared VM. Blocking this case allows
	 * for various simplifications in other code.
	 */
	if ((clone_flags & CLONE_SIGHAND) && !(clone_flags & CLONE_VM))
		return ERR_PTR(-EINVAL);

	/*
	 * Siblings of global init remain as zombies on exit since they are
	 * not reaped by their parent (swapper). To solve this and to avoid
	 * multi-rooted process trees, prevent global and container-inits
	 * from creating siblings.
	 */
	if ((clone_flags & CLONE_PARENT) &&
				current->signal->flags & SIGNAL_UNKILLABLE)
		return ERR_PTR(-EINVAL);

	retval = security_task_create(clone_flags);
	if (retval)
		goto fork_out;

	retval = -ENOMEM;
	p = dup_task_struct(current);
	if (!p)
		goto fork_out;

	ftrace_graph_init_task(p);

	rt_mutex_init_task(p);

#ifdef CONFIG_PROVE_LOCKING
	DEBUG_LOCKS_WARN_ON(!p->hardirqs_enabled);
	DEBUG_LOCKS_WARN_ON(!p->softirqs_enabled);
#endif
	retval = -EAGAIN;
	if (atomic_read(&p->real_cred->user->processes) >=
			task_rlimit(p, RLIMIT_NPROC)) {
		if (!capable(CAP_SYS_ADMIN) && !capable(CAP_SYS_RESOURCE) &&
		    p->real_cred->user != INIT_USER)
			goto bad_fork_free;
	}
	current->flags &= ~PF_NPROC_EXCEEDED;

	retval = copy_creds(p, clone_flags);
	if (retval < 0)
		goto bad_fork_free;

	/*
	 * If multiple threads are within copy_process(), then this check
	 * triggers too late. This doesn't hurt, the check is only there
	 * to stop root fork bombs.
	 */
	retval = -EAGAIN;
	if (nr_threads >= max_threads)
		goto bad_fork_cleanup_count;

	if (!try_module_get(task_thread_info(p)->exec_domain->module))
		goto bad_fork_cleanup_count;

	p->did_exec = 0;
	delayacct_tsk_init(p);	/* Must remain after dup_task_struct() */
	copy_flags(clone_flags, p);
	INIT_LIST_HEAD(&p->children);
	INIT_LIST_HEAD(&p->sibling);
	rcu_copy_process(p);
	p->vfork_done = NULL;
	spin_lock_init(&p->alloc_lock);

	init_sigpending(&p->pending);

	p->utime = cputime_zero;
	p->stime = cputime_zero;
	p->gtime = cputime_zero;
	p->utimescaled = cputime_zero;
	p->stimescaled = cputime_zero;
#ifndef CONFIG_VIRT_CPU_ACCOUNTING
	p->prev_utime = cputime_zero;
	p->prev_stime = cputime_zero;
#endif
#if defined(SPLIT_RSS_COUNTING)
	memset(&p->rss_stat, 0, sizeof(p->rss_stat));
#endif

	p->default_timer_slack_ns = current->timer_slack_ns;

	task_io_accounting_init(&p->ioac);
	acct_clear_integrals(p);

	posix_cpu_timers_init(p);

	do_posix_clock_monotonic_gettime(&p->start_time);
	p->real_start_time = p->start_time;
	monotonic_to_bootbased(&p->real_start_time);
	p->io_context = NULL;
	p->audit_context = NULL;
	/* Paired with threadgroup_fork_read_unlock() on both exit paths. */
	if (clone_flags & CLONE_THREAD)
		threadgroup_fork_read_lock(current);
	cgroup_fork(p);
#ifdef CONFIG_NUMA
	p->mempolicy = mpol_dup(p->mempolicy);
	if (IS_ERR(p->mempolicy)) {
		retval = PTR_ERR(p->mempolicy);
		p->mempolicy = NULL;
		goto bad_fork_cleanup_cgroup;
	}
	mpol_fix_fork_child_flag(p);
#endif
#ifdef CONFIG_CPUSETS
	p->cpuset_mem_spread_rotor = NUMA_NO_NODE;
	p->cpuset_slab_spread_rotor = NUMA_NO_NODE;
#endif
#ifdef CONFIG_TRACE_IRQFLAGS
	p->irq_events = 0;
#ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW
	p->hardirqs_enabled = 1;
#else
	p->hardirqs_enabled = 0;
#endif
	p->hardirq_enable_ip = 0;
	p->hardirq_enable_event = 0;
	p->hardirq_disable_ip = _THIS_IP_;
	p->hardirq_disable_event = 0;
	p->softirqs_enabled = 1;
	p->softirq_enable_ip = _THIS_IP_;
	p->softirq_enable_event = 0;
	p->softirq_disable_ip = 0;
	p->softirq_disable_event = 0;
	p->hardirq_context = 0;
	p->softirq_context = 0;
#endif
#ifdef CONFIG_LOCKDEP
	p->lockdep_depth = 0; /* no locks held yet */
	p->curr_chain_key = 0;
	p->lockdep_recursion = 0;
#endif

#ifdef CONFIG_DEBUG_MUTEXES
	p->blocked_on = NULL; /* not blocked yet */
#endif
#ifdef CONFIG_CGROUP_MEM_RES_CTLR
	p->memcg_batch.do_batch = 0;
	p->memcg_batch.memcg = NULL;
#endif

	/* Perform scheduler related setup. Assign this task to a CPU. */
	sched_fork(p);

	retval = perf_event_init_task(p);
	if (retval)
		goto bad_fork_cleanup_policy;
	retval = audit_alloc(p);
	if (retval)
		goto bad_fork_cleanup_policy;
	/* copy all the process information */
	retval = copy_semundo(clone_flags, p);
	if (retval)
		goto bad_fork_cleanup_audit;
	retval = copy_files(clone_flags, p);
	if (retval)
		goto bad_fork_cleanup_semundo;
	retval = copy_fs(clone_flags, p);
	if (retval)
		goto bad_fork_cleanup_files;
	retval = copy_sighand(clone_flags, p);
	if (retval)
		goto bad_fork_cleanup_fs;
	retval = copy_signal(clone_flags, p);
	if (retval)
		goto bad_fork_cleanup_sighand;
	retval = copy_mm(clone_flags, p);
	if (retval)
		goto bad_fork_cleanup_signal;
	retval = copy_namespaces(clone_flags, p);
	if (retval)
		goto bad_fork_cleanup_mm;
	retval = copy_io(clone_flags, p);
	if (retval)
		goto bad_fork_cleanup_namespaces;
	retval = copy_thread(clone_flags, stack_start, stack_size, p, regs);
	if (retval)
		goto bad_fork_cleanup_io;

	if (pid != &init_struct_pid) {
		retval = -ENOMEM;
		pid = alloc_pid(p->nsproxy->pid_ns);
		if (!pid)
			goto bad_fork_cleanup_io;
	}

	p->pid = pid_nr(pid);
	p->tgid = p->pid;
	if (clone_flags & CLONE_THREAD)
		p->tgid = current->tgid;

	p->set_child_tid = (clone_flags & CLONE_CHILD_SETTID) ? child_tidptr : NULL;
	/*
	 * Clear TID on mm_release()?
	 */
	p->clear_child_tid = (clone_flags & CLONE_CHILD_CLEARTID) ? child_tidptr : NULL;
#ifdef CONFIG_BLOCK
	p->plug = NULL;
#endif
#ifdef CONFIG_FUTEX
	p->robust_list = NULL;
#ifdef CONFIG_COMPAT
	p->compat_robust_list = NULL;
#endif
	INIT_LIST_HEAD(&p->pi_state_list);
	p->pi_state_cache = NULL;
#endif
	/*
	 * sigaltstack should be cleared when sharing the same VM
	 */
	if ((clone_flags & (CLONE_VM|CLONE_VFORK)) == CLONE_VM)
		p->sas_ss_sp = p->sas_ss_size = 0;

	/*
	 * Syscall tracing and stepping should be turned off in the
	 * child regardless of CLONE_PTRACE.
	 */
	user_disable_single_step(p);
	clear_tsk_thread_flag(p, TIF_SYSCALL_TRACE);
#ifdef TIF_SYSCALL_EMU
	clear_tsk_thread_flag(p, TIF_SYSCALL_EMU);
#endif
	clear_all_latency_tracing(p);

	/* ok, now we should be set up.. */
	p->exit_signal = (clone_flags & CLONE_THREAD) ? -1 : (clone_flags & CSIGNAL);
	p->pdeath_signal = 0;
	p->exit_state = 0;

	p->nr_dirtied = 0;
	p->nr_dirtied_pause = 128 >> (PAGE_SHIFT - 10);

	/*
	 * Ok, make it visible to the rest of the system.
	 * We don't wake it up yet.
	 */
	p->group_leader = p;
	INIT_LIST_HEAD(&p->thread_group);

	/* Now that the task is set up, run cgroup callbacks if
	 * necessary. We need to run them before the task is visible
	 * on the tasklist. */
	cgroup_fork_callbacks(p);
	cgroup_callbacks_done = 1;

	/* Need tasklist lock for parent etc handling! */
	write_lock_irq(&tasklist_lock);

	/* CLONE_PARENT re-uses the old parent */
	if (clone_flags & (CLONE_PARENT|CLONE_THREAD)) {
		p->real_parent = current->real_parent;
		p->parent_exec_id = current->parent_exec_id;
	} else {
		p->real_parent = current;
		p->parent_exec_id = current->self_exec_id;
	}

	spin_lock(&current->sighand->siglock);

	/*
	 * Process group and session signals need to be delivered to just the
	 * parent before the fork or both the parent and the child after the
	 * fork. Restart if a signal comes in before we add the new process to
	 * its process group.
	 * A fatal signal pending means that current will exit, so the new
	 * thread can't slip out of an OOM kill (or normal SIGKILL).
	 */
	recalc_sigpending();
	if (signal_pending(current)) {
		spin_unlock(&current->sighand->siglock);
		write_unlock_irq(&tasklist_lock);
		retval = -ERESTARTNOINTR;
		goto bad_fork_free_pid;
	}

	if (clone_flags & CLONE_THREAD) {
		current->signal->nr_threads++;
		atomic_inc(&current->signal->live);
		atomic_inc(&current->signal->sigcnt);
		p->group_leader = current->group_leader;
		list_add_tail_rcu(&p->thread_group, &p->group_leader->thread_group);
	}

	if (likely(p->pid)) {
		ptrace_init_task(p, (clone_flags & CLONE_PTRACE) || trace);

		if (thread_group_leader(p)) {
			if (is_child_reaper(pid))
				p->nsproxy->pid_ns->child_reaper = p;

			p->signal->leader_pid = pid;
			p->signal->tty = tty_kref_get(current->signal->tty);
			attach_pid(p, PIDTYPE_PGID, task_pgrp(current));
			attach_pid(p, PIDTYPE_SID, task_session(current));
			list_add_tail(&p->sibling, &p->real_parent->children);
			list_add_tail_rcu(&p->tasks, &init_task.tasks);
			__this_cpu_inc(process_counts);
		}
		attach_pid(p, PIDTYPE_PID, pid);
		nr_threads++;
	}

	total_forks++;
	spin_unlock(&current->sighand->siglock);
	write_unlock_irq(&tasklist_lock);
	proc_fork_connector(p);
	cgroup_post_fork(p);
	if (clone_flags & CLONE_THREAD)
		threadgroup_fork_read_unlock(current);
	perf_event_fork(p);
	return p;

	/*
	 * Error unwinding: each label undoes one setup step and falls through
	 * to the labels for the steps that preceded it.
	 */
bad_fork_free_pid:
	if (pid != &init_struct_pid)
		free_pid(pid);
bad_fork_cleanup_io:
	if (p->io_context)
		exit_io_context(p);
bad_fork_cleanup_namespaces:
	exit_task_namespaces(p);
bad_fork_cleanup_mm:
	if (p->mm) {
		task_lock(p);
		if (p->signal->oom_score_adj == OOM_SCORE_ADJ_MIN)
			atomic_dec(&p->mm->oom_disable_count);
		task_unlock(p);
		mmput(p->mm);
	}
bad_fork_cleanup_signal:
	if (!(clone_flags & CLONE_THREAD))
		free_signal_struct(p->signal);
bad_fork_cleanup_sighand:
	__cleanup_sighand(p->sighand);
bad_fork_cleanup_fs:
	exit_fs(p); /* blocking */
bad_fork_cleanup_files:
	exit_files(p); /* blocking */
bad_fork_cleanup_semundo:
	exit_sem(p);
bad_fork_cleanup_audit:
	audit_free(p);
bad_fork_cleanup_policy:
	perf_event_free_task(p);
#ifdef CONFIG_NUMA
	mpol_put(p->mempolicy);
bad_fork_cleanup_cgroup:
#endif
	if (clone_flags & CLONE_THREAD)
		threadgroup_fork_read_unlock(current);
	cgroup_exit(p, cgroup_callbacks_done);
	delayacct_tsk_free(p);
	module_put(task_thread_info(p)->exec_domain->module);
bad_fork_cleanup_count:
	atomic_dec(&p->cred->user->processes);
	exit_creds(p);
bad_fork_free:
	free_task(p);
fork_out:
	return ERR_PTR(retval);
}
static int try_to_freeze_tasks(bool sig_only) { struct task_struct *g, *p; unsigned long end_time; unsigned int todo; struct timeval start, end; u64 elapsed_csecs64; unsigned int elapsed_csecs; do_gettimeofday(&start); end_time = jiffies + TIMEOUT; do { todo = 0; read_lock(&tasklist_lock); do_each_thread(g, p) { if (frozen(p) || !freezeable(p)) continue; if (!freeze_task(p, sig_only)) continue; /* * Now that we've done set_freeze_flag, don't * perturb a task in TASK_STOPPED or TASK_TRACED. * It is "frozen enough". If the task does wake * up, it will immediately call try_to_freeze. * * Because freeze_task() goes through p's * scheduler lock after setting TIF_FREEZE, it's * guaranteed that either we see TASK_RUNNING or * try_to_stop() after schedule() in ptrace/signal * stop sees TIF_FREEZE. */ if (!task_is_stopped_or_traced(p) && !freezer_should_skip(p)) todo++; } while_each_thread(g, p); read_unlock(&tasklist_lock); yield(); /* Yield is okay here */ if (time_after(jiffies, end_time)) break; } while (todo); do_gettimeofday(&end); elapsed_csecs64 = timeval_to_ns(&end) - timeval_to_ns(&start); do_div(elapsed_csecs64, NSEC_PER_SEC / 100); elapsed_csecs = elapsed_csecs64; if (todo) { /* This does not unfreeze processes that are already frozen * (we have slightly ugly calling convention in that respect, * and caller must call thaw_processes() if something fails), * but it cleans up leftover PF_FREEZE requests. */ printk("\n"); printk(KERN_ERR "Freezing of tasks failed after %d.%02d seconds " "(%d tasks refusing to freeze):\n", elapsed_csecs / 100, elapsed_csecs % 100, todo); show_state(); read_lock(&tasklist_lock); do_each_thread(g, p) { task_lock(p); if (freezing(p) && !freezer_should_skip(p)) printk(KERN_ERR " %s\n", p->comm); cancel_freezing(p); task_unlock(p); } while_each_thread(g, p); read_unlock(&tasklist_lock); } else {
struct ion_client *ion_client_create(struct ion_device *dev, unsigned int heap_mask, const char *name) { struct ion_client *client; struct task_struct *task; struct rb_node **p; struct rb_node *parent = NULL; struct ion_client *entry; pid_t pid; unsigned int name_len; if (!name) { pr_err("%s: Name cannot be null\n", __func__); return ERR_PTR(-EINVAL); } name_len = ION_CLIENT_NAME_LENGTH; get_task_struct(current->group_leader); task_lock(current->group_leader); pid = task_pid_nr(current->group_leader); if (current->group_leader->flags & PF_KTHREAD) { put_task_struct(current->group_leader); task = NULL; } else { task = current->group_leader; } task_unlock(current->group_leader); client = kzalloc(sizeof(struct ion_client), GFP_KERNEL); if (!client) { if (task) put_task_struct(current->group_leader); return ERR_PTR(-ENOMEM); } client->dev = dev; client->handles = RB_ROOT; mutex_init(&client->lock); client->name = kzalloc(name_len+1, GFP_KERNEL); if (!client->name) { put_task_struct(current->group_leader); kfree(client); return ERR_PTR(-ENOMEM); } else { strlcpy(client->name, name, name_len+1); } client->heap_mask = heap_mask; client->task = task; client->pid = pid; mutex_lock(&dev->lock); p = &dev->clients.rb_node; while (*p) { parent = *p; entry = rb_entry(parent, struct ion_client, node); if (client < entry) p = &(*p)->rb_left; else if (client > entry) p = &(*p)->rb_right; } rb_link_node(&client->node, parent, p); rb_insert_color(&client->node, &dev->clients); client->debug_root = debugfs_create_file(name, 0664, dev->debug_root, client, &debug_client_fops); mutex_unlock(&dev->lock); pr_info("%s: create ion_client (%s) at %p\n", __func__, client->name, client); return client; }
static inline int ltt_enumerate_process_states(void) { struct task_struct *t = &init_task; struct task_struct *p = t; enum lttng_process_status status; enum lttng_thread_type type; enum lttng_execution_mode mode; enum lttng_execution_submode submode; do { mode = LTTNG_MODE_UNKNOWN; submode = LTTNG_UNKNOWN; read_lock(&tasklist_lock); if(t != &init_task) { atomic_dec(&t->usage); t = next_thread(t); } if(t == p) { t = p = next_task(t); } atomic_inc(&t->usage); read_unlock(&tasklist_lock); task_lock(t); if(t->exit_state == EXIT_ZOMBIE) status = LTTNG_ZOMBIE; else if(t->exit_state == EXIT_DEAD) status = LTTNG_DEAD; else if(t->state == TASK_RUNNING) { /* Is this a forked child that has not run yet? */ if( list_empty(&t->run_list) ) status = LTTNG_WAIT_FORK; else /* All tasks are considered as wait_cpu; * the viewer will sort out if the task was * relly running at this time. */ status = LTTNG_WAIT_CPU; } else if(t->state & (TASK_INTERRUPTIBLE | TASK_UNINTERRUPTIBLE)) { /* Task is waiting for something to complete */ status = LTTNG_WAIT; } else status = LTTNG_UNNAMED; submode = LTTNG_NONE; /* Verification of t->mm is to filter out kernel threads; * Viewer will further filter out if a user-space thread was * in syscall mode or not */ if(t->mm) type = LTTNG_USER_THREAD; else type = LTTNG_KERNEL_THREAD; trace_statedump_enumerate_process_state( t->pid, t->parent->pid, t->comm, type, mode, submode, status, t->tgid); task_unlock(t); } while( t != &init_task ); return 0; }