/**
 * reparent_to_init() - Reparent the calling kernel thread to the init task.
 *
 * If a kernel thread is launched as a result of a system call, or if
 * it ever exits, it should generally reparent itself to init so that
 * it is correctly cleaned up on exit.
 *
 * The various task state such as scheduling policy and priority may have
 * been inherited from a user process, so we reset them to sane values here.
 *
 * NOTE that reparent_to_init() gives the caller full capabilities.
 */
void reparent_to_init(void)
{
	write_lock_irq(&tasklist_lock);

	/* Reparent to init */
	REMOVE_LINKS(current);
	current->p_pptr = child_reaper;
	current->p_opptr = child_reaper;
	SET_LINKS(current);

	/* Set the exit signal to SIGCHLD so we signal init on exit */
	current->exit_signal = SIGCHLD;

	current->ptrace = 0;
	if ((current->policy == SCHED_OTHER) && (task_nice(current) < 0))
		set_user_nice(current, 0);
	/* cpus_allowed? */
	/* rt_priority? */
	/* signals? */
	current->cap_effective = CAP_INIT_EFF_SET;
	current->cap_inheritable = CAP_INIT_INH_SET;
	current->cap_permitted = CAP_FULL_SET;
	current->keep_capabilities = 0;
	memcpy(current->rlim, init_task.rlim, sizeof(*(current->rlim)));
	current->user = INIT_USER;

	write_unlock_irq(&tasklist_lock);
}
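/*
 * A minimal userspace sketch (my illustration, not part of the kernel code
 * above) of the observable effect of reparenting to the child reaper: when a
 * process exits, its orphaned children are adopted by init, so getppid() in
 * the orphan becomes 1 (or a subreaper's PID on newer systems).
 */
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <sys/types.h>

int main(void)
{
	pid_t child = fork();

	if (child == 0) {
		pid_t before = getppid();	/* usually still the original parent (racy) */
		sleep(1);			/* the parent exits during this sleep */
		printf("ppid before orphaning: %d, after: %d\n",
		       (int) before, (int) getppid());
		exit(0);
	}
	return 0;	/* parent exits immediately, orphaning the child */
}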
/*
 * ptrace a task: make the debugger its new parent and
 * move it to the ptrace list.
 *
 * Must be called with the tasklist lock write-held.
 */
void __ptrace_link(task_t *child, task_t *new_parent)
{
	if (!list_empty(&child->ptrace_list))
		BUG();
	if (child->parent == new_parent)
		return;
	list_add(&child->ptrace_list, &child->parent->ptrace_children);
	REMOVE_LINKS(child);
	child->parent = new_parent;
	SET_LINKS(child);
}
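/*
 * Hedged userspace sketch (not kernel code): after PTRACE_ATTACH the tracee
 * is re-linked under the debugger, which is visible as the TracerPid field
 * in /proc/<pid>/status. Assumes Linux with procfs mounted.
 */
#include <stdio.h>
#include <string.h>
#include <signal.h>
#include <unistd.h>
#include <sys/ptrace.h>
#include <sys/types.h>
#include <sys/wait.h>

static void show_tracer(pid_t pid)
{
	char path[64], line[256];
	FILE *f;

	snprintf(path, sizeof(path), "/proc/%d/status", (int) pid);
	f = fopen(path, "r");
	if (!f)
		return;
	while (fgets(line, sizeof(line), f))
		if (!strncmp(line, "TracerPid:", 10))
			fputs(line, stdout);
	fclose(f);
}

int main(void)
{
	pid_t child = fork();

	if (child == 0) {
		pause();			/* sit still until traced/killed */
		_exit(0);
	}
	show_tracer(child);			/* TracerPid: 0 */
	ptrace(PTRACE_ATTACH, child, NULL, NULL);
	waitpid(child, NULL, 0);		/* wait for the attach stop */
	show_tracer(child);			/* TracerPid: <our pid> */
	kill(child, SIGKILL);
	waitpid(child, NULL, 0);
	return 0;
}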
/*
 * unptrace a task: move it back to its original parent and
 * remove it from the ptrace list.
 *
 * Must be called with the tasklist lock write-held.
 */
void __ptrace_unlink(task_t *child)
{
	if (!child->ptrace)
		BUG();
	child->ptrace = 0;
	if (list_empty(&child->ptrace_list))
		return;
	list_del_init(&child->ptrace_list);
	REMOVE_LINKS(child);
	child->parent = child->real_parent;
	SET_LINKS(child);
}
/*
 * unptrace a task: move it back to its original parent and
 * remove it from the ptrace list.
 *
 * Must be called with the tasklist lock write-held.
 */
void __ptrace_unlink(task_t *child)
{
	if (!child->ptrace)
		BUG();
	child->ptrace = 0;
	if (!list_empty(&child->ptrace_list)) {
		list_del_init(&child->ptrace_list);
		REMOVE_LINKS(child);
		child->parent = child->real_parent;
		SET_LINKS(child);
	}
	if (child->state == TASK_TRACED)
		ptrace_untrace(child);
}
int ptrace_attach(struct task_struct *task)
{
	task_lock(task);
	if (task->pid <= 1)
		goto bad;
	if (task == current)
		goto bad;
	if (!task->mm)
		goto bad;
	if (((current->uid != task->euid) ||
	     (current->uid != task->suid) ||
	     (current->uid != task->uid) ||
	     (current->gid != task->egid) ||
	     (current->gid != task->sgid) ||
	     (!cap_issubset(task->cap_permitted, current->cap_permitted)) ||
	     (current->gid != task->gid)) && !capable(CAP_SYS_PTRACE))
		goto bad;
	rmb();
	if (!task->mm->dumpable && !capable(CAP_SYS_PTRACE))
		goto bad;
	/* the same process cannot be attached many times */
	if (task->ptrace & PT_PTRACED)
		goto bad;

	/* Go */
	task->ptrace |= PT_PTRACED;
	if (capable(CAP_SYS_PTRACE))
		task->ptrace |= PT_PTRACE_CAP;
	task_unlock(task);

	write_lock_irq(&tasklist_lock);
	if (task->p_pptr != current) {
		REMOVE_LINKS(task);
		task->p_pptr = current;
		SET_LINKS(task);
	}
	write_unlock_irq(&tasklist_lock);

	send_sig(SIGSTOP, task, 1);
	return 0;

bad:
	task_unlock(task);
	return -EPERM;
}
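/*
 * Userspace sketch of the permission checks above (an illustration under
 * assumptions, not kernel code): attaching to init (pid 1), or to a process
 * with mismatched credentials, fails with EPERM unless the caller has
 * CAP_SYS_PTRACE. Run as an unprivileged user to see the rejection.
 */
#include <errno.h>
#include <stdio.h>
#include <string.h>
#include <sys/ptrace.h>

int main(void)
{
	/* Rejected by the pid <= 1 / credential checks for non-root callers. */
	if (ptrace(PTRACE_ATTACH, 1, NULL, NULL) == -1)
		printf("attach to init: %s\n", strerror(errno));	/* EPERM */
	return 0;
}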
void set_parents(struct task_struct *task, struct task_struct *real_parent,
		 struct task_struct *parent)
{
	/*SPEW2("Reparenting %d gets %d and %d as parents.",
	      task->pid, real_parent->pid, parent->pid);*/
	if (task == real_parent || task == parent)
		BUG();

	list_del_init(&task->ptrace_list);	/* always safe... */
	task->real_parent = real_parent;
	REMOVE_LINKS(task);
	task->parent = parent;
	SET_LINKS(task);
	if (real_parent != parent) {
		if (!task->ptrace)	/* Sanity check */
			printk(KERN_ERR "bproc: different parent but no ptrace! (sps)\n");
		list_add(&task->ptrace_list, &real_parent->ptrace_children);
	}
}
int ptrace_detach(struct task_struct *child, unsigned int data)
{
	if ((unsigned long) data > _NSIG)
		return -EIO;

	/* Architecture-specific hardware disable .. */
	ptrace_disable(child);

	/* .. re-parent .. */
	child->ptrace = 0;
	child->exit_code = data;
	write_lock_irq(&tasklist_lock);
	REMOVE_LINKS(child);
	child->p_pptr = child->p_opptr;
	SET_LINKS(child);
	write_unlock_irq(&tasklist_lock);

	/* .. and wake it up. */
	wake_up_process(child);
	return 0;
}
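/*
 * Hedged userspace sketch of the data validation above: PTRACE_DETACH treats
 * `data` as a signal number to deliver on resume, and anything above _NSIG
 * is rejected with EIO before the child is re-parented.
 */
#include <errno.h>
#include <stdio.h>
#include <string.h>
#include <signal.h>
#include <unistd.h>
#include <sys/ptrace.h>
#include <sys/types.h>
#include <sys/wait.h>

int main(void)
{
	pid_t child = fork();

	if (child == 0) {
		pause();
		_exit(0);
	}
	ptrace(PTRACE_ATTACH, child, NULL, NULL);
	waitpid(child, NULL, 0);	/* attach stop */

	/* 999 is not a valid signal number: the kernel returns EIO. */
	if (ptrace(PTRACE_DETACH, child, NULL, (void *) 999L) == -1)
		printf("detach with bogus signal: %s\n", strerror(errno));

	ptrace(PTRACE_DETACH, child, NULL, NULL);	/* detach for real */
	kill(child, SIGKILL);				/* clean up the demo child */
	waitpid(child, NULL, 0);
	return 0;
}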
void inode_pager_init(void)
{
	kern_return_t kr;
	int nr;

	kr = mach_port_allocate(mach_task_self(),
				MACH_PORT_RIGHT_PORT_SET,
				&inode_pager_port_set);
	if (kr != KERN_SUCCESS) {
		MACH3_DEBUG(0, kr, ("inode_pager_init: mach_port_allocate"));
		panic("inode_pager_init: can't allocate port set");
	}

	/*
	 * Create a new Linux task for the inode pager, so it can block
	 * and handle page faults as if it were a user process.
	 * Skip task[0] (the server task) and task[1] (reserved for init).
	 */
	for (nr = 2; nr < NR_TASKS; nr++) {
		if (!task[nr])
			break;
	}
	if (nr >= NR_TASKS)
		panic("inode_pager_init: can't find empty process");
	inode_pager_task = *current;	/* XXX ? */
	strncpy(inode_pager_task.comm, "inode pager",
		sizeof (inode_pager_task.comm));
	task[nr] = &inode_pager_task;
	SET_LINKS(&inode_pager_task);
	nr_tasks++;

	(void) server_thread_start(inode_pager_thread, (void *) 0);
}
asmlinkage int sys32_ptrace(long request, long pid, long addr, s32 data)
{
	struct task_struct *child;
	int ret = -EPERM;
	unsigned long flags;
	u32 tmp;
	int copied;
	ptrace_area parea;

	lock_kernel();
	if (request == PTRACE_TRACEME) {
		/* are we already being traced? */
		if (current->ptrace & PT_PTRACED)
			goto out;
		/* set the ptrace bit in the process flags. */
		current->ptrace |= PT_PTRACED;
		ret = 0;
		goto out;
	}
	ret = -ESRCH;
	read_lock(&tasklist_lock);
	child = find_task_by_pid(pid);
	read_unlock(&tasklist_lock);
	if (!child)
		goto out;
	ret = -EPERM;
	if (pid == 1)		/* you may not mess with init */
		goto out;
	if (request == PTRACE_ATTACH) {
		ret = ptrace_attach(child);
		goto out;
	}
	ret = -ESRCH;
	// printk("child=%lX child->flags=%lX",child,child->flags);
	/* I added child!=current line so we can get the */
	/* ieee_instruction_pointer from the user structure DJB */
	if (child != current) {
		if (!(child->ptrace & PT_PTRACED))
			goto out;
		if (child->state != TASK_STOPPED) {
			if (request != PTRACE_KILL)
				goto out;
		}
		if (child->p_pptr != current)
			goto out;
	}
	switch (request) {
	/* If I and D space are separate, these will need to be fixed. */
	case PTRACE_PEEKTEXT:	/* read word at location addr. */
	case PTRACE_PEEKDATA:
		copied = access_process_vm(child, addr, &tmp, sizeof(tmp), 0);
		ret = -EIO;
		if (copied != sizeof(tmp))
			goto out;
		ret = put_user(tmp, (u32 *)(unsigned long) data);
		goto out;
	/* read the word at location addr in the USER area. */
	case PTRACE_PEEKUSR:
		ret = copy_user(child, addr, data, sizeof(u32), 1, 0);
		break;
	/* If I and D space are separate, this will have to be fixed. */
	case PTRACE_POKETEXT:	/* write the word at location addr. */
	case PTRACE_POKEDATA:
		ret = 0;
		if (access_process_vm(child, addr, &data, sizeof(data), 1) == sizeof(data))
			goto out;
		ret = -EIO;
		goto out;
	case PTRACE_POKEUSR:	/* write the word at location addr in the USER area */
		ret = copy_user(child, addr, (addr_t) &data, sizeof(u32), 0, 1);
		break;
	case PTRACE_SYSCALL:	/* continue and stop at next (return from) syscall */
	case PTRACE_CONT:	/* restart after signal. */
		ret = -EIO;
		if ((unsigned long) data >= _NSIG)
			break;
		if (request == PTRACE_SYSCALL)
			child->ptrace |= PT_TRACESYS;
		else
			child->ptrace &= ~PT_TRACESYS;
		child->exit_code = data;
		/* make sure the single step bit is not set. */
		clear_single_step(child);
		wake_up_process(child);
		ret = 0;
		break;
	/*
	 * make the child exit. Best I can do is send it a sigkill.
	 * perhaps it should be put in the status that it wants to
	 * exit.
	 */
	case PTRACE_KILL:
		ret = 0;
		if (child->state == TASK_ZOMBIE)	/* already dead */
			break;
		child->exit_code = SIGKILL;
		/* make sure the single step bit is not set. */
		clear_single_step(child);
		wake_up_process(child);
		break;
	case PTRACE_SINGLESTEP:	/* set the trap flag. */
		ret = -EIO;
		if ((unsigned long) data >= _NSIG)
			break;
		child->ptrace &= ~PT_TRACESYS;
		child->exit_code = data;
		set_single_step(child);
		/* give it a chance to run. */
		wake_up_process(child);
		ret = 0;
		break;
	case PTRACE_DETACH:	/* detach a process that was attached. */
		ret = -EIO;
		if ((unsigned long) data >= _NSIG)
			break;
		child->ptrace &= ~(PT_PTRACED|PT_TRACESYS);
		child->exit_code = data;
		write_lock_irqsave(&tasklist_lock, flags);
		REMOVE_LINKS(child);
		child->p_pptr = child->p_opptr;
		SET_LINKS(child);
		write_unlock_irqrestore(&tasklist_lock, flags);
		/* make sure the single step bit is not set. */
		clear_single_step(child);
		wake_up_process(child);
		ret = 0;
		break;
	case PTRACE_PEEKUSR_AREA:
	case PTRACE_POKEUSR_AREA:
	{
		ptrace_area_emu31 *parea31 = (void *) addr;

		if (!access_ok(VERIFY_READ, parea31, sizeof(*parea31))) {
			ret = -EFAULT;	/* don't return here: that would skip unlock_kernel() */
			break;
		}
		ret = __get_user(parea.len, &parea31->len);
		ret |= __get_user(parea.kernel_addr, &parea31->kernel_addr);
		ret |= __get_user(parea.process_addr, &parea31->process_addr);
		if (ret == 0)
			ret = copy_user(child, parea.kernel_addr, parea.process_addr,
					parea.len, 1, (request == PTRACE_POKEUSR_AREA));
		break;
	}
	default:
		ret = -EIO;
		break;
	}
out:
	unlock_kernel();
	return ret;
}
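/*
 * Hedged userspace sketch of the PEEKDATA/POKEDATA word transfers handled
 * above: the tracer reads a word from the stopped tracee, rewrites it, and
 * lets the child observe the change. Portable Linux ptrace usage, not
 * s390-specific; relies on fork() preserving the address of `word`.
 */
#include <stdio.h>
#include <signal.h>
#include <unistd.h>
#include <sys/ptrace.h>
#include <sys/types.h>
#include <sys/wait.h>

static long word = 0x1234;	/* same address in parent and child */

int main(void)
{
	long peeked;
	pid_t child = fork();

	if (child == 0) {
		ptrace(PTRACE_TRACEME, 0, NULL, NULL);
		raise(SIGSTOP);			/* stop so the parent can poke */
		printf("child sees word = 0x%lx\n", word);	/* 0xbeef */
		_exit(0);
	}
	waitpid(child, NULL, 0);
	peeked = ptrace(PTRACE_PEEKDATA, child, &word, NULL);
	printf("peeked 0x%lx\n", peeked);	/* 0x1234 */
	ptrace(PTRACE_POKEDATA, child, &word, (void *) 0xbeefL);
	ptrace(PTRACE_DETACH, child, NULL, NULL);
	waitpid(child, NULL, 0);
	return 0;
}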
int sys_wait4(pid_t pid, unsigned long *stat_addr, int options, struct rusage *ru)
{
	int flag;
	struct task_struct *p;
	unsigned long oldblocked;

	if (stat_addr) {
		flag = verify_area(VERIFY_WRITE, stat_addr, 4);
		if (flag)
			return flag;
	}
repeat:
	current->signal &= ~(1 << (SIGCHLD-1));
	flag = 0;
	for (p = current->p_cptr; p; p = p->p_osptr) {
		if (pid > 0) {
			if (p->pid != pid)
				continue;
		} else if (!pid) {
			if (p->pgrp != current->pgrp)
				continue;
		} else if (pid != -1) {
			if (p->pgrp != -pid)
				continue;
		}
		switch (p->state) {
			case TASK_STOPPED:
				if (!p->exit_code)
					continue;
				if (!(options & WUNTRACED) && !(p->flags & PF_PTRACED))
					continue;
				if (stat_addr)
					put_fs_long((p->exit_code << 8) | 0x7f, stat_addr);
				p->exit_code = 0;
				if (ru != NULL)
					getrusage(p, RUSAGE_BOTH, ru);
				return p->pid;
			case TASK_ZOMBIE:
				current->cutime += p->utime + p->cutime;
				current->cstime += p->stime + p->cstime;
				current->cmin_flt += p->min_flt + p->cmin_flt;
				current->cmaj_flt += p->maj_flt + p->cmaj_flt;
				if (ru != NULL)
					getrusage(p, RUSAGE_BOTH, ru);
				flag = p->pid;
				if (stat_addr)
					put_fs_long(p->exit_code, stat_addr);
				if (p->p_opptr != p->p_pptr) {
					REMOVE_LINKS(p);
					p->p_pptr = p->p_opptr;
					SET_LINKS(p);
					send_sig(SIGCHLD, p->p_pptr, 1);
				} else
					release(p);
#ifdef DEBUG_PROC_TREE
				audit_ptree();
#endif
				return flag;
			default:
				flag = 1;
				continue;
		}
	}
	if (flag) {
		if (options & WNOHANG)
			return 0;
		current->state = TASK_INTERRUPTIBLE;
		oldblocked = current->blocked;
		current->blocked &= ~(1 << (SIGCHLD-1));
		schedule();
		current->blocked = oldblocked;
		if (current->signal & ~(current->blocked | (1 << (SIGCHLD-1))))
			return -ERESTARTSYS;
		else
			goto repeat;
	}
	return -ECHILD;
}
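/*
 * The stopped-child status stored above is (exit_code << 8) | 0x7f, and a
 * zombie's status is its raw exit_code. A small userspace sketch (my
 * illustration) decoding both encodings with the standard wait macros:
 */
#include <stdio.h>
#include <stdlib.h>
#include <signal.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/wait.h>

int main(void)
{
	int status;
	pid_t child = fork();

	if (child == 0) {
		raise(SIGSTOP);		/* first report: stopped */
		exit(42);		/* second report: exited */
	}
	waitpid(child, &status, WUNTRACED);
	if (WIFSTOPPED(status))		/* true when the low byte is 0x7f */
		printf("stopped by signal %d\n", WSTOPSIG(status));
	kill(child, SIGCONT);
	waitpid(child, &status, 0);
	if (WIFEXITED(status))
		printf("exited with %d\n", WEXITSTATUS(status));
	return 0;
}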
/*
 * This creates a new process as a copy of the old one,
 * but does not actually start it yet.
 *
 * It copies the registers, and all the appropriate
 * parts of the process environment (as per the clone
 * flags). The actual kick-off is left to the caller.
 */
struct task_struct *copy_process(unsigned long clone_flags,
				 unsigned long stack_start,
				 struct pt_regs *regs,
				 unsigned long stack_size,
				 int *parent_tidptr,
				 int *child_tidptr)
{
	int retval;
	struct task_struct *p = NULL;

	if ((clone_flags & (CLONE_NEWNS|CLONE_FS)) == (CLONE_NEWNS|CLONE_FS))
		return ERR_PTR(-EINVAL);

	/*
	 * Thread groups must share signals as well, and detached threads
	 * can only be started up within the thread group.
	 */
	if ((clone_flags & CLONE_THREAD) && !(clone_flags & CLONE_SIGHAND))
		return ERR_PTR(-EINVAL);
	if ((clone_flags & CLONE_DETACHED) && !(clone_flags & CLONE_THREAD))
		return ERR_PTR(-EINVAL);
	if (!(clone_flags & CLONE_DETACHED) && (clone_flags & CLONE_THREAD))
		return ERR_PTR(-EINVAL);

	retval = -ENOMEM;
	p = dup_task_struct(current);
	if (!p)
		goto fork_out;
	p->tux_info = NULL;

	retval = -EAGAIN;
	/*
	 * Increment user->__count before the rlimit test so that it would
	 * be correct if we take the bad_fork_free failure path.
	 */
	atomic_inc(&p->user->__count);
	if (atomic_read(&p->user->processes) >= p->rlim[RLIMIT_NPROC].rlim_cur) {
		if (!capable(CAP_SYS_ADMIN) && !capable(CAP_SYS_RESOURCE))
			goto bad_fork_free;
	}
	atomic_inc(&p->user->processes);

	/*
	 * Counter increases are protected by
	 * the kernel lock so nr_threads can't
	 * increase under us (but it may decrease).
	 */
	if (nr_threads >= max_threads)
		goto bad_fork_cleanup_count;

	get_exec_domain(p->exec_domain);

	if (p->binfmt && p->binfmt->module)
		__MOD_INC_USE_COUNT(p->binfmt->module);

	p->did_exec = 0;
	p->swappable = 0;
	p->state = TASK_UNINTERRUPTIBLE;

	copy_flags(clone_flags, p);
	if (clone_flags & CLONE_IDLETASK)
		p->pid = 0;
	else {
		p->pid = alloc_pidmap();
		if (p->pid == -1)
			goto bad_fork_cleanup;
	}
	retval = -EFAULT;
	if (clone_flags & CLONE_PARENT_SETTID)
		if (put_user(p->pid, parent_tidptr))
			goto bad_fork_cleanup;

	INIT_LIST_HEAD(&p->run_list);

	INIT_LIST_HEAD(&p->children);
	INIT_LIST_HEAD(&p->sibling);
	init_waitqueue_head(&p->wait_chldexit);
	p->vfork_done = NULL;
	spin_lock_init(&p->alloc_lock);
	spin_lock_init(&p->switch_lock);

	p->sigpending = 0;
	init_sigpending(&p->pending);

	p->it_real_value = p->it_virt_value = p->it_prof_value = 0;
	p->it_real_incr = p->it_virt_incr = p->it_prof_incr = 0;
	init_timer(&p->real_timer);
	p->real_timer.data = (unsigned long) p;

	p->leader = 0;		/* session leadership doesn't inherit */
	p->tty_old_pgrp = 0;
	memset(&p->utime, 0, sizeof(p->utime));
	memset(&p->stime, 0, sizeof(p->stime));
	memset(&p->cutime, 0, sizeof(p->cutime));
	memset(&p->cstime, 0, sizeof(p->cstime));
	memset(&p->group_utime, 0, sizeof(p->group_utime));
	memset(&p->group_stime, 0, sizeof(p->group_stime));
	memset(&p->group_cutime, 0, sizeof(p->group_cutime));
	memset(&p->group_cstime, 0, sizeof(p->group_cstime));
#ifdef CONFIG_SMP
	memset(&p->per_cpu_utime, 0, sizeof(p->per_cpu_utime));
	memset(&p->per_cpu_stime, 0, sizeof(p->per_cpu_stime));
#endif
	memset(&p->timing_state, 0, sizeof(p->timing_state));
	p->timing_state.type = PROCESS_TIMING_USER;
	p->last_sigxcpu = 0;
	p->array = NULL;
	p->lock_depth = -1;	/* -1 = no lock */
	p->start_time = jiffies;

	retval = -ENOMEM;
	/* copy all the process information */
	if (copy_files(clone_flags, p))
		goto bad_fork_cleanup;
	if (copy_fs(clone_flags, p))
		goto bad_fork_cleanup_files;
	if (copy_sighand(clone_flags, p))
		goto bad_fork_cleanup_fs;
	if (copy_signal(clone_flags, p))
		goto bad_fork_cleanup_sighand;
	if (copy_mm(clone_flags, p))
		goto bad_fork_cleanup_signal;
	if (copy_namespace(clone_flags, p))
		goto bad_fork_cleanup_mm;
	retval = copy_thread(0, clone_flags, stack_start, stack_size, p, regs);
	if (retval)
		goto bad_fork_cleanup_namespace;
	p->semundo = NULL;

	p->set_child_tid = (clone_flags & CLONE_CHILD_SETTID) ? child_tidptr : NULL;
	/*
	 * Clear TID on mm_release()?
	 */
	p->clear_child_tid = (clone_flags & CLONE_CHILD_CLEARTID) ? child_tidptr : NULL;

	/*
	 * Our parent execution domain becomes current domain.
	 * These must match for thread signalling to apply.
	 */
	p->parent_exec_id = p->self_exec_id;

	/* ok, now we should be set up.. */
	p->swappable = 1;
	if (clone_flags & CLONE_DETACHED)
		p->exit_signal = -1;
	else
		p->exit_signal = clone_flags & CSIGNAL;
	p->pdeath_signal = 0;

	/*
	 * Share the timeslice between parent and child, thus the
	 * total amount of pending timeslices in the system doesn't change,
	 * resulting in more scheduling fairness.
	 */
	local_irq_disable();
	p->time_slice = (current->time_slice + 1) >> 1;
	p->first_time_slice = 1;
	/*
	 * The remainder of the first timeslice might be recovered by
	 * the parent if the child exits early enough.
	 */
	current->time_slice >>= 1;
	p->last_run = jiffies;
	if (!current->time_slice) {
		/*
		 * This case is rare, it happens when the parent has only
		 * a single jiffy left from its timeslice. Taking the
		 * runqueue lock is not a problem.
		 */
		current->time_slice = 1;
		scheduler_tick(0 /* don't update the time stats */);
	}
	local_irq_enable();

	if ((int) current->time_slice <= 0)
		BUG();
	if ((int) p->time_slice <= 0)
		BUG();

	/*
	 * Ok, add it to the run-queues and make it
	 * visible to the rest of the system.
	 *
	 * Let it rip!
	 */
	p->tgid = p->pid;
	p->group_leader = p;
	INIT_LIST_HEAD(&p->ptrace_children);
	INIT_LIST_HEAD(&p->ptrace_list);

	/* Need tasklist lock for parent etc handling! */
	write_lock_irq(&tasklist_lock);
	/*
	 * Check for pending SIGKILL! The new thread should not be allowed
	 * to slip out of an OOM kill. (or normal SIGKILL.)
	 */
	if (sigismember(&current->pending.signal, SIGKILL)) {
		write_unlock_irq(&tasklist_lock);
		retval = -EINTR;
		goto bad_fork_cleanup_namespace;
	}

	/* CLONE_PARENT re-uses the old parent */
	if (clone_flags & (CLONE_PARENT|CLONE_THREAD))
		p->real_parent = current->real_parent;
	else
		p->real_parent = current;
	p->parent = p->real_parent;

	if (clone_flags & CLONE_THREAD) {
		spin_lock(&current->sighand->siglock);
		/*
		 * Important: if an exit-all has been started then
		 * do not create this new thread - the whole thread
		 * group is supposed to exit anyway.
		 */
		if (current->signal->group_exit) {
			spin_unlock(&current->sighand->siglock);
			write_unlock_irq(&tasklist_lock);
			retval = -EINTR;
			goto bad_fork_cleanup_namespace;
		}
		p->tgid = current->tgid;
		p->group_leader = current->group_leader;

		if (current->signal->group_stop_count > 0) {
			/*
			 * There is an all-stop in progress for the group.
			 * We ourselves will stop as soon as we check signals.
			 * Make the new thread part of that group stop too.
			 */
			current->signal->group_stop_count++;
			p->sigpending = 1;
		}

		spin_unlock(&current->sighand->siglock);
	}

	SET_LINKS(p);
	if (p->ptrace & PT_PTRACED)
		__ptrace_link(p, current->parent);

	attach_pid(p, PIDTYPE_PID, p->pid);
	if (thread_group_leader(p)) {
		attach_pid(p, PIDTYPE_TGID, p->tgid);
		attach_pid(p, PIDTYPE_PGID, p->pgrp);
		attach_pid(p, PIDTYPE_SID, p->session);
	} else {
		link_pid(p, p->pids + PIDTYPE_TGID,
			 &p->group_leader->pids[PIDTYPE_TGID].pid);
	}

	/* clear controlling tty of new task if parent's was just cleared */
	if (!current->tty && p->tty)
		p->tty = NULL;

	nr_threads++;
	write_unlock_irq(&tasklist_lock);
	retval = 0;

fork_out:
	if (retval)
		return ERR_PTR(retval);
	return p;

bad_fork_cleanup_namespace:
	exit_namespace(p);
bad_fork_cleanup_mm:
	exit_mm(p);
	if (p->active_mm)
		mmdrop(p->active_mm);
bad_fork_cleanup_signal:
	exit_signal(p);
bad_fork_cleanup_sighand:
	exit_sighand(p);
bad_fork_cleanup_fs:
	exit_fs(p);		/* blocking */
bad_fork_cleanup_files:
	exit_files(p);		/* blocking */
bad_fork_cleanup:
	if (p->pid > 0)
		free_pidmap(p->pid);
	put_exec_domain(p->exec_domain);
	if (p->binfmt && p->binfmt->module)
		__MOD_DEC_USE_COUNT(p->binfmt->module);
bad_fork_cleanup_count:
	atomic_dec(&p->user->processes);
bad_fork_free:
	p->state = TASK_ZOMBIE;	/* debug */
	atomic_dec(&p->usage);
	put_task_struct(p);
	goto fork_out;
}
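/*
 * The EINVAL flag checks at the top of copy_process() are visible from
 * userspace. A hedged sketch (assumes Linux and glibc's clone(2) wrapper):
 * asking for CLONE_THREAD without CLONE_SIGHAND is rejected before any task
 * state is copied.
 */
#define _GNU_SOURCE
#include <errno.h>
#include <sched.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

static int thread_fn(void *arg)
{
	return 0;
}

int main(void)
{
	char *stack = malloc(65536);

	/* CLONE_THREAD without CLONE_SIGHAND: rejected in copy_process() */
	if (clone(thread_fn, stack + 65536, CLONE_THREAD | SIGCHLD, NULL) == -1)
		printf("CLONE_THREAD alone: %s\n", strerror(errno));	/* EINVAL */
	free(stack);
	return 0;
}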
asmlinkage int sys_wait4(pid_t pid, unsigned int *stat_addr, int options, struct rusage *ru)
{
	int flag, retval;
	struct wait_queue wait = { current, NULL };
	struct task_struct *p;

	if (options & ~(WNOHANG|WUNTRACED|__WCLONE))
		return -EINVAL;

	add_wait_queue(&current->wait_chldexit, &wait);
repeat:
	flag = 0;
	/*
	 * The interruptible state must be set before looking at the
	 * children. This is because we want to catch any racy exit from
	 * the children, as do_exit() may run under us. The following
	 * read_lock will enforce SMP ordering at the CPU level.
	 */
	current->state = TASK_INTERRUPTIBLE;
	read_lock(&tasklist_lock);
	for (p = current->p_cptr; p; p = p->p_osptr) {
		if (pid > 0) {
			if (p->pid != pid)
				continue;
		} else if (!pid) {
			if (p->pgrp != current->pgrp)
				continue;
		} else if (pid != -1) {
			if (p->pgrp != -pid)
				continue;
		}
		/* wait for cloned processes iff the __WCLONE flag is set */
		if ((p->exit_signal != SIGCHLD) ^ ((options & __WCLONE) != 0))
			continue;
		flag = 1;
		switch (p->state) {
			case TASK_STOPPED:
				if (!p->exit_code)
					continue;
				if (!(options & WUNTRACED) && !(p->flags & PF_PTRACED))
					continue;
				read_unlock(&tasklist_lock);
				current->state = TASK_RUNNING;
				/* We *must* do this before touching userspace! */
				retval = ru ? getrusage(p, RUSAGE_BOTH, ru) : 0;
				if (!retval && stat_addr)
					retval = put_user((p->exit_code << 8) | 0x7f, stat_addr);
				if (!retval) {
					p->exit_code = 0;
					retval = p->pid;
				}
				goto end_wait4;
			case TASK_ZOMBIE:
				current->times.tms_cutime += p->times.tms_utime + p->times.tms_cutime;
				current->times.tms_cstime += p->times.tms_stime + p->times.tms_cstime;
				read_unlock(&tasklist_lock);
				current->state = TASK_RUNNING;
				/* We *must* do this before touching userspace! */
				retval = ru ? getrusage(p, RUSAGE_BOTH, ru) : 0;
				if (!retval && stat_addr)
					retval = put_user(p->exit_code, stat_addr);
				if (retval)
					goto end_wait4;
				retval = p->pid;
				if (p->p_opptr != p->p_pptr) {
					write_lock_irq(&tasklist_lock);
					REMOVE_LINKS(p);
					p->p_pptr = p->p_opptr;
					SET_LINKS(p);
					write_unlock_irq(&tasklist_lock);
					notify_parent(p, SIGCHLD);
				} else
					release(p);
#ifdef DEBUG_PROC_TREE
				audit_ptree();
#endif
				goto end_wait4;
			default:
				continue;
		}
	}
	read_unlock(&tasklist_lock);
	if (flag) {
		retval = 0;
		if (options & WNOHANG)
			goto end_wait4;
		retval = -ERESTARTSYS;
		if (signal_pending(current))
			goto end_wait4;
		schedule();
		goto repeat;
	}
	retval = -ECHILD;
end_wait4:
	remove_wait_queue(&current->wait_chldexit, &wait);
	current->state = TASK_RUNNING;
	return retval;
}
/* Returns 0 on success. */
asmlinkage int sys_wait4(pid_t pid, unsigned long *stat_addr, int options, struct rusage *ru)
{
	int flag, retval;
	struct wait_queue wait = { current, NULL };
	struct task_struct *p;

	if (stat_addr) {
		flag = verify_area(VERIFY_WRITE, stat_addr, 4);
		if (flag)
			return flag;
	}
	add_wait_queue(&current->wait_chldexit, &wait);
repeat:
	flag = 0;
	/*
	 * p_cptr is the youngest child and p_osptr its next older sibling,
	 * so this loop scans the current process's children from the
	 * youngest toward the oldest.
	 */
	for (p = current->p_cptr; p; p = p->p_osptr) {
		/*
		 * pid > 0 waits for one specific child, pid == 0 for any
		 * child in the caller's process group, and a negative pid
		 * for a wider set of children.
		 */
		if (pid > 0) {
			if (p->pid != pid)
				continue;
		} else if (!pid) {
			if (p->pgrp != current->pgrp)
				continue;
		/* A negative pid other than -1 waits for children in process group -pid. */
		} else if (pid != -1) {
			if (p->pgrp != -pid)
				continue;
		}
		/* wait for cloned processes iff the __WCLONE flag is set */
		if ((p->exit_signal != SIGCHLD) ^ ((options & __WCLONE) != 0))
			continue;
		flag = 1;
		switch (p->state) {
			case TASK_STOPPED:
				if (!p->exit_code)
					continue;
				if (!(options & WUNTRACED) && !(p->flags & PF_PTRACED))
					continue;
				if (stat_addr)
					put_fs_long((p->exit_code << 8) | 0x7f, stat_addr);
				p->exit_code = 0;
				if (ru != NULL)
					getrusage(p, RUSAGE_BOTH, ru);
				retval = p->pid;
				goto end_wait4;
			case TASK_ZOMBIE:
				current->cutime += p->utime + p->cutime;
				current->cstime += p->stime + p->cstime;
				current->cmin_flt += p->min_flt + p->cmin_flt;
				current->cmaj_flt += p->maj_flt + p->cmaj_flt;
				if (ru != NULL)
					getrusage(p, RUSAGE_BOTH, ru);
				flag = p->pid;
				if (stat_addr)
					put_fs_long(p->exit_code, stat_addr);
				if (p->p_opptr != p->p_pptr) {
					REMOVE_LINKS(p);
					p->p_pptr = p->p_opptr;
					SET_LINKS(p);
					notify_parent(p);
				} else
					release(p);
#ifdef DEBUG_PROC_TREE
				audit_ptree();
#endif
				retval = flag;
				goto end_wait4;
			default:
				continue;
		}
	}
	if (flag) {
		retval = 0;
		/*
		 * Children are still running and WNOHANG was given:
		 * do not wait, return immediately.
		 */
		if (options & WNOHANG)
			goto end_wait4;
		/* Mark ourselves interruptible and let the scheduler run. */
		current->state = TASK_INTERRUPTIBLE;
		schedule();
		/* Clear the pending SIGCHLD before rescanning the children. */
		current->signal &= ~(1 << (SIGCHLD-1));
		retval = -ERESTARTSYS;
		/*
		 * If an unblocked signal is pending (e.g. the SIGCHLD a
		 * child sent on exit), bail out so it can be delivered and
		 * the system call restarted.
		 */
		if (current->signal & ~current->blocked)
			goto end_wait4;
		goto repeat;
	}
	retval = -ECHILD;
end_wait4:
	remove_wait_queue(&current->wait_chldexit, &wait);
	return retval;
}
long sys_ptrace(long request, pid_t pid, long addr, long data)
{
	struct task_struct *child;
	long ret;

	lock_kernel();
	ret = -EPERM;
	if (request == PTRACE_TRACEME) {
		/* are we already being traced? */
		if (current->ptrace & PT_PTRACED)
			goto out;
		/* set the ptrace bit in the process flags. */
		current->ptrace |= PT_PTRACED;
		ret = 0;
		goto out;
	}
	ret = -ESRCH;
	read_lock(&tasklist_lock);
	child = find_task_by_pid(pid);
	if (child)
		get_task_struct(child);
	read_unlock(&tasklist_lock);
	if (!child)
		goto out;

	ret = -EPERM;
	if (pid == 1)		/* no messing around with init! */
		goto out_tsk;

	if (request == PTRACE_ATTACH) {
		if (child == current)
			goto out_tsk;
		if ((!child->dumpable ||
		     (current->uid != child->euid) ||
		     (current->uid != child->suid) ||
		     (current->uid != child->uid) ||
		     (current->gid != child->egid) ||
		     (current->gid != child->sgid) ||
		     (!cap_issubset(child->cap_permitted, current->cap_permitted)) ||
		     (current->gid != child->gid)) && !capable(CAP_SYS_PTRACE))
			goto out_tsk;
		/* the same process cannot be attached many times */
		if (child->ptrace & PT_PTRACED)
			goto out_tsk;
		child->ptrace |= PT_PTRACED;
		if (child->p_pptr != current) {
			unsigned long flags;

			write_lock_irqsave(&tasklist_lock, flags);
			REMOVE_LINKS(child);
			child->p_pptr = current;
			SET_LINKS(child);
			write_unlock_irqrestore(&tasklist_lock, flags);
		}
		send_sig(SIGSTOP, child, 1);
		ret = 0;
		goto out_tsk;
	}
	ret = -ESRCH;
	if (!(child->ptrace & PT_PTRACED))
		goto out_tsk;
	if (child->state != TASK_STOPPED) {
		if (request != PTRACE_KILL)
			goto out_tsk;
	}
	if (child->p_pptr != current)
		goto out_tsk;

	switch (request) {
	case PTRACE_PEEKTEXT:	/* read word at location addr. */
	case PTRACE_PEEKDATA: {
		unsigned long tmp;
		int copied;

		copied = access_process_vm(child, addr, &tmp, sizeof(tmp), 0);
		ret = -EIO;
		if (copied != sizeof(tmp))
			goto out_tsk;
		ret = put_user(tmp, (unsigned long *) data);
		goto out_tsk;
	}

	/* when I and D space are separate, this will have to be fixed. */
	case PTRACE_POKETEXT:	/* write the word at location addr. */
	case PTRACE_POKEDATA:
		ret = 0;
		if (access_process_vm(child, addr, &data, sizeof(data), 1) == sizeof(data))
			goto out_tsk;
		ret = -EIO;
		goto out_tsk;

	/*
	 * Read the word at location addr in the USER area. This will need
	 * to change when the kernel no longer saves all regs on a syscall.
	 */
	case PTRACE_PEEKUSR: {
		unsigned long tmp;

		ret = -EIO;
		if ((addr & 3) || (unsigned long) addr >= sizeof(struct pt_regs))
			goto out_tsk;
		tmp = *(unsigned long *) ((char *) task_regs(child) + addr);
		ret = put_user(tmp, (unsigned long *) data);
		goto out_tsk;
	}

	/*
	 * Write the word at location addr in the USER area. This will need
	 * to change when the kernel no longer saves all regs on a syscall.
	 * FIXME. There is a problem at the moment in that r3-r18 are only
	 * saved if the process is ptraced on syscall entry, and even then
	 * those values are overwritten by actual register values on
	 * syscall exit.
	 */
	case PTRACE_POKEUSR:
		ret = -EIO;
		if ((addr & 3) || (unsigned long) addr >= sizeof(struct pt_regs))
			goto out_tsk;
		/* XXX This test probably needs adjusting. We probably want to
		 * allow writes to some bits of PSW, and may want to block writes
		 * to (some) space registers. Some register values written here
		 * may be ignored in entry.S:syscall_restore_rfi; e.g. iaoq is
		 * written with r31/r31+4, and not with the values in pt_regs.
		 */
		/* Allow writing of gr1-gr31, fr*, sr*, iasq*, iaoq*, sar */
		if (addr == PT_PSW || (addr > PT_IAOQ1 && addr != PT_SAR))
			goto out_tsk;
		*(unsigned long *) ((char *) task_regs(child) + addr) = data;
		ret = 0;
		goto out_tsk;

	case PTRACE_SYSCALL:	/* continue and stop at next (return from) syscall */
	case PTRACE_CONT:
		ret = -EIO;
		if ((unsigned long) data > _NSIG)
			goto out_tsk;
		child->ptrace &= ~(PT_SINGLESTEP|PT_BLOCKSTEP);
		if (request == PTRACE_SYSCALL)
			child->ptrace |= PT_TRACESYS;
		else
			child->ptrace &= ~PT_TRACESYS;
		child->exit_code = data;
		goto out_wake_notrap;

	case PTRACE_KILL:
		/*
		 * make the child exit. Best I can do is send it a
		 * sigkill. perhaps it should be put in the status
		 * that it wants to exit.
		 */
		if (child->state == TASK_ZOMBIE)	/* already dead */
			goto out_tsk;
		child->exit_code = SIGKILL;
		goto out_wake_notrap;

	case PTRACE_SINGLEBLOCK:
		ret = -EIO;
		if ((unsigned long) data > _NSIG)
			goto out_tsk;
		child->ptrace &= ~(PT_TRACESYS|PT_SINGLESTEP);
		child->ptrace |= PT_BLOCKSTEP;
		child->exit_code = data;

		/* Enable taken branch trap. */
		pa_psw(child)->r = 0;
		pa_psw(child)->t = 1;
		pa_psw(child)->h = 0;
		pa_psw(child)->l = 0;
		goto out_wake;

	case PTRACE_SINGLESTEP:
		ret = -EIO;
		if ((unsigned long) data > _NSIG)
			goto out_tsk;
		child->ptrace &= ~(PT_TRACESYS|PT_BLOCKSTEP);
		child->ptrace |= PT_SINGLESTEP;
		child->exit_code = data;

		if (pa_psw(child)->n) {
			struct siginfo si;

			/* Nullified, just crank over the queue. */
			task_regs(child)->iaoq[0] = task_regs(child)->iaoq[1];
			task_regs(child)->iasq[0] = task_regs(child)->iasq[1];
			task_regs(child)->iaoq[1] = task_regs(child)->iaoq[0] + 4;
			pa_psw(child)->n = 0;
			pa_psw(child)->x = 0;
			pa_psw(child)->y = 0;
			pa_psw(child)->z = 0;
			pa_psw(child)->b = 0;
			pa_psw(child)->r = 0;
			pa_psw(child)->t = 0;
			pa_psw(child)->h = 0;
			pa_psw(child)->l = 0;
			/* Don't wake up the child, but let the
			   parent know something happened. */
			si.si_code = TRAP_TRACE;
			si.si_addr = (void *) (task_regs(child)->iaoq[0] & ~3);
			si.si_signo = SIGTRAP;
			si.si_errno = 0;
			force_sig_info(SIGTRAP, &si, child);
			//notify_parent(child, SIGCHLD);
			//ret = 0;
			goto out_wake;
		}

		/* Enable recovery counter traps. The recovery counter
		 * itself will be set to zero on a task switch. If the
		 * task is suspended on a syscall then the syscall return
		 * path will overwrite the recovery counter with a suitable
		 * value such that it traps once back in user space. We
		 * disable interrupts in the child's PSW here also, to avoid
		 * interrupts while the recovery counter is decrementing.
		 */
		pa_psw(child)->r = 1;
		pa_psw(child)->t = 0;
		pa_psw(child)->h = 0;
		pa_psw(child)->l = 0;
		/* give it a chance to run. */
		goto out_wake;

	case PTRACE_DETACH:
		ret = -EIO;
		if ((unsigned long) data > _NSIG)
			goto out_tsk;
		child->ptrace &= ~(PT_PTRACED|PT_TRACESYS|PT_SINGLESTEP|PT_BLOCKSTEP);
		child->exit_code = data;
		write_lock_irq(&tasklist_lock);
		REMOVE_LINKS(child);
		child->p_pptr = child->p_opptr;
		SET_LINKS(child);
		write_unlock_irq(&tasklist_lock);
		goto out_wake_notrap;

	default:
		ret = -EIO;
		goto out_tsk;
	}

out_wake_notrap:
	/* make sure the trap bits are not set */
	pa_psw(child)->r = 0;
	pa_psw(child)->t = 0;
	pa_psw(child)->h = 0;
	pa_psw(child)->l = 0;
out_wake:
	wake_up_process(child);
	ret = 0;
out_tsk:
	free_task_struct(child);
out:
	unlock_kernel();
	return ret;
}
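/*
 * A hedged, architecture-neutral userspace sketch of the single-step protocol
 * the PA-RISC code above implements via PSW recovery-counter traps: resume
 * the tracee one instruction at a time and count the steps until it exits.
 * The ptrace API is the same even though the trap mechanics differ per arch.
 */
#include <stdio.h>
#include <unistd.h>
#include <sys/ptrace.h>
#include <sys/types.h>
#include <sys/wait.h>

int main(void)
{
	long steps = 0;
	int status;
	pid_t child = fork();

	if (child == 0) {
		ptrace(PTRACE_TRACEME, 0, NULL, NULL);
		execl("/bin/true", "true", (char *) NULL);	/* stops with SIGTRAP */
		_exit(127);
	}
	waitpid(child, &status, 0);	/* stopped at execve */
	while (WIFSTOPPED(status)) {
		ptrace(PTRACE_SINGLESTEP, child, NULL, NULL);
		waitpid(child, &status, 0);
		steps++;
	}
	printf("/bin/true ran for %ld single-stepped instructions\n", steps);
	return 0;
}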
asmlinkage int exe$creprc(unsigned int *pidadr, void *image, void *input,
			  void *output, void *error, struct _generic_64 *prvadr,
			  unsigned int *quota, void *prcnam, unsigned int baspri,
			  unsigned int uic, unsigned short int mbxunt,
			  unsigned int stsflg, ...)
{
	unsigned long stack_here;
	struct _pcb *p, *cur;
	int retval;
	struct dsc$descriptor *imd = image, *ind = input, *oud = output, *erd = error;
	unsigned long clone_flags = CLONE_VFORK;

	//check pidadr
	ctl$gl_creprc_flags = stsflg;
	// check for PRC$M_NOUAF sometime
	if (stsflg & PRC$M_DETACH) {
	}
	if (uic) {
	}
	//setipl(IPL$_ASTDEL);//postpone this?
	cur = ctl$gl_pcb;
	vmslock(&SPIN_SCHED, IPL$_SCHED);
	vmslock(&SPIN_MMG, IPL$_MMG);
	p = alloc_task_struct();
	//bzero(p,sizeof(struct _pcb));//not wise?
	memset(p, 0, sizeof(struct _pcb));
	// check more

	// compensate for no struct clone/copy
	p->sigmask_lock = SPIN_LOCK_UNLOCKED;
	p->alloc_lock = SPIN_LOCK_UNLOCKED;

	qhead_init(&p->pcb$l_astqfl);
	// and enable ast del to all modes
	p->pcb$b_type = DYN$C_PCB;

	p->pcb$b_asten = 15;
	p->phd$b_astlvl = 4;
	p->pr_astlvl = 4;
	p->psl = 0;
	p->pslindex = 0;

	qhead_init(&p->pcb$l_lockqfl);

	// set capabilities
	p->pcb$l_permanent_capability = sch$gl_default_process_cap;
	p->pcb$l_capability = p->pcb$l_permanent_capability;

	// set affinity
	// set default fileprot
	// set arb
	// set mbx stuff

	// from setprn:
	if (prcnam) {
		struct dsc$descriptor *s = prcnam;
		strncpy(p->pcb$t_lname, s->dsc$a_pointer, s->dsc$w_length);
	}

	// set priv
	p->pcb$l_priv = ctl$gl_pcb->pcb$l_priv;
	// set pris
	p->pcb$b_prib = 31 - baspri;
	p->pcb$b_pri = 31 - baspri - 6;
	// if (p->pcb$b_pri < 16) p->pcb$b_pri = 16;
	p->pcb$w_quant = -QUANTUM;

	// set uic
	p->pcb$l_uic = ctl$gl_pcb->pcb$l_uic;

	// set vms pid
	// check process name

	// do something with pqb
	p->pcb$l_pqb = kmalloc(sizeof(struct _pqb), GFP_KERNEL);
	memset(p->pcb$l_pqb, 0, sizeof(struct _pqb));

	struct _pqb *pqb = p->pcb$l_pqb;

	pqb->pqb$q_prvmsk = ctl$gq_procpriv;

	if (imd)
		memcpy(pqb->pqb$t_image, imd->dsc$a_pointer, imd->dsc$w_length);
	if (ind)
		memcpy(pqb->pqb$t_input, ind->dsc$a_pointer, ind->dsc$w_length);
	if (oud)
		memcpy(pqb->pqb$t_output, oud->dsc$a_pointer, oud->dsc$w_length);
	if (erd)
		memcpy(pqb->pqb$t_error, erd->dsc$a_pointer, erd->dsc$w_length);
	if (oud)	// temp measure
		memcpy(p->pcb$t_terminal, oud->dsc$a_pointer, oud->dsc$w_length);

	// translate some logicals
	// copy security clearance
	// copy msg
	// copy flags
	// set jib
	// do quotas
	// process itmlst
	// set pcb$l_pqb

#if 0
	setipl(IPL$_MMG);
	vmslock(&SPIN_SCHED, -1);
	// find vacant slot in pcb vector
	// and store it
#endif

	// make ipid and epid
	p->pcb$l_pid = alloc_ipid();
	{
		unsigned long *vec = sch$gl_pcbvec;
		vec[p->pcb$l_pid & 0xffff] = p;
	}
	p->pcb$l_epid = exe$ipid_to_epid(p->pcb$l_pid);

	// should invoke sch$chse, put this at bottom?
	// setipl(0) and return

	// now lots of things from fork
	retval = -EAGAIN;
	/*
	 * Check if we are over our maximum process limit, but be sure to
	 * exclude root. This is needed to make it possible for login and
	 * friends to set the per-user process limit to something lower
	 * than the amount of processes root is running. -- Rik
	 */
#if 0
	if (atomic_read(&p->user->processes) >= p->rlim[RLIMIT_NPROC].rlim_cur
	    && !capable(CAP_SYS_ADMIN) && !capable(CAP_SYS_RESOURCE))
		goto bad_fork_free;

	atomic_inc(&p->user->__count);
	atomic_inc(&p->user->processes);
#endif

	/*
	 * Counter increases are protected by
	 * the kernel lock so nr_threads can't
	 * increase under us (but it may decrease).
	 */
	get_exec_domain(p->exec_domain);

	if (p->binfmt && p->binfmt->module)
		__MOD_INC_USE_COUNT(p->binfmt->module);

	p->did_exec = 0;
	p->swappable = 0;
	p->state = TASK_UNINTERRUPTIBLE;

	//copy_flags(clone_flags, p); // not here?
	p->pcb$l_pid = alloc_ipid();

	p->run_list.next = NULL;
	p->run_list.prev = NULL;

	p->p_cptr = NULL;
	init_waitqueue_head(&p->wait_chldexit);
	p->vfork_done = NULL;
	spin_lock_init(&p->alloc_lock);

	p->sigpending = 0;
	init_sigpending(&p->pending);

	p->it_real_value = p->it_virt_value = p->it_prof_value = 0;
	p->it_real_incr = p->it_virt_incr = p->it_prof_incr = 0;
	init_timer(&p->real_timer);
	p->real_timer.data = (unsigned long) p;

	p->leader = 0;		/* session leadership doesn't inherit */
	p->tty_old_pgrp = 0;
	p->times.tms_utime = p->times.tms_stime = 0;
	p->times.tms_cutime = p->times.tms_cstime = 0;
	p->lock_depth = -1;	/* -1 = no lock */
	p->start_time = jiffies;

	INIT_LIST_HEAD(&p->local_pages);

	p->files = current->files;
	p->fs = current->fs;
	p->sig = current->sig;

	/* copy all the process information */
	if (copy_files(clone_flags, p))
		goto bad_fork_cleanup;
	if (copy_fs(clone_flags, p))
		goto bad_fork_cleanup_files;
	if (copy_sighand(clone_flags, p))
		goto bad_fork_cleanup_fs;

bad_fork_cleanup:
bad_fork_cleanup_files:
bad_fork_cleanup_fs:
	// now a hole

	// now more from fork

	/* ok, now we should be set up.. */
	p->swappable = 1;
	p->exit_signal = 0;
	p->pdeath_signal = 0;

	/*
	 * "share" dynamic priority between parent and child, thus the
	 * total amount of dynamic priorities in the system doesn't change,
	 * more scheduling fairness. This is only important in the first
	 * timeslice, on the long run the scheduling behaviour is unchanged.
	 */

	/*
	 * Ok, add it to the run-queues and make it
	 * visible to the rest of the system.
	 *
	 * Let it rip!
	 */
	retval = p->pcb$l_epid;
	INIT_LIST_HEAD(&p->thread_group);

	/* Need tasklist lock for parent etc handling! */
	write_lock_irq(&tasklist_lock);

	/* CLONE_PARENT and CLONE_THREAD re-use the old parent */
	p->p_opptr = current->p_opptr;
	p->p_pptr = current->p_pptr;
	p->p_opptr = current /*->p_opptr*/;
	p->p_pptr = current /*->p_pptr*/;

	SET_LINKS(p);
	nr_threads++;
	write_unlock_irq(&tasklist_lock);

	// printk("fork befwak\n");
	//wake_up_process(p);			/* do this last */
	// wake_up_process2(p, PRI$_TICOM);	/* do this last */
	//goto fork_out;//??

	// now something from exec
	// wait, better do execve itself
	memcpy(p->rlim, current->rlim, sizeof(p->rlim));

	qhead_init(&p->pcb$l_sqfl);

	struct mm_struct *mm = mm_alloc();
	p->mm = mm;
	p->active_mm = mm;
	p->user = INIT_USER;

	spin_lock(&mmlist_lock);
#if 0
	list_add(&mm->mmlist, &p->p_pptr->mm->mmlist);
#endif
	mmlist_nr++;
	spin_unlock(&mmlist_lock);

	// Now we are getting into the area that is really the swapper's.
	// To be moved to shell.c and swp$shelinit later
	p->pcb$l_phd = kmalloc(sizeof(struct _phd), GFP_KERNEL);
	init_phd(p->pcb$l_phd);
	init_fork_p1pp(p, p->pcb$l_phd, ctl$gl_pcb, ctl$gl_pcb->pcb$l_phd);
#ifdef __x86_64__
	shell_init_other(p, ctl$gl_pcb, 0x7ff80000 - 0x1000, 0x7fffe000);
	shell_init_other(p, ctl$gl_pcb, 0x7ff80000 - 0x2000, 0x7fffe000);
	shell_init_other(p, ctl$gl_pcb, 0x7ff90000 - 0x1000, 0x7fffe000);
	shell_init_other(p, ctl$gl_pcb, 0x7ff90000 - 0x2000, 0x7fffe000);
	shell_init_other(p, ctl$gl_pcb, 0x7ffa0000 - 0x1000, 0x7fffe000);
	shell_init_other(p, ctl$gl_pcb, 0x7ffa0000 - 0x2000, 0x7fffe000);
#else
	shell_init_other(p, ctl$gl_pcb, 0x7ff80000 - 0x1000, 0x7fffe000);
	shell_init_other(p, ctl$gl_pcb, 0x7ff80000 - 0x2000, 0x7fffe000);
	shell_init_other(p, ctl$gl_pcb, 0x7ff90000 - 0x1000, 0x7fffe000);
	shell_init_other(p, ctl$gl_pcb, 0x7ff90000 - 0x2000, 0x7fffe000);
#endif

	int exe$procstrt(struct _pcb *p);
	struct pt_regs *regs = &pidadr;

	//printk("newthread %x\n",p),
	retval = new_thread(0, clone_flags, 0, 0, p, 0);

	int eip = 0, esp = 0;

	// start_thread(regs, eip, esp);

	sch$chse(p, PRI$_TICOM);

	vmsunlock(&SPIN_MMG, -1);
	vmsunlock(&SPIN_SCHED, 0);

	return SS$_NORMAL;

#if 0
	return sys_execve(((struct dsc$descriptor *) image)->dsc$a_pointer, 0, 0);
	return SS$_NORMAL;
#endif

#if 0
	{
		char *filename = ((struct dsc$descriptor *) image)->dsc$a_pointer;
		char **argv = 0;
		char **envp = 0;
		struct pt_regs *regs = 0;
		struct linux_binprm bprm;
		struct file *file;
		int retval;
		int i;

		file = open_exec(filename);
		retval = PTR_ERR(file);
		if (IS_ERR(file))
			return retval;

		bprm.p = PAGE_SIZE * MAX_ARG_PAGES - sizeof(void *);
		memset(bprm.page, 0, MAX_ARG_PAGES * sizeof(bprm.page[0]));

		bprm.file = file;
		bprm.filename = filename;
		bprm.sh_bang = 0;
		bprm.loader = 0;
		bprm.exec = 0;
		if ((bprm.argc = count(argv, bprm.p / sizeof(void *))) < 0) {
			allow_write_access(file);
			fput(file);
			//printk("here 7 %x\n",bprm.argc);
			return bprm.argc;
		}
		if ((bprm.envc = count(envp, bprm.p / sizeof(void *))) < 0) {
			allow_write_access(file);
			fput(file);
			//printk("here 6\n");
			return bprm.envc;
		}

		retval = prepare_binprm(&bprm);
		//printk("here 4\n");
		if (retval < 0)
			goto out;

		retval = copy_strings_kernel(1, &bprm.filename, &bprm);
		//printk("here 3\n");
		if (retval < 0)
			goto out;

		bprm.exec = bprm.p;
		retval = copy_strings(bprm.envc, envp, &bprm);
		//printk("here 2\n");
		if (retval < 0)
			goto out;

		retval = copy_strings(bprm.argc, argv, &bprm);
		//printk("here 1\n");
		if (retval < 0)
			goto out;

		retval = search_binary_handler(&bprm, regs);
		if (retval >= 0)
			/* execve success */
			return retval;

out:
		/* Something went wrong, return the inode and free the argument pages */
		allow_write_access(bprm.file);
		if (bprm.file)
			fput(bprm.file);

		for (i = 0; i < MAX_ARG_PAGES; i++) {
			struct page *page = bprm.page[i];
			if (page)
				__free_page(page);
		}

		return retval;
	}
#endif

fork_out:
	return retval;

bad_fork_free:
	free_task_struct(p);
	goto fork_out;
}
asmlinkage long sys_wait4(pid_t pid, unsigned int *stat_addr, int options, struct rusage *ru)
{
	int flag, retval;
	DECLARE_WAITQUEUE(wait, current);
	struct task_struct *tsk;

	if (options & ~(WNOHANG|WUNTRACED|__WNOTHREAD|__WCLONE|__WALL))
		return -EINVAL;

	add_wait_queue(&current->wait_chldexit, &wait);
repeat:
	flag = 0;
	current->state = TASK_INTERRUPTIBLE;
	read_lock(&tasklist_lock);
	tsk = current;
	do {
		struct task_struct *p;

		for (p = tsk->p_cptr; p; p = p->p_osptr) {
			if (pid > 0) {
				if (p->pid != pid)
					continue;
			} else if (!pid) {
				if (p->pgrp != current->pgrp)
					continue;
			} else if (pid != -1) {
				if (p->pgrp != -pid)
					continue;
			}
			/* Wait for all children (clone and not) if __WALL is set;
			 * otherwise, wait for clone children *only* if __WCLONE is
			 * set; otherwise, wait for non-clone children *only*.  (Note:
			 * A "clone" child here is one that reports to its parent
			 * using a signal other than SIGCHLD.) */
			if (((p->exit_signal != SIGCHLD) ^ ((options & __WCLONE) != 0))
			    && !(options & __WALL))
				continue;
			flag = 1;
			switch (p->state) {
			case TASK_STOPPED:
				if (!p->exit_code)
					continue;
				if (!(options & WUNTRACED) && !(p->ptrace & PT_PTRACED))
					continue;
				read_unlock(&tasklist_lock);
				retval = ru ? getrusage(p, RUSAGE_BOTH, ru) : 0;
				if (!retval && stat_addr)
					retval = put_user((p->exit_code << 8) | 0x7f, stat_addr);
				if (!retval) {
					p->exit_code = 0;
					retval = p->pid;
				}
				goto end_wait4;
			case TASK_ZOMBIE:
				current->times.tms_cutime += p->times.tms_utime + p->times.tms_cutime;
				current->times.tms_cstime += p->times.tms_stime + p->times.tms_cstime;
				read_unlock(&tasklist_lock);
				retval = ru ? getrusage(p, RUSAGE_BOTH, ru) : 0;
				if (!retval && stat_addr)
					retval = put_user(p->exit_code, stat_addr);
				if (retval)
					goto end_wait4;
				retval = p->pid;
				if (p->p_opptr != p->p_pptr) {
					write_lock_irq(&tasklist_lock);
					REMOVE_LINKS(p);
					p->p_pptr = p->p_opptr;
					SET_LINKS(p);
					do_notify_parent(p, SIGCHLD);
					write_unlock_irq(&tasklist_lock);
				} else
					release_task(p);
				goto end_wait4;
			default:
				continue;
			}
		}
		if (options & __WNOTHREAD)
			break;
		tsk = next_thread(tsk);
	} while (tsk != current);
	read_unlock(&tasklist_lock);
	if (flag) {
		retval = 0;
		if (options & WNOHANG)
			goto end_wait4;
		retval = -ERESTARTSYS;
		if (signal_pending(current))
			goto end_wait4;
		schedule();
		goto repeat;
	}
	retval = -ECHILD;
end_wait4:
	current->state = TASK_RUNNING;
	remove_wait_queue(&current->wait_chldexit, &wait);
	return retval;
}
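/*
 * Hedged sketch of the __WCLONE / __WALL selection above (my illustration,
 * relying on behavior I believe holds on Linux): a child whose exit signal is
 * not SIGCHLD is a "clone" child, and plain wait4() skips it unless __WCLONE
 * or __WALL is passed. SIGUSR1 is ignored in the parent so the child's death
 * signal does not kill this demo.
 */
#define _GNU_SOURCE
#include <sched.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/wait.h>

static int child_fn(void *arg)
{
	return 0;
}

int main(void)
{
	char *stack = malloc(65536);
	pid_t pid;

	signal(SIGUSR1, SIG_IGN);
	/* Exit signal SIGUSR1, not SIGCHLD: a "clone" child for wait4(). */
	pid = clone(child_fn, stack + 65536, SIGUSR1, NULL);

	/* Without __WCLONE the child is invisible: ECHILD (-1). */
	printf("waitpid():          %d\n", (int) waitpid(pid, NULL, WNOHANG));
	/* With __WCLONE (or __WALL) the child is reaped normally. */
	printf("waitpid(__WCLONE):  %d\n", (int) waitpid(pid, NULL, __WCLONE));
	free(stack);
	return 0;
}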
/*
 * This creates a new process as a copy of the old one,
 * but does not actually start it yet.
 *
 * It copies the registers, and all the appropriate
 * parts of the process environment (as per the clone
 * flags). The actual kick-off is left to the caller.
 */
static task_t *copy_process(unsigned long clone_flags,
			    unsigned long stack_start,
			    struct pt_regs *regs,
			    unsigned long stack_size,
			    int __user *parent_tidptr,
			    int __user *child_tidptr,
			    int pid)
{
	int retval;
	struct task_struct *p = NULL;

	if ((clone_flags & (CLONE_NEWNS|CLONE_FS)) == (CLONE_NEWNS|CLONE_FS))
		return ERR_PTR(-EINVAL);

	/*
	 * Thread groups must share signals as well, and detached threads
	 * can only be started up within the thread group.
	 */
	if ((clone_flags & CLONE_THREAD) && !(clone_flags & CLONE_SIGHAND))
		return ERR_PTR(-EINVAL);

	/*
	 * Shared signal handlers imply shared VM. By way of the above,
	 * thread groups also imply shared VM. Blocking this case allows
	 * for various simplifications in other code.
	 */
	if ((clone_flags & CLONE_SIGHAND) && !(clone_flags & CLONE_VM))
		return ERR_PTR(-EINVAL);

	retval = security_task_create(clone_flags);
	if (retval)
		goto fork_out;

	retval = -ENOMEM;
	p = dup_task_struct(current);
	if (!p)
		goto fork_out;

	retval = -EAGAIN;
	if (atomic_read(&p->user->processes) >=
			p->signal->rlim[RLIMIT_NPROC].rlim_cur) {
		if (!capable(CAP_SYS_ADMIN) && !capable(CAP_SYS_RESOURCE) &&
				p->user != &root_user)
			goto bad_fork_free;
	}

	atomic_inc(&p->user->__count);
	atomic_inc(&p->user->processes);
	get_group_info(p->group_info);

	/*
	 * If multiple threads are within copy_process(), then this check
	 * triggers too late. This doesn't hurt, the check is only there
	 * to stop root fork bombs.
	 */
	if (nr_threads >= max_threads)
		goto bad_fork_cleanup_count;

	if (!try_module_get(p->thread_info->exec_domain->module))
		goto bad_fork_cleanup_count;

	if (p->binfmt && !try_module_get(p->binfmt->module))
		goto bad_fork_cleanup_put_domain;

	p->did_exec = 0;
	copy_flags(clone_flags, p);
	p->pid = pid;
	retval = -EFAULT;
	if (clone_flags & CLONE_PARENT_SETTID)
		if (put_user(p->pid, parent_tidptr))
			goto bad_fork_cleanup;

	p->proc_dentry = NULL;

	INIT_LIST_HEAD(&p->children);
	INIT_LIST_HEAD(&p->sibling);
	p->vfork_done = NULL;
	spin_lock_init(&p->alloc_lock);
	spin_lock_init(&p->proc_lock);

	clear_tsk_thread_flag(p, TIF_SIGPENDING);
	init_sigpending(&p->pending);

	p->it_real_value = 0;
	p->it_real_incr = 0;
	p->it_virt_value = cputime_zero;
	p->it_virt_incr = cputime_zero;
	p->it_prof_value = cputime_zero;
	p->it_prof_incr = cputime_zero;
	init_timer(&p->real_timer);
	p->real_timer.data = (unsigned long) p;

	p->utime = cputime_zero;
	p->stime = cputime_zero;
	p->rchar = 0;		/* I/O counter: bytes read */
	p->wchar = 0;		/* I/O counter: bytes written */
	p->syscr = 0;		/* I/O counter: read syscalls */
	p->syscw = 0;		/* I/O counter: write syscalls */
	acct_clear_integrals(p);

	p->lock_depth = -1;	/* -1 = no lock */
	do_posix_clock_monotonic_gettime(&p->start_time);
	p->security = NULL;
	p->io_context = NULL;
	p->io_wait = NULL;
	p->audit_context = NULL;
#ifdef CONFIG_NUMA
	p->mempolicy = mpol_copy(p->mempolicy);
	if (IS_ERR(p->mempolicy)) {
		retval = PTR_ERR(p->mempolicy);
		p->mempolicy = NULL;
		goto bad_fork_cleanup;
	}
#endif

	p->tgid = p->pid;
	if (clone_flags & CLONE_THREAD)
		p->tgid = current->tgid;

	if ((retval = security_task_alloc(p)))
		goto bad_fork_cleanup_policy;
	if ((retval = audit_alloc(p)))
		goto bad_fork_cleanup_security;
	/* copy all the process information */
	if ((retval = copy_semundo(clone_flags, p)))
		goto bad_fork_cleanup_audit;
	if ((retval = copy_files(clone_flags, p)))
		goto bad_fork_cleanup_semundo;
	if ((retval = copy_fs(clone_flags, p)))
		goto bad_fork_cleanup_files;
	if ((retval = copy_sighand(clone_flags, p)))
		goto bad_fork_cleanup_fs;
	if ((retval = copy_signal(clone_flags, p)))
		goto bad_fork_cleanup_sighand;
	if ((retval = copy_mm(clone_flags, p)))
		goto bad_fork_cleanup_signal;
	if ((retval = copy_keys(clone_flags, p)))
		goto bad_fork_cleanup_mm;
	if ((retval = copy_namespace(clone_flags, p)))
		goto bad_fork_cleanup_keys;
	retval = copy_thread(0, clone_flags, stack_start, stack_size, p, regs);
	if (retval)
		goto bad_fork_cleanup_namespace;

	p->set_child_tid = (clone_flags & CLONE_CHILD_SETTID) ? child_tidptr : NULL;
	/*
	 * Clear TID on mm_release()?
	 */
	p->clear_child_tid = (clone_flags & CLONE_CHILD_CLEARTID) ? child_tidptr : NULL;

	/*
	 * Syscall tracing should be turned off in the child regardless
	 * of CLONE_PTRACE.
	 */
	clear_tsk_thread_flag(p, TIF_SYSCALL_TRACE);

	/*
	 * Our parent execution domain becomes current domain.
	 * These must match for thread signalling to apply.
	 */
	p->parent_exec_id = p->self_exec_id;

	/* ok, now we should be set up.. */
	p->exit_signal = (clone_flags & CLONE_THREAD) ? -1 : (clone_flags & CSIGNAL);
	p->pdeath_signal = 0;
	p->exit_state = 0;

	/* Perform scheduler related setup */
	sched_fork(p);

	/*
	 * Ok, make it visible to the rest of the system.
	 * We don't wake it up yet.
	 */
	p->group_leader = p;
	INIT_LIST_HEAD(&p->ptrace_children);
	INIT_LIST_HEAD(&p->ptrace_list);

	/* Need tasklist lock for parent etc handling! */
	write_lock_irq(&tasklist_lock);

	/*
	 * The task hasn't been attached yet, so cpus_allowed mask cannot
	 * have changed. The cpus_allowed mask of the parent may have
	 * changed after it was copied first time, and it may then move to
	 * another CPU - so we re-copy it here and set the child's CPU to
	 * the parent's CPU. This avoids a lot of nasty races.
	 */
	p->cpus_allowed = current->cpus_allowed;
	set_task_cpu(p, smp_processor_id());

	/*
	 * Check for pending SIGKILL! The new thread should not be allowed
	 * to slip out of an OOM kill. (or normal SIGKILL.)
	 */
	if (sigismember(&current->pending.signal, SIGKILL)) {
		write_unlock_irq(&tasklist_lock);
		retval = -EINTR;
		goto bad_fork_cleanup_namespace;
	}

	/* CLONE_PARENT re-uses the old parent */
	if (clone_flags & (CLONE_PARENT|CLONE_THREAD))
		p->real_parent = current->real_parent;
	else
		p->real_parent = current;
	p->parent = p->real_parent;

	if (clone_flags & CLONE_THREAD) {
		spin_lock(&current->sighand->siglock);
		/*
		 * Important: if an exit-all has been started then
		 * do not create this new thread - the whole thread
		 * group is supposed to exit anyway.
		 */
		if (current->signal->flags & SIGNAL_GROUP_EXIT) {
			spin_unlock(&current->sighand->siglock);
			write_unlock_irq(&tasklist_lock);
			retval = -EAGAIN;
			goto bad_fork_cleanup_namespace;
		}
		p->group_leader = current->group_leader;

		if (current->signal->group_stop_count > 0) {
			/*
			 * There is an all-stop in progress for the group.
			 * We ourselves will stop as soon as we check signals.
			 * Make the new thread part of that group stop too.
			 */
			current->signal->group_stop_count++;
			set_tsk_thread_flag(p, TIF_SIGPENDING);
		}

		spin_unlock(&current->sighand->siglock);
	}

	SET_LINKS(p);
	if (unlikely(p->ptrace & PT_PTRACED))
		__ptrace_link(p, current->parent);

	attach_pid(p, PIDTYPE_PID, p->pid);
	attach_pid(p, PIDTYPE_TGID, p->tgid);
	if (thread_group_leader(p)) {
		attach_pid(p, PIDTYPE_PGID, process_group(p));
		attach_pid(p, PIDTYPE_SID, p->signal->session);
		if (p->pid)
			__get_cpu_var(process_counts)++;
	}

	nr_threads++;
	total_forks++;
	write_unlock_irq(&tasklist_lock);
	retval = 0;

fork_out:
	if (retval)
		return ERR_PTR(retval);
	return p;

bad_fork_cleanup_namespace:
	exit_namespace(p);
bad_fork_cleanup_keys:
	exit_keys(p);
bad_fork_cleanup_mm:
	if (p->mm)
		mmput(p->mm);
bad_fork_cleanup_signal:
	exit_signal(p);
bad_fork_cleanup_sighand:
	exit_sighand(p);
bad_fork_cleanup_fs:
	exit_fs(p);		/* blocking */
bad_fork_cleanup_files:
	exit_files(p);		/* blocking */
bad_fork_cleanup_semundo:
	exit_sem(p);
bad_fork_cleanup_audit:
	audit_free(p);
bad_fork_cleanup_security:
	security_task_free(p);
bad_fork_cleanup_policy:
#ifdef CONFIG_NUMA
	mpol_free(p->mempolicy);
#endif
bad_fork_cleanup:
	if (p->binfmt)
		module_put(p->binfmt->module);
bad_fork_cleanup_put_domain:
	module_put(p->thread_info->exec_domain->module);
bad_fork_cleanup_count:
	put_group_info(p->group_info);
	atomic_dec(&p->user->processes);
	free_uid(p->user);
bad_fork_free:
	free_task(p);
	goto fork_out;
}
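/*
 * A small hedged sketch of the CLONE_PARENT_SETTID path above: copy_process()
 * writes the child's PID into *parent_tidptr with put_user(), so the word is
 * valid in the parent as soon as clone() returns. Assumes glibc's variadic
 * clone(2) wrapper, where parent_tid is the first argument after `arg`.
 */
#define _GNU_SOURCE
#include <sched.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/wait.h>

static int child_fn(void *arg)
{
	return 0;
}

int main(void)
{
	pid_t tid = 0;
	char *stack = malloc(65536);
	pid_t pid = clone(child_fn, stack + 65536,
			  CLONE_PARENT_SETTID | SIGCHLD, NULL,
			  &tid);	/* parent_tidptr */

	printf("clone() returned %d, parent_tidptr word = %d\n",
	       (int) pid, (int) tid);
	waitpid(pid, NULL, 0);
	free(stack);
	return 0;
}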
asmlinkage int sys_wait4(pid_t pid, unsigned int *stat_addr, int options, struct rusage *ru)
{
	int flag, retval;
	struct wait_queue wait = { current, NULL };
	struct task_struct *p;

	if (stat_addr) {
		flag = verify_area(VERIFY_WRITE, stat_addr, sizeof(*stat_addr));
		if (flag)
			return flag;
	}
	if (ru) {
		flag = verify_area(VERIFY_WRITE, ru, sizeof(*ru));
		if (flag)
			return flag;
	}
	if (options & ~(WNOHANG|WUNTRACED|__WCLONE))
		return -EINVAL;

	add_wait_queue(&current->wait_chldexit, &wait);
repeat:
	flag = 0;
	for (p = current->p_cptr; p; p = p->p_osptr) {
		if (pid > 0) {
			if (p->pid != pid)
				continue;
		} else if (!pid) {
			if (p->pgrp != current->pgrp)
				continue;
		} else if (pid != -1) {
			if (p->pgrp != -pid)
				continue;
		}
		/* wait for cloned processes iff the __WCLONE flag is set */
		if ((p->exit_signal != SIGCHLD) ^ ((options & __WCLONE) != 0))
			continue;
		flag = 1;
		switch (p->state) {
			case TASK_STOPPED:
				if (!p->exit_code)
					continue;
				if (!(options & WUNTRACED) && !(p->flags & PF_PTRACED))
					continue;
				if (ru != NULL)
					getrusage(p, RUSAGE_BOTH, ru);
				if (stat_addr)
					put_user((p->exit_code << 8) | 0x7f, stat_addr);
				p->exit_code = 0;
				retval = p->pid;
				goto end_wait4;
			case TASK_ZOMBIE:
				current->cutime += p->utime + p->cutime;
				current->cstime += p->stime + p->cstime;
				if (ru != NULL)
					getrusage(p, RUSAGE_BOTH, ru);
				if (stat_addr)
					put_user(p->exit_code, stat_addr);
				retval = p->pid;
				if (p->p_opptr != p->p_pptr) {
					REMOVE_LINKS(p);
					p->p_pptr = p->p_opptr;
					SET_LINKS(p);
					notify_parent(p);
				} else
					release(p);
#ifdef DEBUG_PROC_TREE
				audit_ptree();
#endif
				goto end_wait4;
			default:
				continue;
		}
	}
	if (flag) {
		retval = 0;
		if (options & WNOHANG)
			goto end_wait4;
		retval = -ERESTARTSYS;
		if (current->signal & ~current->blocked)
			goto end_wait4;
		current->state = TASK_INTERRUPTIBLE;
		schedule();
		goto repeat;
	}
	retval = -ECHILD;
end_wait4:
	remove_wait_queue(&current->wait_chldexit, &wait);
	return retval;
}
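/*
 * Hedged userspace sketch of the WNOHANG branch above: with children still
 * running and WNOHANG set, flag is 1 and the syscall returns 0 instead of
 * sleeping; once a child is a zombie, its pid is returned and it is reaped.
 */
#include <stdio.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/wait.h>

int main(void)
{
	pid_t child = fork();

	if (child == 0) {
		sleep(1);
		_exit(0);
	}
	/* Child still running: waitpid with WNOHANG returns 0. */
	printf("WNOHANG while running: %d\n", (int) waitpid(child, NULL, WNOHANG));
	sleep(2);
	/* Child is a zombie now: returns its pid and releases it. */
	printf("WNOHANG after exit:    %d\n", (int) waitpid(child, NULL, WNOHANG));
	return 0;
}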