long arch_ptrace(struct task_struct *child, long request, long addr, long data)
{
	unsigned long tmp;
	int ret;

	switch (request) {
		/* when I and D space are separate, these will need to be fixed. */
	case PTRACE_PEEKTEXT: /* read word at location addr. */
	case PTRACE_PEEKDATA: {
		int copied;

		ret = -EIO;
		if (is_user_addr_valid(child, addr, sizeof(tmp)) < 0)
			break;
		copied = access_process_vm(child, addr, &tmp, sizeof(tmp), 0);
		if (copied != sizeof(tmp))
			break;
		ret = put_user(tmp, (unsigned long *) data);
		break;
	}

		/* read the word at location addr in the USER area. */
	case PTRACE_PEEKUSR: {
		tmp = 0;
		ret = -EIO;
		if ((addr & 3) || addr < 0)
			break;

		ret = 0;
		switch (addr >> 2) {
		case 0 ... PT__END - 1:
			tmp = get_reg(child, addr >> 2);
			break;

		case PT__END + 0:
			tmp = child->mm->end_code - child->mm->start_code;
			break;

		case PT__END + 1:
			tmp = child->mm->end_data - child->mm->start_data;
			break;

		case PT__END + 2:
			tmp = child->mm->start_stack - child->mm->start_brk;
			break;

		case PT__END + 3:
			tmp = child->mm->start_code;
			break;

		case PT__END + 4:
			tmp = child->mm->start_stack;
			break;

		default:
			ret = -EIO;
			break;
		}

		if (ret == 0)
			ret = put_user(tmp, (unsigned long *) data);
		break;
	}

		/* when I and D space are separate, this will have to be fixed. */
	case PTRACE_POKETEXT: /* write the word at location addr. */
	case PTRACE_POKEDATA:
		ret = -EIO;
		if (is_user_addr_valid(child, addr, sizeof(tmp)) < 0)
			break;
		if (access_process_vm(child, addr, &data, sizeof(data), 1) != sizeof(data))
			break;
		ret = 0;
		break;

	case PTRACE_POKEUSR: /* write the word at location addr in the USER area */
		ret = -EIO;
		if ((addr & 3) || addr < 0)
			break;

		ret = 0;
		switch (addr >> 2) {
		case 0 ... PT__END - 1:
			ret = put_reg(child, addr >> 2, data);
			break;

		default:
			ret = -EIO;
			break;
		}
		break;

	case PTRACE_SYSCALL: /* continue and stop at next (return from) syscall */
	case PTRACE_CONT: /* restart after signal. */
		ret = -EIO;
		if (!valid_signal(data))
			break;
		if (request == PTRACE_SYSCALL)
			set_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
		else
			clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
		child->exit_code = data;
		ptrace_disable(child);
		wake_up_process(child);
		ret = 0;
		break;

		/* make the child exit.  Best I can do is send it a sigkill.
		 * perhaps it should be put in the status that it wants to
		 * exit.
		 */
	case PTRACE_KILL:
		ret = 0;
		if (child->exit_state == EXIT_ZOMBIE)	/* already dead */
			break;
		child->exit_code = SIGKILL;
		clear_tsk_thread_flag(child, TIF_SINGLESTEP);
		ptrace_disable(child);
		wake_up_process(child);
		break;

	case PTRACE_SINGLESTEP: /* set the trap flag. */
		ret = -EIO;
		if (!valid_signal(data))
			break;
		clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
		ptrace_enable(child);
		child->exit_code = data;
		wake_up_process(child);
		ret = 0;
		break;

	case PTRACE_DETACH: /* detach a process that was attached. */
		ret = ptrace_detach(child, data);
		break;

	case PTRACE_GETREGS: { /* Get all integer regs from the child. */
		int i;

		ret = 0;
		for (i = 0; i < PT__GPEND; i++) {
			tmp = get_reg(child, i);
			if (put_user(tmp, (unsigned long *) data)) {
				ret = -EFAULT;
				break;
			}
			data += sizeof(long);
		}
		break;
	}

	case PTRACE_SETREGS: { /* Set all integer regs in the child. */
		int i;

		ret = 0;
		for (i = 0; i < PT__GPEND; i++) {
			if (get_user(tmp, (unsigned long *) data)) {
				ret = -EFAULT;
				break;
			}
			put_reg(child, i, tmp);
			data += sizeof(long);
		}
		break;
	}

	case PTRACE_GETFPREGS: { /* Get the child FP/Media state. */
		ret = 0;
		if (copy_to_user((void *) data,
				 &child->thread.user->f,
				 sizeof(child->thread.user->f)))
			ret = -EFAULT;
		break;
	}

	case PTRACE_SETFPREGS: { /* Set the child FP/Media state. */
		ret = 0;
		if (copy_from_user(&child->thread.user->f,
				   (void *) data,
				   sizeof(child->thread.user->f)))
			ret = -EFAULT;
		break;
	}

	case PTRACE_GETFDPIC:
		tmp = 0;
		switch (addr) {
		case PTRACE_GETFDPIC_EXEC:
			tmp = child->mm->context.exec_fdpic_loadmap;
			break;
		case PTRACE_GETFDPIC_INTERP:
			tmp = child->mm->context.interp_fdpic_loadmap;
			break;
		default:
			break;
		}

		ret = 0;
		if (put_user(tmp, (unsigned long *) data)) {
			ret = -EFAULT;
			break;
		}
		break;

	default:
		ret = -EIO;
		break;
	}
	return ret;
}
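/*
 * A minimal user-space sketch (not part of the kernel source above) of the
 * PTRACE_PEEKDATA path that arch_ptrace() implements.  It assumes Linux with
 * the glibc ptrace() wrapper, which returns the peeked word directly rather
 * than through the `data` out-pointer used at the kernel layer.
 */
#include <errno.h>
#include <signal.h>
#include <stdio.h>
#include <sys/ptrace.h>
#include <sys/wait.h>
#include <unistd.h>

static long secret = 0x42424242;

int main(void)
{
	pid_t pid = fork();

	if (pid == 0) {
		/* Child: ask to be traced, then stop so the parent can peek. */
		ptrace(PTRACE_TRACEME, 0, NULL, NULL);
		raise(SIGSTOP);
		_exit(0);
	}

	waitpid(pid, NULL, 0);		/* wait for the child's SIGSTOP */

	/*
	 * fork() preserves the address layout, so &secret is valid in the
	 * child as well; -1 with errno set distinguishes a genuine error
	 * from a peeked value of -1.
	 */
	errno = 0;
	long word = ptrace(PTRACE_PEEKDATA, pid, &secret, NULL);
	if (word == -1 && errno)
		perror("PTRACE_PEEKDATA");
	else
		printf("peeked: %#lx\n", word);

	ptrace(PTRACE_CONT, pid, NULL, NULL);
	waitpid(pid, NULL, 0);
	return 0;
}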
/* * This creates a new process as a copy of the old one, * but does not actually start it yet. * * It copies the registers, and all the appropriate * parts of the process environment (as per the clone * flags). The actual kick-off is left to the caller. */ static struct task_struct *copy_process(unsigned long clone_flags, unsigned long stack_start, unsigned long stack_size, int __user *child_tidptr, struct pid *pid, int trace) { int retval; struct task_struct *p; if ((clone_flags & (CLONE_NEWNS|CLONE_FS)) == (CLONE_NEWNS|CLONE_FS)) return ERR_PTR(-EINVAL); if ((clone_flags & (CLONE_NEWUSER|CLONE_FS)) == (CLONE_NEWUSER|CLONE_FS)) return ERR_PTR(-EINVAL); /* * Thread groups must share signals as well, and detached threads * can only be started up within the thread group. */ if ((clone_flags & CLONE_THREAD) && !(clone_flags & CLONE_SIGHAND)) return ERR_PTR(-EINVAL); /* * Shared signal handlers imply shared VM. By way of the above, * thread groups also imply shared VM. Blocking this case allows * for various simplifications in other code. */ if ((clone_flags & CLONE_SIGHAND) && !(clone_flags & CLONE_VM)) return ERR_PTR(-EINVAL); /* * Siblings of global init remain as zombies on exit since they are * not reaped by their parent (swapper). To solve this and to avoid * multi-rooted process trees, prevent global and container-inits * from creating siblings. */ if ((clone_flags & CLONE_PARENT) && current->signal->flags & SIGNAL_UNKILLABLE) return ERR_PTR(-EINVAL); /* * If the new process will be in a different pid or user namespace * do not allow it to share a thread group or signal handlers or * parent with the forking task. */ if (clone_flags & CLONE_SIGHAND) { if ((clone_flags & (CLONE_NEWUSER | CLONE_NEWPID)) || (task_active_pid_ns(current) != current->nsproxy->pid_ns_for_children)) return ERR_PTR(-EINVAL); } retval = security_task_create(clone_flags); if (retval) goto fork_out; retval = -ENOMEM; p = dup_task_struct(current); if (!p) goto fork_out; ftrace_graph_init_task(p); rt_mutex_init_task(p); #ifdef CONFIG_PROVE_LOCKING DEBUG_LOCKS_WARN_ON(!p->hardirqs_enabled); DEBUG_LOCKS_WARN_ON(!p->softirqs_enabled); #endif retval = -EAGAIN; if (atomic_read(&p->real_cred->user->processes) >= task_rlimit(p, RLIMIT_NPROC)) { if (p->real_cred->user != INIT_USER && !capable(CAP_SYS_RESOURCE) && !capable(CAP_SYS_ADMIN)) goto bad_fork_free; } current->flags &= ~PF_NPROC_EXCEEDED; retval = copy_creds(p, clone_flags); if (retval < 0) goto bad_fork_free; /* * If multiple threads are within copy_process(), then this check * triggers too late. This doesn't hurt, the check is only there * to stop root fork bombs. 
*/ retval = -EAGAIN; if (nr_threads >= max_threads) goto bad_fork_cleanup_count; delayacct_tsk_init(p); /* Must remain after dup_task_struct() */ p->flags &= ~(PF_SUPERPRIV | PF_WQ_WORKER); p->flags |= PF_FORKNOEXEC; INIT_LIST_HEAD(&p->children); INIT_LIST_HEAD(&p->sibling); rcu_copy_process(p); p->vfork_done = NULL; spin_lock_init(&p->alloc_lock); init_sigpending(&p->pending); p->utime = p->stime = p->gtime = 0; p->utimescaled = p->stimescaled = 0; #ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE p->prev_cputime.utime = p->prev_cputime.stime = 0; #endif #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN seqlock_init(&p->vtime_seqlock); p->vtime_snap = 0; p->vtime_snap_whence = VTIME_SLEEPING; #endif #if defined(SPLIT_RSS_COUNTING) memset(&p->rss_stat, 0, sizeof(p->rss_stat)); #endif p->default_timer_slack_ns = current->timer_slack_ns; task_io_accounting_init(&p->ioac); acct_clear_integrals(p); posix_cpu_timers_init(p); p->start_time = ktime_get_ns(); p->real_start_time = ktime_get_boot_ns(); p->io_context = NULL; p->audit_context = NULL; if (clone_flags & CLONE_THREAD) threadgroup_change_begin(current); cgroup_fork(p); #ifdef CONFIG_NUMA p->mempolicy = mpol_dup(p->mempolicy); if (IS_ERR(p->mempolicy)) { retval = PTR_ERR(p->mempolicy); p->mempolicy = NULL; goto bad_fork_cleanup_threadgroup_lock; } #endif #ifdef CONFIG_CPUSETS p->cpuset_mem_spread_rotor = NUMA_NO_NODE; p->cpuset_slab_spread_rotor = NUMA_NO_NODE; seqcount_init(&p->mems_allowed_seq); #endif #ifdef CONFIG_TRACE_IRQFLAGS p->irq_events = 0; p->hardirqs_enabled = 0; p->hardirq_enable_ip = 0; p->hardirq_enable_event = 0; p->hardirq_disable_ip = _THIS_IP_; p->hardirq_disable_event = 0; p->softirqs_enabled = 1; p->softirq_enable_ip = _THIS_IP_; p->softirq_enable_event = 0; p->softirq_disable_ip = 0; p->softirq_disable_event = 0; p->hardirq_context = 0; p->softirq_context = 0; #endif #ifdef CONFIG_LOCKDEP p->lockdep_depth = 0; /* no locks held yet */ p->curr_chain_key = 0; p->lockdep_recursion = 0; #endif #ifdef CONFIG_DEBUG_MUTEXES p->blocked_on = NULL; /* not blocked yet */ #endif #ifdef CONFIG_BCACHE p->sequential_io = 0; p->sequential_io_avg = 0; #endif /* Perform scheduler related setup. Assign this task to a CPU. */ retval = sched_fork(clone_flags, p); if (retval) goto bad_fork_cleanup_policy; retval = perf_event_init_task(p); if (retval) goto bad_fork_cleanup_policy; retval = audit_alloc(p); if (retval) goto bad_fork_cleanup_perf; /* copy all the process information */ shm_init_task(p); retval = copy_semundo(clone_flags, p); if (retval) goto bad_fork_cleanup_audit; retval = copy_files(clone_flags, p); if (retval) goto bad_fork_cleanup_semundo; retval = copy_fs(clone_flags, p); if (retval) goto bad_fork_cleanup_files; retval = copy_sighand(clone_flags, p); if (retval) goto bad_fork_cleanup_fs; retval = copy_signal(clone_flags, p); if (retval) goto bad_fork_cleanup_sighand; retval = copy_mm(clone_flags, p); if (retval) goto bad_fork_cleanup_signal; retval = copy_namespaces(clone_flags, p); if (retval) goto bad_fork_cleanup_mm; retval = copy_io(clone_flags, p); if (retval) goto bad_fork_cleanup_namespaces; retval = copy_thread(clone_flags, stack_start, stack_size, p); if (retval) goto bad_fork_cleanup_io; if (pid != &init_struct_pid) { retval = -ENOMEM; pid = alloc_pid(p->nsproxy->pid_ns_for_children); if (!pid) goto bad_fork_cleanup_io; } p->set_child_tid = (clone_flags & CLONE_CHILD_SETTID) ? child_tidptr : NULL; /* * Clear TID on mm_release()? */ p->clear_child_tid = (clone_flags & CLONE_CHILD_CLEARTID) ? 
child_tidptr : NULL; #ifdef CONFIG_BLOCK p->plug = NULL; #endif #ifdef CONFIG_FUTEX p->robust_list = NULL; #ifdef CONFIG_COMPAT p->compat_robust_list = NULL; #endif INIT_LIST_HEAD(&p->pi_state_list); p->pi_state_cache = NULL; #endif /* * sigaltstack should be cleared when sharing the same VM */ if ((clone_flags & (CLONE_VM|CLONE_VFORK)) == CLONE_VM) p->sas_ss_sp = p->sas_ss_size = 0; /* * Syscall tracing and stepping should be turned off in the * child regardless of CLONE_PTRACE. */ user_disable_single_step(p); clear_tsk_thread_flag(p, TIF_SYSCALL_TRACE); #ifdef TIF_SYSCALL_EMU clear_tsk_thread_flag(p, TIF_SYSCALL_EMU); #endif clear_all_latency_tracing(p); /* ok, now we should be set up.. */ p->pid = pid_nr(pid); if (clone_flags & CLONE_THREAD) { p->exit_signal = -1; p->group_leader = current->group_leader; p->tgid = current->tgid; } else { if (clone_flags & CLONE_PARENT) p->exit_signal = current->group_leader->exit_signal; else p->exit_signal = (clone_flags & CSIGNAL); p->group_leader = p; p->tgid = p->pid; } p->nr_dirtied = 0; p->nr_dirtied_pause = 128 >> (PAGE_SHIFT - 10); p->dirty_paused_when = 0; p->pdeath_signal = 0; INIT_LIST_HEAD(&p->thread_group); p->task_works = NULL; /* * Make it visible to the rest of the system, but dont wake it up yet. * Need tasklist lock for parent etc handling! */ write_lock_irq(&tasklist_lock); /* CLONE_PARENT re-uses the old parent */ if (clone_flags & (CLONE_PARENT|CLONE_THREAD)) { p->real_parent = current->real_parent; p->parent_exec_id = current->parent_exec_id; } else { p->real_parent = current; p->parent_exec_id = current->self_exec_id; } spin_lock(¤t->sighand->siglock); /* * Copy seccomp details explicitly here, in case they were changed * before holding sighand lock. */ copy_seccomp(p); /* * Process group and session signals need to be delivered to just the * parent before the fork or both the parent and the child after the * fork. Restart if a signal comes in before we add the new process to * it's process group. * A fatal signal pending means that current will exit, so the new * thread can't slip out of an OOM kill (or normal SIGKILL). 
*/ recalc_sigpending(); if (signal_pending(current)) { spin_unlock(¤t->sighand->siglock); write_unlock_irq(&tasklist_lock); retval = -ERESTARTNOINTR; goto bad_fork_free_pid; } if (likely(p->pid)) { ptrace_init_task(p, (clone_flags & CLONE_PTRACE) || trace); init_task_pid(p, PIDTYPE_PID, pid); if (thread_group_leader(p)) { init_task_pid(p, PIDTYPE_PGID, task_pgrp(current)); init_task_pid(p, PIDTYPE_SID, task_session(current)); if (is_child_reaper(pid)) { ns_of_pid(pid)->child_reaper = p; p->signal->flags |= SIGNAL_UNKILLABLE; } p->signal->leader_pid = pid; p->signal->tty = tty_kref_get(current->signal->tty); list_add_tail(&p->sibling, &p->real_parent->children); list_add_tail_rcu(&p->tasks, &init_task.tasks); attach_pid(p, PIDTYPE_PGID); attach_pid(p, PIDTYPE_SID); __this_cpu_inc(process_counts); } else { current->signal->nr_threads++; atomic_inc(¤t->signal->live); atomic_inc(¤t->signal->sigcnt); list_add_tail_rcu(&p->thread_group, &p->group_leader->thread_group); list_add_tail_rcu(&p->thread_node, &p->signal->thread_head); } attach_pid(p, PIDTYPE_PID); nr_threads++; } total_forks++; spin_unlock(¤t->sighand->siglock); syscall_tracepoint_update(p); write_unlock_irq(&tasklist_lock); proc_fork_connector(p); cgroup_post_fork(p); if (clone_flags & CLONE_THREAD) threadgroup_change_end(current); perf_event_fork(p); trace_task_newtask(p, clone_flags); uprobe_copy_process(p, clone_flags); return p; bad_fork_free_pid: if (pid != &init_struct_pid) free_pid(pid); bad_fork_cleanup_io: if (p->io_context) exit_io_context(p); bad_fork_cleanup_namespaces: exit_task_namespaces(p); bad_fork_cleanup_mm: if (p->mm) mmput(p->mm); bad_fork_cleanup_signal: if (!(clone_flags & CLONE_THREAD)) free_signal_struct(p->signal); bad_fork_cleanup_sighand: __cleanup_sighand(p->sighand); bad_fork_cleanup_fs: exit_fs(p); /* blocking */ bad_fork_cleanup_files: exit_files(p); /* blocking */ bad_fork_cleanup_semundo: exit_sem(p); bad_fork_cleanup_audit: audit_free(p); bad_fork_cleanup_perf: perf_event_free_task(p); bad_fork_cleanup_policy: #ifdef CONFIG_NUMA mpol_put(p->mempolicy); bad_fork_cleanup_threadgroup_lock: #endif if (clone_flags & CLONE_THREAD) threadgroup_change_end(current); delayacct_tsk_free(p); bad_fork_cleanup_count: atomic_dec(&p->cred->user->processes); exit_creds(p); bad_fork_free: free_task(p); fork_out: return ERR_PTR(retval); }
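/*
 * The flag-compatibility checks at the top of copy_process() are directly
 * observable from user space: the invalid combinations fail with EINVAL
 * before any child task is created.  A small sketch (not from this source
 * tree), assuming the x86-64 raw clone() argument order (flags, new_sp,
 * parent_tid, child_tid, tls); other architectures order these differently.
 */
#define _GNU_SOURCE
#include <errno.h>
#include <sched.h>
#include <stdio.h>
#include <string.h>
#include <sys/syscall.h>
#include <unistd.h>

/* Raw clone, x86-64 argument order assumed.  For the invalid combinations
 * below the kernel returns -EINVAL from copy_process() before a child ever
 * exists, so there is nothing to reap. */
static long try_clone(unsigned long flags)
{
	return syscall(SYS_clone, flags, 0UL, NULL, NULL, 0UL);
}

int main(void)
{
	if (try_clone(CLONE_THREAD | CLONE_VM) == -1)
		printf("CLONE_THREAD without CLONE_SIGHAND: %s\n", strerror(errno));

	if (try_clone(CLONE_SIGHAND) == -1)
		printf("CLONE_SIGHAND without CLONE_VM: %s\n", strerror(errno));

	if (try_clone(CLONE_NEWNS | CLONE_FS) == -1)
		printf("CLONE_NEWNS together with CLONE_FS: %s\n", strerror(errno));

	return 0;
}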
long arch_ptrace(struct task_struct *child, long request, long addr, long data)
{
	long ret;
#ifdef DEBUG_PTRACE
	long oaddr = addr, odata = data;
#endif

	switch (request) {
	case PTRACE_PEEKTEXT: /* read word at location addr. */
	case PTRACE_PEEKDATA: {
#ifdef CONFIG_64BIT
		if (__is_compat_task(child)) {
			int copied;
			unsigned int tmp;

			addr &= 0xffffffffL;
			copied = access_process_vm(child, addr, &tmp, sizeof(tmp), 0);
			ret = -EIO;
			if (copied != sizeof(tmp))
				goto out_tsk;
			ret = put_user(tmp, (unsigned int *) data);
			DBG("sys_ptrace(PEEK%s, %d, %lx, %lx) returning %ld, data %x\n",
			    request == PTRACE_PEEKTEXT ? "TEXT" : "DATA",
			    pid, oaddr, odata, ret, tmp);
		} else
#endif
			ret = generic_ptrace_peekdata(child, addr, data);
		goto out_tsk;
	}

	/* when I and D space are separate, this will have to be fixed. */
	case PTRACE_POKETEXT: /* write the word at location addr. */
	case PTRACE_POKEDATA:
		ret = 0;
#ifdef CONFIG_64BIT
		if (__is_compat_task(child)) {
			unsigned int tmp = (unsigned int) data;

			DBG("sys_ptrace(POKE%s, %d, %lx, %lx)\n",
			    request == PTRACE_POKETEXT ? "TEXT" : "DATA",
			    pid, oaddr, odata);
			addr &= 0xffffffffL;
			if (access_process_vm(child, addr, &tmp, sizeof(tmp), 1) == sizeof(tmp))
				goto out_tsk;
		} else
#endif
		{
			if (access_process_vm(child, addr, &data, sizeof(data), 1) == sizeof(data))
				goto out_tsk;
		}
		ret = -EIO;
		goto out_tsk;

	/* Read the word at location addr in the USER area.  For ptraced
	   processes, the kernel saves all regs on a syscall. */
	case PTRACE_PEEKUSR: {
		ret = -EIO;
#ifdef CONFIG_64BIT
		if (__is_compat_task(child)) {
			unsigned int tmp;

			if (addr & (sizeof(int) - 1))
				goto out_tsk;
			if ((addr = translate_usr_offset(addr)) < 0)
				goto out_tsk;

			tmp = *(unsigned int *) ((char *) task_regs(child) + addr);
			ret = put_user(tmp, (unsigned int *) data);
			DBG("sys_ptrace(PEEKUSR, %d, %lx, %lx) returning %ld, addr %lx, data %x\n",
			    pid, oaddr, odata, ret, addr, tmp);
		} else
#endif
		{
			unsigned long tmp;

			if ((addr & (sizeof(long) - 1)) ||
			    (unsigned long) addr >= sizeof(struct pt_regs))
				goto out_tsk;

			tmp = *(unsigned long *) ((char *) task_regs(child) + addr);
			ret = put_user(tmp, (unsigned long *) data);
		}
		goto out_tsk;
	}

	/* Write the word at location addr in the USER area.  This will need
	   to change when the kernel no longer saves all regs on a syscall.
	   FIXME.  There is a problem at the moment in that r3-r18 are only
	   saved if the process is ptraced on syscall entry, and even then
	   those values are overwritten by actual register values on syscall
	   exit. */
	case PTRACE_POKEUSR:
		ret = -EIO;
		/* Some register values written here may be ignored in
		 * entry.S:syscall_restore_rfi; e.g. iaoq is written with
		 * r31/r31+4, and not with the values in pt_regs.
		 */
		if (addr == PT_PSW) {
			/* PT_PSW=0, so this is valid for 32 bit processes
			 * under 64 bit kernels.
			 *
			 * Allow writing to Nullify, Divide-step-correction,
			 * and carry/borrow bits.
			 * BEWARE, if you set N, and then single step, it won't
			 * stop on the nullified instruction.
			 */
			DBG("sys_ptrace(POKEUSR, %d, %lx, %lx)\n",
			    pid, oaddr, odata);
			data &= USER_PSW_BITS;
			task_regs(child)->gr[0] &= ~USER_PSW_BITS;
			task_regs(child)->gr[0] |= data;
			ret = 0;
			goto out_tsk;
		}
#ifdef CONFIG_64BIT
		if (__is_compat_task(child)) {
			if (addr & (sizeof(int) - 1))
				goto out_tsk;
			if ((addr = translate_usr_offset(addr)) < 0)
				goto out_tsk;
			DBG("sys_ptrace(POKEUSR, %d, %lx, %lx) addr %lx\n",
			    pid, oaddr, odata, addr);
			if (addr >= PT_FR0 && addr <= PT_FR31 + 4) {
				/* Special case, fp regs are 64 bits anyway */
				*(unsigned int *) ((char *) task_regs(child) + addr) = data;
				ret = 0;
			}
			else if ((addr >= PT_GR1 + 4 && addr <= PT_GR31 + 4) ||
					addr == PT_IAOQ0 + 4 || addr == PT_IAOQ1 + 4 ||
					addr == PT_SAR + 4) {
				/* Zero the top 32 bits */
				*(unsigned int *) ((char *) task_regs(child) + addr - 4) = 0;
				*(unsigned int *) ((char *) task_regs(child) + addr) = data;
				ret = 0;
			}
			goto out_tsk;
		} else
#endif
		{
			if ((addr & (sizeof(long) - 1)) ||
			    (unsigned long) addr >= sizeof(struct pt_regs))
				goto out_tsk;
			if ((addr >= PT_GR1 && addr <= PT_GR31) ||
					addr == PT_IAOQ0 || addr == PT_IAOQ1 ||
					(addr >= PT_FR0 && addr <= PT_FR31 + 4) ||
					addr == PT_SAR) {
				*(unsigned long *) ((char *) task_regs(child) + addr) = data;
				ret = 0;
			}
			goto out_tsk;
		}

	case PTRACE_SYSCALL: /* continue and stop at next (return from) syscall */
	case PTRACE_CONT:
		ret = -EIO;
		DBG("sys_ptrace(%s)\n",
		    request == PTRACE_SYSCALL ? "SYSCALL" : "CONT");
		if (!valid_signal(data))
			goto out_tsk;
		child->ptrace &= ~(PT_SINGLESTEP|PT_BLOCKSTEP);
		if (request == PTRACE_SYSCALL) {
			set_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
		} else {
			clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
		}
		child->exit_code = data;
		goto out_wake_notrap;

	case PTRACE_KILL:
		/*
		 * make the child exit.  Best I can do is send it a
		 * sigkill.  perhaps it should be put in the status
		 * that it wants to exit.
		 */
		ret = 0;
		DBG("sys_ptrace(KILL)\n");
		if (child->exit_state == EXIT_ZOMBIE)	/* already dead */
			goto out_tsk;
		child->exit_code = SIGKILL;
		goto out_wake_notrap;

	case PTRACE_SINGLEBLOCK:
		DBG("sys_ptrace(SINGLEBLOCK)\n");
		ret = -EIO;
		if (!valid_signal(data))
			goto out_tsk;
		clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
		child->ptrace &= ~PT_SINGLESTEP;
		child->ptrace |= PT_BLOCKSTEP;
		child->exit_code = data;

		/* Enable taken branch trap. */
		pa_psw(child)->r = 0;
		pa_psw(child)->t = 1;
		pa_psw(child)->h = 0;
		pa_psw(child)->l = 0;
		goto out_wake;

	case PTRACE_SINGLESTEP:
		DBG("sys_ptrace(SINGLESTEP)\n");
		ret = -EIO;
		if (!valid_signal(data))
			goto out_tsk;

		clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
		child->ptrace &= ~PT_BLOCKSTEP;
		child->ptrace |= PT_SINGLESTEP;
		child->exit_code = data;

		if (pa_psw(child)->n) {
			struct siginfo si;

			/* Nullified, just crank over the queue. */
			task_regs(child)->iaoq[0] = task_regs(child)->iaoq[1];
			task_regs(child)->iasq[0] = task_regs(child)->iasq[1];
			task_regs(child)->iaoq[1] = task_regs(child)->iaoq[0] + 4;
			pa_psw(child)->n = 0;
			pa_psw(child)->x = 0;
			pa_psw(child)->y = 0;
			pa_psw(child)->z = 0;
			pa_psw(child)->b = 0;
			ptrace_disable(child);
			/* Don't wake up the child, but let the
			   parent know something happened. */
			si.si_code = TRAP_TRACE;
			si.si_addr = (void __user *) (task_regs(child)->iaoq[0] & ~3);
			si.si_signo = SIGTRAP;
			si.si_errno = 0;
			force_sig_info(SIGTRAP, &si, child);
			//notify_parent(child, SIGCHLD);
			//ret = 0;
			goto out_wake;
		}

		/* Enable recovery counter traps.  The recovery counter
		 * itself will be set to zero on a task switch.  If the
		 * task is suspended on a syscall then the syscall return
		 * path will overwrite the recovery counter with a suitable
		 * value such that it traps once back in user space.  We
		 * disable interrupts in the child's PSW here also, to avoid
		 * interrupts while the recovery counter is decrementing.
		 */
		pa_psw(child)->r = 1;
		pa_psw(child)->t = 0;
		pa_psw(child)->h = 0;
		pa_psw(child)->l = 0;
		/* give it a chance to run. */
		goto out_wake;

	case PTRACE_GETEVENTMSG:
		ret = put_user(child->ptrace_message, (unsigned int __user *) data);
		goto out_tsk;

	default:
		ret = ptrace_request(child, request, addr, data);
		goto out_tsk;
	}

out_wake_notrap:
	ptrace_disable(child);
out_wake:
	wake_up_process(child);
	ret = 0;
out_tsk:
	DBG("arch_ptrace(%ld, %d, %lx, %lx) returning %ld\n",
	    request, pid, oaddr, odata, ret);
	return ret;
}
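/*
 * Both arch_ptrace() implementations above expose PTRACE_SINGLESTEP; from
 * the tracer's side the request looks the same on every architecture.  A
 * minimal sketch (not from this source tree) that steps a child instruction
 * by instruction until it exits:
 */
#include <signal.h>
#include <stdio.h>
#include <sys/ptrace.h>
#include <sys/wait.h>
#include <unistd.h>

int main(void)
{
	pid_t pid = fork();
	int status;
	long steps = 0;

	if (pid == 0) {
		ptrace(PTRACE_TRACEME, 0, NULL, NULL);
		raise(SIGSTOP);
		_exit(0);
	}

	waitpid(pid, &status, 0);	/* initial SIGSTOP */

	/* Resume one instruction at a time; each step ends in a SIGTRAP
	 * stop until the child finally exits. */
	while (ptrace(PTRACE_SINGLESTEP, pid, NULL, NULL) == 0) {
		waitpid(pid, &status, 0);
		if (WIFEXITED(status))
			break;
		steps++;
	}
	printf("child ran for %ld single steps\n", steps);
	return 0;
}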
asmlinkage int sys_ptrace(long request, long pid, long addr, long data)
{
	struct task_struct *child;
	int ret;

	lock_kernel();
	ret = -EPERM;
	if (request == PTRACE_TRACEME) {
		/* are we already being traced? */
		if (current->ptrace & PT_PTRACED)
			goto out;
		/* set the ptrace bit in the process flags. */
		current->ptrace |= PT_PTRACED;
		ret = 0;
		goto out;
	}
	ret = -ESRCH;
	read_lock(&tasklist_lock);
	child = find_task_by_pid(pid);
	if (child)
		get_task_struct(child);
	read_unlock(&tasklist_lock);
	if (!child)
		goto out;

	ret = -EPERM;
	if (pid == 1)		/* you may not mess with init */
		goto out_tsk;

	if (request == PTRACE_ATTACH) {
		ret = ptrace_attach(child);
		goto out_tsk;
	}
	ret = ptrace_check_attach(child, request == PTRACE_KILL);
	if (ret < 0)
		goto out_tsk;

	switch (request) {
	case PTRACE_PEEKTEXT: /* read word at location addr. */
	case PTRACE_PEEKDATA: {
		unsigned long tmp;

		ret = read_long(child, addr, &tmp);
		if (ret < 0)
			break;
		ret = put_user(tmp, (unsigned long *) data);
		break;
	}

	/* read the word at location addr in the USER area. */
	case PTRACE_PEEKUSR: {
		unsigned long tmp = 0;

		if ((addr & 3) || addr < 0 || addr >= sizeof(struct user)) {
			ret = -EIO;
			break;
		}

		ret = 0;	/* Default return condition */
		addr = addr >> 2; /* temporary hack. */

		if (addr < H8300_REGS_NO)
			tmp = h8300_get_reg(child, addr);
		else {
			switch (addr) {
			case 49:
				tmp = child->mm->start_code;
				break;
			case 50:
				tmp = child->mm->start_data;
				break;
			case 51:
				tmp = child->mm->end_code;
				break;
			case 52:
				tmp = child->mm->end_data;
				break;
			default:
				ret = -EIO;
			}
		}
		if (!ret)
			ret = put_user(tmp, (unsigned long *) data);
		break;
	}

	/* when I and D space are separate, this will have to be fixed. */
	case PTRACE_POKETEXT: /* write the word at location addr. */
	case PTRACE_POKEDATA:
		ret = 0;
		if (access_process_vm(child, addr, &data, sizeof(data), 1) == sizeof(data))
			break;
		ret = -EIO;
		break;

	case PTRACE_POKEUSR: /* write the word at location addr in the USER area */
		if ((addr & 3) || addr < 0 || addr >= sizeof(struct user)) {
			ret = -EIO;
			break;
		}
		addr = addr >> 2; /* temporary hack. */

		if (addr == PT_ORIG_ER0) {
			ret = -EIO;
			break;
		}
		if (addr < H8300_REGS_NO) {
			ret = h8300_put_reg(child, addr, data);
			break;
		}
		ret = -EIO;
		break;

	case PTRACE_SYSCALL: /* continue and stop at next (return from) syscall */
	case PTRACE_CONT: { /* restart after signal. */
		ret = -EIO;
		if ((unsigned long) data >= _NSIG)
			break;
		if (request == PTRACE_SYSCALL)
			set_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
		else
			clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
		child->exit_code = data;
		wake_up_process(child);
		/* make sure the single step bit is not set. */
		h8300_disable_trace(child);
		ret = 0;
		break;
	}

	/*
	 * make the child exit.  Best I can do is send it a sigkill.
	 * perhaps it should be put in the status that it wants to
	 * exit.
	 */
	case PTRACE_KILL: {
		ret = 0;
		if (child->exit_state == EXIT_ZOMBIE) /* already dead */
			break;
		child->exit_code = SIGKILL;
		h8300_disable_trace(child);
		wake_up_process(child);
		break;
	}

	case PTRACE_SINGLESTEP: { /* set the trap flag. */
		ret = -EIO;
		if ((unsigned long) data > _NSIG)
			break;
		clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
		child->exit_code = data;
		h8300_enable_trace(child);
		wake_up_process(child);
		ret = 0;
		break;
	}

	case PTRACE_DETACH: /* detach a process that was attached. */
		ret = ptrace_detach(child, data);
		break;

	case PTRACE_GETREGS: { /* Get all gp regs from the child. */
		int i;
		unsigned long tmp;

		ret = 0;
		for (i = 0; i < H8300_REGS_NO; i++) {
			tmp = h8300_get_reg(child, i);
			if (put_user(tmp, (unsigned long *) data)) {
				ret = -EFAULT;
				break;
			}
			data += sizeof(long);
		}
		break;
	}

	case PTRACE_SETREGS: { /* Set all gp regs in the child. */
		int i;
		unsigned long tmp;

		ret = 0;
		for (i = 0; i < H8300_REGS_NO; i++) {
			if (get_user(tmp, (unsigned long *) data)) {
				ret = -EFAULT;
				break;
			}
			h8300_put_reg(child, i, tmp);
			data += sizeof(long);
		}
		break;
	}

	default:
		ret = -EIO;
		break;
	}
out_tsk:
	put_task_struct(child);
out:
	unlock_kernel();
	return ret;
}
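/*
 * The PTRACE_GETREGS loop above copies the register file one word at a time
 * into the tracer's buffer at `data`.  From user space, callers normally
 * pass an architecture-specific register struct.  A hedged sketch (not from
 * this source tree), assuming an architecture that implements PTRACE_GETREGS
 * and exposes struct user_regs_struct (e.g. x86-64):
 */
#define _GNU_SOURCE
#include <signal.h>
#include <stdio.h>
#include <sys/ptrace.h>
#include <sys/user.h>
#include <sys/wait.h>
#include <unistd.h>

int main(void)
{
	pid_t pid = fork();

	if (pid == 0) {
		ptrace(PTRACE_TRACEME, 0, NULL, NULL);
		raise(SIGSTOP);
		_exit(0);
	}

	waitpid(pid, NULL, 0);

	/* The whole register file arrives in one request; the kernel-side
	 * loop fills it register by register. */
	struct user_regs_struct regs;
	if (ptrace(PTRACE_GETREGS, pid, NULL, &regs) == 0)
#ifdef __x86_64__
		printf("child stopped at rip=%#llx sp=%#llx\n",
		       (unsigned long long)regs.rip,
		       (unsigned long long)regs.rsp);
#else
		puts("got registers (layout is architecture-specific)");
#endif

	ptrace(PTRACE_CONT, pid, NULL, NULL);
	waitpid(pid, NULL, 0);
	return 0;
}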
/*
 * This creates a new process as a copy of the old one,
 * but does not actually start it yet.
 *
 * It copies the registers, and all the appropriate
 * parts of the process environment (as per the clone
 * flags). The actual kick-off is left to the caller.
 */
struct task_struct *copy_process(unsigned long clone_flags,
				 unsigned long stack_start,
				 struct pt_regs *regs,
				 unsigned long stack_size,
				 int __user *parent_tidptr,
				 int __user *child_tidptr)
{
	int retval;
	struct task_struct *p = NULL;

	if ((clone_flags & (CLONE_NEWNS|CLONE_FS)) == (CLONE_NEWNS|CLONE_FS))
		return ERR_PTR(-EINVAL);

	/*
	 * Thread groups must share signals as well, and detached threads
	 * can only be started up within the thread group.
	 */
	if ((clone_flags & CLONE_THREAD) && !(clone_flags & CLONE_SIGHAND))
		return ERR_PTR(-EINVAL);

	/*
	 * Shared signal handlers imply shared VM. By way of the above,
	 * thread groups also imply shared VM. Blocking this case allows
	 * for various simplifications in other code.
	 */
	if ((clone_flags & CLONE_SIGHAND) && !(clone_flags & CLONE_VM))
		return ERR_PTR(-EINVAL);

	retval = security_task_create(clone_flags);
	if (retval)
		goto fork_out;

	retval = -ENOMEM;
	p = dup_task_struct(current);
	if (!p)
		goto fork_out;

	retval = -EAGAIN;
	if (atomic_read(&p->user->processes) >=
			p->rlim[RLIMIT_NPROC].rlim_cur) {
		if (!capable(CAP_SYS_ADMIN) && !capable(CAP_SYS_RESOURCE) &&
				p->user != &root_user)
			goto bad_fork_free;
	}

	atomic_inc(&p->user->__count);
	atomic_inc(&p->user->processes);
	get_group_info(p->group_info);

	/*
	 * If multiple threads are within copy_process(), then this check
	 * triggers too late. This doesn't hurt, the check is only there
	 * to stop root fork bombs.
	 */
	if (nr_threads >= max_threads)
		goto bad_fork_cleanup_count;

	if (!try_module_get(p->thread_info->exec_domain->module))
		goto bad_fork_cleanup_count;

	if (p->binfmt && !try_module_get(p->binfmt->module))
		goto bad_fork_cleanup_put_domain;

	p->did_exec = 0;
	copy_flags(clone_flags, p);
	if (clone_flags & CLONE_IDLETASK)
		p->pid = 0;
	else {
		p->pid = alloc_pidmap();
		if (p->pid == -1)
			goto bad_fork_cleanup;
	}
	retval = -EFAULT;
	if (clone_flags & CLONE_PARENT_SETTID)
		if (put_user(p->pid, parent_tidptr))
			goto bad_fork_cleanup;

	p->proc_dentry = NULL;

	INIT_LIST_HEAD(&p->children);
	INIT_LIST_HEAD(&p->sibling);
	init_waitqueue_head(&p->wait_chldexit);
	p->vfork_done = NULL;
	spin_lock_init(&p->alloc_lock);
	spin_lock_init(&p->proc_lock);

	clear_tsk_thread_flag(p, TIF_SIGPENDING);
	init_sigpending(&p->pending);

	p->it_real_value = p->it_virt_value = p->it_prof_value = 0;
	p->it_real_incr = p->it_virt_incr = p->it_prof_incr = 0;
	init_timer(&p->real_timer);
	p->real_timer.data = (unsigned long) p;

	p->utime = p->stime = 0;
	p->cutime = p->cstime = 0;
	p->lock_depth = -1;		/* -1 = no lock */
	p->start_time = get_jiffies_64();
	p->security = NULL;
	p->io_context = NULL;
	p->audit_context = NULL;
#ifdef CONFIG_NUMA
	p->mempolicy = mpol_copy(p->mempolicy);
	if (IS_ERR(p->mempolicy)) {
		retval = PTR_ERR(p->mempolicy);
		p->mempolicy = NULL;
		goto bad_fork_cleanup;
	}
#endif

	retval = -ENOMEM;
	if ((retval = security_task_alloc(p)))
		goto bad_fork_cleanup_policy;
	if ((retval = audit_alloc(p)))
		goto bad_fork_cleanup_security;
	/* copy all the process information */
	if ((retval = copy_semundo(clone_flags, p)))
		goto bad_fork_cleanup_audit;
	if ((retval = copy_files(clone_flags, p)))
		goto bad_fork_cleanup_semundo;
	if ((retval = copy_fs(clone_flags, p)))
		goto bad_fork_cleanup_files;
	if ((retval = copy_sighand(clone_flags, p)))
		goto bad_fork_cleanup_fs;
	if ((retval = copy_signal(clone_flags, p)))
		goto bad_fork_cleanup_sighand;
	if ((retval = copy_mm(clone_flags, p)))
		goto bad_fork_cleanup_signal;
	if ((retval = copy_namespace(clone_flags, p)))
		goto bad_fork_cleanup_mm;
	retval = copy_thread(0, clone_flags, stack_start, stack_size, p, regs);
	if (retval)
		goto bad_fork_cleanup_namespace;

	p->set_child_tid = (clone_flags & CLONE_CHILD_SETTID) ? child_tidptr : NULL;
	/*
	 * Clear TID on mm_release()?
	 */
	p->clear_child_tid = (clone_flags & CLONE_CHILD_CLEARTID) ? child_tidptr : NULL;

	/*
	 * Syscall tracing should be turned off in the child regardless
	 * of CLONE_PTRACE.
	 */
	clear_tsk_thread_flag(p, TIF_SYSCALL_TRACE);

	/* Our parent execution domain becomes current domain
	   These must match for thread signalling to apply */
	p->parent_exec_id = p->self_exec_id;

	/* ok, now we should be set up.. */
	p->exit_signal = (clone_flags & CLONE_THREAD) ? -1 : (clone_flags & CSIGNAL);
	p->pdeath_signal = 0;

	/* Perform scheduler related setup */
	sched_fork(p);

	/*
	 * Ok, make it visible to the rest of the system.
	 * We don't wake it up yet.
	 */
	p->tgid = p->pid;
	p->group_leader = p;
	INIT_LIST_HEAD(&p->ptrace_children);
	INIT_LIST_HEAD(&p->ptrace_list);

	/* Need tasklist lock for parent etc handling! */
	write_lock_irq(&tasklist_lock);

	/*
	 * Check for pending SIGKILL! The new thread should not be allowed
	 * to slip out of an OOM kill. (or normal SIGKILL.)
	 */
	if (sigismember(&current->pending.signal, SIGKILL)) {
		write_unlock_irq(&tasklist_lock);
		retval = -EINTR;
		goto bad_fork_cleanup_namespace;
	}

	/* CLONE_PARENT re-uses the old parent */
	if (clone_flags & CLONE_PARENT)
		p->real_parent = current->real_parent;
	else
		p->real_parent = current;
	p->parent = p->real_parent;

	if (clone_flags & CLONE_THREAD) {
		spin_lock(&current->sighand->siglock);
		/*
		 * Important: if an exit-all has been started then
		 * do not create this new thread - the whole thread
		 * group is supposed to exit anyway.
		 */
		if (current->signal->group_exit) {
			spin_unlock(&current->sighand->siglock);
			write_unlock_irq(&tasklist_lock);
			retval = -EAGAIN;
			goto bad_fork_cleanup_namespace;
		}
		p->tgid = current->tgid;
		p->group_leader = current->group_leader;

		if (current->signal->group_stop_count > 0) {
			/*
			 * There is an all-stop in progress for the group.
			 * We ourselves will stop as soon as we check signals.
			 * Make the new thread part of that group stop too.
			 */
			current->signal->group_stop_count++;
			set_tsk_thread_flag(p, TIF_SIGPENDING);
		}

		spin_unlock(&current->sighand->siglock);
	}

	SET_LINKS(p);
	if (p->ptrace & PT_PTRACED)
		__ptrace_link(p, current->parent);

	attach_pid(p, PIDTYPE_PID, p->pid);
	if (thread_group_leader(p)) {
		attach_pid(p, PIDTYPE_TGID, p->tgid);
		attach_pid(p, PIDTYPE_PGID, process_group(p));
		attach_pid(p, PIDTYPE_SID, p->signal->session);
		if (p->pid)
			__get_cpu_var(process_counts)++;
	} else
		link_pid(p, p->pids + PIDTYPE_TGID,
			 &p->group_leader->pids[PIDTYPE_TGID].pid);

	nr_threads++;
	write_unlock_irq(&tasklist_lock);
	retval = 0;

fork_out:
	if (retval)
		return ERR_PTR(retval);
	return p;

bad_fork_cleanup_namespace:
	exit_namespace(p);
bad_fork_cleanup_mm:
	exit_mm(p);
	if (p->active_mm)
		mmdrop(p->active_mm);
bad_fork_cleanup_signal:
	exit_signal(p);
bad_fork_cleanup_sighand:
	exit_sighand(p);
bad_fork_cleanup_fs:
	exit_fs(p); /* blocking */
bad_fork_cleanup_files:
	exit_files(p); /* blocking */
bad_fork_cleanup_semundo:
	exit_sem(p);
bad_fork_cleanup_audit:
	audit_free(p);
bad_fork_cleanup_security:
	security_task_free(p);
bad_fork_cleanup_policy:
#ifdef CONFIG_NUMA
	mpol_free(p->mempolicy);
#endif
bad_fork_cleanup:
	if (p->pid > 0)
		free_pidmap(p->pid);
	if (p->binfmt)
		module_put(p->binfmt->module);
bad_fork_cleanup_put_domain:
	module_put(p->thread_info->exec_domain->module);
bad_fork_cleanup_count:
	put_group_info(p->group_info);
	atomic_dec(&p->user->processes);
	free_uid(p->user);
bad_fork_free:
	free_task(p);
	goto fork_out;
}
/*
 * Copy architecture-specific thread state
 */
int copy_thread(unsigned long clone_flags, unsigned long usp,
		unsigned long kthread_arg, struct task_struct *p)
{
	struct thread_info *ti = task_thread_info(p);
	struct pt_regs *childregs, *regs = current_pt_regs();
	unsigned long childksp;

	p->set_child_tid = p->clear_child_tid = NULL;

	childksp = (unsigned long)task_stack_page(p) + THREAD_SIZE - 32;

	/* set up new TSS. */
	childregs = (struct pt_regs *) childksp - 1;
	/*  Put the stack after the struct pt_regs.  */
	childksp = (unsigned long) childregs;
	p->thread.cp0_status = read_c0_status() & ~(ST0_CU2|ST0_CU1);
	if (unlikely(p->flags & PF_KTHREAD)) {
		/* kernel thread */
		unsigned long status = p->thread.cp0_status;
		memset(childregs, 0, sizeof(struct pt_regs));
		ti->addr_limit = KERNEL_DS;
		p->thread.reg16 = usp; /* fn */
		p->thread.reg17 = kthread_arg;
		p->thread.reg29 = childksp;
		p->thread.reg31 = (unsigned long) ret_from_kernel_thread;
#if defined(CONFIG_CPU_R3000) || defined(CONFIG_CPU_TX39XX)
		status = (status & ~(ST0_KUP | ST0_IEP | ST0_IEC)) |
			 ((status & (ST0_KUC | ST0_IEC)) << 2);
#else
		status |= ST0_EXL;
#endif
		childregs->cp0_status = status;
		return 0;
	}

	/* user thread */
	*childregs = *regs;
	childregs->regs[7] = 0;	/* Clear error flag */
	childregs->regs[2] = 0;	/* Child gets zero as return value */
	if (usp)
		childregs->regs[29] = usp;
	ti->addr_limit = USER_DS;

	p->thread.reg29 = (unsigned long) childregs;
	p->thread.reg31 = (unsigned long) ret_from_fork;

	/*
	 * New tasks lose permission to use the fpu. This accelerates context
	 * switching for most programs since they don't use the fpu.
	 */
	childregs->cp0_status &= ~(ST0_CU2|ST0_CU1);

	clear_tsk_thread_flag(p, TIF_USEDFPU);
	clear_tsk_thread_flag(p, TIF_USEDMSA);
	clear_tsk_thread_flag(p, TIF_MSA_CTX_LIVE);

#ifdef CONFIG_MIPS_MT_FPAFF
	clear_tsk_thread_flag(p, TIF_FPUBOUND);
#endif /* CONFIG_MIPS_MT_FPAFF */

	if (clone_flags & CLONE_SETTLS)
		ti->tp_value = regs->regs[7];

	return 0;
}
long arch_ptrace(struct task_struct *child, long request, long addr, long data)
{
	int ret;
	unsigned long __user *datap = (unsigned long __user *)data;

	switch (request) {
		/* Read word at location address. */
		case PTRACE_PEEKTEXT:
		case PTRACE_PEEKDATA: {
			unsigned long tmp;
			int copied;

			ret = -EIO;

			/* The signal trampoline page is outside the normal
			 * user-addressable space but still accessible.  This
			 * is a hack to make it possible to access the signal
			 * handler code in GDB. */
			if ((addr & PAGE_MASK) == cris_signal_return_page) {
				/* The trampoline page is globally mapped,
				 * no page table to traverse. */
				tmp = *(unsigned long *)addr;
			} else {
				copied = access_process_vm(child, addr, &tmp, sizeof(tmp), 0);

				if (copied != sizeof(tmp))
					break;
			}

			ret = put_user(tmp, datap);
			break;
		}

		/* Read the word at location address in the USER area. */
		case PTRACE_PEEKUSR: {
			unsigned long tmp;

			ret = -EIO;
			if ((addr & 3) || addr < 0 || addr > PT_MAX << 2)
				break;

			tmp = get_reg(child, addr >> 2);
			ret = put_user(tmp, datap);
			break;
		}

		/* Write the word at location address. */
		case PTRACE_POKETEXT:
		case PTRACE_POKEDATA:
			ret = 0;

			if (access_process_vm(child, addr, &data, sizeof(data), 1) == sizeof(data))
				break;

			ret = -EIO;
			break;

		/* Write the word at location address in the USER area. */
		case PTRACE_POKEUSR:
			ret = -EIO;
			if ((addr & 3) || addr < 0 || addr > PT_MAX << 2)
				break;

			addr >>= 2;

			if (addr == PT_CCS) {
				/* don't allow the tracing process to change
				 * stuff like interrupt enable, kernel/user
				 * bit, dma enables etc. */
				data &= CCS_MASK;
				data |= get_reg(child, PT_CCS) & ~CCS_MASK;
			}
			if (put_reg(child, addr, data))
				break;

			ret = 0;
			break;

		case PTRACE_SYSCALL:
		case PTRACE_CONT:
			ret = -EIO;

			if (!valid_signal(data))
				break;

			/* Continue means no single-step. */
			put_reg(child, PT_SPC, 0);

			if (!get_debugreg(child->pid, PT_BP_CTRL)) {
				unsigned long tmp;

				/* If no h/w bp configured, disable S bit. */
				tmp = get_reg(child, PT_CCS) & ~SBIT_USER;
				put_reg(child, PT_CCS, tmp);
			}

			if (request == PTRACE_SYSCALL) {
				set_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
			} else {
				clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
			}

			child->exit_code = data;

			/* TODO: make sure any pending breakpoint is killed */
			wake_up_process(child);
			ret = 0;
			break;

		/* Make the child exit by sending it a sigkill. */
		case PTRACE_KILL:
			ret = 0;

			if (child->exit_state == EXIT_ZOMBIE)
				break;

			child->exit_code = SIGKILL;

			/* Deconfigure single-step and h/w bp. */
			ptrace_disable(child);

			/* TODO: make sure any pending breakpoint is killed */
			wake_up_process(child);
			break;

		/* Set the trap flag. */
		case PTRACE_SINGLESTEP: {
			unsigned long tmp;

			ret = -EIO;

			/* Set up SPC if not set already (in which case we
			   have no other choice but to trust it). */
			if (!get_reg(child, PT_SPC)) {
				/* In case we're stopped in a delay slot. */
				tmp = get_reg(child, PT_ERP) & ~1;
				put_reg(child, PT_SPC, tmp);
			}
			tmp = get_reg(child, PT_CCS) | SBIT_USER;
			put_reg(child, PT_CCS, tmp);

			if (!valid_signal(data))
				break;

			clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE);

			/* TODO: set some clever breakpoint mechanism... */

			child->exit_code = data;
			wake_up_process(child);
			ret = 0;
			break;
		}

		case PTRACE_DETACH:
			ret = ptrace_detach(child, data);
			break;

		/* Get all GP registers from the child. */
		case PTRACE_GETREGS: {
			int i;
			unsigned long tmp;

			for (i = 0; i <= PT_MAX; i++) {
				tmp = get_reg(child, i);

				if (put_user(tmp, datap)) {
					ret = -EFAULT;
					goto out_tsk;
				}

				datap++;
			}

			ret = 0;
			break;
		}

		/* Set all GP registers in the child. */
		case PTRACE_SETREGS: {
			int i;
			unsigned long tmp;

			for (i = 0; i <= PT_MAX; i++) {
				if (get_user(tmp, datap)) {
					ret = -EFAULT;
					goto out_tsk;
				}

				if (i == PT_CCS) {
					tmp &= CCS_MASK;
					tmp |= get_reg(child, PT_CCS) & ~CCS_MASK;
				}

				put_reg(child, i, tmp);
				datap++;
			}

			ret = 0;
			break;
		}

		default:
			ret = ptrace_request(child, request, addr, data);
			break;
	}

out_tsk:
	return ret;
}
/*
 * Called by kernel/ptrace.c when detaching..
 *
 * Make sure single step bits etc are not set.
 */
void ptrace_disable(struct task_struct *child)
{
	/* Don't load the watchpoint registers for the ex-child. */
	clear_tsk_thread_flag(child, TIF_LOAD_WATCH);
}
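/*
 * ptrace_disable() runs on the detach path so that no single-step or
 * watchpoint state leaks into the now-untraced child.  From user space the
 * trigger is PTRACE_DETACH; a minimal sketch (not from this source tree):
 */
#include <signal.h>
#include <stdio.h>
#include <sys/ptrace.h>
#include <sys/wait.h>
#include <unistd.h>

int main(void)
{
	pid_t pid = fork();

	if (pid == 0) {
		ptrace(PTRACE_TRACEME, 0, NULL, NULL);
		raise(SIGSTOP);
		printf("child: running untraced after detach\n");
		_exit(0);
	}

	waitpid(pid, NULL, 0);

	/* Detach with no signal injected (data == 0); the kernel invokes the
	 * architecture's ptrace_disable() on this path so stale single-step
	 * or watchpoint state is not carried into the resumed child. */
	if (ptrace(PTRACE_DETACH, pid, NULL, NULL) == -1)
		perror("PTRACE_DETACH");

	waitpid(pid, NULL, 0);
	return 0;
}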
static struct task_struct *copy_process(unsigned long clone_flags,
					unsigned long stack_start,
					struct pt_regs *regs,
					unsigned long stack_size,
					int __user *child_tidptr,
					struct pid *pid,
					int trace)
{
	int retval;
	struct task_struct *p;
	int cgroup_callbacks_done = 0;

	if ((clone_flags & (CLONE_NEWNS|CLONE_FS)) == (CLONE_NEWNS|CLONE_FS))
		return ERR_PTR(-EINVAL);

	if ((clone_flags & CLONE_THREAD) && !(clone_flags & CLONE_SIGHAND))
		return ERR_PTR(-EINVAL);

	if ((clone_flags & CLONE_SIGHAND) && !(clone_flags & CLONE_VM))
		return ERR_PTR(-EINVAL);

	if ((clone_flags & CLONE_PARENT) &&
				current->signal->flags & SIGNAL_UNKILLABLE)
		return ERR_PTR(-EINVAL);

	retval = security_task_create(clone_flags);
	if (retval)
		goto fork_out;

	retval = -ENOMEM;
	p = dup_task_struct(current);
	if (!p)
		goto fork_out;

	ftrace_graph_init_task(p);

	rt_mutex_init_task(p);

#ifdef CONFIG_PROVE_LOCKING
	DEBUG_LOCKS_WARN_ON(!p->hardirqs_enabled);
	DEBUG_LOCKS_WARN_ON(!p->softirqs_enabled);
#endif
	retval = -EAGAIN;
	if (atomic_read(&p->real_cred->user->processes) >=
			task_rlimit(p, RLIMIT_NPROC)) {
		if (!capable(CAP_SYS_ADMIN) && !capable(CAP_SYS_RESOURCE) &&
		    p->real_cred->user != INIT_USER)
			goto bad_fork_free;
	}
	current->flags &= ~PF_NPROC_EXCEEDED;

	retval = copy_creds(p, clone_flags);
	if (retval < 0)
		goto bad_fork_free;

	retval = -EAGAIN;
	if (nr_threads >= max_threads)
		goto bad_fork_cleanup_count;

	if (!try_module_get(task_thread_info(p)->exec_domain->module))
		goto bad_fork_cleanup_count;

	p->did_exec = 0;
	delayacct_tsk_init(p);
	copy_flags(clone_flags, p);
	INIT_LIST_HEAD(&p->children);
	INIT_LIST_HEAD(&p->sibling);
	rcu_copy_process(p);
	p->vfork_done = NULL;
	spin_lock_init(&p->alloc_lock);

	init_sigpending(&p->pending);

	p->utime = p->stime = p->gtime = 0;
	p->utimescaled = p->stimescaled = 0;
#ifndef CONFIG_VIRT_CPU_ACCOUNTING
	p->prev_utime = p->prev_stime = 0;
#endif
#if defined(SPLIT_RSS_COUNTING)
	memset(&p->rss_stat, 0, sizeof(p->rss_stat));
#endif

	p->default_timer_slack_ns = current->timer_slack_ns;

	task_io_accounting_init(&p->ioac);
	acct_clear_integrals(p);

	posix_cpu_timers_init(p);

	do_posix_clock_monotonic_gettime(&p->start_time);
	p->real_start_time = p->start_time;
	monotonic_to_bootbased(&p->real_start_time);
	p->io_context = NULL;
	p->audit_context = NULL;
	if (clone_flags & CLONE_THREAD)
		threadgroup_change_begin(current);
	cgroup_fork(p);
#ifdef CONFIG_NUMA
	p->mempolicy = mpol_dup(p->mempolicy);
	if (IS_ERR(p->mempolicy)) {
		retval = PTR_ERR(p->mempolicy);
		p->mempolicy = NULL;
		goto bad_fork_cleanup_cgroup;
	}
	mpol_fix_fork_child_flag(p);
#endif
#ifdef CONFIG_CPUSETS
	p->cpuset_mem_spread_rotor = NUMA_NO_NODE;
	p->cpuset_slab_spread_rotor = NUMA_NO_NODE;
	seqcount_init(&p->mems_allowed_seq);
#endif
#ifdef CONFIG_TRACE_IRQFLAGS
	p->irq_events = 0;
#ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW
	p->hardirqs_enabled = 1;
#else
	p->hardirqs_enabled = 0;
#endif
	p->hardirq_enable_ip = 0;
	p->hardirq_enable_event = 0;
	p->hardirq_disable_ip = _THIS_IP_;
	p->hardirq_disable_event = 0;
	p->softirqs_enabled = 1;
	p->softirq_enable_ip = _THIS_IP_;
	p->softirq_enable_event = 0;
	p->softirq_disable_ip = 0;
	p->softirq_disable_event = 0;
	p->hardirq_context = 0;
	p->softirq_context = 0;
#endif
#ifdef CONFIG_LOCKDEP
	p->lockdep_depth = 0;
	p->curr_chain_key = 0;
	p->lockdep_recursion = 0;
#endif

#ifdef CONFIG_DEBUG_MUTEXES
	p->blocked_on = NULL;
	p->blocked_by = NULL;
	p->blocked_since = 0;
#endif
#ifdef CONFIG_CGROUP_MEM_RES_CTLR
	p->memcg_batch.do_batch = 0;
	p->memcg_batch.memcg = NULL;
#endif

	sched_fork(p);

	retval = perf_event_init_task(p);
	if (retval)
		goto bad_fork_cleanup_policy;
	retval = audit_alloc(p);
	if (retval)
		goto bad_fork_cleanup_policy;
	retval = copy_semundo(clone_flags, p);
	if (retval)
		goto bad_fork_cleanup_audit;
	retval = copy_files(clone_flags, p);
	if (retval)
		goto bad_fork_cleanup_semundo;
	retval = copy_fs(clone_flags, p);
	if (retval)
		goto bad_fork_cleanup_files;
	retval = copy_sighand(clone_flags, p);
	if (retval)
		goto bad_fork_cleanup_fs;
	retval = copy_signal(clone_flags, p);
	if (retval)
		goto bad_fork_cleanup_sighand;
	retval = copy_mm(clone_flags, p);
	if (retval)
		goto bad_fork_cleanup_signal;
	retval = copy_namespaces(clone_flags, p);
	if (retval)
		goto bad_fork_cleanup_mm;
	retval = copy_io(clone_flags, p);
	if (retval)
		goto bad_fork_cleanup_namespaces;
	retval = copy_thread(clone_flags, stack_start, stack_size, p, regs);
	if (retval)
		goto bad_fork_cleanup_io;

	if (pid != &init_struct_pid) {
		retval = -ENOMEM;
		pid = alloc_pid(p->nsproxy->pid_ns);
		if (!pid)
			goto bad_fork_cleanup_io;
	}

	p->pid = pid_nr(pid);
	p->tgid = p->pid;
	if (clone_flags & CLONE_THREAD)
		p->tgid = current->tgid;

	p->set_child_tid = (clone_flags & CLONE_CHILD_SETTID) ? child_tidptr : NULL;
	p->clear_child_tid = (clone_flags & CLONE_CHILD_CLEARTID) ? child_tidptr : NULL;
#ifdef CONFIG_BLOCK
	p->plug = NULL;
#endif
#ifdef CONFIG_FUTEX
	p->robust_list = NULL;
#ifdef CONFIG_COMPAT
	p->compat_robust_list = NULL;
#endif
	INIT_LIST_HEAD(&p->pi_state_list);
	p->pi_state_cache = NULL;
#endif
	if ((clone_flags & (CLONE_VM|CLONE_VFORK)) == CLONE_VM)
		p->sas_ss_sp = p->sas_ss_size = 0;

	user_disable_single_step(p);
	clear_tsk_thread_flag(p, TIF_SYSCALL_TRACE);
#ifdef TIF_SYSCALL_EMU
	clear_tsk_thread_flag(p, TIF_SYSCALL_EMU);
#endif
	clear_all_latency_tracing(p);

	if (clone_flags & CLONE_THREAD)
		p->exit_signal = -1;
	else if (clone_flags & CLONE_PARENT)
		p->exit_signal = current->group_leader->exit_signal;
	else
		p->exit_signal = (clone_flags & CSIGNAL);

	p->pdeath_signal = 0;
	p->exit_state = 0;

	p->nr_dirtied = 0;
	p->nr_dirtied_pause = 128 >> (PAGE_SHIFT - 10);
	p->dirty_paused_when = 0;

	p->group_leader = p;
	INIT_LIST_HEAD(&p->thread_group);

	cgroup_fork_callbacks(p);
	cgroup_callbacks_done = 1;

	write_lock_irq(&tasklist_lock);

	if (clone_flags & (CLONE_PARENT|CLONE_THREAD)) {
		p->real_parent = current->real_parent;
		p->parent_exec_id = current->parent_exec_id;
	} else {
		p->real_parent = current;
		p->parent_exec_id = current->self_exec_id;
	}

	spin_lock(&current->sighand->siglock);

	recalc_sigpending();
	if (signal_pending(current)) {
		spin_unlock(&current->sighand->siglock);
		write_unlock_irq(&tasklist_lock);
		retval = -ERESTARTNOINTR;
		goto bad_fork_free_pid;
	}

	if (clone_flags & CLONE_THREAD) {
		current->signal->nr_threads++;
		atomic_inc(&current->signal->live);
		atomic_inc(&current->signal->sigcnt);
		p->group_leader = current->group_leader;
		list_add_tail_rcu(&p->thread_group, &p->group_leader->thread_group);
	}

	if (likely(p->pid)) {
		ptrace_init_task(p, (clone_flags & CLONE_PTRACE) || trace);

		if (thread_group_leader(p)) {
			if (is_child_reaper(pid))
				p->nsproxy->pid_ns->child_reaper = p;

			p->signal->leader_pid = pid;
			p->signal->tty = tty_kref_get(current->signal->tty);
			attach_pid(p, PIDTYPE_PGID, task_pgrp(current));
			attach_pid(p, PIDTYPE_SID, task_session(current));
			list_add_tail(&p->sibling, &p->real_parent->children);
			list_add_tail_rcu(&p->tasks, &init_task.tasks);
			__this_cpu_inc(process_counts);
		}
		attach_pid(p, PIDTYPE_PID, pid);
		nr_threads++;
	}

	total_forks++;
	spin_unlock(&current->sighand->siglock);
	write_unlock_irq(&tasklist_lock);
	proc_fork_connector(p);
	cgroup_post_fork(p);
	if (clone_flags & CLONE_THREAD)
		threadgroup_change_end(current);
	perf_event_fork(p);

	trace_task_newtask(p, clone_flags);

	return p;

bad_fork_free_pid:
	if (pid != &init_struct_pid)
		free_pid(pid);
bad_fork_cleanup_io:
	if (p->io_context)
		exit_io_context(p);
bad_fork_cleanup_namespaces:
	if (unlikely(clone_flags & CLONE_NEWPID))
		pid_ns_release_proc(p->nsproxy->pid_ns);
	exit_task_namespaces(p);
bad_fork_cleanup_mm:
	if (p->mm)
		mmput(p->mm);
bad_fork_cleanup_signal:
	if (!(clone_flags & CLONE_THREAD))
		free_signal_struct(p->signal);
bad_fork_cleanup_sighand:
	__cleanup_sighand(p->sighand);
bad_fork_cleanup_fs:
	exit_fs(p);
bad_fork_cleanup_files:
	exit_files(p);
bad_fork_cleanup_semundo:
	exit_sem(p);
bad_fork_cleanup_audit:
	audit_free(p);
bad_fork_cleanup_policy:
	perf_event_free_task(p);
#ifdef CONFIG_NUMA
	mpol_put(p->mempolicy);
bad_fork_cleanup_cgroup:
#endif
	if (clone_flags & CLONE_THREAD)
		threadgroup_change_end(current);
	cgroup_exit(p, cgroup_callbacks_done);
	delayacct_tsk_free(p);
	module_put(task_thread_info(p)->exec_domain->module);
bad_fork_cleanup_count:
	atomic_dec(&p->cred->user->processes);
	exit_creds(p);
bad_fork_free:
	free_task(p);
fork_out:
	return ERR_PTR(retval);
}
int copy_thread(int nr, unsigned long clone_flags, unsigned long usp,
		unsigned long unused, struct task_struct *p, struct pt_regs *regs)
{
	struct thread_info *ti = task_thread_info(p);
	struct pt_regs *childregs;
	long childksp;

	p->set_child_tid = p->clear_child_tid = NULL;

	childksp = (unsigned long)task_stack_page(p) + THREAD_SIZE - 32;

	preempt_disable();

	if (is_fpu_owner())
		save_fp(p);

	if (cpu_has_dsp)
		save_dsp(p);

	preempt_enable();

	/* set up new TSS. */
	childregs = (struct pt_regs *) childksp - 1;
	*childregs = *regs;
	childregs->regs[7] = 0;	/* Clear error flag */

#if defined(CONFIG_BINFMT_IRIX)
	if (current->personality != PER_LINUX) {
		/* Under IRIX things are a little different. */
		childregs->regs[3] = 1;
		regs->regs[3] = 0;
	}
#endif
	childregs->regs[2] = 0;	/* Child gets zero as return value */
	regs->regs[2] = p->pid;

	if (childregs->cp0_status & ST0_CU0) {
		childregs->regs[28] = (unsigned long) ti;
		childregs->regs[29] = childksp;
		ti->addr_limit = KERNEL_DS;
	} else {
		childregs->regs[29] = usp;
		ti->addr_limit = USER_DS;
	}
	p->thread.reg29 = (unsigned long) childregs;
	p->thread.reg31 = (unsigned long) ret_from_fork;

	/*
	 * New tasks lose permission to use the fpu. This accelerates context
	 * switching for most programs since they don't use the fpu.
	 */
	p->thread.cp0_status = read_c0_status() & ~(ST0_CU2|ST0_CU1);
	childregs->cp0_status &= ~(ST0_CU2|ST0_CU1);
	clear_tsk_thread_flag(p, TIF_USEDFPU);

#ifdef CONFIG_MIPS_MT_FPAFF
	/*
	 * FPU affinity support is cleaner if we track the
	 * user-visible CPU affinity from the very beginning.
	 * The generic cpus_allowed mask will already have
	 * been copied from the parent before copy_thread
	 * is invoked.
	 */
	p->thread.user_cpus_allowed = p->cpus_allowed;
#endif /* CONFIG_MIPS_MT_FPAFF */

	if (clone_flags & CLONE_SETTLS)
		ti->tp_value = regs->regs[7];

	return 0;
}
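/*
 * copy_thread() above zeroes the child's return-value register (regs[2]/v0
 * on MIPS) while loading the parent's with the child's pid.  That is exactly
 * the dual return value of fork() seen from user space; a minimal sketch
 * (not from this source tree):
 */
#include <stdio.h>
#include <sys/wait.h>
#include <unistd.h>

int main(void)
{
	pid_t pid = fork();

	if (pid == 0) {
		/* The return-value register was zeroed in copy_thread(),
		 * so the child observes fork() == 0. */
		printf("child:  fork() returned 0, my pid is %d\n", (int)getpid());
		_exit(0);
	}

	/* The parent's return register was loaded with the child's pid. */
	printf("parent: fork() returned %d\n", (int)pid);
	waitpid(pid, NULL, 0);
	return 0;
}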
int copy_thread(unsigned long clone_flags, unsigned long stack_start,
		unsigned long stk_sz, struct task_struct *p)
{
	struct pt_regs *childregs = task_pt_regs(p);

	memset(&p->thread.cpu_context, 0, sizeof(struct cpu_context));

	/*
	 * Unalias p->thread.sve_state (if any) from the parent task
	 * and disable discard SVE state for p:
	 */
	clear_tsk_thread_flag(p, TIF_SVE);
	p->thread.sve_state = NULL;

	/*
	 * In case p was allocated the same task_struct pointer as some
	 * other recently-exited task, make sure p is disassociated from
	 * any cpu that may have run that now-exited task recently.
	 * Otherwise we could erroneously skip reloading the FPSIMD
	 * registers for p.
	 */
	fpsimd_flush_task_state(p);

	if (likely(!(p->flags & PF_KTHREAD))) {
		*childregs = *current_pt_regs();
		childregs->regs[0] = 0;

		/*
		 * Read the current TLS pointer from tpidr_el0 as it may be
		 * out-of-sync with the saved value.
		 */
		*task_user_tls(p) = read_sysreg(tpidr_el0);

		if (stack_start) {
			if (is_compat_thread(task_thread_info(p)))
				childregs->compat_sp = stack_start;
			else
				childregs->sp = stack_start;
		}

		/*
		 * If a TLS pointer was passed to clone (4th argument), use it
		 * for the new thread.
		 */
		if (clone_flags & CLONE_SETTLS)
			p->thread.uw.tp_value = childregs->regs[3];
	} else {
		memset(childregs, 0, sizeof(struct pt_regs));
		childregs->pstate = PSR_MODE_EL1h;
		if (IS_ENABLED(CONFIG_ARM64_UAO) &&
		    cpus_have_const_cap(ARM64_HAS_UAO))
			childregs->pstate |= PSR_UAO_BIT;

		if (arm64_get_ssbd_state() == ARM64_SSBD_FORCE_DISABLE)
			childregs->pstate |= PSR_SSBS_BIT;

		if (system_uses_irq_prio_masking())
			childregs->pmr_save = GIC_PRIO_IRQON;

		p->thread.cpu_context.x19 = stack_start;
		p->thread.cpu_context.x20 = stk_sz;
	}
	p->thread.cpu_context.pc = (unsigned long)ret_from_fork;
	p->thread.cpu_context.sp = (unsigned long)childregs;

	ptrace_hw_copy_thread(p);

	return 0;
}
/*
 * Note that this implementation of ptrace behaves differently from vanilla
 * ptrace.  Contrary to what the man page says, in the PTRACE_PEEKTEXT,
 * PTRACE_PEEKDATA, and PTRACE_PEEKUSER requests the data variable is not
 * ignored.  Instead, the data variable is expected to point at a location
 * (in user space) where the result of the ptrace call is written (instead of
 * being returned).
 */
long arch_ptrace(struct task_struct *child, long request, long addr, long data)
{
	int ret;
	unsigned long __user *datap = (unsigned long __user *)data;

	switch (request) {
		/* Read word at location address. */
		case PTRACE_PEEKTEXT:
		case PTRACE_PEEKDATA:
			ret = generic_ptrace_peekdata(child, addr, data);
			break;

		/* Read the word at location address in the USER area. */
		case PTRACE_PEEKUSR: {
			unsigned long tmp;

			ret = -EIO;
			if ((addr & 3) || addr < 0 || addr > PT_MAX << 2)
				break;

			tmp = get_reg(child, addr >> 2);
			ret = put_user(tmp, datap);
			break;
		}

		/* Write the word at location address. */
		case PTRACE_POKETEXT:
		case PTRACE_POKEDATA:
			ret = generic_ptrace_pokedata(child, addr, data);
			break;

		/* Write the word at location address in the USER area. */
		case PTRACE_POKEUSR:
			ret = -EIO;
			if ((addr & 3) || addr < 0 || addr > PT_MAX << 2)
				break;

			addr >>= 2;

			if (addr == PT_DCCR) {
				/* don't allow the tracing process to change
				 * stuff like interrupt enable, kernel/user
				 * bit, dma enables etc. */
				data &= DCCR_MASK;
				data |= get_reg(child, PT_DCCR) & ~DCCR_MASK;
			}
			if (put_reg(child, addr, data))
				break;

			ret = 0;
			break;

		case PTRACE_SYSCALL:
		case PTRACE_CONT:
			ret = -EIO;

			if (!valid_signal(data))
				break;

			if (request == PTRACE_SYSCALL) {
				set_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
			} else {
				clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
			}

			child->exit_code = data;

			/* TODO: make sure any pending breakpoint is killed */
			wake_up_process(child);
			ret = 0;
			break;

		/* Make the child exit by sending it a sigkill. */
		case PTRACE_KILL:
			ret = 0;

			if (child->exit_state == EXIT_ZOMBIE)
				break;

			child->exit_code = SIGKILL;

			/* TODO: make sure any pending breakpoint is killed */
			wake_up_process(child);
			break;

		/* Set the trap flag. */
		case PTRACE_SINGLESTEP:
			ret = -EIO;

			if (!valid_signal(data))
				break;

			clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE);

			/* TODO: set some clever breakpoint mechanism... */

			child->exit_code = data;
			wake_up_process(child);
			ret = 0;
			break;

		/* Get all GP registers from the child. */
		case PTRACE_GETREGS: {
			int i;
			unsigned long tmp;

			ret = 0;
			for (i = 0; i <= PT_MAX; i++) {
				tmp = get_reg(child, i);

				if (put_user(tmp, datap)) {
					ret = -EFAULT;
					break;
				}

				datap++;
			}

			break;
		}

		/* Set all GP registers in the child. */
		case PTRACE_SETREGS: {
			int i;
			unsigned long tmp;

			ret = 0;
			for (i = 0; i <= PT_MAX; i++) {
				if (get_user(tmp, datap)) {
					ret = -EFAULT;
					break;
				}

				if (i == PT_DCCR) {
					tmp &= DCCR_MASK;
					tmp |= get_reg(child, PT_DCCR) & ~DCCR_MASK;
				}

				put_reg(child, i, tmp);
				datap++;
			}

			break;
		}

		default:
			ret = ptrace_request(child, request, addr, data);
			break;
	}

	return ret;
}
/* * This creates a new process as a copy of the old one, * but does not actually start it yet. * * It copies the registers, and all the appropriate * parts of the process environment (as per the clone * flags). The actual kick-off is left to the caller. */ static task_t *copy_process(unsigned long clone_flags, unsigned long stack_start, struct pt_regs *regs, unsigned long stack_size, int __user *parent_tidptr, int __user *child_tidptr, int pid) { int retval; struct task_struct *p = NULL; if ((clone_flags & (CLONE_NEWNS|CLONE_FS)) == (CLONE_NEWNS|CLONE_FS)) return ERR_PTR(-EINVAL); /* * Thread groups must share signals as well, and detached threads * can only be started up within the thread group. */ if ((clone_flags & CLONE_THREAD) && !(clone_flags & CLONE_SIGHAND)) return ERR_PTR(-EINVAL); /* * Shared signal handlers imply shared VM. By way of the above, * thread groups also imply shared VM. Blocking this case allows * for various simplifications in other code. */ if ((clone_flags & CLONE_SIGHAND) && !(clone_flags & CLONE_VM)) return ERR_PTR(-EINVAL); retval = security_task_create(clone_flags); if (retval) goto fork_out; retval = -ENOMEM; p = dup_task_struct(current); if (!p) goto fork_out; retval = -EAGAIN; if (atomic_read(&p->user->processes) >= p->signal->rlim[RLIMIT_NPROC].rlim_cur) { if (!capable(CAP_SYS_ADMIN) && !capable(CAP_SYS_RESOURCE) && p->user != &root_user) goto bad_fork_free; } atomic_inc(&p->user->__count); atomic_inc(&p->user->processes); get_group_info(p->group_info); /* * If multiple threads are within copy_process(), then this check * triggers too late. This doesn't hurt, the check is only there * to stop root fork bombs. */ if (nr_threads >= max_threads) goto bad_fork_cleanup_count; if (!try_module_get(p->thread_info->exec_domain->module)) goto bad_fork_cleanup_count; if (p->binfmt && !try_module_get(p->binfmt->module)) goto bad_fork_cleanup_put_domain; p->did_exec = 0; copy_flags(clone_flags, p); p->pid = pid; retval = -EFAULT; if (clone_flags & CLONE_PARENT_SETTID) if (put_user(p->pid, parent_tidptr)) goto bad_fork_cleanup; p->proc_dentry = NULL; INIT_LIST_HEAD(&p->children); INIT_LIST_HEAD(&p->sibling); p->vfork_done = NULL; spin_lock_init(&p->alloc_lock); spin_lock_init(&p->proc_lock); clear_tsk_thread_flag(p, TIF_SIGPENDING); init_sigpending(&p->pending); p->utime = cputime_zero; p->stime = cputime_zero; p->sched_time = 0; p->rchar = 0; /* I/O counter: bytes read */ p->wchar = 0; /* I/O counter: bytes written */ p->syscr = 0; /* I/O counter: read syscalls */ p->syscw = 0; /* I/O counter: write syscalls */ acct_clear_integrals(p); p->it_virt_expires = cputime_zero; p->it_prof_expires = cputime_zero; p->it_sched_expires = 0; INIT_LIST_HEAD(&p->cpu_timers[0]); INIT_LIST_HEAD(&p->cpu_timers[1]); INIT_LIST_HEAD(&p->cpu_timers[2]); p->lock_depth = -1; /* -1 = no lock */ do_posix_clock_monotonic_gettime(&p->start_time); p->security = NULL; p->io_context = NULL; p->io_wait = NULL; p->audit_context = NULL; #ifdef CONFIG_NUMA p->mempolicy = mpol_copy(p->mempolicy); if (IS_ERR(p->mempolicy)) { retval = PTR_ERR(p->mempolicy); p->mempolicy = NULL; goto bad_fork_cleanup; } #endif p->tgid = p->pid; if (clone_flags & CLONE_THREAD) p->tgid = current->tgid; if ((retval = security_task_alloc(p))) goto bad_fork_cleanup_policy; if ((retval = audit_alloc(p))) goto bad_fork_cleanup_security; /* copy all the process information */ if ((retval = copy_semundo(clone_flags, p))) goto bad_fork_cleanup_audit; if ((retval = copy_files(clone_flags, p))) goto bad_fork_cleanup_semundo; if 
((retval = copy_fs(clone_flags, p))) goto bad_fork_cleanup_files; if ((retval = copy_sighand(clone_flags, p))) goto bad_fork_cleanup_fs; if ((retval = copy_signal(clone_flags, p))) goto bad_fork_cleanup_sighand; if ((retval = copy_mm(clone_flags, p))) goto bad_fork_cleanup_signal; if ((retval = copy_keys(clone_flags, p))) goto bad_fork_cleanup_mm; if ((retval = copy_namespace(clone_flags, p))) goto bad_fork_cleanup_keys; retval = copy_thread(0, clone_flags, stack_start, stack_size, p, regs); if (retval) goto bad_fork_cleanup_namespace; p->set_child_tid = (clone_flags & CLONE_CHILD_SETTID) ? child_tidptr : NULL; /* * Clear TID on mm_release()? */ p->clear_child_tid = (clone_flags & CLONE_CHILD_CLEARTID) ? child_tidptr: NULL; /* * Syscall tracing should be turned off in the child regardless * of CLONE_PTRACE. */ clear_tsk_thread_flag(p, TIF_SYSCALL_TRACE); /* Our parent execution domain becomes current domain. These must match for thread signalling to apply */ p->parent_exec_id = p->self_exec_id; /* ok, now we should be set up.. */ p->exit_signal = (clone_flags & CLONE_THREAD) ? -1 : (clone_flags & CSIGNAL); p->pdeath_signal = 0; p->exit_state = 0; /* Perform scheduler related setup */ sched_fork(p); /* * Ok, make it visible to the rest of the system. * We don't wake it up yet. */ p->group_leader = p; INIT_LIST_HEAD(&p->ptrace_children); INIT_LIST_HEAD(&p->ptrace_list); /* Need tasklist lock for parent etc handling! */ write_lock_irq(&tasklist_lock); /* * The task hasn't been attached yet, so cpus_allowed mask cannot * have changed. The cpus_allowed mask of the parent may have * changed after it was copied first time, and it may then move to * another CPU - so we re-copy it here and set the child's CPU to * the parent's CPU. This avoids a lot of nasty races. */ p->cpus_allowed = current->cpus_allowed; set_task_cpu(p, smp_processor_id()); /* * Check for pending SIGKILL! The new thread should not be allowed * to slip out of an OOM kill. (or normal SIGKILL.) */ if (sigismember(&current->pending.signal, SIGKILL)) { write_unlock_irq(&tasklist_lock); retval = -EINTR; goto bad_fork_cleanup_namespace; } /* CLONE_PARENT re-uses the old parent */ if (clone_flags & (CLONE_PARENT|CLONE_THREAD)) p->real_parent = current->real_parent; else p->real_parent = current; p->parent = p->real_parent; if (clone_flags & CLONE_THREAD) { spin_lock(&current->sighand->siglock); /* * Important: if an exit-all has been started then * do not create this new thread - the whole thread * group is supposed to exit anyway. */ if (current->signal->flags & SIGNAL_GROUP_EXIT) { spin_unlock(&current->sighand->siglock); write_unlock_irq(&tasklist_lock); retval = -EAGAIN; goto bad_fork_cleanup_namespace; } p->group_leader = current->group_leader; if (current->signal->group_stop_count > 0) { /* * There is an all-stop in progress for the group. * We ourselves will stop as soon as we check signals. * Make the new thread part of that group stop too. */ current->signal->group_stop_count++; set_tsk_thread_flag(p, TIF_SIGPENDING); } if (!cputime_eq(current->signal->it_virt_expires, cputime_zero) || !cputime_eq(current->signal->it_prof_expires, cputime_zero) || current->signal->rlim[RLIMIT_CPU].rlim_cur != RLIM_INFINITY || !list_empty(&current->signal->cpu_timers[0]) || !list_empty(&current->signal->cpu_timers[1]) || !list_empty(&current->signal->cpu_timers[2])) { /* * Have child wake up on its first tick to check * for process CPU timers. 
*/ p->it_prof_expires = jiffies_to_cputime(1); } spin_unlock(&current->sighand->siglock); } SET_LINKS(p); if (unlikely(p->ptrace & PT_PTRACED)) __ptrace_link(p, current->parent); cpuset_fork(p); attach_pid(p, PIDTYPE_PID, p->pid); attach_pid(p, PIDTYPE_TGID, p->tgid); if (thread_group_leader(p)) { attach_pid(p, PIDTYPE_PGID, process_group(p)); attach_pid(p, PIDTYPE_SID, p->signal->session); if (p->pid) __get_cpu_var(process_counts)++; } nr_threads++; total_forks++; write_unlock_irq(&tasklist_lock); retval = 0; fork_out: if (retval) return ERR_PTR(retval); return p; bad_fork_cleanup_namespace: exit_namespace(p); bad_fork_cleanup_keys: exit_keys(p); bad_fork_cleanup_mm: if (p->mm) mmput(p->mm); bad_fork_cleanup_signal: exit_signal(p); bad_fork_cleanup_sighand: exit_sighand(p); bad_fork_cleanup_fs: exit_fs(p); /* blocking */ bad_fork_cleanup_files: exit_files(p); /* blocking */ bad_fork_cleanup_semundo: exit_sem(p); bad_fork_cleanup_audit: audit_free(p); bad_fork_cleanup_security: security_task_free(p); bad_fork_cleanup_policy: #ifdef CONFIG_NUMA mpol_free(p->mempolicy); #endif bad_fork_cleanup: if (p->binfmt) module_put(p->binfmt->module); bad_fork_cleanup_put_domain: module_put(p->thread_info->exec_domain->module); bad_fork_cleanup_count: put_group_info(p->group_info); atomic_dec(&p->user->processes); free_uid(p->user); bad_fork_free: free_task(p); goto fork_out; }
void user_disable_single_step(struct task_struct *child) { clear_tsk_thread_flag(child, TIF_SINGLESTEP); }
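For symmetry, a sketch of what the matching enable helper often looks like on ports that track single-step purely with the thread flag; this helper is an assumption for illustration, not code from this tree.

/* Hypothetical counterpart to the helper above, assuming the port
 * needs nothing beyond the TIF flag to arm single-stepping. */
void user_enable_single_step(struct task_struct *child)
{
	set_tsk_thread_flag(child, TIF_SINGLESTEP);
}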
int sys_ptrace(long request, long pid, long addr, long data) { struct task_struct *child; int ret = -EPERM; lock_kernel(); if (request == PTRACE_TRACEME) { /* are we already being traced? */ if (current->ptrace & PT_PTRACED) goto out; ret = security_ptrace(current->parent, current); if (ret) goto out; /* set the ptrace bit in the process flags. */ current->ptrace |= PT_PTRACED; ret = 0; goto out; } ret = -ESRCH; read_lock(&tasklist_lock); child = find_task_by_pid(pid); if (child) get_task_struct(child); read_unlock(&tasklist_lock); if (!child) goto out; ret = -EPERM; if (pid == 1) /* you may not mess with init */ goto out_tsk; if (request == PTRACE_ATTACH) { ret = ptrace_attach(child); goto out_tsk; } ret = ptrace_check_attach(child, request == PTRACE_KILL); if (ret < 0) goto out_tsk; switch (request) { /* when I and D space are separate, these will need to be fixed. */ case PTRACE_PEEKTEXT: /* read word at location addr. */ case PTRACE_PEEKDATA: { unsigned long tmp; int copied; copied = access_process_vm(child, addr, &tmp, sizeof(tmp), 0); ret = -EIO; if (copied != sizeof(tmp)) break; ret = put_user(tmp,(unsigned long __user *) data); break; } /* read the word at location addr in the USER area. */ case PTRACE_PEEKUSR: { unsigned long index; unsigned long tmp; ret = -EIO; /* convert to index and check */ index = (unsigned long) addr >> 3; if ((addr & 7) || (index > PT_FPSCR)) break; if (index < PT_FPR0) { tmp = get_reg(child, (int)index); } else { flush_fp_to_thread(child); tmp = ((unsigned long *)child->thread.fpr)[index - PT_FPR0]; } ret = put_user(tmp,(unsigned long __user *) data); break; } /* If I and D space are separate, this will have to be fixed. */ case PTRACE_POKETEXT: /* write the word at location addr. */ case PTRACE_POKEDATA: ret = 0; if (access_process_vm(child, addr, &data, sizeof(data), 1) == sizeof(data)) break; ret = -EIO; break; /* write the word at location addr in the USER area */ case PTRACE_POKEUSR: { unsigned long index; ret = -EIO; /* convert to index and check */ index = (unsigned long) addr >> 3; if ((addr & 7) || (index > PT_FPSCR)) break; if (index == PT_ORIG_R3) break; if (index < PT_FPR0) { ret = put_reg(child, index, data); } else { flush_fp_to_thread(child); ((unsigned long *)child->thread.fpr)[index - PT_FPR0] = data; ret = 0; } break; } case PTRACE_SYSCALL: /* continue and stop at next (return from) syscall */ case PTRACE_CONT: { /* restart after signal. */ ret = -EIO; if (!valid_signal(data)) break; if (request == PTRACE_SYSCALL) set_tsk_thread_flag(child, TIF_SYSCALL_TRACE); else clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE); child->exit_code = data; /* make sure the single step bit is not set. */ clear_single_step(child); wake_up_process(child); ret = 0; break; } /* * make the child exit. Best I can do is send it a sigkill. * perhaps it should be put in the status that it wants to * exit. */ case PTRACE_KILL: { ret = 0; if (child->exit_state == EXIT_ZOMBIE) /* already dead */ break; child->exit_code = SIGKILL; /* make sure the single step bit is not set. */ clear_single_step(child); wake_up_process(child); break; } case PTRACE_SINGLESTEP: { /* set the trap flag. */ ret = -EIO; if (!valid_signal(data)) break; clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE); set_single_step(child); child->exit_code = data; /* give it a chance to run. 
*/ wake_up_process(child); ret = 0; break; } case PTRACE_GET_DEBUGREG: { ret = -EINVAL; /* We only support one DABR and no IABRS at the moment */ if (addr > 0) break; ret = put_user(child->thread.dabr, (unsigned long __user *)data); break; } case PTRACE_SET_DEBUGREG: ret = ptrace_set_debugreg(child, addr, data); break; case PTRACE_DETACH: ret = ptrace_detach(child, data); break; case PPC_PTRACE_GETREGS: { /* Get GPRs 0 - 31. */ int i; unsigned long *reg = &((unsigned long *)child->thread.regs)[0]; unsigned long __user *tmp = (unsigned long __user *)addr; for (i = 0; i < 32; i++) { ret = put_user(*reg, tmp); if (ret) break; reg++; tmp++; } break; } case PPC_PTRACE_SETREGS: { /* Set GPRs 0 - 31. */ int i; unsigned long *reg = &((unsigned long *)child->thread.regs)[0]; unsigned long __user *tmp = (unsigned long __user *)addr; for (i = 0; i < 32; i++) { ret = get_user(*reg, tmp); if (ret) break; reg++; tmp++; } break; } case PPC_PTRACE_GETFPREGS: { /* Get FPRs 0 - 31. */ int i; unsigned long *reg = &((unsigned long *)child->thread.fpr)[0]; unsigned long __user *tmp = (unsigned long __user *)addr; flush_fp_to_thread(child); for (i = 0; i < 32; i++) { ret = put_user(*reg, tmp); if (ret) break; reg++; tmp++; } break; } case PPC_PTRACE_SETFPREGS: { /* Set FPRs 0 - 31. */ int i; unsigned long *reg = &((unsigned long *)child->thread.fpr)[0]; unsigned long __user *tmp = (unsigned long __user *)addr; flush_fp_to_thread(child); for (i = 0; i < 32; i++) { ret = get_user(*reg, tmp); if (ret) break; reg++; tmp++; } break; } #ifdef CONFIG_ALTIVEC case PTRACE_GETVRREGS: /* Get the child altivec register state. */ flush_altivec_to_thread(child); ret = get_vrregs((unsigned long __user *)data, child); break; case PTRACE_SETVRREGS: /* Set the child altivec register state. */ flush_altivec_to_thread(child); ret = set_vrregs(child, (unsigned long __user *)data); break; #endif default: ret = ptrace_request(child, request, addr, data); break; } out_tsk: put_task_struct(child); out: unlock_kernel(); return ret; }
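The PPC_PTRACE_GETREGS family above uses the old reverse-argument convention: the user buffer travels in addr (ptrace's third argument) and data is unused. A hedged tracer-side sketch; PPC_PTRACE_GETREGS is assumed to come from the port's asm/ptrace.h, and the 32-word layout follows the loop above.

#include <stdio.h>
#include <sys/ptrace.h>
#include <sys/types.h>

/* Fetch GPR0..GPR31 with the reverse-argument request: the
 * destination buffer goes in the addr slot. */
static int fetch_gprs(pid_t child_pid, unsigned long gprs[32])
{
	if (ptrace(PPC_PTRACE_GETREGS, child_pid, gprs, NULL) < 0) {
		perror("PPC_PTRACE_GETREGS");
		return -1;
	}
	return 0;
}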
long arch_ptrace(struct task_struct *child, long request, long addr, long data) { int ret; unsigned long __user *datap = (unsigned long __user *)data; switch (request) { /* when I and D space are separate, these will need to be fixed. */ case PTRACE_PEEKDATA: pr_debug("ptrace: PEEKDATA\n"); /* fall through */ case PTRACE_PEEKTEXT: /* read word at location addr. */ { unsigned long tmp = 0; int copied; ret = -EIO; pr_debug("ptrace: PEEKTEXT at addr 0x%08lx + %ld\n", addr, sizeof(data)); if (is_user_addr_valid(child, addr, sizeof(tmp)) < 0) break; pr_debug("ptrace: user address is valid\n"); if (L1_CODE_LENGTH != 0 && addr >= get_l1_code_start() && addr + sizeof(tmp) <= get_l1_code_start() + L1_CODE_LENGTH) { safe_dma_memcpy (&tmp, (const void *)(addr), sizeof(tmp)); copied = sizeof(tmp); } else if (L1_DATA_A_LENGTH != 0 && addr >= L1_DATA_A_START && addr + sizeof(tmp) <= L1_DATA_A_START + L1_DATA_A_LENGTH) { memcpy(&tmp, (const void *)(addr), sizeof(tmp)); copied = sizeof(tmp); } else if (L1_DATA_B_LENGTH != 0 && addr >= L1_DATA_B_START && addr + sizeof(tmp) <= L1_DATA_B_START + L1_DATA_B_LENGTH) { memcpy(&tmp, (const void *)(addr), sizeof(tmp)); copied = sizeof(tmp); } else if (addr >= FIXED_CODE_START && addr + sizeof(tmp) <= FIXED_CODE_END) { copy_from_user_page(0, 0, 0, &tmp, (const void *)(addr), sizeof(tmp)); copied = sizeof(tmp); } else copied = access_process_vm(child, addr, &tmp, sizeof(tmp), 0); pr_debug("ptrace: copied size %d [0x%08lx]\n", copied, tmp); if (copied != sizeof(tmp)) break; ret = put_user(tmp, datap); break; } /* read the word at location addr in the USER area. */ case PTRACE_PEEKUSR: { unsigned long tmp; ret = -EIO; tmp = 0; if ((addr & 3) || (addr > (sizeof(struct pt_regs) + 16))) { printk(KERN_WARNING "ptrace error : PEEKUSR : temporarily returning " "0 - %x sizeof(pt_regs) is %lx\n", (int)addr, sizeof(struct pt_regs)); break; } if (addr == sizeof(struct pt_regs)) { /* PT_TEXT_ADDR */ tmp = child->mm->start_code + TEXT_OFFSET; } else if (addr == (sizeof(struct pt_regs) + 4)) { /* PT_TEXT_END_ADDR */ tmp = child->mm->end_code; } else if (addr == (sizeof(struct pt_regs) + 8)) { /* PT_DATA_ADDR */ tmp = child->mm->start_data; #ifdef CONFIG_BINFMT_ELF_FDPIC } else if (addr == (sizeof(struct pt_regs) + 12)) { tmp = child->mm->context.exec_fdpic_loadmap; } else if (addr == (sizeof(struct pt_regs) + 16)) { tmp = child->mm->context.interp_fdpic_loadmap; #endif } else { tmp = get_reg(child, addr); } ret = put_user(tmp, datap); break; } /* when I and D space are separate, this will have to be fixed. */ case PTRACE_POKEDATA: pr_debug("ptrace: PTRACE_POKEDATA\n"); /* fall through */ case PTRACE_POKETEXT: /* write the word at location addr. 
*/ { int copied; ret = -EIO; pr_debug("ptrace: POKETEXT at addr 0x%08lx + %ld bytes %lx\n", addr, sizeof(data), data); if (is_user_addr_valid(child, addr, sizeof(data)) < 0) break; pr_debug("ptrace: user address is valid\n"); if (L1_CODE_LENGTH != 0 && addr >= get_l1_code_start() && addr + sizeof(data) <= get_l1_code_start() + L1_CODE_LENGTH) { safe_dma_memcpy ((void *)(addr), &data, sizeof(data)); copied = sizeof(data); } else if (L1_DATA_A_LENGTH != 0 && addr >= L1_DATA_A_START && addr + sizeof(data) <= L1_DATA_A_START + L1_DATA_A_LENGTH) { memcpy((void *)(addr), &data, sizeof(data)); copied = sizeof(data); } else if (L1_DATA_B_LENGTH != 0 && addr >= L1_DATA_B_START && addr + sizeof(data) <= L1_DATA_B_START + L1_DATA_B_LENGTH) { memcpy((void *)(addr), &data, sizeof(data)); copied = sizeof(data); } else if (addr >= FIXED_CODE_START && addr + sizeof(data) <= FIXED_CODE_END) { copy_to_user_page(0, 0, 0, (void *)(addr), &data, sizeof(data)); copied = sizeof(data); } else copied = access_process_vm(child, addr, &data, sizeof(data), 1); pr_debug("ptrace: copied size %d\n", copied); if (copied != sizeof(data)) break; ret = 0; break; } case PTRACE_POKEUSR: /* write the word at location addr in the USER area */ ret = -EIO; if ((addr & 3) || (addr > (sizeof(struct pt_regs) + 16))) { printk(KERN_WARNING "ptrace error : POKEUSR: temporarily returning 0\n"); break; } if (addr >= (sizeof(struct pt_regs))) { ret = 0; break; } if (addr == PT_SYSCFG) { /* preserve the bits userspace may not modify */ data &= SYSCFG_MASK; data |= get_reg(child, PT_SYSCFG) & ~SYSCFG_MASK; } ret = put_reg(child, addr, data); break; case PTRACE_SYSCALL: /* continue and stop at next (return from) syscall */ case PTRACE_CONT: /* restart after signal. */ pr_debug("ptrace: syscall/cont\n"); ret = -EIO; if (!valid_signal(data)) break; if (request == PTRACE_SYSCALL) set_tsk_thread_flag(child, TIF_SYSCALL_TRACE); else clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE); child->exit_code = data; ptrace_disable(child); pr_debug("ptrace: before wake_up_process\n"); wake_up_process(child); ret = 0; break; /* * make the child exit. Best I can do is send it a sigkill. * perhaps it should be put in the status that it wants to * exit. */ case PTRACE_KILL: ret = 0; if (child->exit_state == EXIT_ZOMBIE) /* already dead */ break; child->exit_code = SIGKILL; ptrace_disable(child); wake_up_process(child); break; case PTRACE_SINGLESTEP: /* set the trap flag. */ pr_debug("ptrace: single step\n"); ret = -EIO; if (!valid_signal(data)) break; clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE); ptrace_enable(child); child->exit_code = data; wake_up_process(child); ret = 0; break; case PTRACE_GETREGS: /* Get all gp regs from the child. */ ret = ptrace_getregs(child, datap); break; case PTRACE_SETREGS: printk(KERN_WARNING "ptrace: SETREGS: **** NOT IMPLEMENTED ***\n"); /* Set all gp regs in the child. */ ret = 0; break; default: ret = ptrace_request(child, request, addr, data); break; } return ret; }
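A tracer-side sketch of the plant-a-breakpoint sequence the POKETEXT path above serves; BP_OPCODE is a placeholder for the port's trap instruction, and the PEEK result is stored through data per this port's convention.

#include <stdio.h>
#include <sys/ptrace.h>
#include <sys/types.h>

#define BP_OPCODE 0x0025UL /* placeholder trap opcode, not a real value */

/* Save the original word, write the trap opcode, and resume. */
static long plant_breakpoint(pid_t child_pid, unsigned long bp_addr)
{
	long orig = 0;

	ptrace(PTRACE_PEEKTEXT, child_pid, (void *)bp_addr, &orig);
	ptrace(PTRACE_POKETEXT, child_pid, (void *)bp_addr, (void *)BP_OPCODE);
	ptrace(PTRACE_CONT, child_pid, NULL, NULL);
	return orig; /* keep it so the instruction can be restored later */
}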
long arch_ptrace(struct task_struct *child, long request, long addr, long data) { int rval; unsigned long val = 0; unsigned long copied; switch (request) { case PTRACE_PEEKTEXT: /* read word at location addr. */ case PTRACE_PEEKDATA: pr_debug("PEEKTEXT/PEEKDATA at %08lX\n", addr); copied = access_process_vm(child, addr, &val, sizeof(val), 0); rval = -EIO; if (copied != sizeof(val)) break; rval = put_user(val, (unsigned long *)data); break; case PTRACE_POKETEXT: /* write the word at location addr. */ case PTRACE_POKEDATA: pr_debug("POKETEXT/POKEDATA to %08lX\n", addr); rval = 0; if (access_process_vm(child, addr, &data, sizeof(data), 1) == sizeof(data)) break; rval = -EIO; break; /* Read/write the word at location ADDR in the registers. */ case PTRACE_PEEKUSR: case PTRACE_POKEUSR: pr_debug("PEEKUSR/POKEUSR : 0x%08lx\n", addr); rval = 0; if (addr >= PT_SIZE && request == PTRACE_PEEKUSR) { /* * Special requests that don't actually correspond * to offsets in struct pt_regs. */ if (addr == PT_TEXT_ADDR) { val = child->mm->start_code; } else if (addr == PT_DATA_ADDR) { val = child->mm->start_data; } else if (addr == PT_TEXT_LEN) { val = child->mm->end_code - child->mm->start_code; } else { rval = -EIO; } } else if (addr >= 0 && addr < PT_SIZE && (addr & 0x3) == 0) { microblaze_reg_t *reg_addr = reg_save_addr(addr, child); if (request == PTRACE_PEEKUSR) val = *reg_addr; else *reg_addr = data; } else rval = -EIO; if (rval == 0 && request == PTRACE_PEEKUSR) rval = put_user(val, (unsigned long *)data); break; /* Continue and stop at next (return from) syscall */ case PTRACE_SYSCALL: pr_debug("PTRACE_SYSCALL\n"); case PTRACE_SINGLESTEP: pr_debug("PTRACE_SINGLESTEP\n"); /* Restart after a signal. */ case PTRACE_CONT: pr_debug("PTRACE_CONT\n"); rval = -EIO; if (!valid_signal(data)) break; if (request == PTRACE_SYSCALL) set_tsk_thread_flag(child, TIF_SYSCALL_TRACE); else clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE); child->exit_code = data; pr_debug("wakeup_process\n"); wake_up_process(child); rval = 0; break; /* * make the child exit. Best I can do is send it a sigkill. * perhaps it should be put in the status that it wants to * exit. */ case PTRACE_KILL: pr_debug("PTRACE_KILL\n"); rval = 0; if (child->exit_state == EXIT_ZOMBIE) /* already dead */ break; child->exit_code = SIGKILL; wake_up_process(child); break; case PTRACE_DETACH: /* detach a process that was attached. */ pr_debug("PTRACE_DETACH\n"); rval = ptrace_detach(child, data); break; default: /* rval = ptrace_request(child, request, addr, data); noMMU */ rval = -EIO; } return rval; }
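The PEEKUSR branch above exposes pseudo-offsets past PT_SIZE (PT_TEXT_ADDR, PT_DATA_ADDR, PT_TEXT_LEN) so a tracer can locate the child's segments without parsing /proc. A sketch assuming those constants come from the port's asm/ptrace.h and the store-through-data convention used by this handler.

#include <stdio.h>
#include <sys/ptrace.h>
#include <sys/types.h>
#include <asm/ptrace.h> /* PT_TEXT_ADDR, PT_TEXT_LEN (assumed location) */

/* Query the pseudo-registers the handler serves past PT_SIZE. */
static void show_text_segment(pid_t child_pid)
{
	unsigned long start = 0, len = 0;

	ptrace(PTRACE_PEEKUSR, child_pid, (void *)PT_TEXT_ADDR, &start);
	ptrace(PTRACE_PEEKUSR, child_pid, (void *)PT_TEXT_LEN, &len);
	printf("text: 0x%lx..0x%lx\n", start, start + len);
}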
/* * Called by kernel/ptrace.c when detaching. * * Make sure the single step bit is not set. */ void ptrace_disable(struct task_struct *child) { /* Todo - pending singlesteps? */ clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE); }
int copy_thread(unsigned long clone_flags, unsigned long usp, unsigned long unused, struct task_struct *p, struct pt_regs *regs) { struct thread_info *ti = task_thread_info(p); struct pt_regs *childregs; unsigned long childksp; p->set_child_tid = p->clear_child_tid = NULL; childksp = (unsigned long)task_stack_page(p) + THREAD_SIZE - 32; preempt_disable(); if (is_fpu_owner()) save_fp(p); if (cpu_has_dsp) save_dsp(p); preempt_enable(); /* set up new TSS. */ childregs = (struct pt_regs *) childksp - 1; /* Put the stack after the struct pt_regs. */ childksp = (unsigned long) childregs; *childregs = *regs; childregs->regs[7] = 0; /* Clear error flag */ childregs->regs[2] = 0; /* Child gets zero as return value */ if (childregs->cp0_status & ST0_CU0) { childregs->regs[28] = (unsigned long) ti; childregs->regs[29] = childksp; ti->addr_limit = KERNEL_DS; } else { childregs->regs[29] = usp; ti->addr_limit = USER_DS; } p->thread.reg29 = (unsigned long) childregs; p->thread.reg31 = (unsigned long) ret_from_fork; /* * New tasks lose permission to use the fpu. This accelerates context * switching for most programs since they don't use the fpu. */ p->thread.cp0_status = read_c0_status() & ~(ST0_CU2|ST0_CU1); childregs->cp0_status &= ~(ST0_CU2|ST0_CU1); #ifdef CONFIG_MIPS_MT_SMTC /* * SMTC restores TCStatus after Status, and the CU bits * are aliased there. */ childregs->cp0_tcstatus &= ~(ST0_CU2|ST0_CU1); #endif clear_tsk_thread_flag(p, TIF_USEDFPU); #ifdef CONFIG_MIPS_MT_FPAFF clear_tsk_thread_flag(p, TIF_FPUBOUND); #endif /* CONFIG_MIPS_MT_FPAFF */ if (clone_flags & CLONE_SETTLS) ti->tp_value = regs->regs[7]; return 0; }
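copy_thread() above picks the new thread's TLS pointer out of regs->regs[7] when CLONE_SETTLS is set; on the caller's side that value is the tls argument of clone(2). A minimal userspace sketch of the pairing, assuming the glibc clone() wrapper; the TLS block here is a dummy.

#define _GNU_SOURCE
#include <sched.h>
#include <signal.h>
#include <stdio.h>
#include <sys/types.h>

static char child_stack[64 * 1024];
static unsigned long tls_block[256]; /* dummy TLS area for illustration */

static int child_fn(void *arg)
{
	return 0;
}

int main(void)
{
	/* The tls argument lands in the syscall argument register that
	 * copy_thread() reads (regs[7] on this port). */
	pid_t pid = clone(child_fn, child_stack + sizeof(child_stack),
			  CLONE_VM | CLONE_SETTLS | SIGCHLD, NULL,
			  NULL, tls_block, NULL);
	if (pid < 0)
		perror("clone");
	return 0;
}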
long arch_ptrace(struct task_struct *child, long request, long addr, long data) { int ret = -EPERM; switch (request) { /* when I and D space are separate, these will need to be fixed. */ case PTRACE_PEEKTEXT: /* read word at location addr. */ case PTRACE_PEEKDATA: ret = generic_ptrace_peekdata(child, addr, data); break; /* read the word at location addr in the USER area. */ case PTRACE_PEEKUSR: { unsigned long index, tmp; ret = -EIO; /* convert to index and check */ #ifdef CONFIG_PPC32 index = (unsigned long) addr >> 2; if ((addr & 3) || (index > PT_FPSCR) || (child->thread.regs == NULL)) #else index = (unsigned long) addr >> 3; if ((addr & 7) || (index > PT_FPSCR)) #endif break; CHECK_FULL_REGS(child->thread.regs); if (index < PT_FPR0) { tmp = ptrace_get_reg(child, (int) index); } else { flush_fp_to_thread(child); tmp = ((unsigned long *)child->thread.fpr)[index - PT_FPR0]; } ret = put_user(tmp,(unsigned long __user *) data); break; } /* If I and D space are separate, this will have to be fixed. */ case PTRACE_POKETEXT: /* write the word at location addr. */ case PTRACE_POKEDATA: ret = generic_ptrace_pokedata(child, addr, data); break; /* write the word at location addr in the USER area */ case PTRACE_POKEUSR: { unsigned long index; ret = -EIO; /* convert to index and check */ #ifdef CONFIG_PPC32 index = (unsigned long) addr >> 2; if ((addr & 3) || (index > PT_FPSCR) || (child->thread.regs == NULL)) #else index = (unsigned long) addr >> 3; if ((addr & 7) || (index > PT_FPSCR)) #endif break; CHECK_FULL_REGS(child->thread.regs); if (index < PT_FPR0) { ret = ptrace_put_reg(child, index, data); } else { flush_fp_to_thread(child); ((unsigned long *)child->thread.fpr)[index - PT_FPR0] = data; ret = 0; } break; } case PTRACE_SYSCALL: /* continue and stop at next (return from) syscall */ case PTRACE_CONT: { /* restart after signal. */ ret = -EIO; if (!valid_signal(data)) break; if (request == PTRACE_SYSCALL) set_tsk_thread_flag(child, TIF_SYSCALL_TRACE); else clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE); child->exit_code = data; /* make sure the single step bit is not set. */ clear_single_step(child); wake_up_process(child); ret = 0; break; } /* * make the child exit. Best I can do is send it a sigkill. * perhaps it should be put in the status that it wants to * exit. */ case PTRACE_KILL: { ret = 0; if (child->exit_state == EXIT_ZOMBIE) /* already dead */ break; child->exit_code = SIGKILL; /* make sure the single step bit is not set. */ clear_single_step(child); wake_up_process(child); break; } case PTRACE_SINGLESTEP: { /* set the trap flag. */ ret = -EIO; if (!valid_signal(data)) break; clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE); set_single_step(child); child->exit_code = data; /* give it a chance to run. */ wake_up_process(child); ret = 0; break; } case PTRACE_GET_DEBUGREG: { ret = -EINVAL; /* We only support one DABR and no IABRS at the moment */ if (addr > 0) break; ret = put_user(child->thread.dabr, (unsigned long __user *)data); break; } case PTRACE_SET_DEBUGREG: ret = ptrace_set_debugreg(child, addr, data); break; #ifdef CONFIG_PPC64 case PTRACE_GETREGS64: #endif case PTRACE_GETREGS: { /* Get all pt_regs from the child. 
*/ int ui; if (!access_ok(VERIFY_WRITE, (void __user *)data, sizeof(struct pt_regs))) { ret = -EIO; break; } CHECK_FULL_REGS(child->thread.regs); ret = 0; for (ui = 0; ui < PT_REGS_COUNT; ui ++) { ret |= __put_user(ptrace_get_reg(child, ui), (unsigned long __user *) data); data += sizeof(long); } break; } #ifdef CONFIG_PPC64 case PTRACE_SETREGS64: #endif case PTRACE_SETREGS: { /* Set all gp regs in the child. */ unsigned long tmp; int ui; if (!access_ok(VERIFY_READ, (void __user *)data, sizeof(struct pt_regs))) { ret = -EIO; break; } CHECK_FULL_REGS(child->thread.regs); ret = 0; for (ui = 0; ui < PT_REGS_COUNT; ui ++) { ret = __get_user(tmp, (unsigned long __user *) data); if (ret) break; ptrace_put_reg(child, ui, tmp); data += sizeof(long); } break; } case PTRACE_GETFPREGS: { /* Get the child FPU state (FPR0...31 + FPSCR) */ flush_fp_to_thread(child); ret = get_fpregs((void __user *)data, child, 1); break; } case PTRACE_SETFPREGS: { /* Set the child FPU state (FPR0...31 + FPSCR) */ flush_fp_to_thread(child); ret = set_fpregs((void __user *)data, child, 1); break; } #ifdef CONFIG_ALTIVEC case PTRACE_GETVRREGS: /* Get the child altivec register state. */ flush_altivec_to_thread(child); ret = get_vrregs((unsigned long __user *)data, child); break; case PTRACE_SETVRREGS: /* Set the child altivec register state. */ flush_altivec_to_thread(child); ret = set_vrregs(child, (unsigned long __user *)data); break; #endif #ifdef CONFIG_SPE case PTRACE_GETEVRREGS: /* Get the child spe register state. */ flush_spe_to_thread(child); ret = get_evrregs((unsigned long __user *)data, child); break; case PTRACE_SETEVRREGS: /* Set the child spe register state. */ /* this is to clear the MSR_SPE bit to force a reload * of register state from memory */ flush_spe_to_thread(child); ret = set_evrregs(child, (unsigned long __user *)data); break; #endif /* Old reverse args ptrace calls */ case PPC_PTRACE_GETREGS: /* Get GPRs 0 - 31. */ case PPC_PTRACE_SETREGS: /* Set GPRs 0 - 31. */ case PPC_PTRACE_GETFPREGS: /* Get FPRs 0 - 31. */ case PPC_PTRACE_SETFPREGS: /* Set FPRs 0 - 31. */ ret = arch_ptrace_old(child, request, addr, data); break; default: ret = ptrace_request(child, request, addr, data); break; } return ret; }
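PTRACE_GETFPREGS above copies the child's FPR0..FPR31 plus FPSCR in one call. A tracer-side sketch; the 33-doubleword buffer size is inferred from the handler's comment.

#include <stdio.h>
#include <sys/ptrace.h>
#include <sys/types.h>

/* FPR0..FPR31 followed by FPSCR, per the handler above. */
static int fetch_fpregs(pid_t child_pid, double fpr[33])
{
	if (ptrace(PTRACE_GETFPREGS, child_pid, NULL, fpr) < 0) {
		perror("PTRACE_GETFPREGS");
		return -1;
	}
	return 0;
}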
/* * arch_ptrace() * architecture specific ptrace routine. */ long arch_ptrace(struct task_struct *child, long request, long addr, long data) { int ret; switch (request) { /* when I and D space are separate, these will need to be fixed. */ case PTRACE_PEEKTEXT: /* read word at location addr. */ case PTRACE_PEEKDATA: ret = generic_ptrace_peekdata(child, addr, data); break; /* read the word at location addr in the USER area. */ case PTRACE_PEEKUSR: { unsigned long tmp; ret = -EIO; if (((unsigned long) addr > PT_INTERP_FDPIC_LOADMAP) || (addr & 3)) break; tmp = 0; /* Default return condition */ ret = -EIO; if (addr < sizeof(struct pt_regs)) { tmp = ptrace_get_reg(child, addr); } else if (addr == PT_TEXT_ADDR) { tmp = child->mm->start_code; } else if (addr == PT_TEXT_END_ADDR) { tmp = child->mm->end_code; } else if (addr == PT_DATA_ADDR) { tmp = child->mm->start_data; } else if (addr == PT_EXEC_FDPIC_LOADMAP) { #ifdef CONFIG_BINFMT_ELF_FDPIC tmp = child->mm->context.exec_fdpic_loadmap; #endif } else if (addr == PT_INTERP_FDPIC_LOADMAP) { #ifdef CONFIG_BINFMT_ELF_FDPIC tmp = child->mm->context.interp_fdpic_loadmap; #endif } else { break; } ret = put_user(tmp, (unsigned long *)data); break; } case PTRACE_POKETEXT: /* write the word at location addr. */ case PTRACE_POKEDATA: ret = generic_ptrace_pokedata(child, addr, data); /* * If we just changed some code, we need to * correct the caches */ if (request == PTRACE_POKETEXT && ret == 0) { flush_icache_range(addr, addr + 4); } break; case PTRACE_POKEUSR: /* write the word at location addr * in the USER area */ ret = -EIO; if (((unsigned long) addr > PT_DATA_ADDR) || (addr & 3)) break; if (addr < sizeof(struct pt_regs)) { ret = ptrace_put_reg(child, addr, data); } break; case PTRACE_SYSCALL: /* continue and stop at next (return from) * syscall */ case PTRACE_CONT: { /* restart after signal. */ ret = -EIO; if (!valid_signal(data)) break; if (request == PTRACE_SYSCALL) set_tsk_thread_flag(child, TIF_SYSCALL_TRACE); else clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE); child->exit_code = data; /* make sure the single step bit is not set. */ ptrace_disable_single_step(child); wake_up_process(child); ret = 0; break; } /* * make the child exit. Best I can do is send it a sigkill. * perhaps it should be put in the status that it wants to exit. */ case PTRACE_KILL: { ret = 0; if (child->exit_state == EXIT_ZOMBIE) /* already dead */ break; child->exit_code = SIGKILL; /* make sure the single step bit is not set. */ ptrace_disable_single_step(child); wake_up_process(child); break; } case PTRACE_DETACH: /* detach a process that was attached. */ ret = ptrace_detach(child, data); break; case PTRACE_GETREGS: /* Get all gp regs from the child. */ ptrace_getregs(child, (unsigned long *)data); ret = 0; break; case PTRACE_SETREGS: { /* Set all gp regs in the child. */ int i; unsigned long tmp; int count = sizeof(struct pt_regs) / sizeof(unsigned long); for (i = 0; i < count; i++) { if (get_user(tmp, (unsigned long *) data)) { ret = -EFAULT; break; } ptrace_put_reg(child, sizeof(unsigned long) * i, tmp); data += sizeof(long); } ret = 0; break; } default: return ptrace_request(child, request, addr, data); break; } return ret; }
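PTRACE_GETREGS above copies a whole struct pt_regs through data in one request, which is cheaper than a PEEKUSR loop. A sketch; the struct pt_regs layout is assumed to come from the port's asm/ptrace.h.

#include <stdio.h>
#include <sys/ptrace.h>
#include <sys/types.h>
#include <asm/ptrace.h> /* struct pt_regs for this port (assumed) */

/* One call instead of sizeof(struct pt_regs)/4 PEEKUSR round trips. */
static int fetch_regs(pid_t child_pid, struct pt_regs *regs)
{
	if (ptrace(PTRACE_GETREGS, child_pid, NULL, regs) < 0) {
		perror("PTRACE_GETREGS");
		return -1;
	}
	return 0;
}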
/* * This creates a new process as a copy of the old one, * but does not actually start it yet. * * It copies the registers, and all the appropriate * parts of the process environment (as per the clone * flags). The actual kick-off is left to the caller. */ static struct task_struct *copy_process(unsigned long clone_flags, unsigned long stack_start, struct pt_regs *regs, unsigned long stack_size, int __user *parent_tidptr, int __user *child_tidptr, int pid) { int retval; struct task_struct *p = NULL; if ((clone_flags & (CLONE_NEWNS|CLONE_FS)) == (CLONE_NEWNS|CLONE_FS)) return ERR_PTR(-EINVAL); /* * Thread groups must share signals as well, and detached threads * can only be started up within the thread group. */ if ((clone_flags & CLONE_THREAD) && !(clone_flags & CLONE_SIGHAND)) return ERR_PTR(-EINVAL); /* * Shared signal handlers imply shared VM. By way of the above, * thread groups also imply shared VM. Blocking this case allows * for various simplifications in other code. */ if ((clone_flags & CLONE_SIGHAND) && !(clone_flags & CLONE_VM)) return ERR_PTR(-EINVAL); retval = security_task_create(clone_flags); if (retval) goto fork_out; retval = -ENOMEM; p = dup_task_struct(current); if (!p) goto fork_out; rt_mutex_init_task(p); #ifdef CONFIG_TRACE_IRQFLAGS DEBUG_LOCKS_WARN_ON(!p->hardirqs_enabled); DEBUG_LOCKS_WARN_ON(!p->softirqs_enabled); #endif retval = -EAGAIN; if (atomic_read(&p->user->processes) >= p->signal->rlim[RLIMIT_NPROC].rlim_cur) { if (!capable(CAP_SYS_ADMIN) && !capable(CAP_SYS_RESOURCE) && p->user != &root_user) goto bad_fork_free; } atomic_inc(&p->user->__count); atomic_inc(&p->user->processes); get_group_info(p->group_info); /* * If multiple threads are within copy_process(), then this check * triggers too late. This doesn't hurt, the check is only there * to stop root fork bombs. 
*/ if (nr_threads >= max_threads) goto bad_fork_cleanup_count; if (!try_module_get(task_thread_info(p)->exec_domain->module)) goto bad_fork_cleanup_count; if (p->binfmt && !try_module_get(p->binfmt->module)) goto bad_fork_cleanup_put_domain; p->did_exec = 0; delayacct_tsk_init(p); /* Must remain after dup_task_struct() */ copy_flags(clone_flags, p); p->pid = pid; retval = -EFAULT; if (clone_flags & CLONE_PARENT_SETTID) if (put_user(p->pid, parent_tidptr)) goto bad_fork_cleanup_delays_binfmt; INIT_LIST_HEAD(&p->children); INIT_LIST_HEAD(&p->sibling); p->vfork_done = NULL; spin_lock_init(&p->alloc_lock); clear_tsk_thread_flag(p, TIF_SIGPENDING); init_sigpending(&p->pending); p->utime = cputime_zero; p->stime = cputime_zero; #ifdef CONFIG_TASK_XACCT p->rchar = 0; /* I/O counter: bytes read */ p->wchar = 0; /* I/O counter: bytes written */ p->syscr = 0; /* I/O counter: read syscalls */ p->syscw = 0; /* I/O counter: write syscalls */ #endif task_io_accounting_init(p); acct_clear_integrals(p); p->it_virt_expires = cputime_zero; p->it_prof_expires = cputime_zero; p->it_sched_expires = 0; INIT_LIST_HEAD(&p->cpu_timers[0]); INIT_LIST_HEAD(&p->cpu_timers[1]); INIT_LIST_HEAD(&p->cpu_timers[2]); p->lock_depth = -1; /* -1 = no lock */ do_posix_clock_monotonic_gettime(&p->start_time); p->security = NULL; p->io_context = NULL; p->io_wait = NULL; p->audit_context = NULL; cpuset_fork(p); #ifdef CONFIG_NUMA p->mempolicy = mpol_copy(p->mempolicy); if (IS_ERR(p->mempolicy)) { retval = PTR_ERR(p->mempolicy); p->mempolicy = NULL; goto bad_fork_cleanup_cpuset; } mpol_fix_fork_child_flag(p); #endif #ifdef CONFIG_TRACE_IRQFLAGS p->irq_events = 0; #ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW p->hardirqs_enabled = 1; #else p->hardirqs_enabled = 0; #endif p->hardirq_enable_ip = 0; p->hardirq_enable_event = 0; p->hardirq_disable_ip = _THIS_IP_; p->hardirq_disable_event = 0; p->softirqs_enabled = 1; p->softirq_enable_ip = _THIS_IP_; p->softirq_enable_event = 0; p->softirq_disable_ip = 0; p->softirq_disable_event = 0; p->hardirq_context = 0; p->softirq_context = 0; #endif #ifdef CONFIG_LOCKDEP p->lockdep_depth = 0; /* no locks held yet */ p->curr_chain_key = 0; p->lockdep_recursion = 0; #endif #ifdef CONFIG_DEBUG_MUTEXES p->blocked_on = NULL; /* not blocked yet */ #endif p->tgid = p->pid; if (clone_flags & CLONE_THREAD) p->tgid = current->tgid; if ((retval = security_task_alloc(p))) goto bad_fork_cleanup_policy; if ((retval = audit_alloc(p))) goto bad_fork_cleanup_security; /* copy all the process information */ if ((retval = copy_semundo(clone_flags, p))) goto bad_fork_cleanup_audit; if ((retval = copy_files(clone_flags, p))) goto bad_fork_cleanup_semundo; if ((retval = copy_fs(clone_flags, p))) goto bad_fork_cleanup_files; if ((retval = copy_sighand(clone_flags, p))) goto bad_fork_cleanup_fs; if ((retval = copy_signal(clone_flags, p))) goto bad_fork_cleanup_sighand; if ((retval = copy_mm(clone_flags, p))) goto bad_fork_cleanup_signal; if ((retval = copy_keys(clone_flags, p))) goto bad_fork_cleanup_mm; if ((retval = copy_namespaces(clone_flags, p))) goto bad_fork_cleanup_keys; retval = copy_thread(0, clone_flags, stack_start, stack_size, p, regs); if (retval) goto bad_fork_cleanup_namespaces; p->set_child_tid = (clone_flags & CLONE_CHILD_SETTID) ? child_tidptr : NULL; /* * Clear TID on mm_release()? */ p->clear_child_tid = (clone_flags & CLONE_CHILD_CLEARTID) ? 
child_tidptr: NULL; p->robust_list = NULL; #ifdef CONFIG_COMPAT p->compat_robust_list = NULL; #endif INIT_LIST_HEAD(&p->pi_state_list); p->pi_state_cache = NULL; /* * sigaltstack should be cleared when sharing the same VM */ if ((clone_flags & (CLONE_VM|CLONE_VFORK)) == CLONE_VM) p->sas_ss_sp = p->sas_ss_size = 0; /* * Syscall tracing should be turned off in the child regardless * of CLONE_PTRACE. */ clear_tsk_thread_flag(p, TIF_SYSCALL_TRACE); #ifdef TIF_SYSCALL_EMU clear_tsk_thread_flag(p, TIF_SYSCALL_EMU); #endif /* Our parent execution domain becomes current domain. These must match for thread signalling to apply */ p->parent_exec_id = p->self_exec_id; /* ok, now we should be set up.. */ p->exit_signal = (clone_flags & CLONE_THREAD) ? -1 : (clone_flags & CSIGNAL); p->pdeath_signal = 0; p->exit_state = 0; /* * Ok, make it visible to the rest of the system. * We don't wake it up yet. */ p->group_leader = p; INIT_LIST_HEAD(&p->thread_group); INIT_LIST_HEAD(&p->ptrace_children); INIT_LIST_HEAD(&p->ptrace_list); /* Perform scheduler related setup. Assign this task to a CPU. */ sched_fork(p, clone_flags); /* Need tasklist lock for parent etc handling! */ write_lock_irq(&tasklist_lock); /* for sys_ioprio_set(IOPRIO_WHO_PGRP) */ p->ioprio = current->ioprio; /* * The task hasn't been attached yet, so its cpus_allowed mask will * not be changed, nor will its assigned CPU. * * The cpus_allowed mask of the parent may have changed after it was * copied first time - so re-copy it here, then check the child's CPU * to ensure it is on a valid CPU (and if not, just force it back to * parent's CPU). This avoids a lot of nasty races. */ p->cpus_allowed = current->cpus_allowed; if (unlikely(!cpu_isset(task_cpu(p), p->cpus_allowed) || !cpu_online(task_cpu(p)))) set_task_cpu(p, smp_processor_id()); /* CLONE_PARENT re-uses the old parent */ if (clone_flags & (CLONE_PARENT|CLONE_THREAD)) p->real_parent = current->real_parent; else p->real_parent = current; p->parent = p->real_parent; spin_lock(&current->sighand->siglock); /* * Process group and session signals need to be delivered to just the * parent before the fork or both the parent and the child after the * fork. Restart if a signal comes in before we add the new process to * its process group. * A fatal signal pending means that current will exit, so the new * thread can't slip out of an OOM kill (or normal SIGKILL). */ recalc_sigpending(); if (signal_pending(current)) { spin_unlock(&current->sighand->siglock); write_unlock_irq(&tasklist_lock); retval = -ERESTARTNOINTR; goto bad_fork_cleanup_namespaces; } if (clone_flags & CLONE_THREAD) { p->group_leader = current->group_leader; list_add_tail_rcu(&p->thread_group, &p->group_leader->thread_group); if (!cputime_eq(current->signal->it_virt_expires, cputime_zero) || !cputime_eq(current->signal->it_prof_expires, cputime_zero) || current->signal->rlim[RLIMIT_CPU].rlim_cur != RLIM_INFINITY || !list_empty(&current->signal->cpu_timers[0]) || !list_empty(&current->signal->cpu_timers[1]) || !list_empty(&current->signal->cpu_timers[2])) { /* * Have child wake up on its first tick to check * for process CPU timers. 
*/ p->it_prof_expires = jiffies_to_cputime(1); } } if (likely(p->pid)) { add_parent(p); if (unlikely(p->ptrace & PT_PTRACED)) __ptrace_link(p, current->parent); if (thread_group_leader(p)) { p->signal->tty = current->signal->tty; p->signal->pgrp = process_group(current); set_signal_session(p->signal, process_session(current)); attach_pid(p, PIDTYPE_PGID, process_group(p)); attach_pid(p, PIDTYPE_SID, process_session(p)); list_add_tail_rcu(&p->tasks, &init_task.tasks); __get_cpu_var(process_counts)++; } attach_pid(p, PIDTYPE_PID, p->pid); nr_threads++; } total_forks++; spin_unlock(&current->sighand->siglock); write_unlock_irq(&tasklist_lock); proc_fork_connector(p); return p; bad_fork_cleanup_namespaces: exit_task_namespaces(p); bad_fork_cleanup_keys: exit_keys(p); bad_fork_cleanup_mm: if (p->mm) mmput(p->mm); bad_fork_cleanup_signal: cleanup_signal(p); bad_fork_cleanup_sighand: __cleanup_sighand(p->sighand); bad_fork_cleanup_fs: exit_fs(p); /* blocking */ bad_fork_cleanup_files: exit_files(p); /* blocking */ bad_fork_cleanup_semundo: exit_sem(p); bad_fork_cleanup_audit: audit_free(p); bad_fork_cleanup_security: security_task_free(p); bad_fork_cleanup_policy: #ifdef CONFIG_NUMA mpol_free(p->mempolicy); bad_fork_cleanup_cpuset: #endif cpuset_exit(p); bad_fork_cleanup_delays_binfmt: delayacct_tsk_free(p); if (p->binfmt) module_put(p->binfmt->module); bad_fork_cleanup_put_domain: module_put(task_thread_info(p)->exec_domain->module); bad_fork_cleanup_count: put_group_info(p->group_info); atomic_dec(&p->user->processes); free_uid(p->user); bad_fork_free: free_task(p); fork_out: return ERR_PTR(retval); }
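The flag checks at the top of copy_process() are observable from userspace: CLONE_THREAD without CLONE_SIGHAND, or CLONE_SIGHAND without CLONE_VM, fails with EINVAL (this is also documented in clone(2)). A sketch using the raw syscall, assuming the x86-64 clone argument order.

#define _GNU_SOURCE
#include <errno.h>
#include <sched.h>
#include <stdio.h>
#include <sys/syscall.h>
#include <unistd.h>

int main(void)
{
	/* CLONE_THREAD without CLONE_SIGHAND: rejected by copy_process(). */
	long ret = syscall(SYS_clone, CLONE_THREAD, 0, NULL, NULL, 0);

	if (ret < 0 && errno == EINVAL)
		printf("rejected as expected: EINVAL\n");
	return 0;
}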
/* * Our handling of the processor debug registers is non-trivial. * We do not clear them on entry and exit from the kernel. Therefore * it is possible to get a watchpoint trap here from inside the kernel. * However, the code in ./ptrace.c has ensured that the user can * only set watchpoints on userspace addresses. Therefore the in-kernel * watchpoint trap can only occur in code which is reading/writing * from user space. Such code must not hold kernel locks (since it * can equally take a page fault), therefore it is safe to call * force_sig_info even though that claims and releases locks. * * Code in ./signal.c ensures that the debug control register * is restored before we deliver any signal, and therefore that * user code runs with the correct debug control register even though * we clear it here. * * Being careful here means that we don't have to be as careful in a * lot of more complicated places (task switching can be a bit lazy * about restoring all the debug state, and ptrace doesn't have to * find every occurrence of the TF bit that could be saved away even * by user code) * * May run on IST stack. */ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code) { struct task_struct *tsk = current; unsigned long condition; int si_code; get_debugreg(condition, 6); /* * The processor cleared BTF, so don't mark that we need it set. */ clear_tsk_thread_flag(tsk, TIF_DEBUGCTLMSR); tsk->thread.debugctlmsr = 0; if (notify_die(DIE_DEBUG, "debug", regs, condition, error_code, SIGTRAP) == NOTIFY_STOP) return; /* It's safe to allow irq's after DR6 has been saved */ preempt_conditional_sti(regs); /* Mask out spurious debug traps due to lazy DR7 setting */ if (condition & (DR_TRAP0|DR_TRAP1|DR_TRAP2|DR_TRAP3)) { if (!tsk->thread.debugreg7) goto clear_dr7; } #ifdef CONFIG_X86_32 if (v8086_mode(regs)) goto debug_vm86; #endif /* Save debug status register where ptrace can see it */ tsk->thread.debugreg6 = condition; /* * Single-stepping through TF: make sure we ignore any events in * kernel space (but re-enable TF when returning to user mode). */ if (condition & DR_STEP) { if (!user_mode_novm(regs)) goto clear_TF_reenable; } si_code = get_si_code(condition); /* Ok, finally something we can handle */ send_sigtrap(tsk, regs, error_code, si_code); /* * Disable additional traps. They'll be re-enabled when * the signal is delivered. */ clear_dr7: set_debugreg(0, 7); preempt_conditional_cli(regs); return; #ifdef CONFIG_X86_32 debug_vm86: /* reenable preemption: handle_vm86_trap() might sleep */ dec_preempt_count(); handle_vm86_trap((struct kernel_vm86_regs *) regs, error_code, 1); conditional_cli(regs); return; #endif clear_TF_reenable: set_tsk_thread_flag(tsk, TIF_SINGLESTEP); regs->flags &= ~X86_EFLAGS_TF; preempt_conditional_cli(regs); return; }
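do_debug() above parks the DR6 image in thread.debugreg6 so the tracer can inspect it. On x86 a tracer reads it back with PTRACE_PEEKUSER at the u_debugreg offset of struct user; a sketch of a single-step-then-inspect round trip.

#include <stddef.h>
#include <sys/ptrace.h>
#include <sys/types.h>
#include <sys/user.h>
#include <sys/wait.h>

/* Step one instruction, then read the DR6 value the kernel saved. */
static long step_and_read_dr6(pid_t child_pid)
{
	int status;

	ptrace(PTRACE_SINGLESTEP, child_pid, NULL, NULL);
	waitpid(child_pid, &status, 0);
	return ptrace(PTRACE_PEEKUSER, child_pid,
		      (void *)offsetof(struct user, u_debugreg[6]), NULL);
}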
/* * This routine handles page faults. It determines the address, * and the problem, and then passes it off to one of the appropriate * routines. * * error_code: * 04 Protection -> Write-Protection (suppression) * 10 Segment translation -> Not present (nullification) * 11 Page translation -> Not present (nullification) * 3b Region third trans. -> Not present (nullification) */ extern inline void do_exception(struct pt_regs *regs, unsigned long error_code, int is_protection) { struct task_struct *tsk; struct mm_struct *mm; struct vm_area_struct * vma; unsigned long address; int user_address; const struct exception_table_entry *fixup; int si_code = SEGV_MAPERR; tsk = current; mm = tsk->mm; /* * Check for low-address protection. This needs to be treated * as a special case because the translation exception code * field is not guaranteed to contain valid data in this case. */ if (is_protection && !(S390_lowcore.trans_exc_code & 4)) { /* Low-address protection hit in kernel mode means NULL pointer write access in kernel mode. */ if (!(regs->psw.mask & PSW_MASK_PSTATE)) { address = 0; user_address = 0; goto no_context; } /* Low-address protection hit in user mode 'cannot happen'. */ die ("Low-address protection", regs, error_code); do_exit(SIGKILL); } /* * get the failing address; more specifically, the segment and * page table portion of the address */ address = S390_lowcore.trans_exc_code & __FAIL_ADDR_MASK; user_address = check_user_space(regs, error_code); /* * Verify that the fault happened in user space, that * we are not in an interrupt and that there is a * user context. */ if (user_address == 0 || in_atomic() || !mm) goto no_context; /* * When we get here, the fault happened in the current * task's user address space, so we can switch on the * interrupts again and then search the VMAs */ local_irq_enable(); down_read(&mm->mmap_sem); vma = find_vma(mm, address); if (!vma) goto bad_area; if (vma->vm_start <= address) goto good_area; if (!(vma->vm_flags & VM_GROWSDOWN)) goto bad_area; if (expand_stack(vma, address)) goto bad_area; /* * Ok, we have a good vm_area for this memory access, so * we can handle it.. */ good_area: si_code = SEGV_ACCERR; if (!is_protection) { /* page not present, check vm flags */ if (!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE))) goto bad_area; } else { if (!(vma->vm_flags & VM_WRITE)) goto bad_area; } survive: /* * If for any reason at all we couldn't handle the fault, * make sure we exit gracefully rather than endlessly redo * the fault. */ switch (handle_mm_fault(mm, vma, address, is_protection)) { case VM_FAULT_MINOR: tsk->min_flt++; break; case VM_FAULT_MAJOR: tsk->maj_flt++; break; case VM_FAULT_SIGBUS: goto do_sigbus; case VM_FAULT_OOM: goto out_of_memory; default: BUG(); } up_read(&mm->mmap_sem); /* * The instruction that caused the program check will * be repeated. Don't signal single step via SIGTRAP. */ clear_tsk_thread_flag(current, TIF_SINGLE_STEP); return; /* * Something tried to access memory that isn't in our memory map.. * Fix it, but check if it's kernel or user first.. */ bad_area: up_read(&mm->mmap_sem); /* User mode accesses just cause a SIGSEGV */ if (regs->psw.mask & PSW_MASK_PSTATE) { tsk->thread.prot_addr = address; tsk->thread.trap_no = error_code; do_sigsegv(regs, error_code, si_code, address); return; } no_context: /* Are we prepared to handle this kernel fault? */ fixup = search_exception_tables(regs->psw.addr & __FIXUP_MASK); if (fixup) { regs->psw.addr = fixup->fixup | PSW_ADDR_AMODE; return; } /* * Oops. 
The kernel tried to access some bad page. We'll have to * terminate things with extreme prejudice. */ if (user_address == 0) printk(KERN_ALERT "Unable to handle kernel pointer dereference" " at virtual kernel address %p\n", (void *)address); else printk(KERN_ALERT "Unable to handle kernel paging request" " at virtual user address %p\n", (void *)address); die("Oops", regs, error_code); do_exit(SIGKILL); /* * We ran out of memory, or some other thing happened to us that made * us unable to handle the page fault gracefully. */ out_of_memory: up_read(&mm->mmap_sem); if (tsk->pid == 1) { yield(); goto survive; } printk("VM: killing process %s\n", tsk->comm); if (regs->psw.mask & PSW_MASK_PSTATE) do_exit(SIGKILL); goto no_context; do_sigbus: up_read(&mm->mmap_sem); /* * Send a sigbus, regardless of whether we were in kernel * or user mode. */ tsk->thread.prot_addr = address; tsk->thread.trap_no = error_code; force_sig(SIGBUS, tsk); /* Kernel mode? Handle exceptions or die */ if (!(regs->psw.mask & PSW_MASK_PSTATE)) goto no_context; }
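The handler distinguishes SEGV_MAPERR (no mapping at all) from SEGV_ACCERR (mapping present but wrong permissions) before signalling. The distinction is visible from userspace via siginfo; a portable sketch (printf in a signal handler is not async-signal-safe, tolerated here for illustration).

#include <signal.h>
#include <stdio.h>
#include <unistd.h>

static void segv_handler(int sig, siginfo_t *si, void *ctx)
{
	const char *kind = (si->si_code == SEGV_ACCERR) ? "SEGV_ACCERR"
							: "SEGV_MAPERR";
	printf("fault at %p: %s\n", si->si_addr, kind);
	_exit(0);
}

int main(void)
{
	struct sigaction sa = { 0 };

	sa.sa_sigaction = segv_handler;
	sa.sa_flags = SA_SIGINFO;
	sigaction(SIGSEGV, &sa, NULL);
	*(volatile int *)8 = 1; /* unmapped address -> SEGV_MAPERR */
	return 0;
}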
static int zap_threads(struct task_struct *tsk, struct mm_struct *mm, struct core_state *core_state, int exit_code) { struct task_struct *g, *p; unsigned long flags; int nr = -EAGAIN; spin_lock_irq(&tsk->sighand->siglock); if (!signal_group_exit(tsk->signal)) { mm->core_state = core_state; nr = zap_process(tsk, exit_code); tsk->signal->group_exit_task = tsk; /* ignore all signals except SIGKILL, see prepare_signal() */ tsk->signal->flags = SIGNAL_GROUP_COREDUMP; clear_tsk_thread_flag(tsk, TIF_SIGPENDING); } spin_unlock_irq(&tsk->sighand->siglock); if (unlikely(nr < 0)) return nr; tsk->flags |= PF_DUMPCORE; if (atomic_read(&mm->mm_users) == nr + 1) goto done; /* * We should find and kill all tasks which use this mm, and we should * count them correctly into ->nr_threads. We don't take tasklist * lock, but this is safe wrt: * * fork: * None of sub-threads can fork after zap_process(leader). All * processes which were created before this point should be * visible to zap_threads() because copy_process() adds the new * process to the tail of init_task.tasks list, and lock/unlock * of ->siglock provides a memory barrier. * * do_exit: * The caller holds mm->mmap_sem. This means that the task which * uses this mm can't pass exit_mm(), so it can't exit or clear * its ->mm. * * de_thread: * It does list_replace_rcu(&leader->tasks, &current->tasks), * we must see either old or new leader, this does not matter. * However, it can change p->sighand, so lock_task_sighand(p) * must be used. Since p->mm != NULL and we hold ->mmap_sem * it can't fail. * * Note also that "g" can be the old leader with ->mm == NULL * and already unhashed and thus removed from ->thread_group. * This is OK, __unhash_process()->list_del_rcu() does not * clear the ->next pointer, we will find the new leader via * next_thread(). */ rcu_read_lock(); for_each_process(g) { if (g == tsk->group_leader) continue; if (g->flags & PF_KTHREAD) continue; p = g; do { if (p->mm) { if (unlikely(p->mm == mm)) { lock_task_sighand(p, &flags); nr += zap_process(p, exit_code); p->signal->flags = SIGNAL_GROUP_EXIT; unlock_task_sighand(p, &flags); } break; } } while_each_thread(g, p); } rcu_read_unlock(); done: atomic_set(&core_state->nr_threads, nr); return nr; }
/* * This creates a new process as a copy of the old one, * but does not actually start it yet. * * It copies the registers, and all the appropriate * parts of the process environment (as per the clone * flags). The actual kick-off is left to the caller. */ static struct task_struct *copy_process(unsigned long clone_flags, unsigned long stack_start, struct pt_regs *regs, unsigned long stack_size, int __user *child_tidptr, struct pid *pid, int trace) { int retval; struct task_struct *p; int cgroup_callbacks_done = 0; if ((clone_flags & (CLONE_NEWNS|CLONE_FS)) == (CLONE_NEWNS|CLONE_FS)) return ERR_PTR(-EINVAL); /* * Thread groups must share signals as well, and detached threads * can only be started up within the thread group. */ if ((clone_flags & CLONE_THREAD) && !(clone_flags & CLONE_SIGHAND)) return ERR_PTR(-EINVAL); /* * Shared signal handlers imply shared VM. By way of the above, * thread groups also imply shared VM. Blocking this case allows * for various simplifications in other code. */ if ((clone_flags & CLONE_SIGHAND) && !(clone_flags & CLONE_VM)) return ERR_PTR(-EINVAL); retval = security_task_create(clone_flags); if (retval) goto fork_out; retval = -ENOMEM; p = dup_task_struct(current); if (!p) goto fork_out; rt_mutex_init_task(p); #ifdef CONFIG_PROVE_LOCKING DEBUG_LOCKS_WARN_ON(!p->hardirqs_enabled); DEBUG_LOCKS_WARN_ON(!p->softirqs_enabled); #endif retval = -EAGAIN; if (atomic_read(&p->real_cred->user->processes) >= p->signal->rlim[RLIMIT_NPROC].rlim_cur) { if (!capable(CAP_SYS_ADMIN) && !capable(CAP_SYS_RESOURCE) && p->real_cred->user != INIT_USER) goto bad_fork_free; } retval = copy_creds(p, clone_flags); if (retval < 0) goto bad_fork_free; /* * If multiple threads are within copy_process(), then this check * triggers too late. This doesn't hurt, the check is only there * to stop root fork bombs. 
*/ retval = -EAGAIN; if (nr_threads >= max_threads) goto bad_fork_cleanup_count; if (!try_module_get(task_thread_info(p)->exec_domain->module)) goto bad_fork_cleanup_count; if (p->binfmt && !try_module_get(p->binfmt->module)) goto bad_fork_cleanup_put_domain; p->did_exec = 0; delayacct_tsk_init(p); /* Must remain after dup_task_struct() */ copy_flags(clone_flags, p); INIT_LIST_HEAD(&p->children); INIT_LIST_HEAD(&p->sibling); #ifdef CONFIG_PREEMPT_RCU p->rcu_read_lock_nesting = 0; p->rcu_flipctr_idx = 0; #endif /* #ifdef CONFIG_PREEMPT_RCU */ p->vfork_done = NULL; spin_lock_init(&p->alloc_lock); clear_tsk_thread_flag(p, TIF_SIGPENDING); init_sigpending(&p->pending); p->utime = cputime_zero; p->stime = cputime_zero; p->gtime = cputime_zero; p->utimescaled = cputime_zero; p->stimescaled = cputime_zero; p->prev_utime = cputime_zero; p->prev_stime = cputime_zero; p->default_timer_slack_ns = current->timer_slack_ns; #ifdef CONFIG_DETECT_SOFTLOCKUP p->last_switch_count = 0; p->last_switch_timestamp = 0; #endif task_io_accounting_init(&p->ioac); acct_clear_integrals(p); posix_cpu_timers_init(p); p->lock_depth = -1; /* -1 = no lock */ do_posix_clock_monotonic_gettime(&p->start_time); p->real_start_time = p->start_time; monotonic_to_bootbased(&p->real_start_time); p->io_context = NULL; p->audit_context = NULL; cgroup_fork(p); #ifdef CONFIG_NUMA p->mempolicy = mpol_dup(p->mempolicy); if (IS_ERR(p->mempolicy)) { retval = PTR_ERR(p->mempolicy); p->mempolicy = NULL; goto bad_fork_cleanup_cgroup; } mpol_fix_fork_child_flag(p); #endif #ifdef CONFIG_TRACE_IRQFLAGS p->irq_events = 0; #ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW p->hardirqs_enabled = 1; #else p->hardirqs_enabled = 0; #endif p->hardirq_enable_ip = 0; p->hardirq_enable_event = 0; p->hardirq_disable_ip = _THIS_IP_; p->hardirq_disable_event = 0; p->softirqs_enabled = 1; p->softirq_enable_ip = _THIS_IP_; p->softirq_enable_event = 0; p->softirq_disable_ip = 0; p->softirq_disable_event = 0; p->hardirq_context = 0; p->softirq_context = 0; #endif #ifdef CONFIG_LOCKDEP p->lockdep_depth = 0; /* no locks held yet */ p->curr_chain_key = 0; p->lockdep_recursion = 0; #endif #ifdef CONFIG_DEBUG_MUTEXES p->blocked_on = NULL; /* not blocked yet */ #endif if (unlikely(current->ptrace)) ptrace_fork(p, clone_flags); /* Perform scheduler related setup. Assign this task to a CPU. 
*/ sched_fork(p, clone_flags); if ((retval = audit_alloc(p))) goto bad_fork_cleanup_policy; /* copy all the process information */ if ((retval = copy_semundo(clone_flags, p))) goto bad_fork_cleanup_audit; if ((retval = copy_files(clone_flags, p))) goto bad_fork_cleanup_semundo; if ((retval = copy_fs(clone_flags, p))) goto bad_fork_cleanup_files; if ((retval = copy_sighand(clone_flags, p))) goto bad_fork_cleanup_fs; if ((retval = copy_signal(clone_flags, p))) goto bad_fork_cleanup_sighand; if ((retval = copy_mm(clone_flags, p))) goto bad_fork_cleanup_signal; if ((retval = copy_namespaces(clone_flags, p))) goto bad_fork_cleanup_mm; if ((retval = copy_io(clone_flags, p))) goto bad_fork_cleanup_namespaces; retval = copy_thread(clone_flags, stack_start, stack_size, p, regs); if (retval) goto bad_fork_cleanup_io; if (pid != &init_struct_pid) { retval = -ENOMEM; pid = alloc_pid(p->nsproxy->pid_ns); if (!pid) goto bad_fork_cleanup_io; if (clone_flags & CLONE_NEWPID) { retval = pid_ns_prepare_proc(p->nsproxy->pid_ns); if (retval < 0) goto bad_fork_free_pid; } } ftrace_graph_init_task(p); p->pid = pid_nr(pid); p->tgid = p->pid; if (clone_flags & CLONE_THREAD) p->tgid = current->tgid; if (current->nsproxy != p->nsproxy) { retval = ns_cgroup_clone(p, pid); if (retval) goto bad_fork_free_graph; } p->set_child_tid = (clone_flags & CLONE_CHILD_SETTID) ? child_tidptr : NULL; /* * Clear TID on mm_release()? */ p->clear_child_tid = (clone_flags & CLONE_CHILD_CLEARTID) ? child_tidptr: NULL; #ifdef CONFIG_FUTEX p->robust_list = NULL; #ifdef CONFIG_COMPAT p->compat_robust_list = NULL; #endif INIT_LIST_HEAD(&p->pi_state_list); p->pi_state_cache = NULL; #endif /* * sigaltstack should be cleared when sharing the same VM */ if ((clone_flags & (CLONE_VM|CLONE_VFORK)) == CLONE_VM) p->sas_ss_sp = p->sas_ss_size = 0; /* * Syscall tracing should be turned off in the child regardless * of CLONE_PTRACE. */ clear_tsk_thread_flag(p, TIF_SYSCALL_TRACE); #ifdef TIF_SYSCALL_EMU clear_tsk_thread_flag(p, TIF_SYSCALL_EMU); #endif clear_all_latency_tracing(p); /* ok, now we should be set up.. */ p->exit_signal = (clone_flags & CLONE_THREAD) ? -1 : (clone_flags & CSIGNAL); p->pdeath_signal = 0; p->exit_state = 0; /* * Ok, make it visible to the rest of the system. * We don't wake it up yet. */ p->group_leader = p; INIT_LIST_HEAD(&p->thread_group); /* Now that the task is set up, run cgroup callbacks if * necessary. We need to run them before the task is visible * on the tasklist. */ cgroup_fork_callbacks(p); cgroup_callbacks_done = 1; /* Need tasklist lock for parent etc handling! */ write_lock_irq(&tasklist_lock); /* * The task hasn't been attached yet, so its cpus_allowed mask will * not be changed, nor will its assigned CPU. * * The cpus_allowed mask of the parent may have changed after it was * copied first time - so re-copy it here, then check the child's CPU * to ensure it is on a valid CPU (and if not, just force it back to * parent's CPU). This avoids a lot of nasty races. 
*/ p->cpus_allowed = current->cpus_allowed; p->rt.nr_cpus_allowed = current->rt.nr_cpus_allowed; if (unlikely(!cpu_isset(task_cpu(p), p->cpus_allowed) || !cpu_online(task_cpu(p)))) set_task_cpu(p, smp_processor_id()); /* CLONE_PARENT re-uses the old parent */ if (clone_flags & (CLONE_PARENT|CLONE_THREAD)) { p->real_parent = current->real_parent; p->parent_exec_id = current->parent_exec_id; } else { p->real_parent = current; p->parent_exec_id = current->self_exec_id; } spin_lock(&current->sighand->siglock); /* * Process group and session signals need to be delivered to just the * parent before the fork or both the parent and the child after the * fork. Restart if a signal comes in before we add the new process to * its process group. * A fatal signal pending means that current will exit, so the new * thread can't slip out of an OOM kill (or normal SIGKILL). */ recalc_sigpending(); if (signal_pending(current)) { spin_unlock(&current->sighand->siglock); write_unlock_irq(&tasklist_lock); retval = -ERESTARTNOINTR; goto bad_fork_free_graph; } if (clone_flags & CLONE_THREAD) { p->group_leader = current->group_leader; list_add_tail_rcu(&p->thread_group, &p->group_leader->thread_group); } if (likely(p->pid)) { list_add_tail(&p->sibling, &p->real_parent->children); tracehook_finish_clone(p, clone_flags, trace); if (thread_group_leader(p)) { if (clone_flags & CLONE_NEWPID) p->nsproxy->pid_ns->child_reaper = p; p->signal->leader_pid = pid; tty_kref_put(p->signal->tty); p->signal->tty = tty_kref_get(current->signal->tty); attach_pid(p, PIDTYPE_PGID, task_pgrp(current)); attach_pid(p, PIDTYPE_SID, task_session(current)); list_add_tail_rcu(&p->tasks, &init_task.tasks); __get_cpu_var(process_counts)++; } attach_pid(p, PIDTYPE_PID, pid); nr_threads++; } #ifdef CONFIG_LTT_LITE ltt_lite_ev_process(LTT_LITE_EV_PROCESS_FORK, p); #endif total_forks++; spin_unlock(&current->sighand->siglock); write_unlock_irq(&tasklist_lock); proc_fork_connector(p); cgroup_post_fork(p); return p; bad_fork_free_graph: ftrace_graph_exit_task(p); bad_fork_free_pid: if (pid != &init_struct_pid) free_pid(pid); bad_fork_cleanup_io: put_io_context(p->io_context); bad_fork_cleanup_namespaces: exit_task_namespaces(p); bad_fork_cleanup_mm: if (p->mm) mmput(p->mm); bad_fork_cleanup_signal: cleanup_signal(p); bad_fork_cleanup_sighand: __cleanup_sighand(p->sighand); bad_fork_cleanup_fs: exit_fs(p); /* blocking */ bad_fork_cleanup_files: exit_files(p); /* blocking */ bad_fork_cleanup_semundo: exit_sem(p); bad_fork_cleanup_audit: audit_free(p); bad_fork_cleanup_policy: #ifdef CONFIG_NUMA mpol_put(p->mempolicy); bad_fork_cleanup_cgroup: #endif cgroup_exit(p, cgroup_callbacks_done); delayacct_tsk_free(p); if (p->binfmt) module_put(p->binfmt->module); bad_fork_cleanup_put_domain: module_put(task_thread_info(p)->exec_domain->module); bad_fork_cleanup_count: atomic_dec(&p->cred->user->processes); put_cred(p->real_cred); put_cred(p->cred); bad_fork_free: free_task(p); fork_out: return ERR_PTR(retval); }
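With CLONE_NEWPID the code above makes the first task in the new namespace its child_reaper, so that task sees itself as PID 1. A privileged sketch, assuming the x86-64 raw clone argument order and a glibc that does not cache getpid().

#define _GNU_SOURCE
#include <sched.h>
#include <signal.h>
#include <stdio.h>
#include <sys/syscall.h>
#include <sys/wait.h>
#include <unistd.h>

int main(void)
{
	/* Needs CAP_SYS_ADMIN. */
	long pid = syscall(SYS_clone, CLONE_NEWPID | SIGCHLD, 0, NULL, NULL, 0);

	if (pid == 0) {
		printf("in new namespace: getpid() = %d\n", (int)getpid());
		_exit(0);
	}
	if (pid > 0)
		waitpid((pid_t)pid, NULL, 0);
	return 0;
}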
/** * __context_tracking_task_switch - context switch the syscall callbacks * @prev: the task that is being switched out * @next: the task that is being switched in * * The context tracking uses the syscall slow path to implement its user-kernel * boundary probes on syscalls. This way it doesn't impact the syscall fast * path on CPUs that don't do context tracking. * * But we need to clear the flag on the previous task because it may later * migrate to some CPU that doesn't do the context tracking. As such the TIF * flag may not be desired there. */ void __context_tracking_task_switch(struct task_struct *prev, struct task_struct *next) { clear_tsk_thread_flag(prev, TIF_NOHZ); set_tsk_thread_flag(next, TIF_NOHZ); }
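For context, a sketch of the guard mainline wraps around this function so the TIF_NOHZ handoff stays off the scheduler fast path when context tracking is inactive; the predicate name varies by kernel version and is an assumption here:

/* Sketch: only pay for the flag juggling when context tracking is
 * enabled somewhere; context_tracking_is_enabled() is illustrative. */
static inline void context_tracking_task_switch(struct task_struct *prev,
						struct task_struct *next)
{
	if (context_tracking_is_enabled())
		__context_tracking_task_switch(prev, next);
}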
/* * This routine handles page faults. It determines the address, * and the problem, and then passes it off to one of the appropriate * routines. * * interruption code (int_code): * 04 Protection -> Write-Protection (suppression) * 10 Segment translation -> Not present (nullification) * 11 Page translation -> Not present (nullification) * 3b Region third trans. -> Not present (nullification) */ static inline int do_exception(struct pt_regs *regs, int access) { struct task_struct *tsk; struct mm_struct *mm; struct vm_area_struct *vma; unsigned long trans_exc_code; unsigned long address; unsigned int flags; int fault; if (notify_page_fault(regs)) return 0; tsk = current; mm = tsk->mm; trans_exc_code = regs->int_parm_long; /* * Verify that the fault happened in user space, that * we are not in an interrupt and that there is a * user context. */ fault = VM_FAULT_BADCONTEXT; if (unlikely(!user_space_fault(trans_exc_code) || !mm || pagefault_disabled())) goto out; address = trans_exc_code & __FAIL_ADDR_MASK; perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address); flags = FAULT_FLAG_ALLOW_RETRY; if (access == VM_WRITE || (trans_exc_code & store_indication) == 0x400) flags |= FAULT_FLAG_WRITE; down_read(&mm->mmap_sem); #ifdef CONFIG_PGSTE if (test_tsk_thread_flag(current, TIF_SIE) && S390_lowcore.gmap) { address = __gmap_fault(address, (struct gmap *) S390_lowcore.gmap); if (address == -EFAULT) { fault = VM_FAULT_BADMAP; goto out_up; } if (address == -ENOMEM) { fault = VM_FAULT_OOM; goto out_up; } } #endif retry: fault = VM_FAULT_BADMAP; vma = find_vma(mm, address); if (!vma) goto out_up; if (unlikely(vma->vm_start > address)) { if (!(vma->vm_flags & VM_GROWSDOWN)) goto out_up; if (expand_stack(vma, address)) goto out_up; } /* * Ok, we have a good vm_area for this memory access, so * we can handle it.. */ fault = VM_FAULT_BADACCESS; if (unlikely(!(vma->vm_flags & access))) goto out_up; if (is_vm_hugetlb_page(vma)) address &= HPAGE_MASK; /* * If for any reason at all we couldn't handle the fault, * make sure we exit gracefully rather than endlessly redo * the fault. */ fault = handle_mm_fault(mm, vma, address, flags); if (unlikely(fault & VM_FAULT_ERROR)) goto out_up; /* * Major/minor page fault accounting is only done on the * initial attempt. If we go through a retry, it is extremely * likely that the page will be found in page cache at that point. */ if (flags & FAULT_FLAG_ALLOW_RETRY) { if (fault & VM_FAULT_MAJOR) { tsk->maj_flt++; perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, regs, address); } else { tsk->min_flt++; perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, regs, address); } if (fault & VM_FAULT_RETRY) { /* Clear FAULT_FLAG_ALLOW_RETRY to avoid any risk * of starvation. */ flags &= ~FAULT_FLAG_ALLOW_RETRY; down_read(&mm->mmap_sem); goto retry; } } /* * The instruction that caused the program check will * be repeated. Don't signal single step via SIGTRAP. */ clear_tsk_thread_flag(tsk, TIF_PER_TRAP); fault = 0; out_up: up_read(&mm->mmap_sem); out: return fault; }
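do_exception() is not called directly; thin per-interruption-code entry points pick the access mask and map the VM_FAULT_* result to a signal or kernel fixup. A sketch of such a wrapper in the s390 style, with do_fault_error() standing in for the error path and assumed rather than copied from the source:

/* Sketch of an entry point feeding do_exception(); do_fault_error()
 * here represents the arch's SIGSEGV/SIGBUS/fixup handling. */
void do_dat_exception(struct pt_regs *regs)
{
	int access = VM_READ | VM_EXEC | VM_WRITE;
	int fault = do_exception(regs, access);

	if (unlikely(fault))
		do_fault_error(regs, fault);
}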
/* * This creates a new process as a copy of the old one, * but does not actually start it yet. * * It copies the registers, and all the appropriate * parts of the process environment (as per the clone * flags). The actual kick-off is left to the caller. */ static struct task_struct *copy_process(unsigned long clone_flags, unsigned long stack_start, struct pt_regs *regs, unsigned long stack_size, int __user *child_tidptr, struct pid *pid, int trace) { int retval; struct task_struct *p; int cgroup_callbacks_done = 0; if ((clone_flags & (CLONE_NEWNS|CLONE_FS)) == (CLONE_NEWNS|CLONE_FS)) return ERR_PTR(-EINVAL); /* * Thread groups must share signals as well, and detached threads * can only be started up within the thread group. */ if ((clone_flags & CLONE_THREAD) && !(clone_flags & CLONE_SIGHAND)) return ERR_PTR(-EINVAL); /* * Shared signal handlers imply shared VM. By way of the above, * thread groups also imply shared VM. Blocking this case allows * for various simplifications in other code. */ if ((clone_flags & CLONE_SIGHAND) && !(clone_flags & CLONE_VM)) return ERR_PTR(-EINVAL); /* * Siblings of global init remain as zombies on exit since they are * not reaped by their parent (swapper). To solve this and to avoid * multi-rooted process trees, prevent global and container-inits * from creating siblings. */ if ((clone_flags & CLONE_PARENT) && current->signal->flags & SIGNAL_UNKILLABLE) return ERR_PTR(-EINVAL); retval = security_task_create(clone_flags); if (retval) goto fork_out; retval = -ENOMEM; p = dup_task_struct(current); if (!p) goto fork_out; ftrace_graph_init_task(p); rt_mutex_init_task(p); #ifdef CONFIG_PROVE_LOCKING DEBUG_LOCKS_WARN_ON(!p->hardirqs_enabled); DEBUG_LOCKS_WARN_ON(!p->softirqs_enabled); #endif retval = -EAGAIN; if (atomic_read(&p->real_cred->user->processes) >= task_rlimit(p, RLIMIT_NPROC)) { if (!capable(CAP_SYS_ADMIN) && !capable(CAP_SYS_RESOURCE) && p->real_cred->user != INIT_USER) goto bad_fork_free; } retval = copy_creds(p, clone_flags); if (retval < 0) goto bad_fork_free; /* * If multiple threads are within copy_process(), then this check * triggers too late. This doesn't hurt, the check is only there * to stop root fork bombs. 
*/ retval = -EAGAIN; if (nr_threads >= max_threads) goto bad_fork_cleanup_count; if (!try_module_get(task_thread_info(p)->exec_domain->module)) goto bad_fork_cleanup_count; p->did_exec = 0; delayacct_tsk_init(p); /* Must remain after dup_task_struct() */ copy_flags(clone_flags, p); INIT_LIST_HEAD(&p->children); INIT_LIST_HEAD(&p->sibling); rcu_copy_process(p); p->vfork_done = NULL; spin_lock_init(&p->alloc_lock); init_sigpending(&p->pending); p->utime = cputime_zero; p->stime = cputime_zero; p->gtime = cputime_zero; p->utimescaled = cputime_zero; p->stimescaled = cputime_zero; #ifndef CONFIG_VIRT_CPU_ACCOUNTING p->prev_utime = cputime_zero; p->prev_stime = cputime_zero; #endif #if defined(SPLIT_RSS_COUNTING) memset(&p->rss_stat, 0, sizeof(p->rss_stat)); #endif p->default_timer_slack_ns = current->timer_slack_ns; task_io_accounting_init(&p->ioac); acct_clear_integrals(p); posix_cpu_timers_init(p); do_posix_clock_monotonic_gettime(&p->start_time); p->real_start_time = p->start_time; monotonic_to_bootbased(&p->real_start_time); p->io_context = NULL; p->audit_context = NULL; if (clone_flags & CLONE_THREAD) threadgroup_fork_read_lock(current); cgroup_fork(p); #ifdef CONFIG_NUMA p->mempolicy = mpol_dup(p->mempolicy); if (IS_ERR(p->mempolicy)) { retval = PTR_ERR(p->mempolicy); p->mempolicy = NULL; goto bad_fork_cleanup_cgroup; } mpol_fix_fork_child_flag(p); #endif #ifdef CONFIG_TRACE_IRQFLAGS p->irq_events = 0; #ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW p->hardirqs_enabled = 1; #else p->hardirqs_enabled = 0; #endif p->hardirq_enable_ip = 0; p->hardirq_enable_event = 0; p->hardirq_disable_ip = _THIS_IP_; p->hardirq_disable_event = 0; p->softirqs_enabled = 1; p->softirq_enable_ip = _THIS_IP_; p->softirq_enable_event = 0; p->softirq_disable_ip = 0; p->softirq_disable_event = 0; p->hardirq_context = 0; p->softirq_context = 0; #endif #ifdef CONFIG_LOCKDEP p->lockdep_depth = 0; /* no locks held yet */ p->curr_chain_key = 0; p->lockdep_recursion = 0; #endif #ifdef CONFIG_DEBUG_MUTEXES p->blocked_on = NULL; /* not blocked yet */ #endif #ifdef CONFIG_CGROUP_MEM_RES_CTLR p->memcg_batch.do_batch = 0; p->memcg_batch.memcg = NULL; #endif /* Perform scheduler related setup. Assign this task to a CPU. */ sched_fork(p); retval = perf_event_init_task(p); if (retval) goto bad_fork_cleanup_policy; if ((retval = audit_alloc(p))) goto bad_fork_cleanup_policy; /* copy all the process information */ if ((retval = copy_semundo(clone_flags, p))) goto bad_fork_cleanup_audit; if ((retval = copy_files(clone_flags, p))) goto bad_fork_cleanup_semundo; if ((retval = copy_fs(clone_flags, p))) goto bad_fork_cleanup_files; if ((retval = copy_sighand(clone_flags, p))) goto bad_fork_cleanup_fs; if ((retval = copy_signal(clone_flags, p))) goto bad_fork_cleanup_sighand; if ((retval = copy_mm(clone_flags, p))) goto bad_fork_cleanup_signal; if ((retval = copy_namespaces(clone_flags, p))) goto bad_fork_cleanup_mm; if ((retval = copy_io(clone_flags, p))) goto bad_fork_cleanup_namespaces; retval = copy_thread(clone_flags, stack_start, stack_size, p, regs); if (retval) goto bad_fork_cleanup_io; if (pid != &init_struct_pid) { retval = -ENOMEM; pid = alloc_pid(p->nsproxy->pid_ns); if (!pid) goto bad_fork_cleanup_io; } p->pid = pid_nr(pid); p->tgid = p->pid; if (clone_flags & CLONE_THREAD) p->tgid = current->tgid; p->set_child_tid = (clone_flags & CLONE_CHILD_SETTID) ? child_tidptr : NULL; /* * Clear TID on mm_release()? */ p->clear_child_tid = (clone_flags & CLONE_CHILD_CLEARTID) ? 
child_tidptr : NULL; #ifdef CONFIG_BLOCK p->plug = NULL; #endif #ifdef CONFIG_FUTEX p->robust_list = NULL; #ifdef CONFIG_COMPAT p->compat_robust_list = NULL; #endif INIT_LIST_HEAD(&p->pi_state_list); p->pi_state_cache = NULL; #endif /* * sigaltstack should be cleared when sharing the same VM */ if ((clone_flags & (CLONE_VM|CLONE_VFORK)) == CLONE_VM) p->sas_ss_sp = p->sas_ss_size = 0; /* * Syscall tracing and stepping should be turned off in the * child regardless of CLONE_PTRACE. */ user_disable_single_step(p); clear_tsk_thread_flag(p, TIF_SYSCALL_TRACE); #ifdef TIF_SYSCALL_EMU clear_tsk_thread_flag(p, TIF_SYSCALL_EMU); #endif clear_all_latency_tracing(p); /* ok, now we should be set up.. */ p->exit_signal = (clone_flags & CLONE_THREAD) ? -1 : (clone_flags & CSIGNAL); p->pdeath_signal = 0; p->exit_state = 0; /* * Ok, make it visible to the rest of the system. * We don't wake it up yet. */ p->group_leader = p; INIT_LIST_HEAD(&p->thread_group); /* Now that the task is set up, run cgroup callbacks if * necessary. We need to run them before the task is visible * on the tasklist. */ cgroup_fork_callbacks(p); cgroup_callbacks_done = 1; /* Need tasklist lock for parent etc handling! */ write_lock_irq(&tasklist_lock); /* CLONE_PARENT re-uses the old parent */ if (clone_flags & (CLONE_PARENT|CLONE_THREAD)) { p->real_parent = current->real_parent; p->parent_exec_id = current->parent_exec_id; } else { p->real_parent = current; p->parent_exec_id = current->self_exec_id; } spin_lock(&current->sighand->siglock); /* * Process group and session signals need to be delivered to just the * parent before the fork or both the parent and the child after the * fork. Restart if a signal comes in before we add the new process to * its process group. * A fatal signal pending means that current will exit, so the new * thread can't slip out of an OOM kill (or normal SIGKILL).
*/ recalc_sigpending(); if (signal_pending(current)) { spin_unlock(&current->sighand->siglock); write_unlock_irq(&tasklist_lock); retval = -ERESTARTNOINTR; goto bad_fork_free_pid; } if (clone_flags & CLONE_THREAD) { current->signal->nr_threads++; atomic_inc(&current->signal->live); atomic_inc(&current->signal->sigcnt); p->group_leader = current->group_leader; list_add_tail_rcu(&p->thread_group, &p->group_leader->thread_group); } if (likely(p->pid)) { tracehook_finish_clone(p, clone_flags, trace); if (thread_group_leader(p)) { if (is_child_reaper(pid)) p->nsproxy->pid_ns->child_reaper = p; p->signal->leader_pid = pid; p->signal->tty = tty_kref_get(current->signal->tty); attach_pid(p, PIDTYPE_PGID, task_pgrp(current)); attach_pid(p, PIDTYPE_SID, task_session(current)); list_add_tail(&p->sibling, &p->real_parent->children); list_add_tail_rcu(&p->tasks, &init_task.tasks); __this_cpu_inc(process_counts); } attach_pid(p, PIDTYPE_PID, pid); nr_threads++; } total_forks++; spin_unlock(&current->sighand->siglock); write_unlock_irq(&tasklist_lock); proc_fork_connector(p); cgroup_post_fork(p); if (clone_flags & CLONE_THREAD) threadgroup_fork_read_unlock(current); perf_event_fork(p); return p; bad_fork_free_pid: if (pid != &init_struct_pid) free_pid(pid); bad_fork_cleanup_io: if (p->io_context) exit_io_context(p); bad_fork_cleanup_namespaces: if (unlikely(clone_flags & CLONE_NEWPID)) pid_ns_release_proc(p->nsproxy->pid_ns); exit_task_namespaces(p); bad_fork_cleanup_mm: if (p->mm) { task_lock(p); if (p->signal->oom_score_adj == OOM_SCORE_ADJ_MIN) atomic_dec(&p->mm->oom_disable_count); task_unlock(p); mmput(p->mm); } bad_fork_cleanup_signal: if (!(clone_flags & CLONE_THREAD)) free_signal_struct(p->signal); bad_fork_cleanup_sighand: __cleanup_sighand(p->sighand); bad_fork_cleanup_fs: exit_fs(p); /* blocking */ bad_fork_cleanup_files: exit_files(p); /* blocking */ bad_fork_cleanup_semundo: exit_sem(p); bad_fork_cleanup_audit: audit_free(p); bad_fork_cleanup_policy: perf_event_free_task(p); #ifdef CONFIG_NUMA mpol_put(p->mempolicy); bad_fork_cleanup_cgroup: #endif if (clone_flags & CLONE_THREAD) threadgroup_fork_read_unlock(current); cgroup_exit(p, cgroup_callbacks_done); delayacct_tsk_free(p); module_put(task_thread_info(p)->exec_domain->module); bad_fork_cleanup_count: atomic_dec(&p->cred->user->processes); exit_creds(p); bad_fork_free: free_task(p); fork_out: return ERR_PTR(retval); }
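The flag-validation rules at the top of copy_process() can be checked from user space with raw clone calls: both combinations below should fail with EINVAL before the kernel allocates anything. A sketch assuming the x86-64 SYS_clone argument order (flags, newsp, parent_tid, child_tid, tls); other architectures order these differently:

#define _GNU_SOURCE
#include <errno.h>
#include <sched.h>
#include <stdio.h>
#include <string.h>
#include <sys/syscall.h>
#include <unistd.h>

static void try_flags(const char *what, unsigned long flags)
{
	long ret = syscall(SYS_clone, flags, 0UL, NULL, NULL, 0UL);
	if (ret == 0)
		_exit(0);	/* unexpected child: bail out quietly */
	printf("%-40s -> %s\n", what,
	       ret < 0 ? strerror(errno) : "succeeded");
}

int main(void)
{
	/* CLONE_THREAD requires CLONE_SIGHAND */
	try_flags("CLONE_THREAD without CLONE_SIGHAND", CLONE_THREAD);
	/* CLONE_SIGHAND requires CLONE_VM */
	try_flags("CLONE_SIGHAND without CLONE_VM", CLONE_SIGHAND);
	return 0;
}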
/* * Called by kernel/ptrace.c when detaching * * Make sure any single step bits, etc. are not set */ void ptrace_disable(struct task_struct *child) { clear_tsk_thread_flag(child, TIF_SINGLE_STEP); clear_tsk_thread_flag(child, TIF_BREAKPOINT); ocd_disable(child); }
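ptrace_disable() is what guarantees that detaching leaves no stray single-step or breakpoint state on the child. A minimal user-space tracer that exercises this path (single-step once, then detach); error handling is omitted for brevity:

#include <signal.h>
#include <stdio.h>
#include <sys/ptrace.h>
#include <sys/wait.h>
#include <unistd.h>

int main(void)
{
	pid_t pid = fork();

	if (pid == 0) {				/* tracee */
		ptrace(PTRACE_TRACEME, 0, NULL, NULL);
		raise(SIGSTOP);			/* hand control to the tracer */
		for (;;)
			getpid();		/* instructions to step over */
	}
	waitpid(pid, NULL, 0);			/* tracee stopped at SIGSTOP */
	ptrace(PTRACE_SINGLESTEP, pid, NULL, 0);
	waitpid(pid, NULL, 0);			/* stopped one instruction later */
	/* Detach: the kernel clears the single-step state via ptrace_disable() */
	ptrace(PTRACE_DETACH, pid, NULL, 0);
	kill(pid, SIGKILL);
	return 0;
}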