int copy_thread(int nr, unsigned long clone_flags, unsigned long usp, unsigned long unused, struct task_struct *p, struct pt_regs *regs) { struct thread_info *ti = task_thread_info(p); struct pt_regs *childregs; long childksp; p->set_child_tid = p->clear_child_tid = NULL; childksp = (unsigned long)task_stack_page(p) + THREAD_SIZE - 32; preempt_disable(); if (is_fpu_owner()) save_fp(p); if (cpu_has_dsp) save_dsp(p); preempt_enable(); /* set up new TSS. */ childregs = (struct pt_regs *) childksp - 1; *childregs = *regs; childregs->regs[7] = 0; /* Clear error flag */ #if defined(CONFIG_BINFMT_IRIX) if (current->personality != PER_LINUX) { /* Under IRIX things are a little different. */ childregs->regs[3] = 1; regs->regs[3] = 0; } #endif childregs->regs[2] = 0; /* Child gets zero as return value */ regs->regs[2] = p->pid; if (childregs->cp0_status & ST0_CU0) { childregs->regs[28] = (unsigned long) ti; childregs->regs[29] = childksp; ti->addr_limit = KERNEL_DS; } else { childregs->regs[29] = usp; ti->addr_limit = USER_DS; } p->thread.reg29 = (unsigned long) childregs; p->thread.reg31 = (unsigned long) ret_from_fork; /* * New tasks lose permission to use the fpu. This accelerates context * switching for most programs since they don't use the fpu. */ p->thread.cp0_status = read_c0_status() & ~(ST0_CU2|ST0_CU1); childregs->cp0_status &= ~(ST0_CU2|ST0_CU1); clear_tsk_thread_flag(p, TIF_USEDFPU); #ifdef CONFIG_MIPS_MT_FPAFF /* * FPU affinity support is cleaner if we track the * user-visible CPU affinity from the very beginning. * The generic cpus_allowed mask will already have * been copied from the parent before copy_thread * is invoked. */ p->thread.user_cpus_allowed = p->cpus_allowed; #endif /* CONFIG_MIPS_MT_FPAFF */ if (clone_flags & CLONE_SETTLS) ti->tp_value = regs->regs[7]; return 0; }
/* * Lock a mutex (possibly interruptible), slowpath: */ static inline int __sched __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass, struct lockdep_map *nest_lock, unsigned long ip) { struct task_struct *task = current; struct mutex_waiter waiter; unsigned long flags; preempt_disable(); mutex_acquire_nest(&lock->dep_map, subclass, 0, nest_lock, ip); #ifdef CONFIG_MUTEX_SPIN_ON_OWNER /* * Optimistic spinning. * * We try to spin for acquisition when we find that there are no * pending waiters and the lock owner is currently running on a * (different) CPU. * * The rationale is that if the lock owner is running, it is likely to * release the lock soon. * * Since this needs the lock owner, and this mutex implementation * doesn't track the owner atomically in the lock field, we need to * track it non-atomically. * * We can't do this for DEBUG_MUTEXES because that relies on wait_lock * to serialize everything. * * The mutex spinners are queued up using MCS lock so that only one * spinner can compete for the mutex. However, if mutex spinning isn't * going to happen, there is no point in going through the lock/unlock * overhead. */ if (!mutex_can_spin_on_owner(lock)) goto slowpath; for (;;) { struct task_struct *owner; struct mspin_node node; /* * If there's an owner, wait for it to either * release the lock or go to sleep. */ mspin_lock(MLOCK(lock), &node); owner = ACCESS_ONCE(lock->owner); if (owner && !mutex_spin_on_owner(lock, owner)) { mspin_unlock(MLOCK(lock), &node); break; } if ((atomic_read(&lock->count) == 1) && (atomic_cmpxchg(&lock->count, 1, 0) == 1)) { lock_acquired(&lock->dep_map, ip); mutex_set_owner(lock); mspin_unlock(MLOCK(lock), &node); preempt_enable(); return 0; } mspin_unlock(MLOCK(lock), &node); /* * When there's no owner, we might have preempted between the * owner acquiring the lock and setting the owner field. If * we're an RT task that will live-lock because we won't let * the owner complete. */ if (!owner && (need_resched() || rt_task(task))) break; /* * The cpu_relax() call is a compiler barrier which forces * everything in this loop to be re-loaded. We don't need * memory barriers as we'll eventually observe the right * values at the cost of a few extra spins. */ arch_mutex_cpu_relax(); } slowpath: #endif spin_lock_mutex(&lock->wait_lock, flags); debug_mutex_lock_common(lock, &waiter); debug_mutex_add_waiter(lock, &waiter, task_thread_info(task)); /* add waiting tasks to the end of the waitqueue (FIFO): */ list_add_tail(&waiter.list, &lock->wait_list); waiter.task = task; if (MUTEX_SHOW_NO_WAITER(lock) && (atomic_xchg(&lock->count, -1) == 1)) goto done; lock_contended(&lock->dep_map, ip); for (;;) { /* * Lets try to take the lock again - this is needed even if * we get here for the first time (shortly after failing to * acquire the lock), to make sure that we get a wakeup once * it's unlocked. Later on, if we sleep, this is the * operation that gives us the lock. We xchg it to -1, so * that when we release the lock, we properly wake up the * other waiters: */ if (MUTEX_SHOW_NO_WAITER(lock) && (atomic_xchg(&lock->count, -1) == 1)) break; /* * got a signal? (This code gets eliminated in the * TASK_UNINTERRUPTIBLE case.) 
*/ if (unlikely(signal_pending_state(state, task))) { mutex_remove_waiter(lock, &waiter, task_thread_info(task)); mutex_release(&lock->dep_map, 1, ip); spin_unlock_mutex(&lock->wait_lock, flags); debug_mutex_free_waiter(&waiter); preempt_enable(); return -EINTR; } __set_task_state(task, state); /* didn't get the lock, go to sleep: */ spin_unlock_mutex(&lock->wait_lock, flags); schedule_preempt_disabled(); spin_lock_mutex(&lock->wait_lock, flags); } done: lock_acquired(&lock->dep_map, ip); /* got the lock - rejoice! */ mutex_remove_waiter(lock, &waiter, current_thread_info()); mutex_set_owner(lock); /* set it to 0 if there are no waiters left: */ if (likely(list_empty(&lock->wait_list))) atomic_set(&lock->count, 0); spin_unlock_mutex(&lock->wait_lock, flags); debug_mutex_free_waiter(&waiter); preempt_enable(); return 0; }
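/* * Context for the slowpath above (an illustrative sketch, not taken from the kernel tree; "example_mutex_lock" is a made-up name): a caller-facing lock routine usually tries an uncontended fast path first and only falls back to __mutex_lock_common() when the atomic count shows contention. The count convention assumed here matches the xchg/cmpxchg usage above: 1 = unlocked, 0 = locked, negative = locked with waiters. */
static inline void example_mutex_lock(struct mutex *lock)
{
	might_sleep();

	/* Fast path: flip 1 -> 0 with a single atomic op when uncontended. */
	if (likely(atomic_cmpxchg(&lock->count, 1, 0) == 1)) {
		mutex_set_owner(lock);
		return;
	}

	/* Contended: sleep uninterruptibly in the slowpath above. */
	__mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, 0, NULL, _RET_IP_);
}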
int copy_thread(unsigned long clone_flags, unsigned long usp, unsigned long unused, struct task_struct *p, struct pt_regs *regs) { struct pt_regs *childregs = task_pt_regs(p); struct thread_info *ti = task_thread_info(p); *childregs = *regs; if (user_mode(regs)) childregs->r1 = usp; else childregs->r1 = ((unsigned long) ti) + THREAD_SIZE; #ifndef CONFIG_MMU memset(&ti->cpu_context, 0, sizeof(struct cpu_context)); ti->cpu_context.r1 = (unsigned long)childregs; ti->cpu_context.msr = (unsigned long)childregs->msr; #else /* if creating a kernel thread then update the current reg (we don't * want to use the parent's value when restoring by POP_STATE) */ if (kernel_mode(regs)) /* save new current on stack to use POP_STATE */ childregs->CURRENT_TASK = (unsigned long)p; /* if returning to user then use the parent's value of this register */ /* if we're creating a new kernel thread then just zeroing all * the registers. That's OK for a brand new thread.*/ /* Pls. note that some of them will be restored in POP_STATE */ if (kernel_mode(regs)) memset(&ti->cpu_context, 0, sizeof(struct cpu_context)); /* if this thread is created for fork/vfork/clone, then we want to * restore all the parent's context */ /* in addition to the registers which will be restored by POP_STATE */ else { ti->cpu_context = *(struct cpu_context *)regs; childregs->msr |= MSR_UMS; } /* FIXME STATE_SAVE_PT_OFFSET; */ ti->cpu_context.r1 = (unsigned long)childregs; /* we should consider the fact that childregs is a copy of the parent * regs which were saved immediately after entering the kernel state * before enabling VM. This MSR will be restored in switch_to and * RETURN() and we want to have the right machine state there * specifically this state must have INTs disabled before and enabled * after performing rtbd * compose the right MSR for RETURN(). It will work for switch_to also * excepting for VM and UMS * don't touch UMS , CARRY and cache bits * right now MSR is a copy of parent one */ childregs->msr |= MSR_BIP; childregs->msr &= ~MSR_EIP; childregs->msr |= MSR_IE; childregs->msr &= ~MSR_VM; childregs->msr |= MSR_VMS; childregs->msr |= MSR_EE; /* exceptions will be enabled*/ ti->cpu_context.msr = (childregs->msr|MSR_VM); ti->cpu_context.msr &= ~MSR_UMS; /* switch_to to kernel mode */ ti->cpu_context.msr &= ~MSR_IE; #endif ti->cpu_context.r15 = (unsigned long)ret_from_fork - 8; /* * r21 is the thread reg, r10 is 6th arg to clone * which contains TLS area */ if (clone_flags & CLONE_SETTLS) childregs->r21 = childregs->r10; return 0; }
/* * Copy architecture-specific thread state */ int copy_thread(unsigned long clone_flags, unsigned long usp, unsigned long unused, struct task_struct *p, struct pt_regs *regs) { struct thread_info *ti = task_thread_info(p); struct hexagon_switch_stack *ss; struct pt_regs *childregs; asmlinkage void ret_from_fork(void); childregs = (struct pt_regs *) (((unsigned long) ti + THREAD_SIZE) - sizeof(*childregs)); memcpy(childregs, regs, sizeof(*childregs)); ti->regs = childregs; /* * Establish kernel stack pointer and initial PC for new thread */ ss = (struct hexagon_switch_stack *) ((unsigned long) childregs - sizeof(*ss)); ss->lr = (unsigned long)ret_from_fork; p->thread.switch_sp = ss; /* If User mode thread, set pt_reg stack pointer as per parameter */ if (user_mode(childregs)) { pt_set_rte_sp(childregs, usp); /* Child sees zero return value */ childregs->r00 = 0; /* * The clone syscall has the C signature: * int [r0] clone(int flags [r0], * void *child_frame [r1], * void *parent_tid [r2], * void *child_tid [r3], * void *thread_control_block [r4]); * ugp is used to provide TLS support. */ if (clone_flags & CLONE_SETTLS) childregs->ugp = childregs->r04; /* * Parent sees new pid -- not necessary, not even possible at * this point in the fork process * Might also want to set things like ti->addr_limit */ } else { /* * If kernel thread, resume stack is kernel stack base. * Note that this is pointer arithmetic on pt_regs * */ pt_set_rte_sp(childregs, (unsigned long)(childregs + 1)); /* * We need the current thread_info fast path pointer * set up in pt_regs. The register to be used is * parametric for assembler code, but the mechanism * doesn't drop neatly into C. Needs to be fixed. */ childregs->THREADINFO_REG = (unsigned long) ti; } /* * thread_info pointer is pulled out of task_struct "stack" * field on switch_to. */ p->stack = (void *)ti; return 0; }
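/* * User-space view of the copy_thread() implementations above (a standalone demo, not kernel code): the stack-top pointer passed to clone() is what arrives in the kernel as "usp", and the child resumes in ret_from_fork with a zero return value in its syscall-return register. Build with: gcc -Wall clone_demo.c -o clone_demo */
#define _GNU_SOURCE
#include <sched.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>

#define STACK_SIZE (1024 * 1024)

static int child_fn(void *arg)
{
	printf("child: pid=%d arg=%s\n", (int)getpid(), (char *)arg);
	return 0;
}

int main(void)
{
	char *stack = malloc(STACK_SIZE);
	pid_t pid;

	if (!stack)
		return 1;

	/* The stack grows downward, so hand the kernel the top of the region. */
	pid = clone(child_fn, stack + STACK_SIZE,
		    CLONE_VM | CLONE_FS | CLONE_FILES | SIGCHLD, "hello");
	if (pid == -1) {
		perror("clone");
		return 1;
	}
	waitpid(pid, NULL, 0);
	free(stack);
	return 0;
}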
static int setup_frame32(int sig, struct k_sigaction *ka, sigset_t *set, struct pt_regs * regs) { sigframe32 __user *frame = get_sigframe(ka, regs, sizeof(sigframe32)); if (frame == (void __user *) -1UL) goto give_sigsegv; if (__copy_to_user(&frame->sc.oldmask, &set->sig, _SIGMASK_COPY_SIZE32)) goto give_sigsegv; if (save_sigregs32(regs, &frame->sregs)) goto give_sigsegv; if (save_sigregs_gprs_high(regs, frame->gprs_high)) goto give_sigsegv; if (__put_user((unsigned long) &frame->sregs, &frame->sc.sregs)) goto give_sigsegv; /* Set up to return from userspace. If provided, use a stub already in userspace. */ if (ka->sa.sa_flags & SA_RESTORER) { regs->gprs[14] = (__u64 __force) ka->sa.sa_restorer | PSW32_ADDR_AMODE; } else { regs->gprs[14] = (__u64 __force) frame->retcode | PSW32_ADDR_AMODE; if (__put_user(S390_SYSCALL_OPCODE | __NR_sigreturn, (u16 __force __user *)(frame->retcode))) goto give_sigsegv; } /* Set up backchain. */ if (__put_user(regs->gprs[15], (unsigned int __user *) frame)) goto give_sigsegv; /* Set up registers for signal handler */ regs->gprs[15] = (__force __u64) frame; /* Force 31 bit amode and default user address space control. */ regs->psw.mask = PSW_MASK_BA | (psw_user_bits & PSW_MASK_ASC) | (regs->psw.mask & ~PSW_MASK_ASC); regs->psw.addr = (__force __u64) ka->sa.sa_handler; regs->gprs[2] = map_signal(sig); regs->gprs[3] = (__force __u64) &frame->sc; /* We forgot to include these in the sigcontext. To avoid breaking binary compatibility, they are passed as args. */ if (sig == SIGSEGV || sig == SIGBUS || sig == SIGILL || sig == SIGTRAP || sig == SIGFPE) { /* set extra registers only for synchronous signals */ regs->gprs[4] = regs->int_code & 127; regs->gprs[5] = regs->int_parm_long; regs->gprs[6] = task_thread_info(current)->last_break; } /* Place signal number on stack to allow backtrace from handler. */ if (__put_user(regs->gprs[2], (int __force __user *) &frame->signo)) goto give_sigsegv; return 0; give_sigsegv: force_sigsegv(sig, current); return -EFAULT; }
/* * This creates a new process as a copy of the old one, * but does not actually start it yet. * * It copies the registers, and all the appropriate * parts of the process environment (as per the clone * flags). The actual kick-off is left to the caller. */ static struct task_struct *copy_process(unsigned long clone_flags, unsigned long stack_start, struct pt_regs *regs, unsigned long stack_size, int __user *child_tidptr, struct pid *pid, int trace) { int retval; struct task_struct *p; int cgroup_callbacks_done = 0; if ((clone_flags & (CLONE_NEWNS|CLONE_FS)) == (CLONE_NEWNS|CLONE_FS)) return ERR_PTR(-EINVAL); /* * Thread groups must share signals as well, and detached threads * can only be started up within the thread group. */ if ((clone_flags & CLONE_THREAD) && !(clone_flags & CLONE_SIGHAND)) return ERR_PTR(-EINVAL); /* * Shared signal handlers imply shared VM. By way of the above, * thread groups also imply shared VM. Blocking this case allows * for various simplifications in other code. */ if ((clone_flags & CLONE_SIGHAND) && !(clone_flags & CLONE_VM)) return ERR_PTR(-EINVAL); retval = security_task_create(clone_flags); if (retval) goto fork_out; retval = -ENOMEM; p = dup_task_struct(current); if (!p) goto fork_out; rt_mutex_init_task(p); #ifdef CONFIG_PROVE_LOCKING DEBUG_LOCKS_WARN_ON(!p->hardirqs_enabled); DEBUG_LOCKS_WARN_ON(!p->softirqs_enabled); #endif retval = -EAGAIN; if (atomic_read(&p->real_cred->user->processes) >= p->signal->rlim[RLIMIT_NPROC].rlim_cur) { if (!capable(CAP_SYS_ADMIN) && !capable(CAP_SYS_RESOURCE) && p->real_cred->user != INIT_USER) goto bad_fork_free; } retval = copy_creds(p, clone_flags); if (retval < 0) goto bad_fork_free; /* * If multiple threads are within copy_process(), then this check * triggers too late. This doesn't hurt, the check is only there * to stop root fork bombs. 
*/ retval = -EAGAIN; if (nr_threads >= max_threads) goto bad_fork_cleanup_count; if (!try_module_get(task_thread_info(p)->exec_domain->module)) goto bad_fork_cleanup_count; if (p->binfmt && !try_module_get(p->binfmt->module)) goto bad_fork_cleanup_put_domain; p->did_exec = 0; delayacct_tsk_init(p); /* Must remain after dup_task_struct() */ copy_flags(clone_flags, p); INIT_LIST_HEAD(&p->children); INIT_LIST_HEAD(&p->sibling); #ifdef CONFIG_PREEMPT_RCU p->rcu_read_lock_nesting = 0; p->rcu_flipctr_idx = 0; #endif /* #ifdef CONFIG_PREEMPT_RCU */ p->vfork_done = NULL; spin_lock_init(&p->alloc_lock); clear_tsk_thread_flag(p, TIF_SIGPENDING); init_sigpending(&p->pending); p->utime = cputime_zero; p->stime = cputime_zero; p->gtime = cputime_zero; p->utimescaled = cputime_zero; p->stimescaled = cputime_zero; p->prev_utime = cputime_zero; p->prev_stime = cputime_zero; p->default_timer_slack_ns = current->timer_slack_ns; task_io_accounting_init(&p->ioac); acct_clear_integrals(p); posix_cpu_timers_init(p); p->lock_depth = -1; /* -1 = no lock */ do_posix_clock_monotonic_gettime(&p->start_time); p->real_start_time = p->start_time; monotonic_to_bootbased(&p->real_start_time); p->io_context = NULL; p->audit_context = NULL; cgroup_fork(p); #ifdef CONFIG_NUMA p->mempolicy = mpol_dup(p->mempolicy); if (IS_ERR(p->mempolicy)) { retval = PTR_ERR(p->mempolicy); p->mempolicy = NULL; goto bad_fork_cleanup_cgroup; } mpol_fix_fork_child_flag(p); #endif #ifdef CONFIG_TRACE_IRQFLAGS p->irq_events = 0; #ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW p->hardirqs_enabled = 1; #else p->hardirqs_enabled = 0; #endif p->hardirq_enable_ip = 0; p->hardirq_enable_event = 0; p->hardirq_disable_ip = _THIS_IP_; p->hardirq_disable_event = 0; p->softirqs_enabled = 1; p->softirq_enable_ip = _THIS_IP_; p->softirq_enable_event = 0; p->softirq_disable_ip = 0; p->softirq_disable_event = 0; p->hardirq_context = 0; p->softirq_context = 0; #endif #ifdef CONFIG_LOCKDEP p->lockdep_depth = 0; /* no locks held yet */ p->curr_chain_key = 0; p->lockdep_recursion = 0; #endif #ifdef CONFIG_DEBUG_MUTEXES p->blocked_on = NULL; /* not blocked yet */ #endif if (unlikely(current->ptrace)) ptrace_fork(p, clone_flags); /* Perform scheduler related setup. Assign this task to a CPU. 
*/ sched_fork(p, clone_flags); if ((retval = audit_alloc(p))) goto bad_fork_cleanup_policy; /* copy all the process information */ if ((retval = copy_semundo(clone_flags, p))) goto bad_fork_cleanup_audit; if ((retval = copy_files(clone_flags, p))) goto bad_fork_cleanup_semundo; if ((retval = copy_fs(clone_flags, p))) goto bad_fork_cleanup_files; if ((retval = copy_sighand(clone_flags, p))) goto bad_fork_cleanup_fs; if ((retval = copy_signal(clone_flags, p))) goto bad_fork_cleanup_sighand; if ((retval = copy_mm(clone_flags, p))) goto bad_fork_cleanup_signal; if ((retval = copy_namespaces(clone_flags, p))) goto bad_fork_cleanup_mm; if ((retval = copy_io(clone_flags, p))) goto bad_fork_cleanup_namespaces; retval = copy_thread(clone_flags, stack_start, stack_size, p, regs); if (retval) goto bad_fork_cleanup_io; if (pid != &init_struct_pid) { retval = -ENOMEM; pid = alloc_pid(p->nsproxy->pid_ns); if (!pid) goto bad_fork_cleanup_io; if (clone_flags & CLONE_NEWPID) { retval = pid_ns_prepare_proc(p->nsproxy->pid_ns); if (retval < 0) goto bad_fork_free_pid; } } ftrace_graph_init_task(p); p->pid = pid_nr(pid); p->tgid = p->pid; if (clone_flags & CLONE_THREAD) p->tgid = current->tgid; if (current->nsproxy != p->nsproxy) { retval = ns_cgroup_clone(p, pid); if (retval) goto bad_fork_free_graph; } p->set_child_tid = (clone_flags & CLONE_CHILD_SETTID) ? child_tidptr : NULL; /* * Clear TID on mm_release()? */ p->clear_child_tid = (clone_flags & CLONE_CHILD_CLEARTID) ? child_tidptr: NULL; #ifdef CONFIG_FUTEX p->robust_list = NULL; #ifdef CONFIG_COMPAT p->compat_robust_list = NULL; #endif INIT_LIST_HEAD(&p->pi_state_list); p->pi_state_cache = NULL; #endif /* * sigaltstack should be cleared when sharing the same VM */ if ((clone_flags & (CLONE_VM|CLONE_VFORK)) == CLONE_VM) p->sas_ss_sp = p->sas_ss_size = 0; /* * Syscall tracing should be turned off in the child regardless * of CLONE_PTRACE. */ clear_tsk_thread_flag(p, TIF_SYSCALL_TRACE); #ifdef TIF_SYSCALL_EMU clear_tsk_thread_flag(p, TIF_SYSCALL_EMU); #endif clear_all_latency_tracing(p); /* ok, now we should be set up.. */ p->exit_signal = (clone_flags & CLONE_THREAD) ? -1 : (clone_flags & CSIGNAL); p->pdeath_signal = 0; p->exit_state = 0; /* * Ok, make it visible to the rest of the system. * We dont wake it up yet. */ p->group_leader = p; INIT_LIST_HEAD(&p->thread_group); /* Now that the task is set up, run cgroup callbacks if * necessary. We need to run them before the task is visible * on the tasklist. */ cgroup_fork_callbacks(p); cgroup_callbacks_done = 1; /* Need tasklist lock for parent etc handling! */ write_lock_irq(&tasklist_lock); /* * The task hasn't been attached yet, so its cpus_allowed mask will * not be changed, nor will its assigned CPU. * * The cpus_allowed mask of the parent may have changed after it was * copied first time - so re-copy it here, then check the child's CPU * to ensure it is on a valid CPU (and if not, just force it back to * parent's CPU). This avoids alot of nasty races. 
*/ p->cpus_allowed = current->cpus_allowed; p->rt.nr_cpus_allowed = current->rt.nr_cpus_allowed; if (unlikely(!cpu_isset(task_cpu(p), p->cpus_allowed) || !cpu_online(task_cpu(p)))) set_task_cpu(p, smp_processor_id()); /* CLONE_PARENT re-uses the old parent */ if (clone_flags & (CLONE_PARENT|CLONE_THREAD)) { p->real_parent = current->real_parent; p->parent_exec_id = current->parent_exec_id; } else { p->real_parent = current; p->parent_exec_id = current->self_exec_id; } spin_lock(&current->sighand->siglock); /* * Process group and session signals need to be delivered to just the * parent before the fork or both the parent and the child after the * fork. Restart if a signal comes in before we add the new process to * its process group. * A fatal signal pending means that current will exit, so the new * thread can't slip out of an OOM kill (or normal SIGKILL). */ recalc_sigpending(); if (signal_pending(current)) { spin_unlock(&current->sighand->siglock); write_unlock_irq(&tasklist_lock); retval = -ERESTARTNOINTR; goto bad_fork_free_graph; } if (clone_flags & CLONE_THREAD) { p->group_leader = current->group_leader; list_add_tail_rcu(&p->thread_group, &p->group_leader->thread_group); } if (likely(p->pid)) { list_add_tail(&p->sibling, &p->real_parent->children); tracehook_finish_clone(p, clone_flags, trace); if (thread_group_leader(p)) { if (clone_flags & CLONE_NEWPID) p->nsproxy->pid_ns->child_reaper = p; p->signal->leader_pid = pid; tty_kref_put(p->signal->tty); p->signal->tty = tty_kref_get(current->signal->tty); attach_pid(p, PIDTYPE_PGID, task_pgrp(current)); attach_pid(p, PIDTYPE_SID, task_session(current)); list_add_tail_rcu(&p->tasks, &init_task.tasks); __get_cpu_var(process_counts)++; } attach_pid(p, PIDTYPE_PID, pid); nr_threads++; } total_forks++; spin_unlock(&current->sighand->siglock); write_unlock_irq(&tasklist_lock); proc_fork_connector(p); cgroup_post_fork(p); return p; bad_fork_free_graph: ftrace_graph_exit_task(p); bad_fork_free_pid: if (pid != &init_struct_pid) free_pid(pid); bad_fork_cleanup_io: put_io_context(p->io_context); bad_fork_cleanup_namespaces: exit_task_namespaces(p); bad_fork_cleanup_mm: if (p->mm) mmput(p->mm); bad_fork_cleanup_signal: cleanup_signal(p); bad_fork_cleanup_sighand: __cleanup_sighand(p->sighand); bad_fork_cleanup_fs: exit_fs(p); /* blocking */ bad_fork_cleanup_files: exit_files(p); /* blocking */ bad_fork_cleanup_semundo: exit_sem(p); bad_fork_cleanup_audit: audit_free(p); bad_fork_cleanup_policy: #ifdef CONFIG_NUMA mpol_put(p->mempolicy); bad_fork_cleanup_cgroup: #endif cgroup_exit(p, cgroup_callbacks_done); delayacct_tsk_free(p); if (p->binfmt) module_put(p->binfmt->module); bad_fork_cleanup_put_domain: module_put(task_thread_info(p)->exec_domain->module); bad_fork_cleanup_count: atomic_dec(&p->cred->user->processes); put_cred(p->real_cred); put_cred(p->cred); bad_fork_free: free_task(p); fork_out: return ERR_PTR(retval); }
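/* * Rough sketch of the caller side (recalled from the fork path of this kernel generation, so the exact signatures are assumptions, and "example_do_fork" is a made-up name): copy_process() only constructs the child; the caller reports the pid and wakes the new task afterwards. */
static long example_do_fork(unsigned long clone_flags, unsigned long stack_start,
			    struct pt_regs *regs, unsigned long stack_size,
			    int __user *child_tidptr)
{
	struct task_struct *p;

	p = copy_process(clone_flags, stack_start, regs, stack_size,
			 child_tidptr, NULL, 0);
	if (IS_ERR(p))
		return PTR_ERR(p);

	/* The child is fully built but still sleeping; put it on a runqueue. */
	wake_up_new_task(p, clone_flags);

	return task_pid_vnr(p);
}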
static int setup_frame(int sig, struct k_sigaction *ka, sigset_t *set, struct pt_regs * regs) { struct sigframe __user *frame; struct sigcontext sc; unsigned long restorer; size_t frame_size; /* * gprs_high are only present for a 31-bit task running on * a 64-bit kernel (see compat_signal.c) but the space for * gprs_high need to be allocated if vector registers are * included in the signal frame on a 31-bit system. */ frame_size = sizeof(*frame) - sizeof(frame->sregs_ext); if (MACHINE_HAS_VX) frame_size += sizeof(frame->sregs_ext); frame = get_sigframe(ka, regs, frame_size); if (frame == (void __user *) -1UL) return -EFAULT; /* Set up backchain. */ if (__put_user(regs->gprs[15], (addr_t __user *) frame)) return -EFAULT; /* Create struct sigcontext on the signal stack */ memcpy(&sc.oldmask, &set->sig, _SIGMASK_COPY_SIZE); sc.sregs = (_sigregs __user __force *) &frame->sregs; if (__copy_to_user(&frame->sc, &sc, sizeof(frame->sc))) return -EFAULT; /* Store registers needed to create the signal frame */ store_sigregs(); /* Create _sigregs on the signal stack */ if (save_sigregs(regs, &frame->sregs)) return -EFAULT; /* Place signal number on stack to allow backtrace from handler. */ if (__put_user(regs->gprs[2], (int __user *) &frame->signo)) return -EFAULT; /* Create _sigregs_ext on the signal stack */ if (save_sigregs_ext(regs, &frame->sregs_ext)) return -EFAULT; /* Set up to return from userspace. If provided, use a stub already in userspace. */ if (ka->sa.sa_flags & SA_RESTORER) { restorer = (unsigned long) ka->sa.sa_restorer | PSW_ADDR_AMODE; } else { /* Signal frame without vector registers are short ! */ __u16 __user *svc = (void __user *) frame + frame_size - 2; if (__put_user(S390_SYSCALL_OPCODE | __NR_sigreturn, svc)) return -EFAULT; restorer = (unsigned long) svc | PSW_ADDR_AMODE; } /* Set up registers for signal handler */ regs->gprs[14] = restorer; regs->gprs[15] = (unsigned long) frame; /* Force default amode and default user address space control. */ regs->psw.mask = PSW_MASK_EA | PSW_MASK_BA | (PSW_USER_BITS & PSW_MASK_ASC) | (regs->psw.mask & ~PSW_MASK_ASC); regs->psw.addr = (unsigned long) ka->sa.sa_handler | PSW_ADDR_AMODE; regs->gprs[2] = map_signal(sig); regs->gprs[3] = (unsigned long) &frame->sc; /* We forgot to include these in the sigcontext. To avoid breaking binary compatibility, they are passed as args. */ if (sig == SIGSEGV || sig == SIGBUS || sig == SIGILL || sig == SIGTRAP || sig == SIGFPE) { /* set extra registers only for synchronous signals */ regs->gprs[4] = regs->int_code & 127; regs->gprs[5] = regs->int_parm_long; regs->gprs[6] = task_thread_info(current)->last_break; } return 0; }
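/* * User-space counterpart of the frame setup above (a standalone demo, not kernel code): installing a classic handler with sigaction(). When no SA_RESTORER stub is supplied (the C library may install its own), the kernel takes the branch above that plants the sigreturn trampoline on the signal frame; a plain sa_handler without SA_SIGINFO goes through the setup_frame() path. */
#include <signal.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

static void on_sigusr1(int sig)
{
	const char msg[] = "caught SIGUSR1\n";
	write(STDOUT_FILENO, msg, sizeof(msg) - 1);
}

int main(void)
{
	struct sigaction sa;

	memset(&sa, 0, sizeof(sa));
	sa.sa_handler = on_sigusr1;	/* plain handler, no SA_SIGINFO */
	sigemptyset(&sa.sa_mask);
	sa.sa_flags = 0;

	if (sigaction(SIGUSR1, &sa, NULL) < 0) {
		perror("sigaction");
		return 1;
	}
	raise(SIGUSR1);
	return 0;
}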
/* * This creates a new process as a copy of the old one, * but does not actually start it yet. * * It copies the registers, and all the appropriate * parts of the process environment (as per the clone * flags). The actual kick-off is left to the caller. */ static struct task_struct *copy_process(unsigned long clone_flags, unsigned long stack_start, struct pt_regs *regs, unsigned long stack_size, int __user *parent_tidptr, int __user *child_tidptr, struct pid *pid) { int retval; struct task_struct *p; if ((clone_flags & (CLONE_NEWNS|CLONE_FS)) == (CLONE_NEWNS|CLONE_FS)) return ERR_PTR(-EINVAL); /* * Thread groups must share signals as well, and detached threads * can only be started up within the thread group. */ if ((clone_flags & CLONE_THREAD) && !(clone_flags & CLONE_SIGHAND)) return ERR_PTR(-EINVAL); /* * Shared signal handlers imply shared VM. By way of the above, * thread groups also imply shared VM. Blocking this case allows * for various simplifications in other code. */ if ((clone_flags & CLONE_SIGHAND) && !(clone_flags & CLONE_VM)) return ERR_PTR(-EINVAL); retval = security_task_create(clone_flags); if (retval) goto fork_out; retval = -ENOMEM; p = dup_task_struct(current); if (!p) goto fork_out; rt_mutex_init_task(p); #ifdef CONFIG_PROVE_LOCKING DEBUG_LOCKS_WARN_ON(!p->hardirqs_enabled); DEBUG_LOCKS_WARN_ON(!p->softirqs_enabled); #endif retval = -EAGAIN; if (atomic_read(&p->user->processes) >= p->signal->rlim[RLIMIT_NPROC].rlim_cur) { if (!capable(CAP_SYS_ADMIN) && !capable(CAP_SYS_RESOURCE) && p->user != &root_user) goto bad_fork_free; } atomic_inc(&p->user->__count); atomic_inc(&p->user->processes); get_group_info(p->group_info); /* * If multiple threads are within copy_process(), then this check * triggers too late. This doesn't hurt, the check is only there * to stop root fork bombs. 
*/ if (nr_threads >= max_threads) goto bad_fork_cleanup_count; if (!try_module_get(task_thread_info(p)->exec_domain->module)) goto bad_fork_cleanup_count; if (p->binfmt && !try_module_get(p->binfmt->module)) goto bad_fork_cleanup_put_domain; p->did_exec = 0; delayacct_tsk_init(p); /* Must remain after dup_task_struct() */ copy_flags(clone_flags, p); p->pid = pid_nr(pid); retval = -EFAULT; if (clone_flags & CLONE_PARENT_SETTID) if (put_user(p->pid, parent_tidptr)) goto bad_fork_cleanup_delays_binfmt; INIT_LIST_HEAD(&p->children); INIT_LIST_HEAD(&p->sibling); p->vfork_done = NULL; spin_lock_init(&p->alloc_lock); clear_tsk_thread_flag(p, TIF_SIGPENDING); init_sigpending(&p->pending); p->utime = cputime_zero; p->stime = cputime_zero; p->sched_time = 0; #ifdef CONFIG_DETECT_SOFTLOCKUP p->last_switch_count = 0; p->last_switch_timestamp = 0; #endif #ifdef CONFIG_TASK_XACCT p->rchar = 0; /* I/O counter: bytes read */ p->wchar = 0; /* I/O counter: bytes written */ p->syscr = 0; /* I/O counter: read syscalls */ p->syscw = 0; /* I/O counter: write syscalls */ #endif task_io_accounting_init(p); acct_clear_integrals(p); p->it_virt_expires = cputime_zero; p->it_prof_expires = cputime_zero; p->it_sched_expires = 0; INIT_LIST_HEAD(&p->cpu_timers[0]); INIT_LIST_HEAD(&p->cpu_timers[1]); INIT_LIST_HEAD(&p->cpu_timers[2]); p->lock_depth = -1; /* -1 = no lock */ do_posix_clock_monotonic_gettime(&p->start_time); #ifdef CONFIG_SECURITY p->security = NULL; #endif p->io_context = NULL; p->audit_context = NULL; cpuset_fork(p); #ifdef CONFIG_NUMA p->mempolicy = mpol_copy(p->mempolicy); if (IS_ERR(p->mempolicy)) { retval = PTR_ERR(p->mempolicy); p->mempolicy = NULL; goto bad_fork_cleanup_cpuset; } mpol_fix_fork_child_flag(p); #endif #ifdef CONFIG_TRACE_IRQFLAGS p->irq_events = 0; #ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW p->hardirqs_enabled = 1; #else p->hardirqs_enabled = 0; #endif p->hardirq_enable_ip = 0; p->hardirq_enable_event = 0; p->hardirq_disable_ip = _THIS_IP_; p->hardirq_disable_event = 0; p->softirqs_enabled = 1; p->softirq_enable_ip = _THIS_IP_; p->softirq_enable_event = 0; p->softirq_disable_ip = 0; p->softirq_disable_event = 0; p->hardirq_context = 0; p->softirq_context = 0; #endif #ifdef CONFIG_LOCKDEP p->lockdep_depth = 0; /* no locks held yet */ p->curr_chain_key = 0; p->lockdep_recursion = 0; #endif #ifdef CONFIG_DEBUG_MUTEXES p->blocked_on = NULL; /* not blocked yet */ #endif p->tgid = p->pid; if (clone_flags & CLONE_THREAD) p->tgid = current->tgid; if ((retval = security_task_alloc(p))) goto bad_fork_cleanup_policy; if ((retval = audit_alloc(p))) goto bad_fork_cleanup_security; /* copy all the process information */ if ((retval = copy_semundo(clone_flags, p))) goto bad_fork_cleanup_audit; if ((retval = copy_files(clone_flags, p))) goto bad_fork_cleanup_semundo; if ((retval = copy_fs(clone_flags, p))) goto bad_fork_cleanup_files; if ((retval = copy_sighand(clone_flags, p))) goto bad_fork_cleanup_fs; if ((retval = copy_signal(clone_flags, p))) goto bad_fork_cleanup_sighand; if ((retval = copy_mm(clone_flags, p))) goto bad_fork_cleanup_signal; if ((retval = copy_keys(clone_flags, p))) goto bad_fork_cleanup_mm; if ((retval = copy_namespaces(clone_flags, p))) goto bad_fork_cleanup_keys; retval = copy_thread(clone_flags, stack_start, stack_size, p, regs); if (retval) goto bad_fork_cleanup_namespaces; p->set_child_tid = (clone_flags & CLONE_CHILD_SETTID) ? child_tidptr : NULL; /* * Clear TID on mm_release()? */ p->clear_child_tid = (clone_flags & CLONE_CHILD_CLEARTID) ? 
child_tidptr: NULL; p->robust_list = NULL; #ifdef CONFIG_COMPAT p->compat_robust_list = NULL; #endif INIT_LIST_HEAD(&p->pi_state_list); p->pi_state_cache = NULL; /* * sigaltstack should be cleared when sharing the same VM */ if ((clone_flags & (CLONE_VM|CLONE_VFORK)) == CLONE_VM) p->sas_ss_sp = p->sas_ss_size = 0; /* * Syscall tracing should be turned off in the child regardless * of CLONE_PTRACE. */ clear_tsk_thread_flag(p, TIF_SYSCALL_TRACE); #ifdef TIF_SYSCALL_EMU clear_tsk_thread_flag(p, TIF_SYSCALL_EMU); #endif /* Our parent execution domain becomes current domain These must match for thread signalling to apply */ p->parent_exec_id = p->self_exec_id; /* ok, now we should be set up.. */ p->exit_signal = (clone_flags & CLONE_THREAD) ? -1 : (clone_flags & CLONE_PARENT) ? current->group_leader->exit_signal : (clone_flags & CSIGNAL); p->pdeath_signal = 0; p->exit_state = 0; /* * Ok, make it visible to the rest of the system. * We dont wake it up yet. */ p->group_leader = p; INIT_LIST_HEAD(&p->thread_group); INIT_LIST_HEAD(&p->ptrace_children); INIT_LIST_HEAD(&p->ptrace_list); /* Perform scheduler related setup. Assign this task to a CPU. */ sched_fork(p, clone_flags); /* Need tasklist lock for parent etc handling! */ write_lock_irq(&tasklist_lock); /* for sys_ioprio_set(IOPRIO_WHO_PGRP) */ p->ioprio = current->ioprio; /* * The task hasn't been attached yet, so its cpus_allowed mask will * not be changed, nor will its assigned CPU. * * The cpus_allowed mask of the parent may have changed after it was * copied first time - so re-copy it here, then check the child's CPU * to ensure it is on a valid CPU (and if not, just force it back to * parent's CPU). This avoids alot of nasty races. */ p->cpus_allowed = current->cpus_allowed; if (unlikely(!cpu_isset(task_cpu(p), p->cpus_allowed) || !cpu_online(task_cpu(p)))) set_task_cpu(p, smp_processor_id()); /* CLONE_PARENT re-uses the old parent */ if (clone_flags & (CLONE_PARENT|CLONE_THREAD)) p->real_parent = current->real_parent; else p->real_parent = current; p->parent = p->real_parent; spin_lock(&current->sighand->siglock); /* * Process group and session signals need to be delivered to just the * parent before the fork or both the parent and the child after the * fork. Restart if a signal comes in before we add the new process to * its process group. * A fatal signal pending means that current will exit, so the new * thread can't slip out of an OOM kill (or normal SIGKILL). */ recalc_sigpending(); if (signal_pending(current)) { spin_unlock(&current->sighand->siglock); write_unlock_irq(&tasklist_lock); retval = -ERESTARTNOINTR; goto bad_fork_cleanup_namespaces; } if (clone_flags & CLONE_THREAD) { p->group_leader = current->group_leader; list_add_tail_rcu(&p->thread_group, &p->group_leader->thread_group); if (!cputime_eq(current->signal->it_virt_expires, cputime_zero) || !cputime_eq(current->signal->it_prof_expires, cputime_zero) || current->signal->rlim[RLIMIT_CPU].rlim_cur != RLIM_INFINITY || !list_empty(&current->signal->cpu_timers[0]) || !list_empty(&current->signal->cpu_timers[1]) || !list_empty(&current->signal->cpu_timers[2])) { /* * Have child wake up on its first tick to check * for process CPU timers.
*/ p->it_prof_expires = jiffies_to_cputime(1); } } if (likely(p->pid)) { add_parent(p); if (unlikely(p->ptrace & PT_PTRACED)) __ptrace_link(p, current->parent); if (thread_group_leader(p)) { p->signal->tty = current->signal->tty; p->signal->pgrp = process_group(current); set_signal_session(p->signal, process_session(current)); attach_pid(p, PIDTYPE_PGID, task_pgrp(current)); attach_pid(p, PIDTYPE_SID, task_session(current)); list_add_tail_rcu(&p->tasks, &init_task.tasks); __get_cpu_var(process_counts)++; } attach_pid(p, PIDTYPE_PID, pid); nr_threads++; } total_forks++; spin_unlock(&current->sighand->siglock); write_unlock_irq(&tasklist_lock); proc_fork_connector(p); return p; bad_fork_cleanup_namespaces: exit_task_namespaces(p); bad_fork_cleanup_keys: exit_keys(p); bad_fork_cleanup_mm: if (p->mm) mmput(p->mm); bad_fork_cleanup_signal: cleanup_signal(p); bad_fork_cleanup_sighand: __cleanup_sighand(p->sighand); bad_fork_cleanup_fs: exit_fs(p); /* blocking */ bad_fork_cleanup_files: exit_files(p); /* blocking */ bad_fork_cleanup_semundo: exit_sem(p); bad_fork_cleanup_audit: audit_free(p); bad_fork_cleanup_security: security_task_free(p); bad_fork_cleanup_policy: #ifdef CONFIG_NUMA mpol_free(p->mempolicy); bad_fork_cleanup_cpuset: #endif cpuset_exit(p); bad_fork_cleanup_delays_binfmt: delayacct_tsk_free(p); if (p->binfmt) module_put(p->binfmt->module); bad_fork_cleanup_put_domain: module_put(task_thread_info(p)->exec_domain->module); bad_fork_cleanup_count: put_group_info(p->group_info); atomic_dec(&p->user->processes); free_uid(p->user); bad_fork_free: free_task(p); fork_out: return ERR_PTR(retval); }
/* * switch_to(x,y) should switch tasks from x to y. * * We fsave/fwait so that an exception goes off at the right time * (as a call from the fsave or fwait in effect) rather than to * the wrong process. Lazy FP saving no longer makes any sense * with modern CPUs, and this simplifies a lot of things (SMP * and UP become the same). * * NOTE! We used to use the x86 hardware context switching. The * reason for not using it any more becomes apparent when you * try to recover gracefully from saved state that is no longer * valid (stale segment register values in particular). With the * hardware task-switch, there is no way to fix up bad state in * a reasonable manner. * * The fact that Intel documents the hardware task-switching to * be slow is a fairly red herring - this code is not noticeably * faster. However, there _is_ some room for improvement here, * so the performance issues may eventually be a valid point. * More important, however, is the fact that this allows us much * more flexibility. * * The return value (in %ax) will be the "prev" task after * the task-switch, and shows up in ret_from_fork in entry.S, * for example. */ __notrace_funcgraph struct task_struct * __switch_to(struct task_struct *prev_p, struct task_struct *next_p) { struct thread_struct *prev = &prev_p->thread, *next = &next_p->thread; int cpu = smp_processor_id(); struct tss_struct *tss = &per_cpu(init_tss, cpu); bool preload_fpu; /* never put a printk in __switch_to... printk() calls wake_up*() indirectly */ /* * If the task has used fpu the last 5 timeslices, just do a full * restore of the math state immediately to avoid the trap; the * chances of needing FPU soon are obviously high now */ preload_fpu = tsk_used_math(next_p) && next_p->fpu_counter > 5; __unlazy_fpu(prev_p); /* we're going to use this soon, after a few expensive things */ if (preload_fpu) prefetch(next->fpu.state); /* * Reload esp0. */ load_sp0(tss, next); /* * Save away %gs. No need to save %fs, as it was saved on the * stack on entry. No need to save %es and %ds, as those are * always kernel segments while inside the kernel. Doing this * before setting the new TLS descriptors avoids the situation * where we temporarily have non-reloadable segments in %fs * and %gs. This could be an issue if the NMI handler ever * used %fs or %gs (it does not today), or if the kernel is * running inside of a hypervisor layer. */ lazy_save_gs(prev->gs); /* * Load the per-thread Thread-Local Storage descriptor. */ load_TLS(next, cpu); /* * Restore IOPL if needed. In normal use, the flags restore * in the switch assembly will handle this. But if the kernel * is running virtualized at a non-zero CPL, the popf will * not restore flags, so it must be done in a separate step. */ if (get_kernel_rpl() && unlikely(prev->iopl != next->iopl)) set_iopl_mask(next->iopl); /* * Now maybe handle debug registers and/or IO bitmaps */ if (unlikely(task_thread_info(prev_p)->flags & _TIF_WORK_CTXSW_PREV || task_thread_info(next_p)->flags & _TIF_WORK_CTXSW_NEXT)) __switch_to_xtra(prev_p, next_p, tss); /* If we're going to preload the fpu context, make sure clts is run while we're batching the cpu state updates. */ if (preload_fpu) clts(); /* * Leave lazy mode, flushing any hypercalls made here. * This must be done before restoring TLS segments so * the GDT and LDT are properly updated, and must be * done before math_state_restore, so the TS bit is up * to date.
*/ arch_end_context_switch(next_p); if (preload_fpu) __math_state_restore(); /* * Restore %gs if needed (which is common) */ if (prev->gs | next->gs) lazy_load_gs(next->gs); percpu_write(current_task, next_p); return prev_p; }
void dump_trace(struct task_struct *task, struct pt_regs *regs, unsigned long *stack, unsigned long bp, const struct stacktrace_ops *ops, void *data) { const unsigned cpu = get_cpu(); unsigned long *irq_stack_end = (unsigned long *)per_cpu(irq_stack_ptr, cpu); unsigned used = 0; struct thread_info *tinfo; int graph = 0; if (!task) task = current; if (!stack) { unsigned long dummy; stack = &dummy; if (task && task != current) stack = (unsigned long *)task->thread.sp; } #ifdef CONFIG_FRAME_POINTER if (!bp) { if (task == current) { /* Grab bp right from our regs */ get_bp(bp); } else { /* bp is the last reg pushed by switch_to */ bp = *(unsigned long *) task->thread.sp; } } #endif /* * Print function call entries in all stacks, starting at the * current stack address. If the stacks consist of nested * exceptions */ tinfo = task_thread_info(task); for (;;) { char *id; unsigned long *estack_end; estack_end = in_exception_stack(cpu, (unsigned long)stack, &used, &id); if (estack_end) { if (ops->stack(data, id) < 0) break; bp = ops->walk_stack(tinfo, stack, bp, ops, data, estack_end, &graph); ops->stack(data, "<EOE>"); /* * We link to the next stack via the * second-to-last pointer (index -2 to end) in the * exception stack: */ stack = (unsigned long *) estack_end[-2]; continue; } if (irq_stack_end) { unsigned long *irq_stack; irq_stack = irq_stack_end - (IRQ_STACK_SIZE - 64) / sizeof(*irq_stack); if (in_irq_stack(stack, irq_stack, irq_stack_end)) { if (ops->stack(data, "IRQ") < 0) break; bp = ops->walk_stack(tinfo, stack, bp, ops, data, irq_stack_end, &graph); /* * We link to the next stack (which would be * the process stack normally) the last * pointer (index -1 to end) in the IRQ stack: */ stack = (unsigned long *) (irq_stack_end[-1]); bp = fixup_bp_irq_link(bp, stack, irq_stack, irq_stack_end); irq_stack_end = NULL; ops->stack(data, "EOI"); continue; } } break; } /* * This handles the process stack: */ bp = ops->walk_stack(tinfo, stack, bp, ops, data, NULL, &graph); put_cpu(); }
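/* * Minimal consumer of dump_trace() (illustrative; the stacktrace_ops field names are recalled from the x86 dumpstack code of this era and should be treated as assumptions): an ops instance that just counts the reliable return addresses found on the stacks walked above. */
static void count_address(void *data, unsigned long addr, int reliable)
{
	if (reliable)
		(*(unsigned long *)data)++;
}

static int count_stack(void *data, char *name)
{
	return 0;		/* non-negative: keep descending into sub-stacks */
}

static const struct stacktrace_ops count_ops = {
	.address	= count_address,
	.stack		= count_stack,
	.walk_stack	= print_context_stack,
};

/*
 * Example use:
 *	unsigned long n = 0;
 *	dump_trace(current, NULL, NULL, 0, &count_ops, &n);
 */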
int save_i387_xstate(void __user *buf) { struct task_struct *tsk = current; int err = 0; if (!access_ok(VERIFY_WRITE, buf, sig_xstate_size)) return -EACCES; BUG_ON(sig_xstate_size < xstate_size); if ((unsigned long)buf % 64) printk("save_i387_xstate: bad fpstate %p\n", buf); if (!used_math()) return 0; if (task_thread_info(tsk)->status & TS_USEDFPU) { if (use_xsave()) err = xsave_user(buf); else err = fxsave_user(buf); if (err) return err; task_thread_info(tsk)->status &= ~TS_USEDFPU; stts(); } else { sanitize_i387_state(tsk); if (__copy_to_user(buf, &tsk->thread.fpu.state->fxsave, xstate_size)) return -1; } clear_used_math(); /* trigger finit */ if (use_xsave()) { struct _fpstate __user *fx = buf; struct _xstate __user *x = buf; u64 xstate_bv; err = __copy_to_user(&fx->sw_reserved, &fx_sw_reserved, sizeof(struct _fpx_sw_bytes)); err |= __put_user(FP_XSTATE_MAGIC2, (__u32 __user *) (buf + sig_xstate_size - FP_XSTATE_MAGIC2_SIZE)); /* * Read the xstate_bv which we copied (directly from the cpu or * from the state in task struct) to the user buffers and * set the FP/SSE bits. */ err |= __get_user(xstate_bv, &x->xstate_hdr.xstate_bv); /* * For legacy compatible, we always set FP/SSE bits in the bit * vector while saving the state to the user context. This will * enable us capturing any changes(during sigreturn) to * the FP/SSE bits by the legacy applications which don't touch * xstate_bv in the xsave header. * * xsave aware apps can change the xstate_bv in the xsave * header as well as change any contents in the memory layout. * xrestore as part of sigreturn will capture all the changes. */ xstate_bv |= XSTATE_FPSSE; err |= __put_user(xstate_bv, &x->xstate_hdr.xstate_bv); if (err) return err; } return 1; }
int copy_thread(unsigned long clone_flags, unsigned long sp, unsigned long stack_size, struct task_struct *p, struct pt_regs *regs) { struct pt_regs *childregs; unsigned long ksp; /* * When creating a new kernel thread we pass sp as zero. * Assign it to a reasonable value now that we have the stack. */ if (sp == 0 && regs->ex1 == PL_ICS_EX1(KERNEL_PL, 0)) sp = KSTK_TOP(p); /* * Do not clone step state from the parent; each thread * must make its own lazily. */ task_thread_info(p)->step_state = NULL; /* * Start new thread in ret_from_fork so it schedules properly * and then return from interrupt like the parent. */ p->thread.pc = (unsigned long) ret_from_fork; /* Save user stack top pointer so we can ID the stack vm area later. */ p->thread.usp0 = sp; /* Record the pid of the process that created this one. */ p->thread.creator_pid = current->pid; /* * Copy the registers onto the kernel stack so the * return-from-interrupt code will reload it into registers. */ childregs = task_pt_regs(p); *childregs = *regs; childregs->regs[0] = 0; /* return value is zero */ childregs->sp = sp; /* override with new user stack pointer */ /* * If CLONE_SETTLS is set, set "tp" in the new task to "r4", * which is passed in as arg #5 to sys_clone(). */ if (clone_flags & CLONE_SETTLS) childregs->tp = regs->regs[4]; /* * Copy the callee-saved registers from the passed pt_regs struct * into the context-switch callee-saved registers area. * This way when we start the interrupt-return sequence, the * callee-save registers will be correctly in registers, which * is how we assume the compiler leaves them as we start doing * the normal return-from-interrupt path after calling C code. * Zero out the C ABI save area to mark the top of the stack. */ ksp = (unsigned long) childregs; ksp -= C_ABI_SAVE_AREA_SIZE; /* interrupt-entry save area */ ((long *)ksp)[0] = ((long *)ksp)[1] = 0; ksp -= CALLEE_SAVED_REGS_COUNT * sizeof(unsigned long); memcpy((void *)ksp, &regs->regs[CALLEE_SAVED_FIRST_REG], CALLEE_SAVED_REGS_COUNT * sizeof(unsigned long)); ksp -= C_ABI_SAVE_AREA_SIZE; /* __switch_to() save area */ ((long *)ksp)[0] = ((long *)ksp)[1] = 0; p->thread.ksp = ksp; #if CHIP_HAS_TILE_DMA() /* * No DMA in the new thread. We model this on the fact that * fork() clears the pending signals, alarms, and aio for the child. */ memset(&p->thread.tile_dma_state, 0, sizeof(struct tile_dma_state)); memset(&p->thread.dma_async_tlb, 0, sizeof(struct async_tlb)); #endif #if CHIP_HAS_SN_PROC() /* Likewise, the new thread is not running static processor code. */ p->thread.sn_proc_running = 0; memset(&p->thread.sn_async_tlb, 0, sizeof(struct async_tlb)); #endif #if CHIP_HAS_PROC_STATUS_SPR() /* New thread has its miscellaneous processor state bits clear. */ p->thread.proc_status = 0; #endif #ifdef CONFIG_HARDWALL /* New thread does not own any networks. */ p->thread.hardwall = NULL; #endif /* * Start the new thread with the current architecture state * (user interrupt masks, etc.). */ save_arch_state(&p->thread); return 0; }
static int setup_rt_frame32(struct ksignal *ksig, sigset_t *set, struct pt_regs *regs) { compat_sigset_t cset; rt_sigframe32 __user *frame; unsigned long restorer; size_t frame_size; u32 uc_flags; frame_size = sizeof(*frame) - sizeof(frame->uc.uc_mcontext_ext.__reserved); /* * gprs_high are always present for 31-bit compat tasks. * The space for vector registers is only allocated if * the machine supports it */ uc_flags = UC_GPRS_HIGH; if (MACHINE_HAS_VX) { if (current->thread.vxrs) uc_flags |= UC_VXRS; } else frame_size -= sizeof(frame->uc.uc_mcontext_ext.vxrs_low) + sizeof(frame->uc.uc_mcontext_ext.vxrs_high); frame = get_sigframe(&ksig->ka, regs, frame_size); if (frame == (void __user *) -1UL) return -EFAULT; /* Set up backchain. */ if (__put_user(regs->gprs[15], (unsigned int __force __user *) frame)) return -EFAULT; /* Set up to return from userspace. If provided, use a stub already in userspace. */ if (ksig->ka.sa.sa_flags & SA_RESTORER) { restorer = (unsigned long __force) ksig->ka.sa.sa_restorer | PSW32_ADDR_AMODE; } else { __u16 __user *svc = &frame->svc_insn; if (__put_user(S390_SYSCALL_OPCODE | __NR_rt_sigreturn, svc)) return -EFAULT; restorer = (unsigned long __force) svc | PSW32_ADDR_AMODE; } /* Create siginfo on the signal stack */ if (copy_siginfo_to_user32(&frame->info, &ksig->info)) return -EFAULT; /* Store registers needed to create the signal frame */ store_sigregs(); /* Create ucontext on the signal stack. */ sigset_to_sigset32(set->sig, cset.sig); if (__put_user(uc_flags, &frame->uc.uc_flags) || __put_user(0, &frame->uc.uc_link) || __compat_save_altstack(&frame->uc.uc_stack, regs->gprs[15]) || save_sigregs32(regs, &frame->uc.uc_mcontext) || __copy_to_user(&frame->uc.uc_sigmask, &cset, sizeof(cset)) || save_sigregs_ext32(regs, &frame->uc.uc_mcontext_ext)) return -EFAULT; /* Set up registers for signal handler */ regs->gprs[14] = restorer; regs->gprs[15] = (__force __u64) frame; /* Force 31 bit amode and default user address space control. */ regs->psw.mask = PSW_MASK_BA | (PSW_USER_BITS & PSW_MASK_ASC) | (regs->psw.mask & ~PSW_MASK_ASC); regs->psw.addr = (__u64 __force) ksig->ka.sa.sa_handler; regs->gprs[2] = map_signal(ksig->sig); regs->gprs[3] = (__force __u64) &frame->info; regs->gprs[4] = (__force __u64) &frame->uc; regs->gprs[5] = task_thread_info(current)->last_break; return 0; }
static int setup_frame32(struct ksignal *ksig, sigset_t *set, struct pt_regs *regs) { int sig = ksig->sig; sigframe32 __user *frame; struct sigcontext32 sc; unsigned long restorer; size_t frame_size; /* * gprs_high are always present for 31-bit compat tasks. * The space for vector registers is only allocated if * the machine supports it */ frame_size = sizeof(*frame) - sizeof(frame->sregs_ext.__reserved); if (!MACHINE_HAS_VX) frame_size -= sizeof(frame->sregs_ext.vxrs_low) + sizeof(frame->sregs_ext.vxrs_high); frame = get_sigframe(&ksig->ka, regs, frame_size); if (frame == (void __user *) -1UL) return -EFAULT; /* Set up backchain. */ if (__put_user(regs->gprs[15], (unsigned int __user *) frame)) return -EFAULT; /* Create struct sigcontext32 on the signal stack */ sigset_to_sigset32(set->sig, sc.oldmask); sc.sregs = (__u32)(unsigned long __force) &frame->sregs; if (__copy_to_user(&frame->sc, &sc, sizeof(frame->sc))) return -EFAULT; /* Store registers needed to create the signal frame */ store_sigregs(); /* Create _sigregs32 on the signal stack */ if (save_sigregs32(regs, &frame->sregs)) return -EFAULT; /* Place signal number on stack to allow backtrace from handler. */ if (__put_user(regs->gprs[2], (int __force __user *) &frame->signo)) return -EFAULT; /* Create _sigregs_ext32 on the signal stack */ if (save_sigregs_ext32(regs, &frame->sregs_ext)) return -EFAULT; /* Set up to return from userspace. If provided, use a stub already in userspace. */ if (ksig->ka.sa.sa_flags & SA_RESTORER) { restorer = (unsigned long __force) ksig->ka.sa.sa_restorer | PSW32_ADDR_AMODE; } else { /* Signal frames without vectors registers are short ! */ __u16 __user *svc = (void *) frame + frame_size - 2; if (__put_user(S390_SYSCALL_OPCODE | __NR_sigreturn, svc)) return -EFAULT; restorer = (unsigned long __force) svc | PSW32_ADDR_AMODE; } /* Set up registers for signal handler */ regs->gprs[14] = restorer; regs->gprs[15] = (__force __u64) frame; /* Force 31 bit amode and default user address space control. */ regs->psw.mask = PSW_MASK_BA | (PSW_USER_BITS & PSW_MASK_ASC) | (regs->psw.mask & ~PSW_MASK_ASC); regs->psw.addr = (__force __u64) ksig->ka.sa.sa_handler; regs->gprs[2] = map_signal(sig); regs->gprs[3] = (__force __u64) &frame->sc; /* We forgot to include these in the sigcontext. To avoid breaking binary compatibility, they are passed as args. */ if (sig == SIGSEGV || sig == SIGBUS || sig == SIGILL || sig == SIGTRAP || sig == SIGFPE) { /* set extra registers only for synchronous signals */ regs->gprs[4] = regs->int_code & 127; regs->gprs[5] = regs->int_parm_long; regs->gprs[6] = task_thread_info(current)->last_break; } return 0; }
SYSCALL_DEFINE1(arc_settls, void *, user_tls_data_ptr) { task_thread_info(current)->thr_ptr = (unsigned int)user_tls_data_ptr; return 0; }
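/* * User-space side (illustrative, not from the kernel tree): the ARC-private syscall above simply records the caller's TLS-area pointer in the task's thread_info, as the code shows. The syscall number macro below (__NR_arc_settls) is assumed from the ARC uapi headers. */
#include <sys/syscall.h>
#include <unistd.h>

static int set_tls_block(void *tls_block)
{
	/* Returns 0 on success, matching the kernel routine above. */
	return (int)syscall(__NR_arc_settls, tls_block);
}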
void switch_to_tt(void *prev, void *next) { struct task_struct *from, *to, *prev_sched; unsigned long flags; int err, vtalrm, alrm, prof, cpu; char c; from = prev; to = next; cpu = task_thread_info(from)->cpu; if(cpu == 0) forward_interrupts(to->thread.mode.tt.extern_pid); #ifdef CONFIG_SMP forward_ipi(cpu_data[cpu].ipi_pipe[0], to->thread.mode.tt.extern_pid); #endif local_irq_save(flags); vtalrm = change_sig(SIGVTALRM, 0); alrm = change_sig(SIGALRM, 0); prof = change_sig(SIGPROF, 0); forward_pending_sigio(to->thread.mode.tt.extern_pid); c = 0; /* Notice that here we "up" the semaphore on which "to" is waiting, and * below (the read) we wait on this semaphore (which is implemented by * switch_pipe) and go sleeping. Thus, after that, we have resumed in * "to", and can't use any more the value of "from" (which is outdated), * nor the value in "to" (since it was the task which stole us the CPU, * which we don't care about). */ err = os_write_file(to->thread.mode.tt.switch_pipe[1], &c, sizeof(c)); if(err != sizeof(c)) panic("write of switch_pipe failed, err = %d", -err); if(from->thread.mode.tt.switch_pipe[0] == -1) os_kill_process(os_getpid(), 0); err = os_read_file(from->thread.mode.tt.switch_pipe[0], &c, sizeof(c)); if(err != sizeof(c)) panic("read of switch_pipe failed, errno = %d", -err); /* If the process that we have just scheduled away from has exited, * then it needs to be killed here. The reason is that, even though * it will kill itself when it next runs, that may be too late. Its * stack will be freed, possibly before then, and if that happens, * we have a use-after-free situation. So, it gets killed here * in case it has not already killed itself. */ prev_sched = current->thread.prev_sched; if(prev_sched->thread.mode.tt.switch_pipe[0] == -1) os_kill_process(prev_sched->thread.mode.tt.extern_pid, 1); change_sig(SIGVTALRM, vtalrm); change_sig(SIGALRM, alrm); change_sig(SIGPROF, prof); arch_switch_to_tt(prev_sched, current); flush_tlb_all(); local_irq_restore(flags); }
/* * Copy architecture-specific thread state * * Layout of Child kernel mode stack as setup at the end of this function is * * | ... | * | ... | * | unused | * | | * ------------------ * | r25 | <==== top of Stack (thread.ksp) * ~ ~ * | --to-- | (CALLEE Regs of user mode) * | r13 | * ------------------ * | fp | * | blink | @ret_from_fork * ------------------ * | | * ~ ~ * ~ ~ * | | * ------------------ * | r12 | * ~ ~ * | --to-- | (scratch Regs of user mode) * | r0 | * ------------------ * | SP | * | orig_r0 | * | event/ECR | * | user_r25 | * ------------------ <===== END of PAGE */ int copy_thread(unsigned long clone_flags, unsigned long usp, unsigned long kthread_arg, struct task_struct *p) { struct pt_regs *c_regs; /* child's pt_regs */ unsigned long *childksp; /* to unwind out of __switch_to() */ struct callee_regs *c_callee; /* child's callee regs */ struct callee_regs *parent_callee; /* paren't callee */ struct pt_regs *regs = current_pt_regs(); /* Mark the specific anchors to begin with (see pic above) */ c_regs = task_pt_regs(p); childksp = (unsigned long *)c_regs - 2; /* 2 words for FP/BLINK */ c_callee = ((struct callee_regs *)childksp) - 1; /* * __switch_to() uses thread.ksp to start unwinding stack * For kernel threads we don't need to create callee regs, the * stack layout nevertheless needs to remain the same. * Also, since __switch_to anyways unwinds callee regs, we use * this to populate kernel thread entry-pt/args into callee regs, * so that ret_from_kernel_thread() becomes simpler. */ p->thread.ksp = (unsigned long)c_callee; /* THREAD_KSP */ /* __switch_to expects FP(0), BLINK(return addr) at top */ childksp[0] = 0; /* fp */ childksp[1] = (unsigned long)ret_from_fork; /* blink */ if (unlikely(p->flags & PF_KTHREAD)) { memset(c_regs, 0, sizeof(struct pt_regs)); c_callee->r13 = kthread_arg; c_callee->r14 = usp; /* function */ return 0; } /*--------- User Task Only --------------*/ /* __switch_to expects FP(0), BLINK(return addr) at top of stack */ childksp[0] = 0; /* for POP fp */ childksp[1] = (unsigned long)ret_from_fork; /* for POP blink */ /* Copy parents pt regs on child's kernel mode stack */ *c_regs = *regs; if (usp) c_regs->sp = usp; c_regs->r0 = 0; /* fork returns 0 in child */ parent_callee = ((struct callee_regs *)regs) - 1; *c_callee = *parent_callee; if (unlikely(clone_flags & CLONE_SETTLS)) { /* * set task's userland tls data ptr from 4th arg * clone C-lib call is difft from clone sys-call */ task_thread_info(p)->thr_ptr = regs->r3; } else { /* Normal fork case: set parent's TLS ptr in child */ task_thread_info(p)->thr_ptr = task_thread_info(current)->thr_ptr; } return 0; }
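/* * How the PF_KTHREAD branch above is typically exercised (a generic sketch, not ARC-specific code; "example_thread_fn" and "example_worker" are made-up names): kernel threads created through kthread_run() are forked by kthreadd via kernel_thread(), and the entry point and argument passed there arrive in copy_thread() as "usp" / "kthread_arg", which the code above parks in the callee-saved regs for ret_from_kernel_thread(). */
#include <linux/kthread.h>
#include <linux/delay.h>

static int example_thread_fn(void *data)
{
	/* Idle loop until someone calls kthread_stop() on this task. */
	while (!kthread_should_stop())
		msleep(100);
	return 0;
}

static struct task_struct *example_start_worker(void)
{
	return kthread_run(example_thread_fn, NULL, "example_worker");
}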
long arch_ptrace(struct task_struct *child, long request, unsigned long addr, unsigned long data) { unsigned long __user *datap = (long __user __force *)data; unsigned long tmp; long ret = -EIO; char *childreg; struct pt_regs copyregs; switch (request) { case PTRACE_PEEKUSR: /* Read register from pt_regs. */ if (addr >= PTREGS_SIZE) break; childreg = getregs(child, &copyregs) + addr; #ifdef CONFIG_COMPAT if (is_compat_task()) { if (addr & (sizeof(compat_long_t)-1)) break; ret = put_user(*(compat_long_t *)childreg, (compat_long_t __user *)datap); } else #endif { if (addr & (sizeof(long)-1)) break; ret = put_user(*(long *)childreg, datap); } break; case PTRACE_POKEUSR: /* Write register in pt_regs. */ if (addr >= PTREGS_SIZE) break; childreg = getregs(child, &copyregs) + addr; #ifdef CONFIG_COMPAT if (is_compat_task()) { if (addr & (sizeof(compat_long_t)-1)) break; *(compat_long_t *)childreg = data; } else #endif { if (addr & (sizeof(long)-1)) break; *(long *)childreg = data; } putregs(child, &copyregs); ret = 0; break; case PTRACE_GETREGS: /* Get all registers from the child. */ ret = copy_regset_to_user(child, &tile_user_regset_view, REGSET_GPR, 0, sizeof(struct pt_regs), datap); break; case PTRACE_SETREGS: /* Set all registers in the child. */ ret = copy_regset_from_user(child, &tile_user_regset_view, REGSET_GPR, 0, sizeof(struct pt_regs), datap); break; case PTRACE_GETFPREGS: /* Get the child FPU state. */ case PTRACE_SETFPREGS: /* Set the child FPU state. */ break; case PTRACE_SETOPTIONS: /* Support TILE-specific ptrace options. */ BUILD_BUG_ON(PTRACE_O_MASK_TILE & PTRACE_O_MASK); tmp = data & PTRACE_O_MASK_TILE; data &= ~PTRACE_O_MASK_TILE; ret = ptrace_request(child, request, addr, data); if (ret == 0) { unsigned int flags = child->ptrace; flags &= ~(PTRACE_O_MASK_TILE << PT_OPT_FLAG_SHIFT); flags |= (tmp << PT_OPT_FLAG_SHIFT); child->ptrace = flags; } break; default: #ifdef CONFIG_COMPAT if (task_thread_info(current)->status & TS_COMPAT) { ret = compat_ptrace_request(child, request, addr, data); break; } #endif ret = ptrace_request(child, request, addr, data); break; } return ret; }
long arch_ptrace(struct task_struct *child, long request, unsigned long addr, unsigned long data) { unsigned long __user *datap = (long __user __force *)data; unsigned long tmp; long ret = -EIO; char *childreg; struct pt_regs copyregs; int ex1_offset; switch (request) { case PTRACE_PEEKUSR: /* Read register from pt_regs. */ if (addr >= PTREGS_SIZE) break; childreg = (char *)task_pt_regs(child) + addr; #ifdef CONFIG_COMPAT if (is_compat_task()) { if (addr & (sizeof(compat_long_t)-1)) break; ret = put_user(*(compat_long_t *)childreg, (compat_long_t __user *)datap); } else #endif { if (addr & (sizeof(long)-1)) break; ret = put_user(*(long *)childreg, datap); } break; case PTRACE_POKEUSR: /* Write register in pt_regs. */ if (addr >= PTREGS_SIZE) break; childreg = (char *)task_pt_regs(child) + addr; /* Guard against overwrites of the privilege level. */ ex1_offset = PTREGS_OFFSET_EX1; #if defined(CONFIG_COMPAT) && defined(__BIG_ENDIAN) if (is_compat_task()) /* point at low word */ ex1_offset += sizeof(compat_long_t); #endif if (addr == ex1_offset) data = PL_ICS_EX1(USER_PL, EX1_ICS(data)); #ifdef CONFIG_COMPAT if (is_compat_task()) { if (addr & (sizeof(compat_long_t)-1)) break; *(compat_long_t *)childreg = data; } else #endif { if (addr & (sizeof(long)-1)) break; *(long *)childreg = data; } ret = 0; break; case PTRACE_GETREGS: /* Get all registers from the child. */ if (copy_to_user(datap, task_pt_regs(child), sizeof(struct pt_regs)) == 0) { ret = 0; } break; case PTRACE_SETREGS: /* Set all registers in the child. */ if (copy_from_user(&copyregs, datap, sizeof(struct pt_regs)) == 0) { copyregs.ex1 = PL_ICS_EX1(USER_PL, EX1_ICS(copyregs.ex1)); *task_pt_regs(child) = copyregs; ret = 0; } break; case PTRACE_GETFPREGS: /* Get the child FPU state. */ case PTRACE_SETFPREGS: /* Set the child FPU state. */ break; case PTRACE_SETOPTIONS: /* Support TILE-specific ptrace options. */ child->ptrace &= ~PT_TRACE_MASK_TILE; tmp = data & PTRACE_O_MASK_TILE; data &= ~PTRACE_O_MASK_TILE; ret = ptrace_request(child, request, addr, data); if (tmp & PTRACE_O_TRACEMIGRATE) child->ptrace |= PT_TRACE_MIGRATE; break; default: #ifdef CONFIG_COMPAT if (task_thread_info(current)->status & TS_COMPAT) { ret = compat_ptrace_request(child, request, addr, data); break; } #endif ret = ptrace_request(child, request, addr, data); break; } return ret; }
/* * mipsmt_sys_sched_setaffinity - set the cpu affinity of a process */ asmlinkage long mipsmt_sys_sched_setaffinity(pid_t pid, unsigned int len, unsigned long __user *user_mask_ptr) { cpumask_var_t cpus_allowed, new_mask, effective_mask; struct thread_info *ti; struct task_struct *p; int retval; if (len < sizeof(new_mask)) return -EINVAL; if (copy_from_user(&new_mask, user_mask_ptr, sizeof(new_mask))) return -EFAULT; get_online_cpus(); rcu_read_lock(); p = find_process_by_pid(pid); if (!p) { rcu_read_unlock(); put_online_cpus(); return -ESRCH; } /* Prevent p going away */ get_task_struct(p); rcu_read_unlock(); if (!alloc_cpumask_var(&cpus_allowed, GFP_KERNEL)) { retval = -ENOMEM; goto out_put_task; } if (!alloc_cpumask_var(&new_mask, GFP_KERNEL)) { retval = -ENOMEM; goto out_free_cpus_allowed; } if (!alloc_cpumask_var(&effective_mask, GFP_KERNEL)) { retval = -ENOMEM; goto out_free_new_mask; } retval = -EPERM; if (!check_same_owner(p) && !capable(CAP_SYS_NICE)) goto out_unlock; retval = security_task_setscheduler(p); if (retval) goto out_unlock; /* Record new user-specified CPU set for future reference */ cpumask_copy(&p->thread.user_cpus_allowed, new_mask); again: /* Compute new global allowed CPU set if necessary */ ti = task_thread_info(p); if (test_ti_thread_flag(ti, TIF_FPUBOUND) && cpus_intersects(*new_mask, mt_fpu_cpumask)) { cpus_and(*effective_mask, *new_mask, mt_fpu_cpumask); retval = set_cpus_allowed_ptr(p, effective_mask); } else { cpumask_copy(effective_mask, new_mask); clear_ti_thread_flag(ti, TIF_FPUBOUND); retval = set_cpus_allowed_ptr(p, new_mask); } if (!retval) { cpuset_cpus_allowed(p, cpus_allowed); if (!cpumask_subset(effective_mask, cpus_allowed)) { /* * We must have raced with a concurrent cpuset * update. Just reset the cpus_allowed to the * cpuset's cpus_allowed */ cpumask_copy(new_mask, cpus_allowed); goto again; } } out_unlock: free_cpumask_var(effective_mask); out_free_new_mask: free_cpumask_var(new_mask); out_free_cpus_allowed: free_cpumask_var(cpus_allowed); out_put_task: put_task_struct(p); put_online_cpus(); return retval; }
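/*
 * Editor's illustrative sketch: the MT-FPAFF syscall above ultimately
 * narrows the task's allowed-CPU mask, which is the same visible effect a
 * plain sched_setaffinity() call has elsewhere.  Pinning to CPU 0 is just
 * an example.
 */
#define _GNU_SOURCE
#include <sched.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	cpu_set_t mask;

	CPU_ZERO(&mask);
	CPU_SET(0, &mask);		/* restrict this process to CPU 0 */

	if (sched_setaffinity(0 /* this task */, sizeof(mask), &mask) != 0) {
		perror("sched_setaffinity");
		return 1;
	}
	printf("pid %d now runs on CPU 0 only\n", getpid());
	return 0;
}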
static int setup_rt_frame(struct ksignal *ksig, sigset_t *set, struct pt_regs *regs) { struct rt_sigframe __user *frame; unsigned long uc_flags, restorer; size_t frame_size; frame_size = sizeof(struct rt_sigframe) - sizeof(_sigregs_ext); /* * gprs_high are only present for a 31-bit task running on * a 64-bit kernel (see compat_signal.c) but the space for * gprs_high needs to be allocated if vector registers are * included in the signal frame on a 31-bit system. */ uc_flags = 0; #ifdef CONFIG_64BIT if (MACHINE_HAS_VX) { frame_size += sizeof(_sigregs_ext); if (current->thread.vxrs) uc_flags |= UC_VXRS; } #endif frame = get_sigframe(&ksig->ka, regs, frame_size); if (frame == (void __user *) -1UL) return -EFAULT; /* Set up backchain. */ if (__put_user(regs->gprs[15], (addr_t __user *) frame)) return -EFAULT; /* Set up to return from userspace. If provided, use a stub already in userspace. */ if (ksig->ka.sa.sa_flags & SA_RESTORER) { restorer = (unsigned long) ksig->ka.sa.sa_restorer | PSW_ADDR_AMODE; } else { __u16 __user *svc = &frame->svc_insn; if (__put_user(S390_SYSCALL_OPCODE | __NR_rt_sigreturn, svc)) return -EFAULT; restorer = (unsigned long) svc | PSW_ADDR_AMODE; } /* Create siginfo on the signal stack */ if (copy_siginfo_to_user(&frame->info, &ksig->info)) return -EFAULT; /* Store registers needed to create the signal frame */ store_sigregs(); /* Create ucontext on the signal stack. */ if (__put_user(uc_flags, &frame->uc.uc_flags) || __put_user(NULL, &frame->uc.uc_link) || __save_altstack(&frame->uc.uc_stack, regs->gprs[15]) || save_sigregs(regs, &frame->uc.uc_mcontext) || __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set)) || save_sigregs_ext(regs, &frame->uc.uc_mcontext_ext)) return -EFAULT; /* Set up registers for signal handler */ regs->gprs[14] = restorer; regs->gprs[15] = (unsigned long) frame; /* Force default amode and default user address space control. */ regs->psw.mask = PSW_MASK_EA | PSW_MASK_BA | (PSW_USER_BITS & PSW_MASK_ASC) | (regs->psw.mask & ~PSW_MASK_ASC); regs->psw.addr = (unsigned long) ksig->ka.sa.sa_handler | PSW_ADDR_AMODE; regs->gprs[2] = map_signal(ksig->sig); regs->gprs[3] = (unsigned long) &frame->info; regs->gprs[4] = (unsigned long) &frame->uc; regs->gprs[5] = task_thread_info(current)->last_break; return 0; }
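/*
 * Editor's illustrative sketch: setup_rt_frame() above hands the handler
 * three arguments (signal number, siginfo pointer, ucontext pointer) in
 * gprs 2..4, which is exactly what a handler registered with SA_SIGINFO
 * receives.  Signal choice and message are illustrative.
 */
#include <signal.h>
#include <string.h>
#include <unistd.h>

static void handler(int sig, siginfo_t *info, void *ucontext)
{
	/* async-signal-safe output only */
	static const char msg[] = "caught signal\n";

	(void)sig; (void)info; (void)ucontext;
	write(STDERR_FILENO, msg, sizeof(msg) - 1);
}

int main(void)
{
	struct sigaction sa;

	memset(&sa, 0, sizeof(sa));
	sa.sa_sigaction = handler;	/* three-argument form */
	sa.sa_flags = SA_SIGINFO;
	sigemptyset(&sa.sa_mask);

	sigaction(SIGUSR1, &sa, NULL);
	raise(SIGUSR1);
	return 0;
}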
/*==========================================================================* * Name: do_boot_cpu * * Description: This routine boots up one AP. * * Born on Date: 2002.02.05 * * Arguments: phys_id - Target CPU physical ID * * Returns: void (cannot fail) * * Modification log: * Date Who Description * ---------- --- -------------------------------------------------------- * 2003-06-24 hy modify for linux-2.5.69 * *==========================================================================*/ static void __init do_boot_cpu(int phys_id) { struct task_struct *idle; unsigned long send_status, boot_status; int timeout, cpu_id; cpu_id = ++cpucount; /* * We can't use kernel_thread since we must avoid rescheduling the child. */ idle = fork_idle(cpu_id); if (IS_ERR(idle)) panic("failed fork for CPU#%d.", cpu_id); idle->thread.lr = (unsigned long)start_secondary; map_cpu_to_physid(cpu_id, phys_id); /* So we see what's up */ printk("Booting processor %d/%d\n", phys_id, cpu_id); stack_start.spi = (void *)idle->thread.sp; task_thread_info(idle)->cpu = cpu_id; /* * Send Startup IPI * 1.IPI received by CPU#(phys_id). * 2.CPU#(phys_id) enter startup_AP (arch/m32r/kernel/head.S) * 3.CPU#(phys_id) enter start_secondary() */ send_status = 0; boot_status = 0; cpumask_set_cpu(phys_id, &cpu_bootout_map); /* Send Startup IPI */ send_IPI_mask_phys(cpumask_of(phys_id), CPU_BOOT_IPI, 0); Dprintk("Waiting for send to finish...\n"); timeout = 0; /* Wait 100[ms] */ do { Dprintk("+"); udelay(1000); send_status = !cpumask_test_cpu(phys_id, &cpu_bootin_map); } while (send_status && (timeout++ < 100)); Dprintk("After Startup.\n"); if (!send_status) { /* * allow APs to start initializing. */ Dprintk("Before Callout %d.\n", cpu_id); cpumask_set_cpu(cpu_id, &cpu_callout_map); Dprintk("After Callout %d.\n", cpu_id); /* * Wait 5s total for a response */ for (timeout = 0; timeout < 5000; timeout++) { if (cpumask_test_cpu(cpu_id, &cpu_callin_map)) break; /* It has booted */ udelay(1000); } if (cpumask_test_cpu(cpu_id, &cpu_callin_map)) { /* number CPUs logically, starting from 1 (BSP is 0) */ Dprintk("OK.\n"); } else { boot_status = 1; printk("Not responding.\n"); } } else printk("IPI never delivered???\n"); if (send_status || boot_status) { unmap_cpu_to_physid(cpu_id, phys_id); cpumask_clear_cpu(cpu_id, &cpu_callout_map); cpumask_clear_cpu(cpu_id, &cpu_callin_map); cpumask_clear_cpu(cpu_id, &cpu_initialized); cpucount--; } }
long arch_ptrace(struct task_struct *child, long request, unsigned long addr, unsigned long data) { int ret; unsigned long __user *datap = (unsigned long __user *) data; switch (request) { case PTRACE_PEEKUSR: ret = ptrace_read_user(child, addr, datap); break; case PTRACE_POKEUSR: ret = ptrace_write_user(child, addr, data); break; case PTRACE_GETREGS: ret = copy_regset_to_user(child, &user_arm_view, REGSET_GPR, 0, sizeof(struct pt_regs), datap); break; case PTRACE_SETREGS: ret = copy_regset_from_user(child, &user_arm_view, REGSET_GPR, 0, sizeof(struct pt_regs), datap); break; case PTRACE_GETFPREGS: ret = copy_regset_to_user(child, &user_arm_view, REGSET_FPR, 0, sizeof(union fp_state), datap); break; case PTRACE_SETFPREGS: ret = copy_regset_from_user(child, &user_arm_view, REGSET_FPR, 0, sizeof(union fp_state), datap); break; #ifdef CONFIG_IWMMXT case PTRACE_GETWMMXREGS: ret = ptrace_getwmmxregs(child, datap); break; case PTRACE_SETWMMXREGS: ret = ptrace_setwmmxregs(child, datap); break; #endif case PTRACE_GET_THREAD_AREA: ret = put_user(task_thread_info(child)->tp_value, datap); break; case PTRACE_SET_SYSCALL: task_thread_info(child)->syscall = data; ret = 0; break; #ifdef CONFIG_CRUNCH case PTRACE_GETCRUNCHREGS: ret = ptrace_getcrunchregs(child, datap); break; case PTRACE_SETCRUNCHREGS: ret = ptrace_setcrunchregs(child, datap); break; #endif #ifdef CONFIG_VFP case PTRACE_GETVFPREGS: ret = copy_regset_to_user(child, &user_arm_view, REGSET_VFP, 0, ARM_VFPREGS_SIZE, datap); break; case PTRACE_SETVFPREGS: ret = copy_regset_from_user(child, &user_arm_view, REGSET_VFP, 0, ARM_VFPREGS_SIZE, datap); break; #endif #ifdef CONFIG_HAVE_HW_BREAKPOINT case PTRACE_GETHBPREGS: if (ptrace_get_breakpoints(child) < 0) return -ESRCH; ret = ptrace_gethbpregs(child, addr, (unsigned long __user *)data); ptrace_put_breakpoints(child); break; case PTRACE_SETHBPREGS: if (ptrace_get_breakpoints(child) < 0) return -ESRCH; ret = ptrace_sethbpregs(child, addr, (unsigned long __user *)data); ptrace_put_breakpoints(child); break; #endif default: ret = ptrace_request(child, request, addr, data); break; } return ret; }
long arch_ptrace(struct task_struct *child, long request, unsigned long addr, unsigned long data) { int ret; void __user *addrp = (void __user *) addr; void __user *datavp = (void __user *) data; unsigned long __user *datalp = (void __user *) data; switch (request) { /* when I and D space are separate, these will need to be fixed. */ case PTRACE_PEEKTEXT: /* read word at location addr. */ case PTRACE_PEEKDATA: ret = generic_ptrace_peekdata(child, addr, data); break; /* Read the word at location addr in the USER area. */ case PTRACE_PEEKUSR: { struct pt_regs *regs; union fpureg *fregs; unsigned long tmp = 0; regs = task_pt_regs(child); ret = 0; /* Default return value. */ switch (addr) { case 0 ... 31: tmp = regs->regs[addr]; break; case FPR_BASE ... FPR_BASE + 31: if (!tsk_used_math(child)) { /* FP not yet used */ tmp = -1; break; } fregs = get_fpu_regs(child); #ifdef CONFIG_32BIT if (test_tsk_thread_flag(child, TIF_32BIT_FPREGS)) { /* * The odd registers are actually the high * order bits of the values stored in the even * registers. */ tmp = get_fpr32(&fregs[(addr & ~1) - FPR_BASE], addr & 1); break; } #endif tmp = get_fpr64(&fregs[addr - FPR_BASE], 0); break; case PC: tmp = regs->cp0_epc; break; case CAUSE: tmp = regs->cp0_cause; break; case BADVADDR: tmp = regs->cp0_badvaddr; break; case MMHI: tmp = regs->hi; break; case MMLO: tmp = regs->lo; break; #ifdef CONFIG_CPU_HAS_SMARTMIPS case ACX: tmp = regs->acx; break; #endif case FPC_CSR: tmp = child->thread.fpu.fcr31; break; case FPC_EIR: /* implementation / version register */ tmp = boot_cpu_data.fpu_id; break; case DSP_BASE ... DSP_BASE + 5: { dspreg_t *dregs; if (!cpu_has_dsp) { tmp = 0; ret = -EIO; goto out; } dregs = __get_dsp_regs(child); tmp = dregs[addr - DSP_BASE]; break; } case DSP_CONTROL: if (!cpu_has_dsp) { tmp = 0; ret = -EIO; goto out; } tmp = child->thread.dsp.dspcontrol; break; default: tmp = 0; ret = -EIO; goto out; } ret = put_user(tmp, datalp); break; } /* when I and D space are separate, this will have to be fixed. */ case PTRACE_POKETEXT: /* write the word at location addr. */ case PTRACE_POKEDATA: ret = generic_ptrace_pokedata(child, addr, data); break; case PTRACE_POKEUSR: { struct pt_regs *regs; ret = 0; regs = task_pt_regs(child); switch (addr) { case 0 ... 31: regs->regs[addr] = data; /* System call number may have been changed */ if (addr == 2) mips_syscall_update_nr(child, regs); else if (addr == 4 && mips_syscall_is_indirect(child, regs)) mips_syscall_update_nr(child, regs); break; case FPR_BASE ... FPR_BASE + 31: { union fpureg *fregs = get_fpu_regs(child); init_fp_ctx(child); #ifdef CONFIG_32BIT if (test_tsk_thread_flag(child, TIF_32BIT_FPREGS)) { /* * The odd registers are actually the high * order bits of the values stored in the even * registers. */ set_fpr32(&fregs[(addr & ~1) - FPR_BASE], addr & 1, data); break; } #endif set_fpr64(&fregs[addr - FPR_BASE], 0, data); break; } case PC: regs->cp0_epc = data; break; case MMHI: regs->hi = data; break; case MMLO: regs->lo = data; break; #ifdef CONFIG_CPU_HAS_SMARTMIPS case ACX: regs->acx = data; break; #endif case FPC_CSR: init_fp_ctx(child); ptrace_setfcr31(child, data); break; case DSP_BASE ... DSP_BASE + 5: { dspreg_t *dregs; if (!cpu_has_dsp) { ret = -EIO; break; } dregs = __get_dsp_regs(child); dregs[addr - DSP_BASE] = data; break; } case DSP_CONTROL: if (!cpu_has_dsp) { ret = -EIO; break; } child->thread.dsp.dspcontrol = data; break; default: /* The rest are not allowed. 
*/ ret = -EIO; break; } break; } case PTRACE_GETREGS: ret = ptrace_getregs(child, datavp); break; case PTRACE_SETREGS: ret = ptrace_setregs(child, datavp); break; case PTRACE_GETFPREGS: ret = ptrace_getfpregs(child, datavp); break; case PTRACE_SETFPREGS: ret = ptrace_setfpregs(child, datavp); break; case PTRACE_GET_THREAD_AREA: ret = put_user(task_thread_info(child)->tp_value, datalp); break; case PTRACE_GET_WATCH_REGS: ret = ptrace_get_watch_regs(child, addrp); break; case PTRACE_SET_WATCH_REGS: ret = ptrace_set_watch_regs(child, addrp); break; default: ret = ptrace_request(child, request, addr, data); break; } out: return ret; }
int dump_elf_task(elf_greg_t *dest, struct task_struct *task) { dump_elf_thread(dest, task_pt_regs(task), task_thread_info(task)); return 1; }
int copy_thread(unsigned long clone_flags, unsigned long usp, unsigned long arg, struct task_struct *p) { struct thread_info *ti = task_thread_info(p); struct pt_regs *childregs, *regs = current_pt_regs(); unsigned long childksp; p->set_child_tid = p->clear_child_tid = NULL; childksp = (unsigned long)task_stack_page(p) + THREAD_SIZE - 32; preempt_disable(); if (is_fpu_owner()) save_fp(p); if (cpu_has_dsp) save_dsp(p); preempt_enable(); /* set up new TSS. */ childregs = (struct pt_regs *) childksp - 1; /* Put the stack after the struct pt_regs. */ childksp = (unsigned long) childregs; p->thread.cp0_status = read_c0_status() & ~(ST0_CU2|ST0_CU1); if (unlikely(p->flags & PF_KTHREAD)) { unsigned long status = p->thread.cp0_status; memset(childregs, 0, sizeof(struct pt_regs)); ti->addr_limit = KERNEL_DS; p->thread.reg16 = usp; /* fn */ p->thread.reg17 = arg; p->thread.reg29 = childksp; p->thread.reg31 = (unsigned long) ret_from_kernel_thread; #if defined(CONFIG_CPU_R3000) || defined(CONFIG_CPU_TX39XX) status = (status & ~(ST0_KUP | ST0_IEP | ST0_IEC)) | ((status & (ST0_KUC | ST0_IEC)) << 2); #else status |= ST0_EXL; #endif childregs->cp0_status = status; return 0; } *childregs = *regs; childregs->regs[7] = 0; /* Clear error flag */ childregs->regs[2] = 0; /* Child gets zero as return value */ childregs->regs[29] = usp; ti->addr_limit = USER_DS; p->thread.reg29 = (unsigned long) childregs; p->thread.reg31 = (unsigned long) ret_from_fork; /* * New tasks lose permission to use the fpu. This accelerates context * switching for most programs since they don't use the fpu. */ childregs->cp0_status &= ~(ST0_CU2|ST0_CU1); #ifdef CONFIG_MIPS_MT_SMTC /* * SMTC restores TCStatus after Status, and the CU bits * are aliased there. */ childregs->cp0_tcstatus &= ~(ST0_CU2|ST0_CU1); #endif clear_tsk_thread_flag(p, TIF_USEDFPU); #ifdef CONFIG_MIPS_MT_FPAFF clear_tsk_thread_flag(p, TIF_FPUBOUND); #endif /* CONFIG_MIPS_MT_FPAFF */ if (clone_flags & CLONE_SETTLS) ti->tp_value = regs->regs[7]; return 0; }
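/*
 * Editor's illustrative module-style sketch (not from the file above):
 * kernel threads created through the kthread API take the PF_KTHREAD branch
 * in copy_thread() above, with the thread function and its argument parked
 * in saved registers (reg16/reg17 here) until ret_from_kernel_thread calls
 * them.  The thread name and loop body are made up for the example.
 */
#include <linux/delay.h>
#include <linux/err.h>
#include <linux/kthread.h>
#include <linux/module.h>

static struct task_struct *demo_task;

static int demo_thread_fn(void *data)
{
	while (!kthread_should_stop()) {
		/* ... periodic work would go here ... */
		msleep(1000);
	}
	return 0;
}

static int __init demo_init(void)
{
	demo_task = kthread_run(demo_thread_fn, NULL, "demo_worker");
	return IS_ERR(demo_task) ? PTR_ERR(demo_task) : 0;
}

static void __exit demo_exit(void)
{
	kthread_stop(demo_task);
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");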
int kernel_thread(int (*fn)(void *), void * arg, unsigned long flags) { int pid; current->thread.request.u.thread.proc = fn; current->thread.request.u.thread.arg = arg; pid = do_fork(CLONE_VM | CLONE_UNTRACED | flags, 0, &current->thread.regs, 0, NULL, NULL); return pid; } EXPORT_SYMBOL(kernel_thread); static inline void set_current(struct task_struct *task) { cpu_tasks[task_thread_info(task)->cpu] = ((struct cpu_task) { external_pid(), task }); } extern void arch_switch_to(struct task_struct *to); void *__switch_to(struct task_struct *from, struct task_struct *to) { to->thread.prev_sched = from; set_current(to); do { current->thread.saved_task = NULL; switch_threads(&from->thread.switch_buf, &to->thread.switch_buf);
/* * Lock a mutex (possibly interruptible), slowpath: */ static inline int __sched __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass, unsigned long ip) { struct task_struct *task = current; struct mutex_waiter waiter; unsigned int old_val; unsigned long flags; spin_lock_mutex(&lock->wait_lock, flags); debug_mutex_lock_common(lock, &waiter); mutex_acquire(&lock->dep_map, subclass, 0, ip); debug_mutex_add_waiter(lock, &waiter, task_thread_info(task)); /* add waiting tasks to the end of the waitqueue (FIFO): */ list_add_tail(&waiter.list, &lock->wait_list); waiter.task = task; old_val = atomic_xchg(&lock->count, -1); if (old_val == 1) goto done; lock_contended(&lock->dep_map, ip); for (;;) { /* * Let's try to take the lock again - this is needed even if * we get here for the first time (shortly after failing to * acquire the lock), to make sure that we get a wakeup once * it's unlocked. Later on, if we sleep, this is the * operation that gives us the lock. We xchg it to -1, so * that when we release the lock, we properly wake up the * other waiters: */ old_val = atomic_xchg(&lock->count, -1); if (old_val == 1) break; /* * got a signal? (This code gets eliminated in the * TASK_UNINTERRUPTIBLE case.) */ if (unlikely((state == TASK_INTERRUPTIBLE && signal_pending(task)) || (state == TASK_KILLABLE && fatal_signal_pending(task)))) { mutex_remove_waiter(lock, &waiter, task_thread_info(task)); mutex_release(&lock->dep_map, 1, ip); spin_unlock_mutex(&lock->wait_lock, flags); debug_mutex_free_waiter(&waiter); return -EINTR; } __set_task_state(task, state); /* didn't get the lock, go to sleep: */ spin_unlock_mutex(&lock->wait_lock, flags); schedule(); spin_lock_mutex(&lock->wait_lock, flags); } done: lock_acquired(&lock->dep_map); /* got the lock - rejoice! */ mutex_remove_waiter(lock, &waiter, task_thread_info(task)); debug_mutex_set_owner(lock, task_thread_info(task)); /* set it to 0 if there are no waiters left: */ if (likely(list_empty(&lock->wait_list))) atomic_set(&lock->count, 0); spin_unlock_mutex(&lock->wait_lock, flags); debug_mutex_free_waiter(&waiter); return 0; }
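/*
 * Editor's illustrative sketch: the slowpath above is reached through the
 * public mutex API; the TASK_INTERRUPTIBLE signal check corresponds to a
 * caller using mutex_lock_interruptible(), roughly as a driver would.
 * The lock name and critical section are placeholders.
 */
#include <linux/errno.h>
#include <linux/mutex.h>

static DEFINE_MUTEX(demo_lock);

static int demo_update(void)
{
	/* the slowpath returns -EINTR if a signal arrives while we sleep */
	if (mutex_lock_interruptible(&demo_lock))
		return -ERESTARTSYS;

	/* ... protected update would go here ... */

	mutex_unlock(&demo_lock);
	return 0;
}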
/* * This creates a new process as a copy of the old one, * but does not actually start it yet. * * It copies the registers, and all the appropriate * parts of the process environment (as per the clone * flags). The actual kick-off is left to the caller. */ static struct task_struct *copy_process(unsigned long clone_flags, unsigned long stack_start, struct pt_regs *regs, unsigned long stack_size, int __user *child_tidptr, struct pid *pid, int trace) { int retval; struct task_struct *p; int cgroup_callbacks_done = 0; if ((clone_flags & (CLONE_NEWNS|CLONE_FS)) == (CLONE_NEWNS|CLONE_FS)) return ERR_PTR(-EINVAL); /* * Thread groups must share signals as well, and detached threads * can only be started up within the thread group. */ if ((clone_flags & CLONE_THREAD) && !(clone_flags & CLONE_SIGHAND)) return ERR_PTR(-EINVAL); /* * Shared signal handlers imply shared VM. By way of the above, * thread groups also imply shared VM. Blocking this case allows * for various simplifications in other code. */ if ((clone_flags & CLONE_SIGHAND) && !(clone_flags & CLONE_VM)) return ERR_PTR(-EINVAL); /* * Siblings of global init remain as zombies on exit since they are * not reaped by their parent (swapper). To solve this and to avoid * multi-rooted process trees, prevent global and container-inits * from creating siblings. */ if ((clone_flags & CLONE_PARENT) && current->signal->flags & SIGNAL_UNKILLABLE) return ERR_PTR(-EINVAL); retval = security_task_create(clone_flags); if (retval) goto fork_out; retval = -ENOMEM; p = dup_task_struct(current); if (!p) goto fork_out; ftrace_graph_init_task(p); rt_mutex_init_task(p); #ifdef CONFIG_PROVE_LOCKING DEBUG_LOCKS_WARN_ON(!p->hardirqs_enabled); DEBUG_LOCKS_WARN_ON(!p->softirqs_enabled); #endif retval = -EAGAIN; if (atomic_read(&p->real_cred->user->processes) >= task_rlimit(p, RLIMIT_NPROC)) { if (!capable(CAP_SYS_ADMIN) && !capable(CAP_SYS_RESOURCE) && p->real_cred->user != INIT_USER) goto bad_fork_free; } current->flags &= ~PF_NPROC_EXCEEDED; retval = copy_creds(p, clone_flags); if (retval < 0) goto bad_fork_free; /* * If multiple threads are within copy_process(), then this check * triggers too late. This doesn't hurt, the check is only there * to stop root fork bombs. 
*/ retval = -EAGAIN; if (nr_threads >= max_threads) goto bad_fork_cleanup_count; if (!try_module_get(task_thread_info(p)->exec_domain->module)) goto bad_fork_cleanup_count; p->did_exec = 0; delayacct_tsk_init(p); /* Must remain after dup_task_struct() */ copy_flags(clone_flags, p); INIT_LIST_HEAD(&p->children); INIT_LIST_HEAD(&p->sibling); rcu_copy_process(p); p->vfork_done = NULL; spin_lock_init(&p->alloc_lock); init_sigpending(&p->pending); p->utime = p->stime = p->gtime = 0; p->utimescaled = p->stimescaled = 0; #ifndef CONFIG_VIRT_CPU_ACCOUNTING p->prev_utime = p->prev_stime = 0; #endif #if defined(SPLIT_RSS_COUNTING) memset(&p->rss_stat, 0, sizeof(p->rss_stat)); #endif p->default_timer_slack_ns = current->timer_slack_ns; task_io_accounting_init(&p->ioac); acct_clear_integrals(p); posix_cpu_timers_init(p); do_posix_clock_monotonic_gettime(&p->start_time); p->real_start_time = p->start_time; monotonic_to_bootbased(&p->real_start_time); p->io_context = NULL; p->audit_context = NULL; if (clone_flags & CLONE_THREAD) threadgroup_change_begin(current); cgroup_fork(p); #ifdef CONFIG_NUMA p->mempolicy = mpol_dup(p->mempolicy); if (IS_ERR(p->mempolicy)) { retval = PTR_ERR(p->mempolicy); p->mempolicy = NULL; goto bad_fork_cleanup_cgroup; } mpol_fix_fork_child_flag(p); #endif #ifdef CONFIG_CPUSETS p->cpuset_mem_spread_rotor = NUMA_NO_NODE; p->cpuset_slab_spread_rotor = NUMA_NO_NODE; seqcount_init(&p->mems_allowed_seq); #endif #ifdef CONFIG_TRACE_IRQFLAGS p->irq_events = 0; #ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW p->hardirqs_enabled = 1; #else p->hardirqs_enabled = 0; #endif p->hardirq_enable_ip = 0; p->hardirq_enable_event = 0; p->hardirq_disable_ip = _THIS_IP_; p->hardirq_disable_event = 0; p->softirqs_enabled = 1; p->softirq_enable_ip = _THIS_IP_; p->softirq_enable_event = 0; p->softirq_disable_ip = 0; p->softirq_disable_event = 0; p->hardirq_context = 0; p->softirq_context = 0; #endif #ifdef CONFIG_LOCKDEP p->lockdep_depth = 0; /* no locks held yet */ p->curr_chain_key = 0; p->lockdep_recursion = 0; #endif #ifdef CONFIG_DEBUG_MUTEXES p->blocked_on = NULL; /* not blocked yet */ #endif #ifdef CONFIG_CGROUP_MEM_RES_CTLR p->memcg_batch.do_batch = 0; p->memcg_batch.memcg = NULL; #endif /* Perform scheduler related setup. Assign this task to a CPU. */ sched_fork(p); retval = perf_event_init_task(p); if (retval) goto bad_fork_cleanup_policy; retval = audit_alloc(p); if (retval) goto bad_fork_cleanup_policy; /* copy all the process information */ retval = copy_semundo(clone_flags, p); if (retval) goto bad_fork_cleanup_audit; retval = copy_files(clone_flags, p); if (retval) goto bad_fork_cleanup_semundo; retval = copy_fs(clone_flags, p); if (retval) goto bad_fork_cleanup_files; retval = copy_sighand(clone_flags, p); if (retval) goto bad_fork_cleanup_fs; retval = copy_signal(clone_flags, p); if (retval) goto bad_fork_cleanup_sighand; retval = copy_mm(clone_flags, p); if (retval) goto bad_fork_cleanup_signal; retval = copy_namespaces(clone_flags, p); if (retval) goto bad_fork_cleanup_mm; retval = copy_io(clone_flags, p); if (retval) goto bad_fork_cleanup_namespaces; retval = copy_thread(clone_flags, stack_start, stack_size, p, regs); if (retval) goto bad_fork_cleanup_io; if (pid != &init_struct_pid) { retval = -ENOMEM; pid = alloc_pid(p->nsproxy->pid_ns); if (!pid) goto bad_fork_cleanup_io; } p->pid = pid_nr(pid); p->tgid = p->pid; if (clone_flags & CLONE_THREAD) p->tgid = current->tgid; p->set_child_tid = (clone_flags & CLONE_CHILD_SETTID) ? child_tidptr : NULL; /* * Clear TID on mm_release()? 
*/ p->clear_child_tid = (clone_flags & CLONE_CHILD_CLEARTID) ? child_tidptr : NULL; #ifdef CONFIG_BLOCK p->plug = NULL; #endif #ifdef CONFIG_FUTEX p->robust_list = NULL; #ifdef CONFIG_COMPAT p->compat_robust_list = NULL; #endif INIT_LIST_HEAD(&p->pi_state_list); p->pi_state_cache = NULL; #endif /* * sigaltstack should be cleared when sharing the same VM */ if ((clone_flags & (CLONE_VM|CLONE_VFORK)) == CLONE_VM) p->sas_ss_sp = p->sas_ss_size = 0; /* * Syscall tracing and stepping should be turned off in the * child regardless of CLONE_PTRACE. */ user_disable_single_step(p); clear_tsk_thread_flag(p, TIF_SYSCALL_TRACE); #ifdef TIF_SYSCALL_EMU clear_tsk_thread_flag(p, TIF_SYSCALL_EMU); #endif clear_all_latency_tracing(p); /* ok, now we should be set up.. */ if (clone_flags & CLONE_THREAD) p->exit_signal = -1; else if (clone_flags & CLONE_PARENT) p->exit_signal = current->group_leader->exit_signal; else p->exit_signal = (clone_flags & CSIGNAL); p->pdeath_signal = 0; p->exit_state = 0; p->nr_dirtied = 0; p->nr_dirtied_pause = 128 >> (PAGE_SHIFT - 10); p->dirty_paused_when = 0; /* * Ok, make it visible to the rest of the system. * We don't wake it up yet. */ p->group_leader = p; INIT_LIST_HEAD(&p->thread_group); /* Now that the task is set up, run cgroup callbacks if * necessary. We need to run them before the task is visible * on the tasklist. */ cgroup_fork_callbacks(p); cgroup_callbacks_done = 1; /* Need tasklist lock for parent etc handling! */ write_lock_irq(&tasklist_lock); /* CLONE_PARENT re-uses the old parent */ if (clone_flags & (CLONE_PARENT|CLONE_THREAD)) { p->real_parent = current->real_parent; p->parent_exec_id = current->parent_exec_id; } else { p->real_parent = current; p->parent_exec_id = current->self_exec_id; } spin_lock(&current->sighand->siglock); /* * Copy seccomp details explicitly here, in case they were changed * before holding sighand lock. */ copy_seccomp(p); /* * Process group and session signals need to be delivered to just the * parent before the fork or both the parent and the child after the * fork. Restart if a signal comes in before we add the new process to * its process group. * A fatal signal pending means that current will exit, so the new * thread can't slip out of an OOM kill (or normal SIGKILL).
*/ recalc_sigpending(); if (signal_pending(current)) { spin_unlock(&current->sighand->siglock); write_unlock_irq(&tasklist_lock); retval = -ERESTARTNOINTR; goto bad_fork_free_pid; } if (clone_flags & CLONE_THREAD) { current->signal->nr_threads++; atomic_inc(&current->signal->live); atomic_inc(&current->signal->sigcnt); p->group_leader = current->group_leader; list_add_tail_rcu(&p->thread_group, &p->group_leader->thread_group); } if (likely(p->pid)) { ptrace_init_task(p, (clone_flags & CLONE_PTRACE) || trace); if (thread_group_leader(p)) { if (is_child_reaper(pid)) p->nsproxy->pid_ns->child_reaper = p; p->signal->leader_pid = pid; p->signal->tty = tty_kref_get(current->signal->tty); attach_pid(p, PIDTYPE_PGID, task_pgrp(current)); attach_pid(p, PIDTYPE_SID, task_session(current)); list_add_tail(&p->sibling, &p->real_parent->children); list_add_tail_rcu(&p->tasks, &init_task.tasks); __this_cpu_inc(process_counts); } attach_pid(p, PIDTYPE_PID, pid); nr_threads++; } total_forks++; spin_unlock(&current->sighand->siglock); write_unlock_irq(&tasklist_lock); proc_fork_connector(p); cgroup_post_fork(p); if (clone_flags & CLONE_THREAD) threadgroup_change_end(current); perf_event_fork(p); trace_task_newtask(p, clone_flags); return p; bad_fork_free_pid: if (pid != &init_struct_pid) free_pid(pid); bad_fork_cleanup_io: if (p->io_context) exit_io_context(p); bad_fork_cleanup_namespaces: if (unlikely(clone_flags & CLONE_NEWPID)) pid_ns_release_proc(p->nsproxy->pid_ns); exit_task_namespaces(p); bad_fork_cleanup_mm: if (p->mm) mmput(p->mm); bad_fork_cleanup_signal: if (!(clone_flags & CLONE_THREAD)) free_signal_struct(p->signal); bad_fork_cleanup_sighand: __cleanup_sighand(p->sighand); bad_fork_cleanup_fs: exit_fs(p); /* blocking */ bad_fork_cleanup_files: exit_files(p); /* blocking */ bad_fork_cleanup_semundo: exit_sem(p); bad_fork_cleanup_audit: audit_free(p); bad_fork_cleanup_policy: perf_event_free_task(p); #ifdef CONFIG_NUMA mpol_put(p->mempolicy); bad_fork_cleanup_cgroup: #endif if (clone_flags & CLONE_THREAD) threadgroup_change_end(current); cgroup_exit(p, cgroup_callbacks_done); delayacct_tsk_free(p); module_put(task_thread_info(p)->exec_domain->module); bad_fork_cleanup_count: atomic_dec(&p->cred->user->processes); exit_creds(p); bad_fork_free: free_task(p); fork_out: return ERR_PTR(retval); }
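/*
 * Editor's illustrative sketch: the flag checks at the top of copy_process()
 * above (CLONE_THREAD requires CLONE_SIGHAND, which in turn requires
 * CLONE_VM) mean a thread-style clone must carry at least this flag
 * combination, roughly what a threading library passes.  Stack handling and
 * synchronization are deliberately minimal; real code would use pthreads.
 */
#define _GNU_SOURCE
#include <sched.h>
#include <stdlib.h>
#include <unistd.h>

static int thread_fn(void *arg)
{
	(void)arg;
	return 0;
}

int main(void)
{
	const size_t stack_sz = 64 * 1024;
	char *stack = malloc(stack_sz);
	const int flags = CLONE_VM | CLONE_FS | CLONE_FILES |
			  CLONE_SIGHAND | CLONE_THREAD;	/* satisfies the checks above */

	if (clone(thread_fn, stack + stack_sz, flags, NULL) == -1)
		return 1;

	sleep(1);			/* crude: let the "thread" finish before exit */
	free(stack);
	return 0;
}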
int copy_thread(unsigned long clone_flags, unsigned long new_stackp, unsigned long arg, struct task_struct *p) { struct thread_info *ti; struct fake_frame { struct stack_frame sf; struct pt_regs childregs; } *frame; frame = container_of(task_pt_regs(p), struct fake_frame, childregs); p->thread.ksp = (unsigned long) frame; /* Save access registers to new thread structure. */ save_access_regs(&p->thread.acrs[0]); /* start new process with ar4 pointing to the correct address space */ p->thread.mm_segment = get_fs(); /* Don't copy debug registers */ memset(&p->thread.per_user, 0, sizeof(p->thread.per_user)); memset(&p->thread.per_event, 0, sizeof(p->thread.per_event)); clear_tsk_thread_flag(p, TIF_SINGLE_STEP); /* Initialize per thread user and system timer values */ ti = task_thread_info(p); ti->user_timer = 0; ti->system_timer = 0; frame->sf.back_chain = 0; /* new return point is ret_from_fork */ frame->sf.gprs[8] = (unsigned long) ret_from_fork; /* fake return stack for resume(), don't go back to schedule */ frame->sf.gprs[9] = (unsigned long) frame; /* Store access registers to kernel stack of new process. */ if (unlikely(p->flags & PF_KTHREAD)) { /* kernel thread */ memset(&frame->childregs, 0, sizeof(struct pt_regs)); frame->childregs.psw.mask = PSW_KERNEL_BITS | PSW_MASK_DAT | PSW_MASK_IO | PSW_MASK_EXT | PSW_MASK_MCHECK; frame->childregs.psw.addr = PSW_ADDR_AMODE | (unsigned long) kernel_thread_starter; frame->childregs.gprs[9] = new_stackp; /* function */ frame->childregs.gprs[10] = arg; frame->childregs.gprs[11] = (unsigned long) do_exit; frame->childregs.orig_gpr2 = -1; return 0; } frame->childregs = *current_pt_regs(); frame->childregs.gprs[2] = 0; /* child returns 0 on fork. */ frame->childregs.flags = 0; if (new_stackp) frame->childregs.gprs[15] = new_stackp; /* Don't copy runtime instrumentation info */ p->thread.ri_cb = NULL; p->thread.ri_signum = 0; frame->childregs.psw.mask &= ~PSW_MASK_RI; #ifndef CONFIG_64BIT /* * save fprs to current->thread.fp_regs to merge them with * the emulated registers and then copy the result to the child. */ save_fp_ctl(¤t->thread.fp_regs.fpc); save_fp_regs(current->thread.fp_regs.fprs); memcpy(&p->thread.fp_regs, ¤t->thread.fp_regs, sizeof(s390_fp_regs)); /* Set a new TLS ? */ if (clone_flags & CLONE_SETTLS) p->thread.acrs[0] = frame->childregs.gprs[6]; #else /* CONFIG_64BIT */ /* Save the fpu registers to new thread structure. */ save_fp_ctl(&p->thread.fp_regs.fpc); save_fp_regs(p->thread.fp_regs.fprs); p->thread.fp_regs.pad = 0; /* Set a new TLS ? */ if (clone_flags & CLONE_SETTLS) { unsigned long tls = frame->childregs.gprs[6]; if (is_compat_task()) { p->thread.acrs[0] = (unsigned int)tls; } else { p->thread.acrs[0] = (unsigned int)(tls >> 32); p->thread.acrs[1] = (unsigned int)tls; } }