/** * zfcp_unit_enqueue - enqueue unit to unit list of a port. * @port: pointer to port where unit is added * @fcp_lun: FCP LUN of unit to be enqueued * Returns: pointer to enqueued unit on success, ERR_PTR on error * * Sets up some unit internal structures and creates sysfs entry. */ struct zfcp_unit *zfcp_unit_enqueue(struct zfcp_port *port, u64 fcp_lun) { struct zfcp_unit *unit; int retval = -ENOMEM; get_device(&port->dev); unit = zfcp_get_unit_by_lun(port, fcp_lun); if (unit) { put_device(&unit->dev); retval = -EEXIST; goto err_out; } unit = kzalloc(sizeof(struct zfcp_unit), GFP_KERNEL); if (!unit) goto err_out; unit->port = port; unit->fcp_lun = fcp_lun; unit->dev.parent = &port->dev; unit->dev.release = zfcp_unit_release; if (dev_set_name(&unit->dev, "0x%016llx", (unsigned long long) fcp_lun)) { kfree(unit); goto err_out; } retval = -EINVAL; INIT_WORK(&unit->scsi_work, zfcp_scsi_scan); spin_lock_init(&unit->latencies.lock); unit->latencies.write.channel.min = 0xFFFFFFFF; unit->latencies.write.fabric.min = 0xFFFFFFFF; unit->latencies.read.channel.min = 0xFFFFFFFF; unit->latencies.read.fabric.min = 0xFFFFFFFF; unit->latencies.cmd.channel.min = 0xFFFFFFFF; unit->latencies.cmd.fabric.min = 0xFFFFFFFF; if (device_register(&unit->dev)) { put_device(&unit->dev); goto err_out; } if (sysfs_create_group(&unit->dev.kobj, &zfcp_sysfs_unit_attrs)) goto err_out_put; write_lock_irq(&port->unit_list_lock); list_add_tail(&unit->list, &port->unit_list); write_unlock_irq(&port->unit_list_lock); atomic_set_mask(ZFCP_STATUS_COMMON_RUNNING, &unit->status); return unit; err_out_put: device_unregister(&unit->dev); err_out: put_device(&port->dev); return ERR_PTR(retval); }
static int ptrace_attach(struct task_struct *task) { bool wait_trap = false; int retval; audit_ptrace(task); retval = -EPERM; if (unlikely(task->flags & PF_KTHREAD)) goto out; if (same_thread_group(task, current)) goto out; /* * Protect exec's credential calculations against our interference; * interference; SUID, SGID and LSM creds get determined differently * under ptrace. */ retval = -ERESTARTNOINTR; if (mutex_lock_interruptible(&task->signal->cred_guard_mutex)) goto out; task_lock(task); retval = __ptrace_may_access(task, PTRACE_MODE_ATTACH); task_unlock(task); if (retval) goto unlock_creds; write_lock_irq(&tasklist_lock); retval = -EPERM; if (unlikely(task->exit_state)) goto unlock_tasklist; if (task->ptrace) goto unlock_tasklist; task->ptrace = PT_PTRACED; if (task_ns_capable(task, CAP_SYS_PTRACE)) task->ptrace |= PT_PTRACE_CAP; __ptrace_link(task, current); send_sig_info(SIGSTOP, SEND_SIG_FORCED, task); spin_lock(&task->sighand->siglock); /* * If the task is already STOPPED, set GROUP_STOP_PENDING and * TRAPPING, and kick it so that it transits to TRACED. TRAPPING * will be cleared if the child completes the transition or any * event which clears the group stop states happens. We'll wait * for the transition to complete before returning from this * function. * * This hides STOPPED -> RUNNING -> TRACED transition from the * attaching thread but a different thread in the same group can * still observe the transient RUNNING state. IOW, if another * thread's WNOHANG wait(2) on the stopped tracee races against * ATTACH, the wait(2) may fail due to the transient RUNNING. * * The following task_is_stopped() test is safe as both transitions * in and out of STOPPED are protected by siglock. */ if (task_is_stopped(task)) { task->group_stop |= GROUP_STOP_PENDING | GROUP_STOP_TRAPPING; signal_wake_up(task, 1); wait_trap = true; } spin_unlock(&task->sighand->siglock); retval = 0; unlock_tasklist: write_unlock_irq(&tasklist_lock); unlock_creds: mutex_unlock(&task->signal->cred_guard_mutex); out: if (wait_trap) wait_event(current->signal->wait_chldexit, !(task->group_stop & GROUP_STOP_TRAPPING)); return retval; }
/*------------------------------------------------------------------------- * bproc_masq_wait */ int masq_wait(pid_t pid, int options, struct siginfo *infop, unsigned int * stat_addr, struct rusage * ru) { int result, lpid, status; struct bproc_krequest_t *req; struct bproc_rsyscall_msg_t *msg; struct bproc_wait_resp_t *resp_msg; struct task_struct *child; /* XXX to be 100% semantically correct, we need to verify_area * here on stat_addr and ru here... */ req = bpr_rsyscall1(BPROC_SYS_WAIT); if (!req){ printk("bproc: masq: sys_wait: out of memory.\n"); return -ENOMEM; } msg = (struct bproc_rsyscall_msg_t *) bproc_msg(req); msg->arg[0] = pid; msg->arg[1] = options; if (bpr_rsyscall2(BPROC_MASQ_MASTER(current), req, 1) != 0) { bproc_put_req(req); return -EIO; } resp_msg = bproc_msg(req->response); result = resp_msg->hdr.result; status = resp_msg->status; if (stat_addr) put_user(status, stat_addr); if (ru) { /* Only a few elements of the rusage structure are provided by * Linux. Also, convert the times back to the HZ * representation. */ struct rusage ru_tmp; memset(&ru_tmp, 0, sizeof(ru_tmp)); ru_tmp.ru_utime.tv_sec = resp_msg->utime/HZ; ru_tmp.ru_utime.tv_usec = (resp_msg->utime*(1000000/HZ))%1000000; ru_tmp.ru_stime.tv_sec = resp_msg->stime/HZ; ru_tmp.ru_stime.tv_usec = (resp_msg->stime*(1000000/HZ))%1000000; ru_tmp.ru_minflt = resp_msg->minflt; ru_tmp.ru_majflt = resp_msg->majflt; ru_tmp.ru_nswap = resp_msg->nswap; copy_to_user(ru, &ru_tmp, sizeof(ru_tmp)); } bproc_put_req(req); if (result > 0 && (status & 0xff) != 0x7f) { /* It's possible that the process we waited on was actually * local. We need to make sure and get it out of this process * tree too. If it's not, we need to down the non local child * count by one... */ write_lock_irq(&tasklist_lock); child = masq_find_task_by_pid(BPROC_MASQ_MASTER(current), result); if (child) lpid = child->pid; else { lpid = 0; current->bproc.nlchild--; } write_unlock_irq(&tasklist_lock); if (lpid) { /* Do all of this as a kernel call to avoid re-entering * this whole mess... */ set_bit(BPROC_FLAG_KCALL, ¤t->bproc.flag); if (k_wait4(lpid, options & ~WNOHANG, NULL, NULL, NULL) == -1) { printk(KERN_ERR "bproc: masq: local wait failed on %d (%d)\n", lpid, result); #if 0 /* This probably isn't correct. If we fail to wait, * that probably means that somebody else picked it * up. */ write_lock_irq(&tasklist_lock); current->bproc.nlchild--; write_unlock_irq(&tasklist_lock); #endif } } } return result; }
/* * Send signals to all our closest relatives so that they know * to properly mourn us.. */ static void exit_notify(void) { struct task_struct * p, *t; forget_original_parent(current); /* * Check to see if any process groups have become orphaned * as a result of our exiting, and if they have any stopped * jobs, send them a SIGHUP and then a SIGCONT. (POSIX 3.2.2.2) * * Case i: Our father is in a different pgrp than we are * and we were the only connection outside, so our pgrp * is about to become orphaned. */ t = current->p_pptr; if ((t->pgrp != current->pgrp) && (t->session == current->session) && will_become_orphaned_pgrp(current->pgrp, current) && has_stopped_jobs(current->pgrp)) { kill_pg(current->pgrp,SIGHUP,1); kill_pg(current->pgrp,SIGCONT,1); } /* Let father know we died * * Thread signals are configurable, but you aren't going to use * that to send signals to arbitary processes. * That stops right now. * * If the parent exec id doesn't match the exec id we saved * when we started then we know the parent has changed security * domain. * * If our self_exec id doesn't match our parent_exec_id then * we have changed execution domain as these two values started * the same after a fork. * */ if(current->exit_signal != SIGCHLD && ( current->parent_exec_id != t->self_exec_id || current->self_exec_id != current->parent_exec_id) && !capable(CAP_KILL)) current->exit_signal = SIGCHLD; notify_parent(current, current->exit_signal); /* * This loop does two things: * * A. Make init inherit all the child processes * B. Check to see if any process groups have become orphaned * as a result of our exiting, and if they have any stopped * jobs, send them a SIGHUP and then a SIGCONT. (POSIX 3.2.2.2) */ write_lock_irq(&tasklist_lock); while (current->p_cptr != NULL) { p = current->p_cptr; current->p_cptr = p->p_osptr; p->p_ysptr = NULL; p->flags &= ~(PF_PTRACED|PF_TRACESYS); p->p_pptr = p->p_opptr; p->p_osptr = p->p_pptr->p_cptr; if (p->p_osptr) p->p_osptr->p_ysptr = p; p->p_pptr->p_cptr = p; if (p->state == TASK_ZOMBIE) notify_parent(p, p->exit_signal); /* * process group orphan check * Case ii: Our child is in a different pgrp * than we are, and it was the only connection * outside, so the child pgrp is now orphaned. */ if ((p->pgrp != current->pgrp) && (p->session == current->session)) { int pgrp = p->pgrp; write_unlock_irq(&tasklist_lock); if (is_orphaned_pgrp(pgrp) && has_stopped_jobs(pgrp)) { kill_pg(pgrp,SIGHUP,1); kill_pg(pgrp,SIGCONT,1); } write_lock_irq(&tasklist_lock); } } write_unlock_irq(&tasklist_lock); if (current->leader) disassociate_ctty(1); }
unsigned long hp_sdc_put(void) { hp_sdc_transaction *curr; uint8_t act; int idx, curridx; int limit = 0; write_lock(&hp_sdc.lock); /* If i8042 buffers are full, we cannot do anything that requires output, so we skip to the administrativa. */ if (hp_sdc.ibf) { hp_sdc_status_in8(); if (hp_sdc.ibf) goto finish; } anew: /* See if we are in the middle of a sequence. */ if (hp_sdc.wcurr < 0) hp_sdc.wcurr = 0; read_lock_irq(&hp_sdc.rtq_lock); if (hp_sdc.rcurr == hp_sdc.wcurr) hp_sdc.wcurr++; read_unlock_irq(&hp_sdc.rtq_lock); if (hp_sdc.wcurr >= HP_SDC_QUEUE_LEN) hp_sdc.wcurr = 0; curridx = hp_sdc.wcurr; if (hp_sdc.tq[curridx] != NULL) goto start; while (++curridx != hp_sdc.wcurr) { if (curridx >= HP_SDC_QUEUE_LEN) { curridx = -1; /* Wrap to top */ continue; } read_lock_irq(&hp_sdc.rtq_lock); if (hp_sdc.rcurr == curridx) { read_unlock_irq(&hp_sdc.rtq_lock); continue; } read_unlock_irq(&hp_sdc.rtq_lock); if (hp_sdc.tq[curridx] != NULL) break; /* Found one. */ } if (curridx == hp_sdc.wcurr) { /* There's nothing queued to do. */ curridx = -1; } hp_sdc.wcurr = curridx; start: /* Check to see if the interrupt mask needs to be set. */ if (hp_sdc.set_im) { hp_sdc_status_out8(hp_sdc.im | HP_SDC_CMD_SET_IM); hp_sdc.set_im = 0; goto finish; } if (hp_sdc.wcurr == -1) goto done; curr = hp_sdc.tq[curridx]; idx = curr->actidx; if (curr->actidx >= curr->endidx) { hp_sdc.tq[curridx] = NULL; /* Interleave outbound data between the transactions. */ hp_sdc.wcurr++; if (hp_sdc.wcurr >= HP_SDC_QUEUE_LEN) hp_sdc.wcurr = 0; goto finish; } act = curr->seq[idx]; idx++; if (curr->idx >= curr->endidx) { if (act & HP_SDC_ACT_DEALLOC) kfree(curr); hp_sdc.tq[curridx] = NULL; /* Interleave outbound data between the transactions. */ hp_sdc.wcurr++; if (hp_sdc.wcurr >= HP_SDC_QUEUE_LEN) hp_sdc.wcurr = 0; goto finish; } while (act & HP_SDC_ACT_PRECMD) { if (curr->idx != idx) { idx++; act &= ~HP_SDC_ACT_PRECMD; break; } hp_sdc_status_out8(curr->seq[idx]); curr->idx++; /* act finished? */ if ((act & HP_SDC_ACT_DURING) == HP_SDC_ACT_PRECMD) goto actdone; /* skip quantity field if data-out sequence follows. */ if (act & HP_SDC_ACT_DATAOUT) curr->idx++; goto finish; } if (act & HP_SDC_ACT_DATAOUT) { int qty; qty = curr->seq[idx]; idx++; if (curr->idx - idx < qty) { hp_sdc_data_out8(curr->seq[curr->idx]); curr->idx++; /* act finished? */ if (curr->idx - idx >= qty && (act & HP_SDC_ACT_DURING) == HP_SDC_ACT_DATAOUT) goto actdone; goto finish; } idx += qty; act &= ~HP_SDC_ACT_DATAOUT; } else while (act & HP_SDC_ACT_DATAREG) { int mask; uint8_t w7[4]; mask = curr->seq[idx]; if (idx != curr->idx) { idx++; idx += !!(mask & 1); idx += !!(mask & 2); idx += !!(mask & 4); idx += !!(mask & 8); act &= ~HP_SDC_ACT_DATAREG; break; } w7[0] = (mask & 1) ? curr->seq[++idx] : hp_sdc.r7[0]; w7[1] = (mask & 2) ? curr->seq[++idx] : hp_sdc.r7[1]; w7[2] = (mask & 4) ? curr->seq[++idx] : hp_sdc.r7[2]; w7[3] = (mask & 8) ? curr->seq[++idx] : hp_sdc.r7[3]; if (hp_sdc.wi > 0x73 || hp_sdc.wi < 0x70 || w7[hp_sdc.wi - 0x70] == hp_sdc.r7[hp_sdc.wi - 0x70]) { int i = 0; /* Need to point the write index register */ while (i < 4 && w7[i] == hp_sdc.r7[i]) i++; if (i < 4) { hp_sdc_status_out8(HP_SDC_CMD_SET_D0 + i); hp_sdc.wi = 0x70 + i; goto finish; } idx++; if ((act & HP_SDC_ACT_DURING) == HP_SDC_ACT_DATAREG) goto actdone; curr->idx = idx; act &= ~HP_SDC_ACT_DATAREG; break; } hp_sdc_data_out8(w7[hp_sdc.wi - 0x70]); hp_sdc.r7[hp_sdc.wi - 0x70] = w7[hp_sdc.wi - 0x70]; hp_sdc.wi++; /* write index register autoincrements */ { int i = 0; while ((i < 4) && w7[i] == hp_sdc.r7[i]) i++; if (i >= 4) { curr->idx = idx + 1; if ((act & HP_SDC_ACT_DURING) == HP_SDC_ACT_DATAREG) goto actdone; } } goto finish; } /* We don't go any further in the command if there is a pending read, because we don't want interleaved results. */ read_lock_irq(&hp_sdc.rtq_lock); if (hp_sdc.rcurr >= 0) { read_unlock_irq(&hp_sdc.rtq_lock); goto finish; } read_unlock_irq(&hp_sdc.rtq_lock); if (act & HP_SDC_ACT_POSTCMD) { uint8_t postcmd; /* curr->idx should == idx at this point. */ postcmd = curr->seq[idx]; curr->idx++; if (act & HP_SDC_ACT_DATAIN) { /* Start a new read */ hp_sdc.rqty = curr->seq[curr->idx]; do_gettimeofday(&hp_sdc.rtv); curr->idx++; /* Still need to lock here in case of spurious irq. */ write_lock_irq(&hp_sdc.rtq_lock); hp_sdc.rcurr = curridx; write_unlock_irq(&hp_sdc.rtq_lock); hp_sdc_status_out8(postcmd); goto finish; } hp_sdc_status_out8(postcmd); goto actdone; } actdone: if (act & HP_SDC_ACT_SEMAPHORE) up(curr->act.semaphore); else if (act & HP_SDC_ACT_CALLBACK) curr->act.irqhook(0,NULL,0,0); if (curr->idx >= curr->endidx) { /* This transaction is over. */ if (act & HP_SDC_ACT_DEALLOC) kfree(curr); hp_sdc.tq[curridx] = NULL; } else { curr->actidx = idx + 1; curr->idx = idx + 2; } /* Interleave outbound data between the transactions. */ hp_sdc.wcurr++; if (hp_sdc.wcurr >= HP_SDC_QUEUE_LEN) hp_sdc.wcurr = 0; finish: /* If by some quirk IBF has cleared and our ISR has run to see that that has happened, do it all again. */ if (!hp_sdc.ibf && limit++ < 20) goto anew; done: if (hp_sdc.wcurr >= 0) tasklet_schedule(&hp_sdc.task); write_unlock(&hp_sdc.lock); return 0; }
/* * This creates a new process as a copy of the old one, * but does not actually start it yet. * * It copies the registers, and all the appropriate * parts of the process environment (as per the clone * flags). The actual kick-off is left to the caller. */ struct task_struct *copy_process(unsigned long clone_flags, unsigned long stack_start, struct pt_regs *regs, unsigned long stack_size, int __user *parent_tidptr, int __user *child_tidptr) { int retval; struct task_struct *p = NULL; if ((clone_flags & (CLONE_NEWNS|CLONE_FS)) == (CLONE_NEWNS|CLONE_FS)) return ERR_PTR(-EINVAL); /* * Thread groups must share signals as well, and detached threads * can only be started up within the thread group. */ if ((clone_flags & CLONE_THREAD) && !(clone_flags & CLONE_SIGHAND)) return ERR_PTR(-EINVAL); /* * Shared signal handlers imply shared VM. By way of the above, * thread groups also imply shared VM. Blocking this case allows * for various simplifications in other code. */ if ((clone_flags & CLONE_SIGHAND) && !(clone_flags & CLONE_VM)) return ERR_PTR(-EINVAL); retval = security_task_create(clone_flags); if (retval) goto fork_out; retval = -ENOMEM; p = dup_task_struct(current); if (!p) goto fork_out; retval = -EAGAIN; if (atomic_read(&p->user->processes) >= p->rlim[RLIMIT_NPROC].rlim_cur) { if (!capable(CAP_SYS_ADMIN) && !capable(CAP_SYS_RESOURCE) && p->user != &root_user) goto bad_fork_free; } atomic_inc(&p->user->__count); atomic_inc(&p->user->processes); get_group_info(p->group_info); /* * If multiple threads are within copy_process(), then this check * triggers too late. This doesn't hurt, the check is only there * to stop root fork bombs. */ if (nr_threads >= max_threads) goto bad_fork_cleanup_count; if (!try_module_get(p->thread_info->exec_domain->module)) goto bad_fork_cleanup_count; if (p->binfmt && !try_module_get(p->binfmt->module)) goto bad_fork_cleanup_put_domain; p->did_exec = 0; copy_flags(clone_flags, p); if (clone_flags & CLONE_IDLETASK) p->pid = 0; else { p->pid = alloc_pidmap(); if (p->pid == -1) goto bad_fork_cleanup; } retval = -EFAULT; if (clone_flags & CLONE_PARENT_SETTID) if (put_user(p->pid, parent_tidptr)) goto bad_fork_cleanup; p->proc_dentry = NULL; INIT_LIST_HEAD(&p->children); INIT_LIST_HEAD(&p->sibling); init_waitqueue_head(&p->wait_chldexit); p->vfork_done = NULL; spin_lock_init(&p->alloc_lock); spin_lock_init(&p->proc_lock); clear_tsk_thread_flag(p, TIF_SIGPENDING); init_sigpending(&p->pending); p->it_real_value = p->it_virt_value = p->it_prof_value = 0; p->it_real_incr = p->it_virt_incr = p->it_prof_incr = 0; init_timer(&p->real_timer); p->real_timer.data = (unsigned long) p; p->utime = p->stime = 0; p->cutime = p->cstime = 0; p->lock_depth = -1; /* -1 = no lock */ p->start_time = get_jiffies_64(); p->security = NULL; p->io_context = NULL; p->audit_context = NULL; #ifdef CONFIG_NUMA p->mempolicy = mpol_copy(p->mempolicy); if (IS_ERR(p->mempolicy)) { retval = PTR_ERR(p->mempolicy); p->mempolicy = NULL; goto bad_fork_cleanup; } #endif retval = -ENOMEM; if ((retval = security_task_alloc(p))) goto bad_fork_cleanup_policy; if ((retval = audit_alloc(p))) goto bad_fork_cleanup_security; /* copy all the process information */ if ((retval = copy_semundo(clone_flags, p))) goto bad_fork_cleanup_audit; if ((retval = copy_files(clone_flags, p))) goto bad_fork_cleanup_semundo; if ((retval = copy_fs(clone_flags, p))) goto bad_fork_cleanup_files; if ((retval = copy_sighand(clone_flags, p))) goto bad_fork_cleanup_fs; if ((retval = copy_signal(clone_flags, p))) goto bad_fork_cleanup_sighand; if ((retval = copy_mm(clone_flags, p))) goto bad_fork_cleanup_signal; if ((retval = copy_namespace(clone_flags, p))) goto bad_fork_cleanup_mm; retval = copy_thread(0, clone_flags, stack_start, stack_size, p, regs); if (retval) goto bad_fork_cleanup_namespace; p->set_child_tid = (clone_flags & CLONE_CHILD_SETTID) ? child_tidptr : NULL; /* * Clear TID on mm_release()? */ p->clear_child_tid = (clone_flags & CLONE_CHILD_CLEARTID) ? child_tidptr: NULL; /* * Syscall tracing should be turned off in the child regardless * of CLONE_PTRACE. */ clear_tsk_thread_flag(p, TIF_SYSCALL_TRACE); /* Our parent execution domain becomes current domain These must match for thread signalling to apply */ p->parent_exec_id = p->self_exec_id; /* ok, now we should be set up.. */ p->exit_signal = (clone_flags & CLONE_THREAD) ? -1 : (clone_flags & CSIGNAL); p->pdeath_signal = 0; /* Perform scheduler related setup */ sched_fork(p); /* * Ok, make it visible to the rest of the system. * We dont wake it up yet. */ p->tgid = p->pid; p->group_leader = p; INIT_LIST_HEAD(&p->ptrace_children); INIT_LIST_HEAD(&p->ptrace_list); /* Need tasklist lock for parent etc handling! */ write_lock_irq(&tasklist_lock); /* * Check for pending SIGKILL! The new thread should not be allowed * to slip out of an OOM kill. (or normal SIGKILL.) */ if (sigismember(¤t->pending.signal, SIGKILL)) { write_unlock_irq(&tasklist_lock); retval = -EINTR; goto bad_fork_cleanup_namespace; } /* CLONE_PARENT re-uses the old parent */ if (clone_flags & CLONE_PARENT) p->real_parent = current->real_parent; else p->real_parent = current; p->parent = p->real_parent; if (clone_flags & CLONE_THREAD) { spin_lock(¤t->sighand->siglock); /* * Important: if an exit-all has been started then * do not create this new thread - the whole thread * group is supposed to exit anyway. */ if (current->signal->group_exit) { spin_unlock(¤t->sighand->siglock); write_unlock_irq(&tasklist_lock); retval = -EAGAIN; goto bad_fork_cleanup_namespace; } p->tgid = current->tgid; p->group_leader = current->group_leader; if (current->signal->group_stop_count > 0) { /* * There is an all-stop in progress for the group. * We ourselves will stop as soon as we check signals. * Make the new thread part of that group stop too. */ current->signal->group_stop_count++; set_tsk_thread_flag(p, TIF_SIGPENDING); } spin_unlock(¤t->sighand->siglock); } SET_LINKS(p); if (p->ptrace & PT_PTRACED) __ptrace_link(p, current->parent); attach_pid(p, PIDTYPE_PID, p->pid); if (thread_group_leader(p)) { attach_pid(p, PIDTYPE_TGID, p->tgid); attach_pid(p, PIDTYPE_PGID, process_group(p)); attach_pid(p, PIDTYPE_SID, p->signal->session); if (p->pid) __get_cpu_var(process_counts)++; } else link_pid(p, p->pids + PIDTYPE_TGID, &p->group_leader->pids[PIDTYPE_TGID].pid); nr_threads++; write_unlock_irq(&tasklist_lock); retval = 0; fork_out: if (retval) return ERR_PTR(retval); return p; bad_fork_cleanup_namespace: exit_namespace(p); bad_fork_cleanup_mm: exit_mm(p); if (p->active_mm) mmdrop(p->active_mm); bad_fork_cleanup_signal: exit_signal(p); bad_fork_cleanup_sighand: exit_sighand(p); bad_fork_cleanup_fs: exit_fs(p); /* blocking */ bad_fork_cleanup_files: exit_files(p); /* blocking */ bad_fork_cleanup_semundo: exit_sem(p); bad_fork_cleanup_audit: audit_free(p); bad_fork_cleanup_security: security_task_free(p); bad_fork_cleanup_policy: #ifdef CONFIG_NUMA mpol_free(p->mempolicy); #endif bad_fork_cleanup: if (p->pid > 0) free_pidmap(p->pid); if (p->binfmt) module_put(p->binfmt->module); bad_fork_cleanup_put_domain: module_put(p->thread_info->exec_domain->module); bad_fork_cleanup_count: put_group_info(p->group_info); atomic_dec(&p->user->processes); free_uid(p->user); bad_fork_free: free_task(p); goto fork_out; }
static void vcc_remove_socket(struct sock *sk) { write_lock_irq(&vcc_sklist_lock); sk_del_node_init(sk); write_unlock_irq(&vcc_sklist_lock); }
static struct task_struct *copy_process(unsigned long clone_flags, unsigned long stack_start, struct pt_regs *regs, unsigned long stack_size, int __user *child_tidptr, struct pid *pid, int trace) { int retval; struct task_struct *p; int cgroup_callbacks_done = 0; if ((clone_flags & (CLONE_NEWNS|CLONE_FS)) == (CLONE_NEWNS|CLONE_FS)) return ERR_PTR(-EINVAL); if ((clone_flags & CLONE_THREAD) && !(clone_flags & CLONE_SIGHAND)) return ERR_PTR(-EINVAL); if ((clone_flags & CLONE_SIGHAND) && !(clone_flags & CLONE_VM)) return ERR_PTR(-EINVAL); if ((clone_flags & CLONE_PARENT) && current->signal->flags & SIGNAL_UNKILLABLE) return ERR_PTR(-EINVAL); retval = security_task_create(clone_flags); if (retval) goto fork_out; retval = -ENOMEM; p = dup_task_struct(current); if (!p) goto fork_out; ftrace_graph_init_task(p); rt_mutex_init_task(p); #ifdef CONFIG_PROVE_LOCKING DEBUG_LOCKS_WARN_ON(!p->hardirqs_enabled); DEBUG_LOCKS_WARN_ON(!p->softirqs_enabled); #endif retval = -EAGAIN; if (atomic_read(&p->real_cred->user->processes) >= task_rlimit(p, RLIMIT_NPROC)) { if (!capable(CAP_SYS_ADMIN) && !capable(CAP_SYS_RESOURCE) && p->real_cred->user != INIT_USER) goto bad_fork_free; } current->flags &= ~PF_NPROC_EXCEEDED; retval = copy_creds(p, clone_flags); if (retval < 0) goto bad_fork_free; retval = -EAGAIN; if (nr_threads >= max_threads) goto bad_fork_cleanup_count; if (!try_module_get(task_thread_info(p)->exec_domain->module)) goto bad_fork_cleanup_count; p->did_exec = 0; delayacct_tsk_init(p); copy_flags(clone_flags, p); INIT_LIST_HEAD(&p->children); INIT_LIST_HEAD(&p->sibling); rcu_copy_process(p); p->vfork_done = NULL; spin_lock_init(&p->alloc_lock); init_sigpending(&p->pending); p->utime = p->stime = p->gtime = 0; p->utimescaled = p->stimescaled = 0; #ifndef CONFIG_VIRT_CPU_ACCOUNTING p->prev_utime = p->prev_stime = 0; #endif #if defined(SPLIT_RSS_COUNTING) memset(&p->rss_stat, 0, sizeof(p->rss_stat)); #endif p->default_timer_slack_ns = current->timer_slack_ns; task_io_accounting_init(&p->ioac); acct_clear_integrals(p); posix_cpu_timers_init(p); do_posix_clock_monotonic_gettime(&p->start_time); p->real_start_time = p->start_time; monotonic_to_bootbased(&p->real_start_time); p->io_context = NULL; p->audit_context = NULL; if (clone_flags & CLONE_THREAD) threadgroup_change_begin(current); cgroup_fork(p); #ifdef CONFIG_NUMA p->mempolicy = mpol_dup(p->mempolicy); if (IS_ERR(p->mempolicy)) { retval = PTR_ERR(p->mempolicy); p->mempolicy = NULL; goto bad_fork_cleanup_cgroup; } mpol_fix_fork_child_flag(p); #endif #ifdef CONFIG_CPUSETS p->cpuset_mem_spread_rotor = NUMA_NO_NODE; p->cpuset_slab_spread_rotor = NUMA_NO_NODE; seqcount_init(&p->mems_allowed_seq); #endif #ifdef CONFIG_TRACE_IRQFLAGS p->irq_events = 0; #ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW p->hardirqs_enabled = 1; #else p->hardirqs_enabled = 0; #endif p->hardirq_enable_ip = 0; p->hardirq_enable_event = 0; p->hardirq_disable_ip = _THIS_IP_; p->hardirq_disable_event = 0; p->softirqs_enabled = 1; p->softirq_enable_ip = _THIS_IP_; p->softirq_enable_event = 0; p->softirq_disable_ip = 0; p->softirq_disable_event = 0; p->hardirq_context = 0; p->softirq_context = 0; #endif #ifdef CONFIG_LOCKDEP p->lockdep_depth = 0; p->curr_chain_key = 0; p->lockdep_recursion = 0; #endif #ifdef CONFIG_DEBUG_MUTEXES p->blocked_on = NULL; p->blocked_by = NULL; p->blocked_since = 0; #endif #ifdef CONFIG_CGROUP_MEM_RES_CTLR p->memcg_batch.do_batch = 0; p->memcg_batch.memcg = NULL; #endif sched_fork(p); retval = perf_event_init_task(p); if (retval) goto bad_fork_cleanup_policy; retval = audit_alloc(p); if (retval) goto bad_fork_cleanup_policy; retval = copy_semundo(clone_flags, p); if (retval) goto bad_fork_cleanup_audit; retval = copy_files(clone_flags, p); if (retval) goto bad_fork_cleanup_semundo; retval = copy_fs(clone_flags, p); if (retval) goto bad_fork_cleanup_files; retval = copy_sighand(clone_flags, p); if (retval) goto bad_fork_cleanup_fs; retval = copy_signal(clone_flags, p); if (retval) goto bad_fork_cleanup_sighand; retval = copy_mm(clone_flags, p); if (retval) goto bad_fork_cleanup_signal; retval = copy_namespaces(clone_flags, p); if (retval) goto bad_fork_cleanup_mm; retval = copy_io(clone_flags, p); if (retval) goto bad_fork_cleanup_namespaces; retval = copy_thread(clone_flags, stack_start, stack_size, p, regs); if (retval) goto bad_fork_cleanup_io; if (pid != &init_struct_pid) { retval = -ENOMEM; pid = alloc_pid(p->nsproxy->pid_ns); if (!pid) goto bad_fork_cleanup_io; } p->pid = pid_nr(pid); p->tgid = p->pid; if (clone_flags & CLONE_THREAD) p->tgid = current->tgid; p->set_child_tid = (clone_flags & CLONE_CHILD_SETTID) ? child_tidptr : NULL; p->clear_child_tid = (clone_flags & CLONE_CHILD_CLEARTID) ? child_tidptr : NULL; #ifdef CONFIG_BLOCK p->plug = NULL; #endif #ifdef CONFIG_FUTEX p->robust_list = NULL; #ifdef CONFIG_COMPAT p->compat_robust_list = NULL; #endif INIT_LIST_HEAD(&p->pi_state_list); p->pi_state_cache = NULL; #endif if ((clone_flags & (CLONE_VM|CLONE_VFORK)) == CLONE_VM) p->sas_ss_sp = p->sas_ss_size = 0; user_disable_single_step(p); clear_tsk_thread_flag(p, TIF_SYSCALL_TRACE); #ifdef TIF_SYSCALL_EMU clear_tsk_thread_flag(p, TIF_SYSCALL_EMU); #endif clear_all_latency_tracing(p); if (clone_flags & CLONE_THREAD) p->exit_signal = -1; else if (clone_flags & CLONE_PARENT) p->exit_signal = current->group_leader->exit_signal; else p->exit_signal = (clone_flags & CSIGNAL); p->pdeath_signal = 0; p->exit_state = 0; p->nr_dirtied = 0; p->nr_dirtied_pause = 128 >> (PAGE_SHIFT - 10); p->dirty_paused_when = 0; p->group_leader = p; INIT_LIST_HEAD(&p->thread_group); cgroup_fork_callbacks(p); cgroup_callbacks_done = 1; write_lock_irq(&tasklist_lock); if (clone_flags & (CLONE_PARENT|CLONE_THREAD)) { p->real_parent = current->real_parent; p->parent_exec_id = current->parent_exec_id; } else { p->real_parent = current; p->parent_exec_id = current->self_exec_id; } spin_lock(¤t->sighand->siglock); recalc_sigpending(); if (signal_pending(current)) { spin_unlock(¤t->sighand->siglock); write_unlock_irq(&tasklist_lock); retval = -ERESTARTNOINTR; goto bad_fork_free_pid; } if (clone_flags & CLONE_THREAD) { current->signal->nr_threads++; atomic_inc(¤t->signal->live); atomic_inc(¤t->signal->sigcnt); p->group_leader = current->group_leader; list_add_tail_rcu(&p->thread_group, &p->group_leader->thread_group); } if (likely(p->pid)) { ptrace_init_task(p, (clone_flags & CLONE_PTRACE) || trace); if (thread_group_leader(p)) { if (is_child_reaper(pid)) p->nsproxy->pid_ns->child_reaper = p; p->signal->leader_pid = pid; p->signal->tty = tty_kref_get(current->signal->tty); attach_pid(p, PIDTYPE_PGID, task_pgrp(current)); attach_pid(p, PIDTYPE_SID, task_session(current)); list_add_tail(&p->sibling, &p->real_parent->children); list_add_tail_rcu(&p->tasks, &init_task.tasks); __this_cpu_inc(process_counts); } attach_pid(p, PIDTYPE_PID, pid); nr_threads++; } total_forks++; spin_unlock(¤t->sighand->siglock); write_unlock_irq(&tasklist_lock); proc_fork_connector(p); cgroup_post_fork(p); if (clone_flags & CLONE_THREAD) threadgroup_change_end(current); perf_event_fork(p); trace_task_newtask(p, clone_flags); return p; bad_fork_free_pid: if (pid != &init_struct_pid) free_pid(pid); bad_fork_cleanup_io: if (p->io_context) exit_io_context(p); bad_fork_cleanup_namespaces: if (unlikely(clone_flags & CLONE_NEWPID)) pid_ns_release_proc(p->nsproxy->pid_ns); exit_task_namespaces(p); bad_fork_cleanup_mm: if (p->mm) mmput(p->mm); bad_fork_cleanup_signal: if (!(clone_flags & CLONE_THREAD)) free_signal_struct(p->signal); bad_fork_cleanup_sighand: __cleanup_sighand(p->sighand); bad_fork_cleanup_fs: exit_fs(p); bad_fork_cleanup_files: exit_files(p); bad_fork_cleanup_semundo: exit_sem(p); bad_fork_cleanup_audit: audit_free(p); bad_fork_cleanup_policy: perf_event_free_task(p); #ifdef CONFIG_NUMA mpol_put(p->mempolicy); bad_fork_cleanup_cgroup: #endif if (clone_flags & CLONE_THREAD) threadgroup_change_end(current); cgroup_exit(p, cgroup_callbacks_done); delayacct_tsk_free(p); module_put(task_thread_info(p)->exec_domain->module); bad_fork_cleanup_count: atomic_dec(&p->cred->user->processes); exit_creds(p); bad_fork_free: free_task(p); fork_out: return ERR_PTR(retval); }
asmlinkage long sys_wait4(pid_t pid,unsigned int * stat_addr, int options, struct rusage * ru) { int flag, retval; DECLARE_WAITQUEUE(wait, current); struct task_struct *tsk; if (options & ~(WNOHANG|WUNTRACED|__WNOTHREAD|__WCLONE|__WALL)) return -EINVAL; add_wait_queue(¤t->wait_chldexit,&wait); repeat: flag = 0; current->state = TASK_INTERRUPTIBLE; read_lock(&tasklist_lock); tsk = current; do { struct task_struct *p; for (p = tsk->p_cptr ; p ; p = p->p_osptr) { if (pid>0) { if (p->pid != pid) continue; } else if (!pid) { if (p->pgrp != current->pgrp) continue; } else if (pid != -1) { if (p->pgrp != -pid) continue; } /* Wait for all children (clone and not) if __WALL is set; * otherwise, wait for clone children *only* if __WCLONE is * set; otherwise, wait for non-clone children *only*. (Note: * A "clone" child here is one that reports to its parent * using a signal other than SIGCHLD.) */ if (((p->exit_signal != SIGCHLD) ^ ((options & __WCLONE) != 0)) && !(options & __WALL)) continue; flag = 1; switch (p->state) { case TASK_STOPPED: if (!p->exit_code) continue; if (!(options & WUNTRACED) && !(p->ptrace & PT_PTRACED)) continue; read_unlock(&tasklist_lock); retval = ru ? getrusage(p, RUSAGE_BOTH, ru) : 0; if (!retval && stat_addr) retval = put_user((p->exit_code << 8) | 0x7f, stat_addr); if (!retval) { p->exit_code = 0; retval = p->pid; } goto end_wait4; case TASK_ZOMBIE: current->times.tms_cutime += p->times.tms_utime + p->times.tms_cutime; current->times.tms_cstime += p->times.tms_stime + p->times.tms_cstime; read_unlock(&tasklist_lock); retval = ru ? getrusage(p, RUSAGE_BOTH, ru) : 0; if (!retval && stat_addr) retval = put_user(p->exit_code, stat_addr); if (retval) goto end_wait4; retval = p->pid; if (p->p_opptr != p->p_pptr) { write_lock_irq(&tasklist_lock); REMOVE_LINKS(p); p->p_pptr = p->p_opptr; SET_LINKS(p); do_notify_parent(p, SIGCHLD); write_unlock_irq(&tasklist_lock); } else release_task(p); goto end_wait4; default: continue; } } if (options & __WNOTHREAD) break; tsk = next_thread(tsk); } while (tsk != current); read_unlock(&tasklist_lock); if (flag) { retval = 0; if (options & WNOHANG) goto end_wait4; retval = -ERESTARTSYS; if (signal_pending(current)) goto end_wait4; schedule(); goto repeat; } retval = -ECHILD; end_wait4: current->state = TASK_RUNNING; remove_wait_queue(¤t->wait_chldexit,&wait); return retval; }
static void ib_cache_update(struct ib_device *device, u8 port) { struct ib_port_attr *tprops = NULL; struct ib_pkey_cache *pkey_cache = NULL, *old_pkey_cache; struct ib_gid_cache *gid_cache = NULL, *old_gid_cache; int i, j; int ret; union ib_gid gid, empty_gid; u16 pkey; tprops = kmalloc(sizeof *tprops, GFP_KERNEL); if (!tprops) return; ret = ib_query_port(device, port, tprops); if (ret) { printk(KERN_WARNING "ib_query_port failed (%d) for %s\n", ret, device->name); goto err; } pkey_cache = kmalloc(sizeof *pkey_cache + tprops->pkey_tbl_len * sizeof *pkey_cache->entry, GFP_KERNEL); if (!pkey_cache) goto err; pkey_cache->table_len = 0; gid_cache = kmalloc(sizeof *gid_cache + tprops->gid_tbl_len * sizeof *gid_cache->entry, GFP_KERNEL); if (!gid_cache) goto err; gid_cache->table_len = 0; for (i = 0, j = 0; i < tprops->pkey_tbl_len; ++i) { ret = ib_query_pkey(device, port, i, &pkey); if (ret) { printk(KERN_WARNING "ib_query_pkey failed (%d) for %s (index %d)\n", ret, device->name, i); goto err; } /* pkey 0xffff must be the default pkeyand 0x0000 must be the invalid * pkey per IBTA spec */ if (pkey) { pkey_cache->entry[j].index = i; pkey_cache->entry[j++].pkey = pkey; } } pkey_cache->table_len = j; memset(&empty_gid, 0, sizeof empty_gid); for (i = 0, j = 0; i < tprops->gid_tbl_len; ++i) { ret = ib_query_gid(device, port, i, &gid); if (ret) { printk(KERN_WARNING "ib_query_gid failed (%d) for %s (index %d)\n", ret, device->name, i); goto err; } /* if the lower 8 bytes the device GID entry is all 0, * our entry is a blank, invalid entry... * depending on device, the upper 8 bytes might or might * not be prefilled with a valid subnet prefix, so * don't rely on them for determining a valid gid * entry */ if (memcmp(&gid + 8, &empty_gid + 8, sizeof gid - 8)) { gid_cache->entry[j].index = i; gid_cache->entry[j++].gid = gid; } } gid_cache->table_len = j; old_pkey_cache = pkey_cache; pkey_cache = kmalloc(sizeof *pkey_cache + old_pkey_cache->table_len * sizeof *pkey_cache->entry, GFP_KERNEL); if (!pkey_cache) pkey_cache = old_pkey_cache; else { pkey_cache->table_len = old_pkey_cache->table_len; memcpy(&pkey_cache->entry[0], &old_pkey_cache->entry[0], pkey_cache->table_len * sizeof *pkey_cache->entry); kfree(old_pkey_cache); } old_gid_cache = gid_cache; gid_cache = kmalloc(sizeof *gid_cache + old_gid_cache->table_len * sizeof *gid_cache->entry, GFP_KERNEL); if (!gid_cache) gid_cache = old_gid_cache; else { gid_cache->table_len = old_gid_cache->table_len; memcpy(&gid_cache->entry[0], &old_gid_cache->entry[0], gid_cache->table_len * sizeof *gid_cache->entry); kfree(old_gid_cache); } write_lock_irq(&device->cache.lock); old_pkey_cache = device->cache.pkey_cache[port - start_port(device)]; old_gid_cache = device->cache.gid_cache [port - start_port(device)]; device->cache.pkey_cache[port - start_port(device)] = pkey_cache; device->cache.gid_cache [port - start_port(device)] = gid_cache; device->cache.lmc_cache[port - start_port(device)] = tprops->lmc; write_unlock_irq(&device->cache.lock); kfree(old_pkey_cache); kfree(old_gid_cache); kfree(tprops); return; err: kfree(pkey_cache); kfree(gid_cache); kfree(tprops); }
/* * Adjust the time obtained from the CMOS to be UTC time instead of * local time. * * This is ugly, but preferable to the alternatives. Otherwise we * would either need to write a program to do it in /etc/rc (and risk * confusion if the program gets run more than once; it would also be * hard to make the program warp the clock precisely n hours) or * compile in the timezone information into the kernel. Bad, bad.... * * - TYT, 1992-01-01 * * The best thing to do is to keep the CMOS clock in universal time (UTC) * as real UNIX machines always do it. This avoids all headaches about * daylight saving times and warping kernel clocks. */ inline static void warp_clock(void) { write_lock_irq(&xtime_lock); xtime.tv_sec += sys_tz.tz_minuteswest * 60; write_unlock_irq(&xtime_lock); }
/* * This creates a new process as a copy of the old one, * but does not actually start it yet. * * It copies the registers, and all the appropriate * parts of the process environment (as per the clone * flags). The actual kick-off is left to the caller. */ static task_t *copy_process(unsigned long clone_flags, unsigned long stack_start, struct pt_regs *regs, unsigned long stack_size, int __user *parent_tidptr, int __user *child_tidptr, int pid) { int retval; struct task_struct *p = NULL; if ((clone_flags & (CLONE_NEWNS|CLONE_FS)) == (CLONE_NEWNS|CLONE_FS)) return ERR_PTR(-EINVAL); /* * Thread groups must share signals as well, and detached threads * can only be started up within the thread group. */ if ((clone_flags & CLONE_THREAD) && !(clone_flags & CLONE_SIGHAND)) return ERR_PTR(-EINVAL); /* * Shared signal handlers imply shared VM. By way of the above, * thread groups also imply shared VM. Blocking this case allows * for various simplifications in other code. */ if ((clone_flags & CLONE_SIGHAND) && !(clone_flags & CLONE_VM)) return ERR_PTR(-EINVAL); retval = security_task_create(clone_flags); if (retval) goto fork_out; retval = -ENOMEM; p = dup_task_struct(current); if (!p) goto fork_out; retval = -EAGAIN; if (atomic_read(&p->user->processes) >= p->signal->rlim[RLIMIT_NPROC].rlim_cur) { if (!capable(CAP_SYS_ADMIN) && !capable(CAP_SYS_RESOURCE) && p->user != &root_user) goto bad_fork_free; } atomic_inc(&p->user->__count); atomic_inc(&p->user->processes); get_group_info(p->group_info); /* * If multiple threads are within copy_process(), then this check * triggers too late. This doesn't hurt, the check is only there * to stop root fork bombs. */ if (nr_threads >= max_threads) goto bad_fork_cleanup_count; if (!try_module_get(p->thread_info->exec_domain->module)) goto bad_fork_cleanup_count; if (p->binfmt && !try_module_get(p->binfmt->module)) goto bad_fork_cleanup_put_domain; p->did_exec = 0; copy_flags(clone_flags, p); p->pid = pid; retval = -EFAULT; if (clone_flags & CLONE_PARENT_SETTID) if (put_user(p->pid, parent_tidptr)) goto bad_fork_cleanup; p->proc_dentry = NULL; INIT_LIST_HEAD(&p->children); INIT_LIST_HEAD(&p->sibling); p->vfork_done = NULL; spin_lock_init(&p->alloc_lock); spin_lock_init(&p->proc_lock); clear_tsk_thread_flag(p, TIF_SIGPENDING); init_sigpending(&p->pending); p->utime = cputime_zero; p->stime = cputime_zero; p->sched_time = 0; p->rchar = 0; /* I/O counter: bytes read */ p->wchar = 0; /* I/O counter: bytes written */ p->syscr = 0; /* I/O counter: read syscalls */ p->syscw = 0; /* I/O counter: write syscalls */ acct_clear_integrals(p); p->it_virt_expires = cputime_zero; p->it_prof_expires = cputime_zero; p->it_sched_expires = 0; INIT_LIST_HEAD(&p->cpu_timers[0]); INIT_LIST_HEAD(&p->cpu_timers[1]); INIT_LIST_HEAD(&p->cpu_timers[2]); p->lock_depth = -1; /* -1 = no lock */ do_posix_clock_monotonic_gettime(&p->start_time); p->security = NULL; p->io_context = NULL; p->io_wait = NULL; p->audit_context = NULL; #ifdef CONFIG_NUMA p->mempolicy = mpol_copy(p->mempolicy); if (IS_ERR(p->mempolicy)) { retval = PTR_ERR(p->mempolicy); p->mempolicy = NULL; goto bad_fork_cleanup; } #endif p->tgid = p->pid; if (clone_flags & CLONE_THREAD) p->tgid = current->tgid; if ((retval = security_task_alloc(p))) goto bad_fork_cleanup_policy; if ((retval = audit_alloc(p))) goto bad_fork_cleanup_security; /* copy all the process information */ if ((retval = copy_semundo(clone_flags, p))) goto bad_fork_cleanup_audit; if ((retval = copy_files(clone_flags, p))) goto bad_fork_cleanup_semundo; if ((retval = copy_fs(clone_flags, p))) goto bad_fork_cleanup_files; if ((retval = copy_sighand(clone_flags, p))) goto bad_fork_cleanup_fs; if ((retval = copy_signal(clone_flags, p))) goto bad_fork_cleanup_sighand; if ((retval = copy_mm(clone_flags, p))) goto bad_fork_cleanup_signal; if ((retval = copy_keys(clone_flags, p))) goto bad_fork_cleanup_mm; if ((retval = copy_namespace(clone_flags, p))) goto bad_fork_cleanup_keys; retval = copy_thread(0, clone_flags, stack_start, stack_size, p, regs); if (retval) goto bad_fork_cleanup_namespace; p->set_child_tid = (clone_flags & CLONE_CHILD_SETTID) ? child_tidptr : NULL; /* * Clear TID on mm_release()? */ p->clear_child_tid = (clone_flags & CLONE_CHILD_CLEARTID) ? child_tidptr: NULL; /* * Syscall tracing should be turned off in the child regardless * of CLONE_PTRACE. */ clear_tsk_thread_flag(p, TIF_SYSCALL_TRACE); /* Our parent execution domain becomes current domain These must match for thread signalling to apply */ p->parent_exec_id = p->self_exec_id; /* ok, now we should be set up.. */ p->exit_signal = (clone_flags & CLONE_THREAD) ? -1 : (clone_flags & CSIGNAL); p->pdeath_signal = 0; p->exit_state = 0; /* Perform scheduler related setup */ sched_fork(p); /* * Ok, make it visible to the rest of the system. * We dont wake it up yet. */ p->group_leader = p; INIT_LIST_HEAD(&p->ptrace_children); INIT_LIST_HEAD(&p->ptrace_list); /* Need tasklist lock for parent etc handling! */ write_lock_irq(&tasklist_lock); /* * The task hasn't been attached yet, so cpus_allowed mask cannot * have changed. The cpus_allowed mask of the parent may have * changed after it was copied first time, and it may then move to * another CPU - so we re-copy it here and set the child's CPU to * the parent's CPU. This avoids alot of nasty races. */ p->cpus_allowed = current->cpus_allowed; set_task_cpu(p, smp_processor_id()); /* * Check for pending SIGKILL! The new thread should not be allowed * to slip out of an OOM kill. (or normal SIGKILL.) */ if (sigismember(¤t->pending.signal, SIGKILL)) { write_unlock_irq(&tasklist_lock); retval = -EINTR; goto bad_fork_cleanup_namespace; } /* CLONE_PARENT re-uses the old parent */ if (clone_flags & (CLONE_PARENT|CLONE_THREAD)) p->real_parent = current->real_parent; else p->real_parent = current; p->parent = p->real_parent; if (clone_flags & CLONE_THREAD) { spin_lock(¤t->sighand->siglock); /* * Important: if an exit-all has been started then * do not create this new thread - the whole thread * group is supposed to exit anyway. */ if (current->signal->flags & SIGNAL_GROUP_EXIT) { spin_unlock(¤t->sighand->siglock); write_unlock_irq(&tasklist_lock); retval = -EAGAIN; goto bad_fork_cleanup_namespace; } p->group_leader = current->group_leader; if (current->signal->group_stop_count > 0) { /* * There is an all-stop in progress for the group. * We ourselves will stop as soon as we check signals. * Make the new thread part of that group stop too. */ current->signal->group_stop_count++; set_tsk_thread_flag(p, TIF_SIGPENDING); } if (!cputime_eq(current->signal->it_virt_expires, cputime_zero) || !cputime_eq(current->signal->it_prof_expires, cputime_zero) || current->signal->rlim[RLIMIT_CPU].rlim_cur != RLIM_INFINITY || !list_empty(¤t->signal->cpu_timers[0]) || !list_empty(¤t->signal->cpu_timers[1]) || !list_empty(¤t->signal->cpu_timers[2])) { /* * Have child wake up on its first tick to check * for process CPU timers. */ p->it_prof_expires = jiffies_to_cputime(1); } spin_unlock(¤t->sighand->siglock); } SET_LINKS(p); if (unlikely(p->ptrace & PT_PTRACED)) __ptrace_link(p, current->parent); cpuset_fork(p); attach_pid(p, PIDTYPE_PID, p->pid); attach_pid(p, PIDTYPE_TGID, p->tgid); if (thread_group_leader(p)) { attach_pid(p, PIDTYPE_PGID, process_group(p)); attach_pid(p, PIDTYPE_SID, p->signal->session); if (p->pid) __get_cpu_var(process_counts)++; } nr_threads++; total_forks++; write_unlock_irq(&tasklist_lock); retval = 0; fork_out: if (retval) return ERR_PTR(retval); return p; bad_fork_cleanup_namespace: exit_namespace(p); bad_fork_cleanup_keys: exit_keys(p); bad_fork_cleanup_mm: if (p->mm) mmput(p->mm); bad_fork_cleanup_signal: exit_signal(p); bad_fork_cleanup_sighand: exit_sighand(p); bad_fork_cleanup_fs: exit_fs(p); /* blocking */ bad_fork_cleanup_files: exit_files(p); /* blocking */ bad_fork_cleanup_semundo: exit_sem(p); bad_fork_cleanup_audit: audit_free(p); bad_fork_cleanup_security: security_task_free(p); bad_fork_cleanup_policy: #ifdef CONFIG_NUMA mpol_free(p->mempolicy); #endif bad_fork_cleanup: if (p->binfmt) module_put(p->binfmt->module); bad_fork_cleanup_put_domain: module_put(p->thread_info->exec_domain->module); bad_fork_cleanup_count: put_group_info(p->group_info); atomic_dec(&p->user->processes); free_uid(p->user); bad_fork_free: free_task(p); goto fork_out; }
long sys_ptrace(long request, pid_t pid, long addr, long data) { struct task_struct *child; long ret; lock_kernel(); ret = -EPERM; if (request == PTRACE_TRACEME) { /* are we already being traced? */ if (current->ptrace & PT_PTRACED) goto out; /* set the ptrace bit in the process flags. */ current->ptrace |= PT_PTRACED; ret = 0; goto out; } ret = -ESRCH; read_lock(&tasklist_lock); child = find_task_by_pid(pid); if (child) get_task_struct(child); read_unlock(&tasklist_lock); if (!child) goto out; ret = -EPERM; if (pid == 1) /* no messing around with init! */ goto out_tsk; if (request == PTRACE_ATTACH) { if (child == current) goto out_tsk; if ((!child->dumpable || (current->uid != child->euid) || (current->uid != child->suid) || (current->uid != child->uid) || (current->gid != child->egid) || (current->gid != child->sgid) || (!cap_issubset(child->cap_permitted, current->cap_permitted)) || (current->gid != child->gid)) && !capable(CAP_SYS_PTRACE)) goto out_tsk; /* the same process cannot be attached many times */ if (child->ptrace & PT_PTRACED) goto out_tsk; child->ptrace |= PT_PTRACED; if (child->p_pptr != current) { unsigned long flags; write_lock_irqsave(&tasklist_lock, flags); REMOVE_LINKS(child); child->p_pptr = current; SET_LINKS(child); write_unlock_irqrestore(&tasklist_lock, flags); } send_sig(SIGSTOP, child, 1); ret = 0; goto out_tsk; } ret = -ESRCH; if (!(child->ptrace & PT_PTRACED)) goto out_tsk; if (child->state != TASK_STOPPED) { if (request != PTRACE_KILL) goto out_tsk; } if (child->p_pptr != current) goto out_tsk; switch (request) { case PTRACE_PEEKTEXT: /* read word at location addr. */ case PTRACE_PEEKDATA: { unsigned long tmp; int copied; copied = access_process_vm(child, addr, &tmp, sizeof(tmp), 0); ret = -EIO; if (copied != sizeof(tmp)) goto out_tsk; ret = put_user(tmp,(unsigned long *) data); goto out_tsk; } /* when I and D space are separate, this will have to be fixed. */ case PTRACE_POKETEXT: /* write the word at location addr. */ case PTRACE_POKEDATA: ret = 0; if (access_process_vm(child, addr, &data, sizeof(data), 1) == sizeof(data)) goto out_tsk; ret = -EIO; goto out_tsk; /* Read the word at location addr in the USER area. This will need to change when the kernel no longer saves all regs on a syscall. */ case PTRACE_PEEKUSR: { unsigned long tmp; ret = -EIO; if ((addr & 3) || (unsigned long) addr >= sizeof(struct pt_regs)) goto out_tsk; tmp = *(unsigned long *) ((char *) task_regs(child) + addr); ret = put_user(tmp, (unsigned long *) data); goto out_tsk; } /* Write the word at location addr in the USER area. This will need to change when the kernel no longer saves all regs on a syscall. FIXME. There is a problem at the moment in that r3-r18 are only saved if the process is ptraced on syscall entry, and even then those values are overwritten by actual register values on syscall exit. */ case PTRACE_POKEUSR: ret = -EIO; if ((addr & 3) || (unsigned long) addr >= sizeof(struct pt_regs)) goto out_tsk; /* XXX This test probably needs adjusting. We probably want to * allow writes to some bits of PSW, and may want to block writes * to (some) space registers. Some register values written here * may be ignored in entry.S:syscall_restore_rfi; e.g. iaoq is * written with r31/r31+4, and not with the values in pt_regs. */ /* Allow writing of gr1-gr31, fr*, sr*, iasq*, iaoq*, sar */ if (addr == PT_PSW || (addr > PT_IAOQ1 && addr != PT_SAR)) goto out_tsk; *(unsigned long *) ((char *) task_regs(child) + addr) = data; ret = 0; goto out_tsk; case PTRACE_SYSCALL: /* continue and stop at next (return from) syscall */ case PTRACE_CONT: ret = -EIO; if ((unsigned long) data > _NSIG) goto out_tsk; child->ptrace &= ~(PT_SINGLESTEP|PT_BLOCKSTEP); if (request == PTRACE_SYSCALL) child->ptrace |= PT_TRACESYS; else child->ptrace &= ~PT_TRACESYS; child->exit_code = data; goto out_wake_notrap; case PTRACE_KILL: /* * make the child exit. Best I can do is send it a * sigkill. perhaps it should be put in the status * that it wants to exit. */ if (child->state == TASK_ZOMBIE) /* already dead */ goto out_tsk; child->exit_code = SIGKILL; goto out_wake_notrap; case PTRACE_SINGLEBLOCK: ret = -EIO; if ((unsigned long) data > _NSIG) goto out_tsk; child->ptrace &= ~(PT_TRACESYS|PT_SINGLESTEP); child->ptrace |= PT_BLOCKSTEP; child->exit_code = data; /* Enable taken branch trap. */ pa_psw(child)->r = 0; pa_psw(child)->t = 1; pa_psw(child)->h = 0; pa_psw(child)->l = 0; goto out_wake; case PTRACE_SINGLESTEP: ret = -EIO; if ((unsigned long) data > _NSIG) goto out_tsk; child->ptrace &= ~(PT_TRACESYS|PT_BLOCKSTEP); child->ptrace |= PT_SINGLESTEP; child->exit_code = data; if (pa_psw(child)->n) { struct siginfo si; /* Nullified, just crank over the queue. */ task_regs(child)->iaoq[0] = task_regs(child)->iaoq[1]; task_regs(child)->iasq[0] = task_regs(child)->iasq[1]; task_regs(child)->iaoq[1] = task_regs(child)->iaoq[0] + 4; pa_psw(child)->n = 0; pa_psw(child)->x = 0; pa_psw(child)->y = 0; pa_psw(child)->z = 0; pa_psw(child)->b = 0; pa_psw(child)->r = 0; pa_psw(child)->t = 0; pa_psw(child)->h = 0; pa_psw(child)->l = 0; /* Don't wake up the child, but let the parent know something happened. */ si.si_code = TRAP_TRACE; si.si_addr = (void *) (task_regs(child)->iaoq[0] & ~3); si.si_signo = SIGTRAP; si.si_errno = 0; force_sig_info(SIGTRAP, &si, child); //notify_parent(child, SIGCHLD); //ret = 0; goto out_wake; } /* Enable recovery counter traps. The recovery counter * itself will be set to zero on a task switch. If the * task is suspended on a syscall then the syscall return * path will overwrite the recovery counter with a suitable * value such that it traps once back in user space. We * disable interrupts in the childs PSW here also, to avoid * interrupts while the recovery counter is decrementing. */ pa_psw(child)->r = 1; pa_psw(child)->t = 0; pa_psw(child)->h = 0; pa_psw(child)->l = 0; /* give it a chance to run. */ goto out_wake; case PTRACE_DETACH: ret = -EIO; if ((unsigned long) data > _NSIG) goto out_tsk; child->ptrace &= ~(PT_PTRACED|PT_TRACESYS|PT_SINGLESTEP|PT_BLOCKSTEP); child->exit_code = data; write_lock_irq(&tasklist_lock); REMOVE_LINKS(child); child->p_pptr = child->p_opptr; SET_LINKS(child); write_unlock_irq(&tasklist_lock); goto out_wake_notrap; default: ret = -EIO; goto out_tsk; } out_wake_notrap: /* make sure the trap bits are not set */ pa_psw(child)->r = 0; pa_psw(child)->t = 0; pa_psw(child)->h = 0; pa_psw(child)->l = 0; out_wake: wake_up_process(child); ret = 0; out_tsk: free_task_struct(child); out: unlock_kernel(); return ret; }
/** * zfcp_adapter_enqueue - enqueue a new adapter to the list * @ccw_device: pointer to the struct cc_device * * Returns: 0 if a new adapter was successfully enqueued * -ENOMEM if alloc failed * Enqueues an adapter at the end of the adapter list in the driver data. * All adapter internal structures are set up. * Proc-fs entries are also created. * locks: config_sema must be held to serialise changes to the adapter list */ int zfcp_adapter_enqueue(struct ccw_device *ccw_device) { struct zfcp_adapter *adapter; /* * Note: It is safe to release the list_lock, as any list changes * are protected by the config_sema, which must be held to get here */ adapter = kzalloc(sizeof(struct zfcp_adapter), GFP_KERNEL); if (!adapter) return -ENOMEM; ccw_device->handler = NULL; adapter->ccw_device = ccw_device; atomic_set(&adapter->refcount, 0); if (zfcp_qdio_allocate(adapter)) goto qdio_allocate_failed; if (zfcp_allocate_low_mem_buffers(adapter)) goto failed_low_mem_buffers; if (zfcp_reqlist_alloc(adapter)) goto failed_low_mem_buffers; if (zfcp_adapter_debug_register(adapter)) goto debug_register_failed; init_waitqueue_head(&adapter->remove_wq); init_waitqueue_head(&adapter->erp_thread_wqh); init_waitqueue_head(&adapter->erp_done_wqh); INIT_LIST_HEAD(&adapter->port_list_head); INIT_LIST_HEAD(&adapter->erp_ready_head); INIT_LIST_HEAD(&adapter->erp_running_head); spin_lock_init(&adapter->req_list_lock); spin_lock_init(&adapter->hba_dbf_lock); spin_lock_init(&adapter->san_dbf_lock); spin_lock_init(&adapter->scsi_dbf_lock); spin_lock_init(&adapter->rec_dbf_lock); spin_lock_init(&adapter->req_q_lock); rwlock_init(&adapter->erp_lock); rwlock_init(&adapter->abort_lock); sema_init(&adapter->erp_ready_sem, 0); INIT_WORK(&adapter->stat_work, _zfcp_status_read_scheduler); INIT_WORK(&adapter->scan_work, _zfcp_scan_ports_later); /* mark adapter unusable as long as sysfs registration is not complete */ atomic_set_mask(ZFCP_STATUS_COMMON_REMOVE, &adapter->status); dev_set_drvdata(&ccw_device->dev, adapter); if (sysfs_create_group(&ccw_device->dev.kobj, &zfcp_sysfs_adapter_attrs)) goto sysfs_failed; write_lock_irq(&zfcp_data.config_lock); atomic_clear_mask(ZFCP_STATUS_COMMON_REMOVE, &adapter->status); list_add_tail(&adapter->list, &zfcp_data.adapter_list_head); write_unlock_irq(&zfcp_data.config_lock); zfcp_fc_nameserver_init(adapter); return 0; sysfs_failed: zfcp_adapter_debug_unregister(adapter); debug_register_failed: dev_set_drvdata(&ccw_device->dev, NULL); kfree(adapter->req_list); failed_low_mem_buffers: zfcp_free_low_mem_buffers(adapter); qdio_allocate_failed: zfcp_qdio_free(adapter); kfree(adapter); return -ENOMEM; }
/** * zfcp_port_enqueue - enqueue port to port list of adapter * @adapter: adapter where remote port is added * @wwpn: WWPN of the remote port to be enqueued * @status: initial status for the port * @d_id: destination id of the remote port to be enqueued * Returns: pointer to enqueued port on success, ERR_PTR on error * * All port internal structures are set up and the sysfs entry is generated. * d_id is used to enqueue ports with a well known address like the Directory * Service for nameserver lookup. */ struct zfcp_port *zfcp_port_enqueue(struct zfcp_adapter *adapter, u64 wwpn, u32 status, u32 d_id) { struct zfcp_port *port; int retval = -ENOMEM; kref_get(&adapter->ref); port = zfcp_get_port_by_wwpn(adapter, wwpn); if (port) { put_device(&port->dev); retval = -EEXIST; goto err_out; } port = kzalloc(sizeof(struct zfcp_port), GFP_KERNEL); if (!port) goto err_out; rwlock_init(&port->unit_list_lock); INIT_LIST_HEAD(&port->unit_list); INIT_WORK(&port->gid_pn_work, zfcp_fc_port_did_lookup); INIT_WORK(&port->test_link_work, zfcp_fc_link_test_work); INIT_WORK(&port->rport_work, zfcp_scsi_rport_work); port->adapter = adapter; port->d_id = d_id; port->wwpn = wwpn; port->rport_task = RPORT_NONE; port->dev.parent = &adapter->ccw_device->dev; port->dev.release = zfcp_port_release; if (dev_set_name(&port->dev, "0x%016llx", (unsigned long long)wwpn)) { kfree(port); goto err_out; } retval = -EINVAL; if (device_register(&port->dev)) { put_device(&port->dev); goto err_out; } if (sysfs_create_group(&port->dev.kobj, &zfcp_sysfs_port_attrs)) goto err_out_put; write_lock_irq(&adapter->port_list_lock); list_add_tail(&port->list, &adapter->port_list); write_unlock_irq(&adapter->port_list_lock); atomic_set_mask(status | ZFCP_STATUS_COMMON_RUNNING, &port->status); return port; err_out_put: device_unregister(&port->dev); err_out: zfcp_ccw_adapter_put(adapter); return ERR_PTR(retval); }
static void ib_cache_update(struct ib_device *device, u8 port) { struct ib_port_attr *tprops = NULL; struct ib_pkey_cache *pkey_cache = NULL, *old_pkey_cache; struct ib_gid_cache *gid_cache = NULL, *old_gid_cache; int i; int ret; tprops = kmalloc(sizeof *tprops, GFP_KERNEL); if (!tprops) return; ret = ib_query_port(device, port, tprops); if (ret) { printk(KERN_WARNING "ib_query_port failed (%d) for %s\n", ret, device->name); goto err; } pkey_cache = kmalloc(sizeof *pkey_cache + tprops->pkey_tbl_len * sizeof *pkey_cache->table, GFP_KERNEL); if (!pkey_cache) goto err; pkey_cache->table_len = tprops->pkey_tbl_len; gid_cache = kmalloc(sizeof *gid_cache + tprops->gid_tbl_len * sizeof *gid_cache->table, GFP_KERNEL); if (!gid_cache) goto err; gid_cache->table_len = tprops->gid_tbl_len; for (i = 0; i < pkey_cache->table_len; ++i) { ret = ib_query_pkey(device, port, i, pkey_cache->table + i); if (ret) { printk(KERN_WARNING "ib_query_pkey failed (%d) for %s (index %d)\n", ret, device->name, i); goto err; } } for (i = 0; i < gid_cache->table_len; ++i) { ret = ib_query_gid(device, port, i, gid_cache->table + i); if (ret) { printk(KERN_WARNING "ib_query_gid failed (%d) for %s (index %d)\n", ret, device->name, i); goto err; } } write_lock_irq(&device->cache.lock); old_pkey_cache = device->cache.pkey_cache[port - start_port(device)]; old_gid_cache = device->cache.gid_cache [port - start_port(device)]; device->cache.pkey_cache[port - start_port(device)] = pkey_cache; device->cache.gid_cache [port - start_port(device)] = gid_cache; device->cache.lmc_cache[port - start_port(device)] = tprops->lmc; write_unlock_irq(&device->cache.lock); kfree(old_pkey_cache); kfree(old_gid_cache); kfree(tprops); return; err: kfree(pkey_cache); kfree(gid_cache); kfree(tprops); }
/** * write_one_page - write out a single page and optionally wait on I/O * @page: the page to write * @wait: if true, wait on writeout * * The page must be locked by the caller and will be unlocked upon return. * * write_one_page() returns a negative error code if I/O failed. */ int write_one_page(struct page *page, int wait) { struct address_space *mapping = page->mapping; int ret = 0; struct writeback_control wbc = { .sync_mode = WB_SYNC_ALL, .nr_to_write = 1, }; BUG_ON(!PageLocked(page)); if (wait) wait_on_page_writeback(page); if (clear_page_dirty_for_io(page)) { page_cache_get(page); ret = mapping->a_ops->writepage(page, &wbc); if (ret == 0 && wait) { wait_on_page_writeback(page); if (PageError(page)) ret = -EIO; } page_cache_release(page); } else { unlock_page(page); } return ret; } EXPORT_SYMBOL(write_one_page); /* * For address_spaces which do not use buffers nor write back. */ int __set_page_dirty_no_writeback(struct page *page) { if (!PageDirty(page)) SetPageDirty(page); return 0; } /* * For address_spaces which do not use buffers. Just tag the page as dirty in * its radix tree. * * This is also used when a single buffer is being dirtied: we want to set the * page dirty in that case, but not all the buffers. This is a "bottom-up" * dirtying, whereas __set_page_dirty_buffers() is a "top-down" dirtying. * * Most callers have locked the page, which pins the address_space in memory. * But zap_pte_range() does not lock the page, however in that case the * mapping is pinned by the vma's ->vm_file reference. * * We take care to handle the case where the page was truncated from the * mapping by re-checking page_mapping() insode tree_lock. */ int __set_page_dirty_nobuffers(struct page *page) { if (!TestSetPageDirty(page)) { struct address_space *mapping = page_mapping(page); struct address_space *mapping2; if (!mapping) return 1; write_lock_irq(&mapping->tree_lock); mapping2 = page_mapping(page); if (mapping2) { /* Race with truncate? */ BUG_ON(mapping2 != mapping); WARN_ON_ONCE(!PagePrivate(page) && !PageUptodate(page)); if (mapping_cap_account_dirty(mapping)) { __inc_zone_page_state(page, NR_FILE_DIRTY); task_io_account_write(PAGE_CACHE_SIZE); } radix_tree_tag_set(&mapping->page_tree, page_index(page), PAGECACHE_TAG_DIRTY); } write_unlock_irq(&mapping->tree_lock); if (mapping->host) { /* !PageAnon && !swapper_space */ __mark_inode_dirty(mapping->host, I_DIRTY_PAGES); } return 1; } return 0; } EXPORT_SYMBOL(__set_page_dirty_nobuffers); /* * When a writepage implementation decides that it doesn't want to write this * page for some reason, it should redirty the locked page via * redirty_page_for_writepage() and it should then unlock the page and return 0 */ int redirty_page_for_writepage(struct writeback_control *wbc, struct page *page) { wbc->pages_skipped++; return __set_page_dirty_nobuffers(page); }
asmlinkage long sys_setpgid(pid_t pid, pid_t pgid) { struct task_struct *p; int err = -EINVAL; if (!pid) pid = current->pid; if (!pgid) pgid = pid; if (pgid < 0) return -EINVAL; /* From this point forward we keep holding onto the tasklist lock * so that our parent does not change from under us. -DaveM */ write_lock_irq(&tasklist_lock); err = -ESRCH; p = find_task_by_pid(pid); if (!p) goto out; err = -EINVAL; if (!thread_group_leader(p)) goto out; if (p->parent == current || p->real_parent == current) { err = -EPERM; if (p->signal->session != current->signal->session) goto out; err = -EACCES; if (p->did_exec) goto out; } else { err = -ESRCH; if (p != current) goto out; } err = -EPERM; if (p->signal->leader) goto out; if (pgid != pid) { struct task_struct *p; do_each_task_pid(pgid, PIDTYPE_PGID, p) { if (p->signal->session == current->signal->session) goto ok_pgid; } while_each_task_pid(pgid, PIDTYPE_PGID, p); goto out; } ok_pgid: err = security_task_setpgid(p, pgid); if (err) goto out; if (process_group(p) != pgid) { detach_pid(p, PIDTYPE_PGID); p->signal->pgrp = pgid; attach_pid(p, PIDTYPE_PGID, pgid); } err = 0; out: /* All paths lead to here, thus we are safe. -DaveM */ write_unlock_irq(&tasklist_lock); return err; }
void vcc_insert_socket(struct sock *sk) { write_lock_irq(&vcc_sklist_lock); __vcc_insert_socket(sk); write_unlock_irq(&vcc_sklist_lock); }
/** * zfcp_unit_enqueue - enqueue unit to unit list of a port. * @port: pointer to port where unit is added * @fcp_lun: FCP LUN of unit to be enqueued * Returns: pointer to enqueued unit on success, ERR_PTR on error * Locks: config_sema must be held to serialize changes to the unit list * * Sets up some unit internal structures and creates sysfs entry. */ struct zfcp_unit *zfcp_unit_enqueue(struct zfcp_port *port, u64 fcp_lun) { struct zfcp_unit *unit; unit = kzalloc(sizeof(struct zfcp_unit), GFP_KERNEL); if (!unit) return ERR_PTR(-ENOMEM); atomic_set(&unit->refcount, 0); init_waitqueue_head(&unit->remove_wq); unit->port = port; unit->fcp_lun = fcp_lun; dev_set_name(&unit->sysfs_device, "0x%016llx", (unsigned long long) fcp_lun); unit->sysfs_device.parent = &port->sysfs_device; unit->sysfs_device.release = zfcp_sysfs_unit_release; dev_set_drvdata(&unit->sysfs_device, unit); /* mark unit unusable as long as sysfs registration is not complete */ atomic_set_mask(ZFCP_STATUS_COMMON_REMOVE, &unit->status); spin_lock_init(&unit->latencies.lock); unit->latencies.write.channel.min = 0xFFFFFFFF; unit->latencies.write.fabric.min = 0xFFFFFFFF; unit->latencies.read.channel.min = 0xFFFFFFFF; unit->latencies.read.fabric.min = 0xFFFFFFFF; unit->latencies.cmd.channel.min = 0xFFFFFFFF; unit->latencies.cmd.fabric.min = 0xFFFFFFFF; read_lock_irq(&zfcp_data.config_lock); if (zfcp_get_unit_by_lun(port, fcp_lun)) { read_unlock_irq(&zfcp_data.config_lock); goto err_out_free; } read_unlock_irq(&zfcp_data.config_lock); if (device_register(&unit->sysfs_device)) goto err_out_free; if (sysfs_create_group(&unit->sysfs_device.kobj, &zfcp_sysfs_unit_attrs)) { device_unregister(&unit->sysfs_device); return ERR_PTR(-EIO); } zfcp_unit_get(unit); write_lock_irq(&zfcp_data.config_lock); list_add_tail(&unit->list, &port->unit_list_head); atomic_clear_mask(ZFCP_STATUS_COMMON_REMOVE, &unit->status); atomic_set_mask(ZFCP_STATUS_COMMON_RUNNING, &unit->status); write_unlock_irq(&zfcp_data.config_lock); zfcp_port_get(port); return unit; err_out_free: kfree(unit); return ERR_PTR(-EINVAL); }
/* * This needs some heavy checking ... * I just haven't the stomach for it. I also don't fully * understand sessions/pgrp etc. Let somebody who does explain it. * * OK, I think I have the protection semantics right.... this is really * only important on a multi-user system anyway, to make sure one user * can't send a signal to a process owned by another. -TYT, 12/12/91 * * Auch. Had to add the 'did_exec' flag to conform completely to POSIX. * LBT 04.03.94 */ asmlinkage long sys_setpgid(pid_t pid, pid_t pgid) { struct task_struct *p; struct task_struct *group_leader = current->group_leader; int err = -EINVAL; struct pid_namespace *ns; if (!pid) pid = task_pid_vnr(group_leader); if (!pgid) pgid = pid; if (pgid < 0) return -EINVAL; /* From this point forward we keep holding onto the tasklist lock * so that our parent does not change from under us. -DaveM */ ns = current->nsproxy->pid_ns; write_lock_irq(&tasklist_lock); err = -ESRCH; p = find_task_by_pid_ns(pid, ns); if (!p) goto out; err = -EINVAL; if (!thread_group_leader(p)) goto out; if (p->real_parent->tgid == group_leader->tgid) { err = -EPERM; if (task_session(p) != task_session(group_leader)) goto out; err = -EACCES; if (p->did_exec) goto out; } else { err = -ESRCH; if (p != group_leader) goto out; } err = -EPERM; if (p->signal->leader) goto out; if (pgid != pid) { struct task_struct *g; g = find_task_by_pid_type_ns(PIDTYPE_PGID, pgid, ns); if (!g || task_session(g) != task_session(group_leader)) goto out; } err = security_task_setpgid(p, pgid); if (err) goto out; if (task_pgrp_nr_ns(p, ns) != pgid) { struct pid *pid; detach_pid(p, PIDTYPE_PGID); pid = find_vpid(pgid); attach_pid(p, PIDTYPE_PGID, pid); set_task_pgrp(p, pid_nr(pid)); } err = 0; out: /* All paths lead to here, thus we are safe. -DaveM */ write_unlock_irq(&tasklist_lock); return err; }
/** * zfcp_port_enqueue - enqueue port to port list of adapter * @adapter: adapter where remote port is added * @wwpn: WWPN of the remote port to be enqueued * @status: initial status for the port * @d_id: destination id of the remote port to be enqueued * Returns: pointer to enqueued port on success, ERR_PTR on error * Locks: config_sema must be held to serialize changes to the port list * * All port internal structures are set up and the sysfs entry is generated. * d_id is used to enqueue ports with a well known address like the Directory * Service for nameserver lookup. */ struct zfcp_port *zfcp_port_enqueue(struct zfcp_adapter *adapter, u64 wwpn, u32 status, u32 d_id) { struct zfcp_port *port; int retval; port = kzalloc(sizeof(struct zfcp_port), GFP_KERNEL); if (!port) return ERR_PTR(-ENOMEM); init_waitqueue_head(&port->remove_wq); INIT_LIST_HEAD(&port->unit_list_head); INIT_WORK(&port->gid_pn_work, zfcp_erp_port_strategy_open_lookup); port->adapter = adapter; port->d_id = d_id; port->wwpn = wwpn; /* mark port unusable as long as sysfs registration is not complete */ atomic_set_mask(status | ZFCP_STATUS_COMMON_REMOVE, &port->status); atomic_set(&port->refcount, 0); dev_set_name(&port->sysfs_device, "0x%016llx", (unsigned long long)wwpn); port->sysfs_device.parent = &adapter->ccw_device->dev; port->sysfs_device.release = zfcp_sysfs_port_release; dev_set_drvdata(&port->sysfs_device, port); read_lock_irq(&zfcp_data.config_lock); if (!(status & ZFCP_STATUS_PORT_NO_WWPN)) if (zfcp_get_port_by_wwpn(adapter, wwpn)) { read_unlock_irq(&zfcp_data.config_lock); goto err_out_free; } read_unlock_irq(&zfcp_data.config_lock); if (device_register(&port->sysfs_device)) goto err_out_free; retval = sysfs_create_group(&port->sysfs_device.kobj, &zfcp_sysfs_port_attrs); if (retval) { device_unregister(&port->sysfs_device); goto err_out; } zfcp_port_get(port); write_lock_irq(&zfcp_data.config_lock); list_add_tail(&port->list, &adapter->port_list_head); atomic_clear_mask(ZFCP_STATUS_COMMON_REMOVE, &port->status); atomic_set_mask(ZFCP_STATUS_COMMON_RUNNING, &port->status); write_unlock_irq(&zfcp_data.config_lock); zfcp_adapter_get(adapter); return port; err_out_free: kfree(port); err_out: return ERR_PTR(-EINVAL); }
asmlinkage int sys_wait4(pid_t pid,unsigned int * stat_addr, int options, struct rusage * ru) { int flag, retval; struct wait_queue wait = { current, NULL }; struct task_struct *p; if (options & ~(WNOHANG|WUNTRACED|__WCLONE)) return -EINVAL; add_wait_queue(¤t->wait_chldexit,&wait); repeat: flag = 0; /* The interruptible state must be set before looking at the children. This because we want to catch any racy exit from the children as do_exit() may run under us. The following read_lock will enforce SMP ordering at the CPU level. */ current->state = TASK_INTERRUPTIBLE; read_lock(&tasklist_lock); for (p = current->p_cptr ; p ; p = p->p_osptr) { if (pid>0) { if (p->pid != pid) continue; } else if (!pid) { if (p->pgrp != current->pgrp) continue; } else if (pid != -1) { if (p->pgrp != -pid) continue; } /* wait for cloned processes iff the __WCLONE flag is set */ if ((p->exit_signal != SIGCHLD) ^ ((options & __WCLONE) != 0)) continue; flag = 1; switch (p->state) { case TASK_STOPPED: if (!p->exit_code) continue; if (!(options & WUNTRACED) && !(p->flags & PF_PTRACED)) continue; read_unlock(&tasklist_lock); current->state = TASK_RUNNING; /* We *must* do this before touching userspace! */ retval = ru ? getrusage(p, RUSAGE_BOTH, ru) : 0; if (!retval && stat_addr) retval = put_user((p->exit_code << 8) | 0x7f, stat_addr); if (!retval) { p->exit_code = 0; retval = p->pid; } goto end_wait4; case TASK_ZOMBIE: current->times.tms_cutime += p->times.tms_utime + p->times.tms_cutime; current->times.tms_cstime += p->times.tms_stime + p->times.tms_cstime; read_unlock(&tasklist_lock); current->state = TASK_RUNNING; /* We *must* do this before touching userspace! */ retval = ru ? getrusage(p, RUSAGE_BOTH, ru) : 0; if (!retval && stat_addr) retval = put_user(p->exit_code, stat_addr); if (retval) goto end_wait4; retval = p->pid; if (p->p_opptr != p->p_pptr) { write_lock_irq(&tasklist_lock); REMOVE_LINKS(p); p->p_pptr = p->p_opptr; SET_LINKS(p); write_unlock_irq(&tasklist_lock); notify_parent(p, SIGCHLD); } else release(p); #ifdef DEBUG_PROC_TREE audit_ptree(); #endif goto end_wait4; default: continue; } } read_unlock(&tasklist_lock); if (flag) { retval = 0; if (options & WNOHANG) goto end_wait4; retval = -ERESTARTSYS; if (signal_pending(current)) goto end_wait4; schedule(); goto repeat; } retval = -ECHILD; end_wait4: remove_wait_queue(¤t->wait_chldexit,&wait); current->state = TASK_RUNNING; return retval; }
/* adjtimex mainly allows reading (and writing, if superuser) of * kernel time-keeping variables. used by xntpd. */ int do_adjtimex(struct timex *txc) { long ltemp, mtemp, save_adjust; int result; /* In order to modify anything, you gotta be super-user! */ if (txc->modes && !capable(CAP_SYS_TIME)) return -EPERM; /* Now we validate the data before disabling interrupts */ if ((txc->modes & ADJ_OFFSET_SINGLESHOT) == ADJ_OFFSET_SINGLESHOT) /* singleshot must not be used with any other mode bits */ if (txc->modes != ADJ_OFFSET_SINGLESHOT) return -EINVAL; if (txc->modes != ADJ_OFFSET_SINGLESHOT && (txc->modes & ADJ_OFFSET)) /* adjustment Offset limited to +- .512 seconds */ if (txc->offset <= - MAXPHASE || txc->offset >= MAXPHASE ) return -EINVAL; /* if the quartz is off by more than 10% something is VERY wrong ! */ if (txc->modes & ADJ_TICK) if (txc->tick < 900000/HZ || txc->tick > 1100000/HZ) return -EINVAL; write_lock_irq(&xtime_lock); result = time_state; /* mostly `TIME_OK' */ /* Save for later - semantics of adjtime is to return old value */ save_adjust = time_adjust; #if 0 /* STA_CLOCKERR is never set yet */ time_status &= ~STA_CLOCKERR; /* reset STA_CLOCKERR */ #endif /* If there are input parameters, then process them */ if (txc->modes) { if (txc->modes & ADJ_STATUS) /* only set allowed bits */ time_status = (txc->status & ~STA_RONLY) | (time_status & STA_RONLY); if (txc->modes & ADJ_FREQUENCY) { /* p. 22 */ if (txc->freq > MAXFREQ || txc->freq < -MAXFREQ) { result = -EINVAL; goto leave; } time_freq = txc->freq - pps_freq; } if (txc->modes & ADJ_MAXERROR) { if (txc->maxerror < 0 || txc->maxerror >= NTP_PHASE_LIMIT) { result = -EINVAL; goto leave; } time_maxerror = txc->maxerror; } if (txc->modes & ADJ_ESTERROR) { if (txc->esterror < 0 || txc->esterror >= NTP_PHASE_LIMIT) { result = -EINVAL; goto leave; } time_esterror = txc->esterror; } if (txc->modes & ADJ_TIMECONST) { /* p. 24 */ if (txc->constant < 0) { /* NTP v4 uses values > 6 */ result = -EINVAL; goto leave; } time_constant = txc->constant; } if (txc->modes & ADJ_OFFSET) { /* values checked earlier */ if (txc->modes == ADJ_OFFSET_SINGLESHOT) { /* adjtime() is independent from ntp_adjtime() */ time_adjust = txc->offset; } else if ( time_status & (STA_PLL | STA_PPSTIME) ) { ltemp = (time_status & (STA_PPSTIME | STA_PPSSIGNAL)) == (STA_PPSTIME | STA_PPSSIGNAL) ? pps_offset : txc->offset; /* * Scale the phase adjustment and * clamp to the operating range. */ if (ltemp > MAXPHASE) time_offset = MAXPHASE << SHIFT_UPDATE; else if (ltemp < -MAXPHASE) time_offset = -(MAXPHASE << SHIFT_UPDATE); else time_offset = ltemp << SHIFT_UPDATE; /* * Select whether the frequency is to be controlled * and in which mode (PLL or FLL). Clamp to the operating * range. Ugly multiply/divide should be replaced someday. */ if (time_status & STA_FREQHOLD || time_reftime == 0) time_reftime = xtime.tv_sec; mtemp = xtime.tv_sec - time_reftime; time_reftime = xtime.tv_sec; if (time_status & STA_FLL) { if (mtemp >= MINSEC) { ltemp = (time_offset / mtemp) << (SHIFT_USEC - SHIFT_UPDATE); if (ltemp < 0) time_freq -= -ltemp >> SHIFT_KH; else time_freq += ltemp >> SHIFT_KH; } else /* calibration interval too short (p. 12) */ result = TIME_ERROR; } else { /* PLL mode */
/* * This creates a new process as a copy of the old one, * but does not actually start it yet. * * It copies the registers, and all the appropriate * parts of the process environment (as per the clone * flags). The actual kick-off is left to the caller. */ static struct task_struct *copy_process(unsigned long clone_flags, unsigned long stack_start, struct pt_regs *regs, unsigned long stack_size, int __user *parent_tidptr, int __user *child_tidptr, struct pid *pid) { int retval; struct task_struct *p = NULL; if ((clone_flags & (CLONE_NEWNS|CLONE_FS)) == (CLONE_NEWNS|CLONE_FS)) return ERR_PTR(-EINVAL); /* * Thread groups must share signals as well, and detached threads * can only be started up within the thread group. */ if ((clone_flags & CLONE_THREAD) && !(clone_flags & CLONE_SIGHAND)) return ERR_PTR(-EINVAL); /* * Shared signal handlers imply shared VM. By way of the above, * thread groups also imply shared VM. Blocking this case allows * for various simplifications in other code. */ if ((clone_flags & CLONE_SIGHAND) && !(clone_flags & CLONE_VM)) return ERR_PTR(-EINVAL); retval = security_task_create(clone_flags); if (retval) goto fork_out; retval = -ENOMEM; p = dup_task_struct(current); if (!p) goto fork_out; rt_mutex_init_task(p); #ifdef CONFIG_TRACE_IRQFLAGS DEBUG_LOCKS_WARN_ON(!p->hardirqs_enabled); DEBUG_LOCKS_WARN_ON(!p->softirqs_enabled); #endif retval = -EAGAIN; if (atomic_read(&p->user->processes) >= p->signal->rlim[RLIMIT_NPROC].rlim_cur) { if (!capable(CAP_SYS_ADMIN) && !capable(CAP_SYS_RESOURCE) && p->user != &root_user) goto bad_fork_free; } atomic_inc(&p->user->__count); atomic_inc(&p->user->processes); get_group_info(p->group_info); /* * If multiple threads are within copy_process(), then this check * triggers too late. This doesn't hurt, the check is only there * to stop root fork bombs. */ if (nr_threads >= max_threads) goto bad_fork_cleanup_count; if (!try_module_get(task_thread_info(p)->exec_domain->module)) goto bad_fork_cleanup_count; if (p->binfmt && !try_module_get(p->binfmt->module)) goto bad_fork_cleanup_put_domain; p->did_exec = 0; delayacct_tsk_init(p); /* Must remain after dup_task_struct() */ copy_flags(clone_flags, p); p->pid = pid_nr(pid); retval = -EFAULT; if (clone_flags & CLONE_PARENT_SETTID) if (put_user(p->pid, parent_tidptr)) goto bad_fork_cleanup_delays_binfmt; INIT_LIST_HEAD(&p->children); INIT_LIST_HEAD(&p->sibling); p->vfork_done = NULL; spin_lock_init(&p->alloc_lock); clear_tsk_thread_flag(p, TIF_SIGPENDING); init_sigpending(&p->pending); p->utime = cputime_zero; p->stime = cputime_zero; p->sched_time = 0; #ifdef CONFIG_TASK_XACCT p->rchar = 0; /* I/O counter: bytes read */ p->wchar = 0; /* I/O counter: bytes written */ p->syscr = 0; /* I/O counter: read syscalls */ p->syscw = 0; /* I/O counter: write syscalls */ #endif task_io_accounting_init(p); acct_clear_integrals(p); p->it_virt_expires = cputime_zero; p->it_prof_expires = cputime_zero; p->it_sched_expires = 0; INIT_LIST_HEAD(&p->cpu_timers[0]); INIT_LIST_HEAD(&p->cpu_timers[1]); INIT_LIST_HEAD(&p->cpu_timers[2]); p->lock_depth = -1; /* -1 = no lock */ do_posix_clock_monotonic_gettime(&p->start_time); p->security = NULL; p->io_context = NULL; p->io_wait = NULL; p->audit_context = NULL; cpuset_fork(p); #ifdef CONFIG_NUMA p->mempolicy = mpol_copy(p->mempolicy); if (IS_ERR(p->mempolicy)) { retval = PTR_ERR(p->mempolicy); p->mempolicy = NULL; goto bad_fork_cleanup_cpuset; } mpol_fix_fork_child_flag(p); #endif #ifdef CONFIG_TRACE_IRQFLAGS p->irq_events = 0; #ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW p->hardirqs_enabled = 1; #else p->hardirqs_enabled = 0; #endif p->hardirq_enable_ip = 0; p->hardirq_enable_event = 0; p->hardirq_disable_ip = _THIS_IP_; p->hardirq_disable_event = 0; p->softirqs_enabled = 1; p->softirq_enable_ip = _THIS_IP_; p->softirq_enable_event = 0; p->softirq_disable_ip = 0; p->softirq_disable_event = 0; p->hardirq_context = 0; p->softirq_context = 0; #endif #ifdef CONFIG_LOCKDEP p->lockdep_depth = 0; /* no locks held yet */ p->curr_chain_key = 0; p->lockdep_recursion = 0; #endif #ifdef CONFIG_DEBUG_MUTEXES p->blocked_on = NULL; /* not blocked yet */ #endif p->tgid = p->pid; if (clone_flags & CLONE_THREAD) p->tgid = current->tgid; if ((retval = security_task_alloc(p))) goto bad_fork_cleanup_policy; if ((retval = audit_alloc(p))) goto bad_fork_cleanup_security; /* copy all the process information */ if ((retval = copy_semundo(clone_flags, p))) goto bad_fork_cleanup_audit; if ((retval = copy_files(clone_flags, p))) goto bad_fork_cleanup_semundo; if ((retval = copy_fs(clone_flags, p))) goto bad_fork_cleanup_files; if ((retval = copy_sighand(clone_flags, p))) goto bad_fork_cleanup_fs; if ((retval = copy_signal(clone_flags, p))) goto bad_fork_cleanup_sighand; if ((retval = copy_mm(clone_flags, p))) goto bad_fork_cleanup_signal; if ((retval = copy_keys(clone_flags, p))) goto bad_fork_cleanup_mm; if ((retval = copy_namespaces(clone_flags, p))) goto bad_fork_cleanup_keys; retval = copy_thread(0, clone_flags, stack_start, stack_size, p, regs); if (retval) goto bad_fork_cleanup_namespaces; p->set_child_tid = (clone_flags & CLONE_CHILD_SETTID) ? child_tidptr : NULL; /* * Clear TID on mm_release()? */ p->clear_child_tid = (clone_flags & CLONE_CHILD_CLEARTID) ? child_tidptr: NULL; p->robust_list = NULL; #ifdef CONFIG_COMPAT p->compat_robust_list = NULL; #endif INIT_LIST_HEAD(&p->pi_state_list); p->pi_state_cache = NULL; /* * sigaltstack should be cleared when sharing the same VM */ if ((clone_flags & (CLONE_VM|CLONE_VFORK)) == CLONE_VM) p->sas_ss_sp = p->sas_ss_size = 0; /* * Syscall tracing should be turned off in the child regardless * of CLONE_PTRACE. */ clear_tsk_thread_flag(p, TIF_SYSCALL_TRACE); #ifdef TIF_SYSCALL_EMU clear_tsk_thread_flag(p, TIF_SYSCALL_EMU); #endif /* Our parent execution domain becomes current domain These must match for thread signalling to apply */ p->parent_exec_id = p->self_exec_id; /* ok, now we should be set up.. */ p->exit_signal = (clone_flags & CLONE_THREAD) ? -1 : (clone_flags & CSIGNAL); p->pdeath_signal = 0; p->exit_state = 0; /* * Ok, make it visible to the rest of the system. * We dont wake it up yet. */ p->group_leader = p; INIT_LIST_HEAD(&p->thread_group); INIT_LIST_HEAD(&p->ptrace_children); INIT_LIST_HEAD(&p->ptrace_list); /* Perform scheduler related setup. Assign this task to a CPU. */ sched_fork(p, clone_flags); /* Need tasklist lock for parent etc handling! */ write_lock_irq(&tasklist_lock); /* for sys_ioprio_set(IOPRIO_WHO_PGRP) */ p->ioprio = current->ioprio; /* * The task hasn't been attached yet, so its cpus_allowed mask will * not be changed, nor will its assigned CPU. * * The cpus_allowed mask of the parent may have changed after it was * copied first time - so re-copy it here, then check the child's CPU * to ensure it is on a valid CPU (and if not, just force it back to * parent's CPU). This avoids alot of nasty races. */ p->cpus_allowed = current->cpus_allowed; if (unlikely(!cpu_isset(task_cpu(p), p->cpus_allowed) || !cpu_online(task_cpu(p)))) set_task_cpu(p, smp_processor_id()); /* CLONE_PARENT re-uses the old parent */ if (clone_flags & (CLONE_PARENT|CLONE_THREAD)) p->real_parent = current->real_parent; else p->real_parent = current; p->parent = p->real_parent; spin_lock(¤t->sighand->siglock); /* * Process group and session signals need to be delivered to just the * parent before the fork or both the parent and the child after the * fork. Restart if a signal comes in before we add the new process to * it's process group. * A fatal signal pending means that current will exit, so the new * thread can't slip out of an OOM kill (or normal SIGKILL). */ recalc_sigpending(); if (signal_pending(current)) { spin_unlock(¤t->sighand->siglock); write_unlock_irq(&tasklist_lock); retval = -ERESTARTNOINTR; goto bad_fork_cleanup_namespaces; } if (clone_flags & CLONE_THREAD) { p->group_leader = current->group_leader; list_add_tail_rcu(&p->thread_group, &p->group_leader->thread_group); if (!cputime_eq(current->signal->it_virt_expires, cputime_zero) || !cputime_eq(current->signal->it_prof_expires, cputime_zero) || current->signal->rlim[RLIMIT_CPU].rlim_cur != RLIM_INFINITY || !list_empty(¤t->signal->cpu_timers[0]) || !list_empty(¤t->signal->cpu_timers[1]) || !list_empty(¤t->signal->cpu_timers[2])) { /* * Have child wake up on its first tick to check * for process CPU timers. */ p->it_prof_expires = jiffies_to_cputime(1); } } if (likely(p->pid)) { add_parent(p); if (unlikely(p->ptrace & PT_PTRACED)) __ptrace_link(p, current->parent); if (thread_group_leader(p)) { p->signal->tty = current->signal->tty; p->signal->pgrp = process_group(current); set_signal_session(p->signal, process_session(current)); attach_pid(p, PIDTYPE_PGID, task_pgrp(current)); attach_pid(p, PIDTYPE_SID, task_session(current)); list_add_tail_rcu(&p->tasks, &init_task.tasks); __get_cpu_var(process_counts)++; } attach_pid(p, PIDTYPE_PID, pid); nr_threads++; } total_forks++; spin_unlock(¤t->sighand->siglock); write_unlock_irq(&tasklist_lock); proc_fork_connector(p); return p; bad_fork_cleanup_namespaces: exit_task_namespaces(p); bad_fork_cleanup_keys: exit_keys(p); bad_fork_cleanup_mm: if (p->mm) mmput(p->mm); bad_fork_cleanup_signal: cleanup_signal(p); bad_fork_cleanup_sighand: __cleanup_sighand(p->sighand); bad_fork_cleanup_fs: exit_fs(p); /* blocking */ bad_fork_cleanup_files: exit_files(p); /* blocking */ bad_fork_cleanup_semundo: exit_sem(p); bad_fork_cleanup_audit: audit_free(p); bad_fork_cleanup_security: security_task_free(p); bad_fork_cleanup_policy: #ifdef CONFIG_NUMA mpol_free(p->mempolicy); bad_fork_cleanup_cpuset: #endif cpuset_exit(p); bad_fork_cleanup_delays_binfmt: delayacct_tsk_free(p); if (p->binfmt) module_put(p->binfmt->module); bad_fork_cleanup_put_domain: module_put(task_thread_info(p)->exec_domain->module); bad_fork_cleanup_count: put_group_info(p->group_info); atomic_dec(&p->user->processes); free_uid(p->user); bad_fork_free: free_task(p); fork_out: return ERR_PTR(retval); }
/* * This creates a new process as a copy of the old one, * but does not actually start it yet. * * It copies the registers, and all the appropriate * parts of the process environment (as per the clone * flags). The actual kick-off is left to the caller. */ static struct task_struct *copy_process(unsigned long clone_flags, unsigned long stack_start, struct pt_regs *regs, unsigned long stack_size, int __user *child_tidptr, struct pid *pid, int trace) { int retval; struct task_struct *p; int cgroup_callbacks_done = 0; if ((clone_flags & (CLONE_NEWNS|CLONE_FS)) == (CLONE_NEWNS|CLONE_FS)) return ERR_PTR(-EINVAL); /* * Thread groups must share signals as well, and detached threads * can only be started up within the thread group. */ if ((clone_flags & CLONE_THREAD) && !(clone_flags & CLONE_SIGHAND)) return ERR_PTR(-EINVAL); /* * Shared signal handlers imply shared VM. By way of the above, * thread groups also imply shared VM. Blocking this case allows * for various simplifications in other code. */ if ((clone_flags & CLONE_SIGHAND) && !(clone_flags & CLONE_VM)) return ERR_PTR(-EINVAL); retval = security_task_create(clone_flags); if (retval) goto fork_out; retval = -ENOMEM; p = dup_task_struct(current); if (!p) goto fork_out; rt_mutex_init_task(p); #ifdef CONFIG_PROVE_LOCKING DEBUG_LOCKS_WARN_ON(!p->hardirqs_enabled); DEBUG_LOCKS_WARN_ON(!p->softirqs_enabled); #endif retval = -EAGAIN; if (atomic_read(&p->real_cred->user->processes) >= p->signal->rlim[RLIMIT_NPROC].rlim_cur) { if (!capable(CAP_SYS_ADMIN) && !capable(CAP_SYS_RESOURCE) && p->real_cred->user != INIT_USER) goto bad_fork_free; } retval = copy_creds(p, clone_flags); if (retval < 0) goto bad_fork_free; /* * If multiple threads are within copy_process(), then this check * triggers too late. This doesn't hurt, the check is only there * to stop root fork bombs. */ retval = -EAGAIN; if (nr_threads >= max_threads) goto bad_fork_cleanup_count; if (!try_module_get(task_thread_info(p)->exec_domain->module)) goto bad_fork_cleanup_count; if (p->binfmt && !try_module_get(p->binfmt->module)) goto bad_fork_cleanup_put_domain; p->did_exec = 0; delayacct_tsk_init(p); /* Must remain after dup_task_struct() */ copy_flags(clone_flags, p); INIT_LIST_HEAD(&p->children); INIT_LIST_HEAD(&p->sibling); #ifdef CONFIG_PREEMPT_RCU p->rcu_read_lock_nesting = 0; p->rcu_flipctr_idx = 0; #endif /* #ifdef CONFIG_PREEMPT_RCU */ p->vfork_done = NULL; spin_lock_init(&p->alloc_lock); clear_tsk_thread_flag(p, TIF_SIGPENDING); init_sigpending(&p->pending); p->utime = cputime_zero; p->stime = cputime_zero; p->gtime = cputime_zero; p->utimescaled = cputime_zero; p->stimescaled = cputime_zero; p->prev_utime = cputime_zero; p->prev_stime = cputime_zero; p->default_timer_slack_ns = current->timer_slack_ns; #ifdef CONFIG_DETECT_SOFTLOCKUP p->last_switch_count = 0; p->last_switch_timestamp = 0; #endif task_io_accounting_init(&p->ioac); acct_clear_integrals(p); posix_cpu_timers_init(p); p->lock_depth = -1; /* -1 = no lock */ do_posix_clock_monotonic_gettime(&p->start_time); p->real_start_time = p->start_time; monotonic_to_bootbased(&p->real_start_time); p->io_context = NULL; p->audit_context = NULL; cgroup_fork(p); #ifdef CONFIG_NUMA p->mempolicy = mpol_dup(p->mempolicy); if (IS_ERR(p->mempolicy)) { retval = PTR_ERR(p->mempolicy); p->mempolicy = NULL; goto bad_fork_cleanup_cgroup; } mpol_fix_fork_child_flag(p); #endif #ifdef CONFIG_TRACE_IRQFLAGS p->irq_events = 0; #ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW p->hardirqs_enabled = 1; #else p->hardirqs_enabled = 0; #endif p->hardirq_enable_ip = 0; p->hardirq_enable_event = 0; p->hardirq_disable_ip = _THIS_IP_; p->hardirq_disable_event = 0; p->softirqs_enabled = 1; p->softirq_enable_ip = _THIS_IP_; p->softirq_enable_event = 0; p->softirq_disable_ip = 0; p->softirq_disable_event = 0; p->hardirq_context = 0; p->softirq_context = 0; #endif #ifdef CONFIG_LOCKDEP p->lockdep_depth = 0; /* no locks held yet */ p->curr_chain_key = 0; p->lockdep_recursion = 0; #endif #ifdef CONFIG_DEBUG_MUTEXES p->blocked_on = NULL; /* not blocked yet */ #endif if (unlikely(current->ptrace)) ptrace_fork(p, clone_flags); /* Perform scheduler related setup. Assign this task to a CPU. */ sched_fork(p, clone_flags); if ((retval = audit_alloc(p))) goto bad_fork_cleanup_policy; /* copy all the process information */ if ((retval = copy_semundo(clone_flags, p))) goto bad_fork_cleanup_audit; if ((retval = copy_files(clone_flags, p))) goto bad_fork_cleanup_semundo; if ((retval = copy_fs(clone_flags, p))) goto bad_fork_cleanup_files; if ((retval = copy_sighand(clone_flags, p))) goto bad_fork_cleanup_fs; if ((retval = copy_signal(clone_flags, p))) goto bad_fork_cleanup_sighand; if ((retval = copy_mm(clone_flags, p))) goto bad_fork_cleanup_signal; if ((retval = copy_namespaces(clone_flags, p))) goto bad_fork_cleanup_mm; if ((retval = copy_io(clone_flags, p))) goto bad_fork_cleanup_namespaces; retval = copy_thread(clone_flags, stack_start, stack_size, p, regs); if (retval) goto bad_fork_cleanup_io; if (pid != &init_struct_pid) { retval = -ENOMEM; pid = alloc_pid(p->nsproxy->pid_ns); if (!pid) goto bad_fork_cleanup_io; if (clone_flags & CLONE_NEWPID) { retval = pid_ns_prepare_proc(p->nsproxy->pid_ns); if (retval < 0) goto bad_fork_free_pid; } } ftrace_graph_init_task(p); p->pid = pid_nr(pid); p->tgid = p->pid; if (clone_flags & CLONE_THREAD) p->tgid = current->tgid; if (current->nsproxy != p->nsproxy) { retval = ns_cgroup_clone(p, pid); if (retval) goto bad_fork_free_graph; } p->set_child_tid = (clone_flags & CLONE_CHILD_SETTID) ? child_tidptr : NULL; /* * Clear TID on mm_release()? */ p->clear_child_tid = (clone_flags & CLONE_CHILD_CLEARTID) ? child_tidptr: NULL; #ifdef CONFIG_FUTEX p->robust_list = NULL; #ifdef CONFIG_COMPAT p->compat_robust_list = NULL; #endif INIT_LIST_HEAD(&p->pi_state_list); p->pi_state_cache = NULL; #endif /* * sigaltstack should be cleared when sharing the same VM */ if ((clone_flags & (CLONE_VM|CLONE_VFORK)) == CLONE_VM) p->sas_ss_sp = p->sas_ss_size = 0; /* * Syscall tracing should be turned off in the child regardless * of CLONE_PTRACE. */ clear_tsk_thread_flag(p, TIF_SYSCALL_TRACE); #ifdef TIF_SYSCALL_EMU clear_tsk_thread_flag(p, TIF_SYSCALL_EMU); #endif clear_all_latency_tracing(p); /* ok, now we should be set up.. */ p->exit_signal = (clone_flags & CLONE_THREAD) ? -1 : (clone_flags & CSIGNAL); p->pdeath_signal = 0; p->exit_state = 0; /* * Ok, make it visible to the rest of the system. * We dont wake it up yet. */ p->group_leader = p; INIT_LIST_HEAD(&p->thread_group); /* Now that the task is set up, run cgroup callbacks if * necessary. We need to run them before the task is visible * on the tasklist. */ cgroup_fork_callbacks(p); cgroup_callbacks_done = 1; /* Need tasklist lock for parent etc handling! */ write_lock_irq(&tasklist_lock); /* * The task hasn't been attached yet, so its cpus_allowed mask will * not be changed, nor will its assigned CPU. * * The cpus_allowed mask of the parent may have changed after it was * copied first time - so re-copy it here, then check the child's CPU * to ensure it is on a valid CPU (and if not, just force it back to * parent's CPU). This avoids alot of nasty races. */ p->cpus_allowed = current->cpus_allowed; p->rt.nr_cpus_allowed = current->rt.nr_cpus_allowed; if (unlikely(!cpu_isset(task_cpu(p), p->cpus_allowed) || !cpu_online(task_cpu(p)))) set_task_cpu(p, smp_processor_id()); /* CLONE_PARENT re-uses the old parent */ if (clone_flags & (CLONE_PARENT|CLONE_THREAD)) { p->real_parent = current->real_parent; p->parent_exec_id = current->parent_exec_id; } else { p->real_parent = current; p->parent_exec_id = current->self_exec_id; } spin_lock(¤t->sighand->siglock); /* * Process group and session signals need to be delivered to just the * parent before the fork or both the parent and the child after the * fork. Restart if a signal comes in before we add the new process to * it's process group. * A fatal signal pending means that current will exit, so the new * thread can't slip out of an OOM kill (or normal SIGKILL). */ recalc_sigpending(); if (signal_pending(current)) { spin_unlock(¤t->sighand->siglock); write_unlock_irq(&tasklist_lock); retval = -ERESTARTNOINTR; goto bad_fork_free_graph; } if (clone_flags & CLONE_THREAD) { p->group_leader = current->group_leader; list_add_tail_rcu(&p->thread_group, &p->group_leader->thread_group); } if (likely(p->pid)) { list_add_tail(&p->sibling, &p->real_parent->children); tracehook_finish_clone(p, clone_flags, trace); if (thread_group_leader(p)) { if (clone_flags & CLONE_NEWPID) p->nsproxy->pid_ns->child_reaper = p; p->signal->leader_pid = pid; tty_kref_put(p->signal->tty); p->signal->tty = tty_kref_get(current->signal->tty); attach_pid(p, PIDTYPE_PGID, task_pgrp(current)); attach_pid(p, PIDTYPE_SID, task_session(current)); list_add_tail_rcu(&p->tasks, &init_task.tasks); __get_cpu_var(process_counts)++; } attach_pid(p, PIDTYPE_PID, pid); nr_threads++; } #ifdef CONFIG_LTT_LITE ltt_lite_ev_process(LTT_LITE_EV_PROCESS_FORK, p); #endif total_forks++; spin_unlock(¤t->sighand->siglock); write_unlock_irq(&tasklist_lock); proc_fork_connector(p); cgroup_post_fork(p); return p; bad_fork_free_graph: ftrace_graph_exit_task(p); bad_fork_free_pid: if (pid != &init_struct_pid) free_pid(pid); bad_fork_cleanup_io: put_io_context(p->io_context); bad_fork_cleanup_namespaces: exit_task_namespaces(p); bad_fork_cleanup_mm: if (p->mm) mmput(p->mm); bad_fork_cleanup_signal: cleanup_signal(p); bad_fork_cleanup_sighand: __cleanup_sighand(p->sighand); bad_fork_cleanup_fs: exit_fs(p); /* blocking */ bad_fork_cleanup_files: exit_files(p); /* blocking */ bad_fork_cleanup_semundo: exit_sem(p); bad_fork_cleanup_audit: audit_free(p); bad_fork_cleanup_policy: #ifdef CONFIG_NUMA mpol_put(p->mempolicy); bad_fork_cleanup_cgroup: #endif cgroup_exit(p, cgroup_callbacks_done); delayacct_tsk_free(p); if (p->binfmt) module_put(p->binfmt->module); bad_fork_cleanup_put_domain: module_put(task_thread_info(p)->exec_domain->module); bad_fork_cleanup_count: atomic_dec(&p->cred->user->processes); put_cred(p->real_cred); put_cred(p->cred); bad_fork_free: free_task(p); fork_out: return ERR_PTR(retval); }
/* * This creates a new process as a copy of the old one, * but does not actually start it yet. * * It copies the registers, and all the appropriate * parts of the process environment (as per the clone * flags). The actual kick-off is left to the caller. */ static struct task_struct *copy_process(unsigned long clone_flags, unsigned long stack_start, struct pt_regs *regs, unsigned long stack_size, int __user *child_tidptr, struct pid *pid, int trace) { int retval; struct task_struct *p; if ((clone_flags & (CLONE_NEWNS|CLONE_FS)) == (CLONE_NEWNS|CLONE_FS)) return ERR_PTR(-EINVAL); /* * Thread groups must share signals as well, and detached threads * can only be started up within the thread group. */ if ((clone_flags & CLONE_THREAD) && !(clone_flags & CLONE_SIGHAND)) return ERR_PTR(-EINVAL); /* * Shared signal handlers imply shared VM. By way of the above, * thread groups also imply shared VM. Blocking this case allows * for various simplifications in other code. */ if ((clone_flags & CLONE_SIGHAND) && !(clone_flags & CLONE_VM)) return ERR_PTR(-EINVAL); /* * Siblings of global init remain as zombies on exit since they are * not reaped by their parent (swapper). To solve this and to avoid * multi-rooted process trees, prevent global and container-inits * from creating siblings. */ if ((clone_flags & CLONE_PARENT) && current->signal->flags & SIGNAL_UNKILLABLE) return ERR_PTR(-EINVAL); retval = security_task_create(clone_flags); if (retval) goto fork_out; retval = -ENOMEM; p = dup_task_struct(current); if (!p) goto fork_out; ftrace_graph_init_task(p); rt_mutex_init_task(p); #ifdef CONFIG_PROVE_LOCKING DEBUG_LOCKS_WARN_ON(!p->hardirqs_enabled); DEBUG_LOCKS_WARN_ON(!p->softirqs_enabled); #endif retval = -EAGAIN; if (atomic_read(&p->real_cred->user->processes) >= task_rlimit(p, RLIMIT_NPROC)) { if (!capable(CAP_SYS_ADMIN) && !capable(CAP_SYS_RESOURCE) && p->real_cred->user != INIT_USER) goto bad_fork_free; } current->flags &= ~PF_NPROC_EXCEEDED; retval = copy_creds(p, clone_flags); if (retval < 0) goto bad_fork_free; /* * If multiple threads are within copy_process(), then this check * triggers too late. This doesn't hurt, the check is only there * to stop root fork bombs. */ retval = -EAGAIN; if (nr_threads >= max_threads) goto bad_fork_cleanup_count; if (!try_module_get(task_thread_info(p)->exec_domain->module)) goto bad_fork_cleanup_count; p->did_exec = 0; delayacct_tsk_init(p); /* Must remain after dup_task_struct() */ copy_flags(clone_flags, p); INIT_LIST_HEAD(&p->children); INIT_LIST_HEAD(&p->sibling); rcu_copy_process(p); p->vfork_done = NULL; spin_lock_init(&p->alloc_lock); init_sigpending(&p->pending); p->utime = p->stime = p->gtime = 0; p->utimescaled = p->stimescaled = 0; #ifndef CONFIG_VIRT_CPU_ACCOUNTING p->prev_utime = p->prev_stime = 0; #endif #if defined(SPLIT_RSS_COUNTING) memset(&p->rss_stat, 0, sizeof(p->rss_stat)); #endif p->default_timer_slack_ns = current->timer_slack_ns; task_io_accounting_init(&p->ioac); acct_clear_integrals(p); posix_cpu_timers_init(p); do_posix_clock_monotonic_gettime(&p->start_time); p->real_start_time = p->start_time; monotonic_to_bootbased(&p->real_start_time); p->io_context = NULL; p->audit_context = NULL; if (clone_flags & CLONE_THREAD) threadgroup_change_begin(current); cgroup_fork(p); #ifdef CONFIG_NUMA p->mempolicy = mpol_dup(p->mempolicy); if (IS_ERR(p->mempolicy)) { retval = PTR_ERR(p->mempolicy); p->mempolicy = NULL; goto bad_fork_cleanup_cgroup; } mpol_fix_fork_child_flag(p); #endif #ifdef CONFIG_CPUSETS p->cpuset_mem_spread_rotor = NUMA_NO_NODE; p->cpuset_slab_spread_rotor = NUMA_NO_NODE; seqcount_init(&p->mems_allowed_seq); #endif #ifdef CONFIG_TRACE_IRQFLAGS p->irq_events = 0; #ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW p->hardirqs_enabled = 1; #else p->hardirqs_enabled = 0; #endif p->hardirq_enable_ip = 0; p->hardirq_enable_event = 0; p->hardirq_disable_ip = _THIS_IP_; p->hardirq_disable_event = 0; p->softirqs_enabled = 1; p->softirq_enable_ip = _THIS_IP_; p->softirq_enable_event = 0; p->softirq_disable_ip = 0; p->softirq_disable_event = 0; p->hardirq_context = 0; p->softirq_context = 0; #endif #ifdef CONFIG_LOCKDEP p->lockdep_depth = 0; /* no locks held yet */ p->curr_chain_key = 0; p->lockdep_recursion = 0; #endif #ifdef CONFIG_DEBUG_MUTEXES p->blocked_on = NULL; /* not blocked yet */ #endif #ifdef CONFIG_CGROUP_MEM_RES_CTLR p->memcg_batch.do_batch = 0; p->memcg_batch.memcg = NULL; #endif /* Perform scheduler related setup. Assign this task to a CPU. */ sched_fork(p); retval = perf_event_init_task(p); if (retval) goto bad_fork_cleanup_policy; retval = audit_alloc(p); if (retval) goto bad_fork_cleanup_perf; /* copy all the process information */ retval = copy_semundo(clone_flags, p); if (retval) goto bad_fork_cleanup_audit; retval = copy_files(clone_flags, p); if (retval) goto bad_fork_cleanup_semundo; retval = copy_fs(clone_flags, p); if (retval) goto bad_fork_cleanup_files; retval = copy_sighand(clone_flags, p); if (retval) goto bad_fork_cleanup_fs; retval = copy_signal(clone_flags, p); if (retval) goto bad_fork_cleanup_sighand; retval = copy_mm(clone_flags, p); if (retval) goto bad_fork_cleanup_signal; retval = copy_namespaces(clone_flags, p); if (retval) goto bad_fork_cleanup_mm; retval = copy_io(clone_flags, p); if (retval) goto bad_fork_cleanup_namespaces; retval = copy_thread(clone_flags, stack_start, stack_size, p, regs); if (retval) goto bad_fork_cleanup_io; if (pid != &init_struct_pid) { retval = -ENOMEM; pid = alloc_pid(p->nsproxy->pid_ns); if (!pid) goto bad_fork_cleanup_io; } p->pid = pid_nr(pid); p->tgid = p->pid; if (clone_flags & CLONE_THREAD) p->tgid = current->tgid; p->set_child_tid = (clone_flags & CLONE_CHILD_SETTID) ? child_tidptr : NULL; /* * Clear TID on mm_release()? */ p->clear_child_tid = (clone_flags & CLONE_CHILD_CLEARTID) ? child_tidptr : NULL; #ifdef CONFIG_BLOCK p->plug = NULL; #endif #ifdef CONFIG_FUTEX p->robust_list = NULL; #ifdef CONFIG_COMPAT p->compat_robust_list = NULL; #endif INIT_LIST_HEAD(&p->pi_state_list); p->pi_state_cache = NULL; #endif /* * sigaltstack should be cleared when sharing the same VM */ if ((clone_flags & (CLONE_VM|CLONE_VFORK)) == CLONE_VM) p->sas_ss_sp = p->sas_ss_size = 0; /* * Syscall tracing and stepping should be turned off in the * child regardless of CLONE_PTRACE. */ user_disable_single_step(p); clear_tsk_thread_flag(p, TIF_SYSCALL_TRACE); #ifdef TIF_SYSCALL_EMU clear_tsk_thread_flag(p, TIF_SYSCALL_EMU); #endif clear_all_latency_tracing(p); /* ok, now we should be set up.. */ if (clone_flags & CLONE_THREAD) p->exit_signal = -1; else if (clone_flags & CLONE_PARENT) p->exit_signal = current->group_leader->exit_signal; else p->exit_signal = (clone_flags & CSIGNAL); p->pdeath_signal = 0; p->exit_state = 0; p->nr_dirtied = 0; p->nr_dirtied_pause = 128 >> (PAGE_SHIFT - 10); p->dirty_paused_when = 0; /* * Ok, make it visible to the rest of the system. * We dont wake it up yet. */ p->group_leader = p; INIT_LIST_HEAD(&p->thread_group); /* Need tasklist lock for parent etc handling! */ write_lock_irq(&tasklist_lock); /* CLONE_PARENT re-uses the old parent */ if (clone_flags & (CLONE_PARENT|CLONE_THREAD)) { p->real_parent = current->real_parent; p->parent_exec_id = current->parent_exec_id; } else { p->real_parent = current; p->parent_exec_id = current->self_exec_id; } spin_lock(¤t->sighand->siglock); /* * Process group and session signals need to be delivered to just the * parent before the fork or both the parent and the child after the * fork. Restart if a signal comes in before we add the new process to * it's process group. * A fatal signal pending means that current will exit, so the new * thread can't slip out of an OOM kill (or normal SIGKILL). */ recalc_sigpending(); if (signal_pending(current)) { spin_unlock(¤t->sighand->siglock); write_unlock_irq(&tasklist_lock); retval = -ERESTARTNOINTR; goto bad_fork_free_pid; } if (likely(p->pid)) { ptrace_init_task(p, (clone_flags & CLONE_PTRACE) || trace); if (thread_group_leader(p)) { if (is_child_reaper(pid)) p->nsproxy->pid_ns->child_reaper = p; p->signal->leader_pid = pid; p->signal->tty = tty_kref_get(current->signal->tty); attach_pid(p, PIDTYPE_PGID, task_pgrp(current)); attach_pid(p, PIDTYPE_SID, task_session(current)); list_add_tail(&p->sibling, &p->real_parent->children); list_add_tail_rcu(&p->tasks, &init_task.tasks); __this_cpu_inc(process_counts); } else { current->signal->nr_threads++; atomic_inc(¤t->signal->live); atomic_inc(¤t->signal->sigcnt); p->group_leader = current->group_leader; list_add_tail_rcu(&p->thread_group, &p->group_leader->thread_group); } attach_pid(p, PIDTYPE_PID, pid); nr_threads++; } total_forks++; spin_unlock(¤t->sighand->siglock); syscall_tracepoint_update(p); write_unlock_irq(&tasklist_lock); proc_fork_connector(p); cgroup_post_fork(p); if (clone_flags & CLONE_THREAD) threadgroup_change_end(current); perf_event_fork(p); trace_task_newtask(p, clone_flags); return p; bad_fork_free_pid: if (pid != &init_struct_pid) free_pid(pid); bad_fork_cleanup_io: if (p->io_context) exit_io_context(p); bad_fork_cleanup_namespaces: if (unlikely(clone_flags & CLONE_NEWPID)) pid_ns_release_proc(p->nsproxy->pid_ns); exit_task_namespaces(p); bad_fork_cleanup_mm: if (p->mm) mmput(p->mm); bad_fork_cleanup_signal: if (!(clone_flags & CLONE_THREAD)) free_signal_struct(p->signal); bad_fork_cleanup_sighand: __cleanup_sighand(p->sighand); bad_fork_cleanup_fs: exit_fs(p); /* blocking */ bad_fork_cleanup_files: exit_files(p); /* blocking */ bad_fork_cleanup_semundo: exit_sem(p); bad_fork_cleanup_audit: audit_free(p); bad_fork_cleanup_perf: perf_event_free_task(p); bad_fork_cleanup_policy: #ifdef CONFIG_NUMA mpol_put(p->mempolicy); bad_fork_cleanup_cgroup: #endif if (clone_flags & CLONE_THREAD) threadgroup_change_end(current); cgroup_exit(p, 0); delayacct_tsk_free(p); module_put(task_thread_info(p)->exec_domain->module); bad_fork_cleanup_count: atomic_dec(&p->cred->user->processes); exit_creds(p); bad_fork_free: free_task(p); fork_out: return ERR_PTR(retval); }
/* * This creates a new process as a copy of the old one, * but does not actually start it yet. * * It copies the registers, and all the appropriate * parts of the process environment (as per the clone * flags). The actual kick-off is left to the caller. */ struct task_struct *copy_process(unsigned long clone_flags, unsigned long stack_start, struct pt_regs *regs, unsigned long stack_size, int *parent_tidptr, int *child_tidptr) { int retval; struct task_struct *p = NULL; if ((clone_flags & (CLONE_NEWNS|CLONE_FS)) == (CLONE_NEWNS|CLONE_FS)) return ERR_PTR(-EINVAL); /* * Thread groups must share signals as well, and detached threads * can only be started up within the thread group. */ if ((clone_flags & CLONE_THREAD) && !(clone_flags & CLONE_SIGHAND)) return ERR_PTR(-EINVAL); if ((clone_flags & CLONE_DETACHED) && !(clone_flags & CLONE_THREAD)) return ERR_PTR(-EINVAL); if (!(clone_flags & CLONE_DETACHED) && (clone_flags & CLONE_THREAD)) return ERR_PTR(-EINVAL); retval = -ENOMEM; p = dup_task_struct(current); if (!p) goto fork_out; p->tux_info = NULL; retval = -EAGAIN; /* * Increment user->__count before the rlimit test so that it would * be correct if we take the bad_fork_free failure path. */ atomic_inc(&p->user->__count); if (atomic_read(&p->user->processes) >= p->rlim[RLIMIT_NPROC].rlim_cur) { if (!capable(CAP_SYS_ADMIN) && !capable(CAP_SYS_RESOURCE)) goto bad_fork_free; } atomic_inc(&p->user->processes); /* * Counter increases are protected by * the kernel lock so nr_threads can't * increase under us (but it may decrease). */ if (nr_threads >= max_threads) goto bad_fork_cleanup_count; get_exec_domain(p->exec_domain); if (p->binfmt && p->binfmt->module) __MOD_INC_USE_COUNT(p->binfmt->module); p->did_exec = 0; p->swappable = 0; p->state = TASK_UNINTERRUPTIBLE; copy_flags(clone_flags, p); if (clone_flags & CLONE_IDLETASK) p->pid = 0; else { p->pid = alloc_pidmap(); if (p->pid == -1) goto bad_fork_cleanup; } retval = -EFAULT; if (clone_flags & CLONE_PARENT_SETTID) if (put_user(p->pid, parent_tidptr)) goto bad_fork_cleanup; INIT_LIST_HEAD(&p->run_list); INIT_LIST_HEAD(&p->children); INIT_LIST_HEAD(&p->sibling); init_waitqueue_head(&p->wait_chldexit); p->vfork_done = NULL; spin_lock_init(&p->alloc_lock); spin_lock_init(&p->switch_lock); p->sigpending = 0; init_sigpending(&p->pending); p->it_real_value = p->it_virt_value = p->it_prof_value = 0; p->it_real_incr = p->it_virt_incr = p->it_prof_incr = 0; init_timer(&p->real_timer); p->real_timer.data = (unsigned long) p; p->leader = 0; /* session leadership doesn't inherit */ p->tty_old_pgrp = 0; memset(&p->utime, 0, sizeof(p->utime)); memset(&p->stime, 0, sizeof(p->stime)); memset(&p->cutime, 0, sizeof(p->cutime)); memset(&p->cstime, 0, sizeof(p->cstime)); memset(&p->group_utime, 0, sizeof(p->group_utime)); memset(&p->group_stime, 0, sizeof(p->group_stime)); memset(&p->group_cutime, 0, sizeof(p->group_cutime)); memset(&p->group_cstime, 0, sizeof(p->group_cstime)); #ifdef CONFIG_SMP memset(&p->per_cpu_utime, 0, sizeof(p->per_cpu_utime)); memset(&p->per_cpu_stime, 0, sizeof(p->per_cpu_stime)); #endif memset(&p->timing_state, 0, sizeof(p->timing_state)); p->timing_state.type = PROCESS_TIMING_USER; p->last_sigxcpu = 0; p->array = NULL; p->lock_depth = -1; /* -1 = no lock */ p->start_time = jiffies; retval = -ENOMEM; /* copy all the process information */ if (copy_files(clone_flags, p)) goto bad_fork_cleanup; if (copy_fs(clone_flags, p)) goto bad_fork_cleanup_files; if (copy_sighand(clone_flags, p)) goto bad_fork_cleanup_fs; if (copy_signal(clone_flags, p)) goto bad_fork_cleanup_sighand; if (copy_mm(clone_flags, p)) goto bad_fork_cleanup_signal; if (copy_namespace(clone_flags, p)) goto bad_fork_cleanup_mm; retval = copy_thread(0, clone_flags, stack_start, stack_size, p, regs); if (retval) goto bad_fork_cleanup_namespace; p->semundo = NULL; p->set_child_tid = (clone_flags & CLONE_CHILD_SETTID) ? child_tidptr : NULL; /* * Clear TID on mm_release()? */ p->clear_child_tid = (clone_flags & CLONE_CHILD_CLEARTID) ? child_tidptr : NULL; /* Our parent execution domain becomes current domain These must match for thread signalling to apply */ p->parent_exec_id = p->self_exec_id; /* ok, now we should be set up.. */ p->swappable = 1; if (clone_flags & CLONE_DETACHED) p->exit_signal = -1; else p->exit_signal = clone_flags & CSIGNAL; p->pdeath_signal = 0; /* * Share the timeslice between parent and child, thus the * total amount of pending timeslices in the system doesnt change, * resulting in more scheduling fairness. */ local_irq_disable(); p->time_slice = (current->time_slice + 1) >> 1; p->first_time_slice = 1; /* * The remainder of the first timeslice might be recovered by * the parent if the child exits early enough. */ current->time_slice >>= 1; p->last_run = jiffies; if (!current->time_slice) { /* * This case is rare, it happens when the parent has only * a single jiffy left from its timeslice. Taking the * runqueue lock is not a problem. */ current->time_slice = 1; scheduler_tick(0 /* don't update the time stats */); } local_irq_enable(); if ((int)current->time_slice <= 0) BUG(); if ((int)p->time_slice <= 0) BUG(); /* * Ok, add it to the run-queues and make it * visible to the rest of the system. * * Let it rip! */ p->tgid = p->pid; p->group_leader = p; INIT_LIST_HEAD(&p->ptrace_children); INIT_LIST_HEAD(&p->ptrace_list); /* Need tasklist lock for parent etc handling! */ write_lock_irq(&tasklist_lock); /* * Check for pending SIGKILL! The new thread should not be allowed * to slip out of an OOM kill. (or normal SIGKILL.) */ if (sigismember(¤t->pending.signal, SIGKILL)) { write_unlock_irq(&tasklist_lock); retval = -EINTR; goto bad_fork_cleanup_namespace; } /* CLONE_PARENT re-uses the old parent */ if (clone_flags & (CLONE_PARENT|CLONE_THREAD)) p->real_parent = current->real_parent; else p->real_parent = current; p->parent = p->real_parent; if (clone_flags & CLONE_THREAD) { spin_lock(¤t->sighand->siglock); /* * Important: if an exit-all has been started then * do not create this new thread - the whole thread * group is supposed to exit anyway. */ if (current->signal->group_exit) { spin_unlock(¤t->sighand->siglock); write_unlock_irq(&tasklist_lock); retval = -EINTR; goto bad_fork_cleanup_namespace; } p->tgid = current->tgid; p->group_leader = current->group_leader; if (current->signal->group_stop_count > 0) { /* * There is an all-stop in progress for the group. * We ourselves will stop as soon as we check signals. * Make the new thread part of that group stop too. */ current->signal->group_stop_count++; p->sigpending = 1; } spin_unlock(¤t->sighand->siglock); } SET_LINKS(p); if (p->ptrace & PT_PTRACED) __ptrace_link(p, current->parent); attach_pid(p, PIDTYPE_PID, p->pid); if (thread_group_leader(p)) { attach_pid(p, PIDTYPE_TGID, p->tgid); attach_pid(p, PIDTYPE_PGID, p->pgrp); attach_pid(p, PIDTYPE_SID, p->session); } else { link_pid(p, p->pids + PIDTYPE_TGID, &p->group_leader->pids[PIDTYPE_TGID].pid); } /* clear controlling tty of new task if parent's was just cleared */ if (!current->tty && p->tty) p->tty = NULL; nr_threads++; write_unlock_irq(&tasklist_lock); retval = 0; fork_out: if (retval) return ERR_PTR(retval); return p; bad_fork_cleanup_namespace: exit_namespace(p); bad_fork_cleanup_mm: exit_mm(p); if (p->active_mm) mmdrop(p->active_mm); bad_fork_cleanup_signal: exit_signal(p); bad_fork_cleanup_sighand: exit_sighand(p); bad_fork_cleanup_fs: exit_fs(p); /* blocking */ bad_fork_cleanup_files: exit_files(p); /* blocking */ bad_fork_cleanup: if (p->pid > 0) free_pidmap(p->pid); put_exec_domain(p->exec_domain); if (p->binfmt && p->binfmt->module) __MOD_DEC_USE_COUNT(p->binfmt->module); bad_fork_cleanup_count: atomic_dec(&p->user->processes); bad_fork_free: p->state = TASK_ZOMBIE; /* debug */ atomic_dec(&p->usage); put_task_struct(p); goto fork_out; }