Example #1
void
kproc_exit(int ecode)
{
	struct thread *td;
	struct proc *p;

	td = curthread;
	p = td->td_proc;

	/*
	 * Reparent curthread from proc0 to init so that the zombie
	 * is harvested.
	 */
	sx_xlock(&proctree_lock);
	PROC_LOCK(p);
	proc_reparent(p, initproc);
	PROC_UNLOCK(p);
	sx_xunlock(&proctree_lock);

	/*
	 * Wakeup anyone waiting for us to exit.
	 */
	wakeup(p);

	/* Buh-bye! */
	exit1(td, ecode, 0);
}
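
A minimal usage sketch (not from the original sources; all my_worker_* names are hypothetical): a kernel process created with kproc_create(9) whose main loop calls the kproc_exit() shown above when asked to shut down.

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/proc.h>
#include <sys/kthread.h>

static struct proc *my_worker_proc;		/* hypothetical */
static volatile int my_worker_shutdown;		/* hypothetical shutdown flag */

static void
my_worker_main(void *arg)
{
	while (!my_worker_shutdown) {
		/* ... periodic work ... */
		pause("mywrk", hz);
	}
	/*
	 * Never returns: kproc_exit() reparents us from proc0 to init
	 * so init can harvest the zombie, then calls exit1().
	 */
	kproc_exit(0);
}

static void
my_worker_start(void *arg)
{
	kproc_create(my_worker_main, NULL, &my_worker_proc, 0, 0,
	    "myworker");
}
SYSINIT(my_worker, SI_SUB_KTHREAD_IDLE, SI_ORDER_ANY, my_worker_start, NULL);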
Example #2
/*
 * Simplified back end of syscall(), used when returning from fork()
 * directly into user mode.  Giant is not held on entry, and must not
 * be held on return.  This function is passed in to fork_exit() as the
 * first parameter and is called when returning to a new userland process.
 */
void
fork_return(struct thread *td, struct trapframe *frame)
{
	struct proc *p, *dbg;

	p = td->td_proc;
	if (td->td_dbgflags & TDB_STOPATFORK) {
		sx_xlock(&proctree_lock);
		PROC_LOCK(p);
		if ((p->p_pptr->p_flag & (P_TRACED | P_FOLLOWFORK)) ==
		    (P_TRACED | P_FOLLOWFORK)) {
			/*
			 * If debugger still wants auto-attach for the
			 * parent's children, do it now.
			 */
			dbg = p->p_pptr->p_pptr;
			p->p_flag |= P_TRACED;
			p->p_oppid = p->p_pptr->p_pid;
			CTR2(KTR_PTRACE,
		    "fork_return: attaching to new child pid %d: oppid %d",
			    p->p_pid, p->p_oppid);
			proc_reparent(p, dbg);
			sx_xunlock(&proctree_lock);
			td->td_dbgflags |= TDB_CHILD | TDB_SCX;
			ptracestop(td, SIGSTOP);
			td->td_dbgflags &= ~(TDB_CHILD | TDB_SCX);
		} else {
			/*
			 * ... otherwise clear the request.
			 */
			sx_xunlock(&proctree_lock);
			td->td_dbgflags &= ~TDB_STOPATFORK;
			cv_broadcast(&p->p_dbgwait);
		}
		PROC_UNLOCK(p);
	} else if (p->p_flag & P_TRACED || td->td_dbgflags & TDB_BORN) {
 		/*
		 * This is the start of a new thread in a traced
		 * process.  Report a system call exit event.
		 */
		PROC_LOCK(p);
		td->td_dbgflags |= TDB_SCX;
		_STOPEVENT(p, S_SCX, td->td_dbg_sc_code);
		if ((p->p_stops & S_PT_SCX) != 0 ||
		    (td->td_dbgflags & TDB_BORN) != 0)
			ptracestop(td, SIGTRAP);
		td->td_dbgflags &= ~(TDB_SCX | TDB_BORN);
		PROC_UNLOCK(p);
	}

	userret(td, frame);

#ifdef KTRACE
	if (KTRPOINT(td, KTR_SYSRET))
		ktrsysret(SYS_fork, 0, 0);
#endif
}
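
For context, a minimal sketch (assumed shape; the real trampoline is per-architecture assembly) of how fork_return() is reached: fork1() arranges for the new thread to start in the machine-dependent fork_trampoline, which calls the MI fork_exit() with fork_return as the callout.

#include <sys/param.h>
#include <sys/proc.h>

static void
fork_trampoline_sketch(struct thread *td)
{
	/*
	 * fork_exit() takes a (void *, struct trapframe *) callout;
	 * the cast mirrors how the MD code stashes fork_return in the
	 * new thread's PCB before the first context switch.
	 */
	fork_exit((void (*)(void *, struct trapframe *))fork_return,
	    td, td->td_frame);
}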
Example #3
static int
linux_clone_proc(struct thread *td, struct linux_clone_args *args)
{
	struct fork_req fr;
	int error, ff = RFPROC | RFSTOPPED;
	struct proc *p2;
	struct thread *td2;
	int exit_signal;
	struct linux_emuldata *em;

#ifdef DEBUG
	if (ldebug(clone)) {
		printf(ARGS(clone, "flags %x, stack %p, parent tid: %p, "
		    "child tid: %p"), (unsigned)args->flags,
		    args->stack, args->parent_tidptr, args->child_tidptr);
	}
#endif

	exit_signal = args->flags & 0x000000ff;
	if (LINUX_SIG_VALID(exit_signal)) {
		exit_signal = linux_to_bsd_signal(exit_signal);
	} else if (exit_signal != 0)
		return (EINVAL);

	if (args->flags & LINUX_CLONE_VM)
		ff |= RFMEM;
	if (args->flags & LINUX_CLONE_SIGHAND)
		ff |= RFSIGSHARE;
	/*
	 * XXX: In Linux, sharing of fs info (chroot/cwd/umask)
	 * and open files is independent.  In FreeBSD, it's in one
	 * structure but in reality it does not cause any problems
	 * because both of these flags are usually set together.
	 */
	if (!(args->flags & (LINUX_CLONE_FILES | LINUX_CLONE_FS)))
		ff |= RFFDG;

	if (args->flags & LINUX_CLONE_PARENT_SETTID)
		if (args->parent_tidptr == NULL)
			return (EINVAL);

	if (args->flags & LINUX_CLONE_VFORK)
		ff |= RFPPWAIT;

	bzero(&fr, sizeof(fr));
	fr.fr_flags = ff;
	fr.fr_procp = &p2;
	error = fork1(td, &fr);
	if (error)
		return (error);

	td2 = FIRST_THREAD_IN_PROC(p2);

	/* create the emuldata */
	linux_proc_init(td, td2, args->flags);

	em = em_find(td2);
	KASSERT(em != NULL, ("clone_proc: emuldata not found.\n"));

	if (args->flags & LINUX_CLONE_CHILD_SETTID)
		em->child_set_tid = args->child_tidptr;
	else
	   	em->child_set_tid = NULL;

	if (args->flags & LINUX_CLONE_CHILD_CLEARTID)
		em->child_clear_tid = args->child_tidptr;
	else
	   	em->child_clear_tid = NULL;

	if (args->flags & LINUX_CLONE_PARENT_SETTID) {
		error = copyout(&p2->p_pid, args->parent_tidptr,
		    sizeof(p2->p_pid));
		if (error)
			printf(LMSG("copyout failed!"));
	}

	PROC_LOCK(p2);
	p2->p_sigparent = exit_signal;
	PROC_UNLOCK(p2);
	/*
	 * If stack is NULL, we are supposed to COW the calling process's
	 * stack.  This is what a normal fork() does, so we just leave
	 * the tf_rsp argument intact.
	 */
	linux_set_upcall_kse(td2, PTROUT(args->stack));

	if (args->flags & LINUX_CLONE_SETTLS)
		linux_set_cloned_tls(td2, args->tls);

	/*
	 * If CLONE_PARENT is set, then the parent of the new process will be 
	 * the same as that of the calling process.
	 */
	if (args->flags & LINUX_CLONE_PARENT) {
		sx_xlock(&proctree_lock);
		PROC_LOCK(p2);
		proc_reparent(p2, td->td_proc->p_pptr);
		PROC_UNLOCK(p2);
		sx_xunlock(&proctree_lock);
	}

#ifdef DEBUG
	if (ldebug(clone))
		printf(LMSG("clone: successful rfork to %d, "
		    "stack %p sig = %d"), (int)p2->p_pid, args->stack,
		    exit_signal);
#endif

	/*
	 * Make this runnable after we are finished with it.
	 */
	thread_lock(td2);
	TD_SET_CAN_RUN(td2);
	sched_add(td2, SRQ_BORING);
	thread_unlock(td2);

	td->td_retval[0] = p2->p_pid;

	return (0);
}
Example #4
/*
 * Process debugging system call.
 */
int
sys_ptrace(struct proc *p, void *v, register_t *retval)
{
	struct sys_ptrace_args /* {
		syscallarg(int) req;
		syscallarg(pid_t) pid;
		syscallarg(caddr_t) addr;
		syscallarg(int) data;
	} */ *uap = v;
	struct proc *t;				/* target thread */
	struct process *tr;			/* target process */
	struct uio uio;
	struct iovec iov;
	struct ptrace_io_desc piod;
	struct ptrace_event pe;
	struct ptrace_thread_state pts;
	struct reg *regs;
#if defined (PT_SETFPREGS) || defined (PT_GETFPREGS)
	struct fpreg *fpregs;
#endif
#if defined (PT_SETXMMREGS) || defined (PT_GETXMMREGS)
	struct xmmregs *xmmregs;
#endif
#ifdef PT_WCOOKIE
	register_t wcookie;
#endif
	int error, write;
	int temp;
	int req = SCARG(uap, req);
	int s;

	/* "A foolish consistency..." XXX */
	switch (req) {
	case PT_TRACE_ME:
		t = p;
		break;

	/* calls that only operate on the PID */
	case PT_READ_I:
	case PT_READ_D:
	case PT_WRITE_I:
	case PT_WRITE_D:
	case PT_KILL:
	case PT_ATTACH:
	case PT_IO:
	case PT_SET_EVENT_MASK:
	case PT_GET_EVENT_MASK:
	case PT_GET_PROCESS_STATE:
	case PT_GET_THREAD_FIRST:
	case PT_GET_THREAD_NEXT:
	default:
		/* Find the process we're supposed to be operating on. */
		if ((t = pfind(SCARG(uap, pid))) == NULL)
			return (ESRCH);
		if (t->p_flag & P_THREAD)
			return (ESRCH);
		break;

	/* calls that accept a PID or a thread ID */
	case PT_CONTINUE:
	case PT_DETACH:
#ifdef PT_STEP
	case PT_STEP:
#endif
	case PT_GETREGS:
	case PT_SETREGS:
#ifdef PT_GETFPREGS
	case PT_GETFPREGS:
#endif
#ifdef PT_SETFPREGS
	case PT_SETFPREGS:
#endif
#ifdef PT_GETXMMREGS
	case PT_GETXMMREGS:
#endif
#ifdef PT_SETXMMREGS
	case PT_SETXMMREGS:
#endif
		if (SCARG(uap, pid) > THREAD_PID_OFFSET) {
			t = pfind(SCARG(uap, pid) - THREAD_PID_OFFSET);
			if (t == NULL)
				return (ESRCH);
		} else {
			if ((t = pfind(SCARG(uap, pid))) == NULL)
				return (ESRCH);
			if (t->p_flag & P_THREAD)
				return (ESRCH);
		}
		break;
	}
	tr = t->p_p;

	if ((tr->ps_flags & PS_INEXEC) != 0)
		return (EAGAIN);

	/* Make sure we can operate on it. */
	switch (req) {
	case  PT_TRACE_ME:
		/* Saying that you're being traced is always legal. */
		break;

	case  PT_ATTACH:
		/*
		 * You can't attach to a process if:
		 *	(1) it's the process that's doing the attaching,
		 */
		if (tr == p->p_p)
			return (EINVAL);

		/*
		 *	(2) it's a system process
		 */
		if (ISSET(tr->ps_flags, PS_SYSTEM))
			return (EPERM);

		/*
		 *	(3) it's already being traced, or
		 */
		if (ISSET(tr->ps_flags, PS_TRACED))
			return (EBUSY);

		/*
		 *	(4) it's not owned by you, or the last exec
		 *	    gave us setuid/setgid privs (unless
		 *	    you're root), or...
		 * 
		 *      [Note: once PS_SUGID or PS_SUGIDEXEC gets set in
		 *	execve(), they stay set until the process does
		 *	another execve().  Hence this prevents a setuid
		 *	process which revokes its special privileges using
		 *	setuid() from being traced.  This is good security.]
		 */
		if ((tr->ps_ucred->cr_ruid != p->p_ucred->cr_ruid ||
		    ISSET(tr->ps_flags, PS_SUGIDEXEC | PS_SUGID)) &&
		    (error = suser(p, 0)) != 0)
			return (error);

		/*
		 * 	(4.5) it's not a child of the tracing process.
		 */
		if (global_ptrace == 0 && !inferior(tr, p->p_p) &&
		    (error = suser(p, 0)) != 0)
			return (error);

		/*
		 *	(5) ...it's init, which controls the security level
		 *	    of the entire system, and the system was not
		 *          compiled with permanently insecure mode turned
		 *	    on.
		 */
		if ((tr->ps_pid == 1) && (securelevel > -1))
			return (EPERM);

		/*
		 *	(6) it's an ancestor of the current process and
		 *	    not init (because that would create a loop in
		 *	    the process graph).
		 */
		if (tr->ps_pid != 1 && inferior(p->p_p, tr))
			return (EINVAL);
		break;

	case  PT_READ_I:
	case  PT_READ_D:
	case  PT_WRITE_I:
	case  PT_WRITE_D:
	case  PT_IO:
	case  PT_CONTINUE:
	case  PT_KILL:
	case  PT_DETACH:
#ifdef PT_STEP
	case  PT_STEP:
#endif
	case  PT_SET_EVENT_MASK:
	case  PT_GET_EVENT_MASK:
	case  PT_GET_PROCESS_STATE:
	case  PT_GETREGS:
	case  PT_SETREGS:
#ifdef PT_GETFPREGS
	case  PT_GETFPREGS:
#endif
#ifdef PT_SETFPREGS
	case  PT_SETFPREGS:
#endif
#ifdef PT_GETXMMREGS
	case  PT_GETXMMREGS:
#endif
#ifdef PT_SETXMMREGS
	case  PT_SETXMMREGS:
#endif
#ifdef PT_WCOOKIE
	case  PT_WCOOKIE:
#endif
		/*
		 * You can't do what you want to the process if:
		 *	(1) It's not being traced at all,
		 */
		if (!ISSET(tr->ps_flags, PS_TRACED))
			return (EPERM);

		/*
		 *	(2) it's not being traced by _you_, or
		 */
		if (tr->ps_pptr != p->p_p)
			return (EBUSY);

		/*
		 *	(3) it's not currently stopped.
		 */
		if (t->p_stat != SSTOP || !ISSET(tr->ps_flags, PS_WAITED))
			return (EBUSY);
		break;

	case  PT_GET_THREAD_FIRST:
	case  PT_GET_THREAD_NEXT:
		/*
		 * You can't do what you want to the process if:
		 *	(1) It's not being traced at all,
		 */
		if (!ISSET(tr->ps_flags, PS_TRACED))
			return (EPERM);

		/*
		 *	(2) it's not being traced by _you_, or
		 */
		if (tr->ps_pptr != p->p_p)
			return (EBUSY);

		/*
		 * Do the work here because the request isn't actually
		 * associated with 't'
		 */
		if (SCARG(uap, data) != sizeof(pts))
			return (EINVAL);

		if (req == PT_GET_THREAD_NEXT) {
			error = copyin(SCARG(uap, addr), &pts, sizeof(pts));
			if (error)
				return (error);

			t = pfind(pts.pts_tid - THREAD_PID_OFFSET);
			if (t == NULL || ISSET(t->p_flag, P_WEXIT))
				return (ESRCH);
			if (t->p_p != tr)
				return (EINVAL);
			t = TAILQ_NEXT(t, p_thr_link);
		} else {
			t = TAILQ_FIRST(&tr->ps_threads);
		}

		if (t == NULL)
			pts.pts_tid = -1;
		else
			pts.pts_tid = t->p_pid + THREAD_PID_OFFSET;
		return (copyout(&pts, SCARG(uap, addr), sizeof(pts)));

	default:			/* It was not a legal request. */
		return (EINVAL);
	}

	/* Do single-step fixup if needed. */
	FIX_SSTEP(t);

	/* Now do the operation. */
	write = 0;
	*retval = 0;

	switch (req) {
	case  PT_TRACE_ME:
		/* Just set the trace flag. */
		atomic_setbits_int(&tr->ps_flags, PS_TRACED);
		tr->ps_oppid = tr->ps_pptr->ps_pid;
		if (tr->ps_ptstat == NULL)
			tr->ps_ptstat = malloc(sizeof(*tr->ps_ptstat),
			    M_SUBPROC, M_WAITOK);
		memset(tr->ps_ptstat, 0, sizeof(*tr->ps_ptstat));
		return (0);

	case  PT_WRITE_I:		/* XXX no separate I and D spaces */
	case  PT_WRITE_D:
		write = 1;
		temp = SCARG(uap, data);
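		/* FALLTHROUGH */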
	case  PT_READ_I:		/* XXX no separate I and D spaces */
	case  PT_READ_D:
		/* write = 0 done above. */
		iov.iov_base = (caddr_t)&temp;
		iov.iov_len = sizeof(int);
		uio.uio_iov = &iov;
		uio.uio_iovcnt = 1;
		uio.uio_offset = (off_t)(vaddr_t)SCARG(uap, addr);
		uio.uio_resid = sizeof(int);
		uio.uio_segflg = UIO_SYSSPACE;
		uio.uio_rw = write ? UIO_WRITE : UIO_READ;
		uio.uio_procp = p;
		error = process_domem(p, t, &uio, write ? PT_WRITE_I :
				PT_READ_I);
		if (write == 0)
			*retval = temp;
		return (error);
	case  PT_IO:
		error = copyin(SCARG(uap, addr), &piod, sizeof(piod));
		if (error)
			return (error);
		iov.iov_base = piod.piod_addr;
		iov.iov_len = piod.piod_len;
		uio.uio_iov = &iov;
		uio.uio_iovcnt = 1;
		uio.uio_offset = (off_t)(vaddr_t)piod.piod_offs;
		uio.uio_resid = piod.piod_len;
		uio.uio_segflg = UIO_USERSPACE;
		uio.uio_procp = p;
		switch (piod.piod_op) {
		case PIOD_READ_I:
			req = PT_READ_I;
			uio.uio_rw = UIO_READ;
			break;
		case PIOD_READ_D:
			req = PT_READ_D;
			uio.uio_rw = UIO_READ;
			break;
		case PIOD_WRITE_I:
			req = PT_WRITE_I;
			uio.uio_rw = UIO_WRITE;
			break;
		case PIOD_WRITE_D:
			req = PT_WRITE_D;
			uio.uio_rw = UIO_WRITE;
			break;
		case PIOD_READ_AUXV:
			req = PT_READ_D;
			uio.uio_rw = UIO_READ;
			temp = tr->ps_emul->e_arglen * sizeof(char *);
			if (uio.uio_offset > temp)
				return (EIO);
			if (uio.uio_resid > temp - uio.uio_offset)
				uio.uio_resid = temp - uio.uio_offset;
			piod.piod_len = iov.iov_len = uio.uio_resid;
			error = process_auxv_offset(p, t, &uio);
			if (error)
				return (error);
			break;
		default:
			return (EINVAL);
		}
		error = process_domem(p, t, &uio, req);
		piod.piod_len -= uio.uio_resid;
		(void) copyout(&piod, SCARG(uap, addr), sizeof(piod));
		return (error);
#ifdef PT_STEP
	case  PT_STEP:
		/*
		 * From the 4.4BSD PRM:
		 * "Execution continues as in request PT_CONTINUE; however
		 * as soon as possible after execution of at least one
		 * instruction, execution stops again. [ ... ]"
		 */
#endif
	case  PT_CONTINUE:
		/*
		 * From the 4.4BSD PRM:
		 * "The data argument is taken as a signal number and the
		 * child's execution continues at location addr as if it
		 * incurred that signal.  Normally the signal number will
		 * be either 0 to indicate that the signal that caused the
		 * stop should be ignored, or that value fetched out of
		 * the process's image indicating which signal caused
		 * the stop.  If addr is (int *)1 then execution continues
		 * from where it stopped."
		 */

		if (SCARG(uap, pid) < THREAD_PID_OFFSET && tr->ps_single)
			t = tr->ps_single;

		/* Check that the data is a valid signal number or zero. */
		if (SCARG(uap, data) < 0 || SCARG(uap, data) >= NSIG)
			return (EINVAL);

		/* If the address parameter is not (int *)1, set the pc. */
		if ((int *)SCARG(uap, addr) != (int *)1)
			if ((error = process_set_pc(t, SCARG(uap, addr))) != 0)
				goto relebad;

#ifdef PT_STEP
		/*
		 * Arrange for a single-step, if that's requested and possible.
		 */
		error = process_sstep(t, req == PT_STEP);
		if (error)
			goto relebad;
#endif
		goto sendsig;

	case  PT_DETACH:
		/*
		 * From the 4.4BSD PRM:
		 * "The data argument is taken as a signal number and the
		 * child's execution continues at location addr as if it
		 * incurred that signal.  Normally the signal number will
		 * be either 0 to indicate that the signal that caused the
		 * stop should be ignored, or that value fetched out of
		 * the process's image indicating which signal caused
		 * the stop.  If addr is (int *)1 then execution continues
		 * from where it stopped."
		 */

		if (SCARG(uap, pid) < THREAD_PID_OFFSET && tr->ps_single)
			t = tr->ps_single;

		/* Check that the data is a valid signal number or zero. */
		if (SCARG(uap, data) < 0 || SCARG(uap, data) >= NSIG)
			return (EINVAL);

#ifdef PT_STEP
		/*
		 * Arrange for a single-step, if that's requested and possible.
		 */
		error = process_sstep(t, req == PT_STEP);
		if (error)
			goto relebad;
#endif

		/* give process back to original parent or init */
		if (tr->ps_oppid != tr->ps_pptr->ps_pid) {
			struct process *ppr;

			ppr = prfind(tr->ps_oppid);
			proc_reparent(tr, ppr ? ppr : initprocess);
		}

		/* not being traced any more */
		tr->ps_oppid = 0;
		atomic_clearbits_int(&tr->ps_flags, PS_TRACED|PS_WAITED);

	sendsig:
		memset(tr->ps_ptstat, 0, sizeof(*tr->ps_ptstat));

		/* Finally, deliver the requested signal (or none). */
		if (t->p_stat == SSTOP) {
			t->p_xstat = SCARG(uap, data);
			SCHED_LOCK(s);
			setrunnable(t);
			SCHED_UNLOCK(s);
		} else {
			if (SCARG(uap, data) != 0)
				psignal(t, SCARG(uap, data));
		}

		return (0);

	relebad:
		return (error);

	case  PT_KILL:
		if (SCARG(uap, pid) < THREAD_PID_OFFSET && tr->ps_single)
			t = tr->ps_single;

		/* just send the process a KILL signal. */
		SCARG(uap, data) = SIGKILL;
		goto sendsig;	/* in PT_CONTINUE, above. */

	case  PT_ATTACH:
		/*
		 * As was done in procfs:
		 * Go ahead and set the trace flag.
		 * Save the old parent (it's reset in
		 *   _DETACH, and also in kern_exit.c:wait4()).
		 * Reparent the process so that the tracing
		 *   proc gets to see all the action.
		 * Stop the target.
		 */
		atomic_setbits_int(&tr->ps_flags, PS_TRACED);
		tr->ps_oppid = tr->ps_pptr->ps_pid;
		if (tr->ps_pptr != p->p_p)
			proc_reparent(tr, p->p_p);
		if (tr->ps_ptstat == NULL)
			tr->ps_ptstat = malloc(sizeof(*tr->ps_ptstat),
			    M_SUBPROC, M_WAITOK);
		SCARG(uap, data) = SIGSTOP;
		goto sendsig;

	case  PT_GET_EVENT_MASK:
		if (SCARG(uap, data) != sizeof(pe))
			return (EINVAL);
		memset(&pe, 0, sizeof(pe));
		pe.pe_set_event = tr->ps_ptmask;
		return (copyout(&pe, SCARG(uap, addr), sizeof(pe)));
	case  PT_SET_EVENT_MASK:
		if (SCARG(uap, data) != sizeof(pe))
			return (EINVAL);
		if ((error = copyin(SCARG(uap, addr), &pe, sizeof(pe))))
			return (error);
		tr->ps_ptmask = pe.pe_set_event;
		return (0);

	case  PT_GET_PROCESS_STATE:
		if (SCARG(uap, data) != sizeof(*tr->ps_ptstat))
			return (EINVAL);

		if (tr->ps_single)
			tr->ps_ptstat->pe_tid =
			    tr->ps_single->p_pid + THREAD_PID_OFFSET;

		return (copyout(tr->ps_ptstat, SCARG(uap, addr),
		    sizeof(*tr->ps_ptstat)));

	case  PT_SETREGS:
		KASSERT((p->p_flag & P_SYSTEM) == 0);
		if ((error = process_checkioperm(p, tr)) != 0)
			return (error);

		regs = malloc(sizeof(*regs), M_TEMP, M_WAITOK);
		error = copyin(SCARG(uap, addr), regs, sizeof(*regs));
		if (error == 0) {
			error = process_write_regs(t, regs);
		}
		free(regs, M_TEMP, sizeof(*regs));
		return (error);
	case  PT_GETREGS:
		KASSERT((p->p_flag & P_SYSTEM) == 0);
		if ((error = process_checkioperm(p, tr)) != 0)
			return (error);

		regs = malloc(sizeof(*regs), M_TEMP, M_WAITOK);
		error = process_read_regs(t, regs);
		if (error == 0)
			error = copyout(regs,
			    SCARG(uap, addr), sizeof (*regs));
		free(regs, M_TEMP, sizeof(*regs));
		return (error);
#ifdef PT_SETFPREGS
	case  PT_SETFPREGS:
		KASSERT((p->p_flag & P_SYSTEM) == 0);
		if ((error = process_checkioperm(p, tr)) != 0)
			return (error);

		fpregs = malloc(sizeof(*fpregs), M_TEMP, M_WAITOK);
		error = copyin(SCARG(uap, addr), fpregs, sizeof(*fpregs));
		if (error == 0) {
			error = process_write_fpregs(t, fpregs);
		}
		free(fpregs, M_TEMP, sizeof(*fpregs));
		return (error);
#endif
#ifdef PT_GETFPREGS
	case  PT_GETFPREGS:
		KASSERT((p->p_flag & P_SYSTEM) == 0);
		if ((error = process_checkioperm(p, tr)) != 0)
			return (error);

		fpregs = malloc(sizeof(*fpregs), M_TEMP, M_WAITOK);
		error = process_read_fpregs(t, fpregs);
		if (error == 0)
			error = copyout(fpregs,
			    SCARG(uap, addr), sizeof(*fpregs));
		free(fpregs, M_TEMP, sizeof(*fpregs));
		return (error);
#endif
#ifdef PT_SETXMMREGS
	case  PT_SETXMMREGS:
		KASSERT((p->p_flag & P_SYSTEM) == 0);
		if ((error = process_checkioperm(p, tr)) != 0)
			return (error);

		xmmregs = malloc(sizeof(*xmmregs), M_TEMP, M_WAITOK);
		error = copyin(SCARG(uap, addr), xmmregs, sizeof(*xmmregs));
		if (error == 0) {
			error = process_write_xmmregs(t, xmmregs);
		}
		free(xmmregs, M_TEMP, sizeof(*xmmregs));
		return (error);
#endif
#ifdef PT_GETXMMREGS
	case  PT_GETXMMREGS:
		KASSERT((p->p_flag & P_SYSTEM) == 0);
		if ((error = process_checkioperm(p, tr)) != 0)
			return (error);

		xmmregs = malloc(sizeof(*xmmregs), M_TEMP, M_WAITOK);
		error = process_read_xmmregs(t, xmmregs);
		if (error == 0)
			error = copyout(xmmregs,
			    SCARG(uap, addr), sizeof(*xmmregs));
		free(xmmregs, M_TEMP, sizeof(*xmmregs));
		return (error);
#endif
#ifdef PT_WCOOKIE
	case  PT_WCOOKIE:
		wcookie = process_get_wcookie(t);
		return (copyout(&wcookie, SCARG(uap, addr),
		    sizeof(register_t)));
#endif
	}

#ifdef DIAGNOSTIC
	panic("ptrace: impossible");
#endif
	return 0;
}
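
A hedged userland sketch of the attach/detach path above: PT_ATTACH saves the original parent in ps_oppid and reparents the target to the tracer; PT_DETACH hands it back. Error handling is minimal and the target pid is taken from argv.

#include <sys/types.h>
#include <sys/ptrace.h>
#include <sys/wait.h>
#include <err.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

int
main(int argc, char *argv[])
{
	pid_t pid;
	int status;

	if (argc != 2)
		errx(1, "usage: %s pid", argv[0]);
	pid = (pid_t)strtol(argv[1], NULL, 10);

	/* PT_ATTACH: saves the old parent in ps_oppid, reparents the
	 * target to us, and stops it with SIGSTOP. */
	if (ptrace(PT_ATTACH, pid, NULL, 0) == -1)
		err(1, "PT_ATTACH");
	if (waitpid(pid, &status, 0) == -1)
		err(1, "waitpid");
	printf("attached, target stopped: %d\n", WIFSTOPPED(status));

	/* ... PT_GETREGS / PT_READ_D / PT_IO inspection goes here ... */

	/* PT_DETACH: addr (caddr_t)1 means "continue where stopped",
	 * data 0 means "no signal"; the kernel reparents the target
	 * back to ps_oppid (or init if that parent is gone). */
	if (ptrace(PT_DETACH, pid, (caddr_t)1, 0) == -1)
		err(1, "PT_DETACH");
	return 0;
}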
Example #5
/*
 * Exit: deallocate address space and other resources, change proc state to
 * zombie, and unlink proc from allproc and parent's lists.  Save exit status
 * and rusage for wait().  Check for child processes and orphan them.
 */
void
exit1(struct thread *td, int rv)
{
	struct proc *p, *nq, *q;
	struct vnode *vtmp;
	struct vnode *ttyvp = NULL;
	struct plimit *plim;

	mtx_assert(&Giant, MA_NOTOWNED);

	p = td->td_proc;
	/*
	 * XXX in case we're rebooting we just let init die in order to
	 * work around an unsolved stack overflow seen very late during
	 * shutdown on sparc64 when the gmirror worker process exists.
	 */
	if (p == initproc && rebooting == 0) {
		printf("init died (signal %d, exit %d)\n",
		    WTERMSIG(rv), WEXITSTATUS(rv));
		panic("Going nowhere without my init!");
	}

	/*
	 * MUST abort all other threads before proceeding past here.
	 */
	PROC_LOCK(p);
	while (p->p_flag & P_HADTHREADS) {
		/*
		 * First check if some other thread got here before us.
		 * If so, act appropriately: exit or suspend.
		 */
		thread_suspend_check(0);

		/*
		 * Kill off the other threads. This requires
		 * some co-operation from other parts of the kernel
		 * so it may not be instantaneous.  With this state set
		 * any thread entering the kernel from userspace will
		 * thread_exit() in trap().  Any thread attempting to
		 * sleep will return immediately with EINTR or EWOULDBLOCK
		 * which will hopefully force them to back out to userland
		 * freeing resources as they go.  Any thread attempting
		 * to return to userland will thread_exit() from userret().
		 * thread_exit() will unsuspend us when the last of the
		 * other threads exits.
		 * If another thread is already the single-threading
		 * master when we resume, the thread_single() call will
		 * fail; in that case, we just re-check all suspension
		 * requests, and this thread will either suspend there
		 * or exit.
		 */
		if (!thread_single(SINGLE_EXIT))
			break;

		/*
		 * All other activity in this process is now stopped.
		 * Threading support has been turned off.
		 */
	}
	KASSERT(p->p_numthreads == 1,
	    ("exit1: proc %p exiting with %d threads", p, p->p_numthreads));
	racct_sub(p, RACCT_NTHR, 1);
	/*
	 * Wakeup anyone in procfs' PIOCWAIT.  They should have a hold
	 * on our vmspace, so we should block below until they have
	 * released their reference to us.  Note that if they have
	 * requested S_EXIT stops we will block here until they ack
	 * via PIOCCONT.
	 */
	_STOPEVENT(p, S_EXIT, rv);

	/*
	 * Ignore any pending request to stop due to a stop signal.
	 * Once P_WEXIT is set, future requests will be ignored as
	 * well.
	 */
	p->p_flag &= ~P_STOPPED_SIG;
	KASSERT(!P_SHOULDSTOP(p), ("exiting process is stopped"));

	/*
	 * Note that we are exiting and do another wakeup of anyone in
	 * PIOCWAIT in case they aren't listening for S_EXIT stops or
	 * decided to wait again after we told them we are exiting.
	 */
	p->p_flag |= P_WEXIT;
	wakeup(&p->p_stype);

	/*
	 * Wait for any processes that have a hold on our vmspace to
	 * release their reference.
	 */
	while (p->p_lock > 0)
		msleep(&p->p_lock, &p->p_mtx, PWAIT, "exithold", 0);

	p->p_xstat = rv;	/* Let event handler change exit status */
	PROC_UNLOCK(p);
	/* Drain the limit callout while we don't have the proc locked */
	callout_drain(&p->p_limco);

#ifdef AUDIT
	/*
	 * The Sun BSM exit token contains two components: an exit status as
	 * passed to exit(), and a return value to indicate what sort of exit
	 * it was.  The exit status is WEXITSTATUS(rv), but it's not clear
	 * what the return value is.
	 */
	AUDIT_ARG_EXIT(WEXITSTATUS(rv), 0);
	AUDIT_SYSCALL_EXIT(0, td);
#endif

	/* Are we a task leader? */
	if (p == p->p_leader) {
		mtx_lock(&ppeers_lock);
		q = p->p_peers;
		while (q != NULL) {
			PROC_LOCK(q);
			kern_psignal(q, SIGKILL);
			PROC_UNLOCK(q);
			q = q->p_peers;
		}
		while (p->p_peers != NULL)
			msleep(p, &ppeers_lock, PWAIT, "exit1", 0);
		mtx_unlock(&ppeers_lock);
	}

	/*
	 * Check if any loadable modules need anything done at process exit.
	 * E.g. SYSV IPC stuff
	 * XXX what if one of these generates an error?
	 */
	EVENTHANDLER_INVOKE(process_exit, p);

	/*
	 * If parent is waiting for us to exit or exec,
	 * P_PPWAIT is set; we will wakeup the parent below.
	 */
	PROC_LOCK(p);
	rv = p->p_xstat;	/* Event handler could change exit status */
	stopprofclock(p);
	p->p_flag &= ~(P_TRACED | P_PPWAIT | P_PPTRACE);

	/*
	 * Stop the real interval timer.  If the handler is currently
	 * executing, prevent it from rearming itself and let it finish.
	 */
	if (timevalisset(&p->p_realtimer.it_value) &&
	    callout_stop(&p->p_itcallout) == 0) {
		timevalclear(&p->p_realtimer.it_interval);
		msleep(&p->p_itcallout, &p->p_mtx, PWAIT, "ritwait", 0);
		KASSERT(!timevalisset(&p->p_realtimer.it_value),
		    ("realtime timer is still armed"));
	}
	PROC_UNLOCK(p);

	/*
	 * Reset any sigio structures pointing to us as a result of
	 * F_SETOWN with our pid.
	 */
	funsetownlst(&p->p_sigiolst);

	/*
	 * If this process has an nlminfo data area (for lockd), release it
	 */
	if (nlminfo_release_p != NULL && p->p_nlminfo != NULL)
		(*nlminfo_release_p)(p);

	/*
	 * Close open files and release open-file table.
	 * This may block!
	 */
	fdescfree(td);

	/*
	 * If this thread tickled GEOM, we need to wait for the giggling to
	 * stop before we return to userland
	 */
	if (td->td_pflags & TDP_GEOM)
		g_waitidle();

	/*
	 * Remove ourself from our leader's peer list and wake our leader.
	 */
	mtx_lock(&ppeers_lock);
	if (p->p_leader->p_peers) {
		q = p->p_leader;
		while (q->p_peers != p)
			q = q->p_peers;
		q->p_peers = p->p_peers;
		wakeup(p->p_leader);
	}
	mtx_unlock(&ppeers_lock);

	vmspace_exit(td);

	sx_xlock(&proctree_lock);
	if (SESS_LEADER(p)) {
		struct session *sp = p->p_session;
		struct tty *tp;

		/*
		 * s_ttyp is not zero'd; we use this to indicate that
		 * the session once had a controlling terminal. (for
		 * logging and informational purposes)
		 */
		SESS_LOCK(sp);
		ttyvp = sp->s_ttyvp;
		tp = sp->s_ttyp;
		sp->s_ttyvp = NULL;
		sp->s_ttydp = NULL;
		sp->s_leader = NULL;
		SESS_UNLOCK(sp);

		/*
		 * Signal foreground pgrp and revoke access to
		 * controlling terminal if it has not been revoked
		 * already.
		 *
		 * Because the TTY may have been revoked in the mean
		 * time and could already have a new session associated
		 * with it, make sure we don't send a SIGHUP to a
		 * foreground process group that does not belong to this
		 * session.
		 */

		if (tp != NULL) {
			tty_lock(tp);
			if (tp->t_session == sp)
				tty_signal_pgrp(tp, SIGHUP);
			tty_unlock(tp);
		}

		if (ttyvp != NULL) {
			sx_xunlock(&proctree_lock);
			if (vn_lock(ttyvp, LK_EXCLUSIVE) == 0) {
				VOP_REVOKE(ttyvp, REVOKEALL);
				VOP_UNLOCK(ttyvp, 0);
			}
			sx_xlock(&proctree_lock);
		}
	}
	fixjobc(p, p->p_pgrp, 0);
	sx_xunlock(&proctree_lock);
	(void)acct_process(td);

	/* Release the TTY now we've unlocked everything. */
	if (ttyvp != NULL)
		vrele(ttyvp);
#ifdef KTRACE
	ktrprocexit(td);
#endif
	/*
	 * Release reference to text vnode
	 */
	if ((vtmp = p->p_textvp) != NULL) {
		p->p_textvp = NULL;
		vrele(vtmp);
	}

	/*
	 * Release our limits structure.
	 */
	plim = p->p_limit;
	p->p_limit = NULL;
	lim_free(plim);

	tidhash_remove(td);

	/*
	 * Remove proc from allproc queue and pidhash chain.
	 * Place onto zombproc.  Unlink from parent's child list.
	 */
	sx_xlock(&allproc_lock);
	LIST_REMOVE(p, p_list);
	LIST_INSERT_HEAD(&zombproc, p, p_list);
	LIST_REMOVE(p, p_hash);
	sx_xunlock(&allproc_lock);

	/*
	 * Call machine-dependent code to release any
	 * machine-dependent resources other than the address space.
	 * The address space is released by "vmspace_exitfree(p)" in
	 * vm_waitproc().
	 */
	cpu_exit(td);

	WITNESS_WARN(WARN_PANIC, NULL, "process (pid %d) exiting", p->p_pid);

	/*
	 * Reparent all of our children to init.
	 */
	sx_xlock(&proctree_lock);
	q = LIST_FIRST(&p->p_children);
	if (q != NULL)		/* only need this if any child is S_ZOMB */
		wakeup(initproc);
	for (; q != NULL; q = nq) {
		nq = LIST_NEXT(q, p_sibling);
		PROC_LOCK(q);
		proc_reparent(q, initproc);
		q->p_sigparent = SIGCHLD;
		/*
		 * Traced processes are killed
		 * since their existence means someone is screwing up.
		 */
		if (q->p_flag & P_TRACED) {
			struct thread *temp;

			/*
			 * Since q was found on our children list, the
			 * proc_reparent() call moved q to the orphan
			 * list due to present P_TRACED flag. Clear
			 * orphan link for q now while q is locked.
			 */
			clear_orphan(q);
			q->p_flag &= ~(P_TRACED | P_STOPPED_TRACE);
			FOREACH_THREAD_IN_PROC(q, temp)
				temp->td_dbgflags &= ~TDB_SUSPEND;
			kern_psignal(q, SIGKILL);
		}
		PROC_UNLOCK(q);
	}

	/*
	 * Also get rid of our orphans.
	 */
	while ((q = LIST_FIRST(&p->p_orphans)) != NULL) {
		PROC_LOCK(q);
		clear_orphan(q);
		PROC_UNLOCK(q);
	}

	/* Save exit status. */
	PROC_LOCK(p);
	p->p_xthread = td;

	/* Tell the prison that we are gone. */
	prison_proc_free(p->p_ucred->cr_prison);

#ifdef KDTRACE_HOOKS
	/*
	 * Tell the DTrace fasttrap provider about the exit if it
	 * has declared an interest.
	 */
	if (dtrace_fasttrap_exit)
		dtrace_fasttrap_exit(p);
#endif

	/*
	 * Notify interested parties of our demise.
	 */
	KNOTE_LOCKED(&p->p_klist, NOTE_EXIT);

#ifdef KDTRACE_HOOKS
	int reason = CLD_EXITED;
	if (WCOREDUMP(rv))
		reason = CLD_DUMPED;
	else if (WIFSIGNALED(rv))
		reason = CLD_KILLED;
	SDT_PROBE(proc, kernel, , exit, reason, 0, 0, 0, 0);
#endif

	/*
	 * Just delete all entries in the p_klist. At this point we won't
	 * report any more events, and there are nasty race conditions that
	 * can beat us if we don't.
	 */
	knlist_clear(&p->p_klist, 1);

	/*
	 * If this is a process with a descriptor, we may not need to deliver
	 * a signal to the parent.  proctree_lock is held over
	 * procdesc_exit() to serialize concurrent calls to close() and
	 * exit().
	 */
	if (p->p_procdesc == NULL || procdesc_exit(p)) {
		/*
		 * Notify parent that we're gone.  If parent has the
		 * PS_NOCLDWAIT flag set, or if the handler is set to SIG_IGN,
		 * notify process 1 instead (and hope it will handle this
		 * situation).
		 */
		PROC_LOCK(p->p_pptr);
		mtx_lock(&p->p_pptr->p_sigacts->ps_mtx);
		if (p->p_pptr->p_sigacts->ps_flag &
		    (PS_NOCLDWAIT | PS_CLDSIGIGN)) {
			struct proc *pp;

			mtx_unlock(&p->p_pptr->p_sigacts->ps_mtx);
			pp = p->p_pptr;
			PROC_UNLOCK(pp);
			proc_reparent(p, initproc);
			p->p_sigparent = SIGCHLD;
			PROC_LOCK(p->p_pptr);

			/*
			 * Notify the parent, so that if it was blocked
			 * in wait(2) or waitpid(2) on our pid, it will
			 * continue.
			 */
			wakeup(pp);
		} else
			mtx_unlock(&p->p_pptr->p_sigacts->ps_mtx);

		if (p->p_pptr == initproc)
			kern_psignal(p->p_pptr, SIGCHLD);
		else if (p->p_sigparent != 0) {
			if (p->p_sigparent == SIGCHLD)
				childproc_exited(p);
			else	/* LINUX thread */
				kern_psignal(p->p_pptr, p->p_sigparent);
		}
	} else
		PROC_LOCK(p->p_pptr);
	sx_xunlock(&proctree_lock);

	/*
	 * The state PRS_ZOMBIE prevents other processes from sending a
	 * signal to this process.  To avoid a memory leak, we free the
	 * memory for the signal queue at the time the state is set.
	 */
	sigqueue_flush(&p->p_sigqueue);
	sigqueue_flush(&td->td_sigqueue);

	/*
	 * We have to wait until after acquiring all locks before
	 * changing p_state.  We need to avoid all possible context
	 * switches (including ones from blocking on a mutex) while
	 * marked as a zombie.  We also have to set the zombie state
	 * before we release the parent process' proc lock to avoid
	 * a lost wakeup.  So, we first call wakeup, then we grab the
	 * sched lock, update the state, and release the parent process'
	 * proc lock.
	 */
	wakeup(p->p_pptr);
	cv_broadcast(&p->p_pwait);
	sched_exit(p->p_pptr, td);
	PROC_SLOCK(p);
	p->p_state = PRS_ZOMBIE;
	PROC_UNLOCK(p->p_pptr);

	/*
	 * Hopefully no one will try to deliver a signal to the process this
	 * late in the game.
	 */
	knlist_destroy(&p->p_klist);

	/*
	 * Save our children's rusage information in our exit rusage.
	 */
	ruadd(&p->p_ru, &p->p_rux, &p->p_stats->p_cru, &p->p_crux);

	/*
	 * Make sure the scheduler takes this thread out of its tables etc.
	 * This will also release this thread's reference to the ucred.
	 * Other thread parts to release include pcb bits and such.
	 */
	thread_exit();
}
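
A small userland demonstration of the "reparent all of our children to init" step in exit1(): once the parent exits, the orphaned child's getppid() reports init (pid 1, absent an alternative reaper).

#include <sys/types.h>
#include <stdio.h>
#include <unistd.h>

int
main(void)
{
	pid_t pid = fork();

	if (pid == -1) {
		perror("fork");
		return 1;
	}
	if (pid == 0) {
		/* Child: give the parent time to exit, then observe
		 * the reparenting done by exit1() -> proc_reparent(). */
		sleep(2);
		printf("child %d reparented to pid %d\n",
		    (int)getpid(), (int)getppid());
		return 0;
	}
	/* Parent: exit immediately, orphaning the child. */
	return 0;
}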
Example #6
static int
procfs_control(struct thread *td, struct proc *p, int op)
{
	int error = 0;
	struct thread *temp;

	/*
	 * Attach - attaches the target process for debugging
	 * by the calling process.
	 */
	if (op == PROCFS_CTL_ATTACH) {
		sx_xlock(&proctree_lock);
		PROC_LOCK(p);
		if ((error = p_candebug(td, p)) != 0)
			goto out;
		if (p->p_flag & P_TRACED) {
			error = EBUSY;
			goto out;
		}

		/* Can't trace yourself! */
		if (p->p_pid == td->td_proc->p_pid) {
			error = EINVAL;
			goto out;
		}

		/*
		 * Go ahead and set the trace flag.
		 * Save the old parent (it's reset in
		 *   _DETACH, and also in kern_exit.c:wait4()).
		 * Reparent the process so that the tracing
		 *   proc gets to see all the action.
		 * Stop the target.
		 */
		p->p_flag |= P_TRACED;
		faultin(p);
		p->p_xstat = 0;		/* XXX ? */
		if (p->p_pptr != td->td_proc) {
			p->p_oppid = p->p_pptr->p_pid;
			proc_reparent(p, td->td_proc);
		}
		psignal(p, SIGSTOP);
out:
		PROC_UNLOCK(p);
		sx_xunlock(&proctree_lock);
		return (error);
	}

	/*
	 * Authorization check: rely on normal debugging protection, except
	 * allow processes to disengage debugging on a process onto which
	 * they have previously attached, but no longer have permission to
	 * debug.
	 */
	PROC_LOCK(p);
	if (op != PROCFS_CTL_DETACH &&
	    ((error = p_candebug(td, p)))) {
		PROC_UNLOCK(p);
		return (error);
	}

	/*
	 * Target process must be stopped, owned by (td) and
	 * be set up for tracing (P_TRACED flag set).
	 * Allow DETACH to take place at any time for sanity.
	 * Allow WAIT any time, of course.
	 */
	switch (op) {
	case PROCFS_CTL_DETACH:
	case PROCFS_CTL_WAIT:
		break;

	default:
		if (!TRACE_WAIT_P(td->td_proc, p)) {
			PROC_UNLOCK(p);
			return (EBUSY);
		}
	}


#ifdef FIX_SSTEP
	/*
	 * do single-step fixup if needed
	 */
	FIX_SSTEP(FIRST_THREAD_IN_PROC(p));
#endif

	/*
	 * Don't deliver any signal by default.
	 * To continue with a signal, just send
	 * the signal name to the ctl file
	 */
	p->p_xstat = 0;

	switch (op) {
	/*
	 * Detach.  Cleans up the target process, reparent it if possible
	 * and set it running once more.
	 */
	case PROCFS_CTL_DETACH:
		/* if not being traced, then this is a painless no-op */
		if ((p->p_flag & P_TRACED) == 0) {
			PROC_UNLOCK(p);
			return (0);
		}

		/* not being traced any more */
		p->p_flag &= ~(P_TRACED | P_STOPPED_TRACE);

		/* remove pending SIGTRAP, else the process will die */
		sigqueue_delete_proc(p, SIGTRAP);
		FOREACH_THREAD_IN_PROC(p, temp)
			temp->td_dbgflags &= ~TDB_SUSPEND;
		PROC_UNLOCK(p);

		/* give process back to original parent */
		sx_xlock(&proctree_lock);
		if (p->p_oppid != p->p_pptr->p_pid) {
			struct proc *pp;

			pp = pfind(p->p_oppid);
			PROC_LOCK(p);
			if (pp) {
				PROC_UNLOCK(pp);
				proc_reparent(p, pp);
			}
		} else
			PROC_LOCK(p);
		p->p_oppid = 0;
		p->p_flag &= ~P_WAITED;	/* XXX ? */
		sx_xunlock(&proctree_lock);

		wakeup(td->td_proc);	/* XXX for CTL_WAIT below ? */

		break;

	/*
	 * Step.  Let the target process execute a single instruction.
	 * What does it mean to single step a threaded program?
	 */
	case PROCFS_CTL_STEP:
		error = proc_sstep(FIRST_THREAD_IN_PROC(p));
		if (error) {
			PROC_UNLOCK(p);
			return (error);
		}
		break;

	/*
	 * Run.  Let the target process continue running until a breakpoint
	 * or some other trap.
	 */
	case PROCFS_CTL_RUN:
		p->p_flag &= ~P_STOPPED_SIG;	/* this uses SIGSTOP */
		break;

	/*
	 * Wait for the target process to stop.
	 * If the target is not being traced, just wait for it to
	 * enter the stopped state.
	 */
	case PROCFS_CTL_WAIT:
		if (p->p_flag & P_TRACED) {
			while (error == 0 &&
					(P_SHOULDSTOP(p)) &&
					(p->p_flag & P_TRACED) &&
					(p->p_pptr == td->td_proc))
				error = msleep(p, &p->p_mtx,
						PWAIT|PCATCH, "procfsx", 0);
			if (error == 0 && !TRACE_WAIT_P(td->td_proc, p))
				error = EBUSY;
		} else {
			while (error == 0 && P_SHOULDSTOP(p))
				error = msleep(p, &p->p_mtx,
						PWAIT|PCATCH, "procfs", 0);
		}
		PROC_UNLOCK(p);
		return (error);
	default:
		panic("procfs_control");
	}

	PROC_SLOCK(p);
	thread_unsuspend(p); /* If it can run, let it do so. */
	PROC_SUNLOCK(p);
	PROC_UNLOCK(p);
	return (0);
}
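
A minimal userland sketch of driving procfs_control() through the procfs ctl file, assuming procfs is mounted on /proc; the command strings written map to the PROCFS_CTL_* operations handled above.

#include <sys/types.h>
#include <err.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

static void
ctl_write(pid_t pid, const char *cmd)
{
	char path[64];
	int fd;

	snprintf(path, sizeof(path), "/proc/%d/ctl", (int)pid);
	if ((fd = open(path, O_WRONLY)) == -1)
		err(1, "%s", path);
	if (write(fd, cmd, strlen(cmd)) == -1)
		err(1, "write: %s", cmd);
	close(fd);
}

int
main(int argc, char *argv[])
{
	pid_t pid;

	if (argc != 2)
		errx(1, "usage: %s pid", argv[0]);
	pid = (pid_t)atoi(argv[1]);

	ctl_write(pid, "attach");	/* PROCFS_CTL_ATTACH: reparent + SIGSTOP */
	ctl_write(pid, "wait");		/* PROCFS_CTL_WAIT: wait for the stop */
	/* ... read /proc/<pid>/regs, /proc/<pid>/mem here ... */
	ctl_write(pid, "detach");	/* PROCFS_CTL_DETACH: restore old parent */
	return 0;
}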
Example #7
int
linux_clone(struct thread *td, struct linux_clone_args *args)
{
	int error, ff = RFPROC | RFSTOPPED;
	struct proc *p2;
	struct thread *td2;
	int exit_signal;
	struct linux_emuldata *em;

#ifdef DEBUG
	if (ldebug(clone)) {
		printf(ARGS(clone, "flags %x, stack %p, parent tid: %p, "
		    "child tid: %p"), (unsigned)args->flags,
		    args->stack, args->parent_tidptr, args->child_tidptr);
	}
#endif

	exit_signal = args->flags & 0x000000ff;
	if (LINUX_SIG_VALID(exit_signal)) {
		if (exit_signal <= LINUX_SIGTBLSZ)
			exit_signal =
			    linux_to_bsd_signal[_SIG_IDX(exit_signal)];
	} else if (exit_signal != 0)
		return (EINVAL);

	if (args->flags & LINUX_CLONE_VM)
		ff |= RFMEM;
	if (args->flags & LINUX_CLONE_SIGHAND)
		ff |= RFSIGSHARE;
	/*
	 * XXX: In Linux, sharing of fs info (chroot/cwd/umask)
	 * and open files is independent.  In FreeBSD, it's in one
	 * structure but in reality it does not cause any problems
	 * because both of these flags are usually set together.
	 */
	if (!(args->flags & (LINUX_CLONE_FILES | LINUX_CLONE_FS)))
		ff |= RFFDG;

	/*
	 * Attempt to detect when linux_clone(2) is used for creating
	 * kernel threads.  Unfortunately, despite the existence of the
	 * CLONE_THREAD flag, the version of the linuxthreads package
	 * used in the most popular distros as of the beginning of 2005
	 * doesn't make any use of it.  This detection therefore relies
	 * on the empirical observation that linuxthreads sets a certain
	 * combination of flags, so that we can make a more or less
	 * precise detection and notify the FreeBSD kernel that several
	 * processes are in fact part of the same threading group, and
	 * that special treatment is necessary for signal delivery
	 * between those processes and for fd locking.
	 */
	if ((args->flags & 0xffffff00) == LINUX_THREADING_FLAGS)
		ff |= RFTHREAD;

	if (args->flags & LINUX_CLONE_PARENT_SETTID)
		if (args->parent_tidptr == NULL)
			return (EINVAL);

	error = fork1(td, ff, 0, &p2, NULL, 0);
	if (error)
		return (error);

	if (args->flags & (LINUX_CLONE_PARENT | LINUX_CLONE_THREAD)) {
	   	sx_xlock(&proctree_lock);
		PROC_LOCK(p2);
		proc_reparent(p2, td->td_proc->p_pptr);
		PROC_UNLOCK(p2);
		sx_xunlock(&proctree_lock);
	}

	/* create the emuldata */
	error = linux_proc_init(td, p2->p_pid, args->flags);
	/* reference it - no need to check this */
	em = em_find(p2, EMUL_DOLOCK);
	KASSERT(em != NULL, ("clone: emuldata not found."));
	/* and adjust it */

	if (args->flags & LINUX_CLONE_THREAD) {
#ifdef notyet
	   	PROC_LOCK(p2);
	   	p2->p_pgrp = td->td_proc->p_pgrp;
	   	PROC_UNLOCK(p2);
#endif
		exit_signal = 0;
	}

	if (args->flags & LINUX_CLONE_CHILD_SETTID)
		em->child_set_tid = args->child_tidptr;
	else
	   	em->child_set_tid = NULL;

	if (args->flags & LINUX_CLONE_CHILD_CLEARTID)
		em->child_clear_tid = args->child_tidptr;
	else
	   	em->child_clear_tid = NULL;

	EMUL_UNLOCK(&emul_lock);

	if (args->flags & LINUX_CLONE_PARENT_SETTID) {
		error = copyout(&p2->p_pid, args->parent_tidptr,
		    sizeof(p2->p_pid));
		if (error)
			printf(LMSG("copyout failed!"));
	}

	PROC_LOCK(p2);
	p2->p_sigparent = exit_signal;
	PROC_UNLOCK(p2);
	td2 = FIRST_THREAD_IN_PROC(p2);
	/*
	 * If stack is NULL, we are supposed to COW the calling process's
	 * stack.  This is what a normal fork() does, so we just leave
	 * the tf_rsp argument intact.
	 */
	if (args->stack)
		linux_set_upcall_kse(td2, PTROUT(args->stack));

	if (args->flags & LINUX_CLONE_SETTLS)
		linux_set_cloned_tls(td2, args->tls);

#ifdef DEBUG
	if (ldebug(clone))
		printf(LMSG("clone: successful rfork to %d, "
		    "stack %p sig = %d"), (int)p2->p_pid, args->stack,
		    exit_signal);
#endif
	if (args->flags & LINUX_CLONE_VFORK) {
	   	PROC_LOCK(p2);
	   	p2->p_flag |= P_PPWAIT;
	   	PROC_UNLOCK(p2);
	}

	/*
	 * Make this runnable after we are finished with it.
	 */
	thread_lock(td2);
	TD_SET_CAN_RUN(td2);
	sched_add(td2, SRQ_BORING);
	thread_unlock(td2);

	td->td_retval[0] = p2->p_pid;
	td->td_retval[1] = 0;

	if (args->flags & LINUX_CLONE_VFORK) {
		/* wait for the children to exit, ie. emulate vfork */
		PROC_LOCK(p2);
		while (p2->p_flag & P_PPWAIT)
			cv_wait(&p2->p_pwait, &p2->p_mtx);
		PROC_UNLOCK(p2);
	}

	return (0);
}
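
A Linux-side sketch that exercises the path above: CLONE_VFORK becomes RFPPWAIT (the P_PPWAIT wait loop at the end of the function), and the low byte of the flags (SIGCHLD here) ends up in the child's p_sigparent.

#define _GNU_SOURCE
#include <sched.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>

static int
child_fn(void *arg)
{
	printf("child %d running, parent %d\n",
	    (int)getpid(), (int)getppid());
	return 0;
}

int
main(void)
{
	const size_t stksz = 64 * 1024;
	char *stack = malloc(stksz);
	pid_t pid;

	if (stack == NULL) {
		perror("malloc");
		return 1;
	}
	/*
	 * CLONE_VFORK maps to RFPPWAIT/P_PPWAIT above: the parent
	 * blocks until the child execs or exits.  The stack pointer is
	 * the top of the buffer because the stack grows down.
	 */
	pid = clone(child_fn, stack + stksz, CLONE_VFORK | SIGCHLD, NULL);
	if (pid == -1) {
		perror("clone");
		return 1;
	}
	waitpid(pid, NULL, 0);
	free(stack);
	return 0;
}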
Example #8
/*
 * Process debugging system call.
 */
int
sys_ptrace(struct lwp *l, const struct sys_ptrace_args *uap, register_t *retval)
{
	/* {
		syscallarg(int) req;
		syscallarg(pid_t) pid;
		syscallarg(void *) addr;
		syscallarg(int) data;
	} */
	struct proc *p = l->l_proc;
	struct lwp *lt;
	struct proc *t;				/* target process */
	struct uio uio;
	struct iovec iov;
	struct ptrace_io_desc piod;
	struct ptrace_lwpinfo pl;
	struct vmspace *vm;
	int error, write, tmp, req, pheld;
	int signo;
	ksiginfo_t ksi;
#ifdef COREDUMP
	char *path;
#endif

	error = 0;
	req = SCARG(uap, req);

	/*
	 * If attaching or detaching, we need to get a write hold on the
	 * proclist lock so that we can re-parent the target process.
	 */
	mutex_enter(proc_lock);

	/* "A foolish consistency..." XXX */
	if (req == PT_TRACE_ME) {
		t = p;
		mutex_enter(t->p_lock);
	} else {
		/* Find the process we're supposed to be operating on. */
		if ((t = p_find(SCARG(uap, pid), PFIND_LOCKED)) == NULL) {
			mutex_exit(proc_lock);
			return (ESRCH);
		}

		/* XXX-elad */
		mutex_enter(t->p_lock);
		error = kauth_authorize_process(l->l_cred, KAUTH_PROCESS_CANSEE,
		    t, KAUTH_ARG(KAUTH_REQ_PROCESS_CANSEE_ENTRY), NULL, NULL);
		if (error) {
			mutex_exit(proc_lock);
			mutex_exit(t->p_lock);
			return (ESRCH);
		}
	}

	/*
	 * Grab a reference on the process to prevent it from execing or
	 * exiting.
	 */
	if (!rw_tryenter(&t->p_reflock, RW_READER)) {
		mutex_exit(proc_lock);
		mutex_exit(t->p_lock);
		return EBUSY;
	}

	/* Make sure we can operate on it. */
	switch (req) {
	case  PT_TRACE_ME:
		/* Saying that you're being traced is always legal. */
		break;

	case  PT_ATTACH:
		/*
		 * You can't attach to a process if:
		 *	(1) it's the process that's doing the attaching,
		 */
		if (t->p_pid == p->p_pid) {
			error = EINVAL;
			break;
		}

		/*
		 *  (2) it's a system process
		 */
		if (t->p_flag & PK_SYSTEM) {
			error = EPERM;
			break;
		}

		/*
		 *	(3) it's already being traced, or
		 */
		if (ISSET(t->p_slflag, PSL_TRACED)) {
			error = EBUSY;
			break;
		}

		/*
		 * 	(4) the tracer is chrooted, and its root directory is
		 * 	    not at or above the root directory of the tracee
		 */
		mutex_exit(t->p_lock);	/* XXXSMP */
		tmp = proc_isunder(t, l);
		mutex_enter(t->p_lock);	/* XXXSMP */
		if (!tmp) {
			error = EPERM;
			break;
		}
		break;

	case  PT_READ_I:
	case  PT_READ_D:
	case  PT_WRITE_I:
	case  PT_WRITE_D:
	case  PT_IO:
#ifdef PT_GETREGS
	case  PT_GETREGS:
#endif
#ifdef PT_SETREGS
	case  PT_SETREGS:
#endif
#ifdef PT_GETFPREGS
	case  PT_GETFPREGS:
#endif
#ifdef PT_SETFPREGS
	case  PT_SETFPREGS:
#endif
#ifdef __HAVE_PTRACE_MACHDEP
	PTRACE_MACHDEP_REQUEST_CASES
#endif
		/*
		 * You can't read/write the memory or registers of a process
		 * if the tracer is chrooted, and its root directory is not at
		 * or above the root directory of the tracee.
		 */
		mutex_exit(t->p_lock);	/* XXXSMP */
		tmp = proc_isunder(t, l);
		mutex_enter(t->p_lock);	/* XXXSMP */
		if (!tmp) {
			error = EPERM;
			break;
		}
		/*FALLTHROUGH*/

	case  PT_CONTINUE:
	case  PT_KILL:
	case  PT_DETACH:
	case  PT_LWPINFO:
	case  PT_SYSCALL:
#ifdef COREDUMP
	case  PT_DUMPCORE:
#endif
#ifdef PT_STEP
	case  PT_STEP:
#endif
		/*
		 * You can't do what you want to the process if:
		 *	(1) It's not being traced at all,
		 */
		if (!ISSET(t->p_slflag, PSL_TRACED)) {
			error = EPERM;
			break;
		}

		/*
		 *	(2) it's being traced by procfs (which has
		 *	    different signal delivery semantics),
		 */
		if (ISSET(t->p_slflag, PSL_FSTRACE)) {
			uprintf("file system traced\n");
			error = EBUSY;
			break;
		}

		/*
		 *	(3) it's not being traced by _you_, or
		 */
		if (t->p_pptr != p) {
			uprintf("parent %d != %d\n", t->p_pptr->p_pid, p->p_pid);
			error = EBUSY;
			break;
		}

		/*
		 *	(4) it's not currently stopped.
		 */
		if (t->p_stat != SSTOP || !t->p_waited /* XXXSMP */) {
			uprintf("stat %d flag %d\n", t->p_stat,
			    !t->p_waited);
			error = EBUSY;
			break;
		}
		break;

	default:			/* It was not a legal request. */
		error = EINVAL;
		break;
	}

	if (error == 0)
		error = kauth_authorize_process(l->l_cred,
		    KAUTH_PROCESS_PTRACE, t, KAUTH_ARG(req),
		    NULL, NULL);

	if (error != 0) {
		mutex_exit(proc_lock);
		mutex_exit(t->p_lock);
		rw_exit(&t->p_reflock);
		return error;
	}

	/* Do single-step fixup if needed. */
	FIX_SSTEP(t);

	/*
	 * XXX NJWLWP
	 *
	 * The entire ptrace interface needs work to be useful to a
	 * process with multiple LWPs. For the moment, we'll kluge
	 * this; memory access will be fine, but register access will
	 * be weird.
	 */
	lt = LIST_FIRST(&t->p_lwps);
	KASSERT(lt != NULL);
	lwp_addref(lt);

	/*
	 * Which locks do we need held? XXX Ugly.
	 */
	switch (req) {
#ifdef PT_STEP
	case PT_STEP:
#endif
	case PT_CONTINUE:
	case PT_DETACH:
	case PT_KILL:
	case PT_SYSCALL:
	case PT_ATTACH:
	case PT_TRACE_ME:
		pheld = 1;
		break;
	default:
		mutex_exit(proc_lock);
		mutex_exit(t->p_lock);
		pheld = 0;
		break;
	}

	/* Now do the operation. */
	write = 0;
	*retval = 0;
	tmp = 0;

	switch (req) {
	case  PT_TRACE_ME:
		/* Just set the trace flag. */
		SET(t->p_slflag, PSL_TRACED);
		t->p_opptr = t->p_pptr;
		break;

	case  PT_WRITE_I:		/* XXX no separate I and D spaces */
	case  PT_WRITE_D:
#if defined(__HAVE_RAS)
		/*
		 * Can't write to a RAS
		 */
		if (ras_lookup(t, SCARG(uap, addr)) != (void *)-1) {
			error = EACCES;
			break;
		}
#endif
		write = 1;
		tmp = SCARG(uap, data);
		/* FALLTHROUGH */

	case  PT_READ_I:		/* XXX no separate I and D spaces */
	case  PT_READ_D:
		/* write = 0 done above. */
		iov.iov_base = (void *)&tmp;
		iov.iov_len = sizeof(tmp);
		uio.uio_iov = &iov;
		uio.uio_iovcnt = 1;
		uio.uio_offset = (off_t)(unsigned long)SCARG(uap, addr);
		uio.uio_resid = sizeof(tmp);
		uio.uio_rw = write ? UIO_WRITE : UIO_READ;
		UIO_SETUP_SYSSPACE(&uio);

		error = process_domem(l, lt, &uio);
		if (!write)
			*retval = tmp;
		break;

	case  PT_IO:
		error = copyin(SCARG(uap, addr), &piod, sizeof(piod));
		if (error)
			break;
		switch (piod.piod_op) {
		case PIOD_READ_D:
		case PIOD_READ_I:
			uio.uio_rw = UIO_READ;
			break;
		case PIOD_WRITE_D:
		case PIOD_WRITE_I:
			/*
			 * Can't write to a RAS
			 */
			if (ras_lookup(t, SCARG(uap, addr)) != (void *)-1) {
				/* Don't return directly: that would leak
				 * the LWP reference and t->p_reflock. */
				error = EACCES;
				break;
			}
			uio.uio_rw = UIO_WRITE;
			break;
		default:
			error = EINVAL;
			break;
		}
		if (error)
			break;
		error = proc_vmspace_getref(l->l_proc, &vm);
		if (error)
			break;
		iov.iov_base = piod.piod_addr;
		iov.iov_len = piod.piod_len;
		uio.uio_iov = &iov;
		uio.uio_iovcnt = 1;
		uio.uio_offset = (off_t)(unsigned long)piod.piod_offs;
		uio.uio_resid = piod.piod_len;
		uio.uio_vmspace = vm;

		error = process_domem(l, lt, &uio);
		piod.piod_len -= uio.uio_resid;
		(void) copyout(&piod, SCARG(uap, addr), sizeof(piod));
		uvmspace_free(vm);
		break;

#ifdef COREDUMP
	case  PT_DUMPCORE:
		if ((path = SCARG(uap, addr)) != NULL) {
			char *dst;
			int len = SCARG(uap, data);
			if (len < 0 || len >= MAXPATHLEN) {
				error = EINVAL;
				break;
			}
			dst = malloc(len + 1, M_TEMP, M_WAITOK);
			if ((error = copyin(path, dst, len)) != 0) {
				free(dst, M_TEMP);
				break;
			}
			path = dst;
			path[len] = '\0';
		}
		error = coredump(lt, path);
		if (path)
			free(path, M_TEMP);
		break;
#endif

#ifdef PT_STEP
	case  PT_STEP:
		/*
		 * From the 4.4BSD PRM:
		 * "Execution continues as in request PT_CONTINUE; however
		 * as soon as possible after execution of at least one
		 * instruction, execution stops again. [ ... ]"
		 */
#endif
	case  PT_CONTINUE:
	case  PT_SYSCALL:
	case  PT_DETACH:
		if (req == PT_SYSCALL) {
			if (!ISSET(t->p_slflag, PSL_SYSCALL)) {
				SET(t->p_slflag, PSL_SYSCALL);
#ifdef __HAVE_SYSCALL_INTERN
				(*t->p_emul->e_syscall_intern)(t);
#endif
			}
		} else {
			if (ISSET(t->p_slflag, PSL_SYSCALL)) {
				CLR(t->p_slflag, PSL_SYSCALL);
#ifdef __HAVE_SYSCALL_INTERN
				(*t->p_emul->e_syscall_intern)(t);
#endif
			}
		}
		p->p_trace_enabled = trace_is_enabled(p);

		/*
		 * From the 4.4BSD PRM:
		 * "The data argument is taken as a signal number and the
		 * child's execution continues at location addr as if it
		 * incurred that signal.  Normally the signal number will
		 * be either 0 to indicate that the signal that caused the
		 * stop should be ignored, or that value fetched out of
		 * the process's image indicating which signal caused
		 * the stop.  If addr is (int *)1 then execution continues
		 * from where it stopped."
		 */

		/* Check that the data is a valid signal number or zero. */
		if (SCARG(uap, data) < 0 || SCARG(uap, data) >= NSIG) {
			error = EINVAL;
			break;
		}

		uvm_lwp_hold(lt);

		/* If the address parameter is not (int *)1, set the pc. */
		if ((int *)SCARG(uap, addr) != (int *)1)
			if ((error = process_set_pc(lt, SCARG(uap, addr))) != 0) {
				uvm_lwp_rele(lt);
				break;
			}

#ifdef PT_STEP
		/*
		 * Arrange for a single-step, if that's requested and possible.
		 */
		error = process_sstep(lt, req == PT_STEP);
		if (error) {
			uvm_lwp_rele(lt);
			break;
		}
#endif

		uvm_lwp_rele(lt);

		if (req == PT_DETACH) {
			CLR(t->p_slflag, PSL_TRACED|PSL_FSTRACE|PSL_SYSCALL);

			/* give process back to original parent or init */
			if (t->p_opptr != t->p_pptr) {
				struct proc *pp = t->p_opptr;
				proc_reparent(t, pp ? pp : initproc);
			}

			/* not being traced any more */
			t->p_opptr = NULL;
		}

		signo = SCARG(uap, data);
	sendsig:
		/* Finally, deliver the requested signal (or none). */
		if (t->p_stat == SSTOP) {
			/*
			 * Unstop the process.  If it needs to take a
			 * signal, make all efforts to ensure that at
			 * an LWP runs to see it.
			 */
			t->p_xstat = signo;
			proc_unstop(t);
		} else if (signo != 0) {
			KSI_INIT_EMPTY(&ksi);
			ksi.ksi_signo = signo;
			kpsignal2(t, &ksi);
		}
		break;

	case  PT_KILL:
		/* just send the process a KILL signal. */
		signo = SIGKILL;
		goto sendsig;	/* in PT_CONTINUE, above. */

	case  PT_ATTACH:
		/*
		 * Go ahead and set the trace flag.
		 * Save the old parent (it's reset in
		 *   _DETACH, and also in kern_exit.c:wait4()).
		 * Reparent the process so that the tracing
		 *   proc gets to see all the action.
		 * Stop the target.
		 */
		t->p_opptr = t->p_pptr;
		if (t->p_pptr != p) {
			struct proc *parent = t->p_pptr;

			if (parent->p_lock < t->p_lock) {
				if (!mutex_tryenter(parent->p_lock)) {
					mutex_exit(t->p_lock);
					mutex_enter(parent->p_lock);
				}
			} else if (parent->p_lock > t->p_lock) {
				mutex_enter(parent->p_lock);
			}
			parent->p_slflag |= PSL_CHTRACED;
			proc_reparent(t, p);
			if (parent->p_lock != t->p_lock)
				mutex_exit(parent->p_lock);
		}
		SET(t->p_slflag, PSL_TRACED);
		signo = SIGSTOP;
		goto sendsig;

	case PT_LWPINFO:
		if (SCARG(uap, data) != sizeof(pl)) {
			error = EINVAL;
			break;
		}
		error = copyin(SCARG(uap, addr), &pl, sizeof(pl));
		if (error)
			break;
		tmp = pl.pl_lwpid;
		lwp_delref(lt);
		mutex_enter(t->p_lock);
		if (tmp == 0)
			lt = LIST_FIRST(&t->p_lwps);
		else {
			lt = lwp_find(t, tmp);
			if (lt == NULL) {
				mutex_exit(t->p_lock);
				error = ESRCH;
				break;
			}
			lt = LIST_NEXT(lt, l_sibling);
		}
		while (lt != NULL && lt->l_stat == LSZOMB)
			lt = LIST_NEXT(lt, l_sibling);
		pl.pl_lwpid = 0;
		pl.pl_event = 0;
		if (lt) {
			lwp_addref(lt);
			pl.pl_lwpid = lt->l_lid;
			if (lt->l_lid == t->p_sigctx.ps_lwp)
				pl.pl_event = PL_EVENT_SIGNAL;
		}
		mutex_exit(t->p_lock);

		error = copyout(&pl, SCARG(uap, addr), sizeof(pl));
		break;

#ifdef PT_SETREGS
	case  PT_SETREGS:
		write = 1;
#endif
#ifdef PT_GETREGS
	case  PT_GETREGS:
		/* write = 0 done above. */
#endif
#if defined(PT_SETREGS) || defined(PT_GETREGS)
		tmp = SCARG(uap, data);
		if (tmp != 0 && t->p_nlwps > 1) {
			lwp_delref(lt);
			mutex_enter(t->p_lock);
			lt = lwp_find(t, tmp);
			if (lt == NULL) {
				mutex_exit(t->p_lock);
				error = ESRCH;
				break;
			}
			lwp_addref(lt);
			mutex_exit(t->p_lock);
		}
		if (!process_validregs(lt))
			error = EINVAL;
		else {
			error = proc_vmspace_getref(l->l_proc, &vm);
			if (error)
				break;
			iov.iov_base = SCARG(uap, addr);
			iov.iov_len = sizeof(struct reg);
			uio.uio_iov = &iov;
			uio.uio_iovcnt = 1;
			uio.uio_offset = 0;
			uio.uio_resid = sizeof(struct reg);
			uio.uio_rw = write ? UIO_WRITE : UIO_READ;
			uio.uio_vmspace = vm;

			error = process_doregs(l, lt, &uio);
			uvmspace_free(vm);
		}
		break;
#endif

#ifdef PT_SETFPREGS
	case  PT_SETFPREGS:
		write = 1;
#endif
#ifdef PT_GETFPREGS
	case  PT_GETFPREGS:
		/* write = 0 done above. */
#endif
#if defined(PT_SETFPREGS) || defined(PT_GETFPREGS)
		tmp = SCARG(uap, data);
		if (tmp != 0 && t->p_nlwps > 1) {
			lwp_delref(lt);
			mutex_enter(t->p_lock);
			lt = lwp_find(t, tmp);
			if (lt == NULL) {
				mutex_exit(t->p_lock);
				error = ESRCH;
				break;
			}
			lwp_addref(lt);
			mutex_exit(t->p_lock);
		}
		if (!process_validfpregs(lt))
			error = EINVAL;
		else {
			error = proc_vmspace_getref(l->l_proc, &vm);
			if (error)
				break;
			iov.iov_base = SCARG(uap, addr);
			iov.iov_len = sizeof(struct fpreg);
			uio.uio_iov = &iov;
			uio.uio_iovcnt = 1;
			uio.uio_offset = 0;
			uio.uio_resid = sizeof(struct fpreg);
			uio.uio_rw = write ? UIO_WRITE : UIO_READ;
			uio.uio_vmspace = vm;

			error = process_dofpregs(l, lt, &uio);
			uvmspace_free(vm);
		}
		break;
#endif

#ifdef __HAVE_PTRACE_MACHDEP
	PTRACE_MACHDEP_REQUEST_CASES
		error = ptrace_machdep_dorequest(l, lt,
		    req, SCARG(uap, addr), SCARG(uap, data));
		break;
#endif
	}

	if (pheld) {
		mutex_exit(t->p_lock);
		mutex_exit(proc_lock);
	}
	if (lt != NULL)
		lwp_delref(lt);
	rw_exit(&t->p_reflock);

	return error;
}
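
A hedged userland sketch of the PT_LWPINFO loop above (NetBSD): starting from pl_lwpid 0, each call returns the next LWP until the kernel hands back 0. It assumes the tracee is already attached and stopped.

#include <sys/types.h>
#include <sys/ptrace.h>
#include <err.h>
#include <stdio.h>
#include <string.h>

static void
list_lwps(pid_t pid)
{
	struct ptrace_lwpinfo pl;

	memset(&pl, 0, sizeof(pl));
	pl.pl_lwpid = 0;		/* 0: start with the first LWP */
	for (;;) {
		if (ptrace(PT_LWPINFO, pid, &pl, sizeof(pl)) == -1)
			err(1, "PT_LWPINFO");
		if (pl.pl_lwpid == 0)	/* 0 again: ran off the list */
			break;
		printf("lwp %d%s\n", (int)pl.pl_lwpid,
		    pl.pl_event == PL_EVENT_SIGNAL ?
		    " (took the last signal)" : "");
	}
}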
Example #9
/*
 * General fork call.  Note that another LWP in the process may call exec()
 * or exit() while we are forking.  It's safe to continue here, because
 * neither operation will complete until all LWPs have exited the process.
 */
int
fork1(struct lwp *l1, int flags, int exitsig, void *stack, size_t stacksize,
    void (*func)(void *), void *arg, register_t *retval,
    struct proc **rnewprocp)
{
	struct proc	*p1, *p2, *parent;
	struct plimit   *p1_lim;
	uid_t		uid;
	struct lwp	*l2;
	int		count;
	vaddr_t		uaddr;
	int		tnprocs;
	int		tracefork;
	int		error = 0;

	p1 = l1->l_proc;
	uid = kauth_cred_getuid(l1->l_cred);
	tnprocs = atomic_inc_uint_nv(&nprocs);

	/*
	 * Although process entries are dynamically created, we still keep
	 * a global limit on the maximum number we will create.
	 */
	if (__predict_false(tnprocs >= maxproc))
		error = -1;
	else
		error = kauth_authorize_process(l1->l_cred,
		    KAUTH_PROCESS_FORK, p1, KAUTH_ARG(tnprocs), NULL, NULL);

	if (error) {
		static struct timeval lasttfm;
		atomic_dec_uint(&nprocs);
		if (ratecheck(&lasttfm, &fork_tfmrate))
			tablefull("proc", "increase kern.maxproc or NPROC");
		if (forkfsleep)
			kpause("forkmx", false, forkfsleep, NULL);
		return EAGAIN;
	}

	/*
	 * Enforce limits.
	 */
	count = chgproccnt(uid, 1);
	if (__predict_false(count > p1->p_rlimit[RLIMIT_NPROC].rlim_cur)) {
		if (kauth_authorize_process(l1->l_cred, KAUTH_PROCESS_RLIMIT,
		    p1, KAUTH_ARG(KAUTH_REQ_PROCESS_RLIMIT_BYPASS),
		    &p1->p_rlimit[RLIMIT_NPROC], KAUTH_ARG(RLIMIT_NPROC)) != 0) {
			(void)chgproccnt(uid, -1);
			atomic_dec_uint(&nprocs);
			if (forkfsleep)
				kpause("forkulim", false, forkfsleep, NULL);
			return EAGAIN;
		}
	}

	/*
	 * Allocate virtual address space for the U-area now, while it
	 * is still easy to abort the fork operation if we're out of
	 * kernel virtual address space.
	 */
	uaddr = uvm_uarea_alloc();
	if (__predict_false(uaddr == 0)) {
		(void)chgproccnt(uid, -1);
		atomic_dec_uint(&nprocs);
		return ENOMEM;
	}

	/*
	 * We are now committed to the fork.  From here on, we may
	 * block on resources, but resource allocation may NOT fail.
	 */

	/* Allocate new proc. */
	p2 = proc_alloc();

	/*
	 * Make a proc table entry for the new process.
	 * Start by zeroing the section of proc that is zero-initialized,
	 * then copy the section that is copied directly from the parent.
	 */
	memset(&p2->p_startzero, 0,
	    (unsigned) ((char *)&p2->p_endzero - (char *)&p2->p_startzero));
	memcpy(&p2->p_startcopy, &p1->p_startcopy,
	    (unsigned) ((char *)&p2->p_endcopy - (char *)&p2->p_startcopy));

	TAILQ_INIT(&p2->p_sigpend.sp_info);

	LIST_INIT(&p2->p_lwps);
	LIST_INIT(&p2->p_sigwaiters);

	/*
	 * Duplicate sub-structures as needed.
	 * Increase reference counts on shared objects.
	 * Inherit flags we want to keep.  The flags related to SIGCHLD
	 * handling are important in order to keep a consistent behaviour
	 * for the child after the fork.  If we are a 32-bit process, the
	 * child will be too.
	 */
	p2->p_flag =
	    p1->p_flag & (PK_SUGID | PK_NOCLDWAIT | PK_CLDSIGIGN | PK_32);
	p2->p_emul = p1->p_emul;
	p2->p_execsw = p1->p_execsw;

	if (flags & FORK_SYSTEM) {
		/*
		 * Mark it as a system process.  Set P_NOCLDWAIT so that
		 * children are reparented to init(8) when they exit.
		 * init(8) can easily wait them out for us.
		 */
		p2->p_flag |= (PK_SYSTEM | PK_NOCLDWAIT);
	}

	mutex_init(&p2->p_stmutex, MUTEX_DEFAULT, IPL_HIGH);
	mutex_init(&p2->p_auxlock, MUTEX_DEFAULT, IPL_NONE);
	rw_init(&p2->p_reflock);
	cv_init(&p2->p_waitcv, "wait");
	cv_init(&p2->p_lwpcv, "lwpwait");

	/*
	 * Share a lock between the processes if they are to share signal
	 * state: we must synchronize access to it.
	 */
	if (flags & FORK_SHARESIGS) {
		p2->p_lock = p1->p_lock;
		mutex_obj_hold(p1->p_lock);
	} else
		p2->p_lock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NONE);

	kauth_proc_fork(p1, p2);

	p2->p_raslist = NULL;
#if defined(__HAVE_RAS)
	ras_fork(p1, p2);
#endif

	/* bump references to the text vnode (for procfs) */
	p2->p_textvp = p1->p_textvp;
	if (p2->p_textvp)
		vref(p2->p_textvp);

	if (flags & FORK_SHAREFILES)
		fd_share(p2);
	else if (flags & FORK_CLEANFILES)
		p2->p_fd = fd_init(NULL);
	else
		p2->p_fd = fd_copy();

	/* XXX racy */
	p2->p_mqueue_cnt = p1->p_mqueue_cnt;

	if (flags & FORK_SHARECWD)
		cwdshare(p2);
	else
		p2->p_cwdi = cwdinit();

	/*
	 * Note: p_limit (rlimit stuff) is copy-on-write, so normally
	 * we just need to increase pl_refcnt.
	 */
	p1_lim = p1->p_limit;
	if (!p1_lim->pl_writeable) {
		lim_addref(p1_lim);
		p2->p_limit = p1_lim;
	} else {
		p2->p_limit = lim_copy(p1_lim);
	}

	if (flags & FORK_PPWAIT) {
		/* Mark ourselves as waiting for a child. */
		l1->l_pflag |= LP_VFORKWAIT;
		p2->p_lflag = PL_PPWAIT;
		p2->p_vforklwp = l1;
	} else {
		p2->p_lflag = 0;
	}
	p2->p_sflag = 0;
	p2->p_slflag = 0;
	parent = (flags & FORK_NOWAIT) ? initproc : p1;
	p2->p_pptr = parent;
	p2->p_ppid = parent->p_pid;
	LIST_INIT(&p2->p_children);

	p2->p_aio = NULL;

#ifdef KTRACE
	/*
	 * Copy traceflag and tracefile if enabled.
	 * If not inherited, these were zeroed above.
	 */
	if (p1->p_traceflag & KTRFAC_INHERIT) {
		mutex_enter(&ktrace_lock);
		p2->p_traceflag = p1->p_traceflag;
		if ((p2->p_tracep = p1->p_tracep) != NULL)
			ktradref(p2);
		mutex_exit(&ktrace_lock);
	}
#endif

	/*
	 * Create signal actions for the child process.
	 */
	p2->p_sigacts = sigactsinit(p1, flags & FORK_SHARESIGS);
	mutex_enter(p1->p_lock);
	p2->p_sflag |=
	    (p1->p_sflag & (PS_STOPFORK | PS_STOPEXEC | PS_NOCLDSTOP));
	sched_proc_fork(p1, p2);
	mutex_exit(p1->p_lock);

	p2->p_stflag = p1->p_stflag;

	/*
	 * p_stats.
	 * Copy parts of p_stats, and zero out the rest.
	 */
	p2->p_stats = pstatscopy(p1->p_stats);

	/*
	 * Set up the new process address space.
	 */
	uvm_proc_fork(p1, p2, (flags & FORK_SHAREVM) ? true : false);

	/*
	 * Finish creating the child process.
	 * It will return through a different path later.
	 */
	lwp_create(l1, p2, uaddr, (flags & FORK_PPWAIT) ? LWP_VFORK : 0,
	    stack, stacksize, (func != NULL) ? func : child_return, arg, &l2,
	    l1->l_class);

	/*
	 * Inherit l_private from the parent.
	 * Note that we cannot use lwp_setprivate() here since that
	 * also sets the CPU TLS register, which is incorrect if the
	 * process has changed that without letting the kernel know.
	 */
	l2->l_private = l1->l_private;

	/*
	 * If emulation has a process fork hook, call it now.
	 */
	if (p2->p_emul->e_proc_fork)
		(*p2->p_emul->e_proc_fork)(p2, l1, flags);

	/*
	 * ...and finally, any other random fork hooks that subsystems
	 * might have registered.
	 */
	doforkhooks(p2, p1);

	SDT_PROBE(proc,,,create, p2, p1, flags, 0, 0);

	/*
	 * It's now safe for the scheduler and other processes to see the
	 * child process.
	 */
	mutex_enter(proc_lock);

	if (p1->p_session->s_ttyvp != NULL && p1->p_lflag & PL_CONTROLT)
		p2->p_lflag |= PL_CONTROLT;

	LIST_INSERT_HEAD(&parent->p_children, p2, p_sibling);
	p2->p_exitsig = exitsig;		/* signal for parent on exit */

	/*
	 * We don't want to report a trace-fork event for vfork()ed
	 * processes, because they would not receive the SIGTRAP until
	 * it is too late.
	 */
	tracefork = (p1->p_slflag & (PSL_TRACEFORK|PSL_TRACED)) ==
	    (PSL_TRACEFORK|PSL_TRACED) && (flags & FORK_PPWAIT) == 0;
	if (tracefork) {
		p2->p_slflag |= PSL_TRACED;
		p2->p_opptr = p2->p_pptr;
		if (p2->p_pptr != p1->p_pptr) {
			struct proc *parent1 = p2->p_pptr;

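			/*
			 * Take parent1's lock while respecting the
			 * ordering on process locks (by address),
			 * dropping p2's lock first if we would
			 * otherwise block.
			 */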
			if (parent1->p_lock < p2->p_lock) {
				if (!mutex_tryenter(parent1->p_lock)) {
					mutex_exit(p2->p_lock);
					mutex_enter(parent1->p_lock);
				}
			} else if (parent1->p_lock > p2->p_lock) {
				mutex_enter(parent1->p_lock);
			}
			parent1->p_slflag |= PSL_CHTRACED;
			proc_reparent(p2, p1->p_pptr);
			if (parent1->p_lock != p2->p_lock)
				mutex_exit(parent1->p_lock);
		}

		/*
		 * Set ptrace status.
		 */
		p1->p_fpid = p2->p_pid;
		p2->p_fpid = p1->p_pid;
	}

	LIST_INSERT_AFTER(p1, p2, p_pglist);
	LIST_INSERT_HEAD(&allproc, p2, p_list);

	p2->p_trace_enabled = trace_is_enabled(p2);
#ifdef __HAVE_SYSCALL_INTERN
	(*p2->p_emul->e_syscall_intern)(p2);
#endif

	/*
	 * Update stats now that we know the fork was successful.
	 */
	uvmexp.forks++;
	if (flags & FORK_PPWAIT)
		uvmexp.forks_ppwait++;
	if (flags & FORK_SHAREVM)
		uvmexp.forks_sharevm++;

	/*
	 * Pass a pointer to the new process to the caller.
	 */
	if (rnewprocp != NULL)
		*rnewprocp = p2;

	if (ktrpoint(KTR_EMUL))
		p2->p_traceflag |= KTRFAC_TRC_EMUL;

	/*
	 * Notify any interested parties about the new process.
	 */
	if (!SLIST_EMPTY(&p1->p_klist)) {
		mutex_exit(proc_lock);
		KNOTE(&p1->p_klist, NOTE_FORK | p2->p_pid);
		mutex_enter(proc_lock);
	}

	/*
	 * Make the child runnable, set its start time, and add it to
	 * the run queue, unless the parent requested that the child
	 * start in the SSTOP state.
	 */
	mutex_enter(p2->p_lock);

	/*
	 * Start profiling.
	 */
	if ((p2->p_stflag & PST_PROFIL) != 0) {
		mutex_spin_enter(&p2->p_stmutex);
		startprofclock(p2);
		mutex_spin_exit(&p2->p_stmutex);
	}

	getmicrotime(&p2->p_stats->p_start);
	p2->p_acflag = AFORK;
	lwp_lock(l2);
	KASSERT(p2->p_nrlwps == 1);
	if (p2->p_sflag & PS_STOPFORK) {
		struct schedstate_percpu *spc = &l2->l_cpu->ci_schedstate;
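		/*
		 * Parent asked for a stopped child: account the LWP
		 * as not running, mark the process SSTOP, and bump
		 * the parent's stopped-child count so that wait() can
		 * find it.
		 */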
		p2->p_nrlwps = 0;
		p2->p_stat = SSTOP;
		p2->p_waited = 0;
		p1->p_nstopchild++;
		l2->l_stat = LSSTOP;
		KASSERT(l2->l_wchan == NULL);
		lwp_unlock_to(l2, spc->spc_lwplock);
	} else {
		p2->p_nrlwps = 1;
		p2->p_stat = SACTIVE;
		l2->l_stat = LSRUN;
		sched_enqueue(l2, false);
		lwp_unlock(l2);
	}

	/*
	 * Return child pid to parent process,
	 * marking us as parent via retval[1].
	 */
	if (retval != NULL) {
		retval[0] = p2->p_pid;
		retval[1] = 0;
	}
	mutex_exit(p2->p_lock);

	/*
	 * Preserve synchronization semantics of vfork.  If waiting for
	 * the child to exec or exit, sleep until the child clears
	 * PL_PPWAIT.
	 */
#if 0
	while (l1->l_pflag & LP_VFORKWAIT) {
		cv_wait(&l1->l_waitcv, proc_lock);
	}
#else
	while (p2->p_lflag & PL_PPWAIT)
		cv_wait(&p1->p_waitcv, proc_lock);
#endif

	/*
	 * Let the parent know that we are tracing its child.
	 */
	if (tracefork) {
		ksiginfo_t ksi;

		KSI_INIT_EMPTY(&ksi);
		ksi.ksi_signo = SIGTRAP;
		ksi.ksi_lid = l1->l_lid;
		kpsignal(p1, &ksi, NULL);
	}
	mutex_exit(proc_lock);

	return 0;
}
Example #10
int
kern_ptrace(struct thread *td, int req, pid_t pid, void *addr, int data)
{
	struct iovec iov;
	struct uio uio;
	struct proc *curp, *p, *pp;
	struct thread *td2;
	struct ptrace_io_desc *piod;
	int error, write, tmp;
	int proctree_locked = 0;

	curp = td->td_proc;

	/*
	 * Lock the process tree before locking the process itself;
	 * these requests may reparent the target.
	 */
	switch (req) {
	case PT_TRACE_ME:
	case PT_ATTACH:
	case PT_STEP:
	case PT_CONTINUE:
	case PT_DETACH:
		sx_xlock(&proctree_lock);
		proctree_locked = 1;
		break;
	default:
		break;
	}
		
	write = 0;
	if (req == PT_TRACE_ME) {
		p = td->td_proc;
		PROC_LOCK(p);
	} else {
		if ((p = pfind(pid)) == NULL) {
			if (proctree_locked)
				sx_xunlock(&proctree_lock);
			return (ESRCH);
		}
	}
	if ((error = p_cansee(td, p)) != 0)
		goto fail;

	if ((error = p_candebug(td, p)) != 0)
		goto fail;

	/*
	 * System processes can't be debugged.
	 */
	if ((p->p_flag & P_SYSTEM) != 0) {
		error = EINVAL;
		goto fail;
	}
	
	/*
	 * Permissions check
	 */
	switch (req) {
	case PT_TRACE_ME:
		/* Always legal. */
		break;

	case PT_ATTACH:
		/* Self */
		if (p->p_pid == td->td_proc->p_pid) {
			error = EINVAL;
			goto fail;
		}

		/* Already traced */
		if (p->p_flag & P_TRACED) {
			error = EBUSY;
			goto fail;
		}

		/* Can't trace an ancestor if you're being traced. */
		if (curp->p_flag & P_TRACED) {
			for (pp = curp->p_pptr; pp != NULL; pp = pp->p_pptr) {
				if (pp == p) {
					error = EINVAL;
					goto fail;
				}
			}
		}

		/* OK */
		break;

	case PT_READ_I:
	case PT_READ_D:
	case PT_WRITE_I:
	case PT_WRITE_D:
	case PT_IO:
	case PT_CONTINUE:
	case PT_KILL:
	case PT_STEP:
	case PT_DETACH:
	case PT_GETREGS:
	case PT_SETREGS:
	case PT_GETFPREGS:
	case PT_SETFPREGS:
	case PT_GETDBREGS:
	case PT_SETDBREGS:
		/* not being traced... */
		if ((p->p_flag & P_TRACED) == 0) {
			error = EPERM;
			goto fail;
		}

		/* not being traced by YOU */
		if (p->p_pptr != td->td_proc) {
			error = EBUSY;
			goto fail;
		}

		/* not currently stopped */
		if (!P_SHOULDSTOP(p) || (p->p_flag & P_WAITED) == 0) {
			error = EBUSY;
			goto fail;
		}

		/* OK */
		break;

	default:
		error = EINVAL;
		goto fail;
	}

	td2 = FIRST_THREAD_IN_PROC(p);
#ifdef FIX_SSTEP
	/*
	 * Single-step fixup, a la procfs
	 */
	FIX_SSTEP(td2);			/* XXXKSE */
#endif

	/*
	 * Actually do the requests
	 */

	td->td_retval[0] = 0;

	switch (req) {
	case PT_TRACE_ME:
		/* set my trace flag and "owner" so it can read/write me */
		p->p_flag |= P_TRACED;
		p->p_oppid = p->p_pptr->p_pid;
		PROC_UNLOCK(p);
		sx_xunlock(&proctree_lock);
		return (0);

	case PT_ATTACH:
		/* security check done above */
		p->p_flag |= P_TRACED;
		p->p_oppid = p->p_pptr->p_pid;
		if (p->p_pptr != td->td_proc)
			proc_reparent(p, td->td_proc);
		data = SIGSTOP;
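		/*
		 * Stop the target by pushing a SIGSTOP through the
		 * common signal-delivery path below.
		 */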
		goto sendsig;	/* in PT_CONTINUE below */

	case PT_STEP:
	case PT_CONTINUE:
	case PT_DETACH:
		/* XXX data is used even in the PT_STEP case. */
		if (req != PT_STEP && (unsigned)data > _SIG_MAXSIG) {
			error = EINVAL;
			goto fail;
		}

		_PHOLD(p);

		if (req == PT_STEP) {
			error = ptrace_single_step(td2);
			if (error) {
				_PRELE(p);
				goto fail;
			}
		}

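		/*
		 * By ptrace(2) convention an addr of 1 means "resume
		 * where the process stopped"; any other value becomes
		 * the new program counter.
		 */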
		if (addr != (void *)1) {
			error = ptrace_set_pc(td2, (u_long)(uintfptr_t)addr);
			if (error) {
				_PRELE(p);
				goto fail;
			}
		}
		_PRELE(p);

		if (req == PT_DETACH) {
			/* reset process parent */
			if (p->p_oppid != p->p_pptr->p_pid) {
				struct proc *pp;

				PROC_UNLOCK(p);
				pp = pfind(p->p_oppid);
				if (pp == NULL)
					pp = initproc;
				else
					PROC_UNLOCK(pp);
				PROC_LOCK(p);
				proc_reparent(p, pp);
			}
			p->p_flag &= ~(P_TRACED | P_WAITED);
			p->p_oppid = 0;

			/* should we send SIGCHLD? */
		}

	sendsig:
		if (proctree_locked)
			sx_xunlock(&proctree_lock);
		/* deliver or queue signal */
		if (P_SHOULDSTOP(p)) {
			p->p_xstat = data;
			mtx_lock_spin(&sched_lock);
			p->p_flag &= ~(P_STOPPED_TRACE|P_STOPPED_SIG);
			thread_unsuspend(p);
			setrunnable(td2);	/* XXXKSE */
			/* Need foreach kse in proc, ... make_kse_queued(). */
			mtx_unlock_spin(&sched_lock);
		} else if (data)
			psignal(p, data);
		PROC_UNLOCK(p);
		
		return (0);

	case PT_WRITE_I:
	case PT_WRITE_D:
		write = 1;
		/* FALLTHROUGH */
	case PT_READ_I:
	case PT_READ_D:
		PROC_UNLOCK(p);
		tmp = 0;
		/* write = 0 set above */
		iov.iov_base = write ? (caddr_t)&data : (caddr_t)&tmp;
		iov.iov_len = sizeof(int);
		uio.uio_iov = &iov;
		uio.uio_iovcnt = 1;
		uio.uio_offset = (off_t)(uintptr_t)addr;
		uio.uio_resid = sizeof(int);
		uio.uio_segflg = UIO_SYSSPACE;	/* i.e.: the uap */
		uio.uio_rw = write ? UIO_WRITE : UIO_READ;
		uio.uio_td = td;
		error = proc_rwmem(p, &uio);
		if (uio.uio_resid != 0) {
			/*
			 * XXX proc_rwmem() doesn't currently return ENOSPC,
			 * so I think write() can bogusly return 0.
			 * XXX what happens for short writes?  We don't want
			 * to write partial data.
			 * XXX proc_rwmem() returns EPERM for other invalid
			 * addresses.  Convert this to EINVAL.  Does this
			 * clobber returns of EPERM for other reasons?
			 */
			if (error == 0 || error == ENOSPC || error == EPERM)
				error = EINVAL;	/* EOF */
		}
		if (!write)
			td->td_retval[0] = tmp;
		return (error);

	case PT_IO:
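		/*
		 * Transfer data between the tracer and the target's
		 * address space, as described by the ptrace_io_desc
		 * passed in addr.
		 */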
		PROC_UNLOCK(p);
		piod = addr;
		iov.iov_base = piod->piod_addr;
		iov.iov_len = piod->piod_len;
		uio.uio_iov = &iov;
		uio.uio_iovcnt = 1;
		uio.uio_offset = (off_t)(uintptr_t)piod->piod_offs;
		uio.uio_resid = piod->piod_len;
		uio.uio_segflg = UIO_USERSPACE;
		uio.uio_td = td;
		switch (piod->piod_op) {
		case PIOD_READ_D:
		case PIOD_READ_I:
			uio.uio_rw = UIO_READ;
			break;
		case PIOD_WRITE_D:
		case PIOD_WRITE_I:
			uio.uio_rw = UIO_WRITE;
			break;
		default:
			return (EINVAL);
		}
		error = proc_rwmem(p, &uio);
		piod->piod_len -= uio.uio_resid;
		return (error);

	case PT_KILL:
		data = SIGKILL;
		goto sendsig;	/* in PT_CONTINUE above */

	case PT_SETREGS:
		_PHOLD(p);
		error = proc_write_regs(td2, addr);
		_PRELE(p);
		PROC_UNLOCK(p);
		return (error);

	case PT_GETREGS:
		_PHOLD(p);
		error = proc_read_regs(td2, addr);
		_PRELE(p);
		PROC_UNLOCK(p);
		return (error);

	case PT_SETFPREGS:
		_PHOLD(p);
		error = proc_write_fpregs(td2, addr);
		_PRELE(p);
		PROC_UNLOCK(p);
		return (error);

	case PT_GETFPREGS:
		_PHOLD(p);
		error = proc_read_fpregs(td2, addr);
		_PRELE(p);
		PROC_UNLOCK(p);
		return (error);

	case PT_SETDBREGS:
		_PHOLD(p);
		error = proc_write_dbregs(td2, addr);
		_PRELE(p);
		PROC_UNLOCK(p);
		return (error);

	case PT_GETDBREGS:
		_PHOLD(p);
		error = proc_read_dbregs(td2, addr);
		_PRELE(p);
		PROC_UNLOCK(p);
		return (error);

	default:
		KASSERT(0, ("unreachable code\n"));
		break;
	}

	KASSERT(0, ("unreachable code\n"));
	return (0);

fail:
	PROC_UNLOCK(p);
	if (proctree_locked)
		sx_xunlock(&proctree_lock);
	return (error);
}
Example #11
int
fork1(struct proc *p1, int exitsig, int flags, void *stack, size_t stacksize,
    void (*func)(void *), void *arg, register_t *retval,
    struct proc **rnewprocp)
{
	struct proc *p2;
	uid_t uid;
	struct vmspace *vm;
	int count;
	vaddr_t uaddr;
	int s;
	extern void endtsleep(void *);
	extern void realitexpire(void *);

	/*
	 * Although process entries are dynamically created, we still keep
	 * a global limit on the maximum number we will create. We reserve
	 * the last 5 processes for root. The variable nprocs is the
	 * current number of processes, maxproc is the limit.
	 */
	uid = p1->p_cred->p_ruid;
	if ((nprocs >= maxproc - 5 && uid != 0) || nprocs >= maxproc) {
		static struct timeval lasttfm;

		if (ratecheck(&lasttfm, &fork_tfmrate))
			tablefull("proc");
		return (EAGAIN);
	}
	nprocs++;

	/*
	 * Increment the count of procs running with this uid. Don't allow
	 * a nonprivileged user to exceed their current limit.
	 */
	count = chgproccnt(uid, 1);
	if (uid != 0 && count > p1->p_rlimit[RLIMIT_NPROC].rlim_cur) {
		(void)chgproccnt(uid, -1);
		nprocs--;
		return (EAGAIN);
	}

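	/*
	 * Allocate kernel virtual address space for the U-area now,
	 * while it is still easy to back out of the fork on failure.
	 */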
	uaddr = uvm_km_alloc1(kernel_map, USPACE, USPACE_ALIGN, 1);
	if (uaddr == 0) {
		chgproccnt(uid, -1);
		nprocs--;
		return (ENOMEM);
	}

	/*
	 * From now on, we're committed to the fork and cannot fail.
	 */

	/* Allocate new proc. */
	p2 = pool_get(&proc_pool, PR_WAITOK);

	p2->p_stat = SIDL;			/* protect against others */
	p2->p_exitsig = exitsig;
	p2->p_forw = p2->p_back = NULL;

#ifdef RTHREADS
	if (flags & FORK_THREAD) {
		atomic_setbits_int(&p2->p_flag, P_THREAD);
		p2->p_p = p1->p_p;
		TAILQ_INSERT_TAIL(&p2->p_p->ps_threads, p2, p_thr_link);
	} else {
		process_new(p2, p1);
	}
#else
	process_new(p2, p1);
#endif

	/*
	 * Make a proc table entry for the new process.
	 * Start by zeroing the section of proc that is zero-initialized,
	 * then copy the section that is copied directly from the parent.
	 */
	bzero(&p2->p_startzero,
	    (unsigned) ((caddr_t)&p2->p_endzero - (caddr_t)&p2->p_startzero));
	bcopy(&p1->p_startcopy, &p2->p_startcopy,
	    (unsigned) ((caddr_t)&p2->p_endcopy - (caddr_t)&p2->p_startcopy));

	/*
	 * Initialize the timeouts.
	 */
	timeout_set(&p2->p_sleep_to, endtsleep, p2);
	timeout_set(&p2->p_realit_to, realitexpire, p2);

#if defined(__HAVE_CPUINFO)
	p2->p_cpu = p1->p_cpu;
#endif

	/*
	 * Duplicate sub-structures as needed.
	 * Increase reference counts on shared objects.
	 * The p_stats and p_sigacts substructs are set in vm_fork.
	 */
	p2->p_flag = 0;
	p2->p_emul = p1->p_emul;
	if (p1->p_flag & P_PROFIL)
		startprofclock(p2);
	atomic_setbits_int(&p2->p_flag, p1->p_flag & (P_SUGID | P_SUGIDEXEC));
	if (flags & FORK_PTRACE)
		atomic_setbits_int(&p2->p_flag, p1->p_flag & P_TRACED);
#ifdef RTHREADS
	if (flags & FORK_THREAD) {
		/* nothing */
	} else
#endif
	{
		p2->p_p->ps_cred = pool_get(&pcred_pool, PR_WAITOK);
		bcopy(p1->p_p->ps_cred, p2->p_p->ps_cred, sizeof(*p2->p_p->ps_cred));
		p2->p_p->ps_cred->p_refcnt = 1;
		crhold(p1->p_ucred);
	}

	TAILQ_INIT(&p2->p_selects);

	/* bump references to the text vnode (for procfs) */
	p2->p_textvp = p1->p_textvp;
	if (p2->p_textvp)
		VREF(p2->p_textvp);

	if (flags & FORK_CLEANFILES)
		p2->p_fd = fdinit(p1);
	else if (flags & FORK_SHAREFILES)
		p2->p_fd = fdshare(p1);
	else
		p2->p_fd = fdcopy(p1);

	/*
	 * If ps_limit is still copy-on-write, bump refcnt,
	 * otherwise get a copy that won't be modified.
	 * (If PL_SHAREMOD is clear, the structure is shared
	 * copy-on-write.)
	 */
#ifdef RTHREADS
	if (flags & FORK_THREAD) {
		/* nothing */
	} else
#endif
	{
		if (p1->p_p->ps_limit->p_lflags & PL_SHAREMOD)
			p2->p_p->ps_limit = limcopy(p1->p_p->ps_limit);
		else {
			p2->p_p->ps_limit = p1->p_p->ps_limit;
			p2->p_p->ps_limit->p_refcnt++;
		}
	}

	if (p1->p_session->s_ttyvp != NULL && p1->p_flag & P_CONTROLT)
		atomic_setbits_int(&p2->p_flag, P_CONTROLT);
	if (flags & FORK_PPWAIT)
		atomic_setbits_int(&p2->p_flag, P_PPWAIT);
	p2->p_pptr = p1;
	if (flags & FORK_NOZOMBIE)
		atomic_setbits_int(&p2->p_flag, P_NOZOMBIE);
	LIST_INIT(&p2->p_children);

#ifdef KTRACE
	/*
	 * Copy traceflag and tracefile if enabled.
	 * If not inherited, these were zeroed above.
	 */
	if (p1->p_traceflag & KTRFAC_INHERIT) {
		p2->p_traceflag = p1->p_traceflag;
		if ((p2->p_tracep = p1->p_tracep) != NULL)
			VREF(p2->p_tracep);
	}
#endif

	/*
	 * Set priority of child to be that of parent.
	 * XXX should move p_estcpu into the region of struct proc which
	 * gets copied.
	 */
	scheduler_fork_hook(p1, p2);

	/*
	 * Create signal actions for the child process.
	 */
	if (flags & FORK_SIGHAND)
		sigactsshare(p1, p2);
	else
		p2->p_sigacts = sigactsinit(p1);

	/*
	 * If the emulation has a process fork hook, call it now.
	 */
	if (p2->p_emul->e_proc_fork)
		(*p2->p_emul->e_proc_fork)(p2, p1);

	p2->p_addr = (struct user *)uaddr;

	/*
	 * Finish creating the child process.  It will return through a
	 * different path later.
	 */
	uvm_fork(p1, p2, ((flags & FORK_SHAREVM) ? TRUE : FALSE), stack,
	    stacksize, func ? func : child_return, arg ? arg : p2);

	timeout_set(&p2->p_stats->p_virt_to, virttimer_trampoline, p2);
	timeout_set(&p2->p_stats->p_prof_to, proftimer_trampoline, p2);

	vm = p2->p_vmspace;

	if (flags & FORK_FORK) {
		forkstat.cntfork++;
		forkstat.sizfork += vm->vm_dsize + vm->vm_ssize;
	} else if (flags & FORK_VFORK) {
		forkstat.cntvfork++;
		forkstat.sizvfork += vm->vm_dsize + vm->vm_ssize;
	} else if (flags & FORK_RFORK) {
		forkstat.cntrfork++;
		forkstat.sizrfork += vm->vm_dsize + vm->vm_ssize;
	} else {
		forkstat.cntkthread++;
		forkstat.sizkthread += vm->vm_dsize + vm->vm_ssize;
	}

	/* Find an unused pid satisfying 1 <= lastpid <= PID_MAX */
	do {
		lastpid = 1 + (randompid ? arc4random() : lastpid) % PID_MAX;
	} while (pidtaken(lastpid));
	p2->p_pid = lastpid;

	LIST_INSERT_HEAD(&allproc, p2, p_list);
	LIST_INSERT_HEAD(PIDHASH(p2->p_pid), p2, p_hash);
	LIST_INSERT_HEAD(&p1->p_children, p2, p_sibling);
	LIST_INSERT_AFTER(p1, p2, p_pglist);
	if (p2->p_flag & P_TRACED) {
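		/*
		 * The child inherited P_TRACED: remember its biological
		 * parent in p_oppid and, if need be, hand the child to
		 * the debugger (our own parent) via proc_reparent().
		 */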
		p2->p_oppid = p1->p_pid;
		if (p2->p_pptr != p1->p_pptr)
			proc_reparent(p2, p1->p_pptr);

		/*
		 * Set ptrace status.
		 */
		if (flags & FORK_FORK) {
			p2->p_ptstat = malloc(sizeof(*p2->p_ptstat),
			    M_SUBPROC, M_WAITOK);
			p1->p_ptstat->pe_report_event = PTRACE_FORK;
			p2->p_ptstat->pe_report_event = PTRACE_FORK;
			p1->p_ptstat->pe_other_pid = p2->p_pid;
			p2->p_ptstat->pe_other_pid = p1->p_pid;
		}
	}

#if NSYSTRACE > 0
	if (ISSET(p1->p_flag, P_SYSTRACE))
		systrace_fork(p1, p2);
#endif

	/*
	 * Make child runnable, set start time, and add to run queue.
	 */
	SCHED_LOCK(s);
 	getmicrotime(&p2->p_stats->p_start);
	p2->p_acflag = AFORK;
	p2->p_stat = SRUN;
	setrunqueue(p2);
	SCHED_UNLOCK(s);

	/*
	 * Notify any interested parties about the new process.
	 */
	KNOTE(&p1->p_klist, NOTE_FORK | p2->p_pid);

	/*
	 * Update stats now that we know the fork was successful.
	 */
	uvmexp.forks++;
	if (flags & FORK_PPWAIT)
		uvmexp.forks_ppwait++;
	if (flags & FORK_SHAREVM)
		uvmexp.forks_sharevm++;

	/*
	 * Pass a pointer to the new process to the caller.
	 */
	if (rnewprocp != NULL)
		*rnewprocp = p2;

	/*
	 * Preserve synchronization semantics of vfork.  If waiting for
	 * child to exec or exit, set P_PPWAIT on child, and sleep on our
	 * proc (in case of exit).
	 */
	if (flags & FORK_PPWAIT)
		while (p2->p_flag & P_PPWAIT)
			tsleep(p1, PWAIT, "ppwait", 0);

	/*
	 * If we're tracing the child, alert the parent too.
	 */
	if ((flags & FORK_PTRACE) && (p1->p_flag & P_TRACED))
		psignal(p1, SIGTRAP);

	/*
	 * Return child pid to parent process,
	 * marking us as parent via retval[1].
	 */
	if (retval != NULL) {
		retval[0] = p2->p_pid;
		retval[1] = 0;
	}
	return (0);
}
Example #12
static int
procfs_control(struct proc *curp, struct lwp *lp, int op)
{
	struct proc *p = lp->lwp_proc;
	int error;

	ASSERT_LWKT_TOKEN_HELD(&p->p_token);
	ASSERT_LWKT_TOKEN_HELD(&proc_token);

	/* Can't trace a process that's currently exec'ing. */ 
	if ((p->p_flags & P_INEXEC) != 0)
		return EAGAIN;
	/*
	 * Authorization check: rely on normal debugging protection,
	 * except allow processes to disengage debugging on a process
	 * to which they previously attached but no longer have
	 * permission to debug.
	 */
	if (op != PROCFS_CTL_DETACH) {
		if (securelevel > 0 && p->p_pid == 1)
			return (EPERM);

		if (!CHECKIO(curp, p) || p_trespass(curp->p_ucred, p->p_ucred))
			return (EPERM);
	}

	/*
	 * Attach - attaches the target process for debugging
	 * by the calling process.
	 */
	if (op == PROCFS_CTL_ATTACH) {
		/* check whether already being traced */
		if (p->p_flags & P_TRACED)
			return (EBUSY);

		/* can't trace yourself! */
		if (p->p_pid == curp->p_pid)
			return (EINVAL);

		/*
		 * Go ahead and set the trace flag.
		 * Save the old parent (it's reset in
		 *   _DETACH, and also in kern_exit.c:wait4()).
		 * Reparent the process so that the tracing
		 *   proc gets to see all the action.
		 * Stop the target.
		 */
		p->p_flags |= P_TRACED;
		faultin(p);
		p->p_xstat = 0;		/* XXX ? */
		if (p->p_pptr != curp) {
			p->p_oppid = p->p_pptr->p_pid;
			proc_reparent(p, curp);
		}
		proc_stop(p);
		return (0);
	}

	/*
	 * Target process must be stopped, owned by (curp) and
	 * be set up for tracing (P_TRACED flag set).
	 * Allow DETACH to take place at any time for sanity.
	 * Allow WAIT any time, of course.
	 */
	switch (op) {
	case PROCFS_CTL_DETACH:
	case PROCFS_CTL_WAIT:
		break;

	default:
		if (!TRACE_WAIT_P(curp, p))
			return (EBUSY);
	}

#ifdef FIX_SSTEP
	/*
	 * do single-step fixup if needed
	 */
	FIX_SSTEP(lp);
#endif

	/*
	 * Don't deliver any signal by default.
	 * To continue with a signal, just send
	 * the signal name to the ctl file.
	 */
	p->p_xstat = 0;

	switch (op) {
	/*
	 * Detach.  Clean up the target process, reparent it if
	 * possible, and set it running once more.
	 */
	case PROCFS_CTL_DETACH:
		/* if not being traced, then this is a painless no-op */
		if ((p->p_flags & P_TRACED) == 0)
			return (0);

		/* not being traced any more */
		p->p_flags &= ~P_TRACED;

		/* remove pending SIGTRAP, else the process will die */
		spin_lock(&lp->lwp_spin);
		lwp_delsig(lp, SIGTRAP);
		spin_unlock(&lp->lwp_spin);

		/* give process back to original parent */
		if (p->p_oppid != p->p_pptr->p_pid) {
			struct proc *pp;

			pp = pfs_pfind(p->p_oppid);
			if (pp) {
				proc_reparent(p, pp);
				pfs_pdone(pp);
			}
		}

		p->p_oppid = 0;
		p->p_flags &= ~P_WAITED;	/* XXX ? */
		wakeup((caddr_t) curp);		/* XXX for CTL_WAIT below ? */

		break;

	/*
	 * Step.  Let the target process execute a single instruction.
	 */
	case PROCFS_CTL_STEP:
		LWPHOLD(lp);
		error = procfs_sstep(lp);
		LWPRELE(lp);
		if (error)
			return (error);
		break;

	/*
	 * Run.  Let the target process continue running until a breakpoint
	 * or some other trap.
	 */
	case PROCFS_CTL_RUN:
		break;

	/*
	 * Wait for the target process to stop.
	 * If the target is not being traced then just wait
	 * for it to enter the stopped (SSTOP) state.
	 */
	case PROCFS_CTL_WAIT:
		error = 0;
		if (p->p_flags & P_TRACED) {
			while (error == 0 &&
					p->p_stat != SSTOP &&
					(p->p_flags & P_TRACED) &&
					(p->p_pptr == curp)) {
				error = tsleep((caddr_t) p,
						PCATCH, "procfsx", 0);
			}
			if (error == 0 && !TRACE_WAIT_P(curp, p))
				error = EBUSY;
		} else {
			while (error == 0 && p->p_stat != SSTOP) {
				error = tsleep((caddr_t) p,
						PCATCH, "procfs", 0);
			}
		}
		return (error);

	default:
		panic("procfs_control");
	}

	/*
	 * If the process is in a stopped state, make it runnable again.
	 * Do not set LWP_MP_BREAKTSLEEP - that is, do not break a tsleep
	 * that might be in progress.
	 */
	if (p->p_stat == SSTOP)
		proc_unstop(p);
	return (0);
}