示例#1
0
/*
 * Destroy old address space, and allocate a new stack.
 *	The new stack is only sgrowsiz large because it is grown
 *	automatically on a page fault.
 */
int
exec_new_vmspace(struct image_params *imgp, struct sysentvec *sv)
{
	int error;
	struct proc *p = imgp->proc;
	struct vmspace *vmspace = p->p_vmspace;
	vm_object_t obj;
	struct rlimit rlim_stack;
	vm_offset_t sv_minuser, stack_addr;
	vm_map_t map;
	u_long ssiz;

	imgp->vmspace_destroyed = 1;
	imgp->sysent = sv;

	/* May be called with Giant held */
	EVENTHANDLER_DIRECT_INVOKE(process_exec, p, imgp);

	/*
	 * Blow away entire process VM, if address space not shared,
	 * otherwise, create a new VM space so that other threads are
	 * not disrupted
	 */
	map = &vmspace->vm_map;
	if (map_at_zero)
		sv_minuser = sv->sv_minuser;
	else
		sv_minuser = MAX(sv->sv_minuser, PAGE_SIZE);
	if (vmspace->vm_refcnt == 1 && vm_map_min(map) == sv_minuser &&
	    vm_map_max(map) == sv->sv_maxuser &&
	    cpu_exec_vmspace_reuse(p, map)) {
		shmexit(vmspace);
		pmap_remove_pages(vmspace_pmap(vmspace));
		vm_map_remove(map, vm_map_min(map), vm_map_max(map));
		/*
		 * An exec terminates mlockall(MCL_FUTURE), ASLR state
		 * must be re-evaluated.
		 */
		vm_map_lock(map);
		vm_map_modflags(map, 0, MAP_WIREFUTURE | MAP_ASLR |
		    MAP_ASLR_IGNSTART);
		vm_map_unlock(map);
	} else {
		error = vmspace_exec(p, sv_minuser, sv->sv_maxuser);
		if (error)
			return (error);
		vmspace = p->p_vmspace;
		map = &vmspace->vm_map;
	}
	map->flags |= imgp->map_flags;

	/* Map a shared page */
	obj = sv->sv_shared_page_obj;
	if (obj != NULL) {
		vm_object_reference(obj);
		error = vm_map_fixed(map, obj, 0,
		    sv->sv_shared_page_base, sv->sv_shared_page_len,
		    VM_PROT_READ | VM_PROT_EXECUTE,
		    VM_PROT_READ | VM_PROT_EXECUTE,
		    MAP_INHERIT_SHARE | MAP_ACC_NO_CHARGE);
		if (error != KERN_SUCCESS) {
			vm_object_deallocate(obj);
			return (vm_mmap_to_errno(error));
		}
	}

	/* Allocate a new stack */
	if (imgp->stack_sz != 0) {
		ssiz = trunc_page(imgp->stack_sz);
		PROC_LOCK(p);
		lim_rlimit_proc(p, RLIMIT_STACK, &rlim_stack);
		PROC_UNLOCK(p);
		if (ssiz > rlim_stack.rlim_max)
			ssiz = rlim_stack.rlim_max;
		if (ssiz > rlim_stack.rlim_cur) {
			rlim_stack.rlim_cur = ssiz;
			kern_setrlimit(curthread, RLIMIT_STACK, &rlim_stack);
		}
	} else if (sv->sv_maxssiz != NULL) {
		ssiz = *sv->sv_maxssiz;
	} else {
		ssiz = maxssiz;
	}
	stack_addr = sv->sv_usrstack - ssiz;
	error = vm_map_stack(map, stack_addr, (vm_size_t)ssiz,
	    obj != NULL && imgp->stack_prot != 0 ? imgp->stack_prot :
	    sv->sv_stackprot, VM_PROT_ALL, MAP_STACK_GROWS_DOWN);
	if (error != KERN_SUCCESS)
		return (vm_mmap_to_errno(error));

	/*
	 * vm_ssize and vm_maxsaddr are somewhat antiquated concepts, but they
	 * are still used to enforce the stack rlimit on the process stack.
	 */
	vmspace->vm_ssize = sgrowsiz >> PAGE_SHIFT;
	vmspace->vm_maxsaddr = (char *)stack_addr;

	return (0);
}
示例#2
0
/*
 * Finish creating the child process p2 (whose first thread is td2) on
 * behalf of the forking thread td.
 *
 * Locking: entered with proctree_lock held shared and allproc_lock held
 * exclusive (both asserted below).  Both are released here; proctree_lock
 * is later re-acquired exclusively and released again, so neither lock is
 * held on return.  Whenever both process locks are taken, the child (p2)
 * is locked before the parent (p1).
 *
 * Parameters:
 *	td		forking thread; td->td_proc is the parent process.
 *	fr		fork request.  fr_flags carries the RF* options,
 *			fr_pd_flags is handed to procdesc_new(), and
 *			fr_pidp / fr_procp receive the result.
 *	p2, td2		the new process and thread; they are not allocated
 *			here, only filled in and linked into the system.
 *	vm2		passed through to vm_forkproc().
 *	fp_procdesc	file bound to the process descriptor when RFPROCDESC
 *			is set; one reference is dropped here with fdrop().
 *
 * The function has no return value and no failure path.
 */
static void
do_fork(struct thread *td, struct fork_req *fr, struct proc *p2, struct thread *td2,
    struct vmspace *vm2, struct file *fp_procdesc)
{
	struct proc *p1, *pptr;
	int trypid;
	struct filedesc *fd;
	struct filedesc_to_leader *fdtol;
	struct sigacts *newsigacts;

	sx_assert(&proctree_lock, SX_SLOCKED);
	sx_assert(&allproc_lock, SX_XLOCKED);

	p1 = td->td_proc;

	/*
	 * Choose the child's pid; the shared proctree_lock is dropped as
	 * soon as the pid is known.
	 */
	trypid = fork_findpid(fr->fr_flags);

	sx_sunlock(&proctree_lock);

	/*
	 * Publish the child on allproc, the pid hash and the tid hash while
	 * it is still marked PRS_NEW.
	 */
	p2->p_state = PRS_NEW;		/* protect against others */
	p2->p_pid = trypid;
	AUDIT_ARG_PID(p2->p_pid);
	LIST_INSERT_HEAD(&allproc, p2, p_list);
	allproc_gen++;
	LIST_INSERT_HEAD(PIDHASH(p2->p_pid), p2, p_hash);
	tidhash_add(td2);
	PROC_LOCK(p2);
	PROC_LOCK(p1);

	sx_xunlock(&allproc_lock);

	/* Copy the p_startcopy..p_endcopy range of struct proc from the parent. */
	bcopy(&p1->p_startcopy, &p2->p_startcopy,
	    __rangeof(struct proc, p_startcopy, p_endcopy));
	/*
	 * NOTE(review): presumably p_args lies inside the range copied
	 * above, so this takes the child's own reference on the shared
	 * argument block -- confirm against struct proc.
	 */
	pargs_hold(p2->p_args);

	PROC_UNLOCK(p1);

	/* Clear the p_startzero..p_endzero range of the child. */
	bzero(&p2->p_startzero,
	    __rangeof(struct proc, p_startzero, p_endzero));

	/* Tell the prison that we exist. */
	prison_proc_hold(p2->p_ucred->cr_prison);

	PROC_UNLOCK(p2);

	/*
	 * Malloc things while we don't hold any locks.
	 */
	if (fr->fr_flags & RFSIGSHARE)
		newsigacts = NULL;
	else
		newsigacts = sigacts_alloc();

	/*
	 * Copy filedesc.
	 *
	 * RFCFDG: start from a table produced by fdinit().
	 * RFFDG:  private copy of the parent's table.
	 * else:   share the parent's table; fdtol then tracks the leader.
	 */
	if (fr->fr_flags & RFCFDG) {
		fd = fdinit(p1->p_fd, false);
		fdtol = NULL;
	} else if (fr->fr_flags & RFFDG) {
		fd = fdcopy(p1->p_fd);
		fdtol = NULL;
	} else {
		fd = fdshare(p1->p_fd);
		/* Lazily create the parent's filedesc_to_leader on first share. */
		if (p1->p_fdtol == NULL)
			p1->p_fdtol = filedesc_to_leader_alloc(NULL, NULL,
			    p1->p_leader);
		if ((fr->fr_flags & RFTHREAD) != 0) {
			/*
			 * Shared file descriptor table, and shared
			 * process leaders.
			 */
			fdtol = p1->p_fdtol;
			/* The refcount is adjusted under the filedesc lock. */
			FILEDESC_XLOCK(p1->p_fd);
			fdtol->fdl_refcount++;
			FILEDESC_XUNLOCK(p1->p_fd);
		} else {
			/*
			 * Shared file descriptor table, and different
			 * process leaders.
			 */
			fdtol = filedesc_to_leader_alloc(p1->p_fdtol,
			    p1->p_fd, p2);
		}
	}
	/*
	 * Make a proc table entry for the new process.
	 * Start by zeroing the section of proc that is zero-initialized,
	 * then copy the section that is copied directly from the parent.
	 * (Here the same zero/copy treatment is applied to the new thread.)
	 */

	PROC_LOCK(p2);
	PROC_LOCK(p1);

	bzero(&td2->td_startzero,
	    __rangeof(struct thread, td_startzero, td_endzero));

	bcopy(&td->td_startcopy, &td2->td_startcopy,
	    __rangeof(struct thread, td_startcopy, td_endcopy));

	/* The new thread's name starts out as the process command name. */
	bcopy(&p2->p_comm, &td2->td_name, sizeof(td2->td_name));
	td2->td_sigstk = td->td_sigstk;
	td2->td_flags = TDF_INMEM;
	td2->td_lend_user_pri = PRI_MAX;

#ifdef VIMAGE
	td2->td_vnet = NULL;
	td2->td_vnet_lpush = NULL;
#endif

	/*
	 * Allow the scheduler to initialize the child.
	 */
	thread_lock(td);
	sched_fork(td, td2);
	thread_unlock(td);

	/*
	 * Duplicate sub-structures as needed.
	 * Increase reference counts on shared objects.
	 */
	p2->p_flag = P_INMEM;
	/* Only the tracing/trapcap related p_flag2 bits are inherited here. */
	p2->p_flag2 = p1->p_flag2 & (P2_NOTRACE | P2_NOTRACE_EXEC | P2_TRAPCAP);
	p2->p_swtick = ticks;
	if (p1->p_flag & P_PROFIL)
		startprofclock(p2);

	/*
	 * Whilst the proc lock is held, copy the VM domain data out
	 * using the VM domain method.
	 */
	vm_domain_policy_init(&p2->p_vm_dom_policy);
	vm_domain_policy_localcopy(&p2->p_vm_dom_policy,
	    &p1->p_vm_dom_policy);

	/*
	 * Signal actions are either shared with the parent (RFSIGSHARE) or
	 * copied into the structure allocated earlier without locks held.
	 */
	if (fr->fr_flags & RFSIGSHARE) {
		p2->p_sigacts = sigacts_hold(p1->p_sigacts);
	} else {
		sigacts_copy(newsigacts, p1->p_sigacts);
		p2->p_sigacts = newsigacts;
	}

	/* Select the value stored in p_sigparent for this child. */
	if (fr->fr_flags & RFTSIGZMB)
	        p2->p_sigparent = RFTSIGNUM(fr->fr_flags);
	else if (fr->fr_flags & RFLINUXTHPN)
	        p2->p_sigparent = SIGUSR1;
	else
	        p2->p_sigparent = SIGCHLD;

	p2->p_textvp = p1->p_textvp;
	p2->p_fd = fd;
	p2->p_fdtol = fdtol;

	if (p1->p_flag2 & P2_INHERIT_PROTECTED) {
		p2->p_flag |= P_PROTECTED;
		p2->p_flag2 |= P2_INHERIT_PROTECTED;
	}

	/*
	 * p_limit is copy-on-write.  Bump its refcount.
	 */
	lim_fork(p1, p2);

	thread_cow_get_proc(td2, p2);

	pstats_fork(p1->p_stats, p2->p_stats);

	PROC_UNLOCK(p1);
	PROC_UNLOCK(p2);

	/* Bump references to the text vnode (for procfs). */
	if (p2->p_textvp)
		vrefact(p2->p_textvp);

	/*
	 * Set up linkage for kernel based threading.
	 */
	if ((fr->fr_flags & RFTHREAD) != 0) {
		/* Insert p2 right after p1 on the peers list, same leader. */
		mtx_lock(&ppeers_lock);
		p2->p_peers = p1->p_peers;
		p1->p_peers = p2;
		p2->p_leader = p1->p_leader;
		mtx_unlock(&ppeers_lock);
		PROC_LOCK(p1->p_leader);
		if ((p1->p_leader->p_flag & P_WEXIT) != 0) {
			PROC_UNLOCK(p1->p_leader);
			/*
			 * The task leader is exiting, so process p1 is
			 * going to be killed shortly.  Since p1 obviously
			 * isn't dead yet, we know that the leader is either
			 * sending SIGKILL's to all the processes in this
			 * task or is sleeping waiting for all the peers to
			 * exit.  We let p1 complete the fork, but we need
			 * to go ahead and kill the new process p2 since
			 * the task leader may not get a chance to send
			 * SIGKILL to it.  We leave it on the list so that
			 * the task leader will wait for this new process
			 * to commit suicide.
			 */
			PROC_LOCK(p2);
			kern_psignal(p2, SIGKILL);
			PROC_UNLOCK(p2);
		} else
			PROC_UNLOCK(p1->p_leader);
	} else {
		/* Without RFTHREAD the child is its own leader with no peers. */
		p2->p_peers = NULL;
		p2->p_leader = p2;
	}

	/*
	 * Process-tree linkage: taken in the order proctree_lock, the
	 * parent's process group lock, then child and parent proc locks.
	 */
	sx_xlock(&proctree_lock);
	PGRP_LOCK(p1->p_pgrp);
	PROC_LOCK(p2);
	PROC_LOCK(p1);

	/*
	 * Preserve some more flags in subprocess.  P_PROFIL has already
	 * been preserved.
	 */
	p2->p_flag |= p1->p_flag & P_SUGID;
	td2->td_pflags |= (td->td_pflags & TDP_ALTSTACK) | TDP_FORKING;
	/* P_CONTROLT is inherited only while the session still has a tty vnode. */
	SESS_LOCK(p1->p_session);
	if (p1->p_session->s_ttyvp != NULL && p1->p_flag & P_CONTROLT)
		p2->p_flag |= P_CONTROLT;
	SESS_UNLOCK(p1->p_session);
	if (fr->fr_flags & RFPPWAIT)
		p2->p_flag |= P_PPWAIT;

	/* The child joins the parent's process group, right after the parent. */
	p2->p_pgrp = p1->p_pgrp;
	LIST_INSERT_AFTER(p1, p2, p_pglist);
	PGRP_UNLOCK(p1->p_pgrp);
	LIST_INIT(&p2->p_children);
	LIST_INIT(&p2->p_orphans);

	callout_init_mtx(&p2->p_itcallout, &p2->p_mtx, 0);

	/*
	 * If PF_FORK is set, the child process inherits the
	 * procfs ioctl flags from its parent.
	 */
	if (p1->p_pfsflags & PF_FORK) {
		p2->p_stops = p1->p_stops;
		p2->p_pfsflags = p1->p_pfsflags;
	}

	/*
	 * This begins the section where we must prevent the parent
	 * from being swapped.
	 */
	_PHOLD(p1);
	PROC_UNLOCK(p1);

	/*
	 * Attach the new process to its parent.
	 *
	 * If RFNOWAIT is set, the newly created process becomes a child
	 * of init.  This effectively disassociates the child from the
	 * parent.
	 *
	 * NOTE(review): the code below actually reparents to p1->p_reaper
	 * in the RFNOWAIT case, which need not be init -- confirm the
	 * wording above is still accurate.
	 */
	if ((fr->fr_flags & RFNOWAIT) != 0) {
		pptr = p1->p_reaper;
		p2->p_reaper = pptr;
	} else {
		/* A parent that is itself a reaper becomes the child's reaper. */
		p2->p_reaper = (p1->p_treeflag & P_TREE_REAPER) != 0 ?
		    p1 : p1->p_reaper;
		pptr = p1;
	}
	p2->p_pptr = pptr;
	LIST_INSERT_HEAD(&pptr->p_children, p2, p_sibling);
	LIST_INIT(&p2->p_reaplist);
	LIST_INSERT_HEAD(&p2->p_reaper->p_reaplist, p2, p_reapsibling);
	if (p2->p_reaper == p1)
		p2->p_reapsubtree = p2->p_pid;
	sx_xunlock(&proctree_lock);

	/* Inform accounting that we have forked. */
	p2->p_acflag = AFORK;
	PROC_UNLOCK(p2);

#ifdef KTRACE
	ktrprocfork(p1, p2);
#endif

	/*
	 * Finish creating the child process.  It will return via a different
	 * execution path later.  (ie: directly into user mode)
	 */
	vm_forkproc(td, p2, td2, vm2, fr->fr_flags);

	/*
	 * Account the fork in the VM statistics.  The first two cases match
	 * exact flag combinations; forks by proc0 are counted as kernel
	 * threads and everything else as an rfork.
	 */
	if (fr->fr_flags == (RFFDG | RFPROC)) {
		VM_CNT_INC(v_forks);
		VM_CNT_ADD(v_forkpages, p2->p_vmspace->vm_dsize +
		    p2->p_vmspace->vm_ssize);
	} else if (fr->fr_flags == (RFFDG | RFPROC | RFPPWAIT | RFMEM)) {
		VM_CNT_INC(v_vforks);
		VM_CNT_ADD(v_vforkpages, p2->p_vmspace->vm_dsize +
		    p2->p_vmspace->vm_ssize);
	} else if (p1 == &proc0) {
		VM_CNT_INC(v_kthreads);
		VM_CNT_ADD(v_kthreadpages, p2->p_vmspace->vm_dsize +
		    p2->p_vmspace->vm_ssize);
	} else {
		VM_CNT_INC(v_rforks);
		VM_CNT_ADD(v_rforkpages, p2->p_vmspace->vm_dsize +
		    p2->p_vmspace->vm_ssize);
	}

	/*
	 * Associate the process descriptor with the process before anything
	 * can happen that might cause that process to need the descriptor.
	 * However, don't do this until after fork(2) can no longer fail.
	 */
	if (fr->fr_flags & RFPROCDESC)
		procdesc_new(p2, fr->fr_pd_flags);

	/*
	 * Both processes are set up, now check if any loadable modules want
	 * to adjust anything.
	 */
	EVENTHANDLER_DIRECT_INVOKE(process_fork, p1, p2, fr->fr_flags);

	/*
	 * Set the child start time and mark the process as being complete.
	 * The state change itself is made under the process spin lock.
	 */
	PROC_LOCK(p2);
	PROC_LOCK(p1);
	microuptime(&p2->p_stats->p_start);
	PROC_SLOCK(p2);
	p2->p_state = PRS_NORMAL;
	PROC_SUNLOCK(p2);

#ifdef KDTRACE_HOOKS
	/*
	 * Tell the DTrace fasttrap provider about the new process so that any
	 * tracepoints inherited from the parent can be removed. We have to do
	 * this only after p_state is PRS_NORMAL since the fasttrap module will
	 * use pfind() later on.
	 */
	if ((fr->fr_flags & RFMEM) == 0 && dtrace_fasttrap_fork)
		dtrace_fasttrap_fork(p1, p2);
#endif
	/*
	 * Hold the process so that it cannot exit after we make it runnable,
	 * but before we wait for the debugger.
	 */
	_PHOLD(p2);
	if (p1->p_ptevents & PTRACE_FORK) {
		/*
		 * Arrange for debugger to receive the fork event.
		 *
		 * We can report PL_FLAG_FORKED regardless of
		 * P_FOLLOWFORK settings, but it does not make a sense
		 * for runaway child.
		 */
		td->td_dbgflags |= TDB_FORK;
		td->td_dbg_forked = p2->p_pid;
		td2->td_dbgflags |= TDB_STOPATFORK;
	}
	/* With RFPPWAIT, record on the parent thread which child it waits for. */
	if (fr->fr_flags & RFPPWAIT) {
		td->td_pflags |= TDP_RFPPWAIT;
		td->td_rfppwait_p = p2;
		td->td_dbgflags |= TDB_VFORK;
	}
	PROC_UNLOCK(p2);

	/*
	 * Now can be swapped.
	 */
	_PRELE(p1);
	PROC_UNLOCK(p1);

	/*
	 * Tell any interested parties about the new process.
	 */
	knote_fork(p1->p_klist, p2->p_pid);
	SDT_PROBE3(proc, , , create, p2, p1, fr->fr_flags);

	/* Bind the descriptor to its file, then drop one file reference. */
	if (fr->fr_flags & RFPROCDESC) {
		procdesc_finit(p2->p_procdesc, fp_procdesc);
		fdrop(fp_procdesc, td);
	}

	if ((fr->fr_flags & RFSTOPPED) == 0) {
		/*
		 * If RFSTOPPED not requested, make child runnable and
		 * add to run queue.
		 */
		thread_lock(td2);
		TD_SET_CAN_RUN(td2);
		sched_add(td2, SRQ_BORING);
		thread_unlock(td2);
		/* fr_pidp is optional in this case. */
		if (fr->fr_pidp != NULL)
			*fr->fr_pidp = p2->p_pid;
	} else {
		/*
		 * NOTE(review): fr_procp is dereferenced without a NULL
		 * check; this assumes every RFSTOPPED caller supplies it --
		 * confirm against the callers.
		 */
		*fr->fr_procp = p2;
	}

	PROC_LOCK(p2);
	/*
	 * Wait until debugger is attached to child.
	 * The loop ends once TDB_STOPATFORK is cleared or td2 no longer
	 * belongs to p2.
	 */
	while (td2->td_proc == p2 && (td2->td_dbgflags & TDB_STOPATFORK) != 0)
		cv_wait(&p2->p_dbgwait, &p2->p_mtx);
	/* Release the hold taken with _PHOLD(p2) above. */
	_PRELE(p2);
	racct_proc_fork_done(p2);
	PROC_UNLOCK(p2);
}