Beispiel #1
0
/*
 * Function:	unix_syscall
 *
 * Inputs:	regs	- pointer to i386 save area
 *
 * Outputs:	none
 */
void
unix_syscall(x86_saved_state_t *state)
{
	thread_t		thread;
	void			*vt;
	unsigned int		code;
	struct sysent		*callp;

	int			error;
	vm_offset_t		params;
	struct proc		*p;
	struct uthread		*uthread;
	x86_saved_state32_t	*regs;
	boolean_t		is_vfork;

	assert(is_saved_state32(state));
	regs = saved_state32(state);
#if DEBUG
	if (regs->eax == 0x800)
		thread_exception_return();
#endif
	thread = current_thread();
	uthread = get_bsdthread_info(thread);

	/* Get the approriate proc; may be different from task's for vfork() */
	is_vfork = uthread->uu_flag & UT_VFORK;
	if (__improbable(is_vfork != 0))
		p = current_proc();
	else 
		p = (struct proc *)get_bsdtask_info(current_task());

	/* Verify that we are not being called from a task without a proc */
	if (__improbable(p == NULL)) {
		regs->eax = EPERM;
		regs->efl |= EFL_CF;
		task_terminate_internal(current_task());
		thread_exception_return();
		/* NOTREACHED */
	}

	code = regs->eax & I386_SYSCALL_NUMBER_MASK;
	DEBUG_KPRINT_SYSCALL_UNIX("unix_syscall: code=%d(%s) eip=%u\n",
							  code, syscallnames[code >= NUM_SYSENT ? 63 : code], (uint32_t)regs->eip);
	params = (vm_offset_t) (regs->uesp + sizeof (int));

	regs->efl &= ~(EFL_CF);

	callp = (code >= NUM_SYSENT) ? &sysent[63] : &sysent[code];

	if (__improbable(callp == sysent)) {
		code = fuword(params);
		params += sizeof(int);
		callp = (code >= NUM_SYSENT) ? &sysent[63] : &sysent[code];
	}

	vt = (void *)uthread->uu_arg;

	if (callp->sy_arg_bytes != 0) {
#if CONFIG_REQUIRES_U32_MUNGING
		sy_munge_t	*mungerp;
#else
#error U32 syscalls on x86_64 kernel requires munging
#endif
		uint32_t	 nargs;

		assert((unsigned) callp->sy_arg_bytes <= sizeof (uthread->uu_arg));
		nargs = callp->sy_arg_bytes;
		error = copyin((user_addr_t) params, (char *) vt, nargs);
		if (error) {
			regs->eax = error;
			regs->efl |= EFL_CF;
			thread_exception_return();
			/* NOTREACHED */
		}

		if (__probable(code != 180)) {
	        	int *ip = (int *)vt;

			KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
				BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_START,
				*ip, *(ip+1), *(ip+2), *(ip+3), 0);
		}

#if CONFIG_REQUIRES_U32_MUNGING
		mungerp = callp->sy_arg_munge32;

		if (mungerp != NULL)
			(*mungerp)(vt);
#endif
	} else
		KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, 
			BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_START,
			0, 0, 0, 0, 0);

	/*
	 * Delayed binding of thread credential to process credential, if we
	 * are not running with an explicitly set thread credential.
	 */
	kauth_cred_uthread_update(uthread, p);

	uthread->uu_rval[0] = 0;
	uthread->uu_rval[1] = 0;
	uthread->uu_flag |= UT_NOTCANCELPT;
	uthread->syscall_code = code;

#ifdef JOE_DEBUG
        uthread->uu_iocount = 0;
        uthread->uu_vpindex = 0;
#endif

	AUDIT_SYSCALL_ENTER(code, p, uthread);
	error = (*(callp->sy_call))((void *) p, (void *) vt, &(uthread->uu_rval[0]));
	AUDIT_SYSCALL_EXIT(code, p, uthread, error);

#ifdef JOE_DEBUG
        if (uthread->uu_iocount)
                printf("system call returned with uu_iocount != 0\n");
#endif
#if CONFIG_DTRACE
	uthread->t_dtrace_errno = error;
#endif /* CONFIG_DTRACE */

	if (__improbable(error == ERESTART)) {
		/*
		 * Move the user's pc back to repeat the syscall:
		 * 5 bytes for a sysenter, or 2 for an int 8x.
		 * The SYSENTER_TF_CS covers single-stepping over a sysenter
		 * - see debug trap handler in idt.s/idt64.s
		 */

		pal_syscall_restart(thread, state);
	}
	else if (error != EJUSTRETURN) {
		if (__improbable(error)) {
		    regs->eax = error;
		    regs->efl |= EFL_CF;	/* carry bit */
		} else { /* (not error) */
			/*
			 * We split retval across two registers, in case the
			 * syscall had a 64-bit return value, in which case
			 * eax/edx matches the function call ABI.
			 */
		    regs->eax = uthread->uu_rval[0];
		    regs->edx = uthread->uu_rval[1];
		} 
	}

	DEBUG_KPRINT_SYSCALL_UNIX(
		"unix_syscall: error=%d retval=(%u,%u)\n",
		error, regs->eax, regs->edx);

	uthread->uu_flag &= ~UT_NOTCANCELPT;

	if (__improbable(uthread->uu_lowpri_window)) {
	        /*
		 * task is marked as a low priority I/O type
		 * and the I/O we issued while in this system call
		 * collided with normal I/O operations... we'll
		 * delay in order to mitigate the impact of this
		 * task on the normal operation of the system
		 */
		throttle_lowpri_io(1);
	}
	if (__probable(code != 180))
		KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, 
			BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_END,
			error, uthread->uu_rval[0], uthread->uu_rval[1], p->p_pid, 0);

	if (__improbable(!is_vfork && callp->sy_call == (sy_call_t *)execve && !error)) {
		pal_execve_return(thread);
	}

	thread_exception_return();
	/* NOTREACHED */
}
Beispiel #2
0
void
unix_syscall_return(int error)
{
	thread_t		thread;
	struct uthread		*uthread;
	struct proc *p;
	unsigned int code;
	struct sysent *callp;

	thread = current_thread();
	uthread = get_bsdthread_info(thread);

	pal_register_cache_state(thread, DIRTY);

	p = current_proc();

	if (proc_is64bit(p)) {
		x86_saved_state64_t *regs;

		regs = saved_state64(find_user_regs(thread));

		code = uthread->syscall_code;
		callp = (code >= NUM_SYSENT) ? &sysent[63] : &sysent[code];

#if CONFIG_DTRACE
		if (callp->sy_call == dtrace_systrace_syscall)
			dtrace_systrace_syscall_return( code, error, uthread->uu_rval );
#endif /* CONFIG_DTRACE */
		AUDIT_SYSCALL_EXIT(code, p, uthread, error);

		if (error == ERESTART) {
			/*
			 * repeat the syscall
			 */
			pal_syscall_restart( thread, find_user_regs(thread) );
		}
		else if (error != EJUSTRETURN) {
			if (error) {
				regs->rax = error;
				regs->isf.rflags |= EFL_CF;	/* carry bit */
			} else { /* (not error) */

				switch (callp->sy_return_type) {
				case _SYSCALL_RET_INT_T:
					regs->rax = uthread->uu_rval[0];
					regs->rdx = uthread->uu_rval[1];
					break;
				case _SYSCALL_RET_UINT_T:
					regs->rax = ((u_int)uthread->uu_rval[0]);
					regs->rdx = ((u_int)uthread->uu_rval[1]);
					break;
				case _SYSCALL_RET_OFF_T:
				case _SYSCALL_RET_ADDR_T:
				case _SYSCALL_RET_SIZE_T:
				case _SYSCALL_RET_SSIZE_T:
				case _SYSCALL_RET_UINT64_T:
					regs->rax = *((uint64_t *)(&uthread->uu_rval[0]));
					regs->rdx = 0;
					break;
				case _SYSCALL_RET_NONE:
					break;
				default:
					panic("unix_syscall: unknown return type");
					break;
				}
				regs->isf.rflags &= ~EFL_CF;
			} 
		}
		DEBUG_KPRINT_SYSCALL_UNIX(
			"unix_syscall_return: error=%d retval=(%llu,%llu)\n",
			error, regs->rax, regs->rdx);
	} else {
		x86_saved_state32_t	*regs;

		regs = saved_state32(find_user_regs(thread));

		regs->efl &= ~(EFL_CF);

		code = uthread->syscall_code;
		callp = (code >= NUM_SYSENT) ? &sysent[63] : &sysent[code];

#if CONFIG_DTRACE
		if (callp->sy_call == dtrace_systrace_syscall)
			dtrace_systrace_syscall_return( code, error, uthread->uu_rval );
#endif /* CONFIG_DTRACE */
		AUDIT_SYSCALL_EXIT(code, p, uthread, error);

		if (error == ERESTART) {
			pal_syscall_restart( thread, find_user_regs(thread) );
		}
		else if (error != EJUSTRETURN) {
			if (error) {
				regs->eax = error;
				regs->efl |= EFL_CF;	/* carry bit */
			} else { /* (not error) */
				regs->eax = uthread->uu_rval[0];
				regs->edx = uthread->uu_rval[1];
			} 
		}
		DEBUG_KPRINT_SYSCALL_UNIX(
			"unix_syscall_return: error=%d retval=(%u,%u)\n",
			error, regs->eax, regs->edx);
	}


	uthread->uu_flag &= ~UT_NOTCANCELPT;

	if (uthread->uu_lowpri_window) {
	        /*
		 * task is marked as a low priority I/O type
		 * and the I/O we issued while in this system call
		 * collided with normal I/O operations... we'll
		 * delay in order to mitigate the impact of this
		 * task on the normal operation of the system
		 */
		throttle_lowpri_io(1);
	}
	if (code != 180)
		KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, 
			BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_END,
			error, uthread->uu_rval[0], uthread->uu_rval[1], p->p_pid, 0);

	thread_exception_return();
	/* NOTREACHED */
}
Beispiel #3
0
void
unix_syscall64(x86_saved_state_t *state)
{
	thread_t	thread;
	void			*vt;
	unsigned int	code;
	struct sysent	*callp;
	int		args_in_regs;
	boolean_t	args_start_at_rdi;
	int		error;
	struct proc	*p;
	struct uthread	*uthread;
	x86_saved_state64_t *regs;

	assert(is_saved_state64(state));
	regs = saved_state64(state);
#if	DEBUG
	if (regs->rax == 0x2000800)
		thread_exception_return();
#endif
	thread = current_thread();
	uthread = get_bsdthread_info(thread);

	/* Get the approriate proc; may be different from task's for vfork() */
	if (__probable(!(uthread->uu_flag & UT_VFORK)))
		p = (struct proc *)get_bsdtask_info(current_task());
	else 
		p = current_proc();

	/* Verify that we are not being called from a task without a proc */
	if (__improbable(p == NULL)) {
		regs->rax = EPERM;
		regs->isf.rflags |= EFL_CF;
		task_terminate_internal(current_task());
		thread_exception_return();
		/* NOTREACHED */
	}

	code = regs->rax & SYSCALL_NUMBER_MASK;
	DEBUG_KPRINT_SYSCALL_UNIX(
		"unix_syscall64: code=%d(%s) rip=%llx\n",
		code, syscallnames[code >= NUM_SYSENT ? 63 : code], regs->isf.rip);
	callp = (code >= NUM_SYSENT) ? &sysent[63] : &sysent[code];

	vt = (void *)uthread->uu_arg;

	if (__improbable(callp == sysent)) {
	        /*
		 * indirect system call... system call number
		 * passed as 'arg0'
		 */
		code = regs->rdi;
		callp = (code >= NUM_SYSENT) ? &sysent[63] : &sysent[code];
		args_start_at_rdi = FALSE;
		args_in_regs = 5;
	} else {
		args_start_at_rdi = TRUE;
		args_in_regs = 6;
	}

	if (callp->sy_narg != 0) {
		assert(callp->sy_narg <= 8); /* size of uu_arg */

		args_in_regs = MIN(args_in_regs, callp->sy_narg);
		memcpy(vt, args_start_at_rdi ? &regs->rdi : &regs->rsi, args_in_regs * sizeof(syscall_arg_t));


		if (code != 180) {
			uint64_t *ip = (uint64_t *)vt;

			KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, 
				BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_START,
				(int)(*ip), (int)(*(ip+1)), (int)(*(ip+2)), (int)(*(ip+3)), 0);
		}

		if (__improbable(callp->sy_narg > args_in_regs)) {
			int copyin_count;

			copyin_count = (callp->sy_narg - args_in_regs) * sizeof(syscall_arg_t);

			error = copyin((user_addr_t)(regs->isf.rsp + sizeof(user_addr_t)), (char *)&uthread->uu_arg[args_in_regs], copyin_count);
			if (error) {
				regs->rax = error;
				regs->isf.rflags |= EFL_CF;
				thread_exception_return();
				/* NOTREACHED */
			}
		}
	} else
		KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
			BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_START,
			0, 0, 0, 0, 0);

	/*
	 * Delayed binding of thread credential to process credential, if we
	 * are not running with an explicitly set thread credential.
	 */
	kauth_cred_uthread_update(uthread, p);

	uthread->uu_rval[0] = 0;
	uthread->uu_rval[1] = 0;
	uthread->uu_flag |= UT_NOTCANCELPT;
	uthread->syscall_code = code;

#ifdef JOE_DEBUG
        uthread->uu_iocount = 0;
        uthread->uu_vpindex = 0;
#endif

	AUDIT_SYSCALL_ENTER(code, p, uthread);
	error = (*(callp->sy_call))((void *) p, vt, &(uthread->uu_rval[0]));
	AUDIT_SYSCALL_EXIT(code, p, uthread, error);

#ifdef JOE_DEBUG
        if (uthread->uu_iocount)
               printf("system call returned with uu_iocount != 0\n");
#endif

#if CONFIG_DTRACE
	uthread->t_dtrace_errno = error;
#endif /* CONFIG_DTRACE */
	
	if (__improbable(error == ERESTART)) {
		/*
		 * all system calls come through via the syscall instruction
		 * in 64 bit mode... its 2 bytes in length
		 * move the user's pc back to repeat the syscall:
		 */
		pal_syscall_restart( thread, state );
	}
	else if (error != EJUSTRETURN) {
		if (__improbable(error)) {
			regs->rax = error;
			regs->isf.rflags |= EFL_CF;	/* carry bit */
		} else { /* (not error) */

			switch (callp->sy_return_type) {
			case _SYSCALL_RET_INT_T:
				regs->rax = uthread->uu_rval[0];
				regs->rdx = uthread->uu_rval[1];
				break;
			case _SYSCALL_RET_UINT_T:
				regs->rax = ((u_int)uthread->uu_rval[0]);
				regs->rdx = ((u_int)uthread->uu_rval[1]);
				break;
			case _SYSCALL_RET_OFF_T:
			case _SYSCALL_RET_ADDR_T:
			case _SYSCALL_RET_SIZE_T:
			case _SYSCALL_RET_SSIZE_T:
			case _SYSCALL_RET_UINT64_T:
			        regs->rax = *((uint64_t *)(&uthread->uu_rval[0]));
				regs->rdx = 0;
				break;
			case _SYSCALL_RET_NONE:
				break;
			default:
				panic("unix_syscall: unknown return type");
				break;
			}
			regs->isf.rflags &= ~EFL_CF;
		} 
	}

	DEBUG_KPRINT_SYSCALL_UNIX(
		"unix_syscall64: error=%d retval=(%llu,%llu)\n",
		error, regs->rax, regs->rdx);
	
	uthread->uu_flag &= ~UT_NOTCANCELPT;

	if (__improbable(uthread->uu_lowpri_window)) {
	        /*
		 * task is marked as a low priority I/O type
		 * and the I/O we issued while in this system call
		 * collided with normal I/O operations... we'll
		 * delay in order to mitigate the impact of this
		 * task on the normal operation of the system
		 */
		throttle_lowpri_io(1);
	}
	if (__probable(code != 180))
		KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, 
			BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_END,
			error, uthread->uu_rval[0], uthread->uu_rval[1], p->p_pid, 0);

	thread_exception_return();
	/* NOTREACHED */
}
Beispiel #4
0
/*
 * Exit: deallocate address space and other resources, change proc state to
 * zombie, and unlink proc from allproc and parent's lists.  Save exit status
 * and rusage for wait().  Check for child processes and orphan them.
 */
void
exit1(struct thread *td, int rv)
{
	struct proc *p, *nq, *q;
	struct vnode *vtmp;
	struct vnode *ttyvp = NULL;
	struct plimit *plim;

	mtx_assert(&Giant, MA_NOTOWNED);

	p = td->td_proc;
	/*
	 * XXX in case we're rebooting we just let init die in order to
	 * work around an unsolved stack overflow seen very late during
	 * shutdown on sparc64 when the gmirror worker process exists.
	 */
	if (p == initproc && rebooting == 0) {
		printf("init died (signal %d, exit %d)\n",
		    WTERMSIG(rv), WEXITSTATUS(rv));
		panic("Going nowhere without my init!");
	}

	/*
	 * MUST abort all other threads before proceeding past here.
	 */
	PROC_LOCK(p);
	while (p->p_flag & P_HADTHREADS) {
		/*
		 * First check if some other thread got here before us.
		 * If so, act appropriately: exit or suspend.
		 */
		thread_suspend_check(0);

		/*
		 * Kill off the other threads. This requires
		 * some co-operation from other parts of the kernel
		 * so it may not be instantaneous.  With this state set
		 * any thread entering the kernel from userspace will
		 * thread_exit() in trap().  Any thread attempting to
		 * sleep will return immediately with EINTR or EWOULDBLOCK
		 * which will hopefully force them to back out to userland
		 * freeing resources as they go.  Any thread attempting
		 * to return to userland will thread_exit() from userret().
		 * thread_exit() will unsuspend us when the last of the
		 * other threads exits.
		 * If there is already a thread singler after resumption,
		 * calling thread_single will fail; in that case, we just
		 * re-check all suspension request, the thread should
		 * either be suspended there or exit.
		 */
		if (!thread_single(SINGLE_EXIT))
			break;

		/*
		 * All other activity in this process is now stopped.
		 * Threading support has been turned off.
		 */
	}
	KASSERT(p->p_numthreads == 1,
	    ("exit1: proc %p exiting with %d threads", p, p->p_numthreads));
	racct_sub(p, RACCT_NTHR, 1);
	/*
	 * Wakeup anyone in procfs' PIOCWAIT.  They should have a hold
	 * on our vmspace, so we should block below until they have
	 * released their reference to us.  Note that if they have
	 * requested S_EXIT stops we will block here until they ack
	 * via PIOCCONT.
	 */
	_STOPEVENT(p, S_EXIT, rv);

	/*
	 * Ignore any pending request to stop due to a stop signal.
	 * Once P_WEXIT is set, future requests will be ignored as
	 * well.
	 */
	p->p_flag &= ~P_STOPPED_SIG;
	KASSERT(!P_SHOULDSTOP(p), ("exiting process is stopped"));

	/*
	 * Note that we are exiting and do another wakeup of anyone in
	 * PIOCWAIT in case they aren't listening for S_EXIT stops or
	 * decided to wait again after we told them we are exiting.
	 */
	p->p_flag |= P_WEXIT;
	wakeup(&p->p_stype);

	/*
	 * Wait for any processes that have a hold on our vmspace to
	 * release their reference.
	 */
	while (p->p_lock > 0)
		msleep(&p->p_lock, &p->p_mtx, PWAIT, "exithold", 0);

	p->p_xstat = rv;	/* Let event handler change exit status */
	PROC_UNLOCK(p);
	/* Drain the limit callout while we don't have the proc locked */
	callout_drain(&p->p_limco);

#ifdef AUDIT
	/*
	 * The Sun BSM exit token contains two components: an exit status as
	 * passed to exit(), and a return value to indicate what sort of exit
	 * it was.  The exit status is WEXITSTATUS(rv), but it's not clear
	 * what the return value is.
	 */
	AUDIT_ARG_EXIT(WEXITSTATUS(rv), 0);
	AUDIT_SYSCALL_EXIT(0, td);
#endif

	/* Are we a task leader? */
	if (p == p->p_leader) {
		mtx_lock(&ppeers_lock);
		q = p->p_peers;
		while (q != NULL) {
			PROC_LOCK(q);
			kern_psignal(q, SIGKILL);
			PROC_UNLOCK(q);
			q = q->p_peers;
		}
		while (p->p_peers != NULL)
			msleep(p, &ppeers_lock, PWAIT, "exit1", 0);
		mtx_unlock(&ppeers_lock);
	}

	/*
	 * Check if any loadable modules need anything done at process exit.
	 * E.g. SYSV IPC stuff
	 * XXX what if one of these generates an error?
	 */
	EVENTHANDLER_INVOKE(process_exit, p);

	/*
	 * If parent is waiting for us to exit or exec,
	 * P_PPWAIT is set; we will wakeup the parent below.
	 */
	PROC_LOCK(p);
	rv = p->p_xstat;	/* Event handler could change exit status */
	stopprofclock(p);
	p->p_flag &= ~(P_TRACED | P_PPWAIT | P_PPTRACE);

	/*
	 * Stop the real interval timer.  If the handler is currently
	 * executing, prevent it from rearming itself and let it finish.
	 */
	if (timevalisset(&p->p_realtimer.it_value) &&
	    callout_stop(&p->p_itcallout) == 0) {
		timevalclear(&p->p_realtimer.it_interval);
		msleep(&p->p_itcallout, &p->p_mtx, PWAIT, "ritwait", 0);
		KASSERT(!timevalisset(&p->p_realtimer.it_value),
		    ("realtime timer is still armed"));
	}
	PROC_UNLOCK(p);

	/*
	 * Reset any sigio structures pointing to us as a result of
	 * F_SETOWN with our pid.
	 */
	funsetownlst(&p->p_sigiolst);

	/*
	 * If this process has an nlminfo data area (for lockd), release it
	 */
	if (nlminfo_release_p != NULL && p->p_nlminfo != NULL)
		(*nlminfo_release_p)(p);

	/*
	 * Close open files and release open-file table.
	 * This may block!
	 */
	fdescfree(td);

	/*
	 * If this thread tickled GEOM, we need to wait for the giggling to
	 * stop before we return to userland
	 */
	if (td->td_pflags & TDP_GEOM)
		g_waitidle();

	/*
	 * Remove ourself from our leader's peer list and wake our leader.
	 */
	mtx_lock(&ppeers_lock);
	if (p->p_leader->p_peers) {
		q = p->p_leader;
		while (q->p_peers != p)
			q = q->p_peers;
		q->p_peers = p->p_peers;
		wakeup(p->p_leader);
	}
	mtx_unlock(&ppeers_lock);

	vmspace_exit(td);

	sx_xlock(&proctree_lock);
	if (SESS_LEADER(p)) {
		struct session *sp = p->p_session;
		struct tty *tp;

		/*
		 * s_ttyp is not zero'd; we use this to indicate that
		 * the session once had a controlling terminal. (for
		 * logging and informational purposes)
		 */
		SESS_LOCK(sp);
		ttyvp = sp->s_ttyvp;
		tp = sp->s_ttyp;
		sp->s_ttyvp = NULL;
		sp->s_ttydp = NULL;
		sp->s_leader = NULL;
		SESS_UNLOCK(sp);

		/*
		 * Signal foreground pgrp and revoke access to
		 * controlling terminal if it has not been revoked
		 * already.
		 *
		 * Because the TTY may have been revoked in the mean
		 * time and could already have a new session associated
		 * with it, make sure we don't send a SIGHUP to a
		 * foreground process group that does not belong to this
		 * session.
		 */

		if (tp != NULL) {
			tty_lock(tp);
			if (tp->t_session == sp)
				tty_signal_pgrp(tp, SIGHUP);
			tty_unlock(tp);
		}

		if (ttyvp != NULL) {
			sx_xunlock(&proctree_lock);
			if (vn_lock(ttyvp, LK_EXCLUSIVE) == 0) {
				VOP_REVOKE(ttyvp, REVOKEALL);
				VOP_UNLOCK(ttyvp, 0);
			}
			sx_xlock(&proctree_lock);
		}
	}
	fixjobc(p, p->p_pgrp, 0);
	sx_xunlock(&proctree_lock);
	(void)acct_process(td);

	/* Release the TTY now we've unlocked everything. */
	if (ttyvp != NULL)
		vrele(ttyvp);
#ifdef KTRACE
	ktrprocexit(td);
#endif
	/*
	 * Release reference to text vnode
	 */
	if ((vtmp = p->p_textvp) != NULL) {
		p->p_textvp = NULL;
		vrele(vtmp);
	}

	/*
	 * Release our limits structure.
	 */
	plim = p->p_limit;
	p->p_limit = NULL;
	lim_free(plim);

	tidhash_remove(td);

	/*
	 * Remove proc from allproc queue and pidhash chain.
	 * Place onto zombproc.  Unlink from parent's child list.
	 */
	sx_xlock(&allproc_lock);
	LIST_REMOVE(p, p_list);
	LIST_INSERT_HEAD(&zombproc, p, p_list);
	LIST_REMOVE(p, p_hash);
	sx_xunlock(&allproc_lock);

	/*
	 * Call machine-dependent code to release any
	 * machine-dependent resources other than the address space.
	 * The address space is released by "vmspace_exitfree(p)" in
	 * vm_waitproc().
	 */
	cpu_exit(td);

	WITNESS_WARN(WARN_PANIC, NULL, "process (pid %d) exiting", p->p_pid);

	/*
	 * Reparent all of our children to init.
	 */
	sx_xlock(&proctree_lock);
	q = LIST_FIRST(&p->p_children);
	if (q != NULL)		/* only need this if any child is S_ZOMB */
		wakeup(initproc);
	for (; q != NULL; q = nq) {
		nq = LIST_NEXT(q, p_sibling);
		PROC_LOCK(q);
		proc_reparent(q, initproc);
		q->p_sigparent = SIGCHLD;
		/*
		 * Traced processes are killed
		 * since their existence means someone is screwing up.
		 */
		if (q->p_flag & P_TRACED) {
			struct thread *temp;

			/*
			 * Since q was found on our children list, the
			 * proc_reparent() call moved q to the orphan
			 * list due to present P_TRACED flag. Clear
			 * orphan link for q now while q is locked.
			 */
			clear_orphan(q);
			q->p_flag &= ~(P_TRACED | P_STOPPED_TRACE);
			FOREACH_THREAD_IN_PROC(q, temp)
				temp->td_dbgflags &= ~TDB_SUSPEND;
			kern_psignal(q, SIGKILL);
		}
		PROC_UNLOCK(q);
	}

	/*
	 * Also get rid of our orphans.
	 */
	while ((q = LIST_FIRST(&p->p_orphans)) != NULL) {
		PROC_LOCK(q);
		clear_orphan(q);
		PROC_UNLOCK(q);
	}

	/* Save exit status. */
	PROC_LOCK(p);
	p->p_xthread = td;

	/* Tell the prison that we are gone. */
	prison_proc_free(p->p_ucred->cr_prison);

#ifdef KDTRACE_HOOKS
	/*
	 * Tell the DTrace fasttrap provider about the exit if it
	 * has declared an interest.
	 */
	if (dtrace_fasttrap_exit)
		dtrace_fasttrap_exit(p);
#endif

	/*
	 * Notify interested parties of our demise.
	 */
	KNOTE_LOCKED(&p->p_klist, NOTE_EXIT);

#ifdef KDTRACE_HOOKS
	int reason = CLD_EXITED;
	if (WCOREDUMP(rv))
		reason = CLD_DUMPED;
	else if (WIFSIGNALED(rv))
		reason = CLD_KILLED;
	SDT_PROBE(proc, kernel, , exit, reason, 0, 0, 0, 0);
#endif

	/*
	 * Just delete all entries in the p_klist. At this point we won't
	 * report any more events, and there are nasty race conditions that
	 * can beat us if we don't.
	 */
	knlist_clear(&p->p_klist, 1);

	/*
	 * If this is a process with a descriptor, we may not need to deliver
	 * a signal to the parent.  proctree_lock is held over
	 * procdesc_exit() to serialize concurrent calls to close() and
	 * exit().
	 */
	if (p->p_procdesc == NULL || procdesc_exit(p)) {
		/*
		 * Notify parent that we're gone.  If parent has the
		 * PS_NOCLDWAIT flag set, or if the handler is set to SIG_IGN,
		 * notify process 1 instead (and hope it will handle this
		 * situation).
		 */
		PROC_LOCK(p->p_pptr);
		mtx_lock(&p->p_pptr->p_sigacts->ps_mtx);
		if (p->p_pptr->p_sigacts->ps_flag &
		    (PS_NOCLDWAIT | PS_CLDSIGIGN)) {
			struct proc *pp;

			mtx_unlock(&p->p_pptr->p_sigacts->ps_mtx);
			pp = p->p_pptr;
			PROC_UNLOCK(pp);
			proc_reparent(p, initproc);
			p->p_sigparent = SIGCHLD;
			PROC_LOCK(p->p_pptr);

			/*
			 * Notify parent, so in case he was wait(2)ing or
			 * executing waitpid(2) with our pid, he will
			 * continue.
			 */
			wakeup(pp);
		} else
			mtx_unlock(&p->p_pptr->p_sigacts->ps_mtx);

		if (p->p_pptr == initproc)
			kern_psignal(p->p_pptr, SIGCHLD);
		else if (p->p_sigparent != 0) {
			if (p->p_sigparent == SIGCHLD)
				childproc_exited(p);
			else	/* LINUX thread */
				kern_psignal(p->p_pptr, p->p_sigparent);
		}
	} else
		PROC_LOCK(p->p_pptr);
	sx_xunlock(&proctree_lock);

	/*
	 * The state PRS_ZOMBIE prevents other proesses from sending
	 * signal to the process, to avoid memory leak, we free memory
	 * for signal queue at the time when the state is set.
	 */
	sigqueue_flush(&p->p_sigqueue);
	sigqueue_flush(&td->td_sigqueue);

	/*
	 * We have to wait until after acquiring all locks before
	 * changing p_state.  We need to avoid all possible context
	 * switches (including ones from blocking on a mutex) while
	 * marked as a zombie.  We also have to set the zombie state
	 * before we release the parent process' proc lock to avoid
	 * a lost wakeup.  So, we first call wakeup, then we grab the
	 * sched lock, update the state, and release the parent process'
	 * proc lock.
	 */
	wakeup(p->p_pptr);
	cv_broadcast(&p->p_pwait);
	sched_exit(p->p_pptr, td);
	PROC_SLOCK(p);
	p->p_state = PRS_ZOMBIE;
	PROC_UNLOCK(p->p_pptr);

	/*
	 * Hopefully no one will try to deliver a signal to the process this
	 * late in the game.
	 */
	knlist_destroy(&p->p_klist);

	/*
	 * Save our children's rusage information in our exit rusage.
	 */
	ruadd(&p->p_ru, &p->p_rux, &p->p_stats->p_cru, &p->p_crux);

	/*
	 * Make sure the scheduler takes this thread out of its tables etc.
	 * This will also release this thread's reference to the ucred.
	 * Other thread parts to release include pcb bits and such.
	 */
	thread_exit();
}
Beispiel #5
0
static inline int
syscallenter(struct thread *td, struct syscall_args *sa)
{
	struct proc *p;
	int error, traced;

	PCPU_INC(cnt.v_syscall);
	p = td->td_proc;

	td->td_pticks = 0;
	if (td->td_ucred != p->p_ucred)
		cred_update_thread(td);
	if (p->p_flag & P_TRACED) {
		traced = 1;
		PROC_LOCK(p);
		td->td_dbgflags &= ~TDB_USERWR;
		td->td_dbgflags |= TDB_SCE;
		PROC_UNLOCK(p);
	} else
		traced = 0;
	error = (p->p_sysent->sv_fetch_syscall_args)(td, sa);
#ifdef KTRACE
	if (KTRPOINT(td, KTR_SYSCALL))
		ktrsyscall(sa->code, sa->narg, sa->args);
#endif

	CTR6(KTR_SYSC,
"syscall: td=%p pid %d %s (%#lx, %#lx, %#lx)",
	    td, td->td_proc->p_pid, syscallname(p, sa->code),
	    sa->args[0], sa->args[1], sa->args[2]);

	if (error == 0) {
		STOPEVENT(p, S_SCE, sa->narg);
		if (p->p_flag & P_TRACED && p->p_stops & S_PT_SCE) {
			PROC_LOCK(p);
			ptracestop((td), SIGTRAP);
			PROC_UNLOCK(p);
		}
		if (td->td_dbgflags & TDB_USERWR) {
			/*
			 * Reread syscall number and arguments if
			 * debugger modified registers or memory.
			 */
			error = (p->p_sysent->sv_fetch_syscall_args)(td, sa);
#ifdef KTRACE
			if (KTRPOINT(td, KTR_SYSCALL))
				ktrsyscall(sa->code, sa->narg, sa->args);
#endif
			if (error != 0)
				goto retval;
		}

#ifdef CAPABILITY_MODE
		/*
		 * In capability mode, we only allow access to system calls
		 * flagged with SYF_CAPENABLED.
		 */
		if (IN_CAPABILITY_MODE(td) &&
		    !(sa->callp->sy_flags & SYF_CAPENABLED)) {
			error = ECAPMODE;
			goto retval;
		}
#endif

		error = syscall_thread_enter(td, sa->callp);
		if (error != 0)
			goto retval;

#ifdef KDTRACE_HOOKS
		/*
		 * If the systrace module has registered it's probe
		 * callback and if there is a probe active for the
		 * syscall 'entry', process the probe.
		 */
		if (systrace_probe_func != NULL && sa->callp->sy_entry != 0)
			(*systrace_probe_func)(sa->callp->sy_entry, sa->code,
			    sa->callp, sa->args, 0);
#endif

		AUDIT_SYSCALL_ENTER(sa->code, td);
		error = (sa->callp->sy_call)(td, sa->args);
		AUDIT_SYSCALL_EXIT(error, td);

		/* Save the latest error return value. */
		td->td_errno = error;

#ifdef KDTRACE_HOOKS
		/*
		 * If the systrace module has registered it's probe
		 * callback and if there is a probe active for the
		 * syscall 'return', process the probe.
		 */
		if (systrace_probe_func != NULL && sa->callp->sy_return != 0)
			(*systrace_probe_func)(sa->callp->sy_return, sa->code,
			    sa->callp, NULL, (error) ? -1 : td->td_retval[0]);
#endif
		syscall_thread_exit(td, sa->callp);
		CTR4(KTR_SYSC, "syscall: p=%p error=%d return %#lx %#lx",
		    p, error, td->td_retval[0], td->td_retval[1]);
	}
 retval:
	if (traced) {
		PROC_LOCK(p);
		td->td_dbgflags &= ~TDB_SCE;
		PROC_UNLOCK(p);
	}
	(p->p_sysent->sv_set_syscall_retval)(td, error);
	return (error);
}
void
ia32_syscall(struct trapframe *frame)
{
	caddr_t params;
	int i;
	struct sysent *callp;
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	register_t orig_tf_rflags;
	int error;
	int narg;
	u_int32_t args[8];
	u_int64_t args64[8];
	u_int code;
	ksiginfo_t ksi;

	PCPU_INC(cnt.v_syscall);
	td->td_pticks = 0;
	td->td_frame = frame;
	if (td->td_ucred != p->p_ucred) 
		cred_update_thread(td);
	params = (caddr_t)frame->tf_rsp + sizeof(u_int32_t);
	code = frame->tf_rax;
	orig_tf_rflags = frame->tf_rflags;

	if (p->p_sysent->sv_prepsyscall) {
		/*
		 * The prep code is MP aware.
		 */
		(*p->p_sysent->sv_prepsyscall)(frame, args, &code, &params);
	} else {
		/*
		 * Need to check if this is a 32 bit or 64 bit syscall.
		 * fuword is MP aware.
		 */
		if (code == SYS_syscall) {
			/*
			 * Code is first argument, followed by actual args.
			 */
			code = fuword32(params);
			params += sizeof(int);
		} else if (code == SYS___syscall) {
			/*
			 * Like syscall, but code is a quad, so as to maintain
			 * quad alignment for the rest of the arguments.
			 * We use a 32-bit fetch in case params is not
			 * aligned.
			 */
			code = fuword32(params);
			params += sizeof(quad_t);
		}
	}

 	if (p->p_sysent->sv_mask)
 		code &= p->p_sysent->sv_mask;

 	if (code >= p->p_sysent->sv_size)
 		callp = &p->p_sysent->sv_table[0];
  	else
 		callp = &p->p_sysent->sv_table[code];

	narg = callp->sy_narg;

	/*
	 * copyin and the ktrsyscall()/ktrsysret() code is MP-aware
	 */
	if (params != NULL && narg != 0)
		error = copyin(params, (caddr_t)args,
		    (u_int)(narg * sizeof(int)));
	else
		error = 0;

	for (i = 0; i < narg; i++)
		args64[i] = args[i];

#ifdef KTRACE
	if (KTRPOINT(td, KTR_SYSCALL))
		ktrsyscall(code, narg, args64);
#endif
	CTR4(KTR_SYSC, "syscall enter thread %p pid %d proc %s code %d", td,
	    td->td_proc->p_pid, td->td_proc->p_comm, code);

	if (error == 0) {
		td->td_retval[0] = 0;
		td->td_retval[1] = frame->tf_rdx;

		STOPEVENT(p, S_SCE, narg);

		PTRACESTOP_SC(p, td, S_PT_SCE);

		AUDIT_SYSCALL_ENTER(code, td);
		error = (*callp->sy_call)(td, args64);
		AUDIT_SYSCALL_EXIT(error, td);
	}

	switch (error) {
	case 0:
		frame->tf_rax = td->td_retval[0];
		frame->tf_rdx = td->td_retval[1];
		frame->tf_rflags &= ~PSL_C;
		break;

	case ERESTART:
		/*
		 * Reconstruct pc, assuming lcall $X,y is 7 bytes,
		 * int 0x80 is 2 bytes. We saved this in tf_err.
		 */
		frame->tf_rip -= frame->tf_err;
		break;

	case EJUSTRETURN:
		break;

	default:
 		if (p->p_sysent->sv_errsize) {
 			if (error >= p->p_sysent->sv_errsize)
  				error = -1;	/* XXX */
   			else
  				error = p->p_sysent->sv_errtbl[error];
		}
		frame->tf_rax = error;
		frame->tf_rflags |= PSL_C;
		break;
	}

	/*
	 * Traced syscall.
	 */
	if (orig_tf_rflags & PSL_T) {
		frame->tf_rflags &= ~PSL_T;
		ksiginfo_init_trap(&ksi);
		ksi.ksi_signo = SIGTRAP;
		ksi.ksi_code = TRAP_TRACE;
		ksi.ksi_addr = (void *)frame->tf_rip;
		trapsignal(td, &ksi);
	}

	/*
	 * Check for misbehavior.
	 */
	WITNESS_WARN(WARN_PANIC, NULL, "System call %s returning",
	    (code >= 0 && code < SYS_MAXSYSCALL) ? freebsd32_syscallnames[code] : "???");
	KASSERT(td->td_critnest == 0,
	    ("System call %s returning in a critical section",
	    (code >= 0 && code < SYS_MAXSYSCALL) ? freebsd32_syscallnames[code] : "???"));
	KASSERT(td->td_locks == 0,
	    ("System call %s returning with %d locks held",
	    (code >= 0 && code < SYS_MAXSYSCALL) ? freebsd32_syscallnames[code] : "???",
	    td->td_locks));

	/*
	 * Handle reschedule and other end-of-syscall issues
	 */
	userret(td, frame);

	CTR4(KTR_SYSC, "syscall exit thread %p pid %d proc %s code %d", td,
	    td->td_proc->p_pid, td->td_proc->p_comm, code);
#ifdef KTRACE
	if (KTRPOINT(td, KTR_SYSRET))
		ktrsysret(code, error, td->td_retval[0]);
#endif

	/*
	 * This works because errno is findable through the
	 * register set.  If we ever support an emulation where this
	 * is not the case, this code will need to be revisited.
	 */
	STOPEVENT(p, S_SCX, code);
 
	PTRACESTOP_SC(p, td, S_PT_SCX);
}