Пример #1
0
/*
 * Perform pre-system-call processing, including stopping for tracing,
 * auditing, microstate-accounting, etc.
 *
 * This routine is called only if the t_pre_sys flag is set.  Any condition
 * requiring pre-syscall handling must set the t_pre_sys flag.  If the
 * condition is persistent, this routine will repost t_pre_sys.
 */
int
pre_syscall(int arg0)
{
	unsigned int code;
	kthread_t *t = curthread;
	proc_t *p = ttoproc(t);
	klwp_t *lwp = ttolwp(t);
	struct regs *rp = lwptoregs(lwp);
	int	repost;

	t->t_pre_sys = repost = 0;	/* clear pre-syscall processing flag */

	ASSERT(t->t_schedflag & TS_DONT_SWAP);

	syscall_mstate(LMS_USER, LMS_SYSTEM);

	/*
	 * The syscall arguments in the out registers should be pointed to
	 * by lwp_ap.  If the args need to be copied so that the outs can
	 * be changed without losing the ability to get the args for /proc,
	 * they can be saved by save_syscall_args(), and lwp_ap will be
	 * restored by post_syscall().
	 */
	ASSERT(lwp->lwp_ap == (long *)&rp->r_o0);

	/*
	 * Make sure the thread is holding the latest credentials for the
	 * process.  The credentials in the process right now apply to this
	 * thread for the entire system call.
	 */
	if (t->t_cred != p->p_cred) {
		cred_t *oldcred = t->t_cred;
		/*
		 * DTrace accesses t_cred in probe context.  t_cred must
		 * always be either NULL, or point to a valid, allocated cred
		 * structure.
		 */
		t->t_cred = crgetcred();
		crfree(oldcred);
	}

	/*
	 * Undo special arrangements to single-step the lwp
	 * so that a debugger will see valid register contents.
	 * Also so that the pc is valid for syncfpu().
	 * Also so that a syscall like exec() can be stepped.
	 */
	if (lwp->lwp_pcb.pcb_step != STEP_NONE) {
		(void) prundostep();
		repost = 1;
	}

	/*
	 * Check for indirect system call in case we stop for tracing.
	 * Don't allow multiple indirection.
	 */
	code = t->t_sysnum;
	if (code == 0 && arg0 != 0) {		/* indirect syscall */
		code = arg0;
		t->t_sysnum = arg0;
	}

	/*
	 * From the proc(4) manual page:
	 * When entry to a system call is being traced, the traced process
	 * stops after having begun the call to the system but before the
	 * system call arguments have been fetched from the process.
	 * If proc changes the args we must refetch them after starting.
	 */
	if (PTOU(p)->u_systrap) {
		if (prismember(&PTOU(p)->u_entrymask, code)) {
			/*
			 * Recheck stop condition, now that lock is held.
			 */
			mutex_enter(&p->p_lock);
			if (PTOU(p)->u_systrap &&
			    prismember(&PTOU(p)->u_entrymask, code)) {
				stop(PR_SYSENTRY, code);
				/*
				 * Must refetch args since they were
				 * possibly modified by /proc.  Indicate
				 * that the valid copy is in the
				 * registers.
				 */
				lwp->lwp_argsaved = 0;
				lwp->lwp_ap = (long *)&rp->r_o0;
			}
			mutex_exit(&p->p_lock);
		}
		repost = 1;
	}

	if (lwp->lwp_sysabort) {
		/*
		 * lwp_sysabort may have been set via /proc while the process
		 * was stopped on PR_SYSENTRY.  If so, abort the system call.
		 * Override any error from the copyin() of the arguments.
		 */
		lwp->lwp_sysabort = 0;
		(void) set_errno(EINTR); /* sets post-sys processing */
		t->t_pre_sys = 1;	/* repost anyway */
		return (1);		/* don't do system call, return EINTR */
	}

#ifdef C2_AUDIT
	if (audit_active) {	/* begin auditing for this syscall */
		int error;
		if (error = audit_start(T_SYSCALL, code, 0, lwp)) {
			t->t_pre_sys = 1;	/* repost anyway */
			lwp->lwp_error = 0;	/* for old drivers */
			return (error);
		}
		repost = 1;
	}
#endif /* C2_AUDIT */

#ifndef NPROBE
	/* Kernel probe */
	if (tnf_tracing_active) {
		TNF_PROBE_1(syscall_start, "syscall thread", /* CSTYLED */,
			tnf_sysnum,	sysnum,		t->t_sysnum);
		t->t_post_sys = 1;	/* make sure post_syscall runs */
		repost = 1;
	}
#endif /* NPROBE */

#ifdef SYSCALLTRACE
	if (syscalltrace) {
		int i;
		long *ap;
		char *cp;
		char *sysname;
		struct sysent *callp;

		if (code >= NSYSCALL)
			callp = &nosys_ent;	/* nosys has no args */
		else
			callp = LWP_GETSYSENT(lwp) + code;
		(void) save_syscall_args();
		mutex_enter(&systrace_lock);
		printf("%d: ", p->p_pid);
		if (code >= NSYSCALL)
			printf("0x%x", code);
		else {
			sysname = mod_getsysname(code);
			printf("%s[0x%x]", sysname == NULL ? "NULL" :
			    sysname, code);
		}
		cp = "(";
		for (i = 0, ap = lwp->lwp_ap; i < callp->sy_narg; i++, ap++) {
			printf("%s%lx", cp, *ap);
			cp = ", ";
		}
		if (i)
			printf(")");
		printf(" %s id=0x%p\n", PTOU(p)->u_comm, curthread);
		mutex_exit(&systrace_lock);
	}
#endif /* SYSCALLTRACE */

	/*
	 * If there was a continuing reason for pre-syscall processing,
	 * set the t_pre_sys flag for the next system call.
	 */
	if (repost)
		t->t_pre_sys = 1;
	lwp->lwp_error = 0;	/* for old drivers */
	lwp->lwp_badpriv = PRIV_NONE;	/* for privilege tracing */
	return (0);
}
Пример #2
0
/*
 * Perform pre-system-call processing, including stopping for tracing,
 * auditing, etc.
 *
 * This routine is called only if the t_pre_sys flag is set. Any condition
 * requiring pre-syscall handling must set the t_pre_sys flag. If the
 * condition is persistent, this routine will repost t_pre_sys.
 */
int
pre_syscall()
{
	kthread_t *t = curthread;
	unsigned code = t->t_sysnum;
	klwp_t *lwp = ttolwp(t);
	proc_t *p = ttoproc(t);
	int	repost;

	t->t_pre_sys = repost = 0;	/* clear pre-syscall processing flag */

	ASSERT(t->t_schedflag & TS_DONT_SWAP);

#if defined(DEBUG)
	/*
	 * On the i386 kernel, lwp_ap points at the piece of the thread
	 * stack that we copy the users arguments into.
	 *
	 * On the amd64 kernel, the syscall arguments in the rdi..r9
	 * registers should be pointed at by lwp_ap.  If the args need to
	 * be copied so that those registers can be changed without losing
	 * the ability to get the args for /proc, they can be saved by
	 * save_syscall_args(), and lwp_ap will be restored by post_syscall().
	 */
	if (lwp_getdatamodel(lwp) == DATAMODEL_NATIVE) {
#if defined(_LP64)
		ASSERT(lwp->lwp_ap == (long *)&lwptoregs(lwp)->r_rdi);
	} else {
#endif
		ASSERT((caddr_t)lwp->lwp_ap > t->t_stkbase &&
			(caddr_t)lwp->lwp_ap < t->t_stk);
	}
#endif	/* DEBUG */

	/*
	 * Make sure the thread is holding the latest credentials for the
	 * process.  The credentials in the process right now apply to this
	 * thread for the entire system call.
	 */
	if (t->t_cred != p->p_cred) {
		cred_t *oldcred = t->t_cred;
		/*
		 * DTrace accesses t_cred in probe context.  t_cred must
		 * always be either NULL, or point to a valid, allocated cred
		 * structure.
		 */
		t->t_cred = crgetcred();
		crfree(oldcred);
	}

	/*
	 * From the proc(4) manual page:
	 * When entry to a system call is being traced, the traced process
	 * stops after having begun the call to the system but before the
	 * system call arguments have been fetched from the process.
	 */
	if (PTOU(p)->u_systrap) {
		if (prismember(&PTOU(p)->u_entrymask, code)) {
			mutex_enter(&p->p_lock);
			/*
			 * Recheck stop condition, now that lock is held.
			 */
			if (PTOU(p)->u_systrap &&
			    prismember(&PTOU(p)->u_entrymask, code)) {
				stop(PR_SYSENTRY, code);

				/*
				 * /proc may have modified syscall args,
				 * either in regs for amd64 or on ustack
				 * for ia32.  Either way, arrange to
				 * copy them again, both for the syscall
				 * handler and for other consumers in
				 * post_syscall (like audit).  Here, we
				 * only do amd64, and just set lwp_ap
				 * back to the kernel-entry stack copy;
				 * the syscall ml code redoes
				 * move-from-regs to set up for the
				 * syscall handler after we return.  For
				 * ia32, save_syscall_args() below makes
				 * an lwp_ap-accessible copy.
				 */
#if defined(_LP64)
				if (lwp_getdatamodel(lwp) == DATAMODEL_NATIVE) {
					lwp->lwp_argsaved = 0;
					lwp->lwp_ap =
					    (long *)&lwptoregs(lwp)->r_rdi;
				}
#endif
			}
			mutex_exit(&p->p_lock);
		}
		repost = 1;
	}

	/*
	 * ia32 kernel, or ia32 proc on amd64 kernel: keep args in
	 * lwp_arg for post-syscall processing, regardless of whether
	 * they might have been changed in /proc above.
	 */
#if defined(_LP64)
	if (lwp_getdatamodel(lwp) != DATAMODEL_NATIVE)
#endif
		(void) save_syscall_args();

	if (lwp->lwp_sysabort) {
		/*
		 * lwp_sysabort may have been set via /proc while the process
		 * was stopped on PR_SYSENTRY.  If so, abort the system call.
		 * Override any error from the copyin() of the arguments.
		 */
		lwp->lwp_sysabort = 0;
		(void) set_errno(EINTR);	/* forces post_sys */
		t->t_pre_sys = 1;	/* repost anyway */
		return (1);		/* don't do system call, return EINTR */
	}

#ifdef C2_AUDIT
	if (audit_active) {	/* begin auditing for this syscall */
		int error;
		if (error = audit_start(T_SYSCALL, code, 0, lwp)) {
			t->t_pre_sys = 1;	/* repost anyway */
			(void) set_errno(error);
			return (1);
		}
		repost = 1;
	}
#endif /* C2_AUDIT */

#ifndef NPROBE
	/* Kernel probe */
	if (tnf_tracing_active) {
		TNF_PROBE_1(syscall_start, "syscall thread", /* CSTYLED */,
			tnf_sysnum,	sysnum,		t->t_sysnum);
		t->t_post_sys = 1;	/* make sure post_syscall runs */
		repost = 1;
	}
#endif /* NPROBE */

#ifdef SYSCALLTRACE
	if (syscalltrace) {
		int i;
		long *ap;
		char *cp;
		char *sysname;
		struct sysent *callp;

		if (code >= NSYSCALL)
			callp = &nosys_ent;	/* nosys has no args */
		else
			callp = LWP_GETSYSENT(lwp) + code;
		(void) save_syscall_args();
		mutex_enter(&systrace_lock);
		printf("%d: ", p->p_pid);
		if (code >= NSYSCALL)
			printf("0x%x", code);
		else {
			sysname = mod_getsysname(code);
			printf("%s[0x%x/0x%p]", sysname == NULL ? "NULL" :
			    sysname, code, callp->sy_callc);
		}
		cp = "(";
		for (i = 0, ap = lwp->lwp_ap; i < callp->sy_narg; i++, ap++) {
			printf("%s%lx", cp, *ap);
			cp = ", ";
		}
		if (i)
			printf(")");
		printf(" %s id=0x%p\n", PTOU(p)->u_comm, curthread);
		mutex_exit(&systrace_lock);
	}
#endif /* SYSCALLTRACE */

	/*
	 * If there was a continuing reason for pre-syscall processing,
	 * set the t_pre_sys flag for the next system call.
	 */
	if (repost)
		t->t_pre_sys = 1;
	lwp->lwp_error = 0;	/* for old drivers */
	lwp->lwp_badpriv = PRIV_NONE;
	return (0);
}
Пример #3
0
/*
 * See the comment above pool_do_bind() for the semantics of the pset_bind_*()
 * functions.  These must be kept in sync with cpupart_move_thread, and
 * anything else that could fail a pool_pset_bind.
 *
 * Returns non-zero errno on failure and zero on success.
 * Iff successful, cpu_lock is held on return.
 */
int
pset_bind_start(proc_t **procs, pool_t *pool)
{
	cred_t *pcred;
	proc_t *p, **pp;
	kthread_t *t;
	cpupart_t *newpp;
	int ret;

	extern int cpupart_movable_thread(kthread_id_t, cpupart_t *, int);

	ASSERT(pool_lock_held());
	ASSERT(INGLOBALZONE(curproc));

	mutex_enter(&cpu_lock);
	weakbinding_stop();

	newpp = cpupart_find(pool->pool_pset->pset_id);
	ASSERT(newpp != NULL);
	if (newpp->cp_cpulist == NULL) {
		weakbinding_start();
		mutex_exit(&cpu_lock);
		return (ENOTSUP);
	}

	pcred = crgetcred();

	/*
	 * Check for the PRIV_PROC_PRIOCNTL privilege that is required
	 * to enter and exit scheduling classes.  If other privileges
	 * are required by CL_ENTERCLASS/CL_CANEXIT types of routines
	 * in the future, this code will have to be updated.
	 */
	if (secpolicy_setpriority(pcred) != 0) {
		weakbinding_start();
		mutex_exit(&cpu_lock);
		crfree(pcred);
		return (EPERM);
	}

	for (pp = procs; (p = *pp) != NULL; pp++) {
		mutex_enter(&p->p_lock);
		if ((t = p->p_tlist) == NULL) {
			mutex_exit(&p->p_lock);
			continue;
		}
		/*
		 * Check our basic permissions to control this process.
		 */
		if (!prochasprocperm(p, curproc, pcred)) {
			mutex_exit(&p->p_lock);
			weakbinding_start();
			mutex_exit(&cpu_lock);
			crfree(pcred);
			return (EPERM);
		}
		do {
			/*
			 * Check that all threads can be moved to
			 * a new processor set.
			 */
			thread_lock(t);
			ret = cpupart_movable_thread(t, newpp, 0);
			thread_unlock(t);
			if (ret != 0) {
				mutex_exit(&p->p_lock);
				weakbinding_start();
				mutex_exit(&cpu_lock);
				crfree(pcred);
				return (ret);
			}
		} while ((t = t->t_forw) != p->p_tlist);
		mutex_exit(&p->p_lock);
	}
	crfree(pcred);
	return (0);	/* with cpu_lock held and weakbinding stopped */
}