/* * Perform pre-system-call processing, including stopping for tracing, * auditing, microstate-accounting, etc. * * This routine is called only if the t_pre_sys flag is set. Any condition * requiring pre-syscall handling must set the t_pre_sys flag. If the * condition is persistent, this routine will repost t_pre_sys. */ int pre_syscall(int arg0) { unsigned int code; kthread_t *t = curthread; proc_t *p = ttoproc(t); klwp_t *lwp = ttolwp(t); struct regs *rp = lwptoregs(lwp); int repost; t->t_pre_sys = repost = 0; /* clear pre-syscall processing flag */ ASSERT(t->t_schedflag & TS_DONT_SWAP); syscall_mstate(LMS_USER, LMS_SYSTEM); /* * The syscall arguments in the out registers should be pointed to * by lwp_ap. If the args need to be copied so that the outs can * be changed without losing the ability to get the args for /proc, * they can be saved by save_syscall_args(), and lwp_ap will be * restored by post_syscall(). */ ASSERT(lwp->lwp_ap == (long *)&rp->r_o0); /* * Make sure the thread is holding the latest credentials for the * process. The credentials in the process right now apply to this * thread for the entire system call. */ if (t->t_cred != p->p_cred) { cred_t *oldcred = t->t_cred; /* * DTrace accesses t_cred in probe context. t_cred must * always be either NULL, or point to a valid, allocated cred * structure. */ t->t_cred = crgetcred(); crfree(oldcred); } /* * Undo special arrangements to single-step the lwp * so that a debugger will see valid register contents. * Also so that the pc is valid for syncfpu(). * Also so that a syscall like exec() can be stepped. */ if (lwp->lwp_pcb.pcb_step != STEP_NONE) { (void) prundostep(); repost = 1; } /* * Check for indirect system call in case we stop for tracing. * Don't allow multiple indirection. 
*/ code = t->t_sysnum; if (code == 0 && arg0 != 0) { /* indirect syscall */ code = arg0; t->t_sysnum = arg0; } /* * From the proc(4) manual page: * When entry to a system call is being traced, the traced process * stops after having begun the call to the system but before the * system call arguments have been fetched from the process. * If proc changes the args we must refetch them after starting. */ if (PTOU(p)->u_systrap) { if (prismember(&PTOU(p)->u_entrymask, code)) { /* * Recheck stop condition, now that lock is held. */ mutex_enter(&p->p_lock); if (PTOU(p)->u_systrap && prismember(&PTOU(p)->u_entrymask, code)) { stop(PR_SYSENTRY, code); /* * Must refetch args since they were * possibly modified by /proc. Indicate * that the valid copy is in the * registers. */ lwp->lwp_argsaved = 0; lwp->lwp_ap = (long *)&rp->r_o0; } mutex_exit(&p->p_lock); } repost = 1; } if (lwp->lwp_sysabort) { /* * lwp_sysabort may have been set via /proc while the process * was stopped on PR_SYSENTRY. If so, abort the system call. * Override any error from the copyin() of the arguments. 
*/ lwp->lwp_sysabort = 0; (void) set_errno(EINTR); /* sets post-sys processing */ t->t_pre_sys = 1; /* repost anyway */ return (1); /* don't do system call, return EINTR */ } #ifdef C2_AUDIT if (audit_active) { /* begin auditing for this syscall */ int error; if (error = audit_start(T_SYSCALL, code, 0, lwp)) { t->t_pre_sys = 1; /* repost anyway */ lwp->lwp_error = 0; /* for old drivers */ return (error); } repost = 1; } #endif /* C2_AUDIT */ #ifndef NPROBE /* Kernel probe */ if (tnf_tracing_active) { TNF_PROBE_1(syscall_start, "syscall thread", /* CSTYLED */, tnf_sysnum, sysnum, t->t_sysnum); t->t_post_sys = 1; /* make sure post_syscall runs */ repost = 1; } #endif /* NPROBE */ #ifdef SYSCALLTRACE if (syscalltrace) { int i; long *ap; char *cp; char *sysname; struct sysent *callp; if (code >= NSYSCALL) callp = &nosys_ent; /* nosys has no args */ else callp = LWP_GETSYSENT(lwp) + code; (void) save_syscall_args(); mutex_enter(&systrace_lock); printf("%d: ", p->p_pid); if (code >= NSYSCALL) printf("0x%x", code); else { sysname = mod_getsysname(code); printf("%s[0x%x]", sysname == NULL ? "NULL" : sysname, code); } cp = "("; for (i = 0, ap = lwp->lwp_ap; i < callp->sy_narg; i++, ap++) { printf("%s%lx", cp, *ap); cp = ", "; } if (i) printf(")"); printf(" %s id=0x%p\n", PTOU(p)->u_comm, curthread); mutex_exit(&systrace_lock); } #endif /* SYSCALLTRACE */ /* * If there was a continuing reason for pre-syscall processing, * set the t_pre_sys flag for the next system call. */ if (repost) t->t_pre_sys = 1; lwp->lwp_error = 0; /* for old drivers */ lwp->lwp_badpriv = PRIV_NONE; /* for privilege tracing */ return (0); }
/* * Perform pre-system-call processing, including stopping for tracing, * auditing, etc. * * This routine is called only if the t_pre_sys flag is set. Any condition * requiring pre-syscall handling must set the t_pre_sys flag. If the * condition is persistent, this routine will repost t_pre_sys. */ int pre_syscall() { kthread_t *t = curthread; unsigned code = t->t_sysnum; klwp_t *lwp = ttolwp(t); proc_t *p = ttoproc(t); int repost; t->t_pre_sys = repost = 0; /* clear pre-syscall processing flag */ ASSERT(t->t_schedflag & TS_DONT_SWAP); #if defined(DEBUG) /* * On the i386 kernel, lwp_ap points at the piece of the thread * stack that we copy the users arguments into. * * On the amd64 kernel, the syscall arguments in the rdi..r9 * registers should be pointed at by lwp_ap. If the args need to * be copied so that those registers can be changed without losing * the ability to get the args for /proc, they can be saved by * save_syscall_args(), and lwp_ap will be restored by post_syscall(). */ if (lwp_getdatamodel(lwp) == DATAMODEL_NATIVE) { #if defined(_LP64) ASSERT(lwp->lwp_ap == (long *)&lwptoregs(lwp)->r_rdi); } else { #endif ASSERT((caddr_t)lwp->lwp_ap > t->t_stkbase && (caddr_t)lwp->lwp_ap < t->t_stk); } #endif /* DEBUG */ /* * Make sure the thread is holding the latest credentials for the * process. The credentials in the process right now apply to this * thread for the entire system call. */ if (t->t_cred != p->p_cred) { cred_t *oldcred = t->t_cred; /* * DTrace accesses t_cred in probe context. t_cred must * always be either NULL, or point to a valid, allocated cred * structure. */ t->t_cred = crgetcred(); crfree(oldcred); } /* * From the proc(4) manual page: * When entry to a system call is being traced, the traced process * stops after having begun the call to the system but before the * system call arguments have been fetched from the process. 
*/ if (PTOU(p)->u_systrap) { if (prismember(&PTOU(p)->u_entrymask, code)) { mutex_enter(&p->p_lock); /* * Recheck stop condition, now that lock is held. */ if (PTOU(p)->u_systrap && prismember(&PTOU(p)->u_entrymask, code)) { stop(PR_SYSENTRY, code); /* * /proc may have modified syscall args, * either in regs for amd64 or on ustack * for ia32. Either way, arrange to * copy them again, both for the syscall * handler and for other consumers in * post_syscall (like audit). Here, we * only do amd64, and just set lwp_ap * back to the kernel-entry stack copy; * the syscall ml code redoes * move-from-regs to set up for the * syscall handler after we return. For * ia32, save_syscall_args() below makes * an lwp_ap-accessible copy. */ #if defined(_LP64) if (lwp_getdatamodel(lwp) == DATAMODEL_NATIVE) { lwp->lwp_argsaved = 0; lwp->lwp_ap = (long *)&lwptoregs(lwp)->r_rdi; } #endif } mutex_exit(&p->p_lock); } repost = 1; } /* * ia32 kernel, or ia32 proc on amd64 kernel: keep args in * lwp_arg for post-syscall processing, regardless of whether * they might have been changed in /proc above. */ #if defined(_LP64) if (lwp_getdatamodel(lwp) != DATAMODEL_NATIVE) #endif (void) save_syscall_args(); if (lwp->lwp_sysabort) { /* * lwp_sysabort may have been set via /proc while the process * was stopped on PR_SYSENTRY. If so, abort the system call. * Override any error from the copyin() of the arguments. 
*/ lwp->lwp_sysabort = 0; (void) set_errno(EINTR); /* forces post_sys */ t->t_pre_sys = 1; /* repost anyway */ return (1); /* don't do system call, return EINTR */ } #ifdef C2_AUDIT if (audit_active) { /* begin auditing for this syscall */ int error; if (error = audit_start(T_SYSCALL, code, 0, lwp)) { t->t_pre_sys = 1; /* repost anyway */ (void) set_errno(error); return (1); } repost = 1; } #endif /* C2_AUDIT */ #ifndef NPROBE /* Kernel probe */ if (tnf_tracing_active) { TNF_PROBE_1(syscall_start, "syscall thread", /* CSTYLED */, tnf_sysnum, sysnum, t->t_sysnum); t->t_post_sys = 1; /* make sure post_syscall runs */ repost = 1; } #endif /* NPROBE */ #ifdef SYSCALLTRACE if (syscalltrace) { int i; long *ap; char *cp; char *sysname; struct sysent *callp; if (code >= NSYSCALL) callp = &nosys_ent; /* nosys has no args */ else callp = LWP_GETSYSENT(lwp) + code; (void) save_syscall_args(); mutex_enter(&systrace_lock); printf("%d: ", p->p_pid); if (code >= NSYSCALL) printf("0x%x", code); else { sysname = mod_getsysname(code); printf("%s[0x%x/0x%p]", sysname == NULL ? "NULL" : sysname, code, callp->sy_callc); } cp = "("; for (i = 0, ap = lwp->lwp_ap; i < callp->sy_narg; i++, ap++) { printf("%s%lx", cp, *ap); cp = ", "; } if (i) printf(")"); printf(" %s id=0x%p\n", PTOU(p)->u_comm, curthread); mutex_exit(&systrace_lock); } #endif /* SYSCALLTRACE */ /* * If there was a continuing reason for pre-syscall processing, * set the t_pre_sys flag for the next system call. */ if (repost) t->t_pre_sys = 1; lwp->lwp_error = 0; /* for old drivers */ lwp->lwp_badpriv = PRIV_NONE; return (0); }
/*
 * First phase of binding a set of processes to a pool's processor set.
 * The semantics of the pset_bind_*() functions are described in the comment
 * above pool_do_bind().  The checks made here must be kept in sync with
 * cpupart_move_thread, and with anything else that could fail a
 * pool_pset_bind.
 *
 * procs is a NULL-terminated array of processes; pool identifies the
 * target processor set through pool->pool_pset->pset_id.
 *
 * Returns zero on success and a non-zero errno on failure.
 * cpu_lock is held on return if and only if the call succeeded.
 */
int
pset_bind_start(proc_t **procs, pool_t *pool)
{
	cred_t *cr;
	proc_t *proc, **slot;
	kthread_t *thr;
	cpupart_t *target;
	int err;

	extern int cpupart_movable_thread(kthread_id_t, cpupart_t *, int);

	ASSERT(pool_lock_held());
	ASSERT(INGLOBALZONE(curproc));

	mutex_enter(&cpu_lock);
	weakbinding_stop();

	target = cpupart_find(pool->pool_pset->pset_id);
	ASSERT(target != NULL);
	if (target->cp_cpulist == NULL) {
		/* No cred is held yet, so this path unwinds on its own. */
		weakbinding_start();
		mutex_exit(&cpu_lock);
		return (ENOTSUP);
	}

	cr = crgetcred();

	/*
	 * PRIV_PROC_PRIOCNTL is the privilege required to enter and exit
	 * scheduling classes.  Should CL_ENTERCLASS/CL_CANEXIT types of
	 * routines come to require other privileges, this check has to be
	 * updated to match.
	 */
	if (secpolicy_setpriority(cr) != 0) {
		err = EPERM;
		goto fail;
	}

	for (slot = procs; *slot != NULL; slot++) {
		proc = *slot;

		mutex_enter(&proc->p_lock);
		thr = proc->p_tlist;
		if (thr == NULL) {
			/* No threads on this process; nothing to check. */
			mutex_exit(&proc->p_lock);
			continue;
		}

		/*
		 * Check our basic permissions to control this process.
		 */
		if (!prochasprocperm(proc, curproc, cr)) {
			err = EPERM;
			goto fail_plocked;
		}

		/*
		 * Every thread on the process's list must be movable to
		 * the new processor set.
		 */
		do {
			thread_lock(thr);
			err = cpupart_movable_thread(thr, target, 0);
			thread_unlock(thr);
			if (err != 0)
				goto fail_plocked;
			thr = thr->t_forw;
		} while (thr != proc->p_tlist);

		mutex_exit(&proc->p_lock);
	}

	crfree(cr);
	return (0);	/* with cpu_lock held and weakbinding stopped */

fail_plocked:
	mutex_exit(&proc->p_lock);
fail:
	weakbinding_start();
	mutex_exit(&cpu_lock);
	crfree(cr);
	return (err);
}