Exemple #1
0
static int
dtrace_getustack_common(uint64_t *pcstack, int pcstack_limit, uintptr_t pc,
    uintptr_t sp)
{
	proc_t *p = curproc;
	int ret = 0;

	ASSERT(pcstack == NULL || pcstack_limit > 0);

	while (pc != 0) {
		ret++;
		if (pcstack != NULL) {
			*pcstack++ = (uint64_t)pc;
			pcstack_limit--;
			if (pcstack_limit <= 0)
				break;
		}

		if (sp == 0)
			break;

		if (SV_PROC_FLAG(p, SV_ILP32)) {
			pc = dtrace_fuword32((void *)(sp + RETURN_OFFSET));
			sp = dtrace_fuword32((void *)sp);
		}
		else {
			pc = dtrace_fuword64((void *)(sp + RETURN_OFFSET64));
			sp = dtrace_fuword64((void *)sp);
		}
	}

	return (ret);
}
Exemple #2
0
/*
 * Set that machine state for performing an upcall that starts
 * the entry function with the given argument.
 */
void
cpu_set_upcall(struct thread *td, void (*entry)(void *), void *arg,
    stack_t *stack)
{

	/* 
	 * Do any extra cleaning that needs to be done.
	 * The thread may have optional components
	 * that are not present in a fresh thread.
	 * This may be a recycled thread so make it look
	 * as though it's newly allocated.
	 */
	cpu_thread_clean(td);

#ifdef COMPAT_FREEBSD32
	if (SV_PROC_FLAG(td->td_proc, SV_ILP32)) {
		/*
		 * Set the trap frame to point at the beginning of the entry
		 * function.
		 */
		td->td_frame->tf_rbp = 0;
		td->td_frame->tf_rsp =
		   (((uintptr_t)stack->ss_sp + stack->ss_size - 4) & ~0x0f) - 4;
		td->td_frame->tf_rip = (uintptr_t)entry;

		/* Return address sentinel value to stop stack unwinding. */
		suword32((void *)td->td_frame->tf_rsp, 0);

		/* Pass the argument to the entry point. */
		suword32((void *)(td->td_frame->tf_rsp + sizeof(int32_t)),
		    (uint32_t)(uintptr_t)arg);

		return;
	}
#endif

	/*
	 * Set the trap frame to point at the beginning of the uts
	 * function.
	 */
	td->td_frame->tf_rbp = 0;
	td->td_frame->tf_rsp =
	    ((register_t)stack->ss_sp + stack->ss_size) & ~0x0f;
	td->td_frame->tf_rsp -= 8;
	td->td_frame->tf_rip = (register_t)entry;
	td->td_frame->tf_ds = _udatasel;
	td->td_frame->tf_es = _udatasel;
	td->td_frame->tf_fs = _ufssel;
	td->td_frame->tf_gs = _ugssel;
	td->td_frame->tf_flags = TF_HASSEGS;

	/* Return address sentinel value to stop stack unwinding. */
	suword((void *)td->td_frame->tf_rsp, 0);

	/* Pass the argument to the entry point. */
	td->td_frame->tf_rdi = (register_t)arg;
}
int
procfs_doprocfpregs(PFS_FILL_ARGS)
{
	int error;
	struct fpreg r;
	struct thread *td2;
#ifdef COMPAT_FREEBSD32
	struct fpreg32 r32;
	int wrap32 = 0;
#endif

	if (uio->uio_offset != 0)
		return (0);

	PROC_LOCK(p);
	PROC_ASSERT_HELD(p);
	if (p_candebug(td, p)) {
		PROC_UNLOCK(p);
		return (EPERM);
	}
	if (!P_SHOULDSTOP(p)) {
		PROC_UNLOCK(p);
		return (EBUSY);
	}

	td2 = FIRST_THREAD_IN_PROC(p);
#ifdef COMPAT_FREEBSD32
	if (SV_CURPROC_FLAG(SV_ILP32)) {
		if (SV_PROC_FLAG(td2->td_proc, SV_ILP32) == 0) {
			PROC_UNLOCK(p);
			return (EINVAL);
		}
		wrap32 = 1;
		memset(&r32, 0, sizeof(r32));
	} else
#endif
		memset(&r, 0, sizeof(r));
	error = PROC(read, fpregs, td2, &r);
	if (error == 0) {
		PROC_UNLOCK(p);
		error = UIOMOVE_FROMBUF(r, uio);
		PROC_LOCK(p);
	}
	if (error == 0 && uio->uio_rw == UIO_WRITE) {
		if (!P_SHOULDSTOP(p))
			error = EBUSY;
		else
			/* XXXKSE: */
			error = PROC(write, fpregs, td2, &r);
	}
	PROC_UNLOCK(p);

	return (error);
}
Exemple #4
0
static void
linux_fixup_prot(struct thread *td, int *prot)
{
	struct linux_pemuldata *pem;

	if (SV_PROC_FLAG(td->td_proc, SV_ILP32) && *prot & PROT_READ) {
		pem = pem_find(td->td_proc);
		if (pem->persona & LINUX_READ_IMPLIES_EXEC)
			*prot |= PROT_EXEC;
	}

}
Exemple #5
0
int
procfs_doprocdbregs(PFS_FILL_ARGS)
{
	int error;
	struct dbreg r;
	struct thread *td2;
#ifdef COMPAT_FREEBSD32
	struct dbreg32 r32;
	int wrap32 = 0;
#endif

	if (uio->uio_offset != 0)
		return (0);

	PROC_LOCK(p);
	KASSERT(p->p_lock > 0, ("proc not held"));
	if (p_candebug(td, p) != 0) {
		PROC_UNLOCK(p);
		return (EPERM);
	}

	td2 = FIRST_THREAD_IN_PROC(p);
#ifdef COMPAT_FREEBSD32
	if (SV_CURPROC_FLAG(SV_ILP32)) {
		if (SV_PROC_FLAG(td2->td_proc, SV_ILP32) == 0) {
			PROC_UNLOCK(p);
			return (EINVAL);
		}
		wrap32 = 1;
	}
#endif
	error = PROC(read, dbregs, td2, &r);
	if (error == 0) {
		PROC_UNLOCK(p);
		error = UIOMOVE_FROMBUF(r, uio);
		PROC_LOCK(p);
	}
	if (error == 0 && uio->uio_rw == UIO_WRITE) {
		if (!P_SHOULDSTOP(p)) /* XXXKSE should be P_TRACED? */
			error = EBUSY;
		else
			/* XXXKSE: */
			error = PROC(write, dbregs, td2, &r);
	}
	PROC_UNLOCK(p);

	return (error);
}
Exemple #6
0
int
cpu_set_user_tls(struct thread *td, void *tls_base)
{

#if defined(__mips_n64) && defined(COMPAT_FREEBSD32)
	if (td->td_proc && SV_PROC_FLAG(td->td_proc, SV_ILP32))
		td->td_md.md_tls_tcb_offset = TLS_TP_OFFSET + TLS_TCB_SIZE32;
	else
#endif
	td->td_md.md_tls_tcb_offset = TLS_TP_OFFSET + TLS_TCB_SIZE;
	td->td_md.md_tls = (char*)tls_base;
	if (td == curthread && cpuinfo.userlocal_reg == true) {
		mips_wr_userlocal((unsigned long)tls_base +
		    td->td_md.md_tls_tcb_offset);
	}

	return (0);
}
Exemple #7
0
int
cpu_set_user_tls(struct thread *td, void *tls_base)
{
	struct pcb *pcb;

	if ((u_int64_t)tls_base >= VM_MAXUSER_ADDRESS)
		return (EINVAL);

	pcb = td->td_pcb;
	set_pcb_flags(pcb, PCB_FULL_IRET);
#ifdef COMPAT_FREEBSD32
	if (SV_PROC_FLAG(td->td_proc, SV_ILP32)) {
		pcb->pcb_gsbase = (register_t)tls_base;
		return (0);
	}
#endif
	pcb->pcb_fsbase = (register_t)tls_base;
	return (0);
}
Exemple #8
0
int
dtrace_getustackdepth(void)
{
	proc_t *p = curproc;
	struct trapframe *tf;
	uintptr_t pc, sp;
	int n = 0;

	if (p == NULL || (tf = curthread->td_frame) == NULL)
		return (0);

	if (DTRACE_CPUFLAG_ISSET(CPU_DTRACE_FAULT))
		return (-1);

	pc = tf->srr0;
	sp = tf->fixreg[1];

	if (DTRACE_CPUFLAG_ISSET(CPU_DTRACE_ENTRY)) {
		/* 
		 * In an entry probe.  The frame pointer has not yet been
		 * pushed (that happens in the function prologue).  The
		 * best approach is to add the current pc as a missing top
		 * of stack and back the pc up to the caller, which is stored
		 * at the current stack pointer address since the call 
		 * instruction puts it there right before the branch.
		 */

		if (SV_PROC_FLAG(p, SV_ILP32)) {
			pc = dtrace_fuword32((void *) sp);
		}
		else
			pc = dtrace_fuword64((void *) sp);
		n++;
	}

	n += dtrace_getustack_common(NULL, 0, pc, sp);

	return (n);
}
Exemple #9
0
void
dtrace_getufpstack(uint64_t *pcstack, uint64_t *fpstack, int pcstack_limit)
{
	proc_t *p = curproc;
	struct trapframe *tf;
	uintptr_t pc, sp;
	volatile uint16_t *flags =
	    (volatile uint16_t *)&cpu_core[curcpu].cpuc_dtrace_flags;
#ifdef notyet	/* XXX signal stack */
	uintptr_t oldcontext;
	size_t s1, s2;
#endif

	if (*flags & CPU_DTRACE_FAULT)
		return;

	if (pcstack_limit <= 0)
		return;

	/*
	 * If there's no user context we still need to zero the stack.
	 */
	if (p == NULL || (tf = curthread->td_frame) == NULL)
		goto zero;

	*pcstack++ = (uint64_t)p->p_pid;
	pcstack_limit--;

	if (pcstack_limit <= 0)
		return;

	pc = tf->srr0;
	sp = tf->fixreg[1];

#ifdef notyet /* XXX signal stack */
	oldcontext = lwp->lwp_oldcontext;
	s1 = sizeof (struct xframe) + 2 * sizeof (long);
	s2 = s1 + sizeof (siginfo_t);
#endif

	if (DTRACE_CPUFLAG_ISSET(CPU_DTRACE_ENTRY)) {
		*pcstack++ = (uint64_t)pc;
		*fpstack++ = 0;
		pcstack_limit--;
		if (pcstack_limit <= 0)
			return;

		if (SV_PROC_FLAG(p, SV_ILP32)) {
			pc = dtrace_fuword32((void *)sp);
		}
		else {
			pc = dtrace_fuword64((void *)sp);
		}
	}

	while (pc != 0) {
		*pcstack++ = (uint64_t)pc;
		*fpstack++ = sp;
		pcstack_limit--;
		if (pcstack_limit <= 0)
			break;

		if (sp == 0)
			break;

#ifdef notyet /* XXX signal stack */
		if (oldcontext == sp + s1 || oldcontext == sp + s2) {
			ucontext_t *ucp = (ucontext_t *)oldcontext;
			greg_t *gregs = ucp->uc_mcontext.gregs;

			sp = dtrace_fulword(&gregs[REG_FP]);
			pc = dtrace_fulword(&gregs[REG_PC]);

			oldcontext = dtrace_fulword(&ucp->uc_link);
		} else
#endif /* XXX */
		{
			if (SV_PROC_FLAG(p, SV_ILP32)) {
				pc = dtrace_fuword32((void *)(sp + RETURN_OFFSET));
				sp = dtrace_fuword32((void *)sp);
			}
			else {
				pc = dtrace_fuword64((void *)(sp + RETURN_OFFSET64));
				sp = dtrace_fuword64((void *)sp);
			}
		}

		/*
		 * This is totally bogus:  if we faulted, we're going to clear
		 * the fault and break.  This is to deal with the apparently
		 * broken Java stacks on x86.
		 */
		if (*flags & CPU_DTRACE_FAULT) {
			*flags &= ~CPU_DTRACE_FAULT;
			break;
		}
	}

zero:
	while (pcstack_limit-- > 0)
		*pcstack++ = 0;
}
Exemple #10
0
/*
 * In-kernel implementation of execve().  All arguments are assumed to be
 * userspace pointers from the passed thread.
 */
static int
do_execve(struct thread *td, struct image_args *args, struct mac *mac_p)
{
	struct proc *p = td->td_proc;
	struct nameidata nd;
	struct ucred *oldcred;
	struct uidinfo *euip = NULL;
	register_t *stack_base;
	int error, i;
	struct image_params image_params, *imgp;
	struct vattr attr;
	int (*img_first)(struct image_params *);
	struct pargs *oldargs = NULL, *newargs = NULL;
	struct sigacts *oldsigacts = NULL, *newsigacts = NULL;
#ifdef KTRACE
	struct vnode *tracevp = NULL;
	struct ucred *tracecred = NULL;
#endif
	struct vnode *oldtextvp = NULL, *newtextvp;
	int credential_changing;
	int textset;
#ifdef MAC
	struct label *interpvplabel = NULL;
	int will_transition;
#endif
#ifdef HWPMC_HOOKS
	struct pmckern_procexec pe;
#endif
	static const char fexecv_proc_title[] = "(fexecv)";

	imgp = &image_params;

	/*
	 * Lock the process and set the P_INEXEC flag to indicate that
	 * it should be left alone until we're done here.  This is
	 * necessary to avoid race conditions - e.g. in ptrace() -
	 * that might allow a local user to illicitly obtain elevated
	 * privileges.
	 */
	PROC_LOCK(p);
	KASSERT((p->p_flag & P_INEXEC) == 0,
	    ("%s(): process already has P_INEXEC flag", __func__));
	p->p_flag |= P_INEXEC;
	PROC_UNLOCK(p);

	/*
	 * Initialize part of the common data
	 */
	bzero(imgp, sizeof(*imgp));
	imgp->proc = p;
	imgp->attr = &attr;
	imgp->args = args;
	oldcred = p->p_ucred;

#ifdef MAC
	error = mac_execve_enter(imgp, mac_p);
	if (error)
		goto exec_fail;
#endif

	/*
	 * Translate the file name. namei() returns a vnode pointer
	 *	in ni_vp among other things.
	 *
	 * XXXAUDIT: It would be desirable to also audit the name of the
	 * interpreter if this is an interpreted binary.
	 */
	if (args->fname != NULL) {
		NDINIT(&nd, LOOKUP, ISOPEN | LOCKLEAF | FOLLOW | SAVENAME
		    | AUDITVNODE1, UIO_SYSSPACE, args->fname, td);
	}

	SDT_PROBE1(proc, , , exec, args->fname);

interpret:
	if (args->fname != NULL) {
#ifdef CAPABILITY_MODE
		/*
		 * While capability mode can't reach this point via direct
		 * path arguments to execve(), we also don't allow
		 * interpreters to be used in capability mode (for now).
		 * Catch indirect lookups and return a permissions error.
		 */
		if (IN_CAPABILITY_MODE(td)) {
			error = ECAPMODE;
			goto exec_fail;
		}
#endif
		error = namei(&nd);
		if (error)
			goto exec_fail;

		newtextvp = nd.ni_vp;
		imgp->vp = newtextvp;
	} else {
		AUDIT_ARG_FD(args->fd);
		/*
		 * Descriptors opened only with O_EXEC or O_RDONLY are allowed.
		 */
		error = fgetvp_exec(td, args->fd, &cap_fexecve_rights, &newtextvp);
		if (error)
			goto exec_fail;
		vn_lock(newtextvp, LK_EXCLUSIVE | LK_RETRY);
		AUDIT_ARG_VNODE1(newtextvp);
		imgp->vp = newtextvp;
	}

	/*
	 * Check file permissions (also 'opens' file)
	 */
	error = exec_check_permissions(imgp);
	if (error)
		goto exec_fail_dealloc;

	imgp->object = imgp->vp->v_object;
	if (imgp->object != NULL)
		vm_object_reference(imgp->object);

	/*
	 * Set VV_TEXT now so no one can write to the executable while we're
	 * activating it.
	 *
	 * Remember if this was set before and unset it in case this is not
	 * actually an executable image.
	 */
	textset = VOP_IS_TEXT(imgp->vp);
	VOP_SET_TEXT(imgp->vp);

	error = exec_map_first_page(imgp);
	if (error)
		goto exec_fail_dealloc;

	imgp->proc->p_osrel = 0;
	imgp->proc->p_fctl0 = 0;

	/*
	 * Implement image setuid/setgid.
	 *
	 * Determine new credentials before attempting image activators
	 * so that it can be used by process_exec handlers to determine
	 * credential/setid changes.
	 *
	 * Don't honor setuid/setgid if the filesystem prohibits it or if
	 * the process is being traced.
	 *
	 * We disable setuid/setgid/etc in capability mode on the basis
	 * that most setugid applications are not written with that
	 * environment in mind, and will therefore almost certainly operate
	 * incorrectly. In principle there's no reason that setugid
	 * applications might not be useful in capability mode, so we may want
	 * to reconsider this conservative design choice in the future.
	 *
	 * XXXMAC: For the time being, use NOSUID to also prohibit
	 * transitions on the file system.
	 */
	credential_changing = 0;
	credential_changing |= (attr.va_mode & S_ISUID) &&
	    oldcred->cr_uid != attr.va_uid;
	credential_changing |= (attr.va_mode & S_ISGID) &&
	    oldcred->cr_gid != attr.va_gid;
#ifdef MAC
	will_transition = mac_vnode_execve_will_transition(oldcred, imgp->vp,
	    interpvplabel, imgp);
	credential_changing |= will_transition;
#endif

	/* Don't inherit PROC_PDEATHSIG_CTL value if setuid/setgid. */
	if (credential_changing)
		imgp->proc->p_pdeathsig = 0;

	if (credential_changing &&
#ifdef CAPABILITY_MODE
	    ((oldcred->cr_flags & CRED_FLAG_CAPMODE) == 0) &&
#endif
	    (imgp->vp->v_mount->mnt_flag & MNT_NOSUID) == 0 &&
	    (p->p_flag & P_TRACED) == 0) {
		imgp->credential_setid = true;
		VOP_UNLOCK(imgp->vp, 0);
		imgp->newcred = crdup(oldcred);
		if (attr.va_mode & S_ISUID) {
			euip = uifind(attr.va_uid);
			change_euid(imgp->newcred, euip);
		}
		vn_lock(imgp->vp, LK_EXCLUSIVE | LK_RETRY);
		if (attr.va_mode & S_ISGID)
			change_egid(imgp->newcred, attr.va_gid);
		/*
		 * Implement correct POSIX saved-id behavior.
		 *
		 * XXXMAC: Note that the current logic will save the
		 * uid and gid if a MAC domain transition occurs, even
		 * though maybe it shouldn't.
		 */
		change_svuid(imgp->newcred, imgp->newcred->cr_uid);
		change_svgid(imgp->newcred, imgp->newcred->cr_gid);
	} else {
		/*
		 * Implement correct POSIX saved-id behavior.
		 *
		 * XXX: It's not clear that the existing behavior is
		 * POSIX-compliant.  A number of sources indicate that the
		 * saved uid/gid should only be updated if the new ruid is
		 * not equal to the old ruid, or the new euid is not equal
		 * to the old euid and the new euid is not equal to the old
		 * ruid.  The FreeBSD code always updates the saved uid/gid.
		 * Also, this code uses the new (replaced) euid and egid as
		 * the source, which may or may not be the right ones to use.
		 */
		if (oldcred->cr_svuid != oldcred->cr_uid ||
		    oldcred->cr_svgid != oldcred->cr_gid) {
			VOP_UNLOCK(imgp->vp, 0);
			imgp->newcred = crdup(oldcred);
			vn_lock(imgp->vp, LK_EXCLUSIVE | LK_RETRY);
			change_svuid(imgp->newcred, imgp->newcred->cr_uid);
			change_svgid(imgp->newcred, imgp->newcred->cr_gid);
		}
	}
	/* The new credentials are installed into the process later. */

	/*
	 * Do the best to calculate the full path to the image file.
	 */
	if (args->fname != NULL && args->fname[0] == '/')
		imgp->execpath = args->fname;
	else {
		VOP_UNLOCK(imgp->vp, 0);
		if (vn_fullpath(td, imgp->vp, &imgp->execpath,
		    &imgp->freepath) != 0)
			imgp->execpath = args->fname;
		vn_lock(imgp->vp, LK_EXCLUSIVE | LK_RETRY);
	}

	/*
	 *	If the current process has a special image activator it
	 *	wants to try first, call it.   For example, emulating shell
	 *	scripts differently.
	 */
	error = -1;
	if ((img_first = imgp->proc->p_sysent->sv_imgact_try) != NULL)
		error = img_first(imgp);

	/*
	 *	Loop through the list of image activators, calling each one.
	 *	An activator returns -1 if there is no match, 0 on success,
	 *	and an error otherwise.
	 */
	for (i = 0; error == -1 && execsw[i]; ++i) {
		if (execsw[i]->ex_imgact == NULL ||
		    execsw[i]->ex_imgact == img_first) {
			continue;
		}
		error = (*execsw[i]->ex_imgact)(imgp);
	}

	if (error) {
		if (error == -1) {
			if (textset == 0)
				VOP_UNSET_TEXT(imgp->vp);
			error = ENOEXEC;
		}
		goto exec_fail_dealloc;
	}

	/*
	 * Special interpreter operation, cleanup and loop up to try to
	 * activate the interpreter.
	 */
	if (imgp->interpreted) {
		exec_unmap_first_page(imgp);
		/*
		 * VV_TEXT needs to be unset for scripts.  There is a short
		 * period before we determine that something is a script where
		 * VV_TEXT will be set. The vnode lock is held over this
		 * entire period so nothing should illegitimately be blocked.
		 */
		VOP_UNSET_TEXT(imgp->vp);
		/* free name buffer and old vnode */
		if (args->fname != NULL)
			NDFREE(&nd, NDF_ONLY_PNBUF);
#ifdef MAC
		mac_execve_interpreter_enter(newtextvp, &interpvplabel);
#endif
		if (imgp->opened) {
			VOP_CLOSE(newtextvp, FREAD, td->td_ucred, td);
			imgp->opened = 0;
		}
		vput(newtextvp);
		vm_object_deallocate(imgp->object);
		imgp->object = NULL;
		imgp->credential_setid = false;
		if (imgp->newcred != NULL) {
			crfree(imgp->newcred);
			imgp->newcred = NULL;
		}
		imgp->execpath = NULL;
		free(imgp->freepath, M_TEMP);
		imgp->freepath = NULL;
		/* set new name to that of the interpreter */
		NDINIT(&nd, LOOKUP, LOCKLEAF | FOLLOW | SAVENAME,
		    UIO_SYSSPACE, imgp->interpreter_name, td);
		args->fname = imgp->interpreter_name;
		goto interpret;
	}

	/*
	 * NB: We unlock the vnode here because it is believed that none
	 * of the sv_copyout_strings/sv_fixup operations require the vnode.
	 */
	VOP_UNLOCK(imgp->vp, 0);

	if (disallow_high_osrel &&
	    P_OSREL_MAJOR(p->p_osrel) > P_OSREL_MAJOR(__FreeBSD_version)) {
		error = ENOEXEC;
		uprintf("Osrel %d for image %s too high\n", p->p_osrel,
		    imgp->execpath != NULL ? imgp->execpath : "<unresolved>");
		vn_lock(imgp->vp, LK_SHARED | LK_RETRY);
		goto exec_fail_dealloc;
	}

	/* ABI enforces the use of Capsicum. Switch into capabilities mode. */
	if (SV_PROC_FLAG(p, SV_CAPSICUM))
		sys_cap_enter(td, NULL);

	/*
	 * Copy out strings (args and env) and initialize stack base.
	 */
	stack_base = (*p->p_sysent->sv_copyout_strings)(imgp);

	/*
	 * Stack setup.
	 */
	error = (*p->p_sysent->sv_fixup)(&stack_base, imgp);
	if (error != 0) {
		vn_lock(imgp->vp, LK_SHARED | LK_RETRY);
		goto exec_fail_dealloc;
	}

	if (args->fdp != NULL) {
		/* Install a brand new file descriptor table. */
		fdinstall_remapped(td, args->fdp);
		args->fdp = NULL;
	} else {
		/*
		 * Keep on using the existing file descriptor table. For
		 * security and other reasons, the file descriptor table
		 * cannot be shared after an exec.
		 */
		fdunshare(td);
		/* close files on exec */
		fdcloseexec(td);
	}

	/*
	 * Malloc things before we need locks.
	 */
	i = exec_args_get_begin_envv(imgp->args) - imgp->args->begin_argv;
	/* Cache arguments if they fit inside our allowance */
	if (ps_arg_cache_limit >= i + sizeof(struct pargs)) {
		newargs = pargs_alloc(i);
		bcopy(imgp->args->begin_argv, newargs->ar_args, i);
	}

	/*
	 * For security and other reasons, signal handlers cannot
	 * be shared after an exec. The new process gets a copy of the old
	 * handlers. In execsigs(), the new process will have its signals
	 * reset.
	 */
	if (sigacts_shared(p->p_sigacts)) {
		oldsigacts = p->p_sigacts;
		newsigacts = sigacts_alloc();
		sigacts_copy(newsigacts, oldsigacts);
	}

	vn_lock(imgp->vp, LK_SHARED | LK_RETRY);

	PROC_LOCK(p);
	if (oldsigacts)
		p->p_sigacts = newsigacts;
	/* Stop profiling */
	stopprofclock(p);

	/* reset caught signals */
	execsigs(p);

	/* name this process - nameiexec(p, ndp) */
	bzero(p->p_comm, sizeof(p->p_comm));
	if (args->fname)
		bcopy(nd.ni_cnd.cn_nameptr, p->p_comm,
		    min(nd.ni_cnd.cn_namelen, MAXCOMLEN));
	else if (vn_commname(newtextvp, p->p_comm, sizeof(p->p_comm)) != 0)
		bcopy(fexecv_proc_title, p->p_comm, sizeof(fexecv_proc_title));
	bcopy(p->p_comm, td->td_name, sizeof(td->td_name));
#ifdef KTR
	sched_clear_tdname(td);
#endif

	/*
	 * mark as execed, wakeup the process that vforked (if any) and tell
	 * it that it now has its own resources back
	 */
	p->p_flag |= P_EXEC;
	if ((p->p_flag2 & P2_NOTRACE_EXEC) == 0)
		p->p_flag2 &= ~P2_NOTRACE;
	if (p->p_flag & P_PPWAIT) {
		p->p_flag &= ~(P_PPWAIT | P_PPTRACE);
		cv_broadcast(&p->p_pwait);
		/* STOPs are no longer ignored, arrange for AST */
		signotify(td);
	}

	/*
	 * Implement image setuid/setgid installation.
	 */
	if (imgp->credential_setid) {
		/*
		 * Turn off syscall tracing for set-id programs, except for
		 * root.  Record any set-id flags first to make sure that
		 * we do not regain any tracing during a possible block.
		 */
		setsugid(p);

#ifdef KTRACE
		if (p->p_tracecred != NULL &&
		    priv_check_cred(p->p_tracecred, PRIV_DEBUG_DIFFCRED))
			ktrprocexec(p, &tracecred, &tracevp);
#endif
		/*
		 * Close any file descriptors 0..2 that reference procfs,
		 * then make sure file descriptors 0..2 are in use.
		 *
		 * Both fdsetugidsafety() and fdcheckstd() may call functions
		 * taking sleepable locks, so temporarily drop our locks.
		 */
		PROC_UNLOCK(p);
		VOP_UNLOCK(imgp->vp, 0);
		fdsetugidsafety(td);
		error = fdcheckstd(td);
		vn_lock(imgp->vp, LK_SHARED | LK_RETRY);
		if (error != 0)
			goto exec_fail_dealloc;
		PROC_LOCK(p);
#ifdef MAC
		if (will_transition) {
			mac_vnode_execve_transition(oldcred, imgp->newcred,
			    imgp->vp, interpvplabel, imgp);
		}
#endif
	} else {
		if (oldcred->cr_uid == oldcred->cr_ruid &&
		    oldcred->cr_gid == oldcred->cr_rgid)
			p->p_flag &= ~P_SUGID;
	}
	/*
	 * Set the new credentials.
	 */
	if (imgp->newcred != NULL) {
		proc_set_cred(p, imgp->newcred);
		crfree(oldcred);
		oldcred = NULL;
	}

	/*
	 * Store the vp for use in procfs.  This vnode was referenced by namei
	 * or fgetvp_exec.
	 */
	oldtextvp = p->p_textvp;
	p->p_textvp = newtextvp;

#ifdef KDTRACE_HOOKS
	/*
	 * Tell the DTrace fasttrap provider about the exec if it
	 * has declared an interest.
	 */
	if (dtrace_fasttrap_exec)
		dtrace_fasttrap_exec(p);
#endif

	/*
	 * Notify others that we exec'd, and clear the P_INEXEC flag
	 * as we're now a bona fide freshly-execed process.
	 */
	KNOTE_LOCKED(p->p_klist, NOTE_EXEC);
	p->p_flag &= ~P_INEXEC;

	/* clear "fork but no exec" flag, as we _are_ execing */
	p->p_acflag &= ~AFORK;

	/*
	 * Free any previous argument cache and replace it with
	 * the new argument cache, if any.
	 */
	oldargs = p->p_args;
	p->p_args = newargs;
	newargs = NULL;

	PROC_UNLOCK(p);

#ifdef	HWPMC_HOOKS
	/*
	 * Check if system-wide sampling is in effect or if the
	 * current process is using PMCs.  If so, do exec() time
	 * processing.  This processing needs to happen AFTER the
	 * P_INEXEC flag is cleared.
	 */
	if (PMC_SYSTEM_SAMPLING_ACTIVE() || PMC_PROC_IS_USING_PMCS(p)) {
		VOP_UNLOCK(imgp->vp, 0);
		pe.pm_credentialschanged = credential_changing;
		pe.pm_entryaddr = imgp->entry_addr;

		PMC_CALL_HOOK_X(td, PMC_FN_PROCESS_EXEC, (void *) &pe);
		vn_lock(imgp->vp, LK_SHARED | LK_RETRY);
	}
#endif

	/* Set values passed into the program in registers. */
	(*p->p_sysent->sv_setregs)(td, imgp, (u_long)(uintptr_t)stack_base);

	vfs_mark_atime(imgp->vp, td->td_ucred);

	SDT_PROBE1(proc, , , exec__success, args->fname);

exec_fail_dealloc:
	if (imgp->firstpage != NULL)
		exec_unmap_first_page(imgp);

	if (imgp->vp != NULL) {
		if (args->fname)
			NDFREE(&nd, NDF_ONLY_PNBUF);
		if (imgp->opened)
			VOP_CLOSE(imgp->vp, FREAD, td->td_ucred, td);
		if (error != 0)
			vput(imgp->vp);
		else
			VOP_UNLOCK(imgp->vp, 0);
	}

	if (imgp->object != NULL)
		vm_object_deallocate(imgp->object);

	free(imgp->freepath, M_TEMP);

	if (error == 0) {
		if (p->p_ptevents & PTRACE_EXEC) {
			PROC_LOCK(p);
			if (p->p_ptevents & PTRACE_EXEC)
				td->td_dbgflags |= TDB_EXEC;
			PROC_UNLOCK(p);
		}

		/*
		 * Stop the process here if its stop event mask has
		 * the S_EXEC bit set.
		 */
		STOPEVENT(p, S_EXEC, 0);
	} else {
exec_fail:
		/* we're done here, clear P_INEXEC */
		PROC_LOCK(p);
		p->p_flag &= ~P_INEXEC;
		PROC_UNLOCK(p);

		SDT_PROBE1(proc, , , exec__failure, error);
	}

	if (imgp->newcred != NULL && oldcred != NULL)
		crfree(imgp->newcred);

#ifdef MAC
	mac_execve_exit(imgp);
	mac_execve_interpreter_exit(interpvplabel);
#endif
	exec_free_args(args);

	/*
	 * Handle deferred decrement of ref counts.
	 */
	if (oldtextvp != NULL)
		vrele(oldtextvp);
#ifdef KTRACE
	if (tracevp != NULL)
		vrele(tracevp);
	if (tracecred != NULL)
		crfree(tracecred);
#endif
	pargs_drop(oldargs);
	pargs_drop(newargs);
	if (oldsigacts != NULL)
		sigacts_free(oldsigacts);
	if (euip != NULL)
		uifree(euip);

	if (error && imgp->vmspace_destroyed) {
		/* sorry, no more process anymore. exit gracefully */
		exit1(td, 0, SIGABRT);
		/* NOT REACHED */
	}

#ifdef KTRACE
	if (error == 0)
		ktrprocctor(p);
#endif

	/*
	 * We don't want cpu_set_syscall_retval() to overwrite any of
	 * the register values put in place by exec_setregs().
	 * Implementations of cpu_set_syscall_retval() will leave
	 * registers unmodified when returning EJUSTRETURN.
	 */
	return (error == 0 ? EJUSTRETURN : error);
}
Exemple #11
0
/*
 * The map entries can *almost* be read with programs like cat.  However,
 * large maps need special programs to read.  It is not easy to implement
 * a program that can sense the required size of the buffer, and then
 * subsequently do a read with the appropriate size.  This operation cannot
 * be atomic.  The best that we can do is to allow the program to do a read
 * with an arbitrarily large buffer, and return as much as we can.  We can
 * return an error code if the buffer is too small (EFBIG), then the program
 * can try a bigger buffer.
 */
int
procfs_doprocmap(PFS_FILL_ARGS)
{
	struct vmspace *vm;
	vm_map_t map;
	vm_map_entry_t entry, tmp_entry;
	struct vnode *vp;
	char *fullpath, *freepath;
	struct ucred *cred;
	int error;
	unsigned int last_timestamp;
#ifdef COMPAT_FREEBSD32
	int wrap32 = 0;
#endif

	PROC_LOCK(p);
	error = p_candebug(td, p);
	PROC_UNLOCK(p);
	if (error)
		return (error);

	if (uio->uio_rw != UIO_READ)
		return (EOPNOTSUPP);

#ifdef COMPAT_FREEBSD32
        if (SV_CURPROC_FLAG(SV_ILP32)) {
                if (!(SV_PROC_FLAG(p, SV_ILP32)))
                        return (EOPNOTSUPP);
                wrap32 = 1;
        }
#endif

	vm = vmspace_acquire_ref(p);
	if (vm == NULL)
		return (ESRCH);
	map = &vm->vm_map;
	vm_map_lock_read(map);
	for (entry = map->header.next; entry != &map->header;
	     entry = entry->next) {
		vm_object_t obj, tobj, lobj;
		int ref_count, shadow_count, flags;
		vm_offset_t e_start, e_end, addr;
		int resident, privateresident;
		char *type;
		vm_eflags_t e_eflags;
		vm_prot_t e_prot;

		if (entry->eflags & MAP_ENTRY_IS_SUB_MAP)
			continue;

		e_eflags = entry->eflags;
		e_prot = entry->protection;
		e_start = entry->start;
		e_end = entry->end;
		privateresident = 0;
		obj = entry->object.vm_object;
		if (obj != NULL) {
			VM_OBJECT_RLOCK(obj);
			if (obj->shadow_count == 1)
				privateresident = obj->resident_page_count;
		}
		cred = (entry->cred) ? entry->cred : (obj ? obj->cred : NULL);

		resident = 0;
		addr = entry->start;
		while (addr < entry->end) {
			if (pmap_extract(map->pmap, addr))
				resident++;
			addr += PAGE_SIZE;
		}

		for (lobj = tobj = obj; tobj; tobj = tobj->backing_object) {
			if (tobj != obj)
				VM_OBJECT_RLOCK(tobj);
			if (lobj != obj)
				VM_OBJECT_RUNLOCK(lobj);
			lobj = tobj;
		}
		last_timestamp = map->timestamp;
		vm_map_unlock_read(map);

		freepath = NULL;
		fullpath = "-";
		if (lobj) {
			vp = NULL;
			switch (lobj->type) {
			default:
			case OBJT_DEFAULT:
				type = "default";
				break;
			case OBJT_VNODE:
				type = "vnode";
				vp = lobj->handle;
				vref(vp);
				break;
			case OBJT_SWAP:
				if ((lobj->flags & OBJ_TMPFS_NODE) != 0) {
					type = "vnode";
					if ((lobj->flags & OBJ_TMPFS) != 0) {
						vp = lobj->un_pager.swp.swp_tmpfs;
						vref(vp);
					}
				} else {
					type = "swap";
				}
				break;
			case OBJT_SG:
			case OBJT_DEVICE:
				type = "device";
				break;
			}
			if (lobj != obj)
				VM_OBJECT_RUNLOCK(lobj);

			flags = obj->flags;
			ref_count = obj->ref_count;
			shadow_count = obj->shadow_count;
			VM_OBJECT_RUNLOCK(obj);
			if (vp != NULL) {
				vn_fullpath(td, vp, &fullpath, &freepath);
				vrele(vp);
			}
		} else {
			type = "none";
			flags = 0;
			ref_count = 0;
			shadow_count = 0;
		}

		/*
		 * format:
		 *  start, end, resident, private resident, cow, access, type,
		 *         charged, charged uid.
		 */
		error = sbuf_printf(sb,
		    "0x%lx 0x%lx %d %d %p %s%s%s %d %d 0x%x %s %s %s %s %s %d\n",
			(u_long)e_start, (u_long)e_end,
			resident, privateresident,
#ifdef COMPAT_FREEBSD32
			wrap32 ? NULL : obj,	/* Hide 64 bit value */
#else
			obj,
#endif
			(e_prot & VM_PROT_READ)?"r":"-",
			(e_prot & VM_PROT_WRITE)?"w":"-",
			(e_prot & VM_PROT_EXECUTE)?"x":"-",
			ref_count, shadow_count, flags,
			(e_eflags & MAP_ENTRY_COW)?"COW":"NCOW",
			(e_eflags & MAP_ENTRY_NEEDS_COPY)?"NC":"NNC",
			type, fullpath,
			cred ? "CH":"NCH", cred ? cred->cr_ruid : -1);

		if (freepath != NULL)
			free(freepath, M_TEMP);
		vm_map_lock_read(map);
		if (error == -1) {
			error = 0;
			break;
		}
		if (last_timestamp != map->timestamp) {
			/*
			 * Look again for the entry because the map was
			 * modified while it was unlocked.  Specifically,
			 * the entry may have been clipped, merged, or deleted.
			 */
			vm_map_lookup_entry(map, e_end - 1, &tmp_entry);
			entry = tmp_entry;
		}
	}
	vm_map_unlock_read(map);
	vmspace_free(vm);
	return (error);
}
Exemple #12
0
void
cpu_set_syscall_retval(struct thread *td, int error)
{
	struct trapframe *locr0 = td->td_frame;
	unsigned int code;
	int quad_syscall;

	code = locr0->v0;
	quad_syscall = 0;
#if defined(__mips_n32) || defined(__mips_n64)
#ifdef COMPAT_FREEBSD32
	if (code == SYS___syscall && SV_PROC_FLAG(td->td_proc, SV_ILP32))
		quad_syscall = 1;
#endif
#else
	if (code == SYS___syscall)
		quad_syscall = 1;
#endif

	if (code == SYS_syscall)
		code = locr0->a0;
	else if (code == SYS___syscall) {
		if (quad_syscall)
			code = _QUAD_LOWWORD ? locr0->a1 : locr0->a0;
		else
			code = locr0->a0;
	}

	switch (error) {
	case 0:
		if (quad_syscall && code != SYS_lseek) {
			/*
			 * System call invoked through the
			 * SYS___syscall interface but the
			 * return value is really just 32
			 * bits.
			 */
			locr0->v0 = td->td_retval[0];
			if (_QUAD_LOWWORD)
				locr0->v1 = td->td_retval[0];
			locr0->a3 = 0;
		} else {
			locr0->v0 = td->td_retval[0];
			locr0->v1 = td->td_retval[1];
			locr0->a3 = 0;
		}
		break;

	case ERESTART:
		locr0->pc = td->td_pcb->pcb_tpc;
		break;

	case EJUSTRETURN:
		break;	/* nothing to do */

	default:
		if (quad_syscall && code != SYS_lseek) {
			locr0->v0 = error;
			if (_QUAD_LOWWORD)
				locr0->v1 = error;
			locr0->a3 = 1;
		} else {
			locr0->v0 = error;
			locr0->a3 = 1;
		}
	}
}