Example #1
0
/*
 * Read a symbolic link
 */
static int
pfs_readlink(struct vop_readlink_args *va)
{
	struct vnode *vn = va->a_vp;
	struct pfs_vdata *pvd = vn->v_data;
	struct pfs_node *pn = pvd->pvd_pn;
	struct uio *uio = va->a_uio;
	struct proc *proc = NULL;
	struct thread *td = curthread;
	char buf[PATH_MAX];
	struct sbuf sb;
	int error, locked;

	PFS_TRACE(("%s", pn->pn_name));
	pfs_assert_not_owned(pn);

	if (vn->v_type != VLNK)
		PFS_RETURN (EINVAL);
	KASSERT_PN_IS_LINK(pn);

	if (pn->pn_fill == NULL)
		PFS_RETURN (EIO);

	if (pvd->pvd_pid != NO_PID) {
		if ((proc = pfind(pvd->pvd_pid)) == NULL)
			PFS_RETURN (EIO);
		if (proc->p_flag & P_WEXIT) {
			PROC_UNLOCK(proc);
			PFS_RETURN (EIO);
		}
		_PHOLD(proc);
		PROC_UNLOCK(proc);
	}
	vhold(vn);
	locked = VOP_ISLOCKED(vn, td);
	VOP_UNLOCK(vn, 0, td);

	/* sbuf_new() can't fail with a static buffer */
	sbuf_new(&sb, buf, sizeof buf, 0);

	error = pn_fill(td, proc, pn, &sb, NULL);

	if (proc != NULL)
		PRELE(proc);
	vn_lock(vn, locked | LK_RETRY, td);
	vdrop(vn);

	if (error) {
		sbuf_delete(&sb);
		PFS_RETURN (error);
	}

	sbuf_finish(&sb);
	error = uiomove_frombuf(sbuf_data(&sb), sbuf_len(&sb), uio);
	sbuf_delete(&sb);
	PFS_RETURN (error);
}
Example #2
0
/*
 * Write to a file
 */
static int
pfs_write(struct vop_write_args *va)
{
	struct vnode *vn = va->a_vp;
	struct pfs_vdata *pvd = vn->v_data;
	struct pfs_node *pn = pvd->pvd_pn;
	struct uio *uio = va->a_uio;
	struct proc *proc;
	struct sbuf sb;
	int error;

	PFS_TRACE(("%s", pn->pn_name));
	pfs_assert_not_owned(pn);

	if (vn->v_type != VREG)
		PFS_RETURN (EINVAL);
	KASSERT_PN_IS_FILE(pn);

	if (!(pn->pn_flags & PFS_WR))
		PFS_RETURN (EBADF);

	if (pn->pn_fill == NULL)
		PFS_RETURN (EIO);

	/*
	 * This is necessary because either process' privileges may
	 * have changed since the open() call.
	 */
	if (!pfs_visible(curthread, pn, pvd->pvd_pid, &proc))
		PFS_RETURN (EIO);
	if (proc != NULL) {
		_PHOLD(proc);
		PROC_UNLOCK(proc);
	}

	if (pn->pn_flags & PFS_RAWWR) {
		error = pn_fill(curthread, proc, pn, NULL, uio);
		if (proc != NULL)
			PRELE(proc);
		PFS_RETURN (error);
	}

	sbuf_uionew(&sb, uio, &error);
	if (error) {
		if (proc != NULL)
			PRELE(proc);
		PFS_RETURN (error);
	}

	error = pn_fill(curthread, proc, pn, &sb, uio);

	sbuf_delete(&sb);
	if (proc != NULL)
		PRELE(proc);
	PFS_RETURN (error);
}
Example #3
0
/*
 * Handle a trap taken at a pid-provider tracepoint in the current process.
 *
 * The tracepoint is looked up by (pid, pc) in fasttrap_tpoints; any probes
 * attached to it are fired, and the traced instruction is then either
 * emulated in the kernel or copied to per-thread scratch space so that the
 * thread executes it on its way back to userland.  The register set in
 * 'rp' is updated and written back with proc_write_regs().
 *
 * Returns 0 when the trap has been dealt with (including the case where a
 * signal was posted to the process) and -1 when no matching tracepoint was
 * found.
 */
int
fasttrap_pid_probe(struct reg *rp)
{
	proc_t *p = curproc;
	uintptr_t pc = rp->r_rip - 1;
	uintptr_t new_pc = 0;
	fasttrap_bucket_t *bucket;
#if defined(sun)
	kmutex_t *pid_mtx;
#endif
	fasttrap_tracepoint_t *tp, tp_local;
	pid_t pid;
	dtrace_icookie_t cookie;
	uint_t is_enabled = 0;

	/*
	 * It's possible that a user (in a veritable orgy of bad planning)
	 * could redirect this thread's flow of control before it reached the
	 * return probe fasttrap. In this case we need to kill the process
	 * since it's in an unrecoverable state.
	 */
	if (curthread->t_dtrace_step) {
		ASSERT(curthread->t_dtrace_on);
		fasttrap_sigtrap(p, curthread, pc);
		return (0);
	}

	/*
	 * Clear all user tracing flags.
	 */
	curthread->t_dtrace_ft = 0;
	curthread->t_dtrace_pc = 0;
	curthread->t_dtrace_npc = 0;
	curthread->t_dtrace_scrpc = 0;
	curthread->t_dtrace_astpc = 0;
#ifdef __amd64
	curthread->t_dtrace_regv = 0;
#endif

#if defined(sun)
	/*
	 * Treat a child created by a call to vfork(2) as if it were its
	 * parent. We know that there's only one thread of control in such a
	 * process: this one.
	 */
	while (p->p_flag & SVFORK) {
		p = p->p_parent;
	}
#endif

	/*
	 * The process stays locked for the lookup and while the probes
	 * fire; the hold taken here is released at the end of the function
	 * (or on the early "not found" return below).
	 */
	PROC_LOCK(p);
	_PHOLD(p);
	pid = p->p_pid;
#if defined(sun)
	pid_mtx = &cpu_core[CPU->cpu_id].cpuc_pid_lock;
	mutex_enter(pid_mtx);
#endif
	bucket = &fasttrap_tpoints.fth_table[FASTTRAP_TPOINTS_INDEX(pid, pc)];

	/*
	 * Lookup the tracepoint that the process just hit.
	 */
	for (tp = bucket->ftb_data; tp != NULL; tp = tp->ftt_next) {
		if (pid == tp->ftt_pid && pc == tp->ftt_pc &&
		    tp->ftt_proc->ftpc_acount != 0)
			break;
	}

	/*
	 * If we couldn't find a matching tracepoint, either a tracepoint has
	 * been inserted without using the pid<pid> ioctl interface (see
	 * fasttrap_ioctl), or somehow we have mislaid this tracepoint.
	 */
	if (tp == NULL) {
#if defined(sun)
		mutex_exit(pid_mtx);
#endif
		_PRELE(p);
		PROC_UNLOCK(p);
		return (-1);
	}

	/*
	 * Set the program counter to the address of the traced instruction
	 * so that it looks right in ustack() output.
	 */
	rp->r_rip = pc;

	if (tp->ftt_ids != NULL) {
		fasttrap_id_t *id;

#ifdef __amd64
		if (p->p_model == DATAMODEL_LP64) {
			for (id = tp->ftt_ids; id != NULL; id = id->fti_next) {
				fasttrap_probe_t *probe = id->fti_probe;

				if (id->fti_ptype == DTFTP_ENTRY) {
					/*
					 * We note that this was an entry
					 * probe to help ustack() find the
					 * first caller.
					 */
					cookie = dtrace_interrupt_disable();
					DTRACE_CPUFLAG_SET(CPU_DTRACE_ENTRY);
					dtrace_probe(probe->ftp_id, rp->r_rdi,
					    rp->r_rsi, rp->r_rdx, rp->r_rcx,
					    rp->r_r8);
					DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_ENTRY);
					dtrace_interrupt_enable(cookie);
				} else if (id->fti_ptype == DTFTP_IS_ENABLED) {
					/*
					 * Note that in this case, we don't
					 * call dtrace_probe() since it's only
					 * an artificial probe meant to change
					 * the flow of control so that it
					 * encounters the true probe.
					 */
					is_enabled = 1;
				} else if (probe->ftp_argmap == NULL) {
					dtrace_probe(probe->ftp_id, rp->r_rdi,
					    rp->r_rsi, rp->r_rdx, rp->r_rcx,
					    rp->r_r8);
				} else {
					uintptr_t t[5];

					fasttrap_usdt_args64(probe, rp,
					    sizeof (t) / sizeof (t[0]), t);

					dtrace_probe(probe->ftp_id, t[0], t[1],
					    t[2], t[3], t[4]);
				}
			}
		} else {
#else /* __amd64 */
			uintptr_t s0, s1, s2, s3, s4, s5;
			uint32_t *stack = (uint32_t *)rp->r_esp;

			/*
			 * In 32-bit mode, all arguments are passed on the
			 * stack. If this is a function entry probe, we need
			 * to skip the first entry on the stack as it
			 * represents the return address rather than a
			 * parameter to the function.
			 */
			s0 = fasttrap_fuword32_noerr(&stack[0]);
			s1 = fasttrap_fuword32_noerr(&stack[1]);
			s2 = fasttrap_fuword32_noerr(&stack[2]);
			s3 = fasttrap_fuword32_noerr(&stack[3]);
			s4 = fasttrap_fuword32_noerr(&stack[4]);
			s5 = fasttrap_fuword32_noerr(&stack[5]);

			for (id = tp->ftt_ids; id != NULL; id = id->fti_next) {
				fasttrap_probe_t *probe = id->fti_probe;

				if (id->fti_ptype == DTFTP_ENTRY) {
					/*
					 * We note that this was an entry
					 * probe to help ustack() find the
					 * first caller.
					 */
					cookie = dtrace_interrupt_disable();
					DTRACE_CPUFLAG_SET(CPU_DTRACE_ENTRY);
					dtrace_probe(probe->ftp_id, s1, s2,
					    s3, s4, s5);
					DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_ENTRY);
					dtrace_interrupt_enable(cookie);
				} else if (id->fti_ptype == DTFTP_IS_ENABLED) {
					/*
					 * Note that in this case, we don't
					 * call dtrace_probe() since it's only
					 * an artificial probe meant to change
					 * the flow of control so that it
					 * encounters the true probe.
					 */
					is_enabled = 1;
				} else if (probe->ftp_argmap == NULL) {
					dtrace_probe(probe->ftp_id, s0, s1,
					    s2, s3, s4);
				} else {
					uint32_t t[5];

					fasttrap_usdt_args32(probe, rp,
					    sizeof (t) / sizeof (t[0]), t);

					dtrace_probe(probe->ftp_id, t[0], t[1],
					    t[2], t[3], t[4]);
				}
			}
#endif /* __amd64 */
#ifdef __amd64
		}
#endif
	}

	/*
	 * We're about to do a bunch of work so we cache a local copy of
	 * the tracepoint to emulate the instruction, and then find the
	 * tracepoint again later if we need to light up any return probes.
	 */
	tp_local = *tp;
	PROC_UNLOCK(p);
#if defined(sun)
	mutex_exit(pid_mtx);
#endif
	tp = &tp_local;

	/*
	 * Set the program counter to appear as though the traced instruction
	 * had completely executed. This ensures that fasttrap_getreg() will
	 * report the expected value for REG_RIP.
	 */
	rp->r_rip = pc + tp->ftt_size;

	/*
	 * If there's an is-enabled probe connected to this tracepoint it
	 * means that there was a 'xorl %eax, %eax' or 'xorq %rax, %rax'
	 * instruction that was placed there by DTrace when the binary was
	 * linked. As this probe is, in fact, enabled, we need to stuff 1
	 * into %eax or %rax. Accordingly, we can bypass all the instruction
	 * emulation logic since we know the inevitable result. It's possible
	 * that a user could construct a scenario where the 'is-enabled'
	 * probe was on some other instruction, but that would be a rather
	 * exotic way to shoot oneself in the foot.
	 */
	if (is_enabled) {
		rp->r_rax = 1;
		new_pc = rp->r_rip;
		goto done;
	}

	/*
	 * We emulate certain types of instructions to ensure correctness
	 * (in the case of position dependent instructions) or optimize
	 * common cases. The rest we have the thread execute back in user-
	 * land.
	 */
	switch (tp->ftt_type) {
	case FASTTRAP_T_RET:
	case FASTTRAP_T_RET16:
	{
		uintptr_t dst = 0;
		uintptr_t addr = 0;
		int ret = 0;

		/*
		 * We have to emulate _every_ facet of the behavior of a ret
		 * instruction including what happens if the load from %esp
		 * fails; in that case, we send a SIGSEGV.
		 */
#ifdef __amd64
		if (p->p_model == DATAMODEL_NATIVE) {
			ret = dst = fasttrap_fulword((void *)rp->r_rsp);
			addr = rp->r_rsp + sizeof (uintptr_t);
		} else {
#endif
#ifdef __i386__
			uint32_t dst32;
			ret = dst32 = fasttrap_fuword32((void *)rp->r_esp);
			dst = dst32;
			addr = rp->r_esp + sizeof (uint32_t);
#endif
#ifdef __amd64
		}
#endif

		if (ret == -1) {
			fasttrap_sigsegv(p, curthread, rp->r_rsp);
			new_pc = pc;
			break;
		}

		if (tp->ftt_type == FASTTRAP_T_RET16)
			addr += tp->ftt_dest;

		rp->r_rsp = addr;
		new_pc = dst;
		break;
	}

	case FASTTRAP_T_JCC:
	{
		uint_t taken = 0;

		switch (tp->ftt_code) {
		case FASTTRAP_JO:
			taken = (rp->r_rflags & FASTTRAP_EFLAGS_OF) != 0;
			break;
		case FASTTRAP_JNO:
			taken = (rp->r_rflags & FASTTRAP_EFLAGS_OF) == 0;
			break;
		case FASTTRAP_JB:
			taken = (rp->r_rflags & FASTTRAP_EFLAGS_CF) != 0;
			break;
		case FASTTRAP_JAE:
			taken = (rp->r_rflags & FASTTRAP_EFLAGS_CF) == 0;
			break;
		case FASTTRAP_JE:
			taken = (rp->r_rflags & FASTTRAP_EFLAGS_ZF) != 0;
			break;
		case FASTTRAP_JNE:
			taken = (rp->r_rflags & FASTTRAP_EFLAGS_ZF) == 0;
			break;
		case FASTTRAP_JBE:
			taken = (rp->r_rflags & FASTTRAP_EFLAGS_CF) != 0 ||
			    (rp->r_rflags & FASTTRAP_EFLAGS_ZF) != 0;
			break;
		case FASTTRAP_JA:
			taken = (rp->r_rflags & FASTTRAP_EFLAGS_CF) == 0 &&
			    (rp->r_rflags & FASTTRAP_EFLAGS_ZF) == 0;
			break;
		case FASTTRAP_JS:
			taken = (rp->r_rflags & FASTTRAP_EFLAGS_SF) != 0;
			break;
		case FASTTRAP_JNS:
			taken = (rp->r_rflags & FASTTRAP_EFLAGS_SF) == 0;
			break;
		case FASTTRAP_JP:
			taken = (rp->r_rflags & FASTTRAP_EFLAGS_PF) != 0;
			break;
		case FASTTRAP_JNP:
			taken = (rp->r_rflags & FASTTRAP_EFLAGS_PF) == 0;
			break;
		case FASTTRAP_JL:
			taken = ((rp->r_rflags & FASTTRAP_EFLAGS_SF) == 0) !=
			    ((rp->r_rflags & FASTTRAP_EFLAGS_OF) == 0);
			break;
		case FASTTRAP_JGE:
			taken = ((rp->r_rflags & FASTTRAP_EFLAGS_SF) == 0) ==
			    ((rp->r_rflags & FASTTRAP_EFLAGS_OF) == 0);
			break;
		case FASTTRAP_JLE:
			taken = (rp->r_rflags & FASTTRAP_EFLAGS_ZF) != 0 ||
			    ((rp->r_rflags & FASTTRAP_EFLAGS_SF) == 0) !=
			    ((rp->r_rflags & FASTTRAP_EFLAGS_OF) == 0);
			break;
		case FASTTRAP_JG:
			taken = (rp->r_rflags & FASTTRAP_EFLAGS_ZF) == 0 &&
			    ((rp->r_rflags & FASTTRAP_EFLAGS_SF) == 0) ==
			    ((rp->r_rflags & FASTTRAP_EFLAGS_OF) == 0);
			break;

		}

		if (taken)
			new_pc = tp->ftt_dest;
		else
			new_pc = pc + tp->ftt_size;
		break;
	}

	case FASTTRAP_T_LOOP:
	{
		uint_t taken = 0;
#ifdef __amd64
		greg_t cx = rp->r_rcx--;
#else
		greg_t cx = rp->r_ecx--;
#endif

		switch (tp->ftt_code) {
		case FASTTRAP_LOOPNZ:
			taken = (rp->r_rflags & FASTTRAP_EFLAGS_ZF) == 0 &&
			    cx != 0;
			break;
		case FASTTRAP_LOOPZ:
			taken = (rp->r_rflags & FASTTRAP_EFLAGS_ZF) != 0 &&
			    cx != 0;
			break;
		case FASTTRAP_LOOP:
			taken = (cx != 0);
			break;
		}

		if (taken)
			new_pc = tp->ftt_dest;
		else
			new_pc = pc + tp->ftt_size;
		break;
	}

	case FASTTRAP_T_JCXZ:
	{
#ifdef __amd64
		greg_t cx = rp->r_rcx;
#else
		greg_t cx = rp->r_ecx;
#endif

		if (cx == 0)
			new_pc = tp->ftt_dest;
		else
			new_pc = pc + tp->ftt_size;
		break;
	}

	case FASTTRAP_T_PUSHL_EBP:
	{
		int ret = 0;
		uintptr_t addr = 0;

		/*
		 * The instruction being emulated pushes the frame pointer,
		 * so the value stored in the new stack slot must be
		 * %rbp/%ebp; the stack pointer is only used to compute the
		 * address of that slot.
		 */
#ifdef __amd64
		if (p->p_model == DATAMODEL_NATIVE) {
			addr = rp->r_rsp - sizeof (uintptr_t);
			ret = fasttrap_sulword((void *)addr, &rp->r_rbp);
		} else {
#endif
#ifdef __i386__
			addr = rp->r_rsp - sizeof (uint32_t);
			ret = fasttrap_suword32((void *)addr, &rp->r_ebp);
#endif
#ifdef __amd64
		}
#endif

		if (ret == -1) {
			fasttrap_sigsegv(p, curthread, addr);
			new_pc = pc;
			break;
		}

		rp->r_rsp = addr;
		new_pc = pc + tp->ftt_size;
		break;
	}

	case FASTTRAP_T_NOP:
		new_pc = pc + tp->ftt_size;
		break;

	case FASTTRAP_T_JMP:
	case FASTTRAP_T_CALL:
		if (tp->ftt_code == 0) {
			new_pc = tp->ftt_dest;
		} else {
#ifdef __amd64
			uintptr_t value;
#endif
			uintptr_t addr = tp->ftt_dest;

			if (tp->ftt_base != FASTTRAP_NOREG)
				addr += fasttrap_getreg(rp, tp->ftt_base);
			if (tp->ftt_index != FASTTRAP_NOREG)
				addr += fasttrap_getreg(rp, tp->ftt_index) <<
				    tp->ftt_scale;

			if (tp->ftt_code == 1) {
				/*
				 * If there's a segment prefix for this
				 * instruction, we'll need to check permissions
				 * and bounds on the given selector, and adjust
				 * the address accordingly.
				 */
				if (tp->ftt_segment != FASTTRAP_SEG_NONE &&
				    fasttrap_do_seg(tp, rp, &addr) != 0) {
					fasttrap_sigsegv(p, curthread, addr);
					new_pc = pc;
					break;
				}

#ifdef __amd64
				if (p->p_model == DATAMODEL_NATIVE) {
					if ((value = fasttrap_fulword((void *)addr))
					     == -1) {
						fasttrap_sigsegv(p, curthread,
						    addr);
						new_pc = pc;
						break;
					}
					new_pc = value;
				} else {
#endif
#ifdef __i386__
					uint32_t value32;
					addr = (uintptr_t)(uint32_t)addr;
					if ((value32 = fasttrap_fuword32((void *)addr))
					    == -1) {
						fasttrap_sigsegv(p, curthread,
						    addr);
						new_pc = pc;
						break;
					}
					new_pc = value32;
#endif
				}
#ifdef __amd64
			} else {
				new_pc = addr;
			}
#endif
		}

		/*
		 * If this is a call instruction, we need to push the return
		 * address onto the stack. If this fails, we send the process
		 * a SIGSEGV and reset the pc to emulate what would happen if
		 * this instruction weren't traced.
		 */
		if (tp->ftt_type == FASTTRAP_T_CALL) {
			int ret = 0;
			uintptr_t addr = 0, pcps;
#ifdef __amd64
			if (p->p_model == DATAMODEL_NATIVE) {
				addr = rp->r_rsp - sizeof (uintptr_t);
				pcps = pc + tp->ftt_size;
				ret = fasttrap_sulword((void *)addr, &pcps);
			} else {
#endif
#ifdef __i386__
				addr = rp->r_rsp - sizeof (uint32_t);
				pcps = (uint32_t)(pc + tp->ftt_size);
				ret = fasttrap_suword32((void *)addr, &pcps);
#endif
#ifdef __amd64
			}
#endif

			if (ret == -1) {
				fasttrap_sigsegv(p, curthread, addr);
				new_pc = pc;
				break;
			}

			rp->r_rsp = addr;
		}

		break;

	case FASTTRAP_T_COMMON:
	{
		uintptr_t addr;
#if defined(__amd64)
		uint8_t scratch[2 * FASTTRAP_MAX_INSTR_SIZE + 22];
#else
		uint8_t scratch[2 * FASTTRAP_MAX_INSTR_SIZE + 7];
#endif
		uint_t i = 0;
#if defined(sun)
		klwp_t *lwp = ttolwp(curthread);
#endif

		/*
		 * Compute the address of the ulwp_t and step over the
		 * ul_self pointer. The method used to store the user-land
		 * thread pointer is very different on 32- and 64-bit
		 * kernels.
		 */
#if defined(sun)
#if defined(__amd64)
		if (p->p_model == DATAMODEL_LP64) {
			addr = lwp->lwp_pcb.pcb_fsbase;
			addr += sizeof (void *);
		} else {
			addr = lwp->lwp_pcb.pcb_gsbase;
			addr += sizeof (caddr32_t);
		}
#else
		addr = USD_GETBASE(&lwp->lwp_pcb.pcb_gsdesc);
		addr += sizeof (void *);
#endif
#endif /* sun */
#ifdef __i386__
		addr = USD_GETBASE(&curthread->td_pcb->pcb_gsd);
#else
		addr = curthread->td_pcb->pcb_gsbase;
#endif
		addr += sizeof (void *);

		/*
		 * Generic Instruction Tracing
		 * ---------------------------
		 *
		 * This is the layout of the scratch space in the user-land
		 * thread structure for our generated instructions.
		 *
		 *	32-bit mode			bytes
		 *	------------------------	-----
		 * a:	<original instruction>		<= 15
		 *	jmp	<pc + tp->ftt_size>	    5
		 * b:	<original instruction>		<= 15
		 *	int	T_DTRACE_RET		    2
		 *					-----
		 *					<= 37
		 *
		 *	64-bit mode			bytes
		 *	------------------------	-----
		 * a:	<original instruction>		<= 15
		 *	jmp	0(%rip)			    6
		 *	<pc + tp->ftt_size>		    8
		 * b:	<original instruction>		<= 15
		 * 	int	T_DTRACE_RET		    2
		 * 					-----
		 * 					<= 46
		 *
		 * The %pc is set to a, and curthread->t_dtrace_astpc is set
		 * to b. If we encounter a signal on the way out of the
		 * kernel, trap() will set %pc to curthread->t_dtrace_astpc
		 * so that we execute the original instruction and re-enter
		 * the kernel rather than redirecting to the next instruction.
		 *
		 * If there are return probes (so we know that we're going to
		 * need to reenter the kernel after executing the original
		 * instruction), the scratch space will just contain the
		 * original instruction followed by an interrupt -- the same
		 * data as at b.
		 *
		 * %rip-relative Addressing
		 * ------------------------
		 *
		 * There's a further complication in 64-bit mode due to %rip-
		 * relative addressing. While this is clearly a beneficial
		 * architectural decision for position independent code, it's
		 * hard not to see it as a personal attack against the pid
		 * provider since before there was a relatively small set of
		 * instructions to emulate; with %rip-relative addressing,
		 * almost every instruction can potentially depend on the
		 * address at which it's executed. Rather than emulating
		 * the broad spectrum of instructions that can now be
		 * position dependent, we emulate jumps and others as in
		 * 32-bit mode, and take a different tack for instructions
		 * using %rip-relative addressing.
		 *
		 * For every instruction that uses the ModRM byte, the
		 * in-kernel disassembler reports its location. We use the
		 * ModRM byte to identify that an instruction uses
		 * %rip-relative addressing and to see what other registers
		 * the instruction uses. To emulate those instructions,
		 * we modify the instruction to be %rax-relative rather than
		 * %rip-relative (or %rcx-relative if the instruction uses
		 * %rax; or %r8- or %r9-relative if the REX.B is present so
		 * we don't have to rewrite the REX prefix). We then load
		 * the value that %rip would have been into the scratch
		 * register and generate an instruction to reset the scratch
		 * register back to its original value. The instruction
		 * sequence looks like this:
		 *
		 *	64-mode %rip-relative		bytes
		 *	------------------------	-----
		 * a:	<modified instruction>		<= 15
		 *	movq	$<value>, %<scratch>	    6
		 *	jmp	0(%rip)			    6
		 *	<pc + tp->ftt_size>		    8
		 * b:	<modified instruction>  	<= 15
		 * 	int	T_DTRACE_RET		    2
		 * 					-----
		 *					   52
		 *
		 * We set curthread->t_dtrace_regv so that upon receiving
		 * a signal we can reset the value of the scratch register.
		 */

		ASSERT(tp->ftt_size < FASTTRAP_MAX_INSTR_SIZE);

		curthread->t_dtrace_scrpc = addr;
		bcopy(tp->ftt_instr, &scratch[i], tp->ftt_size);
		i += tp->ftt_size;

#ifdef __amd64
		if (tp->ftt_ripmode != 0) {
			greg_t *reg = NULL;

			ASSERT(p->p_model == DATAMODEL_LP64);
			ASSERT(tp->ftt_ripmode &
			    (FASTTRAP_RIP_1 | FASTTRAP_RIP_2));

			/*
			 * If this was a %rip-relative instruction, we change
			 * it to be either a %rax- or %rcx-relative
			 * instruction (depending on whether those registers
			 * are used as another operand; or %r8- or %r9-
			 * relative depending on the value of REX.B). We then
			 * set that register and generate a movq instruction
			 * to reset the value.
			 */
			if (tp->ftt_ripmode & FASTTRAP_RIP_X)
				scratch[i++] = FASTTRAP_REX(1, 0, 0, 1);
			else
				scratch[i++] = FASTTRAP_REX(1, 0, 0, 0);

			if (tp->ftt_ripmode & FASTTRAP_RIP_1)
				scratch[i++] = FASTTRAP_MOV_EAX;
			else
				scratch[i++] = FASTTRAP_MOV_ECX;

			switch (tp->ftt_ripmode) {
			case FASTTRAP_RIP_1:
				reg = &rp->r_rax;
				curthread->t_dtrace_reg = REG_RAX;
				break;
			case FASTTRAP_RIP_2:
				reg = &rp->r_rcx;
				curthread->t_dtrace_reg = REG_RCX;
				break;
			case FASTTRAP_RIP_1 | FASTTRAP_RIP_X:
				reg = &rp->r_r8;
				curthread->t_dtrace_reg = REG_R8;
				break;
			case FASTTRAP_RIP_2 | FASTTRAP_RIP_X:
				reg = &rp->r_r9;
				curthread->t_dtrace_reg = REG_R9;
				break;
			}

			/* LINTED - alignment */
			*(uint64_t *)&scratch[i] = *reg;
			curthread->t_dtrace_regv = *reg;
			*reg = pc + tp->ftt_size;
			i += sizeof (uint64_t);
		}
#endif

		/*
		 * Generate the branch instruction to what would have
		 * normally been the subsequent instruction. In 32-bit mode,
		 * this is just a relative branch; in 64-bit mode this is a
		 * %rip-relative branch that loads the 64-bit pc value
		 * immediately after the jmp instruction.
		 */
#ifdef __amd64
		if (p->p_model == DATAMODEL_LP64) {
			scratch[i++] = FASTTRAP_GROUP5_OP;
			scratch[i++] = FASTTRAP_MODRM(0, 4, 5);
			/* LINTED - alignment */
			*(uint32_t *)&scratch[i] = 0;
			i += sizeof (uint32_t);
			/* LINTED - alignment */
			*(uint64_t *)&scratch[i] = pc + tp->ftt_size;
			i += sizeof (uint64_t);
		} else {
#endif
#ifdef __i386__
			/*
			 * Set up the jmp to the next instruction; note that
			 * the size of the traced instruction cancels out.
			 */
			scratch[i++] = FASTTRAP_JMP32;
			/* LINTED - alignment */
			*(uint32_t *)&scratch[i] = pc - addr - 5;
			i += sizeof (uint32_t);
#endif
#ifdef __amd64
		}
#endif

		curthread->t_dtrace_astpc = addr + i;
		bcopy(tp->ftt_instr, &scratch[i], tp->ftt_size);
		i += tp->ftt_size;
		scratch[i++] = FASTTRAP_INT;
		scratch[i++] = T_DTRACE_RET;

		ASSERT(i <= sizeof (scratch));

#if defined(sun)
		if (fasttrap_copyout(scratch, (char *)addr, i)) {
#else
		if (uwrite(curproc, scratch, i, addr)) {
#endif
			fasttrap_sigtrap(p, curthread, pc);
			new_pc = pc;
			break;
		}
		if (tp->ftt_retids != NULL) {
			curthread->t_dtrace_step = 1;
			curthread->t_dtrace_ret = 1;
			new_pc = curthread->t_dtrace_astpc;
		} else {
			new_pc = curthread->t_dtrace_scrpc;
		}

		curthread->t_dtrace_pc = pc;
		curthread->t_dtrace_npc = pc + tp->ftt_size;
		curthread->t_dtrace_on = 1;
		break;
	}

	default:
		panic("fasttrap: mishandled an instruction");
	}

done:
	/*
	 * If there were no return probes when we first found the tracepoint,
	 * we should feel no obligation to honor any return probes that were
	 * subsequently enabled -- they'll just have to wait until the next
	 * time around.
	 */
	if (tp->ftt_retids != NULL) {
		/*
		 * We need to wait until the results of the instruction are
		 * apparent before invoking any return probes. If this
		 * instruction was emulated we can just call
		 * fasttrap_return_common(); if it needs to be executed, we
		 * need to wait until the user thread returns to the kernel.
		 */
		if (tp->ftt_type != FASTTRAP_T_COMMON) {
			/*
			 * Set the program counter to the address of the traced
			 * instruction so that it looks right in ustack()
			 * output. We had previously set it to the end of the
			 * instruction to simplify %rip-relative addressing.
			 */
			rp->r_rip = pc;

			fasttrap_return_common(rp, pc, pid, new_pc);
		} else {
			ASSERT(curthread->t_dtrace_ret != 0);
			ASSERT(curthread->t_dtrace_pc == pc);
			ASSERT(curthread->t_dtrace_scrpc != 0);
			ASSERT(new_pc == curthread->t_dtrace_astpc);
		}
	}

	rp->r_rip = new_pc;

	/*
	 * Write the updated register set back to the thread and drop the
	 * hold taken on the process at the top of the function.
	 */
	PROC_LOCK(p);
	proc_write_regs(curthread, rp);
	_PRELE(p);
	PROC_UNLOCK(p);

	return (0);
}

/*
 * Fire the return probes for the tracepoint recorded in the current
 * thread.
 *
 * The traced pc and the following pc are taken from curthread, the
 * thread's saved pc/scratch fields are reset to zero, and
 * fasttrap_return_common() is invoked with rp->r_rip pointing at the
 * traced instruction.  Always returns 0.
 */
int
fasttrap_return_probe(struct reg *rp)
{
	uintptr_t traced_pc, next_pc;
	proc_t *owner;

	/* Capture the saved addresses before wiping the per-thread state. */
	traced_pc = curthread->t_dtrace_pc;
	next_pc = curthread->t_dtrace_npc;

	curthread->t_dtrace_pc = 0;
	curthread->t_dtrace_npc = 0;
	curthread->t_dtrace_scrpc = 0;
	curthread->t_dtrace_astpc = 0;

	owner = curproc;
#if defined(sun)
	/*
	 * Treat a child created by a call to vfork(2) as if it were its
	 * parent. We know that there's only one thread of control in such a
	 * process: this one.
	 */
	for (; owner->p_flag & SVFORK; owner = owner->p_parent)
		continue;
#endif

	/*
	 * We set rp->r_rip to the address of the traced instruction so
	 * that it appears to dtrace_probe() that we're on the original
	 * instruction, and so that the user can't easily detect our
	 * complex web of lies. dtrace_return_probe() (our caller)
	 * will correctly set %pc after we return.
	 */
	rp->r_rip = traced_pc;

	fasttrap_return_common(rp, traced_pc, owner->p_pid, next_pc);

	return (0);
}
Example #4
0
int
linux_ptrace(struct thread *td, struct linux_ptrace_args *uap)
{
	union {
		struct linux_pt_reg	reg;
		struct linux_pt_fpreg	fpreg;
		struct linux_pt_fpxreg	fpxreg;
	} r;
	union {
		struct reg		bsd_reg;
		struct fpreg		bsd_fpreg;
		struct dbreg		bsd_dbreg;
	} u;
	void *addr;
	pid_t pid;
	int error, req;

	error = 0;

	/* by default, just copy data intact */
	req  = uap->req;
	pid  = (pid_t)uap->pid;
	addr = (void *)uap->addr;

	switch (req) {
	case PTRACE_TRACEME:
	case PTRACE_POKETEXT:
	case PTRACE_POKEDATA:
	case PTRACE_KILL:
		error = kern_ptrace(td, req, pid, addr, uap->data);
		break;
	case PTRACE_PEEKTEXT:
	case PTRACE_PEEKDATA: {
		/* need to preserve return value */
		int rval = td->td_retval[0];
		error = kern_ptrace(td, req, pid, addr, 0);
		if (error == 0)
			error = copyout(td->td_retval, (void *)uap->data,
			    sizeof(l_int));
		td->td_retval[0] = rval;
		break;
	}
	case PTRACE_DETACH:
		error = kern_ptrace(td, PT_DETACH, pid, (void *)1,
		     map_signum(uap->data));
		break;
	case PTRACE_SINGLESTEP:
	case PTRACE_CONT:
		error = kern_ptrace(td, req, pid, (void *)1,
		     map_signum(uap->data));
		break;
	case PTRACE_ATTACH:
		error = kern_ptrace(td, PT_ATTACH, pid, addr, uap->data);
		break;
	case PTRACE_GETREGS:
		/* Linux is using data where FreeBSD is using addr */
		error = kern_ptrace(td, PT_GETREGS, pid, &u.bsd_reg, 0);
		if (error == 0) {
			map_regs_to_linux(&u.bsd_reg, &r.reg);
			error = copyout(&r.reg, (void *)uap->data,
			    sizeof(r.reg));
		}
		break;
	case PTRACE_SETREGS:
		/* Linux is using data where FreeBSD is using addr */
		error = copyin((void *)uap->data, &r.reg, sizeof(r.reg));
		if (error == 0) {
			map_regs_from_linux(&u.bsd_reg, &r.reg);
			error = kern_ptrace(td, PT_SETREGS, pid, &u.bsd_reg, 0);
		}
		break;
	case PTRACE_GETFPREGS:
		/* Linux is using data where FreeBSD is using addr */
		error = kern_ptrace(td, PT_GETFPREGS, pid, &u.bsd_fpreg, 0);
		if (error == 0) {
			map_fpregs_to_linux(&u.bsd_fpreg, &r.fpreg);
			error = copyout(&r.fpreg, (void *)uap->data,
			    sizeof(r.fpreg));
		}
		break;
	case PTRACE_SETFPREGS:
		/* Linux is using data where FreeBSD is using addr */
		error = copyin((void *)uap->data, &r.fpreg, sizeof(r.fpreg));
		if (error == 0) {
			map_fpregs_from_linux(&u.bsd_fpreg, &r.fpreg);
			error = kern_ptrace(td, PT_SETFPREGS, pid,
			    &u.bsd_fpreg, 0);
		}
		break;
	case PTRACE_SETFPXREGS:
#ifdef CPU_ENABLE_SSE
		error = copyin((void *)uap->data, &r.fpxreg, sizeof(r.fpxreg));
		if (error)
			break;
#endif
		/* FALL THROUGH */
	case PTRACE_GETFPXREGS: {
#ifdef CPU_ENABLE_SSE
		struct proc *p;
		struct thread *td2;

		if (sizeof(struct linux_pt_fpxreg) != sizeof(struct savexmm)) {
			static int once = 0;
			if (!once) {
				printf("linux: savexmm != linux_pt_fpxreg\n");
				once = 1;
			}
			error = EIO;
			break;
		}

		if ((p = pfind(uap->pid)) == NULL) {
			error = ESRCH;
			break;
		}

		/* Exiting processes can't be debugged. */
		if ((p->p_flag & P_WEXIT) != 0) {
			error = ESRCH;
			goto fail;
		}

		if ((error = p_candebug(td, p)) != 0)
			goto fail;

		/* System processes can't be debugged. */
		if ((p->p_flag & P_SYSTEM) != 0) {
			error = EINVAL;
			goto fail;
		}

		/* not being traced... */
		if ((p->p_flag & P_TRACED) == 0) {
			error = EPERM;
			goto fail;
		}

		/* not being traced by YOU */
		if (p->p_pptr != td->td_proc) {
			error = EBUSY;
			goto fail;
		}

		/* not currently stopped */
		if (!P_SHOULDSTOP(p) || (p->p_flag & P_WAITED) == 0) {
			error = EBUSY;
			goto fail;
		}

		if (req == PTRACE_GETFPXREGS) {
			_PHOLD(p);	/* may block */
			td2 = FIRST_THREAD_IN_PROC(p);
			error = linux_proc_read_fpxregs(td2, &r.fpxreg);
			_PRELE(p);
			PROC_UNLOCK(p);
			if (error == 0)
				error = copyout(&r.fpxreg, (void *)uap->data,
				    sizeof(r.fpxreg));
		} else {
			/* clear dangerous bits exactly as Linux does*/
			r.fpxreg.mxcsr &= 0xffbf;
			_PHOLD(p);	/* may block */
			td2 = FIRST_THREAD_IN_PROC(p);
			error = linux_proc_write_fpxregs(td2, &r.fpxreg);
			_PRELE(p);
			PROC_UNLOCK(p);
		}
		break;

	fail:
		PROC_UNLOCK(p);
#else
		error = EIO;
#endif
		break;
	}
	case PTRACE_PEEKUSR:
	case PTRACE_POKEUSR: {
		error = EIO;

		/* check addr for alignment */
		if (uap->addr < 0 || uap->addr & (sizeof(l_int) - 1))
			break;
		/*
		 * Allow linux programs to access register values in
		 * user struct. We simulate this through PT_GET/SETREGS
		 * as necessary.
		 */
		if (uap->addr < sizeof(struct linux_pt_reg)) {
			error = kern_ptrace(td, PT_GETREGS, pid, &u.bsd_reg, 0);
			if (error != 0)
				break;

			map_regs_to_linux(&u.bsd_reg, &r.reg);
			if (req == PTRACE_PEEKUSR) {
				error = copyout((char *)&r.reg + uap->addr,
				    (void *)uap->data, sizeof(l_int));
				break;
			}

			*(l_int *)((char *)&r.reg + uap->addr) =
			    (l_int)uap->data;

			map_regs_from_linux(&u.bsd_reg, &r.reg);
			error = kern_ptrace(td, PT_SETREGS, pid, &u.bsd_reg, 0);
		}

		/*
		 * Simulate debug registers access
		 */
		if (uap->addr >= LINUX_DBREG_OFFSET &&
		    uap->addr <= LINUX_DBREG_OFFSET + LINUX_DBREG_SIZE) {
			error = kern_ptrace(td, PT_GETDBREGS, pid, &u.bsd_dbreg,
			    0);
			if (error != 0)
				break;

			uap->addr -= LINUX_DBREG_OFFSET;
			if (req == PTRACE_PEEKUSR) {
				error = copyout((char *)&u.bsd_dbreg +
				    uap->addr, (void *)uap->data,
				    sizeof(l_int));
				break;
			}

			*(l_int *)((char *)&u.bsd_dbreg + uap->addr) =
			     uap->data;
			error = kern_ptrace(td, PT_SETDBREGS, pid,
			    &u.bsd_dbreg, 0);
		}

		break;
	}
	case PTRACE_SYSCALL:
		/* fall through */
	default:
		printf("linux: ptrace(%u, ...) not implemented\n",
		    (unsigned int)uap->req);
		error = EINVAL;
		break;
	}

	return (error);
}
Example #5
0
/*
 * Read from a file.
 *
 * The node must be a regular file flagged PFS_RD and must have a fill
 * callback.  For PFS_RAWRD nodes the callback is handed the uio
 * directly; otherwise the callback fills an sbuf sized from the
 * requested offset and length, and the result is copied out with
 * uiomove_frombuf().
 *
 * Once the target process has been held and the vnode unlocked, every
 * exit goes through the 'ret' label, which relocks the vnode, drops the
 * vnode hold and releases the process exactly once.
 *
 * Returns 0 on success or an errno value.
 */
static int
pfs_read(struct vop_read_args *va)
{
	struct vnode *vn = va->a_vp;
	struct pfs_vdata *pvd = vn->v_data;
	struct pfs_node *pn = pvd->pvd_pn;
	struct uio *uio = va->a_uio;
	struct proc *proc;
	struct sbuf *sb = NULL;
	int error, locked;
	unsigned int buflen, offset, resid;

	PFS_TRACE(("%s", pn->pn_name));
	pfs_assert_not_owned(pn);

	if (vn->v_type != VREG)
		PFS_RETURN (EINVAL);
	KASSERT_PN_IS_FILE(pn);

	if (!(pn->pn_flags & PFS_RD))
		PFS_RETURN (EBADF);

	if (pn->pn_fill == NULL)
		PFS_RETURN (EIO);

	/*
	 * This is necessary because either process' privileges may
	 * have changed since the open() call.
	 */
	if (!pfs_visible(curthread, pn, pvd->pvd_pid, &proc))
		PFS_RETURN (EIO);
	/* Keep a hold on the target process while its lock is dropped. */
	if (proc != NULL) {
		_PHOLD(proc);
		PROC_UNLOCK(proc);
	}

	/* The fill callback runs with the vnode held but unlocked. */
	vhold(vn);
	locked = VOP_ISLOCKED(vn, curthread);
	VOP_UNLOCK(vn, 0, curthread);

	if (pn->pn_flags & PFS_RAWRD) {
		PFS_TRACE(("%lu resid", (unsigned long)uio->uio_resid));
		error = pn_fill(curthread, proc, pn, NULL, uio);
		PFS_TRACE(("%lu resid", (unsigned long)uio->uio_resid));
		goto ret;
	}

	/*
	 * beaucoup sanity checks so we don't ask for bogus allocation
	 *
	 * The process hold is NOT released here: the common exit path at
	 * 'ret' does that, and releasing it in both places would drop the
	 * hold twice.
	 */
	if (uio->uio_offset < 0 || uio->uio_resid < 0 ||
	    (offset = uio->uio_offset) != uio->uio_offset ||
	    (resid = uio->uio_resid) != uio->uio_resid ||
	    (buflen = offset + resid + 1) < offset || buflen > INT_MAX) {
		error = EINVAL;
		goto ret;
	}
	if (buflen > MAXPHYS + 1) {
		error = EIO;
		goto ret;
	}

	sb = sbuf_new(sb, NULL, buflen, 0);
	if (sb == NULL) {
		error = EIO;
		goto ret;
	}

	error = pn_fill(curthread, proc, pn, sb, uio);

	if (error) {
		sbuf_delete(sb);
		goto ret;
	}

	sbuf_finish(sb);
	error = uiomove_frombuf(sbuf_data(sb), sbuf_len(sb), uio);
	sbuf_delete(sb);
ret:
	vn_lock(vn, locked | LK_RETRY, curthread);
	vdrop(vn);
	if (proc != NULL)
		PRELE(proc);
	PFS_RETURN (error);
}
Example #6
0
/*
 * Complete the creation of child process p2 (initial thread td2) on behalf
 * of the forking thread td, as directed by the request in fr.
 *
 * Entered with proctree_lock share-locked and allproc_lock exclusively
 * locked (both asserted below).  Both are released inside this function;
 * proctree_lock is later re-taken exclusively while the child is linked
 * into the process tree and released again before returning.
 *
 * vm2 is handed to vm_forkproc().  fp_procdesc is only used when
 * RFPROCDESC is set in fr->fr_flags.
 *
 * Results are reported through fr: when RFSTOPPED is clear the child is
 * put on the run queue and its pid is stored through fr->fr_pidp if that
 * pointer is non-NULL; when RFSTOPPED is set the child is left unscheduled
 * and p2 is stored through fr->fr_procp.
 */
static void
do_fork(struct thread *td, struct fork_req *fr, struct proc *p2, struct thread *td2,
    struct vmspace *vm2, struct file *fp_procdesc)
{
	struct proc *p1, *pptr;
	int trypid;
	struct filedesc *fd;
	struct filedesc_to_leader *fdtol;
	struct sigacts *newsigacts;

	sx_assert(&proctree_lock, SX_SLOCKED);
	sx_assert(&allproc_lock, SX_XLOCKED);

	p1 = td->td_proc;

	trypid = fork_findpid(fr->fr_flags);

	sx_sunlock(&proctree_lock);

	/* Publish the child on the global lists while still marked PRS_NEW. */
	p2->p_state = PRS_NEW;		/* protect against others */
	p2->p_pid = trypid;
	AUDIT_ARG_PID(p2->p_pid);
	LIST_INSERT_HEAD(&allproc, p2, p_list);
	allproc_gen++;
	LIST_INSERT_HEAD(PIDHASH(p2->p_pid), p2, p_hash);
	tidhash_add(td2);
	PROC_LOCK(p2);
	PROC_LOCK(p1);

	sx_xunlock(&allproc_lock);

	/* Inherit the p_startcopy..p_endcopy range of struct proc from p1. */
	bcopy(&p1->p_startcopy, &p2->p_startcopy,
	    __rangeof(struct proc, p_startcopy, p_endcopy));
	pargs_hold(p2->p_args);

	PROC_UNLOCK(p1);

	/* Clear the p_startzero..p_endzero range of the child. */
	bzero(&p2->p_startzero,
	    __rangeof(struct proc, p_startzero, p_endzero));

	/* Tell the prison that we exist. */
	prison_proc_hold(p2->p_ucred->cr_prison);

	PROC_UNLOCK(p2);

	/*
	 * Malloc things while we don't hold any locks.
	 */
	if (fr->fr_flags & RFSIGSHARE)
		newsigacts = NULL;
	else
		newsigacts = sigacts_alloc();

	/*
	 * Copy filedesc.
	 */
	if (fr->fr_flags & RFCFDG) {
		fd = fdinit(p1->p_fd, false);
		fdtol = NULL;
	} else if (fr->fr_flags & RFFDG) {
		fd = fdcopy(p1->p_fd);
		fdtol = NULL;
	} else {
		fd = fdshare(p1->p_fd);
		if (p1->p_fdtol == NULL)
			p1->p_fdtol = filedesc_to_leader_alloc(NULL, NULL,
			    p1->p_leader);
		if ((fr->fr_flags & RFTHREAD) != 0) {
			/*
			 * Shared file descriptor table, and shared
			 * process leaders.
			 */
			fdtol = p1->p_fdtol;
			FILEDESC_XLOCK(p1->p_fd);
			fdtol->fdl_refcount++;
			FILEDESC_XUNLOCK(p1->p_fd);
		} else {
			/*
			 * Shared file descriptor table, and different
			 * process leaders.
			 */
			fdtol = filedesc_to_leader_alloc(p1->p_fdtol,
			    p1->p_fd, p2);
		}
	}
	/*
	 * Initialize the child's first thread: zero the td_startzero..
	 * td_endzero range of struct thread, then copy the td_startcopy..
	 * td_endcopy range from the forking thread.
	 */

	PROC_LOCK(p2);
	PROC_LOCK(p1);

	bzero(&td2->td_startzero,
	    __rangeof(struct thread, td_startzero, td_endzero));

	bcopy(&td->td_startcopy, &td2->td_startcopy,
	    __rangeof(struct thread, td_startcopy, td_endcopy));

	/* The child thread takes its name from the process name. */
	bcopy(&p2->p_comm, &td2->td_name, sizeof(td2->td_name));
	td2->td_sigstk = td->td_sigstk;
	td2->td_flags = TDF_INMEM;
	td2->td_lend_user_pri = PRI_MAX;

#ifdef VIMAGE
	td2->td_vnet = NULL;
	td2->td_vnet_lpush = NULL;
#endif

	/*
	 * Allow the scheduler to initialize the child.
	 */
	thread_lock(td);
	sched_fork(td, td2);
	thread_unlock(td);

	/*
	 * Duplicate sub-structures as needed.
	 * Increase reference counts on shared objects.
	 */
	p2->p_flag = P_INMEM;
	p2->p_flag2 = p1->p_flag2 & (P2_NOTRACE | P2_NOTRACE_EXEC | P2_TRAPCAP);
	p2->p_swtick = ticks;
	if (p1->p_flag & P_PROFIL)
		startprofclock(p2);

	/*
	 * Whilst the proc lock is held, copy the VM domain data out
	 * using the VM domain method.
	 */
	vm_domain_policy_init(&p2->p_vm_dom_policy);
	vm_domain_policy_localcopy(&p2->p_vm_dom_policy,
	    &p1->p_vm_dom_policy);

	/* Share the parent's sigacts or fill in the copy allocated above. */
	if (fr->fr_flags & RFSIGSHARE) {
		p2->p_sigacts = sigacts_hold(p1->p_sigacts);
	} else {
		sigacts_copy(newsigacts, p1->p_sigacts);
		p2->p_sigacts = newsigacts;
	}

	/* Select the value stored in the child's p_sigparent. */
	if (fr->fr_flags & RFTSIGZMB)
	        p2->p_sigparent = RFTSIGNUM(fr->fr_flags);
	else if (fr->fr_flags & RFLINUXTHPN)
	        p2->p_sigparent = SIGUSR1;
	else
	        p2->p_sigparent = SIGCHLD;

	p2->p_textvp = p1->p_textvp;
	p2->p_fd = fd;
	p2->p_fdtol = fdtol;

	if (p1->p_flag2 & P2_INHERIT_PROTECTED) {
		p2->p_flag |= P_PROTECTED;
		p2->p_flag2 |= P2_INHERIT_PROTECTED;
	}

	/*
	 * p_limit is copy-on-write.  Bump its refcount.
	 */
	lim_fork(p1, p2);

	thread_cow_get_proc(td2, p2);

	pstats_fork(p1->p_stats, p2->p_stats);

	PROC_UNLOCK(p1);
	PROC_UNLOCK(p2);

	/* Bump references to the text vnode (for procfs). */
	if (p2->p_textvp)
		vrefact(p2->p_textvp);

	/*
	 * Set up linkage for kernel based threading.
	 */
	if ((fr->fr_flags & RFTHREAD) != 0) {
		mtx_lock(&ppeers_lock);
		p2->p_peers = p1->p_peers;
		p1->p_peers = p2;
		p2->p_leader = p1->p_leader;
		mtx_unlock(&ppeers_lock);
		PROC_LOCK(p1->p_leader);
		if ((p1->p_leader->p_flag & P_WEXIT) != 0) {
			PROC_UNLOCK(p1->p_leader);
			/*
			 * The task leader is exiting, so process p1 is
			 * going to be killed shortly.  Since p1 obviously
			 * isn't dead yet, we know that the leader is either
			 * sending SIGKILL's to all the processes in this
			 * task or is sleeping waiting for all the peers to
			 * exit.  We let p1 complete the fork, but we need
			 * to go ahead and kill the new process p2 since
			 * the task leader may not get a chance to send
			 * SIGKILL to it.  We leave it on the list so that
			 * the task leader will wait for this new process
			 * to commit suicide.
			 */
			PROC_LOCK(p2);
			kern_psignal(p2, SIGKILL);
			PROC_UNLOCK(p2);
		} else
			PROC_UNLOCK(p1->p_leader);
	} else {
		p2->p_peers = NULL;
		p2->p_leader = p2;
	}

	/*
	 * Link the child into the process group and process tree.  The
	 * locks are taken in the order proctree, pgrp, child, parent.
	 */
	sx_xlock(&proctree_lock);
	PGRP_LOCK(p1->p_pgrp);
	PROC_LOCK(p2);
	PROC_LOCK(p1);

	/*
	 * Preserve some more flags in subprocess.  P_PROFIL has already
	 * been preserved.
	 */
	p2->p_flag |= p1->p_flag & P_SUGID;
	td2->td_pflags |= (td->td_pflags & TDP_ALTSTACK) | TDP_FORKING;
	SESS_LOCK(p1->p_session);
	if (p1->p_session->s_ttyvp != NULL && p1->p_flag & P_CONTROLT)
		p2->p_flag |= P_CONTROLT;
	SESS_UNLOCK(p1->p_session);
	if (fr->fr_flags & RFPPWAIT)
		p2->p_flag |= P_PPWAIT;

	p2->p_pgrp = p1->p_pgrp;
	LIST_INSERT_AFTER(p1, p2, p_pglist);
	PGRP_UNLOCK(p1->p_pgrp);
	LIST_INIT(&p2->p_children);
	LIST_INIT(&p2->p_orphans);

	callout_init_mtx(&p2->p_itcallout, &p2->p_mtx, 0);

	/*
	 * If PF_FORK is set, the child process inherits the
	 * procfs ioctl flags from its parent.
	 */
	if (p1->p_pfsflags & PF_FORK) {
		p2->p_stops = p1->p_stops;
		p2->p_pfsflags = p1->p_pfsflags;
	}

	/*
	 * This begins the section where we must prevent the parent
	 * from being swapped.
	 */
	_PHOLD(p1);
	PROC_UNLOCK(p1);

	/*
	 * Attach the new process to its parent.
	 *
	 * If RFNOWAIT is set, the newly created process becomes a child
	 * of init.  This effectively disassociates the child from the
	 * parent.
	 */
	/*
	 * NOTE(review): the RFNOWAIT branch below parents the child to
	 * p1->p_reaper, which the text above calls "init" -- confirm whether
	 * that wording is still accurate when a different reaper is set.
	 */
	if ((fr->fr_flags & RFNOWAIT) != 0) {
		pptr = p1->p_reaper;
		p2->p_reaper = pptr;
	} else {
		p2->p_reaper = (p1->p_treeflag & P_TREE_REAPER) != 0 ?
		    p1 : p1->p_reaper;
		pptr = p1;
	}
	p2->p_pptr = pptr;
	LIST_INSERT_HEAD(&pptr->p_children, p2, p_sibling);
	LIST_INIT(&p2->p_reaplist);
	LIST_INSERT_HEAD(&p2->p_reaper->p_reaplist, p2, p_reapsibling);
	if (p2->p_reaper == p1)
		p2->p_reapsubtree = p2->p_pid;
	sx_xunlock(&proctree_lock);

	/* Inform accounting that we have forked. */
	p2->p_acflag = AFORK;
	PROC_UNLOCK(p2);

#ifdef KTRACE
	ktrprocfork(p1, p2);
#endif

	/*
	 * Finish creating the child process.  It will return via a different
	 * execution path later.  (ie: directly into user mode)
	 */
	vm_forkproc(td, p2, td2, vm2, fr->fr_flags);

	/* Account the fork under the counter matching the exact flag set. */
	if (fr->fr_flags == (RFFDG | RFPROC)) {
		VM_CNT_INC(v_forks);
		VM_CNT_ADD(v_forkpages, p2->p_vmspace->vm_dsize +
		    p2->p_vmspace->vm_ssize);
	} else if (fr->fr_flags == (RFFDG | RFPROC | RFPPWAIT | RFMEM)) {
		VM_CNT_INC(v_vforks);
		VM_CNT_ADD(v_vforkpages, p2->p_vmspace->vm_dsize +
		    p2->p_vmspace->vm_ssize);
	} else if (p1 == &proc0) {
		VM_CNT_INC(v_kthreads);
		VM_CNT_ADD(v_kthreadpages, p2->p_vmspace->vm_dsize +
		    p2->p_vmspace->vm_ssize);
	} else {
		VM_CNT_INC(v_rforks);
		VM_CNT_ADD(v_rforkpages, p2->p_vmspace->vm_dsize +
		    p2->p_vmspace->vm_ssize);
	}

	/*
	 * Associate the process descriptor with the process before anything
	 * can happen that might cause that process to need the descriptor.
	 * However, don't do this until after fork(2) can no longer fail.
	 */
	if (fr->fr_flags & RFPROCDESC)
		procdesc_new(p2, fr->fr_pd_flags);

	/*
	 * Both processes are set up, now check if any loadable modules want
	 * to adjust anything.
	 */
	EVENTHANDLER_INVOKE(process_fork, p1, p2, fr->fr_flags);

	/*
	 * Set the child start time and mark the process as being complete.
	 */
	PROC_LOCK(p2);
	PROC_LOCK(p1);
	microuptime(&p2->p_stats->p_start);
	PROC_SLOCK(p2);
	p2->p_state = PRS_NORMAL;
	PROC_SUNLOCK(p2);

#ifdef KDTRACE_HOOKS
	/*
	 * Tell the DTrace fasttrap provider about the new process so that any
	 * tracepoints inherited from the parent can be removed. We have to do
	 * this only after p_state is PRS_NORMAL since the fasttrap module will
	 * use pfind() later on.
	 */
	if ((fr->fr_flags & RFMEM) == 0 && dtrace_fasttrap_fork)
		dtrace_fasttrap_fork(p1, p2);
#endif
	/*
	 * Hold the process so that it cannot exit after we make it runnable,
	 * but before we wait for the debugger.
	 */
	_PHOLD(p2);
	if (p1->p_ptevents & PTRACE_FORK) {
		/*
		 * Arrange for debugger to receive the fork event.
		 *
		 * We can report PL_FLAG_FORKED regardless of
		 * P_FOLLOWFORK settings, but it does not make a sense
		 * for runaway child.
		 */
		td->td_dbgflags |= TDB_FORK;
		td->td_dbg_forked = p2->p_pid;
		td2->td_dbgflags |= TDB_STOPATFORK;
	}
	if (fr->fr_flags & RFPPWAIT) {
		td->td_pflags |= TDP_RFPPWAIT;
		td->td_rfppwait_p = p2;
		td->td_dbgflags |= TDB_VFORK;
	}
	PROC_UNLOCK(p2);

	/*
	 * Now can be swapped.
	 */
	_PRELE(p1);
	PROC_UNLOCK(p1);

	/*
	 * Tell any interested parties about the new process.
	 */
	knote_fork(p1->p_klist, p2->p_pid);
	SDT_PROBE3(proc, , , create, p2, p1, fr->fr_flags);

	if (fr->fr_flags & RFPROCDESC) {
		procdesc_finit(p2->p_procdesc, fp_procdesc);
		fdrop(fp_procdesc, td);
	}

	if ((fr->fr_flags & RFSTOPPED) == 0) {
		/*
		 * If RFSTOPPED not requested, make child runnable and
		 * add to run queue.
		 */
		thread_lock(td2);
		TD_SET_CAN_RUN(td2);
		sched_add(td2, SRQ_BORING);
		thread_unlock(td2);
		if (fr->fr_pidp != NULL)
			*fr->fr_pidp = p2->p_pid;
	} else {
		/* Caller asked for a stopped child; hand back the proc. */
		*fr->fr_procp = p2;
	}

	PROC_LOCK(p2);
	/*
	 * Wait until debugger is attached to child.
	 */
	while (td2->td_proc == p2 && (td2->td_dbgflags & TDB_STOPATFORK) != 0)
		cv_wait(&p2->p_dbgwait, &p2->p_mtx);
	/* Drop the hold taken before the child was made runnable. */
	_PRELE(p2);
	racct_proc_fork_done(p2);
	PROC_UNLOCK(p2);
}
Example #7
0
/*
 * Process ioctls
 *
 * The parameter list comes from the PFS_IOCTL_ARGS macro, which is defined
 * elsewhere; the body relies on it supplying the calling thread (td), the
 * target process (p), the ioctl command (cmd) and its argument buffer
 * (data).  The target process must be non-NULL and locked by the caller
 * (both are asserted).
 *
 * Commands handled:
 *	PIOCBIS		OR the argument into p_stops
 *	PIOCBIC		clear the argument's bits from p_stops
 *	PIOCSFL		replace p_pfsflags (PF_ISUGID requires
 *			PRIV_DEBUG_SUGID)
 *	PIOCGFL		return p_pfsflags
 *	PIOCWAIT	sleep until p_step is set or the process is exiting,
 *			then report as PIOCSTATUS
 *	PIOCSTATUS	fill in a struct procfs_status
 *	PIOCCONT	clear p_step, optionally posting a signal first
 * plus, under the COMPAT_* options, older encodings of the same commands
 * and 32-bit variants of PIOCWAIT/PIOCSTATUS.
 *
 * Returns 0 on success, ENOTTY for an unrecognised command, EINVAL for an
 * invalid signal number in PIOCCONT, or the error from priv_check() or
 * msleep().
 */
int
procfs_ioctl(PFS_IOCTL_ARGS)
{
	struct procfs_status *ps;
#ifdef COMPAT_FREEBSD32
	struct procfs_status32 *ps32;
#endif
	int error, flags, sig;
#ifdef COMPAT_FREEBSD6
	int ival;
#endif

	KASSERT(p != NULL,
	    ("%s() called without a process", __func__));
	PROC_LOCK_ASSERT(p, MA_OWNED);

	error = 0;
	switch (cmd) {
#if defined(COMPAT_FREEBSD5) || defined(COMPAT_FREEBSD4) || defined(COMPAT_43)
	case _IOC(IOC_IN, 'p', 1, 0):
#endif
#ifdef COMPAT_FREEBSD6
	case _IO('p', 1):
		/* Old encoding: fetch the value and point data at it. */
		ival = IOCPARM_IVAL(data);
		data = &ival;
#endif
	/* Compat encodings above fall through to the native handler. */
	case PIOCBIS:
		p->p_stops |= *(unsigned int *)data;
		break;
#if defined(COMPAT_FREEBSD5) || defined(COMPAT_FREEBSD4) || defined(COMPAT_43)
	case _IOC(IOC_IN, 'p', 2, 0):
#endif
#ifdef COMPAT_FREEBSD6
	case _IO('p', 2):
		ival = IOCPARM_IVAL(data);
		data = &ival;
#endif
	/* Compat encodings above fall through to the native handler. */
	case PIOCBIC:
		p->p_stops &= ~*(unsigned int *)data;
		break;
#if defined(COMPAT_FREEBSD5) || defined(COMPAT_FREEBSD4) || defined(COMPAT_43)
	case _IOC(IOC_IN, 'p', 3, 0):
#endif
#ifdef COMPAT_FREEBSD6
	case _IO('p', 3):
		ival = IOCPARM_IVAL(data);
		data = &ival;
#endif
	/* Compat encodings above fall through to the native handler. */
	case PIOCSFL:
		flags = *(unsigned int *)data;
		if (flags & PF_ISUGID) {
			/*
			 * XXXRW: Is this specific check required here, as
			 * p_candebug() should implement it, or other checks
			 * are missing.
			 */
			error = priv_check(td, PRIV_DEBUG_SUGID);
			if (error)
				break;
		}
		p->p_pfsflags = flags;
		break;
	case PIOCGFL:
		*(unsigned int *)data = p->p_pfsflags;
		break;
	case PIOCWAIT:
		/*
		 * The process is held across each sleep.  An msleep() error
		 * only leaves the loop: the status below is still filled in
		 * and the error is what the function returns.
		 */
		while (p->p_step == 0 && (p->p_flag & P_WEXIT) == 0) {
			/* sleep until p stops */
			_PHOLD(p);
			error = msleep(&p->p_stype, &p->p_mtx,
			    PWAIT|PCATCH, "pioctl", 0);
			_PRELE(p);
			if (error != 0)
				break;
		}
		/* fall through to PIOCSTATUS */
	case PIOCSTATUS:
		ps = (struct procfs_status *)data;
		ps->state = (p->p_step == 0);
		ps->flags = 0; /* nope */
		ps->events = p->p_stops;
		/* why/val are only reported while p_step is set. */
		ps->why = p->p_step ? p->p_stype : 0;
		ps->val = p->p_step ? p->p_xstat : 0;
		break;
#ifdef COMPAT_FREEBSD32
	case PIOCWAIT32:
		/* Same wait loop as PIOCWAIT; only the result layout differs. */
		while (p->p_step == 0 && (p->p_flag & P_WEXIT) == 0) {
			/* sleep until p stops */
			_PHOLD(p);
			error = msleep(&p->p_stype, &p->p_mtx,
			    PWAIT|PCATCH, "pioctl", 0);
			_PRELE(p);
			if (error != 0)
				break;
		}
		/* fall through to PIOCSTATUS32 */
	case PIOCSTATUS32:
		ps32 = (struct procfs_status32 *)data;
		ps32->state = (p->p_step == 0);
		ps32->flags = 0; /* nope */
		ps32->events = p->p_stops;
		ps32->why = p->p_step ? p->p_stype : 0;
		ps32->val = p->p_step ? p->p_xstat : 0;
		break;
#endif
#if defined(COMPAT_FREEBSD5) || defined(COMPAT_FREEBSD4) || defined(COMPAT_43)
	case _IOC(IOC_IN, 'p', 5, 0):
#endif
#ifdef COMPAT_FREEBSD6
	case _IO('p', 5):
		ival = IOCPARM_IVAL(data);
		data = &ival;
#endif
	/* Compat encodings above fall through to the native handler. */
	case PIOCCONT:
		/* Nothing to continue unless p_step is set. */
		if (p->p_step == 0)
			break;
		sig = *(unsigned int *)data;
		if (sig != 0 && !_SIG_VALID(sig)) {
			error = EINVAL;
			break;
		}
#if 0
		p->p_step = 0;
		if (P_SHOULDSTOP(p)) {
			p->p_xstat = sig;
			p->p_flag &= ~(P_STOPPED_TRACE|P_STOPPED_SIG);
			PROC_SLOCK(p);
			thread_unsuspend(p);
			PROC_SUNLOCK(p);
		} else if (sig)
			kern_psignal(p, sig);
#else
		/* Post the optional signal, then wake sleepers on p_step. */
		if (sig)
			kern_psignal(p, sig);
		p->p_step = 0;
		wakeup(&p->p_step);
#endif
		break;
	default:
		error = (ENOTTY);
	}

	return (error);
}
Example #8
0
void
undefinedinstruction(struct trapframe *frame)
{
	struct thread *td;
	u_int fault_pc;
	int fault_instruction;
	int fault_code;
	int coprocessor;
	struct undefined_handler *uh;
	int error;
#ifdef VERBOSE_ARM32
	int s;
#endif
	ksiginfo_t ksi;

	/* Enable interrupts if they were enabled before the exception. */
	if (__predict_true(frame->tf_spsr & PSR_I) == 0)
		enable_interrupts(PSR_I);
	if (__predict_true(frame->tf_spsr & PSR_F) == 0)
		enable_interrupts(PSR_F);

	VM_CNT_INC(v_trap);

	fault_pc = frame->tf_pc;

	/*
	 * Get the current thread/proc structure or thread0/proc0 if there is
	 * none.
	 */
	td = curthread == NULL ? &thread0 : curthread;

	coprocessor = 0;
	if ((frame->tf_spsr & PSR_T) == 0) {
		/*
		 * Make sure the program counter is correctly aligned so we
		 * don't take an alignment fault trying to read the opcode.
		 */
		if (__predict_false((fault_pc & 3) != 0)) {
			ksiginfo_init_trap(&ksi);
			ksi.ksi_signo = SIGILL;
			ksi.ksi_code = ILL_ILLADR;
			ksi.ksi_addr = (u_int32_t *)(intptr_t) fault_pc;
			trapsignal(td, &ksi);
			userret(td, frame);
			return;
		}

		/*
		 * Should use fuword() here .. but in the interests of
		 * squeezing every bit of speed we will just use ReadWord().
		 * We know the instruction can be read as was just executed
		 * so this will never fail unless the kernel is screwed up
		 * in which case it does not really matter does it ?
		 */

		fault_instruction = *(u_int32_t *)fault_pc;

		/* Check for coprocessor instruction */

		/*
		 * According to the datasheets you only need to look at bit
		 * 27 of the instruction to tell the difference between and
		 * undefined instruction and a coprocessor instruction
		 * following an undefined instruction trap.
		 */

		if (ARM_COPROC_INSN(fault_instruction))
			coprocessor = ARM_COPROC(fault_instruction);
		else {          /* check for special instructions */
			if (ARM_VFP_INSN(fault_instruction))
				coprocessor = COPROC_VFP; /* vfp / simd */
		}
	} else {
#if __ARM_ARCH >= 7
		fault_instruction = *(uint16_t *)fault_pc;
		if (THUMB_32BIT_INSN(fault_instruction)) {
			fault_instruction <<= 16;
			fault_instruction |= *(uint16_t *)(fault_pc + 2);

			/*
			 * Is it a Coprocessor, Advanced SIMD, or
			 * Floating-point instruction.
			 */
			if (THUMB_COPROC_INSN(fault_instruction)) {
				if (THUMB_COPROC_UNDEFINED(fault_instruction)) {
					/* undefined insn */
				} else if (THUMB_VFP_INSN(fault_instruction))
					coprocessor = COPROC_VFP;
				else
					coprocessor =
					    THUMB_COPROC(fault_instruction);
			}
		}
#else
		/*
		 * No support for Thumb-2 on this cpu
		 */
		ksiginfo_init_trap(&ksi);
		ksi.ksi_signo = SIGILL;
		ksi.ksi_code = ILL_ILLADR;
		ksi.ksi_addr = (u_int32_t *)(intptr_t) fault_pc;
		trapsignal(td, &ksi);
		userret(td, frame);
		return;
#endif
	}

	if ((frame->tf_spsr & PSR_MODE) == PSR_USR32_MODE) {
		/*
		 * Modify the fault_code to reflect the USR/SVC state at
		 * time of fault.
		 */
		fault_code = FAULT_USER;
		td->td_frame = frame;
	} else
		fault_code = 0;

	/* OK this is were we do something about the instruction. */
	LIST_FOREACH(uh, &undefined_handlers[coprocessor], uh_link)
	    if (uh->uh_handler(fault_pc, fault_instruction, frame,
			       fault_code) == 0)
		    break;

	if (fault_code & FAULT_USER) {
		/* TODO: No support for ptrace from Thumb-2 */
		if ((frame->tf_spsr & PSR_T) == 0 &&
		    fault_instruction == PTRACE_BREAKPOINT) {
			PROC_LOCK(td->td_proc);
			_PHOLD(td->td_proc);
			error = ptrace_clear_single_step(td);
			_PRELE(td->td_proc);
			PROC_UNLOCK(td->td_proc);
			if (error != 0) {
				ksiginfo_init_trap(&ksi);
				ksi.ksi_signo = SIGILL;
				ksi.ksi_code = ILL_ILLOPC;
				ksi.ksi_addr = (u_int32_t *)(intptr_t) fault_pc;
				trapsignal(td, &ksi);
			}
			return;
		}
	}

	if (uh == NULL && (fault_code & FAULT_USER)) {
		/* Fault has not been handled */
		ksiginfo_init_trap(&ksi);
		ksi.ksi_signo = SIGILL;
		ksi.ksi_code = ILL_ILLOPC;
		ksi.ksi_addr = (u_int32_t *)(intptr_t) fault_pc;
		trapsignal(td, &ksi);
	}

	if ((fault_code & FAULT_USER) == 0) {
		if (fault_instruction == KERNEL_BREAKPOINT) {
#ifdef KDB
			kdb_trap(T_BREAKPOINT, 0, frame);
#else
			printf("No debugger in kernel.\n");
#endif
			return;
		}
		else
			panic("Undefined instruction in kernel.\n");
	}

	userret(td, frame);
}
Example #9
0
/*
 * Kernel implementation of a ptrace request.
 *
 *	td	calling thread
 *	req	PT_* request code
 *	pid	target process; not consulted for PT_TRACE_ME, which
 *		operates on the caller's own process
 *	addr	request-specific address (new PC, register buffer, or
 *		struct ptrace_io_desc for PT_IO)
 *	data	request-specific value (signal number, or the word to
 *		write for PT_WRITE_I/PT_WRITE_D)
 *
 * Returns 0 on success or an errno value:
 *	ESRCH	no process with that pid
 *	EINVAL	target is a system process, PT_ATTACH to self or to an
 *		ancestor while being traced, bad signal number, unknown
 *		request, or unknown PT_IO operation
 *	EBUSY	PT_ATTACH to an already traced process, or the target is
 *		not traced by the caller / not currently stopped
 *	EPERM	target is not being traced
 * or the error from p_cansee(), p_candebug() or the helper that performs
 * the request.  For PT_READ_I/PT_READ_D the word read is returned in
 * td->td_retval[0].
 */
int
kern_ptrace(struct thread *td, int req, pid_t pid, void *addr, int data)
{
	struct iovec iov;
	struct uio uio;
	struct proc *curp, *p, *pp;
	struct thread *td2;
	struct ptrace_io_desc *piod;
	int error, write, tmp;
	int proctree_locked = 0;

	curp = td->td_proc;

	/* Lock proctree before locking the process. */
	switch (req) {
	case PT_TRACE_ME:
	case PT_ATTACH:
	case PT_STEP:
	case PT_CONTINUE:
	case PT_DETACH:
		sx_xlock(&proctree_lock);
		proctree_locked = 1;
		break;
	default:
		break;
	}
		
	write = 0;
	if (req == PT_TRACE_ME) {
		p = td->td_proc;
		PROC_LOCK(p);
	} else {
		/*
		 * pfind() hands the process back locked (its result is
		 * unlocked explicitly in the PT_DETACH path below), so p is
		 * locked on both branches from here on.
		 */
		if ((p = pfind(pid)) == NULL) {
			if (proctree_locked)
				sx_xunlock(&proctree_lock);
			return (ESRCH);
		}
	}
	if ((error = p_cansee(td, p)) != 0)
		goto fail;

	if ((error = p_candebug(td, p)) != 0)
		goto fail;

	/*
	 * System processes can't be debugged.
	 */
	if ((p->p_flag & P_SYSTEM) != 0) {
		error = EINVAL;
		goto fail;
	}
	
	/*
	 * Permissions check
	 */
	switch (req) {
	case PT_TRACE_ME:
		/* Always legal. */
		break;

	case PT_ATTACH:
		/* Self */
		if (p->p_pid == td->td_proc->p_pid) {
			error = EINVAL;
			goto fail;
		}

		/* Already traced */
		if (p->p_flag & P_TRACED) {
			error = EBUSY;
			goto fail;
		}

		/* Can't trace an ancestor if you're being traced. */
		if (curp->p_flag & P_TRACED) {
			for (pp = curp->p_pptr; pp != NULL; pp = pp->p_pptr) {
				if (pp == p) {
					error = EINVAL;
					goto fail;
				}
			}
		}


		/* OK */
		break;

	case PT_READ_I:
	case PT_READ_D:
	case PT_WRITE_I:
	case PT_WRITE_D:
	case PT_IO:
	case PT_CONTINUE:
	case PT_KILL:
	case PT_STEP:
	case PT_DETACH:
	case PT_GETREGS:
	case PT_SETREGS:
	case PT_GETFPREGS:
	case PT_SETFPREGS:
	case PT_GETDBREGS:
	case PT_SETDBREGS:
		/* not being traced... */
		if ((p->p_flag & P_TRACED) == 0) {
			error = EPERM;
			goto fail;
		}

		/* not being traced by YOU */
		if (p->p_pptr != td->td_proc) {
			error = EBUSY;
			goto fail;
		}

		/* not currently stopped */
		if (!P_SHOULDSTOP(p) || (p->p_flag & P_WAITED) == 0) {
			error = EBUSY;
			goto fail;
		}

		/* OK */
		break;

	default:
		error = EINVAL;
		goto fail;
	}

	/* Thread-level operations below act on the first thread of p. */
	td2 = FIRST_THREAD_IN_PROC(p);
#ifdef FIX_SSTEP
	/*
	 * Single step fixup ala procfs
	 */
	FIX_SSTEP(td2);			/* XXXKSE */
#endif

	/*
	 * Actually do the requests
	 */

	td->td_retval[0] = 0;

	switch (req) {
	case PT_TRACE_ME:
		/* set my trace flag and "owner" so it can read/write me */
		p->p_flag |= P_TRACED;
		p->p_oppid = p->p_pptr->p_pid;
		PROC_UNLOCK(p);
		/* proctree_lock is always held for PT_TRACE_ME (see above). */
		sx_xunlock(&proctree_lock);
		return (0);

	case PT_ATTACH:
		/* security check done above */
		p->p_flag |= P_TRACED;
		p->p_oppid = p->p_pptr->p_pid;
		if (p->p_pptr != td->td_proc)
			proc_reparent(p, td->td_proc);
		data = SIGSTOP;
		goto sendsig;	/* in PT_CONTINUE below */

	case PT_STEP:
	case PT_CONTINUE:
	case PT_DETACH:
		/* XXX data is used even in the PT_STEP case. */
		if (req != PT_STEP && (unsigned)data > _SIG_MAXSIG) {
			error = EINVAL;
			goto fail;
		}

		/* Hold p while its thread state is modified. */
		_PHOLD(p);

		if (req == PT_STEP) {
			error = ptrace_single_step(td2);
			if (error) {
				_PRELE(p);
				goto fail;
			}
		}

		/* An addr of 1 means "leave the PC alone". */
		if (addr != (void *)1) {
			error = ptrace_set_pc(td2, (u_long)(uintfptr_t)addr);
			if (error) {
				_PRELE(p);
				goto fail;
			}
		}
		_PRELE(p);

		if (req == PT_DETACH) {
			/* reset process parent */
			if (p->p_oppid != p->p_pptr->p_pid) {
				struct proc *pp;

				/*
				 * p is unlocked around the pfind() of the
				 * original parent; fall back to initproc if
				 * it can no longer be found.
				 */
				PROC_UNLOCK(p);
				pp = pfind(p->p_oppid);
				if (pp == NULL)
					pp = initproc;
				else
					PROC_UNLOCK(pp);
				PROC_LOCK(p);
				proc_reparent(p, pp);
			}
			p->p_flag &= ~(P_TRACED | P_WAITED);
			p->p_oppid = 0;

			/* should we send SIGCHLD? */
		}

	sendsig:
		/* Also reached by goto from PT_ATTACH and PT_KILL. */
		if (proctree_locked)
			sx_xunlock(&proctree_lock);
		/* deliver or queue signal */
		if (P_SHOULDSTOP(p)) {
			p->p_xstat = data;
			mtx_lock_spin(&sched_lock);
			p->p_flag &= ~(P_STOPPED_TRACE|P_STOPPED_SIG);
			thread_unsuspend(p);
			setrunnable(td2);	/* XXXKSE */
			/* Need foreach kse in proc, ... make_kse_queued(). */
			mtx_unlock_spin(&sched_lock);
		} else if (data)
			psignal(p, data);
		PROC_UNLOCK(p);
		
		return (0);

	case PT_WRITE_I:
	case PT_WRITE_D:
		write = 1;
		/* FALLTHROUGH */
	case PT_READ_I:
	case PT_READ_D:
		/*
		 * NOTE(review): p is unlocked here and no hold is taken
		 * before proc_rwmem(); confirm the target cannot go away
		 * during the transfer.
		 */
		PROC_UNLOCK(p);
		tmp = 0;
		/* write = 0 set above */
		/* One int is transferred between kernel space and the target. */
		iov.iov_base = write ? (caddr_t)&data : (caddr_t)&tmp;
		iov.iov_len = sizeof(int);
		uio.uio_iov = &iov;
		uio.uio_iovcnt = 1;
		uio.uio_offset = (off_t)(uintptr_t)addr;
		uio.uio_resid = sizeof(int);
		uio.uio_segflg = UIO_SYSSPACE;	/* i.e.: the uap */
		uio.uio_rw = write ? UIO_WRITE : UIO_READ;
		uio.uio_td = td;
		error = proc_rwmem(p, &uio);
		if (uio.uio_resid != 0) {
			/*
			 * XXX proc_rwmem() doesn't currently return ENOSPC,
			 * so I think write() can bogusly return 0.
			 * XXX what happens for short writes?  We don't want
			 * to write partial data.
			 * XXX proc_rwmem() returns EPERM for other invalid
			 * addresses.  Convert this to EINVAL.  Does this
			 * clobber returns of EPERM for other reasons?
			 */
			if (error == 0 || error == ENOSPC || error == EPERM)
				error = EINVAL;	/* EOF */
		}
		if (!write)
			td->td_retval[0] = tmp;
		return (error);

	case PT_IO:
		/*
		 * addr is used directly as a struct ptrace_io_desc; the
		 * buffer it describes is treated as a user-space address.
		 * On return piod_len holds the number of bytes transferred.
		 */
		PROC_UNLOCK(p);
		piod = addr;
		iov.iov_base = piod->piod_addr;
		iov.iov_len = piod->piod_len;
		uio.uio_iov = &iov;
		uio.uio_iovcnt = 1;
		uio.uio_offset = (off_t)(uintptr_t)piod->piod_offs;
		uio.uio_resid = piod->piod_len;
		uio.uio_segflg = UIO_USERSPACE;
		uio.uio_td = td;
		switch (piod->piod_op) {
		case PIOD_READ_D:
		case PIOD_READ_I:
			uio.uio_rw = UIO_READ;
			break;
		case PIOD_WRITE_D:
		case PIOD_WRITE_I:
			uio.uio_rw = UIO_WRITE;
			break;
		default:
			return (EINVAL);
		}
		error = proc_rwmem(p, &uio);
		piod->piod_len -= uio.uio_resid;
		return (error);

	case PT_KILL:
		data = SIGKILL;
		goto sendsig;	/* in PT_CONTINUE above */

	/*
	 * The register requests below all follow the same shape: hold p,
	 * call the matching proc_{read,write}_*regs() helper on td2, then
	 * release the hold and the process lock.
	 */
	case PT_SETREGS:
		_PHOLD(p);
		error = proc_write_regs(td2, addr);
		_PRELE(p);
		PROC_UNLOCK(p);
		return (error);

	case PT_GETREGS:
		_PHOLD(p);
		error = proc_read_regs(td2, addr);
		_PRELE(p);
		PROC_UNLOCK(p);
		return (error);

	case PT_SETFPREGS:
		_PHOLD(p);
		error = proc_write_fpregs(td2, addr);
		_PRELE(p);
		PROC_UNLOCK(p);
		return (error);

	case PT_GETFPREGS:
		_PHOLD(p);
		error = proc_read_fpregs(td2, addr);
		_PRELE(p);
		PROC_UNLOCK(p);
		return (error);

	case PT_SETDBREGS:
		_PHOLD(p);
		error = proc_write_dbregs(td2, addr);
		_PRELE(p);
		PROC_UNLOCK(p);
		return (error);

	case PT_GETDBREGS:
		_PHOLD(p);
		error = proc_read_dbregs(td2, addr);
		_PRELE(p);
		PROC_UNLOCK(p);
		return (error);

	default:
		KASSERT(0, ("unreachable code\n"));
		break;
	}

	KASSERT(0, ("unreachable code\n"));
	return (0);

fail:
	/* Error exit taken while p is still locked. */
	PROC_UNLOCK(p);
	if (proctree_locked)
		sx_xunlock(&proctree_lock);
	return (error);
}