Example 1
static void
ia32_osendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
{
	struct ia32_sigframe3 sf, *fp;
	struct proc *p;
	struct thread *td;
	struct sigacts *psp;
	struct trapframe *regs;
	int sig;
	int oonstack;

	td = curthread;
	p = td->td_proc;
	PROC_LOCK_ASSERT(p, MA_OWNED);
	sig = ksi->ksi_signo;
	psp = p->p_sigacts;
	mtx_assert(&psp->ps_mtx, MA_OWNED);
	regs = td->td_frame;
	oonstack = sigonstack(regs->tf_rsp);

	/* Allocate space for the signal handler context. */
	if ((td->td_pflags & TDP_ALTSTACK) && !oonstack &&
	    SIGISMEMBER(psp->ps_sigonstack, sig)) {
		fp = (struct ia32_sigframe3 *)(td->td_sigstk.ss_sp +
		    td->td_sigstk.ss_size - sizeof(sf));
		td->td_sigstk.ss_flags |= SS_ONSTACK;
	} else
		fp = (struct ia32_sigframe3 *)regs->tf_rsp - 1;

	/* Translate the signal if appropriate. */
	if (p->p_sysent->sv_sigtbl && sig <= p->p_sysent->sv_sigsize)
		sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];

	/* Build the argument list for the signal handler. */
	sf.sf_signum = sig;
	sf.sf_scp = (register_t)&fp->sf_siginfo.si_sc;
	if (SIGISMEMBER(psp->ps_siginfo, sig)) {
		/* Signal handler installed with SA_SIGINFO. */
		sf.sf_arg2 = (register_t)&fp->sf_siginfo;
		sf.sf_siginfo.si_signo = sig;
		sf.sf_siginfo.si_code = ksi->ksi_code;
		sf.sf_ah = (uintptr_t)catcher;
	} else {
		/* Old FreeBSD-style arguments. */
		sf.sf_arg2 = ksi->ksi_code;
		sf.sf_addr = (register_t)ksi->ksi_addr;
		sf.sf_ah = (uintptr_t)catcher;
	}
	mtx_unlock(&psp->ps_mtx);
	PROC_UNLOCK(p);

	/* Save most if not all of trap frame. */
	sf.sf_siginfo.si_sc.sc_eax = regs->tf_rax;
	sf.sf_siginfo.si_sc.sc_ebx = regs->tf_rbx;
	sf.sf_siginfo.si_sc.sc_ecx = regs->tf_rcx;
	sf.sf_siginfo.si_sc.sc_edx = regs->tf_rdx;
	sf.sf_siginfo.si_sc.sc_esi = regs->tf_rsi;
	sf.sf_siginfo.si_sc.sc_edi = regs->tf_rdi;
	sf.sf_siginfo.si_sc.sc_cs = regs->tf_cs;
	sf.sf_siginfo.si_sc.sc_ds = regs->tf_ds;
	sf.sf_siginfo.si_sc.sc_ss = regs->tf_ss;
	sf.sf_siginfo.si_sc.sc_es = regs->tf_es;
	sf.sf_siginfo.si_sc.sc_fs = regs->tf_fs;
	sf.sf_siginfo.si_sc.sc_gs = regs->tf_gs;
	sf.sf_siginfo.si_sc.sc_isp = regs->tf_rsp;

	/* Build the signal context to be used by osigreturn(). */
	sf.sf_siginfo.si_sc.sc_onstack = (oonstack) ? 1 : 0;
	SIG2OSIG(*mask, sf.sf_siginfo.si_sc.sc_mask);
	sf.sf_siginfo.si_sc.sc_esp = regs->tf_rsp;
	sf.sf_siginfo.si_sc.sc_ebp = regs->tf_rbp;
	sf.sf_siginfo.si_sc.sc_eip = regs->tf_rip;
	sf.sf_siginfo.si_sc.sc_eflags = regs->tf_rflags;
	sf.sf_siginfo.si_sc.sc_trapno = regs->tf_trapno;
	sf.sf_siginfo.si_sc.sc_err = regs->tf_err;

	/*
	 * Copy the sigframe out to the user's stack.
	 */
	if (copyout(&sf, fp, sizeof(*fp)) != 0) {
#ifdef DEBUG
		printf("process %ld has trashed its stack\n", (long)p->p_pid);
#endif
		PROC_LOCK(p);
		sigexit(td, SIGILL);
	}

	regs->tf_rsp = (uintptr_t)fp;
	regs->tf_rip = p->p_sysent->sv_psstrings - sz_ia32_osigcode;
	regs->tf_rflags &= ~(PSL_T | PSL_D);
	regs->tf_cs = _ucode32sel;
	regs->tf_ds = _udatasel;
	regs->tf_es = _udatasel;
	regs->tf_fs = _udatasel;
	regs->tf_ss = _udatasel;
	set_pcb_flags(td->td_pcb, PCB_FULL_IRET);
	PROC_LOCK(p);
	mtx_lock(&psp->ps_mtx);
}
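
Note on the locking discipline shared by the sendsig-style examples in this collection (this one, ia32_sendsig, freebsd4_ia32_sendsig and cheriabi_sendsig below): enter with the proc lock and the sigacts mutex held, drop both around the copyout() of the frame because copyout() may fault and sleep, and reacquire them before returning. The sketch below restates only that protocol; the function name and parameters are invented for illustration and it is not taken from any of the examples.

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/signalvar.h>

static void
example_sendsig_locking(struct thread *td, void *sf, void *ufp, size_t len)
{
	struct proc *p = td->td_proc;
	struct sigacts *psp = p->p_sigacts;

	/* Callers of sendsig hand us both locks already held. */
	PROC_LOCK_ASSERT(p, MA_OWNED);
	mtx_assert(&psp->ps_mtx, MA_OWNED);

	/* ... the signal frame 'sf' is built while both locks are held ... */

	mtx_unlock(&psp->ps_mtx);
	PROC_UNLOCK(p);

	/* copyout() may sleep, so neither lock may be held across it. */
	if (copyout(sf, ufp, len) != 0) {
		/* The user stack is unusable; sigexit() wants the proc lock. */
		PROC_LOCK(p);
		sigexit(td, SIGILL);
		/* NOTREACHED */
	}

	/* Reacquire the locks the caller expects to hold on return. */
	PROC_LOCK(p);
	mtx_lock(&psp->ps_mtx);
}
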
Example 2
static int
linux_common_open(struct thread *td, int dirfd, char *path, int l_flags, int mode)
{
	cap_rights_t rights;
	struct proc *p = td->td_proc;
	struct file *fp;
	int fd;
	int bsd_flags, error;

	bsd_flags = 0;
	switch (l_flags & LINUX_O_ACCMODE) {
	case LINUX_O_WRONLY:
		bsd_flags |= O_WRONLY;
		break;
	case LINUX_O_RDWR:
		bsd_flags |= O_RDWR;
		break;
	default:
		bsd_flags |= O_RDONLY;
	}
	if (l_flags & LINUX_O_NDELAY)
		bsd_flags |= O_NONBLOCK;
	if (l_flags & LINUX_O_APPEND)
		bsd_flags |= O_APPEND;
	if (l_flags & LINUX_O_SYNC)
		bsd_flags |= O_FSYNC;
	if (l_flags & LINUX_O_NONBLOCK)
		bsd_flags |= O_NONBLOCK;
	if (l_flags & LINUX_FASYNC)
		bsd_flags |= O_ASYNC;
	if (l_flags & LINUX_O_CREAT)
		bsd_flags |= O_CREAT;
	if (l_flags & LINUX_O_TRUNC)
		bsd_flags |= O_TRUNC;
	if (l_flags & LINUX_O_EXCL)
		bsd_flags |= O_EXCL;
	if (l_flags & LINUX_O_NOCTTY)
		bsd_flags |= O_NOCTTY;
	if (l_flags & LINUX_O_DIRECT)
		bsd_flags |= O_DIRECT;
	if (l_flags & LINUX_O_NOFOLLOW)
		bsd_flags |= O_NOFOLLOW;
	if (l_flags & LINUX_O_DIRECTORY)
		bsd_flags |= O_DIRECTORY;
	/* XXX LINUX_O_NOATIME: unable to be easily implemented. */

	error = kern_openat(td, dirfd, path, UIO_SYSSPACE, bsd_flags, mode);
	if (error != 0)
		goto done;
	if (bsd_flags & O_NOCTTY)
		goto done;

	/*
	 * XXX In between kern_open() and fget(), another process
	 * having the same filedesc could use that fd without
	 * checking below.
	 */
	fd = td->td_retval[0];
	if (fget(td, fd, cap_rights_init(&rights, CAP_IOCTL), &fp) == 0) {
		if (fp->f_type != DTYPE_VNODE) {
			fdrop(fp, td);
			goto done;
		}
		sx_slock(&proctree_lock);
		PROC_LOCK(p);
		if (SESS_LEADER(p) && !(p->p_flag & P_CONTROLT)) {
			PROC_UNLOCK(p);
			sx_sunlock(&proctree_lock);
			/* XXXPJD: Verify if TIOCSCTTY is allowed. */
			(void) fo_ioctl(fp, TIOCSCTTY, (caddr_t) 0,
			    td->td_ucred, td);
		} else {
			PROC_UNLOCK(p);
			sx_sunlock(&proctree_lock);
		}
		fdrop(fp, td);
	}

done:
#ifdef DEBUG
	if (ldebug(open))
		printf(LMSG("open returns error %d"), error);
#endif
	LFREEPATH(path);
	return (error);
}
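
The flag translation at the top of linux_common_open() is a chain of ifs; an equivalent table-driven variant is sketched below. It is illustrative only: the helper name and the table are not part of the Linuxulator, while the LINUX_O_*/LINUX_FASYNC and O_* constants are the same ones used in the example (from the Linuxulator headers and <sys/fcntl.h> respectively).

#include <sys/param.h>
#include <sys/fcntl.h>

/* One Linux open(2) flag and its FreeBSD counterpart. */
static const struct {
	int	l_flag;
	int	b_flag;
} linux_open_flag_map[] = {
	{ LINUX_O_NDELAY,	O_NONBLOCK },
	{ LINUX_O_APPEND,	O_APPEND },
	{ LINUX_O_SYNC,		O_FSYNC },
	{ LINUX_O_NONBLOCK,	O_NONBLOCK },
	{ LINUX_FASYNC,		O_ASYNC },
	{ LINUX_O_CREAT,	O_CREAT },
	{ LINUX_O_TRUNC,	O_TRUNC },
	{ LINUX_O_EXCL,		O_EXCL },
	{ LINUX_O_NOCTTY,	O_NOCTTY },
	{ LINUX_O_DIRECT,	O_DIRECT },
	{ LINUX_O_NOFOLLOW,	O_NOFOLLOW },
	{ LINUX_O_DIRECTORY,	O_DIRECTORY },
};

static int
linux_to_bsd_open_flags(int l_flags)
{
	int bsd_flags;
	u_int i;

	/* The access mode is a two-bit field, not independent flag bits. */
	switch (l_flags & LINUX_O_ACCMODE) {
	case LINUX_O_WRONLY:
		bsd_flags = O_WRONLY;
		break;
	case LINUX_O_RDWR:
		bsd_flags = O_RDWR;
		break;
	default:
		bsd_flags = O_RDONLY;
	}
	for (i = 0; i < sizeof(linux_open_flag_map) /
	    sizeof(linux_open_flag_map[0]); i++)
		if (l_flags & linux_open_flag_map[i].l_flag)
			bsd_flags |= linux_open_flag_map[i].b_flag;
	return (bsd_flags);
}
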
Example 3
static int
trap_pfault(struct trapframe *frame, int user)
{
	vm_offset_t	eva, va;
	struct		thread *td;
	struct		proc *p;
	vm_map_t	map;
	vm_prot_t	ftype;
	int		rv;

	td = curthread;
	p = td->td_proc;

	if (frame->exc == EXC_ISI) {
		eva = frame->srr0;
		ftype = VM_PROT_READ | VM_PROT_EXECUTE;

	} else {
		eva = frame->cpu.booke.dear;
		if (frame->cpu.booke.esr & ESR_ST)
			ftype = VM_PROT_WRITE;
		else
			ftype = VM_PROT_READ;
	}

	if (user) {
		KASSERT(p->p_vmspace != NULL, ("trap_pfault: vmspace  NULL"));
		map = &p->p_vmspace->vm_map;
	} else {
		if (eva < VM_MAXUSER_ADDRESS) {

			if (p->p_vmspace == NULL)
				return (SIGSEGV);

			map = &p->p_vmspace->vm_map;

		} else {
			map = kernel_map;
		}
	}
	va = trunc_page(eva);

	if (map != kernel_map) {
		/*
		 * Keep swapout from messing with us during this
		 *	critical time.
		 */
		PROC_LOCK(p);
		++p->p_lock;
		PROC_UNLOCK(p);

		/* Fault in the user page: */
		rv = vm_fault(map, va, ftype, VM_FAULT_NORMAL);

		PROC_LOCK(p);
		--p->p_lock;
		PROC_UNLOCK(p);
	} else {
		/*
		 * Don't have to worry about process locking or stacks in the
		 * kernel.
		 */
		rv = vm_fault(map, va, ftype, VM_FAULT_NORMAL);
	}

	if (rv == KERN_SUCCESS)
		return (0);

	if (!user && handle_onfault(frame))
		return (0);

	return ((rv == KERN_PROTECTION_FAILURE) ? SIGBUS : SIGSEGV);
}
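
Both this handler and data_abort_handler() in the next example bracket vm_fault() with the same hold pattern: bump p_lock under the proc lock so the process cannot be swapped out while the fault is serviced, then drop the hold afterwards (exit1() further below waits for p_lock to drain). The fragment below isolates just that pattern; the function name is invented, and current FreeBSD spells the same idea with the PHOLD()/PRELE() macros.

#include <sys/param.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/signal.h>
#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/vm_extern.h>
#include <vm/vm_map.h>
#include <vm/vm_kern.h>

static int
example_fault_with_hold(struct proc *p, vm_map_t map, vm_offset_t va,
    vm_prot_t ftype)
{
	int rv;

	if (map != kernel_map) {
		/* Keep swapout from messing with us during the fault. */
		PROC_LOCK(p);
		++p->p_lock;
		PROC_UNLOCK(p);
	}

	rv = vm_fault(map, va, ftype, VM_FAULT_NORMAL);

	if (map != kernel_map) {
		PROC_LOCK(p);
		--p->p_lock;
		PROC_UNLOCK(p);
	}

	/* Translate the VM result into the signal the caller should post. */
	if (rv == KERN_SUCCESS)
		return (0);
	return (rv == KERN_PROTECTION_FAILURE ? SIGBUS : SIGSEGV);
}
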
Example 4
File: trap.c Project: rodero95/sys
void
data_abort_handler(trapframe_t *tf)
{
	struct vm_map *map;
	struct pcb *pcb;
	struct thread *td;
	u_int user, far, fsr;
	vm_prot_t ftype;
	void *onfault;
	vm_offset_t va;
	int error = 0;
	struct ksig ksig;
	struct proc *p;
	

	/* Grab FAR/FSR before enabling interrupts */
	far = cpu_faultaddress();
	fsr = cpu_faultstatus();
#if 0
	printf("data abort: fault address=%p (from pc=%p lr=%p)\n",
	       (void*)far, (void*)tf->tf_pc, (void*)tf->tf_svc_lr);
#endif

	/* Update vmmeter statistics */
#if 0
	vmexp.traps++;
#endif

	td = curthread;
	p = td->td_proc;

	PCPU_INC(cnt.v_trap);
	/* Data abort came from user mode? */
	user = TRAP_USERMODE(tf);

	if (user) {
		td->td_pticks = 0;
		td->td_frame = tf;		
		if (td->td_ucred != td->td_proc->p_ucred)
			cred_update_thread(td);
		
	}
	/* Grab the current pcb */
	pcb = td->td_pcb;
	/* Re-enable interrupts if they were enabled previously */
	if (td->td_md.md_spinlock_count == 0) {
		if (__predict_true((tf->tf_spsr & I32_bit) == 0))
			enable_interrupts(I32_bit);
		if (__predict_true((tf->tf_spsr & F32_bit) == 0))
			enable_interrupts(F32_bit);
	}
		

	/* Invoke the appropriate handler, if necessary */
	if (__predict_false(data_aborts[fsr & FAULT_TYPE_MASK].func != NULL)) {
		if ((data_aborts[fsr & FAULT_TYPE_MASK].func)(tf, fsr, far,
		    td, &ksig)) {
			goto do_trapsignal;
		}
		goto out;
	}

	/*
	 * At this point, we're dealing with one of the following data aborts:
	 *
	 *  FAULT_TRANS_S  - Translation -- Section
	 *  FAULT_TRANS_P  - Translation -- Page
	 *  FAULT_DOMAIN_S - Domain -- Section
	 *  FAULT_DOMAIN_P - Domain -- Page
	 *  FAULT_PERM_S   - Permission -- Section
	 *  FAULT_PERM_P   - Permission -- Page
	 *
	 * These are the main virtual memory-related faults signalled by
	 * the MMU.
	 */

	/* fusubailout is used by [fs]uswintr to avoid page faulting */
	if (__predict_false(pcb->pcb_onfault == fusubailout)) {
		tf->tf_r0 = EFAULT;
		tf->tf_pc = (register_t)(intptr_t) pcb->pcb_onfault;
		return;
	}

	/*
	 * Make sure the Program Counter is sane. We could fall foul of
	 * someone executing Thumb code, in which case the PC might not
	 * be word-aligned. This would cause a kernel alignment fault
	 * further down if we have to decode the current instruction.
	 * XXX: It would be nice to be able to support Thumb at some point.
	 */
	if (__predict_false((tf->tf_pc & 3) != 0)) {
		if (user) {
			/*
			 * Give the user an illegal instruction signal.
			 */
			/* Deliver a SIGILL to the process */
			ksig.signb = SIGILL;
			ksig.code = 0;
			goto do_trapsignal;
		}

		/*
		 * The kernel never executes Thumb code.
		 */
		printf("\ndata_abort_fault: Misaligned Kernel-mode "
		    "Program Counter\n");
		dab_fatal(tf, fsr, far, td, &ksig);
	}

	/* See if the cpu state needs to be fixed up */
	switch (data_abort_fixup(tf, fsr, far, td, &ksig)) {
	case ABORT_FIXUP_RETURN:
		return;
	case ABORT_FIXUP_FAILED:
		/* Deliver a SIGILL to the process */
		ksig.signb = SIGILL;
		ksig.code = 0;
		goto do_trapsignal;
	default:
		break;
	}

	va = trunc_page((vm_offset_t)far);

	/*
	 * It is only a kernel address space fault iff:
	 *	1. user == 0  and
	 *	2. pcb_onfault not set or
	 *	3. pcb_onfault set and not LDRT/LDRBT/STRT/STRBT instruction.
	 */
	if (user == 0 && (va >= VM_MIN_KERNEL_ADDRESS ||
	    (va < VM_MIN_ADDRESS && vector_page == ARM_VECTORS_LOW)) &&
	    __predict_true((pcb->pcb_onfault == NULL ||
	     (ReadWord(tf->tf_pc) & 0x05200000) != 0x04200000))) {
		map = kernel_map;

		/* Was the fault due to the FPE/IPKDB ? */
		if (__predict_false((tf->tf_spsr & PSR_MODE)==PSR_UND32_MODE)) {

			/*
			 * Force exit via userret()
			 * This is necessary as the FPE is an extension to
			 * userland that actually runs in a privileged mode
			 * but uses USR mode permissions for its accesses.
			 */
			user = 1;
			ksig.signb = SIGSEGV;
			ksig.code = 0;
			goto do_trapsignal;
		}
	} else {
		map = &td->td_proc->p_vmspace->vm_map;
	}

	/*
	 * We need to know whether the page should be mapped
	 * as R or R/W. The MMU does not give us the info as
	 * to whether the fault was caused by a read or a write.
	 *
	 * However, we know that a permission fault can only be
	 * the result of a write to a read-only location, so
	 * we can deal with those quickly.
	 *
	 * Otherwise we need to disassemble the instruction
	 * responsible to determine if it was a write.
	 */
	if (IS_PERMISSION_FAULT(fsr))
		ftype = VM_PROT_WRITE;
	else {
		u_int insn = ReadWord(tf->tf_pc);

		if (((insn & 0x0c100000) == 0x04000000) ||	/* STR/STRB */
		    ((insn & 0x0e1000b0) == 0x000000b0) ||	/* STRH/STRD */
		    ((insn & 0x0a100000) == 0x08000000)) {	/* STM/CDT */
			ftype = VM_PROT_WRITE;
		} else {
			if ((insn & 0x0fb00ff0) == 0x01000090)	/* SWP */
				ftype = VM_PROT_READ | VM_PROT_WRITE;
			else
				ftype = VM_PROT_READ;
		}
	}

	/*
	 * See if the fault is as a result of ref/mod emulation,
	 * or domain mismatch.
	 */
#ifdef DEBUG
	last_fault_code = fsr;
#endif
	if (pmap_fault_fixup(vmspace_pmap(td->td_proc->p_vmspace), va, ftype,
	    user)) {
		goto out;
	}

	onfault = pcb->pcb_onfault;
	pcb->pcb_onfault = NULL;
	if (map != kernel_map) {
		PROC_LOCK(p);
		p->p_lock++;
		PROC_UNLOCK(p);
	}
	error = vm_fault(map, va, ftype, VM_FAULT_NORMAL);
	pcb->pcb_onfault = onfault;

	if (map != kernel_map) {
		PROC_LOCK(p);
		p->p_lock--;
		PROC_UNLOCK(p);
	}
	if (__predict_true(error == 0))
		goto out;
	if (user == 0) {
		if (pcb->pcb_onfault) {
			tf->tf_r0 = error;
			tf->tf_pc = (register_t)(intptr_t) pcb->pcb_onfault;
			return;
		}

		printf("\nvm_fault(%p, %x, %x, 0) -> %x\n", map, va, ftype,
		    error);
		dab_fatal(tf, fsr, far, td, &ksig);
	}


	if (error == ENOMEM) {
		printf("VM: pid %d (%s), uid %d killed: "
		    "out of swap\n", td->td_proc->p_pid, td->td_name,
		    (td->td_proc->p_ucred) ?
		     td->td_proc->p_ucred->cr_uid : -1);
		ksig.signb = SIGKILL;
	} else {
		ksig.signb = SIGSEGV;
	}
	ksig.code = 0;
do_trapsignal:
	call_trapsignal(td, ksig.signb, ksig.code);
out:
	/* If returning to user mode, make sure to invoke userret() */
	if (user)
		userret(td, tf);
}
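
The write-detection step above classifies the faulting ARM instruction with fixed opcode masks. The stand-alone helper below restates only that classification so the masks can be exercised in user space; the function is illustrative and not part of the handler, but the mask values are copied verbatim from it.

#include <stdbool.h>
#include <stdint.h>

/* Classify an ARM instruction the way data_abort_handler() does. */
static bool
arm_insn_writes_memory(uint32_t insn, bool *also_reads)
{
	*also_reads = false;
	if ((insn & 0x0c100000) == 0x04000000)		/* STR/STRB */
		return (true);
	if ((insn & 0x0e1000b0) == 0x000000b0)		/* STRH/STRD */
		return (true);
	if ((insn & 0x0a100000) == 0x08000000)		/* STM/CDT */
		return (true);
	if ((insn & 0x0fb00ff0) == 0x01000090) {	/* SWP: read and write */
		*also_reads = true;
		return (true);
	}
	return (false);
}
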
Example 5
static void
pmclog_loop(void *arg)
{
	int error;
	struct pmc_owner *po;
	struct pmclog_buffer *lb;
	struct proc *p;
	struct ucred *ownercred;
	struct ucred *mycred;
	struct thread *td;
	struct uio auio;
	struct iovec aiov;
	size_t nbytes;

	po = (struct pmc_owner *) arg;
	p = po->po_owner;
	td = curthread;
	mycred = td->td_ucred;

	PROC_LOCK(p);
	ownercred = crhold(p->p_ucred);
	PROC_UNLOCK(p);

	PMCDBG(LOG,INI,1, "po=%p kt=%p", po, po->po_kthread);
	KASSERT(po->po_kthread == curthread->td_proc,
	    ("[pmclog,%d] proc mismatch po=%p po/kt=%p curproc=%p", __LINE__,
		po, po->po_kthread, curthread->td_proc));

	lb = NULL;


	/*
	 * Loop waiting for I/O requests to be added to the owner
	 * struct's queue.  The loop is exited when the log file
	 * is deconfigured.
	 */

	mtx_lock(&pmc_kthread_mtx);

	for (;;) {

		/* check if we've been asked to exit */
		if ((po->po_flags & PMC_PO_OWNS_LOGFILE) == 0)
			break;

		if (lb == NULL) { /* look for a fresh buffer to write */
			mtx_lock_spin(&po->po_mtx);
			if ((lb = TAILQ_FIRST(&po->po_logbuffers)) == NULL) {
				mtx_unlock_spin(&po->po_mtx);

				/* No more buffers and shutdown required. */
				if (po->po_flags & PMC_PO_SHUTDOWN) {
					mtx_unlock(&pmc_kthread_mtx);
					/*
					 * Close the file to get PMCLOG_EOF
					 * error in pmclog(3).
					 */
					fo_close(po->po_file, curthread);
					mtx_lock(&pmc_kthread_mtx);
					break;
				}

				(void) msleep(po, &pmc_kthread_mtx, PWAIT,
				    "pmcloop", 0);
				continue;
			}

			TAILQ_REMOVE(&po->po_logbuffers, lb, plb_next);
			mtx_unlock_spin(&po->po_mtx);
		}

		mtx_unlock(&pmc_kthread_mtx);

		/* process the request */
		PMCDBG(LOG,WRI,2, "po=%p base=%p ptr=%p", po,
		    lb->plb_base, lb->plb_ptr);
		/* change our thread's credentials before issuing the I/O */

		aiov.iov_base = lb->plb_base;
		aiov.iov_len  = nbytes = lb->plb_ptr - lb->plb_base;

		auio.uio_iov    = &aiov;
		auio.uio_iovcnt = 1;
		auio.uio_offset = -1;
		auio.uio_resid  = nbytes;
		auio.uio_rw     = UIO_WRITE;
		auio.uio_segflg = UIO_SYSSPACE;
		auio.uio_td     = td;

		/* switch thread credentials -- see kern_ktrace.c */
		td->td_ucred = ownercred;
		error = fo_write(po->po_file, &auio, ownercred, 0, td);
		td->td_ucred = mycred;

		if (error) {
			/* XXX some errors are recoverable */
			/* send a SIGIO to the owner and exit */
			PROC_LOCK(p);
			kern_psignal(p, SIGIO);
			PROC_UNLOCK(p);

			mtx_lock(&pmc_kthread_mtx);

			po->po_error = error; /* save for flush log */

			PMCDBG(LOG,WRI,2, "po=%p error=%d", po, error);

			break;
		}

		mtx_lock(&pmc_kthread_mtx);

		/* put the used buffer back into the global pool */
		PMCLOG_INIT_BUFFER_DESCRIPTOR(lb);

		mtx_lock_spin(&pmc_bufferlist_mtx);
		TAILQ_INSERT_HEAD(&pmc_bufferlist, lb, plb_next);
		mtx_unlock_spin(&pmc_bufferlist_mtx);

		lb = NULL;
	}

	wakeup_one(po->po_kthread);
	po->po_kthread = NULL;

	mtx_unlock(&pmc_kthread_mtx);

	/* return the current I/O buffer to the global pool */
	if (lb) {
		PMCLOG_INIT_BUFFER_DESCRIPTOR(lb);

		mtx_lock_spin(&pmc_bufferlist_mtx);
		TAILQ_INSERT_HEAD(&pmc_bufferlist, lb, plb_next);
		mtx_unlock_spin(&pmc_bufferlist_mtx);
	}

	/*
	 * Exit this thread, signalling the waiter
	 */

	crfree(ownercred);

	kproc_exit(0);
}
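
The fo_write() in the loop above is issued with the log owner's credentials rather than the kernel thread's own, using the temporary-swap trick noted in the comment (see kern_ktrace.c). The fragment below isolates that pattern; the function name is invented and error handling beyond the write itself is omitted.

#include <sys/param.h>
#include <sys/proc.h>
#include <sys/ucred.h>
#include <sys/file.h>
#include <sys/uio.h>

static int
example_write_as_owner(struct file *fp, struct uio *auio,
    struct ucred *ownercred)
{
	struct thread *td = curthread;
	struct ucred *saved = td->td_ucred;
	int error;

	/* Switch thread credentials, issue the I/O, then switch back. */
	td->td_ucred = ownercred;
	error = fo_write(fp, auio, ownercred, 0, td);
	td->td_ucred = saved;
	return (error);
}
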
Example 6
/*
 * Exit: deallocate address space and other resources, change proc state to
 * zombie, and unlink proc from allproc and parent's lists.  Save exit status
 * and rusage for wait().  Check for child processes and orphan them.
 */
void
exit1(struct thread *td, int rv)
{
	struct proc *p, *nq, *q;
	struct vnode *vtmp;
	struct vnode *ttyvp = NULL;
	struct plimit *plim;

	mtx_assert(&Giant, MA_NOTOWNED);

	p = td->td_proc;
	/*
	 * XXX in case we're rebooting we just let init die in order to
	 * work around an unsolved stack overflow seen very late during
	 * shutdown on sparc64 when the gmirror worker process exists.
	 */
	if (p == initproc && rebooting == 0) {
		printf("init died (signal %d, exit %d)\n",
		    WTERMSIG(rv), WEXITSTATUS(rv));
		panic("Going nowhere without my init!");
	}

	/*
	 * MUST abort all other threads before proceeding past here.
	 */
	PROC_LOCK(p);
	while (p->p_flag & P_HADTHREADS) {
		/*
		 * First check if some other thread got here before us.
		 * If so, act appropriately: exit or suspend.
		 */
		thread_suspend_check(0);

		/*
		 * Kill off the other threads. This requires
		 * some co-operation from other parts of the kernel
		 * so it may not be instantaneous.  With this state set
		 * any thread entering the kernel from userspace will
		 * thread_exit() in trap().  Any thread attempting to
		 * sleep will return immediately with EINTR or EWOULDBLOCK
		 * which will hopefully force them to back out to userland
		 * freeing resources as they go.  Any thread attempting
		 * to return to userland will thread_exit() from userret().
		 * thread_exit() will unsuspend us when the last of the
		 * other threads exits.
		 * If another thread is already single-threading the process
		 * after resumption, calling thread_single() will fail; in
		 * that case, we just re-check all suspension requests and the
		 * thread should either be suspended there or exit.
		 */
		if (!thread_single(SINGLE_EXIT))
			break;

		/*
		 * All other activity in this process is now stopped.
		 * Threading support has been turned off.
		 */
	}
	KASSERT(p->p_numthreads == 1,
	    ("exit1: proc %p exiting with %d threads", p, p->p_numthreads));
	racct_sub(p, RACCT_NTHR, 1);
	/*
	 * Wakeup anyone in procfs' PIOCWAIT.  They should have a hold
	 * on our vmspace, so we should block below until they have
	 * released their reference to us.  Note that if they have
	 * requested S_EXIT stops we will block here until they ack
	 * via PIOCCONT.
	 */
	_STOPEVENT(p, S_EXIT, rv);

	/*
	 * Ignore any pending request to stop due to a stop signal.
	 * Once P_WEXIT is set, future requests will be ignored as
	 * well.
	 */
	p->p_flag &= ~P_STOPPED_SIG;
	KASSERT(!P_SHOULDSTOP(p), ("exiting process is stopped"));

	/*
	 * Note that we are exiting and do another wakeup of anyone in
	 * PIOCWAIT in case they aren't listening for S_EXIT stops or
	 * decided to wait again after we told them we are exiting.
	 */
	p->p_flag |= P_WEXIT;
	wakeup(&p->p_stype);

	/*
	 * Wait for any processes that have a hold on our vmspace to
	 * release their reference.
	 */
	while (p->p_lock > 0)
		msleep(&p->p_lock, &p->p_mtx, PWAIT, "exithold", 0);

	p->p_xstat = rv;	/* Let event handler change exit status */
	PROC_UNLOCK(p);
	/* Drain the limit callout while we don't have the proc locked */
	callout_drain(&p->p_limco);

#ifdef AUDIT
	/*
	 * The Sun BSM exit token contains two components: an exit status as
	 * passed to exit(), and a return value to indicate what sort of exit
	 * it was.  The exit status is WEXITSTATUS(rv), but it's not clear
	 * what the return value is.
	 */
	AUDIT_ARG_EXIT(WEXITSTATUS(rv), 0);
	AUDIT_SYSCALL_EXIT(0, td);
#endif

	/* Are we a task leader? */
	if (p == p->p_leader) {
		mtx_lock(&ppeers_lock);
		q = p->p_peers;
		while (q != NULL) {
			PROC_LOCK(q);
			kern_psignal(q, SIGKILL);
			PROC_UNLOCK(q);
			q = q->p_peers;
		}
		while (p->p_peers != NULL)
			msleep(p, &ppeers_lock, PWAIT, "exit1", 0);
		mtx_unlock(&ppeers_lock);
	}

	/*
	 * Check if any loadable modules need anything done at process exit.
	 * E.g. SYSV IPC stuff
	 * XXX what if one of these generates an error?
	 */
	EVENTHANDLER_INVOKE(process_exit, p);

	/*
	 * If parent is waiting for us to exit or exec,
	 * P_PPWAIT is set; we will wakeup the parent below.
	 */
	PROC_LOCK(p);
	rv = p->p_xstat;	/* Event handler could change exit status */
	stopprofclock(p);
	p->p_flag &= ~(P_TRACED | P_PPWAIT | P_PPTRACE);

	/*
	 * Stop the real interval timer.  If the handler is currently
	 * executing, prevent it from rearming itself and let it finish.
	 */
	if (timevalisset(&p->p_realtimer.it_value) &&
	    callout_stop(&p->p_itcallout) == 0) {
		timevalclear(&p->p_realtimer.it_interval);
		msleep(&p->p_itcallout, &p->p_mtx, PWAIT, "ritwait", 0);
		KASSERT(!timevalisset(&p->p_realtimer.it_value),
		    ("realtime timer is still armed"));
	}
	PROC_UNLOCK(p);

	/*
	 * Reset any sigio structures pointing to us as a result of
	 * F_SETOWN with our pid.
	 */
	funsetownlst(&p->p_sigiolst);

	/*
	 * If this process has an nlminfo data area (for lockd), release it
	 */
	if (nlminfo_release_p != NULL && p->p_nlminfo != NULL)
		(*nlminfo_release_p)(p);

	/*
	 * Close open files and release open-file table.
	 * This may block!
	 */
	fdescfree(td);

	/*
	 * If this thread tickled GEOM, we need to wait for the giggling to
	 * stop before we return to userland
	 */
	if (td->td_pflags & TDP_GEOM)
		g_waitidle();

	/*
	 * Remove ourself from our leader's peer list and wake our leader.
	 */
	mtx_lock(&ppeers_lock);
	if (p->p_leader->p_peers) {
		q = p->p_leader;
		while (q->p_peers != p)
			q = q->p_peers;
		q->p_peers = p->p_peers;
		wakeup(p->p_leader);
	}
	mtx_unlock(&ppeers_lock);

	vmspace_exit(td);

	sx_xlock(&proctree_lock);
	if (SESS_LEADER(p)) {
		struct session *sp = p->p_session;
		struct tty *tp;

		/*
		 * s_ttyp is not zero'd; we use this to indicate that
		 * the session once had a controlling terminal. (for
		 * logging and informational purposes)
		 */
		SESS_LOCK(sp);
		ttyvp = sp->s_ttyvp;
		tp = sp->s_ttyp;
		sp->s_ttyvp = NULL;
		sp->s_ttydp = NULL;
		sp->s_leader = NULL;
		SESS_UNLOCK(sp);

		/*
		 * Signal foreground pgrp and revoke access to
		 * controlling terminal if it has not been revoked
		 * already.
		 *
		 * Because the TTY may have been revoked in the mean
		 * time and could already have a new session associated
		 * with it, make sure we don't send a SIGHUP to a
		 * foreground process group that does not belong to this
		 * session.
		 */

		if (tp != NULL) {
			tty_lock(tp);
			if (tp->t_session == sp)
				tty_signal_pgrp(tp, SIGHUP);
			tty_unlock(tp);
		}

		if (ttyvp != NULL) {
			sx_xunlock(&proctree_lock);
			if (vn_lock(ttyvp, LK_EXCLUSIVE) == 0) {
				VOP_REVOKE(ttyvp, REVOKEALL);
				VOP_UNLOCK(ttyvp, 0);
			}
			sx_xlock(&proctree_lock);
		}
	}
	fixjobc(p, p->p_pgrp, 0);
	sx_xunlock(&proctree_lock);
	(void)acct_process(td);

	/* Release the TTY now we've unlocked everything. */
	if (ttyvp != NULL)
		vrele(ttyvp);
#ifdef KTRACE
	ktrprocexit(td);
#endif
	/*
	 * Release reference to text vnode
	 */
	if ((vtmp = p->p_textvp) != NULL) {
		p->p_textvp = NULL;
		vrele(vtmp);
	}

	/*
	 * Release our limits structure.
	 */
	plim = p->p_limit;
	p->p_limit = NULL;
	lim_free(plim);

	tidhash_remove(td);

	/*
	 * Remove proc from allproc queue and pidhash chain.
	 * Place onto zombproc.  Unlink from parent's child list.
	 */
	sx_xlock(&allproc_lock);
	LIST_REMOVE(p, p_list);
	LIST_INSERT_HEAD(&zombproc, p, p_list);
	LIST_REMOVE(p, p_hash);
	sx_xunlock(&allproc_lock);

	/*
	 * Call machine-dependent code to release any
	 * machine-dependent resources other than the address space.
	 * The address space is released by "vmspace_exitfree(p)" in
	 * vm_waitproc().
	 */
	cpu_exit(td);

	WITNESS_WARN(WARN_PANIC, NULL, "process (pid %d) exiting", p->p_pid);

	/*
	 * Reparent all of our children to init.
	 */
	sx_xlock(&proctree_lock);
	q = LIST_FIRST(&p->p_children);
	if (q != NULL)		/* only need this if any child is S_ZOMB */
		wakeup(initproc);
	for (; q != NULL; q = nq) {
		nq = LIST_NEXT(q, p_sibling);
		PROC_LOCK(q);
		proc_reparent(q, initproc);
		q->p_sigparent = SIGCHLD;
		/*
		 * Traced processes are killed
		 * since their existence means someone is screwing up.
		 */
		if (q->p_flag & P_TRACED) {
			struct thread *temp;

			/*
			 * Since q was found on our children list, the
			 * proc_reparent() call moved q to the orphan
			 * list due to present P_TRACED flag. Clear
			 * orphan link for q now while q is locked.
			 */
			clear_orphan(q);
			q->p_flag &= ~(P_TRACED | P_STOPPED_TRACE);
			FOREACH_THREAD_IN_PROC(q, temp)
				temp->td_dbgflags &= ~TDB_SUSPEND;
			kern_psignal(q, SIGKILL);
		}
		PROC_UNLOCK(q);
	}

	/*
	 * Also get rid of our orphans.
	 */
	while ((q = LIST_FIRST(&p->p_orphans)) != NULL) {
		PROC_LOCK(q);
		clear_orphan(q);
		PROC_UNLOCK(q);
	}

	/* Save exit status. */
	PROC_LOCK(p);
	p->p_xthread = td;

	/* Tell the prison that we are gone. */
	prison_proc_free(p->p_ucred->cr_prison);

#ifdef KDTRACE_HOOKS
	/*
	 * Tell the DTrace fasttrap provider about the exit if it
	 * has declared an interest.
	 */
	if (dtrace_fasttrap_exit)
		dtrace_fasttrap_exit(p);
#endif

	/*
	 * Notify interested parties of our demise.
	 */
	KNOTE_LOCKED(&p->p_klist, NOTE_EXIT);

#ifdef KDTRACE_HOOKS
	int reason = CLD_EXITED;
	if (WCOREDUMP(rv))
		reason = CLD_DUMPED;
	else if (WIFSIGNALED(rv))
		reason = CLD_KILLED;
	SDT_PROBE(proc, kernel, , exit, reason, 0, 0, 0, 0);
#endif

	/*
	 * Just delete all entries in the p_klist. At this point we won't
	 * report any more events, and there are nasty race conditions that
	 * can beat us if we don't.
	 */
	knlist_clear(&p->p_klist, 1);

	/*
	 * If this is a process with a descriptor, we may not need to deliver
	 * a signal to the parent.  proctree_lock is held over
	 * procdesc_exit() to serialize concurrent calls to close() and
	 * exit().
	 */
	if (p->p_procdesc == NULL || procdesc_exit(p)) {
		/*
		 * Notify parent that we're gone.  If parent has the
		 * PS_NOCLDWAIT flag set, or if the handler is set to SIG_IGN,
		 * notify process 1 instead (and hope it will handle this
		 * situation).
		 */
		PROC_LOCK(p->p_pptr);
		mtx_lock(&p->p_pptr->p_sigacts->ps_mtx);
		if (p->p_pptr->p_sigacts->ps_flag &
		    (PS_NOCLDWAIT | PS_CLDSIGIGN)) {
			struct proc *pp;

			mtx_unlock(&p->p_pptr->p_sigacts->ps_mtx);
			pp = p->p_pptr;
			PROC_UNLOCK(pp);
			proc_reparent(p, initproc);
			p->p_sigparent = SIGCHLD;
			PROC_LOCK(p->p_pptr);

			/*
			 * Notify parent, so in case he was wait(2)ing or
			 * executing waitpid(2) with our pid, he will
			 * continue.
			 */
			wakeup(pp);
		} else
			mtx_unlock(&p->p_pptr->p_sigacts->ps_mtx);

		if (p->p_pptr == initproc)
			kern_psignal(p->p_pptr, SIGCHLD);
		else if (p->p_sigparent != 0) {
			if (p->p_sigparent == SIGCHLD)
				childproc_exited(p);
			else	/* LINUX thread */
				kern_psignal(p->p_pptr, p->p_sigparent);
		}
	} else
		PROC_LOCK(p->p_pptr);
	sx_xunlock(&proctree_lock);

	/*
	 * The state PRS_ZOMBIE prevents other processes from sending
	 * signals to the process.  To avoid a memory leak, we free the
	 * memory for the signal queue at the time when the state is set.
	 */
	sigqueue_flush(&p->p_sigqueue);
	sigqueue_flush(&td->td_sigqueue);

	/*
	 * We have to wait until after acquiring all locks before
	 * changing p_state.  We need to avoid all possible context
	 * switches (including ones from blocking on a mutex) while
	 * marked as a zombie.  We also have to set the zombie state
	 * before we release the parent process' proc lock to avoid
	 * a lost wakeup.  So, we first call wakeup, then we grab the
	 * sched lock, update the state, and release the parent process'
	 * proc lock.
	 */
	wakeup(p->p_pptr);
	cv_broadcast(&p->p_pwait);
	sched_exit(p->p_pptr, td);
	PROC_SLOCK(p);
	p->p_state = PRS_ZOMBIE;
	PROC_UNLOCK(p->p_pptr);

	/*
	 * Hopefully no one will try to deliver a signal to the process this
	 * late in the game.
	 */
	knlist_destroy(&p->p_klist);

	/*
	 * Save our children's rusage information in our exit rusage.
	 */
	ruadd(&p->p_ru, &p->p_rux, &p->p_stats->p_cru, &p->p_crux);

	/*
	 * Make sure the scheduler takes this thread out of its tables etc.
	 * This will also release this thread's reference to the ucred.
	 * Other thread parts to release include pcb bits and such.
	 */
	thread_exit();
}
Example 7
/*
 * Copied from amd64/amd64/machdep.c
 *
 * XXX fpu state need? don't think so
 */
int
linux_rt_sigreturn(struct thread *td, struct linux_rt_sigreturn_args *args)
{
	struct proc *p;
	struct l_ucontext uc;
	struct l_sigcontext *context;
	struct trapframe *regs;
	unsigned long rflags;
	int error;
	ksiginfo_t ksi;

	regs = td->td_frame;
	error = copyin((void *)regs->tf_rbx, &uc, sizeof(uc));
	if (error != 0)
		return (error);

	p = td->td_proc;
	context = &uc.uc_mcontext;
	rflags = context->sc_rflags;

	/*
	 * Don't allow users to change privileged or reserved flags.
	 */
	/*
	 * XXX do allow users to change the privileged flag PSL_RF.
	 * The cpu sets PSL_RF in tf_rflags for faults.  Debuggers
	 * should sometimes set it there too.  tf_rflags is kept in
	 * the signal context during signal handling and there is no
	 * other place to remember it, so the PSL_RF bit may be
	 * corrupted by the signal handler without us knowing.
	 * Corruption of the PSL_RF bit at worst causes one more or
	 * one less debugger trap, so allowing it is fairly harmless.
	 */

#define RFLAG_SECURE(ef, oef)     ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
	if (!RFLAG_SECURE(rflags & ~PSL_RF, regs->tf_rflags & ~PSL_RF)) {
		printf("linux_rt_sigreturn: rflags = 0x%lx\n", rflags);
		return (EINVAL);
	}

	/*
	 * Don't allow users to load a valid privileged %cs.  Let the
	 * hardware check for invalid selectors, excess privilege in
	 * other selectors, invalid %eip's and invalid %esp's.
	 */
#define CS_SECURE(cs)           (ISPL(cs) == SEL_UPL)
	if (!CS_SECURE(context->sc_cs)) {
		printf("linux_rt_sigreturn: cs = 0x%x\n", context->sc_cs);
		ksiginfo_init_trap(&ksi);
		ksi.ksi_signo = SIGBUS;
		ksi.ksi_code = BUS_OBJERR;
		ksi.ksi_trapno = T_PROTFLT;
		ksi.ksi_addr = (void *)regs->tf_rip;
		trapsignal(td, &ksi);
		return (EINVAL);
	}

	PROC_LOCK(p);
	linux_to_bsd_sigset(&uc.uc_sigmask, &td->td_sigmask);
	SIG_CANTMASK(td->td_sigmask);
	signotify(td);
	PROC_UNLOCK(p);

	regs->tf_rdi    = context->sc_rdi;
	regs->tf_rsi    = context->sc_rsi;
	regs->tf_rdx    = context->sc_rdx;
	regs->tf_rbp    = context->sc_rbp;
	regs->tf_rbx    = context->sc_rbx;
	regs->tf_rcx    = context->sc_rcx;
	regs->tf_rax    = context->sc_rax;
	regs->tf_rip    = context->sc_rip;
	regs->tf_rsp    = context->sc_rsp;
	regs->tf_r8     = context->sc_r8;
	regs->tf_r9     = context->sc_r9;
	regs->tf_r10    = context->sc_r10;
	regs->tf_r11    = context->sc_r11;
	regs->tf_r12    = context->sc_r12;
	regs->tf_r13    = context->sc_r13;
	regs->tf_r14    = context->sc_r14;
	regs->tf_r15    = context->sc_r15;
	regs->tf_cs     = context->sc_cs;
	regs->tf_err    = context->sc_err;
	regs->tf_rflags = rflags;

	set_pcb_flags(td->td_pcb, PCB_FULL_IRET);
	return (EJUSTRETURN);
}
Example 8
static int
procfs_control(struct thread *td, struct proc *p, int op)
{
	int error = 0;
	struct thread *temp;

	/*
	 * Attach - attaches the target process for debugging
	 * by the calling process.
	 */
	if (op == PROCFS_CTL_ATTACH) {
		sx_xlock(&proctree_lock);
		PROC_LOCK(p);
		if ((error = p_candebug(td, p)) != 0)
			goto out;
		if (p->p_flag & P_TRACED) {
			error = EBUSY;
			goto out;
		}

		/* Can't trace yourself! */
		if (p->p_pid == td->td_proc->p_pid) {
			error = EINVAL;
			goto out;
		}

		/*
		 * Go ahead and set the trace flag.
	 * Save the old parent (it's reset in
	 *   _DETACH, and also in kern_exit.c:wait4()).
		 * Reparent the process so that the tracing
		 *   proc gets to see all the action.
		 * Stop the target.
		 */
		p->p_flag |= P_TRACED;
		faultin(p);
		p->p_xstat = 0;		/* XXX ? */
		if (p->p_pptr != td->td_proc) {
			p->p_oppid = p->p_pptr->p_pid;
			proc_reparent(p, td->td_proc);
		}
		psignal(p, SIGSTOP);
out:
		PROC_UNLOCK(p);
		sx_xunlock(&proctree_lock);
		return (error);
	}

	/*
	 * Authorization check: rely on normal debugging protection, except
	 * allow processes to disengage debugging on a process onto which
	 * they have previously attached, but no longer have permission to
	 * debug.
	 */
	PROC_LOCK(p);
	if (op != PROCFS_CTL_DETACH &&
	    ((error = p_candebug(td, p)))) {
		PROC_UNLOCK(p);
		return (error);
	}

	/*
	 * Target process must be stopped, owned by (td) and
	 * be set up for tracing (P_TRACED flag set).
	 * Allow DETACH to take place at any time for sanity.
	 * Allow WAIT any time, of course.
	 */
	switch (op) {
	case PROCFS_CTL_DETACH:
	case PROCFS_CTL_WAIT:
		break;

	default:
		if (!TRACE_WAIT_P(td->td_proc, p)) {
			PROC_UNLOCK(p);
			return (EBUSY);
		}
	}


#ifdef FIX_SSTEP
	/*
	 * do single-step fixup if needed
	 */
	FIX_SSTEP(FIRST_THREAD_IN_PROC(p));
#endif

	/*
	 * Don't deliver any signal by default.
	 * To continue with a signal, just send
	 * the signal name to the ctl file
	 */
	p->p_xstat = 0;

	switch (op) {
	/*
	 * Detach.  Cleans up the target process, reparent it if possible
	 * and set it running once more.
	 */
	case PROCFS_CTL_DETACH:
		/* if not being traced, then this is a painless no-op */
		if ((p->p_flag & P_TRACED) == 0) {
			PROC_UNLOCK(p);
			return (0);
		}

		/* not being traced any more */
		p->p_flag &= ~(P_TRACED | P_STOPPED_TRACE);

		/* remove pending SIGTRAP, else the process will die */
		sigqueue_delete_proc(p, SIGTRAP);
		FOREACH_THREAD_IN_PROC(p, temp)
			temp->td_dbgflags &= ~TDB_SUSPEND;
		PROC_UNLOCK(p);

		/* give process back to original parent */
		sx_xlock(&proctree_lock);
		if (p->p_oppid != p->p_pptr->p_pid) {
			struct proc *pp;

			pp = pfind(p->p_oppid);
			PROC_LOCK(p);
			if (pp) {
				PROC_UNLOCK(pp);
				proc_reparent(p, pp);
			}
		} else
			PROC_LOCK(p);
		p->p_oppid = 0;
		p->p_flag &= ~P_WAITED;	/* XXX ? */
		sx_xunlock(&proctree_lock);

		wakeup(td->td_proc);	/* XXX for CTL_WAIT below ? */

		break;

	/*
	 * Step.  Let the target process execute a single instruction.
	 * What does it mean to single step a threaded program?
	 */
	case PROCFS_CTL_STEP:
		error = proc_sstep(FIRST_THREAD_IN_PROC(p));
		if (error) {
			PROC_UNLOCK(p);
			return (error);
		}
		break;

	/*
	 * Run.  Let the target process continue running until a breakpoint
	 * or some other trap.
	 */
	case PROCFS_CTL_RUN:
		p->p_flag &= ~P_STOPPED_SIG;	/* this uses SIGSTOP */
		break;

	/*
	 * Wait for the target process to stop.
	 * If the target is not being traced then just wait
	 * to enter
	 */
	case PROCFS_CTL_WAIT:
		if (p->p_flag & P_TRACED) {
			while (error == 0 &&
					(P_SHOULDSTOP(p)) &&
					(p->p_flag & P_TRACED) &&
					(p->p_pptr == td->td_proc))
				error = msleep(p, &p->p_mtx,
						PWAIT|PCATCH, "procfsx", 0);
			if (error == 0 && !TRACE_WAIT_P(td->td_proc, p))
				error = EBUSY;
		} else {
			while (error == 0 && P_SHOULDSTOP(p))
				error = msleep(p, &p->p_mtx,
						PWAIT|PCATCH, "procfs", 0);
		}
		PROC_UNLOCK(p);
		return (error);
	default:
		panic("procfs_control");
	}

	PROC_SLOCK(p);
	thread_unsuspend(p); /* If it can run, let it do so. */
	PROC_SUNLOCK(p);
	PROC_UNLOCK(p);
	return (0);
}
Example 9
/*
 * The CheriABI version of sendsig(9) largely borrows from the MIPS version,
 * and it is important to keep them in sync.  It differs primarily in that it
 * must also be aware of user stack-handling ABIs, so is also sensitive to our
 * (fluctuating) design choices in how $stc and $sp interact.  The current
 * design uses ($stc + $sp) for stack-relative references, so early on we have
 * to calculate a 'relocated' version of $sp that we can then use for
 * MIPS-style access.
 *
 * This code, as with the CHERI-aware MIPS code, makes a privilege
 * determination in order to decide whether to trust the stack exposed by the
 * user code for the purposes of signal handling.  We must use the alternative
 * stack if there is any indication that using the user thread's stack state
 * might violate the userspace compartmentalisation model.
 */
static void
cheriabi_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
{
	struct proc *p;
	struct thread *td;
	struct trapframe *regs;
	struct sigacts *psp;
	struct sigframe_c sf, *sfp;
	uintptr_t stackbase;
	vm_offset_t sp;
	int cheri_is_sandboxed;
	int sig;
	int oonstack;

	td = curthread;
	p = td->td_proc;
	PROC_LOCK_ASSERT(p, MA_OWNED);
	sig = ksi->ksi_signo;
	psp = p->p_sigacts;
	mtx_assert(&psp->ps_mtx, MA_OWNED);

	regs = td->td_frame;

	/*
	 * In CheriABI, $sp is $stc relative, so calculate a relocation base
	 * that must be combined with regs->sp from this point onwards.
	 * Unfortunately, we won't retain bounds and permissions information
	 * (as is the case elsewhere in CheriABI).  While 'stackbase'
	 * suggests that $stc's offset isn't included, in practice it will be,
	 * although we may reasonably assume that it will be zero.
	 *
	 * If it turns out we will be delivering to the alternative signal
	 * stack, we'll recalculate stackbase later.
	 */
	CHERI_CLC(CHERI_CR_CTEMP0, CHERI_CR_KDC, &td->td_pcb->pcb_regs.stc,
	    0);
	CHERI_CTOPTR(stackbase, CHERI_CR_CTEMP0, CHERI_CR_KDC);
	oonstack = sigonstack(stackbase + regs->sp);

	/*
	 * CHERI affects signal delivery in the following ways:
	 *
	 * (1) Additional capability-coprocessor state is exposed via
	 *     extensions to the context frame placed on the stack.
	 *
	 * (2) If the user $pcc doesn't include CHERI_PERM_SYSCALL, then we
	 *     consider user state to be 'sandboxed' and therefore to require
	 *     special delivery handling which includes a domain-switch to the
	 *     thread's context-switch domain.  (This is done by
	 *     cheri_sendsig()).
	 *
	 * (3) If an alternative signal stack is not defined, and we are in a
	 *     'sandboxed' state, then we have two choices: (a) if the signal
	 *     is of type SA_SANDBOX_UNWIND, we will automatically unwind the
	 *     trusted stack by one frame; (b) otherwise, we will terminate
	 *     the process unconditionally.
	 */
	cheri_is_sandboxed = cheri_signal_sandboxed(td);

	/*
	 * We provide the ability to drop into the debugger in two different
	 * circumstances: (1) if the code running is sandboxed; and (2) if the
	 * fault is a CHERI protection fault.  Handle both here for the
	 * non-unwind case.  Do this before we rewrite any general-purpose or
	 * capability register state for the thread.
	 */
#if DDB
	if (cheri_is_sandboxed && security_cheri_debugger_on_sandbox_signal)
		kdb_enter(KDB_WHY_CHERI, "Signal delivery to CHERI sandbox");
	else if (sig == SIGPROT && security_cheri_debugger_on_sigprot)
		kdb_enter(KDB_WHY_CHERI,
		    "SIGPROT delivered outside sandbox");
#endif

	/*
	 * If a thread is running sandboxed, we can't rely on $sp which may
	 * not point at a valid stack in the ambient context, or even be
	 * maliciously manipulated.  We must therefore always use the
	 * alternative stack.  We are also therefore unable to tell whether we
	 * are on the alternative stack, so must clear 'oonstack' here.
	 *
	 * XXXRW: This requires significant further thinking; however, the net
	 * upshot is that it is not a good idea to do an object-capability
	 * invoke() from a signal handler, as with so many other things in
	 * life.
	 */
	if (cheri_is_sandboxed != 0)
		oonstack = 0;

	/* save user context */
	bzero(&sf, sizeof(sf));
	sf.sf_uc.uc_sigmask = *mask;
#if 0
	/*
	 * XXX-BD: stack_t type differs and we can't just fake a capability.
	 * We don't restore the value so what purpose does it serve?
	 */
	sf.sf_uc.uc_stack = td->td_sigstk;
#endif
	sf.sf_uc.uc_mcontext.mc_onstack = (oonstack) ? 1 : 0;
	sf.sf_uc.uc_mcontext.mc_pc = regs->pc;
	sf.sf_uc.uc_mcontext.mullo = regs->mullo;
	sf.sf_uc.uc_mcontext.mulhi = regs->mulhi;
	cheri_capability_copy(&sf.sf_uc.uc_mcontext.mc_tls,
	    &td->td_md.md_tls_cap);
	sf.sf_uc.uc_mcontext.mc_regs[0] = UCONTEXT_MAGIC;  /* magic number */
	bcopy((void *)&regs->ast, (void *)&sf.sf_uc.uc_mcontext.mc_regs[1],
	    sizeof(sf.sf_uc.uc_mcontext.mc_regs) - sizeof(register_t));
	sf.sf_uc.uc_mcontext.mc_fpused = td->td_md.md_flags & MDTD_FPUSED;
#if defined(CPU_HAVEFPU)
	if (sf.sf_uc.uc_mcontext.mc_fpused) {
		/* if FPU has current state, save it first */
		if (td == PCPU_GET(fpcurthread))
			MipsSaveCurFPState(td);
		bcopy((void *)&td->td_frame->f0,
		    (void *)sf.sf_uc.uc_mcontext.mc_fpregs,
		    sizeof(sf.sf_uc.uc_mcontext.mc_fpregs));
	}
#endif
	/* XXXRW: sf.sf_uc.uc_mcontext.sr seems never to be set? */
	sf.sf_uc.uc_mcontext.cause = regs->cause;
	cheri_trapframe_to_cheriframe(&td->td_pcb->pcb_regs,
	    &sf.sf_uc.uc_mcontext.mc_cheriframe);

	/*
	 * Allocate and validate space for the signal handler context.  For
	 * CheriABI purposes, 'sp' from this point forward is relocated
	 * relative to any pertinent stack capability.  For an alternative
	 * signal context, we need to recalculate stackbase for later use in
	 * calculating a new $sp for the signal-handling context.
	 *
	 * XXXRW: It seems like it would be nice to do both the regular and
	 * alternative stack calculations in the same place.  However, we need
	 * oonstack sooner.  We should clean this up later.
	 */
	if ((td->td_pflags & TDP_ALTSTACK) != 0 && !oonstack &&
	    SIGISMEMBER(psp->ps_sigonstack, sig)) {
		stackbase = (vm_offset_t)td->td_sigstk.ss_sp;
		sp = (vm_offset_t)(stackbase + td->td_sigstk.ss_size);
	} else {
		/*
		 * Signals delivered when a CHERI sandbox is present must be
		 * delivered on the alternative stack rather than a local one.
		 * If an alternative stack isn't present, then terminate or
		 * risk leaking capabilities (and control) to the sandbox (or
		 * just crashing the sandbox).
		 */
		if (cheri_is_sandboxed) {
			mtx_unlock(&psp->ps_mtx);
			printf("pid %d, tid %d: signal in sandbox without "
			    "alternative stack defined\n", td->td_proc->p_pid,
			    td->td_tid);
			sigexit(td, SIGILL);
			/* NOTREACHED */
		}
		sp = (vm_offset_t)(stackbase + regs->sp);
	}
	sp -= sizeof(struct sigframe_c);
	/* For CHERI, keep the stack pointer capability aligned. */
	sp &= ~(CHERICAP_SIZE - 1);
	sfp = (void *)sp;

	/* Build the argument list for the signal handler. */
	regs->a0 = sig;
	if (SIGISMEMBER(psp->ps_siginfo, sig)) {
		/*
		 * Signal handler installed with SA_SIGINFO.
		 *
		 * XXXRW: We would ideally synthesise these from the
		 * user-originated stack capability, rather than $kdc, to be
		 * on the safe side.
		 */
		cheri_capability_set(&regs->c3, CHERI_CAP_USER_DATA_PERMS,
		    (void *)(intptr_t)&sfp->sf_si, sizeof(sfp->sf_si), 0);
		cheri_capability_set(&regs->c4, CHERI_CAP_USER_DATA_PERMS,
		    (void *)(intptr_t)&sfp->sf_uc, sizeof(sfp->sf_uc), 0);
		/* sf.sf_ahu.sf_action = (__siginfohandler_t *)catcher; */

		/* fill siginfo structure */
		sf.sf_si.si_signo = sig;
		sf.sf_si.si_code = ksi->ksi_code;
		/*
		 * Write out badvaddr, but don't create a valid capability
		 * since that might allow privilege amplification.
		 *
		 * XXX-BD: This probably isn't the right method.
		 * XXX-BD: Do we want to set base or offset?
		 *
		 * XXXRW: I think there's some argument that anything
		 * receiving this signal is fairly privileged.  But we could
		 * generate a $ddc-relative (or $pcc-relative) capability, if
		 * possible.  (Using versions of $ddc and $pcc for the
		 * signal-handling context rather than that which caused the
		 * signal).  I'd be tempted to deliver badvaddr as the offset
		 * of that capability.  If badvaddr is not in range, then we
		 * should just deliver an untagged NULL-derived version
		 * (perhaps)?
		 */
		*((uintptr_t *)&sf.sf_si.si_addr) =
		    (uintptr_t)(void *)regs->badvaddr;
	}
	/*
	 * XXX: No support for undocumented arguments to old style handlers.
	 */

	mtx_unlock(&psp->ps_mtx);
	PROC_UNLOCK(p);

	/*
	 * Copy the sigframe out to the user's stack.
	 */
	if (copyoutcap(&sf, (void *)sfp, sizeof(sf)) != 0) {
		/*
		 * Something is wrong with the stack pointer.
		 * ...Kill the process.
		 */
		PROC_LOCK(p);
		printf("pid %d, tid %d: could not copy out sigframe\n",
		    td->td_proc->p_pid, td->td_tid);
		sigexit(td, SIGILL);
		/* NOTREACHED */
	}

	/*
	 * Re-acquire process locks necessary to access suitable pcb fields.
	 * However, arguably, these operations should be atomic with the
	 * initial inspection of 'psp'.
	 */
	PROC_LOCK(p);
	mtx_lock(&psp->ps_mtx);

	/*
	 * Install CHERI signal-delivery register state for handler to run
	 * in.  As we don't install this in the CHERI frame on the user stack,
	 * it will (generally) be removed automatically on sigreturn().
	 */
	/* XXX-BD: this isn't quite right */
	cheri_sendsig(td);

	/*
	 * Note that $sp must be installed relative to $stc, so re-subtract
	 * the stack base here.
	 */
	regs->pc = (register_t)(intptr_t)catcher;
	regs->sp = (register_t)((intptr_t)sfp - stackbase);

	cheri_capability_copy(&regs->c12, &psp->ps_sigcap[_SIG_IDX(sig)]);
	cheri_capability_copy(&regs->c17,
	    &td->td_pcb->pcb_cherisignal.csig_sigcode);
}
Example 10
int
ext2_bmaparray(struct vnode *vp, daddr_t bn, daddr_t *bnp, int *runp, int *runb)
{
	struct inode *ip;
	struct buf *bp;
	struct ext2mount *ump;
	struct mount *mp;
	struct indir a[NIADDR+1], *ap;
	daddr_t daddr;
	e2fs_lbn_t metalbn;
	int error, num, maxrun = 0, bsize;
	int *nump;

	ap = NULL;
	ip = VTOI(vp);
	mp = vp->v_mount;
	ump = VFSTOEXT2(mp);

	bsize = EXT2_BLOCK_SIZE(ump->um_e2fs);

	if (runp) {
		maxrun = mp->mnt_iosize_max / bsize - 1;
		*runp = 0;
	}

	if (runb) {
		*runb = 0;
	}


	ap = a;
	nump = &num;
	error = ext2_getlbns(vp, bn, ap, nump);
	if (error)
		return (error);

	num = *nump;
	if (num == 0) {
		*bnp = blkptrtodb(ump, ip->i_db[bn]);
		if (*bnp == 0) {
			*bnp = -1;
		} else if (runp) {
			daddr_t bnb = bn;
			for (++bn; bn < NDADDR && *runp < maxrun &&
			    is_sequential(ump, ip->i_db[bn - 1], ip->i_db[bn]);
			    ++bn, ++*runp);
			bn = bnb;
			if (runb && (bn > 0)) {
				for (--bn; (bn >= 0) && (*runb < maxrun) &&
					is_sequential(ump, ip->i_db[bn],
						ip->i_db[bn + 1]);
						--bn, ++*runb);
			}
		}
		return (0);
	}


	/* Get disk address out of indirect block array */
	daddr = ip->i_ib[ap->in_off];

	for (bp = NULL, ++ap; --num; ++ap) {
		/*
		 * Exit the loop if there is no disk address assigned yet and
		 * the indirect block isn't in the cache, or if we were
		 * looking for an indirect block and we've found it.
		 */

		metalbn = ap->in_lbn;
		if ((daddr == 0 && !incore(&vp->v_bufobj, metalbn)) || metalbn == bn)
			break;
		/*
		 * If we get here, we've either got the block in the cache
		 * or we have a disk address for it, go fetch it.
		 */
		if (bp)
			bqrelse(bp);

		bp = getblk(vp, metalbn, bsize, 0, 0, 0);
		if ((bp->b_flags & B_CACHE) == 0) {
#ifdef INVARIANTS
			if (!daddr)
				panic("ext2_bmaparray: indirect block not in cache");
#endif
			bp->b_blkno = blkptrtodb(ump, daddr);
			bp->b_iocmd = BIO_READ;
			bp->b_flags &= ~B_INVAL;
			bp->b_ioflags &= ~BIO_ERROR;
			vfs_busy_pages(bp, 0);
			bp->b_iooffset = dbtob(bp->b_blkno);
			bstrategy(bp);
#ifdef RACCT
			if (racct_enable) {
				PROC_LOCK(curproc);
				racct_add_buf(curproc, bp, 0);
				PROC_UNLOCK(curproc);
			}
#endif
			curthread->td_ru.ru_inblock++;
			error = bufwait(bp);
			if (error) {
				brelse(bp);
				return (error);
			}
		}

		daddr = ((e2fs_daddr_t *)bp->b_data)[ap->in_off];
		if (num == 1 && daddr && runp) {
			for (bn = ap->in_off + 1;
			    bn < MNINDIR(ump) && *runp < maxrun &&
			    is_sequential(ump,
			    ((e2fs_daddr_t *)bp->b_data)[bn - 1],
			    ((e2fs_daddr_t *)bp->b_data)[bn]);
			    ++bn, ++*runp);
			bn = ap->in_off;
			if (runb && bn) {
				for (--bn; bn >= 0 && *runb < maxrun &&
					is_sequential(ump,
					((e2fs_daddr_t *)bp->b_data)[bn],
					((e2fs_daddr_t *)bp->b_data)[bn + 1]);
					--bn, ++*runb);
			}
		}
	}
	if (bp)
		bqrelse(bp);

	/*
	 * Since this is FFS independent code, we are out of scope for the
	 * definitions of BLK_NOCOPY and BLK_SNAP, but we do know that they
	 * will fall in the range 1..um_seqinc, so we use that test and
	 * return a request for a zeroed out buffer if attempts are made
	 * to read a BLK_NOCOPY or BLK_SNAP block.
	 */
	if ((ip->i_flags & SF_SNAPSHOT) && daddr > 0 && daddr < ump->um_seqinc){
		*bnp = -1;
		return (0);
	}
	*bnp = blkptrtodb(ump, daddr);
	if (*bnp == 0) {
		*bnp = -1;
	}
	return (0);
}
Example 11
static void
deadlkres(void)
{
	struct proc *p;
	struct thread *td;
	void *wchan;
	int blkticks, i, slpticks, slptype, tryl, tticks;

	tryl = 0;
	for (;;) {
		blkticks = blktime_threshold * hz;
		slpticks = slptime_threshold * hz;

		/*
		 * Avoid sleeping on the sx lock in order to avoid a possible
		 * priority inversion problem leading to starvation.
		 * If the lock can't be acquired after 100 tries, panic.
		 */
		if (!sx_try_slock(&allproc_lock)) {
			if (tryl > 100)
		panic("%s: possible deadlock detected on allproc_lock\n",
				    __func__);
			tryl++;
			pause("allproc", sleepfreq * hz);
			continue;
		}
		tryl = 0;
		FOREACH_PROC_IN_SYSTEM(p) {
			PROC_LOCK(p);
			if (p->p_state == PRS_NEW) {
				PROC_UNLOCK(p);
				continue;
			}
			FOREACH_THREAD_IN_PROC(p, td) {

				/*
				 * Once a thread is found in "interesting"
				 * state a possible ticks wrap-up needs to be
				 * checked.
				 */
				thread_lock(td);
				if (TD_ON_LOCK(td) && ticks < td->td_blktick) {

					/*
					 * The thread should be blocked on a
					 * turnstile, simply check if the
					 * turnstile channel is in good state.
					 */
					MPASS(td->td_blocked != NULL);

					tticks = ticks - td->td_blktick;
					thread_unlock(td);
					if (tticks > blkticks) {

						/*
						 * According to the provided
						 * thresholds, this thread has
						 * been stuck for too long on a
						 * turnstile.
						 */
						PROC_UNLOCK(p);
						sx_sunlock(&allproc_lock);
	panic("%s: possible deadlock detected for %p, blocked for %d ticks\n",
						    __func__, td, tticks);
					}
				} else if (TD_IS_SLEEPING(td) &&
				    TD_ON_SLEEPQ(td) &&
				    ticks < td->td_blktick) {

					/*
					 * Check if the thread is sleeping on a
					 * lock, otherwise skip the check.
					 * Drop the thread lock in order to
					 * avoid a LOR with the sleepqueue
					 * spinlock.
					 */
					wchan = td->td_wchan;
					tticks = ticks - td->td_slptick;
					thread_unlock(td);
					slptype = sleepq_type(wchan);
					if ((slptype == SLEEPQ_SX ||
					    slptype == SLEEPQ_LK) &&
					    tticks > slpticks) {

						/*
						 * According to the provided
						 * thresholds, this thread has
						 * been stuck for too long on a
						 * sleepqueue.
						 * However, being on a
						 * sleepqueue, we might still
						 * check for the blessed
						 * list.
						 */
						tryl = 0;
						for (i = 0; blessed[i] != NULL;
						    i++) {
							if (!strcmp(blessed[i],
							    td->td_wmesg)) {
								tryl = 1;
								break;
							}
						}
						if (tryl != 0) {
							tryl = 0;
							continue;
						}
						PROC_UNLOCK(p);
						sx_sunlock(&allproc_lock);
	panic("%s: possible deadlock detected for %p, blocked for %d ticks\n",
						    __func__, td, tticks);
					}
				} else
					thread_unlock(td);
			}
			PROC_UNLOCK(p);
		}
		sx_sunlock(&allproc_lock);

		/* Sleep for sleepfreq seconds. */
		pause("-", sleepfreq * hz);
	}
}
Example 12
int
linux_clone(struct thread *td, struct linux_clone_args *args)
{
	int error, ff = RFPROC | RFSTOPPED;
	struct proc *p2;
	struct thread *td2;
	int exit_signal;
	struct linux_emuldata *em;

#ifdef DEBUG
	if (ldebug(clone)) {
		printf(ARGS(clone, "flags %x, stack %p, parent tid: %p, "
		    "child tid: %p"), (unsigned)args->flags,
		    args->stack, args->parent_tidptr, args->child_tidptr);
	}
#endif

	exit_signal = args->flags & 0x000000ff;
	if (LINUX_SIG_VALID(exit_signal)) {
		if (exit_signal <= LINUX_SIGTBLSZ)
			exit_signal =
			    linux_to_bsd_signal[_SIG_IDX(exit_signal)];
	} else if (exit_signal != 0)
		return (EINVAL);

	if (args->flags & LINUX_CLONE_VM)
		ff |= RFMEM;
	if (args->flags & LINUX_CLONE_SIGHAND)
		ff |= RFSIGSHARE;
	/*
	 * XXX: In Linux, sharing of fs info (chroot/cwd/umask)
	 * and open files is independent.  In FreeBSD, it's in one
	 * structure but in reality it does not cause any problems
	 * because both of these flags are usually set together.
	 */
	if (!(args->flags & (LINUX_CLONE_FILES | LINUX_CLONE_FS)))
		ff |= RFFDG;

	/*
	 * Attempt to detect when linux_clone(2) is used for creating
	 * kernel threads. Unfortunately despite the existence of the
	 * CLONE_THREAD flag, version of linuxthreads package used in
	 * most popular distros as of beginning of 2005 doesn't make
	 * any use of it. Therefore, this detection relies on
	 * empirical observation that linuxthreads sets certain
	 * combination of flags, so that we can make more or less
	 * precise detection and notify the FreeBSD kernel that several
	 * processes are in fact part of the same threading group, so
	 * that special treatment is necessary for signal delivery
	 * between those processes and fd locking.
	 */
	if ((args->flags & 0xffffff00) == LINUX_THREADING_FLAGS)
		ff |= RFTHREAD;

	if (args->flags & LINUX_CLONE_PARENT_SETTID)
		if (args->parent_tidptr == NULL)
			return (EINVAL);

	error = fork1(td, ff, 0, &p2, NULL, 0);
	if (error)
		return (error);

	if (args->flags & (LINUX_CLONE_PARENT | LINUX_CLONE_THREAD)) {
	   	sx_xlock(&proctree_lock);
		PROC_LOCK(p2);
		proc_reparent(p2, td->td_proc->p_pptr);
		PROC_UNLOCK(p2);
		sx_xunlock(&proctree_lock);
	}

	/* create the emuldata */
	error = linux_proc_init(td, p2->p_pid, args->flags);
	/* reference it - no need to check this */
	em = em_find(p2, EMUL_DOLOCK);
	KASSERT(em != NULL, ("clone: emuldata not found."));
	/* and adjust it */

	if (args->flags & LINUX_CLONE_THREAD) {
#ifdef notyet
	   	PROC_LOCK(p2);
	   	p2->p_pgrp = td->td_proc->p_pgrp;
	   	PROC_UNLOCK(p2);
#endif
		exit_signal = 0;
	}

	if (args->flags & LINUX_CLONE_CHILD_SETTID)
		em->child_set_tid = args->child_tidptr;
	else
	   	em->child_set_tid = NULL;

	if (args->flags & LINUX_CLONE_CHILD_CLEARTID)
		em->child_clear_tid = args->child_tidptr;
	else
	   	em->child_clear_tid = NULL;

	EMUL_UNLOCK(&emul_lock);

	if (args->flags & LINUX_CLONE_PARENT_SETTID) {
		error = copyout(&p2->p_pid, args->parent_tidptr,
		    sizeof(p2->p_pid));
		if (error)
			printf(LMSG("copyout failed!"));
	}

	PROC_LOCK(p2);
	p2->p_sigparent = exit_signal;
	PROC_UNLOCK(p2);
	td2 = FIRST_THREAD_IN_PROC(p2);
	/*
	 * In the case of stack == NULL, we are supposed to COW the calling
	 * process's stack. This is what normal fork() does, so we just keep
	 * the tf_rsp arg intact.
	 */
	if (args->stack)
		linux_set_upcall_kse(td2, PTROUT(args->stack));

	if (args->flags & LINUX_CLONE_SETTLS)
		linux_set_cloned_tls(td2, args->tls);

#ifdef DEBUG
	if (ldebug(clone))
		printf(LMSG("clone: successful rfork to %d, "
		    "stack %p sig = %d"), (int)p2->p_pid, args->stack,
		    exit_signal);
#endif
	if (args->flags & LINUX_CLONE_VFORK) {
	   	PROC_LOCK(p2);
	   	p2->p_flag |= P_PPWAIT;
	   	PROC_UNLOCK(p2);
	}

	/*
	 * Make this runnable after we are finished with it.
	 */
	thread_lock(td2);
	TD_SET_CAN_RUN(td2);
	sched_add(td2, SRQ_BORING);
	thread_unlock(td2);

	td->td_retval[0] = p2->p_pid;
	td->td_retval[1] = 0;

	if (args->flags & LINUX_CLONE_VFORK) {
		/* wait for the children to exit, i.e. emulate vfork */
		PROC_LOCK(p2);
		while (p2->p_flag & P_PPWAIT)
			cv_wait(&p2->p_pwait, &p2->p_mtx);
		PROC_UNLOCK(p2);
	}

	return (0);
}
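
For reference, the clone-flag translation scattered through the first half of linux_clone() can be collected into one small helper, sketched below. The helper name is made up; the LINUX_CLONE_*, RF* (from <sys/unistd.h>) and LINUX_THREADING_FLAGS constants and the 0xffffff00 mask are exactly the ones used in the example.

static int
linux_clone_to_rfork_flags(int l_flags)
{
	int ff = RFPROC | RFSTOPPED;

	if (l_flags & LINUX_CLONE_VM)
		ff |= RFMEM;		/* share the address space */
	if (l_flags & LINUX_CLONE_SIGHAND)
		ff |= RFSIGSHARE;	/* share signal handlers */
	/* FreeBSD keeps the fd table and fs info together; see the XXX above. */
	if (!(l_flags & (LINUX_CLONE_FILES | LINUX_CLONE_FS)))
		ff |= RFFDG;		/* copy rather than share the fd table */
	/* linuxthreads heuristic: this flag combination means "new thread". */
	if ((l_flags & 0xffffff00) == LINUX_THREADING_FLAGS)
		ff |= RFTHREAD;
	return (ff);
}
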
Example 13
void
ia32_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
{
	struct ia32_sigframe sf, *sfp;
	struct siginfo32 siginfo;
	struct proc *p;
	struct thread *td;
	struct sigacts *psp;
	char *sp;
	struct trapframe *regs;
	char *xfpusave;
	size_t xfpusave_len;
	int oonstack;
	int sig;

	siginfo_to_siginfo32(&ksi->ksi_info, &siginfo);
	td = curthread;
	p = td->td_proc;
	PROC_LOCK_ASSERT(p, MA_OWNED);
	sig = siginfo.si_signo;
	psp = p->p_sigacts;
#ifdef COMPAT_FREEBSD4
	if (SIGISMEMBER(psp->ps_freebsd4, sig)) {
		freebsd4_ia32_sendsig(catcher, ksi, mask);
		return;
	}
#endif
#ifdef COMPAT_43
	if (SIGISMEMBER(psp->ps_osigset, sig)) {
		ia32_osendsig(catcher, ksi, mask);
		return;
	}
#endif
	mtx_assert(&psp->ps_mtx, MA_OWNED);
	regs = td->td_frame;
	oonstack = sigonstack(regs->tf_rsp);

	if (cpu_max_ext_state_size > sizeof(struct savefpu) && use_xsave) {
		xfpusave_len = cpu_max_ext_state_size - sizeof(struct savefpu);
		xfpusave = __builtin_alloca(xfpusave_len);
	} else {
		xfpusave_len = 0;
		xfpusave = NULL;
	}

	/* Save user context. */
	bzero(&sf, sizeof(sf));
	sf.sf_uc.uc_sigmask = *mask;
	sf.sf_uc.uc_stack.ss_sp = (uintptr_t)td->td_sigstk.ss_sp;
	sf.sf_uc.uc_stack.ss_size = td->td_sigstk.ss_size;
	sf.sf_uc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK)
	    ? ((oonstack) ? SS_ONSTACK : 0) : SS_DISABLE;
	sf.sf_uc.uc_mcontext.mc_onstack = (oonstack) ? 1 : 0;
	sf.sf_uc.uc_mcontext.mc_edi = regs->tf_rdi;
	sf.sf_uc.uc_mcontext.mc_esi = regs->tf_rsi;
	sf.sf_uc.uc_mcontext.mc_ebp = regs->tf_rbp;
	sf.sf_uc.uc_mcontext.mc_isp = regs->tf_rsp; /* XXX */
	sf.sf_uc.uc_mcontext.mc_ebx = regs->tf_rbx;
	sf.sf_uc.uc_mcontext.mc_edx = regs->tf_rdx;
	sf.sf_uc.uc_mcontext.mc_ecx = regs->tf_rcx;
	sf.sf_uc.uc_mcontext.mc_eax = regs->tf_rax;
	sf.sf_uc.uc_mcontext.mc_trapno = regs->tf_trapno;
	sf.sf_uc.uc_mcontext.mc_err = regs->tf_err;
	sf.sf_uc.uc_mcontext.mc_eip = regs->tf_rip;
	sf.sf_uc.uc_mcontext.mc_cs = regs->tf_cs;
	sf.sf_uc.uc_mcontext.mc_eflags = regs->tf_rflags;
	sf.sf_uc.uc_mcontext.mc_esp = regs->tf_rsp;
	sf.sf_uc.uc_mcontext.mc_ss = regs->tf_ss;
	sf.sf_uc.uc_mcontext.mc_ds = regs->tf_ds;
	sf.sf_uc.uc_mcontext.mc_es = regs->tf_es;
	sf.sf_uc.uc_mcontext.mc_fs = regs->tf_fs;
	sf.sf_uc.uc_mcontext.mc_gs = regs->tf_gs;
	sf.sf_uc.uc_mcontext.mc_len = sizeof(sf.sf_uc.uc_mcontext); /* magic */
	ia32_get_fpcontext(td, &sf.sf_uc.uc_mcontext, xfpusave, xfpusave_len);
	fpstate_drop(td);
	sf.sf_uc.uc_mcontext.mc_fsbase = td->td_pcb->pcb_fsbase;
	sf.sf_uc.uc_mcontext.mc_gsbase = td->td_pcb->pcb_gsbase;
	bzero(sf.sf_uc.__spare__, sizeof(sf.sf_uc.__spare__));

	/* Allocate space for the signal handler context. */
	if ((td->td_pflags & TDP_ALTSTACK) != 0 && !oonstack &&
	    SIGISMEMBER(psp->ps_sigonstack, sig))
		sp = td->td_sigstk.ss_sp + td->td_sigstk.ss_size;
	else
		sp = (char *)regs->tf_rsp;
	if (xfpusave != NULL) {
		sp -= xfpusave_len;
		sp = (char *)((unsigned long)sp & ~0x3Ful);
		sf.sf_uc.uc_mcontext.mc_xfpustate = (register_t)sp;
	}
	sp -= sizeof(sf);
	/* Align to 16 bytes. */
	sfp = (struct ia32_sigframe *)((uintptr_t)sp & ~0xF);
	PROC_UNLOCK(p);

	/* Translate the signal if appropriate. */
	if (p->p_sysent->sv_sigtbl && sig <= p->p_sysent->sv_sigsize)
		sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];

	/* Build the argument list for the signal handler. */
	sf.sf_signum = sig;
	sf.sf_ucontext = (register_t)&sfp->sf_uc;
	bzero(&sf.sf_si, sizeof(sf.sf_si));
	if (SIGISMEMBER(psp->ps_siginfo, sig)) {
		/* Signal handler installed with SA_SIGINFO. */
		sf.sf_siginfo = (u_int32_t)(uintptr_t)&sfp->sf_si;
		sf.sf_ah = (u_int32_t)(uintptr_t)catcher;

		/* Fill in POSIX parts */
		sf.sf_si = siginfo;
		sf.sf_si.si_signo = sig;
	} else {
		/* Old FreeBSD-style arguments. */
		sf.sf_siginfo = siginfo.si_code;
		sf.sf_addr = (u_int32_t)siginfo.si_addr;
		sf.sf_ah = (u_int32_t)(uintptr_t)catcher;
	}
	mtx_unlock(&psp->ps_mtx);

	/*
	 * Copy the sigframe out to the user's stack.
	 */
	if (copyout(&sf, sfp, sizeof(*sfp)) != 0 ||
	    (xfpusave != NULL && copyout(xfpusave,
	    PTRIN(sf.sf_uc.uc_mcontext.mc_xfpustate), xfpusave_len)
	    != 0)) {
#ifdef DEBUG
		printf("process %ld has trashed its stack\n", (long)p->p_pid);
#endif
		PROC_LOCK(p);
		sigexit(td, SIGILL);
	}

	regs->tf_rsp = (uintptr_t)sfp;
	regs->tf_rip = p->p_sysent->sv_sigcode_base;
	regs->tf_rflags &= ~(PSL_T | PSL_D);
	regs->tf_cs = _ucode32sel;
	regs->tf_ss = _udatasel;
	regs->tf_ds = _udatasel;
	regs->tf_es = _udatasel;
	set_pcb_flags(td->td_pcb, PCB_FULL_IRET);
	/* XXXKIB leave user %fs and %gs untouched */
	PROC_LOCK(p);
	mtx_lock(&psp->ps_mtx);
}
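For context, the sf_signum/sf_siginfo/sf_ucontext slots assembled above are what a userland SA_SIGINFO handler receives as its three arguments. A minimal sketch of the registration side, assuming nothing beyond POSIX sigaction(2); the handler and function names are hypothetical.

#include <signal.h>
#include <string.h>

static void
handler(int sig, siginfo_t *info, void *ucp)
{
	/* sig, info and ucp correspond to sf_signum, sf_si and sf_uc above. */
	(void)sig;
	(void)info;
	(void)ucp;
}

int
install_handler(int signo)
{
	struct sigaction sa;

	memset(&sa, 0, sizeof(sa));
	sa.sa_sigaction = handler;
	sa.sa_flags = SA_SIGINFO | SA_ONSTACK;	/* SA_ONSTACK pairs with sigaltstack(2) */
	sigemptyset(&sa.sa_mask);
	return (sigaction(signo, &sa, NULL));
}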
Esempio n. 14
0
static void
freebsd4_ia32_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
{
	struct ia32_sigframe4 sf, *sfp;
	struct siginfo32 siginfo;
	struct proc *p;
	struct thread *td;
	struct sigacts *psp;
	struct trapframe *regs;
	int oonstack;
	int sig;

	td = curthread;
	p = td->td_proc;
	siginfo_to_siginfo32(&ksi->ksi_info, &siginfo);

	PROC_LOCK_ASSERT(p, MA_OWNED);
	sig = siginfo.si_signo;
	psp = p->p_sigacts;
	mtx_assert(&psp->ps_mtx, MA_OWNED);
	regs = td->td_frame;
	oonstack = sigonstack(regs->tf_rsp);

	/* Save user context. */
	bzero(&sf, sizeof(sf));
	sf.sf_uc.uc_sigmask = *mask;
	sf.sf_uc.uc_stack.ss_sp = (uintptr_t)td->td_sigstk.ss_sp;
	sf.sf_uc.uc_stack.ss_size = td->td_sigstk.ss_size;
	sf.sf_uc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK)
	    ? ((oonstack) ? SS_ONSTACK : 0) : SS_DISABLE;
	sf.sf_uc.uc_mcontext.mc_onstack = (oonstack) ? 1 : 0;
	sf.sf_uc.uc_mcontext.mc_edi = regs->tf_rdi;
	sf.sf_uc.uc_mcontext.mc_esi = regs->tf_rsi;
	sf.sf_uc.uc_mcontext.mc_ebp = regs->tf_rbp;
	sf.sf_uc.uc_mcontext.mc_isp = regs->tf_rsp; /* XXX */
	sf.sf_uc.uc_mcontext.mc_ebx = regs->tf_rbx;
	sf.sf_uc.uc_mcontext.mc_edx = regs->tf_rdx;
	sf.sf_uc.uc_mcontext.mc_ecx = regs->tf_rcx;
	sf.sf_uc.uc_mcontext.mc_eax = regs->tf_rax;
	sf.sf_uc.uc_mcontext.mc_trapno = regs->tf_trapno;
	sf.sf_uc.uc_mcontext.mc_err = regs->tf_err;
	sf.sf_uc.uc_mcontext.mc_eip = regs->tf_rip;
	sf.sf_uc.uc_mcontext.mc_cs = regs->tf_cs;
	sf.sf_uc.uc_mcontext.mc_eflags = regs->tf_rflags;
	sf.sf_uc.uc_mcontext.mc_esp = regs->tf_rsp;
	sf.sf_uc.uc_mcontext.mc_ss = regs->tf_ss;
	sf.sf_uc.uc_mcontext.mc_ds = regs->tf_ds;
	sf.sf_uc.uc_mcontext.mc_es = regs->tf_es;
	sf.sf_uc.uc_mcontext.mc_fs = regs->tf_fs;
	sf.sf_uc.uc_mcontext.mc_gs = regs->tf_gs;
	bzero(sf.sf_uc.uc_mcontext.mc_fpregs,
	    sizeof(sf.sf_uc.uc_mcontext.mc_fpregs));
	bzero(sf.sf_uc.uc_mcontext.__spare__,
	    sizeof(sf.sf_uc.uc_mcontext.__spare__));
	bzero(sf.sf_uc.__spare__, sizeof(sf.sf_uc.__spare__));

	/* Allocate space for the signal handler context. */
	if ((td->td_pflags & TDP_ALTSTACK) != 0 && !oonstack &&
	    SIGISMEMBER(psp->ps_sigonstack, sig)) {
		sfp = (struct ia32_sigframe4 *)(td->td_sigstk.ss_sp +
		    td->td_sigstk.ss_size - sizeof(sf));
	} else
		sfp = (struct ia32_sigframe4 *)regs->tf_rsp - 1;
	PROC_UNLOCK(p);

	/* Translate the signal if appropriate. */
	if (p->p_sysent->sv_sigtbl && sig <= p->p_sysent->sv_sigsize)
		sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];

	/* Build the argument list for the signal handler. */
	sf.sf_signum = sig;
	sf.sf_ucontext = (register_t)&sfp->sf_uc;
	bzero(&sf.sf_si, sizeof(sf.sf_si));
	if (SIGISMEMBER(psp->ps_siginfo, sig)) {
		/* Signal handler installed with SA_SIGINFO. */
		sf.sf_siginfo = (u_int32_t)(uintptr_t)&sfp->sf_si;
		sf.sf_ah = (u_int32_t)(uintptr_t)catcher;

		/* Fill in POSIX parts */
		sf.sf_si = siginfo;
		sf.sf_si.si_signo = sig;
	} else {
		/* Old FreeBSD-style arguments. */
		sf.sf_siginfo = siginfo.si_code;
		sf.sf_addr = (u_int32_t)siginfo.si_addr;
		sf.sf_ah = (u_int32_t)(uintptr_t)catcher;
	}
	mtx_unlock(&psp->ps_mtx);

	/*
	 * Copy the sigframe out to the user's stack.
	 */
	if (copyout(&sf, sfp, sizeof(*sfp)) != 0) {
#ifdef DEBUG
		printf("process %ld has trashed its stack\n", (long)p->p_pid);
#endif
		PROC_LOCK(p);
		sigexit(td, SIGILL);
	}

	regs->tf_rsp = (uintptr_t)sfp;
	regs->tf_rip = p->p_sysent->sv_sigcode_base + sz_ia32_sigcode -
	    sz_freebsd4_ia32_sigcode;
	regs->tf_rflags &= ~(PSL_T | PSL_D);
	regs->tf_cs = _ucode32sel;
	regs->tf_ss = _udatasel;
	regs->tf_ds = _udatasel;
	regs->tf_es = _udatasel;
	set_pcb_flags(td->td_pcb, PCB_FULL_IRET);
	/* leave user %fs and %gs untouched */
	PROC_LOCK(p);
	mtx_lock(&psp->ps_mtx);
}
Esempio n. 15
0
int
thread_create(struct thread *td, struct rtprio *rtp,
    int (*initialize_thread)(struct thread *, void *), void *thunk)
{
	struct thread *newtd;
	struct proc *p;
	int error;

	p = td->td_proc;

	if (rtp != NULL) {
		switch(rtp->type) {
		case RTP_PRIO_REALTIME:
		case RTP_PRIO_FIFO:
			/* Only root can set scheduler policy */
			if (priv_check(td, PRIV_SCHED_SETPOLICY) != 0)
				return (EPERM);
			if (rtp->prio > RTP_PRIO_MAX)
				return (EINVAL);
			break;
		case RTP_PRIO_NORMAL:
			rtp->prio = 0;
			break;
		default:
			return (EINVAL);
		}
	}

#ifdef RACCT
	if (racct_enable) {
		PROC_LOCK(p);
		error = racct_add(p, RACCT_NTHR, 1);
		PROC_UNLOCK(p);
		if (error != 0)
			return (EPROCLIM);
	}
#endif

	/* Initialize our td */
	error = kern_thr_alloc(p, 0, &newtd);
	if (error)
		goto fail;

	cpu_set_upcall(newtd, td);

	bzero(&newtd->td_startzero,
	    __rangeof(struct thread, td_startzero, td_endzero));
	bcopy(&td->td_startcopy, &newtd->td_startcopy,
	    __rangeof(struct thread, td_startcopy, td_endcopy));
	newtd->td_proc = td->td_proc;
	thread_cow_get(newtd, td);

	error = initialize_thread(newtd, thunk);
	if (error != 0) {
		thread_cow_free(newtd);
		thread_free(newtd);
		goto fail;
	}

	PROC_LOCK(p);
	p->p_flag |= P_HADTHREADS;
	thread_link(newtd, p);
	bcopy(p->p_comm, newtd->td_name, sizeof(newtd->td_name));
	newtd->td_pax = p->p_pax;
	thread_lock(td);
	/* let the scheduler know about these things. */
	sched_fork_thread(td, newtd);
	thread_unlock(td);
	if (P_SHOULDSTOP(p))
		newtd->td_flags |= TDF_ASTPENDING | TDF_NEEDSUSPCHK;
	if (p->p_flag2 & P2_LWP_EVENTS)
		newtd->td_dbgflags |= TDB_BORN;

	/*
	 * Copy the existing thread VM policy into the new thread.
	 */
	vm_domain_policy_localcopy(&newtd->td_vm_dom_policy,
	    &td->td_vm_dom_policy);

	PROC_UNLOCK(p);

	tidhash_add(newtd);

	thread_lock(newtd);
	if (rtp != NULL) {
		if (!(td->td_pri_class == PRI_TIMESHARE &&
		      rtp->type == RTP_PRIO_NORMAL)) {
			rtp_to_pri(rtp, newtd);
			sched_prio(newtd, newtd->td_user_pri);
		} /* ignore timesharing class */
	}
	TD_SET_CAN_RUN(newtd);
	sched_add(newtd, SRQ_BORING);
	thread_unlock(newtd);

	return (0);

fail:
#ifdef RACCT
	if (racct_enable) {
		PROC_LOCK(p);
		racct_sub(p, RACCT_NTHR, 1);
		PROC_UNLOCK(p);
	}
#endif
	return (error);
}
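The rtprio check at the top of thread_create() is what an unprivileged pthread_create(3) with an explicit realtime policy ultimately runs into. A hedged sketch of that userland path, using only standard POSIX threads interfaces; names and the priority value are illustrative.

#include <pthread.h>
#include <sched.h>

static void *
thr_main(void *arg)
{
	return (arg);
}

int
start_fifo_thread(pthread_t *tid)
{
	pthread_attr_t attr;
	struct sched_param sp = { .sched_priority = 10 };

	pthread_attr_init(&attr);
	pthread_attr_setinheritsched(&attr, PTHREAD_EXPLICIT_SCHED);
	pthread_attr_setschedpolicy(&attr, SCHED_FIFO);
	pthread_attr_setschedparam(&attr, &sp);
	/* Expected to fail with EPERM for unprivileged callers. */
	return (pthread_create(tid, &attr, thr_main, NULL));
}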
Esempio n. 16
0
int
cheriabi_sysarch(struct thread *td, struct cheriabi_sysarch_args *uap)
{
	struct trapframe *regs = &td->td_pcb->pcb_regs;
	int error;
	int parms_from_cap = 1;
	size_t reqsize;
	register_t reqperms;

	/*
	 * The sysarch() fill_uap function is machine-independent, so it
	 * cannot check the validity of the capability which becomes
	 * uap->parms.  As such, it makes no attempt to convert the result.
	 * We need to perform those checks here.
	 */
	switch (uap->op) {
	case MIPS_SET_TLS:
		reqsize = 0;
		reqperms = 0;
		break;

	case MIPS_GET_TLS:
	case CHERI_GET_STACK:
	case CHERI_GET_TYPECAP:
		reqsize = sizeof(struct chericap);
		reqperms = CHERI_PERM_STORE|CHERI_PERM_STORE_CAP;
		break;

	case CHERI_SET_STACK:
		reqsize = sizeof(struct chericap);
		reqperms = CHERI_PERM_LOAD|CHERI_PERM_LOAD_CAP;
		break;

	case CHERI_MMAP_GETBASE:
	case CHERI_MMAP_GETLEN:
	case CHERI_MMAP_GETOFFSET:
	case CHERI_MMAP_GETPERM:
	case CHERI_MMAP_SETOFFSET:
	case CHERI_MMAP_SETBOUNDS:
		reqsize = sizeof(uint64_t);
		reqperms = CHERI_PERM_STORE;
		break;

	case CHERI_MMAP_ANDPERM:
		reqsize = sizeof(uint64_t);
		reqperms = CHERI_PERM_LOAD|CHERI_PERM_STORE;
		break;

	case MIPS_GET_COUNT:
		parms_from_cap = 0;
		break;

#ifdef CPU_QEMU_MALTA
	case QEMU_GET_QTRACE:
		reqsize = sizeof(int);
		reqperms = CHERI_PERM_STORE;
		break;

	case QEMU_SET_QTRACE:
		reqsize = sizeof(int);
		reqperms = CHERI_PERM_LOAD;
		break;
#endif

	default:
		return (EINVAL);
	}
	if (parms_from_cap) {
		error = cheriabi_cap_to_ptr(&uap->parms, &regs->c3,
		    reqsize, reqperms, 0);
		if (error != 0)
			return (error);
	}

	switch (uap->op) {
	case MIPS_SET_TLS:
		return (cheriabi_set_user_tls(td, &regs->c3));

	case MIPS_GET_TLS:
		error = copyoutcap(&td->td_md.md_tls_cap, uap->parms,
		    sizeof(struct chericap));
		return (error);

	case CHERI_MMAP_GETBASE: {
		size_t base;

		PROC_LOCK(td->td_proc);
		CHERI_CLC(CHERI_CR_CTEMP0, CHERI_CR_KDC,
		    &td->td_proc->p_md.md_cheri_mmap_cap, 0);
		CHERI_CGETBASE(base, CHERI_CR_CTEMP0);
		PROC_UNLOCK(td->td_proc);
		if (suword64(uap->parms, base) != 0)
			return (EFAULT);
		return (0);
	}

	case CHERI_MMAP_GETLEN: {
		size_t len;

		PROC_LOCK(td->td_proc);
		CHERI_CLC(CHERI_CR_CTEMP0, CHERI_CR_KDC,
		    &td->td_proc->p_md.md_cheri_mmap_cap, 0);
		CHERI_CGETLEN(len, CHERI_CR_CTEMP0);
		PROC_UNLOCK(td->td_proc);
		if (suword64(uap->parms, len) != 0)
			return (EFAULT);
		return (0);
	}

	case CHERI_MMAP_GETOFFSET: {
		ssize_t offset;

		PROC_LOCK(td->td_proc);
		CHERI_CLC(CHERI_CR_CTEMP0, CHERI_CR_KDC,
		    &td->td_proc->p_md.md_cheri_mmap_cap, 0);
		CHERI_CGETOFFSET(offset, CHERI_CR_CTEMP0);
		PROC_UNLOCK(td->td_proc);
		if (suword64(uap->parms, offset) != 0)
			return (EFAULT);
		return (0);
	}

	case CHERI_MMAP_GETPERM: {
		uint64_t perms;

		PROC_LOCK(td->td_proc);
		CHERI_CLC(CHERI_CR_CTEMP0, CHERI_CR_KDC,
		    &td->td_proc->p_md.md_cheri_mmap_cap, 0);
		CHERI_CGETPERM(perms, CHERI_CR_CTEMP0);
		PROC_UNLOCK(td->td_proc);
		if (suword64(uap->parms, perms) != 0)
			return (EFAULT);
		return (0);
	}

	case CHERI_MMAP_ANDPERM: {
		uint64_t perms;
		perms = fuword64(uap->parms);

		if (perms == -1)
			return (EINVAL);
		PROC_LOCK(td->td_proc);
		CHERI_CLC(CHERI_CR_CTEMP0, CHERI_CR_KDC,
		    &td->td_proc->p_md.md_cheri_mmap_cap, 0);
		CHERI_CANDPERM(CHERI_CR_CTEMP0, CHERI_CR_CTEMP0,
		    (register_t)perms);
		CHERI_CSC(CHERI_CR_CTEMP0, CHERI_CR_KDC,
		    &td->td_proc->p_md.md_cheri_mmap_cap, 0);
		CHERI_CGETPERM(perms, CHERI_CR_CTEMP0);
		PROC_UNLOCK(td->td_proc);
		if (suword64(uap->parms, perms) != 0)
			return (EFAULT);
		return (0);
	}

	case CHERI_MMAP_SETOFFSET: {
		size_t len;
		ssize_t offset;

		offset = fuword64(uap->parms);
		/* Reject errors and misaligned offsets */
		if (offset == -1 || (offset & PAGE_MASK) != 0)
			return (EINVAL);
		PROC_LOCK(td->td_proc);
		CHERI_CLC(CHERI_CR_CTEMP0, CHERI_CR_KDC,
		    &td->td_proc->p_md.md_cheri_mmap_cap, 0);
		CHERI_CGETLEN(len, CHERI_CR_CTEMP0);
		/* Don't allow out of bounds offsets, they aren't useful */
		if (offset < 0 || offset > len) {
			PROC_UNLOCK(td->td_proc);
			return (EINVAL);
		}
		CHERI_CSETOFFSET(CHERI_CR_CTEMP0, CHERI_CR_CTEMP0,
		    (register_t)offset);
		CHERI_CSC(CHERI_CR_CTEMP0, CHERI_CR_KDC,
		    &td->td_proc->p_md.md_cheri_mmap_cap, 0);
		PROC_UNLOCK(td->td_proc);
		return (0);
	}

	case CHERI_MMAP_SETBOUNDS: {
		size_t len, olen;
		ssize_t offset;

		len = fuword64(uap->parms);
		/* Reject errors or misaligned lengths */
		if (len == (size_t)-1 || (len & PAGE_MASK) != 0)
			return (EINVAL);
		PROC_LOCK(td->td_proc);
		CHERI_CLC(CHERI_CR_CTEMP0, CHERI_CR_KDC,
		    &td->td_proc->p_md.md_cheri_mmap_cap, 0);
		CHERI_CGETLEN(olen, CHERI_CR_CTEMP0);
		CHERI_CGETOFFSET(offset, CHERI_CR_CTEMP0);
		/* Don't try to set out of bounds lengths */
		if (offset > olen || len > olen - offset) {
			PROC_UNLOCK(td->td_proc);
			return (EINVAL);
		}
		CHERI_CSETBOUNDS(CHERI_CR_CTEMP0, CHERI_CR_CTEMP0,
		    (register_t)len);
		CHERI_CSC(CHERI_CR_CTEMP0, CHERI_CR_KDC,
		    &td->td_proc->p_md.md_cheri_mmap_cap, 0);
		PROC_UNLOCK(td->td_proc);
		return (0);
	}

	default:
		return (sysarch(td, (struct sysarch_args*)uap));
	}
}
Esempio n. 17
0
void
sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
{
	struct thread *td;
	struct proc *p;
	struct trapframe *tf;
	struct sigframe *fp, frame;
	struct sigacts *psp;
	int code, onstack, sig;

	td = curthread;
	p = td->td_proc;
	PROC_LOCK_ASSERT(p, MA_OWNED);

	sig = ksi->ksi_signo;
	code = ksi->ksi_code;
	psp = p->p_sigacts;
	mtx_assert(&psp->ps_mtx, MA_OWNED);

	tf = td->td_frame;
	onstack = sigonstack(tf->tf_sp);

	CTR4(KTR_SIG, "sendsig: td=%p (%s) catcher=%p sig=%d", td, p->p_comm,
	    catcher, sig);

	/* Allocate and validate space for the signal handler context. */
	if ((td->td_pflags & TDP_ALTSTACK) != 0 && !onstack &&
	    SIGISMEMBER(psp->ps_sigonstack, sig)) {
		fp = (struct sigframe *)(td->td_sigstk.ss_sp +
		    td->td_sigstk.ss_size);
#if defined(COMPAT_43)
		td->td_sigstk.ss_flags |= SS_ONSTACK;
#endif
	} else {
		fp = (struct sigframe *)td->td_frame->tf_sp;
	}

	/* Make room, keeping the stack aligned */
	fp--;
	fp = (struct sigframe *)STACKALIGN(fp);

	/* Fill in the frame to copy out */
	get_mcontext(td, &frame.sf_uc.uc_mcontext, 0);
	get_fpcontext(td, &frame.sf_uc.uc_mcontext);
	frame.sf_si = ksi->ksi_info;
	frame.sf_uc.uc_sigmask = *mask;
	frame.sf_uc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK) ?
	    ((onstack) ? SS_ONSTACK : 0) : SS_DISABLE;
	frame.sf_uc.uc_stack = td->td_sigstk;
	mtx_unlock(&psp->ps_mtx);
	PROC_UNLOCK(td->td_proc);

	/* Copy the sigframe out to the user's stack. */
	if (copyout(&frame, fp, sizeof(*fp)) != 0) {
		/* Process has trashed its stack. Kill it. */
		CTR2(KTR_SIG, "sendsig: sigexit td=%p fp=%p", td, fp);
		PROC_LOCK(p);
		sigexit(td, SIGILL);
	}

	tf->tf_x[0] = sig;
	tf->tf_x[1] = (register_t)&fp->sf_si;
	tf->tf_x[2] = (register_t)&fp->sf_uc;

	tf->tf_elr = (register_t)catcher;
	tf->tf_sp = (register_t)fp;
	tf->tf_lr = (register_t)(PS_STRINGS - *(p->p_sysent->sv_szsigcode));

	CTR3(KTR_SIG, "sendsig: return td=%p pc=%#x sp=%#x", td, tf->tf_elr,
	    tf->tf_sp);

	PROC_LOCK(p);
	mtx_lock(&psp->ps_mtx);
}
Esempio n. 18
0
static int
linux_clone_proc(struct thread *td, struct linux_clone_args *args)
{
	struct fork_req fr;
	int error, ff = RFPROC | RFSTOPPED;
	struct proc *p2;
	struct thread *td2;
	int exit_signal;
	struct linux_emuldata *em;

#ifdef DEBUG
	if (ldebug(clone)) {
		printf(ARGS(clone, "flags %x, stack %p, parent tid: %p, "
		    "child tid: %p"), (unsigned)args->flags,
		    args->stack, args->parent_tidptr, args->child_tidptr);
	}
#endif

	exit_signal = args->flags & 0x000000ff;
	if (LINUX_SIG_VALID(exit_signal)) {
		exit_signal = linux_to_bsd_signal(exit_signal);
	} else if (exit_signal != 0)
		return (EINVAL);

	if (args->flags & LINUX_CLONE_VM)
		ff |= RFMEM;
	if (args->flags & LINUX_CLONE_SIGHAND)
		ff |= RFSIGSHARE;
	/*
	 * XXX: In Linux, sharing of fs info (chroot/cwd/umask) and open
	 * files is independent.  In FreeBSD, it is in one structure, but
	 * in reality this does not cause any problems because both of
	 * these flags are usually set together.
	 */
	if (!(args->flags & (LINUX_CLONE_FILES | LINUX_CLONE_FS)))
		ff |= RFFDG;

	if (args->flags & LINUX_CLONE_PARENT_SETTID)
		if (args->parent_tidptr == NULL)
			return (EINVAL);

	if (args->flags & LINUX_CLONE_VFORK)
		ff |= RFPPWAIT;

	bzero(&fr, sizeof(fr));
	fr.fr_flags = ff;
	fr.fr_procp = &p2;
	error = fork1(td, &fr);
	if (error)
		return (error);

	td2 = FIRST_THREAD_IN_PROC(p2);

	/* create the emuldata */
	linux_proc_init(td, td2, args->flags);

	em = em_find(td2);
	KASSERT(em != NULL, ("clone_proc: emuldata not found.\n"));

	if (args->flags & LINUX_CLONE_CHILD_SETTID)
		em->child_set_tid = args->child_tidptr;
	else
	   	em->child_set_tid = NULL;

	if (args->flags & LINUX_CLONE_CHILD_CLEARTID)
		em->child_clear_tid = args->child_tidptr;
	else
	   	em->child_clear_tid = NULL;

	if (args->flags & LINUX_CLONE_PARENT_SETTID) {
		error = copyout(&p2->p_pid, args->parent_tidptr,
		    sizeof(p2->p_pid));
		if (error)
			printf(LMSG("copyout failed!"));
	}

	PROC_LOCK(p2);
	p2->p_sigparent = exit_signal;
	PROC_UNLOCK(p2);
	/*
	 * In the case of stack == NULL, we are supposed to COW the calling
	 * process's stack.  This is what a normal fork() does, so we just
	 * keep the tf_rsp argument intact.
	 */
	linux_set_upcall_kse(td2, PTROUT(args->stack));

	if (args->flags & LINUX_CLONE_SETTLS)
		linux_set_cloned_tls(td2, args->tls);

	/*
	 * If CLONE_PARENT is set, then the parent of the new process will be 
	 * the same as that of the calling process.
	 */
	if (args->flags & LINUX_CLONE_PARENT) {
		sx_xlock(&proctree_lock);
		PROC_LOCK(p2);
		proc_reparent(p2, td->td_proc->p_pptr);
		PROC_UNLOCK(p2);
		sx_xunlock(&proctree_lock);
	}

#ifdef DEBUG
	if (ldebug(clone))
		printf(LMSG("clone: successful rfork to %d, "
		    "stack %p sig = %d"), (int)p2->p_pid, args->stack,
		    exit_signal);
#endif

	/*
	 * Make this runnable after we are finished with it.
	 */
	thread_lock(td2);
	TD_SET_CAN_RUN(td2);
	sched_add(td2, SRQ_BORING);
	thread_unlock(td2);

	td->td_retval[0] = p2->p_pid;

	return (0);
}
Esempio n. 19
0
/*
 * Send an interrupt to process.
 *
 * Stack is set up to allow sigcode stored
 * at top to call routine, followed by kcall
 * to sigreturn routine below.	After sigreturn
 * resets the signal mask, the stack, and the
 * frame pointer, it returns to the user
 * specified pc, psl.
 */
static void
freebsd32_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
{
	struct proc *p;
	struct thread *td;
	struct fpreg32 fpregs;
	struct reg32 regs;
	struct sigacts *psp;
	struct sigframe32 sf, *sfp;
	int sig;
	int oonstack;
	unsigned i;

	td = curthread;
	p = td->td_proc;
	PROC_LOCK_ASSERT(p, MA_OWNED);
	sig = ksi->ksi_signo;
	psp = p->p_sigacts;
	mtx_assert(&psp->ps_mtx, MA_OWNED);

	fill_regs32(td, &regs);
	oonstack = sigonstack(td->td_frame->sp);

	/* save user context */
	bzero(&sf, sizeof sf);
	sf.sf_uc.uc_sigmask = *mask;
	sf.sf_uc.uc_stack.ss_sp = (int32_t)(intptr_t)td->td_sigstk.ss_sp;
	sf.sf_uc.uc_stack.ss_size = td->td_sigstk.ss_size;
	sf.sf_uc.uc_stack.ss_flags = td->td_sigstk.ss_flags;
	sf.sf_uc.uc_mcontext.mc_onstack = (oonstack) ? 1 : 0;
	sf.sf_uc.uc_mcontext.mc_pc = regs.r_regs[PC];
	sf.sf_uc.uc_mcontext.mullo = regs.r_regs[MULLO];
	sf.sf_uc.uc_mcontext.mulhi = regs.r_regs[MULHI];
	sf.sf_uc.uc_mcontext.mc_tls = (int32_t)(intptr_t)td->td_md.md_tls;
	sf.sf_uc.uc_mcontext.mc_regs[0] = UCONTEXT_MAGIC;  /* magic number */
	for (i = 1; i < 32; i++)
		sf.sf_uc.uc_mcontext.mc_regs[i] = regs.r_regs[i];
	sf.sf_uc.uc_mcontext.mc_fpused = td->td_md.md_flags & MDTD_FPUSED;
	if (sf.sf_uc.uc_mcontext.mc_fpused) {
		/* if FPU has current state, save it first */
		if (td == PCPU_GET(fpcurthread))
			MipsSaveCurFPState(td);
		fill_fpregs32(td, &fpregs);
		for (i = 0; i < 33; i++)
			sf.sf_uc.uc_mcontext.mc_fpregs[i] = fpregs.r_regs[i];
	}

	/* Allocate and validate space for the signal handler context. */
	if ((td->td_pflags & TDP_ALTSTACK) != 0 && !oonstack &&
	    SIGISMEMBER(psp->ps_sigonstack, sig)) {
		sfp = (struct sigframe32 *)(((uintptr_t)td->td_sigstk.ss_sp +
		    td->td_sigstk.ss_size - sizeof(struct sigframe32))
		    & ~(sizeof(__int64_t) - 1));
	} else
		sfp = (struct sigframe32 *)((vm_offset_t)(td->td_frame->sp - 
		    sizeof(struct sigframe32)) & ~(sizeof(__int64_t) - 1));

	/* Build the argument list for the signal handler. */
	td->td_frame->a0 = sig;
	td->td_frame->a2 = (register_t)(intptr_t)&sfp->sf_uc;
	if (SIGISMEMBER(psp->ps_siginfo, sig)) {
		/* Signal handler installed with SA_SIGINFO. */
		td->td_frame->a1 = (register_t)(intptr_t)&sfp->sf_si;
		/* sf.sf_ahu.sf_action = (__siginfohandler_t *)catcher; */

		/* fill siginfo structure */
		sf.sf_si.si_signo = sig;
		sf.sf_si.si_code = ksi->ksi_code;
		sf.sf_si.si_addr = td->td_frame->badvaddr;
	} else {
		/* Old FreeBSD-style arguments. */
		td->td_frame->a1 = ksi->ksi_code;
		td->td_frame->a3 = td->td_frame->badvaddr;
		/* sf.sf_ahu.sf_handler = catcher; */
	}

	mtx_unlock(&psp->ps_mtx);
	PROC_UNLOCK(p);

	/*
	 * Copy the sigframe out to the user's stack.
	 */
	if (copyout(&sf, sfp, sizeof(struct sigframe32)) != 0) {
		/*
		 * Something is wrong with the stack pointer;
		 * kill the process.
		 */
		PROC_LOCK(p);
		sigexit(td, SIGILL);
	}

	td->td_frame->pc = (register_t)(intptr_t)catcher;
	td->td_frame->t9 = (register_t)(intptr_t)catcher;
	td->td_frame->sp = (register_t)(intptr_t)sfp;
	/*
	 * Signal trampoline code is at base of user stack.
	 */
	td->td_frame->ra = (register_t)(intptr_t)p->p_psstrings - *(p->p_sysent->sv_szsigcode);
	PROC_LOCK(p);
	mtx_lock(&psp->ps_mtx);
}
Esempio n. 20
0
static int
linux_clone_thread(struct thread *td, struct linux_clone_args *args)
{
	struct linux_emuldata *em;
	struct thread *newtd;
	struct proc *p;
	int error;

#ifdef DEBUG
	if (ldebug(clone)) {
		printf(ARGS(clone, "thread: flags %x, stack %p, parent tid: %p, "
		    "child tid: %p"), (unsigned)args->flags,
		    args->stack, args->parent_tidptr, args->child_tidptr);
	}
#endif

	LINUX_CTR4(clone_thread, "thread(%d) flags %x ptid %p ctid %p",
	    td->td_tid, (unsigned)args->flags,
	    args->parent_tidptr, args->child_tidptr);

	if (args->flags & LINUX_CLONE_PARENT_SETTID)
		if (args->parent_tidptr == NULL)
			return (EINVAL);

	/* Threads must be created with their own stack. */
	if (args->stack == NULL)
		return (EINVAL);

	p = td->td_proc;

#ifdef RACCT
	if (racct_enable) {
		PROC_LOCK(p);
		error = racct_add(p, RACCT_NTHR, 1);
		PROC_UNLOCK(p);
		if (error != 0)
			return (EPROCLIM);
	}
#endif

	/* Initialize our td */
	error = kern_thr_alloc(p, 0, &newtd);
	if (error)
		goto fail;

	cpu_set_upcall(newtd, td);

	bzero(&newtd->td_startzero,
	    __rangeof(struct thread, td_startzero, td_endzero));
	bcopy(&td->td_startcopy, &newtd->td_startcopy,
	    __rangeof(struct thread, td_startcopy, td_endcopy));

	newtd->td_proc = p;
	thread_cow_get(newtd, td);

	/* create the emuldata */
	linux_proc_init(td, newtd, args->flags);

	em = em_find(newtd);
	KASSERT(em != NULL, ("clone_thread: emuldata not found.\n"));

	if (args->flags & LINUX_CLONE_SETTLS)
		linux_set_cloned_tls(newtd, args->tls);

	if (args->flags & LINUX_CLONE_CHILD_SETTID)
		em->child_set_tid = args->child_tidptr;
	else
	   	em->child_set_tid = NULL;

	if (args->flags & LINUX_CLONE_CHILD_CLEARTID)
		em->child_clear_tid = args->child_tidptr;
	else
	   	em->child_clear_tid = NULL;

	cpu_thread_clean(newtd);
	
	linux_set_upcall_kse(newtd, PTROUT(args->stack));

	PROC_LOCK(p);
	p->p_flag |= P_HADTHREADS;
	bcopy(p->p_comm, newtd->td_name, sizeof(newtd->td_name));

	if (args->flags & LINUX_CLONE_PARENT)
		thread_link(newtd, p->p_pptr);
	else
		thread_link(newtd, p);

	thread_lock(td);
	/* let the scheduler know about these things. */
	sched_fork_thread(td, newtd);
	thread_unlock(td);
	if (P_SHOULDSTOP(p))
		newtd->td_flags |= TDF_ASTPENDING | TDF_NEEDSUSPCHK;
	PROC_UNLOCK(p);

	tidhash_add(newtd);

#ifdef DEBUG
	if (ldebug(clone))
		printf(ARGS(clone, "successful clone to %d, stack %p"),
		(int)newtd->td_tid, args->stack);
#endif

	LINUX_CTR2(clone_thread, "thread(%d) successful clone to %d",
	    td->td_tid, newtd->td_tid);

	if (args->flags & LINUX_CLONE_PARENT_SETTID) {
		error = copyout(&newtd->td_tid, args->parent_tidptr,
		    sizeof(newtd->td_tid));
		if (error)
			printf(LMSG("clone_thread: copyout failed!"));
	}

	/*
	 * Make this runnable after we are finished with it.
	 */
	thread_lock(newtd);
	TD_SET_CAN_RUN(newtd);
	sched_add(newtd, SRQ_BORING);
	thread_unlock(newtd);

	td->td_retval[0] = newtd->td_tid;

	return (0);

fail:
#ifdef RACCT
	if (racct_enable) {
		PROC_LOCK(p);
		racct_sub(p, RACCT_NTHR, 1);
		PROC_UNLOCK(p);
	}
#endif
	return (error);
}
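For reference, an NPTL-style pthread_create() on Linux passes roughly the flag set below, which is the combination the thread path above is emulating; the exact set varies between libc versions, so treat this as an approximation.

#define _GNU_SOURCE
#include <sched.h>

/* Approximate flag set used by an NPTL pthread_create(); see note above. */
static const int nptl_like_clone_flags =
    CLONE_VM | CLONE_FS | CLONE_FILES | CLONE_SIGHAND | CLONE_THREAD |
    CLONE_SYSVSEM | CLONE_SETTLS | CLONE_PARENT_SETTID |
    CLONE_CHILD_CLEARTID;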
Esempio n. 21
0
/*
 * copied from amd64/amd64/machdep.c
 *
 * Send an interrupt to process.
 */
static void
linux_rt_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
{
	struct l_rt_sigframe sf, *sfp;
	struct proc *p;
	struct thread *td;
	struct sigacts *psp;
	caddr_t sp;
	struct trapframe *regs;
	int sig, code;
	int oonstack;

	td = curthread;
	p = td->td_proc;
	PROC_LOCK_ASSERT(p, MA_OWNED);
	sig = ksi->ksi_signo;
	psp = p->p_sigacts;
	code = ksi->ksi_code;
	mtx_assert(&psp->ps_mtx, MA_OWNED);
	regs = td->td_frame;
	oonstack = sigonstack(regs->tf_rsp);

	LINUX_CTR4(rt_sendsig, "%p, %d, %p, %u",
	    catcher, sig, mask, code);

	/* Allocate space for the signal handler context. */
	if ((td->td_pflags & TDP_ALTSTACK) != 0 && !oonstack &&
	    SIGISMEMBER(psp->ps_sigonstack, sig)) {
		sp = (caddr_t)td->td_sigstk.ss_sp + td->td_sigstk.ss_size -
		    sizeof(struct l_rt_sigframe);
	} else
		sp = (caddr_t)regs->tf_rsp - sizeof(struct l_rt_sigframe) - 128;
	/* Align to 16 bytes. */
	sfp = (struct l_rt_sigframe *)((unsigned long)sp & ~0xFul);
	mtx_unlock(&psp->ps_mtx);

	/* Translate the signal. */
	sig = bsd_to_linux_signal(sig);

	/* Save user context. */
	bzero(&sf, sizeof(sf));
	bsd_to_linux_sigset(mask, &sf.sf_sc.uc_sigmask);
	bsd_to_linux_sigset(mask, &sf.sf_sc.uc_mcontext.sc_mask);

	sf.sf_sc.uc_stack.ss_sp = PTROUT(td->td_sigstk.ss_sp);
	sf.sf_sc.uc_stack.ss_size = td->td_sigstk.ss_size;
	sf.sf_sc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK)
	    ? ((oonstack) ? LINUX_SS_ONSTACK : 0) : LINUX_SS_DISABLE;
	PROC_UNLOCK(p);

	sf.sf_sc.uc_mcontext.sc_rdi    = regs->tf_rdi;
	sf.sf_sc.uc_mcontext.sc_rsi    = regs->tf_rsi;
	sf.sf_sc.uc_mcontext.sc_rdx    = regs->tf_rdx;
	sf.sf_sc.uc_mcontext.sc_rbp    = regs->tf_rbp;
	sf.sf_sc.uc_mcontext.sc_rbx    = regs->tf_rbx;
	sf.sf_sc.uc_mcontext.sc_rcx    = regs->tf_rcx;
	sf.sf_sc.uc_mcontext.sc_rax    = regs->tf_rax;
	sf.sf_sc.uc_mcontext.sc_rip    = regs->tf_rip;
	sf.sf_sc.uc_mcontext.sc_rsp    = regs->tf_rsp;
	sf.sf_sc.uc_mcontext.sc_r8     = regs->tf_r8;
	sf.sf_sc.uc_mcontext.sc_r9     = regs->tf_r9;
	sf.sf_sc.uc_mcontext.sc_r10    = regs->tf_r10;
	sf.sf_sc.uc_mcontext.sc_r11    = regs->tf_r11;
	sf.sf_sc.uc_mcontext.sc_r12    = regs->tf_r12;
	sf.sf_sc.uc_mcontext.sc_r13    = regs->tf_r13;
	sf.sf_sc.uc_mcontext.sc_r14    = regs->tf_r14;
	sf.sf_sc.uc_mcontext.sc_r15    = regs->tf_r15;
	sf.sf_sc.uc_mcontext.sc_cs     = regs->tf_cs;
	sf.sf_sc.uc_mcontext.sc_rflags = regs->tf_rflags;
	sf.sf_sc.uc_mcontext.sc_err    = regs->tf_err;
	sf.sf_sc.uc_mcontext.sc_trapno = bsd_to_linux_trapcode(code);
	sf.sf_sc.uc_mcontext.sc_cr2    = (register_t)ksi->ksi_addr;

	/* Build the argument list for the signal handler. */
	regs->tf_rdi = sig;			/* arg 1 in %rdi */
	regs->tf_rax = 0;
	regs->tf_rsi = (register_t)&sfp->sf_si;	/* arg 2 in %rsi */
	regs->tf_rdx = (register_t)&sfp->sf_sc;	/* arg 3 in %rdx */

	sf.sf_handler = catcher;
	/* Fill in POSIX parts */
	ksiginfo_to_lsiginfo(ksi, &sf.sf_si, sig);

	/*
	 * Copy the sigframe out to the user's stack.
	 */
	if (copyout(&sf, sfp, sizeof(*sfp)) != 0) {
#ifdef DEBUG
		printf("process %ld has trashed its stack\n", (long)p->p_pid);
#endif
		PROC_LOCK(p);
		sigexit(td, SIGILL);
	}

	regs->tf_rsp = (long)sfp;
	regs->tf_rip = linux_rt_sigcode;
	regs->tf_rflags &= ~(PSL_T | PSL_D);
	regs->tf_cs = _ucodesel;
	set_pcb_flags(td->td_pcb, PCB_FULL_IRET);
	PROC_LOCK(p);
	mtx_lock(&psp->ps_mtx);
}
Esempio n. 22
0
static inline int
syscallenter(struct thread *td, struct syscall_args *sa)
{
	struct proc *p;
	int error, traced;

	PCPU_INC(cnt.v_syscall);
	p = td->td_proc;

	td->td_pticks = 0;
	if (td->td_ucred != p->p_ucred)
		cred_update_thread(td);
	if (p->p_flag & P_TRACED) {
		traced = 1;
		PROC_LOCK(p);
		td->td_dbgflags &= ~TDB_USERWR;
		td->td_dbgflags |= TDB_SCE;
		PROC_UNLOCK(p);
	} else
		traced = 0;
	error = (p->p_sysent->sv_fetch_syscall_args)(td, sa);
#ifdef KTRACE
	if (KTRPOINT(td, KTR_SYSCALL))
		ktrsyscall(sa->code, sa->narg, sa->args);
#endif

	CTR6(KTR_SYSC,
"syscall: td=%p pid %d %s (%#lx, %#lx, %#lx)",
	    td, td->td_proc->p_pid, syscallname(p, sa->code),
	    sa->args[0], sa->args[1], sa->args[2]);

	if (error == 0) {
		STOPEVENT(p, S_SCE, sa->narg);
		if (p->p_flag & P_TRACED && p->p_stops & S_PT_SCE) {
			PROC_LOCK(p);
			ptracestop((td), SIGTRAP);
			PROC_UNLOCK(p);
		}
		if (td->td_dbgflags & TDB_USERWR) {
			/*
			 * Reread syscall number and arguments if
			 * debugger modified registers or memory.
			 */
			error = (p->p_sysent->sv_fetch_syscall_args)(td, sa);
#ifdef KTRACE
			if (KTRPOINT(td, KTR_SYSCALL))
				ktrsyscall(sa->code, sa->narg, sa->args);
#endif
			if (error != 0)
				goto retval;
		}

#ifdef CAPABILITY_MODE
		/*
		 * In capability mode, we only allow access to system calls
		 * flagged with SYF_CAPENABLED.
		 */
		if (IN_CAPABILITY_MODE(td) &&
		    !(sa->callp->sy_flags & SYF_CAPENABLED)) {
			error = ECAPMODE;
			goto retval;
		}
#endif

		error = syscall_thread_enter(td, sa->callp);
		if (error != 0)
			goto retval;

#ifdef KDTRACE_HOOKS
		/*
		 * If the systrace module has registered its probe
		 * callback and there is a probe active for the
		 * syscall 'entry', process the probe.
		 */
		if (systrace_probe_func != NULL && sa->callp->sy_entry != 0)
			(*systrace_probe_func)(sa->callp->sy_entry, sa->code,
			    sa->callp, sa->args, 0);
#endif

		AUDIT_SYSCALL_ENTER(sa->code, td);
		error = (sa->callp->sy_call)(td, sa->args);
		AUDIT_SYSCALL_EXIT(error, td);

		/* Save the latest error return value. */
		td->td_errno = error;

#ifdef KDTRACE_HOOKS
		/*
		 * If the systrace module has registered its probe
		 * callback and there is a probe active for the
		 * syscall 'return', process the probe.
		 */
		if (systrace_probe_func != NULL && sa->callp->sy_return != 0)
			(*systrace_probe_func)(sa->callp->sy_return, sa->code,
			    sa->callp, NULL, (error) ? -1 : td->td_retval[0]);
#endif
		syscall_thread_exit(td, sa->callp);
		CTR4(KTR_SYSC, "syscall: p=%p error=%d return %#lx %#lx",
		    p, error, td->td_retval[0], td->td_retval[1]);
	}
 retval:
	if (traced) {
		PROC_LOCK(p);
		td->td_dbgflags &= ~TDB_SCE;
		PROC_UNLOCK(p);
	}
	(p->p_sysent->sv_set_syscall_retval)(td, error);
	return (error);
}
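The CAPABILITY_MODE branch above is the kernel side of FreeBSD's Capsicum capability mode. A small userland sketch of how it surfaces, assuming only cap_enter(2) and standard headers; the file name is arbitrary.

#include <sys/capsicum.h>
#include <errno.h>
#include <fcntl.h>
#include <unistd.h>

int
demo_capability_mode(void)
{
	if (cap_enter() != 0)
		return (-1);
	/* Opening a file by global path fails with ECAPMODE in capability mode. */
	if (open("/etc/motd", O_RDONLY) == -1 && errno == ECAPMODE)
		write(2, "open(2) rejected in capability mode\n", 36);
	return (0);
}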
Esempio n. 23
0
File: trap.c Project: rodero95/sys
/*
 * void prefetch_abort_handler(trapframe_t *tf)
 *
 * Abort handler called when instruction execution occurs at
 * a non-existent or restricted (access permissions) memory page.
 * If the address is invalid and we were in SVC mode, then panic, as
 * the kernel should never prefetch abort.
 * If the address is invalid and the page is mapped, then the user process
 * does not have read permission, so send it a signal.
 * Otherwise fault the page in and try again.
 */
void
prefetch_abort_handler(trapframe_t *tf)
{
	struct thread *td;
	struct proc * p;
	struct vm_map *map;
	vm_offset_t fault_pc, va;
	int error = 0;
	struct ksig ksig;


#if 0
	/* Update vmmeter statistics */
	uvmexp.traps++;
#endif
#if 0
	printf("prefetch abort handler: %p %p\n", (void*)tf->tf_pc,
	    (void*)tf->tf_usr_lr);
#endif
	
 	td = curthread;
	p = td->td_proc;
	PCPU_INC(cnt.v_trap);

	if (TRAP_USERMODE(tf)) {
		td->td_frame = tf;
		if (td->td_ucred != td->td_proc->p_ucred)
			cred_update_thread(td);
	}
	fault_pc = tf->tf_pc;
	if (td->td_md.md_spinlock_count == 0) {
		if (__predict_true(tf->tf_spsr & I32_bit) == 0)
			enable_interrupts(I32_bit);
		if (__predict_true(tf->tf_spsr & F32_bit) == 0)
			enable_interrupts(F32_bit);
	}

	/* See if the cpu state needs to be fixed up */
	switch (prefetch_abort_fixup(tf, &ksig)) {
	case ABORT_FIXUP_RETURN:
		return;
	case ABORT_FIXUP_FAILED:
		/* Deliver a SIGILL to the process */
		ksig.signb = SIGILL;
		ksig.code = 0;
		td->td_frame = tf;
		goto do_trapsignal;
	default:
		break;
	}

	/* Prefetch aborts cannot happen in kernel mode */
	if (__predict_false(!TRAP_USERMODE(tf)))
		dab_fatal(tf, 0, tf->tf_pc, NULL, &ksig);
	td->td_pticks = 0;


	/* OK, validate the address; code can only execute in USER space. */
	if (__predict_false(fault_pc >= VM_MAXUSER_ADDRESS ||
	    (fault_pc < VM_MIN_ADDRESS && vector_page == ARM_VECTORS_LOW))) {
		ksig.signb = SIGSEGV;
		ksig.code = 0;
		goto do_trapsignal;
	}

	map = &td->td_proc->p_vmspace->vm_map;
	va = trunc_page(fault_pc);

	/*
	 * See if the pmap can handle this fault on its own...
	 */
#ifdef DEBUG
	last_fault_code = -1;
#endif
	if (pmap_fault_fixup(map->pmap, va, VM_PROT_READ, 1))
		goto out;

	if (map != kernel_map) {
		PROC_LOCK(p);
		p->p_lock++;
		PROC_UNLOCK(p);
	}

	error = vm_fault(map, va, VM_PROT_READ | VM_PROT_EXECUTE,
	    VM_FAULT_NORMAL);
	if (map != kernel_map) {
		PROC_LOCK(p);
		p->p_lock--;
		PROC_UNLOCK(p);
	}

	if (__predict_true(error == 0))
		goto out;

	if (error == ENOMEM) {
		printf("VM: pid %d (%s), uid %d killed: "
		    "out of swap\n", td->td_proc->p_pid, td->td_name,
		    (td->td_proc->p_ucred) ?
		     td->td_proc->p_ucred->cr_uid : -1);
		ksig.signb = SIGKILL;
	} else {
		ksig.signb = SIGSEGV;
	}
	ksig.code = 0;

do_trapsignal:
	call_trapsignal(td, ksig.signb, ksig.code);

out:
	userret(td, tf);

}
Esempio n. 24
0
void
trap(struct trapframe *frame)
{
#ifdef KDTRACE_HOOKS
	struct reg regs;
#endif
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	int i = 0, ucode = 0, code;
	u_int type;
	register_t addr = 0;
	vm_offset_t eva;
	ksiginfo_t ksi;
#ifdef POWERFAIL_NMI
	static int lastalert = 0;
#endif

	PCPU_INC(cnt.v_trap);
	type = frame->tf_trapno;

#ifdef SMP
	/* Handler for NMI IPIs used for stopping CPUs. */
	if (type == T_NMI) {
		if (ipi_nmi_handler() == 0)
			goto out;
	}
#endif /* SMP */

#ifdef KDB
	if (kdb_active) {
		kdb_reenter();
		goto out;
	}
#endif

	if (type == T_RESERVED) {
		trap_fatal(frame, 0);
		goto out;
	}

#ifdef	HWPMC_HOOKS
	/*
	 * CPU PMCs interrupt using an NMI so we check for that first.
	 * If the HWPMC module is active, 'pmc_intr' will point to
	 * the function to be called.  A non-zero return value from the
	 * hook means that the NMI was handled by it and that we can
	 * return immediately.
	 */
	if (type == T_NMI && pmc_intr &&
	    (*pmc_intr)(PCPU_GET(cpuid), frame))
	    goto out;
#endif

	if (type == T_MCHK) {
		mca_intr();
		goto out;
	}

#ifdef KDTRACE_HOOKS
	/*
	 * A trap can occur while DTrace executes a probe. Before
	 * executing the probe, DTrace blocks re-scheduling and sets
	 * a flag in its per-cpu flags to indicate that it doesn't
	 * want to fault. On returning from the probe, the no-fault
	 * flag is cleared and finally re-scheduling is enabled.
	 */
	if ((type == T_PROTFLT || type == T_PAGEFLT) &&
	    dtrace_trap_func != NULL && (*dtrace_trap_func)(frame, type))
		goto out;
#endif

	if ((frame->tf_eflags & PSL_I) == 0) {
		/*
		 * Buggy application or kernel code has disabled
		 * interrupts and then trapped.  Enabling interrupts
		 * now is wrong, but it is better than running with
		 * interrupts disabled until they are accidentally
		 * enabled later.
		 */
		if (ISPL(frame->tf_cs) == SEL_UPL || (frame->tf_eflags & PSL_VM))
			uprintf(
			    "pid %ld (%s): trap %d with interrupts disabled\n",
			    (long)curproc->p_pid, curthread->td_name, type);
		else if (type != T_NMI && type != T_BPTFLT &&
		    type != T_TRCTRAP &&
		    frame->tf_eip != (int)cpu_switch_load_gs) {
			/*
			 * XXX not quite right, since this may be for a
			 * multiple fault in user mode.
			 */
			printf("kernel trap %d with interrupts disabled\n",
			    type);
			/*
			 * Page faults need interrupts disabled until later,
			 * and we shouldn't enable interrupts while holding
			 * a spin lock.
			 */
			if (type != T_PAGEFLT &&
			    td->td_md.md_spinlock_count == 0)
				enable_intr();
		}
	}
	eva = 0;
	code = frame->tf_err;
	if (type == T_PAGEFLT) {
		/*
		 * For some Cyrix CPUs, %cr2 is clobbered by
		 * interrupts.  This problem is worked around by using
		 * an interrupt gate for the pagefault handler.  We
		 * are finally ready to read %cr2 and conditionally
		 * reenable interrupts.  If we hold a spin lock, then
		 * we must not reenable interrupts.  This might be a
		 * spurious page fault.
		 */
		eva = rcr2();
		if (td->td_md.md_spinlock_count == 0)
			enable_intr();
	}

        if ((ISPL(frame->tf_cs) == SEL_UPL) ||
	    ((frame->tf_eflags & PSL_VM) && 
		!(curpcb->pcb_flags & PCB_VM86CALL))) {
		/* user trap */

		td->td_pticks = 0;
		td->td_frame = frame;
		addr = frame->tf_eip;
		if (td->td_ucred != p->p_ucred) 
			cred_update_thread(td);

		switch (type) {
		case T_PRIVINFLT:	/* privileged instruction fault */
			i = SIGILL;
			ucode = ILL_PRVOPC;
			break;

		case T_BPTFLT:		/* bpt instruction fault */
		case T_TRCTRAP:		/* trace trap */
			enable_intr();
#ifdef KDTRACE_HOOKS
			if (type == T_BPTFLT) {
				fill_frame_regs(frame, &regs);
				if (dtrace_pid_probe_ptr != NULL &&
				    dtrace_pid_probe_ptr(&regs) == 0)
					goto out;
			}
#endif
			frame->tf_eflags &= ~PSL_T;
			i = SIGTRAP;
			ucode = (type == T_TRCTRAP ? TRAP_TRACE : TRAP_BRKPT);
			break;

		case T_ARITHTRAP:	/* arithmetic trap */
#ifdef DEV_NPX
			ucode = npxtrap_x87();
			if (ucode == -1)
				goto userout;
#else
			ucode = 0;
#endif
			i = SIGFPE;
			break;

			/*
			 * The following two traps can happen in
			 * vm86 mode, and, if so, we want to handle
			 * them specially.
			 */
		case T_PROTFLT:		/* general protection fault */
		case T_STKFLT:		/* stack fault */
			if (frame->tf_eflags & PSL_VM) {
				i = vm86_emulate((struct vm86frame *)frame);
				if (i == 0)
					goto user;
				break;
			}
			i = SIGBUS;
			ucode = (type == T_PROTFLT) ? BUS_OBJERR : BUS_ADRERR;
			break;
		case T_SEGNPFLT:	/* segment not present fault */
			i = SIGBUS;
			ucode = BUS_ADRERR;
			break;
		case T_TSSFLT:		/* invalid TSS fault */
			i = SIGBUS;
			ucode = BUS_OBJERR;
			break;
		case T_ALIGNFLT:
			i = SIGBUS;
			ucode = BUS_ADRALN;
			break;
		case T_DOUBLEFLT:	/* double fault */
		default:
			i = SIGBUS;
			ucode = BUS_OBJERR;
			break;

		case T_PAGEFLT:		/* page fault */

			i = trap_pfault(frame, TRUE, eva);
#if defined(I586_CPU) && !defined(NO_F00F_HACK)
			if (i == -2) {
				/*
				 * The f00f hack workaround has triggered, so
				 * treat the fault as an illegal instruction 
				 * (T_PRIVINFLT) instead of a page fault.
				 */
				type = frame->tf_trapno = T_PRIVINFLT;

				/* Proceed as in that case. */
				ucode = ILL_PRVOPC;
				i = SIGILL;
				break;
			}
#endif
			if (i == -1)
				goto userout;
			if (i == 0)
				goto user;

			if (i == SIGSEGV)
				ucode = SEGV_MAPERR;
			else {
				if (prot_fault_translation == 0) {
					/*
					 * Autodetect.
					 * This check also covers the images
					 * without the ABI-tag ELF note.
					 */
					if (SV_CURPROC_ABI() == SV_ABI_FREEBSD
					    && p->p_osrel >= P_OSREL_SIGSEGV) {
						i = SIGSEGV;
						ucode = SEGV_ACCERR;
					} else {
						i = SIGBUS;
						ucode = BUS_PAGE_FAULT;
					}
				} else if (prot_fault_translation == 1) {
					/*
					 * Always compat mode.
					 */
					i = SIGBUS;
					ucode = BUS_PAGE_FAULT;
				} else {
					/*
					 * Always SIGSEGV mode.
					 */
					i = SIGSEGV;
					ucode = SEGV_ACCERR;
				}
			}
			addr = eva;
			break;

		case T_DIVIDE:		/* integer divide fault */
			ucode = FPE_INTDIV;
			i = SIGFPE;
			break;

#ifdef DEV_ISA
		case T_NMI:
#ifdef POWERFAIL_NMI
#ifndef TIMER_FREQ
#  define TIMER_FREQ 1193182
#endif
			if (time_second - lastalert > 10) {
				log(LOG_WARNING, "NMI: power fail\n");
				sysbeep(880, hz);
				lastalert = time_second;
			}
			goto userout;
#else /* !POWERFAIL_NMI */
			/* machine/parity/power fail/"kitchen sink" faults */
			if (isa_nmi(code) == 0) {
#ifdef KDB
				/*
				 * NMI can be hooked up to a pushbutton
				 * for debugging.
				 */
				if (kdb_on_nmi) {
					printf ("NMI ... going to debugger\n");
					kdb_trap(type, 0, frame);
				}
#endif /* KDB */
				goto userout;
			} else if (panic_on_nmi)
				panic("NMI indicates hardware failure");
			break;
#endif /* POWERFAIL_NMI */
#endif /* DEV_ISA */

		case T_OFLOW:		/* integer overflow fault */
			ucode = FPE_INTOVF;
			i = SIGFPE;
			break;

		case T_BOUND:		/* bounds check fault */
			ucode = FPE_FLTSUB;
			i = SIGFPE;
			break;

		case T_DNA:
#ifdef DEV_NPX
			KASSERT(PCB_USER_FPU(td->td_pcb),
			    ("kernel FPU ctx has leaked"));
			/* transparent fault (due to context switch "late") */
			if (npxdna())
				goto userout;
#endif
			uprintf("pid %d killed due to lack of floating point\n",
				p->p_pid);
			i = SIGKILL;
			ucode = 0;
			break;

		case T_FPOPFLT:		/* FPU operand fetch fault */
			ucode = ILL_COPROC;
			i = SIGILL;
			break;

		case T_XMMFLT:		/* SIMD floating-point exception */
#if defined(DEV_NPX) && !defined(CPU_DISABLE_SSE) && defined(I686_CPU)
			ucode = npxtrap_sse();
			if (ucode == -1)
				goto userout;
#else
			ucode = 0;
#endif
			i = SIGFPE;
			break;
#ifdef KDTRACE_HOOKS
		case T_DTRACE_RET:
			enable_intr();
			fill_frame_regs(frame, &regs);
			if (dtrace_return_probe_ptr != NULL &&
			    dtrace_return_probe_ptr(&regs) == 0)
				goto out;
			break;
#endif
		}
	} else {
		/* kernel trap */

		KASSERT(cold || td->td_ucred != NULL,
		    ("kernel trap doesn't have ucred"));
		switch (type) {
		case T_PAGEFLT:			/* page fault */
			(void) trap_pfault(frame, FALSE, eva);
			goto out;

		case T_DNA:
#ifdef DEV_NPX
			KASSERT(!PCB_USER_FPU(td->td_pcb),
			    ("Unregistered use of FPU in kernel"));
			if (npxdna())
				goto out;
#endif
			break;

		case T_ARITHTRAP:	/* arithmetic trap */
		case T_XMMFLT:		/* SIMD floating-point exception */
		case T_FPOPFLT:		/* FPU operand fetch fault */
			/*
			 * XXXKIB: for now disable any FPU traps in the
			 * kernel; handler registration seems to be overkill.
			 */
			trap_fatal(frame, 0);
			goto out;

			/*
			 * The following two traps can happen in
			 * vm86 mode, and, if so, we want to handle
			 * them specially.
			 */
		case T_PROTFLT:		/* general protection fault */
		case T_STKFLT:		/* stack fault */
			if (frame->tf_eflags & PSL_VM) {
				i = vm86_emulate((struct vm86frame *)frame);
				if (i != 0)
					/*
					 * returns to original process
					 */
					vm86_trap((struct vm86frame *)frame);
				goto out;
			}
			if (type == T_STKFLT)
				break;

			/* FALL THROUGH */

		case T_SEGNPFLT:	/* segment not present fault */
			if (curpcb->pcb_flags & PCB_VM86CALL)
				break;

			/*
			 * Invalid %fs's and %gs's can be created using
			 * procfs or PT_SETREGS or by invalidating the
			 * underlying LDT entry.  This causes a fault
			 * in kernel mode when the kernel attempts to
			 * switch contexts.  Lose the bad context
			 * (XXX) so that we can continue, and generate
			 * a signal.
			 */
			if (frame->tf_eip == (int)cpu_switch_load_gs) {
				curpcb->pcb_gs = 0;
#if 0				
				PROC_LOCK(p);
				kern_psignal(p, SIGBUS);
				PROC_UNLOCK(p);
#endif				
				goto out;
			}

			if (td->td_intr_nesting_level != 0)
				break;

			/*
			 * Invalid segment selectors and out of bounds
			 * %eip's and %esp's can be set up in user mode.
			 * This causes a fault in kernel mode when the
			 * kernel tries to return to user mode.  We want
			 * to get this fault so that we can fix the
			 * problem here and not have to check all the
			 * selectors and pointers when the user changes
			 * them.
			 */
			if (frame->tf_eip == (int)doreti_iret) {
				frame->tf_eip = (int)doreti_iret_fault;
				goto out;
			}
			if (frame->tf_eip == (int)doreti_popl_ds) {
				frame->tf_eip = (int)doreti_popl_ds_fault;
				goto out;
			}
			if (frame->tf_eip == (int)doreti_popl_es) {
				frame->tf_eip = (int)doreti_popl_es_fault;
				goto out;
			}
			if (frame->tf_eip == (int)doreti_popl_fs) {
				frame->tf_eip = (int)doreti_popl_fs_fault;
				goto out;
			}
			if (curpcb->pcb_onfault != NULL) {
				frame->tf_eip =
				    (int)curpcb->pcb_onfault;
				goto out;
			}
			break;

		case T_TSSFLT:
			/*
			 * PSL_NT can be set in user mode and isn't cleared
			 * automatically when the kernel is entered.  This
			 * causes a TSS fault when the kernel attempts to
			 * `iret' because the TSS link is uninitialized.  We
			 * want to get this fault so that we can fix the
			 * problem here and not every time the kernel is
			 * entered.
			 */
			if (frame->tf_eflags & PSL_NT) {
				frame->tf_eflags &= ~PSL_NT;
				goto out;
			}
			break;

		case T_TRCTRAP:	 /* trace trap */
			if (frame->tf_eip == (int)IDTVEC(lcall_syscall)) {
				/*
				 * We've just entered system mode via the
				 * syscall lcall.  Continue single stepping
				 * silently until the syscall handler has
				 * saved the flags.
				 */
				goto out;
			}
			if (frame->tf_eip == (int)IDTVEC(lcall_syscall) + 1) {
				/*
				 * The syscall handler has now saved the
				 * flags.  Stop single stepping it.
				 */
				frame->tf_eflags &= ~PSL_T;
				goto out;
			}
			/*
			 * Ignore debug register trace traps due to
			 * accesses in the user's address space, which
			 * can happen under several conditions such as
			 * if a user sets a watchpoint on a buffer and
			 * then passes that buffer to a system call.
			 * We still want to get TRCTRAPS for addresses
			 * in kernel space because that is useful when
			 * debugging the kernel.
			 */
			if (user_dbreg_trap() && 
			   !(curpcb->pcb_flags & PCB_VM86CALL)) {
				/*
				 * Reset the breakpoint bits because the
				 * processor doesn't clear them itself.
				 */
				load_dr6(rdr6() & 0xfffffff0);
				goto out;
			}
			/*
			 * FALLTHROUGH (TRCTRAP kernel mode, kernel address)
			 */
		case T_BPTFLT:
			/*
			 * If KDB is enabled, let it handle the debugger trap.
			 * Otherwise, debugger traps "can't happen".
			 */
#ifdef KDB
			if (kdb_trap(type, 0, frame))
				goto out;
#endif
			break;

#ifdef DEV_ISA
		case T_NMI:
#ifdef POWERFAIL_NMI
			if (time_second - lastalert > 10) {
				log(LOG_WARNING, "NMI: power fail\n");
				sysbeep(880, hz);
				lastalert = time_second;
			}
			goto out;
#else /* !POWERFAIL_NMI */
			/* machine/parity/power fail/"kitchen sink" faults */
			if (isa_nmi(code) == 0) {
#ifdef KDB
				/*
				 * NMI can be hooked up to a pushbutton
				 * for debugging.
				 */
				if (kdb_on_nmi) {
					printf ("NMI ... going to debugger\n");
					kdb_trap(type, 0, frame);
				}
#endif /* KDB */
				goto out;
			} else if (panic_on_nmi == 0)
				goto out;
			/* FALLTHROUGH */
#endif /* POWERFAIL_NMI */
#endif /* DEV_ISA */
		}

		trap_fatal(frame, eva);
		goto out;
	}

	/* Translate fault for emulators (e.g. Linux) */
	if (*p->p_sysent->sv_transtrap)
		i = (*p->p_sysent->sv_transtrap)(i, type);

	ksiginfo_init_trap(&ksi);
	ksi.ksi_signo = i;
	ksi.ksi_code = ucode;
	ksi.ksi_addr = (void *)addr;
	ksi.ksi_trapno = type;
	if (uprintf_signal) {
		uprintf("pid %d comm %s: signal %d err %x code %d type %d "
		    "addr 0x%x esp 0x%08x eip 0x%08x "
		    "<%02x %02x %02x %02x %02x %02x %02x %02x>\n",
		    p->p_pid, p->p_comm, i, frame->tf_err, ucode, type, addr,
		    frame->tf_esp, frame->tf_eip,
		    fubyte((void *)(frame->tf_eip + 0)),
		    fubyte((void *)(frame->tf_eip + 1)),
		    fubyte((void *)(frame->tf_eip + 2)),
		    fubyte((void *)(frame->tf_eip + 3)),
		    fubyte((void *)(frame->tf_eip + 4)),
		    fubyte((void *)(frame->tf_eip + 5)),
		    fubyte((void *)(frame->tf_eip + 6)),
		    fubyte((void *)(frame->tf_eip + 7)));
	}
	KASSERT((read_eflags() & PSL_I) != 0, ("interrupts disabled"));
	trapsignal(td, &ksi);

#ifdef DEBUG
	if (type <= MAX_TRAP_MSG) {
		uprintf("fatal process exception: %s",
			trap_msg[type]);
		if ((type == T_PAGEFLT) || (type == T_PROTFLT))
			uprintf(", fault VA = 0x%lx", (u_long)eva);
		uprintf("\n");
	}
#endif

user:
	userret(td, frame);
	KASSERT(PCB_USER_FPU(td->td_pcb),
	    ("Return from trap with kernel FPU ctx leaked"));
userout:
out:
	return;
}
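The T_DIVIDE case above is delivered to userland as SIGFPE with si_code == FPE_INTDIV. A hedged illustration of observing that from a signal handler; the handler exits rather than returning, since returning would re-execute the faulting instruction.

#include <signal.h>
#include <string.h>
#include <unistd.h>

static void
fpe_handler(int sig, siginfo_t *si, void *uc)
{
	(void)sig;
	(void)uc;
	if (si->si_code == FPE_INTDIV)
		write(2, "integer divide fault\n", 21);
	_exit(1);
}

int
main(void)
{
	struct sigaction sa;
	volatile int zero = 0;

	memset(&sa, 0, sizeof(sa));
	sa.sa_sigaction = fpe_handler;
	sa.sa_flags = SA_SIGINFO;
	sigemptyset(&sa.sa_mask);
	sigaction(SIGFPE, &sa, NULL);
	return (1 / zero);
}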
Esempio n. 25
0
int
pmclog_configure_log(struct pmc_mdep *md, struct pmc_owner *po, int logfd)
{
	int error;
	struct proc *p;

	/*
	 * As long as it is possible to get a LOR between the pmc_sx lock and
	 * the proctree/allproc sx locks used for adding a new process, ensure
	 * the former is not held here.
	 */
	sx_assert(&pmc_sx, SA_UNLOCKED);
	PMCDBG(LOG,CFG,1, "config po=%p logfd=%d", po, logfd);

	p = po->po_owner;

	/* return EBUSY if a log file was already present */
	if (po->po_flags & PMC_PO_OWNS_LOGFILE)
		return (EBUSY);

	KASSERT(po->po_kthread == NULL,
	    ("[pmclog,%d] po=%p kthread (%p) already present", __LINE__, po,
		po->po_kthread));
	KASSERT(po->po_file == NULL,
	    ("[pmclog,%d] po=%p file (%p) already present", __LINE__, po,
		po->po_file));

	/* get a reference to the file state */
	error = fget_write(curthread, logfd, CAP_WRITE, &po->po_file);
	if (error)
		goto error;

	/* mark process as owning a log file */
	po->po_flags |= PMC_PO_OWNS_LOGFILE;
	error = kproc_create(pmclog_loop, po, &po->po_kthread,
	    RFHIGHPID, 0, "hwpmc: proc(%d)", p->p_pid);
	if (error)
		goto error;

	/* mark process as using HWPMCs */
	PROC_LOCK(p);
	p->p_flag |= P_HWPMC;
	PROC_UNLOCK(p);

	/* create a log initialization entry */
	PMCLOG_RESERVE_WITH_ERROR(po, INITIALIZE,
	    sizeof(struct pmclog_initialize));
	PMCLOG_EMIT32(PMC_VERSION);
	PMCLOG_EMIT32(md->pmd_cputype);
	PMCLOG_DESPATCH(po);

	return (0);

 error:
	/* shutdown the thread */
	if (po->po_kthread)
		pmclog_stop_kthread(po);

	KASSERT(po->po_kthread == NULL, ("[pmclog,%d] po=%p kthread not "
	    "stopped", __LINE__, po));

	if (po->po_file)
		(void) fdrop(po->po_file, curthread);
	po->po_file  = NULL;	/* clear file and error state */
	po->po_error = 0;

	return (error);
}
Esempio n. 26
0
static void
linux_rt_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct sigacts *psp;
	struct trapframe *regs;
	struct l_rt_sigframe *fp, frame;
	int oonstack;
	int sig;
	int code;
	
	sig = ksi->ksi_signo;
	code = ksi->ksi_code;
	PROC_LOCK_ASSERT(p, MA_OWNED);
	psp = p->p_sigacts;
	mtx_assert(&psp->ps_mtx, MA_OWNED);
	regs = td->td_frame;
	oonstack = sigonstack(regs->tf_rsp);

#ifdef DEBUG
	if (ldebug(rt_sendsig))
		printf(ARGS(rt_sendsig, "%p, %d, %p, %u"),
		    catcher, sig, (void*)mask, code);
#endif
	/*
	 * Allocate space for the signal handler context.
	 */
	if ((td->td_pflags & TDP_ALTSTACK) && !oonstack &&
	    SIGISMEMBER(psp->ps_sigonstack, sig)) {
		fp = (struct l_rt_sigframe *)(td->td_sigstk.ss_sp +
		    td->td_sigstk.ss_size - sizeof(struct l_rt_sigframe));
	} else
		fp = (struct l_rt_sigframe *)regs->tf_rsp - 1;
	mtx_unlock(&psp->ps_mtx);

	/*
	 * Build the argument list for the signal handler.
	 */
	if (p->p_sysent->sv_sigtbl)
		if (sig <= p->p_sysent->sv_sigsize)
			sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];

	bzero(&frame, sizeof(frame));

	frame.sf_handler = PTROUT(catcher);
	frame.sf_sig = sig;
	frame.sf_siginfo = PTROUT(&fp->sf_si);
	frame.sf_ucontext = PTROUT(&fp->sf_sc);

	/* Fill in POSIX parts */
	ksiginfo_to_lsiginfo(ksi, &frame.sf_si, sig);

	/*
	 * Build the signal context to be used by sigreturn.
	 */
	frame.sf_sc.uc_flags = 0;		/* XXX ??? */
	frame.sf_sc.uc_link = 0;		/* XXX ??? */

	frame.sf_sc.uc_stack.ss_sp = PTROUT(td->td_sigstk.ss_sp);
	frame.sf_sc.uc_stack.ss_size = td->td_sigstk.ss_size;
	frame.sf_sc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK)
	    ? ((oonstack) ? LINUX_SS_ONSTACK : 0) : LINUX_SS_DISABLE;
	PROC_UNLOCK(p);

	bsd_to_linux_sigset(mask, &frame.sf_sc.uc_sigmask);

	frame.sf_sc.uc_mcontext.sc_mask   = frame.sf_sc.uc_sigmask.__bits[0];
	frame.sf_sc.uc_mcontext.sc_edi    = regs->tf_rdi;
	frame.sf_sc.uc_mcontext.sc_esi    = regs->tf_rsi;
	frame.sf_sc.uc_mcontext.sc_ebp    = regs->tf_rbp;
	frame.sf_sc.uc_mcontext.sc_ebx    = regs->tf_rbx;
	frame.sf_sc.uc_mcontext.sc_edx    = regs->tf_rdx;
	frame.sf_sc.uc_mcontext.sc_ecx    = regs->tf_rcx;
	frame.sf_sc.uc_mcontext.sc_eax    = regs->tf_rax;
	frame.sf_sc.uc_mcontext.sc_eip    = regs->tf_rip;
	frame.sf_sc.uc_mcontext.sc_cs     = regs->tf_cs;
	frame.sf_sc.uc_mcontext.sc_gs     = regs->tf_gs;
	frame.sf_sc.uc_mcontext.sc_fs     = regs->tf_fs;
	frame.sf_sc.uc_mcontext.sc_es     = regs->tf_es;
	frame.sf_sc.uc_mcontext.sc_ds     = regs->tf_ds;
	frame.sf_sc.uc_mcontext.sc_eflags = regs->tf_rflags;
	frame.sf_sc.uc_mcontext.sc_esp_at_signal = regs->tf_rsp;
	frame.sf_sc.uc_mcontext.sc_ss     = regs->tf_ss;
	frame.sf_sc.uc_mcontext.sc_err    = regs->tf_err;
	frame.sf_sc.uc_mcontext.sc_cr2    = (u_int32_t)(uintptr_t)ksi->ksi_addr;
	frame.sf_sc.uc_mcontext.sc_trapno = bsd_to_linux_trapcode(code);

#ifdef DEBUG
	if (ldebug(rt_sendsig))
		printf(LMSG("rt_sendsig flags: 0x%x, sp: %p, ss: 0x%lx, mask: 0x%x"),
		    frame.sf_sc.uc_stack.ss_flags, td->td_sigstk.ss_sp,
		    td->td_sigstk.ss_size, frame.sf_sc.uc_mcontext.sc_mask);
#endif

	if (copyout(&frame, fp, sizeof(frame)) != 0) {
		/*
		 * Process has trashed its stack; give it an illegal
		 * instruction to halt it in its tracks.
		 */
#ifdef DEBUG
		if (ldebug(rt_sendsig))
			printf(LMSG("rt_sendsig: bad stack %p, oonstack=%x"),
			    fp, oonstack);
#endif
		PROC_LOCK(p);
		sigexit(td, SIGILL);
	}

	/*
	 * Build context to run handler in.
	 */
	regs->tf_rsp = PTROUT(fp);
	regs->tf_rip = p->p_sysent->sv_sigcode_base + linux_sznonrtsigcode;
	regs->tf_rflags &= ~(PSL_T | PSL_D);
	regs->tf_cs = _ucode32sel;
	regs->tf_ss = _udatasel;
	regs->tf_ds = _udatasel;
	regs->tf_es = _udatasel;
	regs->tf_fs = _ufssel;
	regs->tf_gs = _ugssel;
	regs->tf_flags = TF_HASSEGS;
	set_pcb_flags(td->td_pcb, PCB_FULL_IRET);
	PROC_LOCK(p);
	mtx_lock(&psp->ps_mtx);
}
Esempio n. 27
0
void debugger_getprocs_callback(struct allocation_t* ref)
{
	if (!ref)
		return;

	struct message_t* message = __get(ref);
	if (!message)
		return;

	// Only handle requests
	if (message->header.request != 1)
		goto cleanup;

	int(*_sx_slock)(struct sx *sx, int opts, const char *file, int line) = kdlsym(_sx_slock);
	void(*_sx_sunlock)(struct sx *sx, const char *file, int line) = kdlsym(_sx_sunlock);
	void(*_mtx_lock_flags)(struct mtx *m, int opts, const char *file, int line) = kdlsym(_mtx_lock_flags);
	void(*_mtx_unlock_flags)(struct mtx *m, int opts, const char *file, int line) = kdlsym(_mtx_unlock_flags);
	struct sx* allproclock = (struct sx*)kdlsym(allproc_lock);
	struct proclist* allproc = (struct proclist*)*(uint64_t*)kdlsym(allproc);

	void(*vmspace_free)(struct vmspace *) = kdlsym(vmspace_free);
	struct vmspace* (*vmspace_acquire_ref)(struct proc *) = kdlsym(vmspace_acquire_ref);
	void(*_vm_map_lock_read)(vm_map_t map, const char *file, int line) = kdlsym(_vm_map_lock_read);
	void(*_vm_map_unlock_read)(vm_map_t map, const char *file, int line) = kdlsym(_vm_map_unlock_read);

	uint64_t procCount = 0;
	struct proc* p = NULL;
	struct debugger_getprocs_t getproc = { 0 };

	sx_slock(allproclock);
	FOREACH_PROC_IN_SYSTEM(p)
	{
		PROC_LOCK(p);
		// Zero out our process information
		kmemset(&getproc, 0, sizeof(getproc));

		// Get the vm map; skip processes that are exiting and no
		// longer have a vmspace to inspect
		struct vmspace* vm = vmspace_acquire_ref(p);
		if (!vm) {
			PROC_UNLOCK(p);
			continue;
		}
		vm_map_t map = &vm->vm_map;
		vm_map_lock_read(map);

		// The first map entry is normally the executable's text segment
		struct vm_map_entry* entry = map->header.next;

		// Copy over all of the address information
		getproc.process_id = p->p_pid;
		getproc.text_address = (uint64_t)entry->start;
		getproc.text_size = (uint64_t)entry->end - entry->start;
		getproc.data_address = (uint64_t)p->p_vmspace->vm_daddr;
		getproc.data_size = p->p_vmspace->vm_dsize;
		// Copy over the name and path
		kmemcpy(getproc.process_name, p->p_comm, sizeof(getproc.process_name));
		kmemcpy(getproc.path, p->p_elfpath, sizeof(getproc.path));
		// Write it back to the PC
		kwrite(message->socket, &getproc, sizeof(getproc));
		procCount++;

		// Free the vmmap
		vm_map_unlock_read(map);
		vmspace_free(vm);

		PROC_UNLOCK(p);
	}
	sx_sunlock(allproclock);
	// Send a terminator record (filled with 0xDD) so the receiving side
	// knows the process listing is complete
	kmemset(&getproc, 0xDD, sizeof(getproc));
	kwrite(message->socket, &getproc, sizeof(getproc));

cleanup:
	__dec(ref);
}
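The records streamed above are raw debugger_getprocs_t structures followed by a terminator record filled with 0xDD. A hypothetical receiving side (not part of the payload) could drain the stream as sketched below; the structure must mirror the kernel-side layout byte for byte, and the process_name/path buffer sizes here are placeholders rather than the payload's real sizes.

#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

/* Must mirror the kernel-side struct byte for byte; sizes are placeholders. */
struct debugger_getprocs_t {
	uint64_t process_id;
	uint64_t text_address;
	uint64_t text_size;
	uint64_t data_address;
	uint64_t data_size;
	char process_name[32];
	char path[256];
};

/* Read exactly len bytes, handling short reads from the socket. */
static int
read_full(int fd, void *buf, size_t len)
{
	size_t off = 0;

	while (off < len) {
		ssize_t n = read(fd, (char *)buf + off, len - off);
		if (n <= 0)
			return (-1);
		off += (size_t)n;
	}
	return (0);
}

static int
read_proc_list(int sock)
{
	struct debugger_getprocs_t rec, terminator;

	memset(&terminator, 0xDD, sizeof(terminator));
	for (;;) {
		if (read_full(sock, &rec, sizeof(rec)) != 0)
			return (-1);	/* connection error or short read */
		if (memcmp(&rec, &terminator, sizeof(rec)) == 0)
			return (0);	/* 0xDD-filled record marks the end */
		printf("pid %llu  text 0x%llx (+0x%llx)  %s\n",
		    (unsigned long long)rec.process_id,
		    (unsigned long long)rec.text_address,
		    (unsigned long long)rec.text_size, rec.process_name);
	}
}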
Example No. 28
0
/*
 * Deliver a signal to the process.
 *
 * The user stack is set up so that the signal trampoline (sigcode)
 * calls the handler and then invokes the sigreturn routine below.
 * After sigreturn restores the signal mask, the stack, and the
 * frame pointer, control returns to the user-specified pc and
 * processor flags.
 */
static void
linux_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct sigacts *psp;
	struct trapframe *regs;
	struct l_sigframe *fp, frame;
	l_sigset_t lmask;
	int oonstack, i;
	int sig, code;

	sig = ksi->ksi_signo;
	code = ksi->ksi_code;
	PROC_LOCK_ASSERT(p, MA_OWNED);
	psp = p->p_sigacts;
	mtx_assert(&psp->ps_mtx, MA_OWNED);
	if (SIGISMEMBER(psp->ps_siginfo, sig)) {
		/* Signal handler installed with SA_SIGINFO. */
		linux_rt_sendsig(catcher, ksi, mask);
		return;
	}

	regs = td->td_frame;
	oonstack = sigonstack(regs->tf_rsp);

#ifdef DEBUG
	if (ldebug(sendsig))
		printf(ARGS(sendsig, "%p, %d, %p, %u"),
		    catcher, sig, (void*)mask, code);
#endif

	/*
	 * Allocate space for the signal handler context.
	 */
	if ((td->td_pflags & TDP_ALTSTACK) && !oonstack &&
	    SIGISMEMBER(psp->ps_sigonstack, sig)) {
		fp = (struct l_sigframe *)(td->td_sigstk.ss_sp +
		    td->td_sigstk.ss_size - sizeof(struct l_sigframe));
	} else
		fp = (struct l_sigframe *)regs->tf_rsp - 1;
	mtx_unlock(&psp->ps_mtx);
	PROC_UNLOCK(p);

	/*
	 * Build the argument list for the signal handler.
	 */
	if (p->p_sysent->sv_sigtbl)
		if (sig <= p->p_sysent->sv_sigsize)
			sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];

	bzero(&frame, sizeof(frame));

	frame.sf_handler = PTROUT(catcher);
	frame.sf_sig = sig;

	bsd_to_linux_sigset(mask, &lmask);

	/*
	 * Build the signal context to be used by sigreturn.
	 */
	frame.sf_sc.sc_mask   = lmask.__bits[0];
	frame.sf_sc.sc_gs     = regs->tf_gs;
	frame.sf_sc.sc_fs     = regs->tf_fs;
	frame.sf_sc.sc_es     = regs->tf_es;
	frame.sf_sc.sc_ds     = regs->tf_ds;
	frame.sf_sc.sc_edi    = regs->tf_rdi;
	frame.sf_sc.sc_esi    = regs->tf_rsi;
	frame.sf_sc.sc_ebp    = regs->tf_rbp;
	frame.sf_sc.sc_ebx    = regs->tf_rbx;
	frame.sf_sc.sc_edx    = regs->tf_rdx;
	frame.sf_sc.sc_ecx    = regs->tf_rcx;
	frame.sf_sc.sc_eax    = regs->tf_rax;
	frame.sf_sc.sc_eip    = regs->tf_rip;
	frame.sf_sc.sc_cs     = regs->tf_cs;
	frame.sf_sc.sc_eflags = regs->tf_rflags;
	frame.sf_sc.sc_esp_at_signal = regs->tf_rsp;
	frame.sf_sc.sc_ss     = regs->tf_ss;
	frame.sf_sc.sc_err    = regs->tf_err;
	frame.sf_sc.sc_cr2    = (u_int32_t)(uintptr_t)ksi->ksi_addr;
	frame.sf_sc.sc_trapno = bsd_to_linux_trapcode(code);

	for (i = 0; i < (LINUX_NSIG_WORDS-1); i++)
		frame.sf_extramask[i] = lmask.__bits[i+1];

	if (copyout(&frame, fp, sizeof(frame)) != 0) {
		/*
		 * Process has trashed its stack; give it an illegal
		 * instruction to halt it in its tracks.
		 */
		PROC_LOCK(p);
		sigexit(td, SIGILL);
	}

	/*
	 * Build context to run handler in.
	 */
	regs->tf_rsp = PTROUT(fp);
	regs->tf_rip = p->p_sysent->sv_sigcode_base;
	regs->tf_rflags &= ~(PSL_T | PSL_D);
	regs->tf_cs = _ucode32sel;
	regs->tf_ss = _udatasel;
	regs->tf_ds = _udatasel;
	regs->tf_es = _udatasel;
	regs->tf_fs = _ufssel;
	regs->tf_gs = _ugssel;
	regs->tf_flags = TF_HASSEGS;
	set_pcb_flags(td->td_pcb, PCB_FULL_IRET);
	PROC_LOCK(p);
	mtx_lock(&psp->ps_mtx);
}
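Because the old-style frame stores only the first word of the blocked-signal mask in sc_mask, the remaining words travel in sf_extramask. The matching sigreturn path has to reassemble them; the helper below is a minimal sketch of that reassembly (using the same l_sigset_t/LINUX_NSIG_WORDS definitions as above, not the verbatim linux_sigreturn() source).

/*
 * Sketch: restore the signal mask from a non-RT Linux frame.  "frame"
 * is a copy of the user's struct l_sigframe already fetched with copyin().
 */
static void
restore_linux_mask(struct thread *td, const struct l_sigframe *frame)
{
	l_sigset_t lmask;
	sigset_t bmask;
	int i;

	/* Recombine the split mask words written out by linux_sendsig(). */
	lmask.__bits[0] = frame->sf_sc.sc_mask;
	for (i = 0; i < (LINUX_NSIG_WORDS - 1); i++)
		lmask.__bits[i + 1] = frame->sf_extramask[i];

	/* Convert back to a BSD sigset and install it. */
	linux_to_bsd_sigset(&lmask, &bmask);
	kern_sigprocmask(td, SIG_SETMASK, &bmask, NULL, 0);
}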
Example No. 29
0
static int
create_thread(struct thread *td, mcontext_t *ctx,
	    void (*start_func)(void *), void *arg,
	    char *stack_base, size_t stack_size,
	    char *tls_base,
	    long *child_tid, long *parent_tid,
	    int flags, struct rtprio *rtp)
{
	stack_t stack;
	struct thread *newtd;
	struct proc *p;
	int error;

	p = td->td_proc;

	/* Racy check, but it is cheap. */
	if (p->p_numthreads >= max_threads_per_proc) {
		++max_threads_hits;
		return (EPROCLIM);
	}

	if (rtp != NULL) {
		switch(rtp->type) {
		case RTP_PRIO_REALTIME:
		case RTP_PRIO_FIFO:
			/* Only root can set scheduler policy */
			if (priv_check(td, PRIV_SCHED_SETPOLICY) != 0)
				return (EPERM);
			if (rtp->prio > RTP_PRIO_MAX)
				return (EINVAL);
			break;
		case RTP_PRIO_NORMAL:
			rtp->prio = 0;
			break;
		default:
			return (EINVAL);
		}
	}

#ifdef RACCT
	PROC_LOCK(td->td_proc);
	error = racct_add(p, RACCT_NTHR, 1);
	PROC_UNLOCK(td->td_proc);
	if (error != 0)
		return (EPROCLIM);
#endif

	/* Initialize our td */
	newtd = thread_alloc(0);
	if (newtd == NULL) {
		error = ENOMEM;
		goto fail;
	}

	cpu_set_upcall(newtd, td);

	/*
	 * Try the copyout as soon as we allocate the td so we don't
	 * have to tear things down in a failure case below.
	 * Here we copy out the tid to two places, one for the child and
	 * one for the parent, because pthread can create a detached
	 * thread: if the parent wants to access the child's tid safely,
	 * it has to provide its own storage, because the child thread
	 * may exit quickly and its memory be freed before the parent
	 * thread can access it.
	 */
	if ((child_tid != NULL &&
	    suword_lwpid(child_tid, newtd->td_tid)) ||
	    (parent_tid != NULL &&
	    suword_lwpid(parent_tid, newtd->td_tid))) {
		thread_free(newtd);
		error = EFAULT;
		goto fail;
	}

	bzero(&newtd->td_startzero,
	    __rangeof(struct thread, td_startzero, td_endzero));
	bcopy(&td->td_startcopy, &newtd->td_startcopy,
	    __rangeof(struct thread, td_startcopy, td_endcopy));
	newtd->td_proc = td->td_proc;
	newtd->td_ucred = crhold(td->td_ucred);

	if (ctx != NULL) { /* old way to set user context */
		error = set_mcontext(newtd, ctx);
		if (error != 0) {
			thread_free(newtd);
			crfree(td->td_ucred);
			goto fail;
		}
	} else {
		/* Set up our machine context. */
		stack.ss_sp = stack_base;
		stack.ss_size = stack_size;
		/* Set upcall address to user thread entry function. */
		cpu_set_upcall_kse(newtd, start_func, arg, &stack);
		/* Setup user TLS address and TLS pointer register. */
		error = cpu_set_user_tls(newtd, tls_base);
		if (error != 0) {
			thread_free(newtd);
			crfree(td->td_ucred);
			goto fail;
		}
	}

	PROC_LOCK(td->td_proc);
	td->td_proc->p_flag |= P_HADTHREADS;
	thread_link(newtd, p); 
	bcopy(p->p_comm, newtd->td_name, sizeof(newtd->td_name));
	thread_lock(td);
	/* let the scheduler know about these things. */
	sched_fork_thread(td, newtd);
	thread_unlock(td);
	if (P_SHOULDSTOP(p))
		newtd->td_flags |= TDF_ASTPENDING | TDF_NEEDSUSPCHK;
	PROC_UNLOCK(p);

	tidhash_add(newtd);

	thread_lock(newtd);
	if (rtp != NULL) {
		if (!(td->td_pri_class == PRI_TIMESHARE &&
		      rtp->type == RTP_PRIO_NORMAL)) {
			rtp_to_pri(rtp, newtd);
			sched_prio(newtd, newtd->td_user_pri);
		} /* ignore timesharing class */
	}
	TD_SET_CAN_RUN(newtd);
	sched_add(newtd, SRQ_BORING);
	thread_unlock(newtd);

	return (0);

fail:
#ifdef RACCT
	PROC_LOCK(p);
	racct_sub(p, RACCT_NTHR, 1);
	PROC_UNLOCK(p);
#endif
	return (error);
}
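The ctx == NULL branch above is the path exercised by the thr_new(2) machinery, which copies a struct thr_param in from user space and passes its fields straight through. A condensed, hypothetical wrapper along those lines is sketched below; it is not the verbatim kern_thr.c source, the field names follow <sys/thr.h>, and it assumes the same kernel includes as the function above.

/*
 * Sketch of a thr_new(2)-style entry point driving create_thread():
 * copy the user's thr_param in, optionally copy the rtprio, and hand
 * every field through unchanged.
 */
static int
thr_new_sketch(struct thread *td, struct thr_param *uparam, int param_size)
{
	struct thr_param param;
	struct rtprio rtp, *rtpp = NULL;
	int error;

	if (param_size < 0 || (size_t)param_size > sizeof(param))
		return (EINVAL);
	bzero(&param, sizeof(param));
	if ((error = copyin(uparam, &param, param_size)) != 0)
		return (error);

	/* Scheduling parameters are optional. */
	if (param.rtp != NULL) {
		if ((error = copyin(param.rtp, &rtp, sizeof(rtp))) != 0)
			return (error);
		rtpp = &rtp;
	}

	/* ctx == NULL selects the stack_base/tls_base path above. */
	return (create_thread(td, NULL, param.start_func, param.arg,
	    param.stack_base, param.stack_size, param.tls_base,
	    param.child_tid, param.parent_tid, param.flags, rtpp));
}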
Example No. 30
0
static void
do_fork(struct thread *td, struct fork_req *fr, struct proc *p2, struct thread *td2,
    struct vmspace *vm2, struct file *fp_procdesc)
{
	struct proc *p1, *pptr;
	int trypid;
	struct filedesc *fd;
	struct filedesc_to_leader *fdtol;
	struct sigacts *newsigacts;

	sx_assert(&proctree_lock, SX_SLOCKED);
	sx_assert(&allproc_lock, SX_XLOCKED);

	p1 = td->td_proc;

	trypid = fork_findpid(fr->fr_flags);

	sx_sunlock(&proctree_lock);

	p2->p_state = PRS_NEW;		/* protect against others */
	p2->p_pid = trypid;
	AUDIT_ARG_PID(p2->p_pid);
	LIST_INSERT_HEAD(&allproc, p2, p_list);
	allproc_gen++;
	LIST_INSERT_HEAD(PIDHASH(p2->p_pid), p2, p_hash);
	tidhash_add(td2);
	PROC_LOCK(p2);
	PROC_LOCK(p1);

	sx_xunlock(&allproc_lock);

	bcopy(&p1->p_startcopy, &p2->p_startcopy,
	    __rangeof(struct proc, p_startcopy, p_endcopy));
	pargs_hold(p2->p_args);

	PROC_UNLOCK(p1);

	bzero(&p2->p_startzero,
	    __rangeof(struct proc, p_startzero, p_endzero));

	/* Tell the prison that we exist. */
	prison_proc_hold(p2->p_ucred->cr_prison);

	PROC_UNLOCK(p2);

	/*
	 * Malloc things while we don't hold any locks.
	 */
	if (fr->fr_flags & RFSIGSHARE)
		newsigacts = NULL;
	else
		newsigacts = sigacts_alloc();

	/*
	 * Copy filedesc.
	 */
	if (fr->fr_flags & RFCFDG) {
		fd = fdinit(p1->p_fd, false);
		fdtol = NULL;
	} else if (fr->fr_flags & RFFDG) {
		fd = fdcopy(p1->p_fd);
		fdtol = NULL;
	} else {
		fd = fdshare(p1->p_fd);
		if (p1->p_fdtol == NULL)
			p1->p_fdtol = filedesc_to_leader_alloc(NULL, NULL,
			    p1->p_leader);
		if ((fr->fr_flags & RFTHREAD) != 0) {
			/*
			 * Shared file descriptor table, and shared
			 * process leaders.
			 */
			fdtol = p1->p_fdtol;
			FILEDESC_XLOCK(p1->p_fd);
			fdtol->fdl_refcount++;
			FILEDESC_XUNLOCK(p1->p_fd);
		} else {
			/* 
			 * Shared file descriptor table, and different
			 * process leaders.
			 */
			fdtol = filedesc_to_leader_alloc(p1->p_fdtol,
			    p1->p_fd, p2);
		}
	}
	/*
	 * Make a proc table entry for the new process.
	 * Start by zeroing the section of proc that is zero-initialized,
	 * then copy the section that is copied directly from the parent.
	 */

	PROC_LOCK(p2);
	PROC_LOCK(p1);

	bzero(&td2->td_startzero,
	    __rangeof(struct thread, td_startzero, td_endzero));

	bcopy(&td->td_startcopy, &td2->td_startcopy,
	    __rangeof(struct thread, td_startcopy, td_endcopy));

	bcopy(&p2->p_comm, &td2->td_name, sizeof(td2->td_name));
	td2->td_sigstk = td->td_sigstk;
	td2->td_flags = TDF_INMEM;
	td2->td_lend_user_pri = PRI_MAX;

#ifdef VIMAGE
	td2->td_vnet = NULL;
	td2->td_vnet_lpush = NULL;
#endif

	/*
	 * Allow the scheduler to initialize the child.
	 */
	thread_lock(td);
	sched_fork(td, td2);
	thread_unlock(td);

	/*
	 * Duplicate sub-structures as needed.
	 * Increase reference counts on shared objects.
	 */
	p2->p_flag = P_INMEM;
	p2->p_flag2 = p1->p_flag2 & (P2_NOTRACE | P2_NOTRACE_EXEC | P2_TRAPCAP);
	p2->p_swtick = ticks;
	if (p1->p_flag & P_PROFIL)
		startprofclock(p2);

	/*
	 * Whilst the proc lock is held, copy the VM domain data out
	 * using the VM domain method.
	 */
	vm_domain_policy_init(&p2->p_vm_dom_policy);
	vm_domain_policy_localcopy(&p2->p_vm_dom_policy,
	    &p1->p_vm_dom_policy);

	if (fr->fr_flags & RFSIGSHARE) {
		p2->p_sigacts = sigacts_hold(p1->p_sigacts);
	} else {
		sigacts_copy(newsigacts, p1->p_sigacts);
		p2->p_sigacts = newsigacts;
	}

	if (fr->fr_flags & RFTSIGZMB)
	        p2->p_sigparent = RFTSIGNUM(fr->fr_flags);
	else if (fr->fr_flags & RFLINUXTHPN)
	        p2->p_sigparent = SIGUSR1;
	else
	        p2->p_sigparent = SIGCHLD;

	p2->p_textvp = p1->p_textvp;
	p2->p_fd = fd;
	p2->p_fdtol = fdtol;

	if (p1->p_flag2 & P2_INHERIT_PROTECTED) {
		p2->p_flag |= P_PROTECTED;
		p2->p_flag2 |= P2_INHERIT_PROTECTED;
	}

	/*
	 * p_limit is copy-on-write.  Bump its refcount.
	 */
	lim_fork(p1, p2);

	thread_cow_get_proc(td2, p2);

	pstats_fork(p1->p_stats, p2->p_stats);

	PROC_UNLOCK(p1);
	PROC_UNLOCK(p2);

	/* Bump references to the text vnode (for procfs). */
	if (p2->p_textvp)
		vrefact(p2->p_textvp);

	/*
	 * Set up linkage for kernel based threading.
	 */
	if ((fr->fr_flags & RFTHREAD) != 0) {
		mtx_lock(&ppeers_lock);
		p2->p_peers = p1->p_peers;
		p1->p_peers = p2;
		p2->p_leader = p1->p_leader;
		mtx_unlock(&ppeers_lock);
		PROC_LOCK(p1->p_leader);
		if ((p1->p_leader->p_flag & P_WEXIT) != 0) {
			PROC_UNLOCK(p1->p_leader);
			/*
			 * The task leader is exiting, so process p1 is
			 * going to be killed shortly.  Since p1 obviously
			 * isn't dead yet, we know that the leader is either
			 * sending SIGKILL's to all the processes in this
			 * task or is sleeping waiting for all the peers to
			 * exit.  We let p1 complete the fork, but we need
			 * to go ahead and kill the new process p2 since
			 * the task leader may not get a chance to send
			 * SIGKILL to it.  We leave it on the list so that
			 * the task leader will wait for this new process
			 * to commit suicide.
			 */
			PROC_LOCK(p2);
			kern_psignal(p2, SIGKILL);
			PROC_UNLOCK(p2);
		} else
			PROC_UNLOCK(p1->p_leader);
	} else {
		p2->p_peers = NULL;
		p2->p_leader = p2;
	}

	sx_xlock(&proctree_lock);
	PGRP_LOCK(p1->p_pgrp);
	PROC_LOCK(p2);
	PROC_LOCK(p1);

	/*
	 * Preserve some more flags in subprocess.  P_PROFIL has already
	 * been preserved.
	 */
	p2->p_flag |= p1->p_flag & P_SUGID;
	td2->td_pflags |= (td->td_pflags & TDP_ALTSTACK) | TDP_FORKING;
	SESS_LOCK(p1->p_session);
	if (p1->p_session->s_ttyvp != NULL && p1->p_flag & P_CONTROLT)
		p2->p_flag |= P_CONTROLT;
	SESS_UNLOCK(p1->p_session);
	if (fr->fr_flags & RFPPWAIT)
		p2->p_flag |= P_PPWAIT;

	p2->p_pgrp = p1->p_pgrp;
	LIST_INSERT_AFTER(p1, p2, p_pglist);
	PGRP_UNLOCK(p1->p_pgrp);
	LIST_INIT(&p2->p_children);
	LIST_INIT(&p2->p_orphans);

	callout_init_mtx(&p2->p_itcallout, &p2->p_mtx, 0);

	/*
	 * If PF_FORK is set, the child process inherits the
	 * procfs ioctl flags from its parent.
	 */
	if (p1->p_pfsflags & PF_FORK) {
		p2->p_stops = p1->p_stops;
		p2->p_pfsflags = p1->p_pfsflags;
	}

	/*
	 * This begins the section where we must prevent the parent
	 * from being swapped.
	 */
	_PHOLD(p1);
	PROC_UNLOCK(p1);

	/*
	 * Attach the new process to its parent.
	 *
	 * If RFNOWAIT is set, the newly created process becomes a child
	 * of init.  This effectively disassociates the child from the
	 * parent.
	 */
	if ((fr->fr_flags & RFNOWAIT) != 0) {
		pptr = p1->p_reaper;
		p2->p_reaper = pptr;
	} else {
		p2->p_reaper = (p1->p_treeflag & P_TREE_REAPER) != 0 ?
		    p1 : p1->p_reaper;
		pptr = p1;
	}
	p2->p_pptr = pptr;
	LIST_INSERT_HEAD(&pptr->p_children, p2, p_sibling);
	LIST_INIT(&p2->p_reaplist);
	LIST_INSERT_HEAD(&p2->p_reaper->p_reaplist, p2, p_reapsibling);
	if (p2->p_reaper == p1)
		p2->p_reapsubtree = p2->p_pid;
	sx_xunlock(&proctree_lock);

	/* Inform accounting that we have forked. */
	p2->p_acflag = AFORK;
	PROC_UNLOCK(p2);

#ifdef KTRACE
	ktrprocfork(p1, p2);
#endif

	/*
	 * Finish creating the child process.  It will return via a
	 * different execution path later (i.e., directly into user mode).
	 */
	vm_forkproc(td, p2, td2, vm2, fr->fr_flags);

	if (fr->fr_flags == (RFFDG | RFPROC)) {
		VM_CNT_INC(v_forks);
		VM_CNT_ADD(v_forkpages, p2->p_vmspace->vm_dsize +
		    p2->p_vmspace->vm_ssize);
	} else if (fr->fr_flags == (RFFDG | RFPROC | RFPPWAIT | RFMEM)) {
		VM_CNT_INC(v_vforks);
		VM_CNT_ADD(v_vforkpages, p2->p_vmspace->vm_dsize +
		    p2->p_vmspace->vm_ssize);
	} else if (p1 == &proc0) {
		VM_CNT_INC(v_kthreads);
		VM_CNT_ADD(v_kthreadpages, p2->p_vmspace->vm_dsize +
		    p2->p_vmspace->vm_ssize);
	} else {
		VM_CNT_INC(v_rforks);
		VM_CNT_ADD(v_rforkpages, p2->p_vmspace->vm_dsize +
		    p2->p_vmspace->vm_ssize);
	}

	/*
	 * Associate the process descriptor with the process before anything
	 * can happen that might cause that process to need the descriptor.
	 * However, don't do this until after fork(2) can no longer fail.
	 */
	if (fr->fr_flags & RFPROCDESC)
		procdesc_new(p2, fr->fr_pd_flags);

	/*
	 * Both processes are set up, now check if any loadable modules want
	 * to adjust anything.
	 */
	EVENTHANDLER_INVOKE(process_fork, p1, p2, fr->fr_flags);

	/*
	 * Set the child start time and mark the process as being complete.
	 */
	PROC_LOCK(p2);
	PROC_LOCK(p1);
	microuptime(&p2->p_stats->p_start);
	PROC_SLOCK(p2);
	p2->p_state = PRS_NORMAL;
	PROC_SUNLOCK(p2);

#ifdef KDTRACE_HOOKS
	/*
	 * Tell the DTrace fasttrap provider about the new process so that any
	 * tracepoints inherited from the parent can be removed. We have to do
	 * this only after p_state is PRS_NORMAL since the fasttrap module will
	 * use pfind() later on.
	 */
	if ((fr->fr_flags & RFMEM) == 0 && dtrace_fasttrap_fork)
		dtrace_fasttrap_fork(p1, p2);
#endif
	/*
	 * Hold the process so that it cannot exit after we make it runnable,
	 * but before we wait for the debugger.
	 */
	_PHOLD(p2);
	if (p1->p_ptevents & PTRACE_FORK) {
		/*
		 * Arrange for debugger to receive the fork event.
		 *
		 * We can report PL_FLAG_FORKED regardless of
		 * P_FOLLOWFORK settings, but it does not make sense
		 * for a runaway child.
		 */
		td->td_dbgflags |= TDB_FORK;
		td->td_dbg_forked = p2->p_pid;
		td2->td_dbgflags |= TDB_STOPATFORK;
	}
	if (fr->fr_flags & RFPPWAIT) {
		td->td_pflags |= TDP_RFPPWAIT;
		td->td_rfppwait_p = p2;
		td->td_dbgflags |= TDB_VFORK;
	}
	PROC_UNLOCK(p2);

	/*
	 * Now can be swapped.
	 */
	_PRELE(p1);
	PROC_UNLOCK(p1);

	/*
	 * Tell any interested parties about the new process.
	 */
	knote_fork(p1->p_klist, p2->p_pid);
	SDT_PROBE3(proc, , , create, p2, p1, fr->fr_flags);

	if (fr->fr_flags & RFPROCDESC) {
		procdesc_finit(p2->p_procdesc, fp_procdesc);
		fdrop(fp_procdesc, td);
	}

	if ((fr->fr_flags & RFSTOPPED) == 0) {
		/*
		 * If RFSTOPPED not requested, make child runnable and
		 * add to run queue.
		 */
		thread_lock(td2);
		TD_SET_CAN_RUN(td2);
		sched_add(td2, SRQ_BORING);
		thread_unlock(td2);
		if (fr->fr_pidp != NULL)
			*fr->fr_pidp = p2->p_pid;
	} else {
		*fr->fr_procp = p2;
	}

	PROC_LOCK(p2);
	/*
	 * Wait until debugger is attached to child.
	 */
	while (td2->td_proc == p2 && (td2->td_dbgflags & TDB_STOPATFORK) != 0)
		cv_wait(&p2->p_dbgwait, &p2->p_mtx);
	_PRELE(p2);
	racct_proc_fork_done(p2);
	PROC_UNLOCK(p2);
}
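The EVENTHANDLER_INVOKE(process_fork, ...) call above is where loadable modules get to observe the fully constructed child. A minimal, hypothetical watcher module could hook that event as sketched below; the handler arguments (parent, child, fork flags) follow the process_fork event as declared in the FreeBSD headers, and the module name is made up for the example.

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/errno.h>
#include <sys/kernel.h>
#include <sys/module.h>
#include <sys/eventhandler.h>
#include <sys/proc.h>

static eventhandler_tag fork_tag;

/* Called after do_fork(): p1 is the parent, p2 the new child. */
static void
fork_watch(void *arg, struct proc *p1, struct proc *p2, int flags)
{
	printf("fork_watch: pid %d forked pid %d (flags 0x%x)\n",
	    p1->p_pid, p2->p_pid, flags);
}

static int
fork_watch_modevent(module_t mod, int what, void *arg)
{
	switch (what) {
	case MOD_LOAD:
		fork_tag = EVENTHANDLER_REGISTER(process_fork, fork_watch,
		    NULL, EVENTHANDLER_PRI_ANY);
		return (0);
	case MOD_UNLOAD:
		EVENTHANDLER_DEREGISTER(process_fork, fork_tag);
		return (0);
	default:
		return (EOPNOTSUPP);
	}
}

static moduledata_t fork_watch_mod = {
	"fork_watch",
	fork_watch_modevent,
	NULL
};

DECLARE_MODULE(fork_watch, fork_watch_mod, SI_SUB_DRIVERS, SI_ORDER_ANY);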