Exemplo n.º 1
0
void
ipi_process(struct cpu_info *ci, uint64_t ipi_mask)
{
	KASSERT(cpu_intr_p());

	if (ipi_mask & __BIT(IPI_NOP)) {
		ci->ci_evcnt_per_ipi[IPI_NOP].ev_count++;
		ipi_nop(ci);
	}
	if (ipi_mask & __BIT(IPI_AST)) {
		ci->ci_evcnt_per_ipi[IPI_AST].ev_count++;
		ipi_nop(ci);
	}
	if (ipi_mask & __BIT(IPI_SHOOTDOWN)) {
		ci->ci_evcnt_per_ipi[IPI_SHOOTDOWN].ev_count++;
		ipi_shootdown(ci);
	}
	if (ipi_mask & __BIT(IPI_SYNCICACHE)) {
		ci->ci_evcnt_per_ipi[IPI_SYNCICACHE].ev_count++;
		ipi_syncicache(ci);
	}
	if (ipi_mask & __BIT(IPI_SUSPEND)) {
		ci->ci_evcnt_per_ipi[IPI_SUSPEND].ev_count++;
		cpu_pause(NULL);
	}
	if (ipi_mask & __BIT(IPI_HALT)) {
		ci->ci_evcnt_per_ipi[IPI_HALT].ev_count++;
		ipi_halt();
	}
	if (ipi_mask & __BIT(IPI_XCALL)) {
		ci->ci_evcnt_per_ipi[IPI_XCALL].ev_count++;
		xc_ipi_handler();
	}
}
Exemplo n.º 2
0
int
pserialize_read_enter(void)
{

	KASSERT(!cpu_intr_p());
	return splsoftserial();
}
Exemplo n.º 3
0
void
intr_disestablish(void *ih)
{
	struct intrsource * const is = ih;

	KASSERT(!cpu_intr_p());
	KASSERT(!cpu_softintr_p());

	pic_disestablish_source(is);
}
/*
 * callout_halt:
 *
 *	Cancel a pending callout.  If in-flight, block until it completes.
 *	May not be called from a hard interrupt handler.  If the callout
 * 	can take locks, the caller of callout_halt() must not hold any of
 *	those locks, otherwise the two could deadlock.  If 'interlock' is
 *	non-NULL and we must wait for the callout to complete, it will be
 *	released and re-acquired before returning.
 */
bool
callout_halt(callout_t *cs, void *interlock)
{
	callout_impl_t *c = (callout_impl_t *)cs;
	struct callout_cpu *cc;
	struct lwp *l;
	kmutex_t *lock, *relock;
	bool expired;

	KASSERT(c->c_magic == CALLOUT_MAGIC);
	KASSERT(!cpu_intr_p());

	lock = callout_lock(c);
	relock = NULL;

	expired = ((c->c_flags & CALLOUT_FIRED) != 0);
	if ((c->c_flags & CALLOUT_PENDING) != 0)
		CIRCQ_REMOVE(&c->c_list);
	c->c_flags &= ~(CALLOUT_PENDING|CALLOUT_FIRED);

	l = curlwp;
	for (;;) {
		cc = c->c_cpu;
		if (__predict_true(cc->cc_active != c || cc->cc_lwp == l))
			break;
		if (interlock != NULL) {
			/*
			 * Avoid potential scheduler lock order problems by
			 * dropping the interlock without the callout lock
			 * held.
			 */
			mutex_spin_exit(lock);
			mutex_exit(interlock);
			relock = interlock;
			interlock = NULL;
		} else {
			/* XXX Better to do priority inheritance. */
			KASSERT(l->l_wchan == NULL);
			cc->cc_nwait++;
			cc->cc_ev_block.ev_count++;
			l->l_kpriority = true;
			sleepq_enter(&cc->cc_sleepq, l, cc->cc_lock);
			sleepq_enqueue(&cc->cc_sleepq, cc, "callout",
			    &sleep_syncobj);
			sleepq_block(0, false);
		}
		lock = callout_lock(c);
	}

	mutex_spin_exit(lock);
	if (__predict_false(relock != NULL))
		mutex_enter(relock);

	return expired;
}
Exemplo n.º 5
0
/*
 * pcu_save_lwp: save PCU state to the given LWP.
 */
void
pcu_save(const pcu_ops_t *pcu)
{
	const u_int id = pcu->pcu_id;
	lwp_t * const l = curlwp;

	KASSERT(!cpu_intr_p() && !cpu_softintr_p());

	if (__predict_true(l->l_pcu_cpu[id] == NULL)) {
		return;
	}
	pcu_lwp_op(pcu, l, PCU_SAVE | PCU_RELEASE);
}
Exemplo n.º 6
0
/*
 * pcu_load: load/initialize the PCU state of current LWP on current CPU.
 */
void
pcu_load(const pcu_ops_t *pcu)
{
	const u_int id = pcu->pcu_id;
	struct cpu_info *ci, *curci;
	lwp_t * const l = curlwp;
	uint64_t where;
	int s;

	KASSERT(!cpu_intr_p() && !cpu_softintr_p());

	s = splsoftclock();
	curci = curcpu();
	ci = l->l_pcu_cpu[id];

	/* Does this CPU already have our PCU state loaded? */
	if (ci == curci) {
		KASSERT(curci->ci_pcu_curlwp[id] == l);
		pcu->pcu_state_load(l, PCU_ENABLE);	/* Re-enable */
		splx(s);
		return;
	}

	/* If PCU state of this LWP is on the remote CPU - save it there. */
	if (ci) {
		splx(s);
		/* Note: there is a race; see description in the top. */
		where = xc_unicast(XC_HIGHPRI, (xcfunc_t)pcu_cpu_op,
		    __UNCONST(pcu), (void *)(PCU_SAVE | PCU_RELEASE), ci);
		xc_wait(where);

		/* Enter IPL_SOFTCLOCK and re-fetch the current CPU. */
		s = splsoftclock();
		curci = curcpu();
	}
	KASSERT(l->l_pcu_cpu[id] == NULL);

	/* Save the PCU state on the current CPU, if there is any. */
	pcu_cpu_op(pcu, PCU_SAVE | PCU_RELEASE);
	KASSERT(curci->ci_pcu_curlwp[id] == NULL);

	/*
	 * Finally, load the state for this LWP on this CPU.  Indicate to
	 * load function whether PCU was used before.  Note the usage.
	 */
	pcu_do_op(pcu, l, PCU_CLAIM | PCU_ENABLE | PCU_RELOAD);
	splx(s);
}
Exemplo n.º 7
0
/*
 * pserialize_perform:
 *
 *	Perform the write side of passive serialization.  The calling
 *	thread holds an exclusive lock on the data object(s) being updated.
 *	We wait until every processor in the system has made at least two
 *	passes through cpu_swichto().  The wait is made with the caller's
 *	update lock held, but is short term.
 */
void
pserialize_perform(pserialize_t psz)
{
	uint64_t xc;

	KASSERT(!cpu_intr_p());
	KASSERT(!cpu_softintr_p());

	if (__predict_false(panicstr != NULL)) {
		return;
	}
	KASSERT(psz->psz_owner == NULL);
	KASSERT(ncpu > 0);

	/*
	 * Set up the object and put it onto the queue.  The lock
	 * activity here provides the necessary memory barrier to
	 * make the caller's data update completely visible to
	 * other processors.
	 */
	psz->psz_owner = curlwp;
	kcpuset_copy(psz->psz_target, kcpuset_running);
	kcpuset_zero(psz->psz_pass);

	mutex_spin_enter(&psz_lock);
	TAILQ_INSERT_TAIL(&psz_queue0, psz, psz_chain);
	psz_work_todo++;

	do {
		mutex_spin_exit(&psz_lock);

		/*
		 * Force some context switch activity on every CPU, as
		 * the system may not be busy.  Pause to not flood.
		 */
		xc = xc_broadcast(XC_HIGHPRI, (xcfunc_t)nullop, NULL, NULL);
		xc_wait(xc);
		kpause("psrlz", false, 1, NULL);

		mutex_spin_enter(&psz_lock);
	} while (!kcpuset_iszero(psz->psz_target));

	psz_ev_excl.ev_count++;
	mutex_spin_exit(&psz_lock);

	psz->psz_owner = NULL;
}
Exemplo n.º 8
0
/*
 * pcu_discard: discard the PCU state of current LWP.
 * If the "usesw" flag is set, pcu_used_p() will return "true".
 */
void
pcu_discard(const pcu_ops_t *pcu, bool usesw)
{
	const u_int id = pcu->pcu_id;
	lwp_t * const l = curlwp;

	KASSERT(!cpu_intr_p() && !cpu_softintr_p());

	if (usesw)
		l->l_pcu_used[PCU_USER] |= (1 << id);
	else
		l->l_pcu_used[PCU_USER] &= ~(1 << id);

	if (__predict_true(l->l_pcu_cpu[id] == NULL)) {
		return;
	}
	pcu_lwp_op(pcu, l, PCU_RELEASE);
}
Exemplo n.º 9
0
void *
intr_establish(int irq, int ipl, int type, int (*func)(void *), void *arg)
{
	KASSERT(!cpu_intr_p());
	KASSERT(!cpu_softintr_p());

	for (size_t slot = 0; slot < PIC_MAXPICS; slot++) {
		struct pic_softc * const pic = pic_list[slot];
		if (pic == NULL || pic->pic_irqbase < 0)
			continue;
		if (pic->pic_irqbase <= irq
		    && irq < pic->pic_irqbase + pic->pic_maxsources) {
			return pic_establish_intr(pic, irq - pic->pic_irqbase,
			    ipl, type, func, arg);
		}
	}

	return NULL;
}
/*
 * rw_vector_enter:
 *
 *	Acquire a rwlock.
 */
void
rw_vector_enter(krwlock_t *rw, const krw_t op)
{
	uintptr_t owner, incr, need_wait, set_wait, curthread, next;
	turnstile_t *ts;
	int queue;
	lwp_t *l;
	LOCKSTAT_TIMER(slptime);
	LOCKSTAT_TIMER(slpcnt);
	LOCKSTAT_TIMER(spintime);
	LOCKSTAT_COUNTER(spincnt);
	LOCKSTAT_FLAG(lsflag);

	l = curlwp;
	curthread = (uintptr_t)l;

	RW_ASSERT(rw, !cpu_intr_p());
	RW_ASSERT(rw, curthread != 0);
	RW_WANTLOCK(rw, op);

	if (panicstr == NULL) {
		LOCKDEBUG_BARRIER(&kernel_lock, 1);
	}

	/*
	 * We play a slight trick here.  If we're a reader, we want
	 * increment the read count.  If we're a writer, we want to
	 * set the owner field and whe WRITE_LOCKED bit.
	 *
	 * In the latter case, we expect those bits to be zero,
	 * therefore we can use an add operation to set them, which
	 * means an add operation for both cases.
	 */
	if (__predict_true(op == RW_READER)) {
		incr = RW_READ_INCR;
		set_wait = RW_HAS_WAITERS;
		need_wait = RW_WRITE_LOCKED | RW_WRITE_WANTED;
		queue = TS_READER_Q;
	} else {
		RW_DASSERT(rw, op == RW_WRITER);
		incr = curthread | RW_WRITE_LOCKED;
		set_wait = RW_HAS_WAITERS | RW_WRITE_WANTED;
		need_wait = RW_WRITE_LOCKED | RW_THREAD;
		queue = TS_WRITER_Q;
	}

	LOCKSTAT_ENTER(lsflag);

	KPREEMPT_DISABLE(curlwp);
	for (owner = rw->rw_owner; ;) {
		/*
		 * Read the lock owner field.  If the need-to-wait
		 * indicator is clear, then try to acquire the lock.
		 */
		if ((owner & need_wait) == 0) {
			next = rw_cas(rw, owner, (owner + incr) &
			    ~RW_WRITE_WANTED);
			if (__predict_true(next == owner)) {
				/* Got it! */
				membar_enter();
				break;
			}

			/*
			 * Didn't get it -- spin around again (we'll
			 * probably sleep on the next iteration).
			 */
			owner = next;
			continue;
		}
		if (__predict_false(panicstr != NULL)) {
			kpreempt_enable();
			return;
		}
		if (__predict_false(RW_OWNER(rw) == curthread)) {
			rw_abort(rw, __func__, "locking against myself");
		}
		/*
		 * If the lock owner is running on another CPU, and
		 * there are no existing waiters, then spin.
		 */
		if (rw_oncpu(owner)) {
			LOCKSTAT_START_TIMER(lsflag, spintime);
			u_int count = SPINLOCK_BACKOFF_MIN;
			do {
				KPREEMPT_ENABLE(curlwp);
				SPINLOCK_BACKOFF(count);
				KPREEMPT_DISABLE(curlwp);
				owner = rw->rw_owner;
			} while (rw_oncpu(owner));
			LOCKSTAT_STOP_TIMER(lsflag, spintime);
			LOCKSTAT_COUNT(spincnt, 1);
			if ((owner & need_wait) == 0)
				continue;
		}

		/*
		 * Grab the turnstile chain lock.  Once we have that, we
		 * can adjust the waiter bits and sleep queue.
		 */
		ts = turnstile_lookup(rw);

		/*
		 * Mark the rwlock as having waiters.  If the set fails,
		 * then we may not need to sleep and should spin again.
		 * Reload rw_owner because turnstile_lookup() may have
		 * spun on the turnstile chain lock.
		 */
		owner = rw->rw_owner;
		if ((owner & need_wait) == 0 || rw_oncpu(owner)) {
			turnstile_exit(rw);
			continue;
		}
		next = rw_cas(rw, owner, owner | set_wait);
		if (__predict_false(next != owner)) {
			turnstile_exit(rw);
			owner = next;
			continue;
		}

		LOCKSTAT_START_TIMER(lsflag, slptime);
		turnstile_block(ts, queue, rw, &rw_syncobj);
		LOCKSTAT_STOP_TIMER(lsflag, slptime);
		LOCKSTAT_COUNT(slpcnt, 1);

		/*
		 * No need for a memory barrier because of context switch.
		 * If not handed the lock, then spin again.
		 */
		if (op == RW_READER || (rw->rw_owner & RW_THREAD) == curthread)
			break;

		owner = rw->rw_owner;
	}
	KPREEMPT_ENABLE(curlwp);

	LOCKSTAT_EVENT(lsflag, rw, LB_RWLOCK |
	    (op == RW_WRITER ? LB_SLEEP1 : LB_SLEEP2), slpcnt, slptime);
	LOCKSTAT_EVENT(lsflag, rw, LB_RWLOCK | LB_SPIN, spincnt, spintime);
	LOCKSTAT_EXIT(lsflag);

	RW_DASSERT(rw, (op != RW_READER && RW_OWNER(rw) == curthread) ||
	    (op == RW_READER && RW_COUNT(rw) != 0));
	RW_LOCKED(rw, op);
}
Exemplo n.º 11
0
/*
 * trap(frame): exception, fault, and trap interface to BSD kernel.
 *
 * This common code is called from assembly language IDT gate entry routines
 * that prepare a suitable stack frame, and restore this frame after the
 * exception has been processed. Note that the effect is as if the arguments
 * were passed call by reference.
 */
void
trap(struct trapframe *frame)
{
	struct lwp *l = curlwp;
	struct proc *p;
	struct pcb *pcb;
	extern char fusubail[], kcopy_fault[], return_address_fault[],
	    IDTVEC(osyscall)[];
	struct trapframe *vframe;
	ksiginfo_t ksi;
	void *onfault;
	int type, error;
	uint32_t cr2;
	bool pfail;

	if (__predict_true(l != NULL)) {
		pcb = lwp_getpcb(l);
		p = l->l_proc;
	} else {
		/*
		 * this can happen eg. on break points in early on boot.
		 */
		pcb = NULL;
		p = NULL;
	}
	type = frame->tf_trapno;

#ifdef DEBUG
	if (trapdebug) {
		trap_print(frame, l);
	}
#endif
	if (type != T_NMI &&
	    !KERNELMODE(frame->tf_cs, frame->tf_eflags)) {
		type |= T_USER;
		l->l_md.md_regs = frame;
		pcb->pcb_cr2 = 0;
		LWP_CACHE_CREDS(l, p);
	}

#ifdef KDTRACE_HOOKS
	/*
	 * A trap can occur while DTrace executes a probe. Before
	 * executing the probe, DTrace blocks re-scheduling and sets
	 * a flag in its per-cpu flags to indicate that it doesn't
	 * want to fault. On returning from the the probe, the no-fault
	 * flag is cleared and finally re-scheduling is enabled.
	 *
	 * If the DTrace kernel module has registered a trap handler,
	 * call it and if it returns non-zero, assume that it has
	 * handled the trap and modified the trap frame so that this
	 * function can return normally.
	 */
	if ((type == T_PROTFLT || type == T_PAGEFLT) &&
	    dtrace_trap_func != NULL) {
		if ((*dtrace_trap_func)(frame, type)) {
			return;
		}
	}
#endif

	switch (type) {

	case T_ASTFLT:
		/*FALLTHROUGH*/

	default:
	we_re_toast:
		if (type == T_TRCTRAP)
			check_dr0();
		else
			trap_print(frame, l);

		if (kdb_trap(type, 0, frame))
			return;
		if (kgdb_trap(type, frame))
			return;
		/*
		 * If this is a breakpoint, don't panic if we're not connected.
		 */
		if (type == T_BPTFLT && kgdb_disconnected()) {
			printf("kgdb: ignored %s\n", trap_type[type]);
			return;
		}
		panic("trap");
		/*NOTREACHED*/

	case T_PROTFLT:
	case T_SEGNPFLT:
	case T_ALIGNFLT:
	case T_TSSFLT:
		if (p == NULL)
			goto we_re_toast;
		/* Check for copyin/copyout fault. */
		onfault = onfault_handler(pcb, frame);
		if (onfault != NULL) {
copyefault:
			error = EFAULT;
copyfault:
			frame->tf_eip = (uintptr_t)onfault;
			frame->tf_eax = error;
			return;
		}

		/*
		 * Check for failure during return to user mode.
		 * This can happen loading invalid values into the segment
		 * registers, or during the 'iret' itself.
		 *
		 * We do this by looking at the instruction we faulted on.
		 * The specific instructions we recognize only happen when
		 * returning from a trap, syscall, or interrupt.
		 */

kernelfault:
		KSI_INIT_TRAP(&ksi);
		ksi.ksi_signo = SIGSEGV;
		ksi.ksi_code = SEGV_ACCERR;
		ksi.ksi_trap = type;

		switch (*(u_char *)frame->tf_eip) {
		case 0xcf:	/* iret */
			/*
			 * The 'iret' instruction faulted, so we have the
			 * 'user' registers saved after the kernel %eip:%cs:%fl
			 * of the 'iret' and below that the user %eip:%cs:%fl
			 * the 'iret' was processing.
			 * We must delete the 3 words of kernel return address
			 * from the stack to generate a normal stack frame
			 * (eg for sending a SIGSEGV).
			 */
			vframe = (void *)((int *)frame + 3);
			if (KERNELMODE(vframe->tf_cs, vframe->tf_eflags))
				goto we_re_toast;
			memmove(vframe, frame,
			    offsetof(struct trapframe, tf_eip));
			/* Set the faulting address to the user %eip */
			ksi.ksi_addr = (void *)vframe->tf_eip;
			break;
		case 0x8e:
			switch (*(uint32_t *)frame->tf_eip) {
			case 0x8e242c8e:	/* mov (%esp,%gs), then */
			case 0x0424648e:	/* mov 0x4(%esp),%fs */
			case 0x0824448e:	/* mov 0x8(%esp),%es */
			case 0x0c245c8e:	/* mov 0xc(%esp),%ds */
				break;
			default:
				goto we_re_toast;
			}
			/*
			 * We faulted loading one if the user segment registers.
			 * The stack frame containing the user registers is
			 * still valid and is just below the %eip:%cs:%fl of
			 * the kernel fault frame.
			 */
			vframe = (void *)(&frame->tf_eflags + 1);
			if (KERNELMODE(vframe->tf_cs, vframe->tf_eflags))
				goto we_re_toast;
			/* There is no valid address for the fault */
			break;
		default:
			goto we_re_toast;
		}
		/*
		 * We might have faulted trying to execute the
		 * trampoline for a local (nested) signal handler.
		 * Only generate SIGSEGV if the user %cs isn't changed.
		 * (This is only strictly necessary in the 'iret' case.)
		 */
		if (!pmap_exec_fixup(&p->p_vmspace->vm_map, vframe, pcb)) {
			/* Save outer frame for any signal return */
			l->l_md.md_regs = vframe;
			(*p->p_emul->e_trapsignal)(l, &ksi);
		}
		/* Return to user by reloading the user frame */
		trap_return_fault_return(vframe);
		/* NOTREACHED */

	case T_PROTFLT|T_USER:		/* protection fault */
	case T_TSSFLT|T_USER:
	case T_SEGNPFLT|T_USER:
	case T_STKFLT|T_USER:
	case T_ALIGNFLT|T_USER:
		KSI_INIT_TRAP(&ksi);

		ksi.ksi_addr = (void *)rcr2();
		switch (type) {
		case T_SEGNPFLT|T_USER:
		case T_STKFLT|T_USER:
			ksi.ksi_signo = SIGBUS;
			ksi.ksi_code = BUS_ADRERR;
			break;
		case T_TSSFLT|T_USER:
			ksi.ksi_signo = SIGBUS;
			ksi.ksi_code = BUS_OBJERR;
			break;
		case T_ALIGNFLT|T_USER:
			ksi.ksi_signo = SIGBUS;
			ksi.ksi_code = BUS_ADRALN;
			break;
		case T_PROTFLT|T_USER:
#ifdef VM86
			if (frame->tf_eflags & PSL_VM) {
				vm86_gpfault(l, type & ~T_USER);
				goto out;
			}
#endif
			/*
			 * If pmap_exec_fixup does something,
			 * let's retry the trap.
			 */
			if (pmap_exec_fixup(&p->p_vmspace->vm_map, frame, pcb)){
				goto out;
			}
			ksi.ksi_signo = SIGSEGV;
			ksi.ksi_code = SEGV_ACCERR;
			break;
		default:
			KASSERT(0);
			break;
		}
		goto trapsignal;

	case T_PRIVINFLT|T_USER:	/* privileged instruction fault */
	case T_FPOPFLT|T_USER:		/* coprocessor operand fault */
		KSI_INIT_TRAP(&ksi);
		ksi.ksi_signo = SIGILL;
		ksi.ksi_addr = (void *) frame->tf_eip;
		switch (type) {
		case T_PRIVINFLT|T_USER:
			ksi.ksi_code = ILL_PRVOPC;
			break;
		case T_FPOPFLT|T_USER:
			ksi.ksi_code = ILL_COPROC;
			break;
		default:
			ksi.ksi_code = 0;
			break;
		}
		goto trapsignal;

	case T_ASTFLT|T_USER:
		/* Allow process switch. */
		//curcpu()->ci_data.cpu_nast++;
		if (l->l_pflag & LP_OWEUPC) {
			l->l_pflag &= ~LP_OWEUPC;
			ADDUPROF(l);
		}
		/* Allow a forced task switch. */
		if (curcpu()->ci_want_resched) {
			preempt();
		}
		goto out;

	case T_BOUND|T_USER:
	case T_OFLOW|T_USER:
	case T_DIVIDE|T_USER:
		KSI_INIT_TRAP(&ksi);
		ksi.ksi_signo = SIGFPE;
		ksi.ksi_addr = (void *)frame->tf_eip;
		switch (type) {
		case T_BOUND|T_USER:
			ksi.ksi_code = FPE_FLTSUB;
			break;
		case T_OFLOW|T_USER:
			ksi.ksi_code = FPE_INTOVF;
			break;
		case T_DIVIDE|T_USER:
			ksi.ksi_code = FPE_INTDIV;
			break;
		default:
			ksi.ksi_code = 0;
			break;
		}
		goto trapsignal;

	case T_PAGEFLT:
		/* Allow page faults in kernel mode. */
		if (__predict_false(l == NULL))
			goto we_re_toast;

		/*
		 * fusubail is used by [fs]uswintr() to prevent page faulting
		 * from inside the profiling interrupt.
		 */
		onfault = pcb->pcb_onfault;
		if (onfault == fusubail || onfault == return_address_fault) {
			goto copyefault;
		}
		if (cpu_intr_p() || (l->l_pflag & LP_INTR) != 0) {
			goto we_re_toast;
		}

		cr2 = rcr2();
		goto faultcommon;

	case T_PAGEFLT|T_USER: {	/* page fault */
		register vaddr_t va;
		register struct vmspace *vm;
		register struct vm_map *map;
		vm_prot_t ftype;
		extern struct vm_map *kernel_map;

		cr2 = rcr2();
faultcommon:
		vm = p->p_vmspace;
		if (__predict_false(vm == NULL)) {
			goto we_re_toast;
		}
		pcb->pcb_cr2 = cr2;
		va = trunc_page((vaddr_t)cr2);
		/*
		 * It is only a kernel address space fault iff:
		 *	1. (type & T_USER) == 0  and
		 *	2. pcb_onfault not set or
		 *	3. pcb_onfault set but supervisor space fault
		 * The last can occur during an exec() copyin where the
		 * argument space is lazy-allocated.
		 */
		if (type == T_PAGEFLT && va >= KERNBASE)
			map = kernel_map;
		else
			map = &vm->vm_map;
		if (frame->tf_err & PGEX_W)
			ftype = VM_PROT_WRITE;
		else if (frame->tf_err & PGEX_X)
			ftype = VM_PROT_EXECUTE;
		else
			ftype = VM_PROT_READ;

#ifdef DIAGNOSTIC
		if (map == kernel_map && va == 0) {
			printf("trap: bad kernel access at %lx\n", va);
			goto we_re_toast;
		}
#endif
		/* Fault the original page in. */
		onfault = pcb->pcb_onfault;
		pcb->pcb_onfault = NULL;
		error = uvm_fault(map, va, ftype);
		pcb->pcb_onfault = onfault;
		if (error == 0) {
			if (map != kernel_map && (void *)va >= vm->vm_maxsaddr)
				uvm_grow(p, va);

			pfail = false;
			while (type == T_PAGEFLT) {
				/*
				 * we need to switch pmap now if we're in
				 * the middle of copyin/out.
				 *
				 * but we don't need to do so for kcopy as
				 * it never touch userspace.
 				 */
				kpreempt_disable();
				if (curcpu()->ci_want_pmapload) {
					onfault = onfault_handler(pcb, frame);
					if (onfault != kcopy_fault) {
						pmap_load();
					}
				}
				/*
				 * We need to keep the pmap loaded and
				 * so avoid being preempted until back
				 * into the copy functions.  Disable
				 * interrupts at the hardware level before
				 * re-enabling preemption.  Interrupts
				 * will be re-enabled by 'iret' when
				 * returning back out of the trap stub.
				 * They'll only be re-enabled when the
				 * program counter is once again in
				 * the copy functions, and so visible
				 * to cpu_kpreempt_exit().
				 */
#ifndef XEN
				x86_disable_intr();
#endif
				l->l_nopreempt--;
				if (l->l_nopreempt > 0 || !l->l_dopreempt ||
				    pfail) {
					return;
				}
#ifndef XEN
				x86_enable_intr();
#endif
				/*
				 * If preemption fails for some reason,
				 * don't retry it.  The conditions won't
				 * change under our nose.
				 */
				pfail = kpreempt(0);
			}
			goto out;
		}

		if (type == T_PAGEFLT) {
			onfault = onfault_handler(pcb, frame);
			if (onfault != NULL)
				goto copyfault;
			printf("uvm_fault(%p, %#lx, %d) -> %#x\n",
			    map, va, ftype, error);
			goto kernelfault;
		}

		KSI_INIT_TRAP(&ksi);
		ksi.ksi_trap = type & ~T_USER;
		ksi.ksi_addr = (void *)cr2;
		switch (error) {
		case EINVAL:
			ksi.ksi_signo = SIGBUS;
			ksi.ksi_code = BUS_ADRERR;
			break;
		case EACCES:
			ksi.ksi_signo = SIGSEGV;
			ksi.ksi_code = SEGV_ACCERR;
			error = EFAULT;
			break;
		case ENOMEM:
			ksi.ksi_signo = SIGKILL;
			printf("UVM: pid %d.%d (%s), uid %d killed: "
			    "out of swap\n", p->p_pid, l->l_lid, p->p_comm,
			    l->l_cred ?  kauth_cred_geteuid(l->l_cred) : -1);
			break;
		default:
			ksi.ksi_signo = SIGSEGV;
			ksi.ksi_code = SEGV_MAPERR;
			break;
		}

#ifdef TRAP_SIGDEBUG
		printf("pid %d.%d (%s): signal %d at eip %x addr %lx "
		    "error %d\n", p->p_pid, l->l_lid, p->p_comm, ksi.ksi_signo,
		    frame->tf_eip, va, error);
#endif
		(*p->p_emul->e_trapsignal)(l, &ksi);
		break;
	}

	case T_TRCTRAP:
		/* Check whether they single-stepped into a lcall. */
		if (frame->tf_eip == (int)IDTVEC(osyscall))
			return;
		if (frame->tf_eip == (int)IDTVEC(osyscall) + 1) {
			frame->tf_eflags &= ~PSL_T;
			return;
		}
		goto we_re_toast;

	case T_BPTFLT|T_USER:		/* bpt instruction fault */
	case T_TRCTRAP|T_USER:		/* trace trap */
		/*
		 * Don't go single-stepping into a RAS.
		 */
		if (p->p_raslist == NULL ||
		    (ras_lookup(p, (void *)frame->tf_eip) == (void *)-1)) {
			KSI_INIT_TRAP(&ksi);
			ksi.ksi_signo = SIGTRAP;
			ksi.ksi_trap = type & ~T_USER;
			if (type == (T_BPTFLT|T_USER))
				ksi.ksi_code = TRAP_BRKPT;
			else
				ksi.ksi_code = TRAP_TRACE;
			ksi.ksi_addr = (void *)frame->tf_eip;
			(*p->p_emul->e_trapsignal)(l, &ksi);
		}
		break;

	case T_NMI:
		if (nmi_dispatch(frame))
			return;
		/* NMI can be hooked up to a pushbutton for debugging */
		if (kgdb_trap(type, frame))
			return;
		if (kdb_trap(type, 0, frame))
			return;
		/* machine/parity/power fail/"kitchen sink" faults */
#if NMCA > 0
		mca_nmi();
#endif
		x86_nmi();
	}

	if ((type & T_USER) == 0)
		return;
out:
	userret(l);
	return;
trapsignal:
	ksi.ksi_trap = type & ~T_USER;
	(*p->p_emul->e_trapsignal)(l, &ksi);
	userret(l);
}
Exemplo n.º 12
0
/*
 * General page fault handler.
 */
void
do_fault(struct trapframe *tf, struct lwp *l,
    struct vm_map *map, vaddr_t va, vm_prot_t atype)
{
	int error;

	if (pmap_fault(map->pmap, va, atype))
		return;

	struct pcb * const pcb = lwp_getpcb(l);
	void * const onfault = pcb->pcb_onfault;
	const bool user = TRAP_USERMODE(tf);

	if (cpu_intr_p()) {
		KASSERT(!user);
		error = EFAULT;
	} else {
		pcb->pcb_onfault = NULL;
		error = uvm_fault(map, va, atype);
		pcb->pcb_onfault = onfault;
	}

	if (error != 0) {
		ksiginfo_t ksi;

		if (onfault != NULL) {
			tf->tf_r0 = error;
			tf->tf_r15 = (tf->tf_r15 & ~R15_PC) |
			    (register_t)onfault;
			return;
		}
#ifdef DDB
		if (db_validating) {
			db_faulted = true;
			tf->tf_r15 += INSN_SIZE;
			return;
		}
#endif
		if (!user) {
#ifdef DDB
			db_printf("Unhandled data abort in kernel mode\n");
			kdb_trap(T_FAULT, tf);
#else
#ifdef DEBUG
			printf("Unhandled data abort:\n");
			printregs(tf);
#endif
			panic("unhandled data abort in kernel mode");
#endif
		}

		KSI_INIT_TRAP(&ksi);

		if (error == ENOMEM) {
			printf("UVM: pid %d (%s), uid %d killed: "
			    "out of swap\n",
			    l->l_proc->p_pid, l->l_proc->p_comm,
			    l->l_cred ? kauth_cred_geteuid(l->l_cred) : -1);
			ksi.ksi_signo = SIGKILL;
		} else
			ksi.ksi_signo = SIGSEGV;
		ksi.ksi_code = (error == EPERM) ? SEGV_ACCERR : SEGV_MAPERR;
		ksi.ksi_addr = (void *) va;
		trapsignal(l, &ksi);
	} else if (!user) {
		ucas_ras_check(tf);
	}
}