Beispiel #1
0
void
cpu_idle(void)
{
	struct thread *td = curthread;
	struct mdglobaldata *gd = mdcpu;
	int reqflags;

	crit_exit();
	KKASSERT(td->td_critcount == 0);
	cpu_enable_intr();
	for (;;) {
		/*
		 * See if there are any LWKTs ready to go.
		 */
		lwkt_switch();

		/*
		 * The idle loop halts only if no threads are scheduleable
		 * and no signals have occured.
		 */
		if (cpu_idle_hlt &&
		    (td->td_gd->gd_reqflags & RQF_IDLECHECK_WK_MASK) == 0) {
			splz();
#ifdef SMP
			KKASSERT(MP_LOCK_HELD() == 0);
#endif
			if ((td->td_gd->gd_reqflags & RQF_IDLECHECK_WK_MASK) == 0) {
#ifdef DEBUGIDLE
				struct timeval tv1, tv2;
				gettimeofday(&tv1, NULL);
#endif
				reqflags = gd->mi.gd_reqflags &
					   ~RQF_IDLECHECK_WK_MASK;
				umtx_sleep(&gd->mi.gd_reqflags, reqflags,
					   1000000);
#ifdef DEBUGIDLE
				gettimeofday(&tv2, NULL);
				if (tv2.tv_usec - tv1.tv_usec +
				    (tv2.tv_sec - tv1.tv_sec) * 1000000 
				    > 500000) {
					kprintf("cpu %d idlelock %08x %08x\n",
						gd->mi.gd_cpuid,
						gd->mi.gd_reqflags,
						gd->gd_fpending);
				}
#endif
			}
			++cpu_idle_hltcnt;
		} else {
			splz();
#ifdef SMP
			__asm __volatile("pause");
#endif
			++cpu_idle_spincnt;
		}
	}
}
Beispiel #2
0
/*
 * Enable write allocate feature of AMD processors.
 * Following two functions require the Maxmem variable being set.
 */
void
enable_K5_wt_alloc(void)
{
	u_int64_t	msr;

	/*
	 * Write allocate is supported only on models 1, 2, and 3, with
	 * a stepping of 4 or greater.
	 */
	if (((cpu_id & 0xf0) > 0) && ((cpu_id & 0x0f) > 3)) {
		cpu_disable_intr();
		msr = rdmsr(0x83);		/* HWCR */
		wrmsr(0x83, msr & !(0x10));

		/*
		 * We have to tell the chip where the top of memory is,
		 * since video cards could have frame bufferes there,
		 * memory-mapped I/O could be there, etc.
		 */
		if(Maxmem > 0)
		  msr = Maxmem / 16;
		else
		  msr = 0;
		msr |= AMD_WT_ALLOC_TME | AMD_WT_ALLOC_FRE;

		/*
		 * There is no way to know wheter 15-16M hole exists or not. 
		 * Therefore, we disable write allocate for this range.
		 */
		wrmsr(0x86, 0x0ff00f0);
		msr |= AMD_WT_ALLOC_PRE;
		wrmsr(0x85, msr);

		msr=rdmsr(0x83);
		wrmsr(0x83, msr|0x10); /* enable write allocate */

		cpu_enable_intr();
	}
}
Beispiel #3
0
/*
 * Exception, fault, and trap interface to the kernel.
 * This common code is called from assembly language IDT gate entry
 * routines that prepare a suitable stack frame, and restore this
 * frame after the exception has been processed.
 *
 * This function is also called from doreti in an interlock to handle ASTs.
 * For example:  hardwareint->INTROUTINE->(set ast)->doreti->trap
 *
 * NOTE!  We have to retrieve the fault address prior to potentially
 *	  blocking, including blocking on any token.
 *
 * NOTE!  NMI and kernel DBG traps remain on their respective pcpu IST
 *	  stacks if taken from a kernel RPL. trap() cannot block in this
 *	  situation.  DDB entry or a direct report-and-return is ok.
 *
 * XXX gd_trap_nesting_level currently prevents lwkt_switch() from panicing
 * if an attempt is made to switch from a fast interrupt or IPI.
 */
void
trap(struct trapframe *frame)
{
	static struct krate sscpubugrate = { 1 };
	struct globaldata *gd = mycpu;
	struct thread *td = gd->gd_curthread;
	struct lwp *lp = td->td_lwp;
	struct proc *p;
	int sticks = 0;
	int i = 0, ucode = 0, type, code;
#ifdef INVARIANTS
	int crit_count = td->td_critcount;
	lwkt_tokref_t curstop = td->td_toks_stop;
#endif
	vm_offset_t eva;

	p = td->td_proc;
	clear_quickret();

#ifdef DDB
        /*
	 * We need to allow T_DNA faults when the debugger is active since
	 * some dumping paths do large bcopy() which use the floating
	 * point registers for faster copying.
	 */
	if (db_active && frame->tf_trapno != T_DNA) {
		eva = (frame->tf_trapno == T_PAGEFLT ? frame->tf_addr : 0);
		++gd->gd_trap_nesting_level;
		trap_fatal(frame, eva);
		--gd->gd_trap_nesting_level;
		goto out2;
	}
#endif

	eva = 0;

	if ((frame->tf_rflags & PSL_I) == 0) {
		/*
		 * Buggy application or kernel code has disabled interrupts
		 * and then trapped.  Enabling interrupts now is wrong, but
		 * it is better than running with interrupts disabled until
		 * they are accidentally enabled later.
		 */

		type = frame->tf_trapno;
		if (ISPL(frame->tf_cs) == SEL_UPL) {
			/* JG curproc can be NULL */
			kprintf(
			    "pid %ld (%s): trap %d with interrupts disabled\n",
			    (long)curproc->p_pid, curproc->p_comm, type);
		} else if ((type == T_STKFLT || type == T_PROTFLT ||
			    type == T_SEGNPFLT) &&
			   frame->tf_rip == (long)doreti_iret) {
			/*
			 * iretq fault from kernel mode during return to
			 * userland.
			 *
			 * This situation is expected, don't complain.
			 */
		} else if (type != T_NMI && type != T_BPTFLT &&
			   type != T_TRCTRAP) {
			/*
			 * XXX not quite right, since this may be for a
			 * multiple fault in user mode.
			 */
			kprintf("kernel trap %d (%s @ 0x%016jx) with "
				"interrupts disabled\n",
				type,
				td->td_comm,
				frame->tf_rip);
		}
		cpu_enable_intr();
	}

	type = frame->tf_trapno;
	code = frame->tf_err;

	if (ISPL(frame->tf_cs) == SEL_UPL) {
		/* user trap */

		KTR_LOG(kernentry_trap, p->p_pid, lp->lwp_tid,
			frame->tf_trapno, eva);

		userenter(td, p);

		sticks = (int)td->td_sticks;
		KASSERT(lp->lwp_md.md_regs == frame,
			("Frame mismatch %p %p", lp->lwp_md.md_regs, frame));

		switch (type) {
		case T_PRIVINFLT:	/* privileged instruction fault */
			i = SIGILL;
			ucode = ILL_PRVOPC;
			break;

		case T_BPTFLT:		/* bpt instruction fault */
		case T_TRCTRAP:		/* trace trap */
			frame->tf_rflags &= ~PSL_T;
			i = SIGTRAP;
			ucode = (type == T_TRCTRAP ? TRAP_TRACE : TRAP_BRKPT);
			break;

		case T_ARITHTRAP:	/* arithmetic trap */
			ucode = code;
			i = SIGFPE;
			break;

		case T_ASTFLT:		/* Allow process switch */
			mycpu->gd_cnt.v_soft++;
			if (mycpu->gd_reqflags & RQF_AST_OWEUPC) {
				atomic_clear_int(&mycpu->gd_reqflags,
						 RQF_AST_OWEUPC);
				addupc_task(p, p->p_prof.pr_addr,
					    p->p_prof.pr_ticks);
			}
			goto out;

		case T_PROTFLT:		/* general protection fault */
			i = SIGBUS;
			ucode = BUS_OBJERR;
			break;
		case T_STKFLT:		/* stack fault */
		case T_SEGNPFLT:	/* segment not present fault */
			i = SIGBUS;
			ucode = BUS_ADRERR;
			break;
		case T_TSSFLT:		/* invalid TSS fault */
		case T_DOUBLEFLT:	/* double fault */
		default:
			i = SIGBUS;
			ucode = BUS_OBJERR;
			break;

		case T_PAGEFLT:		/* page fault */
			i = trap_pfault(frame, TRUE);
#ifdef DDB
			if (frame->tf_rip == 0) {
				/* used for kernel debugging only */
				while (freeze_on_seg_fault)
					tsleep(p, 0, "freeze", hz * 20);
			}
#endif
			if (i == -1 || i == 0)
				goto out;
			if (i == SIGSEGV) {
				ucode = SEGV_MAPERR;
			} else {
				i = SIGSEGV;
				ucode = SEGV_ACCERR;
			}
			break;

		case T_DIVIDE:		/* integer divide fault */
			ucode = FPE_INTDIV;
			i = SIGFPE;
			break;

#if NISA > 0
		case T_NMI:
			/* machine/parity/power fail/"kitchen sink" faults */
			if (isa_nmi(code) == 0) {
#ifdef DDB
				/*
				 * NMI can be hooked up to a pushbutton
				 * for debugging.
				 */
				if (ddb_on_nmi) {
					kprintf ("NMI ... going to debugger\n");
					kdb_trap(type, 0, frame);
				}
#endif /* DDB */
				goto out2;
			} else if (panic_on_nmi)
				panic("NMI indicates hardware failure");
			break;
#endif /* NISA > 0 */

		case T_OFLOW:		/* integer overflow fault */
			ucode = FPE_INTOVF;
			i = SIGFPE;
			break;

		case T_BOUND:		/* bounds check fault */
			ucode = FPE_FLTSUB;
			i = SIGFPE;
			break;

		case T_DNA:
			/*
			 * Virtual kernel intercept - pass the DNA exception
			 * to the virtual kernel if it asked to handle it.
			 * This occurs when the virtual kernel is holding
			 * onto the FP context for a different emulated
			 * process then the one currently running.
			 *
			 * We must still call npxdna() since we may have
			 * saved FP state that the virtual kernel needs
			 * to hand over to a different emulated process.
			 */
			if (lp->lwp_vkernel && lp->lwp_vkernel->ve &&
			    (td->td_pcb->pcb_flags & FP_VIRTFP)
			) {
				npxdna();
				break;
			}

			/*
			 * The kernel may have switched out the FP unit's
			 * state, causing the user process to take a fault
			 * when it tries to use the FP unit.  Restore the
			 * state here
			 */
			if (npxdna()) {
				gd->gd_cnt.v_trap++;
				goto out;
			}
			i = SIGFPE;
			ucode = FPE_FPU_NP_TRAP;
			break;

		case T_FPOPFLT:		/* FPU operand fetch fault */
			ucode = ILL_COPROC;
			i = SIGILL;
			break;

		case T_XMMFLT:		/* SIMD floating-point exception */
			ucode = 0; /* XXX */
			i = SIGFPE;
			break;
		}
	} else {
		/* kernel trap */

		switch (type) {
		case T_PAGEFLT:			/* page fault */
			trap_pfault(frame, FALSE);
			goto out2;

		case T_DNA:
			/*
			 * The kernel is apparently using fpu for copying.
			 * XXX this should be fatal unless the kernel has
			 * registered such use.
			 */
			if (npxdna()) {
				gd->gd_cnt.v_trap++;
				goto out2;
			}
			break;

		case T_STKFLT:		/* stack fault */
		case T_PROTFLT:		/* general protection fault */
		case T_SEGNPFLT:	/* segment not present fault */
			/*
			 * Invalid segment selectors and out of bounds
			 * %rip's and %rsp's can be set up in user mode.
			 * This causes a fault in kernel mode when the
			 * kernel tries to return to user mode.  We want
			 * to get this fault so that we can fix the
			 * problem here and not have to check all the
			 * selectors and pointers when the user changes
			 * them.
			 */
			if (mycpu->gd_intr_nesting_level == 0) {
				/*
				 * NOTE: in 64-bit mode traps push rsp/ss
				 *	 even if no ring change occurs.
				 */
				if (td->td_pcb->pcb_onfault &&
				    td->td_pcb->pcb_onfault_sp ==
				    frame->tf_rsp) {
					frame->tf_rip = (register_t)
						td->td_pcb->pcb_onfault;
					goto out2;
				}

				/*
				 * If the iretq in doreti faults during
				 * return to user, it will be special-cased
				 * in IDTVEC(prot) to get here.  We want
				 * to 'return' to doreti_iret_fault in
				 * ipl.s in approximately the same state we
				 * were in at the iretq.
				 */
				if (frame->tf_rip == (long)doreti_iret) {
					frame->tf_rip = (long)doreti_iret_fault;
					goto out2;
				}
			}
			break;

		case T_TSSFLT:
			/*
			 * PSL_NT can be set in user mode and isn't cleared
			 * automatically when the kernel is entered.  This
			 * causes a TSS fault when the kernel attempts to
			 * `iret' because the TSS link is uninitialized.  We
			 * want to get this fault so that we can fix the
			 * problem here and not every time the kernel is
			 * entered.
			 */
			if (frame->tf_rflags & PSL_NT) {
				frame->tf_rflags &= ~PSL_NT;
#if 0
				/* do we need this? */
				if (frame->tf_rip == (long)doreti_iret)
					frame->tf_rip = (long)doreti_iret_fault;
#endif
				goto out2;
			}
			break;

		case T_TRCTRAP:	 /* trace trap */
			/*
			 * Detect historical CPU artifact on syscall or int $3
			 * entry (if not shortcutted in exception.s via
			 * DIRECT_DISALLOW_SS_CPUBUG).
			 */
			gd->gd_cnt.v_trap++;
			if (frame->tf_rip == (register_t)IDTVEC(fast_syscall)) {
				krateprintf(&sscpubugrate,
					"Caught #DB at syscall cpu artifact\n");
				goto out2;
			}
			if (frame->tf_rip == (register_t)IDTVEC(bpt)) {
				krateprintf(&sscpubugrate,
					"Caught #DB at int $N cpu artifact\n");
				goto out2;
			}

			/*
			 * Ignore debug register trace traps due to
			 * accesses in the user's address space, which
			 * can happen under several conditions such as
			 * if a user sets a watchpoint on a buffer and
			 * then passes that buffer to a system call.
			 * We still want to get TRCTRAPS for addresses
			 * in kernel space because that is useful when
			 * debugging the kernel.
			 */
			if (user_dbreg_trap()) {
				/*
				 * Reset breakpoint bits because the
				 * processor doesn't
				 */
				load_dr6(rdr6() & ~0xf);
				goto out2;
			}
			/*
			 * FALLTHROUGH (TRCTRAP kernel mode, kernel address)
			 */
		case T_BPTFLT:
			/*
			 * If DDB is enabled, let it handle the debugger trap.
			 * Otherwise, debugger traps "can't happen".
			 */
			ucode = TRAP_BRKPT;
#ifdef DDB
			if (kdb_trap(type, 0, frame))
				goto out2;
#endif
			break;

#if NISA > 0
		case T_NMI:
			/* machine/parity/power fail/"kitchen sink" faults */
			if (isa_nmi(code) == 0) {
#ifdef DDB
				/*
				 * NMI can be hooked up to a pushbutton
				 * for debugging.
				 */
				if (ddb_on_nmi) {
					kprintf ("NMI ... going to debugger\n");
					kdb_trap(type, 0, frame);
				}
#endif /* DDB */
				goto out2;
			} else if (panic_on_nmi == 0)
				goto out2;
			/* FALL THROUGH */
#endif /* NISA > 0 */
		}
		trap_fatal(frame, 0);
		goto out2;
	}

	/*
	 * Fault from user mode, virtual kernel interecept.
	 *
	 * If the fault is directly related to a VM context managed by a
	 * virtual kernel then let the virtual kernel handle it.
	 */
	if (lp->lwp_vkernel && lp->lwp_vkernel->ve) {
		vkernel_trap(lp, frame);
		goto out;
	}

	/* Translate fault for emulators (e.g. Linux) */
	if (*p->p_sysent->sv_transtrap)
		i = (*p->p_sysent->sv_transtrap)(i, type);

	gd->gd_cnt.v_trap++;
	trapsignal(lp, i, ucode);

#ifdef DEBUG
	if (type <= MAX_TRAP_MSG) {
		uprintf("fatal process exception: %s",
			trap_msg[type]);
		if ((type == T_PAGEFLT) || (type == T_PROTFLT))
			uprintf(", fault VA = 0x%lx", frame->tf_addr);
		uprintf("\n");
	}
#endif

out:
	userret(lp, frame, sticks);
	userexit(lp);
out2:	;
	if (p != NULL && lp != NULL)
		KTR_LOG(kernentry_trap_ret, p->p_pid, lp->lwp_tid);
#ifdef INVARIANTS
	KASSERT(crit_count == td->td_critcount,
		("trap: critical section count mismatch! %d/%d",
		crit_count, td->td_pri));
	KASSERT(curstop == td->td_toks_stop,
		("trap: extra tokens held after trap! %ld/%ld",
		curstop - &td->td_toks_base,
		td->td_toks_stop - &td->td_toks_base));
#endif
}
Beispiel #4
0
void
trap(struct trapframe *frame)
{
	struct globaldata *gd = mycpu;
	struct thread *td = gd->gd_curthread;
	struct lwp *lp = td->td_lwp;
	struct proc *p;
	int sticks = 0;
	int i = 0, ucode = 0, type, code;
	int have_mplock = 0;
#ifdef INVARIANTS
	int crit_count = td->td_critcount;
	lwkt_tokref_t curstop = td->td_toks_stop;
#endif
	vm_offset_t eva;

	p = td->td_proc;
#ifdef DDB
	/*
	 * We need to allow T_DNA faults when the debugger is active since
	 * some dumping paths do large bcopy() which use the floating
	 * point registers for faster copying.
	 */
	if (db_active && frame->tf_trapno != T_DNA) {
		eva = (frame->tf_trapno == T_PAGEFLT ? rcr2() : 0);
		++gd->gd_trap_nesting_level;
		MAKEMPSAFE(have_mplock);
		trap_fatal(frame, eva);
		--gd->gd_trap_nesting_level;
		goto out2;
	}
#endif

	eva = 0;
	++gd->gd_trap_nesting_level;
	if (frame->tf_trapno == T_PAGEFLT) {
		/*
		 * For some Cyrix CPUs, %cr2 is clobbered by interrupts.
		 * This problem is worked around by using an interrupt
		 * gate for the pagefault handler.  We are finally ready
		 * to read %cr2 and then must reenable interrupts.
		 *
		 * XXX this should be in the switch statement, but the
		 * NO_FOOF_HACK and VM86 goto and ifdefs obfuscate the
		 * flow of control too much for this to be obviously
		 * correct.
		 */
		eva = rcr2();
		cpu_enable_intr();
	}

	--gd->gd_trap_nesting_level;

	if (!(frame->tf_eflags & PSL_I)) {
		/*
		 * Buggy application or kernel code has disabled interrupts
		 * and then trapped.  Enabling interrupts now is wrong, but
		 * it is better than running with interrupts disabled until
		 * they are accidentally enabled later.
		 */
		type = frame->tf_trapno;
		if (ISPL(frame->tf_cs)==SEL_UPL || (frame->tf_eflags & PSL_VM)) {
			MAKEMPSAFE(have_mplock);
			kprintf(
			    "pid %ld (%s): trap %d with interrupts disabled\n",
			    (long)curproc->p_pid, curproc->p_comm, type);
		} else if (type != T_BPTFLT && type != T_TRCTRAP) {
			/*
			 * XXX not quite right, since this may be for a
			 * multiple fault in user mode.
			 */
			MAKEMPSAFE(have_mplock);
			kprintf("kernel trap %d with interrupts disabled\n",
			    type);
		}
		cpu_enable_intr();
	}

#if defined(I586_CPU) && !defined(NO_F00F_HACK)
restart:
#endif
	type = frame->tf_trapno;
	code = frame->tf_err;

	if (in_vm86call) {
		if (frame->tf_eflags & PSL_VM &&
		    (type == T_PROTFLT || type == T_STKFLT)) {
			KKASSERT(get_mplock_count(curthread) > 0);
			i = vm86_emulate((struct vm86frame *)frame);
			KKASSERT(get_mplock_count(curthread) > 0);
			if (i != 0) {
				/*
				 * returns to original process
				 */
				vm86_trap((struct vm86frame *)frame,
					  have_mplock);
				KKASSERT(0); /* NOT REACHED */
			}
			goto out2;
		}
		switch (type) {
			/*
			 * these traps want either a process context, or
			 * assume a normal userspace trap.
			 */
		case T_PROTFLT:
		case T_SEGNPFLT:
			trap_fatal(frame, eva);
			goto out2;
		case T_TRCTRAP:
			type = T_BPTFLT;	/* kernel breakpoint */
			/* FALL THROUGH */
		}
		goto kernel_trap;	/* normal kernel trap handling */
	}

        if ((ISPL(frame->tf_cs) == SEL_UPL) || (frame->tf_eflags & PSL_VM)) {
		/* user trap */

		KTR_LOG(kernentry_trap, p->p_pid, lp->lwp_tid,
			frame->tf_trapno, eva);

		userenter(td, p);

		sticks = (int)td->td_sticks;
		lp->lwp_md.md_regs = frame;

		switch (type) {
		case T_PRIVINFLT:	/* privileged instruction fault */
			i = SIGILL;
			ucode = ILL_PRVOPC;
			break;

		case T_BPTFLT:		/* bpt instruction fault */
		case T_TRCTRAP:		/* trace trap */
			frame->tf_eflags &= ~PSL_T;
			i = SIGTRAP;
			ucode = (type == T_TRCTRAP ? TRAP_TRACE : TRAP_BRKPT);
			break;

		case T_ARITHTRAP:	/* arithmetic trap */
			ucode = code;
			i = SIGFPE;
			break;

		case T_ASTFLT:		/* Allow process switch */
			mycpu->gd_cnt.v_soft++;
			if (mycpu->gd_reqflags & RQF_AST_OWEUPC) {
				atomic_clear_int(&mycpu->gd_reqflags,
						 RQF_AST_OWEUPC);
				addupc_task(p, p->p_prof.pr_addr,
					    p->p_prof.pr_ticks);
			}
			goto out;

			/*
			 * The following two traps can happen in
			 * vm86 mode, and, if so, we want to handle
			 * them specially.
			 */
		case T_PROTFLT:		/* general protection fault */
		case T_STKFLT:		/* stack fault */
			if (frame->tf_eflags & PSL_VM) {
				i = vm86_emulate((struct vm86frame *)frame);
				if (i == 0)
					goto out;
				break;
			}
			i = SIGBUS;
			ucode = (type == T_PROTFLT) ? BUS_OBJERR : BUS_ADRERR;
			break;
		case T_SEGNPFLT:	/* segment not present fault */
			i = SIGBUS;
			ucode = BUS_ADRERR;
			break;
		case T_TSSFLT:		/* invalid TSS fault */
		case T_DOUBLEFLT:	/* double fault */
		default:
			i = SIGBUS;
			ucode = BUS_OBJERR;
			break;

		case T_PAGEFLT:		/* page fault */
			i = trap_pfault(frame, TRUE, eva);
			if (i == -1)
				goto out;
#if defined(I586_CPU) && !defined(NO_F00F_HACK)
			if (i == -2)
				goto restart;
#endif
			if (i == 0)
				goto out;

			if (i == SIGSEGV)
				ucode = SEGV_MAPERR;
			else {
				i = SIGSEGV;
				ucode = SEGV_ACCERR;
			}
			break;

		case T_DIVIDE:		/* integer divide fault */
			ucode = FPE_INTDIV;
			i = SIGFPE;
			break;

#if NISA > 0
		case T_NMI:
			MAKEMPSAFE(have_mplock);
#ifdef POWERFAIL_NMI
			goto handle_powerfail;
#else /* !POWERFAIL_NMI */
			/* machine/parity/power fail/"kitchen sink" faults */
			if (isa_nmi(code) == 0) {
#ifdef DDB
				/*
				 * NMI can be hooked up to a pushbutton
				 * for debugging.
				 */
				if (ddb_on_nmi) {
					kprintf ("NMI ... going to debugger\n");
					kdb_trap (type, 0, frame);
				}
#endif /* DDB */
				goto out2;
			} else if (panic_on_nmi)
				panic("NMI indicates hardware failure");
			break;
#endif /* POWERFAIL_NMI */
#endif /* NISA > 0 */

		case T_OFLOW:		/* integer overflow fault */
			ucode = FPE_INTOVF;
			i = SIGFPE;
			break;

		case T_BOUND:		/* bounds check fault */
			ucode = FPE_FLTSUB;
			i = SIGFPE;
			break;

		case T_DNA:
			/*
			 * Virtual kernel intercept - pass the DNA exception
			 * to the virtual kernel if it asked to handle it.
			 * This occurs when the virtual kernel is holding
			 * onto the FP context for a different emulated
			 * process then the one currently running.
			 *
			 * We must still call npxdna() since we may have
			 * saved FP state that the virtual kernel needs
			 * to hand over to a different emulated process.
			 */
			if (lp->lwp_vkernel && lp->lwp_vkernel->ve &&
			    (td->td_pcb->pcb_flags & FP_VIRTFP)
			) {
				npxdna();
				break;
			}

#if NNPX > 0
			/* 
			 * The kernel may have switched out the FP unit's
			 * state, causing the user process to take a fault
			 * when it tries to use the FP unit.  Restore the
			 * state here
			 */
			if (npxdna())
				goto out;
#endif
			if (!pmath_emulate) {
				i = SIGFPE;
				ucode = FPE_FPU_NP_TRAP;
				break;
			}
			i = (*pmath_emulate)(frame);
			if (i == 0) {
				if (!(frame->tf_eflags & PSL_T))
					goto out2;
				frame->tf_eflags &= ~PSL_T;
				i = SIGTRAP;
			}
			/* else ucode = emulator_only_knows() XXX */
			break;

		case T_FPOPFLT:		/* FPU operand fetch fault */
			ucode = ILL_COPROC;
			i = SIGILL;
			break;

		case T_XMMFLT:		/* SIMD floating-point exception */
			ucode = 0; /* XXX */
			i = SIGFPE;
			break;
		}
	} else {
kernel_trap:
		/* kernel trap */

		switch (type) {
		case T_PAGEFLT:			/* page fault */
			trap_pfault(frame, FALSE, eva);
			goto out2;

		case T_DNA:
#if NNPX > 0
			/*
			 * The kernel may be using npx for copying or other
			 * purposes.
			 */
			if (npxdna())
				goto out2;
#endif
			break;

		case T_PROTFLT:		/* general protection fault */
		case T_SEGNPFLT:	/* segment not present fault */
			/*
			 * Invalid segment selectors and out of bounds
			 * %eip's and %esp's can be set up in user mode.
			 * This causes a fault in kernel mode when the
			 * kernel tries to return to user mode.  We want
			 * to get this fault so that we can fix the
			 * problem here and not have to check all the
			 * selectors and pointers when the user changes
			 * them.
			 */
#define	MAYBE_DORETI_FAULT(where, whereto)				\
	do {								\
		if (frame->tf_eip == (int)where) {			\
			frame->tf_eip = (int)whereto;			\
			goto out2;					\
		}							\
	} while (0)
			if (mycpu->gd_intr_nesting_level == 0) {
				/*
				 * Invalid %fs's and %gs's can be created using
				 * procfs or PT_SETREGS or by invalidating the
				 * underlying LDT entry.  This causes a fault
				 * in kernel mode when the kernel attempts to
				 * switch contexts.  Lose the bad context
				 * (XXX) so that we can continue, and generate
				 * a signal.
				 */
				MAYBE_DORETI_FAULT(doreti_iret,
						   doreti_iret_fault);
				MAYBE_DORETI_FAULT(doreti_popl_ds,
						   doreti_popl_ds_fault);
				MAYBE_DORETI_FAULT(doreti_popl_es,
						   doreti_popl_es_fault);
				MAYBE_DORETI_FAULT(doreti_popl_fs,
						   doreti_popl_fs_fault);
				MAYBE_DORETI_FAULT(doreti_popl_gs,
						   doreti_popl_gs_fault);

				/*
				 * NOTE: cpu doesn't push esp on kernel trap
				 */
				if (td->td_pcb->pcb_onfault &&
				    td->td_pcb->pcb_onfault_sp ==
				    (int)&frame->tf_esp) {
					frame->tf_eip = 
					    (register_t)td->td_pcb->pcb_onfault;
					goto out2;
				}
			}
			break;

		case T_TSSFLT:
			/*
			 * PSL_NT can be set in user mode and isn't cleared
			 * automatically when the kernel is entered.  This
			 * causes a TSS fault when the kernel attempts to
			 * `iret' because the TSS link is uninitialized.  We
			 * want to get this fault so that we can fix the
			 * problem here and not every time the kernel is
			 * entered.
			 */
			if (frame->tf_eflags & PSL_NT) {
				frame->tf_eflags &= ~PSL_NT;
				goto out2;
			}
			break;

		case T_TRCTRAP:	 /* trace trap */
			if (frame->tf_eip == (int)IDTVEC(syscall)) {
				/*
				 * We've just entered system mode via the
				 * syscall lcall.  Continue single stepping
				 * silently until the syscall handler has
				 * saved the flags.
				 */
				goto out2;
			}
			if (frame->tf_eip == (int)IDTVEC(syscall) + 1) {
				/*
				 * The syscall handler has now saved the
				 * flags.  Stop single stepping it.
				 */
				frame->tf_eflags &= ~PSL_T;
				goto out2;
			}
                        /*
                         * Ignore debug register trace traps due to
                         * accesses in the user's address space, which
                         * can happen under several conditions such as
                         * if a user sets a watchpoint on a buffer and
                         * then passes that buffer to a system call.
                         * We still want to get TRCTRAPS for addresses
                         * in kernel space because that is useful when
                         * debugging the kernel.
                         */
                        if (user_dbreg_trap()) {
                                /*
                                 * Reset breakpoint bits because the
                                 * processor doesn't
                                 */
                                load_dr6(rdr6() & 0xfffffff0);
                                goto out2;
                        }
			/*
			 * FALLTHROUGH (TRCTRAP kernel mode, kernel address)
			 */
		case T_BPTFLT:
			/*
			 * If DDB is enabled, let it handle the debugger trap.
			 * Otherwise, debugger traps "can't happen".
			 */
			ucode = TRAP_BRKPT;
#ifdef DDB
			MAKEMPSAFE(have_mplock);
			if (kdb_trap (type, 0, frame))
				goto out2;
#endif
			break;

#if NISA > 0
		case T_NMI:
			MAKEMPSAFE(have_mplock);
#ifdef POWERFAIL_NMI
#ifndef TIMER_FREQ
#  define TIMER_FREQ 1193182
#endif
	handle_powerfail:
		{
		  static unsigned lastalert = 0;

		  if (time_uptime - lastalert > 10) {
		      log(LOG_WARNING, "NMI: power fail\n");
		      sysbeep(TIMER_FREQ/880, hz);
		      lastalert = time_uptime;
		  }
		    /* YYY mp count */
		  goto out2;
		}
#else /* !POWERFAIL_NMI */
			/* machine/parity/power fail/"kitchen sink" faults */
			if (isa_nmi(code) == 0) {
#ifdef DDB
				/*
				 * NMI can be hooked up to a pushbutton
				 * for debugging.
				 */
				if (ddb_on_nmi) {
					kprintf ("NMI ... going to debugger\n");
					kdb_trap (type, 0, frame);
				}
#endif /* DDB */
				goto out2;
			} else if (panic_on_nmi == 0)
				goto out2;
			/* FALL THROUGH */
#endif /* POWERFAIL_NMI */
#endif /* NISA > 0 */
		}

		MAKEMPSAFE(have_mplock);
		trap_fatal(frame, eva);
		goto out2;
	}

	/*
	 * Virtual kernel intercept - if the fault is directly related to a
	 * VM context managed by a virtual kernel then let the virtual kernel
	 * handle it.
	 */
	if (lp->lwp_vkernel && lp->lwp_vkernel->ve) {
		vkernel_trap(lp, frame);
		goto out;
	}

	/* Translate fault for emulators (e.g. Linux) */
	if (*p->p_sysent->sv_transtrap)
		i = (*p->p_sysent->sv_transtrap)(i, type);

	MAKEMPSAFE(have_mplock);
	trapsignal(lp, i, ucode);

#ifdef DEBUG
	if (type <= MAX_TRAP_MSG) {
		uprintf("fatal process exception: %s",
			trap_msg[type]);
		if ((type == T_PAGEFLT) || (type == T_PROTFLT))
			uprintf(", fault VA = 0x%lx", (u_long)eva);
		uprintf("\n");
	}
#endif

out:
	userret(lp, frame, sticks);
	userexit(lp);
out2:	;
	if (have_mplock)
		rel_mplock();
	if (p != NULL && lp != NULL)
		KTR_LOG(kernentry_trap_ret, p->p_pid, lp->lwp_tid);
#ifdef INVARIANTS
	KASSERT(crit_count == td->td_critcount,
		("trap: critical section count mismatch! %d/%d",
		crit_count, td->td_pri));
	KASSERT(curstop == td->td_toks_stop,
		("trap: extra tokens held after trap! %zd/%zd",
		curstop - &td->td_toks_base,
		td->td_toks_stop - &td->td_toks_base));
#endif
}
Beispiel #5
0
/*
 * Called with a critical section held and interrupts enabled.
 */
int
pmap_inval_intr(cpumask_t *cpumaskp, int toolong)
{
    globaldata_t gd = mycpu;
    pmap_inval_info_t *info;
    int loopme = 0;
    int cpu;
    cpumask_t cpumask;

    /*
     * Check all cpus for invalidations we may need to service.
     */
    cpu_ccfence();
    cpu = gd->gd_cpuid;
    cpumask = *cpumaskp;

    while (CPUMASK_TESTNZERO(cpumask)) {
        int n = BSFCPUMASK(cpumask);

#ifdef LOOPRECOVER
        KKASSERT(n >= 0 && n < MAXCPU);
#endif

        CPUMASK_NANDBIT(cpumask, n);
        info = &invinfo[n];

        /*
         * Due to interrupts/races we can catch a new operation
         * in an older interrupt.  A fence is needed once we detect
         * the (not) done bit.
         */
        if (!CPUMASK_TESTBIT(info->done, cpu))
            continue;
        cpu_lfence();
#ifdef LOOPRECOVER
        if (toolong) {
            kprintf("pminvl %d->%d %08jx %08jx mode=%d\n",
                    cpu, n, info->done.ary[0], info->mask.ary[0],
                    info->mode);
        }
#endif

        /*
         * info->mask and info->done always contain the originating
         * cpu until the originator is done.  Targets may still be
         * present in info->done after the originator is done (they
         * will be finishing up their loops).
         *
         * Clear info->mask bits on other cpus to indicate that they
         * have quiesced (entered the loop).  Once the other mask bits
         * are clear we can execute the operation on the original,
         * then clear the mask and done bits on the originator.  The
         * targets will then finish up their side and clear their
         * done bits.
         *
         * The command is considered 100% done when all done bits have
         * been cleared.
         */
        if (n != cpu) {
            /*
             * Command state machine for 'other' cpus.
             */
            if (CPUMASK_TESTBIT(info->mask, cpu)) {
                /*
                 * Other cpu indicate to originator that they
                 * are quiesced.
                 */
                ATOMIC_CPUMASK_NANDBIT(info->mask, cpu);
                loopme = 1;
            } else if (info->ptep &&
                       CPUMASK_TESTBIT(info->mask, n)) {
                /*
                 * Other cpu must wait for the originator (n)
                 * to complete its command if ptep is not NULL.
                 */
                loopme = 1;
            } else {
                /*
                 * Other cpu detects that the originator has
                 * completed its command, or there was no
                 * command.
                 *
                 * Now that the page table entry has changed,
                 * we can follow up with our own invalidation.
                 */
                vm_offset_t va = info->va;
                int npgs;

                if (va == (vm_offset_t)-1 ||
                        info->npgs > MAX_INVAL_PAGES) {
                    cpu_invltlb();
                } else {
                    for (npgs = info->npgs; npgs; --npgs) {
                        cpu_invlpg((void *)va);
                        va += PAGE_SIZE;
                    }
                }
                ATOMIC_CPUMASK_NANDBIT(info->done, cpu);
                /* info invalid now */
                /* loopme left alone */
            }
        } else if (CPUMASK_TESTBIT(info->mask, cpu)) {
            /*
             * Originator is waiting for other cpus
             */
            if (CPUMASK_CMPMASKNEQ(info->mask, gd->gd_cpumask)) {
                /*
                 * Originator waits for other cpus to enter
                 * their loop (aka quiesce).
                 *
                 * If this bugs out the IPI may have been lost,
                 * try to reissue by resetting our own
                 * reentrancy bit and clearing the smurf mask
                 * for the cpus that did not respond, then
                 * reissuing the IPI.
                 */
                loopme = 1;
#ifdef LOOPRECOVER
                if (loopwdog(info)) {
                    info->failed = 1;
                    loopdebug("C", info);
                    /* XXX recover from possible bug */
                    mdcpu->gd_xinvaltlb = 0;
                    ATOMIC_CPUMASK_NANDMASK(smp_smurf_mask,
                                            info->mask);
                    cpu_disable_intr();
                    smp_invlpg(&smp_active_mask);

                    /*
                     * Force outer-loop retest of Xinvltlb
                     * requests (see mp_machdep.c).
                     */
                    mdcpu->gd_xinvaltlb = 2;
                    cpu_enable_intr();
                }
#endif
            } else {
                /*
                 * Originator executes operation and clears
                 * mask to allow other cpus to finish.
                 */
                KKASSERT(info->mode != INVDONE);
                if (info->mode == INVSTORE) {
                    if (info->ptep)
                        info->opte = atomic_swap_long(info->ptep, info->npte);
                    CHECKSIGMASK(info);
                    ATOMIC_CPUMASK_NANDBIT(info->mask, cpu);
                    CHECKSIGMASK(info);
                } else {
                    if (atomic_cmpset_long(info->ptep,
                                           info->opte, info->npte)) {
                        info->success = 1;
                    } else {
                        info->success = 0;
                    }
                    CHECKSIGMASK(info);
                    ATOMIC_CPUMASK_NANDBIT(info->mask, cpu);
                    CHECKSIGMASK(info);
                }
                loopme = 1;
            }
        } else {
            /*
             * Originator does not have to wait for the other
             * cpus to finish.  It clears its done bit.  A new
             * command will not be initiated by the originator
             * until the other cpus have cleared their done bits
             * (asynchronously).
             */
            vm_offset_t va = info->va;
            int npgs;

            if (va == (vm_offset_t)-1 ||
                    info->npgs > MAX_INVAL_PAGES) {
                cpu_invltlb();
            } else {
                for (npgs = info->npgs; npgs; --npgs) {
                    cpu_invlpg((void *)va);
                    va += PAGE_SIZE;
                }
            }

            /* leave loopme alone */
            /* other cpus may still be finishing up */
            /* can't race originator since that's us */
            info->mode = INVDONE;
            ATOMIC_CPUMASK_NANDBIT(info->done, cpu);
        }
    }
    return loopme;
}
Beispiel #6
0
/*
 *	Save context for the specified CPU
 */
void *
i_cpr_save_context(void *arg)
{
	long	index = (long)arg;
	psm_state_request_t *papic_state;
	int resuming;
	int	ret;
	wc_cpu_t	*wc_cpu = wc_other_cpus + index;

	PMD(PMD_SX, ("i_cpr_save_context() index = %ld\n", index))

	ASSERT(index < NCPU);

	papic_state = &(wc_cpu)->wc_apic_state;

	ret = i_cpr_platform_alloc(papic_state);
	ASSERT(ret == 0);

	ret = i_cpr_save_apic(papic_state);
	ASSERT(ret == 0);

	i_cpr_save_stack(curthread, wc_cpu);

	/*
	 * wc_save_context returns twice, once when susending and
	 * once when resuming,  wc_save_context() returns 0 when
	 * suspending and non-zero upon resume
	 */
	resuming = (wc_save_context(wc_cpu) == 0);

	/*
	 * do NOT call any functions after this point, because doing so
	 * will modify the stack that we are running on
	 */

	if (resuming) {

		ret = i_cpr_restore_apic(papic_state);
		ASSERT(ret == 0);

		i_cpr_platform_free(papic_state);

		/*
		 * Enable interrupts on this cpu.
		 * Do not bind interrupts to this CPU's local APIC until
		 * the CPU is ready to receive interrupts.
		 */
		ASSERT(CPU->cpu_id != i_cpr_bootcpuid());
		mutex_enter(&cpu_lock);
		cpu_enable_intr(CPU);
		mutex_exit(&cpu_lock);

		/*
		 * Setting the bit in cpu_ready_set must be the last operation
		 * in processor initialization; the boot CPU will continue to
		 * boot once it sees this bit set for all active CPUs.
		 */
		CPUSET_ATOMIC_ADD(cpu_ready_set, CPU->cpu_id);

		PMD(PMD_SX,
		    ("i_cpr_save_context() resuming cpu %d in cpu_ready_set\n",
		    CPU->cpu_id))
	} else {