Example #1
0
// cpu_idle - at the end of kern_init, the first kernel thread idleproc will do below works
void cpu_idle(void)
{
	while (1) {
		assert((read_rflags() & FL_IF) != 0);
		asm volatile ("hlt");
	}
}
Example #2
0
void
cpu_idle_mwait_cycle(void)
{
	struct cpu_info *ci = curcpu();

	if ((read_rflags() & PSL_I) == 0)
		panic("idle with interrupts blocked!");

	/* something already queued? */
	if (!cpu_is_idle(ci))
		return;

	/*
	 * About to idle; setting the MWAIT_IN_IDLE bit tells
	 * cpu_unidle() that it can't be a no-op and tells cpu_kick()
	 * that it doesn't need to use an IPI.  We also set the
	 * MWAIT_KEEP_IDLING bit: those routines clear it to stop
	 * the mwait.  Once they're set, we do a final check of the
	 * queue, in case another cpu called setrunqueue() and added
	 * something to the queue and called cpu_unidle() between
	 * the check in sched_idle() and here.
	 */
	atomic_setbits_int(&ci->ci_mwait, MWAIT_IDLING | MWAIT_ONLY);
	if (cpu_is_idle(ci)) {
		monitor(&ci->ci_mwait, 0, 0);
		if ((ci->ci_mwait & MWAIT_IDLING) == MWAIT_IDLING)
			mwait(0, 0);
	}

	/* done idling; let cpu_kick() know that an IPI is required */
	atomic_clearbits_int(&ci->ci_mwait, MWAIT_IDLING);
}
Example #3
0
static int
acpi_timer_test(void)
{
    uint32_t	last, this;
    int		min, max, max2, n, delta;
    register_t	s;

    min = INT32_MAX;
    max = max2 = 0;

    /* Test the timer with interrupts disabled to get accurate results. */
#if defined(__i386__)
    s = read_eflags();
#elif defined(__x86_64__)
    s = read_rflags();
#else
#error "no read_eflags"
#endif
    cpu_disable_intr();
    AcpiGetTimer(&last);
    for (n = 0; n < 2000; n++) {
	AcpiGetTimer(&this);
	delta = acpi_TimerDelta(this, last);
	if (delta > max) {
	    max2 = max;
	    max = delta;
	} else if (delta > max2) {
	    max2 = delta;
	}
	if (delta < min)
	    min = delta;
	last = this;
    }
#if defined(__i386__)
    write_eflags(s);
#elif defined(__x86_64__)
    write_rflags(s);
#else
#error "no read_eflags"
#endif

    delta = max2 - min;
    if ((max - min > 8 || delta > 3) && vmm_guest == VMM_GUEST_NONE)
	n = 0;
    else if (min < 0 || max == 0 || max2 == 0)
	n = 0;
    else
	n = 1;
    if (bootverbose) {
	kprintf("ACPI timer looks %s min = %d, max = %d, width = %d\n",
		n ? "GOOD" : "BAD ",
		min, max, max - min);
    }

    return (n);
}
Example #4
0
static void
ioapic_abi_intr_teardown(int intr)
{
	const struct ioapic_irqmap *map;
	int vector, select;
	uint32_t value;
	register_t ef;

	KASSERT(intr >= 0 && intr < IOAPIC_HWI_VECTORS,
	    ("ioapic teardown, invalid irq %d\n", intr));

	map = &ioapic_irqmaps[mycpuid][intr];
	KASSERT(IOAPIC_IMT_ISHWI(map),
	    ("ioapic teardown, not hwi irq %d, type %d, cpu%d",
	     intr, map->im_type, mycpuid));
	if (map->im_type != IOAPIC_IMT_LEGACY)
		return;

	KASSERT(ioapic_irqs[intr].io_addr != NULL,
	    ("ioapic teardown, no GSI information, irq %d\n", intr));

	ef = read_rflags();
	cpu_disable_intr();

	/*
	 * Teardown an interrupt vector.  The vector should already be
	 * installed in the cpu's IDT, but make sure.
	 */
	IOAPIC_INTRDIS(intr);

	vector = IDT_OFFSET + intr;

	/*
	 * In order to avoid losing an EOI for a level interrupt, which
	 * is vector based, make sure that the IO APIC is programmed for
	 * edge-triggering first, then reprogrammed with the new vector.
	 * This should clear the IRR bit.
	 */
	imen_lock();

	select = ioapic_irqs[intr].io_idx;
	value = ioapic_read(ioapic_irqs[intr].io_addr, select);

	ioapic_write(ioapic_irqs[intr].io_addr, select,
	    (value & ~APIC_TRIGMOD_MASK));
	ioapic_write(ioapic_irqs[intr].io_addr, select,
	    (value & ~IOART_INTVEC) | vector);

	imen_unlock();

	write_rflags(ef);
}
Example #5
0
static int
acpi_timer_test(void)
{
    uint32_t	last, this;
    int		min, max, n, delta;
    register_t	s;

    min = 10000000;
    max = 0;

    /* Test the timer with interrupts disabled to get accurate results. */
#if defined(__i386__)
    s = read_eflags();
#elif defined(__x86_64__)
    s = read_rflags();
#else
#error "no read_eflags"
#endif
    cpu_disable_intr();
    last = acpi_timer_read();
    for (n = 0; n < 2000; n++) {
	this = acpi_timer_read();
	delta = acpi_TimerDelta(this, last);
	if (delta > max)
	    max = delta;
	else if (delta < min)
	    min = delta;
	last = this;
    }
#if defined(__i386__)
    write_eflags(s);
#elif defined(__x86_64__)
    write_rflags(s);
#else
#error "no read_eflags"
#endif

    if (max - min > 2)
	n = 0;
    else if (min < 0 || max == 0)
	n = 0;
    else
	n = 1;
    if (bootverbose) {
	kprintf("ACPI timer looks %s min = %d, max = %d, width = %d\n",
		n ? "GOOD" : "BAD ",
		min, max, max - min);
    }

    return (n);
}
Example #6
0
static void
ioapic_abi_intr_setup(int intr, int flags)
{
	const struct ioapic_irqmap *map;
	int vector, select;
	uint32_t value;
	register_t ef;

	KASSERT(intr >= 0 && intr < IOAPIC_HWI_VECTORS,
	    ("ioapic setup, invalid irq %d", intr));

	map = &ioapic_irqmaps[mycpuid][intr];
	KASSERT(IOAPIC_IMT_ISHWI(map),
	    ("ioapic setup, not hwi irq %d, type %d, cpu%d",
	     intr, map->im_type, mycpuid));
	if (map->im_type != IOAPIC_IMT_LEGACY)
		return;

	KASSERT(ioapic_irqs[intr].io_addr != NULL,
	    ("ioapic setup, no GSI information, irq %d", intr));

	ef = read_rflags();
	cpu_disable_intr();

	vector = IDT_OFFSET + intr;

	/*
	 * Now reprogram the vector in the IO APIC.  In order to avoid
	 * losing an EOI for a level interrupt, which is vector based,
	 * make sure that the IO APIC is programmed for edge-triggering
	 * first, then reprogrammed with the new vector.  This should
	 * clear the IRR bit.
	 */
	imen_lock();

	select = ioapic_irqs[intr].io_idx;
	value = ioapic_read(ioapic_irqs[intr].io_addr, select);
	value |= IOART_INTMSET;

	ioapic_write(ioapic_irqs[intr].io_addr, select,
	    (value & ~APIC_TRIGMOD_MASK));
	ioapic_write(ioapic_irqs[intr].io_addr, select,
	    (value & ~IOART_INTVEC) | vector);

	imen_unlock();

	IOAPIC_INTREN(intr);

	write_rflags(ef);
}
Example #7
0
void
icu_definit(void)
{
	register_t ef;

	KKASSERT(MachIntrABI.type == MACHINTR_ICU);

	ef = read_rflags();
	cpu_disable_intr();

	/* Leave interrupts masked */
	outb(IO_ICU1 + ICU_IMR_OFFSET, 0xff);
	outb(IO_ICU2 + ICU_IMR_OFFSET, 0xff);

	MachIntrABI.setdefault();
	icu_init();

	write_rflags(ef);
}
Example #8
0
void
icu_reinit_noioapic(void)
{
	register_t ef;

	KKASSERT(MachIntrABI.type == MACHINTR_ICU);
	KKASSERT(ioapic_enable == 0);

	crit_enter();
	ef = read_rflags();
	cpu_disable_intr();

	/* Leave interrupts masked */
	outb(IO_ICU1 + ICU_IMR_OFFSET, 0xff);
	outb(IO_ICU2 + ICU_IMR_OFFSET, 0xff);

	icu_init();
	MachIntrABI.stabilize();

	write_rflags(ef);

	MachIntrABI.cleanup();
	crit_exit();
}
Example #9
0
void
trap(struct trapframe *frame)
{
#ifdef KDTRACE_HOOKS
	struct reg regs;
#endif
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	int i = 0, ucode = 0, code;
	u_int type;
	register_t addr = 0;
	ksiginfo_t ksi;

	PCPU_INC(cnt.v_trap);
	type = frame->tf_trapno;

#ifdef SMP
	/* Handler for NMI IPIs used for stopping CPUs. */
	if (type == T_NMI) {
	         if (ipi_nmi_handler() == 0)
	                   goto out;
	}
#endif /* SMP */

#ifdef KDB
	if (kdb_active) {
		kdb_reenter();
		goto out;
	}
#endif

	if (type == T_RESERVED) {
		trap_fatal(frame, 0);
		goto out;
	}

#ifdef	HWPMC_HOOKS
	/*
	 * CPU PMCs interrupt using an NMI.  If the PMC module is
	 * active, pass the 'rip' value to the PMC module's interrupt
	 * handler.  A return value of '1' from the handler means that
	 * the NMI was handled by it and we can return immediately.
	 */
	if (type == T_NMI && pmc_intr &&
	    (*pmc_intr)(PCPU_GET(cpuid), frame))
		goto out;
#endif

	if (type == T_MCHK) {
		mca_intr();
		goto out;
	}

#ifdef KDTRACE_HOOKS
	/*
	 * A trap can occur while DTrace executes a probe. Before
	 * executing the probe, DTrace blocks re-scheduling and sets
	 * a flag in its per-cpu flags to indicate that it doesn't
	 * want to fault. On returning from the probe, the no-fault
	 * flag is cleared and finally re-scheduling is enabled.
	 */
	if (dtrace_trap_func != NULL && (*dtrace_trap_func)(frame, type))
		goto out;
#endif

	if ((frame->tf_rflags & PSL_I) == 0) {
		/*
		 * Buggy application or kernel code has disabled
		 * interrupts and then trapped.  Enabling interrupts
		 * now is wrong, but it is better than running with
		 * interrupts disabled until they are accidentally
		 * enabled later.
		 */
		if (ISPL(frame->tf_cs) == SEL_UPL)
			uprintf(
			    "pid %ld (%s): trap %d with interrupts disabled\n",
			    (long)curproc->p_pid, curthread->td_name, type);
		else if (type != T_NMI && type != T_BPTFLT &&
		    type != T_TRCTRAP) {
			/*
			 * XXX not quite right, since this may be for a
			 * multiple fault in user mode.
			 */
			printf("kernel trap %d with interrupts disabled\n",
			    type);

			/*
			 * We shouldn't enable interrupts while holding a
			 * spin lock.
			 */
			if (td->td_md.md_spinlock_count == 0)
				enable_intr();
		}
	}

	code = frame->tf_err;

        if (ISPL(frame->tf_cs) == SEL_UPL) {
		/* user trap */

		td->td_pticks = 0;
		td->td_frame = frame;
		addr = frame->tf_rip;
		if (td->td_ucred != p->p_ucred) 
			cred_update_thread(td);

		switch (type) {
		case T_PRIVINFLT:	/* privileged instruction fault */
			i = SIGILL;
			ucode = ILL_PRVOPC;
			break;

		case T_BPTFLT:		/* bpt instruction fault */
		case T_TRCTRAP:		/* trace trap */
			enable_intr();
#ifdef KDTRACE_HOOKS
			if (type == T_BPTFLT) {
				fill_frame_regs(frame, &regs);
				if (dtrace_pid_probe_ptr != NULL &&
				    dtrace_pid_probe_ptr(&regs) == 0)
					goto out;
			}
#endif
			frame->tf_rflags &= ~PSL_T;
			i = SIGTRAP;
			ucode = (type == T_TRCTRAP ? TRAP_TRACE : TRAP_BRKPT);
			break;

		case T_ARITHTRAP:	/* arithmetic trap */
			ucode = fputrap_x87();
			if (ucode == -1)
				goto userout;
			i = SIGFPE;
			break;

		case T_PROTFLT:		/* general protection fault */
			i = SIGBUS;
			ucode = BUS_OBJERR;
			break;
		case T_STKFLT:		/* stack fault */
		case T_SEGNPFLT:	/* segment not present fault */
			i = SIGBUS;
			ucode = BUS_ADRERR;
			break;
		case T_TSSFLT:		/* invalid TSS fault */
			i = SIGBUS;
			ucode = BUS_OBJERR;
			break;
		case T_DOUBLEFLT:	/* double fault */
		default:
			i = SIGBUS;
			ucode = BUS_OBJERR;
			break;

		case T_PAGEFLT:		/* page fault */
			addr = frame->tf_addr;
			i = trap_pfault(frame, TRUE);
			if (i == -1)
				goto userout;
			if (i == 0)
				goto user;

			if (i == SIGSEGV)
				ucode = SEGV_MAPERR;
			else {
				if (prot_fault_translation == 0) {
					/*
					 * Autodetect.
					 * This check also covers the images
					 * without the ABI-tag ELF note.
					 */
					if (SV_CURPROC_ABI() == SV_ABI_FREEBSD
					    && p->p_osrel >= P_OSREL_SIGSEGV) {
						i = SIGSEGV;
						ucode = SEGV_ACCERR;
					} else {
						i = SIGBUS;
						ucode = BUS_PAGE_FAULT;
					}
				} else if (prot_fault_translation == 1) {
					/*
					 * Always compat mode.
					 */
					i = SIGBUS;
					ucode = BUS_PAGE_FAULT;
				} else {
					/*
					 * Always SIGSEGV mode.
					 */
					i = SIGSEGV;
					ucode = SEGV_ACCERR;
				}
			}
			break;

		case T_DIVIDE:		/* integer divide fault */
			ucode = FPE_INTDIV;
			i = SIGFPE;
			break;

#ifdef DEV_ISA
		case T_NMI:
			/* machine/parity/power fail/"kitchen sink" faults */
			if (isa_nmi(code) == 0) {
#ifdef KDB
				/*
				 * NMI can be hooked up to a pushbutton
				 * for debugging.
				 */
				if (kdb_on_nmi) {
					printf ("NMI ... going to debugger\n");
					kdb_trap(type, 0, frame);
				}
#endif /* KDB */
				goto userout;
			} else if (panic_on_nmi)
				panic("NMI indicates hardware failure");
			break;
#endif /* DEV_ISA */

		case T_OFLOW:		/* integer overflow fault */
			ucode = FPE_INTOVF;
			i = SIGFPE;
			break;

		case T_BOUND:		/* bounds check fault */
			ucode = FPE_FLTSUB;
			i = SIGFPE;
			break;

		case T_DNA:
			/* transparent fault (due to context switch "late") */
			KASSERT(PCB_USER_FPU(td->td_pcb),
			    ("kernel FPU ctx has leaked"));
			fpudna();
			goto userout;

		case T_FPOPFLT:		/* FPU operand fetch fault */
			ucode = ILL_COPROC;
			i = SIGILL;
			break;

		case T_XMMFLT:		/* SIMD floating-point exception */
			ucode = fputrap_sse();
			if (ucode == -1)
				goto userout;
			i = SIGFPE;
			break;
#ifdef KDTRACE_HOOKS
		case T_DTRACE_RET:
			enable_intr();
			fill_frame_regs(frame, &regs);
			if (dtrace_return_probe_ptr != NULL &&
			    dtrace_return_probe_ptr(&regs) == 0)
				goto out;
			break;
#endif
		}
	} else {
		/* kernel trap */

		KASSERT(cold || td->td_ucred != NULL,
		    ("kernel trap doesn't have ucred"));
		switch (type) {
		case T_PAGEFLT:			/* page fault */
			(void) trap_pfault(frame, FALSE);
			goto out;

		case T_DNA:
			KASSERT(!PCB_USER_FPU(td->td_pcb),
			    ("Unregistered use of FPU in kernel"));
			fpudna();
			goto out;

		case T_ARITHTRAP:	/* arithmetic trap */
		case T_XMMFLT:		/* SIMD floating-point exception */
		case T_FPOPFLT:		/* FPU operand fetch fault */
			/*
			 * XXXKIB for now disable any FPU traps in kernel
			 * handler registration seems to be overkill
			 */
			trap_fatal(frame, 0);
			goto out;

		case T_STKFLT:		/* stack fault */
			break;

		case T_PROTFLT:		/* general protection fault */
		case T_SEGNPFLT:	/* segment not present fault */
			if (td->td_intr_nesting_level != 0)
				break;

			/*
			 * Invalid segment selectors and out of bounds
			 * %rip's and %rsp's can be set up in user mode.
			 * This causes a fault in kernel mode when the
			 * kernel tries to return to user mode.  We want
			 * to get this fault so that we can fix the
			 * problem here and not have to check all the
			 * selectors and pointers when the user changes
			 * them.
			 */
			if (frame->tf_rip == (long)doreti_iret) {
				frame->tf_rip = (long)doreti_iret_fault;
				goto out;
			}
			if (frame->tf_rip == (long)ld_ds) {
				frame->tf_rip = (long)ds_load_fault;
				goto out;
			}
			if (frame->tf_rip == (long)ld_es) {
				frame->tf_rip = (long)es_load_fault;
				goto out;
			}
			if (frame->tf_rip == (long)ld_fs) {
				frame->tf_rip = (long)fs_load_fault;
				goto out;
			}
			if (frame->tf_rip == (long)ld_gs) {
				frame->tf_rip = (long)gs_load_fault;
				goto out;
			}
			if (frame->tf_rip == (long)ld_gsbase) {
				frame->tf_rip = (long)gsbase_load_fault;
				goto out;
			}
			if (frame->tf_rip == (long)ld_fsbase) {
				frame->tf_rip = (long)fsbase_load_fault;
				goto out;
			}
			if (curpcb->pcb_onfault != NULL) {
				frame->tf_rip = (long)curpcb->pcb_onfault;
				goto out;
			}
			break;

		case T_TSSFLT:
			/*
			 * PSL_NT can be set in user mode and isn't cleared
			 * automatically when the kernel is entered.  This
			 * causes a TSS fault when the kernel attempts to
			 * `iret' because the TSS link is uninitialized.  We
			 * want to get this fault so that we can fix the
			 * problem here and not every time the kernel is
			 * entered.
			 */
			if (frame->tf_rflags & PSL_NT) {
				frame->tf_rflags &= ~PSL_NT;
				goto out;
			}
			break;

		case T_TRCTRAP:	 /* trace trap */
			/*
			 * Ignore debug register trace traps due to
			 * accesses in the user's address space, which
			 * can happen under several conditions such as
			 * if a user sets a watchpoint on a buffer and
			 * then passes that buffer to a system call.
			 * We still want to get TRCTRAPS for addresses
			 * in kernel space because that is useful when
			 * debugging the kernel.
			 */
			if (user_dbreg_trap()) {
				/*
				 * Reset breakpoint bits because the
				 * processor doesn't
				 */
				/* XXX check upper bits here */
				load_dr6(rdr6() & 0xfffffff0);
				goto out;
			}
			/*
			 * FALLTHROUGH (TRCTRAP kernel mode, kernel address)
			 */
		case T_BPTFLT:
			/*
			 * If KDB is enabled, let it handle the debugger trap.
			 * Otherwise, debugger traps "can't happen".
			 */
#ifdef KDB
			if (kdb_trap(type, 0, frame))
				goto out;
#endif
			break;

#ifdef DEV_ISA
		case T_NMI:
			/* machine/parity/power fail/"kitchen sink" faults */
			if (isa_nmi(code) == 0) {
#ifdef KDB
				/*
				 * NMI can be hooked up to a pushbutton
				 * for debugging.
				 */
				if (kdb_on_nmi) {
					printf ("NMI ... going to debugger\n");
					kdb_trap(type, 0, frame);
				}
#endif /* KDB */
				goto out;
			} else if (panic_on_nmi == 0)
				goto out;
			/* FALLTHROUGH */
#endif /* DEV_ISA */
		}

		trap_fatal(frame, 0);
		goto out;
	}

	/* Translate fault for emulators (e.g. Linux) */
	if (*p->p_sysent->sv_transtrap)
		i = (*p->p_sysent->sv_transtrap)(i, type);

	ksiginfo_init_trap(&ksi);
	ksi.ksi_signo = i;
	ksi.ksi_code = ucode;
	ksi.ksi_trapno = type;
	ksi.ksi_addr = (void *)addr;
	if (uprintf_signal) {
		uprintf("pid %d comm %s: signal %d err %lx code %d type %d "
		    "addr 0x%lx rsp 0x%lx rip 0x%lx "
		    "<%02x %02x %02x %02x %02x %02x %02x %02x>\n",
		    p->p_pid, p->p_comm, i, frame->tf_err, ucode, type, addr,
		    frame->tf_rsp, frame->tf_rip,
		    fubyte((void *)(frame->tf_rip + 0)),
		    fubyte((void *)(frame->tf_rip + 1)),
		    fubyte((void *)(frame->tf_rip + 2)),
		    fubyte((void *)(frame->tf_rip + 3)),
		    fubyte((void *)(frame->tf_rip + 4)),
		    fubyte((void *)(frame->tf_rip + 5)),
		    fubyte((void *)(frame->tf_rip + 6)),
		    fubyte((void *)(frame->tf_rip + 7)));
	}
	KASSERT((read_rflags() & PSL_I) != 0, ("interrupts disabled"));
	trapsignal(td, &ksi);

user:
	userret(td, frame);
	KASSERT(PCB_USER_FPU(td->td_pcb),
	    ("Return from trap with kernel FPU ctx leaked"));
userout:
out:
	return;
}
Example #10
0
/*
 * API function - invalidate the pte at (va) and replace *ptep with npte
 * atomically only if *ptep equals opte, across the pmap's active cpus.
 *
 * Returns 1 on success, 0 on failure (caller typically retries).
 */
int
pmap_inval_smp_cmpset(pmap_t pmap, vm_offset_t va, pt_entry_t *ptep,
                      pt_entry_t opte, pt_entry_t npte)
{
    globaldata_t gd = mycpu;
    pmap_inval_info_t *info;
    int success;
    int cpu = gd->gd_cpuid;
    cpumask_t tmpmask;
    unsigned long rflags;

    /*
     * Initialize invalidation for pmap and enter critical section.
     */
    if (pmap == NULL)
        pmap = &kernel_pmap;
    pmap_inval_init(pmap);

    /*
     * Shortcut single-cpu case if possible.
     */
    if (CPUMASK_CMPMASKEQ(pmap->pm_active, gd->gd_cpumask)) {
        if (atomic_cmpset_long(ptep, opte, npte)) {
            if (va == (vm_offset_t)-1)
                cpu_invltlb();
            else
                cpu_invlpg((void *)va);
            pmap_inval_done(pmap);
            return 1;
        } else {
            pmap_inval_done(pmap);
            return 0;
        }
    }

    /*
     * We need a critical section to prevent getting preempted while
     * we setup our command.  A preemption might execute its own
     * pmap_inval*() command and create confusion below.
     */
    info = &invinfo[cpu];
    info->tsc_target = rdtsc() + (tsc_frequency * LOOPRECOVER_TIMEOUT1);

    /*
     * We must wait for other cpus which may still be finishing
     * up a prior operation.
     */
    while (CPUMASK_TESTNZERO(info->done)) {
#ifdef LOOPRECOVER
        if (loopwdog(info)) {
            info->failed = 1;
            loopdebug("B", info);
            /* XXX recover from possible bug */
            CPUMASK_ASSZERO(info->done);
        }
#endif
        cpu_pause();
    }
    KKASSERT(info->mode == INVDONE);

    /*
     * Must set our cpu in the invalidation scan mask before
     * any possibility of [partial] execution (remember, XINVLTLB
     * can interrupt a critical section).
     */
    ATOMIC_CPUMASK_ORBIT(smp_invmask, cpu);

    info->va = va;
    info->npgs = 1;			/* unused */
    info->ptep = ptep;
    info->npte = npte;
    info->opte = opte;
#ifdef LOOPRECOVER
    info->failed = 0;
#endif
    info->mode = INVCMPSET;
    info->success = 0;

    tmpmask = pmap->pm_active;	/* volatile */
    cpu_ccfence();
    CPUMASK_ANDMASK(tmpmask, smp_active_mask);
    CPUMASK_ORBIT(tmpmask, cpu);
    info->mask = tmpmask;

    /*
     * Command may start executing the moment 'done' is initialized,
     * disable current cpu interrupt to prevent 'done' field from
     * changing (other cpus can't clear done bits until the originating
     * cpu clears its mask bit).
     */
#ifdef LOOPRECOVER
    info->sigmask = tmpmask;
    CHECKSIGMASK(info);
#endif
    cpu_sfence();
    rflags = read_rflags();
    cpu_disable_intr();

    ATOMIC_CPUMASK_COPY(info->done, tmpmask);

    /*
     * Pass our copy of the done bits (so they don't change out from
     * under us) to generate the Xinvltlb interrupt on the targets.
     */
    smp_invlpg(&tmpmask);
    success = info->success;
    KKASSERT(info->mode == INVDONE);

    ATOMIC_CPUMASK_NANDBIT(smp_invmask, cpu);
    write_rflags(rflags);
    pmap_inval_done(pmap);

    return success;
}
Example #11
0
/*
 * Invalidate the specified va across all cpus associated with the pmap.
 * If va == (vm_offset_t)-1, we invltlb() instead of invlpg().  The operation
 * will be done fully synchronously with storing npte into *ptep and returning
 * opte.
 *
 * If ptep is NULL the operation will execute semi-synchronously.
 * ptep must be NULL if npgs > 1
 */
pt_entry_t
pmap_inval_smp(pmap_t pmap, vm_offset_t va, int npgs,
               pt_entry_t *ptep, pt_entry_t npte)
{
    globaldata_t gd = mycpu;
    pmap_inval_info_t *info;
    pt_entry_t opte = 0;
    int cpu = gd->gd_cpuid;
    cpumask_t tmpmask;
    unsigned long rflags;

    /*
     * Initialize invalidation for pmap and enter critical section.
     */
    if (pmap == NULL)
        pmap = &kernel_pmap;
    pmap_inval_init(pmap);

    /*
     * Shortcut single-cpu case if possible.
     */
    if (CPUMASK_CMPMASKEQ(pmap->pm_active, gd->gd_cpumask)) {
        /*
         * Convert to invltlb if there are too many pages to
         * invlpg on.
         */
        if (npgs > MAX_INVAL_PAGES) {
            npgs = 0;
            va = (vm_offset_t)-1;
        }

        /*
         * Invalidate the specified pages, handle invltlb if requested.
         */
        while (npgs) {
            --npgs;
            if (ptep) {
                opte = atomic_swap_long(ptep, npte);
                ++ptep;
            }
            if (va == (vm_offset_t)-1)
                break;
            cpu_invlpg((void *)va);
            va += PAGE_SIZE;
        }
        if (va == (vm_offset_t)-1)
            cpu_invltlb();
        pmap_inval_done(pmap);

        return opte;
    }

    /*
     * We need a critical section to prevent getting preempted while
     * we setup our command.  A preemption might execute its own
     * pmap_inval*() command and create confusion below.
     *
     * tsc_target is our watchdog timeout that will attempt to recover
     * from a lost IPI.  Set to 1/16 second for now.
     */
    info = &invinfo[cpu];
    info->tsc_target = rdtsc() + (tsc_frequency * LOOPRECOVER_TIMEOUT1);

    /*
     * We must wait for other cpus which may still be finishing up a
     * prior operation that we requested.
     *
     * We do not have to disable interrupts here.  An Xinvltlb can occur
     * at any time (even within a critical section), but it will not
     * act on our command until we set our done bits.
     */
    while (CPUMASK_TESTNZERO(info->done)) {
#ifdef LOOPRECOVER
        if (loopwdog(info)) {
            info->failed = 1;
            loopdebug("A", info);
            /* XXX recover from possible bug */
            CPUMASK_ASSZERO(info->done);
        }
#endif
        cpu_pause();
    }
    KKASSERT(info->mode == INVDONE);

    /*
     * Must set our cpu in the invalidation scan mask before
     * any possibility of [partial] execution (remember, XINVLTLB
     * can interrupt a critical section).
     */
    ATOMIC_CPUMASK_ORBIT(smp_invmask, cpu);

    info->va = va;
    info->npgs = npgs;
    info->ptep = ptep;
    info->npte = npte;
    info->opte = 0;
#ifdef LOOPRECOVER
    info->failed = 0;
#endif
    info->mode = INVSTORE;

    tmpmask = pmap->pm_active;	/* volatile (bits may be cleared) */
    cpu_ccfence();
    CPUMASK_ANDMASK(tmpmask, smp_active_mask);

    /*
     * If ptep is NULL the operation can be semi-synchronous, which means
     * we can improve performance by flagging and removing idle cpus
     * (see the idleinvlclr function in mp_machdep.c).
     *
     * Typically kernel page table operation is semi-synchronous.
     */
    if (ptep == NULL)
        smp_smurf_idleinvlclr(&tmpmask);
    CPUMASK_ORBIT(tmpmask, cpu);
    info->mask = tmpmask;

    /*
     * Command may start executing the moment 'done' is initialized,
     * disable current cpu interrupt to prevent 'done' field from
     * changing (other cpus can't clear done bits until the originating
     * cpu clears its mask bit, but other cpus CAN start clearing their
     * mask bits).
     */
#ifdef LOOPRECOVER
    info->sigmask = tmpmask;
    CHECKSIGMASK(info);
#endif
    cpu_sfence();
    rflags = read_rflags();
    cpu_disable_intr();

    ATOMIC_CPUMASK_COPY(info->done, tmpmask);
    /* execution can begin here due to races */

    /*
     * Pass our copy of the done bits (so they don't change out from
     * under us) to generate the Xinvltlb interrupt on the targets.
     */
    smp_invlpg(&tmpmask);
    opte = info->opte;
    KKASSERT(info->mode == INVDONE);

    /*
     * Target cpus will be in their loop exiting concurrently with our
     * cleanup.  They will not lose the bitmask they obtained before so
     * we can safely clear this bit.
     */
    ATOMIC_CPUMASK_NANDBIT(smp_invmask, cpu);
    write_rflags(rflags);
    pmap_inval_done(pmap);

    return opte;
}