void
db_show_callout(db_expr_t addr, bool haddr, db_expr_t count, const char *modif)
{
	CPU_INFO_ITERATOR cii;
	struct callout_cpu *cc;
	struct cpu_info *ci;
	int b;

	db_printf("hardclock_ticks now: %d\n", hardclock_ticks);
	db_printf("    ticks  wheel               arg  func\n");

	/*
	 * Don't lock the callwheel; all the other CPUs are paused
	 * anyhow, and we might be called in a circumstance where
	 * some other CPU was paused while holding the lock.
	 */
	for (CPU_INFO_FOREACH(cii, ci)) {
		cc = ci->ci_data.cpu_callout;
		db_show_callout_bucket(cc, &cc->cc_todo);
	}
	for (b = 0; b < BUCKETS; b++) {
		for (CPU_INFO_FOREACH(cii, ci)) {
			cc = ci->ci_data.cpu_callout;
			db_show_callout_bucket(cc, &cc->cc_wheel[b]);
		}
	}
}
Example #2
static void
percpu_cpu_enlarge(size_t size)
{
    CPU_INFO_ITERATOR cii;
    struct cpu_info *ci;

    for (CPU_INFO_FOREACH(cii, ci)) {
        percpu_cpu_t pcc;

        pcc.pcc_data = kmem_alloc(size, KM_SLEEP); /* XXX cacheline */
        pcc.pcc_size = size;
        if (!mp_online) {
            percpu_cpu_swap(ci, &pcc);
        } else {
            uint64_t where;

            uvm_lwp_hold(curlwp); /* don't swap out pcc */
            where = xc_unicast(0, percpu_cpu_swap, ci, &pcc, ci);
            xc_wait(where);
            uvm_lwp_rele(curlwp);
        }
        KASSERT(pcc.pcc_size < size);
        if (pcc.pcc_data != NULL) {
            kmem_free(pcc.pcc_data, pcc.pcc_size);
        }
    }
}
Example #3
static void
exynos_set_cpufreq(const struct cpu_freq *freqreq)
{
	struct cpu_info *ci;
	uint32_t regval;
	int M, P, S;
	CPU_INFO_ITERATOR cii;

	M = freqreq->M;
	P = freqreq->P;
	S = freqreq->S;

	regval = __SHIFTIN(M, PLL_CON0_M) |
		 __SHIFTIN(P, PLL_CON0_P) |
		 __SHIFTIN(S, PLL_CON0_S);

	/* enable PLL and write config */
	regval |= PLL_CON0_ENABLE;
	bus_space_write_4(&armv7_generic_bs_tag, exynos_cmu_apll_bsh, PLL_CON0_OFFSET,
		regval);

	/* update our cycle counter i.e. our CPU frequency for all CPUs */
	for (CPU_INFO_FOREACH(cii, ci)) {
		ci->ci_data.cpu_cc_freq = exynos_get_cpufreq();
	}
}
Example #4
/*
 * Linux-style /proc/cpuinfo.
 * Only used when procfs is mounted with -o linux.
 *
 * In the multiprocessor case, this is a loop over all CPUs.
 */
int
procfs_getcpuinfstr(char *bf, int *len)
{
	struct cpu_info *ci;
	CPU_INFO_ITERATOR cii;
	int i = 0, used = *len, total = *len;

	*len = 0;
	for (CPU_INFO_FOREACH(cii, ci)) {
		if (procfs_getonecpu(i++, ci, bf, &used) == 0) {
			*len += used;
			total = 0;
			break;
		}
		total -= used;
		if (total > 0) {
			bf += used;
			*bf++ = '\n';
			*len += used + 1;
			used = --total;
			if (used == 0)
				break;
		} else {
			*len += used;
			break;
		}
	}
	return total == 0 ? -1 : 0;
}
Example #5
/*
 * Linux-style /proc/cpuinfo.
 * Only used when procfs is mounted with -o linux.
 *
 * In the multiprocessor case, this is a loop over all CPUs.
 */
int
procfs_getcpuinfstr(char *bf, size_t *len)
{
	struct cpu_info *ci;
	CPU_INFO_ITERATOR cii;
	size_t i, total, size, used;

	i = total = 0;
	used = size = *len;

	for (CPU_INFO_FOREACH(cii, ci)) {
		procfs_getonecpu(i++, ci, bf, &used);
		total += used + 1;
		if (used + 1 < size) {
			bf += used;
			*bf++ = '\n';
			size -= used + 1;
			used = size;
		} else
			used = 0;
	}
	size = *len;
	*len = total;
	return size < *len ? -1 : 0;
}
Example #6
int
workqueue_create(struct workqueue **wqp, const char *name,
    void (*callback_func)(struct work *, void *), void *callback_arg,
    pri_t prio, int ipl, int flags)
{
	struct workqueue *wq;
	struct workqueue_queue *q;
	void *ptr;
	int error = 0;

	CTASSERT(sizeof(work_impl_t) <= sizeof(struct work));

	ptr = kmem_zalloc(workqueue_size(flags), KM_SLEEP);
	wq = (void *)roundup2((uintptr_t)ptr, coherency_unit);
	wq->wq_ptr = ptr;
	wq->wq_flags = flags;

	workqueue_init(wq, name, callback_func, callback_arg, prio, ipl);

	if (flags & WQ_PERCPU) {
		struct cpu_info *ci;
		CPU_INFO_ITERATOR cii;

		/* create the work-queue for each CPU */
		for (CPU_INFO_FOREACH(cii, ci)) {
			q = workqueue_queue_lookup(wq, ci);
			error = workqueue_initqueue(wq, q, ipl, ci);
			if (error) {
				break;
			}
		}
	} else {
Example #7
/*
 * rw_onproc:
 *
 *	Return true if an rwlock owner is running on a CPU in the system.
 *	If the target is waiting on the kernel big lock, then we must
 *	release it.  This is necessary to avoid deadlock.
 *
 *	Note that we can't use the rwlock owner field as an LWP pointer.  We
 *	don't have full control over the timing of our execution, and so the
 *	pointer could be completely invalid by the time we dereference it.
 */
static int
rw_onproc(uintptr_t owner, struct cpu_info **cip)
{
#ifdef MULTIPROCESSOR
	CPU_INFO_ITERATOR cii;
	struct cpu_info *ci;
	lwp_t *l;

	if ((owner & (RW_WRITE_LOCKED|RW_HAS_WAITERS)) != RW_WRITE_LOCKED)
		return 0;
	l = (lwp_t *)(owner & RW_THREAD);

	/* See if the target is running on a CPU somewhere. */
	if ((ci = *cip) != NULL && ci->ci_curlwp == l)
		goto run;
	for (CPU_INFO_FOREACH(cii, ci))
		if (ci->ci_curlwp == l)
			goto run;

	/* No: it may be safe to block now. */
	*cip = NULL;
	return 0;

 run:
 	/* Target is running; do we need to block? */
 	*cip = ci;
	return ci->ci_biglock_wanted != l;
#else
	return 0;
#endif	/* MULTIPROCESSOR */
}
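
A caller typically uses rw_onproc() to decide between adaptive spinning and
blocking.  The following is a minimal, hypothetical caller-side sketch (not
the actual kern_rwlock.c acquisition path): the helper name is invented, and
it assumes the krwlock_t rw_owner field and the SPINLOCK_BACKOFF macros.

static void
rw_spin_then_block(krwlock_t *rw)
{
	struct cpu_info *ci = NULL;
	u_int count = SPINLOCK_BACKOFF_MIN;
	uintptr_t owner = rw->rw_owner;

	/* Spin only while the owner is known to be running on some CPU. */
	while (rw_onproc(owner, &ci)) {
		SPINLOCK_BACKOFF(count);	/* exponential back-off */
		owner = rw->rw_owner;		/* re-read; it may have been released */
	}

	/*
	 * Owner is not on a CPU, or it wants the big lock we hold; at this
	 * point the caller would block on the turnstile instead of spinning.
	 */
}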
Example #8
/*
 * percpu_foreach: call the specified callback function for each CPU.
 *
 * => called in thread context.
 * => the caller should not rely on the CPU iteration order.
 * => the callback function should do minimal work, because it is executed
 *    while holding a global lock, which can block low-priority xcalls;
 *    e.g. it is illegal for a callback function to sleep for memory
 *    allocation.
 */
void
percpu_foreach(percpu_t *pc, percpu_callback_t cb, void *arg)
{
    CPU_INFO_ITERATOR cii;
    struct cpu_info *ci;

    percpu_traverse_enter();
    for (CPU_INFO_FOREACH(cii, ci)) {
        (*cb)(percpu_getptr_remote(pc, ci), arg, ci);
    }
    percpu_traverse_exit();
}
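
As a usage illustration, here is a minimal, hypothetical sketch of summing a
per-CPU uint64_t counter with percpu_foreach().  The counter_sum* names are
invented, and the sketch assumes the percpu_t was allocated with
percpu_alloc(sizeof(uint64_t)).

/* Hypothetical callback: add one CPU's counter to the sum in *arg. */
static void
counter_sum_cb(void *p, void *arg, struct cpu_info *ci)
{
    uint64_t *sump = arg;

    /* Must not sleep here: percpu_foreach() holds a global lock. */
    *sump += *(uint64_t *)p;
}

static uint64_t
counter_sum(percpu_t *pc)
{
    uint64_t sum = 0;

    percpu_foreach(pc, counter_sum_cb, &sum);
    return sum;
}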
/*
 * Set up the real-time and statistics clocks.
 * Leave stathz 0 only if no alternative timer is available.
 *
 * The periods of these clocks must be an even number of microseconds.
 */
void
timer_init_4m(void)
{
	struct cpu_info *cpi;
	CPU_INFO_ITERATOR n;

	timerreg4m->t_limit = tmr_ustolim4m(tick);
	for (CPU_INFO_FOREACH(n, cpi)) {
		cpi->counterreg_4m->t_limit = tmr_ustolim4m(statint);
	}
	icr_si_bic(SINTR_T);
}
Example #10
/*
 * kcpuset internally uses an array of uint32_t while xen uses an array of
 * u_long. As we're little-endian we can cast one to the other.
 */
typedef union {
#ifdef _LP64
	uint32_t xcpum_km[2];
#else
	uint32_t xcpum_km[1];
#endif	
	u_long   xcpum_xm;
} xcpumask_t;
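
For illustration, here is a hedged sketch of how such a union might be filled
from a kcpuset and read back as the u_long mask Xen expects.  The helper name
is invented, and it assumes the kcpuset_export_u32() interface; as the comment
above says, the cast is only valid on little-endian machines.

static inline u_long
xcpumask_from_kcpuset(const kcpuset_t *kc)
{
	xcpumask_t xcpumask;

	xcpumask.xcpum_xm = 0;	/* clear the widest member first */
	kcpuset_export_u32(kc, xcpumask.xcpum_km, sizeof(xcpumask.xcpum_km));
	return xcpumask.xcpum_xm;
}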

void
xen_failsafe_handler(void)
{

	panic("xen_failsafe_handler called!\n");
}


void
xen_set_ldt(vaddr_t base, uint32_t entries)
{
	vaddr_t va;
	vaddr_t end;
	pt_entry_t *ptp;
	int s;

#ifdef __x86_64__
	end = base + (entries << 3);
#else
	end = base + entries * sizeof(union descriptor);
#endif

	for (va = base; va < end; va += PAGE_SIZE) {
		KASSERT(va >= VM_MIN_KERNEL_ADDRESS);
		ptp = kvtopte(va);
		XENPRINTF(("xen_set_ldt %#" PRIxVADDR " %d %p\n",
		    base, entries, ptp));
		pmap_pte_clearbits(ptp, PG_RW);
	}
	s = splvm();
	xpq_queue_set_ldt(base, entries);
	splx(s);
}

#ifdef XENDEBUG
void xpq_debug_dump(void);
#endif

#define XPQUEUE_SIZE 2048
static mmu_update_t xpq_queue_array[MAXCPUS][XPQUEUE_SIZE];
static int xpq_idx_array[MAXCPUS];

#ifdef i386
extern union descriptor tmpgdt[];
#endif /* i386 */
void
xpq_flush_queue(void)
{
	int i, ok = 0, ret;

	mmu_update_t *xpq_queue = xpq_queue_array[curcpu()->ci_cpuid];
	int xpq_idx = xpq_idx_array[curcpu()->ci_cpuid];

	XENPRINTK2(("flush queue %p entries %d\n", xpq_queue, xpq_idx));
	for (i = 0; i < xpq_idx; i++)
		XENPRINTK2(("%d: 0x%08" PRIx64 " 0x%08" PRIx64 "\n", i,
		    xpq_queue[i].ptr, xpq_queue[i].val));

retry:
	ret = HYPERVISOR_mmu_update_self(xpq_queue, xpq_idx, &ok);

	if (xpq_idx != 0 && ret < 0) {
		struct cpu_info *ci;
		CPU_INFO_ITERATOR cii;

		printf("xpq_flush_queue: %d entries (%d successful) on "
		    "cpu%d (%ld)\n",
		    xpq_idx, ok, curcpu()->ci_index, curcpu()->ci_cpuid);

		if (ok != 0) {
			xpq_queue += ok;
			xpq_idx -= ok;
			ok = 0;
			goto retry;
		}

		for (CPU_INFO_FOREACH(cii, ci)) {
			xpq_queue = xpq_queue_array[ci->ci_cpuid];
			xpq_idx = xpq_idx_array[ci->ci_cpuid];
			printf("cpu%d (%ld):\n", ci->ci_index, ci->ci_cpuid);
			for (i = 0; i < xpq_idx; i++) {
				printf("  0x%016" PRIx64 ": 0x%016" PRIx64 "\n",
				   xpq_queue[i].ptr, xpq_queue[i].val);
			}
#ifdef __x86_64__
			for (i = 0; i < PDIR_SLOT_PTE; i++) {
				if (ci->ci_kpm_pdir[i] == 0)
					continue;
				printf(" kpm_pdir[%d]: 0x%" PRIx64 "\n",
				    i, ci->ci_kpm_pdir[i]);
			}
#endif
		}
		panic("HYPERVISOR_mmu_update failed, ret: %d\n", ret);
	}
	xpq_idx_array[curcpu()->ci_cpuid] = 0;
}
Example #11
void
flush_workqueue(struct workqueue_struct *wq)
{
	static const struct wq_flush zero_wqf;
	struct wq_flush wqf = zero_wqf;

	mutex_init(&wqf.wqf_lock, MUTEX_DEFAULT, IPL_NONE);
	cv_init(&wqf.wqf_cv, "lnxwflsh");

	if (1) {
		struct wq_flush_work *const wqfw = kmem_zalloc(sizeof(*wqfw),
		    KM_SLEEP);

		wqf.wqf_n = 1;
		wqfw->wqfw_flush = &wqf;
		INIT_WORK(&wqfw->wqfw_work, &linux_wq_barrier);
		wqfw->wqfw_work.w_wq = wq;
		wqfw->wqfw_work.w_state = WORK_PENDING;
		workqueue_enqueue(wq->wq_workqueue, &wqfw->wqfw_work.w_wk,
		    NULL);
	} else {
		struct cpu_info *ci;
		CPU_INFO_ITERATOR cii;
		struct wq_flush_work *wqfw;

		panic("per-CPU Linux workqueues don't work yet!");

		wqf.wqf_n = 0;
		for (CPU_INFO_FOREACH(cii, ci)) {
			wqfw = kmem_zalloc(sizeof(*wqfw), KM_SLEEP);
			mutex_enter(&wqf.wqf_lock);
			wqf.wqf_n++;
			mutex_exit(&wqf.wqf_lock);
			wqfw->wqfw_flush = &wqf;
			INIT_WORK(&wqfw->wqfw_work, &linux_wq_barrier);
			wqfw->wqfw_work.w_state = WORK_PENDING;
			wqfw->wqfw_work.w_wq = wq;
			workqueue_enqueue(wq->wq_workqueue,
			    &wqfw->wqfw_work.w_wk, ci);
		}
	}

	mutex_enter(&wqf.wqf_lock);
	while (0 < wqf.wqf_n)
		cv_wait(&wqf.wqf_cv, &wqf.wqf_lock);
	mutex_exit(&wqf.wqf_lock);

	cv_destroy(&wqf.wqf_cv);
	mutex_destroy(&wqf.wqf_lock);
}
Example #12
/*
 * Grow the GDT.
 */
void
gdt_grow(int which)
{
    size_t old_len, new_len;
    CPU_INFO_ITERATOR cii;
    struct cpu_info *ci;
    struct vm_page *pg;
    vaddr_t va;

    old_len = gdt_size[which] * sizeof(gdt[0]);
    gdt_size[which] <<= 1;
    new_len = old_len << 1;

#ifdef XEN
    if (which != 0) {
        size_t max_len = MAXGDTSIZ * sizeof(gdt[0]);
        if (old_len == 0) {
            gdt_size[which] = MINGDTSIZ;
            new_len = gdt_size[which] * sizeof(gdt[0]);
        }
        for(va = (vaddr_t)(cpu_info_primary.ci_gdt) + old_len + max_len;
                va < (vaddr_t)(cpu_info_primary.ci_gdt) + new_len + max_len;
                va += PAGE_SIZE) {
            while ((pg = uvm_pagealloc(NULL, 0, NULL, UVM_PGA_ZERO))
                    == NULL) {
                uvm_wait("gdt_grow");
            }
            pmap_kenter_pa(va, VM_PAGE_TO_PHYS(pg),
                           VM_PROT_READ | VM_PROT_WRITE);
        }
        return;
    }
#endif

    for (CPU_INFO_FOREACH(cii, ci)) {
        for (va = (vaddr_t)(ci->ci_gdt) + old_len;
                va < (vaddr_t)(ci->ci_gdt) + new_len;
                va += PAGE_SIZE) {
            while ((pg = uvm_pagealloc(NULL, 0, NULL, UVM_PGA_ZERO)) ==
                    NULL) {
                uvm_wait("gdt_grow");
            }
            pmap_kenter_pa(va, VM_PAGE_TO_PHYS(pg),
                           VM_PROT_READ | VM_PROT_WRITE);
        }
    }

    pmap_update(pmap_kernel());
}
Example #13
int
hppa_ipi_broadcast(u_long ipi)
{
	CPU_INFO_ITERATOR cii;
	struct cpu_info *ci;
	int count = 0;

	for (CPU_INFO_FOREACH(cii, ci)) {
		if (ci != curcpu() && (ci->ci_flags & CPUF_RUNNING))
			if (hppa_ipi_send(ci, ipi))
				count++;
	}
	
	return count;	
}
Example #14
void
interrupt_get_available(kcpuset_t *cpuset)
{
	CPU_INFO_ITERATOR cii;
	struct cpu_info *ci;

	kcpuset_zero(cpuset);

	mutex_enter(&cpu_lock);
	for (CPU_INFO_FOREACH(cii, ci)) {
		if ((ci->ci_schedstate.spc_flags & SPCF_NOINTR) == 0)
			kcpuset_set(cpuset, cpu_index(ci));
	}
	mutex_exit(&cpu_lock);
}
Example #15
void
cpu_multicast_ipi(__cpuset_t cpuset, int tag)
{
	CPU_INFO_ITERATOR cii;
	struct cpu_info *ci;

	CPUSET_DEL(cpuset, cpu_index(curcpu()));
	if (CPUSET_EMPTY_P(cpuset))
		return;

	for (CPU_INFO_FOREACH(cii, ci)) {
		if (CPUSET_HAS_P(cpuset, cpu_index(ci))) {
			CPUSET_DEL(cpuset, cpu_index(ci));
			(void)cpu_send_ipi(ci, tag);
		}
	}
}
Example #16
static inline void
pmap_tlb_processpacket(pmap_tlb_packet_t *tp, kcpuset_t *target)
{
	int err = 0;

	if (!kcpuset_match(target, kcpuset_attached)) {
		const struct cpu_info * const self = curcpu();
		CPU_INFO_ITERATOR cii;
		struct cpu_info *lci;

		for (CPU_INFO_FOREACH(cii, lci)) {
			const cpuid_t lcid = cpu_index(lci);

			if (__predict_false(lci == self) ||
			    !kcpuset_isset(target, lcid)) {
				continue;
			}
			err |= x86_ipi(LAPIC_TLB_VECTOR,
			    lci->ci_cpuid, LAPIC_DLMODE_FIXED);
		}
	} else {
Example #17
void
setgdt(int sel, const void *base, size_t limit,
       int type, int dpl, int def32, int gran)
{
    struct segment_descriptor *sd = &gdt[sel].sd;
    CPU_INFO_ITERATOR cii;
    struct cpu_info *ci;

#ifdef XEN
    if (type == SDT_SYS386TSS) {
        /* printk("XXX TSS descriptor not supported in GDT\n"); */
        return;
    }
#endif
    setsegment(sd, base, limit, type, dpl, def32, gran);
    for (CPU_INFO_FOREACH(cii, ci)) {
        if (ci->ci_gdt != NULL)
            update_descriptor(&ci->ci_gdt[sel],
                              (union descriptor *)sd);
    }
}
/*
 * Call hardclock on all CPUs.
 */
static void
handle_hardclock(struct clockframe *cap)
{
	int s;
#ifdef MULTIPROCESSOR
	struct cpu_info *cpi;
	CPU_INFO_ITERATOR n;

	for (CPU_INFO_FOREACH(n, cpi)) {
		if (cpi == cpuinfo.ci_self) {
			KASSERT(CPU_IS_PRIMARY(cpi));
			continue;
		}
		
		raise_ipi(cpi, IPL_HARDCLOCK);
	}
#endif
	s = splsched();
	hardclock(cap);
	splx(s);
}
Example #19
void
vpanic(const char *fmt, va_list ap)
{
	CPU_INFO_ITERATOR cii;
	struct cpu_info *ci, *oci;
	int bootopt;
	static char scratchstr[256]; /* stores panic message */

	spldebug_stop();

	if (lwp0.l_cpu && curlwp) {
		/*
		 * Disable preemption.  If we are already panicking on another
		 * CPU, sit here and spin until the system is rebooted.  Allow
		 * the CPU that first panicked to panic again.
		 */
		kpreempt_disable();
		ci = curcpu();
		oci = atomic_cas_ptr((void *)&paniccpu, NULL, ci);
		if (oci != NULL && oci != ci) {
			/* Give interrupts a chance to try and prevent deadlock. */
			for (;;) {
#ifndef _RUMPKERNEL /* XXXpooka: temporary build fix, see kern/40505 */
				DELAY(10);
#endif /* _RUMPKERNEL */
			}
		}

		/*
		 * Convert the current thread to a bound thread and prevent all
		 * CPUs from scheduling unbound jobs.  Do so without taking any
		 * locks.
		 */
		curlwp->l_pflag |= LP_BOUND;
		for (CPU_INFO_FOREACH(cii, ci)) {
			ci->ci_schedstate.spc_flags |= SPCF_OFFLINE;
		}
	}
Example #20
void
cpu_debug_dump(void)
{
	CPU_INFO_ITERATOR cii;
	struct cpu_info *ci;
	char running, hatched, paused, resumed, halted;

	db_printf("CPU CPUID STATE CPUINFO            CPL INT MTX IPIS\n");
	for (CPU_INFO_FOREACH(cii, ci)) {
		hatched = (kcpuset_isset(cpus_hatched, cpu_index(ci)) ? 'H' : '-');
		running = (kcpuset_isset(cpus_running, cpu_index(ci)) ? 'R' : '-');
		paused  = (kcpuset_isset(cpus_paused,  cpu_index(ci)) ? 'P' : '-');
		resumed = (kcpuset_isset(cpus_resumed, cpu_index(ci)) ? 'r' : '-');
		halted  = (kcpuset_isset(cpus_halted,  cpu_index(ci)) ? 'h' : '-');
		db_printf("%3d 0x%03lx %c%c%c%c%c %p "
			"%3d %3d %3d "
			"0x%02" PRIx64 "/0x%02" PRIx64 "\n",
			cpu_index(ci), ci->ci_cpuid,
			running, hatched, paused, resumed, halted,
			ci, ci->ci_cpl, ci->ci_idepth, ci->ci_mtx_count,
			ci->ci_active_ipis, ci->ci_request_ipis);
	}
}
void
timerattach_obio_4m(device_t parent, device_t self, void *aux)
{
	union obio_attach_args *uoba = aux;
	struct sbus_attach_args *sa = &uoba->uoba_sbus;
	struct cpu_info *cpi;
	bus_space_handle_t bh;
	int i;
	CPU_INFO_ITERATOR n;

	if (sa->sa_nreg < 2) {
		printf(": only %d register sets\n", sa->sa_nreg);
		return;
	}

	/* Map the system timer */
	i = sa->sa_nreg - 1;
	if (bus_space_map2(sa->sa_bustag,
			   BUS_ADDR(sa->sa_reg[i].oa_space,
				    sa->sa_reg[i].oa_base),
			   sizeof(struct timer_4m),
			   BUS_SPACE_MAP_LINEAR,
			   TIMERREG_VA, &bh) != 0) {
		printf(": can't map registers\n");
		return;
	}
	timerreg4m = (struct timer_4m *)TIMERREG_VA;

	/* Map each CPU's counter */
	for (i = 0; i < sa->sa_nreg - 1; i++) {
		/*
		 * Check whether the CPU corresponding to this timer
		 * register is installed.
		 */
		for (CPU_INFO_FOREACH(n, cpi)) {
			if ((i == 0 && sparc_ncpus == 1) || cpi->mid == i + 8) {
				/* We got a corresponding MID. */
				break;
			}
			cpi = NULL;
		}
		if (cpi == NULL)
			continue;

		if (sbus_bus_map(sa->sa_bustag,
				 sa->sa_reg[i].oa_space,
				 sa->sa_reg[i].oa_base,
				 sizeof(struct timer_4m),
				 BUS_SPACE_MAP_LINEAR,
				 &bh) != 0) {
			printf(": can't map CPU counter %d\n", i);
			return;
		}
		cpi->counterreg_4m = (struct counter_4m *)bh;
	}

#if defined(MULTIPROCESSOR)
	if (sparc_ncpus > 1) {
		/*
		 * Note that we don't actually use this cookie after checking
		 * that it was established; we call directly via raise_ipi()
		 * at IPL_HARDCLOCK.
		 */
		void *hardclock_cookie;

		hardclock_cookie = sparc_softintr_establish(IPL_HARDCLOCK,
		    hardclock_ipi, NULL);
		if (hardclock_cookie == NULL)
			panic("timerattach: cannot establish hardclock_intr");
	}
#endif

	/* Put processor counter in "timer" mode */
	timerreg4m->t_cfg = 0;

	timerattach(&timerreg4m->t_counter, &timerreg4m->t_limit);
}
static void
dtrace_load(void *dummy)
{
	dtrace_provider_id_t id;
	CPU_INFO_ITERATOR cpuind;
	struct cpu_info *cinfo;

	dtrace_debug_init(NULL);
	dtrace_gethrtime_init(NULL);

	/* Hook into the trap handler. */
	dtrace_trap_func = dtrace_trap;

	/* Hang our hook for thread switches. */
	dtrace_vtime_switch_func = dtrace_vtime_switch;

	/* Hang our hook for exceptions. */
	dtrace_invop_init();

	/*
	 * XXX This is a short term hack to avoid having to comment
	 * out lots and lots of lock/unlock calls.
	 */
	mutex_init(&mod_lock,"XXX mod_lock hack", MUTEX_DEFAULT, NULL);

	/*
	 * Initialise the mutexes without 'witness' because the dtrace
	 * code is mostly written to wait for memory. To have the
	 * witness code change a malloc() from M_WAITOK to M_NOWAIT
	 * because a lock is held would surely create a panic in a
	 * low memory situation. And that low memory situation might be
	 * the very problem we are trying to trace.
	 */
	mutex_init(&dtrace_lock,"dtrace probe state", MUTEX_DEFAULT, NULL);
	mutex_init(&dtrace_provider_lock,"dtrace provider state", MUTEX_DEFAULT, NULL);
	mutex_init(&dtrace_meta_lock,"dtrace meta-provider state", MUTEX_DEFAULT, NULL);
	mutex_init(&dtrace_errlock,"dtrace error lock", MUTEX_DEFAULT, NULL);

	mutex_enter(&dtrace_provider_lock);
	mutex_enter(&dtrace_lock);
	mutex_enter(&cpu_lock);

	ASSERT(MUTEX_HELD(&cpu_lock));

	dtrace_arena = vmem_create("dtrace", 1, INT_MAX, 1,
			NULL, NULL, NULL, 0, VM_SLEEP, IPL_NONE);

	dtrace_state_cache = kmem_cache_create(__UNCONST("dtrace_state_cache"),
	    sizeof (dtrace_dstate_percpu_t) * NCPU, DTRACE_STATE_ALIGN,
	    NULL, NULL, NULL, NULL, NULL, 0);

	ASSERT(MUTEX_HELD(&cpu_lock));
	dtrace_bymod = dtrace_hash_create(offsetof(dtrace_probe_t, dtpr_mod),
	    offsetof(dtrace_probe_t, dtpr_nextmod),
	    offsetof(dtrace_probe_t, dtpr_prevmod));

	dtrace_byfunc = dtrace_hash_create(offsetof(dtrace_probe_t, dtpr_func),
	    offsetof(dtrace_probe_t, dtpr_nextfunc),
	    offsetof(dtrace_probe_t, dtpr_prevfunc));

	dtrace_byname = dtrace_hash_create(offsetof(dtrace_probe_t, dtpr_name),
	    offsetof(dtrace_probe_t, dtpr_nextname),
	    offsetof(dtrace_probe_t, dtpr_prevname));

	if (dtrace_retain_max < 1) {
		cmn_err(CE_WARN, "illegal value (%lu) for dtrace_retain_max; "
		    "setting to 1", dtrace_retain_max);
		dtrace_retain_max = 1;
	}

	/*
	 * Now discover our toxic ranges.
	 */
	dtrace_toxic_ranges(dtrace_toxrange_add);

	/*
	 * Before we register ourselves as a provider to our own framework,
	 * we would like to assert that dtrace_provider is NULL -- but that's
	 * not true if we were loaded as a dependency of a DTrace provider.
	 * Once we've registered, we can assert that dtrace_provider is our
	 * pseudo provider.
	 */
	(void) dtrace_register("dtrace", &dtrace_provider_attr,
	    DTRACE_PRIV_NONE, 0, &dtrace_provider_ops, NULL, &id);

	ASSERT(dtrace_provider != NULL);
	ASSERT((dtrace_provider_id_t)dtrace_provider == id);

	dtrace_probeid_begin = dtrace_probe_create((dtrace_provider_id_t)
	    dtrace_provider, NULL, NULL, "BEGIN", 0, NULL);
	dtrace_probeid_end = dtrace_probe_create((dtrace_provider_id_t)
	    dtrace_provider, NULL, NULL, "END", 0, NULL);
	dtrace_probeid_error = dtrace_probe_create((dtrace_provider_id_t)
	    dtrace_provider, NULL, NULL, "ERROR", 1, NULL);

	mutex_exit(&cpu_lock);

	/*
	 * If DTrace helper tracing is enabled, we need to allocate the
	 * trace buffer and initialize the values.
	 */
	if (dtrace_helptrace_enabled) {
		ASSERT(dtrace_helptrace_buffer == NULL);
		dtrace_helptrace_buffer =
		    kmem_zalloc(dtrace_helptrace_bufsize, KM_SLEEP);
		dtrace_helptrace_next = 0;
		dtrace_helptrace_size = dtrace_helptrace_bufsize;
	}

	mutex_exit(&dtrace_lock);
	mutex_exit(&dtrace_provider_lock);

	mutex_enter(&cpu_lock);

	/* Setup the CPUs */
	for (CPU_INFO_FOREACH(cpuind, cinfo)) {
		(void) dtrace_cpu_setup(CPU_CONFIG, cpu_index(cinfo));
	}

	mutex_exit(&cpu_lock);

	dtrace_anon_init(NULL);
#if 0
	dtrace_dev = make_dev(&dtrace_cdevsw, 0, UID_ROOT, GID_WHEEL, 0600, "dtrace/dtrace");
#endif

	return;
}
/* ARGSUSED */
static int
dtrace_ioctl(struct file *fp, u_long cmd, void *addr)
{
	dtrace_state_t *state = (dtrace_state_t *)fp->f_data;
	int error = 0;

	if (state == NULL)
		return (EINVAL);

	if (state->dts_anon) {
		ASSERT(dtrace_anon.dta_state == NULL);
		state = state->dts_anon;
	}

	switch (cmd) {
	case DTRACEIOC_AGGDESC: {
		dtrace_aggdesc_t **paggdesc = (dtrace_aggdesc_t **) addr;
		dtrace_aggdesc_t aggdesc;
		dtrace_action_t *act;
		dtrace_aggregation_t *agg;
		int nrecs;
		uint32_t offs;
		dtrace_recdesc_t *lrec;
		void *buf;
		size_t size;
		uintptr_t dest;

		DTRACE_IOCTL_PRINTF("%s(%d): DTRACEIOC_AGGDESC\n",__func__,__LINE__);

		if (copyin((void *) *paggdesc, &aggdesc, sizeof (aggdesc)) != 0)
			return (EFAULT);

		mutex_enter(&dtrace_lock);

		if ((agg = dtrace_aggid2agg(state, aggdesc.dtagd_id)) == NULL) {
			mutex_exit(&dtrace_lock);
			return (EINVAL);
		}

		aggdesc.dtagd_epid = agg->dtag_ecb->dte_epid;

		nrecs = aggdesc.dtagd_nrecs;
		aggdesc.dtagd_nrecs = 0;

		offs = agg->dtag_base;
		lrec = &agg->dtag_action.dta_rec;
		aggdesc.dtagd_size = lrec->dtrd_offset + lrec->dtrd_size - offs;

		for (act = agg->dtag_first; ; act = act->dta_next) {
			ASSERT(act->dta_intuple ||
			    DTRACEACT_ISAGG(act->dta_kind));

			/*
			 * If this action has a record size of zero, it
			 * denotes an argument to the aggregating action.
			 * Because the presence of this record doesn't (or
			 * shouldn't) affect the way the data is interpreted,
			 * we don't copy it out to save user-level the
			 * confusion of dealing with a zero-length record.
			 */
			if (act->dta_rec.dtrd_size == 0) {
				ASSERT(agg->dtag_hasarg);
				continue;
			}

			aggdesc.dtagd_nrecs++;

			if (act == &agg->dtag_action)
				break;
		}

		/*
		 * Now that we have the size, we need to allocate a temporary
		 * buffer in which to store the complete description.  We need
		 * the temporary buffer to be able to drop dtrace_lock()
		 * across the copyout(), below.
		 */
		size = sizeof (dtrace_aggdesc_t) +
		    (aggdesc.dtagd_nrecs * sizeof (dtrace_recdesc_t));

		buf = kmem_alloc(size, KM_SLEEP);
		dest = (uintptr_t)buf;

		bcopy(&aggdesc, (void *)dest, sizeof (aggdesc));
		dest += offsetof(dtrace_aggdesc_t, dtagd_rec[0]);

		for (act = agg->dtag_first; ; act = act->dta_next) {
			dtrace_recdesc_t rec = act->dta_rec;

			/*
			 * See the comment in the above loop for why we pass
			 * over zero-length records.
			 */
			if (rec.dtrd_size == 0) {
				ASSERT(agg->dtag_hasarg);
				continue;
			}

			if (nrecs-- == 0)
				break;

			rec.dtrd_offset -= offs;
			bcopy(&rec, (void *)dest, sizeof (rec));
			dest += sizeof (dtrace_recdesc_t);

			if (act == &agg->dtag_action)
				break;
		}

		mutex_exit(&dtrace_lock);

		if (copyout(buf, (void *) *paggdesc, dest - (uintptr_t)buf) != 0) {
			kmem_free(buf, size);
			return (EFAULT);
		}

		kmem_free(buf, size);
		return (0);
	}
	case DTRACEIOC_AGGSNAP:
	case DTRACEIOC_BUFSNAP: {
		dtrace_bufdesc_t **pdesc = (dtrace_bufdesc_t **) addr;
		dtrace_bufdesc_t desc;
		caddr_t cached;
		dtrace_buffer_t *buf;

		dtrace_debug_output();

		if (copyin((void *) *pdesc, &desc, sizeof (desc)) != 0)
			return (EFAULT);

		DTRACE_IOCTL_PRINTF("%s(%d): %s curcpu %d cpu %d\n",
		    __func__,__LINE__,
		    cmd == DTRACEIOC_AGGSNAP ?
		    "DTRACEIOC_AGGSNAP":"DTRACEIOC_BUFSNAP",
		    cpu_number(), desc.dtbd_cpu);

		if (desc.dtbd_cpu >= ncpu)
			return (ENOENT);

		mutex_enter(&dtrace_lock);

		if (cmd == DTRACEIOC_BUFSNAP) {
			buf = &state->dts_buffer[desc.dtbd_cpu];
		} else {
			buf = &state->dts_aggbuffer[desc.dtbd_cpu];
		}

		if (buf->dtb_flags & (DTRACEBUF_RING | DTRACEBUF_FILL)) {
			size_t sz = buf->dtb_offset;

			if (state->dts_activity != DTRACE_ACTIVITY_STOPPED) {
				mutex_exit(&dtrace_lock);
				return (EBUSY);
			}

			/*
			 * If this buffer has already been consumed, we're
			 * going to indicate that there's nothing left here
			 * to consume.
			 */
			if (buf->dtb_flags & DTRACEBUF_CONSUMED) {
				mutex_exit(&dtrace_lock);

				desc.dtbd_size = 0;
				desc.dtbd_drops = 0;
				desc.dtbd_errors = 0;
				desc.dtbd_oldest = 0;
				sz = sizeof (desc);

				if (copyout(&desc, (void *) *pdesc, sz) != 0)
					return (EFAULT);

				return (0);
			}

			/*
			 * If this is a ring buffer that has wrapped, we want
			 * to copy the whole thing out.
			 */
			if (buf->dtb_flags & DTRACEBUF_WRAPPED) {
				dtrace_buffer_polish(buf);
				sz = buf->dtb_size;
			}

			if (copyout(buf->dtb_tomax, desc.dtbd_data, sz) != 0) {
				mutex_exit(&dtrace_lock);
				return (EFAULT);
			}

			desc.dtbd_size = sz;
			desc.dtbd_drops = buf->dtb_drops;
			desc.dtbd_errors = buf->dtb_errors;
			desc.dtbd_oldest = buf->dtb_xamot_offset;

			mutex_exit(&dtrace_lock);

			if (copyout(&desc, (void *) *pdesc, sizeof (desc)) != 0)
				return (EFAULT);

			buf->dtb_flags |= DTRACEBUF_CONSUMED;

			return (0);
		}

		if (buf->dtb_tomax == NULL) {
			ASSERT(buf->dtb_xamot == NULL);
			mutex_exit(&dtrace_lock);
			return (ENOENT);
		}

		cached = buf->dtb_tomax;
		ASSERT(!(buf->dtb_flags & DTRACEBUF_NOSWITCH));

		dtrace_xcall(desc.dtbd_cpu,
		    (dtrace_xcall_t)dtrace_buffer_switch, buf);

		state->dts_errors += buf->dtb_xamot_errors;

		/*
		 * If the buffers did not actually switch, then the cross call
		 * did not take place -- presumably because the given CPU is
		 * not in the ready set.  If this is the case, we'll return
		 * ENOENT.
		 */
		if (buf->dtb_tomax == cached) {
			ASSERT(buf->dtb_xamot != cached);
			mutex_exit(&dtrace_lock);
			return (ENOENT);
		}

		ASSERT(cached == buf->dtb_xamot);

		DTRACE_IOCTL_PRINTF("%s(%d): copyout the buffer snapshot\n",__func__,__LINE__);

		/*
		 * We have our snapshot; now copy it out.
		 */
		if (copyout(buf->dtb_xamot, desc.dtbd_data,
		    buf->dtb_xamot_offset) != 0) {
			mutex_exit(&dtrace_lock);
			return (EFAULT);
		}

		desc.dtbd_size = buf->dtb_xamot_offset;
		desc.dtbd_drops = buf->dtb_xamot_drops;
		desc.dtbd_errors = buf->dtb_xamot_errors;
		desc.dtbd_oldest = 0;

		mutex_exit(&dtrace_lock);

		DTRACE_IOCTL_PRINTF("%s(%d): copyout buffer desc: size %zd drops %lu errors %lu\n",__func__,__LINE__,(size_t) desc.dtbd_size,(u_long) desc.dtbd_drops,(u_long) desc.dtbd_errors);

		/*
		 * Finally, copy out the buffer description.
		 */
		if (copyout(&desc, (void *) *pdesc, sizeof (desc)) != 0)
			return (EFAULT);

		return (0);
	}
	case DTRACEIOC_CONF: {
		dtrace_conf_t conf;

		DTRACE_IOCTL_PRINTF("%s(%d): DTRACEIOC_CONF\n",__func__,__LINE__);

		bzero(&conf, sizeof (conf));
		conf.dtc_difversion = DIF_VERSION;
		conf.dtc_difintregs = DIF_DIR_NREGS;
		conf.dtc_diftupregs = DIF_DTR_NREGS;
		conf.dtc_ctfmodel = CTF_MODEL_NATIVE;

		*((dtrace_conf_t *) addr) = conf;

		return (0);
	}
	case DTRACEIOC_DOFGET: {
		dof_hdr_t **pdof = (dof_hdr_t **) addr;
		dof_hdr_t hdr, *dof = *pdof;
		int rval;
		uint64_t len;

		DTRACE_IOCTL_PRINTF("%s(%d): DTRACEIOC_DOFGET\n",__func__,__LINE__);

		if (copyin((void *)dof, &hdr, sizeof (hdr)) != 0)
			return (EFAULT);

		mutex_enter(&dtrace_lock);
		dof = dtrace_dof_create(state);
		mutex_exit(&dtrace_lock);

		len = MIN(hdr.dofh_loadsz, dof->dofh_loadsz);
		rval = copyout(dof, (void *) *pdof, len);
		dtrace_dof_destroy(dof);

		return (rval == 0 ? 0 : EFAULT);
	}
	case DTRACEIOC_ENABLE: {
		dof_hdr_t *dof = NULL;
		dtrace_enabling_t *enab = NULL;
		dtrace_vstate_t *vstate;
		int err = 0;
		int rval;
		dtrace_enable_io_t *p = (dtrace_enable_io_t *) addr;

		DTRACE_IOCTL_PRINTF("%s(%d): DTRACEIOC_ENABLE\n",__func__,__LINE__);

		/*
		 * If a NULL argument has been passed, we take this as our
		 * cue to reevaluate our enablings.
		 */
		if (p->dof == NULL) {
			dtrace_enabling_matchall();

			return (0);
		}

		if ((dof = dtrace_dof_copyin((uintptr_t) p->dof, &rval)) == NULL)
			return (EINVAL);

		mutex_enter(&cpu_lock);
		mutex_enter(&dtrace_lock);
		vstate = &state->dts_vstate;

		if (state->dts_activity != DTRACE_ACTIVITY_INACTIVE) {
			mutex_exit(&dtrace_lock);
			mutex_exit(&cpu_lock);
			dtrace_dof_destroy(dof);
			return (EBUSY);
		}

		if (dtrace_dof_slurp(dof, vstate, curlwp->l_cred, &enab, 0, B_TRUE) != 0) {
			mutex_exit(&dtrace_lock);
			mutex_exit(&cpu_lock);
			dtrace_dof_destroy(dof);
			return (EINVAL);
		}

		if ((rval = dtrace_dof_options(dof, state)) != 0) {
			dtrace_enabling_destroy(enab);
			mutex_exit(&dtrace_lock);
			mutex_exit(&cpu_lock);
			dtrace_dof_destroy(dof);
			return (rval);
		}

		if ((err = dtrace_enabling_match(enab, &p->n_matched)) == 0) {
			err = dtrace_enabling_retain(enab);
		} else {
			dtrace_enabling_destroy(enab);
		}

		mutex_exit(&cpu_lock);
		mutex_exit(&dtrace_lock);
		dtrace_dof_destroy(dof);

		return (err);
	}
	case DTRACEIOC_EPROBE: {
		dtrace_eprobedesc_t **pepdesc = (dtrace_eprobedesc_t **) addr;
		dtrace_eprobedesc_t epdesc;
		dtrace_ecb_t *ecb;
		dtrace_action_t *act;
		void *buf;
		size_t size;
		uintptr_t dest;
		int nrecs;

		DTRACE_IOCTL_PRINTF("%s(%d): DTRACEIOC_EPROBE\n",__func__,__LINE__);

		if (copyin((void *)*pepdesc, &epdesc, sizeof (epdesc)) != 0)
			return (EFAULT);

		mutex_enter(&dtrace_lock);

		if ((ecb = dtrace_epid2ecb(state, epdesc.dtepd_epid)) == NULL) {
			mutex_exit(&dtrace_lock);
			return (EINVAL);
		}

		if (ecb->dte_probe == NULL) {
			mutex_exit(&dtrace_lock);
			return (EINVAL);
		}

		epdesc.dtepd_probeid = ecb->dte_probe->dtpr_id;
		epdesc.dtepd_uarg = ecb->dte_uarg;
		epdesc.dtepd_size = ecb->dte_size;

		nrecs = epdesc.dtepd_nrecs;
		epdesc.dtepd_nrecs = 0;
		for (act = ecb->dte_action; act != NULL; act = act->dta_next) {
			if (DTRACEACT_ISAGG(act->dta_kind) || act->dta_intuple)
				continue;

			epdesc.dtepd_nrecs++;
		}

		/*
		 * Now that we have the size, we need to allocate a temporary
		 * buffer in which to store the complete description.  We need
		 * the temporary buffer to be able to drop dtrace_lock()
		 * across the copyout(), below.
		 */
		size = sizeof (dtrace_eprobedesc_t) +
		    (epdesc.dtepd_nrecs * sizeof (dtrace_recdesc_t));

		buf = kmem_alloc(size, KM_SLEEP);
		dest = (uintptr_t)buf;

		bcopy(&epdesc, (void *)dest, sizeof (epdesc));
		dest += offsetof(dtrace_eprobedesc_t, dtepd_rec[0]);

		for (act = ecb->dte_action; act != NULL; act = act->dta_next) {
			if (DTRACEACT_ISAGG(act->dta_kind) || act->dta_intuple)
				continue;

			if (nrecs-- == 0)
				break;

			bcopy(&act->dta_rec, (void *)dest,
			    sizeof (dtrace_recdesc_t));
			dest += sizeof (dtrace_recdesc_t);
		}

		mutex_exit(&dtrace_lock);

		if (copyout(buf, (void *) *pepdesc, dest - (uintptr_t)buf) != 0) {
			kmem_free(buf, size);
			return (EFAULT);
		}

		kmem_free(buf, size);
		return (0);
	}
	case DTRACEIOC_FORMAT: {
		dtrace_fmtdesc_t *fmt = (dtrace_fmtdesc_t *) addr;
		char *str;
		int len;

		DTRACE_IOCTL_PRINTF("%s(%d): DTRACEIOC_FORMAT\n",__func__,__LINE__);

		mutex_enter(&dtrace_lock);

		if (fmt->dtfd_format == 0 ||
		    fmt->dtfd_format > state->dts_nformats) {
			mutex_exit(&dtrace_lock);
			return (EINVAL);
		}

		/*
		 * Format strings are allocated contiguously and they are
		 * never freed; if a format index is less than the number
		 * of formats, we can assert that the format map is non-NULL
		 * and that the format for the specified index is non-NULL.
		 */
		ASSERT(state->dts_formats != NULL);
		str = state->dts_formats[fmt->dtfd_format - 1];
		ASSERT(str != NULL);

		len = strlen(str) + 1;

		if (len > fmt->dtfd_length) {
			fmt->dtfd_length = len;
		} else {
			if (copyout(str, fmt->dtfd_string, len) != 0) {
				mutex_exit(&dtrace_lock);
				return (EINVAL);
			}
		}

		mutex_exit(&dtrace_lock);
		return (0);
	}
	case DTRACEIOC_GO: {
		int rval;
		processorid_t *cpuid = (processorid_t *) addr;

		DTRACE_IOCTL_PRINTF("%s(%d): DTRACEIOC_GO\n",__func__,__LINE__);

		rval = dtrace_state_go(state, cpuid);

		return (rval);
	}
	case DTRACEIOC_PROBEARG: {
		dtrace_argdesc_t *desc = (dtrace_argdesc_t *) addr;
		dtrace_probe_t *probe;
		dtrace_provider_t *prov;

		DTRACE_IOCTL_PRINTF("%s(%d): DTRACEIOC_PROBEARG\n",__func__,__LINE__);

		if (desc->dtargd_id == DTRACE_IDNONE)
			return (EINVAL);

		if (desc->dtargd_ndx == DTRACE_ARGNONE)
			return (EINVAL);

		mutex_enter(&dtrace_provider_lock);
		mutex_enter(&mod_lock);
		mutex_enter(&dtrace_lock);

		if (desc->dtargd_id > dtrace_nprobes) {
			mutex_exit(&dtrace_lock);
			mutex_exit(&mod_lock);
			mutex_exit(&dtrace_provider_lock);
			return (EINVAL);
		}

		if ((probe = dtrace_probes[desc->dtargd_id - 1]) == NULL) {
			mutex_exit(&dtrace_lock);
			mutex_exit(&mod_lock);
			mutex_exit(&dtrace_provider_lock);
			return (EINVAL);
		}

		mutex_exit(&dtrace_lock);

		prov = probe->dtpr_provider;

		if (prov->dtpv_pops.dtps_getargdesc == NULL) {
			/*
			 * There isn't any typed information for this probe.
			 * Set the argument number to DTRACE_ARGNONE.
			 */
			desc->dtargd_ndx = DTRACE_ARGNONE;
		} else {
			desc->dtargd_native[0] = '\0';
			desc->dtargd_xlate[0] = '\0';
			desc->dtargd_mapping = desc->dtargd_ndx;

			prov->dtpv_pops.dtps_getargdesc(prov->dtpv_arg,
			    probe->dtpr_id, probe->dtpr_arg, desc);
		}

		mutex_exit(&mod_lock);
		mutex_exit(&dtrace_provider_lock);

		return (0);
	}
	case DTRACEIOC_PROBEMATCH:
	case DTRACEIOC_PROBES: {
		dtrace_probedesc_t *p_desc = (dtrace_probedesc_t *) addr;
		dtrace_probe_t *probe = NULL;
		dtrace_probekey_t pkey;
		dtrace_id_t i;
		int m = 0;
		uint32_t priv = 0;
		uid_t uid = 0;
		zoneid_t zoneid = 0;

		DTRACE_IOCTL_PRINTF("%s(%d): %s\n",__func__,__LINE__,
		    cmd == DTRACEIOC_PROBEMATCH ?
		    "DTRACEIOC_PROBEMATCH":"DTRACEIOC_PROBES");

		p_desc->dtpd_provider[DTRACE_PROVNAMELEN - 1] = '\0';
		p_desc->dtpd_mod[DTRACE_MODNAMELEN - 1] = '\0';
		p_desc->dtpd_func[DTRACE_FUNCNAMELEN - 1] = '\0';
		p_desc->dtpd_name[DTRACE_NAMELEN - 1] = '\0';

		/*
		 * Before we attempt to match this probe, we want to give
		 * all providers the opportunity to provide it.
		 */
		if (p_desc->dtpd_id == DTRACE_IDNONE) {
			mutex_enter(&dtrace_provider_lock);
			dtrace_probe_provide(p_desc, NULL);
			mutex_exit(&dtrace_provider_lock);
			p_desc->dtpd_id++;
		}

		if (cmd == DTRACEIOC_PROBEMATCH)  {
			dtrace_probekey(p_desc, &pkey);
			pkey.dtpk_id = DTRACE_IDNONE;
		}

		dtrace_cred2priv(curlwp->l_cred, &priv, &uid, &zoneid);

		mutex_enter(&dtrace_lock);

		if (cmd == DTRACEIOC_PROBEMATCH) {
			for (i = p_desc->dtpd_id; i <= dtrace_nprobes; i++) {
				if ((probe = dtrace_probes[i - 1]) != NULL &&
				    (m = dtrace_match_probe(probe, &pkey,
				    priv, uid, zoneid)) != 0)
					break;
			}

			if (m < 0) {
				mutex_exit(&dtrace_lock);
				return (EINVAL);
			}

		} else {
			for (i = p_desc->dtpd_id; i <= dtrace_nprobes; i++) {
				if ((probe = dtrace_probes[i - 1]) != NULL &&
				    dtrace_match_priv(probe, priv, uid, zoneid))
					break;
			}
		}

		if (probe == NULL) {
			mutex_exit(&dtrace_lock);
			return (ESRCH);
		}

		dtrace_probe_description(probe, p_desc);
		mutex_exit(&dtrace_lock);

		return (0);
	}
	case DTRACEIOC_PROVIDER: {
		dtrace_providerdesc_t *pvd = (dtrace_providerdesc_t *) addr;
		dtrace_provider_t *pvp;

		DTRACE_IOCTL_PRINTF("%s(%d): DTRACEIOC_PROVIDER\n",__func__,__LINE__);

		pvd->dtvd_name[DTRACE_PROVNAMELEN - 1] = '\0';
		error = 0;
again:
		mutex_enter(&dtrace_provider_lock);

		for (pvp = dtrace_provider; pvp != NULL; pvp = pvp->dtpv_next) {
			if (strcmp(pvp->dtpv_name, pvd->dtvd_name) == 0)
				break;
		}

		mutex_exit(&dtrace_provider_lock);

		if (pvp == NULL && error == 0) {
			error = module_autoload(pvd->dtvd_name,
			    MODULE_CLASS_MISC);
			if (error == 0)
				goto again;
		}

		if (pvp == NULL)
			return (ESRCH);

		bcopy(&pvp->dtpv_priv, &pvd->dtvd_priv, sizeof (dtrace_ppriv_t));
		bcopy(&pvp->dtpv_attr, &pvd->dtvd_attr, sizeof (dtrace_pattr_t));

		return (0);
	}
	case DTRACEIOC_REPLICATE: {
		dtrace_repldesc_t *desc = (dtrace_repldesc_t *) addr;
		dtrace_probedesc_t *match = &desc->dtrpd_match;
		dtrace_probedesc_t *create = &desc->dtrpd_create;
		int err;

		DTRACE_IOCTL_PRINTF("%s(%d): DTRACEIOC_REPLICATE\n",__func__,__LINE__);

		match->dtpd_provider[DTRACE_PROVNAMELEN - 1] = '\0';
		match->dtpd_mod[DTRACE_MODNAMELEN - 1] = '\0';
		match->dtpd_func[DTRACE_FUNCNAMELEN - 1] = '\0';
		match->dtpd_name[DTRACE_NAMELEN - 1] = '\0';

		create->dtpd_provider[DTRACE_PROVNAMELEN - 1] = '\0';
		create->dtpd_mod[DTRACE_MODNAMELEN - 1] = '\0';
		create->dtpd_func[DTRACE_FUNCNAMELEN - 1] = '\0';
		create->dtpd_name[DTRACE_NAMELEN - 1] = '\0';

		mutex_enter(&dtrace_lock);
		err = dtrace_enabling_replicate(state, match, create);
		mutex_exit(&dtrace_lock);

		return (err);
	}
	case DTRACEIOC_STATUS: {
		dtrace_status_t *stat = (dtrace_status_t *) addr;
		dtrace_dstate_t *dstate;
		int j;
		uint64_t nerrs;
		CPU_INFO_ITERATOR cpuind;
		struct cpu_info *cinfo;

		DTRACE_IOCTL_PRINTF("%s(%d): DTRACEIOC_STATUS\n",__func__,__LINE__);

		/*
		 * See the comment in dtrace_state_deadman() for the reason
		 * for setting dts_laststatus to INT64_MAX before setting
		 * it to the correct value.
		 */
		state->dts_laststatus = INT64_MAX;
		dtrace_membar_producer();
		state->dts_laststatus = dtrace_gethrtime();

		bzero(stat, sizeof (*stat));

		mutex_enter(&dtrace_lock);

		if (state->dts_activity == DTRACE_ACTIVITY_INACTIVE) {
			mutex_exit(&dtrace_lock);
			return (ENOENT);
		}

		if (state->dts_activity == DTRACE_ACTIVITY_DRAINING)
			stat->dtst_exiting = 1;

		nerrs = state->dts_errors;
		dstate = &state->dts_vstate.dtvs_dynvars;

		for (CPU_INFO_FOREACH(cpuind, cinfo)) {
		    	int ci = cpu_index(cinfo);
			dtrace_dstate_percpu_t *dcpu = &dstate->dtds_percpu[ci];

			stat->dtst_dyndrops += dcpu->dtdsc_drops;
			stat->dtst_dyndrops_dirty += dcpu->dtdsc_dirty_drops;
			stat->dtst_dyndrops_rinsing += dcpu->dtdsc_rinsing_drops;

			if (state->dts_buffer[ci].dtb_flags & DTRACEBUF_FULL)
				stat->dtst_filled++;

			nerrs += state->dts_buffer[ci].dtb_errors;

			for (j = 0; j < state->dts_nspeculations; j++) {
				dtrace_speculation_t *spec;
				dtrace_buffer_t *buf;

				spec = &state->dts_speculations[j];
				buf = &spec->dtsp_buffer[ci];
				stat->dtst_specdrops += buf->dtb_xamot_drops;
			}
		}

		stat->dtst_specdrops_busy = state->dts_speculations_busy;
		stat->dtst_specdrops_unavail = state->dts_speculations_unavail;
		stat->dtst_stkstroverflows = state->dts_stkstroverflows;
		stat->dtst_dblerrors = state->dts_dblerrors;
		stat->dtst_killed =
		    (state->dts_activity == DTRACE_ACTIVITY_KILLED);
		stat->dtst_errors = nerrs;

		mutex_exit(&dtrace_lock);

		return (0);
	}
	case DTRACEIOC_STOP: {
		processorid_t *cpuid = (processorid_t *) addr;

		DTRACE_IOCTL_PRINTF("%s(%d): DTRACEIOC_STOP\n",__func__,__LINE__);

		mutex_enter(&dtrace_lock);
		error = dtrace_state_stop(state, cpuid);
		mutex_exit(&dtrace_lock);

		return (error);
	}
	default:
		error = ENOTTY;
	}
	return (error);
}
Example #24
void
cpu_hatch(struct cpu_info *ci)
{
	struct pmap_tlb_info * const ti = ci->ci_tlb_info;

	/*
	 * Invalidate all the TLB entries (even wired ones) and then reserve
	 * space for the wired TLB entries.
	 */
	mips3_cp0_wired_write(0);
	tlb_invalidate_all();
	mips3_cp0_wired_write(ti->ti_wired);

	/*
	 * Setup HWRENA and USERLOCAL COP0 registers (MIPSxxR2).
	 */
	cpu_hwrena_setup();

	/*
	 * If we are using register zero relative addressing to access cpu_info
	 * in the exception vectors, enter that mapping into TLB now.
	 */
	if (ci->ci_tlb_slot >= 0) {
		const uint32_t tlb_lo = MIPS3_PG_G|MIPS3_PG_V
		    | mips3_paddr_to_tlbpfn((vaddr_t)ci);
		const struct tlbmask tlbmask = {
			.tlb_hi = -PAGE_SIZE | KERNEL_PID,
#if (PGSHIFT & 1)
			.tlb_lo0 = tlb_lo,
			.tlb_lo1 = tlb_lo + MIPS3_PG_NEXT,
#else
			.tlb_lo0 = 0,
			.tlb_lo1 = tlb_lo,
#endif
			.tlb_mask = -1,
		};

		tlb_invalidate_addr(tlbmask.tlb_hi, KERNEL_PID);
		tlb_write_entry(ci->ci_tlb_slot, &tlbmask);
	}

	/*
	 * Flush the icache, just to be sure.
	 */
	mips_icache_sync_all();

	/*
	 * Let this CPU do its own initialization (for things that have to be
	 * done on the local CPU).
	 */
	(*mips_locoresw.lsw_cpu_init)(ci);

	// Show this CPU as present.
	atomic_or_ulong(&ci->ci_flags, CPUF_PRESENT);

	/*
	 * Announce we are hatched
	 */
	kcpuset_atomic_set(cpus_hatched, cpu_index(ci));

	/*
	 * Now wait to be set free!
	 */
	while (! kcpuset_isset(cpus_running, cpu_index(ci))) {
		/* spin, spin, spin */
	}

	/*
	 * initialize the MIPS count/compare clock
	 */
	mips3_cp0_count_write(ci->ci_data.cpu_cc_skew);
	KASSERT(ci->ci_cycles_per_hz != 0);
	ci->ci_next_cp0_clk_intr = ci->ci_data.cpu_cc_skew + ci->ci_cycles_per_hz;
	mips3_cp0_compare_write(ci->ci_next_cp0_clk_intr);
	ci->ci_data.cpu_cc_skew = 0;

	/*
	 * Let this CPU do its own post-running initialization
	 * (for things that have to be done on the local CPU).
	 */
	(*mips_locoresw.lsw_cpu_run)(ci);

	/*
	 * Now turn on interrupts (and verify they are on).
	 */
	spl0();
	KASSERTMSG(ci->ci_cpl == IPL_NONE, "cpl %d", ci->ci_cpl);
	KASSERT(mips_cp0_status_read() & MIPS_SR_INT_IE);

	kcpuset_atomic_set(pmap_kernel()->pm_onproc, cpu_index(ci));
	kcpuset_atomic_set(pmap_kernel()->pm_active, cpu_index(ci));

	/*
	 * And do a tail call to idle_loop
	 */
	idle_loop(NULL);
}

void
cpu_boot_secondary_processors(void)
{
	CPU_INFO_ITERATOR cii;
	struct cpu_info *ci;
	for (CPU_INFO_FOREACH(cii, ci)) {
		if (CPU_IS_PRIMARY(ci))
			continue;
		KASSERT(ci->ci_data.cpu_idlelwp);

		/*
		 * Skip this CPU if it didn't successfully hatch.
		 */
		if (!kcpuset_isset(cpus_hatched, cpu_index(ci)))
			continue;

		ci->ci_data.cpu_cc_skew = mips3_cp0_count_read();
		atomic_or_ulong(&ci->ci_flags, CPUF_RUNNING);
		kcpuset_set(cpus_running, cpu_index(ci));
		// Spin until the cpu calls idle_loop
		for (u_int i = 0; i < 100; i++) {
			if (kcpuset_isset(cpus_running, cpu_index(ci)))
				break;
			delay(1000);
		}
	}
}