void db_show_callout(db_expr_t addr, bool haddr, db_expr_t count, const char *modif) { CPU_INFO_ITERATOR cii; struct callout_cpu *cc; struct cpu_info *ci; int b; db_printf("hardclock_ticks now: %d\n", hardclock_ticks); db_printf(" ticks wheel arg func\n"); /* * Don't lock the callwheel; all the other CPUs are paused * anyhow, and we might be called in a circumstance where * some other CPU was paused while holding the lock. */ for (CPU_INFO_FOREACH(cii, ci)) { cc = ci->ci_data.cpu_callout; db_show_callout_bucket(cc, &cc->cc_todo); } for (b = 0; b < BUCKETS; b++) { for (CPU_INFO_FOREACH(cii, ci)) { cc = ci->ci_data.cpu_callout; db_show_callout_bucket(cc, &cc->cc_wheel[b]); } } }
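/*
 * A minimal sketch of the machine-independent iteration idiom shared by all
 * of the snippets in this listing, assuming the usual NetBSD <sys/cpu.h>
 * definitions of CPU_INFO_ITERATOR, CPU_INFO_FOREACH() and curcpu(); the
 * function name and the skip_self flag are illustrative only, not part of
 * any original source file.
 */
#include <sys/param.h>
#include <sys/cpu.h>

static void
example_foreach_cpu(bool skip_self)
{
	CPU_INFO_ITERATOR cii;
	struct cpu_info *ci;

	for (CPU_INFO_FOREACH(cii, ci)) {
		/* Optionally skip the CPU we are currently running on. */
		if (skip_self && ci == curcpu())
			continue;
		/* Per-CPU work goes here, e.g. inspecting ci->ci_data. */
	}
}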
static void percpu_cpu_enlarge(size_t size) { CPU_INFO_ITERATOR cii; struct cpu_info *ci; for (CPU_INFO_FOREACH(cii, ci)) { percpu_cpu_t pcc; pcc.pcc_data = kmem_alloc(size, KM_SLEEP); /* XXX cacheline */ pcc.pcc_size = size; if (!mp_online) { percpu_cpu_swap(ci, &pcc); } else { uint64_t where; uvm_lwp_hold(curlwp); /* don't swap out pcc */ where = xc_unicast(0, percpu_cpu_swap, ci, &pcc, ci); xc_wait(where); uvm_lwp_rele(curlwp); } KASSERT(pcc.pcc_size < size); if (pcc.pcc_data != NULL) { kmem_free(pcc.pcc_data, pcc.pcc_size); } } }
static void exynos_set_cpufreq(const struct cpu_freq *freqreq) { struct cpu_info *ci; uint32_t regval; int M, P, S; int cii; M = freqreq->M; P = freqreq->P; S = freqreq->S; regval = __SHIFTIN(M, PLL_CON0_M) | __SHIFTIN(P, PLL_CON0_P) | __SHIFTIN(S, PLL_CON0_S); /* enable PLL and write config */ regval |= PLL_CON0_ENABLE; bus_space_write_4(&armv7_generic_bs_tag, exynos_cmu_apll_bsh, PLL_CON0_OFFSET, regval); /* update our cycle counter, i.e. our CPU frequency, for all CPUs */ for (CPU_INFO_FOREACH(cii, ci)) { ci->ci_data.cpu_cc_freq = exynos_get_cpufreq(); } }
/* * Linux-style /proc/cpuinfo. * Only used when procfs is mounted with -o linux. * * In the multiprocessor case, this should be a loop over all CPUs. */ int procfs_getcpuinfstr(char *bf, int *len) { struct cpu_info *ci; CPU_INFO_ITERATOR cii; int i = 0, used = *len, total = *len; *len = 0; for (CPU_INFO_FOREACH(cii, ci)) { if (procfs_getonecpu(i++, ci, bf, &used) == 0) { *len += used; total = 0; break; } total -= used; if (total > 0) { bf += used; *bf++ = '\n'; *len += used + 1; used = --total; if (used == 0) break; } else { *len += used; break; } } return total == 0 ? -1 : 0; }
/* * Linux-style /proc/cpuinfo. * Only used when procfs is mounted with -o linux. * * In the multiprocessor case, this should be a loop over all CPUs. */ int procfs_getcpuinfstr(char *bf, size_t *len) { struct cpu_info *ci; CPU_INFO_ITERATOR cii; size_t i, total, size, used; i = total = 0; used = size = *len; for (CPU_INFO_FOREACH(cii, ci)) { procfs_getonecpu(i++, ci, bf, &used); total += used + 1; if (used + 1 < size) { bf += used; *bf++ = '\n'; size -= used + 1; used = size; } else used = 0; } size = *len; *len = total; return size < *len ? -1 : 0; }
int workqueue_create(struct workqueue **wqp, const char *name, void (*callback_func)(struct work *, void *), void *callback_arg, pri_t prio, int ipl, int flags) { struct workqueue *wq; struct workqueue_queue *q; void *ptr; int error = 0; CTASSERT(sizeof(work_impl_t) <= sizeof(struct work)); ptr = kmem_zalloc(workqueue_size(flags), KM_SLEEP); wq = (void *)roundup2((uintptr_t)ptr, coherency_unit); wq->wq_ptr = ptr; wq->wq_flags = flags; workqueue_init(wq, name, callback_func, callback_arg, prio, ipl); if (flags & WQ_PERCPU) { struct cpu_info *ci; CPU_INFO_ITERATOR cii; /* create the work-queue for each CPU */ for (CPU_INFO_FOREACH(cii, ci)) { q = workqueue_queue_lookup(wq, ci); error = workqueue_initqueue(wq, q, ipl, ci); if (error) { break; } } } else {
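/*
 * A hedged usage sketch for the workqueue(9) interface set up above; the
 * example_* names and the PRI_NONE/IPL_NONE/WQ_MPSAFE choices are
 * assumptions for illustration, not taken from the original code.
 */
#include <sys/workqueue.h>

static struct workqueue *example_wq;

/* Runs in thread context on one of the queue's worker threads. */
static void
example_work_fn(struct work *wk, void *arg)
{
	/* Process the item that was enqueued as 'wk'. */
}

static int
example_wq_attach(void)
{
	/* WQ_PERCPU requests the per-CPU queues created by the loop above. */
	return workqueue_create(&example_wq, "examplewq", example_work_fn,
	    NULL, PRI_NONE, IPL_NONE, WQ_MPSAFE | WQ_PERCPU);
}
/*
 * Work items are then submitted with workqueue_enqueue(example_wq, &wk,
 * NULL), where the struct cpu_info argument may select a particular CPU's
 * queue (NULL picks a default), as in the Linux-compat example further down.
 */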
/* * rw_onproc: * * Return true if an rwlock owner is running on a CPU in the system. * If the target is waiting on the kernel big lock, then we must * release it. This is necessary to avoid deadlock. * * Note that we can't use the rwlock owner field as an LWP pointer. We * don't have full control over the timing of our execution, and so the * pointer could be completely invalid by the time we dereference it. */ static int rw_onproc(uintptr_t owner, struct cpu_info **cip) { #ifdef MULTIPROCESSOR CPU_INFO_ITERATOR cii; struct cpu_info *ci; lwp_t *l; if ((owner & (RW_WRITE_LOCKED|RW_HAS_WAITERS)) != RW_WRITE_LOCKED) return 0; l = (lwp_t *)(owner & RW_THREAD); /* See if the target is running on a CPU somewhere. */ if ((ci = *cip) != NULL && ci->ci_curlwp == l) goto run; for (CPU_INFO_FOREACH(cii, ci)) if (ci->ci_curlwp == l) goto run; /* No: it may be safe to block now. */ *cip = NULL; return 0; run: /* Target is running; do we need to block? */ *cip = ci; return ci->ci_biglock_wanted != l; #else return 0; #endif /* MULTIPROCESSOR */ }
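/*
 * A hedged sketch of the adaptive-spin decision the helper above enables:
 * keep spinning only while the owning writer is on a CPU, otherwise block.
 * This assumes it sits in kern_rwlock.c next to rw_onproc(), where the
 * krwlock_t internals are visible; example_adaptive_wait() is illustrative,
 * the real caller is the rw_enter() slow path.
 */
static void
example_adaptive_wait(krwlock_t *rw)
{
	struct cpu_info *ci = NULL;

	/*
	 * Spin while the owning writer is running somewhere and does not
	 * want the big lock; once rw_onproc() returns 0, sleeping on the
	 * turnstile is the better choice.
	 */
	while (rw_onproc(rw->rw_owner, &ci)) {
		/* spin */
	}
}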
/* * percpu_foreach: call the specified callback function for each CPU. * * => called in thread context. * => caller should not rely on the CPU iteration order. * => the callback function should be minimal because it is executed while * holding a global lock, which can block low-priority xcalls; * e.g. it's illegal for a callback function to sleep for memory allocation. */ void percpu_foreach(percpu_t *pc, percpu_callback_t cb, void *arg) { CPU_INFO_ITERATOR cii; struct cpu_info *ci; percpu_traverse_enter(); for (CPU_INFO_FOREACH(cii, ci)) { (*cb)(percpu_getptr_remote(pc, ci), arg, ci); } percpu_traverse_exit(); }
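/*
 * A hedged example of how percpu_foreach() is typically driven: summing a
 * per-CPU uint64_t counter.  percpu_alloc() and the callback signature are
 * standard percpu(9); the example_* names are illustrative.
 */
#include <sys/percpu.h>

/* Callback: invoked once per CPU with that CPU's private data pointer. */
static void
example_sum_cb(void *p, void *arg, struct cpu_info *ci)
{
	uint64_t *totalp = arg;

	/* Must not sleep: we are called with the traversal lock held. */
	*totalp += *(const uint64_t *)p;
}

/* Sum a counter whose percpu_t came from percpu_alloc(sizeof(uint64_t)). */
static uint64_t
example_counter_total(percpu_t *pc)
{
	uint64_t total = 0;

	percpu_foreach(pc, example_sum_cb, &total);
	return total;
}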
/* * Set up the real-time and statistics clocks. * Leave stathz 0 only if no alternative timer is available. * * The frequencies of these clocks must be an even number of microseconds. */ void timer_init_4m(void) { struct cpu_info *cpi; CPU_INFO_ITERATOR n; timerreg4m->t_limit = tmr_ustolim4m(tick); for (CPU_INFO_FOREACH(n, cpi)) { cpi->counterreg_4m->t_limit = tmr_ustolim4m(statint); } icr_si_bic(SINTR_T); }
/* * kcpuset internally uses an array of uint32_t while xen uses an array of * u_long. As we're little-endian we can cast one to the other. */ typedef union { #ifdef _LP64 uint32_t xcpum_km[2]; #else uint32_t xcpum_km[1]; #endif u_long xcpum_xm; } xcpumask_t; void xen_failsafe_handler(void) { panic("xen_failsafe_handler called!\n"); } void xen_set_ldt(vaddr_t base, uint32_t entries) { vaddr_t va; vaddr_t end; pt_entry_t *ptp; int s; #ifdef __x86_64__ end = base + (entries << 3); #else end = base + entries * sizeof(union descriptor); #endif for (va = base; va < end; va += PAGE_SIZE) { KASSERT(va >= VM_MIN_KERNEL_ADDRESS); ptp = kvtopte(va); XENPRINTF(("xen_set_ldt %#" PRIxVADDR " %d %p\n", base, entries, ptp)); pmap_pte_clearbits(ptp, PG_RW); } s = splvm(); xpq_queue_set_ldt(base, entries); splx(s); } #ifdef XENDEBUG void xpq_debug_dump(void); #endif #define XPQUEUE_SIZE 2048 static mmu_update_t xpq_queue_array[MAXCPUS][XPQUEUE_SIZE]; static int xpq_idx_array[MAXCPUS]; #ifdef i386 extern union descriptor tmpgdt[]; #endif /* i386 */ void xpq_flush_queue(void) { int i, ok = 0, ret; mmu_update_t *xpq_queue = xpq_queue_array[curcpu()->ci_cpuid]; int xpq_idx = xpq_idx_array[curcpu()->ci_cpuid]; XENPRINTK2(("flush queue %p entries %d\n", xpq_queue, xpq_idx)); for (i = 0; i < xpq_idx; i++) XENPRINTK2(("%d: 0x%08" PRIx64 " 0x%08" PRIx64 "\n", i, xpq_queue[i].ptr, xpq_queue[i].val)); retry: ret = HYPERVISOR_mmu_update_self(xpq_queue, xpq_idx, &ok); if (xpq_idx != 0 && ret < 0) { struct cpu_info *ci; CPU_INFO_ITERATOR cii; printf("xpq_flush_queue: %d entries (%d successful) on " "cpu%d (%ld)\n", xpq_idx, ok, curcpu()->ci_index, curcpu()->ci_cpuid); if (ok != 0) { xpq_queue += ok; xpq_idx -= ok; ok = 0; goto retry; } for (CPU_INFO_FOREACH(cii, ci)) { xpq_queue = xpq_queue_array[ci->ci_cpuid]; xpq_idx = xpq_idx_array[ci->ci_cpuid]; printf("cpu%d (%ld):\n", ci->ci_index, ci->ci_cpuid); for (i = 0; i < xpq_idx; i++) { printf(" 0x%016" PRIx64 ": 0x%016" PRIx64 "\n", xpq_queue[i].ptr, xpq_queue[i].val); } #ifdef __x86_64__ for (i = 0; i < PDIR_SLOT_PTE; i++) { if (ci->ci_kpm_pdir[i] == 0) continue; printf(" kpm_pdir[%d]: 0x%" PRIx64 "\n", i, ci->ci_kpm_pdir[i]); } #endif } panic("HYPERVISOR_mmu_update failed, ret: %d\n", ret); } xpq_idx_array[curcpu()->ci_cpuid] = 0; }
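/*
 * A hedged illustration of the cast described by the xcpumask_t comment at
 * the top of the previous snippet: export a kcpuset_t into the uint32_t
 * view of the union, then hand the u_long view to the hypercall interface.
 * kcpuset_export_u32() is the standard kcpuset(9) export routine;
 * example_kcpuset_to_xen() is illustrative.
 */
static u_long
example_kcpuset_to_xen(const kcpuset_t *kcp)
{
	xcpumask_t xcpumask;

	/* Fill the little-endian uint32_t view of the mask... */
	kcpuset_export_u32(kcp, xcpumask.xcpum_km, sizeof(xcpumask.xcpum_km));
	/* ...and read it back as the u_long that Xen expects. */
	return xcpumask.xcpum_xm;
}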
void flush_workqueue(struct workqueue_struct *wq) { static const struct wq_flush zero_wqf; struct wq_flush wqf = zero_wqf; mutex_init(&wqf.wqf_lock, MUTEX_DEFAULT, IPL_NONE); cv_init(&wqf.wqf_cv, "lnxwflsh"); if (1) { struct wq_flush_work *const wqfw = kmem_zalloc(sizeof(*wqfw), KM_SLEEP); wqf.wqf_n = 1; wqfw->wqfw_flush = &wqf; INIT_WORK(&wqfw->wqfw_work, &linux_wq_barrier); wqfw->wqfw_work.w_wq = wq; wqfw->wqfw_work.w_state = WORK_PENDING; workqueue_enqueue(wq->wq_workqueue, &wqfw->wqfw_work.w_wk, NULL); } else { struct cpu_info *ci; CPU_INFO_ITERATOR cii; struct wq_flush_work *wqfw; panic("per-CPU Linux workqueues don't work yet!"); wqf.wqf_n = 0; for (CPU_INFO_FOREACH(cii, ci)) { wqfw = kmem_zalloc(sizeof(*wqfw), KM_SLEEP); mutex_enter(&wqf.wqf_lock); wqf.wqf_n++; mutex_exit(&wqf.wqf_lock); wqfw->wqfw_flush = &wqf; INIT_WORK(&wqfw->wqfw_work, &linux_wq_barrier); wqfw->wqfw_work.w_state = WORK_PENDING; wqfw->wqfw_work.w_wq = wq; workqueue_enqueue(wq->wq_workqueue, &wqfw->wqfw_work.w_wk, ci); } } mutex_enter(&wqf.wqf_lock); while (0 < wqf.wqf_n) cv_wait(&wqf.wqf_cv, &wqf.wqf_lock); mutex_exit(&wqf.wqf_lock); cv_destroy(&wqf.wqf_cv); mutex_destroy(&wqf.wqf_lock); }
/* * Grow the GDT. */ void gdt_grow(int which) { size_t old_len, new_len; CPU_INFO_ITERATOR cii; struct cpu_info *ci; struct vm_page *pg; vaddr_t va; old_len = gdt_size[which] * sizeof(gdt[0]); gdt_size[which] <<= 1; new_len = old_len << 1; #ifdef XEN if (which != 0) { size_t max_len = MAXGDTSIZ * sizeof(gdt[0]); if (old_len == 0) { gdt_size[which] = MINGDTSIZ; new_len = gdt_size[which] * sizeof(gdt[0]); } for(va = (vaddr_t)(cpu_info_primary.ci_gdt) + old_len + max_len; va < (vaddr_t)(cpu_info_primary.ci_gdt) + new_len + max_len; va += PAGE_SIZE) { while ((pg = uvm_pagealloc(NULL, 0, NULL, UVM_PGA_ZERO)) == NULL) { uvm_wait("gdt_grow"); } pmap_kenter_pa(va, VM_PAGE_TO_PHYS(pg), VM_PROT_READ | VM_PROT_WRITE); } return; } #endif for (CPU_INFO_FOREACH(cii, ci)) { for (va = (vaddr_t)(ci->ci_gdt) + old_len; va < (vaddr_t)(ci->ci_gdt) + new_len; va += PAGE_SIZE) { while ((pg = uvm_pagealloc(NULL, 0, NULL, UVM_PGA_ZERO)) == NULL) { uvm_wait("gdt_grow"); } pmap_kenter_pa(va, VM_PAGE_TO_PHYS(pg), VM_PROT_READ | VM_PROT_WRITE); } } pmap_update(pmap_kernel()); }
int hppa_ipi_broadcast(u_long ipi) { CPU_INFO_ITERATOR cii; struct cpu_info *ci; int count = 0; for (CPU_INFO_FOREACH(cii, ci)) { if (ci != curcpu() && (ci->ci_flags & CPUF_RUNNING)) if (hppa_ipi_send(ci, ipi)) count++; } return count; }
void interrupt_get_available(kcpuset_t *cpuset) { CPU_INFO_ITERATOR cii; struct cpu_info *ci; kcpuset_zero(cpuset); mutex_enter(&cpu_lock); for (CPU_INFO_FOREACH(cii, ci)) { if ((ci->ci_schedstate.spc_flags & SPCF_NOINTR) == 0) kcpuset_set(cpuset, cpu_index(ci)); } mutex_exit(&cpu_lock); }
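/*
 * A hedged example of consuming the set filled in by
 * interrupt_get_available(); kcpuset_create(), kcpuset_isset() and
 * kcpuset_destroy() are standard kcpuset(9) calls, and
 * example_print_intr_cpus() is illustrative.
 */
#include <sys/systm.h>
#include <sys/cpu.h>
#include <sys/kcpuset.h>

static void
example_print_intr_cpus(void)
{
	CPU_INFO_ITERATOR cii;
	struct cpu_info *ci;
	kcpuset_t *avail;

	kcpuset_create(&avail, true);	/* allocate a zeroed set */
	interrupt_get_available(avail);
	for (CPU_INFO_FOREACH(cii, ci)) {
		if (kcpuset_isset(avail, cpu_index(ci)))
			printf("cpu%d may receive device interrupts\n",
			    cpu_index(ci));
	}
	kcpuset_destroy(avail);
}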
void cpu_multicast_ipi(__cpuset_t cpuset, int tag) { CPU_INFO_ITERATOR cii; struct cpu_info *ci; CPUSET_DEL(cpuset, cpu_index(curcpu())); if (CPUSET_EMPTY_P(cpuset)) return; for (CPU_INFO_FOREACH(cii, ci)) { if (CPUSET_HAS_P(cpuset, cpu_index(ci))) { CPUSET_DEL(cpuset, cpu_index(ci)); (void)cpu_send_ipi(ci, tag); } } }
static inline void pmap_tlb_processpacket(pmap_tlb_packet_t *tp, kcpuset_t *target) { int err = 0; if (!kcpuset_match(target, kcpuset_attached)) { const struct cpu_info * const self = curcpu(); CPU_INFO_ITERATOR cii; struct cpu_info *lci; for (CPU_INFO_FOREACH(cii, lci)) { const cpuid_t lcid = cpu_index(lci); if (__predict_false(lci == self) || !kcpuset_isset(target, lcid)) { continue; } err |= x86_ipi(LAPIC_TLB_VECTOR, lci->ci_cpuid, LAPIC_DLMODE_FIXED); } } else {
void setgdt(int sel, const void *base, size_t limit, int type, int dpl, int def32, int gran) { struct segment_descriptor *sd = &gdt[sel].sd; CPU_INFO_ITERATOR cii; struct cpu_info *ci; #ifdef XEN if (type == SDT_SYS386TSS) { /* printk("XXX TSS descriptor not supported in GDT\n"); */ return; } #endif setsegment(sd, base, limit, type, dpl, def32, gran); for (CPU_INFO_FOREACH(cii, ci)) { if (ci->ci_gdt != NULL) update_descriptor(&ci->ci_gdt[sel], (union descriptor *)sd); } }
/* * Call hardclock on all CPUs. */ static void handle_hardclock(struct clockframe *cap) { int s; #ifdef MULTIPROCESSOR struct cpu_info *cpi; CPU_INFO_ITERATOR n; for (CPU_INFO_FOREACH(n, cpi)) { if (cpi == cpuinfo.ci_self) { KASSERT(CPU_IS_PRIMARY(cpi)); continue; } raise_ipi(cpi, IPL_HARDCLOCK); } #endif s = splsched(); hardclock(cap); splx(s); }
void vpanic(const char *fmt, va_list ap) { CPU_INFO_ITERATOR cii; struct cpu_info *ci, *oci; int bootopt; static char scratchstr[256]; /* stores panic message */ spldebug_stop(); if (lwp0.l_cpu && curlwp) { /* * Disable preemption. If already panicking on another CPU, sit * here and spin until the system is rebooted. Allow the CPU that * first panicked to panic again. */ kpreempt_disable(); ci = curcpu(); oci = atomic_cas_ptr((void *)&paniccpu, NULL, ci); if (oci != NULL && oci != ci) { /* Give interrupts a chance to try to prevent deadlock. */ for (;;) { #ifndef _RUMPKERNEL /* XXXpooka: temporary build fix, see kern/40505 */ DELAY(10); #endif /* _RUMPKERNEL */ } } /* * Convert the current thread to a bound thread and prevent all * CPUs from scheduling unbound jobs. Do so without taking any * locks. */ curlwp->l_pflag |= LP_BOUND; for (CPU_INFO_FOREACH(cii, ci)) { ci->ci_schedstate.spc_flags |= SPCF_OFFLINE; } }
void cpu_debug_dump(void) { CPU_INFO_ITERATOR cii; struct cpu_info *ci; char running, hatched, paused, resumed, halted; db_printf("CPU CPUID STATE CPUINFO CPL INT MTX IPIS\n"); for (CPU_INFO_FOREACH(cii, ci)) { hatched = (kcpuset_isset(cpus_hatched, cpu_index(ci)) ? 'H' : '-'); running = (kcpuset_isset(cpus_running, cpu_index(ci)) ? 'R' : '-'); paused = (kcpuset_isset(cpus_paused, cpu_index(ci)) ? 'P' : '-'); resumed = (kcpuset_isset(cpus_resumed, cpu_index(ci)) ? 'r' : '-'); halted = (kcpuset_isset(cpus_halted, cpu_index(ci)) ? 'h' : '-'); db_printf("%3d 0x%03lx %c%c%c%c%c %p " "%3d %3d %3d " "0x%02" PRIx64 "/0x%02" PRIx64 "\n", cpu_index(ci), ci->ci_cpuid, running, hatched, paused, resumed, halted, ci, ci->ci_cpl, ci->ci_idepth, ci->ci_mtx_count, ci->ci_active_ipis, ci->ci_request_ipis); } }
void timerattach_obio_4m(device_t parent, device_t self, void *aux) { union obio_attach_args *uoba = aux; struct sbus_attach_args *sa = &uoba->uoba_sbus; struct cpu_info *cpi; bus_space_handle_t bh; int i; CPU_INFO_ITERATOR n; if (sa->sa_nreg < 2) { printf(": only %d register sets\n", sa->sa_nreg); return; } /* Map the system timer */ i = sa->sa_nreg - 1; if (bus_space_map2(sa->sa_bustag, BUS_ADDR(sa->sa_reg[i].oa_space, sa->sa_reg[i].oa_base), sizeof(struct timer_4m), BUS_SPACE_MAP_LINEAR, TIMERREG_VA, &bh) != 0) { printf(": can't map registers\n"); return; } timerreg4m = (struct timer_4m *)TIMERREG_VA; /* Map each CPU's counter */ for (i = 0; i < sa->sa_nreg - 1; i++) { /* * Check whether the CPU corresponding to this timer * register is installed. */ for (CPU_INFO_FOREACH(n, cpi)) { if ((i == 0 && sparc_ncpus == 1) || cpi->mid == i + 8) { /* We got a corresponding MID. */ break; } cpi = NULL; } if (cpi == NULL) continue; if (sbus_bus_map(sa->sa_bustag, sa->sa_reg[i].oa_space, sa->sa_reg[i].oa_base, sizeof(struct timer_4m), BUS_SPACE_MAP_LINEAR, &bh) != 0) { printf(": can't map CPU counter %d\n", i); return; } cpi->counterreg_4m = (struct counter_4m *)bh; } #if defined(MULTIPROCESSOR) if (sparc_ncpus > 1) { /* * Note that we don't actually use this cookie after checking * it was established; we call directly via raise_ipi() on * IPL_HARDCLOCK. */ void *hardclock_cookie; hardclock_cookie = sparc_softintr_establish(IPL_HARDCLOCK, hardclock_ipi, NULL); if (hardclock_cookie == NULL) panic("timerattach: cannot establish hardclock_intr"); } #endif /* Put processor counter in "timer" mode */ timerreg4m->t_cfg = 0; timerattach(&timerreg4m->t_counter, &timerreg4m->t_limit); }
static void dtrace_load(void *dummy) { dtrace_provider_id_t id; CPU_INFO_ITERATOR cpuind; struct cpu_info *cinfo; dtrace_debug_init(NULL); dtrace_gethrtime_init(NULL); /* Hook into the trap handler. */ dtrace_trap_func = dtrace_trap; /* Hang our hook for thread switches. */ dtrace_vtime_switch_func = dtrace_vtime_switch; /* Hang our hook for exceptions. */ dtrace_invop_init(); /* * XXX This is a short term hack to avoid having to comment * out lots and lots of lock/unlock calls. */ mutex_init(&mod_lock,"XXX mod_lock hack", MUTEX_DEFAULT, NULL); /* * Initialise the mutexes without 'witness' because the dtrace * code is mostly written to wait for memory. To have the * witness code change a malloc() from M_WAITOK to M_NOWAIT * because a lock is held would surely create a panic in a * low memory situation. And that low memory situation might be * the very problem we are trying to trace. */ mutex_init(&dtrace_lock,"dtrace probe state", MUTEX_DEFAULT, NULL); mutex_init(&dtrace_provider_lock,"dtrace provider state", MUTEX_DEFAULT, NULL); mutex_init(&dtrace_meta_lock,"dtrace meta-provider state", MUTEX_DEFAULT, NULL); mutex_init(&dtrace_errlock,"dtrace error lock", MUTEX_DEFAULT, NULL); mutex_enter(&dtrace_provider_lock); mutex_enter(&dtrace_lock); mutex_enter(&cpu_lock); ASSERT(MUTEX_HELD(&cpu_lock)); dtrace_arena = vmem_create("dtrace", 1, INT_MAX, 1, NULL, NULL, NULL, 0, VM_SLEEP, IPL_NONE); dtrace_state_cache = kmem_cache_create(__UNCONST("dtrace_state_cache"), sizeof (dtrace_dstate_percpu_t) * NCPU, DTRACE_STATE_ALIGN, NULL, NULL, NULL, NULL, NULL, 0); ASSERT(MUTEX_HELD(&cpu_lock)); dtrace_bymod = dtrace_hash_create(offsetof(dtrace_probe_t, dtpr_mod), offsetof(dtrace_probe_t, dtpr_nextmod), offsetof(dtrace_probe_t, dtpr_prevmod)); dtrace_byfunc = dtrace_hash_create(offsetof(dtrace_probe_t, dtpr_func), offsetof(dtrace_probe_t, dtpr_nextfunc), offsetof(dtrace_probe_t, dtpr_prevfunc)); dtrace_byname = dtrace_hash_create(offsetof(dtrace_probe_t, dtpr_name), offsetof(dtrace_probe_t, dtpr_nextname), offsetof(dtrace_probe_t, dtpr_prevname)); if (dtrace_retain_max < 1) { cmn_err(CE_WARN, "illegal value (%lu) for dtrace_retain_max; " "setting to 1", dtrace_retain_max); dtrace_retain_max = 1; } /* * Now discover our toxic ranges. */ dtrace_toxic_ranges(dtrace_toxrange_add); /* * Before we register ourselves as a provider to our own framework, * we would like to assert that dtrace_provider is NULL -- but that's * not true if we were loaded as a dependency of a DTrace provider. * Once we've registered, we can assert that dtrace_provider is our * pseudo provider. */ (void) dtrace_register("dtrace", &dtrace_provider_attr, DTRACE_PRIV_NONE, 0, &dtrace_provider_ops, NULL, &id); ASSERT(dtrace_provider != NULL); ASSERT((dtrace_provider_id_t)dtrace_provider == id); dtrace_probeid_begin = dtrace_probe_create((dtrace_provider_id_t) dtrace_provider, NULL, NULL, "BEGIN", 0, NULL); dtrace_probeid_end = dtrace_probe_create((dtrace_provider_id_t) dtrace_provider, NULL, NULL, "END", 0, NULL); dtrace_probeid_error = dtrace_probe_create((dtrace_provider_id_t) dtrace_provider, NULL, NULL, "ERROR", 1, NULL); mutex_exit(&cpu_lock); /* * If DTrace helper tracing is enabled, we need to allocate the * trace buffer and initialize the values. 
*/ if (dtrace_helptrace_enabled) { ASSERT(dtrace_helptrace_buffer == NULL); dtrace_helptrace_buffer = kmem_zalloc(dtrace_helptrace_bufsize, KM_SLEEP); dtrace_helptrace_next = 0; dtrace_helptrace_size = dtrace_helptrace_bufsize; } mutex_exit(&dtrace_lock); mutex_exit(&dtrace_provider_lock); mutex_enter(&cpu_lock); /* Setup the CPUs */ for (CPU_INFO_FOREACH(cpuind, cinfo)) { (void) dtrace_cpu_setup(CPU_CONFIG, cpu_index(cinfo)); } mutex_exit(&cpu_lock); dtrace_anon_init(NULL); #if 0 dtrace_dev = make_dev(&dtrace_cdevsw, 0, UID_ROOT, GID_WHEEL, 0600, "dtrace/dtrace"); #endif return; }
/* ARGSUSED */ static int dtrace_ioctl(struct file *fp, u_long cmd, void *addr) { dtrace_state_t *state = (dtrace_state_t *)fp->f_data; int error = 0; if (state == NULL) return (EINVAL); if (state->dts_anon) { ASSERT(dtrace_anon.dta_state == NULL); state = state->dts_anon; } switch (cmd) { case DTRACEIOC_AGGDESC: { dtrace_aggdesc_t **paggdesc = (dtrace_aggdesc_t **) addr; dtrace_aggdesc_t aggdesc; dtrace_action_t *act; dtrace_aggregation_t *agg; int nrecs; uint32_t offs; dtrace_recdesc_t *lrec; void *buf; size_t size; uintptr_t dest; DTRACE_IOCTL_PRINTF("%s(%d): DTRACEIOC_AGGDESC\n",__func__,__LINE__); if (copyin((void *) *paggdesc, &aggdesc, sizeof (aggdesc)) != 0) return (EFAULT); mutex_enter(&dtrace_lock); if ((agg = dtrace_aggid2agg(state, aggdesc.dtagd_id)) == NULL) { mutex_exit(&dtrace_lock); return (EINVAL); } aggdesc.dtagd_epid = agg->dtag_ecb->dte_epid; nrecs = aggdesc.dtagd_nrecs; aggdesc.dtagd_nrecs = 0; offs = agg->dtag_base; lrec = &agg->dtag_action.dta_rec; aggdesc.dtagd_size = lrec->dtrd_offset + lrec->dtrd_size - offs; for (act = agg->dtag_first; ; act = act->dta_next) { ASSERT(act->dta_intuple || DTRACEACT_ISAGG(act->dta_kind)); /* * If this action has a record size of zero, it * denotes an argument to the aggregating action. * Because the presence of this record doesn't (or * shouldn't) affect the way the data is interpreted, * we don't copy it out to save user-level the * confusion of dealing with a zero-length record. */ if (act->dta_rec.dtrd_size == 0) { ASSERT(agg->dtag_hasarg); continue; } aggdesc.dtagd_nrecs++; if (act == &agg->dtag_action) break; } /* * Now that we have the size, we need to allocate a temporary * buffer in which to store the complete description. We need * the temporary buffer to be able to drop dtrace_lock() * across the copyout(), below. */ size = sizeof (dtrace_aggdesc_t) + (aggdesc.dtagd_nrecs * sizeof (dtrace_recdesc_t)); buf = kmem_alloc(size, KM_SLEEP); dest = (uintptr_t)buf; bcopy(&aggdesc, (void *)dest, sizeof (aggdesc)); dest += offsetof(dtrace_aggdesc_t, dtagd_rec[0]); for (act = agg->dtag_first; ; act = act->dta_next) { dtrace_recdesc_t rec = act->dta_rec; /* * See the comment in the above loop for why we pass * over zero-length records. */ if (rec.dtrd_size == 0) { ASSERT(agg->dtag_hasarg); continue; } if (nrecs-- == 0) break; rec.dtrd_offset -= offs; bcopy(&rec, (void *)dest, sizeof (rec)); dest += sizeof (dtrace_recdesc_t); if (act == &agg->dtag_action) break; } mutex_exit(&dtrace_lock); if (copyout(buf, (void *) *paggdesc, dest - (uintptr_t)buf) != 0) { kmem_free(buf, size); return (EFAULT); } kmem_free(buf, size); return (0); } case DTRACEIOC_AGGSNAP: case DTRACEIOC_BUFSNAP: { dtrace_bufdesc_t **pdesc = (dtrace_bufdesc_t **) addr; dtrace_bufdesc_t desc; caddr_t cached; dtrace_buffer_t *buf; dtrace_debug_output(); if (copyin((void *) *pdesc, &desc, sizeof (desc)) != 0) return (EFAULT); DTRACE_IOCTL_PRINTF("%s(%d): %s curcpu %d cpu %d\n", __func__,__LINE__, cmd == DTRACEIOC_AGGSNAP ? 
"DTRACEIOC_AGGSNAP":"DTRACEIOC_BUFSNAP", cpu_number(), desc.dtbd_cpu); if (desc.dtbd_cpu >= ncpu) return (ENOENT); mutex_enter(&dtrace_lock); if (cmd == DTRACEIOC_BUFSNAP) { buf = &state->dts_buffer[desc.dtbd_cpu]; } else { buf = &state->dts_aggbuffer[desc.dtbd_cpu]; } if (buf->dtb_flags & (DTRACEBUF_RING | DTRACEBUF_FILL)) { size_t sz = buf->dtb_offset; if (state->dts_activity != DTRACE_ACTIVITY_STOPPED) { mutex_exit(&dtrace_lock); return (EBUSY); } /* * If this buffer has already been consumed, we're * going to indicate that there's nothing left here * to consume. */ if (buf->dtb_flags & DTRACEBUF_CONSUMED) { mutex_exit(&dtrace_lock); desc.dtbd_size = 0; desc.dtbd_drops = 0; desc.dtbd_errors = 0; desc.dtbd_oldest = 0; sz = sizeof (desc); if (copyout(&desc, (void *) *pdesc, sz) != 0) return (EFAULT); return (0); } /* * If this is a ring buffer that has wrapped, we want * to copy the whole thing out. */ if (buf->dtb_flags & DTRACEBUF_WRAPPED) { dtrace_buffer_polish(buf); sz = buf->dtb_size; } if (copyout(buf->dtb_tomax, desc.dtbd_data, sz) != 0) { mutex_exit(&dtrace_lock); return (EFAULT); } desc.dtbd_size = sz; desc.dtbd_drops = buf->dtb_drops; desc.dtbd_errors = buf->dtb_errors; desc.dtbd_oldest = buf->dtb_xamot_offset; mutex_exit(&dtrace_lock); if (copyout(&desc, (void *) *pdesc, sizeof (desc)) != 0) return (EFAULT); buf->dtb_flags |= DTRACEBUF_CONSUMED; return (0); } if (buf->dtb_tomax == NULL) { ASSERT(buf->dtb_xamot == NULL); mutex_exit(&dtrace_lock); return (ENOENT); } cached = buf->dtb_tomax; ASSERT(!(buf->dtb_flags & DTRACEBUF_NOSWITCH)); dtrace_xcall(desc.dtbd_cpu, (dtrace_xcall_t)dtrace_buffer_switch, buf); state->dts_errors += buf->dtb_xamot_errors; /* * If the buffers did not actually switch, then the cross call * did not take place -- presumably because the given CPU is * not in the ready set. If this is the case, we'll return * ENOENT. */ if (buf->dtb_tomax == cached) { ASSERT(buf->dtb_xamot != cached); mutex_exit(&dtrace_lock); return (ENOENT); } ASSERT(cached == buf->dtb_xamot); DTRACE_IOCTL_PRINTF("%s(%d): copyout the buffer snapshot\n",__func__,__LINE__); /* * We have our snapshot; now copy it out. */ if (copyout(buf->dtb_xamot, desc.dtbd_data, buf->dtb_xamot_offset) != 0) { mutex_exit(&dtrace_lock); return (EFAULT); } desc.dtbd_size = buf->dtb_xamot_offset; desc.dtbd_drops = buf->dtb_xamot_drops; desc.dtbd_errors = buf->dtb_xamot_errors; desc.dtbd_oldest = 0; mutex_exit(&dtrace_lock); DTRACE_IOCTL_PRINTF("%s(%d): copyout buffer desc: size %zd drops %lu errors %lu\n",__func__,__LINE__,(size_t) desc.dtbd_size,(u_long) desc.dtbd_drops,(u_long) desc.dtbd_errors); /* * Finally, copy out the buffer description. 
*/ if (copyout(&desc, (void *) *pdesc, sizeof (desc)) != 0) return (EFAULT); return (0); } case DTRACEIOC_CONF: { dtrace_conf_t conf; DTRACE_IOCTL_PRINTF("%s(%d): DTRACEIOC_CONF\n",__func__,__LINE__); bzero(&conf, sizeof (conf)); conf.dtc_difversion = DIF_VERSION; conf.dtc_difintregs = DIF_DIR_NREGS; conf.dtc_diftupregs = DIF_DTR_NREGS; conf.dtc_ctfmodel = CTF_MODEL_NATIVE; *((dtrace_conf_t *) addr) = conf; return (0); } case DTRACEIOC_DOFGET: { dof_hdr_t **pdof = (dof_hdr_t **) addr; dof_hdr_t hdr, *dof = *pdof; int rval; uint64_t len; DTRACE_IOCTL_PRINTF("%s(%d): DTRACEIOC_DOFGET\n",__func__,__LINE__); if (copyin((void *)dof, &hdr, sizeof (hdr)) != 0) return (EFAULT); mutex_enter(&dtrace_lock); dof = dtrace_dof_create(state); mutex_exit(&dtrace_lock); len = MIN(hdr.dofh_loadsz, dof->dofh_loadsz); rval = copyout(dof, (void *) *pdof, len); dtrace_dof_destroy(dof); return (rval == 0 ? 0 : EFAULT); } case DTRACEIOC_ENABLE: { dof_hdr_t *dof = NULL; dtrace_enabling_t *enab = NULL; dtrace_vstate_t *vstate; int err = 0; int rval; dtrace_enable_io_t *p = (dtrace_enable_io_t *) addr; DTRACE_IOCTL_PRINTF("%s(%d): DTRACEIOC_ENABLE\n",__func__,__LINE__); /* * If a NULL argument has been passed, we take this as our * cue to reevaluate our enablings. */ if (p->dof == NULL) { dtrace_enabling_matchall(); return (0); } if ((dof = dtrace_dof_copyin((uintptr_t) p->dof, &rval)) == NULL) return (EINVAL); mutex_enter(&cpu_lock); mutex_enter(&dtrace_lock); vstate = &state->dts_vstate; if (state->dts_activity != DTRACE_ACTIVITY_INACTIVE) { mutex_exit(&dtrace_lock); mutex_exit(&cpu_lock); dtrace_dof_destroy(dof); return (EBUSY); } if (dtrace_dof_slurp(dof, vstate, curlwp->l_cred, &enab, 0, B_TRUE) != 0) { mutex_exit(&dtrace_lock); mutex_exit(&cpu_lock); dtrace_dof_destroy(dof); return (EINVAL); } if ((rval = dtrace_dof_options(dof, state)) != 0) { dtrace_enabling_destroy(enab); mutex_exit(&dtrace_lock); mutex_exit(&cpu_lock); dtrace_dof_destroy(dof); return (rval); } if ((err = dtrace_enabling_match(enab, &p->n_matched)) == 0) { err = dtrace_enabling_retain(enab); } else { dtrace_enabling_destroy(enab); } mutex_exit(&cpu_lock); mutex_exit(&dtrace_lock); dtrace_dof_destroy(dof); return (err); } case DTRACEIOC_EPROBE: { dtrace_eprobedesc_t **pepdesc = (dtrace_eprobedesc_t **) addr; dtrace_eprobedesc_t epdesc; dtrace_ecb_t *ecb; dtrace_action_t *act; void *buf; size_t size; uintptr_t dest; int nrecs; DTRACE_IOCTL_PRINTF("%s(%d): DTRACEIOC_EPROBE\n",__func__,__LINE__); if (copyin((void *)*pepdesc, &epdesc, sizeof (epdesc)) != 0) return (EFAULT); mutex_enter(&dtrace_lock); if ((ecb = dtrace_epid2ecb(state, epdesc.dtepd_epid)) == NULL) { mutex_exit(&dtrace_lock); return (EINVAL); } if (ecb->dte_probe == NULL) { mutex_exit(&dtrace_lock); return (EINVAL); } epdesc.dtepd_probeid = ecb->dte_probe->dtpr_id; epdesc.dtepd_uarg = ecb->dte_uarg; epdesc.dtepd_size = ecb->dte_size; nrecs = epdesc.dtepd_nrecs; epdesc.dtepd_nrecs = 0; for (act = ecb->dte_action; act != NULL; act = act->dta_next) { if (DTRACEACT_ISAGG(act->dta_kind) || act->dta_intuple) continue; epdesc.dtepd_nrecs++; } /* * Now that we have the size, we need to allocate a temporary * buffer in which to store the complete description. We need * the temporary buffer to be able to drop dtrace_lock() * across the copyout(), below. 
*/ size = sizeof (dtrace_eprobedesc_t) + (epdesc.dtepd_nrecs * sizeof (dtrace_recdesc_t)); buf = kmem_alloc(size, KM_SLEEP); dest = (uintptr_t)buf; bcopy(&epdesc, (void *)dest, sizeof (epdesc)); dest += offsetof(dtrace_eprobedesc_t, dtepd_rec[0]); for (act = ecb->dte_action; act != NULL; act = act->dta_next) { if (DTRACEACT_ISAGG(act->dta_kind) || act->dta_intuple) continue; if (nrecs-- == 0) break; bcopy(&act->dta_rec, (void *)dest, sizeof (dtrace_recdesc_t)); dest += sizeof (dtrace_recdesc_t); } mutex_exit(&dtrace_lock); if (copyout(buf, (void *) *pepdesc, dest - (uintptr_t)buf) != 0) { kmem_free(buf, size); return (EFAULT); } kmem_free(buf, size); return (0); } case DTRACEIOC_FORMAT: { dtrace_fmtdesc_t *fmt = (dtrace_fmtdesc_t *) addr; char *str; int len; DTRACE_IOCTL_PRINTF("%s(%d): DTRACEIOC_FORMAT\n",__func__,__LINE__); mutex_enter(&dtrace_lock); if (fmt->dtfd_format == 0 || fmt->dtfd_format > state->dts_nformats) { mutex_exit(&dtrace_lock); return (EINVAL); } /* * Format strings are allocated contiguously and they are * never freed; if a format index is less than the number * of formats, we can assert that the format map is non-NULL * and that the format for the specified index is non-NULL. */ ASSERT(state->dts_formats != NULL); str = state->dts_formats[fmt->dtfd_format - 1]; ASSERT(str != NULL); len = strlen(str) + 1; if (len > fmt->dtfd_length) { fmt->dtfd_length = len; } else { if (copyout(str, fmt->dtfd_string, len) != 0) { mutex_exit(&dtrace_lock); return (EINVAL); } } mutex_exit(&dtrace_lock); return (0); } case DTRACEIOC_GO: { int rval; processorid_t *cpuid = (processorid_t *) addr; DTRACE_IOCTL_PRINTF("%s(%d): DTRACEIOC_GO\n",__func__,__LINE__); rval = dtrace_state_go(state, cpuid); return (rval); } case DTRACEIOC_PROBEARG: { dtrace_argdesc_t *desc = (dtrace_argdesc_t *) addr; dtrace_probe_t *probe; dtrace_provider_t *prov; DTRACE_IOCTL_PRINTF("%s(%d): DTRACEIOC_PROBEARG\n",__func__,__LINE__); if (desc->dtargd_id == DTRACE_IDNONE) return (EINVAL); if (desc->dtargd_ndx == DTRACE_ARGNONE) return (EINVAL); mutex_enter(&dtrace_provider_lock); mutex_enter(&mod_lock); mutex_enter(&dtrace_lock); if (desc->dtargd_id > dtrace_nprobes) { mutex_exit(&dtrace_lock); mutex_exit(&mod_lock); mutex_exit(&dtrace_provider_lock); return (EINVAL); } if ((probe = dtrace_probes[desc->dtargd_id - 1]) == NULL) { mutex_exit(&dtrace_lock); mutex_exit(&mod_lock); mutex_exit(&dtrace_provider_lock); return (EINVAL); } mutex_exit(&dtrace_lock); prov = probe->dtpr_provider; if (prov->dtpv_pops.dtps_getargdesc == NULL) { /* * There isn't any typed information for this probe. * Set the argument number to DTRACE_ARGNONE. */ desc->dtargd_ndx = DTRACE_ARGNONE; } else { desc->dtargd_native[0] = '\0'; desc->dtargd_xlate[0] = '\0'; desc->dtargd_mapping = desc->dtargd_ndx; prov->dtpv_pops.dtps_getargdesc(prov->dtpv_arg, probe->dtpr_id, probe->dtpr_arg, desc); } mutex_exit(&mod_lock); mutex_exit(&dtrace_provider_lock); return (0); } case DTRACEIOC_PROBEMATCH: case DTRACEIOC_PROBES: { dtrace_probedesc_t *p_desc = (dtrace_probedesc_t *) addr; dtrace_probe_t *probe = NULL; dtrace_probekey_t pkey; dtrace_id_t i; int m = 0; uint32_t priv = 0; uid_t uid = 0; zoneid_t zoneid = 0; DTRACE_IOCTL_PRINTF("%s(%d): %s\n",__func__,__LINE__, cmd == DTRACEIOC_PROBEMATCH ? 
"DTRACEIOC_PROBEMATCH":"DTRACEIOC_PROBES"); p_desc->dtpd_provider[DTRACE_PROVNAMELEN - 1] = '\0'; p_desc->dtpd_mod[DTRACE_MODNAMELEN - 1] = '\0'; p_desc->dtpd_func[DTRACE_FUNCNAMELEN - 1] = '\0'; p_desc->dtpd_name[DTRACE_NAMELEN - 1] = '\0'; /* * Before we attempt to match this probe, we want to give * all providers the opportunity to provide it. */ if (p_desc->dtpd_id == DTRACE_IDNONE) { mutex_enter(&dtrace_provider_lock); dtrace_probe_provide(p_desc, NULL); mutex_exit(&dtrace_provider_lock); p_desc->dtpd_id++; } if (cmd == DTRACEIOC_PROBEMATCH) { dtrace_probekey(p_desc, &pkey); pkey.dtpk_id = DTRACE_IDNONE; } dtrace_cred2priv(curlwp->l_cred, &priv, &uid, &zoneid); mutex_enter(&dtrace_lock); if (cmd == DTRACEIOC_PROBEMATCH) { for (i = p_desc->dtpd_id; i <= dtrace_nprobes; i++) { if ((probe = dtrace_probes[i - 1]) != NULL && (m = dtrace_match_probe(probe, &pkey, priv, uid, zoneid)) != 0) break; } if (m < 0) { mutex_exit(&dtrace_lock); return (EINVAL); } } else { for (i = p_desc->dtpd_id; i <= dtrace_nprobes; i++) { if ((probe = dtrace_probes[i - 1]) != NULL && dtrace_match_priv(probe, priv, uid, zoneid)) break; } } if (probe == NULL) { mutex_exit(&dtrace_lock); return (ESRCH); } dtrace_probe_description(probe, p_desc); mutex_exit(&dtrace_lock); return (0); } case DTRACEIOC_PROVIDER: { dtrace_providerdesc_t *pvd = (dtrace_providerdesc_t *) addr; dtrace_provider_t *pvp; DTRACE_IOCTL_PRINTF("%s(%d): DTRACEIOC_PROVIDER\n",__func__,__LINE__); pvd->dtvd_name[DTRACE_PROVNAMELEN - 1] = '\0'; error = 0; again: mutex_enter(&dtrace_provider_lock); for (pvp = dtrace_provider; pvp != NULL; pvp = pvp->dtpv_next) { if (strcmp(pvp->dtpv_name, pvd->dtvd_name) == 0) break; } mutex_exit(&dtrace_provider_lock); if (pvp == NULL && error == 0) { error = module_autoload(pvd->dtvd_name, MODULE_CLASS_MISC); if (error == 0) goto again; } if (pvp == NULL) return (ESRCH); bcopy(&pvp->dtpv_priv, &pvd->dtvd_priv, sizeof (dtrace_ppriv_t)); bcopy(&pvp->dtpv_attr, &pvd->dtvd_attr, sizeof (dtrace_pattr_t)); return (0); } case DTRACEIOC_REPLICATE: { dtrace_repldesc_t *desc = (dtrace_repldesc_t *) addr; dtrace_probedesc_t *match = &desc->dtrpd_match; dtrace_probedesc_t *create = &desc->dtrpd_create; int err; DTRACE_IOCTL_PRINTF("%s(%d): DTRACEIOC_REPLICATE\n",__func__,__LINE__); match->dtpd_provider[DTRACE_PROVNAMELEN - 1] = '\0'; match->dtpd_mod[DTRACE_MODNAMELEN - 1] = '\0'; match->dtpd_func[DTRACE_FUNCNAMELEN - 1] = '\0'; match->dtpd_name[DTRACE_NAMELEN - 1] = '\0'; create->dtpd_provider[DTRACE_PROVNAMELEN - 1] = '\0'; create->dtpd_mod[DTRACE_MODNAMELEN - 1] = '\0'; create->dtpd_func[DTRACE_FUNCNAMELEN - 1] = '\0'; create->dtpd_name[DTRACE_NAMELEN - 1] = '\0'; mutex_enter(&dtrace_lock); err = dtrace_enabling_replicate(state, match, create); mutex_exit(&dtrace_lock); return (err); } case DTRACEIOC_STATUS: { dtrace_status_t *stat = (dtrace_status_t *) addr; dtrace_dstate_t *dstate; int j; uint64_t nerrs; CPU_INFO_ITERATOR cpuind; struct cpu_info *cinfo; DTRACE_IOCTL_PRINTF("%s(%d): DTRACEIOC_STATUS\n",__func__,__LINE__); /* * See the comment in dtrace_state_deadman() for the reason * for setting dts_laststatus to INT64_MAX before setting * it to the correct value. 
*/ state->dts_laststatus = INT64_MAX; dtrace_membar_producer(); state->dts_laststatus = dtrace_gethrtime(); bzero(stat, sizeof (*stat)); mutex_enter(&dtrace_lock); if (state->dts_activity == DTRACE_ACTIVITY_INACTIVE) { mutex_exit(&dtrace_lock); return (ENOENT); } if (state->dts_activity == DTRACE_ACTIVITY_DRAINING) stat->dtst_exiting = 1; nerrs = state->dts_errors; dstate = &state->dts_vstate.dtvs_dynvars; for (CPU_INFO_FOREACH(cpuind, cinfo)) { int ci = cpu_index(cinfo); dtrace_dstate_percpu_t *dcpu = &dstate->dtds_percpu[ci]; stat->dtst_dyndrops += dcpu->dtdsc_drops; stat->dtst_dyndrops_dirty += dcpu->dtdsc_dirty_drops; stat->dtst_dyndrops_rinsing += dcpu->dtdsc_rinsing_drops; if (state->dts_buffer[ci].dtb_flags & DTRACEBUF_FULL) stat->dtst_filled++; nerrs += state->dts_buffer[ci].dtb_errors; for (j = 0; j < state->dts_nspeculations; j++) { dtrace_speculation_t *spec; dtrace_buffer_t *buf; spec = &state->dts_speculations[j]; buf = &spec->dtsp_buffer[ci]; stat->dtst_specdrops += buf->dtb_xamot_drops; } } stat->dtst_specdrops_busy = state->dts_speculations_busy; stat->dtst_specdrops_unavail = state->dts_speculations_unavail; stat->dtst_stkstroverflows = state->dts_stkstroverflows; stat->dtst_dblerrors = state->dts_dblerrors; stat->dtst_killed = (state->dts_activity == DTRACE_ACTIVITY_KILLED); stat->dtst_errors = nerrs; mutex_exit(&dtrace_lock); return (0); } case DTRACEIOC_STOP: { processorid_t *cpuid = (processorid_t *) addr; DTRACE_IOCTL_PRINTF("%s(%d): DTRACEIOC_STOP\n",__func__,__LINE__); mutex_enter(&dtrace_lock); error = dtrace_state_stop(state, cpuid); mutex_exit(&dtrace_lock); return (error); } default: error = ENOTTY; } return (error); }
void cpu_hatch(struct cpu_info *ci) { struct pmap_tlb_info * const ti = ci->ci_tlb_info; /* * Invalidate all the TLB entries (even wired ones) and then reserve * space for the wired TLB entries. */ mips3_cp0_wired_write(0); tlb_invalidate_all(); mips3_cp0_wired_write(ti->ti_wired); /* * Set up HWRENA and USERLOCAL COP0 registers (MIPSxxR2). */ cpu_hwrena_setup(); /* * If we are using register zero relative addressing to access cpu_info * in the exception vectors, enter that mapping into TLB now. */ if (ci->ci_tlb_slot >= 0) { const uint32_t tlb_lo = MIPS3_PG_G|MIPS3_PG_V | mips3_paddr_to_tlbpfn((vaddr_t)ci); const struct tlbmask tlbmask = { .tlb_hi = -PAGE_SIZE | KERNEL_PID, #if (PGSHIFT & 1) .tlb_lo0 = tlb_lo, .tlb_lo1 = tlb_lo + MIPS3_PG_NEXT, #else .tlb_lo0 = 0, .tlb_lo1 = tlb_lo, #endif .tlb_mask = -1, }; tlb_invalidate_addr(tlbmask.tlb_hi, KERNEL_PID); tlb_write_entry(ci->ci_tlb_slot, &tlbmask); } /* * Flush the icache just to be sure. */ mips_icache_sync_all(); /* * Let this CPU do its own initialization (for things that have to be * done on the local CPU). */ (*mips_locoresw.lsw_cpu_init)(ci); // Show this CPU as present. atomic_or_ulong(&ci->ci_flags, CPUF_PRESENT); /* * Announce we are hatched */ kcpuset_atomic_set(cpus_hatched, cpu_index(ci)); /* * Now wait to be set free! */ while (! kcpuset_isset(cpus_running, cpu_index(ci))) { /* spin, spin, spin */ } /* * initialize the MIPS count/compare clock */ mips3_cp0_count_write(ci->ci_data.cpu_cc_skew); KASSERT(ci->ci_cycles_per_hz != 0); ci->ci_next_cp0_clk_intr = ci->ci_data.cpu_cc_skew + ci->ci_cycles_per_hz; mips3_cp0_compare_write(ci->ci_next_cp0_clk_intr); ci->ci_data.cpu_cc_skew = 0; /* * Let this CPU do its own post-running initialization * (for things that have to be done on the local CPU). */ (*mips_locoresw.lsw_cpu_run)(ci); /* * Now turn on interrupts (and verify they are on). */ spl0(); KASSERTMSG(ci->ci_cpl == IPL_NONE, "cpl %d", ci->ci_cpl); KASSERT(mips_cp0_status_read() & MIPS_SR_INT_IE); kcpuset_atomic_set(pmap_kernel()->pm_onproc, cpu_index(ci)); kcpuset_atomic_set(pmap_kernel()->pm_active, cpu_index(ci)); /* * And do a tail call to idle_loop */ idle_loop(NULL); } void cpu_boot_secondary_processors(void) { CPU_INFO_ITERATOR cii; struct cpu_info *ci; for (CPU_INFO_FOREACH(cii, ci)) { if (CPU_IS_PRIMARY(ci)) continue; KASSERT(ci->ci_data.cpu_idlelwp); /* * Skip this CPU if it didn't successfully hatch. */ if (!kcpuset_isset(cpus_hatched, cpu_index(ci))) continue; ci->ci_data.cpu_cc_skew = mips3_cp0_count_read(); atomic_or_ulong(&ci->ci_flags, CPUF_RUNNING); kcpuset_set(cpus_running, cpu_index(ci)); // Spin until the CPU calls idle_loop for (u_int i = 0; i < 100; i++) { if (kcpuset_isset(cpus_running, cpu_index(ci))) break; delay(1000); } } }