/*
 * System call to access CPU performance counters.
 *
 * cmd selects the operation; the meaning of udata1..udata3 depends on cmd and
 * is described at each case below. lwpid is only consulted when the caller is
 * the process's agent thread, in which case it names the lwp to operate on;
 * otherwise the calling thread itself is the target.
 *
 * Returns 0 on success, except for CPC_NPIC and CPC_CAPS which return the
 * requested value directly. Every failure path returns the result of
 * set_errno() with the errno shown at that path.
 */
static int
cpc(int cmd, id_t lwpid, void *udata1, void *udata2, void *udata3)
{
	kthread_t	*t;
	int		error;
	int		size;
	const char	*str;
	int		code;

	/*
	 * This CPC syscall should only be loaded if it found a PCBE to use.
	 */
	ASSERT(pcbe_ops != NULL);

	if (curproc->p_agenttp == curthread) {
		/*
		 * Only if /proc is invoking this system call from
		 * the agent thread do we allow the caller to examine
		 * the contexts of other lwps in the process.  And
		 * because we know we're the agent, we know we don't
		 * have to grab p_lock because no-one else can change
		 * the state of the process.
		 *
		 * The agent may not name itself as the target: that is
		 * reported as ESRCH, the same as an unknown lwpid.
		 */
		if ((t = idtot(curproc, lwpid)) == NULL || t == curthread)
			return (set_errno(ESRCH));
		ASSERT(t->t_tid == lwpid && ttolwp(t) != NULL);
	} else
		t = curthread;

	/*
	 * CPC_SAMPLE dereferences t->t_cpc_set and CPC_RELE hands it to
	 * kcpc_unbind() without any further NULL check, so both are rejected
	 * here when the target has no set.
	 */
	if (t->t_cpc_set == NULL && (cmd == CPC_SAMPLE || cmd == CPC_RELE))
		return (set_errno(EINVAL));

	switch (cmd) {
	case CPC_BIND:
		/*
		 * udata1 = pointer to packed nvlist buffer
		 * udata2 = size of packed nvlist buffer
		 * udata3 = User addr to return error subcode in.
		 *
		 * kcpc_cpuctx_lock is held as reader for the whole bind and
		 * is dropped on every exit path before any copyout() of the
		 * subcode.
		 */

		rw_enter(&kcpc_cpuctx_lock, RW_READER);
		/*
		 * NOTE(review): presumably these flags mean CPU-bound contexts
		 * or DTrace currently own the counters -- confirm.
		 */
		if (kcpc_cpuctx || dtrace_cpc_in_use) {
			rw_exit(&kcpc_cpuctx_lock);
			return (set_errno(EAGAIN));
		}

		if (kcpc_hw_lwp_hook() != 0) {
			rw_exit(&kcpc_cpuctx_lock);
			return (set_errno(EACCES));
		}

		/*
		 * An LWP may only have one set bound to it at a time; if there
		 * is a set bound to this LWP already, we unbind it here.
		 */
		if (t->t_cpc_set != NULL)
			(void) kcpc_unbind(t->t_cpc_set);
		ASSERT(t->t_cpc_set == NULL);

		if ((error = kcpc_copyin_set(&t->t_cpc_set, udata1,
		    (size_t)udata2)) != 0) {
			rw_exit(&kcpc_cpuctx_lock);
			return (set_errno(error));
		}

		/*
		 * A verification failure is always reported as EINVAL; the
		 * value returned by kcpc_verify_set() is what gets copied out
		 * to udata3.
		 */
		if ((error = kcpc_verify_set(t->t_cpc_set)) != 0) {
			rw_exit(&kcpc_cpuctx_lock);
			kcpc_free_set(t->t_cpc_set);
			t->t_cpc_set = NULL;
			if (copyout(&error, udata3, sizeof (error)) == -1)
				return (set_errno(EFAULT));
			return (set_errno(EINVAL));
		}

		if ((error = kcpc_bind_thread(t->t_cpc_set, t, &code)) != 0) {
			rw_exit(&kcpc_cpuctx_lock);
			kcpc_free_set(t->t_cpc_set);
			t->t_cpc_set = NULL;
			/*
			 * EINVAL and EACCES are the only errors with more
			 * specific subcodes.
			 */
			if ((error == EINVAL || error == EACCES) &&
			    copyout(&code, udata3, sizeof (code)) == -1)
				return (set_errno(EFAULT));
			return (set_errno(error));
		}

		rw_exit(&kcpc_cpuctx_lock);
		return (0);
	case CPC_SAMPLE:
		/*
		 * udata1 = pointer to user's buffer
		 * udata2 = pointer to user's hrtime
		 * udata3 = pointer to user's tick
		 */
		/*
		 * We only allow thread-bound sets to be sampled via the
		 * syscall, so if this set has a CPU-bound context, return an
		 * error.
		 */
		if (t->t_cpc_set->ks_ctx->kc_cpuid != -1)
			return (set_errno(EINVAL));
		if ((error = kcpc_sample(t->t_cpc_set, udata1, udata2,
		    udata3)) != 0)
			return (set_errno(error));

		return (0);
	case CPC_PRESET:
	case CPC_RESTART:
		/*
		 * These are valid only if this lwp has a bound set.
		 */
		if (t->t_cpc_set == NULL)
			return (set_errno(EINVAL));
		if (cmd == CPC_PRESET) {
			/*
			 * The preset is shipped up to us from userland in two
			 * parts. This lets us handle 64-bit values from 32-bit
			 * and 64-bit applications in the same manner.
			 *
			 * udata1 = index of request to preset
			 * udata2 = new 64-bit preset (most sig. 32 bits)
			 * udata3 = new 64-bit preset (least sig. 32 bits)
			 */
			if ((error = kcpc_preset(t->t_cpc_set, (intptr_t)udata1,
			    ((uint64_t)(uintptr_t)udata2 << 32ULL) |
			    (uint64_t)(uintptr_t)udata3)) != 0)
				return (set_errno(error));
		} else {
			/*
			 * udata[1-3] = unused
			 */
			if ((error = kcpc_restart(t->t_cpc_set)) != 0)
				return (set_errno(error));
		}
		return (0);
	case CPC_ENABLE:
	case CPC_DISABLE:
		/*
		 * These two carry no argument; the value passed down as
		 * "enable" is forced to 0 before sharing the code below.
		 */
		udata1 = 0;
		/*FALLTHROUGH*/
	case CPC_USR_EVENTS:
	case CPC_SYS_EVENTS:
		/*
		 * Not available on behalf of another lwp (agent case), and
		 * only meaningful when a set is present.
		 */
		if (t != curthread || t->t_cpc_set == NULL)
			return (set_errno(EINVAL));
		/*
		 * Provided for backwards compatibility with CPCv1.
		 *
		 * Stop the counters and record the current counts. Use the
		 * counts as the preset to rebind a new set with the requests
		 * reconfigured as requested.
		 *
		 * udata1: 1 == enable; 0 == disable
		 * udata{2,3}: unused
		 */
		rw_enter(&kcpc_cpuctx_lock, RW_READER);
		if ((error = kcpc_enable(t,
		    cmd, (int)(uintptr_t)udata1)) != 0) {
			rw_exit(&kcpc_cpuctx_lock);
			return (set_errno(error));
		}
		rw_exit(&kcpc_cpuctx_lock);
		return (0);
	case CPC_NPIC:
		/* The value itself, not 0, is the syscall's return value. */
		return (cpc_ncounters);
	case CPC_CAPS:
		/* The value itself, not 0, is the syscall's return value. */
		return (pcbe_ops->pcbe_caps);
	case CPC_EVLIST_SIZE:
	case CPC_LIST_EVENTS:
		/*
		 * udata1 = pointer to user's int or buffer
		 * udata2 = picnum
		 * udata3 = unused
		 */
		if ((uintptr_t)udata2 >= cpc_ncounters)
			return (set_errno(EINVAL));

		/* size counts the terminating NUL of the event list. */
		size = strlen(
		    pcbe_ops->pcbe_list_events((uintptr_t)udata2)) + 1;

		if (cmd == CPC_EVLIST_SIZE) {
			if (suword32(udata1, size) == -1)
				return (set_errno(EFAULT));
		} else {
			if (copyout(
			    pcbe_ops->pcbe_list_events((uintptr_t)udata2),
			    udata1, size) == -1)
				return (set_errno(EFAULT));
		}
		return (0);
	case CPC_ATTRLIST_SIZE:
	case CPC_LIST_ATTRS:
		/*
		 * udata1 = pointer to user's int or buffer
		 * udata2 = unused
		 * udata3 = unused
		 *
		 * attrlist size is length of PCBE-supported attributes, plus
		 * room for "picnum\0" plus an optional ',' separator char.
		 */
		str = pcbe_ops->pcbe_list_attrs();
		size = strlen(str) + sizeof (SEPARATOR ATTRLIST) + 1;
		if (str[0] != '\0')
			/*
			 * A ',' separator character is necessary.
			 */
			size += 1;

		if (cmd == CPC_ATTRLIST_SIZE) {
			if (suword32(udata1, size) == -1)
				return (set_errno(EFAULT));
		} else {
			/*
			 * Copyout the PCBE attributes, and then append the
			 * generic attribute list (with separator if necessary).
			 *
			 * The first copyout deliberately omits str's NUL; the
			 * second one writes over that position and supplies
			 * the terminator for the combined string.
			 */
			if (copyout(str, udata1, strlen(str)) == -1)
				return (set_errno(EFAULT));
			if (str[0] != '\0') {
				if (copyout(SEPARATOR ATTRLIST,
				    ((char *)udata1) + strlen(str),
				    strlen(SEPARATOR ATTRLIST) + 1)
				    == -1)
					return (set_errno(EFAULT));
			} else
				if (copyout(ATTRLIST,
				    (char *)udata1 + strlen(str),
				    strlen(ATTRLIST) + 1) == -1)
					return (set_errno(EFAULT));
		}
		return (0);
	case CPC_IMPL_NAME:
	case CPC_CPUREF:
		/*
		 * udata1 = pointer to user's buffer
		 * udata2 = unused
		 * udata3 = unused
		 *
		 * The string is copied out including its NUL. Its length is
		 * only ASSERTed against the CPC_MAX_* bound, not enforced.
		 */
		if (cmd == CPC_IMPL_NAME) {
			str = pcbe_ops->pcbe_impl_name();
			ASSERT(strlen(str) < CPC_MAX_IMPL_NAME);
		} else {
			str = pcbe_ops->pcbe_cpuref();
			ASSERT(strlen(str) < CPC_MAX_CPUREF);
		}

		if (copyout(str, udata1, strlen(str) + 1) != 0)
			return (set_errno(EFAULT));
		return (0);
	case CPC_INVALIDATE:
		/* udata[1-3] = unused */
		kcpc_invalidate(t);
		return (0);
	case CPC_RELE:
		/*
		 * udata[1-3] = unused. t->t_cpc_set is known to be non-NULL
		 * here because of the check ahead of the switch.
		 */
		if ((error = kcpc_unbind(t->t_cpc_set)) != 0)
			return (set_errno(error));
		return (0);
	default:
		return (set_errno(EINVAL));
	}
}
/*
 * Copy in a packed nvlist from the user and create a request set out of it.
 * If successful, return 0 and store a pointer to the set we've created. Returns
 * error code on error.
 *
 * inset: where the newly allocated set is stored on success; it is left
 *	  untouched on failure.
 * ubuf:  user address of the packed nvlist.
 * len:   size of the packed nvlist; must lie within
 *	  [CPC_MIN_PACKSIZE, CPC_MAX_PACKSIZE].
 *
 * Errors: EFAULT if the buffer cannot be copied in; EINVAL for a bad length,
 * an nvlist that does not unpack, or contents that do not describe a set.
 *
 * The buffer comes straight from userland, so nothing about its contents
 * (member names, types, counts, duplicates) is trusted.
 */
int
kcpc_copyin_set(kcpc_set_t **inset, void *ubuf, size_t len)
{
	kcpc_set_t	*set;
	int		i;
	int		j;
	char		*packbuf;

	nvlist_t	*nvl;
	nvpair_t	*nvp = NULL;

	nvlist_t	*attrs;
	nvpair_t	*nvp_attr;
	kcpc_attr_t	*attrp;

	nvlist_t	**reqlist = NULL;
	uint_t		nreqs = 0;
	int		seen_reqs = 0;
	uint64_t	uint64;
	uint32_t	uint32;
	uint32_t	setflags = (uint32_t)-1;
	char		*string;
	char		*name;

	if (len < CPC_MIN_PACKSIZE || len > CPC_MAX_PACKSIZE)
		return (EINVAL);

	packbuf = kmem_alloc(len, KM_SLEEP);

	if (copyin(ubuf, packbuf, len) == -1) {
		kmem_free(packbuf, len);
		return (EFAULT);
	}

	if (nvlist_unpack(packbuf, len, &nvl, KM_SLEEP) != 0) {
		kmem_free(packbuf, len);
		return (EINVAL);
	}

	/*
	 * The nvlist has been unpacked so there is no need for the packed
	 * representation from this point on.
	 */
	kmem_free(packbuf, len);

	i = 0;
	while ((nvp = nvlist_next_nvpair(nvl, nvp)) != NULL) {
		switch (nvpair_type(nvp)) {
		case DATA_TYPE_UINT32:
			if (strcmp(nvpair_name(nvp), "flags") != 0 ||
			    nvpair_value_uint32(nvp, &setflags) != 0) {
				nvlist_free(nvl);
				return (EINVAL);
			}
			break;
		case DATA_TYPE_NVLIST_ARRAY:
			if (strcmp(nvpair_name(nvp), "reqs") != 0 ||
			    nvpair_value_nvlist_array(nvp, &reqlist,
			    &nreqs) != 0) {
				nvlist_free(nvl);
				return (EINVAL);
			}
			seen_reqs = 1;
			break;
		default:
			nvlist_free(nvl);
			return (EINVAL);
		}
		i++;
	}

	/*
	 * There should be two members in the top-level nvlist:
	 * an array of nvlists consisting of the requests, and flags.
	 * Anything else is an invalid set.
	 *
	 * Counting members is not enough on its own: a list holding two
	 * "flags" members also has exactly two entries, and would leave
	 * reqlist/nreqs unassigned. Insist that the request array was
	 * actually present before using either of them.
	 */
	if (i != 2 || !seen_reqs) {
		nvlist_free(nvl);
		return (EINVAL);
	}

	if (nreqs > CPC_MAX_NREQS) {
		nvlist_free(nvl);
		return (EINVAL);
	}

	/*
	 * The requests are now stored in the nvlist array at reqlist.
	 * Note that the use of kmem_zalloc() to alloc the kcpc_set_t means
	 * we don't need to call the init routines for ks_lock and ks_condv.
	 */
	set = kmem_zalloc(sizeof (kcpc_set_t), KM_SLEEP);
	set->ks_req = (kcpc_request_t *)kmem_zalloc(sizeof (kcpc_request_t) *
	    nreqs, KM_SLEEP);
	set->ks_nreqs = nreqs;
	/*
	 * If the nvlist didn't contain a flags member, setflags was initialized
	 * with an illegal value and this set will fail sanity checks later on.
	 */
	set->ks_flags = setflags;
	/*
	 * Initialize bind/unbind set synchronization.
	 */
	set->ks_state &= ~KCPC_SET_BOUND;

	/*
	 * Build the set up one request at a time, always keeping it self-
	 * consistent so we can give it to kcpc_free_set() if we need to back
	 * out and return and error.
	 */
	for (i = 0; i < nreqs; i++) {
		nvp = NULL;
		set->ks_req[i].kr_picnum = -1;
		while ((nvp = nvlist_next_nvpair(reqlist[i], nvp)) != NULL) {
			name = nvpair_name(nvp);
			switch (nvpair_type(nvp)) {
			case DATA_TYPE_UINT32:
				if (nvpair_value_uint32(nvp, &uint32) == EINVAL)
					goto inval;
				if (strcmp(name, "cr_flags") == 0)
					set->ks_req[i].kr_flags = uint32;
				if (strcmp(name, "cr_index") == 0)
					set->ks_req[i].kr_index = uint32;
				break;
			case DATA_TYPE_UINT64:
				if (nvpair_value_uint64(nvp, &uint64) == EINVAL)
					goto inval;
				if (strcmp(name, "cr_preset") == 0)
					set->ks_req[i].kr_preset = uint64;
				break;
			case DATA_TYPE_STRING:
				if (nvpair_value_string(nvp, &string) == EINVAL)
					goto inval;
				/*
				 * NOTE(review): strncpy() leaves kr_event
				 * without a terminating NUL when the event
				 * name is CPC_MAX_EVENT_LEN bytes or longer --
				 * confirm the field size and whether consumers
				 * rely on termination.
				 */
				if (strcmp(name, "cr_event") == 0)
					(void) strncpy(set->ks_req[i].kr_event,
					    string, CPC_MAX_EVENT_LEN);
				break;
			case DATA_TYPE_NVLIST:
				if (strcmp(name, "cr_attr") != 0)
					goto inval;
				/*
				 * A request carries at most one attribute
				 * list. Accepting a second one would overwrite
				 * kr_attr/kr_nattrs and lose track of the
				 * array allocated for the first. kr_attr
				 * starts out NULL because ks_req was
				 * kmem_zalloc()ed above.
				 */
				if (set->ks_req[i].kr_attr != NULL)
					goto inval;
				if (nvpair_value_nvlist(nvp, &attrs) == EINVAL)
					goto inval;
				nvp_attr = NULL;
				/*
				 * If the picnum has been specified as an
				 * attribute, consume that attribute here and
				 * remove it from the list of attributes.
				 */
				if (nvlist_lookup_uint64(attrs, "picnum",
				    &uint64) == 0) {
					if (nvlist_remove(attrs, "picnum",
					    DATA_TYPE_UINT64) != 0)
						panic("nvlist %p faulty",
						    (void *)attrs);
					set->ks_req[i].kr_picnum = uint64;
				}

				if ((set->ks_req[i].kr_nattrs =
				    kcpc_nvlist_npairs(attrs)) == 0)
					break;

				if (set->ks_req[i].kr_nattrs > CPC_MAX_ATTRS) {
					/*
					 * No attribute array exists yet, so
					 * don't leave a nonzero count behind
					 * for the cleanup at inval to see.
					 */
					set->ks_req[i].kr_nattrs = 0;
					goto inval;
				}

				set->ks_req[i].kr_attr =
				    kmem_alloc(set->ks_req[i].kr_nattrs *
				    sizeof (kcpc_attr_t), KM_SLEEP);
				j = 0;

				while ((nvp_attr = nvlist_next_nvpair(attrs,
				    nvp_attr)) != NULL) {
					attrp = &set->ks_req[i].kr_attr[j];

					if (nvpair_type(nvp_attr) !=
					    DATA_TYPE_UINT64)
						goto inval;

					/*
					 * NOTE(review): same strncpy()
					 * termination caveat as kr_event
					 * above, for names of
					 * CPC_MAX_ATTR_LEN bytes or more.
					 */
					(void) strncpy(attrp->ka_name,
					    nvpair_name(nvp_attr),
					    CPC_MAX_ATTR_LEN);

					if (nvpair_value_uint64(nvp_attr,
					    &(attrp->ka_val)) == EINVAL)
						goto inval;
					j++;
				}
				ASSERT(j == set->ks_req[i].kr_nattrs);
				break;
			default:
				/* Members of any other type are ignored. */
				break;
			}
		}
	}

	nvlist_free(nvl);
	*inset = set;
	return (0);

inval:
	nvlist_free(nvl);
	kcpc_free_set(set);
	return (EINVAL);
}
/*
 * Implement the CPC_ENABLE, CPC_DISABLE, CPC_USR_EVENTS and CPC_SYS_EVENTS
 * commands for thread t.
 *
 * t:      thread whose CPC context and set are operated on.
 * cmd:    one of the four commands above; anything else yields EINVAL.
 * enable: only consulted for CPC_USR_EVENTS/CPC_SYS_EVENTS, where nonzero
 *	   turns the corresponding counting flag on in every request and zero
 *	   turns it off.
 *
 * Returns 0 on success; EAGAIN if the context is already marked invalid;
 * EINVAL if the thread has no context, if the freeze state does not match
 * the request (enable while not frozen, disable while frozen), or if the
 * unbind/rebind for usr/sys events fails.
 *
 * Caller must hold kcpc_cpuctx_lock.
 */
int
kcpc_enable(kthread_t *t, int cmd, int enable)
{
	kcpc_ctx_t	*ctx = t->t_cpc_ctx;
	kcpc_set_t	*set = t->t_cpc_set;
	kcpc_set_t	*newset;
	int		i;
	int		flag;
	int		err;

	ASSERT(RW_READ_HELD(&kcpc_cpuctx_lock));

	if (ctx == NULL) {
		/*
		 * This thread has a set but no context; it must be a
		 * CPU-bound set.
		 */
		ASSERT(t->t_cpc_set != NULL);
		ASSERT(t->t_cpc_set->ks_ctx->kc_cpuid != -1);
		return (EINVAL);
	} else if (ctx->kc_flags & KCPC_CTX_INVALID)
		return (EAGAIN);

	if (cmd == CPC_ENABLE) {
		/* Enabling only makes sense on a frozen context. */
		if ((ctx->kc_flags & KCPC_CTX_FREEZE) == 0)
			return (EINVAL);
		kpreempt_disable();
		atomic_and_uint(&ctx->kc_flags, ~KCPC_CTX_FREEZE);
		kcpc_restore(ctx);
		kpreempt_enable();
	} else if (cmd == CPC_DISABLE) {
		/* Disabling an already frozen context is an error. */
		if (ctx->kc_flags & KCPC_CTX_FREEZE)
			return (EINVAL);
		kpreempt_disable();
		kcpc_save(ctx);
		atomic_or_uint(&ctx->kc_flags, KCPC_CTX_FREEZE);
		kpreempt_enable();
	} else if (cmd == CPC_USR_EVENTS || cmd == CPC_SYS_EVENTS) {
		/*
		 * Strategy for usr/sys: stop counters and update set's presets
		 * with current counter values, unbind, update requests with
		 * new config, then re-bind.
		 */
		flag = (cmd == CPC_USR_EVENTS) ?
		    CPC_COUNT_USER: CPC_COUNT_SYSTEM;

		kpreempt_disable();
		atomic_or_uint(&ctx->kc_flags,
		    KCPC_CTX_INVALID | KCPC_CTX_INVALID_STOPPED);
		pcbe_ops->pcbe_allstop();
		kpreempt_enable();
		for (i = 0; i < set->ks_nreqs; i++) {
			set->ks_req[i].kr_preset = *(set->ks_req[i].kr_data);
			if (enable)
				set->ks_req[i].kr_flags |= flag;
			else
				set->ks_req[i].kr_flags &= ~flag;
		}
		newset = kcpc_dup_set(set);
		if (kcpc_unbind(set) != 0) {
			/*
			 * The duplicate has not been installed on the thread
			 * yet, so nobody else will ever free it. Release it
			 * the same way the bind-failure path below does.
			 */
			kcpc_free_set(newset);
			return (EINVAL);
		}
		t->t_cpc_set = newset;
		if (kcpc_bind_thread(newset, t, &err) != 0) {
			t->t_cpc_set = NULL;
			kcpc_free_set(newset);
			return (EINVAL);
		}
	} else
		return (EINVAL);

	return (0);
}
/*
 * ioctl handler for the CPC device: binds, samples and releases a set for the
 * CPU named by the device's minor number.
 *
 * The calling thread must still be bound to that CPU, otherwise EAGAIN is
 * returned. CPCIO_BIND and CPCIO_SAMPLE take a __cpc_args block at "data"
 * (copied in according to the caller's data model in "flags"); CPCIO_RELE
 * takes no argument. Unknown commands yield EINVAL.
 *
 * Returns 0 on success or an errno value.
 */
/*ARGSUSED*/
static int
kcpc_ioctl(dev_t dev, int cmd, intptr_t data, int flags, cred_t *cr, int *rvp)
{
	kthread_t	*t = curthread;
	processorid_t	cpuid;
	void		*arg1 = NULL;
	void		*arg2 = NULL;
	void		*arg3 = NULL;
	int		rv;
	int		subcode;

	STRUCT_DECL(__cpc_args, args);

	STRUCT_INIT(args, flags);

	if (t->t_bind_cpu != getminor(dev))
		return (EAGAIN);  /* someone unbound it? */

	cpuid = getminor(dev);

	/*
	 * Only the bind and sample commands carry an argument block; fetch
	 * it up front so the handlers below work on plain pointers.
	 */
	switch (cmd) {
	case CPCIO_BIND:
	case CPCIO_SAMPLE:
		if (copyin((void *)data, STRUCT_BUF(args),
		    STRUCT_SIZE(args)) == -1)
			return (EFAULT);

		arg1 = STRUCT_FGETP(args, udata1);
		arg2 = STRUCT_FGETP(args, udata2);
		arg3 = STRUCT_FGETP(args, udata3);
		break;
	default:
		break;
	}

	if (cmd == CPCIO_RELE) {
		/* Nothing to release without a set. */
		if (t->t_cpc_set == NULL)
			return (EINVAL);
		return (kcpc_unbind(t->t_cpc_set));
	}

	if (cmd == CPCIO_SAMPLE) {
		/*
		 * arg1 = pointer to user's buffer
		 * arg2 = pointer to user's hrtime
		 * arg3 = pointer to user's tick
		 *
		 * The ioctl() is the sampling path for CPU-bound sets. The
		 * only precondition checked here is that this thread has a
		 * set at all.
		 */
		if (t->t_cpc_set == NULL)
			return (EINVAL);
		return (kcpc_sample(t->t_cpc_set, arg1, arg2, arg3));
	}

	if (cmd != CPCIO_BIND)
		return (EINVAL);

	/*
	 * CPCIO_BIND:
	 * arg1 = pointer to packed nvlist buffer
	 * arg2 = size of packed nvlist buffer
	 * arg3 = user address to return error subcode in
	 *
	 * Any set this thread already holds is dropped first.
	 */
	if (t->t_cpc_set != NULL) {
		(void) kcpc_unbind(t->t_cpc_set);
		ASSERT(t->t_cpc_set == NULL);
	}

	rv = kcpc_copyin_set(&t->t_cpc_set, arg1, (size_t)arg2);
	if (rv != 0)
		return (rv);

	/*
	 * A set that fails verification is reported as EINVAL, with the
	 * verifier's own result handed back through arg3.
	 */
	rv = kcpc_verify_set(t->t_cpc_set);
	if (rv != 0) {
		kcpc_free_set(t->t_cpc_set);
		t->t_cpc_set = NULL;
		if (copyout(&rv, arg3, sizeof (rv)) == -1)
			return (EFAULT);
		return (EINVAL);
	}

	rv = kcpc_bind_cpu(t->t_cpc_set, cpuid, &subcode);
	if (rv == 0)
		return (0);

	/*
	 * The bind failed: discard the set. Subcodes are only returned for
	 * EINVAL and EACCES.
	 */
	kcpc_free_set(t->t_cpc_set);
	t->t_cpc_set = NULL;
	if ((rv == EINVAL || rv == EACCES) &&
	    copyout(&subcode, arg3, sizeof (subcode)) == -1)
		return (EFAULT);
	return (rv);
}
/*
 * Dispose of a CPC context together with the set attached to it.
 *
 * The context is marked invalid first. When isexec is nonzero the caller is
 * in the middle of an exec: the counters are stopped and the current
 * thread's (or the bound CPU's) pointers to this context and set are
 * cleared before anything is freed. In all cases the PCBE configuration of
 * each request, the set's data area, the context and finally the set itself
 * are released.
 */
/*ARGSUSED*/
static void
kcpc_free(kcpc_ctx_t *ctx, int isexec)
{
	kcpc_set_t	*set = ctx->kc_set;
	int		idx;

	ASSERT(set != NULL);

	atomic_or_uint(&ctx->kc_flags, KCPC_CTX_INVALID);

	if (isexec) {
		/*
		 * After the exec this thread must not have any performance
		 * counter context left. Stop the counters properly now so
		 * that no overflow interrupt surprises the system later.
		 */
		if (ctx->kc_cpuid == -1) {
			/*
			 * Thread-bound context; the counters to stop are
			 * those of the CPU we are running on right now.
			 */
			kpreempt_disable();
			pcbe_ops->pcbe_allstop();
			atomic_or_uint(&ctx->kc_flags,
			    KCPC_CTX_INVALID_STOPPED);
			kpreempt_enable();
			curthread->t_cpc_ctx = NULL;
		} else {
			cpu_t *cpu;

			/*
			 * CPU-bound context; stop that CPU's counters.
			 * cpu_lock is held across the lookup and the stop so
			 * the CPU cannot go away underneath us.
			 */
			mutex_enter(&cpu_lock);
			cpu = cpu_get(ctx->kc_cpuid);
			/*
			 * The CPU may have been DR'd out already; there is
			 * nothing to stop or clear in that case.
			 */
			if (cpu != NULL) {
				mutex_enter(&cpu->cpu_cpc_ctxlock);
				kcpc_stop_hw(ctx);
				cpu->cpu_cpc_ctx = NULL;
				mutex_exit(&cpu->cpu_cpc_ctxlock);
			}
			mutex_exit(&cpu_lock);
			ASSERT(curthread->t_cpc_ctx == NULL);
		}

		/*
		 * exec is never performed by the agent thread, so the context
		 * belongs to the current thread. Clear its set pointer too so
		 * that no dangling CPC pointers remain.
		 */
		ASSERT(ctx->kc_thread == curthread);
		curthread->t_cpc_set = NULL;
	}

	/*
	 * Give back whatever configuration the PCBE attached to each
	 * request of the set.
	 */
	for (idx = 0; idx < set->ks_nreqs; idx++) {
		kcpc_request_t *req = &set->ks_req[idx];

		if (req->kr_config != NULL)
			pcbe_ops->pcbe_free(req->kr_config);
	}

	/* The data area is sized as one uint64_t per request. */
	kmem_free(set->ks_data, set->ks_nreqs * sizeof (uint64_t));
	kcpc_ctx_free(ctx);
	kcpc_free_set(set);
}