/*
 * Suspend sampling of the i-mode (interrupt-mode) counters on the
 * current CPU: freeze each i-mode control register, read the counter,
 * and fold the delta since ->start into ->sum.
 * Must run with preemption disabled (uses smp_processor_id()).
 */
static void mips_isuspend(struct perfctr_cpu_state *state)
{
	struct per_cpu_cache *cache;
	unsigned int cstatus, nrctrs, i;
	int cpu;

	/* Remember which CPU we suspended on so that a later iresume on
	   the same CPU can skip reloading the PMCs (see mips_iresume()). */
	cpu = smp_processor_id();
	set_isuspend_cpu(state, cpu); /* early to limit cpu's live range */
	cache = __get_cpu_cache(cpu);
	cstatus = state->cstatus;
	nrctrs = perfctr_cstatus_nrctrs(cstatus);
	/* i-mode counters occupy the tail of pmc[]: [nractrs, nrctrs). */
	for(i = perfctr_cstatus_nractrs(cstatus); i < nrctrs; ++i) {
		unsigned int pmc, now;
		pmc = state->pmc[i].map;
		/* Freeze by zeroing the whole control register instead of
		   setting only the freeze bits; keep the per-CPU cache in
		   sync so mips_write_control() sees the true HW contents. */
		cache->ctrl_regs[pmc] = 0;
		write_pmctrl(pmc, cache->ctrl_regs[pmc]);
		/* Counter is frozen now: read it and accumulate the delta. */
		now = read_pmc(pmc);
		state->pmc[i].sum += now - state->pmc[i].start;
		state->pmc[i].start = now;
	}
	/* cache->k1.id is still == state->k1.id */
}
static void do_vperfctr_release(struct vperfctr *child_perfctr, struct task_struct *parent_tsk) { struct vperfctr *parent_perfctr; unsigned int cstatus, nrctrs, i; parent_perfctr = parent_tsk->thread.perfctr; if (parent_perfctr && child_perfctr) { // since more than one child can try to add to parent's // counters, we need a lock spin_lock(&parent_perfctr->children_lock); if (parent_perfctr->inheritance_id == child_perfctr->inheritance_id) { cstatus = parent_perfctr->cpu_state.cstatus; if (perfctr_cstatus_has_tsc(cstatus)) parent_perfctr->children.tsc += child_perfctr->cpu_state.tsc_sum + child_perfctr->children.tsc; nrctrs = perfctr_cstatus_nrctrs(cstatus); for(i = 0; i < nrctrs; ++i) parent_perfctr->children.pmc[i] += child_perfctr->cpu_state.pmc[i].sum + child_perfctr->children.pmc[i]; } spin_unlock(&parent_perfctr->children_lock); } // now that we reaped the data from child's task structure // the child's task structure can be freed. Only the child's // vperfctr structure seems to be released. Is the 'task_struct' // released in __vperfctr_release() itself? Doesn't seem so. schedule_put_vperfctr(child_perfctr); }
// the start values have to be reset as we might have changed then in // _isuspend() static inline void setup_imode_start_values(struct perfctr_cpu_state *state) { unsigned int cstatus, nrctrs, i; cstatus = state->cstatus; nrctrs = perfctr_cstatus_nrctrs(cstatus); for(i = perfctr_cstatus_nractrs(cstatus); i < nrctrs; ++i) state->pmc[i].start = state->control.pmc[i].ireset; }
/* schedule() --> switch_to() --> .. --> __vperfctr_resume().
 * PRE: perfctr == current->arch.thread.perfctr
 * If the counters are runnable, resume them.
 * PREEMPT note: switch_to() runs with preemption disabled.
 */
void __vperfctr_resume(struct vperfctr *perfctr)
{
	if (!IS_RUNNING(perfctr))
		return;
#ifdef CONFIG_PERFCTR_CPUS_FORBIDDEN_MASK
	if (unlikely(atomic_read(&perfctr->bad_cpus_allowed)) &&
	    perfctr_cstatus_nrctrs(perfctr->cpu_state.user.cstatus)) {
		/* Task migrated to a forbidden CPU while counters were
		   active: disable them and deliver SIGILL. */
		perfctr->cpu_state.user.cstatus = 0;
		perfctr->resume_cstatus = 0;
		BUG_ON(current->state != TASK_RUNNING);
		send_sig(SIGILL, current, 1);
		return;
	}
#endif
	vperfctr_resume_with_overflow_check(perfctr);
}
/*
 * Resume the i-mode counters on the current CPU: reload their control
 * registers (kept frozen here) and restart values, unless this CPU's
 * per-CPU cache shows the registers are still warm from our own
 * earlier suspend.  Must run with preemption disabled.
 */
static void mips_iresume(const struct perfctr_cpu_state *state)
{
	struct per_cpu_cache *cache;
	unsigned int cstatus, nrctrs, i;
	int cpu;

	cpu = smp_processor_id();
	cache = __get_cpu_cache(cpu);
	if (cache->k1.id == state->k1.id) {
		/* Invalidate the cache id so mips_write_control() is forced
		   to reload the (currently frozen) control registers. */
		cache->k1.id = 0;
		/* If nothing else ran on this CPU since our suspend, the
		   counter registers still hold our values: skip the reload. */
		if (is_isuspend_cpu(state, cpu)) return; /* skip reload of PMCs */
	}
	/* The CPU state wasn't ours.  The counters must be frozen before
	   being reinitialised, to prevent unexpected increments and missed
	   overflows; only the i-mode registers are handled here.  Each
	   i-mode counter is rewritten from state->pmc[i].start, which was
	   set from the .ireset control field (see
	   setup_imode_start_values()) or by a previous suspend. */
	cstatus = state->cstatus;
	nrctrs = perfctr_cstatus_nrctrs(cstatus);
	for(i = perfctr_cstatus_nractrs(cstatus); i < nrctrs; ++i) {
		unsigned int map = state->pmc[i].map;
		cache->ctrl_regs[map] = 0;
		write_pmctrl(map, 0);	/* freeze: zero the control register */
		write_pmc(map, state->pmc[i].start);
	}
	/* cache->k1.id remains != state->k1.id, so the next
	   mips_write_control() will reload the control registers. */
}
/*
 * Enable counting for a vperfctr according to its already-stored
 * control: constrain the task's CPU affinity away from forbidden CPUs,
 * validate/translate the control via perfctr_cpu_update_control(),
 * clear non-preserved sums, and start a fresh inheritance generation.
 * Returns 0 on success or a negative errno.
 */
static int vperfctr_enable_control(struct vperfctr *perfctr, struct task_struct *tsk)
{
	int err;
	unsigned int next_cstatus;
	unsigned int nrctrs, i;

	if (perfctr->cpu_state.control.header.nractrs ||
	    perfctr->cpu_state.control.header.nrictrs) {
		cpumask_t old_mask, new_mask;

		/* NOTE(review): reads tsk->cpu_mask where other variants in
		   this file use tsk->cpus_allowed — presumably a kernel
		   version difference; confirm against the target kernel. */
		//old_mask = tsk->cpus_allowed;
		old_mask = tsk->cpu_mask;
		/* Remove forbidden CPUs; fail if nothing remains. */
		cpus_andnot(new_mask, old_mask, perfctr_cpus_forbidden_mask);
		if (cpus_empty(new_mask))
			return -EINVAL;
		if (!cpus_equal(new_mask, old_mask))
			set_cpus_allowed(tsk, new_mask);
	}

	/* Counters stay disabled until the control has been validated. */
	perfctr->cpu_state.user.cstatus = 0;
	perfctr->resume_cstatus = 0;

	/* remote access note: perfctr_cpu_update_control() is ok */
	err = perfctr_cpu_update_control(&perfctr->cpu_state, 0);
	if (err < 0)
		return err;
	next_cstatus = perfctr->cpu_state.user.cstatus;
	if (!perfctr_cstatus_enabled(next_cstatus))
		return 0;

	if (!perfctr_cstatus_has_tsc(next_cstatus))
		perfctr->cpu_state.user.tsc_sum = 0;

	/* Zero every counter sum the caller did not ask to preserve. */
	nrctrs = perfctr_cstatus_nrctrs(next_cstatus);
	for(i = 0; i < nrctrs; ++i)
		if (!(perfctr->preserve & (1<<i)))
			perfctr->cpu_state.user.pmc[i].sum = 0;

	/* New control means old children's counts no longer make sense:
	   start a new inheritance id and drop accumulated child totals. */
	spin_lock(&perfctr->children_lock);
	perfctr->inheritance_id = new_inheritance_id();
	memset(&perfctr->children, 0, sizeof perfctr->children);
	spin_unlock(&perfctr->children_lock);

	return 0;
}
/* schedule() --> switch_to() --> .. --> __vperfctr_resume().
 * PRE: perfctr == current->thread.perfctr
 * If the counters are runnable, resume them.
 * PREEMPT note: switch_to() runs with preemption disabled.
 */
void __vperfctr_resume(struct vperfctr *perfctr)
{
	if (IS_RUNNING(perfctr)) {
#ifdef CONFIG_PERFCTR_CPUS_FORBIDDEN_MASK
		/* The task migrated to a CPU on which counting is
		   forbidden while counters were active: disable the
		   counters and deliver SIGILL instead of resuming. */
		if (unlikely(atomic_read(&perfctr->bad_cpus_allowed)) &&
		    perfctr_cstatus_nrctrs(perfctr->cpu_state.cstatus)) {
			perfctr->cpu_state.cstatus = 0;
			vperfctr_clear_iresume_cstatus(perfctr);
			BUG_ON(current->state != TASK_RUNNING);
			send_sig(SIGILL, current, 1);
			return;
		}
#endif
		vperfctr_resume_with_overflow_check(perfctr);
	}
}
// the following overflow check is being done only for the i-mode registers // how is the overflow of a-mode registers handled ? inline unsigned int perfctr_cpu_identify_overflow(struct perfctr_cpu_state *state) { unsigned int cstatus, nrctrs, i, pmc_mask; cstatus = state->cstatus; i = perfctr_cstatus_nractrs(cstatus); // a-mode count nrctrs = perfctr_cstatus_nrctrs(cstatus); for(pmc_mask = 0; i < nrctrs; ++i) { // Ok, reset the overflown i-mode counters if ((int)state->pmc[i].start < 0) { /* MIPS-specific */ /* XXX: "+=" to correct for overshots */ state->pmc[i].start = state->control.pmc[i].ireset; pmc_mask |= (1 << i); } } return pmc_mask; }
// PREEMPT note: called in IRQ context with preemption disabled. static void vperfctr_ihandler(unsigned long pc) { struct task_struct *tsk = current; struct vperfctr *perfctr; unsigned int pmc, cstatus, now = 0; int i; perfctr = tsk->thread.perfctr; if (!perfctr) { return; } if (!perfctr_cstatus_has_ictrs(perfctr->cpu_state.cstatus)) { return; } // if someone has really overflown then continue else return // just read, don't freeze them cstatus = perfctr->cpu_state.cstatus; for (i = perfctr_cstatus_nractrs(cstatus); (i < perfctr_cstatus_nrctrs(cstatus)) && ((int)now >= 0); ++i) { pmc = perfctr->cpu_state.pmc[i].map; now = read_pmc(pmc); } if ((int)now >= 0) { return; } // Fine, we are suspending the counters and reading them. vperfctr_suspend() // in turn invokes _suspend() on i-mode ctrs (where they are frozen and read) // and a-mode counters (where they are just read) vperfctr_suspend(perfctr); // Ok, Signal to the userland is sent in the following routine. But before that // the following routine calls vperfctr_resume() if the TSC counting is on. // what happens in that resume is just the TSC value is read and stored in the // 'start' state of the TSC vperfctr_handle_overflow(tsk, perfctr); }
static void mips_write_control(const struct perfctr_cpu_state *state) { struct per_cpu_cache *cache; unsigned int nrctrs, i; // cache stores the information pertaining to one id. Under // what conditions does that cache state remain intact? Can some // processes tell that their statistics be not recorded. In such // a case when a thread is rescheuldes on the same processpor // without the intervening thread recording the statistics, then // the cache will be hot cache = get_cpu_cache(); if (cache->k1.id == state->k1.id) { return; } nrctrs = perfctr_cstatus_nrctrs(state->cstatus); preempt_disable(); for (i = 0; i < nrctrs; ++i) { unsigned int ctrl_reg = state->control.pmc[i].ctrl_reg; unsigned int pmc = state->pmc[i].map; // assuming that the 'state' values have been // updated from control values specified by users if (ctrl_reg != cache->ctrl_regs[pmc]) { if (!perfctr_cntmode) { MIPS_XLR_UNSET_CNT_ALL_THREADS(ctrl_reg); MIPS_XLR_SET_THREADID(ctrl_reg, netlogic_thr_id()); } else { MIPS_XLR_SET_CNT_ALL_THREADS(ctrl_reg); } cache->ctrl_regs[pmc] = ctrl_reg; write_pmctrl(pmc, ctrl_reg); } } cache->k1.id = state->k1.id; preempt_enable(); }
/* release_task() -> perfctr_release_task() -> __vperfctr_release(). * A task is being released. If it inherited its perfctr settings * from its parent, then merge its final counts back into the parent. * Then unlink the child's perfctr. * PRE: caller has write_lock_irq(&tasklist_lock). * PREEMPT note: preemption is disabled due to tasklist_lock. * * When current == parent_tsk, the child's counts can be merged * into the parent's immediately. This is the common case. * * When current != parent_tsk, the parent must be task_lock()ed * before its perfctr state can be accessed. task_lock() is illegal * here due to the write_lock_irq(&tasklist_lock) in release_task(), * so the operation is done via schedule_work(). */ static void do_vperfctr_release(struct vperfctr *child_perfctr, struct task_struct *parent_tsk) { struct vperfctr *parent_perfctr; unsigned int cstatus, nrctrs, i; parent_perfctr = parent_tsk->arch.thread.perfctr; if (parent_perfctr && child_perfctr) { spin_lock(&parent_perfctr->children_lock); if (parent_perfctr->inheritance_id == child_perfctr->inheritance_id) { cstatus = parent_perfctr->cpu_state.user.cstatus; if (perfctr_cstatus_has_tsc(cstatus)) parent_perfctr->children.tsc += child_perfctr->cpu_state.user.tsc_sum + child_perfctr->children.tsc; nrctrs = perfctr_cstatus_nrctrs(cstatus); for(i = 0; i < nrctrs; ++i) parent_perfctr->children.pmc[i] += child_perfctr->cpu_state.user.pmc[i].sum + child_perfctr->children.pmc[i]; } spin_unlock(&parent_perfctr->children_lock); } schedule_put_vperfctr(child_perfctr); }
/*
 * Install a new control block for 'perfctr' (owned by 'tsk') from the
 * user-supplied 'argp' of 'argbytes' bytes.  Suspends running counters,
 * validates and installs the control, clears non-preserved sums, starts
 * a new inheritance generation, and resumes counting when tsk == current.
 * Returns 0 on success or a negative errno.
 */
static int do_vperfctr_control(struct vperfctr *perfctr,
			       const struct vperfctr_control __user *argp,
			       unsigned int argbytes,
			       struct task_struct *tsk)
{
	struct vperfctr_control *control;
	int err;
	unsigned int next_cstatus;
	unsigned int nrctrs, i;

	if (!tsk) {
		return -ESRCH;	/* attempt to update unlinked perfctr */
	}

	/* The control object can be large (over 300 bytes on i386),
	   so kmalloc() it instead of storing it on the stack.
	   We must use task-private storage to prevent racing with a
	   monitor process attaching to us before the non-preemptible
	   perfctr update step. Therefore we cannot store the copy
	   in the perfctr object itself. */
	control = kmalloc(sizeof(*control), GFP_USER);
	if (!control) {
		return -ENOMEM;
	}

	err = -EINVAL;
	if (argbytes > sizeof *control) {
		goto out_kfree;
	}

	err = -EFAULT;
	if (copy_from_user(control, argp, argbytes)) {
		goto out_kfree;
	}

	/* Zero-fill any tail the (possibly older/smaller) caller omitted. */
	if (argbytes < sizeof *control)
		memset((char*)control + argbytes, 0, sizeof *control - argbytes);

	/* If any counters are requested, restrict the task's affinity to
	   CPUs on which counting is allowed; fail if none remain. */
	if (control->cpu_control.nractrs || control->cpu_control.nrictrs) {
		cpumask_t old_mask, new_mask;

		old_mask = tsk->cpus_allowed;
		cpus_andnot(new_mask, old_mask, perfctr_cpus_forbidden_mask);
		err = -EINVAL;
		if (cpus_empty(new_mask)) {
			goto out_kfree;
		}
		if (!cpus_equal(new_mask, old_mask))
			set_cpus_allowed(tsk, new_mask);
	}

	/* PREEMPT note: preemption is disabled over the entire
	   region since we're updating an active perfctr. */
	preempt_disable();

	/* 'tsk' may be current (then its counters must be suspended before
	   the update) or a task controlled by us, e.g. via ptrace (then it
	   is already off-CPU).  cstatus is zeroed so the counters stay
	   disabled until the new control has been validated. */
	if (IS_RUNNING(perfctr)) {
		if (tsk == current)
			vperfctr_suspend(perfctr);
		perfctr->cpu_state.cstatus = 0;
		vperfctr_clear_iresume_cstatus(perfctr);
	}

	/* Install the user-specified control values into 'state'. */
	perfctr->cpu_state.control = control->cpu_control;

	/* remote access note: perfctr_cpu_update_control() is ok */
	err = perfctr_cpu_update_control(&perfctr->cpu_state, 0);
	if (err < 0) {
		goto out;
	}
	next_cstatus = perfctr->cpu_state.cstatus;
	if (!perfctr_cstatus_enabled(next_cstatus))
		goto out;

	/* XXX: validate si_signo? */
	perfctr->si_signo = control->si_signo;

	if (!perfctr_cstatus_has_tsc(next_cstatus))
		perfctr->cpu_state.tsc_sum = 0;

	/* Zero every counter sum the caller did not ask to preserve. */
	nrctrs = perfctr_cstatus_nrctrs(next_cstatus);
	for(i = 0; i < nrctrs; ++i)
		if (!(control->preserve & (1<<i)))
			perfctr->cpu_state.pmc[i].sum = 0;

	/* A new control invalidates inherited child counts: children must
	   be spawned after the control is set for inheritance to apply. */
	spin_lock(&perfctr->children_lock);
	perfctr->inheritance_id = new_inheritance_id();
	memset(&perfctr->children, 0, sizeof perfctr->children);
	spin_unlock(&perfctr->children_lock);

	if (tsk == current) {
		vperfctr_resume(perfctr);
	}

 out:
	preempt_enable();
 out_kfree:
	kfree(control);
	return err;
}
/*
 * Install a new control block for 'perfctr' (owned by 'tsk') from the
 * user buffer 'argp', in three steps: (1) validate/install the control
 * with counters disabled, (2) update the task's CPU affinity (requires
 * preemption enabled), (3) enable the counters — unless a concurrent
 * remote update won the race during step 2.
 * Returns 0 on success or a negative errno.
 */
static int sys_vperfctr_control(struct vperfctr *perfctr,
				struct perfctr_struct_buf *argp,
				struct task_struct *tsk)
{
	struct vperfctr_control control;
	int err;
	unsigned int next_cstatus;
	unsigned int nrctrs, i;
	cpumask_t cpumask;

	if (!tsk)
		return -ESRCH;	/* attempt to update unlinked perfctr */

	err = perfctr_copy_from_user(&control, argp, &vperfctr_control_sdesc);
	if (err)
		return err;

	/* Step 1: Update the control but keep the counters disabled.
	   PREEMPT note: Preemption is disabled since we're updating
	   an active perfctr. */
	preempt_disable();
	if (IS_RUNNING(perfctr)) {
		if (tsk == current)
			vperfctr_suspend(perfctr);
		perfctr->cpu_state.cstatus = 0;
		vperfctr_clear_iresume_cstatus(perfctr);
	}
	perfctr->cpu_state.control = control.cpu_control;
	/* remote access note: perfctr_cpu_update_control() is ok */
	cpus_setall(cpumask);
#ifdef CONFIG_PERFCTR_CPUS_FORBIDDEN_MASK
	/* make a stopped vperfctr have an unconstrained cpumask */
	perfctr->cpumask = cpumask;
#endif
	err = perfctr_cpu_update_control(&perfctr->cpu_state, &cpumask);
	if (err < 0) {
		next_cstatus = 0;
	} else {
		/* Stash the would-be cstatus and keep counters disabled
		   until step 3; record our tgid to detect racing updates. */
		next_cstatus = perfctr->cpu_state.cstatus;
		perfctr->cpu_state.cstatus = 0;
		perfctr->updater_tgid = current->tgid;
#ifdef CONFIG_PERFCTR_CPUS_FORBIDDEN_MASK
		perfctr->cpumask = cpumask;
#endif
	}
	preempt_enable_no_resched();

	if (!perfctr_cstatus_enabled(next_cstatus))
		return err;

#ifdef CONFIG_PERFCTR_CPUS_FORBIDDEN_MASK
	/* Step 2: Update the task's CPU affinity mask.
	   PREEMPT note: Preemption must be enabled for set_cpus_allowed(). */
	if (control.cpu_control.nractrs || control.cpu_control.nrictrs) {
		cpumask_t old_mask, new_mask;

		old_mask = tsk->cpus_allowed;
		cpus_and(new_mask, old_mask, cpumask);
		if (cpus_empty(new_mask))
			return -EINVAL;
		if (!cpus_equal(new_mask, old_mask))
			set_cpus_allowed(tsk, new_mask);
	}
#endif

	/* Step 3: Enable the counters with the new control and affinity.
	   PREEMPT note: Preemption is disabled since we're updating
	   an active perfctr. */
	preempt_disable();

	/* We had to enable preemption above for set_cpus_allowed()
	   so we may have lost a race with a concurrent update via
	   the remote control interface. If so then we must abort
	   our update of this perfctr. */
	if (perfctr->updater_tgid != current->tgid) {
		printk(KERN_WARNING "perfctr: control update by task %d"
		       " was lost due to race with update by task %d\n",
		       current->tgid, perfctr->updater_tgid);
		err = -EBUSY;
	} else {
		/* XXX: validate si_signo? */
		perfctr->si_signo = control.si_signo;

		perfctr->cpu_state.cstatus = next_cstatus;

		if (!perfctr_cstatus_has_tsc(next_cstatus))
			perfctr->cpu_state.tsc_sum = 0;

		/* Zero every counter sum the caller did not ask to preserve. */
		nrctrs = perfctr_cstatus_nrctrs(next_cstatus);
		for(i = 0; i < nrctrs; ++i)
			if (!(control.preserve & (1<<i)))
				perfctr->cpu_state.pmc[i].sum = 0;

		perfctr->flags = control.flags;

		if (tsk == current)
			vperfctr_resume(perfctr);
	}

	preempt_enable();
	return err;
}