/*
 * Initialize the microstate level and the
 * associated accounting information for an LWP.
 */
void
init_mstate(
	kthread_t	*t,
	int		init_state)
{
	struct mstate *ms;
	klwp_t *lwp;
	hrtime_t curtime;

	ASSERT(init_state != LMS_WAIT_CPU);
	ASSERT((unsigned)init_state < NMSTATES);

	if ((lwp = ttolwp(t)) != NULL) {
		ms = &lwp->lwp_mstate;
		curtime = gethrtime_unscaled();
		ms->ms_prev = LMS_SYSTEM;
		ms->ms_start = curtime;
		ms->ms_term = 0;
		ms->ms_state_start = curtime;
		t->t_mstate = init_state;
		t->t_waitrq = 0;
		t->t_hrtime = curtime;
		if ((t->t_proc_flag & TP_MSACCT) == 0)
			t->t_proc_flag |= TP_MSACCT;
		bzero((caddr_t)&ms->ms_acct[0], sizeof (ms->ms_acct));
	}
}
/*
 * Put specified thread to specified wait queue without dropping thread's lock.
 * Returns 1 if thread was successfully placed on project's wait queue, or
 * 0 if wait queue is blocked.
 */
int
waitq_enqueue(waitq_t *wq, kthread_t *t)
{
	ASSERT(THREAD_LOCK_HELD(t));
	ASSERT(t->t_sleepq == NULL);
	ASSERT(t->t_waitq == NULL);
	ASSERT(t->t_link == NULL);

	disp_lock_enter_high(&wq->wq_lock);

	/*
	 * Can't enqueue anything on a blocked wait queue
	 */
	if (wq->wq_blocked) {
		disp_lock_exit_high(&wq->wq_lock);
		return (0);
	}

	/*
	 * Mark the time when thread is placed on wait queue. The microstate
	 * accounting code uses this timestamp to determine wait times.
	 */
	t->t_waitrq = gethrtime_unscaled();

	/*
	 * Mark thread as not swappable.  If necessary, it will get
	 * swapped out when it returns to the userland.
	 */
	t->t_schedflag |= TS_DONT_SWAP;
	DTRACE_SCHED1(cpucaps__sleep, kthread_t *, t);
	waitq_link(wq, t);

	THREAD_WAIT(t, &wq->wq_lock);
	return (1);
}
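/*
 * Sketch of the NEW_CPU_MSTATE() macro used by syscall_mstate() below.
 * The macro itself is not part of this excerpt; the reconstruction here
 * is an assumption based on how it is invoked (curtime, cpu, and gen are
 * locals of the caller).  The apparent intent is to charge the elapsed
 * unscaled time to the CPU's current accounting bucket, switch cpu_mstate,
 * reset the start timestamp, and briefly zero the generation counter so
 * lock-free readers can detect an update in progress.  Treat the exact
 * definition as illustrative, not authoritative.
 */
#define	NEW_CPU_MSTATE(state)						\
	gen = cpu->cpu_mstate_gen;					\
	cpu->cpu_mstate_gen = 0;					\
	cpu->cpu_acct[cpu->cpu_mstate] += curtime - cpu->cpu_mstate_start; \
	cpu->cpu_mstate = state;					\
	cpu->cpu_mstate_start = curtime;				\
	cpu->cpu_mstate_gen = gen;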
/*
 * Update microstate accounting when a thread transitions between the
 * user and system states: charge the elapsed time to the state being
 * left, record the new state, and switch the CPU's microstate to match.
 */
void
syscall_mstate(int fromms, int toms)
{
	kthread_t *t = curthread;
	zone_t *z = ttozone(t);
	struct mstate *ms;
	hrtime_t *mstimep;
	hrtime_t curtime;
	klwp_t *lwp;
	hrtime_t newtime;
	cpu_t *cpu;
	uint16_t gen;

	if ((lwp = ttolwp(t)) == NULL)
		return;

	ASSERT(fromms < NMSTATES);
	ASSERT(toms < NMSTATES);

	ms = &lwp->lwp_mstate;
	mstimep = &ms->ms_acct[fromms];
	curtime = gethrtime_unscaled();
	newtime = curtime - ms->ms_state_start;
	while (newtime < 0) {
		/*
		 * Unscaled hrtime readings are not guaranteed to be
		 * consistent across CPUs, so retry until the delta is
		 * non-negative.
		 */
		curtime = gethrtime_unscaled();
		newtime = curtime - ms->ms_state_start;
	}
	*mstimep += newtime;
	if (fromms == LMS_USER)
		atomic_add_64(&z->zone_utime, newtime);
	else if (fromms == LMS_SYSTEM)
		atomic_add_64(&z->zone_stime, newtime);
	t->t_mstate = toms;
	ms->ms_state_start = curtime;
	ms->ms_prev = fromms;
	kpreempt_disable(); /* don't change CPU while changing CPU's state */
	cpu = CPU;
	ASSERT(cpu == t->t_cpu);
	if ((toms != LMS_USER) && (cpu->cpu_mstate != CMS_SYSTEM)) {
		NEW_CPU_MSTATE(CMS_SYSTEM);
	} else if ((toms == LMS_USER) && (cpu->cpu_mstate != CMS_USER)) {
		NEW_CPU_MSTATE(CMS_USER);
	}
	kpreempt_enable();
}
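/*
 * Illustrative call pattern (hypothetical wrapper, not from the original
 * source): syscall_mstate() is the kind of hook a system-call path would
 * invoke on the way into and out of the kernel, so time spent handling
 * the call is charged to LMS_SYSTEM and time in userland to LMS_USER.
 */
static int64_t
example_syscall_wrapper(int64_t (*handler)(void))
{
	int64_t rval;

	syscall_mstate(LMS_USER, LMS_SYSTEM);	/* entering the kernel */
	rval = handler();			/* do the actual work */
	syscall_mstate(LMS_SYSTEM, LMS_USER);	/* returning to userland */
	return (rval);
}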
/*
 * Initialize the microstate level and the
 * associated accounting information for a CPU.
 */
void
init_cpu_mstate(
	cpu_t	*cpu,
	int	init_state)
{
	ASSERT(init_state != CMS_DISABLED);

	cpu->cpu_mstate = init_state;
	cpu->cpu_mstate_start = gethrtime_unscaled();
	cpu->cpu_waitrq = 0;
	bzero((caddr_t)&cpu->cpu_acct[0], sizeof (cpu->cpu_acct));
}
/*
 * Return the amount of onproc and runnable time this thread has experienced.
 *
 * Because the fields we read are not protected by locks when updated
 * by the thread itself, this is an inherently racy interface.  In
 * particular, the ASSERT(THREAD_LOCK_HELD(t)) doesn't guarantee as much
 * as it might appear to.
 *
 * The implication for users of this interface is that onproc and runnable
 * are *NOT* monotonically increasing; they may temporarily be larger than
 * they should be.
 */
void
mstate_systhread_times(kthread_t *t, hrtime_t *onproc, hrtime_t *runnable)
{
	struct mstate *const ms = &ttolwp(t)->lwp_mstate;

	int		mstate;
	hrtime_t	now;
	hrtime_t	state_start;
	hrtime_t	waitrq;
	hrtime_t	aggr_onp;
	hrtime_t	aggr_run;

	ASSERT(THREAD_LOCK_HELD(t));
	ASSERT(t->t_procp->p_flag & SSYS);
	ASSERT(ttolwp(t) != NULL);

	/* shouldn't be any non-SYSTEM on-CPU time */
	ASSERT(ms->ms_acct[LMS_USER] == 0);
	ASSERT(ms->ms_acct[LMS_TRAP] == 0);

	mstate = t->t_mstate;
	waitrq = t->t_waitrq;
	state_start = ms->ms_state_start;

	aggr_onp = ms->ms_acct[LMS_SYSTEM];
	aggr_run = ms->ms_acct[LMS_WAIT_CPU];

	now = gethrtime_unscaled();

	/* if waitrq == 0, then there is no time to account to TS_RUN */
	if (waitrq == 0)
		waitrq = now;

	/* If there is system time to accumulate, do so */
	if (mstate == LMS_SYSTEM && state_start < waitrq)
		aggr_onp += waitrq - state_start;

	if (waitrq < now)
		aggr_run += now - waitrq;

	scalehrtime(&aggr_onp);
	scalehrtime(&aggr_run);

	*onproc = aggr_onp;
	*runnable = aggr_run;
}
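/*
 * Illustrative consumer sketch (hypothetical helper, not from the original
 * source): because the values reported by mstate_systhread_times() are not
 * monotonically increasing, a periodic reader that wants monotonic counters
 * can clamp each new sample against the previous one, as shown here.
 */
typedef struct systhread_sample {
	hrtime_t ss_onproc;	/* running maximum of on-CPU time */
	hrtime_t ss_runnable;	/* running maximum of runnable time */
} systhread_sample_t;

static void
example_sample_systhread(kthread_t *t, systhread_sample_t *ss)
{
	hrtime_t onproc, runnable;

	thread_lock(t);
	mstate_systhread_times(t, &onproc, &runnable);
	thread_unlock(t);

	/* Keep the running maximum so reported values never go backwards. */
	if (onproc > ss->ss_onproc)
		ss->ss_onproc = onproc;
	if (runnable > ss->ss_runnable)
		ss->ss_runnable = runnable;
}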
/*
 * Return an aggregation of user and system CPU time consumed by
 * the specified thread in scaled nanoseconds.
 */
hrtime_t
mstate_thread_onproc_time(kthread_t *t)
{
	hrtime_t aggr_time;
	hrtime_t now;
	hrtime_t waitrq;
	hrtime_t state_start;
	struct mstate *ms;
	klwp_t *lwp;
	int mstate;

	ASSERT(THREAD_LOCK_HELD(t));

	if ((lwp = ttolwp(t)) == NULL)
		return (0);

	mstate = t->t_mstate;
	waitrq = t->t_waitrq;
	ms = &lwp->lwp_mstate;
	state_start = ms->ms_state_start;

	aggr_time = ms->ms_acct[LMS_USER] +
	    ms->ms_acct[LMS_SYSTEM] + ms->ms_acct[LMS_TRAP];

	now = gethrtime_unscaled();

	/*
	 * NOTE: gethrtime_unscaled on X86 taken on different CPUs is
	 * inconsistent, so it is possible that now < state_start.
	 */
	if (mstate == LMS_USER || mstate == LMS_SYSTEM || mstate == LMS_TRAP) {
		/* if waitrq is zero, count all of the time. */
		if (waitrq == 0) {
			waitrq = now;
		}

		if (waitrq > state_start) {
			aggr_time += waitrq - state_start;
		}
	}

	scalehrtime(&aggr_time);
	return (aggr_time);
}
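/*
 * Illustrative helper (hypothetical, not from the original source): the
 * accounting code above stores raw timestamps from gethrtime_unscaled()
 * and converts to nanoseconds with scalehrtime() only when a value is
 * reported.  Scaling once at read time keeps the hot paths cheap.  A
 * minimal sketch of that pattern:
 */
static hrtime_t
example_unscaled_delta_ns(hrtime_t start_unscaled)
{
	hrtime_t delta = gethrtime_unscaled() - start_unscaled;

	/* unscaled readings may not be consistent across CPUs */
	if (delta < 0)
		delta = 0;
	scalehrtime(&delta);	/* convert unscaled ticks to nanoseconds */
	return (delta);
}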
/*
 * Called to indicate a new CPU has started up so
 * that either t0 or the slave startup thread can
 * be accounted for.
 */
void
pg_cmt_cpu_startup(cpu_t *cp)
{
	pg_ev_thread_swtch(cp, gethrtime_unscaled(), cp->cpu_idle_thread,
	    cp->cpu_thread);
}
/*
 * Idle the present CPU; deep C-states are supported.
 */
void
cpu_acpi_idle(void)
{
	cpu_t *cp = CPU;
	cpu_acpi_handle_t handle;
	cma_c_state_t *cs_data;
	cpu_acpi_cstate_t *cstates;
	hrtime_t start, end;
	int cpu_max_cstates;
	uint32_t cs_indx;
	uint16_t cs_type;
	cpupm_mach_state_t *mach_state =
	    (cpupm_mach_state_t *)cp->cpu_m.mcpu_pm_mach_state;

	handle = mach_state->ms_acpi_handle;
	ASSERT(CPU_ACPI_CSTATES(handle) != NULL);

	cs_data = mach_state->ms_cstate.cma_state.cstate;
	cstates = (cpu_acpi_cstate_t *)CPU_ACPI_CSTATES(handle);
	ASSERT(cstates != NULL);
	cpu_max_cstates = cpu_acpi_get_max_cstates(handle);
	if (cpu_max_cstates > CPU_MAX_CSTATES)
		cpu_max_cstates = CPU_MAX_CSTATES;
	if (cpu_max_cstates == 1) {
		/* no ACPI c-state data */
		(*non_deep_idle_cpu)();
		return;
	}

	start = gethrtime_unscaled();

	cs_indx = cpupm_next_cstate(cs_data, cstates, cpu_max_cstates, start);

	cs_type = cstates[cs_indx].cs_type;

	switch (cs_type) {
	default:
		/* FALLTHROUGH */
	case CPU_ACPI_C1:
		(*non_deep_idle_cpu)();
		break;

	case CPU_ACPI_C2:
		acpi_cpu_cstate(&cstates[cs_indx]);
		break;

	case CPU_ACPI_C3:
		/*
		 * All supported Intel processors maintain cache coherency
		 * during C3.  Currently when entering C3 processors flush
		 * core caches to higher level shared cache.  The shared cache
		 * maintains state and supports probes during C3.
		 * Consequently there is no need to handle cache coherency
		 * and Bus Master activity here with the cache flush, BM_RLD
		 * bit, BM_STS bit, nor PM2_CNT.ARB_DIS mechanisms described
		 * in section 8.1.4 of the ACPI Specification 4.0.
		 */
		acpi_cpu_cstate(&cstates[cs_indx]);
		break;
	}

	end = gethrtime_unscaled();

	/*
	 * Update statistics
	 */
	cpupm_wakeup_cstate_data(cs_data, end);
}