static void
mon_perf(double srt)
{
	cpumask_t ocpu_used, ocpu_pwrdom_used;
	int pnstate = 0, nstate;
	int cpu;

	/*
	 * Find cpus requiring performance and their corresponding power
	 * domains.  Save the number of cpus requiring performance in
	 * pnstate.
	 */
	ocpu_used = cpu_used;
	ocpu_pwrdom_used = cpu_pwrdom_used;

	CPUMASK_ASSZERO(cpu_used);
	CPUMASK_ASSZERO(cpu_pwrdom_used);

	for (cpu = 0; cpu < NCpus; ++cpu) {
		struct cpu_state *state = &pcpu_state[cpu];
		int s;

		s = get_nstate(state, srt);
		if (s) {
			CPUMASK_ORBIT(cpu_used, cpu);
			CPUMASK_ORBIT(cpu_pwrdom_used, cpu2pwrdom[cpu]);
		}
		pnstate += s;

		state->cpu_limit = s;
	}

	/*
	 * Calculate nstate, the number of cpus we wish to run at max
	 * performance.
	 */
	nstate = get_nstate(&global_cpu_state, srt);

	if (nstate == global_cpu_state.cpu_limit &&
	    (pnstate == global_pcpu_limit || nstate > pnstate)) {
		/* Nothing changed; keep the sets */
		cpu_used = ocpu_used;
		cpu_pwrdom_used = ocpu_pwrdom_used;

		global_pcpu_limit = pnstate;
		return;
	}
	global_pcpu_limit = pnstate;

	if (nstate > pnstate) {
		/*
		 * Add spare cpus to meet global performance requirement.
		 */
		add_spare_cpus(ocpu_used, nstate - pnstate);
	}

	global_cpu_state.cpu_limit = nstate;

	/*
	 * Adjust cpu and cpu power domain performance
	 */
	adj_perf(ocpu_used, ocpu_pwrdom_used);
}
/*
 * Figure out the cpu power domains.
 */
static int
acpi_get_cpupwrdom(void)
{
	struct cpu_pwrdom *dom;
	cpumask_t pwrdom_mask;
	char buf[64];
	char members[1024];
	char *str;
	size_t msize;
	int n, i, ncpu = 0, dom_id;

	memset(cpu2pwrdom, 0, sizeof(cpu2pwrdom));
	memset(cpu_pwrdomain, 0, sizeof(cpu_pwrdomain));
	CPUMASK_ASSZERO(cpu_pwrdom_mask);

	for (i = 0; i < MAXDOM; ++i) {
		snprintf(buf, sizeof(buf),
		    "hw.acpi.cpu.px_dom%d.available", i);
		if (sysctlbyname(buf, NULL, NULL, NULL, 0) < 0)
			continue;

		dom = calloc(1, sizeof(*dom));
		dom->dom_id = i;

		if (cpu_pwrdomain[i] != NULL) {
			fprintf(stderr, "cpu power domain %d exists\n", i);
			exit(1);
		}
		cpu_pwrdomain[i] = dom;
		CPUMASK_ORBIT(cpu_pwrdom_mask, i);
	}
	pwrdom_mask = cpu_pwrdom_mask;

	while (CPUMASK_TESTNZERO(pwrdom_mask)) {
		dom_id = BSFCPUMASK(pwrdom_mask);
		CPUMASK_NANDBIT(pwrdom_mask, dom_id);
		dom = cpu_pwrdomain[dom_id];

		CPUMASK_ASSZERO(dom->dom_cpumask);

		snprintf(buf, sizeof(buf),
		    "hw.acpi.cpu.px_dom%d.members", dom->dom_id);
		msize = sizeof(members) - 1;	/* leave room for the NUL */
		if (sysctlbyname(buf, members, &msize, NULL, 0) < 0) {
			cpu_pwrdomain[dom_id] = NULL;
			free(dom);
			continue;
		}
		members[msize] = 0;

		for (str = strtok(members, " "); str; str = strtok(NULL, " ")) {
			n = -1;
			sscanf(str, "cpu%d", &n);
			if (n >= 0) {
				++ncpu;
				++dom->dom_ncpus;
				CPUMASK_ORBIT(dom->dom_cpumask, n);
				cpu2pwrdom[n] = dom->dom_id;
			}
		}
		if (dom->dom_ncpus == 0) {
			cpu_pwrdomain[dom_id] = NULL;
			free(dom);
			continue;
		}

		if (DebugOpt) {
			printf("dom%d cpumask: ", dom->dom_id);
			for (i = 0; i < (int)NELEM(dom->dom_cpumask.ary); ++i) {
				printf("%jx ",
				    (uintmax_t)dom->dom_cpumask.ary[i]);
			}
			printf("\n");
		}
	}

	if (ncpu != NCpus) {
		if (DebugOpt)
			printf("Found %d cpus, expecting %d\n", ncpu, NCpus);

		pwrdom_mask = cpu_pwrdom_mask;
		while (CPUMASK_TESTNZERO(pwrdom_mask)) {
			dom_id = BSFCPUMASK(pwrdom_mask);
			CPUMASK_NANDBIT(pwrdom_mask, dom_id);
			dom = cpu_pwrdomain[dom_id];
			if (dom != NULL)
				free(dom);
		}
		return 0;
	}
	return 1;
}
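/*
 * A minimal user-space sketch (not part of powerd itself) showing what the
 * hw.acpi.cpu.px_dom%d.members sysctl parsed above looks like from a
 * standalone program.  It assumes the system actually exposes px_dom0;
 * the output format is illustrative only.
 */
#include <stdio.h>
#include <string.h>
#include <sys/types.h>
#include <sys/sysctl.h>

int
main(void)
{
	char members[1024];
	size_t msize = sizeof(members) - 1;
	char *str;
	int n;

	if (sysctlbyname("hw.acpi.cpu.px_dom0.members", members, &msize,
	    NULL, 0) < 0) {
		perror("hw.acpi.cpu.px_dom0.members");
		return 1;
	}
	members[msize] = 0;

	/* Member names have the form "cpuN", separated by spaces. */
	for (str = strtok(members, " "); str; str = strtok(NULL, " ")) {
		n = -1;
		sscanf(str, "cpu%d", &n);
		if (n >= 0)
			printf("px_dom0 contains cpu%d\n", n);
	}
	return 0;
}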
/*
 * API function - invalidate the pte at (va) and replace *ptep with npte
 * atomically only if *ptep equals opte, across the pmap's active cpus.
 *
 * Returns 1 on success, 0 on failure (caller typically retries).
 */
int
pmap_inval_smp_cmpset(pmap_t pmap, vm_offset_t va, pt_entry_t *ptep,
		      pt_entry_t opte, pt_entry_t npte)
{
	globaldata_t gd = mycpu;
	pmap_inval_info_t *info;
	int success;
	int cpu = gd->gd_cpuid;
	cpumask_t tmpmask;
	unsigned long rflags;

	/*
	 * Initialize invalidation for pmap and enter critical section.
	 */
	if (pmap == NULL)
		pmap = &kernel_pmap;
	pmap_inval_init(pmap);

	/*
	 * Shortcut single-cpu case if possible.
	 */
	if (CPUMASK_CMPMASKEQ(pmap->pm_active, gd->gd_cpumask)) {
		if (atomic_cmpset_long(ptep, opte, npte)) {
			if (va == (vm_offset_t)-1)
				cpu_invltlb();
			else
				cpu_invlpg((void *)va);
			pmap_inval_done(pmap);
			return 1;
		} else {
			pmap_inval_done(pmap);
			return 0;
		}
	}

	/*
	 * We need a critical section to prevent getting preempted while
	 * we setup our command.  A preemption might execute its own
	 * pmap_inval*() command and create confusion below.
	 */
	info = &invinfo[cpu];
	info->tsc_target = rdtsc() + (tsc_frequency * LOOPRECOVER_TIMEOUT1);

	/*
	 * We must wait for other cpus which may still be finishing
	 * up a prior operation.
	 */
	while (CPUMASK_TESTNZERO(info->done)) {
#ifdef LOOPRECOVER
		if (loopwdog(info)) {
			info->failed = 1;
			loopdebug("B", info);
			/* XXX recover from possible bug */
			CPUMASK_ASSZERO(info->done);
		}
#endif
		cpu_pause();
	}
	KKASSERT(info->mode == INVDONE);

	/*
	 * Must set our cpu in the invalidation scan mask before
	 * any possibility of [partial] execution (remember, XINVLTLB
	 * can interrupt a critical section).
	 */
	ATOMIC_CPUMASK_ORBIT(smp_invmask, cpu);

	info->va = va;
	info->npgs = 1;			/* unused */
	info->ptep = ptep;
	info->npte = npte;
	info->opte = opte;
#ifdef LOOPRECOVER
	info->failed = 0;
#endif
	info->mode = INVCMPSET;
	info->success = 0;

	tmpmask = pmap->pm_active;	/* volatile */
	cpu_ccfence();
	CPUMASK_ANDMASK(tmpmask, smp_active_mask);
	CPUMASK_ORBIT(tmpmask, cpu);
	info->mask = tmpmask;

	/*
	 * Command may start executing the moment 'done' is initialized,
	 * disable current cpu interrupt to prevent 'done' field from
	 * changing (other cpus can't clear done bits until the originating
	 * cpu clears its mask bit).
	 */
#ifdef LOOPRECOVER
	info->sigmask = tmpmask;
	CHECKSIGMASK(info);
#endif
	cpu_sfence();
	rflags = read_rflags();
	cpu_disable_intr();

	ATOMIC_CPUMASK_COPY(info->done, tmpmask);

	/*
	 * Pass our copy of the done bits (so they don't change out from
	 * under us) to generate the Xinvltlb interrupt on the targets.
	 */
	smp_invlpg(&tmpmask);
	success = info->success;
	KKASSERT(info->mode == INVDONE);

	ATOMIC_CPUMASK_NANDBIT(smp_invmask, cpu);
	write_rflags(rflags);
	pmap_inval_done(pmap);

	return success;
}
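/*
 * Hypothetical caller sketch (not from the source tree): the header comment
 * above notes that callers typically retry on failure.  The helper name,
 * the 'bit' argument, and the surrounding pmap/va/ptep context are
 * placeholders; only the pmap_inval_smp_cmpset() call itself is the API
 * shown above.
 */
static void
example_pte_set_bit(pmap_t pmap, vm_offset_t va, pt_entry_t *ptep,
		    pt_entry_t bit)
{
	pt_entry_t opte;

	do {
		opte = *ptep;		/* snapshot the current pte */
	} while (pmap_inval_smp_cmpset(pmap, va, ptep, opte, opte | bit) == 0);
}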
int
main(int ac, char **av)
{
	int ch;
	int res;
	char *sched = NULL;
	char *cpustr = NULL;
	char *sched_cpustr = NULL;
	char *p = NULL;
	cpumask_t cpumask;
	int cpuid;
	pid_t pid = getpid();	/* See usched_set(2) - BUGS */

	CPUMASK_ASSZERO(cpumask);

	while ((ch = getopt(ac, av, "d")) != -1) {
		switch (ch) {
		case 'd':
			DebugOpt = 1;
			break;
		default:
			usage();
			/* NOTREACHED */
		}
	}

	ac -= optind;
	av += optind;

	if (ac < 2) {
		usage();
		/* NOTREACHED */
	}

	sched_cpustr = strdup(av[0]);
	sched = strsep(&sched_cpustr, ":");
	if (strcmp(sched, "default") == 0)
		fprintf(stderr,
		    "Ignoring scheduler == \"default\": not implemented\n");
	cpustr = strsep(&sched_cpustr, "");
	if (strlen(sched) == 0 && cpustr == NULL) {
		usage();
		/* NOTREACHED */
	}

	/*
	 * XXX needs expanded support for > 64 cpus
	 */
	if (cpustr != NULL) {
		uint64_t v;

		v = (uint64_t)strtoull(cpustr, NULL, 0);
		for (cpuid = 0; cpuid < (int)sizeof(v) * 8; ++cpuid) {
			if (v & (1LU << cpuid))
				CPUMASK_ORBIT(cpumask, cpuid);
		}
	}

	if (strlen(sched) != 0) {
		if (DebugOpt)
			fprintf(stderr,
			    "DEBUG: USCHED_SET_SCHEDULER: scheduler: %s\n",
			    sched);
		res = usched_set(pid, USCHED_SET_SCHEDULER, sched,
		    strlen(sched));
		if (res != 0) {
			asprintf(&p,
			    "usched_set(%d, USCHED_SET_SCHEDULER, \"%s\", %d)",
			    pid, sched, (int)strlen(sched));
			perror(p);
			exit(1);
		}
	}
	if (CPUMASK_TESTNZERO(cpumask)) {
		for (cpuid = 0; cpuid < (int)sizeof(cpumask) * 8; ++cpuid) {
			if (CPUMASK_TESTBIT(cpumask, cpuid))
				break;
		}
		if (DebugOpt) {
			fprintf(stderr, "DEBUG: USCHED_SET_CPU: cpuid: %d\n",
			    cpuid);
		}
		res = usched_set(pid, USCHED_SET_CPU, &cpuid, sizeof(int));
		if (res != 0) {
			asprintf(&p, "usched_set(%d, USCHED_SET_CPU, &%d, %d)",
			    pid, cpuid, (int)sizeof(int));
			perror(p);
			exit(1);
		}
		CPUMASK_NANDBIT(cpumask, cpuid);
		while (CPUMASK_TESTNZERO(cpumask)) {
			++cpuid;
			if (CPUMASK_TESTBIT(cpumask, cpuid) == 0)
				continue;
			CPUMASK_NANDBIT(cpumask, cpuid);
			if (DebugOpt) {
				fprintf(stderr,
				    "DEBUG: USCHED_ADD_CPU: cpuid: %d\n",
				    cpuid);
			}
			res = usched_set(pid, USCHED_ADD_CPU, &cpuid,
			    sizeof(int));
			if (res != 0) {
				asprintf(&p,
				    "usched_set(%d, USCHED_ADD_CPU, &%d, %d)",
				    pid, cpuid, (int)sizeof(int));
				perror(p);
				exit(1);
			}
		}
	}
	execvp(av[1], av + 1);
	exit(1);
}
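/*
 * A minimal standalone sketch of calling usched_set(2) directly, mirroring
 * what main() above does for the first cpu bit.  The header name
 * <sys/usched.h> is an assumption; the call itself follows the usage shown
 * by the utility above.
 */
#include <stdio.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/usched.h>

int
main(void)
{
	int cpuid = 0;			/* pin ourselves to cpu0 */
	pid_t pid = getpid();		/* see usched_set(2) - BUGS */

	if (usched_set(pid, USCHED_SET_CPU, &cpuid, sizeof(int)) != 0) {
		perror("usched_set");
		return 1;
	}
	/* From here on, the calling process runs only on cpu0. */
	return 0;
}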
/*
 * Invalidate the specified va across all cpus associated with the pmap.
 * If va == (vm_offset_t)-1, we invltlb() instead of invlpg().  The operation
 * will be done fully synchronously with storing npte into *ptep and returning
 * opte.
 *
 * If ptep is NULL the operation will execute semi-synchronously.
 * ptep must be NULL if npgs > 1
 */
pt_entry_t
pmap_inval_smp(pmap_t pmap, vm_offset_t va, int npgs,
	       pt_entry_t *ptep, pt_entry_t npte)
{
	globaldata_t gd = mycpu;
	pmap_inval_info_t *info;
	pt_entry_t opte = 0;
	int cpu = gd->gd_cpuid;
	cpumask_t tmpmask;
	unsigned long rflags;

	/*
	 * Initialize invalidation for pmap and enter critical section.
	 */
	if (pmap == NULL)
		pmap = &kernel_pmap;
	pmap_inval_init(pmap);

	/*
	 * Shortcut single-cpu case if possible.
	 */
	if (CPUMASK_CMPMASKEQ(pmap->pm_active, gd->gd_cpumask)) {
		/*
		 * Convert to invltlb if there are too many pages to
		 * invlpg on.
		 */
		if (npgs > MAX_INVAL_PAGES) {
			npgs = 0;
			va = (vm_offset_t)-1;
		}

		/*
		 * Invalidate the specified pages, handle invltlb if requested.
		 */
		while (npgs) {
			--npgs;
			if (ptep) {
				opte = atomic_swap_long(ptep, npte);
				++ptep;
			}
			if (va == (vm_offset_t)-1)
				break;
			cpu_invlpg((void *)va);
			va += PAGE_SIZE;
		}
		if (va == (vm_offset_t)-1)
			cpu_invltlb();
		pmap_inval_done(pmap);

		return opte;
	}

	/*
	 * We need a critical section to prevent getting preempted while
	 * we setup our command.  A preemption might execute its own
	 * pmap_inval*() command and create confusion below.
	 *
	 * tsc_target is our watchdog timeout that will attempt to recover
	 * from a lost IPI.  Set to 1/16 second for now.
	 */
	info = &invinfo[cpu];
	info->tsc_target = rdtsc() + (tsc_frequency * LOOPRECOVER_TIMEOUT1);

	/*
	 * We must wait for other cpus which may still be finishing up a
	 * prior operation that we requested.
	 *
	 * We do not have to disable interrupts here.  An Xinvltlb can occur
	 * at any time (even within a critical section), but it will not
	 * act on our command until we set our done bits.
	 */
	while (CPUMASK_TESTNZERO(info->done)) {
#ifdef LOOPRECOVER
		if (loopwdog(info)) {
			info->failed = 1;
			loopdebug("A", info);
			/* XXX recover from possible bug */
			CPUMASK_ASSZERO(info->done);
		}
#endif
		cpu_pause();
	}
	KKASSERT(info->mode == INVDONE);

	/*
	 * Must set our cpu in the invalidation scan mask before
	 * any possibility of [partial] execution (remember, XINVLTLB
	 * can interrupt a critical section).
	 */
	ATOMIC_CPUMASK_ORBIT(smp_invmask, cpu);

	info->va = va;
	info->npgs = npgs;
	info->ptep = ptep;
	info->npte = npte;
	info->opte = 0;
#ifdef LOOPRECOVER
	info->failed = 0;
#endif
	info->mode = INVSTORE;

	tmpmask = pmap->pm_active;	/* volatile (bits may be cleared) */
	cpu_ccfence();
	CPUMASK_ANDMASK(tmpmask, smp_active_mask);

	/*
	 * If ptep is NULL the operation can be semi-synchronous, which means
	 * we can improve performance by flagging and removing idle cpus
	 * (see the idleinvlclr function in mp_machdep.c).
	 *
	 * Typically kernel page table operation is semi-synchronous.
	 */
	if (ptep == NULL)
		smp_smurf_idleinvlclr(&tmpmask);
	CPUMASK_ORBIT(tmpmask, cpu);
	info->mask = tmpmask;

	/*
	 * Command may start executing the moment 'done' is initialized,
	 * disable current cpu interrupt to prevent 'done' field from
	 * changing (other cpus can't clear done bits until the originating
	 * cpu clears its mask bit, but other cpus CAN start clearing their
	 * mask bits).
	 */
#ifdef LOOPRECOVER
	info->sigmask = tmpmask;
	CHECKSIGMASK(info);
#endif
	cpu_sfence();
	rflags = read_rflags();
	cpu_disable_intr();

	ATOMIC_CPUMASK_COPY(info->done, tmpmask);
	/* execution can begin here due to races */

	/*
	 * Pass our copy of the done bits (so they don't change out from
	 * under us) to generate the Xinvltlb interrupt on the targets.
	 */
	smp_invlpg(&tmpmask);
	opte = info->opte;
	KKASSERT(info->mode == INVDONE);

	/*
	 * Target cpus will be in their loop exiting concurrently with our
	 * cleanup.  They will not lose the bitmask they obtained before so
	 * we can safely clear this bit.
	 */
	ATOMIC_CPUMASK_NANDBIT(smp_invmask, cpu);
	write_rflags(rflags);
	pmap_inval_done(pmap);

	return opte;
}