static void
adj_perf(cpumask_t xcpu_used, cpumask_t xcpu_pwrdom_used)
{
	cpumask_t old_usched_used;
	int cpu, inc;

	/*
	 * Set cpus requiring performance to the userland process
	 * scheduler.  Leave the rest of the cpus unmapped.
	 */
	old_usched_used = usched_cpu_used;
	usched_cpu_used = cpu_used;

	if (CPUMASK_TESTZERO(usched_cpu_used))
		CPUMASK_ORBIT(usched_cpu_used, 0);

	if (CPUMASK_CMPMASKNEQ(usched_cpu_used, old_usched_used))
		set_uschedcpus();

	/*
	 * Adjust per-cpu performance.
	 */
	CPUMASK_XORMASK(xcpu_used, cpu_used);
	while (CPUMASK_TESTNZERO(xcpu_used)) {
		cpu = BSFCPUMASK(xcpu_used);
		CPUMASK_NANDBIT(xcpu_used, cpu);

		if (CPUMASK_TESTBIT(cpu_used, cpu)) {
			/* Increase cpu performance */
			inc = 1;
		} else {
			/* Decrease cpu performance */
			inc = 0;
		}
		adj_cpu_perf(cpu, inc);
	}

	/*
	 * Adjust cpu power domain performance.  This could affect
	 * a set of cpus.
	 */
	CPUMASK_XORMASK(xcpu_pwrdom_used, cpu_pwrdom_used);
	while (CPUMASK_TESTNZERO(xcpu_pwrdom_used)) {
		int dom;

		dom = BSFCPUMASK(xcpu_pwrdom_used);
		CPUMASK_NANDBIT(xcpu_pwrdom_used, dom);

		if (CPUMASK_TESTBIT(cpu_pwrdom_used, dom)) {
			/* Increase cpu power domain performance */
			inc = 1;
		} else {
			/* Decrease cpu power domain performance */
			inc = 0;
		}
		adj_cpu_pwrdom(dom, inc);
	}
}
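/*
 * Illustrative sketch (not part of the daemon): the diff-walk pattern used
 * by adj_perf(), expressed with a plain uint64_t standing in for cpumask_t
 * and __builtin_ctzll() standing in for BSFCPUMASK().  XORing the old and
 * new masks yields only the cpus whose state changed; each changed cpu is
 * then classified by testing the new mask.  The helper names below are
 * invented for the example.
 */
#include <stdint.h>
#include <stdio.h>

static void
example_adjust(uint64_t old_mask, uint64_t new_mask)
{
	uint64_t changed = old_mask ^ new_mask;

	while (changed != 0) {
		int cpu = __builtin_ctzll(changed);	/* lowest set bit */

		changed &= ~(1ULL << cpu);		/* like CPUMASK_NANDBIT */
		if (new_mask & (1ULL << cpu))
			printf("cpu%d: raise performance\n", cpu);
		else
			printf("cpu%d: lower performance\n", cpu);
	}
}

int
main(void)
{
	/* cpus 0-1 were busy before; now cpus 1-2 are busy */
	example_adjust(0x3, 0x6);
	return 0;
}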
/*
 * Find the nth present CPU and return its pc_cpuid as well as set the
 * pc_acpi_id from the most reliable source.
 */
static int
acpi_cpu_get_id(uint32_t idx, uint32_t *acpi_id, uint32_t *cpu_id)
{
	struct mdglobaldata *md;
	uint32_t i;

	KASSERT(acpi_id != NULL, ("Null acpi_id"));
	KASSERT(cpu_id != NULL, ("Null cpu_id"));

	for (i = 0; i < ncpus; i++) {
		if (CPUMASK_TESTBIT(smp_active_mask, i) == 0)
			continue;
		md = (struct mdglobaldata *)globaldata_find(i);
		KASSERT(md != NULL, ("no pcpu data for %d", i));

		if (idx-- == 0) {
			/*
			 * If gd_acpi_id was not initialized (e.g., box w/o MADT)
			 * override it with the value from the ASL.  Otherwise,
			 * if the two don't match, prefer the MADT-derived value.
			 * Finally, return the gd_cpuid to reference this
			 * processor.
			 */
			if (md->gd_acpi_id == 0xffffffff) {
				kprintf("cpu%d: acpi id was not set, set it to %u\n",
				    i, *acpi_id);
				md->gd_acpi_id = *acpi_id;
			} else if (md->gd_acpi_id != *acpi_id) {
				kprintf("cpu%d: acpi id mismatch, madt %u, "
				    "processor object %u\n",
				    i, md->gd_acpi_id, *acpi_id);
				*acpi_id = md->gd_acpi_id;
			}
			*cpu_id = md->mi.gd_cpuid;
			return 0;
		}
	}
	return ESRCH;
}
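/*
 * Illustrative sketch (not kernel code): the "idx-- == 0" idiom used in
 * acpi_cpu_get_id() selects the idx'th element that passes a filter in a
 * single pass.  Everything here (the array, is_active(), find_nth_active())
 * is made up for the example; only the selection pattern matches the code
 * above.
 */
#include <stdio.h>

static int
is_active(int value)
{
	return value > 0;
}

static int
find_nth_active(const int *array, int count, unsigned idx, int *out)
{
	int i;

	for (i = 0; i < count; i++) {
		if (!is_active(array[i]))
			continue;
		if (idx-- == 0) {	/* idx'th active entry found */
			*out = array[i];
			return 0;
		}
	}
	return -1;			/* analogous to returning ESRCH */
}

int
main(void)
{
	int data[] = { 0, 5, 0, 7, 9 };
	int val;

	if (find_nth_active(data, 5, 1, &val) == 0)
		printf("second active entry: %d\n", val);	/* prints 7 */
	return 0;
}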
static void
add_spare_cpus(const cpumask_t ocpu_used, int ncpu)
{
	cpumask_t saved_pwrdom, xcpu_used;
	int done = 0, cpu;

	/*
	 * Find more cpus in the previous cpu set.
	 */
	xcpu_used = cpu_used;
	CPUMASK_XORMASK(xcpu_used, ocpu_used);
	while (CPUMASK_TESTNZERO(xcpu_used)) {
		cpu = BSFCPUMASK(xcpu_used);
		CPUMASK_NANDBIT(xcpu_used, cpu);

		if (CPUMASK_TESTBIT(ocpu_used, cpu)) {
			CPUMASK_ORBIT(cpu_pwrdom_used, cpu2pwrdom[cpu]);
			CPUMASK_ORBIT(cpu_used, cpu);
			--ncpu;
			if (ncpu == 0)
				return;
		}
	}

	/*
	 * Find more cpus in the used cpu power domains.
	 */
	saved_pwrdom = cpu_pwrdom_used;
again:
	while (CPUMASK_TESTNZERO(saved_pwrdom)) {
		cpumask_t unused_cpumask;
		int dom;

		dom = BSFCPUMASK(saved_pwrdom);
		CPUMASK_NANDBIT(saved_pwrdom, dom);

		unused_cpumask = cpu_pwrdomain[dom]->dom_cpumask;
		CPUMASK_NANDMASK(unused_cpumask, cpu_used);

		while (CPUMASK_TESTNZERO(unused_cpumask)) {
			cpu = BSFCPUMASK(unused_cpumask);
			CPUMASK_NANDBIT(unused_cpumask, cpu);

			CPUMASK_ORBIT(cpu_pwrdom_used, dom);
			CPUMASK_ORBIT(cpu_used, cpu);
			--ncpu;
			if (ncpu == 0)
				return;
		}
	}
	if (!done) {
		done = 1;
		/*
		 * Find more cpus in unused cpu power domains.
		 */
		saved_pwrdom = cpu_pwrdom_mask;
		CPUMASK_NANDMASK(saved_pwrdom, cpu_pwrdom_used);
		goto again;
	}
	if (DebugOpt)
		printf("%d cpus not found\n", ncpu);
}
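/*
 * Illustrative sketch (not part of powerd): the spare-cpu selection order
 * used by add_spare_cpus(), with plain uint64_t masks standing in for
 * cpumask_t.  Spare cpus are taken first from power domains that are
 * already active, and only then from completely idle domains, so idle
 * domains can stay in a deep power state.  cpu_to_domain[] and the masks
 * are invented for the example.
 */
#include <stdint.h>
#include <stdio.h>

#define NCPUS	8

static const int cpu_to_domain[NCPUS] = { 0, 0, 0, 0, 1, 1, 1, 1 };

static int
pick_spares(uint64_t *cpu_used, uint64_t *dom_used, int wanted)
{
	int pass, cpu;

	/* pass 0: cpus in already-used domains, pass 1: any remaining cpu */
	for (pass = 0; pass < 2 && wanted > 0; pass++) {
		for (cpu = 0; cpu < NCPUS && wanted > 0; cpu++) {
			int dom = cpu_to_domain[cpu];

			if (*cpu_used & (1ULL << cpu))
				continue;
			if (pass == 0 && !(*dom_used & (1ULL << dom)))
				continue;
			*cpu_used |= 1ULL << cpu;
			*dom_used |= 1ULL << dom;
			--wanted;
		}
	}
	return wanted;		/* >0 means not enough cpus were found */
}

int
main(void)
{
	uint64_t cpus = 0x1;	/* cpu0 busy */
	uint64_t doms = 0x1;	/* domain 0 active */

	pick_spares(&cpus, &doms, 2);
	printf("cpu mask now 0x%llx\n", (unsigned long long)cpus);	/* 0x7 */
	return 0;
}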
/*
 * Parse a _CST package and set up its Cx states.  Since the _CST object
 * can change dynamically, our notify handler may call this function
 * to clean up and probe the new _CST package.
 */
static int
acpi_cst_cx_probe_cst(struct acpi_cst_softc *sc, int reprobe)
{
	struct acpi_cst_cx *cx_ptr;
	ACPI_STATUS status;
	ACPI_BUFFER buf;
	ACPI_OBJECT *top;
	ACPI_OBJECT *pkg;
	uint32_t count;
	int i;

	ACPI_FUNCTION_TRACE((char *)(uintptr_t)__func__);

#ifdef INVARIANTS
	if (reprobe)
		KKASSERT(&curthread->td_msgport == netisr_cpuport(sc->cst_cpuid));
#endif

	buf.Pointer = NULL;
	buf.Length = ACPI_ALLOCATE_BUFFER;
	status = AcpiEvaluateObject(sc->cst_handle, "_CST", NULL, &buf);
	if (ACPI_FAILURE(status))
		return (ENXIO);

	/* _CST is a package with a count and at least one Cx package. */
	top = (ACPI_OBJECT *)buf.Pointer;
	if (!ACPI_PKG_VALID(top, 2) || acpi_PkgInt32(top, 0, &count) != 0) {
		device_printf(sc->cst_dev, "invalid _CST package\n");
		AcpiOsFree(buf.Pointer);
		return (ENXIO);
	}
	if (count != top->Package.Count - 1) {
		device_printf(sc->cst_dev,
		    "invalid _CST state count (%d != %d)\n",
		    count, top->Package.Count - 1);
		count = top->Package.Count - 1;
	}
	if (count > MAX_CX_STATES) {
		device_printf(sc->cst_dev, "_CST has too many states (%d)\n",
		    count);
		count = MAX_CX_STATES;
	}

	sc->cst_flags |= ACPI_CST_FLAG_PROBING | ACPI_CST_FLAG_MATCH_HT;
	cpu_sfence();

	/*
	 * Free all previously allocated resources.
	 *
	 * NOTE: This is needed for _CST reprobing.
	 */
	acpi_cst_free_resource(sc, 0);

	/* Set up all valid states. */
	sc->cst_cx_count = 0;
	cx_ptr = sc->cst_cx_states;
	for (i = 0; i < count; i++) {
		int error;

		pkg = &top->Package.Elements[i + 1];
		if (!ACPI_PKG_VALID(pkg, 4) ||
		    acpi_PkgInt32(pkg, 1, &cx_ptr->type) != 0 ||
		    acpi_PkgInt32(pkg, 2, &cx_ptr->trans_lat) != 0 ||
		    acpi_PkgInt32(pkg, 3, &cx_ptr->power) != 0) {
			device_printf(sc->cst_dev,
			    "skipping invalid Cx state package\n");
			continue;
		}

		/* Validate the state to see if we should use it. */
		switch (cx_ptr->type) {
		case ACPI_STATE_C1:
			sc->cst_non_c3 = i;
			cx_ptr->enter = acpi_cst_c1_halt_enter;
			error = acpi_cst_cx_setup(cx_ptr);
			if (error)
				panic("C1 CST HALT setup failed: %d", error);
			if (sc->cst_cx_count != 0) {
				/*
				 * C1 is not the first C-state; something
				 * really stupid is going on ...
				 */
				sc->cst_flags &= ~ACPI_CST_FLAG_MATCH_HT;
			}
			cx_ptr++;
			sc->cst_cx_count++;
			continue;
		case ACPI_STATE_C2:
			sc->cst_non_c3 = i;
			break;
		case ACPI_STATE_C3:
		default:
			if ((acpi_cst_quirks & ACPI_CST_QUIRK_NO_C3) != 0) {
				ACPI_DEBUG_PRINT((ACPI_DB_INFO,
				    "cpu_cst%d: C3[%d] not available.\n",
				    device_get_unit(sc->cst_dev), i));
				continue;
			}
			break;
		}

		/*
		 * Allocate the control register for C2 or C3(+).
		 */
		KASSERT(cx_ptr->res == NULL, ("still has res"));
		acpi_PkgRawGas(pkg, 0, &cx_ptr->gas);

		/*
		 * We match the number of C2/C3 states across hyperthreads
		 * only if the register is "Fixed Hardware", e.g. on most
		 * Intel CPUs.  We don't have much to do for the rest of the
		 * register types.
		 */
		if (cx_ptr->gas.SpaceId != ACPI_ADR_SPACE_FIXED_HARDWARE)
			sc->cst_flags &= ~ACPI_CST_FLAG_MATCH_HT;

		cx_ptr->rid = sc->cst_parent->cpu_next_rid;
		acpi_bus_alloc_gas(sc->cst_dev, &cx_ptr->res_type, &cx_ptr->rid,
		    &cx_ptr->gas, &cx_ptr->res, RF_SHAREABLE);
		if (cx_ptr->res != NULL) {
			sc->cst_parent->cpu_next_rid++;
			ACPI_DEBUG_PRINT((ACPI_DB_INFO,
			    "cpu_cst%d: Got C%d - %d latency\n",
			    device_get_unit(sc->cst_dev), cx_ptr->type,
			    cx_ptr->trans_lat));
			cx_ptr->enter = acpi_cst_cx_io_enter;
			cx_ptr->btag = rman_get_bustag(cx_ptr->res);
			cx_ptr->bhand = rman_get_bushandle(cx_ptr->res);
			error = acpi_cst_cx_setup(cx_ptr);
			if (error)
				panic("C%d CST I/O setup failed: %d",
				    cx_ptr->type, error);
			cx_ptr++;
			sc->cst_cx_count++;
		} else {
			error = acpi_cst_cx_setup(cx_ptr);
			if (!error) {
				KASSERT(cx_ptr->enter != NULL,
				    ("C%d enter is not set", cx_ptr->type));
				cx_ptr++;
				sc->cst_cx_count++;
			}
		}
	}
	AcpiOsFree(buf.Pointer);

	if (sc->cst_flags & ACPI_CST_FLAG_MATCH_HT) {
		cpumask_t mask;

		mask = get_cpumask_from_level(sc->cst_cpuid, CORE_LEVEL);
		if (CPUMASK_TESTNZERO(mask)) {
			int cpu;

			for (cpu = 0; cpu < ncpus; ++cpu) {
				struct acpi_cst_softc *sc1 = acpi_cst_softc[cpu];

				if (sc1 == NULL || sc1 == sc ||
				    (sc1->cst_flags & ACPI_CST_FLAG_ATTACHED) == 0 ||
				    (sc1->cst_flags & ACPI_CST_FLAG_MATCH_HT) == 0)
					continue;
				if (!CPUMASK_TESTBIT(mask, sc1->cst_cpuid))
					continue;

				if (sc1->cst_cx_count != sc->cst_cx_count) {
					struct acpi_cst_softc *src_sc, *dst_sc;

					if (bootverbose) {
						device_printf(sc->cst_dev,
						    "inconsistent C-state count: %d, %s has %d\n",
						    sc->cst_cx_count,
						    device_get_nameunit(sc1->cst_dev),
						    sc1->cst_cx_count);
					}

					if (sc1->cst_cx_count > sc->cst_cx_count) {
						src_sc = sc1;
						dst_sc = sc;
					} else {
						src_sc = sc;
						dst_sc = sc1;
					}
					acpi_cst_copy(dst_sc, src_sc);
				}
			}
		}
	}

	if (reprobe) {
		/* If there are C3(+) states, always enable bus master wakeup */
		if ((acpi_cst_quirks & ACPI_CST_QUIRK_NO_BM) == 0) {
			for (i = 0; i < sc->cst_cx_count; ++i) {
				struct acpi_cst_cx *cx = &sc->cst_cx_states[i];

				if (cx->type >= ACPI_STATE_C3) {
					AcpiWriteBitRegister(
					    ACPI_BITREG_BUS_MASTER_RLD, 1);
					break;
				}
			}
		}

		/* Fix up the lowest Cx being used */
		acpi_cst_set_lowest_oncpu(sc, sc->cst_cx_lowest_req);
	}

	/*
	 * Cache the lowest non-C3 state.
	 * NOTE: this must be done after cst_cx_lowest is set.
	 */
	acpi_cst_non_c3(sc);

	cpu_sfence();
	sc->cst_flags &= ~ACPI_CST_FLAG_PROBING;

	return (0);
}
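/*
 * Illustrative sketch (not driver code): the filtering that the probe above
 * applies to _CST entries, modeled with plain C.  Each Cx sub-package
 * carries the C-state type, transition latency and power as package
 * elements 1-3; element 0 (the control register) is ignored here.  The
 * quirk flag, the struct and accept_cx_states() are invented for the
 * example.
 */
#include <stdint.h>
#include <stdio.h>

struct cx_sketch {
	uint32_t type;		/* 1 = C1, 2 = C2, 3+ = C3 or deeper */
	uint32_t trans_lat;	/* worst-case transition latency, us */
	uint32_t power;		/* average power in this state, mW */
};

static int
accept_cx_states(const struct cx_sketch *in, int count,
    struct cx_sketch *out, int quirk_no_c3)
{
	int i, accepted = 0;

	for (i = 0; i < count; i++) {
		/* Mirror the probe: drop C3(+) states when the quirk is set. */
		if (in[i].type >= 3 && quirk_no_c3)
			continue;
		out[accepted++] = in[i];
	}
	return accepted;
}

int
main(void)
{
	struct cx_sketch cst[] = {
		{ 1, 1, 1000 }, { 2, 20, 500 }, { 3, 100, 200 },
	};
	struct cx_sketch used[3];
	int n = accept_cx_states(cst, 3, used, /* quirk_no_c3 */ 1);

	printf("%d usable C-states\n", n);	/* prints 2 */
	return 0;
}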
int
main(int ac, char **av)
{
	int ch;
	int res;
	char *sched = NULL;
	char *cpustr = NULL;
	char *sched_cpustr = NULL;
	char *p = NULL;
	cpumask_t cpumask;
	int cpuid;
	pid_t pid = getpid();	/* See usched_set(2) - BUGS */

	CPUMASK_ASSZERO(cpumask);

	while ((ch = getopt(ac, av, "d")) != -1) {
		switch (ch) {
		case 'd':
			DebugOpt = 1;
			break;
		default:
			usage();
			/* NOTREACHED */
		}
	}

	ac -= optind;
	av += optind;

	if (ac < 2) {
		usage();
		/* NOTREACHED */
	}

	sched_cpustr = strdup(av[0]);
	sched = strsep(&sched_cpustr, ":");
	if (strcmp(sched, "default") == 0)
		fprintf(stderr,
		    "Ignoring scheduler == \"default\": not implemented\n");
	cpustr = strsep(&sched_cpustr, "");
	if (strlen(sched) == 0 && cpustr == NULL) {
		usage();
		/* NOTREACHED */
	}

	/*
	 * XXX needs expanded support for > 64 cpus
	 */
	if (cpustr != NULL) {
		uint64_t v;

		v = (uint64_t)strtoull(cpustr, NULL, 0);
		for (cpuid = 0; cpuid < (int)sizeof(v) * 8; ++cpuid) {
			if (v & (1LU << cpuid))
				CPUMASK_ORBIT(cpumask, cpuid);
		}
	}

	if (strlen(sched) != 0) {
		if (DebugOpt)
			fprintf(stderr,
			    "DEBUG: USCHED_SET_SCHEDULER: scheduler: %s\n",
			    sched);
		res = usched_set(pid, USCHED_SET_SCHEDULER, sched, strlen(sched));
		if (res != 0) {
			asprintf(&p,
			    "usched_set(%d, USCHED_SET_SCHEDULER, \"%s\", %d)",
			    pid, sched, (int)strlen(sched));
			perror(p);
			exit(1);
		}
	}
	if (CPUMASK_TESTNZERO(cpumask)) {
		for (cpuid = 0; cpuid < (int)sizeof(cpumask) * 8; ++cpuid) {
			if (CPUMASK_TESTBIT(cpumask, cpuid))
				break;
		}
		if (DebugOpt) {
			fprintf(stderr, "DEBUG: USCHED_SET_CPU: cpuid: %d\n",
			    cpuid);
		}
		res = usched_set(pid, USCHED_SET_CPU, &cpuid, sizeof(int));
		if (res != 0) {
			asprintf(&p, "usched_set(%d, USCHED_SET_CPU, &%d, %d)",
			    pid, cpuid, (int)sizeof(int));
			perror(p);
			exit(1);
		}
		CPUMASK_NANDBIT(cpumask, cpuid);
		while (CPUMASK_TESTNZERO(cpumask)) {
			++cpuid;
			if (CPUMASK_TESTBIT(cpumask, cpuid) == 0)
				continue;
			CPUMASK_NANDBIT(cpumask, cpuid);
			if (DebugOpt) {
				fprintf(stderr,
				    "DEBUG: USCHED_ADD_CPU: cpuid: %d\n",
				    cpuid);
			}
			res = usched_set(pid, USCHED_ADD_CPU, &cpuid, sizeof(int));
			if (res != 0) {
				asprintf(&p,
				    "usched_set(%d, USCHED_ADD_CPU, &%d, %d)",
				    pid, cpuid, (int)sizeof(int));
				perror(p);
				exit(1);
			}
		}
	}
	execvp(av[1], av + 1);
	exit(1);
}
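/*
 * Illustrative sketch (separate from the utility above): how a cpumask
 * argument string is turned into individual cpu ids.  The loop below is
 * the same strtoull-plus-bit-test walk main() performs before issuing
 * USCHED_SET_CPU/USCHED_ADD_CPU, e.g. a hex mask such as "0x5" selects
 * cpus 0 and 2.  The function name is made up for the example.
 */
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

static void
print_cpus_in_mask(const char *cpustr)
{
	uint64_t v = strtoull(cpustr, NULL, 0);
	int cpuid;

	for (cpuid = 0; cpuid < (int)sizeof(v) * 8; ++cpuid) {
		if (v & (1ULL << cpuid))
			printf("cpu %d selected\n", cpuid);
	}
}

int
main(void)
{
	print_cpus_in_mask("0x5");	/* cpus 0 and 2 */
	return 0;
}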
/*
 * Called with a critical section held and interrupts enabled.
 */
int
pmap_inval_intr(cpumask_t *cpumaskp, int toolong)
{
	globaldata_t gd = mycpu;
	pmap_inval_info_t *info;
	int loopme = 0;
	int cpu;
	cpumask_t cpumask;

	/*
	 * Check all cpus for invalidations we may need to service.
	 */
	cpu_ccfence();
	cpu = gd->gd_cpuid;
	cpumask = *cpumaskp;

	while (CPUMASK_TESTNZERO(cpumask)) {
		int n = BSFCPUMASK(cpumask);

#ifdef LOOPRECOVER
		KKASSERT(n >= 0 && n < MAXCPU);
#endif

		CPUMASK_NANDBIT(cpumask, n);
		info = &invinfo[n];

		/*
		 * Due to interrupts/races we can catch a new operation
		 * in an older interrupt.  A fence is needed once we detect
		 * the (not) done bit.
		 */
		if (!CPUMASK_TESTBIT(info->done, cpu))
			continue;
		cpu_lfence();
#ifdef LOOPRECOVER
		if (toolong) {
			kprintf("pminvl %d->%d %08jx %08jx mode=%d\n",
				cpu, n, info->done.ary[0], info->mask.ary[0],
				info->mode);
		}
#endif

		/*
		 * info->mask and info->done always contain the originating
		 * cpu until the originator is done.  Targets may still be
		 * present in info->done after the originator is done (they
		 * will be finishing up their loops).
		 *
		 * Clear info->mask bits on other cpus to indicate that they
		 * have quiesced (entered the loop).  Once the other mask bits
		 * are clear we can execute the operation on the originator,
		 * then clear the mask and done bits on the originator.  The
		 * targets will then finish up their side and clear their
		 * done bits.
		 *
		 * The command is considered 100% done when all done bits have
		 * been cleared.
		 */
		if (n != cpu) {
			/*
			 * Command state machine for 'other' cpus.
			 */
			if (CPUMASK_TESTBIT(info->mask, cpu)) {
				/*
				 * Other cpus indicate to the originator that
				 * they are quiesced.
				 */
				ATOMIC_CPUMASK_NANDBIT(info->mask, cpu);
				loopme = 1;
			} else if (info->ptep &&
				   CPUMASK_TESTBIT(info->mask, n)) {
				/*
				 * Other cpu must wait for the originator (n)
				 * to complete its command if ptep is not NULL.
				 */
				loopme = 1;
			} else {
				/*
				 * Other cpu detects that the originator has
				 * completed its command, or there was no
				 * command.
				 *
				 * Now that the page table entry has changed,
				 * we can follow up with our own invalidation.
				 */
				vm_offset_t va = info->va;
				int npgs;

				if (va == (vm_offset_t)-1 ||
				    info->npgs > MAX_INVAL_PAGES) {
					cpu_invltlb();
				} else {
					for (npgs = info->npgs; npgs; --npgs) {
						cpu_invlpg((void *)va);
						va += PAGE_SIZE;
					}
				}
				ATOMIC_CPUMASK_NANDBIT(info->done, cpu);
				/* info invalid now */
				/* loopme left alone */
			}
		} else if (CPUMASK_TESTBIT(info->mask, cpu)) {
			/*
			 * Originator is waiting for other cpus
			 */
			if (CPUMASK_CMPMASKNEQ(info->mask, gd->gd_cpumask)) {
				/*
				 * Originator waits for other cpus to enter
				 * their loop (aka quiesce).
				 *
				 * If this bugs out the IPI may have been lost,
				 * try to reissue by resetting our own
				 * reentrancy bit and clearing the smurf mask
				 * for the cpus that did not respond, then
				 * reissuing the IPI.
				 */
				loopme = 1;
#ifdef LOOPRECOVER
				if (loopwdog(info)) {
					info->failed = 1;
					loopdebug("C", info);
					/* XXX recover from possible bug */
					mdcpu->gd_xinvaltlb = 0;
					ATOMIC_CPUMASK_NANDMASK(smp_smurf_mask,
							info->mask);
					cpu_disable_intr();
					smp_invlpg(&smp_active_mask);

					/*
					 * Force outer-loop retest of Xinvltlb
					 * requests (see mp_machdep.c).
					 */
					mdcpu->gd_xinvaltlb = 2;
					cpu_enable_intr();
				}
#endif
			} else {
				/*
				 * Originator executes operation and clears
				 * mask to allow other cpus to finish.
				 */
				KKASSERT(info->mode != INVDONE);
				if (info->mode == INVSTORE) {
					if (info->ptep)
						info->opte = atomic_swap_long(
						    info->ptep, info->npte);
					CHECKSIGMASK(info);
					ATOMIC_CPUMASK_NANDBIT(info->mask, cpu);
					CHECKSIGMASK(info);
				} else {
					if (atomic_cmpset_long(info->ptep,
							       info->opte,
							       info->npte)) {
						info->success = 1;
					} else {
						info->success = 0;
					}
					CHECKSIGMASK(info);
					ATOMIC_CPUMASK_NANDBIT(info->mask, cpu);
					CHECKSIGMASK(info);
				}
				loopme = 1;
			}
		} else {
			/*
			 * Originator does not have to wait for the other
			 * cpus to finish.  It clears its done bit.  A new
			 * command will not be initiated by the originator
			 * until the other cpus have cleared their done bits
			 * (asynchronously).
			 */
			vm_offset_t va = info->va;
			int npgs;

			if (va == (vm_offset_t)-1 ||
			    info->npgs > MAX_INVAL_PAGES) {
				cpu_invltlb();
			} else {
				for (npgs = info->npgs; npgs; --npgs) {
					cpu_invlpg((void *)va);
					va += PAGE_SIZE;
				}
			}

			/* leave loopme alone */
			/* other cpus may still be finishing up */
			/* can't race originator since that's us */
			info->mode = INVDONE;
			ATOMIC_CPUMASK_NANDBIT(info->done, cpu);
		}
	}
	return loopme;
}
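/*
 * Illustrative sketch (userland model, not kernel code): the mask/done
 * handshake described in the comments above, reduced to two atomic bit
 * masks and plain threads.  The originator publishes the command by setting
 * every participant's bit in both masks; targets clear their mask bit to
 * report that they have quiesced; once only the originator's mask bit is
 * left, it performs the operation and clears its own mask and done bits;
 * targets then do their local "invalidation" and clear their done bits.
 * All names here are invented for the example.
 */
#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

#define NCPU	4

static atomic_uint mask;	/* cpus that have not yet quiesced */
static atomic_uint done;	/* cpus that have not yet finished */

static void *
target(void *arg)
{
	unsigned bit = 1u << (int)(long)arg;

	/* Wait for the originator to publish a command. */
	while ((atomic_load(&done) & bit) == 0)
		;
	/* Report quiescence, like clearing our bit in info->mask. */
	atomic_fetch_and(&mask, ~bit);
	/* Wait for the originator (bit 0) to execute the operation. */
	while ((atomic_load(&mask) & 1u) != 0)
		;
	printf("cpu%d: local invalidation\n", (int)(long)arg);
	atomic_fetch_and(&done, ~bit);
	return NULL;
}

int
main(void)
{
	pthread_t tid[NCPU];
	unsigned all = (1u << NCPU) - 1;
	long i;

	atomic_store(&mask, all);
	atomic_store(&done, all);
	for (i = 1; i < NCPU; i++)
		pthread_create(&tid[i], NULL, target, (void *)i);

	/* Originator: wait until only our own bit remains in mask. */
	while (atomic_load(&mask) != 1u)
		;
	printf("cpu0: update page table entry\n");
	atomic_fetch_and(&mask, ~1u);
	atomic_fetch_and(&done, ~1u);

	/* The command is fully done once every done bit has cleared. */
	while (atomic_load(&done) != 0)
		;
	for (i = 1; i < NCPU; i++)
		pthread_join(tid[i], NULL);
	return 0;
}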