/*
 * Called on a per-cpu basis
 */
void
initclocks_pcpu(void)
{
	struct globaldata *gd = mycpu;

	crit_enter();
	if (gd->gd_cpuid == 0) {
		gd->gd_time_seconds = 1;
		gd->gd_cpuclock_base = sys_cputimer->count();
	} else {
		/* XXX */
		gd->gd_time_seconds = globaldata_find(0)->gd_time_seconds;
		gd->gd_cpuclock_base = globaldata_find(0)->gd_cpuclock_base;
	}

	systimer_intr_enable();

#ifdef IFPOLL_ENABLE
	ifpoll_init_pcpu(gd->gd_cpuid);
#endif

	/*
	 * Use a non-queued periodic systimer to prevent multiple ticks from
	 * building up if the sysclock jumps forward (8254 gets reset).  The
	 * sysclock will never jump backwards.  Our time sync is based on
	 * the actual sysclock, not the ticks count.
	 */
	systimer_init_periodic_nq(&gd->gd_hardclock, hardclock, NULL, hz);
	systimer_init_periodic_nq(&gd->gd_statclock, statclock, NULL, stathz);
	/* XXX correct the frequency for scheduler / estcpu tests */
	systimer_init_periodic_nq(&gd->gd_schedclock, schedclock,
				  NULL, ESTCPUFREQ);
	crit_exit();
}
static int
acpi_cpu_cst_attach(device_t dev)
{
	ACPI_BUFFER buf;
	ACPI_OBJECT *obj;
	struct mdglobaldata *md;
	struct acpi_cpu_softc *sc;
	ACPI_STATUS status;
	int cpu_id;

	ACPI_FUNCTION_TRACE((char *)(uintptr_t)__func__);

	sc = device_get_softc(dev);
	sc->cpu_dev = dev;
	sc->cpu_parent = device_get_softc(device_get_parent(dev));
	sc->cpu_handle = acpi_get_handle(dev);
	cpu_id = acpi_get_magic(dev);
	cpu_softc[cpu_id] = sc;
	md = (struct mdglobaldata *)globaldata_find(device_get_unit(dev));
	sc->md = md;
	cpu_smi_cmd = AcpiGbl_FADT.SmiCommand;
	cpu_cst_cnt = AcpiGbl_FADT.CstControl;

	buf.Pointer = NULL;
	buf.Length = ACPI_ALLOCATE_BUFFER;
	status = AcpiEvaluateObject(sc->cpu_handle, NULL, NULL, &buf);
	if (ACPI_FAILURE(status)) {
		device_printf(dev, "attach failed to get Processor obj - %s\n",
			      AcpiFormatException(status));
		return (ENXIO);
	}
	obj = (ACPI_OBJECT *)buf.Pointer;
	sc->cpu_p_blk = obj->Processor.PblkAddress;
	sc->cpu_p_blk_len = obj->Processor.PblkLength;
	sc->cpu_acpi_id = obj->Processor.ProcId;
	AcpiOsFree(obj);
	ACPI_DEBUG_PRINT((ACPI_DB_INFO, "acpi_cpu%d: P_BLK at %#x/%d\n",
			  device_get_unit(dev), sc->cpu_p_blk,
			  sc->cpu_p_blk_len));

	/*
	 * If this is the first cpu we attach, create and initialize the
	 * generic resources that will be used by all acpi cpu devices.
	 */
	if (device_get_unit(dev) == 0) {
		/* Assume we won't be using generic Cx mode by default */
		cpu_cx_generic = FALSE;

		/* Queue post cpu-probing task handler */
		AcpiOsExecute(OSL_NOTIFY_HANDLER, acpi_cpu_startup, NULL);
	}

	/* Probe for Cx state support. */
	acpi_cpu_cx_probe(sc);

	/* Finally, call identify and probe/attach for child devices. */
	bus_generic_probe(dev);
	bus_generic_attach(dev);

	return (0);
}
/*
 * timer0 clock interrupt.  Timer0 is in one-shot mode and has stopped
 * counting as of this interrupt.  We use timer1 in free-running mode (not
 * generating any interrupts) as our main counter.  Each cpu has timeouts
 * pending.
 *
 * This code is INTR_MPSAFE and may be called without the BGL held.
 */
static void
clkintr(void *dummy, void *frame_arg)
{
	static sysclock_t sysclock_count;	/* NOTE! Must be static */
	struct globaldata *gd = mycpu;
	struct globaldata *gscan;
	int n;

	/*
	 * SWSTROBE mode is a one-shot, the timer is no longer running
	 */
	timer0_running = 0;

	/*
	 * XXX the dispatcher needs work.  right now we call systimer_intr()
	 * directly or via IPI for any cpu with systimers queued, which is
	 * usually *ALL* of them.  We need to use the LAPIC timer for this.
	 */
	sysclock_count = sys_cputimer->count();
	for (n = 0; n < ncpus; ++n) {
		gscan = globaldata_find(n);
		if (TAILQ_FIRST(&gscan->gd_systimerq) == NULL)
			continue;
		if (gscan != gd) {
			lwkt_send_ipiq3(gscan, (ipifunc3_t)systimer_intr,
					&sysclock_count, 1);
		} else {
			systimer_intr(&sysclock_count, 0, frame_arg);
		}
	}
}
/*
 * Find the nth present CPU and return its gd_cpuid as well as set the
 * gd_acpi_id from the most reliable source.
 */
static int
acpi_cpu_get_id(uint32_t idx, uint32_t *acpi_id, uint32_t *cpu_id)
{
	struct mdglobaldata *md;
	uint32_t i;

	KASSERT(acpi_id != NULL, ("Null acpi_id"));
	KASSERT(cpu_id != NULL, ("Null cpu_id"));
	for (i = 0; i < ncpus; i++) {
		if ((smp_active_mask & CPUMASK(i)) == 0)
			continue;
		md = (struct mdglobaldata *)globaldata_find(i);
		KASSERT(md != NULL, ("no pcpu data for %d", i));
		if (idx-- == 0) {
			/*
			 * If gd_acpi_id was not initialized (e.g., a non-APIC
			 * UP box) override it with the value from the ASL.
			 * Otherwise, if the two don't match, prefer the
			 * MADT-derived value.  Finally, return the gd_cpuid
			 * to reference this processor.
			 */
			if (md->gd_acpi_id == 0xffffffff)
				md->gd_acpi_id = *acpi_id;
			else if (md->gd_acpi_id != *acpi_id)
				*acpi_id = md->gd_acpi_id;
			*cpu_id = md->mi.gd_cpuid;
			return 0;
		}
	}
	return ESRCH;
}
void
callout_reset_bycpu(struct callout *c, int to_ticks, void (*ftn)(void *),
		    void *arg, int cpuid)
{
	KASSERT(cpuid >= 0 && cpuid < ncpus, ("invalid cpuid %d", cpuid));

#ifndef SMP
	callout_reset(c, to_ticks, ftn, arg);
#else
	if (cpuid == mycpuid) {
		callout_reset(c, to_ticks, ftn, arg);
	} else {
		struct globaldata *target_gd;
		struct callout_remote_arg rmt;
		int seq;

		rmt.c = c;
		rmt.ftn = ftn;
		rmt.arg = arg;
		rmt.to_ticks = to_ticks;

		target_gd = globaldata_find(cpuid);

		seq = lwkt_send_ipiq(target_gd, callout_reset_ipi, &rmt);
		lwkt_wait_ipiq(target_gd, seq);
	}
#endif
}
/*
 * Schedule start call
 */
static void
lgue_start_schedule(struct ifnet *ifp)
{
#ifdef SMP
	int cpu;

	cpu = ifp->if_start_cpuid(ifp);
	if (cpu != mycpuid)
		lwkt_send_ipiq(globaldata_find(cpu), lgue_start_ipifunc, ifp);
	else
#endif
		lgue_start_ipifunc(ifp);
}
/*
 * vcnt() - accumulate statistics from the cnt structure for each cpu
 *
 *	The vmmeter structure is now per-cpu as well as global.  Those
 *	statistics which can be kept on a per-cpu basis (to avoid cache
 *	stalls between cpus) can be moved to the per-cpu vmmeter.  Remaining
 *	statistics, such as v_free_reserved, are left in the global
 *	structure.
 *
 * (sysctl_oid *oidp, void *arg1, int arg2, struct sysctl_req *req)
 *
 * No requirements.
 */
static int
vcnt(SYSCTL_HANDLER_ARGS)
{
	int i;
	int count = 0;
	int offset = arg2;

	for (i = 0; i < ncpus; ++i) {
		struct globaldata *gd = globaldata_find(i);

		count += *(int *)((char *)&gd->gd_cnt + offset);
	}
	return(SYSCTL_OUT(req, &count, sizeof(int)));
}
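/*
 * Illustrative sketch only (not part of the source above): a handler like
 * vcnt() is normally wired up through SYSCTL_PROC with the byte offset of
 * the per-cpu vmmeter field passed in arg2.  The OID parent, field choice,
 * and description below are assumptions picked for the example, not the
 * kernel's actual registration.
 */
SYSCTL_PROC(_vm_stats_sys, OID_AUTO, v_swtch,
	    CTLTYPE_UINT | CTLFLAG_RD, 0, offsetof(struct vmmeter, v_swtch),
	    vcnt, "IU", "Context switches");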
/*
 * No requirements.
 */
static int
vcnt_intr(SYSCTL_HANDLER_ARGS)
{
	int i;
	int count = 0;

	for (i = 0; i < ncpus; ++i) {
		struct globaldata *gd = globaldata_find(i);

		count += gd->gd_cnt.v_intr + gd->gd_cnt.v_ipi +
			 gd->gd_cnt.v_timer;
	}
	return(SYSCTL_OUT(req, &count, sizeof(int)));
}
/*
 * Count number of cached vnodes.  This is middling expensive so be
 * careful not to make this call in the critical path, particularly
 * not updating the global.  Each cpu tracks its own accumulator.
 * The individual accumulators are not accurate and must be summed
 * together.
 */
int
countcachedvnodes(int gupdate)
{
	int i;
	int n = 0;

	for (i = 0; i < ncpus; ++i) {
		globaldata_t gd = globaldata_find(i);

		n += gd->gd_cachedvnodes;
	}
	if (gupdate)
		cachedvnodes = n;
	return n;
}
static void
lwkt_idleloop(void *dummy)
{
	globaldata_t gd = mycpu;

	DBPRINTF(("idlestart cpu %d pri %d (should be < 32) mpcount %d (should be 0)\n",
		  gd->gd_cpuid, curthread->td_pri, curthread->td_mpcount));

	gd->gd_pid = getpid();

	for (;;) {
		/*
		 * If only our 'main' thread is left, schedule it.
		 */
		if (gd->gd_num_threads == gd->gd_sys_threads) {
			int i;
			globaldata_t tgd;

			for (i = 0; i < ncpus; ++i) {
				tgd = globaldata_find(i);
				if (tgd->gd_num_threads != tgd->gd_sys_threads)
					break;
			}
			if (i == ncpus && (main_td.td_flags & TDF_RUNQ) == 0)
				lwkt_schedule(&main_td);
		}

		/*
		 * Wait for an interrupt, aka wait for a signal or an upcall
		 * to occur, then switch away.
		 */
		crit_enter();
		if (gd->gd_runqmask ||
		    (curthread->td_flags & TDF_IDLE_NOHLT)) {
			curthread->td_flags &= ~TDF_IDLE_NOHLT;
		} else {
			printf("cpu %d halting\n", gd->gd_cpuid);
			cpu_halt();
			printf("cpu %d resuming\n", gd->gd_cpuid);
		}
		crit_exit();
		lwkt_switch();
	}
}
/*
 * No requirements.
 */
static int
do_vmmeter(SYSCTL_HANDLER_ARGS)
{
	int boffset = offsetof(struct vmmeter, vmmeter_uint_begin);
	int eoffset = offsetof(struct vmmeter, vmmeter_uint_end);
	struct vmmeter vmm;
	int i;

	bzero(&vmm, sizeof(vmm));
	for (i = 0; i < ncpus; ++i) {
		int off;
		struct globaldata *gd = globaldata_find(i);

		for (off = boffset; off <= eoffset; off += sizeof(u_int)) {
			*(u_int *)((char *)&vmm + off) +=
				*(u_int *)((char *)&gd->gd_cnt + off);
		}
	}
	vmm.v_intr += vmm.v_ipi + vmm.v_timer;
	return (sysctl_handle_opaque(oidp, &vmm, sizeof(vmm), req));
}
void
dump_reactivate_cpus(void)
{
#ifdef SMP
	globaldata_t gd;
	int cpu, seq;
#endif

	dump_stop_usertds = 1;
	need_user_resched();

#ifdef SMP
	for (cpu = 0; cpu < ncpus; cpu++) {
		gd = globaldata_find(cpu);
		seq = lwkt_send_ipiq(gd, need_user_resched_remote, NULL);
		lwkt_wait_ipiq(gd, seq);
	}

	restart_cpus(stopped_cpus);
#endif
}
/*
 * Long-term (10-second interval) statistics collection
 */
static uint64_t
collect_nlookup_callback(int n)
{
	static uint64_t last_total;
	uint64_t save;
	uint64_t total;

	total = 0;
	for (n = 0; n < ncpus; ++n) {
		globaldata_t gd = globaldata_find(n);
		struct nchstats *sp;

		if ((sp = gd->gd_nchstats) != NULL)
			total += sp->ncs_longhits + sp->ncs_longmiss;
	}
	save = total;
	total = total - last_total;
	last_total = save;

	return total;
}
/*
 * Find the nth present CPU and return its gd_cpuid as well as set the
 * gd_acpi_id from the most reliable source.
 */
static int
acpi_cpu_get_id(uint32_t idx, uint32_t *acpi_id, uint32_t *cpu_id)
{
	struct mdglobaldata *md;
	uint32_t i;

	KASSERT(acpi_id != NULL, ("Null acpi_id"));
	KASSERT(cpu_id != NULL, ("Null cpu_id"));
	for (i = 0; i < ncpus; i++) {
		if (CPUMASK_TESTBIT(smp_active_mask, i) == 0)
			continue;
		md = (struct mdglobaldata *)globaldata_find(i);
		KASSERT(md != NULL, ("no pcpu data for %d", i));
		if (idx-- == 0) {
			/*
			 * If gd_acpi_id was not initialized (e.g., box w/o
			 * MADT) override it with the value from the ASL.
			 * Otherwise, if the two don't match, prefer the
			 * MADT-derived value.  Finally, return the gd_cpuid
			 * to reference this processor.
			 */
			if (md->gd_acpi_id == 0xffffffff) {
				kprintf("cpu%d: acpi id was not set, "
					"set it to %u\n", i, *acpi_id);
				md->gd_acpi_id = *acpi_id;
			} else if (md->gd_acpi_id != *acpi_id) {
				kprintf("cpu%d: acpi id mismatch, madt %u, "
					"processor object %u\n",
					i, md->gd_acpi_id, *acpi_id);
				*acpi_id = md->gd_acpi_id;
			}
			*cpu_id = md->mi.gd_cpuid;
			return 0;
		}
	}
	return ESRCH;
}
/*
 * Remote IPI for callout_reset_bycpu().  The operation is performed only
 * on the 1->0 transition of the counter, otherwise there are callout_stop()s
 * pending after us.
 *
 * The IPI counter and PENDING flags must be set atomically with the
 * 1->0 transition.  The ACTIVE flag was set prior to the ipi being
 * sent and we do not want to race a caller on the original cpu trying
 * to deactivate() the flag concurrent with our installation of the
 * callout.
 */
static void
callout_reset_ipi(void *arg)
{
	struct callout *c = arg;
	globaldata_t gd = mycpu;
	globaldata_t tgd;
	int flags;
	int nflags;

	for (;;) {
		flags = c->c_flags;
		cpu_ccfence();
		KKASSERT((flags & CALLOUT_IPI_MASK) > 0);

		/*
		 * We should already be armed for our cpu, if armed to another
		 * cpu, chain the IPI.  If for some reason we are not armed,
		 * we can arm ourselves.
		 */
		if (flags & CALLOUT_ARMED) {
			if (CALLOUT_FLAGS_TO_CPU(flags) != gd->gd_cpuid) {
				tgd = globaldata_find(
						CALLOUT_FLAGS_TO_CPU(flags));
				lwkt_send_ipiq(tgd, callout_reset_ipi, c);
				return;
			}
			nflags = (flags & ~CALLOUT_EXECUTED);
		} else {
			nflags = (flags & ~(CALLOUT_CPU_MASK |
					    CALLOUT_EXECUTED)) |
				 CALLOUT_ARMED |
				 CALLOUT_CPU_TO_FLAGS(gd->gd_cpuid);
		}

		/*
		 * Decrement the IPI count, retain and clear the WAITING
		 * status, clear EXECUTED.
		 *
		 * NOTE: It is possible for the callout to already have been
		 *	 marked pending due to SMP races.
		 */
		nflags = nflags - 1;
		if ((flags & CALLOUT_IPI_MASK) == 1) {
			nflags &= ~(CALLOUT_WAITING | CALLOUT_EXECUTED);
			nflags |= CALLOUT_PENDING;
		}

		if (atomic_cmpset_int(&c->c_flags, flags, nflags)) {
			/*
			 * Only install the callout on the 1->0 transition
			 * of the IPI count, and only if PENDING was not
			 * already set.  The latter situation should never
			 * occur but we check anyway.
			 */
			if ((flags & (CALLOUT_PENDING|CALLOUT_IPI_MASK)) == 1) {
				softclock_pcpu_t sc;

				sc = &softclock_pcpu_ary[gd->gd_cpuid];
				c->c_time = sc->curticks + c->c_load;
				TAILQ_INSERT_TAIL(
					&sc->callwheel[c->c_time & cwheelmask],
					c, c_links.tqe);
			}
			break;
		}
		/* retry */
		cpu_pause();
	}

	/*
	 * Issue wakeup if requested.
	 */
	if (flags & CALLOUT_WAITING)
		wakeup(c);
}
/*
 * Setup a callout to run on the specified cpu.  Should generally be used
 * to run a callout on a specific cpu which does not nominally change.
 */
void
callout_reset_bycpu(struct callout *c, int to_ticks, void (*ftn)(void *),
		    void *arg, int cpuid)
{
	globaldata_t gd;
	globaldata_t tgd;

#ifdef INVARIANTS
	if ((c->c_flags & CALLOUT_DID_INIT) == 0) {
		callout_init(c);
		kprintf(
		    "callout_reset(%p) from %p: callout was not initialized\n",
		    c, ((int **)&c)[-1]);
		print_backtrace(-1);
	}
#endif
	gd = mycpu;
	crit_enter_gd(gd);

	tgd = globaldata_find(cpuid);

	/*
	 * Our cpu must temporarily gain ownership of the callout and cancel
	 * anything still running, which is complex.  The easiest way to do
	 * it is to issue a callout_stop().
	 *
	 * Clearing bits on flags (vs nflags) is a way to guarantee they were
	 * not previously set, by forcing the atomic op to fail.  The callout
	 * must not be pending or armed after the stop_sync, if it is we have
	 * to loop up and stop_sync() again.
	 */
	for (;;) {
		int flags;
		int nflags;

		callout_stop_sync(c);
		flags = c->c_flags & ~(CALLOUT_PENDING | CALLOUT_ARMED);
		nflags = (flags & ~(CALLOUT_CPU_MASK |
				    CALLOUT_EXECUTED)) |
			 CALLOUT_CPU_TO_FLAGS(tgd->gd_cpuid) |
			 CALLOUT_ARMED |
			 CALLOUT_ACTIVE;
		nflags = nflags + 1;		/* bump IPI count */
		if (atomic_cmpset_int(&c->c_flags, flags, nflags))
			break;
		cpu_pause();
	}

	/*
	 * Even though we are not the cpu that now owns the callout, our
	 * bumping of the IPI count (and in a situation where the callout is
	 * not queued to the callwheel) will prevent anyone else from
	 * depending on or acting on the contents of the callout structure.
	 */
	if (to_ticks <= 0)
		to_ticks = 1;
	c->c_arg = arg;
	c->c_func = ftn;
	c->c_load = to_ticks;	/* IPI will add curticks */

	lwkt_send_ipiq(tgd, callout_reset_ipi, c);
	crit_exit_gd(gd);
}
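/*
 * Illustrative usage only; the driver names and softc layout below are
 * hypothetical and not part of the code above.  A driver that wants its
 * periodic handler to stay on one specific cpu can initialize the callout
 * once and arm it with callout_reset_bycpu(), re-arming from inside the
 * handler as usual.
 */
struct mydrv_softc {			/* hypothetical example softc */
	struct callout	timer;
	int		pin_cpuid;
};

static void
mydrv_timer(void *xsc)
{
	struct mydrv_softc *sc = xsc;

	/* ... periodic work ... */
	callout_reset_bycpu(&sc->timer, hz, mydrv_timer, sc, sc->pin_cpuid);
}

static void
mydrv_timer_start(struct mydrv_softc *sc, int cpuid)
{
	sc->pin_cpuid = cpuid;
	callout_init_mp(&sc->timer);
	callout_reset_bycpu(&sc->timer, hz, mydrv_timer, sc, cpuid);
}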
static int
settime(struct timeval *tv)
{
	struct timeval delta, tv1, tv2;
	static struct timeval maxtime, laststep;
	struct timespec ts;
	int origcpu;

	if ((origcpu = mycpu->gd_cpuid) != 0)
		lwkt_setcpu_self(globaldata_find(0));

	crit_enter();
	microtime(&tv1);
	delta = *tv;
	timevalsub(&delta, &tv1);

	/*
	 * If the system is secure, we do not allow the time to be
	 * set to a value earlier than 1 second less than the highest
	 * time we have yet seen.  The worst a miscreant can do in
	 * this circumstance is "freeze" time.  He couldn't go
	 * back to the past.
	 *
	 * We similarly do not allow the clock to be stepped more
	 * than one second, nor more than once per second.  This allows
	 * a miscreant to make the clock march double-time, but no worse.
	 */
	if (securelevel > 1) {
		if (delta.tv_sec < 0 || delta.tv_usec < 0) {
			/*
			 * Update maxtime to latest time we've seen.
			 */
			if (tv1.tv_sec > maxtime.tv_sec)
				maxtime = tv1;
			tv2 = *tv;
			timevalsub(&tv2, &maxtime);
			if (tv2.tv_sec < -1) {
				tv->tv_sec = maxtime.tv_sec - 1;
				kprintf("Time adjustment clamped to -1 second\n");
			}
		} else {
			if (tv1.tv_sec == laststep.tv_sec) {
				crit_exit();
				return (EPERM);
			}
			if (delta.tv_sec > 1) {
				tv->tv_sec = tv1.tv_sec + 1;
				kprintf("Time adjustment clamped to +1 second\n");
			}
			laststep = *tv;
		}
	}

	ts.tv_sec = tv->tv_sec;
	ts.tv_nsec = tv->tv_usec * 1000;
	set_timeofday(&ts);
	crit_exit();

	if (origcpu != 0)
		lwkt_setcpu_self(globaldata_find(origcpu));

	resettodr();
	return (0);
}
/*
 * Go through the rigmarole of shutting down..
 * this used to be in machdep.c but I'll be damned if I could see
 * anything machine dependent in it.
 */
static void
boot(int howto)
{
	/*
	 * Get rid of any user scheduler baggage and then give
	 * us a high priority.
	 */
	if (curthread->td_release)
		curthread->td_release(curthread);
	lwkt_setpri_self(TDPRI_MAX);

	/* collect extra flags that shutdown_nice might have set */
	howto |= shutdown_howto;

#ifdef SMP
	/*
	 * We really want to shutdown on the BSP.  Subsystems such as ACPI
	 * can't power-down the box otherwise.
	 */
	if (smp_active_mask > 1) {
		kprintf("boot() called on cpu#%d\n", mycpu->gd_cpuid);
	}
	if (panicstr == NULL && mycpu->gd_cpuid != 0) {
		kprintf("Switching to cpu #0 for shutdown\n");
		lwkt_setcpu_self(globaldata_find(0));
	}
#endif
	/*
	 * Do any callouts that should be done BEFORE syncing the filesystems.
	 */
	EVENTHANDLER_INVOKE(shutdown_pre_sync, howto);

	/*
	 * Try to get rid of any remaining FS references.  The calling
	 * process, proc0, and init may still hold references.  The
	 * VFS cache subsystem may still hold a root reference to root.
	 *
	 * XXX this needs work.  We really need to SIGSTOP all remaining
	 * processes in order to avoid blowups due to proc0's filesystem
	 * references going away.  For now just make sure that the init
	 * process is stopped.
	 */
	if (panicstr == NULL) {
		shutdown_cleanup_proc(curproc);
		shutdown_cleanup_proc(&proc0);
		if (initproc) {
			if (initproc != curproc) {
				ksignal(initproc, SIGSTOP);
				tsleep(boot, 0, "shutdn", hz / 20);
			}
			shutdown_cleanup_proc(initproc);
		}
		vfs_cache_setroot(NULL, NULL);
	}

	/*
	 * Now sync filesystems
	 */
	if (!cold && (howto & RB_NOSYNC) == 0 && waittime < 0) {
		int iter, nbusy, pbusy;

		waittime = 0;
		kprintf("\nsyncing disks... ");

		sys_sync(NULL);	/* YYY was sync(&proc0, NULL). why proc0 ? */

		/*
		 * With soft updates, some buffers that are
		 * written will be remarked as dirty until other
		 * buffers are written.
		 */
		for (iter = pbusy = 0; iter < 20; iter++) {
			nbusy = scan_all_buffers(shutdown_busycount1, NULL);
			if (nbusy == 0)
				break;
			kprintf("%d ", nbusy);
			if (nbusy < pbusy)
				iter = 0;
			pbusy = nbusy;
			/*
			 * XXX:
			 * Process soft update work queue if buffers don't sync
			 * after 6 iterations by permitting the syncer to run.
			 */
			if (iter > 5)
				bio_ops_sync(NULL);

			sys_sync(NULL);	/* YYY was sync(&proc0, NULL). why proc0 ? */
			tsleep(boot, 0, "shutdn", hz * iter / 20 + 1);
		}
		kprintf("\n");

		/*
		 * Count only busy local buffers to prevent forcing
		 * a fsck if we're just a client of a wedged NFS server
		 */
		nbusy = scan_all_buffers(shutdown_busycount2, NULL);
		if (nbusy) {
			/*
			 * Failed to sync all blocks.  Indicate this and don't
			 * unmount filesystems (thus forcing an fsck on reboot).
			 */
			kprintf("giving up on %d buffers\n", nbusy);
#ifdef DDB
			if (debugger_on_panic)
				Debugger("busy buffer problem");
#endif /* DDB */
			tsleep(boot, 0, "shutdn", hz * 5 + 1);
		} else {
			kprintf("done\n");

			/*
			 * Unmount filesystems
			 */
			if (panicstr == NULL)
				vfs_unmountall();
		}
		tsleep(boot, 0, "shutdn", hz / 10 + 1);
	}

	print_uptime();

	/*
	 * Dump before doing post_sync shutdown ops
	 */
	crit_enter();
	if ((howto & (RB_HALT|RB_DUMP)) == RB_DUMP && !cold) {
		dumpsys();
	}

	/*
	 * Ok, now do things that assume all filesystem activity has
	 * been completed.  This will also call the device shutdown
	 * methods.
	 */
	EVENTHANDLER_INVOKE(shutdown_post_sync, howto);

	/* Now that we're going to really halt the system... */
	EVENTHANDLER_INVOKE(shutdown_final, howto);

	for(;;)	;	/* safety against shutdown_reset not working */
	/* NOTREACHED */
}
/*
 * Stop a running timer and ensure that any running callout completes before
 * returning.  If the timer is running on another cpu this function may block
 * to interlock against the callout.  If the callout is currently executing
 * or blocked in another thread this function may also block to interlock
 * against the callout.
 *
 * The caller must be careful to avoid deadlocks, either by using
 * callout_init_lk() (which uses the lockmgr lock cancelation feature),
 * by using tokens and dealing with breaks in the serialization, or using
 * the lockmgr lock cancelation feature yourself in the callout callback
 * function.
 *
 * callout_stop() returns non-zero if the callout was pending.
 */
static int
_callout_stop(struct callout *c, int issync)
{
	globaldata_t gd = mycpu;
	globaldata_t tgd;
	softclock_pcpu_t sc;
	int flags;
	int nflags;
	int rc;
	int cpuid;

#ifdef INVARIANTS
	if ((c->c_flags & CALLOUT_DID_INIT) == 0) {
		callout_init(c);
		kprintf(
		    "callout_stop(%p) from %p: callout was not initialized\n",
		    c, ((int **)&c)[-1]);
		print_backtrace(-1);
	}
#endif
	crit_enter_gd(gd);

	/*
	 * Fast path operations:
	 *
	 * If ARMED and owned by our cpu, or not ARMED, and other simple
	 * conditions are met, we can just clear ACTIVE and EXECUTED
	 * and we are done.
	 */
	for (;;) {
		flags = c->c_flags;
		cpu_ccfence();

		cpuid = CALLOUT_FLAGS_TO_CPU(flags);

		/*
		 * Can't handle an armed callout in the fast path if it is
		 * not on the current cpu.  We must atomically increment the
		 * IPI count for the IPI we intend to send and break out of
		 * the fast path to enter the slow path.
		 */
		if (flags & CALLOUT_ARMED) {
			if (gd->gd_cpuid != cpuid) {
				nflags = flags + 1;
				if (atomic_cmpset_int(&c->c_flags,
						      flags, nflags)) {
					/* break to slow path */
					break;
				}
				continue;	/* retry */
			}
		} else {
			cpuid = gd->gd_cpuid;
			KKASSERT((flags & CALLOUT_IPI_MASK) == 0);
			KKASSERT((flags & CALLOUT_PENDING) == 0);
		}

		/*
		 * Process pending IPIs and retry (only if not called from
		 * an IPI).
		 */
		if (flags & CALLOUT_IPI_MASK) {
			lwkt_process_ipiq();
			continue;	/* retry */
		}

		/*
		 * Transition to the stopped state, recover the EXECUTED
		 * status.  If pending we cannot clear ARMED until after
		 * we have removed (c) from the callwheel.
		 *
		 * NOTE: The callout might already not be armed but in this
		 *	 case it should also not be pending.
		 */
		nflags = flags & ~(CALLOUT_ACTIVE |
				   CALLOUT_EXECUTED |
				   CALLOUT_WAITING |
				   CALLOUT_PENDING);

		/* NOTE: IPI_MASK already tested */
		if ((flags & CALLOUT_PENDING) == 0)
			nflags &= ~CALLOUT_ARMED;
		if (atomic_cmpset_int(&c->c_flags, flags, nflags)) {
			/*
			 * Can only remove from callwheel if currently
			 * pending.
			 */
			if (flags & CALLOUT_PENDING) {
				sc = &softclock_pcpu_ary[gd->gd_cpuid];
				if (sc->next == c)
					sc->next = TAILQ_NEXT(c, c_links.tqe);
				TAILQ_REMOVE(
					&sc->callwheel[c->c_time & cwheelmask],
					c,
					c_links.tqe);
				c->c_func = NULL;

				/*
				 * NOTE: Can't clear ARMED until we have
				 *	 physically removed (c) from the
				 *	 callwheel.
				 *
				 * NOTE: WAITING bit race exists when doing
				 *	 unconditional bit clears.
				 */
				callout_maybe_clear_armed(c);
				if (c->c_flags & CALLOUT_WAITING)
					flags |= CALLOUT_WAITING;
			}

			/*
			 * ARMED has been cleared at this point and (c)
			 * might now be stale.  Only good for wakeup()s.
			 */
			if (flags & CALLOUT_WAITING)
				wakeup(c);

			goto skip_slow;
		}
		/* retry */
	}

	/*
	 * Slow path (and not called via an IPI).
	 *
	 * When ARMED to a different cpu the stop must be processed on that
	 * cpu.  Issue the IPI and wait for completion.  We have already
	 * incremented the IPI count.
	 */
	tgd = globaldata_find(cpuid);
	lwkt_send_ipiq3(tgd, callout_stop_ipi, c, issync);

	for (;;) {
		int flags;
		int nflags;

		flags = c->c_flags;
		cpu_ccfence();
		if ((flags & CALLOUT_IPI_MASK) == 0)	/* fast path */
			break;
		nflags = flags | CALLOUT_WAITING;
		tsleep_interlock(c, 0);
		if (atomic_cmpset_int(&c->c_flags, flags, nflags)) {
			tsleep(c, PINTERLOCKED, "cstp1", 0);
		}
	}

skip_slow:

	/*
	 * If (issync) we must also wait for any in-progress callbacks to
	 * complete, unless the stop is being executed from the callback
	 * itself.  The EXECUTED flag is set prior to the callback
	 * being made so our existing flags status already has it.
	 *
	 * If auto-lock mode is being used, this is where we cancel any
	 * blocked lock that is potentially preventing the target cpu
	 * from completing the callback.
	 */
	while (issync) {
		intptr_t *runp;
		intptr_t runco;

		sc = &softclock_pcpu_ary[cpuid];
		if (gd->gd_curthread == &sc->thread)	/* stop from cb */
			break;
		runp = &sc->running;
		runco = *runp;
		cpu_ccfence();
		if ((runco & ~(intptr_t)1) != (intptr_t)c)
			break;
		if (c->c_flags & CALLOUT_AUTOLOCK)
			lockmgr(c->c_lk, LK_CANCEL_BEG);
		tsleep_interlock(c, 0);
		if (atomic_cmpset_long(runp, runco, runco | 1))
			tsleep(c, PINTERLOCKED, "cstp3", 0);
		if (c->c_flags & CALLOUT_AUTOLOCK)
			lockmgr(c->c_lk, LK_CANCEL_END);
	}

	crit_exit_gd(gd);
	rc = (flags & CALLOUT_EXECUTED) != 0;

	return rc;
}
static void
callout_stop_ipi(void *arg, int issync, struct intrframe *frame)
{
	globaldata_t gd = mycpu;
	struct callout *c = arg;
	softclock_pcpu_t sc;

	/*
	 * Only the fast path can run in an IPI.  Chain the stop request
	 * if we are racing cpu changes.
	 */
	for (;;) {
		globaldata_t tgd;
		int flags;
		int nflags;
		int cpuid;

		flags = c->c_flags;
		cpu_ccfence();

		/*
		 * Can't handle an armed callout in the fast path if it is
		 * not on the current cpu.  We must atomically increment the
		 * IPI count and break out of the fast path.
		 *
		 * If called from an IPI we chain the IPI instead.
		 */
		if (flags & CALLOUT_ARMED) {
			cpuid = CALLOUT_FLAGS_TO_CPU(flags);
			if (gd->gd_cpuid != cpuid) {
				tgd = globaldata_find(cpuid);
				lwkt_send_ipiq3(tgd, callout_stop_ipi,
						c, issync);
				break;
			}
		}

		/*
		 * NOTE: As an IPI ourselves we cannot wait for other IPIs
		 *	 to complete, and we are being executed in-order.
		 */

		/*
		 * Transition to the stopped state, recover the EXECUTED
		 * status, decrement the IPI count.  If pending we cannot
		 * clear ARMED until after we have removed (c) from the
		 * callwheel, and only if there are no more IPIs pending.
		 */
		nflags = flags & ~(CALLOUT_ACTIVE | CALLOUT_PENDING);
		nflags = nflags - 1;			/* dec ipi count */
		if ((flags & (CALLOUT_IPI_MASK | CALLOUT_PENDING)) == 1)
			nflags &= ~CALLOUT_ARMED;
		if ((flags & CALLOUT_IPI_MASK) == 1)
			nflags &= ~(CALLOUT_WAITING | CALLOUT_EXECUTED);

		if (atomic_cmpset_int(&c->c_flags, flags, nflags)) {
			/*
			 * Can only remove from callwheel if currently
			 * pending.
			 */
			if (flags & CALLOUT_PENDING) {
				sc = &softclock_pcpu_ary[gd->gd_cpuid];
				if (sc->next == c)
					sc->next = TAILQ_NEXT(c, c_links.tqe);
				TAILQ_REMOVE(
					&sc->callwheel[c->c_time & cwheelmask],
					c,
					c_links.tqe);
				c->c_func = NULL;

				/*
				 * NOTE: Can't clear ARMED until we have
				 *	 physically removed (c) from the
				 *	 callwheel.
				 *
				 * NOTE: WAITING bit race exists when doing
				 *	 unconditional bit clears.
				 */
				callout_maybe_clear_armed(c);
				if (c->c_flags & CALLOUT_WAITING)
					flags |= CALLOUT_WAITING;
			}

			/*
			 * ARMED has been cleared at this point and (c)
			 * might now be stale.  Only good for wakeup()s.
			 */
			if (flags & CALLOUT_WAITING)
				wakeup(c);
			break;
		}
		/* retry */
	}
}
/*
 * MPSAFE thread
 */
static void
vm_pagezero(void *arg)
{
	vm_page_t m = NULL;
	struct lwbuf *lwb = NULL;
	struct lwbuf lwb_cache;
	enum zeroidle_state state = STATE_IDLE;
	char *pg = NULL;
	int npages = 0;
	int sleep_time;
	int i = 0;
	int cpu = (int)(intptr_t)arg;
	int zero_state = 0;

	/*
	 * Adjust thread parameters before entering our loop.  The thread
	 * is started with the MP lock held and with normal kernel thread
	 * priority.
	 *
	 * Also put us on the last cpu for now.
	 *
	 * For now leave the MP lock held, the VM routines cannot be called
	 * with it released until tokenization is finished.
	 */
	lwkt_setpri_self(TDPRI_IDLE_WORK);
	lwkt_setcpu_self(globaldata_find(cpu));
	sleep_time = DEFAULT_SLEEP_TIME;

	/*
	 * Loop forever
	 */
	for (;;) {
		int zero_count;

		switch(state) {
		case STATE_IDLE:
			/*
			 * Wait for work.
			 */
			tsleep(&zero_state, 0, "pgzero", sleep_time);
			if (vm_page_zero_check(&zero_count, &zero_state))
				npages = idlezero_rate / 10;
			sleep_time = vm_page_zero_time(zero_count);
			if (npages)
				state = STATE_GET_PAGE;	/* Fallthrough */
			break;
		case STATE_GET_PAGE:
			/*
			 * Acquire page to zero
			 */
			if (--npages == 0) {
				state = STATE_IDLE;
			} else {
				m = vm_page_free_fromq_fast();
				if (m == NULL) {
					state = STATE_IDLE;
				} else {
					state = STATE_ZERO_PAGE;
					lwb = lwbuf_alloc(m, &lwb_cache);
					pg = (char *)lwbuf_kva(lwb);
					i = 0;
				}
			}
			break;
		case STATE_ZERO_PAGE:
			/*
			 * Zero-out the page
			 */
			while (i < PAGE_SIZE) {
				if (idlezero_nocache == 1)
					bzeront(&pg[i], IDLEZERO_RUN);
				else
					bzero(&pg[i], IDLEZERO_RUN);
				i += IDLEZERO_RUN;
				lwkt_yield();
			}
			state = STATE_RELEASE_PAGE;
			break;
		case STATE_RELEASE_PAGE:
			lwbuf_free(lwb);
			vm_page_flag_set(m, PG_ZERO);
			vm_page_free_toq(m);
			state = STATE_GET_PAGE;
			++idlezero_count;	/* non-locked, SMP race ok */
			break;
		}
		lwkt_yield();
	}
}