/* Helper that actually makes sure a vcore is running.  Call this if you
 * really want vcoreid to run.  More often, you'll want to call the regular
 * version. */
static void __ensure_vcore_runs(uint32_t vcoreid)
{
	if (vcore_is_preempted(vcoreid)) {
		printd("[vcore]: VC %d changing to VC %d\n", vcore_id(), vcoreid);
		/* Note that at this moment, the vcore could still be mapped (we're
		 * racing with __preempt).  If that happens, we'll just fail the
		 * sys_change_vcore(), and next time __ensure runs we'll get it. */
		/* We want to recover them from preemption.  Since we know they have
		 * notifs disabled, they will need to be directly restarted, so we can
		 * skip the other logic and cut straight to the sys_change_vcore(). */
		sys_change_vcore(vcoreid, FALSE);
	}
}
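/* A minimal sketch of the "regular version" referenced above, under the
 * assumption that its only extra job is filtering out our own vcoreid: if
 * we're running this check, we clearly aren't preempted, and a
 * sys_change_vcore() to ourselves would be pointless. */
static void ensure_vcore_runs(uint32_t vcoreid)
{
	if (vcoreid != vcore_id())
		__ensure_vcore_runs(vcoreid);
}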
/* Similar to the original PDR lock, this tracks the lockholder for better
 * recovery from preemptions.  Under heavy contention, changing to the
 * lockholder instead of pred makes it more likely to have a vcore outside the
 * MCS chain handle the preemption.  If that never happens, performance will
 * suffer.
 *
 * Simply checking the lockholder causes a lot of unnecessary traffic, so we
 * first look for signs of preemption in read-mostly locations (by comparison,
 * the lockholder changes on every lock/unlock).
 *
 * We also use the "qnodes are in the lock" style, which is slightly slower
 * than using the stack in regular MCS/MCSPDR locks, but it speeds PDR up a
 * bit by not having to read other qnodes' memory to determine their vcoreid.
 * The slowdown may be due to some weird caching/prefetch settings (like
 * Adjacent Cacheline Prefetch).
 *
 * Note that these locks, like all PDR locks, have opportunities to
 * accidentally ensure some vcore runs that isn't in the chain.  Whenever we
 * read lockholder or even pred, that particular vcore might subsequently
 * unlock and then get preempted (or change_to someone else) before we ensure
 * they run.  If this happens and there is another VC in the MCS chain, it
 * will make sure the right cores run.  If there are no other vcores in the
 * chain, it is up to the rest of the vcore/event handling system to deal
 * with this, which should happen when one of the other vcores handles the
 * preemption message generated by our change_to. */
void __mcs_pdr_lock(struct mcs_pdr_lock *lock, struct mcs_pdr_qnode *qnode)
{
	struct mcs_pdr_qnode *predecessor;
	uint32_t pred_vcoreid;
	struct mcs_pdr_qnode *qnode0 = qnode - vcore_id();
	seq_ctr_t seq;

	qnode->next = 0;
	cmb();	/* swap provides a CPU mb() */
	predecessor = atomic_swap_ptr((void **)&lock->lock, qnode);
	if (predecessor) {
		qnode->locked = 1;
		pred_vcoreid = predecessor - qnode0;	/* can compute this whenever */
		wmb();	/* order the locked write before the next write */
		predecessor->next = qnode;
		seq = ACCESS_ONCE(__procinfo.coremap_seqctr);
		/* no need for a wrmb(), since this will only get unlocked after
		 * they read our pred->next write */
		while (qnode->locked) {
			/* Check to see if anything is amiss.  If someone in the chain
			 * is preempted, then someone will notice.  Simply checking our
			 * pred isn't that great of an indicator of preemption.  The
			 * reason is that the offline vcore is most likely the
			 * lockholder (under heavy lock contention), and we want someone
			 * farther back in the chain to notice (someone that will stay
			 * preempted long enough for a vcore outside the chain to
			 * recover them).  Checking the seqctr will tell us of any
			 * preempts since we started, so if a storm starts while we're
			 * spinning, we can join in and try to save the lockholder
			 * before its successor gets it.
			 *
			 * Also, if we're the lockholder, then we need to let our pred
			 * run so they can hand us the lock. */
			if (vcore_is_preempted(pred_vcoreid) ||
			    seq != __procinfo.coremap_seqctr) {
				if (lock->lockholder_vcoreid == MCSPDR_NO_LOCKHOLDER ||
				    lock->lockholder_vcoreid == vcore_id())
					ensure_vcore_runs(pred_vcoreid);
				else
					ensure_vcore_runs(lock->lockholder_vcoreid);
			}
			cpu_relax();
		}
	} else {
		lock->lockholder_vcoreid = vcore_id();
	}
}
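/* For context, a hedged sketch of the matching unlock path, under the same
 * "qnodes are in the lock" assumptions as above (qnode0 indexing into a
 * vcore-indexed qnode array, the MCSPDR_NO_LOCKHOLDER sentinel, and an
 * atomic_cas_ptr() helper, which is assumed here alongside the
 * atomic_swap_ptr() used in the lock path).  This is illustrative, not the
 * full implementation: a complete slow path would also do PDR-style recovery
 * (ensure_vcore_runs() on the tail, computable as
 * (struct mcs_pdr_qnode *)lock->lock - qnode0) while waiting on qnode->next,
 * since our successor could be preempted after swapping into the tail but
 * before linking in behind us. */
void __mcs_pdr_unlock_sketch(struct mcs_pdr_lock *lock,
                             struct mcs_pdr_qnode *qnode)
{
	struct mcs_pdr_qnode *qnode0 = qnode - vcore_id();

	/* Check if someone is already waiting on us to unlock */
	if (qnode->next == 0) {
		cmb();	/* no CPU mb needed; the atomic_cas_ptr() serializes */
		/* Fast path: we're still the tail, so swap in 0 (unlocked). */
		if (atomic_cas_ptr((void **)&lock->lock, qnode, 0)) {
			/* Racy with a new lockholder's write, but clobbering
			 * lockholder_vcoreid only costs extra recovery traffic, not
			 * correctness; it gets sorted out on a later lock/unlock. */
			lock->lockholder_vcoreid = MCSPDR_NO_LOCKHOLDER;
			return;
		}
		/* Someone swapped in behind us; wait for them to link in.  (This
		 * is where the omitted recovery on the tail vcore would go.) */
		while (qnode->next == 0)
			cpu_relax();
	}
	/* Hand off: publish the new lockholder, then release them.  Their
	 * vcoreid is their qnode's index from qnode0, as in the lock path. */
	lock->lockholder_vcoreid = qnode->next - qnode0;
	wmb();	/* order the lockholder write before the locked write */
	qnode->next->locked = 0;
}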