void TransDispatcher::Run() { processCleanup(); m_tick.Reset(); }
void Scheduler::watchdogThreadFunc() { info("Started scheduler watchdog thread"); uint64_t lastPhase = 0; int multiplier = 1; uint64_t lastMs = 0; uint64_t fakeLeaveStalls = 0; while (true) { TrueSleep(multiplier*WATCHDOG_INTERVAL_USEC); if (zinfo->terminationConditionMet) { // Synchronize to avoid racing with EndOfPhaseActions code // (zinfo->terminationConditionMet is set on EndOfPhaseActions, // which has schedLock held, we must let it finish) futex_lock(&schedLock); info("Terminating scheduler watchdog thread"); futex_unlock(&schedLock); SimEnd(); } //Fastpath (unlocked, benign read races, only modifies local state) if (lastPhase != curPhase && pendingPidCleanups.size() == 0) { lastPhase = curPhase; fakeLeaveStalls = 0; if (multiplier < WATCHDOG_MAX_MULTIPLER) multiplier++; continue; } //if (lastPhase == curPhase && scheduledThreads == outQueue.size() && !sleepQueue.empty()) info("Mult %d curPhase %ld", multiplier, curPhase); futex_lock(&schedLock); if (lastPhase == curPhase && !fakeLeaves.empty() && (fakeLeaves.front()->th->futexJoin.action != FJA_WAKE)) { if (++fakeLeaveStalls >= WATCHDOG_STALL_THRESHOLD) { info("Detected possible stall due to fake leaves (%ld current)", fakeLeaves.size()); // Uncomment to print all leaves FakeLeaveInfo* pfl = fakeLeaves.front(); while (pfl) { info(" [%d/%d] %s (%d) @ 0x%lx", getPid(pfl->th->gid), getTid(pfl->th->gid), GetSyscallName(pfl->syscallNumber), pfl->syscallNumber, pfl->pc); pfl = pfl->next; } // Trigger a leave() on the first process, if the process's blacklist regex allows it FakeLeaveInfo* fl = fakeLeaves.front(); ThreadInfo* th = fl->th; uint32_t pid = getPid(th->gid); uint32_t tid = getTid(th->gid); uint32_t cid = th->cid; const g_string& sbRegexStr = zinfo->procArray[pid]->getSyscallBlacklistRegex(); std::regex sbRegex(sbRegexStr.c_str()); if (std::regex_match(GetSyscallName(fl->syscallNumber), sbRegex)) { // If this is the last leave we catch, it is the culprit for sure -> blacklist it // Over time, this will blacklist every blocking syscall // The root reason for being conservative though is that we don't have a sure-fire // way to distinguish IO waits from truly blocking syscalls (TODO) if (fakeLeaves.size() == 1) { info("Blacklisting from future fake leaves: [%d] %s @ 0x%lx | arg0 0x%lx arg1 0x%lx", pid, GetSyscallName(fl->syscallNumber), fl->pc, fl->arg0, fl->arg1); blockingSyscalls[pid].insert(fl->pc); } uint64_t pc = fl->pc; do { finishFakeLeave(th); futex_unlock(&schedLock); leave(pid, tid, cid); futex_lock(&schedLock); // also do real leave for other threads blocked at the same pc ... fl = fakeLeaves.front(); if (fl == nullptr || getPid(th->gid) != pid || fl->pc != pc) break; th = fl->th; tid = getTid(th->gid); cid = th->cid; // ... until a lower bound on queue size, in order to make blacklist work } while (fakeLeaves.size() > 8); } else { info("Skipping, [%d] %s @ 0x%lx | arg0 0x%lx arg1 0x%lx does not match blacklist regex (%s)", pid, GetSyscallName(fl->syscallNumber), fl->pc, fl->arg0, fl->arg1, sbRegexStr.c_str()); } fakeLeaveStalls = 0; } } else { fakeLeaveStalls = 0; } if (lastPhase == curPhase && scheduledThreads == outQueue.size() && !sleepQueue.empty()) { //info("Watchdog Thread: Sleep dep detected...") int64_t wakeupPhase = sleepQueue.front()->wakeupPhase; int64_t wakeupCycles = (wakeupPhase - curPhase)*zinfo->phaseLength; int64_t wakeupUsec = (wakeupCycles > 0)? wakeupCycles/zinfo->freqMHz : 0; //info("Additional usecs of sleep %ld", wakeupUsec); if (wakeupUsec > 10*1000*1000) warn("Watchdog sleeping for a long time due to long sleep, %ld secs", wakeupUsec/1000/1000); futex_unlock(&schedLock); TrueSleep(WATCHDOG_INTERVAL_USEC + wakeupUsec); futex_lock(&schedLock); if (lastPhase == curPhase && scheduledThreads == outQueue.size() && !sleepQueue.empty()) { ThreadInfo* sth = sleepQueue.front(); uint64_t curMs = curPhase*zinfo->phaseLength/zinfo->freqMHz/1000; uint64_t endMs = sth->wakeupPhase*zinfo->phaseLength/zinfo->freqMHz/1000; (void)curMs; (void)endMs; //make gcc happy if (curMs > lastMs + 1000) { info("Watchdog Thread: Driving time forward to avoid deadlock on sleep (%ld -> %ld ms)", curMs, endMs); lastMs += 1000; } while (sth->state == SLEEPING) { idlePhases.inc(); callback(); //sth will eventually get woken up if (futex_haswaiters(&schedLock)) { //happens commonly with multiple sleepers and very contended I/O... //info("Sched: Threads waiting on advance, startPhase %ld curPhase %ld", lastPhase, curPhase); break; } if (zinfo->terminationConditionMet) { info("Termination condition met inside watchdog thread loop, exiting"); break; } } idlePeriods.inc(); multiplier = 0; } } if (multiplier < WATCHDOG_MAX_MULTIPLER) { multiplier++; } lastPhase = curPhase; //Lazily clean state of processes that terminated abruptly //NOTE: For now, we rely on the process explicitly telling us that it's going to terminate. //We could make this self-checking by periodically checking for liveness of the processes we're supposedly running. //The bigger problem is that if we get SIGKILL'd, we may not even leave a consistent zsim state behind. while (pendingPidCleanups.size()) { std::pair<uint32_t, uint32_t> p = pendingPidCleanups.back(); uint32_t pid = p.first; //the procIdx pid uint32_t osPid = p.second; std::stringstream ss; ss << "/proc/" << osPid; struct stat dummy; if (stat(ss.str().c_str(), &dummy) == 0) { info("[watchdog] Deferring cleanup of pid %d (%d), not finished yet", pid, osPid); break; } pendingPidCleanups.pop_back(); //must happen while we have the lock futex_unlock(&schedLock); processCleanup(pid); futex_lock(&schedLock); } if (terminateWatchdogThread) { futex_unlock(&schedLock); break; } else { futex_unlock(&schedLock); } } info("Finished scheduler watchdog thread"); }