Beispiel #1
0
void Scheduler::finishFakeLeave(ThreadInfo* th) {
    assert(th->fakeLeave);
    DEBUG_FL("%s (%d)  @ 0x%lx finishFakeLeave()", GetSyscallName(th->fakeLeave->syscallNumber), th->fakeLeave->syscallNumber, th->fakeLeave->pc);
    assert_msg(th->state == RUNNING, "gid 0x%x invalid state %d", th->gid, th->state);
    FakeLeaveInfo* si = th->fakeLeave;
    fakeLeaves.remove(si);
    delete si;
    assert(th->fakeLeave == nullptr);
}
static bool PostPatchTimeoutSyscall(uint32_t tid, CONTEXT* ctxt, SYSCALL_STANDARD std, int syscall, ADDRINT prevIp, ADDRINT timeoutArgVal) {
    assert(inFakeTimeoutMode[tid]);
    int res = (int)PIN_GetSyscallNumber(ctxt, std);

    // Decide if it timed out
    bool timedOut;
    if (syscall == SYS_futex) {
        timedOut = (res == -ETIMEDOUT);
    } else {
        timedOut = (res == 0);
    }

    bool isSleeping = zinfo->sched->isSleeping(procIdx, tid);

    // Decide whether to retry
    bool retrySyscall;
    if (!timedOut) {
        if (isSleeping) zinfo->sched->notifySleepEnd(procIdx, tid);
        retrySyscall = false;
    } else {
        retrySyscall = isSleeping;
    }

    if (retrySyscall && zinfo->procArray[procIdx]->isInFastForward()) {
        warn("[%d] Fast-forwarding started, not retrying timeout syscall (%s)", tid, GetSyscallName(syscall));
        retrySyscall = false;
        assert(isSleeping);
        zinfo->sched->notifySleepEnd(procIdx, tid);
    }

    if (retrySyscall) {
        // ADDRINT curIp = PIN_GetContextReg(ctxt, REG_INST_PTR);
        //info("[%d] post-patch, retrying, IP: 0x%lx -> 0x%lx", tid, curIp, prevIp);
        PIN_SetContextReg(ctxt, REG_INST_PTR, prevIp);
        PIN_SetSyscallNumber(ctxt, std, syscall);
    } else {
        // Restore timeout arg
        PIN_SetSyscallArgument(ctxt, std, getTimeoutArg(syscall), timeoutArgVal);
        inFakeTimeoutMode[tid] = false;

        // Restore arg? I don't think we need this!
        /*if (syscall == SYS_futex) {
            PIN_SetSyscallNumber(ctxt, std, -ETIMEDOUT);
        } else {
            assert(syscall == SYS_epoll_wait || syscall == SYS_epoll_pwait || syscall == SYS_poll);
            PIN_SetSyscallNumber(ctxt, std, 0); //no events returned
        }*/
    }

    //info("[%d] post-patch %s (%d), timedOut %d, sleeping (orig) %d, retrying %d, orig res %d, patched res %d", tid, GetSyscallName(syscall), syscall, timedOut, isSleeping, retrySyscall, res, (int)PIN_GetSyscallNumber(ctxt, std));
    return retrySyscall;
}
Beispiel #3
0
// Accurate join-leave implementation
void Scheduler::syscallLeave(uint32_t pid, uint32_t tid, uint32_t cid, uint64_t pc, int syscallNumber, uint64_t arg0, uint64_t arg1) {
    futex_lock(&schedLock);
    uint32_t gid = getGid(pid, tid);
    ThreadInfo* th = contexts[cid].curThread;
    assert(th->gid == gid);
    assert_msg(th->cid == cid, "%d != %d", th->cid, cid);
    assert(th->state == RUNNING);
    assert_msg(pid < blockingSyscalls.size(), "%d >= %ld?", pid, blockingSyscalls.size());

    bool blacklisted = blockingSyscalls[pid].find(pc) != blockingSyscalls[pid].end();
    if (blacklisted || th->markedForSleep) {
        DEBUG_FL("%s @ 0x%lx calling leave(), reason: %s", GetSyscallName(syscallNumber), pc, blacklisted? "blacklist" : "sleep");
        futex_unlock(&schedLock);
        leave(pid, tid, cid);
    } else {
        DEBUG_FL("%s @ 0x%lx skipping leave()", GetSyscallName(syscallNumber), pc);
        FakeLeaveInfo* si = new FakeLeaveInfo(pc, th, syscallNumber, arg0, arg1);
        fakeLeaves.push_back(si);
        // FIXME(dsm): zsim.cpp's SyscallEnter may be checking whether we are in a syscall and not calling us.
        // If that's the case, this would be stale, which may lead to some false positives/negatives
        futex_unlock(&schedLock);
    }
}
Beispiel #4
0
void Scheduler::watchdogThreadFunc() {
    info("Started scheduler watchdog thread");
    uint64_t lastPhase = 0;
    int multiplier = 1;
    uint64_t lastMs = 0;
    uint64_t fakeLeaveStalls = 0;
    while (true) {
        TrueSleep(multiplier*WATCHDOG_INTERVAL_USEC);

        if (zinfo->terminationConditionMet) {
            // Synchronize to avoid racing with EndOfPhaseActions code
            // (zinfo->terminationConditionMet is set on EndOfPhaseActions,
            // which has schedLock held, we must let it finish)
            futex_lock(&schedLock);
            info("Terminating scheduler watchdog thread");
            futex_unlock(&schedLock);
            SimEnd();
        }

        //Fastpath (unlocked, benign read races, only modifies local state)
        if (lastPhase != curPhase && pendingPidCleanups.size() == 0) {
            lastPhase = curPhase;
            fakeLeaveStalls = 0;
            if (multiplier < WATCHDOG_MAX_MULTIPLER) multiplier++;
            continue;
        }

        //if (lastPhase == curPhase && scheduledThreads == outQueue.size() && !sleepQueue.empty()) info("Mult %d curPhase %ld", multiplier, curPhase);

        futex_lock(&schedLock);

        if (lastPhase == curPhase && !fakeLeaves.empty() && (fakeLeaves.front()->th->futexJoin.action != FJA_WAKE)) {
            if (++fakeLeaveStalls >= WATCHDOG_STALL_THRESHOLD) {
                info("Detected possible stall due to fake leaves (%ld current)", fakeLeaves.size());
                // Uncomment to print all leaves
                FakeLeaveInfo* pfl = fakeLeaves.front();
                while (pfl) {
                    info(" [%d/%d] %s (%d) @ 0x%lx", getPid(pfl->th->gid), getTid(pfl->th->gid), GetSyscallName(pfl->syscallNumber), pfl->syscallNumber, pfl->pc);
                    pfl = pfl->next;
                }

                // Trigger a leave() on the first process, if the process's blacklist regex allows it
                FakeLeaveInfo* fl = fakeLeaves.front();
                ThreadInfo* th = fl->th;
                uint32_t pid = getPid(th->gid);
                uint32_t tid = getTid(th->gid);
                uint32_t cid = th->cid;

                const g_string& sbRegexStr = zinfo->procArray[pid]->getSyscallBlacklistRegex();
                std::regex sbRegex(sbRegexStr.c_str());
                if (std::regex_match(GetSyscallName(fl->syscallNumber), sbRegex)) {
                    // If this is the last leave we catch, it is the culprit for sure -> blacklist it
                    // Over time, this will blacklist every blocking syscall
                    // The root reason for being conservative though is that we don't have a sure-fire
                    // way to distinguish IO waits from truly blocking syscalls (TODO)
                    if (fakeLeaves.size() == 1) {
                        info("Blacklisting from future fake leaves: [%d] %s @ 0x%lx | arg0 0x%lx arg1 0x%lx", pid, GetSyscallName(fl->syscallNumber), fl->pc, fl->arg0, fl->arg1);
                        blockingSyscalls[pid].insert(fl->pc);
                    }

                    uint64_t pc = fl->pc;
                    do {
                        finishFakeLeave(th);

                        futex_unlock(&schedLock);
                        leave(pid, tid, cid);
                        futex_lock(&schedLock);

                        // also do real leave for other threads blocked at the same pc ...
                        fl = fakeLeaves.front();
                        if (fl == nullptr || getPid(th->gid) != pid || fl->pc != pc)
                            break;
                        th = fl->th;
                        tid = getTid(th->gid);
                        cid = th->cid;
                        // ... until a lower bound on queue size, in order to make blacklist work
                    } while (fakeLeaves.size() > 8);
                } else {
                    info("Skipping, [%d] %s @ 0x%lx | arg0 0x%lx arg1 0x%lx does not match blacklist regex (%s)",
                            pid, GetSyscallName(fl->syscallNumber), fl->pc, fl->arg0, fl->arg1, sbRegexStr.c_str());
                }
                fakeLeaveStalls = 0;
            }
        } else {
            fakeLeaveStalls = 0;
        }

        if (lastPhase == curPhase && scheduledThreads == outQueue.size() && !sleepQueue.empty()) {
            //info("Watchdog Thread: Sleep dep detected...")
            int64_t wakeupPhase = sleepQueue.front()->wakeupPhase;
            int64_t wakeupCycles = (wakeupPhase - curPhase)*zinfo->phaseLength;
            int64_t wakeupUsec = (wakeupCycles > 0)? wakeupCycles/zinfo->freqMHz : 0;

            //info("Additional usecs of sleep %ld", wakeupUsec);
            if (wakeupUsec > 10*1000*1000) warn("Watchdog sleeping for a long time due to long sleep, %ld secs", wakeupUsec/1000/1000);

            futex_unlock(&schedLock);
            TrueSleep(WATCHDOG_INTERVAL_USEC + wakeupUsec);
            futex_lock(&schedLock);

            if (lastPhase == curPhase && scheduledThreads == outQueue.size() && !sleepQueue.empty()) {
                ThreadInfo* sth = sleepQueue.front();
                uint64_t curMs = curPhase*zinfo->phaseLength/zinfo->freqMHz/1000;
                uint64_t endMs = sth->wakeupPhase*zinfo->phaseLength/zinfo->freqMHz/1000;
                (void)curMs; (void)endMs; //make gcc happy
                if (curMs > lastMs + 1000) {
                    info("Watchdog Thread: Driving time forward to avoid deadlock on sleep (%ld -> %ld ms)", curMs, endMs);
                    lastMs += 1000;
                }
                while (sth->state == SLEEPING) {
                    idlePhases.inc();
                    callback(); //sth will eventually get woken up

                    if (futex_haswaiters(&schedLock)) {
                        //happens commonly with multiple sleepers and very contended I/O...
                        //info("Sched: Threads waiting on advance, startPhase %ld curPhase %ld", lastPhase, curPhase);
                        break;
                    }

                    if (zinfo->terminationConditionMet) {
                        info("Termination condition met inside watchdog thread loop, exiting");
                        break;
                    }
                }
                idlePeriods.inc();
                multiplier = 0;
            }
        }

        if (multiplier < WATCHDOG_MAX_MULTIPLER) {
            multiplier++;
        }

        lastPhase = curPhase;

        //Lazily clean state of processes that terminated abruptly
        //NOTE: For now, we rely on the process explicitly telling us that it's going to terminate.
        //We could make this self-checking by periodically checking for liveness of the processes we're supposedly running.
        //The bigger problem is that if we get SIGKILL'd, we may not even leave a consistent zsim state behind.
        while (pendingPidCleanups.size()) {
            std::pair<uint32_t, uint32_t> p = pendingPidCleanups.back();
            uint32_t pid = p.first; //the procIdx pid
            uint32_t osPid = p.second;

            std::stringstream ss;
            ss << "/proc/" << osPid;
            struct stat dummy;
            if (stat(ss.str().c_str(), &dummy) == 0) {
                info("[watchdog] Deferring cleanup of pid %d (%d), not finished yet", pid, osPid);
                break;
            }

            pendingPidCleanups.pop_back(); //must happen while we have the lock

            futex_unlock(&schedLock);
            processCleanup(pid);
            futex_lock(&schedLock);
        }

        if (terminateWatchdogThread) {
            futex_unlock(&schedLock);
            break;
        } else {
            futex_unlock(&schedLock);
        }
    }
    info("Finished scheduler watchdog thread");
}