uint64_t Scheduler::getFutexWakePhase(bool realtime, FutexInfo fi, CONTEXT* ctxt, SYSCALL_STANDARD std) { int64_t waitNsec = 0; uint64_t wakeupPhase = 0; waitNsec = fi.timeout.tv_sec*1000000000L + fi.timeout.tv_nsec; if (fi.op & FUTEX_CLOCK_REALTIME || realtime) { uint32_t domain = zinfo->procArray[procIdx]->getClockDomain(); uint64_t simNs = cyclesToNs(zinfo->globPhaseCycles); uint64_t offsetNs = simNs + zinfo->clockDomainInfo[domain].realtimeOffsetNs; warn(" REALTIME FUTEX: %ld %ld %ld %ld", waitNsec, simNs, offsetNs, waitNsec-offsetNs); waitNsec = (waitNsec > (int64_t)offsetNs)? (waitNsec - offsetNs) : 0; } if (waitNsec > 0) { struct timespec fakeTimeouts = (struct timespec){0}; //Never timeout. PIN_SetSyscallArgument(ctxt, std, 3, (ADDRINT)&fakeTimeouts); uint64_t waitCycles = waitNsec*zinfo->freqMHz/1000; uint64_t waitPhases = waitCycles/zinfo->phaseLength; wakeupPhase = zinfo->numPhases + waitPhases; } return wakeupPhase; } bool Scheduler::futexSynchronized(uint32_t pid, uint32_t tid, FutexInfo fi) { futex_lock(&schedLock); uint32_t gid = getGid(pid, tid); ThreadInfo* th = gidMap[gid]; futex_unlock(&schedLock); while (true) { int futex_res = syscall(SYS_futex, th->futexWord, FUTEX_WAIT, 1 /*a racing thread waking us up will change value to 0, and we won't block*/, nullptr, nullptr, 0); if (futex_res == 0 || th->futexWord != 1) break; } join(pid, tid); return true; }
static bool PrePatchTimeoutSyscall(uint32_t tid, CONTEXT* ctxt, SYSCALL_STANDARD std, int syscall) { assert(!inFakeTimeoutMode[tid]); // canary: this will probably fail... int64_t waitNsec = 0; // Per-syscall manipulation. This code either succeeds, fakes timeout value and sets waitNsec, or returns false int timeoutArg = getTimeoutArg(syscall); if (syscall == SYS_futex) { // Check preconditions assert(timeoutArg == 3); int* uaddr = (int*) PIN_GetSyscallArgument(ctxt, std, 0); int op = (int) PIN_GetSyscallArgument(ctxt, std, 1); const struct timespec* timeout = (const struct timespec*) PIN_GetSyscallArgument(ctxt, std, 3); //info("FUTEX op %d waitOp %d uaddr %p ts %p", op, isFutexWaitOp(op), uaddr, timeout); if (!(uaddr && isFutexWaitOp(op) && timeout)) return false; // not a timeout FUTEX_WAIT waitNsec = timeout->tv_sec*1000000000L + timeout->tv_nsec; if (op | FUTEX_CLOCK_REALTIME) { // NOTE: FUTEX_CLOCK_REALTIME is not a documented interface AFAIK, but looking at the Linux source code + with some verification, this is the xlat uint32_t domain = zinfo->procArray[procIdx]->getClockDomain(); uint64_t simNs = cyclesToNs(zinfo->globPhaseCycles); uint64_t offsetNs = simNs + zinfo->clockDomainInfo[domain].realtimeOffsetNs; //info(" REALTIME FUTEX: %ld %ld %ld %ld", waitNsec, simNs, offsetNs, waitNsec-offsetNs); waitNsec = (waitNsec > (int64_t)offsetNs)? (waitNsec - offsetNs) : 0; } if (waitNsec <= 0) return false; // while technically waiting, this does not block. I'm guessing this is done for trylocks? It's weird. fakeTimeouts[tid].tv_sec = 0; fakeTimeouts[tid].tv_nsec = 20*1000*1000; // timeout every 20ms of actual host time PIN_SetSyscallArgument(ctxt, std, 3, (ADDRINT)&fakeTimeouts[tid]); } else { assert(syscall == SYS_epoll_wait || syscall == SYS_epoll_pwait || syscall == SYS_poll); int timeout = (int) PIN_GetSyscallArgument(ctxt, std, timeoutArg); if (timeout <= 0) return false; //info("[%d] pre-patch epoll_wait/pwait", tid); PIN_SetSyscallArgument(ctxt, std, timeoutArg, 20); // 20ms timeout waitNsec = ((uint64_t)timeout)*1000*1000; // timeout is in ms } //info("[%d] pre-patch %s (%d) waitNsec = %ld", tid, GetSyscallName(syscall), syscall, waitNsec); uint64_t waitCycles = waitNsec*zinfo->freqMHz/1000; uint64_t waitPhases = waitCycles/zinfo->phaseLength; if (waitPhases < 2) waitPhases = 2; // at least wait 2 phases; this should basically eliminate the chance that we get a SIGSYS before we start executing the syscal instruction uint64_t wakeupPhase = zinfo->numPhases + waitPhases; /*volatile uint32_t* futexWord =*/ zinfo->sched->markForSleep(procIdx, tid, wakeupPhase); // we still want to mark for sleep, bear with me... inFakeTimeoutMode[tid] = true; return true; }