/* Posts an IPC message to the simulator. Messages that can be consumed early
 * go to ipcEarlyMessageQueue, all others to ipcMessageQueue. When `blocking`
 * is set, the call registers the message in ackMessages and polls (dropping
 * the queue lock while sleeping) until the consumer acknowledges it. */
void SendIPCMessage(ipc_message_t msg, bool blocking) {
    /* Pick the queue before tagging the message as blocking. */
    MessageQueue* queue = msg.ConsumableEarly() ? ipcEarlyMessageQueue : ipcMessageQueue;
    msg.blocking = blocking;

#ifdef IPC_DEBUG
    lk_lock(printing_lock, 1);
    std::cerr << "[SEND] IPC message, ID: " << msg.id << std::endl;
    lk_unlock(printing_lock);
#endif

    lk_lock(lk_ipcMessageQueue, 1);
    queue->push_back(msg);
    if (blocking)
        (*ackMessages)[msg] = false;
    lk_unlock(lk_ipcMessageQueue);

    if (!blocking)
        return;

    /* Spin until the consumer flips our ack flag; sleep between probes so we
     * don't hold the queue lock (or a core) continuously. */
    lk_lock(lk_ipcMessageQueue, 1);
    while (!ackMessages->at(msg)) {
        lk_unlock(lk_ipcMessageQueue);
        xio_sleep(10);
        lk_lock(lk_ipcMessageQueue, 1);
    }
    lk_unlock(lk_ipcMessageQueue);
}
/* ========================================================================== */
/* Pin callback invoked when the ILDJIT executor starts running application
 * code. Flushes the Pin code cache, records the executor thread id, and flips
 * the global creation/execution flags under ildjit_lock. If the warm-up
 * invocation was already reached, applies the late instrumentation now. */
VOID ILDJIT_startSimulation(THREADID tid, ADDRINT ip) {
    CODECACHE_FlushCache();
    lk_lock(&ildjit_lock, 1);
    /* We are stopping thread creation here, because we can capture the real
     * thread creation in Pin only on starting the thread (first insn), which
     * happens after the actual syscalls.
     * XXX: This way we can capture the creation of some compiler threads,
     * but this is generally fine, since they won't get executed */
    ILDJIT_executorCreation = false;
    ILDJIT_executionStarted = true;
    ILDJIT_executorTID = tid;
    cerr << "Starting execution, TID: " << tid << endl;
    lk_unlock(&ildjit_lock);
    if (reached_warm_invocation) {
        cerr << "Do Early!" << endl;
        doLateILDJITInstrumentation();
        cerr << "Done Early!" << endl;
    }
}
/* ========================================================================== */
/* Returns whether ILDJIT is currently in its executor-creation phase.
 * Reads the shared flag under ildjit_lock. */
BOOL ILDJIT_IsCreatingExecutor() {
    lk_lock(&ildjit_lock, 1);
    const bool creating = ILDJIT_executorCreation;
    lk_unlock(&ildjit_lock);
    return creating;
}
/* ========================================================================== */
/* Returns whether ILDJIT execution has started.
 * Reads the shared flag under ildjit_lock. */
BOOL ILDJIT_IsExecuting() {
    lk_lock(&ildjit_lock, 1);
    const bool executing = ILDJIT_executionStarted;
    lk_unlock(&ildjit_lock);
    return executing;
}
/* Deletes `timer`: cancels it (if pending) and returns its storage to the
 * owning state's timer pool. Both steps happen under the state lock. */
LK_API void lk_deltimer(lk_Timer *timer) {
    lk_TimerState *state = timer->ts;
    lk_lock(state->lock);
    lkT_canceltimer(state, timer);
    lk_poolfree(&state->timers, timer);
    lk_unlock(state->lock);
}
/* ========================================================================== */
/* Pin callback after a loop's start routine. For serial loops nested inside a
 * simulated parallel loop, stop ignoring this thread's instructions again. */
VOID ILDJIT_startLoop_after(THREADID tid, ADDRINT ip) {
    if (!simulating_parallel_loop)
        return;
    thread_state_t* state = get_tls(tid);
    lk_lock(&state->lock, tid + 1);
    state->ignore = false;
    lk_unlock(&state->lock);
}
/* Clears the ignore flag for thread `tid`, resuming its instrumentation.
 * No-op unless we are actively simulating. */
VOID PTHREAD_stopIgnore(THREADID tid) {
    if (ExecMode != EXECUTION_MODE_SIMULATE)
        return;
    thread_state_t* state = get_tls(tid);
    lk_lock(&state->lock, tid + 1);
    state->ignore = false;
    lk_unlock(&state->lock);
}
/* ========================================================================== */
/* Pin callback: ILDJIT has begun creating executor threads. Sets the
 * creation flag under ildjit_lock (cleared later in ILDJIT_startSimulation). */
VOID ILDJIT_ExecutorCreate(THREADID tid) {
    lk_lock(&ildjit_lock, 1);
    ILDJIT_executorCreation = true;
    cerr << "Starting creation, TID: " << tid << endl;
    lk_unlock(&ildjit_lock);
}
/* Pin callback at the start of every ILDJIT loop. Tracks per-loop invocation
 * counts (zero-based: the first call records 0) and, when the configured
 * warm/start/end loop reaches its configured invocation number, flips the
 * corresponding reached_* flag. The warm case additionally applies the late
 * instrumentation immediately. */
VOID ILDJIT_startLoop(THREADID tid, ADDRINT ip, ADDRINT loop) {
    // This is for when there are serial loops within an executing parallel loop.
    if (simulating_parallel_loop) {
        thread_state_t* tstate = get_tls(tid);
        lk_lock(&tstate->lock, tid + 1);
        tstate->ignore = true;
        lk_unlock(&tstate->lock);
    }
    /* `loop` is a pointer to a C string naming the loop. */
    string loop_string = (string)(char*) loop;
    // Increment invocation counter for this loop
    if (invocation_counts.count(loop_string) == 0) {
        invocation_counts[loop_string] = 0;
    } else {
        invocation_counts[loop_string]++;
    }
    if (KnobWarmLLC.Value()) {
        /* Warm-up trigger: this loop name + invocation number matches the
         * configured warm point. */
        if ((!reached_warm_invocation) && (warm_loop == loop_string) &&
            (invocation_counts[loop_string] == warm_loop_invocation)) {
            assert(invocation_counts[loop_string] == warm_loop_invocation);
            cerr << "Called warmLoop() for the warm invocation!:" << loop_string << endl;
            reached_warm_invocation = true;
            cerr << "Detected that we need to warm!:" << loop_string << endl;
            cerr << "FastWarm runtime:";
            printElapsedTime();
            cerr << "Do late!" << endl;
            doLateILDJITInstrumentation();
            cerr << "Done late!" << endl;
        }
    }
    /* Simulation-start trigger. */
    if ((!reached_start_invocation) && (start_loop == loop_string) &&
        (invocation_counts[loop_string] == start_loop_invocation)) {
        assert(invocation_counts[loop_string] == start_loop_invocation);
        cerr << "Called startLoop() for the start invocation!:" << loop_string << endl;
        reached_start_invocation = true;
        /* Iteration of (UINT32)-1 means "any iteration": start at the next
         * parallel loop rather than a specific iteration. */
        if (start_loop_iteration == (UINT32)-1) {
            cerr << "Detected that we need to start the next parallel loop!:" << loop_string
                 << endl;
            reached_start_iteration = true;
        }
    }
    /* Simulation-end trigger (mirrors the start case). */
    if ((!reached_end_invocation) && (end_loop == loop_string) &&
        (invocation_counts[loop_string] == end_loop_invocation)) {
        assert(invocation_counts[loop_string] == end_loop_invocation);
        cerr << "Called startLoop() for the end invocation!:" << (CHAR*)loop << endl;
        reached_end_invocation = true;
        if (end_loop_iteration == (UINT32)-1) {
            cerr << "Detected that we need to end the next parallel loop!:" << loop_string
                 << endl;
            reached_end_iteration = true;
        }
    }
}
/* Allocates a timer from service `svr`'s timer pool and initializes it with
 * handler `cb` and userdata `ud`. The timer starts disarmed
 * (index == LK_TIMER_NOINDEX); use lk_starttimer() to arm it.
 * Allocation and field setup all happen under the state lock. */
LK_API lk_Timer *lk_newtimer(lk_Service *svr, lk_TimerHandler *cb, void *ud) {
    lk_TimerState *state = lkT_getstate(svr);
    lk_State *S = lk_state((lk_Slot*)svr);
    lk_Timer *t;
    lk_lock(state->lock);
    t = (lk_Timer*)lk_poolalloc(&state->timers);
    t->u.ud = ud;
    t->handler = cb;
    t->ts = state;
    t->service = lk_self(S);
    t->index = LK_TIMER_NOINDEX;
    lk_unlock(state->lock);
    return t;
}
// Barrier-style core allocation. Each process checks in once with its scaling
// data; callers return -1 until every process has checked in (callers are
// expected to retry). The first caller past the barrier runs the optimizer;
// everyone then reads its own allocation, and the last caller to check out
// resets state for the next round. Returns the number of cores granted.
// All shared state is guarded by allocator_lock.
int LocallyOptimalAllocator::AllocateCoresForProcess(int asid,
                                                     std::vector<double> scaling,
                                                     double serial_runtime) {
    lk_lock(&allocator_lock, 1);
    // On the first call for this process, initialize some parameters.
    if (core_allocs.find(asid) == core_allocs.end()) {
        core_allocs[asid] = 1;
        process_scaling[asid] = speedup_model->ComputeScalingFactor(scaling);
        process_serial_runtime[asid] = serial_runtime;
        process_sync.num_checked_in++;
    }
    // Wait for all processes in the system to check in before proceeding. If
    // not all processes have checked in, return -1.
    if (process_sync.num_checked_in < *num_processes) {
        // Erase this entry since this thread was not the last to check in.
        processes_to_unblock.erase(asid);
        lk_unlock(&allocator_lock);
        return -1;
    }
    assert(process_sync.num_checked_in == *num_processes);
    // Only the first thread that reaches this point needs to perform the
    // allocation optimization function. All other threads can wait for this to
    // complete and then simply use the output.
    if (!process_sync.allocation_complete) {
        speedup_model->OptimizeForTarget(core_allocs, process_scaling, process_serial_runtime);
        process_sync.allocation_complete = true;
        // Add all asids in core_allocs into the unblocking list.
        // NOTE(review): the list is keyed by whichever asid ran the optimizer
        // -- confirm consumers of processes_to_unblock expect that.
        // std::vector<int> unblock_asids;
        processes_to_unblock[asid] = std::vector<int>();
        for (auto it = core_allocs.begin(); it != core_allocs.end(); ++it)
            processes_to_unblock[asid].push_back(it->first);
    }
    int allocated_cores = core_allocs[asid];
    process_sync.num_checked_out++;
    if (process_sync.num_checked_out == *num_processes) {
        // The last thread executing this code will reset class variables for
        // the next allocation.
        ResetState();
    }
    lk_unlock(&allocator_lock);
    // Publish the allocation outside the lock.
    UpdateSHMAllocation(asid, allocated_cores);
    return allocated_cores;
}
/* Slot callback that dispatches one fired timer carried in `sig->data`.
 * The handler receives the elapsed time (emittime - starttime); a positive
 * return value re-arms the timer for that many ms, otherwise the timer is
 * returned to the pool under the state lock. */
static int lkT_refactor (lk_State *S, void *ud, lk_Slot *slot, lk_Signal *sig) {
    lk_TimerState *ts = (lk_TimerState*)ud;
    (void)slot;
    if (sig == NULL)
        return LK_OK;
    lk_Timer *timer = (lk_Timer*)sig->data;
    if (!timer->handler)
        return LK_OK;
    int next_delay = timer->handler(S, timer->u.ud, timer,
                                    timer->emittime - timer->starttime);
    if (next_delay > 0) {
        lk_starttimer(timer, next_delay);
    } else {
        lk_lock(ts->lock);
        lk_poolfree(&ts->timers, timer);
        lk_unlock(ts->lock);
    }
    return LK_OK;
}
/* Poller loop for the timer service. Each pass takes the state lock, runs
 * lkT_updatetimers() at the current time (which refreshes ts->nexttime), then
 * waits on the slot until the next deadline (-1 == wait forever when no timer
 * is pending). When lk_wait() reports LK_ERR the loop exits and all timer
 * resources are torn down and freed. */
static int lkT_poller (lk_State *S, void *ud, lk_Slot *slot, lk_Signal *sig) {
    lk_TimerState *ts = (lk_TimerState*)ud;
    lk_Time nexttime, current;
    (void)S;
    (void)sig;
    for (;;) {
        int waittime;
        lk_lock(ts->lock);
        /* Process timers due at `current`; ts->nexttime is the next deadline. */
        lkT_updatetimers(ts, current = lk_time());
        nexttime = ts->nexttime;
        assert(nexttime > current);
        lk_unlock(ts->lock);
        /* LK_FOREVER -> block indefinitely; otherwise sleep until deadline. */
        waittime = nexttime == LK_FOREVER ? -1 : (int)(nexttime - current);
        if (lk_wait(slot, NULL, waittime) == LK_ERR)
            break;
    }
    /* Shutdown path: release lock, pool, and the state itself. */
    ts->nexttime = LK_FOREVER;
    lk_freelock(ts->lock);
    lk_freemempool(&ts->timers);
    lk_free(S, ts->heap);
    lk_free(S, ts);
    return LK_OK;
}
/* ========================================================================== */
/* Pin callback when a parallel loop finishes. Pauses (or, past the end
 * invocation, shuts down) the simulation, marks all threads as ignored, pops
 * their per-loop state, and pops this loop off the loop-state stack, restoring
 * the enclosing loop's state when nested.
 * BUGFIX: the function's closing brace was missing, leaving the definition
 * unterminated; it has been restored. No other behavior changed. */
VOID ILDJIT_endParallelLoop(THREADID tid, ADDRINT loop, ADDRINT numIterations) {
#ifdef ILDJIT_DEBUG
    cerr << tid << ": Pausing simulation!" << endl;
#endif
    if (ExecMode == EXECUTION_MODE_SIMULATE) {
        if (reached_end_invocation) {
            cerr << tid << ": Shutting down early!" << endl;
            shutdownSimulation();
        }
        PauseSimulation();
        cerr << tid << ": Paused simulation!" << endl;
        first_invocation = false;
        /* Ignore every known thread and drop its per-loop state. */
        list<THREADID>::iterator it;
        ATOMIC_ITERATE(thread_list, it, thread_list_lock) {
            thread_state_t* tstate = get_tls(*it);
            lk_lock(&tstate->lock, tid + 1);
            tstate->ignore = true;
            tstate->pop_loop_state();
            lk_unlock(&tstate->lock);
        }
        CHAR* loop_name = (CHAR*)loop;
        UINT32 iterCount = loop_state->simmed_iteration_count - 1;
        cerr << "Ending loop: " << loop_name << " NumIterations:" << iterCount << endl;
        simulating_parallel_loop = false;
        *ss_prev = *ss_curr;
        /* Pop this loop; if nested, the enclosing loop becomes current. */
        assert(loop_states.size() > 0);
        loop_states.pop();
        if (loop_states.size() > 0) {
            loop_state = &(loop_states.top());
        }
    }
}
/* Arms `timer` to fire after `delayms`, delegating to lkT_starttimer()
 * under the owning state's lock. */
LK_API void lk_starttimer(lk_Timer *timer, lk_Time delayms) {
    lk_TimerState *state = timer->ts;
    lk_lock(state->lock);
    lkT_starttimer(state, timer, delayms);
    lk_unlock(state->lock);
}
/* ========================================================================== */
/* Syscall exit-side instrumentation: runs after every syscall returns.
 * For memory-management syscalls it forwards the resulting address-space
 * change to the simulator over IPC, using the argument values that
 * SyscallEntry stashed in the per-thread state. All work is serialized
 * under syscall_lock. */
VOID SyscallExit(THREADID threadIndex, CONTEXT* ictxt, SYSCALL_STANDARD std, VOID* v) {
    lk_lock(&syscall_lock, threadIndex + 1);
    ADDRINT retval = PIN_GetSyscallReturn(ictxt, std);
    ipc_message_t msg;
    thread_state_t* tstate = get_tls(threadIndex);
#ifdef SYSCALL_DEBUG
    stringstream log;
    log << tstate->tid << ": ";
#endif
    switch (tstate->last_syscall_number) {
    case __NR_brk:
#ifdef SYSCALL_DEBUG
        log << "Ret syscall brk(" << dec << tstate->last_syscall_number << ") addr: 0x" << hex
            << retval << dec << endl;
#endif
        if (tstate->last_syscall_arg1 != 0)
            msg.UpdateBrk(asid, tstate->last_syscall_arg1, true);
        /* Seemingly libc code calls sbrk(0) to get the initial value of the sbrk.
         * We intercept that and send result to zesto, so that we can correctly deal
         * with virtual memory. */
        else
            msg.UpdateBrk(asid, retval, false);
        SendIPCMessage(msg);
        break;
    case __NR_munmap:
#ifdef SYSCALL_DEBUG
        log << "Ret syscall munmap(" << dec << tstate->last_syscall_number << ") addr: 0x" << hex
            << tstate->last_syscall_arg1 << " length: " << tstate->last_syscall_arg2 << dec
            << endl;
#endif
        /* Mirror the unmap only if the host syscall succeeded. */
        if (retval != (ADDRINT)-1) {
            msg.Munmap(asid, tstate->last_syscall_arg1, tstate->last_syscall_arg2, false);
            SendIPCMessage(msg);
        }
        break;
    case __NR_mmap: // oldmap
#ifdef SYSCALL_DEBUG
        log << "Ret syscall oldmmap(" << dec << tstate->last_syscall_number << ") addr: 0x"
            << hex << retval << " length: " << tstate->last_syscall_arg1 << dec << endl;
#endif
        if (retval != (ADDRINT)-1) {
            msg.Mmap(asid, retval, tstate->last_syscall_arg1, false);
            SendIPCMessage(msg);
        }
        break;
#ifndef _LP64
    case __NR_mmap2: // ia32-only
#ifdef SYSCALL_DEBUG
        log << "Ret syscall mmap2(" << dec << tstate->last_syscall_number << ") addr: 0x" << hex
            << retval << " length: " << tstate->last_syscall_arg1 << dec << endl;
#endif
        if (retval != (ADDRINT)-1) {
            msg.Mmap(asid, retval, tstate->last_syscall_arg1, false);
            SendIPCMessage(msg);
        }
        break;
#endif // _LP64
    case __NR_mremap:
#ifdef SYSCALL_DEBUG
        log << "Ret syscall mremap(" << dec << tstate->last_syscall_number << ") " << hex
            << " old_addr: 0x" << tstate->last_syscall_arg1 << " old_length: "
            << tstate->last_syscall_arg2 << " new address: 0x" << retval << " new_length: "
            << tstate->last_syscall_arg3 << dec << endl;
#endif
        /* Model mremap as unmap(old range) + map(new range). */
        if (retval != (ADDRINT)-1) {
            msg.Munmap(asid, tstate->last_syscall_arg1, tstate->last_syscall_arg2, false);
            SendIPCMessage(msg);
            msg.Mmap(asid, retval, tstate->last_syscall_arg3, false);
            SendIPCMessage(msg);
        }
        break;
    case __NR_mprotect:
        /* No PROT_READ -> range is inaccessible, treat as an unmap; any
         * readable protection is treated as a (re)map. */
        if (retval != (ADDRINT)-1) {
            if ((tstate->last_syscall_arg3 & PROT_READ) == 0)
                msg.Munmap(asid, tstate->last_syscall_arg1, tstate->last_syscall_arg2, false);
            else
                msg.Mmap(asid, tstate->last_syscall_arg1, tstate->last_syscall_arg2, false);
            SendIPCMessage(msg);
        }
        break;
    /* Present ourself as if we have num_cores cores */
    /* case __NR_sysconf:
    #ifdef SYSCALL_DEBUG
        log << "Syscall sysconf (" << dec << syscall_num << ") ret" << endl;
    #endif
        if (tstate->last_syscall_arg1 == _SC_NPROCESSORS_ONLN)
            if ((INT32)retval != - 1) {
                PIN_SetContextReg(ictxt, REG_EAX, num_cores);
                PIN_ExecuteAt(ictxt);
            }
        break;*/
    case __NR_gettimeofday:
        AfterGettimeofday(threadIndex, retval);
#ifdef SYSCALL_DEBUG
        {
            timeval* tv = (struct timeval*)tstate->last_syscall_arg1;
            log << "Ret syscall gettimeofday(" << dec << tstate->last_syscall_number
                << ") old: " << retval << ", tv_sec: " << tv->tv_sec
                << ", tv_usec: " << tv->tv_usec << endl;
        }
#endif
        break;
    case __NR_futex: {
#ifdef SYSCALL_DEBUG
        log << "Ret syscall futex(" << hex << tstate->last_syscall_arg1 << dec << ","
            << tstate->last_syscall_arg2 << ")" << endl;
#endif
    } break;
    default:
        break;
    }
#ifdef SYSCALL_DEBUG
    cerr << log.str();
#endif
    /* Clear the stashed number so a stale value can't match next time. */
    tstate->last_syscall_number = 0;
    lk_unlock(&syscall_lock);
}
/* ========================================================================== */
/* Syscall entry-side instrumentation: runs right before every syscall.
 * Stashes the syscall number and the arguments SyscallExit will need in the
 * per-thread state, and for blocking syscalls (futex wait, poll/select/epoll,
 * nanosleep, pause) tells the simulator this thread is giving up its core.
 * sched_setaffinity with a single-CPU mask is virtualized into an affinity
 * handshake. All work is serialized under syscall_lock. */
VOID SyscallEntry(THREADID threadIndex, CONTEXT* ictxt, SYSCALL_STANDARD std, VOID* v) {
    /* Kill speculative feeder before reaching a syscall.
     * This guarantees speculative processes don't have side effects. */
    if (speculation_mode) {
        FinishSpeculation(get_tls(threadIndex));
        return;
    }
    lk_lock(&syscall_lock, threadIndex + 1);
    ADDRINT syscall_num = PIN_GetSyscallNumber(ictxt, std);
    ADDRINT arg1 = PIN_GetSyscallArgument(ictxt, std, 0);
    ADDRINT arg2;
    ADDRINT arg3;
    mmap_arg_struct mmap_arg;
    thread_state_t* tstate = get_tls(threadIndex);
    tstate->last_syscall_number = syscall_num;
#ifdef SYSCALL_DEBUG
    stringstream log;
    log << tstate->tid << ": ";
#endif
    switch (syscall_num) {
    case __NR_brk:
#ifdef SYSCALL_DEBUG
        log << "Syscall brk(" << dec << syscall_num << ") addr: 0x" << hex << arg1 << dec
            << endl;
#endif
        tstate->last_syscall_arg1 = arg1;
        break;
    case __NR_munmap:
        arg2 = PIN_GetSyscallArgument(ictxt, std, 1);
#ifdef SYSCALL_DEBUG
        log << "Syscall munmap(" << dec << syscall_num << ") addr: 0x" << hex << arg1
            << " length: " << arg2 << dec << endl;
#endif
        tstate->last_syscall_arg1 = arg1;
        tstate->last_syscall_arg2 = arg2;
        break;
    case __NR_mmap: // oldmmap
        /* The legacy mmap interface passes its arguments through a struct in
         * memory on ia32; on 64-bit they arrive in registers. */
#ifndef _LP64
        memcpy(&mmap_arg, (void*)arg1, sizeof(mmap_arg_struct));
#else
        mmap_arg.addr = arg1;
        mmap_arg.len = PIN_GetSyscallArgument(ictxt, std, 1);
#endif
        tstate->last_syscall_arg1 = mmap_arg.len;
#ifdef SYSCALL_DEBUG
        log << "Syscall oldmmap(" << dec << syscall_num << ") addr: 0x" << hex << mmap_arg.addr
            << " length: " << mmap_arg.len << dec << endl;
#endif
        break;
#ifndef _LP64
    case __NR_mmap2: // ia32-only
        arg2 = PIN_GetSyscallArgument(ictxt, std, 1);
#ifdef SYSCALL_DEBUG
        log << "Syscall mmap2(" << dec << syscall_num << ") addr: 0x" << hex << arg1
            << " length: " << arg2 << dec << endl;
#endif
        tstate->last_syscall_arg1 = arg2;
        break;
#endif // _LP64
    case __NR_mremap:
        arg2 = PIN_GetSyscallArgument(ictxt, std, 1);
        arg3 = PIN_GetSyscallArgument(ictxt, std, 2);
#ifdef SYSCALL_DEBUG
        log << "Syscall mremap(" << dec << syscall_num << ") old_addr: 0x" << hex << arg1
            << " old_length: " << arg2 << " new_length: " << arg3 << dec << endl;
#endif
        tstate->last_syscall_arg1 = arg1;
        tstate->last_syscall_arg2 = arg2;
        tstate->last_syscall_arg3 = arg3;
        break;
    case __NR_gettimeofday:
#ifdef SYSCALL_DEBUG
        log << "Syscall gettimeofday(" << dec << syscall_num << ")" << endl;
#endif
        tstate->last_syscall_arg1 = arg1;
        BeforeGettimeofday(threadIndex, arg1);
        break;
    case __NR_mprotect:
        arg2 = PIN_GetSyscallArgument(ictxt, std, 1);
        arg3 = PIN_GetSyscallArgument(ictxt, std, 2);
#ifdef SYSCALL_DEBUG
        log << "Syscall mprotect(" << dec << syscall_num << ") addr: " << hex << arg1 << dec
            << " length: " << arg2 << " prot: " << hex << arg3 << dec << endl;
#endif
        tstate->last_syscall_arg1 = arg1;
        tstate->last_syscall_arg2 = arg2;
        tstate->last_syscall_arg3 = arg3;
        break;
    case __NR_futex: {
        {
            /* Threads marked `ignore` skip futex bookkeeping entirely. */
            std::lock_guard<XIOSIM_LOCK> l(tstate->lock);
            if (tstate->ignore)
                break;
        }
        arg2 = PIN_GetSyscallArgument(ictxt, std, 1);
        tstate->last_syscall_arg1 = arg1;
        tstate->last_syscall_arg2 = arg2;
#ifdef SYSCALL_DEBUG
        log << "Syscall futex(" << hex << arg1 << dec << ", " << arg2 << ")" << endl;
#endif
        /* A wait-style futex op blocks -> release the simulated core. */
        int futex_op = FUTEX_CMD_MASK & arg2;
        if (futex_op == FUTEX_WAIT || futex_op == FUTEX_WAIT_BITSET) {
            AddGiveUpHandshake(threadIndex, false, true);
        }
    } break;
    case __NR_epoll_wait:
    case __NR_epoll_pwait:
#ifdef SYSCALL_DEBUG
        log << "Syscall epoll_wait(*)" << endl;
#endif
        AddGiveUpHandshake(threadIndex, false, true);
        break;
    case __NR_poll:
    case __NR_ppoll:
#ifdef SYSCALL_DEBUG
        log << "Syscall poll(*)" << endl;
#endif
        AddGiveUpHandshake(threadIndex, false, true);
        break;
    case __NR_select:
    case __NR_pselect6:
#ifdef SYSCALL_DEBUG
        log << "Syscall select(*)" << endl;
#endif
        AddGiveUpHandshake(threadIndex, false, true);
        break;
    case __NR_nanosleep:
#ifdef SYSCALL_DEBUG
        log << "Syscall nanosleep(*)" << endl;
#endif
        AddGiveUpHandshake(threadIndex, false, true);
        break;
    case __NR_pause:
#ifdef SYSCALL_DEBUG
        log << "Syscall pause(*)" << endl;
#endif
        AddGiveUpHandshake(threadIndex, false, true);
        break;
#ifdef SYSCALL_DEBUG
    case __NR_open:
        log << "Syscall open (" << dec << syscall_num << ") path: " << (char*)arg1 << endl;
        break;
#endif
#ifdef SYSCALL_DEBUG
    case __NR_exit:
        log << "Syscall exit (" << dec << syscall_num << ") code: " << arg1 << endl;
        break;
#endif
    case __NR_sched_setaffinity: {
        arg2 = PIN_GetSyscallArgument(ictxt, std, 1);
        arg3 = PIN_GetSyscallArgument(ictxt, std, 2);
#ifdef SYSCALL_DEBUG
        log << "Syscall sched_setaffinity(" << arg1 << ", " << arg2 << ")";
#endif
        size_t mask_size = (size_t) arg2;
        cpu_set_t* mask = (cpu_set_t*) arg3;
        /* Only single-CPU masks are virtualized. */
        if (CPU_COUNT(mask) > 1) {
#ifdef SYSCALL_DEBUG
            log << endl;
#endif
            cerr << "We don't virtualize sched_setaffinity with a mask > 1." << endl;
            break;
        }
        /* Find the (single) CPU set in the mask. */
        int coreID = xiosim::INVALID_CORE;
        for (size_t i = 0; i < mask_size; i++) {
            if (CPU_ISSET(i, mask)) {
                coreID = static_cast<int>(i);
                break;
            }
        }
#ifdef SYSCALL_DEBUG
        log << " cpu " << coreID << endl;
#endif
        AddAffinityHandshake(threadIndex, coreID);
    } break;
    /* case __NR_sysconf:
    #ifdef SYSCALL_DEBUG
        log << "Syscall sysconf (" << dec << syscall_num << ") arg: " << arg1 << endl;
    #endif
        tstate->last_syscall_arg1 = arg1;
        break; */
    default:
#ifdef SYSCALL_DEBUG
        log << "Syscall " << dec << syscall_num << endl;
#endif
        break;
    }
#ifdef SYSCALL_DEBUG
    cerr << log.str();
#endif
    lk_unlock(&syscall_lock);
}