/**
 * Marks `dinst` as resolved and checks it against every other in-flight
 * memory operation stored in the queue for the same word
 * (instMap[calcWord(dinst)]).
 *
 * The queue is walked from its back towards its front.  Resolved entries
 * visited before `dinst` itself is reached are checked for ordering
 * violations (load/load, store/store, store/load); entries visited once
 * `dinst` has been reached are checked for store-to-load forwarding.
 *
 * @param dinst  memory instruction whose address has just been resolved
 * @return true when a load/load or store/load violation was found (the
 *         offending queue entry is passed to gproc->replay()); false
 *         otherwise, including when `dinst` is a dead instruction.
 */
bool LDSTQ::executed(DInst *dinst)
{
  if(dinst->isDeadInst())
    return false;

  bool doReplay = false;

  I(inflightInsts.find(dinst) != inflightInsts.end());

  const Instruction *inst = dinst->getInst();
  AddrDInstQMap::iterator addrIt = instMap.find(calcWord(dinst));
  I(addrIt != instMap.end());

  dinst->markResolved();

  bool beforeInst = true;

  // Decrement-then-visit: the previous form started at end()-1 and stopped as
  // soon as the iterator reached begin(), so the front entry of the queue was
  // never examined (and an empty queue decremented end()).
  DInstQueue::iterator instIt = addrIt->second.end();
  while(instIt != addrIt->second.begin()) {
    --instIt;

    DInst *qdinst = *instIt;
    if(qdinst == dinst)
      beforeInst = false;

    const Instruction *qinst = qdinst->getInst();

    if(beforeInst && qdinst->isResolved()) {
      if(inst->isLoad() && qinst->isLoad()) {
        ldldViolations.inc();
        doReplay = true;
        if(!dinst->isDeadInst())
          gproc->replay(qdinst);
      } else if(inst->isStore() && qinst->isStore()) {
        // Counted only; a store/store pair does not trigger a replay.
        ststViolations.inc();
      } else if(inst->isStore() && qinst->isLoad()) {
        stldViolations.inc();
        doReplay = true;
        if(!dinst->isDeadInst())
          gproc->replay(qdinst);
      }
    }

    if(!beforeInst && inst->isLoad() && qinst->isStore() && qdinst->isResolved()) {
#ifdef LDSTQ_FWD
      dinst->setLoadForwarded();
#endif
      stldForwarding.inc();
      break; // found if forwarded no need to check the rest of the entries
    }
  }

  return doReplay;
}
/**
 * Links load `dinst` to the store currently recorded in `stores` for the
 * same word, if there is one.
 *
 * - No recorded store for the word: nothing happens.
 * - TASKSCALAR (without TS_CAVA): a store from a different version is ignored.
 * - Otherwise, a store from a different context makes the load resolve its
 *   dependences at retire and adds the load as a fake source of the store.
 * - Same context/version: the load is flagged as forwarded (unless LDSTQ_FWD
 *   is defined) and, when the store has not executed yet, the load is added
 *   as a fake source of the store.
 *
 * Compiles to a no-op after the assertion when LDSTBUFFER_IGNORE_DEPS is set.
 *
 * @param dinst  load instruction being entered
 */
void LDSTBuffer::getLoadEntry(DInst *dinst)
{
  I(dinst->getInst()->isLoad());

#ifdef LDSTBUFFER_IGNORE_DEPS
  return;
#endif

  // LOAD
  EntryType::iterator storeIt = stores.find(calcWord(dinst));
  if (storeIt == stores.end())
    return;

  DInst *prevStore = storeIt->second;
  I(prevStore->getInst()->isStore());

#if defined(TASKSCALAR) && !defined(TS_CAVA)
  if (dinst->getVersionRef() != prevStore->getVersionRef())
    return;
#else
  const bool sameContext = (dinst->getContextId() == prevStore->getContextId());
  if (!sameContext) {
    // FIXME2: In a context switch the same processor may have two different
    // PIDs

    // Different processor or window. Queue the instruction even if executed
    dinst->setDepsAtRetire();

    I(prevStore->getInst()->getAddr() != dinst->getInst()->getAddr());
    prevStore->addFakeSrc(dinst);

    GLOG(DEBUG2, "FORWARD pc=0x%x [addr=0x%x] (%p)-> pc=0x%x [addr=0x%x] (%p)"
         ,(int)prevStore->getInst()->getAddr() , (int)prevStore->getVaddr(), prevStore
         ,(int)dinst->getInst()->getAddr() , (int)dinst->getVaddr(), dinst);
    return;
  }
#endif

#ifndef LDSTQ_FWD
  dinst->setLoadForwarded();
#endif

  // An already executed store needs no dependence edge.
  if (prevStore->isExecuted())
    return;

  I(prevStore->getInst()->getAddr() != dinst->getInst()->getAddr());
  prevStore->addFakeSrc(dinst);
}
/**
 * Handles a RetLookUp message: runs doAtExecuted() on the load carried by the
 * message and then releases the message.
 *
 * @param msg  incoming message; treated as an LDSTQNetMsg (unchecked cast)
 */
void LDSTQNetProtocol::RetLookUpMsgHandler(Message *msg)
{
  GLOG(0,"LDSTQNet::RetLookUpMsgHandler: RetLookUp message processed @%lld\n",globalClock);

  LDSTQNetMsg *reply = static_cast<LDSTQNetMsg *>(msg);

  DInst *load = reply->getDInst();
  I(load);
  I(load->getInst()->isLoad());

  load->doAtExecuted();

  // The message is no longer needed once the instruction has been notified.
  reply->garbageCollect();
}
/**
 * Handles a LookUp message: completes the look-up of the instruction carried
 * by the message on its LSQ resource and then releases the message.
 *
 * @param msg  incoming message; treated as an LDSTQNetMsg (unchecked cast)
 */
void LDSTQNetProtocol::LookUpMsgHandler(Message *msg)
{
  GLOG(0,"LDSTQNet::LookUpMsgHandler: LookUp message processed @%lld\n",globalClock);

  LDSTQNetMsg *ldstqMsg = static_cast<LDSTQNetMsg *>(msg);
  DInst *dinst = ldstqMsg->getDInst();
  I(dinst);

  // NOTE(review): the resource is cast to FULoad for every instruction; the
  // code used to branch on isLoad() but both arms were identical, so loads and
  // non-loads have always taken this same path.  Confirm that non-load
  // instructions really carry a FULoad resource here.
  FULoad *loadRes = static_cast<FULoad *>(dinst->getLSQResource());

  // NM: bank number 0 is actually wrong; it should not be used.
  loadRes->completeLookUp(dinst, 0);

  ldstqMsg->garbageCollect();
}
/**
 * Records store `dinst` in `stores` as the entry for the word it writes.
 *
 * If another store was already recorded for that word, it is marked as a dead
 * store when it has nothing pending and belongs to the same context as
 * `dinst`.  The new store then replaces it in the table.
 *
 * Compiles to a no-op after the assertion when LDSTBUFFER_IGNORE_DEPS is set.
 *
 * @param dinst  store instruction being entered
 */
void LDSTBuffer::getStoreEntry(DInst *dinst)
{
  I( dinst->getInst()->isStore() );

#ifdef LDSTBUFFER_IGNORE_DEPS
  return;
#endif

  EntryType::iterator prevIt = stores.find(calcWord(dinst));
  const bool hasPrevious = (prevIt != stores.end());

  if (hasPrevious) {
    DInst *prevStore = prevIt->second;
    I(prevStore->getInst()->isStore());

    const bool idle        = !prevStore->hasPending();
    const bool sameContext = idle && (dinst->getContextId() == prevStore->getContextId());
    if (sameContext)
      prevStore->setDeadStore();
  }

  // The newest store always becomes the table entry for its word.
  stores[calcWord(dinst)] = dinst;
}
void GProcessor::retire() { #ifdef DEBUG // Check for progress. When a processor gets stuck, it sucks big time if ((((int)globalClock) & 0x1FFFFFL) == 0) { if (ROB.empty()) { // ROB should not be empty for lots of time if (prevDInstID == 1) { MSG("GProcessor::retire CPU[%d] ROB empty for long time @%lld", Id, globalClock); } prevDInstID = 1; }else{ DInst *dinst = ROB.top(); if (prevDInstID == dinst->getID()) { I(0); MSG("ExeEngine::retire CPU[%d] no forward progress from pc=0x%x with %d @%lld" ,Id, (uint)dinst->getInst()->getAddr() ,(uint)dinst->getInst()->currentID(), globalClock ); dinst->dump("HEAD"); LDSTBuffer::dump(""); } prevDInstID = dinst->getID(); } } #endif robUsed.sample(ROB.size()); ushort i; for(i=0;i<RetireWidth && !ROB.empty();i++) { DInst *dinst = ROB.top(); if( !dinst->isExecuted() ) { addStatsNoRetire(i, dinst, NotExecuted); return; } // save it now because retire can destroy DInst int rp = dinst->getInst()->getDstPool(); //BEGIN STAT -------------------------------------------------------------------------------------------------------- #if defined(STAT) ConfObject* statConf = new ConfObject; THREAD_ID threadID = dinst->get_threadID(); //Check to see if we're profling or not if(statConf->return_enableSynth() == 1) { Synthesis::checkContainerSizes(threadID); //FIXME Bug with flushing the instructionQueues tuple<DInst, Time_t> instruction_cycle(*dinst, globalClock); Synthesis::analysis(instruction_cycle); // instructionQueueVector[threadID]->push_back(instruction_cycle); // if((INT_32)instructionQueueVector[threadID]->size() > statConf->return_windowSize()) // { // Synthesis::analysis(instructionQueueVector[threadID]->front()); // instructionQueueVector[threadID]->pop_front(); // } // // //if this is the end of the thread, we need to flush the instruction queue // if(dinst->getInst()->getICode()->func == mint_exit) // { // while(instructionQueueVector[threadID]->empty() == 0) // { // Synthesis::analysis(instructionQueueVector[threadID]->front()); 
// instructionQueueVector[threadID]->pop_front(); // } // } } delete statConf; #endif //END STAT ---------------------------------------------------------------------------------------------------------- //BEGIN PROFILING -------------------------------------------------------------------------------------------------------- #if defined(PROFILE) ConfObject *statConf = new ConfObject; THREAD_ID threadID = dinst->get_threadID(); if(statConf->return_enableProfiling() == 1) { tuple<DInst, Time_t> instruction_cycle(*dinst, globalClock); //Need to ensure that the vector is large enough to hold the next thread if(threadID >= Profiling::transactionDistance.size()) { if(threadID == Profiling::transactionDistance.size()) { std::cerr << "Profiling::Push back to transactionDistance with " << threadID; UINT_32 temp_1 = 0; Profiling::transactionDistance.push_back(temp_1); std::cerr << " and new size of " << Profiling::transactionDistance.size() << "*" << std::endl; } else { std::cerr << "Profiling::Resizing transactionDistance with " << threadID; Profiling::transactionDistance.resize(threadID + 1); std::cerr << " and new size of " << Profiling::transactionDistance.size() << "*" << std::endl; } } if(threadID >= Profiling::isTransaction.size()) { if(threadID == Profiling::isTransaction.size()) { std::cerr << "Profiling::Push back to isTransaction with " << threadID; BOOL temp_1 = 0; Profiling::isTransaction.push_back(temp_1); std::cerr << " and new size of " << Profiling::isTransaction.size() << "*" << std::endl; } else { std::cerr << "Profiling::Resizing isTransaction with " << threadID; Profiling::isTransaction.resize(threadID + 1); std::cerr << " and new size of " << Profiling::isTransaction.size() << "*" << std::endl; } } if(threadID >= instructionQueueVector.size()) { if(threadID == instructionQueueVector.size()) { std::cerr << "Profiling::Push back to instructionQueueVector with " << threadID; instructionQueueVector.push_back(new std::deque< tuple<DInst, Time_t> >); 
std::cerr << " and new size of " << instructionQueueVector.size() << " and capacity of " << instructionQueueVector.capacity() << "*" << std::endl; } else { std::cerr << "Profiling::Resizing instructionQueueVector with " << threadID; instructionQueueVector.resize(threadID + 1, new std::deque< tuple<DInst, Time_t> >); std::cerr << " and new size of " << instructionQueueVector.size() << " and capacity of " << instructionQueueVector.capacity() << "*" << std::endl; } } if(threadID >= Profiling::currBBStats.size()) { if(threadID == Profiling::currBBStats.size()) { std::cerr << "Profiling::Push back to currBBStats with " << threadID; Profiling::currBBStats.push_back(new WorkloadCharacteristics()); std::cerr << " and new size of " << Profiling::currBBStats.size() << " and capacity of " << Profiling::currBBStats.capacity() << "*" << std::endl; } else { std::cerr << "Profiling::Resizing currBBStats with " << threadID; Profiling::currBBStats.resize(threadID + 1, new WorkloadCharacteristics()); std::cerr << " and new size of " << Profiling::currBBStats.size() << " and capacity of " << Profiling::currBBStats.capacity() << "*" << std::endl; } } if(threadID >= Profiling::firstTransaction.size()) { if(threadID == Profiling::firstTransaction.size()) { std::cerr << "Profiling::Push back to firstTransaction with " << threadID; UINT_32 temp_1 = 1; Profiling::firstTransaction.push_back(temp_1); std::cerr << " and new size of " << Profiling::firstTransaction.size() << " and capacity of " << Profiling::firstTransaction.capacity() << "*" << std::endl; } else { std::cerr << "Profiling::Resizing firstTransaction with " << threadID; Profiling::firstTransaction.resize(threadID + 1); std::cerr << " and new size of " << Profiling::firstTransaction.size() << " and capacity of " << Profiling::firstTransaction.capacity() << "*" << std::endl; Profiling::firstTransaction[threadID] = 1; } } instructionQueueVector[threadID]->push_back(instruction_cycle); 
if((INT_32)instructionQueueVector[threadID]->size() > statConf->return_windowSize()) { instruction_cycle = instructionQueueVector[threadID]->front(); instructionQueueVector[threadID]->pop_front(); Profiling::analysis(instruction_cycle); } } delete statConf; #endif //END PROFILING -------------------------------------------------------------------------------------------------------- #if (defined TM) // We must grab the type here since we will not be able to use it past the retirement phase transInstType tempTransType = dinst->transType; sType synchType = dinst->synchType; int transPid = dinst->transPid; int transTid = dinst->transTid; int transBCFlag = dinst->transBCFlag; #endif bool fake = dinst->isFake(); I(dinst->getResource()); RetOutcome retOutcome = dinst->getResource()->retire(dinst); if( retOutcome != Retired) { addStatsNoRetire(i, dinst, retOutcome); return; } // dinst CAN NOT be used beyond this point #if (defined TM) instCountTM++; // Call the proper reporting function based on the type of instruction switch(tempTransType){ case transCommit: if(transBCFlag != 2) tmReport->reportCommit(transPid); break; case transBegin: if(transBCFlag != 2) tmReport->reportBegin(transPid, this->Id); break; case transLoad: tmReport->reportLoad(transPid); break; case transStore: tmReport->reportStore(transPid); break; case transAbort: tmReport->beginTMStats(instCountTM); break; case transInt: case transFp: case transBJ: case transFence: tmReport->registerTransInst(transPid,tempTransType); break; case transOther: break; case transNT: tmReport->incrementCommittedInstCountByCpu ( this->Id ); break; } if (synchType == barrier ) { tmReport->reportBarrier ( this->Id); } #endif if (!fake) regPool[rp]++; ROB.pop(); robEnergy->inc(); // read ROB entry (finished?, update retirement rat...) } if(!ROB.empty() || i != 0) addStatsRetire(i); }
void GProcessor::retire() { #ifdef DEBUG // Check for progress. When a processor gets stuck, it sucks big time if ((((int)globalClock) & 0x1FFFFFL) == 0) { if (ROB.empty()) { // ROB should not be empty for lots of time if (prevDInstID == 1) { MSG("GProcessor::retire CPU[%d] ROB empty for long time @%lld", Id, globalClock); } prevDInstID = 1; } else { DInst *dinst = ROB.top(); if (prevDInstID == dinst->getID()) { I(0); MSG("ExeEngine::retire CPU[%d] no forward progress from pc=0x%x with %d @%lld" ,Id, (uint)dinst->getInst()->getAddr() ,(uint)dinst->getInst()->currentID(), globalClock ); dinst->dump("HEAD"); LDSTBuffer::dump(""); } prevDInstID = dinst->getID(); } } #endif robUsed.sample(ROB.size()); ushort i; for(i=0; i<RetireWidth && !ROB.empty(); i++) { DInst *dinst = ROB.top(); if( !dinst->isExecuted() ) { addStatsNoRetire(i, dinst, NotExecuted); return; } uint32_t refetchAt = 1; for(VAddr refAddr: dinst->getRefetchAddrs()) { CBMemRequest::create(refetchAt, memorySystem->getDataSource(), MemRead, refAddr, 0); refetchAt++; } // save it now because retire can destroy DInst int32_t rp = dinst->getInst()->getDstPool(); bool fake = dinst->isFake(); I(dinst->getResource()); RetOutcome retOutcome = dinst->getResource()->retire(dinst); if( retOutcome != Retired) { addStatsNoRetire(i, dinst, retOutcome); return; } // dinst CAN NOT be used beyond this point if (!fake) regPool[rp]++; ROB.pop(); robEnergy->inc(); // read ROB entry (finished?, update retirement rat...) #if (defined TM) tmInsts.inc(); #endif } if(!ROB.empty() || i != 0) addStatsRetire(i); }
/**
 * Fetches instructions from the current flow into `bucket` for one cycle.
 *
 * Instructions are pulled with flow.executePC() until the fetch budget is
 * used up, the flow yields no instruction, the running pid changes, or
 * processBranch() asks to stop.  The number of instructions fetched is added
 * to totalnInst / nFetched, and the simulation is stopped once totalnInst
 * reaches nInst2Sim.
 *
 * This method only can be called once per cycle or the restriction of the
 * BB4Cycle would not be enforced.
 *
 * @param bucket    destination bucket; must be empty on entry
 * @param fetchMax  fetch budget for this call; values <= 0 select FetchWidth
 */
void FetchEngine::realFetch(IBucket *bucket, int fetchMax)
{
  // Remember the budget so the fetched count can be derived from it below.
  const int fetchBudget = fetchMax > 0 ? fetchMax : FetchWidth;
  int n2Fetched = fetchBudget;

  maxBB = BB4Cycle; // Reset the max number of BB to fetch in this cycle (decreased in processBranch)

  I(pid>=0);
  I(maxBB>0);
  I(bucket->empty());
  I(missInstID==0);

  Pid_t myPid = flow.currentPid();

#ifdef TASKSCALAR
  TaskContext *tc = TaskContext::getTaskContext(myPid);
  I(tc);
  GLVID *lvid = gms->findCreateLVID(tc->getVersion());
  if (lvid==0) {
    // Not enough LVIDs. Stall fetch
    I(missInstID==0);
    nDelayInst2.add(n2Fetched);
    return;
  }
  HVersion *lvidVersion = tc->getVersion(); // no duplicate
#endif

  do {
    nGradInsts++; // Before executePC because it can trigger a context switch

    DInst *dinst = flow.executePC();
    if (dinst == 0)
      break;

    // If we are stalled, then this instruction will issue again so we do not
    // want to count this one
#if (defined TM)
    if(transGCM->checkStall(myPid)) {
      // The old code followed the delete with `dinst==0;`, a comparison with
      // no effect.  The pointer goes out of scope at the break, so it is
      // simply dropped here.
      delete dinst;
      break;
    }
    else
      tmReport->registerTransInstAbort(myPid,dinst->transType);
#endif

#ifdef TASKSCALAR
    dinst->setLVID(lvid, lvidVersion);
#endif //TASKSCALAR

    const Instruction *inst = dinst->getInst();

#if !(defined MIPS_EMUL)
    if (inst->isStore()) {
#if (defined TLS)
      dinst->getEpoch()->pendInstr();
      dinst->getEpoch()->execInstr();
#endif // (defined TLS)
    }
#endif // For !(defined MIPS_EMUL)

    instFetched(dinst);
    bucket->push(dinst);

#ifdef XACTION
    gproc->getXactionManager()->dinstFetch(dinst);
#endif

    n2Fetched--;

    bbSize++;
    fbSize++;
    if(inst->isBranch()) {
      szBB.sample(bbSize);
      bbSize=0;

      if (!processBranch(dinst, n2Fetched)) {
        break;
      }
    }
  }while(n2Fetched>0 && flow.currentPid()==myPid);

#ifdef TASKSCALAR
  if (!bucket->empty())
    lvid->garbageCollect();
#endif

  // Fetched = budget - remaining.  The count used to be taken from FetchWidth
  // even when the budget was fetchMax, which mis-counted (and could wrap the
  // unsigned value) whenever fetchMax differed from FetchWidth.
  ushort tmp = fetchBudget - n2Fetched;

  totalnInst+=tmp;
  if( totalnInst >= nInst2Sim ) {
    MSG("stopSimulation at %lld (%lld)",totalnInst, nInst2Sim);
    osSim->stopSimulation();
  }

  nFetched.add(tmp);
}
// Fetches instructions for flow `fid` from `eint` into `bucket` until the
// budget `n2Fetch` is exhausted, the emulator returns no instruction, or
// processBranch() asks to stall.
//
//   bucket   receives every fetched instruction that is not scrapped
//   eint     emulator interface instructions are pulled from (executeHead)
//   fid      flow to fetch for
//   n2Fetch  fetch budget; decremented as instructions are consumed
//   maxbb    copied into a local that is passed by address to processBranch()
//
// NOTE(review): only the part of this function up to the end of the fetch loop
// is visible here; whatever follows the loop is not documented.
void FetchEngine::realfetch(IBucket *bucket, EmulInterface *eint, FlowID fid, int32_t n2Fetch, uint16_t maxbb) {
  uint16_t tempmaxbb = maxbb; // FIXME: delete me
  AddrType lastpc = 0;        // PC of the previously fetched instruction; 0 = none yet
  bool lastdiff = false;      // true when the previous instruction was not at lastpc+4
#ifdef USE_FUSE
  // Registers of the previously pushed instruction, used by the fusion check.
  RegType last_dest = LREG_R0;
  RegType last_src1 = LREG_R0;
  RegType last_src2 = LREG_R0;
#endif
  do {
    DInst *dinst = 0;
    dinst = eint->executeHead(fid);
    if (dinst == 0) {
      //if (fid)
      //I(0);
      break;
    }
#ifdef USE_FUSE
    // Instructions with PC 0 are not fetched: loads and stores among them are
    // turned into warm-up requests to the DL1 and scrapped.
    if (/*!dinst->getStatsFlag() && */dinst->getPC() == 0) {
      if (dinst->getInst()->isLoad()) {
        MemRequest::sendReqReadWarmup(gms->getDL1(), dinst->getAddr());
        dinst->scrap(eint);
        dinst = 0;
      } else if (dinst->getInst()->isStore()) {
        MemRequest::sendReqWriteWarmup(gms->getDL1(), dinst->getAddr());
        dinst->scrap(eint);
        dinst = 0;
      }
    }
    if (dinst == 0) {
      // Drain cache (mostly) during warmup. FIXME: add a drain cache method?
      EventScheduler::advanceClock();
      EventScheduler::advanceClock();
      EventScheduler::advanceClock();
      EventScheduler::advanceClock();
      EventScheduler::advanceClock();
      EventScheduler::advanceClock();
      // In a do/while, continue jumps to the n2Fetch>0 test; the budget has
      // not been charged for the scrapped instruction.
      continue;
    }
#endif
    if (lastpc == 0) {
      // First instruction of this call.
      if (AlignedFetch) {
        // Charge the budget with a value derived from the PC
        // ((PC >> FetchWidthBits) masked with FetchWidth-1).
        n2Fetch -= ((dinst->getPC())>>FetchWidthBits) & (FetchWidth-1);
      }
      n2Fetch--;
      lastdiff = false;
    }else{
      if ((lastpc+4) != dinst->getPC()) {
        // Not sequential with the previous instruction.
        // n2Fetch -= (dinst->getPC()-lastpc)>>2;
        n2Fetch--;
        if (lastdiff) {
          // Second non-sequential PC in a row costs one extra slot.
          n2Fetch--; // Missed NOP
        }
        lastdiff = true;
      }else{
        n2Fetch--;
        lastdiff = false;
      }
    }
    lastpc = dinst->getPC();
    I(!missInst);
    dinst->setFetchTime();
#ifdef USE_FUSE
    // A control instruction that jumps to a label, has LREG_R0 as second
    // source, and whose first source is LREG_R0 or one of the previous
    // instruction's registers is scrapped instead of being pushed.  Its budget
    // charge above is kept.
    if(dinst->getInst()->isControl()) {
      RegType src1 = dinst->getInst()->getSrc1();
      if (dinst->getInst()->doesJump2Label() && dinst->getInst()->getSrc2() == LREG_R0 && (src1 == last_dest || src1 == last_src1 || src1 == last_src2 || src1 == LREG_R0) ) {
        //MSG("pc %x fusion with previous", dinst->getPC());
        dinst->scrap(eint);
        continue;
      }
    }
#endif
    bucket->push(dinst);
    if(dinst->getInst()->isControl()) {
      // The instruction is already in the bucket when the branch is processed.
      bool stall_fetch = processBranch(dinst, n2Fetch,&tempmaxbb);
      if (stall_fetch) {
        //bucket->push(dinst);
        break;
      }
      I(!missInst);
    }else{
      //bucket->push(dinst);
    }
#ifdef USE_FUSE
    last_dest = dinst->getInst()->getDst1();
    last_src1 = dinst->getInst()->getSrc1();
    last_src2 = dinst->getInst()->getSrc2();
#endif
    // Fetch uses getHead, ROB retires getTail
  } while(n2Fetch>0);
DInst *LSQFull::executing(DInst *dinst) /* dinst got executed (out-of-order) {{{1 */ { I(dinst->getAddr()); AddrType tag = calcWord(dinst); const Instruction *inst = dinst->getInst(); DInst *faulty = 0; #if 0 AddrDInstQMap::const_iterator instIt = instMap.begin(); I(instIt != instMap.end()); I(!dinst->isExecuted()); while(instIt != instMap.end()) { if (instIt->first != tag){ instIt++; continue; } #endif std::pair<AddrDInstQMap::iterator, AddrDInstQMap::iterator> ret; ret = instMap.equal_range(tag); for (AddrDInstQMap::iterator instIt=ret.first; instIt!=ret.second; ++instIt) { I(instIt->first == tag); //inst->dump("Executed"); DInst *qdinst = instIt->second; if(qdinst == dinst) { continue; } const Instruction *qinst = qdinst->getInst(); //bool beforeInst = qdinst->getID() < dinst->getID(); bool oooExecuted = qdinst->getID() > dinst->getID(); if(oooExecuted){ if(qdinst->isExecuted() && qdinst->getPC() != dinst->getPC()) { if(inst->isStore() && qinst->isLoad()) { if (faulty == 0) faulty = qdinst; else if (faulty->getID() < qdinst->getID()) faulty = qdinst; } } }else{ if (!dinst->isLoadForwarded() && inst->isLoad() && qinst->isStore() && qdinst->isExecuted()) { dinst->setLoadForwarded(); stldForwarding.inc(dinst->getStatsFlag()); } } } I(!dinst->isExecuted()); // first clear, then mark executed return faulty; } /* }}} */ void LSQFull::remove(DInst *dinst) /* Remove from the LSQ {{{1 (in-order) */ { I(dinst->getAddr()); //const Instruction *inst = dinst->getInst(); std::pair<AddrDInstQMap::iterator,AddrDInstQMap::iterator> rangeIt; //rangeIt = instMap.equal_range(calcWord(dinst)); AddrDInstQMap::iterator instIt = instMap.begin(); //for(AddrDInstQMap::iterator it = rangeIt.first; it != rangeIt.second ; it++) { while(instIt != instMap.end()){ if(instIt->second == dinst) { instMap.erase(instIt); return; } instIt++; } }