Ejemplo n.º 1
0
void GPUSMProcessor::retire() {

  // Pass all the ready instructions to the rrob
  while(!ROB.empty()) {
    DInst *dinst = ROB.top();

    if( !dinst->isExecuted() )
      break;

    bool done = dinst->getClusterResource()->preretire(dinst, false);
    if( !done )
      break;

    rROB.push(dinst);
    ROB.pop();

  }

  robUsed.sample(ROB.size());
  rrobUsed.sample(rROB.size());

  for(uint16_t i=0 ; i<RetireWidth && !rROB.empty() ; i++) {
    DInst *dinst = rROB.top();

    if (!dinst->isExecuted())
      break;

    I(dinst->getCluster());

    bool done = dinst->getCluster()->retire(dinst, false);
    if( !done ) {
      //dinst->getInst()->dump("not ret");
      return;
    }

    nCommitted.inc();

#if 0
    FlowID fid = dinst->getFlowId();
    if( active) {
      EmulInterface *eint = TaskHandler::getEmul(fid);
      eint->reexecuteTail( fid );
    }
#endif

    dinst->destroy(eint);
    rROB.pop();
  }

}
Ejemplo n.º 2
0
// Invoked once dinst has finished executing: releases its RAT entry and then
// walks its pending consumers, scheduling a wake-up (through preSelect) for
// each one that is left without dependences.
void DepWindow::executed(DInst *dinst) {
  //  MSG("execute [0x%x] @%lld",dinst, globalClock);

  I(!dinst->hasDeps());

  //dinst->dump("Clearing2:");
  dinst->clearRATEntry();

  // Nobody is waiting on this instruction
  if (!dinst->hasPending())
    return;

  // In-order cores are never expected to get this far

  I(dinst->getCluster());
  I(srcCluster == dinst->getCluster());
  I(dinst->isIssued());

  // Walk the chain only until stopAtDst is at its head. Instructions that
  // belong to another processor should be put back on the dependence chain so
  // that MemRequest::ack can awake them.
  const DInst *stopAtDst = 0;

  while (dinst->hasPending() && stopAtDst != dinst->getFirstPending()) {
    DInst *consumer = dinst->getNextPending();
    I(consumer);

#if 0
    if (!consumer->isIssued()) {
      I(dinst->getInst()->isStore());

      I(!consumer->hasDeps());
      continue;
    }
#endif
    I(!consumer->isExecuted());

    // Still waiting on some other producer: leave it alone
    if (consumer->hasDeps())
      continue;

    // The cluster is checked because the consumer may not be issued yet
    I(consumer->getCluster());
    const Cluster *consumerCluster = consumer->getCluster();
    I(consumerCluster);

    Time_t wakeAt = wakeUpPort->nextSlot(dinst->getStatsFlag());
    if (consumerCluster != srcCluster) {
      // Crossing clusters: count the forward and pay the extra latency
      wrForwardBus.inc(dinst->getStatsFlag());
      wakeAt += InterClusterLat;
    }

    consumer->setWakeUpTime(wakeAt);

    preSelect(consumer);
  }
}
Ejemplo n.º 3
0
// Checks a load against the store recorded in `stores` for the same word (as
// computed by calcWord) and, when one exists, chains the load behind that
// store with addFakeSrc() so it waits for it. Does nothing when no store is
// recorded for the word.
void LDSTBuffer::getLoadEntry(DInst *dinst) 
{
  I(dinst->getInst()->isLoad());

#ifdef LDSTBUFFER_IGNORE_DEPS
  return;
#endif

  // Find the store, if any, registered for this load's word
  EntryType::iterator match = stores.find(calcWord(dinst));
  if (match == stores.end())
    return;

  DInst *storeInst = match->second;
  I(storeInst->getInst()->isStore());

#if defined(TASKSCALAR) && !defined(TS_CAVA)
  // Loads and stores from different versions are not chained
  if (dinst->getVersionRef() != storeInst->getVersionRef())
    return;
#else
  if (dinst->getContextId() != storeInst->getContextId()) {
    // FIXME2: after a context switch the same processor may carry two
    // different PIDs

    // The store belongs to a different processor or window: queue the load
    // behind it even if the store already executed
    dinst->setDepsAtRetire();
    I(storeInst->getInst()->getAddr() != dinst->getInst()->getAddr());
    storeInst->addFakeSrc(dinst);

    GLOG(DEBUG2, "FORWARD pc=0x%x [addr=0x%x] (%p)-> pc=0x%x [addr=0x%x] (%p)"
         ,(int)storeInst->getInst()->getAddr(), (int)storeInst->getVaddr(), storeInst
         ,(int)dinst->getInst()->getAddr(), (int)dinst->getVaddr(), dinst);
    return;
  }
#endif

#ifndef LDSTQ_FWD
  dinst->setLoadForwarded();
#endif

  // A store that has already executed leaves nothing to wait for
  if (storeInst->isExecuted())
    return;

  I(storeInst->getInst()->getAddr() != dinst->getInst()->getAddr());
  storeInst->addFakeSrc(dinst);
}
Ejemplo n.º 4
0
void DInst::awakeRemoteInstructions() 
{
  while (hasPending()) {
    DInst *dstReady = getNextPending();

    I(inst->isStore());
    I( dstReady->inst->isLoad());
    I(!dstReady->isExecuted());
    I( dstReady->hasDepsAtRetire());

    I( dstReady->isSrc2Ready()); // LDSTBuffer queue in src2, free by now

    dstReady->clearDepsAtRetire();
    if (dstReady->isIssued() && !dstReady->hasDeps()) {
      Resource *dstRes = dstReady->getResource();
      // Coherence would add the latency because the cache line must be brought
      // again (in theory it must be local to dinst processor and marked dirty
      I(dstRes); // since isIssued it should have a resource
      dstRes->simTime(dstReady);
    }
  }
}
Ejemplo n.º 5
0
// Invoked once dinst has finished executing. Marks it executed, releases its
// RAT entry and passes every pending consumer that is left without
// dependences to preSelect().
void DepWindow::executed(DInst *dinst) {
  //  MSG("execute [0x%x] @%lld",dinst, globalClock);

  I(!dinst->hasDeps());

  dinst->markExecuted();
  dinst->clearRATEntry();

  // Nobody is waiting on this instruction
  if (!dinst->hasPending())
    return;

  // In-order cores are never expected to get this far

  I(dinst->getCluster());
  I(srcCluster == dinst->getCluster());
  I(dinst->isIssued());

  while (dinst->hasPending()) {
    DInst *consumer = dinst->getNextPending();
    I(consumer);

    I(!consumer->isExecuted());

    // Still waiting on some other producer: leave it alone
    if (consumer->hasDeps())
      continue;

    // The cluster is checked because the consumer may not be issued yet
    I(consumer->getCluster());
    const Cluster *consumerCluster = consumer->getCluster();
    I(consumerCluster);

    if (consumerCluster != srcCluster) {
      // Result crosses clusters: count the forward and flag the producer
      wrForwardBus.inc(dinst->getStatsFlag());
      dinst->markInterCluster();
    }

    preSelect(consumer);
  }
}
Ejemplo n.º 6
0
// Retires, in order, up to RetireWidth executed instructions from the head of
// the ROB. The loop stops early -- recording the reason with
// addStatsNoRetire() -- when the head has not executed yet or when its
// resource refuses to retire it. addStatsRetire() is called with the number
// of retired instructions unless the ROB is empty and nothing was retired.
//
// Optional build-time sections:
//   DEBUG   - forward-progress check, run when the low 21 bits of globalClock
//             are all zero
//   STAT    - passes each retiring instruction to Synthesis::analysis
//   PROFILE - per-thread windowed feed into Profiling::analysis
//   TM      - reporting through tmReport based on the instruction's
//             transaction type
void GProcessor::retire()
{
#ifdef DEBUG
  // Check for progress. When a processor gets stuck, it sucks big time
  if ((((int)globalClock) & 0x1FFFFFL) == 0) {
    if (ROB.empty()) {
      // ROB should not be empty for lots of time
      if (prevDInstID == 1) {
        MSG("GProcessor::retire CPU[%d] ROB empty for long time @%lld", Id, globalClock);
      }
      prevDInstID = 1;
    }else{
      DInst *dinst = ROB.top();
      // Same head instruction as at the previous check: no progress was made
      if (prevDInstID == dinst->getID()) {
        I(0);
        MSG("ExeEngine::retire CPU[%d] no forward progress from pc=0x%x with %d @%lld"
            ,Id, (uint)dinst->getInst()->getAddr() 
            ,(uint)dinst->getInst()->currentID(), globalClock );
        dinst->dump("HEAD");
        LDSTBuffer::dump("");
      }
      prevDInstID = dinst->getID();
    }
  }
#endif

  robUsed.sample(ROB.size());

  // Declared outside the loop: the count is needed after it finishes
  ushort i;
  
  for(i=0;i<RetireWidth && !ROB.empty();i++) {
    DInst *dinst = ROB.top();

    if( !dinst->isExecuted() ) {
      addStatsNoRetire(i, dinst, NotExecuted);
      return;
    }

    // save it now because retire can destroy DInst
    int rp = dinst->getInst()->getDstPool();

//BEGIN STAT --------------------------------------------------------------------------------------------------------
#if defined(STAT)

   ConfObject* statConf = new ConfObject;
   THREAD_ID threadID = dinst->get_threadID();

   //Check to see if we're profling or not
   if(statConf->return_enableSynth() == 1)
   {
      Synthesis::checkContainerSizes(threadID);

//FIXME Bug with flushing the instructionQueues
      tuple<DInst, Time_t> instruction_cycle(*dinst, globalClock);
      Synthesis::analysis(instruction_cycle);
//       instructionQueueVector[threadID]->push_back(instruction_cycle);
//       if((INT_32)instructionQueueVector[threadID]->size() > statConf->return_windowSize())
//       {
//          Synthesis::analysis(instructionQueueVector[threadID]->front());
//          instructionQueueVector[threadID]->pop_front();
//       }
// 
//       //if this is the end of the thread, we need to flush the instruction queue
//       if(dinst->getInst()->getICode()->func == mint_exit)
//       {
//          while(instructionQueueVector[threadID]->empty() == 0)
//          {
//             Synthesis::analysis(instructionQueueVector[threadID]->front());
//             instructionQueueVector[threadID]->pop_front();
//          }
//       }
   }

   delete statConf;
#endif
//END STAT ----------------------------------------------------------------------------------------------------------

//BEGIN PROFILING --------------------------------------------------------------------------------------------------------
#if defined(PROFILE)
   ConfObject *statConf = new ConfObject;
   THREAD_ID threadID = dinst->get_threadID();
   if(statConf->return_enableProfiling() == 1)
   {
      // Snapshot of the instruction (copied by value) paired with the current cycle
      tuple<DInst, Time_t> instruction_cycle(*dinst, globalClock);

      //Need to ensure that the vector is large enough to hold the next thread
      if(threadID >= Profiling::transactionDistance.size())
      {
         if(threadID == Profiling::transactionDistance.size())
         {
            std::cerr << "Profiling::Push back to transactionDistance with " << threadID;
            UINT_32 temp_1 = 0;
            Profiling::transactionDistance.push_back(temp_1);
            std::cerr << " and new size of " << Profiling::transactionDistance.size() << "*" << std::endl;
         }
         else
         {
            std::cerr << "Profiling::Resizing transactionDistance with " << threadID;
            Profiling::transactionDistance.resize(threadID + 1);
            std::cerr << " and new size of " << Profiling::transactionDistance.size() << "*" << std::endl;
         }
      }

      if(threadID >= Profiling::isTransaction.size())
      {
         if(threadID == Profiling::isTransaction.size())
         {
            std::cerr << "Profiling::Push back to isTransaction with " << threadID;
            BOOL temp_1 = 0;
            Profiling::isTransaction.push_back(temp_1);
            std::cerr << " and new size of " << Profiling::isTransaction.size() << "*" << std::endl;
         }
         else
         {
            std::cerr << "Profiling::Resizing isTransaction with " << threadID;
            Profiling::isTransaction.resize(threadID + 1);
            std::cerr << " and new size of " << Profiling::isTransaction.size() << "*" << std::endl;
         }
      }

      if(threadID >= instructionQueueVector.size())
      {
         if(threadID == instructionQueueVector.size())
         {
            std::cerr << "Profiling::Push back to instructionQueueVector with " << threadID;
            instructionQueueVector.push_back(new std::deque< tuple<DInst, Time_t> >);
            std::cerr << " and new size of " << instructionQueueVector.size() << " and capacity of " << instructionQueueVector.capacity() << "*" << std::endl;
         }
         else
         {
            std::cerr << "Profiling::Resizing instructionQueueVector with " << threadID;
            // Allocate a separate queue for every new slot. resize(n, ptr)
            // would copy one pointer into all of them, so several threads
            // would end up pushing into the same deque.
            while(instructionQueueVector.size() <= threadID)
               instructionQueueVector.push_back(new std::deque< tuple<DInst, Time_t> >);
            std::cerr << " and new size of " << instructionQueueVector.size() << " and capacity of " << instructionQueueVector.capacity() << "*" << std::endl;
         }
      }

      if(threadID >= Profiling::currBBStats.size())
      {
         if(threadID == Profiling::currBBStats.size())
         {
            std::cerr << "Profiling::Push back to currBBStats with " << threadID;
            Profiling::currBBStats.push_back(new WorkloadCharacteristics());
            std::cerr << " and new size of " << Profiling::currBBStats.size() << " and capacity of " << Profiling::currBBStats.capacity() << "*" << std::endl;
         }
         else
         {
            std::cerr << "Profiling::Resizing currBBStats with " << threadID;
            // One WorkloadCharacteristics per new slot, for the same reason
            // as above: resize(n, ptr) would make the new slots alias a
            // single object.
            while(Profiling::currBBStats.size() <= threadID)
               Profiling::currBBStats.push_back(new WorkloadCharacteristics());
            std::cerr << " and new size of " << Profiling::currBBStats.size() << " and capacity of " << Profiling::currBBStats.capacity() << "*" << std::endl;
         }
      }

      if(threadID >= Profiling::firstTransaction.size())
      {
         if(threadID == Profiling::firstTransaction.size())
         {
            std::cerr << "Profiling::Push back to firstTransaction with " << threadID;
            UINT_32 temp_1 = 1;
            Profiling::firstTransaction.push_back(temp_1);
            std::cerr << " and new size of " << Profiling::firstTransaction.size() << " and capacity of " << Profiling::firstTransaction.capacity() << "*" << std::endl;
         }
         else
         {
            std::cerr << "Profiling::Resizing firstTransaction with " << threadID;
            // NOTE(review): only slot threadID is set to 1 below; the other
            // slots created by this resize are value-initialized, unlike the
            // push_back path which stores 1 -- confirm this is intended.
            Profiling::firstTransaction.resize(threadID + 1);
            std::cerr << " and new size of " << Profiling::firstTransaction.size() << " and capacity of " << Profiling::firstTransaction.capacity() << "*" << std::endl;
            Profiling::firstTransaction[threadID] = 1;
         }
      }

      // Sliding window: enqueue the new entry and, once the window holds more
      // than return_windowSize() entries, analyse and drop the oldest one
      instructionQueueVector[threadID]->push_back(instruction_cycle);
      if((INT_32)instructionQueueVector[threadID]->size() > statConf->return_windowSize())
      {
         instruction_cycle = instructionQueueVector[threadID]->front();
         instructionQueueVector[threadID]->pop_front();
         Profiling::analysis(instruction_cycle);
      }
   }
   delete statConf;
#endif
//END PROFILING --------------------------------------------------------------------------------------------------------

#if (defined TM)
    // We must grab the type here since we will not be able to use it past the retirement phase
    transInstType tempTransType = dinst->transType;
    sType synchType = dinst->synchType;
    int transPid = dinst->transPid;
    int transTid = dinst->transTid;
    int transBCFlag = dinst->transBCFlag;
#endif

    // Read before retire() for the same reason as rp above
    bool fake = dinst->isFake();

    I(dinst->getResource());
    RetOutcome retOutcome = dinst->getResource()->retire(dinst);
    if( retOutcome != Retired) {
      addStatsNoRetire(i, dinst, retOutcome);
      return;
    }
    // dinst CAN NOT be used beyond this point

#if (defined TM)
      instCountTM++;
      // Call the proper reporting function based on the type of instruction
      switch(tempTransType){
      case transCommit:
        if(transBCFlag != 2)
          tmReport->reportCommit(transPid);
        break;
      case transBegin:
        if(transBCFlag != 2)
          tmReport->reportBegin(transPid, this->Id);
        break;
      case transLoad:
        tmReport->reportLoad(transPid);
        break;
      case transStore:
        tmReport->reportStore(transPid);
        break;
      case transAbort:
         tmReport->beginTMStats(instCountTM);
        break;
      case transInt:
      case transFp:
      case transBJ:
      case transFence:
        tmReport->registerTransInst(transPid,tempTransType);
        break;
      case transOther:
        break;
      case transNT:
        tmReport->incrementCommittedInstCountByCpu ( this->Id );
        break;
       }

    if (synchType == barrier )
    {
      tmReport->reportBarrier ( this->Id);
    }
#endif

    // Increment the counter of the destination pool saved before retire()
    if (!fake)
      regPool[rp]++;

    ROB.pop();

    robEnergy->inc(); // read ROB entry (finished?, update retirement rat...)
  }

  if(!ROB.empty() || i != 0) 
    addStatsRetire(i);
  
}
Ejemplo n.º 7
0
void GProcessor::retire()
{
#ifdef DEBUG
    // Check for progress. When a processor gets stuck, it sucks big time
    if ((((int)globalClock) & 0x1FFFFFL) == 0) {
        if (ROB.empty()) {
            // ROB should not be empty for lots of time
            if (prevDInstID == 1) {
                MSG("GProcessor::retire CPU[%d] ROB empty for long time @%lld", Id, globalClock);
            }
            prevDInstID = 1;
        } else {
            DInst *dinst = ROB.top();
            if (prevDInstID == dinst->getID()) {
                I(0);
                MSG("ExeEngine::retire CPU[%d] no forward progress from pc=0x%x with %d @%lld"
                    ,Id, (uint)dinst->getInst()->getAddr()
                    ,(uint)dinst->getInst()->currentID(), globalClock );
                dinst->dump("HEAD");
                LDSTBuffer::dump("");
            }
            prevDInstID = dinst->getID();
        }
    }
#endif

    robUsed.sample(ROB.size());

    ushort i;

    for(i=0; i<RetireWidth && !ROB.empty(); i++) {
        DInst *dinst = ROB.top();

        if( !dinst->isExecuted() ) {
            addStatsNoRetire(i, dinst, NotExecuted);
            return;
        }

        uint32_t refetchAt = 1;
        for(VAddr refAddr: dinst->getRefetchAddrs()) {
            CBMemRequest::create(refetchAt, memorySystem->getDataSource(), MemRead, refAddr, 0);
            refetchAt++;
        }

        // save it now because retire can destroy DInst
        int32_t rp = dinst->getInst()->getDstPool();

        bool fake = dinst->isFake();

        I(dinst->getResource());
        RetOutcome retOutcome = dinst->getResource()->retire(dinst);
        if( retOutcome != Retired) {
            addStatsNoRetire(i, dinst, retOutcome);
            return;
        }
        // dinst CAN NOT be used beyond this point

        if (!fake)
            regPool[rp]++;

        ROB.pop();

        robEnergy->inc(); // read ROB entry (finished?, update retirement rat...)
#if (defined TM)
		tmInsts.inc();
#endif
    }

    if(!ROB.empty() || i != 0)
        addStatsRetire(i);

}
Ejemplo n.º 8
0
// Compares dinst against every other entry of instMap stored under the same
// word tag. Returns the executed entry with the largest ID that is a load
// newer than dinst (larger ID, different PC) when dinst is a store, or 0 if
// there is none. As a side effect, a load dinst is marked load-forwarded the
// first time an entry that is not newer than it turns out to be an executed
// store.
DInst *LSQFull::executing(DInst *dinst)
  /* dinst got executed (out-of-order) {{{1 */
{
  I(dinst->getAddr());

  const AddrType tag = calcWord(dinst);
  const Instruction *inst = dinst->getInst();

  DInst *faulty = 0;

  // Disabled variant that scanned the whole map; kept for reference
#if 0
  AddrDInstQMap::const_iterator instIt = instMap.begin();
  I(instIt != instMap.end());

  I(!dinst->isExecuted());

  while(instIt != instMap.end()) {
    if (instIt->first != tag){
      instIt++;
      continue; 
    }
#endif
  std::pair<AddrDInstQMap::iterator, AddrDInstQMap::iterator> sameTag = instMap.equal_range(tag);
  for (AddrDInstQMap::iterator cur = sameTag.first; cur != sameTag.second; ++cur) {
    I(cur->first == tag);

    //inst->dump("Executed");
    DInst *other = cur->second;
    if (other == dinst)
      continue; // skip dinst's own entry

    const Instruction *otherInst = other->getInst();

    //bool beforeInst = other->getID() < dinst->getID();
    const bool otherIsNewer = other->getID() > dinst->getID();
    if (otherIsNewer) {
      // A newer load that already executed while dinst is a store
      const bool executedEarly = other->isExecuted() && other->getPC() != dinst->getPC()
                                 && inst->isStore() && otherInst->isLoad();

      // Keep the candidate with the largest ID
      if (executedEarly && (faulty == 0 || faulty->getID() < other->getID()))
        faulty = other;
    } else if (!dinst->isLoadForwarded() && inst->isLoad() && otherInst->isStore() && other->isExecuted()) {
      dinst->setLoadForwarded();
      stldForwarding.inc(dinst->getStatsFlag());
    }
  }

  I(!dinst->isExecuted()); // first clear, then mark executed
  return faulty;
}
/* }}} */

// Erases the first instMap entry whose value is dinst. Leaves the map
// untouched when dinst is not found.
void LSQFull::remove(DInst *dinst)
  /* Remove from the LSQ {{{1 (in-order) */
{
  I(dinst->getAddr());

  // The whole map is scanned by value. A lookup narrowed with
  // instMap.equal_range(calcWord(dinst)) survives only as commented-out code
  // in the previous version of this function.
  // NOTE(review): presumably the full scan is deliberate -- confirm before
  // narrowing it.
  for (AddrDInstQMap::iterator entry = instMap.begin(); entry != instMap.end(); ++entry) {
    if (entry->second != dinst)
      continue;

    // Stop right after erasing: the iterator is no longer usable
    instMap.erase(entry);
    return;
  }
}
Ejemplo n.º 9
0
void DInst::killSilently()
{
  I(getPendEvent()==0);
  I(getResource()==0);

#ifdef SESC_BAAD
  if (fetch2Time == 0) {
    fetch1QSize--;
  }else if (renameTime == 0) {
    fetch2QSize--;
  }else if (issueTime == 0) {
    issueQSize--;
  }else if (schedTime == 0) {
    schedQSize--;
  }else if (exeTime == 0) {
    exeQSize--;
  }else{
    retireQSize--;
  }
#endif

  markIssued();
  markExecuted();
  if( getFetch() ) {
    getFetch()->unBlockFetch();
    IS(setFetch(0));
  }

  if (getInst()->isStore())
    LDSTBuffer::storeLocallyPerformed(this);
 
  while (hasPending()) {
    DInst *dstReady = getNextPending();

    if (!dstReady->isIssued()) {
      // Accross processor dependence
      if (dstReady->hasDepsAtRetire())
        dstReady->clearDepsAtRetire();
      
      I(!dstReady->hasDeps());
      continue;
    }
    if (dstReady->isExecuted()) {
      // The instruction got executed even though it has dependences. This is
      // because the instruction got silently killed (killSilently)
      if (!dstReady->hasDeps())
        dstReady->scrap();
      continue;
    }

    if (!dstReady->hasDeps()) {
      I(dstReady->isIssued());
      I(!dstReady->isExecuted());
      Resource *dstRes = dstReady->getResource();
      I(dstRes);
      dstRes->simTime(dstReady);
    }
  }

#ifdef TASKSCALAR
  notifyDataDepViolation(DataDepViolationAtRetire);

  if (lvid) { // maybe got killSilently
    lvid = 0;
    lvidVersion->decOutsReqs();
    lvidVersion->garbageCollect();
    IS(lvidVersion=0);
  }
  
  I(lvidVersion==0);
#endif

  I(!getFetch());

  if (hasDeps())
    return;
  
  I(nDeps == 0);   // No deps src

#if (defined TLS)
  I(!myEpoch);
#endif

  I(!getFetch());
#if (defined MIPS_EMUL)
  context->delDInst();
  context=0;
#endif
  dInstPool.in(this); 
}