void MESIBottomCC::processInval(Address lineAddr, uint32_t lineId, InvType type, bool* reqWriteback) {
    MESIState* state = &array[lineId];
    assert(*state != I);
    switch (type) {
        case INVX: //lose exclusivity
            //Hmmm, do we have to propagate loss of exclusivity down the tree? (nah, topcc will do this automatically -- it knows the final state, always!)
            assert_msg(*state == E || *state == M, "Invalid state %s", MESIStateName(*state));
            if (*state == M) *reqWriteback = true;
            *state = S;
            profINVX.inc();
            break;
        case INV: //invalidate
            assert(*state != I);
            if (*state == M) *reqWriteback = true;
            *state = I;
            profINV.inc();
            break;
        case FWD: //forward
            assert_msg(*state == S, "Invalid state %s on FWD", MESIStateName(*state));
            profFWD.inc();
            break;
        default: panic("!?");
    }
    //NOTE: BottomCC never calls up on an invalidate, so it adds no extra latency
}
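// --- Illustrative sketch (not part of zsim) ---
// A minimal, self-contained model of the invalidation transitions that
// MESIBottomCC::processInval() above implements. The enum and helper below are
// hypothetical stand-ins for zsim's MESIState/InvType types, meant only to show
// the outcomes: INVX downgrades E/M to S, INV drops any valid state to I, FWD
// leaves an S copy untouched, and a writeback is requested only for M lines.
#include <cassert>

namespace inval_sketch {

enum State { I, S, E, M };
enum InvKind { INV, INVX, FWD };

State applyInval(State s, InvKind k, bool* reqWriteback) {
    *reqWriteback = false;
    switch (k) {
        case INVX:  // lose exclusivity
            assert(s == E || s == M);
            if (s == M) *reqWriteback = true;
            return S;
        case INV:   // full invalidation
            assert(s != I);
            if (s == M) *reqWriteback = true;
            return I;
        case FWD:   // forward; the sharer keeps its S copy
            assert(s == S);
            return S;
    }
    return s;
}

}  // namespace inval_sketch

int main() {
    bool wb = false;
    assert(inval_sketch::applyInval(inval_sketch::M, inval_sketch::INVX, &wb) == inval_sketch::S && wb);
    assert(inval_sketch::applyInval(inval_sketch::E, inval_sketch::INV, &wb) == inval_sketch::I && !wb);
    return 0;
}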
uint64_t Cache::access(MemReq& req) {
    uint64_t respCycle = req.cycle;
    bool skipAccess = cc->startAccess(req); //may need to skip access due to races (NOTE: may change req.type!)
    if (skipAccess) std::cout << "skip access" << std::endl;
    if (likely(!skipAccess)) {
        bool updateReplacement = (req.type == GETS) || (req.type == GETX);
        int32_t lineId = array->lookup(req.lineAddr, &req, updateReplacement);
        respCycle += accLat;

        if (lineId == -1 && cc->shouldAllocate(req)) {
            //Make space for new line
            Address wbLineAddr;
            lineId = array->preinsert(req.lineAddr, &req, &wbLineAddr); //find the lineId to replace
            trace(Cache, "[%s] Evicting 0x%lx", name.c_str(), wbLineAddr);

            //Evictions are not in the critical path in any sane implementation -- we do not include their delays
            //NOTE: We might be "evicting" an invalid line for all we know. Coherence controllers will know what to do
            cc->processEviction(req, wbLineAddr, lineId, respCycle); //1. if needed, send invalidates/downgrades to lower level

            array->postinsert(req.lineAddr, &req, lineId); //do the actual insertion. NOTE: Now we must split insert into a 2-phase thing because cc unlocks us.
        }
        respCycle = cc->processAccess(req, lineId, respCycle);
    }
    cc->endAccess(req);
    assert_msg(respCycle >= req.cycle, "[%s] resp < req? 0x%lx type %s childState %s, respCycle %ld reqCycle %ld",
            name.c_str(), req.lineAddr, AccessTypeName(req.type), MESIStateName(*req.state), respCycle, req.cycle);
    return respCycle;
}
uint64_t MESIBottomCC::processEviction(Address wbLineAddr, uint32_t lineId, bool lowerLevelWriteback, uint64_t cycle, uint32_t srcId) {
    MESIState* state = &array[lineId];
    if (lowerLevelWriteback) {
        //If this happens, when tcc issued the invalidations, it got a writeback. This means we have to do a PUTX, i.e. we have to transition to M if we are in E
        assert(*state == M || *state == E); //Must have exclusive permission!
        *state = M; //Silent E->M transition (at eviction); now we'll do a PUTX
    }
    uint64_t respCycle = cycle;
    switch (*state) {
        case I:
            break; //Nothing to do
        case S:
        case E:
            {
                MemReq req = {wbLineAddr, PUTS, selfId, state, cycle, &ccLock, *state, srcId, 0 /*no flags*/};
                respCycle = parents[getParentId(wbLineAddr)]->access(req);
            }
            break;
        case M:
            {
                MemReq req = {wbLineAddr, PUTX, selfId, state, cycle, &ccLock, *state, srcId, 0 /*no flags*/};
                respCycle = parents[getParentId(wbLineAddr)]->access(req);
            }
            break;

        default: panic("!?");
    }
    assert_msg(*state == I, "Wrong final state %s on eviction", MESIStateName(*state));
    return respCycle;
}
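// --- Illustrative sketch (not part of zsim) ---
// The request that processEviction() above issues to the parent depends only on
// the victim's final state: I needs no message, S/E victims are announced with a
// clean PUTS, and M victims are written back with a dirty PUTX (a preceding
// lowerLevelWriteback promotes E to M first, so that dirty data ends up as a
// PUTX). The enum and helper below are hypothetical stand-ins that just make
// that mapping explicit; the snippet is standalone.
#include <cassert>

namespace evict_sketch {

enum State { I, S, E, M };
enum WbType { NONE, PUTS, PUTX };

WbType writebackFor(State victim) {
    switch (victim) {
        case I:          return NONE;  // nothing to do
        case S: case E:  return PUTS;  // clean writeback
        case M:          return PUTX;  // dirty writeback
    }
    return NONE;
}

}  // namespace evict_sketch

int main() {
    assert(evict_sketch::writebackFor(evict_sketch::M) == evict_sketch::PUTX);
    assert(evict_sketch::writebackFor(evict_sketch::E) == evict_sketch::PUTS);
    assert(evict_sketch::writebackFor(evict_sketch::I) == evict_sketch::NONE);
    return 0;
}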
uint64_t exclusive_MESIBottomCC::processEviction(Address wbLineAddr, uint32_t lineId, bool lowerLevelWriteback, uint64_t cycle, uint32_t srcId) {

    //we don't do lower level writeback because the cache is exclusive

    MESIState* state = &array[lineId];

    uint64_t respCycle = cycle;
    switch (*state) {
        case I:
            break; //Nothing to do
        case S:
        case E:
            {
                MemReq req = {wbLineAddr, PUTS, selfId, state, cycle, &ccLock, *state, srcId, 0 /*no flags*/};
                respCycle = parents[getParentId(wbLineAddr)]->access(req);
            }
            break;
        case M:
            {
                MemReq req = {wbLineAddr, PUTX, selfId, state, cycle, &ccLock, *state, srcId, 0 /*no flags*/};
                respCycle = parents[getParentId(wbLineAddr)]->access(req);
            }
            break;

        default: panic("!?");
    }
    assert_msg(*state == I, "Wrong final state %s on eviction", MESIStateName(*state));
    return respCycle;
}
uint64_t MESIBottomCC::processAccess(Address lineAddr, uint32_t lineId, AccessType type, uint64_t cycle, uint32_t srcId, uint32_t flags) {
    uint64_t respCycle = cycle;
    MESIState* state = &array[lineId];
    switch (type) {
        // A PUTS/PUTX does nothing w.r.t. higher coherence levels --- it dies here
        case PUTS: //Clean writeback, nothing to do (except profiling)
            assert(*state != I);
            profPUTS.inc();
            break;
        case PUTX: //Dirty writeback
            assert(*state == M || *state == E);
            if (*state == E) {
                //Silent transition, record that block was written to
                *state = M;
            }
            profPUTX.inc();
            break;
        case GETS:
            if (*state == I) {
                uint32_t parentId = getParentId(lineAddr);
                MemReq req = {lineAddr, GETS, selfId, state, cycle, &ccLock, *state, srcId, flags};
                uint32_t nextLevelLat = parents[parentId]->access(req) - cycle;
                uint32_t netLat = parentRTTs[parentId];
                profGETNextLevelLat.inc(nextLevelLat);
                profGETNetLat.inc(netLat);
                respCycle += nextLevelLat + netLat;
                profGETSMiss.inc();
                assert(*state == S || *state == E);
            } else {
                profGETSHit.inc();
            }
            break;
        case GETX:
            if (*state == I || *state == S) {
                //Profile before access, state changes
                if (*state == I) profGETXMissIM.inc();
                else profGETXMissSM.inc();
                uint32_t parentId = getParentId(lineAddr);
                MemReq req = {lineAddr, GETX, selfId, state, cycle, &ccLock, *state, srcId, flags};
                uint32_t nextLevelLat = parents[parentId]->access(req) - cycle;
                uint32_t netLat = parentRTTs[parentId];
                profGETNextLevelLat.inc(nextLevelLat);
                profGETNetLat.inc(netLat);
                respCycle += nextLevelLat + netLat;
            } else {
                if (*state == E) {
                    // Silent transition
                    // NOTE: When do we silent-transition E->M on an ML hierarchy... on a GETX, or on a PUTX?
                    /* Actually, on both: on a GETX b/c line's going to be modified anyway, and must do it if it is the L1 (it's OK not
                     * to transition if L2+, we'll TX on the PUTX or invalidate, but doing it this way minimizes the differences between
                     * L1 and L2+ controllers); and on a PUTX, because receiving a PUTX while we're in E indicates the child did a silent
                     * transition and now that it is evicting, it's our turn to maintain M info.
                     */
                    *state = M;
                }
                profGETXHit.inc();
            }
            assert_msg(*state == M, "Wrong final state on GETX, lineId %d numLines %d, finalState %s", lineId, numLines, MESIStateName(*state));
            break;

        default: panic("!?");
    }
    assert_msg(respCycle >= cycle, "XXX %ld %ld", respCycle, cycle);
    return respCycle;
}
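// --- Illustrative sketch (not part of zsim) ---
// How the miss latency in processAccess() above is composed, with made-up
// numbers: the parent returns the cycle at which it finishes, so the time spent
// in the parent is (returned cycle - cycle), and the network round trip stored
// in parentRTTs[] is added on top of that.
#include <cassert>
#include <cstdint>

int main() {
    const uint64_t cycle = 1000;        // cycle at which this level issues the parent access
    const uint64_t parentDone = 1080;   // hypothetical cycle returned by parents[parentId]->access(req)
    const uint64_t parentRTT = 10;      // hypothetical network round trip for this parent

    const uint64_t nextLevelLat = parentDone - cycle;             // 80 cycles spent in the parent
    const uint64_t respCycle = cycle + nextLevelLat + parentRTT;  // 1090

    assert(respCycle == 1090);
    assert(respCycle >= cycle);  // mirrors the assert at the end of processAccess()
    return 0;
}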
uint64_t flexclusive_MESIBottomCC::processAccess(Address lineAddr,
                                                 uint32_t lineId,
                                                 AccessType type,
                                                 uint64_t cycle, uint32_t srcId,
                                                 uint32_t flags, CLUState cs) {
  uint64_t respCycle = cycle;

  if (cs == EX) {
    if ((int)lineId == -1) {
      //info("The line id is %d", (int) lineId);
      assert_msg(type == GETS || type == GETX, "The type is %d", type);
      if (type == GETS)
        profGETSMiss.inc();
      else
        profGETXMissIM.inc();
      if (!(flags & MemReq::INNER_COPY)) {  // INNER_COPY means an inner level
                                            // already holds the line; if it is not
                                            // set, fetch from the parent (excl llc)
        MESIState dummyState = I;  // does this affect race conditions ?
        MemReq req = {lineAddr, type,       selfId, &dummyState, cycle,
                      &ccLock,  dummyState, srcId,  flags};
        uint32_t parentId = getParentId(lineAddr);
        uint32_t nextLevelLat = parents[parentId]->access(req) - cycle;
        uint32_t netLat = parentRTTs[parentId];
        profGETNextLevelLat.inc(nextLevelLat);
        profGETNetLat.inc(netLat);
        respCycle += nextLevelLat + netLat;
      }

      assert_msg(respCycle >= cycle, "XXX %ld %ld", respCycle, cycle);
      return respCycle;
    }

    MESIState* state = &array[lineId];
    switch (type) {
      // A PUTS/PUTX does nothing w.r.t. higher coherence levels --- it dies
      // here
      case PUTS:  // Clean writeback from an inner level
        // We can't assert(*state == I) here: a copy of the data may still be
        // in this cache from another path.
        if (!(flags & MemReq::INNER_COPY)) {
          *state = E;  // receive the data in exclusive state
          // For multithreaded applications we may also need to be able to
          // receive the data in shared state.
        }
        profPUTS.inc();
        profExclWB.inc();
        break;
      case PUTX:  // Dirty writeback from an inner level
        // As with PUTS, we can't assert(*state == I) here.
        if (!(flags & MemReq::INNER_COPY)) {
          *state = M;  // record that the block was written to
        }
        profPUTX.inc();
        profExclWB.inc();
        break;
      case GETS:
        if (*state == I && (!(flags & MemReq::INNER_COPY))) {
          uint32_t parentId = getParentId(lineAddr);
          MESIState dummyState = I;  // does this affect race conditions ?
          MemReq req = {lineAddr, GETS,       selfId, &dummyState, cycle,
                        &ccLock,  dummyState, srcId,  flags};
          uint32_t nextLevelLat = parents[parentId]->access(req) - cycle;
          uint32_t netLat = parentRTTs[parentId];
          profGETNextLevelLat.inc(nextLevelLat);
          profGETNetLat.inc(netLat);
          respCycle += nextLevelLat + netLat;
          profGETSMiss.inc();
        } else {
          profGETSHit.inc();
        }
        if (!(flags & MemReq::PREFETCH))
          *state = I;  // demand GETS: drop our copy, the cache is exclusive
        else
          *state = E;  // prefetch: keep the line

        break;
      case GETX:
        if ((*state == I || *state == S) && (!(flags & MemReq::INNER_COPY))) {
          // Profile before access, state changes
          if (*state == I)
            profGETXMissIM.inc();
          else
            profGETXMissSM.inc();
          uint32_t parentId = getParentId(lineAddr);
          MemReq req = {lineAddr, GETX,   selfId, state, cycle,
                        &ccLock,  *state, srcId,  flags};
          uint32_t nextLevelLat = parents[parentId]->access(req) - cycle;
          uint32_t netLat = parentRTTs[parentId];
          profGETNextLevelLat.inc(nextLevelLat);
          profGETNetLat.inc(netLat);
          respCycle += nextLevelLat + netLat;
        } else {  // means state is E or M
          profGETXHit.inc();
        }
        if (!(flags & MemReq::PREFETCH))
          *state = I;  // invalidate because the cache is exclusive (see the sketch after this function)
        else
          *state = E;  // prefetch: keep the line
        break;

      default:
        panic("!?");
    }

  } else {
    MESIState* state = &array[lineId];

    switch (type) {
      // A PUTS/PUTX does nothing w.r.t. higher coherence levels --- it dies
      // here
      case PUTS:  // Clean writeback, nothing to do (except profiling)
        assert(*state != I);
        profPUTS.inc();
        break;
      case PUTX:  // Dirty writeback
        assert(*state == M || *state == E);
        if (*state == E) {
          // Silent transition, record that block was written to
          *state = M;
        }
        profPUTX.inc();
        break;
      case GETS:
        if (*state == I) {
          uint32_t parentId = getParentId(lineAddr);
          if (!(flags & MemReq::INNER_COPY)) {
            MemReq req = {lineAddr, GETS,   selfId, state, cycle,
                          &ccLock,  *state, srcId,  flags};
            uint32_t nextLevelLat = parents[parentId]->access(req) - cycle;
            uint32_t netLat = parentRTTs[parentId];
            profGETNextLevelLat.inc(nextLevelLat);
            profGETNetLat.inc(netLat);
            respCycle += nextLevelLat + netLat;
          } else {
            // also need to send invx to the parents
            *state = S;  // don't change respCycle
          }
          profGETSMiss.inc();

          assert(*state == S || *state == E);
        } else {
          profGETSHit.inc();
        }
        break;
      case GETX:
        if (*state == I || *state == S) {
          // Profile before access, state changes
          if (*state == I)
            profGETXMissIM.inc();
          else
            profGETXMissSM.inc();

          if (flags & MemReq::INNER_COPY) {  // an inner level already holds the
                                             // line, so there is no need to fetch
                                             // it from the parent of this
                                             // non-inclusive llc
            // We can no longer assert(*state == I) here, since we always check
            // for an inner copy.
            *state = M;  // it is a GETX, so the final state must be M
          } else {
            uint32_t parentId = getParentId(lineAddr);
            MemReq req = {lineAddr, GETX,   selfId, state, cycle,
                          &ccLock,  *state, srcId,  flags};
            uint32_t nextLevelLat = parents[parentId]->access(req) - cycle;
            uint32_t netLat = parentRTTs[parentId];
            profGETNextLevelLat.inc(nextLevelLat);
            profGETNetLat.inc(netLat);
            respCycle += nextLevelLat + netLat;
          }
        } else {
          if (*state == E) {
            // Silent transition
            // NOTE: When do we silent-transition E->M on an ML hierarchy... on
            // a GETX, or on a PUTX?
            /* Actually, on both: on a GETX b/c line's going to be modified
             * anyway, and must do it if it is the L1 (it's OK not
             * to transition if L2+, we'll TX on the PUTX or invalidate, but
             * doing it this way minimizes the differences between
             * L1 and L2+ controllers); and on a PUTX, because receiving a PUTX
             * while we're in E indicates the child did a silent
             * transition and now that it is evicting, it's our turn to
             * maintain M info.
             */
            *state = M;
          }
          profGETXHit.inc();
        }

        assert_msg(
            *state == M,
            "Wrong final state on GETX, lineId %d numLines %d, finalState %s",
            lineId, numLines, MESIStateName(*state));
        break;

      default:
        panic("!?");
    }
  }

  assert_msg(respCycle >= cycle, "XXX %ld %ld", respCycle, cycle);
  return respCycle;
}
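// --- Illustrative sketch (not part of zsim) ---
// The exclusive-mode (cs == EX) GETS/GETX handling above always ends the same
// way: once the line has been handed to the requesting inner cache, a demand
// access leaves this cache in I (it must not keep a copy, since it is
// exclusive), while a prefetch keeps the line in E. The enum and helper below
// are hypothetical stand-ins that isolate just that final-state decision.
#include <cassert>

namespace excl_sketch {

enum State { I, S, E, M };

State finalStateAfterGet(bool isPrefetch) {
    return isPrefetch ? E   // prefetch: keep the line in the exclusive cache
                      : I;  // demand GETS/GETX: drop it, the inner level owns it now
}

}  // namespace excl_sketch

int main() {
    assert(excl_sketch::finalStateAfterGet(/*isPrefetch=*/false) == excl_sketch::I);
    assert(excl_sketch::finalStateAfterGet(/*isPrefetch=*/true) == excl_sketch::E);
    return 0;
}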
// TODO(dsm): This is copied verbatim from Cache. We should split Cache into different methods, then call those.
uint64_t TimingCache::access(MemReq& req) {
    EventRecorder* evRec = zinfo->eventRecorders[req.srcId];
    assert_msg(evRec, "TimingCache is not connected to TimingCore");
    uint32_t initialRecords = evRec->numRecords();

    bool hasWritebackRecord = false;
    TimingRecord writebackRecord;
    bool hasAccessRecord = false;
    TimingRecord accessRecord;
    uint64_t evDoneCycle = 0;
    
    uint64_t respCycle = req.cycle;
    bool skipAccess = cc->startAccess(req); //may need to skip access due to races (NOTE: may change req.type!)
    if (likely(!skipAccess)) {
        bool updateReplacement = (req.type == GETS) || (req.type == GETX);
        int32_t lineId = array->lookup(req.lineAddr, &req, updateReplacement);
        respCycle += accLat;

        if (lineId == -1 /*&& cc->shouldAllocate(req)*/) {
            assert(cc->shouldAllocate(req)); //dsm: for now, we don't deal with non-inclusion in TimingCache

            //Make space for new line
            Address wbLineAddr;
            lineId = array->preinsert(req.lineAddr, &req, &wbLineAddr); //find the lineId to replace
            trace(Cache, "[%s] Evicting 0x%lx", name.c_str(), wbLineAddr);

            //Evictions are not in the critical path in any sane implementation -- we do not include their delays
            //NOTE: We might be "evicting" an invalid line for all we know. Coherence controllers will know what to do
            evDoneCycle = cc->processEviction(req, wbLineAddr, lineId, respCycle); //if needed, send invalidates/downgrades to lower level, and wb to upper level

            array->postinsert(req.lineAddr, &req, lineId); //do the actual insertion. NOTE: Now we must split insert into a 2-phase thing because cc unlocks us.

            if (evRec->numRecords() > initialRecords) {
                assert_msg(evRec->numRecords() == initialRecords + 1, "evRec records on eviction %ld", evRec->numRecords());
                writebackRecord = evRec->getRecord(initialRecords);
                hasWritebackRecord = true;
                evRec->popRecord();
            }
        }

        uint64_t getDoneCycle = respCycle;
        respCycle = cc->processAccess(req, lineId, respCycle, &getDoneCycle);

        if (evRec->numRecords() > initialRecords) {
            assert_msg(evRec->numRecords() == initialRecords + 1, "evRec records %ld", evRec->numRecords());
            accessRecord = evRec->getRecord(initialRecords);
            hasAccessRecord = true;
            evRec->popRecord();
        }

        // At this point we have all the info we need to hammer out the timing record
        TimingRecord tr = {req.lineAddr << lineBits, req.cycle, respCycle, req.type, NULL, NULL}; //note the end event is the response, not the wback

        if (getDoneCycle - req.cycle == accLat) {
            // Hit
            assert(!hasWritebackRecord);
            assert(!hasAccessRecord);
            uint64_t hitLat = respCycle - req.cycle; // accLat + invLat
            HitEvent* ev = new (evRec) HitEvent(this, hitLat, domain);
            ev->setMinStartCycle(req.cycle);
            tr.startEvent = tr.endEvent = ev;
        } else {
            assert_msg(getDoneCycle == respCycle, "gdc %ld rc %ld", getDoneCycle, respCycle);

            // Miss events:
            // MissStart (does high-prio lookup) -> getEvent || evictionEvent || replEvent (if needed) -> MissWriteback

            MissStartEvent* mse = new (evRec) MissStartEvent(this, accLat, domain);
            MissResponseEvent* mre = new (evRec) MissResponseEvent(this, mse, domain);
            MissWritebackEvent* mwe = new (evRec) MissWritebackEvent(this, mse, accLat, domain);

            mse->setMinStartCycle(req.cycle);
            mre->setMinStartCycle(getDoneCycle);
            mwe->setMinStartCycle(MAX(evDoneCycle, getDoneCycle));

            // Tie two events to an optional timing record, padding the gaps with
            // DelayEvents (a worked example of this padding follows this function)
            // TODO: Promote to evRec if this is more generally useful
            auto connect = [evRec](const TimingRecord* r, TimingEvent* startEv, TimingEvent* endEv, uint64_t startCycle, uint64_t endCycle) {
                assert_msg(startCycle <= endCycle, "start > end? %ld %ld", startCycle, endCycle);
                if (r) {
                    assert_msg(startCycle <= r->reqCycle, "%ld / %ld", startCycle, r->reqCycle);
                    assert_msg(r->respCycle <= endCycle, "%ld %ld %ld %ld", startCycle, r->reqCycle, r->respCycle, endCycle);
                    uint64_t upLat = r->reqCycle - startCycle;
                    uint64_t downLat = endCycle - r->respCycle;

                    if (upLat) {
                        DelayEvent* dUp = new (evRec) DelayEvent(upLat);
                        dUp->setMinStartCycle(startCycle);
                        startEv->addChild(dUp, evRec)->addChild(r->startEvent, evRec);
                    } else {
                        startEv->addChild(r->startEvent, evRec);
                    }

                    if (downLat) {
                        DelayEvent* dDown = new (evRec) DelayEvent(downLat);
                        dDown->setMinStartCycle(r->respCycle);
                        r->endEvent->addChild(dDown, evRec)->addChild(endEv, evRec);
                    } else {
                        r->endEvent->addChild(endEv, evRec);
                    }
                } else {
                    if (startCycle == endCycle) {
                        startEv->addChild(endEv, evRec);
                    } else {
                        DelayEvent* dEv = new (evRec) DelayEvent(endCycle - startCycle);
                        dEv->setMinStartCycle(startCycle);
                        startEv->addChild(dEv, evRec)->addChild(endEv, evRec);
                    }
                }
            };

            // Get path
            connect(hasAccessRecord? &accessRecord : NULL, mse, mre, req.cycle + accLat, getDoneCycle);
            mre->addChild(mwe, evRec);

            // Eviction path
            if (evDoneCycle) {
                connect(hasWritebackRecord? &writebackRecord : NULL, mse, mwe, req.cycle + accLat, evDoneCycle);
            }

            // Replacement path
            if (evDoneCycle && cands > ways) {
                uint32_t replLookups = (cands + (ways-1))/ways - 1; // e.g., with 4 ways, 5-8 -> 1, 9-12 -> 2, etc.
                assert(replLookups);

                uint32_t fringeAccs = ways - 1;
                uint32_t accsSoFar = 0;

                TimingEvent* p = mse;

                // Candidate lookup events
                while (accsSoFar < replLookups) {
                    uint32_t preDelay = accsSoFar? 0 : tagLat;
                    uint32_t postDelay = tagLat - MIN(tagLat - 1, fringeAccs);
                    uint32_t accs = MIN(fringeAccs, replLookups - accsSoFar);
                    //info("ReplAccessEvent rl %d fa %d preD %d postD %d accs %d", replLookups, fringeAccs, preDelay, postDelay, accs);
                    ReplAccessEvent* raEv = new (evRec) ReplAccessEvent(this, accs, preDelay, postDelay, domain);
                    raEv->setMinStartCycle(req.cycle /*lax...*/);
                    accsSoFar += accs;
                    p->addChild(raEv, evRec);
                    p = raEv;
                    fringeAccs *= ways - 1;
                }

                // Swap events -- typically, one read and one write work for 1-2 swaps. Exact number depends on layout.
                ReplAccessEvent* rdEv = new (evRec) ReplAccessEvent(this, 1, tagLat, tagLat, domain);
                rdEv->setMinStartCycle(req.cycle /*lax...*/);
                ReplAccessEvent* wrEv = new (evRec) ReplAccessEvent(this, 1, 0, 0, domain);
                wrEv->setMinStartCycle(req.cycle /*lax...*/);

                p->addChild(rdEv, evRec)->addChild(wrEv, evRec)->addChild(mwe, evRec);
            }


            tr.startEvent = mse;
            tr.endEvent = mre; // note the end event is the response, not the wback
        }
        evRec->pushRecord(tr);
    }

    cc->endAccess(req);

    assert_msg(respCycle >= req.cycle, "[%s] resp < req? 0x%lx type %s childState %s, respCycle %ld reqCycle %ld",
            name.c_str(), req.lineAddr, AccessTypeName(req.type), MESIStateName(*req.state), respCycle, req.cycle);
    return respCycle;
}
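// --- Illustrative sketch (not part of zsim) ---
// The delay padding computed by the connect() lambda inside TimingCache::access()
// above, with made-up numbers: a child's timing record spanning [reqCycle,
// respCycle] is spliced into the window [startCycle, endCycle] by inserting a
// DelayEvent of upLat cycles before its start event and one of downLat cycles
// after its end event, so the padded chain covers the whole window.
#include <cassert>
#include <cstdint>

int main() {
    const uint64_t startCycle = 100, endCycle = 160;  // window between startEv and endEv
    const uint64_t reqCycle = 110, respCycle = 150;   // hypothetical child record

    const uint64_t upLat = reqCycle - startCycle;   // 10-cycle delay before the record's startEvent
    const uint64_t downLat = endCycle - respCycle;  // 10-cycle delay after the record's endEvent

    // The padded chain spans exactly the window the two events must cover.
    assert(upLat + (respCycle - reqCycle) + downLat == endCycle - startCycle);
    return 0;
}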