/* Handles an invalidation message arriving from above. INVX demotes an
 * exclusive (E/M) line to S, INV drops the line to I, and FWD leaves a shared
 * line untouched. A dirty (M) line sets *reqWriteback so the caller issues the
 * writeback. */
void MESIBottomCC::processInval(Address lineAddr, uint32_t lineId, InvType type, bool* reqWriteback) {
    MESIState* lineState = &array[lineId];
    assert(*lineState != I);
    if (type == INVX) {
        // Lose exclusivity only. No need to propagate the downgrade down the
        // tree ourselves -- topcc does that automatically, since it always
        // knows the final state.
        assert_msg(*lineState == E || *lineState == M, "Invalid state %s", MESIStateName(*lineState));
        if (*lineState == M) *reqWriteback = true;
        *lineState = S;
        profINVX.inc();
    } else if (type == INV) {
        // Full invalidation; dirty data must be written back by the caller.
        assert(*lineState != I);
        if (*lineState == M) *reqWriteback = true;
        *lineState = I;
        profINV.inc();
    } else if (type == FWD) {
        // Forward: the line stays shared, nothing to change here.
        assert_msg(*lineState == S, "Invalid state %s on FWD", MESIStateName(*lineState));
        profFWD.inc();
    } else {
        panic("!?");
    }
    // NOTE: BottomCC never calls up on an invalidate, so it adds no extra latency.
}
// Serves one memory request: looks up the line, allocates on a miss (evicting
// a victim first) when the coherence controller wants the line cached, and
// runs the coherence protocol for the access.
// Returns the cycle at which the response is available (always >= req.cycle).
// FIX: removed a leftover debug print ("skip access" to std::cout) that fired
// on every race-skipped access -- it polluted simulator output and is absent
// from the equivalent TimingCache::access path.
uint64_t Cache::access(MemReq& req) {
    uint64_t respCycle = req.cycle;
    bool skipAccess = cc->startAccess(req); //may need to skip access due to races (NOTE: may change req.type!)
    if (likely(!skipAccess)) {
        // Only demand fetches (GETS/GETX) update replacement state; PUTs do not.
        bool updateReplacement = (req.type == GETS) || (req.type == GETX);
        int32_t lineId = array->lookup(req.lineAddr, &req, updateReplacement);
        respCycle += accLat;

        if (lineId == -1 && cc->shouldAllocate(req)) {
            //Make space for new line
            Address wbLineAddr;
            lineId = array->preinsert(req.lineAddr, &req, &wbLineAddr); //find the lineId to replace
            trace(Cache, "[%s] Evicting 0x%lx", name.c_str(), wbLineAddr);

            //Evictions are not in the critical path in any sane implementation -- we do not include their delays
            //NOTE: We might be "evicting" an invalid line for all we know. Coherence controllers will know what to do
            cc->processEviction(req, wbLineAddr, lineId, respCycle); //1. if needed, send invalidates/downgrades to lower level

            array->postinsert(req.lineAddr, &req, lineId); //do the actual insertion. NOTE: Now we must split insert into a 2-phase thing because cc unlocks us.
        }

        respCycle = cc->processAccess(req, lineId, respCycle);
    }
    cc->endAccess(req);

    assert_msg(respCycle >= req.cycle, "[%s] resp < req? 0x%lx type %s childState %s, respCycle %ld reqCycle %ld",
            name.c_str(), req.lineAddr, AccessTypeName(req.type), MESIStateName(*req.state), respCycle, req.cycle);
    return respCycle;
}
/* Evicts the line in lineId, issuing a PUTS (clean) or PUTX (dirty) to the
 * parent as needed. If the invalidation of children produced a writeback
 * (lowerLevelWriteback), the line is first promoted E->M so a PUTX is sent.
 * The parent drives the line to I through the state pointer in the request;
 * the final state is asserted to be I. Returns the parent's response cycle
 * (or the input cycle if the line was already invalid). */
uint64_t MESIBottomCC::processEviction(Address wbLineAddr, uint32_t lineId, bool lowerLevelWriteback, uint64_t cycle, uint32_t srcId) {
    MESIState* lineState = &array[lineId];
    if (lowerLevelWriteback) {
        // tcc's invalidations got a writeback, so we must do a PUTX: take the
        // silent E->M transition now. We must have exclusive permission.
        assert(*lineState == M || *lineState == E);
        *lineState = M;
    }
    uint64_t respCycle = cycle;
    switch (*lineState) {
        case I:
            break; // Nothing to do
        case S:
        case E:
        case M: {
            // Clean lines (S/E) go out as PUTS, dirty (M) as PUTX; the parent
            // transitions us to I via the state pointer.
            MemReq req = {wbLineAddr, (*lineState == M) ? PUTX : PUTS, selfId, lineState, cycle, &ccLock, *lineState, srcId, 0 /*no flags*/};
            respCycle = parents[getParentId(wbLineAddr)]->access(req);
            break;
        }
        default: panic("!?");
    }
    assert_msg(*lineState == I, "Wrong final state %s on eviction", MESIStateName(*lineState));
    return respCycle;
}
/* Eviction for the exclusive-cache variant. There is no lower-level-writeback
 * promotion here because the cache is exclusive; otherwise behaves like
 * MESIBottomCC::processEviction: PUTS for clean (S/E), PUTX for dirty (M),
 * nothing for I. The parent drives the line to I via the state pointer in the
 * request; the final state is asserted to be I. */
uint64_t exclusive_MESIBottomCC::processEviction(Address wbLineAddr, uint32_t lineId, bool lowerLevelWriteback, uint64_t cycle, uint32_t srcId) {
    MESIState* lineState = &array[lineId];
    uint64_t respCycle = cycle;
    switch (*lineState) {
        case I:
            break; // Nothing to do
        case S:
        case E:
        case M: {
            // Clean lines (S/E) write back with PUTS, dirty (M) with PUTX.
            MemReq req = {wbLineAddr, (*lineState == M) ? PUTX : PUTS, selfId, lineState, cycle, &ccLock, *lineState, srcId, 0 /*no flags*/};
            respCycle = parents[getParentId(wbLineAddr)]->access(req);
            break;
        }
        default: panic("!?");
    }
    assert_msg(*lineState == I, "Wrong final state %s on eviction", MESIStateName(*lineState));
    return respCycle;
}
// Runs the bottom (toward-memory) side of the MESI protocol for one access.
// PUTS/PUTX terminate here (profiling + possible silent E->M); GETS/GETX that
// miss fetch from the parent, paying the next-level latency plus the parent
// network RTT. The parent updates *state in place through the pointer passed
// in the MemReq. Returns the cycle at which the response is available.
uint64_t MESIBottomCC::processAccess(Address lineAddr, uint32_t lineId, AccessType type, uint64_t cycle, uint32_t srcId, uint32_t flags) {
    uint64_t respCycle = cycle;
    //uint64_t origin_addr = req.lineAddr;
    MESIState* state = &array[lineId];
    switch (type) {
        // A PUTS/PUTX does nothing w.r.t. higher coherence levels --- it dies here
        case PUTS:
            //Clean writeback, nothing to do (except profiling)
            assert(*state != I);
            profPUTS.inc();
            break;
        case PUTX:
            //Dirty writeback
            assert(*state == M || *state == E);
            if (*state == E) {
                //Silent transition, record that block was written to
                *state = M;
            }
            profPUTX.inc();
            break;
        case GETS:
            if (*state == I) {
                // Miss: fetch the line from the parent. The parent writes the
                // new state (S or E) through the state pointer in the request.
                uint32_t parentId = getParentId(lineAddr);
                MemReq req = {lineAddr, GETS, selfId, state, cycle, &ccLock, *state, srcId, flags};
                //std::cout<<"GETS access "<<std::hex<<(lineAddr)<<","<<srcId<<std::endl;
                uint32_t nextLevelLat = parents[parentId]->access(req) - cycle;
                uint32_t netLat = parentRTTs[parentId];
                profGETNextLevelLat.inc(nextLevelLat);
                profGETNetLat.inc(netLat);
                respCycle += nextLevelLat + netLat;
                profGETSMiss.inc();
                assert(*state == S || *state == E);
            } else {
                profGETSHit.inc();
            }
            break;
        case GETX:
            if (*state == I || *state == S) {
                //Profile before access, state changes
                if (*state == I) profGETXMissIM.inc();
                else profGETXMissSM.inc();
                uint32_t parentId = getParentId(lineAddr);
                MemReq req = {lineAddr, GETX, selfId, state, cycle, &ccLock, *state, srcId, flags};
                uint32_t nextLevelLat = parents[parentId]->access(req) - cycle;
                //std::cout<<"GETX access "<<std::hex<<(lineAddr)<<","<<srcId<<std::endl;
                uint32_t netLat = parentRTTs[parentId];
                profGETNextLevelLat.inc(nextLevelLat);
                profGETNetLat.inc(netLat);
                respCycle += nextLevelLat + netLat;
            } else {
                if (*state == E) {
                    // Silent transition
                    // NOTE: When do we silent-transition E->M on an ML hierarchy... on a GETX, or on a PUTX?
                    /* Actually, on both: on a GETX b/c line's going to be modified anyway, and must do it if it is the L1 (it's OK not
                     * to transition if L2+, we'll TX on the PUTX or invalidate, but doing it this way minimizes the differences between
                     * L1 and L2+ controllers); and on a PUTX, because receiving a PUTX while we're in E indicates the child did a silent
                     * transition and now that it is evicting, it's our turn to maintain M info. */
                    *state = M;
                }
                profGETXHit.inc();
            }
            assert_msg(*state == M, "Wrong final state on GETX, lineId %d numLines %d, finalState %s", lineId, numLines, MESIStateName(*state));
            break;
        default: panic("!?");
    }
    assert_msg(respCycle >= cycle, "XXX %ld %ld", respCycle, cycle);
    return respCycle;
}
// Flexible-exclusivity bottom coherence controller. The CLUState argument
// selects between two modes (inferred from the branches below -- confirm
// against the CLUState definition):
//   cs == EX : the cache behaves exclusively -- lines returned to children
//              (GETS/GETX without PREFETCH) are invalidated here, writebacks
//              (PUTS/PUTX) install the line, and a lineId of -1 means the
//              line is not tracked in this array at all.
//   otherwise: standard non-inclusive MESI handling (mirrors
//              MESIBottomCC::processAccess), with MemReq::INNER_COPY
//              indicating a copy was found in inner levels so no parent fetch
//              is needed.
// Returns the cycle at which the response is available.
uint64_t flexclusive_MESIBottomCC::processAccess(Address lineAddr, uint32_t lineId, AccessType type, uint64_t cycle, uint32_t srcId, uint32_t flags, CLUState cs) {
    uint64_t respCycle = cycle;
    if (cs == EX) {
        if ((int)lineId == -1) {
            // Untracked line: only demand fetches are legal here. Fetch from
            // the parent unless an inner-level copy satisfies the request.
            //info("The line id is %d", (int) lineId);
            assert_msg((type == GETS || type == GETX) , "The type is %d", type);
            if (type == GETS) profGETSMiss.inc();
            else profGETXMissIM.inc();
            if (!(flags & MemReq::INNER_COPY)) { // i.e. if line was found in inner levels in case of excl llc
                MESIState dummyState = I; // does this affect race conditions ?
                MemReq req = {lineAddr, type, selfId, &dummyState, cycle, &ccLock, dummyState, srcId, flags};
                uint32_t parentId = getParentId(lineAddr);
                uint32_t nextLevelLat = parents[parentId]->access(req) - cycle;
                uint32_t netLat = parentRTTs[parentId];
                profGETNextLevelLat.inc(nextLevelLat);
                profGETNetLat.inc(netLat);
                respCycle += nextLevelLat + netLat;
            }
            assert_msg(respCycle >= cycle, "XXX %ld %ld", respCycle, cycle);
            return respCycle;
        }
        MESIState* state = &array[lineId];
        switch (type) {
            // A PUTS/PUTX does nothing w.r.t. higher coherence levels --- it dies here
            case PUTS:
                // Clean writeback: the evicted child's line is installed here.
                // assert(*state == I); //we can't assert this as a copy of the data
                // may still be there in the cache from somewhere else
                if (flags & MemReq::INNER_COPY) {
                } // assert(*state == I)}
                else
                    *state = E; // receive the data in exclusive state
                // for multithreaded applications, may need to receive data in
                // shared state also
                profPUTS.inc();
                profExclWB.inc();
                break;
            case PUTX:
                // Dirty writeback: install as M unless an inner copy exists.
                //assert(*state == I);
                // Silent transition, record that block was written to
                if (flags & MemReq::INNER_COPY) {
                    //assert(*state == I);
                } else
                    *state = M;
                profPUTX.inc();
                profExclWB.inc();
                break;
            case GETS:
                if (*state == I && (!(flags & MemReq::INNER_COPY))) {
                    // Miss with no inner copy: fetch from parent using a dummy
                    // state so our array entry is not overwritten by the parent.
                    uint32_t parentId = getParentId(lineAddr);
                    MESIState dummyState = I; // does this affect race conditions ?
                    MemReq req = {lineAddr, GETS, selfId, &dummyState, cycle, &ccLock, dummyState, srcId, flags};
                    uint32_t nextLevelLat = parents[parentId]->access(req) - cycle;
                    uint32_t netLat = parentRTTs[parentId];
                    profGETNextLevelLat.inc(nextLevelLat);
                    profGETNetLat.inc(netLat);
                    respCycle += nextLevelLat + netLat;
                    profGETSMiss.inc();
                } else {
                    profGETSHit.inc();
                }
                // Exclusive cache: once the child has the line, drop our copy
                // (keep it in E only for prefetches, which stay here).
                if (!(flags & MemReq::PREFETCH))
                    *state = I;
                else
                    *state = E;
                break;
            case GETX:
                if ((*state == I || *state == S) && (!(flags & MemReq::INNER_COPY))) {
                    // Profile before access, state changes
                    if (*state == I) profGETXMissIM.inc();
                    else profGETXMissSM.inc();
                    uint32_t parentId = getParentId(lineAddr);
                    MemReq req = {lineAddr, GETX, selfId, state, cycle, &ccLock, *state, srcId, flags};
                    uint32_t nextLevelLat = parents[parentId]->access(req) - cycle;
                    uint32_t netLat = parentRTTs[parentId];
                    profGETNextLevelLat.inc(nextLevelLat);
                    profGETNetLat.inc(netLat);
                    respCycle += nextLevelLat + netLat;
                } else { // means state is E or M
                    profGETXHit.inc();
                }
                if (!(flags & MemReq::PREFETCH))
                    *state = I; // inv because cache is exclusive
                else
                    *state = E;
                break;
            default: panic("!?");
        }
    } else {
        // Non-exclusive mode: standard MESI handling, with INNER_COPY short-
        // circuiting parent fetches when an inner-level copy serves the data.
        MESIState* state = &array[lineId];
        switch (type) {
            // A PUTS/PUTX does nothing w.r.t. higher coherence levels --- it dies here
            case PUTS:
                // Clean writeback, nothing to do (except profiling)
                assert(*state != I);
                profPUTS.inc();
                break;
            case PUTX:
                // Dirty writeback
                assert(*state == M || *state == E);
                if (*state == E) {
                    // Silent transition, record that block was written to
                    *state = M;
                }
                profPUTX.inc();
                break;
            case GETS:
                if (*state == I) {
                    uint32_t parentId = getParentId(lineAddr);
                    if (!(flags & MemReq::INNER_COPY)) {
                        MemReq req = {lineAddr, GETS, selfId, state, cycle, &ccLock, *state, srcId, flags};
                        uint32_t nextLevelLat = parents[parentId]->access(req) - cycle;
                        uint32_t netLat = parentRTTs[parentId];
                        profGETNextLevelLat.inc(nextLevelLat);
                        profGETNetLat.inc(netLat);
                        respCycle += nextLevelLat + netLat;
                    } else {
                        // also need to send invx to the parents
                        *state = S; // don't change respCycle
                    }
                    profGETSMiss.inc();
                    assert(*state == S || *state == E);
                } else {
                    profGETSHit.inc();
                }
                break;
            case GETX:
                if (*state == I || *state == S) {
                    // Profile before access, state changes
                    if (*state == I) profGETXMissIM.inc();
                    else profGETXMissSM.inc();
                    if ((flags & MemReq::INNER_COPY)) {
                        // means line was not found in the non-inclusive llc
                        // assert(*state == I); //not true anymore, as we always check for inner copy
                        *state = M; // since it is a GETX, the final state should be M
                    } else if (!(flags & MemReq::INNER_COPY)) {
                        uint32_t parentId = getParentId(lineAddr);
                        MemReq req = {lineAddr, GETX, selfId, state, cycle, &ccLock, *state, srcId, flags};
                        uint32_t nextLevelLat = parents[parentId]->access(req) - cycle;
                        uint32_t netLat = parentRTTs[parentId];
                        profGETNextLevelLat.inc(nextLevelLat);
                        profGETNetLat.inc(netLat);
                        respCycle += nextLevelLat + netLat;
                    }
                } else {
                    if (*state == E) {
                        // Silent transition
                        // NOTE: When do we silent-transition E->M on an ML hierarchy... on a GETX, or on a PUTX?
                        /* Actually, on both: on a GETX b/c line's going to be modified
                         * anyway, and must do it if it is the L1 (it's OK not
                         * to transition if L2+, we'll TX on the PUTX or invalidate, but
                         * doing it this way minimizes the differences between
                         * L1 and L2+ controllers); and on a PUTX, because receiving a PUTX
                         * while we're in E indicates the child did a silent
                         * transition and now that it is evicting, it's our turn to
                         * maintain M info. */
                        *state = M;
                    }
                    profGETXHit.inc();
                }
                assert_msg(
                    *state == M,
                    "Wrong final state on GETX, lineId %d numLines %d, finalState %s",
                    lineId, numLines, MESIStateName(*state));
                break;
            default: panic("!?");
        }
    }
    assert_msg(respCycle >= cycle, "XXX %ld %ld", respCycle, cycle);
    return respCycle;
}
// TODO(dsm): This is copied verbatim from Cache. We should split Cache into different methods, then call those.
// Functionally identical to Cache::access, but additionally builds a
// TimingRecord (a DAG of TimingEvents) describing the access so the event-
// driven timing model can replay it: a single HitEvent for hits, or a
// MissStart -> MissResponse -> MissWriteback chain (plus optional eviction
// and replacement-lookup paths) for misses. Returns the response cycle.
uint64_t TimingCache::access(MemReq& req) {
    // Every timing access must come from a core with an event recorder.
    EventRecorder* evRec = zinfo->eventRecorders[req.srcId];
    assert_msg(evRec, "TimingCache is not connected to TimingCore");

    // Records pushed by lower levels during this access are popped off and
    // spliced into our own event DAG below.
    uint32_t initialRecords = evRec->numRecords();
    bool hasWritebackRecord = false;
    TimingRecord writebackRecord;
    bool hasAccessRecord = false;
    TimingRecord accessRecord;
    uint64_t evDoneCycle = 0;

    uint64_t respCycle = req.cycle;
    bool skipAccess = cc->startAccess(req); //may need to skip access due to races (NOTE: may change req.type!)
    if (likely(!skipAccess)) {
        bool updateReplacement = (req.type == GETS) || (req.type == GETX);
        int32_t lineId = array->lookup(req.lineAddr, &req, updateReplacement);
        respCycle += accLat;

        if (lineId == -1 /*&& cc->shouldAllocate(req)*/) {
            assert(cc->shouldAllocate(req)); //dsm: for now, we don't deal with non-inclusion in TimingCache
            //Make space for new line
            Address wbLineAddr;
            lineId = array->preinsert(req.lineAddr, &req, &wbLineAddr); //find the lineId to replace
            trace(Cache, "[%s] Evicting 0x%lx", name.c_str(), wbLineAddr);

            //Evictions are not in the critical path in any sane implementation -- we do not include their delays
            //NOTE: We might be "evicting" an invalid line for all we know. Coherence controllers will know what to do
            evDoneCycle = cc->processEviction(req, wbLineAddr, lineId, respCycle); //if needed, send invalidates/downgrades to lower level, and wb to upper level

            array->postinsert(req.lineAddr, &req, lineId); //do the actual insertion. NOTE: Now we must split insert into a 2-phase thing because cc unlocks us.

            // Capture the timing record the eviction's writeback produced, if any.
            if (evRec->numRecords() > initialRecords) {
                assert_msg(evRec->numRecords() == initialRecords + 1, "evRec records on eviction %ld", evRec->numRecords());
                writebackRecord = evRec->getRecord(initialRecords);
                hasWritebackRecord = true;
                evRec->popRecord();
            }
        }

        uint64_t getDoneCycle = respCycle;
        respCycle = cc->processAccess(req, lineId, respCycle, &getDoneCycle);

        // Capture the timing record produced by a lower-level fetch, if any.
        if (evRec->numRecords() > initialRecords) {
            assert_msg(evRec->numRecords() == initialRecords + 1, "evRec records %ld", evRec->numRecords());
            accessRecord = evRec->getRecord(initialRecords);
            hasAccessRecord = true;
            evRec->popRecord();
        }

        // At this point we have all the info we need to hammer out the timing record
        TimingRecord tr = {req.lineAddr << lineBits, req.cycle, respCycle, req.type, NULL, NULL}; //note the end event is the response, not the wback

        if (getDoneCycle - req.cycle == accLat) {
            // Hit: a single event covering the whole (access + any inval) latency.
            assert(!hasWritebackRecord);
            assert(!hasAccessRecord);
            uint64_t hitLat = respCycle - req.cycle; // accLat + invLat
            HitEvent* ev = new (evRec) HitEvent(this, hitLat, domain);
            ev->setMinStartCycle(req.cycle);
            tr.startEvent = tr.endEvent = ev;
        } else {
            assert_msg(getDoneCycle == respCycle, "gdc %ld rc %ld", getDoneCycle, respCycle);

            // Miss events:
            // MissStart (does high-prio lookup) -> getEvent || evictionEvent || replEvent (if needed) -> MissWriteback
            MissStartEvent* mse = new (evRec) MissStartEvent(this, accLat, domain);
            MissResponseEvent* mre = new (evRec) MissResponseEvent(this, mse, domain);
            MissWritebackEvent* mwe = new (evRec) MissWritebackEvent(this, mse, accLat, domain);

            mse->setMinStartCycle(req.cycle);
            mre->setMinStartCycle(getDoneCycle);
            mwe->setMinStartCycle(MAX(evDoneCycle, getDoneCycle));

            // Tie two events to an optional timing record: if r is non-NULL,
            // splice its event chain between startEv and endEv, padding with
            // DelayEvents so the record's req/resp cycles line up; otherwise
            // just connect startEv to endEv with a delay if needed.
            // TODO: Promote to evRec if this is more generally useful
            auto connect = [evRec](const TimingRecord* r, TimingEvent* startEv, TimingEvent* endEv, uint64_t startCycle, uint64_t endCycle) {
                assert_msg(startCycle <= endCycle, "start > end? %ld %ld", startCycle, endCycle);
                if (r) {
                    assert_msg(startCycle <= r->reqCycle, "%ld / %ld", startCycle, r->reqCycle);
                    assert_msg(r->respCycle <= endCycle, "%ld %ld %ld %ld", startCycle, r->reqCycle, r->respCycle, endCycle);
                    uint64_t upLat = r->reqCycle - startCycle;
                    uint64_t downLat = endCycle - r->respCycle;

                    if (upLat) {
                        DelayEvent* dUp = new (evRec) DelayEvent(upLat);
                        dUp->setMinStartCycle(startCycle);
                        startEv->addChild(dUp, evRec)->addChild(r->startEvent, evRec);
                    } else {
                        startEv->addChild(r->startEvent, evRec);
                    }

                    if (downLat) {
                        DelayEvent* dDown = new (evRec) DelayEvent(downLat);
                        dDown->setMinStartCycle(r->respCycle);
                        r->endEvent->addChild(dDown, evRec)->addChild(endEv, evRec);
                    } else {
                        r->endEvent->addChild(endEv, evRec);
                    }
                } else {
                    if (startCycle == endCycle) {
                        startEv->addChild(endEv, evRec);
                    } else {
                        DelayEvent* dEv = new (evRec) DelayEvent(endCycle - startCycle);
                        dEv->setMinStartCycle(startCycle);
                        startEv->addChild(dEv, evRec)->addChild(endEv, evRec);
                    }
                }
            };

            // Get path
            connect(hasAccessRecord? &accessRecord : NULL, mse, mre, req.cycle + accLat, getDoneCycle);
            mre->addChild(mwe, evRec);

            // Eviction path
            if (evDoneCycle) {
                connect(hasWritebackRecord? &writebackRecord : NULL, mse, mwe, req.cycle + accLat, evDoneCycle);
            }

            // Replacement path: model the extra tag lookups needed when the
            // replacement policy examines more candidates than there are ways.
            if (evDoneCycle && cands > ways) {
                uint32_t replLookups = (cands + (ways-1))/ways - 1; // e.g., with 4 ways, 5-8 -> 1, 9-12 -> 2, etc.
                assert(replLookups);
                uint32_t fringeAccs = ways - 1;
                uint32_t accsSoFar = 0;
                TimingEvent* p = mse;

                // Candidate lookup events
                while (accsSoFar < replLookups) {
                    uint32_t preDelay = accsSoFar? 0 : tagLat;
                    uint32_t postDelay = tagLat - MIN(tagLat - 1, fringeAccs);
                    uint32_t accs = MIN(fringeAccs, replLookups - accsSoFar);
                    //info("ReplAccessEvent rl %d fa %d preD %d postD %d accs %d", replLookups, fringeAccs, preDelay, postDelay, accs);
                    ReplAccessEvent* raEv = new (evRec) ReplAccessEvent(this, accs, preDelay, postDelay, domain);
                    raEv->setMinStartCycle(req.cycle /*lax...*/);
                    accsSoFar += accs;
                    p->addChild(raEv, evRec);
                    p = raEv;
                    fringeAccs *= ways - 1;
                }

                // Swap events -- typically, one read and one write work for 1-2 swaps. Exact number depends on layout.
                ReplAccessEvent* rdEv = new (evRec) ReplAccessEvent(this, 1, tagLat, tagLat, domain);
                rdEv->setMinStartCycle(req.cycle /*lax...*/);
                ReplAccessEvent* wrEv = new (evRec) ReplAccessEvent(this, 1, 0, 0, domain);
                wrEv->setMinStartCycle(req.cycle /*lax...*/);

                p->addChild(rdEv, evRec)->addChild(wrEv, evRec)->addChild(mwe, evRec);
            }

            tr.startEvent = mse;
            tr.endEvent = mre; // note the end event is the response, not the wback
        }
        evRec->pushRecord(tr);
    }

    cc->endAccess(req);

    assert_msg(respCycle >= req.cycle, "[%s] resp < req? 0x%lx type %s childState %s, respCycle %ld reqCycle %ld",
            name.c_str(), req.lineAddr, AccessTypeName(req.type), MESIStateName(*req.state), respCycle, req.cycle);
    return respCycle;
}