/*
** printf-style diagnostic output.
**
** When diagnostics are enabled on this MDL context (mdl->bDiag is
** non-zero) the format string and its variadic arguments are expanded
** into a fixed buffer of MDLPRINTF_STRING_MAXLEN bytes (vsnprintf
** truncates anything longer) and the result is handed to mdlDiag().
** When diagnostics are disabled the call does nothing.
**
** The formatting buffer has static storage duration, so it is shared
** by every call to this function.
*/
void mdldebug( MDL mdl, const char *format, ... )
{
    static char achMsg[MDLPRINTF_STRING_MAXLEN];
    va_list ap;

    if (!mdl->bDiag) {
        return;
    }

    va_start(ap, format);
    vsnprintf(achMsg, MDLPRINTF_STRING_MAXLEN, format, ap);
    mdlDiag(mdl, achMsg);
    va_end(ap);
}
/*
** Acquire element 'iIndex' of cache space 'cid' belonging to thread 'id'
** and return a pointer to it.
**
** For a read-only cache (MDL_ROCACHE) the pointer refers directly into
** the owner's data array and nothing is locked or copied.
**
** Otherwise the element is served from this thread's software cache:
**   - on a hit the line's lock count (nLock) is incremented and a pointer
**     into the cached line is returned;
**   - on a miss an unlocked victim line is chosen (random start, linear
**     probe), its contents are combined back into the owner's data under
**     the owner's per-line mutex if it held valid data, and the requested
**     line is then copied in and passed element-wise through c->init.
**
** The returned line carries a lock count of at least 1; the matching
** release call is not part of this block.
**
** If every line is locked the function reports the failure through
** mdlDiag() and terminates the process with exit(1); it never returns
** in that case.
*/
void *mdlAquire(MDL mdl,int cid,int iIndex,int id)
{
    CACHE *cc = &mdl->pmdl[id]->cache[cid];   /* owner's view of the cache */
    CACHE *c,*cflsh;
    char *pLine,*srce,*dest;
    int iElt,iLine,i,j,n;
    int iVictim,iLineVic,idVic,*pi;
    char ach[80];

    if (cc->iType == MDL_ROCACHE) {
        return(&cc->pData[iIndex*cc->iDataSize]);
    }
    c = &mdl->cache[cid];
    /*
    ** Note that for KSR even local requests are cached, because
    ** even they have to aquire a lock to access the memory.
    ** This excludes local memory "cheats" where the COcache is
    ** being used!
    ** Determine memory block key value and cache line.
    */
    iElt = iIndex & MDL_CACHE_MASK;
    iLine = iIndex >> MDL_CACHELINE_BITS;
    i = c->pTrans[iLine % c->iTransMask];
    /*
    ** Check for a match!  Walk the chain for this translation slot;
    ** a link of 0 terminates the chain.
    */
    while (i) {
        if (c->pTag[i].id == id) {
            if (c->pTag[i].iLine == iLine) {
                ++c->pTag[i].nLock;
                pLine = &c->pLine[i*c->iLineSize];
                return(&pLine[iElt*c->iDataSize]);
            }
        }
        i = c->pTag[i].iLink;
    }
    /*
    ** Cache Miss.
    ** Victim Search!
    ** Note: if more than 1771875 cache lines are present this random
    ** number generation may have to be changed, although none of the
    ** code will break in this case. The only problem may be non-optimal
    ** cache line replacement. Maybe give a warning at initialization?
    */
    iVictim = MDL_RAND(mdl)%c->nLines;
    for (i=0;i<c->nLines;++i) {
        if (!c->pTag[iVictim].nLock) {
            /*
            ** Found victim.
            */
            iLineVic = c->pTag[iVictim].iLine;
            idVic = c->pTag[iVictim].id;
            /*
            ** 'pLine' will point to the actual data line in the cache.
            */
            pLine = &c->pLine[iVictim*c->iLineSize];
            if (iLineVic >= 0) {
                /*
                ** Flush element since it is valid!
                */
                cflsh = &mdl->pmdl[idVic]->cache[cid];
                dest = &cflsh->pData[iLineVic*c->iLineSize];
                /*
                ** Make sure we don't combine beyond the number
                ** of data elements!
                */
                j = iLineVic*c->nLineElts;
                n = j + c->nLineElts;
                if (n > cflsh->nData) n = cflsh->nData;
                n -= j;
                n *= c->iDataSize;
                /*
                ** Lock data line and combine!
                */
                pthread_mutex_lock(&cflsh->pMux[iLineVic]);
                for (j=0;j<n;j+=c->iDataSize) {
                    (*c->combine)(&dest[j],&pLine[j]);
                }
                pthread_mutex_unlock(&cflsh->pMux[iLineVic]);
                /*
                ** If valid iLine then "unlink" it from the cache.
                */
                pi = &c->pTrans[iLineVic % c->iTransMask];
                while (*pi != iVictim) pi = &c->pTag[*pi].iLink;
                *pi = c->pTag[iVictim].iLink;
            }
            c->pTag[iVictim].id = id;
            c->pTag[iVictim].iLine = iLine;
            c->pTag[iVictim].nLock = 1;
            /*
            ** Add the modified victim tag back into the cache.
            ** Note: the new element is placed at the head of the chain.
            */
            pi = &c->pTrans[iLine % c->iTransMask];
            c->pTag[iVictim].iLink = *pi;
            *pi = iVictim;
            /*
            ** Grab new cache line, don't really need a lock here.
            ** Only copy the bytes that actually exist in the owner's
            ** array: the last line may be partial, and the flush above
            ** already refuses to touch anything past nData, so reading
            ** a full line here would run off the end of pData.
            */
            srce = &cc->pData[iLine*c->iLineSize];
            n = cc->nData*c->iDataSize - iLine*c->iLineSize;
            if (n > c->iLineSize) n = c->iLineSize;
            if (n < 0) n = 0;
            for (j=0;j<n;++j) {
                pLine[j] = srce[j];
            }
            /*
            ** Call the initializer function for all elements in
            ** the cache line (including any slots past the end of
            ** the owner's data, which are never flushed back).
            */
            for (j=0;j<c->iLineSize;j+=c->iDataSize) {
                (*c->init)(&pLine[j]);
            }
            return(&pLine[iElt*c->iDataSize]);
        }
        if (++iVictim == c->nLines) iVictim = 0;
    }
    /*
    ** Cache Failure!  Every line is locked; this is fatal.
    */
    snprintf(ach,sizeof(ach),
             "MDL CACHE FAILURE: cid == %d, no unlocked lines!\n",cid);
    mdlDiag(mdl,ach);
    exit(1);
}
/*
** Initialize a Read-Only caching space.
**
** Sets up cache 'cid' over the caller's array 'pData' ('nData' elements
** of 'iDataSize' bytes each) via CacheInitialize(), marks it as
** MDL_ROCACHE and clears the init/combine hooks, which a read-only
** cache does not use.
**
** The call is collective.  It performs a two-phase check-in:
**   1. every thread other than 0 sends a MDL_MID_CACHEIN message to
**      thread 0 with a blocking MPI_Send, while thread 0 services
**      incoming messages until its check-in count reaches nThreads;
**   2. thread 0 then sends MDL_MID_CACHEIN to every other thread, and
**      each of those services messages until its own count is non-zero.
**
** After AdjustDataSize() and an MPI barrier, two shmem_fcollect calls
** exchange each PE's nData (the largest value, times iDataSize, is
** stored in pDataMax) and each PE's pData address (stored in
** shmem_pData).
*/
void mdlROcache(MDL mdl,int cid,void *pData,int iDataSize,int nData)
{
    CACHE *pCache;
    CAHEAD caSync;
    char achMsg[256];
    int iPeer;
    /* SHMEM */
    int iPe;
    long lWord;

    pCache = CacheInitialize(mdl,cid,pData,iDataSize,nData);
    pCache->iType = MDL_ROCACHE;
    /*
    ** Neither hook is needed for a read-only cache.
    */
    pCache->init = NULL;
    pCache->combine = NULL;

    sprintf(achMsg, "%d: before CI, cache %d\n", mdl->idSelf, cid);
    mdlDiag(mdl, achMsg);

    /*
    ** THIS IS A SYNCHRONIZE!!!
    */
    caSync.cid = cid;
    caSync.mid = MDL_MID_CACHEIN;
    caSync.id = mdl->idSelf;

    /*
    ** Phase 1: everybody reports to thread 0.
    */
    if (mdl->idSelf != 0) {
        MPI_Send(&caSync,sizeof(CAHEAD),MPI_BYTE, 0,
                 MDL_TAG_CACHECOM, MPI_COMM_WORLD);
    }
    else {
        for (pCache->nCheckIn = 1; pCache->nCheckIn < mdl->nThreads; ) {
            mdlCacheReceive(mdl, NULL);
        }
    }

    sprintf(achMsg, "%d: In CI, cache %d\n", mdl->idSelf, cid);
    mdlDiag(mdl, achMsg);

    /*
    ** Phase 2: thread 0 releases everybody else.
    */
    if (mdl->idSelf != 0) {
        for (pCache->nCheckIn = 0; pCache->nCheckIn == 0; ) {
            mdlCacheReceive(mdl,NULL);
        }
    }
    else {
        for (iPeer = 1; iPeer < mdl->nThreads; iPeer++) {
            MPI_Send(&caSync,sizeof(CAHEAD),MPI_BYTE, iPeer,
                     MDL_TAG_CACHECOM, MPI_COMM_WORLD);
        }
    }

    sprintf(achMsg, "%d: After CI, cache %d\n", mdl->idSelf, cid);
    mdlDiag(mdl, achMsg);

    AdjustDataSize(mdl);
    MPI_Barrier(MPI_COMM_WORLD);

    /*
    ** SHMEM: collect nData from all PEs and keep the largest, in bytes.
    */
    lWord = (long) nData;
    shmem_fcollect(shmem_array,&lWord,1,0,0,mdl->nThreads,pSync);
    pCache->pDataMax=0;
    for (iPe = 0; iPe < mdl->nThreads; ++iPe) {
        if (pCache->pDataMax < shmem_array[iPe]) {
            pCache->pDataMax=shmem_array[iPe];
        }
    }
    pCache->pDataMax *= iDataSize;
    mdlDiag(mdl, "After RO shmem_collect\n");

    /*
    ** Set up array to track pData on all other PEs.
    */
    lWord = (long) pData;
    shmem_fcollect(pCache->shmem_pData,&lWord,1,0,0,mdl->nThreads,pSync);
    mdlDiag(mdl, "After RO shmem_pData exchange\n");
}
/*
** Complete the outstanding cache-message receive, act on the message,
** and post the next receive.
**
** The message header (CAHEAD) and payload are read from mdl->pszRcv.
** Handling by message id:
**   MDL_MID_CACHEIN   - increment the cache's check-in counter.
**   MDL_MID_CACHEOUT  - increment the cache's check-out counter.
**   MDL_MID_CACHEREQ  - send the requested line of local data back to
**                       the requester with a nonblocking send, using the
**                       reply buffer dedicated to that requester.
**   MDL_MID_CACHEFLSH - combine the received line into local data with
**                       the cache's combine function (COCACHE only),
**                       never going past nData elements.
**   MDL_MID_CACHERPL  - copy the received line into 'pLine' and, for a
**                       COCACHE with an init function, initialize every
**                       element of the line.
**
** pLine: destination for a reply line; must be non-NULL when a
**        MDL_MID_CACHERPL message arrives (asserted).
**
** Returns 1 if a reply line was delivered into pLine, 0 otherwise.
*/
int mdlCacheReceive(MDL mdl,char *pLine)
{
    CACHE *c;
    CAHEAD *ph = (CAHEAD *)mdl->pszRcv;
    char *pszRcv = &mdl->pszRcv[sizeof(CAHEAD)];
    CAHEAD *phRpl;
    char *pszRpl;
    char *t;
    int id, iTag;
    int n,i;
    MPI_Status status;
    int ret;
    int iLineSize;
    int iDataSize;
#if 0
    char achDiag[256];
#endif

    ret = MPI_Wait(&mdl->ReqRcv, &status);
    assert(ret == MPI_SUCCESS);
#if 0
    sprintf(achDiag, "%d: cache %d, message %d, from %d, rec top\n", mdl->idSelf, ph->cid, ph->mid, ph->id);
    mdlDiag(mdl, achDiag);
#endif

    c = &mdl->cache[ph->cid];
    assert(c->iType != MDL_NOCACHE);

    switch (ph->mid) {
    case MDL_MID_CACHEIN:
        ++c->nCheckIn;
        ret = 0;
        break;
    case MDL_MID_CACHEOUT:
        ++c->nCheckOut;
        ret = 0;
        break;
    case MDL_MID_CACHEREQ:
        /*
        ** This is the tricky part! Here is where the real deadlock
        ** difficulties surface. Making sure to have one buffer per
        ** thread solves those problems here.
        **
        ** The reply buffer for this requester may still belong to an
        ** earlier MPI_Isend.  A send buffer must not be modified until
        ** that send has completed, so wait for it BEFORE writing the
        ** new header and data into the buffer.
        */
        if(mdl->pmidRpl[ph->id] != -1) {
            MPI_Wait(&mdl->pReqRpl[ph->id], &status);
        }
        pszRpl = &mdl->ppszRpl[ph->id][sizeof(CAHEAD)];
        phRpl = (CAHEAD *)mdl->ppszRpl[ph->id];
        phRpl->cid = ph->cid;
        phRpl->mid = MDL_MID_CACHERPL;
        phRpl->id = mdl->idSelf;
        t = &c->pData[ph->iLine*c->iLineSize];
        /*
        ** Shorten the reply if the line extends past the end of the
        ** local data.
        */
        if(t+c->iLineSize > c->pData + c->nData*c->iDataSize)
            iLineSize = c->pData + c->nData*c->iDataSize - t;
        else
            iLineSize = c->iLineSize;
        for (i=0;i<iLineSize;++i) pszRpl[i] = t[i];
        mdl->pmidRpl[ph->id] = 0;
        MPI_Isend(phRpl,sizeof(CAHEAD)+iLineSize,MPI_BYTE,
                  ph->id, MDL_TAG_CACHECOM, MPI_COMM_WORLD,
                  &mdl->pReqRpl[ph->id]);
        ret = 0;
        break;
    case MDL_MID_CACHEFLSH:
        assert(c->iType == MDL_COCACHE);
        i = ph->iLine*MDL_CACHELINE_ELTS;
        t = &c->pData[i*c->iDataSize];
        /*
        ** Make sure we don't combine beyond the number of data elements!
        */
        n = i + MDL_CACHELINE_ELTS;
        if (n > c->nData) n = c->nData;
        n -= i;
        n *= c->iDataSize;
        iDataSize = c->iDataSize;
        for (i=0;i<n;i+=iDataSize) {
            (*c->combine)(&t[i],&pszRcv[i]);
        }
        ret = 0;
        break;
    case MDL_MID_CACHERPL:
        /*
        ** For now assume no prefetching!
        ** This means that this WILL be the reply to this Aquire
        ** request.
        **
        ** NOTE(review): a full iLineSize bytes are copied out of the
        ** receive buffer even though the sender may have shortened the
        ** reply for the last line.
        */
        assert(pLine != NULL);
        iLineSize = c->iLineSize;
        for (i=0;i<iLineSize;++i) pLine[i] = pszRcv[i];
        if (c->iType == MDL_COCACHE && c->init) {
            /*
            ** Call the initializer function for all elements in
            ** the cache line.
            */
            for (i=0;i<c->iLineSize;i+=c->iDataSize) {
                (*c->init)(&pLine[i]);
            }
        }
        ret = 1;
        break;
    case MDL_MID_CACHEDONE:
        /*
         * No more caches, shouldn't get here.
         */
        assert(0);
        ret = 0;    /* defined result when asserts are compiled out */
        break;
    default:
        assert(0);
        ret = 0;    /* defined result when asserts are compiled out */
        break;
    }

#if 0
    sprintf(achDiag, "%d: cache %d, message %d rec bottom\n", mdl->idSelf, ph->cid, ph->mid);
    mdlDiag(mdl, achDiag);
#endif
    /*
     * Fire up next receive
     */
    id = MPI_ANY_SOURCE;
    iTag = MDL_TAG_CACHECOM;
    MPI_Irecv(mdl->pszRcv,mdl->iCaBufSize, MPI_BYTE, id,
              iTag, MPI_COMM_WORLD, &mdl->ReqRcv);

    return ret;
}
void mdlFinishCache(MDL mdl,int cid) { CACHE *c = &mdl->cache[cid]; CAHEAD caOut; CAHEAD *caFlsh = (CAHEAD *)mdl->pszFlsh; char *pszFlsh = &mdl->pszFlsh[sizeof(CAHEAD)]; int i,id; char *t; int j, iKey; MPI_Status status; MPI_Request reqFlsh; int index, flag, nFlush; char ach[256]; if (c->iType == MDL_COCACHE) { sprintf(ach,"Flushing COCACHE id %d\n",cid); mdlDiag(mdl,ach); /* * Extra checkout to let everybody finish before * flushes start. */ caOut.cid = cid; caOut.mid = MDL_MID_CACHEOUT; caOut.id = mdl->idSelf; for(id = 0; id < mdl->nThreads; id++) { if(id == mdl->idSelf) continue; MPI_Send(&caOut,sizeof(CAHEAD),MPI_BYTE, id, MDL_TAG_CACHECOM, MPI_COMM_WORLD); } ++c->nCheckOut; while(c->nCheckOut < mdl->nThreads) mdlCacheReceive(mdl, NULL); c->nCheckOut = 0; /* ** Must flush all valid data elements. */ caFlsh->cid = cid; caFlsh->mid = MDL_MID_CACHEFLSH; caFlsh->id = mdl->idSelf; nFlush=0; for (i=1;i<c->nLines;++i) { iKey = c->pTag[i].iKey; if (iKey >= 0) { /* ** Flush element since it is valid! */ id = iKey & c->iIdMask; caFlsh->iLine = iKey >> c->iInvKeyShift; t = &c->pLine[i*c->iLineSize]; for(j = 0; j < c->iLineSize; ++j) pszFlsh[j] = t[j]; /* * Use Synchronous send so as not to * overwhelm the receiver. */ MPI_Issend(caFlsh, sizeof(CAHEAD)+c->iLineSize, MPI_CHAR, id, MDL_TAG_CACHECOM, MPI_COMM_WORLD, &reqFlsh); /* * Wait for the Flush to complete. */ while(1) { mdlCacheCheck(mdl); /* service incoming */ MPI_Test(&reqFlsh, &flag, &status); if(flag == 1) /* Flush request received */ break; } ++nFlush; } }
/*
** Initialize a combiner caching space.
**
** Sets up cache 'cid' over the caller's array 'pData' ('nData' elements
** of 'iDataSize' bytes each) via CacheInitialize(), marks it as
** MDL_COCACHE and installs the element hooks:
**   init    - called on elements of a line when it enters a cache;
**   combine - called to merge a cached element back into the owner's
**             copy.
** Both hooks are mandatory (asserted).
**
** The call is collective.  It performs a two-phase check-in:
**   1. every thread other than 0 sends MDL_MID_CACHEIN to thread 0
**      while thread 0 services messages until all have checked in;
**   2. thread 0 then sends MDL_MID_CACHEIN to every other thread, each
**      of which services messages until its own counter is non-zero.
**
** Afterwards two shmem_fcollect calls exchange each PE's nData (the
** largest, times iDataSize, is stored in pDataMax) and each PE's pData
** address (stored in shmem_pData).  Thread 0 prints the collected
** addresses to stderr.
*/
void mdlCOcache(MDL mdl,int cid,void *pData,int iDataSize,int nData,
                void (*init)(void *),void (*combine)(void *,void *))
{
    CACHE *c;
    int i,id;
    CAHEAD caIn;
    long lTemp;

    c = CacheInitialize(mdl,cid,pData,iDataSize,nData);
    c->iType = MDL_COCACHE;
    assert(init);
    c->init = init;
    assert(combine);
    c->combine = combine;
    mdlDiag(mdl, "Before CO sync\n");
    /*
    ** THIS IS A SYNCHRONIZE!!!
    */
    caIn.cid = cid;
    caIn.mid = MDL_MID_CACHEIN;
    caIn.id = mdl->idSelf;
    if(mdl->idSelf == 0) {
        c->nCheckIn = 1;
        while(c->nCheckIn < mdl->nThreads) {
            mdlCacheReceive(mdl, NULL);
        }
    }
    else {
        /*
        ** Blocking send of the check-in header to thread 0.  The
        ** header is shipped as raw bytes (MPI_BYTE) so the send type
        ** matches the MPI_BYTE receive posted by mdlCacheReceive().
        */
        MPI_Send(&caIn,sizeof(CAHEAD),MPI_BYTE, 0,
                 MDL_TAG_CACHECOM, MPI_COMM_WORLD);
    }
    mdlDiag(mdl, "After sends CO sync\n");
    if(mdl->idSelf == 0) {
        for(id = 1; id < mdl->nThreads; id++) {
            MPI_Send(&caIn,sizeof(CAHEAD),MPI_BYTE, id,
                     MDL_TAG_CACHECOM, MPI_COMM_WORLD);
        }
    }
    else {
        c->nCheckIn = 0;
        while (c->nCheckIn == 0) {
            mdlCacheReceive(mdl,NULL);
        }
    }
    mdlDiag(mdl, "After CO sync\n");
    AdjustDataSize(mdl);
    /*
    ** Collect nData from all PEs and keep the largest, in bytes.
    */
    lTemp = (long) nData;
    shmem_fcollect(shmem_array,&lTemp,1,0,0,mdl->nThreads,pSync);
    c->pDataMax=0;
    for (i=0;i<mdl->nThreads;++i) {
        if (c->pDataMax < shmem_array[i]) c->pDataMax=shmem_array[i];
    }
    c->pDataMax *= iDataSize;
    mdlDiag(mdl, "After CO shmem_collect\n");
    /*
    ** Set up array to track pData on all other PEs.
    */
    lTemp = (long) pData;
    shmem_fcollect(c->shmem_pData,&lTemp,1,0,0,mdl->nThreads,pSync);
    if (mdl->idSelf==0) {
        for (i=0;i<mdl->nThreads;++i)
            fprintf(stderr,"i: %d shmem_pData: %ld\n",i,c->shmem_pData[i]);
    }
    mdlDiag(mdl, "After CO shmem_pData exchange\n");
}
/*
** Common initialization for all types of caches.
**
** Claims cache slot 'cid' in mdl->cache (growing the slot table if
** needed), records the caller's data array ('pData', 'nData' elements
** of 'iDataSize' bytes), derives the key layout from the thread count,
** and allocates the translation table, tag array, key bitmap and cache
** lines.
**
** Preconditions (asserted): cid >= 0, iDataSize > 0, the slot is
** currently MDL_NOCACHE, and the element size leaves room for at least
** one cache line (the sentinel, pTag[0]).
**
** Returns a pointer to the initialized slot; the caller sets iType and
** the init/combine hooks.  Allocation failures are treated as fatal
** via assert().
**
** Note: growing the table uses realloc(), which may move it, so CACHE
** pointers obtained before a growing call must not be kept.
*/
CACHE *CacheInitialize(MDL mdl,int cid,void *pData,int iDataSize,int nData)
{
    CACHE *c;
    int i,nMaxCacheIds;
    char ach[256];

    /*
    ** Allocate more cache spaces if required!
    */
    assert(cid >= 0);
    /*
    ** iDataSize is used as a divisor below; zero or negative sizes
    ** would divide by zero or produce a negative line count.
    */
    assert(iDataSize > 0);
    if (cid >= mdl->nMaxCacheIds) {
        /*
        ** Reallocate cache spaces: grow the table to cid+3 entries,
        ** i.e. the slot being defined plus two spare ones.
        */
        nMaxCacheIds = cid + 3;
        mdl->cache = realloc(mdl->cache,nMaxCacheIds*sizeof(CACHE));
        assert(mdl->cache != NULL);
        /*
        ** Initialize the new cache slots.
        */
        for (i=mdl->nMaxCacheIds;i<nMaxCacheIds;++i) {
            mdl->cache[i].iType = MDL_NOCACHE;
        }
        mdl->nMaxCacheIds = nMaxCacheIds;
    }
    c = &mdl->cache[cid];
    assert(c->iType == MDL_NOCACHE);
    c->pData = pData;
    c->iDataSize = iDataSize;
    c->nData = nData;
    c->iLineSize = MDL_CACHELINE_ELTS*c->iDataSize;
    /*
    ** iKeyShift = number of bits needed to hold a thread id.
    */
    c->iKeyShift = 0;
    while((1 << c->iKeyShift) < mdl->nThreads) ++c->iKeyShift;
    c->iIdMask = (1 << c->iKeyShift) - 1;
    if(c->iKeyShift < MDL_CACHELINE_BITS) {
        /*
         * Key will be (index & MDL_INDEX_MASK) | id.
         */
        c->iInvKeyShift = MDL_CACHELINE_BITS;
        c->iKeyShift = 0;
    }
    else {
        /*
         * Key will be (index & MDL_INDEX_MASK) << KeyShift | id.
         */
        c->iInvKeyShift = c->iKeyShift;
        c->iKeyShift -= MDL_CACHELINE_BITS;
    }
    /*
    ** Determine the number of cache lines to be allocated.
    */
    c->nLines = (MDL_CACHE_SIZE/c->iDataSize) >> MDL_CACHELINE_BITS;
    /*
    ** pTag[0] is written unconditionally below as the sentinel, so at
    ** least one line must exist or that write lands outside the
    ** allocation.
    */
    assert(c->nLines >= 1);
    assert(c->nLines < MDL_RANDMOD);
    snprintf(ach,sizeof(ach),
             "CacheInitialize: iDataSize=%d iLineSize=%d nLines=%d\n",
             c->iDataSize,c->iLineSize,c->nLines);
    mdlDiag(mdl, ach);
    /*
    ** Translation table size: twice the smallest power of two that is
    ** >= nLines.
    */
    c->nTrans = 1;
    while(c->nTrans < c->nLines) c->nTrans *= 2;
    c->nTrans *= 2;
    c->iTransMask = c->nTrans-1;
    /*
    ** Set up the translation table.
    */
    c->pTrans = malloc(c->nTrans*sizeof(int));
    assert(c->pTrans != NULL);
    for (i=0;i<c->nTrans;++i) c->pTrans[i] = 0;
    /*
    ** Set up the tags. Note pTag[0] is a Sentinel!
    */
    c->pTag = malloc(c->nLines*sizeof(CTAG));
    assert(c->pTag != NULL);
    for (i=0;i<c->nLines;++i) {
        c->pTag[i].iKey = -1;   /* invalid */
        c->pTag[i].nLock = 0;
        c->pTag[i].nLast = 0;   /* !!! */
        c->pTag[i].iLink = 0;
    }
    c->pTag[0].nLock = 1;         /* always locked */
    c->pTag[0].nLast = INT_MAX;   /* always Most Recently Used */
    c->nAccess = 0;
    c->nAccHigh = 0;
    c->nMiss = 0;                       /* !!!, not NB */
    c->nColl = 0;                       /* !!!, not NB */
    c->nMin = 0;                        /* !!!, not NB */
    c->nKeyMax = 500;                   /* !!!, not NB */
    c->pbKey = malloc(c->nKeyMax);      /* !!!, not NB */
    assert(c->pbKey != NULL);           /* !!!, not NB */
    for (i=0;i<c->nKeyMax;++i) c->pbKey[i] = 0;   /* !!!, not NB */
    /*
    ** Allocate cache data lines.
    */
    c->pLine = malloc(c->nLines*c->iLineSize);
    assert(c->pLine != NULL);
    c->nCheckOut = 0;
    return(c);
}