コード例 #1
0
ファイル: mdl.c プロジェクト: srs51/SRS-3000
/*
 ** printf-style diagnostic helper.
 **
 ** When mdl->bDiag is set, the message described by 'format' and the
 ** variadic arguments is formatted (truncated to MDLPRINTF_STRING_MAXLEN
 ** bytes including the terminator) and handed to mdlDiag(). When the flag
 ** is clear the call does nothing.
 **
 ** The formatting buffer is a function-local static, so it is shared by
 ** every call to this function.
 */
void mdldebug( MDL mdl, const char *format, ... )
{
    static char szMsg[MDLPRINTF_STRING_MAXLEN];
    va_list ap;

    /* Diagnostics switched off: nothing to format, nothing to emit. */
    if (!mdl->bDiag) return;

    va_start(ap, format);
    vsnprintf(szMsg, MDLPRINTF_STRING_MAXLEN, format, ap);
    va_end(ap);

    mdlDiag(mdl, szMsg);
}
コード例 #2
0
ファイル: mdl.c プロジェクト: N-BodyShop/mdl
/*
 ** Acquire element 'iIndex' of cache space 'cid' that is owned by thread
 ** 'id' and return a pointer to it.
 **
 **  - If the owner's cache is of type MDL_ROCACHE the pointer refers
 **    directly into the owner's pData array; nothing is cached or locked.
 **  - Otherwise the element's cache line is looked up in the caller's own
 **    cache (mdl->cache[cid]). On a hit the line's lock count is
 **    incremented. On a miss an unlocked victim line is chosen, its
 **    contents are combined back into its owner's data (if the victim was
 **    valid), the new line is copied in from the owner, every element of
 **    it is passed to c->init, and the line is returned with nLock == 1.
 **
 ** If every line is locked the function reports the failure through
 ** mdlDiag() and terminates the process with exit(1); it never returns
 ** in that case.
 */
void *mdlAquire(MDL mdl,int cid,int iIndex,int id)
{
    /* cc: the cache descriptor of the OWNING thread 'id'. */
    CACHE *cc = &mdl->pmdl[id]->cache[cid];
    /* c: the caller's own cache; cflsh: owner of a victim line being flushed. */
    CACHE *c,*cflsh;
    char *pLine,*srce,*dest;
    int iElt,iLine,i,j,n;
    int iVictim,iLineVic,idVic,*pi;
    char ach[80];
    
    /*
     ** Read-only caches are accessed in place in the owner's memory.
     */
    if (cc->iType == MDL_ROCACHE) {
		return(&cc->pData[iIndex*cc->iDataSize]);
		}
    c = &mdl->cache[cid];
    /*
     ** Note that for KSR even local requests are cached, because
     ** even they have to acquire a lock to access the memory.
     ** This excludes local memory "cheats" where the COcache is
     ** being used!
     ** Determine memory block key value and cache line.
     */
    iElt = iIndex & MDL_CACHE_MASK;			/* element within the line */
    iLine = iIndex >> MDL_CACHELINE_BITS;	/* line number within owner's data */
    /*
     ** Head of the hash chain for this line number.
     ** NOTE(review): the bucket is selected with '%' on a field named
     ** iTransMask -- confirm how iTransMask is initialized in this variant.
     */
    i = c->pTrans[iLine % c->iTransMask];
    /*
     ** Check for a match! Tag index 0 terminates a chain.
     */
    while (i) {
		if (c->pTag[i].id == id) {
			if (c->pTag[i].iLine == iLine) {
				/* Hit: take another lock on the line and hand out the element. */
				++c->pTag[i].nLock;
				pLine = &c->pLine[i*c->iLineSize];
				return(&pLine[iElt*c->iDataSize]);
				}
			}
		i = c->pTag[i].iLink;
		}
    /*
     ** Cache Miss.
     **	Victim Search!
     ** Note: if more than 1771875 cache lines are present this random
     ** number generation may have to be changed, although none of the
     ** code will break in this case. The only problem may be non-optimal
     ** cache line replacement. Maybe give a warning at initialization?
     **
     ** The scan starts at a random line and wraps around, visiting at most
     ** nLines tags and taking the first one whose lock count is zero.
     */
    iVictim = MDL_RAND(mdl)%c->nLines;
    for (i=0;i<c->nLines;++i) {
		if (!c->pTag[iVictim].nLock) {
			/*
			 ** Found victim.
			 */
			iLineVic = c->pTag[iVictim].iLine;
			idVic = c->pTag[iVictim].id;
			/*
			 ** 'pLine' will point to the actual data line in the cache.
			 */
			pLine = &c->pLine[iVictim*c->iLineSize];
			if (iLineVic >= 0) {
				/*
				 ** Flush element since it is valid! The destination is
				 ** the victim line's home location in its owner's pData.
				 */
				cflsh = &mdl->pmdl[idVic]->cache[cid];	
				dest = &cflsh->pData[iLineVic*c->iLineSize];
				/*
				 ** Make sure we don't combine beyond the number
				 ** of data elements! After this, n is the number of
				 ** BYTES of valid data in the victim line.
				 */
				j = iLineVic*c->nLineElts;
				n = j + c->nLineElts;
				if (n > cflsh->nData) n = cflsh->nData;
				n -= j;
				n *= c->iDataSize;
				/*
				 ** Lock data line and combine! The owner's per-line mutex
				 ** is held while each element is combined.
				 */
				pthread_mutex_lock(&cflsh->pMux[iLineVic]);
				for (j=0;j<n;j+=c->iDataSize) {
					(*c->combine)(&dest[j],&pLine[j]);
					}
				pthread_mutex_unlock(&cflsh->pMux[iLineVic]);
				/*
				 ** If valid iLine then "unlink" it from the cache: walk the
				 ** victim's hash chain until the link that refers to the
				 ** victim is found, then splice the victim out.
				 */
				pi = &c->pTrans[iLineVic % c->iTransMask];
				while (*pi != iVictim) pi = &c->pTag[*pi].iLink;
				*pi = c->pTag[iVictim].iLink;
				}
			/*
			 ** Re-tag the victim for the requested line; the caller
			 ** holds the single lock on it.
			 */
			c->pTag[iVictim].id = id;	
			c->pTag[iVictim].iLine = iLine;
			c->pTag[iVictim].nLock = 1;
			/*
			 **	Add the modified victim tag back into the cache.
			 ** Note: the new element is placed at the head of the chain.
			 */
			pi = &c->pTrans[iLine % c->iTransMask];
			c->pTag[iVictim].iLink = *pi;
			*pi = iVictim;
			/*
			 ** Grab new cache line, don't really need a lock here.
			 ** NOTE(review): a full iLineSize bytes is copied here, whereas
			 ** the flush above clamps to nData -- confirm the owner's pData
			 ** is readable through the end of its last (partial) line.
			 */
			srce = &cc->pData[iLine*c->iLineSize];
			for (j=0;j<c->iLineSize;++j) {
				pLine[j] = srce[j];
				}
			/*
			 ** Call the initializer function for all elements in 
			 ** the cache line.
			 */
			for (j=0;j<c->iLineSize;j+=c->iDataSize) {
				(*c->init)(&pLine[j]);
				}
			return(&pLine[iElt*c->iDataSize]);
			}
		/* Advance to the next candidate, wrapping at the end of the tag array. */
		if (++iVictim == c->nLines) iVictim = 0;
		}
    /*
     ** Cache Failure! Every line is locked: report and terminate.
     */
    sprintf(ach,"MDL CACHE FAILURE: cid == %d, no unlocked lines!\n",cid);
    mdlDiag(mdl,ach);
    exit(1);
    }
コード例 #3
0
ファイル: mdl.c プロジェクト: N-BodyShop/mdl
/*
 ** Set up cache space 'cid' as a Read-Only cache over the local array
 ** pData (nData elements of iDataSize bytes each).
 **
 ** Steps, in order:
 **   1. CacheInitialize() fills in the descriptor, which is then marked
 **      MDL_ROCACHE with no init/combine callbacks.
 **   2. A two-phase check-in handshake is run through thread 0.
 **   3. AdjustDataSize() is called and all threads meet at an MPI barrier.
 **   4. Two shmem_fcollect() exchanges: first the element counts (the
 **      largest, scaled by iDataSize, is stored in c->pDataMax), then
 **      the pData addresses (gathered into c->shmem_pData).
 ** Progress is reported through mdlDiag() along the way.
 */
void mdlROcache(MDL mdl,int cid,void *pData,int iDataSize,int nData)
{
	CACHE *c;
	CAHEAD caIn;
	char achDiag[256];
	long lTemp;
	int id,i;

	c = CacheInitialize(mdl,cid,pData,iDataSize,nData);
	c->iType = MDL_ROCACHE;
	/*
	 ** A read-only cache has no use for the element callbacks.
	 */
	c->combine = NULL;
	c->init = NULL;
	sprintf(achDiag, "%d: before CI, cache %d\n", mdl->idSelf, cid);
	mdlDiag(mdl, achDiag);
	/*
	 ** Check-in message used by both phases of the handshake.
	 ** THIS IS A SYNCHRONIZE!!!
	 */
	caIn.id = mdl->idSelf;
	caIn.mid = MDL_MID_CACHEIN;
	caIn.cid = cid;
	/*
	 ** Phase 1: every other thread reports to thread 0 with MPI_Send;
	 ** thread 0 counts itself and then services its receive queue until
	 ** all nThreads have checked in.
	 */
	if (mdl->idSelf != 0) {
		MPI_Send(&caIn,sizeof(CAHEAD),MPI_BYTE, 0,
			 MDL_TAG_CACHECOM, MPI_COMM_WORLD);
		}
	else {
		for (c->nCheckIn = 1; c->nCheckIn < mdl->nThreads; ) {
			mdlCacheReceive(mdl, NULL);
			}
		}
	sprintf(achDiag, "%d: In CI, cache %d\n", mdl->idSelf, cid);
	mdlDiag(mdl, achDiag);
	/*
	 ** Phase 2: thread 0 releases everybody with one message each; the
	 ** others reset their counter and service their receive queue until
	 ** a check-in message bumps it.
	 */
	if (mdl->idSelf != 0) {
		c->nCheckIn = 0;
		do {
			mdlCacheReceive(mdl,NULL);
			} while (c->nCheckIn == 0);
		}
	else {
		for (id = 1; id < mdl->nThreads; ++id) {
			MPI_Send(&caIn,sizeof(CAHEAD),MPI_BYTE, id,
				 MDL_TAG_CACHECOM, MPI_COMM_WORLD);
			}
		}
	sprintf(achDiag, "%d: After CI, cache %d\n", mdl->idSelf, cid);
	mdlDiag(mdl, achDiag);
	AdjustDataSize(mdl);
	MPI_Barrier(MPI_COMM_WORLD);

	/*
	 ** SHMEM: collect every thread's element count and keep the largest,
	 ** converted to bytes.
	 */
	lTemp = (long) nData;
	shmem_fcollect(shmem_array,&lTemp,1,0,0,mdl->nThreads,pSync);
	c->pDataMax = 0;
	for (i = 0; i < mdl->nThreads; ++i) {
		if (shmem_array[i] > c->pDataMax) {
			c->pDataMax = shmem_array[i];
			}
		}
	c->pDataMax *= iDataSize;
	mdlDiag(mdl, "After RO shmem_collect\n");

	/*
	 ** SHMEM: collect the pData address of every thread so that remote
	 ** data can be located later.
	 */
	lTemp = (long) pData;
	shmem_fcollect(c->shmem_pData,&lTemp,1,0,0,mdl->nThreads,pSync);
	mdlDiag(mdl, "After RO shmem_pData exchange\n");

	}
コード例 #4
0
ファイル: mdl.c プロジェクト: N-BodyShop/mdl
/*
 ** Complete the outstanding receive on the cache-communication channel
 ** (mdl->ReqRcv into mdl->pszRcv), act on the message that arrived and
 ** then post the next receive.
 **
 ** The message header (CAHEAD) selects the cache (cid) and the action
 ** (mid):
 **   MDL_MID_CACHEIN   - count a check-in            (++c->nCheckIn)
 **   MDL_MID_CACHEOUT  - count a check-out           (++c->nCheckOut)
 **   MDL_MID_CACHEREQ  - send the requested line of local data back to
 **                       the requester with a non-blocking send
 **   MDL_MID_CACHEFLSH - combine the received line into local data
 **                       (combiner caches only)
 **   MDL_MID_CACHERPL  - copy the received line into pLine and, for a
 **                       combiner cache with an init callback, initialize
 **                       every element of it
 **   MDL_MID_CACHEDONE and anything else trip an assertion.
 **
 ** pLine: destination for a reply line; must be non-NULL whenever a
 **        MDL_MID_CACHERPL message can arrive, may be NULL otherwise.
 **
 ** Returns 1 if the message was a reply (MDL_MID_CACHERPL), 0 for every
 ** other handled message.
 */
int mdlCacheReceive(MDL mdl,char *pLine)
{
	CACHE *c;
	CAHEAD *ph = (CAHEAD *)mdl->pszRcv;
	char *pszRcv = &mdl->pszRcv[sizeof(CAHEAD)];
	CAHEAD *phRpl;
	char *pszRpl;
	char *t;
	int id, iTag;
	int n,i;
	MPI_Status status;
	int ret;
	int iLineSize;
	int iDataSize;
#if 0
	char achDiag[256];
#endif

	ret = MPI_Wait(&mdl->ReqRcv, &status);
	assert(ret == MPI_SUCCESS);
#if 0
	sprintf(achDiag, "%d: cache %d, message %d, from %d, rec top\n",
		mdl->idSelf, ph->cid, ph->mid, ph->id);
	mdlDiag(mdl, achDiag);
#endif

	c = &mdl->cache[ph->cid];
	assert(c->iType != MDL_NOCACHE);
	
	switch (ph->mid) {
	case MDL_MID_CACHEIN:
		++c->nCheckIn;
		ret = 0;
		break;
	case MDL_MID_CACHEOUT:
		++c->nCheckOut;
		ret = 0;
		break;
	case MDL_MID_CACHEREQ:
		/*
		 ** This is the tricky part! Here is where the real deadlock
		 ** difficulties surface. Making sure to have one buffer per
		 ** thread solves those problems here.
		 **
		 ** The reply buffer for this requester may still belong to an
		 ** earlier MPI_Isend. MPI does not allow the buffer of an active
		 ** non-blocking send to be modified, so that send is completed
		 ** FIRST, before the header or the payload is touched.
		 */
		if(mdl->pmidRpl[ph->id] != -1) {
			MPI_Wait(&mdl->pReqRpl[ph->id], &status);
			}
		mdl->pmidRpl[ph->id] = 0;
		/*
		 ** Build the reply: header followed by the line data.
		 */
		phRpl = (CAHEAD *)mdl->ppszRpl[ph->id];
		pszRpl = &mdl->ppszRpl[ph->id][sizeof(CAHEAD)];
		phRpl->cid = ph->cid;
		phRpl->mid = MDL_MID_CACHERPL;
		phRpl->id = mdl->idSelf;
		/*
		 ** Clamp the copy so that the last (partial) line does not read
		 ** past the end of the local data array.
		 */
		t = &c->pData[ph->iLine*c->iLineSize];
		if(t+c->iLineSize > c->pData + c->nData*c->iDataSize)
			iLineSize = c->pData + c->nData*c->iDataSize - t;
		else
			iLineSize = c->iLineSize;
		for (i=0;i<iLineSize;++i) pszRpl[i] = t[i];
		MPI_Isend(phRpl,sizeof(CAHEAD)+iLineSize,MPI_BYTE,
			 ph->id, MDL_TAG_CACHECOM, MPI_COMM_WORLD,
			  &mdl->pReqRpl[ph->id]); 
		ret = 0;
		break;
	case MDL_MID_CACHEFLSH:
		assert(c->iType == MDL_COCACHE);
		/* i: index of the first element covered by the flushed line. */
		i = ph->iLine*MDL_CACHELINE_ELTS;
		t = &c->pData[i*c->iDataSize];
		/*
		 ** Make sure we don't combine beyond the number of data elements!
		 ** After this, n is the number of BYTES of valid data in the line.
		 */
		n = i + MDL_CACHELINE_ELTS;
		if (n > c->nData) n = c->nData;
		n -= i;
		n *= c->iDataSize;
		iDataSize = c->iDataSize;
		for (i=0;i<n;i+=iDataSize) {
			(*c->combine)(&t[i],&pszRcv[i]);
			}
		ret = 0;
		break;
	case MDL_MID_CACHERPL:
		/*
		 ** For now assume no prefetching!
		 ** This means that this WILL be the reply to this Acquire
		 ** request.
		 */
		assert(pLine != NULL);
		iLineSize = c->iLineSize;
		for (i=0;i<iLineSize;++i) pLine[i] = pszRcv[i];
		if (c->iType == MDL_COCACHE && c->init) {
			/*
			 ** Call the initializer function for all elements in 
			 ** the cache line.
			 */
			for (i=0;i<c->iLineSize;i+=c->iDataSize) {
				(*c->init)(&pLine[i]);
				}
			}
		ret = 1;
		break;
	case MDL_MID_CACHEDONE:
	      /*
	       * No more caches, shouldn't get here.
	       */
		assert(0);
		break;
	default:
		assert(0);
		}

#if 0
	sprintf(achDiag, "%d: cache %d, message %d rec bottom\n", mdl->idSelf,
		ph->cid, ph->mid);
	mdlDiag(mdl, achDiag);
#endif
	/*
	 * Fire up next receive
	 */
	id = MPI_ANY_SOURCE;
	iTag = MDL_TAG_CACHECOM;
	MPI_Irecv(mdl->pszRcv,mdl->iCaBufSize, MPI_BYTE, id,
		 iTag, MPI_COMM_WORLD, &mdl->ReqRcv);

	return ret;
	}
コード例 #5
0
ファイル: mdl.rand.c プロジェクト: N-BodyShop/mdl
void mdlFinishCache(MDL mdl,int cid)
{
	CACHE *c = &mdl->cache[cid];
	CAHEAD caOut;
	CAHEAD *caFlsh = (CAHEAD *)mdl->pszFlsh;
	char *pszFlsh = &mdl->pszFlsh[sizeof(CAHEAD)];
	int i,id;
	char *t;
	int j, iKey;
	MPI_Status status;
	MPI_Request reqFlsh;
	int index, flag, nFlush;
	char ach[256];

	if (c->iType == MDL_COCACHE) {
	        sprintf(ach,"Flushing COCACHE id %d\n",cid);
	        mdlDiag(mdl,ach);
		/*
		 * Extra checkout to let everybody finish before
		 * flushes start.
		 */
		caOut.cid = cid;
		caOut.mid = MDL_MID_CACHEOUT;
		caOut.id = mdl->idSelf;
		for(id = 0; id < mdl->nThreads; id++) {
		    if(id == mdl->idSelf)
			continue;
		    MPI_Send(&caOut,sizeof(CAHEAD),MPI_BYTE, id,
			     MDL_TAG_CACHECOM, MPI_COMM_WORLD);
		    }
		++c->nCheckOut;
		while(c->nCheckOut < mdl->nThreads)
		    mdlCacheReceive(mdl, NULL);
		c->nCheckOut = 0;
		/*
		 ** Must flush all valid data elements.
		 */
		caFlsh->cid = cid;
		caFlsh->mid = MDL_MID_CACHEFLSH;
		caFlsh->id = mdl->idSelf;
		nFlush=0;
		for (i=1;i<c->nLines;++i) {
			iKey = c->pTag[i].iKey;
			if (iKey >= 0) {
				/*
				 ** Flush element since it is valid!
				 */
				id = iKey & c->iIdMask;
				caFlsh->iLine = iKey >> c->iInvKeyShift;
				t = &c->pLine[i*c->iLineSize];
				for(j = 0; j < c->iLineSize; ++j)
				    pszFlsh[j] = t[j];
				/*
				 * Use Synchronous send so as not to
				 * overwhelm the receiver.
				 */
				MPI_Issend(caFlsh, sizeof(CAHEAD)+c->iLineSize,
					 MPI_CHAR, id, MDL_TAG_CACHECOM,
					 MPI_COMM_WORLD, &reqFlsh); 
				/*
				 * Wait for the Flush to complete.
				*/
				while(1) {
				    mdlCacheCheck(mdl); /* service incoming */				    
				    MPI_Test(&reqFlsh, &flag, &status);
				    if(flag == 1) /* Flush request received */
					break;
				    }
				++nFlush;
				}
		        }
コード例 #6
0
ファイル: mdl.rand.c プロジェクト: N-BodyShop/mdl
/*
 ** Set up cache space 'cid' as a combiner cache over the local array
 ** pData (nData elements of iDataSize bytes each).
 **
 ** init:    callback stored in c->init; must not be NULL.
 ** combine: callback stored in c->combine; must not be NULL.
 **
 ** Steps, in order:
 **   1. CacheInitialize() fills in the descriptor, which is then marked
 **      MDL_COCACHE and given the two callbacks.
 **   2. A two-phase check-in handshake is run through thread 0.
 **   3. AdjustDataSize() is called.
 **   4. Two shmem_fcollect() exchanges: first the element counts (the
 **      largest, scaled by iDataSize, is stored in c->pDataMax), then
 **      the pData addresses (gathered into c->shmem_pData). Thread 0
 **      prints the gathered addresses to stderr.
 ** Progress is reported through mdlDiag() along the way.
 */
void mdlCOcache(MDL mdl,int cid,void *pData,int iDataSize,int nData,
				void (*init)(void *),void (*combine)(void *,void *))
{
	CACHE *c;
	CAHEAD caIn;
	long lTemp;
	int id,i;

	c = CacheInitialize(mdl,cid,pData,iDataSize,nData);
	c->iType = MDL_COCACHE;
	/*
	 ** Both callbacks are mandatory for a combiner cache.
	 */
	assert(init);
	assert(combine);
	c->init = init;
	c->combine = combine;
	mdlDiag(mdl, "Before CO sync\n");
	/*
	 ** Check-in message used by both phases of the handshake.
	 ** THIS IS A SYNCHRONIZE!!!
	 */
	caIn.id = mdl->idSelf;
	caIn.mid = MDL_MID_CACHEIN;
	caIn.cid = cid;
	/*
	 ** Phase 1: every other thread reports to thread 0 with MPI_Send;
	 ** thread 0 counts itself and then services its receive queue until
	 ** all nThreads have checked in.
	 */
	if (mdl->idSelf != 0) {
		MPI_Send(&caIn,sizeof(CAHEAD),MPI_CHAR, 0,
			 MDL_TAG_CACHECOM, MPI_COMM_WORLD);
		}
	else {
		for (c->nCheckIn = 1; c->nCheckIn < mdl->nThreads; ) {
			mdlCacheReceive(mdl, NULL);
			}
		}
	mdlDiag(mdl, "After sends CO sync\n");
	/*
	 ** Phase 2: thread 0 releases everybody with one message each; the
	 ** others reset their counter and service their receive queue until
	 ** a check-in message bumps it.
	 */
	if (mdl->idSelf != 0) {
		c->nCheckIn = 0;
		do {
			mdlCacheReceive(mdl,NULL);
			} while (c->nCheckIn == 0);
		}
	else {
		for (id = 1; id < mdl->nThreads; ++id) {
			MPI_Send(&caIn,sizeof(CAHEAD),MPI_CHAR, id,
				 MDL_TAG_CACHECOM, MPI_COMM_WORLD);
			}
		}
	mdlDiag(mdl, "After CO sync\n");
	AdjustDataSize(mdl);
	/*
	 ** SHMEM: collect every thread's element count and keep the largest,
	 ** converted to bytes.
	 */
	lTemp = (long) nData;
	shmem_fcollect(shmem_array,&lTemp,1,0,0,mdl->nThreads,pSync);
	c->pDataMax = 0;
	for (i = 0; i < mdl->nThreads; ++i) {
		if (shmem_array[i] > c->pDataMax) {
			c->pDataMax = shmem_array[i];
			}
		}
	c->pDataMax *= iDataSize;
	mdlDiag(mdl, "After CO shmem_collect\n");

	/*
	 ** SHMEM: collect the pData address of every thread so that remote
	 ** data can be located later; thread 0 dumps the table to stderr.
	 */
	lTemp = (long) pData;
	shmem_fcollect(c->shmem_pData,&lTemp,1,0,0,mdl->nThreads,pSync);
	if (mdl->idSelf == 0) {
		for (i = 0; i < mdl->nThreads; ++i) {
			fprintf(stderr,"i: %d  shmem_pData: %ld\n",i,c->shmem_pData[i]);
			}
		}
	mdlDiag(mdl, "After CO shmem_pData exchange\n");

	}
コード例 #7
0
ファイル: mdl.rand.c プロジェクト: N-BodyShop/mdl
/*
 ** Common initialization for all types of caches.
 **
 ** Prepares cache slot 'cid' of 'mdl' for the local array pData, which
 ** holds nData elements of iDataSize bytes each, and returns a pointer
 ** to the slot. The caller is expected to set the cache type (the slot
 ** must currently be MDL_NOCACHE) and any callbacks afterwards.
 **
 ** The function
 **   - grows mdl->cache when cid is beyond the slots allocated so far,
 **     marking every new slot MDL_NOCACHE;
 **   - records the data description and derives the line size and the
 **     key shift / id mask from the number of threads;
 **   - allocates the translation table, the tag array (tag 0 is a
 **     permanently locked sentinel), the key bitmap and the line storage;
 **   - resets the statistics counters and the check-out counter.
 **
 ** All failures (bad arguments, allocation failure, slot already in use)
 ** are reported through assert().
 */
CACHE *CacheInitialize(MDL mdl,int cid,void *pData,int iDataSize,int nData)
{
	CACHE *c;
	CACHE *pNew;
	int i,nMaxCacheIds;
	char ach[256];

	assert(cid >= 0);
	/*
	 ** iDataSize is used as a divisor when the number of cache lines is
	 ** computed, so it has to be positive.
	 */
	assert(iDataSize > 0);
	/*
	 ** Allocate more cache spaces if required!
	 */
	if (cid >= mdl->nMaxCacheIds) {
		/*
		 ** Reallocate the cache spaces so that slot 'cid' exists, plus
		 ** 2 spare slots beyond it. The result goes through a temporary
		 ** so that the old table is not lost if realloc fails.
		 */
		nMaxCacheIds = cid + 3;
		pNew = realloc(mdl->cache,nMaxCacheIds*sizeof(CACHE));
		assert(pNew != NULL);
		mdl->cache = pNew;
		/*
		 ** Initialize the new cache slots.
		 */
		for (i=mdl->nMaxCacheIds;i<nMaxCacheIds;++i) {
			mdl->cache[i].iType = MDL_NOCACHE;
			}
		mdl->nMaxCacheIds = nMaxCacheIds;
		}
	c = &mdl->cache[cid];
	assert(c->iType == MDL_NOCACHE);
	c->pData = pData;
	c->iDataSize = iDataSize;
	c->nData = nData;
	c->iLineSize = MDL_CACHELINE_ELTS*c->iDataSize;
	/*
	 ** iKeyShift: smallest number of bits that can hold a thread id;
	 ** iIdMask extracts those bits from a key.
	 */
	c->iKeyShift = 0;
	while((1 << c->iKeyShift) < mdl->nThreads) ++c->iKeyShift;
	c->iIdMask = (1 << c->iKeyShift) - 1;
	
	if(c->iKeyShift < MDL_CACHELINE_BITS) {
	  /*
	   * Key will be (index & MDL_INDEX_MASK) | id.
	   */
	    c->iInvKeyShift = MDL_CACHELINE_BITS;
	    c->iKeyShift = 0;
	    }
	else {
	  /*
	   * Key will be (index & MDL_INDEX_MASK) << KeyShift | id.
	   */
	    c->iInvKeyShift = c->iKeyShift;
	    c->iKeyShift -= MDL_CACHELINE_BITS;
	    }
	/*
	 ** Determine the number of cache lines to be allocated. At least one
	 ** line is required because tag 0 is written as the sentinel below.
	 */
	c->nLines = (MDL_CACHE_SIZE/c->iDataSize) >> MDL_CACHELINE_BITS;
	assert(c->nLines > 0);
	assert(c->nLines < MDL_RANDMOD);
	sprintf(ach,"CacheInitialize: iDataSize=%d iLineSize=%d nLines=%d\n",
		c->iDataSize,c->iLineSize,c->nLines);
	mdlDiag(mdl, ach);
	/*
	 ** nTrans: twice the smallest power of two that is >= nLines.
	 */
	c->nTrans = 1;
	while(c->nTrans < c->nLines) c->nTrans *= 2;
	c->nTrans *= 2;
	c->iTransMask = c->nTrans-1;
	/*
	 **	Set up the translation table; 0 marks an empty bucket.
	 */
	c->pTrans = malloc(c->nTrans*sizeof(int));	
	assert(c->pTrans != NULL);
	for (i=0;i<c->nTrans;++i) c->pTrans[i] = 0;
	/*
	 ** Set up the tags. Note pTag[0] is a Sentinel!
	 */
	c->pTag = malloc(c->nLines*sizeof(CTAG));
	assert(c->pTag != NULL);
	for (i=0;i<c->nLines;++i) {
		c->pTag[i].iKey = -1;	/* invalid */	
		c->pTag[i].nLock = 0;
		c->pTag[i].nLast = 0;	/* !!! */
		c->pTag[i].iLink = 0;
		}
	c->pTag[0].nLock = 1;		/* always locked */
	c->pTag[0].nLast = INT_MAX;  	/* always Most Recently Used */
	c->nAccess = 0;
	c->nAccHigh = 0;
	c->nMiss = 0;				/* !!!, not NB */
	c->nColl = 0;				/* !!!, not NB */
	c->nMin = 0;				/* !!!, not NB */	
	c->nKeyMax = 500;				/* !!!, not NB */
	c->pbKey = malloc(c->nKeyMax);			/* !!!, not NB */
	assert(c->pbKey != NULL);			/* !!!, not NB */
	for (i=0;i<c->nKeyMax;++i) c->pbKey[i] = 0;	/* !!!, not NB */
	/*
	 ** Allocate cache data lines.
	 */
	c->pLine = malloc(c->nLines*c->iLineSize);
	assert(c->pLine != NULL);
	c->nCheckOut = 0;

	return(c);
	}