void static inline handoverSysvshmMessage(char *newmsg,int total_size,int rank,int broot){ CmiAssert(rank == 0); #if CMK_BROADCAST_SPANNING_TREE if (rank == DGRAM_BROADCAST #if CMK_NODE_QUEUE_AVAILABLE || rank == DGRAM_NODEBROADCAST #endif ){ SendSpanningChildren(NULL, 0, total_size, newmsg,broot,rank); } #elif CMK_BROADCAST_HYPERCUBE if (rank == DGRAM_BROADCAST #if CMK_NODE_QUEUE_AVAILABLE || rank == DGRAM_NODEBROADCAST #endif ){ SendHypercube(NULL, 0, total_size, newmsg,broot,rank); } #endif switch (rank) { case DGRAM_BROADCAST: { CmiPushPE(0, newmsg); break; } default: { CmiPushPE(rank, newmsg); } } /* end of switch */ }
/* called in PumpMsgs */ int PumpPersistent() { PersistentReceivesTable *slot = persistentReceivesTableHead; int status = 0; while (slot) { unsigned int size = *(slot->recvSizePtr[0]); if (size > 0) { char *msg = slot->messagePtr[0]; /*CmiPrintf("[%d] size: %d rank:%d msg:%p %p\n", CmiMyPe(), size, CMI_DEST_RANK(msg), msg, slot->messagePtr);*/ #if 0 void *dupmsg; dupmsg = CmiAlloc(size); _MEMCHECK(dupmsg); memcpy(dupmsg, msg, size); msg = dupmsg; #else /* return messagePtr directly and user MUST make sure not to delete it. */ /*CmiPrintf("[%d] %p size:%d rank:%d root:%d\n", CmiMyPe(), msg, size, CMI_DEST_RANK(msg), CMI_BROADCAST_ROOT(msg));*/ CmiReference(msg); #endif CmiPushPE(CMI_DEST_RANK_NET(msg), msg); #if CMK_BROADCAST_SPANNING_TREE if (CMI_BROADCAST_ROOT(msg)) SendSpanningChildrenNet(size, msg); #endif *(slot->recvSizePtr[0]) = 0; status = 1; } slot = slot->next; } return status; }
/*
 * Forward a processor-level broadcast message to this node's children in
 * the compiled-in broadcast scheme, then queue it on local PE 0.
 *
 *   size  message size handed to the forwarding routine and to CopyMsg
 *   msg   the broadcast message
 *
 * This function is only called on intermediate nodes, so the destination
 * rank stored in the message is expected to be 0 (asserted below).
 */
static INLINE_KEYWORD void processProcBcastMsg(int size, char *msg)
{
    CmiAssert(CMI_DEST_RANK(msg)==0);

#if CMK_BROADCAST_SPANNING_TREE
    SendSpanningChildrenProc(size, msg);
#elif CMK_BROADCAST_HYPERCUBE
    SendHyperCubeProc(size, msg);
#endif

#if CMK_BROADCAST_SPANNING_TREE && CMK_BROADCAST_USE_CMIREFERENCE
    /* The same message may still be on its way out: when more than one
     * reference is held, deliver a private copy and drop our reference to
     * the original. */
    if (CmiNumNodes()>1) {
        if (CmiGetReference(msg)>1) {
            void *privateCopy = CopyMsg(msg, size);
            CmiFree(msg);
            msg = privateCopy;
        }
    }
#endif

    CmiPushPE(0, msg);
}
/**
 * Enforce per-source message ordering using the sequence number stored in
 * the message header.
 *
 * Returns 0 when "msg" carries the expected sequence number and no
 * out-of-order (ooo) message is buffered for its source PE; the caller
 * still has to process "msg" in that case.
 *
 * Returns 1 if this "msg" is an out-of-order message (it is stored in the
 * per-source window), or this "msg" is a late message which triggers the
 * process of all buffered ooo msgs (then "msg" and the contiguous run of
 * buffered messages are delivered here, so the caller must not process
 * "msg" again).
 * --Chao Mei
 *
 * The bookkeeping in "info" is updated while holding cmplHdlrThdLock;
 * delivery of drained messages happens after the lock is released.
 * A failed allocation terminates through CmiAbort rather than being
 * dereferenced.
 */
static int checkMsgInOrder(char *msg, MsgOrderInfo *info)
{
    int srcpe;
    int incomingSeqNo, expectedSeqNo;
    int curOffset, maxOffset;
    int i, curWinSize;
    void **destMsgBuffer = NULL;
    /* Messages to deliver are first collected in this private array so the
     * delivery itself can run outside the lock (reduces the atomic
     * granularity). numMsgs is the number of entries collected. */
    void **toProcessMsgBuffer;
    int numMsgs = 0;

    srcpe = CMI_MSG_SRCPE(msg);
    incomingSeqNo = CMI_MSG_SEQNO(msg);

    CmiLock(cmplHdlrThdLock);

    expectedSeqNo = getNextExpectedMsgSeqNo(info->expectedMsgSeqNo, srcpe);
    if (expectedSeqNo == incomingSeqNo) {
        /* Two cases: has ooo msg buffered or not */
        maxOffset = (info->oooMaxOffset)[srcpe];
        if (maxOffset>0) {
            MACHSTATE1(4, "Processing all buffered ooo msgs (maxOffset=%d) including the just recved begin {", maxOffset);
            curWinSize = info->CUR_WINDOW_SIZE[srcpe];
            toProcessMsgBuffer = malloc((curWinSize+1)*sizeof(void *));
            if (toProcessMsgBuffer == NULL) {
                CmiAbort("checkMsgInOrder: out of memory for the to-process msg buffer!\n");
            }

            /* process the msg just recved */
            toProcessMsgBuffer[numMsgs++] = msg;

            /* process the buffered ooo msg until the first empty slot in the window */
            destMsgBuffer = (info->oooMsgBuffer)[srcpe];
            for (curOffset=0; curOffset<maxOffset; curOffset++) {
                char *curMsg = destMsgBuffer[curOffset];
                if (curMsg == NULL) {
                    /* the last used slot of the window can never be empty */
                    CmiAssert(curOffset!=(maxOffset-1));
                    break;
                }
                toProcessMsgBuffer[numMsgs++] = curMsg;
                destMsgBuffer[curOffset] = NULL;
            }

            /* Update expected seqno, maxOffset and slide the window */
            if (curOffset < maxOffset) {
                /**
                 * now, the seqno of the next to-be-recved msg should be
                 * "expectedSeqNo+curOffset+1" as the seqno of the just
                 * processed msg is "expectedSeqNo+curOffset". We need to slide
                 * the msg buffer window from "curOffset+1" because the first
                 * element of the buffer window should always points to the ooo
                 * msg that's 1 in terms of seqno ahead of the next to-be-recved
                 * msg. --Chao Mei
                 */

                /* moving [curOffset+1, maxOffset) to [0, maxOffset-curOffset-1) in the window */
                for (i=0; i<maxOffset-curOffset-1; i++) {
                    destMsgBuffer[i] = destMsgBuffer[curOffset+i+1];
                }
                /* clear the slots vacated by the slide */
                for (i=maxOffset-curOffset-1; i<maxOffset; i++) {
                    destMsgBuffer[i] = NULL;
                }
                (info->oooMaxOffset)[srcpe] = maxOffset-curOffset-1;
                setNextExpectedMsgSeqNo(info->expectedMsgSeqNo, srcpe, expectedSeqNo+curOffset);
            } else {
                /* there's no remaining buffered ooo msgs */
                (info->oooMaxOffset)[srcpe] = 0;
                setNextExpectedMsgSeqNo(info->expectedMsgSeqNo, srcpe, expectedSeqNo+maxOffset);
            }

            CmiUnlock(cmplHdlrThdLock);

            /* Process the msgs */
            for (i=0; i<numMsgs; i++) {
                char *curMsg = toProcessMsgBuffer[i];
                if (CMI_BROADCAST_ROOT(curMsg)>0) {
#if CMK_OFFLOAD_BCAST_PROCESS
                    PCQueuePush(CsvAccess(procBcastQ), curMsg);
#else
                    processProcBcastMsg(CMI_MSG_SIZE(curMsg), curMsg);
#endif
                } else {
                    CmiPushPE(CMI_DEST_RANK(curMsg), curMsg);
                }
            }

            free(toProcessMsgBuffer);

            MACHSTATE1(4, "Processing all buffered ooo msgs (actually processed %d) end }", curOffset);
            /**
             * Since we have processed all buffered ooo msgs including
             * this just recved one, 1 should be returned so that this
             * msg no longer needs processing
             */
            return 1;
        } else {
            /* An expected msg recved without any ooo msg buffered */
            MACHSTATE1(4, "Receiving an expected msg with seqno=%d\n", incomingSeqNo);
            setNextExpectedMsgSeqNo(info->expectedMsgSeqNo, srcpe, expectedSeqNo);

            CmiUnlock(cmplHdlrThdLock);
            return 0;
        }
    }

    MACHSTATE2(4, "Receiving an out-of-order msg with seqno=%d, but expect seqno=%d", incomingSeqNo, expectedSeqNo);
    curWinSize = info->CUR_WINDOW_SIZE[srcpe];
    if ((info->oooMsgBuffer)[srcpe]==NULL) {
        /* first ooo msg from this source: create a zero-filled window */
        (info->oooMsgBuffer)[srcpe] = calloc(curWinSize, sizeof(void *));
        if ((info->oooMsgBuffer)[srcpe] == NULL) {
            CmiAbort("checkMsgInOrder: out of memory for the ooo msg window!\n");
        }
    }
    destMsgBuffer = (info->oooMsgBuffer)[srcpe];
    curOffset = incomingSeqNo - expectedSeqNo;
    maxOffset = (info->oooMaxOffset)[srcpe];
    if (curOffset<0) {
        /* It's possible that the seqNo starts with another round (exceeding MAX_MSG_SEQNO) with 1 */
        curOffset += MAX_MSG_SEQNO;
    }
    if (curOffset > curWinSize) {
        int newWinSize;
        void **grownBuffer;

        if (curOffset > MAX_WINDOW_SIZE) {
            CmiAbort("Exceeding the MAX_WINDOW_SIZE!\n");
        }
        /* grow to the next multiple of the current window size above curOffset */
        newWinSize = ((curOffset/curWinSize)+1)*curWinSize;
        grownBuffer = calloc(newWinSize, sizeof(void *));
        if (grownBuffer == NULL) {
            CmiAbort("checkMsgInOrder: out of memory while growing the ooo msg window!\n");
        }
        memcpy(grownBuffer, destMsgBuffer, curWinSize*sizeof(void *));
        free(destMsgBuffer);
        (info->oooMsgBuffer)[srcpe] = grownBuffer;
        info->CUR_WINDOW_SIZE[srcpe] = newWinSize;
        destMsgBuffer = grownBuffer;
    }
    /* slot k of the window holds the msg whose seqno is expectedSeqNo+k+1 */
    CmiAssert(destMsgBuffer[curOffset-1] == NULL);
    destMsgBuffer[curOffset-1] = msg;
    if (curOffset > maxOffset) (info->oooMaxOffset)[srcpe] = curOffset;

    CmiUnlock(cmplHdlrThdLock);
    return 1;
}
inline void emptyAllRecvBufs(){ int i; for(i=0;i<pxshmContext->nodesize;i++){ if(i != pxshmContext->noderank){ sharedBufData *recvBuf = &(pxshmContext->recvBufs[i]); if(recvBuf->header->count > 0){ #if PXSHM_STATS pxshmContext->lockRecvCount++; #endif #if PXSHM_OSSPINLOCK if(! OSSpinLockTry(&recvBuf->header->lock)){ #elif PXSHM_LOCK if(sem_trywait(recvBuf->mutex) < 0){ #elif PXSHM_FENCE recvBuf->header->flagReceiver = 1; recvBuf->header->turn = SENDER; CmiMemoryReadFence(0,0); CmiMemoryWriteFence(0,0); //if((recvBuf->header->flagSender && recvBuf->header->turn == SENDER)){ if((recvBuf->header->flagSender)){ recvBuf->header->flagReceiver = 0; #endif }else{ MACHSTATE1(3,"emptyRecvBuf to be called for rank %d",i); emptyRecvBuf(recvBuf); #if PXSHM_OSSPINLOCK OSSpinLockUnlock(&recvBuf->header->lock); #elif PXSHM_LOCK sem_post(recvBuf->mutex); #elif PXSHM_FENCE CmiMemoryReadFence(0,0); CmiMemoryWriteFence(0,0); recvBuf->header->flagReceiver = 0; #endif } } } } }; inline void flushAllSendQs(){ int i; #if SENDQ_LIST int index_prev = -1; i = sendQ_head_index; while (i!= -1) { PxshmSendQ *sendQ = pxshmContext->sendQs[i]; CmiAssert(i != pxshmContext->noderank); if(sendQ->numEntries > 0){ #else for(i=0;i<pxshmContext->nodesize;i++) { if (i == pxshmContext->noderank) continue; PxshmSendQ *sendQ = pxshmContext->sendQs[i]; if(sendQ->numEntries > 0) { #endif #if PXSHM_OSSPINLOCK if(OSSpinLockTry(&pxshmContext->sendBufs[i].header->lock)){ #elif PXSHM_LOCK if(sem_trywait(pxshmContext->sendBufs[i].mutex) >= 0){ #elif PXSHM_FENCE pxshmContext->sendBufs[i].header->flagSender = 1; pxshmContext->sendBufs[i].header->turn = RECEIVER; CmiMemoryReadFence(0,0); CmiMemoryWriteFence(0,0); if(!(pxshmContext->sendBufs[i].header->flagReceiver && pxshmContext->sendBufs[i].header->turn == RECEIVER)){ #endif MACHSTATE1(3,"flushSendQ %d",i); flushSendQ(sendQ); #if PXSHM_OSSPINLOCK OSSpinLockUnlock(&pxshmContext->sendBufs[i].header->lock); #elif PXSHM_LOCK sem_post(pxshmContext->sendBufs[i].mutex); 
#elif PXSHM_FENCE CmiMemoryReadFence(0,0); CmiMemoryWriteFence(0,0); pxshmContext->sendBufs[i].header->flagSender = 0; #endif }else{ #if PXSHM_FENCE pxshmContext->sendBufs[i].header->flagSender = 0; #endif } } #if SENDQ_LIST if (sendQ->numEntries == 0) { if (index_prev != -1) pxshmContext->sendQs[index_prev]->next = sendQ->next; else sendQ_head_index = sendQ->next; i = sendQ->next; sendQ->next = -2; } else { index_prev = i; i = sendQ->next; } #endif } }; void static inline handoverPxshmMessage(char *newmsg,int total_size,int rank,int broot); void emptyRecvBuf(sharedBufData *recvBuf){ int numMessages = recvBuf->header->count; int i=0; char *ptr=recvBuf->data; for(i=0;i<numMessages;i++){ int size; int rank, srcpe, seqno, magic, i; unsigned int broot; char *msg = ptr; char *newMsg; #if CMK_NET_VERSION DgramHeaderBreak(msg, rank, srcpe, magic, seqno, broot); size = CmiMsgHeaderGetLength(msg); #else size = CmiGetMsgSize(msg); #endif newMsg = (char *)CmiAlloc(size); memcpy(newMsg,msg,size); #if CMK_NET_VERSION handoverPxshmMessage(newMsg,size,rank,broot); #else handleOneRecvedMsg(size, newMsg); #endif ptr += size; MACHSTATE3(3,"message of size %d recvd ends at ptr-data %d total bytes %d bytes %d",size,ptr-recvBuf->data,recvBuf->header->bytes); } #if 1 if(ptr - recvBuf->data != recvBuf->header->bytes){ CmiPrintf("[%d] ptr - recvBuf->data %d recvBuf->header->bytes %d numMessages %d \n",_Cmi_mynode, ptr - recvBuf->data, recvBuf->header->bytes,numMessages); } #endif CmiAssert(ptr - recvBuf->data == recvBuf->header->bytes); recvBuf->header->count=0; recvBuf->header->bytes=0; } #if CMK_NET_VERSION void static inline handoverPxshmMessage(char *newmsg,int total_size,int rank,int broot){ CmiAssert(rank == 0); #if CMK_BROADCAST_SPANNING_TREE if (rank == DGRAM_BROADCAST #if CMK_NODE_QUEUE_AVAILABLE || rank == DGRAM_NODEBROADCAST #endif ){ SendSpanningChildren(NULL, 0, total_size, newmsg,broot,rank); } #elif CMK_BROADCAST_HYPERCUBE if (rank == DGRAM_BROADCAST #if 
CMK_NODE_QUEUE_AVAILABLE || rank == DGRAM_NODEBROADCAST #endif ){ SendHypercube(NULL, 0, total_size, newmsg,broot,rank); } #endif switch (rank) { case DGRAM_BROADCAST: { CmiPushPE(0, newmsg); break; } default: { CmiPushPE(rank, newmsg); } } /* end of switch */ } #endif /************************** *sendQ helper functions * ****************/ void initSendQ(PxshmSendQ *q,int size, int rank){ q->data = (OutgoingMsgRec *)calloc(size, sizeof(OutgoingMsgRec)); q->size = size; q->numEntries = 0; q->begin = 0; q->end = 0; q->rank = rank; #if SENDQ_LIST q->next = -2; #endif } void pushSendQ(PxshmSendQ *q, char *msg, int size, int *refcount){ if(q->numEntries == q->size){ //need to resize OutgoingMsgRec *oldData = q->data; int newSize = q->size<<1; q->data = (OutgoingMsgRec *)calloc(newSize, sizeof(OutgoingMsgRec)); //copy head to the beginning of the new array CmiAssert(q->begin == q->end); CmiAssert(q->begin < q->size); memcpy(&(q->data[0]),&(oldData[q->begin]),sizeof(OutgoingMsgRec)*(q->size - q->begin)); if(q->end!=0){ memcpy(&(q->data[(q->size - q->begin)]),&(oldData[0]),sizeof(OutgoingMsgRec)*(q->end)); } free(oldData); q->begin = 0; q->end = q->size; q->size = newSize; } OutgoingMsgRec *omg = &q->data[q->end]; omg->size = size; omg->data = msg; omg->refcount = refcount; (q->end)++; if(q->end >= q->size){ q->end -= q->size; } q->numEntries++; } OutgoingMsgRec * popSendQ(PxshmSendQ *q){ OutgoingMsgRec * ret; if(0 == q->numEntries){ return NULL; } ret = &q->data[q->begin]; (q->begin)++; if(q->begin >= q->size){ q->begin -= q->size; } q->numEntries--; return ret; }