/**
 * LAPI completion handler on the recv side. It is responsible for pushing
 * messages to the destination proc or relaying broadcast messages. --Chao Mei
 *
 * Note: the completion handler could be executed on any core within a node,
 * so in SMP mode with a comm thread it must be handled carefully.
 *
 * LAPI provides internal threads to make network progress and they call this
 * function, so the following configurations all reach this code:
 *   1) non-SMP, interrupt (LAPI internal completion thread)
 *   2) non-SMP, polling (machine layer drives network progress)
 *   3) SMP, no comm thread, polling
 *   4) SMP, no comm thread, interrupt
 *   5) SMP, comm thread, polling (comm server currently empty)
 *   6) SMP, comm thread, interrupt
 *
 * The number of LAPI internal completion-handler threads can vary during the
 * run: LAPI adaptively creates more threads when there are more outstanding
 * messages. This means the pcqueue needs protection even in the non-SMP case.
 *
 * @param myLapiContext  the LAPI context handle (unused here, required by the
 *                       LAPI completion-handler signature)
 * @param am_info        the received message buffer
 *
 * --Chao Mei
 */
static void PumpMsgsComplete(lapi_handle_t *myLapiContext, void *am_info) {
    char *msg = am_info;
    int broot, destrank; /* only meaningful under ENSURE_MSG_PAIRORDER */

    MACHSTATE3(2, "[%d] PumpMsgsComplete with msg %p (isImm=%d) begin {",
               CmiMyNode(), msg, CmiIsImmediate(msg));

#if ENSURE_MSG_PAIRORDER
    MACHSTATE3(2, "msg %p info: srcpe=%d, seqno=%d",
               msg, CMI_MSG_SRCPE(msg), CMI_MSG_SEQNO(msg));
#endif

    /*
     * First check whether the msg must be delayed for in-order delivery.
     * Because this is the handler for receiving a message, and cross-network
     * msgs are assumed to be delivered to the first proc (rank 0) of this
     * node, the srcpe of the msg and the next expected seq no are looked up
     * per destination rank.  --Chao Mei
     */
#if ENSURE_MSG_PAIRORDER
    broot = CMI_BROADCAST_ROOT(msg);
    destrank = CMI_DEST_RANK(msg);
    /* Only check proc-level msgs (node-queue msgs are exempt) */
    if (broot >= 0
#if CMK_NODE_QUEUE_AVAILABLE
        && destrank != DGRAM_NODEMESSAGE
#endif
        ) {
        MsgOrderInfo *info;
        info = &CpvAccessOther(p2pMsgSeqInfo, destrank);
        MACHSTATE1(2, "Check msg in-order for p2p msg %p", msg);
        /* checkMsgInOrder consumes out-of-order msgs (queues them); done here */
        if (checkMsgInOrder(msg, info)) {
            MACHSTATE(2, "} PumpMsgsComplete end ");
            return;
        }
    }
#endif

    /* Normal (in-order or unordered) message: hand it to the upper layer. */
    handleOneRecvedMsg(CMI_MSG_SIZE(msg), msg);

    MACHSTATE(2, "} PumpMsgsComplete end ");
    return;
}
inline void emptyAllRecvBufs(){ int i; for(i=0;i<pxshmContext->nodesize;i++){ if(i != pxshmContext->noderank){ sharedBufData *recvBuf = &(pxshmContext->recvBufs[i]); if(recvBuf->header->count > 0){ #if PXSHM_STATS pxshmContext->lockRecvCount++; #endif #if PXSHM_OSSPINLOCK if(! OSSpinLockTry(&recvBuf->header->lock)){ #elif PXSHM_LOCK if(sem_trywait(recvBuf->mutex) < 0){ #elif PXSHM_FENCE recvBuf->header->flagReceiver = 1; recvBuf->header->turn = SENDER; CmiMemoryReadFence(0,0); CmiMemoryWriteFence(0,0); //if((recvBuf->header->flagSender && recvBuf->header->turn == SENDER)){ if((recvBuf->header->flagSender)){ recvBuf->header->flagReceiver = 0; #endif }else{ MACHSTATE1(3,"emptyRecvBuf to be called for rank %d",i); emptyRecvBuf(recvBuf); #if PXSHM_OSSPINLOCK OSSpinLockUnlock(&recvBuf->header->lock); #elif PXSHM_LOCK sem_post(recvBuf->mutex); #elif PXSHM_FENCE CmiMemoryReadFence(0,0); CmiMemoryWriteFence(0,0); recvBuf->header->flagReceiver = 0; #endif } } } } }; inline void flushAllSendQs(){ int i; #if SENDQ_LIST int index_prev = -1; i = sendQ_head_index; while (i!= -1) { PxshmSendQ *sendQ = pxshmContext->sendQs[i]; CmiAssert(i != pxshmContext->noderank); if(sendQ->numEntries > 0){ #else for(i=0;i<pxshmContext->nodesize;i++) { if (i == pxshmContext->noderank) continue; PxshmSendQ *sendQ = pxshmContext->sendQs[i]; if(sendQ->numEntries > 0) { #endif #if PXSHM_OSSPINLOCK if(OSSpinLockTry(&pxshmContext->sendBufs[i].header->lock)){ #elif PXSHM_LOCK if(sem_trywait(pxshmContext->sendBufs[i].mutex) >= 0){ #elif PXSHM_FENCE pxshmContext->sendBufs[i].header->flagSender = 1; pxshmContext->sendBufs[i].header->turn = RECEIVER; CmiMemoryReadFence(0,0); CmiMemoryWriteFence(0,0); if(!(pxshmContext->sendBufs[i].header->flagReceiver && pxshmContext->sendBufs[i].header->turn == RECEIVER)){ #endif MACHSTATE1(3,"flushSendQ %d",i); flushSendQ(sendQ); #if PXSHM_OSSPINLOCK OSSpinLockUnlock(&pxshmContext->sendBufs[i].header->lock); #elif PXSHM_LOCK sem_post(pxshmContext->sendBufs[i].mutex); 
#elif PXSHM_FENCE CmiMemoryReadFence(0,0); CmiMemoryWriteFence(0,0); pxshmContext->sendBufs[i].header->flagSender = 0; #endif }else{ #if PXSHM_FENCE pxshmContext->sendBufs[i].header->flagSender = 0; #endif } } #if SENDQ_LIST if (sendQ->numEntries == 0) { if (index_prev != -1) pxshmContext->sendQs[index_prev]->next = sendQ->next; else sendQ_head_index = sendQ->next; i = sendQ->next; sendQ->next = -2; } else { index_prev = i; i = sendQ->next; } #endif } }; void emptyRecvBuf(sharedBufData *recvBuf){ int numMessages = recvBuf->header->count; int i=0; char *ptr=recvBuf->data; for(i=0;i<numMessages;i++){ int size; int rank, srcpe, seqno, magic, i; unsigned int broot; char *msg = ptr; char *newMsg; size = CMI_MSG_SIZE(msg); newMsg = (char *)CmiAlloc(size); memcpy(newMsg,msg,size); handleOneRecvedMsg(size, newMsg); ptr += size; MACHSTATE3(3,"message of size %d recvd ends at ptr-data %d total bytes %d bytes %d",size,ptr-recvBuf->data,recvBuf->header->bytes); } #if 1 if(ptr - recvBuf->data != recvBuf->header->bytes){ CmiPrintf("[%d] ptr - recvBuf->data %d recvBuf->header->bytes %d numMessages %d \n",_Cmi_mynode, ptr - recvBuf->data, recvBuf->header->bytes,numMessages); } #endif CmiAssert(ptr - recvBuf->data == recvBuf->header->bytes); recvBuf->header->count=0; recvBuf->header->bytes=0; } /************************** *sendQ helper functions * ****************/ void initSendQ(PxshmSendQ *q,int size, int rank){ q->data = (OutgoingMsgRec *)calloc(size, sizeof(OutgoingMsgRec)); q->size = size; q->numEntries = 0; q->begin = 0; q->end = 0; q->rank = rank; #if SENDQ_LIST q->next = -2; #endif } void pushSendQ(PxshmSendQ *q, char *msg, int size, int *refcount){ if(q->numEntries == q->size){ //need to resize OutgoingMsgRec *oldData = q->data; int newSize = q->size<<1; q->data = (OutgoingMsgRec *)calloc(newSize, sizeof(OutgoingMsgRec)); //copy head to the beginning of the new array CmiAssert(q->begin == q->end); CmiAssert(q->begin < q->size); 
memcpy(&(q->data[0]),&(oldData[q->begin]),sizeof(OutgoingMsgRec)*(q->size - q->begin)); if(q->end!=0){ memcpy(&(q->data[(q->size - q->begin)]),&(oldData[0]),sizeof(OutgoingMsgRec)*(q->end)); } free(oldData); q->begin = 0; q->end = q->size; q->size = newSize; } OutgoingMsgRec *omg = &q->data[q->end]; omg->size = size; omg->data = msg; omg->refcount = refcount; (q->end)++; if(q->end >= q->size){ q->end -= q->size; } q->numEntries++; } OutgoingMsgRec * popSendQ(PxshmSendQ *q){ OutgoingMsgRec * ret; if(0 == q->numEntries){ return NULL; } ret = &q->data[q->begin]; (q->begin)++; if(q->begin >= q->size){ q->begin -= q->size; } q->numEntries--; return ret; }