Exemple #1
0
/**
 * Send one message to a remote node as a LAPI active message.
 *
 * Fills in the Am member of a lapi_xfer_t descriptor and hands it to
 * LAPI_Xfer through check_lapi.
 *
 * @param destNode  target task, stored in Am.tgt
 * @param size      number of bytes of msg to transfer (Am.udata_len)
 * @param msg       message buffer, passed as the user data (Am.udata)
 * @param shdlr     send completion handler stored in Am.shdlr
 * @param sinfo     opaque argument stored in Am.sinfo for shdlr
 *
 * NOTE(review): the second trace statement references `deliverable`, which
 * is not declared in this function -- presumably a file-level variable, or
 * the trace macros expand to nothing in normal builds; confirm.
 */
static INLINE_KEYWORD void lapiSendFn(int destNode, int size, char *msg, scompl_hndlr_t *shdlr, void *sinfo) {
    lapi_xfer_t xfer_cmd;

    MACHSTATE3(2,"lapiSendFn to destNode=%d with msg %p (isImm=%d) begin {",destNode,msg, CmiIsImmediate(msg));
    MACHSTATE3(2, "inside lapiSendFn 1: size=%d, sinfo=%p, deliverable=%d", size, sinfo, deliverable);

    MACHSTATE2(2, "Ready to call LAPI_Xfer with destNode=%d, destRank=%d",destNode,CMI_DEST_RANK(msg));

    /* Transfer kind, destination and the handler run on the target. */
    xfer_cmd.Am.Xfer_type = LAPI_AM_XFER;
    xfer_cmd.Am.flags     = 0;
    xfer_cmd.Am.tgt       = destNode;
    xfer_cmd.Am.hdr_hdl   = lapiHeaderHandler;

    /* No user header; the whole message travels as user data. */
    xfer_cmd.Am.uhdr      = NULL;
    xfer_cmd.Am.uhdr_len  = 0;
    xfer_cmd.Am.udata     = msg;
    xfer_cmd.Am.udata_len = size;

    /* Origin-side completion callback and its argument. */
    xfer_cmd.Am.shdlr     = shdlr;
    xfer_cmd.Am.sinfo     = sinfo;

    /* No LAPI counters are used. */
    xfer_cmd.Am.org_cntr  = NULL;
    xfer_cmd.Am.tgt_cntr  = NULL;
    xfer_cmd.Am.cmpl_cntr = NULL;

    check_lapi(LAPI_Xfer,(lapiContext, &xfer_cmd));

    MACHSTATE(2,"} lapiSendFn end");
}
Exemple #2
0
/**
 * LAPI completion handler on the receive side.
 *
 * Receives the fully arrived message through am_info and passes it on to
 * handleOneRecvedMsg(). When ENSURE_MSG_PAIRORDER is enabled, qualifying
 * messages are first run through checkMsgInOrder(); if that returns
 * non-zero the function returns without delivering the message itself.
 *
 * Original author's notes (--Chao Mei), kept because they record open
 * questions about the threading model:
 *
 *  - The completion handler could be executed on any core within a node (?),
 *    so in SMP mode with a comm thread it must be handled carefully.
 *  - LAPI also has an internal thread for network progress which will call
 *    this function (?), giving the following situations to consider:
 *      1) non-SMP mode, with interrupt (LAPI internal completion thread)
 *      2) non-SMP mode, with polling (machine layer drives network progress)
 *      3) SMP mode, no comm thread, with polling
 *      4) SMP mode, no comm thread, with interrupt
 *      5) SMP mode, with comm thread, with polling (not yet implemented,
 *         comm server is empty right now)
 *      6) SMP mode, with comm thread, with interrupt (?)
 *  - At the time of writing, SMP mode without comm thread was still being
 *    implemented.
 *  - This function is executed by LAPI internal threads. The number of
 *    internal completion-handler threads seems to vary during the run: LAPI
 *    adaptively creates more threads when more messages are outstanding.
 *    This means pcqueue needs protection even in the non-SMP case.
 *
 * @param myLapiContext  LAPI handle supplied by the caller; not used here
 * @param am_info        the received message, treated as a char buffer
 */
static void PumpMsgsComplete(lapi_handle_t *myLapiContext, void *am_info) {
    char *msg = am_info;
#if ENSURE_MSG_PAIRORDER
    /* Only needed by the pair-order check below. */
    int broot, destrank;
#endif

    MACHSTATE3(2,"[%d] PumpMsgsComplete with msg %p (isImm=%d) begin {",CmiMyNode(), msg, CmiIsImmediate(msg));
#if ENSURE_MSG_PAIRORDER
    MACHSTATE3(2,"msg %p info: srcpe=%d, seqno=%d", msg, CMI_MSG_SRCPE(msg), CMI_MSG_SEQNO(msg));
#endif
    /*
     * Original design note (--Chao Mei): a broadcast message sent via the
     * spanning tree has to be relayed to the children and then delivered to
     * every core on this node; after that check, normal messages are dealt
     * with. No relay code is visible in this function -- presumably it is
     * done inside handleOneRecvedMsg(); confirm.
     *
     * For in-order delivery: because this is the receive handler, and
     * cross-network messages are assumed to always be delivered to the first
     * proc (rank 0) of this node, the srcpe of broadcast messages and the
     * next message sequence number are selected correspondingly.
     */
#if ENSURE_MSG_PAIRORDER
    broot = CMI_BROADCAST_ROOT(msg);
    destrank = CMI_DEST_RANK(msg);
    /* Only check proc-level msgs */
    if (broot>=0
#if CMK_NODE_QUEUE_AVAILABLE
            && destrank != DGRAM_NODEMESSAGE
#endif
       ) {
        MsgOrderInfo *info;
        info = &CpvAccessOther(p2pMsgSeqInfo, destrank);
        MACHSTATE1(2, "Check msg in-order for p2p msg %p", msg);

        /* Non-zero means this call must not deliver the message itself. */
        if (checkMsgInOrder(msg,info)) {
            MACHSTATE(2,"} PumpMsgsComplete end ");
            return;
        }
    }
#endif

    handleOneRecvedMsg(CMI_MSG_SIZE(msg), msg);

    MACHSTATE(2,"} PumpMsgsComplete end ");
    return;
}
Exemple #3
0
/**
 * Drain every message currently stored in one shared receive buffer.
 *
 * The buffer holds header->count messages packed back to back starting at
 * recvBuf->data. Each one is copied into a fresh CmiAlloc'd buffer and passed
 * to handoverSysvshmMessage() together with the rank and broadcast root
 * decoded from its datagram header. Afterwards the buffer is marked empty by
 * resetting header->count and header->bytes to 0.
 *
 * NOTE(review): the caller presumably holds the lock protecting recvBuf, and
 * handoverSysvshmMessage() presumably takes ownership of the copy -- confirm.
 *
 * @param recvBuf  shared buffer to drain
 */
void emptyRecvBuf(sharedBufData *recvBuf){
	int numMessages = recvBuf->header->count;
	int i;
	char *ptr = recvBuf->data;

	for(i=0;i<numMessages;i++){
		int size;
		/* srcpe, seqno and magic are only output slots for DgramHeaderBreak. */
		int rank, srcpe, seqno, magic;
		unsigned int broot;
		char *msg = ptr;
		char *newMsg;

		DgramHeaderBreak(msg, rank, srcpe, magic, seqno, broot);
		size = CmiMsgHeaderGetLength(msg);

		/* Copy out of shared memory so the buffer can be reused by the sender. */
		newMsg = (char *)CmiAlloc(size);
		memcpy(newMsg,msg,size);

		handoverSysvshmMessage(newMsg,size,rank,broot);

		ptr += size;

		/* Three arguments for three conversions; the pointer difference is narrowed for %d. */
		MACHSTATE3(3,"message of size %d recvd ends at ptr-data %d total bytes %d",size,(int)(ptr-recvBuf->data),recvBuf->header->bytes);
	}
	/* The walked length must match what the sender recorded. */
	CmiAssert(ptr - recvBuf->data == recvBuf->header->bytes);
	recvBuf->header->count=0;
	recvBuf->header->bytes=0;
}
Exemple #4
0
/**
 * Send one outgoing message to a peer on the same node through that peer's
 * SysV shared-memory buffer.
 *
 * The datagram header is written into ogm->data, then a semaphore operation
 * prepared by ACQUIRENW is attempted on the destination buffer:
 *  - if semop() fails, the message is pushed onto the per-destination send
 *    queue, its refcount is incremented, and the function returns;
 *  - if it succeeds and the send queue is empty, the message is handed to
 *    sendMessage() directly;
 *  - if it succeeds and the queue is non-empty, the message is appended to
 *    the queue and the queue is flushed.
 * In the last two cases the semaphore is released before returning.
 *
 * NOTE(review): ACQUIRENW / RELEASE are macros defined elsewhere; they
 * presumably fill in the local `sb`, so that variable must keep its name.
 *
 * @param ogm    outgoing message record (data, size, src, dst, refcount)
 * @param node   not used in this function
 * @param rank   destination rank written into the datagram header
 * @param broot  broadcast root written into the datagram header
 */
void CmiSendMessageSysvshm(OutgoingMsg ogm,OtherNode node,int rank,unsigned int broot){
	struct sembuf sb;
	int releaseStatus;

#if SYSVSHM_STATS
	double _startSendTime = CmiWallTimer();
#endif

	int dstRank = SysvshmRank(ogm->dst);
	MEMDEBUG(CmiMemoryCheck());

	DgramHeaderMake(ogm->data,rank,ogm->src,Cmi_charmrun_pid,1, broot);

	MACHSTATE4(3,"Send Msg Sysvshm ogm %p size %d dst %d dstRank %d",ogm,ogm->size,ogm->dst,dstRank);

	CmiAssert(dstRank >=0 && dstRank != sysvshmContext->noderank);

	sharedBufData *dstBuf = &(sysvshmContext->sendBufs[dstRank]);

	ACQUIRENW(sysvshmContext->noderank);
	if(semop(dstBuf->semid, &sb, 1)<0) {
		/* Failed to get the lock: queue the message and retain it. */
		pushSendQ(sysvshmContext->sendQs[dstRank],ogm);
		ogm->refcount++;
		MEMDEBUG(CmiMemoryCheck());
		return;
	}

	/*
	 * We hold the lock for this buffer. Anything already queued must go out
	 * before this message.
	 */
	if(sysvshmContext->sendQs[dstRank]->numEntries == 0){
		/* sendMessage() queues the message itself if the buffer is full. */
		(void)sendMessage(ogm,dstBuf,sysvshmContext->sendQs[dstRank]);
		MACHSTATE(3,"Sysvshm Send succeeded immediately");
	}else{
		ogm->refcount+=2;/*this message should not get deleted when the queue is flushed*/
		pushSendQ(sysvshmContext->sendQs[dstRank],ogm);
		MACHSTATE3(3,"Sysvshm ogm %p pushed to sendQ length %d refcount %d",ogm,sysvshmContext->sendQs[dstRank]->numEntries,ogm->refcount);
		int sent = flushSendQ(dstRank);
		ogm->refcount--; /*if it has been sent, can be deleted by caller, if not will be deleted when queue is flushed*/
		MACHSTATE1(3,"Sysvshm flushSendQ sent %d messages",sent);
	}

	/*
	 * Unlock the receive buffer. The semop() call must stay outside
	 * CmiAssert(): if the assertion macro expands to nothing, a call placed
	 * inside it would vanish and the semaphore would never be released.
	 */
	RELEASE(sysvshmContext->noderank);
	releaseStatus = semop(dstBuf->semid, &sb, 1);
	CmiAssert(releaseStatus >= 0);
	(void)releaseStatus; /* silence unused warning when assertions are off */

#if SYSVSHM_STATS
	sysvshmContext->sendCount ++;
	sysvshmContext->sendTime += (CmiWallTimer()-_startSendTime);
#endif
	MEMDEBUG(CmiMemoryCheck());
}
Exemple #5
0
/**
 * Copy one outgoing message into a shared buffer, or queue it if it does not
 * fit.
 *
 * NOTE: this must only be called after obtaining the lock that protects
 * dstBuf.
 *
 * @param ogm       message to send (data, size, refcount)
 * @param dstBuf    destination shared buffer
 * @param dstSendQ  queue that receives the message when the buffer is full
 * @return 1 if the message was copied into dstBuf, 0 if it was queued
 */
int sendMessage(OutgoingMsg ogm,sharedBufData *dstBuf,SysvshmSendQ *dstSendQ){

	if(dstBuf->header->bytes+ogm->size > SHMBUFLEN){
		/* Shared buffer is too full for this message: keep it for later. */
		printf("send buffer is too full\n");
		pushSendQ(dstSendQ,ogm);
		ogm->refcount++;
		MACHSTATE3(3,"Sysvshm send ogm %p size %d queued refcount %d",ogm,ogm->size,ogm->refcount);
		return 0;
	}

	/* Enough room: append the message after the bytes already stored. */
	dstBuf->header->count++;
	memcpy(dstBuf->data+dstBuf->header->bytes,ogm->data,ogm->size);
	dstBuf->header->bytes += ogm->size;
	MACHSTATE4(3,"Sysvshm send done ogm %p size %d dstBuf->header->count %d dstBuf->header->bytes %d",ogm,ogm->size,dstBuf->header->count,dstBuf->header->bytes);
	return 1;
}
Exemple #6
0
/**
 * Work out which processes share this physical node and where this process
 * sits among them, storing the result in sysvshmContext.
 *
 * The group size comes from the "+nodesize" command-line argument (default
 * 1). Processes are grouped in consecutive runs of nodesize by _Cmi_mynode:
 *   noderank  = _Cmi_mynode % nodesize
 *   nodestart = _Cmi_mynode - noderank
 *   nodeend   = nodestart + nodesize - 1, clipped to _Cmi_numnodes - 1
 * When the last group is clipped, nodesize is reduced to match.
 *
 * @param argv  program arguments, scanned by CmiGetArgIntDesc
 */
void calculateNodeSizeAndRank(char **argv){
	sysvshmContext->nodesize=1;
	MACHSTATE(3,"calculateNodeSizeAndRank start");
	CmiGetArgIntDesc(argv, "+nodesize", &(sysvshmContext->nodesize),"Number of cores in this node");
	MACHSTATE1(3,"calculateNodeSizeAndRank argintdesc %d",sysvshmContext->nodesize);

	/*
	 * A zero value would make the modulo below undefined, and a negative one
	 * yields nodeend < nodestart. Fall back to the default of one process per
	 * node in both cases.
	 */
	if(sysvshmContext->nodesize < 1){
		sysvshmContext->nodesize = 1;
	}

	sysvshmContext->noderank = _Cmi_mynode % (sysvshmContext->nodesize);
	
	MACHSTATE1(3,"calculateNodeSizeAndRank noderank %d",sysvshmContext->noderank);
	
	sysvshmContext->nodestart = _Cmi_mynode -sysvshmContext->noderank;
	
	MACHSTATE(3,"calculateNodeSizeAndRank nodestart ");

	sysvshmContext->nodeend = sysvshmContext->nodestart + sysvshmContext->nodesize -1;

	/* The last group may be short when nodesize does not divide the total. */
	if(sysvshmContext->nodeend >= _Cmi_numnodes){
		sysvshmContext->nodeend = _Cmi_numnodes-1;
		sysvshmContext->nodesize = (sysvshmContext->nodeend - sysvshmContext->nodestart) +1;
	}
	
	MACHSTATE3(3,"calculateNodeSizeAndRank nodestart %d nodesize %d noderank %d",sysvshmContext->nodestart,sysvshmContext->nodesize,sysvshmContext->noderank);
}
Exemple #7
0
/**
 * Work out which processes share this physical node and where this process
 * sits among them, storing the result in pxshmContext.
 *
 * The group size comes from the "+nodesize" command-line argument (default
 * 1). Processes are grouped in consecutive runs of nodesize by _Cmi_mynode:
 *   noderank  = _Cmi_mynode % nodesize
 *   nodestart = _Cmi_mynode - noderank
 *   nodeend   = nodestart + nodesize - 1, clipped to _Cmi_numnodes - 1
 * When the last group is clipped, nodesize is reduced to match.
 *
 * @param argv  program arguments, scanned by CmiGetArgIntDesc
 */
void calculateNodeSizeAndRank(char **argv){
	pxshmContext->nodesize=1;
	MACHSTATE(3,"calculateNodeSizeAndRank start");
	CmiGetArgIntDesc(argv, "+nodesize", &(pxshmContext->nodesize),"Number of cores in this node");
	MACHSTATE1(3,"calculateNodeSizeAndRank argintdesc %d",pxshmContext->nodesize);

	/*
	 * A zero value would make the modulo below undefined, and a negative one
	 * yields nodeend < nodestart. Fall back to the default of one process per
	 * node in both cases.
	 */
	if(pxshmContext->nodesize < 1){
		pxshmContext->nodesize = 1;
	}

	pxshmContext->noderank = _Cmi_mynode % (pxshmContext->nodesize);
	
	MACHSTATE1(3,"calculateNodeSizeAndRank noderank %d",pxshmContext->noderank);
	
	pxshmContext->nodestart = _Cmi_mynode -pxshmContext->noderank;
	
	MACHSTATE(3,"calculateNodeSizeAndRank nodestart ");

	pxshmContext->nodeend = pxshmContext->nodestart + pxshmContext->nodesize -1;

	/* The last group may be short when nodesize does not divide the total. */
	if(pxshmContext->nodeend >= _Cmi_numnodes){
		pxshmContext->nodeend = _Cmi_numnodes-1;
		pxshmContext->nodesize = (pxshmContext->nodeend - pxshmContext->nodestart) +1;
	}
	
	MACHSTATE3(3,"calculateNodeSizeAndRank nodestart %d nodesize %d noderank %d",pxshmContext->nodestart,pxshmContext->nodesize,pxshmContext->noderank);
}
Exemple #8
0
/**
 * Scan the receive buffers of every other rank on this node and drain those
 * that currently hold messages.
 *
 * For each rank i other than pxshmContext->noderank whose buffer header
 * reports count > 0, the buffer's lock is tried without blocking. Which lock
 * is used depends on the build: OSSpinLockTry (PXSHM_OSSPINLOCK), sem_trywait
 * (PXSHM_LOCK), or a flag/turn handshake bracketed by memory fences
 * (PXSHM_FENCE). If the lock is not obtained the buffer is skipped on this
 * pass; otherwise emptyRecvBuf() is called and the lock is released.
 *
 * NOTE(review): the `if` that pairs with the `}else{` below exists only
 * inside the #if/#elif arms, so exactly one of the three macros presumably
 * has to be non-zero for this function to compile -- confirm.
 */
inline void emptyAllRecvBufs(){
	int i;
	for(i=0;i<pxshmContext->nodesize;i++){
		/* There is no receive buffer from ourselves. */
		if(i != pxshmContext->noderank){
			sharedBufData *recvBuf = &(pxshmContext->recvBufs[i]);
			/* count is read before the lock is taken; only non-empty buffers are locked. */
			if(recvBuf->header->count > 0){

#if PXSHM_STATS
				/* Counts lock attempts, whether or not they succeed. */
				pxshmContext->lockRecvCount++;
#endif

#if PXSHM_OSSPINLOCK
				if(! OSSpinLockTry(&recvBuf->header->lock)){
#elif PXSHM_LOCK
				if(sem_trywait(recvBuf->mutex) < 0){
#elif PXSHM_FENCE
				/* Announce interest, give the turn to the sender, then look at the sender's flag. */
				recvBuf->header->flagReceiver = 1;
				recvBuf->header->turn = SENDER;
				CmiMemoryReadFence(0,0);
				CmiMemoryWriteFence(0,0);
				/* Earlier variant of the test, which also looked at `turn`: */
				//if((recvBuf->header->flagSender && recvBuf->header->turn == SENDER)){
				if((recvBuf->header->flagSender)){
					/* Sender is active: withdraw our flag and skip this buffer. */
					recvBuf->header->flagReceiver = 0;
#endif
				}else{
					/* Lock obtained: drain the buffer, then release. */


					MACHSTATE1(3,"emptyRecvBuf to be called for rank %d",i);			
					emptyRecvBuf(recvBuf);

#if PXSHM_OSSPINLOCK
					OSSpinLockUnlock(&recvBuf->header->lock);
#elif PXSHM_LOCK
					sem_post(recvBuf->mutex);
#elif PXSHM_FENCE
					CmiMemoryReadFence(0,0);
					CmiMemoryWriteFence(0,0);
					recvBuf->header->flagReceiver = 0;
#endif

				}
			
			}
		}
	}
};

/**
 * Try to flush every non-empty per-destination send queue.
 *
 * Two iteration strategies are selected at compile time:
 *  - SENDQ_LIST: walk the index-linked list that starts at sendQ_head_index
 *    and is chained through sendQ->next, ending at -1. After the flush
 *    attempt, a queue that is empty is unlinked from the list and its next
 *    field is set to -2.
 *  - otherwise: visit every rank in [0, nodesize) except our own.
 *
 * For each queue with entries, the lock of the matching send buffer is tried
 * without blocking (OSSpinLockTry, sem_trywait, or the flag/turn handshake,
 * depending on PXSHM_OSSPINLOCK / PXSHM_LOCK / PXSHM_FENCE). On success
 * flushSendQ() is called and the lock released; on failure the queue is left
 * as it is.
 *
 * NOTE(review): -2 presumably marks "not currently in the list" -- confirm
 * against the code that inserts queues into the list.
 */
inline void flushAllSendQs(){
	int i;
#if SENDQ_LIST
        /* Index of the last queue kept in the list, or -1 while still at the head. */
        int index_prev = -1;

        i =  sendQ_head_index;
        while (i!= -1) {
                PxshmSendQ *sendQ = pxshmContext->sendQs[i];
                CmiAssert(i !=  pxshmContext->noderank);
		if(sendQ->numEntries > 0){
#else
        for(i=0;i<pxshmContext->nodesize;i++) {
                if (i == pxshmContext->noderank) continue;
                PxshmSendQ *sendQ = pxshmContext->sendQs[i];
                if(sendQ->numEntries > 0) {
#endif
	
#if PXSHM_OSSPINLOCK
		        if(OSSpinLockTry(&pxshmContext->sendBufs[i].header->lock)){
#elif PXSHM_LOCK
			if(sem_trywait(pxshmContext->sendBufs[i].mutex) >= 0){
#elif PXSHM_FENCE
			/* Announce interest, give the turn to the receiver, then check its flag and the turn. */
			pxshmContext->sendBufs[i].header->flagSender = 1;
			pxshmContext->sendBufs[i].header->turn = RECEIVER;
			CmiMemoryReadFence(0,0);			
			CmiMemoryWriteFence(0,0);
			if(!(pxshmContext->sendBufs[i].header->flagReceiver && pxshmContext->sendBufs[i].header->turn == RECEIVER)){
#endif
				/* Lock obtained: push queued messages into the shared buffer, then release. */

				MACHSTATE1(3,"flushSendQ %d",i);
				flushSendQ(sendQ);
				
#if PXSHM_OSSPINLOCK	
				OSSpinLockUnlock(&pxshmContext->sendBufs[i].header->lock);
#elif PXSHM_LOCK
				sem_post(pxshmContext->sendBufs[i].mutex);
#elif PXSHM_FENCE
				CmiMemoryReadFence(0,0);			
				CmiMemoryWriteFence(0,0);
				pxshmContext->sendBufs[i].header->flagSender = 0;
#endif
			}else{
				/* Lock not obtained; in fence mode withdraw the flag raised above. */

#if PXSHM_FENCE
			  pxshmContext->sendBufs[i].header->flagSender = 0;
#endif				

			}

		}        
#if SENDQ_LIST
                /* Drop queues that are now empty from the list; keep the rest and advance. */
                if (sendQ->numEntries == 0) {
                    if (index_prev != -1)
                        pxshmContext->sendQs[index_prev]->next = sendQ->next;
                    else
                        sendQ_head_index = sendQ->next;
                    i = sendQ->next;
                    sendQ->next = -2;
                }
                else {
                    index_prev = i;
                    i = sendQ->next;
                }
#endif
	}	
};


/**
 * Drain every message currently stored in one shared receive buffer.
 *
 * The buffer holds header->count messages packed back to back starting at
 * recvBuf->data. Each one is copied into a fresh CmiAlloc'd buffer and passed
 * to handleOneRecvedMsg(). Afterwards the buffer is marked empty by resetting
 * header->count and header->bytes to 0.
 *
 * If the number of bytes walked differs from header->bytes, a diagnostic is
 * printed before the assertion fires.
 *
 * NOTE(review): the caller presumably holds the lock protecting recvBuf, and
 * handleOneRecvedMsg() presumably takes ownership of the copy -- confirm.
 *
 * @param recvBuf  shared buffer to drain
 */
void emptyRecvBuf(sharedBufData *recvBuf){
	int numMessages = recvBuf->header->count;
	int i;
	char *ptr = recvBuf->data;

	for(i=0;i<numMessages;i++){
		char *msg = ptr;
		char *newMsg;
		int size = CMI_MSG_SIZE(msg);

		/* Copy out of shared memory so the buffer can be reused by the sender. */
		newMsg = (char *)CmiAlloc(size);
		memcpy(newMsg,msg,size);

		handleOneRecvedMsg(size, newMsg);

		ptr += size;

		/* Three arguments for three conversions; the pointer difference is narrowed for %d. */
		MACHSTATE3(3,"message of size %d recvd ends at ptr-data %d total bytes %d",size,(int)(ptr-recvBuf->data),recvBuf->header->bytes);
	}

	/* Print the mismatch first, so the values are visible when the assertion below fires. */
	if(ptr - recvBuf->data != recvBuf->header->bytes){
		CmiPrintf("[%d] ptr - recvBuf->data  %d recvBuf->header->bytes %d numMessages %d \n",_Cmi_mynode, (int)(ptr - recvBuf->data), recvBuf->header->bytes,numMessages);
	}
	CmiAssert(ptr - recvBuf->data == recvBuf->header->bytes);
	recvBuf->header->count=0;
	recvBuf->header->bytes=0;
}


/**************************
 *sendQ helper functions
 * ****************/

/**
 * Initialise a send queue as an empty ring buffer.
 *
 * @param q     queue to initialise
 * @param size  initial number of slots to allocate
 * @param rank  value stored in q->rank
 *
 * The slot array is zero-filled by calloc; the allocation result is not
 * checked here.
 */
void initSendQ(PxshmSendQ *q,int size, int rank){
	/* Empty ring: both cursors at slot 0, nothing stored. */
	q->numEntries = 0;
	q->begin = 0;
	q->end = 0;

	q->size = size;
	q->rank = rank;
#if SENDQ_LIST
	/* Same value flushAllSendQs stores when it unlinks a queue from the list. */
	q->next = -2;
#endif

	q->data = (OutgoingMsgRec *)calloc(size, sizeof(OutgoingMsgRec));
}

/**
 * Append one message record to the tail of a send queue.
 *
 * The queue is a ring buffer. When it is full, the slot array is replaced by
 * one twice as large and the existing records are copied across so that the
 * oldest record ends up in slot 0.
 *
 * @param q         queue to append to
 * @param msg       message buffer; only the pointer is stored
 * @param size      message length in bytes
 * @param refcount  pointer stored alongside the message
 */
void pushSendQ(PxshmSendQ *q, char *msg, int size, int *refcount){
	OutgoingMsgRec *slot;

	if(q->numEntries == q->size){
		/* Ring is full: double the capacity. */
		const int oldCap = q->size;
		OutgoingMsgRec *const oldSlots = q->data;
		int headCount;

		q->data = (OutgoingMsgRec *)calloc(oldCap<<1, sizeof(OutgoingMsgRec));

		/* A full ring has its tail cursor sitting on its head cursor. */
		CmiAssert(q->begin == q->end);
		CmiAssert(q->begin < q->size);

		/* Records from the head cursor to the end of the old array come first... */
		headCount = oldCap - q->begin;
		memcpy(q->data, oldSlots + q->begin, sizeof(OutgoingMsgRec)*headCount);

		/* ...followed by the part that had wrapped around to the front. */
		if(q->end!=0){
			memcpy(q->data + headCount, oldSlots, sizeof(OutgoingMsgRec)*(q->end));
		}
		free(oldSlots);

		q->begin = 0;
		q->end = oldCap;
		q->size = oldCap<<1;
	}

	slot = &q->data[q->end];
	slot->size = size;
	slot->data = msg;
	slot->refcount = refcount;

	/* Advance the tail cursor, wrapping at the end of the array. */
	q->end++;
	if(q->end >= q->size){
		q->end -= q->size;
	}
	q->numEntries++;
}

/**
 * Remove the oldest record from a send queue.
 *
 * @param q  queue to pop from
 * @return pointer to the removed record, or NULL if the queue is empty.
 *         The pointer refers to a slot inside q->data, not to a copy.
 */
OutgoingMsgRec * popSendQ(PxshmSendQ *q){
	OutgoingMsgRec *head = NULL;

	if(q->numEntries != 0){
		head = &q->data[q->begin];

		/* Advance the head cursor, wrapping at the end of the array. */
		q->begin++;
		if(q->begin >= q->size){
			q->begin -= q->size;
		}

		q->numEntries--;
	}
	return head;
}