/* initnode node table reply format (4 + 12*n bytes total):

   +----------+------------------------------------------------+
   | 4 bytes  | Number of nodes n (big-endian binary integer)   |
   +----------+------------------------------------------------+
   | one 12-byte entry for each of the n nodes:                 |
   |   4 bytes | Number of PEs for this node                    |
   |   4 bytes | IP address of this node                        |
   |   4 bytes | Data (UDP) port of this node                   |
   |           |   (big-endian binary integers)                 |
   +-------------------------------------------------------------+
*/
static void node_addresses_store(ChMessage *msg)
{
  ChMessageInt_t *n32 = (ChMessageInt_t *)msg->data;
  ChNodeinfo *d = (ChNodeinfo *)(n32+1);
  int nodestart;
  int i, j, n;
  MACHSTATE(1,"node_addresses_store {");

  _Cmi_numnodes = ChMessageInt(n32[0]);
  if ((sizeof(ChMessageInt_t)+sizeof(ChNodeinfo)*_Cmi_numnodes)
       != (unsigned int)msg->len)
    { printf("Node table has inconsistent length!"); machine_exit(1); }

  nodes = (OtherNode)malloc(_Cmi_numnodes * sizeof(struct OtherNodeStruct));
  nodestart = 0;
  for (i=0; i<_Cmi_numnodes; i++) {
    nodes[i].nodestart = nodestart;
    nodes[i].nodesize = ChMessageInt(d[i].nPE);
    MACHSTATE2(3,"node %d nodesize %d",i,nodes[i].nodesize);
    nodes[i].mach_id = ChMessageInt(d[i].mach_id);
    nodes[i].IP = d[i].IP;
    if (i==_Cmi_mynode) {
      Cmi_nodestart = nodes[i].nodestart;
      _Cmi_mynodesize = nodes[i].nodesize;
      Cmi_self_IP = nodes[i].IP;
    }
    nodes[i].dataport = ChMessageInt(d[i].dataport);
    nodes[i].addr = skt_build_addr(nodes[i].IP, nodes[i].dataport);
#if CMK_USE_TCP
    nodes[i].sock = INVALID_SOCKET;
#endif
    nodestart += nodes[i].nodesize;
  }
  _Cmi_numpes = nodestart;
  n = _Cmi_numpes;
#ifdef CMK_CPV_IS_SMP
  n += _Cmi_numnodes;
#endif
  nodes_by_pe = (OtherNode*)malloc(n * sizeof(OtherNode));
  _MEMCHECK(nodes_by_pe);
  for (i=0; i<_Cmi_numnodes; i++) {
    OtherNode node = nodes + i;
    OtherNode_init(node);
    for (j=0; j<node->nodesize; j++)
      nodes_by_pe[j + node->nodestart] = node;
  }
#ifdef CMK_CPV_IS_SMP
  /* index for communication threads */
  for (i=_Cmi_numpes; i<_Cmi_numpes+_Cmi_numnodes; i++) {
    OtherNode node = nodes + i - _Cmi_numpes;
    nodes_by_pe[i] = node;
  }
#endif
  MACHSTATE(1,"} node_addresses_store");
}
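/* The node table fields above travel as raw big-endian (network-order) 4-byte
 * integers, which is why every field goes through ChMessageInt() before use.
 * Below is a minimal sketch of the kind of decoder this implies, assuming
 * ChMessageInt_t wraps a 4-byte buffer; the actual definition lives in the
 * socket/charmrun support code and may differ, so the names here carry a
 * _sketch suffix. */
typedef struct { unsigned char data[4]; } ChMessageInt_t_sketch;

static int ChMessageInt_sketch(ChMessageInt_t_sketch src)
{
  int i, ret = 0;
  for (i = 0; i < 4; i++) {      /* most-significant byte first */
    ret <<= 8;
    ret += src.data[i];
  }
  return ret;
}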
static void CmiIdleLock_sleep(CmiIdleLock *l, int msTimeout)
{
  if (l->hasMessages) return;
  l->isSleeping = 1;
  MACHSTATE(4,"Processor going to sleep {")
  WaitForSingleObject(l->sem, msTimeout);
  MACHSTATE(4,"} Processor awake again")
  l->isSleeping = 0;
}
/**
 * LAPI completion handler on the receive side.  It is responsible for pushing
 * messages to the destination proc or relaying broadcast messages. --Chao Mei
 *
 * Note: the completion handler could be executed on any core within a node,
 * so in SMP mode with a comm thread the completion handler must be handled
 * carefully.
 *
 * Since LAPI also provides an internal thread to make network progress (which
 * calls this function), we need to be careful with the following situations:
 * 1) non-SMP mode, with interrupt (LAPI internal completion thread)
 * 2) non-SMP mode, with polling (machine layer is responsible for network progress)
 * 3) SMP mode, no comm thread, with polling
 * 4) SMP mode, no comm thread, with interrupt
 * 5) SMP mode, with comm thread, with polling (not yet implemented; comm server is empty right now)
 * 6) SMP mode, with comm thread, with interrupt??
 *
 * Currently, SMP mode without a comm thread is under implementation.
 *
 * This function is executed by LAPI internal threads.  The number of internal
 * completion-handler threads can vary during the program: LAPI adaptively
 * creates more threads if there are more outstanding messages.  This means
 * the pcqueue needs protection even in the non-SMP case.
 *
 * --Chao Mei
 */
static void PumpMsgsComplete(lapi_handle_t *myLapiContext, void *am_info)
{
  int i;
  char *msg = am_info;
  int broot, destrank;

  MACHSTATE3(2,"[%d] PumpMsgsComplete with msg %p (isImm=%d) begin {",CmiMyNode(), msg, CmiIsImmediate(msg));
#if ENSURE_MSG_PAIRORDER
  MACHSTATE3(2,"msg %p info: srcpe=%d, seqno=%d", msg, CMI_MSG_SRCPE(msg), CMI_MSG_SEQNO(msg));
#endif
  /**
   * First, check whether the msg is a broadcast msg sent via the spanning
   * tree.  If it is, call SendSpanningChildren to relay the broadcast, and
   * then deliver the msg to every core on this node.
   *
   * After that check, deal with normal messages.
   * --Chao Mei
   */
  /* This is the right place to relay the broadcast message */
  /**
   * 1. For in-order delivery: because this is the handler for receiving a
   * message, and we assume cross-network msgs are always delivered to the
   * first proc (rank 0) of this node, we select the srcpe of the bcast msgs
   * and the next msg seq no accordingly.
   *
   * --Chao Mei
   */
#if ENSURE_MSG_PAIRORDER
  broot = CMI_BROADCAST_ROOT(msg);
  destrank = CMI_DEST_RANK(msg);
  /* Only check proc-level msgs */
  if (broot>=0
#if CMK_NODE_QUEUE_AVAILABLE
      && destrank != DGRAM_NODEMESSAGE
#endif
     ) {
    MsgOrderInfo *info;
    info = &CpvAccessOther(p2pMsgSeqInfo, destrank);
    MACHSTATE1(2, "Check msg in-order for p2p msg %p", msg);
    if (checkMsgInOrder(msg,info)) {
      MACHSTATE(2,"} PumpMsgsComplete end ");
      return;
    }
  }
#endif

  handleOneRecvedMsg(CMI_MSG_SIZE(msg), msg);
  MACHSTATE(2,"} PumpMsgsComplete end ");
  return;
}
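/* Because LAPI may run several completion threads at once, the comment above
 * warns that even a non-SMP build cannot push received messages onto a plain
 * queue without synchronization.  A minimal sketch of the kind of lock-guarded
 * enqueue this implies, using Converse's CmiCreateLock/CmiLock/CmiUnlock; the
 * queue and lock names here are hypothetical, not the ones this machine layer
 * actually uses. */
static CmiNodeLock recvQueueLock_sketch;   /* created once, e.g. via CmiCreateLock() */
static PCQueue     recvQueue_sketch;       /* assumed Converse PCQueue */

static void enqueueRecvedMsg_sketch(char *msg)
{
  CmiLock(recvQueueLock_sketch);           /* multiple LAPI threads may race here */
  PCQueuePush(recvQueue_sketch, msg);
  CmiUnlock(recvQueueLock_sketch);
}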
static void CmiIdleLock_sleep(CmiIdleLock *l, int msTimeout)
{
  struct timespec wakeup;

  if (l->hasMessages) return;
  l->isSleeping = 1;
  MACHSTATE(4,"Processor going to sleep {")
  pthread_mutex_lock(&l->mutex);
  getTimespec(msTimeout, &wakeup);
  while (!l->hasMessages)
    if (ETIMEDOUT == pthread_cond_timedwait(&l->cond, &l->mutex, &wakeup))
      break;
  pthread_mutex_unlock(&l->mutex);
  MACHSTATE(4,"} Processor awake again")
  l->isSleeping = 0;
}
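/* getTimespec is not shown in this section; pthread_cond_timedwait needs an
 * absolute wall-clock deadline rather than a relative timeout.  A minimal
 * sketch of what such a helper could look like, assuming a millisecond offset
 * from the current time (the real helper in the machine layer may differ in
 * name and rounding details). */
#include <sys/time.h>
#include <time.h>

static void getTimespec_sketch(int msFromNow, struct timespec *dest)
{
  struct timeval cur;
  gettimeofday(&cur, NULL);
  dest->tv_sec  = cur.tv_sec + msFromNow / 1000;
  dest->tv_nsec = (cur.tv_usec + 1000 * (msFromNow % 1000)) * 1000;
  if (dest->tv_nsec >= 1000000000) {        /* carry overflow into seconds */
    dest->tv_sec  += dest->tv_nsec / 1000000000;
    dest->tv_nsec %= 1000000000;
  }
}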
static INLINE_KEYWORD void lapiSendFn(int destNode, int size, char *msg, scompl_hndlr_t *shdlr, void *sinfo)
{
  lapi_xfer_t xfer_cmd;

  MACHSTATE3(2,"lapiSendFn to destNode=%d with msg %p (isImm=%d) begin {",destNode,msg, CmiIsImmediate(msg));
  MACHSTATE2(2, "inside lapiSendFn 1: size=%d, sinfo=%p", size, sinfo);
  MACHSTATE2(2, "Ready to call LAPI_Xfer with destNode=%d, destRank=%d",destNode,CMI_DEST_RANK(msg));

  xfer_cmd.Am.Xfer_type = LAPI_AM_XFER;
  xfer_cmd.Am.flags     = 0;
  xfer_cmd.Am.tgt       = destNode;
  xfer_cmd.Am.hdr_hdl   = lapiHeaderHandler;
  xfer_cmd.Am.uhdr_len  = 0;
  xfer_cmd.Am.uhdr      = NULL;
  xfer_cmd.Am.udata     = msg;
  xfer_cmd.Am.udata_len = size;
  xfer_cmd.Am.shdlr     = shdlr;
  xfer_cmd.Am.sinfo     = sinfo;
  xfer_cmd.Am.tgt_cntr  = NULL;
  xfer_cmd.Am.org_cntr  = NULL;
  xfer_cmd.Am.cmpl_cntr = NULL;

  check_lapi(LAPI_Xfer,(lapiContext, &xfer_cmd));

  MACHSTATE(2,"} lapiSendFn end");
}
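/* The two send-completion callbacks shown later in this section (ReleaseMsg
 * for synchronous sends, DeliveredMsg for asynchronous ones) plug into the
 * shdlr/sinfo pair of lapiSendFn.  The two functions below are hedged usage
 * sketches, not the machine layer's actual send paths:
 * - a "sync" style send passes ReleaseMsg so the buffer is freed once LAPI is
 *   done with it;
 * - an "async" style send passes DeliveredMsg plus a malloc'd counter that the
 *   caller can poll (compare CmiAsyncBroadcastFn below). */
static void exampleSyncSend_sketch(int destNode, int size, char *dupmsg)
{
  lapiSendFn(destNode, size, dupmsg, ReleaseMsg, dupmsg);
}

static void *exampleAsyncSend_sketch(int destNode, int size, char *msg)
{
  int *pending = (int *)malloc(sizeof(int));
  *pending = 1;                                   /* one outstanding send */
  lapiSendFn(destNode, size, msg, DeliveredMsg, pending);
  return pending;                                 /* poll until *pending == 0 */
}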
/******************
 * Initialization routine
 * currently just testing start up
 ****************/
void CmiInitSysvshm(char **argv)
{
  MACHSTATE(3,"CminitSysvshm start");
  sysvshmContext = (SysvshmContext *)malloc(sizeof(SysvshmContext));

  if (Cmi_charmrun_pid <= 0) {
    CmiAbort("sysvshm must be run with charmrun");
  }
  calculateNodeSizeAndRank(argv);
  if (sysvshmContext->nodesize == 1) {
    return;
  }
  MACHSTATE1(3,"CminitSysvshm %d calculateNodeSizeAndRank",sysvshmContext->nodesize);

  setupSharedBuffers();
  MACHSTATE2(3,"CminitSysvshm %d %d setupSharedBuffers",Cmi_charmrun_pid,sysvshmContext->nodesize);

  initAllSendQs();
  MACHSTATE2(3,"CminitSysvshm %d %d initAllSendQs",Cmi_charmrun_pid,sysvshmContext->nodesize);
  MACHSTATE2(3,"CminitSysvshm %d %d done",Cmi_charmrun_pid,sysvshmContext->nodesize);

#if SYSVSHM_STATS
  sysvshmContext->sendCount = 0;
  sysvshmContext->sendTime = 0.0;
  sysvshmContext->validCheckCount = 0;
  sysvshmContext->validCheckTime = 0.0;
  sysvshmContext->commServerTime = 0;
  sysvshmContext->lockRecvCount = 0;
#endif
}
static void CmiIdleLock_addMessage(CmiIdleLock *l)
{
  l->hasMessages = 1;
  if (l->isSleeping) { /* The PE is sleeping on this lock -- wake him */
    MACHSTATE(4,"Waking sleeping processor")
    ReleaseSemaphore(l->sem, 1, NULL);
  }
}
CmiCommHandle CmiAsyncBroadcastFn(int size, char *msg)
{
#if ENSURE_MSG_PAIRORDER
  /* Not sure how to add the msg seq no for async broadcast messages --Chao Mei */
  /* so abort here! */
  CmiAssert(0);
  return 0;
#else
  int i, rank;
  int mype = CmiMyPe();

#if ENABLE_CONVERSE_QD
  CQdCreate(CpvAccess(cQdState), CmiNumPes()-1);
#endif
  MACHSTATE1(3,"[%d] Sending async broadcast message from {",CmiMyNode());
  CMI_BROADCAST_ROOT(msg) = 0;
  void *handle = malloc(sizeof(int));
  *((int *)handle) = CmiNumPes()-1;

  for (i=mype+1; i<CmiNumPes(); i++) {
    CMI_DEST_RANK(msg) = CmiRankOf(i);
    lapiSendFn(CmiNodeOf(i), size, msg, DeliveredMsg, handle);
  }
  for (i=0; i<mype; i++) {
    CMI_DEST_RANK(msg) = CmiRankOf(i);
    lapiSendFn(CmiNodeOf(i), size, msg, DeliveredMsg, handle);
  }

  MACHSTATE(3,"} Sending async broadcast message end");
  return handle;
#endif
}
static void CmiNotifyStillIdle(CmiIdleState *s)
{
#if CMK_SHARED_VARS_UNAVAILABLE
  /* No comm. thread -- listen on sockets for incoming messages */
  MACHSTATE(1,"idle commserver {")
  CommunicationServer(Cmi_idlepoll?0:10, COMM_SERVER_FROM_SMP);
  MACHSTATE(1,"} idle commserver")
#else
#if CMK_SHARED_VARS_POSIX_THREADS_SMP
  if (_Cmi_sleepOnIdle) {
#endif
  int nSpins = 20; /* Number of times to spin before sleeping */
  s->nIdles++;
  if (s->nIdles > nSpins) { /* Start giving some time back to the OS */
    s->sleepMs += 2;
    if (s->sleepMs > 10) s->sleepMs = 10;
  }
  /* Comm. thread will listen on sockets -- just sleep */
  if (s->sleepMs > 0) {
    MACHSTATE1(3,"idle lock(%d) {",CmiMyPe())
    CmiIdleLock_sleep(&s->cs->idle, s->sleepMs);
    CsdResetPeriodic(); /* check ccd callbacks when I am awakened */
    MACHSTATE1(3,"} idle lock(%d)",CmiMyPe())
  }
#if CMK_SHARED_VARS_POSIX_THREADS_SMP
  }
#endif
#endif
}
static void CmiIdleLock_wakeup(CmiIdleLock *l)
{
  l->hasMessages = 1;
  MACHSTATE(4,"Waking sleeping processor")
  /* The PE is sleeping on this condition variable -- wake him */
  pthread_mutex_lock(&l->mutex);
  pthread_cond_signal(&l->cond);
  pthread_mutex_unlock(&l->mutex);
}
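/* The sleep/wakeup routines above come in a Windows-semaphore flavor and a
 * pthread condition-variable flavor, both operating on the same small
 * idle-lock structure.  Below is a hedged sketch of the fields the pthread
 * variant appears to rely on, plus an init helper; the actual CmiIdleLock
 * definition in the SMP machine layer may carry more fields. */
#include <pthread.h>

typedef struct {
  volatile int hasMessages;   /* set by CmiIdleLock_addMessage / _wakeup  */
  volatile int isSleeping;    /* set while the PE is blocked in _sleep    */
  pthread_mutex_t mutex;
  pthread_cond_t  cond;
} CmiIdleLock_sketch;

static void CmiIdleLock_init_sketch(CmiIdleLock_sketch *l)
{
  l->hasMessages = 0;
  l->isSleeping  = 0;
  pthread_mutex_init(&l->mutex, NULL);
  pthread_cond_init(&l->cond, NULL);
}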
static void CommunicationsClock(void)
{
  MACHSTATE(1,"CommunicationsClock");
  Cmi_clock = GetClock();

  if (Cmi_clock > Cmi_ack_last + 0.5*Cmi_ack_delay) {
    MACHSTATE(2,"CommunicationsClock timing out acks");
    Cmi_ack_last = Cmi_clock;
    writeableAcks = 1;
    writeableDgrams = 1;
  }

  if (Cmi_clock > Cmi_check_last + Cmi_check_delay) {
    MACHSTATE(4,"CommunicationsClock pinging charmrun");
    Cmi_check_last = Cmi_clock;
    ctrl_sendone_nolock("ping",NULL,0,NULL,0); /* Charmrun may have died */
  }
}
void CmiSendMessageSysvshm(OutgoingMsg ogm, OtherNode node, int rank, unsigned int broot)
{
  struct sembuf sb;
#if SYSVSHM_STATS
  double _startSendTime = CmiWallTimer();
#endif

  int dstRank = SysvshmRank(ogm->dst);
  MEMDEBUG(CmiMemoryCheck());

  DgramHeaderMake(ogm->data, rank, ogm->src, Cmi_charmrun_pid, 1, broot);

  MACHSTATE4(3,"Send Msg Sysvshm ogm %p size %d dst %d dstRank %d",ogm,ogm->size,ogm->dst,dstRank);
  CmiAssert(dstRank >= 0 && dstRank != sysvshmContext->noderank);

  sharedBufData *dstBuf = &(sysvshmContext->sendBufs[dstRank]);

  ACQUIRENW(sysvshmContext->noderank);
  if (semop(dstBuf->semid, &sb, 1) < 0) {
    /* failed to get the lock: insert into q and retain the message */
    pushSendQ(sysvshmContext->sendQs[dstRank], ogm);
    ogm->refcount++;
    MEMDEBUG(CmiMemoryCheck());
    return;
  } else {
    /*
     * We got the lock for this buffer:
     * first write all the messages in the sendQ and then write this guy.
     */
    if (sysvshmContext->sendQs[dstRank]->numEntries == 0) {
      /* send message user event */
      int ret = sendMessage(ogm, dstBuf, sysvshmContext->sendQs[dstRank]);
      MACHSTATE(3,"Sysvshm Send succeeded immediately");
    } else {
      ogm->refcount += 2; /* this message should not get deleted when the queue is flushed */
      pushSendQ(sysvshmContext->sendQs[dstRank], ogm);
      MACHSTATE3(3,"Sysvshm ogm %p pushed to sendQ length %d refcount %d",ogm,sysvshmContext->sendQs[dstRank]->numEntries,ogm->refcount);
      int sent = flushSendQ(dstRank);
      ogm->refcount--; /* if it has been sent, it can be deleted by the caller; if not, it will be deleted when the queue is flushed */
      MACHSTATE1(3,"Sysvshm flushSendQ sent %d messages",sent);
    }
    /* unlock the recv buffer; keep the semop outside the assert so the
       release still happens when asserts are compiled out */
    RELEASE(sysvshmContext->noderank);
    {
      int relStatus = semop(dstBuf->semid, &sb, 1);
      CmiAssert(relStatus >= 0);
    }
  }
#if SYSVSHM_STATS
  sysvshmContext->sendCount++;
  sysvshmContext->sendTime += (CmiWallTimer()-_startSendTime);
#endif
  MEMDEBUG(CmiMemoryCheck());
}
/***************
 * Calculate the names of the shared objects and semaphores.
 *
 * Naming scheme:
 *   shared memory: charm_pxshm_<recvernoderank>_<sendernoderank>
 *   semaphore    : charm_pxshm_<recvernoderank>_<sendernoderank>.sem
 * i.e. the semaphore name we use is the same as the shared memory object
 * name; the POSIX library adds the semaphore tag (on Linux at least --
 * other machines might need more portable code).
 *
 * Then open these shared objects and semaphores.
 *********/
void setupSharedBuffers()
{
  int i = 0;

  allocBufNameStrings(&(pxshmContext->recvBufNames));
  MACHSTATE(3,"allocBufNameStrings for recvBufNames done");
  MEMDEBUG(CmiMemoryCheck());

  allocBufNameStrings(&(pxshmContext->sendBufNames));
  MACHSTATE(3,"allocBufNameStrings for sendBufNames done");

  for (i=0; i<pxshmContext->nodesize; i++) {
    if (i != pxshmContext->noderank) {
      snprintf(pxshmContext->recvBufNames[i],NAMESTRLEN-1,"%s_%d_%d",pxshmContext->prefixStr,pxshmContext->noderank+pxshmContext->nodestart,i+pxshmContext->nodestart);
      MACHSTATE2(3,"recvBufName %s with rank %d",pxshmContext->recvBufNames[i],i)
      snprintf(pxshmContext->sendBufNames[i],NAMESTRLEN-1,"%s_%d_%d",pxshmContext->prefixStr,i+pxshmContext->nodestart,pxshmContext->noderank+pxshmContext->nodestart);
      MACHSTATE2(3,"sendBufName %s with rank %d",pxshmContext->sendBufNames[i],i);
    }
  }

  createShmObjectsAndSems(&(pxshmContext->recvBufs),pxshmContext->recvBufNames);
  createShmObjectsAndSems(&(pxshmContext->sendBufs),pxshmContext->sendBufNames);

  for (i=0; i<pxshmContext->nodesize; i++) {
    if (i != pxshmContext->noderank) {
      /* CmiAssert(pxshmContext->sendBufs[i].header->count == 0); */
      pxshmContext->sendBufs[i].header->count = 0;
      pxshmContext->sendBufs[i].header->bytes = 0;
    }
  }

#if CMK_SMP && ( CMK_CRAYXE || CMK_CRAYXC )
  if (PMI_Barrier() != GNI_RC_SUCCESS) return;
#else
  if (CmiBarrier() != 0) return;
#endif
  freeSharedBuffers();
  pxshm_freed = 1;
}
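/* createShmObjectsAndSems is not shown here; based on the naming comment
 * above, each buffer pairs a POSIX shared-memory object with a named
 * semaphore.  Below is a hedged sketch of how one such pair could be opened
 * with shm_open/ftruncate/mmap and sem_open; the struct, parameter names, and
 * the _sketch suffixes are assumptions, not the machine layer's real
 * definitions. */
#include <fcntl.h>      /* O_CREAT, O_RDWR */
#include <sys/mman.h>   /* shm_open, mmap  */
#include <sys/stat.h>
#include <semaphore.h>
#include <unistd.h>

typedef struct {
  void  *mem;     /* mapped shared-memory region */
  sem_t *mutex;   /* named semaphore guarding it */
} sharedBuf_sketch;

/* Open (or create) one shared-memory object plus its named semaphore. */
static int openOneShmBuf_sketch(const char *name, size_t shmsize, sharedBuf_sketch *out)
{
  int fd = shm_open(name, O_CREAT | O_RDWR, S_IRUSR | S_IWUSR);
  if (fd < 0) return -1;
  if (ftruncate(fd, shmsize) < 0) { close(fd); return -1; }
  out->mem = mmap(NULL, shmsize, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
  close(fd);                                   /* the mapping stays valid */
  if (out->mem == MAP_FAILED) return -1;
  out->mutex = sem_open(name, O_CREAT, S_IRUSR | S_IWUSR, 1); /* binary lock */
  return (out->mutex == SEM_FAILED) ? -1 : 0;
}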
void calculateNodeSizeAndRank(char **argv)
{
  sysvshmContext->nodesize = 1;
  MACHSTATE(3,"calculateNodeSizeAndRank start");
  CmiGetArgIntDesc(argv, "+nodesize", &(sysvshmContext->nodesize), "Number of cores in this node");
  MACHSTATE1(3,"calculateNodeSizeAndRank argintdesc %d",sysvshmContext->nodesize);

  sysvshmContext->noderank = _Cmi_mynode % (sysvshmContext->nodesize);
  MACHSTATE1(3,"calculateNodeSizeAndRank noderank %d",sysvshmContext->noderank);

  sysvshmContext->nodestart = _Cmi_mynode - sysvshmContext->noderank;
  MACHSTATE(3,"calculateNodeSizeAndRank nodestart ");

  sysvshmContext->nodeend = sysvshmContext->nodestart + sysvshmContext->nodesize - 1;
  if (sysvshmContext->nodeend >= _Cmi_numnodes) {
    sysvshmContext->nodeend = _Cmi_numnodes - 1;
    sysvshmContext->nodesize = (sysvshmContext->nodeend - sysvshmContext->nodestart) + 1;
  }
  MACHSTATE3(3,"calculateNodeSizeAndRank nodestart %d nodesize %d noderank %d",sysvshmContext->nodestart,sysvshmContext->nodesize,sysvshmContext->noderank);
}
void calculateNodeSizeAndRank(char **argv)
{
  pxshmContext->nodesize = 1;
  MACHSTATE(3,"calculateNodeSizeAndRank start");
  /* CmiGetArgIntDesc(argv, "+nodesize", &(pxshmContext->nodesize),"Number of cores in this node (for non-smp case). Used by the shared memory communication layer"); */
  CmiGetArgIntDesc(argv, "+nodesize", &(pxshmContext->nodesize), "Number of cores in this node");
  MACHSTATE1(3,"calculateNodeSizeAndRank argintdesc %d",pxshmContext->nodesize);

  pxshmContext->noderank = _Cmi_mynode % (pxshmContext->nodesize);
  MACHSTATE1(3,"calculateNodeSizeAndRank noderank %d",pxshmContext->noderank);

  pxshmContext->nodestart = _Cmi_mynode - pxshmContext->noderank;
  MACHSTATE(3,"calculateNodeSizeAndRank nodestart ");

  pxshmContext->nodeend = pxshmContext->nodestart + pxshmContext->nodesize - 1;
  if (pxshmContext->nodeend >= _Cmi_numnodes) {
    pxshmContext->nodeend = _Cmi_numnodes - 1;
    pxshmContext->nodesize = (pxshmContext->nodeend - pxshmContext->nodestart) + 1;
  }
  MACHSTATE3(3,"calculateNodeSizeAndRank nodestart %d nodesize %d noderank %d",pxshmContext->nodestart,pxshmContext->nodesize,pxshmContext->noderank);
}
/** LAPI header handler: executed on the recv side when the
 *  first packet of the receiving msg arrives; it is called to
 *  prepare a memory buffer in user space for receiving the
 *  data. --Chao Mei
 */
static void* PumpMsgsBegin(lapi_handle_t *myLapiContext,
                           void *hdr, uint *uhdr_len,
                           lapi_return_info_t *msg_info,
                           compl_hndlr_t **comp_h, void **comp_am_info)
{
  void *msg_buf;
  MACHSTATE1(2,"[%d] PumpMsgsBegin begin {",CmiMyNode());

  /* prepare the space for receiving the data,
     set the completion handler to be executed inline */
  msg_buf = (void *)CmiAlloc(msg_info->msg_len);

  msg_info->ret_flags = LAPI_SEND_REPLY;
  *comp_h = PumpMsgsComplete;
  *comp_am_info = msg_buf;
  MACHSTATE(2,"} PumpMsgsBegin end");
  return msg_buf;
}
void CmiStateInit(int pe, int rank, CmiState state)
{
#if CMK_SMP_MULTIQ
  int i;
#endif

  MACHSTATE(4,"StateInit")
  state->pe = pe;
  state->rank = rank;
  if (rank==CmiMyNodeSize()) return; /* Communications thread */
#if !CMK_SMP_MULTIQ
  state->recv = CMIQueueCreate();
#else
  for (i=0; i<MULTIQ_GRPSIZE; i++) state->recv[i] = CMIQueueCreate();
  state->myGrpIdx = rank % MULTIQ_GRPSIZE;
  state->curPolledIdx = 0;
#endif
  state->localqueue = CdsFifo_Create();
  CmiIdleLock_init(&state->idle);
}
CmiCommHandle CmiAsyncNodeBroadcastFn(int size, char *msg)
{
  int i;

#if ENABLE_CONVERSE_QD
  CQdCreate(CpvAccess(cQdState), CmiNumNodes()-1);
#endif

  MACHSTATE1(3,"[%d] Sending async node broadcast message from {",CmiMyNode());
  CMI_BROADCAST_ROOT(msg) = 0;
  CMI_DEST_RANK(msg) = DGRAM_NODEMESSAGE;
  void *handle = malloc(sizeof(int));
  *((int *)handle) = CmiNumNodes()-1;
  for (i=CmiMyNode()+1; i<CmiNumNodes(); i++) {
    lapiSendFn(i, size, msg, DeliveredMsg, handle);
  }
  for (i=0; i<CmiMyNode(); i++) {
    lapiSendFn(i, size, msg, DeliveredMsg, handle);
  }

  MACHSTATE(3,"} Sending async node broadcast message end");
  return handle;
}
/* The following two are callbacks for sync and async send respectively */
static void ReleaseMsg(lapi_handle_t *myLapiContext, void *msg, lapi_sh_info_t *info)
{
  MACHSTATE2(2,"[%d] ReleaseMsg begin %p {",CmiMyNode(),msg);
  check_lapi_err(info->reason, "ReleaseMsg", __LINE__);
  CmiFree(msg);
  MACHSTATE(2,"} ReleaseMsg end");
}
void CmiSendMessagePxshm(char *msg, int size, int dstnode, int *refcount)
{
#if PXSHM_STATS
  double _startSendTime = CmiWallTimer();
#endif

  LrtsPrepareEnvelope(msg, size);

  int dstRank = PxshmRank(dstnode);
  MEMDEBUG(CmiMemoryCheck());

  CmiAssert(dstRank >= 0 && dstRank != pxshmContext->noderank);

  sharedBufData *dstBuf = &(pxshmContext->sendBufs[dstRank]);
  PxshmSendQ *sendQ = pxshmContext->sendQs[dstRank];

#if PXSHM_OSSPINLOCK
  if (!OSSpinLockTry(&dstBuf->header->lock)) {
#elif PXSHM_LOCK
  if (sem_trywait(dstBuf->mutex) < 0) {
#elif PXSHM_FENCE
  dstBuf->header->flagSender = 1;
  dstBuf->header->turn = RECEIVER;
  CmiMemoryReadFence(0,0);
  CmiMemoryWriteFence(0,0);
  /* if (dstBuf->header->flagReceiver && dstBuf->header->turn == RECEIVER) { */
  if (dstBuf->header->flagReceiver) {
    dstBuf->header->flagSender = 0;
#endif
    /* failed to get the lock: insert into q and retain the message */
#if SENDQ_LIST
    if (sendQ->numEntries == 0 && sendQ->next == -2) {
      sendQ->next = sendQ_head_index;
      sendQ_head_index = dstRank;
    }
#endif
    pushSendQ(pxshmContext->sendQs[dstRank], msg, size, refcount);
    (*refcount)++;
    MEMDEBUG(CmiMemoryCheck());
    return;
  } else {
    /*
     * We got the lock for this buffer:
     * first write all the messages in the sendQ and then write this guy.
     */
    if (pxshmContext->sendQs[dstRank]->numEntries == 0) {
      /* send message user event */
      int ret = sendMessage(msg, size, refcount, dstBuf, pxshmContext->sendQs[dstRank]);
#if SENDQ_LIST
      if (sendQ->numEntries > 0 && sendQ->next == -2) {
        sendQ->next = sendQ_head_index;
        sendQ_head_index = dstRank;
      }
#endif
      MACHSTATE(3,"Pxshm Send succeeded immediately");
    } else {
      (*refcount) += 2; /* this message should not get deleted when the queue is flushed */
      pushSendQ(pxshmContext->sendQs[dstRank], msg, size, refcount);
      int sent = flushSendQ(sendQ);
      (*refcount)--; /* if it has been sent, it can be deleted by the caller; if not, it will be deleted when the queue is flushed */
      MACHSTATE1(3,"Pxshm flushSendQ sent %d messages",sent);
    }

    /* unlock the recv buffer */
#if PXSHM_OSSPINLOCK
    OSSpinLockUnlock(&dstBuf->header->lock);
#elif PXSHM_LOCK
    sem_post(dstBuf->mutex);
#elif PXSHM_FENCE
    CmiMemoryReadFence(0,0);
    CmiMemoryWriteFence(0,0);
    dstBuf->header->flagSender = 0;
#endif
  }
#if PXSHM_STATS
  pxshmContext->sendCount++;
  pxshmContext->sendTime += (CmiWallTimer()-_startSendTime);
#endif
  MEMDEBUG(CmiMemoryCheck());
}

inline void emptyAllRecvBufs();
inline void flushAllSendQs();

/**********
 * Extract all the messages from the recvBuffers you can,
 * flush all sendQs.
 ***/
inline void CommunicationServerPxshm()
{
#if PXSHM_STATS
  double _startCommServerTime = CmiWallTimer();
#endif

  MEMDEBUG(CmiMemoryCheck());
  emptyAllRecvBufs();
  flushAllSendQs();

#if PXSHM_STATS
  pxshmContext->commServerTime += (CmiWallTimer()-_startCommServerTime);
#endif
  MEMDEBUG(CmiMemoryCheck());
}

static void CmiNotifyStillIdlePxshm(CmiIdleState *s)
{
  CommunicationServerPxshm();
}

static void CmiNotifyBeginIdlePxshm(CmiIdleState *s)
{
  CmiNotifyStillIdle(s);
}
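/* flushAllSendQs and emptyAllRecvBufs are only forward-declared above.  The
 * function below is a hedged sketch of what a simple (non-SENDQ_LIST)
 * flushAllSendQs could look like under these structures: walk every peer
 * rank, and if its send queue has pending messages and the peer's shared
 * buffer can be locked, drain the queue via flushSendQ().  Only the
 * PXSHM_LOCK (sem_trywait) locking variant is shown, and this is not the
 * machine layer's actual implementation. */
inline void flushAllSendQs_sketch(void)
{
  int i;
  for (i = 0; i < pxshmContext->nodesize; i++) {
    if (i == pxshmContext->noderank) continue;          /* no queue to self */
    PxshmSendQ *sendQ = pxshmContext->sendQs[i];
    if (sendQ->numEntries == 0) continue;               /* nothing queued */
    sharedBufData *dstBuf = &(pxshmContext->sendBufs[i]);
    if (sem_trywait(dstBuf->mutex) < 0) continue;       /* receiver busy; retry later */
    flushSendQ(sendQ);
    sem_post(dstBuf->mutex);                            /* release the buffer */
  }
}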
/******************
 * Initialization routine
 * currently just testing start up
 ****************/
void CmiInitPxshm(char **argv)
{
  char *env;
  MACHSTATE(3,"CminitPxshm start");

  pxshmContext = (PxshmContext *)calloc(1, sizeof(PxshmContext));

  calculateNodeSizeAndRank(argv);
  if (pxshmContext->nodesize == 1) return;
  MACHSTATE1(3,"CminitPxshm %d calculateNodeSizeAndRank",pxshmContext->nodesize);

  env = getenv("CHARM_PXSHM_POOL_SIZE");
  if (env) {
    SHMBUFLEN = CmiReadSize(env);
  }
  env = getenv("CHARM_PXSHM_MESSAGE_MAX_SIZE");
  if (env) {
    SHMMAXSIZE = CmiReadSize(env);
  }
  if (SHMMAXSIZE > SHMBUFLEN)
    CmiAbort("Error> Pxshm pool size is set too small in env variable CHARM_PXSHM_POOL_SIZE");

  SENDQSTARTSIZE = 32 * pxshmContext->nodesize;

  if (_Cmi_mynode == 0)
    printf("Charm++> pxshm enabled: %d cores per node, buffer size: %.1fMB\n",
           pxshmContext->nodesize, SHMBUFLEN/1024.0/1024.0);

#if CMK_CRAYXE || CMK_CRAYXC
  srand(getpid());
  int Cmi_charmrun_pid = rand();
  PMI_Bcast(&Cmi_charmrun_pid, sizeof(int));
  snprintf(&(pxshmContext->prefixStr[0]),PREFIXSTRLEN-1,"charm_pxshm_%d",Cmi_charmrun_pid);
#endif

  MACHSTATE2(3,"CminitPxshm %s %d pre setupSharedBuffers",pxshmContext->prefixStr,pxshmContext->nodesize);

  setupSharedBuffers();
  MACHSTATE2(3,"CminitPxshm %s %d setupSharedBuffers",pxshmContext->prefixStr,pxshmContext->nodesize);

  initAllSendQs();
  MACHSTATE2(3,"CminitPxshm %s %d initAllSendQs",pxshmContext->prefixStr,pxshmContext->nodesize);
  MACHSTATE2(3,"CminitPxshm %s %d done",pxshmContext->prefixStr,pxshmContext->nodesize);

#if PXSHM_STATS
  pxshmContext->sendCount = 0;
  pxshmContext->sendTime = 0.0;
  pxshmContext->validCheckCount = 0;
  pxshmContext->validCheckTime = 0.0;
  pxshmContext->commServerTime = 0;
  pxshmContext->lockRecvCount = 0;
#endif

  signal(SIGSEGV, cleanupOnAllSigs);
  signal(SIGFPE,  cleanupOnAllSigs);
  signal(SIGILL,  cleanupOnAllSigs);
  signal(SIGTERM, cleanupOnAllSigs);
  signal(SIGABRT, cleanupOnAllSigs);
  signal(SIGQUIT, cleanupOnAllSigs);
  signal(SIGBUS,  cleanupOnAllSigs);
  signal(SIGINT,  cleanupOnAllSigs);
  signal(SIGTRAP, cleanupOnAllSigs);

#if 0
  char name[64];
  gethostname(name, 64);
  printf("[%d] name: %s\n", myrank, name);
#endif
}
static void DeliveredMsg(lapi_handle_t *myLapiContext, void *msg, lapi_sh_info_t *info)
{
  MACHSTATE1(2,"[%d] DeliveredMsg begin {",CmiMyNode());
  check_lapi_err(info->reason, "DeliveredMsg", __LINE__);
  *((int *)msg) = *((int *)msg) - 1;
  MACHSTATE(2,"} DeliveredMsg end");
}
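/* DeliveredMsg decrements the integer counter that the async send functions
 * above allocate as their CmiCommHandle.  Below is a hedged sketch of how the
 * other end of that contract could look: the Converse API names
 * CmiAsyncMsgSent and CmiReleaseCommHandle are real, but these bodies are an
 * assumption about this particular machine layer, hence the _sketch suffix. */
int CmiAsyncMsgSent_sketch(CmiCommHandle handle)
{
  return (*((int *)handle) == 0);     /* done once every send has completed */
}

void CmiReleaseCommHandle_sketch(CmiCommHandle handle)
{
  free(handle);                       /* safe only after the counter hits zero */
}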
static void CmiStartThreads(char **argv)
{
  pthread_t pid;
  size_t i;
  int ok, tocreate;
  pthread_attr_t attr;
  int start, end;

  MACHSTATE(4,"CmiStartThreads")
  CmiMemLock_lock = CmiCreateLock();
  comm_mutex = CmiCreateLock();
  _smp_mutex = CmiCreateLock();
#if defined(CMK_NO_ASM_AVAILABLE) && CMK_PCQUEUE_LOCK
  cmiMemoryLock = CmiCreateLock();
  if (CmiMyNode()==0)
    printf("Charm++ warning> fences and atomic operations not available in native assembly\n");
#endif

#if ! (CMK_HAS_TLS_VARIABLES && !CMK_NOT_USE_TLS_THREAD)
  pthread_key_create(&Cmi_state_key, 0);
  Cmi_state_vector = (CmiState)calloc(_Cmi_mynodesize+1, sizeof(struct CmiStateStruct));
  for (i=0; i<_Cmi_mynodesize; i++)
    CmiStateInit(i+Cmi_nodestart, i, CmiGetStateN(i));
  /* Create a fake state structure for the comm. thread */
  /* CmiStateInit(-1,_Cmi_mynodesize,CmiGetStateN(_Cmi_mynodesize)); */
  CmiStateInit(_Cmi_mynode+CmiNumPes(), _Cmi_mynodesize, CmiGetStateN(_Cmi_mynodesize));
#else
  /* for main thread */
  Cmi_state_vector = (CmiState *)calloc(_Cmi_mynodesize+1, sizeof(CmiState));
#if CMK_CONVERSE_MPI
  /* main thread is communication thread */
  if (!CharmLibInterOperate) {
    CmiStateInit(_Cmi_mynode+CmiNumPes(), _Cmi_mynodesize, &Cmi_mystate);
    Cmi_state_vector[_Cmi_mynodesize] = &Cmi_mystate;
  } else
#endif
  {
    /* main thread is of rank 0 */
    CmiStateInit(Cmi_nodestart, 0, &Cmi_mystate);
    Cmi_state_vector[0] = &Cmi_mystate;
  }
#endif

#if CMK_MULTICORE || CMK_SMP_NO_COMMTHD
  if (!Cmi_commthread)
    tocreate = _Cmi_mynodesize-1;
  else
#endif
  tocreate = _Cmi_mynodesize;
#if CMK_CONVERSE_MPI
  if (!CharmLibInterOperate) {
    start = 0;
    end = tocreate - 1;                    /* skip comm thread */
  } else
#endif
  {
    start = 1;
    end = tocreate;                        /* skip rank 0 main thread */
  }
  for (i=start; i<=end; i++) {
    pthread_attr_init(&attr);
    pthread_attr_setscope(&attr, PTHREAD_SCOPE_SYSTEM);
    ok = pthread_create(&pid, &attr, call_startfn, (void *)i);
    if (ok != 0) PerrorExit("pthread_create"); /* pthread_create returns a nonzero error number on failure */
    pthread_attr_destroy(&attr);
  }
#if ! (CMK_HAS_TLS_VARIABLES && !CMK_NOT_USE_TLS_THREAD)
#if CMK_CONVERSE_MPI
  if (!CharmLibInterOperate)
    pthread_setspecific(Cmi_state_key, Cmi_state_vector+_Cmi_mynodesize);
  else
#endif
    pthread_setspecific(Cmi_state_key, Cmi_state_vector);
#endif

  MACHSTATE(4,"CmiStartThreads done")
}