Exemple #1
0
/******************
 * 	Initialization routine
 * 	currently just testing start up
 * ****************/
void CmiInitSysvshm(char **argv){
	MACHSTATE(3,"CminitSysvshm start");
	sysvshmContext = (SysvshmContext *)malloc(sizeof(SysvshmContext));

	if(Cmi_charmrun_pid <= 0){
		CmiAbort("sysvshm must be run with charmrun");
	}
	calculateNodeSizeAndRank(argv);
	if(sysvshmContext->nodesize == 1){
		return;
	}
	MACHSTATE1(3,"CminitSysvshm  %d calculateNodeSizeAndRank",sysvshmContext->nodesize);

	setupSharedBuffers();

	MACHSTATE2(3,"CminitSysvshm %d %d setupSharedBuffers",Cmi_charmrun_pid,sysvshmContext->nodesize);

	initAllSendQs();
	
	MACHSTATE2(3,"CminitSysvshm %d %d initAllSendQs",Cmi_charmrun_pid,sysvshmContext->nodesize);

	MACHSTATE2(3,"CminitSysvshm %d %d done",Cmi_charmrun_pid,sysvshmContext->nodesize);


#if SYSVSHM_STATS
	sysvshmContext->sendCount=0;
	sysvshmContext->sendTime=0.0;
	sysvshmContext->validCheckCount=0;
	sysvshmContext->validCheckTime=0.0;
	sysvshmContext->commServerTime = 0;
	sysvshmContext->lockRecvCount = 0;
#endif

};
Exemple #2
0
static INLINE_KEYWORD void lapiSendFn(int destNode, int size, char *msg, scompl_hndlr_t *shdlr, void *sinfo) {
    lapi_xfer_t xfer_cmd;

    MACHSTATE3(2,"lapiSendFn to destNode=%d with msg %p (isImm=%d) begin {",destNode,msg, CmiIsImmediate(msg));
    MACHSTATE3(2, "inside lapiSendFn 1: size=%d, sinfo=%p, deliverable=%d", size, sinfo, deliverable);

    MACHSTATE2(2, "Ready to call LAPI_Xfer with destNode=%d, destRank=%d",destNode,CMI_DEST_RANK(msg));

    xfer_cmd.Am.Xfer_type = LAPI_AM_XFER;
    xfer_cmd.Am.flags     = 0;
    xfer_cmd.Am.tgt       = destNode;
    xfer_cmd.Am.hdr_hdl   = lapiHeaderHandler;
    xfer_cmd.Am.uhdr_len  = 0;
    xfer_cmd.Am.uhdr      = NULL;
    xfer_cmd.Am.udata     = msg;
    xfer_cmd.Am.udata_len = size;
    xfer_cmd.Am.shdlr     = shdlr;
    xfer_cmd.Am.sinfo     = sinfo;
    xfer_cmd.Am.tgt_cntr  = NULL;
    xfer_cmd.Am.org_cntr  = NULL;
    xfer_cmd.Am.cmpl_cntr = NULL;

    check_lapi(LAPI_Xfer,(lapiContext, &xfer_cmd));

    MACHSTATE(2,"} lapiSendFn end");
}
Exemple #3
0
/* initnode node table reply format:
 +------------------------------------------------------- 
 | 4 bytes  |   Number of nodes n                       ^
 |          |   (big-endian binary integer)       4+12*n bytes
 +-------------------------------------------------     |
 ^  |        (one entry for each node)            ^     |
 |  | 4 bytes  |   Number of PEs for this node    |     |
 n  | 4 bytes  |   IP address of this node   12*n bytes |
 |  | 4 bytes  |   Data (UDP) port of this node   |     |
 v  |          |   (big-endian binary integers)   v     v
 ---+----------------------------------------------------
*/
static void node_addresses_store(ChMessage *msg)
{
  ChMessageInt_t *n32=(ChMessageInt_t *)msg->data;
  ChNodeinfo *d=(ChNodeinfo *)(n32+1);
  int nodestart;
  int i,j,n;
  MACHSTATE(1,"node_addresses_store {");	
  _Cmi_numnodes=ChMessageInt(n32[0]);

  if ((sizeof(ChMessageInt_t)+sizeof(ChNodeinfo)*_Cmi_numnodes)
         !=(unsigned int)msg->len)
    {printf("Node table has inconsistent length!");machine_exit(1);}

  nodes = (OtherNode)malloc(_Cmi_numnodes * sizeof(struct OtherNodeStruct));
  nodestart=0;
  for (i=0; i<_Cmi_numnodes; i++) {
    nodes[i].nodestart = nodestart;
    nodes[i].nodesize  = ChMessageInt(d[i].nPE);
    MACHSTATE2(3,"node %d nodesize %d",i,nodes[i].nodesize);
    nodes[i].mach_id = ChMessageInt(d[i].mach_id);

    nodes[i].IP=d[i].IP;
    if (i==_Cmi_mynode) {
      Cmi_nodestart=nodes[i].nodestart;
      _Cmi_mynodesize=nodes[i].nodesize;
      Cmi_self_IP=nodes[i].IP;
    }
    nodes[i].dataport = ChMessageInt(d[i].dataport);
    nodes[i].addr = skt_build_addr(nodes[i].IP,nodes[i].dataport);

#if CMK_USE_TCP
    nodes[i].sock = INVALID_SOCKET;
#endif
    nodestart+=nodes[i].nodesize;

  }
  _Cmi_numpes=nodestart;
  n = _Cmi_numpes;
#ifdef CMK_CPV_IS_SMP
  n += _Cmi_numnodes;
#endif
  nodes_by_pe = (OtherNode*)malloc(n * sizeof(OtherNode));
  _MEMCHECK(nodes_by_pe);
  for (i=0; i<_Cmi_numnodes; i++) {
    OtherNode node = nodes + i;
    OtherNode_init(node);
    for (j=0; j<node->nodesize; j++) {
      nodes_by_pe[j + node->nodestart] = node;
    }
  }
#ifdef CMK_CPV_IS_SMP
  /* index for communication threads */
  for (i=_Cmi_numpes; i<_Cmi_numpes+_Cmi_numnodes; i++) {
    OtherNode node = nodes + i-_Cmi_numpes;
    nodes_by_pe[i] = node;
  }
#endif
  MACHSTATE(1,"} node_addresses_store");
}
Exemple #4
0
void GarbageCollectMsg(OutgoingMsg ogm)
{
  MACHSTATE2(3,"GarbageCollectMsg called on ogm %p refcount %d",ogm,ogm->refcount);
	if (ogm->refcount == 0) {
    if (ogm->freemode == 'A') {
      ogm->freemode = 'X';
    } else {
      if (ogm->freemode != 'G') CmiFree(ogm->data);
      FreeOutgoingMsg(ogm);
    }
  }
}
Exemple #5
0
static void processBcastQs() {
#if CMK_OFFLOAD_BCAST_PROCESS
    char *msg;
    do {
        msg = CMIQueuePop(CsvAccess(procBcastQ));
        if (!msg) break;
        MACHSTATE2(4, "[%d]: process a proc-level bcast msg %p begin{", CmiMyNode(), msg);
        processProcBcastMsg(CMI_MSG_SIZE(msg), msg);
        MACHSTATE2(4, "[%d]: process a proc-level bcast msg %p end}", CmiMyNode(), msg);
    } while (1);
#if CMK_NODE_QUEUE_AVAILABLE
    do {
        msg = CMIQueuePop(CsvAccess(nodeBcastQ));
        if (!msg) break;
        MACHSTATE2(4, "[%d]: process a node-level bcast msg %p begin{", CmiMyNode(), msg);
        processNodeBcastMsg(CMI_MSG_SIZE(msg), msg);
        MACHSTATE2(4, "[%d]: process a node-level bcast msg %p end}", CmiMyNode(), msg);
    } while (1);
#endif
#endif
}
Exemple #6
0
/***************
 * 	calculate the name of the shared objects and semaphores
 * 	
 * 	name scheme
 * 	shared memory: charm_pxshm_<recvernoderank>_<sendernoderank>  
 *  semaphore    : charm_pxshm_<recvernoderank>_<sendernoderank>.sem for semaphore for that shared object
 *                the semaphore name used by us is the same as the shared memory object name
 *                the posix library adds the semaphore tag // in linux at least . other machines might need more portable code
 *
 * 	open these shared objects and semaphores
 * *********/
void setupSharedBuffers(){
	int i=0;

	allocBufNameStrings(&(pxshmContext->recvBufNames));
	
	MACHSTATE(3,"allocBufNameStrings for recvBufNames done");
	MEMDEBUG(CmiMemoryCheck());

	allocBufNameStrings((&pxshmContext->sendBufNames));
	
	MACHSTATE(3,"allocBufNameStrings for sendBufNames done");

	for(i=0;i<pxshmContext->nodesize;i++){
		if(i != pxshmContext->noderank){
			snprintf(pxshmContext->recvBufNames[i],NAMESTRLEN-1,"%s_%d_%d",pxshmContext->prefixStr,pxshmContext->noderank+pxshmContext->nodestart,i+pxshmContext->nodestart);
			MACHSTATE2(3,"recvBufName %s with rank %d",pxshmContext->recvBufNames[i],i)
			snprintf(pxshmContext->sendBufNames[i],NAMESTRLEN-1,"%s_%d_%d",pxshmContext->prefixStr,i+pxshmContext->nodestart,pxshmContext->noderank+pxshmContext->nodestart);
			MACHSTATE2(3,"sendBufName %s with rank %d",pxshmContext->sendBufNames[i],i);
		}
	}
	
	createShmObjectsAndSems(&(pxshmContext->recvBufs),pxshmContext->recvBufNames);
	createShmObjectsAndSems(&(pxshmContext->sendBufs),pxshmContext->sendBufNames);
	
	for(i=0;i<pxshmContext->nodesize;i++){
		if(i != pxshmContext->noderank){
			//CmiAssert(pxshmContext->sendBufs[i].header->count == 0);
			pxshmContext->sendBufs[i].header->count = 0;
			pxshmContext->sendBufs[i].header->bytes = 0;
		}
	}

#if CMK_SMP && ( CMK_CRAYXE || CMK_CRAYXC )
        if (PMI_Barrier() != GNI_RC_SUCCESS) return;
#else
        if (CmiBarrier() != 0) return;
#endif
        freeSharedBuffers();
        pxshm_freed = 1;
}
Exemple #7
0
void allocBufNameStrings(char ***bufName){
	int i,count;
	
	int totalAlloc = sizeof(char)*NAMESTRLEN*(pxshmContext->nodesize-1);
	char *tmp = malloc(totalAlloc);
	
	MACHSTATE2(3,"allocBufNameStrings tmp %p totalAlloc %d",tmp,totalAlloc);

	*bufName = (char **)malloc(sizeof(char *)*pxshmContext->nodesize);
	
	for(i=0,count=0;i<pxshmContext->nodesize;i++){
		if(i != pxshmContext->noderank){
			(*bufName)[i] = &(tmp[count*NAMESTRLEN*sizeof(char)]);
			count++;
		}else{
			(*bufName)[i] = NULL;
		}
	}
}
Exemple #8
0
/******************
 * 	Initialization routine
 * 	currently just testing start up
 * ****************/
void CmiInitPxshm(char **argv){
        char *env;
        MACHSTATE(3,"CminitPxshm start");

	pxshmContext = (PxshmContext *)calloc(1,sizeof(PxshmContext));

	calculateNodeSizeAndRank(argv);
	if(pxshmContext->nodesize == 1) return;
	
	MACHSTATE1(3,"CminitPxshm  %d calculateNodeSizeAndRank",pxshmContext->nodesize);

        env = getenv("CHARM_PXSHM_POOL_SIZE");
        if (env) {
            SHMBUFLEN = CmiReadSize(env);
        }
        env = getenv("CHARM_PXSHM_MESSAGE_MAX_SIZE");
        if (env) {
            SHMMAXSIZE = CmiReadSize(env);
        }
        if (SHMMAXSIZE > SHMBUFLEN)
            CmiAbort("Error> Pxshm pool size is set too small in env variable CHARM_PXSHM_POOL_SIZE");

        SENDQSTARTSIZE = 32 * pxshmContext->nodesize;

        if (_Cmi_mynode == 0)
            printf("Charm++> pxshm enabled: %d cores per node, buffer size: %.1fMB\n", pxshmContext->nodesize, SHMBUFLEN/1024.0/1024.0);

#if CMK_CRAYXE || CMK_CRAYXC
        srand(getpid());
        int Cmi_charmrun_pid = rand();
        PMI_Bcast(&Cmi_charmrun_pid, sizeof(int));
        snprintf(&(pxshmContext->prefixStr[0]),PREFIXSTRLEN-1,"charm_pxshm_%d",Cmi_charmrun_pid);
#endif

	MACHSTATE2(3,"CminitPxshm %s %d pre setupSharedBuffers",pxshmContext->prefixStr,pxshmContext->nodesize);

	setupSharedBuffers();

	MACHSTATE2(3,"CminitPxshm %s %d setupSharedBuffers",pxshmContext->prefixStr,pxshmContext->nodesize);

	initAllSendQs();
	
	MACHSTATE2(3,"CminitPxshm %s %d initAllSendQs",pxshmContext->prefixStr,pxshmContext->nodesize);

	MACHSTATE2(3,"CminitPxshm %s %d done",pxshmContext->prefixStr,pxshmContext->nodesize);

#if PXSHM_STATS
	pxshmContext->sendCount=0;
	pxshmContext->sendTime=0.0;
	pxshmContext->validCheckCount=0;
	pxshmContext->validCheckTime=0.0;
	pxshmContext->commServerTime = 0;
	pxshmContext->lockRecvCount = 0;
#endif

        signal(SIGSEGV, cleanupOnAllSigs);
        signal(SIGFPE, cleanupOnAllSigs);
        signal(SIGILL, cleanupOnAllSigs);
        signal(SIGTERM, cleanupOnAllSigs);
        signal(SIGABRT, cleanupOnAllSigs);
        signal(SIGQUIT, cleanupOnAllSigs);
        signal(SIGBUS, cleanupOnAllSigs);
        signal(SIGINT, cleanupOnAllSigs);
        signal(SIGTRAP, cleanupOnAllSigs);

#if 0
        char name[64];
        gethostname(name,64);
        printf("[%d] name: %s\n", myrank, name);
#endif
};
Exemple #9
0
/**
 * Returns 1 if this "msg" is an out-of-order message, or
 * this "msg" is a late message which triggers the process
 * of all buffered ooo msgs.
 * --Chao Mei
 */
static int checkMsgInOrder(char *msg, MsgOrderInfo *info) {
    int srcpe, destrank;
    int incomingSeqNo, expectedSeqNo;
    int curOffset, maxOffset;
    int i, curWinSize;
    void **destMsgBuffer = NULL;

    /* numMsg is the number of msgs to be processed in this buffer*/
    /* Reason to have this extra copy of msgs to be processed: Reduce the atomic granularity */
    void **toProcessMsgBuffer;
    int numMsgs = 0;

    srcpe = CMI_MSG_SRCPE(msg);
    destrank = CMI_DEST_RANK(msg);
    incomingSeqNo = CMI_MSG_SEQNO(msg);

    CmiLock(cmplHdlrThdLock);

    expectedSeqNo = getNextExpectedMsgSeqNo(info->expectedMsgSeqNo, srcpe);
    if (expectedSeqNo == incomingSeqNo) {
        /* Two cases: has ooo msg buffered or not */
        maxOffset = (info->oooMaxOffset)[srcpe];
        if (maxOffset>0) {
            MACHSTATE1(4, "Processing all buffered ooo msgs (maxOffset=%d) including the just recved begin {", maxOffset);
            curWinSize = info->CUR_WINDOW_SIZE[srcpe];
            toProcessMsgBuffer = malloc((curWinSize+1)*sizeof(void *));
            /* process the msg just recved */
            toProcessMsgBuffer[numMsgs++] = msg;
            /* process the buffered ooo msg until the first empty slot in the window */
            destMsgBuffer = (info->oooMsgBuffer)[srcpe];
            for (curOffset=0; curOffset<maxOffset; curOffset++) {
                char *curMsg = destMsgBuffer[curOffset];
                if (curMsg == NULL) {
                    CmiAssert(curOffset!=(maxOffset-1));
                    break;
                }
                toProcessMsgBuffer[numMsgs++] = curMsg;
                destMsgBuffer[curOffset] = NULL;
            }
            /* Update expected seqno, maxOffset and slide the window */
            if (curOffset < maxOffset) {
                int i;
                /**
                 * now, the seqno of the next to-be-recved msg should be
                 * "expectedSeqNo+curOffset+1" as the seqno of the just
                 * processed msg is "expectedSeqNo+curOffset. We need to slide
                 * the msg buffer window from "curOffset+1" because the first
                 * element of the buffer window should always points to the ooo
                 * msg that's 1 in terms of seqno ahead of the next to-be-recved
                 * msg. --Chao Mei
                 */

                /* moving [curOffset+1, maxOffset) to [0, maxOffset-curOffset-1) in the window */
                /* The following two loops could be combined --Chao Mei */
                for (i=0; i<maxOffset-curOffset-1; i++) {
                    destMsgBuffer[i] = destMsgBuffer[curOffset+i+1];
                }
                for (i=maxOffset-curOffset-1; i<maxOffset; i++) {
                    destMsgBuffer[i] = NULL;
                }
                (info->oooMaxOffset)[srcpe] = maxOffset-curOffset-1;
                setNextExpectedMsgSeqNo(info->expectedMsgSeqNo, srcpe, expectedSeqNo+curOffset);
            } else {
                /* there's no remaining buffered ooo msgs */
                (info->oooMaxOffset)[srcpe] = 0;
                setNextExpectedMsgSeqNo(info->expectedMsgSeqNo, srcpe, expectedSeqNo+maxOffset);
            }

            CmiUnlock(cmplHdlrThdLock);

            /* Process the msgs */
            for (i=0; i<numMsgs; i++) {
                char *curMsg = toProcessMsgBuffer[i];
                if (CMI_BROADCAST_ROOT(curMsg)>0) {

#if CMK_OFFLOAD_BCAST_PROCESS
                    PCQueuePush(CsvAccess(procBcastQ), curMsg);
#else
                    processProcBcastMsg(CMI_MSG_SIZE(curMsg), curMsg);
#endif
                } else {
                    CmiPushPE(CMI_DEST_RANK(curMsg), curMsg);
                }
            }

            free(toProcessMsgBuffer);

            MACHSTATE1(4, "Processing all buffered ooo msgs (actually processed %d) end }", curOffset);
            /**
             * Since we have processed all buffered ooo msgs including
             * this just recved one, 1 should be returned so that this
             * msg no longer needs processing
             */
            return 1;
        } else {
            /* An expected msg recved without any ooo msg buffered */
            MACHSTATE1(4, "Receiving an expected msg with seqno=%d\n", incomingSeqNo);
            setNextExpectedMsgSeqNo(info->expectedMsgSeqNo, srcpe, expectedSeqNo);

            CmiUnlock(cmplHdlrThdLock);
            return 0;
        }
    }

    MACHSTATE2(4, "Receiving an out-of-order msg with seqno=%d, but expect seqno=%d", incomingSeqNo, expectedSeqNo);
    curWinSize = info->CUR_WINDOW_SIZE[srcpe];
    if ((info->oooMsgBuffer)[srcpe]==NULL) {
        (info->oooMsgBuffer)[srcpe] = malloc(curWinSize*sizeof(void *));
        memset((info->oooMsgBuffer)[srcpe], 0, curWinSize*sizeof(void *));
    }
    destMsgBuffer = (info->oooMsgBuffer)[srcpe];
    curOffset = incomingSeqNo - expectedSeqNo;
    maxOffset = (info->oooMaxOffset)[srcpe];
    if (curOffset<0) {
        /* It's possible that the seqNo starts with another round (exceeding MAX_MSG_SEQNO) with 1 */
        curOffset += MAX_MSG_SEQNO;
    }
    if (curOffset > curWinSize) {
        int newWinSize;
        if (curOffset > MAX_WINDOW_SIZE) {
            CmiAbort("Exceeding the MAX_WINDOW_SIZE!\n");
        }
        newWinSize = ((curOffset/curWinSize)+1)*curWinSize;
        /*CmiPrintf("[%d]: WARNING: INCREASING WINDOW SIZE FROM %d TO %d\n", CmiMyPe(), curWinSize, newWinSize);*/
        (info->oooMsgBuffer)[srcpe] = malloc(newWinSize*sizeof(void *));
        memset((info->oooMsgBuffer)[srcpe], 0, newWinSize*sizeof(void *));
        memcpy((info->oooMsgBuffer)[srcpe], destMsgBuffer, curWinSize*sizeof(void *));
        info->CUR_WINDOW_SIZE[srcpe] = newWinSize;
        free(destMsgBuffer);
        destMsgBuffer = (info->oooMsgBuffer)[srcpe];
    }
    CmiAssert(destMsgBuffer[curOffset-1] == NULL);
    destMsgBuffer[curOffset-1] = msg;
    if (curOffset > maxOffset) (info->oooMaxOffset)[srcpe] = curOffset;

    CmiUnlock(cmplHdlrThdLock);
    return 1;
}
Exemple #10
0
/* The following two are callbacks for sync and async send respectively */
static void ReleaseMsg(lapi_handle_t *myLapiContext, void *msg, lapi_sh_info_t *info) {
    MACHSTATE2(2,"[%d] ReleaseMsg begin %p {",CmiMyNode(),msg);
    check_lapi_err(info->reason, "ReleaseMsg", __LINE__);
    CmiFree(msg);
    MACHSTATE(2,"} ReleaseMsg end");
}