Beispiel #1
0
/**
 * Asynchronously broadcasts "msg" to every PE except the calling one.
 *
 * One lapiSendFn() call is issued per destination PE, starting with the PE
 * right after the caller, running up to the last PE, and then continuing
 * from PE 0 up to the PE just before the caller. Before each send the
 * destination-rank header field of "msg" is overwritten with the rank of
 * the target PE; the broadcast-root header field is set to 0 once up front.
 *
 * All sends share one heap-allocated int, initialised to CmiNumPes()-1,
 * which is passed to lapiSendFn() together with the DeliveredMsg callback
 * and is also returned as the communication handle.
 * NOTE(review): presumably DeliveredMsg counts this value down as sends
 * complete -- confirm against its definition.
 *
 * In an ENSURE_MSG_PAIRORDER build the operation is unsupported: the
 * function trips CmiAssert(0) and returns a null handle.
 *
 * @param size  message size, forwarded unchanged to lapiSendFn()
 * @param msg   message buffer; header fields are modified as described above
 * @return the shared counter as a handle (0 in the ENSURE_MSG_PAIRORDER build)
 */
CmiCommHandle CmiAsyncBroadcastFn(int size, char *msg) {
#if ENSURE_MSG_PAIRORDER
    /* Not sure how to add the msg seq no for async broadcast messages --Chao Mei */
    /* so abort here ! */
    CmiAssert(0);
    return 0;
#else
    int i;
    int mype = CmiMyPe();
    void *handle;
#if ENABLE_CONVERSE_QD
    CQdCreate(CpvAccess(cQdState), CmiNumPes()-1);
#endif
    MACHSTATE1(3,"[%d] Sending async broadcast message from {",CmiMyNode());
    CMI_BROADCAST_ROOT(msg) = 0;

    /* Shared completion counter; fail loudly instead of dereferencing NULL. */
    handle = malloc(sizeof(int));
    if (handle == NULL) {
        CmiAbort("CmiAsyncBroadcastFn: failed to allocate the broadcast handle\n");
    }
    *((int *)handle) = CmiNumPes()-1;

    /* PEs after the caller first ... */
    for (i=mype+1; i<CmiNumPes(); i++) {
        CMI_DEST_RANK(msg) = CmiRankOf(i);
        lapiSendFn(CmiNodeOf(i), size, msg, DeliveredMsg, handle);
    }
    /* ... then the PEs before it. */
    for (i=0; i<mype; i++) {
        CMI_DEST_RANK(msg) = CmiRankOf(i);
        lapiSendFn(CmiNodeOf(i), size, msg, DeliveredMsg, handle);
    }

    MACHSTATE(3,"} Sending async broadcast message end");
    return handle;
#endif
}
Beispiel #2
0
/**
 * Relays a processor-level broadcast message.
 *
 * The value CMI_BROADCAST_ROOT(msg)-1 is handed to SendSpanningChildren()
 * as its last argument (with 0 as the third argument). In CMK_SMP builds
 * the message is afterwards also passed to SendToPeers().
 */
static void SendSpanningChildrenProc(int size, char *msg) {
    const int bcastStart = CMI_BROADCAST_ROOT(msg) - 1;

    SendSpanningChildren(size, msg, 0, bcastStart);
#if CMK_SMP
    /* then hand the message to the other ranks on this node */
    SendToPeers(size, msg);
#endif
}
Beispiel #3
0
/**
 * Dispatches one received broadcast message according to the sign of its
 * broadcast root (which must be non-zero):
 *   - root > 0: processor-level broadcast. With CMK_OFFLOAD_BCAST_PROCESS
 *     the message is queued on procBcastQ, otherwise it is handled
 *     immediately by processProcBcastMsg().
 *   - root < 0: node-level broadcast. Only acted upon when
 *     CMK_NODE_QUEUE_AVAILABLE is set; it is then queued on nodeBcastQ
 *     (offload build) or handled by processNodeBcastMsg().
 */
static void handleOneBcastMsg(int size, char *msg) {
    const int bcastRoot = CMI_BROADCAST_ROOT(msg);

    CmiAssert(bcastRoot!=0);

    if (bcastRoot > 0) {
#if CMK_OFFLOAD_BCAST_PROCESS
        CMIQueuePush(CsvAccess(procBcastQ), msg);
#else
        processProcBcastMsg(size, msg);
#endif
        return;
    }

#if CMK_NODE_QUEUE_AVAILABLE
#if CMK_OFFLOAD_BCAST_PROCESS
    CMIQueuePush(CsvAccess(nodeBcastQ), msg);
#else
    processNodeBcastMsg(size, msg);
#endif
#endif
}
Beispiel #4
0
/**
 * Relays a processor-level broadcast along the hypercube. (Sameer)
 *
 * The originating PE is CMI_BROADCAST_ROOT(msg)-1 and its node is obtained
 * with CmiNodeOf(). In CMK_SMP builds a PE id greater than CmiNumPes() is
 * instead mapped to node (pe - CmiNumPes()), and after the hypercube send
 * the message is also passed to SendToPeers().
 *
 * NOTE(review): the SMP test uses a strict '>' so a PE id equal to
 * CmiNumPes() keeps the CmiNodeOf() result -- confirm that this boundary is
 * intended.
 */
static void SendHyperCubeProc(int size, char *msg) {
    const int rootPe = CMI_BROADCAST_ROOT(msg) - 1;
    int rootNode = CmiNodeOf(rootPe);

#if CMK_SMP
    if (rootPe > CmiNumPes()) {
        rootNode = rootPe - CmiNumPes();
    }
#endif

    SendHyperCube(size, msg, 0, rootNode);

#if CMK_SMP
    /* then hand the message to the other ranks on this node */
    SendToPeers(size, msg);
#endif
}
Beispiel #5
0
/**
  * lapi completion handler on the recv side. It's responsible to push messages
  * to the destination proc or relay broadcast messages. --Chao Mei
  *
  * Note: The completion handler could be executed on any cores within a node ???
  * So in SMP mode when there's a comm thread, the completion handler should be carefully
  * dealt with.
  *
  * Given lapi also provides an internal lapi thread to deal with network progress which
  * will call this function (???), we should be careful with the following situations:
  * 1) non SMP mode, with interrupt (lapi internal completion thread)
  * 2) non SMP mode, with polling (machine layer is responsible for network progress)
  * 3) SMP mode, no comm thread, with polling
  * 4) SMP mode, no comm thread, with interrupt
  * 5) SMP mode, with comm thread, with polling (not yet implemented, comm server is empty right now)
  * 6) SMP mode, with comm thread, with interrupt??
  *
  * Currently, SMP mode without comm thread is undergoing implementation.
  *
  * This function is executed by LAPI internal threads. It seems that the number of internal
  * completion handler threads could vary during the program. LAPI adaptively creates more
  * threads if there are more outstanding messages!!!! This means pcqueue needs protection
  * even in the nonsmp case!!!!
  *
  * --Chao Mei
  */
static void PumpMsgsComplete(lapi_handle_t *myLapiContext, void *am_info) {
    char *msg = am_info;

    MACHSTATE3(2,"[%d] PumpMsgsComplete with msg %p (isImm=%d) begin {",CmiMyNode(), msg, CmiIsImmediate(msg));

#if ENSURE_MSG_PAIRORDER
    MACHSTATE3(2,"msg %p info: srcpe=%d, seqno=%d", msg, CMI_MSG_SRCPE(msg), CMI_MSG_SEQNO(msg));
    /*
     * In-order delivery: this is the receive-side handler, and cross-network
     * messages are assumed to always arrive at the first proc (rank 0) of
     * this node, so the source PE and sequence number carried by the message
     * are checked here before anything else happens. --Chao Mei
     */
    {
        const int broot = CMI_BROADCAST_ROOT(msg);
        const int destrank = CMI_DEST_RANK(msg);
        /* Only proc-level messages take part in the ordering check. */
#if CMK_NODE_QUEUE_AVAILABLE
        const int isProcLevel = (broot >= 0) && (destrank != DGRAM_NODEMESSAGE);
#else
        const int isProcLevel = (broot >= 0);
#endif

        if (isProcLevel) {
            MsgOrderInfo *info = &CpvAccessOther(p2pMsgSeqInfo, destrank);
            MACHSTATE1(2, "Check msg in-order for p2p msg %p", msg);

            /* A non-zero result means the message needs no further handling here. */
            if (checkMsgInOrder(msg,info)) {
                MACHSTATE(2,"} PumpMsgsComplete end ");
                return;
            }
        }
    }
#endif

    /*
     * Hand the message on: handleOneRecvedMsg() is the place where broadcast
     * messages get relayed and normal messages get delivered. --Chao Mei
     */
    handleOneRecvedMsg(CMI_MSG_SIZE(msg), msg);

    MACHSTATE(2,"} PumpMsgsComplete end ");
}
Beispiel #6
0
/**
 * Asynchronously broadcasts "msg" as a node-level message to every node
 * except the calling one.
 *
 * The broadcast-root header field of "msg" is set to 0 and its
 * destination rank to DGRAM_NODEMESSAGE. One lapiSendFn() call is issued
 * per destination node, first for the nodes after the caller's node and
 * then for the nodes before it.
 *
 * All sends share one heap-allocated int, initialised to CmiNumNodes()-1,
 * which is passed to lapiSendFn() together with the DeliveredMsg callback
 * and is also returned as the communication handle.
 * NOTE(review): presumably DeliveredMsg counts this value down as sends
 * complete -- confirm against its definition.
 *
 * @param size  message size, forwarded unchanged to lapiSendFn()
 * @param msg   message buffer; header fields are modified as described above
 * @return the shared counter as a handle
 */
CmiCommHandle CmiAsyncNodeBroadcastFn(int size, char *msg) {
    int i;
    void *handle;

#if ENABLE_CONVERSE_QD
    CQdCreate(CpvAccess(cQdState), CmiNumNodes()-1);
#endif

    MACHSTATE1(3,"[%d] Sending async node broadcast message from {",CmiMyNode());
    CMI_BROADCAST_ROOT(msg) = 0;
    CMI_DEST_RANK(msg) =DGRAM_NODEMESSAGE;

    /* Shared completion counter; fail loudly instead of dereferencing NULL. */
    handle = malloc(sizeof(int));
    if (handle == NULL) {
        CmiAbort("CmiAsyncNodeBroadcastFn: failed to allocate the broadcast handle\n");
    }
    *((int *)handle) = CmiNumNodes()-1;

    /* Nodes after the caller's node first ... */
    for (i=CmiMyNode()+1; i<CmiNumNodes(); i++) {
        lapiSendFn(i, size, msg, DeliveredMsg, handle);
    }
    /* ... then the nodes before it. */
    for (i=0; i<CmiMyNode(); i++) {
        lapiSendFn(i, size, msg, DeliveredMsg, handle);
    }

    MACHSTATE(3,"} Sending async broadcast message end");
    return handle;
}
Beispiel #7
0
/**
 * Called in PumpMsgs.
 *
 * Walks the list starting at persistentReceivesTableHead. For every slot
 * whose first receive-size word is non-zero, the slot's first message
 * buffer is referenced (CmiReference) and pushed to the rank given by
 * CMI_DEST_RANK_NET(msg); with CMK_BROADCAST_SPANNING_TREE a message with
 * a non-zero broadcast root is additionally passed to
 * SendSpanningChildrenNet(). The size word is then reset to 0.
 *
 * @return 1 if at least one slot was delivered, 0 otherwise
 */
int PumpPersistent()
{
  int delivered = 0;
  PersistentReceivesTable *entry;

  for (entry = persistentReceivesTableHead; entry; entry = entry->next) {
    unsigned int size = *(entry->recvSizePtr[0]);
    char *msg;

    if (size == 0)
      continue;

    msg = entry->messagePtr[0];

    /* The persistent buffer itself is handed out (no copy is made), so the
       user MUST make sure not to delete it. */
    CmiReference(msg);
    CmiPushPE(CMI_DEST_RANK_NET(msg), msg);
#if CMK_BROADCAST_SPANNING_TREE
    if (CMI_BROADCAST_ROOT(msg))
        SendSpanningChildrenNet(size, msg);
#endif
    /* mark the slot as consumed */
    *(entry->recvSizePtr[0]) = 0;
    delivered = 1;
  }

  return delivered;
}
Beispiel #8
0
/**
 * Relays a node-level broadcast along the hypercube.
 *
 * The value -CMI_BROADCAST_ROOT(msg)-1 is passed to SendHyperCube() as the
 * start node, with DGRAM_NODEMESSAGE as the third argument.
 */
static void SendHyperCubeNode(int size, char *msg) {
    const int rootNode = -CMI_BROADCAST_ROOT(msg) - 1;

    SendHyperCube(size, msg, DGRAM_NODEMESSAGE, rootNode);
}
Beispiel #9
0
/**
 * Relays a node-level broadcast through the spanning tree.
 *
 * The value -CMI_BROADCAST_ROOT(msg)-1 is passed to SendSpanningChildren()
 * as the start node, with DGRAM_NODEMESSAGE as the third argument.
 */
static void SendSpanningChildrenNode(int size, char *msg) {
    const int rootNode = -CMI_BROADCAST_ROOT(msg) - 1;

    SendSpanningChildren(size, msg, DGRAM_NODEMESSAGE, rootNode);
}
Beispiel #10
0
/**
 * Checks "msg" against the per-source sequence-number state in "info".
 *
 * Returns 1 if this "msg" is an out-of-order message (it is then stored
 * in the per-source reorder buffer), or if this "msg" is a late message
 * which triggers the processing of buffered out-of-order msgs (it is then
 * dispatched here together with the consecutive buffered msgs). In both
 * cases the caller has nothing left to do with "msg".
 *
 * Returns 0 if "msg" carries the expected sequence number and nothing is
 * buffered for its source; "msg" is not dispatched by this function in
 * that case.
 *
 * Buffer layout: slot k of oooMsgBuffer[srcpe] holds the message whose
 * sequence number is (expected seqno + k + 1); oooMaxOffset[srcpe] is one
 * past the highest occupied slot.
 *
 * All accesses to "info" are made while holding cmplHdlrThdLock; the
 * dispatch of messages happens after the lock has been released.
 * --Chao Mei
 */
static int checkMsgInOrder(char *msg, MsgOrderInfo *info) {
    int srcpe, destrank;
    int incomingSeqNo, expectedSeqNo;
    int curOffset, maxOffset;
    int i, curWinSize;
    void **destMsgBuffer = NULL;

    /* numMsgs is the number of msgs to be processed in toProcessMsgBuffer */
    /* Reason to have this extra copy of msgs to be processed: it reduces the
       amount of work done while the lock is held */
    void **toProcessMsgBuffer;
    int numMsgs = 0;

    srcpe = CMI_MSG_SRCPE(msg);
    /* NOTE(review): destrank is assigned here but never read in this function. */
    destrank = CMI_DEST_RANK(msg);
    incomingSeqNo = CMI_MSG_SEQNO(msg);

    CmiLock(cmplHdlrThdLock);

    expectedSeqNo = getNextExpectedMsgSeqNo(info->expectedMsgSeqNo, srcpe);
    if (expectedSeqNo == incomingSeqNo) {
        /* Two cases: has ooo msg buffered or not */
        maxOffset = (info->oooMaxOffset)[srcpe];
        if (maxOffset>0) {
            MACHSTATE1(4, "Processing all buffered ooo msgs (maxOffset=%d) including the just recved begin {", maxOffset);
            curWinSize = info->CUR_WINDOW_SIZE[srcpe];
            /* Room for every slot of the window plus the msg just received. */
            /* NOTE(review): the malloc result is not checked before use. */
            toProcessMsgBuffer = malloc((curWinSize+1)*sizeof(void *));
            /* process the msg just recved */
            toProcessMsgBuffer[numMsgs++] = msg;
            /* process the buffered ooo msg until the first empty slot in the window */
            destMsgBuffer = (info->oooMsgBuffer)[srcpe];
            for (curOffset=0; curOffset<maxOffset; curOffset++) {
                char *curMsg = destMsgBuffer[curOffset];
                if (curMsg == NULL) {
                    /* The gap must not be the last tracked slot: slot
                       maxOffset-1 is expected to be occupied. */
                    CmiAssert(curOffset!=(maxOffset-1));
                    break;
                }
                toProcessMsgBuffer[numMsgs++] = curMsg;
                destMsgBuffer[curOffset] = NULL;
            }
            /* Update expected seqno, maxOffset and slide the window */
            if (curOffset < maxOffset) {
                /* NOTE(review): this declaration shadows the function-level "i". */
                int i;
                /**
                 * Now, the seqno of the next to-be-recved msg should be
                 * "expectedSeqNo+curOffset+1" as the seqno of the last
                 * processed msg is "expectedSeqNo+curOffset". We need to
                 * slide the msg buffer window from "curOffset+1" because the
                 * first element of the buffer window should always point to
                 * the ooo msg that's 1 in terms of seqno ahead of the next
                 * to-be-recved msg. --Chao Mei
                 */

                /* moving [curOffset+1, maxOffset) to [0, maxOffset-curOffset-1) in the window */
                /* The following two loops could be combined --Chao Mei */
                for (i=0; i<maxOffset-curOffset-1; i++) {
                    destMsgBuffer[i] = destMsgBuffer[curOffset+i+1];
                }
                /* clear the slots that were vacated by the move */
                for (i=maxOffset-curOffset-1; i<maxOffset; i++) {
                    destMsgBuffer[i] = NULL;
                }
                (info->oooMaxOffset)[srcpe] = maxOffset-curOffset-1;
                /* NOTE(review): the value stored is the seqno of the last
                   processed msg, matching the no-buffer path below which
                   stores expectedSeqNo itself. No wrap-around at
                   MAX_MSG_SEQNO is applied here -- confirm that
                   set/getNextExpectedMsgSeqNo take care of it. */
                setNextExpectedMsgSeqNo(info->expectedMsgSeqNo, srcpe, expectedSeqNo+curOffset);
            } else {
                /* there's no remaining buffered ooo msgs */
                (info->oooMaxOffset)[srcpe] = 0;
                setNextExpectedMsgSeqNo(info->expectedMsgSeqNo, srcpe, expectedSeqNo+maxOffset);
            }

            CmiUnlock(cmplHdlrThdLock);

            /* Process the msgs (outside the lock) */
            for (i=0; i<numMsgs; i++) {
                char *curMsg = toProcessMsgBuffer[i];
                if (CMI_BROADCAST_ROOT(curMsg)>0) {
                    /* proc-level broadcast: queue it or handle it right away */

#if CMK_OFFLOAD_BCAST_PROCESS
                    PCQueuePush(CsvAccess(procBcastQ), curMsg);
#else
                    processProcBcastMsg(CMI_MSG_SIZE(curMsg), curMsg);
#endif
                } else {
                    /* ordinary msg: push to its destination rank */
                    CmiPushPE(CMI_DEST_RANK(curMsg), curMsg);
                }
            }

            free(toProcessMsgBuffer);

            MACHSTATE1(4, "Processing all buffered ooo msgs (actually processed %d) end }", curOffset);
            /**
             * Since we have processed the consecutive buffered ooo msgs
             * including this just recved one, 1 should be returned so that
             * this msg no longer needs processing
             */
            return 1;
        } else {
            /* An expected msg recved without any ooo msg buffered */
            MACHSTATE1(4, "Receiving an expected msg with seqno=%d\n", incomingSeqNo);
            setNextExpectedMsgSeqNo(info->expectedMsgSeqNo, srcpe, expectedSeqNo);

            CmiUnlock(cmplHdlrThdLock);
            /* the msg itself is left for the caller to process */
            return 0;
        }
    }

    /* From here on "msg" is out of order: store it in the reorder buffer. */
    MACHSTATE2(4, "Receiving an out-of-order msg with seqno=%d, but expect seqno=%d", incomingSeqNo, expectedSeqNo);
    curWinSize = info->CUR_WINDOW_SIZE[srcpe];
    if ((info->oooMsgBuffer)[srcpe]==NULL) {
        /* first ooo msg from this source: create a zeroed window */
        /* NOTE(review): the malloc result is not checked before memset. */
        (info->oooMsgBuffer)[srcpe] = malloc(curWinSize*sizeof(void *));
        memset((info->oooMsgBuffer)[srcpe], 0, curWinSize*sizeof(void *));
    }
    destMsgBuffer = (info->oooMsgBuffer)[srcpe];
    curOffset = incomingSeqNo - expectedSeqNo;
    maxOffset = (info->oooMaxOffset)[srcpe];
    if (curOffset<0) {
        /* The incoming seqno may already belong to the next round, i.e. the
           sender wrapped around after exceeding MAX_MSG_SEQNO and restarted
           at 1 */
        curOffset += MAX_MSG_SEQNO;
    }
    if (curOffset > curWinSize) {
        /* The msg lies beyond the current window: grow the window to the
           next multiple of curWinSize above curOffset, keeping the old
           contents. */
        int newWinSize;
        if (curOffset > MAX_WINDOW_SIZE) {
            CmiAbort("Exceeding the MAX_WINDOW_SIZE!\n");
        }
        newWinSize = ((curOffset/curWinSize)+1)*curWinSize;
        /*CmiPrintf("[%d]: WARNING: INCREASING WINDOW SIZE FROM %d TO %d\n", CmiMyPe(), curWinSize, newWinSize);*/
        /* NOTE(review): the malloc result is not checked before use. */
        (info->oooMsgBuffer)[srcpe] = malloc(newWinSize*sizeof(void *));
        memset((info->oooMsgBuffer)[srcpe], 0, newWinSize*sizeof(void *));
        memcpy((info->oooMsgBuffer)[srcpe], destMsgBuffer, curWinSize*sizeof(void *));
        info->CUR_WINDOW_SIZE[srcpe] = newWinSize;
        free(destMsgBuffer);
        destMsgBuffer = (info->oooMsgBuffer)[srcpe];
    }
    /* slot (curOffset-1) belongs to seqno expectedSeqNo+curOffset and must be free */
    CmiAssert(destMsgBuffer[curOffset-1] == NULL);
    destMsgBuffer[curOffset-1] = msg;
    if (curOffset > maxOffset) (info->oooMaxOffset)[srcpe] = curOffset;

    CmiUnlock(cmplHdlrThdLock);
    return 1;
}