示例#1
0
//@cindex sendOpNV
void
sendOpNV(OpCode op, GlobalTaskId task, int nelem, 
	 StgWord *datablock, int narg, ...)
{
    va_list ap;
    int i;
    StgWord arg;

    va_start(ap, narg);

    traceSendOp(op, task, 0, 0);
    IF_PAR_DEBUG(trace,
		 fprintf(stderr,"~~ sendOpNV: op = %x (%s), task = %x, narg = %d, nelem = %d",
		       op, getOpName(op), task, narg, nelem));

    pvm_initsend(PvmDataRaw);

    for (i = 0; i < narg; ++i) {
	arg = va_arg(ap, StgWord);
        IF_PAR_DEBUG(trace,
		     fprintf(stderr,"~~ sendOpNV: arg = %d\n",arg));
	PutArgN(i, arg);
    }
    arg = (StgWord) nelem;
    PutArgN(narg, arg);

/*  for (i=0; i < nelem; ++i) fprintf(stderr, "%d ",datablock[i]); */
/*  fprintf(stderr," in sendOpNV\n");*/

    PutArgs(datablock, nelem);
    va_end(ap);

    pvm_send(task, op);
}
示例#2
0
//@cindex startUpPE
void
startUpPE(void)
{ 
  mytid = _my_gtid;	/* Initialise PVM and get task id into global var.*/
  
  IF_PAR_DEBUG(verbose,
	       fprintf(stderr,"== [%x] PEStartup: Task id = [%x], No. PEs = %d \n", 
		       mytid, mytid, nPEs));
  checkComms(pvm_joingroup(PEGROUP), "PEStartup");
  IF_PAR_DEBUG(verbose,
	       fprintf(stderr,"== [%x] PEStartup: Joined PEGROUP\n", mytid));
}
示例#3
0
文件: MPIComm.c 项目: jberthold/ghc
/* MP_sync synchronises all nodes in a parallel computation:
 * sets:
 *     thisPE - GlobalTaskId: node's own task Id
 *                     (logical node address for messages)
 * Returns: Bool: success (1) or failure (0)
 *
 * MPI Version:
 *   the number of nodes is checked by counting nodes in WORLD
 *   (could also be done by sync message)
 *   Own ID is known before, but returned only here.
 */
rtsBool MP_sync(void) {
    // initialise counters/constants and allocate/attach the buffer

    // buffer size default is 20, use RTS option -qq<N> to change it
    maxMsgs = RtsFlags.ParFlags.sendBufferSize;
    // and resulting buffer space

    // checks inside RtsFlags.c:
    // DATASPACEWORDS * sizeof(StgWord) < INT_MAX / 2
    // maxMsgs <= max(20,nPEs)
    // Howver, they might be just too much in combination.
    if (INT_MAX / sizeof(StgWord) < DATASPACEWORDS * maxMsgs) {
        IF_PAR_DEBUG(mpcomm,
                     debugBelch("requested buffer sizes too large, adjusting...\n"));
        do {
            maxMsgs--;
        } while (maxMsgs > 0 &&
                 INT_MAX / sizeof(StgWord) < DATASPACEWORDS * maxMsgs);
        if (maxMsgs == 0) {
            // should not be possible with checks inside RtsFlags.c, see above
            barf("pack buffer too large to allocate, aborting program.");
        } else {
            IF_PAR_DEBUG(mpcomm,
                         debugBelch("send buffer size reduced to %d messages.\n",
                                    maxMsgs));
        }
    }

    bufsize = maxMsgs * DATASPACEWORDS * sizeof(StgWord);

    mpiMsgBuffer = (void*) stgMallocBytes(bufsize, "mpiMsgBuffer");

    requests = (MPI_Request*)
               stgMallocBytes(maxMsgs * sizeof(MPI_Request),"requests");

    msgCount = 0; // when maxMsgs reached

    thisPE = mpiMyRank + 1;

    IF_PAR_DEBUG(mpcomm,
                 debugBelch("Node %d synchronising.\n", thisPE));

    MPI_Barrier(MPI_COMM_WORLD); // unnecessary...
    // but currently used to synchronize system times

    return rtsTrue;
}
示例#4
0
文件: MPIComm.c 项目: jberthold/ghc
/* MP_start starts up the node:
 *   - connects to the MP-System used,
 *   - determines wether we are main thread
 *   - starts up other nodes in case we are first and
 *     the MP-System requires to spawn nodes from here.
 * Parameters:
 *     IN    argv  - char**: program arguments
 * Sets:
 *           nPEs - int: no. of PEs to expect/start
 *  IAmMainThread - rtsBool: wether this node is main PE
 * Returns: Bool: success or failure
 *
 * MPI Version:
 *   nodes are spawned by startup script calling mpirun
 *   This function only connects to MPI and determines the main PE.
 */
rtsBool MP_start(int* argc, char** argv[]) {

    IF_PAR_DEBUG(mpcomm,
                 debugBelch("MPI_Init: starting MPI-Comm...\n"));

    MPI_Init(argc, argv); // MPI sez: can modify args

    MPI_Comm_rank(MPI_COMM_WORLD, &mpiMyRank);
    IF_PAR_DEBUG(mpcomm,
                 debugBelch("I am node %d.\n", mpiMyRank));

    if (!mpiMyRank) // we declare node 0 as main PE.
        IAmMainThread = rtsTrue;

    MPI_Comm_size(MPI_COMM_WORLD, &mpiWorldSize);

    // we should have a correct argument...
    ASSERT(argv && (*argv)[1]);
    nPEs = atoi((*argv)[1]);

    if (nPEs) { // we have been given a size, so check it:
        IF_PAR_DEBUG(mpcomm,
                     debugBelch("Expecting to find %d processors, found %d.",
                                nPEs, mpiWorldSize));
        if ((int)nPEs > mpiWorldSize)
            IF_PAR_DEBUG(mpcomm,
                         debugBelch("WARNING: Too few processors started!"));
    } else {  // otherwise, no size was given
        IF_PAR_DEBUG(mpcomm,
                     debugBelch("No size, given, started program on %d processors.",
                                mpiWorldSize));
    }
    nPEs = mpiWorldSize; //  (re-)set size from MPI (in any case)

    // System communicator sysComm is duplicated from COMM_WORLD
    // but has its own context
    MPI_Comm_dup(MPI_COMM_WORLD, &sysComm);

    // adjust args (ignoring nPEs argument added by the start script)
    (*argv)[1] = (*argv)[0];   /* ignore the nPEs argument */
    (*argv)++;
    (*argc)--;

    return rtsTrue;
}
示例#5
0
//@cindex shutDownPE
void
shutDownPE(void)
{    
  IF_PAR_DEBUG(verbose,
	       fprintf(stderr, "== [%x] PEshutdown\n", mytid));

  checkComms(pvm_lvgroup(PEGROUP),"PEShutDown");
  checkComms(pvm_exit(),"PEShutDown");
}
示例#6
0
//@cindex waitForPEOp
rtsPacket 
waitForPEOp(OpCode op, GlobalTaskId who, void(*processUnexpected)(rtsPacket) )
{
  rtsPacket p;
  int nbytes;
  OpCode opCode;
  GlobalTaskId sender_id;
  rtsBool match;

  IF_PAR_DEBUG(verbose,
	       fprintf(stderr,"~~ waitForPEOp: expecting op = %x (%s), who = [%x]\n", 
		       op, getOpName(op), who)); 

  do {
    while((p = pvm_recv(ANY_TASK,ANY_OPCODE)) < 0)
      pvm_perror("waitForPEOp: Waiting for PEOp");
      
    pvm_bufinfo( p, &nbytes, &opCode, &sender_id );
    match = (op == ANY_OPCODE || op == opCode) && 
            (who == ANY_TASK || who == sender_id);

    if (match) {
      IF_PAR_DEBUG(verbose,
		   fprintf(stderr,
			   "~~waitForPEOp: Qapla! received: OpCode = %#x (%s), sender_id = [%x]",
			   opCode, getOpName(opCode), sender_id)); 

      return(p);
    }

    /* Handle the unexpected OpCodes */
    if (processUnexpected!=NULL) {
      (*processUnexpected)(p);
    } else {
      IF_PAR_DEBUG(verbose,
		   fprintf(stderr,
			   "~~ waitForPEOp: ignoring OpCode = %#x (%s), sender_id = [%x]",
			   opCode, getOpName(opCode), sender_id)); 
    }

  } while(rtsTrue);
}
示例#7
0
//@cindex traceSendOp
static void
traceSendOp(OpCode op, GlobalTaskId dest UNUSED,
	     unsigned int data1 UNUSED, unsigned int data2 UNUSED)
{
    char *OpName;

    OpName = getOpName(op);
    IF_PAR_DEBUG(trace,
		 fprintf(stderr," %s [%x,%x] sent from %x to %x", 
		       OpName, data1, data2, mytid, dest));
}
示例#8
0
文件: SysMan.c 项目: ygmpkk/house
/* 
   Create the PE Tasks. We spawn (nPEs-1) pvm threads: the Main Thread 
   (which starts execution and performs IO) is created by forking SysMan 
*/
static int
createPEs(int total_nPEs) {
  int i, spawn_nPEs, iSpawn = 0, nArch, nHost;
  struct pvmhostinfo *hostp; 
  int sysman_host;

  spawn_nPEs = total_nPEs-1;
  if (spawn_nPEs > 0) {
    IF_PAR_DEBUG(verbose,
		 fprintf(stderr, "==== [%x] Spawning %d PEs(%s) ...\n", 
			 sysman_id, spawn_nPEs, petask);
		 fprintf(stderr, "  args: ");
		 for (i = 0; pargv[i]; ++i)
		   fprintf(stderr, "%s, ", pargv[i]);
		 fprintf(stderr, "\n"));

    pvm_config(&nHost,&nArch,&hostp);
    sysman_host=pvm_tidtohost(sysman_id);
	
    /* create PEs on the specific machines in the specified order! */
    for (i=0; (iSpawn<spawn_nPEs) && (i<nHost); i++)
      if (hostp[i].hi_tid != sysman_host) { 
	checkComms(pvm_spawn(petask, pargv, spawn_flag+PvmTaskHost, 
			     hostp[i].hi_name, 1, gtids+iSpawn),
		   "SysMan startup");
	IF_PAR_DEBUG(verbose,
		     fprintf(stderr, "==== [%x] Spawned PE %d onto %s\n",
			     sysman_id, i, hostp[i].hi_name));
	iSpawn++;
      }
      
    /* create additional PEs anywhere you like */
    if (iSpawn<spawn_nPEs) { 
      checkComms(pvm_spawn(petask, pargv, spawn_flag, "", 
			   spawn_nPEs-iSpawn, gtids+iSpawn),
		 "SysMan startup");
	IF_PAR_DEBUG(verbose,
		     fprintf(stderr,"==== [%x] Spawned %d additional PEs anywhere\n",
			     sysman_id, spawn_nPEs-iSpawn));
      }   
    }
示例#9
0
文件: MPIComm.c 项目: jberthold/ghc
uint32_t MP_recv(uint32_t maxlength, StgWord8 *destination,
                 OpCode *retcode, PEId *sender) {
    /* MPI: Use MPI_Probe to get size, sender and code; check maxlength;
     *      receive required data size (must be known when receiving in
     *      MPI)
     *   No special buffer is needed here, can receive into *destination.
     */
    int source, size, code;
    int haveSysMsg = rtsFalse;
    code = MIN_PEOPS-1;

    IF_PAR_DEBUG(mpcomm,
                 debugBelch("MP_recv for MPI.\n"));

    // priority for system messages, probed before accepting anything
    // non-blocking probe,
    MPI_Iprobe(MPI_ANY_SOURCE, MPI_ANY_TAG, sysComm, &haveSysMsg, &status);
    // blocking probe for other message, returns source and tag in status
    if (!haveSysMsg) {
        // there is no system message: get metadata of first msg on MPI_COMM_WORLD
        MPI_Probe(MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, &status);
        source = status.MPI_SOURCE;
        code = status.MPI_TAG;
    }
    else {
        // there is a system message:
        // still need to probe on MPI_COMM_WORLD for the system message to
        // get status of the message and the messages size
        source = status.MPI_SOURCE;
        code = status.MPI_TAG;
        MPI_Probe(source, code, MPI_COMM_WORLD, &status);
    }
    if (status.MPI_ERROR != MPI_SUCCESS) {
        debugBelch("MPI: Error receiving message.\n");
        barf("PE %d aborting execution.\n", thisPE);
    }

    // get and check msg. size
    // size = status.st_length;
    MPI_Get_count(&status, MPI_BYTE, &size);
    if (maxlength < (uint32_t)size)
        barf("wrong MPI message length (%d, too big)!!!", size);
    MPI_Recv(destination, size, MPI_BYTE, source, code, MPI_COMM_WORLD, &status);

    *retcode = status.MPI_TAG;
    *sender  = 1+status.MPI_SOURCE;

    // If we received a sys-message on COMM_WORLD, we need to receive it
    // also on sysComm:
    // if MPI_Iprobe on sysComm has failed, a sysMessage might have
    // arived later -
    // don't use haveSysMsg for the decision, use ISSYSCODE(code) instead.
    if (ISSYSCODE(code)) {
        MPI_Recv(&pingMessage2, 1, MPI_INT, source, code, sysComm, &status);

        if (code == PP_FINISH) {
            finishRecvd++;
        }
    }
    IF_PAR_DEBUG(mpcomm,
                 debugBelch("MPI Message from PE %d with code %d.\n",
                            *sender, *retcode));

    ASSERT(*sender == (PEId)source+1 && *retcode == (OpCode) code);
    return (uint32_t) size;
}
示例#10
0
文件: MPIComm.c 项目: jberthold/ghc
rtsBool MP_send(PEId node, OpCode tag, StgWord8 *data, uint32_t length) {
    /* MPI normally uses blocking send operations (MPI_*send). When
     * using nonblocking operations (MPI_I*send), dataspace must remain
     * untouched until the message has been delivered (MPI_Wait)!
     *
     * We copy the data to be sent into the mpiMsgBuffer and call MPI_Isend.
     * We can reuse slots in the buffer where messages are already delivered.
     * The requests array stores a request for each send operation which
     * can be tested by MPI_Testany for delivered messages.
     * MP_send should return false to indicate a send failure (the
     * message buffer has 0 free slots).
     *
     */
    int sendIndex;
    int hasFreeSpace;
    //MPI_Status* status;

    StgPtr sendPos; // used for pointer arithmetics, based on assumption that
    // sizeof(void*)==sizeof(StgWord) (see includes/stg/Types.h)

    ASSERT(node > 0 && node <= nPEs);


    IF_PAR_DEBUG(mpcomm,
                 debugBelch("MPI sending message to PE %u "
                            "(tag %d (%s), datasize %u)\n",
                            node, tag, getOpName(tag), length));
    // adjust node no.
    node--;
    //case each slot in buffer has been used
    if (msgCount == maxMsgs) {
        // looking for free space in buffer
        IF_PAR_DEBUG(mpcomm,
                     debugBelch("looking for free space in buffer\n"));
        MPI_Testany(msgCount, requests, &sendIndex, &hasFreeSpace,
                    MPI_STATUS_IGNORE);
        // if (status->MPI_ERROR)
        //   barf("a send operation returned an error with code %d"
        //        "and sendIndex is %d and hasFreeSpace %d",
        //        status->MPI_ERROR,  sendIndex,  hasFreeSpace);
    }
    //case still slots in buffer unused
    else {
        hasFreeSpace = 1;
        sendIndex = msgCount++;
    }
    // send the message
    if (!hasFreeSpace) {
        IF_PAR_DEBUG(mpcomm,
                     debugBelch("MPI CANCELED sending message to PE %u "
                                "(tag %d (%s), datasize %u)\n",
                                node, tag, getOpName(tag), length));
        return rtsFalse;
    }
    //calculate offset in mpiMsgBuffer
    // using ptr. arithmetics and void* size (see includes/stg/Types.h)
    sendPos = ((StgPtr)mpiMsgBuffer) + sendIndex * DATASPACEWORDS;
    memcpy((void*)sendPos, data, length);

    if (ISSYSCODE(tag)) {
        // case system message (workaroud: send it on both communicators,
        // because there is no receive on two comunicators.)
        MPI_Isend(&pingMessage, 1, MPI_INT, node, tag,
                  sysComm, &sysRequest);
    }
    MPI_Isend(sendPos, length, MPI_BYTE, node, tag,
              MPI_COMM_WORLD, &(requests[sendIndex]));
    IF_PAR_DEBUG(mpcomm,
                 debugBelch("Done sending message to PE %u\n", node+1));
    return rtsTrue;
}
示例#11
0
文件: MPIComm.c 项目: jberthold/ghc
/* MP_quit disconnects current node from MP-System:
 * Parameters:
 *     IN isError - error number, 0 if normal exit
 * Returns: Bool: success (1) or failure (0)
 *
 * MPI Version: MPI requires that all sent messages must be received
 * before quitting. Receive or cancel all pending messages (using msg.
 * count), then quit from MPI.
 */
rtsBool MP_quit(int isError) {
    StgWord data[2];
    MPI_Request sysRequest2;

    data[0] = PP_FINISH;
    data[1] = isError;

    if (IAmMainThread) {
        int i;

        IF_PAR_DEBUG(mpcomm,
                     debugBelch("Main PE stopping MPI system (exit code: %d)\n",
                                isError));

        // bcast FINISH to other PEs
        for (i=2; i<=(int)nPEs; i++) {
            // synchronous send operation in order 2..nPEs ... might slow down.
            MPI_Isend(&pingMessage, 1, MPI_INT, i-1, PP_FINISH,
                      sysComm, &sysRequest2);
            MPI_Send(data,2*sizeof(StgWord),MPI_BYTE,i-1, PP_FINISH, MPI_COMM_WORLD);
            MPI_Wait(&sysRequest2, MPI_STATUS_IGNORE);
        }

        // receive answers from all children (just counting)
        while (finishRecvd < mpiWorldSize-1) {
            MPI_Recv(data, 2*sizeof(StgWord), MPI_BYTE, MPI_ANY_SOURCE, PP_FINISH,
                     MPI_COMM_WORLD, &status);
            ASSERT(status.MPI_TAG == PP_FINISH);
            // and receive corresponding sysComm ping:
            MPI_Recv(&pingMessage, 1, MPI_INT, status.MPI_SOURCE, PP_FINISH,
                     sysComm, MPI_STATUS_IGNORE);
            IF_PAR_DEBUG(mpcomm,
                         debugBelch("Received FINISH reply from %d\n",
                                    status.MPI_SOURCE));
            finishRecvd++;
        }

    } else {

        IF_PAR_DEBUG(mpcomm,
                     debugBelch("Non-main PE stopping MPI system (exit code: %d)\n",
                                isError));
        // send FINISH to rank 0
        MPI_Isend(&pingMessage, 1, MPI_INT, 0, PP_FINISH, sysComm, &sysRequest2);
        MPI_Send(data, 2*sizeof(StgWord), MPI_BYTE, 0, PP_FINISH, MPI_COMM_WORLD);
        // can omit: MPI_Wait(&sysRequest2, MPI_STATUS_IGNORE);

        // if non-main PE terminates first, await answer
        if (finishRecvd < 1) {
            MPI_Recv(data, 2*sizeof(StgWord), MPI_BYTE, 0, PP_FINISH,
                     MPI_COMM_WORLD, MPI_STATUS_IGNORE);
            MPI_Recv(&pingMessage, 1, MPI_INT, 0, PP_FINISH,
                     sysComm, MPI_STATUS_IGNORE);
            finishRecvd++;
        }
    }

    // TODO: receive or cancel all pending messages...
    /* ------------------------------------------------
     *q&d solution:
     * receive anything retrievable by MPI_Probe
     * then get in sync
     * then again receive remaining messages
     *
     * (since buffering is used, and buffers are detached to force
     * messages, a PE might get stuck detaching its mpiMsgBuffer, and
     * send another message as soon as buffer space is available again.
     * The other PEs will not )
     *
     * ---------------------------------------------- */
    {
        // allocate fresh buffer to avoid overflow
        void* voidbuffer;
        int voidsize;

        // we might come here because of requesting too much buffer (bug!)
        voidsize = (INT_MAX / sizeof(StgWord) < DATASPACEWORDS)?\
                   INT_MAX : DATASPACEWORDS * sizeof(StgWord);

        voidbuffer = (void*)
                     stgMallocBytes(voidsize, "voidBuffer");

        // receive whatever is out there...
        while (MP_probe()) {
            MPI_Recv(voidbuffer, voidsize, MPI_BYTE,
                     MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, &status);
            if (ISSYSCODE(status.MPI_TAG))
                MPI_Recv(voidbuffer, 1, MPI_INT,
                         MPI_ANY_SOURCE, MPI_ANY_TAG,
                         sysComm, MPI_STATUS_IGNORE);
        }
        MPI_Barrier(MPI_COMM_WORLD);
        // all in sync (noone sends further messages), receive rest
        while (MP_probe()) {
            MPI_Recv(voidbuffer, voidsize, MPI_BYTE,
                     MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, &status);
            if (ISSYSCODE(status.MPI_TAG))
                MPI_Recv(voidbuffer, 1, MPI_INT,
                         MPI_ANY_SOURCE, MPI_ANY_TAG,
                         sysComm, MPI_STATUS_IGNORE);
        }
        stgFree(voidbuffer);
    }
    // end of q&d

    IF_PAR_DEBUG(mpcomm,
                 debugBelch("detaching MPI buffer\n"));
    stgFree(mpiMsgBuffer);

    IF_PAR_DEBUG(mpcomm,
                 debugBelch("Goodbye\n"));
    MPI_Finalize();

    /* indicate that quit has been executed */
    nPEs = 0;

    return rtsTrue;
}