Example #1
0
/*
 * IRLOG_CreateOutputStruct - allocate an IRLOG_IOStruct and open 'filename'
 * for binary writing.
 *
 * Returns the new structure with its header marked invalid and its buffer
 * cursors pointing at the start of the internal buffer, or NULL (after
 * printing a message) when either the allocation or the fopen fails.
 */
IRLOG_IOStruct *IRLOG_CreateOutputStruct(const char *filename)
{
    IRLOG_IOStruct *out;

    out = (IRLOG_IOStruct *) MPL_malloc(sizeof(IRLOG_IOStruct), MPL_MEM_DEBUG);
    if (!out) {
        MPL_error_printf("malloc failed - %s\n", strerror(errno));
        return NULL;
    }

    out->f = fopen(filename, "wb");
    if (!out->f) {
        MPL_error_printf("Unable to open output file '%s' - %s\n", filename, strerror(errno));
        MPL_free(out);
        return NULL;
    }

    /* start with an empty buffer and no valid header */
    out->pEnd = &out->buffer[RLOG_BUFFSIZE];
    out->pNextHeader = out->buffer;
    out->pCurHeader = out->buffer;
    out->header.type = RLOG_INVALID_TYPE;

    return out;
}
Example #2
0
/*
 * mpiexec_usage - print an optional message followed by the mpiexec usage
 * line, then terminate the process with exit(-1).
 *
 * msg may be NULL, in which case only the usage line is printed.  A newline
 * is appended when a non-empty message does not already end with one.
 */
void mpiexec_usage( const char *msg )
{
    if (msg) {
	size_t len = strlen(msg);

	MPL_error_printf( "%s", msg );
	/* len > 0 guards the empty string: msg[len-1] would otherwise read
	   one byte before the start of the buffer */
	if (len > 0 && msg[len-1] != '\n') {
	    MPL_error_printf( "\n" );
	}
    }
    MPL_usage_printf( "Usage: mpiexec %s\n", MPIE_ArgDescription() );
    exit( -1 );
}
/*
 * external32_basic_convert - convert 'count' elements from src_buf into
 * dest_buf with the BASIC_convert16/32/64 macros.
 *
 * Only equal source and destination element sizes of 2, 4 or 8 bytes are
 * converted; equal sizes other than those leave dest_buf untouched.  When the
 * two sizes differ, an error is printed and MPID_Abort is called.
 *
 * The return value is always 0 and is not an error code.
 */
static int external32_basic_convert(char *dest_buf,
                                    char *src_buf,
                                    int dest_el_size,
                                    int src_el_size,
                                    DLOOP_Offset count)
{
    char *src_ptr = src_buf, *dest_ptr = dest_buf;
    /* Compute the end in DLOOP_Offset arithmetic: narrowing count to int
     * first would give a wrong end pointer for large counts, and the loops
     * below only stop on exact equality with src_end. */
    char *src_end = src_buf + (count * src_el_size);

    MPIR_Assert(dest_buf && src_buf);

    if (src_el_size == dest_el_size)
    {
        if (src_el_size == 2)
        {
            while(src_ptr != src_end)
            {
                BASIC_convert16((*(TWO_BYTE_BASIC_TYPE *)src_ptr),
                                (*(TWO_BYTE_BASIC_TYPE *)dest_ptr));

                src_ptr += src_el_size;
                dest_ptr += dest_el_size;
            }
        }
        else if (src_el_size == 4)
        {
            while(src_ptr != src_end)
            {
                BASIC_convert32((*(FOUR_BYTE_BASIC_TYPE *)src_ptr),
                                (*(FOUR_BYTE_BASIC_TYPE *)dest_ptr));

                src_ptr += src_el_size;
                dest_ptr += dest_el_size;
            }
        }
        else if (src_el_size == 8)
        {
            while(src_ptr != src_end)
            {
                BASIC_convert64(src_ptr, dest_ptr);

                src_ptr += src_el_size;
                dest_ptr += dest_el_size;
            }
        }
    }
    else
    {
        /* TODO */
        MPL_error_printf( "Conversion of types whose size is not the same as the size in external32 is not supported\n" );
        MPID_Abort( 0, MPI_SUCCESS, 1, "Aborting with internal error" );
        /* There is no way to return an error code, so an abort is the
           only choice (the return value of this routine is not
           an error code) */
    }
    return 0;
}
Example #4
0
/*
 * MPID_Abort - print an abort message and terminate the job.
 *
 * The message has the form
 *   "Abort(<exit_code>)[ on node R][ (rank r in comm c)]: <error_msg>[ (<errstring>)]"
 * where the optional parts are present when MPIR_Process.comm_world is set,
 * when comm is non-NULL, and when mpi_errno != MPI_SUCCESS respectively.
 * A NULL error_msg is replaced with "Internal error".
 *
 * After flushing stderr/stdout:
 *   - comm == NULL, or a single-process intracommunicator: MPL_exit(exit_code)
 *   - comm is not comm_world: MPIDIG_comm_abort(comm, exit_code)
 *   - otherwise: the PMI/PMI2/PMIx abort call selected at compile time
 *
 * Returns 0 if any of those calls returns.
 */
int MPID_Abort(MPIR_Comm * comm, int mpi_errno, int exit_code, const char *error_msg)
{
    char sys_str[MPI_MAX_ERROR_STRING + 5] = "";
    char comm_str[MPI_MAX_ERROR_STRING] = "";
    char world_str[MPI_MAX_ERROR_STRING] = "";
    char error_str[2 * MPI_MAX_ERROR_STRING + 128];
    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_ABORT);
    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDI_ABORT);

    if (MPIR_Process.comm_world) {
        int rank = MPIR_Process.comm_world->rank;
        snprintf(world_str, sizeof(world_str), " on node %d", rank);
    }

    if (comm) {
        int rank = comm->rank;
        int context_id = comm->context_id;
        snprintf(comm_str, sizeof(comm_str), " (rank %d in comm %d)", rank, context_id);
    }

    if (!error_msg)
        error_msg = "Internal error";

    if (mpi_errno != MPI_SUCCESS) {
        char msg[MPI_MAX_ERROR_STRING] = "";
        MPIR_Err_get_string(mpi_errno, msg, MPI_MAX_ERROR_STRING, NULL);
        /* Bound by the destination buffer: sys_str is deliberately larger
         * than msg so that " (" and ")" fit around a full-length message. */
        snprintf(sys_str, sizeof(sys_str), " (%s)", msg);
    }
    MPL_snprintf(error_str, sizeof(error_str), "Abort(%d)%s%s: %s%s\n",
                 exit_code, world_str, comm_str, error_msg, sys_str);
    MPL_error_printf("%s", error_str);

    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDI_ABORT);
    fflush(stderr);
    fflush(stdout);
    if (NULL == comm || (MPIR_Comm_size(comm) == 1 && comm->comm_kind == MPIR_COMM_KIND__INTRACOMM))
        MPL_exit(exit_code);

    if (comm != MPIR_Process.comm_world) {
        MPIDIG_comm_abort(comm, exit_code);
    } else {
#ifdef USE_PMIX_API
        PMIx_Abort(exit_code, error_msg, NULL, 0);
#elif defined(USE_PMI2_API)
        PMI2_Abort(TRUE, error_msg);
#else
        PMI_Abort(exit_code, error_msg);
#endif
    }
    return 0;
}
Example #5
0
/*
 * IRLOG_CreateInputStruct - allocate an IRLOG_IOStruct, open 'filename' for
 * binary reading, fill the internal buffer with up to RLOG_BUFFSIZE bytes and
 * position the structure on the first record.
 *
 * Returns the structure on success.  On any failure a message is printed,
 * whatever was acquired so far is released, and NULL is returned.
 */
IRLOG_IOStruct *IRLOG_CreateInputStruct(const char *filename)
{
    IRLOG_IOStruct *in;
    int nbytes;

    in = (IRLOG_IOStruct *) MPL_malloc(sizeof(IRLOG_IOStruct), MPL_MEM_DEBUG);
    if (!in) {
        MPL_error_printf("malloc failed - %s\n", strerror(errno));
        return NULL;
    }

    in->f = fopen(filename, "rb");
    if (!in->f) {
        MPL_error_printf("fopen(%s) failed, error: %s\n", filename, strerror(errno));
        goto fail_free;
    }

    /* prime the buffer */
    nbytes = (int) fread(in->buffer, 1, RLOG_BUFFSIZE, in->f);
    if (nbytes == 0) {
        MPL_error_printf("Unable to read data from the input file.\n");
        goto fail_close;
    }

    /* both cursors start at the beginning; pEnd marks the data actually read */
    in->pEnd = in->buffer + nbytes;
    in->pNextHeader = in->buffer;
    in->pCurHeader = in->buffer;

    if (IRLOG_GetNextRecord(in) != 0) {
        MPL_error_printf("Unable to get the first record from the file.\n");
        goto fail_close;
    }

    return in;

  fail_close:
    fclose(in->f);
  fail_free:
    MPL_free(in);
    return NULL;
}
Example #6
0
/*
 * AppendFile - copy the contents of fin to the current position of fout.
 *
 * The number of bytes to copy is taken from ftell(fin) on entry, i.e. the
 * current offset of fin; fin is then rewound and that many bytes are copied
 * in BUFFER_SIZE chunks.
 *
 * Errors (allocation, ftell/fseek, short read, failed write) are reported
 * with MPL_error_printf and end the copy early; the scratch buffer is
 * released on every path.
 */
void AppendFile(FILE *fout, FILE *fin)
{
    long total;
    size_t chunk, num_read, num_written;
    char *buffer, *buf;

    buffer = (char*)MPL_malloc(sizeof(char) * BUFFER_SIZE);
    if (buffer == NULL)
    {
	MPL_error_printf("failed to allocate a copy buffer\n");
	return;
    }

    total = ftell(fin);
    if (total < 0 || fseek(fin, 0L, SEEK_SET) != 0)
    {
	/* a negative total would otherwise turn into a huge fread size */
	MPL_error_printf("failed to read from input file\n");
	goto fn_exit;
    }

    while (total > 0)
    {
	chunk = (total < (long)BUFFER_SIZE) ? (size_t)total : (size_t)BUFFER_SIZE;
	num_read = fread(buffer, 1, chunk, fin);
	if (num_read == 0)
	{
	    MPL_error_printf("failed to read from input file\n");
	    goto fn_exit;
	}
	total -= (long)num_read;
	buf = buffer;
	/* fwrite may write less than requested; keep going until the chunk
	   is out or the stream stops accepting data */
	while (num_read > 0)
	{
	    num_written = fwrite(buf, 1, num_read, fout);
	    if (num_written == 0)
	    {
		MPL_error_printf("failed to write to output file\n");
		goto fn_exit;
	    }
	    num_read -= num_written;
	    buf += num_written;
	}
    }

  fn_exit:
    MPL_free(buffer);
}
Example #7
0
/*
 * WriteFileData - write exactly 'length' bytes from pBuffer to fout,
 * retrying after partial writes.
 *
 * Returns 0 on success.  On failure prints a message and returns a non-zero
 * value: the errno left by fwrite, or -1 if errno was not set.
 */
static int WriteFileData(const char *pBuffer, int length, FILE * fout)
{
    size_t num_written;

    while (length > 0) {
        num_written = fwrite(pBuffer, 1, (size_t) length, fout);
        /* fwrite returns an item count and never -1; no progress at all
         * is the failure signal.  Testing for -1 would spin forever here. */
        if (num_written == 0) {
            int err = errno;    /* save before the print can change it */
            MPL_error_printf("Error: fwrite failed - %s\n", strerror(err));
            return err ? err : -1;
        }

        /*printf("fwrite(%d)", num_written);fflush(stdout); */

        length -= (int) num_written;
        pBuffer += num_written;
    }
    return 0;
}
Example #8
0
/*
 * ReadFileData - read exactly 'length' bytes from fin into pBuffer, retrying
 * after partial reads.
 *
 * Returns 0 on success.  If the stream ends or fails before 'length' bytes
 * have been read, prints a message and returns a non-zero value: -1 for a
 * premature end of file, otherwise the errno left by fread (or -1 if errno
 * was not set).
 */
static int ReadFileData(char *pBuffer, int length, FILE * fin)
{
    size_t num_read;

    while (length > 0) {
        num_read = fread(pBuffer, 1, (size_t) length, fin);
        /* fread returns an item count and never -1; zero means EOF or an
         * error.  Testing for -1 would spin forever on a short file. */
        if (num_read == 0) {
            int err = errno;    /* save before feof/print can change it */
            if (feof(fin)) {
                MPL_error_printf("Error: fread failed - unexpected end of file\n");
                return -1;
            }
            MPL_error_printf("Error: fread failed - %s\n", strerror(err));
            return err ? err : -1;
        }

        /*printf("fread(%d)", num_read);fflush(stdout); */

        length -= (int) num_read;
        pBuffer += num_read;
    }
    return 0;
}
Example #9
0
/*
 * RLOG_InitLog - create the logging state for process 'rank' of 'size'.
 *
 * Allocates an RLOG_Struct, opens the output file "log<rank>.irlog" through
 * IRLOG_CreateOutputStruct, and writes the description of the "RLOG_DISK"
 * state while logging is temporarily enabled.  Logging is disabled again
 * before returning.
 *
 * Returns the new structure, or NULL if the allocation or the output
 * structure creation fails.
 */
RLOG_Struct* RLOG_InitLog(int rank, int size)
{
    RLOG_Struct *log = (RLOG_Struct*)MPL_malloc(sizeof(RLOG_Struct));

    if (!log)
        return NULL;

    log->dFirstTimestamp = 0.0;
    log->nCurEventId = RLOG_FIRST_EVENT_ID;
    log->nRecursion = 0;
    log->nSize = size;
    log->nRank = rank;
    MPL_snprintf(log->pszFileName, 256, "log%d.irlog", rank);

    log->pOutput = IRLOG_CreateOutputStruct(log->pszFileName);
    if (!log->pOutput)
    {
        MPL_error_printf("RLOG Error: unable to allocate an output structure.\n");
        MPL_free(log);
        return NULL;
    }

    RLOG_EnableLogging(log);

    /* The disk event/header pair is filled in once; these fields never
       change afterwards. */
    log->DiskEvent.event = RLOG_GetNextEventID(log);
    log->DiskEvent.rank = rank;
    log->DiskHeader.type = RLOG_EVENT_TYPE;
    log->DiskHeader.length = sizeof(RLOG_HEADER) + sizeof(RLOG_EVENT);

    /* record the state description in the log file */
    RLOG_DescribeState(log, log->DiskEvent.event, "RLOG_DISK", "255   0   0");

    RLOG_DisableLogging(log);

    return log;
}
Example #10
0
/*
 * SaveArrow - match one half of a message (send or receive record) with its
 * counterpart and, once both halves are known, append the resulting
 * RLOG_ARROW to the temporary arrow file.
 *
 * The arrow file "ArrowFile.tmp" is opened on first use.  For a sender
 * record the matching receive is looked up in the end list of the remote
 * rank's node; for a receiver record the matching send is looked up in the
 * start list of its own rank's node.  When no counterpart exists yet, the
 * record is queued at the tail of the corresponding list and nothing is
 * written.
 *
 * Failures (cannot open the file, out of memory) are reported with
 * MPL_error_printf and the record is dropped.
 */
void SaveArrow(RLOG_IARROW *pArrow)
{
    ArrowNode *pNode;
    StartArrowStruct *pStart, *pStartIter;
    EndArrowStruct *pEnd, *pEndIter;
    RLOG_ARROW arrow;

    if (g_fArrow == NULL)
    {
	MPL_strncpy(g_pszArrowFilename, "ArrowFile.tmp", 1024);
	g_fArrow = fopen(g_pszArrowFilename, "w+b");
	if (g_fArrow == NULL)
	{
	    MPL_error_printf("unable to open ArrowFile.tmp\n");
	    return;
	}
    }

    if (pArrow->sendrecv == RLOG_SENDER)
    {
	pNode = GetArrowNode(pArrow->remote);
	pEnd = ExtractEndNode(pNode, pArrow->rank, pArrow->tag);
	if (pEnd == NULL)
	{
	    /* no receive seen yet: queue the send */
	    pStart = (StartArrowStruct *)MPL_malloc(sizeof(StartArrowStruct));
	    if (pStart == NULL)
	    {
		MPL_error_printf("unable to allocate a start arrow record\n");
		return;
	    }
	    pStart->src = pArrow->rank;
	    pStart->tag = pArrow->tag;
	    pStart->length = pArrow->length;
	    pStart->start_time = pArrow->timestamp;
	    pStart->next = NULL;
	    if (pNode->pStartList == NULL)
	    {
		pNode->pStartList = pStart;
	    }
	    else
	    {
		pStartIter = pNode->pStartList;
		while (pStartIter->next != NULL)
		    pStartIter = pStartIter->next;
		pStartIter->next = pStart;
	    }
	    return;
	}
	arrow.src = pArrow->rank;
	arrow.dest = pArrow->remote;
	arrow.length = pArrow->length;
	arrow.start_time = pEnd->timestamp;
	arrow.end_time = pArrow->timestamp;
	arrow.tag = pArrow->tag;
	arrow.leftright = RLOG_ARROW_LEFT;
	/* fwrite(&arrow, sizeof(RLOG_ARROW), 1, g_fArrow); */
	WriteFileData((const char *)&arrow, (int)sizeof(RLOG_ARROW), g_fArrow);
	MPL_free(pEnd);
    }
    else
    {
	arrow.dest = pArrow->rank;
	arrow.end_time = pArrow->timestamp;
	arrow.tag = pArrow->tag;
	arrow.length = pArrow->length;

	pNode = GetArrowNode(pArrow->rank);
	pStart = ExtractStartNode(pNode, pArrow->remote, pArrow->tag);
	if (pStart != NULL)
	{
	    arrow.src = pStart->src;
	    arrow.start_time = pStart->start_time;
	    arrow.length = pStart->length; /* the sender length is more accurate than the receiver length */
	    arrow.leftright = RLOG_ARROW_RIGHT;
	    MPL_free(pStart);
	    /* fwrite(&arrow, sizeof(RLOG_ARROW), 1, g_fArrow); */
	    WriteFileData((const char *)&arrow, (int)sizeof(RLOG_ARROW), g_fArrow);
	}
	else
	{
	    /* no send seen yet: queue the receive */
	    pEnd = (EndArrowStruct *)MPL_malloc(sizeof(EndArrowStruct));
	    if (pEnd == NULL)
	    {
		MPL_error_printf("unable to allocate an end arrow record\n");
		return;
	    }
	    pEnd->src = pArrow->remote;
	    pEnd->tag = pArrow->tag;
	    pEnd->timestamp = pArrow->timestamp;
	    pEnd->next = NULL;
	    if (pNode->pEndList == NULL)
	    {
		pNode->pEndList = pEnd;
	    }
	    else
	    {
		pEndIter = pNode->pEndList;
		while (pEndIter->next != NULL)
		    pEndIter = pEndIter->next;
		pEndIter->next = pEnd;
	    }
	}
    }

    /* fwrite(pArrow, sizeof(RLOG_IARROW), 1, g_fArrow); */
}
Example #11
0
/*
 * fPMI_Handle_spawn - process one "mcmd=spawn" PMI command from 'pentry'.
 *
 * Each spawn command describes one application (ProcessApp).  The first
 * command of a sequence also creates the ProcessWorld and the KVS space that
 * are kept in pentry until the sequence is complete.  The "key=value" lines
 * of the command are read until "endcmd"; once the app announced as the last
 * one has been read (totspawns == spawnssofar), the registered user spawner
 * is invoked and a "cmd=spawn_result" line is written back.
 *
 * Returns 0 on success (including "more spawn commands expected") and 1 on
 * an allocation failure or a malformed command.
 *
 * NOTE(review): the error returns inside the parse loop leave the partially
 * filled app linked into pentry and do not release strings already
 * duplicated into args[]; confirm whether the caller tears the entry down.
 */
static int fPMI_Handle_spawn(PMIProcess * pentry)
{
    char inbuf[PMIU_MAXLINE];
    char *(args[PMI_MAX_ARGS]);
    char key[MAXKEYLEN];
    char outbuf[PMIU_MAXLINE];
    ProcessWorld *pWorld;
    ProcessApp *app = 0;
    int preputNum = 0, rc;
    int i;
    int totspawns = 0, spawnnum = 0;
    PMIKVSpace *kvs = 0;
    /* Variables for info */
    char curInfoKey[PMI_MAX_INFO_KEY], curInfoVal[PMI_MAX_INFO_VAL];
    int curInfoIdx = -1;

    DBG_PRINTFCOND(pmidebug, ("Entering fPMI_Handle_spawn\n"));

    if (!pentry->spawnWorld) {
        pWorld = (ProcessWorld *) MPL_malloc(sizeof(ProcessWorld), MPL_MEM_PM);
        if (!pWorld)
            return 1;

        pentry->spawnWorld = pWorld;
        pWorld->apps = 0;
        pWorld->nProcess = 0;
        pWorld->nextWorld = 0;
        pWorld->nApps = 0;
        pWorld->worldNum = pUniv.nWorlds++;
        /* FIXME: What should be the defaults for the spawned env?
         * Should the default be the env ov the spawner? */
        pWorld->genv = 0;
        pentry->spawnKVS = fPMIKVSAllocate();
    } else {
        pWorld = pentry->spawnWorld;
    }
    kvs = pentry->spawnKVS;

    /* Note that each mcmd=spawn creates an app.  When all apps
     * are present, then then can be linked to a world.  A
     * spawnmultiple command makes use of multiple mcmd=spawn PMI
     * commands */

    /* Create a new app */
    app = (ProcessApp *) MPL_malloc(sizeof(ProcessApp), MPL_MEM_PM);
    if (!app)
        return 1;
    app->myAppNum = 0;
    app->exename = 0;
    app->arch = 0;
    app->path = 0;
    app->wdir = 0;
    app->hostname = 0;
    app->args = 0;
    app->nArgs = 0;
    app->soft.nelm = 0;
    app->nProcess = 0;
    app->pState = 0;
    app->nextApp = 0;
    app->env = 0;
    app->pWorld = pWorld;

    /* Add to the pentry spawn structure */
    if (pentry->spawnAppTail) {
        pentry->spawnAppTail->nextApp = app;
    } else {
        pentry->spawnApp = app;
        pWorld->apps = app;
    }
    pentry->spawnAppTail = app;

    for (i = 0; i < PMI_MAX_ARGS; i++)
        args[i] = 0;

    /* A preput_val_ line that arrives before any preput_key_ line must not
     * hand an uninitialized key to the KVS */
    key[0] = 0;

    /* Get lines until we find either cmd or mcmd (an error) or endcmd
     * (expected end) */
    while ((rc = PMIUBufferedReadLine(pentry, inbuf, sizeof(inbuf))) > 0) {
        char *cmdPtr, *valPtr, *p;

        /* Find the command = format */
        p = inbuf;
        /* Find first nonblank */
        while (*p && isascii(*p) && isspace(*p))
            p++;
        if (!*p) {
            /* Empty string.  Ignore */
            continue;
        }
        cmdPtr = p++;
        /* Find '=' */
        while (*p && *p != '=')
            p++;
        if (!*p) {
            /* No =.  Check for endcmd */
            p--;
            /* Trim spaces */
            while (isascii(*p) && isspace(*p))
                p--;
            /* Add null to end */
            *++p = 0;
            if (strcmp("endcmd", cmdPtr) == 0) {
                break;
            }
            /* FIXME: Otherwise, we have a problem */
            MPL_error_printf("Malformed PMI command (no endcmd seen\n");
            return 1;
        } else {
            *p = 0;
        }

        /* Found an = .  value is the rest of the line */
        valPtr = ++p;
        while (*p && *p != '\n')
            p++;
        if (*p)
            *p = 0;     /* Remove the newline */

        /* Now, process the cmd and value */
        if (strcmp("nprocs", cmdPtr) == 0) {
            app->nProcess = atoi(valPtr);
            pWorld->nProcess += app->nProcess;
        } else if (strcmp("execname", cmdPtr) == 0) {
            app->exename = MPL_strdup(valPtr);
        } else if (strcmp("totspawns", cmdPtr) == 0) {
            /* This tells us how many separate spawn commands
             * we expect to see (e.g., for spawn multiple).
             * Each spawn command is a separate "app" */
            totspawns = atoi(valPtr);
        } else if (strcmp("spawnssofar", cmdPtr) == 0) {
            /* This tells us which app we are (starting from 1) */
            spawnnum = atoi(valPtr);
            app->myAppNum = spawnnum - 1;
        } else if (strcmp("argcnt", cmdPtr) == 0) {
            /* argcnt may not be set before the args */
            app->nArgs = atoi(valPtr);
        } else if (strncmp("arg", cmdPtr, 3) == 0) {
            int argnum;
            /* argcnt may not be set before the args */
            /* Handle arg%d.  Values are 1 - origin */
            argnum = atoi(cmdPtr + 3) - 1;
            if (argnum < 0 || argnum >= PMI_MAX_ARGS) {
                MPL_error_printf
                    ("Malformed PMI Spawn command; the index of an argument in the command is %d but must be between 0 and %d\n",
                     argnum, PMI_MAX_ARGS - 1);
                return 1;
            }
            args[argnum] = MPL_strdup(valPtr);
        } else if (strcmp("preput_num", cmdPtr) == 0) {
            preputNum = atoi(valPtr);
        } else if (strncmp("preput_key_", cmdPtr, 11) == 0) {
            /* Save the key */
            MPL_strncpy(key, valPtr, sizeof(key));
        } else if (strncmp("preput_val_", cmdPtr, 11) == 0) {
            /* Place the key,val into the space associate with the current
             * PMI group */
            fPMIKVSAddPair(kvs, key, valPtr);
        }
        /* Info is on a per-app basis (it is an array of info items in
         * spawn multiple).  We can ignore most info values.
         * The ones that are handled are processed by a
         * separate routine (not yet implemented).
         * simple_pmi.c sends (key,value), so we can keep just the
         * last key and pass the key/value to the registered info
         * handler, along with tha app structure.  Alternately,
         * we could save all info items and let the user's
         * spawner handle it */
        else if (strcmp("info_num", cmdPtr) == 0) {
            /* Number of info values */
            ;
        } else if (strncmp("info_key_", cmdPtr, 9) == 0) {
            /* The actual name has a digit, which indicates *which* info
             * key this is */
            curInfoIdx = atoi(cmdPtr + 9);
            MPL_strncpy(curInfoKey, valPtr, sizeof(curInfoKey));
        } else if (strncmp("info_val_", cmdPtr, 9) == 0) {
            /* The actual name has a digit, which indicates *which* info
             * value this is */
            int idx = atoi(cmdPtr + 9);
            if (idx != curInfoIdx) {
                MPL_error_printf
                    ("Malformed PMI command: info keys and values not ordered as expected (expected value %d but got %d)\n",
                     curInfoIdx, idx);
                return 1;
            } else {
                MPL_strncpy(curInfoVal, valPtr, sizeof(curInfoVal));
                /* Apply this info item */
                fPMIInfoKey(app, curInfoKey, curInfoVal);
                /* printf("Got info %s+%s\n", curInfoKey, curInfoVal); */
            }
        } else {
            MPL_error_printf("Unrecognized PMI subcommand on spawnmult: %s\n", cmdPtr);
            return 1;
        }
    }

    /* argcnt comes from the client and is used below to index args[], which
     * holds only PMI_MAX_ARGS entries */
    if (app->nArgs > PMI_MAX_ARGS) {
        MPL_error_printf
            ("Malformed PMI Spawn command; argcnt is %d but must be no more than %d\n",
             app->nArgs, PMI_MAX_ARGS);
        app->nArgs = 0;
        return 1;
    }

    if (app->nArgs > 0) {
        app->args = (const char **) MPL_malloc(app->nArgs * sizeof(char *), MPL_MEM_PM);
        if (!app->args) {
            app->nArgs = 0;
            return 1;
        }
        for (i = 0; i < app->nArgs; i++) {
            app->args[i] = args[i];
            args[i] = 0;
        }
    }

    pWorld->nApps++;

    /* Now that we've read the commands, invoke the user's spawn command */
    if (totspawns == spawnnum) {
        PMISetupNewGroup(pWorld->nProcess, kvs);

        if (userSpawner) {
            rc = (*userSpawner) (pWorld, userSpawnerData);
        } else {
            MPL_error_printf("Unable to spawn %s\n", app->exename);
            rc = 1;
            MPIE_PrintProcessWorld(stdout, pWorld);
        }

        MPL_snprintf(outbuf, PMIU_MAXLINE, "cmd=spawn_result rc=%d\n", rc);
        PMIWriteLine(pentry->fd, outbuf);
        DBG_PRINTFCOND(pmidebug, ("%s", outbuf));

        /* Clear for the next spawn */
        pentry->spawnApp = 0;
        pentry->spawnAppTail = 0;
        pentry->spawnKVS = 0;
        pentry->spawnWorld = 0;
    }

    /* If totspawnnum != spawnnum, then we are expecting a
     * spawnmult with additional items */
    return 0;
}
Example #12
0
/*
 * RLOG_CreateInputStruct - open an rlog file and index its sections.
 *
 * The file is read as a sequence of (type, length) section headers:
 *   - header section: read into pInput->header; the per-rank arrays are
 *     allocated for nMaxRank + 1 - nMinRank ranks
 *   - state / arrow sections: element count and file offset are recorded and
 *     the payload is skipped
 *   - event section: the per-recursion-level event counts and file offsets
 *     for one rank are recorded and the payload is skipped
 *   - any other section is skipped
 * Finally all iterators are reset.
 *
 * Returns the new structure, or NULL after printing a message when the
 * allocation, the fopen, the header read, or the rank range check fails.
 *
 * NOTE(review): the error paths close the file and free pInput itself but
 * not the per-rank arrays that may already have been allocated.
 */
RLOG_IOStruct *RLOG_CreateInputStruct(const char *filename)
{
    int i, j, rank_index, cur_rank, min_rank = 0;
    RLOG_IOStruct *pInput;
    int type, length;

    /* allocate an input structure */
    pInput = (RLOG_IOStruct*)MPIU_Malloc(sizeof(RLOG_IOStruct));
    if (pInput == NULL)
    {
	MPL_error_printf("malloc failed - %s\n", strerror(errno));
	return NULL;
    }
    pInput->pRank = NULL;
    pInput->pNumEventRecursions = NULL;
    pInput->ppCurEvent = NULL;
    pInput->ppCurGlobalEvent = NULL;
    pInput->gppCurEvent = NULL;
    pInput->gppPrevEvent = NULL;
    pInput->ppEventOffset = NULL;
    pInput->ppNumEvents = NULL;
    pInput->nNumArrows = 0;
    /* open the input rlog file */
    pInput->f = fopen(filename, "rb");
    if (pInput->f == NULL)
    {
	MPL_error_printf("fopen(%s) failed, error: %s\n", filename, strerror(errno));
	MPIU_Free(pInput);
	return NULL;
    }
    pInput->nNumRanks = 0;
    /* read the sections */
    while (fread(&type, sizeof(int), 1, pInput->f))
    {
	/* a section header cut off after its type ends the scan; 'length'
	   would otherwise be used uninitialized */
	if (fread(&length, sizeof(int), 1, pInput->f) != 1)
	    break;
	switch (type)
	{
	case RLOG_HEADER_SECTION:
	    /*printf("type: RLOG_HEADER_SECTION, length: %d\n", length);*/
	    if (length != sizeof(RLOG_FILE_HEADER))
	    {
		MPL_error_printf("error in header size %d != %d\n", length, 
				  (int)sizeof(RLOG_FILE_HEADER));
	    }
	    if (ReadFileData((char*)&pInput->header, sizeof(RLOG_FILE_HEADER), pInput->f))
	    {
		rlog_err_printf("reading rlog header failed\n");
		fclose(pInput->f);
		MPIU_Free(pInput);
		return NULL;
	    }
	    
	    pInput->nNumRanks = pInput->header.nMaxRank + 1 - pInput->header.nMinRank;
	    min_rank = pInput->header.nMinRank;
	    
	    pInput->pRank = (int*)MPIU_Malloc(pInput->nNumRanks * sizeof(int));
	    pInput->pNumEventRecursions = (int*)MPIU_Malloc(pInput->nNumRanks * sizeof(int));
	    pInput->ppNumEvents = (int**)MPIU_Malloc(pInput->nNumRanks * sizeof(int*));
	    pInput->ppCurEvent = (int**)MPIU_Malloc(pInput->nNumRanks * sizeof(int*));
	    pInput->ppCurGlobalEvent = (int**)MPIU_Malloc(pInput->nNumRanks * sizeof(int*));
	    pInput->gppCurEvent = (RLOG_EVENT**)MPIU_Malloc(pInput->nNumRanks * sizeof(RLOG_EVENT*));
	    pInput->gppPrevEvent = (RLOG_EVENT**)MPIU_Malloc(pInput->nNumRanks * sizeof(RLOG_EVENT*));
	    pInput->ppEventOffset = (long**)MPIU_Malloc(pInput->nNumRanks * sizeof(long*));
	    for (i=0; i<pInput->nNumRanks; i++)
	    {
		pInput->pRank[i] = -1;
		pInput->pNumEventRecursions[i] = 0;
		pInput->ppNumEvents[i] = NULL;
		pInput->ppCurEvent[i] = NULL;
		pInput->ppCurGlobalEvent[i] = NULL;
		pInput->gppCurEvent[i] = NULL;
		pInput->gppPrevEvent[i] = NULL;
		pInput->ppEventOffset[i] = NULL;
	    }
	    break;
	case RLOG_STATE_SECTION:
	    /*printf("type: RLOG_STATE_SECTION, length: %d\n", length);*/
	    pInput->nNumStates = length / sizeof(RLOG_STATE);
	    pInput->nStateOffset = ftell(pInput->f);
	    fseek(pInput->f, length, SEEK_CUR);
	    break;
	case RLOG_ARROW_SECTION:
	    /*printf("type: RLOG_ARROW_SECTION, length: %d\n", length);*/
	    pInput->nNumArrows = length / sizeof(RLOG_ARROW);
	    pInput->nArrowOffset = ftell(pInput->f);
	    fseek(pInput->f, length, SEEK_CUR);
	    break;
	case RLOG_EVENT_SECTION:
	    /*printf("type: RLOG_EVENT_SECTION, length: %d, ", length);*/
	    fread(&cur_rank, sizeof(int), 1, pInput->f);
	    rank_index = cur_rank - min_rank;
	    /* reject ranks on either side of the header's range: a negative
	       index would write before the start of the per-rank arrays */
	    if (rank_index < 0 || rank_index >= pInput->nNumRanks)
	    {
		MPL_error_printf("Error: event section out of range - %d <= %d <= %d\n", pInput->header.nMinRank, cur_rank, pInput->header.nMaxRank);
		fclose(pInput->f);
		MPIU_Free(pInput);
		return NULL;
	    }
	    fread(&pInput->pNumEventRecursions[rank_index], sizeof(int), 1, pInput->f);
	    /*printf("levels: %d\n", pInput->nNumEventRecursions);*/
	    if (pInput->pNumEventRecursions[rank_index])
	    {
		pInput->ppCurEvent[rank_index] = (int*)MPIU_Malloc(pInput->pNumEventRecursions[rank_index] * sizeof(int));
		pInput->ppCurGlobalEvent[rank_index] = (int*)MPIU_Malloc(pInput->pNumEventRecursions[rank_index] * sizeof(int));
		pInput->gppCurEvent[rank_index] = (RLOG_EVENT*)MPIU_Malloc(pInput->pNumEventRecursions[rank_index] * sizeof(RLOG_EVENT));
		pInput->gppPrevEvent[rank_index] = (RLOG_EVENT*)MPIU_Malloc(pInput->pNumEventRecursions[rank_index] * sizeof(RLOG_EVENT));
		pInput->ppNumEvents[rank_index] = (int*)MPIU_Malloc(pInput->pNumEventRecursions[rank_index] * sizeof(int));
		pInput->ppEventOffset[rank_index] = (long*)MPIU_Malloc(pInput->pNumEventRecursions[rank_index] * sizeof(long));
	    }
	    for (i=0; i<pInput->pNumEventRecursions[rank_index]; i++)
	    {
		fread(&pInput->ppNumEvents[rank_index][i], sizeof(int), 1, pInput->f);
		/*printf(" level %2d: %d events\n", i, pInput->pNumEvents[i]);*/
	    }
	    if (pInput->pNumEventRecursions[rank_index])
	    {
		/* level 0 starts right here; each further level starts after
		   the events of the previous one */
		pInput->ppEventOffset[rank_index][0] = ftell(pInput->f);
		for (i=1; i<pInput->pNumEventRecursions[rank_index]; i++)
		{
		    pInput->ppEventOffset[rank_index][i] = pInput->ppEventOffset[rank_index][i-1] + (pInput->ppNumEvents[rank_index][i-1] * sizeof(RLOG_EVENT));
		}
	    }
	    /* skip what is left of the section: the rank, the level count and
	       the per-level counts have already been consumed */
	    length -= ((pInput->pNumEventRecursions[rank_index] + 2) * sizeof(int));
	    fseek(pInput->f, length, SEEK_CUR);
	    break;
	default:
	    /*printf("unknown section: type %d, length %d\n", type, length);*/
	    fseek(pInput->f, length, SEEK_CUR);
	    break;
	}
    }
    /* reset the iterators */
    RLOG_ResetStateIter(pInput);
    RLOG_ResetArrowIter(pInput);
    for (j=0; j<pInput->nNumRanks; j++)
    {
	for (i=0; i<pInput->pNumEventRecursions[j]; i++)
	    RLOG_ResetEventIter(pInput, j+pInput->header.nMinRank, i);
    }
    RLOG_ResetGlobalIter(pInput);

    return pInput;
}
Example #13
0
static int ModifyArrows(FILE *f, int nNumArrows, int nMin, double *pOffsets, int n)
{
    RLOG_ARROW arrow, *pArray;
    int i, index, bModified;
    long arrow_pos;
    int error;
    double temp_time;

    fseek(f, 0, SEEK_CUR);
    arrow_pos = ftell(f);
    if (arrow_pos == -1)
	return errno;
    pArray = (RLOG_ARROW*)MPIU_Malloc(nNumArrows * sizeof(RLOG_ARROW));
    if (pArray)
    {
	MPL_msg_printf("Modifying %d arrows\n", nNumArrows);
	/* read the arrows */
	fseek(f, 0, SEEK_CUR);
	error = ReadFileData((char*)pArray, nNumArrows * sizeof(RLOG_ARROW), f);
	if (error)
	{
	    MPIU_Free(pArray);
	    return error;
	}

	/* modify the arrows */
	for (i=0; i<nNumArrows; i++)
	{
	    arrow = pArray[i];

	    bModified = FALSE;
	    index = (arrow.leftright == RLOG_ARROW_RIGHT) ? arrow.src - nMin : arrow.dest - nMin;
	    if (index >= 0 && index < n && pOffsets[index] != 0)
	    {
		arrow.start_time += pOffsets[index];
		bModified = TRUE;
	    }
	    index = (arrow.leftright == RLOG_ARROW_RIGHT) ? arrow.dest - nMin : arrow.src - nMin;
	    if (index >= 0 && index < n && pOffsets[index] != 0)
	    {
		arrow.end_time += pOffsets[index];
		bModified = TRUE;
	    }
	    if (bModified)
	    {
		if (arrow.start_time > arrow.end_time)
		{
		    temp_time = arrow.start_time;
		    arrow.start_time = arrow.end_time;
		    arrow.end_time = temp_time;
		    arrow.leftright = (arrow.leftright == RLOG_ARROW_LEFT) ? RLOG_ARROW_RIGHT : RLOG_ARROW_LEFT;
		}
		pArray[i] = arrow;
	    }
	}

	/* sort the arrows */
	qsort(pArray, (size_t)nNumArrows, sizeof(RLOG_ARROW), 
	      (int (*)(const void *,const void*))compareArrows);

	/* write the arrows back */
	fseek(f, arrow_pos, SEEK_SET);
	error = WriteFileData((char*)pArray, nNumArrows * sizeof(RLOG_ARROW), f);
	if (error)
	{
	    MPIU_Free(pArray);
	    return error;
	}
	fseek(f, 0, SEEK_CUR);
	MPIU_Free(pArray);
    }
    else
    {
	MPL_error_printf("Error: unable to allocate an array big enough to hold %d arrows\n", nNumArrows);
	return -1;
    }
    return 0;
}
Example #14
0
/*
 * PMIServGetPort - create a listening TCP socket and describe it as a
 * "host:port" string.
 *
 * The port range is taken from the first of the environment variables
 * MPIEXEC_PORTRANGE, MPIEXEC_PORT_RANGE, MPICH_PORT_RANGE that is set, in
 * "low:high" format.  Without any of them both bounds are zero, which lets
 * the system choose the port.
 *
 * Input Parameters:
 *   portLen - Number of characters in portString
 * Output Parameters:
 *   fdout - An fd that is listening for connection attempts.
 *           Use PMIServAcceptFromPort to process reads from this fd
 *   portString - The name of a port that can be used to connect to 
 *           this process (using connect).
 *
 * Returns 0 on success.  Returns a negative value when the range contains an
 * invalid character, when socket() fails, when bind() fails for a reason
 * other than the address being in use or unavailable, when no port in the
 * range could be bound, or when listen() fails.
 */
int PMIServGetPort( int *fdout, char *portString, int portLen )
{
    int                fd = -1;
    struct sockaddr_in sa;
    int                optval = 1;
    int                portnum;
    char               *range_ptr;
    int                low_port=0, high_port=0;

    /* Under cygwin we may want to use 1024 as a low port number */
    /* a low and high port of zero allows the system to choose 
       the port value */
    
    /* Get the low and high portnumber range.  zero may be used to allow
       the system to choose.  There is a priority to these values, 
       we keep going until we get one (and skip if none is found) */
    
    range_ptr = getenv( "MPIEXEC_PORTRANGE" );
    if (!range_ptr) {
	range_ptr = getenv( "MPIEXEC_PORT_RANGE" );
    }
    if (!range_ptr) {
	range_ptr = getenv( "MPICH_PORT_RANGE" );
    }
    if (range_ptr) {
	char *p;
	/* Look for n:m format.  The casts keep the <ctype.h> calls defined
	   for bytes with the high bit set. */
	p = range_ptr;
	while (*p && isspace((unsigned char)*p)) p++;
	while (*p && isdigit((unsigned char)*p)) low_port = 10 * low_port + (*p++ - '0');
	if (*p == ':') {
	    p++;
	    while (*p && isdigit((unsigned char)*p)) high_port = 10 * high_port + (*p++ - '0');
	}
	if (*p) {
	    MPL_error_printf( "Invalid character %c in MPIEXEC_PORTRANGE\n", 
			       *p );
	    return -1;
	}
    }

    for (portnum=low_port; portnum<=high_port; portnum++) {
	memset( (void *)&sa, 0, sizeof(sa) );
	sa.sin_family	   = AF_INET;
	sa.sin_port	   = htons( portnum );
	sa.sin_addr.s_addr = INADDR_ANY;
    
	fd = socket( AF_INET, SOCK_STREAM, TCP );
	if (fd < 0) {
	    /* Failure; return immediately */
	    return fd;
	}
    
	if (setsockopt( fd, IPPROTO_TCP, TCP_NODELAY, 
		    (char *)&optval, sizeof(optval) )) {
	    MPL_internal_sys_error_printf( "setsockopt", errno, 0 );
	}
	
	if (bind( fd, (struct sockaddr *)&sa, sizeof(sa) ) < 0) {
	    /* close() may overwrite errno, so capture the bind error first */
	    int bind_errno = errno;
	    close( fd );
	    fd = -1;
	    if (bind_errno != EADDRINUSE && bind_errno != EADDRNOTAVAIL) {
		return -1;
	    }
	}
	else {
	    /* Success! We have a port.  */
	    break;
	}
    }
    
    if (fd < 0) {
	/* We were unable to find a usable port */
	return -1;
    }

    DBG_PRINTF( ("Listening on fd %d\n", fd) );
    /* Listen is a non-blocking call that enables connections */
    if (listen( fd, MAX_PENDING_CONN ) < 0) {
	MPL_internal_sys_error_printf( "listen", errno, 0 );
	close( fd );
	return -1;
    }

    /* Make sure that this fd doesn't get sent to the children */
    fcntl( fd, F_SETFD, FD_CLOEXEC );
    
    *fdout = fd;
    if (portnum == 0) {
	socklen_t sinlen = sizeof(sa);
	/* We asked for *any* port, so we need to find which
	   port we actually got */
	getsockname( fd, (struct sockaddr *)&sa, &sinlen );
	portnum = ntohs(sa.sin_port);
    }

    /* Create the port string */
    {
	char hostname[MAX_HOST_NAME+1];
	hostname[0] = 0;
	MPIE_GetMyHostName( hostname, sizeof(hostname) );
	MPL_snprintf( portString, portLen, "%s:%d", hostname, portnum );
    }
    
    return 0;
}
Example #15
0
/* IO Handler for the listen socket
   Respond to a connection request by creating a new socket, which is
   then registered.
   Initialize the startup handshake.

   'data' is the ProcessUniverse whose worlds are searched for the process
   that connected.  The id read from the new socket is treated as a rank
   counted across all apps of all worlds, in list order.

   Returns 0 on success, the (negative) accept result if accept fails, and
   -1 if the id is invalid or matches no process.
 */
int PMIServAcceptFromPort( int fd, int rdwr, void *data )
{
    int             newfd;
    struct sockaddr sock;
    socklen_t       addrlen = sizeof(sock);
    int             id;
    ProcessUniverse *univ = (ProcessUniverse *)data;
    ProcessWorld    *pWorld = univ->worlds;
    ProcessApp      *app;

    /* Get the new socket */
    MPIE_SYSCALL(newfd,accept,( fd, &sock, &addrlen ));
    DBG_PRINTF(("Acquired new socket in accept (fd = %d)\n", newfd ));
    if (newfd < 0) {
	DBG(perror("Error on accept: " ));
	return newfd;
    }

#ifdef FOO
    /* Mark this fd as non-blocking */
    flags = fcntl( newfd, F_GETFL, 0 );
    if (flags >= 0) {
	flags |= O_NDELAY;
	fcntl( newfd, F_SETFL, flags );
    }
#endif
    /* Make sure that exec'd processes don't get this fd */
    fcntl( newfd, F_SETFD, FD_CLOEXEC );

    /* Find the matching process.  Do this by reading from the socket and 
       getting the id value with which process was created. */
    id = PMI_Init_port_connection( newfd );
    if (id >= 0) {
	/* find the matching entry */
	ProcessState *pState = 0;
	int           nSoFar = 0;
	PMIProcess   *pmiprocess;

	/* This code assigns processes to the states in a pWorld
	   by using the id as the rank, and finding the corresponding
	   process among the ranks */
	/* Stop at the first match: the inner break only leaves the app
	   loop, and nSoFar is not advanced past the matching app, so
	   continuing into later worlds could replace pState with a
	   process from the wrong world. */
	while (pWorld && !pState) {
	    app = pWorld->apps;
	    while (app) {
		if (app->nProcess > id - nSoFar) {
		    /* Found the matching app */
		    pState = app->pState + (id - nSoFar);
		    break;
		}
		else {
		    nSoFar += app->nProcess;
		}
		app = app->nextApp;
	    }
	    pWorld = pWorld->nextWorld;
	}
	if (!pState) {
	    /* We have a problem */
	    MPL_error_printf( "Unable to find process with PMI_ID = %d in the universe", id );
	    return -1;
	}

	/* Now, initialize the connection */
	/* Create the new process structure (see PMISetupFinishInServer
	   for this step when a pre-existing FD is used */
	DBG_PRINTF( ("Server connection to id = %d on fd %d\n", id, newfd ));
	pmiprocess = PMISetupNewProcess( newfd, pState );
	PMI_Init_remote_proc( newfd, pmiprocess );
	MPIE_IORegister( newfd, IO_READ, PMIServHandleInput, 
			 pmiprocess );
    }
    else {
	/* Error, the id should never be less than zero or unset */
	/* An alternative would be to dynamically assign the ranks
	   as processes come in (but we'd still need to use the 
	   PMI_ID to identify the ProcessApp) */
	DBG_PRINTF(("Found an invalid id\n" ));
	return -1;
    }

    /* Return success. */
    return 0;
}
Example #16
0
/* Close one side of each pipe pair and replace stdout/err with the pipes,
   then rewrite the application's command line so that the program is
   started through the remote shell:

       /usr/bin/ssh -Y <hostname> <PMI_PORT / PMI_ID / optional PMI_DEBUG
                       settings added by AddEnvSetToCmdLine>
                       <original exename> <original args...>

   predata - SetupInfo for this run (label and PMI information)
   data    - not referenced by this routine
   pState  - process being started; pState->app is modified in place

   Returns 0.  Exits with status 1 if the process has no hostname or if
   memory for the new command line cannot be allocated. */
int mypostfork( void *predata, void *data, ProcessState *pState )
{
    SetupInfo *s = (SetupInfo *)predata;
    int curarg=0;

    IOLabelSetupInClient( &s->labelinfo );
    PMISetupInClient( 1, &s->pmiinfo );

    /* Now, we *also* change the process state to insert the 
       interposed remote shell routine.  This is probably not
       where we want this in the final version (because MPIE_ExecProgram
       does a lot under the assumption that the started program will
       know what to do with new environment variables), but this
       will allow us to start. */
    {
	ProcessApp *app = pState->app;
	const char **newargs = 0;
	char *pmiDebugStr = 0;
	int j;
	char rankStr[12];

	/* Check the precondition before allocating anything */
	if (!pState->hostname) {
	    MPL_error_printf( "No hostname avaliable for %s\n", app->exename );
	    exit(1);
	}

	/* Insert into app->args.  14 extra slots are reserved for the
	   inserted arguments and 1 for the terminating null pointer.
	   NOTE(review): this assumes the three AddEnvSetToCmdLine calls
	   together add at most 11 entries - confirm against that routine. */
	newargs = (const char **) MPL_malloc( (app->nArgs + 14 + 1) *
					  sizeof(char *) );
	if (!newargs) {
	    MPL_error_printf( "Unable to allocate the argument list for %s\n",
			      app->exename );
	    exit(1);
	}

	/* snprintf always null-terminates within the given size */
	snprintf( rankStr, sizeof(rankStr), "%d", pState->id );

	curarg = 0;
	newargs[curarg] = MPL_strdup( "-Y" );
	if (!newargs[curarg]) {
	    MPL_error_printf( "Unable to allocate the argument list for %s\n",
			      app->exename );
	    exit(1);
	}
	curarg++;

	newargs[curarg++] = pState->hostname;
	curarg += AddEnvSetToCmdLine( "PMI_PORT", s->pmiinfo.portName, 
				      newargs + curarg );
	curarg += AddEnvSetToCmdLine( "PMI_ID", rankStr, newargs + curarg );
	pmiDebugStr = getenv( "PMI_DEBUG" );
	if (pmiDebugStr) {
	    /* Use this to help debug the connection process */
	    curarg += AddEnvSetToCmdLine( "PMI_DEBUG", pmiDebugStr, 
					  newargs + curarg );
	}

	/* The original program and its arguments follow the inserted ones */
	newargs[curarg++] = app->exename;
	for (j=0; j<app->nArgs; j++) {
	    newargs[j+curarg] = app->args[j];
	}
	newargs[j+curarg] = 0;

	/* The remote shell becomes the program that is actually exec'd */
	app->exename = MPL_strdup( "/usr/bin/ssh" );
	if (!app->exename) {
	    MPL_error_printf( "Unable to allocate the remote shell command name\n" );
	    exit(1);
	}

	app->args = newargs;
	app->nArgs += curarg;

	if (MPIE_Debug) {
	    printf( "cmd = %s\n", app->exename ); fflush(stdout);
	    printf( "Number of args = %d\n", app->nArgs );
	    for (j=0; j<app->nArgs; j++) {
		printf( "argv[%d] = %s\n", j, app->args[j] ); fflush(stdout);
	    }
	}
    }

    return 0;
}
Example #17
0
int main(int argc, char *argv[])
{
    RLOG_FILE_HEADER header;
    RLOG_State_list *pState;

    int nNumInputs;
    IRLOG_IOStruct *pInput;
    IRLOG_IOStruct **ppInput;

    int nMaxRank = 0;
    int nMinRank = MAX_RANK;
    int nNumStates = 0;
    int type, length;
    int nMaxLevel = 0;
    int nNumLevels = 0;
    int nTotalNumEvents = 0;
    int nNumEvents;
    RecursionStruct *pLevel;
    FILE *fout;
    int i, j;
    int nRank;

    if (argc < 3)
    {
	MPL_error_printf("Usage:\nirlog2rlog out.rlog in0.irlog in1.irlog ...\nirlog2rlog out.rlog n\n");
	return 0;
    }

    if (argc == 3 && IsNumber(argv[2]))
    {
	GenerateNewArgv(&argc, &argv, atoi(argv[2]));
    }

    nNumInputs = argc - 2;

    /* open the output rlog file */
    fout = fopen(argv[1], "wb");
    if (fout == NULL)
    {
	MPL_error_printf("unable to open output file '%s'\n", argv[1]);
	return -1;
    }

    /* read the arrows from all the files in order */
    ppInput = (IRLOG_IOStruct**)MPL_malloc(nNumInputs * sizeof(IRLOG_IOStruct*));
    for (i=0; i<nNumInputs; i++)
    {
	ppInput[i] = IRLOG_CreateInputStruct(argv[i+2]);
	if (ppInput[i] == NULL)
	{
	    MPL_error_printf("Unable to create an input structure for '%s', skipping\n", argv[i+2]);
	}
    }
    for (i=0; i<nNumInputs; i++)
    {
	if (ppInput[i] == NULL)
	{
	    for (j=i; j<nNumInputs-1; j++)
		ppInput[j] = ppInput[j+1];
	    nNumInputs--;
	    i--;
	}
    }
    MPL_msg_printf("reading the arrows from all the input files.\n");fflush(stdout);
    ReadAllArrows(ppInput, nNumInputs);

    nNumInputs = argc - 2;

    /* read, parse and save all the data from the irlog files */
    for (i=0; i<nNumInputs; i++)
    {
	pInput = IRLOG_CreateInputStruct(argv[i+2]);
	if (pInput == NULL)
	{
	    MPL_error_printf("Unable to create an input structure for '%s', skipping\n", argv[i+2]);
	}
	else
	{
	    MPL_msg_printf("reading irlog file: %s\n", argv[i+2]);fflush(stdout);
	    for(;;)
	    {
		switch (pInput->header.type)
		{
		case RLOG_STATE_TYPE:
		    SaveState(&pInput->record.state);
		    break;
		case RLOG_COMM_TYPE:
		    nMaxRank = (pInput->record.comm.rank > nMaxRank) ? pInput->record.comm.rank : nMaxRank;
		    nMinRank = (pInput->record.comm.rank < nMinRank) ? pInput->record.comm.rank : nMinRank;
		    break;
		case RLOG_IARROW_TYPE:
		    /* SaveArrow(&pInput->record.iarrow); */
		    break;
		case RLOG_EVENT_TYPE:
		    SaveEvent(&pInput->record.event);
		    break;
		default:
		    MPL_error_printf("Unknown irlog record type: %d\n", pInput->header.type);
		    break;
		}
		
		if (IRLOG_GetNextRecord(pInput))
		{
		    IRLOG_CloseInputStruct(&pInput);
		    break;
		}
	    }
	}
    }

    /* set the fields in the header */
    header.nMinRank = FindMinRank(g_pLevel);
    header.nMaxRank = FindMaxRank(g_pLevel);
    if (nMinRank != header.nMinRank)
	MPL_error_printf("minimum event rank %d does not equal the minimum comm record rank %d\n", header.nMinRank, nMinRank);
    if (nMaxRank != header.nMaxRank)
	MPL_error_printf("maximum event rank %d does not equal the maximum comm record rank %d\n", header.nMaxRank, nMaxRank);

    /* write header */
    MPL_msg_printf("writing header.\n");fflush(stdout);
    type = RLOG_HEADER_SECTION;
    length = sizeof(RLOG_FILE_HEADER);
    /* fwrite(&type, sizeof(int), 1, fout); */
    WriteFileData(&type, sizeof(int), fout);
    /* fwrite(&length, sizeof(int), 1, fout);*/
    WriteFileData(&length, sizeof(int), fout);
    /* fwrite(&header, sizeof(RLOG_FILE_HEADER), 1, fout); */
    WriteFileData(&header, sizeof(RLOG_FILE_HEADER), fout);

    /* write states */
    if (g_pList)
    {
	MPL_msg_printf("writing states.\n");fflush(stdout);
    }
    pState = g_pList;
    while (pState)
    {
	nNumStates++;
	pState = pState->next;
    }
    type = RLOG_STATE_SECTION;
    length = nNumStates * sizeof(RLOG_STATE);
    /* fwrite(&type, sizeof(int), 1, fout); */
    WriteFileData(&type, sizeof(int), fout);
    /* fwrite(&length, sizeof(int), 1, fout); */
    WriteFileData(&length, sizeof(int), fout);
    pState = g_pList;
    while (pState)
    {
	/* fwrite(pState, sizeof(RLOG_STATE), 1, fout); */
	WriteFileData(pState, sizeof(RLOG_STATE), fout);
	pState = pState->next;
    }

    /* write arrows */
    if (g_fArrow)
    {
	MPL_msg_printf("writing arrows.\n");fflush(stdout);
	type = RLOG_ARROW_SECTION;
	length = ftell(g_fArrow);
	/* fwrite(&type, sizeof(int), 1, fout); */
	WriteFileData(&type, sizeof(int), fout);
	/* fwrite(&length, sizeof(int), 1, fout); */
	WriteFileData(&length, sizeof(int), fout);
	AppendFile(fout, g_fArrow);
    }

    /* write events */
    while (g_pLevel)
    {
	pLevel = FindMinLevel(g_pLevel);
	nNumLevels = FindMaxRecursion(g_pLevel, pLevel->rank);
	nRank = pLevel->rank;

	/* count the events for this rank */
	nNumEvents = 0;
	for (i=0; i<nNumLevels; i++)
	{
	    pLevel = GetLevel(pLevel->rank, i);
	    nNumEvents += pLevel->num_events;
	}
	/* write an event section for this rank */
	type = RLOG_EVENT_SECTION;
	length = sizeof(int) + sizeof(int) + (nNumLevels * sizeof(int)) + (nNumEvents * sizeof(RLOG_EVENT));
	/* fwrite(&type, sizeof(int), 1, fout); */
	WriteFileData(&type, sizeof(int), fout);
	/* fwrite(&length, sizeof(int), 1, fout); */
	WriteFileData(&length, sizeof(int), fout);
        /* fwrite(&nRank, sizeof(int), 1, fout); */
	WriteFileData(&nRank, sizeof(int), fout);
	/* fwrite(&nNumLevels, sizeof(int), 1, fout); */
	WriteFileData(&nNumLevels, sizeof(int), fout);
	for (i=0; i<nNumLevels; i++)
	{
	    pLevel = GetLevel(nRank, i);
	    /* fwrite(&pLevel->num_events, sizeof(int), 1, fout); */
	    WriteFileData(&pLevel->num_events, sizeof(int), fout);
	}
	for (i=0; i<nNumLevels; i++)
	{
	    MPL_msg_printf("writing event level %d:%d\n", nRank, i);fflush(stdout);
	    pLevel = GetLevel(nRank, i);
	    AppendFile(fout, pLevel->fout);
	}
	/* remove this rank from the list of levels */
	RemoveLevel(nRank);
    }

    /* free resources */
    while (g_pList)
    {
	pState = g_pList;
	g_pList = g_pList->next;
	MPL_free(pState);
    }
    if (g_fArrow)
    {
	fclose(g_fArrow);
	unlink(g_pszArrowFilename);
    }

    if (s_bFreeArgv)
	MPL_free(argv);

    return 0;
}
Example #18
0
/*
 * Advance pInput to the next record of its irlog file.
 *
 * The record header is copied into pInput->header and, for event, iarrow,
 * state and comm records, the payload that follows the header is copied
 * into the matching member of pInput->record.  Data is consumed from
 * pInput->buffer, which is refilled from pInput->f as needed; pCurHeader,
 * pNextHeader and pEnd are updated to describe the new position.
 *
 * Returns 0 when a record was loaded.  Returns 1 when the end-of-log record
 * is reached or on any error (short file, malformed length, invalid or
 * unknown record type).
 */
int IRLOG_GetNextRecord(IRLOG_IOStruct * pInput)
{
    int num_valid, num_read;

    pInput->pCurHeader = pInput->pNextHeader;

    /* Not even a complete header is left in the buffer: move the leftover
     * bytes to the front and read the remainder of the header. */
    if (pInput->pEnd - pInput->pCurHeader < sizeof(RLOG_HEADER)) {
        num_valid = (int) (pInput->pEnd - pInput->pCurHeader);
        /* source and destination lie in the same buffer and may overlap */
        if (pInput->pCurHeader != pInput->buffer)
            memmove(pInput->buffer, pInput->pCurHeader, num_valid);
        /* NOTE(review): nothing here detects a short read from ReadFileData */
        ReadFileData(pInput->buffer + num_valid, sizeof(RLOG_HEADER) - num_valid, pInput->f);
        pInput->pCurHeader = pInput->buffer;
        pInput->pNextHeader = pInput->buffer;
        pInput->pEnd = pInput->buffer + sizeof(RLOG_HEADER);
    }

    /* copy the current header into a temporary variable so the bytes can be manipulated */
    memcpy(&pInput->header, pInput->pCurHeader, sizeof(RLOG_HEADER));
    /*
     * CLOGByteSwapDouble(&(header.timestamp), 1);
     * CLOGByteSwapInt(&(header.rectype), 1);
     * CLOGByteSwapInt(&(header.length), 1);
     */

    /* The length comes from the file and counts the header itself (the next
     * header is located at pCurHeader + length).  A shorter value would make
     * the reader step backwards or parse the same bytes again. */
    if (pInput->header.length < (int) sizeof(RLOG_HEADER)) {
        MPL_error_printf("RLOG Error: invalid record length %d.\n", (int) pInput->header.length);
        return 1;
    }

    /* Refill until the whole record is in the buffer.  When the buffer is
     * already full, fread is asked for zero bytes, returns 0, and the record
     * is rejected below. */
    while (pInput->pCurHeader + pInput->header.length > pInput->pEnd) {
        num_valid = (int) (pInput->pEnd - pInput->pCurHeader);
        /* source and destination lie in the same buffer and may overlap */
        if (pInput->pCurHeader != pInput->buffer)
            memmove(pInput->buffer, pInput->pCurHeader, num_valid);
        num_read = (int) fread(pInput->buffer + num_valid, 1, RLOG_BUFFSIZE - num_valid, pInput->f);
        if (num_read == 0) {
            MPL_error_printf("RLOG Error: unable to get the next record.\n");
            return 1;
        }
        pInput->pEnd = pInput->buffer + num_valid + num_read;
        pInput->pCurHeader = pInput->buffer;
    }

    pInput->pNextHeader = pInput->pCurHeader + pInput->header.length;

    switch (pInput->header.type) {
        case RLOG_INVALID_TYPE:
            MPL_error_printf("RLOG Error: invalid record type.\n");
            return 1;
        case RLOG_ENDLOG_TYPE:
            /* normal end of the log */
            return 1;
        case RLOG_EVENT_TYPE:
            memcpy(&pInput->record.event, pInput->pCurHeader + sizeof(RLOG_HEADER),
                   sizeof(RLOG_EVENT));
            break;
        case RLOG_IARROW_TYPE:
            memcpy(&pInput->record.iarrow, pInput->pCurHeader + sizeof(RLOG_HEADER),
                   sizeof(RLOG_IARROW));
            break;
        case RLOG_STATE_TYPE:
            memcpy(&pInput->record.state, pInput->pCurHeader + sizeof(RLOG_HEADER),
                   sizeof(RLOG_STATE));
            break;
        case RLOG_COMM_TYPE:
            memcpy(&pInput->record.comm, pInput->pCurHeader + sizeof(RLOG_HEADER),
                   sizeof(RLOG_COMM));
            break;
        default:
            MPL_error_printf("RLOG Error: unknown record type %d.\n", pInput->header.type);
            return 1;
    }

    return 0;
}
Example #19
0
/*
 * Drain and dispatch pending Portals events.
 *
 * Each pass of the loop first reads MPIDI_nem_ptl_rpt_eq (asserting that it
 * is empty) and then tries MPIDI_nem_ptl_eq, MPIDI_nem_ptl_get_eq,
 * MPIDI_nem_ptl_control_eq and MPIDI_nem_ptl_origin_eq in that order,
 * handling the first event found.  The loop ends when all four report
 * PTL_EQ_EMPTY.
 *
 * is_blocking_poll is not referenced in this function.
 *
 * Returns mpi_errno, which starts as MPI_SUCCESS and is changed only by a
 * failing handler or by the MPIR_ERR_* macros (which presumably set it and
 * jump to fn_fail - confirm against their definitions).
 */
int MPID_nem_ptl_poll(int is_blocking_poll)
{
    int mpi_errno = MPI_SUCCESS;
    ptl_event_t event;
    int ret;
    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_PTL_POLL);

    /* MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_PTL_POLL); */

    while (1) {
        /* set when the event being handled was taken from the control EQ */
        int ctl_event = FALSE;

        /* Check the rptls EQ first. It should never return an event. */
        ret = MPID_nem_ptl_rptl_eqget(MPIDI_nem_ptl_rpt_eq, &event);
        MPIU_Assert(ret == PTL_EQ_EMPTY);

        /* check EQs for events */
        /* each queue is tried only if every earlier one was empty */
        ret = MPID_nem_ptl_rptl_eqget(MPIDI_nem_ptl_eq, &event);
        MPIR_ERR_CHKANDJUMP(ret == PTL_EQ_DROPPED, mpi_errno, MPI_ERR_OTHER, "**eqdropped");
        if (ret == PTL_EQ_EMPTY) {
            ret = MPID_nem_ptl_rptl_eqget(MPIDI_nem_ptl_get_eq, &event);
            MPIR_ERR_CHKANDJUMP(ret == PTL_EQ_DROPPED, mpi_errno, MPI_ERR_OTHER, "**eqdropped");

            if (ret == PTL_EQ_EMPTY) {
                ret = MPID_nem_ptl_rptl_eqget(MPIDI_nem_ptl_control_eq, &event);
                MPIR_ERR_CHKANDJUMP(ret == PTL_EQ_DROPPED, mpi_errno, MPI_ERR_OTHER, "**eqdropped");

                if (ret == PTL_EQ_EMPTY) {
                    ret = MPID_nem_ptl_rptl_eqget(MPIDI_nem_ptl_origin_eq, &event);
                    MPIR_ERR_CHKANDJUMP(ret == PTL_EQ_DROPPED, mpi_errno, MPI_ERR_OTHER, "**eqdropped");
                } else {
                    /* the control EQ returned something other than empty */
                    ctl_event = TRUE;
                }

                /* all EQs are empty */
                if (ret == PTL_EQ_EMPTY)
                    break;
            }
        }
        /* a nonzero ret at this point is reported as an error */
        MPIR_ERR_CHKANDJUMP1(ret, mpi_errno, MPI_ERR_OTHER, "**ptleqget", "**ptleqget %s", MPID_nem_ptl_strerror(ret));
        MPL_DBG_MSG_FMT(MPIDI_CH3_DBG_CHANNEL, VERBOSE, (MPL_DBG_FDEST, "Received event %s pt_idx=%d ni_fail=%s list=%s user_ptr=%p hdr_data=%#lx mlength=%lu rlength=%lu",
                                                MPID_nem_ptl_strevent(&event), event.pt_index, MPID_nem_ptl_strnifail(event.ni_fail_type),
                                                MPID_nem_ptl_strlist(event.ptl_list), event.user_ptr, event.hdr_data, event.mlength, event.rlength));
        /* PTL_NI_OK and PTL_NI_NO_MATCH are the only failure types accepted here */
        MPIR_ERR_CHKANDJUMP2(event.ni_fail_type != PTL_NI_OK && event.ni_fail_type != PTL_NI_NO_MATCH, mpi_errno, MPI_ERR_OTHER, "**ptlni_fail", "**ptlni_fail %s %s", MPID_nem_ptl_strevent(&event), MPID_nem_ptl_strnifail(event.ni_fail_type));

        /* special case for events on the control portal */
        if (ctl_event) {
            mpi_errno = MPID_nem_ptl_nm_ctl_event_handler(&event);
            if (mpi_errno) MPIR_ERR_POP(mpi_errno);
            continue;
        }

        switch (event.type) {
        case PTL_EVENT_PUT:
            /* a PUT reported for the overflow list is skipped here */
            if (event.ptl_list == PTL_OVERFLOW_LIST)
                break;
            /* otherwise fall through to the request handler below */
        case PTL_EVENT_PUT_OVERFLOW:
        case PTL_EVENT_GET:
        case PTL_EVENT_SEND:
        case PTL_EVENT_REPLY:
        case PTL_EVENT_SEARCH: {
            /* user_ptr carries the request; its handler, if set, gets the event */
            MPID_Request * const req = event.user_ptr;
            MPL_DBG_MSG_P(MPIDI_CH3_DBG_CHANNEL, VERBOSE, "req = %p", req);
            MPL_DBG_MSG_P(MPIDI_CH3_DBG_CHANNEL, VERBOSE, "REQ_PTL(req)->event_handler = %p", REQ_PTL(req)->event_handler);
            if (REQ_PTL(req)->event_handler) {
                mpi_errno = REQ_PTL(req)->event_handler(&event);
                if (mpi_errno) MPIR_ERR_POP(mpi_errno);
            }
            break;
        }
        case PTL_EVENT_AUTO_FREE:
            /* here user_ptr is used as an integer index, not as a request */
            mpi_errno = append_overflow((size_t)event.user_ptr);
            if (mpi_errno) MPIR_ERR_POP(mpi_errno);
            break;
        case PTL_EVENT_AUTO_UNLINK:
            /* mark the overflow entry with this index as having no handle */
            overflow_me_handle[(size_t)event.user_ptr] = PTL_INVALID_HANDLE;
            break;
        case PTL_EVENT_LINK:
            /* ignore */
            break;
        case PTL_EVENT_ACK:
        default:
            /* ACK is handled the same way as any unknown event type */
            MPL_error_printf("Received unexpected event type: %d %s", event.type, MPID_nem_ptl_strevent(&event));
            MPIR_ERR_INTERNALANDJUMP(mpi_errno, "Unexpected event type");
        }
    }

 fn_exit:
    /* MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_PTL_POLL); */
    return mpi_errno;
 fn_fail:
    goto fn_exit;
}
Example #20
0
/* Note that envp is common but not standard */
/* mpiexec entry point: builds the process universe from the environment
   and the command line, opens the PMI listener port, forks the processes,
   runs the IO loop until it ends or times out, and returns the exit status
   gathered from the processes (or 1 after a timeout when the processes
   themselves reported no failure). */
int main( int argc, char *argv[], char *envp[] )
{
    char         portString[MAX_PORT_STRING];
    SetupInfo    setup;
    int          rc;
    int          timeoutRc = 0;   /* Other (exceptional) return codes */
    int          loopExit;
    int          wasSignaled = 0;

    /* MPIE_ProcessInit initializes the global pUniv */
    MPIE_ProcessInit();
    /* Default universe size; may be changed by the steps below */
    pUniv.size = 64;

    /* Defaults for optional arguments and special environment options
       (such as debugging) are picked up here */
    MPIE_CheckEnv( &pUniv, 0, 0 );
    IOLabelCheckEnv( );

    /* The command line (see util/cmnargs.c) fills in the universe */
    MPIE_Args( argc, argv, &pUniv, 0, 0 );

    /* Soft arguments, if any, have to be resolved now */
    rc = MPIE_InitWorldWithSoft( &pUniv.worlds[0], pUniv.size );
    if (rc == 0) {
	MPL_error_printf( "Unable to process soft arguments\n" );
	exit(1);
    }

    /* When started from a singleton the MPI process is already running,
       so only an entry describing it is created */
    if (pUniv.fromSingleton) {
	MPIE_SetupSingleton( &pUniv );
    }

    rc = MPIE_ChooseHosts( &pUniv.worlds[0], MPIE_ReadMachines, 0 );
    if (rc != 0) {
	MPL_error_printf( "Unable to assign hosts to processes\n" );
	exit(1);
    }

    if (MPIE_Debug) {
	MPIE_PrintProcessUniverse( stdout, &pUniv );
    }

    DBG_PRINTF( ("timeout_seconds = %d\n", pUniv.timeout) );

    /* One common port is used for the PMI connections of all created
       processes */
    rc = PMIServSetupPort( &pUniv, portString, sizeof(portString) );
    if (rc != 0) {
	MPL_error_printf( "Unable to setup port for listener\n" );
	exit(1);
    }
    setup.pmiinfo.portName = portString;

#ifdef USE_MPI_STAGE_EXECUTABLES
    /* Hook for later use in staging executables */
    if (?stageExes) {
	rc = MPIE_StageExecutables( &pUniv.worlds[0] );
	if (!rc) ...;
    }
#endif

    PMIServInit(myspawn,&setup);
    setup.pmiinfo.pWorld = &pUniv.worlds[0];
    PMISetupNewGroup( pUniv.worlds[0].nProcess, 0 );
    MPIE_ForwardCommonSignals(); 
    if (pUniv.fromSingleton) {
	/* FIXME: The singleton code goes here */
	MPL_error_printf( "Singleton init not supported\n" );
	exit(1);
    }
    else {
	MPIE_ForkProcesses( &pUniv.worlds[0], envp, mypreamble, &setup,
			mypostfork, 0, mypostamble, 0 );
    }
    loopExit = MPIE_IOLoop( pUniv.timeout );

    if (loopExit == IOLOOP_TIMEOUT) {
	/* The IO loop ended because of the timeout: report it (in minutes
	   when the timeout is longer than 60 seconds) and terminate the
	   children */
	if (pUniv.timeout <= 60) {
	    MPL_error_printf( "Timeout of %d seconds expired; job aborted\n",
			       pUniv.timeout );
	}
	else {
	    MPL_error_printf( "Timeout of %d minutes expired; job aborted\n",
			       pUniv.timeout / 60 );
	}
	timeoutRc = 1;
	MPIE_KillUniverse( &pUniv );
    }

    /* Wait for all processes to exit and gather information on them.
       This happens through the SIGCHLD handler, and the wait is bounded
       to 2 seconds. */
    MPIE_WaitForProcesses( &pUniv, 2 );

    /* Compute the return code (max for now) */
    rc = MPIE_ProcessGetExitStatus( &wasSignaled );

    /* Optionally provide detailed information about failed processes */
    if (wasSignaled || (rc && printFailure)) {
	MPIE_PrintFailureReasons( stderr );
    }

    /* A normal exit of the processes (or processes that were already
       gone) combined with an exceptional condition such as a timeout
       yields the exceptional code */
    if (rc == 0 && timeoutRc != 0) {
	rc = timeoutRc;
    }

    return rc;
}