IRLOG_IOStruct *IRLOG_CreateOutputStruct(const char *filename) { IRLOG_IOStruct *pOutput = NULL; /* allocate a data structure */ pOutput = (IRLOG_IOStruct *) MPL_malloc(sizeof(IRLOG_IOStruct), MPL_MEM_DEBUG); if (pOutput == NULL) { MPL_error_printf("malloc failed - %s\n", strerror(errno)); return NULL; } /* open the output clog file */ pOutput->f = fopen(filename, "wb"); if (pOutput->f == NULL) { MPL_error_printf("Unable to open output file '%s' - %s\n", filename, strerror(errno)); MPL_free(pOutput); return NULL; } /* set all the data fields */ pOutput->header.type = RLOG_INVALID_TYPE; pOutput->pCurHeader = pOutput->buffer; pOutput->pNextHeader = pOutput->buffer; pOutput->pEnd = &pOutput->buffer[RLOG_BUFFSIZE]; return pOutput; }
/*
 * Print an optional message followed by the mpiexec usage line, then exit
 * with status -1.  This routine does not return.
 *
 * msg - optional text printed first; a newline is appended when a non-empty
 *       message does not already end with one.  May be NULL.
 */
void mpiexec_usage( const char *msg )
{
    if (msg) {
        size_t len = strlen( msg );

        MPL_error_printf( "%s", msg );
        /* Guard against an empty string: msg[len-1] would index before the
           start of the buffer when len is 0 */
        if (len > 0 && msg[len - 1] != '\n') {
            MPL_error_printf( "\n" );
        }
    }
    MPL_usage_printf( "Usage: mpiexec %s\n", MPIE_ArgDescription() );
    exit( -1 );
}
/*
 * Convert `count` basic elements from src_buf into dest_buf using the
 * BASIC_convert16/32/64 macros.
 *
 * A conversion is only performed when src_el_size == dest_el_size and the
 * element size is 2, 4 or 8 bytes; other equal sizes leave dest_buf
 * untouched.  Differing sizes are not supported: a message is printed and
 * MPID_Abort is called.
 *
 * Always returns 0; the return value is not an error code.
 */
static int external32_basic_convert(char *dest_buf, char *src_buf, int dest_el_size, int src_el_size, DLOOP_Offset count)
{
    char *src_ptr = src_buf, *dest_ptr = dest_buf;
    char *src_end;

    MPIR_Assert(dest_buf && src_buf);

    /* Compute the end of the source in DLOOP_Offset arithmetic; narrowing
     * count to int first would truncate large counts and could overflow */
    src_end = src_buf + (count * (DLOOP_Offset) src_el_size);

    if (src_el_size == dest_el_size) {
        if (src_el_size == 2) {
            while (src_ptr != src_end) {
                BASIC_convert16((*(TWO_BYTE_BASIC_TYPE *)src_ptr),
                                (*(TWO_BYTE_BASIC_TYPE *)dest_ptr));
                src_ptr += src_el_size;
                dest_ptr += dest_el_size;
            }
        }
        else if (src_el_size == 4) {
            while (src_ptr != src_end) {
                BASIC_convert32((*(FOUR_BYTE_BASIC_TYPE *)src_ptr),
                                (*(FOUR_BYTE_BASIC_TYPE *)dest_ptr));
                src_ptr += src_el_size;
                dest_ptr += dest_el_size;
            }
        }
        else if (src_el_size == 8) {
            while (src_ptr != src_end) {
                BASIC_convert64(src_ptr, dest_ptr);
                src_ptr += src_el_size;
                dest_ptr += dest_el_size;
            }
        }
    }
    else {
        /* TODO */
        MPL_error_printf( "Conversion of types whose size is not the same as the size in external32 is not supported\n" );
        MPID_Abort( 0, MPI_SUCCESS, 1, "Aborting with internal error" );
        /* There is no way to return an error code, so an abort is the
           only choice (the return value of this routine is not
           an error code) */
    }
    return 0;
}
/*
 * Print an abort message and terminate.
 *
 * comm      - communicator being aborted; may be NULL
 * mpi_errno - MPI error code; when not MPI_SUCCESS its error string is
 *             appended to the message
 * exit_code - exit code reported in the message and passed to the exit or
 *             abort call
 * error_msg - caller-supplied text; "Internal error" is used when NULL
 *
 * The message is printed with MPL_error_printf and stderr/stdout are
 * flushed.  If comm is NULL, or is an intracommunicator of size 1, MPL_exit
 * is called.  Otherwise MPIDIG_comm_abort is used for communicators other
 * than comm_world, and the PMI abort call selected at build time is used
 * for comm_world.  Returns 0 if those calls return.
 */
int MPID_Abort(MPIR_Comm * comm, int mpi_errno, int exit_code, const char *error_msg)
{
    char sys_str[MPI_MAX_ERROR_STRING + 5] = "";
    char comm_str[MPI_MAX_ERROR_STRING] = "";
    char world_str[MPI_MAX_ERROR_STRING] = "";
    char error_str[2 * MPI_MAX_ERROR_STRING + 128];
    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_ABORT);
    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDI_ABORT);

    if (MPIR_Process.comm_world) {
        int rank = MPIR_Process.comm_world->rank;
        snprintf(world_str, sizeof(world_str), " on node %d", rank);
    }

    if (comm) {
        int rank = comm->rank;
        int context_id = comm->context_id;
        snprintf(comm_str, sizeof(comm_str), " (rank %d in comm %d)", rank, context_id);
    }

    if (!error_msg)
        error_msg = "Internal error";

    if (mpi_errno != MPI_SUCCESS) {
        char msg[MPI_MAX_ERROR_STRING] = "";
        MPIR_Err_get_string(mpi_errno, msg, MPI_MAX_ERROR_STRING, NULL);
        /* Bound by the destination buffer: sys_str has extra room for the
         * " (" and ")" decoration, which sizeof(msg) would cut off */
        snprintf(sys_str, sizeof(sys_str), " (%s)", msg);
    }

    MPL_snprintf(error_str, sizeof(error_str), "Abort(%d)%s%s: %s%s\n",
                 exit_code, world_str, comm_str, error_msg, sys_str);
    MPL_error_printf("%s", error_str);

    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDI_ABORT);
    fflush(stderr);
    fflush(stdout);

    if (NULL == comm || (MPIR_Comm_size(comm) == 1 && comm->comm_kind == MPIR_COMM_KIND__INTRACOMM))
        MPL_exit(exit_code);

    if (comm != MPIR_Process.comm_world) {
        MPIDIG_comm_abort(comm, exit_code);
    } else {
#ifdef USE_PMIX_API
        PMIx_Abort(exit_code, error_msg, NULL, 0);
#elif defined(USE_PMI2_API)
        PMI2_Abort(TRUE, error_msg);
#else
        PMI_Abort(exit_code, error_msg);
#endif
    }

    return 0;
}
IRLOG_IOStruct *IRLOG_CreateInputStruct(const char *filename) { int num_read; IRLOG_IOStruct *pInput; /* allocate an input structure */ pInput = (IRLOG_IOStruct *) MPL_malloc(sizeof(IRLOG_IOStruct), MPL_MEM_DEBUG); if (pInput == NULL) { MPL_error_printf("malloc failed - %s\n", strerror(errno)); return NULL; } /* open the input clog file */ pInput->f = fopen(filename, "rb"); if (pInput->f == NULL) { MPL_error_printf("fopen(%s) failed, error: %s\n", filename, strerror(errno)); MPL_free(pInput); return NULL; } /* read some data */ num_read = (int) fread(pInput->buffer, 1, RLOG_BUFFSIZE, pInput->f); if (num_read == 0) { MPL_error_printf("Unable to read data from the input file.\n"); fclose(pInput->f); MPL_free(pInput); return NULL; } /* set the data fields and get the first record */ pInput->pCurHeader = pInput->buffer; pInput->pNextHeader = pInput->buffer; pInput->pEnd = pInput->buffer + num_read; if (IRLOG_GetNextRecord(pInput)) { MPL_error_printf("Unable to get the first record from the file.\n"); fclose(pInput->f); MPL_free(pInput); return NULL; } return pInput; }
/*
 * Copy the contents of fin, from its beginning up to its current position,
 * to the current position of fout.
 *
 * The number of bytes to copy is taken from ftell(fin); fin is then rewound
 * and copied through a temporary buffer of BUFFER_SIZE bytes.  Errors are
 * reported with MPL_error_printf and end the copy early; the temporary
 * buffer is released on every path.
 */
void AppendFile(FILE *fout, FILE *fin)
{
    long remaining;
    char *buffer;

    remaining = ftell(fin);
    if (remaining < 0) {
        /* ftell failed; a negative count must not drive the copy loop */
        MPL_error_printf("failed to determine the size of the input file\n");
        return;
    }
    if (fseek(fin, 0L, SEEK_SET) != 0) {
        MPL_error_printf("failed to rewind the input file\n");
        return;
    }

    buffer = (char*)MPL_malloc(sizeof(char) * BUFFER_SIZE);
    if (buffer == NULL) {
        MPL_error_printf("failed to allocate a copy buffer\n");
        return;
    }

    while (remaining > 0) {
        size_t want = (remaining < (long) BUFFER_SIZE) ? (size_t) remaining : (size_t) BUFFER_SIZE;
        size_t num_read = fread(buffer, 1, want, fin);
        char *buf = buffer;

        if (num_read == 0) {
            MPL_error_printf("failed to read from input file\n");
            goto done;
        }
        remaining -= (long) num_read;

        /* fwrite may write fewer bytes than requested; keep going until
         * this chunk is fully written */
        while (num_read > 0) {
            size_t num_written = fwrite(buf, 1, num_read, fout);
            if (num_written == 0) {
                MPL_error_printf("failed to write to output file\n");
                goto done;
            }
            num_read -= num_written;
            buf += num_written;
        }
    }

  done:
    MPL_free(buffer);
}
/*
 * Write exactly `length` bytes from pBuffer to fout, retrying after short
 * writes.
 *
 * Returns 0 on success.  On failure a message is printed and a non-zero
 * value is returned: errno if it is set, otherwise -1.
 */
static int WriteFileData(const char *pBuffer, int length, FILE * fout)
{
    while (length > 0) {
        size_t num_written = fwrite(pBuffer, 1, (size_t) length, fout);

        /* fwrite reports failure with a short count, never -1; with no
         * progress at all we must stop or the loop would never end */
        if (num_written == 0) {
            int err = errno;    /* save before the print can change it */
            MPL_error_printf("Error: fwrite failed - %s\n", strerror(err));
            return err ? err : -1;
        }
        length -= (int) num_written;
        pBuffer += num_written;
    }
    return 0;
}
/*
 * Read exactly `length` bytes from fin into pBuffer, retrying after short
 * reads.
 *
 * Returns 0 on success.  On a read error or a premature end of file a
 * message is printed and a non-zero value is returned: errno for a stream
 * error when it is set, otherwise -1.
 */
static int ReadFileData(char *pBuffer, int length, FILE * fin)
{
    while (length > 0) {
        size_t num_read = fread(pBuffer, 1, (size_t) length, fin);

        /* fread reports failure/EOF with a short count, never -1; with no
         * progress at all we must stop or the loop would never end */
        if (num_read == 0) {
            if (ferror(fin)) {
                int err = errno;        /* save before the print can change it */
                MPL_error_printf("Error: fread failed - %s\n", strerror(err));
                return err ? err : -1;
            }
            MPL_error_printf("Error: fread failed - unexpected end of file\n");
            return -1;
        }
        length -= (int) num_read;
        pBuffer += num_read;
    }
    return 0;
}
RLOG_Struct* RLOG_InitLog(int rank, int size) { RLOG_Struct* pRLOG; pRLOG = (RLOG_Struct*)MPL_malloc(sizeof(RLOG_Struct)); if (pRLOG == NULL) return NULL; pRLOG->nRank = rank; pRLOG->nSize = size; pRLOG->nRecursion = 0; pRLOG->nCurEventId = RLOG_FIRST_EVENT_ID; pRLOG->dFirstTimestamp = 0.0; MPL_snprintf(pRLOG->pszFileName, 256, "log%d.irlog", rank); pRLOG->pOutput = NULL; pRLOG->pOutput = IRLOG_CreateOutputStruct(pRLOG->pszFileName); if (pRLOG->pOutput == NULL) { MPL_error_printf("RLOG Error: unable to allocate an output structure.\n"); MPL_free(pRLOG); return NULL; } RLOG_EnableLogging(pRLOG); /* save the parts of the header and event that do not change */ pRLOG->DiskEvent.event = RLOG_GetNextEventID(pRLOG); pRLOG->DiskEvent.rank = rank; pRLOG->DiskHeader.type = RLOG_EVENT_TYPE; pRLOG->DiskHeader.length = sizeof(RLOG_HEADER) + sizeof(RLOG_EVENT); /* put the description of the state in the log file */ RLOG_DescribeState(pRLOG, pRLOG->DiskEvent.event, "RLOG_DISK", "255 0 0"); RLOG_DisableLogging(pRLOG); return pRLOG; }
/*
 * Match one half of a message arrow (a send or a receive record) with its
 * counterpart and write completed arrows to the temporary arrow file.
 *
 * The arrow file "ArrowFile.tmp" is opened on first use.  For a sender
 * record the matching end node is looked up on the remote rank's arrow
 * node; for any other record the matching start node is looked up on this
 * rank's arrow node.  When a match exists the completed RLOG_ARROW is
 * written and the matched node freed; otherwise the record is appended to
 * the node's start (sender) or end (receiver) list to wait for its partner.
 *
 * Failures to open the file or to allocate a list entry are reported and
 * the record is dropped.
 */
void SaveArrow(RLOG_IARROW *pArrow)
{
    ArrowNode *pNode;
    StartArrowStruct *pStart, *pStartIter;
    EndArrowStruct *pEnd, *pEndIter;
    RLOG_ARROW arrow;

    if (g_fArrow == NULL) {
        MPL_strncpy(g_pszArrowFilename, "ArrowFile.tmp", 1024);
        g_fArrow = fopen(g_pszArrowFilename, "w+b");
        if (g_fArrow == NULL) {
            MPL_error_printf("unable to open ArrowFile.tmp\n");
            return;
        }
    }

    if (pArrow->sendrecv == RLOG_SENDER) {
        pNode = GetArrowNode(pArrow->remote);
        pEnd = ExtractEndNode(pNode, pArrow->rank, pArrow->tag);
        if (pEnd == NULL) {
            /* no receive seen yet: queue the send at the tail of the list */
            pStart = (StartArrowStruct *)MPL_malloc(sizeof(StartArrowStruct));
            if (pStart == NULL) {
                MPL_error_printf("unable to allocate a start arrow structure\n");
                return;
            }
            pStart->src = pArrow->rank;
            pStart->tag = pArrow->tag;
            pStart->length = pArrow->length;
            pStart->start_time = pArrow->timestamp;
            pStart->next = NULL;
            if (pNode->pStartList == NULL) {
                pNode->pStartList = pStart;
            }
            else {
                pStartIter = pNode->pStartList;
                while (pStartIter->next != NULL)
                    pStartIter = pStartIter->next;
                pStartIter->next = pStart;
            }
            return;
        }
        arrow.src = pArrow->rank;
        arrow.dest = pArrow->remote;
        arrow.length = pArrow->length;
        arrow.start_time = pEnd->timestamp;
        arrow.end_time = pArrow->timestamp;
        arrow.tag = pArrow->tag;
        arrow.leftright = RLOG_ARROW_LEFT;
        WriteFileData((const char *) &arrow, sizeof(RLOG_ARROW), g_fArrow);
        MPL_free(pEnd);
    }
    else {
        arrow.dest = pArrow->rank;
        arrow.end_time = pArrow->timestamp;
        arrow.tag = pArrow->tag;
        arrow.length = pArrow->length;

        pNode = GetArrowNode(pArrow->rank);
        pStart = ExtractStartNode(pNode, pArrow->remote, pArrow->tag);
        if (pStart != NULL) {
            arrow.src = pStart->src;
            arrow.start_time = pStart->start_time;
            /* the sender length is more accurate than the receiver length */
            arrow.length = pStart->length;
            arrow.leftright = RLOG_ARROW_RIGHT;
            MPL_free(pStart);
            WriteFileData((const char *) &arrow, sizeof(RLOG_ARROW), g_fArrow);
        }
        else {
            /* no send seen yet: queue the receive at the tail of the list */
            pEnd = (EndArrowStruct *)MPL_malloc(sizeof(EndArrowStruct));
            if (pEnd == NULL) {
                MPL_error_printf("unable to allocate an end arrow structure\n");
                return;
            }
            pEnd->src = pArrow->remote;
            pEnd->tag = pArrow->tag;
            pEnd->timestamp = pArrow->timestamp;
            pEnd->next = NULL;
            if (pNode->pEndList == NULL) {
                pNode->pEndList = pEnd;
            }
            else {
                pEndIter = pNode->pEndList;
                while (pEndIter->next != NULL)
                    pEndIter = pEndIter->next;
                pEndIter->next = pEnd;
            }
        }
    }
}
/*
 * Handle one "mcmd=spawn" PMI command from the process `pentry`.
 *
 * Each spawn command describes one app.  The app is appended to the
 * pending spawn list of pentry (creating the spawn world and its KVS on the
 * first command), and the remaining lines of the command are read with
 * PMIUBufferedReadLine until "endcmd".  Recognized keys are nprocs,
 * execname, totspawns, spawnssofar, argcnt, arg<N>, preput_num,
 * preput_key_<N>, preput_val_<N>, info_num, info_key_<N> and info_val_<N>.
 *
 * When spawnssofar equals totspawns, the new group is set up, the
 * registered userSpawner (if any) is invoked, "cmd=spawn_result rc=<rc>" is
 * written back to the client and the pending spawn state of pentry is
 * cleared.
 *
 * Returns 0 on success and 1 on an allocation failure or a malformed
 * command.
 */
static int fPMI_Handle_spawn(PMIProcess * pentry)
{
    char inbuf[PMIU_MAXLINE];
    char *(args[PMI_MAX_ARGS]);
    char key[MAXKEYLEN];
    char outbuf[PMIU_MAXLINE];
    ProcessWorld *pWorld;
    ProcessApp *app = 0;
    int preputNum = 0, rc;
    int i;
    int totspawns = 0, spawnnum = 0;
    PMIKVSpace *kvs = 0;
    /* Variables for info */
    char curInfoKey[PMI_MAX_INFO_KEY], curInfoVal[PMI_MAX_INFO_VAL];
    int curInfoIdx = -1;

    /* Start with empty strings so that a value arriving before its key
     * never reads uninitialized memory */
    key[0] = 0;
    curInfoKey[0] = 0;

    DBG_PRINTFCOND(pmidebug, ("Entering fPMI_Handle_spawn\n"));

    if (!pentry->spawnWorld) {
        pWorld = (ProcessWorld *) MPL_malloc(sizeof(ProcessWorld), MPL_MEM_PM);
        if (!pWorld)
            return 1;

        pentry->spawnWorld = pWorld;
        pWorld->apps = 0;
        pWorld->nProcess = 0;
        pWorld->nextWorld = 0;
        pWorld->nApps = 0;
        pWorld->worldNum = pUniv.nWorlds++;
        /* FIXME: What should be the defaults for the spawned env?
         * Should the default be the env of the spawner? */
        pWorld->genv = 0;
        pentry->spawnKVS = fPMIKVSAllocate();
    } else {
        pWorld = pentry->spawnWorld;
    }
    kvs = pentry->spawnKVS;

    /* Note that each mcmd=spawn creates an app.  When all apps
     * are present, then they can be linked to a world.  A
     * spawnmultiple command makes use of multiple mcmd=spawn PMI
     * commands */

    /* Create a new app */
    app = (ProcessApp *) MPL_malloc(sizeof(ProcessApp), MPL_MEM_PM);
    if (!app)
        return 1;
    app->myAppNum = 0;
    app->exename = 0;
    app->arch = 0;
    app->path = 0;
    app->wdir = 0;
    app->hostname = 0;
    app->args = 0;
    app->nArgs = 0;
    app->soft.nelm = 0;
    app->nProcess = 0;
    app->pState = 0;
    app->nextApp = 0;
    app->env = 0;
    app->pWorld = pWorld;

    /* Add to the pentry spawn structure */
    if (pentry->spawnAppTail) {
        pentry->spawnAppTail->nextApp = app;
    } else {
        pentry->spawnApp = app;
        pWorld->apps = app;
    }
    pentry->spawnAppTail = app;

    for (i = 0; i < PMI_MAX_ARGS; i++)
        args[i] = 0;

    /* Get lines until we find either cmd or mcmd (an error) or endcmd
     * (expected end) */
    while ((rc = PMIUBufferedReadLine(pentry, inbuf, sizeof(inbuf))) > 0) {
        char *cmdPtr, *valPtr, *p;

        /* Find the command = format */
        p = inbuf;
        /* Find first nonblank */
        while (*p && isascii(*p) && isspace(*p))
            p++;
        if (!*p) {
            /* Empty string.  Ignore */
            continue;
        }
        cmdPtr = p++;
        /* Find '=' */
        while (*p && *p != '=')
            p++;
        if (!*p) {
            /* No =.  Check for endcmd */
            p--;
            /* Trim spaces */
            while (isascii(*p) && isspace(*p))
                p--;
            /* Add null to end */
            *++p = 0;
            if (strcmp("endcmd", cmdPtr) == 0) {
                break;
            }
            /* FIXME: Otherwise, we have a problem */
            MPL_error_printf("Malformed PMI command (no endcmd seen\n");
            return 1;
        } else {
            *p = 0;
        }

        /* Found an = .  value is the rest of the line */
        valPtr = ++p;
        while (*p && *p != '\n')
            p++;
        if (*p)
            *p = 0;     /* Remove the newline */

        /* Now, process the cmd and value */
        if (strcmp("nprocs", cmdPtr) == 0) {
            app->nProcess = atoi(valPtr);
            pWorld->nProcess += app->nProcess;
        } else if (strcmp("execname", cmdPtr) == 0) {
            app->exename = MPL_strdup(valPtr);
        } else if (strcmp("totspawns", cmdPtr) == 0) {
            /* This tells us how many separate spawn commands
             * we expect to see (e.g., for spawn multiple).
             * Each spawn command is a separate "app" */
            totspawns = atoi(valPtr);
        } else if (strcmp("spawnssofar", cmdPtr) == 0) {
            /* This tells us which app we are (starting from 1) */
            spawnnum = atoi(valPtr);
            app->myAppNum = spawnnum - 1;
        } else if (strcmp("argcnt", cmdPtr) == 0) {
            /* argcnt may not be set before the args */
            int argcnt = atoi(valPtr);
            /* The arguments are staged in args[PMI_MAX_ARGS]; a larger
             * count would make the copy below read past that array */
            if (argcnt < 0 || argcnt > PMI_MAX_ARGS) {
                MPL_error_printf
                    ("Malformed PMI Spawn command; the argument count is %d but must be between 0 and %d\n",
                     argcnt, PMI_MAX_ARGS);
                return 1;
            }
            app->nArgs = argcnt;
        } else if (strncmp("arg", cmdPtr, 3) == 0) {
            int argnum;
            /* argcnt may not be set before the args */
            /* Handle arg%d.  Values are 1 - origin */
            argnum = atoi(cmdPtr + 3) - 1;
            if (argnum < 0 || argnum >= PMI_MAX_ARGS) {
                MPL_error_printf
                    ("Malformed PMI Spawn command; the index of an argument in the command is %d but must be between 0 and %d\n",
                     argnum, PMI_MAX_ARGS - 1);
                return 1;
            }
            args[argnum] = MPL_strdup(valPtr);
        } else if (strcmp("preput_num", cmdPtr) == 0) {
            preputNum = atoi(valPtr);
        } else if (strncmp("preput_key_", cmdPtr, 11) == 0) {
            /* Save the key */
            MPL_strncpy(key, valPtr, sizeof(key));
        } else if (strncmp("preput_val_", cmdPtr, 11) == 0) {
            /* Place the key,val into the space associated with the current
             * PMI group */
            fPMIKVSAddPair(kvs, key, valPtr);
        }
        /* Info is on a per-app basis (it is an array of info items in
         * spawn multiple).  We can ignore most info values.
         * The ones that are handled are processed by a
         * separate routine (not yet implemented).
         * simple_pmi.c sends (key,value), so we can keep just the
         * last key and pass the key/value to the registered info
         * handler, along with the app structure.  Alternately,
         * we could save all info items and let the user's
         * spawner handle it */
        else if (strcmp("info_num", cmdPtr) == 0) {
            /* Number of info values */
            ;
        } else if (strncmp("info_key_", cmdPtr, 9) == 0) {
            /* The actual name has a digit, which indicates *which* info
             * key this is */
            curInfoIdx = atoi(cmdPtr + 9);
            MPL_strncpy(curInfoKey, valPtr, sizeof(curInfoKey));
        } else if (strncmp("info_val_", cmdPtr, 9) == 0) {
            /* The actual name has a digit, which indicates *which* info
             * value this is */
            int idx = atoi(cmdPtr + 9);
            if (idx != curInfoIdx) {
                MPL_error_printf
                    ("Malformed PMI command: info keys and values not ordered as expected (expected value %d but got %d)\n",
                     curInfoIdx, idx);
                return 1;
            } else {
                MPL_strncpy(curInfoVal, valPtr, sizeof(curInfoVal));
                /* Apply this info item */
                fPMIInfoKey(app, curInfoKey, curInfoVal);
                /* printf("Got info %s+%s\n", curInfoKey, curInfoVal); */
            }
        } else {
            MPL_error_printf("Unrecognized PMI subcommand on spawnmult: %s\n", cmdPtr);
            return 1;
        }
    }

    if (app->nArgs > 0) {
        app->args = (const char **) MPL_malloc(app->nArgs * sizeof(char *), MPL_MEM_PM);
        if (!app->args)
            return 1;
        /* Ownership of the duplicated strings moves to the app */
        for (i = 0; i < app->nArgs; i++) {
            app->args[i] = args[i];
            args[i] = 0;
        }
    }

    pWorld->nApps++;

    /* Now that we've read the commands, invoke the user's spawn command */
    if (totspawns == spawnnum) {
        PMISetupNewGroup(pWorld->nProcess, kvs);

        if (userSpawner) {
            rc = (*userSpawner) (pWorld, userSpawnerData);
        } else {
            MPL_error_printf("Unable to spawn %s\n", app->exename);
            rc = 1;
            MPIE_PrintProcessWorld(stdout, pWorld);
        }

        MPL_snprintf(outbuf, PMIU_MAXLINE, "cmd=spawn_result rc=%d\n", rc);
        PMIWriteLine(pentry->fd, outbuf);
        DBG_PRINTFCOND(pmidebug, ("%s", outbuf));

        /* Clear for the next spawn */
        pentry->spawnApp = 0;
        pentry->spawnAppTail = 0;
        pentry->spawnKVS = 0;
        pentry->spawnWorld = 0;
    }

    /* If totspawnnum != spawnnum, then we are expecting a
     * spawnmult with additional items */
    return 0;
}
RLOG_IOStruct *RLOG_CreateInputStruct(const char *filename) { int i, j, rank_index, cur_rank, min_rank = 0; RLOG_IOStruct *pInput; int type, length; /* allocate an input structure */ pInput = (RLOG_IOStruct*)MPIU_Malloc(sizeof(RLOG_IOStruct)); if (pInput == NULL) { MPL_error_printf("malloc failed - %s\n", strerror(errno)); return NULL; } pInput->ppCurEvent = NULL; pInput->ppCurGlobalEvent = NULL; pInput->gppCurEvent = NULL; pInput->gppPrevEvent = NULL; pInput->ppEventOffset = NULL; pInput->ppNumEvents = NULL; pInput->nNumArrows = 0; /* open the input rlog file */ pInput->f = fopen(filename, "rb"); if (pInput->f == NULL) { MPL_error_printf("fopen(%s) failed, error: %s\n", filename, strerror(errno)); MPIU_Free(pInput); return NULL; } pInput->nNumRanks = 0; /* read the sections */ while (fread(&type, sizeof(int), 1, pInput->f)) { fread(&length, sizeof(int), 1, pInput->f); switch (type) { case RLOG_HEADER_SECTION: /*printf("type: RLOG_HEADER_SECTION, length: %d\n", length);*/ if (length != sizeof(RLOG_FILE_HEADER)) { MPL_error_printf("error in header size %d != %d\n", length, (int)sizeof(RLOG_FILE_HEADER)); } if (ReadFileData((char*)&pInput->header, sizeof(RLOG_FILE_HEADER), pInput->f)) { rlog_err_printf("reading rlog header failed\n"); return NULL; } pInput->nNumRanks = pInput->header.nMaxRank + 1 - pInput->header.nMinRank; min_rank = pInput->header.nMinRank; pInput->pRank = (int*)MPIU_Malloc(pInput->nNumRanks * sizeof(int)); pInput->pNumEventRecursions = (int*)MPIU_Malloc(pInput->nNumRanks * sizeof(int)); pInput->ppNumEvents = (int**)MPIU_Malloc(pInput->nNumRanks * sizeof(int*)); pInput->ppCurEvent = (int**)MPIU_Malloc(pInput->nNumRanks * sizeof(int*)); pInput->ppCurGlobalEvent = (int**)MPIU_Malloc(pInput->nNumRanks * sizeof(int*)); pInput->gppCurEvent = (RLOG_EVENT**)MPIU_Malloc(pInput->nNumRanks * sizeof(RLOG_EVENT*)); pInput->gppPrevEvent = (RLOG_EVENT**)MPIU_Malloc(pInput->nNumRanks * sizeof(RLOG_EVENT*)); pInput->ppEventOffset = 
(long**)MPIU_Malloc(pInput->nNumRanks * sizeof(long*)); for (i=0; i<pInput->nNumRanks; i++) { pInput->pRank[i] = -1; pInput->pNumEventRecursions[i] = 0; pInput->ppNumEvents[i] = NULL; pInput->ppCurEvent[i] = NULL; pInput->ppCurGlobalEvent[i] = NULL; pInput->gppCurEvent[i] = NULL; pInput->gppPrevEvent[i] = NULL; pInput->ppEventOffset[i] = NULL; } break; case RLOG_STATE_SECTION: /*printf("type: RLOG_STATE_SECTION, length: %d\n", length);*/ pInput->nNumStates = length / sizeof(RLOG_STATE); pInput->nStateOffset = ftell(pInput->f); fseek(pInput->f, length, SEEK_CUR); break; case RLOG_ARROW_SECTION: /*printf("type: RLOG_ARROW_SECTION, length: %d\n", length);*/ pInput->nNumArrows = length / sizeof(RLOG_ARROW); pInput->nArrowOffset = ftell(pInput->f); fseek(pInput->f, length, SEEK_CUR); break; case RLOG_EVENT_SECTION: /*printf("type: RLOG_EVENT_SECTION, length: %d, ", length);*/ fread(&cur_rank, sizeof(int), 1, pInput->f); if (cur_rank - min_rank >= pInput->nNumRanks) { MPL_error_printf("Error: event section out of range - %d <= %d <= %d\n", pInput->header.nMinRank, cur_rank, pInput->header.nMaxRank); MPIU_Free(pInput); return NULL; } rank_index = cur_rank - min_rank; fread(&pInput->pNumEventRecursions[rank_index], sizeof(int), 1, pInput->f); /*printf("levels: %d\n", pInput->nNumEventRecursions);*/ if (pInput->pNumEventRecursions[rank_index]) { pInput->ppCurEvent[rank_index] = (int*)MPIU_Malloc(pInput->pNumEventRecursions[rank_index] * sizeof(int)); pInput->ppCurGlobalEvent[rank_index] = (int*)MPIU_Malloc(pInput->pNumEventRecursions[rank_index] * sizeof(int)); pInput->gppCurEvent[rank_index] = (RLOG_EVENT*)MPIU_Malloc(pInput->pNumEventRecursions[rank_index] * sizeof(RLOG_EVENT)); pInput->gppPrevEvent[rank_index] = (RLOG_EVENT*)MPIU_Malloc(pInput->pNumEventRecursions[rank_index] * sizeof(RLOG_EVENT)); pInput->ppNumEvents[rank_index] = (int*)MPIU_Malloc(pInput->pNumEventRecursions[rank_index] * sizeof(int)); pInput->ppEventOffset[rank_index] = 
(long*)MPIU_Malloc(pInput->pNumEventRecursions[rank_index] * sizeof(long)); } for (i=0; i<pInput->pNumEventRecursions[rank_index]; i++) { fread(&pInput->ppNumEvents[rank_index][i], sizeof(int), 1, pInput->f); /*printf(" level %2d: %d events\n", i, pInput->pNumEvents[i]);*/ } if (pInput->pNumEventRecursions[rank_index]) { pInput->ppEventOffset[rank_index][0] = ftell(pInput->f); for (i=1; i<pInput->pNumEventRecursions[rank_index]; i++) { pInput->ppEventOffset[rank_index][i] = pInput->ppEventOffset[rank_index][i-1] + (pInput->ppNumEvents[rank_index][i-1] * sizeof(RLOG_EVENT)); } } length -= ((pInput->pNumEventRecursions[rank_index] + 2) * sizeof(int)); fseek(pInput->f, length, SEEK_CUR); break; default: /*printf("unknown section: type %d, length %d\n", type, length);*/ fseek(pInput->f, length, SEEK_CUR); break; } } /* reset the iterators */ RLOG_ResetStateIter(pInput); RLOG_ResetArrowIter(pInput); for (j=0; j<pInput->nNumRanks; j++) { for (i=0; i<pInput->pNumEventRecursions[j]; i++) RLOG_ResetEventIter(pInput, j+pInput->header.nMinRank, i); } RLOG_ResetGlobalIter(pInput); return pInput; }
static int ModifyArrows(FILE *f, int nNumArrows, int nMin, double *pOffsets, int n) { RLOG_ARROW arrow, *pArray; int i, index, bModified; long arrow_pos; int error; double temp_time; fseek(f, 0, SEEK_CUR); arrow_pos = ftell(f); if (arrow_pos == -1) return errno; pArray = (RLOG_ARROW*)MPIU_Malloc(nNumArrows * sizeof(RLOG_ARROW)); if (pArray) { MPL_msg_printf("Modifying %d arrows\n", nNumArrows); /* read the arrows */ fseek(f, 0, SEEK_CUR); error = ReadFileData((char*)pArray, nNumArrows * sizeof(RLOG_ARROW), f); if (error) { MPIU_Free(pArray); return error; } /* modify the arrows */ for (i=0; i<nNumArrows; i++) { arrow = pArray[i]; bModified = FALSE; index = (arrow.leftright == RLOG_ARROW_RIGHT) ? arrow.src - nMin : arrow.dest - nMin; if (index >= 0 && index < n && pOffsets[index] != 0) { arrow.start_time += pOffsets[index]; bModified = TRUE; } index = (arrow.leftright == RLOG_ARROW_RIGHT) ? arrow.dest - nMin : arrow.src - nMin; if (index >= 0 && index < n && pOffsets[index] != 0) { arrow.end_time += pOffsets[index]; bModified = TRUE; } if (bModified) { if (arrow.start_time > arrow.end_time) { temp_time = arrow.start_time; arrow.start_time = arrow.end_time; arrow.end_time = temp_time; arrow.leftright = (arrow.leftright == RLOG_ARROW_LEFT) ? RLOG_ARROW_RIGHT : RLOG_ARROW_LEFT; } pArray[i] = arrow; } } /* sort the arrows */ qsort(pArray, (size_t)nNumArrows, sizeof(RLOG_ARROW), (int (*)(const void *,const void*))compareArrows); /* write the arrows back */ fseek(f, arrow_pos, SEEK_SET); error = WriteFileData((char*)pArray, nNumArrows * sizeof(RLOG_ARROW), f); if (error) { MPIU_Free(pArray); return error; } fseek(f, 0, SEEK_CUR); MPIU_Free(pArray); } else { MPL_error_printf("Error: unable to allocate an array big enough to hold %d arrows\n", nNumArrows); return -1; } return 0; }
/* * Input Parameters: * portLen - Number of characters in portString * Output Parameters: * fdout - An fd that is listening for connection attempts. * Use PMIServAcceptFromPort to process reads from this fd * portString - The name of a port that can be used to connect to * this process (using connect). */ int PMIServGetPort( int *fdout, char *portString, int portLen ) { int fd = -1; struct sockaddr_in sa; int optval = 1; int portnum; char *range_ptr; int low_port=0, high_port=0; /* Under cygwin we may want to use 1024 as a low port number */ /* a low and high port of zero allows the system to choose the port value */ /* Get the low and high portnumber range. zero may be used to allow the system to choose. There is a priority to these values, we keep going until we get one (and skip if none is found) */ range_ptr = getenv( "MPIEXEC_PORTRANGE" ); if (!range_ptr) { range_ptr = getenv( "MPIEXEC_PORT_RANGE" ); } if (!range_ptr) { range_ptr = getenv( "MPICH_PORT_RANGE" ); } if (range_ptr) { char *p; /* Look for n:m format */ p = range_ptr; while (*p && isspace(*p)) p++; while (*p && isdigit(*p)) low_port = 10 * low_port + (*p++ - '0'); if (*p == ':') { p++; while (*p && isdigit(*p)) high_port = 10 * high_port + (*p++ - '0'); } if (*p) { MPL_error_printf( "Invalid character %c in MPIEXEC_PORTRANGE\n", *p ); return -1; } } for (portnum=low_port; portnum<=high_port; portnum++) { memset( (void *)&sa, 0, sizeof(sa) ); sa.sin_family = AF_INET; sa.sin_port = htons( portnum ); sa.sin_addr.s_addr = INADDR_ANY; fd = socket( AF_INET, SOCK_STREAM, TCP ); if (fd < 0) { /* Failure; return immediately */ return fd; } if (setsockopt( fd, IPPROTO_TCP, TCP_NODELAY, (char *)&optval, sizeof(optval) )) { MPL_internal_sys_error_printf( "setsockopt", errno, 0 ); } if (bind( fd, (struct sockaddr *)&sa, sizeof(sa) ) < 0) { close( fd ); fd = -1; if (errno != EADDRINUSE && errno != EADDRNOTAVAIL) { return -1; } } else { /* Success! We have a port. 
*/ break; } } if (fd < 0) { /* We were unable to find a usable port */ return -1; } DBG_PRINTF( ("Listening on fd %d\n", fd) ); /* Listen is a non-blocking call that enables connections */ listen( fd, MAX_PENDING_CONN ); /* Make sure that this fd doesn't get sent to the children */ fcntl( fd, F_SETFD, FD_CLOEXEC ); *fdout = fd; if (portnum == 0) { socklen_t sinlen = sizeof(sa); /* We asked for *any* port, so we need to find which port we actually got */ getsockname( fd, (struct sockaddr *)&sa, &sinlen ); portnum = ntohs(sa.sin_port); } /* Create the port string */ { char hostname[MAX_HOST_NAME+1]; hostname[0] = 0; MPIE_GetMyHostName( hostname, sizeof(hostname) ); MPL_snprintf( portString, portLen, "%s:%d", hostname, portnum ); } return 0; }
/* IO Handler for the listen socket Respond to a connection request by creating a new socket, which is then registered. Initialize the startup handshake. */ int PMIServAcceptFromPort( int fd, int rdwr, void *data ) { int newfd; struct sockaddr sock; socklen_t addrlen = sizeof(sock); int id; ProcessUniverse *univ = (ProcessUniverse *)data; ProcessWorld *pWorld = univ->worlds; ProcessApp *app; /* Get the new socket */ MPIE_SYSCALL(newfd,accept,( fd, &sock, &addrlen )); DBG_PRINTF(("Acquired new socket in accept (fd = %d)\n", newfd )); if (newfd < 0) { DBG(perror("Error on accept: " )); return newfd; } #ifdef FOO /* Mark this fd as non-blocking */ flags = fcntl( newfd, F_GETFL, 0 ); if (flags >= 0) { flags |= O_NDELAY; fcntl( newfd, F_SETFL, flags ); } #endif /* Make sure that exec'd processes don't get this fd */ fcntl( newfd, F_SETFD, FD_CLOEXEC ); /* Find the matching process. Do this by reading from the socket and getting the id value with which process was created. */ id = PMI_Init_port_connection( newfd ); if (id >= 0) { /* find the matching entry */ ProcessState *pState = 0; int nSoFar = 0; PMIProcess *pmiprocess; /* This code assigns processes to the states in a pWorld by using the id as the rank, and finding the corresponding process among the ranks */ while (pWorld) { app = pWorld->apps; while (app) { if (app->nProcess > id - nSoFar) { /* Found the matching app */ pState = app->pState + (id - nSoFar); break; } else { nSoFar += app->nProcess; } app = app->nextApp; } pWorld = pWorld->nextWorld; } if (!pState) { /* We have a problem */ MPL_error_printf( "Unable to find process with PMI_ID = %d in the universe", id ); return -1; } /* Now, initialize the connection */ /* Create the new process structure (see PMISetupFinishInServer for this step when a pre-existing FD is used */ DBG_PRINTF( ("Server connection to id = %d on fd %d\n", id, newfd )); pmiprocess = PMISetupNewProcess( newfd, pState ); PMI_Init_remote_proc( newfd, pmiprocess ); MPIE_IORegister( newfd, 
IO_READ, PMIServHandleInput, pmiprocess ); } else { /* Error, the id should never be less than zero or unset */ /* An alternative would be to dynamically assign the ranks as processes come in (but we'd still need to use the PMI_ID to identify the ProcessApp) */ DBG_PRINTF(("Found an invalid id\n" )); return -1; } /* Return success. */ return 0; }
/*
 * Post-fork setup: close one side of each pipe pair and replace stdout/err
 * with the pipes, then rewrite the app so that it is launched through
 * /usr/bin/ssh.
 *
 * predata - the SetupInfo holding the IO-label and PMI setup data
 * data    - unused here
 * pState  - state of the process being started; its app is modified in
 *           place
 *
 * The new argument vector is
 *   -Y <hostname> <PMI_PORT setting> <PMI_ID setting> [<PMI_DEBUG setting>]
 *   <original exename> <original args...>
 * and app->exename becomes "/usr/bin/ssh".  Exits with status 1 when no
 * hostname is available or the argument vector cannot be allocated.
 * Returns 0 otherwise.
 */
int mypostfork( void *predata, void *data, ProcessState *pState )
{
    SetupInfo *s = (SetupInfo *)predata;
    int curarg=0;

    IOLabelSetupInClient( &s->labelinfo );
    PMISetupInClient( 1, &s->pmiinfo );

    /* Now, we *also* change the process state to insert the
       interposed remote shell routine.  This is probably not
       where we want this in the final version (because MPIE_ExecProgram
       does a lot under the assumption that the started program will
       know what to do with new environment variables), but this
       will allow us to start. */
    {
        ProcessApp *app = pState->app;
        const char **newargs = 0;
        char *pmiDebugStr = 0;
        int j;
        char rankStr[12];

        if (!pState->hostname) {
            MPL_error_printf( "No hostname avaliable for %s\n", app->exename );
            exit(1);
        }

        /* Insert into app->args */
        newargs = (const char **) MPL_malloc( (app->nArgs + 14 + 1) *
                                              sizeof(char *) );
        if (!newargs) {
            MPL_error_printf( "Unable to allocate the argument list for %s\n", app->exename );
            exit(1);
        }

        snprintf( rankStr, sizeof(rankStr)-1, "%d", pState->id );
        rankStr[12-1] = 0;
        curarg = 0;
        newargs[curarg++] = MPL_strdup( "-Y" );
        newargs[curarg++] = pState->hostname;
        curarg += AddEnvSetToCmdLine( "PMI_PORT", s->pmiinfo.portName,
                                      newargs + curarg );
        curarg += AddEnvSetToCmdLine( "PMI_ID", rankStr, newargs + curarg );
        pmiDebugStr = getenv( "PMI_DEBUG" );
        if (pmiDebugStr) {
            /* Use this to help debug the connection process */
            curarg += AddEnvSetToCmdLine( "PMI_DEBUG", pmiDebugStr,
                                          newargs + curarg );
        }

        /* The original program and its arguments follow the ssh options */
        newargs[curarg++] = app->exename;
        for (j=0; j<app->nArgs; j++) {
            newargs[j+curarg] = app->args[j];
        }
        newargs[j+curarg] = 0;
        app->exename = MPL_strdup( "/usr/bin/ssh" );

        app->args = newargs;
        app->nArgs += curarg;

        if (MPIE_Debug) {
            printf( "cmd = %s\n", app->exename ); fflush(stdout);
            printf( "Number of args = %d\n", app->nArgs );
            for (j=0; j<app->nArgs; j++) {
                printf( "argv[%d] = %s\n", j, app->args[j] ); fflush(stdout);
            }
        }
    }

    return 0;
}
int main(int argc, char *argv[]) { RLOG_FILE_HEADER header; RLOG_State_list *pState; int nNumInputs; IRLOG_IOStruct *pInput; IRLOG_IOStruct **ppInput; int nMaxRank = 0; int nMinRank = MAX_RANK; int nNumStates = 0; int type, length; int nMaxLevel = 0; int nNumLevels = 0; int nTotalNumEvents = 0; int nNumEvents; RecursionStruct *pLevel; FILE *fout; int i, j; int nRank; if (argc < 3) { MPL_error_printf("Usage:\nirlog2rlog out.rlog in0.irlog in1.irlog ...\nirlog2rlog out.rlog n\n"); return 0; } if (argc == 3 && IsNumber(argv[2])) { GenerateNewArgv(&argc, &argv, atoi(argv[2])); } nNumInputs = argc - 2; /* open the output rlog file */ fout = fopen(argv[1], "wb"); if (fout == NULL) { MPL_error_printf("unable to open output file '%s'\n", argv[1]); return -1; } /* read the arrows from all the files in order */ ppInput = (IRLOG_IOStruct**)MPL_malloc(nNumInputs * sizeof(IRLOG_IOStruct*)); for (i=0; i<nNumInputs; i++) { ppInput[i] = IRLOG_CreateInputStruct(argv[i+2]); if (ppInput[i] == NULL) { MPL_error_printf("Unable to create an input structure for '%s', skipping\n", argv[i+2]); } } for (i=0; i<nNumInputs; i++) { if (ppInput[i] == NULL) { for (j=i; j<nNumInputs-1; j++) ppInput[j] = ppInput[j+1]; nNumInputs--; i--; } } MPL_msg_printf("reading the arrows from all the input files.\n");fflush(stdout); ReadAllArrows(ppInput, nNumInputs); nNumInputs = argc - 2; /* read, parse and save all the data from the irlog files */ for (i=0; i<nNumInputs; i++) { pInput = IRLOG_CreateInputStruct(argv[i+2]); if (pInput == NULL) { MPL_error_printf("Unable to create an input structure for '%s', skipping\n", argv[i+2]); } else { MPL_msg_printf("reading irlog file: %s\n", argv[i+2]);fflush(stdout); for(;;) { switch (pInput->header.type) { case RLOG_STATE_TYPE: SaveState(&pInput->record.state); break; case RLOG_COMM_TYPE: nMaxRank = (pInput->record.comm.rank > nMaxRank) ? pInput->record.comm.rank : nMaxRank; nMinRank = (pInput->record.comm.rank < nMinRank) ? 
pInput->record.comm.rank : nMinRank; break; case RLOG_IARROW_TYPE: /* SaveArrow(&pInput->record.iarrow); */ break; case RLOG_EVENT_TYPE: SaveEvent(&pInput->record.event); break; default: MPL_error_printf("Unknown irlog record type: %d\n", pInput->header.type); break; } if (IRLOG_GetNextRecord(pInput)) { IRLOG_CloseInputStruct(&pInput); break; } } } } /* set the fields in the header */ header.nMinRank = FindMinRank(g_pLevel); header.nMaxRank = FindMaxRank(g_pLevel); if (nMinRank != header.nMinRank) MPL_error_printf("minimum event rank %d does not equal the minimum comm record rank %d\n", header.nMinRank, nMinRank); if (nMaxRank != header.nMaxRank) MPL_error_printf("maximum event rank %d does not equal the maximum comm record rank %d\n", header.nMaxRank, nMaxRank); /* write header */ MPL_msg_printf("writing header.\n");fflush(stdout); type = RLOG_HEADER_SECTION; length = sizeof(RLOG_FILE_HEADER); /* fwrite(&type, sizeof(int), 1, fout); */ WriteFileData(&type, sizeof(int), fout); /* fwrite(&length, sizeof(int), 1, fout);*/ WriteFileData(&length, sizeof(int), fout); /* fwrite(&header, sizeof(RLOG_FILE_HEADER), 1, fout); */ WriteFileData(&header, sizeof(RLOG_FILE_HEADER), fout); /* write states */ if (g_pList) { MPL_msg_printf("writing states.\n");fflush(stdout); } pState = g_pList; while (pState) { nNumStates++; pState = pState->next; } type = RLOG_STATE_SECTION; length = nNumStates * sizeof(RLOG_STATE); /* fwrite(&type, sizeof(int), 1, fout); */ WriteFileData(&type, sizeof(int), fout); /* fwrite(&length, sizeof(int), 1, fout); */ WriteFileData(&length, sizeof(int), fout); pState = g_pList; while (pState) { /* fwrite(pState, sizeof(RLOG_STATE), 1, fout); */ WriteFileData(pState, sizeof(RLOG_STATE), fout); pState = pState->next; } /* write arrows */ if (g_fArrow) { MPL_msg_printf("writing arrows.\n");fflush(stdout); type = RLOG_ARROW_SECTION; length = ftell(g_fArrow); /* fwrite(&type, sizeof(int), 1, fout); */ WriteFileData(&type, sizeof(int), fout); /* fwrite(&length, 
sizeof(int), 1, fout); */ WriteFileData(&length, sizeof(int), fout); AppendFile(fout, g_fArrow); } /* write events */ while (g_pLevel) { pLevel = FindMinLevel(g_pLevel); nNumLevels = FindMaxRecursion(g_pLevel, pLevel->rank); nRank = pLevel->rank; /* count the events for this rank */ nNumEvents = 0; for (i=0; i<nNumLevels; i++) { pLevel = GetLevel(pLevel->rank, i); nNumEvents += pLevel->num_events; } /* write an event section for this rank */ type = RLOG_EVENT_SECTION; length = sizeof(int) + sizeof(int) + (nNumLevels * sizeof(int)) + (nNumEvents * sizeof(RLOG_EVENT)); /* fwrite(&type, sizeof(int), 1, fout); */ WriteFileData(&type, sizeof(int), fout); /* fwrite(&length, sizeof(int), 1, fout); */ WriteFileData(&length, sizeof(int), fout); /* fwrite(&nRank, sizeof(int), 1, fout); */ WriteFileData(&nRank, sizeof(int), fout); /* fwrite(&nNumLevels, sizeof(int), 1, fout); */ WriteFileData(&nNumLevels, sizeof(int), fout); for (i=0; i<nNumLevels; i++) { pLevel = GetLevel(nRank, i); /* fwrite(&pLevel->num_events, sizeof(int), 1, fout); */ WriteFileData(&pLevel->num_events, sizeof(int), fout); } for (i=0; i<nNumLevels; i++) { MPL_msg_printf("writing event level %d:%d\n", nRank, i);fflush(stdout); pLevel = GetLevel(nRank, i); AppendFile(fout, pLevel->fout); } /* remove this rank from the list of levels */ RemoveLevel(nRank); } /* free resources */ while (g_pList) { pState = g_pList; g_pList = g_pList->next; MPL_free(pState); } if (g_fArrow) { fclose(g_fArrow); unlink(g_pszArrowFilename); } if (s_bFreeArgv) MPL_free(argv); return 0; }
/*
 * IRLOG_GetNextRecord - advance pInput to the next record of the irlog stream.
 *
 * The header of the next record is copied into pInput->header.  For event,
 * iarrow, state and comm records the bytes that follow the header are copied
 * into the matching member of pInput->record.  Whenever the header or the
 * complete record is not yet resident, the unread tail of the buffer is moved
 * to the front and more data is read from pInput->f.
 *
 * Returns 0 when a record was loaded.  Returns 1 when no record is available:
 * the end-of-log record was reached, the record type is invalid or unknown,
 * or no more data could be read before the record was complete.
 */
int IRLOG_GetNextRecord(IRLOG_IOStruct * pInput)
{
    int num_valid, num_read;

    pInput->pCurHeader = pInput->pNextHeader;

    /* Less than one header is left in the buffer: shift what is left to the
     * front and read just enough bytes to complete the header. */
    if (pInput->pEnd - pInput->pCurHeader < sizeof(RLOG_HEADER)) {
        num_valid = (int) (pInput->pEnd - pInput->pCurHeader);
        /* Source and destination are both inside pInput->buffer, so use
         * memmove: memcpy is undefined for overlapping regions. */
        if (pInput->pCurHeader != pInput->buffer)
            memmove(pInput->buffer, pInput->pCurHeader, num_valid);
        /* NOTE(review): the outcome of ReadFileData is not examined here; if
         * it can report a short read, the header bytes used below may be
         * stale - confirm against its definition. */
        ReadFileData(pInput->buffer + num_valid, sizeof(RLOG_HEADER) - num_valid, pInput->f);
        pInput->pCurHeader = pInput->buffer;
        pInput->pNextHeader = pInput->buffer;
        pInput->pEnd = pInput->buffer + sizeof(RLOG_HEADER);
    }

    /* Work on a copy of the header rather than on the raw buffer bytes.
     * No byte swapping is applied to the header fields. */
    memcpy(&pInput->header, pInput->pCurHeader, sizeof(RLOG_HEADER));

    /* NOTE(review): header.length is used as read from the file; a value
     * smaller than sizeof(RLOG_HEADER) would not advance pNextHeader past this
     * header - consider validating once the writer's guarantees are known. */

    /* Refill until the whole record (header.length bytes) is in the buffer. */
    while (pInput->pCurHeader + pInput->header.length > pInput->pEnd) {
        num_valid = (int) (pInput->pEnd - pInput->pCurHeader);
        /* The unread tail may be longer than its distance from the start of
         * the buffer, in which case the two regions overlap: memmove. */
        if (pInput->pCurHeader != pInput->buffer)
            memmove(pInput->buffer, pInput->pCurHeader, num_valid);
        num_read = (int) fread(pInput->buffer + num_valid, 1,
                               RLOG_BUFFSIZE - num_valid, pInput->f);
        if (num_read == 0) {
            MPL_error_printf("RLOG Error: unable to get the next record.\n");
            return 1;
        }
        pInput->pEnd = pInput->buffer + num_valid + num_read;
        pInput->pCurHeader = pInput->buffer;
    }

    pInput->pNextHeader = pInput->pCurHeader + pInput->header.length;

    /* Copy the payload that follows the header into the typed record slot. */
    switch (pInput->header.type) {
        case RLOG_INVALID_TYPE:
            MPL_error_printf("RLOG Error: invalid record type.\n");
            return 1;
        case RLOG_ENDLOG_TYPE:
            /* end of the log: reported to the caller like an error, silently */
            return 1;
        case RLOG_EVENT_TYPE:
            memcpy(&pInput->record.event, pInput->pCurHeader + sizeof(RLOG_HEADER),
                   sizeof(RLOG_EVENT));
            break;
        case RLOG_IARROW_TYPE:
            memcpy(&pInput->record.iarrow, pInput->pCurHeader + sizeof(RLOG_HEADER),
                   sizeof(RLOG_IARROW));
            break;
        case RLOG_STATE_TYPE:
            memcpy(&pInput->record.state, pInput->pCurHeader + sizeof(RLOG_HEADER),
                   sizeof(RLOG_STATE));
            break;
        case RLOG_COMM_TYPE:
            memcpy(&pInput->record.comm, pInput->pCurHeader + sizeof(RLOG_HEADER),
                   sizeof(RLOG_COMM));
            break;
        default:
            MPL_error_printf("RLOG Error: unknown record type %d.\n", pInput->header.type);
            return 1;
    }

    return 0;
}
int MPID_nem_ptl_poll(int is_blocking_poll) { int mpi_errno = MPI_SUCCESS; ptl_event_t event; int ret; MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_PTL_POLL); /* MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_PTL_POLL); */ while (1) { int ctl_event = FALSE; /* Check the rptls EQ first. It should never return an event. */ ret = MPID_nem_ptl_rptl_eqget(MPIDI_nem_ptl_rpt_eq, &event); MPIU_Assert(ret == PTL_EQ_EMPTY); /* check EQs for events */ ret = MPID_nem_ptl_rptl_eqget(MPIDI_nem_ptl_eq, &event); MPIR_ERR_CHKANDJUMP(ret == PTL_EQ_DROPPED, mpi_errno, MPI_ERR_OTHER, "**eqdropped"); if (ret == PTL_EQ_EMPTY) { ret = MPID_nem_ptl_rptl_eqget(MPIDI_nem_ptl_get_eq, &event); MPIR_ERR_CHKANDJUMP(ret == PTL_EQ_DROPPED, mpi_errno, MPI_ERR_OTHER, "**eqdropped"); if (ret == PTL_EQ_EMPTY) { ret = MPID_nem_ptl_rptl_eqget(MPIDI_nem_ptl_control_eq, &event); MPIR_ERR_CHKANDJUMP(ret == PTL_EQ_DROPPED, mpi_errno, MPI_ERR_OTHER, "**eqdropped"); if (ret == PTL_EQ_EMPTY) { ret = MPID_nem_ptl_rptl_eqget(MPIDI_nem_ptl_origin_eq, &event); MPIR_ERR_CHKANDJUMP(ret == PTL_EQ_DROPPED, mpi_errno, MPI_ERR_OTHER, "**eqdropped"); } else { ctl_event = TRUE; } /* all EQs are empty */ if (ret == PTL_EQ_EMPTY) break; } } MPIR_ERR_CHKANDJUMP1(ret, mpi_errno, MPI_ERR_OTHER, "**ptleqget", "**ptleqget %s", MPID_nem_ptl_strerror(ret)); MPL_DBG_MSG_FMT(MPIDI_CH3_DBG_CHANNEL, VERBOSE, (MPL_DBG_FDEST, "Received event %s pt_idx=%d ni_fail=%s list=%s user_ptr=%p hdr_data=%#lx mlength=%lu rlength=%lu", MPID_nem_ptl_strevent(&event), event.pt_index, MPID_nem_ptl_strnifail(event.ni_fail_type), MPID_nem_ptl_strlist(event.ptl_list), event.user_ptr, event.hdr_data, event.mlength, event.rlength)); MPIR_ERR_CHKANDJUMP2(event.ni_fail_type != PTL_NI_OK && event.ni_fail_type != PTL_NI_NO_MATCH, mpi_errno, MPI_ERR_OTHER, "**ptlni_fail", "**ptlni_fail %s %s", MPID_nem_ptl_strevent(&event), MPID_nem_ptl_strnifail(event.ni_fail_type)); /* special case for events on the control portal */ if (ctl_event) { mpi_errno = 
MPID_nem_ptl_nm_ctl_event_handler(&event); if (mpi_errno) MPIR_ERR_POP(mpi_errno); continue; } switch (event.type) { case PTL_EVENT_PUT: if (event.ptl_list == PTL_OVERFLOW_LIST) break; case PTL_EVENT_PUT_OVERFLOW: case PTL_EVENT_GET: case PTL_EVENT_SEND: case PTL_EVENT_REPLY: case PTL_EVENT_SEARCH: { MPID_Request * const req = event.user_ptr; MPL_DBG_MSG_P(MPIDI_CH3_DBG_CHANNEL, VERBOSE, "req = %p", req); MPL_DBG_MSG_P(MPIDI_CH3_DBG_CHANNEL, VERBOSE, "REQ_PTL(req)->event_handler = %p", REQ_PTL(req)->event_handler); if (REQ_PTL(req)->event_handler) { mpi_errno = REQ_PTL(req)->event_handler(&event); if (mpi_errno) MPIR_ERR_POP(mpi_errno); } break; } case PTL_EVENT_AUTO_FREE: mpi_errno = append_overflow((size_t)event.user_ptr); if (mpi_errno) MPIR_ERR_POP(mpi_errno); break; case PTL_EVENT_AUTO_UNLINK: overflow_me_handle[(size_t)event.user_ptr] = PTL_INVALID_HANDLE; break; case PTL_EVENT_LINK: /* ignore */ break; case PTL_EVENT_ACK: default: MPL_error_printf("Received unexpected event type: %d %s", event.type, MPID_nem_ptl_strevent(&event)); MPIR_ERR_INTERNALANDJUMP(mpi_errno, "Unexpected event type"); } } fn_exit: /* MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_PTL_POLL); */ return mpi_errno; fn_fail: goto fn_exit; }
/* Note that envp is common but not standard */ int main( int argc, char *argv[], char *envp[] ) { int rc; int erc = 0; /* Other (exceptional) return codes */ int reason, signaled = 0; SetupInfo s; char portString[MAX_PORT_STRING]; /* MPIE_ProcessInit initializes the global pUniv */ MPIE_ProcessInit(); /* Set a default for the universe size */ pUniv.size = 64; /* Set defaults for any arguments that are options. Also check the environment for special options, such as debugging. Set some defaults in pUniv */ MPIE_CheckEnv( &pUniv, 0, 0 ); IOLabelCheckEnv( ); /* Handle the command line arguments. Use the routine from util/cmnargs.c to fill in the universe */ MPIE_Args( argc, argv, &pUniv, 0, 0 ); /* If there were any soft arguments, we need to handle them now */ rc = MPIE_InitWorldWithSoft( &pUniv.worlds[0], pUniv.size ); if (!rc) { MPL_error_printf( "Unable to process soft arguments\n" ); exit(1); } if (pUniv.fromSingleton) { /* The MPI process is already running. We create a simple entry for a single process rather than creating the process */ MPIE_SetupSingleton( &pUniv ); } rc = MPIE_ChooseHosts( &pUniv.worlds[0], MPIE_ReadMachines, 0 ); if (rc) { MPL_error_printf( "Unable to assign hosts to processes\n" ); exit(1); } if (MPIE_Debug) MPIE_PrintProcessUniverse( stdout, &pUniv ); DBG_PRINTF( ("timeout_seconds = %d\n", pUniv.timeout) ); /* Get the common port for creating PMI connections to the created processes */ rc = PMIServSetupPort( &pUniv, portString, sizeof(portString) ); if (rc) { MPL_error_printf( "Unable to setup port for listener\n" ); exit(1); } s.pmiinfo.portName = portString; #ifdef USE_MPI_STAGE_EXECUTABLES /* Hook for later use in staging executables */ if (?stageExes) { rc = MPIE_StageExecutables( &pUniv.worlds[0] ); if (!rc) ...; } #endif PMIServInit(myspawn,&s); s.pmiinfo.pWorld = &pUniv.worlds[0]; PMISetupNewGroup( pUniv.worlds[0].nProcess, 0 ); MPIE_ForwardCommonSignals(); if (!pUniv.fromSingleton) { MPIE_ForkProcesses( &pUniv.worlds[0], envp, 
mypreamble, &s, mypostfork, 0, mypostamble, 0 ); } else { /* FIXME: The singleton code goes here */ MPL_error_printf( "Singleton init not supported\n" ); exit(1); } reason = MPIE_IOLoop( pUniv.timeout ); if (reason == IOLOOP_TIMEOUT) { /* Exited due to timeout. Generate an error message and terminate the children */ if (pUniv.timeout > 60) { MPL_error_printf( "Timeout of %d minutes expired; job aborted\n", pUniv.timeout / 60 ); } else { MPL_error_printf( "Timeout of %d seconds expired; job aborted\n", pUniv.timeout ); } erc = 1; MPIE_KillUniverse( &pUniv ); } /* Wait for all processes to exit and gather information on them. We do this through the SIGCHLD handler. We also bound the length of time that we wait to 2 seconds. */ MPIE_WaitForProcesses( &pUniv, 2 ); /* Compute the return code (max for now) */ rc = MPIE_ProcessGetExitStatus( &signaled ); /* Optionally provide detailed information about failed processes */ if ( (rc && printFailure) || signaled) MPIE_PrintFailureReasons( stderr ); /* If the processes exited normally (or were already gone) but we had an exceptional exit, such as a timeout, use the erc value */ if (!rc && erc) rc = erc; return( rc ); }