/*
 * fPMI_Handle_spawn - Handle one "mcmd=spawn" PMI command from a client.
 *
 * Each mcmd=spawn describes one application ("app").  A spawn-multiple
 * request is sent as several mcmd=spawn commands; the apps are accumulated
 * on pentry (spawnWorld / spawnApp / spawnAppTail / spawnKVS) until the
 * command whose "spawnssofar" value equals "totspawns" arrives.  At that
 * point the registered userSpawner is invoked for the whole world, a
 * "cmd=spawn_result rc=%d" line is written back to pentry->fd, and the
 * per-entry spawn state is cleared for the next request.
 *
 * Input lines have the form "key=value" and are read with
 * PMIUBufferedReadLine until a line containing only "endcmd" is seen.
 *
 * Parameters:
 *   pentry - PMI process entry the command arrived on; also holds the
 *            partially built spawn state between commands.
 *
 * Returns:
 *   0 once the command has been consumed (the spawner's own result is
 *   reported to the client in the spawn_result line, not in the return
 *   value); 1 on allocation failure or on a malformed command.
 *
 * NOTE(review): on the error returns, strings already duplicated into
 * args[] and the app already linked into pentry are not released here --
 * confirm whether the caller tears down pentry in that case.
 * NOTE(review): if the read loop ends because PMIUBufferedReadLine returned
 * a value <= 0 before "endcmd" was seen, processing continues with whatever
 * was parsed so far -- confirm that this is intended.
 */
static int fPMI_Handle_spawn(PMIProcess * pentry)
{
    char inbuf[PMIU_MAXLINE];
    char *(args[PMI_MAX_ARGS]);
    char key[MAXKEYLEN];
    char outbuf[PMIU_MAXLINE];
    ProcessWorld *pWorld;
    ProcessApp *app = 0;
    int preputNum = 0, rc;      /* preputNum is parsed but not otherwise used */
    int i;
    int totspawns = 0, spawnnum = 0;
    PMIKVSpace *kvs = 0;
    /* Variables for info */
    char curInfoKey[PMI_MAX_INFO_KEY], curInfoVal[PMI_MAX_INFO_VAL];
    int curInfoIdx = -1;

    /* A client may send preput_val_* / info_val_* without a preceding key
     * line; start with empty strings so that such input never causes a read
     * of uninitialized stack memory. */
    key[0] = 0;
    curInfoKey[0] = 0;
    curInfoVal[0] = 0;

    DBG_PRINTFCOND(pmidebug, ("Entering fPMI_Handle_spawn\n"));

    if (!pentry->spawnWorld) {
        /* First mcmd=spawn of a request: create the world and its KVS */
        pWorld = (ProcessWorld *) MPL_malloc(sizeof(ProcessWorld), MPL_MEM_PM);
        if (!pWorld)
            return 1;

        pentry->spawnWorld = pWorld;
        pWorld->apps = 0;
        pWorld->nProcess = 0;
        pWorld->nextWorld = 0;
        pWorld->nApps = 0;
        pWorld->worldNum = pUniv.nWorlds++;
        /* FIXME: What should be the defaults for the spawned env?
         * Should the default be the env of the spawner? */
        pWorld->genv = 0;
        pentry->spawnKVS = fPMIKVSAllocate();
    } else {
        pWorld = pentry->spawnWorld;
    }
    kvs = pentry->spawnKVS;

    /* Note that each mcmd=spawn creates an app.  When all apps
     * are present, they can be linked to a world.  A
     * spawnmultiple command makes use of multiple mcmd=spawn PMI
     * commands */

    /* Create a new app */
    app = (ProcessApp *) MPL_malloc(sizeof(ProcessApp), MPL_MEM_PM);
    if (!app)
        return 1;
    app->myAppNum = 0;
    app->exename = 0;
    app->arch = 0;
    app->path = 0;
    app->wdir = 0;
    app->hostname = 0;
    app->args = 0;
    app->nArgs = 0;
    app->soft.nelm = 0;
    app->nProcess = 0;
    app->pState = 0;
    app->nextApp = 0;
    app->env = 0;
    app->pWorld = pWorld;

    /* Add to the pentry spawn structure */
    if (pentry->spawnAppTail) {
        pentry->spawnAppTail->nextApp = app;
    } else {
        pentry->spawnApp = app;
        pWorld->apps = app;
    }
    pentry->spawnAppTail = app;

    for (i = 0; i < PMI_MAX_ARGS; i++)
        args[i] = 0;

    /* Get lines until we find either cmd or mcmd (an error) or endcmd
     * (expected end) */
    while ((rc = PMIUBufferedReadLine(pentry, inbuf, sizeof(inbuf))) > 0) {
        char *cmdPtr, *valPtr, *p;

        /* Find the command = format */
        p = inbuf;
        /* Find first nonblank */
        while (*p && isascii(*p) && isspace(*p))
            p++;
        if (!*p) {
            /* Empty string.  Ignore */
            continue;
        }
        cmdPtr = p++;
        /* Find '=' */
        while (*p && *p != '=')
            p++;
        if (!*p) {
            /* No =.  Check for endcmd */
            p--;
            /* Trim spaces; cmdPtr points at a nonblank, so this stops at or
             * after cmdPtr */
            while (isascii(*p) && isspace(*p))
                p--;
            /* Add null to end */
            *++p = 0;
            if (strcmp("endcmd", cmdPtr) == 0) {
                break;
            }
            /* FIXME: Otherwise, we have a problem */
            MPL_error_printf("Malformed PMI command (no endcmd seen\n");
            return 1;
        } else {
            *p = 0;
        }

        /* Found an = .  value is the rest of the line */
        valPtr = ++p;
        while (*p && *p != '\n')
            p++;
        if (*p)
            *p = 0;     /* Remove the newline */

        /* Now, process the cmd and value */
        if (strcmp("nprocs", cmdPtr) == 0) {
            app->nProcess = atoi(valPtr);
            pWorld->nProcess += app->nProcess;
        } else if (strcmp("execname", cmdPtr) == 0) {
            app->exename = MPL_strdup(valPtr);
        } else if (strcmp("totspawns", cmdPtr) == 0) {
            /* This tells us how many separate spawn commands
             * we expect to see (e.g., for spawn multiple).
             * Each spawn command is a separate "app" */
            totspawns = atoi(valPtr);
        } else if (strcmp("spawnssofar", cmdPtr) == 0) {
            /* This tells us which app we are (starting from 1) */
            spawnnum = atoi(valPtr);
            app->myAppNum = spawnnum - 1;
        } else if (strcmp("argcnt", cmdPtr) == 0) {
            /* argcnt may not be set before the args.  The count later
             * drives a copy out of the fixed-size args[] array, so it must
             * not exceed PMI_MAX_ARGS. */
            int argcnt = atoi(valPtr);
            if (argcnt < 0 || argcnt > PMI_MAX_ARGS) {
                MPL_error_printf
                    ("Malformed PMI Spawn command; argcnt is %d but must be between 0 and %d\n",
                     argcnt, PMI_MAX_ARGS);
                return 1;
            }
            app->nArgs = argcnt;
        } else if (strncmp("arg", cmdPtr, 3) == 0) {
            int argnum;
            /* argcnt may not be set before the args */
            /* Handle arg%d.  Values are 1 - origin */
            argnum = atoi(cmdPtr + 3) - 1;
            if (argnum < 0 || argnum >= PMI_MAX_ARGS) {
                MPL_error_printf
                    ("Malformed PMI Spawn command; the index of an argument in the command is %d but must be between 0 and %d\n",
                     argnum, PMI_MAX_ARGS - 1);
                return 1;
            }
            args[argnum] = MPL_strdup(valPtr);
        } else if (strcmp("preput_num", cmdPtr) == 0) {
            preputNum = atoi(valPtr);
        } else if (strncmp("preput_key_", cmdPtr, 11) == 0) {
            /* Save the key */
            MPL_strncpy(key, valPtr, sizeof(key));
        } else if (strncmp("preput_val_", cmdPtr, 11) == 0) {
            /* Place the key,val into the space associated with the current
             * PMI group */
            fPMIKVSAddPair(kvs, key, valPtr);
        }
        /* Info is on a per-app basis (it is an array of info items in
         * spawn multiple).  We can ignore most info values.
         * The ones that are handled are processed by a
         * separate routine (not yet implemented).
         * simple_pmi.c sends (key,value), so we can keep just the
         * last key and pass the key/value to the registered info
         * handler, along with the app structure.  Alternately,
         * we could save all info items and let the user's
         * spawner handle it */
        else if (strcmp("info_num", cmdPtr) == 0) {
            /* Number of info values */
            ;
        } else if (strncmp("info_key_", cmdPtr, 9) == 0) {
            /* The actual name has a digit, which indicates *which* info
             * key this is */
            curInfoIdx = atoi(cmdPtr + 9);
            MPL_strncpy(curInfoKey, valPtr, sizeof(curInfoKey));
        } else if (strncmp("info_val_", cmdPtr, 9) == 0) {
            /* The actual name has a digit, which indicates *which* info
             * value this is */
            int idx = atoi(cmdPtr + 9);
            if (idx != curInfoIdx) {
                MPL_error_printf
                    ("Malformed PMI command: info keys and values not ordered as expected (expected value %d but got %d)\n",
                     curInfoIdx, idx);
                return 1;
            } else {
                MPL_strncpy(curInfoVal, valPtr, sizeof(curInfoVal));
                /* Apply this info item */
                fPMIInfoKey(app, curInfoKey, curInfoVal);
                /* printf("Got info %s+%s\n", curInfoKey, curInfoVal); */
            }
        } else {
            MPL_error_printf("Unrecognized PMI subcommand on spawnmult: %s\n", cmdPtr);
            return 1;
        }
    }

    if (app->nArgs > 0) {
        /* nArgs was validated against PMI_MAX_ARGS when argcnt was parsed,
         * so the copy below stays inside args[].  Entries for which no
         * arg%d line was received are copied as null pointers. */
        app->args = (const char **) MPL_malloc(app->nArgs * sizeof(char *), MPL_MEM_PM);
        if (!app->args) {
            app->nArgs = 0;
            return 1;
        }
        for (i = 0; i < app->nArgs; i++) {
            app->args[i] = args[i];
            args[i] = 0;
        }
    }

    pWorld->nApps++;

    /* Now that we've read the commands, invoke the user's spawn command */
    if (totspawns == spawnnum) {
        PMISetupNewGroup(pWorld->nProcess, kvs);

        if (userSpawner) {
            rc = (*userSpawner) (pWorld, userSpawnerData);
        } else {
            MPL_error_printf("Unable to spawn %s\n", app->exename);
            rc = 1;
            MPIE_PrintProcessWorld(stdout, pWorld);
        }

        MPL_snprintf(outbuf, PMIU_MAXLINE, "cmd=spawn_result rc=%d\n", rc);
        PMIWriteLine(pentry->fd, outbuf);
        DBG_PRINTFCOND(pmidebug, ("%s", outbuf));

        /* Clear for the next spawn */
        pentry->spawnApp = 0;
        pentry->spawnAppTail = 0;
        pentry->spawnKVS = 0;
        pentry->spawnWorld = 0;
    }

    /* If totspawns != spawnnum, then we are expecting a
     * spawnmult with additional items */
    return 0;
}
/* Note that envp is common but not standard */ int main( int argc, char *argv[], char *envp[] ) { int rc; int erc = 0; /* Other (exceptional) return codes */ int reason, signaled = 0; SetupInfo s; char portString[MAX_PORT_STRING]; /* MPIE_ProcessInit initializes the global pUniv */ MPIE_ProcessInit(); /* Set a default for the universe size */ pUniv.size = 64; /* Set defaults for any arguments that are options. Also check the environment for special options, such as debugging. Set some defaults in pUniv */ MPIE_CheckEnv( &pUniv, 0, 0 ); IOLabelCheckEnv( ); /* Handle the command line arguments. Use the routine from util/cmnargs.c to fill in the universe */ MPIE_Args( argc, argv, &pUniv, 0, 0 ); /* If there were any soft arguments, we need to handle them now */ rc = MPIE_InitWorldWithSoft( &pUniv.worlds[0], pUniv.size ); if (!rc) { MPL_error_printf( "Unable to process soft arguments\n" ); exit(1); } if (pUniv.fromSingleton) { /* The MPI process is already running. We create a simple entry for a single process rather than creating the process */ MPIE_SetupSingleton( &pUniv ); } rc = MPIE_ChooseHosts( &pUniv.worlds[0], MPIE_ReadMachines, 0 ); if (rc) { MPL_error_printf( "Unable to assign hosts to processes\n" ); exit(1); } if (MPIE_Debug) MPIE_PrintProcessUniverse( stdout, &pUniv ); DBG_PRINTF( ("timeout_seconds = %d\n", pUniv.timeout) ); /* Get the common port for creating PMI connections to the created processes */ rc = PMIServSetupPort( &pUniv, portString, sizeof(portString) ); if (rc) { MPL_error_printf( "Unable to setup port for listener\n" ); exit(1); } s.pmiinfo.portName = portString; #ifdef USE_MPI_STAGE_EXECUTABLES /* Hook for later use in staging executables */ if (?stageExes) { rc = MPIE_StageExecutables( &pUniv.worlds[0] ); if (!rc) ...; } #endif PMIServInit(myspawn,&s); s.pmiinfo.pWorld = &pUniv.worlds[0]; PMISetupNewGroup( pUniv.worlds[0].nProcess, 0 ); MPIE_ForwardCommonSignals(); if (!pUniv.fromSingleton) { MPIE_ForkProcesses( &pUniv.worlds[0], envp, 
mypreamble, &s, mypostfork, 0, mypostamble, 0 ); } else { /* FIXME: The singleton code goes here */ MPL_error_printf( "Singleton init not supported\n" ); exit(1); } reason = MPIE_IOLoop( pUniv.timeout ); if (reason == IOLOOP_TIMEOUT) { /* Exited due to timeout. Generate an error message and terminate the children */ if (pUniv.timeout > 60) { MPL_error_printf( "Timeout of %d minutes expired; job aborted\n", pUniv.timeout / 60 ); } else { MPL_error_printf( "Timeout of %d seconds expired; job aborted\n", pUniv.timeout ); } erc = 1; MPIE_KillUniverse( &pUniv ); } /* Wait for all processes to exit and gather information on them. We do this through the SIGCHLD handler. We also bound the length of time that we wait to 2 seconds. */ MPIE_WaitForProcesses( &pUniv, 2 ); /* Compute the return code (max for now) */ rc = MPIE_ProcessGetExitStatus( &signaled ); /* Optionally provide detailed information about failed processes */ if ( (rc && printFailure) || signaled) MPIE_PrintFailureReasons( stderr ); /* If the processes exited normally (or were already gone) but we had an exceptional exit, such as a timeout, use the erc value */ if (!rc && erc) rc = erc; return( rc ); }