示例#1
0
/*
 * Handle one "mcmd=spawn" request arriving on the PMI connection of pentry.
 *
 * Each call reads key=value lines up to "endcmd" and builds one ProcessApp
 * from them.  The app is appended to the spawn list kept in pentry and
 * belongs to pentry->spawnWorld, which is created (together with
 * pentry->spawnKVS) on the first spawn command of a group.  When the
 * "spawnssofar" value equals "totspawns", the registered userSpawner is
 * invoked on the world, a "cmd=spawn_result" line is written back to
 * pentry->fd, and the per-entry spawn state is cleared.
 *
 * Returns 0 when the command was consumed (even if the spawner itself
 * reported a failure through spawn_result), and 1 on allocation failure or
 * on a malformed command.
 *
 * NOTE(review): on the error returns the strings already duplicated into
 * args[] are not released, and neither are entries at indices >= argcnt on
 * the success path.
 */
static int fPMI_Handle_spawn(PMIProcess * pentry)
{
    char inbuf[PMIU_MAXLINE];
    char *(args[PMI_MAX_ARGS]);
    /* Start with an empty key so that a preput_val_ line that arrives
     * before any preput_key_ line does not read indeterminate bytes */
    char key[MAXKEYLEN] = "";
    char outbuf[PMIU_MAXLINE];
    ProcessWorld *pWorld;
    ProcessApp *app = 0;
    int preputNum = 0, rc;
    int i;
    int totspawns = 0, spawnnum = 0;
    PMIKVSpace *kvs = 0;
    /* Variables for info.  The buffers start empty for the same reason as
     * key: an info_val_ index can match the initial curInfoIdx (-1) before
     * any info_key_ line has been seen */
    char curInfoKey[PMI_MAX_INFO_KEY] = "", curInfoVal[PMI_MAX_INFO_VAL] = "";
    int curInfoIdx = -1;

    DBG_PRINTFCOND(pmidebug, ("Entering fPMI_Handle_spawn\n"));

    if (!pentry->spawnWorld) {
        pWorld = (ProcessWorld *) MPL_malloc(sizeof(ProcessWorld), MPL_MEM_PM);
        if (!pWorld)
            return 1;

        pentry->spawnWorld = pWorld;
        pWorld->apps = 0;
        pWorld->nProcess = 0;
        pWorld->nextWorld = 0;
        pWorld->nApps = 0;
        pWorld->worldNum = pUniv.nWorlds++;
        /* FIXME: What should be the defaults for the spawned env?
         * Should the default be the env of the spawner? */
        pWorld->genv = 0;
        pentry->spawnKVS = fPMIKVSAllocate();
    } else {
        pWorld = pentry->spawnWorld;
    }
    kvs = pentry->spawnKVS;

    /* Note that each mcmd=spawn creates an app.  When all apps
     * are present, then they can be linked to a world.  A
     * spawnmultiple command makes use of multiple mcmd=spawn PMI
     * commands */

    /* Create a new app */
    app = (ProcessApp *) MPL_malloc(sizeof(ProcessApp), MPL_MEM_PM);
    if (!app)
        return 1;
    app->myAppNum = 0;
    app->exename = 0;
    app->arch = 0;
    app->path = 0;
    app->wdir = 0;
    app->hostname = 0;
    app->args = 0;
    app->nArgs = 0;
    app->soft.nelm = 0;
    app->nProcess = 0;
    app->pState = 0;
    app->nextApp = 0;
    app->env = 0;
    app->pWorld = pWorld;

    /* Add to the pentry spawn structure */
    if (pentry->spawnAppTail) {
        pentry->spawnAppTail->nextApp = app;
    } else {
        pentry->spawnApp = app;
        pWorld->apps = app;
    }
    pentry->spawnAppTail = app;

    for (i = 0; i < PMI_MAX_ARGS; i++)
        args[i] = 0;

    /* Get lines until we find either cmd or mcmd (an error) or endcmd
     * (expected end) */
    while ((rc = PMIUBufferedReadLine(pentry, inbuf, sizeof(inbuf))) > 0) {
        char *cmdPtr, *valPtr, *p;

        /* Find the command = format */
        p = inbuf;
        /* Find first nonblank */
        while (*p && isascii(*p) && isspace(*p))
            p++;
        if (!*p) {
            /* Empty string.  Ignore */
            continue;
        }
        cmdPtr = p++;
        /* Find '=' */
        while (*p && *p != '=')
            p++;
        if (!*p) {
            /* No =.  Check for endcmd */
            p--;
            /* Trim spaces.  This stops at cmdPtr at the latest because
             * cmdPtr points at a nonblank character */
            while (isascii(*p) && isspace(*p))
                p--;
            /* Add null to end */
            *++p = 0;
            if (strcmp("endcmd", cmdPtr) == 0) {
                break;
            }
            /* FIXME: Otherwise, we have a problem */
            MPL_error_printf("Malformed PMI command (no endcmd seen\n");
            return 1;
        } else {
            *p = 0;
        }

        /* Found an = .  value is the rest of the line */
        valPtr = ++p;
        while (*p && *p != '\n')
            p++;
        if (*p)
            *p = 0;     /* Remove the newline */

        /* Now, process the cmd and value */
        if (strcmp("nprocs", cmdPtr) == 0) {
            app->nProcess = atoi(valPtr);
            pWorld->nProcess += app->nProcess;
        } else if (strcmp("execname", cmdPtr) == 0) {
            app->exename = MPL_strdup(valPtr);
        } else if (strcmp("totspawns", cmdPtr) == 0) {
            /* This tells us how many separate spawn commands
             * we expect to see (e.g., for spawn multiple).
             * Each spawn command is a separate "app" */
            totspawns = atoi(valPtr);
        } else if (strcmp("spawnssofar", cmdPtr) == 0) {
            /* This tells us which app we are (starting from 1) */
            spawnnum = atoi(valPtr);
            app->myAppNum = spawnnum - 1;
        } else if (strcmp("argcnt", cmdPtr) == 0) {
            /* argcnt may not be set before the args */
            int argcnt = atoi(valPtr);
            /* The count later bounds the copy out of args[], which holds
             * only PMI_MAX_ARGS entries, so reject anything outside it */
            if (argcnt < 0 || argcnt > PMI_MAX_ARGS) {
                MPL_error_printf
                    ("Malformed PMI Spawn command; argcnt is %d but must be between 0 and %d\n",
                     argcnt, PMI_MAX_ARGS);
                return 1;
            }
            app->nArgs = argcnt;
        } else if (strncmp("arg", cmdPtr, 3) == 0) {
            int argnum;
            /* argcnt may not be set before the args */
            /* Handle arg%d.  Values are 1 - origin */
            argnum = atoi(cmdPtr + 3) - 1;
            if (argnum < 0 || argnum >= PMI_MAX_ARGS) {
                MPL_error_printf
                    ("Malformed PMI Spawn command; the index of an argument in the command is %d but must be between 0 and %d\n",
                     argnum, PMI_MAX_ARGS - 1);
                return 1;
            }
            args[argnum] = MPL_strdup(valPtr);
        } else if (strcmp("preput_num", cmdPtr) == 0) {
            preputNum = atoi(valPtr);
        } else if (strncmp("preput_key_", cmdPtr, 11) == 0) {
            /* Save the key */
            MPL_strncpy(key, valPtr, sizeof(key));
        } else if (strncmp("preput_val_", cmdPtr, 11) == 0) {
            /* Place the key,val into the space associated with the current
             * PMI group */
            fPMIKVSAddPair(kvs, key, valPtr);
        }
        /* Info is on a per-app basis (it is an array of info items in
         * spawn multiple).  We can ignore most info values.
         * The ones that are handled are processed by a
         * separate routine (not yet implemented).
         * simple_pmi.c sends (key,value), so we can keep just the
         * last key and pass the key/value to the registered info
         * handler, along with the app structure.  Alternately,
         * we could save all info items and let the user's
         * spawner handle it */
        else if (strcmp("info_num", cmdPtr) == 0) {
            /* Number of info values */
            ;
        } else if (strncmp("info_key_", cmdPtr, 9) == 0) {
            /* The actual name has a digit, which indicates *which* info
             * key this is */
            curInfoIdx = atoi(cmdPtr + 9);
            MPL_strncpy(curInfoKey, valPtr, sizeof(curInfoKey));
        } else if (strncmp("info_val_", cmdPtr, 9) == 0) {
            /* The actual name has a digit, which indicates *which* info
             * value this is */
            int idx = atoi(cmdPtr + 9);
            if (idx != curInfoIdx) {
                MPL_error_printf
                    ("Malformed PMI command: info keys and values not ordered as expected (expected value %d but got %d)\n",
                     curInfoIdx, idx);
                return 1;
            } else {
                MPL_strncpy(curInfoVal, valPtr, sizeof(curInfoVal));
                /* Apply this info item */
                fPMIInfoKey(app, curInfoKey, curInfoVal);
                /* printf("Got info %s+%s\n", curInfoKey, curInfoVal); */
            }
        } else {
            MPL_error_printf("Unrecognized PMI subcommand on spawnmult: %s\n", cmdPtr);
            return 1;
        }
    }

    /* Hand the collected argument strings over to the app.  nArgs was
     * range-checked when argcnt was parsed, so i stays inside args[].
     * An entry is a null pointer if the client never sent that arg%d */
    if (app->nArgs > 0) {
        app->args = (const char **) MPL_malloc(app->nArgs * sizeof(char *), MPL_MEM_PM);
        if (!app->args)
            return 1;
        for (i = 0; i < app->nArgs; i++) {
            app->args[i] = args[i];
            args[i] = 0;
        }
    }

    pWorld->nApps++;

    /* Now that we've read the commands, invoke the user's spawn command */
    if (totspawns == spawnnum) {
        PMISetupNewGroup(pWorld->nProcess, kvs);

        if (userSpawner) {
            rc = (*userSpawner) (pWorld, userSpawnerData);
        } else {
            MPL_error_printf("Unable to spawn %s\n", app->exename);
            rc = 1;
            MPIE_PrintProcessWorld(stdout, pWorld);
        }

        /* The spawner's status goes back to the client; it is not the
         * return value of this routine */
        MPL_snprintf(outbuf, PMIU_MAXLINE, "cmd=spawn_result rc=%d\n", rc);
        PMIWriteLine(pentry->fd, outbuf);
        DBG_PRINTFCOND(pmidebug, ("%s", outbuf));

        /* Clear for the next spawn */
        pentry->spawnApp = 0;
        pentry->spawnAppTail = 0;
        pentry->spawnKVS = 0;
        pentry->spawnWorld = 0;
    }

    /* If totspawnnum != spawnnum, then we are expecting a
     * spawnmult with additional items */
    return 0;
}
示例#2
0
文件: mpiexec.c 项目: zhanglt/mpich
/*
 * Entry point of mpiexec: build the process universe from the environment
 * and the command line, start the PMI listener, fork the processes of the
 * first world, run the I/O loop and return the collected exit status.
 *
 * The third parameter (envp) is common but not standard.
 */
int main(int argc, char *argv[], char *envp[])
{
    int rc;
    int exceptionalRc = 0;      /* return code for exits such as a timeout */
    int loopReason;
    int anySignaled = 0;
    SetupInfo setup;
    char portName[MAX_PORT_STRING];

    /* MPIE_ProcessInit initializes the global pUniv */
    MPIE_ProcessInit();
    /* Default universe size */
    pUniv.size = 64;

    /* Establish defaults for optional arguments and look in the
     * environment for special options such as debugging; this also sets
     * some defaults in pUniv */
    MPIE_CheckEnv(&pUniv, 0, 0);
    IOLabelCheckEnv();

    /* Fill in the universe from the command line (routine from
     * util/cmnargs.c) */
    MPIE_Args(argc, argv, &pUniv, 0, 0);

    /* Any soft arguments have to be resolved now */
    rc = MPIE_InitWorldWithSoft(&pUniv.worlds[0], pUniv.size);
    if (!rc) {
        MPL_error_printf("Unable to process soft arguments\n");
        exit(1);
    }

    if (pUniv.fromSingleton) {
        /* The MPI process already exists, so only a simple entry for that
         * single process is set up instead of creating the process */
        MPIE_SetupSingleton(&pUniv);
    }

    rc = MPIE_ChooseHosts(&pUniv.worlds[0], MPIE_ReadMachines, 0);
    if (rc) {
        MPL_error_printf("Unable to assign hosts to processes\n");
        exit(1);
    }

    if (MPIE_Debug) {
        MPIE_PrintProcessUniverse(stdout, &pUniv);
    }

    DBG_PRINTF(("timeout_seconds = %d\n", pUniv.timeout));

    /* Obtain the common port on which the created processes open their
     * PMI connections */
    rc = PMIServSetupPort(&pUniv, portName, sizeof(portName));
    if (rc) {
        MPL_error_printf("Unable to setup port for listener\n");
        exit(1);
    }
    setup.pmiinfo.portName = portName;

#ifdef USE_MPI_STAGE_EXECUTABLES
    /* Hook for later use in staging executables */
    if (?stageExes) {
	rc = MPIE_StageExecutables( &pUniv.worlds[0] );
	if (!rc) ...;
    }
#endif

    PMIServInit(myspawn, &setup);
    setup.pmiinfo.pWorld = &pUniv.worlds[0];
    PMISetupNewGroup(pUniv.worlds[0].nProcess, 0);
    MPIE_ForwardCommonSignals();

    if (pUniv.fromSingleton) {
        /* FIXME: The singleton code goes here */
        MPL_error_printf("Singleton init not supported\n");
        exit(1);
    }
    MPIE_ForkProcesses(&pUniv.worlds[0], envp, mypreamble, &setup,
                       mypostfork, 0, mypostamble, 0);

    loopReason = MPIE_IOLoop(pUniv.timeout);

    if (loopReason == IOLOOP_TIMEOUT) {
        /* The loop ended because of the timeout: report it and terminate
         * the children */
        if (pUniv.timeout > 60) {
            MPL_error_printf("Timeout of %d minutes expired; job aborted\n",
                             pUniv.timeout / 60);
        } else {
            MPL_error_printf("Timeout of %d seconds expired; job aborted\n",
                             pUniv.timeout);
        }
        exceptionalRc = 1;
        MPIE_KillUniverse(&pUniv);
    }

    /* Wait for every process to exit and collect information about it.
     * This happens through the SIGCHLD handler, and the wait is bounded
     * to 2 seconds */
    MPIE_WaitForProcesses(&pUniv, 2);

    /* Compute the return code (max for now) */
    rc = MPIE_ProcessGetExitStatus(&anySignaled);

    /* Optionally give details about the processes that failed */
    if (anySignaled || (rc && printFailure)) {
        MPIE_PrintFailureReasons(stderr);
    }

    /* When the processes exited normally (or were already gone) but the
     * run ended exceptionally, e.g. by timeout, report that code instead */
    if (rc == 0 && exceptionalRc != 0) {
        rc = exceptionalRc;
    }

    return rc;
}