Пример #1
0
/**
 * Build a node-list from a list of hostnames.
 *
 * hl_descr is split at commas and blanks. Each token is resolved via
 * gethostbyname() and PSI_infoNodeID() is asked for the node ID
 * matching the first address found. For every node identified the
 * corresponding entry of a flag array holding PSC_getNrOfNodes()
 * entries is set to 1; all other entries are 0.
 *
 * The array returned is a static buffer that is reset and re-used on
 * every call. The caller must not free() it.
 *
 * @param hl_descr Comma- or blank-separated list of hostnames
 *
 * @return On success the node-list is returned. If memory is
 * exhausted or any hostname cannot be mapped to a valid node ID,
 * NULL is returned. In the latter case the offending hostname is
 * reported on stdout.
 */
static char *getNLFromHosts(const char *hl_descr)
{
    static char *nl = NULL;
    char *host, *work, *tmp_descr, *new_nl, *result;
    int numNodes = PSC_getNrOfNodes();

    /* realloc() with size 0 might release nl; never get there */
    if (numNodes <= 0) {
	PSC_log(-1, "%s: no nodes\n", __func__);
	return NULL;
    }

    /* Keep the old buffer reachable if realloc() fails */
    new_nl = realloc(nl, sizeof(*nl) * numNodes);
    if (!new_nl) {
	PSC_log(-1, "%s: no memory\n", __func__);
	return NULL;
    }
    nl = new_nl;
    memset(nl, 0, sizeof(*nl) * numNodes);

    /* strtok_r() modifies its input, thus work on a copy */
    tmp_descr = strdup(hl_descr);
    if (!tmp_descr) {
	fprintf(stderr, "%s: no memory\n", __func__);
	return NULL;
    }

    host = strtok_r(tmp_descr, ", ", &work);

    while (host) {
	PSnodes_ID_t node;
	struct hostent *hp = gethostbyname(host);
	struct in_addr addr;
	int err;

	if (!hp || !hp->h_addr_list[0]
	    || hp->h_length < (int)sizeof(addr)) break;

	memcpy(&addr, hp->h_addr_list[0], sizeof(addr));
	err = PSI_infoNodeID(-1, PSP_INFO_HOST, &addr.s_addr, &node, 1);

	/* Never write outside of the node-list */
	if (err || node < 0 || node >= numNodes) break;

	nl[node] = 1;
	host = strtok_r(NULL, ", ", &work);
    }

    /*
     * host is only non-NULL if the loop was left early, i.e. if a
     * hostname could not be handled. It points into tmp_descr, thus
     * the result has to be fixed before the copy is released.
     */
    if (host) {
	printf("Illegal hostname '%s'\n", host);
	result = NULL;
    } else {
	result = nl;
    }
    free(tmp_descr);

    return result;
}
Пример #2
0
/**
 * Entry point of the 'gmspawner' helper.
 *
 * The command-line is split into the spawner's own options (-np,
 * --wait, --kill, -v) and the application's command-line starting at
 * the first non-option argument. Afterwards the PSI client is
 * initialized, the GMPI_* environment is set up, a listener is
 * created via createListener() and np processes are spawned via
 * PSI_spawnRank(). Finally one PSP_CD_SPAWNFINISH message per
 * spawned process is awaited.
 *
 * @param argc Number of entries in argv
 *
 * @param argv Argument vector: spawner options followed by the
 * command to spawn and its arguments
 *
 * @return 0 if all processes finished; on errors the program is left
 * via exit() with a non-zero status.
 */
int main(int argc, const char *argv[])
{
    int np, verbose;
    int rank, i, rc;
    int dup_argc;
    char **dup_argv;

    int waittime, killtime;
    unsigned int magic;

    /*
     * popt cannot be used in a straightforward way for argument
     * parsing here. It is not capable of stopping at the first
     * unrecognized option, i.e. at the executable separating the
     * options to the mpirun command from the options to the
     * application. Thus, the command-line is split by hand below and
     * popt only gets to see the first part.
     */

    poptContext optCon;   /* context for parsing command-line options */

    struct poptOption optionsTable[] = {
	{ "np", '\0', POPT_ARG_INT | POPT_ARGFLAG_ONEDASH,
	  &np, 0, "number of processes to start", "num"},
	{ "wait", 'w', POPT_ARG_INT, &waittime, 0,
	  "Wait <n> seconds between each spawning step", "n"},
	{ "kill", 'k', POPT_ARG_INT, &killtime, 0,
	  "Kill all processes <n> seconds after the first exits", "n"},
	{ NULL, 'v', POPT_ARG_NONE,
	  &verbose, 0, "verbose mode", NULL},
	{ NULL, '\0', 0, NULL, 0, NULL, NULL}
    };

    /* Work on a copy of argv since it gets truncated while splitting */
    dup_argv = NULL;
    if (poptDupArgv(argc, argv, &dup_argc, (const char ***)&dup_argv)
	|| !dup_argv) {
	fprintf(stderr, "Unable to duplicate the argument vector.\n");
	exit(1);
    }

    optCon = poptGetContext(NULL, dup_argc, (const char **)dup_argv,
			    optionsTable, 0);
    poptSetOtherOptionHelp(optCon, OTHER_OPTIONS_STR);

    /*
     * Split the argv into two parts:
     *  - first one containing the mpirun options
     *  - second one containing the apps argv
     * The first one is already parsed while splitting
     */
    while (1) {
	const char *unknownArg;

	/* Each pass re-parses from scratch, thus reset all results */
	np = -1;
	verbose = 0;
	waittime = 0;
	killtime = -1;

	rc = poptGetNextOpt(optCon);

	if ((unknownArg=poptGetArg(optCon))) {
	    /*
	     * Find the first unknown argument (which is the apps
	     * name) within dup_argv. Start searching from dup_argv's end
	     * since the apps name might be used within another
	     * options argument.
	     *
	     * The search is limited to the part of dup_argv still in
	     * use: entries from dup_argc on might already be cut off
	     * (i.e. NULL) due to an earlier pass.
	     */
	    for (i=dup_argc-1; i>0; i--) {
		if (strcmp(dup_argv[i], unknownArg)==0) {
		    dup_argc = i;
		    dup_argv[dup_argc] = NULL;
		    poptFreeContext(optCon);
		    optCon = poptGetContext(NULL,
					    dup_argc, (const char **)dup_argv,
					    optionsTable, 0);
		    poptSetOtherOptionHelp(optCon, OTHER_OPTIONS_STR);
		    break;
		}
	    }
	    if (i<=0) {
		printf("unknownArg '%s' not found !?\n", unknownArg);
		exit(1);
	    }
	} else {
	    /* No unknownArg left, we are finished */
	    break;
	}
    }

    if (rc < -1) {
	/* an error occurred during option processing */
	poptPrintUsage(optCon, stderr, 0);
	fprintf(stderr, "%s: %s\n",
		poptBadOption(optCon, POPT_BADOPTION_NOALIAS),
		poptStrerror(rc));
	exit(1);
    }

    if (np == -1) {
	poptPrintUsage(optCon, stderr, 0);
	fprintf(stderr, "You have to give at least the -np argument.\n");
	exit(1);
    }

    /* dup_argc now is the index of the command within the original argv */
    if (!argv[dup_argc]) {
	poptPrintUsage(optCon, stderr, 0);
	fprintf(stderr, "No <command> specified.\n");
	exit(1);
    }

    free(dup_argv);
    dup_argv = NULL;

    if (verbose) {
	printf("The 'gmspawner' command-line is:\n");
	for (i=0; i<dup_argc; i++) {
	    printf("%s ", argv[i]);
	}
	printf("\b\n\n");

	printf("The applications command-line is:\n");
	for (i=dup_argc; i<argc; i++) {
	    printf("%s ", argv[i]);
	}
	printf("\b\n\n");
    }

    /* init PSI */
    if (!PSI_initClient(TG_GMSPAWNER)) {
	fprintf(stderr, "Initialization of PSI failed.\n");
	exit(10);
    }

    /*
     * np was checked against -1 above, thus a failing request leaving
     * rank untouched is caught by the comparison below.
     */
    rank = -1;
    PSI_infoInt(-1, PSP_INFO_TASKRANK, NULL, &rank, 0);
    if (rank != np) {
	fprintf(stderr, "%s: rank(%d) != np(%d).\n", argv[dup_argc], rank, np);

	exit(1);
    }

    /* Propagate some environment variables */
    PSI_propEnv();
    PSI_propEnvList("PSI_EXPORTS");
    PSI_propEnvList("__PSI_EXPORTS");

    srandom(time(NULL));
    magic = random()%9999999;
    setIntEnv("GMPI_MAGIC", magic);

    setIntEnv("GMPI_NP", np);

    {
	char hostname[256];

	if (gethostname(hostname, sizeof(hostname)) < 0) {
	    fprintf(stderr, "Could not determine the local hostname.\n");
	    exit(1);
	}
	/* gethostname() might not terminate a truncated name */
	hostname[sizeof(hostname)-1] = '\0';

	setPSIEnv("GMPI_MASTER", hostname, 1);
    }

    {
	int port = createListener(8000, np, magic, verbose);

	if (port>=0) setIntEnv("GMPI_PORT", port);
    }

    propagateEnv("GMPI_SHMEM", 1);
    propagateEnv("DISPLAY", 0);
    propagateEnv("GMPI_EAGER", 0);
    propagateEnv("GMPI_RECV", 1);

    PSC_setSigHandler(SIGALRM, sighandler);

    /* spawn all processes */

    /* dup_argc and dup_argv are re-used for the arguments to spawn */
    PSI_RemoteArgs(argc - dup_argc, (char **)&argv[dup_argc],
		   &dup_argc, &dup_argv);

    for (rank=0; rank<np; rank++) {
	if (waittime && rank) sleep(waittime);

	setIntEnv("GMPI_ID", rank);
	setIntEnv("GMPI_BOARD", -1);

	char slavestring[20];
	PSnodes_ID_t node;
	struct in_addr ip;
	int err;

	err = PSI_infoNodeID(-1, PSP_INFO_RANKID, &rank, &node, 1);
	if (err) {
	    fprintf(stderr, "Could not determine rank %d's node.\n", rank);
	    exit(1);
	}

	err = PSI_infoUInt(-1, PSP_INFO_NODE, &node, &ip.s_addr, 1);
	if (err) {
	    fprintf(stderr, "Could not determine node %d's IP\n", node);
	    exit(1);
	}

	snprintf(slavestring, sizeof(slavestring), "%s", inet_ntoa(ip));
	setPSIEnv("GMPI_SLAVE", slavestring, 1);

	/* spawn the process */
	int error;
	if (!PSI_spawnRank(rank, ".", dup_argc, dup_argv, &error)) {
	    if (error) {
		char *errstr = strerror(error);
		fprintf(stderr, "Could not spawn process %d (%s) error = %s.\n",
			rank, dup_argv[0], errstr ? errstr : "UNKNOWN");
		exit(1);
	    }
	}
    }

    /* Wait for the spawned processes to complete */
    while (np) {
	static int firstClient=1;
	DDErrorMsg_t msg;
	int ret;

	ret = PSI_recvMsg((DDMsg_t *)&msg, sizeof(msg));
	if (ret < (int)sizeof(msg.header)) {
	    /*
	     * Not even a complete header was received, thus msg must
	     * not be interpreted.
	     * NOTE(review): a persistent failure keeps this loop
	     * spinning; confirm if giving up is preferable here.
	     */
	    fprintf(stderr, "PSI_recvMsg() returned %d\n", ret);
	} else if (msg.header.type != PSP_CD_SPAWNFINISH
		   || ret != (int)sizeof(msg)) {
	    fprintf(stderr, "got strange message type %s\n",
		    PSP_printMsg(msg.header.type));
	} else {
	    if (firstClient && killtime!=-1) {
		if (killtime) {
		    /* Arm the timer once, on the first process finishing */
		    alarm(killtime);
		    firstClient=0;
		} else {
		    /* Stop immediately */
		    exit(0);
		}
	    }
	    np--;
	}
    }
    PSI_release(PSC_getMyTID());
    PSI_exitClient();

    return 0;
}