static char *getNLFromHosts(const char *hl_descr) { static char *nl = NULL; char *host, *work, *tmp_descr; nl = realloc(nl, sizeof(char) * PSC_getNrOfNodes()); if (!nl) { PSC_log(-1, "%s: no memory\n", __func__); return NULL; } memset(nl, 0, PSC_getNrOfNodes()); tmp_descr = strdup(hl_descr); if (!tmp_descr) { fprintf(stderr, "%s: no memory\n", __func__); return NULL; } host = strtok_r(tmp_descr, ", ", &work); while (host) { PSnodes_ID_t node; struct hostent *hp = gethostbyname(host); struct in_addr addr; int err; if (!hp) break; memcpy(&addr, hp->h_addr_list[0], sizeof(addr)); err = PSI_infoNodeID(-1, PSP_INFO_HOST, &addr.s_addr, &node, 1); if (err || node==-1) break; nl[node] = 1; host = strtok_r(NULL, ", ", &work); } if (host) printf("Illegal hostname '%s'\n", host); free(tmp_descr); return host ? nl : NULL; }
int main(int argc, const char *argv[]) { int np, verbose; int rank, i, rc; int dup_argc; char **dup_argv; int waittime, killtime; unsigned int magic; /* * We can't use popt for argument parsing here. popt is not * capable to stop at the first unrecogniced option, i.e. at the * executable separation options to the mpirun command from * options to the application. */ poptContext optCon; /* context for parsing command-line options */ struct poptOption optionsTable[] = { { "np", '\0', POPT_ARG_INT | POPT_ARGFLAG_ONEDASH, &np, 0, "number of processes to start", "num"}, { "wait", 'w', POPT_ARG_INT, &waittime, 0, "Wait <n> seconds between each spawning step", "n"}, { "kill", 'k', POPT_ARG_INT, &killtime, 0, "Kill all processes <n> seconds after the first exits", "n"}, { NULL, 'v', POPT_ARG_NONE, &verbose, 0, "verbose mode", NULL}, { NULL, '\0', 0, NULL, 0, NULL, NULL} }; /* The duplicated argv will contain the apps commandline */ poptDupArgv(argc, argv, &dup_argc, (const char ***)&dup_argv); optCon = poptGetContext(NULL, dup_argc, (const char **)dup_argv, optionsTable, 0); poptSetOtherOptionHelp(optCon, OTHER_OPTIONS_STR); /* * Split the argv into two parts: * - first one containing the mpirun options * - second one containing the apps argv * The first one is already parsed while splitting */ while (1) { const char *unknownArg; np = -1; verbose = 0; waittime = 0; killtime = -1; rc = poptGetNextOpt(optCon); if ((unknownArg=poptGetArg(optCon))) { /* * Find the first unknown argument (which is the apps * name) within dup_argv. Start searching from dup_argv's end * since the apps name might be used within another * options argument. */ for (i=argc-1; i>0; i--) { if (strcmp(dup_argv[i], unknownArg)==0) { dup_argc = i; dup_argv[dup_argc] = NULL; poptFreeContext(optCon); optCon = poptGetContext(NULL, dup_argc, (const char **)dup_argv, optionsTable, 0); poptSetOtherOptionHelp(optCon, OTHER_OPTIONS_STR); break; } } if (i==0) { printf("unknownArg '%s' not found !?\n", unknownArg); exit(1); } } else { /* No unknownArg left, we are finished */ break; } } if (rc < -1) { /* an error occurred during option processing */ poptPrintUsage(optCon, stderr, 0); fprintf(stderr, "%s: %s\n", poptBadOption(optCon, POPT_BADOPTION_NOALIAS), poptStrerror(rc)); exit(1); } if (np == -1) { poptPrintUsage(optCon, stderr, 0); fprintf(stderr, "You have to give at least the -np argument.\n"); exit(1); } if (!argv[dup_argc]) { poptPrintUsage(optCon, stderr, 0); fprintf(stderr, "No <command> specified.\n"); exit(1); } free(dup_argv); if (verbose) { printf("The 'gmspawner' command-line is:\n"); for (i=0; i<dup_argc; i++) { printf("%s ", argv[i]); } printf("\b\n\n"); printf("The applications command-line is:\n"); for (i=dup_argc; i<argc; i++) { printf("%s ", argv[i]); } printf("\b\n\n"); } /* init PSI */ if (!PSI_initClient(TG_GMSPAWNER)) { fprintf(stderr, "Initialization of PSI failed."); exit(10); } PSI_infoInt(-1, PSP_INFO_TASKRANK, NULL, &rank, 0); if (rank != np) { fprintf(stderr, "%s: rank(%d) != np(%d).\n", argv[dup_argc], rank, np); exit(1); } /* Propagate some environment variables */ PSI_propEnv(); PSI_propEnvList("PSI_EXPORTS"); PSI_propEnvList("__PSI_EXPORTS"); srandom(time(NULL)); magic = random()%9999999; setIntEnv("GMPI_MAGIC", magic); setIntEnv("GMPI_NP", np); { char hostname[256]; gethostname(hostname, sizeof(hostname)); setPSIEnv("GMPI_MASTER", hostname, 1); } { int port = createListener(8000, np, magic, verbose); if (port>=0) setIntEnv("GMPI_PORT", port); } propagateEnv("GMPI_SHMEM", 1); propagateEnv("DISPLAY", 0); propagateEnv("GMPI_EAGER", 0); propagateEnv("GMPI_RECV", 1); PSC_setSigHandler(SIGALRM, sighandler); /* spawn all processes */ PSI_RemoteArgs(argc - dup_argc, (char **)&argv[dup_argc], &dup_argc, &dup_argv); for (rank=0; rank<np; rank++) { if (waittime && rank) sleep(waittime); setIntEnv("GMPI_ID", rank); setIntEnv("GMPI_BOARD", -1); char slavestring[20]; PSnodes_ID_t node; struct in_addr ip; int err; err = PSI_infoNodeID(-1, PSP_INFO_RANKID, &rank, &node, 1); if (err) { fprintf(stderr, "Could not determine rank %d's node.\n", rank); exit(1); } err = PSI_infoUInt(-1, PSP_INFO_NODE, &node, &ip.s_addr, 1); if (err) { fprintf(stderr, "Could not determine node %d's IP\n", node); exit(1); } snprintf(slavestring, sizeof(slavestring), "%s", inet_ntoa(ip)); setPSIEnv("GMPI_SLAVE", slavestring, 1); /* spawn the process */ int error; if (!PSI_spawnRank(rank, ".", dup_argc, dup_argv, &error)) { if (error) { char *errstr = strerror(error); fprintf(stderr, "Could not spawn process %d (%s) error = %s.\n", rank, dup_argv[0], errstr ? errstr : "UNKNOWN"); exit(1); } } } /* Wait for the spawned processes to complete */ while (np) { static int firstClient=1; DDErrorMsg_t msg; int ret; ret = PSI_recvMsg((DDMsg_t *)&msg, sizeof(msg)); if (msg.header.type != PSP_CD_SPAWNFINISH || ret != sizeof(msg)) { fprintf(stderr, "got strange message type %s\n", PSP_printMsg(msg.header.type)); } else { if (firstClient && killtime!=-1) { // printf("Alarm set to %d\n", killtime); if (killtime) { alarm(killtime); firstClient=0; } else { /* Stop immediately */ exit(0); } } np--; // printf("%d clients left\n", np); } } PSI_release(PSC_getMyTID()); PSI_exitClient(); return 0; }