HYD_status HYDU_sock_is_local(char *host, int *is_local) { struct hostent *ht; char *host_ip = NULL, *lhost_ip = NULL; char lhost[MAX_HOSTNAME_LEN]; struct sockaddr_in sa; struct ifaddrs *ifaddr, *ifa; char buf[MAX_HOSTNAME_LEN]; HYD_status status = HYD_SUCCESS; *is_local = 0; /* Algorithm used: * * 1. Find the local host name * - If "host" matches the local host name, return. * 2. Find the IP address associated with "host" and the IP the local host * resolves to. * - If these IPs match, return. * 3. Find all local network IP addresses * - If the "host" IP address matches any of the local network IP * addresses, return. */ /* STEP 1: If "host" matches the local host name, return */ if (gethostname(lhost, MAX_HOSTNAME_LEN) < 0) { /* We can't figure out what my localhost name is. *sigh*. We * could return an error here, but we will just punt it to the * upper layer saying that we don't know if it is local. We * cannot try steps 2 and 3 either, since we don't have our * local hostname. */ goto fn_exit; } else if (!strcmp(lhost, host)) { *is_local = 1; goto fn_exit; } else { /* we have our local hostname, but that does not match the * provided hostname. Let's try to get our remote IP address * first. If we can't get that, we can give up. */ /* If we are unable to resolve the remote host name, it need * not be an error. It could mean that the user is using an * alias for the hostname (e.g., an ssh config alias) */ if ((ht = gethostbyname(host)) == NULL) goto fn_exit; memset((char *) &sa, 0, sizeof(struct sockaddr_in)); memcpy(&sa.sin_addr, ht->h_addr_list[0], ht->h_length); /* Find the IP address of the host */ host_ip = MPL_strdup((char *) inet_ntop(AF_INET, (const void *) &sa.sin_addr, buf, MAX_HOSTNAME_LEN)); HYDU_ASSERT(host_ip, status); } /* OK, if we are here, we got the remote IP. We have two ways of * getting the local IP: gethostbyname or getifaddrs. We'll try * both. 
*/ /* STEP 2: Let's try the gethostbyname model */ if ((ht = gethostbyname(lhost))) { memset((char *) &sa, 0, sizeof(struct sockaddr_in)); memcpy(&sa.sin_addr, ht->h_addr_list[0], ht->h_length); /* Find the IP address of the host */ lhost_ip = MPL_strdup((char *) inet_ntop(AF_INET, (const void *) &sa.sin_addr, buf, MAX_HOSTNAME_LEN)); HYDU_ASSERT(lhost_ip, status); /* See if the IP address of the hostname we got matches the IP * address to which the local host resolves */ if (!strcmp(lhost_ip, host_ip)) { *is_local = 1; goto fn_exit; } } /* Either gethostbyname didn't resolve or we didn't find a match. * Either way, let's try the getifaddr model. */ /* STEP 3: Let's try the getifaddr model */ if (getifaddrs(&ifaddr) == -1) HYDU_ERR_SETANDJUMP(status, HYD_INTERNAL_ERROR, "getifaddrs failed\n"); /* Find the IP addresses of all local interfaces */ for (ifa = ifaddr; ifa; ifa = ifa->ifa_next) { if (ifa->ifa_addr && ifa->ifa_addr->sa_family == AF_INET) { struct sockaddr_in *sa_ptr = (struct sockaddr_in *) ifa->ifa_addr; lhost_ip = MPL_strdup((char *) inet_ntop(AF_INET, (const void *) &(sa_ptr->sin_addr), buf, MAX_HOSTNAME_LEN)); HYDU_ASSERT(lhost_ip, status); /* For each local IP address, see if it matches the "host" * IP address */ if (!strcmp(host_ip, lhost_ip)) { *is_local = 1; freeifaddrs(ifaddr); goto fn_exit; } MPL_free(lhost_ip); lhost_ip = NULL; } } freeifaddrs(ifaddr); fn_exit: if (host_ip) MPL_free(host_ip); if (lhost_ip) MPL_free(lhost_ip); return status; fn_fail: goto fn_exit; }
/*
 * Handle one "mcmd=spawn" PMI command for the process "pentry".
 *
 * The remaining "name=value" lines of the command are read with
 * PMIUBufferedReadLine() until a line containing only "endcmd" is seen.
 * Each spawn command creates one ProcessApp that is appended to the spawn
 * list kept in pentry; the ProcessWorld and KVS for the spawn are created
 * on the first command of a (possibly multi-command) spawn.  When the
 * "spawnssofar" value equals "totspawns", the registered userSpawner is
 * invoked and a "cmd=spawn_result" line is written back on pentry->fd.
 *
 * Returns 0 on success and 1 on allocation failure or a malformed command.
 */
static int fPMI_Handle_spawn(PMIProcess * pentry)
{
    char inbuf[PMIU_MAXLINE];
    char *(args[PMI_MAX_ARGS]);
    char key[MAXKEYLEN];
    char outbuf[PMIU_MAXLINE];
    ProcessWorld *pWorld;
    ProcessApp *app = 0;
    int preputNum = 0, rc;
    int i;
    int totspawns = 0, spawnnum = 0;
    PMIKVSpace *kvs = 0;
    /* Variables for info */
    char curInfoKey[PMI_MAX_INFO_KEY], curInfoVal[PMI_MAX_INFO_VAL];
    int curInfoIdx = -1;

    DBG_PRINTFCOND(pmidebug, ("Entering fPMI_Handle_spawn\n"));

    /* A preput_val_ line may arrive without a preceding preput_key_ line;
     * make sure the key buffer is a valid (empty) string in that case. */
    key[0] = '\0';

    if (!pentry->spawnWorld) {
        pWorld = (ProcessWorld *) MPL_malloc(sizeof(ProcessWorld), MPL_MEM_PM);
        if (!pWorld)
            return 1;

        pentry->spawnWorld = pWorld;
        pWorld->apps = 0;
        pWorld->nProcess = 0;
        pWorld->nextWorld = 0;
        pWorld->nApps = 0;
        pWorld->worldNum = pUniv.nWorlds++;
        /* FIXME: What should be the defaults for the spawned env?
         * Should the default be the env of the spawner? */
        pWorld->genv = 0;
        pentry->spawnKVS = fPMIKVSAllocate();
    } else {
        pWorld = pentry->spawnWorld;
    }
    kvs = pentry->spawnKVS;

    /* Note that each mcmd=spawn creates an app.  When all apps
     * are present, they can be linked to a world.  A
     * spawnmultiple command makes use of multiple mcmd=spawn PMI
     * commands */

    /* Create a new app */
    app = (ProcessApp *) MPL_malloc(sizeof(ProcessApp), MPL_MEM_PM);
    if (!app)
        return 1;
    app->myAppNum = 0;
    app->exename = 0;
    app->arch = 0;
    app->path = 0;
    app->wdir = 0;
    app->hostname = 0;
    app->args = 0;
    app->nArgs = 0;
    app->soft.nelm = 0;
    app->nProcess = 0;
    app->pState = 0;
    app->nextApp = 0;
    app->env = 0;
    app->pWorld = pWorld;

    /* Add to the pentry spawn structure */
    if (pentry->spawnAppTail) {
        pentry->spawnAppTail->nextApp = app;
    } else {
        pentry->spawnApp = app;
        pWorld->apps = app;
    }
    pentry->spawnAppTail = app;

    for (i = 0; i < PMI_MAX_ARGS; i++)
        args[i] = 0;

    /* Get lines until we find either cmd or mcmd (an error) or endcmd
     * (expected end) */
    while ((rc = PMIUBufferedReadLine(pentry, inbuf, sizeof(inbuf))) > 0) {
        char *cmdPtr, *valPtr, *p;

        /* Find the command = format */
        p = inbuf;
        /* Find first nonblank */
        while (*p && isascii(*p) && isspace(*p))
            p++;
        if (!*p) {
            /* Empty string.  Ignore */
            continue;
        }
        cmdPtr = p++;
        /* Find '=' */
        while (*p && *p != '=')
            p++;
        if (!*p) {
            /* No =.  Check for endcmd */
            p--;
            /* Trim spaces */
            while (isascii(*p) && isspace(*p))
                p--;
            /* Add null to end */
            *++p = 0;
            if (strcmp("endcmd", cmdPtr) == 0) {
                break;
            }
            /* FIXME: Otherwise, we have a problem */
            MPL_error_printf("Malformed PMI command (no endcmd seen\n");
            return 1;
        } else {
            *p = 0;
        }

        /* Found an = .  value is the rest of the line */
        valPtr = ++p;
        while (*p && *p != '\n')
            p++;
        if (*p)
            *p = 0;     /* Remove the newline */

        /* Now, process the cmd and value */
        if (strcmp("nprocs", cmdPtr) == 0) {
            app->nProcess = atoi(valPtr);
            pWorld->nProcess += app->nProcess;
        } else if (strcmp("execname", cmdPtr) == 0) {
            app->exename = MPL_strdup(valPtr);
        } else if (strcmp("totspawns", cmdPtr) == 0) {
            /* This tells us how many separate spawn commands
             * we expect to see (e.g., for spawn multiple).
             * Each spawn command is a separate "app" */
            totspawns = atoi(valPtr);
        } else if (strcmp("spawnssofar", cmdPtr) == 0) {
            /* This tells us which app we are (starting from 1) */
            spawnnum = atoi(valPtr);
            app->myAppNum = spawnnum - 1;
        } else if (strcmp("argcnt", cmdPtr) == 0) {
            /* argcnt may not be set before the args */
            int argcnt = atoi(valPtr);

            /* The count is later used to index args[], which holds
             * PMI_MAX_ARGS entries; reject anything outside that range. */
            if (argcnt < 0 || argcnt > PMI_MAX_ARGS) {
                MPL_error_printf
                    ("Malformed PMI Spawn command; argcnt is %d but must be between 0 and %d\n",
                     argcnt, PMI_MAX_ARGS);
                return 1;
            }
            app->nArgs = argcnt;
        } else if (strncmp("arg", cmdPtr, 3) == 0) {
            int argnum;
            /* argcnt may not be set before the args */
            /* Handle arg%d.  Values are 1 - origin */
            argnum = atoi(cmdPtr + 3) - 1;
            if (argnum < 0 || argnum >= PMI_MAX_ARGS) {
                MPL_error_printf
                    ("Malformed PMI Spawn command; the index of an argument in the command is %d but must be between 0 and %d\n",
                     argnum, PMI_MAX_ARGS - 1);
                return 1;
            }
            args[argnum] = MPL_strdup(valPtr);
        } else if (strcmp("preput_num", cmdPtr) == 0) {
            preputNum = atoi(valPtr);
        } else if (strncmp("preput_key_", cmdPtr, 11) == 0) {
            /* Save the key */
            MPL_strncpy(key, valPtr, sizeof(key));
        } else if (strncmp("preput_val_", cmdPtr, 11) == 0) {
            /* Place the key,val into the space associated with the current
             * PMI group */
            fPMIKVSAddPair(kvs, key, valPtr);
        }
        /* Info is on a per-app basis (it is an array of info items in
         * spawn multiple).  We can ignore most info values.
         * The ones that are handled are processed by a
         * separate routine (not yet implemented).
         * simple_pmi.c sends (key,value), so we can keep just the
         * last key and pass the key/value to the registered info
         * handler, along with the app structure.  Alternately,
         * we could save all info items and let the user's
         * spawner handle it */
        else if (strcmp("info_num", cmdPtr) == 0) {
            /* Number of info values */
            ;
        } else if (strncmp("info_key_", cmdPtr, 9) == 0) {
            /* The actual name has a digit, which indicates *which* info
             * key this is */
            curInfoIdx = atoi(cmdPtr + 9);
            MPL_strncpy(curInfoKey, valPtr, sizeof(curInfoKey));
        } else if (strncmp("info_val_", cmdPtr, 9) == 0) {
            /* The actual name has a digit, which indicates *which* info
             * value this is */
            int idx = atoi(cmdPtr + 9);
            if (idx != curInfoIdx) {
                MPL_error_printf
                    ("Malformed PMI command: info keys and values not ordered as expected (expected value %d but got %d)\n",
                     curInfoIdx, idx);
                return 1;
            } else {
                MPL_strncpy(curInfoVal, valPtr, sizeof(curInfoVal));
                /* Apply this info item */
                fPMIInfoKey(app, curInfoKey, curInfoVal);
                /* printf("Got info %s+%s\n", curInfoKey, curInfoVal); */
            }
        } else {
            MPL_error_printf("Unrecognized PMI subcommand on spawnmult: %s\n", cmdPtr);
            return 1;
        }
    }

    if (app->nArgs > 0) {
        app->args = (const char **) MPL_malloc(app->nArgs * sizeof(char *), MPL_MEM_PM);
        if (!app->args)
            return 1;
        /* Hand the collected strings over to the app; nArgs was
         * validated against PMI_MAX_ARGS when it was parsed. */
        for (i = 0; i < app->nArgs; i++) {
            app->args[i] = args[i];
            args[i] = 0;
        }
    }

    pWorld->nApps++;

    /* Now that we've read the commands, invoke the user's spawn command */
    if (totspawns == spawnnum) {
        PMISetupNewGroup(pWorld->nProcess, kvs);

        if (userSpawner) {
            rc = (*userSpawner) (pWorld, userSpawnerData);
        } else {
            MPL_error_printf("Unable to spawn %s\n", app->exename);
            rc = 1;
            MPIE_PrintProcessWorld(stdout, pWorld);
        }

        MPL_snprintf(outbuf, PMIU_MAXLINE, "cmd=spawn_result rc=%d\n", rc);
        PMIWriteLine(pentry->fd, outbuf);
        DBG_PRINTFCOND(pmidebug, ("%s", outbuf));

        /* Clear for the next spawn */
        pentry->spawnApp = 0;
        pentry->spawnAppTail = 0;
        pentry->spawnKVS = 0;
        pentry->spawnWorld = 0;
    }

    /* If totspawns != spawnnum, then we are expecting a
     * spawnmult with additional items */
    return 0;
}
/* A low level, generic and internally used interface to register * a cvar to the MPIR_T. * * IN: dtype, MPI datatype for this cvar * IN: name, Name of the cvar * IN: addr, Pointer to the cvar if known at registeration, otherwise NULL. * IN: count, # of elements of this cvar if known at registeration, otherwise 0. * IN: etype, MPI_T_enum or MPI_T_ENUM_NULL * IN: verb, MPI_T_PVAR_VERBOSITY_* * IN: binding, MPI_T_BIND_* * IN: Scope, MPI_T_SCOPE_* * IN: get_addr, If not NULL, it is a callback to get address of the cvar. * IN: get_count, If not NULL, it is a callback to read count of the cvar. * IN: cat, Catogery name of the cvar * IN: desc, Description of the cvar */ void MPIR_T_CVAR_REGISTER_impl( MPI_Datatype dtype, const char* name, const void *addr, int count, MPIR_T_enum_t *etype, MPIR_T_verbosity_t verb, MPIR_T_bind_t binding, MPIR_T_scope_t scope, MPIR_T_cvar_get_addr_cb get_addr, MPIR_T_cvar_get_count_cb get_count, MPIR_T_cvar_value_t defaultval, const char *cat, const char * desc) { name2index_hash_t *hash_entry; cvar_table_entry_t *cvar; int cvar_idx; /* Check whether this is a replicated cvar, whose name is unique. */ HASH_FIND_STR(cvar_hash, name, hash_entry); if (hash_entry != NULL) { /* Found it, the cvar already exists */ cvar_idx = hash_entry->idx; cvar = (cvar_table_entry_t *)utarray_eltptr(cvar_table, cvar_idx); /* Should never override an existing & active var */ MPIU_Assert(cvar->active != TRUE); cvar->active = TRUE; /* FIXME: Do we need to check consistency between the old and new? 
*/ } else { /* Not found, so push the cvar to back of cvar_table */ utarray_extend_back(cvar_table); cvar = (cvar_table_entry_t *)utarray_back(cvar_table); cvar->active = TRUE; cvar->datatype = dtype; cvar->name = MPL_strdup(name); MPIU_Assert(cvar->name); if (dtype != MPI_CHAR) { cvar->addr = (void *)addr; } else { cvar->addr = MPL_malloc(count); MPIU_Assert(cvar->addr); if (defaultval.str == NULL) { ((char *)(cvar->addr))[0] = '\0'; } else { /* Use greater (>), since count includes the terminating '\0', but strlen does not */ MPIU_Assert(count > strlen(defaultval.str)); strcpy(cvar->addr, defaultval.str); } } cvar->count = count; cvar->verbosity = verb; cvar->bind = binding; cvar->scope = scope; cvar->get_addr = get_addr; cvar->get_count = get_count; cvar->defaultval = defaultval; cvar->desc = MPL_strdup(desc); MPIU_Assert(cvar->desc); /* Record <name, index> in hash table */ cvar_idx = utarray_len(cvar_table) - 1; hash_entry = MPL_malloc(sizeof(name2index_hash_t)); MPIU_Assert(hash_entry); /* Need not to Strdup name, since cvar_table and cvar_hash co-exist */ hash_entry->name =name; hash_entry->idx = cvar_idx; HASH_ADD_KEYPTR(hh, cvar_hash, hash_entry->name, strlen(hash_entry->name), hash_entry); /* Add the cvar to a category */ MPIR_T_cat_add_cvar(cat, cvar_idx); } }
static HYD_status group_to_nodes(char *str) { char *nodes, *tnodes, *tmp, *start_str, *end_str, **set; int start, end, i, j, k = 0; HYD_status status = HYD_SUCCESS; for (tmp = str; *tmp != '[' && *tmp != 0; tmp++); if (*tmp == 0) { /* only one node in the group */ status = HYDU_add_to_node_list(str, tasks_per_node[k++], &global_node_list); HYDU_ERR_POP(status, "unable to add to node list\n"); goto fn_exit; } /* more than one node in the group */ *tmp = 0; nodes = tmp + 1; for (tmp = nodes; *tmp != ']' && *tmp != 0; tmp++); *tmp = 0; /* remove the closing ']' */ /* Find the number of sets */ tnodes = MPL_strdup(nodes); tmp = strtok(tnodes, ","); for (i = 1; tmp; i++) tmp = strtok(NULL, ","); HYDU_MALLOC_OR_JUMP(set, char **, i * sizeof(char *), status); /* Find the actual node sets */ set[0] = strtok(nodes, ","); for (i = 1; set[i - 1]; i++) set[i] = strtok(NULL, ","); for (i = 0; set[i]; i++) { start_str = strtok(set[i], "-"); if ((end_str = strtok(NULL, "-")) == NULL) end_str = start_str; start = atoi(start_str); end = atoi(end_str); for (j = start; j <= end; j++) { char *node_str[HYD_NUM_TMP_STRINGS]; node_str[0] = MPL_strdup(str); node_str[1] = HYDU_int_to_str_pad(j, strlen(start_str)); node_str[2] = NULL; status = HYDU_str_alloc_and_join(node_str, &tmp); HYDU_ERR_POP(status, "unable to join strings\n"); HYDU_free_strlist(node_str); status = HYDU_add_to_node_list(tmp, tasks_per_node[k++], &global_node_list); HYDU_ERR_POP(status, "unable to add to node list\n"); } } fn_exit: return status; fn_fail: goto fn_exit; }
/* Close one side of each pipe pair and replace stdout/err with the pipes.
 *
 * After calling IOLabelSetupInClient() and PMISetupInClient(), this routine
 * rewrites the application's command line so that the original program is
 * started through "/usr/bin/ssh -Y <hostname>", with PMI_PORT, PMI_ID and
 * (when set in our environment) PMI_DEBUG passed on the command line by
 * AddEnvSetToCmdLine().
 *
 * predata is the SetupInfo for this process; data is not used here.
 * Returns 0.  The process exits with status 1 if no hostname is available
 * or the new argument vector cannot be allocated.
 */
int mypostfork( void *predata, void *data, ProcessState *pState )
{
    SetupInfo *s = (SetupInfo *)predata;
    ProcessApp *app;
    const char **newargs = 0;
    char *pmiDebugStr = 0;
    char rankStr[12];
    int curarg = 0;
    int j;

    IOLabelSetupInClient( &s->labelinfo );
    PMISetupInClient( 1, &s->pmiinfo );

    /* Now, we *also* change the process state to insert the
       interposed remote shell routine.  This is probably not
       where we want this in the final version (because MPIE_ExecProgram
       does a lot under the assumption that the started program will
       know what to do with new environment variables), but this
       will allow us to start. */
    app = pState->app;

    /* Check the precondition before allocating anything */
    if (!pState->hostname) {
        MPL_error_printf( "No hostname avaliable for %s\n", app->exename );
        exit(1);
    }

    /* Room for the original arguments plus the entries inserted below and
       the terminating null pointer */
    newargs = (const char **) MPL_malloc( (app->nArgs + 14 + 1) * sizeof(char *) );
    if (!newargs) {
        MPL_error_printf( "Unable to allocate argument list for %s\n",
                          app->exename );
        exit(1);
    }

    /* snprintf always terminates within the given size, so pass the full
       buffer; size-1 would cut off the last digit of an 11-character id */
    snprintf( rankStr, sizeof(rankStr), "%d", pState->id );
    rankStr[sizeof(rankStr)-1] = 0;

    /* Insert into app->args */
    newargs[curarg++] = MPL_strdup( "-Y" );
    newargs[curarg++] = pState->hostname;
    curarg += AddEnvSetToCmdLine( "PMI_PORT", s->pmiinfo.portName,
                                  newargs + curarg );
    curarg += AddEnvSetToCmdLine( "PMI_ID", rankStr, newargs + curarg );
    pmiDebugStr = getenv( "PMI_DEBUG" );
    if (pmiDebugStr) {
        /* Use this to help debug the connection process */
        curarg += AddEnvSetToCmdLine( "PMI_DEBUG", pmiDebugStr,
                                      newargs + curarg );
    }

    /* The original program and its arguments follow the inserted ones */
    newargs[curarg++] = app->exename;
    for (j=0; j<app->nArgs; j++) {
        newargs[j+curarg] = app->args[j];
    }
    newargs[j+curarg] = 0;

    app->exename = MPL_strdup( "/usr/bin/ssh" );
    app->args    = newargs;
    app->nArgs  += curarg;

    if (MPIE_Debug) {
        printf( "cmd = %s\n", app->exename ); fflush(stdout);
        printf( "Number of args = %d\n", app->nArgs );
        for (j=0; j<app->nArgs; j++) {
            printf( "argv[%d] = %s\n", j, app->args[j] ); fflush(stdout);
        }
    }

    return 0;
}
static HYD_status fn_info_getnodeattr(int fd, char *args[]) { int found; struct HYD_pmcd_pmi_kvs_pair *run; char *key, *waitval, *thrid; struct HYD_string_stash stash; char *cmd; struct HYD_pmcd_token *tokens = NULL; int token_count; HYD_status status = HYD_SUCCESS; HYDU_FUNC_ENTER(); status = HYD_pmcd_pmi_args_to_tokens(args, &tokens, &token_count); HYDU_ERR_POP(status, "unable to convert args to tokens\n"); key = HYD_pmcd_pmi_find_token_keyval(tokens, token_count, "key"); HYDU_ERR_CHKANDJUMP(status, key == NULL, HYD_INTERNAL_ERROR, "unable to find key token\n"); waitval = HYD_pmcd_pmi_find_token_keyval(tokens, token_count, "wait"); thrid = HYD_pmcd_pmi_find_token_keyval(tokens, token_count, "thrid"); /* if a predefined value is not found, we let the code fall back * to regular search and return an error to the client */ found = 0; for (run = HYD_pmcd_pmip.local.kvs->key_pair; run; run = run->next) { if (!strcmp(run->key, key)) { found = 1; break; } } if (found) { /* We found the attribute */ HYD_STRING_STASH_INIT(stash); HYD_STRING_STASH(stash, MPL_strdup("cmd=info-getnodeattr-response;"), status); if (thrid) { HYD_STRING_STASH(stash, MPL_strdup("thrid="), status); HYD_STRING_STASH(stash, MPL_strdup(thrid), status); HYD_STRING_STASH(stash, MPL_strdup(";"), status); } HYD_STRING_STASH(stash, MPL_strdup("found=TRUE;value="), status); HYD_STRING_STASH(stash, MPL_strdup(run->val), status); HYD_STRING_STASH(stash, MPL_strdup(";rc=0;"), status); HYD_STRING_SPIT(stash, cmd, status); send_cmd_downstream(fd, cmd); MPL_free(cmd); } else if (waitval && !strcmp(waitval, "TRUE")) { /* The client wants to wait for a response; queue up the request */ status = HYD_pmcd_pmi_v2_queue_req(fd, -1, -1, args, key, &pending_reqs); HYDU_ERR_POP(status, "unable to queue request\n"); goto fn_exit; } else { /* Tell the client that we can't find the attribute */ HYD_STRING_STASH_INIT(stash); HYD_STRING_STASH(stash, MPL_strdup("cmd=info-getnodeattr-response;"), status); if (thrid) { 
HYD_STRING_STASH(stash, MPL_strdup("thrid="), status); HYD_STRING_STASH(stash, MPL_strdup(thrid), status); HYD_STRING_STASH(stash, MPL_strdup(";"), status); } HYD_STRING_STASH(stash, MPL_strdup("found=FALSE;rc=0;"), status); HYD_STRING_SPIT(stash, cmd, status); send_cmd_downstream(fd, cmd); MPL_free(cmd); } fn_exit: if (tokens) HYD_pmcd_pmi_free_tokens(tokens, token_count); HYDU_FUNC_EXIT(); return status; fn_fail: goto fn_exit; }
static HYD_status fn_fullinit(int fd, char *args[]) { int id, i; char *rank_str; struct HYD_string_stash stash; char *cmd; struct HYD_pmcd_token *tokens; int token_count; HYD_status status = HYD_SUCCESS; HYDU_FUNC_ENTER(); status = HYD_pmcd_pmi_args_to_tokens(args, &tokens, &token_count); HYDU_ERR_POP(status, "unable to convert args to tokens\n"); rank_str = HYD_pmcd_pmi_find_token_keyval(tokens, token_count, "pmirank"); HYDU_ERR_CHKANDJUMP(status, rank_str == NULL, HYD_INTERNAL_ERROR, "unable to find pmirank token\n"); id = atoi(rank_str); /* Store the PMI_RANK to fd mapping */ for (i = 0; i < HYD_pmcd_pmip.local.proxy_process_count; i++) { if (HYD_pmcd_pmip.downstream.pmi_rank[i] == id) { HYD_pmcd_pmip.downstream.pmi_fd[i] = fd; HYD_pmcd_pmip.downstream.pmi_fd_active[i] = 1; break; } } HYDU_ASSERT(i < HYD_pmcd_pmip.local.proxy_process_count, status); HYD_STRING_STASH_INIT(stash); HYD_STRING_STASH(stash, MPL_strdup("cmd=fullinit-response;pmi-version=2;pmi-subversion=0;rank="), status); HYD_STRING_STASH(stash, HYDU_int_to_str(id), status); HYD_STRING_STASH(stash, MPL_strdup(";size="), status); HYD_STRING_STASH(stash, HYDU_int_to_str(HYD_pmcd_pmip.system_global.global_process_count), status); HYD_STRING_STASH(stash, MPL_strdup(";appnum=0"), status); if (HYD_pmcd_pmip.local.spawner_kvsname) { HYD_STRING_STASH(stash, MPL_strdup(";spawner-jobid="), status); HYD_STRING_STASH(stash, MPL_strdup(HYD_pmcd_pmip.local.spawner_kvsname), status); } if (HYD_pmcd_pmip.user_global.debug) { HYD_STRING_STASH(stash, MPL_strdup(";debugged=TRUE;pmiverbose=TRUE"), status); } else { HYD_STRING_STASH(stash, MPL_strdup(";debugged=FALSE;pmiverbose=FALSE"), status); } HYD_STRING_STASH(stash, MPL_strdup(";rc=0;"), status); HYD_STRING_SPIT(stash, cmd, status); send_cmd_downstream(fd, cmd); MPL_free(cmd); fn_exit: HYD_pmcd_pmi_free_tokens(tokens, token_count); HYDU_FUNC_EXIT(); return status; fn_fail: goto fn_exit; }
HYD_status HYDT_bscd_slurm_launch_procs(char **args, struct HYD_proxy *proxy_list, int use_rmk, int *control_fd) { int num_hosts, idx, i; int *pid, *fd_list; char *targs[HYD_NUM_TMP_STRINGS], *node_list_str = NULL; char *path = NULL, *extra_arg_list = NULL, *extra_arg; struct HYD_proxy *proxy; HYD_status status = HYD_SUCCESS; HYDU_FUNC_ENTER(); /* We use the following priority order for the executable path: * (1) user-specified; (2) search in path; (3) Hard-coded * location */ if (HYDT_bsci_info.launcher_exec) path = MPL_strdup(HYDT_bsci_info.launcher_exec); if (!path) path = HYDU_find_full_path("srun"); if (!path) path = MPL_strdup("/usr/bin/srun"); idx = 0; targs[idx++] = MPL_strdup(path); if (use_rmk == HYD_FALSE || strcmp(HYDT_bsci_info.rmk, "slurm")) { targs[idx++] = MPL_strdup("--nodelist"); status = proxy_list_to_node_str(proxy_list, &node_list_str); HYDU_ERR_POP(status, "unable to build a node list string\n"); targs[idx++] = MPL_strdup(node_list_str); } num_hosts = 0; for (proxy = proxy_list; proxy; proxy = proxy->next) num_hosts++; targs[idx++] = MPL_strdup("-N"); targs[idx++] = HYDU_int_to_str(num_hosts); targs[idx++] = MPL_strdup("-n"); targs[idx++] = HYDU_int_to_str(num_hosts); /* Force srun to ignore stdin to avoid issues with * unexpected files open on fd 0 */ targs[idx++] = MPL_strdup("--input"); targs[idx++] = MPL_strdup("none"); MPL_env2str("HYDRA_LAUNCHER_EXTRA_ARGS", (const char **) &extra_arg_list); if (extra_arg_list) { extra_arg = strtok(extra_arg_list, " "); while (extra_arg) { targs[idx++] = MPL_strdup(extra_arg); extra_arg = strtok(NULL, " "); } } /* Fill in the remaining arguments */ /* We do not need to create a quoted version of the string for * SLURM. It seems to be internally quoting it anyway. 
*/ for (i = 0; args[i]; i++) targs[idx++] = MPL_strdup(args[i]); /* Increase pid list to accommodate the new pid */ HYDU_MALLOC_OR_JUMP(pid, int *, (HYD_bscu_pid_count + 1) * sizeof(int), status); for (i = 0; i < HYD_bscu_pid_count; i++) pid[i] = HYD_bscu_pid_list[i]; MPL_free(HYD_bscu_pid_list); HYD_bscu_pid_list = pid; /* Increase fd list to accommodate these new fds */ HYDU_MALLOC_OR_JUMP(fd_list, int *, (HYD_bscu_fd_count + 3) * sizeof(int), status); for (i = 0; i < HYD_bscu_fd_count; i++) fd_list[i] = HYD_bscu_fd_list[i]; MPL_free(HYD_bscu_fd_list); HYD_bscu_fd_list = fd_list; /* append proxy ID as -1 */ targs[idx++] = HYDU_int_to_str(-1); targs[idx++] = NULL; if (HYDT_bsci_info.debug) { HYDU_dump(stdout, "Launch arguments: "); HYDU_print_strlist(targs); } status = HYDU_create_process(targs, NULL, NULL, &fd_stdout, &fd_stderr, &HYD_bscu_pid_list[HYD_bscu_pid_count++], -1); HYDU_ERR_POP(status, "create process returned error\n"); HYD_bscu_fd_list[HYD_bscu_fd_count++] = fd_stdout; HYD_bscu_fd_list[HYD_bscu_fd_count++] = fd_stderr; status = HYDT_dmx_register_fd(1, &fd_stdout, HYD_POLLIN, (void *) (size_t) STDOUT_FILENO, HYDT_bscu_stdio_cb); HYDU_ERR_POP(status, "demux returned error registering fd\n"); status = HYDT_dmx_register_fd(1, &fd_stderr, HYD_POLLIN, (void *) (size_t) STDERR_FILENO, HYDT_bscu_stdio_cb); HYDU_ERR_POP(status, "demux returned error registering fd\n"); fn_exit: if (node_list_str) MPL_free(node_list_str); HYDU_free_strlist(targs); if (path) MPL_free(path); HYDU_FUNC_EXIT(); return status; fn_fail: goto fn_exit; }
publish->next = NULL; if (HYD_pmcd_pmi_publish_list == NULL) HYD_pmcd_pmi_publish_list = publish; else { for (r = HYD_pmcd_pmi_publish_list; r->next; r = r->next); r->next = publish; } } else { int len, recvd, closed; char *resp; /* connect to the external nameserver and store the * information there */ ns = MPL_strdup(HYD_server_info.nameserver); ns_host = strtok(ns, ":"); HYDU_ASSERT(ns_host, status); ns_port_str = strtok(NULL, ":"); if (ns_port_str) ns_port = atoi(ns_port_str); else ns_port = HYDRA_NAMESERVER_DEFAULT_PORT; status = HYDU_sock_connect(ns_host, (uint16_t) ns_port, &ns_fd, 0, HYD_CONNECT_DELAY); HYDU_ERR_POP(status, "error connecting to the nameserver\n"); HYD_STRING_STASH_INIT(stash); HYD_STRING_STASH(stash, MPL_strdup("PUBLISH"), status);