static HYD_status fn_name_publish(int fd, int pid, int pgid, char *args[]) { struct HYD_string_stash stash; char *cmd, *thrid, *val, *name = NULL, *port = NULL; int token_count, success; struct HYD_pmcd_token *tokens = NULL; HYD_status status = HYD_SUCCESS; HYDU_FUNC_ENTER(); status = HYD_pmcd_pmi_args_to_tokens(args, &tokens, &token_count); HYDU_ERR_POP(status, "unable to convert args to tokens\n"); thrid = HYD_pmcd_pmi_find_token_keyval(tokens, token_count, "thrid"); if ((val = HYD_pmcd_pmi_find_token_keyval(tokens, token_count, "name")) == NULL) HYDU_ERR_POP(status, "cannot find token: name\n"); name = HYDU_strdup(val); if ((val = HYD_pmcd_pmi_find_token_keyval(tokens, token_count, "port")) == NULL) HYDU_ERR_POP(status, "cannot find token: port\n"); port = HYDU_strdup(val); status = HYD_pmcd_pmi_publish(name, port, &success); HYDU_ERR_POP(status, "error publishing service\n"); HYD_STRING_STASH_INIT(stash); HYD_STRING_STASH(stash, HYDU_strdup("cmd=name-publish-response;"), status); if (thrid) { HYD_STRING_STASH(stash, HYDU_strdup("thrid="), status); HYD_STRING_STASH(stash, HYDU_strdup(thrid), status); HYD_STRING_STASH(stash, HYDU_strdup(";"), status); } if (!success) { HYD_STRING_STASH(stash, HYDU_strdup("rc=1;errmsg=duplicate_service_"), status); HYD_STRING_STASH(stash, HYDU_strdup(name), status); HYD_STRING_STASH(stash, HYDU_strdup(";"), status); } else HYD_STRING_STASH(stash, HYDU_strdup("rc=0;"), status); HYD_STRING_SPIT(stash, cmd, status); status = cmd_response(fd, pid, cmd); HYDU_ERR_POP(status, "send command failed\n"); HYDU_FREE(cmd); fn_exit: if (tokens) HYD_pmcd_pmi_free_tokens(tokens, token_count); if (name) HYDU_FREE(name); if (port) HYDU_FREE(port); HYDU_FUNC_EXIT(); return status; fn_fail: goto fn_exit; }
static HYD_status fn_put(int fd, int pid, int pgid, char *args[]) { struct HYD_proxy *proxy; struct HYD_pmcd_pmi_pg_scratch *pg_scratch; struct HYD_pmcd_token *tokens; int token_count, i, ret; HYD_status status = HYD_SUCCESS; HYDU_FUNC_ENTER(); status = HYD_pmcd_pmi_args_to_tokens(args, &tokens, &token_count); HYDU_ERR_POP(status, "unable to convert args to tokens\n"); proxy = HYD_pmcd_pmi_find_proxy(fd); HYDU_ASSERT(proxy, status); pg_scratch = (struct HYD_pmcd_pmi_pg_scratch *) proxy->pg->pg_scratch; for (i = 0; i < token_count; i++) { status = HYD_pmcd_pmi_add_kvs(tokens[i].key, tokens[i].val, pg_scratch->kvs, &ret); HYDU_ERR_POP(status, "unable to add keypair to kvs\n"); } fn_exit: HYD_pmcd_pmi_free_tokens(tokens, token_count); HYDU_FUNC_EXIT(); return status; fn_fail: goto fn_exit; }
static HYD_status fn_abort(int fd, int pid, int pgid, char *args[]) { int token_count; struct HYD_pmcd_token *tokens; /* set a default exit code of 1 */ int exitcode = 1; HYD_status status = HYD_SUCCESS; HYDU_FUNC_ENTER(); status = HYD_pmcd_pmi_args_to_tokens(args, &tokens, &token_count); HYDU_ERR_POP(status, "unable to convert args to tokens\n"); if (HYD_pmcd_pmi_find_token_keyval(tokens, token_count, "exitcode") == NULL) HYDU_ERR_POP(status, "cannot find token: exitcode\n"); exitcode = atoi(HYD_pmcd_pmi_find_token_keyval(tokens, token_count, "exitcode")); fn_exit: /* clean everything up and exit */ status = HYDT_bsci_wait_for_completion(0); exit(exitcode); /* never get here */ HYDU_FUNC_EXIT(); return status; fn_fail: goto fn_exit; }
char *HYDU_find_full_path(const char *execname) { char *tmp[HYD_NUM_TMP_STRINGS] = { NULL }, *path = NULL, *test_path = NULL; HYD_status status = HYD_SUCCESS; HYDU_FUNC_ENTER(); status = HYDU_find_in_path(execname, &test_path); HYDU_ERR_POP(status, "error while searching for executable in user path\n"); if (test_path) { tmp[0] = MPL_strdup(test_path); tmp[1] = MPL_strdup(execname); tmp[2] = NULL; status = HYDU_str_alloc_and_join(tmp, &path); HYDU_ERR_POP(status, "error joining strings\n"); } fn_exit: HYDU_free_strlist(tmp); if (test_path) MPL_free(test_path); HYDU_FUNC_EXIT(); return path; fn_fail: goto fn_exit; }
static HYD_status cmd_response(int fd, int pid, const char *cmd) { struct HYD_pmcd_hdr hdr; int sent, closed; HYD_status status = HYD_SUCCESS; HYDU_FUNC_ENTER(); HYD_pmcd_init_header(&hdr); hdr.cmd = PMI_RESPONSE; hdr.pid = pid; hdr.pmi_version = 1; hdr.buflen = strlen(cmd); status = HYDU_sock_write(fd, &hdr, sizeof(hdr), &sent, &closed); HYDU_ERR_POP(status, "unable to send PMI_RESPONSE header to proxy\n"); HYDU_ASSERT(!closed, status); if (HYD_server_info.user_global.debug) { HYDU_dump(stdout, "PMI response to fd %d pid %d: %s", fd, pid, cmd); } status = HYDU_sock_write(fd, cmd, strlen(cmd), &sent, &closed); HYDU_ERR_POP(status, "unable to send response to command\n"); HYDU_ASSERT(!closed, status); fn_exit: HYDU_FUNC_EXIT(); return status; fn_fail: goto fn_exit; }
/*
 * Sends a NULL-terminated string list over fd.
 *
 * Wire layout as written here: one int holding the value returned by
 * HYDU_strlist_lastidx(), then for every string an int length (including
 * the terminating NUL) followed by that many bytes.
 *
 * Returns HYD_SUCCESS, or an error status when a write fails or the peer
 * is reported as closed.
 */
HYD_status HYDU_send_strlist(int fd, char **strlist)
{
    char **cur;
    int count, nbytes;
    int sent, closed;
    HYD_status status = HYD_SUCCESS;

    HYDU_FUNC_ENTER();

    /* Check how many arguments we have */
    count = HYDU_strlist_lastidx(strlist);
    status = HYDU_sock_write(fd, &count, sizeof(int), &sent, &closed, HYDU_SOCK_COMM_MSGWAIT);
    HYDU_ERR_POP(status, "unable to write data to proxy\n");
    HYDU_ASSERT(!closed, status);

    /* Convert the string list to parseable data and send */
    for (cur = strlist; *cur; cur++) {
        nbytes = strlen(*cur) + 1;

        status = HYDU_sock_write(fd, &nbytes, sizeof(int), &sent, &closed,
                                 HYDU_SOCK_COMM_MSGWAIT);
        HYDU_ERR_POP(status, "unable to write data to proxy\n");
        HYDU_ASSERT(!closed, status);

        status = HYDU_sock_write(fd, *cur, nbytes, &sent, &closed, HYDU_SOCK_COMM_MSGWAIT);
        HYDU_ERR_POP(status, "unable to write data to proxy\n");
        HYDU_ASSERT(!closed, status);
    }

  fn_exit:
    HYDU_FUNC_EXIT();
    return status;

  fn_fail:
    goto fn_exit;
}
static HYD_status get_abs_wd(const char *wd, char **abs_wd) { int ret; char *cwd; HYD_status status = HYD_SUCCESS; if (wd == NULL) { *abs_wd = NULL; goto fn_exit; } if (wd[0] != '.') { *abs_wd = (char *) wd; goto fn_exit; } cwd = HYDU_getcwd(); ret = chdir(wd); if (ret < 0) HYDU_ERR_POP(status, "error calling chdir\n"); *abs_wd = HYDU_getcwd(); ret = chdir(cwd); if (ret < 0) HYDU_ERR_POP(status, "error calling chdir\n"); fn_exit: return status; fn_fail: goto fn_exit; }
static HYD_status send_cmd_downstream(int fd, const char *cmd) { char cmdlen[7]; int sent, closed; HYD_status status = HYD_SUCCESS; HYDU_FUNC_ENTER(); MPL_snprintf(cmdlen, 7, "%6u", (unsigned) strlen(cmd)); status = HYDU_sock_write(fd, cmdlen, 6, &sent, &closed, HYDU_SOCK_COMM_MSGWAIT); HYDU_ERR_POP(status, "error writing PMI line\n"); /* FIXME: We cannot abort when we are not able to send data * downstream. The upper layer needs to handle this based on * whether we want to abort or not.*/ HYDU_ASSERT(!closed, status); if (HYD_pmcd_pmip.user_global.debug) { HYDU_dump(stdout, "PMI response: %s\n", cmd); } status = HYDU_sock_write(fd, cmd, strlen(cmd), &sent, &closed, HYDU_SOCK_COMM_MSGWAIT); HYDU_ERR_POP(status, "error writing PMI line\n"); HYDU_ASSERT(!closed, status); fn_exit: HYDU_FUNC_EXIT(); return status; fn_fail: goto fn_exit; }
/*
 * Allocates a fwd_hash entry for the descriptor pair (in, out), zeroes its
 * buffer bookkeeping and switches both descriptors to non-blocking mode.
 *
 *   fwd_hash - receives the newly allocated entry
 *   in, out  - descriptors stored in the entry
 *
 * Returns HYD_SUCCESS, or an error status when allocation or either
 * non-blocking switch fails.  The entry is not released here on failure.
 */
static HYD_status alloc_fwd_hash(struct fwd_hash **fwd_hash, int in, int out)
{
    HYD_status status = HYD_SUCCESS;

    HYDU_FUNC_ENTER();

    HYDU_MALLOC_OR_JUMP(*fwd_hash, struct fwd_hash *, sizeof(struct fwd_hash), status);
    (*fwd_hash)->in = in;
    (*fwd_hash)->out = out;
    (*fwd_hash)->buf_offset = 0;
    (*fwd_hash)->buf_count = 0;
    (*fwd_hash)->next = NULL;

    /* the message now names the descriptor that actually failed */
    status = HYDU_sock_set_nonblock(in);
    HYDU_ERR_POP(status, "unable to set in-socket to non-blocking\n");

    status = HYDU_sock_set_nonblock(out);
    HYDU_ERR_POP(status, "unable to set out-socket to non-blocking\n");

  fn_exit:
    HYDU_FUNC_EXIT();
    return status;

  fn_fail:
    goto fn_exit;
}
static HYD_status fn_info_putnodeattr(int fd, char *args[]) { struct HYD_string_stash stash; char *key, *val, *thrid, *cmd; struct HYD_pmcd_token *tokens = NULL; int token_count, ret; struct HYD_pmcd_pmi_v2_reqs *req; HYD_status status = HYD_SUCCESS; HYDU_FUNC_ENTER(); status = HYD_pmcd_pmi_args_to_tokens(args, &tokens, &token_count); HYDU_ERR_POP(status, "unable to convert args to tokens\n"); key = HYD_pmcd_pmi_find_token_keyval(tokens, token_count, "key"); HYDU_ERR_CHKANDJUMP(status, key == NULL, HYD_INTERNAL_ERROR, "unable to find key token\n"); val = HYD_pmcd_pmi_find_token_keyval(tokens, token_count, "value"); HYDU_ERR_CHKANDJUMP(status, val == NULL, HYD_INTERNAL_ERROR, "unable to find value token\n"); thrid = HYD_pmcd_pmi_find_token_keyval(tokens, token_count, "thrid"); status = HYD_pmcd_pmi_add_kvs(key, val, HYD_pmcd_pmip.local.kvs, &ret); HYDU_ERR_POP(status, "unable to put data into kvs\n"); HYD_STRING_STASH_INIT(stash); HYD_STRING_STASH(stash, MPL_strdup("cmd=info-putnodeattr-response;"), status); if (thrid) { HYD_STRING_STASH(stash, MPL_strdup("thrid="), status); HYD_STRING_STASH(stash, MPL_strdup(thrid), status); HYD_STRING_STASH(stash, MPL_strdup(";"), status); } HYD_STRING_STASH(stash, MPL_strdup("rc="), status); HYD_STRING_STASH(stash, HYDU_int_to_str(ret), status); HYD_STRING_STASH(stash, MPL_strdup(";"), status); HYD_STRING_SPIT(stash, cmd, status); send_cmd_downstream(fd, cmd); MPL_free(cmd); for (req = pending_reqs; req; req = req->next) { if (!strcmp(req->key, key)) { /* Poke the progress engine before exiting */ status = poke_progress(key); HYDU_ERR_POP(status, "poke progress error\n"); break; } } fn_exit: if (tokens) HYD_pmcd_pmi_free_tokens(tokens, token_count); HYDU_FUNC_EXIT(); return status; fn_fail: goto fn_exit; }
HYD_status HYDU_find_in_path(const char *execname, char **path) { char *tmp[HYD_NUM_TMP_STRINGS], *path_loc = NULL, *test_loc, *user_path; HYD_status status = HYD_SUCCESS; HYDU_FUNC_ENTER(); /* The executable is somewhere in the user's path. Find it. */ if (MPL_env2str("PATH", (const char **) &user_path)) user_path = MPL_strdup(user_path); if (user_path) { /* If the PATH environment exists */ status = get_abs_wd(strtok(user_path, ";:"), &test_loc); HYDU_ERR_POP(status, "error getting absolute working dir\n"); do { tmp[0] = MPL_strdup(test_loc); tmp[1] = MPL_strdup("/"); tmp[2] = MPL_strdup(execname); tmp[3] = NULL; status = HYDU_str_alloc_and_join(tmp, &path_loc); HYDU_ERR_POP(status, "unable to join strings\n"); HYDU_free_strlist(tmp); if (exists(path_loc)) { tmp[0] = MPL_strdup(test_loc); tmp[1] = MPL_strdup("/"); tmp[2] = NULL; status = HYDU_str_alloc_and_join(tmp, path); HYDU_ERR_POP(status, "unable to join strings\n"); HYDU_free_strlist(tmp); goto fn_exit; /* We are done */ } MPL_free(path_loc); path_loc = NULL; status = get_abs_wd(strtok(NULL, ";:"), &test_loc); HYDU_ERR_POP(status, "error getting absolute working dir\n"); } while (test_loc); } /* There is either no PATH environment or we could not find the * file in the PATH. Just return an empty path */ *path = MPL_strdup(""); fn_exit: if (user_path) MPL_free(user_path); if (path_loc) MPL_free(path_loc); HYDU_FUNC_EXIT(); return status; fn_fail: goto fn_exit; }
/*
 * Initialises the hwloc topology, records the number of PUs and sets up
 * process binding and the memory-binding policy.
 *
 *   binding - required (asserted); "user:..." and "rr" are handled by
 *             dedicated branches, anything else goes to
 *             handle_bitmap_binding()
 *   mapping - optional; when NULL, binding is used as the mapping
 *   membind - optional; NULL, "firsttouch", "nexttouch", "bind:...",
 *             "interleave:..." or "replicate:...", anything else is an error
 *
 * NOTE(review): the text between the "user:" literals in the first branch
 * (`"user:"******"user:"...`) is garbled/redacted and is not valid C as it
 * stands.  It has been left untouched; restore it from the original source
 * rather than guessing.
 *
 * NOTE(review): the "user:" and "rr" branches jump to fn_exit before the
 * membind section, so membind is not evaluated for them - confirm this is
 * intended.
 *
 * NOTE(review): mapping may be NULL (see the `mapping ? mapping : binding`
 * fallback) but is passed to a "%s" conversion in the bitmap-binding error
 * message.
 */
HYD_status HYDT_topo_hwloc_init(const char *binding, const char *mapping, const char *membind) { HYD_status status = HYD_SUCCESS; HYDU_FUNC_ENTER(); HYDU_ASSERT(binding, status); hwloc_topology_init(&topology); hwloc_topology_load(topology); HYDT_topo_hwloc_info.total_num_pus = hwloc_get_nbobjs_by_type(topology, HWLOC_OBJ_PU); hwloc_initialized = 1; /* bindings that don't require mapping */ if (!strncmp(binding, "user:"******"user:"******"user:"******"error binding to %s\n", binding); goto fn_exit; } else if (!strcmp(binding, "rr")) { status = handle_rr_binding(); HYDU_ERR_POP(status, "error binding to %s\n", binding); goto fn_exit; } status = handle_bitmap_binding(binding, mapping ? mapping : binding); HYDU_ERR_POP(status, "error binding with bind \"%s\" and map \"%s\"\n", binding, mapping); /* Memory binding options */ if (membind == NULL) HYDT_topo_hwloc_info.membind = HWLOC_MEMBIND_DEFAULT; else if (!strcmp(membind, "firsttouch")) HYDT_topo_hwloc_info.membind = HWLOC_MEMBIND_FIRSTTOUCH; else if (!strcmp(membind, "nexttouch")) HYDT_topo_hwloc_info.membind = HWLOC_MEMBIND_NEXTTOUCH; else if (!strncmp(membind, "bind:", strlen("bind:"))) { HYDT_topo_hwloc_info.membind = HWLOC_MEMBIND_BIND; } else if (!strncmp(membind, "interleave:", strlen("interleave:"))) { HYDT_topo_hwloc_info.membind = HWLOC_MEMBIND_INTERLEAVE; } else if (!strncmp(membind, "replicate:", strlen("replicate:"))) { HYDT_topo_hwloc_info.membind = HWLOC_MEMBIND_REPLICATE; } else { HYDU_ERR_SETANDJUMP(status, HYD_INTERNAL_ERROR, "unrecognized membind policy \"%s\"\n", membind); } fn_exit: HYDU_FUNC_EXIT(); return status; fn_fail: goto fn_exit; }
HYD_status HYDT_ckpoint_blcr_restart(const char *prefix, int pgid, int id, int ckpt_num, struct HYD_env *envlist, int num_ranks, int ranks[], int *in, int *out, int *err, int *pid) { HYD_status status = HYD_SUCCESS; int ret; int context_fd; cr_restart_handle_t cr_handle; cr_restart_args_t args; char filename[256]; char port_str[64]; int port; HYDU_FUNC_ENTER(); /* create listener socket for stdin/out/err */ status = create_stdinouterr_sock(&port); HYDU_ERR_POP(status, "failed to create stdin/out/err socket\n"); MPL_snprintf(port_str, sizeof(port_str), "%d", port); status = HYDU_append_env_to_list(STDINOUTERR_PORT_NAME, port_str, &envlist); HYDU_ERR_POP(status, "failed to add to env list\n"); status = create_env_file(envlist, num_ranks, ranks); if (status) HYDU_ERR_POP(status, "blcr restart\n"); /* open the checkpoint file */ MPL_snprintf(filename, sizeof(filename), "%s/context-num%d-%d-%d", prefix, ckpt_num, pgid, id); context_fd = open(filename, O_RDONLY /* | O_LARGEFILE */); HYDU_ERR_CHKANDJUMP(status, context_fd < 0, HYD_INTERNAL_ERROR, "open failed, %s\n", strerror(errno)); /* ... initialize the request structure */ cr_initialize_restart_args_t(&args); args.cr_fd = context_fd; args.cr_flags = CR_RSTRT_RESTORE_PID; /* ... issue the request */ ret = cr_request_restart(&args, &cr_handle); HYDU_ERR_CHKANDJUMP(status, ret, HYD_INTERNAL_ERROR, "cr_request_restart failed, %s\n", strerror(errno)); ret = close(context_fd); HYDU_ERR_CHKANDJUMP(status, ret, HYD_INTERNAL_ERROR, "close failed, %s\n", strerror(errno)); /* get fds for stdin/out/err sockets, and get pids of restarted processes */ status = wait_for_stdinouterr_sockets(num_ranks, ranks, in, out, err, pid); if (status) HYDU_ERR_POP(status, "blcr restart\n"); fn_exit: HYDU_FUNC_EXIT(); return status; fn_fail: goto fn_exit; }
static HYD_status handle_pmi_cmd(int fd, int pgid, int pid, char *buf, int pmi_version) { char **args = NULL, *cmd = NULL; struct HYD_pmcd_pmi_handle *h; int i; HYD_status status = HYD_SUCCESS; HYDU_FUNC_ENTER(); if (pmi_version == 1) HYD_pmcd_pmi_handle = HYD_pmcd_pmi_v1; else HYD_pmcd_pmi_handle = HYD_pmcd_pmi_v2; if (HYD_server_info.user_global.debug) HYDU_dump(stdout, "[pgid: %d] got PMI command: %s\n", pgid, buf); HYDU_MALLOC(args, char **, MAX_PMI_ARGS * sizeof(char *), status); for (i = 0; i < MAX_PMI_ARGS; i++) args[i] = NULL; status = HYD_pmcd_pmi_parse_pmi_cmd(buf, pmi_version, &cmd, args); HYDU_ERR_POP(status, "unable to parse PMI command\n"); #if defined ENABLE_PROFILING if (HYD_server_info.enable_profiling) HYD_server_info.num_pmi_calls++; #endif /* ENABLE_PROFILING */ h = HYD_pmcd_pmi_handle; while (h->handler) { if (!strcmp(cmd, h->cmd)) { status = h->handler(fd, pid, pgid, args); HYDU_ERR_POP(status, "PMI handler returned error\n"); break; } h++; } if (!h->handler) { /* We don't understand the command */ HYDU_ERR_SETANDJUMP(status, HYD_INTERNAL_ERROR, "Unrecognized PMI command: %s | cleaning up processes\n", cmd); } fn_exit: if (cmd) HYDU_FREE(cmd); if (args) { HYDU_free_strlist(args); HYDU_free(args); } HYDU_FUNC_EXIT(); return status; fn_fail: goto fn_exit; }
static HYD_status add_exec_to_proxy(struct HYD_exec *exec, struct HYD_proxy *proxy, int num_procs) #endif { int i; struct HYD_exec *texec; HYD_status status = HYD_SUCCESS; if (proxy->exec_list == NULL) { status = HYDU_alloc_exec(&proxy->exec_list); HYDU_ERR_POP(status, "unable to allocate proxy exec\n"); for (i = 0; exec->exec[i]; i++) proxy->exec_list->exec[i] = HYDU_strdup(exec->exec[i]); proxy->exec_list->exec[i] = NULL; proxy->exec_list->wdir = HYDU_strdup(exec->wdir); proxy->exec_list->proc_count = num_procs; #if defined(FINEGRAIN_MPI) proxy->exec_list->nfg = exec->nfg; proxy->exec_list->start_rank = current_exec_start_rank; #endif proxy->exec_list->env_prop = exec->env_prop ? HYDU_strdup(exec->env_prop) : NULL; proxy->exec_list->user_env = HYDU_env_list_dup(exec->user_env); proxy->exec_list->appnum = exec->appnum; } else { for (texec = proxy->exec_list; texec->next; texec = texec->next); status = HYDU_alloc_exec(&texec->next); HYDU_ERR_POP(status, "unable to allocate proxy exec\n"); texec = texec->next; for (i = 0; exec->exec[i]; i++) texec->exec[i] = HYDU_strdup(exec->exec[i]); texec->exec[i] = NULL; texec->wdir = HYDU_strdup(exec->wdir); texec->proc_count = num_procs; #if defined(FINEGRAIN_MPI) texec->nfg = exec->nfg; texec->start_rank = current_exec_start_rank; #endif texec->env_prop = exec->env_prop ? HYDU_strdup(exec->env_prop) : NULL; texec->user_env = HYDU_env_list_dup(exec->user_env); texec->appnum = exec->appnum; } proxy->proxy_process_count += num_procs; proxy->node->active_processes += num_procs; fn_exit: return status; fn_fail: goto fn_exit; }
static HYD_status fn_publish_name(int fd, int pid, int pgid, char *args[]) { struct HYD_string_stash stash; char *cmd, *val; int token_count; struct HYD_pmcd_token *tokens = NULL; char *name = NULL, *port = NULL; int success = 0; HYD_status status = HYD_SUCCESS; HYDU_FUNC_ENTER(); status = HYD_pmcd_pmi_args_to_tokens(args, &tokens, &token_count); HYDU_ERR_POP(status, "unable to convert args to tokens\n"); if ((val = HYD_pmcd_pmi_find_token_keyval(tokens, token_count, "service")) == NULL) HYDU_ERR_POP(status, "cannot find token: service\n"); name = MPL_strdup(val); if ((val = HYD_pmcd_pmi_find_token_keyval(tokens, token_count, "port")) == NULL) HYDU_ERR_POP(status, "cannot find token: port\n"); port = MPL_strdup(val); status = HYD_pmcd_pmi_publish(name, port, &success); HYDU_ERR_POP(status, "error publishing service\n"); HYD_STRING_STASH_INIT(stash); if (success) HYD_STRING_STASH(stash, MPL_strdup("cmd=publish_result info=ok rc=0 msg=success\n"), status); else HYD_STRING_STASH(stash, MPL_strdup("cmd=publish_result info=ok rc=1 msg=key_already_present\n"), status); HYD_STRING_SPIT(stash, cmd, status); status = cmd_response(fd, pid, cmd); HYDU_ERR_POP(status, "send command failed\n"); MPL_free(cmd); fn_exit: if (tokens) HYD_pmcd_pmi_free_tokens(tokens, token_count); if (name) MPL_free(name); if (port) MPL_free(port); HYDU_FUNC_EXIT(); return status; fn_fail: goto fn_exit; }
static HYD_status fn_name_lookup(int fd, int pid, int pgid, char *args[]) { struct HYD_string_stash stash; char *cmd, *thrid, *name, *value; int token_count; struct HYD_pmcd_token *tokens = NULL; HYD_status status = HYD_SUCCESS; HYDU_FUNC_ENTER(); status = HYD_pmcd_pmi_args_to_tokens(args, &tokens, &token_count); HYDU_ERR_POP(status, "unable to convert args to tokens\n"); thrid = HYD_pmcd_pmi_find_token_keyval(tokens, token_count, "thrid"); if ((name = HYD_pmcd_pmi_find_token_keyval(tokens, token_count, "name")) == NULL) HYDU_ERR_POP(status, "cannot find token: name\n"); status = HYD_pmcd_pmi_lookup(name, &value); HYDU_ERR_POP(status, "error while looking up service\n"); HYD_STRING_STASH_INIT(stash); HYD_STRING_STASH(stash, HYDU_strdup("cmd=name-lookup-response;"), status); if (thrid) { HYD_STRING_STASH(stash, HYDU_strdup("thrid="), status); HYD_STRING_STASH(stash, HYDU_strdup(thrid), status); HYD_STRING_STASH(stash, HYDU_strdup(";"), status); } if (value) { HYD_STRING_STASH(stash, HYDU_strdup("port="), status); HYD_STRING_STASH(stash, HYDU_strdup(value), status); HYD_STRING_STASH(stash, HYDU_strdup(";found=TRUE;rc=0;"), status); } else { HYD_STRING_STASH(stash, HYDU_strdup("found=FALSE;rc=1;"), status); } HYD_STRING_SPIT(stash, cmd, status); status = cmd_response(fd, pid, cmd); HYDU_ERR_POP(status, "send command failed\n"); HYDU_FREE(cmd); fn_exit: if (tokens) HYD_pmcd_pmi_free_tokens(tokens, token_count); HYDU_FUNC_EXIT(); return status; fn_fail: goto fn_exit; }
/*
 * Opens a listening socket, registers it with the demux engine and builds
 * the "<host>:<port>" string processes use to connect back.
 *
 *   iface      - when set, the host part is the IP of this interface
 *   hostname   - otherwise, when set, used verbatim as the host part
 *                (if both are NULL, gethostname() is used)
 *   port_range - optional port range handed to HYDU_sock_listen() as a
 *                private copy
 *   port_str   - receives the newly allocated "<host>:<port>" string
 *   callback, userp - registered with the demux engine for HYD_POLLIN
 *
 * Returns HYD_SUCCESS, or the status of the first step that failed.
 *
 * NOTE(review): the listening socket is not closed or deregistered when a
 * later step fails - confirm whether callers rely on that.
 */
HYD_status HYDU_sock_create_and_listen_portstr(char *iface, char *hostname, char *port_range,
                                               char **port_str,
                                               HYD_status(*callback) (int fd,
                                                                      HYD_event_t events,
                                                                      void *userp),
                                               void *userp)
{
    int listenfd;
    char *sport = NULL, *real_port_range = NULL, *ip = NULL;
    uint16_t port;
    HYD_status status = HYD_SUCCESS;

    /* Listen on a port in the port range */
    port = 0;
    real_port_range = port_range ? MPL_strdup(port_range) : NULL;
    status = HYDU_sock_listen(&listenfd, real_port_range, &port);
    HYDU_ERR_POP(status, "unable to listen on port\n");

    /* Register the listening socket with the demux engine */
    status = HYDT_dmx_register_fd(1, &listenfd, HYD_POLLIN, userp, callback);
    HYDU_ERR_POP(status, "unable to register fd\n");

    /* Create a port string for MPI processes to use to connect to */
    if (iface) {
        status = HYDU_sock_get_iface_ip(iface, &ip);
        HYDU_ERR_POP(status, "unable to get network interface IP\n");
    }
    else if (hostname) {
        ip = MPL_strdup(hostname);
    }
    else {
        char localhost[MAX_HOSTNAME_LEN] = { 0 };

        if (gethostname(localhost, MAX_HOSTNAME_LEN) < 0)
            HYDU_ERR_SETANDJUMP(status, HYD_SOCK_ERROR, "unable to get local hostname\n");

        ip = MPL_strdup(localhost);
    }

    sport = HYDU_int_to_str(port);
    /* host + ':' + port + NUL */
    HYDU_MALLOC_OR_JUMP(*port_str, char *, strlen(ip) + 1 + strlen(sport) + 1, status);
    MPL_snprintf(*port_str, strlen(ip) + 1 + strlen(sport) + 1, "%s:%s", ip, sport);

  fn_exit:
    /* temporaries are released on every path, including failures */
    if (sport)
        MPL_free(sport);
    if (real_port_range)
        MPL_free(real_port_range);
    if (ip)
        MPL_free(ip);
    return status;

  fn_fail:
    goto fn_exit;
}
static HYD_status send_cmd_upstream(const char *start, int fd, char *args[]) { int i, sent, closed; struct HYD_string_stash stash; char *buf = NULL; struct HYD_pmcd_hdr hdr; HYD_status status = HYD_SUCCESS; HYDU_FUNC_ENTER(); HYD_STRING_STASH_INIT(stash); HYD_STRING_STASH(stash, MPL_strdup(start), status); for (i = 0; args[i]; i++) { HYD_STRING_STASH(stash, MPL_strdup(args[i]), status); if (args[i + 1]) HYD_STRING_STASH(stash, MPL_strdup(";"), status); } HYD_STRING_SPIT(stash, buf, status); HYD_pmcd_init_header(&hdr); hdr.cmd = PMI_CMD; hdr.pid = fd; hdr.buflen = strlen(buf); hdr.pmi_version = 2; status = HYDU_sock_write(HYD_pmcd_pmip.upstream.control, &hdr, sizeof(hdr), &sent, &closed, HYDU_SOCK_COMM_MSGWAIT); HYDU_ERR_POP(status, "unable to send PMI header upstream\n"); HYDU_ASSERT(!closed, status); if (HYD_pmcd_pmip.user_global.debug) { HYDU_dump(stdout, "forwarding command (%s) upstream\n", buf); } status = HYDU_sock_write(HYD_pmcd_pmip.upstream.control, buf, hdr.buflen, &sent, &closed, HYDU_SOCK_COMM_MSGWAIT); HYDU_ERR_POP(status, "unable to send PMI command upstream\n"); HYDU_ASSERT(!closed, status); fn_exit: if (buf) MPL_free(buf); HYDU_FUNC_EXIT(); return status; fn_fail: goto fn_exit; }
static HYD_status fn_info_getjobattr(int fd, char *args[]) { struct HYD_string_stash stash; char *cmd, *key, *thrid; struct HYD_pmcd_token *tokens; int token_count; HYD_status status = HYD_SUCCESS; HYDU_FUNC_ENTER(); status = HYD_pmcd_pmi_args_to_tokens(args, &tokens, &token_count); HYDU_ERR_POP(status, "unable to convert args to tokens\n"); key = HYD_pmcd_pmi_find_token_keyval(tokens, token_count, "key"); HYDU_ERR_CHKANDJUMP(status, key == NULL, HYD_INTERNAL_ERROR, "unable to find token: key\n"); thrid = HYD_pmcd_pmi_find_token_keyval(tokens, token_count, "thrid"); if (!strcmp(key, "PMI_process_mapping")) { HYD_STRING_STASH_INIT(stash); HYD_STRING_STASH(stash, MPL_strdup("cmd=info-getjobattr-response;"), status); if (thrid) { HYD_STRING_STASH(stash, MPL_strdup("thrid="), status); HYD_STRING_STASH(stash, MPL_strdup(thrid), status); HYD_STRING_STASH(stash, MPL_strdup(";"), status); } HYD_STRING_STASH(stash, MPL_strdup("found=TRUE;value="), status); HYD_STRING_STASH(stash, MPL_strdup(HYD_pmcd_pmip.system_global.pmi_process_mapping), status); HYD_STRING_STASH(stash, MPL_strdup(";rc=0;"), status); HYD_STRING_SPIT(stash, cmd, status); send_cmd_downstream(fd, cmd); MPL_free(cmd); } else { status = send_cmd_upstream("cmd=info-getjobattr;", fd, args); HYDU_ERR_POP(status, "error sending command upstream\n"); } fn_exit: HYD_pmcd_pmi_free_tokens(tokens, token_count); HYDU_FUNC_EXIT(); return status; fn_fail: goto fn_exit; }
/*
 * Normalises a working directory in place.
 *
 *   *wdir == NULL           -> replaced by HYDU_getcwd()
 *   *wdir not starting '/'  -> replaced by "<cwd>/<*wdir>"; the old string
 *                              is freed
 *   otherwise               -> left untouched
 *
 * On failure *wdir keeps its previous value.
 *
 * Returns HYD_SUCCESS, or the status of the failed join.
 */
HYD_status HYDU_correct_wdir(char **wdir)
{
    char *tmp[HYD_NUM_TMP_STRINGS];
    char *joined = NULL;
    HYD_status status = HYD_SUCCESS;

    HYDU_FUNC_ENTER();

    if (*wdir == NULL) {
        *wdir = HYDU_getcwd();
    }
    else if ((*wdir)[0] != '/') {
        tmp[0] = HYDU_getcwd();
        tmp[1] = HYDU_strdup("/");
        tmp[2] = HYDU_strdup(*wdir);
        tmp[3] = NULL;

        /* join into a scratch pointer first: the old *wdir is only given
         * up once the replacement exists, and the pieces are released
         * whether or not the join worked */
        status = HYDU_str_alloc_and_join(tmp, &joined);
        HYDU_free_strlist(tmp);
        HYDU_ERR_POP(status, "unable to join strings\n");

        HYDU_FREE(*wdir);
        *wdir = joined;
    }

  fn_exit:
    HYDU_FUNC_EXIT();
    return status;

  fn_fail:
    goto fn_exit;
}
/*
 * Splits a "NAME=VALUE" string at the first '=' and appends the pair to
 * env_list.  The input string is not modified; a private copy is split.
 *
 * Returns HYD_SUCCESS, or an error status when the name cannot be
 * extracted or the append fails.
 */
HYD_status HYDU_append_env_str_to_list(const char *str, struct HYD_env **env_list)
{
    char *copy = NULL;
    char *name;
    char *value;
    HYD_status status = HYD_SUCCESS;

    HYDU_FUNC_ENTER();

    copy = MPL_strdup(str);
    value = copy;

    /* don't use strtok, it will mangle env values that contain '=' */
    name = MPL_strsep(&value, "=");
    HYDU_ASSERT(name != NULL, status);

    /* after the split, value refers to whatever followed the first '=' */
    status = HYDU_append_env_to_list(name, value, env_list);
    HYDU_ERR_POP(status, "unable to append env to list\n");

  fn_exit:
    if (copy)
        MPL_free(copy);
    HYDU_FUNC_EXIT();
    return status;

  fn_fail:
    goto fn_exit;
}
static HYD_status fn_barrier_in(int fd, int pid, int pgid, char *args[]) { struct HYD_proxy *proxy, *tproxy; const char *cmd; int proxy_count; HYD_status status = HYD_SUCCESS; HYDU_FUNC_ENTER(); proxy = HYD_pmcd_pmi_find_proxy(fd); HYDU_ASSERT(proxy, status); proxy_count = 0; for (tproxy = proxy->pg->proxy_list; tproxy; tproxy = tproxy->next) proxy_count++; proxy->pg->barrier_count++; if (proxy->pg->barrier_count == proxy_count) { proxy->pg->barrier_count = 0; cmd = "cmd=barrier_out\n"; for (tproxy = proxy->pg->proxy_list; tproxy; tproxy = tproxy->next) { status = cmd_response(tproxy->control_fd, pid, cmd); HYDU_ERR_POP(status, "error writing PMI line\n"); } } fn_exit: HYDU_FUNC_EXIT(); return status; fn_fail: goto fn_exit; }
HYD_status HYDT_bscd_sge_query_node_list(struct HYD_node **node_list) { char *hostfile; HYD_status status = HYD_SUCCESS; HYDU_FUNC_ENTER(); if (MPL_env2str("PE_HOSTFILE", (const char **) &hostfile) == 0) hostfile = NULL; if (hostfile == NULL) { *node_list = NULL; HYDU_ERR_SETANDJUMP(status, HYD_INTERNAL_ERROR, "No SGE nodefile found\n"); } else { status = HYDU_parse_hostfile(hostfile, node_list, process_mfile_token); HYDU_ERR_POP(status, "error parsing hostfile\n"); } fn_exit: HYDU_FUNC_EXIT(); return status; fn_fail: goto fn_exit; }
/*
 * Exports env into the process environment as "NAME=VALUE" via
 * MPL_putenv().  A NULL env_value is exported as an empty value.
 *
 *   env       - name/value pair to export
 *   overwrite - with HYD_ENV_OVERWRITE_FALSE, nothing is done when the
 *               variable is already set
 *
 * The joined "NAME=VALUE" string is handed to MPL_putenv() and is not
 * freed here.
 *
 * Returns HYD_SUCCESS, or the status of the failed join.
 */
HYD_status HYDU_putenv(struct HYD_env *env, HYD_env_overwrite_t overwrite)
{
    char *tmp[HYD_NUM_TMP_STRINGS], *str;
    int i;
    HYD_status status = HYD_SUCCESS;

    HYDU_FUNC_ENTER();

    /* If the overwrite flag is false, just exit */
    if (MPL_env2str(env->env_name, (const char **) &str) &&
        overwrite == HYD_ENV_OVERWRITE_FALSE)
        goto fn_exit;

    i = 0;
    tmp[i++] = MPL_strdup(env->env_name);
    tmp[i++] = MPL_strdup("=");
    tmp[i++] = env->env_value ? MPL_strdup(env->env_value) : MPL_strdup("");
    tmp[i++] = NULL;

    status = HYDU_str_alloc_and_join(tmp, &str);

    /* release the pieces before looking at status so they are not leaked
     * when the join fails */
    for (i = 0; tmp[i]; i++)
        MPL_free(tmp[i]);

    HYDU_ERR_POP(status, "unable to join strings\n");

    MPL_putenv(str);

  fn_exit:
    HYDU_FUNC_EXIT();
    return status;

  fn_fail:
    goto fn_exit;
}
/*
 * Token callback for host-file parsing; called once per token.
 *
 * The first token of a line (newline != 0) is remembered as the host name.
 * The second token of that line is read as the process count and the pair
 * is added to node_list.  Further tokens on the line are ignored.
 *
 * State is kept in function-local statics (token position and pending
 * host name), so the function carries state between calls.
 *
 * Returns HYD_SUCCESS, HYD_INTERNAL_ERROR when a new line starts while a
 * host name is still pending, or the status of the failed node-list add.
 *
 * NOTE(review): the pending host name is set to NULL after the add without
 * being freed here - confirm whether HYDU_add_to_node_list() takes
 * ownership or copies it.
 */
static HYD_status process_mfile_token(char *token, int newline, struct HYD_node **node_list)
{
    static int entry_count = 0;
    static char *hostname;
    int num_procs;
    HYD_status status = HYD_SUCCESS;

    entry_count++;

    if (!newline) {
        /* Not a new line: only the second entry is of interest */
        if (entry_count == 2) {
            num_procs = atoi(token);

            status = HYDU_add_to_node_list(hostname, num_procs, node_list);
            HYDU_ERR_POP(status, "unable to initialize proxy\n");

            hostname = NULL;
        }
    }
    else {
        /* The first entry gives the hostname */
        entry_count = 1;

        if (hostname) {
            HYDU_ERR_SETANDJUMP(status, HYD_INTERNAL_ERROR, "unexpected token %s\n", token);
        }

        hostname = HYDU_strdup(token);
    }

  fn_exit:
    return status;

  fn_fail:
    goto fn_exit;
}
/*
 * Renders env as the single-quoted string 'NAME=VALUE'.  A NULL env_value
 * is rendered as an empty value.
 *
 *   env - name/value pair to render
 *   str - receives the string produced by HYDU_str_alloc_and_join()
 *
 * Returns HYD_SUCCESS, or the status of the failed join.
 */
HYD_status HYDU_env_to_str(struct HYD_env *env, char **str)
{
    int i;
    char *tmp[HYD_NUM_TMP_STRINGS];
    HYD_status status = HYD_SUCCESS;

    HYDU_FUNC_ENTER();

    i = 0;
    tmp[i++] = MPL_strdup("'");
    tmp[i++] = MPL_strdup(env->env_name);
    tmp[i++] = MPL_strdup("=");
    tmp[i++] = env->env_value ? MPL_strdup(env->env_value) : MPL_strdup("");
    tmp[i++] = MPL_strdup("'");
    tmp[i++] = NULL;

    status = HYDU_str_alloc_and_join(tmp, str);

    /* release the pieces before looking at status so they are not leaked
     * when the join fails */
    for (i = 0; tmp[i]; i++)
        MPL_free(tmp[i]);

    HYDU_ERR_POP(status, "unable to join strings\n");

  fn_exit:
    HYDU_FUNC_EXIT();
    return status;

  fn_fail:
    goto fn_exit;
}
/*
 * Returns a newly allocated decimal representation of x, sized exactly to
 * its digits plus the terminating NUL.  The result stays NULL when the
 * allocation fails.
 */
char *HYDU_size_t_to_str(size_t x)
{
    int len = 1;
    size_t rest;
    char *str = NULL;
    HYD_status status = HYD_SUCCESS;

    HYDU_FUNC_ENTER();

    /* count digits by repeated division; multiplying a running power of
     * ten instead can wrap around for values close to the size_t maximum */
    for (rest = x; rest >= 10; rest /= 10)
        len++;

    /* room for the terminating NUL */
    len++;

    HYDU_MALLOC(str, char *, len, status);
    HYDU_ERR_POP(status, "unable to allocate memory\n");

    HYDU_snprintf(str, len, "%llu", (unsigned long long) x);

  fn_exit:
    HYDU_FUNC_EXIT();
    return str;

  fn_fail:
    goto fn_exit;
}
HYD_status HYDT_topo_bind(int idx) { HYD_status status = HYD_SUCCESS; HYDU_FUNC_ENTER(); if (idx < 0 || ignore_binding) goto fn_exit; #if defined HAVE_HWLOC if (!strcmp(HYDT_topo_info.topolib, "hwloc")) { status = HYDT_topo_hwloc_bind(idx); HYDU_ERR_POP(status, "HWLOC failure binding process to core\n"); goto fn_exit; } #endif /* HAVE_HWLOC */ HYDU_ERR_SETANDJUMP(status, HYD_INTERNAL_ERROR, "no topology library available\n"); fn_exit: HYDU_FUNC_EXIT(); return status; fn_fail: goto fn_exit; }
static HYD_status list_to_nodes(char *str) { hostlist_t hostlist; char *host; int k = 0; HYD_status status = HYD_SUCCESS; if ((hostlist = slurm_hostlist_create(str)) == NULL) { status = HYD_FAILURE; goto fn_fail; } for (host = slurm_hostlist_shift(hostlist); host; host = slurm_hostlist_shift(hostlist)) { status = HYDU_add_to_node_list(host, tasks_per_node[k++], &global_node_list); HYDU_ERR_POP(status, "unable to add to node list\n"); } slurm_hostlist_destroy(hostlist); fn_exit: return status; fn_fail: goto fn_exit; }