/*
 * Parse a "<filler_start>,<non_filler_start>" argument and store the two
 * integers in HYD_pmcd_pmip.system_global.pmi_id_map, then advance *argv
 * past the consumed argument.
 *
 * arg  - not used by this handler
 * argv - cursor into the argument vector; **argv is the string to parse
 *
 * Returns HYD_SUCCESS on success.  When the string cannot be duplicated or
 * holds fewer than two comma-separated fields, the HYDU_ASSERT checks fail
 * (they are expected to set status and branch to fn_fail).
 */
static HYD_status pmi_id_map_fn(char *arg, char ***argv)
{
    char *map = NULL, *tmp;
    HYD_status status = HYD_SUCCESS;

    /* Split the PMI ID map into its two segments; strtok modifies its
     * input, so work on a private copy */
    map = MPL_strdup(**argv);
    HYDU_ASSERT(map, status);

    tmp = strtok(map, ",");
    HYDU_ASSERT(tmp, status);
    HYD_pmcd_pmip.system_global.pmi_id_map.filler_start = atoi(tmp);

    tmp = strtok(NULL, ",");
    HYDU_ASSERT(tmp, status);
    HYD_pmcd_pmip.system_global.pmi_id_map.non_filler_start = atoi(tmp);

    (*argv)++;

  fn_exit:
    /* Released here (not inline) so a failed assertion does not leak it */
    if (map)
        MPL_free(map);
    HYDU_FUNC_EXIT();
    return status;

  fn_fail:
    goto fn_exit;
}
/*
 * Send a NULL-terminated string list over fd.
 *
 * Wire format, as written below: one int holding the value returned by
 * HYDU_strlist_lastidx(), then for every string an int with its length
 * (terminating NUL included) followed by the string bytes.
 *
 * Fails if any write reports an error or reports the peer as closed.
 */
HYD_status HYDU_send_strlist(int fd, char **strlist)
{
    HYD_status status = HYD_SUCCESS;
    char **cur;
    int nstrings, nbytes;
    int sent, closed;

    HYDU_FUNC_ENTER();

    /* Announce how many strings follow */
    nstrings = HYDU_strlist_lastidx(strlist);
    status = HYDU_sock_write(fd, &nstrings, sizeof(int), &sent, &closed,
                             HYDU_SOCK_COMM_MSGWAIT);
    HYDU_ERR_POP(status, "unable to write data to proxy\n");
    HYDU_ASSERT(!closed, status);

    /* Each entry goes out as a length-prefixed record */
    for (cur = strlist; *cur; cur++) {
        nbytes = strlen(*cur) + 1;

        status = HYDU_sock_write(fd, &nbytes, sizeof(int), &sent, &closed,
                                 HYDU_SOCK_COMM_MSGWAIT);
        HYDU_ERR_POP(status, "unable to write data to proxy\n");
        HYDU_ASSERT(!closed, status);

        status = HYDU_sock_write(fd, *cur, nbytes, &sent, &closed, HYDU_SOCK_COMM_MSGWAIT);
        HYDU_ERR_POP(status, "unable to write data to proxy\n");
        HYDU_ASSERT(!closed, status);
    }

  fn_exit:
    HYDU_FUNC_EXIT();
    return status;

  fn_fail:
    goto fn_exit;
}
/*
 * Parse a "<local_filler>,<local_count>,<global_count>" argument and store
 * the three integers in HYD_pmcd_pmip.system_global.global_core_map, then
 * advance *argv past the consumed argument.
 *
 * arg  - not used by this handler
 * argv - cursor into the argument vector; **argv is the string to parse
 *
 * Returns HYD_SUCCESS on success.  A failed duplication or a missing field
 * trips a HYDU_ASSERT (expected to set status and branch to fn_fail).
 */
static HYD_status global_core_map_fn(char *arg, char ***argv)
{
    char *map = NULL, *tmp;
    HYD_status status = HYD_SUCCESS;

    /* Split the core map into three different segments; strtok modifies
     * its input, so work on a private copy */
    map = HYDU_strdup(**argv);
    HYDU_ASSERT(map, status);

    tmp = strtok(map, ",");
    HYDU_ASSERT(tmp, status);
    HYD_pmcd_pmip.system_global.global_core_map.local_filler = atoi(tmp);

    tmp = strtok(NULL, ",");
    HYDU_ASSERT(tmp, status);
    HYD_pmcd_pmip.system_global.global_core_map.local_count = atoi(tmp);

    tmp = strtok(NULL, ",");
    HYDU_ASSERT(tmp, status);
    HYD_pmcd_pmip.system_global.global_core_map.global_count = atoi(tmp);

    (*argv)++;

  fn_exit:
    /* Released here (not inline) so a failed assertion does not leak it */
    if (map)
        HYDU_FREE(map);
    HYDU_FUNC_EXIT();
    return status;

  fn_fail:
    goto fn_exit;
}
static HYD_status send_cmd_downstream(int fd, const char *cmd) { char cmdlen[7]; int sent, closed; HYD_status status = HYD_SUCCESS; HYDU_FUNC_ENTER(); MPL_snprintf(cmdlen, 7, "%6u", (unsigned) strlen(cmd)); status = HYDU_sock_write(fd, cmdlen, 6, &sent, &closed, HYDU_SOCK_COMM_MSGWAIT); HYDU_ERR_POP(status, "error writing PMI line\n"); /* FIXME: We cannot abort when we are not able to send data * downstream. The upper layer needs to handle this based on * whether we want to abort or not.*/ HYDU_ASSERT(!closed, status); if (HYD_pmcd_pmip.user_global.debug) { HYDU_dump(stdout, "PMI response: %s\n", cmd); } status = HYDU_sock_write(fd, cmd, strlen(cmd), &sent, &closed, HYDU_SOCK_COMM_MSGWAIT); HYDU_ERR_POP(status, "error writing PMI line\n"); HYDU_ASSERT(!closed, status); fn_exit: HYDU_FUNC_EXIT(); return status; fn_fail: goto fn_exit; }
static HYD_status cmd_response(int fd, int pid, const char *cmd) { struct HYD_pmcd_hdr hdr; int sent, closed; HYD_status status = HYD_SUCCESS; HYDU_FUNC_ENTER(); HYD_pmcd_init_header(&hdr); hdr.cmd = PMI_RESPONSE; hdr.pid = pid; hdr.pmi_version = 1; hdr.buflen = strlen(cmd); status = HYDU_sock_write(fd, &hdr, sizeof(hdr), &sent, &closed); HYDU_ERR_POP(status, "unable to send PMI_RESPONSE header to proxy\n"); HYDU_ASSERT(!closed, status); if (HYD_server_info.user_global.debug) { HYDU_dump(stdout, "PMI response to fd %d pid %d: %s", fd, pid, cmd); } status = HYDU_sock_write(fd, cmd, strlen(cmd), &sent, &closed); HYDU_ERR_POP(status, "unable to send response to command\n"); HYDU_ASSERT(!closed, status); fn_exit: HYDU_FUNC_EXIT(); return status; fn_fail: goto fn_exit; }
static HYD_status send_cmd_upstream(const char *start, int fd, char *args[]) { int i, sent, closed; struct HYD_string_stash stash; char *buf = NULL; struct HYD_pmcd_hdr hdr; HYD_status status = HYD_SUCCESS; HYDU_FUNC_ENTER(); HYD_STRING_STASH_INIT(stash); HYD_STRING_STASH(stash, MPL_strdup(start), status); for (i = 0; args[i]; i++) { HYD_STRING_STASH(stash, MPL_strdup(args[i]), status); if (args[i + 1]) HYD_STRING_STASH(stash, MPL_strdup(";"), status); } HYD_STRING_SPIT(stash, buf, status); HYD_pmcd_init_header(&hdr); hdr.cmd = PMI_CMD; hdr.pid = fd; hdr.buflen = strlen(buf); hdr.pmi_version = 2; status = HYDU_sock_write(HYD_pmcd_pmip.upstream.control, &hdr, sizeof(hdr), &sent, &closed, HYDU_SOCK_COMM_MSGWAIT); HYDU_ERR_POP(status, "unable to send PMI header upstream\n"); HYDU_ASSERT(!closed, status); if (HYD_pmcd_pmip.user_global.debug) { HYDU_dump(stdout, "forwarding command (%s) upstream\n", buf); } status = HYDU_sock_write(HYD_pmcd_pmip.upstream.control, buf, hdr.buflen, &sent, &closed, HYDU_SOCK_COMM_MSGWAIT); HYDU_ERR_POP(status, "unable to send PMI command upstream\n"); HYDU_ASSERT(!closed, status); fn_exit: if (buf) MPL_free(buf); HYDU_FUNC_EXIT(); return status; fn_fail: goto fn_exit; }
/*
 * Split a "NAME=VALUE" string at the first '=' and append the resulting
 * pair to env_list via HYDU_append_env_to_list().
 *
 * The input string is not modified; a private copy is split and released
 * before returning.
 */
HYD_status HYDU_append_env_str_to_list(const char *str, struct HYD_env **env_list)
{
    HYD_status status = HYD_SUCCESS;
    char *env_name, *env_value;
    char *my_str = NULL;

    HYDU_FUNC_ENTER();

    my_str = MPL_strdup(str);
    env_value = my_str;

    /* don't use strtok, it will mangle env values that contain '=' */
    env_name = MPL_strsep(&env_value, "=");
    HYDU_ASSERT(env_name != NULL, status);

    status = HYDU_append_env_to_list(env_name, env_value, env_list);
    HYDU_ERR_POP(status, "unable to append env to list\n");

  fn_exit:
    if (my_str)
        MPL_free(my_str);
    HYDU_FUNC_EXIT();
    return status;

  fn_fail:
    goto fn_exit;
}
static HYD_status handle_rr_binding(void) { int i; HYD_status status = HYD_SUCCESS; HYDU_FUNC_ENTER(); HYDU_ASSERT(hwloc_initialized, status); /* initialize bitmaps */ HYDT_topo_hwloc_info.num_bitmaps = hwloc_get_nbobjs_by_type(topology, HWLOC_OBJ_PU); HYDU_MALLOC_OR_JUMP(HYDT_topo_hwloc_info.bitmap, hwloc_bitmap_t *, HYDT_topo_hwloc_info.num_bitmaps * sizeof(hwloc_bitmap_t), status); for (i = 0; i < HYDT_topo_hwloc_info.num_bitmaps; i++) { HYDT_topo_hwloc_info.bitmap[i] = hwloc_bitmap_alloc(); hwloc_bitmap_only(HYDT_topo_hwloc_info.bitmap[i], i); } fn_exit: HYDU_FUNC_EXIT(); return status; fn_fail: goto fn_exit; }
static HYD_status fn_put(int fd, int pid, int pgid, char *args[]) { struct HYD_proxy *proxy; struct HYD_pmcd_pmi_pg_scratch *pg_scratch; struct HYD_pmcd_token *tokens; int token_count, i, ret; HYD_status status = HYD_SUCCESS; HYDU_FUNC_ENTER(); status = HYD_pmcd_pmi_args_to_tokens(args, &tokens, &token_count); HYDU_ERR_POP(status, "unable to convert args to tokens\n"); proxy = HYD_pmcd_pmi_find_proxy(fd); HYDU_ASSERT(proxy, status); pg_scratch = (struct HYD_pmcd_pmi_pg_scratch *) proxy->pg->pg_scratch; for (i = 0; i < token_count; i++) { status = HYD_pmcd_pmi_add_kvs(tokens[i].key, tokens[i].val, pg_scratch->kvs, &ret); HYDU_ERR_POP(status, "unable to add keypair to kvs\n"); } fn_exit: HYD_pmcd_pmi_free_tokens(tokens, token_count); HYDU_FUNC_EXIT(); return status; fn_fail: goto fn_exit; }
static HYD_status fn_barrier_in(int fd, int pid, int pgid, char *args[]) { struct HYD_proxy *proxy, *tproxy; const char *cmd; int proxy_count; HYD_status status = HYD_SUCCESS; HYDU_FUNC_ENTER(); proxy = HYD_pmcd_pmi_find_proxy(fd); HYDU_ASSERT(proxy, status); proxy_count = 0; for (tproxy = proxy->pg->proxy_list; tproxy; tproxy = tproxy->next) proxy_count++; proxy->pg->barrier_count++; if (proxy->pg->barrier_count == proxy_count) { proxy->pg->barrier_count = 0; cmd = "cmd=barrier_out\n"; for (tproxy = proxy->pg->proxy_list; tproxy; tproxy = tproxy->next) { status = cmd_response(tproxy->control_fd, pid, cmd); HYDU_ERR_POP(status, "error writing PMI line\n"); } } fn_exit: HYDU_FUNC_EXIT(); return status; fn_fail: goto fn_exit; }
static HYD_status control_cb(int fd, HYD_event_t events, void *userp) { int count, closed, i; struct HYD_pg *pg; struct HYD_pmcd_hdr hdr; struct HYD_proxy *proxy, *tproxy; struct HYD_pmcd_pmi_pg_scratch *pg_scratch; char *buf; HYD_status status = HYD_SUCCESS; HYDU_FUNC_ENTER(); proxy = (struct HYD_proxy *) userp; if (fd == STDIN_FILENO) { HYD_pmcd_init_header(&hdr); hdr.cmd = STDIN; } else { status = HYDU_sock_read(fd, &hdr, sizeof(hdr), &count, &closed, HYDU_SOCK_COMM_MSGWAIT); HYDU_ERR_POP(status, "unable to read command from proxy\n"); HYDU_ASSERT(!closed, status); } if (hdr.cmd == PID_LIST) { /* Got PIDs */ HYDU_MALLOC(proxy->pid, int *, proxy->proxy_process_count * sizeof(int), status); status = HYDU_sock_read(fd, (void *) proxy->pid, proxy->proxy_process_count * sizeof(int), &count, &closed, HYDU_SOCK_COMM_MSGWAIT); HYDU_ERR_POP(status, "unable to read status from proxy\n"); HYDU_ASSERT(!closed, status); if (proxy->pg->pgid) { /* We initialize the debugger code only for non-dynamically * spawned processes */ goto fn_exit; } /* Check if all the PIDs have been received */ for (tproxy = proxy->pg->proxy_list; tproxy; tproxy = tproxy->next) if (tproxy->pid == NULL) goto fn_exit; /* Call the debugger initialization */ status = HYDT_dbg_setup_procdesc(proxy->pg); HYDU_ERR_POP(status, "debugger setup failed\n"); }
HYD_status HYDT_topo_hwloc_init(const char *binding, const char *mapping, const char *membind) { HYD_status status = HYD_SUCCESS; HYDU_FUNC_ENTER(); HYDU_ASSERT(binding, status); hwloc_topology_init(&topology); hwloc_topology_load(topology); HYDT_topo_hwloc_info.total_num_pus = hwloc_get_nbobjs_by_type(topology, HWLOC_OBJ_PU); hwloc_initialized = 1; /* bindings that don't require mapping */ if (!strncmp(binding, "user:"******"user:"******"user:"******"error binding to %s\n", binding); goto fn_exit; } else if (!strcmp(binding, "rr")) { status = handle_rr_binding(); HYDU_ERR_POP(status, "error binding to %s\n", binding); goto fn_exit; } status = handle_bitmap_binding(binding, mapping ? mapping : binding); HYDU_ERR_POP(status, "error binding with bind \"%s\" and map \"%s\"\n", binding, mapping); /* Memory binding options */ if (membind == NULL) HYDT_topo_hwloc_info.membind = HWLOC_MEMBIND_DEFAULT; else if (!strcmp(membind, "firsttouch")) HYDT_topo_hwloc_info.membind = HWLOC_MEMBIND_FIRSTTOUCH; else if (!strcmp(membind, "nexttouch")) HYDT_topo_hwloc_info.membind = HWLOC_MEMBIND_NEXTTOUCH; else if (!strncmp(membind, "bind:", strlen("bind:"))) { HYDT_topo_hwloc_info.membind = HWLOC_MEMBIND_BIND; } else if (!strncmp(membind, "interleave:", strlen("interleave:"))) { HYDT_topo_hwloc_info.membind = HWLOC_MEMBIND_INTERLEAVE; } else if (!strncmp(membind, "replicate:", strlen("replicate:"))) { HYDT_topo_hwloc_info.membind = HWLOC_MEMBIND_REPLICATE; } else { HYDU_ERR_SETANDJUMP(status, HYD_INTERNAL_ERROR, "unrecognized membind policy \"%s\"\n", membind); } fn_exit: HYDU_FUNC_EXIT(); return status; fn_fail: goto fn_exit; }
static HYD_status cmd_response(int fd, int pid, char *cmd) { char cmdlen[7]; struct HYD_pmcd_hdr hdr; int sent, closed; HYD_status status = HYD_SUCCESS; HYDU_FUNC_ENTER(); HYD_pmcd_init_header(&hdr); hdr.cmd = PMI_RESPONSE; hdr.pid = pid; hdr.pmi_version = 2; hdr.buflen = 6 + strlen(cmd); status = HYDU_sock_write(fd, &hdr, sizeof(hdr), &sent, &closed, HYDU_SOCK_COMM_MSGWAIT); HYDU_ERR_POP(status, "unable to send PMI_RESPONSE header to proxy\n"); HYDU_ASSERT(!closed, status); HYDU_snprintf(cmdlen, 7, "%6u", (unsigned) strlen(cmd)); status = HYDU_sock_write(fd, cmdlen, 6, &sent, &closed, HYDU_SOCK_COMM_MSGWAIT); HYDU_ERR_POP(status, "error writing PMI line\n"); HYDU_ASSERT(!closed, status); if (HYD_server_info.user_global.debug) { HYDU_dump(stdout, "PMI response to fd %d pid %d: %s\n", fd, pid, cmd); } status = HYDU_sock_write(fd, cmd, strlen(cmd), &sent, &closed, HYDU_SOCK_COMM_MSGWAIT); HYDU_ERR_POP(status, "error writing PMI line\n"); HYDU_ASSERT(!closed, status); fn_exit: HYDU_FUNC_EXIT(); return status; fn_fail: goto fn_exit; }
HYD_status HYDU_sock_read(int fd, void *buf, int maxlen, int *recvd, int *closed, enum HYDU_sock_comm_flag flag) { int tmp; HYD_status status = HYD_SUCCESS; HYDU_FUNC_ENTER(); HYDU_ASSERT(maxlen, status); *recvd = 0; *closed = 0; while (1) { do { tmp = read(fd, (char *) buf + *recvd, maxlen - *recvd); if (tmp < 0) { if (errno == ECONNRESET || fd == STDIN_FILENO) { /* If the remote end closed the socket or if we * get an EINTR on stdin, set the socket to be * closed and jump out */ *closed = 1; status = HYD_SUCCESS; goto fn_exit; } } } while (tmp < 0 && errno == EINTR); if (tmp < 0) { HYDU_ERR_SETANDJUMP(status, HYD_SOCK_ERROR, "read error (%s)\n", MPL_strerror(errno)); } else if (tmp == 0) { *closed = 1; goto fn_exit; } else { *recvd += tmp; } if (flag == HYDU_SOCK_COMM_NONE || *recvd == maxlen) break; } fn_exit: HYDU_FUNC_EXIT(); return status; fn_fail: goto fn_exit; }
HYD_status HYDT_bscd_lsf_query_node_list(struct HYD_node **node_list) { char *hosts, *hostname, *num_procs_str, *thosts = NULL; int num_procs; HYD_status status = HYD_SUCCESS; HYDU_FUNC_ENTER(); if (MPL_env2str("LSB_MCPU_HOSTS", (const char **) &hosts) == 0) hosts = NULL; if (hosts == NULL) { *node_list = NULL; HYDU_ERR_SETANDJUMP(status, HYD_INTERNAL_ERROR, "No LSF node list found\n"); } else { hosts = HYDU_strdup(hosts); thosts = hosts; hostname = strtok(hosts, " "); while (1) { if (hostname == NULL) break; /* the even fields in the list should be the number of * cores */ num_procs_str = strtok(NULL, " "); HYDU_ASSERT(num_procs_str, status); num_procs = atoi(num_procs_str); status = HYDU_add_to_node_list(hostname, num_procs, node_list); HYDU_ERR_POP(status, "unable to add to node list\n"); hostname = strtok(NULL, " "); } if (thosts) HYDU_FREE(thosts); } fn_exit: HYDU_FUNC_EXIT(); return status; fn_fail: goto fn_exit; }
static HYD_status extract_tasks_per_node(int nnodes, char *task_list) { char *task_set, **tmp_core_list = NULL; char *nodes, *cores; int i, j, k, p, count = 0; HYD_status status = HYD_SUCCESS; HYDU_MALLOC_OR_JUMP(tasks_per_node, int *, nnodes * sizeof(int), status); HYDU_MALLOC_OR_JUMP(tmp_core_list, char **, nnodes * sizeof(char *), status); task_set = strtok(task_list, ","); HYDU_ASSERT(task_set, status); i = 0; do { HYDU_MALLOC_OR_JUMP(tmp_core_list[i], char *, strlen(task_set) + 1, status); MPL_snprintf(tmp_core_list[i], strlen(task_set) + 1, "%s", task_set); i++; task_set = strtok(NULL, ","); } while (task_set); count = i; p = 0; for (i = 0; i < count; i++) { cores = strtok(tmp_core_list[i], "("); nodes = strtok(NULL, "("); if (nodes) { nodes[strlen(nodes) - 1] = 0; nodes++; j = atoi(nodes); } else j = 1; for (k = 0; k < j; k++) tasks_per_node[p++] = atoi(cores); } fn_exit: for (i = 0; i < count; i++) MPL_free(tmp_core_list[i]); MPL_free(tmp_core_list); return status; fn_fail: goto fn_exit; }
HYD_status HYDU_sock_write(int fd, const void *buf, int maxlen, int *sent, int *closed, enum HYDU_sock_comm_flag flag) { int tmp; HYD_status status = HYD_SUCCESS; HYDU_FUNC_ENTER(); HYDU_ASSERT(maxlen, status); *sent = 0; *closed = 0; while (1) { tmp = write(fd, (char *) buf + *sent, maxlen - *sent); if (tmp <= 0) { if (errno == EAGAIN) { if (flag == HYDU_SOCK_COMM_NONE) goto fn_exit; else continue; } else if (errno == ECONNRESET) { *closed = 1; goto fn_exit; } HYDU_ERR_SETANDJUMP(status, HYD_SOCK_ERROR, "write error (%s)\n", MPL_strerror(errno)); } else { *sent += tmp; } if (flag == HYDU_SOCK_COMM_NONE || *sent == maxlen) break; } fn_exit: HYDU_FUNC_EXIT(); return status; fn_fail: goto fn_exit; }
HYD_status HYD_pmcd_pmiserv_send_signal(struct HYD_proxy *proxy, int signum) { struct HYD_pmcd_hdr hdr; int sent, closed; HYD_status status = HYD_SUCCESS; HYD_pmcd_init_header(&hdr); hdr.cmd = SIGNAL; hdr.signum = signum; status = HYDU_sock_write(proxy->control_fd, &hdr, sizeof(hdr), &sent, &closed, HYDU_SOCK_COMM_MSGWAIT); HYDU_ERR_POP(status, "unable to write data to proxy\n"); HYDU_ASSERT(!closed, status); fn_exit: return status; fn_fail: goto fn_exit; }
/*
 * Decide whether stdin is usable with poll().
 *
 * out - receives the verdict of HYDT_dmxi_stdin_valid().  When that
 *       verdict is non-zero, a zero-timeout poll() for POLLIN is issued on
 *       STDIN_FILENO; *out becomes 0 if revents has any bit set other than
 *       POLLIN/POLLHUP, and 1 otherwise.
 */
HYD_status HYDT_dmxu_poll_stdin_valid(int *out)
{
    HYD_status status = HYD_SUCCESS;
    struct pollfd probe;
    int ret;

    HYDU_FUNC_ENTER();

    status = HYDT_dmxi_stdin_valid(out);
    HYDU_ERR_POP(status, "unable to check if stdin is valid\n");

    /* The generic test already ruled stdin out: nothing more to check */
    if (!*out)
        goto fn_exit;

    /* The generic test thinks that STDIN is valid. Try poll
     * specific tests. */
    probe.fd = STDIN_FILENO;
    probe.events = POLLIN;

    /* Check if poll on stdin returns any errors; on Darwin this
     * is broken */
    ret = poll(&probe, 1, 0);
    HYDU_ASSERT((ret >= 0), status);

    *out = (probe.revents & ~(POLLIN | POLLHUP)) ? 0 : 1;

  fn_exit:
    HYDU_FUNC_EXIT();
    return status;

  fn_fail:
    goto fn_exit;
}
static HYD_status fn_put(int fd, int pid, int pgid, char *args[]) { int i, ret; struct HYD_proxy *proxy; struct HYD_pmcd_pmi_pg_scratch *pg_scratch; char *kvsname, *key, *val; char *tmp[HYD_NUM_TMP_STRINGS], *cmd; struct HYD_pmcd_token *tokens; int token_count; HYD_status status = HYD_SUCCESS; HYDU_FUNC_ENTER(); status = HYD_pmcd_pmi_args_to_tokens(args, &tokens, &token_count); HYDU_ERR_POP(status, "unable to convert args to tokens\n"); kvsname = HYD_pmcd_pmi_find_token_keyval(tokens, token_count, "kvsname"); HYDU_ERR_CHKANDJUMP(status, kvsname == NULL, HYD_INTERNAL_ERROR, "unable to find token: kvsname\n"); key = HYD_pmcd_pmi_find_token_keyval(tokens, token_count, "key"); HYDU_ERR_CHKANDJUMP(status, key == NULL, HYD_INTERNAL_ERROR, "unable to find token: key\n"); val = HYD_pmcd_pmi_find_token_keyval(tokens, token_count, "value"); if (val == NULL) { /* the user sent an empty string */ val = HYDU_strdup(""); } proxy = HYD_pmcd_pmi_find_proxy(fd); HYDU_ASSERT(proxy, status); pg_scratch = (struct HYD_pmcd_pmi_pg_scratch *) proxy->pg->pg_scratch; if (strcmp(pg_scratch->kvs->kvs_name, kvsname)) HYDU_ERR_SETANDJUMP(status, HYD_INTERNAL_ERROR, "kvsname (%s) does not match this group's kvs space (%s)\n", kvsname, pg_scratch->kvs->kvs_name); status = HYD_pmcd_pmi_add_kvs(key, val, pg_scratch->kvs, &ret); HYDU_ERR_POP(status, "unable to add keypair to kvs\n"); i = 0; tmp[i++] = HYDU_strdup("cmd=put_result rc="); tmp[i++] = HYDU_int_to_str(ret); if (ret == 0) { tmp[i++] = HYDU_strdup(" msg=success"); } else { tmp[i++] = HYDU_strdup(" msg=duplicate_key"); tmp[i++] = HYDU_strdup(key); } tmp[i++] = HYDU_strdup("\n"); tmp[i++] = NULL; status = HYDU_str_alloc_and_join(tmp, &cmd); HYDU_ERR_POP(status, "unable to join strings\n"); HYDU_free_strlist(tmp); status = cmd_response(fd, pid, cmd); HYDU_ERR_POP(status, "error writing PMI line\n"); HYDU_FREE(cmd); fn_exit: HYD_pmcd_pmi_free_tokens(tokens, token_count); HYDU_FUNC_EXIT(); return status; fn_fail: goto fn_exit; }
HYD_status HYDU_create_proxy_list(struct HYD_exec *exec_list, struct HYD_node *node_list, struct HYD_pg *pg) { struct HYD_proxy *proxy = NULL, *last_proxy = NULL, *tmp; struct HYD_exec *exec; struct HYD_node *node; int max_oversubscribe, c, num_procs, proxy_rem_cores, exec_rem_procs, allocated_procs; int filler_round, num_nodes, i, dummy_fillers; #if defined(FINEGRAIN_MPI) int current_exec_start_rank = 0; #endif HYD_status status = HYD_SUCCESS; HYDU_FUNC_ENTER(); /* Find the current maximum oversubscription on the nodes */ max_oversubscribe = 1; num_nodes = 0; for (node = node_list; node; node = node->next) { c = HYDU_dceil(node->active_processes, node->core_count); if (c > max_oversubscribe) max_oversubscribe = c; num_nodes++; } /* make sure there are non-zero cores available */ c = 0; for (node = node_list; node; node = node->next) c += (node->core_count * max_oversubscribe) - node->active_processes; if (c == 0) max_oversubscribe++; allocated_procs = 0; dummy_fillers = 1; for (node = node_list; node; node = node->next) { /* check how many cores are available */ c = (node->core_count * max_oversubscribe) - node->active_processes; /* create a proxy associated with this node */ status = alloc_proxy(&proxy, pg, node); HYDU_ERR_POP(status, "error allocating proxy\n"); proxy->filler_processes = c; allocated_procs += c; if (proxy->filler_processes < node->core_count) dummy_fillers = 0; if (pg->proxy_list == NULL) pg->proxy_list = proxy; else last_proxy->next = proxy; last_proxy = proxy; if (allocated_procs >= pg->pg_process_count) break; } /* If all proxies have as many filler processes as the number of * cores, we can reduce those filler processes */ if (dummy_fillers) for (proxy = pg->proxy_list; proxy; proxy = proxy->next) proxy->filler_processes -= proxy->node->core_count; /* Proxy list is created; add the executables to the proxy list */ if (pg->proxy_list->next == NULL) { /* Special case: there is only one proxy, so all executables * directly get appended to this 
proxy */ for (exec = exec_list; exec; exec = exec->next) { #if defined(FINEGRAIN_MPI) status = add_exec_to_proxy(exec, pg->proxy_list, exec->proc_count, current_exec_start_rank); HYDU_ERR_POP(status, "unable to add executable to proxy\n"); current_exec_start_rank += exec->proc_count * exec->nfg; #else status = add_exec_to_proxy(exec, pg->proxy_list, exec->proc_count); HYDU_ERR_POP(status, "unable to add executable to proxy\n"); #endif } } else { exec = exec_list; filler_round = 1; for (proxy = pg->proxy_list; proxy && proxy->filler_processes == 0; proxy = proxy->next); if (proxy == NULL) { filler_round = 0; proxy = pg->proxy_list; } exec_rem_procs = exec->proc_count; proxy_rem_cores = filler_round ? proxy->filler_processes : proxy->node->core_count; while (exec) { if (exec_rem_procs == 0) { exec = exec->next; if (exec) exec_rem_procs = exec->proc_count; else break; } HYDU_ASSERT(exec_rem_procs, status); while (proxy_rem_cores == 0) { proxy = proxy->next; if (proxy == NULL) { filler_round = 0; proxy = pg->proxy_list; } proxy_rem_cores = filler_round ? proxy->filler_processes : proxy->node->core_count; } num_procs = (exec_rem_procs > proxy_rem_cores) ? 
proxy_rem_cores : exec_rem_procs; HYDU_ASSERT(num_procs, status); exec_rem_procs -= num_procs; proxy_rem_cores -= num_procs; #if defined(FINEGRAIN_MPI) status = add_exec_to_proxy(exec, proxy, num_procs, current_exec_start_rank); HYDU_ERR_POP(status, "unable to add executable to proxy\n"); current_exec_start_rank += num_procs * exec->nfg; #else status = add_exec_to_proxy(exec, proxy, num_procs); HYDU_ERR_POP(status, "unable to add executable to proxy\n"); #endif } } /* find dummy proxies and remove them */ while (pg->proxy_list->exec_list == NULL) { tmp = pg->proxy_list->next; pg->proxy_list->next = NULL; HYDU_free_proxy_list(pg->proxy_list); pg->proxy_list = tmp; } for (proxy = pg->proxy_list; proxy->next;) { if (proxy->next->exec_list == NULL) { tmp = proxy->next; proxy->next = proxy->next->next; tmp->next = NULL; HYDU_free_proxy_list(tmp); } else { proxy = proxy->next; } } for (proxy = pg->proxy_list, i = 0; proxy; proxy = proxy->next, i++) proxy->proxy_id = i; fn_exit: HYDU_FUNC_EXIT(); return status; fn_fail: goto fn_exit; }
static HYD_status fn_spawn(int fd, int pid, int pgid, char *args[]) { struct HYD_pg *pg; struct HYD_pmcd_pmi_pg_scratch *pg_scratch; struct HYD_proxy *proxy; struct HYD_pmcd_token *tokens; struct HYD_exec *exec_list = NULL, *exec; struct HYD_env *env; struct HYD_node *node; char key[PMI_MAXKEYLEN], *val; int nprocs, preput_num, info_num, ret; char *execname, *path = NULL; struct HYD_pmcd_token_segment *segment_list = NULL; int token_count, i, j, k, new_pgid, total_spawns; int argcnt, num_segments; char *control_port, *proxy_args[HYD_NUM_TMP_STRINGS] = { NULL }; char *tmp[HYD_NUM_TMP_STRINGS]; HYD_status status = HYD_SUCCESS; HYDU_FUNC_ENTER(); for (i = 0; args[i]; i++) mcmd_args[mcmd_num_args++] = HYDU_strdup(args[i]); mcmd_args[mcmd_num_args] = NULL; status = HYD_pmcd_pmi_args_to_tokens(mcmd_args, &tokens, &token_count); HYDU_ERR_POP(status, "unable to convert args to tokens\n"); /* Here's the order of things we do: * * 1. Break the token list into multiple segments, each segment * corresponding to a command. Each command represents * information for one executable. * * 2. Allocate a process group for the new set of spawned * processes * * 3. Get all the common keys and deal with them * * 4. Create an executable list based on the segments. * * 5. Create a proxy list using the created executable list and * spawn it. */ /* Break the token list into multiple segments and create an * executable list based on the segments. 
*/ val = HYD_pmcd_pmi_find_token_keyval(tokens, token_count, "totspawns"); HYDU_ERR_CHKANDJUMP(status, val == NULL, HYD_INTERNAL_ERROR, "unable to find token: totspawns\n"); total_spawns = atoi(val); HYDU_MALLOC(segment_list, struct HYD_pmcd_token_segment *, total_spawns * sizeof(struct HYD_pmcd_token_segment), status); segment_tokens(tokens, token_count, segment_list, &num_segments); if (num_segments != total_spawns) { /* We didn't read the entire PMI string; wait for the rest to * arrive */ goto fn_exit; } else { /* Got the entire PMI string; free the arguments and reset */ HYDU_free_strlist(mcmd_args); mcmd_num_args = 0; } /* Allocate a new process group */ for (pg = &HYD_server_info.pg_list; pg->next; pg = pg->next); new_pgid = pg->pgid + 1; status = HYDU_alloc_pg(&pg->next, new_pgid); HYDU_ERR_POP(status, "unable to allocate process group\n"); pg = pg->next; proxy = HYD_pmcd_pmi_find_proxy(fd); HYDU_ASSERT(proxy, status); pg->spawner_pg = proxy->pg; for (j = 0; j < total_spawns; j++) { /* For each segment, we create an exec structure */ val = HYD_pmcd_pmi_find_token_keyval(&tokens[segment_list[j].start_idx], segment_list[j].token_count, "nprocs"); HYDU_ERR_CHKANDJUMP(status, val == NULL, HYD_INTERNAL_ERROR, "unable to find token: nprocs\n"); nprocs = atoi(val); pg->pg_process_count += nprocs; val = HYD_pmcd_pmi_find_token_keyval(&tokens[segment_list[j].start_idx], segment_list[j].token_count, "argcnt"); HYDU_ERR_CHKANDJUMP(status, val == NULL, HYD_INTERNAL_ERROR, "unable to find token: argcnt\n"); argcnt = atoi(val); val = HYD_pmcd_pmi_find_token_keyval(&tokens[segment_list[j].start_idx], segment_list[j].token_count, "info_num"); if (val) info_num = atoi(val); else info_num = 0; if (exec_list == NULL) { status = HYDU_alloc_exec(&exec_list); HYDU_ERR_POP(status, "unable to allocate exec\n"); exec_list->appnum = 0; exec = exec_list; } else { for (exec = exec_list; exec->next; exec = exec->next); status = HYDU_alloc_exec(&exec->next); HYDU_ERR_POP(status, "unable 
to allocate exec\n"); exec->next->appnum = exec->appnum + 1; exec = exec->next; } /* Info keys */ for (i = 0; i < info_num; i++) { char *info_key, *info_val; HYDU_snprintf(key, PMI_MAXKEYLEN, "info_key_%d", i); val = HYD_pmcd_pmi_find_token_keyval(&tokens[segment_list[j].start_idx], segment_list[j].token_count, key); HYDU_ERR_CHKANDJUMP(status, val == NULL, HYD_INTERNAL_ERROR, "unable to find token: %s\n", key); info_key = val; HYDU_snprintf(key, PMI_MAXKEYLEN, "info_val_%d", i); val = HYD_pmcd_pmi_find_token_keyval(&tokens[segment_list[j].start_idx], segment_list[j].token_count, key); HYDU_ERR_CHKANDJUMP(status, val == NULL, HYD_INTERNAL_ERROR, "unable to find token: %s\n", key); info_val = val; if (!strcmp(info_key, "path")) { path = HYDU_strdup(info_val); } else if (!strcmp(info_key, "wdir")) { exec->wdir = HYDU_strdup(info_val); } else if (!strcmp(info_key, "host")) { status = HYDU_process_mfile_token(info_val, 1, &pg->user_node_list); HYDU_ERR_POP(status, "error create node list\n"); } else if (!strcmp(info_key, "hostfile")) { status = HYDU_parse_hostfile(info_val, &pg->user_node_list, HYDU_process_mfile_token); HYDU_ERR_POP(status, "error parsing hostfile\n"); } else { /* Unrecognized info key; ignore */ } } status = HYDU_correct_wdir(&exec->wdir); HYDU_ERR_POP(status, "unable to correct wdir\n"); val = HYD_pmcd_pmi_find_token_keyval(&tokens[segment_list[j].start_idx], segment_list[j].token_count, "execname"); HYDU_ERR_CHKANDJUMP(status, val == NULL, HYD_INTERNAL_ERROR, "unable to find token: execname\n"); if (path == NULL) execname = HYDU_strdup(val); else { i = 0; tmp[i++] = HYDU_strdup(path); tmp[i++] = HYDU_strdup("/"); tmp[i++] = HYDU_strdup(val); tmp[i++] = NULL; status = HYDU_str_alloc_and_join(tmp, &execname); HYDU_ERR_POP(status, "error while joining strings\n"); HYDU_free_strlist(tmp); } i = 0; exec->exec[i++] = execname; for (k = 0; k < argcnt; k++) { HYDU_snprintf(key, PMI_MAXKEYLEN, "arg%d", k + 1); val = 
HYD_pmcd_pmi_find_token_keyval(&tokens[segment_list[j].start_idx], segment_list[j].token_count, key); HYDU_ERR_CHKANDJUMP(status, val == NULL, HYD_INTERNAL_ERROR, "unable to find token: %s\n", key); exec->exec[i++] = HYDU_strdup(val); } exec->exec[i++] = NULL; exec->proc_count = nprocs; /* It is not clear what kind of environment needs to get * passed to the spawned process. Don't set anything here, and * let the proxy do whatever it does by default. */ exec->env_prop = NULL; status = HYDU_env_create(&env, "PMI_SPAWNED", "1"); HYDU_ERR_POP(status, "unable to create PMI_SPAWNED environment\n"); exec->user_env = env; } status = HYD_pmcd_pmi_alloc_pg_scratch(pg); HYDU_ERR_POP(status, "unable to allocate pg scratch space\n"); if (pg->user_node_list) { pg->pg_core_count = 0; for (i = 0, node = pg->user_node_list; node; node = node->next, i++) { pg->pg_core_count += node->core_count; node->node_id = i; } } else { pg->pg_core_count = HYD_server_info.pg_list.pg_core_count; } pg->pg_process_count = 0; for (exec = exec_list; exec; exec = exec->next) pg->pg_process_count += exec->proc_count; pg_scratch = (struct HYD_pmcd_pmi_pg_scratch *) pg->pg_scratch; /* Get the common keys and deal with them */ val = HYD_pmcd_pmi_find_token_keyval(tokens, token_count, "preput_num"); if (val) preput_num = atoi(val); else preput_num = 0; for (i = 0; i < preput_num; i++) { char *preput_key, *preput_val; HYDU_snprintf(key, PMI_MAXKEYLEN, "preput_key_%d", i); val = HYD_pmcd_pmi_find_token_keyval(tokens, token_count, key); HYDU_ERR_CHKANDJUMP(status, val == NULL, HYD_INTERNAL_ERROR, "unable to find token: %s\n", key); preput_key = val; HYDU_snprintf(key, PMI_MAXKEYLEN, "preput_val_%d", i); val = HYD_pmcd_pmi_find_token_keyval(tokens, token_count, key); HYDU_ERR_CHKANDJUMP(status, val == NULL, HYD_INTERNAL_ERROR, "unable to find token: %s\n", key); preput_val = val; status = HYD_pmcd_pmi_add_kvs(preput_key, preput_val, pg_scratch->kvs, &ret); HYDU_ERR_POP(status, "unable to add keypair to 
kvs\n"); } /* Create the proxy list */ if (pg->user_node_list) { status = HYDU_create_proxy_list(exec_list, pg->user_node_list, pg); HYDU_ERR_POP(status, "error creating proxy list\n"); } else { status = HYDU_create_proxy_list(exec_list, HYD_server_info.node_list, pg); HYDU_ERR_POP(status, "error creating proxy list\n"); } HYDU_free_exec_list(exec_list); status = HYDU_sock_create_and_listen_portstr(HYD_server_info.user_global.iface, HYD_server_info.local_hostname, HYD_server_info.port_range, &control_port, HYD_pmcd_pmiserv_control_listen_cb, (void *) (size_t) new_pgid); HYDU_ERR_POP(status, "unable to create PMI port\n"); if (HYD_server_info.user_global.debug) HYDU_dump(stdout, "Got a control port string of %s\n", control_port); /* Go to the last PG */ for (pg = &HYD_server_info.pg_list; pg->next; pg = pg->next); status = HYD_pmcd_pmi_fill_in_proxy_args(proxy_args, control_port, new_pgid); HYDU_ERR_POP(status, "unable to fill in proxy arguments\n"); HYDU_FREE(control_port); status = HYD_pmcd_pmi_fill_in_exec_launch_info(pg); HYDU_ERR_POP(status, "unable to fill in executable arguments\n"); status = HYDT_bsci_launch_procs(proxy_args, pg->proxy_list, NULL); HYDU_ERR_POP(status, "launcher cannot launch processes\n"); { char *cmd_str[HYD_NUM_TMP_STRINGS], *cmd; i = 0; cmd_str[i++] = HYDU_strdup("cmd=spawn_result rc=0"); cmd_str[i++] = HYDU_strdup("\n"); cmd_str[i++] = NULL; status = HYDU_str_alloc_and_join(cmd_str, &cmd); HYDU_ERR_POP(status, "unable to join strings\n"); HYDU_free_strlist(cmd_str); status = cmd_response(fd, pid, cmd); HYDU_ERR_POP(status, "error writing PMI line\n"); HYDU_FREE(cmd); } fn_exit: HYD_pmcd_pmi_free_tokens(tokens, token_count); HYDU_free_strlist(proxy_args); if (segment_list) HYDU_FREE(segment_list); HYDU_FUNC_EXIT(); return status; fn_fail: goto fn_exit; }
static HYD_status resolve_pattern_string(const char *pattern, char **str, int pgid, int proxy_id, int rank) { HYD_status status = HYD_SUCCESS; int i, pos, tpos; char *tmp[HYD_NUM_TMP_STRINGS] = { NULL }; struct HYD_pg *pg; struct HYD_proxy *proxy; HYDU_FUNC_ENTER(); *str = NULL; tpos = 0; pos = 0; i = 0; HYDU_MALLOC_OR_JUMP(tmp[i], char *, HYD_TMP_STRLEN, status); tmp[i][0] = '\0'; while (1) { HYDU_ASSERT(tpos < HYD_TMP_STRLEN, status); if (pattern[pos] != '%') { tmp[i][tpos++] = pattern[pos++]; if (pattern[pos - 1] == '\0') break; } else { ++pos; /* consume '%' */ if (pattern[pos] == '%') { tmp[i][tpos++] = pattern[pos++]; continue; } /* all remaining valid specifiers need a new temp string */ tmp[i][tpos] = '\0'; ++i; tpos = 0; HYDU_MALLOC_OR_JUMP(tmp[i], char *, HYD_TMP_STRLEN, status); tmp[i][0] = '\0'; switch (pattern[pos]) { case 'r': MPL_snprintf(tmp[i], HYD_TMP_STRLEN, "%d", rank); break; case 'g': MPL_snprintf(tmp[i], HYD_TMP_STRLEN, "%d", pgid); break; case 'p': MPL_snprintf(tmp[i], HYD_TMP_STRLEN, "%d", proxy_id); break; case 'h': for (pg = &HYD_server_info.pg_list; pg; pg = pg->next) if (pg->pgid == pgid) break; HYDU_ASSERT(pg, status); for (proxy = pg->proxy_list; proxy; proxy = proxy->next) if (proxy->proxy_id == proxy_id) break; HYDU_ASSERT(proxy, status); MPL_snprintf(tmp[i], HYD_TMP_STRLEN, "%s", proxy->node->hostname); break; case '\0': HYDU_ERR_SETANDJUMP(status, HYD_INTERNAL_ERROR, "dangling '%%' at end of pattern\n"); break; default: HYDU_ERR_SETANDJUMP(status, HYD_INTERNAL_ERROR, "unrecognized pattern specifier ('%c')\n", pattern[pos]); break; } ++pos; /* skip past fmt specifier */ ++i; tpos = 0; HYDU_MALLOC_OR_JUMP(tmp[i], char *, HYD_TMP_STRLEN, status); tmp[i][0] = '\0'; } } tmp[++i] = NULL; status = HYDU_str_alloc_and_join(tmp, str); HYDU_ERR_POP(status, "unable to join strings\n"); fn_exit: HYDU_free_strlist(tmp); HYDU_FUNC_EXIT(); return status; fn_fail: goto fn_exit; }
static HYD_status stdoe_cb(int _fd, int pgid, int proxy_id, int rank, void *_buf, int buflen) { int fd = _fd; char *pattern_resolve, *pattern = NULL; struct stdoe_fd *tmp, *run; int sent, closed, mark, i; char *buf = (char *) _buf, *prepend; HYD_status status = HYD_SUCCESS; HYDU_FUNC_ENTER(); pattern = (_fd == STDOUT_FILENO) ? HYD_ui_info.outfile_pattern : (_fd == STDERR_FILENO) ? HYD_ui_info.errfile_pattern : NULL; if (pattern) { /* See if the pattern already exists */ status = resolve_pattern_string(pattern, &pattern_resolve, pgid, proxy_id, rank); HYDU_ERR_POP(status, "error resolving pattern\n"); for (run = stdoe_fd_list; run; run = run->next) if (!strcmp(run->pattern, pattern_resolve)) break; if (run) { fd = run->fd; MPL_free(pattern_resolve); } else { HYDU_MALLOC_OR_JUMP(tmp, struct stdoe_fd *, sizeof(struct stdoe_fd), status); tmp->pattern = pattern_resolve; tmp->fd = open(tmp->pattern, O_RDWR | O_CREAT, S_IRUSR | S_IWUSR); HYDU_ASSERT(tmp->fd >= 0, status); tmp->next = NULL; if (stdoe_fd_list == NULL) stdoe_fd_list = tmp; else { for (run = stdoe_fd_list; run->next; run = run->next); run->next = tmp; } fd = tmp->fd; } } if (HYD_ui_info.prepend_pattern == NULL) { status = HYDU_sock_write(fd, buf, buflen, &sent, &closed, HYDU_SOCK_COMM_MSGWAIT); HYDU_ERR_POP(status, "unable to write data to stdout/stderr\n"); HYDU_ASSERT(!closed, status); } else { status = resolve_pattern_string(HYD_ui_info.prepend_pattern, &prepend, pgid, proxy_id, rank); HYDU_ERR_POP(status, "error resolving pattern\n"); mark = 0; for (i = 0; i < buflen; i++) { if (buf[i] == '\n' || i == buflen - 1) { if (prepend[0] != '\0') { /* sock_write barfs on maxlen==0 */ status = HYDU_sock_write(fd, (const void *) prepend, strlen(prepend), &sent, &closed, HYDU_SOCK_COMM_MSGWAIT); HYDU_ERR_POP(status, "unable to write data to stdout/stderr\n"); } status = HYDU_sock_write(fd, (const void *) &buf[mark], i - mark + 1, &sent, &closed, HYDU_SOCK_COMM_MSGWAIT); HYDU_ERR_POP(status, "unable to write data to 
stdout/stderr\n"); HYDU_ASSERT(!closed, status); mark = i + 1; } } MPL_free(prepend); } fn_exit: HYDU_FUNC_EXIT(); return status; fn_fail: goto fn_exit; }
static HYD_status fn_get(int fd, int pid, int pgid, char *args[]) { int i; struct HYD_proxy *proxy; struct HYD_pmcd_pmi_pg_scratch *pg_scratch; struct HYD_pmcd_pmi_kvs_pair *run; char *kvsname, *key, *val; char *tmp[HYD_NUM_TMP_STRINGS], *cmd; struct HYD_pmcd_token *tokens; int token_count; HYD_status status = HYD_SUCCESS; HYDU_FUNC_ENTER(); status = HYD_pmcd_pmi_args_to_tokens(args, &tokens, &token_count); HYDU_ERR_POP(status, "unable to convert args to tokens\n"); kvsname = HYD_pmcd_pmi_find_token_keyval(tokens, token_count, "kvsname"); HYDU_ERR_CHKANDJUMP(status, kvsname == NULL, HYD_INTERNAL_ERROR, "unable to find token: kvsname\n"); key = HYD_pmcd_pmi_find_token_keyval(tokens, token_count, "key"); HYDU_ERR_CHKANDJUMP(status, key == NULL, HYD_INTERNAL_ERROR, "unable to find token: key\n"); proxy = HYD_pmcd_pmi_find_proxy(fd); HYDU_ASSERT(proxy, status); pg_scratch = (struct HYD_pmcd_pmi_pg_scratch *) proxy->pg->pg_scratch; val = NULL; if (!strcmp(key, "PMI_dead_processes")) { val = pg_scratch->dead_processes; goto found_val; } if (strcmp(pg_scratch->kvs->kvs_name, kvsname)) HYDU_ERR_SETANDJUMP(status, HYD_INTERNAL_ERROR, "kvsname (%s) does not match this group's kvs space (%s)\n", kvsname, pg_scratch->kvs->kvs_name); /* Try to find the key */ for (run = pg_scratch->kvs->key_pair; run; run = run->next) { if (!strcmp(run->key, key)) { val = run->val; break; } } found_val: i = 0; tmp[i++] = HYDU_strdup("cmd=get_result rc="); if (val) { tmp[i++] = HYDU_strdup("0 msg=success value="); tmp[i++] = HYDU_strdup(val); } else { tmp[i++] = HYDU_strdup("-1 msg=key_"); tmp[i++] = HYDU_strdup(key); tmp[i++] = HYDU_strdup("_not_found value=unknown"); } tmp[i++] = HYDU_strdup("\n"); tmp[i++] = NULL; status = HYDU_str_alloc_and_join(tmp, &cmd); HYDU_ERR_POP(status, "unable to join strings\n"); HYDU_free_strlist(tmp); status = cmd_response(fd, pid, cmd); HYDU_ERR_POP(status, "error writing PMI line\n"); HYDU_FREE(cmd); fn_exit: HYD_pmcd_pmi_free_tokens(tokens, token_count); 
HYDU_FUNC_EXIT(); return status; fn_fail: goto fn_exit; }
/* This waits for the restarted processes to reconnect their stdin/out/err sockets, then sets the appropriate entries in the in out and err arrays. This also gets the pids of the restarted processes. */ static HYD_status wait_for_stdinouterr_sockets(int num_ranks, int *ranks, int *in, int *out, int *err, int *pid) { HYD_status status = HYD_SUCCESS; int ret; int fd; int i, c; sock_ident_t id; int num_expected_connections = num_ranks * 2; /* wait for connections for stdout and err */ HYDU_FUNC_ENTER(); /* if one of the processes is rank 0, we should wait for an * additional connection for stdin */ for (i = 0; i < num_ranks; ++i) if (ranks[i] == 0) { ++num_expected_connections; break; } for (c = 0; c < num_expected_connections; ++c) { size_t len; char *id_p; /* wait for a connection */ do { struct sockaddr_in rmt_addr; socklen_t sa_len = sizeof(rmt_addr);; fd = accept(listen_fd, (struct sockaddr *) &rmt_addr, &sa_len); } while (fd && errno == EINTR); HYDU_ERR_CHKANDJUMP(status, fd == -1, HYD_INTERNAL_ERROR, "accept failed, %s\n", strerror(errno)); /* read the socket identifier */ len = sizeof(id); id_p = (char *) &id; do { do { ret = read(fd, id_p, len); } while (ret == 0 || (ret == -1 && errno == EINTR)); HYDU_ERR_CHKANDJUMP(status, ret == -1, HYD_INTERNAL_ERROR, "read failed, %s\n", strerror(errno)); len -= ret; id_p += ret; } while (len); /* determine the index for this process in the stdout/err * arrays */ for (i = 0; i < num_ranks; ++i) if (ranks[i] == id.rank) break; HYDU_ASSERT(i < num_ranks, status); /* assign the fd */ switch (id.socktype) { case IN_SOCK: HYDU_ASSERT(id.rank == 0, status); *in = fd; break; case OUT_SOCK: out[i] = fd; break; case ERR_SOCK: err[i] = fd; break; default: HYDU_ASSERT(0, status); break; } /* assign the pid */ pid[i] = id.pid; } ret = close(listen_fd); HYDU_ERR_CHKANDJUMP(status, ret, HYD_INTERNAL_ERROR, "close of listener port failed, %s\n", strerror(errno)); fn_exit: HYDU_FUNC_EXIT(); return status; fn_fail: goto fn_exit; }
/*
 * Event callback for a downstream stdout/stderr descriptor.
 *
 * userp carries STDOUT_FILENO or another value (treated as stderr) and
 * selects which downstream fd table (out[] or err[]) fd belongs to.
 * Whatever could be read from fd is sent to the upstream control
 * socket as a header (cmd, pgid, proxy id, rank, length) followed by
 * the data itself.  When the read reports the descriptor as closed, fd
 * is deregistered from the demux engine, its table entries are marked
 * HYD_FD_CLOSED and the descriptor is closed.
 */
static HYD_status stdoe_cb(int fd, HYD_event_t events, void *userp)
{
    HYD_status status = HYD_SUCCESS;
    struct HYD_pmcd_hdr hdr;
    char buf[HYD_TMPBUF_SIZE];
    int stdfd, recvd, sent, closed, i;

    HYDU_FUNC_ENTER();

    stdfd = (int) (size_t) userp;

    status = HYDU_sock_read(fd, buf, HYD_TMPBUF_SIZE, &recvd, &closed, HYDU_SOCK_COMM_NONE);
    HYDU_ERR_POP(status, "sock read error\n");

    if (recvd) {
        int upstream_sock_closed;

        /* build the header and find which local process owns this fd */
        HYD_pmcd_init_header(&hdr);
        i = 0;
        if (stdfd == STDOUT_FILENO) {
            hdr.cmd = STDOUT;
            while (i < HYD_pmcd_pmip.local.proxy_process_count &&
                   HYD_pmcd_pmip.downstream.out[i] != fd)
                i++;
        }
        else {
            hdr.cmd = STDERR;
            while (i < HYD_pmcd_pmip.local.proxy_process_count &&
                   HYD_pmcd_pmip.downstream.err[i] != fd)
                i++;
        }

        HYDU_ASSERT(i < HYD_pmcd_pmip.local.proxy_process_count, status);

        hdr.pgid = HYD_pmcd_pmip.local.pgid;
        hdr.proxy_id = HYD_pmcd_pmip.local.id;
        hdr.rank = HYD_pmcd_pmip.downstream.pmi_rank[i];
        hdr.buflen = recvd;

        /* header first ... */
        status = HYDU_sock_write(HYD_pmcd_pmip.upstream.control, &hdr, sizeof(hdr), &sent,
                                 &upstream_sock_closed, HYDU_SOCK_COMM_MSGWAIT);
        HYDU_ERR_POP(status, "sock write error\n");
        HYDU_ASSERT(!upstream_sock_closed, status);

        /* ... then the payload */
        status = HYDU_sock_write(HYD_pmcd_pmip.upstream.control, buf, recvd, &sent,
                                 &upstream_sock_closed, HYDU_SOCK_COMM_MSGWAIT);
        HYDU_ERR_POP(status, "sock write error\n");
        HYDU_ASSERT(!upstream_sock_closed, status);
    }

    if (closed) {
        /* The connection has closed */
        status = HYDT_dmx_deregister_fd(fd);
        HYDU_ERR_POP(status, "unable to deregister fd\n");

        /* mark every table entry that refers to this fd */
        for (i = 0; i < HYD_pmcd_pmip.local.proxy_process_count; i++) {
            if (stdfd == STDOUT_FILENO) {
                if (HYD_pmcd_pmip.downstream.out[i] == fd)
                    HYD_pmcd_pmip.downstream.out[i] = HYD_FD_CLOSED;
            }
            else {
                if (HYD_pmcd_pmip.downstream.err[i] == fd)
                    HYD_pmcd_pmip.downstream.err[i] = HYD_FD_CLOSED;
            }
        }

        close(fd);
    }

  fn_exit:
    HYDU_FUNC_EXIT();
    return status;

  fn_fail:
    goto fn_exit;
}
static HYD_status bcast_keyvals(int fd, int pid) { int keyval_count, arg_count, i, j; char **tmp = NULL, *cmd; struct HYD_pmcd_pmi_kvs_pair *run; struct HYD_proxy *proxy, *tproxy; struct HYD_pmcd_pmi_pg_scratch *pg_scratch; HYD_status status = HYD_SUCCESS; HYDU_FUNC_ENTER(); proxy = HYD_pmcd_pmi_find_proxy(fd); HYDU_ASSERT(proxy, status); pg_scratch = (struct HYD_pmcd_pmi_pg_scratch *) proxy->pg->pg_scratch; /* find the number of keyvals */ keyval_count = 0; for (run = pg_scratch->kvs->key_pair; run; run = run->next) keyval_count++; keyval_count -= pg_scratch->keyval_dist_count; /* Each keyval has the following four items: 'key' '=' 'val' * '<space>'. Two additional items for the command at the start * and the NULL at the end. */ HYDU_MALLOC_OR_JUMP(tmp, char **, (4 * keyval_count + 3) * sizeof(char *), status); /* send all available keyvals downstream */ if (keyval_count) { arg_count = 1; i = 0; tmp[i++] = MPL_strdup("cmd=keyval_cache "); for (run = pg_scratch->kvs->key_pair, j = 0; run; run = run->next, j++) { if (j < pg_scratch->keyval_dist_count) continue; tmp[i++] = MPL_strdup(run->key); tmp[i++] = MPL_strdup("="); tmp[i++] = MPL_strdup(run->val); tmp[i++] = MPL_strdup(" "); arg_count++; if (arg_count >= MAX_PMI_INTERNAL_ARGS) { tmp[i++] = MPL_strdup("\n"); tmp[i++] = NULL; status = HYDU_str_alloc_and_join(tmp, &cmd); HYDU_ERR_POP(status, "unable to join strings\n"); HYDU_free_strlist(tmp); pg_scratch->keyval_dist_count += (arg_count - 1); for (tproxy = proxy->pg->proxy_list; tproxy; tproxy = tproxy->next) { status = cmd_response(tproxy->control_fd, pid, cmd); HYDU_ERR_POP(status, "error writing PMI line\n"); } MPL_free(cmd); i = 0; tmp[i++] = MPL_strdup("cmd=keyval_cache "); arg_count = 1; } } tmp[i++] = MPL_strdup("\n"); tmp[i++] = NULL; if (arg_count > 1) { status = HYDU_str_alloc_and_join(tmp, &cmd); HYDU_ERR_POP(status, "unable to join strings\n"); pg_scratch->keyval_dist_count += (arg_count - 1); for (tproxy = proxy->pg->proxy_list; tproxy; tproxy = 
tproxy->next) { status = cmd_response(tproxy->control_fd, pid, cmd); HYDU_ERR_POP(status, "error writing PMI line\n"); } MPL_free(cmd); } HYDU_free_strlist(tmp); } fn_exit: if (tmp) MPL_free(tmp); HYDU_FUNC_EXIT(); return status; fn_fail: goto fn_exit; }
/* This function does not provide any flow control. We just read from * the incoming socket as much as we can and push out to the outgoing * socket as much as we can. This can result in the process calling it * polling continuously waiting for events, but that's a rare case for * stdio (which is what this function is meant to provide * functionality for). */ HYD_status HYDU_sock_forward_stdio(int in, int out, int *closed) { struct fwd_hash *fwd_hash, *tmp; int count; HYD_status status = HYD_SUCCESS; HYDU_FUNC_ENTER(); /* find the fwd hash */ for (tmp = fwd_hash_list; tmp; tmp = tmp->next) if (out == tmp->out) break; if (tmp == NULL) { /* No hash found; create one */ status = alloc_fwd_hash(&fwd_hash, in, out); HYDU_ERR_POP(status, "unable to allocate forward hash\n"); if (fwd_hash_list == NULL) fwd_hash_list = fwd_hash; else { for (tmp = fwd_hash_list; tmp->next; tmp = tmp->next); tmp->next = fwd_hash; } } else { fwd_hash = tmp; } *closed = 0; if (fwd_hash->buf_count == 0) { /* there is no data in the buffer, read something into it */ status = HYDU_sock_read(in, fwd_hash->buf, HYD_TMPBUF_SIZE, &count, closed, HYDU_SOCK_COMM_NONE); HYDU_ERR_POP(status, "read error\n"); if (!*closed) { fwd_hash->buf_offset = 0; fwd_hash->buf_count += count; /* We should never get a zero count, as the upper-layer * should have waited for an event from the demux engine * before calling us. 
*/ HYDU_ASSERT(count, status); } } if (fwd_hash->buf_count) { /* there is data in the buffer, send it out first */ status = HYDU_sock_write(out, fwd_hash->buf + fwd_hash->buf_offset, fwd_hash->buf_count, &count, closed, HYDU_SOCK_COMM_MSGWAIT); HYDU_ERR_POP(status, "write error\n"); if (!*closed) { fwd_hash->buf_offset += count; fwd_hash->buf_count -= count; } } /* If the incoming socket is closed, make sure we forward out all * of the buffered data */ while (*closed && fwd_hash->buf_count) { status = HYDU_sock_write(out, fwd_hash->buf + fwd_hash->buf_offset, fwd_hash->buf_count, &count, closed, HYDU_SOCK_COMM_MSGWAIT); HYDU_ERR_POP(status, "write error\n"); if (!*closed) { fwd_hash->buf_offset += count; fwd_hash->buf_count -= count; } } fn_exit: HYDU_FUNC_EXIT(); return status; fn_fail: goto fn_exit; }
HYD_status HYDU_sock_is_local(char *host, int *is_local) { struct hostent *ht; char *host_ip = NULL, *lhost_ip = NULL; char lhost[MAX_HOSTNAME_LEN]; struct sockaddr_in sa; struct ifaddrs *ifaddr, *ifa; char buf[MAX_HOSTNAME_LEN]; HYD_status status = HYD_SUCCESS; *is_local = 0; /* Algorithm used: * * 1. Find the local host name * - If "host" matches the local host name, return. * 2. Find the IP address associated with "host" and the IP the local host * resolves to. * - If these IPs match, return. * 3. Find all local network IP addresses * - If the "host" IP address matches any of the local network IP * addresses, return. */ /* STEP 1: If "host" matches the local host name, return */ if (gethostname(lhost, MAX_HOSTNAME_LEN) < 0) { /* We can't figure out what my localhost name is. *sigh*. We * could return an error here, but we will just punt it to the * upper layer saying that we don't know if it is local. We * cannot try steps 2 and 3 either, since we don't have our * local hostname. */ goto fn_exit; } else if (!strcmp(lhost, host)) { *is_local = 1; goto fn_exit; } else { /* we have our local hostname, but that does not match the * provided hostname. Let's try to get our remote IP address * first. If we can't get that, we can give up. */ /* If we are unable to resolve the remote host name, it need * not be an error. It could mean that the user is using an * alias for the hostname (e.g., an ssh config alias) */ if ((ht = gethostbyname(host)) == NULL) goto fn_exit; memset((char *) &sa, 0, sizeof(struct sockaddr_in)); memcpy(&sa.sin_addr, ht->h_addr_list[0], ht->h_length); /* Find the IP address of the host */ host_ip = MPL_strdup((char *) inet_ntop(AF_INET, (const void *) &sa.sin_addr, buf, MAX_HOSTNAME_LEN)); HYDU_ASSERT(host_ip, status); } /* OK, if we are here, we got the remote IP. We have two ways of * getting the local IP: gethostbyname or getifaddrs. We'll try * both. 
*/ /* STEP 2: Let's try the gethostbyname model */ if ((ht = gethostbyname(lhost))) { memset((char *) &sa, 0, sizeof(struct sockaddr_in)); memcpy(&sa.sin_addr, ht->h_addr_list[0], ht->h_length); /* Find the IP address of the host */ lhost_ip = MPL_strdup((char *) inet_ntop(AF_INET, (const void *) &sa.sin_addr, buf, MAX_HOSTNAME_LEN)); HYDU_ASSERT(lhost_ip, status); /* See if the IP address of the hostname we got matches the IP * address to which the local host resolves */ if (!strcmp(lhost_ip, host_ip)) { *is_local = 1; goto fn_exit; } } /* Either gethostbyname didn't resolve or we didn't find a match. * Either way, let's try the getifaddr model. */ /* STEP 3: Let's try the getifaddr model */ if (getifaddrs(&ifaddr) == -1) HYDU_ERR_SETANDJUMP(status, HYD_INTERNAL_ERROR, "getifaddrs failed\n"); /* Find the IP addresses of all local interfaces */ for (ifa = ifaddr; ifa; ifa = ifa->ifa_next) { if (ifa->ifa_addr && ifa->ifa_addr->sa_family == AF_INET) { struct sockaddr_in *sa_ptr = (struct sockaddr_in *) ifa->ifa_addr; lhost_ip = MPL_strdup((char *) inet_ntop(AF_INET, (const void *) &(sa_ptr->sin_addr), buf, MAX_HOSTNAME_LEN)); HYDU_ASSERT(lhost_ip, status); /* For each local IP address, see if it matches the "host" * IP address */ if (!strcmp(host_ip, lhost_ip)) { *is_local = 1; freeifaddrs(ifaddr); goto fn_exit; } MPL_free(lhost_ip); lhost_ip = NULL; } } freeifaddrs(ifaddr); fn_exit: if (host_ip) MPL_free(host_ip); if (lhost_ip) MPL_free(lhost_ip); return status; fn_fail: goto fn_exit; }