static HYD_status fn_name_publish(int fd, int pid, int pgid, char *args[]) { struct HYD_string_stash stash; char *cmd, *thrid, *val, *name = NULL, *port = NULL; int token_count, success; struct HYD_pmcd_token *tokens = NULL; HYD_status status = HYD_SUCCESS; HYDU_FUNC_ENTER(); status = HYD_pmcd_pmi_args_to_tokens(args, &tokens, &token_count); HYDU_ERR_POP(status, "unable to convert args to tokens\n"); thrid = HYD_pmcd_pmi_find_token_keyval(tokens, token_count, "thrid"); if ((val = HYD_pmcd_pmi_find_token_keyval(tokens, token_count, "name")) == NULL) HYDU_ERR_POP(status, "cannot find token: name\n"); name = HYDU_strdup(val); if ((val = HYD_pmcd_pmi_find_token_keyval(tokens, token_count, "port")) == NULL) HYDU_ERR_POP(status, "cannot find token: port\n"); port = HYDU_strdup(val); status = HYD_pmcd_pmi_publish(name, port, &success); HYDU_ERR_POP(status, "error publishing service\n"); HYD_STRING_STASH_INIT(stash); HYD_STRING_STASH(stash, HYDU_strdup("cmd=name-publish-response;"), status); if (thrid) { HYD_STRING_STASH(stash, HYDU_strdup("thrid="), status); HYD_STRING_STASH(stash, HYDU_strdup(thrid), status); HYD_STRING_STASH(stash, HYDU_strdup(";"), status); } if (!success) { HYD_STRING_STASH(stash, HYDU_strdup("rc=1;errmsg=duplicate_service_"), status); HYD_STRING_STASH(stash, HYDU_strdup(name), status); HYD_STRING_STASH(stash, HYDU_strdup(";"), status); } else HYD_STRING_STASH(stash, HYDU_strdup("rc=0;"), status); HYD_STRING_SPIT(stash, cmd, status); status = cmd_response(fd, pid, cmd); HYDU_ERR_POP(status, "send command failed\n"); HYDU_FREE(cmd); fn_exit: if (tokens) HYD_pmcd_pmi_free_tokens(tokens, token_count); if (name) HYDU_FREE(name); if (port) HYDU_FREE(port); HYDU_FUNC_EXIT(); return status; fn_fail: goto fn_exit; }
static HYD_status fn_info_putnodeattr(int fd, char *args[]) { struct HYD_string_stash stash; char *key, *val, *thrid, *cmd; struct HYD_pmcd_token *tokens = NULL; int token_count, ret; struct HYD_pmcd_pmi_v2_reqs *req; HYD_status status = HYD_SUCCESS; HYDU_FUNC_ENTER(); status = HYD_pmcd_pmi_args_to_tokens(args, &tokens, &token_count); HYDU_ERR_POP(status, "unable to convert args to tokens\n"); key = HYD_pmcd_pmi_find_token_keyval(tokens, token_count, "key"); HYDU_ERR_CHKANDJUMP(status, key == NULL, HYD_INTERNAL_ERROR, "unable to find key token\n"); val = HYD_pmcd_pmi_find_token_keyval(tokens, token_count, "value"); HYDU_ERR_CHKANDJUMP(status, val == NULL, HYD_INTERNAL_ERROR, "unable to find value token\n"); thrid = HYD_pmcd_pmi_find_token_keyval(tokens, token_count, "thrid"); status = HYD_pmcd_pmi_add_kvs(key, val, HYD_pmcd_pmip.local.kvs, &ret); HYDU_ERR_POP(status, "unable to put data into kvs\n"); HYD_STRING_STASH_INIT(stash); HYD_STRING_STASH(stash, MPL_strdup("cmd=info-putnodeattr-response;"), status); if (thrid) { HYD_STRING_STASH(stash, MPL_strdup("thrid="), status); HYD_STRING_STASH(stash, MPL_strdup(thrid), status); HYD_STRING_STASH(stash, MPL_strdup(";"), status); } HYD_STRING_STASH(stash, MPL_strdup("rc="), status); HYD_STRING_STASH(stash, HYDU_int_to_str(ret), status); HYD_STRING_STASH(stash, MPL_strdup(";"), status); HYD_STRING_SPIT(stash, cmd, status); send_cmd_downstream(fd, cmd); MPL_free(cmd); for (req = pending_reqs; req; req = req->next) { if (!strcmp(req->key, key)) { /* Poke the progress engine before exiting */ status = poke_progress(key); HYDU_ERR_POP(status, "poke progress error\n"); break; } } fn_exit: if (tokens) HYD_pmcd_pmi_free_tokens(tokens, token_count); HYDU_FUNC_EXIT(); return status; fn_fail: goto fn_exit; }
static HYD_status fn_publish_name(int fd, int pid, int pgid, char *args[]) { struct HYD_string_stash stash; char *cmd, *val; int token_count; struct HYD_pmcd_token *tokens = NULL; char *name = NULL, *port = NULL; int success = 0; HYD_status status = HYD_SUCCESS; HYDU_FUNC_ENTER(); status = HYD_pmcd_pmi_args_to_tokens(args, &tokens, &token_count); HYDU_ERR_POP(status, "unable to convert args to tokens\n"); if ((val = HYD_pmcd_pmi_find_token_keyval(tokens, token_count, "service")) == NULL) HYDU_ERR_POP(status, "cannot find token: service\n"); name = MPL_strdup(val); if ((val = HYD_pmcd_pmi_find_token_keyval(tokens, token_count, "port")) == NULL) HYDU_ERR_POP(status, "cannot find token: port\n"); port = MPL_strdup(val); status = HYD_pmcd_pmi_publish(name, port, &success); HYDU_ERR_POP(status, "error publishing service\n"); HYD_STRING_STASH_INIT(stash); if (success) HYD_STRING_STASH(stash, MPL_strdup("cmd=publish_result info=ok rc=0 msg=success\n"), status); else HYD_STRING_STASH(stash, MPL_strdup("cmd=publish_result info=ok rc=1 msg=key_already_present\n"), status); HYD_STRING_SPIT(stash, cmd, status); status = cmd_response(fd, pid, cmd); HYDU_ERR_POP(status, "send command failed\n"); MPL_free(cmd); fn_exit: if (tokens) HYD_pmcd_pmi_free_tokens(tokens, token_count); if (name) MPL_free(name); if (port) MPL_free(port); HYDU_FUNC_EXIT(); return status; fn_fail: goto fn_exit; }
static HYD_status fn_name_lookup(int fd, int pid, int pgid, char *args[]) { struct HYD_string_stash stash; char *cmd, *thrid, *name, *value; int token_count; struct HYD_pmcd_token *tokens = NULL; HYD_status status = HYD_SUCCESS; HYDU_FUNC_ENTER(); status = HYD_pmcd_pmi_args_to_tokens(args, &tokens, &token_count); HYDU_ERR_POP(status, "unable to convert args to tokens\n"); thrid = HYD_pmcd_pmi_find_token_keyval(tokens, token_count, "thrid"); if ((name = HYD_pmcd_pmi_find_token_keyval(tokens, token_count, "name")) == NULL) HYDU_ERR_POP(status, "cannot find token: name\n"); status = HYD_pmcd_pmi_lookup(name, &value); HYDU_ERR_POP(status, "error while looking up service\n"); HYD_STRING_STASH_INIT(stash); HYD_STRING_STASH(stash, HYDU_strdup("cmd=name-lookup-response;"), status); if (thrid) { HYD_STRING_STASH(stash, HYDU_strdup("thrid="), status); HYD_STRING_STASH(stash, HYDU_strdup(thrid), status); HYD_STRING_STASH(stash, HYDU_strdup(";"), status); } if (value) { HYD_STRING_STASH(stash, HYDU_strdup("port="), status); HYD_STRING_STASH(stash, HYDU_strdup(value), status); HYD_STRING_STASH(stash, HYDU_strdup(";found=TRUE;rc=0;"), status); } else { HYD_STRING_STASH(stash, HYDU_strdup("found=FALSE;rc=1;"), status); } HYD_STRING_SPIT(stash, cmd, status); status = cmd_response(fd, pid, cmd); HYDU_ERR_POP(status, "send command failed\n"); HYDU_FREE(cmd); fn_exit: if (tokens) HYD_pmcd_pmi_free_tokens(tokens, token_count); HYDU_FUNC_EXIT(); return status; fn_fail: goto fn_exit; }
static HYD_status send_cmd_upstream(const char *start, int fd, char *args[]) { int i, sent, closed; struct HYD_string_stash stash; char *buf = NULL; struct HYD_pmcd_hdr hdr; HYD_status status = HYD_SUCCESS; HYDU_FUNC_ENTER(); HYD_STRING_STASH_INIT(stash); HYD_STRING_STASH(stash, MPL_strdup(start), status); for (i = 0; args[i]; i++) { HYD_STRING_STASH(stash, MPL_strdup(args[i]), status); if (args[i + 1]) HYD_STRING_STASH(stash, MPL_strdup(";"), status); } HYD_STRING_SPIT(stash, buf, status); HYD_pmcd_init_header(&hdr); hdr.cmd = PMI_CMD; hdr.pid = fd; hdr.buflen = strlen(buf); hdr.pmi_version = 2; status = HYDU_sock_write(HYD_pmcd_pmip.upstream.control, &hdr, sizeof(hdr), &sent, &closed, HYDU_SOCK_COMM_MSGWAIT); HYDU_ERR_POP(status, "unable to send PMI header upstream\n"); HYDU_ASSERT(!closed, status); if (HYD_pmcd_pmip.user_global.debug) { HYDU_dump(stdout, "forwarding command (%s) upstream\n", buf); } status = HYDU_sock_write(HYD_pmcd_pmip.upstream.control, buf, hdr.buflen, &sent, &closed, HYDU_SOCK_COMM_MSGWAIT); HYDU_ERR_POP(status, "unable to send PMI command upstream\n"); HYDU_ASSERT(!closed, status); fn_exit: if (buf) MPL_free(buf); HYDU_FUNC_EXIT(); return status; fn_fail: goto fn_exit; }
static HYD_status fn_info_getjobattr(int fd, char *args[]) { struct HYD_string_stash stash; char *cmd, *key, *thrid; struct HYD_pmcd_token *tokens; int token_count; HYD_status status = HYD_SUCCESS; HYDU_FUNC_ENTER(); status = HYD_pmcd_pmi_args_to_tokens(args, &tokens, &token_count); HYDU_ERR_POP(status, "unable to convert args to tokens\n"); key = HYD_pmcd_pmi_find_token_keyval(tokens, token_count, "key"); HYDU_ERR_CHKANDJUMP(status, key == NULL, HYD_INTERNAL_ERROR, "unable to find token: key\n"); thrid = HYD_pmcd_pmi_find_token_keyval(tokens, token_count, "thrid"); if (!strcmp(key, "PMI_process_mapping")) { HYD_STRING_STASH_INIT(stash); HYD_STRING_STASH(stash, MPL_strdup("cmd=info-getjobattr-response;"), status); if (thrid) { HYD_STRING_STASH(stash, MPL_strdup("thrid="), status); HYD_STRING_STASH(stash, MPL_strdup(thrid), status); HYD_STRING_STASH(stash, MPL_strdup(";"), status); } HYD_STRING_STASH(stash, MPL_strdup("found=TRUE;value="), status); HYD_STRING_STASH(stash, MPL_strdup(HYD_pmcd_pmip.system_global.pmi_process_mapping), status); HYD_STRING_STASH(stash, MPL_strdup(";rc=0;"), status); HYD_STRING_SPIT(stash, cmd, status); send_cmd_downstream(fd, cmd); MPL_free(cmd); } else { status = send_cmd_upstream("cmd=info-getjobattr;", fd, args); HYDU_ERR_POP(status, "error sending command upstream\n"); } fn_exit: HYD_pmcd_pmi_free_tokens(tokens, token_count); HYDU_FUNC_EXIT(); return status; fn_fail: goto fn_exit; }
static HYD_status fn_lookup_name(int fd, int pid, int pgid, char *args[]) { struct HYD_string_stash stash; char *cmd, *name, *value = NULL; int token_count; struct HYD_pmcd_token *tokens = NULL; HYD_status status = HYD_SUCCESS; HYDU_FUNC_ENTER(); status = HYD_pmcd_pmi_args_to_tokens(args, &tokens, &token_count); HYDU_ERR_POP(status, "unable to convert args to tokens\n"); if ((name = HYD_pmcd_pmi_find_token_keyval(tokens, token_count, "service")) == NULL) HYDU_ERR_POP(status, "cannot find token: service\n"); status = HYD_pmcd_pmi_lookup(name, &value); HYDU_ERR_POP(status, "error while looking up service\n"); HYD_STRING_STASH_INIT(stash); HYD_STRING_STASH(stash, MPL_strdup("cmd=lookup_result"), status); if (value) { HYD_STRING_STASH(stash, MPL_strdup(" port="), status); HYD_STRING_STASH(stash, MPL_strdup(value), status); HYD_STRING_STASH(stash, MPL_strdup(" info=ok rc=0 msg=success\n"), status); } else { HYD_STRING_STASH(stash, MPL_strdup(" rc=1 msg=service_not_found\n"), status); } HYD_STRING_SPIT(stash, cmd, status); status = cmd_response(fd, pid, cmd); HYDU_ERR_POP(status, "send command failed\n"); MPL_free(cmd); fn_exit: if (tokens) HYD_pmcd_pmi_free_tokens(tokens, token_count); if (value) MPL_free(value); HYDU_FUNC_EXIT(); return status; fn_fail: goto fn_exit; }
static HYD_status fn_finalize(int fd, char *args[]) { char *thrid; struct HYD_string_stash stash; char *cmd; struct HYD_pmcd_token *tokens = NULL; int token_count; HYD_status status = HYD_SUCCESS; HYDU_FUNC_ENTER(); status = HYD_pmcd_pmi_args_to_tokens(args, &tokens, &token_count); HYDU_ERR_POP(status, "unable to convert args to tokens\n"); thrid = HYD_pmcd_pmi_find_token_keyval(tokens, token_count, "thrid"); HYD_STRING_STASH_INIT(stash); HYD_STRING_STASH(stash, MPL_strdup("cmd=finalize-response;"), status); if (thrid) { HYD_STRING_STASH(stash, MPL_strdup("thrid="), status); HYD_STRING_STASH(stash, MPL_strdup(thrid), status); HYD_STRING_STASH(stash, MPL_strdup(";"), status); } HYD_STRING_STASH(stash, MPL_strdup("rc=0;"), status); HYD_STRING_SPIT(stash, cmd, status); send_cmd_downstream(fd, cmd); MPL_free(cmd); status = HYDT_dmx_deregister_fd(fd); HYDU_ERR_POP(status, "unable to deregister fd\n"); close(fd); fn_exit: if (tokens) HYD_pmcd_pmi_free_tokens(tokens, token_count); HYDU_FUNC_EXIT(); return status; fn_fail: goto fn_exit; }
static HYD_status fn_job_getid(int fd, char *args[]) { struct HYD_string_stash stash; char *cmd, *thrid; struct HYD_pmcd_token *tokens = NULL; int token_count; HYD_status status = HYD_SUCCESS; HYDU_FUNC_ENTER(); status = HYD_pmcd_pmi_args_to_tokens(args, &tokens, &token_count); HYDU_ERR_POP(status, "unable to convert args to tokens\n"); thrid = HYD_pmcd_pmi_find_token_keyval(tokens, token_count, "thrid"); HYD_STRING_STASH_INIT(stash); HYD_STRING_STASH(stash, MPL_strdup("cmd=job-getid-response;"), status); if (thrid) { HYD_STRING_STASH(stash, MPL_strdup("thrid="), status); HYD_STRING_STASH(stash, MPL_strdup(thrid), status); HYD_STRING_STASH(stash, MPL_strdup(";"), status); } HYD_STRING_STASH(stash, MPL_strdup("jobid="), status); HYD_STRING_STASH(stash, MPL_strdup(HYD_pmcd_pmip.local.kvs->kvsname), status); HYD_STRING_STASH(stash, MPL_strdup(";rc=0;"), status); HYD_STRING_SPIT(stash, cmd, status); send_cmd_downstream(fd, cmd); MPL_free(cmd); fn_exit: if (tokens) HYD_pmcd_pmi_free_tokens(tokens, token_count); HYDU_FUNC_EXIT(); return status; fn_fail: goto fn_exit; }
static HYD_status fn_spawn(int fd, int pid, int pgid, char *args[]) { struct HYD_pg *pg; struct HYD_pmcd_pmi_pg_scratch *pg_scratch; struct HYD_proxy *proxy; struct HYD_pmcd_token *tokens; struct HYD_exec *exec_list = NULL, *exec; struct HYD_env *env; struct HYD_node *node; char key[PMI_MAXKEYLEN], *val; int nprocs, preput_num, info_num, ret; char *execname, *path = NULL; struct HYD_pmcd_token_segment *segment_list = NULL; int token_count, i, j, k, new_pgid, total_spawns; int argcnt, num_segments; struct HYD_string_stash proxy_stash; char *control_port; struct HYD_string_stash stash; HYD_status status = HYD_SUCCESS; HYDU_FUNC_ENTER(); for (i = 0; args[i]; i++) mcmd_args[mcmd_num_args++] = MPL_strdup(args[i]); mcmd_args[mcmd_num_args] = NULL; /* Initialize the proxy stash, so it can be freed if we jump to * exit */ HYD_STRING_STASH_INIT(proxy_stash); status = HYD_pmcd_pmi_args_to_tokens(mcmd_args, &tokens, &token_count); HYDU_ERR_POP(status, "unable to convert args to tokens\n"); /* Here's the order of things we do: * * 1. Break the token list into multiple segments, each segment * corresponding to a command. Each command represents * information for one executable. * * 2. Allocate a process group for the new set of spawned * processes * * 3. Get all the common keys and deal with them * * 4. Create an executable list based on the segments. * * 5. Create a proxy list using the created executable list and * spawn it. */ /* Break the token list into multiple segments and create an * executable list based on the segments. */ val = HYD_pmcd_pmi_find_token_keyval(tokens, token_count, "totspawns"); HYDU_ERR_CHKANDJUMP(status, val == NULL, HYD_INTERNAL_ERROR, "unable to find token: totspawns\n"); total_spawns = atoi(val); HYDU_MALLOC_OR_JUMP(segment_list, struct HYD_pmcd_token_segment *, total_spawns * sizeof(struct HYD_pmcd_token_segment), status); segment_tokens(tokens, token_count, segment_list, &num_segments); if (num_segments != total_spawns) { /* We didn't read the entire PMI string; wait for the rest to * arrive */ goto fn_exit; } else { /* Got the entire PMI string; free the arguments and reset */ HYDU_free_strlist(mcmd_args); mcmd_num_args = 0; } /* Allocate a new process group */ for (pg = &HYD_server_info.pg_list; pg->next; pg = pg->next); new_pgid = pg->pgid + 1; status = HYDU_alloc_pg(&pg->next, new_pgid); HYDU_ERR_POP(status, "unable to allocate process group\n"); pg = pg->next; proxy = HYD_pmcd_pmi_find_proxy(fd); HYDU_ASSERT(proxy, status); pg->spawner_pg = proxy->pg; for (j = 0; j < total_spawns; j++) { /* For each segment, we create an exec structure */ val = HYD_pmcd_pmi_find_token_keyval(&tokens[segment_list[j].start_idx], segment_list[j].token_count, "nprocs"); HYDU_ERR_CHKANDJUMP(status, val == NULL, HYD_INTERNAL_ERROR, "unable to find token: nprocs\n"); nprocs = atoi(val); pg->pg_process_count += nprocs; val = HYD_pmcd_pmi_find_token_keyval(&tokens[segment_list[j].start_idx], segment_list[j].token_count, "argcnt"); HYDU_ERR_CHKANDJUMP(status, val == NULL, HYD_INTERNAL_ERROR, "unable to find token: argcnt\n"); argcnt = atoi(val); val = HYD_pmcd_pmi_find_token_keyval(&tokens[segment_list[j].start_idx], segment_list[j].token_count, "info_num"); if (val) info_num = atoi(val); else info_num = 0; if (exec_list == NULL) { status = HYDU_alloc_exec(&exec_list); HYDU_ERR_POP(status, "unable to allocate exec\n"); exec_list->appnum = 0; exec = exec_list; } else { for (exec = exec_list; exec->next; exec = exec->next); status = HYDU_alloc_exec(&exec->next); HYDU_ERR_POP(status, "unable to allocate exec\n"); exec->next->appnum = exec->appnum + 1; exec = exec->next; } /* Info keys */ for (i = 0; i < info_num; i++) { char *info_key, *info_val; MPL_snprintf(key, PMI_MAXKEYLEN, "info_key_%d", i); val = HYD_pmcd_pmi_find_token_keyval(&tokens[segment_list[j].start_idx], segment_list[j].token_count, key); HYDU_ERR_CHKANDJUMP(status, val == NULL, HYD_INTERNAL_ERROR, "unable to find token: %s\n", key); info_key = val; MPL_snprintf(key, PMI_MAXKEYLEN, "info_val_%d", i); val = HYD_pmcd_pmi_find_token_keyval(&tokens[segment_list[j].start_idx], segment_list[j].token_count, key); HYDU_ERR_CHKANDJUMP(status, val == NULL, HYD_INTERNAL_ERROR, "unable to find token: %s\n", key); info_val = val; if (!strcmp(info_key, "path")) { path = MPL_strdup(info_val); } else if (!strcmp(info_key, "wdir")) { exec->wdir = MPL_strdup(info_val); } else if (!strcmp(info_key, "host") || !strcmp(info_key, "hosts")) { char *saveptr; char *host = strtok_r(info_val, ",", &saveptr); while (host) { status = HYDU_process_mfile_token(host, 1, &pg->user_node_list); HYDU_ERR_POP(status, "error creating node list\n"); host = strtok_r(NULL, ",", &saveptr); } } else if (!strcmp(info_key, "hostfile")) { status = HYDU_parse_hostfile(info_val, &pg->user_node_list, HYDU_process_mfile_token); HYDU_ERR_POP(status, "error parsing hostfile\n"); } else { /* Unrecognized info key; ignore */ } } status = HYDU_correct_wdir(&exec->wdir); HYDU_ERR_POP(status, "unable to correct wdir\n"); val = HYD_pmcd_pmi_find_token_keyval(&tokens[segment_list[j].start_idx], segment_list[j].token_count, "execname"); HYDU_ERR_CHKANDJUMP(status, val == NULL, HYD_INTERNAL_ERROR, "unable to find token: execname\n"); if (path == NULL) execname = MPL_strdup(val); else { HYD_STRING_STASH_INIT(stash); HYD_STRING_STASH(stash, MPL_strdup(path), status); HYD_STRING_STASH(stash, MPL_strdup("/"), status); HYD_STRING_STASH(stash, MPL_strdup(val), status); HYD_STRING_SPIT(stash, execname, status); } i = 0; exec->exec[i++] = execname; for (k = 0; k < argcnt; k++) { MPL_snprintf(key, PMI_MAXKEYLEN, "arg%d", k + 1); val = HYD_pmcd_pmi_find_token_keyval(&tokens[segment_list[j].start_idx], segment_list[j].token_count, key); HYDU_ERR_CHKANDJUMP(status, val == NULL, HYD_INTERNAL_ERROR, "unable to find token: %s\n", key); exec->exec[i++] = MPL_strdup(val); } exec->exec[i++] = NULL; exec->proc_count = nprocs; /* It is not clear what kind of environment needs to get * passed to the spawned process. Don't set anything here, and * let the proxy do whatever it does by default. */ exec->env_prop = NULL; status = HYDU_env_create(&env, "PMI_SPAWNED", "1"); HYDU_ERR_POP(status, "unable to create PMI_SPAWNED environment\n"); exec->user_env = env; } status = HYD_pmcd_pmi_alloc_pg_scratch(pg); HYDU_ERR_POP(status, "unable to allocate pg scratch space\n"); pg->pg_process_count = 0; for (exec = exec_list; exec; exec = exec->next) pg->pg_process_count += exec->proc_count; pg_scratch = (struct HYD_pmcd_pmi_pg_scratch *) pg->pg_scratch; /* Get the common keys and deal with them */ val = HYD_pmcd_pmi_find_token_keyval(tokens, token_count, "preput_num"); if (val) preput_num = atoi(val); else preput_num = 0; for (i = 0; i < preput_num; i++) { char *preput_key, *preput_val; MPL_snprintf(key, PMI_MAXKEYLEN, "preput_key_%d", i); val = HYD_pmcd_pmi_find_token_keyval(tokens, token_count, key); HYDU_ERR_CHKANDJUMP(status, val == NULL, HYD_INTERNAL_ERROR, "unable to find token: %s\n", key); preput_key = val; MPL_snprintf(key, PMI_MAXKEYLEN, "preput_val_%d", i); val = HYD_pmcd_pmi_find_token_keyval(tokens, token_count, key); HYDU_ERR_CHKANDJUMP(status, val == NULL, HYD_INTERNAL_ERROR, "unable to find token: %s\n", key); preput_val = val; status = HYD_pmcd_pmi_add_kvs(preput_key, preput_val, pg_scratch->kvs, &ret); HYDU_ERR_POP(status, "unable to add keypair to kvs\n"); } /* Create the proxy list */ if (pg->user_node_list) { status = HYDU_create_proxy_list(exec_list, pg->user_node_list, pg); HYDU_ERR_POP(status, "error creating proxy list\n"); } else { status = HYDU_create_proxy_list(exec_list, HYD_server_info.node_list, pg); HYDU_ERR_POP(status, "error creating proxy list\n"); } HYDU_free_exec_list(exec_list); if (pg->user_node_list) { pg->pg_core_count = 0; for (i = 0, node = pg->user_node_list; node; node = node->next, i++) pg->pg_core_count += node->core_count; } else { pg->pg_core_count = 0; for (proxy = pg->proxy_list; proxy; proxy = proxy->next) pg->pg_core_count += proxy->node->core_count; } status = HYDU_sock_create_and_listen_portstr(HYD_server_info.user_global.iface, HYD_server_info.localhost, HYD_server_info.port_range, &control_port, HYD_pmcd_pmiserv_control_listen_cb, (void *) (size_t) new_pgid); HYDU_ERR_POP(status, "unable to create PMI port\n"); if (HYD_server_info.user_global.debug) HYDU_dump(stdout, "Got a control port string of %s\n", control_port); /* Go to the last PG */ for (pg = &HYD_server_info.pg_list; pg->next; pg = pg->next); status = HYD_pmcd_pmi_fill_in_proxy_args(&proxy_stash, control_port, new_pgid); HYDU_ERR_POP(status, "unable to fill in proxy arguments\n"); MPL_free(control_port); status = HYD_pmcd_pmi_fill_in_exec_launch_info(pg); HYDU_ERR_POP(status, "unable to fill in executable arguments\n"); status = HYDT_bsci_launch_procs(proxy_stash.strlist, pg->proxy_list, HYD_FALSE, NULL); HYDU_ERR_POP(status, "launcher cannot launch processes\n"); { char *cmd; HYD_STRING_STASH_INIT(stash); HYD_STRING_STASH(stash, MPL_strdup("cmd=spawn_result rc=0"), status); HYD_STRING_STASH(stash, MPL_strdup("\n"), status); HYD_STRING_SPIT(stash, cmd, status); status = cmd_response(fd, pid, cmd); HYDU_ERR_POP(status, "error writing PMI line\n"); MPL_free(cmd); } /* Cache the pre-initialized keyvals on the new proxies */ if (preput_num) bcast_keyvals(fd, pid); fn_exit: HYD_pmcd_pmi_free_tokens(tokens, token_count); HYD_STRING_STASH_FREE(proxy_stash); if (segment_list) MPL_free(segment_list); HYDU_FUNC_EXIT(); return status; fn_fail: goto fn_exit; }
static HYD_status fn_kvs_fence(int fd, int pid, int pgid, char *args[]) { struct HYD_proxy *proxy; struct HYD_pmcd_pmi_pg_scratch *pg_scratch; struct HYD_string_stash stash; char *cmd, *thrid; struct HYD_pmcd_token *tokens; int token_count, i; static int fence_count = 0; HYD_status status = HYD_SUCCESS; HYDU_FUNC_ENTER(); status = HYD_pmcd_pmi_args_to_tokens(args, &tokens, &token_count); HYDU_ERR_POP(status, "unable to convert args to tokens\n"); thrid = HYD_pmcd_pmi_find_token_keyval(tokens, token_count, "thrid"); proxy = HYD_pmcd_pmi_find_proxy(fd); HYDU_ASSERT(proxy, status); pg_scratch = (struct HYD_pmcd_pmi_pg_scratch *) proxy->pg->pg_scratch; /* Try to find the epoch point of this process */ for (i = 0; i < proxy->pg->pg_process_count; i++) if (pg_scratch->ecount[i].fd == fd && pg_scratch->ecount[i].pid == pid) pg_scratch->ecount[i].epoch++; if (i == proxy->pg->pg_process_count) { /* couldn't find the current process; find a NULL entry */ for (i = 0; i < proxy->pg->pg_process_count; i++) if (pg_scratch->ecount[i].fd == HYD_FD_UNSET) break; pg_scratch->ecount[i].fd = fd; pg_scratch->ecount[i].pid = pid; pg_scratch->ecount[i].epoch = 1; } HYD_STRING_STASH_INIT(stash); HYD_STRING_STASH(stash, HYDU_strdup("cmd=kvs-fence-response;"), status); if (thrid) { HYD_STRING_STASH(stash, HYDU_strdup("thrid="), status); HYD_STRING_STASH(stash, HYDU_strdup(thrid), status); HYD_STRING_STASH(stash, HYDU_strdup(";"), status); } HYD_STRING_STASH(stash, HYDU_strdup("rc=0;"), status); HYD_STRING_SPIT(stash, cmd, status); status = cmd_response(fd, pid, cmd); HYDU_ERR_POP(status, "send command failed\n"); HYDU_FREE(cmd); fence_count++; if (fence_count % proxy->pg->pg_process_count == 0) { /* Poke the progress engine before exiting */ status = poke_progress(NULL); HYDU_ERR_POP(status, "poke progress error\n"); } fn_exit: HYD_pmcd_pmi_free_tokens(tokens, token_count); HYDU_FUNC_EXIT(); return status; fn_fail: goto fn_exit; }
static HYD_status fn_kvs_get(int fd, int pid, int pgid, char *args[]) { int i, idx, found; struct HYD_pmcd_pmi_pg_scratch *pg_scratch; struct HYD_pg *pg; struct HYD_proxy *proxy; struct HYD_pmcd_pmi_kvs_pair *run; char *key, *thrid, *cmd; struct HYD_string_stash stash; struct HYD_pmcd_token *tokens; int token_count; HYD_status status = HYD_SUCCESS; HYDU_FUNC_ENTER(); status = HYD_pmcd_pmi_args_to_tokens(args, &tokens, &token_count); HYDU_ERR_POP(status, "unable to convert args to tokens\n"); key = HYD_pmcd_pmi_find_token_keyval(tokens, token_count, "key"); HYDU_ERR_CHKANDJUMP(status, key == NULL, HYD_INTERNAL_ERROR, "unable to find key token\n"); thrid = HYD_pmcd_pmi_find_token_keyval(tokens, token_count, "thrid"); proxy = HYD_pmcd_pmi_find_proxy(fd); HYDU_ASSERT(proxy, status); pg_scratch = (struct HYD_pmcd_pmi_pg_scratch *) proxy->pg->pg_scratch; found = 0; for (run = pg_scratch->kvs->key_pair; run; run = run->next) { if (!strcmp(run->key, key)) { found = 1; break; } } if (!found) { pg = proxy->pg; pg_scratch = (struct HYD_pmcd_pmi_pg_scratch *) pg->pg_scratch; idx = -1; for (i = 0; i < pg->pg_process_count; i++) if (pg_scratch->ecount[i].fd == fd && pg_scratch->ecount[i].pid == pid) { idx = i; break; } HYDU_ASSERT(idx != -1, status); for (i = 0; i < pg->pg_process_count; i++) { if (pg_scratch->ecount[i].epoch < pg_scratch->ecount[idx].epoch) { /* We haven't reached a barrier yet; queue up request */ status = HYD_pmcd_pmi_v2_queue_req(fd, pid, pgid, args, key, &pending_reqs); HYDU_ERR_POP(status, "unable to queue request\n"); /* We are done */ goto fn_exit; } } } HYD_STRING_STASH_INIT(stash); HYD_STRING_STASH(stash, HYDU_strdup("cmd=kvs-get-response;"), status); if (thrid) { HYD_STRING_STASH(stash, HYDU_strdup("thrid="), status); HYD_STRING_STASH(stash, HYDU_strdup(thrid), status); HYD_STRING_STASH(stash, HYDU_strdup(";"), status); } if (found) { HYD_STRING_STASH(stash, HYDU_strdup("found=TRUE;value="), status); HYD_STRING_STASH(stash, HYDU_strdup(run->val), status); HYD_STRING_STASH(stash, HYDU_strdup(";"), status); } else { HYD_STRING_STASH(stash, HYDU_strdup("found=FALSE;"), status); } HYD_STRING_STASH(stash, HYDU_strdup("rc=0;"), status); HYD_STRING_SPIT(stash, cmd, status); status = cmd_response(fd, pid, cmd); HYDU_ERR_POP(status, "send command failed\n"); HYDU_FREE(cmd); fn_exit: HYD_pmcd_pmi_free_tokens(tokens, token_count); HYDU_FUNC_EXIT(); return status; fn_fail: goto fn_exit; }
static HYD_status fn_kvs_put(int fd, int pid, int pgid, char *args[]) { struct HYD_string_stash stash; char *key, *val, *thrid, *cmd; int ret; struct HYD_proxy *proxy; struct HYD_pmcd_pmi_pg_scratch *pg_scratch; struct HYD_pmcd_token *tokens; int token_count; struct HYD_pmcd_pmi_v2_reqs *req; HYD_status status = HYD_SUCCESS; HYDU_FUNC_ENTER(); status = HYD_pmcd_pmi_args_to_tokens(args, &tokens, &token_count); HYDU_ERR_POP(status, "unable to convert args to tokens\n"); key = HYD_pmcd_pmi_find_token_keyval(tokens, token_count, "key"); HYDU_ERR_CHKANDJUMP(status, key == NULL, HYD_INTERNAL_ERROR, "unable to find key token\n"); val = HYD_pmcd_pmi_find_token_keyval(tokens, token_count, "value"); if (val == NULL) { /* the user sent an empty string */ val = HYDU_strdup(""); } thrid = HYD_pmcd_pmi_find_token_keyval(tokens, token_count, "thrid"); proxy = HYD_pmcd_pmi_find_proxy(fd); HYDU_ASSERT(proxy, status); pg_scratch = (struct HYD_pmcd_pmi_pg_scratch *) proxy->pg->pg_scratch; status = HYD_pmcd_pmi_add_kvs(key, val, pg_scratch->kvs, &ret); HYDU_ERR_POP(status, "unable to put data into kvs\n"); HYD_STRING_STASH_INIT(stash); HYD_STRING_STASH(stash, HYDU_strdup("cmd=kvs-put-response;"), status); if (thrid) { HYD_STRING_STASH(stash, HYDU_strdup("thrid="), status); HYD_STRING_STASH(stash, HYDU_strdup(thrid), status); HYD_STRING_STASH(stash, HYDU_strdup(";"), status); } HYD_STRING_STASH(stash, HYDU_strdup("rc="), status); HYD_STRING_STASH(stash, HYDU_int_to_str(ret), status); HYD_STRING_STASH(stash, HYDU_strdup(";"), status); HYD_STRING_SPIT(stash, cmd, status); status = cmd_response(fd, pid, cmd); HYDU_ERR_POP(status, "send command failed\n"); HYDU_FREE(cmd); for (req = pending_reqs; req; req = req->next) { if (!strcmp(req->key, key)) { /* Poke the progress engine before exiting */ status = poke_progress(key); HYDU_ERR_POP(status, "poke progress error\n"); break; } } fn_exit: HYD_pmcd_pmi_free_tokens(tokens, token_count); HYDU_FUNC_EXIT(); return status; fn_fail: goto fn_exit; }
static HYD_status fn_info_getjobattr(int fd, int pid, int pgid, char *args[]) { struct HYD_proxy *proxy; struct HYD_pmcd_pmi_pg_scratch *pg_scratch; struct HYD_pmcd_pmi_kvs_pair *run; const char *key; char *thrid, *val, *cmd; struct HYD_string_stash stash; struct HYD_pmcd_token *tokens; int token_count; HYD_status status = HYD_SUCCESS; HYDU_FUNC_ENTER(); status = HYD_pmcd_pmi_args_to_tokens(args, &tokens, &token_count); HYDU_ERR_POP(status, "unable to convert args to tokens\n"); key = HYD_pmcd_pmi_find_token_keyval(tokens, token_count, "key"); HYDU_ERR_CHKANDJUMP(status, key == NULL, HYD_INTERNAL_ERROR, "unable to find key token\n"); thrid = HYD_pmcd_pmi_find_token_keyval(tokens, token_count, "thrid"); proxy = HYD_pmcd_pmi_find_proxy(fd); HYDU_ASSERT(proxy, status); pg_scratch = (struct HYD_pmcd_pmi_pg_scratch *) proxy->pg->pg_scratch; val = NULL; if (!strcmp(key, "PMI_dead_processes")) val = pg_scratch->dead_processes; /* Try to find the key */ for (run = pg_scratch->kvs->key_pair; run; run = run->next) { if (!strcmp(run->key, key)) { val = run->val; break; } } HYD_STRING_STASH_INIT(stash); HYD_STRING_STASH(stash, HYDU_strdup("cmd=info-getjobattr-response;"), status); if (thrid) { HYD_STRING_STASH(stash, HYDU_strdup("thrid="), status); HYD_STRING_STASH(stash, HYDU_strdup(thrid), status); HYD_STRING_STASH(stash, HYDU_strdup(";"), status); } HYD_STRING_STASH(stash, HYDU_strdup("found="), status); if (val) { HYD_STRING_STASH(stash, HYDU_strdup("TRUE;value="), status); HYD_STRING_STASH(stash, HYDU_strdup(val), status); HYD_STRING_STASH(stash, HYDU_strdup(";rc=0;"), status); } else { HYD_STRING_STASH(stash, HYDU_strdup("FALSE;rc=0;"), status); } HYD_STRING_SPIT(stash, cmd, status); status = cmd_response(fd, pid, cmd); HYDU_ERR_POP(status, "send command failed\n"); HYDU_FREE(cmd); fn_exit: HYD_pmcd_pmi_free_tokens(tokens, token_count); HYDU_FUNC_EXIT(); return status; fn_fail: goto fn_exit; }
static HYD_status fn_info_getnodeattr(int fd, char *args[]) { int found; struct HYD_pmcd_pmi_kvs_pair *run; char *key, *waitval, *thrid; struct HYD_string_stash stash; char *cmd; struct HYD_pmcd_token *tokens = NULL; int token_count; HYD_status status = HYD_SUCCESS; HYDU_FUNC_ENTER(); status = HYD_pmcd_pmi_args_to_tokens(args, &tokens, &token_count); HYDU_ERR_POP(status, "unable to convert args to tokens\n"); key = HYD_pmcd_pmi_find_token_keyval(tokens, token_count, "key"); HYDU_ERR_CHKANDJUMP(status, key == NULL, HYD_INTERNAL_ERROR, "unable to find key token\n"); waitval = HYD_pmcd_pmi_find_token_keyval(tokens, token_count, "wait"); thrid = HYD_pmcd_pmi_find_token_keyval(tokens, token_count, "thrid"); /* if a predefined value is not found, we let the code fall back * to regular search and return an error to the client */ found = 0; for (run = HYD_pmcd_pmip.local.kvs->key_pair; run; run = run->next) { if (!strcmp(run->key, key)) { found = 1; break; } } if (found) { /* We found the attribute */ HYD_STRING_STASH_INIT(stash); HYD_STRING_STASH(stash, MPL_strdup("cmd=info-getnodeattr-response;"), status); if (thrid) { HYD_STRING_STASH(stash, MPL_strdup("thrid="), status); HYD_STRING_STASH(stash, MPL_strdup(thrid), status); HYD_STRING_STASH(stash, MPL_strdup(";"), status); } HYD_STRING_STASH(stash, MPL_strdup("found=TRUE;value="), status); HYD_STRING_STASH(stash, MPL_strdup(run->val), status); HYD_STRING_STASH(stash, MPL_strdup(";rc=0;"), status); HYD_STRING_SPIT(stash, cmd, status); send_cmd_downstream(fd, cmd); MPL_free(cmd); } else if (waitval && !strcmp(waitval, "TRUE")) { /* The client wants to wait for a response; queue up the request */ status = HYD_pmcd_pmi_v2_queue_req(fd, -1, -1, args, key, &pending_reqs); HYDU_ERR_POP(status, "unable to queue request\n"); goto fn_exit; } else { /* Tell the client that we can't find the attribute */ HYD_STRING_STASH_INIT(stash); HYD_STRING_STASH(stash, MPL_strdup("cmd=info-getnodeattr-response;"), status); if (thrid) { HYD_STRING_STASH(stash, MPL_strdup("thrid="), status); HYD_STRING_STASH(stash, MPL_strdup(thrid), status); HYD_STRING_STASH(stash, MPL_strdup(";"), status); } HYD_STRING_STASH(stash, MPL_strdup("found=FALSE;rc=0;"), status); HYD_STRING_SPIT(stash, cmd, status); send_cmd_downstream(fd, cmd); MPL_free(cmd); } fn_exit: if (tokens) HYD_pmcd_pmi_free_tokens(tokens, token_count); HYDU_FUNC_EXIT(); return status; fn_fail: goto fn_exit; }
static HYD_status fn_fullinit(int fd, char *args[]) { int id, i; char *rank_str; struct HYD_string_stash stash; char *cmd; struct HYD_pmcd_token *tokens; int token_count; HYD_status status = HYD_SUCCESS; HYDU_FUNC_ENTER(); status = HYD_pmcd_pmi_args_to_tokens(args, &tokens, &token_count); HYDU_ERR_POP(status, "unable to convert args to tokens\n"); rank_str = HYD_pmcd_pmi_find_token_keyval(tokens, token_count, "pmirank"); HYDU_ERR_CHKANDJUMP(status, rank_str == NULL, HYD_INTERNAL_ERROR, "unable to find pmirank token\n"); id = atoi(rank_str); /* Store the PMI_RANK to fd mapping */ for (i = 0; i < HYD_pmcd_pmip.local.proxy_process_count; i++) { if (HYD_pmcd_pmip.downstream.pmi_rank[i] == id) { HYD_pmcd_pmip.downstream.pmi_fd[i] = fd; HYD_pmcd_pmip.downstream.pmi_fd_active[i] = 1; break; } } HYDU_ASSERT(i < HYD_pmcd_pmip.local.proxy_process_count, status); HYD_STRING_STASH_INIT(stash); HYD_STRING_STASH(stash, MPL_strdup("cmd=fullinit-response;pmi-version=2;pmi-subversion=0;rank="), status); HYD_STRING_STASH(stash, HYDU_int_to_str(id), status); HYD_STRING_STASH(stash, MPL_strdup(";size="), status); HYD_STRING_STASH(stash, HYDU_int_to_str(HYD_pmcd_pmip.system_global.global_process_count), status); HYD_STRING_STASH(stash, MPL_strdup(";appnum=0"), status); if (HYD_pmcd_pmip.local.spawner_kvsname) { HYD_STRING_STASH(stash, MPL_strdup(";spawner-jobid="), status); HYD_STRING_STASH(stash, MPL_strdup(HYD_pmcd_pmip.local.spawner_kvsname), status); } if (HYD_pmcd_pmip.user_global.debug) { HYD_STRING_STASH(stash, MPL_strdup(";debugged=TRUE;pmiverbose=TRUE"), status); } else { HYD_STRING_STASH(stash, MPL_strdup(";debugged=FALSE;pmiverbose=FALSE"), status); } HYD_STRING_STASH(stash, MPL_strdup(";rc=0;"), status); HYD_STRING_SPIT(stash, cmd, status); send_cmd_downstream(fd, cmd); MPL_free(cmd); fn_exit: HYD_pmcd_pmi_free_tokens(tokens, token_count); HYDU_FUNC_EXIT(); return status; fn_fail: goto fn_exit; }