static HYD_status create_env_file(const struct HYD_env *envlist, int num_ranks, int *ranks) { HYD_status status = HYD_SUCCESS; char filename[256]; FILE *f; const struct HYD_env *e; int ret; int r; HYDU_FUNC_ENTER(); for (r = 0; r < num_ranks; ++r) { MPL_snprintf(filename, sizeof(filename), "/tmp/hydra-env-file-%d:%d", (int) getpid(), ranks[r]); f = fopen(filename, "w"); HYDU_ERR_CHKANDJUMP(status, f == NULL, HYD_INTERNAL_ERROR, "fopen failed: %s\n", strerror(errno)); for (e = envlist; e; e = e->next) { fprintf(f, "%s=%s\n", e->env_name, e->env_value); } ret = fclose(f); HYDU_ERR_CHKANDJUMP(status, ret, HYD_INTERNAL_ERROR, "fclose failed: %s\n", strerror(errno)); } fn_exit: HYDU_FUNC_EXIT(); return status; fn_fail: goto fn_exit; }
HYD_status HYDT_ckpoint_blcr_restart(const char *prefix, int pgid, int id, int ckpt_num, struct HYD_env *envlist, int num_ranks, int ranks[], int *in, int *out, int *err, int *pid) { HYD_status status = HYD_SUCCESS; int ret; int context_fd; cr_restart_handle_t cr_handle; cr_restart_args_t args; char filename[256]; char port_str[64]; int port; HYDU_FUNC_ENTER(); /* create listener socket for stdin/out/err */ status = create_stdinouterr_sock(&port); HYDU_ERR_POP(status, "failed to create stdin/out/err socket\n"); MPL_snprintf(port_str, sizeof(port_str), "%d", port); status = HYDU_append_env_to_list(STDINOUTERR_PORT_NAME, port_str, &envlist); HYDU_ERR_POP(status, "failed to add to env list\n"); status = create_env_file(envlist, num_ranks, ranks); if (status) HYDU_ERR_POP(status, "blcr restart\n"); /* open the checkpoint file */ MPL_snprintf(filename, sizeof(filename), "%s/context-num%d-%d-%d", prefix, ckpt_num, pgid, id); context_fd = open(filename, O_RDONLY /* | O_LARGEFILE */); HYDU_ERR_CHKANDJUMP(status, context_fd < 0, HYD_INTERNAL_ERROR, "open failed, %s\n", strerror(errno)); /* ... initialize the request structure */ cr_initialize_restart_args_t(&args); args.cr_fd = context_fd; args.cr_flags = CR_RSTRT_RESTORE_PID; /* ... issue the request */ ret = cr_request_restart(&args, &cr_handle); HYDU_ERR_CHKANDJUMP(status, ret, HYD_INTERNAL_ERROR, "cr_request_restart failed, %s\n", strerror(errno)); ret = close(context_fd); HYDU_ERR_CHKANDJUMP(status, ret, HYD_INTERNAL_ERROR, "close failed, %s\n", strerror(errno)); /* get fds for stdin/out/err sockets, and get pids of restarted processes */ status = wait_for_stdinouterr_sockets(num_ranks, ranks, in, out, err, pid); if (status) HYDU_ERR_POP(status, "blcr restart\n"); fn_exit: HYDU_FUNC_EXIT(); return status; fn_fail: goto fn_exit; }
static HYD_status fn_info_putnodeattr(int fd, char *args[]) { struct HYD_string_stash stash; char *key, *val, *thrid, *cmd; struct HYD_pmcd_token *tokens = NULL; int token_count, ret; struct HYD_pmcd_pmi_v2_reqs *req; HYD_status status = HYD_SUCCESS; HYDU_FUNC_ENTER(); status = HYD_pmcd_pmi_args_to_tokens(args, &tokens, &token_count); HYDU_ERR_POP(status, "unable to convert args to tokens\n"); key = HYD_pmcd_pmi_find_token_keyval(tokens, token_count, "key"); HYDU_ERR_CHKANDJUMP(status, key == NULL, HYD_INTERNAL_ERROR, "unable to find key token\n"); val = HYD_pmcd_pmi_find_token_keyval(tokens, token_count, "value"); HYDU_ERR_CHKANDJUMP(status, val == NULL, HYD_INTERNAL_ERROR, "unable to find value token\n"); thrid = HYD_pmcd_pmi_find_token_keyval(tokens, token_count, "thrid"); status = HYD_pmcd_pmi_add_kvs(key, val, HYD_pmcd_pmip.local.kvs, &ret); HYDU_ERR_POP(status, "unable to put data into kvs\n"); HYD_STRING_STASH_INIT(stash); HYD_STRING_STASH(stash, MPL_strdup("cmd=info-putnodeattr-response;"), status); if (thrid) { HYD_STRING_STASH(stash, MPL_strdup("thrid="), status); HYD_STRING_STASH(stash, MPL_strdup(thrid), status); HYD_STRING_STASH(stash, MPL_strdup(";"), status); } HYD_STRING_STASH(stash, MPL_strdup("rc="), status); HYD_STRING_STASH(stash, HYDU_int_to_str(ret), status); HYD_STRING_STASH(stash, MPL_strdup(";"), status); HYD_STRING_SPIT(stash, cmd, status); send_cmd_downstream(fd, cmd); MPL_free(cmd); for (req = pending_reqs; req; req = req->next) { if (!strcmp(req->key, key)) { /* Poke the progress engine before exiting */ status = poke_progress(key); HYDU_ERR_POP(status, "poke progress error\n"); break; } } fn_exit: if (tokens) HYD_pmcd_pmi_free_tokens(tokens, token_count); HYDU_FUNC_EXIT(); return status; fn_fail: goto fn_exit; }
HYD_status HYDT_bscd_pbs_launcher_finalize(void) { int err; HYD_status status = HYD_SUCCESS; HYDU_FUNC_ENTER(); #if defined(HAVE_TM_H) err = tm_finalize(); HYDU_ERR_CHKANDJUMP(status, err != TM_SUCCESS, HYD_INTERNAL_ERROR, "error calling tm_finalize\n"); #endif /* HAVE_TM_H */ if (HYDT_bscd_pbs_sys) { if (HYDT_bscd_pbs_sys->task_id) HYDU_FREE(HYDT_bscd_pbs_sys->task_id); if (HYDT_bscd_pbs_sys->spawn_events) HYDU_FREE(HYDT_bscd_pbs_sys->spawn_events); HYDU_FREE(HYDT_bscd_pbs_sys); } fn_exit: HYDU_FUNC_EXIT(); return status; fn_fail: goto fn_exit; }
/*
 * Create the TCP listener that restarted processes use to reconnect
 * their stdin/out/err.
 *
 * The socket is stored in the file-scope listen_fd, bound to the
 * loopback address with a kernel-chosen port, and put into listening
 * state.  On success *port receives the chosen port in host byte order.
 *
 * Returns HYD_SUCCESS, or HYD_INTERNAL_ERROR if any socket call fails;
 * in that case the partially set-up socket is closed and listen_fd is
 * reset to -1 so the descriptor is not leaked.
 */
static HYD_status create_stdinouterr_sock(int *port)
{
    HYD_status status = HYD_SUCCESS;
    int ret;
    struct sockaddr_in sin;
    socklen_t len;

    HYDU_FUNC_ENTER();

    listen_fd = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);
    HYDU_ERR_CHKANDJUMP(status, listen_fd < 0, HYD_INTERNAL_ERROR, "socket() failed, %s\n",
                        strerror(errno));

    memset((void *) &sin, 0, sizeof(sin));
    sin.sin_family = AF_INET;
    sin.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
    sin.sin_port = htons(0);    /* let the kernel pick a free port */

    ret = bind(listen_fd, (struct sockaddr *) &sin, sizeof(sin));
    HYDU_ERR_CHKANDJUMP(status, ret, HYD_INTERNAL_ERROR, "bind() failed, %s\n",
                        strerror(errno));

    ret = listen(listen_fd, SOMAXCONN);
    HYDU_ERR_CHKANDJUMP(status, ret, HYD_INTERNAL_ERROR, "listen() failed, %s\n",
                        strerror(errno));

    /* find out which port we were actually given */
    len = sizeof(sin);
    ret = getsockname(listen_fd, (struct sockaddr *) &sin, &len);
    HYDU_ERR_CHKANDJUMP(status, ret, HYD_INTERNAL_ERROR, "getsockname() failed, %s\n",
                        strerror(errno));

    *port = ntohs(sin.sin_port);

  fn_exit:
    HYDU_FUNC_EXIT();
    return status;

  fn_fail:
    /* the error text was already formatted above, so clobbering errno is fine */
    if (listen_fd >= 0) {
        close(listen_fd);
        listen_fd = -1;
    }
    goto fn_exit;
}
/*
 * Store val in *var, refusing to overwrite an earlier setting.
 *
 * *var is treated as "not set yet" only while it holds -1; any other
 * current value makes the call fail with HYD_INTERNAL_ERROR and a
 * "duplicate setting" message that names arg.
 */
HYD_status HYDU_set_int(char *arg, int *var, int val)
{
    HYD_status status = HYD_SUCCESS;
    const int already_set = (*var != -1);

    HYDU_ERR_CHKANDJUMP(status, already_set, HYD_INTERNAL_ERROR, "duplicate setting: %s\n",
                        arg);

    *var = val;

  fn_exit:
    return status;

  fn_fail:
    goto fn_exit;
}
/*
 * Parse the control-port option value "<host>:<port>".
 *
 * **argv is the option value.  On success the host part is duplicated
 * into HYD_pmcd_pmip.upstream.server_name, the port part is converted
 * with strtol() into HYD_pmcd_pmip.upstream.server_port, and *argv is
 * advanced past the consumed value.
 *
 * Fails with HYD_INTERNAL_ERROR when the control port was already set,
 * when no value is supplied, or when the value does not contain both a
 * host and a port (previously the missing-port case passed NULL to
 * strtol()).  Nothing is stored on failure.
 */
static HYD_status control_port_fn(char *arg, char ***argv)
{
    char *port = NULL, *name, *port_str;
    HYD_status status = HYD_SUCCESS;

    HYDU_ERR_CHKANDJUMP(status, HYD_pmcd_pmip.upstream.server_name, HYD_INTERNAL_ERROR,
                        "duplicate control port setting\n");

    /* do not hand a NULL string to the duplication routine */
    HYDU_ERR_CHKANDJUMP(status, NULL == **argv, HYD_INTERNAL_ERROR, "port not provided\n");

    port = MPL_strdup(**argv);
    HYDU_ERR_CHKANDJUMP(status, NULL == port, HYD_INTERNAL_ERROR, "port not provided\n");

    /* strtok() returns NULL when a piece is missing; validate both pieces
     * before touching the global configuration */
    name = strtok(port, ":");
    port_str = strtok(NULL, ":");
    HYDU_ERR_CHKANDJUMP(status, name == NULL || port_str == NULL, HYD_INTERNAL_ERROR,
                        "control port must be of the form <host>:<port>\n");

    HYD_pmcd_pmip.upstream.server_name = MPL_strdup(name);
    HYD_pmcd_pmip.upstream.server_port = strtol(port_str, NULL, 10);

    (*argv)++;

  fn_exit:
    MPL_free(port);
    return status;

  fn_fail:
    goto fn_exit;
}
static HYD_status fn_info_getjobattr(int fd, char *args[]) { struct HYD_string_stash stash; char *cmd, *key, *thrid; struct HYD_pmcd_token *tokens; int token_count; HYD_status status = HYD_SUCCESS; HYDU_FUNC_ENTER(); status = HYD_pmcd_pmi_args_to_tokens(args, &tokens, &token_count); HYDU_ERR_POP(status, "unable to convert args to tokens\n"); key = HYD_pmcd_pmi_find_token_keyval(tokens, token_count, "key"); HYDU_ERR_CHKANDJUMP(status, key == NULL, HYD_INTERNAL_ERROR, "unable to find token: key\n"); thrid = HYD_pmcd_pmi_find_token_keyval(tokens, token_count, "thrid"); if (!strcmp(key, "PMI_process_mapping")) { HYD_STRING_STASH_INIT(stash); HYD_STRING_STASH(stash, MPL_strdup("cmd=info-getjobattr-response;"), status); if (thrid) { HYD_STRING_STASH(stash, MPL_strdup("thrid="), status); HYD_STRING_STASH(stash, MPL_strdup(thrid), status); HYD_STRING_STASH(stash, MPL_strdup(";"), status); } HYD_STRING_STASH(stash, MPL_strdup("found=TRUE;value="), status); HYD_STRING_STASH(stash, MPL_strdup(HYD_pmcd_pmip.system_global.pmi_process_mapping), status); HYD_STRING_STASH(stash, MPL_strdup(";rc=0;"), status); HYD_STRING_SPIT(stash, cmd, status); send_cmd_downstream(fd, cmd); MPL_free(cmd); } else { status = send_cmd_upstream("cmd=info-getjobattr;", fd, args); HYDU_ERR_POP(status, "error sending command upstream\n"); } fn_exit: HYD_pmcd_pmi_free_tokens(tokens, token_count); HYDU_FUNC_EXIT(); return status; fn_fail: goto fn_exit; }
/*
 * Store a private copy of val in *var, refusing to overwrite an earlier
 * setting.
 *
 * Fails with HYD_INTERNAL_ERROR when *var is already non-NULL ("duplicate
 * setting", naming arg), when val is NULL, or when the copy cannot be
 * allocated.  Previously an allocation failure returned success while
 * leaving *var NULL.
 */
HYD_status HYDU_set_str(char *arg, char **var, const char *val)
{
    HYD_status status = HYD_SUCCESS;

    HYDU_ERR_CHKANDJUMP(status, *var, HYD_INTERNAL_ERROR, "duplicate setting: %s\n", arg);

    if (val == NULL)
        HYDU_ERR_SETANDJUMP(status, HYD_INTERNAL_ERROR, "cannot assign NULL object\n");

    *var = MPL_strdup(val);
    HYDU_ERR_CHKANDJUMP(status, *var == NULL, HYD_INTERNAL_ERROR,
                        "unable to duplicate value for setting: %s\n", arg);

  fn_exit:
    return status;

  fn_fail:
    goto fn_exit;
}
/*
 * Parse the control-port option value "<host>:<port>".
 *
 * **argv is the option value.  On success the host part is duplicated
 * into HYD_pmcd_pmip.upstream.server_name, the port part is converted
 * with atoi() into HYD_pmcd_pmip.upstream.server_port, and *argv is
 * advanced past the consumed value.
 *
 * Fails with HYD_INTERNAL_ERROR when the control port was already set,
 * when no value is supplied or it cannot be duplicated, or when the
 * value lacks the host or the port part (previously a NULL from strtok()
 * was passed straight to MPL_strdup()/atoi()).  Nothing is stored on
 * failure.
 */
static HYD_status control_port_fn(char *arg, char ***argv)
{
    char *port = NULL, *name, *portnum;
    HYD_status status = HYD_SUCCESS;

    HYDU_ERR_CHKANDJUMP(status, HYD_pmcd_pmip.upstream.server_name, HYD_INTERNAL_ERROR,
                        "duplicate control port setting\n");

    HYDU_ERR_CHKANDJUMP(status, **argv == NULL, HYD_INTERNAL_ERROR,
                        "control port not provided\n");

    port = MPL_strdup(**argv);
    HYDU_ERR_CHKANDJUMP(status, port == NULL, HYD_INTERNAL_ERROR,
                        "unable to duplicate control port string\n");

    /* strtok() returns NULL when a piece is missing; check both pieces
     * before using either */
    name = strtok(port, ":");
    portnum = strtok(NULL, ":");
    HYDU_ERR_CHKANDJUMP(status, name == NULL || portnum == NULL, HYD_INTERNAL_ERROR,
                        "control port must be of the form <host>:<port>\n");

    HYD_pmcd_pmip.upstream.server_name = MPL_strdup(name);
    HYD_pmcd_pmip.upstream.server_port = atoi(portnum);

    (*argv)++;

  fn_exit:
    if (port)
        MPL_free(port);
    return status;

  fn_fail:
    goto fn_exit;
}
static HYD_status fn_kvs_get(int fd, int pid, int pgid, char *args[]) { int i, idx, found; struct HYD_pmcd_pmi_pg_scratch *pg_scratch; struct HYD_pg *pg; struct HYD_proxy *proxy; struct HYD_pmcd_pmi_kvs_pair *run; char *key, *thrid, *cmd; struct HYD_string_stash stash; struct HYD_pmcd_token *tokens; int token_count; HYD_status status = HYD_SUCCESS; HYDU_FUNC_ENTER(); status = HYD_pmcd_pmi_args_to_tokens(args, &tokens, &token_count); HYDU_ERR_POP(status, "unable to convert args to tokens\n"); key = HYD_pmcd_pmi_find_token_keyval(tokens, token_count, "key"); HYDU_ERR_CHKANDJUMP(status, key == NULL, HYD_INTERNAL_ERROR, "unable to find key token\n"); thrid = HYD_pmcd_pmi_find_token_keyval(tokens, token_count, "thrid"); proxy = HYD_pmcd_pmi_find_proxy(fd); HYDU_ASSERT(proxy, status); pg_scratch = (struct HYD_pmcd_pmi_pg_scratch *) proxy->pg->pg_scratch; found = 0; for (run = pg_scratch->kvs->key_pair; run; run = run->next) { if (!strcmp(run->key, key)) { found = 1; break; } } if (!found) { pg = proxy->pg; pg_scratch = (struct HYD_pmcd_pmi_pg_scratch *) pg->pg_scratch; idx = -1; for (i = 0; i < pg->pg_process_count; i++) if (pg_scratch->ecount[i].fd == fd && pg_scratch->ecount[i].pid == pid) { idx = i; break; } HYDU_ASSERT(idx != -1, status); for (i = 0; i < pg->pg_process_count; i++) { if (pg_scratch->ecount[i].epoch < pg_scratch->ecount[idx].epoch) { /* We haven't reached a barrier yet; queue up request */ status = HYD_pmcd_pmi_v2_queue_req(fd, pid, pgid, args, key, &pending_reqs); HYDU_ERR_POP(status, "unable to queue request\n"); /* We are done */ goto fn_exit; } } } HYD_STRING_STASH_INIT(stash); HYD_STRING_STASH(stash, HYDU_strdup("cmd=kvs-get-response;"), status); if (thrid) { HYD_STRING_STASH(stash, HYDU_strdup("thrid="), status); HYD_STRING_STASH(stash, HYDU_strdup(thrid), status); HYD_STRING_STASH(stash, HYDU_strdup(";"), status); } if (found) { HYD_STRING_STASH(stash, HYDU_strdup("found=TRUE;value="), status); HYD_STRING_STASH(stash, HYDU_strdup(run->val), 
status); HYD_STRING_STASH(stash, HYDU_strdup(";"), status); } else { HYD_STRING_STASH(stash, HYDU_strdup("found=FALSE;"), status); } HYD_STRING_STASH(stash, HYDU_strdup("rc=0;"), status); HYD_STRING_SPIT(stash, cmd, status); status = cmd_response(fd, pid, cmd); HYDU_ERR_POP(status, "send command failed\n"); HYDU_FREE(cmd); fn_exit: HYD_pmcd_pmi_free_tokens(tokens, token_count); HYDU_FUNC_EXIT(); return status; fn_fail: goto fn_exit; }
static HYD_status fn_info_getjobattr(int fd, int pid, int pgid, char *args[]) { struct HYD_proxy *proxy; struct HYD_pmcd_pmi_pg_scratch *pg_scratch; struct HYD_pmcd_pmi_kvs_pair *run; const char *key; char *thrid, *val, *cmd; struct HYD_string_stash stash; struct HYD_pmcd_token *tokens; int token_count; HYD_status status = HYD_SUCCESS; HYDU_FUNC_ENTER(); status = HYD_pmcd_pmi_args_to_tokens(args, &tokens, &token_count); HYDU_ERR_POP(status, "unable to convert args to tokens\n"); key = HYD_pmcd_pmi_find_token_keyval(tokens, token_count, "key"); HYDU_ERR_CHKANDJUMP(status, key == NULL, HYD_INTERNAL_ERROR, "unable to find key token\n"); thrid = HYD_pmcd_pmi_find_token_keyval(tokens, token_count, "thrid"); proxy = HYD_pmcd_pmi_find_proxy(fd); HYDU_ASSERT(proxy, status); pg_scratch = (struct HYD_pmcd_pmi_pg_scratch *) proxy->pg->pg_scratch; val = NULL; if (!strcmp(key, "PMI_dead_processes")) val = pg_scratch->dead_processes; /* Try to find the key */ for (run = pg_scratch->kvs->key_pair; run; run = run->next) { if (!strcmp(run->key, key)) { val = run->val; break; } } HYD_STRING_STASH_INIT(stash); HYD_STRING_STASH(stash, HYDU_strdup("cmd=info-getjobattr-response;"), status); if (thrid) { HYD_STRING_STASH(stash, HYDU_strdup("thrid="), status); HYD_STRING_STASH(stash, HYDU_strdup(thrid), status); HYD_STRING_STASH(stash, HYDU_strdup(";"), status); } HYD_STRING_STASH(stash, HYDU_strdup("found="), status); if (val) { HYD_STRING_STASH(stash, HYDU_strdup("TRUE;value="), status); HYD_STRING_STASH(stash, HYDU_strdup(val), status); HYD_STRING_STASH(stash, HYDU_strdup(";rc=0;"), status); } else { HYD_STRING_STASH(stash, HYDU_strdup("FALSE;rc=0;"), status); } HYD_STRING_SPIT(stash, cmd, status); status = cmd_response(fd, pid, cmd); HYDU_ERR_POP(status, "send command failed\n"); HYDU_FREE(cmd); fn_exit: HYD_pmcd_pmi_free_tokens(tokens, token_count); HYDU_FUNC_EXIT(); return status; fn_fail: goto fn_exit; }
static HYD_status fn_info_getnodeattr(int fd, char *args[]) { int found; struct HYD_pmcd_pmi_kvs_pair *run; char *key, *waitval, *thrid; struct HYD_string_stash stash; char *cmd; struct HYD_pmcd_token *tokens = NULL; int token_count; HYD_status status = HYD_SUCCESS; HYDU_FUNC_ENTER(); status = HYD_pmcd_pmi_args_to_tokens(args, &tokens, &token_count); HYDU_ERR_POP(status, "unable to convert args to tokens\n"); key = HYD_pmcd_pmi_find_token_keyval(tokens, token_count, "key"); HYDU_ERR_CHKANDJUMP(status, key == NULL, HYD_INTERNAL_ERROR, "unable to find key token\n"); waitval = HYD_pmcd_pmi_find_token_keyval(tokens, token_count, "wait"); thrid = HYD_pmcd_pmi_find_token_keyval(tokens, token_count, "thrid"); /* if a predefined value is not found, we let the code fall back * to regular search and return an error to the client */ found = 0; for (run = HYD_pmcd_pmip.local.kvs->key_pair; run; run = run->next) { if (!strcmp(run->key, key)) { found = 1; break; } } if (found) { /* We found the attribute */ HYD_STRING_STASH_INIT(stash); HYD_STRING_STASH(stash, MPL_strdup("cmd=info-getnodeattr-response;"), status); if (thrid) { HYD_STRING_STASH(stash, MPL_strdup("thrid="), status); HYD_STRING_STASH(stash, MPL_strdup(thrid), status); HYD_STRING_STASH(stash, MPL_strdup(";"), status); } HYD_STRING_STASH(stash, MPL_strdup("found=TRUE;value="), status); HYD_STRING_STASH(stash, MPL_strdup(run->val), status); HYD_STRING_STASH(stash, MPL_strdup(";rc=0;"), status); HYD_STRING_SPIT(stash, cmd, status); send_cmd_downstream(fd, cmd); MPL_free(cmd); } else if (waitval && !strcmp(waitval, "TRUE")) { /* The client wants to wait for a response; queue up the request */ status = HYD_pmcd_pmi_v2_queue_req(fd, -1, -1, args, key, &pending_reqs); HYDU_ERR_POP(status, "unable to queue request\n"); goto fn_exit; } else { /* Tell the client that we can't find the attribute */ HYD_STRING_STASH_INIT(stash); HYD_STRING_STASH(stash, MPL_strdup("cmd=info-getnodeattr-response;"), status); if (thrid) { 
HYD_STRING_STASH(stash, MPL_strdup("thrid="), status); HYD_STRING_STASH(stash, MPL_strdup(thrid), status); HYD_STRING_STASH(stash, MPL_strdup(";"), status); } HYD_STRING_STASH(stash, MPL_strdup("found=FALSE;rc=0;"), status); HYD_STRING_SPIT(stash, cmd, status); send_cmd_downstream(fd, cmd); MPL_free(cmd); } fn_exit: if (tokens) HYD_pmcd_pmi_free_tokens(tokens, token_count); HYDU_FUNC_EXIT(); return status; fn_fail: goto fn_exit; }
static HYD_status fn_kvs_put(int fd, int pid, int pgid, char *args[]) { struct HYD_string_stash stash; char *key, *val, *thrid, *cmd; int ret; struct HYD_proxy *proxy; struct HYD_pmcd_pmi_pg_scratch *pg_scratch; struct HYD_pmcd_token *tokens; int token_count; struct HYD_pmcd_pmi_v2_reqs *req; HYD_status status = HYD_SUCCESS; HYDU_FUNC_ENTER(); status = HYD_pmcd_pmi_args_to_tokens(args, &tokens, &token_count); HYDU_ERR_POP(status, "unable to convert args to tokens\n"); key = HYD_pmcd_pmi_find_token_keyval(tokens, token_count, "key"); HYDU_ERR_CHKANDJUMP(status, key == NULL, HYD_INTERNAL_ERROR, "unable to find key token\n"); val = HYD_pmcd_pmi_find_token_keyval(tokens, token_count, "value"); if (val == NULL) { /* the user sent an empty string */ val = HYDU_strdup(""); } thrid = HYD_pmcd_pmi_find_token_keyval(tokens, token_count, "thrid"); proxy = HYD_pmcd_pmi_find_proxy(fd); HYDU_ASSERT(proxy, status); pg_scratch = (struct HYD_pmcd_pmi_pg_scratch *) proxy->pg->pg_scratch; status = HYD_pmcd_pmi_add_kvs(key, val, pg_scratch->kvs, &ret); HYDU_ERR_POP(status, "unable to put data into kvs\n"); HYD_STRING_STASH_INIT(stash); HYD_STRING_STASH(stash, HYDU_strdup("cmd=kvs-put-response;"), status); if (thrid) { HYD_STRING_STASH(stash, HYDU_strdup("thrid="), status); HYD_STRING_STASH(stash, HYDU_strdup(thrid), status); HYD_STRING_STASH(stash, HYDU_strdup(";"), status); } HYD_STRING_STASH(stash, HYDU_strdup("rc="), status); HYD_STRING_STASH(stash, HYDU_int_to_str(ret), status); HYD_STRING_STASH(stash, HYDU_strdup(";"), status); HYD_STRING_SPIT(stash, cmd, status); status = cmd_response(fd, pid, cmd); HYDU_ERR_POP(status, "send command failed\n"); HYDU_FREE(cmd); for (req = pending_reqs; req; req = req->next) { if (!strcmp(req->key, key)) { /* Poke the progress engine before exiting */ status = poke_progress(key); HYDU_ERR_POP(status, "poke progress error\n"); break; } } fn_exit: HYD_pmcd_pmi_free_tokens(tokens, token_count); HYDU_FUNC_EXIT(); return status; fn_fail: goto fn_exit; 
}
HYD_status HYDT_ckpoint_blcr_checkpoint(const char *prefix, int pgid, int id, int ckpt_num) { HYD_status status = HYD_SUCCESS; int ret; int fd; cr_checkpoint_args_t my_args; cr_checkpoint_handle_t my_handle; char filename[256]; HYDU_FUNC_ENTER(); /* build the checkpoint filename */ MPL_snprintf(filename, sizeof(filename), "%s/context-num%d-%d-%d", prefix, ckpt_num, pgid, id); /* remove existing checkpoint file, if any */ (void) unlink(filename); /* open the checkpoint file */ fd = open(filename, O_WRONLY | O_CREAT | O_TRUNC /* | O_LARGEFILE */ , 0600); HYDU_ERR_CHKANDJUMP(status, fd < 0, HYD_INTERNAL_ERROR, "open failed: %s\n", strerror(errno)); cr_initialize_checkpoint_args_t(&my_args); my_args.cr_fd = fd; my_args.cr_scope = CR_SCOPE_TREE; /* issue the request */ ret = cr_request_checkpoint(&my_args, &my_handle); if (ret < 0) { HYDU_ERR_CHKANDJUMP(status, errno == CR_ENOSUPPORT, HYD_INTERNAL_ERROR, "Checkpointing failed. Make sure BLCR kernel module is loaded. %s\n", strerror(errno)); HYDU_ERR_SETANDJUMP(status, HYD_INTERNAL_ERROR, "cr_request_checkpoint failed, %s\n", strerror(errno)); } /* wait for the request to complete */ while (1) { ret = cr_poll_checkpoint(&my_handle, NULL); if (ret < 0) { if ((ret == CR_POLL_CHKPT_ERR_POST) && (errno == CR_ERESTARTED)) { HYDU_ERR_SETANDJUMP(status, HYD_INTERNAL_ERROR, "trying to restart in a checkpoint\n"); } else if (errno == EINTR) { /* poll was interrupted by a signal -- retry */ } else { HYDU_ERR_SETANDJUMP(status, HYD_INTERNAL_ERROR, "cr_poll_checkpoint failed: %s\n", strerror(errno)); } } else if (ret == 0) { HYDU_ERR_SETANDJUMP(status, HYD_INTERNAL_ERROR, "cr_poll_checkpoint returned 0 unexpectedly\n"); } else { break; } } ret = close(my_args.cr_fd); HYDU_ERR_CHKANDJUMP(status, ret, HYD_INTERNAL_ERROR, "close failed, %s\n", strerror(errno)); fn_exit: HYDU_FUNC_EXIT(); return status; fn_fail: goto fn_exit; }
/* This waits for the restarted processes to reconnect their stdin/out/err sockets, then sets the appropriate entries in the in out and err arrays. This also gets the pids of the restarted processes. */ static HYD_status wait_for_stdinouterr_sockets(int num_ranks, int *ranks, int *in, int *out, int *err, int *pid) { HYD_status status = HYD_SUCCESS; int ret; int fd; int i, c; sock_ident_t id; int num_expected_connections = num_ranks * 2; /* wait for connections for stdout and err */ HYDU_FUNC_ENTER(); /* if one of the processes is rank 0, we should wait for an * additional connection for stdin */ for (i = 0; i < num_ranks; ++i) if (ranks[i] == 0) { ++num_expected_connections; break; } for (c = 0; c < num_expected_connections; ++c) { size_t len; char *id_p; /* wait for a connection */ do { struct sockaddr_in rmt_addr; socklen_t sa_len = sizeof(rmt_addr);; fd = accept(listen_fd, (struct sockaddr *) &rmt_addr, &sa_len); } while (fd && errno == EINTR); HYDU_ERR_CHKANDJUMP(status, fd == -1, HYD_INTERNAL_ERROR, "accept failed, %s\n", strerror(errno)); /* read the socket identifier */ len = sizeof(id); id_p = (char *) &id; do { do { ret = read(fd, id_p, len); } while (ret == 0 || (ret == -1 && errno == EINTR)); HYDU_ERR_CHKANDJUMP(status, ret == -1, HYD_INTERNAL_ERROR, "read failed, %s\n", strerror(errno)); len -= ret; id_p += ret; } while (len); /* determine the index for this process in the stdout/err * arrays */ for (i = 0; i < num_ranks; ++i) if (ranks[i] == id.rank) break; HYDU_ASSERT(i < num_ranks, status); /* assign the fd */ switch (id.socktype) { case IN_SOCK: HYDU_ASSERT(id.rank == 0, status); *in = fd; break; case OUT_SOCK: out[i] = fd; break; case ERR_SOCK: err[i] = fd; break; default: HYDU_ASSERT(0, status); break; } /* assign the pid */ pid[i] = id.pid; } ret = close(listen_fd); HYDU_ERR_CHKANDJUMP(status, ret, HYD_INTERNAL_ERROR, "close of listener port failed, %s\n", strerror(errno)); fn_exit: HYDU_FUNC_EXIT(); return status; fn_fail: goto fn_exit; }
static HYD_status fn_get(int fd, int pid, int pgid, char *args[]) { int i; struct HYD_proxy *proxy; struct HYD_pmcd_pmi_pg_scratch *pg_scratch; struct HYD_pmcd_pmi_kvs_pair *run; char *kvsname, *key, *val; char *tmp[HYD_NUM_TMP_STRINGS], *cmd; struct HYD_pmcd_token *tokens; int token_count; HYD_status status = HYD_SUCCESS; HYDU_FUNC_ENTER(); status = HYD_pmcd_pmi_args_to_tokens(args, &tokens, &token_count); HYDU_ERR_POP(status, "unable to convert args to tokens\n"); kvsname = HYD_pmcd_pmi_find_token_keyval(tokens, token_count, "kvsname"); HYDU_ERR_CHKANDJUMP(status, kvsname == NULL, HYD_INTERNAL_ERROR, "unable to find token: kvsname\n"); key = HYD_pmcd_pmi_find_token_keyval(tokens, token_count, "key"); HYDU_ERR_CHKANDJUMP(status, key == NULL, HYD_INTERNAL_ERROR, "unable to find token: key\n"); proxy = HYD_pmcd_pmi_find_proxy(fd); HYDU_ASSERT(proxy, status); pg_scratch = (struct HYD_pmcd_pmi_pg_scratch *) proxy->pg->pg_scratch; val = NULL; if (!strcmp(key, "PMI_dead_processes")) { val = pg_scratch->dead_processes; goto found_val; } if (strcmp(pg_scratch->kvs->kvs_name, kvsname)) HYDU_ERR_SETANDJUMP(status, HYD_INTERNAL_ERROR, "kvsname (%s) does not match this group's kvs space (%s)\n", kvsname, pg_scratch->kvs->kvs_name); /* Try to find the key */ for (run = pg_scratch->kvs->key_pair; run; run = run->next) { if (!strcmp(run->key, key)) { val = run->val; break; } } found_val: i = 0; tmp[i++] = HYDU_strdup("cmd=get_result rc="); if (val) { tmp[i++] = HYDU_strdup("0 msg=success value="); tmp[i++] = HYDU_strdup(val); } else { tmp[i++] = HYDU_strdup("-1 msg=key_"); tmp[i++] = HYDU_strdup(key); tmp[i++] = HYDU_strdup("_not_found value=unknown"); } tmp[i++] = HYDU_strdup("\n"); tmp[i++] = NULL; status = HYDU_str_alloc_and_join(tmp, &cmd); HYDU_ERR_POP(status, "unable to join strings\n"); HYDU_free_strlist(tmp); status = cmd_response(fd, pid, cmd); HYDU_ERR_POP(status, "error writing PMI line\n"); HYDU_FREE(cmd); fn_exit: HYD_pmcd_pmi_free_tokens(tokens, token_count); 
HYDU_FUNC_EXIT(); return status; fn_fail: goto fn_exit; }
static HYD_status fn_put(int fd, int pid, int pgid, char *args[]) { int i, ret; struct HYD_proxy *proxy; struct HYD_pmcd_pmi_pg_scratch *pg_scratch; char *kvsname, *key, *val; char *tmp[HYD_NUM_TMP_STRINGS], *cmd; struct HYD_pmcd_token *tokens; int token_count; HYD_status status = HYD_SUCCESS; HYDU_FUNC_ENTER(); status = HYD_pmcd_pmi_args_to_tokens(args, &tokens, &token_count); HYDU_ERR_POP(status, "unable to convert args to tokens\n"); kvsname = HYD_pmcd_pmi_find_token_keyval(tokens, token_count, "kvsname"); HYDU_ERR_CHKANDJUMP(status, kvsname == NULL, HYD_INTERNAL_ERROR, "unable to find token: kvsname\n"); key = HYD_pmcd_pmi_find_token_keyval(tokens, token_count, "key"); HYDU_ERR_CHKANDJUMP(status, key == NULL, HYD_INTERNAL_ERROR, "unable to find token: key\n"); val = HYD_pmcd_pmi_find_token_keyval(tokens, token_count, "value"); if (val == NULL) { /* the user sent an empty string */ val = HYDU_strdup(""); } proxy = HYD_pmcd_pmi_find_proxy(fd); HYDU_ASSERT(proxy, status); pg_scratch = (struct HYD_pmcd_pmi_pg_scratch *) proxy->pg->pg_scratch; if (strcmp(pg_scratch->kvs->kvs_name, kvsname)) HYDU_ERR_SETANDJUMP(status, HYD_INTERNAL_ERROR, "kvsname (%s) does not match this group's kvs space (%s)\n", kvsname, pg_scratch->kvs->kvs_name); status = HYD_pmcd_pmi_add_kvs(key, val, pg_scratch->kvs, &ret); HYDU_ERR_POP(status, "unable to add keypair to kvs\n"); i = 0; tmp[i++] = HYDU_strdup("cmd=put_result rc="); tmp[i++] = HYDU_int_to_str(ret); if (ret == 0) { tmp[i++] = HYDU_strdup(" msg=success"); } else { tmp[i++] = HYDU_strdup(" msg=duplicate_key"); tmp[i++] = HYDU_strdup(key); } tmp[i++] = HYDU_strdup("\n"); tmp[i++] = NULL; status = HYDU_str_alloc_and_join(tmp, &cmd); HYDU_ERR_POP(status, "unable to join strings\n"); HYDU_free_strlist(tmp); status = cmd_response(fd, pid, cmd); HYDU_ERR_POP(status, "error writing PMI line\n"); HYDU_FREE(cmd); fn_exit: HYD_pmcd_pmi_free_tokens(tokens, token_count); HYDU_FUNC_EXIT(); return status; fn_fail: goto fn_exit; }
static HYD_status fn_spawn(int fd, int pid, int pgid, char *args[]) { struct HYD_pg *pg; struct HYD_pmcd_pmi_pg_scratch *pg_scratch; struct HYD_proxy *proxy; struct HYD_pmcd_token *tokens; struct HYD_exec *exec_list = NULL, *exec; struct HYD_env *env; struct HYD_node *node; char key[PMI_MAXKEYLEN], *val; int nprocs, preput_num, info_num, ret; char *execname, *path = NULL; struct HYD_pmcd_token_segment *segment_list = NULL; int token_count, i, j, k, new_pgid, total_spawns; int argcnt, num_segments; char *control_port, *proxy_args[HYD_NUM_TMP_STRINGS] = { NULL }; char *tmp[HYD_NUM_TMP_STRINGS]; HYD_status status = HYD_SUCCESS; HYDU_FUNC_ENTER(); for (i = 0; args[i]; i++) mcmd_args[mcmd_num_args++] = HYDU_strdup(args[i]); mcmd_args[mcmd_num_args] = NULL; status = HYD_pmcd_pmi_args_to_tokens(mcmd_args, &tokens, &token_count); HYDU_ERR_POP(status, "unable to convert args to tokens\n"); /* Here's the order of things we do: * * 1. Break the token list into multiple segments, each segment * corresponding to a command. Each command represents * information for one executable. * * 2. Allocate a process group for the new set of spawned * processes * * 3. Get all the common keys and deal with them * * 4. Create an executable list based on the segments. * * 5. Create a proxy list using the created executable list and * spawn it. */ /* Break the token list into multiple segments and create an * executable list based on the segments. 
*/ val = HYD_pmcd_pmi_find_token_keyval(tokens, token_count, "totspawns"); HYDU_ERR_CHKANDJUMP(status, val == NULL, HYD_INTERNAL_ERROR, "unable to find token: totspawns\n"); total_spawns = atoi(val); HYDU_MALLOC(segment_list, struct HYD_pmcd_token_segment *, total_spawns * sizeof(struct HYD_pmcd_token_segment), status); segment_tokens(tokens, token_count, segment_list, &num_segments); if (num_segments != total_spawns) { /* We didn't read the entire PMI string; wait for the rest to * arrive */ goto fn_exit; } else { /* Got the entire PMI string; free the arguments and reset */ HYDU_free_strlist(mcmd_args); mcmd_num_args = 0; } /* Allocate a new process group */ for (pg = &HYD_server_info.pg_list; pg->next; pg = pg->next); new_pgid = pg->pgid + 1; status = HYDU_alloc_pg(&pg->next, new_pgid); HYDU_ERR_POP(status, "unable to allocate process group\n"); pg = pg->next; proxy = HYD_pmcd_pmi_find_proxy(fd); HYDU_ASSERT(proxy, status); pg->spawner_pg = proxy->pg; for (j = 0; j < total_spawns; j++) { /* For each segment, we create an exec structure */ val = HYD_pmcd_pmi_find_token_keyval(&tokens[segment_list[j].start_idx], segment_list[j].token_count, "nprocs"); HYDU_ERR_CHKANDJUMP(status, val == NULL, HYD_INTERNAL_ERROR, "unable to find token: nprocs\n"); nprocs = atoi(val); pg->pg_process_count += nprocs; val = HYD_pmcd_pmi_find_token_keyval(&tokens[segment_list[j].start_idx], segment_list[j].token_count, "argcnt"); HYDU_ERR_CHKANDJUMP(status, val == NULL, HYD_INTERNAL_ERROR, "unable to find token: argcnt\n"); argcnt = atoi(val); val = HYD_pmcd_pmi_find_token_keyval(&tokens[segment_list[j].start_idx], segment_list[j].token_count, "info_num"); if (val) info_num = atoi(val); else info_num = 0; if (exec_list == NULL) { status = HYDU_alloc_exec(&exec_list); HYDU_ERR_POP(status, "unable to allocate exec\n"); exec_list->appnum = 0; exec = exec_list; } else { for (exec = exec_list; exec->next; exec = exec->next); status = HYDU_alloc_exec(&exec->next); HYDU_ERR_POP(status, "unable 
to allocate exec\n"); exec->next->appnum = exec->appnum + 1; exec = exec->next; } /* Info keys */ for (i = 0; i < info_num; i++) { char *info_key, *info_val; HYDU_snprintf(key, PMI_MAXKEYLEN, "info_key_%d", i); val = HYD_pmcd_pmi_find_token_keyval(&tokens[segment_list[j].start_idx], segment_list[j].token_count, key); HYDU_ERR_CHKANDJUMP(status, val == NULL, HYD_INTERNAL_ERROR, "unable to find token: %s\n", key); info_key = val; HYDU_snprintf(key, PMI_MAXKEYLEN, "info_val_%d", i); val = HYD_pmcd_pmi_find_token_keyval(&tokens[segment_list[j].start_idx], segment_list[j].token_count, key); HYDU_ERR_CHKANDJUMP(status, val == NULL, HYD_INTERNAL_ERROR, "unable to find token: %s\n", key); info_val = val; if (!strcmp(info_key, "path")) { path = HYDU_strdup(info_val); } else if (!strcmp(info_key, "wdir")) { exec->wdir = HYDU_strdup(info_val); } else if (!strcmp(info_key, "host")) { status = HYDU_process_mfile_token(info_val, 1, &pg->user_node_list); HYDU_ERR_POP(status, "error create node list\n"); } else if (!strcmp(info_key, "hostfile")) { status = HYDU_parse_hostfile(info_val, &pg->user_node_list, HYDU_process_mfile_token); HYDU_ERR_POP(status, "error parsing hostfile\n"); } else { /* Unrecognized info key; ignore */ } } status = HYDU_correct_wdir(&exec->wdir); HYDU_ERR_POP(status, "unable to correct wdir\n"); val = HYD_pmcd_pmi_find_token_keyval(&tokens[segment_list[j].start_idx], segment_list[j].token_count, "execname"); HYDU_ERR_CHKANDJUMP(status, val == NULL, HYD_INTERNAL_ERROR, "unable to find token: execname\n"); if (path == NULL) execname = HYDU_strdup(val); else { i = 0; tmp[i++] = HYDU_strdup(path); tmp[i++] = HYDU_strdup("/"); tmp[i++] = HYDU_strdup(val); tmp[i++] = NULL; status = HYDU_str_alloc_and_join(tmp, &execname); HYDU_ERR_POP(status, "error while joining strings\n"); HYDU_free_strlist(tmp); } i = 0; exec->exec[i++] = execname; for (k = 0; k < argcnt; k++) { HYDU_snprintf(key, PMI_MAXKEYLEN, "arg%d", k + 1); val = 
HYD_pmcd_pmi_find_token_keyval(&tokens[segment_list[j].start_idx], segment_list[j].token_count, key); HYDU_ERR_CHKANDJUMP(status, val == NULL, HYD_INTERNAL_ERROR, "unable to find token: %s\n", key); exec->exec[i++] = HYDU_strdup(val); } exec->exec[i++] = NULL; exec->proc_count = nprocs; /* It is not clear what kind of environment needs to get * passed to the spawned process. Don't set anything here, and * let the proxy do whatever it does by default. */ exec->env_prop = NULL; status = HYDU_env_create(&env, "PMI_SPAWNED", "1"); HYDU_ERR_POP(status, "unable to create PMI_SPAWNED environment\n"); exec->user_env = env; } status = HYD_pmcd_pmi_alloc_pg_scratch(pg); HYDU_ERR_POP(status, "unable to allocate pg scratch space\n"); if (pg->user_node_list) { pg->pg_core_count = 0; for (i = 0, node = pg->user_node_list; node; node = node->next, i++) { pg->pg_core_count += node->core_count; node->node_id = i; } } else { pg->pg_core_count = HYD_server_info.pg_list.pg_core_count; } pg->pg_process_count = 0; for (exec = exec_list; exec; exec = exec->next) pg->pg_process_count += exec->proc_count; pg_scratch = (struct HYD_pmcd_pmi_pg_scratch *) pg->pg_scratch; /* Get the common keys and deal with them */ val = HYD_pmcd_pmi_find_token_keyval(tokens, token_count, "preput_num"); if (val) preput_num = atoi(val); else preput_num = 0; for (i = 0; i < preput_num; i++) { char *preput_key, *preput_val; HYDU_snprintf(key, PMI_MAXKEYLEN, "preput_key_%d", i); val = HYD_pmcd_pmi_find_token_keyval(tokens, token_count, key); HYDU_ERR_CHKANDJUMP(status, val == NULL, HYD_INTERNAL_ERROR, "unable to find token: %s\n", key); preput_key = val; HYDU_snprintf(key, PMI_MAXKEYLEN, "preput_val_%d", i); val = HYD_pmcd_pmi_find_token_keyval(tokens, token_count, key); HYDU_ERR_CHKANDJUMP(status, val == NULL, HYD_INTERNAL_ERROR, "unable to find token: %s\n", key); preput_val = val; status = HYD_pmcd_pmi_add_kvs(preput_key, preput_val, pg_scratch->kvs, &ret); HYDU_ERR_POP(status, "unable to add keypair to 
kvs\n"); } /* Create the proxy list */ if (pg->user_node_list) { status = HYDU_create_proxy_list(exec_list, pg->user_node_list, pg); HYDU_ERR_POP(status, "error creating proxy list\n"); } else { status = HYDU_create_proxy_list(exec_list, HYD_server_info.node_list, pg); HYDU_ERR_POP(status, "error creating proxy list\n"); } HYDU_free_exec_list(exec_list); status = HYDU_sock_create_and_listen_portstr(HYD_server_info.user_global.iface, HYD_server_info.local_hostname, HYD_server_info.port_range, &control_port, HYD_pmcd_pmiserv_control_listen_cb, (void *) (size_t) new_pgid); HYDU_ERR_POP(status, "unable to create PMI port\n"); if (HYD_server_info.user_global.debug) HYDU_dump(stdout, "Got a control port string of %s\n", control_port); /* Go to the last PG */ for (pg = &HYD_server_info.pg_list; pg->next; pg = pg->next); status = HYD_pmcd_pmi_fill_in_proxy_args(proxy_args, control_port, new_pgid); HYDU_ERR_POP(status, "unable to fill in proxy arguments\n"); HYDU_FREE(control_port); status = HYD_pmcd_pmi_fill_in_exec_launch_info(pg); HYDU_ERR_POP(status, "unable to fill in executable arguments\n"); status = HYDT_bsci_launch_procs(proxy_args, pg->proxy_list, NULL); HYDU_ERR_POP(status, "launcher cannot launch processes\n"); { char *cmd_str[HYD_NUM_TMP_STRINGS], *cmd; i = 0; cmd_str[i++] = HYDU_strdup("cmd=spawn_result rc=0"); cmd_str[i++] = HYDU_strdup("\n"); cmd_str[i++] = NULL; status = HYDU_str_alloc_and_join(cmd_str, &cmd); HYDU_ERR_POP(status, "unable to join strings\n"); HYDU_free_strlist(cmd_str); status = cmd_response(fd, pid, cmd); HYDU_ERR_POP(status, "error writing PMI line\n"); HYDU_FREE(cmd); } fn_exit: HYD_pmcd_pmi_free_tokens(tokens, token_count); HYDU_free_strlist(proxy_args); if (segment_list) HYDU_FREE(segment_list); HYDU_FUNC_EXIT(); return status; fn_fail: goto fn_exit; }
static HYD_status fn_spawn(int fd, int pid, int pgid, char *args[]) { struct HYD_pg *pg; struct HYD_pmcd_pmi_pg_scratch *pg_scratch; struct HYD_proxy *proxy; struct HYD_pmcd_token *tokens; struct HYD_exec *exec_list = NULL, *exec; struct HYD_env *env; struct HYD_node *node; char *thrid; char key[PMI_MAXKEYLEN], *val; int maxprocs, preputcount, infokeycount, ret; int ncmds; char *execname, *path = NULL; struct HYD_pmcd_token_segment *segment_list = NULL; int token_count, i, j, k, new_pgid; int argcnt, num_segments; struct HYD_string_stash proxy_stash; char *control_port; struct HYD_string_stash stash; HYD_status status = HYD_SUCCESS; HYDU_FUNC_ENTER(); status = HYD_pmcd_pmi_args_to_tokens(args, &tokens, &token_count); HYDU_ERR_POP(status, "unable to convert args to tokens\n"); thrid = HYD_pmcd_pmi_find_token_keyval(tokens, token_count, "thrid"); /* Here's the order of things we do: * * 1. Break the token list into multiple segments, each segment * corresponding to a command. Each command represents * information for one executable. * * 2. Allocate a process group for the new set of spawned * processes * * 3. Get all the common keys and deal with them * * 4. Create an executable list based on the segments. * * 5. Create a proxy list using the created executable list and * spawn it. 
*/ val = HYD_pmcd_pmi_find_token_keyval(tokens, token_count, "ncmds"); HYDU_ERR_CHKANDJUMP(status, val == NULL, HYD_INTERNAL_ERROR, "unable to find token: ncmds\n"); ncmds = atoi(val); HYDU_MALLOC(segment_list, struct HYD_pmcd_token_segment *, (ncmds + 1) * sizeof(struct HYD_pmcd_token_segment), status); segment_tokens(tokens, token_count, segment_list, &num_segments); HYDU_ASSERT((ncmds + 1) == num_segments, status); /* Allocate a new process group */ for (pg = &HYD_server_info.pg_list; pg->next; pg = pg->next); new_pgid = pg->pgid + 1; status = HYDU_alloc_pg(&pg->next, new_pgid); HYDU_ERR_POP(status, "unable to allocate process group\n"); pg = pg->next; proxy = HYD_pmcd_pmi_find_proxy(fd); HYDU_ASSERT(proxy, status); pg->spawner_pg = proxy->pg; for (j = 1; j <= ncmds; j++) { /* For each segment, we create an exec structure */ val = HYD_pmcd_pmi_find_token_keyval(&tokens[segment_list[j].start_idx], segment_list[j].token_count, "maxprocs"); HYDU_ERR_CHKANDJUMP(status, val == NULL, HYD_INTERNAL_ERROR, "unable to find token: maxprocs\n"); maxprocs = atoi(val); pg->pg_process_count += maxprocs; val = HYD_pmcd_pmi_find_token_keyval(&tokens[segment_list[j].start_idx], segment_list[j].token_count, "argc"); HYDU_ERR_CHKANDJUMP(status, val == NULL, HYD_INTERNAL_ERROR, "unable to find token: argc\n"); argcnt = atoi(val); val = HYD_pmcd_pmi_find_token_keyval(&tokens[segment_list[j].start_idx], segment_list[j].token_count, "infokeycount"); if (val) infokeycount = atoi(val); else infokeycount = 0; if (exec_list == NULL) { status = HYDU_alloc_exec(&exec_list); HYDU_ERR_POP(status, "unable to allocate exec\n"); exec_list->appnum = 0; exec = exec_list; } else { for (exec = exec_list; exec->next; exec = exec->next); status = HYDU_alloc_exec(&exec->next); HYDU_ERR_POP(status, "unable to allocate exec\n"); exec->next->appnum = exec->appnum + 1; exec = exec->next; } /* Info keys */ for (i = 0; i < infokeycount; i++) { char *info_key, *info_val; HYDU_snprintf(key, PMI_MAXKEYLEN, 
"infokey%d", i); val = HYD_pmcd_pmi_find_token_keyval(&tokens[segment_list[j].start_idx], segment_list[j].token_count, key); HYDU_ERR_CHKANDJUMP(status, val == NULL, HYD_INTERNAL_ERROR, "unable to find token: %s\n", key); info_key = val; HYDU_snprintf(key, PMI_MAXKEYLEN, "infoval%d", i); val = HYD_pmcd_pmi_find_token_keyval(&tokens[segment_list[j].start_idx], segment_list[j].token_count, key); HYDU_ERR_CHKANDJUMP(status, val == NULL, HYD_INTERNAL_ERROR, "unable to find token: %s\n", key); info_val = val; if (!strcmp(info_key, "path")) { path = HYDU_strdup(info_val); } else if (!strcmp(info_key, "wdir")) { exec->wdir = HYDU_strdup(info_val); } else if (!strcmp(info_key, "host") || !strcmp(info_key, "hosts")) { char *host = strtok(info_val, ","); while (host) { status = HYDU_process_mfile_token(host, 1, &pg->user_node_list); HYDU_ERR_POP(status, "error creating node list\n"); host = strtok(NULL, ","); } } else if (!strcmp(info_key, "hostfile")) { status = HYDU_parse_hostfile(info_val, &pg->user_node_list, HYDU_process_mfile_token); HYDU_ERR_POP(status, "error parsing hostfile\n"); } else { /* Unrecognized info key; ignore */ } } status = HYDU_correct_wdir(&exec->wdir); HYDU_ERR_POP(status, "unable to correct wdir\n"); val = HYD_pmcd_pmi_find_token_keyval(&tokens[segment_list[j].start_idx], segment_list[j].token_count, "subcmd"); HYDU_ERR_CHKANDJUMP(status, val == NULL, HYD_INTERNAL_ERROR, "unable to find token: subcmd\n"); if (path == NULL) execname = HYDU_strdup(val); else { HYD_STRING_STASH_INIT(stash); HYD_STRING_STASH(stash, HYDU_strdup(path), status); HYD_STRING_STASH(stash, HYDU_strdup("/"), status); HYD_STRING_STASH(stash, HYDU_strdup(val), status); HYD_STRING_SPIT(stash, execname, status); } i = 0; exec->exec[i++] = execname; for (k = 0; k < argcnt; k++) { HYDU_snprintf(key, PMI_MAXKEYLEN, "argv%d", k); val = HYD_pmcd_pmi_find_token_keyval(&tokens[segment_list[j].start_idx], segment_list[j].token_count, key); HYDU_ERR_CHKANDJUMP(status, val == NULL, 
HYD_INTERNAL_ERROR, "unable to find token: %s\n", key); exec->exec[i++] = HYDU_strdup(val); } exec->exec[i++] = NULL; exec->proc_count = maxprocs; /* It is not clear what kind of environment needs to get * passed to the spawned process. Don't set anything here, and * let the proxy do whatever it does by default. */ exec->env_prop = NULL; status = HYDU_env_create(&env, "PMI_SPAWNED", "1"); HYDU_ERR_POP(status, "unable to create PMI_SPAWNED environment\n"); exec->user_env = env; } status = HYD_pmcd_pmi_alloc_pg_scratch(pg); HYDU_ERR_POP(status, "unable to allocate pg scratch space\n"); pg->pg_process_count = 0; for (exec = exec_list; exec; exec = exec->next) pg->pg_process_count += exec->proc_count; pg_scratch = (struct HYD_pmcd_pmi_pg_scratch *) pg->pg_scratch; /* Get the common keys and deal with them */ val = HYD_pmcd_pmi_find_token_keyval(tokens, token_count, "preputcount"); HYDU_ERR_CHKANDJUMP(status, val == NULL, HYD_INTERNAL_ERROR, "unable to find token: preputcount\n"); preputcount = atoi(val); for (i = 0; i < preputcount; i++) { char *preput_key, *preput_val; HYDU_snprintf(key, PMI_MAXKEYLEN, "ppkey%d", i); val = HYD_pmcd_pmi_find_token_keyval(tokens, token_count, key); HYDU_ERR_CHKANDJUMP(status, val == NULL, HYD_INTERNAL_ERROR, "unable to find token: %s\n", key); preput_key = val; HYDU_snprintf(key, PMI_MAXKEYLEN, "ppval%d", i); val = HYD_pmcd_pmi_find_token_keyval(tokens, token_count, key); HYDU_ERR_CHKANDJUMP(status, val == NULL, HYD_INTERNAL_ERROR, "unable to find token: %s\n", key); preput_val = val; status = HYD_pmcd_pmi_add_kvs(preput_key, preput_val, pg_scratch->kvs, &ret); HYDU_ERR_POP(status, "unable to add keypair to kvs\n"); } /* Create the proxy list */ if (pg->user_node_list) { status = HYDU_create_proxy_list(exec_list, pg->user_node_list, pg); HYDU_ERR_POP(status, "error creating proxy list\n"); } else { status = HYDU_create_proxy_list(exec_list, HYD_server_info.node_list, pg); HYDU_ERR_POP(status, "error creating proxy list\n"); } 
HYDU_free_exec_list(exec_list); if (pg->user_node_list) { pg->pg_core_count = 0; for (i = 0, node = pg->user_node_list; node; node = node->next, i++) pg->pg_core_count += node->core_count; } else { pg->pg_core_count = 0; for (proxy = pg->proxy_list; proxy; proxy = proxy->next) pg->pg_core_count += proxy->node->core_count; } status = HYDU_sock_create_and_listen_portstr(HYD_server_info.user_global.iface, HYD_server_info.localhost, HYD_server_info.port_range, &control_port, HYD_pmcd_pmiserv_control_listen_cb, (void *) (size_t) new_pgid); HYDU_ERR_POP(status, "unable to create PMI port\n"); if (HYD_server_info.user_global.debug) HYDU_dump(stdout, "Got a control port string of %s\n", control_port); /* Go to the last PG */ for (pg = &HYD_server_info.pg_list; pg->next; pg = pg->next); status = HYD_pmcd_pmi_fill_in_proxy_args(&proxy_stash, control_port, new_pgid); HYDU_ERR_POP(status, "unable to fill in proxy arguments\n"); HYDU_FREE(control_port); status = HYD_pmcd_pmi_fill_in_exec_launch_info(pg); HYDU_ERR_POP(status, "unable to fill in executable arguments\n"); status = HYDT_bsci_launch_procs(proxy_stash.strlist, pg->proxy_list, NULL); HYDU_ERR_POP(status, "launcher cannot launch processes\n"); { char *cmd; HYD_STRING_STASH_INIT(stash); HYD_STRING_STASH(stash, HYDU_strdup("cmd=spawn-response;"), status); if (thrid) { HYD_STRING_STASH(stash, HYDU_strdup("thrid="), status); HYD_STRING_STASH(stash, HYDU_strdup(thrid), status); HYD_STRING_STASH(stash, HYDU_strdup(";"), status); } HYD_STRING_STASH(stash, HYDU_strdup("rc=0;"), status); HYD_STRING_STASH(stash, HYDU_strdup("jobid="), status); HYD_STRING_STASH(stash, HYDU_strdup(pg_scratch->kvs->kvsname), status); HYD_STRING_STASH(stash, HYDU_strdup(";"), status); HYD_STRING_STASH(stash, HYDU_strdup("nerrs=0;"), status); HYD_STRING_SPIT(stash, cmd, status); status = cmd_response(fd, pid, cmd); HYDU_ERR_POP(status, "send command failed\n"); HYDU_FREE(cmd); } fn_exit: HYD_pmcd_pmi_free_tokens(tokens, token_count); 
HYD_STRING_STASH_FREE(proxy_stash); if (segment_list) HYDU_FREE(segment_list); HYDU_FUNC_EXIT(); return status; fn_fail: goto fn_exit; }
static HYD_status handle_bitmap_binding(const char *binding, const char *mapping) { int i, j, k, bind_count, map_count, cache_depth = 0, bind_depth = 0, map_depth = 0; int total_map_objs, total_bind_objs, num_pus_in_map_domain, num_pus_in_bind_domain, total_map_domains; hwloc_obj_t map_obj, bind_obj, *start_pu; hwloc_cpuset_t *map_domains; char *bind_str, *map_str; HYD_status status = HYD_SUCCESS; HYDU_FUNC_ENTER(); /* split out the count fields */ status = split_count_field(binding, &bind_str, &bind_count); HYDU_ERR_POP(status, "error splitting count field\n"); status = split_count_field(mapping, &map_str, &map_count); HYDU_ERR_POP(status, "error splitting count field\n"); /* get the binding object */ if (!strcmp(bind_str, "board")) bind_depth = hwloc_get_type_or_above_depth(topology, HWLOC_OBJ_MACHINE); else if (!strcmp(bind_str, "numa")) bind_depth = hwloc_get_type_or_above_depth(topology, HWLOC_OBJ_NODE); else if (!strcmp(bind_str, "socket")) bind_depth = hwloc_get_type_or_above_depth(topology, HWLOC_OBJ_SOCKET); else if (!strcmp(bind_str, "core")) bind_depth = hwloc_get_type_or_above_depth(topology, HWLOC_OBJ_CORE); else if (!strcmp(bind_str, "hwthread")) bind_depth = hwloc_get_type_or_above_depth(topology, HWLOC_OBJ_PU); else { /* check if it's in the l*cache format */ cache_depth = parse_cache_string(bind_str); if (!cache_depth) { HYDU_ERR_SETANDJUMP(status, HYD_INTERNAL_ERROR, "unrecognized binding string \"%s\"\n", binding); } bind_depth = hwloc_get_cache_type_depth(topology, cache_depth, -1); } /* get the mapping */ if (!strcmp(map_str, "board")) map_depth = hwloc_get_type_or_above_depth(topology, HWLOC_OBJ_MACHINE); else if (!strcmp(map_str, "numa")) map_depth = hwloc_get_type_or_above_depth(topology, HWLOC_OBJ_NODE); else if (!strcmp(map_str, "socket")) map_depth = hwloc_get_type_or_above_depth(topology, HWLOC_OBJ_SOCKET); else if (!strcmp(map_str, "core")) map_depth = hwloc_get_type_or_above_depth(topology, HWLOC_OBJ_CORE); else if (!strcmp(map_str, 
"hwthread")) map_depth = hwloc_get_type_or_above_depth(topology, HWLOC_OBJ_PU); else { cache_depth = parse_cache_string(map_str); if (!cache_depth) { HYDU_ERR_SETANDJUMP(status, HYD_INTERNAL_ERROR, "unrecognized mapping string \"%s\"\n", mapping); } map_depth = hwloc_get_cache_type_depth(topology, cache_depth, -1); } /* * Process Affinity Algorithm: * * The code below works in 3 stages. The end result is an array of all the possible * binding bitmaps for a system, based on the options specified. * * 1. Define all possible mapping "domains" in a system. A mapping domain is a group * of hardware elements found by traversing the topology. Each traversal skips the * number of elements the user specified in the mapping string. The traversal ends * when the next mapping domain == the first mapping domain. Note that if the * mapping string defines a domain that is larger than the system size, we exit * with an error. * * 2. Define the number of possible binding domains within a mapping domain. This * process is similar to step 1, in that we traverse the mapping domain finding * all possible bind combinations, stopping when a duplicate of the first binding * is reached. If a binding is larger (in # of PUs) than the mapping domain, * the number of possible bindings for that domain is 1. In this stage, we also * locate the first PU in each mapping domain for use later during binding. * * 3. Create the binding bitmaps. We allocate an array of bitmaps and fill them in * with all possible bindings. The starting PU in each mapping domain is advanced * if and when we wrap around to the beginning of the mapping domains. This ensures * that we do not repeat. 
* */ /* calculate the number of map domains */ total_map_objs = hwloc_get_nbobjs_by_depth(topology, map_depth); num_pus_in_map_domain = (HYDT_topo_hwloc_info.total_num_pus / total_map_objs) * map_count; HYDU_ERR_CHKANDJUMP(status, num_pus_in_map_domain > HYDT_topo_hwloc_info.total_num_pus, HYD_INTERNAL_ERROR, "mapping option \"%s\" larger than total system size\n", mapping); /* The number of total_map_domains should be large enough to * contain all contiguous map object collections of length * map_count. For example, if the map object is "socket" and the * map_count is 3, on a system with 4 sockets, the following map * domains should be included: (0,1,2), (3,0,1), (2,3,0), (1,2,3). * We do this by finding how many times we need to replicate the * list of the map objects so that an integral number of map * domains can map to them. In the above case, the list of map * objects is replicated 3 times. */ for (i = 1; (i * total_map_objs) % map_count; i++); total_map_domains = (i * total_map_objs) / map_count; /* initialize the map domains */ HYDU_MALLOC_OR_JUMP(map_domains, hwloc_bitmap_t *, total_map_domains * sizeof(hwloc_bitmap_t), status); HYDU_MALLOC_OR_JUMP(start_pu, hwloc_obj_t *, total_map_domains * sizeof(hwloc_obj_t), status); /* For each map domain, find the next map object (first map object * for the first map domain) and add the following "map_count" * number of contiguous map objects, wrapping to the first one if * needed, to the map domain. Store the first PU in the first map * object of the map domain as "start_pu". This is needed later * for the actual binding. */ map_obj = NULL; for (i = 0; i < total_map_domains; i++) { map_domains[i] = hwloc_bitmap_alloc(); hwloc_bitmap_zero(map_domains[i]); for (j = 0; j < map_count; j++) { map_obj = hwloc_get_next_obj_by_depth(topology, map_depth, map_obj); /* map_obj will be NULL if it reaches the end. 
call again to wrap around */ if (!map_obj) map_obj = hwloc_get_next_obj_by_depth(topology, map_depth, map_obj); if (j == 0) start_pu[i] = hwloc_get_obj_inside_cpuset_by_type(topology, map_obj->cpuset, HWLOC_OBJ_PU, 0); hwloc_bitmap_or(map_domains[i], map_domains[i], map_obj->cpuset); } } /* Find the possible binding domains is similar to that of map * domains. But if a binding domain is larger (in # of PUs) than * the mapping domain, the number of possible bindings for that * domain is 1. */ /* calculate the number of possible bindings and allocate bitmaps for them */ total_bind_objs = hwloc_get_nbobjs_by_depth(topology, bind_depth); num_pus_in_bind_domain = (HYDT_topo_hwloc_info.total_num_pus / total_bind_objs) * bind_count; if (num_pus_in_bind_domain < num_pus_in_map_domain) { for (i = 1; (i * num_pus_in_map_domain) % num_pus_in_bind_domain; i++); HYDT_topo_hwloc_info.num_bitmaps = (i * num_pus_in_map_domain * total_map_domains) / num_pus_in_bind_domain; } else { HYDT_topo_hwloc_info.num_bitmaps = total_map_domains; } /* initialize bitmaps */ HYDU_MALLOC_OR_JUMP(HYDT_topo_hwloc_info.bitmap, hwloc_bitmap_t *, HYDT_topo_hwloc_info.num_bitmaps * sizeof(hwloc_bitmap_t), status); for (i = 0; i < HYDT_topo_hwloc_info.num_bitmaps; i++) { HYDT_topo_hwloc_info.bitmap[i] = hwloc_bitmap_alloc(); hwloc_bitmap_zero(HYDT_topo_hwloc_info.bitmap[i]); } /* do bindings */ i = 0; while (i < HYDT_topo_hwloc_info.num_bitmaps) { for (j = 0; j < total_map_domains; j++) { bind_obj = hwloc_get_ancestor_obj_by_depth(topology, bind_depth, start_pu[j]); for (k = 0; k < bind_count; k++) { hwloc_bitmap_or(HYDT_topo_hwloc_info.bitmap[i], HYDT_topo_hwloc_info.bitmap[i], bind_obj->cpuset); /* if the binding is smaller than the mapping domain, wrap around inside that domain */ if (num_pus_in_bind_domain < num_pus_in_map_domain) { bind_obj = hwloc_get_next_obj_inside_cpuset_by_depth(topology, map_domains[j], bind_depth, bind_obj); if (!bind_obj) bind_obj = 
hwloc_get_next_obj_inside_cpuset_by_depth(topology, map_domains[j], bind_depth, bind_obj); } else { bind_obj = hwloc_get_next_obj_by_depth(topology, bind_depth, bind_obj); if (!bind_obj) bind_obj = hwloc_get_next_obj_by_depth(topology, bind_depth, bind_obj); } } i++; /* advance the starting position for this map domain, if needed */ if (num_pus_in_bind_domain < num_pus_in_map_domain) { for (k = 0; k < num_pus_in_bind_domain; k++) { start_pu[j] = hwloc_get_next_obj_inside_cpuset_by_type(topology, map_domains[j], HWLOC_OBJ_PU, start_pu[j]); if (!start_pu[j]) start_pu[j] = hwloc_get_next_obj_inside_cpuset_by_type(topology, map_domains[j], HWLOC_OBJ_PU, start_pu[j]); } } } } /* free temporary memory */ MPL_free(map_domains); MPL_free(start_pu); fn_exit: HYDU_FUNC_EXIT(); return status; fn_fail: goto fn_exit; }
static HYD_status fn_fullinit(int fd, char *args[]) { int id, i; char *rank_str; struct HYD_string_stash stash; char *cmd; struct HYD_pmcd_token *tokens; int token_count; HYD_status status = HYD_SUCCESS; HYDU_FUNC_ENTER(); status = HYD_pmcd_pmi_args_to_tokens(args, &tokens, &token_count); HYDU_ERR_POP(status, "unable to convert args to tokens\n"); rank_str = HYD_pmcd_pmi_find_token_keyval(tokens, token_count, "pmirank"); HYDU_ERR_CHKANDJUMP(status, rank_str == NULL, HYD_INTERNAL_ERROR, "unable to find pmirank token\n"); id = atoi(rank_str); /* Store the PMI_RANK to fd mapping */ for (i = 0; i < HYD_pmcd_pmip.local.proxy_process_count; i++) { if (HYD_pmcd_pmip.downstream.pmi_rank[i] == id) { HYD_pmcd_pmip.downstream.pmi_fd[i] = fd; HYD_pmcd_pmip.downstream.pmi_fd_active[i] = 1; break; } } HYDU_ASSERT(i < HYD_pmcd_pmip.local.proxy_process_count, status); HYD_STRING_STASH_INIT(stash); HYD_STRING_STASH(stash, MPL_strdup("cmd=fullinit-response;pmi-version=2;pmi-subversion=0;rank="), status); HYD_STRING_STASH(stash, HYDU_int_to_str(id), status); HYD_STRING_STASH(stash, MPL_strdup(";size="), status); HYD_STRING_STASH(stash, HYDU_int_to_str(HYD_pmcd_pmip.system_global.global_process_count), status); HYD_STRING_STASH(stash, MPL_strdup(";appnum=0"), status); if (HYD_pmcd_pmip.local.spawner_kvsname) { HYD_STRING_STASH(stash, MPL_strdup(";spawner-jobid="), status); HYD_STRING_STASH(stash, MPL_strdup(HYD_pmcd_pmip.local.spawner_kvsname), status); } if (HYD_pmcd_pmip.user_global.debug) { HYD_STRING_STASH(stash, MPL_strdup(";debugged=TRUE;pmiverbose=TRUE"), status); } else { HYD_STRING_STASH(stash, MPL_strdup(";debugged=FALSE;pmiverbose=FALSE"), status); } HYD_STRING_STASH(stash, MPL_strdup(";rc=0;"), status); HYD_STRING_SPIT(stash, cmd, status); send_cmd_downstream(fd, cmd); MPL_free(cmd); fn_exit: HYD_pmcd_pmi_free_tokens(tokens, token_count); HYDU_FUNC_EXIT(); return status; fn_fail: goto fn_exit; }