/*
 * Append one environment entry, given as a single "NAME=VALUE" style string,
 * to *env_list.
 *
 * str      - input string; it is duplicated first, so the caller's buffer is
 *            never modified.
 * env_list - list that receives the entry via HYDU_append_env_to_list().
 *
 * Returns the status produced by HYDU_append_env_to_list(), or whatever
 * HYDU_ASSERT stores in status when no name could be extracted.
 */
HYD_status HYDU_append_env_str_to_list(const char *str, struct HYD_env **env_list)
{
    HYD_status status = HYD_SUCCESS;
    char *scratch = NULL;       /* private copy of str; released at fn_exit */
    char *env_name;
    char *value_cursor;

    HYDU_FUNC_ENTER();

    scratch = MPL_strdup(str);
    value_cursor = scratch;

    /* Split with MPL_strsep rather than strtok: strtok would mangle values
     * that themselves contain '='.  After the call value_cursor refers to
     * the remainder following the separator (presumably NULL when the input
     * has no '=' at all -- confirm against MPL_strsep). */
    env_name = MPL_strsep(&value_cursor, "=");
    HYDU_ASSERT(env_name != NULL, status);

    status = HYDU_append_env_to_list(env_name, value_cursor, env_list);
    HYDU_ERR_POP(status, "unable to append env to list\n");

  fn_exit:
    /* env_name and value_cursor both point into scratch, so one free suffices */
    if (scratch)
        MPL_free(scratch);
    HYDU_FUNC_EXIT();
    return status;

  fn_fail:
    goto fn_exit;
}
HYD_status HYDT_ckpoint_blcr_restart(const char *prefix, int pgid, int id, int ckpt_num, struct HYD_env *envlist, int num_ranks, int ranks[], int *in, int *out, int *err, int *pid) { HYD_status status = HYD_SUCCESS; int ret; int context_fd; cr_restart_handle_t cr_handle; cr_restart_args_t args; char filename[256]; char port_str[64]; int port; HYDU_FUNC_ENTER(); /* create listener socket for stdin/out/err */ status = create_stdinouterr_sock(&port); HYDU_ERR_POP(status, "failed to create stdin/out/err socket\n"); MPL_snprintf(port_str, sizeof(port_str), "%d", port); status = HYDU_append_env_to_list(STDINOUTERR_PORT_NAME, port_str, &envlist); HYDU_ERR_POP(status, "failed to add to env list\n"); status = create_env_file(envlist, num_ranks, ranks); if (status) HYDU_ERR_POP(status, "blcr restart\n"); /* open the checkpoint file */ MPL_snprintf(filename, sizeof(filename), "%s/context-num%d-%d-%d", prefix, ckpt_num, pgid, id); context_fd = open(filename, O_RDONLY /* | O_LARGEFILE */); HYDU_ERR_CHKANDJUMP(status, context_fd < 0, HYD_INTERNAL_ERROR, "open failed, %s\n", strerror(errno)); /* ... initialize the request structure */ cr_initialize_restart_args_t(&args); args.cr_fd = context_fd; args.cr_flags = CR_RSTRT_RESTORE_PID; /* ... issue the request */ ret = cr_request_restart(&args, &cr_handle); HYDU_ERR_CHKANDJUMP(status, ret, HYD_INTERNAL_ERROR, "cr_request_restart failed, %s\n", strerror(errno)); ret = close(context_fd); HYDU_ERR_CHKANDJUMP(status, ret, HYD_INTERNAL_ERROR, "close failed, %s\n", strerror(errno)); /* get fds for stdin/out/err sockets, and get pids of restarted processes */ status = wait_for_stdinouterr_sockets(num_ranks, ranks, in, out, err, pid); if (status) HYDU_ERR_POP(status, "blcr restart\n"); fn_exit: HYDU_FUNC_EXIT(); return status; fn_fail: goto fn_exit; }
/*
 * Append every name in a comma-separated list to *env_list, each with a NULL
 * value.
 *
 * str      - comma-separated names.  The buffer is modified in place because
 *            it is tokenized with strtok().  A NULL, empty, or all-comma
 *            string adds nothing and yields HYD_SUCCESS.
 * env_list - list that receives the entries via HYDU_append_env_to_list().
 *
 * Returns HYD_SUCCESS, or the first failing status from
 * HYDU_append_env_to_list().
 */
HYD_status HYDU_comma_list_to_env_list(char *str, struct HYD_env **env_list)
{
    char *env_name;
    HYD_status status = HYD_SUCCESS;

    HYDU_FUNC_ENTER();

    /* strtok(NULL, ...) would resume some earlier, unrelated tokenization */
    if (str == NULL)
        goto fn_exit;

    /* Test the token before using it: strtok() returns NULL right away when
     * the string holds no non-delimiter characters, and a NULL name must not
     * be handed to HYDU_append_env_to_list(). */
    for (env_name = strtok(str, ","); env_name != NULL; env_name = strtok(NULL, ",")) {
        status = HYDU_append_env_to_list(env_name, NULL, env_list);
        HYDU_ERR_POP(status, "unable to add env to list\n");
    }

  fn_exit:
    HYDU_FUNC_EXIT();
    return status;

  fn_fail:
    goto fn_exit;
}
/*
 * Build a new environment list holding the same name/value pairs as env,
 * visited in list order, by appending each pair with
 * HYDU_append_env_to_list().
 *
 * env - head of the list to copy; may be NULL, in which case NULL is returned.
 *
 * Returns the head of the new list, or NULL if any append fails.
 */
struct HYD_env *HYDU_env_list_dup(struct HYD_env *env)
{
    HYD_status status = HYD_SUCCESS;
    struct HYD_env *copy = NULL;
    struct HYD_env *cur;

    HYDU_FUNC_ENTER();

    for (cur = env; cur; cur = cur->next) {
        status = HYDU_append_env_to_list(cur->env_name, cur->env_value, &copy);
        HYDU_ERR_POP(status, "unable to add env to list\n");
    }

  fn_exit:
    HYDU_FUNC_EXIT();
    return copy;

  fn_fail:
    /* NOTE(review): entries appended before the failure are not released
     * here; only the pointer is reset -- confirm whether this is intended. */
    copy = NULL;
    goto fn_exit;
}