Пример #1
0
static HYD_status create_env_file(const struct HYD_env *envlist, int num_ranks, int *ranks)
{
    HYD_status status = HYD_SUCCESS;
    char filename[256];
    FILE *f;
    const struct HYD_env *e;
    int ret;
    int r;

    HYDU_FUNC_ENTER();

    for (r = 0; r < num_ranks; ++r) {
        MPL_snprintf(filename, sizeof(filename), "/tmp/hydra-env-file-%d:%d", (int) getpid(),
                     ranks[r]);

        f = fopen(filename, "w");
        HYDU_ERR_CHKANDJUMP(status, f == NULL, HYD_INTERNAL_ERROR, "fopen failed: %s\n",
                            strerror(errno));

        for (e = envlist; e; e = e->next) {
            fprintf(f, "%s=%s\n", e->env_name, e->env_value);
        }

        ret = fclose(f);
        HYDU_ERR_CHKANDJUMP(status, ret, HYD_INTERNAL_ERROR, "fclose failed: %s\n",
                            strerror(errno));
    }

  fn_exit:
    HYDU_FUNC_EXIT();
    return status;

  fn_fail:
    goto fn_exit;
}
Пример #2
0
HYD_status HYDT_ckpoint_blcr_restart(const char *prefix, int pgid, int id, int ckpt_num,
                                     struct HYD_env *envlist, int num_ranks, int ranks[],
                                     int *in, int *out, int *err, int *pid)
{
    HYD_status status = HYD_SUCCESS;
    int ret;
    int context_fd;
    cr_restart_handle_t cr_handle;
    cr_restart_args_t args;
    char filename[256];
    char port_str[64];
    int port;

    HYDU_FUNC_ENTER();

    /* create listener socket for stdin/out/err */
    status = create_stdinouterr_sock(&port);
    HYDU_ERR_POP(status, "failed to create stdin/out/err socket\n");
    MPL_snprintf(port_str, sizeof(port_str), "%d", port);
    status = HYDU_append_env_to_list(STDINOUTERR_PORT_NAME, port_str, &envlist);
    HYDU_ERR_POP(status, "failed to add to env list\n");

    status = create_env_file(envlist, num_ranks, ranks);
    if (status)
        HYDU_ERR_POP(status, "blcr restart\n");

    /* open the checkpoint file */
    MPL_snprintf(filename, sizeof(filename), "%s/context-num%d-%d-%d", prefix, ckpt_num, pgid,
                 id);
    context_fd = open(filename, O_RDONLY /* | O_LARGEFILE */);
    HYDU_ERR_CHKANDJUMP(status, context_fd < 0, HYD_INTERNAL_ERROR, "open failed, %s\n",
                        strerror(errno));

    /* ... initialize the request structure */
    cr_initialize_restart_args_t(&args);
    args.cr_fd = context_fd;
    args.cr_flags = CR_RSTRT_RESTORE_PID;

    /* ... issue the request */
    ret = cr_request_restart(&args, &cr_handle);
    HYDU_ERR_CHKANDJUMP(status, ret, HYD_INTERNAL_ERROR, "cr_request_restart failed, %s\n",
                        strerror(errno));

    ret = close(context_fd);
    HYDU_ERR_CHKANDJUMP(status, ret, HYD_INTERNAL_ERROR, "close failed, %s\n",
                        strerror(errno));

    /* get fds for stdin/out/err sockets, and get pids of restarted processes */
    status = wait_for_stdinouterr_sockets(num_ranks, ranks, in, out, err, pid);
    if (status)
        HYDU_ERR_POP(status, "blcr restart\n");

  fn_exit:
    HYDU_FUNC_EXIT();
    return status;

  fn_fail:
    goto fn_exit;
}
Пример #3
0
static HYD_status fn_info_putnodeattr(int fd, char *args[])
{
    struct HYD_string_stash stash;
    char *key, *val, *thrid, *cmd;
    struct HYD_pmcd_token *tokens = NULL;
    int token_count, ret;
    struct HYD_pmcd_pmi_v2_reqs *req;
    HYD_status status = HYD_SUCCESS;

    HYDU_FUNC_ENTER();

    status = HYD_pmcd_pmi_args_to_tokens(args, &tokens, &token_count);
    HYDU_ERR_POP(status, "unable to convert args to tokens\n");

    key = HYD_pmcd_pmi_find_token_keyval(tokens, token_count, "key");
    HYDU_ERR_CHKANDJUMP(status, key == NULL, HYD_INTERNAL_ERROR, "unable to find key token\n");

    val = HYD_pmcd_pmi_find_token_keyval(tokens, token_count, "value");
    HYDU_ERR_CHKANDJUMP(status, val == NULL, HYD_INTERNAL_ERROR, "unable to find value token\n");

    thrid = HYD_pmcd_pmi_find_token_keyval(tokens, token_count, "thrid");

    status = HYD_pmcd_pmi_add_kvs(key, val, HYD_pmcd_pmip.local.kvs, &ret);
    HYDU_ERR_POP(status, "unable to put data into kvs\n");

    HYD_STRING_STASH_INIT(stash);
    HYD_STRING_STASH(stash, MPL_strdup("cmd=info-putnodeattr-response;"), status);
    if (thrid) {
        HYD_STRING_STASH(stash, MPL_strdup("thrid="), status);
        HYD_STRING_STASH(stash, MPL_strdup(thrid), status);
        HYD_STRING_STASH(stash, MPL_strdup(";"), status);
    }
    HYD_STRING_STASH(stash, MPL_strdup("rc="), status);
    HYD_STRING_STASH(stash, HYDU_int_to_str(ret), status);
    HYD_STRING_STASH(stash, MPL_strdup(";"), status);

    HYD_STRING_SPIT(stash, cmd, status);

    send_cmd_downstream(fd, cmd);
    MPL_free(cmd);

    for (req = pending_reqs; req; req = req->next) {
        if (!strcmp(req->key, key)) {
            /* Poke the progress engine before exiting */
            status = poke_progress(key);
            HYDU_ERR_POP(status, "poke progress error\n");
            break;
        }
    }

  fn_exit:
    if (tokens)
        HYD_pmcd_pmi_free_tokens(tokens, token_count);
    HYDU_FUNC_EXIT();
    return status;

  fn_fail:
    goto fn_exit;
}
Пример #4
0
HYD_status HYDT_bscd_pbs_launcher_finalize(void)
{
    int err;
    HYD_status status = HYD_SUCCESS;

    HYDU_FUNC_ENTER();

#if defined(HAVE_TM_H)
    err = tm_finalize();
    HYDU_ERR_CHKANDJUMP(status, err != TM_SUCCESS, HYD_INTERNAL_ERROR,
                        "error calling tm_finalize\n");
#endif /* HAVE_TM_H */

    if (HYDT_bscd_pbs_sys) {
        if (HYDT_bscd_pbs_sys->task_id)
            HYDU_FREE(HYDT_bscd_pbs_sys->task_id);
        if (HYDT_bscd_pbs_sys->spawn_events)
            HYDU_FREE(HYDT_bscd_pbs_sys->spawn_events);
        HYDU_FREE(HYDT_bscd_pbs_sys);
    }

  fn_exit:
    HYDU_FUNC_EXIT();
    return status;

  fn_fail:
    goto fn_exit;
}
Пример #5
0
static HYD_status create_stdinouterr_sock(int *port)
{
    HYD_status status = HYD_SUCCESS;
    int ret;
    struct sockaddr_in sin;
    socklen_t len;
    HYDU_FUNC_ENTER();

    listen_fd = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);
    HYDU_ERR_CHKANDJUMP(status, listen_fd < 0, HYD_INTERNAL_ERROR, "socket() failed, %s\n",
                        strerror(errno));

    memset((void *) &sin, 0, sizeof(sin));
    sin.sin_family = AF_INET;
    sin.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
    sin.sin_port = htons(0);

    ret = bind(listen_fd, (struct sockaddr *) &sin, sizeof(sin));
    HYDU_ERR_CHKANDJUMP(status, ret, HYD_INTERNAL_ERROR, "bind() failed, %s\n",
                        strerror(errno));

    ret = listen(listen_fd, SOMAXCONN);
    HYDU_ERR_CHKANDJUMP(status, ret, HYD_INTERNAL_ERROR, "listen() failed, %s\n",
                        strerror(errno));

    len = sizeof(sin);
    ret = getsockname(listen_fd, (struct sockaddr *) &sin, &len);
    HYDU_ERR_CHKANDJUMP(status, ret, HYD_INTERNAL_ERROR, "getsockname() failed, %s\n",
                        strerror(errno));

    *port = ntohs(sin.sin_port);

  fn_exit:
    HYDU_FUNC_EXIT();
    return status;
  fn_fail:
    goto fn_exit;
}
Пример #6
0
HYD_status HYDU_set_int(char *arg, int *var, int val)
{
    HYD_status status = HYD_SUCCESS;

    HYDU_ERR_CHKANDJUMP(status, *var != -1, HYD_INTERNAL_ERROR, "duplicate setting: %s\n", arg);

    *var = val;

fn_exit:
    return status;

fn_fail:
    goto fn_exit;
}
Пример #7
0
static HYD_status control_port_fn(char *arg, char ***argv)
{
    char *port = NULL, *name;
    HYD_status status = HYD_SUCCESS;

    HYDU_ERR_CHKANDJUMP(status, HYD_pmcd_pmip.upstream.server_name, HYD_INTERNAL_ERROR,
                        "duplicate control port setting\n");

    port = MPL_strdup(**argv);
    HYDU_ERR_CHKANDJUMP(status, NULL == port, HYD_INTERNAL_ERROR, "port not provided\n");
    name = strtok(port, ":");
    HYD_pmcd_pmip.upstream.server_name = name ? MPL_strdup(name) : NULL;
    HYD_pmcd_pmip.upstream.server_port = strtol(strtok(NULL, ":"), NULL, 10);

    (*argv)++;

  fn_exit:
    MPL_free(port);
    return status;

  fn_fail:
    goto fn_exit;
}
Пример #8
0
static HYD_status fn_info_getjobattr(int fd, char *args[])
{
    struct HYD_string_stash stash;
    char *cmd, *key, *thrid;
    struct HYD_pmcd_token *tokens;
    int token_count;
    HYD_status status = HYD_SUCCESS;

    HYDU_FUNC_ENTER();

    status = HYD_pmcd_pmi_args_to_tokens(args, &tokens, &token_count);
    HYDU_ERR_POP(status, "unable to convert args to tokens\n");

    key = HYD_pmcd_pmi_find_token_keyval(tokens, token_count, "key");
    HYDU_ERR_CHKANDJUMP(status, key == NULL, HYD_INTERNAL_ERROR, "unable to find token: key\n");

    thrid = HYD_pmcd_pmi_find_token_keyval(tokens, token_count, "thrid");

    if (!strcmp(key, "PMI_process_mapping")) {
        HYD_STRING_STASH_INIT(stash);
        HYD_STRING_STASH(stash, MPL_strdup("cmd=info-getjobattr-response;"), status);
        if (thrid) {
            HYD_STRING_STASH(stash, MPL_strdup("thrid="), status);
            HYD_STRING_STASH(stash, MPL_strdup(thrid), status);
            HYD_STRING_STASH(stash, MPL_strdup(";"), status);
        }
        HYD_STRING_STASH(stash, MPL_strdup("found=TRUE;value="), status);
        HYD_STRING_STASH(stash, MPL_strdup(HYD_pmcd_pmip.system_global.pmi_process_mapping),
                         status);
        HYD_STRING_STASH(stash, MPL_strdup(";rc=0;"), status);

        HYD_STRING_SPIT(stash, cmd, status);

        send_cmd_downstream(fd, cmd);
        MPL_free(cmd);
    } else {
        status = send_cmd_upstream("cmd=info-getjobattr;", fd, args);
        HYDU_ERR_POP(status, "error sending command upstream\n");
    }

  fn_exit:
    HYD_pmcd_pmi_free_tokens(tokens, token_count);
    HYDU_FUNC_EXIT();
    return status;

  fn_fail:
    goto fn_exit;
}
Пример #9
0
HYD_status HYDU_set_str(char *arg, char **var, const char *val)
{
    HYD_status status = HYD_SUCCESS;

    HYDU_ERR_CHKANDJUMP(status, *var, HYD_INTERNAL_ERROR, "duplicate setting: %s\n", arg);

    if (val == NULL)
        HYDU_ERR_SETANDJUMP(status, HYD_INTERNAL_ERROR, "cannot assign NULL object\n");

    *var = MPL_strdup(val);

fn_exit:
    return status;

fn_fail:
    goto fn_exit;
}
Пример #10
0
static HYD_status control_port_fn(char *arg, char ***argv)
{
    char *port = NULL;
    HYD_status status = HYD_SUCCESS;

    HYDU_ERR_CHKANDJUMP(status, HYD_pmcd_pmip.upstream.server_name, HYD_INTERNAL_ERROR,
                        "duplicate control port setting\n");

    port = MPL_strdup(**argv);
    HYD_pmcd_pmip.upstream.server_name = MPL_strdup(strtok(port, ":"));
    HYD_pmcd_pmip.upstream.server_port = atoi(strtok(NULL, ":"));

    (*argv)++;

  fn_exit:
    if (port)
        MPL_free(port);
    return status;

  fn_fail:
    goto fn_exit;
}
Пример #11
0
static HYD_status fn_kvs_get(int fd, int pid, int pgid, char *args[])
{
    int i, idx, found;
    struct HYD_pmcd_pmi_pg_scratch *pg_scratch;
    struct HYD_pg *pg;
    struct HYD_proxy *proxy;
    struct HYD_pmcd_pmi_kvs_pair *run;
    char *key, *thrid, *cmd;
    struct HYD_string_stash stash;
    struct HYD_pmcd_token *tokens;
    int token_count;
    HYD_status status = HYD_SUCCESS;

    HYDU_FUNC_ENTER();

    status = HYD_pmcd_pmi_args_to_tokens(args, &tokens, &token_count);
    HYDU_ERR_POP(status, "unable to convert args to tokens\n");

    key = HYD_pmcd_pmi_find_token_keyval(tokens, token_count, "key");
    HYDU_ERR_CHKANDJUMP(status, key == NULL, HYD_INTERNAL_ERROR, "unable to find key token\n");

    thrid = HYD_pmcd_pmi_find_token_keyval(tokens, token_count, "thrid");

    proxy = HYD_pmcd_pmi_find_proxy(fd);
    HYDU_ASSERT(proxy, status);

    pg_scratch = (struct HYD_pmcd_pmi_pg_scratch *) proxy->pg->pg_scratch;

    found = 0;
    for (run = pg_scratch->kvs->key_pair; run; run = run->next) {
        if (!strcmp(run->key, key)) {
            found = 1;
            break;
        }
    }

    if (!found) {
        pg = proxy->pg;
        pg_scratch = (struct HYD_pmcd_pmi_pg_scratch *) pg->pg_scratch;

        idx = -1;
        for (i = 0; i < pg->pg_process_count; i++)
            if (pg_scratch->ecount[i].fd == fd && pg_scratch->ecount[i].pid == pid) {
                idx = i;
                break;
            }

        HYDU_ASSERT(idx != -1, status);

        for (i = 0; i < pg->pg_process_count; i++) {
            if (pg_scratch->ecount[i].epoch < pg_scratch->ecount[idx].epoch) {
                /* We haven't reached a barrier yet; queue up request */
                status = HYD_pmcd_pmi_v2_queue_req(fd, pid, pgid, args, key, &pending_reqs);
                HYDU_ERR_POP(status, "unable to queue request\n");

                /* We are done */
                goto fn_exit;
            }
        }
    }

    HYD_STRING_STASH_INIT(stash);
    HYD_STRING_STASH(stash, HYDU_strdup("cmd=kvs-get-response;"), status);
    if (thrid) {
        HYD_STRING_STASH(stash, HYDU_strdup("thrid="), status);
        HYD_STRING_STASH(stash, HYDU_strdup(thrid), status);
        HYD_STRING_STASH(stash, HYDU_strdup(";"), status);
    }
    if (found) {
        HYD_STRING_STASH(stash, HYDU_strdup("found=TRUE;value="), status);
        HYD_STRING_STASH(stash, HYDU_strdup(run->val), status);
        HYD_STRING_STASH(stash, HYDU_strdup(";"), status);
    }
    else {
        HYD_STRING_STASH(stash, HYDU_strdup("found=FALSE;"), status);
    }
    HYD_STRING_STASH(stash, HYDU_strdup("rc=0;"), status);

    HYD_STRING_SPIT(stash, cmd, status);

    status = cmd_response(fd, pid, cmd);
    HYDU_ERR_POP(status, "send command failed\n");
    HYDU_FREE(cmd);

  fn_exit:
    HYD_pmcd_pmi_free_tokens(tokens, token_count);
    HYDU_FUNC_EXIT();
    return status;

  fn_fail:
    goto fn_exit;
}
Пример #12
0
static HYD_status fn_info_getjobattr(int fd, int pid, int pgid, char *args[])
{
    struct HYD_proxy *proxy;
    struct HYD_pmcd_pmi_pg_scratch *pg_scratch;
    struct HYD_pmcd_pmi_kvs_pair *run;
    const char *key;
    char *thrid, *val, *cmd;
    struct HYD_string_stash stash;
    struct HYD_pmcd_token *tokens;
    int token_count;
    HYD_status status = HYD_SUCCESS;

    HYDU_FUNC_ENTER();

    status = HYD_pmcd_pmi_args_to_tokens(args, &tokens, &token_count);
    HYDU_ERR_POP(status, "unable to convert args to tokens\n");

    key = HYD_pmcd_pmi_find_token_keyval(tokens, token_count, "key");
    HYDU_ERR_CHKANDJUMP(status, key == NULL, HYD_INTERNAL_ERROR, "unable to find key token\n");

    thrid = HYD_pmcd_pmi_find_token_keyval(tokens, token_count, "thrid");

    proxy = HYD_pmcd_pmi_find_proxy(fd);
    HYDU_ASSERT(proxy, status);

    pg_scratch = (struct HYD_pmcd_pmi_pg_scratch *) proxy->pg->pg_scratch;

    val = NULL;
    if (!strcmp(key, "PMI_dead_processes"))
        val = pg_scratch->dead_processes;

    /* Try to find the key */
    for (run = pg_scratch->kvs->key_pair; run; run = run->next) {
        if (!strcmp(run->key, key)) {
            val = run->val;
            break;
        }
    }

    HYD_STRING_STASH_INIT(stash);
    HYD_STRING_STASH(stash, HYDU_strdup("cmd=info-getjobattr-response;"), status);
    if (thrid) {
        HYD_STRING_STASH(stash, HYDU_strdup("thrid="), status);
        HYD_STRING_STASH(stash, HYDU_strdup(thrid), status);
        HYD_STRING_STASH(stash, HYDU_strdup(";"), status);
    }
    HYD_STRING_STASH(stash, HYDU_strdup("found="), status);
    if (val) {
        HYD_STRING_STASH(stash, HYDU_strdup("TRUE;value="), status);
        HYD_STRING_STASH(stash, HYDU_strdup(val), status);
        HYD_STRING_STASH(stash, HYDU_strdup(";rc=0;"), status);
    }
    else {
        HYD_STRING_STASH(stash, HYDU_strdup("FALSE;rc=0;"), status);
    }

    HYD_STRING_SPIT(stash, cmd, status);

    status = cmd_response(fd, pid, cmd);
    HYDU_ERR_POP(status, "send command failed\n");

    HYDU_FREE(cmd);

  fn_exit:
    HYD_pmcd_pmi_free_tokens(tokens, token_count);
    HYDU_FUNC_EXIT();
    return status;

  fn_fail:
    goto fn_exit;
}
Пример #13
0
static HYD_status fn_info_getnodeattr(int fd, char *args[])
{
    int found;
    struct HYD_pmcd_pmi_kvs_pair *run;
    char *key, *waitval, *thrid;
    struct HYD_string_stash stash;
    char *cmd;
    struct HYD_pmcd_token *tokens = NULL;
    int token_count;
    HYD_status status = HYD_SUCCESS;

    HYDU_FUNC_ENTER();

    status = HYD_pmcd_pmi_args_to_tokens(args, &tokens, &token_count);
    HYDU_ERR_POP(status, "unable to convert args to tokens\n");

    key = HYD_pmcd_pmi_find_token_keyval(tokens, token_count, "key");
    HYDU_ERR_CHKANDJUMP(status, key == NULL, HYD_INTERNAL_ERROR, "unable to find key token\n");

    waitval = HYD_pmcd_pmi_find_token_keyval(tokens, token_count, "wait");
    thrid = HYD_pmcd_pmi_find_token_keyval(tokens, token_count, "thrid");

    /* if a predefined value is not found, we let the code fall back
     * to regular search and return an error to the client */

    found = 0;
    for (run = HYD_pmcd_pmip.local.kvs->key_pair; run; run = run->next) {
        if (!strcmp(run->key, key)) {
            found = 1;
            break;
        }
    }

    if (found) {        /* We found the attribute */
        HYD_STRING_STASH_INIT(stash);
        HYD_STRING_STASH(stash, MPL_strdup("cmd=info-getnodeattr-response;"), status);
        if (thrid) {
            HYD_STRING_STASH(stash, MPL_strdup("thrid="), status);
            HYD_STRING_STASH(stash, MPL_strdup(thrid), status);
            HYD_STRING_STASH(stash, MPL_strdup(";"), status);
        }
        HYD_STRING_STASH(stash, MPL_strdup("found=TRUE;value="), status);
        HYD_STRING_STASH(stash, MPL_strdup(run->val), status);
        HYD_STRING_STASH(stash, MPL_strdup(";rc=0;"), status);

        HYD_STRING_SPIT(stash, cmd, status);

        send_cmd_downstream(fd, cmd);
        MPL_free(cmd);
    } else if (waitval && !strcmp(waitval, "TRUE")) {
        /* The client wants to wait for a response; queue up the request */
        status = HYD_pmcd_pmi_v2_queue_req(fd, -1, -1, args, key, &pending_reqs);
        HYDU_ERR_POP(status, "unable to queue request\n");

        goto fn_exit;
    } else {
        /* Tell the client that we can't find the attribute */
        HYD_STRING_STASH_INIT(stash);
        HYD_STRING_STASH(stash, MPL_strdup("cmd=info-getnodeattr-response;"), status);
        if (thrid) {
            HYD_STRING_STASH(stash, MPL_strdup("thrid="), status);
            HYD_STRING_STASH(stash, MPL_strdup(thrid), status);
            HYD_STRING_STASH(stash, MPL_strdup(";"), status);
        }
        HYD_STRING_STASH(stash, MPL_strdup("found=FALSE;rc=0;"), status);

        HYD_STRING_SPIT(stash, cmd, status);

        send_cmd_downstream(fd, cmd);
        MPL_free(cmd);
    }

  fn_exit:
    if (tokens)
        HYD_pmcd_pmi_free_tokens(tokens, token_count);
    HYDU_FUNC_EXIT();
    return status;

  fn_fail:
    goto fn_exit;
}
Пример #14
0
static HYD_status fn_kvs_put(int fd, int pid, int pgid, char *args[])
{
    struct HYD_string_stash stash;
    char *key, *val, *thrid, *cmd;
    int ret;
    struct HYD_proxy *proxy;
    struct HYD_pmcd_pmi_pg_scratch *pg_scratch;
    struct HYD_pmcd_token *tokens;
    int token_count;
    struct HYD_pmcd_pmi_v2_reqs *req;
    HYD_status status = HYD_SUCCESS;

    HYDU_FUNC_ENTER();

    status = HYD_pmcd_pmi_args_to_tokens(args, &tokens, &token_count);
    HYDU_ERR_POP(status, "unable to convert args to tokens\n");

    key = HYD_pmcd_pmi_find_token_keyval(tokens, token_count, "key");
    HYDU_ERR_CHKANDJUMP(status, key == NULL, HYD_INTERNAL_ERROR, "unable to find key token\n");

    val = HYD_pmcd_pmi_find_token_keyval(tokens, token_count, "value");
    if (val == NULL) {
        /* the user sent an empty string */
        val = HYDU_strdup("");
    }

    thrid = HYD_pmcd_pmi_find_token_keyval(tokens, token_count, "thrid");

    proxy = HYD_pmcd_pmi_find_proxy(fd);
    HYDU_ASSERT(proxy, status);

    pg_scratch = (struct HYD_pmcd_pmi_pg_scratch *) proxy->pg->pg_scratch;

    status = HYD_pmcd_pmi_add_kvs(key, val, pg_scratch->kvs, &ret);
    HYDU_ERR_POP(status, "unable to put data into kvs\n");

    HYD_STRING_STASH_INIT(stash);
    HYD_STRING_STASH(stash, HYDU_strdup("cmd=kvs-put-response;"), status);
    if (thrid) {
        HYD_STRING_STASH(stash, HYDU_strdup("thrid="), status);
        HYD_STRING_STASH(stash, HYDU_strdup(thrid), status);
        HYD_STRING_STASH(stash, HYDU_strdup(";"), status);
    }
    HYD_STRING_STASH(stash, HYDU_strdup("rc="), status);
    HYD_STRING_STASH(stash, HYDU_int_to_str(ret), status);
    HYD_STRING_STASH(stash, HYDU_strdup(";"), status);

    HYD_STRING_SPIT(stash, cmd, status);

    status = cmd_response(fd, pid, cmd);
    HYDU_ERR_POP(status, "send command failed\n");
    HYDU_FREE(cmd);

    for (req = pending_reqs; req; req = req->next) {
        if (!strcmp(req->key, key)) {
            /* Poke the progress engine before exiting */
            status = poke_progress(key);
            HYDU_ERR_POP(status, "poke progress error\n");
            break;
        }
    }

  fn_exit:
    HYD_pmcd_pmi_free_tokens(tokens, token_count);
    HYDU_FUNC_EXIT();
    return status;

  fn_fail:
    goto fn_exit;
}
Пример #15
0
HYD_status HYDT_ckpoint_blcr_checkpoint(const char *prefix, int pgid, int id, int ckpt_num)
{
    HYD_status status = HYD_SUCCESS;
    int ret;
    int fd;
    cr_checkpoint_args_t my_args;
    cr_checkpoint_handle_t my_handle;
    char filename[256];

    HYDU_FUNC_ENTER();

    /* build the checkpoint filename */
    MPL_snprintf(filename, sizeof(filename), "%s/context-num%d-%d-%d", prefix, ckpt_num, pgid,
                 id);

    /* remove existing checkpoint file, if any */
    (void) unlink(filename);

    /* open the checkpoint file */
    fd = open(filename, O_WRONLY | O_CREAT | O_TRUNC /* | O_LARGEFILE */ , 0600);
    HYDU_ERR_CHKANDJUMP(status, fd < 0, HYD_INTERNAL_ERROR, "open failed: %s\n",
                        strerror(errno));

    cr_initialize_checkpoint_args_t(&my_args);
    my_args.cr_fd = fd;
    my_args.cr_scope = CR_SCOPE_TREE;

    /* issue the request */
    ret = cr_request_checkpoint(&my_args, &my_handle);
    if (ret < 0) {
        HYDU_ERR_CHKANDJUMP(status, errno == CR_ENOSUPPORT, HYD_INTERNAL_ERROR,
                            "Checkpointing failed.  Make sure BLCR kernel module is loaded. %s\n",
                            strerror(errno));
        HYDU_ERR_SETANDJUMP(status, HYD_INTERNAL_ERROR, "cr_request_checkpoint failed, %s\n",
                            strerror(errno));
    }
    /* wait for the request to complete */
    while (1) {
        ret = cr_poll_checkpoint(&my_handle, NULL);
        if (ret < 0) {
            if ((ret == CR_POLL_CHKPT_ERR_POST) && (errno == CR_ERESTARTED)) {
                HYDU_ERR_SETANDJUMP(status, HYD_INTERNAL_ERROR,
                                    "trying to restart in a checkpoint\n");
            }
            else if (errno == EINTR) {
                /* poll was interrupted by a signal -- retry */
            }
            else {
                HYDU_ERR_SETANDJUMP(status, HYD_INTERNAL_ERROR,
                                    "cr_poll_checkpoint failed: %s\n", strerror(errno));
            }
        }
        else if (ret == 0) {
            HYDU_ERR_SETANDJUMP(status, HYD_INTERNAL_ERROR,
                                "cr_poll_checkpoint returned 0 unexpectedly\n");
        }
        else {
            break;
        }
    }

    ret = close(my_args.cr_fd);
    HYDU_ERR_CHKANDJUMP(status, ret, HYD_INTERNAL_ERROR, "close failed, %s\n",
                        strerror(errno));

  fn_exit:
    HYDU_FUNC_EXIT();
    return status;

  fn_fail:
    goto fn_exit;
}
Пример #16
0
/* This waits for the restarted processes to reconnect their
   stdin/out/err sockets, then sets the appropriate entries in the in
   out and err arrays.  This also gets the pids of the restarted
   processes. */
static HYD_status wait_for_stdinouterr_sockets(int num_ranks, int *ranks, int *in, int *out,
                                               int *err, int *pid)
{
    HYD_status status = HYD_SUCCESS;
    int ret;
    int fd;
    int i, c;
    sock_ident_t id;
    int num_expected_connections = num_ranks * 2;       /* wait for connections for stdout and err */
    HYDU_FUNC_ENTER();

    /* if one of the processes is rank 0, we should wait for an
     * additional connection for stdin */
    for (i = 0; i < num_ranks; ++i)
        if (ranks[i] == 0) {
            ++num_expected_connections;
            break;
        }

    for (c = 0; c < num_expected_connections; ++c) {
        size_t len;
        char *id_p;
        /* wait for a connection */
        do {
            struct sockaddr_in rmt_addr;
            socklen_t sa_len = sizeof(rmt_addr);;
            fd = accept(listen_fd, (struct sockaddr *) &rmt_addr, &sa_len);
        } while (fd && errno == EINTR);
        HYDU_ERR_CHKANDJUMP(status, fd == -1, HYD_INTERNAL_ERROR, "accept failed, %s\n",
                            strerror(errno));

        /* read the socket identifier */
        len = sizeof(id);
        id_p = (char *) &id;
        do {
            do {
                ret = read(fd, id_p, len);
            } while (ret == 0 || (ret == -1 && errno == EINTR));
            HYDU_ERR_CHKANDJUMP(status, ret == -1, HYD_INTERNAL_ERROR, "read failed, %s\n",
                                strerror(errno));
            len -= ret;
            id_p += ret;
        } while (len);

        /* determine the index for this process in the stdout/err
         * arrays */
        for (i = 0; i < num_ranks; ++i)
            if (ranks[i] == id.rank)
                break;
        HYDU_ASSERT(i < num_ranks, status);

        /* assign the fd */
        switch (id.socktype) {
        case IN_SOCK:
            HYDU_ASSERT(id.rank == 0, status);
            *in = fd;
            break;
        case OUT_SOCK:
            out[i] = fd;
            break;
        case ERR_SOCK:
            err[i] = fd;
            break;
        default:
            HYDU_ASSERT(0, status);
            break;
        }

        /* assign the pid */
        pid[i] = id.pid;
    }

    ret = close(listen_fd);
    HYDU_ERR_CHKANDJUMP(status, ret, HYD_INTERNAL_ERROR, "close of listener port failed, %s\n",
                        strerror(errno));


  fn_exit:
    HYDU_FUNC_EXIT();
    return status;
  fn_fail:
    goto fn_exit;
}
Пример #17
0
static HYD_status fn_get(int fd, int pid, int pgid, char *args[])
{
    int i;
    struct HYD_proxy *proxy;
    struct HYD_pmcd_pmi_pg_scratch *pg_scratch;
    struct HYD_pmcd_pmi_kvs_pair *run;
    char *kvsname, *key, *val;
    char *tmp[HYD_NUM_TMP_STRINGS], *cmd;
    struct HYD_pmcd_token *tokens;
    int token_count;
    HYD_status status = HYD_SUCCESS;

    HYDU_FUNC_ENTER();

    status = HYD_pmcd_pmi_args_to_tokens(args, &tokens, &token_count);
    HYDU_ERR_POP(status, "unable to convert args to tokens\n");

    kvsname = HYD_pmcd_pmi_find_token_keyval(tokens, token_count, "kvsname");
    HYDU_ERR_CHKANDJUMP(status, kvsname == NULL, HYD_INTERNAL_ERROR,
                        "unable to find token: kvsname\n");

    key = HYD_pmcd_pmi_find_token_keyval(tokens, token_count, "key");
    HYDU_ERR_CHKANDJUMP(status, key == NULL, HYD_INTERNAL_ERROR,
                        "unable to find token: key\n");

    proxy = HYD_pmcd_pmi_find_proxy(fd);
    HYDU_ASSERT(proxy, status);

    pg_scratch = (struct HYD_pmcd_pmi_pg_scratch *) proxy->pg->pg_scratch;

    val = NULL;
    if (!strcmp(key, "PMI_dead_processes")) {
        val = pg_scratch->dead_processes;
        goto found_val;
    }

    if (strcmp(pg_scratch->kvs->kvs_name, kvsname))
        HYDU_ERR_SETANDJUMP(status, HYD_INTERNAL_ERROR,
                            "kvsname (%s) does not match this group's kvs space (%s)\n",
                            kvsname, pg_scratch->kvs->kvs_name);

    /* Try to find the key */
    for (run = pg_scratch->kvs->key_pair; run; run = run->next) {
        if (!strcmp(run->key, key)) {
            val = run->val;
            break;
        }
    }

  found_val:
    i = 0;
    tmp[i++] = HYDU_strdup("cmd=get_result rc=");
    if (val) {
        tmp[i++] = HYDU_strdup("0 msg=success value=");
        tmp[i++] = HYDU_strdup(val);
    }
    else {
        tmp[i++] = HYDU_strdup("-1 msg=key_");
        tmp[i++] = HYDU_strdup(key);
        tmp[i++] = HYDU_strdup("_not_found value=unknown");
    }
    tmp[i++] = HYDU_strdup("\n");
    tmp[i++] = NULL;

    status = HYDU_str_alloc_and_join(tmp, &cmd);
    HYDU_ERR_POP(status, "unable to join strings\n");
    HYDU_free_strlist(tmp);

    status = cmd_response(fd, pid, cmd);
    HYDU_ERR_POP(status, "error writing PMI line\n");
    HYDU_FREE(cmd);

  fn_exit:
    HYD_pmcd_pmi_free_tokens(tokens, token_count);
    HYDU_FUNC_EXIT();
    return status;

  fn_fail:
    goto fn_exit;
}
Пример #18
0
static HYD_status fn_put(int fd, int pid, int pgid, char *args[])
{
    int i, ret;
    struct HYD_proxy *proxy;
    struct HYD_pmcd_pmi_pg_scratch *pg_scratch;
    char *kvsname, *key, *val;
    char *tmp[HYD_NUM_TMP_STRINGS], *cmd;
    struct HYD_pmcd_token *tokens;
    int token_count;
    HYD_status status = HYD_SUCCESS;

    HYDU_FUNC_ENTER();

    status = HYD_pmcd_pmi_args_to_tokens(args, &tokens, &token_count);
    HYDU_ERR_POP(status, "unable to convert args to tokens\n");

    kvsname = HYD_pmcd_pmi_find_token_keyval(tokens, token_count, "kvsname");
    HYDU_ERR_CHKANDJUMP(status, kvsname == NULL, HYD_INTERNAL_ERROR,
                        "unable to find token: kvsname\n");

    key = HYD_pmcd_pmi_find_token_keyval(tokens, token_count, "key");
    HYDU_ERR_CHKANDJUMP(status, key == NULL, HYD_INTERNAL_ERROR,
                        "unable to find token: key\n");

    val = HYD_pmcd_pmi_find_token_keyval(tokens, token_count, "value");
    if (val == NULL) {
        /* the user sent an empty string */
        val = HYDU_strdup("");
    }

    proxy = HYD_pmcd_pmi_find_proxy(fd);
    HYDU_ASSERT(proxy, status);

    pg_scratch = (struct HYD_pmcd_pmi_pg_scratch *) proxy->pg->pg_scratch;

    if (strcmp(pg_scratch->kvs->kvs_name, kvsname))
        HYDU_ERR_SETANDJUMP(status, HYD_INTERNAL_ERROR,
                            "kvsname (%s) does not match this group's kvs space (%s)\n",
                            kvsname, pg_scratch->kvs->kvs_name);

    status = HYD_pmcd_pmi_add_kvs(key, val, pg_scratch->kvs, &ret);
    HYDU_ERR_POP(status, "unable to add keypair to kvs\n");

    i = 0;
    tmp[i++] = HYDU_strdup("cmd=put_result rc=");
    tmp[i++] = HYDU_int_to_str(ret);
    if (ret == 0) {
        tmp[i++] = HYDU_strdup(" msg=success");
    }
    else {
        tmp[i++] = HYDU_strdup(" msg=duplicate_key");
        tmp[i++] = HYDU_strdup(key);
    }
    tmp[i++] = HYDU_strdup("\n");
    tmp[i++] = NULL;

    status = HYDU_str_alloc_and_join(tmp, &cmd);
    HYDU_ERR_POP(status, "unable to join strings\n");
    HYDU_free_strlist(tmp);

    status = cmd_response(fd, pid, cmd);
    HYDU_ERR_POP(status, "error writing PMI line\n");
    HYDU_FREE(cmd);

  fn_exit:
    HYD_pmcd_pmi_free_tokens(tokens, token_count);
    HYDU_FUNC_EXIT();
    return status;

  fn_fail:
    goto fn_exit;
}
Пример #19
0
static HYD_status fn_spawn(int fd, int pid, int pgid, char *args[])
{
    struct HYD_pg *pg;
    struct HYD_pmcd_pmi_pg_scratch *pg_scratch;
    struct HYD_proxy *proxy;
    struct HYD_pmcd_token *tokens;
    struct HYD_exec *exec_list = NULL, *exec;
    struct HYD_env *env;
    struct HYD_node *node;

    char key[PMI_MAXKEYLEN], *val;
    int nprocs, preput_num, info_num, ret;
    char *execname, *path = NULL;

    struct HYD_pmcd_token_segment *segment_list = NULL;

    int token_count, i, j, k, new_pgid, total_spawns;
    int argcnt, num_segments;
    char *control_port, *proxy_args[HYD_NUM_TMP_STRINGS] = { NULL };
    char *tmp[HYD_NUM_TMP_STRINGS];
    HYD_status status = HYD_SUCCESS;

    HYDU_FUNC_ENTER();

    for (i = 0; args[i]; i++)
        mcmd_args[mcmd_num_args++] = HYDU_strdup(args[i]);
    mcmd_args[mcmd_num_args] = NULL;

    status = HYD_pmcd_pmi_args_to_tokens(mcmd_args, &tokens, &token_count);
    HYDU_ERR_POP(status, "unable to convert args to tokens\n");


    /* Here's the order of things we do:
     *
     *   1. Break the token list into multiple segments, each segment
     *      corresponding to a command. Each command represents
     *      information for one executable.
     *
     *   2. Allocate a process group for the new set of spawned
     *      processes
     *
     *   3. Get all the common keys and deal with them
     *
     *   4. Create an executable list based on the segments.
     *
     *   5. Create a proxy list using the created executable list and
     *      spawn it.
     */

    /* Break the token list into multiple segments and create an
     * executable list based on the segments. */
    val = HYD_pmcd_pmi_find_token_keyval(tokens, token_count, "totspawns");
    HYDU_ERR_CHKANDJUMP(status, val == NULL, HYD_INTERNAL_ERROR,
                        "unable to find token: totspawns\n");
    total_spawns = atoi(val);

    HYDU_MALLOC(segment_list, struct HYD_pmcd_token_segment *,
                total_spawns * sizeof(struct HYD_pmcd_token_segment), status);

    segment_tokens(tokens, token_count, segment_list, &num_segments);

    if (num_segments != total_spawns) {
        /* We didn't read the entire PMI string; wait for the rest to
         * arrive */
        goto fn_exit;
    }
    else {
        /* Got the entire PMI string; free the arguments and reset */
        HYDU_free_strlist(mcmd_args);
        mcmd_num_args = 0;
    }

    /* Allocate a new process group */
    for (pg = &HYD_server_info.pg_list; pg->next; pg = pg->next);
    new_pgid = pg->pgid + 1;

    status = HYDU_alloc_pg(&pg->next, new_pgid);
    HYDU_ERR_POP(status, "unable to allocate process group\n");

    pg = pg->next;

    proxy = HYD_pmcd_pmi_find_proxy(fd);
    HYDU_ASSERT(proxy, status);

    pg->spawner_pg = proxy->pg;

    for (j = 0; j < total_spawns; j++) {
        /* For each segment, we create an exec structure */
        val = HYD_pmcd_pmi_find_token_keyval(&tokens[segment_list[j].start_idx],
                                             segment_list[j].token_count, "nprocs");
        HYDU_ERR_CHKANDJUMP(status, val == NULL, HYD_INTERNAL_ERROR,
                            "unable to find token: nprocs\n");
        nprocs = atoi(val);
        pg->pg_process_count += nprocs;

        val = HYD_pmcd_pmi_find_token_keyval(&tokens[segment_list[j].start_idx],
                                             segment_list[j].token_count, "argcnt");
        HYDU_ERR_CHKANDJUMP(status, val == NULL, HYD_INTERNAL_ERROR,
                            "unable to find token: argcnt\n");
        argcnt = atoi(val);

        val = HYD_pmcd_pmi_find_token_keyval(&tokens[segment_list[j].start_idx],
                                             segment_list[j].token_count, "info_num");
        if (val)
            info_num = atoi(val);
        else
            info_num = 0;

        if (exec_list == NULL) {
            status = HYDU_alloc_exec(&exec_list);
            HYDU_ERR_POP(status, "unable to allocate exec\n");
            exec_list->appnum = 0;
            exec = exec_list;
        }
        else {
            for (exec = exec_list; exec->next; exec = exec->next);
            status = HYDU_alloc_exec(&exec->next);
            HYDU_ERR_POP(status, "unable to allocate exec\n");
            exec->next->appnum = exec->appnum + 1;
            exec = exec->next;
        }

        /* Info keys */
        for (i = 0; i < info_num; i++) {
            char *info_key, *info_val;

            HYDU_snprintf(key, PMI_MAXKEYLEN, "info_key_%d", i);
            val = HYD_pmcd_pmi_find_token_keyval(&tokens[segment_list[j].start_idx],
                                                 segment_list[j].token_count, key);
            HYDU_ERR_CHKANDJUMP(status, val == NULL, HYD_INTERNAL_ERROR,
                                "unable to find token: %s\n", key);
            info_key = val;

            HYDU_snprintf(key, PMI_MAXKEYLEN, "info_val_%d", i);
            val = HYD_pmcd_pmi_find_token_keyval(&tokens[segment_list[j].start_idx],
                                                 segment_list[j].token_count, key);
            HYDU_ERR_CHKANDJUMP(status, val == NULL, HYD_INTERNAL_ERROR,
                                "unable to find token: %s\n", key);
            info_val = val;

            if (!strcmp(info_key, "path")) {
                path = HYDU_strdup(info_val);
            }
            else if (!strcmp(info_key, "wdir")) {
                exec->wdir = HYDU_strdup(info_val);
            }
            else if (!strcmp(info_key, "host")) {
                status = HYDU_process_mfile_token(info_val, 1, &pg->user_node_list);
                HYDU_ERR_POP(status, "error create node list\n");
            }
            else if (!strcmp(info_key, "hostfile")) {
                status =
                    HYDU_parse_hostfile(info_val, &pg->user_node_list, HYDU_process_mfile_token);
                HYDU_ERR_POP(status, "error parsing hostfile\n");
            }
            else {
                /* Unrecognized info key; ignore */
            }
        }

        status = HYDU_correct_wdir(&exec->wdir);
        HYDU_ERR_POP(status, "unable to correct wdir\n");

        val = HYD_pmcd_pmi_find_token_keyval(&tokens[segment_list[j].start_idx],
                                             segment_list[j].token_count, "execname");
        HYDU_ERR_CHKANDJUMP(status, val == NULL, HYD_INTERNAL_ERROR,
                            "unable to find token: execname\n");
        if (path == NULL)
            execname = HYDU_strdup(val);
        else {
            i = 0;
            tmp[i++] = HYDU_strdup(path);
            tmp[i++] = HYDU_strdup("/");
            tmp[i++] = HYDU_strdup(val);
            tmp[i++] = NULL;

            status = HYDU_str_alloc_and_join(tmp, &execname);
            HYDU_ERR_POP(status, "error while joining strings\n");
            HYDU_free_strlist(tmp);
        }

        i = 0;
        exec->exec[i++] = execname;
        for (k = 0; k < argcnt; k++) {
            HYDU_snprintf(key, PMI_MAXKEYLEN, "arg%d", k + 1);
            val = HYD_pmcd_pmi_find_token_keyval(&tokens[segment_list[j].start_idx],
                                                 segment_list[j].token_count, key);
            HYDU_ERR_CHKANDJUMP(status, val == NULL, HYD_INTERNAL_ERROR,
                                "unable to find token: %s\n", key);
            exec->exec[i++] = HYDU_strdup(val);
        }
        exec->exec[i++] = NULL;

        exec->proc_count = nprocs;

        /* It is not clear what kind of environment needs to get
         * passed to the spawned process. Don't set anything here, and
         * let the proxy do whatever it does by default. */
        exec->env_prop = NULL;

        status = HYDU_env_create(&env, "PMI_SPAWNED", "1");
        HYDU_ERR_POP(status, "unable to create PMI_SPAWNED environment\n");
        exec->user_env = env;
    }

    status = HYD_pmcd_pmi_alloc_pg_scratch(pg);
    HYDU_ERR_POP(status, "unable to allocate pg scratch space\n");

    if (pg->user_node_list) {
        pg->pg_core_count = 0;
        for (i = 0, node = pg->user_node_list; node; node = node->next, i++) {
            pg->pg_core_count += node->core_count;
            node->node_id = i;
        }
    }
    else {
        pg->pg_core_count = HYD_server_info.pg_list.pg_core_count;
    }

    pg->pg_process_count = 0;
    for (exec = exec_list; exec; exec = exec->next)
        pg->pg_process_count += exec->proc_count;

    pg_scratch = (struct HYD_pmcd_pmi_pg_scratch *) pg->pg_scratch;

    /* Get the common keys and deal with them */
    val = HYD_pmcd_pmi_find_token_keyval(tokens, token_count, "preput_num");
    if (val)
        preput_num = atoi(val);
    else
        preput_num = 0;

    for (i = 0; i < preput_num; i++) {
        char *preput_key, *preput_val;

        HYDU_snprintf(key, PMI_MAXKEYLEN, "preput_key_%d", i);
        val = HYD_pmcd_pmi_find_token_keyval(tokens, token_count, key);
        HYDU_ERR_CHKANDJUMP(status, val == NULL, HYD_INTERNAL_ERROR,
                            "unable to find token: %s\n", key);
        preput_key = val;

        HYDU_snprintf(key, PMI_MAXKEYLEN, "preput_val_%d", i);
        val = HYD_pmcd_pmi_find_token_keyval(tokens, token_count, key);
        HYDU_ERR_CHKANDJUMP(status, val == NULL, HYD_INTERNAL_ERROR,
                            "unable to find token: %s\n", key);
        preput_val = val;

        status = HYD_pmcd_pmi_add_kvs(preput_key, preput_val, pg_scratch->kvs, &ret);
        HYDU_ERR_POP(status, "unable to add keypair to kvs\n");
    }

    /* Create the proxy list */
    if (pg->user_node_list) {
        status = HYDU_create_proxy_list(exec_list, pg->user_node_list, pg);
        HYDU_ERR_POP(status, "error creating proxy list\n");
    }
    else {
        status = HYDU_create_proxy_list(exec_list, HYD_server_info.node_list, pg);
        HYDU_ERR_POP(status, "error creating proxy list\n");
    }
    HYDU_free_exec_list(exec_list);

    status = HYDU_sock_create_and_listen_portstr(HYD_server_info.user_global.iface,
                                                 HYD_server_info.local_hostname,
                                                 HYD_server_info.port_range, &control_port,
                                                 HYD_pmcd_pmiserv_control_listen_cb,
                                                 (void *) (size_t) new_pgid);
    HYDU_ERR_POP(status, "unable to create PMI port\n");
    if (HYD_server_info.user_global.debug)
        HYDU_dump(stdout, "Got a control port string of %s\n", control_port);

    /* Go to the last PG */
    for (pg = &HYD_server_info.pg_list; pg->next; pg = pg->next);

    status = HYD_pmcd_pmi_fill_in_proxy_args(proxy_args, control_port, new_pgid);
    HYDU_ERR_POP(status, "unable to fill in proxy arguments\n");
    HYDU_FREE(control_port);

    status = HYD_pmcd_pmi_fill_in_exec_launch_info(pg);
    HYDU_ERR_POP(status, "unable to fill in executable arguments\n");

    status = HYDT_bsci_launch_procs(proxy_args, pg->proxy_list, NULL);
    HYDU_ERR_POP(status, "launcher cannot launch processes\n");

    {
        char *cmd_str[HYD_NUM_TMP_STRINGS], *cmd;

        i = 0;
        cmd_str[i++] = HYDU_strdup("cmd=spawn_result rc=0");
        cmd_str[i++] = HYDU_strdup("\n");
        cmd_str[i++] = NULL;

        status = HYDU_str_alloc_and_join(cmd_str, &cmd);
        HYDU_ERR_POP(status, "unable to join strings\n");
        HYDU_free_strlist(cmd_str);

        status = cmd_response(fd, pid, cmd);
        HYDU_ERR_POP(status, "error writing PMI line\n");
        HYDU_FREE(cmd);
    }

  fn_exit:
    HYD_pmcd_pmi_free_tokens(tokens, token_count);
    HYDU_free_strlist(proxy_args);
    if (segment_list)
        HYDU_FREE(segment_list);
    HYDU_FUNC_EXIT();
    return status;

  fn_fail:
    goto fn_exit;
}
Пример #20
0
static HYD_status fn_spawn(int fd, int pid, int pgid, char *args[])
{
    struct HYD_pg *pg;
    struct HYD_pmcd_pmi_pg_scratch *pg_scratch;
    struct HYD_proxy *proxy;
    struct HYD_pmcd_token *tokens;
    struct HYD_exec *exec_list = NULL, *exec;
    struct HYD_env *env;
    struct HYD_node *node;

    char *thrid;
    char key[PMI_MAXKEYLEN], *val;
    int maxprocs, preputcount, infokeycount, ret;
    int ncmds;
    char *execname, *path = NULL;

    struct HYD_pmcd_token_segment *segment_list = NULL;

    int token_count, i, j, k, new_pgid;
    int argcnt, num_segments;
    struct HYD_string_stash proxy_stash;
    char *control_port;
    struct HYD_string_stash stash;
    HYD_status status = HYD_SUCCESS;

    HYDU_FUNC_ENTER();

    status = HYD_pmcd_pmi_args_to_tokens(args, &tokens, &token_count);
    HYDU_ERR_POP(status, "unable to convert args to tokens\n");

    thrid = HYD_pmcd_pmi_find_token_keyval(tokens, token_count, "thrid");


    /* Here's the order of things we do:
     *
     *   1. Break the token list into multiple segments, each segment
     *      corresponding to a command. Each command represents
     *      information for one executable.
     *
     *   2. Allocate a process group for the new set of spawned
     *      processes
     *
     *   3. Get all the common keys and deal with them
     *
     *   4. Create an executable list based on the segments.
     *
     *   5. Create a proxy list using the created executable list and
     *      spawn it.
     */

    val = HYD_pmcd_pmi_find_token_keyval(tokens, token_count, "ncmds");
    HYDU_ERR_CHKANDJUMP(status, val == NULL, HYD_INTERNAL_ERROR, "unable to find token: ncmds\n");
    ncmds = atoi(val);

    HYDU_MALLOC(segment_list, struct HYD_pmcd_token_segment *,
                (ncmds + 1) * sizeof(struct HYD_pmcd_token_segment), status);
    segment_tokens(tokens, token_count, segment_list, &num_segments);
    HYDU_ASSERT((ncmds + 1) == num_segments, status);


    /* Allocate a new process group */
    for (pg = &HYD_server_info.pg_list; pg->next; pg = pg->next);
    new_pgid = pg->pgid + 1;

    status = HYDU_alloc_pg(&pg->next, new_pgid);
    HYDU_ERR_POP(status, "unable to allocate process group\n");

    pg = pg->next;

    proxy = HYD_pmcd_pmi_find_proxy(fd);
    HYDU_ASSERT(proxy, status);

    pg->spawner_pg = proxy->pg;

    for (j = 1; j <= ncmds; j++) {
        /* For each segment, we create an exec structure */
        val = HYD_pmcd_pmi_find_token_keyval(&tokens[segment_list[j].start_idx],
                                             segment_list[j].token_count, "maxprocs");
        HYDU_ERR_CHKANDJUMP(status, val == NULL, HYD_INTERNAL_ERROR,
                            "unable to find token: maxprocs\n");
        maxprocs = atoi(val);
        pg->pg_process_count += maxprocs;

        val = HYD_pmcd_pmi_find_token_keyval(&tokens[segment_list[j].start_idx],
                                             segment_list[j].token_count, "argc");
        HYDU_ERR_CHKANDJUMP(status, val == NULL, HYD_INTERNAL_ERROR,
                            "unable to find token: argc\n");
        argcnt = atoi(val);

        val = HYD_pmcd_pmi_find_token_keyval(&tokens[segment_list[j].start_idx],
                                             segment_list[j].token_count, "infokeycount");
        if (val)
            infokeycount = atoi(val);
        else
            infokeycount = 0;

        if (exec_list == NULL) {
            status = HYDU_alloc_exec(&exec_list);
            HYDU_ERR_POP(status, "unable to allocate exec\n");
            exec_list->appnum = 0;
            exec = exec_list;
        }
        else {
            for (exec = exec_list; exec->next; exec = exec->next);
            status = HYDU_alloc_exec(&exec->next);
            HYDU_ERR_POP(status, "unable to allocate exec\n");
            exec->next->appnum = exec->appnum + 1;
            exec = exec->next;
        }

        /* Info keys */
        for (i = 0; i < infokeycount; i++) {
            char *info_key, *info_val;

            HYDU_snprintf(key, PMI_MAXKEYLEN, "infokey%d", i);
            val = HYD_pmcd_pmi_find_token_keyval(&tokens[segment_list[j].start_idx],
                                                 segment_list[j].token_count, key);
            HYDU_ERR_CHKANDJUMP(status, val == NULL, HYD_INTERNAL_ERROR,
                                "unable to find token: %s\n", key);
            info_key = val;

            HYDU_snprintf(key, PMI_MAXKEYLEN, "infoval%d", i);
            val = HYD_pmcd_pmi_find_token_keyval(&tokens[segment_list[j].start_idx],
                                                 segment_list[j].token_count, key);
            HYDU_ERR_CHKANDJUMP(status, val == NULL, HYD_INTERNAL_ERROR,
                                "unable to find token: %s\n", key);
            info_val = val;

            if (!strcmp(info_key, "path")) {
                path = HYDU_strdup(info_val);
            }
            else if (!strcmp(info_key, "wdir")) {
                exec->wdir = HYDU_strdup(info_val);
            }
            else if (!strcmp(info_key, "host") || !strcmp(info_key, "hosts")) {
                char *host = strtok(info_val, ",");
                while (host) {
                    status = HYDU_process_mfile_token(host, 1, &pg->user_node_list);
                    HYDU_ERR_POP(status, "error creating node list\n");
                    host = strtok(NULL, ",");
                }
            }
            else if (!strcmp(info_key, "hostfile")) {
                status = HYDU_parse_hostfile(info_val, &pg->user_node_list,
                                             HYDU_process_mfile_token);
                HYDU_ERR_POP(status, "error parsing hostfile\n");
            }
            else {
                /* Unrecognized info key; ignore */
            }
        }

        status = HYDU_correct_wdir(&exec->wdir);
        HYDU_ERR_POP(status, "unable to correct wdir\n");

        val = HYD_pmcd_pmi_find_token_keyval(&tokens[segment_list[j].start_idx],
                                             segment_list[j].token_count, "subcmd");
        HYDU_ERR_CHKANDJUMP(status, val == NULL, HYD_INTERNAL_ERROR,
                            "unable to find token: subcmd\n");
        if (path == NULL)
            execname = HYDU_strdup(val);
        else {
            HYD_STRING_STASH_INIT(stash);
            HYD_STRING_STASH(stash, HYDU_strdup(path), status);
            HYD_STRING_STASH(stash, HYDU_strdup("/"), status);
            HYD_STRING_STASH(stash, HYDU_strdup(val), status);

            HYD_STRING_SPIT(stash, execname, status);
        }

        i = 0;
        exec->exec[i++] = execname;
        for (k = 0; k < argcnt; k++) {
            HYDU_snprintf(key, PMI_MAXKEYLEN, "argv%d", k);
            val = HYD_pmcd_pmi_find_token_keyval(&tokens[segment_list[j].start_idx],
                                                 segment_list[j].token_count, key);
            HYDU_ERR_CHKANDJUMP(status, val == NULL, HYD_INTERNAL_ERROR,
                                "unable to find token: %s\n", key);
            exec->exec[i++] = HYDU_strdup(val);
        }
        exec->exec[i++] = NULL;

        exec->proc_count = maxprocs;

        /* It is not clear what kind of environment needs to get
         * passed to the spawned process. Don't set anything here, and
         * let the proxy do whatever it does by default. */
        exec->env_prop = NULL;

        status = HYDU_env_create(&env, "PMI_SPAWNED", "1");
        HYDU_ERR_POP(status, "unable to create PMI_SPAWNED environment\n");
        exec->user_env = env;
    }

    status = HYD_pmcd_pmi_alloc_pg_scratch(pg);
    HYDU_ERR_POP(status, "unable to allocate pg scratch space\n");

    pg->pg_process_count = 0;
    for (exec = exec_list; exec; exec = exec->next)
        pg->pg_process_count += exec->proc_count;

    pg_scratch = (struct HYD_pmcd_pmi_pg_scratch *) pg->pg_scratch;

    /* Get the common keys and deal with them */
    val = HYD_pmcd_pmi_find_token_keyval(tokens, token_count, "preputcount");
    HYDU_ERR_CHKANDJUMP(status, val == NULL, HYD_INTERNAL_ERROR,
                        "unable to find token: preputcount\n");
    preputcount = atoi(val);

    for (i = 0; i < preputcount; i++) {
        char *preput_key, *preput_val;

        HYDU_snprintf(key, PMI_MAXKEYLEN, "ppkey%d", i);
        val = HYD_pmcd_pmi_find_token_keyval(tokens, token_count, key);
        HYDU_ERR_CHKANDJUMP(status, val == NULL, HYD_INTERNAL_ERROR,
                            "unable to find token: %s\n", key);
        preput_key = val;

        HYDU_snprintf(key, PMI_MAXKEYLEN, "ppval%d", i);
        val = HYD_pmcd_pmi_find_token_keyval(tokens, token_count, key);
        HYDU_ERR_CHKANDJUMP(status, val == NULL, HYD_INTERNAL_ERROR,
                            "unable to find token: %s\n", key);
        preput_val = val;

        status = HYD_pmcd_pmi_add_kvs(preput_key, preput_val, pg_scratch->kvs, &ret);
        HYDU_ERR_POP(status, "unable to add keypair to kvs\n");
    }

    /* Create the proxy list */
    if (pg->user_node_list) {
        status = HYDU_create_proxy_list(exec_list, pg->user_node_list, pg);
        HYDU_ERR_POP(status, "error creating proxy list\n");
    }
    else {
        status = HYDU_create_proxy_list(exec_list, HYD_server_info.node_list, pg);
        HYDU_ERR_POP(status, "error creating proxy list\n");
    }
    HYDU_free_exec_list(exec_list);

    if (pg->user_node_list) {
        pg->pg_core_count = 0;
        for (i = 0, node = pg->user_node_list; node; node = node->next, i++)
            pg->pg_core_count += node->core_count;
    }
    else {
        pg->pg_core_count = 0;
        for (proxy = pg->proxy_list; proxy; proxy = proxy->next)
            pg->pg_core_count += proxy->node->core_count;
    }

    status = HYDU_sock_create_and_listen_portstr(HYD_server_info.user_global.iface,
                                                 HYD_server_info.localhost,
                                                 HYD_server_info.port_range, &control_port,
                                                 HYD_pmcd_pmiserv_control_listen_cb,
                                                 (void *) (size_t) new_pgid);
    HYDU_ERR_POP(status, "unable to create PMI port\n");
    if (HYD_server_info.user_global.debug)
        HYDU_dump(stdout, "Got a control port string of %s\n", control_port);

    /* Go to the last PG */
    for (pg = &HYD_server_info.pg_list; pg->next; pg = pg->next);

    status = HYD_pmcd_pmi_fill_in_proxy_args(&proxy_stash, control_port, new_pgid);
    HYDU_ERR_POP(status, "unable to fill in proxy arguments\n");
    HYDU_FREE(control_port);

    status = HYD_pmcd_pmi_fill_in_exec_launch_info(pg);
    HYDU_ERR_POP(status, "unable to fill in executable arguments\n");

    status = HYDT_bsci_launch_procs(proxy_stash.strlist, pg->proxy_list, NULL);
    HYDU_ERR_POP(status, "launcher cannot launch processes\n");

    {
        char *cmd;

        HYD_STRING_STASH_INIT(stash);
        HYD_STRING_STASH(stash, HYDU_strdup("cmd=spawn-response;"), status);
        if (thrid) {
            HYD_STRING_STASH(stash, HYDU_strdup("thrid="), status);
            HYD_STRING_STASH(stash, HYDU_strdup(thrid), status);
            HYD_STRING_STASH(stash, HYDU_strdup(";"), status);
        }
        HYD_STRING_STASH(stash, HYDU_strdup("rc=0;"), status);
        HYD_STRING_STASH(stash, HYDU_strdup("jobid="), status);
        HYD_STRING_STASH(stash, HYDU_strdup(pg_scratch->kvs->kvsname), status);
        HYD_STRING_STASH(stash, HYDU_strdup(";"), status);
        HYD_STRING_STASH(stash, HYDU_strdup("nerrs=0;"), status);

        HYD_STRING_SPIT(stash, cmd, status);

        status = cmd_response(fd, pid, cmd);
        HYDU_ERR_POP(status, "send command failed\n");
        HYDU_FREE(cmd);
    }

  fn_exit:
    HYD_pmcd_pmi_free_tokens(tokens, token_count);
    HYD_STRING_STASH_FREE(proxy_stash);
    if (segment_list)
        HYDU_FREE(segment_list);
    HYDU_FUNC_EXIT();
    return status;

  fn_fail:
    goto fn_exit;
}
Пример #21
0
static HYD_status handle_bitmap_binding(const char *binding, const char *mapping)
{
    int i, j, k, bind_count, map_count, cache_depth = 0, bind_depth = 0, map_depth = 0;
    int total_map_objs, total_bind_objs, num_pus_in_map_domain, num_pus_in_bind_domain,
        total_map_domains;
    hwloc_obj_t map_obj, bind_obj, *start_pu;
    hwloc_cpuset_t *map_domains;
    char *bind_str, *map_str;
    HYD_status status = HYD_SUCCESS;

    HYDU_FUNC_ENTER();

    /* split out the count fields */
    status = split_count_field(binding, &bind_str, &bind_count);
    HYDU_ERR_POP(status, "error splitting count field\n");

    status = split_count_field(mapping, &map_str, &map_count);
    HYDU_ERR_POP(status, "error splitting count field\n");


    /* get the binding object */
    if (!strcmp(bind_str, "board"))
        bind_depth = hwloc_get_type_or_above_depth(topology, HWLOC_OBJ_MACHINE);
    else if (!strcmp(bind_str, "numa"))
        bind_depth = hwloc_get_type_or_above_depth(topology, HWLOC_OBJ_NODE);
    else if (!strcmp(bind_str, "socket"))
        bind_depth = hwloc_get_type_or_above_depth(topology, HWLOC_OBJ_SOCKET);
    else if (!strcmp(bind_str, "core"))
        bind_depth = hwloc_get_type_or_above_depth(topology, HWLOC_OBJ_CORE);
    else if (!strcmp(bind_str, "hwthread"))
        bind_depth = hwloc_get_type_or_above_depth(topology, HWLOC_OBJ_PU);
    else {
        /* check if it's in the l*cache format */
        cache_depth = parse_cache_string(bind_str);
        if (!cache_depth) {
            HYDU_ERR_SETANDJUMP(status, HYD_INTERNAL_ERROR,
                                "unrecognized binding string \"%s\"\n", binding);
        }
        bind_depth = hwloc_get_cache_type_depth(topology, cache_depth, -1);
    }

    /* get the mapping */
    if (!strcmp(map_str, "board"))
        map_depth = hwloc_get_type_or_above_depth(topology, HWLOC_OBJ_MACHINE);
    else if (!strcmp(map_str, "numa"))
        map_depth = hwloc_get_type_or_above_depth(topology, HWLOC_OBJ_NODE);
    else if (!strcmp(map_str, "socket"))
        map_depth = hwloc_get_type_or_above_depth(topology, HWLOC_OBJ_SOCKET);
    else if (!strcmp(map_str, "core"))
        map_depth = hwloc_get_type_or_above_depth(topology, HWLOC_OBJ_CORE);
    else if (!strcmp(map_str, "hwthread"))
        map_depth = hwloc_get_type_or_above_depth(topology, HWLOC_OBJ_PU);
    else {
        cache_depth = parse_cache_string(map_str);
        if (!cache_depth) {
            HYDU_ERR_SETANDJUMP(status, HYD_INTERNAL_ERROR,
                                "unrecognized mapping string \"%s\"\n", mapping);
        }
        map_depth = hwloc_get_cache_type_depth(topology, cache_depth, -1);
    }

    /*
     * Process Affinity Algorithm:
     *
     * The code below works in 3 stages. The end result is an array of all the possible
     * binding bitmaps for a system, based on the options specified.
     *
     * 1. Define all possible mapping "domains" in a system. A mapping domain is a group
     *    of hardware elements found by traversing the topology. Each traversal skips the
     *    number of elements the user specified in the mapping string. The traversal ends
     *    when the next mapping domain == the first mapping domain. Note that if the
     *    mapping string defines a domain that is larger than the system size, we exit
     *    with an error.
     *
     * 2. Define the number of possible binding domains within a mapping domain. This
     *    process is similar to step 1, in that we traverse the mapping domain finding
     *    all possible bind combinations, stopping when a duplicate of the first binding
     *    is reached. If a binding is larger (in # of PUs) than the mapping domain,
     *    the number of possible bindings for that domain is 1. In this stage, we also
     *    locate the first PU in each mapping domain for use later during binding.
     *
     * 3. Create the binding bitmaps. We allocate an array of bitmaps and fill them in
     *    with all possible bindings. The starting PU in each mapping domain is advanced
     *    if and when we wrap around to the beginning of the mapping domains. This ensures
     *    that we do not repeat.
     *
     */

    /* calculate the number of map domains */
    total_map_objs = hwloc_get_nbobjs_by_depth(topology, map_depth);
    num_pus_in_map_domain = (HYDT_topo_hwloc_info.total_num_pus / total_map_objs) * map_count;
    HYDU_ERR_CHKANDJUMP(status, num_pus_in_map_domain > HYDT_topo_hwloc_info.total_num_pus,
                        HYD_INTERNAL_ERROR, "mapping option \"%s\" larger than total system size\n",
                        mapping);

    /* The number of total_map_domains should be large enough to
     * contain all contiguous map object collections of length
     * map_count.  For example, if the map object is "socket" and the
     * map_count is 3, on a system with 4 sockets, the following map
     * domains should be included: (0,1,2), (3,0,1), (2,3,0), (1,2,3).
     * We do this by finding how many times we need to replicate the
     * list of the map objects so that an integral number of map
     * domains can map to them.  In the above case, the list of map
     * objects is replicated 3 times. */
    for (i = 1; (i * total_map_objs) % map_count; i++);
    total_map_domains = (i * total_map_objs) / map_count;

    /* initialize the map domains */
    HYDU_MALLOC_OR_JUMP(map_domains, hwloc_bitmap_t *, total_map_domains * sizeof(hwloc_bitmap_t),
                        status);
    HYDU_MALLOC_OR_JUMP(start_pu, hwloc_obj_t *, total_map_domains * sizeof(hwloc_obj_t), status);

    /* For each map domain, find the next map object (first map object
     * for the first map domain) and add the following "map_count"
     * number of contiguous map objects, wrapping to the first one if
     * needed, to the map domain.  Store the first PU in the first map
     * object of the map domain as "start_pu".  This is needed later
     * for the actual binding. */
    map_obj = NULL;
    for (i = 0; i < total_map_domains; i++) {
        map_domains[i] = hwloc_bitmap_alloc();
        hwloc_bitmap_zero(map_domains[i]);

        for (j = 0; j < map_count; j++) {
            map_obj = hwloc_get_next_obj_by_depth(topology, map_depth, map_obj);
            /* map_obj will be NULL if it reaches the end. call again to wrap around */
            if (!map_obj)
                map_obj = hwloc_get_next_obj_by_depth(topology, map_depth, map_obj);

            if (j == 0)
                start_pu[i] =
                    hwloc_get_obj_inside_cpuset_by_type(topology, map_obj->cpuset, HWLOC_OBJ_PU, 0);

            hwloc_bitmap_or(map_domains[i], map_domains[i], map_obj->cpuset);
        }
    }


    /* Find the possible binding domains is similar to that of map
     * domains.  But if a binding domain is larger (in # of PUs) than
     * the mapping domain, the number of possible bindings for that
     * domain is 1. */

    /* calculate the number of possible bindings and allocate bitmaps for them */
    total_bind_objs = hwloc_get_nbobjs_by_depth(topology, bind_depth);
    num_pus_in_bind_domain = (HYDT_topo_hwloc_info.total_num_pus / total_bind_objs) * bind_count;

    if (num_pus_in_bind_domain < num_pus_in_map_domain) {
        for (i = 1; (i * num_pus_in_map_domain) % num_pus_in_bind_domain; i++);
        HYDT_topo_hwloc_info.num_bitmaps =
            (i * num_pus_in_map_domain * total_map_domains) / num_pus_in_bind_domain;
    }
    else {
        HYDT_topo_hwloc_info.num_bitmaps = total_map_domains;
    }

    /* initialize bitmaps */
    HYDU_MALLOC_OR_JUMP(HYDT_topo_hwloc_info.bitmap, hwloc_bitmap_t *,
                        HYDT_topo_hwloc_info.num_bitmaps * sizeof(hwloc_bitmap_t), status);

    for (i = 0; i < HYDT_topo_hwloc_info.num_bitmaps; i++) {
        HYDT_topo_hwloc_info.bitmap[i] = hwloc_bitmap_alloc();
        hwloc_bitmap_zero(HYDT_topo_hwloc_info.bitmap[i]);
    }

    /* do bindings */
    i = 0;
    while (i < HYDT_topo_hwloc_info.num_bitmaps) {
        for (j = 0; j < total_map_domains; j++) {
            bind_obj = hwloc_get_ancestor_obj_by_depth(topology, bind_depth, start_pu[j]);

            for (k = 0; k < bind_count; k++) {
                hwloc_bitmap_or(HYDT_topo_hwloc_info.bitmap[i], HYDT_topo_hwloc_info.bitmap[i],
                                bind_obj->cpuset);

                /* if the binding is smaller than the mapping domain, wrap around inside that domain */
                if (num_pus_in_bind_domain < num_pus_in_map_domain) {
                    bind_obj =
                        hwloc_get_next_obj_inside_cpuset_by_depth(topology, map_domains[j],
                                                                  bind_depth, bind_obj);
                    if (!bind_obj)
                        bind_obj =
                            hwloc_get_next_obj_inside_cpuset_by_depth(topology, map_domains[j],
                                                                      bind_depth, bind_obj);
                }
                else {
                    bind_obj = hwloc_get_next_obj_by_depth(topology, bind_depth, bind_obj);
                    if (!bind_obj)
                        bind_obj = hwloc_get_next_obj_by_depth(topology, bind_depth, bind_obj);
                }

            }
            i++;

            /* advance the starting position for this map domain, if needed */
            if (num_pus_in_bind_domain < num_pus_in_map_domain) {
                for (k = 0; k < num_pus_in_bind_domain; k++) {
                    start_pu[j] = hwloc_get_next_obj_inside_cpuset_by_type(topology, map_domains[j],
                                                                           HWLOC_OBJ_PU,
                                                                           start_pu[j]);
                    if (!start_pu[j])
                        start_pu[j] =
                            hwloc_get_next_obj_inside_cpuset_by_type(topology, map_domains[j],
                                                                     HWLOC_OBJ_PU, start_pu[j]);
                }
            }
        }
    }

    /* free temporary memory */
    MPL_free(map_domains);
    MPL_free(start_pu);

  fn_exit:
    HYDU_FUNC_EXIT();
    return status;

  fn_fail:
    goto fn_exit;
}
Пример #22
0
static HYD_status fn_fullinit(int fd, char *args[])
{
    int id, i;
    char *rank_str;
    struct HYD_string_stash stash;
    char *cmd;
    struct HYD_pmcd_token *tokens;
    int token_count;
    HYD_status status = HYD_SUCCESS;

    HYDU_FUNC_ENTER();

    status = HYD_pmcd_pmi_args_to_tokens(args, &tokens, &token_count);
    HYDU_ERR_POP(status, "unable to convert args to tokens\n");

    rank_str = HYD_pmcd_pmi_find_token_keyval(tokens, token_count, "pmirank");
    HYDU_ERR_CHKANDJUMP(status, rank_str == NULL, HYD_INTERNAL_ERROR,
                        "unable to find pmirank token\n");
    id = atoi(rank_str);

    /* Store the PMI_RANK to fd mapping */
    for (i = 0; i < HYD_pmcd_pmip.local.proxy_process_count; i++) {
        if (HYD_pmcd_pmip.downstream.pmi_rank[i] == id) {
            HYD_pmcd_pmip.downstream.pmi_fd[i] = fd;
            HYD_pmcd_pmip.downstream.pmi_fd_active[i] = 1;
            break;
        }
    }
    HYDU_ASSERT(i < HYD_pmcd_pmip.local.proxy_process_count, status);

    HYD_STRING_STASH_INIT(stash);
    HYD_STRING_STASH(stash,
                     MPL_strdup("cmd=fullinit-response;pmi-version=2;pmi-subversion=0;rank="),
                     status);
    HYD_STRING_STASH(stash, HYDU_int_to_str(id), status);

    HYD_STRING_STASH(stash, MPL_strdup(";size="), status);
    HYD_STRING_STASH(stash, HYDU_int_to_str(HYD_pmcd_pmip.system_global.global_process_count),
                     status);
    HYD_STRING_STASH(stash, MPL_strdup(";appnum=0"), status);
    if (HYD_pmcd_pmip.local.spawner_kvsname) {
        HYD_STRING_STASH(stash, MPL_strdup(";spawner-jobid="), status);
        HYD_STRING_STASH(stash, MPL_strdup(HYD_pmcd_pmip.local.spawner_kvsname), status);
    }
    if (HYD_pmcd_pmip.user_global.debug) {
        HYD_STRING_STASH(stash, MPL_strdup(";debugged=TRUE;pmiverbose=TRUE"), status);
    } else {
        HYD_STRING_STASH(stash, MPL_strdup(";debugged=FALSE;pmiverbose=FALSE"), status);
    }
    HYD_STRING_STASH(stash, MPL_strdup(";rc=0;"), status);

    HYD_STRING_SPIT(stash, cmd, status);

    send_cmd_downstream(fd, cmd);
    MPL_free(cmd);

  fn_exit:
    HYD_pmcd_pmi_free_tokens(tokens, token_count);
    HYDU_FUNC_EXIT();
    return status;

  fn_fail:
    goto fn_exit;
}