Пример #1
0
static HYD_status fn_name_publish(int fd, int pid, int pgid, char *args[])
{
    struct HYD_string_stash stash;
    char *cmd, *thrid, *val, *name = NULL, *port = NULL;
    int token_count, success;
    struct HYD_pmcd_token *tokens = NULL;
    HYD_status status = HYD_SUCCESS;

    HYDU_FUNC_ENTER();

    status = HYD_pmcd_pmi_args_to_tokens(args, &tokens, &token_count);
    HYDU_ERR_POP(status, "unable to convert args to tokens\n");

    thrid = HYD_pmcd_pmi_find_token_keyval(tokens, token_count, "thrid");

    if ((val = HYD_pmcd_pmi_find_token_keyval(tokens, token_count, "name")) == NULL)
        HYDU_ERR_POP(status, "cannot find token: name\n");
    name = HYDU_strdup(val);

    if ((val = HYD_pmcd_pmi_find_token_keyval(tokens, token_count, "port")) == NULL)
        HYDU_ERR_POP(status, "cannot find token: port\n");
    port = HYDU_strdup(val);

    status = HYD_pmcd_pmi_publish(name, port, &success);
    HYDU_ERR_POP(status, "error publishing service\n");

    HYD_STRING_STASH_INIT(stash);
    HYD_STRING_STASH(stash, HYDU_strdup("cmd=name-publish-response;"), status);
    if (thrid) {
        HYD_STRING_STASH(stash, HYDU_strdup("thrid="), status);
        HYD_STRING_STASH(stash, HYDU_strdup(thrid), status);
        HYD_STRING_STASH(stash, HYDU_strdup(";"), status);
    }
    if (!success) {
        HYD_STRING_STASH(stash, HYDU_strdup("rc=1;errmsg=duplicate_service_"), status);
        HYD_STRING_STASH(stash, HYDU_strdup(name), status);
        HYD_STRING_STASH(stash, HYDU_strdup(";"), status);
    }
    else
        HYD_STRING_STASH(stash, HYDU_strdup("rc=0;"), status);

    HYD_STRING_SPIT(stash, cmd, status);

    status = cmd_response(fd, pid, cmd);
    HYDU_ERR_POP(status, "send command failed\n");
    HYDU_FREE(cmd);

  fn_exit:
    if (tokens)
        HYD_pmcd_pmi_free_tokens(tokens, token_count);
    if (name)
        HYDU_FREE(name);
    if (port)
        HYDU_FREE(port);
    HYDU_FUNC_EXIT();
    return status;

  fn_fail:
    goto fn_exit;
}
Пример #2
0
/*
 * fn_put - handle a PMI "put" request.
 *
 * Converts args into key/value tokens and adds every token to the KVS of
 * the process group that owns the proxy connected on fd.
 *
 * pid and pgid are not used by this handler.
 *
 * Returns HYD_SUCCESS, or an error status when tokenizing fails, no proxy
 * is found for fd, or a pair cannot be added to the KVS.
 */
static HYD_status fn_put(int fd, int pid, int pgid, char *args[])
{
    struct HYD_proxy *proxy;
    struct HYD_pmcd_pmi_pg_scratch *pg_scratch;
    /* initialized so the cleanup path is safe when tokenizing fails */
    struct HYD_pmcd_token *tokens = NULL;
    int token_count = 0, i, ret;
    HYD_status status = HYD_SUCCESS;

    HYDU_FUNC_ENTER();

    status = HYD_pmcd_pmi_args_to_tokens(args, &tokens, &token_count);
    HYDU_ERR_POP(status, "unable to convert args to tokens\n");

    proxy = HYD_pmcd_pmi_find_proxy(fd);
    HYDU_ASSERT(proxy, status);
    pg_scratch = (struct HYD_pmcd_pmi_pg_scratch *) proxy->pg->pg_scratch;

    for (i = 0; i < token_count; i++) {
        status = HYD_pmcd_pmi_add_kvs(tokens[i].key, tokens[i].val, pg_scratch->kvs, &ret);
        HYDU_ERR_POP(status, "unable to add keypair to kvs\n");
    }

  fn_exit:
    if (tokens)
        HYD_pmcd_pmi_free_tokens(tokens, token_count);
    HYDU_FUNC_EXIT();
    return status;

  fn_fail:
    goto fn_exit;
}
Пример #3
0
static HYD_status fn_abort(int fd, int pid, int pgid, char *args[])
{
    int token_count;
    struct HYD_pmcd_token *tokens;
    /* set a default exit code of 1 */
    int exitcode = 1;
    HYD_status status = HYD_SUCCESS;

    HYDU_FUNC_ENTER();

    status = HYD_pmcd_pmi_args_to_tokens(args, &tokens, &token_count);
    HYDU_ERR_POP(status, "unable to convert args to tokens\n");

    if (HYD_pmcd_pmi_find_token_keyval(tokens, token_count, "exitcode") == NULL)
        HYDU_ERR_POP(status, "cannot find token: exitcode\n");

    exitcode = atoi(HYD_pmcd_pmi_find_token_keyval(tokens, token_count, "exitcode"));

  fn_exit:
    /* clean everything up and exit */
    status = HYDT_bsci_wait_for_completion(0);
    exit(exitcode);

    /* never get here */
    HYDU_FUNC_EXIT();
    return status;

  fn_fail:
    goto fn_exit;
}
Пример #4
0
/*
 * HYDU_find_full_path - prefix execname with the directory reported by
 * HYDU_find_in_path().
 *
 * Returns a newly joined string "<directory><execname>", or NULL when the
 * search or the join fails or when no directory string is handed back.
 */
char *HYDU_find_full_path(const char *execname)
{
    HYD_status status = HYD_SUCCESS;
    char *dir = NULL;
    char *full_path = NULL;
    char *parts[HYD_NUM_TMP_STRINGS] = { NULL };

    HYDU_FUNC_ENTER();

    status = HYDU_find_in_path(execname, &dir);
    HYDU_ERR_POP(status, "error while searching for executable in user path\n");

    /* nothing to prepend: leave the result as NULL */
    if (dir == NULL)
        goto fn_exit;

    parts[0] = MPL_strdup(dir);
    parts[1] = MPL_strdup(execname);
    parts[2] = NULL;

    status = HYDU_str_alloc_and_join(parts, &full_path);
    HYDU_ERR_POP(status, "error joining strings\n");

fn_exit:
    /* the pieces are released on success and on failure alike */
    HYDU_free_strlist(parts);
    if (dir != NULL)
        MPL_free(dir);
    HYDU_FUNC_EXIT();
    return full_path;

fn_fail:
    goto fn_exit;
}
Пример #5
0
/*
 * cmd_response - send a PMI response to the proxy connected on fd.
 *
 * A PMI_RESPONSE header (pid, PMI version 1, payload length) is written
 * first, followed by the bytes of cmd without its terminating NUL.  When
 * debugging is enabled the response is also dumped to stdout.
 *
 * Returns HYD_SUCCESS, or an error status when a write fails or the socket
 * is reported closed.
 */
static HYD_status cmd_response(int fd, int pid, const char *cmd)
{
    HYD_status status = HYD_SUCCESS;
    struct HYD_pmcd_hdr hdr;
    int sent, closed;

    HYDU_FUNC_ENTER();

    /* describe the payload that follows */
    HYD_pmcd_init_header(&hdr);
    hdr.pmi_version = 1;
    hdr.pid = pid;
    hdr.cmd = PMI_RESPONSE;
    hdr.buflen = strlen(cmd);

    status = HYDU_sock_write(fd, &hdr, sizeof(hdr), &sent, &closed);
    HYDU_ERR_POP(status, "unable to send PMI_RESPONSE header to proxy\n");
    HYDU_ASSERT(!closed, status);

    if (HYD_server_info.user_global.debug)
        HYDU_dump(stdout, "PMI response to fd %d pid %d: %s", fd, pid, cmd);

    /* now the command text itself */
    status = HYDU_sock_write(fd, cmd, strlen(cmd), &sent, &closed);
    HYDU_ERR_POP(status, "unable to send response to command\n");
    HYDU_ASSERT(!closed, status);

  fn_exit:
    HYDU_FUNC_EXIT();
    return status;

  fn_fail:
    goto fn_exit;
}
Пример #6
0
/*
 * HYDU_send_strlist - write a NULL-terminated string list to fd.
 *
 * Wire layout: an int holding the value of HYDU_strlist_lastidx(strlist),
 * then for every string an int length (including the terminating NUL)
 * followed by that many bytes.
 *
 * Returns HYD_SUCCESS, or an error status when a write fails or the socket
 * is reported closed.
 */
HYD_status HYDU_send_strlist(int fd, char **strlist)
{
    HYD_status status = HYD_SUCCESS;
    char **entry;
    int count, nbytes;
    int sent, closed;

    HYDU_FUNC_ENTER();

    /* announce the number of entries first */
    count = HYDU_strlist_lastidx(strlist);
    status = HYDU_sock_write(fd, &count, sizeof(int), &sent, &closed, HYDU_SOCK_COMM_MSGWAIT);
    HYDU_ERR_POP(status, "unable to write data to proxy\n");
    HYDU_ASSERT(!closed, status);

    /* then each entry as a length-prefixed, NUL-terminated record */
    for (entry = strlist; *entry != NULL; entry++) {
        nbytes = strlen(*entry) + 1;

        status = HYDU_sock_write(fd, &nbytes, sizeof(int), &sent, &closed,
                                 HYDU_SOCK_COMM_MSGWAIT);
        HYDU_ERR_POP(status, "unable to write data to proxy\n");
        HYDU_ASSERT(!closed, status);

        status = HYDU_sock_write(fd, *entry, nbytes, &sent, &closed, HYDU_SOCK_COMM_MSGWAIT);
        HYDU_ERR_POP(status, "unable to write data to proxy\n");
        HYDU_ASSERT(!closed, status);
    }

fn_exit:
    HYDU_FUNC_EXIT();
    return status;

fn_fail:
    goto fn_exit;
}
Пример #7
0
/*
 * get_abs_wd - resolve a directory name to an absolute one.
 *
 *   wd == NULL            -> *abs_wd = NULL
 *   wd not starting '.'   -> *abs_wd = wd itself (borrowed, not a copy)
 *   otherwise             -> chdir(wd), store HYDU_getcwd() in *abs_wd,
 *                            then chdir back to the original directory
 *
 * Note the mixed ownership of *abs_wd: it is either the caller's own
 * pointer or a string obtained from HYDU_getcwd().
 *
 * NOTE(review): the two chdir failure checks use HYDU_ERR_POP while status
 * is still HYD_SUCCESS, so they presumably do not jump; that behaviour is
 * left unchanged here because callers walking PATH may rely on it.
 */
static HYD_status get_abs_wd(const char *wd, char **abs_wd)
{
    int ret;
    char *cwd = NULL;
    HYD_status status = HYD_SUCCESS;

    if (wd == NULL) {
        *abs_wd = NULL;
        goto fn_exit;
    }

    if (wd[0] != '.') {
        *abs_wd = (char *) wd;
        goto fn_exit;
    }

    /* remember where we are so the working directory can be restored */
    cwd = HYDU_getcwd();
    ret = chdir(wd);
    if (ret < 0)
        HYDU_ERR_POP(status, "error calling chdir\n");

    *abs_wd = HYDU_getcwd();
    ret = chdir(cwd);
    if (ret < 0)
        HYDU_ERR_POP(status, "error calling chdir\n");

fn_exit:
    /* the saved directory string is only needed inside this function;
     * assumes HYDU_getcwd() memory is released with MPL_free — TODO confirm */
    if (cwd)
        MPL_free(cwd);
    return status;

fn_fail:
    goto fn_exit;
}
Пример #8
0
/*
 * send_cmd_downstream - write a PMI command to fd.
 *
 * The command is preceded by its length, formatted as a 6-character,
 * right-aligned decimal field ("%6u"); the command bytes follow without
 * the terminating NUL.  When debugging is enabled the command is also
 * dumped to stdout.
 *
 * Returns HYD_SUCCESS, or an error status when a write fails or the socket
 * is reported closed.
 */
static HYD_status send_cmd_downstream(int fd, const char *cmd)
{
    HYD_status status = HYD_SUCCESS;
    char len_field[7];
    int sent, closed;

    HYDU_FUNC_ENTER();

    /* length prefix: six characters on the wire, the seventh is the NUL */
    MPL_snprintf(len_field, 7, "%6u", (unsigned) strlen(cmd));
    status = HYDU_sock_write(fd, len_field, 6, &sent, &closed, HYDU_SOCK_COMM_MSGWAIT);
    HYDU_ERR_POP(status, "error writing PMI line\n");
    /* FIXME: aborting here is wrong when data cannot be sent downstream;
     * the upper layer should decide whether that is fatal. */
    HYDU_ASSERT(!closed, status);

    if (HYD_pmcd_pmip.user_global.debug)
        HYDU_dump(stdout, "PMI response: %s\n", cmd);

    /* command body */
    status = HYDU_sock_write(fd, cmd, strlen(cmd), &sent, &closed, HYDU_SOCK_COMM_MSGWAIT);
    HYDU_ERR_POP(status, "error writing PMI line\n");
    HYDU_ASSERT(!closed, status);

  fn_exit:
    HYDU_FUNC_EXIT();
    return status;

  fn_fail:
    goto fn_exit;
}
Пример #9
0
/*
 * alloc_fwd_hash - allocate and initialize a forwarding entry.
 *
 * Allocates *fwd_hash, records the in/out descriptors, resets the buffer
 * offset and count, clears the next link and switches both descriptors to
 * non-blocking mode.
 *
 * Returns HYD_SUCCESS or an error status.
 * NOTE(review): when switching a descriptor to non-blocking mode fails,
 * *fwd_hash stays allocated; presumably the caller releases it — confirm.
 */
static HYD_status alloc_fwd_hash(struct fwd_hash **fwd_hash, int in, int out)
{
    HYD_status status = HYD_SUCCESS;

    HYDU_FUNC_ENTER();

    HYDU_MALLOC_OR_JUMP(*fwd_hash, struct fwd_hash *, sizeof(struct fwd_hash), status);
    (*fwd_hash)->in = in;
    (*fwd_hash)->out = out;

    (*fwd_hash)->buf_offset = 0;
    (*fwd_hash)->buf_count = 0;

    (*fwd_hash)->next = NULL;

    /* the message now names the descriptor that actually failed */
    status = HYDU_sock_set_nonblock(in);
    HYDU_ERR_POP(status, "unable to set in-socket to non-blocking\n");

    status = HYDU_sock_set_nonblock(out);
    HYDU_ERR_POP(status, "unable to set out-socket to non-blocking\n");

  fn_exit:
    HYDU_FUNC_EXIT();
    return status;

  fn_fail:
    goto fn_exit;
}
Пример #10
0
/*
 * fn_info_putnodeattr - handle a PMI-2 "info-putnodeattr" request.
 *
 * Stores the "key"/"value" pair from args in the local KVS, replies on fd
 * with "cmd=info-putnodeattr-response;" carrying the return code reported
 * by the KVS (and the "thrid" token when one was supplied), and finally
 * pokes the progress engine if a pending request is waiting for this key.
 *
 * Returns HYD_SUCCESS, or an error status when tokenizing fails, a required
 * token is missing, the KVS insert fails, the reply cannot be sent or the
 * progress poke fails.
 */
static HYD_status fn_info_putnodeattr(int fd, char *args[])
{
    struct HYD_string_stash stash;
    char *key, *val, *thrid, *cmd = NULL;
    struct HYD_pmcd_token *tokens = NULL;
    int token_count = 0, ret;
    struct HYD_pmcd_pmi_v2_reqs *req;
    HYD_status status = HYD_SUCCESS;

    HYDU_FUNC_ENTER();

    status = HYD_pmcd_pmi_args_to_tokens(args, &tokens, &token_count);
    HYDU_ERR_POP(status, "unable to convert args to tokens\n");

    key = HYD_pmcd_pmi_find_token_keyval(tokens, token_count, "key");
    HYDU_ERR_CHKANDJUMP(status, key == NULL, HYD_INTERNAL_ERROR, "unable to find key token\n");

    val = HYD_pmcd_pmi_find_token_keyval(tokens, token_count, "value");
    HYDU_ERR_CHKANDJUMP(status, val == NULL, HYD_INTERNAL_ERROR, "unable to find value token\n");

    thrid = HYD_pmcd_pmi_find_token_keyval(tokens, token_count, "thrid");

    status = HYD_pmcd_pmi_add_kvs(key, val, HYD_pmcd_pmip.local.kvs, &ret);
    HYDU_ERR_POP(status, "unable to put data into kvs\n");

    HYD_STRING_STASH_INIT(stash);
    HYD_STRING_STASH(stash, MPL_strdup("cmd=info-putnodeattr-response;"), status);
    if (thrid) {
        HYD_STRING_STASH(stash, MPL_strdup("thrid="), status);
        HYD_STRING_STASH(stash, MPL_strdup(thrid), status);
        HYD_STRING_STASH(stash, MPL_strdup(";"), status);
    }
    HYD_STRING_STASH(stash, MPL_strdup("rc="), status);
    HYD_STRING_STASH(stash, HYDU_int_to_str(ret), status);
    HYD_STRING_STASH(stash, MPL_strdup(";"), status);

    HYD_STRING_SPIT(stash, cmd, status);

    /* a failed send is reported instead of being silently dropped */
    status = send_cmd_downstream(fd, cmd);
    HYDU_ERR_POP(status, "error sending command downstream\n");
    MPL_free(cmd);
    cmd = NULL;

    for (req = pending_reqs; req; req = req->next) {
        if (!strcmp(req->key, key)) {
            /* Poke the progress engine before exiting */
            status = poke_progress(key);
            HYDU_ERR_POP(status, "poke progress error\n");
            break;
        }
    }

  fn_exit:
    /* only non-NULL here when we left early with the reply still allocated */
    if (cmd)
        MPL_free(cmd);
    if (tokens)
        HYD_pmcd_pmi_free_tokens(tokens, token_count);
    HYDU_FUNC_EXIT();
    return status;

  fn_fail:
    goto fn_exit;
}
Пример #11
0
/*
 * HYDU_find_in_path - locate the PATH directory that contains execname.
 *
 * Each PATH entry (separated by ';' or ':') is resolved with get_abs_wd()
 * and tested with exists() for "<entry>/<execname>".  On the first hit
 * *path is set to a newly joined "<entry>/" string.  When PATH is unset,
 * empty, or no entry matches, *path is set to a duplicate of "".
 *
 * Returns HYD_SUCCESS, or an error status when resolving an entry or
 * joining strings fails.
 */
HYD_status HYDU_find_in_path(const char *execname, char **path)
{
    char *tmp[HYD_NUM_TMP_STRINGS];
    char *path_loc = NULL, *test_loc = NULL, *user_path = NULL;
    HYD_status status = HYD_SUCCESS;

    HYDU_FUNC_ENTER();

    /* The executable is somewhere in the user's path. Find it.  Work on a
     * private copy because strtok() modifies the string it scans. */
    if (MPL_env2str("PATH", (const char **) &user_path))
        user_path = MPL_strdup(user_path);
    else
        user_path = NULL;       /* never read an unset pointer below */

    if (user_path) {    /* If the PATH environment exists */
        status = get_abs_wd(strtok(user_path, ";:"), &test_loc);
        HYDU_ERR_POP(status, "error getting absolute working dir\n");

        /* test before use: an empty PATH yields no first entry at all */
        while (test_loc) {
            tmp[0] = MPL_strdup(test_loc);
            tmp[1] = MPL_strdup("/");
            tmp[2] = MPL_strdup(execname);
            tmp[3] = NULL;

            status = HYDU_str_alloc_and_join(tmp, &path_loc);
            /* release the pieces before checking, so they cannot leak */
            HYDU_free_strlist(tmp);
            HYDU_ERR_POP(status, "unable to join strings\n");

            if (exists(path_loc)) {
                tmp[0] = MPL_strdup(test_loc);
                tmp[1] = MPL_strdup("/");
                tmp[2] = NULL;

                status = HYDU_str_alloc_and_join(tmp, path);
                HYDU_free_strlist(tmp);
                HYDU_ERR_POP(status, "unable to join strings\n");

                goto fn_exit;   /* We are done */
            }

            MPL_free(path_loc);
            path_loc = NULL;

            status = get_abs_wd(strtok(NULL, ";:"), &test_loc);
            HYDU_ERR_POP(status, "error getting absolute working dir\n");
        }
    }

    /* There is either no PATH environment or we could not find the
     * file in the PATH. Just return an empty path */
    *path = MPL_strdup("");

fn_exit:
    if (user_path)
        MPL_free(user_path);
    if (path_loc)
        MPL_free(path_loc);
    HYDU_FUNC_EXIT();
    return status;

fn_fail:
    goto fn_exit;
}
Пример #12
0
HYD_status HYDT_topo_hwloc_init(const char *binding, const char *mapping, const char *membind)
{
    HYD_status status = HYD_SUCCESS;

    HYDU_FUNC_ENTER();

    HYDU_ASSERT(binding, status);

    hwloc_topology_init(&topology);
    hwloc_topology_load(topology);

    HYDT_topo_hwloc_info.total_num_pus = hwloc_get_nbobjs_by_type(topology, HWLOC_OBJ_PU);

    hwloc_initialized = 1;

    /* bindings that don't require mapping */
    if (!strncmp(binding, "user:"******"user:"******"user:"******"error binding to %s\n", binding);
        goto fn_exit;
    }
    else if (!strcmp(binding, "rr")) {
        status = handle_rr_binding();
        HYDU_ERR_POP(status, "error binding to %s\n", binding);
        goto fn_exit;
    }

    status = handle_bitmap_binding(binding, mapping ? mapping : binding);
    HYDU_ERR_POP(status, "error binding with bind \"%s\" and map \"%s\"\n", binding, mapping);


    /* Memory binding options */
    if (membind == NULL)
        HYDT_topo_hwloc_info.membind = HWLOC_MEMBIND_DEFAULT;
    else if (!strcmp(membind, "firsttouch"))
        HYDT_topo_hwloc_info.membind = HWLOC_MEMBIND_FIRSTTOUCH;
    else if (!strcmp(membind, "nexttouch"))
        HYDT_topo_hwloc_info.membind = HWLOC_MEMBIND_NEXTTOUCH;
    else if (!strncmp(membind, "bind:", strlen("bind:"))) {
        HYDT_topo_hwloc_info.membind = HWLOC_MEMBIND_BIND;
    }
    else if (!strncmp(membind, "interleave:", strlen("interleave:"))) {
        HYDT_topo_hwloc_info.membind = HWLOC_MEMBIND_INTERLEAVE;
    }
    else if (!strncmp(membind, "replicate:", strlen("replicate:"))) {
        HYDT_topo_hwloc_info.membind = HWLOC_MEMBIND_REPLICATE;
    }
    else {
        HYDU_ERR_SETANDJUMP(status, HYD_INTERNAL_ERROR,
                            "unrecognized membind policy \"%s\"\n", membind);
    }

  fn_exit:
    HYDU_FUNC_EXIT();
    return status;

  fn_fail:
    goto fn_exit;
}
Пример #13
0
/*
 * HYDT_ckpoint_blcr_restart - restart processes from a BLCR checkpoint.
 *
 * Steps:
 *   1. create the listener socket for stdin/out/err and append its port to
 *      envlist under STDINOUTERR_PORT_NAME;
 *   2. write the environment file for the given ranks;
 *   3. open "<prefix>/context-num<ckpt_num>-<pgid>-<id>" read-only and
 *      issue a restart request on it with CR_RSTRT_RESTORE_PID;
 *   4. collect the stdio descriptors and pids of the restarted processes
 *      into in/out/err/pid.
 *
 * Returns HYD_SUCCESS or an error status.  The checkpoint file descriptor
 * is closed on every path.
 */
HYD_status HYDT_ckpoint_blcr_restart(const char *prefix, int pgid, int id, int ckpt_num,
                                     struct HYD_env *envlist, int num_ranks, int ranks[],
                                     int *in, int *out, int *err, int *pid)
{
    HYD_status status = HYD_SUCCESS;
    int ret;
    int context_fd = -1;        /* -1 while no checkpoint file is open */
    cr_restart_handle_t cr_handle;
    cr_restart_args_t args;
    char filename[256];
    char port_str[64];
    int port;

    HYDU_FUNC_ENTER();

    /* create listener socket for stdin/out/err */
    status = create_stdinouterr_sock(&port);
    HYDU_ERR_POP(status, "failed to create stdin/out/err socket\n");
    MPL_snprintf(port_str, sizeof(port_str), "%d", port);
    status = HYDU_append_env_to_list(STDINOUTERR_PORT_NAME, port_str, &envlist);
    HYDU_ERR_POP(status, "failed to add to env list\n");

    status = create_env_file(envlist, num_ranks, ranks);
    if (status)
        HYDU_ERR_POP(status, "blcr restart\n");

    /* open the checkpoint file */
    MPL_snprintf(filename, sizeof(filename), "%s/context-num%d-%d-%d", prefix, ckpt_num, pgid,
                 id);
    context_fd = open(filename, O_RDONLY /* | O_LARGEFILE */);
    HYDU_ERR_CHKANDJUMP(status, context_fd < 0, HYD_INTERNAL_ERROR, "open failed, %s\n",
                        strerror(errno));

    /* ... initialize the request structure */
    cr_initialize_restart_args_t(&args);
    args.cr_fd = context_fd;
    args.cr_flags = CR_RSTRT_RESTORE_PID;

    /* ... issue the request */
    ret = cr_request_restart(&args, &cr_handle);
    HYDU_ERR_CHKANDJUMP(status, ret, HYD_INTERNAL_ERROR, "cr_request_restart failed, %s\n",
                        strerror(errno));

    ret = close(context_fd);
    /* forget the descriptor whatever close() returned, so the failure path
     * below never closes it a second time */
    context_fd = -1;
    HYDU_ERR_CHKANDJUMP(status, ret, HYD_INTERNAL_ERROR, "close failed, %s\n",
                        strerror(errno));

    /* get fds for stdin/out/err sockets, and get pids of restarted processes */
    status = wait_for_stdinouterr_sockets(num_ranks, ranks, in, out, err, pid);
    if (status)
        HYDU_ERR_POP(status, "blcr restart\n");

  fn_exit:
    HYDU_FUNC_EXIT();
    return status;

  fn_fail:
    /* do not leak the checkpoint file when the restart request failed */
    if (context_fd >= 0)
        close(context_fd);
    goto fn_exit;
}
Пример #14
0
/*
 * handle_pmi_cmd - parse one PMI command and dispatch it to its handler.
 *
 * Selects the handler table for pmi_version (the v1 table for version 1,
 * the v2 table otherwise), parses buf into a command name plus argument
 * list, and calls the first table entry whose name equals the command.
 *
 * Returns HYD_SUCCESS, or an error status when parsing fails, the handler
 * reports an error or no table entry matches the command.
 */
static HYD_status handle_pmi_cmd(int fd, int pgid, int pid, char *buf, int pmi_version)
{
    HYD_status status = HYD_SUCCESS;
    struct HYD_pmcd_pmi_handle *entry;
    char *cmd = NULL;
    char **args = NULL;
    int idx;

    HYDU_FUNC_ENTER();

    if (pmi_version != 1)
        HYD_pmcd_pmi_handle = HYD_pmcd_pmi_v2;
    else
        HYD_pmcd_pmi_handle = HYD_pmcd_pmi_v1;

    if (HYD_server_info.user_global.debug)
        HYDU_dump(stdout, "[pgid: %d] got PMI command: %s\n", pgid, buf);

    /* argument vector, cleared so that unused slots read as NULL */
    HYDU_MALLOC(args, char **, MAX_PMI_ARGS * sizeof(char *), status);
    for (idx = 0; idx < MAX_PMI_ARGS; idx++)
        args[idx] = NULL;

    status = HYD_pmcd_pmi_parse_pmi_cmd(buf, pmi_version, &cmd, args);
    HYDU_ERR_POP(status, "unable to parse PMI command\n");

#if defined ENABLE_PROFILING
    if (HYD_server_info.enable_profiling)
        HYD_server_info.num_pmi_calls++;
#endif /* ENABLE_PROFILING */

    /* the table ends with an entry whose handler is NULL */
    for (entry = HYD_pmcd_pmi_handle; entry->handler; entry++) {
        if (strcmp(cmd, entry->cmd) != 0)
            continue;

        status = entry->handler(fd, pid, pgid, args);
        HYDU_ERR_POP(status, "PMI handler returned error\n");
        break;
    }

    if (entry->handler == NULL) {
        /* ran off the end of the table: unknown command */
        HYDU_ERR_SETANDJUMP(status, HYD_INTERNAL_ERROR,
                            "Unrecognized PMI command: %s | cleaning up processes\n", cmd);
    }

  fn_exit:
    if (cmd)
        HYDU_FREE(cmd);
    if (args) {
        HYDU_free_strlist(args);
        HYDU_free(args);
    }
    HYDU_FUNC_EXIT();
    return status;

  fn_fail:
    goto fn_exit;
}
Пример #15
0
/*
 * add_exec_to_proxy - append a copy of exec to the proxy's executable list.
 *
 * A new list node is allocated at the tail of proxy->exec_list and filled
 * with duplicates of exec's argument strings, working directory,
 * environment property and user environment, plus its appnum; its process
 * count is num_procs.  The process counters of the proxy and of its node
 * are both increased by num_procs.
 *
 * Returns HYD_SUCCESS, or the error status of the node allocation.
 */
static HYD_status add_exec_to_proxy(struct HYD_exec *exec, struct HYD_proxy *proxy, int num_procs)
#endif
{
    int arg;
    struct HYD_exec **tail, *node;
    HYD_status status = HYD_SUCCESS;

    /* find the NULL link that ends the list: either proxy->exec_list
     * itself (empty list) or the next pointer of the last element */
    for (tail = &proxy->exec_list; *tail; tail = &(*tail)->next);

    status = HYDU_alloc_exec(tail);
    HYDU_ERR_POP(status, "unable to allocate proxy exec\n");
    node = *tail;

    /* deep-copy the argument vector, terminator included */
    for (arg = 0; exec->exec[arg]; arg++)
        node->exec[arg] = HYDU_strdup(exec->exec[arg]);
    node->exec[arg] = NULL;

    node->wdir = HYDU_strdup(exec->wdir);
    node->proc_count = num_procs;
#if defined(FINEGRAIN_MPI)
    node->nfg = exec->nfg;
    node->start_rank = current_exec_start_rank;
#endif
    if (exec->env_prop)
        node->env_prop = HYDU_strdup(exec->env_prop);
    else
        node->env_prop = NULL;
    node->user_env = HYDU_env_list_dup(exec->user_env);
    node->appnum = exec->appnum;

    proxy->proxy_process_count += num_procs;
    proxy->node->active_processes += num_procs;

  fn_exit:
    return status;

  fn_fail:
    goto fn_exit;
}
Пример #16
0
static HYD_status fn_publish_name(int fd, int pid, int pgid, char *args[])
{
    struct HYD_string_stash stash;
    char *cmd, *val;
    int token_count;
    struct HYD_pmcd_token *tokens = NULL;
    char *name = NULL, *port = NULL;
    int success = 0;
    HYD_status status = HYD_SUCCESS;

    HYDU_FUNC_ENTER();

    status = HYD_pmcd_pmi_args_to_tokens(args, &tokens, &token_count);
    HYDU_ERR_POP(status, "unable to convert args to tokens\n");

    if ((val = HYD_pmcd_pmi_find_token_keyval(tokens, token_count, "service")) == NULL)
        HYDU_ERR_POP(status, "cannot find token: service\n");
    name = MPL_strdup(val);

    if ((val = HYD_pmcd_pmi_find_token_keyval(tokens, token_count, "port")) == NULL)
        HYDU_ERR_POP(status, "cannot find token: port\n");
    port = MPL_strdup(val);

    status = HYD_pmcd_pmi_publish(name, port, &success);
    HYDU_ERR_POP(status, "error publishing service\n");

    HYD_STRING_STASH_INIT(stash);
    if (success)
        HYD_STRING_STASH(stash, MPL_strdup("cmd=publish_result info=ok rc=0 msg=success\n"),
                         status);
    else
        HYD_STRING_STASH(stash,
                         MPL_strdup("cmd=publish_result info=ok rc=1 msg=key_already_present\n"),
                         status);

    HYD_STRING_SPIT(stash, cmd, status);

    status = cmd_response(fd, pid, cmd);
    HYDU_ERR_POP(status, "send command failed\n");
    MPL_free(cmd);

  fn_exit:
    if (tokens)
        HYD_pmcd_pmi_free_tokens(tokens, token_count);
    if (name)
        MPL_free(name);
    if (port)
        MPL_free(port);

    HYDU_FUNC_EXIT();
    return status;

  fn_fail:
    goto fn_exit;
}
Пример #17
0
static HYD_status fn_name_lookup(int fd, int pid, int pgid, char *args[])
{
    struct HYD_string_stash stash;
    char *cmd, *thrid, *name, *value;
    int token_count;
    struct HYD_pmcd_token *tokens = NULL;
    HYD_status status = HYD_SUCCESS;

    HYDU_FUNC_ENTER();

    status = HYD_pmcd_pmi_args_to_tokens(args, &tokens, &token_count);
    HYDU_ERR_POP(status, "unable to convert args to tokens\n");

    thrid = HYD_pmcd_pmi_find_token_keyval(tokens, token_count, "thrid");

    if ((name = HYD_pmcd_pmi_find_token_keyval(tokens, token_count, "name")) == NULL)
        HYDU_ERR_POP(status, "cannot find token: name\n");

    status = HYD_pmcd_pmi_lookup(name, &value);
    HYDU_ERR_POP(status, "error while looking up service\n");

    HYD_STRING_STASH_INIT(stash);
    HYD_STRING_STASH(stash, HYDU_strdup("cmd=name-lookup-response;"), status);
    if (thrid) {
        HYD_STRING_STASH(stash, HYDU_strdup("thrid="), status);
        HYD_STRING_STASH(stash, HYDU_strdup(thrid), status);
        HYD_STRING_STASH(stash, HYDU_strdup(";"), status);
    }
    if (value) {
        HYD_STRING_STASH(stash, HYDU_strdup("port="), status);
        HYD_STRING_STASH(stash, HYDU_strdup(value), status);
        HYD_STRING_STASH(stash, HYDU_strdup(";found=TRUE;rc=0;"), status);
    }
    else {
        HYD_STRING_STASH(stash, HYDU_strdup("found=FALSE;rc=1;"), status);
    }

    HYD_STRING_SPIT(stash, cmd, status);

    status = cmd_response(fd, pid, cmd);
    HYDU_ERR_POP(status, "send command failed\n");
    HYDU_FREE(cmd);

  fn_exit:
    if (tokens)
        HYD_pmcd_pmi_free_tokens(tokens, token_count);
    HYDU_FUNC_EXIT();
    return status;

  fn_fail:
    goto fn_exit;
}
Пример #18
0
/*
 * HYDU_sock_create_and_listen_portstr - open a listening socket, register
 * it with the demux engine and build the "<host>:<port>" string for it.
 *
 * The host part is, in order of preference: the IP of iface, the given
 * hostname, or the name returned by gethostname().  port_range may be NULL.
 * callback and userp are passed to HYDT_dmx_register_fd() for the listening
 * descriptor.  On success *port_str holds a newly allocated string.
 *
 * Returns HYD_SUCCESS or an error status.
 */
HYD_status
HYDU_sock_create_and_listen_portstr(char *iface, char *hostname, char *port_range,
                                    char **port_str,
                                    HYD_status(*callback) (int fd, HYD_event_t events,
                                                           void *userp), void *userp)
{
    int listenfd;
    char *sport = NULL, *real_port_range = NULL, *ip = NULL;
    size_t port_str_len;
    uint16_t port;
    HYD_status status = HYD_SUCCESS;

    /* Listen on a port in the port range; the range is handed over as a
     * private copy */
    port = 0;
    real_port_range = port_range ? MPL_strdup(port_range) : NULL;
    status = HYDU_sock_listen(&listenfd, real_port_range, &port);
    HYDU_ERR_POP(status, "unable to listen on port\n");

    /* Register the listening socket with the demux engine */
    status = HYDT_dmx_register_fd(1, &listenfd, HYD_POLLIN, userp, callback);
    HYDU_ERR_POP(status, "unable to register fd\n");

    /* Create a port string for MPI processes to use to connect to */
    if (iface) {
        status = HYDU_sock_get_iface_ip(iface, &ip);
        HYDU_ERR_POP(status, "unable to get network interface IP\n");
    }
    else if (hostname) {
        ip = MPL_strdup(hostname);
    }
    else {
        char localhost[MAX_HOSTNAME_LEN] = { 0 };

        if (gethostname(localhost, MAX_HOSTNAME_LEN) < 0)
            HYDU_ERR_SETANDJUMP(status, HYD_SOCK_ERROR, "unable to get local hostname\n");

        ip = MPL_strdup(localhost);
    }

    sport = HYDU_int_to_str(port);
    /* both pieces are measured below, so neither may be NULL */
    HYDU_ERR_CHKANDJUMP(status, ip == NULL || sport == NULL, HYD_INTERNAL_ERROR,
                        "unable to build port string\n");

    /* "<ip>" + ':' + "<port>" + NUL */
    port_str_len = strlen(ip) + 1 + strlen(sport) + 1;
    HYDU_MALLOC_OR_JUMP(*port_str, char *, port_str_len, status);
    MPL_snprintf(*port_str, port_str_len, "%s:%s", ip, sport);

  fn_exit:
    /* temporaries are released on success and on failure alike */
    if (sport)
        MPL_free(sport);
    if (real_port_range)
        MPL_free(real_port_range);
    if (ip)
        MPL_free(ip);
    return status;

  fn_fail:
    goto fn_exit;
}
Пример #19
0
/*
 * send_cmd_upstream - forward a PMI-2 command over the upstream control
 * socket.
 *
 * The payload is start followed by the entries of args joined with ';'.
 * A PMI_CMD header is sent first (pid field set to fd, PMI version 2,
 * payload length), then the payload itself.  When debugging is enabled the
 * payload is also dumped to stdout.
 *
 * Returns HYD_SUCCESS, or an error status when building the payload or a
 * write fails, or when the socket is reported closed.
 */
static HYD_status send_cmd_upstream(const char *start, int fd, char *args[])
{
    HYD_status status = HYD_SUCCESS;
    struct HYD_string_stash stash;
    struct HYD_pmcd_hdr hdr;
    char *payload = NULL;
    int idx, sent, closed;

    HYDU_FUNC_ENTER();

    /* assemble "<start><arg0>;<arg1>;...;<argN>" */
    HYD_STRING_STASH_INIT(stash);
    HYD_STRING_STASH(stash, MPL_strdup(start), status);
    for (idx = 0; args[idx]; idx++) {
        if (idx > 0) {
            HYD_STRING_STASH(stash, MPL_strdup(";"), status);
        }
        HYD_STRING_STASH(stash, MPL_strdup(args[idx]), status);
    }

    HYD_STRING_SPIT(stash, payload, status);

    /* header announcing the payload */
    HYD_pmcd_init_header(&hdr);
    hdr.pmi_version = 2;
    hdr.cmd = PMI_CMD;
    hdr.pid = fd;
    hdr.buflen = strlen(payload);
    status =
        HYDU_sock_write(HYD_pmcd_pmip.upstream.control, &hdr, sizeof(hdr), &sent, &closed,
                        HYDU_SOCK_COMM_MSGWAIT);
    HYDU_ERR_POP(status, "unable to send PMI header upstream\n");
    HYDU_ASSERT(!closed, status);

    if (HYD_pmcd_pmip.user_global.debug)
        HYDU_dump(stdout, "forwarding command (%s) upstream\n", payload);

    status = HYDU_sock_write(HYD_pmcd_pmip.upstream.control, payload, hdr.buflen, &sent,
                             &closed, HYDU_SOCK_COMM_MSGWAIT);
    HYDU_ERR_POP(status, "unable to send PMI command upstream\n");
    HYDU_ASSERT(!closed, status);

  fn_exit:
    if (payload)
        MPL_free(payload);
    HYDU_FUNC_EXIT();
    return status;

  fn_fail:
    goto fn_exit;
}
Пример #20
0
/*
 * fn_info_getjobattr - handle a PMI-2 "info-getjobattr" request.
 *
 * A request for the key "PMI_process_mapping" is answered locally on fd
 * with the mapping string held in HYD_pmcd_pmip.system_global; the
 * optional "thrid" token is echoed back.  Any other key is forwarded
 * upstream unchanged.
 *
 * Returns HYD_SUCCESS, or an error status when tokenizing fails, the "key"
 * token is missing, or the reply / forwarded command cannot be sent.
 */
static HYD_status fn_info_getjobattr(int fd, char *args[])
{
    struct HYD_string_stash stash;
    char *cmd = NULL, *key, *thrid;
    /* initialized so the cleanup path is safe when tokenizing fails */
    struct HYD_pmcd_token *tokens = NULL;
    int token_count = 0;
    HYD_status status = HYD_SUCCESS;

    HYDU_FUNC_ENTER();

    status = HYD_pmcd_pmi_args_to_tokens(args, &tokens, &token_count);
    HYDU_ERR_POP(status, "unable to convert args to tokens\n");

    key = HYD_pmcd_pmi_find_token_keyval(tokens, token_count, "key");
    HYDU_ERR_CHKANDJUMP(status, key == NULL, HYD_INTERNAL_ERROR, "unable to find token: key\n");

    thrid = HYD_pmcd_pmi_find_token_keyval(tokens, token_count, "thrid");

    if (!strcmp(key, "PMI_process_mapping")) {
        HYD_STRING_STASH_INIT(stash);
        HYD_STRING_STASH(stash, MPL_strdup("cmd=info-getjobattr-response;"), status);
        if (thrid) {
            HYD_STRING_STASH(stash, MPL_strdup("thrid="), status);
            HYD_STRING_STASH(stash, MPL_strdup(thrid), status);
            HYD_STRING_STASH(stash, MPL_strdup(";"), status);
        }
        HYD_STRING_STASH(stash, MPL_strdup("found=TRUE;value="), status);
        HYD_STRING_STASH(stash, MPL_strdup(HYD_pmcd_pmip.system_global.pmi_process_mapping),
                         status);
        HYD_STRING_STASH(stash, MPL_strdup(";rc=0;"), status);

        HYD_STRING_SPIT(stash, cmd, status);

        /* checked like the upstream send in the other branch */
        status = send_cmd_downstream(fd, cmd);
        HYDU_ERR_POP(status, "error sending command downstream\n");
        MPL_free(cmd);
        cmd = NULL;
    } else {
        status = send_cmd_upstream("cmd=info-getjobattr;", fd, args);
        HYDU_ERR_POP(status, "error sending command upstream\n");
    }

  fn_exit:
    /* only non-NULL here when we left early with the reply still allocated */
    if (cmd)
        MPL_free(cmd);
    if (tokens)
        HYD_pmcd_pmi_free_tokens(tokens, token_count);
    HYDU_FUNC_EXIT();
    return status;

  fn_fail:
    goto fn_exit;
}
Пример #21
0
/*
 * HYDU_correct_wdir - turn *wdir into an absolute working directory.
 *
 *   *wdir == NULL             -> replaced by HYDU_getcwd()
 *   *wdir not starting '/'    -> replaced by "<cwd>/<*wdir>"; the old
 *                                string is freed
 *   otherwise                 -> left untouched
 *
 * Returns HYD_SUCCESS, or the error status of the string join.  On failure
 * *wdir keeps its original value.
 */
HYD_status HYDU_correct_wdir(char **wdir)
{
    char *tmp[HYD_NUM_TMP_STRINGS];
    char *joined = NULL;
    HYD_status status = HYD_SUCCESS;

    HYDU_FUNC_ENTER();

    if (*wdir == NULL) {
        *wdir = HYDU_getcwd();
    }
    else if ((*wdir)[0] != '/') {
        tmp[0] = HYDU_getcwd();
        tmp[1] = HYDU_strdup("/");
        tmp[2] = HYDU_strdup(*wdir);
        tmp[3] = NULL;

        /* Join into a scratch pointer and release the pieces before the
         * status check: the old *wdir is only replaced once the new string
         * exists, and nothing leaks when the join fails. */
        status = HYDU_str_alloc_and_join(tmp, &joined);
        HYDU_free_strlist(tmp);
        HYDU_ERR_POP(status, "unable to join strings\n");

        HYDU_FREE(*wdir);
        *wdir = joined;
    }

  fn_exit:
    HYDU_FUNC_EXIT();
    return status;

  fn_fail:
    goto fn_exit;
}
Пример #22
0
/*
 * HYDU_append_env_str_to_list - split a "NAME=VALUE" string and append the
 * pair to env_list.
 *
 * Only the first '=' separates name from value, so values may contain '='
 * themselves.  str is not modified; the split is done on a private copy.
 *
 * Returns HYD_SUCCESS, or an error status when no name can be extracted or
 * appending fails.
 */
HYD_status HYDU_append_env_str_to_list(const char *str, struct HYD_env **env_list)
{
    HYD_status status = HYD_SUCCESS;
    char *copy = NULL;
    char *name, *value;

    HYDU_FUNC_ENTER();

    copy = MPL_strdup(str);
    value = copy;

    /* strsep rather than strtok: strtok would mangle values holding '=' */
    name = MPL_strsep(&value, "=");
    HYDU_ASSERT(name != NULL, status);

    status = HYDU_append_env_to_list(name, value, env_list);
    HYDU_ERR_POP(status, "unable to append env to list\n");

  fn_exit:
    if (copy != NULL)
        MPL_free(copy);
    HYDU_FUNC_EXIT();
    return status;

  fn_fail:
    goto fn_exit;
}
Пример #23
0
/*
 * fn_barrier_in - handle a PMI "barrier_in" request from one proxy.
 *
 * Increments the barrier counter of the process group owning the proxy on
 * fd.  Once the counter equals the number of proxies in that group it is
 * reset to zero and "cmd=barrier_out\n" is sent to the control descriptor
 * of every proxy in the group.
 *
 * pgid and args are not used by this handler.
 *
 * Returns HYD_SUCCESS, or an error status when no proxy is found for fd or
 * a response cannot be sent.
 */
static HYD_status fn_barrier_in(int fd, int pid, int pgid, char *args[])
{
    HYD_status status = HYD_SUCCESS;
    struct HYD_proxy *proxy, *peer;
    const char *cmd;
    int nproxies = 0;

    HYDU_FUNC_ENTER();

    proxy = HYD_pmcd_pmi_find_proxy(fd);
    HYDU_ASSERT(proxy, status);

    /* size of the group taking part in the barrier */
    peer = proxy->pg->proxy_list;
    while (peer) {
        nproxies++;
        peer = peer->next;
    }

    /* still waiting for other proxies to arrive */
    if (++proxy->pg->barrier_count != nproxies)
        goto fn_exit;

    /* everyone is in: rearm the counter and release all proxies */
    proxy->pg->barrier_count = 0;
    cmd = "cmd=barrier_out\n";

    peer = proxy->pg->proxy_list;
    while (peer) {
        status = cmd_response(peer->control_fd, pid, cmd);
        HYDU_ERR_POP(status, "error writing PMI line\n");
        peer = peer->next;
    }

  fn_exit:
    HYDU_FUNC_EXIT();
    return status;

  fn_fail:
    goto fn_exit;
}
Пример #24
0
/*
 * HYDT_bscd_sge_query_node_list - build the node list from the SGE host
 * file.
 *
 * The file named by the PE_HOSTFILE environment variable is parsed with
 * process_mfile_token as the per-token callback.  When the variable is not
 * set, *node_list is set to NULL and an internal error is returned.
 *
 * Returns HYD_SUCCESS or an error status.
 */
HYD_status HYDT_bscd_sge_query_node_list(struct HYD_node **node_list)
{
    HYD_status status = HYD_SUCCESS;
    char *hostfile;

    HYDU_FUNC_ENTER();

    /* a zero return means the variable was not found */
    if (!MPL_env2str("PE_HOSTFILE", (const char **) &hostfile))
        hostfile = NULL;

    if (hostfile != NULL) {
        status = HYDU_parse_hostfile(hostfile, node_list, process_mfile_token);
        HYDU_ERR_POP(status, "error parsing hostfile\n");
    }
    else {
        *node_list = NULL;
        HYDU_ERR_SETANDJUMP(status, HYD_INTERNAL_ERROR, "No SGE nodefile found\n");
    }

  fn_exit:
    HYDU_FUNC_EXIT();
    return status;

  fn_fail:
    goto fn_exit;
}
Пример #25
0
/*
 * HYDU_putenv - export one HYD_env entry into the process environment.
 *
 * Builds "NAME=VALUE" (VALUE is empty when env->env_value is NULL) and
 * passes it to MPL_putenv().  When the variable already exists and
 * overwrite is HYD_ENV_OVERWRITE_FALSE, nothing is changed.
 *
 * Returns HYD_SUCCESS, or the error status of the string join.
 */
HYD_status HYDU_putenv(struct HYD_env *env, HYD_env_overwrite_t overwrite)
{
    char *tmp[HYD_NUM_TMP_STRINGS], *str;
    int i;
    HYD_status status = HYD_SUCCESS;

    HYDU_FUNC_ENTER();

    /* If the overwrite flag is false, just exit */
    if (MPL_env2str(env->env_name, (const char **) &str) && overwrite == HYD_ENV_OVERWRITE_FALSE)
        goto fn_exit;

    i = 0;
    tmp[i++] = MPL_strdup(env->env_name);
    tmp[i++] = MPL_strdup("=");
    tmp[i++] = env->env_value ? MPL_strdup(env->env_value) : MPL_strdup("");
    tmp[i++] = NULL;
    status = HYDU_str_alloc_and_join(tmp, &str);

    /* release the pieces before the status check so that they are freed
     * whether or not the join succeeded */
    for (i = 0; tmp[i]; i++)
        MPL_free(tmp[i]);

    HYDU_ERR_POP(status, "unable to join strings\n");

    /* str is not freed here; presumably the environment keeps referring to
     * it after MPL_putenv(), as with putenv(3) — confirm */
    MPL_putenv(str);

  fn_exit:
    HYDU_FUNC_EXIT();
    return status;

  fn_fail:
    goto fn_exit;
}
Пример #26
0
/*
 * process_mfile_token - per-token callback for parsing a machine file.
 *
 * State is kept across calls in function-local statics.  The first token
 * of a line (newline != 0) is stored as the host name; the second token of
 * that line is converted with atoi() and the host is added to node_list
 * with that process count.  Further tokens on the line are ignored.
 *
 * Returns HYD_SUCCESS, or an error status when a new line starts while a
 * host name is still pending, or when adding the node fails.
 */
static HYD_status process_mfile_token(char *token, int newline, struct HYD_node **node_list)
{
    static int field_idx = 0;       /* 1-based position of token on its line */
    static char *pending_host;      /* host name awaiting its process count */
    HYD_status status = HYD_SUCCESS;
    int slots;

    if (newline) {
        /* The first entry gives the hostname */
        field_idx = 1;
        if (pending_host != NULL) {
            HYDU_ERR_SETANDJUMP(status, HYD_INTERNAL_ERROR, "unexpected token %s\n", token);
        }
        pending_host = HYDU_strdup(token);
    }
    else if (++field_idx == 2) {
        /* the second entry gives the process count */
        slots = atoi(token);

        status = HYDU_add_to_node_list(pending_host, slots, node_list);
        HYDU_ERR_POP(status, "unable to initialize proxy\n");

        pending_host = NULL;
    }

  fn_exit:
    return status;

  fn_fail:
    goto fn_exit;
}
Пример #27
0
/*
 * HYDU_env_to_str - format one HYD_env entry as a single-quoted string.
 *
 * On success *str holds a newly joined string of the form 'NAME=VALUE'
 * (quotes included); VALUE is empty when env->env_value is NULL.
 *
 * Returns HYD_SUCCESS, or the error status of the string join.
 */
HYD_status HYDU_env_to_str(struct HYD_env *env, char **str)
{
    int i;
    char *tmp[HYD_NUM_TMP_STRINGS];
    HYD_status status = HYD_SUCCESS;

    HYDU_FUNC_ENTER();

    i = 0;
    tmp[i++] = MPL_strdup("'");
    tmp[i++] = MPL_strdup(env->env_name);
    tmp[i++] = MPL_strdup("=");
    tmp[i++] = env->env_value ? MPL_strdup(env->env_value) : MPL_strdup("");
    tmp[i++] = MPL_strdup("'");
    tmp[i++] = NULL;

    status = HYDU_str_alloc_and_join(tmp, str);

    /* release the pieces before the status check so that they are freed
     * whether or not the join succeeded */
    for (i = 0; tmp[i]; i++)
        MPL_free(tmp[i]);

    HYDU_ERR_POP(status, "unable to join strings\n");

  fn_exit:
    HYDU_FUNC_EXIT();
    return status;

  fn_fail:
    goto fn_exit;
}
Пример #28
0
/*
 * HYDU_size_t_to_str - render x as a decimal string.
 *
 * Returns a newly allocated, NUL-terminated string holding the decimal
 * digits of x, or NULL when the allocation fails.
 */
char *HYDU_size_t_to_str(size_t x)
{
    int len = 1;
    size_t rest;
    char *str = NULL;
    HYD_status status = HYD_SUCCESS;

    HYDU_FUNC_ENTER();

    /* Count digits by dividing x down.  Growing a power-of-ten bound
     * upwards instead would wrap around for values close to the largest
     * size_t and give a wrong length or never terminate. */
    for (rest = x; rest >= 10; rest /= 10)
        len++;
    len++;      /* room for the terminating NUL */

    HYDU_MALLOC(str, char *, len, status);
    HYDU_ERR_POP(status, "unable to allocate memory\n");

    HYDU_snprintf(str, len, "%llu", (unsigned long long) x);

  fn_exit:
    HYDU_FUNC_EXIT();
    return str;

  fn_fail:
    goto fn_exit;
}
Пример #29
0
/*
 * HYDT_topo_bind - bind the calling process according to binding index idx.
 *
 * Nothing is done when idx is negative or binding is being ignored.
 * Otherwise the request is handed to the hwloc backend when that backend
 * is compiled in and selected as the topology library; if no backend takes
 * the request an internal error is returned.
 *
 * Returns HYD_SUCCESS or an error status.
 */
HYD_status HYDT_topo_bind(int idx)
{
    HYD_status status = HYD_SUCCESS;

    HYDU_FUNC_ENTER();

    /* binding disabled or not requested for this process */
    if (ignore_binding || idx < 0)
        goto fn_exit;

#if defined HAVE_HWLOC
    if (strcmp(HYDT_topo_info.topolib, "hwloc") == 0) {
        status = HYDT_topo_hwloc_bind(idx);
        HYDU_ERR_POP(status, "HWLOC failure binding process to core\n");
        goto fn_exit;
    }
#endif /* HAVE_HWLOC */

    /* no backend handled the request */
    HYDU_ERR_SETANDJUMP(status, HYD_INTERNAL_ERROR, "no topology library available\n");

  fn_exit:
    HYDU_FUNC_EXIT();
    return status;

  fn_fail:
    goto fn_exit;
}
Пример #30
0
/*
 * list_to_nodes - expand a Slurm host-list expression into global_node_list.
 *
 * str is parsed with slurm_hostlist_create(); every host shifted off the
 * resulting list is added to global_node_list with the process count taken
 * from the next entry of tasks_per_node.
 *
 * Returns HYD_SUCCESS, HYD_FAILURE when the host list cannot be created,
 * or the error status of HYDU_add_to_node_list().  The host list is
 * destroyed on every path.
 *
 * NOTE(review): the strings returned by slurm_hostlist_shift() are not
 * released here; whether HYDU_add_to_node_list() copies or keeps them is
 * not visible from this function — confirm.
 */
static HYD_status list_to_nodes(char *str)
{
    hostlist_t hostlist = NULL;
    char *host;
    int k = 0;
    HYD_status status = HYD_SUCCESS;

    if ((hostlist = slurm_hostlist_create(str)) == NULL) {
        status = HYD_FAILURE;
        goto fn_fail;
    }

    for (host = slurm_hostlist_shift(hostlist); host; host = slurm_hostlist_shift(hostlist)) {
        status = HYDU_add_to_node_list(host, tasks_per_node[k++], &global_node_list);
        HYDU_ERR_POP(status, "unable to add to node list\n");
    }

  fn_exit:
    /* done here so that an error inside the loop does not leak the list */
    if (hostlist)
        slurm_hostlist_destroy(hostlist);
    return status;

  fn_fail:
    goto fn_exit;
}