Example #1
0
static HYD_status pmi_id_map_fn(char *arg, char ***argv)
{
    char *map, *tmp;
    HYD_status status = HYD_SUCCESS;

    /* Split the core map into three different segments */
    map = MPL_strdup(**argv);
    HYDU_ASSERT(map, status);

    tmp = strtok(map, ",");
    HYDU_ASSERT(tmp, status);
    HYD_pmcd_pmip.system_global.pmi_id_map.filler_start = atoi(tmp);

    tmp = strtok(NULL, ",");
    HYDU_ASSERT(tmp, status);
    HYD_pmcd_pmip.system_global.pmi_id_map.non_filler_start = atoi(tmp);

    MPL_free(map);

    (*argv)++;

  fn_exit:
    HYDU_FUNC_EXIT();
    return status;

  fn_fail:
    goto fn_exit;
}
Example #2
0
HYD_status HYDU_send_strlist(int fd, char **strlist)
{
    int i, list_len, len;
    int sent, closed;
    HYD_status status = HYD_SUCCESS;

    HYDU_FUNC_ENTER();

    /* Check how many arguments we have */
    list_len = HYDU_strlist_lastidx(strlist);
    status = HYDU_sock_write(fd, &list_len, sizeof(int), &sent, &closed, HYDU_SOCK_COMM_MSGWAIT);
    HYDU_ERR_POP(status, "unable to write data to proxy\n");
    HYDU_ASSERT(!closed, status);

    /* Convert the string list to parseable data and send */
    for (i = 0; strlist[i]; i++) {
        len = strlen(strlist[i]) + 1;

        status = HYDU_sock_write(fd, &len, sizeof(int), &sent, &closed, HYDU_SOCK_COMM_MSGWAIT);
        HYDU_ERR_POP(status, "unable to write data to proxy\n");
        HYDU_ASSERT(!closed, status);

        status = HYDU_sock_write(fd, strlist[i], len, &sent, &closed, HYDU_SOCK_COMM_MSGWAIT);
        HYDU_ERR_POP(status, "unable to write data to proxy\n");
        HYDU_ASSERT(!closed, status);
    }

fn_exit:
    HYDU_FUNC_EXIT();
    return status;

fn_fail:
    goto fn_exit;
}
Example #3
0
static HYD_status global_core_map_fn(char *arg, char ***argv)
{
    char *map, *tmp;
    HYD_status status = HYD_SUCCESS;

    /* Split the core map into three different segments */
    map = HYDU_strdup(**argv);
    HYDU_ASSERT(map, status);

    tmp = strtok(map, ",");
    HYDU_ASSERT(tmp, status);
    HYD_pmcd_pmip.system_global.global_core_map.local_filler = atoi(tmp);

    tmp = strtok(NULL, ",");
    HYDU_ASSERT(tmp, status);
    HYD_pmcd_pmip.system_global.global_core_map.local_count = atoi(tmp);

    tmp = strtok(NULL, ",");
    HYDU_ASSERT(tmp, status);
    HYD_pmcd_pmip.system_global.global_core_map.global_count = atoi(tmp);

    HYDU_FREE(map);

    (*argv)++;

  fn_exit:
    HYDU_FUNC_EXIT();
    return status;

  fn_fail:
    goto fn_exit;
}
Example #4
0
static HYD_status send_cmd_downstream(int fd, const char *cmd)
{
    char cmdlen[7];
    int sent, closed;
    HYD_status status = HYD_SUCCESS;

    HYDU_FUNC_ENTER();

    MPL_snprintf(cmdlen, 7, "%6u", (unsigned) strlen(cmd));
    status = HYDU_sock_write(fd, cmdlen, 6, &sent, &closed, HYDU_SOCK_COMM_MSGWAIT);
    HYDU_ERR_POP(status, "error writing PMI line\n");
    /* FIXME: We cannot abort when we are not able to send data
     * downstream. The upper layer needs to handle this based on
     * whether we want to abort or not.*/
    HYDU_ASSERT(!closed, status);

    if (HYD_pmcd_pmip.user_global.debug) {
        HYDU_dump(stdout, "PMI response: %s\n", cmd);
    }

    status = HYDU_sock_write(fd, cmd, strlen(cmd), &sent, &closed, HYDU_SOCK_COMM_MSGWAIT);
    HYDU_ERR_POP(status, "error writing PMI line\n");
    HYDU_ASSERT(!closed, status);

  fn_exit:
    HYDU_FUNC_EXIT();
    return status;

  fn_fail:
    goto fn_exit;
}
static HYD_status cmd_response(int fd, int pid, const char *cmd)
{
    struct HYD_pmcd_hdr hdr;
    int sent, closed;
    HYD_status status = HYD_SUCCESS;

    HYDU_FUNC_ENTER();

    HYD_pmcd_init_header(&hdr);
    hdr.cmd = PMI_RESPONSE;
    hdr.pid = pid;
    hdr.pmi_version = 1;
    hdr.buflen = strlen(cmd);
    status = HYDU_sock_write(fd, &hdr, sizeof(hdr), &sent, &closed);
    HYDU_ERR_POP(status, "unable to send PMI_RESPONSE header to proxy\n");
    HYDU_ASSERT(!closed, status);

    if (HYD_server_info.user_global.debug) {
        HYDU_dump(stdout, "PMI response to fd %d pid %d: %s", fd, pid, cmd);
    }

    status = HYDU_sock_write(fd, cmd, strlen(cmd), &sent, &closed);
    HYDU_ERR_POP(status, "unable to send response to command\n");
    HYDU_ASSERT(!closed, status);

  fn_exit:
    HYDU_FUNC_EXIT();
    return status;

  fn_fail:
    goto fn_exit;
}
Example #6
0
static HYD_status send_cmd_upstream(const char *start, int fd, char *args[])
{
    int i, sent, closed;
    struct HYD_string_stash stash;
    char *buf = NULL;
    struct HYD_pmcd_hdr hdr;
    HYD_status status = HYD_SUCCESS;

    HYDU_FUNC_ENTER();

    HYD_STRING_STASH_INIT(stash);
    HYD_STRING_STASH(stash, MPL_strdup(start), status);
    for (i = 0; args[i]; i++) {
        HYD_STRING_STASH(stash, MPL_strdup(args[i]), status);
        if (args[i + 1])
            HYD_STRING_STASH(stash, MPL_strdup(";"), status);
    }

    HYD_STRING_SPIT(stash, buf, status);

    HYD_pmcd_init_header(&hdr);
    hdr.cmd = PMI_CMD;
    hdr.pid = fd;
    hdr.buflen = strlen(buf);
    hdr.pmi_version = 2;
    status =
        HYDU_sock_write(HYD_pmcd_pmip.upstream.control, &hdr, sizeof(hdr), &sent, &closed,
                        HYDU_SOCK_COMM_MSGWAIT);
    HYDU_ERR_POP(status, "unable to send PMI header upstream\n");
    HYDU_ASSERT(!closed, status);

    if (HYD_pmcd_pmip.user_global.debug) {
        HYDU_dump(stdout, "forwarding command (%s) upstream\n", buf);
    }

    status = HYDU_sock_write(HYD_pmcd_pmip.upstream.control, buf, hdr.buflen, &sent, &closed,
                             HYDU_SOCK_COMM_MSGWAIT);
    HYDU_ERR_POP(status, "unable to send PMI command upstream\n");
    HYDU_ASSERT(!closed, status);

  fn_exit:
    if (buf)
        MPL_free(buf);
    HYDU_FUNC_EXIT();
    return status;

  fn_fail:
    goto fn_exit;
}
Example #7
0
HYD_status HYDU_append_env_str_to_list(const char *str, struct HYD_env **env_list)
{
    char *my_str = NULL;
    char *env_name, *env_value;
    HYD_status status = HYD_SUCCESS;

    HYDU_FUNC_ENTER();

    my_str = env_value = MPL_strdup(str);
    /* don't use strtok, it will mangle env values that contain '=' */
    env_name = MPL_strsep(&env_value, "=");
    HYDU_ASSERT(env_name != NULL, status);

    status = HYDU_append_env_to_list(env_name, env_value, env_list);
    HYDU_ERR_POP(status, "unable to append env to list\n");

  fn_exit:
    if (my_str)
        MPL_free(my_str);
    HYDU_FUNC_EXIT();
    return status;

  fn_fail:
    goto fn_exit;
}
Example #8
0
static HYD_status handle_rr_binding(void)
{
    int i;
    HYD_status status = HYD_SUCCESS;

    HYDU_FUNC_ENTER();

    HYDU_ASSERT(hwloc_initialized, status);

    /* initialize bitmaps */
    HYDT_topo_hwloc_info.num_bitmaps = hwloc_get_nbobjs_by_type(topology, HWLOC_OBJ_PU);

    HYDU_MALLOC_OR_JUMP(HYDT_topo_hwloc_info.bitmap, hwloc_bitmap_t *,
                        HYDT_topo_hwloc_info.num_bitmaps * sizeof(hwloc_bitmap_t), status);

    for (i = 0; i < HYDT_topo_hwloc_info.num_bitmaps; i++) {
        HYDT_topo_hwloc_info.bitmap[i] = hwloc_bitmap_alloc();
        hwloc_bitmap_only(HYDT_topo_hwloc_info.bitmap[i], i);
    }

  fn_exit:
    HYDU_FUNC_EXIT();
    return status;

  fn_fail:
    goto fn_exit;
}
Example #9
0
static HYD_status fn_put(int fd, int pid, int pgid, char *args[])
{
    struct HYD_proxy *proxy;
    struct HYD_pmcd_pmi_pg_scratch *pg_scratch;
    struct HYD_pmcd_token *tokens;
    int token_count, i, ret;
    HYD_status status = HYD_SUCCESS;

    HYDU_FUNC_ENTER();

    status = HYD_pmcd_pmi_args_to_tokens(args, &tokens, &token_count);
    HYDU_ERR_POP(status, "unable to convert args to tokens\n");

    proxy = HYD_pmcd_pmi_find_proxy(fd);
    HYDU_ASSERT(proxy, status);
    pg_scratch = (struct HYD_pmcd_pmi_pg_scratch *) proxy->pg->pg_scratch;

    for (i = 0; i < token_count; i++) {
        status = HYD_pmcd_pmi_add_kvs(tokens[i].key, tokens[i].val, pg_scratch->kvs, &ret);
        HYDU_ERR_POP(status, "unable to add keypair to kvs\n");
    }

  fn_exit:
    HYD_pmcd_pmi_free_tokens(tokens, token_count);
    HYDU_FUNC_EXIT();
    return status;

  fn_fail:
    goto fn_exit;
}
static HYD_status fn_barrier_in(int fd, int pid, int pgid, char *args[])
{
    struct HYD_proxy *proxy, *tproxy;
    const char *cmd;
    int proxy_count;
    HYD_status status = HYD_SUCCESS;

    HYDU_FUNC_ENTER();

    proxy = HYD_pmcd_pmi_find_proxy(fd);
    HYDU_ASSERT(proxy, status);

    proxy_count = 0;
    for (tproxy = proxy->pg->proxy_list; tproxy; tproxy = tproxy->next)
        proxy_count++;

    proxy->pg->barrier_count++;
    if (proxy->pg->barrier_count == proxy_count) {
        proxy->pg->barrier_count = 0;
        cmd = "cmd=barrier_out\n";

        for (tproxy = proxy->pg->proxy_list; tproxy; tproxy = tproxy->next) {
            status = cmd_response(tproxy->control_fd, pid, cmd);
            HYDU_ERR_POP(status, "error writing PMI line\n");
        }
    }

  fn_exit:
    HYDU_FUNC_EXIT();
    return status;

  fn_fail:
    goto fn_exit;
}
Example #11
0
static HYD_status control_cb(int fd, HYD_event_t events, void *userp)
{
    int count, closed, i;
    struct HYD_pg *pg;
    struct HYD_pmcd_hdr hdr;
    struct HYD_proxy *proxy, *tproxy;
    struct HYD_pmcd_pmi_pg_scratch *pg_scratch;
    char *buf;
    HYD_status status = HYD_SUCCESS;

    HYDU_FUNC_ENTER();

    proxy = (struct HYD_proxy *) userp;

    if (fd == STDIN_FILENO) {
        HYD_pmcd_init_header(&hdr);
        hdr.cmd = STDIN;
    }
    else {
        status = HYDU_sock_read(fd, &hdr, sizeof(hdr), &count, &closed, HYDU_SOCK_COMM_MSGWAIT);
        HYDU_ERR_POP(status, "unable to read command from proxy\n");
        HYDU_ASSERT(!closed, status);
    }

    if (hdr.cmd == PID_LIST) {  /* Got PIDs */
        HYDU_MALLOC(proxy->pid, int *, proxy->proxy_process_count * sizeof(int), status);
        status = HYDU_sock_read(fd, (void *) proxy->pid,
                                proxy->proxy_process_count * sizeof(int),
                                &count, &closed, HYDU_SOCK_COMM_MSGWAIT);
        HYDU_ERR_POP(status, "unable to read status from proxy\n");
        HYDU_ASSERT(!closed, status);

        if (proxy->pg->pgid) {
            /* We initialize the debugger code only for non-dynamically
             * spawned processes */
            goto fn_exit;
        }

        /* Check if all the PIDs have been received */
        for (tproxy = proxy->pg->proxy_list; tproxy; tproxy = tproxy->next)
            if (tproxy->pid == NULL)
                goto fn_exit;

        /* Call the debugger initialization */
        status = HYDT_dbg_setup_procdesc(proxy->pg);
        HYDU_ERR_POP(status, "debugger setup failed\n");
    }
Example #12
0
HYD_status HYDT_topo_hwloc_init(const char *binding, const char *mapping, const char *membind)
{
    HYD_status status = HYD_SUCCESS;

    HYDU_FUNC_ENTER();

    HYDU_ASSERT(binding, status);

    hwloc_topology_init(&topology);
    hwloc_topology_load(topology);

    HYDT_topo_hwloc_info.total_num_pus = hwloc_get_nbobjs_by_type(topology, HWLOC_OBJ_PU);

    hwloc_initialized = 1;

    /* bindings that don't require mapping */
    if (!strncmp(binding, "user:"******"user:"******"user:"******"error binding to %s\n", binding);
        goto fn_exit;
    }
    else if (!strcmp(binding, "rr")) {
        status = handle_rr_binding();
        HYDU_ERR_POP(status, "error binding to %s\n", binding);
        goto fn_exit;
    }

    status = handle_bitmap_binding(binding, mapping ? mapping : binding);
    HYDU_ERR_POP(status, "error binding with bind \"%s\" and map \"%s\"\n", binding, mapping);


    /* Memory binding options */
    if (membind == NULL)
        HYDT_topo_hwloc_info.membind = HWLOC_MEMBIND_DEFAULT;
    else if (!strcmp(membind, "firsttouch"))
        HYDT_topo_hwloc_info.membind = HWLOC_MEMBIND_FIRSTTOUCH;
    else if (!strcmp(membind, "nexttouch"))
        HYDT_topo_hwloc_info.membind = HWLOC_MEMBIND_NEXTTOUCH;
    else if (!strncmp(membind, "bind:", strlen("bind:"))) {
        HYDT_topo_hwloc_info.membind = HWLOC_MEMBIND_BIND;
    }
    else if (!strncmp(membind, "interleave:", strlen("interleave:"))) {
        HYDT_topo_hwloc_info.membind = HWLOC_MEMBIND_INTERLEAVE;
    }
    else if (!strncmp(membind, "replicate:", strlen("replicate:"))) {
        HYDT_topo_hwloc_info.membind = HWLOC_MEMBIND_REPLICATE;
    }
    else {
        HYDU_ERR_SETANDJUMP(status, HYD_INTERNAL_ERROR,
                            "unrecognized membind policy \"%s\"\n", membind);
    }

  fn_exit:
    HYDU_FUNC_EXIT();
    return status;

  fn_fail:
    goto fn_exit;
}
Example #13
0
static HYD_status cmd_response(int fd, int pid, char *cmd)
{
    char cmdlen[7];
    struct HYD_pmcd_hdr hdr;
    int sent, closed;
    HYD_status status = HYD_SUCCESS;

    HYDU_FUNC_ENTER();

    HYD_pmcd_init_header(&hdr);
    hdr.cmd = PMI_RESPONSE;
    hdr.pid = pid;
    hdr.pmi_version = 2;
    hdr.buflen = 6 + strlen(cmd);
    status = HYDU_sock_write(fd, &hdr, sizeof(hdr), &sent, &closed, HYDU_SOCK_COMM_MSGWAIT);
    HYDU_ERR_POP(status, "unable to send PMI_RESPONSE header to proxy\n");
    HYDU_ASSERT(!closed, status);

    HYDU_snprintf(cmdlen, 7, "%6u", (unsigned) strlen(cmd));
    status = HYDU_sock_write(fd, cmdlen, 6, &sent, &closed, HYDU_SOCK_COMM_MSGWAIT);
    HYDU_ERR_POP(status, "error writing PMI line\n");
    HYDU_ASSERT(!closed, status);

    if (HYD_server_info.user_global.debug) {
        HYDU_dump(stdout, "PMI response to fd %d pid %d: %s\n", fd, pid, cmd);
    }

    status = HYDU_sock_write(fd, cmd, strlen(cmd), &sent, &closed, HYDU_SOCK_COMM_MSGWAIT);
    HYDU_ERR_POP(status, "error writing PMI line\n");
    HYDU_ASSERT(!closed, status);

  fn_exit:
    HYDU_FUNC_EXIT();
    return status;

  fn_fail:
    goto fn_exit;
}
Example #14
0
HYD_status HYDU_sock_read(int fd, void *buf, int maxlen, int *recvd, int *closed,
                          enum HYDU_sock_comm_flag flag)
{
    int tmp;
    HYD_status status = HYD_SUCCESS;

    HYDU_FUNC_ENTER();

    HYDU_ASSERT(maxlen, status);

    *recvd = 0;
    *closed = 0;
    while (1) {
        do {
            tmp = read(fd, (char *) buf + *recvd, maxlen - *recvd);
            if (tmp < 0) {
                if (errno == ECONNRESET || fd == STDIN_FILENO) {
                    /* If the remote end closed the socket or if we
                     * get an EINTR on stdin, set the socket to be
                     * closed and jump out */
                    *closed = 1;
                    status = HYD_SUCCESS;
                    goto fn_exit;
                }
            }
        } while (tmp < 0 && errno == EINTR);

        if (tmp < 0) {
            HYDU_ERR_SETANDJUMP(status, HYD_SOCK_ERROR, "read error (%s)\n", MPL_strerror(errno));
        }
        else if (tmp == 0) {
            *closed = 1;
            goto fn_exit;
        }
        else {
            *recvd += tmp;
        }

        if (flag == HYDU_SOCK_COMM_NONE || *recvd == maxlen)
            break;
    }

  fn_exit:
    HYDU_FUNC_EXIT();
    return status;

  fn_fail:
    goto fn_exit;
}
Example #15
0
HYD_status HYDT_bscd_lsf_query_node_list(struct HYD_node **node_list)
{
    char *hosts, *hostname, *num_procs_str, *thosts = NULL;
    int num_procs;
    HYD_status status = HYD_SUCCESS;

    HYDU_FUNC_ENTER();

    if (MPL_env2str("LSB_MCPU_HOSTS", (const char **) &hosts) == 0)
        hosts = NULL;

    if (hosts == NULL) {
        *node_list = NULL;
        HYDU_ERR_SETANDJUMP(status, HYD_INTERNAL_ERROR, "No LSF node list found\n");
    }
    else {
        hosts = HYDU_strdup(hosts);
        thosts = hosts;

        hostname = strtok(hosts, " ");
        while (1) {
            if (hostname == NULL)
                break;

            /* the even fields in the list should be the number of
             * cores */
            num_procs_str = strtok(NULL, " ");
            HYDU_ASSERT(num_procs_str, status);

            num_procs = atoi(num_procs_str);

            status = HYDU_add_to_node_list(hostname, num_procs, node_list);
            HYDU_ERR_POP(status, "unable to add to node list\n");

            hostname = strtok(NULL, " ");
        }

        if (thosts)
            HYDU_FREE(thosts);
    }

  fn_exit:
    HYDU_FUNC_EXIT();
    return status;

  fn_fail:
    goto fn_exit;
}
static HYD_status extract_tasks_per_node(int nnodes, char *task_list)
{
    char *task_set, **tmp_core_list = NULL;
    char *nodes, *cores;
    int i, j, k, p, count = 0;
    HYD_status status = HYD_SUCCESS;

    HYDU_MALLOC_OR_JUMP(tasks_per_node, int *, nnodes * sizeof(int), status);
    HYDU_MALLOC_OR_JUMP(tmp_core_list, char **, nnodes * sizeof(char *), status);

    task_set = strtok(task_list, ",");
    HYDU_ASSERT(task_set, status);
    i = 0;
    do {
        HYDU_MALLOC_OR_JUMP(tmp_core_list[i], char *, strlen(task_set) + 1, status);
        MPL_snprintf(tmp_core_list[i], strlen(task_set) + 1, "%s", task_set);
        i++;
        task_set = strtok(NULL, ",");
    } while (task_set);
    count = i;

    p = 0;
    for (i = 0; i < count; i++) {
        cores = strtok(tmp_core_list[i], "(");
        nodes = strtok(NULL, "(");
        if (nodes) {
            nodes[strlen(nodes) - 1] = 0;
            nodes++;
            j = atoi(nodes);
        } else
            j = 1;

        for (k = 0; k < j; k++)
            tasks_per_node[p++] = atoi(cores);
    }

  fn_exit:
    for (i = 0; i < count; i++)
        MPL_free(tmp_core_list[i]);
    MPL_free(tmp_core_list);
    return status;

  fn_fail:
    goto fn_exit;
}
Example #17
0
HYD_status HYDU_sock_write(int fd, const void *buf, int maxlen, int *sent, int *closed,
                           enum HYDU_sock_comm_flag flag)
{
    int tmp;
    HYD_status status = HYD_SUCCESS;

    HYDU_FUNC_ENTER();

    HYDU_ASSERT(maxlen, status);

    *sent = 0;
    *closed = 0;
    while (1) {
        tmp = write(fd, (char *) buf + *sent, maxlen - *sent);
        if (tmp <= 0) {
            if (errno == EAGAIN) {
                if (flag == HYDU_SOCK_COMM_NONE)
                    goto fn_exit;
                else
                    continue;
            }
            else if (errno == ECONNRESET) {
                *closed = 1;
                goto fn_exit;
            }
            HYDU_ERR_SETANDJUMP(status, HYD_SOCK_ERROR, "write error (%s)\n", MPL_strerror(errno));
        }
        else {
            *sent += tmp;
        }

        if (flag == HYDU_SOCK_COMM_NONE || *sent == maxlen)
            break;
    }

  fn_exit:
    HYDU_FUNC_EXIT();
    return status;

  fn_fail:
    goto fn_exit;
}
Example #18
0
HYD_status HYD_pmcd_pmiserv_send_signal(struct HYD_proxy *proxy, int signum)
{
    struct HYD_pmcd_hdr hdr;
    int sent, closed;
    HYD_status status = HYD_SUCCESS;

    HYD_pmcd_init_header(&hdr);
    hdr.cmd = SIGNAL;
    hdr.signum = signum;

    status = HYDU_sock_write(proxy->control_fd, &hdr, sizeof(hdr), &sent, &closed,
                             HYDU_SOCK_COMM_MSGWAIT);
    HYDU_ERR_POP(status, "unable to write data to proxy\n");
    HYDU_ASSERT(!closed, status);

  fn_exit:
    return status;

  fn_fail:
    goto fn_exit;
}
Example #19
0
HYD_status HYDT_dmxu_poll_stdin_valid(int *out)
{
    struct pollfd fd[1];
    int ret;
    HYD_status status = HYD_SUCCESS;

    HYDU_FUNC_ENTER();

    status = HYDT_dmxi_stdin_valid(out);
    HYDU_ERR_POP(status, "unable to check if stdin is valid\n");

    if (*out) {
        /* The generic test thinks that STDIN is valid. Try poll
         * specific tests. */

        fd[0].fd = STDIN_FILENO;
        fd[0].events = POLLIN;

        /* Check if poll on stdin returns any errors; on Darwin this
         * is broken */
        ret = poll(fd, 1, 0);
        HYDU_ASSERT((ret >= 0), status);

        if (fd[0].revents & ~(POLLIN | POLLHUP))
            *out = 0;
        else
            *out = 1;
    }

  fn_exit:
    HYDU_FUNC_EXIT();
    return status;

  fn_fail:
    goto fn_exit;
}
static HYD_status fn_put(int fd, int pid, int pgid, char *args[])
{
    int i, ret;
    struct HYD_proxy *proxy;
    struct HYD_pmcd_pmi_pg_scratch *pg_scratch;
    char *kvsname, *key, *val;
    char *tmp[HYD_NUM_TMP_STRINGS], *cmd;
    struct HYD_pmcd_token *tokens;
    int token_count;
    HYD_status status = HYD_SUCCESS;

    HYDU_FUNC_ENTER();

    status = HYD_pmcd_pmi_args_to_tokens(args, &tokens, &token_count);
    HYDU_ERR_POP(status, "unable to convert args to tokens\n");

    kvsname = HYD_pmcd_pmi_find_token_keyval(tokens, token_count, "kvsname");
    HYDU_ERR_CHKANDJUMP(status, kvsname == NULL, HYD_INTERNAL_ERROR,
                        "unable to find token: kvsname\n");

    key = HYD_pmcd_pmi_find_token_keyval(tokens, token_count, "key");
    HYDU_ERR_CHKANDJUMP(status, key == NULL, HYD_INTERNAL_ERROR,
                        "unable to find token: key\n");

    val = HYD_pmcd_pmi_find_token_keyval(tokens, token_count, "value");
    if (val == NULL) {
        /* the user sent an empty string */
        val = HYDU_strdup("");
    }

    proxy = HYD_pmcd_pmi_find_proxy(fd);
    HYDU_ASSERT(proxy, status);

    pg_scratch = (struct HYD_pmcd_pmi_pg_scratch *) proxy->pg->pg_scratch;

    if (strcmp(pg_scratch->kvs->kvs_name, kvsname))
        HYDU_ERR_SETANDJUMP(status, HYD_INTERNAL_ERROR,
                            "kvsname (%s) does not match this group's kvs space (%s)\n",
                            kvsname, pg_scratch->kvs->kvs_name);

    status = HYD_pmcd_pmi_add_kvs(key, val, pg_scratch->kvs, &ret);
    HYDU_ERR_POP(status, "unable to add keypair to kvs\n");

    i = 0;
    tmp[i++] = HYDU_strdup("cmd=put_result rc=");
    tmp[i++] = HYDU_int_to_str(ret);
    if (ret == 0) {
        tmp[i++] = HYDU_strdup(" msg=success");
    }
    else {
        tmp[i++] = HYDU_strdup(" msg=duplicate_key");
        tmp[i++] = HYDU_strdup(key);
    }
    tmp[i++] = HYDU_strdup("\n");
    tmp[i++] = NULL;

    status = HYDU_str_alloc_and_join(tmp, &cmd);
    HYDU_ERR_POP(status, "unable to join strings\n");
    HYDU_free_strlist(tmp);

    status = cmd_response(fd, pid, cmd);
    HYDU_ERR_POP(status, "error writing PMI line\n");
    HYDU_FREE(cmd);

  fn_exit:
    HYD_pmcd_pmi_free_tokens(tokens, token_count);
    HYDU_FUNC_EXIT();
    return status;

  fn_fail:
    goto fn_exit;
}
Example #21
0
File: alloc.c Project: tjhei/fgmpi
HYD_status HYDU_create_proxy_list(struct HYD_exec *exec_list, struct HYD_node *node_list,
                                  struct HYD_pg *pg)
{
    struct HYD_proxy *proxy = NULL, *last_proxy = NULL, *tmp;
    struct HYD_exec *exec;
    struct HYD_node *node;
    int max_oversubscribe, c, num_procs, proxy_rem_cores, exec_rem_procs, allocated_procs;
    int filler_round, num_nodes, i, dummy_fillers;
#if defined(FINEGRAIN_MPI)
    int current_exec_start_rank = 0;
#endif
    HYD_status status = HYD_SUCCESS;

    HYDU_FUNC_ENTER();

    /* Find the current maximum oversubscription on the nodes */
    max_oversubscribe = 1;
    num_nodes = 0;
    for (node = node_list; node; node = node->next) {
        c = HYDU_dceil(node->active_processes, node->core_count);
        if (c > max_oversubscribe)
            max_oversubscribe = c;
        num_nodes++;
    }

    /* make sure there are non-zero cores available */
    c = 0;
    for (node = node_list; node; node = node->next)
        c += (node->core_count * max_oversubscribe) - node->active_processes;
    if (c == 0)
        max_oversubscribe++;

    allocated_procs = 0;
    dummy_fillers = 1;
    for (node = node_list; node; node = node->next) {
        /* check how many cores are available */
        c = (node->core_count * max_oversubscribe) - node->active_processes;

        /* create a proxy associated with this node */
        status = alloc_proxy(&proxy, pg, node);
        HYDU_ERR_POP(status, "error allocating proxy\n");

        proxy->filler_processes = c;
        allocated_procs += c;

        if (proxy->filler_processes < node->core_count)
            dummy_fillers = 0;

        if (pg->proxy_list == NULL)
            pg->proxy_list = proxy;
        else
            last_proxy->next = proxy;
        last_proxy = proxy;

        if (allocated_procs >= pg->pg_process_count)
            break;
    }

    /* If all proxies have as many filler processes as the number of
     * cores, we can reduce those filler processes */
    if (dummy_fillers)
        for (proxy = pg->proxy_list; proxy; proxy = proxy->next)
            proxy->filler_processes -= proxy->node->core_count;

    /* Proxy list is created; add the executables to the proxy list */
    if (pg->proxy_list->next == NULL) {
        /* Special case: there is only one proxy, so all executables
         * directly get appended to this proxy */
        for (exec = exec_list; exec; exec = exec->next) {
#if defined(FINEGRAIN_MPI)
            status = add_exec_to_proxy(exec, pg->proxy_list, exec->proc_count, current_exec_start_rank);
            HYDU_ERR_POP(status, "unable to add executable to proxy\n");
            current_exec_start_rank += exec->proc_count * exec->nfg;
#else
            status = add_exec_to_proxy(exec, pg->proxy_list, exec->proc_count);
            HYDU_ERR_POP(status, "unable to add executable to proxy\n");
#endif
        }
    }
    else {
        exec = exec_list;

        filler_round = 1;
        for (proxy = pg->proxy_list; proxy && proxy->filler_processes == 0; proxy = proxy->next);
        if (proxy == NULL) {
            filler_round = 0;
            proxy = pg->proxy_list;
        }

        exec_rem_procs = exec->proc_count;
        proxy_rem_cores = filler_round ? proxy->filler_processes : proxy->node->core_count;

        while (exec) {
            if (exec_rem_procs == 0) {
                exec = exec->next;
                if (exec)
                    exec_rem_procs = exec->proc_count;
                else
                    break;
            }
            HYDU_ASSERT(exec_rem_procs, status);

            while (proxy_rem_cores == 0) {
                proxy = proxy->next;

                if (proxy == NULL) {
                    filler_round = 0;
                    proxy = pg->proxy_list;
                }

                proxy_rem_cores = filler_round ? proxy->filler_processes : proxy->node->core_count;
            }

            num_procs = (exec_rem_procs > proxy_rem_cores) ? proxy_rem_cores : exec_rem_procs;
            HYDU_ASSERT(num_procs, status);

            exec_rem_procs -= num_procs;
            proxy_rem_cores -= num_procs;

#if defined(FINEGRAIN_MPI)
            status = add_exec_to_proxy(exec, proxy, num_procs, current_exec_start_rank);
            HYDU_ERR_POP(status, "unable to add executable to proxy\n");
            current_exec_start_rank += num_procs * exec->nfg;
#else
            status = add_exec_to_proxy(exec, proxy, num_procs);
            HYDU_ERR_POP(status, "unable to add executable to proxy\n");
#endif
        }
    }

    /* find dummy proxies and remove them */
    while (pg->proxy_list->exec_list == NULL) {
        tmp = pg->proxy_list->next;
        pg->proxy_list->next = NULL;
        HYDU_free_proxy_list(pg->proxy_list);
        pg->proxy_list = tmp;
    }
    for (proxy = pg->proxy_list; proxy->next;) {
        if (proxy->next->exec_list == NULL) {
            tmp = proxy->next;
            proxy->next = proxy->next->next;
            tmp->next = NULL;
            HYDU_free_proxy_list(tmp);
        }
        else {
            proxy = proxy->next;
        }
    }

    for (proxy = pg->proxy_list, i = 0; proxy; proxy = proxy->next, i++)
        proxy->proxy_id = i;

  fn_exit:
    HYDU_FUNC_EXIT();
    return status;

  fn_fail:
    goto fn_exit;
}
static HYD_status fn_spawn(int fd, int pid, int pgid, char *args[])
{
    struct HYD_pg *pg;
    struct HYD_pmcd_pmi_pg_scratch *pg_scratch;
    struct HYD_proxy *proxy;
    struct HYD_pmcd_token *tokens;
    struct HYD_exec *exec_list = NULL, *exec;
    struct HYD_env *env;
    struct HYD_node *node;

    char key[PMI_MAXKEYLEN], *val;
    int nprocs, preput_num, info_num, ret;
    char *execname, *path = NULL;

    struct HYD_pmcd_token_segment *segment_list = NULL;

    int token_count, i, j, k, new_pgid, total_spawns;
    int argcnt, num_segments;
    char *control_port, *proxy_args[HYD_NUM_TMP_STRINGS] = { NULL };
    char *tmp[HYD_NUM_TMP_STRINGS];
    HYD_status status = HYD_SUCCESS;

    HYDU_FUNC_ENTER();

    for (i = 0; args[i]; i++)
        mcmd_args[mcmd_num_args++] = HYDU_strdup(args[i]);
    mcmd_args[mcmd_num_args] = NULL;

    status = HYD_pmcd_pmi_args_to_tokens(mcmd_args, &tokens, &token_count);
    HYDU_ERR_POP(status, "unable to convert args to tokens\n");


    /* Here's the order of things we do:
     *
     *   1. Break the token list into multiple segments, each segment
     *      corresponding to a command. Each command represents
     *      information for one executable.
     *
     *   2. Allocate a process group for the new set of spawned
     *      processes
     *
     *   3. Get all the common keys and deal with them
     *
     *   4. Create an executable list based on the segments.
     *
     *   5. Create a proxy list using the created executable list and
     *      spawn it.
     */

    /* Break the token list into multiple segments and create an
     * executable list based on the segments. */
    val = HYD_pmcd_pmi_find_token_keyval(tokens, token_count, "totspawns");
    HYDU_ERR_CHKANDJUMP(status, val == NULL, HYD_INTERNAL_ERROR,
                        "unable to find token: totspawns\n");
    total_spawns = atoi(val);

    HYDU_MALLOC(segment_list, struct HYD_pmcd_token_segment *,
                total_spawns * sizeof(struct HYD_pmcd_token_segment), status);

    segment_tokens(tokens, token_count, segment_list, &num_segments);

    if (num_segments != total_spawns) {
        /* We didn't read the entire PMI string; wait for the rest to
         * arrive */
        goto fn_exit;
    }
    else {
        /* Got the entire PMI string; free the arguments and reset */
        HYDU_free_strlist(mcmd_args);
        mcmd_num_args = 0;
    }

    /* Allocate a new process group */
    for (pg = &HYD_server_info.pg_list; pg->next; pg = pg->next);
    new_pgid = pg->pgid + 1;

    status = HYDU_alloc_pg(&pg->next, new_pgid);
    HYDU_ERR_POP(status, "unable to allocate process group\n");

    pg = pg->next;

    proxy = HYD_pmcd_pmi_find_proxy(fd);
    HYDU_ASSERT(proxy, status);

    pg->spawner_pg = proxy->pg;

    for (j = 0; j < total_spawns; j++) {
        /* For each segment, we create an exec structure */
        val = HYD_pmcd_pmi_find_token_keyval(&tokens[segment_list[j].start_idx],
                                             segment_list[j].token_count, "nprocs");
        HYDU_ERR_CHKANDJUMP(status, val == NULL, HYD_INTERNAL_ERROR,
                            "unable to find token: nprocs\n");
        nprocs = atoi(val);
        pg->pg_process_count += nprocs;

        val = HYD_pmcd_pmi_find_token_keyval(&tokens[segment_list[j].start_idx],
                                             segment_list[j].token_count, "argcnt");
        HYDU_ERR_CHKANDJUMP(status, val == NULL, HYD_INTERNAL_ERROR,
                            "unable to find token: argcnt\n");
        argcnt = atoi(val);

        val = HYD_pmcd_pmi_find_token_keyval(&tokens[segment_list[j].start_idx],
                                             segment_list[j].token_count, "info_num");
        if (val)
            info_num = atoi(val);
        else
            info_num = 0;

        if (exec_list == NULL) {
            status = HYDU_alloc_exec(&exec_list);
            HYDU_ERR_POP(status, "unable to allocate exec\n");
            exec_list->appnum = 0;
            exec = exec_list;
        }
        else {
            for (exec = exec_list; exec->next; exec = exec->next);
            status = HYDU_alloc_exec(&exec->next);
            HYDU_ERR_POP(status, "unable to allocate exec\n");
            exec->next->appnum = exec->appnum + 1;
            exec = exec->next;
        }

        /* Info keys */
        for (i = 0; i < info_num; i++) {
            char *info_key, *info_val;

            HYDU_snprintf(key, PMI_MAXKEYLEN, "info_key_%d", i);
            val = HYD_pmcd_pmi_find_token_keyval(&tokens[segment_list[j].start_idx],
                                                 segment_list[j].token_count, key);
            HYDU_ERR_CHKANDJUMP(status, val == NULL, HYD_INTERNAL_ERROR,
                                "unable to find token: %s\n", key);
            info_key = val;

            HYDU_snprintf(key, PMI_MAXKEYLEN, "info_val_%d", i);
            val = HYD_pmcd_pmi_find_token_keyval(&tokens[segment_list[j].start_idx],
                                                 segment_list[j].token_count, key);
            HYDU_ERR_CHKANDJUMP(status, val == NULL, HYD_INTERNAL_ERROR,
                                "unable to find token: %s\n", key);
            info_val = val;

            if (!strcmp(info_key, "path")) {
                path = HYDU_strdup(info_val);
            }
            else if (!strcmp(info_key, "wdir")) {
                exec->wdir = HYDU_strdup(info_val);
            }
            else if (!strcmp(info_key, "host")) {
                status = HYDU_process_mfile_token(info_val, 1, &pg->user_node_list);
                HYDU_ERR_POP(status, "error create node list\n");
            }
            else if (!strcmp(info_key, "hostfile")) {
                status =
                    HYDU_parse_hostfile(info_val, &pg->user_node_list, HYDU_process_mfile_token);
                HYDU_ERR_POP(status, "error parsing hostfile\n");
            }
            else {
                /* Unrecognized info key; ignore */
            }
        }

        status = HYDU_correct_wdir(&exec->wdir);
        HYDU_ERR_POP(status, "unable to correct wdir\n");

        val = HYD_pmcd_pmi_find_token_keyval(&tokens[segment_list[j].start_idx],
                                             segment_list[j].token_count, "execname");
        HYDU_ERR_CHKANDJUMP(status, val == NULL, HYD_INTERNAL_ERROR,
                            "unable to find token: execname\n");
        if (path == NULL)
            execname = HYDU_strdup(val);
        else {
            i = 0;
            tmp[i++] = HYDU_strdup(path);
            tmp[i++] = HYDU_strdup("/");
            tmp[i++] = HYDU_strdup(val);
            tmp[i++] = NULL;

            status = HYDU_str_alloc_and_join(tmp, &execname);
            HYDU_ERR_POP(status, "error while joining strings\n");
            HYDU_free_strlist(tmp);
        }

        i = 0;
        exec->exec[i++] = execname;
        for (k = 0; k < argcnt; k++) {
            HYDU_snprintf(key, PMI_MAXKEYLEN, "arg%d", k + 1);
            val = HYD_pmcd_pmi_find_token_keyval(&tokens[segment_list[j].start_idx],
                                                 segment_list[j].token_count, key);
            HYDU_ERR_CHKANDJUMP(status, val == NULL, HYD_INTERNAL_ERROR,
                                "unable to find token: %s\n", key);
            exec->exec[i++] = HYDU_strdup(val);
        }
        exec->exec[i++] = NULL;

        exec->proc_count = nprocs;

        /* It is not clear what kind of environment needs to get
         * passed to the spawned process. Don't set anything here, and
         * let the proxy do whatever it does by default. */
        exec->env_prop = NULL;

        status = HYDU_env_create(&env, "PMI_SPAWNED", "1");
        HYDU_ERR_POP(status, "unable to create PMI_SPAWNED environment\n");
        exec->user_env = env;
    }

    status = HYD_pmcd_pmi_alloc_pg_scratch(pg);
    HYDU_ERR_POP(status, "unable to allocate pg scratch space\n");

    if (pg->user_node_list) {
        pg->pg_core_count = 0;
        for (i = 0, node = pg->user_node_list; node; node = node->next, i++) {
            pg->pg_core_count += node->core_count;
            node->node_id = i;
        }
    }
    else {
        pg->pg_core_count = HYD_server_info.pg_list.pg_core_count;
    }

    pg->pg_process_count = 0;
    for (exec = exec_list; exec; exec = exec->next)
        pg->pg_process_count += exec->proc_count;

    pg_scratch = (struct HYD_pmcd_pmi_pg_scratch *) pg->pg_scratch;

    /* Get the common keys and deal with them */
    val = HYD_pmcd_pmi_find_token_keyval(tokens, token_count, "preput_num");
    if (val)
        preput_num = atoi(val);
    else
        preput_num = 0;

    for (i = 0; i < preput_num; i++) {
        char *preput_key, *preput_val;

        HYDU_snprintf(key, PMI_MAXKEYLEN, "preput_key_%d", i);
        val = HYD_pmcd_pmi_find_token_keyval(tokens, token_count, key);
        HYDU_ERR_CHKANDJUMP(status, val == NULL, HYD_INTERNAL_ERROR,
                            "unable to find token: %s\n", key);
        preput_key = val;

        HYDU_snprintf(key, PMI_MAXKEYLEN, "preput_val_%d", i);
        val = HYD_pmcd_pmi_find_token_keyval(tokens, token_count, key);
        HYDU_ERR_CHKANDJUMP(status, val == NULL, HYD_INTERNAL_ERROR,
                            "unable to find token: %s\n", key);
        preput_val = val;

        status = HYD_pmcd_pmi_add_kvs(preput_key, preput_val, pg_scratch->kvs, &ret);
        HYDU_ERR_POP(status, "unable to add keypair to kvs\n");
    }

    /* Create the proxy list */
    if (pg->user_node_list) {
        status = HYDU_create_proxy_list(exec_list, pg->user_node_list, pg);
        HYDU_ERR_POP(status, "error creating proxy list\n");
    }
    else {
        status = HYDU_create_proxy_list(exec_list, HYD_server_info.node_list, pg);
        HYDU_ERR_POP(status, "error creating proxy list\n");
    }
    HYDU_free_exec_list(exec_list);

    status = HYDU_sock_create_and_listen_portstr(HYD_server_info.user_global.iface,
                                                 HYD_server_info.local_hostname,
                                                 HYD_server_info.port_range, &control_port,
                                                 HYD_pmcd_pmiserv_control_listen_cb,
                                                 (void *) (size_t) new_pgid);
    HYDU_ERR_POP(status, "unable to create PMI port\n");
    if (HYD_server_info.user_global.debug)
        HYDU_dump(stdout, "Got a control port string of %s\n", control_port);

    /* Go to the last PG */
    for (pg = &HYD_server_info.pg_list; pg->next; pg = pg->next);

    status = HYD_pmcd_pmi_fill_in_proxy_args(proxy_args, control_port, new_pgid);
    HYDU_ERR_POP(status, "unable to fill in proxy arguments\n");
    HYDU_FREE(control_port);

    status = HYD_pmcd_pmi_fill_in_exec_launch_info(pg);
    HYDU_ERR_POP(status, "unable to fill in executable arguments\n");

    status = HYDT_bsci_launch_procs(proxy_args, pg->proxy_list, NULL);
    HYDU_ERR_POP(status, "launcher cannot launch processes\n");

    {
        char *cmd_str[HYD_NUM_TMP_STRINGS], *cmd;

        i = 0;
        cmd_str[i++] = HYDU_strdup("cmd=spawn_result rc=0");
        cmd_str[i++] = HYDU_strdup("\n");
        cmd_str[i++] = NULL;

        status = HYDU_str_alloc_and_join(cmd_str, &cmd);
        HYDU_ERR_POP(status, "unable to join strings\n");
        HYDU_free_strlist(cmd_str);

        status = cmd_response(fd, pid, cmd);
        HYDU_ERR_POP(status, "error writing PMI line\n");
        HYDU_FREE(cmd);
    }

  fn_exit:
    HYD_pmcd_pmi_free_tokens(tokens, token_count);
    HYDU_free_strlist(proxy_args);
    if (segment_list)
        HYDU_FREE(segment_list);
    HYDU_FUNC_EXIT();
    return status;

  fn_fail:
    goto fn_exit;
}
Example #23
0
static HYD_status resolve_pattern_string(const char *pattern, char **str, int pgid,
                                         int proxy_id, int rank)
{
    HYD_status status = HYD_SUCCESS;
    int i, pos, tpos;
    char *tmp[HYD_NUM_TMP_STRINGS] = { NULL };
    struct HYD_pg *pg;
    struct HYD_proxy *proxy;

    HYDU_FUNC_ENTER();

    *str = NULL;
    tpos = 0;
    pos = 0;
    i = 0;
    HYDU_MALLOC_OR_JUMP(tmp[i], char *, HYD_TMP_STRLEN, status);
    tmp[i][0] = '\0';

    while (1) {
        HYDU_ASSERT(tpos < HYD_TMP_STRLEN, status);
        if (pattern[pos] != '%') {
            tmp[i][tpos++] = pattern[pos++];
            if (pattern[pos - 1] == '\0')
                break;
        }
        else {
            ++pos;      /* consume '%' */

            if (pattern[pos] == '%') {
                tmp[i][tpos++] = pattern[pos++];
                continue;
            }

            /* all remaining valid specifiers need a new temp string */
            tmp[i][tpos] = '\0';
            ++i;
            tpos = 0;
            HYDU_MALLOC_OR_JUMP(tmp[i], char *, HYD_TMP_STRLEN, status);
            tmp[i][0] = '\0';

            switch (pattern[pos]) {
            case 'r':
                MPL_snprintf(tmp[i], HYD_TMP_STRLEN, "%d", rank);
                break;
            case 'g':
                MPL_snprintf(tmp[i], HYD_TMP_STRLEN, "%d", pgid);
                break;
            case 'p':
                MPL_snprintf(tmp[i], HYD_TMP_STRLEN, "%d", proxy_id);
                break;
            case 'h':
                for (pg = &HYD_server_info.pg_list; pg; pg = pg->next)
                    if (pg->pgid == pgid)
                        break;
                HYDU_ASSERT(pg, status);

                for (proxy = pg->proxy_list; proxy; proxy = proxy->next)
                    if (proxy->proxy_id == proxy_id)
                        break;
                HYDU_ASSERT(proxy, status);
                MPL_snprintf(tmp[i], HYD_TMP_STRLEN, "%s", proxy->node->hostname);
                break;
            case '\0':
                HYDU_ERR_SETANDJUMP(status, HYD_INTERNAL_ERROR,
                                    "dangling '%%' at end of pattern\n");
                break;
            default:
                HYDU_ERR_SETANDJUMP(status, HYD_INTERNAL_ERROR,
                                    "unrecognized pattern specifier ('%c')\n", pattern[pos]);
                break;
            }

            ++pos;      /* skip past fmt specifier */

            ++i;
            tpos = 0;
            HYDU_MALLOC_OR_JUMP(tmp[i], char *, HYD_TMP_STRLEN, status);
            tmp[i][0] = '\0';
        }
    }

    tmp[++i] = NULL;
    status = HYDU_str_alloc_and_join(tmp, str);
    HYDU_ERR_POP(status, "unable to join strings\n");

  fn_exit:
    HYDU_free_strlist(tmp);
    HYDU_FUNC_EXIT();
    return status;

  fn_fail:
    goto fn_exit;
}
Example #24
0
static HYD_status stdoe_cb(int _fd, int pgid, int proxy_id, int rank, void *_buf, int buflen)
{
    int fd = _fd;
    char *pattern_resolve, *pattern = NULL;
    struct stdoe_fd *tmp, *run;
    int sent, closed, mark, i;
    char *buf = (char *) _buf, *prepend;
    HYD_status status = HYD_SUCCESS;

    HYDU_FUNC_ENTER();

    pattern = (_fd == STDOUT_FILENO) ? HYD_ui_info.outfile_pattern :
        (_fd == STDERR_FILENO) ? HYD_ui_info.errfile_pattern : NULL;

    if (pattern) {
        /* See if the pattern already exists */
        status = resolve_pattern_string(pattern, &pattern_resolve, pgid, proxy_id, rank);
        HYDU_ERR_POP(status, "error resolving pattern\n");

        for (run = stdoe_fd_list; run; run = run->next)
            if (!strcmp(run->pattern, pattern_resolve))
                break;

        if (run) {
            fd = run->fd;
            MPL_free(pattern_resolve);
        }
        else {
            HYDU_MALLOC_OR_JUMP(tmp, struct stdoe_fd *, sizeof(struct stdoe_fd), status);
            tmp->pattern = pattern_resolve;
            tmp->fd = open(tmp->pattern, O_RDWR | O_CREAT, S_IRUSR | S_IWUSR);
            HYDU_ASSERT(tmp->fd >= 0, status);
            tmp->next = NULL;

            if (stdoe_fd_list == NULL)
                stdoe_fd_list = tmp;
            else {
                for (run = stdoe_fd_list; run->next; run = run->next);
                run->next = tmp;
            }

            fd = tmp->fd;
        }
    }

    if (HYD_ui_info.prepend_pattern == NULL) {
        status = HYDU_sock_write(fd, buf, buflen, &sent, &closed, HYDU_SOCK_COMM_MSGWAIT);
        HYDU_ERR_POP(status, "unable to write data to stdout/stderr\n");
        HYDU_ASSERT(!closed, status);
    }
    else {
        status = resolve_pattern_string(HYD_ui_info.prepend_pattern, &prepend, pgid, proxy_id,
                                        rank);
        HYDU_ERR_POP(status, "error resolving pattern\n");

        mark = 0;
        for (i = 0; i < buflen; i++) {
            if (buf[i] == '\n' || i == buflen - 1) {
                if (prepend[0] != '\0') {       /* sock_write barfs on maxlen==0 */
                    status = HYDU_sock_write(fd, (const void *) prepend,
                                             strlen(prepend), &sent, &closed,
                                             HYDU_SOCK_COMM_MSGWAIT);
                    HYDU_ERR_POP(status, "unable to write data to stdout/stderr\n");
                }
                status = HYDU_sock_write(fd, (const void *) &buf[mark], i - mark + 1,
                                         &sent, &closed, HYDU_SOCK_COMM_MSGWAIT);
                HYDU_ERR_POP(status, "unable to write data to stdout/stderr\n");
                HYDU_ASSERT(!closed, status);
                mark = i + 1;
            }
        }

        MPL_free(prepend);
    }

  fn_exit:
    HYDU_FUNC_EXIT();
    return status;

  fn_fail:
    goto fn_exit;
}
static HYD_status fn_get(int fd, int pid, int pgid, char *args[])
{
    int i;
    struct HYD_proxy *proxy;
    struct HYD_pmcd_pmi_pg_scratch *pg_scratch;
    struct HYD_pmcd_pmi_kvs_pair *run;
    char *kvsname, *key, *val;
    char *tmp[HYD_NUM_TMP_STRINGS], *cmd;
    struct HYD_pmcd_token *tokens;
    int token_count;
    HYD_status status = HYD_SUCCESS;

    HYDU_FUNC_ENTER();

    status = HYD_pmcd_pmi_args_to_tokens(args, &tokens, &token_count);
    HYDU_ERR_POP(status, "unable to convert args to tokens\n");

    kvsname = HYD_pmcd_pmi_find_token_keyval(tokens, token_count, "kvsname");
    HYDU_ERR_CHKANDJUMP(status, kvsname == NULL, HYD_INTERNAL_ERROR,
                        "unable to find token: kvsname\n");

    key = HYD_pmcd_pmi_find_token_keyval(tokens, token_count, "key");
    HYDU_ERR_CHKANDJUMP(status, key == NULL, HYD_INTERNAL_ERROR,
                        "unable to find token: key\n");

    proxy = HYD_pmcd_pmi_find_proxy(fd);
    HYDU_ASSERT(proxy, status);

    pg_scratch = (struct HYD_pmcd_pmi_pg_scratch *) proxy->pg->pg_scratch;

    val = NULL;
    if (!strcmp(key, "PMI_dead_processes")) {
        val = pg_scratch->dead_processes;
        goto found_val;
    }

    if (strcmp(pg_scratch->kvs->kvs_name, kvsname))
        HYDU_ERR_SETANDJUMP(status, HYD_INTERNAL_ERROR,
                            "kvsname (%s) does not match this group's kvs space (%s)\n",
                            kvsname, pg_scratch->kvs->kvs_name);

    /* Try to find the key */
    for (run = pg_scratch->kvs->key_pair; run; run = run->next) {
        if (!strcmp(run->key, key)) {
            val = run->val;
            break;
        }
    }

  found_val:
    i = 0;
    tmp[i++] = HYDU_strdup("cmd=get_result rc=");
    if (val) {
        tmp[i++] = HYDU_strdup("0 msg=success value=");
        tmp[i++] = HYDU_strdup(val);
    }
    else {
        tmp[i++] = HYDU_strdup("-1 msg=key_");
        tmp[i++] = HYDU_strdup(key);
        tmp[i++] = HYDU_strdup("_not_found value=unknown");
    }
    tmp[i++] = HYDU_strdup("\n");
    tmp[i++] = NULL;

    status = HYDU_str_alloc_and_join(tmp, &cmd);
    HYDU_ERR_POP(status, "unable to join strings\n");
    HYDU_free_strlist(tmp);

    status = cmd_response(fd, pid, cmd);
    HYDU_ERR_POP(status, "error writing PMI line\n");
    HYDU_FREE(cmd);

  fn_exit:
    HYD_pmcd_pmi_free_tokens(tokens, token_count);
    HYDU_FUNC_EXIT();
    return status;

  fn_fail:
    goto fn_exit;
}
Example #26
0
/* This waits for the restarted processes to reconnect their
   stdin/out/err sockets, then sets the appropriate entries in the in
   out and err arrays.  This also gets the pids of the restarted
   processes. */
static HYD_status wait_for_stdinouterr_sockets(int num_ranks, int *ranks, int *in, int *out,
                                               int *err, int *pid)
{
    HYD_status status = HYD_SUCCESS;
    int ret;
    int fd;
    int i, c;
    sock_ident_t id;
    int num_expected_connections = num_ranks * 2;       /* wait for connections for stdout and err */
    HYDU_FUNC_ENTER();

    /* if one of the processes is rank 0, we should wait for an
     * additional connection for stdin */
    for (i = 0; i < num_ranks; ++i)
        if (ranks[i] == 0) {
            ++num_expected_connections;
            break;
        }

    for (c = 0; c < num_expected_connections; ++c) {
        size_t len;
        char *id_p;
        /* wait for a connection */
        do {
            struct sockaddr_in rmt_addr;
            socklen_t sa_len = sizeof(rmt_addr);;
            fd = accept(listen_fd, (struct sockaddr *) &rmt_addr, &sa_len);
        } while (fd && errno == EINTR);
        HYDU_ERR_CHKANDJUMP(status, fd == -1, HYD_INTERNAL_ERROR, "accept failed, %s\n",
                            strerror(errno));

        /* read the socket identifier */
        len = sizeof(id);
        id_p = (char *) &id;
        do {
            do {
                ret = read(fd, id_p, len);
            } while (ret == 0 || (ret == -1 && errno == EINTR));
            HYDU_ERR_CHKANDJUMP(status, ret == -1, HYD_INTERNAL_ERROR, "read failed, %s\n",
                                strerror(errno));
            len -= ret;
            id_p += ret;
        } while (len);

        /* determine the index for this process in the stdout/err
         * arrays */
        for (i = 0; i < num_ranks; ++i)
            if (ranks[i] == id.rank)
                break;
        HYDU_ASSERT(i < num_ranks, status);

        /* assign the fd */
        switch (id.socktype) {
        case IN_SOCK:
            HYDU_ASSERT(id.rank == 0, status);
            *in = fd;
            break;
        case OUT_SOCK:
            out[i] = fd;
            break;
        case ERR_SOCK:
            err[i] = fd;
            break;
        default:
            HYDU_ASSERT(0, status);
            break;
        }

        /* assign the pid */
        pid[i] = id.pid;
    }

    ret = close(listen_fd);
    HYDU_ERR_CHKANDJUMP(status, ret, HYD_INTERNAL_ERROR, "close of listener port failed, %s\n",
                        strerror(errno));


  fn_exit:
    HYDU_FUNC_EXIT();
    return status;
  fn_fail:
    goto fn_exit;
}
Example #27
0
static HYD_status stdoe_cb(int fd, HYD_event_t events, void *userp)
{
    int closed, i, sent, recvd, stdfd;
    char buf[HYD_TMPBUF_SIZE];
    struct HYD_pmcd_hdr hdr;
    HYD_status status = HYD_SUCCESS;

    HYDU_FUNC_ENTER();

    stdfd = (int) (size_t) userp;

    status = HYDU_sock_read(fd, buf, HYD_TMPBUF_SIZE, &recvd, &closed, HYDU_SOCK_COMM_NONE);
    HYDU_ERR_POP(status, "sock read error\n");

    if (recvd) {
        if (stdfd == STDOUT_FILENO) {
            HYD_pmcd_init_header(&hdr);
            hdr.cmd = STDOUT;
            for (i = 0; i < HYD_pmcd_pmip.local.proxy_process_count; i++)
                if (HYD_pmcd_pmip.downstream.out[i] == fd)
                    break;
        }
        else {
            HYD_pmcd_init_header(&hdr);
            hdr.cmd = STDERR;
            for (i = 0; i < HYD_pmcd_pmip.local.proxy_process_count; i++)
                if (HYD_pmcd_pmip.downstream.err[i] == fd)
                    break;
        }

        HYDU_ASSERT(i < HYD_pmcd_pmip.local.proxy_process_count, status);

        hdr.pgid = HYD_pmcd_pmip.local.pgid;
        hdr.proxy_id = HYD_pmcd_pmip.local.id;
        hdr.rank = HYD_pmcd_pmip.downstream.pmi_rank[i];
        hdr.buflen = recvd;

        {
            int upstream_sock_closed;

            status = HYDU_sock_write(HYD_pmcd_pmip.upstream.control, &hdr, sizeof(hdr), &sent,
                                     &upstream_sock_closed, HYDU_SOCK_COMM_MSGWAIT);
            HYDU_ERR_POP(status, "sock write error\n");
            HYDU_ASSERT(!upstream_sock_closed, status);

            status = HYDU_sock_write(HYD_pmcd_pmip.upstream.control, buf, recvd, &sent,
                                     &upstream_sock_closed, HYDU_SOCK_COMM_MSGWAIT);
            HYDU_ERR_POP(status, "sock write error\n");
            HYDU_ASSERT(!upstream_sock_closed, status);
        }
    }

    if (closed) {
        /* The connection has closed */
        status = HYDT_dmx_deregister_fd(fd);
        HYDU_ERR_POP(status, "unable to deregister fd\n");

        if (stdfd == STDOUT_FILENO) {
            for (i = 0; i < HYD_pmcd_pmip.local.proxy_process_count; i++)
                if (HYD_pmcd_pmip.downstream.out[i] == fd)
                    HYD_pmcd_pmip.downstream.out[i] = HYD_FD_CLOSED;
        }
        else {
            for (i = 0; i < HYD_pmcd_pmip.local.proxy_process_count; i++)
                if (HYD_pmcd_pmip.downstream.err[i] == fd)
                    HYD_pmcd_pmip.downstream.err[i] = HYD_FD_CLOSED;
        }

        close(fd);
    }

  fn_exit:
    HYDU_FUNC_EXIT();
    return status;

  fn_fail:
    goto fn_exit;
}
Example #28
0
static HYD_status bcast_keyvals(int fd, int pid)
{
    int keyval_count, arg_count, i, j;
    char **tmp = NULL, *cmd;
    struct HYD_pmcd_pmi_kvs_pair *run;
    struct HYD_proxy *proxy, *tproxy;
    struct HYD_pmcd_pmi_pg_scratch *pg_scratch;
    HYD_status status = HYD_SUCCESS;

    HYDU_FUNC_ENTER();

    proxy = HYD_pmcd_pmi_find_proxy(fd);
    HYDU_ASSERT(proxy, status);
    pg_scratch = (struct HYD_pmcd_pmi_pg_scratch *) proxy->pg->pg_scratch;

    /* find the number of keyvals */
    keyval_count = 0;
    for (run = pg_scratch->kvs->key_pair; run; run = run->next)
        keyval_count++;

    keyval_count -= pg_scratch->keyval_dist_count;

    /* Each keyval has the following four items: 'key' '=' 'val'
     * '<space>'.  Two additional items for the command at the start
     * and the NULL at the end. */
    HYDU_MALLOC_OR_JUMP(tmp, char **, (4 * keyval_count + 3) * sizeof(char *), status);

    /* send all available keyvals downstream */
    if (keyval_count) {
        arg_count = 1;
        i = 0;
        tmp[i++] = MPL_strdup("cmd=keyval_cache ");
        for (run = pg_scratch->kvs->key_pair, j = 0; run; run = run->next, j++) {
            if (j < pg_scratch->keyval_dist_count)
                continue;

            tmp[i++] = MPL_strdup(run->key);
            tmp[i++] = MPL_strdup("=");
            tmp[i++] = MPL_strdup(run->val);
            tmp[i++] = MPL_strdup(" ");

            arg_count++;
            if (arg_count >= MAX_PMI_INTERNAL_ARGS) {
                tmp[i++] = MPL_strdup("\n");
                tmp[i++] = NULL;

                status = HYDU_str_alloc_and_join(tmp, &cmd);
                HYDU_ERR_POP(status, "unable to join strings\n");
                HYDU_free_strlist(tmp);

                pg_scratch->keyval_dist_count += (arg_count - 1);
                for (tproxy = proxy->pg->proxy_list; tproxy; tproxy = tproxy->next) {
                    status = cmd_response(tproxy->control_fd, pid, cmd);
                    HYDU_ERR_POP(status, "error writing PMI line\n");
                }
                MPL_free(cmd);

                i = 0;
                tmp[i++] = MPL_strdup("cmd=keyval_cache ");
                arg_count = 1;
            }
        }
        tmp[i++] = MPL_strdup("\n");
        tmp[i++] = NULL;

        if (arg_count > 1) {
            status = HYDU_str_alloc_and_join(tmp, &cmd);
            HYDU_ERR_POP(status, "unable to join strings\n");

            pg_scratch->keyval_dist_count += (arg_count - 1);
            for (tproxy = proxy->pg->proxy_list; tproxy; tproxy = tproxy->next) {
                status = cmd_response(tproxy->control_fd, pid, cmd);
                HYDU_ERR_POP(status, "error writing PMI line\n");
            }
            MPL_free(cmd);
        }
        HYDU_free_strlist(tmp);
    }

  fn_exit:
    if (tmp)
        MPL_free(tmp);
    HYDU_FUNC_EXIT();
    return status;

  fn_fail:
    goto fn_exit;
}
Example #29
0
/* This function does not provide any flow control. We just read from
 * the incoming socket as much as we can and push out to the outgoing
 * socket as much as we can. This can result in the process calling it
 * polling continuously waiting for events, but that's a rare case for
 * stdio (which is what this function is meant to provide
 * functionality for). */
HYD_status HYDU_sock_forward_stdio(int in, int out, int *closed)
{
    struct fwd_hash *fwd_hash, *tmp;
    int count;
    HYD_status status = HYD_SUCCESS;

    HYDU_FUNC_ENTER();

    /* find the fwd hash */
    for (tmp = fwd_hash_list; tmp; tmp = tmp->next)
        if (out == tmp->out)
            break;

    if (tmp == NULL) {  /* No hash found; create one */
        status = alloc_fwd_hash(&fwd_hash, in, out);
        HYDU_ERR_POP(status, "unable to allocate forward hash\n");
        if (fwd_hash_list == NULL)
            fwd_hash_list = fwd_hash;
        else {
            for (tmp = fwd_hash_list; tmp->next; tmp = tmp->next);
            tmp->next = fwd_hash;
        }
    }
    else {
        fwd_hash = tmp;
    }

    *closed = 0;
    if (fwd_hash->buf_count == 0) {
        /* there is no data in the buffer, read something into it */
        status = HYDU_sock_read(in, fwd_hash->buf, HYD_TMPBUF_SIZE, &count, closed,
                                HYDU_SOCK_COMM_NONE);
        HYDU_ERR_POP(status, "read error\n");

        if (!*closed) {
            fwd_hash->buf_offset = 0;
            fwd_hash->buf_count += count;

            /* We should never get a zero count, as the upper-layer
             * should have waited for an event from the demux engine
             * before calling us. */
            HYDU_ASSERT(count, status);
        }
    }

    if (fwd_hash->buf_count) {
        /* there is data in the buffer, send it out first */
        status =
            HYDU_sock_write(out, fwd_hash->buf + fwd_hash->buf_offset, fwd_hash->buf_count,
                            &count, closed, HYDU_SOCK_COMM_MSGWAIT);
        HYDU_ERR_POP(status, "write error\n");

        if (!*closed) {
            fwd_hash->buf_offset += count;
            fwd_hash->buf_count -= count;
        }
    }

    /* If the incoming socket is closed, make sure we forward out all
     * of the buffered data */
    while (*closed && fwd_hash->buf_count) {
        status =
            HYDU_sock_write(out, fwd_hash->buf + fwd_hash->buf_offset, fwd_hash->buf_count,
                            &count, closed, HYDU_SOCK_COMM_MSGWAIT);
        HYDU_ERR_POP(status, "write error\n");

        if (!*closed) {
            fwd_hash->buf_offset += count;
            fwd_hash->buf_count -= count;
        }
    }

  fn_exit:
    HYDU_FUNC_EXIT();
    return status;

  fn_fail:
    goto fn_exit;
}
Example #30
0
HYD_status HYDU_sock_is_local(char *host, int *is_local)
{
    struct hostent *ht;
    char *host_ip = NULL, *lhost_ip = NULL;
    char lhost[MAX_HOSTNAME_LEN];
    struct sockaddr_in sa;
    struct ifaddrs *ifaddr, *ifa;
    char buf[MAX_HOSTNAME_LEN];
    HYD_status status = HYD_SUCCESS;

    *is_local = 0;

    /* Algorithm used:
     *
     * 1. Find the local host name
     *    - If "host" matches the local host name, return.
     * 2. Find the IP address associated with "host" and the IP the local host
     *    resolves to.
     *    - If these IPs match, return.
     * 3. Find all local network IP addresses
     *    - If the "host" IP address matches any of the local network IP
     *      addresses, return.
     */


    /* STEP 1: If "host" matches the local host name, return */
    if (gethostname(lhost, MAX_HOSTNAME_LEN) < 0) {
        /* We can't figure out what my localhost name is.  *sigh*.  We
         * could return an error here, but we will just punt it to the
         * upper layer saying that we don't know if it is local.  We
         * cannot try steps 2 and 3 either, since we don't have our
         * local hostname. */
        goto fn_exit;
    }
    else if (!strcmp(lhost, host)) {
        *is_local = 1;
        goto fn_exit;
    }
    else {
        /* we have our local hostname, but that does not match the
         * provided hostname.  Let's try to get our remote IP address
         * first.  If we can't get that, we can give up. */
        /* If we are unable to resolve the remote host name, it need
         * not be an error. It could mean that the user is using an
         * alias for the hostname (e.g., an ssh config alias) */
        if ((ht = gethostbyname(host)) == NULL)
            goto fn_exit;

        memset((char *) &sa, 0, sizeof(struct sockaddr_in));
        memcpy(&sa.sin_addr, ht->h_addr_list[0], ht->h_length);

        /* Find the IP address of the host */
        host_ip = MPL_strdup((char *) inet_ntop(AF_INET, (const void *) &sa.sin_addr, buf,
                                                MAX_HOSTNAME_LEN));
        HYDU_ASSERT(host_ip, status);
    }

    /* OK, if we are here, we got the remote IP.  We have two ways of
     * getting the local IP: gethostbyname or getifaddrs.  We'll try
     * both.  */

    /* STEP 2: Let's try the gethostbyname model */

    if ((ht = gethostbyname(lhost))) {
        memset((char *) &sa, 0, sizeof(struct sockaddr_in));
        memcpy(&sa.sin_addr, ht->h_addr_list[0], ht->h_length);

        /* Find the IP address of the host */
        lhost_ip = MPL_strdup((char *) inet_ntop(AF_INET, (const void *) &sa.sin_addr, buf,
                                                 MAX_HOSTNAME_LEN));
        HYDU_ASSERT(lhost_ip, status);

        /* See if the IP address of the hostname we got matches the IP
         * address to which the local host resolves */
        if (!strcmp(lhost_ip, host_ip)) {
            *is_local = 1;
            goto fn_exit;
        }
    }

    /* Either gethostbyname didn't resolve or we didn't find a match.
     * Either way, let's try the getifaddr model. */

    /* STEP 3: Let's try the getifaddr model */

    if (getifaddrs(&ifaddr) == -1)
        HYDU_ERR_SETANDJUMP(status, HYD_INTERNAL_ERROR, "getifaddrs failed\n");

    /* Find the IP addresses of all local interfaces */
    for (ifa = ifaddr; ifa; ifa = ifa->ifa_next) {
        if (ifa->ifa_addr && ifa->ifa_addr->sa_family == AF_INET) {
            struct sockaddr_in *sa_ptr = (struct sockaddr_in *) ifa->ifa_addr;

            lhost_ip = MPL_strdup((char *)
                                  inet_ntop(AF_INET, (const void *) &(sa_ptr->sin_addr), buf,
                                            MAX_HOSTNAME_LEN));
            HYDU_ASSERT(lhost_ip, status);

            /* For each local IP address, see if it matches the "host"
             * IP address */
            if (!strcmp(host_ip, lhost_ip)) {
                *is_local = 1;
                freeifaddrs(ifaddr);
                goto fn_exit;
            }

            MPL_free(lhost_ip);
            lhost_ip = NULL;
        }
    }

    freeifaddrs(ifaddr);

  fn_exit:
    if (host_ip)
        MPL_free(host_ip);
    if (lhost_ip)
        MPL_free(lhost_ip);
    return status;

  fn_fail:
    goto fn_exit;
}