Esempio n. 1
0
static void config_hwloc_paths (flux_t h, const char *dirpath)
{
    uint32_t size, rank;
    const char *key_prefix = "config.resource.hwloc.xml";
    char key[64];
    char path[PATH_MAX];
    int n;

    if (flux_get_size (h, &size) < 0)
        log_err_exit ("flux_get_size");
    for (rank = 0; rank < size; rank++) {
        n = snprintf (key, sizeof (key), "%s.%"PRIu32, key_prefix, rank);
        assert (n < sizeof (key));
        if (dirpath == NULL) {
            /* Remove any per rank xml and reload default xml */
            if (kvs_unlink (h, key) < 0)
                log_err_exit ("kvs_unlink");
            continue;
        }
        n = snprintf (path, sizeof (path), "%s/%"PRIu32".xml", dirpath, rank);
        assert (n < sizeof (path));
        if (access (path, R_OK) < 0)
            log_err_exit ("%s", path);
        if (kvs_put_string (h, key, path) < 0)
            log_err_exit ("kvs_put_string");
    }
    if (kvs_commit (h) < 0)
        log_err_exit ("kvs_commit");
}
Esempio n. 2
0
bool hello_complete (hello_t *hello)
{
    uint32_t size;
    if (flux_get_size (hello->h, &size) < 0)
        return false;
    return (size == hello->count);
}
Esempio n. 3
0
int flux_info (flux_t h, uint32_t *rank, uint32_t *size, int *arity)
{
    if (rank && flux_get_rank (h, rank) < 0)
        return -1;
    if (size && flux_get_size (h, size) < 0)
        return -1;
    if (arity && flux_get_arity (h, arity) < 0)
        return -1;
    return 0;
}
Esempio n. 4
0
int flux_size (flux_t h)
{
    uint32_t size;
    if (flux_get_size (h, &size) < 0)
        return -1;
    if (size >= INT_MAX) {
        errno = ERANGE;
        return -1;
    }
    return size;
}
Esempio n. 5
0
flux_rpc_t *flux_rpc_multi (flux_t h, const char *topic, const char *json_str,
                            const char *nodeset, int flags)
{
    nodeset_t *ns = NULL;
    nodeset_iterator_t *itr = NULL;
    flux_rpc_t *rpc = NULL;
    int i;
    uint32_t count;

    if (!topic || !nodeset) {
        errno = EINVAL;
        goto error;
    }
    if (!strcmp (nodeset, "all")) {
        if (flux_get_size (h, &count) < 0)
            goto error;
        ns = nodeset_create_range (0, count - 1);
    } else {
        if ((ns = nodeset_create_string (nodeset)))
            count = nodeset_count (ns);
    }
    if (!ns) {
        errno = EINVAL;
        goto error;
    }
    if (!(rpc = rpc_create (h, flags, count)))
        goto error;
    if (!(itr = nodeset_iterator_create (ns)))
        goto error;
    for (i = 0; i < count; i++) {
        uint32_t nodeid = nodeset_next (itr);
        assert (nodeid != NODESET_EOF);
        if (rpc_request_send (rpc, i, topic, json_str, nodeid) < 0)
            goto error;
        if (!rpc->oneway)
            rpc->nodemap[i] = nodeid;
    }
    nodeset_iterator_destroy (itr);
    return rpc;
error:
    if (rpc)
        flux_rpc_destroy (rpc);
    if (itr)
        nodeset_iterator_destroy (itr);
    if (ns)
        nodeset_destroy (ns);
    return NULL;
}
Esempio n. 6
0
static ns_t *ns_guess (flux_t h)
{
    ns_t *ns = xzmalloc (sizeof (*ns));
    uint32_t size, rank;

    if (flux_get_rank (h, &rank) < 0)
        log_err_exit ("flux_get_rank");
    if (flux_get_size (h, &size) < 0)
        log_err_exit ("flux_get_size");
    ns->ok = nodeset_create ();
    ns->slow = nodeset_create ();
    ns->fail = nodeset_create ();
    ns->unknown = nodeset_create ();
    if (!ns->ok || !ns->slow || !ns->fail || !ns->unknown)
        oom ();
    nodeset_add_range (ns->ok, rank, size - 1);
    return ns;
}
Esempio n. 7
0
int main (int argc, char **argv)
{
    flux_t h;
    uint32_t rank, n;
    int k;

    if (!(h = flux_open (NULL, 0)))
        err_exit ("flux_open");
    if (flux_get_rank (h, &rank) < 0)
        err_exit ("flux_get_rank");
    if (flux_get_size (h, &n) < 0)
        err_exit ("flux_get_size");
    if (flux_get_arity (h, &k) < 0)
        err_exit ("flux_get_arity");
    printf ("height of %d-ary tree of size %" PRIu32 ": %d\n",
            k, n, tree_height (n, k));
    printf ("height of %d-ary at rank %" PRIu32 ": %d\n",
            k, rank, tree_height (rank + 1, k));
    flux_close (h);
    return (0);
}
Esempio n. 8
0
static int hello_add_rank (hello_t *hello, uint32_t rank)
{
    uint32_t size;

    if (flux_get_size (hello->h, &size) < 0)
        return -1;
    if (!hello->nodeset)
        hello->nodeset = nodeset_create_size (size);
    if (!nodeset_add_rank (hello->nodeset, rank)) {
        errno = EPROTO;
        return -1;
    }
    hello->count++;
    if (hello->count == size) {
        if (hello->cb)
            hello->cb (hello, hello->cb_arg);
        if (hello->timer)
            flux_watcher_stop (hello->timer);
    }
    return 0;
}
Esempio n. 9
0
int main (int argc, char *argv[])
{
    flux_t h;
    int ch;
    uint32_t rank = FLUX_NODEID_ANY; /* local */
    char *cmd;
    int e;

    log_init ("flux-comms");

    while ((ch = getopt_long (argc, argv, OPTIONS, longopts, NULL)) != -1) {
        switch (ch) {
            case 'h': /* --help */
                usage ();
                break;
            case 'r': /* --rank N */
                rank = strtoul (optarg, NULL, 10);
                break;
            default:
                usage ();
                break;
        }
    }
    if (optind == argc)
        usage ();
    cmd = argv[optind++];
    if (rank != FLUX_NODEID_ANY
            && (!strcmp (cmd, "recover-all") || !strcmp (cmd, "info")))
        usage ();

    if (!(h = flux_open (NULL, 0)))
        log_err_exit ("flux_open");

    if (!strcmp (cmd, "reparent")) {
        if (optind != argc - 1)
            usage ();
        if (flux_reparent (h, rank, argv[optind]) < 0)
            log_err_exit ("flux_reparent");
    } else if (!strcmp (cmd, "idle")) {
        if (optind != argc)
            usage ();
        char *peers;
        if (!(peers = flux_lspeer (h, rank)))
            log_err_exit ("flux_peer");
        printf ("%s\n", peers);
        free (peers);
    } else if (!strcmp (cmd, "panic")) {
        char *msg = NULL;
        size_t len = 0;
        if (optind < argc) {
            if ((e = argz_create (argv + optind, &msg, &len)) != 0)
                log_errn_exit (e, "argz_create");
            argz_stringify (msg, len, ' ');
        }
        flux_panic (h, rank, msg);
        if (msg)
            free (msg);
    } else if (!strcmp (cmd, "failover")) {
        if (optind != argc)
            usage ();
        if (flux_failover (h, rank) < 0)
            log_err_exit ("flux_failover");
    } else if (!strcmp (cmd, "recover")) {
        if (optind != argc)
            usage ();
        if (flux_recover (h, rank) < 0)
            log_err_exit ("flux_recover");
    } else if (!strcmp (cmd, "recover-all")) {
        if (optind != argc)
            usage ();
        if (flux_recover_all (h) < 0)
            log_err_exit ("flux_recover_all");
    } else if (!strcmp (cmd, "info")) {
        int arity;
        uint32_t rank, size;
        const char *s;
        if (flux_get_rank (h, &rank) < 0 || flux_get_size (h, &size) < 0)
            log_err_exit ("flux_get_rank/size");
        if (!(s = flux_attr_get (h, "tbon.arity", NULL)))
            log_err_exit ("flux_attr_get tbon.arity");
        arity = strtoul (s, NULL, 10);
        printf ("rank=%d\n", rank);
        printf ("size=%d\n", size);
        printf ("arity=%d\n", arity);
    } else
        usage ();

    flux_close (h);
    log_fini ();
    return 0;
}
Esempio n. 10
0
flux_rpc_t *flux_rpc_multi (flux_t *h,
                            const char *topic,
                            const char *json_str,
                            const char *nodeset,
                            int flags)
{
    nodeset_t *ns = NULL;
    nodeset_iterator_t *itr = NULL;
    flux_rpc_t *rpc = NULL;
    int i;
    uint32_t count;
    int rx_expected;

    if (!topic || !nodeset) {
        errno = EINVAL;
        goto error;
    }
    if (!strcmp (nodeset, "all")) {
        if (flux_get_size (h, &count) < 0)
            goto error;
        ns = nodeset_create_range (0, count - 1);
    } else {
        if ((ns = nodeset_create_string (nodeset)))
            count = nodeset_count (ns);
    }
    if (!ns) {
        errno = EINVAL;
        goto error;
    }
    rx_expected = count;
    if ((flags & FLUX_RPC_NORESPONSE))
        rx_expected = 0;
    if (!(rpc = rpc_create (h, rx_expected)))
        goto error;
    if (!(itr = nodeset_iterator_create (ns)))
        goto error;
#if HAVE_CALIPER
    cali_begin_string_byname ("flux.message.rpc", "multi");
    cali_begin_int_byname ("flux.message.response_expected",
                           !(flags & FLUX_RPC_NORESPONSE));
#endif
    for (i = 0; i < count; i++) {
        uint32_t nodeid = nodeset_next (itr);
        assert (nodeid != NODESET_EOF);
#if HAVE_CALIPER
        cali_begin_int_byname ("flux.message.rpc.nodeid", nodeid);
#endif
        if (rpc_request_send (rpc, topic, nodeid, json_str) < 0)
            goto error;
#if HAVE_CALIPER
        cali_end_byname ("flux.message.rpc.nodeid");
#endif
    }
#if HAVE_CALIPER
    cali_end_byname ("flux.message.response_expected");
    cali_end_byname ("flux.message.rpc");
#endif
    nodeset_iterator_destroy (itr);
    return rpc;
error:
    if (rpc)
        flux_rpc_destroy (rpc);
    if (itr)
        nodeset_iterator_destroy (itr);
    if (ns)
        nodeset_destroy (ns);
    return NULL;
}
Esempio n. 11
0
// Recevied a request to join the simulation ("sim.join")
static void join_cb (flux_t *h,
                     flux_msg_handler_t *w,
                     const flux_msg_t *msg,
                     void *arg)
{
    int mod_rank;
    json_t *request = NULL;
    const char *mod_name = NULL, *json_str = NULL;
    double *next_event = (double *)malloc (sizeof (double));
    ctx_t *ctx = arg;
    sim_state_t *sim_state = ctx->sim_state;
    uint32_t size;

    if (flux_msg_get_json (msg, &json_str) < 0 || json_str == NULL
        || !(request = Jfromstr (json_str))
        || !Jget_str (request, "mod_name", &mod_name)
        || !Jget_int (request, "rank", &mod_rank)
        || !Jget_double (request, "next_event", next_event)) {
        flux_log (h, LOG_ERR, "%s: bad join message", __FUNCTION__);
        goto out;
    }
    if (flux_get_size (h, &size) < 0)
        goto out;
    if (mod_rank < 0 || mod_rank >= size) {
        flux_log (h, LOG_ERR, "%s: bad rank in join message", __FUNCTION__);
        goto out;
    }

    flux_log (h,
              LOG_DEBUG,
              "join rcvd from module %s on rank %d, next event at %f",
              mod_name,
              mod_rank,
              *next_event);

    zhash_t *timers = sim_state->timers;
    if (zhash_insert (timers, mod_name, next_event) < 0) {  // key already
                                                            // exists
        flux_log (h,
                  LOG_ERR,
                  "duplicate join request from %s, module already exists in "
                  "sim_state",
                  mod_name);
        goto out;
    }
    // clear next event so it is not freed below
    next_event = NULL;

    // TODO: this is horribly hackish, improve the handshake to avoid
    // this hardcoded # of modules. maybe use a timeout?  ZMQ provides
    // support for polling etc with timeouts, should try that
    static int num_modules = 3;
    num_modules--;
    if (num_modules <= 0) {
        if (handle_next_event (ctx) < 0) {
            flux_log (h, LOG_ERR, "failure while handling next event");
            return;
        }
    }

out:
    Jput (request);
    free (next_event);
}
Esempio n. 12
0
int rpctest_begin_cb (flux_t h, int type, zmsg_t **zmsg, void *arg)
{
    uint32_t nodeid;
    int i, errors;
    int old_count;
    flux_rpc_t *r;
    const char *json_str;

    errno = 0;
    ok (!(r = flux_rpc_multi (h, NULL, "foo", "all", 0)) && errno == EINVAL,
        "flux_rpc_multi [0] with NULL topic fails with EINVAL");
    errno = 0;
    ok (!(r = flux_rpc_multi (h, "bar", "foo", NULL, 0)) && errno == EINVAL,
        "flux_rpc_multi [0] with NULL nodeset fails with EINVAL");
    errno = 0;
    ok (!(r = flux_rpc_multi (h, "bar", "foo", "xyz", 0)) && errno == EINVAL,
        "flux_rpc_multi [0] with bad nodeset fails with EINVAL");

    /* working no-payload RPC */
    old_count = hello_count;
    ok ((r = flux_rpc_multi (h, "rpctest.hello", NULL, "all", 0)) != NULL,
        "flux_rpc_multi [0] with no payload when none is expected works");
    if (!r)
        BAIL_OUT ("can't continue without successful rpc call");
    ok (flux_rpc_check (r) == false,
        "flux_rpc_check says get would block");
    ok (flux_rpc_get (r, NULL, NULL) == 0,
        "flux_rpc_get works");
    ok (hello_count == old_count + 1,
        "rpc was called once");
    flux_rpc_destroy (r);

    /* cause remote EPROTO (unexpected payload) - picked up in _get() */
    ok ((r = flux_rpc_multi (h, "rpctest.hello", "foo", "all", 0)) != NULL,
        "flux_rpc_multi [0] with unexpected payload works, at first");
    ok (flux_rpc_check (r) == false,
        "flux_rpc_check says get would block");
    errno = 0;
    ok (flux_rpc_get (r, NULL, NULL) < 0
        && errno == EPROTO,
        "flux_rpc_get fails with EPROTO");
    flux_rpc_destroy (r);

    /* fake that we have a larger session */
    fake_size = 128;
    char s[16];
    uint32_t size = 0;
    snprintf (s, sizeof (s), "%u", fake_size);
    flux_attr_fake (h, "size", s, FLUX_ATTRFLAG_IMMUTABLE);
    flux_get_size (h, &size);
    cmp_ok (size, "==", fake_size,
        "successfully faked flux_get_size() of %d", fake_size);

    /* repeat working no-payload RPC test (now with 128 nodes) */
    old_count = hello_count;
    ok ((r = flux_rpc_multi (h, "rpctest.hello", NULL, "all", 0)) != NULL,
        "flux_rpc_multi [0-%d] with no payload when none is expected works",
        fake_size - 1);
    ok (flux_rpc_check (r) == false,
        "flux_rpc_check says get would block");
    errors = 0;
    for (i = 0; i < fake_size; i++)
        if (flux_rpc_get (r, NULL, NULL) < 0)
            errors++;
    ok (errors == 0,
        "flux_rpc_get succeded %d times", fake_size);

    cmp_ok (hello_count - old_count, "==", fake_size,
        "rpc was called %d times", fake_size);
    flux_rpc_destroy (r);

    /* same with a subset */
    old_count = hello_count;
    ok ((r = flux_rpc_multi (h, "rpctest.hello", NULL, "[0-63]", 0)) != NULL,
        "flux_rpc_multi [0-%d] with no payload when none is expected works",
        64 - 1);
    ok (flux_rpc_check (r) == false,
        "flux_rpc_check says get would block");
    errors = 0;
    for (i = 0; i < 64; i++)
        if (flux_rpc_get (r, &nodeid, NULL) < 0 || nodeid != i)
            errors++;
    ok (errors == 0,
        "flux_rpc_get succeded %d times, with correct nodeid map", 64);

    cmp_ok (hello_count - old_count, "==", 64,
        "rpc was called %d times", 64);
    flux_rpc_destroy (r);

    /* same with echo payload */
    ok ((r = flux_rpc_multi (h, "rpctest.echo", "foo", "[0-63]", 0)) != NULL,
        "flux_rpc_multi [0-%d] ok",
        64 - 1);
    ok (flux_rpc_check (r) == false,
        "flux_rpc_check says get would block");
    errors = 0;
    for (i = 0; i < 64; i++) {
        if (flux_rpc_get (r, NULL, &json_str) < 0
                || !json_str || strcmp (json_str, "foo") != 0)
            errors++;
    }
    ok (errors == 0,
        "flux_rpc_get succeded %d times, with correct return payload", 64);
    flux_rpc_destroy (r);

    /* detect partial failure without mresponse */
    nodeid_fake_error = 20;
    ok ((r = flux_rpc_multi (h, "rpctest.nodeid", NULL, "[0-63]", 0)) != NULL,
        "flux_rpc_multi [0-%d] ok",
        64 - 1);
    ok (flux_rpc_check (r) == false,
        "flux_rpc_check says get would block");
    for (i = 0; i < 64; i++) {
        if (flux_rpc_get (r, &nodeid, &json_str) < 0)
            break;
    }
    ok (i == 20 && errno == EPERM,
        "flux_rpc_get correctly reports single error");
    flux_rpc_destroy (r);

    /* test _then (still at fake session size of 128) */
    ok ((then_r = flux_rpc_multi (h, "rpctest.hello", NULL, "[0-127]", 0)) != NULL,
        "flux_rpc_multi [0-127] ok");
    ok (flux_rpc_then (then_r, then_cb, h) == 0,
        "flux_rpc_then works");
    /* then_cb stops reactor; results reported, then_r destroyed in main() */

    return 0;
}