/* Configure per-rank hwloc XML paths in the KVS.
 *
 * For every rank in [0, size) the key "config.resource.hwloc.xml.<rank>"
 * is either set to "<dirpath>/<rank>.xml" (when dirpath is non-NULL and the
 * file is readable), or unlinked (when dirpath is NULL).  All changes are
 * committed with a single kvs_commit() after the loop.
 *
 * Every failure is reported through log_err_exit().
 */
static void config_hwloc_paths (flux_t h, const char *dirpath)
{
    const char *key_prefix = "config.resource.hwloc.xml";
    char path[PATH_MAX];
    char key[64];
    uint32_t size;
    uint32_t rank = 0;
    int n;

    if (flux_get_size (h, &size) < 0)
        log_err_exit ("flux_get_size");

    while (rank < size) {
        /* The key is needed for both the "set" and the "unlink" case. */
        n = snprintf (key, sizeof (key), "%s.%"PRIu32, key_prefix, rank);
        assert (n < sizeof (key));

        if (dirpath != NULL) {
            n = snprintf (path, sizeof (path), "%s/%"PRIu32".xml",
                          dirpath, rank);
            assert (n < sizeof (path));
            /* Refuse to record a path that cannot be read. */
            if (access (path, R_OK) < 0)
                log_err_exit ("%s", path);
            if (kvs_put_string (h, key, path) < 0)
                log_err_exit ("kvs_put_string");
        } else {
            /* No directory given: drop the per-rank xml entry.  (Original
             * note: this is meant to make the default xml get reloaded.)
             */
            if (kvs_unlink (h, key) < 0)
                log_err_exit ("kvs_unlink");
        }
        rank++;
    }

    if (kvs_commit (h) < 0)
        log_err_exit ("kvs_commit");
}
/* Report whether the number of ranks counted so far (hello->count) equals
 * the instance size.  Returns false if the size cannot be obtained.
 */
bool hello_complete (hello_t *hello)
{
    uint32_t size;
    bool complete = false;

    if (flux_get_size (hello->h, &size) >= 0)
        complete = (size == hello->count);
    return complete;
}
/* Fetch rank, size and arity in one call.
 *
 * Each output pointer is optional: a NULL pointer skips the corresponding
 * query.  Queries run in the order rank, size, arity and stop at the first
 * failure.
 *
 * Returns 0 on success, -1 if any requested query failed.
 */
int flux_info (flux_t h, uint32_t *rank, uint32_t *size, int *arity)
{
    if ((rank && flux_get_rank (h, rank) < 0)
            || (size && flux_get_size (h, size) < 0)
            || (arity && flux_get_arity (h, arity) < 0))
        return -1;
    return 0;
}
/* Return the instance size as an int.
 *
 * Returns -1 if flux_get_size() fails, or -1 with errno set to ERANGE when
 * the size is INT_MAX or larger.
 */
int flux_size (flux_t h)
{
    uint32_t nranks;

    if (flux_get_size (h, &nranks) < 0)
        return -1;
    if (nranks < INT_MAX)
        return nranks;
    errno = ERANGE;
    return -1;
}
/* Send the same RPC request to every rank in a nodeset.
 *
 * h        - handle used to send the requests
 * topic    - request topic string (must be non-NULL)
 * json_str - request payload, passed through to rpc_request_send()
 * nodeset  - nodeset string, or the literal "all" which expands to
 *            ranks 0 .. size-1 (must be non-NULL)
 * flags    - passed through to rpc_create()
 *
 * Returns the new rpc object, or NULL on failure.  errno is EINVAL for a
 * NULL topic/nodeset or a nodeset that could not be created; otherwise it
 * is whatever the failing call left behind (preserved across cleanup).
 */
flux_rpc_t *flux_rpc_multi (flux_t h, const char *topic, const char *json_str,
                            const char *nodeset, int flags)
{
    nodeset_t *ns = NULL;
    nodeset_iterator_t *itr = NULL;
    flux_rpc_t *rpc = NULL;
    int i;
    uint32_t count;
    int saved_errno;

    if (!topic || !nodeset) {
        errno = EINVAL;
        goto error;
    }
    if (!strcmp (nodeset, "all")) {
        if (flux_get_size (h, &count) < 0)
            goto error;
        ns = nodeset_create_range (0, count - 1);
    } else {
        /* count is only assigned when ns was created; the !ns check
         * below guarantees it is never read uninitialized.
         */
        if ((ns = nodeset_create_string (nodeset)))
            count = nodeset_count (ns);
    }
    if (!ns) {
        errno = EINVAL;
        goto error;
    }
    if (!(rpc = rpc_create (h, flags, count)))
        goto error;
    if (!(itr = nodeset_iterator_create (ns)))
        goto error;
    for (i = 0; i < count; i++) {
        uint32_t nodeid = nodeset_next (itr);
        assert (nodeid != NODESET_EOF);
        if (rpc_request_send (rpc, i, topic, json_str, nodeid) < 0)
            goto error;
        /* Remember which rank slot i was sent to so responses can be
         * mapped back; one-way RPCs do not keep a map.
         */
        if (!rpc->oneway)
            rpc->nodemap[i] = nodeid;
    }
    nodeset_iterator_destroy (itr);
    /* The nodeset is only needed while iterating; release it on the
     * success path too (it was previously leaked here).
     */
    nodeset_destroy (ns);
    return rpc;
error:
    /* Keep the errno that describes the failure, not the cleanup. */
    saved_errno = errno;
    if (rpc)
        flux_rpc_destroy (rpc);
    if (itr)
        nodeset_iterator_destroy (itr);
    if (ns)
        nodeset_destroy (ns);
    errno = saved_errno;
    return NULL;
}
/* Build an initial node-state guess without querying any status service:
 * ranks from this handle's rank through size-1 are placed in the "ok" set,
 * and the "slow", "fail" and "unknown" sets are created empty.
 *
 * Failure to obtain rank or size is reported through log_err_exit();
 * failure to create any nodeset calls oom().
 */
static ns_t *ns_guess (flux_t h)
{
    uint32_t nranks, my_rank;
    ns_t *guess = xzmalloc (sizeof (*guess));

    if (flux_get_rank (h, &my_rank) < 0)
        log_err_exit ("flux_get_rank");
    if (flux_get_size (h, &nranks) < 0)
        log_err_exit ("flux_get_size");

    guess->ok = nodeset_create ();
    guess->slow = nodeset_create ();
    guess->fail = nodeset_create ();
    guess->unknown = nodeset_create ();
    if (!(guess->ok && guess->slow && guess->fail && guess->unknown))
        oom ();

    nodeset_add_range (guess->ok, my_rank, nranks - 1);
    return guess;
}
/* Print two tree heights for the current instance: the height of the
 * k-ary tree holding all `size` ranks, and the height computed for
 * (rank + 1) nodes, where k is the arity reported by flux_get_arity().
 * Command line arguments are not used.
 */
int main (int argc, char **argv)
{
    flux_t h = flux_open (NULL, 0);
    uint32_t my_rank, size;
    int arity;
    int height;

    if (!h)
        err_exit ("flux_open");
    if (flux_get_rank (h, &my_rank) < 0)
        err_exit ("flux_get_rank");
    if (flux_get_size (h, &size) < 0)
        err_exit ("flux_get_size");
    if (flux_get_arity (h, &arity) < 0)
        err_exit ("flux_get_arity");

    height = tree_height (size, arity);
    printf ("height of %d-ary tree of size %" PRIu32 ": %d\n",
            arity, size, height);

    height = tree_height (my_rank + 1, arity);
    printf ("height of %d-ary at rank %" PRIu32 ": %d\n",
            arity, my_rank, height);

    flux_close (h);
    return (0);
}
/* Record that `rank` has checked in.
 *
 * The nodeset tracking checked-in ranks is created lazily on first use,
 * sized to the instance size.  Once hello->count reaches the instance size,
 * the completion callback (if any) is invoked and the timer watcher (if
 * any) is stopped.
 *
 * Returns 0 on success.  Returns -1 if the size cannot be obtained, if the
 * nodeset cannot be created (errno = ENOMEM), or if the rank cannot be
 * added to the nodeset (errno = EPROTO).
 */
static int hello_add_rank (hello_t *hello, uint32_t rank)
{
    uint32_t size;

    if (flux_get_size (hello->h, &size) < 0)
        return -1;
    if (!hello->nodeset) {
        /* Do not hand a NULL nodeset to nodeset_add_rank().
         * NOTE(review): assumes nodeset_create_size() reports failure by
         * returning NULL, like nodeset_create() - confirm.
         */
        if (!(hello->nodeset = nodeset_create_size (size))) {
            errno = ENOMEM;
            return -1;
        }
    }
    if (!nodeset_add_rank (hello->nodeset, rank)) {
        errno = EPROTO;
        return -1;
    }
    hello->count++;
    if (hello->count == size) {
        if (hello->cb)
            hello->cb (hello, hello->cb_arg);
        if (hello->timer)
            flux_watcher_stop (hello->timer);
    }
    return 0;
}
int main (int argc, char *argv[]) { flux_t h; int ch; uint32_t rank = FLUX_NODEID_ANY; /* local */ char *cmd; int e; log_init ("flux-comms"); while ((ch = getopt_long (argc, argv, OPTIONS, longopts, NULL)) != -1) { switch (ch) { case 'h': /* --help */ usage (); break; case 'r': /* --rank N */ rank = strtoul (optarg, NULL, 10); break; default: usage (); break; } } if (optind == argc) usage (); cmd = argv[optind++]; if (rank != FLUX_NODEID_ANY && (!strcmp (cmd, "recover-all") || !strcmp (cmd, "info"))) usage (); if (!(h = flux_open (NULL, 0))) log_err_exit ("flux_open"); if (!strcmp (cmd, "reparent")) { if (optind != argc - 1) usage (); if (flux_reparent (h, rank, argv[optind]) < 0) log_err_exit ("flux_reparent"); } else if (!strcmp (cmd, "idle")) { if (optind != argc) usage (); char *peers; if (!(peers = flux_lspeer (h, rank))) log_err_exit ("flux_peer"); printf ("%s\n", peers); free (peers); } else if (!strcmp (cmd, "panic")) { char *msg = NULL; size_t len = 0; if (optind < argc) { if ((e = argz_create (argv + optind, &msg, &len)) != 0) log_errn_exit (e, "argz_create"); argz_stringify (msg, len, ' '); } flux_panic (h, rank, msg); if (msg) free (msg); } else if (!strcmp (cmd, "failover")) { if (optind != argc) usage (); if (flux_failover (h, rank) < 0) log_err_exit ("flux_failover"); } else if (!strcmp (cmd, "recover")) { if (optind != argc) usage (); if (flux_recover (h, rank) < 0) log_err_exit ("flux_recover"); } else if (!strcmp (cmd, "recover-all")) { if (optind != argc) usage (); if (flux_recover_all (h) < 0) log_err_exit ("flux_recover_all"); } else if (!strcmp (cmd, "info")) { int arity; uint32_t rank, size; const char *s; if (flux_get_rank (h, &rank) < 0 || flux_get_size (h, &size) < 0) log_err_exit ("flux_get_rank/size"); if (!(s = flux_attr_get (h, "tbon.arity", NULL))) log_err_exit ("flux_attr_get tbon.arity"); arity = strtoul (s, NULL, 10); printf ("rank=%d\n", rank); printf ("size=%d\n", size); printf ("arity=%d\n", arity); } else usage (); flux_close 
(h); log_fini (); return 0; }
/* Send the same RPC request to every rank in a nodeset.
 *
 * h        - handle used to send the requests
 * topic    - request topic string (must be non-NULL)
 * json_str - request payload, passed through to rpc_request_send()
 * nodeset  - nodeset string, or the literal "all" which expands to
 *            ranks 0 .. size-1 (must be non-NULL)
 * flags    - if FLUX_RPC_NORESPONSE is set, the rpc is created expecting
 *            zero responses; otherwise one response per rank
 *
 * When built with HAVE_CALIPER, the send loop is bracketed by Caliper
 * annotations; every begin is matched by an end on all paths.
 *
 * Returns the new rpc object, or NULL on failure.  errno is EINVAL for a
 * NULL topic/nodeset or a nodeset that could not be created; otherwise it
 * is whatever the failing call left behind (preserved across cleanup).
 */
flux_rpc_t *flux_rpc_multi (flux_t *h, const char *topic, const char *json_str,
                            const char *nodeset, int flags)
{
    nodeset_t *ns = NULL;
    nodeset_iterator_t *itr = NULL;
    flux_rpc_t *rpc = NULL;
    int i;
    uint32_t count;
    int rx_expected;
    int send_failed = 0;
    int saved_errno = 0;

    if (!topic || !nodeset) {
        errno = EINVAL;
        goto error;
    }
    if (!strcmp (nodeset, "all")) {
        if (flux_get_size (h, &count) < 0)
            goto error;
        ns = nodeset_create_range (0, count - 1);
    } else {
        /* count is only assigned when ns was created; the !ns check
         * below guarantees it is never read uninitialized.
         */
        if ((ns = nodeset_create_string (nodeset)))
            count = nodeset_count (ns);
    }
    if (!ns) {
        errno = EINVAL;
        goto error;
    }
    rx_expected = count;
    if ((flags & FLUX_RPC_NORESPONSE))
        rx_expected = 0;
    if (!(rpc = rpc_create (h, rx_expected)))
        goto error;
    if (!(itr = nodeset_iterator_create (ns)))
        goto error;
#if HAVE_CALIPER
    cali_begin_string_byname ("flux.message.rpc", "multi");
    cali_begin_int_byname ("flux.message.response_expected",
                           !(flags & FLUX_RPC_NORESPONSE));
#endif
    for (i = 0; i < count; i++) {
        uint32_t nodeid = nodeset_next (itr);
        assert (nodeid != NODESET_EOF);
#if HAVE_CALIPER
        cali_begin_int_byname ("flux.message.rpc.nodeid", nodeid);
#endif
        /* Record the failure but close the annotations before bailing
         * out, so a failed send does not leave them open.
         */
        if (rpc_request_send (rpc, topic, nodeid, json_str) < 0) {
            saved_errno = errno;
            send_failed = 1;
        }
#if HAVE_CALIPER
        cali_end_byname ("flux.message.rpc.nodeid");
#endif
        if (send_failed)
            break;
    }
#if HAVE_CALIPER
    cali_end_byname ("flux.message.response_expected");
    cali_end_byname ("flux.message.rpc");
#endif
    if (send_failed) {
        errno = saved_errno;
        goto error;
    }
    nodeset_iterator_destroy (itr);
    /* The nodeset is only needed while iterating; release it on the
     * success path too (it was previously leaked here).
     */
    nodeset_destroy (ns);
    return rpc;
error:
    /* Keep the errno that describes the failure, not the cleanup. */
    saved_errno = errno;
    if (rpc)
        flux_rpc_destroy (rpc);
    if (itr)
        nodeset_iterator_destroy (itr);
    if (ns)
        nodeset_destroy (ns);
    errno = saved_errno;
    return NULL;
}
// Recevied a request to join the simulation ("sim.join") static void join_cb (flux_t *h, flux_msg_handler_t *w, const flux_msg_t *msg, void *arg) { int mod_rank; json_t *request = NULL; const char *mod_name = NULL, *json_str = NULL; double *next_event = (double *)malloc (sizeof (double)); ctx_t *ctx = arg; sim_state_t *sim_state = ctx->sim_state; uint32_t size; if (flux_msg_get_json (msg, &json_str) < 0 || json_str == NULL || !(request = Jfromstr (json_str)) || !Jget_str (request, "mod_name", &mod_name) || !Jget_int (request, "rank", &mod_rank) || !Jget_double (request, "next_event", next_event)) { flux_log (h, LOG_ERR, "%s: bad join message", __FUNCTION__); goto out; } if (flux_get_size (h, &size) < 0) goto out; if (mod_rank < 0 || mod_rank >= size) { flux_log (h, LOG_ERR, "%s: bad rank in join message", __FUNCTION__); goto out; } flux_log (h, LOG_DEBUG, "join rcvd from module %s on rank %d, next event at %f", mod_name, mod_rank, *next_event); zhash_t *timers = sim_state->timers; if (zhash_insert (timers, mod_name, next_event) < 0) { // key already // exists flux_log (h, LOG_ERR, "duplicate join request from %s, module already exists in " "sim_state", mod_name); goto out; } // clear next event so it is not freed below next_event = NULL; // TODO: this is horribly hackish, improve the handshake to avoid // this hardcoded # of modules. maybe use a timeout? ZMQ provides // support for polling etc with timeouts, should try that static int num_modules = 3; num_modules--; if (num_modules <= 0) { if (handle_next_event (ctx) < 0) { flux_log (h, LOG_ERR, "failure while handling next event"); return; } } out: Jput (request); free (next_event); }
/* TAP test sequence for flux_rpc_multi().
 *
 * In order, this checks: argument validation (NULL topic, NULL nodeset,
 * unparsable nodeset), a no-payload RPC to "all", an RPC with an unexpected
 * payload, the same no-payload RPC after faking the "size" attribute to 128,
 * a 64-rank subset, an echo payload on that subset, detection of a single
 * failing rank, and finally registration of a continuation with
 * flux_rpc_then().
 *
 * The body only uses `h`; `type`, `zmsg` and `arg` are not referenced.
 * It reads and writes the file-level variables hello_count, fake_size,
 * nodeid_fake_error and then_r.  Always returns 0.
 */
int rpctest_begin_cb (flux_t h, int type, zmsg_t **zmsg, void *arg)
{
    uint32_t nodeid;
    int i, errors;
    int old_count;
    flux_rpc_t *r;
    const char *json_str;

    /* argument validation: each call must fail and set errno to EINVAL */
    errno = 0;
    ok (!(r = flux_rpc_multi (h, NULL, "foo", "all", 0)) && errno == EINVAL,
        "flux_rpc_multi [0] with NULL topic fails with EINVAL");
    errno = 0;
    ok (!(r = flux_rpc_multi (h, "bar", "foo", NULL, 0)) && errno == EINVAL,
        "flux_rpc_multi [0] with NULL nodeset fails with EINVAL");
    errno = 0;
    ok (!(r = flux_rpc_multi (h, "bar", "foo", "xyz", 0)) && errno == EINVAL,
        "flux_rpc_multi [0] with bad nodeset fails with EINVAL");

    /* working no-payload RPC */
    old_count = hello_count;
    ok ((r = flux_rpc_multi (h, "rpctest.hello", NULL, "all", 0)) != NULL,
        "flux_rpc_multi [0] with no payload when none is expected works");
    /* NOTE(review): this is the only place a NULL rpc aborts the test; the
     * later sections use r without checking it.
     */
    if (!r)
        BAIL_OUT ("can't continue without successful rpc call");
    ok (flux_rpc_check (r) == false,
        "flux_rpc_check says get would block");
    ok (flux_rpc_get (r, NULL, NULL) == 0,
        "flux_rpc_get works");
    ok (hello_count == old_count + 1,
        "rpc was called once");
    flux_rpc_destroy (r);

    /* cause remote EPROTO (unexpected payload) - picked up in _get() */
    ok ((r = flux_rpc_multi (h, "rpctest.hello", "foo", "all", 0)) != NULL,
        "flux_rpc_multi [0] with unexpected payload works, at first");
    ok (flux_rpc_check (r) == false,
        "flux_rpc_check says get would block");
    errno = 0;
    ok (flux_rpc_get (r, NULL, NULL) < 0 && errno == EPROTO,
        "flux_rpc_get fails with EPROTO");
    flux_rpc_destroy (r);

    /* fake that we have a larger session */
    fake_size = 128;
    char s[16];
    uint32_t size = 0;
    snprintf (s, sizeof (s), "%u", fake_size);
    flux_attr_fake (h, "size", s, FLUX_ATTRFLAG_IMMUTABLE);
    /* return value is not checked here; size was pre-set to 0 and the
     * cmp_ok below verifies the result instead
     */
    flux_get_size (h, &size);
    cmp_ok (size, "==", fake_size,
        "successfully faked flux_get_size() of %d", fake_size);

    /* repeat working no-payload RPC test (now with 128 nodes) */
    old_count = hello_count;
    ok ((r = flux_rpc_multi (h, "rpctest.hello", NULL, "all", 0)) != NULL,
        "flux_rpc_multi [0-%d] with no payload when none is expected works",
        fake_size - 1);
    ok (flux_rpc_check (r) == false,
        "flux_rpc_check says get would block");
    /* one flux_rpc_get per faked rank; count the failures */
    errors = 0;
    for (i = 0; i < fake_size; i++)
        if (flux_rpc_get (r, NULL, NULL) < 0)
            errors++;
    ok (errors == 0,
        "flux_rpc_get succeded %d times", fake_size);
    cmp_ok (hello_count - old_count, "==", fake_size,
        "rpc was called %d times", fake_size);
    flux_rpc_destroy (r);

    /* same with a subset */
    old_count = hello_count;
    ok ((r = flux_rpc_multi (h, "rpctest.hello", NULL, "[0-63]", 0)) != NULL,
        "flux_rpc_multi [0-%d] with no payload when none is expected works",
        64 - 1);
    ok (flux_rpc_check (r) == false,
        "flux_rpc_check says get would block");
    /* the i-th get is expected to report nodeid i */
    errors = 0;
    for (i = 0; i < 64; i++)
        if (flux_rpc_get (r, &nodeid, NULL) < 0 || nodeid != i)
            errors++;
    ok (errors == 0,
        "flux_rpc_get succeded %d times, with correct nodeid map", 64);
    cmp_ok (hello_count - old_count, "==", 64,
        "rpc was called %d times", 64);
    flux_rpc_destroy (r);

    /* same with echo payload */
    ok ((r = flux_rpc_multi (h, "rpctest.echo", "foo", "[0-63]", 0)) != NULL,
        "flux_rpc_multi [0-%d] ok", 64 - 1);
    ok (flux_rpc_check (r) == false,
        "flux_rpc_check says get would block");
    /* every response must carry the payload "foo" that was sent */
    errors = 0;
    for (i = 0; i < 64; i++) {
        if (flux_rpc_get (r, NULL, &json_str) < 0
                || !json_str || strcmp (json_str, "foo") != 0)
            errors++;
    }
    ok (errors == 0,
        "flux_rpc_get succeded %d times, with correct return payload", 64);
    flux_rpc_destroy (r);

    /* detect partial failure without mresponse */
    nodeid_fake_error = 20;
    ok ((r = flux_rpc_multi (h, "rpctest.nodeid", NULL, "[0-63]", 0)) != NULL,
        "flux_rpc_multi [0-%d] ok", 64 - 1);
    ok (flux_rpc_check (r) == false,
        "flux_rpc_check says get would block");
    /* stop at the first failing get; the check below expects that to be
     * index 20 (the value stored in nodeid_fake_error) with errno EPERM
     */
    for (i = 0; i < 64; i++) {
        if (flux_rpc_get (r, &nodeid, &json_str) < 0)
            break;
    }
    ok (i == 20 && errno == EPERM,
        "flux_rpc_get correctly reports single error");
    flux_rpc_destroy (r);

    /* test _then (still at fake session size of 128) */
    ok ((then_r = flux_rpc_multi (h, "rpctest.hello", NULL, "[0-127]", 0)) != NULL,
        "flux_rpc_multi [0-127] ok");
    ok (flux_rpc_then (then_r, then_cb, h) == 0,
        "flux_rpc_then works");
    /* then_cb stops reactor; results reported, then_r destroyed in main() */

    return 0;
}