/* Legacy (zmsg-based) request handler for barrier entry.
 * Decodes name/count/nprocs from the JSON payload, merges the count into
 * the local barrier object, and either fires the exit event (count
 * reached) or arms a one-shot timer to forward the partial count upstream.
 * Always returns 0; the request message is destroyed here unless a callee
 * took ownership of *zmsg (NOTE(review): barrier_add_client appears to
 * take *zmsg on success — confirm against its definition).
 */
static int enter_request_cb (flux_t h, int typemask, zmsg_t **zmsg, void *arg)
{
    ctx_t *ctx = arg;
    barrier_t *b;
    json_object *o = NULL;
    char *sender = NULL;
    const char *name;
    int count, nprocs, hopcount;

    /* Malformed requests are logged and dropped (no error response). */
    if (flux_json_request_decode (*zmsg, &o) < 0
            || flux_msg_get_route_first (*zmsg, &sender) < 0
            || util_json_object_get_string (o, "name", &name) < 0
            || util_json_object_get_int (o, "count", &count) < 0
            || util_json_object_get_int (o, "nprocs", &nprocs) < 0) {
        flux_log (ctx->h, LOG_ERR, "%s: ignoring bad message", __FUNCTION__);
        goto done;
    }

    /* First entry for this name creates the barrier object. */
    if (!(b = zhash_lookup (ctx->barriers, name)))
        b = barrier_create (ctx, name, nprocs);

    /* Distinguish client (tracked) vs downstream barrier plugin (untracked).
     * A client, distinguished by hopcount > 0, can only enter barrier once.
     */
    if (util_json_object_get_int (o, "hopcount", &hopcount) < 0) {
        /* No hopcount key => direct client: track it; a duplicate entry
         * aborts the whole barrier for all participants.
         */
        if (barrier_add_client (b, sender, zmsg) < 0) {
            flux_err_respond (ctx->h, EEXIST, zmsg);
            flux_log (ctx->h, LOG_ERR,
                      "abort %s due to double entry by client %s",
                      name, sender);
            if (exit_event_send (ctx->h, b->name, ECONNABORTED) < 0)
                flux_log (ctx->h, LOG_ERR, "exit_event_send: %s",
                          strerror (errno));
            goto done;
        }
    }

    /* If the count has been reached, terminate the barrier;
     * o/w set timer to pass count upstream and zero it here.
     */
    b->count += count;
    if (b->count == b->nprocs) {
        if (exit_event_send (ctx->h, b->name, 0) < 0)
            flux_log (ctx->h, LOG_ERR, "exit_event_send: %s",
                      strerror (errno));
    } else if (ctx->rank > 0 && !ctx->timer_armed) {
        /* Non-root rank with a partial count: arm the reduction timer once;
         * timeout_cb presumably forwards the aggregate upstream — confirm.
         */
        if (flux_tmouthandler_add (h, barrier_reduction_timeout_msec, true,
                                   timeout_cb, ctx) < 0) {
            flux_log (h, LOG_ERR, "flux_tmouthandler_add: %s",strerror (errno));
            goto done;
        }
        ctx->timer_armed = true;
    }
done:
    if (o)
        json_object_put (o);    /* drop our json-c reference */
    if (*zmsg)
        zmsg_destroy (zmsg);    /* destroy unless a callee consumed it */
    if (sender)
        free (sender);
    return 0;
}
/* Handle a client disconnect notification: purge any barrier state
 * registered under the departing client's route id.
 */
static void disconnect_request_cb (flux_t *h, flux_msg_handler_t *w,
                                   const flux_msg_t *msg, void *arg)
{
    ctx_t *ctx = arg;
    char *uuid = NULL;

    if (flux_msg_get_route_first (msg, &uuid) < 0)
        return;                 /* no sender id - nothing to clean up */
    zhash_foreach (ctx->barriers, disconnect, uuid);
    free (uuid);
}
/* Message match predicate: returns true if the message's first route hop
 * (the sender id) is one of the ids of interest.
 *
 * Improvements over the original: the ids are collected in one static
 * table instead of being repeated inline in a chained strcmp expression
 * (easier to extend), and the redundant NULL guard before free() is
 * dropped — free(NULL) is defined as a no-op by the C standard.
 */
bool msgcmp (const flux_msg_t *msg, void *arg)
{
    static const char *match_ids[] = { "19", "18", "17" };
    char *id = NULL;
    bool match = false;

    if (flux_msg_get_route_first (msg, &id) == 0) {
        for (size_t i = 0; i < sizeof (match_ids) / sizeof (match_ids[0]); i++) {
            if (!strcmp (id, match_ids[i])) {
                match = true;
                break;
            }
        }
    }
    free (id);
    return match;
}
/* Legacy (zmsg-based) disconnect handler: purge barrier state registered
 * under the departing client's route id, then dispose of the request.
 * Always returns 0.
 */
static int disconnect_request_cb (flux_t h, int typemask, zmsg_t **zmsg,
                                  void *arg)
{
    ctx_t *ctx = arg;
    char *uuid = NULL;

    if (flux_msg_get_route_first (*zmsg, &uuid) == 0) {
        zhash_foreach (ctx->barriers, disconnect, uuid);
        free (uuid);
    }
    zmsg_destroy (zmsg);
    return 0;
}
/* If this function is called without the NONBLOCK flag from a reactor
 * handler running in coprocess context, the call to flux_sleep_on()
 * will allow the reactor to run until a message matching 'match' arrives.
 * The flux_sleep_on() call will then resume, and the next call to recv()
 * will return the matching message. If not running in coprocess context,
 * flux_sleep_on() will fail with EINVAL. In that case, the do loop
 * reading messages and comparing them to match criteria may have to read
 * a few non-matching messages before finding a match. On return, those
 * non-matching messages have to be requeued in the handle, hence the
 * defer_*() helper calls.
 */
flux_msg_t *flux_recv (flux_t h, struct flux_match match, int flags)
{
    zlist_t *l = NULL;          /* holds non-matching messages to requeue */
    flux_msg_t *msg = NULL;
    int saved_errno;

    /* Handle-level flags are merged into the per-call flags. */
    flags |= h->flags;
    if (!(flags & FLUX_O_NONBLOCK) && (flags & FLUX_O_COPROC)
                                   && flux_sleep_on (h, match) < 0) {
        /* EINVAL == not in coprocess context; fall through to the
         * read-and-defer loop below.  Any other error is fatal.
         */
        if (errno != EINVAL)
            goto fatal;
        errno = 0;
    }
    do {
        if (!(msg = flux_recv_any (h, flags))) {
            if (errno != EAGAIN && errno != EWOULDBLOCK)
                goto fatal;
            /* Would block: put any deferred messages back first so they
             * are not lost, then report EWOULDBLOCK (normalized from
             * EAGAIN) to the caller.
             */
            if (defer_requeue (&l, h) < 0)
                goto fatal;
            defer_destroy (&l);
            errno = EWOULDBLOCK;
            return NULL;
        }
        if (!flux_msg_cmp (msg, match)) {
            /* Not a match: park it on 'l' and keep reading. */
            if (defer_enqueue (&l, msg) < 0)
                goto fatal;
            msg = NULL;
        }
    } while (!msg);
    update_rx_stats (h, msg);
    if ((flags & FLUX_O_TRACE))
        flux_msg_fprint (stderr, msg);
    /* Matching message in hand: requeue everything we skipped over. */
    if (defer_requeue (&l, h) < 0)
        goto fatal;
    defer_destroy (&l);
#if HAVE_CALIPER
    /* Caliper profiling annotations for the received message; no effect
     * on the message itself.
     */
    cali_begin_int (h->prof.msg_match_type, match.typemask);
    cali_begin_int (h->prof.msg_match_tag, match.matchtag);
    cali_begin_string (h->prof.msg_match_glob, match.topic_glob ?
                       match.topic_glob : "NONE");
    char *sender = NULL;
    flux_msg_get_route_first (msg, &sender);
    if (sender)
        cali_begin_string (h->prof.msg_sender, sender);
    profiling_msg_snapshot (h, msg, flags, "recv");
    if (sender)
        cali_end (h->prof.msg_sender);
    cali_end (h->prof.msg_match_type);
    cali_end (h->prof.msg_match_tag);
    cali_end (h->prof.msg_match_glob);
    free (sender);
#endif
    return msg;
fatal:
    /* Preserve errno across FLUX_FATAL and cleanup calls, which may
     * overwrite it.
     */
    saved_errno = errno;
    FLUX_FATAL (h);
    if (msg)
        flux_msg_destroy (msg);
    defer_destroy (&l);
    errno = saved_errno;
    return NULL;
}
/* Request handler for barrier entry (msg_handler API).
 * Decodes name/count/nprocs from the JSON payload, merges the count into
 * the local barrier object, and either fires the exit event (count
 * reached) or arms a one-shot timer to forward the partial count upstream.
 */
static void enter_request_cb (flux_t *h, flux_msg_handler_t *w,
                              const flux_msg_t *msg, void *arg)
{
    ctx_t *ctx = arg;
    barrier_t *b;
    json_object *o = NULL;
    char *sender = NULL;
    const char *name;
    int count, nprocs, hopcount;
    const char *json_str;

    /* Malformed requests are logged and dropped (no error response). */
    if (flux_request_decode (msg, NULL, &json_str) < 0
            || flux_msg_get_route_first (msg, &sender) < 0) {
        flux_log_error (ctx->h, "%s: decoding request", __FUNCTION__);
        goto done;
    }
    if (!(o = Jfromstr (json_str))
            || !Jget_str (o, "name", &name)
            || !Jget_int (o, "count", &count)
            || !Jget_int (o, "nprocs", &nprocs)) {
        errno = EPROTO;
        flux_log_error (ctx->h, "%s: decoding request", __FUNCTION__);
        goto done;
    }

    /* First entry for this name creates the barrier object. */
    if (!(b = zhash_lookup (ctx->barriers, name)))
        b = barrier_create (ctx, name, nprocs);

    /* Distinguish client (tracked) vs downstream barrier plugin (untracked).
     * A client, distinguished by hopcount > 0, can only enter barrier once.
     */
    if (!Jget_int (o, "hopcount", &hopcount)) {
        /* No hopcount key => direct client: track it; a duplicate entry
         * aborts the whole barrier for all participants.
         */
        if (barrier_add_client (b, sender, msg) < 0) {
            flux_respond (ctx->h, msg, EEXIST, NULL);
            flux_log (ctx->h, LOG_ERR,
                      "abort %s due to double entry by client %s",
                      name, sender);
            if (exit_event_send (ctx->h, b->name, ECONNABORTED) < 0)
                flux_log_error (ctx->h, "exit_event_send");
            goto done;
        }
    }

    /* If the count has been reached, terminate the barrier;
     * o/w set timer to pass count upstream and zero it here.
     */
    b->count += count;
    if (b->count == b->nprocs) {
        if (exit_event_send (ctx->h, b->name, 0) < 0)
            flux_log_error (ctx->h, "exit_event_send");
    } else if (ctx->rank > 0 && !ctx->timer_armed) {
        /* Non-root rank with a partial count: arm the (pre-created,
         * one-shot) reduction timer exactly once.
         */
        flux_timer_watcher_reset (ctx->timer,
                                  barrier_reduction_timeout_sec, 0.);
        flux_watcher_start (ctx->timer);
        ctx->timer_armed = true;
    }
done:
    if (o)
        json_object_put (o);    /* drop our json-c reference */
    if (sender)
        free (sender);
}