Пример #1
0
/* Update one attribute of job 'jobid' from the job control block 'jcb'.
 * 'key' selects the attribute; its value is pulled out of 'jcb' and handed
 * to the matching update_* helper.
 * Returns the helper's result, or -1 when 'jcb' is NULL, jobid_exist ()
 * returns non-zero, the attribute is not present in 'jcb', 'key' names the
 * jobid attribute, or 'key' is not recognized.
 */
static int update_jcb_obj (flux_t *h, int64_t jobid, const char *key,
                           json_object *jcb)
{
    json_object *attr = NULL;
    const char *rdl = NULL;

    if (!jcb || jobid_exist (h, jobid) != 0)
        return -1;

    if (is_jobid (key)) {
        flux_log (h, LOG_ERR, "jobid attr cannot be updated");
        return -1;
    }
    if (is_state_pair (key)) {
        if (!Jget_obj (jcb, JSC_STATE_PAIR, &attr))
            return -1;
        return update_state (h, jobid, attr);
    }
    if (is_rdesc (key)) {
        if (!Jget_obj (jcb, JSC_RDESC, &attr))
            return -1;
        return update_rdesc (h, jobid, attr);
    }
    if (is_rdl (key)) {
        if (!Jget_str (jcb, JSC_RDL, &rdl))
            return -1;
        return update_rdl (h, jobid, rdl);
    }
    if (is_rdl_alloc (key)) {
        if (!Jget_obj (jcb, JSC_RDL_ALLOC, &attr))
            return -1;
        return update_rdl_alloc (h, jobid, attr);
    }
    if (is_pdesc (key)) {
        if (!Jget_obj (jcb, JSC_PDESC, &attr))
            return -1;
        return update_pdesc (h, jobid, attr);
    }

    flux_log (h, LOG_ERR, "key (%s) not understood", key);
    return -1;
}
Пример #2
0
/* JSC status callback; 'arg' carries the flux handle.
 * When the job in 'jcbstr' is ctx->jobid and its new state is J_COMPLETE,
 * touch the 'complete' output file (if one is configured), log, and
 * raise SIGINT.
 * Returns -1 if 'errnum' is positive or 'jcbstr' cannot be parsed, else 0.
 */
static int waitjob_cb (const char *jcbstr, void *arg, int errnum)
{
    flux_t h = (flux_t)arg;
    wjctx_t *ctx = getctx (h);
    int64_t jobid = 0;
    int64_t oldstate = 0;
    int64_t newstate = 0;
    JSON jcb = NULL;

    if (errnum > 0) {
        flux_log (ctx->h, LOG_ERR, "waitjob_cb: errnum passed in");
        return -1;
    }

    jcb = Jfromstr (jcbstr);
    if (!jcb) {
        flux_log (ctx->h, LOG_ERR, "waitjob_cb: error parsing JSON string");
        return -1;
    }
    get_jobid (jcb, &jobid);
    get_states (jcb, &oldstate, &newstate);
    Jput (jcb);

    /* Not our job, or not complete yet: nothing to do. */
    if (jobid != ctx->jobid || newstate != J_COMPLETE)
        return 0;

    if (ctx->complete)
        touch_outfile (ctx->complete);
    flux_log (ctx->h, LOG_INFO, "waitjob_cb: completion notified");
    raise (SIGINT);

    return 0;
}
Пример #3
0
/* Synthesize a "new job" notification for job 'nj'.
 * Builds a JCB holding the jobid and a J_NULL -> J_NULL state pair, records
 * the job in ctx->active_jobs, and invokes the registered callbacks.
 * Per the original author's note, this is meant to fix up event ordering
 * only when the new-job event has not already been reported through a kvs
 * watch. A failing zhash_insert () is logged and the callbacks are skipped.
 */
static void fixup_newjob_event (flux_t *h, int64_t nj)
{
    char *hashkey = xasprintf ("%"PRId64, nj);
    jscctx_t *ctx = getctx (h);
    int64_t state = J_NULL;
    json_object *jcb = Jnew ();
    json_object *pair = Jnew ();

    Jadd_int64 (jcb, JSC_JOBID, nj);
    Jadd_int64 (pair, JSC_STATE_PAIR_OSTATE , (int64_t) state);
    Jadd_int64 (pair, JSC_STATE_PAIR_NSTATE, (int64_t) state);
    json_object_object_add (jcb, JSC_STATE_PAIR, pair);

    if (zhash_insert (ctx->active_jobs, hashkey,
                      (void *)(intptr_t)state) < 0)
        flux_log (h, LOG_ERR, "new_job_cb: inserting a job to hash failed");
    else if (invoke_cbs (h, nj, jcb, 0) < 0)
        flux_log (h, LOG_ERR,
                     "makeup_newjob_event: failed to invoke callbacks");

    Jput (jcb);
    free (hashkey);
}
Пример #4
0
// Received a reply to a trigger ("sim.reply").
// Decodes the replied simulation state, merges it into the current state
// via copy_new_state_data (), then launches the next event. When
// handle_next_event () fails, either exits (exit_on_complete set) or sends
// the complete event.
static void reply_cb (flux_t *h,
                      flux_msg_handler_t *w,
                      const flux_msg_t *msg,
                      void *arg)
{
    ctx_t *ctx = arg;
    sim_state_t *current = ctx->sim_state;
    sim_state_t *replied;
    const char *json_str = NULL;
    json_t *request = NULL;

    if (flux_msg_get_json (msg, &json_str) >= 0 && json_str != NULL)
        request = Jfromstr (json_str);
    if (!request) {
        flux_log (h, LOG_ERR, "%s: bad reply message", __FUNCTION__);
        Jput (request);
        return;
    }

    // De-serialize the reply and fold the new info into our state
    replied = json_to_sim_state (request);
    copy_new_state_data (ctx, current, replied);

    if (handle_next_event (ctx) < 0) {
        flux_log (h, LOG_DEBUG, "No events remaining");
        if (ctx->exit_on_complete)
            log_msg_exit ("exit_on_complete is set. Exiting now.");
        else
            send_complete_event (h);
    }

    free_simstate (replied);
    Jput (request);
}
Пример #5
0
/* Store the resource description 'o' (node and task counts) of job 'j' in
 * the KVS under lwj.<j>.nnodes and lwj.<j>.ntasks, then commit.
 * Returns 0 on success. Returns -1 if either count is missing from 'o' or
 * negative, or if a KVS put or the commit fails (failures are logged).
 */
static int update_rdesc (flux_t h, int64_t j, JSON o)
{
    int rc = -1;
    int64_t nnodes = 0;
    int64_t ntasks = 0;
    /* Longest key is "lwj." + 20 chars of int64 + ".nnodes" = 31 chars,
     * so 48 bytes can never truncate (the old 20-byte buffers silently cut
     * the key for job ids of 9 or more digits).
     */
    char key1[48] = {'\0'};
    char key2[48] = {'\0'};

    if (!Jget_int64 (o, JSC_RDESC_NNODES, &nnodes)) return -1;
    if (!Jget_int64 (o, JSC_RDESC_NTASKS, &ntasks)) return -1;
    if ((nnodes < 0) || (ntasks < 0)) return -1;

    /* int64_t is not necessarily 'long'; cast to match the conversion. */
    snprintf (key1, sizeof (key1), "lwj.%lld.nnodes", (long long)j);
    snprintf (key2, sizeof (key2), "lwj.%lld.ntasks", (long long)j);
    if (kvs_put_int64 (h, key1, nnodes) < 0)
        flux_log (h, LOG_ERR, "update %s: %s", key1, strerror (errno));
    else if (kvs_put_int64 (h, key2, ntasks) < 0)
        flux_log (h, LOG_ERR, "update %s: %s", key2, strerror (errno));
    else if (kvs_commit (h) < 0)
        flux_log (h, LOG_ERR, "commit failed");
    else {
        flux_log (h, LOG_DEBUG, "job (%lld) assigned new resources.",
                  (long long)j);
        rc = 0;
    }

    return rc;
}
Пример #6
0
/* Install the SIGINT handler, register waitjob_cb for JSC notifications,
 * signal readiness/completion through the optional output files, and run
 * the reactor.
 * Returns 0 when the reactor returns successfully; -1 if signal () or
 * flux_reactor_run () fails. A failure to register the callback is only
 * logged.
 */
static int wait_job_complete (flux_t h)
{
    wjctx_t *ctx;

    sig_flux_h = h;
    ctx = getctx (h);

    if (signal (SIGINT, sig_handler) == SIG_ERR)
        return -1;

    if (jsc_notify_status (h, waitjob_cb, (void *)h) != 0)
        flux_log (h, LOG_ERR, "failed to register a waitjob CB");

    /* After jsc_notify_status returns, all JSC events are queued and
     * delivered, so readiness can safely be signaled here.
     */
    if (ctx->start)
        touch_outfile (ctx->start);

    if (complete_job (ctx)) {
        if (ctx->complete)
            touch_outfile (ctx->complete);
        flux_log (ctx->h, LOG_INFO, "wait_job_complete: completion detected");
    }

    if (flux_reactor_run (flux_get_reactor (h), 0) < 0) {
        flux_log (h, LOG_ERR, "error in flux_reactor_run");
        return -1;
    }
    return 0;
}
Пример #7
0
/* Accept a connection from a new client.
 * fd watcher callback for the listen socket 'fd'; 'arg' is the module ctx.
 * On FLUX_POLLIN a connection is accepted, wrapped in a client_t and
 * appended to ctx->clients. On FLUX_POLLERR the condition is logged.
 * If accept4 () or client_create () fails, the callback returns without
 * examining the error flag.
 */
static void listener_cb (flux_t h, flux_fd_watcher_t *w,
                         int fd, int revents, void *arg)
{
    ctx_t *ctx = arg;

    if (revents & FLUX_POLLIN) {
        client_t *c;
        int cfd;

        if ((cfd = accept4 (fd, NULL, NULL, SOCK_CLOEXEC)) < 0) {
            flux_log (h, LOG_ERR, "accept: %s", strerror (errno));
            goto done;
        }
        /* NOTE(review): client_create () runs client_destroy () on its own
         * error path; confirm that does not also close cfd.
         */
        if (!(c = client_create (ctx, cfd))) {
            close (cfd);
            goto done;
        }
        if (zlist_append (ctx->clients, c) < 0)
            oom ();
    }
    /* 'revents' comes from a flux fd watcher, so test the FLUX_ flag rather
     * than the ZMQ_ one.
     */
    if (revents & FLUX_POLLERR) {
        flux_log (h, LOG_ERR, "poll listen fd: %s", strerror (errno));
    }
done:
    return;
}
Пример #8
0
/* Return 'n' sequenced responses.
 * Decodes the request payload, reads its "count" member, and sends that
 * many responses, each a copy of the request carrying the payload with
 * "seq" set to 0 .. count-1. Decode or protocol errors are answered with
 * an error response. Always returns 0.
 */
static int nsrc_request_cb (flux_t h, int typemask, zmsg_t **zmsg, void *arg)
{
    /* Start from NULL: the decode call stores its own object here, so a
     * pre-allocated Jnew () object would be overwritten and leaked.
     */
    JSON o = NULL;
    int i, count;

    if (flux_json_request_decode (*zmsg, &o) < 0) {
        if (flux_err_respond (h, errno, zmsg) < 0)
            flux_log (h, LOG_ERR, "%s: flux_err_respond: %s", __FUNCTION__,
                      strerror (errno));
        goto done;
    }
    if (!Jget_int (o, "count", &count)) {
        if (flux_err_respond (h, EPROTO, zmsg) < 0)
            flux_log (h, LOG_ERR, "%s: flux_err_respond: %s", __FUNCTION__,
                      strerror (errno));
        goto done;
    }
    for (i = 0; i < count; i++) {
        zmsg_t *cpy = zmsg_dup (*zmsg);
        if (!cpy)
            oom ();
        Jadd_int (o, "seq", i);
        if (flux_json_respond (h, o, &cpy) < 0)
            flux_log (h, LOG_ERR, "%s: flux_json_respond: %s", __FUNCTION__,
                      strerror (errno));
        zmsg_destroy (&cpy);
    }
    zmsg_destroy (zmsg);
done:
    Jput (o);
    return 0;
}
Пример #9
0
/* Create a UNIX domain stream socket bound to 'sockpath' and listening.
 * Any stale file at 'sockpath' is removed first. On success the path is
 * registered with cleanup_push_string () and the listening fd is returned.
 * On failure the error is logged, no cleanup is registered, and -1 is
 * returned. A path that does not fit in sun_path fails with ENAMETOOLONG
 * instead of being silently truncated.
 */
static int listener_init (ctx_t *ctx, char *sockpath)
{
    struct sockaddr_un addr;
    size_t pathlen = strlen (sockpath);
    int fd;

    if (pathlen >= sizeof (addr.sun_path)) {
        errno = ENAMETOOLONG;
        flux_log (ctx->h, LOG_ERR, "socket path %s: %s", sockpath,
                  strerror (errno));
        return -1;
    }
    fd = socket (AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0);
    if (fd < 0) {
        /* Nothing was created, so there is nothing to register for cleanup. */
        flux_log (ctx->h, LOG_ERR, "socket: %s", strerror (errno));
        return -1;
    }
    if (remove (sockpath) < 0 && errno != ENOENT) {
        flux_log (ctx->h, LOG_ERR, "remove %s: %s", sockpath, strerror (errno));
        goto error_close;
    }
    memset (&addr, 0, sizeof (struct sockaddr_un));
    addr.sun_family = AF_UNIX;
    memcpy (addr.sun_path, sockpath, pathlen + 1); /* length checked above */

    if (bind (fd, (struct sockaddr *)&addr, sizeof (struct sockaddr_un)) < 0) {
        flux_log (ctx->h, LOG_ERR, "bind: %s", strerror (errno));
        goto error_close;
    }
    if (listen (fd, LISTEN_BACKLOG) < 0) {
        flux_log (ctx->h, LOG_ERR, "listen: %s", strerror (errno));
        goto error_close;
    }
    cleanup_push_string(cleanup_file, sockpath);
    return fd;
error_close:
    close (fd);
    return -1;
}
Пример #10
0
/* Handle a barrier "enter" request.
 * The payload must carry "name", "count" and "nprocs"; a malformed message
 * is logged and dropped. The named barrier is looked up (or created), the
 * sender is tracked as a client when "hopcount" cannot be read from the
 * payload, and 'count' is added to the barrier. When the count reaches
 * nprocs an exit event is sent; otherwise, on a non-root node with no timer
 * armed, the reduction timer is armed.
 * Always returns 0; the payload, any remaining message and the sender
 * string are released before returning.
 */
static int enter_request_cb (flux_t h, int typemask, zmsg_t **zmsg, void *arg)
{
    ctx_t *ctx = arg;
    barrier_t *bar;
    json_object *payload = NULL;
    char *sender = NULL;
    const char *name;
    int count, nprocs, hopcount;

    if (flux_msg_decode (*zmsg, NULL, &payload) < 0 || payload == NULL
     || !(sender = flux_msg_sender (*zmsg))
     || util_json_object_get_string (payload, "name", &name) < 0
     || util_json_object_get_int (payload, "count", &count) < 0
     || util_json_object_get_int (payload, "nprocs", &nprocs) < 0) {
        flux_log (ctx->h, LOG_ERR, "%s: ignoring bad message", __FUNCTION__);
        goto done;
    }

    bar = zhash_lookup (ctx->barriers, name);
    if (!bar)
        bar = barrier_create (ctx, name, nprocs);

    /* A message whose "hopcount" cannot be read is treated as coming from a
     * client (tracked) rather than a downstream barrier plugin (untracked).
     * A client may enter a barrier only once; a second entry aborts it.
     */
    if (util_json_object_get_int (payload, "hopcount", &hopcount) < 0
                        && barrier_add_client (bar, sender, zmsg) < 0) {
        flux_respond_errnum (ctx->h, zmsg, EEXIST);
        flux_log (ctx->h, LOG_ERR,
                    "abort %s due to double entry by client %s",
                    name, sender);
        if (exit_event_send (ctx->h, bar->name, ECONNABORTED) < 0)
            flux_log (ctx->h, LOG_ERR, "exit_event_send: %s", strerror (errno));
        goto done;
    }

    /* Terminate the barrier once the count is reached; otherwise arm the
     * timer that passes the count upstream and zeroes it here.
     */
    bar->count += count;
    if (bar->count == bar->nprocs) {
        if (exit_event_send (ctx->h, bar->name, 0) < 0)
            flux_log (ctx->h, LOG_ERR, "exit_event_send: %s", strerror (errno));
    } else if (!flux_treeroot (ctx->h) && !ctx->timer_armed) {
        if (flux_tmouthandler_add (h, barrier_reduction_timeout_msec,
                                   true, timeout_cb, ctx) < 0)
            flux_log (h, LOG_ERR, "flux_tmouthandler_add: %s",strerror (errno));
        else
            ctx->timer_armed = true;
    }
done:
    if (payload)
        json_object_put (payload);
    if (*zmsg)
        zmsg_destroy (zmsg);
    free (sender);
    return 0;
}
Пример #11
0
/* Event handler for job state transitions.
 * The state name is the part of the event topic after a fixed-length
 * prefix (10 characters when the topic starts with "jsc", 12 otherwise);
 * the job id is the "lwj" member of the JSON payload.
 * For the J_RESERVED state a new-job event is fixed up first. The
 * registered callbacks are then invoked with the updated JCB, and job info
 * is deleted once the state is a finished one.
 * Malformed messages are logged and ignored.
 */
static void job_state_cb (flux_t h, flux_msg_handler_t *w,
                          const flux_msg_t *msg, void *arg)
{
    int64_t jobid = -1;
    json_object *o = NULL;
    const char *topic = NULL;
    const char *json_str = NULL;
    const char *state = NULL;
    size_t len = 12;

    if (flux_msg_get_topic (msg, &topic) < 0)
        goto done;

    if (flux_event_decode (msg, NULL, &json_str) < 0
            || !(o = Jfromstr (json_str))
            || !Jget_int64 (o, "lwj", &jobid)) {
        flux_log (h, LOG_ERR, "%s: bad message", __FUNCTION__);
        goto done;
    }

    if (strncmp (topic, "jsc", 3) == 0)
       len = 10;

    /* Never step past the end of a topic shorter than its prefix. */
    if (strlen (topic) < len) {
        flux_log (h, LOG_ERR, "%s: bad message", __FUNCTION__);
        goto done;
    }
    state = topic + len;
    if (strcmp (state, jsc_job_num2state (J_RESERVED)) == 0)
        fixup_newjob_event (h, jobid);

    if (invoke_cbs (h, jobid, get_update_jcb (h, jobid, state), 0) < 0)
        flux_log (h, LOG_ERR, "job_state_cb: failed to invoke callbacks");

    if (job_is_finished (state))
        delete_jobinfo (h, jobid);
done:
    /* The parsed payload is only needed for the job id; release it on
     * every path (it was previously leaked).
     */
    Jput (o);
    return;
}
Пример #12
0
/* Module entry point.
 * Refuses to run on any rank other than 0, subscribes to "sim.start",
 * installs the message handlers in 'htab', announces itself with
 * send_alive_request (), and runs the reactor until it returns.
 * Returns 0 on a clean reactor exit, -1 on any failure.
 */
int mod_main (flux_t *h, int argc, char **argv)
{
    ctx_t *ctx = getctx (h);
    flux_msg_handler_t **handlers = NULL;
    uint32_t rank;
    int rc = 0;

    if (flux_get_rank (h, &rank) < 0)
        return -1;
    if (rank != 0) {
        flux_log (h, LOG_ERR, "this module must only run on rank 0");
        return -1;
    }
    flux_log (h, LOG_INFO, "module starting");

    if (flux_event_subscribe (h, "sim.start") < 0) {
        flux_log (h, LOG_ERR, "subscribing to event: %s", strerror (errno));
        return -1;
    }
    if (flux_msg_handler_addvec (h, htab, ctx, &handlers) < 0) {
        flux_log (h, LOG_ERR, "flux_msg_handler_add: %s", strerror (errno));
        return -1;
    }

    send_alive_request (h, module_name);

    if (flux_reactor_run (flux_get_reactor (h), 0) < 0) {
        flux_log (h, LOG_ERR, "flux_reactor_run: %s", strerror (errno));
        rc = -1;
    }
    /* Handlers are removed whether or not the reactor exited cleanly. */
    flux_msg_handler_delvec (handlers);
    return rc;
}
Пример #13
0
static int job_status_cb (const char *jcbstr, void *arg, int errnum)
{
    int64_t os = 0;
    int64_t ns = 0;
    int64_t j = 0;
    jstatctx_t *ctx = NULL;
    flux_t h = (flux_t)arg;
    JSON jcb = NULL;

    ctx = getctx (h);
    if (errnum > 0) {
        flux_log (ctx->h, LOG_ERR, "job_status_cb: errnum passed in");
        return -1;
    }

    if (!(jcb = Jfromstr (jcbstr))) {
        flux_log (ctx->h, LOG_ERR, "job_status_cb: error parsing JSON string");
        return -1;
    }
    get_jobid (jcb, &j);
    get_states (jcb, &os, &ns);
    Jput (jcb);

    fprintf (ctx->op, "%s->%s\n", 
    jsc_job_num2state ((job_state_t)os), 
    jsc_job_num2state ((job_state_t)ns));
    fflush (ctx->op);

    return 0;
}
Пример #14
0
/* Register 'func' as a KVS watch callback on "lwj.next-id", passing the
 * flux handle as its argument.
 * Returns 0 on success, -1 (after logging) if the watch cannot be set up.
 */
static int reg_newjob_hdlr (flux_t h, kvs_set_int64_f func)
{
    int watch_rc = kvs_watch_int64 (h,"lwj.next-id", func, (void *) h);

    if (watch_rc < 0) {
        flux_log (h, LOG_ERR, "watch lwj.next-id: %s", strerror (errno));
        return -1;
    }

    flux_log (h, LOG_DEBUG, "registered job creation CB");
    return 0;
}
Пример #15
0
/* Module entry point.
 * argv[0], when given, is the listen socket path; otherwise
 * "<tmpdir>/flux-api" is used. Sets up the listen socket and its fd
 * watcher, installs the message watchers in 'htab', and runs the reactor.
 * On exit (success or failure) the watchers are removed, the listen fd is
 * closed and all remaining clients are destroyed.
 * Returns 0 if the reactor ran and returned successfully, else -1.
 */
int mod_main (flux_t h, int argc, char **argv)
{
    ctx_t *ctx = getctx (h);
    char *dfltpath = NULL;
    char *sockpath;
    int rc = -1;

    /* Socket path: first argument, or a default under the tmpdir.
     */
    sockpath = (argc > 0) ? argv[0] : NULL;
    if (!sockpath) {
        dfltpath = xasprintf ("%s/flux-api", flux_get_tmpdir ());
        sockpath = dfltpath;
    }

    /* Listen socket plus a watcher that accepts new connections.
     */
    ctx->listen_fd = listener_init (ctx, sockpath);
    if (ctx->listen_fd < 0)
        goto done;
    ctx->listen_w = flux_fd_watcher_create (ctx->listen_fd,
                                            FLUX_POLLIN | FLUX_POLLERR,
                                            listener_cb, ctx);
    if (!ctx->listen_w) {
        flux_log (h, LOG_ERR, "flux_fd_watcher_create: %s", strerror (errno));
        goto done;
    }
    flux_fd_watcher_start (h, ctx->listen_w);

    /* Event/response message watchers.
     */
    if (flux_msg_watcher_addvec (h, htab, ctx) < 0) {
        flux_log (h, LOG_ERR, "flux_msg_watcher_addvec: %s", strerror (errno));
        goto done;
    }

    /* Run until the reactor stops.
     */
    if (flux_reactor_start (h) < 0) {
        flux_log (h, LOG_ERR, "flux_reactor_start: %s", strerror (errno));
        goto done;
    }
    rc = 0;
done:
    free (dfltpath);
    flux_msg_watcher_delvec (h, htab);
    flux_fd_watcher_destroy (ctx->listen_w);
    if (ctx->listen_fd >= 0 && close (ctx->listen_fd) < 0)
        flux_log (h, LOG_ERR, "close listen_fd: %s", strerror (errno));
    if (ctx->clients) {
        client_t *c;
        for (c = zlist_pop (ctx->clients); c; c = zlist_pop (ctx->clients))
            client_destroy (c);
    }
    return rc;
}
Пример #16
0
/* Load the scheduler plugin named by 's' into 'sploader'.
 * If 's' contains a '/', it is used as the path of the DSO; otherwise it is
 * looked up in FLUX_MODULE_PATH with flux_modfind (). The DSO is opened
 * with dlopen () and wrapped by plugin_create ().
 * On success the plugin takes over the allocated name and path strings and
 * 0 is returned. On failure both strings are freed and -1 is returned;
 * errno is EEXIST if a plugin is already loaded, and is set explicitly on
 * the flux_modname (s) / strdup () failure paths.
 */
int sched_plugin_load (struct sched_plugin_loader *sploader, const char *s)
{
    char *path = NULL;
    char *name = NULL;
    char *searchpath = getenv ("FLUX_MODULE_PATH");
    void *handle = NULL;

    if (sploader->plugin) {
        errno = EEXIST;
        goto error;
    }
    if (!searchpath) {
        flux_log (sploader->h, LOG_ERR, "FLUX_MODULE_PATH not set");
        goto error;
    }

    if (strchr (s, '/') != NULL) {
        /* Explicit path: derive the module name from the file itself. */
        name = flux_modname (s);
        if (!name) {
            flux_log (sploader->h, LOG_ERR, "%s: %s", s, dlerror ());
            errno = ENOENT;
            goto error;
        }
        path = strdup (s);
        if (!path) {
            errno = ENOMEM;
            goto error;
        }
    } else {
        /* Bare name: resolve it against the module search path. */
        path = flux_modfind (searchpath, s);
        if (!path) {
            flux_log (sploader->h, LOG_ERR,
                      "%s: not found in module search path %s", s, searchpath);
            goto error;
        }
        name = flux_modname (path);
        if (!name)
            goto error;
    }

    handle = dlopen (path, RTLD_NOW | RTLD_LOCAL | RTLD_DEEPBIND);
    if (!handle) {
        flux_log (sploader->h, LOG_ERR, "failed to open sched plugin: %s",
                  dlerror ());
        goto error;
    }
    flux_log (sploader->h, LOG_DEBUG, "loaded: %s", name);

    sploader->plugin = plugin_create (sploader->h, handle);
    if (!sploader->plugin) {
        dlclose (handle);
        goto error;
    }
    sploader->plugin->name = name;
    sploader->plugin->path = path;
    return 0;
error:
    free (path);
    free (name);
    return -1;
}
Пример #17
0
// Looks at the current state and launches the next trigger.
// Scans the timer hash for the entry selected by occurs_before () /
// breaks_tie (), advances sim_time to that event time if it lies in the
// future, and sends a trigger to the owning module.
// Returns the result of send_trigger (), or -1 if the timer hash is empty
// or no non-negative event time was selected.
static int handle_next_event (ctx_t *ctx)
{
    sim_state_t *sim_state = ctx->sim_state;
    zhash_t *timers = sim_state->timers;
    double next_time = -1;
    const char *next_mod = NULL;
    double *entry;

    // the timer hashtable must hold at least one entry
    if (zhash_size (timers) < 1) {
        flux_log (ctx->h, LOG_ERR, "timer hashtable has no elements");
        return -1;
    }

    // find the next occurring event time and the module it belongs to
    entry = zhash_first (timers);
    while (entry) {
        const char *entry_mod = zhash_cursor (timers);

        if (next_time < 0 ||
            occurs_before (*entry, next_time) ||
            breaks_tie (*entry, next_time, entry_mod)) {
            next_time = *entry;
            next_mod = entry_mod;
        }
        entry = zhash_next (timers);
    }

    if (next_time < 0)
        return -1;

    // advance time (never backwards), then trigger the module
    if (next_time > sim_state->sim_time)
        sim_state->sim_time = next_time;

    flux_log (ctx->h,
              LOG_DEBUG,
              "Triggering %s.  Curr sim time: %f",
              next_mod,
              sim_state->sim_time);

    return send_trigger (ctx->h, next_mod, sim_state);
}
Пример #18
0
/* Proxy ping.
 * Decodes "rank" and "service" from the request, sends a ping carrying the
 * next sequence number to that service/rank without waiting for a reply,
 * and stores a copy of the original request in ctx->ping_requests keyed by
 * the sequence number. On any failure an error response carrying the saved
 * errno is sent to the requester.
 */
void xping_request_cb (flux_t h, flux_msg_handler_t *w,
                       const flux_msg_t *msg, void *arg)
{
    ctx_t *ctx = arg;
    const char *json_str;
    const char *service;
    int rank;
    int seq = ctx->ping_seq++;
    int saved_errno;
    char *hashkey = NULL;
    JSON ping = Jnew ();
    JSON req = NULL;
    flux_msg_t *saved;
    flux_rpc_t *rpc;

    if (flux_request_decode (msg, NULL, &json_str) < 0) {
        saved_errno = errno;
        goto error;
    }
    if (!(req = Jfromstr (json_str)) || !Jget_int (req, "rank", &rank)
                                     || !Jget_str (req, "service", &service)) {
        saved_errno = errno = EPROTO;
        goto error;
    }
    flux_log (h, LOG_DEBUG, "Rxping rank=%d service=%s", rank, service);

    Jadd_int (ping, "seq", seq);
    flux_log (h, LOG_DEBUG, "Tping seq=%d %d!%s", seq, rank, service);

    rpc = flux_rpc (h, service, Jtostr (ping), rank, FLUX_RPC_NORESPONSE);
    if (!rpc) {
        saved_errno = errno;
        goto error;
    }
    flux_rpc_destroy (rpc);

    saved = flux_msg_copy (msg, true);
    if (!saved) {
        saved_errno = errno;
        goto error;
    }

    /* Park the request copy under its sequence number; the hash destroys
     * it through the registered free function.
     */
    hashkey = xasprintf ("%d", seq);
    zhash_update (ctx->ping_requests, hashkey, saved);
    zhash_freefn (ctx->ping_requests, hashkey, (zhash_free_fn *)flux_msg_destroy);
    Jput (req);
    Jput (ping);
    free (hashkey);
    return;
error:
    if (flux_respond (h, msg, saved_errno, NULL) < 0)
        flux_log_error (h, "%s: flux_respond", __FUNCTION__);
    Jput (req);
    Jput (ping);
}
Пример #19
0
/* KVS watch callback for the next-job-id counter; 'arg' carries the flux
 * handle and 'val' the new counter value, so the job just created is
 * val - 1.
 * The first invocation is ignored (the watch fires immediately on
 * registration). Afterwards the new job is recorded in ctx->active_jobs
 * with state J_NULL, the registered callbacks are invoked with a JCB
 * carrying a J_NULL -> J_NULL state pair, and a watch is registered on the
 * job's state key.
 * Always returns 0 so that the reactor keeps running.
 */
static int new_job_cb (const char *key, int64_t val, void *arg, int errnum)
{
    int64_t nj = 0;
    int64_t js = 0;
    JSON ss = NULL;
    JSON jcb = NULL;
    /* Room for "lwj." plus any int64 (up to 20 chars); the previous
     * 20-byte 'path' buffer could truncate very large job ids.
     */
    char k[32] = {'\0'};
    char path[32] = {'\0'};
    flux_t h = (flux_t) arg;
    jscctx_t *ctx = getctx (h);

    if (ctx->first_time == 1) {
        /* watch is invoked immediately and we shouldn't
         * rely on that event at all.
         */
        ctx->first_time = 0;
        return 0;
    }

    if (chk_errnum (h, errnum) < 0) return 0;

    /* int64_t is not necessarily 'long'; cast to match the conversion. */
    flux_log (h, LOG_DEBUG, "new_job_cb invoked: key(%s), val(%lld)",
              key, (long long)val);

    js = J_NULL;
    nj = val-1;
    snprintf (k, sizeof (k), "%lld", (long long)nj);
    snprintf (path, sizeof (path), "lwj.%lld", (long long)nj);
    if (zhash_insert (ctx->active_jobs, k, (void *)(intptr_t)js) < 0) {
        flux_log (h, LOG_ERR, "new_job_cb: inserting a job to hash failed");
        goto done;
    }

    flux_log (h, LOG_DEBUG, "jobstate_hdlr registered");
    jcb = Jnew ();
    ss = Jnew ();
    Jadd_int64 (jcb, JSC_JOBID, nj);
    Jadd_int64 (ss, JSC_STATE_PAIR_OSTATE , (int64_t) js);
    Jadd_int64 (ss, JSC_STATE_PAIR_NSTATE, (int64_t) js);
    json_object_object_add (jcb, JSC_STATE_PAIR, ss);

    /* NOTE(review): 'jcb' is not released here; presumably invoke_cbs ()
     * takes ownership - confirm.
     */
    if (invoke_cbs (h, nj, jcb, errnum) < 0) {
        flux_log (h, LOG_ERR, "new_job_cb: failed to invoke callbacks");
    }
    if (reg_jobstate_hdlr (h, path, job_state_cb) == -1) {
        flux_log (h, LOG_ERR, "new_job_cb: reg_jobstate_hdlr: %s",
            strerror (errno));
    }

done:
    /* always return 0 so that reactor won't return */
    return 0;
}
Пример #20
0
/* Read the task count of job 'j' from the KVS key lwj.<j>.ntasks into
 * '*ntasks'.
 * Returns 0 on success, -1 (after logging) if the KVS lookup fails.
 */
static int extract_raw_ntasks (flux_t h, int64_t j, int64_t *ntasks)
{
    int rc = 0;
    /* "lwj." + up to 20 chars of int64 + ".ntasks" is 31 chars; the old
     * 20-byte buffer truncated the key for job ids of 9 or more digits.
     */
    char key[48] = {'\0'};

    /* int64_t is not necessarily 'long'; cast to match the conversion. */
    snprintf (key, sizeof (key), "lwj.%lld.ntasks", (long long)j);
    if (kvs_get_int64 (h, key, ntasks) < 0) {
        flux_log (h, LOG_ERR, "extract %s: %s", key, strerror (errno));
        rc = -1;
    }
    else
        flux_log (h, LOG_DEBUG, "extract %s: %lld", key, (long long)*ntasks);
    return rc;
}
Пример #21
0
/* Read the RDL string of job 'j' from the KVS key lwj.<j>.rdl into
 * '*rdlstr'.
 * Returns 0 on success, -1 (after logging) if the KVS lookup fails.
 */
static int extract_raw_rdl (flux_t h, int64_t j, char **rdlstr)
{
    int rc = 0;
    /* "lwj." + up to 20 chars of int64 + ".rdl" is 28 chars; the old
     * 20-byte buffer could truncate the key for very large job ids.
     */
    char key[48] = {'\0'};

    /* int64_t is not necessarily 'long'; cast to match the conversion. */
    snprintf (key, sizeof (key), "lwj.%lld.rdl", (long long)j);
    if (kvs_get_string (h, key, rdlstr) < 0) {
        flux_log (h, LOG_ERR, "extract %s: %s", key, strerror (errno));
        rc = -1;
    }
    else
        flux_log (h, LOG_DEBUG, "rdl under %s extracted", key);
    return rc;
}
Пример #22
0
/* Register 'func' as a KVS watch callback on "<path>.state", passing the
 * flux handle as its argument.
 * Returns 0 on success. Returns -1 (after logging) if the watch cannot be
 * set up, or if the key does not fit in the local buffer, in which case
 * errno is set to ENAMETOOLONG and nothing is watched (rather than watching
 * a silently truncated key).
 */
static int reg_jobstate_hdlr (flux_t h, const char *path, kvs_set_string_f func)
{
    int rc = 0;
    char key[64] = {'\0'};
    int n;

    n = snprintf (key, sizeof (key), "%s.state", path);
    if (n < 0 || (size_t)n >= sizeof (key)) {
        errno = ENAMETOOLONG;
        flux_log (h, LOG_ERR, "watch %s.state: %s.", path, strerror (errno));
        return -1;
    }
    if (kvs_watch_string (h, key, func, (void *)h) < 0) {
        flux_log (h, LOG_ERR, "watch %s: %s.", key, strerror (errno));
        rc = -1;
    } else
        flux_log (h, LOG_DEBUG, "registered job %s.state CB", path);
    return rc;
}
Пример #23
0
/* Module entry point: install job_request_cb for "job.*" requests and run
 * the reactor.
 * Returns 0 when the reactor returns successfully, -1 (after logging) if
 * the handler cannot be added or the reactor fails.
 */
int mod_main (flux_t h, int argc, char **argv)
{
    int rc = -1;

    if (flux_msghandler_add (h, FLUX_MSGTYPE_REQUEST, "job.*",
                                            job_request_cb, NULL) < 0)
        flux_log (h, LOG_ERR, "flux_msghandler_add: %s", strerror (errno));
    else if (flux_reactor_start (h) < 0)
        flux_log (h, LOG_ERR, "flux_reactor_start: %s", strerror (errno));
    else
        rc = 0;

    return rc;
}
Пример #24
0
/* Module entry point: set FLUX_O_COPROC on the handle, install the
 * handlers in 'htab', and run the reactor.
 * Returns 0 when the reactor returns successfully, -1 (after logging) if
 * the handlers cannot be added or the reactor fails.
 */
int mod_main (flux_t h, int argc, char **argv)
{
    int rc = -1;

    flux_flags_set (h, FLUX_O_COPROC);

    if (flux_msghandler_addvec (h, htab, htablen, NULL) < 0)
        flux_log (h, LOG_ERR, "flux_msghandler_addvec: %s", strerror (errno));
    else if (flux_reactor_start (h) < 0)
        flux_log (h, LOG_ERR, "flux_reactor_start: %s", strerror (errno));
    else
        rc = 0;

    return rc;
}
Пример #25
0
/* Issue a "req.clog" RPC and, once it returns, answer the incoming request
 * with a success response.
 * Per the original note, the req.clog request will not be answered until
 * req.flush is called.
 * Returns 0 on success, -1 (after logging) if the RPC or the response fails.
 */
static int stuck_request_cb (flux_t h, int typemask, zmsg_t **zmsg, void *arg)
{
    int rc = -1;

    if (flux_json_rpc (h, FLUX_NODEID_ANY, "req.clog", NULL, NULL) < 0)
        flux_log (h, LOG_ERR, "%s: req.clog RPC: %s", __FUNCTION__,
                  strerror (errno));
    else if (flux_err_respond (h, 0, zmsg) < 0)
        flux_log (h, LOG_ERR, "%s: responding: %s", __FUNCTION__,
                  strerror (errno));
    else
        rc = 0;

    return rc;
}
Пример #26
0
/* Module entry point: install the handlers in 'htab' with the module
 * context as their argument, then run the reactor.
 * Returns 0 when the reactor returns successfully, -1 (after logging) if
 * the handlers cannot be added or the reactor fails.
 */
int mod_main (flux_t h, int argc, char **argv)
{
    ctx_t *ctx = getctx (h);
    int rc = -1;

    if (flux_msghandler_addvec (h, htab, htablen, ctx) < 0)
        flux_log (h, LOG_ERR, "flux_msghandler_addvec: %s", strerror (errno));
    else if (flux_reactor_start (h) < 0)
        flux_log (h, LOG_ERR, "flux_reactor_start: %s", strerror (errno));
    else
        rc = 0;

    return rc;
}
Пример #27
0
/* Per-barrier callback: 'item' is a barrier and 'arg' the identity of a
 * sender. If that sender is one of the barrier's clients, the barrier is
 * aborted by sending an exit event with ECONNABORTED.
 * Always returns 0.
 */
static int disconnect (const char *key, void *item, void *arg)
{
    barrier_t *bar = item;
    ctx_t *ctx = bar->ctx;
    char *sender = arg;

    if (!zhash_lookup (bar->clients, sender))
        return 0;

    flux_log (ctx->h, LOG_INFO,
                "abort %s due to premature disconnect by client %s",
                bar->name, sender);
    if (exit_event_send (ctx->h, bar->name, ECONNABORTED) < 0)
        flux_log (ctx->h, LOG_ERR, "exit_event_send: %s", strerror (errno));

    return 0;
}
Пример #28
0
/* KVS watch callback for a job's state key; 'arg' carries the flux handle
 * and 'val' the new state string.
 * Parses the job id out of 'key' and invokes the registered callbacks with
 * the updated JCB. Each failure is logged and processing stops there.
 * Always returns 0 so that the reactor keeps running.
 */
static int job_state_cb (const char *key, const char *val, void *arg, int errnum)
{
    flux_t h = (flux_t) arg;
    int64_t jobid = -1;

    if (chk_errnum (h, errnum) < 0) {
        flux_log (h, LOG_ERR, "job_state_cb: key(%s), val(%s)", key, val);
        return 0;
    }
    if (parse_jobid (key, &jobid) != 0) {
        flux_log (h, LOG_ERR, "job_state_cb: key ill-formed");
        return 0;
    }
    if (invoke_cbs (h, jobid, get_update_jcb (h, jobid, val), errnum) < 0)
        flux_log (h, LOG_ERR, "job_state_cb: failed to invoke callbacks");

    return 0;
}
Пример #29
0
/* Create the client state for a newly accepted connection on 'fd'.
 * Allocates the client, its uuid, hash and lists, reads the peer
 * credentials, rejects peers whose uid differs from ctx->session_owner,
 * creates the read and write fd watchers (starting only the read one),
 * initializes the I/O buffers and makes the fd non-blocking.
 * Returns the new client, or NULL after logging and destroying the
 * partially built client.
 */
static client_t * client_create (ctx_t *ctx, int fd)
{
    flux_t h = ctx->h;
    client_t *cli;
    socklen_t crlen;

    cli = xzmalloc (sizeof (*cli));
    crlen = sizeof (cli->ucred);
    cli->fd = fd;
    cli->uuid = zuuid_new ();
    if (!cli->uuid)
        oom ();
    cli->ctx = ctx;
    cli->disconnect_notify = zhash_new ();
    if (!cli->disconnect_notify)
        oom ();
    cli->subscriptions = zlist_new ();
    if (!cli->subscriptions)
        oom ();
    cli->outqueue = zlist_new ();
    if (!cli->outqueue)
        oom ();

    if (getsockopt (fd, SOL_SOCKET, SO_PEERCRED, &cli->ucred, &crlen) < 0) {
        flux_log (h, LOG_ERR, "getsockopt SO_PEERCRED: %s", strerror (errno));
        goto error;
    }
    assert (crlen == sizeof (cli->ucred));

    /* For now only the session owner may connect.
     */
    if (cli->ucred.uid != ctx->session_owner) {
        flux_log (h, LOG_ERR, "connect by uid=%d pid=%d denied",
                  cli->ucred.uid, (int)cli->ucred.pid);
        goto error;
    }

    cli->inw = flux_fd_watcher_create (fd, FLUX_POLLIN, client_read_cb, cli);
    cli->outw = flux_fd_watcher_create (fd, FLUX_POLLOUT, client_write_cb, cli);
    if (!cli->inw || !cli->outw) {
        flux_log (h, LOG_ERR, "flux_fd_watcher_create: %s", strerror (errno));
        goto error;
    }
    flux_fd_watcher_start (h, cli->inw);
    flux_msg_iobuf_init (&cli->inbuf);
    flux_msg_iobuf_init (&cli->outbuf);
    if (set_nonblock (cli->fd, true) < 0) {
        flux_log (h, LOG_ERR, "set_nonblock: %s", strerror (errno));
        goto error;
    }

    return (cli);
error:
    client_destroy (cli);
    return NULL;
}
Пример #30
0
/* Received response message from broker.
 * Pops the last route off a copy of the message to obtain the client uuid,
 * clears the remaining route, then looks the uuid up in the clients list
 * and delivers the copy.
 * Responses for disconnected clients (or without a route to pop) are
 * silently discarded.
 */
static void response_cb (flux_t h, flux_msg_watcher_t *w,
                         const flux_msg_t *msg, void *arg)
{
    ctx_t *ctx = arg;
    char *uuid = NULL;
    client_t *c;
    flux_msg_t *cpy = flux_msg_copy (msg, true);

    if (!cpy)
        oom ();
    /* Bail out if no uuid was produced, so strcmp () never sees NULL. */
    if (flux_msg_pop_route (cpy, &uuid) < 0 || !uuid)
        goto done;
    if (flux_msg_clear_route (cpy) < 0)
        goto done;
    for (c = zlist_first (ctx->clients); c; c = zlist_next (ctx->clients)) {
        if (strcmp (uuid, zuuid_str (c->uuid)) != 0)
            continue;
        if (client_send_nocopy (c, &cpy) < 0) { /* FIXME handle errors */
            flux_log (h, LOG_ERR, "%s: client_send %s: %s",
                      __FUNCTION__, zuuid_str (c->uuid), strerror (errno));
            errno = 0;
        }
        break;
    }
done:
    /* Freed on every path: the popped uuid used to leak when clearing the
     * route failed.
     */
    free (uuid);
    flux_msg_destroy (cpy);
}