示例#1
0
/* Wait for the job tracked in the waitjob context of h to complete.
 *
 * Installs sig_handler for SIGINT, registers waitjob_cb for JSC status
 * notifications, touches the optional start/complete files named in the
 * context, and then runs the reactor associated with h.
 *
 * Returns 0 once flux_reactor_run() has returned without error.
 * Returns -1 if the SIGINT handler or the JSC callback could not be
 * installed, or if the reactor reports an error.
 */
static int wait_job_complete (flux_t h)
{
    int rc = -1;
    /* NOTE(review): presumably read by sig_handler -- confirm */
    sig_flux_h = h;
    wjctx_t *ctx = getctx (h);

    if (signal (SIGINT, sig_handler) == SIG_ERR) {
        flux_log (h, LOG_ERR, "failed to install a SIGINT handler");
        goto done;
    }

    /* Without the callback no status change would ever reach us, so do
     * not signal readiness or enter the reactor if registration fails.
     */
    if (jsc_notify_status (h, waitjob_cb, (void *)h) != 0) {
        flux_log (h, LOG_ERR, "failed to register a waitjob CB");
        goto done;
    }

    /* Once jsc_notify_status has returned, all JSC events will be
     * queued and delivered, so it is safe to signal readiness.
     */
    if (ctx->start)
        touch_outfile (ctx->start);

    /* Check complete_job() before entering the reactor and record a
     * completion it already reports.
     */
    if (complete_job (ctx)) {
        if (ctx->complete)
            touch_outfile (ctx->complete);
        flux_log (ctx->h, LOG_INFO, "wait_job_complete: completion detected");
    }

    if (flux_reactor_run (flux_get_reactor (h), 0) < 0) {
        flux_log (h, LOG_ERR, "error in flux_reactor_run");
        goto done;
    }
    rc = 0;
done:
    return rc;
}
示例#2
0
// Advance the emulated clock from the time reached by the previous call up
// to ctx->sim_state->sim_time, modelling the io contention that occurred in
// between and completing every running job that finishes on the way.
//
// The clock moves in steps: each step ends either at sim_time or at the next
// termination reported by determine_next_termination(), whichever comes
// first (a negative termination value selects sim_time).
//
// Always returns 0.
static int advance_time (ctx_t *ctx, zhash_t *job_hash)
{
    // TODO: Make this not static? (pass it in?, store it in ctx?)
    // Persists across calls: the time up to which jobs were last advanced.
    static double curr_time = 0;

    zlist_t *running = ctx->running_jobs;
    double sim_time = ctx->sim_state->sim_time;

    while (curr_time < sim_time) {
        int remaining = zlist_size (running);
        if (remaining == 0) {
            // Nothing is running, so jump straight to the target time.
            curr_time = sim_time;
            break;
        }

        double next_termination =
            determine_next_termination (ctx, curr_time, job_hash);

        // End of this step: min of the two, ignoring a negative termination.
        double next_event = next_termination;
        if ((sim_time < next_termination) || (next_termination < 0))
            next_event = sim_time;

        // Visit each job that was in the list at the start of this step
        // exactly once; unfinished jobs are rotated back to the tail.
        for (; remaining > 0; remaining--) {
            job_t *job = zlist_pop (running);
            int has_started = (job->start_time <= curr_time);

            if (!has_started) {
                zlist_append (running, job);
                continue;
            }
#if SIMEXEC_IO
            // Get the minimum bandwidth between a resource in the job and
            // the pfs
            double *job_min_bandwidth =
                get_job_min_from_hash (job_hash, job->id);
            double io_penalty =
                determine_io_penalty (job->io_rate, *job_min_bandwidth);
            double io_percentage = (io_penalty / (io_penalty + 1));
            job->io_time += (next_event - curr_time) * io_percentage;
#endif
            double progress = calc_curr_progress (job, next_event);
            if (progress < 1)
                zlist_append (running, job);
            else
                complete_job (ctx, job, next_event);
        }
        curr_time = next_event;
    }

    return 0;
}
示例#3
0
// Sweep the list of running jobs once and complete every job whose progress
// at the current sim time has reached 1. Jobs that are still in progress are
// put back at the tail of the list. A job whose execution_time is not
// positive is treated as finished.
//
// Completion itself is delegated to complete_job().
// Always returns 0.
static int handle_completed_jobs (ctx_t *ctx)
{
    zlist_t *running = ctx->running_jobs;
    int remaining = zlist_size (running);
    double sim_time = ctx->sim_state->sim_time;

    // Each job present at entry is popped exactly once.
    for (; remaining > 0; remaining--) {
        job_t *job = zlist_pop (running);
        double progress = 1;

        if (job->execution_time > 0) {
            progress = calc_curr_progress (job, ctx->sim_state->sim_time);
        } else {
            flux_log (ctx->h,
                      LOG_DEBUG,
                      "handle_completed_jobs found a job (%d) with execution "
                      "time <= 0 (%f), setting progress = 1",
                      job->id,
                      job->execution_time);
        }

        if (progress < 1) {
            // Still running: rotate it back into the list.
            zlist_append (running, job);
            continue;
        }

        flux_log (ctx->h,
                  LOG_DEBUG,
                  "handle_completed_jobs found a completed job");
        complete_job (ctx, job, sim_time);
    }

    return 0;
}