Beispiel #1
0
// return 0 if the job, with the given delay bound,
// will complete by its deadline, and won't cause other jobs to miss deadlines.
//
static inline int check_deadline(
    WORKUNIT& wu, APP& app, BEST_APP_VERSION& bav
) {
    if (config.ignore_delay_bound) return 0;

    // skip delay check if host currently doesn't have any work
    // and it's not a hard app.
    // (i.e. everyone gets one result, no matter how slow they are)
    //
    if (get_estimated_delay(bav) == 0 && !hard_app(app)) {
        if (config.debug_send) {
            log_messages.printf(MSG_NORMAL,
                "[send] est delay 0, skipping deadline check\n"
            );
        }
        return 0;
    }

    // if it's a hard app, don't send it to a host with no credit
    //
    if (hard_app(app) && g_reply->host.total_credit == 0) {
        return INFEASIBLE_CPU;
    }

    // do EDF simulation if possible; else use cruder approximation
    //
    if (config.workload_sim && g_request->have_other_results_list) {
        double est_dur = estimate_duration(wu, bav);
        if (g_reply->wreq.edf_reject_test(est_dur, wu.delay_bound)) {
            return INFEASIBLE_WORKLOAD;
        }
        IP_RESULT candidate("", wu.delay_bound, est_dur);
        safe_strcpy(candidate.name, wu.name);
        if (check_candidate(candidate, g_wreq->effective_ncpus, g_request->ip_results)) {
            // it passed the feasibility test,
            // but don't add it to the workload yet;
            // wait until we commit to sending it
        } else {
            g_reply->wreq.edf_reject(est_dur, wu.delay_bound);
            g_reply->wreq.speed.set_insufficient(0);
            return INFEASIBLE_WORKLOAD;
        }
    } else {
        double ewd = estimate_duration(wu, bav);
        if (hard_app(app)) ewd *= 1.3;
        double est_report_delay = get_estimated_delay(bav) + ewd;
        double diff = est_report_delay - wu.delay_bound;
        if (diff > 0) {
            if (config.debug_send) {
                log_messages.printf(MSG_NORMAL,
                    "[send] [WU#%u] deadline miss %d > %d\n",
                    wu.id, (int)est_report_delay, wu.delay_bound
                );
            }
            g_reply->wreq.speed.set_insufficient(diff);
            return INFEASIBLE_CPU;
        } else {
            if (config.debug_send) {
                log_messages.printf(MSG_NORMAL,
                    "[send] [WU#%u] meets deadline: %.2f + %.2f < %d\n",
                    wu.id, get_estimated_delay(bav), ewd, wu.delay_bound
                );
            }
        }
    }
    return 0;
}
Beispiel #2
0
// simulate trying to do an RPC;
// return true if we actually did one
//
bool CLIENT_STATE::simulate_rpc(PROJECT* p) {
    char buf[256], buf2[256];
    vector<IP_RESULT> ip_results;
    vector<RESULT*> new_results;

    bool avail;
    if (p->last_rpc_time) {
        double delta = now - p->last_rpc_time;
        avail = p->available.sample(delta);
    } else {
        avail = p->available.sample(0);
    }
    p->last_rpc_time = now;
    if (!avail) {
        sprintf(buf, "RPC to %s skipped - project down<br>", p->project_name);
        html_msg += buf;
        msg_printf(p, MSG_INFO, "RPC skipped: project down");
        gstate.scheduler_op->project_rpc_backoff(p, "project down");
        p->master_url_fetch_pending = false;
        return false;
    }

    // save request params for WORK_FETCH::handle_reply
    //
    double save_cpu_req_secs = rsc_work_fetch[0].req_secs;
    for (int i=1; i<coprocs.n_rsc; i++) {
        COPROC& cp = coprocs.coprocs[i];
        if (!strcmp(cp.type, "NVIDIA")) {
            coprocs.nvidia.req_secs = rsc_work_fetch[i].req_secs;
        }
        if (!strcmp(cp.type, "ATI")) {
            coprocs.ati.req_secs = rsc_work_fetch[i].req_secs;
        }
        if (!strcmp(cp.type, "intel_gpu")) {
            coprocs.intel_gpu.req_secs = rsc_work_fetch[i].req_secs;
        }
    }

    if (!server_uses_workload) {
        for (int i=0; i<coprocs.n_rsc; i++) {
            rsc_work_fetch[i].estimated_delay = rsc_work_fetch[i].busy_time_estimator.get_busy_time();
        }
    }

    for (unsigned int i=0; i<app_versions.size(); i++) {
        app_versions[i]->dont_use = false;
    }

    work_fetch.request_string(buf2, sizeof(buf2));
    sprintf(buf, "RPC to %s: %s<br>", p->project_name, buf2);
    html_msg += buf;

    msg_printf(p, MSG_INFO, "RPC: %s", buf2);

    handle_completed_results(p);

    if (server_uses_workload) {
        get_workload(ip_results);
    }

    bool sent_something = false;
    while (!existing_jobs_only) {
        vector<APP*> apps;
        get_apps_needing_work(p, apps);
        if (apps.empty()) break;
        RESULT* rp = new RESULT;
        WORKUNIT* wup = new WORKUNIT;
        make_job(p, wup, rp, apps);

        double et = wup->rsc_fpops_est / rp->avp->flops;
        if (server_uses_workload) {
            IP_RESULT c(rp->name, rp->report_deadline-now, et);
            if (check_candidate(c, ncpus, ip_results)) {
                ip_results.push_back(c);
            } else {
                msg_printf(p, MSG_INFO, "job for %s misses deadline sim\n", rp->app->name);
                APP_VERSION* avp = rp->avp;
                delete rp;
                delete wup;
                avp->dont_use = true;
                continue;
            }
        } else {
            double est_delay = get_estimated_delay(rp);
            if (est_delay + et > wup->app->latency_bound) {
                msg_printf(p, MSG_INFO,
                    "job for %s misses deadline approx: del %f + et %f > %f\n",
                    rp->app->name,
                    est_delay, et, wup->app->latency_bound
                );
                APP_VERSION* avp = rp->avp;
                delete rp;
                delete wup;
                avp->dont_use = true;
                continue;
            }
        }

        sent_something = true;
        rp->set_state(RESULT_FILES_DOWNLOADED, "simulate_rpc");
        results.push_back(rp);
        new_results.push_back(rp);
#if 0
        sprintf(buf, "got job %s: CPU time %.2f, deadline %s<br>",
            rp->name, rp->final_cpu_time, time_to_string(rp->report_deadline)
        );
        html_msg += buf;
#endif
        decrement_request(rp);
    }

    njobs += (int)new_results.size();
    msg_printf(0, MSG_INFO, "Got %lu tasks", new_results.size());
    sprintf(buf, "got %lu tasks<br>", new_results.size());
    html_msg += buf;

    SCHEDULER_REPLY sr;
    rsc_work_fetch[0].req_secs = save_cpu_req_secs;
    work_fetch.handle_reply(p, &sr, new_results);
    p->nrpc_failures = 0;
    p->sched_rpc_pending = 0;
    //p->min_rpc_time = now + 900;
    p->min_rpc_time = now;
    if (sent_something) {
        request_schedule_cpus("simulate_rpc");
        request_work_fetch("simulate_rpc");
    }
    sim_results.nrpcs++;
    return true;
}