// return 0 if the job, with the given delay bound, // will complete by its deadline, and won't cause other jobs to miss deadlines. // static inline int check_deadline( WORKUNIT& wu, APP& app, BEST_APP_VERSION& bav ) { if (config.ignore_delay_bound) return 0; // skip delay check if host currently doesn't have any work // and it's not a hard app. // (i.e. everyone gets one result, no matter how slow they are) // if (get_estimated_delay(bav) == 0 && !hard_app(app)) { if (config.debug_send) { log_messages.printf(MSG_NORMAL, "[send] est delay 0, skipping deadline check\n" ); } return 0; } // if it's a hard app, don't send it to a host with no credit // if (hard_app(app) && g_reply->host.total_credit == 0) { return INFEASIBLE_CPU; } // do EDF simulation if possible; else use cruder approximation // if (config.workload_sim && g_request->have_other_results_list) { double est_dur = estimate_duration(wu, bav); if (g_reply->wreq.edf_reject_test(est_dur, wu.delay_bound)) { return INFEASIBLE_WORKLOAD; } IP_RESULT candidate("", wu.delay_bound, est_dur); safe_strcpy(candidate.name, wu.name); if (check_candidate(candidate, g_wreq->effective_ncpus, g_request->ip_results)) { // it passed the feasibility test, // but don't add it to the workload yet; // wait until we commit to sending it } else { g_reply->wreq.edf_reject(est_dur, wu.delay_bound); g_reply->wreq.speed.set_insufficient(0); return INFEASIBLE_WORKLOAD; } } else { double ewd = estimate_duration(wu, bav); if (hard_app(app)) ewd *= 1.3; double est_report_delay = get_estimated_delay(bav) + ewd; double diff = est_report_delay - wu.delay_bound; if (diff > 0) { if (config.debug_send) { log_messages.printf(MSG_NORMAL, "[send] [WU#%u] deadline miss %d > %d\n", wu.id, (int)est_report_delay, wu.delay_bound ); } g_reply->wreq.speed.set_insufficient(diff); return INFEASIBLE_CPU; } else { if (config.debug_send) { log_messages.printf(MSG_NORMAL, "[send] [WU#%u] meets deadline: %.2f + %.2f < %d\n", wu.id, get_estimated_delay(bav), ewd, wu.delay_bound ); } } } return 0; }
// simulate trying to do an RPC; // return true if we actually did one // bool CLIENT_STATE::simulate_rpc(PROJECT* p) { char buf[256], buf2[256]; vector<IP_RESULT> ip_results; vector<RESULT*> new_results; bool avail; if (p->last_rpc_time) { double delta = now - p->last_rpc_time; avail = p->available.sample(delta); } else { avail = p->available.sample(0); } p->last_rpc_time = now; if (!avail) { sprintf(buf, "RPC to %s skipped - project down<br>", p->project_name); html_msg += buf; msg_printf(p, MSG_INFO, "RPC skipped: project down"); gstate.scheduler_op->project_rpc_backoff(p, "project down"); p->master_url_fetch_pending = false; return false; } // save request params for WORK_FETCH::handle_reply // double save_cpu_req_secs = rsc_work_fetch[0].req_secs; for (int i=1; i<coprocs.n_rsc; i++) { COPROC& cp = coprocs.coprocs[i]; if (!strcmp(cp.type, "NVIDIA")) { coprocs.nvidia.req_secs = rsc_work_fetch[i].req_secs; } if (!strcmp(cp.type, "ATI")) { coprocs.ati.req_secs = rsc_work_fetch[i].req_secs; } if (!strcmp(cp.type, "intel_gpu")) { coprocs.intel_gpu.req_secs = rsc_work_fetch[i].req_secs; } } if (!server_uses_workload) { for (int i=0; i<coprocs.n_rsc; i++) { rsc_work_fetch[i].estimated_delay = rsc_work_fetch[i].busy_time_estimator.get_busy_time(); } } for (unsigned int i=0; i<app_versions.size(); i++) { app_versions[i]->dont_use = false; } work_fetch.request_string(buf2, sizeof(buf2)); sprintf(buf, "RPC to %s: %s<br>", p->project_name, buf2); html_msg += buf; msg_printf(p, MSG_INFO, "RPC: %s", buf2); handle_completed_results(p); if (server_uses_workload) { get_workload(ip_results); } bool sent_something = false; while (!existing_jobs_only) { vector<APP*> apps; get_apps_needing_work(p, apps); if (apps.empty()) break; RESULT* rp = new RESULT; WORKUNIT* wup = new WORKUNIT; make_job(p, wup, rp, apps); double et = wup->rsc_fpops_est / rp->avp->flops; if (server_uses_workload) { IP_RESULT c(rp->name, rp->report_deadline-now, et); if (check_candidate(c, ncpus, ip_results)) { ip_results.push_back(c); } else { msg_printf(p, MSG_INFO, "job for %s misses deadline sim\n", rp->app->name); APP_VERSION* avp = rp->avp; delete rp; delete wup; avp->dont_use = true; continue; } } else { double est_delay = get_estimated_delay(rp); if (est_delay + et > wup->app->latency_bound) { msg_printf(p, MSG_INFO, "job for %s misses deadline approx: del %f + et %f > %f\n", rp->app->name, est_delay, et, wup->app->latency_bound ); APP_VERSION* avp = rp->avp; delete rp; delete wup; avp->dont_use = true; continue; } } sent_something = true; rp->set_state(RESULT_FILES_DOWNLOADED, "simulate_rpc"); results.push_back(rp); new_results.push_back(rp); #if 0 sprintf(buf, "got job %s: CPU time %.2f, deadline %s<br>", rp->name, rp->final_cpu_time, time_to_string(rp->report_deadline) ); html_msg += buf; #endif decrement_request(rp); } njobs += (int)new_results.size(); msg_printf(0, MSG_INFO, "Got %lu tasks", new_results.size()); sprintf(buf, "got %lu tasks<br>", new_results.size()); html_msg += buf; SCHEDULER_REPLY sr; rsc_work_fetch[0].req_secs = save_cpu_req_secs; work_fetch.handle_reply(p, &sr, new_results); p->nrpc_failures = 0; p->sched_rpc_pending = 0; //p->min_rpc_time = now + 900; p->min_rpc_time = now; if (sent_something) { request_schedule_cpus("simulate_rpc"); request_work_fetch("simulate_rpc"); } sim_results.nrpcs++; return true; }