void RR_SIM::simulate() { PROJECT* pbest; RESULT* rp, *rpbest; unsigned int u; double ar = gstate.available_ram(); work_fetch.rr_init(); if (log_flags.rr_simulation) { msg_printf(0, MSG_INFO, "[rr_sim] start: work_buf min %.0f additional %.0f total %.0f on_frac %.3f active_frac %.3f", gstate.work_buf_min(), gstate.work_buf_additional(), gstate.work_buf_total(), gstate.time_stats.on_frac, gstate.time_stats.active_frac ); } project_priority_init(false); init_pending_lists(); // Simulation loop. Keep going until all jobs done // double buf_end = gstate.now + gstate.work_buf_total(); double sim_now = gstate.now; bool first = true; while (1) { pick_jobs_to_run(sim_now-gstate.now); if (first) { record_nidle_now(); first = false; } if (!active.size()) break; // compute finish times and see which job finishes first // rpbest = NULL; for (u=0; u<active.size(); u++) { rp = active[u]; rp->rrsim_finish_delay = rp->rrsim_flops_left/rp->rrsim_flops; if (!rpbest || rp->rrsim_finish_delay < rpbest->rrsim_finish_delay) { rpbest = rp; } } // see if we finish a time slice before first job ends // double delta_t = rpbest->rrsim_finish_delay; if (log_flags.rrsim_detail) { msg_printf(NULL, MSG_INFO, "[rrsim_detail] rpbest: %s (finish delay %.2f)", rpbest->name, delta_t ); } if (delta_t > 3600) { rpbest = 0; // limit the granularity // if (delta_t > 36000) { delta_t /= 10; } else { delta_t = 3600; } if (log_flags.rrsim_detail) { msg_printf(NULL, MSG_INFO, "[rrsim_detail] time-slice step of %.2f sec", delta_t ); } } else { rpbest->rrsim_done = true; pbest = rpbest->project; if (log_flags.rr_simulation) { char buf[256]; rsc_string(rpbest, buf); msg_printf(pbest, MSG_INFO, "[rr_sim] %.2f: %s finishes (%s) (%.2fG/%.2fG)", sim_now + delta_t - gstate.now, rpbest->name, buf, rpbest->estimated_flops_remaining()/1e9, rpbest->rrsim_flops/1e9 ); } // Does it miss its deadline? // double diff = (sim_now + rpbest->rrsim_finish_delay) - rpbest->computation_deadline(); if (diff > 0) { handle_missed_deadline(rpbest, diff, ar); // update busy time of relevant processor types // double frac = rpbest->uses_gpu()?gstate.overall_gpu_frac():gstate.overall_cpu_frac(); double dur = rpbest->estimated_runtime_remaining() / frac; rsc_work_fetch[0].update_busy_time(dur, rpbest->avp->avg_ncpus); int rt = rpbest->avp->gpu_usage.rsc_type; if (rt) { rsc_work_fetch[rt].update_busy_time(dur, rpbest->avp->gpu_usage.usage); } } } // adjust FLOPS left of other active jobs // for (unsigned int i=0; i<active.size(); i++) { rp = active[i]; rp->rrsim_flops_left -= rp->rrsim_flops*delta_t; // can be slightly less than 0 due to roundoff // if (rp->rrsim_flops_left < -1e6) { if (log_flags.rr_simulation) { msg_printf(rp->project, MSG_INTERNAL_ERROR, "%s: negative FLOPs left %f", rp->name, rp->rrsim_flops_left ); } } if (rp->rrsim_flops_left < 0) { rp->rrsim_flops_left = 0; } } for (int i=0; i<coprocs.n_rsc; i++) { rsc_work_fetch[i].update_stats(sim_now, delta_t, buf_end); } // update project REC // double f = gstate.host_info.p_fpops; for (unsigned int i=0; i<gstate.projects.size(); i++) { PROJECT* p = gstate.projects[i]; double dtemp = sim_now; double x = 0; for (int j=0; j<coprocs.n_rsc; j++) { x += p->rsc_pwf[j].sim_nused * delta_t * f * rsc_work_fetch[j].relative_speed; } x *= COBBLESTONE_SCALE; update_average( sim_now+delta_t, sim_now, x, cc_config.rec_half_life, p->pwf.rec_temp, dtemp ); p->compute_sched_priority(); } sim_now += delta_t; } // identify GPU instances starved because of exclusions // for (int i=1; i<coprocs.n_rsc; i++) { RSC_WORK_FETCH& rwf = rsc_work_fetch[i]; if (!rwf.has_exclusions) continue; COPROC& cp = coprocs.coprocs[i]; COPROC_INSTANCE_BITMAP mask = 0; for (int j=0; j<cp.count; j++) { mask |= ((COPROC_INSTANCE_BITMAP)1)<<j; } rwf.sim_excluded_instances = ~(rwf.sim_used_instances) & mask; if (log_flags.rrsim_detail) { msg_printf(0, MSG_INFO, "[rrsim_detail] rsc %d: sim_used_inst %lld mask %lld sim_excluded_instances %lld", i, rwf.sim_used_instances, mask, rwf.sim_excluded_instances ); } } // if simulation ends before end of buffer, take the tail into account // if (sim_now < buf_end) { double d_time = buf_end - sim_now; for (int i=0; i<coprocs.n_rsc; i++) { rsc_work_fetch[i].update_stats(sim_now, d_time, buf_end); } } }
// Pick jobs to run, putting them in "active" list. // Simulate what the job scheduler would do: // pick a job from the project P with highest scheduling priority, // then adjust P's scheduling priority. // // This is called at the start of the simulation, // and again each time a job finishes. // In the latter case, some resources may be saturated. // void RR_SIM::pick_jobs_to_run(double reltime) { active.clear(); // save and restore rec_temp // for (unsigned int i=0; i<gstate.projects.size(); i++) { PROJECT* p = gstate.projects[i]; p->pwf.rec_temp_save = p->pwf.rec_temp; } // loop over resource types; do the GPUs first // for (int rt=coprocs.n_rsc-1; rt>=0; rt--) { vector<PROJECT*> project_heap; // Make a heap of projects with runnable jobs for this resource, // ordered by scheduling priority. // Clear usage counts. // Initialize iterators to the pending list of each project. // rsc_work_fetch[rt].sim_nused = 0; for (unsigned int i=0; i<gstate.projects.size(); i++) { PROJECT* p = gstate.projects[i]; RSC_PROJECT_WORK_FETCH& rsc_pwf = p->rsc_pwf[rt]; if (rsc_pwf.pending.size() ==0) continue; rsc_pwf.pending_iter = rsc_pwf.pending.begin(); rsc_pwf.sim_nused = 0; p->pwf.rec_temp = p->pwf.rec; p->compute_sched_priority(); project_heap.push_back(p); } make_heap(project_heap.begin(), project_heap.end()); // Loop over jobs. // Keep going until the resource is saturated or there are no more jobs. // while (1) { if (project_heap.empty()) break; // p is the highest-priority project with work for this resource // PROJECT* p = project_heap.front(); RSC_PROJECT_WORK_FETCH& rsc_pwf = p->rsc_pwf[rt]; RESULT* rp = *rsc_pwf.pending_iter; // garbage-collect jobs that already completed in our simulation // (this is just a handy place to do this) // if (rp->rrsim_done) { rsc_pwf.pending_iter = rsc_pwf.pending.erase(rsc_pwf.pending_iter); } else { // add job to active list, and adjust project priority // activate(rp); adjust_rec_sched(rp); if (log_flags.rrsim_detail && !rp->already_selected) { char buf[256]; rsc_string(rp, buf); msg_printf(rp->project, MSG_INFO, "[rr_sim_detail] %.2f: starting %s (%s) (%.2fG/%.2fG)", reltime, rp->name, buf, rp->rrsim_flops_left/1e9, rp->rrsim_flops/1e9 ); rp->already_selected = true; } // check whether resource is saturated // if (rt) { if (rsc_work_fetch[rt].sim_nused >= coprocs.coprocs[rt].count) { break; } // if a GPU isn't saturated but this project is using // its max given exclusions, remove it from project heap // if (rsc_pwf.sim_nused >= coprocs.coprocs[rt].count - p->rsc_pwf[rt].ncoprocs_excluded) { pop_heap(project_heap.begin(), project_heap.end()); project_heap.pop_back(); continue; } } else { if (rsc_work_fetch[rt].sim_nused >= gstate.ncpus) break; } rsc_pwf.pending_iter++; } if (rsc_pwf.pending_iter == rsc_pwf.pending.end()) { // if this project now has no more jobs for the resource, // remove it from the project heap // pop_heap(project_heap.begin(), project_heap.end()); project_heap.pop_back(); } else if (!rp->rrsim_done) { // Otherwise reshuffle the project heap // make_heap(project_heap.begin(), project_heap.end()); } } } for (unsigned int i=0; i<gstate.projects.size(); i++) { PROJECT* p = gstate.projects[i]; p->pwf.rec_temp = p->pwf.rec_temp_save; } }