// find a project with finished results that should be reported. // This means: // - we're not backing off contacting the project // - the result is ready_to_report (compute done; files uploaded) // - we're within a day of the report deadline, // or at least a day has elapsed since the result was completed, // or we have a sporadic connection // PROJECT* CLIENT_STATE::find_project_with_overdue_results() { unsigned int i; RESULT* r; for (i=0; i<results.size(); i++) { r = results[i]; if (!r->ready_to_report) continue; PROJECT* p = r->project; if (p->waiting_until_min_rpc_time()) continue; if (p->suspended_via_gui) continue; if (config.report_results_immediately) { return p; } if (net_status.have_sporadic_connection) { return p; } double cushion = std::max(REPORT_DEADLINE_CUSHION, work_buf_min()); if (gstate.now > r->report_deadline - cushion) { return p; } if (gstate.now > r->completed_time + SECONDS_PER_DAY) { return p; } } return 0; }
// per-file backoff policy: sets next_request_time // void PERS_FILE_XFER::do_backoff() { double backoff = 0; // don't count it as a server failure if network is down // if (!net_status.need_physical_connection) { nretry++; } // keep track of transient failures per project (not currently used) // PROJECT* p = fip->project; p->file_xfer_backoff(is_upload).file_xfer_failed(p); // Do an exponential backoff of e^nretry seconds, // keeping within the bounds of pers_retry_delay_min and // pers_retry_delay_max // backoff = calculate_exponential_backoff( nretry, gstate.pers_retry_delay_min, gstate.pers_retry_delay_max ); next_request_time = gstate.now + backoff; msg_printf(fip->project, MSG_INFO, "Backing off %s on %s of %s", timediff_format(backoff).c_str(), is_upload?"upload":"download", fip->name ); }
void CLIENT_STATE::show_global_prefs_source(bool found_venue) { PROJECT* pp = global_prefs_source_project(); if (pp) { msg_printf(pp, MSG_INFO, "General prefs: from %s (last modified %s)", pp->get_project_name(), time_to_string(global_prefs.mod_time) ); } else { msg_printf(NULL, MSG_INFO, "General prefs: from %s (last modified %s)", global_prefs.source_project, time_to_string(global_prefs.mod_time) ); } if (strlen(main_host_venue)) { msg_printf(pp, MSG_INFO, "Computer location: %s", main_host_venue); if (found_venue) { msg_printf(NULL, MSG_INFO, "General prefs: using separate prefs for %s", main_host_venue ); } else { msg_printf(pp, MSG_INFO, "General prefs: no separate prefs for %s; using your defaults", main_host_venue ); } } else { msg_printf(pp, MSG_INFO, "Host location: none"); msg_printf(pp, MSG_INFO, "General prefs: using your defaults"); } }
// find a project with finished results that should be reported. // This means: // - we're not backing off contacting the project // - no upload for that project is active // - the result is ready_to_report (compute done; files uploaded) // - we're within a day of the report deadline, // or at least a day has elapsed since the result was completed, // or we have a sporadic connection // or the project is in "don't request more work" state // or a network suspend period is coming up soon // or the project has > RESULT_REPORT_IF_AT_LEAST_N results ready to report // PROJECT* CLIENT_STATE::find_project_with_overdue_results( bool network_suspend_soon ) { unsigned int i; RESULT* r; for (i=0; i<projects.size(); i++) { PROJECT* p = projects[i]; p->n_ready = 0; p->dont_contact = false; if (p->waiting_until_min_rpc_time()) p->dont_contact = true; if (p->suspended_via_gui) p->dont_contact = true; #ifndef SIM if (actively_uploading(p)) p->dont_contact = true; #endif } for (i=0; i<results.size(); i++) { r = results[i]; if (!r->ready_to_report) continue; PROJECT* p = r->project; if (p->dont_contact) continue; if (p->dont_request_more_work) { return p; } if (r->report_immediately) { return p; } if (config.report_results_immediately) { return p; } if (net_status.have_sporadic_connection) { return p; } if (network_suspend_soon) { return p; } double cushion = std::max(REPORT_DEADLINE_CUSHION, work_buf_min()); if (gstate.now > r->report_deadline - cushion) { return p; } if (gstate.now > r->completed_time + SECONDS_PER_DAY) { return p; } p->n_ready++; if (p->n_ready >= RESULT_REPORT_IF_AT_LEAST_N) { return p; } } return 0; }
// populate: // PROJECT::disk_usage for all projects // GLOBAL_STATE::client_disk_usage // GLOBAL_STATE::total_disk_usage // int CLIENT_STATE::get_disk_usages() { unsigned int i; double size; PROJECT* p; int retval; char buf[MAXPATHLEN]; client_disk_usage = 0; total_disk_usage = 0; for (i=0; i<projects.size(); i++) { p = projects[i]; p->disk_usage = 0; retval = dir_size(p->project_dir(), size); if (!retval) p->disk_usage = size; } for (i=0; i<active_tasks.active_tasks.size(); i++) { ACTIVE_TASK* atp = active_tasks.active_tasks[i]; get_slot_dir(atp->slot, buf, sizeof(buf)); retval = dir_size(buf, size); if (retval) continue; atp->wup->project->disk_usage += size; } for (i=0; i<projects.size(); i++) { p = projects[i]; total_disk_usage += p->disk_usage; } retval = dir_size(".", size, false); if (!retval) { client_disk_usage = size; total_disk_usage += size; } return 0; }
// look for app_versions.xml file in project dir. // If find, get app versions from there, // and use "anonymous platform" mechanism for this project // void CLIENT_STATE::check_anonymous() { unsigned int i; char path[MAXPATHLEN]; FILE* f; int retval; for (i=0; i<projects.size(); i++) { PROJECT* p = projects[i]; sprintf(path, "%s/%s", p->project_dir(), APP_INFO_FILE_NAME); f = fopen(path, "r"); if (!f) continue; msg_printf(p, MSG_INFO, "Found %s; using anonymous platform", APP_INFO_FILE_NAME ); p->anonymous_platform = true; // flag as anonymous even if can't parse file retval = parse_app_info(p, f); if (retval) { msg_printf_notice(p, false, "http://boinc.berkeley.edu/manager_links.php?target=notice&controlid=app_info", "%s", _("Syntax error in app_info.xml") ); } fclose(f); } }
// GUI RPC handler: resume the given project.
//
static void handle_project_resume(GUI_RPC_CONN& grc) {
    PROJECT* proj = get_project_parse(grc);
    if (proj == NULL) {
        return;
    }
    gstate.set_client_state_dirty("Project modified by user");
    msg_printf(proj, MSG_INFO, "project resumed by user");
    proj->resume();
    grc.mfout.printf("<success/>\n");
}
// GUI RPC handler: write the state of all projects.
//
static void handle_get_project_status(MIOFILE& fout) {
    fout.printf("<projects>\n");
    for (unsigned int i=0; i<gstate.projects.size(); i++) {
        gstate.projects[i]->write_state(fout, true);
    }
    fout.printf("</projects>\n");
}
// GUI RPC handler: write the state of all projects to the RPC connection.
//
static void handle_get_project_status(GUI_RPC_CONN& grc) {
    grc.mfout.printf("<projects>\n");
    for (unsigned int i=0; i<gstate.projects.size(); i++) {
        gstate.projects[i]->write_state(grc.mfout, true);
    }
    grc.mfout.printf("</projects>\n");
}
// GUI RPC handler: write project state and task info
// for the simple (novice) GUI view.
//
static void handle_get_simple_gui_info(MIOFILE& fout) {
    fout.printf("<simple_gui_info>\n");
    for (unsigned int i=0; i<gstate.projects.size(); i++) {
        gstate.projects[i]->write_state(fout, true);
    }
    gstate.write_tasks_gui(fout, false);
    fout.printf("</simple_gui_info>\n");
}
// GUI RPC handler: write project state and task info
// for the simple (novice) GUI view.
//
static void handle_get_simple_gui_info(GUI_RPC_CONN& grc) {
    grc.mfout.printf("<simple_gui_info>\n");
    for (unsigned int i=0; i<gstate.projects.size(); i++) {
        gstate.projects[i]->write_state(grc.mfout, true);
    }
    gstate.write_tasks_gui(grc.mfout, true);
    grc.mfout.printf("</simple_gui_info>\n");
}
// GUI RPC handler: perform a named operation ("reset", "suspend", "resume",
// "detach", "update", "nomorework", "allowmorework", "detach_when_done",
// "dont_detach_when_done") on the project identified in buf.
// Writes <success/> or <error>...</error> to fout.
//
static void handle_project_op(char* buf, MIOFILE& fout, const char* op) {
    PROJECT* p = get_project(buf, fout);
    if (!p) {
        fout.printf("<error>no such project</error>\n");
        return;
    }
    // any of these ops changes client state; schedule a state-file write
    //
    gstate.set_client_state_dirty("Project modified by user");
    if (!strcmp(op, "reset")) {
        gstate.request_schedule_cpus("project reset by user");
        gstate.request_work_fetch("project reset by user");
        gstate.reset_project(p, false);
    } else if (!strcmp(op, "suspend")) {
        msg_printf(p, MSG_INFO, "suspended by user");
        p->suspend();
    } else if (!strcmp(op, "resume")) {
        msg_printf(p, MSG_INFO, "resumed by user");
        p->resume();
    } else if (!strcmp(op, "detach")) {
        // account-manager-attached projects must be detached via the AM,
        // otherwise the AM would just re-attach them
        //
        if (p->attached_via_acct_mgr) {
            msg_printf(p, MSG_INFO,
                "This project must be detached using the account manager web site."
            );
            fout.printf("<error>must detach using account manager</error>");
            return;
        }
        gstate.detach_project(p);
        gstate.request_schedule_cpus("project detached by user");
        gstate.request_work_fetch("project detached by user");
    } else if (!strcmp(op, "update")) {
        msg_printf(p, MSG_INFO, "update requested by user");
        // clearing min_rpc_time cancels any backoff so the RPC goes out now
        //
        p->sched_rpc_pending = RPC_REASON_USER_REQ;
        p->min_rpc_time = 0;
#if 1
        rss_feeds.trigger_fetch(p);
#endif
        gstate.request_work_fetch("project updated by user");
    } else if (!strcmp(op, "nomorework")) {
        msg_printf(p, MSG_INFO, "work fetch suspended by user");
        p->dont_request_more_work = true;
    } else if (!strcmp(op, "allowmorework")) {
        msg_printf(p, MSG_INFO, "work fetch resumed by user");
        p->dont_request_more_work = false;
        gstate.request_work_fetch("project allowed to fetch work by user");
    } else if (!strcmp(op, "detach_when_done")) {
        // implies no-more-work: finish current jobs, then detach
        //
        msg_printf(p, MSG_INFO, "detach when done set by user");
        p->detach_when_done = true;
        p->dont_request_more_work = true;
    } else if (!strcmp(op, "dont_detach_when_done")) {
        msg_printf(p, MSG_INFO, "detach when done cleared by user");
        p->detach_when_done = false;
        p->dont_request_more_work = false;
    }
    fout.printf("<success/>\n");
}
int CLIENT_STATE::parse_account_files_venue() { unsigned int i; for (i=0; i<projects.size(); i++) { PROJECT* p = projects[i]; if (strlen(p->host_venue)) { p->parse_account_file_venue(); } } return 0; }
// See if any project's RPC backoff has expired;
// if so clear the flag and trigger a work-fetch reevaluation.
//
void CLIENT_STATE::check_project_timeout() {
    unsigned int i;
    for (i=0; i<projects.size(); i++) {
        PROJECT* p = projects[i];
        if (p->possibly_backed_off && now > p->min_rpc_time) {
            p->possibly_backed_off = false;
            char buf[256];
            // use snprintf: project names come from the server and could
            // overflow a fixed buffer with sprintf
            //
            snprintf(buf, sizeof(buf), "Backoff ended for %s", p->get_project_name());
            request_work_fetch(buf);
        }
    }
}
// find a project for which a scheduler RPC has been requested // - by user // - by an account manager // - by the project // - because the project was just attached (for verification) // PROJECT* CLIENT_STATE::next_project_sched_rpc_pending() { unsigned int i; PROJECT* p; for (i=0; i<projects.size(); i++) { p = projects[i]; bool honor_backoff = true; bool honor_suspend = true; // is a scheduler-requested RPC due? // if (!p->sched_rpc_pending && p->next_rpc_time && p->next_rpc_time<now) { // don't do it if project is set to no new work // and has no jobs currently // if (!p->dont_request_more_work || p->has_results()) { p->sched_rpc_pending = RPC_REASON_PROJECT_REQ; } } switch (p->sched_rpc_pending) { case RPC_REASON_USER_REQ: honor_backoff = false; honor_suspend = false; break; case RPC_REASON_RESULTS_DUE: break; case RPC_REASON_NEED_WORK: break; case RPC_REASON_TRICKLE_UP: break; case RPC_REASON_ACCT_MGR_REQ: // This is critical for acct mgrs, to propagate new host CPIDs honor_suspend = false; break; case RPC_REASON_INIT: break; case RPC_REASON_PROJECT_REQ: break; } if (honor_backoff && p->waiting_until_min_rpc_time()) { continue; } if (honor_suspend && p->suspended_via_gui) { continue; } if (p->sched_rpc_pending) { return p; } } return 0; }
// see if there's a fetchable non-CPU-intensive project without work // PROJECT* WORK_FETCH::non_cpu_intensive_project_needing_work() { for (unsigned int i=0; i<gstate.projects.size(); i++) { PROJECT* p = gstate.projects[i]; if (!p->non_cpu_intensive) continue; if (!p->can_request_work()) continue; if (p->rsc_pwf[0].backoff_time > gstate.now) continue; if (has_a_job(p)) continue; clear_request(); rsc_work_fetch[0].req_secs = 1; return p; } return 0; }
// find a contactable project with a trickle-up message pending
//
PROJECT* CLIENT_STATE::next_project_trickle_up_pending() {
    for (unsigned int i=0; i<projects.size(); i++) {
        PROJECT* proj = projects[i];
        if (proj->waiting_until_min_rpc_time() || proj->suspended_via_gui) {
            continue;
        }
        if (proj->trickle_up_pending) return proj;
    }
    return NULL;
}
// find a project that needs to have its master file fetched // PROJECT* CLIENT_STATE::next_project_master_pending() { unsigned int i; PROJECT* p; for (i=0; i<projects.size(); i++) { p = projects[i]; if (p->waiting_until_min_rpc_time()) continue; if (p->suspended_via_gui) continue; if (p->suspended_during_update) continue; if (p->master_url_fetch_pending) { return p; } } return 0; }
// Fetch the display name of the project at the given row,
// HTML-entity-decoded; empty string if the row has no project.
//
void CViewProjects::GetDocProjectName(wxInt32 item, wxString& strBuffer) const {
    PROJECT* project = NULL;
    CMainDocument* pDoc = wxGetApp().GetDocument();
    std::string project_name;

    if (pDoc) {
        project = pDoc->project(item);
    }

    if (project) {
        project->get_name(project_name);
        // fixed: was a redundant double assignment (strBuffer = strBuffer = ...)
        //
        strBuffer = HtmlEntityDecode(wxString(project_name.c_str(), wxConvUTF8));
    } else {
        strBuffer = wxEmptyString;
    }
}
// find a project for which a scheduler RPC has been requested // - by user // - by an account manager // - by the project // - because the project was just attached (for verification) // PROJECT* CLIENT_STATE::next_project_sched_rpc_pending() { unsigned int i; PROJECT* p; for (i=0; i<projects.size(); i++) { p = projects[i]; bool honor_backoff = true; bool honor_suspend = true; if (!p->sched_rpc_pending && p->next_rpc_time && p->next_rpc_time<now) { p->sched_rpc_pending = RPC_REASON_PROJECT_REQ; } switch (p->sched_rpc_pending) { case RPC_REASON_USER_REQ: honor_backoff = false; honor_suspend = false; break; case RPC_REASON_RESULTS_DUE: break; case RPC_REASON_NEED_WORK: break; case RPC_REASON_TRICKLE_UP: break; case RPC_REASON_ACCT_MGR_REQ: // This is critical for acct mgrs, to propagate new host CPIDs honor_suspend = false; break; case RPC_REASON_INIT: break; case RPC_REASON_PROJECT_REQ: break; } if (honor_backoff && p->waiting_until_min_rpc_time()) { continue; } if (honor_suspend && p->suspended_via_gui) { continue; } if (p->sched_rpc_pending) { return p; } } return 0; }
// Save the current project under the given name.
// Returns whatever project.Save() returns
// (presumably an error string or NULL — TODO confirm with PROJECT::Save()).
//
char *ProjectSave(char *projectname, DISPLAY *disp) {
    char buffer[256];

    // bounded copy: strcpy would overflow buffer for long project names
    //
    strncpy(buffer, projectname, sizeof(buffer)-1);
    buffer[sizeof(buffer)-1] = '\0';
    SetProjectChanged(FALSE);
    return project.Save(buffer,disp);
}
// check for app_config.xml files, and parse them. // Called at startup and on read_cc_config() RPC // void check_app_config() { char path[MAXPATHLEN]; FILE* f; for (unsigned int i=0; i<gstate.projects.size(); i++) { PROJECT* p = gstate.projects[i]; sprintf(path, "%s/%s", p->project_dir(), APP_CONFIG_FILE_NAME); f = boinc_fopen(path, "r"); if (!f) { clear_app_config(p); continue; } msg_printf(p, MSG_INFO, "Found %s", APP_CONFIG_FILE_NAME); int retval = p->app_configs.parse_file(f, p); if (!retval) { p->app_configs.config_app_versions(p, true); } fclose(f); } }
// Load a project file with the given name, clearing the changed flag
// and the cached background picture / reflection map first.
// Returns whatever project.Load() returns
// (presumably an error string or NULL — TODO confirm with PROJECT::Load()).
//
char *ProjectLoad(char *projectname, DISPLAY *disp) {
    char buffer[256];

    // bounded copy: strcpy would overflow buffer for long project names
    //
    strncpy(buffer, projectname, sizeof(buffer)-1);
    buffer[sizeof(buffer)-1] = '\0';
    SetProjectChanged(FALSE);
    global.SetBackPic(NULL);
    global.SetReflMap(NULL);
    return project.Load(buffer,disp);
}
int CLIENT_STATE::parse_account_files() { string name; PROJECT* project; FILE* f; int retval; DirScanner dir("."); while (dir.scan(name)) { if (!is_file(name.c_str())) continue; if (!is_account_file(name.c_str())) continue; f = boinc_fopen(name.c_str(), "r"); if (!f) continue; project = new PROJECT; // Assume master_url_fetch_pending, sched_rpc_pending are // true until we read client_state.xml // project->master_url_fetch_pending = true; project->sched_rpc_pending = RPC_REASON_INIT; retval = project->parse_account(f); fclose(f); if (retval) { msg_printf(project, MSG_INTERNAL_ERROR, "Couldn't parse account file %s", name.c_str() ); delete project; } else { if (lookup_project(project->master_url)) { msg_printf(project, MSG_INFO, "Duplicate account file %s - ignoring", name.c_str() ); delete project; } else { projects.push_back(project); } } } sort_projects(); return 0; }
// if the given project is highest-priority among the projects // eligible for the resource, set request fields // void RSC_WORK_FETCH::supplement(PROJECT* pp) { double x = pp->sched_priority; for (unsigned i=0; i<gstate.projects.size(); i++) { PROJECT* p = gstate.projects[i]; if (p == pp) continue; if (p->pwf.cant_fetch_work_reason) continue; if (!project_state(p).may_have_work) continue; RSC_PROJECT_WORK_FETCH& rpwf = project_state(p); if (rpwf.anon_skip) continue; if (p->sched_priority > x) { if (log_flags.work_fetch_debug) { msg_printf(pp, MSG_INFO, "[work_fetch]: not requesting work for %s: %s has higher priority", rsc_name(rsc_type), p->get_project_name() ); } return; } } // didn't find a better project; ask for work // set_request(pp); }
// we're going to contact this project for reasons other than work fetch; // decide if we should piggy-back a work fetch request. // void WORK_FETCH::piggyback_work_request(PROJECT* p) { clear_request(); if (config.fetch_minimal_work && gstate.had_or_requested_work) return; if (p->dont_request_more_work) return; if (p->non_cpu_intensive) { if (!has_a_job(p)) { rsc_work_fetch[0].req_secs = 1; } return; } // if project was updated from manager and config says so, // always fetch work if needed // if (p->sched_rpc_pending && config.fetch_on_update) { set_all_requests_hyst(p, -1); return; } compute_cant_fetch_work_reason(); PROJECT* bestp = choose_project(false, p); if (p != bestp) { if (p->pwf.cant_fetch_work_reason == 0) { if (bestp) { p->pwf.cant_fetch_work_reason = CANT_FETCH_WORK_NOT_HIGHEST_PRIORITY; if (log_flags.work_fetch_debug) { msg_printf(0, MSG_INFO, "[work_fetch] not piggybacking work req: %s has higher priority", bestp->get_project_name() ); } } else { p->pwf.cant_fetch_work_reason = CANT_FETCH_WORK_DONT_NEED; } } clear_request(); } }
int CLIENT_STATE::parse_statistics_files() { string name; PROJECT* project; FILE* f; int retval; DirScanner dir("."); while (dir.scan(name)) { PROJECT temp; if (is_statistics_file(name.c_str())) { f = boinc_fopen(name.c_str(), "r"); if (!f) continue; retval = temp.parse_statistics(f); fclose(f); if (retval) { msg_printf(NULL, MSG_INTERNAL_ERROR, "Couldn't parse %s", name.c_str() ); } else { project = lookup_project(temp.master_url); if (project == NULL) { msg_printf(NULL, MSG_INFO, "Project for %s not found - ignoring", name.c_str() ); } else { for (std::vector<DAILY_STATS>::const_iterator i=temp.statistics.begin(); i!=temp.statistics.end(); ++i ) { project->statistics.push_back(*i); } } } } } return 0; }
// Write the client state in the form expected by GUIs (get_state RPC):
// host info, coproc compatibility tags, net/time stats, then for each
// project its apps, app versions, workunits and results, followed by
// platform/version info and global prefs.
// Returns 0 on success, or the first nonzero write error.
//
int CLIENT_STATE::write_state_gui(MIOFILE& f) {
    unsigned int i, j;
    int retval;

    f.printf("<client_state>\n");

    retval = host_info.write(f, true, true);
    if (retval) return retval;

    // the following are for compatibility with old managers
    //
    if (coprocs.have_nvidia()) {
        f.printf("<have_cuda/>\n");
    }
    if (coprocs.have_ati()) {
        f.printf("<have_ati/>\n");
    }

#if 1
    // NOTE: the following is not in CC_STATE.
    // However, BoincView (which does its own parsing) expects it
    // to be in the get_state() reply, so leave it in for now
    //
    retval = net_stats.write(f);
    if (retval) return retval;
#endif

    retval = time_stats.write(f, true);
    if (retval) return retval;

    // per-project sections: each project followed by its apps,
    // app versions, workunits, and results (GUI form)
    //
    for (j=0; j<projects.size(); j++) {
        PROJECT* p = projects[j];
        retval = p->write_state(f, true);
        if (retval) return retval;
        for (i=0; i<apps.size(); i++) {
            if (apps[i]->project == p) {
                retval = apps[i]->write(f);
                if (retval) return retval;
            }
        }
        for (i=0; i<app_versions.size(); i++) {
            if (app_versions[i]->project == p) app_versions[i]->write(f);
        }
        for (i=0; i<workunits.size(); i++) {
            if (workunits[i]->project == p) workunits[i]->write(f);
        }
        for (i=0; i<results.size(); i++) {
            if (results[i]->project == p) results[i]->write_gui(f);
        }
    }
    f.printf(
        "<platform_name>%s</platform_name>\n"
        "<core_client_major_version>%d</core_client_major_version>\n"
        "<core_client_minor_version>%d</core_client_minor_version>\n"
        "<core_client_release>%d</core_client_release>\n"
        "<executing_as_daemon>%d</executing_as_daemon>\n",
        get_primary_platform(),
        core_client_version.major,
        core_client_version.minor,
        core_client_version.release,
        executing_as_daemon?1:0
    );
    for (i=0; i<platforms.size(); i++) {
        f.printf(
            "<platform>%s</platform>\n", platforms[i].name.c_str()
        );
    }

    global_prefs.write(f);

    // the following used by BoincView - don't remove
    //
    if (strlen(main_host_venue)) {
        f.printf("<host_venue>%s</host_venue>\n", main_host_venue);
    }

    f.printf("</client_state>\n");
    return 0;
}
// Pick jobs to run, putting them in "active" list. // Simulate what the job scheduler would do: // pick a job from the project P with highest scheduling priority, // then adjust P's scheduling priority. // // This is called at the start of the simulation, // and again each time a job finishes. // In the latter case, some resources may be saturated. // void RR_SIM::pick_jobs_to_run(double reltime) { active.clear(); // save and restore rec_temp // for (unsigned int i=0; i<gstate.projects.size(); i++) { PROJECT* p = gstate.projects[i]; p->pwf.rec_temp_save = p->pwf.rec_temp; } // loop over resource types; do the GPUs first // for (int rt=coprocs.n_rsc-1; rt>=0; rt--) { vector<PROJECT*> project_heap; // Make a heap of projects with runnable jobs for this resource, // ordered by scheduling priority. // Clear usage counts. // Initialize iterators to the pending list of each project. // rsc_work_fetch[rt].sim_nused = 0; for (unsigned int i=0; i<gstate.projects.size(); i++) { PROJECT* p = gstate.projects[i]; RSC_PROJECT_WORK_FETCH& rsc_pwf = p->rsc_pwf[rt]; if (rsc_pwf.pending.size() ==0) continue; rsc_pwf.pending_iter = rsc_pwf.pending.begin(); rsc_pwf.sim_nused = 0; p->pwf.rec_temp = p->pwf.rec; p->compute_sched_priority(); project_heap.push_back(p); } make_heap(project_heap.begin(), project_heap.end()); // Loop over jobs. // Keep going until the resource is saturated or there are no more jobs. 
// while (1) { if (project_heap.empty()) break; // p is the highest-priority project with work for this resource // PROJECT* p = project_heap.front(); RSC_PROJECT_WORK_FETCH& rsc_pwf = p->rsc_pwf[rt]; RESULT* rp = *rsc_pwf.pending_iter; // garbage-collect jobs that already completed in our simulation // (this is just a handy place to do this) // if (rp->rrsim_done) { rsc_pwf.pending_iter = rsc_pwf.pending.erase(rsc_pwf.pending_iter); } else { // add job to active list, and adjust project priority // activate(rp); adjust_rec_sched(rp); if (log_flags.rrsim_detail && !rp->already_selected) { char buf[256]; rsc_string(rp, buf); msg_printf(rp->project, MSG_INFO, "[rr_sim_detail] %.2f: starting %s (%s) (%.2fG/%.2fG)", reltime, rp->name, buf, rp->rrsim_flops_left/1e9, rp->rrsim_flops/1e9 ); rp->already_selected = true; } // check whether resource is saturated // if (rt) { if (rsc_work_fetch[rt].sim_nused >= coprocs.coprocs[rt].count) { break; } // if a GPU isn't saturated but this project is using // its max given exclusions, remove it from project heap // if (rsc_pwf.sim_nused >= coprocs.coprocs[rt].count - p->rsc_pwf[rt].ncoprocs_excluded) { pop_heap(project_heap.begin(), project_heap.end()); project_heap.pop_back(); continue; } } else { if (rsc_work_fetch[rt].sim_nused >= gstate.ncpus) break; } rsc_pwf.pending_iter++; } if (rsc_pwf.pending_iter == rsc_pwf.pending.end()) { // if this project now has no more jobs for the resource, // remove it from the project heap // pop_heap(project_heap.begin(), project_heap.end()); project_heap.pop_back(); } else if (!rp->rrsim_done) { // Otherwise reshuffle the project heap // make_heap(project_heap.begin(), project_heap.end()); } } } for (unsigned int i=0; i<gstate.projects.size(); i++) { PROJECT* p = gstate.projects[i]; p->pwf.rec_temp = p->pwf.rec_temp_save; } }
// Run the round-robin simulation: repeatedly pick jobs to run and advance
// simulated time to the next job finish (or time-slice boundary),
// updating per-resource stats, missed-deadline info, and project REC.
// Afterwards, note GPU instances starved by exclusions and account for
// any buffer time beyond the end of the simulation.
//
void RR_SIM::simulate() {
    PROJECT* pbest;
    RESULT* rp, *rpbest;
    unsigned int u;
    double ar = gstate.available_ram();

    work_fetch.rr_init();
    if (log_flags.rr_simulation) {
        msg_printf(0, MSG_INFO,
            "[rr_sim] start: work_buf min %.0f additional %.0f total %.0f on_frac %.3f active_frac %.3f",
            gstate.work_buf_min(), gstate.work_buf_additional(),
            gstate.work_buf_total(), gstate.time_stats.on_frac,
            gstate.time_stats.active_frac
        );
    }
    project_priority_init(false);
    init_pending_lists();

    // Simulation loop.  Keep going until all jobs done
    //
    double buf_end = gstate.now + gstate.work_buf_total();
    double sim_now = gstate.now;
    bool first = true;
    while (1) {
        pick_jobs_to_run(sim_now-gstate.now);
        if (first) {
            record_nidle_now();
            first = false;
        }
        if (!active.size()) break;

        // compute finish times and see which job finishes first
        //
        rpbest = NULL;
        for (u=0; u<active.size(); u++) {
            rp = active[u];
            rp->rrsim_finish_delay = rp->rrsim_flops_left/rp->rrsim_flops;
            if (!rpbest || rp->rrsim_finish_delay < rpbest->rrsim_finish_delay) {
                rpbest = rp;
            }
        }

        // see if we finish a time slice before first job ends
        //
        double delta_t = rpbest->rrsim_finish_delay;
        if (log_flags.rrsim_detail) {
            msg_printf(NULL, MSG_INFO,
                "[rrsim_detail] rpbest: %s (finish delay %.2f)", rpbest->name, delta_t
            );
        }
        if (delta_t > 3600) {
            // no job finishes within an hour: step time forward instead
            //
            rpbest = 0;

            // limit the granularity
            //
            if (delta_t > 36000) {
                delta_t /= 10;
            } else {
                delta_t = 3600;
            }
            if (log_flags.rrsim_detail) {
                msg_printf(NULL, MSG_INFO,
                    "[rrsim_detail] time-slice step of %.2f sec", delta_t
                );
            }
        } else {
            // a job finishes within the hour: mark it done
            //
            rpbest->rrsim_done = true;
            pbest = rpbest->project;
            if (log_flags.rr_simulation) {
                char buf[256];
                rsc_string(rpbest, buf);
                msg_printf(pbest, MSG_INFO,
                    "[rr_sim] %.2f: %s finishes (%s) (%.2fG/%.2fG)",
                    sim_now + delta_t - gstate.now,
                    rpbest->name, buf,
                    rpbest->estimated_flops_remaining()/1e9, rpbest->rrsim_flops/1e9
                );
            }

            // Does it miss its deadline?
            //
            double diff = (sim_now + rpbest->rrsim_finish_delay) - rpbest->computation_deadline();
            if (diff > 0) {
                handle_missed_deadline(rpbest, diff, ar);

                // update busy time of relevant processor types
                //
                double frac = rpbest->uses_gpu()?gstate.overall_gpu_frac():gstate.overall_cpu_frac();
                double dur = rpbest->estimated_runtime_remaining() / frac;
                rsc_work_fetch[0].update_busy_time(dur, rpbest->avp->avg_ncpus);
                int rt = rpbest->avp->gpu_usage.rsc_type;
                if (rt) {
                    rsc_work_fetch[rt].update_busy_time(dur, rpbest->avp->gpu_usage.usage);
                }
            }
        }

        // adjust FLOPS left of other active jobs
        //
        for (unsigned int i=0; i<active.size(); i++) {
            rp = active[i];
            rp->rrsim_flops_left -= rp->rrsim_flops*delta_t;

            // can be slightly less than 0 due to roundoff
            //
            if (rp->rrsim_flops_left < -1e6) {
                if (log_flags.rr_simulation) {
                    msg_printf(rp->project, MSG_INTERNAL_ERROR,
                        "%s: negative FLOPs left %f", rp->name, rp->rrsim_flops_left
                    );
                }
            }
            if (rp->rrsim_flops_left < 0) {
                rp->rrsim_flops_left = 0;
            }
        }
        for (int i=0; i<coprocs.n_rsc; i++) {
            rsc_work_fetch[i].update_stats(sim_now, delta_t, buf_end);
        }

        // update project REC
        //
        double f = gstate.host_info.p_fpops;
        for (unsigned int i=0; i<gstate.projects.size(); i++) {
            PROJECT* p = gstate.projects[i];
            double dtemp = sim_now;
            double x = 0;
            for (int j=0; j<coprocs.n_rsc; j++) {
                x += p->rsc_pwf[j].sim_nused * delta_t * f * rsc_work_fetch[j].relative_speed;
            }
            x *= COBBLESTONE_SCALE;
            update_average(
                sim_now+delta_t,
                sim_now,
                x,
                cc_config.rec_half_life,
                p->pwf.rec_temp,
                dtemp
            );
            p->compute_sched_priority();
        }
        sim_now += delta_t;
    }

    // identify GPU instances starved because of exclusions
    //
    for (int i=1; i<coprocs.n_rsc; i++) {
        RSC_WORK_FETCH& rwf = rsc_work_fetch[i];
        if (!rwf.has_exclusions) continue;
        COPROC& cp = coprocs.coprocs[i];
        COPROC_INSTANCE_BITMAP mask = 0;
        for (int j=0; j<cp.count; j++) {
            mask |= ((COPROC_INSTANCE_BITMAP)1)<<j;
        }
        rwf.sim_excluded_instances = ~(rwf.sim_used_instances) & mask;
        if (log_flags.rrsim_detail) {
            msg_printf(0, MSG_INFO,
                "[rrsim_detail] rsc %d: sim_used_inst %lld mask %lld sim_excluded_instances %lld",
                i, rwf.sim_used_instances, mask, rwf.sim_excluded_instances
            );
        }
    }

    // if simulation ends before end of buffer, take the tail into account
    //
    if (sim_now < buf_end) {
        double d_time = buf_end - sim_now;
        for (int i=0; i<coprocs.n_rsc; i++) {
            rsc_work_fetch[i].update_stats(sim_now, d_time, buf_end);
        }
    }
}