// Send kill signal to all app processes // Don't wait for them to exit // void ACTIVE_TASK_SET::kill_tasks(PROJECT* proj) { unsigned int i; ACTIVE_TASK *atp; for (i=0; i<active_tasks.size(); i++) { atp = active_tasks[i]; if (proj && atp->wup->project != proj) continue; if (!atp->process_exists()) continue; atp->kill_task(false); } }
// a file upload has finished. // If any running apps are waiting for it, notify them // void ACTIVE_TASK_SET::upload_notify_app(FILE_INFO* fip) { for (unsigned int i=0; i<active_tasks.size(); i++) { ACTIVE_TASK* atp = active_tasks[i]; RESULT* rp = atp->result; FILE_REF* frp = rp->lookup_file(fip); if (frp) { atp->upload_notify_app(fip, frp); } } }
void ACTIVE_TASK_SET::init() { for (unsigned int i=0; i<active_tasks.size(); i++) { ACTIVE_TASK* atp = active_tasks[i]; atp->init(atp->result); atp->scheduler_state = CPU_SCHED_PREEMPTED; atp->read_task_state_file(); atp->current_cpu_time = atp->checkpoint_cpu_time; atp->elapsed_time = atp->checkpoint_elapsed_time; } }
void ACTIVE_TASK_SET::network_available() { #ifndef SIM for (unsigned int i=0; i<active_tasks.size(); i++) { ACTIVE_TASK* atp = active_tasks[i]; if (atp->want_network) { atp->send_network_available(); } } #endif }
// Send quit message to all app processes // This is called when the core client exits, // or when a project is detached or reset // void ACTIVE_TASK_SET::request_tasks_exit(PROJECT* proj) { LOGD("app_control: ACTIVE_TASK::request_tasks_exit"); unsigned int i; ACTIVE_TASK *atp; for (i=0; i<active_tasks.size(); i++) { atp = active_tasks[i]; if (proj && atp->wup->project != proj) continue; if (!atp->process_exists()) continue; atp->request_exit(); } }
// tell all running apps of a project to reread prefs // void ACTIVE_TASK_SET::request_reread_prefs(PROJECT* project) { unsigned int i; ACTIVE_TASK* atp; for (i=0; i<active_tasks.size(); i++) { atp = active_tasks[i]; if (atp->result->project != project) continue; if (!atp->process_exists()) continue; atp->request_reread_prefs(); } }
// Check if any of the active tasks have exceeded their // resource limits on disk, CPU time or memory // bool ACTIVE_TASK_SET::check_rsc_limits_exceeded() { unsigned int i; ACTIVE_TASK *atp; static double last_disk_check_time = 0; bool do_disk_check = false; bool did_anything = false; double ram_left = gstate.available_ram(); double max_ram = gstate.max_available_ram(); // Some slot dirs have lots of files, // so only check every min(disk_interval, 300) secs // double min_interval = gstate.global_prefs.disk_interval; if (min_interval < 300) min_interval = 300; if (gstate.now > last_disk_check_time + min_interval) { do_disk_check = true; } for (i=0; i<active_tasks.size(); i++) { atp = active_tasks[i]; if (atp->task_state() != PROCESS_EXECUTING) continue; if (!atp->result->project->non_cpu_intensive && (atp->elapsed_time > atp->max_elapsed_time)) { msg_printf(atp->result->project, MSG_INFO, "Aborting task %s: exceeded elapsed time limit %f\n", atp->result->name, atp->max_elapsed_time ); atp->abort_task(ERR_RSC_LIMIT_EXCEEDED, "Maximum elapsed time exceeded"); did_anything = true; continue; } if (atp->procinfo.working_set_size_smoothed > max_ram) { msg_printf(atp->result->project, MSG_INFO, "Aborting task %s: exceeded memory limit %.2fMB > %.2fMB\n", atp->result->name, atp->procinfo.working_set_size_smoothed/MEGA, max_ram/MEGA ); atp->abort_task(ERR_RSC_LIMIT_EXCEEDED, "Maximum memory exceeded"); did_anything = true; continue; } if (do_disk_check && atp->check_max_disk_exceeded()) { did_anything = true; continue; } ram_left -= atp->procinfo.working_set_size_smoothed; } if (ram_left < 0) { gstate.request_schedule_cpus("RAM usage limit exceeded"); } if (do_disk_check) { last_disk_check_time = gstate.now; } return did_anything; }
// Check to see if any tasks are running // called if benchmarking and waiting for suspends to happen // or the system needs to suspend itself so we are suspending // the applications // bool ACTIVE_TASK_SET::is_task_executing() { unsigned int i; ACTIVE_TASK* atp; for (i=0; i<active_tasks.size(); i++) { atp = active_tasks[i]; if (atp->task_state() == PROCESS_EXECUTING) { return true; } } return false; }
void ACTIVE_TASK_SET::graphics_poll() { unsigned int i; ACTIVE_TASK* atp; for (i=0; i<active_tasks.size(); i++) { atp = active_tasks[i]; if (!atp->process_exists()) continue; atp->graphics_request_queue.msg_queue_poll( atp->app_client_shm.shm->graphics_request ); atp->check_graphics_mode_ack(); } }
// compute a per-app-version "temporary DCF" based on the elapsed time // and fraction done of running jobs // void compute_temp_dcf() { unsigned int i; for (i=0; i<gstate.app_versions.size(); i++) { gstate.app_versions[i]->temp_dcf = 1; } for (i=0; i<gstate.active_tasks.active_tasks.size(); i++) { ACTIVE_TASK* atp = gstate.active_tasks.active_tasks[i]; double x = atp->est_dur(false) / atp->result->estimated_duration(false); APP_VERSION* avp = atp->result->avp; if (x < avp->temp_dcf) { avp->temp_dcf = x; } } }
// resume all currently scheduled tasks // void ACTIVE_TASK_SET::unsuspend_all() { unsigned int i; ACTIVE_TASK* atp; for (i=0; i<active_tasks.size(); i++) { atp = active_tasks[i]; if (atp->scheduler_state != CPU_SCHED_SCHEDULED) continue; if (atp->task_state() == PROCESS_UNINITIALIZED) { if (atp->start(false)) { msg_printf(atp->wup->project, MSG_INTERNAL_ERROR, "Couldn't restart task %s", atp->result->name ); } } else if (atp->task_state() == PROCESS_SUSPENDED) { atp->unsuspend(); } } }
// GUI RPC: report the tasks the screensaver should display:
// all executing tasks, plus suspended ones when the only reason
// for suspension is CPU throttling.
//
static void handle_get_screensaver_tasks(MIOFILE& fout) {
    fout.printf(
        "<handle_get_screensaver_tasks>\n"
        " <suspend_reason>%d</suspend_reason>\n",
        gstate.suspend_reason
    );
    for (unsigned int idx=0; idx<gstate.active_tasks.active_tasks.size(); idx++) {
        ACTIVE_TASK* task = gstate.active_tasks.active_tasks[idx];
        int state = task->task_state();
        bool show = (state == PROCESS_EXECUTING);
        if (!show
            && state == PROCESS_SUSPENDED
            && gstate.suspend_reason == SUSPEND_REASON_CPU_THROTTLE
        ) {
            show = true;
        }
        if (show) {
            task->result->write_gui(fout);
        }
    }
    fout.printf("</handle_get_screensaver_tasks>\n");
}
// GUI RPC: perform an operation ("abort", "suspend", or "resume")
// on a single result, identified by project URL + result name
// parsed from the RPC request.  Writes <success/> or an <error>
// element to the RPC reply stream.
//
static void handle_result_op(GUI_RPC_CONN& grc, const char* op) {
    RESULT* rp;
    char result_name[256];
    ACTIVE_TASK* atp;
    string project_url;

    strcpy(result_name, "");
    // parse <name> and <project_url> from the request
    while (!grc.xp.get_tag()) {
        if (grc.xp.parse_str("name", result_name, sizeof(result_name))) continue;
        if (grc.xp.parse_string("project_url", project_url)) continue;
    }
    PROJECT* p = get_project(grc, project_url);
    if (!p) return;    // get_project() already wrote the error reply

    if (!strlen(result_name)) {
        grc.mfout.printf("<error>Missing result name</error>\n");
        return;
    }

    rp = gstate.lookup_result(p, result_name);
    if (!rp) {
        grc.mfout.printf("<error>no such result</error>\n");
        return;
    }

    if (!strcmp(op, "abort")) {
        msg_printf(p, MSG_INFO, "task %s aborted by user", result_name);
        atp = gstate.lookup_active_task_by_result(rp);
        if (atp) {
            // running: abort via the active task
            atp->abort_task(EXIT_ABORTED_VIA_GUI, "aborted by user");
        } else {
            // not running: mark the result itself as aborted
            rp->abort_inactive(EXIT_ABORTED_VIA_GUI);
        }
        gstate.request_work_fetch("result aborted by user");
    } else if (!strcmp(op, "suspend")) {
        msg_printf(p, MSG_INFO, "task %s suspended by user", result_name);
        rp->suspended_via_gui = true;
        gstate.request_work_fetch("result suspended by user");
    } else if (!strcmp(op, "resume")) {
        msg_printf(p, MSG_INFO, "task %s resumed by user", result_name);
        rp->suspended_via_gui = false;
    }
    // any of the above may change what should be running
    gstate.request_schedule_cpus("result suspended, resumed or aborted by user");
    gstate.set_client_state_dirty("Result RPC");
    grc.mfout.printf("<success/>\n");
}
// check for msgs from active tasks, // and update their elapsed time and other info // void ACTIVE_TASK_SET::get_msgs() { //LOGD("app_control: ACTIVE_TASK::get_msgs"); unsigned int i; ACTIVE_TASK *atp; double old_time; static double last_time=0; double delta_t; if (last_time) { delta_t = gstate.now - last_time; // Normally this is called every second. // If delta_t is > 10, we'll assume that a period of hibernation // or suspension happened, and treat it as zero. // If negative, must be clock reset. Ignore. // if (delta_t > 10 || delta_t < 0) { delta_t = 0; } } else { delta_t = 0; } last_time = gstate.now; for (i=0; i<active_tasks.size(); i++) { atp = active_tasks[i]; if (!atp->process_exists()) continue; old_time = atp->checkpoint_cpu_time; if (atp->task_state() == PROCESS_EXECUTING) { atp->elapsed_time += delta_t; } if (atp->get_app_status_msg()) { if (old_time != atp->checkpoint_cpu_time) { char buf[256]; sprintf(buf, "%s checkpointed", atp->result->name); if (atp->overdue_checkpoint) { gstate.request_schedule_cpus(buf); } atp->checkpoint_wall_time = gstate.now; atp->premature_exit_count = 0; atp->checkpoint_elapsed_time = atp->elapsed_time; atp->checkpoint_fraction_done = atp->fraction_done; atp->checkpoint_fraction_done_elapsed_time = atp->fraction_done_elapsed_time; if (log_flags.checkpoint_debug) { msg_printf(atp->wup->project, MSG_INFO, "[checkpoint] result %s checkpointed", atp->result->name ); } else if (log_flags.task_debug) { msg_printf(atp->wup->project, MSG_INFO, "[task] result %s checkpointed", atp->result->name ); } atp->write_task_state_file(); } } atp->get_trickle_up_msg(); atp->get_graphics_msg(); } }
// clean up after finished apps // bool CLIENT_STATE::handle_finished_apps() { ACTIVE_TASK* atp; bool action = false; static double last_time = 0; if (!clock_change && now - last_time < HANDLE_FINISHED_APPS_PERIOD) return false; last_time = now; vector<ACTIVE_TASK*>::iterator iter; iter = active_tasks.active_tasks.begin(); while (iter != active_tasks.active_tasks.end()) { atp = *iter; switch (atp->task_state()) { case PROCESS_EXITED: case PROCESS_WAS_SIGNALED: case PROCESS_EXIT_UNKNOWN: case PROCESS_COULDNT_START: case PROCESS_ABORTED: if (log_flags.task) { msg_printf(atp->wup->project, MSG_INFO, "Computation for task %s finished", atp->result->name ); } app_finished(*atp); if (!action) { adjust_rec(); // update REC before erasing ACTIVE_TASK } iter = active_tasks.active_tasks.erase(iter); delete atp; set_client_state_dirty("handle_finished_apps"); // the following is critical; otherwise the result is // still in the "scheduled" list and enforce_schedule() // will try to run it again. // request_schedule_cpus("handle_finished_apps"); action = true; break; default: ++iter; } } return action; }
void ACTIVE_TASK_SET::send_trickle_downs() { unsigned int i; ACTIVE_TASK* atp; bool sent; for (i=0; i<active_tasks.size(); i++) { atp = active_tasks[i]; if (!atp->process_exists()) continue; if (atp->have_trickle_down) { if (!atp->app_client_shm.shm) continue; sent = atp->app_client_shm.shm->trickle_down.send_msg("<have_trickle_down/>\n"); if (sent) atp->have_trickle_down = false; } if (atp->send_upload_file_status) { if (!atp->app_client_shm.shm) continue; sent = atp->app_client_shm.shm->trickle_down.send_msg("<upload_file_status/>\n"); if (sent) atp->send_upload_file_status = false; } } }
// GUI RPC (legacy buffer-based variant): perform an operation
// ("abort", "suspend", or "resume") on a single result, identified
// by project and result name parsed from the raw request buffer.
// Writes <success/> or an <error> element to fout.
//
static void handle_result_op(char* buf, MIOFILE& fout, const char* op) {
    RESULT* rp;
    char result_name[256];
    ACTIVE_TASK* atp;
    PROJECT* p = get_project(buf, fout);
    if (!p) {
        fout.printf("<error>No such project</error>\n");
        return;
    }

    if (!parse_str(buf, "<name>", result_name, sizeof(result_name))) {
        fout.printf("<error>Missing result name</error>\n");
        return;
    }

    rp = gstate.lookup_result(p, result_name);
    if (!rp) {
        fout.printf("<error>no such result</error>\n");
        return;
    }

    if (!strcmp(op, "abort")) {
        msg_printf(p, MSG_INFO, "task %s aborted by user", result_name);
        atp = gstate.lookup_active_task_by_result(rp);
        if (atp) {
            // running: abort via the active task
            atp->abort_task(ERR_ABORTED_VIA_GUI, "aborted by user");
        } else {
            // not running: mark the result itself as aborted
            rp->abort_inactive(ERR_ABORTED_VIA_GUI);
        }
        gstate.request_work_fetch("result aborted by user");
    } else if (!strcmp(op, "suspend")) {
        msg_printf(p, MSG_INFO, "task %s suspended by user", result_name);
        rp->suspended_via_gui = true;
        gstate.request_work_fetch("result suspended by user");
    } else if (!strcmp(op, "resume")) {
        msg_printf(p, MSG_INFO, "task %s resumed by user", result_name);
        rp->suspended_via_gui = false;
    }
    // any of the above may change what should be running
    gstate.request_schedule_cpus("result suspended, resumed or aborted by user");
    gstate.set_client_state_dirty("Result RPC");
    fout.printf("<success/>\n");
}
// GUI RPC: request a graphics mode change
// (fullscreen / hidden / windowed, from the request tags)
// either for one named result, or — if no <result_name> is given —
// for all currently scheduled tasks.
//
static void handle_result_show_graphics(char* buf, MIOFILE& fout) {
    string result_name;
    GRAPHICS_MSG gm;
    ACTIVE_TASK* atp;

    // decode the requested graphics mode
    if (match_tag(buf, "<full_screen/>")) {
        gm.mode = MODE_FULLSCREEN;
    } else if (match_tag(buf, "<hide/>")) {
        gm.mode = MODE_HIDE_GRAPHICS;
    } else {
        gm.mode = MODE_WINDOW;
    }

    // optional display-target parameters (Windows/X11)
    parse_str(buf, "<window_station>", gm.window_station, sizeof(gm.window_station));
    parse_str(buf, "<desktop>", gm.desktop, sizeof(gm.desktop));
    parse_str(buf, "<display>", gm.display, sizeof(gm.display));

    if (parse_str(buf, "<result_name>", result_name)) {
        // single-result form
        PROJECT* p = get_project(buf, fout);
        if (!p) {
            fout.printf("<error>No such project</error>\n");
            return;
        }
        RESULT* rp = gstate.lookup_result(p, result_name.c_str());
        if (!rp) {
            fout.printf("<error>No such result</error>\n");
            return;
        }
        atp = gstate.lookup_active_task_by_result(rp);
        if (!atp) {
            fout.printf("<error>no such result</error>\n");
            return;
        }
        atp->request_graphics_mode(gm);
    } else {
        // no result named: apply to every scheduled task
        for (unsigned int i=0; i<gstate.active_tasks.active_tasks.size(); i++) {
            atp = gstate.active_tasks.active_tasks[i];
            if (atp->scheduler_state != CPU_SCHED_SCHEDULED) continue;
            atp->request_graphics_mode(gm);
        }
    }
    fout.printf("<success/>\n");
}
// suspend all currently running tasks // called only from CLIENT_STATE::suspend_tasks(), // e.g. because on batteries, time of day, benchmarking, CPU throttle, etc. // void ACTIVE_TASK_SET::suspend_all(int reason) { for (unsigned int i=0; i<active_tasks.size(); i++) { ACTIVE_TASK* atp = active_tasks[i]; if (atp->task_state() != PROCESS_EXECUTING) continue; switch (reason) { case SUSPEND_REASON_CPU_THROTTLE: // if we're doing CPU throttling, don't bother suspending apps // that don't use a full CPU // if (atp->result->dont_throttle()) continue; if (atp->app_version->avg_ncpus < 1) continue; atp->preempt(REMOVE_NEVER); break; case SUSPEND_REASON_BENCHMARKS: atp->preempt(REMOVE_NEVER); break; case SUSPEND_REASON_CPU_USAGE: // If we're suspending because of non-BOINC CPU load, // don't remove from memory. // Some systems do a security check when apps are launched, // which uses a lot of CPU. // Avoid going into a preemption loop. // if (atp->result->non_cpu_intensive()) break; atp->preempt(REMOVE_NEVER); break; default: atp->preempt(REMOVE_MAYBE_USER); } } }
// Estimate the remaining runtime (in seconds) of this result.
// 0 if computation is done.
// For non-CPU-intensive apps: extrapolate from fraction done of the
// running task (0 if not running or no progress yet).
// Otherwise: running tasks use their own estimate; queued ones use
// the static per-result estimate.
//
double RESULT::estimated_runtime_remaining() {
    if (computing_done()) return 0;
    ACTIVE_TASK* atp = gstate.lookup_active_task_by_result(this);
    if (app->non_cpu_intensive) {
        if (atp && atp->fraction_done>0) {
            // linear extrapolation: total = elapsed-at-progress / progress
            double est_dur = atp->fraction_done_elapsed_time / atp->fraction_done;
            double x = est_dur - atp->elapsed_time;
            // clamp to a positive value if the estimate already passed
            if (x <= 0) x = 1;
            return x;
        }
        return 0;
    }

    if (atp) {
#ifdef SIM
        // simulator: derive from remaining FLOPs and app version speed
        return sim_flops_left/avp->flops;
#else
        return atp->est_dur() - atp->elapsed_time;
#endif
    }
    return estimated_runtime();
}
// Wait up to wait_time seconds for processes to exit // If proj is zero, wait for all processes, else that project's // NOTE: it's bad form to sleep, but it would be complex to avoid it here // int ACTIVE_TASK_SET::wait_for_exit(double wait_time, PROJECT* proj) { bool all_exited; unsigned int i,n; ACTIVE_TASK *atp; for (i=0; i<10; i++) { all_exited = true; for (n=0; n<active_tasks.size(); n++) { atp = active_tasks[n]; if (proj && atp->wup->project != proj) continue; if (!atp->has_task_exited()) { all_exited = false; break; } } if (all_exited) return 0; boinc_sleep(wait_time/10.0); } return ERR_NOT_EXITED; }
// suspend all currently running tasks // called only from CLIENT_STATE::suspend_tasks(), // e.g. because on batteries, time of day, benchmarking, CPU throttle, etc. // void ACTIVE_TASK_SET::suspend_all(int reason) { for (unsigned int i=0; i<active_tasks.size(); i++) { ACTIVE_TASK* atp = active_tasks[i]; if (atp->task_state() != PROCESS_EXECUTING) continue; switch (reason) { case SUSPEND_REASON_CPU_THROTTLE: // if we're doing CPU throttling, don't bother suspending apps // that don't use a full CPU // if (atp->result->project->non_cpu_intensive) continue; if (atp->app_version->avg_ncpus < 1) continue; atp->preempt(REMOVE_NEVER); break; case SUSPEND_REASON_BENCHMARKS: atp->preempt(REMOVE_NEVER); break; case SUSPEND_REASON_CPU_USAGE: if (atp->result->project->non_cpu_intensive) break; // fall through default: atp->preempt(REMOVE_MAYBE_USER); } } }
// Scan the set of all processes to
// 1) get the working-set size of active tasks
// 2) see if exclusive apps are running
// 3) get CPU time of non-BOINC processes
//
// Runs at most once per MEMORY_USAGE_PERIOD; rate computations
// (page-fault rate, non-BOINC CPU usage) are deltas against the
// previous scan, so they're skipped on the first call.
//
void ACTIVE_TASK_SET::get_memory_usage() {
    static double last_mem_time=0;
    unsigned int i;
    int retval;
    static bool first = true;
    static double last_cpu_time;
    double diff=0;

    if (!first) {
        diff = gstate.now - last_mem_time;
        if (diff < 0 || diff > MEMORY_USAGE_PERIOD + 10) {
            // user has changed system clock,
            // or there has been a long system sleep
            //
            last_mem_time = gstate.now;
            return;
        }
        // too soon since the last scan
        if (diff < MEMORY_USAGE_PERIOD) return;
    }

    last_mem_time = gstate.now;
    PROC_MAP pm;
    retval = procinfo_setup(pm);
    if (retval) {
        if (log_flags.mem_usage_debug) {
            msg_printf(NULL, MSG_INTERNAL_ERROR,
                "[mem_usage] procinfo_setup() returned %d", retval
            );
        }
        return;
    }
    // totals across all BOINC tasks, for debug logging only
    PROCINFO boinc_total;
    if (log_flags.mem_usage_debug) {
        boinc_total.clear();
        boinc_total.working_set_size_smoothed = 0;
    }
    for (i=0; i<active_tasks.size(); i++) {
        ACTIVE_TASK* atp = active_tasks[i];
        if (atp->task_state() == PROCESS_UNINITIALIZED) continue;
        if (atp->pid ==0) continue;

        // scan all active tasks with a process, even if not scheduled, because
        // 1) we might have recently suspended a tasks,
        //    and we still need to count its time
        // 2) preempted tasks might not actually suspend themselves
        //    (and we'd count that as non-BOINC CPU usage
        //    and suspend everything).

        PROCINFO& pi = atp->procinfo;
        // remember old count so we can compute a page-fault rate
        unsigned long last_page_fault_count = pi.page_fault_count;
        pi.clear();
        pi.id = atp->pid;
        vector<int>* v = NULL;
        if (atp->other_pids.size()>0) {
            v = &(atp->other_pids);
        }
        procinfo_app(pi, v, pm, atp->app_version->graphics_exec_file);
        if (atp->app_version->is_vm_app) {
            // the memory of virtual machine apps is not reported correctly,
            // at least on Windows. Use the VM size instead.
            //
            pi.working_set_size_smoothed = atp->wup->rsc_memory_bound;
        } else {
            // exponential smoothing of the measured working set
            pi.working_set_size_smoothed = .5*(pi.working_set_size_smoothed + pi.working_set_size);
        }
        // track high-water marks
        if (pi.working_set_size > atp->peak_working_set_size) {
            atp->peak_working_set_size = pi.working_set_size;
        }
        if (pi.swap_size > atp->peak_swap_size) {
            atp->peak_swap_size = pi.swap_size;
        }
        if (!first) {
            int pf = pi.page_fault_count - last_page_fault_count;
            pi.page_fault_rate = pf/diff;
            if (log_flags.mem_usage_debug) {
                msg_printf(atp->result->project, MSG_INFO,
                    "[mem_usage] %s%s: WS %.2fMB, smoothed %.2fMB, swap %.2fMB, %.2f page faults/sec, user CPU %.3f, kernel CPU %.3f",
                    atp->scheduler_state==CPU_SCHED_SCHEDULED?"":" (not running)",
                    atp->result->name,
                    pi.working_set_size/MEGA,
                    pi.working_set_size_smoothed/MEGA,
                    pi.swap_size/MEGA,
                    pi.page_fault_rate,
                    pi.user_time,
                    pi.kernel_time
                );
                boinc_total.working_set_size += pi.working_set_size;
                boinc_total.working_set_size_smoothed += pi.working_set_size_smoothed;
                boinc_total.swap_size += pi.swap_size;
                boinc_total.page_fault_rate += pi.page_fault_rate;
            }
        }
    }
    if (!first) {
        if (log_flags.mem_usage_debug) {
            msg_printf(0, MSG_INFO,
                "[mem_usage] BOINC totals: WS %.2fMB, smoothed %.2fMB, swap %.2fMB, %.2f page faults/sec",
                boinc_total.working_set_size/MEGA,
                boinc_total.working_set_size_smoothed/MEGA,
                boinc_total.swap_size/MEGA,
                boinc_total.page_fault_rate
            );
        }
    }

    // check whether any user-configured exclusive app is running;
    // record the time we last saw one
    for (i=0; i<cc_config.exclusive_apps.size(); i++) {
        if (app_running(pm, cc_config.exclusive_apps[i].c_str())) {
            if (log_flags.mem_usage_debug) {
                msg_printf(NULL, MSG_INFO,
                    "[mem_usage] exclusive app %s is running",
                    cc_config.exclusive_apps[i].c_str()
                );
            }
            exclusive_app_running = gstate.now;
            break;
        }
    }
    for (i=0; i<cc_config.exclusive_gpu_apps.size(); i++) {
        if (app_running(pm, cc_config.exclusive_gpu_apps[i].c_str())) {
            if (log_flags.mem_usage_debug) {
                msg_printf(NULL, MSG_INFO,
                    "[mem_usage] exclusive GPU app %s is running",
                    cc_config.exclusive_gpu_apps[i].c_str()
                );
            }
            exclusive_gpu_app_running = gstate.now;
            break;
        }
    }

    // get info on non-BOINC processes.
    // mem usage info is not useful because most OSs don't
    // move idle processes out of RAM, so physical memory is always full.
    // Also (at least on Win) page faults are used for various things,
    // not all of them generate disk I/O,
    // so they're not useful for detecting paging/thrashing.
    //
    PROCINFO pi;
    procinfo_non_boinc(pi, pm);
    if (log_flags.mem_usage_debug) {
        //procinfo_show(pm);
        msg_printf(NULL, MSG_INFO,
            "[mem_usage] All others: WS %.2fMB, swap %.2fMB, user %.3fs, kernel %.3fs",
            pi.working_set_size/MEGA, pi.swap_size/MEGA,
            pi.user_time, pi.kernel_time
        );
    }
    double new_cpu_time = pi.user_time + pi.kernel_time;
    if (!first) {
        // fraction of total CPU capacity used by non-BOINC processes
        non_boinc_cpu_usage = (new_cpu_time - last_cpu_time)/(diff*gstate.host_info.p_ncpus);
        // processes might have exited in the last 10 sec,
        // causing this to be negative.
        if (non_boinc_cpu_usage < 0) non_boinc_cpu_usage = 0;
        if (log_flags.mem_usage_debug) {
            msg_printf(NULL, MSG_INFO,
                "[mem_usage] non-BOINC CPU usage: %.2f%%", non_boinc_cpu_usage*100
            );
        }
    }
    last_cpu_time = new_cpu_time;
    first = false;
}
// Let each active task process its pending upload-file requests.
//
void ACTIVE_TASK_SET::handle_upload_files() {
    unsigned int idx;
    for (idx=0; idx<active_tasks.size(); idx++) {
        active_tasks[idx]->handle_upload_files();
    }
}
// Handle the reply from a scheduler RPC:
// parse the reply file, apply server-supplied preferences and keys,
// merge new apps/files/app versions/workunits/results into client state,
// process acks and abort requests, and update per-project RPC state.
//
// project:       the project the RPC was made to
// scheduler_url: URL of the scheduler we contacted
// Returns 0 on success, or an ERR_* code (e.g. ERR_FOPEN,
// ERR_PROJECT_DOWN, or a parse/save error).
//
int CLIENT_STATE::handle_scheduler_reply(
    PROJECT* project, char* scheduler_url
) {
    SCHEDULER_REPLY sr;
    FILE* f;
    int retval;
    unsigned int i;
    bool signature_valid, update_global_prefs=false, update_project_prefs=false;
    char buf[1024], filename[256];
    std::string old_gui_urls = project->gui_urls;
    PROJECT* p2;
    vector<RESULT*>new_results;

    project->last_rpc_time = now;

    if (requested_work()) {
        had_or_requested_work = true;
    }

    // read and parse the reply file written by the RPC layer
    get_sched_reply_filename(*project, filename, sizeof(filename));
    f = fopen(filename, "r");
    if (!f) return ERR_FOPEN;
    retval = sr.parse(f, project);
    fclose(f);
    if (retval) return retval;

    if (log_flags.sched_ops) {
        if (requested_work()) {
            sprintf(buf, ": got %d new tasks", (int)sr.results.size());
        } else {
            strcpy(buf, "");
        }
        msg_printf(project, MSG_INFO, "Scheduler request completed%s", buf);
    }
    if (log_flags.sched_op_debug) {
        if (sr.scheduler_version) {
            msg_printf(project, MSG_INFO,
                "[sched_op] Server version %d",
                sr.scheduler_version
            );
        }
    }

    // check that master URL is correct
    //
    if (strlen(sr.master_url)) {
        canonicalize_master_url(sr.master_url);
        string url1 = sr.master_url;
        string url2 = project->master_url;
        downcase_string(url1);
        downcase_string(url2);
        if (url1 != url2) {
            p2 = lookup_project(sr.master_url);
            if (p2) {
                msg_printf(project, MSG_USER_ALERT,
                    "You are attached to this project twice. Please remove projects named %s, then add %s",
                    project->project_name,
                    sr.master_url
                );
            } else {
                msg_printf(project, MSG_INFO,
                    _("You used the wrong URL for this project. When convenient, remove this project, then add %s"),
                    sr.master_url
                );
            }
        }
    }

    // make sure we don't already have a project of same name
    //
    bool dup_name = false;
    for (i=0; i<projects.size(); i++) {
        p2 = projects[i];
        if (project == p2) continue;
        if (!strcmp(p2->project_name, project->project_name)) {
            dup_name = true;
            break;
        }
    }
    if (dup_name) {
        msg_printf(project, MSG_INFO,
            "Already attached to a project named %s (possibly with wrong URL)",
            project->project_name
        );
        msg_printf(project, MSG_INFO,
            "Consider detaching this project, then trying again"
        );
    }

    // show messages from server
    //
    for (i=0; i<sr.messages.size(); i++) {
        USER_MESSAGE& um = sr.messages[i];
        int prio = (!strcmp(um.priority.c_str(), "notice"))?MSG_SCHEDULER_ALERT:MSG_INFO;
        // escape % so msg_printf doesn't treat it as a format spec
        string_substitute(um.message.c_str(), buf, sizeof(buf), "%", "%%");
        msg_printf(project, prio, "%s", buf);
    }

    if (log_flags.sched_op_debug && sr.request_delay) {
        msg_printf(project, MSG_INFO,
            "Project requested delay of %.0f seconds", sr.request_delay
        );
    }

    // if project is down, return error (so that we back off)
    // and don't do anything else
    //
    if (sr.project_is_down) {
        if (sr.request_delay) {
            double x = now + sr.request_delay;
            project->set_min_rpc_time(x, "project is down");
        }
        return ERR_PROJECT_DOWN;
    }

    // if the scheduler reply includes global preferences,
    // insert extra elements, write to disk, and parse
    //
    if (sr.global_prefs_xml) {
        // skip this if we have host-specific prefs
        // and we're talking to an old scheduler
        //
        if (!global_prefs.host_specific || sr.scheduler_version >= 507) {
            retval = save_global_prefs(
                sr.global_prefs_xml, project->master_url, scheduler_url
            );
            if (retval) {
                return retval;
            }
            update_global_prefs = true;
        } else {
            if (log_flags.sched_op_debug) {
                msg_printf(project, MSG_INFO,
                    "ignoring prefs from old server; we have host-specific prefs"
                );
            }
        }
    }

    // see if we have a new venue from this project
    // (this must go AFTER the above, since otherwise
    // global_prefs_source_project() is meaningless)
    //
    if (strcmp(project->host_venue, sr.host_venue)) {
        safe_strcpy(project->host_venue, sr.host_venue);
        msg_printf(project, MSG_INFO, "New computer location: %s", sr.host_venue);
        update_project_prefs = true;
        if (project == global_prefs_source_project()) {
            strcpy(main_host_venue, sr.host_venue);
            update_global_prefs = true;
        }
    }

    if (update_global_prefs) {
        read_global_prefs();
    }

    // deal with project preferences (should always be there)
    // If they've changed, write to account file,
    // then parse to get our venue, and pass to running apps
    //
    if (sr.project_prefs_xml) {
        if (strcmp(project->project_prefs.c_str(), sr.project_prefs_xml)) {
            project->project_prefs = string(sr.project_prefs_xml);
            update_project_prefs = true;
        }
    }

    // the account file has GUI URLs and project prefs.
    // rewrite if either of these has changed
    //
    if (project->gui_urls != old_gui_urls || update_project_prefs) {
        retval = project->write_account_file();
        if (retval) {
            msg_printf(project, MSG_INTERNAL_ERROR,
                "Can't write account file: %s", boincerror(retval)
            );
            return retval;
        }
    }

    if (update_project_prefs) {
        project->parse_account_file();
        if (strlen(project->host_venue)) {
            project->parse_account_file_venue();
        }
        project->parse_preferences_for_user_files();
        active_tasks.request_reread_prefs(project);
    }

    // if the scheduler reply includes a code-signing key,
    // accept it if we don't already have one from the project.
    // Otherwise verify its signature, using the key we already have.
    //
    if (sr.code_sign_key) {
        if (!strlen(project->code_sign_key)) {
            safe_strcpy(project->code_sign_key, sr.code_sign_key);
        } else {
            if (sr.code_sign_key_signature) {
                retval = check_string_signature2(
                    sr.code_sign_key, sr.code_sign_key_signature,
                    project->code_sign_key, signature_valid
                );
                if (!retval && signature_valid) {
                    safe_strcpy(project->code_sign_key, sr.code_sign_key);
                } else {
                    msg_printf(project, MSG_INTERNAL_ERROR,
                        "New code signing key doesn't validate"
                    );
                }
            } else {
                msg_printf(project, MSG_INTERNAL_ERROR,
                    "Missing code sign key signature"
                );
            }
        }
    }

    // copy new entities to client state
    //
    for (i=0; i<sr.apps.size(); i++) {
        APP* app = lookup_app(project, sr.apps[i].name);
        if (app) {
            // existing app: just refresh the display name
            strcpy(app->user_friendly_name, sr.apps[i].user_friendly_name);
        } else {
            app = new APP;
            *app = sr.apps[i];
            retval = link_app(project, app);
            if (retval) {
                msg_printf(project, MSG_INTERNAL_ERROR,
                    "Can't handle application %s in scheduler reply", app->name
                );
                delete app;
            } else {
                apps.push_back(app);
            }
        }
    }
    FILE_INFO* fip;
    for (i=0; i<sr.file_infos.size(); i++) {
        fip = lookup_file_info(project, sr.file_infos[i].name);
        if (fip) {
            fip->merge_info(sr.file_infos[i]);
        } else {
            fip = new FILE_INFO;
            *fip = sr.file_infos[i];
            retval = link_file_info(project, fip);
            if (retval) {
                msg_printf(project, MSG_INTERNAL_ERROR,
                    "Can't handle file %s in scheduler reply", fip->name
                );
                delete fip;
            } else {
                file_infos.push_back(fip);
            }
        }
    }
    // server-requested file deletes: just clear the sticky flag;
    // garbage collection does the actual removal
    for (i=0; i<sr.file_deletes.size(); i++) {
        fip = lookup_file_info(project, sr.file_deletes[i].c_str());
        if (fip) {
            if (log_flags.file_xfer_debug) {
                msg_printf(project, MSG_INFO,
                    "[file_xfer_debug] Got server request to delete file %s",
                    fip->name
                );
            }
            fip->sticky = false;
        }
    }
    for (i=0; i<sr.app_versions.size(); i++) {
        if (project->anonymous_platform) {
            msg_printf(project, MSG_INTERNAL_ERROR,
                "App version returned from anonymous platform project; ignoring"
            );
            continue;
        }
        APP_VERSION& avpp = sr.app_versions[i];
        if (strlen(avpp.platform) == 0) {
            strcpy(avpp.platform, get_primary_platform());
        } else {
            if (!is_supported_platform(avpp.platform)) {
                msg_printf(project, MSG_INTERNAL_ERROR,
                    "App version has unsupported platform %s", avpp.platform
                );
                continue;
            }
        }
        if (avpp.missing_coproc) {
            msg_printf(project, MSG_INTERNAL_ERROR,
                "App version uses non-existent %s GPU",
                avpp.missing_coproc_name
            );
        }
        APP* app = lookup_app(project, avpp.app_name);
        if (!app) {
            msg_printf(project, MSG_INTERNAL_ERROR,
                "Missing app %s", avpp.app_name
            );
            continue;
        }
        APP_VERSION* avp = lookup_app_version(
            app, avpp.platform, avpp.version_num, avpp.plan_class
        );
        if (avp) {
            // update performance-related info;
            // generally this shouldn't change,
            // but if it does it's better to use the new stuff
            //
            avp->avg_ncpus = avpp.avg_ncpus;
            avp->max_ncpus = avpp.max_ncpus;
            avp->flops = avpp.flops;
            strcpy(avp->cmdline, avpp.cmdline);
            avp->gpu_usage = avpp.gpu_usage;
            strlcpy(avp->api_version, avpp.api_version, sizeof(avp->api_version));
            avp->dont_throttle = avpp.dont_throttle;
            avp->needs_network = avpp.needs_network;

            // if we had download failures, clear them
            //
            avp->clear_errors();
            continue;
        }
        avp = new APP_VERSION;
        *avp = avpp;
        retval = link_app_version(project, avp);
        if (retval) {
            delete avp;
            continue;
        }
        app_versions.push_back(avp);
    }
    for (i=0; i<sr.workunits.size(); i++) {
        if (lookup_workunit(project, sr.workunits[i].name)) continue;
        WORKUNIT* wup = new WORKUNIT;
        *wup = sr.workunits[i];
        wup->project = project;
        retval = link_workunit(project, wup);
        if (retval) {
            msg_printf(project, MSG_INTERNAL_ERROR,
                "Can't handle task %s in scheduler reply", wup->name
            );
            delete wup;
            continue;
        }
        wup->clear_errors();
        workunits.push_back(wup);
    }
    // per-resource total of estimated runtime of the new results
    double est_rsc_runtime[MAX_RSC];
    for (int j=0; j<coprocs.n_rsc; j++) {
        est_rsc_runtime[j] = 0;
    }
    for (i=0; i<sr.results.size(); i++) {
        if (lookup_result(project, sr.results[i].name)) {
            msg_printf(project, MSG_INTERNAL_ERROR,
                "Already have task %s\n", sr.results[i].name
            );
            continue;
        }
        RESULT* rp = new RESULT;
        *rp = sr.results[i];
        retval = link_result(project, rp);
        if (retval) {
            msg_printf(project, MSG_INTERNAL_ERROR,
                "Can't handle task %s in scheduler reply", rp->name
            );
            delete rp;
            continue;
        }
        if (strlen(rp->platform) == 0) {
            // no platform given: assume our primary platform,
            // and its latest app version
            strcpy(rp->platform, get_primary_platform());
            rp->version_num = latest_version(rp->wup->app, rp->platform);
        }
        rp->avp = lookup_app_version(
            rp->wup->app, rp->platform, rp->version_num, rp->plan_class
        );
        if (!rp->avp) {
            msg_printf(project, MSG_INTERNAL_ERROR,
                "No app version found for app %s platform %s ver %d class %s; discarding %s",
                rp->wup->app->name, rp->platform, rp->version_num, rp->plan_class, rp->name
            );
            delete rp;
            continue;
        }
        if (rp->avp->missing_coproc) {
            msg_printf(project, MSG_INTERNAL_ERROR,
                "Missing coprocessor for task %s; aborting", rp->name
            );
            rp->abort_inactive(EXIT_MISSING_COPROC);
        } else {
            rp->set_state(RESULT_NEW, "handle_scheduler_reply");
            int rt = rp->avp->gpu_usage.rsc_type;
            if (rt > 0) {
                est_rsc_runtime[rt] += rp->estimated_runtime();
                gpus_usable = true;
                // trigger a check of whether GPU is actually usable
            } else {
                est_rsc_runtime[0] += rp->estimated_runtime();
            }
        }
        rp->wup->version_num = rp->version_num;
        rp->received_time = now;
        new_results.push_back(rp);
        results.push_back(rp);
    }
    sort_results();

    if (log_flags.sched_op_debug) {
        if (sr.results.size()) {
            for (int j=0; j<coprocs.n_rsc; j++) {
                msg_printf(project, MSG_INFO,
                    "[sched_op] estimated total %s task duration: %.0f seconds",
                    rsc_name(j),
                    est_rsc_runtime[j]/time_stats.availability_frac(j)
                );
            }
        }
    }

    // update records for ack'ed results
    //
    for (i=0; i<sr.result_acks.size(); i++) {
        if (log_flags.sched_op_debug) {
            msg_printf(project, MSG_INFO,
                "[sched_op] handle_scheduler_reply(): got ack for task %s\n",
                sr.result_acks[i].name
            );
        }
        RESULT* rp = lookup_result(project, sr.result_acks[i].name);
        if (rp) {
            rp->got_server_ack = true;
        } else {
            msg_printf(project, MSG_INTERNAL_ERROR,
                "Got ack for task %s, but can't find it", sr.result_acks[i].name
            );
        }
    }

    // handle result abort requests
    //
    for (i=0; i<sr.result_abort.size(); i++) {
        RESULT* rp = lookup_result(project, sr.result_abort[i].name);
        if (rp) {
            ACTIVE_TASK* atp = lookup_active_task_by_result(rp);
            if (atp) {
                atp->abort_task(EXIT_ABORTED_BY_PROJECT,
                    "aborted by project - no longer usable"
                );
            } else {
                rp->abort_inactive(EXIT_ABORTED_BY_PROJECT);
            }
        } else {
            msg_printf(project, MSG_INTERNAL_ERROR,
                "Server requested abort of unknown task %s",
                sr.result_abort[i].name
            );
        }
    }
    // conditional aborts: only if the task hasn't started yet
    for (i=0; i<sr.result_abort_if_not_started.size(); i++) {
        RESULT* rp = lookup_result(project, sr.result_abort_if_not_started[i].name);
        if (!rp) {
            msg_printf(project, MSG_INTERNAL_ERROR,
                "Server requested conditional abort of unknown task %s",
                sr.result_abort_if_not_started[i].name
            );
            continue;
        }
        if (rp->not_started) {
            rp->abort_inactive(EXIT_ABORTED_BY_PROJECT);
        }
    }

    // remove acked trickle files
    //
    if (sr.message_ack) {
        remove_trickle_files(project);
    }
    if (sr.send_full_workload) {
        project->send_full_workload = true;
    }
    project->dont_use_dcf = sr.dont_use_dcf;
    project->send_time_stats_log = sr.send_time_stats_log;
    project->send_job_log = sr.send_job_log;
    project->trickle_up_pending = false;

    // The project returns a hostid only if it has created a new host record.
    // In that case reset RPC seqno
    //
    if (sr.hostid) {
        if (project->hostid) {
            // if we already have a host ID for this project,
            // we must have sent it a stale seqno,
            // which usually means our state file was copied from another host.
            // So generate a new host CPID.
            //
            generate_new_host_cpid();
            msg_printf(project, MSG_INFO,
                "Generated new computer cross-project ID: %s",
                host_info.host_cpid
            );
        }
        //msg_printf(project, MSG_INFO, "Changing host ID from %d to %d", project->hostid, sr.hostid);
        project->hostid = sr.hostid;
        project->rpc_seqno = 0;
    }

#ifdef ENABLE_AUTO_UPDATE
    if (sr.auto_update.present) {
        if (!sr.auto_update.validate_and_link(project)) {
            auto_update = sr.auto_update;
        }
    }
#endif

    project->project_files = sr.project_files;
    project->link_project_files();
    project->create_project_file_symlinks();

    if (log_flags.state_debug) {
        msg_printf(project, MSG_INFO,
            "[state] handle_scheduler_reply(): State after handle_scheduler_reply():"
        );
        print_summary();
    }

    // the following must precede the backoff and request_delay checks,
    // since it overrides them
    //
    if (sr.next_rpc_delay) {
        project->next_rpc_time = now + sr.next_rpc_delay;
    } else {
        project->next_rpc_time = 0;
    }

    work_fetch.handle_reply(project, &sr, new_results);

    // successful RPC: clear failure count and backoff
    project->nrpc_failures = 0;
    project->min_rpc_time = 0;

    if (sr.request_delay) {
        double x = now + sr.request_delay;
        project->set_min_rpc_time(x, "requested by project");
    }

    if (sr.got_rss_feeds) {
        handle_sr_feeds(sr.sr_feeds, project);
    }

    update_trickle_up_urls(project, sr.trickle_up_urls);

    // garbage collect in case the project sent us some irrelevant FILE_INFOs;
    // avoid starting transfers for them
    //
    gstate.garbage_collect_always();

    return 0;
}
void simulate() { bool action; double start = START_TIME; gstate.now = start; html_start(); fprintf(summary_file, "Hardware summary\n %d CPUs, %.1f GFLOPS\n", gstate.host_info.p_ncpus, gstate.host_info.p_fpops/1e9 ); for (int i=1; i<coprocs.n_rsc; i++) { fprintf(summary_file, " %d %s GPUs, %.1f GFLOPS\n", coprocs.coprocs[i].count, coprocs.coprocs[i].type, coprocs.coprocs[i].peak_flops/1e9 ); } fprintf(summary_file, "Preferences summary\n" " work buf min %f max %f\n" " Scheduling period %f\n" "Scheduling policies\n" " Round-robin only: %s\n" " Scheduler EDF simulation: %s\n" " REC half-life: %f\n", gstate.work_buf_min(), gstate.work_buf_total(), gstate.global_prefs.cpu_scheduling_period(), cpu_sched_rr_only?"yes":"no", server_uses_workload?"yes":"no", cc_config.rec_half_life ); fprintf(summary_file, "Jobs\n"); for (unsigned int i=0; i<gstate.results.size(); i++) { RESULT* rp = gstate.results[i]; fprintf(summary_file, " %s %s (%s)\n time left %s deadline %s\n", rp->project->project_name, rp->name, rsc_name_long(rp->avp->gpu_usage.rsc_type), timediff_format(rp->sim_flops_left/rp->avp->flops).c_str(), timediff_format(rp->report_deadline - START_TIME).c_str() ); } fprintf(summary_file, "Simulation parameters\n" " time step %f, duration %f\n" "-------------------\n", delta, duration ); write_inputs(); while (1) { on = on_proc.sample(delta); if (on) { active = active_proc.sample(delta); if (active) { gpu_active = gpu_active_proc.sample(delta); } else { gpu_active = false; } connected = connected_proc.sample(delta); } else { active = gpu_active = connected = false; } // do accounting for the period that just ended, // even if we're now in an "off" state. 
// // need both of the following, else crash // action |= gstate.active_tasks.poll(); action |= gstate.handle_finished_apps(); if (on) { while (1) { action = false; action |= gstate.schedule_cpus(); if (connected) { action |= gstate.scheduler_rpc_poll(); // this deletes completed results } action |= gstate.active_tasks.poll(); action |= gstate.handle_finished_apps(); gpu_suspend_reason = gpu_active?0:1; //msg_printf(0, MSG_INFO, action?"did action":"did no action"); if (!action) break; } } //msg_printf(0, MSG_INFO, "took time step"); for (unsigned int i=0; i<gstate.active_tasks.active_tasks.size(); i++) { ACTIVE_TASK* atp = gstate.active_tasks.active_tasks[i]; if (atp->task_state() == PROCESS_EXECUTING) { atp->elapsed_time += delta; } } html_rec(); write_recs(); gstate.now += delta; if (gstate.now > start + duration) break; } html_end(); }
// Simulator replacement for the client's ACTIVE_TASK_SET::poll().
// Advances all executing tasks by the simulated time since the last call:
// burns down each job's remaining FLOPs, marks finished jobs EXITED,
// and accumulates per-project peak-FLOPs and idle-time statistics.
// Returns true if any job finished this call.
//
bool ACTIVE_TASK_SET::poll() {
    unsigned int i;
    char buf[256];
    bool action = false;
    static double last_time = START_TIME;
    double diff = gstate.now - last_time;
    // throttle: only account once per simulated second
    if (diff < 1.0) return false;
    last_time = gstate.now;
    // a gap larger than one time step means we skipped ahead;
    // don't credit work for the missing interval
    if (diff > delta) {
        diff = 0;
    }
    // assume every project is idle until we see one of its jobs executing
    PROJECT* p;
    for (i=0; i<gstate.projects.size(); i++) {
        p = gstate.projects[i];
        p->idle = true;
    }

    // we do two kinds of FLOPs accounting:
    // 1) actual FLOPS (for job completion)
    // 2) peak FLOPS (for total and per-project resource usage)
    //
    // CPU may be overcommitted, in which case we compute
    // a "cpu_scale" factor that is < 1.
    // GPUs are never overcommitted.
    //
    // actual FLOPS is based on app_version.flops, scaled by cpu_scale for CPU jobs
    // peak FLOPS is based on device peak FLOPS,
    // with CPU component scaled by cpu_scale for all jobs

    // get CPU usage by GPU and CPU jobs
    //
    double cpu_usage_cpu=0;
    double cpu_usage_gpu=0;
    for (i=0; i<active_tasks.size(); i++) {
        ACTIVE_TASK* atp = active_tasks[i];
        if (atp->task_state() != PROCESS_EXECUTING) continue;
        RESULT* rp = atp->result;
        if (rp->uses_gpu()) {
            // a GPU job's CPU component only runs while the GPU is usable
            if (gpu_active) {
                cpu_usage_gpu += rp->avp->avg_ncpus;
            }
        } else {
            cpu_usage_cpu += rp->avp->avg_ncpus;
        }
    }
    double cpu_usage = cpu_usage_cpu + cpu_usage_gpu;

    // if CPU is overcommitted, compute cpu_scale
    // (GPU jobs' CPU share is taken off the top; only CPU jobs are scaled)
    //
    double cpu_scale = 1;
    if (cpu_usage > gstate.ncpus) {
        cpu_scale = (gstate.ncpus - cpu_usage_gpu) / (cpu_usage - cpu_usage_gpu);
    }

    double used = 0;
    for (i=0; i<active_tasks.size(); i++) {
        ACTIVE_TASK* atp = active_tasks[i];
        if (atp->task_state() != PROCESS_EXECUTING) continue;
        RESULT* rp = atp->result;
        // GPU jobs make no progress while the GPU is unavailable
        if (!gpu_active && rp->uses_gpu()) {
            continue;
        }
        atp->elapsed_time += diff;
        double flops = rp->avp->flops;
        if (!rp->uses_gpu()) {
            flops *= cpu_scale;
        }
        // actual-FLOPs accounting: burn down the job's remaining work
        rp->sim_flops_left -= diff*flops;
        atp->fraction_done = 1 - rp->sim_flops_left / rp->wup->rsc_fpops_est;
        atp->checkpoint_wall_time = gstate.now;
        if (rp->sim_flops_left <= 0) {
            // job finished this step
            atp->set_task_state(PROCESS_EXITED, "poll");
            rp->exit_status = 0;
            rp->ready_to_report = true;
            gstate.request_schedule_cpus("job finished");
            gstate.request_work_fetch("job finished");
            sprintf(buf, "result %s finished<br>", rp->name);
            html_msg += buf;
            action = true;
        }
        // peak-FLOPs accounting (resource usage statistics)
        double pf = diff * app_peak_flops(rp->avp, cpu_scale);
        rp->project->project_results.flops_used += pf;
        rp->peak_flop_count += pf;
        sim_results.flops_used += pf;
        used += pf;
        rp->project->idle = false;
    }

    // update per-project idle-time stats; idle_time resets when a project runs
    for (i=0; i<gstate.projects.size(); i++) {
        p = gstate.projects[i];
        if (p->idle) {
            p->idle_time += diff;
            p->idle_time_sumsq += diff*(p->idle_time*p->idle_time);
        } else {
            p->idle_time = 0;
        }
    }

    active_time += diff;
    if (gpu_active) {
        gpu_active_time += diff;
    }

    return action;
}
void show_resource(int rsc_type) { unsigned int i; char buf[256]; fprintf(html_out, "<td width=%d valign=top>", WIDTH2); bool found = false; for (i=0; i<gstate.active_tasks.active_tasks.size(); i++) { ACTIVE_TASK* atp = gstate.active_tasks.active_tasks[i]; RESULT* rp = atp->result; if (atp->task_state() != PROCESS_EXECUTING) continue; double ninst=0; if (rsc_type) { if (rp->avp->gpu_usage.rsc_type != rsc_type) continue; ninst = rp->avp->gpu_usage.usage; } else { ninst = rp->avp->avg_ncpus; } PROJECT* p = rp->project; if (!found) { found = true; fprintf(html_out, "<table>\n" "<tr><th>#devs</th><th>Job name (* = high priority)</th><th>GFLOPs left</th>%s</tr>\n", rsc_type?"<th>GPU</th>":"" ); } if (rsc_type) { sprintf(buf, "<td>%d</td>", rp->coproc_indices[0]); } else { safe_strcpy(buf, ""); } fprintf(html_out, "<tr valign=top><td>%.2f</td><td bgcolor=%s><font color=#ffffff>%s%s</font></td><td>%.0f</td>%s</tr>\n", ninst, colors[p->index%NCOLORS], rp->edf_scheduled?"*":"", rp->name, rp->sim_flops_left/1e9, buf ); } if (found) { fprintf(html_out, "</table>\n"); } else { fprintf(html_out, "IDLE\n"); } fprintf(html_out, "<table><tr><td>Project</td><td>In progress</td><td>done</td><td>REC</td></tr>\n" ); found = false; for (i=0; i<gstate.projects.size(); i++) { PROJECT* p = gstate.projects[i]; int in_progress, done; job_count(p, rsc_type, in_progress, done); if (in_progress || done) { fprintf(html_out, "<td bgcolor=%s><font color=#ffffff>%s</font></td><td>%d</td><td>%d</td><td>%.3f</td></tr>\n", colors[p->index%NCOLORS], p->project_name, in_progress, done, p->pwf.rec ); found = true; } } //if (!found) fprintf(html_out, " ---\n"); fprintf(html_out, "</table></td>"); }
// The following runs "test_app" and sends it various messages. // Used for testing the runtime system. // void run_test_app() { WORKUNIT wu; PROJECT project; APP app; APP_VERSION av; ACTIVE_TASK at; ACTIVE_TASK_SET ats; RESULT result; int retval; char buf[256]; getcwd(buf, sizeof(buf)); // so we can see where we're running gstate.run_test_app = true; wu.project = &project; wu.app = &app; wu.command_line = string("--critical_section"); strcpy(app.name, "test app"); av.init(); av.avg_ncpus = 1; strcpy(result.name, "test result"); result.avp = &av; result.wup = &wu; result.project = &project; result.app = &app; at.result = &result; at.wup = &wu; at.app_version = &av; at.max_elapsed_time = 1e6; at.max_disk_usage = 1e14; at.max_mem_usage = 1e14; strcpy(at.slot_dir, "."); #if 1 // test file copy // ASYNC_COPY* ac = new ASYNC_COPY; FILE_INFO fi; retval = ac->init(&at, &fi, "big_file", "./big_file_copy"); if (retval) { exit(1); } while (1) { do_async_file_ops(); if (at.async_copy == NULL) { break; } } fprintf(stderr, "done\n"); exit(0); #endif ats.active_tasks.push_back(&at); unlink("boinc_finish_called"); unlink("boinc_lockfile"); unlink("boinc_temporary_exit"); unlink("stderr.txt"); retval = at.start(true); if (retval) { fprintf(stderr, "start() failed: %s\n", boincerror(retval)); } while (1) { gstate.now = dtime(); at.preempt(REMOVE_NEVER); ats.poll(); boinc_sleep(.1); at.unsuspend(); ats.poll(); boinc_sleep(.2); //at.request_reread_prefs(); } }
// Write this result's state as XML for the GUI RPC interface
// (consumed by the Manager and other GUI clients).
// Every literal below is wire format; do not alter spacing or tags.
// Always returns 0.
//
int RESULT::write_gui(MIOFILE& out) {
    out.printf(
        "<result>\n"
        " <name>%s</name>\n"
        " <wu_name>%s</wu_name>\n"
        " <version_num>%d</version_num>\n"
        " <plan_class>%s</plan_class>\n"
        " <project_url>%s</project_url>\n"
        " <final_cpu_time>%f</final_cpu_time>\n"
        " <final_elapsed_time>%f</final_elapsed_time>\n"
        " <exit_status>%d</exit_status>\n"
        " <state>%d</state>\n"
        " <report_deadline>%f</report_deadline>\n"
        " <received_time>%f</received_time>\n"
        " <estimated_cpu_time_remaining>%f</estimated_cpu_time_remaining>\n",
        name,
        wu_name,
        version_num,
        plan_class,
        project->master_url,
        final_cpu_time,
        final_elapsed_time,
        exit_status,
        state(),
        report_deadline,
        received_time,
        estimated_runtime_remaining()
    );
    // optional boolean/condition flags, emitted only when set
    if (got_server_ack) out.printf(" <got_server_ack/>\n");
    if (ready_to_report) out.printf(" <ready_to_report/>\n");
    if (completed_time) out.printf(" <completed_time>%f</completed_time>\n", completed_time);
    if (suspended_via_gui) out.printf(" <suspended_via_gui/>\n");
    if (project->suspended_via_gui) out.printf(" <project_suspended_via_gui/>\n");
    if (report_immediately) out.printf(" <report_immediately/>\n");
    if (edf_scheduled) out.printf(" <edf_scheduled/>\n");
    if (coproc_missing) out.printf(" <coproc_missing/>\n");
    if (schedule_backoff > gstate.now) {
        // task is waiting out a scheduler backoff; include reason if any
        out.printf(" <scheduler_wait/>\n");
        if (strlen(schedule_backoff_reason)) {
            out.printf(
                " <scheduler_wait_reason>%s</scheduler_wait_reason>\n",
                schedule_backoff_reason
            );
        }
    }
    if (avp->needs_network && gstate.network_suspended) out.printf(" <network_wait/>\n");
    // if the result has a running/suspended task, include its state too
    ACTIVE_TASK* atp = gstate.active_tasks.lookup_result(this);
    if (atp) {
        atp->write_gui(out);
    }
    if (!strlen(resources)) {
        // only need to compute this string once
        // (cached in the member "resources" across calls)
        //
        if (avp->gpu_usage.rsc_type) {
            if (avp->gpu_usage.usage == 1) {
                sprintf(resources,
                    "%.3g CPUs + 1 %s",
                    avp->avg_ncpus,
                    rsc_name_long(avp->gpu_usage.rsc_type)
                );
            } else {
                sprintf(resources,
                    "%.3g CPUs + %.3g %ss",
                    avp->avg_ncpus,
                    avp->gpu_usage.usage,
                    rsc_name_long(avp->gpu_usage.rsc_type)
                );
            }
        } else if (avp->missing_coproc) {
            sprintf(resources,
                "%.3g CPUs + %s GPU (missing)",
                avp->avg_ncpus,
                avp->missing_coproc_name
            );
        } else if (!project->non_cpu_intensive && (avp->avg_ncpus != 1)) {
            sprintf(resources, "%.3g CPUs", avp->avg_ncpus);
        } else {
            // single blank: "no resources string" sentinel,
            // distinguished from empty (not yet computed) above
            strcpy(resources, " ");
        }
    }
    if (strlen(resources)>1) {
        char buf[256];
        strcpy(buf, "");
        if (atp && atp->scheduler_state == CPU_SCHED_SCHEDULED) {
            if (avp->gpu_usage.rsc_type) {
                COPROC& cp = coprocs.coprocs[avp->gpu_usage.rsc_type];
                if (cp.count > 1) {
                    // if there are multiple GPUs of this type,
                    // show the user which one(s) are being used
                    //
                    int n = (int)ceil(avp->gpu_usage.usage);
                    strcpy(buf, " (device ");
                    for (int i=0; i<n; i++) {
                        char buf2[256];
                        sprintf(buf2, "%d", cp.device_nums[coproc_indices[i]]);
                        if (i > 0) {
                            strcat(buf, "/");
                        }
                        strcat(buf, buf2);
                    }
                    strcat(buf, ")");
                }
            }
        }
        out.printf(
            " <resources>%s%s</resources>\n", resources, buf
        );
    }
    out.printf("</result>\n");
    return 0;
}