// Check if any of the active tasks have exceeded their // resource limits on disk, CPU time or memory // bool ACTIVE_TASK_SET::check_rsc_limits_exceeded() { //LOGD("app_control: ACTIVE_TASK_SET::check_rsc_limits_exceeded"); unsigned int i; ACTIVE_TASK *atp; static double last_disk_check_time = 0; bool do_disk_check = false; bool did_anything = false; double ram_left = gstate.available_ram(); double max_ram = gstate.max_available_ram(); // Some slot dirs have lots of files, // so only check every min(disk_interval, 300) secs // double min_interval = gstate.global_prefs.disk_interval; if (min_interval < 300) min_interval = 300; if (gstate.now > last_disk_check_time + min_interval) { do_disk_check = true; } for (i=0; i<active_tasks.size(); i++) { atp = active_tasks[i]; if (atp->task_state() != PROCESS_EXECUTING) continue; if (!atp->result->non_cpu_intensive() && (atp->elapsed_time > atp->max_elapsed_time)) { msg_printf(atp->result->project, MSG_INFO, "Aborting task %s: exceeded elapsed time limit %.2f (%.2fG/%.2fG)", atp->result->name, atp->max_elapsed_time, atp->result->wup->rsc_fpops_bound/1e9, atp->result->avp->flops/1e9 ); atp->abort_task(ERR_RSC_LIMIT_EXCEEDED, "Maximum elapsed time exceeded"); did_anything = true; continue; } if (atp->procinfo.working_set_size_smoothed > max_ram) { msg_printf(atp->result->project, MSG_INFO, "Aborting task %s: exceeded memory limit %.2fMB > %.2fMB\n", atp->result->name, atp->procinfo.working_set_size_smoothed/MEGA, max_ram/MEGA ); atp->abort_task(ERR_RSC_LIMIT_EXCEEDED, "Maximum memory exceeded"); did_anything = true; continue; } if (do_disk_check && atp->check_max_disk_exceeded()) { did_anything = true; continue; } ram_left -= atp->procinfo.working_set_size_smoothed; } if (ram_left < 0) { gstate.request_schedule_cpus("RAM usage limit exceeded"); } if (do_disk_check) { last_disk_check_time = gstate.now; } return did_anything; }
// Check if any of the active tasks have exceeded their // resource limits on disk, CPU time or memory // // TODO: this gets called ever 1 sec, // but mem and disk usage are computed less often. // refactor. // bool ACTIVE_TASK_SET::check_rsc_limits_exceeded() { unsigned int i; ACTIVE_TASK *atp; static double last_disk_check_time = 0; bool do_disk_check = false; bool did_anything = false; char buf[256]; double ram_left = gstate.available_ram(); double max_ram = gstate.max_available_ram(); // Some slot dirs have lots of files, // so only check every min(disk_interval, 300) secs // double min_interval = gstate.global_prefs.disk_interval; if (min_interval < 300) min_interval = 300; if (gstate.clock_change || gstate.now > last_disk_check_time + min_interval) { do_disk_check = true; } for (i=0; i<active_tasks.size(); i++) { atp = active_tasks[i]; if (atp->task_state() != PROCESS_EXECUTING) continue; if (!atp->result->non_cpu_intensive() && (atp->elapsed_time > atp->max_elapsed_time)) { sprintf(buf, "exceeded elapsed time limit %.2f (%.2fG/%.2fG)", atp->max_elapsed_time, atp->result->wup->rsc_fpops_bound/1e9, atp->result->avp->flops/1e9 ); msg_printf(atp->result->project, MSG_INFO, "Aborting task %s: %s", atp->result->name, buf ); atp->abort_task(EXIT_TIME_LIMIT_EXCEEDED, buf); did_anything = true; continue; } #if 0 // removing this for now because most projects currently // have too-low values of workunit.rsc_memory_bound // (causing lots of aborts) // and I don't think we can expect projects to provide // accurate bounds. // if (atp->procinfo.working_set_size_smoothed > atp->max_mem_usage) { sprintf(buf, "working set size > workunit.rsc_memory_bound: %.2fMB > %.2fMB", atp->procinfo.working_set_size_smoothed/MEGA, atp->max_mem_usage/MEGA ); msg_printf(atp->result->project, MSG_INFO, "Aborting task %s: %s", atp->result->name, buf ); atp->abort_task(EXIT_MEM_LIMIT_EXCEEDED, buf); did_anything = true; continue; } #endif if (atp->procinfo.working_set_size_smoothed > max_ram) { sprintf(buf, "working set size > client RAM limit: %.2fMB > %.2fMB", atp->procinfo.working_set_size_smoothed/MEGA, max_ram/MEGA ); msg_printf(atp->result->project, MSG_INFO, "Aborting task %s: %s", atp->result->name, buf ); atp->abort_task(EXIT_MEM_LIMIT_EXCEEDED, buf); did_anything = true; continue; } if (do_disk_check || atp->peak_disk_usage == 0) { if (atp->check_max_disk_exceeded()) { did_anything = true; continue; } } // don't count RAM usage of non-CPU-intensive jobs // if (!atp->result->non_cpu_intensive()) { ram_left -= atp->procinfo.working_set_size_smoothed; } } if (ram_left < 0) { gstate.request_schedule_cpus("RAM usage limit exceeded"); } if (do_disk_check) { last_disk_check_time = gstate.now; } return did_anything; }