Exemplo n.º 1
0
// Check if any of the active tasks have exceeded their
// resource limits on disk, CPU time or memory
//
bool ACTIVE_TASK_SET::check_rsc_limits_exceeded() {
//LOGD("app_control: ACTIVE_TASK_SET::check_rsc_limits_exceeded");
    unsigned int i;
    ACTIVE_TASK *atp;
    static double last_disk_check_time = 0;
    bool do_disk_check = false;
    bool did_anything = false;

    double ram_left = gstate.available_ram();
    double max_ram = gstate.max_available_ram();

    // Some slot dirs have lots of files,
    // so only check every min(disk_interval, 300) secs
    //
    double min_interval = gstate.global_prefs.disk_interval;
    if (min_interval < 300) min_interval = 300;
    if (gstate.now > last_disk_check_time + min_interval) {
        do_disk_check = true;
    }
    for (i=0; i<active_tasks.size(); i++) {
        atp = active_tasks[i];
        if (atp->task_state() != PROCESS_EXECUTING) continue;
        if (!atp->result->non_cpu_intensive() && (atp->elapsed_time > atp->max_elapsed_time)) {
            msg_printf(atp->result->project, MSG_INFO,
                "Aborting task %s: exceeded elapsed time limit %.2f (%.2fG/%.2fG)",
                atp->result->name, atp->max_elapsed_time,
                atp->result->wup->rsc_fpops_bound/1e9,
                atp->result->avp->flops/1e9
            );
            atp->abort_task(ERR_RSC_LIMIT_EXCEEDED, "Maximum elapsed time exceeded");
            did_anything = true;
            continue;
        }
        if (atp->procinfo.working_set_size_smoothed > max_ram) {
            msg_printf(atp->result->project, MSG_INFO,
                "Aborting task %s: exceeded memory limit %.2fMB > %.2fMB\n",
                atp->result->name,
                atp->procinfo.working_set_size_smoothed/MEGA, max_ram/MEGA
            );
            atp->abort_task(ERR_RSC_LIMIT_EXCEEDED, "Maximum memory exceeded");
            did_anything = true;
            continue;
        }
        if (do_disk_check && atp->check_max_disk_exceeded()) {
            did_anything = true;
            continue;
        }
        ram_left -= atp->procinfo.working_set_size_smoothed;
    }
    if (ram_left < 0) {
        gstate.request_schedule_cpus("RAM usage limit exceeded");
    }
    if (do_disk_check) {
        last_disk_check_time = gstate.now;
    }
    return did_anything;
}
Exemplo n.º 2
0
// Check if any of the executing active tasks have exceeded their
// resource limits on elapsed time, memory or disk.
// Tasks over the elapsed-time or client RAM limit are aborted here;
// disk usage is checked by ACTIVE_TASK::check_max_disk_exceeded().
//
// Returns true if any task was aborted here
// or check_max_disk_exceeded() returned true for some task.
//
// TODO: this gets called every 1 sec,
// but mem and disk usage are computed less often.
// refactor.
//
bool ACTIVE_TASK_SET::check_rsc_limits_exceeded() {
    unsigned int i;
    ACTIVE_TASK *atp;
    static double last_disk_check_time = 0;
    bool do_disk_check = false;
    bool did_anything = false;
    char buf[256];

    double ram_left = gstate.available_ram();
    double max_ram = gstate.max_available_ram();

    // Some slot dirs have lots of files,
    // so do the disk check no more often than
    // every max(disk_interval, 300) secs
    // (or right away if gstate.clock_change is set)
    //
    double min_interval = gstate.global_prefs.disk_interval;
    if (min_interval < 300) min_interval = 300;
    if (gstate.clock_change || gstate.now > last_disk_check_time + min_interval) {
        do_disk_check = true;
    }
    for (i=0; i<active_tasks.size(); i++) {
        atp = active_tasks[i];
        if (atp->task_state() != PROCESS_EXECUTING) continue;
        if (!atp->result->non_cpu_intensive() && (atp->elapsed_time > atp->max_elapsed_time)) {
            // Use snprintf: "%.2f" of a very large double can produce
            // hundreds of characters, which would overflow buf with sprintf.
            //
            snprintf(buf, sizeof(buf),
                "exceeded elapsed time limit %.2f (%.2fG/%.2fG)",
                atp->max_elapsed_time,
                atp->result->wup->rsc_fpops_bound/1e9,
                atp->result->avp->flops/1e9
            );
            msg_printf(atp->result->project, MSG_INFO,
                "Aborting task %s: %s", atp->result->name, buf
            );
            atp->abort_task(EXIT_TIME_LIMIT_EXCEEDED, buf);
            did_anything = true;
            continue;
        }
#if 0
        // removing this for now because most projects currently
        // have too-low values of workunit.rsc_memory_bound
        // (causing lots of aborts)
        // and I don't think we can expect projects to provide
        // accurate bounds.
        //
        if (atp->procinfo.working_set_size_smoothed > atp->max_mem_usage) {
            snprintf(buf, sizeof(buf),
                "working set size > workunit.rsc_memory_bound: %.2fMB > %.2fMB",
                atp->procinfo.working_set_size_smoothed/MEGA, atp->max_mem_usage/MEGA
            );
            msg_printf(atp->result->project, MSG_INFO,
                "Aborting task %s: %s",
                atp->result->name, buf
            );
            atp->abort_task(EXIT_MEM_LIMIT_EXCEEDED, buf);
            did_anything = true;
            continue;
        }
#endif
        if (atp->procinfo.working_set_size_smoothed > max_ram) {
            snprintf(buf, sizeof(buf),
                "working set size > client RAM limit: %.2fMB > %.2fMB",
                atp->procinfo.working_set_size_smoothed/MEGA, max_ram/MEGA
            );
            msg_printf(atp->result->project, MSG_INFO,
                "Aborting task %s: %s",
                atp->result->name, buf
            );
            atp->abort_task(EXIT_MEM_LIMIT_EXCEEDED, buf);
            did_anything = true;
            continue;
        }

        // Do the disk check when it's due,
        // and also for any task whose peak_disk_usage is still zero
        //
        if (do_disk_check || atp->peak_disk_usage == 0) {
            if (atp->check_max_disk_exceeded()) {
                did_anything = true;
                continue;
            }
        }

        // don't count RAM usage of non-CPU-intensive jobs
        //
        if (!atp->result->non_cpu_intensive()) {
            ram_left -= atp->procinfo.working_set_size_smoothed;
        }
    }

    // the remaining tasks together use more RAM than is available;
    // request a reschedule
    //
    if (ram_left < 0) {
        gstate.request_schedule_cpus("RAM usage limit exceeded");
    }
    if (do_disk_check) {
        last_disk_check_time = gstate.now;
    }
    return did_anything;
}