// See if any application processes have exited,
// and if so pass the exit status to the owning ACTIVE_TASK.
//
// Windows: every task for which process_exists() is true is queried with
// GetExitCodeProcess(). A task whose exit code is no longer STILL_ACTIVE
// is handled; a task whose query fails is marked PROCESS_ABORTED.
//
// Other platforms: a single non-blocking waitpid(-1, ...) is issued,
// so at most one exited child is handled per call.
// If the reaped PID does not belong to any active task, false is returned.
//
// Returns true if at least one task's exit was handled in this call.
//
bool ACTIVE_TASK_SET::check_app_exited() {
//LOGD("app_control: ACTIVE_TASK_SET::check_app_exited");
    ACTIVE_TASK* atp;
    bool found = false;

#ifdef _WIN32
    unsigned long exit_code;
    unsigned int i;

    for (i=0; i<active_tasks.size(); i++) {
        atp = active_tasks[i];
        if (!atp->process_exists()) continue;
        if (GetExitCodeProcess(atp->process_handle, &exit_code)) {
            if (exit_code != STILL_ACTIVE) {
                found = true;
                atp->handle_exited_app(exit_code);
            }
        } else {
            // Read the error code exactly once, right after the failing call.
            // Later calls (error-string formatting, logging) may overwrite
            // the thread's last-error value, and the order in which function
            // arguments are evaluated is unspecified.
            //
            const unsigned long gle = GetLastError();

            if (log_flags.task_debug) {
                char errmsg[1024];
                msg_printf(atp->result->project, MSG_INFO,
                    "[task] task %s GetExitCodeProcess() failed - %s GLE %d (0x%x)",
                    atp->result->name,
                    windows_format_error_string(
                        gle, errmsg, sizeof(errmsg)
                    ),
                    gle, gle
                );
            }

            // The process doesn't seem to be there.
            // Mark task as aborted so we don't check it again.
            //
            atp->set_task_state(PROCESS_ABORTED, "check_app_exited");
        }
    }
#else
    int pid, stat;

    // WNOHANG: don't block if no child has exited yet
    //
    if ((pid = waitpid(-1, &stat, WNOHANG)) > 0) {
        atp = lookup_pid(pid);
        if (!atp) {
            // if we're running benchmarks, exited process
            // is probably a benchmark process; don't show error
            //
            if (!gstate.are_cpu_benchmarks_running() && log_flags.task_debug) {
                msg_printf(NULL, MSG_INTERNAL_ERROR,
                    "Process %d not found\n", pid
                );
            }
            return false;
        }
        atp->handle_exited_app(stat);
        found = true;
    }
#endif

    return found;
}
// Example #2
// 0
// Advance all executing tasks by the time elapsed since the previous poll,
// and update FLOPs and per-project idle-time accounting.
//
// Returns false without doing anything if less than 1 second of gstate.now
// has passed since the last poll that did work.
// If the elapsed time exceeds delta it is treated as zero,
// so no progress is credited for that interval.
//
// A task whose sim_flops_left drops to zero or below is marked
// PROCESS_EXITED and ready to report, a line is appended to html_msg,
// and CPU rescheduling and work fetch are requested.
//
// Returns true if at least one task finished in this call.
//
bool ACTIVE_TASK_SET::poll() {
    unsigned int i;
    bool action = false;
    static double last_time = START_TIME;
    double diff = gstate.now - last_time;
    if (diff < 1.0) return false;
    last_time = gstate.now;
    if (diff > delta) {
        diff = 0;
    }
    PROJECT* p;

    // assume every project is idle until one of its tasks is seen running
    //
    for (i=0; i<gstate.projects.size(); i++) {
        p = gstate.projects[i];
        p->idle = true;
    }

    // we do two kinds of FLOPs accounting:
    // 1) actual FLOPS (for job completion)
    // 2) peak FLOPS (for total and per-project resource usage)
    //
    // CPU may be overcommitted, in which case we compute
    //  a "cpu_scale" factor that is < 1.
    // GPUs are never overcommitted.
    //
    // actual FLOPS is based on app_version.flops, scaled by cpu_scale for CPU jobs
    // peak FLOPS is based on device peak FLOPS,
    //  with CPU component scaled by cpu_scale for all jobs

    // get CPU usage by GPU and CPU jobs
    //
    double cpu_usage_cpu=0;
    double cpu_usage_gpu=0;
    for (i=0; i<active_tasks.size(); i++) {
        ACTIVE_TASK* atp = active_tasks[i];
        if (atp->task_state() != PROCESS_EXECUTING) continue;
        RESULT* rp = atp->result;
        if (rp->uses_gpu()) {
            if (gpu_active) {
                cpu_usage_gpu += rp->avp->avg_ncpus;
            }
        } else {
            cpu_usage_cpu += rp->avp->avg_ncpus;
        }
    }
    double cpu_usage = cpu_usage_cpu + cpu_usage_gpu;

    // if CPU is overcommitted, compute cpu_scale
    //
    // Guard the division: if only GPU jobs are using CPUs the denominator
    // is zero, and if GPU jobs alone need more CPUs than exist the
    // numerator is negative. Neither case may produce an infinite or
    // negative scale, which would corrupt the accounting below.
    //
    double cpu_scale = 1;
    const double cpu_jobs_usage = cpu_usage - cpu_usage_gpu;
    if (cpu_usage > gstate.ncpus && cpu_jobs_usage > 0) {
        cpu_scale = (gstate.ncpus - cpu_usage_gpu) / cpu_jobs_usage;
        if (cpu_scale < 0) {
            cpu_scale = 0;
        }
    }

    double used = 0;
    for (i=0; i<active_tasks.size(); i++) {
        ACTIVE_TASK* atp = active_tasks[i];
        if (atp->task_state() != PROCESS_EXECUTING) continue;
        RESULT* rp = atp->result;
        if (!gpu_active && rp->uses_gpu()) {
            continue;
        }
        atp->elapsed_time += diff;
        double flops = rp->avp->flops;
        if (!rp->uses_gpu()) {
            flops *= cpu_scale;
        }

        rp->sim_flops_left -= diff*flops;

        atp->fraction_done = 1 - rp->sim_flops_left / rp->wup->rsc_fpops_est;
        atp->checkpoint_wall_time = gstate.now;

        if (rp->sim_flops_left <= 0) {
            atp->set_task_state(PROCESS_EXITED, "poll");
            rp->exit_status = 0;
            rp->ready_to_report = true;
            gstate.request_schedule_cpus("job finished");
            gstate.request_work_fetch("job finished");

            // Append piecewise rather than formatting into a fixed-size
            // buffer, so a long result name can't overflow anything.
            //
            html_msg += "result ";
            html_msg += rp->name;
            html_msg += " finished<br>";
            action = true;
        }
        double pf = diff * app_peak_flops(rp->avp, cpu_scale);
        rp->project->project_results.flops_used += pf;
        rp->peak_flop_count += pf;
        sim_results.flops_used += pf;
        used += pf;
        rp->project->idle = false;
    }

    // accumulate idle time for projects that had no task running
    // in this interval; reset it for the others
    //
    for (i=0; i<gstate.projects.size(); i++) {
        p = gstate.projects[i];
        if (p->idle) {
            p->idle_time += diff;
            p->idle_time_sumsq += diff*(p->idle_time*p->idle_time);
        } else {
            p->idle_time = 0;
        }
    }
    active_time += diff;
    if (gpu_active) {
        gpu_active_time += diff;
    }

    return action;
}