// See if any processes have exited // bool ACTIVE_TASK_SET::check_app_exited() { //LOGD("app_control: ACTIVE_TASK_SET::check_app_exited"); ACTIVE_TASK* atp; bool found = false; #ifdef _WIN32 unsigned long exit_code; unsigned int i; for (i=0; i<active_tasks.size(); i++) { atp = active_tasks[i]; if (!atp->process_exists()) continue; if (GetExitCodeProcess(atp->process_handle, &exit_code)) { if (exit_code != STILL_ACTIVE) { found = true; atp->handle_exited_app(exit_code); } } else { if (log_flags.task_debug) { char errmsg[1024]; msg_printf(atp->result->project, MSG_INFO, "[task] task %s GetExitCodeProcess() failed - %s GLE %d (0x%x)", atp->result->name, windows_format_error_string( GetLastError(), errmsg, sizeof(errmsg) ), GetLastError(), GetLastError() ); } // The process doesn't seem to be there. // Mark task as aborted so we don't check it again. // atp->set_task_state(PROCESS_ABORTED, "check_app_exited"); } } #else int pid, stat; if ((pid = waitpid(-1, &stat, WNOHANG)) > 0) { atp = lookup_pid(pid); if (!atp) { // if we're running benchmarks, exited process // is probably a benchmark process; don't show error // if (!gstate.are_cpu_benchmarks_running() && log_flags.task_debug) { msg_printf(NULL, MSG_INTERNAL_ERROR, "Process %d not found\n", pid ); } return false; } atp->handle_exited_app(stat); found = true; } #endif return found; }
bool ACTIVE_TASK_SET::poll() { unsigned int i; char buf[256]; bool action = false; static double last_time = START_TIME; double diff = gstate.now - last_time; if (diff < 1.0) return false; last_time = gstate.now; if (diff > delta) { diff = 0; } PROJECT* p; for (i=0; i<gstate.projects.size(); i++) { p = gstate.projects[i]; p->idle = true; } // we do two kinds of FLOPs accounting: // 1) actual FLOPS (for job completion) // 2) peak FLOPS (for total and per-project resource usage) // // CPU may be overcommitted, in which case we compute // a "cpu_scale" factor that is < 1. // GPUs are never overcommitted. // // actual FLOPS is based on app_version.flops, scaled by cpu_scale for CPU jobs // peak FLOPS is based on device peak FLOPS, // with CPU component scaled by cpu_scale for all jobs // get CPU usage by GPU and CPU jobs // double cpu_usage_cpu=0; double cpu_usage_gpu=0; for (i=0; i<active_tasks.size(); i++) { ACTIVE_TASK* atp = active_tasks[i]; if (atp->task_state() != PROCESS_EXECUTING) continue; RESULT* rp = atp->result; if (rp->uses_gpu()) { if (gpu_active) { cpu_usage_gpu += rp->avp->avg_ncpus; } } else { cpu_usage_cpu += rp->avp->avg_ncpus; } } double cpu_usage = cpu_usage_cpu + cpu_usage_gpu; // if CPU is overcommitted, compute cpu_scale // double cpu_scale = 1; if (cpu_usage > gstate.ncpus) { cpu_scale = (gstate.ncpus - cpu_usage_gpu) / (cpu_usage - cpu_usage_gpu); } double used = 0; for (i=0; i<active_tasks.size(); i++) { ACTIVE_TASK* atp = active_tasks[i]; if (atp->task_state() != PROCESS_EXECUTING) continue; RESULT* rp = atp->result; if (!gpu_active && rp->uses_gpu()) { continue; } atp->elapsed_time += diff; double flops = rp->avp->flops; if (!rp->uses_gpu()) { flops *= cpu_scale; } rp->sim_flops_left -= diff*flops; atp->fraction_done = 1 - rp->sim_flops_left / rp->wup->rsc_fpops_est; atp->checkpoint_wall_time = gstate.now; if (rp->sim_flops_left <= 0) { atp->set_task_state(PROCESS_EXITED, "poll"); rp->exit_status = 0; rp->ready_to_report = true; gstate.request_schedule_cpus("job finished"); gstate.request_work_fetch("job finished"); sprintf(buf, "result %s finished<br>", rp->name); html_msg += buf; action = true; } double pf = diff * app_peak_flops(rp->avp, cpu_scale); rp->project->project_results.flops_used += pf; rp->peak_flop_count += pf; sim_results.flops_used += pf; used += pf; rp->project->idle = false; } for (i=0; i<gstate.projects.size(); i++) { p = gstate.projects[i]; if (p->idle) { p->idle_time += diff; p->idle_time_sumsq += diff*(p->idle_time*p->idle_time); } else { p->idle_time = 0; } } active_time += diff; if (gpu_active) { gpu_active_time += diff; } return action; }