void ACTIVE_TASK_SET::process_control_poll() { unsigned int i; ACTIVE_TASK* atp; for (i=0; i<active_tasks.size(); i++) { atp = active_tasks[i]; if (!atp->process_exists()) continue; if (!atp->app_client_shm.shm) continue; // if app has had the same message in its send buffer for 180 sec, // assume it's hung and restart it // if (atp->process_control_queue.timeout(180)) { if (log_flags.task_debug) { msg_printf(atp->result->project, MSG_INFO, "Restarting %s - message timeout", atp->result->name ); } atp->kill_task(true); } else { atp->process_control_queue.msg_queue_poll( atp->app_client_shm.shm->process_control_request ); } } }
void ACTIVE_TASK_SET::request_reread_app_info() { for (unsigned int i=0; i<active_tasks.size(); i++) { ACTIVE_TASK* atp = active_tasks[i]; if (!atp->process_exists()) continue; atp->request_reread_app_info(); } }
void ACTIVE_TASK_SET::send_heartbeats() { unsigned int i; ACTIVE_TASK* atp; char buf[1024]; double ar = gstate.available_ram(); for (i=0; i<active_tasks.size(); i++) { atp = active_tasks[i]; if (!atp->process_exists()) continue; if (!atp->app_client_shm.shm) continue; snprintf(buf, sizeof(buf), "<heartbeat/>" "<wss>%e</wss>" "<max_wss>%e</max_wss>", atp->procinfo.working_set_size, ar ); bool sent = atp->app_client_shm.shm->heartbeat.send_msg(buf); if (log_flags.app_msg_send) { if (sent) { msg_printf(atp->result->project, MSG_INFO, "[app_msg_send] sent heartbeat to %s", atp->result->name ); } else { msg_printf(atp->result->project, MSG_INFO, "[app_msg_send] failed to send heartbeat to %s", atp->result->name ); } } } }
void ACTIVE_TASK_SET::send_heartbeats() { unsigned int i; ACTIVE_TASK* atp; char buf[1024]; double ar = gstate.available_ram(); for (i=0; i<active_tasks.size(); i++) { atp = active_tasks[i]; if (!atp->process_exists()) continue; if (!atp->app_client_shm.shm) continue; snprintf(buf, sizeof(buf), "<heartbeat/>" "<wss>%e</wss>" "<max_wss>%e</max_wss>", atp->procinfo.working_set_size, ar ); if (gstate.network_suspended) { strcat(buf, "<network_suspended/>"); } bool sent = atp->app_client_shm.shm->heartbeat.send_msg(buf); if (log_flags.heartbeat_debug) { if (sent) { msg_printf(atp->result->project, MSG_INFO, "[heartbeat] Heartbeat sent to task %s", atp->result->name ); } else { msg_printf(atp->result->project, MSG_INFO, "[heartbeat] Heartbeat to task %s failed, previous message unread", atp->result->name ); } } } }
// check for msgs from active tasks, // and update their elapsed time and other info // void ACTIVE_TASK_SET::get_msgs() { //LOGD("app_control: ACTIVE_TASK::get_msgs"); unsigned int i; ACTIVE_TASK *atp; double old_time; static double last_time=0; double delta_t; if (last_time) { delta_t = gstate.now - last_time; // Normally this is called every second. // If delta_t is > 10, we'll assume that a period of hibernation // or suspension happened, and treat it as zero. // If negative, must be clock reset. Ignore. // if (delta_t > 10 || delta_t < 0) { delta_t = 0; } } else { delta_t = 0; } last_time = gstate.now; for (i=0; i<active_tasks.size(); i++) { atp = active_tasks[i]; if (!atp->process_exists()) continue; old_time = atp->checkpoint_cpu_time; if (atp->task_state() == PROCESS_EXECUTING) { atp->elapsed_time += delta_t; } if (atp->get_app_status_msg()) { if (old_time != atp->checkpoint_cpu_time) { char buf[256]; sprintf(buf, "%s checkpointed", atp->result->name); if (atp->overdue_checkpoint) { gstate.request_schedule_cpus(buf); } atp->checkpoint_wall_time = gstate.now; atp->premature_exit_count = 0; atp->checkpoint_elapsed_time = atp->elapsed_time; atp->checkpoint_fraction_done = atp->fraction_done; atp->checkpoint_fraction_done_elapsed_time = atp->fraction_done_elapsed_time; if (log_flags.checkpoint_debug) { msg_printf(atp->wup->project, MSG_INFO, "[checkpoint] result %s checkpointed", atp->result->name ); } else if (log_flags.task_debug) { msg_printf(atp->wup->project, MSG_INFO, "[task] result %s checkpointed", atp->result->name ); } atp->write_task_state_file(); } } atp->get_trickle_up_msg(); atp->get_graphics_msg(); } }
// See if any processes have exited // bool ACTIVE_TASK_SET::check_app_exited() { //LOGD("app_control: ACTIVE_TASK_SET::check_app_exited"); ACTIVE_TASK* atp; bool found = false; #ifdef _WIN32 unsigned long exit_code; unsigned int i; for (i=0; i<active_tasks.size(); i++) { atp = active_tasks[i]; if (!atp->process_exists()) continue; if (GetExitCodeProcess(atp->process_handle, &exit_code)) { if (exit_code != STILL_ACTIVE) { found = true; atp->handle_exited_app(exit_code); } } else { if (log_flags.task_debug) { char errmsg[1024]; msg_printf(atp->result->project, MSG_INFO, "[task] task %s GetExitCodeProcess() failed - %s GLE %d (0x%x)", atp->result->name, windows_format_error_string( GetLastError(), errmsg, sizeof(errmsg) ), GetLastError(), GetLastError() ); } // The process doesn't seem to be there. // Mark task as aborted so we don't check it again. // atp->set_task_state(PROCESS_ABORTED, "check_app_exited"); } } #else int pid, stat; if ((pid = waitpid(-1, &stat, WNOHANG)) > 0) { atp = lookup_pid(pid); if (!atp) { // if we're running benchmarks, exited process // is probably a benchmark process; don't show error // if (!gstate.are_cpu_benchmarks_running() && log_flags.task_debug) { msg_printf(NULL, MSG_INTERNAL_ERROR, "Process %d not found\n", pid ); } return false; } atp->handle_exited_app(stat); found = true; } #endif return found; }
// Send kill signal to all app processes // Don't wait for them to exit // void ACTIVE_TASK_SET::kill_tasks(PROJECT* proj) { unsigned int i; ACTIVE_TASK *atp; for (i=0; i<active_tasks.size(); i++) { atp = active_tasks[i]; if (proj && atp->wup->project != proj) continue; if (!atp->process_exists()) continue; atp->kill_task(false); } }
// tell all running apps of a project to reread prefs // void ACTIVE_TASK_SET::request_reread_prefs(PROJECT* project) { unsigned int i; ACTIVE_TASK* atp; for (i=0; i<active_tasks.size(); i++) { atp = active_tasks[i]; if (atp->result->project != project) continue; if (!atp->process_exists()) continue; atp->request_reread_prefs(); } }
// Send quit message to all app processes // This is called when the core client exits, // or when a project is detached or reset // void ACTIVE_TASK_SET::request_tasks_exit(PROJECT* proj) { LOGD("app_control: ACTIVE_TASK::request_tasks_exit"); unsigned int i; ACTIVE_TASK *atp; for (i=0; i<active_tasks.size(); i++) { atp = active_tasks[i]; if (proj && atp->wup->project != proj) continue; if (!atp->process_exists()) continue; atp->request_exit(); } }
// For every running task, poll its graphics request queue against the
// shared-memory graphics_request channel,
// then call check_graphics_mode_ack().
//
// Tasks without a live process or without an attached shared-memory
// segment are skipped.
//
void ACTIVE_TASK_SET::graphics_poll() {
    unsigned int i;
    ACTIVE_TASK* atp;
    for (i=0; i<active_tasks.size(); i++) {
        atp = active_tasks[i];
        if (!atp->process_exists()) continue;

        // The segment pointer may be NULL; the other pollers in this
        // class guard against that before dereferencing, so do the same.
        //
        if (!atp->app_client_shm.shm) continue;

        atp->graphics_request_queue.msg_queue_poll(
            atp->app_client_shm.shm->graphics_request
        );
        atp->check_graphics_mode_ack();
    }
}
void ACTIVE_TASK_SET::send_trickle_downs() { unsigned int i; ACTIVE_TASK* atp; bool sent; for (i=0; i<active_tasks.size(); i++) { atp = active_tasks[i]; if (!atp->process_exists()) continue; if (atp->have_trickle_down) { if (!atp->app_client_shm.shm) continue; sent = atp->app_client_shm.shm->trickle_down.send_msg("<have_trickle_down/>\n"); if (sent) atp->have_trickle_down = false; } if (atp->send_upload_file_status) { if (!atp->app_client_shm.shm) continue; sent = atp->app_client_shm.shm->trickle_down.send_msg("<upload_file_status/>\n"); if (sent) atp->send_upload_file_status = false; } } }
// check for msgs from active tasks, // and update their elapsed time and other info // void ACTIVE_TASK_SET::get_msgs() { unsigned int i; ACTIVE_TASK *atp; double old_time; static double last_time=0; double delta_t; if (!gstate.clock_change && last_time) { delta_t = gstate.now - last_time; // Normally this is called every second. // If delta_t is > 10, we'll assume that a period of hibernation // or suspension happened, and treat it as zero. // If negative, must be clock reset. Ignore. // if (delta_t > 10 || delta_t < 0) { delta_t = 0; } } else { delta_t = 0; } last_time = gstate.now; double et_diff, et_diff_throttle; switch (gstate.suspend_reason) { case 0: case SUSPEND_REASON_CPU_THROTTLE: et_diff = delta_t; et_diff_throttle = delta_t * gstate.global_prefs.cpu_usage_limit/100; break; default: et_diff = et_diff_throttle = 0; break; } for (i=0; i<active_tasks.size(); i++) { atp = active_tasks[i]; if (!atp->process_exists()) continue; old_time = atp->checkpoint_cpu_time; if (atp->scheduler_state == CPU_SCHED_SCHEDULED) { atp->elapsed_time += atp->result->dont_throttle()?et_diff:et_diff_throttle; } if (atp->get_app_status_msg()) { if (old_time != atp->checkpoint_cpu_time) { char buf[256]; sprintf(buf, "%s checkpointed", atp->result->name); if (atp->overdue_checkpoint) { gstate.request_schedule_cpus(buf); } atp->checkpoint_wall_time = gstate.now; atp->premature_exit_count = 0; atp->checkpoint_elapsed_time = atp->elapsed_time; atp->checkpoint_fraction_done = atp->fraction_done; atp->checkpoint_fraction_done_elapsed_time = atp->fraction_done_elapsed_time; if (log_flags.checkpoint_debug) { msg_printf(atp->wup->project, MSG_INFO, "[checkpoint] result %s checkpointed", atp->result->name ); } else if (log_flags.task_debug) { msg_printf(atp->wup->project, MSG_INFO, "[task] result %s checkpointed", atp->result->name ); } atp->write_task_state_file(); } } atp->get_trickle_up_msg(); atp->get_graphics_msg(); } }