void ACTIVE_TASK_SET::process_control_poll() { //LOGD("app_control: ACTIVE_TASK_SET::process_control_poll"); unsigned int i; ACTIVE_TASK* atp; for (i=0; i<active_tasks.size(); i++) { atp = active_tasks[i]; if (!atp->process_exists()) continue; if (!atp->app_client_shm.shm) continue; // if app has had the same message in its send buffer for 180 sec, // assume it's hung and restart it // if (atp->process_control_queue.timeout(180)) { if (log_flags.task_debug) { msg_printf(atp->result->project, MSG_INFO, "Restarting %s - message timeout", atp->result->name ); } atp->kill_task(true); } else { atp->process_control_queue.msg_queue_poll( atp->app_client_shm.shm->process_control_request ); } } }
// Send kill signal to all app processes // Don't wait for them to exit // void ACTIVE_TASK_SET::kill_tasks(PROJECT* proj) { unsigned int i; ACTIVE_TASK *atp; for (i=0; i<active_tasks.size(); i++) { atp = active_tasks[i]; if (proj && atp->wup->project != proj) continue; if (!atp->process_exists()) continue; atp->kill_task(false); } }
// Do periodic checks on running apps: // - get latest CPU time and % done info // - check if any has exited, and clean up // - see if any has exceeded its CPU or disk space limits, and abort it // bool ACTIVE_TASK_SET::poll() { bool action; unsigned int i; static double last_time = 0; if (!gstate.clock_change && gstate.now - last_time < TASK_POLL_PERIOD) return false; last_time = gstate.now; action = check_app_exited(); send_heartbeats(); send_trickle_downs(); process_control_poll(); action |= check_rsc_limits_exceeded(); get_msgs(); for (i=0; i<active_tasks.size(); i++) { ACTIVE_TASK* atp = active_tasks[i]; if (atp->task_state() == PROCESS_ABORT_PENDING) { if (gstate.now > atp->abort_time + ABORT_TIMEOUT) { if (log_flags.task_debug) { msg_printf(atp->result->project, MSG_INFO, "[task] abort request timed out, killing task %s", atp->result->name ); } atp->kill_task(false); } } if (atp->task_state() == PROCESS_QUIT_PENDING) { if (gstate.now > atp->quit_time + QUIT_TIMEOUT) { if (log_flags.task_debug) { msg_printf(atp->result->project, MSG_INFO, "[task] quit request timed out, killing task %s", atp->result->name ); } atp->kill_task(true); } } } // Check for finish files every 10 sec. // If we already found a finish file, abort the app; // it must be hung somewhere in boinc_finish(); // static double last_finish_check_time = 0; if (gstate.clock_change || gstate.now - last_finish_check_time > 10) { last_finish_check_time = gstate.now; for (i=0; i<active_tasks.size(); i++) { ACTIVE_TASK* atp = active_tasks[i]; if (atp->task_state() == PROCESS_UNINITIALIZED) continue; if (atp->finish_file_time) { // process is still there 10 sec after it wrote finish file. // abort the job atp->abort_task(EXIT_ABORTED_BY_CLIENT, "finish file present too long"); } else if (atp->finish_file_present()) { atp->finish_file_time = gstate.now; } } } if (action) { gstate.set_client_state_dirty("ACTIVE_TASK_SET::poll"); } return action; }