void ACTIVE_TASK_SET::process_control_poll() {
//LOGD("app_control: ACTIVE_TASK_SET::process_control_poll");
    unsigned int i;
    ACTIVE_TASK* atp;

    for (i=0; i<active_tasks.size(); i++) {
        atp = active_tasks[i];
        if (!atp->process_exists()) continue;
        if (!atp->app_client_shm.shm) continue;

        // if app has had the same message in its send buffer for 180 sec,
        // assume it's hung and restart it
        //
        if (atp->process_control_queue.timeout(180)) {
            if (log_flags.task_debug) {
                msg_printf(atp->result->project, MSG_INFO,
                    "Restarting %s - message timeout", atp->result->name
                );
            }
            atp->kill_task(true);
        } else {
            atp->process_control_queue.msg_queue_poll(
                atp->app_client_shm.shm->process_control_request
            );
        }
    }
}
// Send a kill signal to app processes; don't wait for them to exit.
// If proj is non-null, only tasks whose workunit belongs to that
// project are considered; a null proj selects every task.
// Tasks without a live process are skipped.
//
void ACTIVE_TASK_SET::kill_tasks(PROJECT* proj) {
    for (unsigned int idx = 0; idx < active_tasks.size(); ++idx) {
        ACTIVE_TASK* task = active_tasks[idx];

        const bool other_project = proj && task->wup->project != proj;
        if (other_project) continue;

        if (task->process_exists()) {
            task->kill_task(false);
        }
    }
}
Exemple #3
0
// Do periodic checks on running apps:
// - get latest CPU time and % done info
// - check if any has exited, and clean up
// - see if any has exceeded its CPU or disk space limits, and abort it
//
bool ACTIVE_TASK_SET::poll() {
    bool action;
    unsigned int i;
    static double last_time = 0;
    if (!gstate.clock_change && gstate.now - last_time < TASK_POLL_PERIOD) return false;
    last_time = gstate.now;

    action = check_app_exited();
    send_heartbeats();
    send_trickle_downs();
    process_control_poll();
    action |= check_rsc_limits_exceeded();
    get_msgs();
    for (i=0; i<active_tasks.size(); i++) {
        ACTIVE_TASK* atp = active_tasks[i];
        if (atp->task_state() == PROCESS_ABORT_PENDING) {
            if (gstate.now > atp->abort_time + ABORT_TIMEOUT) {
                if (log_flags.task_debug) {
                    msg_printf(atp->result->project, MSG_INFO,
                        "[task] abort request timed out, killing task %s",
                        atp->result->name
                    );
                }
                atp->kill_task(false);
            }
        }
        if (atp->task_state() == PROCESS_QUIT_PENDING) {
            if (gstate.now > atp->quit_time + QUIT_TIMEOUT) {
                if (log_flags.task_debug) {
                    msg_printf(atp->result->project, MSG_INFO,
                        "[task] quit request timed out, killing task %s",
                        atp->result->name
                    );
                }
                atp->kill_task(true);
            }
        }
    }

    // Check for finish files every 10 sec.
    // If we already found a finish file, abort the app;
    // it must be hung somewhere in boinc_finish();
    //
    static double last_finish_check_time = 0;
    if (gstate.clock_change || gstate.now - last_finish_check_time > 10) {
        last_finish_check_time = gstate.now;
        for (i=0; i<active_tasks.size(); i++) {
            ACTIVE_TASK* atp = active_tasks[i];
            if (atp->task_state() == PROCESS_UNINITIALIZED) continue;
            if (atp->finish_file_time) {
                // process is still there 10 sec after it wrote finish file.
                // abort the job
                atp->abort_task(EXIT_ABORTED_BY_CLIENT, "finish file present too long");
            } else if (atp->finish_file_present()) {
                atp->finish_file_time = gstate.now;
            }
        }
    }
    if (action) {
        gstate.set_client_state_dirty("ACTIVE_TASK_SET::poll");
    }

    return action;
}