void gw_dm_kill_hard (void *_job_id) { gw_job_t * job; int job_id; int rt; int array_id; int task_id; gw_array_t * array; char conf_filename[GW_MSG_STRING_LONG]; /* ----------------------------------------------------------- */ /* 0.- Get job pointer */ /* ----------------------------------------------------------- */ if ( _job_id != NULL ) { job_id = *( (int *) _job_id ); job = gw_job_pool_get(job_id, GW_TRUE); if ( job == NULL ) { gw_log_print("DM",'E',"Job %i does not exist (KILL_HARD).\n",job_id); gw_am_trigger(gw_dm.rm_am,"GW_RM_KILL_FAILED",_job_id); return; } } else return; /* ----------------------------------------------------------- */ /* 1.- Hard Kill the job */ /* ----------------------------------------------------------- */ switch (job->job_state) { case GW_JOB_STATE_MIGR_PROLOG: case GW_JOB_STATE_MIGR_EPILOG: gw_host_dec_rjobs(job->history->next->host); job->history->next->stats[EXIT_TIME] = time(NULL); case GW_JOB_STATE_PROLOG: gw_host_dec_uslots(job->history->host, job->template.np);
void gw_dm_stopped ( void *_job_id ) { gw_job_t * job; int job_id; /* ----------------------------------------------------------- */ /* 0.- Get job pointer */ /* ----------------------------------------------------------- */ if ( _job_id != NULL ) { job_id = *( (int *) _job_id ); job = gw_job_pool_get(job_id, GW_TRUE); if ( job == NULL ) { gw_log_print("DM",'E',"Job %i does not exist (JOB_STATE_STOPPED).\n",job_id); free(_job_id); return; } } else return; /* ----------------------------------------------------------- */ /* 1.- Update Job state */ /* ----------------------------------------------------------- */ gw_job_set_state(job, GW_JOB_STATE_STOPPED, GW_FALSE); /* -------- Update Host & User running jobs -------- */ gw_user_pool_dec_running_jobs(job->user_id); gw_host_dec_rjobs(job->history->host); /* ----------------------------------------------------------- */ /* 2.- Notify Request Manager */ /* ----------------------------------------------------------- */ gw_am_trigger(gw_dm.rm_am,"GW_RM_STOP_SUCCESS", _job_id); pthread_mutex_unlock(&(job->mutex)); }
void gw_dm_pending( void *_job_id ) { gw_job_t *job; int job_id; gw_boolean_t failed; gw_migration_reason_t reason; /* ----------------------------------------------------------- */ /* 0.- Get job pointer */ /* ----------------------------------------------------------- */ if ( _job_id != NULL ) { job_id = *( (int *) _job_id ); job = gw_job_pool_get(job_id, GW_TRUE); if ( job == NULL ) { gw_log_print("DM",'E',"Job %i does not exist (JOB_STATE_PENDING).\n",job_id); free(_job_id); return; } } else return; /* ----------------------------------------------------------- */ /* 1.- Set state */ /* ----------------------------------------------------------- */ gw_job_print(job,"DM",'I',"Rescheduling job.\n"); gw_log_print("DM",'I',"Rescheduling job %d.\n", job->id); gw_job_set_state (job, GW_JOB_STATE_PENDING, GW_FALSE); /* -------- Update Host & User running jobs -------- */ gw_user_pool_dec_running_jobs(job->user_id); gw_host_dec_rjobs(job->history->host); /* ------------- Restart counter --------------- */ job->restarted++; /* ------------- Notify the Scheduler --------------- */ if (job->history != NULL) { reason = GW_REASON_NONE; failed = (job->history->reason == GW_REASON_EXECUTION_ERROR) || (job->history->reason == GW_REASON_PERFORMANCE); if (failed) { gw_dm_mad_job_failed(&gw_dm.dm_mad[0], job->history->host->host_id, job->user_id, job->history->reason); } gw_dm_mad_job_schedule(&gw_dm.dm_mad[0], job_id, job->array_id, job->user_id, reason); } else gw_log_print("DM",'E',"Rescheduling job %d, but no history records found.\n", job->id); /* ------------------------------------------------- */ free(_job_id); pthread_mutex_unlock(&(job->mutex)); }
void gw_dm_zombie ( void *_job_id ) { gw_job_t * job; gw_array_t * array; int job_id; int task_id; int array_id; int rt; char conf_filename[2048]; time_t prolog, epilog; /* ----------------------------------------------------------- */ /* 0.- Get job pointer */ /* ----------------------------------------------------------- */ if ( _job_id == NULL ) return; job_id = *( (int *) _job_id ); job = gw_job_pool_get(job_id, GW_TRUE); if ( job == NULL ) { gw_log_print("DM",'E',"Job %i does not exist (JOB_STATE_ZOMBIE).\n",job_id); free(_job_id); return; } /* ----------------------------------------------------------- */ /* 0.- Update Job state */ /* ----------------------------------------------------------- */ switch (job->job_state) { case GW_JOB_STATE_EPILOG: gw_job_set_state(job, GW_JOB_STATE_ZOMBIE, GW_FALSE); gw_log_print("DM",'I',"Job %i done, with exit code %i.\n",job->id, job->exit_code); job->history->reason = GW_REASON_NONE; job->exit_time = time(NULL); /* ------------- Print job history and send usage ------------ */ gw_job_print(job,"DM",'I',"Job done, history:\n"); gw_job_print_history(job); gw_job_send_usage(job); if ( job->client_waiting > 0 ) gw_am_trigger(gw_dm.rm_am,"GW_RM_WAIT_SUCCESS", _job_id); else { if (gw_conf.dispose == GW_TRUE) gw_am_trigger(&(gw_dm.am), "GW_DM_KILL", _job_id); else free(_job_id); } /* -------- Update User & Host running jobs -------- */ gw_user_pool_dec_running_jobs(job->user_id); gw_host_dec_rjobs(job->history->host); /* -------- Notify the scheduler -------- */ prolog = gw_job_history_get_prolog_time(job->history); epilog = gw_job_history_get_epilog_time(job->history); gw_dm_mad_job_success(&gw_dm.dm_mad[0], job->history->host->host_id, job->user_id, (prolog + epilog), job->history->stats[SUSPENSION_TIME], job->history->stats[ACTIVE_TIME]); pthread_mutex_unlock(&(job->mutex)); /* -------- Update other jobs dependencies -------- */ gw_job_pool_dep_check(job_id); break; case GW_JOB_STATE_KILL_EPILOG: gw_job_set_state(job, GW_JOB_STATE_ZOMBIE, GW_FALSE); job->exit_time = time(NULL); /* ------------- Print job history and send usage ------------ */ gw_job_print(job,"DM",'I',"Job killed, history:\n"); gw_job_print_history(job); gw_job_send_usage(job); /* ---------------- Free job & Notify RM ---------------- */ array_id = job->array_id; task_id = job->task_id; /* -------- Update User & Host running jobs -------- */ gw_user_pool_dec_running_jobs(job->user_id); gw_host_dec_rjobs(job->history->host); sprintf(conf_filename, "%s/job.conf", job->directory); unlink(conf_filename); pthread_mutex_unlock(&(job->mutex)); /* ------------------------------------------------- */ gw_job_pool_free(job_id); gw_log_print("DM",'I',"Job %i killed and freed.\n", job_id); if (array_id != -1) { array = gw_array_pool_get_array(array_id,GW_TRUE); if ( array != NULL ) { rt = gw_array_del_task(array,task_id); pthread_mutex_unlock(&(array->mutex)); if (rt == 0) { gw_array_pool_array_free(array_id); gw_log_print("DM",'I',"Array %i freed\n",array_id); } } else gw_log_print("DM",'E',"Could not delete task %i from array %i.\n", task_id, array_id); } gw_am_trigger(gw_dm.rm_am,"GW_RM_KILL_SUCCESS", _job_id); break; default: gw_log_print("DM",'E',"Zombie callback in wrong job (%i) state.\n", job_id); free(_job_id); pthread_mutex_unlock(&(job->mutex)); break; } }
void gw_dm_failed ( void *_job_id ) { gw_job_t * job; int job_id; gw_boolean_t failed; /* ----------------------------------------------------------- */ /* 0.- Get job pointer */ /* ----------------------------------------------------------- */ if ( _job_id != NULL ) { job_id = *( (int *) _job_id ); job = gw_job_pool_get(job_id, GW_TRUE); if ( job == NULL ) { gw_log_print("DM",'E',"Job %i does not exist (JOB_STATE_FAILED).\n",job_id); free(_job_id); return; } } else return; /* ----------------------------------------------------------- */ /* 1.- Set state */ /* ----------------------------------------------------------- */ gw_log_print("DM",'I',"Job %i failed.\n",job->id); gw_job_set_state(job, GW_JOB_STATE_FAILED, GW_FALSE); gw_job_print(job,"DM",'I',"Job failed, history:\n"); gw_job_print_history(job); job->exit_time = time(NULL); if ( job->client_waiting > 0 ) gw_am_trigger(gw_dm.rm_am,"GW_RM_WAIT_SUCCESS", _job_id); else free(_job_id); /* -------- Update Host & User running jobs -------- */ gw_user_pool_dec_running_jobs(job->user_id); gw_host_dec_rjobs(job->history->host); /* --------- Notify the Scheduler ---------- */ failed = (job->history->reason == GW_REASON_EXECUTION_ERROR) || (job->history->reason == GW_REASON_PERFORMANCE); if (failed) { gw_dm_mad_job_failed(&gw_dm.dm_mad[0], job->history->host->host_id, job->user_id, job->history->reason); } pthread_mutex_unlock(&(job->mutex)); }