void gw_dm_stop (void *_job_id) { gw_job_t * job; int job_id; /* ----------------------------------------------------------- */ /* 0.- Get job pointer */ /* ----------------------------------------------------------- */ if ( _job_id != NULL ) { job_id = *( (int *) _job_id ); job = gw_job_pool_get(job_id, GW_TRUE); if ( job == NULL ) { gw_log_print("DM",'E',"Job %i does not exist (STOP).\n",job_id); gw_am_trigger(gw_dm.rm_am,"GW_RM_STOP_FAILED", _job_id); return; } } else return; /* ----------------------------------------------------------- */ /* 1.- Stop the job */ /* ----------------------------------------------------------- */ switch (job->job_state) { case GW_JOB_STATE_STOPPED: gw_log_print("DM",'W',"Job %i already stopped.\n", job_id); gw_am_trigger(gw_dm.rm_am,"GW_RM_STOP_FAILED", _job_id); break; case GW_JOB_STATE_WRAPPER: if (job->history != NULL ) job->history->reason = GW_REASON_STOP_RESUME; gw_log_print("DM",'I',"Stopping job %i.\n", job_id); gw_job_set_state(job, GW_JOB_STATE_STOP_CANCEL, GW_FALSE); if ( job->reschedule == GW_TRUE ) { job->reschedule = GW_FALSE; gw_dm_mad_job_del(&gw_dm.dm_mad[0],job->id); } gw_am_trigger(gw_dm.em_am, "GW_EM_CANCEL", _job_id); break; default: gw_log_print("DM",'W',"Job %i can not be stopped in current state.\n", job_id); gw_am_trigger(gw_dm.rm_am,"GW_RM_STOP_FAILED", _job_id); break; } pthread_mutex_unlock(&(job->mutex)); }
void gw_dm_wrapper_done_cb ( void *_job_id ) { gw_job_t * job; int job_id; time_t total; time_t active; time_t suspension; /* ----------------------------------------------------------- */ /* 0.- Get job pointer */ /* ----------------------------------------------------------- */ if ( _job_id != NULL ) { job_id = *( (int *) _job_id ); job = gw_job_pool_get(job_id, GW_TRUE); if ( job == NULL ) { gw_log_print("DM",'E',"Job %i does not exist (JOB_STATE_WRAPPER_DONE_CB).\n",job_id); free(_job_id); return; } } else return; /* ----------------------------------------------------------- */ /* 1.- Set execution times & state transition */ /* ----------------------------------------------------------- */ job->em_state = GW_EM_STATE_INIT; switch (job->job_state) { case GW_JOB_STATE_PRE_WRAPPER: /* --------------- Update pre-wrapper stats -------------------- */ job->history->stats[PRE_WRAPPER_EXIT_TIME] = time(NULL); total = gw_job_history_get_pre_wrapper_time(job->history); active = job->history->stats[ACTIVE_TIME]; suspension = job->history->stats[SUSPENSION_TIME]; gw_job_print(job,"DM",'I',"Pre-Wrapper DONE:\n"); gw_job_print(job,"DM",'I',"\tActive time : %i\n", active); gw_job_print(job,"DM",'I',"\tSuspension time : %i\n", suspension); gw_job_print(job,"DM",'I',"\tTotal time : %i\n", total); /* -------------- Transition to Wrapper state ------------------ */ gw_am_trigger(&(gw_dm.am), "GW_DM_STATE_WRAPPER", _job_id); break; case GW_JOB_STATE_WRAPPER: /* ----------------- Update wrapper stats ---------------------- */ job->history->stats[WRAPPER_EXIT_TIME] = time(NULL); total = gw_job_history_get_wrapper_time(job->history); active = job->history->stats[ACTIVE_TIME]; suspension = job->history->stats[SUSPENSION_TIME]; gw_job_print(job,"DM",'I',"Wrapper DONE:\n"); gw_job_print(job,"DM",'I',"\tActive time : %i\n", active); gw_job_print(job,"DM",'I',"\tSuspension time : %i\n", suspension); gw_job_print(job,"DM",'I',"\tTotal time : %i\n", total); /* -------------- Free used slot from this host -------------- */ gw_host_dec_uslots(job->history->host); /* ---------- We do not need to re-schedule this job --------- */ if ( job->reschedule == GW_TRUE ) { job->reschedule = GW_FALSE; gw_dm_mad_job_del(&gw_dm.dm_mad[0],job->id); } /* -------------- Transition to Epilog state ------------------ */ gw_am_trigger(&(gw_dm.am), "GW_DM_STATE_EPILOG_STD", _job_id); break; case GW_JOB_STATE_STOP_CANCEL: /* ----------------- Update wrapper stats ---------------------- */ job->history->stats[WRAPPER_EXIT_TIME] = time(NULL); total = gw_job_history_get_wrapper_time(job->history); active = job->history->stats[ACTIVE_TIME]; suspension = job->history->stats[SUSPENSION_TIME]; gw_job_print(job,"DM",'I',"Wrapper CANCELED:\n"); gw_job_print(job,"DM",'I',"\tActive time : %i\n", active); gw_job_print(job,"DM",'I',"\tSuspension time : %i\n", suspension); gw_job_print(job,"DM",'I',"\tTotal time : %i\n", total); /* -------------- Free used slot from this host -------------- */ gw_host_dec_uslots(job->history->host); /* ------------ Transition to Stop Epilog state --------------- */ gw_am_trigger(&(gw_dm.am), "GW_DM_STATE_STOP_EPILOG", _job_id); break; case GW_JOB_STATE_KILL_CANCEL: /* ----------------- Update wrapper stats ---------------------- */ job->history->stats[WRAPPER_EXIT_TIME] = time(NULL); total = gw_job_history_get_wrapper_time(job->history); active = job->history->stats[ACTIVE_TIME]; suspension = job->history->stats[SUSPENSION_TIME]; gw_job_print(job,"DM",'I',"Wrapper CANCELED:\n"); gw_job_print(job,"DM",'I',"\tActive time : %i\n", active); gw_job_print(job,"DM",'I',"\tSuspension time : %i\n", suspension); gw_job_print(job,"DM",'I',"\tTotal time : %i\n", total); /* -------------- Free used slot from this host -------------- */ gw_host_dec_uslots(job->history->host); /* ------------ Transition to Kill Epilog state ---------------- */ gw_am_trigger(&(gw_dm.am), "GW_DM_STATE_KILL_EPILOG", _job_id); break; case GW_JOB_STATE_MIGR_CANCEL: /* ----------- Update previous wrapper stats ------------------- */ job->history->next->stats[WRAPPER_EXIT_TIME] = time(NULL); active = job->history->next->stats[ACTIVE_TIME]; suspension = job->history->next->stats[SUSPENSION_TIME]; gw_job_print(job,"DM",'I',"Wrapper CANCELED:\n"); gw_job_print(job,"DM",'I',"\tActive time : %i\n", active); gw_job_print(job,"DM",'I',"\tSuspension time : %i\n", suspension); /* -------------- Free used slot from previous host ------------ */ gw_host_dec_uslots(job->history->next->host); /* ---------- Transition to Migration Prolog state ------------ */ gw_am_trigger(&(gw_dm.am), "GW_DM_STATE_MIGR_PROLOG", _job_id); break; default: gw_log_print("DM",'E',"Wrapper done callback in wrong job (%i) state.\n", job_id); free(_job_id); break; } pthread_mutex_unlock(&(job->mutex)); }
void gw_dm_wrapper_failed_cb ( void *_job_id ) { gw_job_t * job; int job_id; time_t total; /* ----------------------------------------------------------- */ /* 0.- Get job pointer */ /* ----------------------------------------------------------- */ if ( _job_id != NULL ) { job_id = *( (int *) _job_id ); job = gw_job_pool_get(job_id, GW_TRUE); if ( job == NULL ) { gw_log_print("DM",'E',"Job %i does not exist (JOB_STATE_WRAPPER_FAILED_CB).\n",job_id); free(_job_id); return; } } else return; /* ----------------------------------------------------------- */ /* 1.- Set execution times & state transition */ /* ----------------------------------------------------------- */ job->em_state = GW_EM_STATE_INIT; switch (job->job_state) { case GW_JOB_STATE_PRE_WRAPPER: /* --------------- Update pre-wrapper stats -------------------- */ job->history->stats[PRE_WRAPPER_EXIT_TIME] = time(NULL); total = gw_job_history_get_pre_wrapper_time(job->history); gw_job_print(job,"DM",'E',"Pre-Wrapper failed:\n"); gw_job_print(job,"DM",'E',"\tTotal time : %i\n", total); break; case GW_JOB_STATE_WRAPPER: /* ----------------- Update wrapper stats ---------------------- */ job->history->stats[WRAPPER_EXIT_TIME] = time(NULL); total = gw_job_history_get_wrapper_time(job->history); gw_job_print(job,"DM",'E',"Wrapper failed:\n"); gw_job_print(job,"DM",'E',"\tTotal time : %i\n", total); /* ---------- We do not need to re-schedule this job --------- */ if ( job->reschedule == GW_TRUE ) { job->reschedule = GW_FALSE; gw_dm_mad_job_del(&gw_dm.dm_mad[0],job->id); } break; default: gw_log_print("DM",'E',"Wrapper failed callback in wrong job (%i) state.\n", job_id); break; } /* ----------------------------------------------------------- */ /* 1.- State transtition */ /* ----------------------------------------------------------- */ /* -------------- Free used slot from this host -------------- */ if (job->history != NULL) { job->history->reason = GW_REASON_EXECUTION_ERROR; gw_host_dec_uslots(job->history->host); } /* ----------------------------------------------------------- */ gw_am_trigger(&(gw_dm.am), "GW_DM_STATE_EPILOG_FAIL", _job_id); pthread_mutex_unlock(&(job->mutex)); }
void gw_dm_kill (void *_job_id) { gw_job_t * job; int job_id; int rt; int array_id; int task_id; gw_array_t * array; char conf_filename[GW_MSG_STRING_LONG]; /* ----------------------------------------------------------- */ /* 0.- Get job pointer */ /* ----------------------------------------------------------- */ if ( _job_id != NULL ) { job_id = *( (int *) _job_id ); job = gw_job_pool_get(job_id, GW_TRUE); if ( job == NULL ) { gw_log_print("DM",'E',"Job %i does not exist (KILL).\n",job_id); gw_am_trigger(gw_dm.rm_am,"GW_RM_KILL_FAILED", _job_id); return; } } else return; /* ----------------------------------------------------------- */ /* 1.- Kill the job */ /* ----------------------------------------------------------- */ switch (job->job_state) { case GW_JOB_STATE_INIT: case GW_JOB_STATE_PENDING: case GW_JOB_STATE_HOLD: case GW_JOB_STATE_STOPPED: job->exit_time = time(NULL); case GW_JOB_STATE_FAILED: case GW_JOB_STATE_ZOMBIE: array_id = job->array_id; task_id = job->task_id; sprintf(conf_filename, "%s/job.conf", job->directory); unlink(conf_filename); pthread_mutex_unlock(&(job->mutex)); gw_job_pool_free(job_id); gw_log_print("DM",'I',"Job %i killed and freed.\n", job_id); if (array_id != -1) { array = gw_array_pool_get_array(array_id, GW_TRUE); if ( array != NULL ) { rt = gw_array_del_task(array,task_id); pthread_mutex_unlock(&(array->mutex)); if (rt == 0) { gw_array_pool_array_free(array_id); gw_log_print("DM",'I',"Array %i freed.\n",array_id); } } else gw_log_print("DM",'E',"Array %i does not exisit (KILL - task %i).\n", array_id, task_id); } gw_am_trigger(gw_dm.rm_am,"GW_RM_KILL_SUCCESS", _job_id); break; case GW_JOB_STATE_WRAPPER: if (job->history != NULL ) job->history->reason = GW_REASON_KILL; gw_log_print("DM",'I',"Killing job %i.\n", job_id); gw_job_set_state(job, GW_JOB_STATE_KILL_CANCEL, GW_FALSE); if ( job->reschedule == GW_TRUE ) { job->reschedule = GW_FALSE; gw_dm_mad_job_del(&gw_dm.dm_mad[0],job->id); } gw_am_trigger(gw_dm.em_am, "GW_EM_CANCEL", _job_id); pthread_mutex_unlock(&(job->mutex)); break; default: gw_log_print("DM",'W',"Job %i can not be killed in current state.\n", job_id); gw_am_trigger(gw_dm.rm_am,"GW_RM_KILL_FAILED", _job_id); pthread_mutex_unlock(&(job->mutex)); break; } }