void gw_em_cancel(void *_job_id) { int job_id; gw_job_t *job; gw_em_mad_t *mad; gw_em_state_t current_em_state; if ( _job_id != NULL ) { job_id = *( (int *) _job_id ); free(_job_id); job = gw_job_pool_get(job_id, GW_TRUE); if ( job == NULL ) { gw_log_print("EM",'E',"Job %s no longer exists (CANCEL).\n", job_id); return; } } else return; /* -------------------------------------------------------------------- */ current_em_state = job->em_state; if ( issubmitted(current_em_state) ) { gw_job_print (job,"EM",'I',"Cancelling job.\n"); mad = job->history->em_mad; /* Warning! When in Migration Cancel, the previous MAD should be used */ if (job->job_state == GW_JOB_STATE_MIGR_CANCEL) { if (job->history->next == NULL) { gw_log_print("EM",'E',"Previous history record of job %i no longer exists\n", job_id); pthread_mutex_unlock(&(job->mutex)); return; } else mad = job->history->next->em_mad; } gw_em_mad_cancel(mad, job_id); gw_log_print ("EM",'I',"Cancelling job %i.\n", job_id); } else gw_log_print ("EM",'W',"Ignoring cancel request for job %i, will re-try.\n", job_id); /* -------------------------------------------------------------------- */ pthread_mutex_unlock(&(job->mutex)); }
void gw_dm_kill_hard (void *_job_id) { gw_job_t * job; int job_id; int rt; int array_id; int task_id; gw_array_t * array; char conf_filename[2048]; /* ----------------------------------------------------------- */ /* 0.- Get job pointer */ /* ----------------------------------------------------------- */ if ( _job_id != NULL ) { job_id = *( (int *) _job_id ); job = gw_job_pool_get(job_id, GW_TRUE); if ( job == NULL ) { gw_log_print("DM",'E',"Job %i does not exist (KILL_HARD).\n",job_id); gw_am_trigger(gw_dm.rm_am,"GW_RM_KILL_FAILED",_job_id); return; } } else return; /* ----------------------------------------------------------- */ /* 1.- Hard Kill the job */ /* ----------------------------------------------------------- */ switch (job->job_state) { case GW_JOB_STATE_MIGR_PROLOG: case GW_JOB_STATE_MIGR_EPILOG: pthread_mutex_lock(&(job->history->next->host->mutex)); job->history->next->host->running_jobs--; pthread_mutex_unlock(&(job->history->next->host->mutex)); job->history->next->stats[EXIT_TIME] = time(NULL); case GW_JOB_STATE_PROLOG: pthread_mutex_lock(&(job->history->host->mutex)); job->history->host->used_slots--; pthread_mutex_unlock(&(job->history->host->mutex)); case GW_JOB_STATE_EPILOG: case GW_JOB_STATE_EPILOG_STD: case GW_JOB_STATE_EPILOG_RESTART: case GW_JOB_STATE_EPILOG_FAIL: job->history->reason = GW_REASON_KILL; case GW_JOB_STATE_STOP_EPILOG: case GW_JOB_STATE_KILL_EPILOG: pthread_mutex_lock(&(job->history->host->mutex)); job->history->host->running_jobs--; pthread_mutex_unlock(&(job->history->host->mutex)); job->exit_time = time(NULL); job->history->stats[EXIT_TIME] = time(NULL); job->tm_state = GW_TM_STATE_HARD_KILL; if (job->history != NULL) { gw_log_print("DM",'I',"Cancelling prolog/epilog transfers of job %i.\n", job_id); gw_tm_mad_end(job->history->tm_mad, job->id); } break; case GW_JOB_STATE_PRE_WRAPPER: case GW_JOB_STATE_WRAPPER: job->history->reason = GW_REASON_KILL; pthread_mutex_lock(&(job->history->host->mutex)); job->history->host->used_slots--; job->history->host->running_jobs--; pthread_mutex_unlock(&(job->history->host->mutex)); job->exit_time = time(NULL); job->history->stats[EXIT_TIME] = time(NULL); job->em_state = GW_EM_STATE_HARD_KILL; if (job->history != NULL) { gw_log_print("DM",'I',"Cancelling execution of job %i.\n", job_id); gw_em_mad_cancel(job->history->em_mad, job_id); } break; case GW_JOB_STATE_MIGR_CANCEL: pthread_mutex_lock(&(job->history->next->host->mutex)); job->history->next->host->used_slots--; job->history->next->host->running_jobs--; pthread_mutex_unlock(&(job->history->next->host->mutex)); job->history->next->stats[EXIT_TIME] = time(NULL); job->history->reason = GW_REASON_KILL; case GW_JOB_STATE_STOP_CANCEL: case GW_JOB_STATE_KILL_CANCEL: pthread_mutex_lock(&(job->history->host->mutex)); job->history->host->used_slots--; job->history->host->running_jobs--; pthread_mutex_unlock(&(job->history->host->mutex)); job->exit_time = time(NULL); job->history->stats[EXIT_TIME] = time(NULL); job->em_state = GW_EM_STATE_HARD_KILL; break; case GW_JOB_STATE_INIT: case GW_JOB_STATE_PENDING: case GW_JOB_STATE_HOLD: case GW_JOB_STATE_STOPPED: job->exit_time = time(NULL); break; case GW_JOB_STATE_FAILED: case GW_JOB_STATE_ZOMBIE: break; default: gw_log_print("DM",'W',"Job %i can not be killed in current state.\n", job_id); gw_am_trigger(gw_dm.rm_am,"GW_RM_KILL_FAILED", _job_id); pthread_mutex_unlock(&(job->mutex)); return; } array_id = job->array_id; task_id = job->task_id; sprintf(conf_filename, "%s/job.conf", job->directory); unlink(conf_filename); pthread_mutex_unlock(&(job->mutex)); gw_job_pool_free(job_id); gw_log_print("DM",'I',"Job %i killed (hard) and freed.\n", job_id); if (array_id != -1) { array = gw_array_pool_get_array(array_id, GW_TRUE); if ( array != NULL ) { rt = gw_array_del_task(array,task_id); pthread_mutex_unlock(&(array->mutex)); if (rt == 0) { gw_array_pool_array_free(array_id); gw_log_print("DM",'I',"Array %i freed.\n",array_id); } } else gw_log_print("DM",'E',"Array %i does not exisit (KILL - task %i).\n", array_id, task_id); } gw_am_trigger(gw_dm.rm_am,"GW_RM_KILL_SUCCESS", _job_id); }
int main (int argc, char **argv ) { int rc; char action[20]; char jid_s[20]; int jid = 0; char contact[500]; char rsl_file[1024]; int status = -1; char info[500]; int end = 0; fd_set in_pipes; int j; char c; char str[4096]; struct timeval tv; int timer_interval = 300; time_t last_timer = 0; time_t the_time; struct timeval t1,t2; double waited; setbuf(stdout,NULL); rc = globus_module_activate(GLOBUS_COMMON_MODULE); if ( rc != GLOBUS_SUCCESS ) return -1; waited = 0; while (!end) { FD_ZERO(&in_pipes); FD_SET (0,&in_pipes); tv.tv_sec = 0; tv.tv_usec = 1000; gettimeofday(&t1, NULL); rc = select(1, &in_pipes, NULL, NULL, &tv); gettimeofday(&t2, NULL); waited += ((t2.tv_sec - t1.tv_sec)*1000000) + (t2.tv_usec - t1.tv_usec); if ( waited > 999 ) { globus_poll(); waited = 0; } if (rc == -1) { exit(-1); } else if (rc == 1) { j = 0; do { rc = read(0, (void *) &c, sizeof(char)); str[j++] = c; } while ( rc > 0 && c != '\n' ); str[j] = '\0'; if (rc <= 0) exit(-1); rc = sscanf(str, "%s %s %s %[^\n]", action, jid_s, contact, rsl_file); if (rc != 4 ) { printf("FAILURE Not all four arguments defined\n"); continue; } jid = atoi(jid_s); if (strcmp(action, "INIT") == 0 ) { status = gw_em_mad_init(jid, info); } else if (strcmp(action, "SUBMIT") == 0 ) { status = gw_em_mad_submit(jid, contact, rsl_file, info); } else if (strcmp(action, "RECOVER") == 0 ) { status = gw_em_mad_recover(jid, contact, info); } else if (strcmp(action, "CANCEL") == 0 ) { status = gw_em_mad_cancel(jid, info); } else if (strcmp(action, "POLL") == 0 ) { status = gw_em_mad_poll(jid, info); } else if (strcmp(action, "FINALIZE") == 0 ) { status = gw_em_mad_finalize(info); end = 1; return 0; } if (status != 0) printf("%s %d FAILURE %s\n", action, jid, info); } the_time = time(NULL); if (the_time - last_timer >= timer_interval) { last_timer = the_time; if (mad.initialized == 1) { status = gw_em_mad_check_credentials(info); if (status != 0) printf("%s %d FAILURE %s\n", action, jid, info); } } } return 0; }