/* * check_exiting_jobs() * * loops over the recorded exiting job information and retries * jobs that have been stale long enough. */ int check_exiting_jobs() { exiting_jobs_info_iterator *iter = NULL; char *jobid; job *pjob; while ((jobid = get_next_retryable_jobid(&iter)) != NULL) { if ((pjob = svr_find_job(jobid, TRUE)) == NULL) { remove_from_exiting_list_by_jobid(jobid); free(jobid); } else { mutex_mgr pjob_mutex = mutex_mgr(pjob->ji_mutex, true); if (pjob->ji_qs.ji_state == JOB_STATE_COMPLETE) { remove_from_exiting_list_by_jobid(jobid); free(jobid); } else { pjob_mutex.unlock(); /* jobid is freed in on_job_exit() */ retry_job_exit(jobid); } } } /* END loop over exiting job information */ return(PBSE_NONE); } /* END check_exiting_jobs() */
int check_exiting_jobs() { int iter = -1; job_exiting_retry_info *jeri; job *pjob; time_t time_now = time(NULL); while ((jeri = (job_exiting_retry_info *)next_from_hash_map(exiting_jobs_info, &iter)) != NULL) { if (time_now - jeri->last_attempt > EXITING_RETRY_TIME) { if ((pjob = svr_find_job(jeri->jobid, TRUE)) == NULL) { remove_entry_from_exiting_list(jeri); } else { if (pjob->ji_qs.ji_state == JOB_STATE_COMPLETE) { remove_entry_from_exiting_list(jeri); unlock_ji_mutex(pjob, __func__, NULL, LOGLEVEL); } else { unlock_ji_mutex(pjob, __func__, NULL, LOGLEVEL); retry_job_exit(jeri); } } } } return(PBSE_NONE); } /* END check_exiting_jobs() */
END_TEST START_TEST(retry_job_exit_test) { fail_unless(retry_job_exit(strdup("1.napali")) == 0, "Didn't retry job"); }