/*
 * forced_jobpurge()
 *
 * Handles a delete request whose extension string asks for a purge.
 *
 * @param pjob - the job the request refers to
 * @param preq - the batch request being processed
 *
 * @return PBSE_NONE     if the request's rq_extend is NULL or does not begin
 *                       with delpurgestr (nothing is done),
 *         PURGE_SUCCESS if the requester was allowed to purge and
 *                       force_purge_work() was called on pjob,
 *         -1            if the requester was not allowed; in that case the
 *                       request has been rejected with PBSE_PERM and
 *                       unlock_ji_mutex() has been called on pjob.
 */
static int forced_jobpurge(

  job                  *pjob,
  struct batch_request *preq)

{
  long owner_purge = FALSE;
  int  may_purge;

  /* no extension at all: this is not a purge request */
  if (preq->rq_extend == NULL)
    return(PBSE_NONE);

  /* the extension has to start with the purge keyword */
  if (strncmp(preq->rq_extend, delpurgestr, strlen(delpurgestr)) != 0)
    return(PBSE_NONE);

  get_svr_attr_l(SRV_ATR_OwnerPurge, &owner_purge);

  /* operator/manager permission bits are sufficient on their own ... */
  may_purge = ((preq->rq_perm & (ATR_DFLAG_OPRD | ATR_DFLAG_OPWR | ATR_DFLAG_MGRD | ATR_DFLAG_MGWR)) != 0);

  /* ... otherwise the owner check must pass and owner purge must be enabled.
   * svr_chk_owner() is only consulted when the permission bits did not match. */
  if (!may_purge)
    may_purge = ((svr_chk_owner(preq, pjob) == 0) && (owner_purge));

  if (!may_purge)
  {
    /* FAILURE */
    req_reject(PBSE_PERM, 0, preq, NULL, NULL);

    unlock_ji_mutex(pjob, __func__, "1", LOGLEVEL);

    return(-1);
  }

  force_purge_work(pjob);

  return(PURGE_SUCCESS);
} /* END forced_jobpurge() */
/*
 * retry_job_exit()
 *
 * Counts one more exit attempt for the job described by jeri and then either
 * retries the exit processing or, once MAX_EXITING_RETRY_ATTEMPTS has been
 * reached, force-purges the job (if it can still be found) and removes the
 * entry from the exiting list.
 *
 * @param jeri - retry bookkeeping for the exiting job (attempts is incremented,
 *               last_attempt is refreshed on a retry)
 * @return PBSE_NONE in every case
 */
int retry_job_exit(

  job_exiting_retry_info *jeri)

{
  jeri->attempts++;

  if (jeri->attempts < MAX_EXITING_RETRY_ATTEMPTS)
  {
    /* still within the retry budget: log it and run the exit handler again */
    char log_buf[LOCAL_LOG_BUF_SIZE];

    snprintf(log_buf, sizeof(log_buf), "Retrying job exiting for job %s", jeri->jobid);
    log_event(PBSEVENT_JOB, PBS_EVENTCLASS_JOB, __func__, log_buf);

    jeri->last_attempt = time(NULL);

    /* on_job_exit() is handed its own heap copy of the job id */
    on_job_exit(NULL, strdup(jeri->jobid));

    return(PBSE_NONE);
  }

  /* job has been attempted the maximum number of times. Destroy the job */
  job *pjob = svr_find_job(jeri->jobid, TRUE);

  if (pjob != NULL)
    force_purge_work(pjob);

  remove_entry_from_exiting_list(jeri);

  return(PBSE_NONE);
} /* END retry_job_exit() */
END_TEST

/*
 * test_force_purge_work
 *
 * Builds a zero-initialized job whose exec_host attribute string is set to
 * "bob", passes it to force_purge_work(), and then checks that the exec_host
 * string is NULL and that the job state is JOB_STATE_COMPLETE.
 */
START_TEST(test_force_purge_work)
  {
  /* calloc gives a job with every field zeroed; only exec_host is populated */
  job *pjob = (job *)calloc(1, sizeof(job));
  pjob->ji_wattr[JOB_ATR_exec_host].at_val.at_str = strdup("bob");

  force_purge_work(pjob);

  /* Normally pjob would no longer be valid at this point. The scaffolding's
   * stand-ins for the freeing functions set these fields instead, so the
   * outcome can be inspected here. */
  fail_unless(pjob->ji_wattr[JOB_ATR_exec_host].at_val.at_str == NULL);
  fail_unless(pjob->ji_qs.ji_state == JOB_STATE_COMPLETE);
  }
/*
 * get_next_retryable_jobid()
 *
 * Walks the exiting-jobs container looking for an entry whose last attempt is
 * older than EXITING_RETRY_TIME.
 *
 *  - Entries that have already reached MAX_EXITING_RETRY_ATTEMPTS are removed
 *    from the container and freed; if the job can still be found it is logged
 *    and force-purged. The walk then continues.
 *  - The first entry still within its retry budget has its attempt counter and
 *    timestamp updated, and a heap copy of its job id is returned.
 *
 * @param iter - in/out iterator over exiting_jobs_info; created on first use
 *               when *iter is NULL so that successive calls resume the walk
 * @return a strdup()'d job id (ownership passes to the caller), or NULL when
 *         no retryable entry remains (or strdup() failed)
 *
 * The container lock is held while entries are examined and is released
 * around svr_find_job()/force_purge_work() and before returning.
 */
char *get_next_retryable_jobid(

  exiting_jobs_info_iterator **iter)

{
  job_exiting_retry_info *jeri;
  job                    *pjob;
  time_t                  time_now = time(NULL);
  char                    log_buf[LOCAL_LOG_BUF_SIZE];

  exiting_jobs_info.lock();

  if (*iter == NULL)
  {
    *iter = exiting_jobs_info.get_iterator();
  }

  while ((jeri = (*iter)->get_next_item()) != NULL)
  {
    if (time_now - jeri->last_attempt > EXITING_RETRY_TIME)
    {
      if (jeri->attempts >= MAX_EXITING_RETRY_ATTEMPTS)
      {
        /* keep our own copy of the id: jeri is about to be freed */
        std::string jid(jeri->jobid);

        /* NOTE(review): the entry is removed while *iter is still in use;
         * presumably the iterator tolerates this - confirm. */
        exiting_jobs_info.remove(jeri->jobid);
        free(jeri);

        /* release the container lock while looking up / purging the job */
        exiting_jobs_info.unlock();

        if ((pjob = svr_find_job(const_cast<char *>(jid.c_str()), TRUE)) != NULL)
        {
          /* use the saved copy here; jeri->jobid would be a read of freed memory */
          snprintf(log_buf, sizeof(log_buf),
            "Job %s has had its exiting re-tried %d times, purging.",
            jid.c_str(), MAX_EXITING_RETRY_ATTEMPTS);
          log_event(PBSEVENT_JOB, PBS_EVENTCLASS_JOB, __func__, log_buf);

          force_purge_work(pjob);
        }

        exiting_jobs_info.lock();
      }
      else
      {
        jeri->attempts++;
        jeri->last_attempt = time_now;

        /* copy the id before dropping the lock so the entry cannot be
         * removed and freed by another thread while it is being read */
        char *jobid = strdup(jeri->jobid);

        exiting_jobs_info.unlock();

        return(jobid);
      }
    }
  }

  exiting_jobs_info.unlock();

  return(NULL);
} /* END get_next_retryable_jobid() */
char *get_next_retryable_jobid( int *iter) { job_exiting_retry_info *jeri; job *pjob; time_t time_now = time(NULL); char log_buf[LOCAL_LOG_BUF_SIZE]; pthread_mutex_lock(exiting_jobs_info->hm_mutex); mutex_mgr exit_mgr(exiting_jobs_info->hm_mutex, true); while ((jeri = (job_exiting_retry_info *)next_from_hash_map(exiting_jobs_info, iter, true)) != NULL) { if (time_now - jeri->last_attempt > EXITING_RETRY_TIME) { if (jeri->attempts >= MAX_EXITING_RETRY_ATTEMPTS) { std::string jid(jeri->jobid); remove_from_hash_map(exiting_jobs_info, jeri->jobid, true); free(jeri); exit_mgr.unlock(); //Don't hold on to a mutex when trying to lock another. if ((pjob = svr_find_job((char *)jid.c_str(), TRUE)) != NULL) { snprintf(log_buf, sizeof(log_buf), "Job %s has had its exiting re-tried %d times, purging.", jeri->jobid, MAX_EXITING_RETRY_ATTEMPTS); log_event(PBSEVENT_JOB, PBS_EVENTCLASS_JOB, __func__, log_buf); force_purge_work(pjob); } exit_mgr.lock(); } else { jeri->attempts++; jeri->last_attempt = time_now; char *jobid = strdup(jeri->jobid); return(jobid); } } } return(NULL); } /* END get_next_retryable_jobid() */
void ensure_deleted( struct work_task *ptask) /* I */ { job *pjob; char *jobid; jobid = ptask->wt_parm1; if (jobid != NULL) { if ((pjob = svr_find_job(jobid, FALSE)) != NULL) { force_purge_work(pjob); } } free(jobid); free(ptask->wt_mutex); free(ptask); } /* END ensure_deleted() */