job *get_next_status_job(

  struct stat_cntl  *cntl,
  int               &job_array_index,
  job_array         *pa,
  all_jobs_iterator *iter)

  {
  job *pjob = NULL;

  if (cntl->sc_type == tjstQueue)
    pjob = next_job(cntl->sc_pque->qu_jobs, iter);
  else if (cntl->sc_type == tjstSummarizeArraysQueue)
    pjob = next_job(cntl->sc_pque->qu_jobs_array_sum, iter);
  else if (cntl->sc_type == tjstSummarizeArraysServer)
    pjob = next_job(&array_summary, iter);
  else if (cntl->sc_type == tjstArray)
    {
    /* increment job_array_index until we find a non-null pointer or hit the end */
    while (++job_array_index < pa->ai_qs.array_size)
      {
      if (pa->job_ids[job_array_index] != NULL)
        {
        if ((pjob = svr_find_job(pa->job_ids[job_array_index], FALSE)) != NULL)
          {
          break;
          }
        }
      }
    }
  else
    pjob = next_job(&alljobs, iter);

  return(pjob);
  } // END get_next_status_job()
END_TEST

START_TEST(next_job_test)
  {
  struct all_jobs alljobs;
  struct job     *result;

  initialize_all_jobs_array(&alljobs);

  result = next_job(NULL, NULL);
  fail_unless(result == NULL, "null input parameters fail");

  result = next_job(&alljobs, NULL);
  fail_unless(result == NULL, "NULL input iterator fail");
  }
int handle_requeue_all(

  batch_request *preq)

  {
  int                rc;
  job               *pjob;
  all_jobs_iterator *iter;

  if ((preq->rq_perm & (ATR_DFLAG_MGWR)) == 0)
    {
    rc = PBSE_PERM;
    req_reject(rc, 0, preq, NULL, "You must be a manager to requeue all jobs");
    return(rc);
    }

  alljobs.lock();
  iter = alljobs.get_iterator();
  alljobs.unlock();

  while ((pjob = next_job(&alljobs, iter)) != NULL)
    {
    mutex_mgr job_mutex(pjob->ji_mutex, true);

    requeue_job_without_contacting_mom(*pjob);
    }

  delete iter;

  reply_ack(preq);

  return(PBSE_NONE);
  } /* END handle_requeue_all() */
job *next_job(

  struct all_jobs *aj,
  int             *iter)

  {
  job *pjob;

  pthread_mutex_lock(aj->alljobs_mutex);
  pjob = (job *)next_thing(aj->ra, iter);
  pthread_mutex_unlock(aj->alljobs_mutex);

  if (pjob != NULL)
    {
    lock_ji_mutex(pjob, __func__, NULL, LOGLEVEL);

    if (pjob->ji_being_recycled == TRUE)
      {
      unlock_ji_mutex(pjob, __func__, "1", LOGLEVEL);

      pjob = next_job(aj, iter);
      }
    }

  return(pjob);
  } /* END next_job() */
/*
 * Thread function
 * Each thread in the pool runs this function from creation onward.
 * The idea is that each of them waits for a job to be added to the
 * queue. When that happens, one of them acquires the job and executes it.
 */
static void* thread_func(void *args)
{
    thread_pool_t* pool = (thread_pool_t*) args;

    while(1)
    {
        pthread_mutex_lock(&pool->mutex);
        while(pool->queue.length == 0)
        {
            DEBUG("Waiting for jobs...");
            pthread_cond_wait(&pool->has_jobs, &pool->mutex);
        }

        pool->n_threads_working++;
        DEBUG("Got a job!");
        job_t* job = next_job(&pool->queue);
        if(job == NULL)
        {
            /* nothing to run after all; release the lock before retrying,
             * otherwise the next iteration would deadlock on the mutex */
            pool->n_threads_working--;
            pthread_mutex_unlock(&pool->mutex);
            continue;
        }
        pthread_mutex_unlock(&pool->mutex);

        job->func(job->arg);

        pthread_mutex_lock(&pool->mutex);
        pool->n_threads_working--;
        pthread_mutex_unlock(&pool->mutex);
    }

    return NULL;
}
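The worker above is one half of a classic condition-variable handoff; the other half is a producer that appends to the queue under the same mutex and signals has_jobs. A minimal sketch of that producer side, where enqueue_job() is a hypothetical helper that appends to pool->queue and increments queue.length; only the lock/signal protocol is taken from the snippet above:

/* Producer-side sketch for the pool above. enqueue_job() is a
 * hypothetical helper that appends to pool->queue and increments
 * pool->queue.length; the point here is the lock/signal protocol. */
static void thread_pool_add_job(thread_pool_t *pool, job_t *job)
{
    pthread_mutex_lock(&pool->mutex);
    enqueue_job(&pool->queue, job);       /* hypothetical queue append */
    pthread_cond_signal(&pool->has_jobs); /* wake one waiting worker */
    pthread_mutex_unlock(&pool->mutex);
}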
int line_solve(Puzzle *puz, Solution *sol, int contradicting)
{
    extern dir_t cont_dir;
    extern line_t cont_line;

    dir_t dir;
    line_t i;
    int depth;

    while (next_job(puz, &dir, &i, &depth))
    {
        nlines++;
        if ((VB && !VC) || WL(dir,i))
            printf("*** %s %d\n", CLUENAME(puz->type,dir), i);
        if (VB || WL(dir,i))
            dump_line(stdout,puz,sol,dir,i);

        if (contradicting && depth >= contradepth)
        {
            /* At max depth we just check if the line is solvable */
            line_t *pos, *bcl;

            if (!left_solve(puz, sol, dir, i, 0, &pos, &bcl))
            {
                if ((VC&&VV) || WL(dir,i))
                    printf("C: %s %d OK AT DEPTH %d\n",
                           cluename(puz->type,dir), i, depth);
            }
            else
            {
                if ((VC&&VV) || WL(dir,i))
                    printf("C: %s %d FAILED AT DEPTH %d\n",
                           cluename(puz->type,dir), i, depth);
                if (contradicting)
                {
                    cont_dir = dir;
                    cont_line = i;
                }
                return 0;
            }
        }
        else if (apply_lro(puz, sol, dir, i, depth + 1))
        {
            /* Found a contradiction */
            if (contradicting)
            {
                cont_dir = dir;
                cont_line = i;
            }
            return 0;
        }

        if (VJ)
        {
            printf("CURRENT JOBS:\n");
            dump_jobs(stdout,puz);
        }
    }

    return 1;
}
static void* bdberl_tpool_main(void* arg)
{
    TPool* tpool = (TPool*)arg;

    LOCK(tpool);

    tpool->active_threads++;

    while(1)
    {
        // Check for shutdown...
        if (tpool->shutdown)
        {
            tpool->active_threads--;
            erl_drv_cond_broadcast(tpool->work_cv);
            UNLOCK(tpool);
            return 0;
        }

        // Get the next job
        TPoolJob* job = next_job(tpool);
        if (job)
        {
            // Unlock to avoid blocking others
            UNLOCK(tpool);

            // Invoke the function
            (*(job->main_fn))(job->arg);

            // Relock
            LOCK(tpool);

            // Mark the job as not running (important for cancellation to know it's done)
            job->running = 0;

            // If the job was cancelled, signal the cancellation cv so that anyone waiting on the
            // job knows it's complete
            if (job->canceled)
            {
                erl_drv_cond_broadcast(tpool->cancel_cv);
            }

            // Cleanup the job (remove from active list, free, etc.)
            cleanup_job(tpool, job);
        }
        else
        {
            // Wait for a job to become available, then jump back to the top of the loop
            erl_drv_cond_wait(tpool->work_cv, tpool->lock);
        }
    }

    return 0;
}
job *find_array_template(

  char *arrayid)

  {
  char *at;
  char *comp;
  int   different = FALSE;
  int   iter = -1;

  job  *pj;

  if ((at = strchr(arrayid, (int)'@')) != NULL)
    *at = '\0'; /* strip off @server_name */

  if ((is_svr_attr_set(SRV_ATR_display_job_server_suffix) == TRUE) ||
      (is_svr_attr_set(SRV_ATR_job_suffix_alias) == TRUE))
    {
    comp = get_correct_jobname(arrayid);
    different = TRUE;

    if (comp == NULL)
      return NULL;
    }
  else
    {
    comp = arrayid;
    }

  while ((pj = next_job(&array_summary, &iter)) != NULL)
    {
    if (!strcmp(comp, pj->ji_qs.ji_jobid))
      break;

    unlock_ji_mutex(pj, __func__, NULL, LOGLEVEL);
    }

  if (at)
    *at = '@'; /* restore @server_name */

  if (different)
    free(comp);

  return(pj); /* may be NULL */
  } /* END find_array_template() */
static void* thr_fn(void *arg)
{
    struct worker_t *w = (struct worker_t*)arg;
    struct job_t job;
    unsigned int sleep_time = 0, slept_time;
    time_t start, now;
    char now_str[128], ap_str[128];
    struct tm now_tm, ap_tm;
    sigset_t mask;

    sigfillset(&mask);
    sigdelset(&mask, SIG_WORKER_INTERRUPT);
    sigdelset(&mask, SIG_WORKER_KILL);
    pthread_sigmask(SIG_BLOCK, &mask, NULL);

    while (!__sync_fetch_and_add(&w->exit_loop, 0)) {
        start = time(NULL);

        do {
            if (sleep_time == 0)
                sleep_time = w->interval;

            next_job(w, &job, &sleep_time);
            if (job.id > 0)
                w->wlog("next job[%d] start in %d seconds\n", job.id, sleep_time);

            sleep_time = sleep(sleep_time);
            if (sleep_time > 0)
                w->wlog("interrupt by signal, %d seconds left\n", sleep_time);
        } while (sleep_time > 0);

        now = time(NULL);
        slept_time = now - start;
        strftime(now_str, 128, "%T", localtime_r(&now, &now_tm));

        if (job.id > 0) {
            strftime(ap_str, 128, "%T", localtime_r(&job.ap_time, &ap_tm));
            w->wlog("[%s] do job[%d], appointment time[%s], slept %d seconds\n",
                    now_str, job.id, ap_str, slept_time);
            delete_job(w, job.id);
        } else {
            w->wlog("[%s] slept %d seconds\n", now_str, slept_time);
        }
    }

    return NULL;
}
job *next_job(

  all_jobs          *aj,
  all_jobs_iterator *iter)

  {
  job *pjob;

  if (aj == NULL)
    {
    log_err(PBSE_BAD_PARAMETER, __func__, "null input pointer to all_jobs struct");
    return(NULL);
    }

  if (iter == NULL)
    {
    log_err(PBSE_BAD_PARAMETER, __func__, "null input iterator");
    return(NULL);
    }

  aj->lock();
  pjob = iter->get_next_item();
  aj->unlock();

  if (pjob != NULL)
    {
    lock_ji_mutex(pjob, __func__, NULL, LOGLEVEL);

    if (pjob->ji_being_recycled == TRUE)
      {
      unlock_ji_mutex(pjob, __func__, "1", LOGLEVEL);

      pjob = next_job(aj, iter);
      }
    }

  return(pjob);
  } /* END next_job() */
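The call sites of this C++ variant (handle_requeue_all and delete_all_work in this collection) all follow the same shape around it. A minimal sketch of that caller-side pattern, assuming only the all_jobs container, mutex_mgr, and next_job() shown in these snippets:

/* Caller-side iteration sketch, assuming the all_jobs container and
 * next_job() above. next_job() returns each job with ji_mutex held. */
all_jobs_iterator *iter;

alljobs.lock();
iter = alljobs.get_iterator();
alljobs.unlock();

job *pjob;

while ((pjob = next_job(&alljobs, iter)) != NULL)
  {
  mutex_mgr job_mutex(pjob->ji_mutex, true); /* releases ji_mutex at scope exit */

  /* ... inspect or modify pjob here ... */
  }

delete iter;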
job *next_job(

  struct all_jobs *aj,
  int             *iter)

  {
  job *pjob;

  if (aj == NULL)
    {
    log_err(PBSE_BAD_PARAMETER, __func__, "null input pointer to all_jobs struct");
    return(NULL);
    }

  if (iter == NULL)
    {
    log_err(PBSE_BAD_PARAMETER, __func__, "null input iterator");
    return(NULL);
    }

  pthread_mutex_lock(aj->alljobs_mutex);
  pjob = (job *)next_thing(aj->ra, iter);
  pthread_mutex_unlock(aj->alljobs_mutex);

  if (pjob != NULL)
    {
    lock_ji_mutex(pjob, __func__, NULL, LOGLEVEL);

    if (pjob->ji_being_recycled == TRUE)
      {
      unlock_ji_mutex(pjob, __func__, "1", LOGLEVEL);

      pjob = next_job(aj, iter);
      }
    }

  return(pjob);
  } /* END next_job() */
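Both C variants above skip recycled entries by recursing, which costs one stack frame per consecutive recycled job. For reference, a sketch of the same behavior written iteratively, under the same declarations as the function above (a sketch, not code from the project; the NULL-parameter checks are omitted for brevity):

/* Iterative equivalent of the recursive recycled-job skip above.
 * Same declarations assumed; parameter validation omitted. */
job *next_job_iterative(struct all_jobs *aj, int *iter)
  {
  job *pjob;

  for (;;)
    {
    pthread_mutex_lock(aj->alljobs_mutex);
    pjob = (job *)next_thing(aj->ra, iter);
    pthread_mutex_unlock(aj->alljobs_mutex);

    if (pjob == NULL)
      return(NULL);

    lock_ji_mutex(pjob, __func__, NULL, LOGLEVEL);

    if (pjob->ji_being_recycled != TRUE)
      return(pjob); /* live job, returned with ji_mutex held */

    unlock_ji_mutex(pjob, __func__, "1", LOGLEVEL);
    }
  }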
void handle_truncated_qstat(

  bool           exec_only,
  bool           condensed,
  batch_request *preq)

  {
  long                 sentJobCounter = 0;
  long                 qmaxreport;
  all_queues_iterator *queue_iter = NULL;
  pbs_queue           *pque;
  char                 log_buf[LOCAL_LOG_BUF_SIZE];
  job                 *pjob;
  svrattrl            *pal = (svrattrl *)GET_NEXT(preq->rq_ind.rq_status.rq_attr);
  batch_reply         *preply = &preq->rq_reply;
  int                  bad = 0;

  svr_queues.lock();
  queue_iter = svr_queues.get_iterator();
  svr_queues.unlock();

  /* loop through all queues */
  while ((pque = next_queue(&svr_queues, queue_iter)) != NULL)
    {
    long      qjcounter = 0;
    mutex_mgr queue_mutex(pque->qu_mutex, true);

    if ((exec_only == true) &&
        (pque->qu_qs.qu_type != QTYPE_Execution))
      {
      /* ignore routing queues */
      continue;
      }

    if (((pque->qu_attr[QA_ATR_MaxReport].at_flags & ATR_VFLAG_SET) != 0) &&
        (pque->qu_attr[QA_ATR_MaxReport].at_val.at_long >= 0))
      {
      qmaxreport = pque->qu_attr[QA_ATR_MaxReport].at_val.at_long;
      }
    else
      {
      qmaxreport = TMAX_JOB;
      }

    if (LOGLEVEL >= 5)
      {
      snprintf(log_buf, sizeof(log_buf), "Reporting up to %ld idle jobs in queue %s\n",
        qmaxreport,
        pque->qu_qs.qu_name);

      log_event(PBSEVENT_SYSTEM, PBS_EVENTCLASS_QUEUE, pque->qu_qs.qu_name, log_buf);
      }

    /* loop through jobs in queue */
    all_jobs_iterator *jobiter = NULL;
    pque->qu_jobs->lock();
    jobiter = pque->qu_jobs->get_iterator();
    pque->qu_jobs->unlock();

    while ((pjob = next_job(pque->qu_jobs, jobiter)) != NULL)
      {
      mutex_mgr job_mgr(pjob->ji_mutex, true);

      if ((qjcounter >= qmaxreport) &&
          (pjob->ji_qs.ji_state == JOB_STATE_QUEUED))
        {
        /* max_report of queued jobs reached for queue */
        continue;
        }

      int rc = status_job(pjob, preq, pal, &preply->brp_un.brp_status, condensed, &bad);

      if ((rc != 0) && (rc != PBSE_PERM))
        {
        req_reject(rc, bad, preq, NULL, NULL);

        delete queue_iter;

        return;
        }

      sentJobCounter++;

      if (pjob->ji_qs.ji_state == JOB_STATE_QUEUED)
        qjcounter++;
      } /* END foreach (pjob from pque) */

    if (LOGLEVEL >= 5)
      {
      snprintf(log_buf, sizeof(log_buf), "Reported %ld total jobs for queue %s\n",
        sentJobCounter,
        pque->qu_qs.qu_name);

      log_event(PBSEVENT_SYSTEM, PBS_EVENTCLASS_QUEUE, pque->qu_qs.qu_name, log_buf);
      }
    } /* END for (pque) */

  reply_send_svr(preq);

  delete queue_iter;

  return;
  } // END handle_truncated_qstat()
void svr_shutdown(

  int type)  /* I */

  {
  pbs_attribute *pattr;
  job           *pjob;
  long           state = SV_STATE_DOWN;
  int            iter;
  char           log_buf[LOCAL_LOG_BUF_SIZE];

  close(lockfds);

  save_queues();

  /* Let's start by logging the shutdown and saving everything */
  get_svr_attr_l(SRV_ATR_State, &state);

  strcpy(log_buf, msg_shutdown_start);

  if (state == SV_STATE_SHUTIMM)
    {
    /* if already shutting down, another immediate/signal will force it */
    if ((type == SHUT_IMMEDIATE) || (type == SHUT_SIG))
      {
      state = SV_STATE_DOWN;
      set_svr_attr(SRV_ATR_State, &state);

      strcat(log_buf, "Forced");

      log_event(
        PBSEVENT_SYSTEM | PBSEVENT_ADMIN | PBSEVENT_DEBUG,
        PBS_EVENTCLASS_SERVER,
        msg_daemonname,
        log_buf);

      return;
      }
    }

  if (type == SHUT_IMMEDIATE)
    {
    state = SV_STATE_SHUTIMM;
    set_svr_attr(SRV_ATR_State, &state);

    strcat(log_buf, "Immediate");
    }
  else if (type == SHUT_DELAY)
    {
    state = SV_STATE_SHUTDEL;
    set_svr_attr(SRV_ATR_State, &state);

    strcat(log_buf, "Delayed");
    }
  else if (type == SHUT_QUICK)
    {
    state = SV_STATE_DOWN; /* set to down to break the pbsd_main loop */
    set_svr_attr(SRV_ATR_State, &state);

    strcat(log_buf, "Quick");
    }
  else
    {
    state = SV_STATE_SHUTIMM;
    set_svr_attr(SRV_ATR_State, &state);

    strcat(log_buf, "By Signal");
    }

  log_event(
    PBSEVENT_SYSTEM | PBSEVENT_ADMIN | PBSEVENT_DEBUG,
    PBS_EVENTCLASS_SERVER,
    msg_daemonname,
    log_buf);

  if ((type == SHUT_QUICK) || (type == SHUT_SIG)) /* quick, leave jobs as they are */
    {
    return;
    }

  svr_save(&server, SVR_SAVE_QUICK);

  iter = -1;

  while ((pjob = next_job(&alljobs, &iter)) != NULL)
    {
    if (pjob->ji_qs.ji_state == JOB_STATE_RUNNING)
      {
      pjob->ji_qs.ji_svrflags |= JOB_SVFLG_HOTSTART | JOB_SVFLG_HASRUN;

      pattr = &pjob->ji_wattr[JOB_ATR_checkpoint];

      if ((pattr->at_flags & ATR_VFLAG_SET) &&
          ((csv_find_string(pattr->at_val.at_str, "s") != NULL) ||
           (csv_find_string(pattr->at_val.at_str, "c") != NULL) ||
           (csv_find_string(pattr->at_val.at_str, "shutdown") != NULL)))
        {
        /* do checkpoint of job */

        if (shutdown_checkpoint(&pjob) == 0)
          {
          if (pjob != NULL)
            unlock_ji_mutex(pjob, __func__, "1", LOGLEVEL);

          continue;
          }
        }

      /* if there is no checkpoint (not supported, not allowed, or it fails),
       * rerun the job if possible, else kill it */

      rerun_or_kill(&pjob, msg_on_shutdown);
      }

    if (pjob != NULL)
      unlock_ji_mutex(pjob, __func__, "2", LOGLEVEL);
    }

  return;
  } /* END svr_shutdown() */
void *queue_route(

  void *vp)

  {
  pbs_queue *pque;
  job       *pjob = NULL;
  char      *queue_name;
  char       log_buf[LOCAL_LOG_BUF_SIZE];

  all_jobs_iterator *iter = NULL;

  queue_name = (char *)vp;

  if (queue_name == NULL)
    {
    sprintf(log_buf, "NULL queue name");
    log_err(-1, __func__, log_buf);
    return(NULL);
    }

  while (1)
    {
    pthread_mutex_lock(reroute_job_mutex);

    /* Before we attempt to service this queue, make sure we can find it. */
    pque = find_queuebyname(queue_name);

    if (pque == NULL)
      {
      sprintf(log_buf, "Could not find queue %s", queue_name);
      log_err(-1, __func__, log_buf);
      free(queue_name);
      /* don't hold the reroute mutex on the error path */
      pthread_mutex_unlock(reroute_job_mutex);
      return(NULL);
      }

    mutex_mgr que_mutex(pque->qu_mutex, true);

    pque->qu_jobs->lock();
    iter = pque->qu_jobs->get_iterator();
    pque->qu_jobs->unlock();

    if (LOGLEVEL >= 7)
      {
      snprintf(log_buf, sizeof(log_buf), "routing any ready jobs in queue: %s", queue_name);
      log_event(PBSEVENT_SYSTEM, PBS_EVENTCLASS_QUEUE, __func__, log_buf);
      }

    while ((pjob = next_job(pque->qu_jobs, iter)) != NULL)
      {
      /* We only want to try if routing has been tried at least once - this is to let
       * req_commit have the first crack at routing always. */

      if (pjob->ji_commit_done == 0) /* when req_commit is done it will set ji_commit_done to 1 */
        {
        unlock_ji_mutex(pjob, __func__, "1", LOGLEVEL);
        continue;
        }

      /* queue must be unlocked when calling reroute_job */
      que_mutex.unlock();
      reroute_job(pjob);
      unlock_ji_mutex(pjob, __func__, "2", LOGLEVEL);

      /* need to relock the queue when we go to call next_job */
      pque = find_queuebyname(queue_name);

      if (pque == NULL)
        {
        sprintf(log_buf, "Could not find queue %s", queue_name);
        log_err(-1, __func__, log_buf);
        free(queue_name);
        delete iter;
        pthread_mutex_unlock(reroute_job_mutex);
        return(NULL);
        }

      que_mutex.mark_as_locked();
      }

    /* we come out of the while loop with the queue locked. We don't want it
     * locked while we sleep. */
    que_mutex.unlock();
    pthread_mutex_unlock(reroute_job_mutex);

    delete iter;

    sleep(route_retry_interval);
    }

  free(queue_name);

  return(NULL);
  } /* END queue_route() */
static void req_stat_job_step2(

  struct stat_cntl *cntl)  /* I/O (free'd on return) */

  {
  svrattrl              *pal;
  job                   *pjob = NULL;

  struct batch_request  *preq;
  struct batch_reply    *preply;
  int                    rc = 0;

  enum TJobStatTypeEnum  type;
  pbs_queue             *pque = NULL;

  int                    exec_only = 0;
  int                    bad = 0;
  long                   DTime; /* delta time - only report full pbs_attribute list if J->MTime > DTime */

  static svrattrl       *dpal = NULL;
  int                    job_array_index = 0;
  job_array             *pa = NULL;
  char                   log_buf[LOCAL_LOG_BUF_SIZE];

  all_jobs_iterator     *iter;

  preq   = cntl->sc_origrq;
  type   = (enum TJobStatTypeEnum)cntl->sc_type;
  preply = &preq->rq_reply;

  /* See pbs_server_attributes(1B) for details on "poll_jobs" behaviour */

  if (dpal == NULL)
    {
    /* build 'delta' pbs_attribute list */

    svrattrl  *tpal;
    tlist_head dalist;
    int        aindex;

    int atrlist[] =
      {
      JOB_ATR_jobname,
      JOB_ATR_resc_used,
      JOB_ATR_LAST
      };

    CLEAR_LINK(dalist);

    for (aindex = 0; atrlist[aindex] != JOB_ATR_LAST; aindex++)
      {
      if ((tpal = attrlist_create("", "", 23)) == NULL)
        {
        return;
        }

      tpal->al_valln = atrlist[aindex];

      if (dpal == NULL)
        dpal = tpal;

      append_link(&dalist, &tpal->al_link, tpal);
      }
    }  /* END if (dpal == NULL) */

  if (type == tjstArray)
    {
    pa = get_array(preq->rq_ind.rq_status.rq_id);

    if (pa == NULL)
      {
      req_reject(PBSE_UNKARRAYID, 0, preq, NULL, "unable to find array");
      return;
      }
    }

    {
    all_jobs *ajptr = NULL;

    if (type == tjstQueue)
      ajptr = cntl->sc_pque->qu_jobs;
    else if (type == tjstSummarizeArraysQueue)
      ajptr = cntl->sc_pque->qu_jobs_array_sum;
    else if (type == tjstSummarizeArraysServer)
      ajptr = &array_summary;
    else
      ajptr = &alljobs;

    ajptr->lock();
    iter = ajptr->get_iterator();
    ajptr->unlock();
    }

  /*
   * now ready for part 3, building the status reply,
   * loop through again
   */

  if ((type == tjstSummarizeArraysQueue) ||
      (type == tjstSummarizeArraysServer))
    {
    /* No array can be owned for these options */
    update_array_statuses();
    }

  if (type == tjstJob)
    pjob = svr_find_job(preq->rq_ind.rq_status.rq_id, FALSE);
  else if (type == tjstQueue)
    pjob = next_job(cntl->sc_pque->qu_jobs, iter);
  else if (type == tjstSummarizeArraysQueue)
    pjob = next_job(cntl->sc_pque->qu_jobs_array_sum, iter);
  else if (type == tjstSummarizeArraysServer)
    pjob = next_job(&array_summary, iter);
  else if (type == tjstArray)
    {
    job_array_index = -1;
    pjob = NULL;

    /* increment job_array_index until we find a non-null pointer or hit the end */
    while (++job_array_index < pa->ai_qs.array_size)
      {
      if (pa->job_ids[job_array_index] != NULL)
        {
        if ((pjob = svr_find_job(pa->job_ids[job_array_index], FALSE)) != NULL)
          {
          break;
          }
        }
      }
    }
  else
    pjob = next_job(&alljobs, iter);

  DTime = 0;

  if (preq->rq_extend != NULL)
    {
    char *ptr;

    /* FORMAT: { EXECQONLY | DELTA:<EPOCHTIME> } */

    if (strstr(preq->rq_extend, EXECQUEONLY))
      exec_only = 1;

    ptr = strstr(preq->rq_extend, "DELTA:");

    if (ptr != NULL)
      {
      ptr += strlen("delta:");

      DTime = strtol(ptr, NULL, 10);
      }
    }

  if ((type == tjstTruncatedServer) ||
      (type == tjstTruncatedQueue))
    {
    long sentJobCounter;
    long qjcounter;
    long qmaxreport;

    all_queues_iterator *iter = NULL;

    svr_queues.lock();
    iter = svr_queues.get_iterator();
    svr_queues.unlock();

    /* loop through all queues */
    while ((pque = next_queue(&svr_queues, iter)) != NULL)
      {
      qjcounter = 0;

      if ((exec_only == 1) &&
          (pque->qu_qs.qu_type != QTYPE_Execution))
        {
        /* ignore routing queues */
        unlock_queue(pque, __func__, "ignore queue", LOGLEVEL);
        continue;
        }

      if (((pque->qu_attr[QA_ATR_MaxReport].at_flags & ATR_VFLAG_SET) != 0) &&
          (pque->qu_attr[QA_ATR_MaxReport].at_val.at_long >= 0))
        {
        qmaxreport = pque->qu_attr[QA_ATR_MaxReport].at_val.at_long;
        }
      else
        {
        qmaxreport = TMAX_JOB;
        }

      if (LOGLEVEL >= 5)
        {
        sprintf(log_buf, "giving scheduler up to %ld idle jobs in queue %s\n",
          qmaxreport,
          pque->qu_qs.qu_name);

        log_event(PBSEVENT_SYSTEM, PBS_EVENTCLASS_QUEUE, pque->qu_qs.qu_name, log_buf);
        }

      sentJobCounter = 0;

      /* loop through jobs in queue */
      if (pjob != NULL)
        unlock_ji_mutex(pjob, __func__, "5", LOGLEVEL);

      all_jobs_iterator *jobiter = NULL;
      pque->qu_jobs->lock();
      jobiter = pque->qu_jobs->get_iterator();
      pque->qu_jobs->unlock();

      while ((pjob = next_job(pque->qu_jobs, jobiter)) != NULL)
        {
        if ((qjcounter >= qmaxreport) &&
            (pjob->ji_qs.ji_state == JOB_STATE_QUEUED))
          {
          /* max_report of queued jobs reached for queue */
          unlock_ji_mutex(pjob, __func__, "6", LOGLEVEL);
          continue;
          }

        pal = (svrattrl *)GET_NEXT(preq->rq_ind.rq_status.rq_attr);

        rc = status_job(
               pjob,
               preq,
               (pjob->ji_wattr[JOB_ATR_mtime].at_val.at_long >= DTime) ? pal : dpal,
               &preply->brp_un.brp_status,
               &bad);

        if ((rc != 0) && (rc != PBSE_PERM))
          {
          req_reject(rc, bad, preq, NULL, NULL);

          unlock_ji_mutex(pjob, __func__, "7", LOGLEVEL);
          unlock_queue(pque, __func__, "perm", LOGLEVEL);

          delete iter;

          return;
          }

        sentJobCounter++;

        if (pjob->ji_qs.ji_state == JOB_STATE_QUEUED)
          qjcounter++;

        unlock_ji_mutex(pjob, __func__, "8", LOGLEVEL);
        }  /* END foreach (pjob from pque) */

      if (LOGLEVEL >= 5)
        {
        sprintf(log_buf, "sent scheduler %ld total jobs for queue %s\n",
          sentJobCounter,
          pque->qu_qs.qu_name);

        log_event(PBSEVENT_SYSTEM, PBS_EVENTCLASS_QUEUE, pque->qu_qs.qu_name, log_buf);
        }

      unlock_queue(pque, __func__, "end while", LOGLEVEL);
      }  /* END for (pque) */

    reply_send_svr(preq);

    delete iter;

    return;
    }  /* END if ((type == tjstTruncatedServer) || ...) */

  while (pjob != NULL)
    {
    /* go ahead and build the status reply for this job */

    if (exec_only)
      {
      if (cntl->sc_pque != NULL)
        {
        if (cntl->sc_pque->qu_qs.qu_type != QTYPE_Execution)
          goto nextjob;
        }
      else
        {
        if (pa != NULL)
          pthread_mutex_unlock(pa->ai_mutex);

        pque = get_jobs_queue(&pjob);

        if (pa != NULL)
          pthread_mutex_lock(pa->ai_mutex);

        if ((pjob == NULL) ||
            (pque == NULL))
          goto nextjob;

        mutex_mgr pque_mutex = mutex_mgr(pque->qu_mutex, true);

        if (pque->qu_qs.qu_type != QTYPE_Execution)
          {
          goto nextjob;
          }
        }
      }

    pal = (svrattrl *)GET_NEXT(preq->rq_ind.rq_status.rq_attr);

    rc = status_job(
           pjob,
           preq,
           pal,
           &preply->brp_un.brp_status,
           &bad);

    if ((rc != 0) && (rc != PBSE_PERM))
      {
      if (pa != NULL)
        {
        unlock_ai_mutex(pa, __func__, "1", LOGLEVEL);
        }

      unlock_ji_mutex(pjob, __func__, "9", LOGLEVEL);

      req_reject(rc, bad, preq, NULL, NULL);

      delete iter;

      return;
      }

    /* get next job */

nextjob:

    if (pjob != NULL)
      unlock_ji_mutex(pjob, __func__, "10", LOGLEVEL);

    if (type == tjstJob)
      break;

    if (type == tjstQueue)
      pjob = next_job(cntl->sc_pque->qu_jobs, iter);
    else if (type == tjstSummarizeArraysQueue)
      pjob = next_job(cntl->sc_pque->qu_jobs_array_sum, iter);
    else if (type == tjstSummarizeArraysServer)
      pjob = next_job(&array_summary, iter);
    else if (type == tjstArray)
      {
      pjob = NULL;

      /* increment job_array_index until we find a non-null pointer or hit the end */
      while (++job_array_index < pa->ai_qs.array_size)
        {
        if (pa->job_ids[job_array_index] != NULL)
          {
          if ((pjob = svr_find_job(pa->job_ids[job_array_index], FALSE)) != NULL)
            {
            break;
            }
          }
        }
      }
    else
      pjob = next_job(&alljobs, iter);

    rc = 0;
    }  /* END while (pjob != NULL) */

  delete iter;

  if (pa != NULL)
    {
    unlock_ai_mutex(pa, __func__, "1", LOGLEVEL);
    }

  reply_send_svr(preq);

  if (LOGLEVEL >= 7)
    {
    log_event(PBSEVENT_SYSTEM, PBS_EVENTCLASS_JOB, "req_statjob",
      "Successfully returned the status of queued jobs\n");
    }

  return;
  }  /* END req_stat_job_step2() */
int init_scheduling_cycle(server_info *sinfo)
  {
  group_info *user;  /* the user for the running jobs of the last cycle */
  queue_info *qinfo; /* used to cycle through the queues to sort the jobs */
  char decayed = 0;  /* boolean: have we decayed usage? */
  time_t t;          /* used in decaying fair share */
  int i, j;

  if (cstat.fair_share)
    {
    if (last_running != NULL)
      {
      /* add the usage which was accumulated between the last cycle and this
       * one and calculate a new value
       */

      for (i = 0; i < last_running_size; i++)
        {
        job_info** jobs;

        user = last_running[i].ginfo;

#if HIGH_PRECISION_FAIRSHARE
        jobs = sinfo -> jobs;         /* check all jobs (exiting, completed, running) */
#else
        jobs = sinfo -> running_jobs; /* check only running */
#endif

        for (j = 0; jobs[j] != NULL; j++)
          {
          if (jobs[j] -> is_completed || jobs[j] -> is_exiting ||
              jobs[j] -> is_running)
            if (!strcmp(last_running[i].name, jobs[j] -> name))
              break;
          }

        if (jobs[j] != NULL)
          {
          user -> usage +=
            calculate_usage_value(jobs[j] -> resused) -
            calculate_usage_value(last_running[i].resused);
          }
        }

      /* assign usage into temp usage since temp usage is used for usage
       * calculations. Temp usage starts at usage and can be modified later.
       */

      for (i = 0; i < last_running_size; i++)
        last_running[i].ginfo -> temp_usage = last_running[i].ginfo -> usage;
      }

    /* The half life for the fair share tree might have passed since the last
     * scheduling cycle. For that matter, several half lives could have
     * passed. If this is the case, perform as many decays as necessary.
     */

    t = cstat.current_time;

    while (t - last_decay > conf.half_life)
      {
      sched_log(PBSEVENT_DEBUG2, PBS_EVENTCLASS_SERVER, "",
                "Decaying Fairshare Tree");
      decay_fairshare_tree(conf.group_root);
      t -= conf.half_life;
      decayed = 1;
      }

    if (decayed)
      {
      /* set the time to when the last half-life should actually have occurred */
      last_decay =
        cstat.current_time - (cstat.current_time - last_decay) % conf.half_life;
      }

    if (cstat.current_time - last_sync > conf.sync_time)
      {
      write_usage();
      last_sync = cstat.current_time;
      sched_log(PBSEVENT_DEBUG2, PBS_EVENTCLASS_SERVER, "", "Usage Sync");
      }
    }

  if (cstat.help_starving_jobs)
    cstat.starving_job = update_starvation(sinfo -> jobs);

  /* sort queues by priority if requested */
  if (cstat.sort_queues)
    qsort(sinfo -> queues, sinfo -> num_queues, sizeof(queue_info *),
          cmp_queue_prio_dsc);

  if (cstat.sort_by[0].sort != NO_SORT)
    {
    if (cstat.by_queue || cstat.round_robin)
      {
      for (i = 0; i < sinfo -> num_queues; i++)
        {
        qinfo = sinfo -> queues[i];
        qsort(qinfo -> jobs, qinfo -> sc.total, sizeof(job_info *), cmp_sort);
        }
      }
    else
      qsort(sinfo -> jobs, sinfo -> sc.total, sizeof(job_info *), cmp_sort);
    }

  next_job(sinfo, INITIALIZE);

  return 1; /* SUCCESS */
  }
int scheduling_cycle(

  int sd)

  {
  server_info *sinfo;         /* ptr to the server/queue/job/node info */
  job_info    *jinfo;         /* ptr to the job to see if it can run */
  int          ret = SUCCESS; /* return code from is_ok_to_run_job() */
  char         log_msg[MAX_LOG_SIZE];     /* used to log a message about the job */
  char         comment[MAX_COMMENT_SIZE]; /* used to update the comment of the job */

  sched_log(PBSEVENT_DEBUG2, PBS_EVENTCLASS_REQUEST, "", "Entering Schedule");

  update_cycle_status();

  /* create the server / queue / job / node structures */

  if ((sinfo = query_server(sd)) == NULL)
    {
    fprintf(stderr, "Problem with creating server data structure\n");

    return(0);
    }

  if (init_scheduling_cycle(sinfo) == 0)
    {
    sched_log(
      PBSEVENT_DEBUG,
      PBS_EVENTCLASS_SERVER,
      sinfo -> name,
      "init_scheduling_cycle failed.");

    free_server(sinfo, 1);

    return(0);
    }

  /* main scheduling loop */

  while ((jinfo = next_job(sinfo, 0)))
    {
    sched_log(
      PBSEVENT_DEBUG2,
      PBS_EVENTCLASS_JOB,
      jinfo->name,
      "Considering job to run");

    if ((ret = is_ok_to_run_job(sd, sinfo, jinfo->queue, jinfo)) == SUCCESS)
      {
      run_update_job(sd, sinfo, jinfo->queue, jinfo);
      }
    else
      {
      if (jinfo->can_never_run)
        {
        sched_log(
          PBSEVENT_JOB,
          PBS_EVENTCLASS_JOB,
          jinfo->name,
          "Job Deleted because it would never run");

        pbs_deljob(sd, jinfo->name, "Job could never run");
        }

      jinfo->can_not_run = 1;

      if (translate_job_fail_code(ret, comment, log_msg))
        {
        /* if the comment doesn't get updated, the reason the job cannot
         * run has not changed, so we do not need to log it again */

        if (update_job_comment(sd, jinfo, comment) == 0)
          {
          sched_log(
            PBSEVENT_SCHED,
            PBS_EVENTCLASS_JOB,
            jinfo->name,
            log_msg);
          }
        }

      if ((ret != NOT_QUEUED) && cstat.strict_fifo)
        {
        update_jobs_cant_run(
          sd,
          jinfo->queue->jobs,
          jinfo,
          COMMENT_STRICT_FIFO,
          START_AFTER_JOB);
        }
      }
    }

  if (cstat.fair_share)
    update_last_running(sinfo);

  free_server(sinfo, 1); /* free server and queues and jobs */

  sched_log(PBSEVENT_DEBUG2, PBS_EVENTCLASS_REQUEST, "", "Leaving schedule\n");

  return 0;
  }
/**
 * @brief Update function called after every event
 *
 * The heart of the scheduler, the actual scheduling algorithm. This will be
 * passed to the event loop as a callback and will be called every time an
 * event is executed. Therefore the code should be lightweight since it will
 * be run very frequently.
 *
 * @TODO:
 *   currently this will only grab a job and create a single agent to execute
 *   the job.
 *
 * @TODO: allow for runonpfile jobs to have multiple agents based on size
 * @TODO: allow for job preemption. The scheduler can pause jobs, allow it
 * @TODO: allow for specific hosts to be chosen.
 */
void scheduler_update(scheduler_t* scheduler)
{
  /* queue used to hold jobs if an exclusive job enters the system */
  static job_t*  job  = NULL;
  static host_t* host = NULL;
  static int lockout = 0;

  /* locals */
  int n_agents = g_tree_nnodes(scheduler->agents);
  int n_jobs   = active_jobs(scheduler->job_list);

  /* check to see if we are in and can exit the startup state */
  if(scheduler->s_startup && n_agents == 0)
  {
    event_signal(database_update_event, NULL);
    scheduler->s_startup = 0;
  }

  /* check if we are able to close the scheduler */
  if(closing && n_agents == 0 && n_jobs == 0)
  {
    event_loop_terminate();
    return;
  }

  if(lockout && n_agents == 0 && n_jobs == 0)
    lockout = 0;

  if(job == NULL && !lockout)
  {
    while((job = peek_job(scheduler->job_queue)) != NULL)
    {
      // check if the agent is required to run on local host
      if(is_meta_special(
          g_tree_lookup(scheduler->meta_agents, job->agent_type), SAG_LOCAL))
      {
        host = g_tree_lookup(scheduler->host_list, LOCAL_HOST);
        if(!(host->running < host->max))
        {
          job = NULL;
          break;
        }
      }
      // check if the job is required to run on a specific machine
      else if((job->required_host != NULL))
      {
        host = g_tree_lookup(scheduler->host_list, job->required_host);
        if(host != NULL)
        {
          if(!(host->running < host->max))
          {
            job = NULL;
            break;
          }
        }
        else
        {
          //log_printf("ERROR %s.%d: jq_pk %d jq_host '%s' not in the agent list!\n",
          //    __FILE__, __LINE__, job->id, job->required_host);
          job->message = "ERROR: jq_host not in the agent list!";
          job_fail_event(scheduler, job);
          job = NULL;
          break;
        }
      }
      // the generic case, this can run anywhere, find a place
      else if((host = get_host(&(scheduler->host_queue), 1)) == NULL)
      {
        job = NULL;
        break;
      }

      next_job(scheduler->job_queue);

      if(is_meta_special(
          g_tree_lookup(scheduler->meta_agents, job->agent_type), SAG_EXCLUSIVE))
      {
        V_SCHED("JOB_INIT: exclusive, postponing initialization\n");
        break;
      }

      V_SCHED("Starting JOB[%d].%s\n", job->id, job->agent_type);
      agent_init(scheduler, host, job);
      job = NULL;
    }
  }

  if(job != NULL && n_agents == 0 && n_jobs == 0)
  {
    agent_init(scheduler, host, job);
    lockout = 1;
    job  = NULL;
    host = NULL;
  }

  if(scheduler->s_pause)
  {
    scheduler->s_startup = 1;
    scheduler->s_pause   = 0;
  }
}
static void req_stat_job_step2(

  struct stat_cntl *cntl)  /* I/O (free'd on return) */

  {
  svrattrl              *pal;
  job                   *pjob = NULL;

  struct batch_request  *preq;
  struct batch_reply    *preply;
  int                    rc = 0;

  enum TJobStatTypeEnum  type;
  pbs_queue             *pque = NULL;

  int                    exec_only = 0;
  int                    bad = 0;
  long                   DTime; /* delta time - only report full pbs_attribute list if J->MTime > DTime */

  static svrattrl       *dpal = NULL;
  int                    job_array_index = 0;
  job_array             *pa = NULL;
  char                   log_buf[LOCAL_LOG_BUF_SIZE];

  int                    iter;
  time_t                 time_now = time(NULL);
  long                   poll_jobs = 0;
  char                   job_id[PBS_MAXSVRJOBID + 1];
  int                    job_substate = -1;
  time_t                 job_momstattime = -1;

  preq   = cntl->sc_origrq;
  type   = (enum TJobStatTypeEnum)cntl->sc_type;
  preply = &preq->rq_reply;

  /* See pbs_server_attributes(1B) for details on "poll_jobs" behaviour */

  if (dpal == NULL)
    {
    /* build 'delta' pbs_attribute list */

    svrattrl  *tpal;
    tlist_head dalist;
    int        aindex;

    int atrlist[] =
      {
      JOB_ATR_jobname,
      JOB_ATR_resc_used,
      JOB_ATR_LAST
      };

    CLEAR_LINK(dalist);

    for (aindex = 0; atrlist[aindex] != JOB_ATR_LAST; aindex++)
      {
      if ((tpal = attrlist_create("", "", 23)) == NULL)
        {
        return;
        }

      tpal->al_valln = atrlist[aindex];

      if (dpal == NULL)
        dpal = tpal;

      append_link(&dalist, &tpal->al_link, tpal);
      }
    }  /* END if (dpal == NULL) */

  if (type == tjstArray)
    {
    pa = get_array(preq->rq_ind.rq_status.rq_id);

    if (pa == NULL)
      {
      req_reject(PBSE_UNKARRAYID, 0, preq, NULL, "unable to find array");
      return;
      }
    }

  iter = -1;

  get_svr_attr_l(SRV_ATR_PollJobs, &poll_jobs);

  if (!poll_jobs)
    {
    /* poll_jobs not set - indicates we may need to obtain fresh data from MOM */

    if (cntl->sc_jobid[0] == '\0')
      pjob = NULL;
    else
      pjob = svr_find_job(cntl->sc_jobid, FALSE);

    while (1)
      {
      if (pjob == NULL)
        {
        /* start from the first job */

        if (type == tjstJob)
          {
          pjob = svr_find_job(preq->rq_ind.rq_status.rq_id, FALSE);
          }
        else if (type == tjstQueue)
          {
          pjob = next_job(cntl->sc_pque->qu_jobs, &iter);
          }
        else if (type == tjstArray)
          {
          job_array_index = 0;

          /* increment job_array_index until we find a non-null pointer or hit the end */
          while (job_array_index < pa->ai_qs.array_size)
            {
            if (pa->job_ids[job_array_index] != NULL)
              {
              if ((pjob = svr_find_job(pa->job_ids[job_array_index], FALSE)) != NULL)
                {
                unlock_ji_mutex(pjob, __func__, "2", LOGLEVEL);
                break;
                }
              }

            job_array_index++;
            }
          }
        else
          {
          pjob = next_job(&alljobs, &iter);
          }
        }  /* END if (pjob == NULL) */
      else
        {
        strcpy(job_id, pjob->ji_qs.ji_jobid);
        unlock_ji_mutex(pjob, __func__, "3", LOGLEVEL);

        if (type == tjstJob)
          break;

        if (type == tjstQueue)
          pjob = next_job(cntl->sc_pque->qu_jobs, &iter);
        else if (type == tjstArray)
          {
          pjob = NULL;

          /* increment job_array_index until we find a non-null pointer or hit the end */
          while (++job_array_index < pa->ai_qs.array_size)
            {
            if (pa->job_ids[job_array_index] != NULL)
              {
              if ((pjob = svr_find_job(pa->job_ids[job_array_index], FALSE)) != NULL)
                {
                unlock_ji_mutex(pjob, __func__, "3", LOGLEVEL);
                break;
                }
              }
            }
          }
        else
          pjob = next_job(&alljobs, &iter);
        }

      if (pjob == NULL)
        break;

      strcpy(job_id, pjob->ji_qs.ji_jobid);
      job_substate = pjob->ji_qs.ji_substate;
      job_momstattime = pjob->ji_momstat;
      strcpy(cntl->sc_jobid, job_id);
      unlock_ji_mutex(pjob, __func__, "4", LOGLEVEL);
      pjob = NULL;

      /* PBS_RESTAT_JOB defaults to 30 seconds */
      if ((job_substate == JOB_SUBSTATE_RUNNING) &&
          ((time_now - job_momstattime) > JobStatRate))
        {
        /* go to MOM for status */
        if ((rc = stat_to_mom(job_id, cntl)) == PBSE_MEM_MALLOC)
          break;

        if (rc != 0)
          {
          pjob = svr_find_job(job_id, FALSE);

          rc = 0;

          continue;
          }

        if (pa != NULL)
          unlock_ai_mutex(pa, __func__, "1", LOGLEVEL);

        return; /* will pick up after mom replies */
        }
      }  /* END while(1) */

    if (rc != 0)
      {
      if (pa != NULL)
        unlock_ai_mutex(pa, __func__, "2", LOGLEVEL);

      reply_free(preply);

      req_reject(rc, 0, preq, NULL, "cannot get update from mom");

      return;
      }
    }  /* END if (!server.sv_attr[SRV_ATR_PollJobs].at_val.at_long) */

  /*
   * now ready for part 3, building the status reply,
   * loop through again
   */

  if ((type == tjstSummarizeArraysQueue) ||
      (type == tjstSummarizeArraysServer))
    {
    /* No array can be owned for these options */
    update_array_statuses();
    }

  if (type == tjstJob)
    pjob = svr_find_job(preq->rq_ind.rq_status.rq_id, FALSE);
  else if (type == tjstQueue)
    pjob = next_job(cntl->sc_pque->qu_jobs, &iter);
  else if (type == tjstSummarizeArraysQueue)
    pjob = next_job(cntl->sc_pque->qu_jobs_array_sum, &iter);
  else if (type == tjstSummarizeArraysServer)
    pjob = next_job(&array_summary, &iter);
  else if (type == tjstArray)
    {
    job_array_index = -1;
    pjob = NULL;

    /* increment job_array_index until we find a non-null pointer or hit the end */
    while (++job_array_index < pa->ai_qs.array_size)
      {
      if (pa->job_ids[job_array_index] != NULL)
        {
        if ((pjob = svr_find_job(pa->job_ids[job_array_index], FALSE)) != NULL)
          {
          break;
          }
        }
      }
    }
  else
    pjob = next_job(&alljobs, &iter);

  DTime = 0;

  if (preq->rq_extend != NULL)
    {
    char *ptr;

    /* FORMAT: { EXECQONLY | DELTA:<EPOCHTIME> } */

    if (strstr(preq->rq_extend, EXECQUEONLY))
      exec_only = 1;

    ptr = strstr(preq->rq_extend, "DELTA:");

    if (ptr != NULL)
      {
      ptr += strlen("delta:");

      DTime = strtol(ptr, NULL, 10);
      }
    }

  if ((type == tjstTruncatedServer) ||
      (type == tjstTruncatedQueue))
    {
    long sentJobCounter;
    long qjcounter;
    long qmaxreport;
    int  iter = -1;

    /* loop through all queues */
    while ((pque = next_queue(&svr_queues, &iter)) != NULL)
      {
      qjcounter = 0;

      if ((exec_only == 1) &&
          (pque->qu_qs.qu_type != QTYPE_Execution))
        {
        /* ignore routing queues */
        unlock_queue(pque, __func__, "ignore queue", LOGLEVEL);
        continue;
        }

      if (((pque->qu_attr[QA_ATR_MaxReport].at_flags & ATR_VFLAG_SET) != 0) &&
          (pque->qu_attr[QA_ATR_MaxReport].at_val.at_long >= 0))
        {
        qmaxreport = pque->qu_attr[QA_ATR_MaxReport].at_val.at_long;
        }
      else
        {
        qmaxreport = TMAX_JOB;
        }

      if (LOGLEVEL >= 5)
        {
        sprintf(log_buf, "giving scheduler up to %ld idle jobs in queue %s\n",
          qmaxreport,
          pque->qu_qs.qu_name);

        log_event(PBSEVENT_SYSTEM, PBS_EVENTCLASS_QUEUE, pque->qu_qs.qu_name, log_buf);
        }

      sentJobCounter = 0;

      /* loop through jobs in queue */
      if (pjob != NULL)
        unlock_ji_mutex(pjob, __func__, "5", LOGLEVEL);

      iter = -1;

      while ((pjob = next_job(pque->qu_jobs, &iter)) != NULL)
        {
        if ((qjcounter >= qmaxreport) &&
            (pjob->ji_qs.ji_state == JOB_STATE_QUEUED))
          {
          /* max_report of queued jobs reached for queue */
          unlock_ji_mutex(pjob, __func__, "6", LOGLEVEL);
          continue;
          }

        pal = (svrattrl *)GET_NEXT(preq->rq_ind.rq_status.rq_attr);

        rc = status_job(
               pjob,
               preq,
               (pjob->ji_wattr[JOB_ATR_mtime].at_val.at_long >= DTime) ? pal : dpal,
               &preply->brp_un.brp_status,
               &bad);

        if ((rc != 0) && (rc != PBSE_PERM))
          {
          req_reject(rc, bad, preq, NULL, NULL);

          if (pa != NULL)
            {
            unlock_ai_mutex(pa, __func__, "1", LOGLEVEL);
            }

          unlock_ji_mutex(pjob, __func__, "7", LOGLEVEL);
          unlock_queue(pque, __func__, "perm", LOGLEVEL);

          return;
          }

        sentJobCounter++;

        if (pjob->ji_qs.ji_state == JOB_STATE_QUEUED)
          qjcounter++;

        unlock_ji_mutex(pjob, __func__, "8", LOGLEVEL);
        }  /* END foreach (pjob from pque) */

      if (LOGLEVEL >= 5)
        {
        sprintf(log_buf, "sent scheduler %ld total jobs for queue %s\n",
          sentJobCounter,
          pque->qu_qs.qu_name);

        log_event(PBSEVENT_SYSTEM, PBS_EVENTCLASS_QUEUE, pque->qu_qs.qu_name, log_buf);
        }

      unlock_queue(pque, __func__, "end while", LOGLEVEL);
      }  /* END for (pque) */

    if (pa != NULL)
      unlock_ai_mutex(pa, __func__, "1", LOGLEVEL);

    reply_send_svr(preq);

    return;
    }  /* END if ((type == tjstTruncatedServer) || ...) */

  while (pjob != NULL)
    {
    /* go ahead and build the status reply for this job */

    if (exec_only)
      {
      if (cntl->sc_pque != NULL)
        {
        if (cntl->sc_pque->qu_qs.qu_type != QTYPE_Execution)
          goto nextjob;
        }
      else
        {
        if (pa != NULL)
          pthread_mutex_unlock(pa->ai_mutex);

        pque = get_jobs_queue(&pjob);

        if (pa != NULL)
          pthread_mutex_lock(pa->ai_mutex);

        if ((pjob == NULL) ||
            (pque == NULL))
          goto nextjob;

        if (pque->qu_qs.qu_type != QTYPE_Execution)
          {
          unlock_queue(pque, __func__, "not exec", LOGLEVEL);
          goto nextjob;
          }

        unlock_queue(pque, __func__, "exec", LOGLEVEL);
        }
      }

    pal = (svrattrl *)GET_NEXT(preq->rq_ind.rq_status.rq_attr);

    rc = status_job(
           pjob,
           preq,
           pal,
           &preply->brp_un.brp_status,
           &bad);

    if ((rc != 0) && (rc != PBSE_PERM))
      {
      if (pa != NULL)
        {
        unlock_ai_mutex(pa, __func__, "1", LOGLEVEL);
        }

      unlock_ji_mutex(pjob, __func__, "9", LOGLEVEL);

      req_reject(rc, bad, preq, NULL, NULL);

      return;
      }

    /* get next job */

nextjob:

    if (pjob != NULL)
      unlock_ji_mutex(pjob, __func__, "10", LOGLEVEL);

    if (type == tjstJob)
      break;

    if (type == tjstQueue)
      pjob = next_job(cntl->sc_pque->qu_jobs, &iter);
    else if (type == tjstSummarizeArraysQueue)
      pjob = next_job(cntl->sc_pque->qu_jobs_array_sum, &iter);
    else if (type == tjstSummarizeArraysServer)
      pjob = next_job(&array_summary, &iter);
    else if (type == tjstArray)
      {
      pjob = NULL;

      /* increment job_array_index until we find a non-null pointer or hit the end */
      while (++job_array_index < pa->ai_qs.array_size)
        {
        if (pa->job_ids[job_array_index] != NULL)
          {
          if ((pjob = svr_find_job(pa->job_ids[job_array_index], FALSE)) != NULL)
            {
            break;
            }
          }
        }
      }
    else
      pjob = next_job(&alljobs, &iter);

    rc = 0;
    }  /* END while (pjob != NULL) */

  if (pa != NULL)
    {
    unlock_ai_mutex(pa, __func__, "1", LOGLEVEL);
    }

  reply_send_svr(preq);

  if (LOGLEVEL >= 7)
    {
    log_event(PBSEVENT_SYSTEM, PBS_EVENTCLASS_JOB, "req_statjob",
      "Successfully returned the status of queued jobs\n");
    }

  return;
  }  /* END req_stat_job_step2() */
void *queue_route(

  void *vp)

  {
  pbs_queue *pque;
  job       *pjob = NULL;
  char      *queue_name;
  char       log_buf[LOCAL_LOG_BUF_SIZE];

  int        iter = -1;
  time_t     time_now = time(NULL);

  queue_name = (char *)vp;

  if (queue_name == NULL)
    {
    sprintf(log_buf, "NULL queue name");
    log_err(-1, __func__, log_buf);
    return(NULL);
    }

  if (LOGLEVEL >= 7)
    {
    snprintf(log_buf, sizeof(log_buf), "queue name: %s", queue_name);
    log_event(PBSEVENT_SYSTEM, PBS_EVENTCLASS_QUEUE, __func__, log_buf);
    }

  pthread_mutex_lock(reroute_job_mutex);

  pque = find_queuebyname(queue_name);

  if (pque == NULL)
    {
    sprintf(log_buf, "Could not find queue %s", queue_name);
    log_err(-1, __func__, log_buf);
    free(queue_name);
    pthread_mutex_unlock(reroute_job_mutex);
    return(NULL);
    }

  while ((pjob = next_job(pque->qu_jobs, &iter)) != NULL)
    {
    /* the second condition says we only want to try if routing
     * has been tried at least once - this is to let req_commit have the
     * first crack at routing always */

    unlock_queue(pque, __func__, (char *)NULL, 0);

    if ((pjob->ji_qs.ji_un.ji_routet.ji_rteretry <= time_now - ROUTE_RETRY_TIME) &&
        (pjob->ji_qs.ji_un.ji_routet.ji_rteretry != 0))
      {
      reroute_job(pjob, pque);
      unlock_ji_mutex(pjob, __func__, (char *)"1", LOGLEVEL);
      }
    else
      unlock_ji_mutex(pjob, __func__, (char *)"1", LOGLEVEL);
    }

  free(queue_name);
  unlock_queue(pque, __func__, (char *)NULL, 0);
  pthread_mutex_unlock(reroute_job_mutex);

  return(NULL);
  } /* END queue_route() */
void *delete_all_work(

  void *vp)

  {
  batch_request *preq = (batch_request *)vp;

  if (qdel_all_tracker.start_deleting_all_if_possible(preq->rq_user, preq->rq_perm) == false)
    {
    reply_ack(preq);
    return(NULL);
    }

  batch_request *preq_dup = duplicate_request(preq);
  job           *pjob;

  all_jobs_iterator *iter = NULL;

  int   failed_deletes = 0;
  int   total_jobs = 0;
  int   rc = PBSE_NONE;
  char  tmpLine[MAXLINE];
  char *Msg = preq->rq_extend;

  alljobs.lock();
  iter = alljobs.get_iterator();
  alljobs.unlock();

  while ((pjob = next_job(&alljobs, iter)) != NULL)
    {
    // use mutex manager to make sure job mutex locks are properly handled at exit
    mutex_mgr job_mutex(pjob->ji_mutex, true);

    if ((rc = forced_jobpurge(pjob, preq_dup)) == PURGE_SUCCESS)
      {
      job_mutex.set_unlock_on_exit(false);
      continue;
      }

    if (pjob->ji_qs.ji_state >= JOB_STATE_EXITING)
      {
      job_mutex.unlock();

      if (rc == -1)
        {
        // forced_jobpurge() freed preq_dup, so reallocate it
        preq_dup = duplicate_request(preq);
        preq_dup->rq_noreply = TRUE;
        }

      continue;
      }

    total_jobs++;

    /* mutex is freed below */

    if (rc == PBSE_NONE)
      {
      if ((rc = execute_job_delete(pjob, Msg, preq_dup)) == PBSE_NONE)
        {
        // execute_job_delete() handles the mutex, so don't unlock on exit
        job_mutex.set_unlock_on_exit(false);
        reply_ack(preq_dup);
        }

      /* preq_dup has been freed at this point. Either reallocate it or set it to NULL. */
      if (rc == PURGE_SUCCESS)
        {
        preq_dup = duplicate_request(preq);
        preq_dup->rq_noreply = TRUE;
        }
      else
        preq_dup = NULL;
      }

    if (rc != PURGE_SUCCESS)
      {
      /* duplicate the preq so we don't have a problem with double frees */
      preq_dup = duplicate_request(preq);
      preq_dup->rq_noreply = TRUE;

      if ((rc == MOM_DELETE) || (rc == ROUTE_DELETE))
        failed_deletes++;
      }
    }

  delete iter;

  qdel_all_tracker.done_deleting_all(preq->rq_user, preq->rq_perm);

  if (failed_deletes == 0)
    {
    reply_ack(preq);

    /* PURGE_SUCCESS means this was qdel -p all. In this case no reply_*()
     * functions have been called. */
    if (rc == PURGE_SUCCESS)
      {
      free_br(preq_dup);
      preq_dup = NULL;
      }
    }
  else
    {
    snprintf(tmpLine, sizeof(tmpLine), "Deletes failed for %d of %d jobs",
      failed_deletes,
      total_jobs);

    req_reject(PBSE_SYSTEM, 0, preq, NULL, tmpLine);
    }

  /* preq_dup is reallocated at the end of the loop, so free the extra
   * one if it is there */
  if (preq_dup != NULL)
    free_br(preq_dup);

  return(NULL);
  } /* END delete_all_work() */
void purge_completed_jobs(

  struct batch_request *preq)  /* I */

  {
  job    *pjob;
  char   *time_str;
  time_t  purge_time = 0;
  int     iter;
  char    log_buf[LOCAL_LOG_BUF_SIZE];

  /* get the time to purge the jobs that completed before */
  time_str = preq->rq_extend;
  time_str += strlen(PURGECOMP);
  purge_time = strtol(time_str, NULL, 10);

  /*
   * Clean unreported capability is only for operators and managers.
   * Check if the request is authorized.
   */

  if ((preq->rq_perm & (ATR_DFLAG_OPRD | ATR_DFLAG_OPWR |
                        ATR_DFLAG_MGRD | ATR_DFLAG_MGWR)) == 0)
    {
    req_reject(PBSE_PERM, 0, preq, NULL,
      "must have operator or manager privilege to use -c parameter");
    return;
    }

  reply_ack(preq);

  if (LOGLEVEL >= 4)
    {
    sprintf(log_buf, "Received purge completed jobs command, purge time is %ld (%s)",
      (long)purge_time,
      preq->rq_extend);

    log_event(PBSEVENT_SYSTEM, PBS_EVENTCLASS_REQUEST, __func__, log_buf);
    }

  iter = -1;

  while ((pjob = next_job(&alljobs, &iter)) != NULL)
    {
    if ((pjob->ji_qs.ji_substate == JOB_SUBSTATE_COMPLETE) &&
        (pjob->ji_wattr[JOB_ATR_comp_time].at_val.at_long <= purge_time) &&
        ((pjob->ji_wattr[JOB_ATR_reported].at_flags & ATR_VFLAG_SET) != 0) &&
        (pjob->ji_wattr[JOB_ATR_reported].at_val.at_long == 0))
      {
      if (LOGLEVEL >= 4)
        {
        sprintf(log_buf, "Reported job is COMPLETED (%ld), setting reported to TRUE",
          pjob->ji_wattr[JOB_ATR_comp_time].at_val.at_long);

        log_event(PBSEVENT_JOB, PBS_EVENTCLASS_JOB, pjob->ji_qs.ji_jobid, log_buf);
        }

      pjob->ji_wattr[JOB_ATR_reported].at_val.at_long = 1;
      pjob->ji_wattr[JOB_ATR_reported].at_flags = ATR_VFLAG_SET | ATR_VFLAG_MODIFY;

      job_save(pjob, SAVEJOB_FULL, 0);
      }

    unlock_ji_mutex(pjob, __func__, "1", LOGLEVEL);
    }

  return;
  } /* END purge_completed_jobs() */
int handle_delete_all(

  struct batch_request *preq,
  struct batch_request *preq_tmp,
  char                 *Msg)

  {
  /* don't use the actual request so we can reply about all of the jobs */
  struct batch_request *preq_dup = duplicate_request(preq);

  job *pjob;
  int  iter = -1;
  int  failed_deletes = 0;
  int  total_jobs = 0;
  int  rc = PBSE_NONE;
  char tmpLine[MAXLINE];

  preq_dup->rq_noreply = TRUE;

  if (preq_tmp != NULL)
    {
    reply_ack(preq_tmp);
    preq->rq_noreply = TRUE; /* set for no more replies */
    }

  while ((pjob = next_job(&alljobs, &iter)) != NULL)
    {
    if ((rc = forced_jobpurge(pjob, preq_dup)) == PURGE_SUCCESS)
      {
      continue;
      }

    if (pjob->ji_qs.ji_state >= JOB_STATE_EXITING)
      {
      unlock_ji_mutex(pjob, __func__, "1", LOGLEVEL);

      continue;
      }

    total_jobs++;

    /* mutex is freed below */

    if (rc == PBSE_NONE)
      {
      if ((rc = execute_job_delete(pjob, Msg, preq_dup)) == PBSE_NONE)
        reply_ack(preq_dup);

      /* mark this as NULL because it has been freed */
      preq_dup = NULL;
      }

    if (rc != PURGE_SUCCESS)
      {
      /* duplicate the preq so we don't have a problem with double frees */
      preq_dup = duplicate_request(preq);
      preq_dup->rq_noreply = TRUE;

      if ((rc == MOM_DELETE) || (rc == ROUTE_DELETE))
        failed_deletes++;
      }
    }

  if (failed_deletes == 0)
    {
    reply_ack(preq);

    /* PURGE_SUCCESS means this was qdel -p all. In this case no reply_*()
     * functions have been called. */
    if (rc == PURGE_SUCCESS)
      {
      free_br(preq_dup);
      preq_dup = NULL;
      }
    }
  else
    {
    snprintf(tmpLine, sizeof(tmpLine), "Deletes failed for %d of %d jobs",
      failed_deletes,
      total_jobs);

    req_reject(PBSE_SYSTEM, 0, preq, NULL, tmpLine);
    }

  /* preq_dup is reallocated at the end of the loop, so free the extra
   * one if it is there */
  if (preq_dup != NULL)
    free_br(preq_dup);

  return(PBSE_NONE);
  } /* END handle_delete_all() */
END_TEST

START_TEST(next_job_test)
  {
  all_jobs    alljobs;
  struct job *result;

  result = next_job(NULL, NULL);
  fail_unless(result == NULL, "null input parameters fail");

  result = next_job(&alljobs, NULL);
  fail_unless(result == NULL, "NULL input iterator fail");

  struct job *test_job1 = job_alloc();
  strcpy(test_job1->ji_qs.ji_jobid, "test_job1");
  int rc = insert_job(&alljobs, test_job1);
  fail_unless(rc == PBSE_NONE, "job insert fail1");

  struct job *test_job2 = job_alloc();
  strcpy(test_job2->ji_qs.ji_jobid, "test_job2");
  rc = insert_job(&alljobs, test_job2);
  fail_unless(rc == PBSE_NONE, "job insert fail2");

  struct job *test_job3 = job_alloc();
  strcpy(test_job3->ji_qs.ji_jobid, "test_job3");
  rc = insert_job(&alljobs, test_job3);
  fail_unless(rc == PBSE_NONE, "job insert fail3");

  struct job *test_job4 = job_alloc();
  strcpy(test_job4->ji_qs.ji_jobid, "test_job4");
  rc = insert_job(&alljobs, test_job4);
  fail_unless(rc == PBSE_NONE, "job insert fail4");

  struct job *test_job5 = job_alloc();
  strcpy(test_job5->ji_qs.ji_jobid, "test_job5");
  rc = insert_job(&alljobs, test_job5);
  fail_unless(rc == PBSE_NONE, "job insert fail5");

  /* first traverse to see if we get all 5 jobs */
  all_jobs_iterator *iter;

  alljobs.lock();
  iter = alljobs.get_iterator();
  alljobs.unlock();

  job *pjob = next_job(&alljobs, iter);
  int jobcount = 0;

  while (pjob != NULL)
    {
    jobcount++;
    pjob = next_job(&alljobs, iter);
    }

  fail_unless(jobcount == 5, "Expected job count to be 5, but it was %d", jobcount);

  all_jobs_iterator *iter2;

  alljobs.lock();
  iter2 = alljobs.get_iterator();
  alljobs.unlock();

  /* simulate another thread adding more jobs to alljobs */
  struct job *test_job6 = job_alloc();
  strcpy(test_job6->ji_qs.ji_jobid, "test_job6");
  rc = insert_job(&alljobs, test_job6);
  fail_unless(rc == PBSE_NONE, "job insert fail6");

  pjob = next_job(&alljobs, iter2);
  jobcount = 0;

  while (pjob != NULL)
    {
    jobcount++;
    fail_unless(pjob->ji_qs.ji_jobid[0] != (char)254, "get_next returned a deleted job");
    pjob = next_job(&alljobs, iter2);
    }

  fail_unless(jobcount == 6, "Expected job count to be 6, but it was %d", jobcount);
  }