void stat_mom_job( char *job_id) { struct stat_cntl *cntl; cntl = (struct stat_cntl *)calloc(1, sizeof(struct stat_cntl)); if (cntl == NULL) { return; } cntl->sc_type = 1; cntl->sc_conn = -1; cntl->sc_pque = (pbs_queue *)0; cntl->sc_origrq = (struct batch_request *)0; cntl->sc_post = 0; /* tell stat_update() to free cntl */ cntl->sc_jobid[0] = '\0'; /* cause "start from beginning" */ if (stat_to_mom(job_id, cntl) != 0) { free(cntl); } /* if not an error, cntl free'd in stat_update() */ return; } /* END stat_mom_job() */
static void req_stat_job_step2( struct stat_cntl *cntl) /* I/O (free'd on return) */ { svrattrl *pal; job *pjob = NULL; struct batch_request *preq; struct batch_reply *preply; int rc = 0; enum TJobStatTypeEnum type; pbs_queue *pque = NULL; int exec_only = 0; int bad = 0; long DTime; /* delta time - only report full pbs_attribute list if J->MTime > DTime */ static svrattrl *dpal = NULL; int job_array_index = 0; job_array *pa = NULL; char log_buf[LOCAL_LOG_BUF_SIZE]; int iter; time_t time_now = time(NULL); long poll_jobs = 0; char job_id[PBS_MAXSVRJOBID+1]; int job_substate = -1; time_t job_momstattime = -1; preq = cntl->sc_origrq; type = (enum TJobStatTypeEnum)cntl->sc_type; preply = &preq->rq_reply; /* See pbs_server_attributes(1B) for details on "poll_jobs" behaviour */ if (dpal == NULL) { /* build 'delta' pbs_attribute list */ svrattrl *tpal; tlist_head dalist; int aindex; int atrlist[] = { JOB_ATR_jobname, JOB_ATR_resc_used, JOB_ATR_LAST }; CLEAR_LINK(dalist); for (aindex = 0;atrlist[aindex] != JOB_ATR_LAST;aindex++) { if ((tpal = attrlist_create("", "", 23)) == NULL) { return; } tpal->al_valln = atrlist[aindex]; if (dpal == NULL) dpal = tpal; append_link(&dalist, &tpal->al_link, tpal); } } /* END if (dpal == NULL) */ if (type == tjstArray) { pa = get_array(preq->rq_ind.rq_status.rq_id); if (pa == NULL) { req_reject(PBSE_UNKARRAYID, 0, preq, NULL, "unable to find array"); return; } } iter = -1; get_svr_attr_l(SRV_ATR_PollJobs, &poll_jobs); if (!poll_jobs) { /* polljobs not set - indicates we may need to obtain fresh data from MOM */ if (cntl->sc_jobid[0] == '\0') pjob = NULL; else pjob = svr_find_job(cntl->sc_jobid, FALSE); while (1) { if (pjob == NULL) { /* start from the first job */ if (type == tjstJob) { pjob = svr_find_job(preq->rq_ind.rq_status.rq_id, FALSE); } else if (type == tjstQueue) { pjob = next_job(cntl->sc_pque->qu_jobs,&iter); } else if (type == tjstArray) { job_array_index = 0; /* increment job_array_index until we find a non-null pointer or hit the end */ while (job_array_index < pa->ai_qs.array_size) { if (pa->job_ids[job_array_index] != NULL) { if ((pjob = svr_find_job(pa->job_ids[job_array_index], FALSE)) != NULL) { unlock_ji_mutex(pjob, __func__, "2", LOGLEVEL); break; } } job_array_index++; } } else { pjob = next_job(&alljobs,&iter); } } /* END if (pjob == NULL) */ else { strcpy(job_id, pjob->ji_qs.ji_jobid); unlock_ji_mutex(pjob, __func__, "3", LOGLEVEL); if (type == tjstJob) break; if (type == tjstQueue) pjob = next_job(cntl->sc_pque->qu_jobs,&iter); else if (type == tjstArray) { pjob = NULL; /* increment job_array_index until we find a non-null pointer or hit the end */ while (++job_array_index < pa->ai_qs.array_size) { if (pa->job_ids[job_array_index] != NULL) { if ((pjob = svr_find_job(pa->job_ids[job_array_index], FALSE)) != NULL) { unlock_ji_mutex(pjob, __func__, "3", LOGLEVEL); break; } } } } else pjob = next_job(&alljobs,&iter); } if (pjob == NULL) break; strcpy(job_id, pjob->ji_qs.ji_jobid); job_substate = pjob->ji_qs.ji_substate; job_momstattime = pjob->ji_momstat; strcpy(cntl->sc_jobid, job_id); unlock_ji_mutex(pjob, __func__, "4", LOGLEVEL); pjob = NULL; /* PBS_RESTAT_JOB defaults to 30 seconds */ if ((job_substate == JOB_SUBSTATE_RUNNING) && ((time_now - job_momstattime) > JobStatRate)) { /* go to MOM for status */ if ((rc = stat_to_mom(job_id, cntl)) == PBSE_MEM_MALLOC) break; if (rc != 0) { pjob = svr_find_job(job_id, FALSE); rc = 0; continue; } if (pa != NULL) unlock_ai_mutex(pa, __func__, "1", LOGLEVEL); return; /* will pick up after mom replies */ } } /* END while(1) */ if (rc != 0) { if (pa != NULL) unlock_ai_mutex(pa, __func__, "2", LOGLEVEL); reply_free(preply); req_reject(rc, 0, preq, NULL, "cannot get update from mom"); return; } } /* END if (!server.sv_attr[SRV_ATR_PollJobs].at_val.at_long) */ /* * now ready for part 3, building the status reply, * loop through again */ if ((type == tjstSummarizeArraysQueue) || (type == tjstSummarizeArraysServer)) { /* No array can be owned for these options */ update_array_statuses(); } if (type == tjstJob) pjob = svr_find_job(preq->rq_ind.rq_status.rq_id, FALSE); else if (type == tjstQueue) pjob = next_job(cntl->sc_pque->qu_jobs,&iter); else if (type == tjstSummarizeArraysQueue) pjob = next_job(cntl->sc_pque->qu_jobs_array_sum,&iter); else if (type == tjstSummarizeArraysServer) pjob = next_job(&array_summary,&iter); else if (type == tjstArray) { job_array_index = -1; pjob = NULL; /* increment job_array_index until we find a non-null pointer or hit the end */ while (++job_array_index < pa->ai_qs.array_size) { if (pa->job_ids[job_array_index] != NULL) { if ((pjob = svr_find_job(pa->job_ids[job_array_index], FALSE)) != NULL) { break; } } } } else pjob = next_job(&alljobs,&iter); DTime = 0; if (preq->rq_extend != NULL) { char *ptr; /* FORMAT: { EXECQONLY | DELTA:<EPOCHTIME> } */ if (strstr(preq->rq_extend, EXECQUEONLY)) exec_only = 1; ptr = strstr(preq->rq_extend, "DELTA:"); if (ptr != NULL) { ptr += strlen("delta:"); DTime = strtol(ptr, NULL, 10); } } if ((type == tjstTruncatedServer) || (type == tjstTruncatedQueue)) { long sentJobCounter; long qjcounter; long qmaxreport; int iter = -1; /* loop through all queues */ while ((pque = next_queue(&svr_queues,&iter)) != NULL) { qjcounter = 0; if ((exec_only == 1) && (pque->qu_qs.qu_type != QTYPE_Execution)) { /* ignore routing queues */ unlock_queue(pque, __func__, "ignore queue", LOGLEVEL); continue; } if (((pque->qu_attr[QA_ATR_MaxReport].at_flags & ATR_VFLAG_SET) != 0) && (pque->qu_attr[QA_ATR_MaxReport].at_val.at_long >= 0)) { qmaxreport = pque->qu_attr[QA_ATR_MaxReport].at_val.at_long; } else { qmaxreport = TMAX_JOB; } if (LOGLEVEL >= 5) { sprintf(log_buf,"giving scheduler up to %ld idle jobs in queue %s\n", qmaxreport, pque->qu_qs.qu_name); log_event(PBSEVENT_SYSTEM,PBS_EVENTCLASS_QUEUE,pque->qu_qs.qu_name,log_buf); } sentJobCounter = 0; /* loop through jobs in queue */ if (pjob != NULL) unlock_ji_mutex(pjob, __func__, "5", LOGLEVEL); iter = -1; while ((pjob = next_job(pque->qu_jobs,&iter)) != NULL) { if ((qjcounter >= qmaxreport) && (pjob->ji_qs.ji_state == JOB_STATE_QUEUED)) { /* max_report of queued jobs reached for queue */ unlock_ji_mutex(pjob, __func__, "6", LOGLEVEL); continue; } pal = (svrattrl *)GET_NEXT(preq->rq_ind.rq_status.rq_attr); rc = status_job( pjob, preq, (pjob->ji_wattr[JOB_ATR_mtime].at_val.at_long >= DTime) ? pal : dpal, &preply->brp_un.brp_status, &bad); if ((rc != 0) && (rc != PBSE_PERM)) { req_reject(rc, bad, preq, NULL, NULL); if (pa != NULL) { unlock_ai_mutex(pa, __func__, "1", LOGLEVEL); } unlock_ji_mutex(pjob, __func__, "7", LOGLEVEL); unlock_queue(pque, __func__, "perm", LOGLEVEL); return; } sentJobCounter++; if (pjob->ji_qs.ji_state == JOB_STATE_QUEUED) qjcounter++; unlock_ji_mutex(pjob, __func__, "8", LOGLEVEL); } /* END foreach (pjob from pque) */ if (LOGLEVEL >= 5) { sprintf(log_buf,"sent scheduler %ld total jobs for queue %s\n", sentJobCounter, pque->qu_qs.qu_name); log_event(PBSEVENT_SYSTEM,PBS_EVENTCLASS_QUEUE,pque->qu_qs.qu_name,log_buf); } unlock_queue(pque, __func__, "end while", LOGLEVEL); } /* END for (pque) */ if (pa != NULL) unlock_ai_mutex(pa, __func__, "1", LOGLEVEL); reply_send_svr(preq); return; } /* END if ((type == tjstTruncatedServer) || ...) */ while (pjob != NULL) { /* go ahead and build the status reply for this job */ if (exec_only) { if (cntl->sc_pque != NULL) { if (cntl->sc_pque->qu_qs.qu_type != QTYPE_Execution) goto nextjob; } else { if (pa != NULL) pthread_mutex_unlock(pa->ai_mutex); pque = get_jobs_queue(&pjob); if (pa != NULL) pthread_mutex_lock(pa->ai_mutex); if ((pjob == NULL) || (pque == NULL)) goto nextjob; if (pque->qu_qs.qu_type != QTYPE_Execution) { unlock_queue(pque, __func__, "not exec", LOGLEVEL); goto nextjob; } unlock_queue(pque, __func__, "exec", LOGLEVEL); } } pal = (svrattrl *)GET_NEXT(preq->rq_ind.rq_status.rq_attr); rc = status_job( pjob, preq, pal, &preply->brp_un.brp_status, &bad); if ((rc != 0) && (rc != PBSE_PERM)) { if (pa != NULL) { unlock_ai_mutex(pa, __func__, "1", LOGLEVEL); } unlock_ji_mutex(pjob, __func__, "9", LOGLEVEL); req_reject(rc, bad, preq, NULL, NULL); return; } /* get next job */ nextjob: if (pjob != NULL) unlock_ji_mutex(pjob, __func__, "10", LOGLEVEL); if (type == tjstJob) break; if (type == tjstQueue) pjob = next_job(cntl->sc_pque->qu_jobs,&iter); else if (type == tjstSummarizeArraysQueue) pjob = next_job(cntl->sc_pque->qu_jobs_array_sum,&iter); else if (type == tjstSummarizeArraysServer) pjob = next_job(&array_summary,&iter); else if (type == tjstArray) { pjob = NULL; /* increment job_array_index until we find a non-null pointer or hit the end */ while (++job_array_index < pa->ai_qs.array_size) { if (pa->job_ids[job_array_index] != NULL) { if ((pjob = svr_find_job(pa->job_ids[job_array_index], FALSE)) != NULL) { break; } } } } else pjob = next_job(&alljobs,&iter); rc = 0; } /* END while (pjob != NULL) */ if (pa != NULL) { unlock_ai_mutex(pa, __func__, "1", LOGLEVEL); } reply_send_svr(preq); if (LOGLEVEL >= 7) { log_event(PBSEVENT_SYSTEM, PBS_EVENTCLASS_JOB, "req_statjob", "Successfully returned the status of queued jobs\n"); } return; } /* END req_stat_job_step2() */
static void req_stat_job_step2( struct stat_cntl *cntl) /* I/O (freed on return) */ { svrattrl *pal; job *pjob = NULL; struct batch_request *preq; struct batch_reply *preply; int rc = 0; enum TJobStatTypeEnum type; pbs_queue *pque = NULL; int exec_only = 0; int IsTruncated = 0; long DTime; /* delta time - only report full attribute list if J->MTime > DTime */ static svrattrl *dpal = NULL; int job_array_index = 0; job_array *pa = NULL; preq = cntl->sc_origrq; type = (enum TJobStatTypeEnum)cntl->sc_type; preply = &preq->rq_reply; /* See pbs_server_attributes(1B) for details on "poll_jobs" behaviour */ /* NOTE: If IsTruncated is true, should walk all queues and walk jobs in each queue until max_reported is reached (NYI) */ if (dpal == NULL) { /* build 'delta' attribute list */ svrattrl *tpal; tlist_head dalist; int aindex; int atrlist[] = { JOB_ATR_jobname, JOB_ATR_resc_used, JOB_ATR_LAST }; CLEAR_LINK(dalist); for (aindex = 0;atrlist[aindex] != JOB_ATR_LAST;aindex++) { if ((tpal = attrlist_create("", "", 23)) == NULL) { return; } tpal->al_valln = atrlist[aindex]; if (dpal == NULL) dpal = tpal; append_link(&dalist, &tpal->al_link, tpal); } } /* END if (dpal == NULL) */ if (type == tjstArray) { pa = get_array(preq->rq_ind.rq_status.rq_id); } if (!server.sv_attr[(int)SRV_ATR_PollJobs].at_val.at_long) { /* polljobs not set - indicates we may need to obtain fresh data from MOM */ if (cntl->sc_jobid[0] == '\0') pjob = NULL; else pjob = find_job(cntl->sc_jobid); while (1) { if (pjob == NULL) { /* start from the first job */ if (type == tjstJob) { pjob = find_job(preq->rq_ind.rq_status.rq_id); } else if (type == tjstQueue) { pjob = (job *)GET_NEXT(cntl->sc_pque->qu_jobs); } else if (type == tjstArray) { job_array_index = 0; /* increment job_array_index until we find a non-null pointer or hit the end */ while (job_array_index < pa->ai_qs.array_size && (pjob = pa->jobs[job_array_index]) == NULL) job_array_index++; } else { if ((type == tjstTruncatedServer) || (type == tjstTruncatedQueue)) IsTruncated = TRUE; pjob = (job *)GET_NEXT(svr_alljobs); } } /* END if (pjob == NULL) */ else { /* get next job */ if (type == tjstJob) break; if (type == tjstQueue) pjob = (job *)GET_NEXT(pjob->ji_jobque); else pjob = (job *)GET_NEXT(pjob->ji_alljobs); if (type == tjstArray) { pjob = NULL; /* increment job_array_index until we find a non-null pointer or hit the end */ while (++job_array_index < pa->ai_qs.array_size && (pjob = pa->jobs[job_array_index]) == NULL) ; } } if (pjob == NULL) break; /* PBS_RESTAT_JOB defaults to 30 seconds */ if ((pjob->ji_qs.ji_substate == JOB_SUBSTATE_RUNNING) && ((time_now - pjob->ji_momstat) > JobStatRate)) { /* go to MOM for status */ strcpy(cntl->sc_jobid, pjob->ji_qs.ji_jobid); if ((rc = stat_to_mom(pjob, cntl)) == PBSE_SYSTEM) { break; } if (rc != 0) { rc = 0; continue; } return; /* will pick up after mom replies */ } } /* END while(1) */ if (cntl->sc_conn >= 0) svr_disconnect(cntl->sc_conn); /* close connection to MOM */ if (rc != 0) { free(cntl); reply_free(preply); req_reject(rc, 0, preq, NULL, "cannot get update from mom"); return; } } /* END if (!server.sv_attr[(int)SRV_ATR_PollJobs].at_val.at_long) */ /* * now ready for part 3, building the status reply, * loop through again */ if (type == tjstSummarizeArraysQueue || type == tjstSummarizeArraysServer) { update_array_statuses(); } if (type == tjstJob) pjob = find_job(preq->rq_ind.rq_status.rq_id); else if (type == tjstQueue) pjob = (job *)GET_NEXT(cntl->sc_pque->qu_jobs); else if (type == tjstSummarizeArraysQueue) pjob = (job *)GET_NEXT(cntl->sc_pque->qu_jobs_array_sum); else if (type == tjstSummarizeArraysServer) pjob = (job *)GET_NEXT(svr_jobs_array_sum); else if (type == tjstArray) { job_array_index = 0; pjob = NULL; /* increment job_array_index until we find a non-null pointer or hit the end */ while (job_array_index < pa->ai_qs.array_size && (pjob = pa->jobs[job_array_index]) == NULL) job_array_index++; } else pjob = (job *)GET_NEXT(svr_alljobs); DTime = 0; if (preq->rq_extend != NULL) { char *ptr; /* FORMAT: { EXECQONLY | DELTA:<EPOCHTIME> } */ if (strstr(preq->rq_extend, EXECQUEONLY)) exec_only = 1; ptr = strstr(preq->rq_extend, "DELTA:"); if (ptr != NULL) { ptr += strlen("delta:"); DTime = strtol(ptr, NULL, 10); } } free(cntl); if ((type == tjstTruncatedServer) || (type == tjstTruncatedQueue)) { long sentJobCounter; long qjcounter; long qmaxreport; /* loop through all queues */ for (pque = (pbs_queue *)GET_NEXT(svr_queues); pque != NULL; pque = (pbs_queue *)GET_NEXT(pque->qu_link)) { qjcounter = 0; if ((exec_only == 1) && (pque->qu_qs.qu_type != QTYPE_Execution)) { /* ignore routing queues */ continue; } if (((pque->qu_attr[QA_ATR_MaxReport].at_flags & ATR_VFLAG_SET) != 0) && (pque->qu_attr[QA_ATR_MaxReport].at_val.at_long >= 0)) { qmaxreport = pque->qu_attr[QA_ATR_MaxReport].at_val.at_long; } else { qmaxreport = TMAX_JOB; } if (LOGLEVEL >= 5) { sprintf(log_buffer,"giving scheduler up to %ld idle jobs in queue %s\n", qmaxreport, pque->qu_qs.qu_name); log_event( PBSEVENT_SYSTEM, PBS_EVENTCLASS_QUEUE, pque->qu_qs.qu_name, log_buffer); } sentJobCounter = 0; /* loop through jobs in queue */ for (pjob = (job *)GET_NEXT(pque->qu_jobs); pjob != NULL; pjob = (job *)GET_NEXT(pjob->ji_jobque)) { if ((qjcounter >= qmaxreport) && (pjob->ji_qs.ji_state == JOB_STATE_QUEUED)) { /* max_report of queued jobs reached for queue */ continue; } pal = (svrattrl *)GET_NEXT(preq->rq_ind.rq_status.rq_attr); rc = status_job( pjob, preq, (pjob->ji_wattr[(int)JOB_ATR_mtime].at_val.at_long >= DTime) ? pal : dpal, &preply->brp_un.brp_status, &bad); if ((rc != 0) && (rc != PBSE_PERM)) { req_reject(rc, bad, preq, NULL, NULL); return; } sentJobCounter++; if (pjob->ji_qs.ji_state == JOB_STATE_QUEUED) qjcounter++; } /* END for (pjob) */ if (LOGLEVEL >= 5) { sprintf(log_buffer,"sent scheduler %ld total jobs for queue %s\n", sentJobCounter, pque->qu_qs.qu_name); log_event( PBSEVENT_SYSTEM, PBS_EVENTCLASS_QUEUE, pque->qu_qs.qu_name, log_buffer); } } /* END for (pque) */ reply_send(preq); return; } /* END if ((type == tjstTruncatedServer) || ...) */ while (pjob != NULL) { /* go ahead and build the status reply for this job */ if (exec_only) { pque = find_queuebyname(pjob->ji_qs.ji_queue); if (pque->qu_qs.qu_type != QTYPE_Execution) goto nextjob; } pal = (svrattrl *)GET_NEXT(preq->rq_ind.rq_status.rq_attr); rc = status_job( pjob, preq, pal, &preply->brp_un.brp_status, &bad); if ((rc != 0) && (rc != PBSE_PERM)) { req_reject(rc, bad, preq, NULL, NULL); return; } /* get next job */ nextjob: if (type == tjstJob) break; if (type == tjstQueue) pjob = (job *)GET_NEXT(pjob->ji_jobque); else if (type == tjstSummarizeArraysQueue) pjob = (job *)GET_NEXT(pjob->ji_jobque_array_sum); else if (type == tjstSummarizeArraysServer) pjob = (job *)GET_NEXT(pjob->ji_jobs_array_sum); else if (type == tjstArray) { pjob = NULL; /* increment job_array_index until we find a non-null pointer or hit the end */ while (++job_array_index < pa->ai_qs.array_size && (pjob = pa->jobs[job_array_index]) == NULL) ; } else pjob = (job *)GET_NEXT(pjob->ji_alljobs); rc = 0; } /* END while (pjob != NULL) */ reply_send(preq); if (LOGLEVEL >= 7) { log_event(PBSEVENT_SYSTEM, PBS_EVENTCLASS_JOB, "req_statjob", "Successfully returned the status of queued jobs\n"); } return; } /* END req_stat_job_step2() */