void req_stat_que(struct batch_request *preq) { char *name; pbs_queue *pque; struct batch_reply *preply; int rc = 0; int type = 0; /* * first, validate the name of the requested object, either * a queue, or null for all queues */ name = preq->rq_ind.rq_status.rq_id; if ((*name == '\0') || (*name =='@')) type = 1; else { pque = find_queuebyname(name); #ifdef NAS /* localmod 075 */ if (pque == NULL) pque = find_resvqueuebyname(name); #endif /* localmod 075 */ if (pque == NULL) { req_reject(PBSE_UNKQUE, 0, preq); return; } } preply = &preq->rq_reply; preply->brp_choice = BATCH_REPLY_CHOICE_Status; CLEAR_HEAD(preply->brp_un.brp_status); if (type == 0) { /* get status of the one named queue */ rc = status_que(pque, preq, &preply->brp_un.brp_status); } else { /* get status of queues */ pque = (pbs_queue *)GET_NEXT(svr_queues); while (pque) { rc = status_que(pque, preq, &preply->brp_un.brp_status); if (rc != 0) { if (rc == PBSE_PERM) rc = 0; else break; } pque = (pbs_queue *)GET_NEXT(pque->qu_link); } } if (rc) { (void)reply_free(preply); req_reject(rc, bad, preq); } else { (void)reply_send(preq); } }
/**
 * @brief
 *	Look up the server's default queue.
 *
 * @return	pointer to the default queue
 * @retval	result of find_queuebyname() on the SRV_ATR_dflt_que value
 *		when that attribute has ATR_VFLAG_SET
 * @retval	null pointer when the attribute is not set
 */
pbs_queue *
get_dfltque(void)
{
	if ((server.sv_attr[SRV_ATR_dflt_que].at_flags & ATR_VFLAG_SET) == 0)
		return ((pbs_queue *)0);

	return (find_queuebyname(server.sv_attr[SRV_ATR_dflt_que].at_val.at_str));
}
/*
 * get_dfltque - return the queue named by the server's default_queue
 * attribute (SRV_ATR_dflt_que).
 *
 * Returns whatever find_queuebyname() yields for the attribute's string
 * value when the attribute is flagged ATR_VFLAG_SET, and NULL otherwise.
 */

pbs_queue *get_dfltque(void)

  {
  pbs_queue *dflt;

  dflt = (server.sv_attr[SRV_ATR_dflt_que].at_flags & ATR_VFLAG_SET) ?
         find_queuebyname(server.sv_attr[SRV_ATR_dflt_que].at_val.at_str) :
         NULL;

  return(dflt);
  }  /* END get_dfltque() */
/*
 * get_dfltque - return the queue named by the server's default_queue
 * attribute (SRV_ATR_dflt_que), fetched through get_svr_attr_str().
 *
 * Returns the result of find_queuebyname() for that name, or NULL when the
 * attribute cannot be read or no name is available.
 */

pbs_queue *get_dfltque(void)

  {
  char *dque = NULL;

  if (get_svr_attr_str(SRV_ATR_dflt_que, &dque) != PBSE_NONE)
    return(NULL);

  /* NOTE(review): get_svr_attr_str() may report PBSE_NONE and still leave
   * dque NULL (presumably when the attribute is unset - confirm); never hand
   * a NULL name to find_queuebyname() */
  if (dque == NULL)
    return(NULL);

  return(find_queuebyname(dque));
  }  /* END get_dfltque() */
/*
 * local_move - move a job to another queue on this server.
 *
 * The destination queue name is taken from jobp->ji_qs.ji_destin.
 *
 * jobp - job to move
 * req  - client request that asked for the move, or 0 when the move is
 *        part of routing
 *
 * Returns ROUTE_SUCCESS when the job was enqueued in the destination,
 * ROUTE_PERM_FAILURE when the destination does not exist or the enqueue
 * fails, or the value of should_retry_route() when svr_chkque() refuses
 * the job.  pbs_errno carries the error code in the failure cases.
 */

static int local_move(

  job                  *jobp,
  struct batch_request *req)

  {
  char      *id = "local_move";
  pbs_queue *qp;
  char      *destination = jobp->ji_qs.ji_destin;
  int        mtype;

  /* search for destination queue */

  if ((qp = find_queuebyname(destination)) == NULL)
    {
    sprintf(log_buffer, "queue %s does not exist\n",
            destination);

    log_err(-1, id, log_buffer);

    pbs_errno = PBSE_UNKQUE;

    return(ROUTE_PERM_FAILURE);
    }

  /*
   * if being moved at specific request of administrator, then
   * checks on queue availability, etc. are skipped;
   * otherwise all checks are enforced.
   */

  if (req == 0)
    {
    mtype = MOVE_TYPE_Route; /* route */
    }
  else if (req->rq_perm & (ATR_DFLAG_MGRD | ATR_DFLAG_MGWR))
    {
    mtype = MOVE_TYPE_MgrMv; /* privileged move */
    }
  else
    {
    mtype = MOVE_TYPE_Move; /* non-privileged move */
    }

  if ((pbs_errno = svr_chkque(
                     jobp,
                     qp,
                     get_variable(jobp, pbs_o_host),
                     mtype,
                     NULL)))
    {
    /* should this queue be retried? */

    return(should_retry_route(pbs_errno));
    }

  /* dequeue job from present queue, update destination and */
  /* queue_rank for new queue and enqueue into destination  */

  svr_dequejob(jobp);

  /* bounded copy: the destination string must never overrun ji_queue,
   * and the result is always NUL-terminated */
  snprintf(jobp->ji_qs.ji_queue, sizeof(jobp->ji_qs.ji_queue), "%s",
           destination);

  jobp->ji_wattr[JOB_ATR_qrank].at_val.at_long = ++queue_rank;

  pbs_errno = svr_enquejob(jobp);

  if (pbs_errno != 0)
    {
    return(ROUTE_PERM_FAILURE); /* should never ever get here */
    }

  jobp->ji_lastdest = 0; /* reset in case of another route */

  job_save(jobp, SAVEJOB_FULL);

  return(ROUTE_SUCCESS);
  }  /* END local_move() */
int req_stat_job( struct batch_request *preq) /* ptr to the decoded request */ { struct stat_cntl *cntl; /* see svrfunc.h */ char *name; job *pjob = NULL; pbs_queue *pque = NULL; int rc = PBSE_NONE; char log_buf[LOCAL_LOG_BUF_SIZE]; enum TJobStatTypeEnum type = tjstNONE; /* * first, validate the name of the requested object, either * a job, a queue, or the whole server. */ if (LOGLEVEL >= 7) { sprintf(log_buf, "note"); log_event(PBSEVENT_JOB, PBS_EVENTCLASS_JOB, __func__, log_buf); } /* FORMAT: name = { <JOBID> | <QUEUEID> | '' } */ name = preq->rq_ind.rq_status.rq_id; if (preq->rq_extend != NULL) { /* evaluate pbs_job_stat() 'extension' field */ if (!strncasecmp(preq->rq_extend, "truncated", strlen("truncated"))) { /* truncate response by 'max_report' */ type = tjstTruncatedServer; } else if (!strncasecmp(preq->rq_extend, "summarize_arrays", strlen("summarize_arrays"))) { type = tjstSummarizeArraysServer; } } /* END if (preq->rq_extend != NULL) */ if (isdigit((int)*name)) { /* status a single job */ if (is_array(name)) { if (type != tjstSummarizeArraysServer) { type = tjstArray; } } else { type = tjstJob; if ((pjob = svr_find_job(name, FALSE)) == NULL) { rc = PBSE_UNKJOBID; } else unlock_ji_mutex(pjob, __func__, "1", LOGLEVEL); } } else if (isalpha(name[0])) { if (type == tjstNONE) type = tjstQueue; else if (type == tjstSummarizeArraysServer) type = tjstSummarizeArraysQueue; else type = tjstTruncatedQueue; /* if found, this mutex is released later */ if ((pque = find_queuebyname(name)) == NULL) { rc = PBSE_UNKQUE; } } else if ((*name == '\0') || (*name == '@')) { /* status all jobs at server */ if (type == tjstNONE) type = tjstServer; } else { rc = PBSE_IVALREQ; } if (rc != 0) { /* is invalid - an error */ req_reject(rc, 0, preq, NULL, NULL); return(rc); } preq->rq_reply.brp_choice = BATCH_REPLY_CHOICE_Status; CLEAR_HEAD(preq->rq_reply.brp_un.brp_status); cntl = (struct stat_cntl *)calloc(1, sizeof(struct stat_cntl)); if (cntl == NULL) { if (pque != NULL) 
unlock_queue(pque, "req_stat_job", (char *)"no memory cntl", LOGLEVEL); req_reject(PBSE_SYSTEM, 0, preq, NULL, NULL); return(PBSE_SYSTEM); } if ((type == tjstTruncatedQueue) || (type == tjstTruncatedServer)) { if (pque != NULL) { unlock_queue(pque, __func__, "", LOGLEVEL); pque = NULL; } } cntl->sc_type = (int)type; cntl->sc_conn = -1; cntl->sc_pque = pque; cntl->sc_origrq = preq; cntl->sc_post = req_stat_job_step2; cntl->sc_jobid[0] = '\0'; /* cause "start from beginning" */ req_stat_job_step2(cntl); /* go to step 2, see if running is current */ if (pque != NULL) unlock_queue(pque, "req_stat_job", (char *)"success", LOGLEVEL); free(cntl); return(PBSE_NONE); } /* END req_stat_job() */
/*
 * req_stat_que - service a "status queue" batch request.
 *
 * An empty request id, or one starting with '@', asks for every queue;
 * any other id names a single queue.  Each queue's mutex is held through a
 * scoped mutex_mgr while status_que() runs on it.
 *
 * Returns PBSE_UNKQUE when the named queue cannot be found, otherwise the
 * last status_que() result (PBSE_PERM results are reset to 0 in the
 * all-queues walk).  The request is always answered before returning.
 */

int req_stat_que(

  batch_request *preq)

  {
  char                log_buf[LOCAL_LOG_BUF_SIZE+1];
  char               *qname = preq->rq_ind.rq_status.rq_id;
  struct batch_reply *reply = &preq->rq_reply;
  int                 all_queues = ((*qname == '\0') || (*qname == '@'));
  pbs_queue          *pque = NULL;
  int                 rc = 0;

  /* validate the requested object before building any reply */

  if (!all_queues)
    {
    pque = find_queuebyname(qname);

    if (pque == NULL)
      {
      snprintf(log_buf, LOCAL_LOG_BUF_SIZE, "cannot locate queue %s", qname);

      req_reject(PBSE_UNKQUE, 0, preq, NULL, log_buf);

      return(PBSE_UNKQUE);
      }
    }

  reply->brp_choice = BATCH_REPLY_CHOICE_Status;

  CLEAR_HEAD(reply->brp_un.brp_status);

  if (!all_queues)
    {
    /* the named queue's mutex is released when pque_mutex leaves scope */
    mutex_mgr pque_mutex = mutex_mgr(pque->qu_mutex, true);

    rc = status_que(pque, preq, &reply->brp_un.brp_status);
    }
  else
    {
    svr_queues.lock();
    all_queues_iterator *iter = svr_queues.get_iterator();
    svr_queues.unlock();

    /* walk every queue; PBSE_PERM only skips the queue, anything else stops */

    for (;;)
      {
      pque = next_queue(&svr_queues,iter);

      if (pque == NULL)
        break;

      mutex_mgr pque_mutex = mutex_mgr(pque->qu_mutex, true);

      rc = status_que(pque, preq, &reply->brp_un.brp_status);

      if (rc == PBSE_PERM)
        rc = 0;
      else if (rc != 0)
        break;
      }

    delete iter;
    }

  if (rc == PBSE_NONE)
    {
    reply_send_svr(preq);
    }
  else
    {
    reply_free(reply);

    req_reject(PBSE_NOATTR, rc, preq, NULL, "status_queue failed");
    }

  return rc;
  }  /* END req_stat_que() */
int req_stat_que( struct batch_request *preq) { char *name; pbs_queue *pque = NULL; struct batch_reply *preply; int rc = 0; int type = 0; char log_buf[LOCAL_LOG_BUF_SIZE+1]; /* * first, validate the name of the requested object, either * a queue, or null for all queues */ name = preq->rq_ind.rq_status.rq_id; if ((*name == '\0') || (*name == '@')) { type = 1; } else { pque = find_queuebyname(name); if (pque == NULL) { rc = PBSE_UNKQUE; snprintf(log_buf, LOCAL_LOG_BUF_SIZE, "can not locate queue %s", name); req_reject(rc, 0, preq, NULL, log_buf); return rc; } } preply = &preq->rq_reply; preply->brp_choice = BATCH_REPLY_CHOICE_Status; CLEAR_HEAD(preply->brp_un.brp_status); if (type == 0) { /* get status of the named queue */ rc = status_que(pque, preq, &preply->brp_un.brp_status); unlock_queue(pque, "req_stat_que", "type == 0", LOGLEVEL); } else { /* pque == NULL before next_queue */ int iter = -1; /* get status of all queues */ while ((pque = next_queue(&svr_queues,&iter)) != NULL) { rc = status_que(pque, preq, &preply->brp_un.brp_status); if (rc != 0) { if (rc != PBSE_PERM) { unlock_queue(pque, "req_stat_que", "break", LOGLEVEL); break; } rc = 0; } unlock_queue(pque, "req_stat_que", "end while", LOGLEVEL); } } if (rc != PBSE_NONE) { reply_free(preply); req_reject(PBSE_NOATTR, rc, preq, NULL, "status_queue failed"); } else { reply_send_svr(preq); } return rc; } /* END req_stat_que() */
void *queue_route( void *vp) { pbs_queue *pque; job *pjob = NULL; char *queue_name; char log_buf[LOCAL_LOG_BUF_SIZE]; int iter = -1; time_t time_now = time(NULL); queue_name = (char *)vp; if (queue_name == NULL) { sprintf(log_buf, "NULL queue name"); log_err(-1, __func__, log_buf); return(NULL); } if (LOGLEVEL >= 7) { snprintf(log_buf, sizeof(log_buf), "queue name: %s", queue_name); log_event(PBSEVENT_SYSTEM, PBS_EVENTCLASS_QUEUE, __func__, log_buf); } pthread_mutex_lock(reroute_job_mutex); pque = find_queuebyname(queue_name); if (pque == NULL) { sprintf(log_buf, "Could not find queue %s", queue_name); log_err(-1, __func__, log_buf); free(queue_name); pthread_mutex_unlock(reroute_job_mutex); return(NULL); } while ((pjob = next_job(pque->qu_jobs,&iter)) != NULL) { /* the second condition says we only want to try if routing * has been tried once - this is to let req_commit have the * first crack at routing always */ unlock_queue(pque, __func__, (char *)NULL, 0); if ((pjob->ji_qs.ji_un.ji_routet.ji_rteretry <= time_now - ROUTE_RETRY_TIME) && (pjob->ji_qs.ji_un.ji_routet.ji_rteretry != 0)) { reroute_job(pjob, pque); unlock_ji_mutex(pjob, __func__, (char *)"1", LOGLEVEL); } else unlock_ji_mutex(pjob, __func__, (char *)"1", LOGLEVEL); } free(queue_name); unlock_queue(pque, __func__, (char *)NULL, 0); pthread_mutex_unlock(reroute_job_mutex); return(NULL); } /* END queue_route() */
void *queue_route( void *vp) { pbs_queue *pque; job *pjob = NULL; char *queue_name; char log_buf[LOCAL_LOG_BUF_SIZE]; all_jobs_iterator *iter = NULL; queue_name = (char *)vp; if (queue_name == NULL) { sprintf(log_buf, "NULL queue name"); log_err(-1, __func__, log_buf); return(NULL); } while (1) { pthread_mutex_lock(reroute_job_mutex); /* Before we attempt to service this queue, make sure we can find it. */ pque = find_queuebyname(queue_name); if (pque == NULL) { sprintf(log_buf, "Could not find queue %s", queue_name); log_err(-1, __func__, log_buf); free(queue_name); return(NULL); } mutex_mgr que_mutex(pque->qu_mutex, true); pque->qu_jobs->lock(); iter = pque->qu_jobs->get_iterator(); pque->qu_jobs->unlock(); if (LOGLEVEL >= 7) { snprintf(log_buf, sizeof(log_buf), "routing any ready jobs in queue: %s", queue_name); log_event(PBSEVENT_SYSTEM, PBS_EVENTCLASS_QUEUE, __func__, log_buf); } while ((pjob = next_job(pque->qu_jobs,iter)) != NULL) { /* We only want to try if routing has been tried at least once - this is to let * req_commit have the first crack at routing always. */ if (pjob->ji_commit_done == 0) /* when req_commit is done it will set ji_commit_done to 1 */ { unlock_ji_mutex(pjob, __func__, "1", LOGLEVEL); continue; } /* queue must be unlocked when calling reroute_job */ que_mutex.unlock(); reroute_job(pjob); unlock_ji_mutex(pjob, __func__, "2", LOGLEVEL); /* need to relock queue when we go to call next_job */ pque = find_queuebyname(queue_name); if (pque == NULL) { sprintf(log_buf, "Could not find queue %s", queue_name); log_err(-1, __func__, log_buf); free(queue_name); delete iter; return(NULL); } que_mutex.mark_as_locked(); } /* we come out of the while loop with the queue locked. We don't want it locked while we sleep */ que_mutex.unlock(); pthread_mutex_unlock(reroute_job_mutex); delete iter; sleep(route_retry_interval); } free(queue_name); return(NULL); } /* END queue_route() */
/*
 * local_move - move a job to another queue on this server.
 *
 * The destination queue name is taken from pjob->ji_qs.ji_destin.  The job
 * mutex is released while the destination queue is looked up and the job is
 * then re-acquired by id with svr_find_job().
 *
 * pjob   - job to move
 * my_err - (out) error code from the destination check / enqueue
 * req    - client request that asked for the move, or 0 when routing
 *
 * Returns
 *   PBSE_NONE          the job was moved, or source and destination queue
 *                      names are already equal
 *   PBSE_JOB_RECYCLED  the job could not be found again after the lookup
 *   -1                 destination missing or enqueue failed (*my_err set)
 *   other              result of should_retry_route() after a failed
 *                      svr_chkque(), or the non-zero svr_dequejob() result
 */

int local_move(

  job                  *pjob,
  int                  *my_err,
  struct batch_request *req)

  {
  pbs_queue *dest_que = NULL;
  char      *destination = pjob->ji_qs.ji_destin;
  int        mtype;
  char       log_buf[LOCAL_LOG_BUF_SIZE];
  char       job_id[PBS_MAXSVRJOBID+1];
  int        rc;
  bool       reservation = false;

  /* Sometimes multiple threads are trying to route the same job. Protect against this
   * by making sure that the destination queue and the current queue are different.
   * If they are the same then consider it done correctly */
  if (!strcmp(pjob->ji_qs.ji_queue, pjob->ji_qs.ji_destin))
    return(PBSE_NONE);

  if (LOGLEVEL >= 8)
    {
    snprintf(log_buf, sizeof(log_buf), "%s", pjob->ji_qs.ji_jobid);
    log_event(PBSEVENT_JOB, PBS_EVENTCLASS_JOB, __func__, log_buf);
    }

  /*
   * if being moved at specific request of administrator, then
   * checks on queue availability, etc. are skipped;
   * otherwise all checks are enforced.
   */

  if (req == 0)
    {
    mtype = MOVE_TYPE_Route; /* route */
    }
  else if (req->rq_perm & (ATR_DFLAG_MGRD | ATR_DFLAG_MGWR))
    {
    mtype = MOVE_TYPE_MgrMv; /* privileged move */
    }
  else
    {
    mtype = MOVE_TYPE_Move; /* non-privileged move */
    }

  /* remember the id so the job can be found again after the unlock */
  snprintf(job_id, sizeof(job_id), "%s", pjob->ji_qs.ji_jobid);
  unlock_ji_mutex(pjob, __func__, NULL, LOGLEVEL);

  /* NOTE(review): destination still points into the job structure, which is
   * no longer locked here - confirm the job cannot be freed in this window */
  dest_que = find_queuebyname(destination);

  if (dest_que == NULL)
    {
    /* this should never happen */
    /* report the queue that was actually looked up, not the current one */
    snprintf(log_buf, sizeof(log_buf), "queue %s does not exist\n", destination);
    log_err(-1, __func__, log_buf);

    *my_err = PBSE_UNKQUE;

    return(-1);
    }

  mutex_mgr dest_que_mutex = mutex_mgr(dest_que->qu_mutex, true);

  if ((pjob = svr_find_job(job_id, TRUE)) == NULL)
    {
    /* job disappeared while locking queue */
    return(PBSE_JOB_RECYCLED);
    }

  /* check the destination */
  if ((*my_err = svr_chkque(pjob, dest_que, get_variable(pjob, pbs_o_host), mtype, NULL)))
    {
    /* should this queue be retried? */
    return(should_retry_route(*my_err));
    }

  reservation = have_reservation(pjob, dest_que);

  /* dequeue job from present queue, update destination and */
  /* queue_rank for new queue and enqueue into destination  */
  dest_que_mutex.unlock();

  rc = svr_dequejob(pjob, FALSE);

  if (rc)
    return(rc);

  snprintf(pjob->ji_qs.ji_queue, sizeof(pjob->ji_qs.ji_queue), "%s", destination);

  pjob->ji_wattr[JOB_ATR_qrank].at_val.at_long = ++queue_rank;

  *my_err = svr_enquejob(pjob, FALSE, NULL, reservation, false);

  if (*my_err != PBSE_NONE)
    {
    /* covers PBSE_JOB_RECYCLED as well as any other enqueue failure */
    return(-1);
    }

  pjob->ji_lastdest = 0; /* reset in case of another route */

  job_save(pjob, SAVEJOB_FULL, 0);

  return(PBSE_NONE);
  }  /* END local_move() */
void req_stat_job(struct batch_request *preq) { int at_least_one_success = 0; int dosubjobs = 0; int dohistjobs = 0; char *name; job *pjob = NULL; pbs_queue *pque = NULL; struct batch_reply *preply; int rc = 0; int type = 0; char *pnxtjid = NULL; /* check for any extended flag in the batch request. 't' for * the sub jobs. If 'x' is there, then check if the server is * configured for history job info. If not set or set to FALSE, * return with PBSE_JOBHISTNOTSET error. Otherwise select history * jobs. */ if (preq->rq_extend) { if (strchr(preq->rq_extend, (int)'t')) dosubjobs = 1; /* status sub jobs of an Array Job */ if (strchr(preq->rq_extend, (int)'x')) { if (svr_history_enable == 0) { req_reject(PBSE_JOBHISTNOTSET, 0, preq); return; } dohistjobs = 1; /* status history jobs */ } } /* * first, validate the name of the requested object, either * a job, a queue, or the whole server. * type = 1 for a job, Array job, subjob or range of subjobs, or * a comma separated list of the above. * 2 for jobs in a queue, * 3 for jobs in the server, or */ name = preq->rq_ind.rq_status.rq_id; if ( isdigit((int)*name) ) { /* a single job id */ type = 1; rc = PBSE_UNKJOBID; } else if ( isalpha((int)*name) ) { pque = find_queuebyname(name) /* status jobs in a queue */; #ifdef NAS /* localmod 075 */ if (pque == NULL) pque = find_resvqueuebyname(name); #endif /* localmod 075 */ if (pque) type = 2; else rc = PBSE_UNKQUE; } else if ((*name == '\0') || (*name == '@')) { type = 3; /* status all jobs at server */ } else rc = PBSE_IVALREQ; if (type == 0) { /* is invalid - an error */ req_reject(rc, 0, preq); return; } preply = &preq->rq_reply; preply->brp_choice = BATCH_REPLY_CHOICE_Status; CLEAR_HEAD(preply->brp_un.brp_status); rc = PBSE_NONE; if (type == 1) { /* * If there is more than one job id, any status for any * one job is returned, then no error is given. * If a single job id is requested and there is an error * the error is returned. 
*/ pnxtjid = name; while ((name = parse_comma_string_r(&pnxtjid)) != NULL) { if ((rc = stat_a_jobidname(preq, name, dohistjobs, dosubjobs)) == PBSE_NONE) at_least_one_success = 1; } if (at_least_one_success == 1) reply_send(preq); else req_reject(rc, 0, preq); return; } else if (type == 2) { pjob = (job *)GET_NEXT(pque->qu_jobs); while (pjob && (rc == PBSE_NONE)) { rc = do_stat_of_a_job(preq, pjob, dohistjobs, dosubjobs); pjob = (job *)GET_NEXT(pjob->ji_jobque); } } else { pjob = (job *)GET_NEXT(svr_alljobs); while (pjob && (rc == PBSE_NONE)) { rc = do_stat_of_a_job(preq, pjob, dohistjobs, dosubjobs); pjob = (job *)GET_NEXT(pjob->ji_alljobs); } } if (rc && (rc != PBSE_PERM)) req_reject(rc, bad, preq); else reply_send(preq); }
/**
 * @brief
 *	Move a job into another queue of this Server.
 *
 * @par
 *	The destination is first checked with svr_chkque(); only when it
 *	accepts the job is the job dequeued from its current queue and
 *	enqueued into the new one.
 *
 * @par
 *	Note - the destination queue is named by the ji_qs.ji_destin member
 *	of the job structure.
 *
 * @param[in]	jobp - pointer to job to move
 * @param[in]	req  - client request from a qmove client, null if a route
 *
 * @return	int
 * @retval	0  : success
 * @retval	-1 : permanent failure or rejection, see pbs_errno
 * @retval	1  : failed but try again later
 */
int
local_move(job *jobp, struct batch_request *req)
{
	char *destination = jobp->ji_qs.ji_destin;
	pbs_queue *dest_que;
	attribute *resv_attr;
	char *resv_id;
	long accrue = -1;
	int mtype;

	/* the destination queue has to exist */
	dest_que = find_queuebyname(destination);
	if (dest_que == NULL) {
		sprintf(log_buffer, "queue %s does not exist", destination);
		log_err(-1, __func__, log_buffer);
		pbs_errno = PBSE_UNKQUE;
		return -1;
	}

	/*
	 * A move requested by a manager skips the queue availability
	 * checks; routing and ordinary moves get all of them.
	 */
	if (req == NULL)
		mtype = MOVE_TYPE_Route;	/* route */
	else if (req->rq_perm & (ATR_DFLAG_MGRD | ATR_DFLAG_MGWR))
		mtype = MOVE_TYPE_MgrMv;	/* privileged move */
	else
		mtype = MOVE_TYPE_Move;		/* non-privileged move */

	pbs_errno = svr_chkque(jobp, dest_que,
		get_hostPart(jobp->ji_wattr[(int)JOB_ATR_job_owner].at_val.at_str),
		mtype);
	if (pbs_errno != 0)
		return (should_retry_route(pbs_errno));	/* should this queue be retried? */

	/* leave the present queue and record the new queue name and rank */
	svr_dequejob(jobp);
	jobp->ji_myResv = NULL;

	strncpy(jobp->ji_qs.ji_queue, dest_que->qu_qs.qu_name, PBS_MAXQUEUENAME);
	jobp->ji_qs.ji_queue[PBS_MAXQUEUENAME] = '\0';

	jobp->ji_wattr[(int)JOB_ATR_qrank].at_val.at_long = ++queue_rank;
	jobp->ji_wattr[(int)JOB_ATR_qrank].at_flags |= ATR_VFLAG_MODCACHE;

	/* reserve_ID follows the destination: its reservation id, or cleared */
	resv_attr = &jobp->ji_wattr[(int)JOB_ATR_reserve_ID];
	resv_id = dest_que->qu_resvp ? dest_que->qu_resvp->ri_qs.ri_resvID : (char *)0;
	job_attr_def[(int)JOB_ATR_reserve_ID].at_decode(resv_attr,
		(char *)0, (char *)0, resv_id);
	if (dest_que->qu_resvp)
		jobp->ji_myResv = dest_que->qu_resvp;

	if (server.sv_attr[(int)SRV_ATR_EligibleTimeEnable].at_val.at_long == 1) {
		accrue = determine_accruetype(jobp);
		/* -1 means no accrue type could be determined: fall back to JOB_INITIAL */
		(void)update_eligible_time((accrue == -1) ? JOB_INITIAL : accrue, jobp);
	}

	pbs_errno = svr_enquejob(jobp);
	if (pbs_errno != 0)
		return -1;	/* should never ever get here */

	jobp->ji_lastdest = 0;	/* reset in case of another route */

	(void)job_save(jobp, SAVEJOB_FULL);

	/*
	 * If a scheduling cycle is in progress, the move may have changed
	 * things that affect scheduling or placement of this job, so add it
	 * to the list of jobs which cannot be run in this cycle.
	 */
	if (scheduler_jobs_stat &&
		(req == NULL || (req->rq_conn != scheduler_sock)))
		am_jobs_add(jobp);

	return 0;
}
static void req_stat_job_step2( struct stat_cntl *cntl) /* I/O (freed on return) */ { svrattrl *pal; job *pjob = NULL; struct batch_request *preq; struct batch_reply *preply; int rc = 0; enum TJobStatTypeEnum type; pbs_queue *pque = NULL; int exec_only = 0; int IsTruncated = 0; long DTime; /* delta time - only report full attribute list if J->MTime > DTime */ static svrattrl *dpal = NULL; int job_array_index = 0; job_array *pa = NULL; preq = cntl->sc_origrq; type = (enum TJobStatTypeEnum)cntl->sc_type; preply = &preq->rq_reply; /* See pbs_server_attributes(1B) for details on "poll_jobs" behaviour */ /* NOTE: If IsTruncated is true, should walk all queues and walk jobs in each queue until max_reported is reached (NYI) */ if (dpal == NULL) { /* build 'delta' attribute list */ svrattrl *tpal; tlist_head dalist; int aindex; int atrlist[] = { JOB_ATR_jobname, JOB_ATR_resc_used, JOB_ATR_LAST }; CLEAR_LINK(dalist); for (aindex = 0;atrlist[aindex] != JOB_ATR_LAST;aindex++) { if ((tpal = attrlist_create("", "", 23)) == NULL) { return; } tpal->al_valln = atrlist[aindex]; if (dpal == NULL) dpal = tpal; append_link(&dalist, &tpal->al_link, tpal); } } /* END if (dpal == NULL) */ if (type == tjstArray) { pa = get_array(preq->rq_ind.rq_status.rq_id); } if (!server.sv_attr[(int)SRV_ATR_PollJobs].at_val.at_long) { /* polljobs not set - indicates we may need to obtain fresh data from MOM */ if (cntl->sc_jobid[0] == '\0') pjob = NULL; else pjob = find_job(cntl->sc_jobid); while (1) { if (pjob == NULL) { /* start from the first job */ if (type == tjstJob) { pjob = find_job(preq->rq_ind.rq_status.rq_id); } else if (type == tjstQueue) { pjob = (job *)GET_NEXT(cntl->sc_pque->qu_jobs); } else if (type == tjstArray) { job_array_index = 0; /* increment job_array_index until we find a non-null pointer or hit the end */ while (job_array_index < pa->ai_qs.array_size && (pjob = pa->jobs[job_array_index]) == NULL) job_array_index++; } else { if ((type == tjstTruncatedServer) || (type == 
tjstTruncatedQueue)) IsTruncated = TRUE; pjob = (job *)GET_NEXT(svr_alljobs); } } /* END if (pjob == NULL) */ else { /* get next job */ if (type == tjstJob) break; if (type == tjstQueue) pjob = (job *)GET_NEXT(pjob->ji_jobque); else pjob = (job *)GET_NEXT(pjob->ji_alljobs); if (type == tjstArray) { pjob = NULL; /* increment job_array_index until we find a non-null pointer or hit the end */ while (++job_array_index < pa->ai_qs.array_size && (pjob = pa->jobs[job_array_index]) == NULL) ; } } if (pjob == NULL) break; /* PBS_RESTAT_JOB defaults to 30 seconds */ if ((pjob->ji_qs.ji_substate == JOB_SUBSTATE_RUNNING) && ((time_now - pjob->ji_momstat) > JobStatRate)) { /* go to MOM for status */ strcpy(cntl->sc_jobid, pjob->ji_qs.ji_jobid); if ((rc = stat_to_mom(pjob, cntl)) == PBSE_SYSTEM) { break; } if (rc != 0) { rc = 0; continue; } return; /* will pick up after mom replies */ } } /* END while(1) */ if (cntl->sc_conn >= 0) svr_disconnect(cntl->sc_conn); /* close connection to MOM */ if (rc != 0) { free(cntl); reply_free(preply); req_reject(rc, 0, preq, NULL, "cannot get update from mom"); return; } } /* END if (!server.sv_attr[(int)SRV_ATR_PollJobs].at_val.at_long) */ /* * now ready for part 3, building the status reply, * loop through again */ if (type == tjstSummarizeArraysQueue || type == tjstSummarizeArraysServer) { update_array_statuses(); } if (type == tjstJob) pjob = find_job(preq->rq_ind.rq_status.rq_id); else if (type == tjstQueue) pjob = (job *)GET_NEXT(cntl->sc_pque->qu_jobs); else if (type == tjstSummarizeArraysQueue) pjob = (job *)GET_NEXT(cntl->sc_pque->qu_jobs_array_sum); else if (type == tjstSummarizeArraysServer) pjob = (job *)GET_NEXT(svr_jobs_array_sum); else if (type == tjstArray) { job_array_index = 0; pjob = NULL; /* increment job_array_index until we find a non-null pointer or hit the end */ while (job_array_index < pa->ai_qs.array_size && (pjob = pa->jobs[job_array_index]) == NULL) job_array_index++; } else pjob = (job *)GET_NEXT(svr_alljobs); 
DTime = 0; if (preq->rq_extend != NULL) { char *ptr; /* FORMAT: { EXECQONLY | DELTA:<EPOCHTIME> } */ if (strstr(preq->rq_extend, EXECQUEONLY)) exec_only = 1; ptr = strstr(preq->rq_extend, "DELTA:"); if (ptr != NULL) { ptr += strlen("delta:"); DTime = strtol(ptr, NULL, 10); } } free(cntl); if ((type == tjstTruncatedServer) || (type == tjstTruncatedQueue)) { long sentJobCounter; long qjcounter; long qmaxreport; /* loop through all queues */ for (pque = (pbs_queue *)GET_NEXT(svr_queues); pque != NULL; pque = (pbs_queue *)GET_NEXT(pque->qu_link)) { qjcounter = 0; if ((exec_only == 1) && (pque->qu_qs.qu_type != QTYPE_Execution)) { /* ignore routing queues */ continue; } if (((pque->qu_attr[QA_ATR_MaxReport].at_flags & ATR_VFLAG_SET) != 0) && (pque->qu_attr[QA_ATR_MaxReport].at_val.at_long >= 0)) { qmaxreport = pque->qu_attr[QA_ATR_MaxReport].at_val.at_long; } else { qmaxreport = TMAX_JOB; } if (LOGLEVEL >= 5) { sprintf(log_buffer,"giving scheduler up to %ld idle jobs in queue %s\n", qmaxreport, pque->qu_qs.qu_name); log_event( PBSEVENT_SYSTEM, PBS_EVENTCLASS_QUEUE, pque->qu_qs.qu_name, log_buffer); } sentJobCounter = 0; /* loop through jobs in queue */ for (pjob = (job *)GET_NEXT(pque->qu_jobs); pjob != NULL; pjob = (job *)GET_NEXT(pjob->ji_jobque)) { if ((qjcounter >= qmaxreport) && (pjob->ji_qs.ji_state == JOB_STATE_QUEUED)) { /* max_report of queued jobs reached for queue */ continue; } pal = (svrattrl *)GET_NEXT(preq->rq_ind.rq_status.rq_attr); rc = status_job( pjob, preq, (pjob->ji_wattr[(int)JOB_ATR_mtime].at_val.at_long >= DTime) ? 
pal : dpal, &preply->brp_un.brp_status, &bad); if ((rc != 0) && (rc != PBSE_PERM)) { req_reject(rc, bad, preq, NULL, NULL); return; } sentJobCounter++; if (pjob->ji_qs.ji_state == JOB_STATE_QUEUED) qjcounter++; } /* END for (pjob) */ if (LOGLEVEL >= 5) { sprintf(log_buffer,"sent scheduler %ld total jobs for queue %s\n", sentJobCounter, pque->qu_qs.qu_name); log_event( PBSEVENT_SYSTEM, PBS_EVENTCLASS_QUEUE, pque->qu_qs.qu_name, log_buffer); } } /* END for (pque) */ reply_send(preq); return; } /* END if ((type == tjstTruncatedServer) || ...) */ while (pjob != NULL) { /* go ahead and build the status reply for this job */ if (exec_only) { pque = find_queuebyname(pjob->ji_qs.ji_queue); if (pque->qu_qs.qu_type != QTYPE_Execution) goto nextjob; } pal = (svrattrl *)GET_NEXT(preq->rq_ind.rq_status.rq_attr); rc = status_job( pjob, preq, pal, &preply->brp_un.brp_status, &bad); if ((rc != 0) && (rc != PBSE_PERM)) { req_reject(rc, bad, preq, NULL, NULL); return; } /* get next job */ nextjob: if (type == tjstJob) break; if (type == tjstQueue) pjob = (job *)GET_NEXT(pjob->ji_jobque); else if (type == tjstSummarizeArraysQueue) pjob = (job *)GET_NEXT(pjob->ji_jobque_array_sum); else if (type == tjstSummarizeArraysServer) pjob = (job *)GET_NEXT(pjob->ji_jobs_array_sum); else if (type == tjstArray) { pjob = NULL; /* increment job_array_index until we find a non-null pointer or hit the end */ while (++job_array_index < pa->ai_qs.array_size && (pjob = pa->jobs[job_array_index]) == NULL) ; } else pjob = (job *)GET_NEXT(pjob->ji_alljobs); rc = 0; } /* END while (pjob != NULL) */ reply_send(preq); if (LOGLEVEL >= 7) { log_event(PBSEVENT_SYSTEM, PBS_EVENTCLASS_JOB, "req_statjob", "Successfully returned the status of queued jobs\n"); } return; } /* END req_stat_job_step2() */
void req_stat_job( struct batch_request *preq) /* ptr to the decoded request */ { struct stat_cntl *cntl; /* see svrfunc.h */ char *name; job *pjob = NULL; pbs_queue *pque = NULL; int rc = 0; enum TJobStatTypeEnum type = tjstNONE; /* * first, validate the name of the requested object, either * a job, a queue, or the whole server. */ /* FORMAT: name = { <JOBID> | <QUEUEID> | '' } */ name = preq->rq_ind.rq_status.rq_id; if (preq->rq_extend != NULL) { /* evaluate pbs_job_stat() 'extension' field */ if (!strncasecmp(preq->rq_extend, "truncated", strlen("truncated"))) { /* truncate response by 'max_report' */ type = tjstTruncatedServer; } else if (!strncasecmp(preq->rq_extend, "summarize_arrays", strlen("summarize_arrays"))) { type = tjstSummarizeArraysServer; } } /* END if (preq->rq_extend != NULL) */ if (isdigit((int)*name)) { /* status a single job */ if (is_array(name)) { if (type != tjstSummarizeArraysServer) { type = tjstArray; } pjob = find_array_template(name); } else { type = tjstJob; if ((pjob = find_job(name)) == NULL) { rc = PBSE_UNKJOBID; } } } else if (isalpha(name[0])) { if (type == tjstNONE) type = tjstQueue; else if (type == tjstSummarizeArraysServer) type = tjstSummarizeArraysQueue; else type = tjstTruncatedQueue; if ((pque = find_queuebyname(name)) == NULL) { rc = PBSE_UNKQUE; } } else if ((*name == '\0') || (*name == '@')) { /* status all jobs at server */ if (type == tjstNONE) type = tjstServer; } else { rc = PBSE_IVALREQ; } if (rc != 0) { /* is invalid - an error */ req_reject(rc, 0, preq, NULL, NULL); return; } preq->rq_reply.brp_choice = BATCH_REPLY_CHOICE_Status; CLEAR_HEAD(preq->rq_reply.brp_un.brp_status); cntl = (struct stat_cntl *)malloc(sizeof(struct stat_cntl)); if (cntl == NULL) { req_reject(PBSE_SYSTEM, 0, preq, NULL, NULL); return; } cntl->sc_type = (int)type; cntl->sc_conn = -1; cntl->sc_pque = pque; cntl->sc_origrq = preq; cntl->sc_post = req_stat_job_step2; cntl->sc_jobid[0] = '\0'; /* cause "start from beginning" */ if 
(server.sv_attr[(int)SRV_ATR_PollJobs].at_val.at_long) cntl->sc_post = 0; /* we're not going to make clients wait */ req_stat_job_step2(cntl); /* go to step 2, see if running is current */ return; } /* END req_stat_job() */
void req_stat_que( struct batch_request *preq) /* ptr to the decoded request */ { char *name; pbs_queue *pque = NULL; struct batch_reply *preply; int rc = 0; int type = 0; /* * first, validate the name of the requested object, either * a queue, or null for all queues */ name = preq->rq_ind.rq_status.rq_id; if ((*name == '\0') || (*name == '@')) { type = 1; } else { pque = find_queuebyname(name); if (pque == NULL) { req_reject(PBSE_UNKQUE, 0, preq, NULL, "cannot locate queue"); return; } } preply = &preq->rq_reply; preply->brp_choice = BATCH_REPLY_CHOICE_Status; CLEAR_HEAD(preply->brp_un.brp_status); if (type == 0) { /* get status of the named queue */ rc = status_que(pque, preq, &preply->brp_un.brp_status); } else { /* get status of all queues */ pque = (pbs_queue *)GET_NEXT(svr_queues); while (pque != NULL) { rc = status_que(pque, preq, &preply->brp_un.brp_status); if (rc != 0) { if (rc != PBSE_PERM) break; rc = 0; } pque = (pbs_queue *)GET_NEXT(pque->qu_link); } } if (rc != 0) { reply_free(preply); req_reject(rc, bad, preq, NULL, "status_queue failed"); } else { reply_send(preq); } return; } /* END req_stat_que() */