/**
 * @brief
 *	status_que - build the status reply entry for a single queue.
 * @par
 *	Refreshes the queue's total-job and jobs-by-state attributes, allocates
 *	a brp_status entry, appends it to the reply list and then asks
 *	status_attrib() to fill in the requested attributes.
 *
 * @param[in,out] pque    - queue being reported; its count attributes are
 *			    refreshed as a side effect
 * @param[in]     preq    - the status request (permissions and the list of
 *			    requested attributes are read from it)
 * @param[in,out] pstathd - head of the list the new status entry is
 *			    appended to
 *
 * @return int
 * @retval 0		- success
 * @retval PBSE_PERM	- requester lacks read access
 * @retval PBSE_SYSTEM	- the status entry could not be allocated
 * @retval PBSE_NOATTR	- status_attrib() reported a failure
 *
 * @note
 *	`bad` is not declared in this function; presumably it is a file-scope
 *	variable - confirm against the rest of the file.
 */
static int
status_que(pbs_queue *pque, struct batch_request *preq, pbs_list_head *pstathd)
{
	struct brp_status *entry;
	svrattrl *wanted;

	/* the requester must hold read access to status anything */
	if (!(preq->rq_perm & ATR_DFLAG_RDACC))
		return (PBSE_PERM);

	/*
	 * Going to do the status: bring the job count up to date.  When
	 * history is configured, jobs in the MOVED and FINISHED states are
	 * left out of the total.
	 */
	if (svr_chk_history_conf()) {
		pque->qu_attr[QA_ATR_TotalJobs].at_val.at_long =
			pque->qu_numjobs -
			(pque->qu_njstate[JOB_STATE_MOVED] +
			 pque->qu_njstate[JOB_STATE_FINISHED]);
	} else {
		pque->qu_attr[QA_ATR_TotalJobs].at_val.at_long = pque->qu_numjobs;
	}
	pque->qu_attr[QA_ATR_TotalJobs].at_flags |=
		ATR_VFLAG_SET | ATR_VFLAG_MODCACHE;

	/* ... and the per-state counts */
	update_state_ct(&pque->qu_attr[QA_ATR_JobsByState],
		pque->qu_njstate, pque->qu_jobstbuf);

	/* create the reply entry and fill in its header portion */
	entry = malloc(sizeof *entry);
	if (entry == NULL)
		return (PBSE_SYSTEM);

	entry->brp_objtype = MGR_OBJ_QUEUE;
	(void)strcpy(entry->brp_objname, pque->qu_qs.qu_name);
	CLEAR_LINK(entry->brp_stlink);
	CLEAR_HEAD(entry->brp_attr);
	append_link(pstathd, &entry->brp_stlink, entry);

	/* attach the requested attributes to the reply entry */
	bad = 0;
	wanted = (svrattrl *)GET_NEXT(preq->rq_ind.rq_status.rq_attr);
	if (status_attrib(wanted, que_attr_def, pque->qu_attr, QA_ATR_LAST,
			preq->rq_perm, &entry->brp_attr, &bad))
		return (PBSE_NOATTR);

	return (0);
}
/**
 * @brief
 *	update_subjob_state - update the subjob state in the parent's tracking
 *	table entry for the subjob and the total counts for each state.
 * @par
 *	If the subjob is going into the EXPIRED state, its exit status and
 *	substate are saved in the table; when job history is configured the
 *	stage-out status and an "exit status is set" flag are saved as well.
 *	Afterwards the parent is checked for completion.
 * @par
 *	Nothing is done when the subjob has no parent, the parent has no
 *	tracking table, the subjob's id contains no '[', or the id prefix does
 *	not match the parent's id.
 *
 * @param[in] pjob     - pointer to the actual subjob job entry
 * @param[in] newstate - new state of the subjob
 *
 * @return void
 */
void
update_subjob_state(job *pjob, int newstate)
{
	int len;
	job *parent;
	char *pc;
	struct ajtrkhd *ptbl;

	parent = pjob->ji_parentaj;
	if (parent == NULL)
		return;
	ptbl = parent->ji_ajtrk;
	if (ptbl == NULL)
		return;

	/* verify that parent job is in fact the parent Array Job */
	pc = strchr(pjob->ji_qs.ji_jobid, (int)'[');
	if (pc == NULL)
		return;	/* id has no '[': cannot be matched against the parent */

	/*
	 * NOTE(review): the compared length stops one character short of the
	 * '['; kept as in the original - confirm whether that is intended.
	 */
	len = pc - pjob->ji_qs.ji_jobid - 1;
	if (strncmp(pjob->ji_qs.ji_jobid, parent->ji_qs.ji_jobid, len) != 0)
		return;	/* nope, not the parent */

	set_subjob_tblstate(parent, pjob->ji_subjindx, newstate);

	if (newstate == JOB_STATE_EXPIRED) {
		ptbl->tkm_tbl[pjob->ji_subjindx].trk_error =
			pjob->ji_qs.ji_un.ji_exect.ji_exitstat;

		if (svr_chk_history_conf()) {
			if ((pjob->ji_wattr[(int)JOB_ATR_stageout_status].at_flags) & ATR_VFLAG_SET) {
				ptbl->tkm_tbl[pjob->ji_subjindx].trk_stgout =
					pjob->ji_wattr[(int)JOB_ATR_stageout_status].at_val.at_long;
			}
			/* record that the exit-status attribute is set */
			if ((pjob->ji_wattr[(int)JOB_ATR_exit_status].at_flags) & ATR_VFLAG_SET) {
				ptbl->tkm_tbl[pjob->ji_subjindx].trk_exitstat = 1;
			}
		}

		ptbl->tkm_tbl[pjob->ji_subjindx].trk_substate = pjob->ji_qs.ji_substate;
		parent->ji_modified = 1;
	}

	chk_array_doneness(parent);
}
/** * @brief * post_movejob - clean up action for child started in net_move/send_job * to "move" a job to another server * @par * If move was successfull, delete server's copy of thejob structure, * and reply to request. * @par * If route didn't work, reject the request. * * @param[in] pwt - work task structure * * @return none. */ static void post_movejob(struct work_task *pwt) { char *id = "post_movejob"; struct batch_request *req; int newstate; int newsub; int stat; int r; job *jobp; req = (struct batch_request *)pwt->wt_parm1; stat = pwt->wt_aux; pbs_errno = PBSE_NONE; if (req->rq_type != PBS_BATCH_MoveJob) { sprintf(log_buffer, "bad request type %d", req->rq_type); log_err(-1, __func__, log_buffer); return; } jobp = find_job(req->rq_ind.rq_move.rq_jid); if ((jobp == NULL) || (jobp != (job *)pwt->wt_parm2)) { sprintf(log_buffer, "job %s not found", req->rq_ind.rq_move.rq_jid); log_err(-1, __func__, log_buffer); } if (WIFEXITED(stat)) { r = WEXITSTATUS(stat); if (r == SEND_JOB_OK) { /* purge server's job structure */ if (jobp->ji_qs.ji_svrflags & JOB_SVFLG_StagedIn) remove_stagein(jobp); (void)strcpy(log_buffer, msg_movejob); (void)sprintf(log_buffer+strlen(log_buffer), msg_manager, req->rq_ind.rq_move.rq_destin, req->rq_user, req->rq_host); /* * If server is configured to keep job history info and * the job is created here, then keep the job struture * for history purpose without purging. No need to check * for sub-jobs as sub jobs can't be moved. 
*/ if (svr_chk_history_conf()) svr_setjob_histinfo(jobp, T_MOV_JOB); else job_purge(jobp); } else r = PBSE_ROUTEREJ; } else { r = PBSE_SYSTEM; (void)sprintf(log_buffer, msg_badexit, stat); (void)strcat(log_buffer, __func__); log_event(PBSEVENT_SYSTEM, PBS_EVENTCLASS_JOB, LOG_NOTICE, jobp->ji_qs.ji_jobid, log_buffer); } if (r) { if (jobp) { /* force re-eval of job state out of Transit */ svr_evaljobstate(jobp, &newstate, &newsub, 1); svr_setjobstate(jobp, newstate, newsub); } req_reject(r, 0, req); } else reply_ack(req); return; }
/** * @brief * post_routejob - clean up action for child started in net_move/send_job * to "route" a job to another server * @par * If route was successfull, delete job. * @par * If route didn't work, mark destination not to be tried again for this * job and call route again. * * @param[in] pwt - work task structure * * @return none. */ static void post_routejob(struct work_task *pwt) { int newstate; int newsub; int r; int stat = pwt->wt_aux; job *jobp = (job *)pwt->wt_parm2; if (jobp == NULL) { log_event(PBSEVENT_ERROR, PBS_EVENTCLASS_JOB, LOG_INFO, "", "post_routejob failed, jobp NULL"); return; } if (WIFEXITED(stat)) { r = WEXITSTATUS(stat); } else { r = SEND_JOB_FATAL; (void)sprintf(log_buffer, msg_badexit, stat); (void)strcat(log_buffer, __func__); log_event(PBSEVENT_SYSTEM, PBS_EVENTCLASS_JOB, LOG_NOTICE, jobp->ji_qs.ji_jobid, log_buffer); } switch (r) { case SEND_JOB_OK: /* normal return, job was routed */ if (jobp->ji_qs.ji_svrflags & JOB_SVFLG_StagedIn) remove_stagein(jobp); /* * If the server is configured to keep job history and the job * is created here, do not purge the job structure but save * it for history purpose. No need to check for sub-jobs as * sub jobs can not be routed. 
*/ if (svr_chk_history_conf()) svr_setjob_histinfo(jobp, T_MOV_JOB); else job_purge(jobp); /* need to remove server job struct */ return; case SEND_JOB_FATAL: /* permanent rejection (or signal) */ if (jobp->ji_qs.ji_substate == JOB_SUBSTATE_ABORT) { /* Job Delete in progress, just set to queued status */ (void)svr_setjobstate(jobp, JOB_STATE_QUEUED, JOB_SUBSTATE_ABORT); return; } add_dest(jobp); /* else mark destination as bad */ /* fall through */ default : /* try routing again */ /* force re-eval of job state out of Transit */ svr_evaljobstate(jobp, &newstate, &newsub, 1); (void)svr_setjobstate(jobp, newstate, newsub); jobp->ji_retryok = 1; if ((r = job_route(jobp)) == PBSE_ROUTEREJ) (void)job_abt(jobp, msg_routebad); else if (r != 0) (void)job_abt(jobp, msg_routexceed); break; } return; }