/**
 * @brief
 *	fixup_arrayindicies - rebuild the subjob states of an Array Job from
 *	its array_indicies_remaining attribute.
 *
 * @par Functionality:
 *	Used when a job is being qmoved into this server.  Every entry of the
 *	tracking table is first marked EXPIRED; each index listed in the
 *	attribute's string value is then set back to QUEUED.
 *	Nothing is done unless the mode is ATR_ACTION_NEW and the job does not
 *	carry JOB_SVFLG_HERE (a job created here is not being qmoved).
 *	The indices_submitted attribute must have been processed first so that
 *	the tracking table exists: pjob->ji_ajtrk is dereferenced unchecked.
 *
 * @param[in]	pattr - pointer to special attributes of an Array Job
 * @param[in]	pobj  - pointer to job structure
 * @param[in]	mode  - actmode
 *
 * @return	PBS error
 * @retval	PBSE_NONE - always
 */
int
fixup_arrayindicies(attribute *pattr, void *pobj, int mode)
{
	job	*array_job = pobj;
	char	*cursor;
	char	*next;
	int	 first, last, step, ct;
	int	 slot;

	/* only act on a new attribute of a job that was not created here */
	if (mode != ATR_ACTION_NEW)
		return (PBSE_NONE);
	if (array_job->ji_qs.ji_svrflags & JOB_SVFLG_HERE)
		return (PBSE_NONE);

	/* start with every subjob expired ... */
	for (slot = 0; slot < array_job->ji_ajtrk->tkm_ct; ++slot)
		set_subjob_tblstate(array_job, slot, JOB_STATE_EXPIRED);

	/* ... then requeue the ones named in the "remaining" list */
	for (cursor = pattr->at_val.at_str;
		parse_subjob_index(cursor, &next, &first, &last, &step, &ct) == 0;
		cursor = next) {
		for (; first <= last; first += step) {
			slot = numindex_to_offset(array_job, first);
			if (slot != -1)
				set_subjob_tblstate(array_job, slot, JOB_STATE_QUEUED);
		}
	}

	return (PBSE_NONE);
}
/**
 * @brief
 *	update_subjob_state - update the subjob state in the table entry for
 *	the subjob and the total counts for each state.
 *	If job going into EXPIRED state, the job exitstatus is saved in the tbl
 *
 * @par Functionality:
 *	Returns without doing anything when the subjob has no parent, the
 *	parent has no tracking table, the subjob's id contains no '[', or the
 *	id prefix up to and including '[' differs from the parent's job id.
 *	Otherwise the parent's table entry is set to the new state and
 *	chk_array_doneness() is called on the parent.
 *
 * @param[in]	pjob - pointer to the actual subjob job entry
 * @param[in]	newstate - newstate of the sub job.
 *
 * @return	void
 */
void
update_subjob_state(job *pjob, int newstate)
{
	size_t		 prefix_len;
	job		*parent;
	char		*bracket;
	struct ajtrkhd	*ptbl;

	parent = pjob->ji_parentaj;
	if (parent == NULL)
		return;
	ptbl = parent->ji_ajtrk;
	if (ptbl == NULL)
		return;

	/* verify that parent job is in fact the parent Array Job */
	bracket = strchr(pjob->ji_qs.ji_jobid, (int) '[');
	if (bracket == NULL)
		return;	/* id has no index part, parentage cannot be verified */

	/*
	 * Compare everything up to and including the '[' so that the whole
	 * sequence number has to match, e.g. "12[" must not match "123[".
	 */
	prefix_len = (size_t) (bracket - pjob->ji_qs.ji_jobid) + 1;
	if (strncmp(pjob->ji_qs.ji_jobid, parent->ji_qs.ji_jobid, prefix_len) != 0)
		return;	/* nope, not the parent */

	set_subjob_tblstate(parent, pjob->ji_subjindx, newstate);

	if (newstate == JOB_STATE_EXPIRED) {
		/* record how the subjob ended in its tracking table entry */
		ptbl->tkm_tbl[pjob->ji_subjindx].trk_error =
			pjob->ji_qs.ji_un.ji_exect.ji_exitstat;

		if (svr_chk_history_conf()) {
			if ((pjob->ji_wattr[(int) JOB_ATR_stageout_status].at_flags) & ATR_VFLAG_SET) {
				ptbl->tkm_tbl[pjob->ji_subjindx].trk_stgout =
					pjob->ji_wattr[(int) JOB_ATR_stageout_status].at_val.at_long;
			}
			/* only the fact that an exit status was set is recorded */
			if ((pjob->ji_wattr[(int) JOB_ATR_exit_status].at_flags) & ATR_VFLAG_SET) {
				ptbl->tkm_tbl[pjob->ji_subjindx].trk_exitstat = 1;
			}
		}
		ptbl->tkm_tbl[pjob->ji_subjindx].trk_substate = pjob->ji_qs.ji_substate;
		parent->ji_modified = 1;
	}

	chk_array_doneness(parent);
}
void req_rerunjob(struct batch_request *preq) { int anygood = 0; int i; int j; char *jid; int jt; /* job type */ int offset; char *pc; job *pjob; job *parent; char *range; char *vrange; int x, y, z; jid = preq->rq_ind.rq_signal.rq_jid; parent = chk_job_request(jid, preq, &jt); if (parent == (job *)0) return; /* note, req_reject already called */ if ((preq->rq_perm & (ATR_DFLAG_MGWR | ATR_DFLAG_OPWR)) == 0) { req_reject(PBSE_PERM, 0, preq); return; } if (jt == IS_ARRAY_NO) { /* just a regular job, pass it on down the line and be done */ req_rerunjob2(preq, parent); return; } else if (jt == IS_ARRAY_Single) { /* single subjob, if running can signal */ offset = subjob_index_to_offset(parent, get_index_from_jid(jid)); if (offset == -1) { req_reject(PBSE_UNKJOBID, 0, preq); return; } i = get_subjob_state(parent, offset); if (i == -1) { req_reject(PBSE_IVALREQ, 0, preq); return; } else if (i == JOB_STATE_RUNNING) { pjob = find_job(jid); /* get ptr to the subjob */ if (pjob) { req_rerunjob2(preq, pjob); } else { req_reject(PBSE_BADSTATE, 0, preq); return; } } else { req_reject(PBSE_BADSTATE, 0, preq); return; } return; } else if (jt == IS_ARRAY_ArrayJob) { /* The Array Job itself ... 
*/ if (parent->ji_qs.ji_state != JOB_STATE_BEGUN) { req_reject(PBSE_BADSTATE, 0, preq); return; } /* for each subjob that is running, call req_rerunjob2 */ ++preq->rq_refct; /* protect the request/reply struct */ /* Setting deleted subjobs count to 0, * since all the deleted subjobs will be moved to Q state */ parent->ji_ajtrk->tkm_dsubjsct = 0; for (i=0; i<parent->ji_ajtrk->tkm_ct; i++) { if (get_subjob_state(parent, i) == JOB_STATE_RUNNING) { pjob = find_job(mk_subjob_id(parent, i)); if (pjob) { dup_br_for_subjob(preq, pjob, req_rerunjob2); } } else { set_subjob_tblstate(parent, i, JOB_STATE_QUEUED); } } /* if not waiting on any running subjobs, can reply; else */ /* it is taken care of when last running subjob responds */ if (--preq->rq_refct == 0) reply_send(preq); return; } /* what's left to handle is a range of subjobs, foreach subjob */ /* if running, all req_rerunjob2 */ range = get_index_from_jid(jid); if (range == NULL) { req_reject(PBSE_IVALREQ, 0, preq); return; } /* first check that all in the subrange are in fact running */ vrange = range; while (1) { if ((i = parse_subjob_index(vrange, &pc, &x, &y, &z, &j)) == -1) { req_reject(PBSE_IVALREQ, 0, preq); return; } else if (i == 1) break; while (x <= y) { i = numindex_to_offset(parent, x); if (i >= 0) { if (get_subjob_state(parent, i) == JOB_STATE_RUNNING) anygood++; } x += z; } vrange = pc; } if (anygood == 0) { req_reject(PBSE_BADSTATE, 0, preq); return; } /* now do the deed */ ++preq->rq_refct; /* protect the request/reply struct */ while (1) { if ((i = parse_subjob_index(range, &pc, &x, &y, &z, &j)) == -1) { req_reject(PBSE_IVALREQ, 0, preq); break; } else if (i == 1) break; while (x <= y) { i = numindex_to_offset(parent, x); if (i < 0) { x += z; continue; } if (get_subjob_state(parent, i) == JOB_STATE_RUNNING) { pjob = find_job(mk_subjob_id(parent, i)); if (pjob) { dup_br_for_subjob(preq, pjob, req_rerunjob2); } } x += z; } range = pc; } /* if not waiting on any running subjobs, can reply; else */ /* 
it is taken care of when last running subjob responds */ if (--preq->rq_refct == 0) reply_send(preq); return; }