/**
 * @brief
 *	fixup_arrayindicies - bring the subjob tracking table in line with the
 *	string value of the attribute passed in (array_indices_remaining).
 *
 * @par Functionality:
 *	Used when a job is being qmoved into this server.  The
 *	indices_submitted attribute must already have been processed so that
 *	the tracking table exists; this routine dereferences it without
 *	checking.  A job flagged JOB_SVFLG_HERE (created here, hence not being
 *	qmoved) and any mode other than ATR_ACTION_NEW are left untouched.
 *
 * @param[in]	pattr - attribute whose string value lists the remaining indices
 * @param[in]	pobj  - pointer to job structure
 * @param[in]	mode  - actmode
 *
 * @return	PBS error
 * @retval	0 - success (PBSE_NONE is the only value returned)
 */
int
fixup_arrayindicies(attribute *pattr, void *pobj, int mode)
{
	job  *parray = pobj;
	char *cursor;
	char *next;
	int   first, last, step, count;
	int   slot;

	/* nothing to do unless this is a new job that did not start here */
	if (mode != ATR_ACTION_NEW)
		return (PBSE_NONE);
	if (parray->ji_qs.ji_svrflags & JOB_SVFLG_HERE)
		return (PBSE_NONE);

	/* start with every subjob marked expired ... */
	for (slot = 0; slot < parray->ji_ajtrk->tkm_ct; slot++)
		set_subjob_tblstate(parray, slot, JOB_STATE_EXPIRED);

	/* ... then put back to queued each index named in the attribute */
	cursor = pattr->at_val.at_str;
	while (parse_subjob_index(cursor, &next, &first, &last, &step, &count) == 0) {
		for (; first <= last; first += step) {
			slot = numindex_to_offset(parray, first);
			if (slot != -1)
				set_subjob_tblstate(parray, slot, JOB_STATE_QUEUED);
		}
		cursor = next;	/* continue with the next sub-range */
	}

	return (PBSE_NONE);
}
/** * @brief * mk_subjob_index_tbl - make the subjob index tracking table * (struct ajtrkhd) based on the number of indexes in the "range" * * @param[in] range - subjob index range * @param[in] initialstate - job state * * @return ptr to table * @retval NULL - error */ static struct ajtrkhd *mk_subjob_index_tbl(char *range, int initalstate) { int ct; int i, j; int x, y, z; char *eptr; struct ajtrkhd *t; size_t sz; i = parse_subjob_index(range, &eptr, &x, &y, &z, &ct); if (i != 0) return NULL; /* parse error */ sz = sizeof(struct ajtrkhd) + ((ct-1) * sizeof(struct ajtrk)); t = (struct ajtrkhd *)malloc(sz); if (t == NULL) return NULL; t->tkm_ct = ct; t->tkm_step = z; t->tkm_size = sz; t->tkm_flags = 0; for (i=0; i<PBS_NUMJOBSTATE; i++) t->tkm_subjsct[i] = 0; t->tkm_subjsct[JOB_STATE_QUEUED] = ct; t->tkm_dsubjsct = 0; j = 0; for (i=x; i<=y; i+=z, j++) { t->tkm_tbl[j].trk_index = i; t->tkm_tbl[j].trk_status = initalstate; t->tkm_tbl[j].trk_error = 0; t->tkm_tbl[j].trk_substate = JOB_SUBSTATE_FINISHED; t->tkm_tbl[j].trk_stgout = -1; t->tkm_tbl[j].trk_exitstat = 0; } return t; }
/**
 * @brief
 *	setup_arrayjob_attrs - set up the special attributes of an Array Job
 *	Called as "action" routine for the attribute array_indices_submitted
 *
 * @par Functionality:
 *	- ATR_ACTION_NEW: the index range is parsed and its count checked
 *	  against the server's max array size (or PBS_MAX_ARRAY_JOB_DFL when
 *	  that attribute is unset); array_indices_remaining is set if it is
 *	  not already, and a fresh tracking table replaces any existing one.
 *	- ATR_ACTION_RECOV: the range is re-parsed, the tracking table header
 *	  is refreshed, running/exiting subjobs are put back to queued, the
 *	  per-state counts are rebuilt and array_indices_remaining is
 *	  regenerated from the table.
 *	- ATR_ACTION_ALTER: allowed only while the job is queued;
 *	  array_indices_remaining is cleared and reset from the new value.
 *	In every mode the "array" attribute is set true and
 *	"array_state_count" is freed before any mode-specific work.
 *
 * @param[in]	pattr - pointer to special attributes of an Array Job
 * @param[in]	pobj  - pointer to job structure
 * @param[in]	mode  - actmode
 *
 * @return	PBS error
 * @retval	0 - success
 * @retval	PBSE_BADATVAL - range does not parse, unsupported mode, job id
 *		is not an Array Job, tracking table missing on recovery, or the
 *		tracking table could not be built
 * @retval	PBSE_MaxArraySize - more subjobs requested than allowed
 * @retval	PBSE_MODATRRUN - alter attempted on a job that is not queued
 */
int
setup_arrayjob_attrs(attribute *pattr, void *pobj, int mode)
{
	int  i;
	job *pjob = pobj;

	if (mode == ATR_ACTION_NEW) {
		/* validate max array size */
		long  l;	/* kept as long: the attribute value is a long */
		int   x, y, z, ct;
		char *ep;

		if (server.sv_attr[(int)SVR_ATR_maxarraysize].at_flags & ATR_VFLAG_SET)
			l = server.sv_attr[(int)SVR_ATR_maxarraysize].at_val.at_long;
		else
			l = PBS_MAX_ARRAY_JOB_DFL;	/* default limit 10000 */

		if (parse_subjob_index(pattr->at_val.at_str, &ep, &x, &y, &z, &ct) != 0)
			return PBSE_BADATVAL;

		if (ct > l)
			return PBSE_MaxArraySize;
	}

	/* set attribute "array" True and clear "array_state_count" */
	pjob->ji_wattr[(int)JOB_ATR_array].at_val.at_long = 1;
	pjob->ji_wattr[(int)JOB_ATR_array].at_flags = ATR_VFLAG_SET | ATR_VFLAG_MODCACHE;
	job_attr_def[(int)JOB_ATR_array_state_count].at_free(&pjob->ji_wattr[(int)JOB_ATR_array_state_count]);

	if (mode == ATR_ACTION_RECOV) {
		int   x, y, z, ct;
		char *ep;

		/* on recovery ... */

		/* without a recovered tracking table there is nothing to fix up */
		if (pjob->ji_ajtrk == NULL)
			return PBSE_BADATVAL;

		/* parse the various components again, since we dont store them */
		if (parse_subjob_index(pattr->at_val.at_str, &ep, &x, &y, &z, &ct) != 0)
			return PBSE_BADATVAL;

		pjob->ji_ajtrk->tkm_ct = ct;
		pjob->ji_ajtrk->tkm_step = z;
		pjob->ji_ajtrk->tkm_flags = 0;

		/* reset counts and any running/exiting subjob to queued */
		for (i = 0; i < PBS_NUMJOBSTATE; ++i)
			pjob->ji_ajtrk->tkm_subjsct[i] = 0;

		for (i = 0; i < pjob->ji_ajtrk->tkm_ct; ++i) {
			int st = pjob->ji_ajtrk->tkm_tbl[i].trk_status;

			if ((st == JOB_STATE_RUNNING) || (st == JOB_STATE_EXITING)) {
				st = JOB_STATE_QUEUED;
				pjob->ji_ajtrk->tkm_tbl[i].trk_status = JOB_STATE_QUEUED;
			}
			/* the status is a recovered value; never index outside the counters */
			if ((st >= 0) && (st < PBS_NUMJOBSTATE))
				pjob->ji_ajtrk->tkm_subjsct[st]++;
		}

		/* clear and reset array_indices_remaining to new value */
		job_attr_def[(int)JOB_ATR_array_indices_remaining].at_free(&pjob->ji_wattr[(int)JOB_ATR_array_indices_remaining]);
		job_attr_def[(int)JOB_ATR_array_indices_remaining].at_decode(&pjob->ji_wattr[(int)JOB_ATR_array_indices_remaining], NULL, NULL, cvt_range(pjob->ji_ajtrk, JOB_STATE_QUEUED));

		return (PBSE_NONE);
	}

	if ((mode != ATR_ACTION_ALTER) && (mode != ATR_ACTION_NEW))
		return PBSE_BADATVAL;

	if (is_job_array(pjob->ji_qs.ji_jobid) != IS_ARRAY_ArrayJob)
		return PBSE_BADATVAL;	/* not an Array Job */

	if (mode == ATR_ACTION_ALTER) {
		if (pjob->ji_qs.ji_state != JOB_STATE_QUEUED)
			return PBSE_MODATRRUN;	/* cannot modify once begun */

		/* clear "array_indices_remaining" so can be reset */
		job_attr_def[(int)JOB_ATR_array_indices_remaining].at_free(&pjob->ji_wattr[(int)JOB_ATR_array_indices_remaining]);
	}

	/* set "array_indices_remaining" if not already set */
	if ((pjob->ji_wattr[(int)JOB_ATR_array_indices_remaining].at_flags & ATR_VFLAG_SET) == 0)
		job_attr_def[(int)JOB_ATR_array_indices_remaining].at_decode(&pjob->ji_wattr[(int)JOB_ATR_array_indices_remaining], NULL, NULL, pattr->at_val.at_str);

	/* set other Array related fields in the job structure */
	pjob->ji_qs.ji_svrflags |= JOB_SVFLG_ArrayJob;

	if (mode == ATR_ACTION_NEW) {
		/* replace any existing table; free(NULL) is a no-op */
		free(pjob->ji_ajtrk);
		if ((pjob->ji_ajtrk = mk_subjob_index_tbl(pjob->ji_wattr[(int)JOB_ATR_array_indices_submitted].at_val.at_str, JOB_STATE_QUEUED)) == NULL)
			return PBSE_BADATVAL;
	}

	return (PBSE_NONE);
}
/** * @brief * Support function for req_stat_job(). * Builds status reply for a single job id, which may be: a normal job, * an Array job, a single subjob or a range of subjobs. * Finds the job structure for the job id and calls either do_stat_of_a_job() * or status_subjob() to build that actual status reply. * * @param[in,out] preq - pointer to the stat job batch request, reply updated * @param[in] name - job id to be statused * @param[in] dohistjobs - flag to include job if it is a history job * @param[in] dosubjobs - flag to expand a Array job to include all subjobs * * @return int * @retval PBSE_NONE (0) : no error * @retval non-zero : PBS error code to return to client */ static int stat_a_jobidname(struct batch_request *preq, char *name, int dohistjobs, int dosubjobs) { int i, indx, x, y, z; char *pc; char *range; int rc; job *pjob; struct batch_reply *preply = &preq->rq_reply; svrattrl *pal; if ((i = is_job_array(name)) == IS_ARRAY_Single) { pjob = find_arrayparent(name); if (pjob == NULL) { return (PBSE_UNKJOBID); } else if ((!dohistjobs) && (rc = svr_chk_histjob(pjob))) { return (rc); } indx = subjob_index_to_offset(pjob, get_index_from_jid(name)); if (indx != -1) { pal = (svrattrl *)GET_NEXT(preq->rq_ind.rq_status.rq_attr); rc = status_subjob(pjob, preq, pal, indx, &preply->brp_un.brp_status, &bad); } else { rc = PBSE_UNKJOBID; } return (rc); /* no job still needs to be stat-ed */ } else if ((i == IS_ARRAY_NO) || (i == IS_ARRAY_ArrayJob)) { pjob = find_job(name); if (pjob == NULL) { return (PBSE_UNKJOBID); } else if ((!dohistjobs) && (rc = svr_chk_histjob(pjob))) { return (rc); } return (do_stat_of_a_job(preq, pjob, dohistjobs, dosubjobs)); } else { /* range of sub jobs */ range = get_index_from_jid(name); if (range == NULL) { return (PBSE_IVALREQ); } pjob = find_arrayparent(name); if (pjob == NULL) { return (PBSE_UNKJOBID); } else if ((!dohistjobs) && (rc = svr_chk_histjob(pjob))) { return (rc); } pal = (svrattrl 
*)GET_NEXT(preq->rq_ind.rq_status.rq_attr); while (1) { if ((i=parse_subjob_index(range,&pc,&x,&y,&z,&i)) == -1) { return (PBSE_IVALREQ); } else if (i == 1) break; while (x <= y) { indx = numindex_to_offset(pjob, x); if (indx < 0) { x += z; continue; } rc = status_subjob(pjob, preq, pal, indx, &preply->brp_un.brp_status, &bad); if (rc && (rc != PBSE_PERM)) { return (rc); } x += z; } range = pc; } /* stat-ed the range, no more to stat for this id */ return (PBSE_NONE); } }
void req_rerunjob(struct batch_request *preq) { int anygood = 0; int i; int j; char *jid; int jt; /* job type */ int offset; char *pc; job *pjob; job *parent; char *range; char *vrange; int x, y, z; jid = preq->rq_ind.rq_signal.rq_jid; parent = chk_job_request(jid, preq, &jt); if (parent == (job *)0) return; /* note, req_reject already called */ if ((preq->rq_perm & (ATR_DFLAG_MGWR | ATR_DFLAG_OPWR)) == 0) { req_reject(PBSE_PERM, 0, preq); return; } if (jt == IS_ARRAY_NO) { /* just a regular job, pass it on down the line and be done */ req_rerunjob2(preq, parent); return; } else if (jt == IS_ARRAY_Single) { /* single subjob, if running can signal */ offset = subjob_index_to_offset(parent, get_index_from_jid(jid)); if (offset == -1) { req_reject(PBSE_UNKJOBID, 0, preq); return; } i = get_subjob_state(parent, offset); if (i == -1) { req_reject(PBSE_IVALREQ, 0, preq); return; } else if (i == JOB_STATE_RUNNING) { pjob = find_job(jid); /* get ptr to the subjob */ if (pjob) { req_rerunjob2(preq, pjob); } else { req_reject(PBSE_BADSTATE, 0, preq); return; } } else { req_reject(PBSE_BADSTATE, 0, preq); return; } return; } else if (jt == IS_ARRAY_ArrayJob) { /* The Array Job itself ... 
*/ if (parent->ji_qs.ji_state != JOB_STATE_BEGUN) { req_reject(PBSE_BADSTATE, 0, preq); return; } /* for each subjob that is running, call req_rerunjob2 */ ++preq->rq_refct; /* protect the request/reply struct */ /* Setting deleted subjobs count to 0, * since all the deleted subjobs will be moved to Q state */ parent->ji_ajtrk->tkm_dsubjsct = 0; for (i=0; i<parent->ji_ajtrk->tkm_ct; i++) { if (get_subjob_state(parent, i) == JOB_STATE_RUNNING) { pjob = find_job(mk_subjob_id(parent, i)); if (pjob) { dup_br_for_subjob(preq, pjob, req_rerunjob2); } } else { set_subjob_tblstate(parent, i, JOB_STATE_QUEUED); } } /* if not waiting on any running subjobs, can reply; else */ /* it is taken care of when last running subjob responds */ if (--preq->rq_refct == 0) reply_send(preq); return; } /* what's left to handle is a range of subjobs, foreach subjob */ /* if running, all req_rerunjob2 */ range = get_index_from_jid(jid); if (range == NULL) { req_reject(PBSE_IVALREQ, 0, preq); return; } /* first check that all in the subrange are in fact running */ vrange = range; while (1) { if ((i = parse_subjob_index(vrange, &pc, &x, &y, &z, &j)) == -1) { req_reject(PBSE_IVALREQ, 0, preq); return; } else if (i == 1) break; while (x <= y) { i = numindex_to_offset(parent, x); if (i >= 0) { if (get_subjob_state(parent, i) == JOB_STATE_RUNNING) anygood++; } x += z; } vrange = pc; } if (anygood == 0) { req_reject(PBSE_BADSTATE, 0, preq); return; } /* now do the deed */ ++preq->rq_refct; /* protect the request/reply struct */ while (1) { if ((i = parse_subjob_index(range, &pc, &x, &y, &z, &j)) == -1) { req_reject(PBSE_IVALREQ, 0, preq); break; } else if (i == 1) break; while (x <= y) { i = numindex_to_offset(parent, x); if (i < 0) { x += z; continue; } if (get_subjob_state(parent, i) == JOB_STATE_RUNNING) { pjob = find_job(mk_subjob_id(parent, i)); if (pjob) { dup_br_for_subjob(preq, pjob, req_rerunjob2); } } x += z; } range = pc; } /* if not waiting on any running subjobs, can reply; else */ /* 
it is taken care of when last running subjob responds */ if (--preq->rq_refct == 0) reply_send(preq); return; }