void req_relnodesjob(struct batch_request *preq) { int jt; /* job type */ job *pjob; int rc; char *jid; int i, offset; char *nodeslist = NULL; char msg[LOG_BUF_SIZE]; if (preq == NULL) return; jid = preq->rq_ind.rq_relnodes.rq_jid; if (jid == NULL) return; /* ** Returns job pointer for singleton job or "parent" of ** an array job. */ pjob = chk_job_request(jid, preq, &jt); if (pjob == NULL) { return; } if (jt == IS_ARRAY_NO) { /* a regular job is okay */ /* the job must be running */ if ((pjob->ji_qs.ji_state != JOB_STATE_RUNNING) || (pjob->ji_qs.ji_substate != JOB_SUBSTATE_RUNNING)) { req_reject(PBSE_BADSTATE, 0, preq); return; } } else if (jt == IS_ARRAY_Single) { /* a single subjob is okay */ offset = subjob_index_to_offset(pjob, get_index_from_jid(jid)); if (offset == -1) { req_reject(PBSE_UNKJOBID, 0, preq); return; } i = get_subjob_state(pjob, offset); if (i == -1) { req_reject(PBSE_IVALREQ, 0, preq); return; } if (i != JOB_STATE_RUNNING) { req_reject(PBSE_BADSTATE, 0, preq); return; } if ((pjob = pjob->ji_ajtrk->tkm_tbl[offset].trk_psubjob) == NULL) { req_reject(PBSE_UNKJOBID, 0, preq); return; } if (pjob->ji_qs.ji_substate != JOB_SUBSTATE_RUNNING) { req_reject(PBSE_BADSTATE, 0, preq); return; } } else { reply_text(preq, PBSE_NOSUP, "not supported for Array Jobs or multiple sub-jobs"); return; } nodeslist = preq->rq_ind.rq_relnodes.rq_node_list; if ((nodeslist != NULL) && (nodeslist[0] == '\0')) { nodeslist = NULL; } rc = free_sister_vnodes(pjob, nodeslist, msg, LOG_BUF_SIZE, preq); if (rc != 0) { reply_text(preq, PBSE_SYSTEM, msg); } }
void req_py_spawn(struct batch_request *preq) { int jt; /* job type */ job *pjob; int rc; char *jid = preq->rq_ind.rq_py_spawn.rq_jid; int i, offset; /* ** Returns job pointer for singleton job or "parent" of ** an array job. */ pjob = chk_job_request(jid, preq, &jt); if (pjob == NULL) return; /* see if requestor is the job owner */ if (svr_chk_owner(preq, pjob) != 0) { req_reject(PBSE_PERM, 0, preq); return; } if (jt == IS_ARRAY_NO) { /* a regular job is okay */ /* the job must be running */ if ((pjob->ji_qs.ji_state != JOB_STATE_RUNNING) || (pjob->ji_qs.ji_substate != JOB_SUBSTATE_RUNNING)) { req_reject(PBSE_BADSTATE, 0, preq); return; } } else if (jt == IS_ARRAY_Single) { /* a single subjob is okay */ offset = subjob_index_to_offset(pjob, get_index_from_jid(jid)); if (offset == -1) { req_reject(PBSE_UNKJOBID, 0, preq); return; } i = get_subjob_state(pjob, offset); if (i == -1) { req_reject(PBSE_IVALREQ, 0, preq); return; } if (i != JOB_STATE_RUNNING) { req_reject(PBSE_BADSTATE, 0, preq); return; } if ((pjob = pjob->ji_ajtrk->tkm_tbl[offset].trk_psubjob) == NULL) { req_reject(PBSE_UNKJOBID, 0, preq); return; } if (pjob->ji_qs.ji_substate != JOB_SUBSTATE_RUNNING) { req_reject(PBSE_BADSTATE, 0, preq); return; } } else { reply_text(preq, PBSE_NOSUP, "not supported for Array Jobs or multiple sub-jobs"); return; } /* ** Pass the request on to MOM. If this works, the function ** post_py_spawn_req will be called to handle the reply. ** If it fails, send the reply now. */ rc = relay_to_mom(pjob, preq, post_py_spawn_req); if (rc) req_reject(rc, 0, preq); /* unable to get to MOM */ }
/** * @brief * Support function for req_stat_job(). * Builds status reply for a single job id, which may be: a normal job, * an Array job, a single subjob or a range of subjobs. * Finds the job structure for the job id and calls either do_stat_of_a_job() * or status_subjob() to build that actual status reply. * * @param[in,out] preq - pointer to the stat job batch request, reply updated * @param[in] name - job id to be statused * @param[in] dohistjobs - flag to include job if it is a history job * @param[in] dosubjobs - flag to expand a Array job to include all subjobs * * @return int * @retval PBSE_NONE (0) : no error * @retval non-zero : PBS error code to return to client */ static int stat_a_jobidname(struct batch_request *preq, char *name, int dohistjobs, int dosubjobs) { int i, indx, x, y, z; char *pc; char *range; int rc; job *pjob; struct batch_reply *preply = &preq->rq_reply; svrattrl *pal; if ((i = is_job_array(name)) == IS_ARRAY_Single) { pjob = find_arrayparent(name); if (pjob == NULL) { return (PBSE_UNKJOBID); } else if ((!dohistjobs) && (rc = svr_chk_histjob(pjob))) { return (rc); } indx = subjob_index_to_offset(pjob, get_index_from_jid(name)); if (indx != -1) { pal = (svrattrl *)GET_NEXT(preq->rq_ind.rq_status.rq_attr); rc = status_subjob(pjob, preq, pal, indx, &preply->brp_un.brp_status, &bad); } else { rc = PBSE_UNKJOBID; } return (rc); /* no job still needs to be stat-ed */ } else if ((i == IS_ARRAY_NO) || (i == IS_ARRAY_ArrayJob)) { pjob = find_job(name); if (pjob == NULL) { return (PBSE_UNKJOBID); } else if ((!dohistjobs) && (rc = svr_chk_histjob(pjob))) { return (rc); } return (do_stat_of_a_job(preq, pjob, dohistjobs, dosubjobs)); } else { /* range of sub jobs */ range = get_index_from_jid(name); if (range == NULL) { return (PBSE_IVALREQ); } pjob = find_arrayparent(name); if (pjob == NULL) { return (PBSE_UNKJOBID); } else if ((!dohistjobs) && (rc = svr_chk_histjob(pjob))) { return (rc); } pal = (svrattrl 
*)GET_NEXT(preq->rq_ind.rq_status.rq_attr); while (1) { if ((i=parse_subjob_index(range,&pc,&x,&y,&z,&i)) == -1) { return (PBSE_IVALREQ); } else if (i == 1) break; while (x <= y) { indx = numindex_to_offset(pjob, x); if (indx < 0) { x += z; continue; } rc = status_subjob(pjob, preq, pal, indx, &preply->brp_un.brp_status, &bad); if (rc && (rc != PBSE_PERM)) { return (rc); } x += z; } range = pc; } /* stat-ed the range, no more to stat for this id */ return (PBSE_NONE); } }
/**
 * @brief
 *	create_subjob - create a Subjob from the parent Array Job.
 *	The parent's fixed save area (ji_qs) is copied as a whole, the
 *	attributes listed in attrs_to_copy are transferred by encoding and
 *	decoding them, the subjob specific attributes (array id, array
 *	index, queue rank and, when enabled, eligible time) are set, and
 *	the new job is enqueued through svr_enquejob().
 *
 * @param[in]	parent - pointer to parent Job; must carry the
 *			 JOB_SVFLG_ArrayJob flag
 * @param[in]	newjid - new job id; must contain a subjob index that maps
 *			 to an entry of the parent's tracking table whose
 *			 status is JOB_STATE_QUEUED, and must fit in the
 *			 ji_jobid field
 * @param[out]	rc     - return code:
 *			 PBSE_NONE     - subjob created and enqueued
 *			 PBSE_IVALREQ  - parent is not an array job, newjid
 *					 has no index or is too long, or
 *					 svr_enquejob() failed
 *			 PBSE_UNKJOBID - index not found in the parent
 *			 PBSE_BADSTATE - that subjob is not queued
 *			 PBSE_SYSTEM   - job structure could not be allocated
 *
 * @return	pointer to new job
 * @retval	NULL - error, see *rc
 */
job *
create_subjob(job *parent, char *newjid, int *rc)
{
	pbs_list_head attrl;
	int i;
	int j;
	int indx;
	char *index;
	attribute_def *pdef;
	attribute *ppar;
	attribute *psub;
	svrattrl *psatl;
	job *subj;
	long eligibletime;
	long time_msec;
#ifdef WIN32
	struct _timeb tval;
#else
	struct timeval tval;
#endif

	if ((parent->ji_qs.ji_svrflags & JOB_SVFLG_ArrayJob) == 0) {
		*rc = PBSE_IVALREQ;
		return NULL; /* parent not an array job */
	}

	/* find and copy the index */
	if ((index = get_index_from_jid(newjid)) == NULL) {
		*rc = PBSE_IVALREQ;
		return NULL;
	}
	if ((indx = subjob_index_to_offset(parent, index)) == -1) {
		*rc = PBSE_UNKJOBID;
		return NULL;
	}
	if (parent->ji_ajtrk->tkm_tbl[indx].trk_status != JOB_STATE_QUEUED) {
		*rc = PBSE_BADSTATE;
		return NULL;
	}

	/*
	 * The new id is copied into the fixed-size ji_jobid field below;
	 * refuse an id that would not fit (including its terminator)
	 * before anything is allocated.
	 */
	if (strlen(newjid) >= sizeof(parent->ji_qs.ji_jobid)) {
		*rc = PBSE_IVALREQ;
		return NULL;
	}

	/*
	 * allocate and clear basic structure
	 * cannot copy job attributes because cannot share strings and other
	 * malloc-ed data,  so copy ji_qs as a whole and then copy the
	 * non-saved items before ji_qs.
	 */
	subj = job_alloc();
	if (subj == NULL) {
		/* do not dereference a failed allocation */
		*rc = PBSE_SYSTEM;
		return NULL;
	}
	subj->ji_qs = parent->ji_qs; /* copy the fixed save area */

#ifdef PBS_CRED_GRIDPROXY
	subj->ji_gsscontext = parent->ji_gsscontext;
#endif
	subj->ji_qhdr = parent->ji_qhdr;
	subj->ji_resvp = parent->ji_resvp;
	subj->ji_myResv = parent->ji_myResv;
	subj->ji_parentaj = parent;
	strcpy(subj->ji_qs.ji_jobid, newjid); /* replace job id; length checked above */
	*subj->ji_qs.ji_fileprefix = '\0';
	subj->ji_subjindx = indx;

	/*
	 * now that is all done, copy the required attributes by
	 * encoding and then decoding into the new array.  Then add the
	 * subjob specific attributes.
	 */
	resc_access_perm = ATR_DFLAG_ACCESS;
	CLEAR_HEAD(attrl);
	for (i = 0; attrs_to_copy[i] != JOB_ATR_LAST; i++) {
		j = (int)attrs_to_copy[i];
		ppar = &parent->ji_wattr[j];
		psub = &subj->ji_wattr[j];
		pdef = &job_attr_def[j];

		if (pdef->at_encode(ppar, &attrl, pdef->at_name, NULL,
			ATR_ENCODE_MOM, &psatl) > 0) {
			/* decode every encoded entry into the subjob's attribute */
			for (psatl = (svrattrl *)GET_NEXT(attrl); psatl;
				psatl = ((svrattrl *)GET_NEXT(psatl->al_link))) {
				pdef->at_decode(psub, psatl->al_name, psatl->al_resc,
					psatl->al_value);
			}
			/* carry forward the default bit if set */
			psub->at_flags |= (ppar->at_flags & ATR_VFLAG_DEFLT);
			free_attrlist(&attrl);
		}
	}

	psub = &subj->ji_wattr[(int)JOB_ATR_array_id];
	job_attr_def[(int)JOB_ATR_array_id].at_decode(psub, NULL, NULL,
		parent->ji_qs.ji_jobid);

	psub = &subj->ji_wattr[(int)JOB_ATR_array_index];
	job_attr_def[(int)JOB_ATR_array_index].at_decode(psub, NULL, NULL, index);

	/* Lastly, set or clear a few flags and link in the structure */

	subj->ji_qs.ji_svrflags &= ~JOB_SVFLG_ArrayJob;
	subj->ji_qs.ji_svrflags |= JOB_SVFLG_SubJob;
	subj->ji_modified = 1; /* ** will likely take this out ** */

	subj->ji_qs.ji_substate = JOB_SUBSTATE_TRANSICM;
	(void)svr_setjobstate(subj, JOB_STATE_QUEUED, JOB_SUBSTATE_QUEUED);
	subj->ji_wattr[(int)JOB_ATR_state].at_flags |= ATR_VFLAG_SET;
	subj->ji_wattr[(int)JOB_ATR_substate].at_flags |= ATR_VFLAG_SET;

	/* subjob needs to borrow eligible time from parent job array.
	 * expecting only to accrue eligible_time and nothing else.
	 */
	if (server.sv_attr[(int)SRV_ATR_EligibleTimeEnable].at_val.at_long == 1) {

		eligibletime = parent->ji_wattr[(int)JOB_ATR_eligible_time].at_val.at_long;

		/*
		 * while the parent is accruing eligible time, add the span
		 * between the parent's and the subjob's sample start times
		 */
		if (parent->ji_wattr[(int)JOB_ATR_accrue_type].at_val.at_long == JOB_ELIGIBLE)
			eligibletime += subj->ji_wattr[(int)JOB_ATR_sample_starttime].at_val.at_long -
				parent->ji_wattr[(int)JOB_ATR_sample_starttime].at_val.at_long;

		subj->ji_wattr[(int)JOB_ATR_eligible_time].at_val.at_long = eligibletime;
		subj->ji_wattr[(int)JOB_ATR_eligible_time].at_flags |=
			ATR_VFLAG_MODIFY | ATR_VFLAG_MODCACHE;
	}

	/* current time in milliseconds, used as the queue rank */
#ifdef WIN32
	_ftime_s(&tval);
	time_msec = (tval.time * 1000L) + tval.millitm;
#else
	gettimeofday(&tval, NULL);
	time_msec = (tval.tv_sec * 1000L) + (tval.tv_usec/1000L);
#endif
	/* set the queue rank attribute */
	subj->ji_wattr[(int)JOB_ATR_qrank].at_val.at_long = time_msec;
	subj->ji_wattr[(int)JOB_ATR_qrank].at_flags |= ATR_VFLAG_SET|ATR_VFLAG_MODCACHE;

	if (svr_enquejob(subj) != 0) {
		job_purge(subj);
		*rc = PBSE_IVALREQ;
		return NULL;
	}
	*rc = PBSE_NONE;
	return subj;
}
void req_rerunjob(struct batch_request *preq) { int anygood = 0; int i; int j; char *jid; int jt; /* job type */ int offset; char *pc; job *pjob; job *parent; char *range; char *vrange; int x, y, z; jid = preq->rq_ind.rq_signal.rq_jid; parent = chk_job_request(jid, preq, &jt); if (parent == (job *)0) return; /* note, req_reject already called */ if ((preq->rq_perm & (ATR_DFLAG_MGWR | ATR_DFLAG_OPWR)) == 0) { req_reject(PBSE_PERM, 0, preq); return; } if (jt == IS_ARRAY_NO) { /* just a regular job, pass it on down the line and be done */ req_rerunjob2(preq, parent); return; } else if (jt == IS_ARRAY_Single) { /* single subjob, if running can signal */ offset = subjob_index_to_offset(parent, get_index_from_jid(jid)); if (offset == -1) { req_reject(PBSE_UNKJOBID, 0, preq); return; } i = get_subjob_state(parent, offset); if (i == -1) { req_reject(PBSE_IVALREQ, 0, preq); return; } else if (i == JOB_STATE_RUNNING) { pjob = find_job(jid); /* get ptr to the subjob */ if (pjob) { req_rerunjob2(preq, pjob); } else { req_reject(PBSE_BADSTATE, 0, preq); return; } } else { req_reject(PBSE_BADSTATE, 0, preq); return; } return; } else if (jt == IS_ARRAY_ArrayJob) { /* The Array Job itself ... 
*/ if (parent->ji_qs.ji_state != JOB_STATE_BEGUN) { req_reject(PBSE_BADSTATE, 0, preq); return; } /* for each subjob that is running, call req_rerunjob2 */ ++preq->rq_refct; /* protect the request/reply struct */ /* Setting deleted subjobs count to 0, * since all the deleted subjobs will be moved to Q state */ parent->ji_ajtrk->tkm_dsubjsct = 0; for (i=0; i<parent->ji_ajtrk->tkm_ct; i++) { if (get_subjob_state(parent, i) == JOB_STATE_RUNNING) { pjob = find_job(mk_subjob_id(parent, i)); if (pjob) { dup_br_for_subjob(preq, pjob, req_rerunjob2); } } else { set_subjob_tblstate(parent, i, JOB_STATE_QUEUED); } } /* if not waiting on any running subjobs, can reply; else */ /* it is taken care of when last running subjob responds */ if (--preq->rq_refct == 0) reply_send(preq); return; } /* what's left to handle is a range of subjobs, foreach subjob */ /* if running, all req_rerunjob2 */ range = get_index_from_jid(jid); if (range == NULL) { req_reject(PBSE_IVALREQ, 0, preq); return; } /* first check that all in the subrange are in fact running */ vrange = range; while (1) { if ((i = parse_subjob_index(vrange, &pc, &x, &y, &z, &j)) == -1) { req_reject(PBSE_IVALREQ, 0, preq); return; } else if (i == 1) break; while (x <= y) { i = numindex_to_offset(parent, x); if (i >= 0) { if (get_subjob_state(parent, i) == JOB_STATE_RUNNING) anygood++; } x += z; } vrange = pc; } if (anygood == 0) { req_reject(PBSE_BADSTATE, 0, preq); return; } /* now do the deed */ ++preq->rq_refct; /* protect the request/reply struct */ while (1) { if ((i = parse_subjob_index(range, &pc, &x, &y, &z, &j)) == -1) { req_reject(PBSE_IVALREQ, 0, preq); break; } else if (i == 1) break; while (x <= y) { i = numindex_to_offset(parent, x); if (i < 0) { x += z; continue; } if (get_subjob_state(parent, i) == JOB_STATE_RUNNING) { pjob = find_job(mk_subjob_id(parent, i)); if (pjob) { dup_br_for_subjob(preq, pjob, req_rerunjob2); } } x += z; } range = pc; } /* if not waiting on any running subjobs, can reply; else */ /* 
it is taken care of when last running subjob responds */ if (--preq->rq_refct == 0) reply_send(preq); return; }