void req_relnodesjob(struct batch_request *preq) { int jt; /* job type */ job *pjob; int rc; char *jid; int i, offset; char *nodeslist = NULL; char msg[LOG_BUF_SIZE]; if (preq == NULL) return; jid = preq->rq_ind.rq_relnodes.rq_jid; if (jid == NULL) return; /* ** Returns job pointer for singleton job or "parent" of ** an array job. */ pjob = chk_job_request(jid, preq, &jt); if (pjob == NULL) { return; } if (jt == IS_ARRAY_NO) { /* a regular job is okay */ /* the job must be running */ if ((pjob->ji_qs.ji_state != JOB_STATE_RUNNING) || (pjob->ji_qs.ji_substate != JOB_SUBSTATE_RUNNING)) { req_reject(PBSE_BADSTATE, 0, preq); return; } } else if (jt == IS_ARRAY_Single) { /* a single subjob is okay */ offset = subjob_index_to_offset(pjob, get_index_from_jid(jid)); if (offset == -1) { req_reject(PBSE_UNKJOBID, 0, preq); return; } i = get_subjob_state(pjob, offset); if (i == -1) { req_reject(PBSE_IVALREQ, 0, preq); return; } if (i != JOB_STATE_RUNNING) { req_reject(PBSE_BADSTATE, 0, preq); return; } if ((pjob = pjob->ji_ajtrk->tkm_tbl[offset].trk_psubjob) == NULL) { req_reject(PBSE_UNKJOBID, 0, preq); return; } if (pjob->ji_qs.ji_substate != JOB_SUBSTATE_RUNNING) { req_reject(PBSE_BADSTATE, 0, preq); return; } } else { reply_text(preq, PBSE_NOSUP, "not supported for Array Jobs or multiple sub-jobs"); return; } nodeslist = preq->rq_ind.rq_relnodes.rq_node_list; if ((nodeslist != NULL) && (nodeslist[0] == '\0')) { nodeslist = NULL; } rc = free_sister_vnodes(pjob, nodeslist, msg, LOG_BUF_SIZE, preq); if (rc != 0) { reply_text(preq, PBSE_SYSTEM, msg); } }
/**
 * @brief
 * status_subjob - status a single subjob (of an Array Job)
 * Works by statusing the parent unless subjob is actually running.
 *
 * @param[in,out] pjob - ptr to parent Array
 * @param[in] preq - request structure
 * @param[in] pal - specific attributes to status
 * @param[in] subj - if not = -1 then include subjob [n]
 * @param[in,out] pstathd - RETURN: head of list to append status to
 * @param[out] bad - RETURN: index of first bad attribute
 *
 * @return int
 * @retval 0 : success
 * @retval PBSE_PERM : client is not authorized to status the job
 * @retval PBSE_SYSTEM : memory allocation error
 * @retval PBSE_IVALREQ : something wrong with the flags
 */
int
status_subjob(job *pjob, struct batch_request *preq, svrattrl *pal, int subj, pbs_list_head *pstathd, int *bad)
{
	int limit = (int)JOB_ATR_LAST;
	struct brp_status *pstat;
	job *psubjob;		/* ptr to job to status */
	char realstate;
	int rc = 0;
	int oldeligflags = 0;
	int oldatypflags = 0;
	int subjob_state = -1;
	char *old_subjob_comment = NULL;
	char *fake_comment = NULL;	/* comment text faked for finished subjobs */

	/* see if the client is authorized to status this job */
	if (!server.sv_attr[(int)SRV_ATR_query_others].at_val.at_long)
		if (svr_authorize_jobreq(preq, pjob))
			return (PBSE_PERM);

	/* this entry point is only meaningful for an Array Job parent */
	if ((pjob->ji_qs.ji_svrflags & JOB_SVFLG_ArrayJob) == 0)
		return PBSE_IVALREQ;

	/* if subjob is running, use real job structure */
	if (get_subjob_state(pjob, subj) == JOB_STATE_RUNNING) {
		psubjob = find_job(mk_subjob_id(pjob, subj));
		if (psubjob)
			status_job(psubjob, preq, pal, pstathd, bad);
		return 0;
	}

	/* otherwise we fake it with info from the parent */
	/* allocate reply structure and fill in header portion */

	/* for the general case, we don't want to include the parent's */
	/* array related attrbutes as they belong only to the Array    */
	if (pal == NULL)
		limit = JOB_ATR_array;
	pstat = malloc(sizeof(struct brp_status));
	if (pstat == NULL)
		return (PBSE_SYSTEM);
	CLEAR_LINK(pstat->brp_stlink);
	pstat->brp_objtype = MGR_OBJ_JOB;
	(void)strcpy(pstat->brp_objname, mk_subjob_id(pjob, subj));
	CLEAR_HEAD(pstat->brp_attr);
	append_link(pstathd, &pstat->brp_stlink, pstat);

	/* add attributes to the status reply */
	*bad = 0;

	/*
	 * fake the job state and comment by setting the parent job's state
	 * and comment to that of the subjob
	 */
	subjob_state = get_subjob_state(pjob, subj);
	realstate = pjob->ji_wattr[(int)JOB_ATR_state].at_val.at_char;
	pjob->ji_wattr[(int)JOB_ATR_state].at_val.at_char = statechars[subjob_state];
	pjob->ji_wattr[(int)JOB_ATR_state].at_flags |= ATR_VFLAG_MODCACHE;

	if (subjob_state == JOB_STATE_EXPIRED || subjob_state == JOB_STATE_FINISHED) {
		/* pick the faked comment text; the three substates previously
		 * duplicated this whole save/decode sequence verbatim */
		switch (pjob->ji_ajtrk->tkm_tbl[subj].trk_substate) {
			case JOB_SUBSTATE_FINISHED:
				fake_comment = "Subjob finished";
				break;
			case JOB_SUBSTATE_FAILED:
				fake_comment = "Subjob failed";
				break;
			case JOB_SUBSTATE_TERMINATED:
				fake_comment = "Subjob terminated";
				break;
			default:
				break;
		}
		if (fake_comment != NULL) {
			if (pjob->ji_wattr[(int)JOB_ATR_Comment].at_flags & ATR_VFLAG_SET) {
				old_subjob_comment = strdup(pjob->ji_wattr[(int)JOB_ATR_Comment].at_val.at_str);
				if (old_subjob_comment == NULL) {
					/* BUG FIX: the original returned here without
					 * undoing the faked state char, leaving the
					 * parent Array's real state corrupted */
					pjob->ji_wattr[(int)JOB_ATR_state].at_val.at_char = realstate;
					pjob->ji_wattr[(int)JOB_ATR_state].at_flags |= ATR_VFLAG_MODCACHE;
					return (PBSE_SYSTEM);
				}
			}
			if (job_attr_def[(int)JOB_ATR_Comment].at_decode(&pjob->ji_wattr[(int)JOB_ATR_Comment],
					(char *)0, (char *)0, fake_comment) == PBSE_SYSTEM) {
				free(old_subjob_comment);
				/* BUG FIX: restore the real state on this error path too */
				pjob->ji_wattr[(int)JOB_ATR_state].at_val.at_char = realstate;
				pjob->ji_wattr[(int)JOB_ATR_state].at_flags |= ATR_VFLAG_MODCACHE;
				return (PBSE_SYSTEM);
			}
		}
	}

	/* when eligible_time_enable is off, */
	/* clear the set flag so that eligible_time and accrue_type dont show */
	if (server.sv_attr[(int)SRV_ATR_EligibleTimeEnable].at_val.at_long == 0) {
		oldeligflags = pjob->ji_wattr[(int)JOB_ATR_eligible_time].at_flags;
		pjob->ji_wattr[(int)JOB_ATR_eligible_time].at_flags &= ~ATR_VFLAG_SET;
		pjob->ji_wattr[(int)JOB_ATR_eligible_time].at_flags |= ATR_VFLAG_MODCACHE;

		oldatypflags = pjob->ji_wattr[(int)JOB_ATR_accrue_type].at_flags;
		pjob->ji_wattr[(int)JOB_ATR_accrue_type].at_flags &= ~ATR_VFLAG_SET;
		pjob->ji_wattr[(int)JOB_ATR_accrue_type].at_flags |= ATR_VFLAG_MODCACHE;

		/* Note: ATR_VFLAG_MODCACHE must be set because of svr_cached() does */
		/* not correctly check ATR_VFLAG_SET */
	}

	if (status_attrib(pal, job_attr_def, pjob->ji_wattr, limit,
			preq->rq_perm, &pstat->brp_attr, bad))
		rc = PBSE_NOATTR;

	/* Set the parent state back to what it really is */
	pjob->ji_wattr[(int)JOB_ATR_state].at_val.at_char = realstate;
	pjob->ji_wattr[(int)JOB_ATR_state].at_flags |= ATR_VFLAG_MODCACHE;

	/* reset the flags -- BUG FIX: done before the comment restore so the
	 * error return below cannot leave eligible_time/accrue_type cleared */
	if (server.sv_attr[(int)SRV_ATR_EligibleTimeEnable].at_val.at_long == 0) {
		pjob->ji_wattr[(int)JOB_ATR_eligible_time].at_flags = oldeligflags;
		pjob->ji_wattr[(int)JOB_ATR_accrue_type].at_flags = oldatypflags;
	}

	/* Set the parent comment back to what it really is */
	if (old_subjob_comment != NULL) {
		if (job_attr_def[(int)JOB_ATR_Comment].at_decode(&pjob->ji_wattr[(int)JOB_ATR_Comment],
				(char *)0, (char *)0, old_subjob_comment) == PBSE_SYSTEM) {
			free(old_subjob_comment);
			return (PBSE_SYSTEM);
		}
		free(old_subjob_comment);
	}

	return (rc);
}
void req_py_spawn(struct batch_request *preq) { int jt; /* job type */ job *pjob; int rc; char *jid = preq->rq_ind.rq_py_spawn.rq_jid; int i, offset; /* ** Returns job pointer for singleton job or "parent" of ** an array job. */ pjob = chk_job_request(jid, preq, &jt); if (pjob == NULL) return; /* see if requestor is the job owner */ if (svr_chk_owner(preq, pjob) != 0) { req_reject(PBSE_PERM, 0, preq); return; } if (jt == IS_ARRAY_NO) { /* a regular job is okay */ /* the job must be running */ if ((pjob->ji_qs.ji_state != JOB_STATE_RUNNING) || (pjob->ji_qs.ji_substate != JOB_SUBSTATE_RUNNING)) { req_reject(PBSE_BADSTATE, 0, preq); return; } } else if (jt == IS_ARRAY_Single) { /* a single subjob is okay */ offset = subjob_index_to_offset(pjob, get_index_from_jid(jid)); if (offset == -1) { req_reject(PBSE_UNKJOBID, 0, preq); return; } i = get_subjob_state(pjob, offset); if (i == -1) { req_reject(PBSE_IVALREQ, 0, preq); return; } if (i != JOB_STATE_RUNNING) { req_reject(PBSE_BADSTATE, 0, preq); return; } if ((pjob = pjob->ji_ajtrk->tkm_tbl[offset].trk_psubjob) == NULL) { req_reject(PBSE_UNKJOBID, 0, preq); return; } if (pjob->ji_qs.ji_substate != JOB_SUBSTATE_RUNNING) { req_reject(PBSE_BADSTATE, 0, preq); return; } } else { reply_text(preq, PBSE_NOSUP, "not supported for Array Jobs or multiple sub-jobs"); return; } /* ** Pass the request on to MOM. If this works, the function ** post_py_spawn_req will be called to handle the reply. ** If it fails, send the reply now. */ rc = relay_to_mom(pjob, preq, post_py_spawn_req); if (rc) req_reject(rc, 0, preq); /* unable to get to MOM */ }
/**
 * @brief
 *	req_rerunjob - service the Rerun Job request.
 *	Handles a regular job, a single subjob, the whole Array Job, or a
 *	range of subjobs; running (sub)jobs are handed to req_rerunjob2(),
 *	which performs the actual rerun.  Requires operator/manager privilege.
 *
 * @param[in,out] preq - the batch request; replied to on every path
 *	(directly via req_reject/reply_send, or later by the subjob
 *	completion machinery via the rq_refct reference count).
 *
 * @return void
 */
void req_rerunjob(struct batch_request *preq)
{
	int anygood = 0;
	int i;
	int j;
	char *jid;
	int jt;			/* job type */
	int offset;
	char *pc;
	job *pjob;
	job *parent;
	char *range;
	char *vrange;
	int x, y, z;		/* subjob range: start, end, step */

	jid = preq->rq_ind.rq_signal.rq_jid;
	parent = chk_job_request(jid, preq, &jt);
	if (parent == (job *)0)
		return;		/* note, req_reject already called */

	/* rerun requires manager or operator privilege */
	if ((preq->rq_perm & (ATR_DFLAG_MGWR | ATR_DFLAG_OPWR)) == 0) {
		req_reject(PBSE_PERM, 0, preq);
		return;
	}

	if (jt == IS_ARRAY_NO) {

		/* just a regular job, pass it on down the line and be done */

		req_rerunjob2(preq, parent);
		return;

	} else if (jt == IS_ARRAY_Single) {

		/* single subjob, if running can signal */

		offset = subjob_index_to_offset(parent, get_index_from_jid(jid));
		if (offset == -1) {
			req_reject(PBSE_UNKJOBID, 0, preq);
			return;
		}

		i = get_subjob_state(parent, offset);
		if (i == -1) {
			req_reject(PBSE_IVALREQ, 0, preq);
			return;
		} else if (i == JOB_STATE_RUNNING) {
			pjob = find_job(jid);	/* get ptr to the subjob */
			if (pjob) {
				req_rerunjob2(preq, pjob);
			} else {
				req_reject(PBSE_BADSTATE, 0, preq);
				return;
			}
		} else {
			req_reject(PBSE_BADSTATE, 0, preq);
			return;
		}
		return;

	} else if (jt == IS_ARRAY_ArrayJob) {

		/* The Array Job itself ... */

		if (parent->ji_qs.ji_state != JOB_STATE_BEGUN) {
			req_reject(PBSE_BADSTATE, 0, preq);
			return;
		}

		/* for each subjob that is running, call req_rerunjob2 */

		++preq->rq_refct;	/* protect the request/reply struct */

		/* Setting deleted subjobs count to 0,
		 * since all the deleted subjobs will be moved to Q state
		 */
		parent->ji_ajtrk->tkm_dsubjsct = 0;

		for (i=0; i<parent->ji_ajtrk->tkm_ct; i++) {
			if (get_subjob_state(parent, i) == JOB_STATE_RUNNING) {
				pjob = find_job(mk_subjob_id(parent, i));
				if (pjob) {
					/* each dup'ed request bumps rq_refct;
					 * the subjob's reply decrements it */
					dup_br_for_subjob(preq, pjob, req_rerunjob2);
				}
			} else {
				/* not running: just requeue the table entry */
				set_subjob_tblstate(parent, i, JOB_STATE_QUEUED);
			}
		}
		/* if not waiting on any running subjobs, can reply; else */
		/* it is taken care of when last running subjob responds  */
		if (--preq->rq_refct == 0)
			reply_send(preq);
		return;
	}

	/* what's left to handle is a range of subjobs, foreach subjob */
	/* if running, all req_rerunjob2                               */

	range = get_index_from_jid(jid);
	if (range == NULL) {
		req_reject(PBSE_IVALREQ, 0, preq);
		return;
	}

	/* first check that all in the subrange are in fact running */

	vrange = range;
	while (1) {
		if ((i = parse_subjob_index(vrange, &pc, &x, &y, &z, &j)) == -1) {
			req_reject(PBSE_IVALREQ, 0, preq);
			return;
		} else if (i == 1)
			break;	/* end of the range specification */
		while (x <= y) {
			i = numindex_to_offset(parent, x);
			if (i >= 0) {
				if (get_subjob_state(parent, i) == JOB_STATE_RUNNING)
					anygood++;
			}
			x += z;
		}
		vrange = pc;	/* advance to the next range segment */
	}
	if (anygood == 0) {	/* no running subjobs in the range */
		req_reject(PBSE_BADSTATE, 0, preq);
		return;
	}

	/* now do the deed */

	++preq->rq_refct;	/* protect the request/reply struct */

	/* second pass over the same range: dispatch req_rerunjob2 for
	 * every running subjob */
	while (1) {
		if ((i = parse_subjob_index(range, &pc, &x, &y, &z, &j)) == -1) {
			req_reject(PBSE_IVALREQ, 0, preq);
			break;
		} else if (i == 1)
			break;
		while (x <= y) {
			i = numindex_to_offset(parent, x);
			if (i < 0) {
				x += z;
				continue;
			}
			if (get_subjob_state(parent, i) == JOB_STATE_RUNNING) {
				pjob = find_job(mk_subjob_id(parent, i));
				if (pjob) {
					dup_br_for_subjob(preq, pjob, req_rerunjob2);
				}
			}
			x += z;
		}
		range = pc;
	}

	/* if not waiting on any running subjobs, can reply; else */
	/* it is taken care of when last running subjob responds  */
	if (--preq->rq_refct == 0)
		reply_send(preq);
	return;
}