示例#1
0
/**
 * @brief
 * 		fixup_arrayindicies - rebuild the subjob tracking-table states of an
 * 		Array Job from its "array_indices_remaining" attribute value.
 *
 * @par	Functionality:
 * 		Used when a job is being qmoved into this server.  Nothing is done
 * 		unless the action mode is ATR_ACTION_NEW and the job does not carry
 * 		JOB_SVFLG_HERE (a job created here is not being qmoved).
 * 		Otherwise every entry in the tracking table is first marked
 * 		JOB_STATE_EXPIRED, and then each index named in the attribute string
 * 		that maps to a table offset is put back to JOB_STATE_QUEUED.
 *
 * @par	Precondition:
 * 		pjob->ji_ajtrk is dereferenced without a NULL check, so the tracking
 * 		table must already exist; this is why indices_submitted has to be
 * 		processed before this attribute.
 *
 * @param[in]	pattr - pointer to special attributes of an Array Job; its
 *			string value (at_val.at_str) holds the remaining ranges
 * @param[in]	pobj -  pointer to job structure
 * @param[in]	mode -  actmode
 *
 * @return	PBS error
 * @retval	PBSE_NONE	- always
 */
int
fixup_arrayindicies(attribute *pattr, void *pobj, int mode)
{
	job  *array_job = pobj;
	char *cursor;
	char *next;
	int   first, last, step, count;
	int   slot;

	/* only a brand new attribute on a job that is being moved in matters */
	if (mode != ATR_ACTION_NEW)
		return (PBSE_NONE);
	if (array_job->ji_qs.ji_svrflags & JOB_SVFLG_HERE)
		return (PBSE_NONE);

	/* pass 1: assume every subjob is already finished */
	for (slot = 0; slot < array_job->ji_ajtrk->tkm_ct; slot++)
		set_subjob_tblstate(array_job, slot, JOB_STATE_EXPIRED);

	/*
	 * pass 2: walk the ranges in the attribute string; any non-zero
	 * return from parse_subjob_index() ends the scan
	 */
	for (cursor = pattr->at_val.at_str;
		parse_subjob_index(cursor, &next, &first, &last, &step, &count) == 0;
		cursor = next) {

		for (; first <= last; first += step) {
			slot = numindex_to_offset(array_job, first);
			if (slot != -1)	/* -1: index has no table entry */
				set_subjob_tblstate(array_job, slot, JOB_STATE_QUEUED);
		}
	}

	return (PBSE_NONE);
}
示例#2
0
/**
 * @brief
 * 		update_subjob_state - update the subjob state in the parent Array
 * 		Job's tracking table entry for the subjob (and, through
 * 		set_subjob_tblstate(), whatever per-state bookkeeping it performs).
 * 		If the subjob is going into EXPIRED state, its exit information is
 * 		saved in the table as well.
 *
 * @par	Functionality:
 * 		Returns without doing anything when the subjob has no parent, the
 * 		parent has no tracking table, the subjob's job id contains no '[',
 * 		or the text in front of the '[' does not match the start of the
 * 		parent's job id.
 * 		For JOB_STATE_EXPIRED the entry additionally receives the exit
 * 		status (trk_error) and the substate (trk_substate); when
 * 		svr_chk_history_conf() is non-zero it also receives the stageout
 * 		status (if that attribute is set) and trk_exitstat is set to 1 (if
 * 		the exit_status attribute is set).  The parent is then flagged as
 * 		modified.  chk_array_doneness() is called on the parent in all
 * 		non-early-return cases.
 *
 * @param[in]	pjob - pointer to the actual subjob job entry
 * @param[in]	newstate - newstate of the sub job.
 *
 *	@return	void
 */
void
update_subjob_state(job *pjob, int newstate)
{
	size_t		 len;
	job		*parent;
	char		*pc;
	struct ajtrkhd	*ptbl;

	parent = pjob->ji_parentaj;
	if (parent == NULL)
		return;
	ptbl   = parent->ji_ajtrk;
	if (ptbl == NULL)
		return;

	/* verify that parent job is in fact the parent Array Job */
	pc = strchr(pjob->ji_qs.ji_jobid, (int)'[');
	if (pc == NULL)
		return;	/* not a subjob id; pointer arithmetic on NULL is undefined */

	/*
	 * Compare everything in front of the '['.  The length used to be one
	 * character short, which skipped the last character of the prefix and
	 * compared nothing at all when the prefix was a single character.
	 */
	len = (size_t)(pc - pjob->ji_qs.ji_jobid);
	if (strncmp(pjob->ji_qs.ji_jobid, parent->ji_qs.ji_jobid, len) != 0)
		return;	/* nope, not the parent */

	set_subjob_tblstate(parent, pjob->ji_subjindx, newstate);
	if (newstate == JOB_STATE_EXPIRED) {
		/* keep the subjob's exit information in its table entry */
		ptbl->tkm_tbl[pjob->ji_subjindx].trk_error =
			pjob->ji_qs.ji_un.ji_exect.ji_exitstat;

		if (svr_chk_history_conf()) {
			if ((pjob->ji_wattr[(int)JOB_ATR_stageout_status].at_flags) & ATR_VFLAG_SET) {
				ptbl->tkm_tbl[pjob->ji_subjindx].trk_stgout =
					pjob->ji_wattr[(int)JOB_ATR_stageout_status].at_val.at_long;
			}
			if ((pjob->ji_wattr[(int)JOB_ATR_exit_status].at_flags) & ATR_VFLAG_SET) {
				ptbl->tkm_tbl[pjob->ji_subjindx].trk_exitstat = 1;
			}
		}
		ptbl->tkm_tbl[pjob->ji_subjindx].trk_substate = pjob->ji_qs.ji_substate;

		/* the table changed, so mark the parent as modified */
		parent->ji_modified = 1;
	}
	chk_array_doneness(parent);
}
示例#3
0
/**
 * @brief
 * 		req_rerunjob - service a rerun request aimed at a regular job, a
 * 		single subjob, an entire Array Job, or a range of subjobs.
 *
 * @par	Functionality:
 * 		The job id is taken from preq->rq_ind.rq_signal.rq_jid and resolved
 * 		with chk_job_request(), which also reports the kind of job named.
 * 		The requester must hold manager or operator write permission,
 * 		otherwise the request is rejected with PBSE_PERM.
 * 		- regular job: handed straight to req_rerunjob2().
 * 		- single subjob: handed to req_rerunjob2() only if its table state
 * 		  is JOB_STATE_RUNNING and the subjob's job structure is found.
 * 		- Array Job: must be in JOB_STATE_BEGUN; every running subjob gets
 * 		  its own duplicated request, every other table entry is set to
 * 		  JOB_STATE_QUEUED, and tkm_dsubjsct is reset to 0.
 * 		- range of subjobs: the range is parsed twice, first to make sure
 * 		  at least one named subjob is running, then to issue a duplicated
 * 		  request for each running one.
 * 		While duplicated requests are being issued, preq->rq_refct is held
 * 		one higher; the reply is sent from here only if the count returns
 * 		to zero.
 *
 * @param[in,out]	preq - the rerun batch request
 *
 * @return	void
 */
void
req_rerunjob(struct batch_request *preq)
{
	char	*jobid = preq->rq_ind.rq_signal.rq_jid;
	int	 jobtype;		/* kind of job named by jobid */
	job	*target;		/* job, or parent Array Job */
	job	*subjob;
	char	*range;
	char	*scan;
	char	*rest;
	int	 first, last, step, count;
	int	 rc;
	int	 slot;
	int	 state;
	int	 running = 0;

	target = chk_job_request(jobid, preq, &jobtype);
	if (target == NULL)
		return;		/* note, req_reject already called */

	if (!(preq->rq_perm & (ATR_DFLAG_MGWR | ATR_DFLAG_OPWR))) {
		req_reject(PBSE_PERM, 0, preq);
		return;
	}

	/* ---- a regular job: pass it on down the line and be done ---- */
	if (jobtype == IS_ARRAY_NO) {
		req_rerunjob2(preq, target);
		return;
	}

	/* ---- a single subjob: only a running one can be rerun ---- */
	if (jobtype == IS_ARRAY_Single) {
		slot = subjob_index_to_offset(target, get_index_from_jid(jobid));
		if (slot == -1) {
			req_reject(PBSE_UNKJOBID, 0, preq);
			return;
		}

		state = get_subjob_state(target, slot);
		if (state == -1) {
			req_reject(PBSE_IVALREQ, 0, preq);
			return;
		}
		if (state != JOB_STATE_RUNNING) {
			req_reject(PBSE_BADSTATE, 0, preq);
			return;
		}

		subjob = find_job(jobid);	/* get ptr to the subjob */
		if (subjob == NULL) {
			req_reject(PBSE_BADSTATE, 0, preq);
			return;
		}
		req_rerunjob2(preq, subjob);
		return;
	}

	/* ---- the Array Job itself ---- */
	if (jobtype == IS_ARRAY_ArrayJob) {
		if (target->ji_qs.ji_state != JOB_STATE_BEGUN) {
			req_reject(PBSE_BADSTATE, 0, preq);
			return;
		}

		++preq->rq_refct;	/* protect the request/reply struct */

		/*
		 * Setting deleted subjobs count to 0,
		 * since all the deleted subjobs will be moved to Q state
		 */
		target->ji_ajtrk->tkm_dsubjsct = 0;

		for (slot = 0; slot < target->ji_ajtrk->tkm_ct; slot++) {
			if (get_subjob_state(target, slot) != JOB_STATE_RUNNING) {
				set_subjob_tblstate(target, slot, JOB_STATE_QUEUED);
				continue;
			}
			/* running: rerun it through a duplicated request */
			subjob = find_job(mk_subjob_id(target, slot));
			if (subjob != NULL)
				dup_br_for_subjob(preq, subjob, req_rerunjob2);
		}

		/* if not waiting on any running subjobs, can reply; else */
		/* it is taken care of when last running subjob responds  */
		if (--preq->rq_refct == 0)
			reply_send(preq);
		return;
	}

	/* ---- what's left to handle is a range of subjobs ---- */
	range = get_index_from_jid(jobid);
	if (range == NULL) {
		req_reject(PBSE_IVALREQ, 0, preq);
		return;
	}

	/* pass 1: count the subjobs in the range that are in fact running */
	scan = range;
	for (;;) {
		rc = parse_subjob_index(scan, &rest, &first, &last, &step, &count);
		if (rc == -1) {
			req_reject(PBSE_IVALREQ, 0, preq);
			return;
		}
		if (rc == 1)
			break;		/* nothing more to parse */

		for (; first <= last; first += step) {
			slot = numindex_to_offset(target, first);
			if (slot >= 0 &&
				get_subjob_state(target, slot) == JOB_STATE_RUNNING)
				running++;
		}
		scan = rest;
	}
	if (running == 0) {
		req_reject(PBSE_BADSTATE, 0, preq);
		return;
	}

	/* pass 2: now do the deed */
	++preq->rq_refct;	/* protect the request/reply struct */

	scan = range;
	for (;;) {
		rc = parse_subjob_index(scan, &rest, &first, &last, &step, &count);
		if (rc == -1) {
			/* rejected while the extra reference is still held */
			req_reject(PBSE_IVALREQ, 0, preq);
			break;
		}
		if (rc == 1)
			break;

		for (; first <= last; first += step) {
			slot = numindex_to_offset(target, first);
			if (slot < 0)
				continue;	/* index has no table entry */
			if (get_subjob_state(target, slot) != JOB_STATE_RUNNING)
				continue;

			subjob = find_job(mk_subjob_id(target, slot));
			if (subjob != NULL)
				dup_br_for_subjob(preq, subjob, req_rerunjob2);
		}
		scan = rest;
	}

	/* if not waiting on any running subjobs, can reply; else */
	/* it is taken care of when last running subjob responds  */
	if (--preq->rq_refct == 0)
		reply_send(preq);
}