Beispiel #1
0
/**
 * @brief
 * 		Service a "release nodes" batch request.
 *
 * 		The request must name either a regular job or a single subjob, and
 * 		that job must be in the running state and running substate.  The
 * 		node list carried in the request (an empty string is treated as no
 * 		list) is handed to free_sister_vnodes().
 *
 * @param[in,out]	preq	-	pointer to the release-nodes batch request
 *
 * @return	void
 */
void
req_relnodesjob(struct batch_request *preq)
{
	int             jt;		/* job type */
	job		*pjob;
	int		rc;
	char		*jid;
	int		i, offset;
	char		*nodeslist = NULL;
	char		msg[LOG_BUF_SIZE];

	if (preq == NULL)
		return;		/* nothing to reply to */

	jid = preq->rq_ind.rq_relnodes.rq_jid;
	if (jid == NULL) {
		/*
		 * Answer the client like every other validation failure in
		 * this function does, rather than dropping the request.
		 */
		req_reject(PBSE_IVALREQ, 0, preq);
		return;
	}

	/*
	 ** Returns job pointer for singleton job or "parent" of
	 ** an array job.
	 */
	pjob = chk_job_request(jid, preq, &jt);
	if (pjob == NULL) {
		/* NOTE(review): presumably chk_job_request() already rejected the request - confirm */
		return;
	}

	if (jt == IS_ARRAY_NO) {		/* a regular job is okay */
		/* the job must be running */
		if ((pjob->ji_qs.ji_state != JOB_STATE_RUNNING) ||
			(pjob->ji_qs.ji_substate !=
			JOB_SUBSTATE_RUNNING)) {
			req_reject(PBSE_BADSTATE, 0, preq);
			return;
		}
	}
	else if (jt == IS_ARRAY_Single) {	/* a single subjob is okay */

		/* locate the subjob's slot in the parent's tracking table */
		offset = subjob_index_to_offset(pjob,
			get_index_from_jid(jid));
		if (offset == -1) {
			req_reject(PBSE_UNKJOBID, 0, preq);
			return;
		}

		i = get_subjob_state(pjob, offset);
		if (i == -1) {
			req_reject(PBSE_IVALREQ, 0, preq);
			return;
		}

		if (i != JOB_STATE_RUNNING) {
			req_reject(PBSE_BADSTATE, 0, preq);
			return;
		}
		/* from here on pjob refers to the subjob, not the parent */
		if ((pjob = pjob->ji_ajtrk->tkm_tbl[offset].trk_psubjob) == NULL) {
			req_reject(PBSE_UNKJOBID, 0, preq);
			return;
		}
		if (pjob->ji_qs.ji_substate != JOB_SUBSTATE_RUNNING) {
			req_reject(PBSE_BADSTATE, 0, preq);
			return;
		}
	} else {
		reply_text(preq, PBSE_NOSUP,
			"not supported for Array Jobs or multiple sub-jobs");
		return;
	}

	nodeslist = preq->rq_ind.rq_relnodes.rq_node_list;

	/* an empty node list is passed on as NULL */
	if ((nodeslist != NULL) && (nodeslist[0] == '\0')) {
		nodeslist = NULL;
	}

	/*
	 * Start with an empty message so that a failure which does not fill
	 * in msg never sends uninitialized stack contents to the client.
	 */
	msg[0] = '\0';
	rc = free_sister_vnodes(pjob, nodeslist, msg, LOG_BUF_SIZE, preq);

	/*
	 * Only the failure case is answered here.
	 * NOTE(review): presumably free_sister_vnodes() takes care of the
	 * reply when it returns zero - confirm.
	 */
	if (rc != 0) {
		reply_text(preq, PBSE_SYSTEM, msg);
	}
}
Beispiel #2
0
/**
 * @brief
 * 		Service a py_spawn batch request by relaying it to MOM.
 *
 * 		The request must come from the job owner and must name either a
 * 		regular job or a single subjob that is in the running state and
 * 		running substate; anything else is answered with an error.
 *
 * @param[in,out]	preq	-	pointer to the py_spawn batch request
 *
 * @return	void
 */
void
req_py_spawn(struct batch_request *preq)
{
	char	*jobid = preq->rq_ind.rq_py_spawn.rq_jid;
	job	*target;
	int	 jobtype;
	int	 slot;		/* subjob position in the parent's tracking table */
	int	 sjstate;	/* state recorded for that subjob */
	int	 err;

	/* yields the job itself, or the "parent" when the id refers to an array job */
	target = chk_job_request(jobid, preq, &jobtype);
	if (target == NULL)
		return;

	/* only the job owner is allowed through */
	if (svr_chk_owner(preq, target) != 0) {
		req_reject(PBSE_PERM, 0, preq);
		return;
	}

	/* anything other than a regular job or a single subjob is refused */
	if ((jobtype != IS_ARRAY_NO) && (jobtype != IS_ARRAY_Single)) {
		reply_text(preq, PBSE_NOSUP,
			"not supported for Array Jobs or multiple sub-jobs");
		return;
	}

	if (jobtype == IS_ARRAY_NO) {
		/* regular job: both state and substate must say "running" */
		if (!((target->ji_qs.ji_state == JOB_STATE_RUNNING) &&
			(target->ji_qs.ji_substate == JOB_SUBSTATE_RUNNING))) {
			req_reject(PBSE_BADSTATE, 0, preq);
			return;
		}
	} else {
		/* single subjob: look it up through the parent's table */
		slot = subjob_index_to_offset(target, get_index_from_jid(jobid));
		if (slot == -1) {
			req_reject(PBSE_UNKJOBID, 0, preq);
			return;
		}

		sjstate = get_subjob_state(target, slot);
		if (sjstate == -1) {
			req_reject(PBSE_IVALREQ, 0, preq);
			return;
		}
		if (sjstate != JOB_STATE_RUNNING) {
			req_reject(PBSE_BADSTATE, 0, preq);
			return;
		}

		/* switch from the parent to the subjob's own job structure */
		target = target->ji_ajtrk->tkm_tbl[slot].trk_psubjob;
		if (target == NULL) {
			req_reject(PBSE_UNKJOBID, 0, preq);
			return;
		}
		if (target->ji_qs.ji_substate != JOB_SUBSTATE_RUNNING) {
			req_reject(PBSE_BADSTATE, 0, preq);
			return;
		}
	}

	/*
	 * Hand the request to MOM with post_py_spawn_req registered to deal
	 * with the reply.  A non-zero return means the relay failed, so the
	 * client is answered right away with that code.
	 */
	err = relay_to_mom(target, preq, post_py_spawn_req);
	if (err != 0)
		req_reject(err, 0, preq);
}
Beispiel #3
0
/**
 * @brief
 * 		Helper for req_stat_job(): build the status reply for one job id.
 * 		The id may name a normal job, an Array job, a single subjob or a
 * 		range of subjobs.  The matching job structure is located and the
 * 		reply entries are produced by do_stat_of_a_job() (normal and Array
 * 		jobs) or status_subjob() (single subjob and subjob ranges).
 *
 * @param[in,out]	preq	-	pointer to the stat job batch request, reply updated
 * @param[in]	name	-	job id to be statused
 * @param[in]	dohistjobs	-	flag to include job if it is a history job
 * @param[in]	dosubjobs	-	flag to expand a Array job to include all subjobs
 *
 * @return	int
 * @retval	PBSE_NONE (0)	: no error
 * @retval	non-zero	: PBS error code to return to client
 */
static int
stat_a_jobidname(struct batch_request *preq, char *name, int dohistjobs, int dosubjobs)
{
	struct batch_reply *reply = &preq->rq_reply;
	svrattrl *attrs;
	job  *pj;
	char *spec;		/* subjob index text still to be parsed */
	char *rest;		/* where the parser stopped */
	int   code;		/* job id kind, later the parser's result */
	int   slot;		/* offset into the array's tracking table */
	int   first, last, step;
	int   rc;

	code = is_job_array(name);

	/* ---- a single subjob ---- */
	if (code == IS_ARRAY_Single) {
		pj = find_arrayparent(name);
		if (pj == NULL)
			return (PBSE_UNKJOBID);
		if (!dohistjobs) {
			rc = svr_chk_histjob(pj);
			if (rc)
				return (rc);
		}

		slot = subjob_index_to_offset(pj, get_index_from_jid(name));
		if (slot == -1)
			return (PBSE_UNKJOBID);

		attrs = (svrattrl *)GET_NEXT(preq->rq_ind.rq_status.rq_attr);
		/* whatever status_subjob() reports is the result for this id */
		return (status_subjob(pj, preq, attrs, slot,
			&reply->brp_un.brp_status, &bad));
	}

	/* ---- a normal job or the Array job itself ---- */
	if ((code == IS_ARRAY_NO) || (code == IS_ARRAY_ArrayJob)) {
		pj = find_job(name);
		if (pj == NULL)
			return (PBSE_UNKJOBID);
		if (!dohistjobs) {
			rc = svr_chk_histjob(pj);
			if (rc)
				return (rc);
		}
		return (do_stat_of_a_job(preq, pj, dohistjobs, dosubjobs));
	}

	/* ---- what remains is a range of subjobs ---- */
	spec = get_index_from_jid(name);
	if (spec == NULL)
		return (PBSE_IVALREQ);

	pj = find_arrayparent(name);
	if (pj == NULL)
		return (PBSE_UNKJOBID);
	if (!dohistjobs) {
		rc = svr_chk_histjob(pj);
		if (rc)
			return (rc);
	}

	attrs = (svrattrl *)GET_NEXT(preq->rq_ind.rq_status.rq_attr);

	for (;;) {
		/*
		 * The same variable is handed in as the last argument and then
		 * receives the return value: -1 is a parse error, 1 ends the
		 * loop, anything else means first/last/step were filled in.
		 */
		code = parse_subjob_index(spec, &rest, &first, &last, &step, &code);
		if (code == -1)
			return (PBSE_IVALREQ);
		if (code == 1)
			break;

		for (; first <= last; first += step) {
			slot = numindex_to_offset(pj, first);
			if (slot < 0)
				continue;	/* index not in the table, skip it */

			rc = status_subjob(pj, preq, attrs, slot,
				&reply->brp_un.brp_status, &bad);
			/* a PBSE_PERM result does not abort the rest of the range */
			if (rc && (rc != PBSE_PERM))
				return (rc);
		}
		spec = rest;
	}

	/* the whole range has been handled, nothing more to stat for this id */
	return (PBSE_NONE);
}
Beispiel #4
0
/**
 * @brief
 * 		create_subjob - create a Subjob from the parent Array Job
 * 		Certain attributes are changed or left out
 *
 * 		The parent must be flagged as an Array Job and the tracking table
 * 		entry selected by the index in newjid must be in the queued state.
 * 		The new job receives a copy of the parent's ji_qs area, a selected
 * 		set of the parent's attributes (attrs_to_copy), the array id/index
 * 		attributes and a queue rank taken from the current time in
 * 		milliseconds; it is then enqueued with svr_enquejob().
 *
 * @param[in]	parent - pointer to parent Job
 * @param[in]	newjid -  new job id
 * @param[out]	rc -  return code: PBSE_NONE on success, otherwise a PBS error
 *
 * @return	pointer to new job
 * @retval  NULL	- error, *rc holds the reason
 */
job *
create_subjob(job *parent, char *newjid, int *rc)
{
	pbs_list_head  attrl;
	int	   i;
	int	   j;
	int	   indx;
	char	  *index;
	attribute_def *pdef;
	attribute *ppar;
	attribute *psub;
	svrattrl  *psatl;
	job 	  *subj;
	long	   eligibletime;
	long	    time_msec;
#ifdef	WIN32
	struct	_timeb	    tval;
#else
	struct timeval	    tval;
#endif


	if ((parent->ji_qs.ji_svrflags & JOB_SVFLG_ArrayJob) == 0) {
		*rc = PBSE_IVALREQ;
		return NULL;	/* parent not an array job */
	}

	/* find and copy the index */

	if ((index = get_index_from_jid(newjid)) == NULL) {
		*rc = PBSE_IVALREQ;
		return NULL;
	}
	if ((indx = subjob_index_to_offset(parent, index)) == -1) {
		*rc = PBSE_UNKJOBID;
		return NULL;
	}
	/* only a subjob whose table entry is still queued may be created */
	if (parent->ji_ajtrk->tkm_tbl[indx].trk_status != JOB_STATE_QUEUED) {
		*rc = PBSE_BADSTATE;
		return NULL;
	}

	/*
	 * allocate and clear basic structure
	 * cannot copy job attributes because cannot share strings and other
	 * malloc-ed data,  so copy ji_qs as a whole and then copy the
	 * non-saved items before ji_qs.
	 */

	subj = job_alloc();
	if (subj == NULL) {
		/*
		 * No job structure to work with (presumably out of memory).
		 * Fail instead of dereferencing NULL below; the code reported
		 * is the same one this function uses when enqueueing fails.
		 */
		*rc = PBSE_IVALREQ;
		return NULL;
	}
	subj->ji_qs = parent->ji_qs;	/* copy the fixed save area */

#ifdef PBS_CRED_GRIDPROXY
	subj->ji_gsscontext  = parent->ji_gsscontext;
#endif
	subj->ji_qhdr     = parent->ji_qhdr;
	subj->ji_resvp    = parent->ji_resvp;
	subj->ji_myResv   = parent->ji_myResv;
	subj->ji_parentaj = parent;
	strcpy(subj->ji_qs.ji_jobid, newjid);	/* replace job id */
	*subj->ji_qs.ji_fileprefix = '\0';
	subj->ji_subjindx = indx;

	/*
	 * now that is all done, copy the required attributes by
	 * encoding and then decoding into the new array.  Then add the
	 * subjob specific attributes.
	 */

	resc_access_perm = ATR_DFLAG_ACCESS;
	CLEAR_HEAD(attrl);
	for (i = 0; attrs_to_copy[i] != JOB_ATR_LAST; i++) {
		j    = (int)attrs_to_copy[i];
		ppar = &parent->ji_wattr[j];
		psub = &subj->ji_wattr[j];
		pdef = &job_attr_def[j];

		/* a positive return means something was encoded onto attrl */
		if (pdef->at_encode(ppar, &attrl, pdef->at_name, NULL,
			ATR_ENCODE_MOM, &psatl) > 0) {
			/* decode every encoded entry into the subjob's attribute */
			for (psatl = (svrattrl *)GET_NEXT(attrl); psatl;
				psatl = ((svrattrl *)GET_NEXT(psatl->al_link))) {
				pdef->at_decode(psub, psatl->al_name, psatl->al_resc,
					psatl->al_value);
			}
			/* carry forward the default bit if set */
			psub->at_flags |= (ppar->at_flags & ATR_VFLAG_DEFLT);
			/* release the temporary list before the next attribute */
			free_attrlist(&attrl);
		}
	}

	/* array_id is set to the parent's job id */
	psub = &subj->ji_wattr[(int)JOB_ATR_array_id];
	job_attr_def[(int)JOB_ATR_array_id].at_decode(psub, NULL, NULL,
		parent->ji_qs.ji_jobid);

	/* array_index is set to the index text taken from newjid */
	psub = &subj->ji_wattr[(int)JOB_ATR_array_index];
	job_attr_def[(int)JOB_ATR_array_index].at_decode(psub, NULL, NULL, index);

	/* Lastly, set or clear a few flags and link in the structure */

	subj->ji_qs.ji_svrflags &= ~JOB_SVFLG_ArrayJob;
	subj->ji_qs.ji_svrflags |=  JOB_SVFLG_SubJob;
	subj->ji_modified = 1;	/* ** will likely take this out ** */

	subj->ji_qs.ji_substate = JOB_SUBSTATE_TRANSICM;
	(void)svr_setjobstate(subj, JOB_STATE_QUEUED, JOB_SUBSTATE_QUEUED);
	subj->ji_wattr[(int)JOB_ATR_state].at_flags    |= ATR_VFLAG_SET;
	subj->ji_wattr[(int)JOB_ATR_substate].at_flags |= ATR_VFLAG_SET;

	/* subjob needs to borrow eligible time from parent job array.
	 * expecting only to accrue eligible_time and nothing else.
	 */
	if (server.sv_attr[(int)SRV_ATR_EligibleTimeEnable].at_val.at_long == 1) {

		eligibletime = parent->ji_wattr[(int)JOB_ATR_eligible_time].at_val.at_long;

		/*
		 * when the parent's accrue type is JOB_ELIGIBLE, also add the
		 * difference between the subjob's and the parent's
		 * sample_starttime values
		 */
		if (parent->ji_wattr[(int)JOB_ATR_accrue_type].at_val.at_long == JOB_ELIGIBLE)
			eligibletime += subj->ji_wattr[(int)JOB_ATR_sample_starttime].at_val.at_long - parent->ji_wattr[(int)JOB_ATR_sample_starttime].at_val.at_long;

		subj->ji_wattr[(int)JOB_ATR_eligible_time].at_val.at_long = eligibletime;
		subj->ji_wattr[(int)JOB_ATR_eligible_time].at_flags |= ATR_VFLAG_MODIFY | ATR_VFLAG_MODCACHE;

	}

	/* current time in milliseconds, used as the queue rank below */
#ifdef WIN32
	_ftime_s(&tval);
	time_msec = (tval.time * 1000L) + tval.millitm;
#else
	gettimeofday(&tval, NULL);
	time_msec = (tval.tv_sec * 1000L) + (tval.tv_usec/1000L);
#endif
	/* set the queue rank attribute */
	subj->ji_wattr[(int)JOB_ATR_qrank].at_val.at_long = time_msec;
	subj->ji_wattr[(int)JOB_ATR_qrank].at_flags |= ATR_VFLAG_SET|ATR_VFLAG_MODCACHE;

	/* a subjob that cannot be enqueued is purged again */
	if (svr_enquejob(subj) != 0) {
		job_purge(subj);
		*rc = PBSE_IVALREQ;
		return NULL;
	}
	*rc = PBSE_NONE;
	return subj;
}
Beispiel #5
0
/**
 * @brief
 * 		Service a rerun job batch request.
 *
 * 		Depending on what the job id names, req_rerunjob2() is invoked for
 * 		a regular job, for one running subjob, for every running subjob of
 * 		an Array Job, or for every running subjob within a range.  The
 * 		requestor needs at least one of the ATR_DFLAG_MGWR / ATR_DFLAG_OPWR
 * 		permission bits.
 *
 * @param[in,out]	preq	-	pointer to the rerun batch request
 *
 * @return	void
 */
void
req_rerunjob(struct batch_request *preq)
{
	char	*jobid;
	job	*pj;		/* the job, or the parent for array ids */
	job	*sj;		/* one subjob of pj */
	int	 jobtype;
	int	 code;		/* subjob state or parser result */
	int	 slot;		/* offset into the tracking table */
	int	 unused;	/* sixth out-argument of parse_subjob_index(), never read */
	int	 nrunning = 0;
	int	 first, last, step;
	char	*spec;		/* range text for the second pass */
	char	*scan;		/* range text for the checking pass */
	char	*rest;

	jobid = preq->rq_ind.rq_signal.rq_jid;
	pj = chk_job_request(jobid, preq, &jobtype);
	if (pj == NULL)
		return;		/* note, req_reject already called */

	if ((preq->rq_perm & (ATR_DFLAG_MGWR | ATR_DFLAG_OPWR)) == 0) {
		req_reject(PBSE_PERM, 0, preq);
		return;
	}

	/* ---- regular job: hand it straight on ---- */
	if (jobtype == IS_ARRAY_NO) {
		req_rerunjob2(preq, pj);
		return;
	}

	/* ---- single subjob: it has to be running ---- */
	if (jobtype == IS_ARRAY_Single) {
		slot = subjob_index_to_offset(pj, get_index_from_jid(jobid));
		if (slot == -1) {
			req_reject(PBSE_UNKJOBID, 0, preq);
			return;
		}

		code = get_subjob_state(pj, slot);
		if (code == -1) {
			req_reject(PBSE_IVALREQ, 0, preq);
			return;
		}
		if (code != JOB_STATE_RUNNING) {
			req_reject(PBSE_BADSTATE, 0, preq);
			return;
		}

		sj = find_job(jobid);		/* the subjob's own structure */
		if (sj == NULL) {
			req_reject(PBSE_BADSTATE, 0, preq);
			return;
		}
		req_rerunjob2(preq, sj);
		return;
	}

	/* ---- the Array Job itself ---- */
	if (jobtype == IS_ARRAY_ArrayJob) {
		if (pj->ji_qs.ji_state != JOB_STATE_BEGUN) {
			req_reject(PBSE_BADSTATE, 0, preq);
			return;
		}

		/* hold a reference so the request/reply struct stays protected */
		++preq->rq_refct;

		/*
		 * Reset the deleted subjobs count, since all the deleted
		 * subjobs will be moved to Q state.
		 */
		pj->ji_ajtrk->tkm_dsubjsct = 0;

		for (slot = 0; slot < pj->ji_ajtrk->tkm_ct; slot++) {
			if (get_subjob_state(pj, slot) != JOB_STATE_RUNNING) {
				/* not running: just mark the table entry queued */
				set_subjob_tblstate(pj, slot, JOB_STATE_QUEUED);
				continue;
			}
			sj = find_job(mk_subjob_id(pj, slot));
			if (sj != NULL)
				dup_br_for_subjob(preq, sj, req_rerunjob2);
		}

		/*
		 * Reply now if no running subjob is outstanding; otherwise the
		 * reply goes out when the last running subjob responds.
		 */
		if (--preq->rq_refct == 0)
			reply_send(preq);
		return;
	}

	/* ---- what is left is a range of subjobs ---- */
	spec = get_index_from_jid(jobid);
	if (spec == NULL) {
		req_reject(PBSE_IVALREQ, 0, preq);
		return;
	}

	/* first pass: count the running subjobs in the range */
	scan = spec;
	for (;;) {
		code = parse_subjob_index(scan, &rest, &first, &last, &step, &unused);
		if (code == -1) {
			req_reject(PBSE_IVALREQ, 0, preq);
			return;
		}
		if (code == 1)
			break;

		for (; first <= last; first += step) {
			slot = numindex_to_offset(pj, first);
			if ((slot >= 0) &&
				(get_subjob_state(pj, slot) == JOB_STATE_RUNNING))
				nrunning++;
		}
		scan = rest;
	}
	if (nrunning == 0) {
		req_reject(PBSE_BADSTATE, 0, preq);
		return;
	}

	/* second pass: rerun each running subjob in the range */

	++preq->rq_refct;	/* protect the request/reply struct */

	for (;;) {
		code = parse_subjob_index(spec, &rest, &first, &last, &step, &unused);
		if (code == -1) {
			/* rejected, but the reference count is still settled below */
			req_reject(PBSE_IVALREQ, 0, preq);
			break;
		}
		if (code == 1)
			break;

		for (; first <= last; first += step) {
			slot = numindex_to_offset(pj, first);
			if (slot < 0)
				continue;
			if (get_subjob_state(pj, slot) != JOB_STATE_RUNNING)
				continue;

			sj = find_job(mk_subjob_id(pj, slot));
			if (sj != NULL)
				dup_br_for_subjob(preq, sj, req_rerunjob2);
		}
		spec = rest;
	}

	/*
	 * Reply now if no running subjob is outstanding; otherwise the reply
	 * goes out when the last running subjob responds.
	 */
	if (--preq->rq_refct == 0)
		reply_send(preq);
}