Beispiel #1
0
/**
 * @brief
 *	req_relnodesjob - service a "release nodes" request for a running job
 *	or a single running subjob.
 *
 *	The target job is validated (must be a regular job or a single subjob,
 *	in state and substate RUNNING) and the request is then handed to
 *	free_sister_vnodes() together with the optional node list.
 *
 * @param[in,out]	preq	-	the batch request; rejected/replied to
 *					on every failure detected here.
 *
 * @note
 *	When free_sister_vnodes() returns 0 this function sends no reply
 *	itself; presumably the callee (which is handed preq) takes care of
 *	it -- confirm against free_sister_vnodes().
 */
void
req_relnodesjob(struct batch_request *preq)
{
	int             jt;		/* job type */
	job		*pjob;
	int		rc;
	char		*jid;
	int		i, offset;
	char		*nodeslist = NULL;
	char		msg[LOG_BUF_SIZE];

	if (preq == NULL)
		return;

	jid = preq->rq_ind.rq_relnodes.rq_jid;
	if (jid == NULL) {
		/*
		 * Every other failure path answers the client; do not
		 * silently drop the request here.
		 */
		req_reject(PBSE_IVALREQ, 0, preq);
		return;
	}

	/*
	 ** Returns job pointer for singleton job or "parent" of
	 ** an array job.
	 */
	pjob = chk_job_request(jid, preq, &jt);
	if (pjob == NULL) {
		/* presumably chk_job_request() already replied -- confirm */
		return;
	}

	if (jt == IS_ARRAY_NO) {		/* a regular job is okay */
		/* the job must be running */
		if ((pjob->ji_qs.ji_state != JOB_STATE_RUNNING) ||
			(pjob->ji_qs.ji_substate !=
			JOB_SUBSTATE_RUNNING)) {
			req_reject(PBSE_BADSTATE, 0, preq);
			return;
		}
	}
	else if (jt == IS_ARRAY_Single) {	/* a single subjob is okay */

		/* translate the index embedded in the job id to a table offset */
		offset = subjob_index_to_offset(pjob,
			get_index_from_jid(jid));
		if (offset == -1) {
			req_reject(PBSE_UNKJOBID, 0, preq);
			return;
		}

		i = get_subjob_state(pjob, offset);
		if (i == -1) {
			req_reject(PBSE_IVALREQ, 0, preq);
			return;
		}

		if (i != JOB_STATE_RUNNING) {
			req_reject(PBSE_BADSTATE, 0, preq);
			return;
		}
		/* from here on operate on the subjob's own job structure */
		if ((pjob = pjob->ji_ajtrk->tkm_tbl[offset].trk_psubjob) == NULL) {
			req_reject(PBSE_UNKJOBID, 0, preq);
			return;
		}
		if (pjob->ji_qs.ji_substate != JOB_SUBSTATE_RUNNING) {
			req_reject(PBSE_BADSTATE, 0, preq);
			return;
		}
	} else {
		reply_text(preq, PBSE_NOSUP,
			"not supported for Array Jobs or multiple sub-jobs");
		return;
	}

	nodeslist = preq->rq_ind.rq_relnodes.rq_node_list;

	/* an empty node list is passed on as "no list" (NULL) */
	if ((nodeslist != NULL) && (nodeslist[0] == '\0')) {
		nodeslist = NULL;
	}

	/*
	 * Start with an empty message so that the failure reply below never
	 * reads indeterminate stack contents should the callee fail without
	 * writing a message.
	 */
	msg[0] = '\0';
	rc = free_sister_vnodes(pjob, nodeslist, msg, LOG_BUF_SIZE, preq);

	if (rc != 0) {
		reply_text(preq, PBSE_SYSTEM, msg);
	}
}
Beispiel #2
0
/**
 * @brief
 * 		status_subjob - status a single subjob (of an Array Job)
 *		Works by statusing the parent unless the subjob is actually running.
 *
 *		For a non-running subjob the parent's state attribute (and, for
 *		finished/failed/terminated subjobs, its comment) is temporarily
 *		overwritten with the subjob's values, the attributes are encoded
 *		into the reply, and the parent's values are then put back.
 *
 * @param[in,out]	pjob	-	ptr to parent Array
 * @param[in]	preq	-	request structure
 * @param[in]	pal	-	specific attributes to status
 * @param[in]	subj	-	offset of the subjob in the parent's tracking
 *				table (pjob->ji_ajtrk->tkm_tbl)
 * @param[in,out]	pstathd	-	RETURN: head of list to append status to
 * @param[out]	bad	-	RETURN: index of first bad attribute
 *
 * @return	int
 * @retval	0	: success
 * @retval	PBSE_PERM	: client is not authorized to status the job
 * @retval	PBSE_SYSTEM	: memory allocation error
 * @retval	PBSE_IVALREQ	: pjob is not an Array Job, or subj does not
 *				  identify a valid subjob
 * @retval	PBSE_NOATTR	: status_attrib() reported a failure
 */
int
status_subjob(job *pjob, struct batch_request *preq, svrattrl *pal, int subj, pbs_list_head *pstathd, int *bad)
{
	int		   limit = (int)JOB_ATR_LAST;
	struct brp_status *pstat;
	job		  *psubjob;	/* ptr to job to status */
	char		   realstate;
	int		   rc = 0;
	int		   elig_disabled = 0;
	int		   oldeligflags = 0;
	int		   oldatypflags = 0;
	int		   subjob_state;
	int		   substate;
	char		  *fake_comment = NULL;
	char		  *old_subjob_comment = NULL;

	/* see if the client is authorized to status this job */

	if (! server.sv_attr[(int)SRV_ATR_query_others].at_val.at_long)
		if (svr_authorize_jobreq(preq, pjob))
			return (PBSE_PERM);

	if ((pjob->ji_qs.ji_svrflags & JOB_SVFLG_ArrayJob) == 0)
		return PBSE_IVALREQ;

	subjob_state = get_subjob_state(pjob, subj);

	/*
	 * get_subjob_state() reports -1 for a subjob it cannot resolve.
	 * Bail out before the value is used to index statechars[] and
	 * before subj is used to index the tracking table.
	 */
	if (subjob_state < 0)
		return PBSE_IVALREQ;

	/* if subjob is running, use real job structure */

	if (subjob_state == JOB_STATE_RUNNING) {
		psubjob = find_job(mk_subjob_id(pjob, subj));
		/* NOTE(review): status_job()'s result is discarded here -- confirm intended */
		if (psubjob)
			status_job(psubjob, preq, pal, pstathd, bad);
		return 0;
	}

	/* otherwise we fake it with info from the parent      */
	/* allocate reply structure and fill in header portion */


	/* for the general case, we don't want to include the parent's */
	/* array related attributes as they belong only to the Array   */
	if (pal == NULL)
		limit = JOB_ATR_array;
	pstat = malloc(sizeof(*pstat));
	if (pstat == NULL)
		return (PBSE_SYSTEM);
	CLEAR_LINK(pstat->brp_stlink);
	pstat->brp_objtype = MGR_OBJ_JOB;
	(void)strcpy(pstat->brp_objname, mk_subjob_id(pjob, subj));
	CLEAR_HEAD(pstat->brp_attr);
	append_link(pstathd, &pstat->brp_stlink, pstat);

	/* add attributes to the status reply */

	*bad = 0;

	/*
	 * fake the job state and comment by setting the parent job's state
	 * and comment to that of the subjob
	 */
	realstate = pjob->ji_wattr[(int)JOB_ATR_state].at_val.at_char;
	pjob->ji_wattr[(int)JOB_ATR_state].at_val.at_char = statechars[subjob_state];
	pjob->ji_wattr[(int)JOB_ATR_state].at_flags |= ATR_VFLAG_MODCACHE;

	/* pick the comment to show, if the subjob's substate calls for one */
	if (subjob_state == JOB_STATE_EXPIRED || subjob_state == JOB_STATE_FINISHED) {
		substate = pjob->ji_ajtrk->tkm_tbl[subj].trk_substate;
		if (substate == JOB_SUBSTATE_FINISHED)
			fake_comment = "Subjob finished";
		else if (substate == JOB_SUBSTATE_FAILED)
			fake_comment = "Subjob failed";
		else if (substate == JOB_SUBSTATE_TERMINATED)
			fake_comment = "Subjob terminated";
	}

	if (fake_comment != NULL) {
		/* remember the parent's own comment so it can be put back */
		if (pjob->ji_wattr[(int)JOB_ATR_Comment].at_flags & ATR_VFLAG_SET) {
			old_subjob_comment = strdup(pjob->ji_wattr[(int)JOB_ATR_Comment].at_val.at_str);
			if (old_subjob_comment == NULL) {
				rc = PBSE_SYSTEM;
				goto restore_state;
			}
		}
		/*
		 * NOTE(review): when the parent had no comment set, nothing
		 * below removes the fake comment again -- confirm whether the
		 * attribute should be cleared in that case.
		 */
		if (job_attr_def[(int)JOB_ATR_Comment].at_decode(&pjob->ji_wattr[(int)JOB_ATR_Comment],
			NULL, NULL, fake_comment) == PBSE_SYSTEM) {
			free(old_subjob_comment);
			rc = PBSE_SYSTEM;
			goto restore_state;
		}
	}

	/* when eligible_time_enable is off,				      */
	/* clear the set flag so that eligible_time and accrue_type dont show */
	elig_disabled = (server.sv_attr[(int)SRV_ATR_EligibleTimeEnable].at_val.at_long == 0);
	if (elig_disabled) {
		oldeligflags = pjob->ji_wattr[(int)JOB_ATR_eligible_time].at_flags;
		pjob->ji_wattr[(int)JOB_ATR_eligible_time].at_flags &= ~ATR_VFLAG_SET;
		pjob->ji_wattr[(int)JOB_ATR_eligible_time].at_flags |= ATR_VFLAG_MODCACHE;

		oldatypflags = pjob->ji_wattr[(int)JOB_ATR_accrue_type].at_flags;
		pjob->ji_wattr[(int)JOB_ATR_accrue_type].at_flags &= ~ATR_VFLAG_SET;
		pjob->ji_wattr[(int)JOB_ATR_accrue_type].at_flags |= ATR_VFLAG_MODCACHE;

		/* Note: ATR_VFLAG_MODCACHE must be set because of svr_cached() does */
		/* 	 not correctly check ATR_VFLAG_SET */
	}

	if (status_attrib(pal, job_attr_def, pjob->ji_wattr, limit,
		preq->rq_perm, &pstat->brp_attr, bad))
		rc =  PBSE_NOATTR;

	/* Set the parent comment back to what it really is */
	if (old_subjob_comment != NULL) {
		/* a failure here takes precedence over PBSE_NOATTR */
		if (job_attr_def[(int)JOB_ATR_Comment].at_decode(&pjob->ji_wattr[(int)JOB_ATR_Comment],
			NULL, NULL, old_subjob_comment) == PBSE_SYSTEM)
			rc = PBSE_SYSTEM;

		free(old_subjob_comment);
	}

	/* reset the flags, even if restoring the comment failed */
	if (elig_disabled) {
		pjob->ji_wattr[(int)JOB_ATR_eligible_time].at_flags = oldeligflags;
		pjob->ji_wattr[(int)JOB_ATR_accrue_type].at_flags = oldatypflags;
	}

restore_state:
	/*
	 * Set the parent state back to what it really is.  Every exit taken
	 * after the state was faked passes through here so the parent never
	 * keeps the subjob's state.
	 */
	pjob->ji_wattr[(int)JOB_ATR_state].at_val.at_char = realstate;
	pjob->ji_wattr[(int)JOB_ATR_state].at_flags |= ATR_VFLAG_MODCACHE;

	return (rc);
}
Beispiel #3
0
/**
 * @brief
 *	req_py_spawn - service a py_spawn request.
 *
 *	The requester must own the job, and the target must be either a
 *	regular job or a single subjob whose state and substate are RUNNING.
 *	A valid request is relayed to MOM; post_py_spawn_req is registered
 *	to handle MOM's reply.
 *
 * @param[in,out]	preq	-	the batch request
 */
void
req_py_spawn(struct batch_request *preq)
{
	char	*job_id = preq->rq_ind.rq_py_spawn.rq_jid;
	int	 kind;			/* job type filled in by chk_job_request */
	int	 tbl_off;
	int	 sub_state;
	int	 mom_rc;
	job	*target;

	/*
	 ** Yields the job itself for a singleton job, or the "parent"
	 ** when the id names (part of) an array job.
	 */
	target = chk_job_request(job_id, preq, &kind);
	if (target == NULL)
		return;

	/* only the job owner may do this */
	if (svr_chk_owner(preq, target) != 0) {
		req_reject(PBSE_PERM, 0, preq);
		return;
	}

	/* whole arrays and subjob ranges are not handled */
	if ((kind != IS_ARRAY_NO) && (kind != IS_ARRAY_Single)) {
		reply_text(preq, PBSE_NOSUP,
			"not supported for Array Jobs or multiple sub-jobs");
		return;
	}

	if (kind == IS_ARRAY_NO) {
		/* regular job: state and substate must both be RUNNING */
		if (!((target->ji_qs.ji_state == JOB_STATE_RUNNING) &&
			(target->ji_qs.ji_substate == JOB_SUBSTATE_RUNNING))) {
			req_reject(PBSE_BADSTATE, 0, preq);
			return;
		}
	} else {
		/* single subjob: locate its slot in the parent's table */
		tbl_off = subjob_index_to_offset(target,
			get_index_from_jid(job_id));
		if (tbl_off == -1) {
			req_reject(PBSE_UNKJOBID, 0, preq);
			return;
		}

		sub_state = get_subjob_state(target, tbl_off);
		if (sub_state == -1) {
			req_reject(PBSE_IVALREQ, 0, preq);
			return;
		}
		if (sub_state != JOB_STATE_RUNNING) {
			req_reject(PBSE_BADSTATE, 0, preq);
			return;
		}

		/* switch over to the subjob's own job structure */
		target = target->ji_ajtrk->tkm_tbl[tbl_off].trk_psubjob;
		if (target == NULL) {
			req_reject(PBSE_UNKJOBID, 0, preq);
			return;
		}
		if (target->ji_qs.ji_substate != JOB_SUBSTATE_RUNNING) {
			req_reject(PBSE_BADSTATE, 0, preq);
			return;
		}
	}

	/*
	 ** Hand the request to MOM.  On success post_py_spawn_req deals
	 ** with the reply; on failure the request is rejected right away.
	 */
	mom_rc = relay_to_mom(target, preq, post_py_spawn_req);
	if (mom_rc != 0)
		req_reject(mom_rc, 0, preq);	/* unable to get to MOM */
}
Beispiel #4
0
void
req_rerunjob(struct batch_request *preq)
{
	int		  anygood = 0;
	int		  i;
	int		  j;
	char		 *jid;
	int		  jt;		/* job type */
	int		  offset;
	char		 *pc;
	job		 *pjob;
	job		 *parent;
	char		 *range;
	char		 *vrange;
	int		  x, y, z;

	jid = preq->rq_ind.rq_signal.rq_jid;
	parent = chk_job_request(jid, preq, &jt);
	if (parent == (job *)0)
		return;		/* note, req_reject already called */

	if ((preq->rq_perm & (ATR_DFLAG_MGWR | ATR_DFLAG_OPWR)) == 0) {
		req_reject(PBSE_PERM, 0, preq);
		return;
	}

	if (jt == IS_ARRAY_NO) {

		/* just a regular job, pass it on down the line and be done */

		req_rerunjob2(preq, parent);
		return;

	} else if (jt == IS_ARRAY_Single) {

		/* single subjob, if running can signal */

		offset = subjob_index_to_offset(parent, get_index_from_jid(jid));
		if (offset == -1) {
			req_reject(PBSE_UNKJOBID, 0, preq);
			return;
		}
		i = get_subjob_state(parent, offset);
		if (i == -1) {
			req_reject(PBSE_IVALREQ, 0, preq);
			return;
		} else if (i == JOB_STATE_RUNNING) {
			pjob = find_job(jid);		/* get ptr to the subjob */
			if (pjob) {
				req_rerunjob2(preq, pjob);
			} else {
				req_reject(PBSE_BADSTATE, 0, preq);
				return;
			}
		} else {
			req_reject(PBSE_BADSTATE, 0, preq);
			return;
		}
		return;

	} else if (jt == IS_ARRAY_ArrayJob) {

		/* The Array Job itself ... */

		if (parent->ji_qs.ji_state != JOB_STATE_BEGUN) {
			req_reject(PBSE_BADSTATE, 0, preq);
			return;
		}

		/* for each subjob that is running, call req_rerunjob2 */

		++preq->rq_refct;	/* protect the request/reply struct */

		/* Setting deleted subjobs count to 0,
		 * since all the deleted subjobs will be moved to Q state
		 */
		parent->ji_ajtrk->tkm_dsubjsct = 0;

		for (i=0; i<parent->ji_ajtrk->tkm_ct; i++) {
			if (get_subjob_state(parent, i) == JOB_STATE_RUNNING) {
				pjob = find_job(mk_subjob_id(parent, i));
				if (pjob) {
					dup_br_for_subjob(preq, pjob, req_rerunjob2);
				}
			} else {
				set_subjob_tblstate(parent, i, JOB_STATE_QUEUED);
			}
		}
		/* if not waiting on any running subjobs, can reply; else */
		/* it is taken care of when last running subjob responds  */
		if (--preq->rq_refct == 0)
			reply_send(preq);
		return;

	}
	/* what's left to handle is a range of subjobs, foreach subjob 	*/
	/* if running, all req_rerunjob2			        */

	range = get_index_from_jid(jid);
	if (range == NULL) {
		req_reject(PBSE_IVALREQ, 0, preq);
		return;
	}

	/* first check that all in the subrange are in fact running */

	vrange = range;
	while (1) {
		if ((i = parse_subjob_index(vrange, &pc, &x, &y, &z, &j)) == -1) {
			req_reject(PBSE_IVALREQ, 0, preq);
			return;
		} else if (i == 1)
			break;
		while (x <= y) {
			i = numindex_to_offset(parent, x);
			if (i >= 0) {
				if (get_subjob_state(parent, i) == JOB_STATE_RUNNING)
					anygood++;
			}
			x += z;
		}
		vrange = pc;
	}
	if (anygood == 0) {
		req_reject(PBSE_BADSTATE, 0, preq);
		return;
	}

	/* now do the deed */

	++preq->rq_refct;	/* protect the request/reply struct */

	while (1) {
		if ((i = parse_subjob_index(range, &pc, &x, &y, &z, &j)) == -1) {
			req_reject(PBSE_IVALREQ, 0, preq);
			break;
		} else if (i == 1)
			break;
		while (x <= y) {
			i = numindex_to_offset(parent, x);
			if (i < 0) {
				x += z;
				continue;
			}

			if (get_subjob_state(parent, i) == JOB_STATE_RUNNING) {
				pjob = find_job(mk_subjob_id(parent, i));
				if (pjob) {
					dup_br_for_subjob(preq, pjob, req_rerunjob2);
				}
			}
			x += z;
		}
		range = pc;
	}

	/* if not waiting on any running subjobs, can reply; else */
	/* it is taken care of when last running subjob responds  */
	if (--preq->rq_refct == 0)
		reply_send(preq);
	return;
}