Exemple #1
0
/**
 * @brief
 * 		chk_array_doneness - check if all subjobs are expired and if so,
 *		purge the Array Job itself
 *
 * 		Nothing is done when the parent is NULL, when it has no subjob
 *		tracking table, or when the table is flagged TKMFLG_NO_DELETE.
 *
 *		When no subjob is counted as queued, running or exiting, the
 *		parent's exit status is derived from the tracking table entries:
 *		0 if no entry has a non-zero trk_error, 1 if at least one entry
 *		has a positive trk_error and none is negative, 2 if any entry
 *		has a negative trk_error.  End of job processing is then done on
 *		the parent.  Otherwise the parent is simply saved.
 *
 * @param[in,out]	parent - pointer to parent job.
 *
 *	@return	void
 */
void
chk_array_doneness(job *parent)
{
	char acctbuf[40];
	int e;
	int i;
	struct ajtrkhd	*ptbl;

	if (parent == NULL)
		return;

	ptbl = parent->ji_ajtrk;
	if (ptbl == NULL)
		return;

	if (ptbl->tkm_flags & TKMFLG_NO_DELETE)
		return;	/* delete of subjobs in progress, do not finish off the array now */

	if (ptbl->tkm_subjsct[JOB_STATE_QUEUED] + ptbl->tkm_subjsct[JOB_STATE_RUNNING] + ptbl->tkm_subjsct[JOB_STATE_EXITING] == 0) {

		/* Array Job all done, do simple eoj processing */

		/*
		 * A negative trk_error decides the status (2) at once, so the
		 * scan stops there; a positive one only raises the status to 1
		 * and the scan goes on in case a negative one follows.
		 */
		for (e=i=0; i<ptbl->tkm_ct; ++i) {
			if (ptbl->tkm_tbl[i].trk_error > 0)
				e = 1;
			else if (ptbl->tkm_tbl[i].trk_error < 0) {
				e = 2;
				break;
			}
		}
		parent->ji_qs.ji_un_type = JOB_UNION_TYPE_EXEC;
		parent->ji_qs.ji_un.ji_exect.ji_momaddr = 0;
		parent->ji_qs.ji_un.ji_exect.ji_momport = 0;
		parent->ji_qs.ji_un.ji_exect.ji_exitstat = e;

		check_block(parent, "");
		if (parent->ji_qs.ji_state == JOB_STATE_BEGUN) {
			/* if BEGUN, issue 'E' account record */
			/* bounded write: the format string is defined elsewhere, */
			/* so never let it run past the end of acctbuf            */
			(void)snprintf(acctbuf, sizeof(acctbuf), msg_job_end_stat, e);
			account_job_update(parent, PBS_ACCT_LAST);
			account_jobend(parent, acctbuf, PBS_ACCT_END);

			svr_mailowner(parent, MAIL_END, MAIL_NORMAL, acctbuf);
		}
		if (parent->ji_wattr[(int)JOB_ATR_depend].at_flags & ATR_VFLAG_SET)
			(void)depend_on_term(parent);

		/*
		 * Check if the history of the finished job can be saved or it needs to be purged.
		 */
		svr_saveorpurge_finjobhist(parent);
	} else {
		(void)job_save(parent, SAVEJOB_FULL);
	}
}
Exemple #2
0
/**
 * @brief
 * 		force_reque - requeue (rerun) a job
 *
 * 		Releases the job's resources, writes a rerun accounting record,
 *		and then either purges the job structure (subjob) or frees the
 *		execution related attributes and re-evaluates the job state.
 *
 * @param[in,out]	pjob	-	job which needs to be rerun
 */
void
force_reque(job *pjob)
{
	/* attributes freed on requeue, in the order they are freed;	*/
	/* job dir has no meaning for re-queued jobs, so it is unset too	*/
	const int stale_attrs[] = {
		(int)JOB_ATR_exec_host,
		(int)JOB_ATR_exec_host2,
		(int)JOB_ATR_exec_vnode,
		(int)JOB_ATR_pset,
		(int)JOB_ATR_jobdir
	};
	const int n_stale = (int)(sizeof(stale_attrs) / sizeof(stale_attrs[0]));
	int  idx;
	int  next_state;
	int  next_substate;

	pjob->ji_modified = 1;
	pjob->ji_momhandle = -1;
	pjob->ji_mom_prot = PROT_INVALID;

	/* simulate rerun: release what the job was holding */
	rel_resc(pjob);

	/* record the rerun in the accounting file */
	account_jobend(pjob, pjob->ji_acctrec, PBS_ACCT_RERUN);

	if (pjob->ji_qs.ji_svrflags & JOB_SVFLG_SubJob) {
		/* for a subjob, substate RERUN3 causes the trktbl entry to */
		/* be reset to Queued; the job struct itself is discarded   */
		pjob->ji_qs.ji_substate = JOB_SUBSTATE_RERUN3;
		job_purge(pjob);
		return;
	}

	/*
	 * Drop JOB_SVFLG_Actsuspd along with the stage-in and checkpoint
	 * flags: the job is no longer suspended (User busy).  A suspended
	 * job is rerun in case of a MOM failure after the workstation
	 * becomes active(busy).
	 */
	pjob->ji_qs.ji_svrflags &= ~(JOB_SVFLG_Actsuspd |
		JOB_SVFLG_StagedIn |
		JOB_SVFLG_CHKPT);

	for (idx = 0; idx < n_stale; ++idx) {
		job_attr_def[stale_attrs[idx]].at_free(
			&pjob->ji_wattr[stale_attrs[idx]]);
	}

	svr_evaljobstate(pjob, &next_state, &next_substate, 1);
	(void)svr_setjobstate(pjob, next_state, next_substate);
}