/**
 * @brief
 *	chk_array_doneness - check if all subjobs are expired and if so,
 *	run end-of-job processing on the Array Job itself.
 *
 * @par Functionality:
 *	Nothing is done when the parent has no subjob tracking table or when
 *	the table carries TKMFLG_NO_DELETE.  Otherwise, if the tracking table
 *	counts no subjobs in the QUEUED, RUNNING or EXITING states, the array
 *	is treated as finished:
 *	  - an overall exit status is derived from the per-subjob trk_error
 *	    values (0: none set, 1: at least one positive, 2: a negative
 *	    value was found, which also stops the scan);
 *	  - the exec union of the parent is filled in with that status;
 *	  - if the parent is in state BEGUN, the accounting records and the
 *	    end-of-job mail are issued;
 *	  - dependencies are processed when the depend attribute is set;
 *	  - svr_saveorpurge_finjobhist() decides what happens to the job.
 *	If some subjobs are still counted in those states, the parent is
 *	simply saved with job_save(..., SAVEJOB_FULL).
 *
 * @param[in,out]	parent - pointer to parent job.
 *
 * @return	void
 */
void
chk_array_doneness(job *parent)
{
	char		 acctbuf[40];
	int		 e;
	int		 i;
	struct ajtrkhd	*ptbl = parent->ji_ajtrk;

	if (ptbl == NULL)
		return;

	/* deletion of subjobs is in progress, leave the array job alone */
	if (ptbl->tkm_flags & TKMFLG_NO_DELETE)
		return;

	if (ptbl->tkm_subjsct[JOB_STATE_QUEUED] +
		ptbl->tkm_subjsct[JOB_STATE_RUNNING] +
		ptbl->tkm_subjsct[JOB_STATE_EXITING] == 0) {

		/* Array Job all done, do simple eoj processing */

		/*
		 * Fold the per-subjob error indicators into one exit status;
		 * a negative trk_error takes precedence and ends the scan.
		 */
		for (e = i = 0; i < ptbl->tkm_ct; ++i) {
			if (ptbl->tkm_tbl[i].trk_error > 0)
				e = 1;
			else if (ptbl->tkm_tbl[i].trk_error < 0) {
				e = 2;
				break;
			}
		}

		parent->ji_qs.ji_un_type = JOB_UNION_TYPE_EXEC;
		parent->ji_qs.ji_un.ji_exect.ji_momaddr  = 0;
		parent->ji_qs.ji_un.ji_exect.ji_momport  = 0;
		parent->ji_qs.ji_un.ji_exect.ji_exitstat = e;

		check_block(parent, "");

		if (parent->ji_qs.ji_state == JOB_STATE_BEGUN) {
			/* if BEGUN, issue 'E' account record */

			/*
			 * Bounded formatting: msg_job_end_stat is defined
			 * outside this function, so never let it write past
			 * the end of acctbuf.
			 */
			(void)snprintf(acctbuf, sizeof(acctbuf),
				msg_job_end_stat, e);
			account_job_update(parent, PBS_ACCT_LAST);
			account_jobend(parent, acctbuf, PBS_ACCT_END);

			svr_mailowner(parent, MAIL_END, MAIL_NORMAL, acctbuf);
		}

		if (parent->ji_wattr[(int)JOB_ATR_depend].at_flags & ATR_VFLAG_SET)
			(void)depend_on_term(parent);

		/*
		 * Check if the history of the finished job can be saved or
		 * it needs to be purged.
		 */
		svr_saveorpurge_finjobhist(parent);
	} else {
		/* subjobs remain in one of those states: just save the parent */
		(void)job_save(parent, SAVEJOB_FULL);
	}
}
/**
 * @brief
 *	force_reque - requeue (rerun) a job
 *
 * @par Functionality:
 *	Marks the job as modified, invalidates its MoM handle and protocol,
 *	releases its resources through rel_resc() and writes a rerun record
 *	to accounting.  A subjob is then put in substate RERUN3 and purged;
 *	any other job has its suspend/stage-in/checkpoint server flags and
 *	its execution related attributes cleared before its state is
 *	re-evaluated and set.
 *
 * @param[in,out]	pjob - job which needs to be rerun
 *
 * @return	void
 */
void
force_reque(job *pjob)
{
	/*
	 * Attributes released on requeue, in the order they are freed.
	 * The job directory is in this list because it has no meaning for
	 * a re-queued job.
	 */
	const int	stale_attrs[] = {
		(int)JOB_ATR_exec_host,
		(int)JOB_ATR_exec_host2,
		(int)JOB_ATR_exec_vnode,
		(int)JOB_ATR_pset,
		(int)JOB_ATR_jobdir
	};
	const int	stale_ct = (int)(sizeof(stale_attrs) / sizeof(stale_attrs[0]));
	int		k;
	int		state_after;
	int		substate_after;

	pjob->ji_modified  = 1;
	pjob->ji_momhandle = -1;
	pjob->ji_mom_prot  = PROT_INVALID;

	/*
	 * Simulate a rerun: give the nodes back, then (further below) drop
	 * the checkpoint flag and the exec_vnode string.
	 */
	rel_resc(pjob);

	/* record the rerun in the accounting file */
	account_jobend(pjob, pjob->ji_acctrec, PBS_ACCT_RERUN);

	/*
	 * For a subjob, substate RERUN3 is set so that the tracking table
	 * entry gets reset to Queued, and the job structure is discarded.
	 */
	if ((pjob->ji_qs.ji_svrflags & JOB_SVFLG_SubJob) != 0) {
		pjob->ji_qs.ji_substate = JOB_SUBSTATE_RERUN3;
		job_purge(pjob);
		return;
	}

	/*
	 * JOB_SVFLG_Actsuspd is cleared as well: the job is no longer
	 * suspended (User busy).  A suspended job is rerun in case of a
	 * MOM failure after the workstation becomes active (busy).
	 */
	pjob->ji_qs.ji_svrflags &=
		~(JOB_SVFLG_Actsuspd | JOB_SVFLG_StagedIn | JOB_SVFLG_CHKPT);

	for (k = 0; k < stale_ct; ++k) {
		int idx = stale_attrs[k];

		job_attr_def[idx].at_free(&pjob->ji_wattr[idx]);
	}

	svr_evaljobstate(pjob, &state_after, &substate_after, 1);
	(void)svr_setjobstate(pjob, state_after, substate_after);
}