int MJobSelectPJobList( mjob_t *PreemptorJ, /* I */ int RequiredTasks, /* I */ int RequiredNodes, /* I */ mjob_t **FeasibleJobList, /* I */ mnalloc_t *FNL, /* I */ mjob_t *PreempteeJList[], /* O: list of preemptible jobs */ int *PreempteeTCList, /* O (proc count) */ int *PreempteeNCList, nodelist_t *PreempteeTaskList[]) { mjob_t *J; mnode_t *N; mres_t *R; mreq_t *RQ; int TC; __preempt_prio_t pJ[MAX_MJOB]; nodelist_t tmpTaskList; mcres_t PreemptRes; int index; int jindex; int jindex2; int nindex; int tindex; int rindex; int TotalTasks; int TotalNodes; int NodeCount; mbool_t Preemptor; mbool_t OwnerPreemptor; double JobRunPriority; const char *FName = "MJobSelectPJobList"; DBG(2,fSCHED) DPrint("%s(%s,%d,%d,FJobList,PJList,PTCList,PNCList,PTL)\n", FName, (PreemptorJ != NULL) ? PreemptorJ->Name : "NULL", RequiredTasks, RequiredNodes); if ((PreemptorJ == NULL) || (FeasibleJobList == NULL) || (FeasibleJobList[0] == NULL) || (FNL == NULL) || (PreempteeJList == NULL) || (PreempteeTCList == NULL) || (PreempteeNCList == NULL)) { DBG(1,fSCHED) DPrint("ALERT: invalid parameters passed to %s\n", FName); return(FAILURE); } /* NOTE: select 'best' list of jobs to preempt so as to provide */ /* needed tasks/nodes for preemptorJ on feasiblenodelist */ /* lower 'run' priority means better preempt candidate */ /* determine number of available tasks associated with each job */ index = 0; TotalNodes = 0; TotalTasks = 0; RQ = PreemptorJ->Req[0]; /* only support single req preemption */ for (jindex = 0;FeasibleJobList[jindex] != NULL;jindex++) { MJobGetRunPriority(FeasibleJobList[jindex],0,&JobRunPriority,NULL); memset(&PreemptRes,0,sizeof(PreemptRes)); NodeCount = 0; tindex = 0; for (nindex = 0;FNL[nindex].N != NULL;nindex++) { N = FNL[nindex].N; OwnerPreemptor = FALSE; if (MISSET(PreemptorJ->Flags,mjfPreemptor) == TRUE) { Preemptor = TRUE; } else { Preemptor = FALSE; /* determine if 'ownerpreempt' is active */ for (rindex = 0;N->R[rindex] != NULL;rindex++) { R = N->R[rindex]; if ((R == (mres_t *)1) || (R->Name[0] == '\0') || (R == (mres_t *)1) || (R->Name[0] == '\1')) continue; if ((MISSET(R->Flags,mrfOwnerPreempt) == FALSE) || (R->IsActive == FALSE)) continue; if (MCredIsMatch(&PreemptorJ->Cred,R->O,R->OType) == FAILURE) continue; Preemptor = TRUE; OwnerPreemptor = TRUE; break; } /* END for (rindex) */ } if (Preemptor == FALSE) { continue; } for (jindex2 = 0;jindex2 < MAX_MJOB_PER_NODE;jindex2++) { J = N->JList[jindex2]; if (J == NULL) break; if (J != FeasibleJobList[jindex]) continue; TC = N->JTC[jindex2]; if (!(J->Flags & (1 << mjfPreemptee))) { DBG(6,fSCHED) DPrint("INFO: job %s not considered for preemption (PREEMPTEE flag not set)\n", J->Name); continue; } else if (OwnerPreemptor == TRUE) { /* all criteria satisfied */ /* NO-OP */ } else if (J->StartPriority > PreemptorJ->StartPriority) { DBG(6,fSCHED) DPrint("INFO: job %s not considered for preemption (preempt priority too low (%ld < %ld)\n", J->Name, J->StartPriority, PreemptorJ->StartPriority); continue; } MCResAdd(&PreemptRes,&N->CRes,&RQ->DRes,TC,FALSE); NodeCount += 1; tmpTaskList[tindex].N = N; tmpTaskList[tindex].TC = TC; tindex++; } /* END for (jindex2) */ } /* END for (nindex) */ DBG(6,fSCHED) DPrint("INFO: preemptible job %s provides %d/%d tasks/nodes\n", FeasibleJobList[jindex]->Name, PreemptRes.Procs, NodeCount); if (PreemptRes.Procs <= 0) { continue; } tmpTaskList[tindex].N = NULL; /* determine 'cost per task' associated with job */ pJ[index].J = FeasibleJobList[jindex]; pJ[index].Tasks = PreemptRes.Procs; pJ[index].Nodes = NodeCount; pJ[index].Cost = (long)JobRunPriority / PreemptRes.Procs; if (PreempteeTaskList != NULL) { pJ[index].TL = (nodelist_t *)calloc(1, sizeof(nodelist_t)); memcpy(pJ[index].TL,&tmpTaskList,sizeof(mnalloc_t) * (tindex + 1)); } index++; } /* END for (jindex) */ pJ[index].J = NULL; /* sort job list */ if (index > 1) { qsort( (void *)&pJ[0], index, sizeof(pJ[0]), (int(*)(const void *,const void *))__MJobPreemptPrioComp); } for (jindex = 0;jindex < index;jindex++) { PreempteeJList[jindex] = pJ[jindex].J; PreempteeTCList[jindex] = pJ[jindex].Tasks; PreempteeNCList[jindex] = pJ[jindex].Nodes; TotalTasks += pJ[jindex].Tasks; TotalNodes += pJ[jindex].Nodes; if (PreempteeTaskList != NULL) PreempteeTaskList[jindex] = pJ[jindex].TL; } /* END for (jindex) */ PreempteeJList[jindex] = NULL; if ((jindex == 0) || (RequiredTasks > TotalTasks) || (RequiredNodes > TotalNodes)) { /* inadequate preemptible resources located */ DBG(2,fSCHED) DPrint("INFO: inadequate preempt jobs (%d) located (P: %d of %d,N: %d of %d)\n", jindex, TotalTasks, RequiredTasks, TotalNodes, RequiredNodes); return(FAILURE); } return(SUCCESS); } /* END MJobSelectPJobList() */
int MJobProcessCompleted( mjob_t **JP) /* I (freed on success, if job is not restarted) */ { int nindex; int rqindex; enum MS3CodeDecadeEnum tmpS3C; char Line[MMAX_LINE]; mnode_t *N; mreq_t *RQ; mjob_t *J; char Message[MMAX_LINE]; const char *FName = "MJobProcessCompleted"; MDB(2,fSTAT) MLog("%s(%s)\n", FName, ((JP != NULL) && (*JP != NULL)) ? (*JP)->Name : "NULL"); if ((JP == NULL) || (*JP == NULL)) { return(FAILURE); } J = *JP; if ((J->StartTime > 0) && (J->CompletionTime > J->StartTime)) { long ComputedWallTime = J->CompletionTime - J->StartTime - J->SWallTime; /* sanity check walltime */ if (J->AWallTime != ComputedWallTime) { MDB(7,fSCHED) MLog("INFO: job %s setting walltime to %ld from %ld in function %s\n", J->Name, ComputedWallTime, J->AWallTime, __FUNCTION__); J->AWallTime = ComputedWallTime; } J->AWallTime = MAX(J->AWallTime,(long)J->Req[0]->RMWTime); } MJobProcessTerminated(J,mjsCompleted); if (bmisset(&J->IFlags,mjifWasCanceled) && bmisset(&J->NotifyBM,mntJobFail) && (J->Credential.U->EMailAddress != NULL)) { char tmpLine[MMAX_LINE]; mstring_t Msg(MMAX_LINE); snprintf(tmpLine,sizeof(tmpLine),"Moab job '%s' was canceled",J->Name); Msg = tmpLine; MStringAppend(&Msg,"\n\n========== output of checkjob ==========\n\n"); MUICheckJob(J,&Msg,mptHard,NULL,NULL,NULL,NULL,0); MSysSendMail(J->Credential.U->EMailAddress,NULL,tmpLine,NULL,Msg.c_str()); } /* set/incr job variables (usually to be used by triggers) */ MJobProcessTVariables(J); if (MAMHandleEnd(&MAM[0],(void *)J,mxoJob,Message,&tmpS3C) == FAILURE) { MDB(1,fSTAT) MLog("ERROR: Unable to register job end with accounting manager for job '%s'\n", J->Name); sprintf(Line,"AMFAILURE: Unable to register job end with accounting manager for job %s, reason %s (%s)\n", J->Name, (tmpS3C > ms3cNone && tmpS3C < ms3cLAST && MS3CodeDecade[tmpS3C] != NULL) ? MS3CodeDecade[tmpS3C] : "Unknown Failure", Message); MSysRegEvent(Line,mactNONE,1); } if ((J->TemplateExtensions != NULL) && (J->TemplateExtensions->TJobAction != mtjatNONE) && (J->TemplateExtensions->JobReceivingAction != NULL)) { mjob_t *JobReceivingAction = NULL; if (MJobFind(J->TemplateExtensions->JobReceivingAction,&JobReceivingAction,mjsmBasic) == SUCCESS) { switch (J->TemplateExtensions->TJobAction) { case mtjatDestroy: if ((J->CompletionCode == 0) && (J->CompletionTime > 0)) { char tmpMsg[MMAX_LINE]; snprintf(tmpMsg,sizeof(tmpMsg),"%s cancelled by destroy action job %s\n", JobReceivingAction->Name, J->Name); bmset(&JobReceivingAction->IFlags,mjifDestroyByDestroyTemplate); MJobCancel(JobReceivingAction,tmpMsg,FALSE,NULL,NULL); } else { /* Job failed, need to clear the flag on job so that we can resubmit the destroy job */ bmunset(&JobReceivingAction->Flags,mjfDestroyTemplateSubmitted); bmunset(&JobReceivingAction->SpecFlags,mjfDestroyTemplateSubmitted); } break; default: /* NO-OP */ break; } /* END switch (J->TX->TJobAction) */ } /* END if (MJobFind(J->TX->JobReceivingAction) == SUCCESS) */ } /* END if ((J->TX != NULL) && ...) */ /* determine if job should be restarted due to policies */ /* (should other completion events happen before this occurs?) */ if ((MSched.JobFailRetryCount > 0) && (J->CompletionCode != 0) && !bmisset(&J->IFlags,mjifWasCanceled)) { char EMsg[MMAX_LINE]; if (J->StartCount < MSched.JobFailRetryCount) { /* restart/requeue job */ MJobSetState(J,mjsStarting); MJobRequeue(J,NULL,NULL,EMsg,NULL); return(SUCCESS); } else if (MSched.JobFailRetryCount > 1) { /* only requeue the job with a hold if the user has enabled a * JobFailRetryCount > 1 */ MJobSetState(J,mjsStarting); MJobRequeue(J,NULL,NULL,EMsg,NULL); /* update Moab's in-memory state */ MJobSetHold(J,mhUser,MMAX_TIME,mhrPolicyViolation,"restart count violation"); /* tell RM about change in hold */ if ((J->DestinationRM != NULL) && (J->DestinationRM->Type == mrmtPBS)) { MRMJobModify(J,"hold",NULL,"user",mSet,NULL,NULL,NULL); } return(SUCCESS); } } if (MJOBISCOMPLETE(J) == FALSE) { if (MJOBISACTIVE(J) == TRUE) MJobSetState(J,mjsCompleted); else if ((MJobIsArrayMaster(J) == TRUE) && (bmisset(&J->IFlags,mjifWasCanceled))) MJobSetState(J,mjsRemoved); else if (MJobIsArrayMaster(J) == TRUE) /* Array master's state is always idle. Will need to handle case where * one sub job fails so that the master fails as well in order fulfill * job array dependencies. */ MJobSetState(J,mjsCompleted); else MJobSetState(J,mjsVacated); } #ifdef __NCSA if (J->CompletionTime == J->StartTime) { sprintf(Line,"WARNING: job '%s' completed with a walltime of 0 seconds\n", J->Name); MSysRegEvent(Line,mactMail,0,1); } #endif /* __NCSA */ if (MPar[0].BFChunkDuration > 0) { MPar[0].BFChunkBlockTime = MSched.Time + MPar[0].BFChunkDuration; } if (J->Triggers != NULL) { if (J->CompletionCode != 0) { MOReportEvent( (void *)J, J->Name, mxoJob, mttFailure, (J->CompletionTime > 0) ? J->CompletionTime : MSched.Time, TRUE); } else { MOReportEvent( (void *)J, J->Name, mxoJob, mttEnd, (J->CompletionTime > 0) ? J->CompletionTime : MSched.Time, TRUE); } MSchedCheckTriggers(J->Triggers,-1,NULL); } /* provide feedback info to user */ MJobSendFB(J); /* handle statistics */ MStatUpdateCompletedJobUsage(J,msmNONE,0); /* create job stat record */ MOWriteEvent((void *)J,mxoJob,mrelJobComplete,NULL,MStat.eventfp,NULL); /* modify node expected state and dedicated resources */ /* NOTE: for mjfRsvMap jobs should restore N->ARes (NYI) */ /* TC = RQ->NodeList[nindex].TC; MCResRemove(&N->ARes,&N->CRes,&RQ->DRes,TC,TRUE); */ nindex = 0; for (rqindex = 0;J->Req[rqindex] != NULL;rqindex++) { RQ = J->Req[rqindex]; for (nindex = 0;MNLGetNodeAtIndex(&RQ->NodeList,nindex,&N) == SUCCESS;nindex++) { if (bmisset(&J->Flags,mjfRsvMap)) MCResAdd(&N->ARes,&N->CRes,&RQ->DRes,MNLGetTCAtIndex(&RQ->NodeList,nindex),TRUE); MDB(3,fSTAT) MLog("INFO: node '%s' released from job %s\n", N->Name, J->Name); /* determine new expected state for node */ if (MNodeCheckAllocation(N) != SUCCESS) { N->EState = mnsIdle; } else { N->EState = mnsActive; } if (N->EState == N->State) N->SyncDeadLine = MMAX_TIME; else N->SyncDeadLine = MSched.Time + MSched.NodeSyncDeadline; } /* END for (nindex) */ } /* END for (rqindex) */ MJobRemoveCP(J); /* modify expected job state */ J->EState = J->State; MJobCleanupForRemoval(J); /* transition to MCJobHT table */ if (MQueueAddCJob(J) == FAILURE) { MJobRemove(JP); } return(SUCCESS); } /* END MJobProcessCompleted() */