void NordugridJob::NotifyNewRemoteStatus( const char *status ) { if ( SetRemoteJobStatus( status ) ) { remoteJobState = status; SetEvaluateState(); if ( condorState == IDLE && ( remoteJobState == REMOTE_STATE_INLRMS_R || remoteJobState == REMOTE_STATE_INLRMS_R2 || remoteJobState == REMOTE_STATE_INLRMS_E || remoteJobState == REMOTE_STATE_INLRMS_E2 || remoteJobState == REMOTE_STATE_EXECUTED || remoteJobState == REMOTE_STATE_FINISHING || remoteJobState == REMOTE_STATE_FINISHED || remoteJobState == REMOTE_STATE_FAILED ) ) { JobRunning(); } else if ( condorState == RUNNING && ( remoteJobState == REMOTE_STATE_INLRMS_Q || remoteJobState == REMOTE_STATE_INLRMS_Q2 || remoteJobState == REMOTE_STATE_INLRMS_S || remoteJobState == REMOTE_STATE_INLRMS_S2 ) ) { JobIdle(); } } if ( gmState == GM_RECOVER_QUERY ) { SetEvaluateState(); } }
int BaseJob::EvalPeriodicJobExpr() { float old_run_time; bool old_run_time_dirty; UserPolicy user_policy; #ifdef USE_NON_MUTATING_USERPOLICY user_policy.Init(); #else user_policy.Init( jobAd ); #endif UpdateJobTime( &old_run_time, &old_run_time_dirty ); #ifdef USE_NON_MUTATING_USERPOLICY int action = user_policy.AnalyzePolicy( *jobAd, PERIODIC_ONLY ); #else int action = user_policy.AnalyzePolicy( PERIODIC_ONLY ); #endif RestoreJobTime( old_run_time, old_run_time_dirty ); MyString reason_buf; int reason_code; int reason_subcode; user_policy.FiringReason(reason_buf,reason_code,reason_subcode); char const *reason = reason_buf.Value(); if ( reason == NULL || !reason[0] ) { reason = "Unknown user policy expression"; } switch( action ) { case UNDEFINED_EVAL: case HOLD_IN_QUEUE: JobHeld( reason, reason_code, reason_subcode ); SetEvaluateState(); break; case STAYS_IN_QUEUE: // do nothing break; case REMOVE_FROM_QUEUE: JobRemoved( reason ); SetEvaluateState(); break; case RELEASE_FROM_HOLD: // When a job gets held and then released while the gridmanager // is managing it, the gridmanager cleans up and deletes its // local data for the job (canceling the remote submission if // possible), then picks it up as a new job from the schedd. // So ignore release-from-hold and let the schedd deal with it. break; default: EXCEPT( "Unknown action (%d) in BaseJob::EvalPeriodicJobExpr", action ); } return 0; }
// Handler for expiration of the "sent" job lease.
//
// Logs the call at D_FULLDEBUG, cancels the associated timer if one is still
// registered (resetting jobLeaseSentExpiredTid to TIMER_UNSET), and requests
// a state evaluation for the job.
void BaseJob::JobLeaseSentExpired()
{
    dprintf( D_FULLDEBUG, "(%d.%d) BaseJob::JobLeaseSentExpired()\n",
             procID.cluster, procID.proc );

    const bool timer_registered = ( jobLeaseSentExpiredTid != TIMER_UNSET );
    if ( timer_registered ) {
        daemonCore->Cancel_Timer( jobLeaseSentExpiredTid );
        jobLeaseSentExpiredTid = TIMER_UNSET;
    }

    SetEvaluateState();
}
void BaseJob::NotifyResourceUp() { resourceStateKnown = true; if ( resourceDown == true ) { // The GlobusResourceUp event is now deprecated WriteGlobusResourceUpEventToUserLog( jobAd ); WriteGridResourceUpEventToUserLog( jobAd ); jobAd->AssignExpr( ATTR_GRID_RESOURCE_UNAVAILABLE_TIME, "Undefined" ); requestScheddUpdate( this, false ); } resourceDown = false; if ( resourcePingPending ) { resourcePingPending = false; resourcePingComplete = true; } SetEvaluateState(); }
void BaseJob::CheckRemoteStatus() { const int stale_limit = 15*60; // TODO return time that this job status could become stale? // TODO compute stale_limit from job's poll interval? // TODO make stale_limit configurable? if ( m_lastRemoteStatusUpdate == 0 || m_currentStatusUnknown == true ) { return; } if ( time(NULL) > m_lastRemoteStatusUpdate + stale_limit ) { m_currentStatusUnknown = true; jobAd->Assign( ATTR_CURRENT_STATUS_UNKNOWN, true ); requestScheddUpdate( this, false ); WriteJobStatusUnknownEventToUserLog( jobAd ); SetEvaluateState(); } }
void BaseJob::JobLeaseReceivedExpired() { dprintf(D_FULLDEBUG,"(%d.%d) BaseJob::JobLeaseReceivedExpired()\n",procID.cluster,procID.proc); if ( jobLeaseReceivedExpiredTid != TIMER_UNSET ) { daemonCore->Cancel_Timer( jobLeaseReceivedExpiredTid ); jobLeaseReceivedExpiredTid = TIMER_UNSET; } condorState = REMOVED; jobAd->Assign( ATTR_JOB_STATUS, condorState ); jobAd->Assign( ATTR_ENTERED_CURRENT_STATUS, (int)time(NULL) ); jobAd->Assign( ATTR_REMOVE_REASON, "Job lease expired" ); UpdateRuntimeStats(); requestScheddUpdate( this, false ); SetEvaluateState(); }
void BaseJob::JobAdUpdateFromSchedd( const ClassAd *new_ad, bool full_ad ) { static const char *held_removed_update_attrs[] = { ATTR_JOB_STATUS, ATTR_HOLD_REASON, ATTR_HOLD_REASON_CODE, ATTR_HOLD_REASON_SUBCODE, ATTR_LAST_HOLD_REASON, ATTR_RELEASE_REASON, ATTR_LAST_RELEASE_REASON, ATTR_ENTERED_CURRENT_STATUS, ATTR_NUM_SYSTEM_HOLDS, ATTR_REMOVE_REASON, NULL }; int new_condor_state; new_ad->LookupInteger( ATTR_JOB_STATUS, new_condor_state ); if ( new_condor_state == condorState ) { if ( !full_ad ) { MergeClassAds( jobAd, const_cast<ClassAd*>(new_ad), true, false ); } return; } if ( new_condor_state == REMOVED && condorState == HELD ) { int release_status = IDLE; jobAd->LookupInteger( ATTR_JOB_STATUS_ON_RELEASE, release_status ); if ( release_status == REMOVED ) { // We already know about this REMOVED state and have // decided to go on hold afterwards, so ignore this // "update". return; } } if ( new_condor_state == REMOVED || new_condor_state == HELD ) { for ( int i = 0; held_removed_update_attrs[i] != NULL; i++ ) { ExprTree *expr; if ( (expr = new_ad->LookupExpr( held_removed_update_attrs[i] )) != NULL ) { ExprTree * pTree = expr->Copy(); jobAd->Insert( held_removed_update_attrs[i], pTree, false ); } else { jobAd->Delete( held_removed_update_attrs[i] ); } jobAd->SetDirtyFlag( held_removed_update_attrs[i], false ); } if ( new_condor_state == HELD && writeUserLog && !holdLogged ) { // TODO should this log event be delayed until gridmanager is // done dealing with the job? WriteHoldEventToUserLog( jobAd ); holdLogged = true; } // If we're about to put a job on hold and learn that it's been // removed, make sure the state returns to removed when it is // released. This is normally checked in JobHeld(), but it's // possible to learn of the removal just as we're about to // update the schedd with the hold. 
if ( new_condor_state == REMOVED && condorState == HELD ) { bool dirty; jobAd->GetDirtyFlag( ATTR_JOB_STATUS, NULL, &dirty ); if ( dirty ) { jobAd->Assign( ATTR_JOB_STATUS_ON_RELEASE, REMOVED ); } } condorState = new_condor_state; // TODO do we need to call UpdateRuntimeStats() here? UpdateRuntimeStats(); SetEvaluateState(); } else if ( new_condor_state == COMPLETED ) { condorState = new_condor_state; // TODO do we need to update any other attributes? SetEvaluateState(); } else if ( !full_ad ) { MergeClassAds( jobAd, const_cast<ClassAd*>(new_ad), true, false ); } }