Beispiel #1
0
// Process a newly reported remote status string for this job.
//
// status: status text handed to SetRemoteJobStatus(). Only when that call
//   returns true is the text stored in remoteJobState and the local
//   condorState reconciled with it.
//
// Reconciliation rules (applied only when the status was accepted):
//   - condorState IDLE and the remote state is one of INLRMS_R/R2,
//     INLRMS_E/E2, EXECUTED, FINISHING, FINISHED or FAILED -> JobRunning()
//   - condorState RUNNING and the remote state is one of INLRMS_Q/Q2 or
//     INLRMS_S/S2 -> JobIdle()
//
// Regardless of whether the status was accepted, SetEvaluateState() is
// called when gmState is GM_RECOVER_QUERY.
void NordugridJob::NotifyNewRemoteStatus( const char *status )
{
	const bool status_accepted = SetRemoteJobStatus( status );

	if ( status_accepted ) {
		remoteJobState = status;
		SetEvaluateState();

		// Remote states for which an IDLE job is switched to running.
		const bool remote_past_queue =
			remoteJobState == REMOTE_STATE_INLRMS_R ||
			remoteJobState == REMOTE_STATE_INLRMS_R2 ||
			remoteJobState == REMOTE_STATE_INLRMS_E ||
			remoteJobState == REMOTE_STATE_INLRMS_E2 ||
			remoteJobState == REMOTE_STATE_EXECUTED ||
			remoteJobState == REMOTE_STATE_FINISHING ||
			remoteJobState == REMOTE_STATE_FINISHED ||
			remoteJobState == REMOTE_STATE_FAILED;

		// Remote states for which a RUNNING job is switched back to idle.
		const bool remote_waiting =
			remoteJobState == REMOTE_STATE_INLRMS_Q ||
			remoteJobState == REMOTE_STATE_INLRMS_Q2 ||
			remoteJobState == REMOTE_STATE_INLRMS_S ||
			remoteJobState == REMOTE_STATE_INLRMS_S2;

		if ( condorState == IDLE ) {
			if ( remote_past_queue ) {
				JobRunning();
			}
		} else if ( condorState == RUNNING ) {
			if ( remote_waiting ) {
				JobIdle();
			}
		}
	}

	if ( gmState == GM_RECOVER_QUERY ) {
		SetEvaluateState();
	}
}
Beispiel #2
0
// Evaluate the job's periodic user-policy expressions and act on the result.
//
// The job's run time is adjusted with UpdateJobTime() for the duration of the
// policy analysis and put back with RestoreJobTime() afterwards. The action
// returned by UserPolicy::AnalyzePolicy( PERIODIC_ONLY ) is then handled:
//   - UNDEFINED_EVAL / HOLD_IN_QUEUE : JobHeld() with the firing reason,
//                                      then SetEvaluateState()
//   - STAYS_IN_QUEUE                 : nothing
//   - REMOVE_FROM_QUEUE              : JobRemoved() with the firing reason,
//                                      then SetEvaluateState()
//   - RELEASE_FROM_HOLD              : deliberately ignored (see below)
//   - anything else                  : reported through EXCEPT()
//
// USE_NON_MUTATING_USERPOLICY selects between the two UserPolicy API shapes
// (ad passed to Init() vs. ad passed to AnalyzePolicy()).
//
// Returns 0 on every path that reaches the end of the function.
int BaseJob::EvalPeriodicJobExpr()
{
    // These are filled in through out-parameters below. They are given
    // defined values up front so that a callee which leaves one untouched
    // can never cause an indeterminate value to be read later on.
    float old_run_time = 0.0f;
    bool old_run_time_dirty = false;
    UserPolicy user_policy;

#ifdef USE_NON_MUTATING_USERPOLICY
    user_policy.Init();
#else
    user_policy.Init( jobAd );
#endif

    UpdateJobTime( &old_run_time, &old_run_time_dirty );

#ifdef USE_NON_MUTATING_USERPOLICY
    int action = user_policy.AnalyzePolicy( *jobAd, PERIODIC_ONLY );
#else
    int action = user_policy.AnalyzePolicy( PERIODIC_ONLY );
#endif

    RestoreJobTime( old_run_time, old_run_time_dirty );

    MyString reason_buf;
    int reason_code = 0;
    int reason_subcode = 0;
    user_policy.FiringReason(reason_buf,reason_code,reason_subcode);
    char const *reason = reason_buf.Value();
    if ( reason == NULL || !reason[0] ) {
        // No reason text was produced; fall back to a generic message.
        reason = "Unknown user policy expression";
    }

    switch( action ) {
    case UNDEFINED_EVAL:
    case HOLD_IN_QUEUE:
        JobHeld( reason, reason_code, reason_subcode );
        SetEvaluateState();
        break;
    case STAYS_IN_QUEUE:
        // do nothing
        break;
    case REMOVE_FROM_QUEUE:
        JobRemoved( reason );
        SetEvaluateState();
        break;
    case RELEASE_FROM_HOLD:
        // When a job gets held and then released while the gridmanager
        // is managing it, the gridmanager cleans up and deletes its
        // local data for the job (canceling the remote submission if
        // possible), then picks it up as a new job from the schedd.
        // So ignore release-from-hold and let the schedd deal with it.
        break;
    default:
        EXCEPT( "Unknown action (%d) in BaseJob::EvalPeriodicJobExpr",
                action );
    }

    return 0;
}
Beispiel #3
0
// Handle expiry of the "lease sent" timer for this job.
//
// Logs the call at D_FULLDEBUG, cancels the timer identified by
// jobLeaseSentExpiredTid (if one is set) and resets the id to TIMER_UNSET,
// then calls SetEvaluateState().
void BaseJob::JobLeaseSentExpired()
{
    dprintf( D_FULLDEBUG, "(%d.%d) BaseJob::JobLeaseSentExpired()\n",
             procID.cluster, procID.proc );

    const bool timer_is_set = ( jobLeaseSentExpiredTid != TIMER_UNSET );
    if ( timer_is_set ) {
        daemonCore->Cancel_Timer( jobLeaseSentExpiredTid );
        jobLeaseSentExpiredTid = TIMER_UNSET;
    }

    SetEvaluateState();
}
Beispiel #4
0
// Record that the resource this job uses is reachable.
//
// Marks the resource state as known. If the resource was previously flagged
// as down, writes the resource-up events to the user log, resets
// ATTR_GRID_RESOURCE_UNAVAILABLE_TIME in the job ad to "Undefined" and
// requests a schedd update. The down flag is then cleared, a pending ping
// (if any) is marked complete, and SetEvaluateState() is called.
void BaseJob::NotifyResourceUp()
{
    resourceStateKnown = true;

    const bool was_marked_down = ( resourceDown == true );
    if ( was_marked_down ) {
        // The GlobusResourceUp event is now deprecated
        WriteGlobusResourceUpEventToUserLog( jobAd );
        WriteGridResourceUpEventToUserLog( jobAd );

        jobAd->AssignExpr( ATTR_GRID_RESOURCE_UNAVAILABLE_TIME, "Undefined" );
        requestScheddUpdate( this, false );
    }
    resourceDown = false;

    // A ping that was still outstanding counts as answered now.
    if ( resourcePingPending ) {
        resourcePingPending = false;
        resourcePingComplete = true;
    }

    SetEvaluateState();
}
Beispiel #5
0
// Flag the job's remote status as unknown when it has not been refreshed
// for longer than a fixed limit (15 minutes).
//
// Nothing happens if no remote status update has been recorded yet
// (m_lastRemoteStatusUpdate == 0) or if the status is already flagged as
// unknown. Otherwise, once the last update is older than the limit, the
// flag is set, ATTR_CURRENT_STATUS_UNKNOWN is assigned true in the job ad,
// a schedd update is requested, the status-unknown event is written to the
// user log and SetEvaluateState() is called.
void BaseJob::CheckRemoteStatus()
{
    const int max_status_age = 15*60;

    // TODO return time that this job status could become stale?
    // TODO compute max_status_age from job's poll interval?
    // TODO make max_status_age configurable?
    if ( m_lastRemoteStatusUpdate == 0 ) {
        return;
    }
    if ( m_currentStatusUnknown == true ) {
        return;
    }

    const bool status_is_stale =
        time(NULL) > m_lastRemoteStatusUpdate + max_status_age;
    if ( !status_is_stale ) {
        return;
    }

    m_currentStatusUnknown = true;
    jobAd->Assign( ATTR_CURRENT_STATUS_UNKNOWN, true );
    requestScheddUpdate( this, false );
    WriteJobStatusUnknownEventToUserLog( jobAd );
    SetEvaluateState();
}
Beispiel #6
0
// Handle expiry of the "lease received" timer for this job.
//
// Logs the call at D_FULLDEBUG and cancels/clears the timer identified by
// jobLeaseReceivedExpiredTid if one is set. The job is then moved to the
// REMOVED state: condorState and the job ad's ATTR_JOB_STATUS,
// ATTR_ENTERED_CURRENT_STATUS (current time) and ATTR_REMOVE_REASON are
// updated, runtime stats are refreshed, a schedd update is requested and
// SetEvaluateState() is called.
void BaseJob::JobLeaseReceivedExpired()
{
    dprintf( D_FULLDEBUG, "(%d.%d) BaseJob::JobLeaseReceivedExpired()\n",
             procID.cluster, procID.proc );

    const bool timer_is_set = ( jobLeaseReceivedExpiredTid != TIMER_UNSET );
    if ( timer_is_set ) {
        daemonCore->Cancel_Timer( jobLeaseReceivedExpiredTid );
        jobLeaseReceivedExpiredTid = TIMER_UNSET;
    }

    // Record the removal in both the in-memory state and the job ad.
    condorState = REMOVED;
    jobAd->Assign( ATTR_JOB_STATUS, condorState );

    const int entered_status_at = static_cast<int>( time(NULL) );
    jobAd->Assign( ATTR_ENTERED_CURRENT_STATUS, entered_status_at );
    jobAd->Assign( ATTR_REMOVE_REASON, "Job lease expired" );

    UpdateRuntimeStats();
    requestScheddUpdate( this, false );
    SetEvaluateState();
}
Beispiel #7
0
void BaseJob::JobAdUpdateFromSchedd( const ClassAd *new_ad, bool full_ad )
{
    static const char *held_removed_update_attrs[] = {
        ATTR_JOB_STATUS,
        ATTR_HOLD_REASON,
        ATTR_HOLD_REASON_CODE,
        ATTR_HOLD_REASON_SUBCODE,
        ATTR_LAST_HOLD_REASON,
        ATTR_RELEASE_REASON,
        ATTR_LAST_RELEASE_REASON,
        ATTR_ENTERED_CURRENT_STATUS,
        ATTR_NUM_SYSTEM_HOLDS,
        ATTR_REMOVE_REASON,
        NULL
    };

    int new_condor_state;

    new_ad->LookupInteger( ATTR_JOB_STATUS, new_condor_state );

    if ( new_condor_state == condorState ) {
        if ( !full_ad ) {
            MergeClassAds( jobAd, const_cast<ClassAd*>(new_ad), true, false );
        }
        return;
    }

    if ( new_condor_state == REMOVED && condorState == HELD ) {
        int release_status = IDLE;
        jobAd->LookupInteger( ATTR_JOB_STATUS_ON_RELEASE, release_status );
        if ( release_status == REMOVED ) {
            // We already know about this REMOVED state and have
            // decided to go on hold afterwards, so ignore this
            // "update".
            return;
        }
    }

    if ( new_condor_state == REMOVED || new_condor_state == HELD ) {

        for ( int i = 0; held_removed_update_attrs[i] != NULL; i++ ) {
            ExprTree *expr;

            if ( (expr = new_ad->LookupExpr( held_removed_update_attrs[i] )) != NULL ) {
                ExprTree * pTree = expr->Copy();
                jobAd->Insert( held_removed_update_attrs[i], pTree, false );
            } else {
                jobAd->Delete( held_removed_update_attrs[i] );
            }
            jobAd->SetDirtyFlag( held_removed_update_attrs[i], false );
        }

        if ( new_condor_state == HELD && writeUserLog && !holdLogged ) {
            // TODO should this log event be delayed until gridmanager is
            //   done dealing with the job?
            WriteHoldEventToUserLog( jobAd );
            holdLogged = true;
        }

        // If we're about to put a job on hold and learn that it's been
        // removed, make sure the state returns to removed when it is
        // released. This is normally checked in JobHeld(), but it's
        // possible to learn of the removal just as we're about to
        // update the schedd with the hold.
        if ( new_condor_state == REMOVED && condorState == HELD ) {
            bool dirty;
            jobAd->GetDirtyFlag( ATTR_JOB_STATUS, NULL, &dirty );
            if ( dirty ) {
                jobAd->Assign( ATTR_JOB_STATUS_ON_RELEASE, REMOVED );
            }
        }

        condorState = new_condor_state;
        // TODO do we need to call UpdateRuntimeStats() here?
        UpdateRuntimeStats();
        SetEvaluateState();

    } else if ( new_condor_state == COMPLETED ) {

        condorState = new_condor_state;
        // TODO do we need to update any other attributes?
        SetEvaluateState();
    } else if ( !full_ad ) {
        MergeClassAds( jobAd, const_cast<ClassAd*>(new_ad), true, false );
    }

}