Beispiel #1
0
/*
  Publish this object's state/activity information into the given
  ClassAd: the current state and activity as strings, the times at
  which each was entered, and then one publishHistoryInfo() call for
  every state/activity combination we keep history for.

  cp          - ClassAd that receives the attributes.
  (unnamed)   - amask_t argument; not consulted here.
*/
void
ResState::publish( ClassAd* cp, amask_t  /*how_much*/ ) 
{
		// Current state, and when we entered it.
	cp->Assign( ATTR_STATE, state_to_string(r_state) );
	cp->Assign( ATTR_ENTERED_CURRENT_STATE, static_cast<int>(m_stime) );

		// Current activity, and when we entered it.
	cp->Assign( ATTR_ACTIVITY, activity_to_string(r_act) );
	cp->Assign( ATTR_ENTERED_CURRENT_ACTIVITY, static_cast<int>(m_atime) );

		// Every state/activity combination whose time-spent attribute
		// may be (conditionally) published, in publication order.
	const struct {
		State    state;
		Activity activity;
	} tracked[] = {
		{ owner_state,      idle_act },
		{ unclaimed_state,  idle_act },
		{ unclaimed_state,  benchmarking_act },
		{ matched_state,    idle_act },
		{ claimed_state,    idle_act },
		{ claimed_state,    busy_act },
		{ claimed_state,    suspended_act },
		{ claimed_state,    retiring_act },
		{ preempting_state, vacating_act },
		{ preempting_state, killing_act },
		{ backfill_state,   idle_act },
		{ backfill_state,   busy_act },
		{ backfill_state,   killing_act },
		{ drained_state,    idle_act },
		{ drained_state,    retiring_act },
	};
	const unsigned int num_tracked = sizeof(tracked) / sizeof(tracked[0]);

	for( unsigned int i = 0; i < num_tracked; i++ ) {
		publishHistoryInfo( cp, tracked[i].state, tracked[i].activity );
	}
}
Beispiel #2
0
void
OfflineCollectorPlugin::update (
	int	 command,
	ClassAd	&ad )
{

	dprintf (
		D_FULLDEBUG,
		"In OfflineCollectorPlugin::update ( %d )\n",
		command );

	/* bail out if the plug-in is not enabled */
	if ( !enabled () ) {
		return;
	}

	/* make sure the command is relevant to us */
	if ( UPDATE_STARTD_AD_WITH_ACK != command &&
		 UPDATE_STARTD_AD != command &&
		 MERGE_STARTD_AD != command ) {
		 return;
	}

	MyString s;
	const char *key = makeOfflineKey(ad,s);
	if (!key) return;

	/* report whether this ad is "off-line" or not and update
	   the ad accordingly. */		
	int offline  = FALSE,
		lifetime = 0;

	bool offline_explicit = false;
	if( ad.EvalBool( ATTR_OFFLINE, NULL, offline ) ) {
		offline_explicit = true;
	}

	if ( MERGE_STARTD_AD == command ) {
		mergeClassAd( ad, key );
		return;
	}

	// Rewrite the ad if it is going offline
	if ( UPDATE_STARTD_AD_WITH_ACK == command && !offline_explicit ) {

		/* set the off-line state of the machine */
		offline = TRUE;

		/* get the off-line expiry time (default to INT_MAX) */
		lifetime = param_integer ( 
			"OFFLINE_EXPIRE_ADS_AFTER",
			INT_MAX );

		/* reset any values in the ad that may interfere with
		a match in the future */

		/* Reset Condor state */
		ad.Assign ( ATTR_STATE, state_to_string ( unclaimed_state ) );
		ad.Assign ( ATTR_ACTIVITY, activity_to_string ( idle_act ) );
		ad.Assign ( ATTR_ENTERED_CURRENT_STATE, 0 );
		ad.Assign ( ATTR_ENTERED_CURRENT_ACTIVITY, 0 );

		/* Set the heart-beat time */
		int now = static_cast<int> ( time ( NULL ) );
		ad.Assign ( ATTR_MY_CURRENT_TIME, now );
		ad.Assign ( ATTR_LAST_HEARD_FROM, now );

		/* Reset machine load */
		ad.Assign ( ATTR_LOAD_AVG, 0.0 );
		ad.Assign ( ATTR_CONDOR_LOAD_AVG, 0.0 );		
		ad.Assign ( ATTR_TOTAL_LOAD_AVG, 0.0 );
		ad.Assign ( ATTR_TOTAL_CONDOR_LOAD_AVG, 0.0 );
		
		/* Reset CPU load */
		ad.Assign ( ATTR_CPU_IS_BUSY, false );
		ad.Assign ( ATTR_CPU_BUSY_TIME, 0 );

		/* Reset keyboard and mouse times */
		ad.Assign ( ATTR_KEYBOARD_IDLE, INT_MAX );
		ad.Assign ( ATTR_CONSOLE_IDLE, INT_MAX );		

		/* any others? */


		dprintf ( 
			D_FULLDEBUG, 
			"Machine ad lifetime: %d\n",
			lifetime );

			/* record the new values as specified above */
		ad.Assign ( ATTR_OFFLINE, (bool)offline );
		if ( lifetime > 0 ) {
			ad.Assign ( ATTR_CLASSAD_LIFETIME, lifetime );
		}
	}

	/* if it is off-line then add it to the list; otherwise,
	   remove it. */
	if ( offline > 0 ) {
		persistentStoreAd(key,ad);
	} else {
		persistentRemoveAd(key);
	}

}
Beispiel #3
0
/*
  Move this resource to the given state and activity.

  Nothing happens if neither differs from the current values.
  Otherwise leave_action() runs for the old state/activity, the change
  is logged, history totals and entry times are updated, and
  enter_action() runs for the new state/activity.  If either action
  reports (by returning TRUE) that it caused a state or activity change
  of its own, this change is abandoned at that point.  On a completed
  change the availability statistics and classad are refreshed, the
  resource is updated, and, outside of the Backfill state, entering
  Idle or Retiring triggers an immediate eval().

  new_state - state to enter.
  new_act   - activity to enter.
*/
void
ResState::change( State new_state, Activity new_act )
{
	bool statechange = ( new_state != r_state );
	bool actchange = ( new_act != r_act );

		// Nothing to change, nothing to do.
	if( !statechange && !actchange ) {
		return;
	}

		// leave_action and enter_action return TRUE if they result in
		// a state or activity change.  In these cases, we want to
		// abort the current state change.
	if( leave_action( r_state, r_act, new_state, new_act, statechange ) ) {
		return;
	}

		// At least one of the two flags is set at this point.
	if( statechange && actchange ) {
		dprintf( D_ALWAYS, 
				 "Changing state and activity: %s/%s -> %s/%s\n", 
				 state_to_string(r_state), 
				 activity_to_string(r_act), 
				 state_to_string(new_state),
				 activity_to_string(new_act) );
	} else if( statechange ) {
		dprintf( D_ALWAYS, "Changing state: %s -> %s\n",
				 state_to_string(r_state), 
				 state_to_string(new_state) );
	} else {
		dprintf( D_ALWAYS, "Changing activity: %s -> %s\n",
				 activity_to_string(r_act), 
				 activity_to_string(new_act) );
	}

	const int now = time( NULL );

		// Account for the time spent in the state we are leaving
	updateHistoryTotals(now);

	if( statechange ) {
			// A new state also restarts the activity clock
		m_stime = now;
		m_atime = now;
		r_state = new_state;
		if( r_destination == r_state ) {
				// Destination reached; clear it.
			r_destination = no_state;
		}
	}
	if( actchange ) {
			// Remember whether the activity we are leaving was
			// benchmarking before overwriting it.
		r_act_was_benchmark = ( r_act == benchmarking_act );
		r_act = new_act;
		m_atime = now;
	}

	if( enter_action( r_state, r_act, statechange, actchange ) ) {
		return;
	}

		// Resource availability statistics follow every change
	rip->r_avail_stats.update( r_state, r_act );

		// Reflect the new state and activity in the classad
	this->publish( rip->r_classad, A_ALL );

		// The CM hears about every state or activity change
	rip->update();

#if HAVE_BACKFILL
		/*
		  For Backfill/Idle we do *not* want the idleness/retirement
		  check below; the usual polling interval will take care of
		  our next eval().  So in Backfill we are done here.
		*/
	if( r_state == backfill_state ) {
		return;
	}
#endif /* HAVE_BACKFILL */

	const bool now_idle_or_retiring =
		( r_act == retiring_act ) || ( r_act == idle_act );
	if( now_idle_or_retiring ) {
			// On entering retirement or idleness, check right away
			// whether we should be preempting instead.
		this->eval();
	}
}
Beispiel #4
0
/*
  Perform the work that goes with entering state `s` / activity `a`.

  ResState::change() calls this after it has recorded the new state
  and activity.  Returns TRUE when entering resulted in a further state
  or activity change (or the resource was removed/deleted), in which
  case the caller abandons the rest of its own processing; returns
  FALSE otherwise.

  s           - state being entered.
  a           - activity being entered.
  statechange - true if the state (not just the activity) changed.
  (unnamed)   - second bool; change() passes its activity-changed flag,
                which is not consulted here.
*/
int
ResState::enter_action( State s, Activity a,
						bool statechange, bool ) 
{
#ifdef WIN32
		// Tell the system-tray notifier what this slot is now doing;
		// the notifier is indexed with r_id - 1.
	if (a == busy_act)
		systray_notifier.notifyCondorJobRunning(rip->r_id - 1);
	else if (s == unclaimed_state)
		systray_notifier.notifyCondorIdle(rip->r_id - 1);
	else if (s == preempting_state)
		systray_notifier.notifyCondorJobPreempting(rip->r_id - 1);
	else if (a == suspended_act)
		systray_notifier.notifyCondorJobSuspended(rip->r_id - 1);
	else
		systray_notifier.notifyCondorClaimed(rip->r_id - 1);
#endif

	
	switch( s ) {
	case owner_state:
			// Always want to create new claim objects
		if( rip->r_cur ) {
			delete( rip->r_cur );
		}
		rip->r_cur = new Claim( rip );
			// Drop any preempting claim as well.
		if( rip->r_pre ) {
			rip->remove_pre();
		}
			// See if we should be in owner or unclaimed state
		if( ! rip->eval_is_owner() ) {
				// Really want to be in unclaimed.
			dprintf( D_ALWAYS, "State change: IS_OWNER is false\n" );
			change( unclaimed_state );
			return TRUE; // XXX: change TRUE
		}
		rip->r_reqexp->restore();		
		break;

	case claimed_state:
		rip->r_reqexp->restore();			
		if( statechange ) {
				// Only when we actually arrived in Claimed (as
				// opposed to switching activity within it).
			rip->r_cur->beginClaim();	
				// Update important attributes into the classad.
			rip->r_cur->publish( rip->r_classad, A_PUBLIC );
				// Generate a preempting claim object
			rip->r_pre = new Claim( rip );
		}
		if (a == suspended_act) {
			if( ! rip->r_cur->suspendClaim() ) {
					// Suspending failed: SIGKILL the starter's
					// process group and go to Preempting.
				rip->r_cur->starterKillPg( SIGKILL );
				dprintf( D_ALWAYS,
						 "State change: Error sending signals to starter\n" );
				change( preempting_state );
				return TRUE; // XXX: change TRUE
			}
		}
		else if (a == busy_act) {
			resmgr->start_poll_timer();

			if( rip->inRetirement() ) {

				// We have returned to a busy state (e.g. from
				// suspension) and there is a preempting claim or we
				// are in irreversible retirement, so retire.

				change( retiring_act );
				return TRUE; // XXX: change TRUE
			}
		}
		else if (a == retiring_act) {
			if( ! rip->claimIsActive() ) {
				// The starter exited by the time we got here.
				// No need to wait around in retirement.
				change( preempting_state );
				return TRUE; // XXX: change TRUE
			}
		}
#if HAVE_JOB_HOOKS
		else if (a == idle_act) {
			if (rip->r_cur->type() == CLAIM_FETCH) {
				if (statechange) {
						// We just entered Claimed/Idle on a state change,
						// and we've got a fetch claim, so try to activate it.
					ASSERT(rip->r_cur->hasJobAd());
					rip->spawnFetchedWork();
						// spawnFetchedWork() *always* causes a state change.
					return TRUE;
				}
				else {
						// We just entered Claimed/Idle, but not due
						// to a state change.  The starter must have
						// exited, so we should try to fetch more work.
					rip->tryFetchWork();

						// Starting the fetch doesn't cause a state
						// change, only the handler does, so we should
						// just return FALSE.
					return FALSE;
				}
			}
		}
#endif /* HAVE_JOB_HOOKS */

		break;

	case unclaimed_state:
		rip->r_reqexp->restore();
		break;

#if HAVE_BACKFILL
	case backfill_state:
			// whenever we're in Backfill, we might be available
		rip->r_reqexp->restore();
		
		switch( a ) {

		case killing_act:
				// TODO notice and handle failure 
			rip->hardkill_backfill();
			break;

		case idle_act:
 				/*
				  we want to make sure the ResMgr will do frequent
				  evaluations now that we're in Backfill/Idle, so we
				  can spawn the backfill client quickly.  we do NOT
				  want to just immediately spawn it here, so that we
				  have a little bit of delay (to prevent pegging the
				  CPU in case of failure) and so that if there's a
				  temporary failure to spawn, we don't forget to keep
				  trying... 
				*/
			resmgr->start_poll_timer();
			break;

		case busy_act:
				// nothing special to do (yet)
			break;

		default:
			EXCEPT( "activity %s not yet supported in backfill state", 
					activity_to_string(a) ); 
		}
		break;
#endif /* HAVE_BACKFILL */

	case matched_state:
		rip->r_reqexp->unavail();
		break;

	case preempting_state:
		rip->r_reqexp->unavail();
		switch( a ) {
		case killing_act:
			if( rip->claimIsActive() ) {
					// Hold the job (hold_job(false)) when preemption
					// was requested and a hold is wanted; otherwise
					// hard-kill the starter.
				if( rip->preemptWasTrue() && rip->wants_hold() ) {
					rip->hold_job(false);
				}
				else if( ! rip->r_cur->starterKillHard() ) {
						// starterKillHard returns FALSE if there was
						// an error in kill and we had to send SIGKILL
						// to the starter's process group.
					dprintf( D_ALWAYS,
							 "State change: Error sending signals to starter\n" );
					rip->leave_preempting_state();
					return TRUE; // XXX: change TRUE
				}
			} else {
					// No active claim, so there is nothing to kill.
				rip->leave_preempting_state();
				return TRUE;
			}
			break;

		case vacating_act:
			if( rip->claimIsActive() ) {
					// Same hold-vs-kill decision as for killing_act,
					// but with hold_job(true) and a soft kill.
				if( rip->preemptWasTrue() && rip->wants_hold() ) {
					rip->hold_job(true);
				}
				else if( ! rip->r_cur->starterKillSoft(true) ) {
						// Soft kill failed: SIGKILL the starter's
						// process group and go to Owner.
					rip->r_cur->starterKillPg( SIGKILL );
					dprintf( D_ALWAYS,
							 "State change: Error sending signals to starter\n" );
					change( owner_state );
					return TRUE; // XXX: change TRUE
				}
			} else {
					// No active claim, so there is nothing to vacate.
				rip->leave_preempting_state();
				return TRUE;
			}
			break;

		default:
			EXCEPT( "Unknown activity in ResState::enter_action" );
		}
		break; 	// preempting_state

	case delete_state:
			// Dynamic slots are removed, all others are deleted.
			// Either way we return TRUE so the caller stops
			// processing this change.
		if ( Resource::DYNAMIC_SLOT == rip->get_feature() ) {
			resmgr->removeResource( rip );
		} else {
			resmgr->deleteResource( rip );
		}
		return TRUE;
		break;

	case drained_state:
		rip->r_reqexp->unavail();
		break;
	default: 
		EXCEPT("Unknown state in ResState::enter_action");
	}
		// Entering did not trigger any further state/activity change.
	return FALSE;
}
Beispiel #5
0
/*
  Look up the history bookkeeping for a state/activity combination.

  Returns a HistoryInfo whose time_ptr points at the member holding the
  accumulated time for that combination and whose attr_name is the
  matching ATTR_TOTAL_TIME_* attribute name.  For the Owner and Matched
  states the activity is not examined.  A state, or an activity within
  a state, that has no entry here results in an EXCEPT().

  _state - state to look up.
  _act   - activity to look up within that state.
*/
ResState::HistoryInfo
ResState::getHistoryInfo( State _state, Activity _act ) {
	ResState::HistoryInfo info;
	time_t* total = NULL;
	const char* attr = NULL;
		// Set when _state is known but _act has no entry for it.
	bool bad_activity = false;

	switch (_state) {
	case owner_state:
		total = &m_time_owner_idle;
		attr = ATTR_TOTAL_TIME_OWNER_IDLE;
		break;

	case matched_state:
		total = &m_time_matched_idle;
		attr = ATTR_TOTAL_TIME_MATCHED_IDLE;
		break;

	case unclaimed_state:
		if (_act == idle_act) {
			total = &m_time_unclaimed_idle;
			attr = ATTR_TOTAL_TIME_UNCLAIMED_IDLE;
		} else if (_act == benchmarking_act) {
			total = &m_time_unclaimed_benchmarking;
			attr = ATTR_TOTAL_TIME_UNCLAIMED_BENCHMARKING;
		} else {
			bad_activity = true;
		}
		break;

	case claimed_state:
		if (_act == idle_act) {
			total = &m_time_claimed_idle;
			attr = ATTR_TOTAL_TIME_CLAIMED_IDLE;
		} else if (_act == busy_act) {
			total = &m_time_claimed_busy;
			attr = ATTR_TOTAL_TIME_CLAIMED_BUSY;
		} else if (_act == suspended_act) {
			total = &m_time_claimed_suspended;
			attr = ATTR_TOTAL_TIME_CLAIMED_SUSPENDED;
		} else if (_act == retiring_act) {
			total = &m_time_claimed_retiring;
			attr = ATTR_TOTAL_TIME_CLAIMED_RETIRING;
		} else {
			bad_activity = true;
		}
		break;

	case preempting_state:
		if (_act == vacating_act) {
			total = &m_time_preempting_vacating;
			attr = ATTR_TOTAL_TIME_PREEMPTING_VACATING;
		} else if (_act == killing_act) {
			total = &m_time_preempting_killing;
			attr = ATTR_TOTAL_TIME_PREEMPTING_KILLING;
		} else {
			bad_activity = true;
		}
		break;

	case drained_state:
		if (_act == idle_act) {
			total = &m_time_drained_idle;
			attr = ATTR_TOTAL_TIME_DRAINED_IDLE;
		} else if (_act == retiring_act) {
			total = &m_time_drained_retiring;
			attr = ATTR_TOTAL_TIME_DRAINED_RETIRING;
		} else {
			bad_activity = true;
		}
		break;

	case backfill_state:
		if (_act == idle_act) {
			total = &m_time_backfill_idle;
			attr = ATTR_TOTAL_TIME_BACKFILL_IDLE;
		} else if (_act == busy_act) {
			total = &m_time_backfill_busy;
			attr = ATTR_TOTAL_TIME_BACKFILL_BUSY;
		} else if (_act == killing_act) {
			total = &m_time_backfill_killing;
			attr = ATTR_TOTAL_TIME_BACKFILL_KILLING;
		} else {
			bad_activity = true;
		}
		break;

	default:
		EXCEPT("Unexpected state (%s: %d) in getHistoryInfo()",
			   state_to_string(_state), static_cast<int>(_state));
	}

		// One shared report for every "known state, unknown activity"
		// case above.
	if (bad_activity) {
		EXCEPT("Unexpected activity (%s: %d) in getHistoryInfo() for %s",
			   activity_to_string(_act), static_cast<int>(_act),
			   state_to_string(_state));
	}

	info.time_ptr = total;
	info.attr_name = attr;
	return info;
}