Example #1
void
OfflineCollectorPlugin::mergeClassAd (
	ClassAd &ad,
	char const *key )
{
	ClassAd *old_ad = NULL;

	if (!_ads) return;

	_ads->BeginTransaction ();

	if ( !_ads->LookupClassAd ( key, old_ad ) ) {
		_ads->AbortTransaction ();
		return;
	}

	ad.ResetExpr();
	ExprTree *expr;
	const char *attr_name;
	while (ad.NextExpr(attr_name, expr)) {
		MyString new_val;
		MyString old_val;

		ASSERT( attr_name && expr );

		new_val = ExprTreeToString( expr );

		expr = old_ad->LookupExpr( attr_name );
		if( expr ) {
			old_val = ExprTreeToString( expr );
			if( new_val == old_val ) {
				continue;
			}
		}

			// filter out stuff we never want to mess with
		if( !strcasecmp(attr_name,ATTR_MY_TYPE) ||
			!strcasecmp(attr_name,ATTR_TARGET_TYPE) ||
			!strcasecmp(attr_name,ATTR_AUTHENTICATED_IDENTITY) )
		{
			continue;
		}

		_ads->SetAttribute(key, attr_name, new_val.Value());
	}

	_ads->CommitTransaction ();
}
Example #2
static void
construct_custom_attributes( MyString &attributes, ClassAd* job_ad )
{
    attributes = "";

	bool first_time = true;
	char *tmp = NULL;
	job_ad->LookupString( ATTR_EMAIL_ATTRIBUTES, &tmp );
	if( ! tmp ) {
		return;
	}

	StringList email_attrs;
	email_attrs.initializeFromString( tmp );
	free( tmp );
	tmp = NULL;
		
	ExprTree* expr_tree;
	email_attrs.rewind();
	while( (tmp = email_attrs.next()) ) {
		expr_tree = job_ad->LookupExpr(tmp);
		if( ! expr_tree ) {
            dprintf(D_ALWAYS, "Custom email attribute (%s) is undefined.", 
                    tmp);
			continue;
		}
		if( first_time ) {
			attributes.formatstr_cat( "\n\n" );
			first_time = false;
		}
		attributes.formatstr_cat( "%s = %s\n", tmp, ExprTreeToString(expr_tree) );
	}
    return;
}
Example #3
int
CondorQ::fetchQueueFromDB (ClassAdList &list,
						   char *&lastUpdate,
						   const char *dbconn,
						   CondorError*  /*errstack*/)
{
#ifndef HAVE_EXT_POSTGRESQL
	(void) list;
	(void) lastUpdate;
	(void) dbconn;
#else
	int     		result;
	JobQueueSnapshot	*jqSnapshot;
	const char 		*constraint;
	ClassAd        *ad;
	QuillErrCode   rv;
	ExprTree *tree;

	jqSnapshot = new JobQueueSnapshot(dbconn);

	rv = jqSnapshot->startIterateAllClassAds(clusterarray,
						 numclusters,
						 procarray,
						 numprocs,
						 schedd,
						 FALSE,
						 scheddBirthdate,
						 lastUpdate);

	if (rv == QUILL_FAILURE) {
		delete jqSnapshot;
		return Q_COMMUNICATION_ERROR;
	} else if (rv == JOB_QUEUE_EMPTY) {
		delete jqSnapshot;
		return Q_OK;
	}

	// make the query ad
	if ((result = query.makeQuery (tree)) != Q_OK) {
		delete jqSnapshot;
		return result;
	}

	constraint = ExprTreeToString(tree);
	delete tree;

	ad = getDBNextJobByConstraint(constraint, jqSnapshot);

	while (ad != (ClassAd *) 0) {
		ad->ChainCollapse();
		list.Insert(ad);
		ad = getDBNextJobByConstraint(constraint, jqSnapshot);
	}	

	delete jqSnapshot;
#endif /* HAVE_EXT_POSTGRESQL */

	return Q_OK;
}
Example #4
void
ClassAdLog::LogState(FILE *fp)
{
	LogRecord	*log=NULL;
	ClassAd		*ad=NULL;
	ExprTree	*expr=NULL;
	HashKey		hashval;
	MyString	key;
	const char	*attr_name = NULL;

	// This must always be the first entry in the log.
	log = new LogHistoricalSequenceNumber( historical_sequence_number, m_original_log_birthdate );
	if (log->Write(fp) < 0) {
		EXCEPT("write to %s failed, errno = %d", logFilename(), errno);
	}
	delete log;

	table.startIterations();
	while(table.iterate(ad) == 1) {
		table.getCurrentKey(hashval);
		hashval.sprint(key);
		log = new LogNewClassAd(key.Value(), ad->GetMyTypeName(), ad->GetTargetTypeName());
		if (log->Write(fp) < 0) {
			EXCEPT("write to %s failed, errno = %d", logFilename(), errno);
		}
		delete log;
			// Unchain the ad -- we just want to write out this ad's exprs,
			// not all the exprs in the chained ad as well.
		AttrList *chain = dynamic_cast<AttrList*>(ad->GetChainedParentAd());
		ad->Unchain();
		ad->ResetName();
		attr_name = ad->NextNameOriginal();
		while (attr_name) {
			expr = ad->LookupExpr(attr_name);
				// This conditional used to check whether the ExprTree is
				// invisible, but no codepath sets any attributes
				// invisible for this call.
			if (expr) {
				log = new LogSetAttribute(key.Value(), attr_name,
										  ExprTreeToString(expr));
				if (log->Write(fp) < 0) {
					EXCEPT("write to %s failed, errno = %d", logFilename(),
						   errno);
				}
				delete log;
			}
			attr_name = ad->NextNameOriginal();
		}
			// ok, now that we're done writing out this ad, restore the chain
		ad->ChainToAd(chain);
	}
	if (fflush(fp) != 0) {
		EXCEPT("fflush of %s failed, errno = %d", logFilename(), errno);
	}
	if (condor_fsync(fileno(fp)) < 0) {
		EXCEPT("fsync of %s failed, errno = %d", logFilename(), errno);
	} 
}
Example #5
void EmitExpression(unsigned int mode, const char *attr, ExprTree* attr_expr)
{
	if (attr_expr == NULL)
	{
		dprintf(mode, "%s = UNDEFINED\n", attr);
	}
	else
	{
		dprintf(mode, "%s = %s\n", attr, ExprTreeToString(attr_expr));
	}
}
Example #6
int
CondorQ::fetchQueueFromHostAndProcess ( const char *host,
										StringList &attrs,
										int fetch_opts,
										int match_limit,
										condor_q_process_func process_func,
										void * process_func_data,
										int useFastPath,
										CondorError* errstack,
										ClassAd ** psummary_ad)
{
	Qmgr_connection *qmgr;
	ExprTree		*tree;
	char 			*constraint;
	int     		result;

	// make the query ad
	if ((result = query.makeQuery (tree)) != Q_OK)
		return result;
	constraint = strdup( ExprTreeToString( tree ) );
	delete tree;

	if (useFastPath > 1) {
		int result = fetchQueueFromHostAndProcessV2(host, constraint, attrs, fetch_opts, match_limit, process_func, process_func_data, connect_timeout, useFastPath, errstack, psummary_ad);
		free( constraint);
		return result;
	}

	if (fetch_opts != fetch_Jobs) {
		free( constraint );
		return Q_UNSUPPORTED_OPTION_ERROR;
	}

	/*
	 connect to the Q manager.
	 use a timeout of 20 seconds, and a read-only connection.
	 why 20 seconds?  because careful research by Derek has shown
	 that whenever one needs a periodic time value, 20 is always
	 optimal.  :^).
	*/
	init();  // needed to get default connect_timeout
	if( !(qmgr = ConnectQ( host, connect_timeout, true, errstack)) ) {
		free( constraint );
		return Q_SCHEDD_COMMUNICATION_ERROR;
	}

	// get the ads and filter them
	result = getFilterAndProcessAds (constraint, attrs, match_limit, process_func, process_func_data, useFastPath);

	DisconnectQ (qmgr);
	free( constraint );
	return result;
}
Example #7
bool
VMType::createConfigUsingScript(const char* configfile)
{
	vmprintf(D_FULLDEBUG, "Inside VMType::createConfigUsingScript\n");

	if( !configfile || m_scriptname.IsEmpty() ) {
		return false;
	}

	// Set temporary environments for script program
	StringList name_list;

	const char *name;
	ExprTree* expr = NULL;

	m_classAd.ResetExpr();
	while( m_classAd.NextExpr(name, expr) ) {
		if( !strncasecmp( name, "JobVM", strlen("JobVM") ) ||
			!strncasecmp( name, "VMPARAM", strlen("VMPARAM") )) {

			name_list.append(name);
			SetEnv(name, ExprTreeToString(expr));
		}
	}

	ArgList systemcmd;
	if( m_prog_for_script.IsEmpty() == false ) {
		systemcmd.AppendArg(m_prog_for_script);
	}
	systemcmd.AppendArg(m_scriptname);
	systemcmd.AppendArg("createconfig");
	systemcmd.AppendArg(configfile);

	int result = systemCommand(systemcmd, m_file_owner);

	// UnSet temporary environments for script program
	const char *tmp_name = NULL;
	name_list.rewind();
	while( (tmp_name = name_list.next()) != NULL ) {
		UnsetEnv(tmp_name);
	}

	if( result != 0 ) {
		vmprintf(D_ALWAYS, "Failed to create Configuration file('%s') using "
				"script program('%s')\n", configfile, 
				m_scriptname.Value());
		return false;
	}
	return true;
}
Example #8
bool ClassAdCollection::NewClassAd(const char* key, ClassAd* ad)
{
  LogRecord* log=new LogNewClassAd(key,GetMyTypeName(*ad),GetTargetTypeName(*ad));
  ClassAdLog::AppendLog(log);
  const char *name;
  ExprTree* expr;
  ad->ResetExpr();
  while (ad->NextExpr(name, expr)) {
    LogRecord* l=new LogSetAttribute(key,name,ExprTreeToString(expr));
    ClassAdLog::AppendLog(l);
  }
  // return AddClassAd(0,key);
  return true;
}
Example #9
bool ClassAdCollection::CheckClassAd(BaseCollection* Coll,const MyString& OID, ClassAd* Ad) 
{
  if (Coll->Type()==PartitionParent_e) {
    PartitionParent* ParentColl=(PartitionParent*) Coll;
    StringSet Values;
    MyString AttrName;
    MyString AttrValue;
    ParentColl->Attributes.StartIterations();
// printf("Checking OID %s\n",OID.Value());
    while (ParentColl->Attributes.Iterate(AttrName)) {
      ExprTree* expr=Ad->LookupExpr(AttrName.Value());
      if (expr) {
		  AttrValue = ExprTreeToString( expr );
      } else {
		  AttrValue = "";
	  }
      Values.Add(AttrValue);
    }
// Values.StartIterations(); while (Values.Iterate(AttrValue)) { printf("Val: AttrValue=%s\n",AttrValue.Value()); }
    int CoID;
    PartitionChild* ChildColl=NULL;
    ParentColl->Children.StartIterations();
    while (ParentColl->Children.Iterate(CoID)) {
      if (Collections.lookup(CoID,Coll)==-1) continue;
      ChildColl=(PartitionChild*) Coll;
// ChildColl->Values.StartIterations(); while (ChildColl->Values.Iterate(AttrValue)) { printf("ChildVal: AttrValue=%s\n",AttrValue.Value()); }
      if (EqualSets(ChildColl->Values,Values)) break;
      ChildColl=NULL;
    }

    if (ChildColl==NULL) {
      // Create a new child collection
      ChildColl=new PartitionChild(ParentColl->Rank,Values);
      CoID=LastCoID+1;
      if (Collections.insert(CoID,ChildColl)==-1) return false;
      LastCoID=CoID;

      // Add to parent's children
      ParentColl->Children.Add(CoID);
    }

    // Add to child
    AddClassAd(CoID,OID,Ad);
    return false;
  }
  else {
    return Coll->CheckClassAd(Ad);
  }
}
Example #10
int CondorQ::
fetchQueue (ClassAdList &list, StringList &attrs, ClassAd *ad, CondorError* errstack)
{
	Qmgr_connection *qmgr;
	ExprTree		*tree;
	int     		result;
	char    		scheddString [32];
	const char 		*constraint;

	bool useFastPath = false;

	// make the query ad
	if ((result = query.makeQuery (tree)) != Q_OK)
		return result;
	constraint = ExprTreeToString( tree );
	delete tree;

	// connect to the Q manager
	init();  // needed to get default connect_timeout
	if (ad == 0)
	{
		// local case
		if( !(qmgr = ConnectQ( 0, connect_timeout, true, errstack)) ) {
			errstack->push("TEST", 0, "FOO");
			return Q_SCHEDD_COMMUNICATION_ERROR;
		}
		useFastPath = true;
	}
	else
	{
		// remote case to handle condor_globalq
		if (!ad->LookupString (ATTR_SCHEDD_IP_ADDR, scheddString, sizeof(scheddString)))
			return Q_NO_SCHEDD_IP_ADDR;

		if( !(qmgr = ConnectQ( scheddString, connect_timeout, true, errstack)) )
			return Q_SCHEDD_COMMUNICATION_ERROR;

	}

	// get the ads and filter them
	getAndFilterAds (constraint, attrs, list, useFastPath);

	DisconnectQ (qmgr);
	return Q_OK;
}
Example #11
int CondorQ::
fetchQueueFromHost (ClassAdList &list, StringList &attrs, const char *host, char const *schedd_version, CondorError* errstack)
{
	Qmgr_connection *qmgr;
	ExprTree		*tree;
	const char		*constraint;
	int     		result;

	// make the query ad
	if ((result = query.makeQuery (tree)) != Q_OK)
		return result;
	constraint = ExprTreeToString( tree );
	delete tree;

	/*
	 connect to the Q manager.
	 use a timeout of 20 seconds, and a read-only connection.
	 why 20 seconds?  because careful research by Derek has shown
	 that whenever one needs a periodic time value, 20 is always
	 optimal.  :^).
	*/
	init();  // needed to get default connect_timeout
	if( !(qmgr = ConnectQ( host, connect_timeout, true, errstack)) )
		return Q_SCHEDD_COMMUNICATION_ERROR;

	int useFastPath = 0;
	if( schedd_version && *schedd_version ) {
		CondorVersionInfo v(schedd_version);
		useFastPath = v.built_since_version(6,9,3) ? 1 : 0;
		if (v.built_since_version(8, 1, 5)) {
			useFastPath = 2;
		}
	}

	// get the ads and filter them
	result = getAndFilterAds (constraint, attrs, -1, list, useFastPath);

	DisconnectQ (qmgr);
	return result;
}
Example #12
int
JobTransforms::set_dirty_attributes(ClassAd *ad, int cluster, int proc)
{
	int num_attrs_set = 0;
	const char *rhstr = 0;
	ExprTree * tree;

	for ( classad::ClassAd::dirtyIterator it = ad->dirtyBegin();
		  it != ad->dirtyEnd(); ++it ) 
	{
		rhstr = NULL;
		tree = ad->Lookup( *it );
		if ( tree ) {
			rhstr = ExprTreeToString( tree );
		} else {
			// If an attribute is marked as dirty but Lookup() on this
			// attribute fails, it means that attribute was deleted.
			// We handle this by inserting into the transaction log a
			// SetAttribute to UNDEFINED, which will work properly even if we
			// are dealing with a chained job ad that has the attribute set differently
			// in the cluster ad.
			rhstr = "UNDEFINED";
		}
		if( !rhstr) { 
			dprintf(D_ALWAYS,"(%d.%d) job_transforms: Problem processing classad\n",
				cluster, proc);
			return -1;
		}
		dprintf(D_FULLDEBUG, "(%d.%d) job_transforms: Setting %s = %s\n",
				cluster, proc, it->c_str(), rhstr);
		if( SetAttribute(cluster, proc, it->c_str(), rhstr) == -1 ) {
			dprintf(D_ALWAYS,"(%d.%d) job_transforms: Failed to set %s = %s\n",
				cluster, proc, it->c_str(), rhstr);
			return -2;
		}
		num_attrs_set++;
	}

	return num_attrs_set;
}
Example #13
int
pseudo_get_job_attr( const char *name, MyString &expr )
{
	RemoteResource *remote;
	if (parallelMasterResource == NULL) {
		remote = thisRemoteResource;
	} else {
		remote = parallelMasterResource;
	}
	ClassAd *ad = remote->getJobAd();

	ExprTree *e = ad->LookupExpr(name);
	if(e) {
		expr = ExprTreeToString(e);
		dprintf(D_SYSCALLS,"pseudo_get_job_attr(%s) = %s\n",name,expr.Value());
		return 0;
	} else {
		dprintf(D_SYSCALLS,"pseudo_get_job_attr(%s) is UNDEFINED\n",name);
		expr = "UNDEFINED";
		return 0;
	}
}
Example #14
int
CondorQ::fetchQueueFromHostAndProcess ( const char *host,
										StringList &attrs,
										process_function process_func,
										bool useFastPath,
										CondorError* errstack)
{
	Qmgr_connection *qmgr;
	ExprTree		*tree;
	char 			*constraint;
	int     		result;

	// make the query ad
	if ((result = query.makeQuery (tree)) != Q_OK)
		return result;
	constraint = strdup( ExprTreeToString( tree ) );
	delete tree;

	/*
	 connect to the Q manager.
	 use a timeout of 20 seconds, and a read-only connection.
	 why 20 seconds?  because careful research by Derek has shown
	 that whenever one needs a periodic time value, 20 is always
	 optimal.  :^).
	*/
	init();  // needed to get default connect_timeout
	if( !(qmgr = ConnectQ( host, connect_timeout, true, errstack)) ) {
		free( constraint );
		return Q_SCHEDD_COMMUNICATION_ERROR;
	}

	// get the ads and filter them
	result = getFilterAndProcessAds (constraint, attrs, process_func, useFastPath);

	DisconnectQ (qmgr);
	free( constraint );
	return result;
}
Example #15
const char *ExprTreeToString( const classad::ExprTree *expr )
{
	static std::string buffer;
	buffer = "";
	return ExprTreeToString(expr, buffer);
}
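
This overload returns a pointer into a static buffer, so the caller must use or copy the result before the next call. Below is a minimal usage sketch of the pattern shared by the snippets on this page (look up an attribute, then stringify it). The helper name and include path are assumptions for illustration, not part of the library.

#include "classad/classad.h"

// Hypothetical helper: render one attribute of a ClassAd as text.
const char *attr_as_string( classad::ClassAd &ad, const char *name )
{
	classad::ExprTree *expr = ad.Lookup( name );
	if ( !expr ) {
		return NULL;	// attribute not present in the ad
	}
	// Points into ExprTreeToString()'s static buffer; copy the
	// result if it must survive a later call.
	return ExprTreeToString( expr );
}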
Example #16
// TODO: this code doesn't work as expected
// everything seems to get set to EXPR_TYPE
bool
LiveJobImpl::Get ( const char *_name, const Attribute *&_attribute ) const
{
    // our job ad is chained so lookups will
    // encompass our parent ad as well as the child

    // parse the type
    ExprTree *expr = NULL;
    if ( ! ( expr = m_full_ad->Lookup ( _name ) ) )
    {
        dprintf ( D_FULLDEBUG,
                  "warning: failed to lookup attribute %s in job '%s'\n", _name, m_job->GetKey() );
        return false;
    }
    // decode the type
    classad::Value value;
    m_full_ad->EvaluateExpr(expr,value);
    switch ( value.GetType() )
    {
    case classad::Value::INTEGER_VALUE:
    {
        int i;
        if ( !m_full_ad->LookupInteger ( _name, i ) )
        {
            return false;
        }
        // Keep the string alive; taking c_str() of the to_string()
        // temporary would leave a dangling pointer.
        std::string int_str = to_string<int> ( i, dec );
        _attribute = new Attribute ( Attribute::INTEGER_TYPE, int_str.c_str() );
        return true;
    }
    case classad::Value::REAL_VALUE:
    {
        float f;
        if ( !m_full_ad->LookupFloat ( _name, f ) )
        {
            return false;
        }
        std::string float_str = to_string<float> ( f, dec );
        _attribute = new Attribute ( Attribute::FLOAT_TYPE, float_str.c_str() );
        return true;
    }
    case classad::Value::STRING_VALUE:
    {
        std::string str;
        if ( !m_full_ad->LookupString ( _name, str ) )
        {
            return false;
        }
        _attribute = new Attribute ( Attribute::STRING_TYPE, str.c_str() );
        return true;
    }
    default:
    {
        ExprTree* tree = NULL;
        if ( ! ( tree = m_full_ad->Lookup ( _name ) ) )
        {
            return false;
        }
        const char* rhs;
        rhs = ExprTreeToString( expr );
        _attribute = new Attribute ( Attribute::EXPR_TYPE, rhs );
        return true;
    }
    }

    return false;
}
Example #17
void
doContactSchedd()
{
	int rc;
	Qmgr_connection *schedd;
	BaseJob *curr_job;
	ClassAd *next_ad;
	char expr_buf[12000];
	bool schedd_updates_complete = false;
	bool schedd_deletes_complete = false;
	bool add_remove_jobs_complete = false;
	bool update_jobs_complete = false;
	bool commit_transaction = true;
	int failure_line_num = 0;
	bool send_reschedule = false;
	std::string error_str = "";
	StringList dirty_job_ids;
	char *job_id_str;
	PROC_ID job_id;
	CondorError errstack;

	dprintf(D_FULLDEBUG,"in doContactSchedd()\n");

	initJobExprs();

	contactScheddTid = TIMER_UNSET;

	// vacateJobs
	/////////////////////////////////////////////////////
	if ( pendingScheddVacates.getNumElements() != 0 ) {
		std::string buff;
		StringList job_ids;
		VacateRequest curr_request;

		int result;
		ClassAd* rval;

		pendingScheddVacates.startIterations();
		while ( pendingScheddVacates.iterate( curr_request ) != 0 ) {
			formatstr( buff, "%d.%d", curr_request.job->procID.cluster,
						  curr_request.job->procID.proc );
			job_ids.append( buff.c_str() );
		}

		char *tmp = job_ids.print_to_string();
		if ( tmp ) {
			dprintf( D_FULLDEBUG, "Calling vacateJobs on %s\n", tmp );
			free(tmp);
			tmp = NULL;
		}

		rval = ScheddObj->vacateJobs( &job_ids, VACATE_FAST, &errstack );
		if ( rval == NULL ) {
			formatstr( error_str, "vacateJobs returned NULL, CondorError: %s!",
							   errstack.getFullText().c_str() );
			goto contact_schedd_failure;
		} else {
			pendingScheddVacates.startIterations();
			while ( pendingScheddVacates.iterate( curr_request ) != 0 ) {
				formatstr( buff, "job_%d_%d", curr_request.job->procID.cluster,
							  curr_request.job->procID.proc );
				if ( !rval->LookupInteger( buff.c_str(), result ) ) {
					dprintf( D_FULLDEBUG, "vacateJobs returned malformed ad\n" );
					EXCEPT( "vacateJobs returned malformed ad" );
				} else {
					dprintf( D_FULLDEBUG, "   %d.%d vacate result: %d\n",
							 curr_request.job->procID.cluster,
							 curr_request.job->procID.proc,result);
					pendingScheddVacates.remove( curr_request.job->procID );
					curr_request.result = (action_result_t)result;
					curr_request.job->SetEvaluateState();
					completedScheddVacates.insert( curr_request.job->procID,
												   curr_request );
				}
			}
			delete rval;
		}
	}


	schedd = ConnectQ( ScheddAddr, QMGMT_TIMEOUT, false, NULL, myUserName, CondorVersion() );
	if ( !schedd ) {
		error_str = "Failed to connect to schedd!";
		goto contact_schedd_failure;
	}


	// CheckLeases
	/////////////////////////////////////////////////////
	if ( checkLeasesSignaled ) {

		dprintf( D_FULLDEBUG, "querying for renewed leases\n" );

		// Grab the lease attributes of all the jobs in our global hashtable.

		BaseJob::JobsByProcId.startIterations();

		while ( BaseJob::JobsByProcId.iterate( curr_job ) != 0 ) {
			int new_expiration;

			rc = GetAttributeInt( curr_job->procID.cluster,
								  curr_job->procID.proc,
								  ATTR_TIMER_REMOVE_CHECK,
								  &new_expiration );
			if ( rc < 0 ) {
				if ( errno == ETIMEDOUT ) {
					failure_line_num = __LINE__;
					commit_transaction = false;
					goto contact_schedd_disconnect;
				} else {
						// This job doesn't have a lease from
						// the submitter. Skip it.
					continue;
				}
			}
			curr_job->UpdateJobLeaseReceived( new_expiration );
		}

		checkLeasesSignaled = false;
	}	// end of handling check leases


	// AddJobs
	/////////////////////////////////////////////////////
	if ( addJobsSignaled || firstScheddContact ) {
		int num_ads = 0;

		dprintf( D_FULLDEBUG, "querying for new jobs\n" );

		// Make sure we grab all Globus Universe jobs (except held ones
		// that we previously indicated we were done with)
		// when we first start up in case we're recovering from a
		// shutdown/meltdown.
		// Otherwise, grab all jobs that are unheld and aren't marked as
		// currently being managed and aren't marked as not matched.
		// If JobManaged is undefined, equate it with false.
		// If Matched is undefined, equate it with true.
		// NOTE: Schedds from Condor 6.6 and earlier don't include
		//   "(Universe==9)" in the constraint they give to the gridmanager,
		//   so this gridmanager will pull down non-globus-universe ads,
		//   although it won't use them. This is inefficient but not
		//   incorrect behavior.
		if ( firstScheddContact ) {
			// Grab all jobs for us to manage. This expression is a
			// derivative of the expression below for new jobs. We add
			// "|| Managed =?= TRUE" to also get jobs our previous
			// incarnation was in the middle of managing when it died
			// (if it died unexpectedly). With the new term, the
			// "&& Managed =!= TRUE" from the new jobs expression becomes
			// superfluous (by boolean logic), so we drop it.
			sprintf( expr_buf,
					 "%s && %s && ((%s && %s) || %s)",
					 expr_schedd_job_constraint.c_str(), 
					 expr_not_completely_done.c_str(),
					 expr_matched_or_undef.c_str(),
					 expr_not_held.c_str(),
					 expr_managed.c_str()
					 );
		} else {
			// Grab new jobs for us to manage
			sprintf( expr_buf,
					 "%s && %s && %s && %s && %s",
					 expr_schedd_job_constraint.c_str(), 
					 expr_not_completely_done.c_str(),
					 expr_matched_or_undef.c_str(),
					 expr_not_held.c_str(),
					 expr_not_managed.c_str()
					 );
		}
		dprintf( D_FULLDEBUG,"Using constraint %s\n",expr_buf);
		next_ad = GetNextJobByConstraint( expr_buf, 1 );
		while ( next_ad != NULL ) {
			PROC_ID procID;
			BaseJob *old_job;
			int job_is_matched = 1;		// default to true if not in ClassAd

			next_ad->LookupInteger( ATTR_CLUSTER_ID, procID.cluster );
			next_ad->LookupInteger( ATTR_PROC_ID, procID.proc );
			bool job_is_managed = jobExternallyManaged(next_ad);
			next_ad->LookupBool(ATTR_JOB_MATCHED,job_is_matched);

			if ( BaseJob::JobsByProcId.lookup( procID, old_job ) != 0 ) {

				JobType *job_type = NULL;
				BaseJob *new_job = NULL;

				// job had better be either managed or matched! (or both)
				ASSERT( job_is_managed || job_is_matched );

				if ( MustExpandJobAd( next_ad ) ) {
					// Get the expanded ClassAd from the schedd, which
					// has the GridResource filled in with info from
					// the matched ad.
					delete next_ad;
					next_ad = NULL;
					next_ad = GetJobAd(procID.cluster,procID.proc);
					if ( next_ad == NULL && errno == ETIMEDOUT ) {
						failure_line_num = __LINE__;
						commit_transaction = false;
						goto contact_schedd_disconnect;
					}
					if ( next_ad == NULL ) {
						// We may get here if it was not possible to expand
						// one of the $$() expressions.  We don't want to
						// roll back the transaction and blow away the
						// hold that the schedd just put on the job, so
						// simply skip over this ad.
						dprintf(D_ALWAYS,"Failed to get expanded job ClassAd from Schedd for %d.%d.  errno=%d\n",procID.cluster,procID.proc,errno);
						goto contact_schedd_next_add_job;
					}
				}

				// Search our job types for one that'll handle this job
				jobTypes.Rewind();
				while ( jobTypes.Next( job_type ) ) {
					if ( job_type->AdMatchFunc( next_ad ) ) {

						// Found one!
						dprintf( D_FULLDEBUG, "Using job type %s for job %d.%d\n",
								 job_type->Name, procID.cluster, procID.proc );
						break;
					}
				}

				if ( job_type != NULL ) {
					new_job = job_type->CreateFunc( next_ad );
				} else {
					dprintf( D_ALWAYS, "No handlers for job %d.%d\n",
							 procID.cluster, procID.proc );
					new_job = new BaseJob( next_ad );
				}

				ASSERT(new_job);
				new_job->SetEvaluateState();
				dprintf(D_ALWAYS,"Found job %d.%d --- inserting\n",
						new_job->procID.cluster,new_job->procID.proc);
				num_ads++;

				if ( !job_is_managed ) {
					rc = tSetAttributeString( new_job->procID.cluster,
									   new_job->procID.proc,
									   ATTR_JOB_MANAGED,
									   MANAGED_EXTERNAL);
					if ( rc < 0 ) {
						failure_line_num = __LINE__;
						commit_transaction = false;
						goto contact_schedd_disconnect;
					}
				}

			} else {

				// We already know about this job, skip
				// But also set Managed=true on the schedd so that it won't
				// keep signalling us about it
				delete next_ad;
				rc = tSetAttributeString( procID.cluster, procID.proc,
								   ATTR_JOB_MANAGED, MANAGED_EXTERNAL );
				if ( rc < 0 ) {
					failure_line_num = __LINE__;
					commit_transaction = false;
					goto contact_schedd_disconnect;
				}

			}

contact_schedd_next_add_job:
			next_ad = GetNextJobByConstraint( expr_buf, 0 );
		}	// end of while next_ad
		if ( errno == ETIMEDOUT ) {
			failure_line_num = __LINE__;
			commit_transaction = false;
			goto contact_schedd_disconnect;
		}

		dprintf(D_FULLDEBUG,"Fetched %d new job ads from schedd\n",num_ads);
	}	// end of handling add jobs


	// RemoveJobs
	/////////////////////////////////////////////////////

	// We always want to perform this check. Otherwise, we may overwrite a
	// REMOVED/HELD/COMPLETED status with something else below.
	{
		int num_ads = 0;

		dprintf( D_FULLDEBUG, "querying for removed/held jobs\n" );

		// Grab jobs marked as REMOVED/COMPLETED or marked as HELD that we
		// haven't previously indicated that we're done with (by setting
		// JobManaged to "Schedd").
		sprintf( expr_buf, "(%s) && (%s) && (%s == %d || %s == %d || (%s == %d && %s =?= \"%s\"))",
				 ScheddJobConstraint, expr_not_completely_done.c_str(),
				 ATTR_JOB_STATUS, REMOVED,
				 ATTR_JOB_STATUS, COMPLETED, ATTR_JOB_STATUS, HELD,
				 ATTR_JOB_MANAGED, MANAGED_EXTERNAL );

		dprintf( D_FULLDEBUG,"Using constraint %s\n",expr_buf);
		next_ad = GetNextJobByConstraint( expr_buf, 1 );
		while ( next_ad != NULL ) {
			PROC_ID procID;
			BaseJob *next_job;
			int curr_status;

			next_ad->LookupInteger( ATTR_CLUSTER_ID, procID.cluster );
			next_ad->LookupInteger( ATTR_PROC_ID, procID.proc );
			next_ad->LookupInteger( ATTR_JOB_STATUS, curr_status );

			if ( BaseJob::JobsByProcId.lookup( procID, next_job ) == 0 ) {
				// Should probably skip jobs we already have marked as
				// held or removed

				next_job->JobAdUpdateFromSchedd( next_ad, true );
				num_ads++;

			} else if ( curr_status == REMOVED ) {

				// If we don't know about the job, act like we got an
				// ADD_JOBS signal from the schedd the next time we
				// connect, so that we'll create a Job object for it
				// and decide how it needs to be handled.
				// TODO The AddJobs and RemoveJobs queries should be
				//   combined into a single query.
				dprintf( D_ALWAYS, 
						 "Don't know about removed job %d.%d. "
						 "Will treat it as a new job to manage\n",
						 procID.cluster, procID.proc );
				addJobsSignaled = true;

			} else {

				dprintf( D_ALWAYS, "Don't know about held/completed job %d.%d. "
						 "Ignoring it\n",
						 procID.cluster, procID.proc );

			}

			delete next_ad;
			next_ad = GetNextJobByConstraint( expr_buf, 0 );
		}
		if ( errno == ETIMEDOUT ) {
			failure_line_num = __LINE__;
			commit_transaction = false;
			goto contact_schedd_disconnect;
		}

		dprintf(D_FULLDEBUG,"Fetched %d job ads from schedd\n",num_ads);
	}

	if ( RemoteCommitTransaction() < 0 ) {
		failure_line_num = __LINE__;
		commit_transaction = false;
		goto contact_schedd_disconnect;
	}

	add_remove_jobs_complete = true;


	// Retrieve dirty attributes
	/////////////////////////////////////////////////////
	if ( updateJobsSignaled ) {
		dprintf( D_FULLDEBUG, "querying for jobs with attribute updates\n" );

		sprintf( expr_buf, "%s && %s && %s && %s",
				 expr_schedd_job_constraint.c_str(), 
				 expr_not_completely_done.c_str(),
				 expr_not_held.c_str(),
				 expr_managed.c_str()
				 );
		dprintf( D_FULLDEBUG,"Using constraint %s\n",expr_buf);
		next_ad = GetNextDirtyJobByConstraint( expr_buf, 1 );
		while ( next_ad != NULL ) {
			ClassAd updates;
			char str[PROC_ID_STR_BUFLEN];
			next_ad->LookupInteger( ATTR_CLUSTER_ID, job_id.cluster );
			next_ad->LookupInteger( ATTR_PROC_ID, job_id.proc );
			if ( GetDirtyAttributes( job_id.cluster, job_id.proc, &updates ) < 0 ) {
				dprintf( D_ALWAYS, "Failed to retrieve dirty attributes for job %d.%d\n", job_id.cluster, job_id.proc );
				failure_line_num = __LINE__;
				delete next_ad;
				goto contact_schedd_disconnect;
			}
			else {
				dprintf (D_FULLDEBUG, "Retrieved updated attributes for job %d.%d\n", job_id.cluster, job_id.proc);
				dPrintAd(D_JOB, updates);
			}
			if ( BaseJob::JobsByProcId.lookup( job_id, curr_job ) == 0 ) {
				curr_job->JobAdUpdateFromSchedd( &updates, false );
				ProcIdToStr( job_id, str );
				dirty_job_ids.append( str );
			}
			else {
				dprintf( D_ALWAYS, "Don't know about updated job %d.%d. "
						 "Ignoring it\n",
						 job_id.cluster, job_id.proc );
			}
			delete next_ad;
			next_ad = GetNextDirtyJobByConstraint( expr_buf, 0 );
		}
	}
	update_jobs_complete = true;

//	if ( BeginTransaction() < 0 ) {
	errno = 0;
	BeginTransaction();
	if ( errno == ETIMEDOUT ) {
		failure_line_num = __LINE__;
		commit_transaction = false;
		goto contact_schedd_disconnect;
	}


	// requestJobStatus
	/////////////////////////////////////////////////////
	if ( pendingJobStatus.getNumElements() != 0 ) {
		JobStatusRequest curr_request;

		pendingJobStatus.startIterations();
		while ( pendingJobStatus.iterate( curr_request ) != 0 ) {

			int status;

			rc = GetAttributeInt( curr_request.job_id.cluster,
								  curr_request.job_id.proc,
								  ATTR_JOB_STATUS, &status );
			if ( rc < 0 ) {
				if ( errno == ETIMEDOUT ) {
					failure_line_num = __LINE__;
					commit_transaction = false;
					goto contact_schedd_disconnect;
				} else {
						// The job is not in the schedd's job queue. This
						// probably means that the user did a condor_rm -f,
						// so return a job status of REMOVED.
					status = REMOVED;
				}
			}
				// return status
			dprintf( D_FULLDEBUG, "%d.%d job status: %d\n",
					 curr_request.job_id.cluster,
					 curr_request.job_id.proc, status );
			pendingJobStatus.remove( curr_request.job_id );
			curr_request.job_status = status;
			daemonCore->Reset_Timer( curr_request.tid, 0 );
			completedJobStatus.insert( curr_request.job_id,
									   curr_request );
		}

	}


	// Update existing jobs
	/////////////////////////////////////////////////////
	ScheddUpdateRequest *curr_request;
	pendingScheddUpdates.startIterations();

	while ( pendingScheddUpdates.iterate( curr_request ) != 0 ) {

		curr_job = curr_request->m_job;
		dprintf(D_FULLDEBUG,"Updating classad values for %d.%d:\n",
				curr_job->procID.cluster, curr_job->procID.proc);
		const char *attr_name;
		const char *attr_value;
		ExprTree *expr;
		bool fake_job_in_queue = false;
		curr_job->jobAd->ResetExpr();
		while ( curr_job->jobAd->NextDirtyExpr(attr_name, expr) == true &&
				fake_job_in_queue == false ) {
			attr_value = ExprTreeToString( expr );

			dprintf(D_FULLDEBUG,"   %s = %s\n",attr_name,attr_value);
			rc = SetAttribute( curr_job->procID.cluster,
							   curr_job->procID.proc,
							   attr_name,
							   attr_value);
			if ( rc < 0 ) {
				if ( errno == ETIMEDOUT ) {
					failure_line_num = __LINE__;
					commit_transaction = false;
					goto contact_schedd_disconnect;
				} else {
						// The job is not in the schedd's job queue. This
						// probably means that the user did a condor_rm -f,
						// so pretend that all updates for the job succeed.
						// Otherwise, we'll never make forward progress on
						// the job.
						// TODO We should also fake a job status of REMOVED
						//   to the job, so it can do what cleanup it can.
					fake_job_in_queue = true;
					break;
				}
			}
		}

	}

	if ( RemoteCommitTransaction() < 0 ) {
		failure_line_num = __LINE__;
		commit_transaction = false;
		goto contact_schedd_disconnect;
	}

	schedd_updates_complete = true;


	// Delete existing jobs
	/////////////////////////////////////////////////////
	errno = 0;
	BeginTransaction();
	if ( errno == ETIMEDOUT ) {
		failure_line_num = __LINE__;
		commit_transaction = false;
		goto contact_schedd_disconnect;
	}

	pendingScheddUpdates.startIterations();

	while ( pendingScheddUpdates.iterate( curr_request ) != 0 ) {

		curr_job = curr_request->m_job;
		if ( curr_job->deleteFromSchedd ) {
			dprintf(D_FULLDEBUG,"Deleting job %d.%d from schedd\n",
					curr_job->procID.cluster, curr_job->procID.proc);
			rc = DestroyProc(curr_job->procID.cluster,
							 curr_job->procID.proc);
				// NOENT means the job doesn't exist.  Good enough for us.
			if ( rc < 0 && rc != DESTROYPROC_ENOENT) {
				failure_line_num = __LINE__;
				commit_transaction = false;
				goto contact_schedd_disconnect;
			}
		}

	}

	if ( RemoteCommitTransaction() < 0 ) {
		failure_line_num = __LINE__;
		commit_transaction = false;
		goto contact_schedd_disconnect;
	}

	schedd_deletes_complete = true;


 contact_schedd_disconnect:
	DisconnectQ( schedd, commit_transaction );

	if ( add_remove_jobs_complete == true ) {
		firstScheddContact = false;
		addJobsSignaled = false;
	} else {
		formatstr( error_str, "Schedd connection error during Add/RemoveJobs at line %d!", failure_line_num );
		goto contact_schedd_failure;
	}

	if ( update_jobs_complete == true ) {
		updateJobsSignaled = false;
	} else {
		formatstr( error_str, "Schedd connection error during dirty attribute update at line %d!", failure_line_num );
		goto contact_schedd_failure;
	}

	if ( schedd_updates_complete == false ) {
		formatstr( error_str, "Schedd connection error during updates at line %d!", failure_line_num );
		goto contact_schedd_failure;
	}

	// Clear dirty bits for all jobs updated
	if ( !dirty_job_ids.isEmpty() ) {
		ClassAd *rval;
		dprintf( D_FULLDEBUG, "Calling clearDirtyAttrs on %d jobs\n",
				 dirty_job_ids.number() );
		dirty_job_ids.rewind();
		rval = ScheddObj->clearDirtyAttrs( &dirty_job_ids, &errstack );
		if ( rval == NULL ) {
			dprintf(D_ALWAYS, "Failed to notify schedd to clear dirty attributes.  CondorError: %s\n", errstack.getFullText().c_str() );
		}
		delete rval;
	}

	// Wake up jobs that had schedd updates pending and delete job
	// objects that wanted to be deleted
	pendingScheddUpdates.startIterations();

	while ( pendingScheddUpdates.iterate( curr_request ) != 0 ) {

		curr_job = curr_request->m_job;
		curr_job->jobAd->ClearAllDirtyFlags();

		if ( curr_job->deleteFromGridmanager ) {

				// If the Job object wants to delete the job from the
				// schedd but we failed to do so, don't delete the job
				// object yet; wait until we successfully delete the job
				// from the schedd.
			if ( curr_job->deleteFromSchedd == true &&
				 schedd_deletes_complete == false ) {
				continue;
			}

				// If wantRematch is set, send a reschedule now
			if ( curr_job->wantRematch ) {
				send_reschedule = true;
			}
			pendingScheddUpdates.remove( curr_job->procID );
			pendingScheddVacates.remove( curr_job->procID );
			pendingJobStatus.remove( curr_job->procID );
			completedJobStatus.remove( curr_job->procID );
			completedScheddVacates.remove( curr_job->procID );
			delete curr_job;

		} else {
			pendingScheddUpdates.remove( curr_job->procID );

			if ( curr_request->m_notify ) {
				curr_job->SetEvaluateState();
			}
		}

		delete curr_request;
	}

	// Poke objects that wanted to be notified when a schedd update completed
	// successfully (possibly minus deletes)
	int timer_id;
	scheddUpdateNotifications.Rewind();
	while ( scheddUpdateNotifications.Next( timer_id ) ) {
		daemonCore->Reset_Timer( timer_id, 0 );
	}
	scheddUpdateNotifications.Clear();

	if ( send_reschedule == true ) {
		ScheddObj->reschedule();
	}

	// Check if we have any jobs left to manage. If not, exit.
	if ( BaseJob::JobsByProcId.getNumElements() == 0 ) {
		dprintf( D_ALWAYS, "No jobs left, shutting down\n" );
		daemonCore->Send_Signal( daemonCore->getpid(), SIGTERM );
	}

	lastContactSchedd = time(NULL);

	if ( schedd_deletes_complete == false ) {
		error_str = "Problem using DestroyProc to delete jobs!";
		goto contact_schedd_failure;
	}

	scheddFailureCount = 0;

	// For each job that had dirty attributes, re-evaluate the policy
	dirty_job_ids.rewind();
	while ( (job_id_str = dirty_job_ids.next()) != NULL ) {
		StrToProcIdFixMe(job_id_str, job_id);
		if ( BaseJob::JobsByProcId.lookup( job_id, curr_job ) == 0 ) {
			curr_job->EvalPeriodicJobExpr();
		}
	}

dprintf(D_FULLDEBUG,"leaving doContactSchedd()\n");
	return;

 contact_schedd_failure:
	scheddFailureCount++;
	if ( error_str == "" ) {
		error_str = "Failure in doContactSchedd";
	}
	if ( scheddFailureCount >= maxScheddFailures ) {
		dprintf( D_ALWAYS, "%s\n", error_str.c_str() );
		EXCEPT( "Too many failures connecting to schedd!" );
	}
	dprintf( D_ALWAYS, "%s Will retry\n", error_str.c_str() );
	lastContactSchedd = time(NULL);
	RequestContactSchedd();
	return;
}
Example #18
/* Fill in a PROC structure given a job ClassAd.  This function replaces
   GetProc from qmgr_lib_support.C, so we just get the job classad once
   from the schedd and fill in the PROC structure directly. */
int
MakeProc(ClassAd *ad, PROC *p)
{
	char buf[ATTRLIST_MAX_EXPRESSION];
	float	utime,stime;
	ExprTree *e;
	
	p->version_num = 3;
	ad->LookupInteger(ATTR_CLUSTER_ID, p->id.cluster);
	ad->LookupInteger(ATTR_PROC_ID, p->id.proc);
	ad->LookupInteger(ATTR_JOB_UNIVERSE, p->universe);
	ad->LookupInteger(ATTR_WANT_CHECKPOINT, p->checkpoint);
	ad->LookupInteger(ATTR_WANT_REMOTE_SYSCALLS, p->remote_syscalls);
	ad->LookupString(ATTR_OWNER, buf);
	p->owner = strdup(buf);
	ad->LookupInteger(ATTR_Q_DATE, p->q_date);
	ad->LookupInteger(ATTR_COMPLETION_DATE, p->completion_date);
	ad->LookupInteger(ATTR_JOB_STATUS, p->status);
	ad->LookupInteger(ATTR_JOB_PRIO, p->prio);
	ad->LookupInteger(ATTR_JOB_NOTIFICATION, p->notification);
	ad->LookupInteger(ATTR_IMAGE_SIZE, p->image_size);

	// There are two different syntaxes for the environment.  Since
	// the wire protocol only expects one, we pack either one into the
	// same proc variable "env_v1or2" and make sure they are
	// distinguishable.  For backward compatibility, the schedd
	// will have already ensured that we use V1 syntax if the
	// remote side only understands that.

	Env envobj;
	MyString env_v1or2;
	MyString env_error_msg;
	if(!envobj.getDelimitedStringV1or2Raw(ad,&env_v1or2,&env_error_msg)) {
		EXCEPT("Failed to parse environment string: %s\n",
			   env_error_msg.Value());
	}
	p->env_v1or2 = strdup(env_v1or2.Value());

	p->n_cmds = 1;
	p->cmd = (char **) malloc(p->n_cmds * sizeof(char *));
	p->args_v1or2 = (char **) malloc(p->n_cmds * sizeof(char *));
	p->in = (char **) malloc(p->n_cmds * sizeof(char *));
	p->out = (char **) malloc(p->n_cmds * sizeof(char *));
	p->err = (char **) malloc(p->n_cmds * sizeof(char *));
	p->exit_status = (int *) malloc(p->n_cmds * sizeof(int));

	// There are two different syntaxes for arguments.  Since
	// the wire protocol only expects one, we pack either one into the
	// same proc variable "args_v1or2" and make sure they are
	// distinguishable.  For backward compatibility, the schedd
	// will have already ensured that we use V1 syntax if the
	// remote side only understands that.

	ArgList args;
	MyString args_v1or2;
	MyString error_msg;
	if(!args.GetArgsStringV1or2Raw(ad,&args_v1or2,&error_msg)) {
		EXCEPT("Failed to get V1or2 arguments string: %s\n",error_msg.Value());
	}
	p->args_v1or2[0] = strdup(args_v1or2.Value());

	ad->LookupString(ATTR_JOB_CMD, buf);
	p->cmd[0] = strdup(buf);
	ad->LookupString(ATTR_JOB_INPUT, buf);
	p->in[0] = strdup(buf);
	ad->LookupString(ATTR_JOB_OUTPUT, buf);
	p->out[0] = strdup(buf);
	ad->LookupString(ATTR_JOB_ERROR, buf);
	p->err[0] = strdup(buf);
	ad->LookupInteger(ATTR_JOB_EXIT_STATUS, p->exit_status[0]);

	ad->LookupInteger(ATTR_MIN_HOSTS, p->min_needed);
	ad->LookupInteger(ATTR_MAX_HOSTS, p->max_needed);

	ad->LookupString(ATTR_JOB_ROOT_DIR, buf);
	p->rootdir = strdup(buf);
	ad->LookupString(ATTR_JOB_IWD, buf);
	p->iwd = strdup(buf);

	e = ad->LookupExpr(ATTR_REQUIREMENTS);
	if (e) {
		p->requirements = strdup(ExprTreeToString(e));
	} else {
	   p->requirements = NULL;
	}
	e = ad->LookupExpr(ATTR_RANK);
	if (e) {
		p->preferences = strdup(ExprTreeToString(e));
	} else {
		p->preferences = NULL;
	}

	ad->LookupFloat(ATTR_JOB_LOCAL_USER_CPU, utime);
	ad->LookupFloat(ATTR_JOB_LOCAL_SYS_CPU, stime);
	float_to_rusage(utime, stime, &(p->local_usage));

	p->remote_usage = (struct rusage *) malloc(p->n_cmds * 
		sizeof(struct rusage));

	memset(p->remote_usage, 0, sizeof( struct rusage ));

	ad->LookupFloat(ATTR_JOB_REMOTE_USER_CPU, utime);
	ad->LookupFloat(ATTR_JOB_REMOTE_SYS_CPU, stime);
	float_to_rusage(utime, stime, &(p->remote_usage[0]));
	
	return 0;
}
Example #19
bool
VMGahpServer::publishVMClassAd(const char *workingdir)
{
    static const char* command = "CLASSAD";

    if( !m_job_ad || !workingdir ) {
        return false;
    }

    if( m_is_initialized == false ) {
        return false;
    }

    if( isSupportedCommand(command) == FALSE ) {
        return false;
    }

    // Before sending command, print the remaining stderr messages from vmgahp
    printSystemErrorMsg();

    if(write_line(command) == false) {
        return false;
    }

    // give some time to gahp server
    sleep(1);

    Gahp_Args start_result;
    if( read_argv(start_result) == false ) {
        return false;
    }

    if( start_result.argc == 0 || start_result.argv[0][0] != 'S' ) {
        dprintf(D_ALWAYS,"VMGAHP command '%s' failed\n",command);
        return false;
    }

    // Send Working directory
    MyString vmAttr;
    vmAttr.sprintf("VM_WORKING_DIR = \"%s\"", workingdir);

    if ( write_line( vmAttr.Value() ) == false ) {
        return false;
    }

    // publish job ClassAd to vmgahp server via pipe
    bool can_send_it = false;
    int total_len = 0;

    const char *name;
    ExprTree *expr = NULL;
    m_job_ad->ResetExpr();
    while( m_job_ad->NextExpr(name, expr) ) {
        can_send_it = false;

        if( !m_send_all_classad ) {
            // Instead of sending entire job ClassAd to vmgahp,
            // we will send some part of job ClassAd necessary to vmgahp.
            if( !strncasecmp( name, "JobVM", strlen("JobVM") ) ||
                    !strncasecmp( name, "VMPARAM", strlen("VMPARAM") ) ||
                    !strncasecmp( name, ATTR_CLUSTER_ID, strlen(ATTR_CLUSTER_ID)) ||
                    !strncasecmp( name, ATTR_PROC_ID, strlen(ATTR_PROC_ID)) ||
                    !strncasecmp( name, ATTR_USER, strlen(ATTR_USER)) ||
                    !strncasecmp( name, ATTR_ORIG_JOB_IWD,
                                  strlen(ATTR_ORIG_JOB_IWD)) ||
                    !strncasecmp( name, ATTR_JOB_ARGUMENTS1,
                                  strlen(ATTR_JOB_ARGUMENTS1)) ||
                    !strncasecmp( name, ATTR_JOB_ARGUMENTS2,
                                  strlen(ATTR_JOB_ARGUMENTS2)) ||
                    !strncasecmp( name, ATTR_TRANSFER_INTERMEDIATE_FILES,
                                  strlen(ATTR_TRANSFER_INTERMEDIATE_FILES)) ||
                    !strncasecmp( name, ATTR_TRANSFER_INPUT_FILES,
                                  strlen(ATTR_TRANSFER_INPUT_FILES)) ) {
                can_send_it = true;
            }
        } else {
            // We will send the entire job ClassAd to vmgahp
            can_send_it = true;
        }

        if( !can_send_it ) {
            continue;
        }

        vmAttr.sprintf( "%s = %s", name, ExprTreeToString( expr ) );

        if ( write_line( vmAttr.Value() ) == false ) {
            return false;
        }
        total_len += vmAttr.Length();
        if( total_len > 2048 ) {
            // Give some time for vmgahp to read this pipe
            sleep(1);
            printSystemErrorMsg();
            total_len = 0;
        }
    }

    static const char *endcommand = "CLASSAD_END";

    if(write_line(endcommand) == false) {
        return false;
    }

    // give some time to vmgahp
    sleep(1);

    Gahp_Args end_result;
    if( read_argv(end_result) == false ) {
        return false;
    }
    if( end_result.argc == 0 || end_result.argv[0][0] != 'S' ) {
        dprintf(D_ALWAYS,"VMGAHP command '%s' failed\n",endcommand);
        return false;
    }

    m_workingdir = workingdir;
    return true;
}
Example #20
bool UserPolicy::FiringReason(MyString &reason,int &reason_code,int &reason_subcode)
{
	reason_code = 0;
	reason_subcode = 0;

	if ( m_ad == NULL || m_fire_expr == NULL ) {
		return false;
	}


	const char * expr_src;
	MyString exprString;
	std::string reason_expr_param;
	std::string reason_expr_attr;
	std::string subcode_expr_param;
	std::string subcode_expr_attr;
	switch(m_fire_source) {
		case FS_NotYet:
			expr_src = "UNKNOWN (never set)";
			break;

		case FS_JobAttribute:
		{
			expr_src = "job attribute";
			ExprTree *tree;
			tree = m_ad->LookupExpr( m_fire_expr );

			// Get a formatted expression string
			if( tree ) {
				exprString = ExprTreeToString( tree );
			}
			if( m_fire_expr_val == -1 ) {
				reason_code = CONDOR_HOLD_CODE_JobPolicyUndefined;
			}
			else {
				reason_code = CONDOR_HOLD_CODE_JobPolicy;
				sprintf(reason_expr_attr,"%sReason", m_fire_expr);
				sprintf(subcode_expr_attr,"%sSubCode", m_fire_expr);
			}
			break;
		}

		case FS_SystemMacro:
		{
			expr_src = "system macro";
			char * val = param(m_fire_expr);
			exprString = val;
			free(val);
			if( m_fire_expr_val == -1 ) {
				reason_code = CONDOR_HOLD_CODE_SystemPolicyUndefined;
			}
			else {
				reason_code = CONDOR_HOLD_CODE_SystemPolicy;
				sprintf(reason_expr_param,"%s_REASON", m_fire_expr);
				sprintf(subcode_expr_param,"%s_SUBCODE", m_fire_expr);
			}
			break;
		}

		default:
			expr_src = "UNKNOWN (bad value)";
			break;
	}

	reason = "";

	MyString subcode_expr;
	if( !subcode_expr_param.empty() &&
		param(subcode_expr,subcode_expr_param.c_str(),NULL) &&
		!subcode_expr.IsEmpty())
	{
		m_ad->AssignExpr(ATTR_SCRATCH_EXPRESSION, subcode_expr.Value());
		m_ad->EvalInteger(ATTR_SCRATCH_EXPRESSION, m_ad, reason_subcode);
		m_ad->Delete(ATTR_SCRATCH_EXPRESSION);
	}
	else if( !subcode_expr_attr.empty() )
	{
		m_ad->EvalInteger(subcode_expr_attr.c_str(), m_ad, reason_subcode);
	}

	MyString reason_expr;
	if( !reason_expr_param.empty() &&
		param(reason_expr,reason_expr_param.c_str(),NULL) &&
		!reason_expr.IsEmpty())
	{
		m_ad->AssignExpr(ATTR_SCRATCH_EXPRESSION, reason_expr.Value());
		m_ad->EvalString(ATTR_SCRATCH_EXPRESSION, m_ad, reason);
		m_ad->Delete(ATTR_SCRATCH_EXPRESSION);
	}
	else if( !reason_expr_attr.empty() )
	{
		m_ad->EvalString(reason_expr_attr.c_str(), m_ad, reason);
	}

	if( !reason.IsEmpty() ) {
		return true;
	}

	// Format up the reason string
	reason.sprintf( "The %s %s expression '%s' evaluated to ",
					expr_src,
					m_fire_expr,
					exprString.Value());

	// Get a string for its value
	switch( m_fire_expr_val ) {
	case 0:
		reason += "FALSE";
		break;
	case 1:
		reason += "TRUE";
		break;
	case -1:
		reason += "UNDEFINED";
		break;
	default:
		EXCEPT( "Unrecognized FiringExpressionValue: %d", 
				m_fire_expr_val ); 
		break;
	}

	return true;
}
Example #21
int
CondorQ::fetchQueueFromDBAndProcess ( const char *dbconn,
									  char *&lastUpdate,
									  condor_q_process_func process_func,
									  void * process_func_data,
									  CondorError*  /*errstack*/ )
{
#ifndef HAVE_EXT_POSTGRESQL
	(void) dbconn;
	(void) lastUpdate;
	(void) process_func;
	(void) process_func_data;
#else
	int     		result;
	JobQueueSnapshot	*jqSnapshot;
	const char           *constraint;
	ClassAd        *ad;
	QuillErrCode             rv;
	ExprTree *tree;

	ASSERT(process_func);

	jqSnapshot = new JobQueueSnapshot(dbconn);

	rv = jqSnapshot->startIterateAllClassAds(clusterarray,
						 numclusters,
						 procarray,
						 numprocs,
						 schedd,
						 FALSE,
						 scheddBirthdate,
						 lastUpdate);

	if (rv == QUILL_FAILURE) {
		delete jqSnapshot;
		return Q_COMMUNICATION_ERROR;
	}
	else if (rv == JOB_QUEUE_EMPTY) {
		delete jqSnapshot;
		return Q_OK;
	}	

	// make the query ad
	if ((result = query.makeQuery (tree)) != Q_OK) {
		delete jqSnapshot;
		return result;
	}

	constraint = ExprTreeToString(tree);
	delete tree;

	ad = getDBNextJobByConstraint(constraint, jqSnapshot);
	
	while (ad != (ClassAd *) 0) {
			// Process the data and insert it into the list
		if ((*process_func) (ad, process_func_data) ) {
			ad->Clear();
			delete ad;
		}
		
		ad = getDBNextJobByConstraint(constraint, jqSnapshot);
	}	

	delete jqSnapshot;
#endif /* HAVE_EXT_POSTGRESQL */

	return Q_OK;
}
void
doContactSchedd()
{
	if (command_queue.IsEmpty()) {
		daemonCore->Reset_Timer( contactScheddTid, contact_schedd_interval ); // Come back in a min
		return;
	}

	dprintf(D_FULLDEBUG,"in doContactSchedd\n");

	SchedDRequest * current_command = NULL;

	int error=FALSE;
	std::string error_msg;
	CondorError errstack;
	bool do_reschedule = false;
	int failure_line_num = 0;
	int failure_errno = 0;

	// Try connecting to schedd
	DCSchedd dc_schedd ( ScheddAddr, ScheddPool );
	if (dc_schedd.error() || !dc_schedd.locate()) {
		sprintf( error_msg, "Error locating schedd %s", ScheddAddr );

		dprintf( D_ALWAYS, "%s\n", error_msg.c_str() );

		// If you can't connect return "Failure" on every job request
		command_queue.Rewind();
		while (command_queue.Next(current_command)) {
			if (current_command->status != SchedDRequest::SDCS_NEW)
				continue;

			if (current_command->command == SchedDRequest::SDC_STATUS_CONSTRAINED) {
				const char * result[] = {
					GAHP_RESULT_FAILURE,
					error_msg.c_str(),
					"0"};
				enqueue_result (current_command->request_id, result, 3);
			} else if (current_command->command == SchedDRequest::SDC_SUBMIT_JOB) {
				const char * result[] = {
									GAHP_RESULT_FAILURE,
									NULL,
									error_msg.c_str() };
				enqueue_result (current_command->request_id, result, 3);
			} else if (current_command->command == SchedDRequest::SDC_UPDATE_LEASE) {
				const char * result[] = {
									GAHP_RESULT_FAILURE,
									error_msg.c_str(),
									NULL };
				enqueue_result (current_command->request_id, result, 3);
			} else {
				const char * result[] = {
									GAHP_RESULT_FAILURE,
									error_msg.c_str() };
				enqueue_result (current_command->request_id, result, 2);
			}

			current_command->status = SchedDRequest::SDCS_COMPLETED;
		}
	}

	
	SchedDRequest::schedd_command_type commands [] = {
		SchedDRequest::SDC_REMOVE_JOB,
		SchedDRequest::SDC_HOLD_JOB,
		SchedDRequest::SDC_RELEASE_JOB };

	const char * command_titles [] = {
		"REMOVE_JOB", "HOLD_JOB", "RELEASE_JOB" };

	// REMOVE
	// HOLD
	// RELEASE
	int i=0;
	while (i<3) {
		
		
		StringList id_list;
		SimpleList <SchedDRequest*> this_batch;

		SchedDRequest::schedd_command_type this_command = commands[i];
		const char * this_action = command_titles[i];
		const char * this_reason = NULL;

		dprintf (D_FULLDEBUG, "Processing %s requests\n", this_action);
		
		error = FALSE;

		// Create a batch of commands with the same command type AND the same reason		
		command_queue.Rewind();
		while (command_queue.Next(current_command)) {
			if (current_command->status != SchedDRequest::SDCS_NEW)
				continue;

			if (current_command->command != this_command)
				continue;

			if ((this_reason != NULL) && (strcmp (current_command->reason, this_reason) != 0))
				continue;

			if (this_reason == NULL)
				this_reason = current_command->reason;
				
			char job_id_buff[30];
			sprintf (job_id_buff, "%d.%d",
				current_command->cluster_id,
				current_command->proc_id);
			id_list.append (job_id_buff);

			this_batch.Append (current_command);
		}

		// If we haven't found any....
		if (id_list.isEmpty()) {
			i++;
			continue;	// ... then try the next command
		}

		// Perform the appropriate command on the current batch
		ClassAd * result_ad= NULL;
		if (this_command == SchedDRequest::SDC_REMOVE_JOB)  {
			errstack.clear();
			result_ad=
				dc_schedd.removeJobs (
					&id_list,
					this_reason,
					&errstack);
		} else if (this_command == SchedDRequest::SDC_HOLD_JOB) {
			errstack.clear();
			result_ad=
				dc_schedd.holdJobs (
					&id_list,
					this_reason,
					NULL,
			 		&errstack);
		} else if (this_command == SchedDRequest::SDC_RELEASE_JOB)  {
			errstack.clear();
			result_ad=
				dc_schedd.releaseJobs (
					&id_list,
					this_reason,
					&errstack);
		} else {
			EXCEPT( "Unexpected command type %d in doContactSchedd",
					this_command );
		}

		// Analyze the result ad
		if (!result_ad) {
			error = TRUE;
			sprintf( error_msg, "Error connecting to schedd %s %s: %s",
					 ScheddAddr, dc_schedd.addr(), errstack.getFullText() );
		}
		else {
			result_ad->dPrint (D_FULLDEBUG);
			if ( this_command == SchedDRequest::SDC_RELEASE_JOB ) {
				do_reschedule = true;
			}
		}

		// Go through the batch again, and create responses for each request
		this_batch.Rewind();
		while (this_batch.Next(current_command)) {
			
			// Check the result
			char job_id_buff[30];
			if (result_ad && (error == FALSE)) {
				sprintf (job_id_buff, "job_%d_%d",
					current_command->cluster_id,
					current_command->proc_id);
				
				int remove_result;
				if (result_ad->LookupInteger (job_id_buff, remove_result)) {
					switch (remove_result) {
						case AR_ERROR:
							error = TRUE;
							error_msg = "General Error";
							break;
						case AR_SUCCESS:
							error = FALSE;
							break;
						case AR_NOT_FOUND:
							error = TRUE;
							error_msg = "Job not found";
							break;
						case AR_BAD_STATUS:
							error = TRUE;
							error_msg = "Bad job status";
							break;
						case AR_ALREADY_DONE:
							error = TRUE;
							error_msg = "Already done";
							break;
						case AR_PERMISSION_DENIED:
							error = TRUE;
							error_msg = "Permission denied";
							break;
						default:
							error = TRUE;
							error_msg = "Unknown Result";
					} // hctiws

				} else {
					error_msg = "Unable to get result";
				} // fi lookup result for job
			} // fi error == FALSE

			if (error) {
				dprintf (D_ALWAYS, "Error (operation: %s) %d.%d: %s\n",
						this_action,
						current_command->cluster_id,
						current_command->proc_id,
						error_msg.c_str());

				const char * result[2];
				result[0] = GAHP_RESULT_FAILURE;
				result[1] = error_msg.c_str();

				enqueue_result (current_command->request_id, result, 2);
			} else {
				dprintf (D_ALWAYS, "Succeess (operation: %s) %d.%d\n",
						this_action,
						current_command->cluster_id,
						current_command->proc_id);

				const char * result[2];
				result[0] = GAHP_RESULT_SUCCESS;
				result[1] = NULL;

				enqueue_result (current_command->request_id, result, 2);
			} // fi error

			// Mark the status
			current_command->status = SchedDRequest::SDCS_COMPLETED;
		} // elihw this_batch

		if ( result_ad ) {
			delete result_ad;
		}
	}

	dprintf (D_FULLDEBUG, "Processing JOB_STAGE_IN requests\n");
	

	// JOB_STAGE_IN
	int MAX_BATCH_SIZE=1; // This should be a config param

	SimpleList <SchedDRequest*> stage_in_batch;
	do {
		stage_in_batch.Clear();

		command_queue.Rewind();
		while (command_queue.Next(current_command)) {

			if (current_command->status != SchedDRequest::SDCS_NEW)
				continue;

			if (current_command->command != SchedDRequest::SDC_JOB_STAGE_IN)
				continue;

			dprintf (D_ALWAYS, "Adding %d.%d to STAGE_IN batch\n", 
					 current_command->cluster_id,
					 current_command->proc_id);

			stage_in_batch.Append (current_command);
			if (stage_in_batch.Number() >= MAX_BATCH_SIZE)
				break;
		}

		if (stage_in_batch.Number() > 0) {
			ClassAd ** array = new ClassAd*[stage_in_batch.Number()];
			i=0;
			stage_in_batch.Rewind();
			while (stage_in_batch.Next(current_command)) {
				array[i++] = current_command->classad;
			}

			error = FALSE;
			errstack.clear();
			if (!dc_schedd.spoolJobFiles( stage_in_batch.Number(),
										  array,
										  &errstack )) {
				error = TRUE;
				sprintf( error_msg, "Error sending files to schedd %s: %s", ScheddAddr, errstack.getFullText() );
				dprintf( D_ALWAYS, "%s\n", error_msg.c_str() );
			}
			delete [] array;
  
			stage_in_batch.Rewind();
			while (stage_in_batch.Next(current_command)) {
				current_command->status = SchedDRequest::SDCS_COMPLETED;

				if (error) {
					const char * result[] = {
						GAHP_RESULT_FAILURE,
						error_msg.c_str() };
					enqueue_result (current_command->request_id, result, 2);

				} else {
					const char * result[] = {
						GAHP_RESULT_SUCCESS,
						NULL };
					enqueue_result (current_command->request_id, result, 2);
				}
			} // elihw (stage_in_batch)
		} // fi has STAGE_IN requests
	} while (stage_in_batch.Number() > 0);

	dprintf (D_FULLDEBUG, "Processing JOB_STAGE_OUT requests\n");
	

	// JOB_STAGE_OUT
	SimpleList <SchedDRequest*> stage_out_batch;

	command_queue.Rewind();
	while (command_queue.Next(current_command)) {

		if (current_command->status != SchedDRequest::SDCS_NEW)
			continue;

		if (current_command->command != SchedDRequest::SDC_JOB_STAGE_OUT)
			continue;


		stage_out_batch.Append (current_command);
	}

	if (stage_out_batch.Number() > 0) {
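		// Build a constraint that ORs together one
		// (ClusterId==c&&ProcId==p) clause per job; the trailing
		// "False" terminates the final "||" so the expression parses.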
		std::string constraint = "";
		stage_out_batch.Rewind();
		int jobsexpected = stage_out_batch.Number();
		while (stage_out_batch.Next(current_command)) {
			sprintf_cat( constraint, "(ClusterId==%d&&ProcId==%d)||",
									current_command->cluster_id,
									current_command->proc_id );
		}
		constraint += "False";

		error = FALSE;
		errstack.clear();
		int jobssent;
		if (!dc_schedd.receiveJobSandbox( constraint.c_str(),
										  &errstack, &jobssent )) {
			error = TRUE;
			sprintf( error_msg, "Error receiving files from schedd %s: %s",
							   ScheddAddr, errstack.getFullText() );
			dprintf( D_ALWAYS, "%s\n", error_msg.c_str() );
		}

		if(error == FALSE && jobssent != jobsexpected) {
			error = TRUE;
			sprintf( error_msg, "Schedd %s didn't send expected files",
					 ScheddAddr );
			dprintf (D_ALWAYS, "Transfered files for %d jobs but got files for %d jobs. (Schedd %s with contraint %s\n", jobsexpected, jobssent, ScheddAddr, constraint.c_str());
		}
  
		stage_out_batch.Rewind();
		while (stage_out_batch.Next(current_command)) {
			current_command->status = SchedDRequest::SDCS_COMPLETED;

			if (error) {
				const char * result[] = {
								GAHP_RESULT_FAILURE,
								error_msg.c_str() };
				enqueue_result (current_command->request_id, result, 2);

			} else {
				const char * result[] = {
										GAHP_RESULT_SUCCESS,
										NULL };
				enqueue_result (current_command->request_id, result, 2);
			}
		} // elihw (stage_out_batch)
	} // fi has STAGE_OUT requests


	dprintf (D_FULLDEBUG, "Processing JOB_REFRESH_PROXY requests\n");

	CondorVersionInfo ver_info(dc_schedd.version());
	bool delegate_credential;
	if ( ver_info.built_since_version(6,7,19) &&
		 param_boolean( "DELEGATE_JOB_GSI_CREDENTIALS", true ) ) {
		delegate_credential = true;
	} else {
		delegate_credential = false;
	}

	// JOB_REFRESH_PROXY
	command_queue.Rewind();
	while (command_queue.Next(current_command)) {

		if (current_command->status != SchedDRequest::SDCS_NEW)
			continue;

		if (current_command->command != SchedDRequest::SDC_JOB_REFRESH_PROXY)
			continue;

		time_t expiration_time = GetDesiredDelegatedJobCredentialExpiration(current_command->classad);
		time_t result_expiration_time = 0;

		bool result;
		errstack.clear();
		if ( delegate_credential ) {
			result = dc_schedd.delegateGSIcredential( 
												current_command->cluster_id,
												current_command->proc_id,
												current_command->proxy_file,
												expiration_time,
												&result_expiration_time,
												&errstack );

				// Currently, we do not propagate the actual resulting
				// expiration time back to the gridmanager.  We
				// probably should.
		} else {
			result = dc_schedd.updateGSIcredential( 
												current_command->cluster_id,
												current_command->proc_id,
												current_command->proxy_file,
												&errstack );
		}

		current_command->status = SchedDRequest::SDCS_COMPLETED;

		if (result == false) {
			sprintf( error_msg, "Error refreshing proxy to schedd %s: %s",
					 ScheddAddr, errstack.getFullText() );
			dprintf( D_ALWAYS, "%s\n", error_msg.c_str() );

			const char * result_to_queue[] = {
				GAHP_RESULT_FAILURE,
				error_msg.c_str() };
			enqueue_result (current_command->request_id, result_to_queue, 2);

		} else {
			const char * result_to_queue[] = {
				GAHP_RESULT_SUCCESS,
				NULL };
			enqueue_result (current_command->request_id, result_to_queue, 2);
		}

	}


	// Now do all the QMGMT transactions
	error = FALSE;

	// Try connecting to the queue
	Qmgr_connection * qmgr_connection;
	
	if ((qmgr_connection = ConnectQ(dc_schedd.addr(), QMGMT_TIMEOUT, false, NULL, NULL, dc_schedd.version() )) == NULL) {
		error = TRUE;
		sprintf( error_msg, "Error connecting to schedd %s", ScheddAddr );
		dprintf( D_ALWAYS, "%s\n", error_msg.c_str() );
	} else {
		errno = 0;
		AbortTransaction(); // Just so we can call BeginTransaction() in the loop
		if ( errno == ETIMEDOUT ) {
			failure_line_num = __LINE__;
			failure_errno = errno;
			goto contact_schedd_disconnect;
		}
	}
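	// Every qmgmt call below can time out. On ETIMEDOUT, record the
	// line number and errno, then jump to contact_schedd_disconnect,
	// which fails all remaining NEW requests.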


	dprintf (D_FULLDEBUG, "Processing UPDATE_CONSTRAINED/UDATE_JOB requests\n");
	
	// UPDATE_CONSTRAINED
	// UPDATE_JOB
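	// For each update request, walk the request's ClassAd and push
	// every attribute with SetAttributeByConstraint() (constrained
	// update) or SetAttribute() (single job), inside one qmgmt
	// transaction per request.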
	command_queue.Rewind();
	while (command_queue.Next(current_command)) {
		
		if (current_command->status != SchedDRequest::SDCS_NEW)
			continue;

		if ((current_command->command != SchedDRequest::SDC_UPDATE_CONSTRAINED) &&
			(current_command->command != SchedDRequest::SDC_UPDATE_JOB))
			continue;

		if (qmgr_connection == NULL)
			goto update_report_result;
		
		error = FALSE;
		errno = 0;
		BeginTransaction();
		if ( errno == ETIMEDOUT ) {
			failure_line_num = __LINE__;
			failure_errno = errno;
			goto contact_schedd_disconnect;
		}

		current_command->classad->ResetExpr();
		ExprTree *tree;
		const char *lhstr, *rhstr;
		while( current_command->classad->NextExpr(lhstr, tree) ) {

			rhstr = ExprTreeToString( tree );
			if( !lhstr || !rhstr) {
				sprintf( error_msg, "ERROR: ClassAd problem in Updating by constraint %s",
												 current_command->constraint );
				dprintf( D_ALWAYS, "%s\n", error_msg.c_str() );
				error = TRUE;
			} else {
				if (current_command->command == SchedDRequest::SDC_UPDATE_CONSTRAINED) {
					if( SetAttributeByConstraint(current_command->constraint,
												lhstr,
												rhstr) == -1 ) {
						if ( errno == ETIMEDOUT ) {
							failure_line_num = __LINE__;
							failure_errno = errno;
							goto contact_schedd_disconnect;
						}
						sprintf( error_msg, "ERROR: Failed (errno=%d) to SetAttributeByConstraint %s=%s for constraint %s",
									errno, lhstr, rhstr, current_command->constraint );
						dprintf( D_ALWAYS, "%s\n", error_msg.c_str() );
						error = TRUE;
					}
				} else if (current_command->command == SchedDRequest::SDC_UPDATE_JOB) {
					if( SetAttribute(current_command->cluster_id,
											current_command->proc_id,
											lhstr,
											rhstr) == -1 ) {
						if ( errno == ETIMEDOUT ) {
							failure_line_num = __LINE__;
							failure_errno = errno;
							goto contact_schedd_disconnect;
						}
						sprintf( error_msg, "ERROR: Failed to SetAttribute() %s=%s for job %d.%d",
										 lhstr, rhstr, current_command->cluster_id,  current_command->proc_id);
						dprintf( D_ALWAYS, "%s\n", error_msg.c_str() );
						error = TRUE;
					}
				}
			}

			if (error)
				break;
		} // elihw classad

update_report_result:
		if (error) {
			const char * result[] = {
				GAHP_RESULT_FAILURE,
				error_msg.c_str() };


			//RemoteCommitTransaction();
			enqueue_result (current_command->request_id, result, 2);
			current_command->status = SchedDRequest::SDCS_COMPLETED;
			if ( qmgr_connection != NULL ) {
				errno = 0;
				AbortTransaction();
				if ( errno == ETIMEDOUT ) {
					failure_line_num = __LINE__;
					failure_errno = errno;
					goto contact_schedd_disconnect;
				}
			}
		} else {
			if ( RemoteCommitTransaction() < 0 ) {
				failure_line_num = __LINE__;
				failure_errno = errno;
				goto contact_schedd_disconnect;
			}
			const char * result[] = {
				GAHP_RESULT_SUCCESS,
				NULL };
			enqueue_result (current_command->request_id, result, 2);
			current_command->status = SchedDRequest::SDCS_COMPLETED;
		} // fi

	} // elihw

	
	dprintf (D_FULLDEBUG, "Processing UPDATE_LEASE requests\n");

	// UPDATE_LEASE
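	// A lease update turns each absolute expiration time into a
	// relative duration for SetTimerAttribute(ATTR_TIMER_REMOVE_CHECK);
	// the jobs updated successfully are returned as a comma-separated list.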
	command_queue.Rewind();
	while (command_queue.Next(current_command)) {
		
		error = FALSE;

		if (current_command->status != SchedDRequest::SDCS_NEW)
			continue;

		if (current_command->command != SchedDRequest::SDC_UPDATE_LEASE)
			continue;

		std::string success_job_ids="";
		if (qmgr_connection == NULL) {
			sprintf( error_msg, "Error connecting to schedd %s", ScheddAddr );
			error = TRUE;
		} else {
			error = FALSE;
			errno = 0;
			BeginTransaction();
			if ( errno == ETIMEDOUT ) {
				failure_line_num = __LINE__;
				failure_errno = errno;
				goto contact_schedd_disconnect;
			}
		
			for (i=0; i<current_command->num_jobs; i++) {
			
				time_t time_now = time(NULL);
				int duration = 
					current_command->expirations[i].expiration - time_now;

				dprintf (D_FULLDEBUG, 
						 "Job %d.%d SetTimerAttribute=%d\n",
						 current_command->expirations[i].cluster,
						 current_command->expirations[i].proc,
						 duration);
		
				if (SetTimerAttribute (current_command->expirations[i].cluster,
									   current_command->expirations[i].proc,
									   ATTR_TIMER_REMOVE_CHECK,
									   duration) < 0) {

					if ( errno == ETIMEDOUT ) {
						failure_line_num = __LINE__;
						failure_errno = errno;
						goto contact_schedd_disconnect;
					}
					dprintf (D_ALWAYS, 
							 "Unable to SetTimerAttribute(%d, %d), errno=%d\n",
							 current_command->expirations[i].cluster,
							 current_command->expirations[i].proc,
							 errno);
						 
				} else {
						// Append job id to the result line
					if (success_job_ids.length() > 0)
						success_job_ids += ",";

					sprintf_cat( success_job_ids,
						"%d.%d",
						current_command->expirations[i].cluster,
						current_command->expirations[i].proc);
				}
			} //rof jobs for request
		} // fi error


		if (error) {
			const char * result[] = {
				GAHP_RESULT_FAILURE,
				error_msg.c_str(),
				NULL
			};


			//RemoteCommitTransaction();
			enqueue_result (current_command->request_id, result, 3);
			current_command->status = SchedDRequest::SDCS_COMPLETED;
			if ( qmgr_connection != NULL ) {
				errno = 0;
				AbortTransaction();
				if ( errno == ETIMEDOUT ) {
					failure_line_num = __LINE__;
					failure_errno = errno;
					goto contact_schedd_disconnect;
				}
			}
		} else {
			if ( RemoteCommitTransaction() < 0 ) {
				failure_line_num = __LINE__;
				failure_errno = errno;
				goto contact_schedd_disconnect;
			}
			const char * result[] = {
				GAHP_RESULT_SUCCESS,
				NULL,
				success_job_ids.length()?success_job_ids.c_str():NULL
			};
			enqueue_result (current_command->request_id, result, 3);
			current_command->status = SchedDRequest::SDCS_COMPLETED;
		} // fi

	} // elihw UPDATE_LEASE requests

	dprintf (D_FULLDEBUG, "Processing SUBMIT_JOB requests\n");

	// SUBMIT_JOB
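	// Submission creates a new cluster/proc, rewrites the arguments and
	// environment for the target schedd's version, expands the input
	// file list, then copies every attribute of the ad into the job
	// queue with SetAttribute(); ATTR_TIMER_REMOVE_CHECK is handled
	// specially via SetTimerAttribute().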
	command_queue.Rewind();
	while (command_queue.Next(current_command)) {

		if (current_command->status != SchedDRequest::SDCS_NEW)
			continue;

		if (current_command->command != SchedDRequest::SDC_SUBMIT_JOB)
			continue;

		int ClusterId = -1;
		int ProcId = -1;

		if (qmgr_connection == NULL) {
			error = TRUE;
			goto submit_report_result;
		}

		errno = 0;
		BeginTransaction();
		if ( errno == ETIMEDOUT ) {
			failure_line_num = __LINE__;
			failure_errno = errno;
			goto contact_schedd_disconnect;
		}
		error = FALSE;

		if ((ClusterId = NewCluster()) >= 0) {
			ProcId = NewProc (ClusterId);
		}
		if ( errno == ETIMEDOUT ) {
			failure_line_num = __LINE__;
			failure_errno = errno;
			goto contact_schedd_disconnect;
		}

		if ( ClusterId < 0 ) {
			error = TRUE;
			error_msg = "Unable to create a new job cluster";
			dprintf( D_ALWAYS, "%s\n", error_msg.c_str() );
		} else if ( ProcId < 0 ) {
			error = TRUE;
			error_msg = "Unable to create a new job proc";
			dprintf( D_ALWAYS, "%s\n", error_msg.c_str() );
		}
		if ( ClusterId == -2 || ProcId == -2 ) {
			error = TRUE;
			error_msg =
				"Number of submitted jobs would exceed MAX_JOBS_SUBMITTED";
			dprintf( D_ALWAYS, "%s\n", error_msg.c_str() );
		}


		// Adjust the argument/environment syntax based on the version
		// of the schedd we are talking to.

		if( error == FALSE) {
			CondorVersionInfo version_info(dc_schedd.version());
			ArgList arglist;
			MyString arg_error_msg;
			Env env_obj;
			MyString env_error_msg;

			if(!arglist.AppendArgsFromClassAd(current_command->classad,&arg_error_msg) ||
			   !arglist.InsertArgsIntoClassAd(current_command->classad,&version_info,&arg_error_msg))
			{
				sprintf( error_msg,
						"ERROR: ClassAd problem in converting arguments to syntax "
						"for schedd (version=%s): %s\n",
						dc_schedd.version() ? dc_schedd.version() : "NULL",
						arg_error_msg.Value());
				dprintf( D_ALWAYS,"%s\n", error_msg.c_str() );
				error = TRUE;
			}	

			if(!env_obj.MergeFrom(current_command->classad,&env_error_msg) ||
			   !env_obj.InsertEnvIntoClassAd(current_command->classad,&env_error_msg,NULL,&version_info))
			{
				sprintf( error_msg,
						"ERROR: Failed to convert environment to target syntax"
						" for schedd (version %s): %s\n",
						dc_schedd.version() ? dc_schedd.version() : "NULL",
						env_error_msg.Value());
				dprintf( D_ALWAYS, "%s\n", error_msg.c_str() );
				error = TRUE;
			}
		}

		if( error == FALSE ) {
				// See the comment in the function body of ExpandInputFileList
				// for an explanation of what is going on here.
			MyString transfer_input_error_msg;
			if( !FileTransfer::ExpandInputFileList( current_command->classad, transfer_input_error_msg ) ) {
				dprintf( D_ALWAYS, "%s\n", transfer_input_error_msg.Value() );
				error = TRUE;
			}
		}

		if ( error == FALSE ) {
			current_command->classad->Assign(ATTR_CLUSTER_ID, ClusterId);
			current_command->classad->Assign(ATTR_PROC_ID, ProcId);

			// Special case for the job lease
			int expire_time;
			if ( current_command->classad->LookupInteger( ATTR_TIMER_REMOVE_CHECK, expire_time ) ) {
				if ( SetTimerAttribute( ClusterId, ProcId,
										ATTR_TIMER_REMOVE_CHECK,
										expire_time - time(NULL) ) == -1 ) {
					if ( errno == ETIMEDOUT ) {
						failure_line_num = __LINE__;
						failure_errno = errno;
						goto contact_schedd_disconnect;
					}
					sprintf( error_msg, "ERROR: Failed to SetTimerAttribute %s=%ld for job %d.%d",
							 ATTR_TIMER_REMOVE_CHECK, expire_time - time(NULL), ClusterId, ProcId );
					dprintf( D_ALWAYS, "%s\n", error_msg.c_str() );
					error = TRUE;
					goto submit_report_result;
				}
				current_command->classad->Delete( ATTR_TIMER_REMOVE_CHECK );
			}

			// Set all the classad attributes on the remote job ad
			current_command->classad->ResetExpr();
			ExprTree *tree;
			const char *lhstr, *rhstr;
			while( current_command->classad->NextExpr(lhstr, tree) ) {

				rhstr = ExprTreeToString( tree );
				if( !lhstr || !rhstr) {
					sprintf( error_msg, "ERROR: ClassAd problem in Updating by constraint %s",
												 current_command->constraint );
					dprintf( D_ALWAYS, "%s\n", error_msg.c_str() );
					error = TRUE;
				} else if( SetAttribute (ClusterId, ProcId,
											lhstr,
											rhstr) == -1 ) {
					if ( errno == ETIMEDOUT ) {
						failure_line_num = __LINE__;
						failure_errno = errno;
						goto contact_schedd_disconnect;
					}
					sprintf( error_msg, "ERROR: Failed to SetAttribute %s=%s for job %d.%d",
									 lhstr, rhstr, ClusterId, ProcId );
					dprintf( D_ALWAYS, "%s\n", error_msg.c_str() );
					error = TRUE;
				}

				if (error) break;
			} // elihw classad
		} // fi error==FALSE

submit_report_result:
		char job_id_buff[30];
		sprintf (job_id_buff, "%d.%d", ClusterId, ProcId);

		if (error) {
			const char * result[] = {
								GAHP_RESULT_FAILURE,
								job_id_buff,
								error_msg.c_str() };
			enqueue_result (current_command->request_id, result, 3);
			if ( qmgr_connection != NULL ) {
				errno = 0;
				AbortTransaction();
				if ( errno == ETIMEDOUT ) {
					failure_line_num = __LINE__;
					failure_errno = errno;
					goto contact_schedd_disconnect;
				}
			}
			current_command->status = SchedDRequest::SDCS_COMPLETED;
		} else {
			if ( RemoteCommitTransaction() < 0 ) {
				failure_line_num = __LINE__;
				failure_errno = errno;
				goto contact_schedd_disconnect;
			}
			const char * result[] = {
									GAHP_RESULT_SUCCESS,
									job_id_buff,
									NULL };
			enqueue_result (current_command->request_id, result, 3);
			current_command->status = SchedDRequest::SDCS_COMPLETED;
		}
	} // elihw


	dprintf (D_FULLDEBUG, "Processing STATUS_CONSTRAINED requests\n");
		
	// STATUS_CONSTRAINED
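	// A status request returns every job ad matching the constraint,
	// serialized in XML or new ClassAd syntax, preceded by the ad count.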
	command_queue.Rewind();
	while (command_queue.Next(current_command)) {

		if (current_command->status != SchedDRequest::SDCS_NEW)
			continue;

		if (current_command->command != SchedDRequest::SDC_STATUS_CONSTRAINED)
			continue;

		if (qmgr_connection != NULL) {
			SimpleList <MyString *> matching_ads;

			error = FALSE;
			
			ClassAd *next_ad;
			ClassAdList adlist;
				// Only use GetAllJobsByConstraint if remote schedd is
				// 6.9.5 or newer.  Previous versions either did not
				// support this call, or they closed the Qmgmt connection
				// as a side-effect of this call.
			if( ver_info.built_since_version(6,9,5) ) {
				dprintf( D_FULLDEBUG, "Calling GetAllJobsByConstraint(%s)\n",
						 current_command->constraint );
					// NOTE: this could be made more efficient if we knew
					// the list of attributes to query.  For lack of that,
					// we just get all attributes.
				GetAllJobsByConstraint( current_command->constraint, "", adlist);
			}
			else {
					// This is the old latency-prone method.
				dprintf( D_FULLDEBUG, "Calling GetNextJobByConstraint(%s)\n",
						 current_command->constraint );
				next_ad = GetNextJobByConstraint( current_command->constraint, 1 );
				while( next_ad != NULL ) {
					adlist.Insert( next_ad );
					next_ad = GetNextJobByConstraint( current_command->constraint, 0 );
				}
			}

				// NOTE: ClassAdList will deallocate the ClassAds in it

			adlist.Rewind();
			while( (next_ad=adlist.Next()) ) {
				MyString * da_buffer = new MyString();	// Use a ptr to avoid excessive copying
				if ( useXMLClassads ) {
					ClassAdXMLUnparser unparser;
					unparser.SetUseCompactSpacing(true);
					unparser.Unparse (next_ad, *da_buffer);
				} else {
					NewClassAdUnparser unparser;
					unparser.SetUseCompactSpacing(true);
					unparser.Unparse (next_ad, *da_buffer);
				}
				matching_ads.Append (da_buffer);
			}
			if ( errno == ETIMEDOUT ) {
				failure_line_num = __LINE__;
				failure_errno = errno;
				goto contact_schedd_disconnect;
			}

			// now output this list of classads into a result
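			// Result layout: GAHP_RESULT_SUCCESS, NULL (no error
			// string), the ad count, then one serialized ad per slot.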
			const char ** result  = new const char* [matching_ads.Length() + 3];

			std::string _ad_count;
			sprintf( _ad_count, "%d", matching_ads.Length() );

			int count=0;
			result[count++] = GAHP_RESULT_SUCCESS;
			result[count++] = NULL;
			result[count++] = _ad_count.c_str();

			MyString *next_string;
			matching_ads.Rewind();
			while (matching_ads.Next(next_string)) {
				result[count++] = next_string->Value();
			}

			enqueue_result (current_command->request_id, result, count);
			current_command->status = SchedDRequest::SDCS_COMPLETED;

			// Cleanup
			matching_ads.Rewind();
			while (matching_ads.Next(next_string)) {
				delete next_string;
			}
			//CommitTransaction();
			delete [] result;
		}
		else {
			const char * result[] = {
				GAHP_RESULT_FAILURE,
				error_msg.c_str(),
				"0" };
			//RemoteCommitTransaction();
			enqueue_result (current_command->request_id, result, 3);
			current_command->status = SchedDRequest::SDCS_COMPLETED;
		}
	}	//elihw

	
 contact_schedd_disconnect:
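	// Drop the qmgmt connection; FALSE tells DisconnectQ() not to
	// commit any transaction left open by an error path above.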
	if ( qmgr_connection != NULL ) {
		DisconnectQ (qmgr_connection, FALSE);
	}

	if ( failure_line_num ) {
			// We had an error talking to the schedd. Take all of our
			// incomplete commands and mark them as failed.
			// TODO Consider retrying these commands, rather than
			//   immediately marking them as failed.
		if ( failure_errno == ETIMEDOUT ) {
			dprintf( D_ALWAYS, "Timed out talking to schedd at line %d in "
					 "doContactSchedd()\n", failure_line_num );
			sprintf( error_msg, "Timed out talking to schedd" );
		} else {
			dprintf( D_ALWAYS, "Error talking to schedd at line %d in "
					 "doContactSchedd(), errno=%d (%s)\n", failure_line_num,
					 failure_errno, strerror(failure_errno) );
			sprintf( error_msg, "Error talking to schedd" );
		}
		command_queue.Rewind();
		while (command_queue.Next(current_command)) {
			if ( current_command->status != SchedDRequest::SDCS_NEW ) {
				continue;
			}
			switch( current_command->command ) {
			case SchedDRequest::SDC_UPDATE_JOB:
			case SchedDRequest::SDC_UPDATE_CONSTRAINED:
			{
				const char *result[2] = { GAHP_RESULT_FAILURE, error_msg.c_str() };
				enqueue_result (current_command->request_id, result, 2);
				current_command->status = SchedDRequest::SDCS_COMPLETED;
			}
				break;
			case SchedDRequest::SDC_UPDATE_LEASE:
			{
				const char *result[3] = { GAHP_RESULT_FAILURE, error_msg.c_str(), NULL };
				enqueue_result (current_command->request_id, result, 3);
				current_command->status = SchedDRequest::SDCS_COMPLETED;
			}
				break;
			case SchedDRequest::SDC_SUBMIT_JOB:
			{
				const char *result[3] = { GAHP_RESULT_FAILURE, "-1.-1", error_msg.c_str() };
				enqueue_result (current_command->request_id, result, 3);
				current_command->status = SchedDRequest::SDCS_COMPLETED;
			}
				break;
			case SchedDRequest::SDC_STATUS_CONSTRAINED:
			{
				const char *result[3] = { GAHP_RESULT_FAILURE, error_msg.c_str(), "0" };
				enqueue_result (current_command->request_id, result, 3);
				current_command->status = SchedDRequest::SDCS_COMPLETED;
			}
				break;
			default:
					// Do nothing
				;
			}
		}
	}

	if ( do_reschedule ) {
		dc_schedd.reschedule();
	}

		// Write all of our results to our parent.
	flush_results();

	dprintf (D_FULLDEBUG, "Finishing doContactSchedd()\n");

	// Clean up the list
	command_queue.Rewind();
	while (command_queue.Next(current_command)) {
		if (current_command->status == SchedDRequest::SDCS_COMPLETED) {
			command_queue.DeleteCurrent();
			delete current_command;
		}
	}

	// Come back soon..
	// QUESTION: Should this always be a fixed time period?
	daemonCore->Reset_Timer( contactScheddTid, contact_schedd_interval );
}