예제 #1
0
파일: rm.cpp 프로젝트: emaste/htcondor
void
handleConstraints( void )
{
	if( ! has_constraint ) {
		return;
	}
	const char* tmp = global_constraint.Value();

	CondorError errstack;
	if( doWorkByConstraint(tmp, &errstack) ) {
		fprintf( stdout, "Jobs matching constraint %s %s\n", tmp,
				 (mode == JA_REMOVE_JOBS) ?
				 "have been marked for removal" :
				 (mode == JA_REMOVE_X_JOBS) ?
				 "have been removed locally (remote state unknown)" :
				 actionWord(mode,true) );

	} else {
		fprintf( stderr, "%s\n", errstack.getFullText(true).c_str() );
		if (had_error)
		{
			fprintf( stderr, 
				 "Couldn't find/%s all jobs matching constraint %s\n",
				 actionWord(mode,false), tmp );
		}
	}
}
예제 #2
0
// This function calls up the schedd passed in on the command line and 
// registers the transferd as being available for the schedd's use.
RegisterResult
TransferD::register_to_schedd(ReliSock **regsock_ptr)
{
	CondorError errstack;
	MyString sname;
	MyString id;
	MyString sinful;
	bool rval;
	
	if (*regsock_ptr != NULL) {
		*regsock_ptr = NULL;
	}

	sname = m_features.get_schedd_sinful();
	id = m_features.get_id();

	if (sname == "N/A") {
		// no schedd supplied with which to register
		dprintf(D_ALWAYS, "No schedd specified to which to register.\n");
		return REG_RESULT_NO_SCHEDD;
	}
	
	// what is my sinful string?
	sinful = daemonCore->InfoCommandSinfulString(-1);

	dprintf(D_FULLDEBUG, "Registering myself(%s) to schedd(%s)\n",
		sinful.Value(), sname.Value());

	// hook up to the schedd.
	DCSchedd schedd(sname.Value(), NULL);

	// register myself, give myself 1 minute to connect.
	rval = schedd.register_transferd(sinful, id, 20*3, regsock_ptr, &errstack);

	if (rval == false) {
		// emit why 
		dprintf(D_ALWAYS, "TransferRequest::register_to_schedd(): Failed to "
			"register. Schedd gave reason '%s'\n", errstack.getFullText().c_str());
		return REG_RESULT_FAILED;
	}

	// WARNING WARNING WARNING WARNING //
	// WARNING WARNING WARNING WARNING //
	// WARNING WARNING WARNING WARNING //
	// WARNING WARNING WARNING WARNING //
	// WARNING WARNING WARNING WARNING //

	// Here, I must infact go back to daemon core without closing or doing
	// anything with the socket. This is because the schedd is going to
	// reconnect back to me, and I can't deadlock.

	dprintf(D_FULLDEBUG, 
		"Succesfully registered, awaiting treq channel message....\n");

	return REG_RESULT_SUCCESS;
}
예제 #3
0
bool
DCMaster::sendMasterCommand( bool insure_update, int my_cmd )
{
	CondorError errstack;
	int master_cmd = my_cmd;
	dprintf( D_FULLDEBUG, "DCMaster::sendMasterCommand: Just starting... \n"); 

	/* have we located the required master yet? */
	if( ! _addr ) {
		locate();
	}

	if( ! m_master_safesock && ! insure_update ) {
		m_master_safesock = new SafeSock;
		m_master_safesock->timeout(20);   // years of research... :)
		if( ! m_master_safesock->connect(_addr) ) {
			dprintf( D_ALWAYS, "sendMasterCommand: Failed to connect to master " 
					 "(%s)\n", _addr );
			delete m_master_safesock;
			m_master_safesock = NULL;
			return false;
		}
	}

	ReliSock reli_sock;
	bool  result;

	if( insure_update ) {
			// For now, if we have to ensure that the update gets
			// there, we use a ReliSock (TCP).
		reli_sock.timeout(20);   // years of research... :)
		if( ! reli_sock.connect(_addr) ) {
			dprintf( D_ALWAYS, "sendMasterCommand: Failed to connect to master " 
					 "(%s)\n", _addr );
			return false;
		}

		result = sendCommand( master_cmd, (Sock*)&reli_sock, 0, &errstack );
	} else {
		result = sendCommand( master_cmd, (Sock*)m_master_safesock, 0, &errstack );
	}
	if( ! result ) {
		dprintf( D_FULLDEBUG, 
				 "Failed to send %d command to master\n",master_cmd );
		if( m_master_safesock ) {
			delete m_master_safesock;
			m_master_safesock = NULL;
		}
		if( errstack.code() != 0 ) {
		        dprintf( D_ALWAYS, "ERROR: %s\n", errstack.getFullText() );
		}
		return false;
	}
	return true;
}
예제 #4
0
// Called when the schedd initially connects to the transferd to finish
// the registration process.
int
TransferD::setup_transfer_request_handler(int  /*cmd*/, Stream *sock)
{
	ReliSock *rsock = (ReliSock*)sock;
	MyString sock_id;

	dprintf(D_ALWAYS, "Got TRANSFER_CONTROL_CHANNEL!\n");

	rsock->decode();

	///////////////////////////////////////////////////////////////
	// make sure we are authenticated
	///////////////////////////////////////////////////////////////
	if( ! rsock->triedAuthentication() ) {
		CondorError errstack;
		if( ! SecMan::authenticate_sock(rsock, WRITE, &errstack) ) {
			// we failed to authenticate, we should bail out now
			// since we don't know what user is trying to perform
			// this action.
			// TODO: it'd be nice to print out what failed, but we
			// need better error propagation for that...
			errstack.push( "TransferD::setup_transfer_request_handler()", 42,
				"Failure to register transferd - Authentication failed" );
			dprintf( D_ALWAYS, "setup_transfer_request_handler() "
				"aborting: %s\n",
				errstack.getFullText().c_str() );
			refuse(rsock);
			return CLOSE_STREAM;
		} 
	}

	rsock->decode();

	///////////////////////////////////////////////////////////////
	// Register this socket with a socket handler to handle incoming requests
	///////////////////////////////////////////////////////////////

	sock_id += "<TreqChannel-Socket>";

	char* _sock_id = strdup( sock_id.Value() );		//de-const

	// register the handler for any future transfer requests on this socket.
	daemonCore->Register_Socket((Sock*)rsock, _sock_id,
		(SocketHandlercpp)&TransferD::accept_transfer_request_handler,
		"TransferD::accept_transfer_request_handler", this, ALLOW);
	
	free( _sock_id );
	
	dprintf(D_ALWAYS, "Treq channel established.\n");
	dprintf(D_ALWAYS, "Accepting Transfer Requests.\n");

	return KEEP_STREAM;
}
예제 #5
0
//---------------------------------------------------------------------------
Qmgr_connection *
DagmanClassad::OpenConnection()
{
		// Open job queue
	CondorError errstack;
	Qmgr_connection *queue = ConnectQ( _schedd->addr(), 0, false,
				&errstack, NULL, _schedd->version() );
	if ( !queue ) {
		debug_printf( DEBUG_QUIET,
					"WARNING: failed to connect to queue manager (%s)\n",
					errstack.getFullText().c_str() );
		check_warning_strictness( DAG_STRICT_3 );
		return NULL;
	}

	return queue;
}
예제 #6
0
DCStarter::X509UpdateStatus
DCStarter::delegateX509Proxy( const char * filename, time_t expiration_time, char const *sec_session_id, time_t *result_expiration_time)
{
	ReliSock rsock;
	rsock.timeout(60);
	if( ! rsock.connect(_addr) ) {
		dprintf(D_ALWAYS, "DCStarter::delegateX509Proxy: "
			"Failed to connect to starter %s\n", _addr);
		return XUS_Error;
	}

	CondorError errstack;
	if( ! startCommand(DELEGATE_GSI_CRED_STARTER, &rsock, 0, &errstack, NULL, false, sec_session_id) ) {
		dprintf( D_ALWAYS, "DCStarter::delegateX509Proxy: "
				 "Failed send command to the starter: %s\n",
				 errstack.getFullText().c_str());
		return XUS_Error;
	}

		// Send the gsi proxy
	filesize_t file_size = 0;	// will receive the size of the file
	if ( rsock.put_x509_delegation(&file_size,filename,expiration_time,result_expiration_time) < 0 ) {
		dprintf(D_ALWAYS,
			"DCStarter::delegateX509Proxy "
			"failed to delegate proxy file %s (size=%ld)\n",
			filename, (long int)file_size);
		return XUS_Error;
	}

		// Fetch the result
	rsock.decode();
	int reply = 0;
	rsock.code(reply);
	rsock.end_of_message();

	switch(reply) {
		case 0: return XUS_Error;
		case 1: return XUS_Okay;
		case 2: return XUS_Declined;
	}
	dprintf(D_ALWAYS, "DCStarter::delegateX509Proxy: "
		"remote side returned unknown code %d. Treating "
		"as an error.\n", reply);
	return XUS_Error;
}
예제 #7
0
/**
 * Process the history directory and maintain the history file map
 *
 * Only handle rotated history files, those history.* that are not an
 * index. For each one that is not in the history file map, create a
 * new HistoryFile, poll it for entries to process, and add it to the
 * map.
 */
void
aviary::history::processHistoryDirectory()
{
    const char *file = NULL;

    // each time through we rebuild our set of inodes
    if (force_reset) {
        m_historyFiles.clear();
    }

    Directory dir ( m_path.Value() );
    dir.Rewind();
    while ( ( file = dir.Next() ) )
    {
        // Skip all non-history files, e.g. history and history.*.idx
        if ( strncmp ( file, "history.", 8 ) ||
                !strncmp ( file + ( strlen ( file ) - 4 ), HISTORY_INDEX_SUFFIX, 4 ) ) continue;

        HistoryFile h_file ( ( m_path + DIR_DELIM_STRING + file ).Value() );
        CondorError errstack;
        if ( !h_file.init ( errstack ) )
        {
            dprintf ( D_ALWAYS, "%s\n", errstack.getFullText().c_str() );
            return;
        }
        errstack.clear();

        long unsigned int id;
        ASSERT ( h_file.getId ( id ) );
        HistoryFileListType::iterator entry = m_historyFiles.find ( id );
        if ( m_historyFiles.end() == entry )
        {
            HistoryFile::HistoryEntriesTypeIterators ij = h_file.poll ( errstack );
            for ( HistoryFile::HistoryEntriesTypeIterator i = ij.first;
                    i != ij.second;
                    i++ )
            {
                process ( ( *i ) );
            }

            m_historyFiles.insert ( id );
        }
    }
}
예제 #8
0
ODSHistoryFile &
ODSHistoryFile::operator=(const ODSHistoryFile &base)
{
	if (this != &base) {
		(*this).m_name = base.m_name;

		cleanup();

		// Don't just copy the stat and FILE* members, initialize them
		CondorError errstack;
		if (!init(errstack)) {
			// XXX: Should throw an exception here
			dprintf ( D_ALWAYS, "ODSHistoryFile::operator=: %s\n",
					errstack.getFullText(true).c_str());		
		}
	}

	return *this;
}
예제 #9
0
파일: rm.cpp 프로젝트: emaste/htcondor
void
handleAll()
{
	char constraint[128];
	sprintf( constraint, "%s >= 0", ATTR_CLUSTER_ID );

	CondorError errstack;
	if( doWorkByConstraint(constraint, &errstack) ) {
		fprintf( stdout, "All jobs %s.\n",
				 (mode == JA_REMOVE_JOBS) ?
				 "marked for removal" :
				 (mode == JA_REMOVE_X_JOBS) ?
				 "removed locally (remote state unknown)" :
				 actionWord(mode,true) );
	} else {
		fprintf( stderr, "%s\n", errstack.getFullText(true).c_str() );
		if (had_error)
		{
			fprintf( stderr, "Could not %s all jobs.\n",
				 actionWord(mode,false) );
		}
	}
}
예제 #10
0
bool 
DCStartd::getAds( ClassAdList &adsList )
{
	CondorError errstack;
	// fetch the query
	QueryResult q;
	CondorQuery* query;
	char* ad_addr;

	// instantiate query object
	if (!(query = new CondorQuery (STARTD_AD))) {
		dprintf( D_ALWAYS, "Error:  Out of memory\n");
		return(false);
	}

	if( this->locate() ){
		ad_addr = this->addr();
		q = query->fetchAds(adsList, ad_addr, &errstack);
		if (q != Q_OK) {
        	if (q == Q_COMMUNICATION_ERROR) {
            	dprintf( D_ALWAYS, "%s\n", errstack.getFullText(true).c_str() );
        	}
        	else {
            	dprintf (D_ALWAYS, "Error:  Could not fetch ads --- %s\n",
                     	getStrQueryResult(q));
        	}
			delete query;
        	return (false);
		}
	} else {
		delete query;
		return(false);
	}

	delete query;
	return(true);
}
예제 #11
0
//---------------------------------------------------------------------------
bool
Job::UnmonitorLogFile( ReadMultipleUserLogs &condorLogReader,
			ReadMultipleUserLogs &storkLogReader )
{
	debug_printf( DEBUG_DEBUG_2, "Unmonitoring log file <%s> for node %s\n",
				GetLogFile(), GetJobName() );

	if ( !_logIsMonitored ) {
		debug_printf( DEBUG_DEBUG_1, "Warning: log file for node "
					"%s is already unmonitored\n", GetJobName() );
		return true;
	}

	ReadMultipleUserLogs &logReader = (_jobType == TYPE_CONDOR) ?
				condorLogReader : storkLogReader;

	debug_printf( DEBUG_DEBUG_1, "Unmonitoring log file <%s> for node %s\n",
				GetLogFile(), GetJobName() );

	CondorError errstack;
	bool result = logReader.unmonitorLogFile( GetLogFile(), errstack );
	if ( !result ) {
		errstack.pushf( "DAGMan::Job", DAGMAN_ERR_LOG_FILE,
					"ERROR: Unable to unmonitor log " "file for node %s",
					GetJobName() );
		debug_printf( DEBUG_QUIET, "%s\n", errstack.getFullText().c_str() );
		EXCEPT( "Fatal log file monitoring error!\n" );
	}

	if ( result ) {
		delete [] _logFile;
		_logFile = NULL;
		_logIsMonitored = false;
	}

	return result;
}
예제 #12
0
void
doContactSchedd()
{
	int rc;
	Qmgr_connection *schedd;
	BaseJob *curr_job;
	ClassAd *next_ad;
	char expr_buf[12000];
	bool schedd_updates_complete = false;
	bool schedd_deletes_complete = false;
	bool add_remove_jobs_complete = false;
	bool update_jobs_complete = false;
	bool commit_transaction = true;
	int failure_line_num = 0;
	bool send_reschedule = false;
	std::string error_str = "";
	StringList dirty_job_ids;
	char *job_id_str;
	PROC_ID job_id;
	CondorError errstack;

	dprintf(D_FULLDEBUG,"in doContactSchedd()\n");

	initJobExprs();

	contactScheddTid = TIMER_UNSET;

	// vacateJobs
	/////////////////////////////////////////////////////
	if ( pendingScheddVacates.getNumElements() != 0 ) {
		std::string buff;
		StringList job_ids;
		VacateRequest curr_request;

		int result;
		ClassAd* rval;

		pendingScheddVacates.startIterations();
		while ( pendingScheddVacates.iterate( curr_request ) != 0 ) {
			formatstr( buff, "%d.%d", curr_request.job->procID.cluster,
						  curr_request.job->procID.proc );
			job_ids.append( buff.c_str() );
		}

		char *tmp = job_ids.print_to_string();
		if ( tmp ) {
			dprintf( D_FULLDEBUG, "Calling vacateJobs on %s\n", tmp );
			free(tmp);
			tmp = NULL;
		}

		rval = ScheddObj->vacateJobs( &job_ids, VACATE_FAST, &errstack );
		if ( rval == NULL ) {
			formatstr( error_str, "vacateJobs returned NULL, CondorError: %s!",
							   errstack.getFullText().c_str() );
			goto contact_schedd_failure;
		} else {
			pendingScheddVacates.startIterations();
			while ( pendingScheddVacates.iterate( curr_request ) != 0 ) {
				formatstr( buff, "job_%d_%d", curr_request.job->procID.cluster,
							  curr_request.job->procID.proc );
				if ( !rval->LookupInteger( buff.c_str(), result ) ) {
					dprintf( D_FULLDEBUG, "vacateJobs returned malformed ad\n" );
					EXCEPT( "vacateJobs returned malformed ad" );
				} else {
					dprintf( D_FULLDEBUG, "   %d.%d vacate result: %d\n",
							 curr_request.job->procID.cluster,
							 curr_request.job->procID.proc,result);
					pendingScheddVacates.remove( curr_request.job->procID );
					curr_request.result = (action_result_t)result;
					curr_request.job->SetEvaluateState();
					completedScheddVacates.insert( curr_request.job->procID,
												   curr_request );
				}
			}
			delete rval;
		}
	}


	schedd = ConnectQ( ScheddAddr, QMGMT_TIMEOUT, false, NULL, myUserName, CondorVersion() );
	if ( !schedd ) {
		error_str = "Failed to connect to schedd!";
		goto contact_schedd_failure;
	}


	// CheckLeases
	/////////////////////////////////////////////////////
	if ( checkLeasesSignaled ) {

		dprintf( D_FULLDEBUG, "querying for renewed leases\n" );

		// Grab the lease attributes of all the jobs in our global hashtable.

		BaseJob::JobsByProcId.startIterations();

		while ( BaseJob::JobsByProcId.iterate( curr_job ) != 0 ) {
			int new_expiration;

			rc = GetAttributeInt( curr_job->procID.cluster,
								  curr_job->procID.proc,
								  ATTR_TIMER_REMOVE_CHECK,
								  &new_expiration );
			if ( rc < 0 ) {
				if ( errno == ETIMEDOUT ) {
					failure_line_num = __LINE__;
					commit_transaction = false;
					goto contact_schedd_disconnect;
				} else {
						// This job doesn't have doesn't have a lease from
						// the submitter. Skip it.
					continue;
				}
			}
			curr_job->UpdateJobLeaseReceived( new_expiration );
		}

		checkLeasesSignaled = false;
	}	// end of handling check leases


	// AddJobs
	/////////////////////////////////////////////////////
	if ( addJobsSignaled || firstScheddContact ) {
		int num_ads = 0;

		dprintf( D_FULLDEBUG, "querying for new jobs\n" );

		// Make sure we grab all Globus Universe jobs (except held ones
		// that we previously indicated we were done with)
		// when we first start up in case we're recovering from a
		// shutdown/meltdown.
		// Otherwise, grab all jobs that are unheld and aren't marked as
		// currently being managed and aren't marked as not matched.
		// If JobManaged is undefined, equate it with false.
		// If Matched is undefined, equate it with true.
		// NOTE: Schedds from Condor 6.6 and earlier don't include
		//   "(Universe==9)" in the constraint they give to the gridmanager,
		//   so this gridmanager will pull down non-globus-universe ads,
		//   although it won't use them. This is inefficient but not
		//   incorrect behavior.
		if ( firstScheddContact ) {
			// Grab all jobs for us to manage. This expression is a
			// derivative of the expression below for new jobs. We add
			// "|| Managed =?= TRUE" to also get jobs our previous
			// incarnation was in the middle of managing when it died
			// (if it died unexpectedly). With the new term, the
			// "&& Managed =!= TRUE" from the new jobs expression becomes
			// superfluous (by boolean logic), so we drop it.
			sprintf( expr_buf,
					 "%s && %s && ((%s && %s) || %s)",
					 expr_schedd_job_constraint.c_str(), 
					 expr_not_completely_done.c_str(),
					 expr_matched_or_undef.c_str(),
					 expr_not_held.c_str(),
					 expr_managed.c_str()
					 );
		} else {
			// Grab new jobs for us to manage
			sprintf( expr_buf,
					 "%s && %s && %s && %s && %s",
					 expr_schedd_job_constraint.c_str(), 
					 expr_not_completely_done.c_str(),
					 expr_matched_or_undef.c_str(),
					 expr_not_held.c_str(),
					 expr_not_managed.c_str()
					 );
		}
		dprintf( D_FULLDEBUG,"Using constraint %s\n",expr_buf);
		next_ad = GetNextJobByConstraint( expr_buf, 1 );
		while ( next_ad != NULL ) {
			PROC_ID procID;
			BaseJob *old_job;
			int job_is_matched = 1;		// default to true if not in ClassAd

			next_ad->LookupInteger( ATTR_CLUSTER_ID, procID.cluster );
			next_ad->LookupInteger( ATTR_PROC_ID, procID.proc );
			bool job_is_managed = jobExternallyManaged(next_ad);
			next_ad->LookupBool(ATTR_JOB_MATCHED,job_is_matched);

			if ( BaseJob::JobsByProcId.lookup( procID, old_job ) != 0 ) {

				JobType *job_type = NULL;
				BaseJob *new_job = NULL;

				// job had better be either managed or matched! (or both)
				ASSERT( job_is_managed || job_is_matched );

				if ( MustExpandJobAd( next_ad ) ) {
					// Get the expanded ClassAd from the schedd, which
					// has the GridResource filled in with info from
					// the matched ad.
					delete next_ad;
					next_ad = NULL;
					next_ad = GetJobAd(procID.cluster,procID.proc);
					if ( next_ad == NULL && errno == ETIMEDOUT ) {
						failure_line_num = __LINE__;
						commit_transaction = false;
						goto contact_schedd_disconnect;
					}
					if ( next_ad == NULL ) {
						// We may get here if it was not possible to expand
						// one of the $$() expressions.  We don't want to
						// roll back the transaction and blow away the
						// hold that the schedd just put on the job, so
						// simply skip over this ad.
						dprintf(D_ALWAYS,"Failed to get expanded job ClassAd from Schedd for %d.%d.  errno=%d\n",procID.cluster,procID.proc,errno);
						goto contact_schedd_next_add_job;
					}
				}

				// Search our job types for one that'll handle this job
				jobTypes.Rewind();
				while ( jobTypes.Next( job_type ) ) {
					if ( job_type->AdMatchFunc( next_ad ) ) {

						// Found one!
						dprintf( D_FULLDEBUG, "Using job type %s for job %d.%d\n",
								 job_type->Name, procID.cluster, procID.proc );
						break;
					}
				}

				if ( job_type != NULL ) {
					new_job = job_type->CreateFunc( next_ad );
				} else {
					dprintf( D_ALWAYS, "No handlers for job %d.%d\n",
							 procID.cluster, procID.proc );
					new_job = new BaseJob( next_ad );
				}

				ASSERT(new_job);
				new_job->SetEvaluateState();
				dprintf(D_ALWAYS,"Found job %d.%d --- inserting\n",
						new_job->procID.cluster,new_job->procID.proc);
				num_ads++;

				if ( !job_is_managed ) {
					rc = tSetAttributeString( new_job->procID.cluster,
									   new_job->procID.proc,
									   ATTR_JOB_MANAGED,
									   MANAGED_EXTERNAL);
					if ( rc < 0 ) {
						failure_line_num = __LINE__;
						commit_transaction = false;
						goto contact_schedd_disconnect;
					}
				}

			} else {

				// We already know about this job, skip
				// But also set Managed=true on the schedd so that it won't
				// keep signalling us about it
				delete next_ad;
				rc = tSetAttributeString( procID.cluster, procID.proc,
								   ATTR_JOB_MANAGED, MANAGED_EXTERNAL );
				if ( rc < 0 ) {
					failure_line_num = __LINE__;
					commit_transaction = false;
					goto contact_schedd_disconnect;
				}

			}

contact_schedd_next_add_job:
			next_ad = GetNextJobByConstraint( expr_buf, 0 );
		}	// end of while next_ad
		if ( errno == ETIMEDOUT ) {
			failure_line_num = __LINE__;
			commit_transaction = false;
			goto contact_schedd_disconnect;
		}

		dprintf(D_FULLDEBUG,"Fetched %d new job ads from schedd\n",num_ads);
	}	// end of handling add jobs


	// RemoveJobs
	/////////////////////////////////////////////////////

	// We always want to perform this check. Otherwise, we may overwrite a
	// REMOVED/HELD/COMPLETED status with something else below.
	{
		int num_ads = 0;

		dprintf( D_FULLDEBUG, "querying for removed/held jobs\n" );

		// Grab jobs marked as REMOVED/COMPLETED or marked as HELD that we
		// haven't previously indicated that we're done with (by setting
		// JobManaged to "Schedd".
		sprintf( expr_buf, "(%s) && (%s) && (%s == %d || %s == %d || (%s == %d && %s =?= \"%s\"))",
				 ScheddJobConstraint, expr_not_completely_done.c_str(),
				 ATTR_JOB_STATUS, REMOVED,
				 ATTR_JOB_STATUS, COMPLETED, ATTR_JOB_STATUS, HELD,
				 ATTR_JOB_MANAGED, MANAGED_EXTERNAL );

		dprintf( D_FULLDEBUG,"Using constraint %s\n",expr_buf);
		next_ad = GetNextJobByConstraint( expr_buf, 1 );
		while ( next_ad != NULL ) {
			PROC_ID procID;
			BaseJob *next_job;
			int curr_status;

			next_ad->LookupInteger( ATTR_CLUSTER_ID, procID.cluster );
			next_ad->LookupInteger( ATTR_PROC_ID, procID.proc );
			next_ad->LookupInteger( ATTR_JOB_STATUS, curr_status );

			if ( BaseJob::JobsByProcId.lookup( procID, next_job ) == 0 ) {
				// Should probably skip jobs we already have marked as
				// held or removed

				next_job->JobAdUpdateFromSchedd( next_ad, true );
				num_ads++;

			} else if ( curr_status == REMOVED ) {

				// If we don't know about the job, act like we got an
				// ADD_JOBS signal from the schedd the next time we
				// connect, so that we'll create a Job object for it
				// and decide how it needs to be handled.
				// TODO The AddJobs and RemoveJobs queries shoule be
				//   combined into a single query.
				dprintf( D_ALWAYS, 
						 "Don't know about removed job %d.%d. "
						 "Will treat it as a new job to manage\n",
						 procID.cluster, procID.proc );
				addJobsSignaled = true;

			} else {

				dprintf( D_ALWAYS, "Don't know about held/completed job %d.%d. "
						 "Ignoring it\n",
						 procID.cluster, procID.proc );

			}

			delete next_ad;
			next_ad = GetNextJobByConstraint( expr_buf, 0 );
		}
		if ( errno == ETIMEDOUT ) {
			failure_line_num = __LINE__;
			commit_transaction = false;
			goto contact_schedd_disconnect;
		}

		dprintf(D_FULLDEBUG,"Fetched %d job ads from schedd\n",num_ads);
	}

	if ( RemoteCommitTransaction() < 0 ) {
		failure_line_num = __LINE__;
		commit_transaction = false;
		goto contact_schedd_disconnect;
	}

	add_remove_jobs_complete = true;


	// Retrieve dirty attributes
	/////////////////////////////////////////////////////
	if ( updateJobsSignaled ) {
		dprintf( D_FULLDEBUG, "querying for jobs with attribute updates\n" );

		sprintf( expr_buf, "%s && %s && %s && %s",
				 expr_schedd_job_constraint.c_str(), 
				 expr_not_completely_done.c_str(),
				 expr_not_held.c_str(),
				 expr_managed.c_str()
				 );
		dprintf( D_FULLDEBUG,"Using constraint %s\n",expr_buf);
		next_ad = GetNextDirtyJobByConstraint( expr_buf, 1 );
		while ( next_ad != NULL ) {
			ClassAd updates;
			char str[PROC_ID_STR_BUFLEN];
			next_ad->LookupInteger( ATTR_CLUSTER_ID, job_id.cluster );
			next_ad->LookupInteger( ATTR_PROC_ID, job_id.proc );
			if ( GetDirtyAttributes( job_id.cluster, job_id.proc, &updates ) < 0 ) {
				dprintf( D_ALWAYS, "Failed to retrieve dirty attributes for job %d.%d\n", job_id.cluster, job_id.proc );
				failure_line_num = __LINE__;
				delete next_ad;
				goto contact_schedd_disconnect;
		        }
			else {
				dprintf (D_FULLDEBUG, "Retrieved updated attributes for job %d.%d\n", job_id.cluster, job_id.proc);
				dPrintAd(D_JOB, updates);
			}
			if ( BaseJob::JobsByProcId.lookup( job_id, curr_job ) == 0 ) {
				curr_job->JobAdUpdateFromSchedd( &updates, false );
				ProcIdToStr( job_id, str );
				dirty_job_ids.append( str );
			}
			else {
				dprintf( D_ALWAYS, "Don't know about updated job %d.%d. "
						 "Ignoring it\n",
						 job_id.cluster, job_id.proc );
			}
			delete next_ad;
			next_ad = GetNextDirtyJobByConstraint( expr_buf, 0 );
		}
	}
	update_jobs_complete = true;

//	if ( BeginTransaction() < 0 ) {
	errno = 0;
	BeginTransaction();
	if ( errno == ETIMEDOUT ) {
		failure_line_num = __LINE__;
		commit_transaction = false;
		goto contact_schedd_disconnect;
	}


	// requestJobStatus
	/////////////////////////////////////////////////////
	if ( pendingJobStatus.getNumElements() != 0 ) {
		JobStatusRequest curr_request;

		pendingJobStatus.startIterations();
		while ( pendingJobStatus.iterate( curr_request ) != 0 ) {

			int status;

			rc = GetAttributeInt( curr_request.job_id.cluster,
								  curr_request.job_id.proc,
								  ATTR_JOB_STATUS, &status );
			if ( rc < 0 ) {
				if ( errno == ETIMEDOUT ) {
					failure_line_num = __LINE__;
					commit_transaction = false;
					goto contact_schedd_disconnect;
				} else {
						// The job is not in the schedd's job queue. This
						// probably means that the user did a condor_rm -f,
						// so return a job status of REMOVED.
					status = REMOVED;
				}
			}
				// return status
			dprintf( D_FULLDEBUG, "%d.%d job status: %d\n",
					 curr_request.job_id.cluster,
					 curr_request.job_id.proc, status );
			pendingJobStatus.remove( curr_request.job_id );
			curr_request.job_status = status;
			daemonCore->Reset_Timer( curr_request.tid, 0 );
			completedJobStatus.insert( curr_request.job_id,
									   curr_request );
		}

	}


	// Update existing jobs
	/////////////////////////////////////////////////////
	ScheddUpdateRequest *curr_request;
	pendingScheddUpdates.startIterations();

	while ( pendingScheddUpdates.iterate( curr_request ) != 0 ) {

		curr_job = curr_request->m_job;
		dprintf(D_FULLDEBUG,"Updating classad values for %d.%d:\n",
				curr_job->procID.cluster, curr_job->procID.proc);
		const char *attr_name;
		const char *attr_value;
		ExprTree *expr;
		bool fake_job_in_queue = false;
		curr_job->jobAd->ResetExpr();
		while ( curr_job->jobAd->NextDirtyExpr(attr_name, expr) == true &&
				fake_job_in_queue == false ) {
			attr_value = ExprTreeToString( expr );

			dprintf(D_FULLDEBUG,"   %s = %s\n",attr_name,attr_value);
			rc = SetAttribute( curr_job->procID.cluster,
							   curr_job->procID.proc,
							   attr_name,
							   attr_value);
			if ( rc < 0 ) {
				if ( errno == ETIMEDOUT ) {
					failure_line_num = __LINE__;
					commit_transaction = false;
					goto contact_schedd_disconnect;
				} else {
						// The job is not in the schedd's job queue. This
						// probably means that the user did a condor_rm -f,
						// so pretend that all updates for the job succeed.
						// Otherwise, we'll never make forward progress on
						// the job.
						// TODO We should also fake a job status of REMOVED
						//   to the job, so it can do what cleanup it can.
					fake_job_in_queue = true;
					break;
				}
			}
		}

	}

	if ( RemoteCommitTransaction() < 0 ) {
		failure_line_num = __LINE__;
		commit_transaction = false;
		goto contact_schedd_disconnect;
	}

	schedd_updates_complete = true;


	// Delete existing jobs
	/////////////////////////////////////////////////////
	errno = 0;
	BeginTransaction();
	if ( errno == ETIMEDOUT ) {
		failure_line_num = __LINE__;
		commit_transaction = false;
		goto contact_schedd_disconnect;
	}

	pendingScheddUpdates.startIterations();

	while ( pendingScheddUpdates.iterate( curr_request ) != 0 ) {

		curr_job = curr_request->m_job;
		if ( curr_job->deleteFromSchedd ) {
			dprintf(D_FULLDEBUG,"Deleting job %d.%d from schedd\n",
					curr_job->procID.cluster, curr_job->procID.proc);
			rc = DestroyProc(curr_job->procID.cluster,
							 curr_job->procID.proc);
				// NOENT means the job doesn't exist.  Good enough for us.
			if ( rc < 0 && rc != DESTROYPROC_ENOENT) {
				failure_line_num = __LINE__;
				commit_transaction = false;
				goto contact_schedd_disconnect;
			}
		}

	}

	if ( RemoteCommitTransaction() < 0 ) {
		failure_line_num = __LINE__;
		commit_transaction = false;
		goto contact_schedd_disconnect;
	}

	schedd_deletes_complete = true;


 contact_schedd_disconnect:
	DisconnectQ( schedd, commit_transaction );

	if ( add_remove_jobs_complete == true ) {
		firstScheddContact = false;
		addJobsSignaled = false;
	} else {
		formatstr( error_str, "Schedd connection error during Add/RemoveJobs at line %d!", failure_line_num );
		goto contact_schedd_failure;
	}

	if ( update_jobs_complete == true ) {
		updateJobsSignaled = false;
	} else {
		formatstr( error_str, "Schedd connection error during dirty attribute update at line %d!", failure_line_num );
		goto contact_schedd_failure;
	}

	if ( schedd_updates_complete == false ) {
		formatstr( error_str, "Schedd connection error during updates at line %d!", failure_line_num );
		goto contact_schedd_failure;
	}

	// Clear dirty bits for all jobs updated
	if ( !dirty_job_ids.isEmpty() ) {
		ClassAd *rval;
		dprintf( D_FULLDEBUG, "Calling clearDirtyAttrs on %d jobs\n",
				 dirty_job_ids.number() );
		dirty_job_ids.rewind();
		rval = ScheddObj->clearDirtyAttrs( &dirty_job_ids, &errstack );
		if ( rval == NULL ) {
			dprintf(D_ALWAYS, "Failed to notify schedd to clear dirty attributes.  CondorError: %s\n", errstack.getFullText().c_str() );
		}
		delete rval;
	}

	// Wake up jobs that had schedd updates pending and delete job
	// objects that wanted to be deleted
	pendingScheddUpdates.startIterations();

	while ( pendingScheddUpdates.iterate( curr_request ) != 0 ) {

		curr_job = curr_request->m_job;
		curr_job->jobAd->ClearAllDirtyFlags();

		if ( curr_job->deleteFromGridmanager ) {

				// If the Job object wants to delete the job from the
				// schedd but we failed to do so, don't delete the job
				// object yet; wait until we successfully delete the job
				// from the schedd.
			if ( curr_job->deleteFromSchedd == true &&
				 schedd_deletes_complete == false ) {
				continue;
			}

				// If wantRematch is set, send a reschedule now
			if ( curr_job->wantRematch ) {
				send_reschedule = true;
			}
			pendingScheddUpdates.remove( curr_job->procID );
			pendingScheddVacates.remove( curr_job->procID );
			pendingJobStatus.remove( curr_job->procID );
			completedJobStatus.remove( curr_job->procID );
			completedScheddVacates.remove( curr_job->procID );
			delete curr_job;

		} else {
			pendingScheddUpdates.remove( curr_job->procID );

			if ( curr_request->m_notify ) {
				curr_job->SetEvaluateState();
			}
		}

		delete curr_request;
	}

	// Poke objects that wanted to be notified when a schedd update completed
	// successfully (possibly minus deletes)
	int timer_id;
	scheddUpdateNotifications.Rewind();
	while ( scheddUpdateNotifications.Next( timer_id ) ) {
		daemonCore->Reset_Timer( timer_id, 0 );
	}
	scheddUpdateNotifications.Clear();

	if ( send_reschedule == true ) {
		ScheddObj->reschedule();
	}

	// Check if we have any jobs left to manage. If not, exit.
	if ( BaseJob::JobsByProcId.getNumElements() == 0 ) {
		dprintf( D_ALWAYS, "No jobs left, shutting down\n" );
		daemonCore->Send_Signal( daemonCore->getpid(), SIGTERM );
	}

	lastContactSchedd = time(NULL);

	if ( schedd_deletes_complete == false ) {
		error_str = "Problem using DestroyProc to delete jobs!";
		goto contact_schedd_failure;
	}

	scheddFailureCount = 0;

	// For each job that had dirty attributes, re-evaluate the policy
	dirty_job_ids.rewind();
	while ( (job_id_str = dirty_job_ids.next()) != NULL ) {
		StrToProcIdFixMe(job_id_str, job_id);
		if ( BaseJob::JobsByProcId.lookup( job_id, curr_job ) == 0 ) {
			curr_job->EvalPeriodicJobExpr();
		}
	}

dprintf(D_FULLDEBUG,"leaving doContactSchedd()\n");
	return;

 contact_schedd_failure:
	scheddFailureCount++;
	if ( error_str == "" ) {
		error_str = "Failure in doContactSchedd";
	}
	if ( scheddFailureCount >= maxScheddFailures ) {
		dprintf( D_ALWAYS, "%s\n", error_str.c_str() );
		EXCEPT( "Too many failures connecting to schedd!" );
	}
	dprintf( D_ALWAYS, "%s Will retry\n", error_str.c_str() );
	lastContactSchedd = time(NULL);
	RequestContactSchedd();
	return;
}
예제 #13
0
bool
DCTransferQueue::RequestTransferQueueSlot(bool downloading,char const *fname,char const *jobid,int timeout,MyString &error_desc)
{
	ASSERT(fname);
	ASSERT(jobid);

	if( GoAheadAlways( downloading ) ) {
		m_xfer_downloading = downloading;
		m_xfer_fname = fname;
		m_xfer_jobid = jobid;
		return true;
	}
	CheckTransferQueueSlot();
	if( m_xfer_queue_sock ) {
			// A request has already been made.
			// Currently, this is a no-op, because any upload/download slot
			// is as good as any other.  In the future, there may be
			// different queues for different paths.

		ASSERT( m_xfer_downloading == downloading );
		m_xfer_fname = fname;
		m_xfer_jobid = jobid;
		return true;
	}

	time_t started = time(NULL);
	CondorError errstack;
		// Our caller has to finish this operation in the specified
		// amount of time or risk not responding to the file transfer
		// peer in time, so ignore the timeout multiplier and set the
		// timeout exactly as specified.
	m_xfer_queue_sock = reliSock( timeout, 0, &errstack, false, true );

	if( !m_xfer_queue_sock ) {
		formatstr(m_xfer_rejected_reason,
			"Failed to connect to transfer queue manager for job %s (%s): %s.",
			jobid, fname, errstack.getFullText().c_str() );
		error_desc = m_xfer_rejected_reason;
		dprintf(D_ALWAYS,"%s\n",m_xfer_rejected_reason.c_str());
		return false;
	}

	if( timeout ) {
		timeout -= time(NULL)-started;
		if( timeout <= 0 ) {
			timeout = 1;
		}
	}

	bool connected = startCommand(
		TRANSFER_QUEUE_REQUEST, m_xfer_queue_sock, timeout, &errstack );

	if( !connected )
	{
		delete m_xfer_queue_sock;
		m_xfer_queue_sock = NULL;
		formatstr(m_xfer_rejected_reason,
			"Failed to initiate transfer queue request for job %s (%s): %s.",
			jobid, fname, errstack.getFullText().c_str() );
		error_desc = m_xfer_rejected_reason;
		dprintf(D_ALWAYS,"%s\n",m_xfer_rejected_reason.c_str());
		return false;
	}

	m_xfer_downloading = downloading;
	m_xfer_fname = fname;
	m_xfer_jobid = jobid;

	ClassAd msg;
	msg.Assign(ATTR_DOWNLOADING,downloading);
	msg.Assign(ATTR_FILE_NAME,fname);
	msg.Assign(ATTR_JOB_ID,jobid);

	m_xfer_queue_sock->encode();

	if( !msg.put(*m_xfer_queue_sock) || !m_xfer_queue_sock->end_of_message() )
	{
		formatstr(m_xfer_rejected_reason,
			"Failed to write transfer request to %s for job %s "
			"(initial file %s).",
			m_xfer_queue_sock->peer_description(),
			m_xfer_jobid.c_str(), m_xfer_fname.c_str());
		error_desc = m_xfer_rejected_reason;
		dprintf(D_ALWAYS,"%s\n",m_xfer_rejected_reason.c_str());
		return false;
	}

	m_xfer_queue_sock->decode();

		// Request has been initiated.  Now sender should call
		// PollForTransferQueueSlot() to get response.
	m_xfer_queue_pending = true;
	return true;
}
예제 #14
0
int
do_Q_request(ReliSock *syscall_sock,bool &may_fork)
{
	int	request_num = -1;
	int	rval;

	syscall_sock->decode();

	assert( syscall_sock->code(request_num) );

	dprintf(D_SYSCALLS, "Got request #%d\n", request_num);

	switch( request_num ) {

	case CONDOR_InitializeConnection:
	{
		// dprintf( D_ALWAYS, "InitializeConnection()\n" );
		bool authenticated = true;

		// Authenticate socket, if not already done by daemonCore
		if( !syscall_sock->triedAuthentication() ) {
			if( IsDebugLevel(D_SECURITY) ) {
				MyString methods;
				SecMan::getAuthenticationMethods( WRITE, &methods );
				dprintf(D_SECURITY,"Calling authenticate(%s) in qmgmt_receivers\n", methods.Value());
			}
			CondorError errstack;
			if( ! SecMan::authenticate_sock(syscall_sock, WRITE, &errstack) ) {
					// Failed to authenticate
				dprintf( D_ALWAYS, "SCHEDD: authentication failed: %s\n",
						 errstack.getFullText().c_str() );
				authenticated = false;
			}
		}

		if ( authenticated ) {
			InitializeConnection( syscall_sock->getOwner(),
					syscall_sock->getDomain() );
		} else {
			InitializeConnection( NULL, NULL );
		}
		return 0;
	}

	case CONDOR_InitializeReadOnlyConnection:
	{
		// dprintf( D_ALWAYS, "InitializeReadOnlyConnection()\n" );

		// Since InitializeConnection() does nothing, and we need
		// to record the fact that this is a read-only connection,
		// but we have to do it in the socket (since we don't have
		// any other persistent data structure, and it's probably
		// the right place anyway), set the FQU.
		//
		// We need to record if this is a read-only connection so that
		// we can avoid expanding $$ in GetJobAd; simply checking if the
		// connection is authenticated isn't sufficient, because the
		// security session cache means that read-only connection could
		// be authenticated by a previous authenticated connection from
		// the same address (when using host-based security) less than
		// the expiration period ago.
		syscall_sock->setFullyQualifiedUser( "read-only" );

		// same as InitializeConnection but no authenticate()
		InitializeConnection( NULL, NULL );

		may_fork = true;
		return 0;
	}

	case CONDOR_SetEffectiveOwner:
	{
		MyString owner;
		int terrno;

		assert( syscall_sock->get(owner) );
		assert( syscall_sock->end_of_message() );

		rval = QmgmtSetEffectiveOwner( owner.Value() );
		terrno = errno;

		syscall_sock->encode();
		assert( syscall_sock->code(rval) );
		if( rval < 0 ) {
			assert( syscall_sock->code(terrno) );
		}
		assert( syscall_sock->end_of_message() );

		char const *fqu = syscall_sock->getFullyQualifiedUser();
		dprintf(D_SYSCALLS, "\tSetEffectiveOwner\n");
		dprintf(D_SYSCALLS, "\tauthenticated user = '******'\n", fqu ? fqu : "");
		dprintf(D_SYSCALLS, "\trequested owner = '%s'\n", owner.Value());
		dprintf(D_SYSCALLS, "\trval %d, errno %d\n", rval, terrno);

		return 0;
	}

	case CONDOR_NewCluster:
	  {
		int terrno;

		assert( syscall_sock->end_of_message() );;

		errno = 0;
		rval = NewCluster( );
		terrno = errno;
		dprintf(D_SYSCALLS, 
				"\tNewCluster: rval = %d, errno = %d\n",rval,terrno );
		if ( rval > 0 ) {
			dprintf( D_AUDIT, *syscall_sock, 
					 "Submitting new job %d.0\n", rval );
		}

		syscall_sock->encode();
		assert( syscall_sock->code(rval) );
		if( rval < 0 ) {
			assert( syscall_sock->code(terrno) );
		}
		assert( syscall_sock->end_of_message() );;

		dprintf(D_FULLDEBUG,"schedd: NewCluster rval %d errno %d\n",rval,terrno);

		return 0;
	}

	case CONDOR_NewProc:
	  {
		int cluster_id = -1;
		int terrno;

		assert( syscall_sock->code(cluster_id) );
		dprintf( D_SYSCALLS, "	cluster_id = %d\n", cluster_id );
		assert( syscall_sock->end_of_message() );;

		errno = 0;
		rval = NewProc( cluster_id );
		terrno = errno;
		dprintf( D_SYSCALLS, "\trval = %d, errno = %d\n", rval, terrno );
		if ( rval > 0 ) {
			dprintf( D_AUDIT, *syscall_sock, 
					 "Submitting new job %d.%d\n", cluster_id, rval );
		}

		syscall_sock->encode();
		assert( syscall_sock->code(rval) );
		if( rval < 0 ) {
			assert( syscall_sock->code(terrno) );
		}
		assert( syscall_sock->end_of_message() );;

		dprintf(D_FULLDEBUG,"schedd: NewProc rval %d errno %d\n",rval,terrno);

		return 0;
	}

	case CONDOR_DestroyProc:
	  {
		int cluster_id = -1;
		int proc_id = -1;
		int terrno;

		assert( syscall_sock->code(cluster_id) );
		dprintf( D_SYSCALLS, "	cluster_id = %d\n", cluster_id );
		assert( syscall_sock->code(proc_id) );
		dprintf( D_SYSCALLS, "	proc_id = %d\n", proc_id );
		assert( syscall_sock->end_of_message() );;

		errno = 0;
		rval = DestroyProc( cluster_id, proc_id );
		terrno = errno;
		dprintf( D_SYSCALLS, "\trval = %d, errno = %d\n", rval, terrno );

		syscall_sock->encode();
		assert( syscall_sock->code(rval) );
		if( rval < 0 ) {
			assert( syscall_sock->code(terrno) );
		}
		assert( syscall_sock->end_of_message() );;

		dprintf(D_FULLDEBUG,"schedd: DestroyProc cluster %d proc %d rval %d errno %d\n",cluster_id,proc_id,rval,terrno);

		return 0;
	}

	case CONDOR_DestroyCluster:
	  {
		int cluster_id = -1;
		int terrno;

		assert( syscall_sock->code(cluster_id) );
		dprintf( D_SYSCALLS, "	cluster_id = %d\n", cluster_id );
		assert( syscall_sock->end_of_message() );;

		errno = 0;
		rval = DestroyCluster( cluster_id );
		terrno = errno;
		dprintf( D_SYSCALLS, "\trval = %d, errno = %d\n", rval, terrno );

		syscall_sock->encode();
		assert( syscall_sock->code(rval) );
		if( rval < 0 ) {
			assert( syscall_sock->code(terrno) );
		}
		assert( syscall_sock->end_of_message() );;
		return 0;
	}

#if 0
	case CONDOR_DestroyClusterByConstraint:
	  {
		char *constraint=NULL;
		int terrno;

		assert( syscall_sock->code(constraint) );
		assert( syscall_sock->end_of_message() );;

		errno = 0;
		rval = DestroyClusterByConstraint( constraint );
		terrno = errno;
		dprintf( D_SYSCALLS, "\trval = %d, errno = %d\n", rval, terrno );

		syscall_sock->encode();
		assert( syscall_sock->code(rval) );
		if( rval < 0 ) {
			assert( syscall_sock->code(terrno) );
		}
		free( (char *)constraint );
		assert( syscall_sock->end_of_message() );;
		return 0;
	}
#endif

	case CONDOR_SetAttributeByConstraint:
	case CONDOR_SetAttributeByConstraint2:
	  {
		char *attr_name=NULL;
		char *attr_value=NULL;
		char *constraint=NULL;
		int terrno;
		SetAttributeFlags_t flags = 0;

		assert( syscall_sock->code(constraint) );
		dprintf( D_SYSCALLS, "  constraint = %s\n",constraint);
		assert( syscall_sock->code(attr_value) );
		assert( syscall_sock->code(attr_name) );
		if( request_num == CONDOR_SetAttributeByConstraint2 ) {
			assert( syscall_sock->code( flags ) );
		}
		assert( syscall_sock->end_of_message() );;

		if (strcmp (attr_name, ATTR_MYPROXY_PASSWORD) == 0) {
			errno = 0;
			dprintf( D_SYSCALLS, "SetAttributeByConstraint (MyProxyPassword) not supported...\n");
			rval = 0;
			terrno = errno;
		} else {

			errno = 0;
			rval = SetAttributeByConstraint( constraint, attr_name, attr_value, flags );
			terrno = errno;
			dprintf( D_SYSCALLS, "\trval = %d, errno = %d\n", rval, terrno );
			if ( rval == 0 ) {
				dprintf( D_AUDIT, *syscall_sock,
						 "Set Attribute By Constraint %s, "
						 "%s = %s\n",
						 constraint, attr_name, attr_value);
			}

		}

		syscall_sock->encode();
		assert( syscall_sock->code(rval) );
		if( rval < 0 ) {
			assert( syscall_sock->code(terrno) );
		}
		free( (char *)constraint );
		free( (char *)attr_value );
		free( (char *)attr_name );
		assert( syscall_sock->end_of_message() );;
		return 0;
	}

	case CONDOR_SetAttribute:
	case CONDOR_SetAttribute2:
	  {
		int cluster_id = -1;
		int proc_id = -1;
		char *attr_name=NULL;
		char *attr_value=NULL;
		int terrno;
		SetAttributeFlags_t flags = 0;
		const char *users_username;
		const char *condor_username;

		assert( syscall_sock->code(cluster_id) );
		dprintf( D_SYSCALLS, "	cluster_id = %d\n", cluster_id );
		assert( syscall_sock->code(proc_id) );
		dprintf( D_SYSCALLS, "	proc_id = %d\n", proc_id );
		assert( syscall_sock->code(attr_value) );
		assert( syscall_sock->code(attr_name) );
		if( request_num == CONDOR_SetAttribute2 ) {
			assert( syscall_sock->code( flags ) );
		}
		users_username = syscall_sock->getOwner();
		condor_username = get_condor_username();
		if (attr_name) dprintf(D_SYSCALLS,"\tattr_name = %s\n",attr_name);
		if (attr_value) dprintf(D_SYSCALLS,"\tattr_value = %s\n",attr_value);		
		assert( syscall_sock->end_of_message() );;

		// ckireyev:
		// We do NOT want to include MyProxy password in the ClassAd (since it's a secret)
		// I'm not sure if this is the best place to do this, but....
		if (attr_name && attr_value && strcmp (attr_name, ATTR_MYPROXY_PASSWORD) == 0) {
			errno = 0;
			dprintf( D_SYSCALLS, "Got MyProxyPassword, stashing...\n");
			rval = SetMyProxyPassword (cluster_id, proc_id, attr_value);
			terrno = errno;
			dprintf( D_SYSCALLS, "\trval = %d, errno = %d\n", rval, terrno );
			
		}
		else {
			errno = 0;

			rval = SetAttribute( cluster_id, proc_id, attr_name, attr_value, flags );
			terrno = errno;
			dprintf( D_SYSCALLS, "\trval = %d, errno = %d\n", rval, terrno );
				// If we're modifying a previously-submitted job AND either
				// the client's username is not HTCondor's (i.e. not a
				// daemon) OR the client says we should log...
			if( (cluster_id != active_cluster_num) && (rval == 0) &&
				( strcmp(users_username, condor_username) || (flags & SHOULDLOG) ) ) { 

				dprintf( D_AUDIT, *syscall_sock, 
						 "Set Attribute for job %d.%d, "
						 "%s = %s\n",
						 cluster_id, proc_id, attr_name, attr_value);
			}
		}

		free( (char *)attr_value );
		free( (char *)attr_name );

		if( flags & SetAttribute_NoAck ) {
			if( rval < 0 ) {
				return -1;
			}
		}
		else {
			syscall_sock->encode();
			assert( syscall_sock->code(rval) );
			if( rval < 0 ) {
				assert( syscall_sock->code(terrno) );
			}
			assert( syscall_sock->end_of_message() );
		}
		return 0;
	}

	case CONDOR_SetTimerAttribute:
	  {
		int cluster_id = -1;
		int proc_id = -1;
		char *attr_name=NULL;
		int duration = 0;
		int terrno;

		assert( syscall_sock->code(cluster_id) );
		dprintf( D_SYSCALLS, "	cluster_id = %d\n", cluster_id );
		assert( syscall_sock->code(proc_id) );
		dprintf( D_SYSCALLS, "	proc_id = %d\n", proc_id );
		assert( syscall_sock->code(attr_name) );
		if (attr_name) dprintf(D_SYSCALLS,"\tattr_name = %s\n",attr_name);
		assert( syscall_sock->code(duration) );
		dprintf(D_SYSCALLS,"\tduration = %d\n",duration);
		assert( syscall_sock->end_of_message() );;

		errno = 0;

		rval = SetTimerAttribute( cluster_id, proc_id, attr_name, duration );
		terrno = errno;
		dprintf( D_SYSCALLS, "\trval = %d, errno = %d\n", rval, terrno );
		dprintf( D_AUDIT, *syscall_sock, 
				 "Set Timer Attribute for job %d.%d, "
				 "attr_name = %s, duration = %d\n",
				 cluster_id, proc_id, attr_name, duration);

		syscall_sock->encode();
		assert( syscall_sock->code(rval) );
		if( rval < 0 ) {
			assert( syscall_sock->code(terrno) );
		}
		free( (char *)attr_name );
		assert( syscall_sock->end_of_message() );;
		return 0;
	}

	case CONDOR_BeginTransaction:
	  {
		int terrno;

		assert( syscall_sock->end_of_message() );;

		errno = 0;
		rval = 0;	// BeginTransaction returns void (sigh), so always success
		BeginTransaction( );
		terrno = errno;
		dprintf( D_SYSCALLS, "\trval = %d, errno = %d\n", rval, terrno );

		syscall_sock->encode();
		assert( syscall_sock->code(rval) );
		if( rval < 0 ) {
			assert( syscall_sock->code(terrno) );
		}
		assert( syscall_sock->end_of_message() );;
		return 0;
	}

	case CONDOR_AbortTransaction:
	{
		int terrno;

		assert( syscall_sock->end_of_message() );;

		errno = 0;
		rval = 0;	// AbortTransaction returns void (sigh), so always success

		AbortTransaction( );
		terrno = errno;
		dprintf( D_SYSCALLS, "\trval = %d, errno = %d\n", rval, terrno );


		syscall_sock->encode();
		assert( syscall_sock->code(rval) );
		if( rval < 0 ) {
			assert( syscall_sock->code(terrno) );
		}

		assert( syscall_sock->end_of_message() );;
		return 0;
	}

	case CONDOR_CommitTransactionNoFlags:
	case CONDOR_CommitTransaction:
	  {
		int terrno;
		int flags;

		if( request_num == CONDOR_CommitTransaction ) {
			assert( syscall_sock->code(flags) );
		}
		else {
			flags = 0;
		}
		assert( syscall_sock->end_of_message() );;

		errno = 0;
		CondorError errstack;
		rval = CheckTransaction( flags, & errstack );
		terrno = errno;
		dprintf( D_SYSCALLS, "\tflags = %d, rval = %d, errno = %d\n", flags, rval, terrno );

		if( rval >= 0 ) {
			errno = 0;
			CommitTransaction( flags );
				// CommitTransaction() never returns on failure
			rval = 0;
			terrno = errno;
			dprintf( D_SYSCALLS, "\tflags = %d, rval = %d, errno = %d\n", flags, rval, terrno );
		}

		syscall_sock->encode();
		assert( syscall_sock->code(rval) );
		if( rval < 0 ) {
			assert( syscall_sock->code(terrno) );
			const CondorVersionInfo *vers = syscall_sock->get_peer_version();
			if (vers && vers->built_since_version(8, 3, 4))
			{
				// Send a classad, for less backwards-incompatibility.
				int code = 1;
				const char * reason = "QMGMT rejected job submission.";
				if( errstack.subsys() ) {
					code = 2;
					reason = errstack.message();
				}

				ClassAd reply;
				reply.Assign( "ErrorCode", code );
				reply.Assign( "ErrorReason", reason );
				assert( putClassAd( syscall_sock, reply ) );
			}
		}
		assert( syscall_sock->end_of_message() );;
		return 0;
	}

	case CONDOR_GetAttributeFloat:
	  {
		int cluster_id = -1;
		int proc_id = -1;
		char *attr_name=NULL;
		float value = 0.0;
		int terrno;

		assert( syscall_sock->code(cluster_id) );
		dprintf( D_SYSCALLS, "	cluster_id = %d\n", cluster_id );
		assert( syscall_sock->code(proc_id) );
		dprintf( D_SYSCALLS, "	proc_id = %d\n", proc_id );
		assert( syscall_sock->code(attr_name) );
		assert( syscall_sock->end_of_message() );;

		errno = 0;
		if( QmgmtMayAccessAttribute( attr_name ) ) {
			rval = GetAttributeFloat( cluster_id, proc_id, attr_name, &value );
		}
		else {
			rval = -1;
		}
		terrno = errno;
		dprintf( D_SYSCALLS, "\trval = %d, errno = %d\n", rval, terrno );

		syscall_sock->encode();
		assert( syscall_sock->code(rval) );
		if( rval < 0 ) {
			assert( syscall_sock->code(terrno) );
		}
		if( rval >= 0 ) {
			assert( syscall_sock->code(value) );
		}
		free( (char *)attr_name );
		assert( syscall_sock->end_of_message() );;
		return 0;
	}

	case CONDOR_GetAttributeInt:
	  {
		int cluster_id = -1;
		int proc_id = -1;
		char *attr_name=NULL;
		int value = 0;
		int terrno;

		assert( syscall_sock->code(cluster_id) );
		dprintf( D_SYSCALLS, "	cluster_id = %d\n", cluster_id );
		assert( syscall_sock->code(proc_id) );
		dprintf( D_SYSCALLS, "	proc_id = %d\n", proc_id );
		assert( syscall_sock->code(attr_name) );
		dprintf( D_SYSCALLS, "  attr_name = %s\n", attr_name );
		assert( syscall_sock->end_of_message() );;

		errno = 0;
		if( QmgmtMayAccessAttribute( attr_name ) ) {
			rval = GetAttributeInt( cluster_id, proc_id, attr_name, &value );
		}
		else {
			rval = -1;
		}
		terrno = errno;
		if (rval < 0) {
			dprintf( D_SYSCALLS, "GetAttributeInt(%d, %d, %s) not found.\n",
					cluster_id, proc_id, attr_name);
		} else {
			dprintf( D_SYSCALLS, "  value: %d\n", value );
			dprintf( D_SYSCALLS, "\trval = %d, errno = %d\n", rval, terrno );
		}

		syscall_sock->encode();
		assert( syscall_sock->code(rval) );
		if( rval < 0 ) {
			assert( syscall_sock->code(terrno) );
		}
		if( rval >= 0 ) {
			assert( syscall_sock->code(value) );
		}
		free( (char *)attr_name );
		assert( syscall_sock->end_of_message() );;
		return 0;
	}

	case CONDOR_GetAttributeString:
	  {
		int cluster_id = -1;
		int proc_id = -1;
		char *attr_name=NULL;
		char *value = NULL;
		int terrno;

		assert( syscall_sock->code(cluster_id) );
		dprintf( D_SYSCALLS, "	cluster_id = %d\n", cluster_id );
		assert( syscall_sock->code(proc_id) );
		dprintf( D_SYSCALLS, "	proc_id = %d\n", proc_id );
		assert( syscall_sock->code(attr_name) );
		assert( syscall_sock->end_of_message() );;

		errno = 0;
		if( QmgmtMayAccessAttribute( attr_name ) ) {
			rval = GetAttributeStringNew( cluster_id, proc_id, attr_name, &value );
		}
		else {
			rval = -1;
		}
		terrno = errno;
		dprintf( D_SYSCALLS, "\trval = %d, errno = %d\n", rval, terrno );

		syscall_sock->encode();
		assert( syscall_sock->code(rval) );
		if( rval < 0 ) {
			assert( syscall_sock->code(terrno) );
		}
		if( rval >= 0 ) {
			assert( syscall_sock->code(value) );
		}
		free( (char *)value );
		free( (char *)attr_name );
		assert( syscall_sock->end_of_message() );;
		return 0;
	}

	case CONDOR_GetAttributeExpr:
	  {
		int cluster_id = -1;
		int proc_id = -1;
		char *attr_name=NULL;

		int terrno;

		assert( syscall_sock->code(cluster_id) );
		dprintf( D_SYSCALLS, "	cluster_id = %d\n", cluster_id );
		assert( syscall_sock->code(proc_id) );
		dprintf( D_SYSCALLS, "	proc_id = %d\n", proc_id );
		assert( syscall_sock->code(attr_name) );
		assert( syscall_sock->end_of_message() );;

		char *value = NULL;

		errno = 0;
		if( QmgmtMayAccessAttribute( attr_name ) ) {
			rval = GetAttributeExprNew( cluster_id, proc_id, attr_name, &value );
		}
		else {
			rval = -1;
		}
		terrno = errno;
		dprintf( D_SYSCALLS, "\trval = %d, errno = %d\n", rval, terrno );

		syscall_sock->encode();

		if ( !syscall_sock->code(rval) ) {
			free(value);
			return -1;
		}
		if( rval < 0 ) {
			if ( !syscall_sock->code(terrno) ) {
					free(value);
					return -1;
			}
		}
		if( rval >= 0 ) {
			if ( !syscall_sock->code(value) ) {
					free(value);
					return -1;
			}
		}
		free( (char *)value );
		free( (char *)attr_name );
		assert( syscall_sock->end_of_message() );;
		return 0;
	}

	case CONDOR_GetDirtyAttributes:
	  {
		int cluster_id = -1;
		int proc_id = -1;
		ClassAd updates;

		int terrno;

		assert( syscall_sock->code(cluster_id) );
		dprintf( D_SYSCALLS, "	cluster_id = %d\n", cluster_id );
		assert( syscall_sock->code(proc_id) );
		dprintf( D_SYSCALLS, "	proc_id = %d\n", proc_id );
		assert( syscall_sock->end_of_message() );;

		errno = 0;
		rval = GetDirtyAttributes( cluster_id, proc_id, &updates );

		terrno = errno;
		dprintf( D_SYSCALLS, "\trval = %d, errno = %d\n", rval, terrno );

		syscall_sock->encode();

		if ( !syscall_sock->code(rval) ) {
			return -1;
		}
		if( rval < 0 ) {
			if ( !syscall_sock->code(terrno) ) {
					return -1;
			}
		}
		if( rval >= 0 ) {
			assert( putClassAd(syscall_sock, updates) );
		}
		assert( syscall_sock->end_of_message() );;
		return 0;
	}

	case CONDOR_DeleteAttribute:
	  {
		int cluster_id = -1;
		int proc_id = -1;
		char *attr_name=NULL;
		int terrno;

		assert( syscall_sock->code(cluster_id) );
		dprintf( D_SYSCALLS, "	cluster_id = %d\n", cluster_id );
		assert( syscall_sock->code(proc_id) );
		dprintf( D_SYSCALLS, "	proc_id = %d\n", proc_id );
		assert( syscall_sock->code(attr_name) );
		assert( syscall_sock->end_of_message() );;

		errno = 0;
		rval = DeleteAttribute( cluster_id, proc_id, attr_name );
		terrno = errno;
		dprintf( D_SYSCALLS, "\trval = %d, errno = %d\n", rval, terrno );

		syscall_sock->encode();
		assert( syscall_sock->code(rval) );
		if( rval < 0 ) {
			assert( syscall_sock->code(terrno) );
		}
		free( (char *)attr_name );
		assert( syscall_sock->end_of_message() );;
		return 0;
	}

	case CONDOR_GetJobAd:
	  {
		int cluster_id = -1;
		int proc_id = -1;
		ClassAd *ad = NULL;
		int terrno;
		bool delete_ad = false;

		assert( syscall_sock->code(cluster_id) );
		dprintf( D_SYSCALLS, "	cluster_id = %d\n", cluster_id );
		assert( syscall_sock->code(proc_id) );
		dprintf( D_SYSCALLS, "	proc_id = %d\n", proc_id );
		assert( syscall_sock->end_of_message() );;

		// dprintf( D_ALWAYS, "(%d.%d) isAuthenticated() = %d\n", cluster_id, proc_id, syscall_sock->isAuthenticated() );
		// dprintf( D_ALWAYS, "(%d.%d) getOwner() = %s\n", cluster_id, proc_id, syscall_sock->getOwner() );

		errno = 0;
		// Only fetch the jobad for legal values of cluster/proc
		if( cluster_id >= 1 ) {
			if( proc_id >= 0 ) {
				const char * fqu = syscall_sock->getFullyQualifiedUser();
				if( fqu != NULL && strcmp( fqu, "read-only" ) != 0 ) {
					// expand $$() macros in the jobad as required by GridManager.
					// The GridManager depends on the fact that the following call
					// expands $$ and saves the expansions to disk in case of
					// restart.
					ad = GetJobAd_as_ClassAd( cluster_id, proc_id, true, true );
					delete_ad = true;
					// note : since we expanded the ad, ad is now a deep
					// copy of the ad in memory, so we must delete it below.
				} else {
					ad = GetJobAd_as_ClassAd( cluster_id, proc_id, false, false );
				}
			} else if( proc_id == -1 ) {
				// allow cluster ad to be queried as required by preen, but
				// do NOT ask to expand $$() macros in a cluster ad!
				ad = GetJobAd_as_ClassAd( cluster_id, proc_id, false, false );
			}
		}
		terrno = errno;
		rval = ad ? 0 : -1;
		dprintf( D_SYSCALLS, "\trval = %d, errno = %d\n", rval, terrno );

		syscall_sock->encode();
		assert( syscall_sock->code(rval) );
		if( rval < 0 ) {
			assert( syscall_sock->code(terrno) );
		}
		if( rval >= 0 ) {
			assert( putClassAd(syscall_sock, *ad, PUT_CLASSAD_NO_PRIVATE) );
		}
		// If we called GetJobAd() with the third bool argument set
		// to True (expandedAd), it does a deep copy of the ad in the
		// queue in order to expand the $$() attributes.  So we must
		// delete it.
		if (delete_ad) delete ad;
		assert( syscall_sock->end_of_message() );;
		return 0;
	}

	case CONDOR_GetJobByConstraint:
	  {
		char *constraint=NULL;
		ClassAd *ad;
		int terrno;

		assert( syscall_sock->code(constraint) );
		assert( syscall_sock->end_of_message() );;

		errno = 0;
		ad = GetJobByConstraint_as_ClassAd( constraint );
		terrno = errno;
		rval = ad ? 0 : -1;
		dprintf( D_SYSCALLS, "\trval = %d, errno = %d\n", rval, terrno );

		syscall_sock->encode();
		assert( syscall_sock->code(rval) );
		if( rval < 0 ) {
			assert( syscall_sock->code(terrno) );
		}
		if( rval >= 0 ) {
			assert( putClassAd(syscall_sock, *ad, PUT_CLASSAD_NO_PRIVATE) );
		}
		FreeJobAd(ad);
		free( (char *)constraint );
		assert( syscall_sock->end_of_message() );;
		return 0;
	}

	case CONDOR_GetNextJob:
	  {
		ClassAd *ad;
		int initScan = 0;
		int terrno;

		assert( syscall_sock->code(initScan) );
		dprintf( D_SYSCALLS, "	initScan = %d\n", initScan );
		assert( syscall_sock->end_of_message() );;

		errno = 0;
		ad = GetNextJob( initScan );
		terrno = errno;
		rval = ad ? 0 : -1;
		dprintf( D_SYSCALLS, "\trval = %d, errno = %d\n", rval, terrno );

		syscall_sock->encode();
		assert( syscall_sock->code(rval) );
		if( rval < 0 ) {
			assert( syscall_sock->code(terrno) );
		}
		if( rval >= 0 ) {
			assert( putClassAd(syscall_sock, *ad, PUT_CLASSAD_NO_PRIVATE) );
		}
		FreeJobAd(ad);
		assert( syscall_sock->end_of_message() );;
		return 0;
	}

	case CONDOR_GetNextJobByConstraint:
	  {
		char *constraint=NULL;
		ClassAd *ad;
		int initScan = 0;
		int terrno;

		assert( syscall_sock->code(initScan) );
		dprintf( D_SYSCALLS, "	initScan = %d\n", initScan );
		if ( !(syscall_sock->code(constraint)) ) {
			if (constraint != NULL) {
				free(constraint);
				constraint = NULL;
			}
			return -1;
		}
		assert( syscall_sock->end_of_message() );;

		errno = 0;
		ad = GetNextJobByConstraint( constraint, initScan );
		terrno = errno;
		rval = ad ? 0 : -1;
		dprintf( D_SYSCALLS, "\trval = %d, errno = %d\n", rval, terrno );

		syscall_sock->encode();
		assert( syscall_sock->code(rval) );
		if( rval < 0 ) {
			assert( syscall_sock->code(terrno) );
		}
		if( rval >= 0 ) {
			assert( putClassAd(syscall_sock, *ad, PUT_CLASSAD_NO_PRIVATE) );
		}
		FreeJobAd(ad);
		free( (char *)constraint );
		assert( syscall_sock->end_of_message() );;
		return 0;
	}
	case CONDOR_GetNextDirtyJobByConstraint:
	{
		char *constraint=NULL;
		ClassAd *ad;
		int initScan = 0;
		int terrno;

		assert( syscall_sock->code(initScan) );
		dprintf( D_SYSCALLS, "  initScan = %d\n", initScan );
		if ( !(syscall_sock->code(constraint)) ) {
			if (constraint != NULL) {
				free(constraint);
				constraint = NULL;
			}
			return -1;
		}
		assert( syscall_sock->end_of_message() );

		errno = 0;
		ad = GetNextDirtyJobByConstraint( constraint, initScan );
		terrno = errno;
		rval = ad ? 0 : -1;
		dprintf( D_SYSCALLS, "\trval = %d, errno = %d\n", rval, terrno );

		syscall_sock->encode();
		assert( syscall_sock->code(rval) );
		if( rval < 0 ) {
			assert( syscall_sock->code(terrno) );
		}
		if( rval >= 0 ) {
			assert( putClassAd(syscall_sock, *ad, PUT_CLASSAD_NO_PRIVATE) );
		}
		FreeJobAd(ad);
		free( (char *)constraint );
		assert( syscall_sock->end_of_message() );
		return 0;
	}

	case CONDOR_SendSpoolFile:
	  {
		char *filename=NULL;
		int terrno;

		assert( syscall_sock->code(filename) );
		assert( syscall_sock->end_of_message() );;

		errno = 0;
		rval = SendSpoolFile( filename );
		terrno = errno;
		dprintf( D_SYSCALLS, "\trval = %d, errno = %d\n", rval, terrno );
#if 0
		syscall_sock->encode();
		assert( syscall_sock->code(rval) );
		if( rval < 0 ) {
			assert( syscall_sock->code(terrno) );
		}
		assert( syscall_sock->end_of_message() );;
#endif
		free( (char *)filename );
		return 0;
	}

	case CONDOR_SendSpoolFileIfNeeded:
	  {
		int terrno;

		ClassAd ad;
		assert( getClassAd(syscall_sock, ad) );
		assert( syscall_sock->end_of_message() );;

		errno = 0;
		rval = SendSpoolFileIfNeeded(ad);
		terrno = errno;
		dprintf( D_SYSCALLS, "\trval = %d, errno = %d\n", rval, terrno );

		return 0;
	}

	case CONDOR_GetAllJobsByConstraint:
	  {
		char *constraint=NULL;
		char *projection=NULL;
		ClassAd *ad;
		int terrno;
		int initScan = 1;
		classad::References proj;

		if ( !(syscall_sock->code(constraint)) ) {
			if (constraint != NULL) {
				free(constraint);
				constraint = NULL;
			}
			return -1;
		}
		if ( !(syscall_sock->code(projection)) ) {
			if (projection != NULL) {
				free(constraint);
				free(projection);
				projection = NULL;
			}
			return -1;
		}
		dprintf( D_SYSCALLS, "	constraint = %s\n", constraint );
		dprintf( D_SYSCALLS, "	projection = %s\n", projection ? projection : "");

		assert( syscall_sock->end_of_message() );;

		// if there is a projection, convert it into a set of attribute names
		if (projection) {
			StringTokenIterator list(projection);
			const std::string * attr;
			while ((attr = list.next_string())) { proj.insert(*attr); }
		}

		syscall_sock->encode();

		do {
			errno = 0;

			ad = GetNextJobByConstraint( constraint, initScan );
			initScan=0; // one first time through, otherwise 0

			terrno = errno;
			rval = ad ? 0 : -1;
			dprintf( D_SYSCALLS, "\trval = %d, errno = %d\n", rval, terrno );

			assert( syscall_sock->code(rval) );

			if( rval < 0 ) {
				assert( syscall_sock->code(terrno) );
			}

			if( rval >= 0 ) {
				assert( putClassAd(syscall_sock, *ad, PUT_CLASSAD_NO_PRIVATE, proj.empty() ? NULL : &proj) );
				FreeJobAd(ad);
			}
		} while (rval >= 0);
		assert( syscall_sock->end_of_message() );;

		free( (char *)constraint );
		free( (char *)projection );
		return 0; 
	}

	case CONDOR_CloseSocket:
	{
		assert( syscall_sock->end_of_message() );;
		return -1;
	}

	} /* End of switch */

	return -1;
} /* End of function */
예제 #15
0
int main(int argc, char **argv)
{
	int		result = 0;

	if ( argc <= 1 || (argc >= 2 && !strcmp("-usage", argv[1])) ) {
		printf("Usage: condor_check_userlogs <log file 1> "
				"[log file 2] ... [log file n]\n");
		exit(0);
	}

		// Set up dprintf.
	dprintf_set_tool_debug("condor_check_userlogs", 0);
	set_debug_flags(NULL, D_ALWAYS);

	StringList	logFiles;
	for ( int argnum = 1; argnum < argc; ++argnum ) {
		logFiles.append(argv[argnum]);
	}
	logFiles.rewind();

	ReadMultipleUserLogs	ru;
	char *filename;
	while ( (filename = logFiles.next()) ) {
		MyString filestring( filename );
		CondorError errstack;
		if ( !ru.monitorLogFile( filestring, false, errstack ) ) {
			fprintf( stderr, "Error monitoring log file %s: %s\n", filename,
						errstack.getFullText().c_str() );
			result = 1;
		}
	}

	bool logsMissing = false;

	CheckEvents		ce;
	int totalSubmitted = 0;
	int netSubmitted = 0;
	bool done = false;
	while( !done ) {

    	ULogEvent* e = NULL;
		MyString errorMsg;

        ULogEventOutcome outcome = ru.readEvent( e );

        switch (outcome) {

        case ULOG_RD_ERROR:
        case ULOG_UNK_ERROR:
			logsMissing = true;
        case ULOG_NO_EVENT:

			printf( "Log outcome: %s\n", ULogEventOutcomeNames[outcome] );
			done = true;
			break;
 
        case ULOG_OK:

			printf( "Log event: %s (%d.%d.%d)",
						ULogEventNumberNames[e->eventNumber],
						e->cluster, e->proc, e->subproc );

			if ( ce.CheckAnEvent(e, errorMsg) != CheckEvents::EVENT_OKAY ) {
				fprintf(stderr, "%s\n", errorMsg.Value());
				result = 1;
			}

			if( e->eventNumber == ULOG_SUBMIT ) {
				SubmitEvent* ee = (SubmitEvent*) e;
				printf( " (\"%s\")", ee->submitEventLogNotes );
				++totalSubmitted;
				++netSubmitted;
				printf( "\n Total submitted: %d; net submitted: %d\n",
						totalSubmitted, netSubmitted );
			}
			
			if( e->eventNumber == ULOG_JOB_HELD ) {
				JobHeldEvent* ee = (JobHeldEvent*) e;
				printf( " (code=%d subcode=%d)", ee->getReasonCode(),
						ee->getReasonSubCode());
			}

			if( e->eventNumber == ULOG_JOB_TERMINATED ) {
				--netSubmitted;
				printf( "\n Total submitted: %d; net submitted: %d\n",
						totalSubmitted, netSubmitted );
			}

			if( e->eventNumber == ULOG_JOB_ABORTED ) {
				--netSubmitted;
				printf( "\n Total submitted: %d; net submitted: %d\n",
						totalSubmitted, netSubmitted );
			}

			if( e->eventNumber == ULOG_EXECUTABLE_ERROR ) {
				--netSubmitted;
				printf( "\n Total submitted: %d; net submitted: %d\n",
						totalSubmitted, netSubmitted );
			}

			printf( "\n" );
			break;

		default:

			fprintf(stderr, "Unexpected read event outcome!\n");
			result = 1;
			break;
        }
	}

	logFiles.rewind();
	while ( (filename = logFiles.next()) ) {
		MyString filestring( filename );
		CondorError errstack;
		if ( !ru.unmonitorLogFile( filestring, errstack ) ) {
			fprintf( stderr, "Error unmonitoring log file %s: %s\n", filename,
						errstack.getFullText().c_str() );
			result = 1;
		}
	}

	MyString errorMsg;
	CheckEvents::check_event_result_t checkAllResult =
				ce.CheckAllJobs(errorMsg);
	if ( checkAllResult != CheckEvents::EVENT_OKAY ) {
		fprintf(stderr, "%s\n", errorMsg.Value());
		fprintf(stderr, "CheckAllJobs() result: %s\n",
					CheckEvents::ResultToString(checkAllResult));
		result = 1;
	}

	if ( result == 0 ) {
		if ( !logsMissing ) {
			printf("Log(s) are okay\n");
		} else {
			printf("Log(s) may be okay\n");
			printf(  "Some logs cannot be read\n");
		}
	} else {
		printf("Log(s) have error(s)\n");
	}
	return result;
}
예제 #16
0
///////////////////////////////////////////////////////////////////////////////
// Note: this method should get speeded up (see Gnats PR 846).
MyString
MultiLogFiles::loadLogFileNameFromSubFile(const MyString &strSubFilename,
		const MyString &directory, bool &isXml, bool usingDefaultNode)
{
	dprintf( D_FULLDEBUG, "MultiLogFiles::loadLogFileNameFromSubFile(%s, %s)\n",
				strSubFilename.Value(), directory.Value() );

	TmpDir		td;
	if ( directory != "" ) {
		MyString	errMsg;
		if ( !td.Cd2TmpDir(directory.Value(), errMsg) ) {
			dprintf(D_ALWAYS, "Error from Cd2TmpDir: %s\n", errMsg.Value());
			return "";
		}
	}

	StringList	logicalLines;
	if ( fileNameToLogicalLines( strSubFilename, logicalLines ) != "" ) {
		return "";
	}

	MyString	logFileName("");
	MyString	initialDir("");
	MyString	isXmlLogStr("");

		// Now look through the submit file logical lines to find the
		// log file and initial directory (if specified) and combine
		// them into a path to the log file that's either absolute or
		// relative to the DAG submit directory.  Also look for log_xml.
	const char *logicalLine;
	while( (logicalLine = logicalLines.next()) != NULL ) {
		MyString	submitLine(logicalLine);
		MyString	tmpLogName = getParamFromSubmitLine(submitLine, "log");
		if ( tmpLogName != "" ) {
			logFileName = tmpLogName;
		}

			// If we are using the default node log, we don't care
			// about these
		if( !usingDefaultNode ) {
			MyString	tmpInitialDir = getParamFromSubmitLine(submitLine,
					"initialdir");
			if ( tmpInitialDir != "" ) {
				initialDir = tmpInitialDir;
			}

			MyString tmpLogXml = getParamFromSubmitLine(submitLine, "log_xml");
			if ( tmpLogXml != "" ) {
				isXmlLogStr = tmpLogXml;
			}
		}
	}

	if ( !usingDefaultNode ) {
			//
			// Check for macros in the log file name -- we currently don't
			// handle those.
			//
			// If we are using the default node, we don't need to check this
		if ( logFileName != "" ) {
			if ( strstr(logFileName.Value(), "$(") ) {
				dprintf(D_ALWAYS, "MultiLogFiles: macros ('$(...') not allowed "
						"in log file name (%s) in DAG node submit files\n",
						logFileName.Value());
				logFileName = "";
			}
		}

			// Do not need to prepend initialdir if we are using the 
			// default node log
		if ( logFileName != "" ) {
				// Prepend initialdir to log file name if log file name is not
				// an absolute path.
			if ( initialDir != "" && !fullpath(logFileName.Value()) ) {
				logFileName = initialDir + DIR_DELIM_STRING + logFileName;
			}

				// We do this in case the same log file is specified with a
				// relative and an absolute path.  
				// Note: we now do further checking that doesn't rely on
				// comparing paths to the log files.  wenger 2004-05-27.
			CondorError errstack;
			if ( !makePathAbsolute( logFileName, errstack ) ) {
				dprintf(D_ALWAYS, "%s\n", errstack.getFullText().c_str());
				return "";
			}
		}
		isXmlLogStr.lower_case();
		isXml = (isXmlLogStr == "true");
		if ( directory != "" ) {
			MyString	errMsg;
			if ( !td.Cd2MainDir(errMsg) ) {
				dprintf(D_ALWAYS, "Error from Cd2MainDir: %s\n", errMsg.Value());
				return "";
			}
		}
	}
	return logFileName;
}
예제 #17
0
bool DCSchedd::recycleShadow( int previous_job_exit_reason, ClassAd **new_job_ad, MyString &error_msg )
{
	int timeout = 300;
	CondorError errstack;

	ReliSock sock;
	if( !connectSock(&sock,timeout,&errstack) ) {
		error_msg.formatstr("Failed to connect to schedd: %s",
						  errstack.getFullText().c_str());
		return false;
	}

	if( !startCommand(RECYCLE_SHADOW, &sock, timeout, &errstack) ) {
		error_msg.formatstr("Failed to send RECYCLE_SHADOW to schedd: %s",
						  errstack.getFullText().c_str());
		return false;
	}

	if( !forceAuthentication(&sock, &errstack) ) {
		error_msg.formatstr("Failed to authenticate: %s",
						  errstack.getFullText().c_str());
		return false;
	}

	sock.encode();
	int mypid = getpid();
	if( !sock.put( mypid ) ||
		!sock.put( previous_job_exit_reason ) ||
		!sock.end_of_message() )
	{
		error_msg = "Failed to send job exit reason";
		return false;
	}

	sock.decode();

	int found_new_job = 0;
	sock.get( found_new_job );

	if( found_new_job ) {
		*new_job_ad = new ClassAd();
		if( !getClassAd( &sock, *(*new_job_ad) ) ) {
			error_msg = "Failed to receive new job ClassAd";
			delete *new_job_ad;
			*new_job_ad = NULL;
			return false;
		}
	}

	if( !sock.end_of_message() ) {
		error_msg = "Failed to receive end of message";
		delete *new_job_ad;
		*new_job_ad = NULL;
		return false;
	}

	if( *new_job_ad ) {
		sock.encode();
		int ok=1;
		if( !sock.put(ok) ||
			!sock.end_of_message() )
		{
			error_msg = "Failed to send ok";
			delete *new_job_ad;
			*new_job_ad = NULL;
			return false;
		}
	}

	return true;
}
예제 #18
0
파일: rm.cpp 프로젝트: emaste/htcondor
int
main( int argc, char *argv[] )
{
	char	*arg;
	char	**args = (char **)malloc(sizeof(char *)*(argc - 1)); // args 
	int					nArgs = 0;				// number of args 
	int					i;
	char*	cmd_str;
	DCCollector* pool = NULL;
	char* scheddName = NULL;
	char* scheddAddr = NULL;

		// Initialize our global variables
	has_constraint = false;

	myDistro->Init( argc, argv );
	MyName = strrchr( argv[0], DIR_DELIM_CHAR );
	if( !MyName ) {
		MyName = argv[0];
	} else {
		MyName++;
	}

	cmd_str = strchr( MyName, '_');

	// we match modes based on characters after the '_'. This means
	// 'condor_hold.exe' or 'condor_hold_wrapped' are all legal argv[0]'s
	// for condor_hold.

	if (cmd_str && strncasecmp( cmd_str, "_hold", strlen("_hold") ) == MATCH) { 

		mode = JA_HOLD_JOBS;

	} else if ( cmd_str && 
			strncasecmp( cmd_str, "_release", strlen("_release") ) == MATCH ) {

		mode = JA_RELEASE_JOBS;

	} else if ( cmd_str && 
			strncasecmp( cmd_str, "_suspend", strlen("_suspend") ) == MATCH ) {

		mode = JA_SUSPEND_JOBS;

	} else if ( cmd_str && 
			strncasecmp( cmd_str, "_continue", strlen("_continue") ) == MATCH ) {

		mode = JA_CONTINUE_JOBS;

	}else if ( cmd_str && 
			strncasecmp( cmd_str, "_rm", strlen("_rm") ) == MATCH ) {

		mode = JA_REMOVE_JOBS;

	} else if( cmd_str && ! strncasecmp(cmd_str, "_vacate_job",
									strlen("_vacate_job")) ) {  

		mode = JA_VACATE_JOBS;

	} else {
		// don't know what mode we're using, so bail.
		fprintf( stderr, "Unrecognized command name, \"%s\"\n", MyName ); 
		usage();
	}

	config();


	if( argc < 2 ) {
			// We got no indication of what to act on
		fprintf( stderr, "You did not specify any jobs\n" ); 
		usage();
	}

#if !defined(WIN32)
	install_sig_handler(SIGPIPE, SIG_IGN );
#endif

	for( argv++; (arg = *argv); argv++ ) {
		if( arg[0] == '-' ) {
            if (match_prefix(arg, "-debug")) {
				// dprintf to console
				dprintf_set_tool_debug("TOOL", 0);
            } else if (match_prefix(arg, "-constraint")) {
				args[nArgs] = arg;
				nArgs++;
				argv++;
				if( ! *argv ) {
					fprintf( stderr, 
							 "%s: -constraint requires another argument\n", 
							 MyName);
					exit(1);
				}				
				args[nArgs] = *argv;
				nArgs++;
				ConstraintArg = true;
            } else if (match_prefix(arg, "-all")) {
                All = true;
            } else if (match_prefix(arg, "-addr")) {
                argv++;
                if( ! *argv ) {
                    fprintf( stderr, 
                             "%s: -addr requires another argument\n", 
                             MyName);
                    exit(1);
                }				
                if( is_valid_sinful(*argv) ) {
                    scheddAddr = strdup(*argv);
                    if( ! scheddAddr ) {
                        fprintf( stderr, "Out of memory!\n" );
                        exit(1);
                    }
                } else {
                    fprintf( stderr, 
                             "%s: \"%s\" is not a valid address\n",
                             MyName, *argv );
                    fprintf( stderr, "Should be of the form "
                             "<ip.address.here:port>\n" );
                    fprintf( stderr, 
                             "For example: <123.456.789.123:6789>\n" );
                    exit( 1 );
                }
			} else if (match_prefix(arg, "-reason")) {
				argv++;
				if( ! *argv ) {
					fprintf( stderr, 
							 "%s: -reason requires another argument\n", 
							 MyName);
					exit(1);
				}		
				actionReason = strdup(*argv);		
				if( ! actionReason ) {
					fprintf( stderr, "Out of memory!\n" );
					exit(1);
				}
			} else if (match_prefix(arg, "-subcode")) {
				argv++;
				if( ! *argv ) {
					fprintf( stderr, 
							 "%s: -subcode requires another argument\n", 
							 MyName);
					exit(1);
				}		
				char *end = NULL;
				long code = strtol(*argv,&end,10);
				if( code == LONG_MIN || !end || *end || end==*argv ) {
					fprintf( stderr, "Invalid -subcode %s!\n", *argv );
					exit(1);
				}
				holdReasonSubCode = strdup(*argv);
				ASSERT( holdReasonSubCode );
            } else if (match_prefix(arg, "-forcex")) {
				if( mode == JA_REMOVE_JOBS ) {
					mode = JA_REMOVE_X_JOBS;
				} else {
                    fprintf( stderr, 
                             "-forcex is only valid with condor_rm\n" );
					usage();
				}
            } else if (match_prefix(arg, "-fast")) {
				if( mode == JA_VACATE_JOBS ) {
					mode = JA_VACATE_FAST_JOBS;
				} else {
                    fprintf( stderr, 
                             "-fast is only valid with condor_vacate_job\n" );
					usage();
				}
            } else if (match_prefix(arg, "-name")) {
				// use the given name as the schedd name to connect to
				argv++;
				if( ! *argv ) {
					fprintf( stderr, "%s: -name requires another argument\n", 
							 MyName);
					exit(1);
				}				
				if( !(scheddName = get_daemon_name(*argv)) ) { 
					fprintf( stderr, "%s: unknown host %s\n", 
							 MyName, get_host_part(*argv) );
					exit(1);
				}
            } else if (match_prefix(arg, "-pool")) {
				// use the given name as the central manager to query
				argv++;
				if( ! *argv ) {
					fprintf( stderr, "%s: -pool requires another argument\n", 
							 MyName);
					exit(1);
				}				
				if( pool ) {
					delete pool;
				}
				pool = new DCCollector( *argv );
				if( ! pool->addr() ) {
					fprintf( stderr, "%s: %s\n", MyName, pool->error() );
					exit(1);
				}
            } else if (match_prefix(arg, "-version")) {
				version();
            } else if (match_prefix(arg, "-help")) {
				usage(0);
            } else {
				fprintf( stderr, "Unrecognized option: %s\n", arg ); 
				usage();
			}
		} else {
			if( All ) {
					// If -all is set, there should be no other
					// constraint arguments.
				usage();
			}
			args[nArgs] = arg;
			nArgs++;
			UserJobIdArg = true;
		}
	}

	if( ! (All || nArgs) ) {
			// We got no indication of what to act on
		fprintf( stderr, "You did not specify any jobs\n" ); 
		usage();
	}

	if ( ConstraintArg && UserJobIdArg ) {
		fprintf( stderr, "You can't use both -constraint and usernames or job ids\n" );
		usage();
	}

		// Pick the default reason if the user didn't specify one
	if( actionReason == NULL ) {
		switch( mode ) {
		case JA_RELEASE_JOBS:
			actionReason = strdup("via condor_release");
			break;
		case JA_REMOVE_X_JOBS:
			actionReason = strdup("via condor_rm -forcex");
			break;
		case JA_REMOVE_JOBS:
			actionReason = strdup("via condor_rm");
			break;
		case JA_HOLD_JOBS:
			actionReason = strdup("via condor_hold");
			break;
		case JA_SUSPEND_JOBS:
			actionReason = strdup("via condor_suspend");
			break;
		case JA_CONTINUE_JOBS:
			actionReason = strdup("via condor_continue");
			break;
		default:
			actionReason = NULL;
		}
	}

		// We're done parsing args, now make sure we know how to
		// contact the schedd. 
	if( ! scheddAddr ) {
			// This will always do the right thing, even if either or
			// both of scheddName or pool are NULL.
		schedd = new DCSchedd( scheddName, pool ? pool->addr() : NULL );
	} else {
		schedd = new DCSchedd( scheddAddr );
	}
	if( ! schedd->locate() ) {
		fprintf( stderr, "%s: %s\n", MyName, schedd->error() ); 
		exit( 1 );
	}

		// Special case for condor_rm -forcex: a configuration
		// setting can disable this functionality.  The real
		// validation is done in the schedd, but we can catch
		// the most common cases here and give a useful error
		// message.
	if(mode == JA_REMOVE_X_JOBS) {
		if( mayUserForceRm() == false) {
			fprintf( stderr, "Remove aborted. condor_rm -forcex has been disabled by the administrator.\n" );
			exit( 1 );
		}
	}

		// Process the args so we do the work.
	if( All ) {
		handleAll();
	} else {
		for(i = 0; i < nArgs; i++) {
			if( match_prefix( args[i], "-constraint" ) ) {
				i++;
				addConstraint( args[i] );
			} else {
				procArg(args[i]);
			}
		}
	}

		// Deal with all the -constraint constraints
	handleConstraints();

		// Finally, do the actual work for all our args which weren't
		// constraints...
	if( job_ids ) {
		CondorError errstack;
		ClassAd* result_ad = doWorkByList( job_ids, &errstack );
		if (had_error) {
			fprintf( stderr, "%s\n", errstack.getFullText(true).c_str() );
		}
		printNewMessages( result_ad, job_ids );
		delete( result_ad );
	}

		// If releasing jobs, and no errors happened, do a 
		// reschedule command now.
	if ( mode == JA_RELEASE_JOBS && had_error == false ) {
		Daemon  my_schedd(DT_SCHEDD, NULL, NULL);
		CondorError errstack;
		if (!my_schedd.sendCommand(RESCHEDULE, Stream::safe_sock, 0, &errstack)) {
			fprintf( stderr, "%s\n", errstack.getFullText(true).c_str() );
		}
	}

	return had_error;
}
예제 #19
0
int
main (int argc, char *argv[])
{
#if !defined(WIN32)
	install_sig_handler(SIGPIPE, (SIG_HANDLER)SIG_IGN );
#endif

	// initialize to read from config file
	myDistro->Init( argc, argv );
	myName = argv[0];
	config();
	dprintf_config_tool_on_error(0);

	// The arguments take two passes to process --- the first pass
	// figures out the mode, after which we can instantiate the required
	// query object.  We add implied constraints from the command line in
	// the second pass.
	firstPass (argc, argv);
	
	// if the mode has not been set, it is STARTD_NORMAL
	if (mode == MODE_NOTSET) {
		setMode (MODE_STARTD_NORMAL, 0, DEFAULT);
	}

	// instantiate query object
	if (!(query = new CondorQuery (type))) {
		dprintf_WriteOnErrorBuffer(stderr, true);
		fprintf (stderr, "Error:  Out of memory\n");
		exit (1);
	}
	// if a first-pass setMode set a mode_constraint, apply it now to the query object
	if (mode_constraint && ! explicit_format) {
		query->addANDConstraint(mode_constraint);
	}

	// set pretty print style implied by the type of entity being queried
	// but do it with default priority, so that explicitly requested options
	// can override it
	switch (type)
	{
#ifdef HAVE_EXT_POSTGRESQL
	  case QUILL_AD:
		setPPstyle(PP_QUILL_NORMAL, 0, DEFAULT);
		break;
#endif /* HAVE_EXT_POSTGRESQL */


	  case DEFRAG_AD:
		setPPstyle(PP_GENERIC_NORMAL, 0, DEFAULT);
		break;

	  case STARTD_AD:
		setPPstyle(PP_STARTD_NORMAL, 0, DEFAULT);
		break;

	  case SCHEDD_AD:
		setPPstyle(PP_SCHEDD_NORMAL, 0, DEFAULT);
		break;

	  case MASTER_AD:
		setPPstyle(PP_MASTER_NORMAL, 0, DEFAULT);
		break;

	  case CKPT_SRVR_AD:
		setPPstyle(PP_CKPT_SRVR_NORMAL, 0, DEFAULT);
		break;

	  case COLLECTOR_AD:
		setPPstyle(PP_COLLECTOR_NORMAL, 0, DEFAULT);
		break;

	  case STORAGE_AD:
		setPPstyle(PP_STORAGE_NORMAL, 0, DEFAULT);
		break;

	  case NEGOTIATOR_AD:
		setPPstyle(PP_NEGOTIATOR_NORMAL, 0, DEFAULT);
		break;

      case GRID_AD:
        setPPstyle(PP_GRID_NORMAL, 0, DEFAULT);
		break;

	  case GENERIC_AD:
		setPPstyle(PP_GENERIC, 0, DEFAULT);
		break;

	  case ANY_AD:
		setPPstyle(PP_ANY_NORMAL, 0, DEFAULT);
		break;

	  default:
		setPPstyle(PP_VERBOSE, 0, DEFAULT);
	}

	// set the constraints implied by the mode
	switch (mode) {
#ifdef HAVE_EXT_POSTGRESQL
	  case MODE_QUILL_NORMAL:
#endif /* HAVE_EXT_POSTGRESQL */

	  case MODE_DEFRAG_NORMAL:
	  case MODE_STARTD_NORMAL:
	  case MODE_MASTER_NORMAL:
	  case MODE_CKPT_SRVR_NORMAL:
	  case MODE_SCHEDD_NORMAL:
	  case MODE_SCHEDD_SUBMITTORS:
	  case MODE_COLLECTOR_NORMAL:
	  case MODE_NEGOTIATOR_NORMAL:
	  case MODE_STORAGE_NORMAL:
	  case MODE_GENERIC_NORMAL:
	  case MODE_ANY_NORMAL:
	  case MODE_GRID_NORMAL:
	  case MODE_HAD_NORMAL:
		break;

	  case MODE_OTHER:
			// tell the query object what the type we're querying is
		query->setGenericQueryType(genericType);
		free(genericType);
		genericType = NULL;
		break;

	  case MODE_STARTD_AVAIL:
			  // For now, -avail shows you machines avail to anyone.
		sprintf (buffer, "%s == \"%s\"", ATTR_STATE,
					state_to_string(unclaimed_state));
		if (diagnose) {
			printf ("Adding constraint [%s]\n", buffer);
		}
		query->addORConstraint (buffer);
		break;


	  case MODE_STARTD_RUN:
		sprintf (buffer, "%s == \"%s\"", ATTR_STATE,
					state_to_string(claimed_state));
		if (diagnose) {
			printf ("Adding constraint [%s]\n", buffer);
		}
		query->addORConstraint (buffer);
		break;

	  case MODE_STARTD_COD:
	    sprintf (buffer, "%s > 0", ATTR_NUM_COD_CLAIMS );
		if (diagnose) {
			printf ("Adding constraint [%s]\n", buffer);
		}
		query->addORConstraint (buffer);
		break;

	  default:
		break;
	}	

	if(javaMode) {
		sprintf( buffer, "%s == TRUE", ATTR_HAS_JAVA );
		if (diagnose) {
			printf ("Adding constraint [%s]\n", buffer);
		}
		query->addANDConstraint (buffer);
		
		projList.AppendArg(ATTR_HAS_JAVA);
		projList.AppendArg(ATTR_JAVA_MFLOPS);
		projList.AppendArg(ATTR_JAVA_VENDOR);
		projList.AppendArg(ATTR_JAVA_VERSION);

	}

	if(offlineMode) {
		query->addANDConstraint( "size( OfflineUniverses ) != 0" );

		projList.AppendArg( "OfflineUniverses" );

		//
		// Since we can't add a regex to a projection, explicitly list all
		// the attributes we know about.
		//

		projList.AppendArg( "HasVM" );
		projList.AppendArg( "VMOfflineReason" );
		projList.AppendArg( "VMOfflineTime" );
	}

	if(absentMode) {
	    sprintf( buffer, "%s == TRUE", ATTR_ABSENT );
	    if (diagnose) {
	        printf( "Adding constraint %s\n", buffer );
	    }
	    query->addANDConstraint( buffer );
	    
	    projList.AppendArg( ATTR_ABSENT );
	    projList.AppendArg( ATTR_LAST_HEARD_FROM );
	    projList.AppendArg( ATTR_CLASSAD_LIFETIME );
	}

	if(vmMode) {
		sprintf( buffer, "%s == TRUE", ATTR_HAS_VM);
		if (diagnose) {
			printf ("Adding constraint [%s]\n", buffer);
		}
		query->addANDConstraint (buffer);

		projList.AppendArg(ATTR_VM_TYPE);
		projList.AppendArg(ATTR_VM_MEMORY);
		projList.AppendArg(ATTR_VM_NETWORKING);
		projList.AppendArg(ATTR_VM_NETWORKING_TYPES);
		projList.AppendArg(ATTR_VM_HARDWARE_VT);
		projList.AppendArg(ATTR_VM_AVAIL_NUM);
		projList.AppendArg(ATTR_VM_ALL_GUEST_MACS);
		projList.AppendArg(ATTR_VM_ALL_GUEST_IPS);
		projList.AppendArg(ATTR_VM_GUEST_MAC);
		projList.AppendArg(ATTR_VM_GUEST_IP);

	}

	// second pass:  add regular parameters and constraints
	if (diagnose) {
		printf ("----------\n");
	}

	secondPass (argc, argv);

	// initialize the totals object
	if (ppStyle == PP_CUSTOM && using_print_format) {
		if (pmHeadFoot & HF_NOSUMMARY) ppTotalStyle = PP_CUSTOM;
	} else {
		ppTotalStyle = ppStyle;
	}
	TrackTotals	totals(ppTotalStyle);

	// fetch the query
	QueryResult q;

	if ((mode == MODE_STARTD_NORMAL) && (ppStyle == PP_STARTD_NORMAL)) {
		projList.AppendArg("Name");
		projList.AppendArg("Machine");
		projList.AppendArg("Opsys");
		projList.AppendArg("Arch");
		projList.AppendArg("State");
		projList.AppendArg("Activity");
		projList.AppendArg("LoadAvg");
		projList.AppendArg("Memory");
		projList.AppendArg("ActvtyTime");
		projList.AppendArg("MyCurrentTime");
		projList.AppendArg("EnteredCurrentActivity");
	} else if( ppStyle == PP_VERBOSE ) {
	    // Remove everything from the projection list if we're displaying
	    // the "long form" of the ads.
	    projList.Clear();
		// but if -attributes was supplied, show only those attributes
		if ( ! dashAttributes.isEmpty()) {
			const char * s;
			dashAttributes.rewind();
			while ((s = dashAttributes.next())) {
				projList.AppendArg(s);
			}
		}
	}

	if( projList.Count() > 0 ) {
		char **attr_list = projList.GetStringArray();
		query->setDesiredAttrs(attr_list);
		deleteStringArray(attr_list);
	}

	// if diagnose was requested, just print the query ad
	if (diagnose) {
		ClassAd 	queryAd;

		// print diagnostic information about inferred internal state
		setMode ((Mode) 0, 0, NULL);
		setType (NULL, 0, NULL);
		setPPstyle ((ppOption) 0, 0, DEFAULT);
		printf ("----------\n");

		q = query->getQueryAd (queryAd);
		fPrintAd (stdout, queryAd);

		printf ("----------\n");
		fprintf (stderr, "Result of making query ad was:  %d\n", q);
		exit (1);
	}

        // Address (host:port) is taken from requested pool, if given.
	char* addr = (NULL != pool) ? pool->addr() : NULL;
        Daemon* requested_daemon = pool;

        // If we're in "direct" mode, then we attempt to locate the daemon
	// associated with the requested subsystem (here encoded by value of mode)
        // In this case the host:port of pool (if given) denotes which
        // pool is being consulted
	if( direct ) {
		Daemon *d = NULL;
		switch( mode ) {
		case MODE_MASTER_NORMAL:
			d = new Daemon( DT_MASTER, direct, addr );
			break;
		case MODE_STARTD_NORMAL:
		case MODE_STARTD_AVAIL:
		case MODE_STARTD_RUN:
		case MODE_STARTD_COD:
			d = new Daemon( DT_STARTD, direct, addr );
			break;

#ifdef HAVE_EXT_POSTGRESQL
		case MODE_QUILL_NORMAL:
			d = new Daemon( DT_QUILL, direct, addr );
			break;
#endif /* HAVE_EXT_POSTGRESQL */

		case MODE_SCHEDD_NORMAL:
		case MODE_SCHEDD_SUBMITTORS:
			d = new Daemon( DT_SCHEDD, direct, addr );
			break;
		case MODE_NEGOTIATOR_NORMAL:
			d = new Daemon( DT_NEGOTIATOR, direct, addr );
			break;
		case MODE_CKPT_SRVR_NORMAL:
		case MODE_COLLECTOR_NORMAL:
		case MODE_LICENSE_NORMAL:
		case MODE_STORAGE_NORMAL:
		case MODE_GENERIC_NORMAL:
		case MODE_ANY_NORMAL:
		case MODE_OTHER:
		case MODE_GRID_NORMAL:
		case MODE_HAD_NORMAL:
				// These have to go to the collector, anyway.
			break;
		default:
            fprintf( stderr, "Error:  Illegal mode %d\n", mode );
			exit( 1 );
			break;
		}

                // Here is where we actually override 'addr', if we can obtain
                // address of the requested daemon/subsys.  If it can't be
                // located, then fail with error msg.
                // 'd' will be null (unset) if mode is one of above that must go to
                // collector (MODE_ANY_NORMAL, MODE_COLLECTOR_NORMAL, etc)
		if (NULL != d) {
			if( d->locate() ) {
				addr = d->addr();
				requested_daemon = d;
			} else {
				const char* id = d->idStr();
				if (NULL == id) id = d->name();
				dprintf_WriteOnErrorBuffer(stderr, true);
				if (NULL == id) id = "daemon";
				fprintf(stderr, "Error: Failed to locate %s\n", id);
				fprintf(stderr, "%s\n", d->error());
				exit( 1 );
			}
		}
	}

	ClassAdList result;
	CondorError errstack;
	if (NULL != ads_file) {
		MyString req; // query requirements
		q = query->getRequirements(req);
		const char * constraint = req.empty() ? NULL : req.c_str();
		if (read_classad_file(ads_file, result, constraint)) {
			q = Q_OK;
		}
	} else if (NULL != addr) {
			// this case executes if pool was provided, or if in "direct" mode with
			// subsystem that corresponds to a daemon (above).
			// Here 'addr' represents either the host:port of requested pool, or
			// alternatively the host:port of daemon associated with requested subsystem (direct mode)
		q = query->fetchAds (result, addr, &errstack);
	} else {
			// otherwise obtain list of collectors and submit query that way
		CollectorList * collectors = CollectorList::create();
		q = collectors->query (*query, result, &errstack);
		delete collectors;
	}
		

	// if any error was encountered during the query, report it and exit 
	if (Q_OK != q) {

		dprintf_WriteOnErrorBuffer(stderr, true);
			// we can always provide these messages:
		fprintf( stderr, "Error: %s\n", getStrQueryResult(q) );
		fprintf( stderr, "%s\n", errstack.getFullText(true).c_str() );

        if ((NULL != requested_daemon) && ((Q_NO_COLLECTOR_HOST == q) ||
			(requested_daemon->type() == DT_COLLECTOR)))
		{
				// Specific long message if connection to collector failed.
			const char* fullhost = requested_daemon->fullHostname();
			if (NULL == fullhost) fullhost = "<unknown_host>";
			const char* daddr = requested_daemon->addr();
			if (NULL == daddr) daddr = "<unknown>";
			char info[1000];
			sprintf(info, "%s (%s)", fullhost, daddr);
	        printNoCollectorContact( stderr, info, !expert );
        } else if ((NULL != requested_daemon) && (Q_COMMUNICATION_ERROR == q)) {
				// more helpful message for failure to connect to some daemon/subsys
			const char* id = requested_daemon->idStr();
			if (NULL == id) id = requested_daemon->name();
			if (NULL == id) id = "daemon";
			const char* daddr = requested_daemon->addr();
			if (NULL == daddr) daddr = "<unknown>";
			fprintf(stderr, "Error: Failed to contact %s at %s\n", id, daddr);
		}

		// fail
		exit (1);
	}

	if (noSort) {
		// do nothing 
	} else if (sortSpecs.empty()) {
        // default classad sorting
		result.Sort((SortFunctionType)lessThanFunc);
	} else {
        // User requested custom sorting expressions:
        // insert attributes related to custom sorting
        result.Open();
        while (ClassAd* ad = result.Next()) {
            for (vector<SortSpec>::iterator ss(sortSpecs.begin());  ss != sortSpecs.end();  ++ss) {
                ss->expr->SetParentScope(ad);
                classad::Value v;
                ss->expr->Evaluate(v);
                stringstream vs;
                // This will properly render all supported value types,
                // including undefined and error, although current semantic
                // pre-filters classads where sort expressions are undef/err:
                vs << ((v.IsStringValue())?"\"":"") << v << ((v.IsStringValue())?"\"":"");
                ad->AssignExpr(ss->keyAttr.c_str(), vs.str().c_str());
                // Save the full expr in case user wants to examine on output:
                ad->AssignExpr(ss->keyExprAttr.c_str(), ss->arg.c_str());
            }
        }
        
        result.Open();
		result.Sort((SortFunctionType)customLessThanFunc);
	}

	
	// output result
	prettyPrint (result, &totals);
	
    delete query;

	return 0;
}
예제 #20
0
int
main(int argc, char *argv[])
{
	char	*arg;
	int		nArgs = 0;				// number of args 
	int	 i, result;
	char* pool = NULL;
	char* scheddName = NULL;
	char* scheddAddr = NULL;
	MyString method;
	char *tmp;

	myDistro->Init( argc, argv );
	MyName = condor_basename(argv[0]);
	config();

#if !defined(WIN32)
	install_sig_handler(SIGPIPE, SIG_IGN );
#endif

	// dig around in the config file looking for what the config file says
	// about getting files from Condor. This defaults with the global variable
	// initialization.
	tmp = param( "SANDBOX_TRANSFER_METHOD" );
	if ( tmp != NULL ) {
		method = tmp;
		free( tmp );
		string_to_stm( method, st_method );
	}

	char **args = (char **)malloc(sizeof(char *) * argc); // args 
	if ( ! args) exit(2);

	// parse the arguments.
	for( argv++; (arg = *argv); argv++ ) {
		if( arg[0] == '-' ) {
			if( ! arg[1] ) {
				usage();
			}
			switch( arg[1] ) {
			case 'd':
				// dprintf to console
				dprintf_set_tool_debug("TOOL", 0);
				break;
			case 'c':
				args[nArgs] = arg;
				nArgs++;
				argv++;
				if( ! *argv ) {
					fprintf( stderr, 
							 "%s: -constraint requires another argument\n", 
							 MyName);
					exit(1);
				}				
				args[nArgs] = *argv;
				nArgs++;
				break;
			case 'a':
				if( arg[2] && arg[2] == 'd' ) {
					argv++;
					if( ! *argv ) {
						fprintf( stderr, 
								 "%s: -addr requires another argument\n", 
								 MyName);
						exit(1);
					}				
					if( is_valid_sinful(*argv) ) {
						scheddAddr = strdup(*argv);
						if( ! scheddAddr ) {
							fprintf( stderr, "Out of Memory!\n" );
							exit(1);
						}
					} else {
						fprintf( stderr, 
								 "%s: \"%s\" is not a valid address\n",
								 MyName, *argv );
						fprintf( stderr, "Should be of the form "
								 "<ip.address.here:port>\n" );
						fprintf( stderr, 
								 "For example: <123.456.789.123:6789>\n" );
						exit( 1 );
					}
					break;
				}
				All = true;
				break;
			case 'n': 
				// use the given name as the schedd name to connect to
				argv++;
				if( ! *argv ) {
					fprintf( stderr, "%s: -name requires another argument\n", 
							 MyName);
					exit(1);
				}			
				if ( scheddName ) free(scheddName);
				scheddName = strdup(*argv);
				break;
			case 'p':
				// use the given name as the central manager to query
				argv++;
				if( ! *argv ) {
					fprintf( stderr, "%s: -pool requires another argument\n", 
							 MyName);
					exit(1);
				}				
				if( pool ) {
					free( pool );
				}
				pool = strdup( *argv );
				break;
			case 's':
				argv++;
				if( ! *argv ) {
					fprintf( stderr, "%s: -stm requires another argument\n", 
							 MyName);
					exit(1);
				}				
				method = *argv;
				string_to_stm(method, st_method);
				break;
			case 'v':
				version();
				break;
			case 'h':
				usage(0);
				break;
			default:
				fprintf( stderr, "Unrecognized option: %s\n", arg ); 
				usage();
				break;
			}
		} else {
			if( All ) {
					// If -all is set, there should be no other
					// constraint arguments.
				usage();
			}
			args[nArgs] = arg;
			nArgs++;
		}
	}

	// Check to make sure we have a valid sandbox transfer mechanism.
	if (st_method == STM_UNKNOWN) {
		fprintf( stderr,
			"%s: Unknown sandbox transfer method: %s\n", MyName,
			method.Value());
		usage();
		exit(1);
	}

	if( ! (All || nArgs) ) {
			// We got no indication of what to act on


		fprintf( stderr, "You did not specify any jobs\n" ); 
		usage();
	}

		// We're done parsing args, now make sure we know how to
		// contact the schedd. 
	if( ! scheddAddr ) {
			// This will always do the right thing, even if either or
			// both of scheddName or pool are NULL.
		schedd = new DCSchedd( scheddName, pool );
	} else {
		schedd = new DCSchedd( scheddAddr );
	}
	if( ! schedd->locate() ) {
		fprintf( stderr, "%s: %s\n", MyName, schedd->error() ); 
		exit( 1 );
	}

		// Process the args.
	if( All ) {
		handleAll();
	} else {
		for(i = 0; i < nArgs; i++) {
			if( match_prefix( args[i], "-constraint" ) ) {
				i++;
				addConstraint( args[i] );
			} else {
				procArg(args[i]);
			}
		}
	}

		// Sanity check: make certain we now have a constraint
	if ( global_constraint.Length() <= 0 ) {			
		fprintf( stderr, "Unable to create a job constraint!\n");
		exit(1);
	}

	fprintf(stdout,"Fetching data files...\n");

	switch(st_method) {
		case STM_USE_SCHEDD_ONLY:
			{ // start block

			// Get the sandbox directly from the schedd.
			// And now, do the work.
			CondorError errstack;
			result = schedd->receiveJobSandbox(global_constraint.Value(),
				&errstack);
			if ( !result ) {
				fprintf( stderr, "\n%s\n", errstack.getFullText(true).c_str() );
				fprintf( stderr, "ERROR: Failed to spool job files.\n" );
				exit(1);
			}
		
			// All done
			return 0;

			} //end block
			break;

		case STM_USE_TRANSFERD:
			{ // start block

			// NEW METHOD where we ask the schedd for a transferd, then get the
			// files from the transferd

			CondorError errstack;
			ClassAd respad;
			int invalid;
			MyString reason;
			MyString td_sinful;
			MyString td_cap;

			result = schedd->requestSandboxLocation(FTPD_DOWNLOAD, 
				global_constraint, FTP_CFTP, &respad, &errstack);
			if ( !result ) {
				fprintf( stderr, "\n%s\n", errstack.getFullText(true).c_str() );
				fprintf( stderr, "ERROR: Failed to spool job files.\n" );
				exit(1);
			}

			respad.LookupInteger(ATTR_TREQ_INVALID_REQUEST, invalid);
			if (invalid == TRUE) {
				fprintf( stderr, "ERROR: Failed to spool job files.\n" );
				respad.LookupString(ATTR_TREQ_INVALID_REASON, reason);
				fprintf( stderr, "%s\n", reason.Value());
				exit(EXIT_FAILURE);
			}

			respad.LookupString(ATTR_TREQ_TD_SINFUL, td_sinful);
			respad.LookupString(ATTR_TREQ_CAPABILITY, td_cap);

			dprintf(D_ALWAYS, 
				"td: %s, cap: %s\n", td_sinful.Value(), td_cap.Value());

			DCTransferD dctd(td_sinful.Value());

			result = dctd.download_job_files(&respad, &errstack);
			if ( !result ) {
				fprintf( stderr, "\n%s\n", errstack.getFullText(true).c_str() );
				fprintf( stderr, "ERROR: Failed to spool job files.\n" );
				exit(1);
			}

			} // end block
		break;

		default:
			EXCEPT("PROGRAMMER ERROR: st_method must be known.");
			break;
		}

	// All done
	return 0;
}
예제 #21
0
//---------------------------------------------------------------------------
bool
Job::MonitorLogFile( ReadMultipleUserLogs &condorLogReader,
			ReadMultipleUserLogs &storkLogReader, bool nfsIsError,
			bool recovery, const char *defaultNodeLog, bool usingDefault )
{
	debug_printf( DEBUG_DEBUG_2,
				"Attempting to monitor log file for node %s\n",
				GetJobName() );

	if ( _logIsMonitored ) {
		debug_printf( DEBUG_DEBUG_1, "Warning: log file for node "
					"%s is already monitored\n", GetJobName() );
		return true;
	}

	ReadMultipleUserLogs &logReader = (_jobType == TYPE_CONDOR) ?
				condorLogReader : storkLogReader;

    std::string logFileStr;
	if ( _jobType == TYPE_CONDOR ) {
			// We check to see if the user has specified a log file
			// If not, we give him a default
    	MyString templogFileStr = MultiLogFiles::loadLogFileNameFromSubFile( _cmdFile,
					_directory, _logFileIsXml, usingDefault);
		logFileStr = templogFileStr.Value();
	} else {
		StringList logFiles;
		MyString tmpResult = MultiLogFiles::loadLogFileNamesFromStorkSubFile(
					_cmdFile, _directory, logFiles );
		if ( tmpResult != "" ) {
			debug_printf( DEBUG_QUIET, "Error getting Stork log file: %s\n",
						tmpResult.Value() );
			LogMonitorFailed();
			return false;
		} else if ( logFiles.number() != 1 ) {
			debug_printf( DEBUG_QUIET, "Error: %d Stork log files found "
						"in submit file %s; we want 1\n",
						logFiles.number(), _cmdFile );
			LogMonitorFailed();
			return false;
		} else {
			logFiles.rewind();
			logFileStr = logFiles.next();
		}
	}

		// Warn the user if the node's log file is in /tmp.
	if ( logFileStr.find( "/tmp" ) == 0 ) {
		debug_printf( DEBUG_QUIET, "Warning: "
					"Log file %s for node %s is in /tmp\n",
					logFileStr.c_str(), GetJobName() );
        check_warning_strictness( usingDefault ? DAG_STRICT_2 : DAG_STRICT_1 );
	}

	if ( logFileStr == "" ) {
		logFileStr = defaultNodeLog;
		_useDefaultLog = true;
			// Default User log is never XML
			// This could be specified in the submit file and should be
			// ignored.
		_logFileIsXml = false;
		debug_printf( DEBUG_NORMAL, "Unable to get log file from "
					"submit file %s (node %s); using default (%s)\n",
					_cmdFile, GetJobName(), logFileStr.c_str() );
		append_default_log = false;
	} else {
		append_default_log = usingDefault;
		if( append_default_log ) {
				// DAGman is not going to look at the user-specified log.
				// It will look at the defaultNode log.
			logFileStr = defaultNodeLog;
			_useDefaultLog = false;
			_logFileIsXml = false;
		}
	}

		// This function returns true if the log file is on NFS and
		// that is an error.  If the log file is on NFS, but nfsIsError
		// is false, it prints a warning but returns false.
	if ( MultiLogFiles::logFileNFSError( logFileStr.c_str(),
				nfsIsError ) ) {
		debug_printf( DEBUG_QUIET, "Error: log file %s on NFS\n",
					logFileStr.c_str() );
		LogMonitorFailed();
		return false;
	}

	delete [] _logFile;
		// Saving log file here in case submit file gets changed.
	_logFile = strnewp( logFileStr.c_str() );
	debug_printf( DEBUG_DEBUG_2, "Monitoring log file <%s> for node %s\n",
				GetLogFile(), GetJobName() );
	CondorError errstack;
	if ( !logReader.monitorLogFile( GetLogFile(), !recovery, errstack ) ) {
		errstack.pushf( "DAGMan::Job", DAGMAN_ERR_LOG_FILE,
					"ERROR: Unable to monitor log file for node %s",
					GetJobName() );
		debug_printf( DEBUG_QUIET, "%s\n", errstack.getFullText().c_str() );
		LogMonitorFailed();
		EXCEPT( "Fatal log file monitoring error!\n" );
		return false;
	}

	_logIsMonitored = true;

	return true;
}
void
doContactSchedd()
{
	if (command_queue.IsEmpty()) {
		daemonCore->Reset_Timer( contactScheddTid, contact_schedd_interval ); // Come back in a min
		return;
	}

	dprintf(D_FULLDEBUG,"in doContactSchedd\n");

	SchedDRequest * current_command = NULL;

	int error=FALSE;
	std::string error_msg;
	CondorError errstack;
	bool do_reschedule = false;
	int failure_line_num = 0;
	int failure_errno = 0;

	// Try connecting to schedd
	DCSchedd dc_schedd ( ScheddAddr, ScheddPool );
	if (dc_schedd.error() || !dc_schedd.locate()) {
		sprintf( error_msg, "Error locating schedd %s", ScheddAddr );

		dprintf( D_ALWAYS, "%s\n", error_msg.c_str() );

		// If you can't connect return "Failure" on every job request
		command_queue.Rewind();
		while (command_queue.Next(current_command)) {
			if (current_command->status != SchedDRequest::SDCS_NEW)
				continue;

			if (current_command->command == SchedDRequest::SDC_STATUS_CONSTRAINED) {
				const char * result[] = {
					GAHP_RESULT_FAILURE,
					error_msg.c_str(),
					"0"};
				enqueue_result (current_command->request_id, result, 3);
			} else if (current_command->command == SchedDRequest::SDC_SUBMIT_JOB) {
				const char * result[] = {
									GAHP_RESULT_FAILURE,
									NULL,
									error_msg.c_str() };
				enqueue_result (current_command->request_id, result, 3);
			} else if (current_command->command == SchedDRequest::SDC_UPDATE_LEASE) {
				const char * result[] = {
									GAHP_RESULT_FAILURE,
									error_msg.c_str(),
									NULL };
				enqueue_result (current_command->request_id, result, 3);
			} else {
				const char * result[] = {
									GAHP_RESULT_FAILURE,
									error_msg.c_str() };
				enqueue_result (current_command->request_id, result, 2);
			}

			current_command->status = SchedDRequest::SDCS_COMPLETED;
		}
	}

	
	SchedDRequest::schedd_command_type commands [] = {
		SchedDRequest::SDC_REMOVE_JOB,
		SchedDRequest::SDC_HOLD_JOB,
		SchedDRequest::SDC_RELEASE_JOB };

	const char * command_titles [] = {
		"REMOVE_JOB", "HOLD_JOB", "RELEASE_JOB" };

	// REMOVE
	// HOLD
	// RELEASE
	int i=0;
	while (i<3) {
		
		
		StringList id_list;
		SimpleList <SchedDRequest*> this_batch;

		SchedDRequest::schedd_command_type this_command = commands[i];
		const char * this_action = command_titles[i];
		const char * this_reason = NULL;

		dprintf (D_FULLDEBUG, "Processing %s requests\n", this_action);
		
		error = FALSE;

		// Create a batch of commands with the same command type AND the same reason		
		command_queue.Rewind();
		while (command_queue.Next(current_command)) {
			if (current_command->status != SchedDRequest::SDCS_NEW)
				continue;

			if (current_command->command != this_command)
				continue;

			if ((this_reason != NULL) && (strcmp (current_command->reason, this_reason) != 0))
				continue;

			if (this_reason == NULL)
				this_reason = current_command->reason;
				
			char job_id_buff[30];
			sprintf (job_id_buff, "%d.%d",
				current_command->cluster_id,
				current_command->proc_id);
			id_list.append (job_id_buff);

			this_batch.Append (current_command);
		}

		// If we haven't found any....
		if (id_list.isEmpty()) {
			i++;
			continue;	// ... then try the next command
		}

		// Perform the appropriate command on the current batch
		ClassAd * result_ad= NULL;
		if (this_command == SchedDRequest::SDC_REMOVE_JOB)  {
			errstack.clear();
			result_ad=
				dc_schedd.removeJobs (
					&id_list,
					this_reason,
					&errstack);
		} else if (this_command == SchedDRequest::SDC_HOLD_JOB) {
			errstack.clear();
			result_ad=
				dc_schedd.holdJobs (
					&id_list,
					this_reason,
					NULL,
			 		&errstack);
		} else if (this_command == SchedDRequest::SDC_RELEASE_JOB)  {
			errstack.clear();
			result_ad=
				dc_schedd.releaseJobs (
					&id_list,
					this_reason,
					&errstack);
		} else {
			EXCEPT( "Unexpected command type %d in doContactSchedd",
					this_command );
		}

		// Analyze the result ad
		if (!result_ad) {
			error = TRUE;
			sprintf( error_msg, "Error connecting to schedd %s %s: %s",
					 ScheddAddr, dc_schedd.addr(), errstack.getFullText() );
		}
		else {
			result_ad->dPrint (D_FULLDEBUG);
			if ( this_command == SchedDRequest::SDC_RELEASE_JOB ) {
				do_reschedule = true;
			}
		}

		// Go through the batch again, and create responses for each request
		this_batch.Rewind();
		while (this_batch.Next(current_command)) {
			
			// Check the result
			char job_id_buff[30];
			if (result_ad && (error == FALSE)) {
				sprintf (job_id_buff, "job_%d_%d",
					current_command->cluster_id,
					current_command->proc_id);
				
				int remove_result;
				if (result_ad->LookupInteger (job_id_buff, remove_result)) {
					switch (remove_result) {
						case AR_ERROR:
							error = TRUE;
							error_msg = "General Error";
							break;
						case AR_SUCCESS:
							error = FALSE;
							break;
						case AR_NOT_FOUND:
							error = TRUE;
							error_msg = "Job not found";
							break;
						case AR_BAD_STATUS:
							error = TRUE;
							error_msg = "Bad job status";
							break;
						case AR_ALREADY_DONE:
							error = TRUE;
							error_msg = "Already done";
							break;
						case AR_PERMISSION_DENIED:
							error = TRUE;
							error_msg = "Permission denied";
							break;
						default:
							error = TRUE;
							error_msg = "Unknown Result";
					} // hctiws

				} else {
					error_msg = "Unable to get result";
				} // fi lookup result for job
			} // fi error == FALSE

			if (error) {
				dprintf (D_ALWAYS, "Error (operation: %s) %d.%d: %s\n",
						this_action,
						current_command->cluster_id,
						current_command->proc_id,
						error_msg.c_str());

				const char * result[2];
				result[0] = GAHP_RESULT_FAILURE;
				result[1] = error_msg.c_str();

				enqueue_result (current_command->request_id, result, 2);
			} else {
				dprintf (D_ALWAYS, "Succeess (operation: %s) %d.%d\n",
						this_action,
						current_command->cluster_id,
						current_command->proc_id);

				const char * result[2];
				result[0] = GAHP_RESULT_SUCCESS;
				result[1] = NULL;

				enqueue_result (current_command->request_id, result, 2);
			} // fi error

			// Mark the status
			current_command->status = SchedDRequest::SDCS_COMPLETED;
		} // elihw this_batch

		if ( result_ad ) {
			delete result_ad;
		}
	}

	dprintf (D_FULLDEBUG, "Processing JOB_STAGE_IN requests\n");
	

	// JOB_STAGE_IN
	int MAX_BATCH_SIZE=1; // This should be a config param

	SimpleList <SchedDRequest*> stage_in_batch;
	do {
		stage_in_batch.Clear();

		command_queue.Rewind();
		while (command_queue.Next(current_command)) {

			if (current_command->status != SchedDRequest::SDCS_NEW)
				continue;

			if (current_command->command != SchedDRequest::SDC_JOB_STAGE_IN)
				continue;

			dprintf (D_ALWAYS, "Adding %d.%d to STAGE_IN batch\n", 
					 current_command->cluster_id,
					 current_command->proc_id);

			stage_in_batch.Append (current_command);
			if (stage_in_batch.Number() >= MAX_BATCH_SIZE)
				break;
		}

		if (stage_in_batch.Number() > 0) {
			ClassAd ** array = new ClassAd*[stage_in_batch.Number()];
			i=0;
			stage_in_batch.Rewind();
			while (stage_in_batch.Next(current_command)) {
				array[i++] = current_command->classad;
			}

			error = FALSE;
			errstack.clear();
			if (!dc_schedd.spoolJobFiles( stage_in_batch.Number(),
										  array,
										  &errstack )) {
				error = TRUE;
				sprintf( error_msg, "Error sending files to schedd %s: %s", ScheddAddr, errstack.getFullText() );
				dprintf( D_ALWAYS, "%s\n", error_msg.c_str() );
			}
			delete [] array;
  
			stage_in_batch.Rewind();
			while (stage_in_batch.Next(current_command)) {
				current_command->status = SchedDRequest::SDCS_COMPLETED;

				if (error) {
					const char * result[] = {
						GAHP_RESULT_FAILURE,
						error_msg.c_str() };
					enqueue_result (current_command->request_id, result, 2);

				} else {
					const char * result[] = {
						GAHP_RESULT_SUCCESS,
						NULL };
					enqueue_result (current_command->request_id, result, 2);
				}
			} // elihw (command_queue)
		} // fi has STAGE_IN requests
	} while (stage_in_batch.Number() > 0);

	dprintf (D_FULLDEBUG, "Processing JOB_STAGE_OUT requests\n");
	

	// JOB_STAGE_OUT
	SimpleList <SchedDRequest*> stage_out_batch;

	command_queue.Rewind();
	while (command_queue.Next(current_command)) {

		if (current_command->status != SchedDRequest::SDCS_NEW)
			continue;

		if (current_command->command != SchedDRequest::SDC_JOB_STAGE_OUT)
			continue;


		stage_out_batch.Append (current_command);
	}

	if (stage_out_batch.Number() > 0) {
		std::string constraint = "";
		stage_out_batch.Rewind();
		int jobsexpected = stage_out_batch.Number();
		while (stage_out_batch.Next(current_command)) {
			sprintf_cat( constraint, "(ClusterId==%d&&ProcId==%d)||",
									current_command->cluster_id,
									current_command->proc_id );
		}
		constraint += "False";

		error = FALSE;
		errstack.clear();
		int jobssent;
		if (!dc_schedd.receiveJobSandbox( constraint.c_str(),
										  &errstack, &jobssent )) {
			error = TRUE;
			sprintf( error_msg, "Error receiving files from schedd %s: %s",
							   ScheddAddr, errstack.getFullText() );
			dprintf( D_ALWAYS, "%s\n", error_msg.c_str() );
		}

		if(error == FALSE && jobssent != jobsexpected) {
			error = TRUE;
			sprintf( error_msg, "Schedd %s didn't send expected files",
					 ScheddAddr );
			dprintf (D_ALWAYS, "Transfered files for %d jobs but got files for %d jobs. (Schedd %s with contraint %s\n", jobsexpected, jobssent, ScheddAddr, constraint.c_str());
		}
  
		stage_out_batch.Rewind();
		while (stage_out_batch.Next(current_command)) {
			current_command->status = SchedDRequest::SDCS_COMPLETED;

			if (error) {
				const char * result[] = {
								GAHP_RESULT_FAILURE,
								error_msg.c_str() };
				enqueue_result (current_command->request_id, result, 2);

			} else {
				const char * result[] = {
										GAHP_RESULT_SUCCESS,
										NULL };
				enqueue_result (current_command->request_id, result, 2);
			}
		} // elihw (command_queue)
	} // fi has STAGE_OUT requests


	dprintf (D_FULLDEBUG, "Processing JOB_REFRESH_PROXY requests\n");

	CondorVersionInfo ver_info(dc_schedd.version());
	bool delegate_credential;
	if ( ver_info.built_since_version(6,7,19) &&
		 param_boolean( "DELEGATE_JOB_GSI_CREDENTIALS", true ) ) {
		delegate_credential = true;
	} else {
		delegate_credential = false;
	}

	// JOB_REFRESH_PROXY
	command_queue.Rewind();
	while (command_queue.Next(current_command)) {

		if (current_command->status != SchedDRequest::SDCS_NEW)
			continue;

		if (current_command->command != SchedDRequest::SDC_JOB_REFRESH_PROXY)
			continue;

		time_t expiration_time = GetDesiredDelegatedJobCredentialExpiration(current_command->classad);
		time_t result_expiration_time = 0;

		bool result;
		errstack.clear();
		if ( delegate_credential ) {
			result = dc_schedd.delegateGSIcredential( 
												current_command->cluster_id,
												current_command->proc_id,
												current_command->proxy_file,
												expiration_time,
												&result_expiration_time,
												&errstack );

				// Currently, we do not propagate the actual resulting
				// expiration time back to the gridmanager.  We
				// probably should.
		} else {
			result = dc_schedd.updateGSIcredential( 
												current_command->cluster_id,
												current_command->proc_id,
												current_command->proxy_file,
												&errstack );
		}

		current_command->status = SchedDRequest::SDCS_COMPLETED;

		if (result == false) {
			sprintf( error_msg, "Error refreshing proxy to schedd %s: %s",
					 ScheddAddr, errstack.getFullText() );
			dprintf( D_ALWAYS, "%s\n", error_msg.c_str() );

			const char * result_to_queue[] = {
				GAHP_RESULT_FAILURE,
				error_msg.c_str() };
			enqueue_result (current_command->request_id, result_to_queue, 2);

		} else {
			const char * result_to_queue[] = {
				GAHP_RESULT_SUCCESS,
				NULL };
			enqueue_result (current_command->request_id, result_to_queue, 2);
		}

	}


	// Now do all the QMGMT transactions
	error = FALSE;

	// Try connecting to the queue
	Qmgr_connection * qmgr_connection;
	
	if ((qmgr_connection = ConnectQ(dc_schedd.addr(), QMGMT_TIMEOUT, false, NULL, NULL, dc_schedd.version() )) == NULL) {
		error = TRUE;
		sprintf( error_msg, "Error connecting to schedd %s", ScheddAddr );
		dprintf( D_ALWAYS, "%s\n", error_msg.c_str() );
	} else {
		errno = 0;
		AbortTransaction(); // Just so we can call BeginTransaction() in the loop
		if ( errno == ETIMEDOUT ) {
			failure_line_num = __LINE__;
			failure_errno = errno;
			goto contact_schedd_disconnect;
		}
	}


	dprintf (D_FULLDEBUG, "Processing UPDATE_CONSTRAINED/UDATE_JOB requests\n");
	
	// UPDATE_CONSTRAINED
	// UDATE_JOB
	command_queue.Rewind();
	while (command_queue.Next(current_command)) {
		
		if (current_command->status != SchedDRequest::SDCS_NEW)
			continue;

		if ((current_command->command != SchedDRequest::SDC_UPDATE_CONSTRAINED) &&
			(current_command->command != SchedDRequest::SDC_UPDATE_JOB))
			continue;

		if (qmgr_connection == NULL)
			goto update_report_result;
		
		error = FALSE;
		errno = 0;
		BeginTransaction();
		if ( errno == ETIMEDOUT ) {
			failure_line_num = __LINE__;
			failure_errno = errno;
			goto contact_schedd_disconnect;
		}

		current_command->classad->ResetExpr();
		ExprTree *tree;
		const char *lhstr, *rhstr;
		while( current_command->classad->NextExpr(lhstr, tree) ) {

			rhstr = ExprTreeToString( tree );
			if( !lhstr || !rhstr) {
				sprintf( error_msg, "ERROR: ClassAd problem in Updating by constraint %s",
												 current_command->constraint );
				dprintf( D_ALWAYS, "%s\n", error_msg.c_str() );
				error = TRUE;
			} else {
				if (current_command->command == SchedDRequest::SDC_UPDATE_CONSTRAINED) {
					if( SetAttributeByConstraint(current_command->constraint,
												lhstr,
												rhstr) == -1 ) {
						if ( errno == ETIMEDOUT ) {
							failure_line_num = __LINE__;
							failure_errno = errno;
							goto contact_schedd_disconnect;
						}
						sprintf( error_msg, "ERROR: Failed (errno=%d) to SetAttributeByConstraint %s=%s for constraint %s",
									errno, lhstr, rhstr, current_command->constraint );
						dprintf( D_ALWAYS, "%s\n", error_msg.c_str() );
						error = TRUE;
					}
				} else if (current_command->command == SchedDRequest::SDC_UPDATE_JOB) {
					if( SetAttribute(current_command->cluster_id,
											current_command->proc_id,
											lhstr,
											rhstr) == -1 ) {
						if ( errno == ETIMEDOUT ) {
							failure_line_num = __LINE__;
							failure_errno = errno;
							goto contact_schedd_disconnect;
						}
						sprintf( error_msg, "ERROR: Failed to SetAttribute() %s=%s for job %d.%d",
										 lhstr, rhstr, current_command->cluster_id,  current_command->proc_id);
						dprintf( D_ALWAYS, "%s\n", error_msg.c_str() );
						error = TRUE;
					}
				}
			}

			if (error)
				break;
		} // elihw classad

update_report_result:
		if (error) {
			const char * result[] = {
				GAHP_RESULT_FAILURE,
				error_msg.c_str() };


			//RemoteCommitTransaction();
			enqueue_result (current_command->request_id, result, 2);
			current_command->status = SchedDRequest::SDCS_COMPLETED;
			if ( qmgr_connection != NULL ) {
				errno = 0;
				AbortTransaction();
				if ( errno == ETIMEDOUT ) {
					failure_line_num = __LINE__;
					failure_errno = errno;
					goto contact_schedd_disconnect;
				}
			}
		} else {
			if ( RemoteCommitTransaction() < 0 ) {
				failure_line_num = __LINE__;
				failure_errno = errno;
				goto contact_schedd_disconnect;
			}
			const char * result[] = {
				GAHP_RESULT_SUCCESS,
				NULL };
			enqueue_result (current_command->request_id, result, 2);
			current_command->status = SchedDRequest::SDCS_COMPLETED;
		} // fi

	} // elihw

	
	dprintf (D_FULLDEBUG, "Processing UPDATE_LEASE requests\n");

	// UPDATE_LEASE
	command_queue.Rewind();
	while (command_queue.Next(current_command)) {
		
		error = FALSE;

		if (current_command->status != SchedDRequest::SDCS_NEW)
			continue;

		if (current_command->command != SchedDRequest::SDC_UPDATE_LEASE)
			continue;

		std::string success_job_ids="";
		if (qmgr_connection == NULL) {
			sprintf( error_msg, "Error connecting to schedd %s", ScheddAddr );
			error = TRUE;
		} else {
			error = FALSE;
			errno = 0;
			BeginTransaction();
			if ( errno == ETIMEDOUT ) {
				failure_line_num = __LINE__;
				failure_errno = errno;
				goto contact_schedd_disconnect;
			}
		
			for (i=0; i<current_command->num_jobs; i++) {
			
				time_t time_now = time(NULL);
				int duration = 
					current_command->expirations[i].expiration - time_now;

				dprintf (D_FULLDEBUG, 
						 "Job %d.%d SetTimerAttribute=%d\n",
						 current_command->expirations[i].cluster,
						 current_command->expirations[i].proc,
						 duration);
		
				if (SetTimerAttribute (current_command->expirations[i].cluster,
									   current_command->expirations[i].proc,
									   ATTR_TIMER_REMOVE_CHECK,
									   duration) < 0) {

					if ( errno == ETIMEDOUT ) {
						failure_line_num = __LINE__;
						failure_errno = errno;
						goto contact_schedd_disconnect;
					}
					dprintf (D_ALWAYS, 
							 "Unable to SetTimerAttribute(%d, %d), errno=%d\n",
							 current_command->expirations[i].cluster,
							 current_command->expirations[i].proc,
							 errno);
						 
				} else {
						// Append job id to the result line
					if (success_job_ids.length() > 0)
						success_job_ids += ",";

					sprintf_cat( success_job_ids,
						"%d.%d",
						current_command->expirations[i].cluster,
						current_command->expirations[i].proc);
				}
			} //rof jobs for request
		} // fi error


		if (error) {
			const char * result[] = {
				GAHP_RESULT_FAILURE,
				error_msg.c_str(),
				NULL
			};


			//RemoteCommitTransaction();
			enqueue_result (current_command->request_id, result, 3);
			current_command->status = SchedDRequest::SDCS_COMPLETED;
			if ( qmgr_connection != NULL ) {
				errno = 0;
				AbortTransaction();
				if ( errno == ETIMEDOUT ) {
					failure_line_num = __LINE__;
					failure_errno = errno;
					goto contact_schedd_disconnect;
				}
			}
		} else {
			if ( RemoteCommitTransaction() < 0 ) {
				failure_line_num = __LINE__;
				failure_errno = errno;
				goto contact_schedd_disconnect;
			}
			const char * result[] = {
				GAHP_RESULT_SUCCESS,
				NULL,
				success_job_ids.length()?success_job_ids.c_str():NULL
			};
			enqueue_result (current_command->request_id, result, 3);
			current_command->status = SchedDRequest::SDCS_COMPLETED;
		} // fi

	} // elihw UPDATE_LEASE requests

	dprintf (D_FULLDEBUG, "Processing SUBMIT_JOB requests\n");

	// SUBMIT_JOB
	command_queue.Rewind();
	while (command_queue.Next(current_command)) {

		if (current_command->status != SchedDRequest::SDCS_NEW)
			continue;

		if (current_command->command != SchedDRequest::SDC_SUBMIT_JOB)
			continue;

		int ClusterId = -1;
		int ProcId = -1;

		if (qmgr_connection == NULL) {
			error = TRUE;
			goto submit_report_result;
		}

		errno = 0;
		BeginTransaction();
		if ( errno == ETIMEDOUT ) {
			failure_line_num = __LINE__;
			failure_errno = errno;
			goto contact_schedd_disconnect;
		}
		error = FALSE;

		if ((ClusterId = NewCluster()) >= 0) {
			ProcId = NewProc (ClusterId);
		}
		if ( errno == ETIMEDOUT ) {
			failure_line_num = __LINE__;
			failure_errno = errno;
			goto contact_schedd_disconnect;
		}

		if ( ClusterId < 0 ) {
			error = TRUE;
			error_msg = "Unable to create a new job cluster";
			dprintf( D_ALWAYS, "%s\n", error_msg.c_str() );
		} else if ( ProcId < 0 ) {
			error = TRUE;
			error_msg = "Unable to create a new job proc";
			dprintf( D_ALWAYS, "%s\n", error_msg.c_str() );
		}
		if ( ClusterId == -2 || ProcId == -2 ) {
			error = TRUE;
			error_msg =
				"Number of submitted jobs would exceed MAX_JOBS_SUBMITTED\n";
			dprintf( D_ALWAYS, "%s\n", error_msg.c_str() );
		}


		// Adjust the argument/environment syntax based on the version
		// of the schedd we are talking to.

		if( error == FALSE) {
			CondorVersionInfo version_info(dc_schedd.version());
			ArgList arglist;
			MyString arg_error_msg;
			Env env_obj;
			MyString env_error_msg;

			if(!arglist.AppendArgsFromClassAd(current_command->classad,&arg_error_msg) ||
		   !	arglist.InsertArgsIntoClassAd(current_command->classad,&version_info,&arg_error_msg))
			{
				sprintf( error_msg,
						"ERROR: ClassAd problem in converting arguments to syntax "
						"for schedd (version=%s): %s\n",
						dc_schedd.version() ? dc_schedd.version() : "NULL",
						arg_error_msg.Value());
				dprintf( D_ALWAYS,"%s\n", error_msg.c_str() );
				error = TRUE;
			}	

			if(!env_obj.MergeFrom(current_command->classad,&env_error_msg) ||
			   !env_obj.InsertEnvIntoClassAd(current_command->classad,&env_error_msg,NULL,&version_info))
			{
				sprintf( error_msg,
						"ERROR: Failed to convert environment to target syntax"
						" for schedd (version %s): %s\n",
						dc_schedd.version() ? dc_schedd.version() : "NULL",
						env_error_msg.Value());
				dprintf( D_ALWAYS, "%s\n", error_msg.c_str() );
				error = TRUE;
			}
		}

		if( error == FALSE ) {
				// See the comment in the function body of ExpandInputFileList
				// for an explanation of what is going on here.
			MyString transfer_input_error_msg;
			if( !FileTransfer::ExpandInputFileList( current_command->classad, transfer_input_error_msg ) ) {
				dprintf( D_ALWAYS, "%s\n", transfer_input_error_msg.Value() );
				error = TRUE;
			}
		}

		if ( error == FALSE ) {
			current_command->classad->Assign(ATTR_CLUSTER_ID, ClusterId);
			current_command->classad->Assign(ATTR_PROC_ID, ProcId);

			// Special case for the job lease
			int expire_time;
			if ( current_command->classad->LookupInteger( ATTR_TIMER_REMOVE_CHECK, expire_time ) ) {
				if ( SetTimerAttribute( ClusterId, ProcId,
										ATTR_TIMER_REMOVE_CHECK,
										expire_time - time(NULL) ) == -1 ) {
					if ( errno == ETIMEDOUT ) {
						failure_line_num = __LINE__;
						failure_errno = errno;
						goto contact_schedd_disconnect;
					}
					sprintf( error_msg, "ERROR: Failed to SetTimerAttribute %s=%ld for job %d.%d",
							 ATTR_TIMER_REMOVE_CHECK, expire_time - time(NULL), ClusterId, ProcId );
					dprintf( D_ALWAYS, "%s\n", error_msg.c_str() );
					error = TRUE;
					goto submit_report_result;
				}
				current_command->classad->Delete( ATTR_TIMER_REMOVE_CHECK );
			}

			// Set all the classad attribute on the remote classad
			current_command->classad->ResetExpr();
			ExprTree *tree;
			const char *lhstr, *rhstr;
			while( current_command->classad->NextExpr(lhstr, tree) ) {

				rhstr = ExprTreeToString( tree );
				if( !lhstr || !rhstr) {
					sprintf( error_msg, "ERROR: ClassAd problem in Updating by constraint %s",
												 current_command->constraint );
					dprintf( D_ALWAYS, "%s\n", error_msg.c_str() );
					error = TRUE;
				} else if( SetAttribute (ClusterId, ProcId,
											lhstr,
											rhstr) == -1 ) {
					if ( errno == ETIMEDOUT ) {
						failure_line_num = __LINE__;
						failure_errno = errno;
						goto contact_schedd_disconnect;
					}
					sprintf( error_msg, "ERROR: Failed to SetAttribute %s=%s for job %d.%d",
									 lhstr, rhstr, ClusterId, ProcId );
					dprintf( D_ALWAYS, "%s\n", error_msg.c_str() );
					error = TRUE;
				}

				if (error) break;
			} // elihw classad
		} // fi error==FALSE

submit_report_result:
		char job_id_buff[30];
		sprintf (job_id_buff, "%d.%d", ClusterId, ProcId);

		if (error) {
			const char * result[] = {
								GAHP_RESULT_FAILURE,
								job_id_buff,
								error_msg.c_str() };
			enqueue_result (current_command->request_id, result, 3);
			if ( qmgr_connection != NULL ) {
				errno = 0;
				AbortTransaction();
				if ( errno == ETIMEDOUT ) {
					failure_line_num = __LINE__;
					failure_errno = errno;
					goto contact_schedd_disconnect;
				}
			}
			current_command->status = SchedDRequest::SDCS_COMPLETED;
		} else {
			if ( RemoteCommitTransaction() < 0 ) {
				failure_line_num = __LINE__;
				failure_errno = errno;
				goto contact_schedd_disconnect;
			}
			const char * result[] = {
									GAHP_RESULT_SUCCESS,
									job_id_buff,
									NULL };
			enqueue_result (current_command->request_id, result, 3);
			current_command->status = SchedDRequest::SDCS_COMPLETED;
		}
	} // elihw


	dprintf (D_FULLDEBUG, "Processing STATUS_CONSTRAINED requests\n");
		
	// STATUS_CONSTRAINED
	command_queue.Rewind();
	while (command_queue.Next(current_command)) {

		if (current_command->status != SchedDRequest::SDCS_NEW)
			continue;

		if (current_command->command != SchedDRequest::SDC_STATUS_CONSTRAINED)
			continue;

		if (qmgr_connection != NULL) {
			SimpleList <MyString *> matching_ads;

			error = FALSE;
			
			ClassAd *next_ad;
			ClassAdList adlist;
				// Only use GetAllJobsByConstraint if remote schedd is
				// 6.9.5 or newer.  Previous versions either did not
				// support this call, or they closed the Qmgmt connection
				// as a side-effect of this call.
			if( ver_info.built_since_version(6,9,5) ) {
				dprintf( D_FULLDEBUG, "Calling GetAllJobsByConstraint(%s)\n",
						 current_command->constraint );
					// NOTE: this could be made more efficient if we knew
					// the list of attributes to query.  For lack of that,
					// we just get all attributes.
				GetAllJobsByConstraint( current_command->constraint, "", adlist);
			}
			else {
					// This is the old latency-prone method.
				dprintf( D_FULLDEBUG, "Calling GetNextJobByConstraint(%s)\n",
						 current_command->constraint );
				next_ad = GetNextJobByConstraint( current_command->constraint, 1 );
				while( next_ad != NULL ) {
					adlist.Insert( next_ad );
					next_ad = GetNextJobByConstraint( current_command->constraint, 0 );
				}
			}

				// NOTE: ClassAdList will deallocate the ClassAds in it

			adlist.Rewind();
			while( (next_ad=adlist.Next()) ) {
				MyString * da_buffer = new MyString();	// Use a ptr to avoid excessive copying
				if ( useXMLClassads ) {
					ClassAdXMLUnparser unparser;
					unparser.SetUseCompactSpacing(true);
					unparser.Unparse (next_ad, *da_buffer);
				} else {
					NewClassAdUnparser unparser;
					unparser.SetUseCompactSpacing(true);
					unparser.Unparse (next_ad, *da_buffer);
				}
				matching_ads.Append (da_buffer);
			}
			if ( errno == ETIMEDOUT ) {
				failure_line_num = __LINE__;
				failure_errno = errno;
				goto contact_schedd_disconnect;
			}

			// now output this list of classads into a result
			const char ** result  = new const char* [matching_ads.Length() + 3];

			std::string _ad_count;
			sprintf( _ad_count, "%d", matching_ads.Length() );

			int count=0;
			result[count++] = GAHP_RESULT_SUCCESS;
			result[count++] = NULL;
			result[count++] = _ad_count.c_str();

			MyString *next_string;
			matching_ads.Rewind();
			while (matching_ads.Next(next_string)) {
				result[count++] = next_string->Value();
			}

			enqueue_result (current_command->request_id, result, count);
			current_command->status = SchedDRequest::SDCS_COMPLETED;

			// Cleanup
			matching_ads.Rewind();
			while (matching_ads.Next(next_string)) {
				delete next_string;
			}
			//CommitTransaction();
			delete [] result;
		}
		else {
			const char * result[] = {
				GAHP_RESULT_FAILURE,
				error_msg.c_str(),
				"0" };
			//RemoteCommitTransaction();
			enqueue_result (current_command->request_id, result, 3);
			current_command->status = SchedDRequest::SDCS_COMPLETED;
		}
	}	//elihw

	
 contact_schedd_disconnect:
	if ( qmgr_connection != NULL ) {
		DisconnectQ (qmgr_connection, FALSE);
	}

	if ( failure_line_num ) {
			// We had an error talking to the schedd. Take all of our
			// incomplete commands and mark them as failed.
			// TODO Consider retrying these commands, rather than
			//   immediately marking them as failed.
		if ( failure_errno == ETIMEDOUT ) {
			dprintf( D_ALWAYS, "Timed out talking to schedd at line %d in "
					 "doContactSchedd()\n", failure_line_num );
			sprintf( error_msg, "Timed out talking to schedd" );
		} else {
			dprintf( D_ALWAYS, "Error talking to schedd at line %d in "
					 "doContactSchedd(), errno=%d (%s)\n", failure_line_num,
					 failure_errno, strerror(failure_errno) );
			sprintf( error_msg, "Error talking to schedd" );
		}
		command_queue.Rewind();
		while (command_queue.Next(current_command)) {
			if ( current_command->status != SchedDRequest::SDCS_NEW ) {
				continue;
			}
			switch( current_command->command ) {
			case SchedDRequest::SDC_UPDATE_JOB:
			case SchedDRequest::SDC_UPDATE_CONSTRAINED:
			{
				const char *result[2] = { GAHP_RESULT_FAILURE, error_msg.c_str() };
				enqueue_result (current_command->request_id, result, 2);
				current_command->status = SchedDRequest::SDCS_COMPLETED;
			}
				break;
			case SchedDRequest::SDC_UPDATE_LEASE:
			{
				const char *result[3] = { GAHP_RESULT_FAILURE, error_msg.c_str(), NULL };
				enqueue_result (current_command->request_id, result, 3);
				current_command->status = SchedDRequest::SDCS_COMPLETED;
			}
				break;
			case SchedDRequest::SDC_SUBMIT_JOB:
			{
				const char *result[3] = { GAHP_RESULT_FAILURE, "-1.-1", error_msg.c_str() };
				enqueue_result (current_command->request_id, result, 3);
				current_command->status = SchedDRequest::SDCS_COMPLETED;
			}
				break;
			case SchedDRequest::SDC_STATUS_CONSTRAINED:
			{
				const char *result[3] = { GAHP_RESULT_FAILURE, error_msg.c_str(), "0" };
				enqueue_result (current_command->request_id, result, 3);
				current_command->status = SchedDRequest::SDCS_COMPLETED;
			}
				break;
			default:
					// Do nothing
				;
			}
		}
	}

	if ( do_reschedule ) {
		dc_schedd.reschedule();
	}

		// Write all of our results to our parent.
	flush_results();

	dprintf (D_FULLDEBUG, "Finishing doContactSchedd()\n");

	// Clean up the list
	command_queue.Rewind();
	while (command_queue.Next(current_command)) {
		if (current_command->status == SchedDRequest::SDCS_COMPLETED) {
			command_queue.DeleteCurrent();
			delete current_command;
		}
	}

	// Come back soon..
	// QUESTION: Should this always be a fixed time period?
	daemonCore->Reset_Timer( contactScheddTid, contact_schedd_interval );
}
예제 #23
0
void
printClassAd( void )
{
	printf( "%s = \"%s\"\n", ATTR_VERSION, CondorVersion() );
	printf( "%s = True\n", ATTR_IS_DAEMON_CORE );
	printf( "%s = True\n", ATTR_HAS_FILE_TRANSFER );
	printf( "%s = True\n", ATTR_HAS_PER_FILE_ENCRYPTION );
	printf( "%s = True\n", ATTR_HAS_RECONNECT );
	printf( "%s = True\n", ATTR_HAS_MPI );
	printf( "%s = True\n", ATTR_HAS_TDP );
	printf( "%s = True\n", ATTR_HAS_JOB_DEFERRAL );

		/*
		  Attributes describing what kinds of Job Info Communicators
		  this starter has.  This is mostly for COD, but someday might
		  be useful to other people, too.  There's no need to
		  advertise the fact we've got a JICShadow, since all starters
		  always have and will be able to communicate with a shadow...
		*/

	printf( "%s = True\n", ATTR_HAS_JIC_LOCAL_CONFIG );
	printf( "%s = True\n", ATTR_HAS_JIC_LOCAL_STDIN );

	ClassAd *ad = java_detect();
	if(ad) {
		int gotone=0;
		float mflops;
		char *str = 0;

		if(ad->LookupString(ATTR_JAVA_VENDOR,&str)) {
			printf("%s = \"%s\"\n",ATTR_JAVA_VENDOR,str);
			free(str);
			str = 0;
			gotone++;
		}
		if(ad->LookupString(ATTR_JAVA_VERSION,&str)) {
			printf("%s = \"%s\"\n",ATTR_JAVA_VERSION,str);
			free(str);
			str = 0;
			gotone++;
		}
		if(ad->LookupString("JavaSpecificationVersion",&str)) {
			printf("JavaSpecificationVersion = \"%s\"\n",str);
			free(str);
			str = 0;
			gotone++;
		}
		if(ad->LookupFloat(ATTR_JAVA_MFLOPS,mflops)) {
			printf("%s = %f\n", ATTR_JAVA_MFLOPS,mflops);
			gotone++;
		}
		if(gotone>0) printf( "%s = True\n",ATTR_HAS_JAVA);		
		delete ad;
	}

	// VM universe stuff
	if( VMProc::vm_univ_detect() ) {
		// This doesn't mean that vm universe is really available.
		// This just means that starter has codes for vm universe.
		// Actual testing for vm universe will be 
		//  done by vmuniverse manager in startd.
		// ATTR_HAS_VM may be overwritten by vmuniverse manager in startd
		printf( "%s = True\n",ATTR_HAS_VM);		
	}

	// Advertise which file transfer plugins are supported
	FileTransfer ft;
	CondorError e;
	ft.InitializePlugins(e);
	if (e.code()) {
		dprintf(D_ALWAYS, "WARNING: Initializing plugins returned: %s\n", e.getFullText().c_str());
	}

	MyString method_list = ft.GetSupportedMethods();
	if (!method_list.IsEmpty()) {
		printf("%s = \"%s\"\n", ATTR_HAS_FILE_TRANSFER_PLUGIN_METHODS, method_list.Value());
	}

#if defined(WIN32)
		// Advertise our ability to run jobs as the submitting user
	printf("%s = True\n", ATTR_HAS_WIN_RUN_AS_OWNER);
#endif
}
예제 #24
0
bool
Triggerd::PerformQueries()
{
   ClassAdList result;
   CondorError errstack;
   QueryResult status;
   Trigger* trig = NULL;
   CondorQuery* query;
   bool ret_val = true;
   std::map<uint32_t,Trigger*>::iterator iter;
   ClassAd* ad = NULL;
   std::string eventText;
   char* token = NULL;
   std::string triggerText;
   char* queryString = NULL;
   ExprTree* attr = NULL;
   std::list<std::string> missing_nodes;
   size_t pos;
   size_t prev_pos;
   bool bad_trigger = false;
   const char* token_str = NULL;

   if (0 < triggers.size())
   {
      dprintf(D_FULLDEBUG, "Triggerd: Evaluating %d triggers\n", (int)triggers.size());
      query = new CondorQuery(ANY_AD);
      for (iter = triggers.begin(); iter != triggers.end(); iter++)
      {
         // Clear any pre-exhisting custom contraints and add the constraint
         // for this trigger
         trig = iter->second;
         query->clearORCustomConstraints();
         query->clearANDCustomConstraints();
         queryString = strdup(trig->GetQuery().c_str());
         ReplaceAllChars(queryString, '\'', '"');
         query->addANDConstraint(queryString);
         free(queryString);

         // Perform the query and check the result
         if (NULL != query_collector)
         {
            status = query->fetchAds(result, query_collector->addr(), &errstack);
         }
         else
         {
            status = collectors->query(*query, result, &errstack);
         }
         if (Q_OK != status)
         {
            // Problem with the query
            if (Q_COMMUNICATION_ERROR == status)
            {
               dprintf(D_ALWAYS, "Triggerd Error: Error contacting the collecter - %s\n", errstack.getFullText(true).c_str());
               if (CEDAR_ERR_CONNECT_FAILED == errstack.code(0))
               {
                  dprintf(D_ALWAYS, "Triggerd Error: Couldn't contact the collector on the central manager\n");
               }
            }
            else
            {
               dprintf(D_ALWAYS, "Triggerd Error: Could not retrieve ads - %s\n", getStrQueryResult(status));
            }

            ret_val = false;
            break;
         }
         else
         {
            dprintf(D_FULLDEBUG, "Query successful.  Parsing results\n");

            // Query was successful, so parse the results
            result.Open();
            while ((ad = result.Next()))
            {
               if (true == bad_trigger)
               {
                  // Avoid processing a bad trigger multiple times.  Remove
                  // all result ads and reset the flag
                  dprintf(D_FULLDEBUG, "Cleaning up after a bad trigger\n");
                  result.Delete(ad);
                  while ((ad = result.Next()))
                  {
                     result.Delete(ad);
                  }
                  bad_trigger = false;
                  break;
               }
               eventText = "";
               triggerText = trig->GetText();
               dprintf(D_FULLDEBUG, "Parsing trigger text '%s'\n", triggerText.c_str());
               prev_pos = pos = 0;
               while (prev_pos < triggerText.length())
               {
                  pos = triggerText.find("$(", prev_pos, 2);
                  if (std::string::npos == pos)
                  {
                     // Didn't find the start of a varible, so append the
                     // remaining string
                     dprintf(D_FULLDEBUG, "Adding text string to event text\n");
                     eventText += triggerText.substr(prev_pos, std::string::npos);
                     prev_pos = triggerText.length();
                  }
                  else
                  {
                     // Found a variable for substitution.  Need to add
                     // text before it to the string, grab the variable
                     // to substitute for, and put its value in the text
                     eventText += triggerText.substr(prev_pos, pos - prev_pos);
                     dprintf(D_FULLDEBUG, "Adding text string prior to variable substitution to event text\n");

                     // Increment the position by 2 to skip the $(
                     prev_pos = pos + 2;
                     pos = triggerText.find(")", prev_pos, 1);

                     if (std::string::npos == pos)
                     {
                        // Uh-oh.  We have a start of a variable substitution
                        // but no closing marker.
                        dprintf(D_FULLDEBUG, "Error: Failed to find closing varable substitution marker ')'.  Aborting processing of the trigger\n");
                        bad_trigger = true;
                        break;
                     }
                     else
                     {
                        token_str = triggerText.substr(prev_pos, pos-prev_pos).c_str();
                        token = RemoveWS(token_str);
                        dprintf(D_FULLDEBUG, "token: '%s'\n", token);
                        if (NULL == token)
                        {
                           dprintf(D_ALWAYS, "Removing whitespace from %s produced unusable name.  Aborting processing of the trigger\n", token_str);
                           bad_trigger = true;
                           break;
                        }

                        attr = ad->LookupExpr(token);
                        if (NULL == attr)
                        {
                           // The token isn't found in the classad, so treat it
                           // like a string
                           dprintf(D_FULLDEBUG, "Adding text string to event text\n");
                           eventText += token;
                        }
                        else
                        {
                           dprintf(D_FULLDEBUG, "Adding classad value to event text\n");
                           eventText += ExprTreeToString(attr);
                        }
                        if (NULL != token)
                        {
                           free(token);
                           token = NULL;
                        }
                        ++pos;
                     }
                     prev_pos = pos;
                  }
               }

               // Remove the trailing space
               std::string::size_type notwhite = eventText.find_last_not_of(" ");
               eventText.erase(notwhite+1);

               // Send the event
               if (false == bad_trigger)
               {
                  EventCondorTriggerNotify event(eventText, time(NULL));
                  singleton->getInstance()->raiseEvent(event);
                  dprintf(D_FULLDEBUG, "Triggerd: Raised event with text '%s'\n", eventText.c_str());
               }
               result.Delete(ad);
            }
            bad_trigger = false;
            result.Close();
         }
      }
      delete query;
   }
   else
   {
      dprintf(D_FULLDEBUG, "Triggerd: No triggers to evaluate\n");
   }

   // Look for absent nodes (nodes expected to be in the pool but aren't)
   if (NULL != console)
   {
      missing_nodes = console->findAbsentNodes();
      if (0 < missing_nodes.size())
      {
         for (std::list<std::string>::iterator node = missing_nodes.begin();
              node != missing_nodes.end(); ++ node)
         {
            eventText = node->c_str();
            eventText += " is missing from the pool";
            EventCondorTriggerNotify event(eventText, time(NULL));
            singleton->getInstance()->raiseEvent(event);
            dprintf(D_FULLDEBUG, "Triggerd: Raised event with text '%s'\n", eventText.c_str());
         }
      }
   }

   return ret_val;
}
예제 #25
0
// This handler is called when a client wishes to write files from the
// transferd's storage.
int
TransferD::write_files_handler(int cmd, Stream *sock) 
{
	ReliSock *rsock = (ReliSock*)sock;
	MyString capability;
	int protocol = FTP_UNKNOWN;
	TransferRequest *treq = NULL;
	MyString fquser;
	static int transfer_reaper_id = -1;
	ThreadArg *thread_arg;
	int tid;
	ClassAd reqad;
	ClassAd respad;

	cmd = cmd; // quiet the compiler.

	dprintf(D_ALWAYS, "Got TRANSFERD_WRITE_FILES!\n");

	/////////////////////////////////////////////////////////////////////////
	// make sure we are authenticated
	/////////////////////////////////////////////////////////////////////////
	if( ! rsock->triedAuthentication() ) {
		CondorError errstack;
		if( ! SecMan::authenticate_sock(rsock, WRITE, &errstack) ) {
			// we failed to authenticate, we should bail out now
			// since we don't know what user is trying to perform
			// this action.
			// TODO: it'd be nice to print out what failed, but we
			// need better error propagation for that...
			errstack.push( "TransferD::setup_transfer_request_handler()", 42,
				"Failure to register transferd - Authentication failed" );
			dprintf( D_ALWAYS, "setup_transfer_request_handler() "
				"aborting: %s\n",
				errstack.getFullText() );
			refuse( rsock );
			return CLOSE_STREAM;
		} 
	}

	fquser = rsock->getFullyQualifiedUser();


	/////////////////////////////////////////////////////////////////////////
	// Check to see if the capability the client tells us is something that
	// we have knowledge of. We ONLY check the capability and not the
	// identity of the person in question. This allows people of different
	// identities to write files here as long as they had the right 
	// capability. While this might not sound secure, they STILL had to have
	// authenticated as someone this daemon trusts. 
	// Similarly, check the protocol it wants to use as well as ensure that
	// the direction the transfer request was supposed to be is being honored.
	/////////////////////////////////////////////////////////////////////////
	rsock->decode();

	// soak the request ad from the client about what it wants to transfer
	reqad.initFromStream(*rsock);
	rsock->end_of_message();

	reqad.LookupString(ATTR_TREQ_CAPABILITY, capability);

	rsock->encode();

	// do I know of such a capability?
	if (m_treqs.lookup(capability, treq) != 0) {
		// didn't find it. Log it and tell them to leave and close up shop
		respad.Assign(ATTR_TREQ_INVALID_REQUEST, TRUE);
		respad.Assign(ATTR_TREQ_INVALID_REASON, "Invalid capability!");
		respad.put(*rsock);
		rsock->end_of_message();

		dprintf(D_ALWAYS, "Client identity '%s' tried to write some files "
			"using capability '%s', but there was no such capability. "
			"Access denied.\n", fquser.Value(), capability.Value());
		return CLOSE_STREAM;
	}

	reqad.LookupInteger(ATTR_TREQ_FTP, protocol);

	// am I willing to use this protocol?
	switch(protocol) {
		case FTP_CFTP: // FileTrans protocol, I'm happy.
			break;

		default:
			respad.Assign(ATTR_TREQ_INVALID_REQUEST, TRUE);
			respad.Assign(ATTR_TREQ_INVALID_REASON, 
				"Invalid file transfer protocol!");
			respad.put(*rsock);
			rsock->end_of_message();

			dprintf(D_ALWAYS, "Client identity '%s' tried to write some files "
				"using protocol '%d', but I don't support that protocol. "
				"Access denied.\n", fquser.Value(), protocol);
			return CLOSE_STREAM;
	}

	// nsure that this transfer request was of the uploading variety
	if (treq->get_direction() != FTPD_UPLOAD) {
			respad.Assign(ATTR_TREQ_INVALID_REQUEST, TRUE);
			respad.Assign(ATTR_TREQ_INVALID_REASON, 
				"Transfer Request was not an uploading request!");
			respad.put(*rsock);
			rsock->end_of_message();

			dprintf(D_ALWAYS, "Client identity '%s' tried to write some files "
				"to a transfer request that wasn't expecting to be written. "
				"Access denied.\n", fquser.Value());
	}

	/////////////////////////////////////////////////////////////////////////
	// Tell the client everything was ok.
	/////////////////////////////////////////////////////////////////////////

	respad.Assign(ATTR_TREQ_INVALID_REQUEST, FALSE);
	respad.put(*rsock);
	rsock->end_of_message();

	/////////////////////////////////////////////////////////////////////////
	// Set up a thread (a process under unix) to read ALL of the job files
	// for all of the ads in the TransferRequest.
	/////////////////////////////////////////////////////////////////////////

	// now create a thread, passing in the sock, which uses the file transfer
	// object to accept the files.

	if (transfer_reaper_id == -1) {
		// only set this up ONCE so each and every thread gets one.
		transfer_reaper_id = daemonCore->Register_Reaper(
						"write_files_reaper",
						(ReaperHandlercpp) &TransferD::write_files_reaper,
						"write_files_reaper",
						this
						);
	}

	thread_arg = new ThreadArg(protocol, treq);

	// Start a new thread (process on Unix) to do the work
	tid = daemonCore->Create_Thread(
		(ThreadStartFunc)&TransferD::write_files_thread,
		(void *)thread_arg,
		rsock,
		transfer_reaper_id
		);
	
	if (tid == FALSE) {
		// XXX How do I handle this failure?
	}


	// associate the tid with the request so I can deal with it propery in
	// the reaper
	m_client_to_transferd_threads.insert(tid, treq);

	// The stream is inherited to the thread, who does the transfer and
	// finishes the protocol, but in the parent, I'm closing it.
	return CLOSE_STREAM;
}
예제 #26
0
파일: rm.cpp 프로젝트: emaste/htcondor
void
procArg(const char* arg)
{
	int		c, p;								// cluster/proc #
	char*	tmp;

	MyString constraint;

	if( str_isint(arg) || str_isreal(arg,true) )
	// process by cluster/proc #
	{
		c = strtol(arg, &tmp, 10);
		if(c <= 0)
		{
			fprintf(stderr, "Invalid cluster # from %s.\n", arg);
			had_error = true;
			return;
		}
		if(*tmp == '\0')
		// delete the cluster
		{
			CondorError errstack;
			constraint.formatstr( "%s == %d", ATTR_CLUSTER_ID, c );
			if( doWorkByConstraint(constraint.Value(), &errstack) ) {
				fprintf( stdout, 
						 "Cluster %d %s.\n", c,
						 (mode == JA_REMOVE_JOBS) ?
						 "has been marked for removal" :
						 (mode == JA_REMOVE_X_JOBS) ?
						 "has been removed locally (remote state unknown)" :
						 actionWord(mode,true) );
			} else {
				fprintf( stderr, "%s\n", errstack.getFullText(true).c_str() );
				if (had_error)
				{
					fprintf( stderr, 
						 "Couldn't find/%s all jobs in cluster %d.\n",
						 actionWord(mode,false), c );
				}
			}
			return;
		}
		if(*tmp == '.')
		{
			p = strtol(tmp + 1, &tmp, 10);
			if(p < 0)
			{
				fprintf( stderr, "Invalid proc # from %s.\n", arg);
				had_error = true;
				return;
			}
			if(*tmp == '\0')
			// process a proc
			{
				if( ! job_ids ) {
					job_ids = new StringList();
				}
				job_ids->append( arg );
				return;
			}
		}
		fprintf( stderr, "Warning: unrecognized \"%s\" skipped.\n", arg );
		return;
	}
	// process by user name
	else {
		CondorError errstack;
		constraint.formatstr("%s == \"%s\"", ATTR_OWNER, arg );
		if( doWorkByConstraint(constraint.Value(), &errstack) ) {
			fprintf( stdout, "User %s's job(s) %s.\n", arg,
					 (mode == JA_REMOVE_JOBS) ?
					 "have been marked for removal" :
					 (mode == JA_REMOVE_X_JOBS) ?
					 "have been removed locally (remote state unknown)" :
					 actionWord(mode,true) );
		} else {
			fprintf( stderr, "%s\n", errstack.getFullText(true).c_str() );
			if (had_error)
			{
				fprintf( stderr, 
					 "Couldn't find/%s all of user %s's job(s).\n",
					 actionWord(mode,false), arg );
			}
		}
	}
}
예제 #27
0
int main(int argc, char **argv)
{
	char ** ptr;
	const char * myName;

	// find our name
	myName = strrchr( argv[0], DIR_DELIM_CHAR );
	if( !myName ) {
		myName = argv[0];
	} else {
		myName++;
	}

	int cred_type = 0;
	char * cred_name = NULL;
	char * cred_file_name = NULL;
	char * myproxy_user = NULL;

	char * myproxy_host = NULL;
	int myproxy_port = 0;

	char * myproxy_dn = NULL;

	char * server_address= NULL;

	// read config file
	myDistro->Init (argc, argv);
	config();

	for (ptr=argv+1,argc--; argc > 0; argc--,ptr++) {
		if ( ptr[0][0] == '-' ) {
			switch ( ptr[0][1] ) {
			case 'h':
				usage(myName);
				exit(0);
				break;
			case 'd':

					// dprintf to console
				Termlog = 1;
				dprintf_config ("TOOL", get_param_functions());

				break;
			case 'S':

					// dprintf to console
				Termlog = 1;
				Read_Myproxy_pw_terminal = false;

				break;
			case 'n':
				if( !(--argc) || !(*(++ptr)) ) {
					fprintf( stderr, "%s: -n requires another argument\n",
							 myName );
					exit(1);
				}
	
				server_address = strdup (*ptr);

				break;
			case 't':
				if( !(--argc) || !(*(++ptr)) ) {
					fprintf( stderr, "%s: -t requires another argument\n",
							 myName );
					exit(1);
				}

				if (strcmp (*ptr, "x509") == 0) {
					cred_type = X509_CREDENTIAL_TYPE;
				} else {
					fprintf( stderr, "Invalid credential type %s\n",
							 *ptr );
					exit(1);
				}
				break;
			case 'f':
				if( !(--argc) || !(*(++ptr)) ) {
					fprintf( stderr, "%s: -f requires another argument\n",
							 myName );
					exit(1);
				}
				cred_file_name = strdup (*ptr);
				break;
			case 'N':
				if( !(--argc) || !(*(++ptr)) ) {
					fprintf( stderr, "%s: -N requires another argument\n",
							 myName );
					exit(1);
				}
				cred_name = strdup (*ptr);
				break;

			case 'm':
				if( !(--argc) || !(*(++ptr)) ) {
					fprintf( stderr, "%s: -m requires another argument\n",
							 myName );
					exit(1);
				}
	
				parseMyProxyArgument (*ptr, myproxy_user, myproxy_host, myproxy_port);
				break;
			case 'D':
				if( !(--argc) || !(*(++ptr)) ) {
					fprintf( stderr, "%s: -D requires another argument\n",
							 myName );
					exit(1);
				}
				myproxy_dn = strdup (*ptr);
				break;

			case 'v':
				version();	// this function calls exit(0)
				break;

			default:
				fprintf( stderr, "%s: Unknown option %s\n",
						 myName, *ptr);
				usage(myName);
				exit(1);
			}
		} //fi
	} //rof

	if (( cred_file_name == NULL ) || (cred_type == 0)) {
		fprintf ( stderr, "Credential filename or type not specified\n");
		exit (1);

	}

    Credential * cred = NULL;
	if (cred_type == X509_CREDENTIAL_TYPE) {
		cred = new X509Credential();
	} else {
		fprintf ( stderr, "Invalid credential type\n");
		exit (1);
	}

    
	char * data = NULL;
	int data_size;
	if (!read_file (cred_file_name, data, data_size)) {
		fprintf (stderr, "Can't open %s\n", cred_file_name);
		exit (1);
	}

	cred->SetData (data, data_size);

	if (cred_name !=NULL) {
		cred->SetName(cred_name);
	} else {
		cred->SetName(DEFAULT_CREDENTIAL_NAME);
	}

	char * username = my_username(0);
	cred->SetOwner (username);
  
	if (cred_type == X509_CREDENTIAL_TYPE && myproxy_host != NULL) {
		X509Credential * x509cred = (X509Credential*)cred;

		MyString str_host_port = myproxy_host;
		if (myproxy_port != 0) {
			str_host_port += ":";
			str_host_port += myproxy_port;
		}
		x509cred->SetMyProxyServerHost (str_host_port.Value());

		if (myproxy_user != NULL) {
			x509cred->SetMyProxyUser (myproxy_user);
		} else {
			x509cred->SetMyProxyUser (username);
		}

		if (myproxy_dn != NULL) {
			x509cred->SetMyProxyServerDN (myproxy_dn);
		}

		char * myproxy_password;
		if ( Read_Myproxy_pw_terminal ) {
			myproxy_password = 
				prompt_password(
					"Please enter the MyProxy password:"******"Please enter the MyProxy password from the standard input\n");
		}
		if (myproxy_password) {
			x509cred->SetRefreshPassword ( myproxy_password );
		}

		x509cred->display( D_FULLDEBUG );
	}

	CondorError errstack;
	DCCredd dc_credd (server_address);

	// resolve server address
	if ( ! dc_credd.locate() ) {
		fprintf (stderr, "%s\n", dc_credd.error() );
		return 1;
	}

	if (dc_credd.storeCredential(cred, errstack)) {
		printf ("Credential submitted successfully\n");
	} else {
		fprintf (stderr, "Unable to submit credential\n%s\n",
				 errstack.getFullText(true));
		return 1;
	}

	return 0;
}
예제 #28
0
/**
 * Process the current history file.
 *
 * 1) check to see if it is properly initialized, recording id (inode)
 * 2) stat the current history file
 * 3) poll for new entries and process them
 * 4) detect rotations
 */
void
aviary::history::processCurrentHistory()
{
    static MyString currentHistoryFilename = m_path + DIR_DELIM_STRING + "history";
    static HistoryFile currentHistory ( currentHistoryFilename.Value() );

    CondorError errstack;

    if (force_reset) {
       currentHistory.cleanup();
    }

	// (1)
    long unsigned int id;
    if ( !currentHistory.getId ( id ) || force_reset)
    {
        // at this point adjust the reset flag
        force_reset = false;
        if ( !currentHistory.init ( errstack ) )
        {
            dprintf ( D_ALWAYS, "%s\n", errstack.getFullText().c_str() );
            return;
        }
        ASSERT ( currentHistory.getId ( id ) );
        m_historyFiles.insert ( id );
    }


    // (2)
    // Stat before poll to handle race of: poll + write + rotate + stat
    StatWrapper stat_wrapper;
    if ( stat_wrapper.Stat ( currentHistoryFilename ) )
    {
        dprintf ( D_ALWAYS, "Failed to stat %s: %d (%s)\n",
                  currentHistoryFilename.Value(),
                  stat_wrapper.GetErrno(), strerror ( stat_wrapper.GetErrno() ) );
        return;
    }
    const StatStructType *stat = stat_wrapper.GetBuf();
    ASSERT ( currentHistory.getId ( id ) );

    // (3)
    errstack.clear();
    HistoryFile::HistoryEntriesTypeIterators poll = currentHistory.poll ( errstack );
    for ( HistoryFile::HistoryEntriesTypeIterator i = poll.first;
            i != poll.second;
            i++ )
    {
        process ( ( *i ) );
    }

    // (4)
    // If different the file has rotated
    if ( id != stat->st_ino )
    {
        currentHistory = HistoryFile ( currentHistoryFilename.Value() );
        if ( !currentHistory.init ( errstack ) )
        {
            dprintf ( D_ALWAYS, "%s\n", errstack.getFullText().c_str() );
            return;
        }
        ASSERT ( currentHistory.getId ( id ) );
        m_historyFiles.insert ( id );
        force_reset = true;
        return;
    }
}
예제 #29
0
int main(int argc, char **argv)
{
	char * server_address = NULL;
	char ** ptr;
	const char * myName;

	// find our name
	myName = strrchr( argv[0], DIR_DELIM_CHAR );
	if( !myName ) {
		myName = argv[0];
	} else {
		myName++;
	}

	// read config file
	myDistro->Init (argc, argv);
	config ();

	for (ptr=argv+1,argc--; argc > 0; argc--,ptr++) {
		if ( ptr[0][0] == '-' ) {
			switch ( ptr[0][1] ) {
			case 'h':
				usage(myName);
				exit(0);
				break;
			case 'd':
					// dprintf to console
				Termlog = 1;
				dprintf_config ("TOOL", get_param_functions());
				break;
			case 'n':
				if( !(--argc) || !(*(++ptr)) ) {
					fprintf( stderr, "%s: -n requires another argument\n",
							 myName );
					exit(1);
				}
	
				server_address = strdup (*ptr);

				break;
			case 'v':
				version();	// this function calls exit(0)
				break;
			default:
				fprintf( stderr, "%s: Unknown option %s\n",
						 myName, *ptr);
				usage(myName);
				exit(1);
			}
		} //fi
	} //rof


	CondorError errorstack;
	int number = 0;
	SimpleList <Credential*> result;

	DCCredd credd(server_address);

	// resolve server address
	if ( ! credd.locate() ) {
		fprintf (stderr, "%s\n", credd.error() );
		return 1;
	}

	if (!credd.listCredentials (result,
								number,
								errorstack)) {
		fprintf (stderr, "Unable to retrieve credentials (%s)\n",
				 errorstack.getFullText(true));
		return 1;
	}
	 


	if (number > 0) {
		Credential * cred;
		result.Rewind();
		printf ("Name\tType\n-----\t-----\n");
		while (result.Next (cred)) {
			
			printf ("%s\t%s\n", cred->GetName(), cred->GetTypeString());
		}

		printf ("\nTotal %d\n", number);
	} else if (number == 0) {
		printf ("No credentials currently stored on this server\n");
	} else {
		fprintf (stderr, "ERROR\n");
		return 1;
	}

	return 0;
}
예제 #30
0
//---------------------------------------------------------------------------
void main_init (int argc, char ** const argv) {

	printf ("Executing condor dagman ... \n");

		// flag used if DAGMan is invoked with -WaitForDebug so we
		// wait for a developer to attach with a debugger...
	volatile int wait_for_debug = 0;

		// process any config vars -- this happens before we process
		// argv[], since arguments should override config settings
	dagman.Config();

	// The DCpermission (last parm) should probably be PARENT, if it existed
    daemonCore->Register_Signal( SIGUSR1, "SIGUSR1",
                                 (SignalHandler) main_shutdown_remove,
                                 "main_shutdown_remove", NULL);

/****** FOR TESTING *******
    daemonCore->Register_Signal( SIGUSR2, "SIGUSR2",
                                 (SignalHandler) main_testing_stub,
                                 "main_testing_stub", NULL);
****** FOR TESTING ********/
    debug_progname = condor_basename(argv[0]);

		// condor_submit_dag version from .condor.sub
	bool allowVerMismatch = false;
	const char *csdVersion = "undefined";

	int i;
    for (i = 0 ; i < argc ; i++) {
        debug_printf( DEBUG_NORMAL, "argv[%d] == \"%s\"\n", i, argv[i] );
    }

    if (argc < 2) Usage();  //  Make sure an input file was specified

		// get dagman job id from environment, if it's there
		// (otherwise it will be set to "-1.-1.-1")
	dagman.DAGManJobId.SetFromString( getenv( EnvGetName( ENV_ID ) ) );

	//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
		// Minimum legal version for a .condor.sub file to be compatible
		// with this condor_dagman binary.

		// !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
		// Be sure to change this if the arguments or environment
		// passed to condor_dagman change in an incompatible way!!
		// !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

	struct DagVersionData {
		int majorVer;
		int minorVer;
		int subMinorVer;
	};
	const DagVersionData MIN_SUBMIT_FILE_VERSION = { 7, 1, 2 };

		// Construct a string of the minimum submit file version.
	MyString minSubmitVersionStr;
	minSubmitVersionStr.formatstr( "%d.%d.%d",
				MIN_SUBMIT_FILE_VERSION.majorVer,
				MIN_SUBMIT_FILE_VERSION.minorVer,
				MIN_SUBMIT_FILE_VERSION.subMinorVer );
	//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

    //
    // Process command-line arguments
    //
    for (i = 1; i < argc; i++) {
        if( !strcasecmp( "-Debug", argv[i] ) ) {
            i++;
            if( argc <= i || strcmp( argv[i], "" ) == 0 ) {
                debug_printf( DEBUG_SILENT, "No debug level specified\n" );
                Usage();
            }
            debug_level = (debug_level_t) atoi (argv[i]);
        } else if( !strcasecmp( "-Lockfile", argv[i] ) ) {
            i++;
            if( argc <= i || strcmp( argv[i], "" ) == 0 ) {
                debug_printf( DEBUG_SILENT, "No DagMan lockfile specified\n" );
                Usage();
            }
            lockFileName = argv[i];
        } else if( !strcasecmp( "-Help", argv[i] ) ) {
            Usage();
        } else if (!strcasecmp( "-Dag", argv[i] ) ) {
            i++;
            if( argc <= i || strcmp( argv[i], "" ) == 0 ) {
                debug_printf( DEBUG_SILENT, "No DAG specified\n" );
                Usage();
            }
			dagman.dagFiles.append( argv[i] );
        } else if( !strcasecmp( "-MaxIdle", argv[i] ) ) {
            i++;
            if( argc <= i || strcmp( argv[i], "" ) == 0 ) {
                debug_printf( DEBUG_SILENT,
							  "Integer missing after -MaxIdle\n" );
                Usage();
            }
            dagman.maxIdle = atoi( argv[i] );
        } else if( !strcasecmp( "-MaxJobs", argv[i] ) ) {
            i++;
            if( argc <= i || strcmp( argv[i], "" ) == 0 ) {
                debug_printf( DEBUG_SILENT,
							  "Integer missing after -MaxJobs\n" );
                Usage();
            }
            dagman.maxJobs = atoi( argv[i] );
        } else if( !strcasecmp( "-MaxScripts", argv[i] ) ) {
			debug_printf( DEBUG_SILENT, "-MaxScripts has been replaced with "
						   "-MaxPre and -MaxPost arguments\n" );
			Usage();
        } else if( !strcasecmp( "-MaxPre", argv[i] ) ) {
            i++;
            if( argc <= i || strcmp( argv[i], "" ) == 0 ) {
                debug_printf( DEBUG_SILENT,
							  "Integer missing after -MaxPre\n" );
                Usage();
            }
            dagman.maxPreScripts = atoi( argv[i] );
        } else if( !strcasecmp( "-MaxPost", argv[i] ) ) {
            i++;
            if( argc <= i || strcmp( argv[i], "" ) == 0 ) {
                debug_printf( DEBUG_SILENT,
							  "Integer missing after -MaxPost\n" );
                Usage();
            }
            dagman.maxPostScripts = atoi( argv[i] );
        } else if( !strcasecmp( "-NoEventChecks", argv[i] ) ) {
			debug_printf( DEBUG_QUIET, "Warning: -NoEventChecks is "
						"ignored; please use the DAGMAN_ALLOW_EVENTS "
						"config parameter instead\n");
			check_warning_strictness( DAG_STRICT_1 );

        } else if( !strcasecmp( "-AllowLogError", argv[i] ) ) {
			dagman.allowLogError = true;

        } else if( !strcasecmp( "-DontAlwaysRunPost",argv[i] ) ) {
			dagman._runPost = false;

        } else if( !strcasecmp( "-WaitForDebug", argv[i] ) ) {
			wait_for_debug = 1;

        } else if( !strcasecmp( "-UseDagDir", argv[i] ) ) {
			dagman.useDagDir = true;

        } else if( !strcasecmp( "-AutoRescue", argv[i] ) ) {
            i++;
            if( argc <= i || strcmp( argv[i], "" ) == 0 ) {
                debug_printf( DEBUG_SILENT, "No AutoRescue value specified\n" );
                Usage();
            }
            dagman.autoRescue = (atoi( argv[i] ) != 0);

        } else if( !strcasecmp( "-DoRescueFrom", argv[i] ) ) {
            i++;
            if( argc <= i || strcmp( argv[i], "" ) == 0 ) {
                debug_printf( DEBUG_SILENT, "No rescue DAG number specified\n" );
                Usage();
            }
            dagman.doRescueFrom = atoi (argv[i]);

        } else if( !strcasecmp( "-CsdVersion", argv[i] ) ) {
            i++;
            if( argc <= i || strcmp( argv[i], "" ) == 0 ) {
                debug_printf( DEBUG_SILENT, "No CsdVersion value specified\n" );
                Usage();
            }
			csdVersion = argv[i];

        } else if( !strcasecmp( "-AllowVersionMismatch", argv[i] ) ) {
			allowVerMismatch = true;

        } else if( !strcasecmp( "-DumpRescue", argv[i] ) ) {
			dagman.dumpRescueDag = true;

        } else if( !strcasecmp( "-verbose", argv[i] ) ) {
			dagman._submitDagDeepOpts.bVerbose = true;

        } else if( !strcasecmp( "-force", argv[i] ) ) {
			dagman._submitDagDeepOpts.bForce = true;
		
        } else if( !strcasecmp( "-notification", argv[i] ) ) {
            i++;
            if( argc <= i || strcmp( argv[i], "" ) == 0 ) {
                debug_printf( DEBUG_SILENT, "No notification value specified\n" );
                Usage();
            }
			dagman._submitDagDeepOpts.strNotification = argv[i];

        } else if( !strcasecmp( "-dagman", argv[i] ) ) {
            i++;
            if( argc <= i || strcmp( argv[i], "" ) == 0 ) {
                debug_printf( DEBUG_SILENT, "No dagman value specified\n" );
                Usage();
            }
			dagman._submitDagDeepOpts.strDagmanPath = argv[i];

        } else if( !strcasecmp( "-outfile_dir", argv[i] ) ) {
            i++;
            if( argc <= i || strcmp( argv[i], "" ) == 0 ) {
                debug_printf( DEBUG_SILENT, "No outfile_dir value specified\n" );
                Usage();
            }
			dagman._submitDagDeepOpts.strOutfileDir = argv[i];

        } else if( !strcasecmp( "-update_submit", argv[i] ) ) {
			dagman._submitDagDeepOpts.updateSubmit = true;

        } else if( !strcasecmp( "-import_env", argv[i] ) ) {
			dagman._submitDagDeepOpts.importEnv = true;

        } else if( !strcasecmp( "-priority", argv[i] ) ) {
		++i;
		if( i >= argc || strcmp( argv[i], "" ) == 0 ) {
			debug_printf( DEBUG_NORMAL, "No priority value specified\n");
			Usage();
		}
		dagman._submitDagDeepOpts.priority = atoi(argv[i]);
		} else if( !strcasecmp( "-dont_use_default_node_log", argv[i] ) ) {
			dagman._submitDagDeepOpts.always_use_node_log = false;
        } else {
    		debug_printf( DEBUG_SILENT, "\nUnrecognized argument: %s\n",
						argv[i] );
			Usage();
		}
    }

	dagman.dagFiles.rewind();
	dagman.primaryDagFile = dagman.dagFiles.next();
	dagman.multiDags = (dagman.dagFiles.number() > 1);

	MyString tmpDefaultLog;
	if ( dagman._defaultNodeLog != NULL ) {
		tmpDefaultLog = dagman._defaultNodeLog;
		free( dagman._defaultNodeLog );
	} else {
		tmpDefaultLog = dagman.primaryDagFile + ".nodes.log";
	}

		// Force default log file path to be absolute so it works
		// with -usedagdir and DIR nodes.
	CondorError errstack;
	if ( !MultiLogFiles::makePathAbsolute( tmpDefaultLog, errstack) ) {
       	debug_printf( DEBUG_QUIET, "Unable to convert default log "
					"file name to absolute path: %s\n",
					errstack.getFullText().c_str() );
		dagman.dag->GetJobstateLog().WriteDagmanFinished( EXIT_ERROR );
		DC_Exit( EXIT_ERROR );
	}
	dagman._defaultNodeLog = strdup( tmpDefaultLog.Value() );
	debug_printf( DEBUG_NORMAL, "Default node log file is: <%s>\n",
				dagman._defaultNodeLog);

    //
    // Check the arguments
    //

	//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
	// Checking for version compatibility between the .condor.sub
	// file and this condor_dagman binary...

	// Note: if we're in recovery mode and the submit file version
	// causes us to quit, we leave any existing node jobs still
	// running -- may want to change that eventually.  wenger 2009-10-13.

		// Version of the condor_submit_dag that created our submit file.
	CondorVersionInfo submitFileVersion( csdVersion );

		// Version of this condor_dagman binary.
	CondorVersionInfo dagmanVersion;

		// Just generate this message fragment in one place.
	MyString versionMsg;
	versionMsg.formatstr("the version (%s) of this DAG's Condor submit "
				"file (created by condor_submit_dag)", csdVersion );

		// Make sure version in submit file is valid.
	if( !submitFileVersion.is_valid() ) {
		if ( !allowVerMismatch ) {
        	debug_printf( DEBUG_QUIET, "Error: %s is invalid!\n",
						versionMsg.Value() );
			DC_Exit( EXIT_ERROR );
		} else {
        	debug_printf( DEBUG_NORMAL, "Warning: %s is invalid; "
						"continuing because of -AllowVersionMismatch flag\n",
						versionMsg.Value() );
		}

		// Make sure .condor.sub file is recent enough.
	} else if ( submitFileVersion.compare_versions(
				CondorVersion() ) != 0 ) {

		if( !submitFileVersion.built_since_version(
					MIN_SUBMIT_FILE_VERSION.majorVer,
					MIN_SUBMIT_FILE_VERSION.minorVer,
					MIN_SUBMIT_FILE_VERSION.subMinorVer ) ) {
			if ( !allowVerMismatch ) {
        		debug_printf( DEBUG_QUIET, "Error: %s is older than "
							"oldest permissible version (%s)\n",
							versionMsg.Value(), minSubmitVersionStr.Value() );
				DC_Exit( EXIT_ERROR );
			} else {
        		debug_printf( DEBUG_NORMAL, "Warning: %s is older than "
							"oldest permissible version (%s); continuing "
							"because of -AllowVersionMismatch flag\n",
							versionMsg.Value(), minSubmitVersionStr.Value() );
			}

			// Warn if .condor.sub file is a newer version than this binary.
		} else if (dagmanVersion.compare_versions( csdVersion ) > 0 ) {
        	debug_printf( DEBUG_NORMAL, "Warning: %s is newer than "
						"condor_dagman version (%s)\n", versionMsg.Value(),
						CondorVersion() );
			check_warning_strictness( DAG_STRICT_3 );
		} else {
        	debug_printf( DEBUG_NORMAL, "Note: %s differs from "
						"condor_dagman version (%s), but the "
						"difference is permissible\n", 
						versionMsg.Value(), CondorVersion() );
		}
	}
	//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

    if( dagman.primaryDagFile == "" ) {
        debug_printf( DEBUG_SILENT, "No DAG file was specified\n" );
        Usage();
    }
    if (lockFileName == NULL) {
        debug_printf( DEBUG_SILENT, "No DAG lock file was specified\n" );
        Usage();
    }
    if( dagman.maxJobs < 0 ) {
        debug_printf( DEBUG_SILENT, "-MaxJobs must be non-negative\n");
        Usage();
    }
    if( dagman.maxPreScripts < 0 ) {
        debug_printf( DEBUG_SILENT, "-MaxPre must be non-negative\n" );
        Usage();
    }
    if( dagman.maxPostScripts < 0 ) {
        debug_printf( DEBUG_SILENT, "-MaxPost must be non-negative\n" );
        Usage();
    }
    if( dagman.doRescueFrom < 0 ) {
        debug_printf( DEBUG_SILENT, "-DoRescueFrom must be non-negative\n" );
        Usage();
    }

    debug_printf( DEBUG_VERBOSE, "DAG Lockfile will be written to %s\n",
                   lockFileName );
	if ( dagman.dagFiles.number() == 1 ) {
    	debug_printf( DEBUG_VERBOSE, "DAG Input file is %s\n",
				  	dagman.primaryDagFile.Value() );
	} else {
		MyString msg = "DAG Input files are ";
		dagman.dagFiles.rewind();
		const char *dagFile;
		while ( (dagFile = dagman.dagFiles.next()) != NULL ) {
			msg += dagFile;
			msg += " ";
		}
		msg += "\n";
    	debug_printf( DEBUG_VERBOSE, "%s", msg.Value() );
	}

		// if requested, wait for someone to attach with a debugger...
	while( wait_for_debug ) { }

    {
		MyString cwd;
		if( !condor_getcwd(cwd) ) {
			cwd = "<null>";
		}
        debug_printf( DEBUG_DEBUG_1, "Current path is %s\n",cwd.Value());

		char *temp = my_username();
		debug_printf( DEBUG_DEBUG_1, "Current user is %s\n",
					   temp ? temp : "<null>" );
		if( temp ) {
			free( temp );
		}
    }

		//
		// Figure out the rescue DAG to run, if any (this is with "new-
		// style" rescue DAGs).
		//
	int rescueDagNum = 0;
	MyString rescueDagMsg;

	if ( dagman.doRescueFrom != 0 ) {
		rescueDagNum = dagman.doRescueFrom;
		rescueDagMsg.formatstr( "Rescue DAG number %d specified", rescueDagNum );
		RenameRescueDagsAfter( dagman.primaryDagFile.Value(),
					dagman.multiDags, rescueDagNum, dagman.maxRescueDagNum );

	} else if ( dagman.autoRescue ) {
		rescueDagNum = FindLastRescueDagNum(
					dagman.primaryDagFile.Value(),
					dagman.multiDags, dagman.maxRescueDagNum );
		rescueDagMsg.formatstr( "Found rescue DAG number %d", rescueDagNum );
	}

		//
		// Fill in values in the deep submit options that we haven't
		// already set.
		//
	dagman._submitDagDeepOpts.bAllowLogError = dagman.allowLogError;
	dagman._submitDagDeepOpts.useDagDir = dagman.useDagDir;
	dagman._submitDagDeepOpts.autoRescue = dagman.autoRescue;
	dagman._submitDagDeepOpts.doRescueFrom = dagman.doRescueFrom;
	dagman._submitDagDeepOpts.allowVerMismatch = allowVerMismatch;
	dagman._submitDagDeepOpts.recurse = false;

    //
    // Create the DAG
    //

	// Note: a bunch of the parameters we pass here duplicate things
	// in submitDagOpts, but I'm keeping them separate so we don't have to
	// bother to construct a new SubmitDagOtions object for splices.
	// wenger 2010-03-25
    dagman.dag = new Dag( dagman.dagFiles, dagman.maxJobs,
						  dagman.maxPreScripts, dagman.maxPostScripts,
						  dagman.allowLogError, dagman.useDagDir,
						  dagman.maxIdle, dagman.retrySubmitFirst,
						  dagman.retryNodeFirst, dagman.condorRmExe,
						  dagman.storkRmExe, &dagman.DAGManJobId,
						  dagman.prohibitMultiJobs, dagman.submitDepthFirst,
						  dagman._defaultNodeLog,
						  dagman._generateSubdagSubmits,
						  &dagman._submitDagDeepOpts,
						  false ); /* toplevel dag! */

    if( dagman.dag == NULL ) {
        EXCEPT( "ERROR: out of memory!\n");
    }

	dagman.dag->SetAbortOnScarySubmit( dagman.abortOnScarySubmit );
	dagman.dag->SetAllowEvents( dagman.allow_events );
	dagman.dag->SetConfigFile( dagman._dagmanConfigFile );
	dagman.dag->SetMaxJobHolds( dagman._maxJobHolds );
	dagman.dag->SetPostRun(dagman._runPost);
	if( dagman._submitDagDeepOpts.priority != 0 ) { // From command line
		dagman.dag->SetDefaultPriority(dagman._submitDagDeepOpts.priority);
	} else if( dagman._defaultPriority != 0 ) { // From config file
		dagman.dag->SetDefaultPriority(dagman._defaultPriority);
		dagman._submitDagDeepOpts.priority = dagman._defaultPriority;
	}

    //
    // Parse the input files.  The parse() routine
    // takes care of adding jobs and dependencies to the DagMan
    //
	dagman.mungeNodeNames = (dagman.dagFiles.number() > 1);
	parseSetDoNameMunge( dagman.mungeNodeNames );
   	debug_printf( DEBUG_VERBOSE, "Parsing %d dagfiles\n", 
		dagman.dagFiles.number() );
	dagman.dagFiles.rewind();
	char *dagFile;

	// Here we make a copy of the dagFiles for iteration purposes. Deep inside
	// of the parsing, copies of the dagman.dagFile string list happen which
	// mess up the iteration of this list.
	StringList sl( dagman.dagFiles );
	sl.rewind();
	while ( (dagFile = sl.next()) != NULL ) {
    	debug_printf( DEBUG_VERBOSE, "Parsing %s ...\n", dagFile );

    	if( !parse( dagman.dag, dagFile, dagman.useDagDir ) ) {
			if ( dagman.dumpRescueDag ) {
					// Dump the rescue DAG so we can see what we got
					// in the failed parse attempt.
    			debug_printf( DEBUG_QUIET, "Dumping rescue DAG "
							"because of -DumpRescue flag\n" );
				dagman.dag->Rescue( dagman.primaryDagFile.Value(),
							dagman.multiDags, dagman.maxRescueDagNum,
							false, true, false );
			}
			
			dagman.dag->RemoveRunningJobs(dagman, true);
			MSC_SUPPRESS_WARNING_FIXME(6031) // return falue of unlink ignored.
			unlink( lockFileName );
			dagman.CleanUp();
			
				// Note: debug_error calls DC_Exit().
        	debug_error( 1, DEBUG_QUIET, "Failed to parse %s\n",
					 	dagFile );
    	}
	}
	if( dagman.dag->GetDefaultPriority() != 0 ) {
		dagman.dag->SetDefaultPriorities(); // Applies to the nodes of the dag
	}
	dagman.dag->GetJobstateLog().WriteDagmanStarted( dagman.DAGManJobId );
	if ( rescueDagNum > 0 ) {
			// Get our Pegasus sequence numbers set correctly.
		dagman.dag->GetJobstateLog().InitializeRescue();
	}

	// lift the final set of splices into the main dag.
	dagman.dag->LiftSplices(SELF);

		//
		// Actually parse the "new-new" style (partial DAG info only)
		// rescue DAG here.  Note: this *must* be done after splices
		// are lifted!
		//
	if ( rescueDagNum > 0 ) {
		dagman.rescueFileToRun = RescueDagName(
					dagman.primaryDagFile.Value(),
					dagman.multiDags, rescueDagNum );
		debug_printf ( DEBUG_QUIET, "%s; running %s in combination with "
					"normal DAG file%s\n", rescueDagMsg.Value(),
					dagman.rescueFileToRun.Value(),
					dagman.multiDags ? "s" : "");
		debug_printf ( DEBUG_QUIET,
					"~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n");

		debug_printf ( DEBUG_QUIET, "USING RESCUE DAG %s\n",
					dagman.rescueFileToRun.Value() );

			// Turn off node name munging for the rescue DAG, because
			// it will already have munged node names.
		parseSetDoNameMunge( false );

    	if( !parse( dagman.dag, dagman.rescueFileToRun.Value(),
					dagman.useDagDir ) ) {
			if ( dagman.dumpRescueDag ) {
					// Dump the rescue DAG so we can see what we got
					// in the failed parse attempt.
    			debug_printf( DEBUG_QUIET, "Dumping rescue DAG "
							"because of -DumpRescue flag\n" );
				dagman.dag->Rescue( dagman.primaryDagFile.Value(),
							dagman.multiDags, dagman.maxRescueDagNum,
							true, false );
			}
			
			dagman.dag->RemoveRunningJobs(dagman, true);
			MSC_SUPPRESS_WARNING_FIXME(6031) // return falue of unlink ignored.
			unlink( lockFileName );
			dagman.CleanUp();
			
				// Note: debug_error calls DC_Exit().
        	debug_error( 1, DEBUG_QUIET, "Failed to parse %s\n",
					 	dagFile );
    	}
	}

	dagman.dag->CheckThrottleCats();

	// fix up any use of $(JOB) in the vars values for any node
	dagman.dag->ResolveVarsInterpolations();

/*	debug_printf(DEBUG_QUIET, "COMPLETED DAG!\n");*/
/*	dagman.dag->PrintJobList();*/

#ifndef NOT_DETECT_CYCLE
	if( dagman.startup_cycle_detect && dagman.dag->isCycle() )
	{
		// Note: maybe we should run the final node here, if there is one.
		// wenger 2011-12-19.
		debug_error (1, DEBUG_QUIET, "ERROR: a cycle exists in the dag, please check input\n");
	}
#endif
    debug_printf( DEBUG_VERBOSE, "Dag contains %d total jobs\n",
				  dagman.dag->NumNodes( true ) );

	MyString firstLocation;
	if ( dagman.dag->GetReject( firstLocation ) ) {
    	debug_printf( DEBUG_QUIET, "Exiting because of REJECT "
					"specification in %s.  This most likely means "
					"that the DAG file was produced with the -DumpRescue "
					"flag when parsing the original DAG failed.\n",
					firstLocation.Value() );
		DC_Exit( EXIT_ERROR );
		return;
	}

	dagman.dag->DumpDotFile();

	if ( dagman.dumpRescueDag ) {
    	debug_printf( DEBUG_QUIET, "Dumping rescue DAG and exiting "
					"because of -DumpRescue flag\n" );
		dagman.dag->Rescue( dagman.primaryDagFile.Value(),
					dagman.multiDags, dagman.maxRescueDagNum, false,
					false, false );
		ExitSuccess();
		return;
	}

    //------------------------------------------------------------------------
    // Bootstrap and Recovery
    //
    // If the Lockfile exists, this indicates a premature termination
    // of a previous run of Dagman. If condor log is also present,
    // we run in recovery mode
  
    // If the Daglog is not present, then we do not run in recovery
    // mode
  
    {
      bool recovery = access(lockFileName,  F_OK) == 0;
      
        if (recovery) {
            debug_printf( DEBUG_VERBOSE, "Lock file %s detected, \n",
                           lockFileName);
			if (dagman.abortDuplicates) {
				if (util_check_lock_file(lockFileName) == 1) {
        			debug_printf( DEBUG_QUIET, "Aborting because it "
							"looks like another instance of DAGMan is "
							"currently running on this DAG; if that is "
							"not the case, delete the lock file (%s) "
							"and re-submit the DAG.\n", lockFileName );
					dagman.dag->GetJobstateLog().
								WriteDagmanFinished( EXIT_RESTART );
    				dagman.CleanUp();
					DC_Exit( EXIT_ERROR );
					// We should never get to here!
				}
			}
        }

			//
			// If this DAGMan continues, it should overwrite the lock
			// file if it exists.
			//
		util_create_lock_file(lockFileName, dagman.abortDuplicates);

        debug_printf( DEBUG_VERBOSE, "Bootstrapping...\n");
        if( !dagman.dag->Bootstrap( recovery ) ) {
            dagman.dag->PrintReadyQ( DEBUG_DEBUG_1 );
            debug_error( 1, DEBUG_QUIET, "ERROR while bootstrapping\n");
        }
    }

    debug_printf( DEBUG_VERBOSE, "Registering condor_event_timer...\n" );
    daemonCore->Register_Timer( 1, dagman.m_user_log_scan_interval, 
				condor_event_timer, "condor_event_timer" );

	dagman.dag->SetPendingNodeReportInterval(
				dagman.pendingReportInterval );
}