Exemplo n.º 1
0
// Run the current action against every job matching the accumulated global
// constraint and tell the user what happened.  Does nothing when no
// constraint has been supplied.
void
handleConstraints( void )
{
	if( ! has_constraint ) {
		return;
	}

	const char* constraint_expr = global_constraint.Value();
	CondorError errstack;

	// Failure path first: dump the error stack, plus a summary line if the
	// had_error flag is set.
	if( ! doWorkByConstraint(constraint_expr, &errstack) ) {
		fprintf( stderr, "%s\n", errstack.getFullText(true).c_str() );
		if( had_error ) {
			fprintf( stderr,
				 "Couldn't find/%s all jobs matching constraint %s\n",
				 actionWord(mode,false), constraint_expr );
		}
		return;
	}

	// Success: the two removal modes get dedicated wording, everything
	// else uses the generic past-tense action word.
	const char* outcome;
	if( mode == JA_REMOVE_JOBS ) {
		outcome = "have been marked for removal";
	} else if( mode == JA_REMOVE_X_JOBS ) {
		outcome = "have been removed locally (remote state unknown)";
	} else {
		outcome = actionWord(mode,true);
	}

	fprintf( stdout, "Jobs matching constraint %s %s\n",
			 constraint_expr, outcome );
}
Exemplo n.º 2
0
/**
 * Declare a file for this job: open (creating if needed) a file of the given
 * name under the spool directory and record it in declaredFiles.
 *
 * @param name     file name, appended to spoolDirectory with DIR_DELIM_STRING
 * @param size     declared size, stored in the JobFile record
 * @param errstack receives a "SOAP" entry describing any failure
 * @return 0 on success -- and also when the open failed but the name holds
 *         no DIR_DELIM_CHAR (nothing is recorded in that case);
 *         2 if the record could not be inserted into declaredFiles;
 *         3 if the open failed and the name contains DIR_DELIM_CHAR;
 *         4 if the name was already declared.
 */
int
Job::declare_file(const MyString &name,
                  filesize_t size,
				  CondorError &errstack)
{
	JobFile *ignored;
	JobFile jobFile;
	jobFile.size = size;
	jobFile.currentOffset = 0;

	jobFile.name = name;

	jobFile.file =
		safe_open_wrapper_follow((spoolDirectory + DIR_DELIM_STRING + jobFile.name).Value(),
			 O_WRONLY | O_CREAT | _O_BINARY,
			 0600);
		// Remember why the open failed right away: the logging call
		// below may modify errno before we format the error message.
	const int open_errno = errno;

	if (-1 != jobFile.file) {
		if (0 == declaredFiles.lookup(name, ignored)) {
				// Already declared; drop the descriptor we just opened.
			close(jobFile.file);
			errstack.pushf("SOAP",
						   ALREADYEXISTS,
						   "File '%s' already declared.",
						   name.Value());

			return 4;
		}

		if (declaredFiles.insert(name, jobFile)) {
			close(jobFile.file);
			errstack.pushf("SOAP",
						   FAIL,
						   "Failed to record file '%s'.",
						   name.Value());

			return 2;
		}
	} else {
			// If there is a path delimiter in the name we assume that
			// the client knows what she is doing and will set a
			// proper Iwd later on. If there is no path delimiter we
			// have a problem.
			// NOTE(review): the test below reports the error when a
			// delimiter IS present, which reads as the opposite of the
			// comment above -- confirm which one is intended.
		if (-1 != name.FindChar(DIR_DELIM_CHAR)) {
			dprintf(D_FULLDEBUG,
					"Failed to open '%s' for writing, reason: %s\n",
					(spoolDirectory+DIR_DELIM_STRING+jobFile.name).Value(),
					strerror(open_errno));

			errstack.pushf("SOAP",
						   FAIL,
						   "Failed to open '%s' for writing, reason: %s",
						   name.Value(),
						   strerror(open_errno));

			return 3;
		}
	}

	return 0;
}
Exemplo n.º 3
0
/**
 * Read part of a file from this job's spool directory into a caller buffer.
 *
 * @param name     file name, appended to spoolDirectory with DIR_DELIM_STRING
 * @param offset   absolute offset to seek to before reading
 * @param length   number of bytes to zero in, and then read into, data
 * @param data     caller-provided buffer; length bytes of it are written
 * @param errstack receives a "SOAP" entry describing any failure
 * @return 0 on success; 1 if the file could not be opened; 2 if the seek
 *         failed; 3 if the read reported an error; 4 if close failed.
 */
int
Job::get_file(const MyString &name,
              int offset,
              int length,
              unsigned char *&data,
			  CondorError &errstack)
{
		// NOTE(review): a negative length would be converted to a huge
		// size here -- presumably callers validate it; confirm.
	memset(data, 0, length);
	int file = safe_open_wrapper_follow((spoolDirectory + DIR_DELIM_STRING + name).Value(),
					O_RDONLY | _O_BINARY,
					0);

	if (-1 != file) {
		if (-1 == lseek(file, offset, SEEK_SET)) {
				// Save errno before close(), which may overwrite it
				// and make us report the wrong reason.
			const int seek_errno = errno;
			close(file);
			errstack.pushf("SOAP",
						   FAIL,
						   "Failed to lseek in file '%s', reason: %s",
						   name.Value(),
						   strerror(seek_errno));

			return 2;
		}
		int result;
		if (-1 == 
			(result = full_read(file, data, sizeof(unsigned char) * length))) {
			close(file);
			errstack.pushf("SOAP",
						   FAIL,
						   "Failed to read from file '%s', wanted to "
						   "read %d bytes but received %d",
						   name.Value(),
						   length,
						   result);

			return 3;
		}
		if (-1 == close(file)) {
			errstack.pushf("SOAP",
						   FAIL,
						   "Failed to close file '%s', reason: %s",
						   name.Value(),
						   strerror(errno));

			return 4;
		}
	} else {
		errstack.pushf("SOAP",
					   FAIL,
					   "Failed to open file '%s', reason: %s",
					   name.Value(),
					   strerror(errno));

		return 1;
	}

	return 0;
}
Exemplo n.º 4
0
// This function calls up the schedd passed in on the command line and 
// registers the transferd as being available for the schedd's use.
//
// regsock_ptr: out-parameter. It is reset to NULL on entry and then handed
//   to DCSchedd::register_transferd(); the socket is deliberately left
//   untouched afterwards (see the warning near the end).
//
// Returns REG_RESULT_NO_SCHEDD when the configured schedd address is "N/A",
// REG_RESULT_FAILED when the registration call reports failure, and
// REG_RESULT_SUCCESS otherwise.
RegisterResult
TransferD::register_to_schedd(ReliSock **regsock_ptr)
{
	CondorError errstack;
	MyString sname;
	MyString id;
	MyString sinful;
	bool rval;
	
	// Start from a clean slate; any previous value is discarded, not freed.
	*regsock_ptr = NULL;

	sname = m_features.get_schedd_sinful();
	id = m_features.get_id();

	if (sname == "N/A") {
		// no schedd supplied with which to register
		dprintf(D_ALWAYS, "No schedd specified to which to register.\n");
		return REG_RESULT_NO_SCHEDD;
	}
	
	// what is my sinful string?
	sinful = daemonCore->InfoCommandSinfulString(-1);

	dprintf(D_FULLDEBUG, "Registering myself(%s) to schedd(%s)\n",
		sinful.Value(), sname.Value());

	// hook up to the schedd.
	DCSchedd schedd(sname.Value(), NULL);

	// register myself, give myself 1 minute to connect.
	rval = schedd.register_transferd(sinful, id, 20*3, regsock_ptr, &errstack);

	if (rval == false) {
		// emit why; the prefix names this class so the log line can be
		// traced back to the right function.
		dprintf(D_ALWAYS, "TransferD::register_to_schedd(): Failed to "
			"register. Schedd gave reason '%s'\n", errstack.getFullText().c_str());
		return REG_RESULT_FAILED;
	}

	// WARNING WARNING WARNING WARNING //
	// WARNING WARNING WARNING WARNING //
	// WARNING WARNING WARNING WARNING //
	// WARNING WARNING WARNING WARNING //
	// WARNING WARNING WARNING WARNING //

	// Here, I must in fact go back to daemon core without closing or doing
	// anything with the socket. This is because the schedd is going to
	// reconnect back to me, and I can't deadlock.

	dprintf(D_FULLDEBUG, 
		"Succesfully registered, awaiting treq channel message....\n");

	return REG_RESULT_SUCCESS;
}
Exemplo n.º 5
0
bool
DCMaster::sendMasterCommand( bool insure_update, int my_cmd )
{
	CondorError errstack;
	int master_cmd = my_cmd;
	dprintf( D_FULLDEBUG, "DCMaster::sendMasterCommand: Just starting... \n"); 

	/* have we located the required master yet? */
	if( ! _addr ) {
		locate();
	}

	if( ! m_master_safesock && ! insure_update ) {
		m_master_safesock = new SafeSock;
		m_master_safesock->timeout(20);   // years of research... :)
		if( ! m_master_safesock->connect(_addr) ) {
			dprintf( D_ALWAYS, "sendMasterCommand: Failed to connect to master " 
					 "(%s)\n", _addr );
			delete m_master_safesock;
			m_master_safesock = NULL;
			return false;
		}
	}

	ReliSock reli_sock;
	bool  result;

	if( insure_update ) {
			// For now, if we have to ensure that the update gets
			// there, we use a ReliSock (TCP).
		reli_sock.timeout(20);   // years of research... :)
		if( ! reli_sock.connect(_addr) ) {
			dprintf( D_ALWAYS, "sendMasterCommand: Failed to connect to master " 
					 "(%s)\n", _addr );
			return false;
		}

		result = sendCommand( master_cmd, (Sock*)&reli_sock, 0, &errstack );
	} else {
		result = sendCommand( master_cmd, (Sock*)m_master_safesock, 0, &errstack );
	}
	if( ! result ) {
		dprintf( D_FULLDEBUG, 
				 "Failed to send %d command to master\n",master_cmd );
		if( m_master_safesock ) {
			delete m_master_safesock;
			m_master_safesock = NULL;
		}
		if( errstack.code() != 0 ) {
		        dprintf( D_ALWAYS, "ERROR: %s\n", errstack.getFullText() );
		}
		return false;
	}
	return true;
}
Exemplo n.º 6
0
// Command handler invoked when the schedd first connects back to the
// transferd to complete registration.  Authenticates the socket if that has
// not been attempted yet, then registers it with daemonCore so that later
// transfer requests arriving on it reach accept_transfer_request_handler().
//
// Returns CLOSE_STREAM if authentication fails, KEEP_STREAM otherwise.
int
TransferD::setup_transfer_request_handler(int  /*cmd*/, Stream *sock)
{
	ReliSock *rsock = (ReliSock*)sock;

	dprintf(D_ALWAYS, "Got TRANSFER_CONTROL_CHANNEL!\n");

	rsock->decode();

	// --- Authentication -------------------------------------------------
	// Only authenticate when nobody has tried yet; bail out on failure
	// because we would not know which user is making the request.
	CondorError errstack;
	const bool auth_failed =
		! rsock->triedAuthentication() &&
		! SecMan::authenticate_sock(rsock, WRITE, &errstack);

	if( auth_failed ) {
		// TODO: it'd be nice to print out what failed, but we
		// need better error propagation for that...
		errstack.push( "TransferD::setup_transfer_request_handler()", 42,
			"Failure to register transferd - Authentication failed" );
		dprintf( D_ALWAYS, "setup_transfer_request_handler() "
			"aborting: %s\n",
			errstack.getFullText().c_str() );
		refuse(rsock);
		return CLOSE_STREAM;
	}

	rsock->decode();

	// --- Socket registration --------------------------------------------
	MyString sock_id;
	sock_id = "<TreqChannel-Socket>";

	// Register_Socket wants a non-const char*, so pass a temporary copy.
	char* mutable_id = strdup( sock_id.Value() );

	daemonCore->Register_Socket((Sock*)rsock, mutable_id,
		(SocketHandlercpp)&TransferD::accept_transfer_request_handler,
		"TransferD::accept_transfer_request_handler", this, ALLOW);

	free( mutable_id );

	dprintf(D_ALWAYS, "Treq channel established.\n");
	dprintf(D_ALWAYS, "Accepting Transfer Requests.\n");

	return KEEP_STREAM;
}
Exemplo n.º 7
0
int
Job::put_file(const MyString &name,
			  int offset,
			  char * data,
			  int data_length,
			  CondorError &errstack)
{
	JobFile jobFile;
	if (-1 == declaredFiles.lookup(name, jobFile)) {
		errstack.pushf("SOAP",
					   FAIL,
					   "File '%s' has not been declared.",
					   name.Value());

		return 1;
	}

	if (-1 != jobFile.file) {
		if (-1 == lseek(jobFile.file, offset, SEEK_SET)) {
			errstack.pushf("SOAP",
						   FAIL,
						   "Failed to lseek in file '%s', reason: %s",
						   name.Value(),
						   strerror(errno));

			return 2;
		}
		int result;
		if (data_length !=
			(result = full_write(jobFile.file, data, data_length))) {
			errstack.pushf("SOAP",
						   FAIL,
						   "Failed to write to from file '%s', wanted to write %d bytes but was only able to write %d",
						   name.Value(),
						   data_length,
						   result);

			return 3;
		}
	} else {
			errstack.pushf("SOAP",
						   FAIL,
						   "Failed to open file '%s', it should not "
						   "contain any path separators.",
						   name.Value());

		return 5;
	}

	return 0;
}
Exemplo n.º 8
0
///////////////////////////////////////////////////////////////////////////////
// Note: on Unix/Linux, the file ID is a string encoding the combination of
// device number and inode; on Windows the file ID is simply the value
// _fullpath() returns on the path we're given.  The Unix/Linux version
// is preferable because it will work correctly even if there are hard
// links to log files; but there are no inodes on Windows, so we're
// doing what we can.
bool
GetFileID( const MyString &filename, MyString &fileID,
			CondorError &errstack )
{

		// Make sure the log file exists.  Even though we may later call
		// InitializeFile(), we have to make sure the file exists here
		// first so we make sure that the file exists and we can therefore
		// get an inode or real path for it.
		// We *don't* want to truncate the file here, though, because
		// we don't know for sure whether it's the first time we're seeing
		// it.
	if ( access( filename.Value(), F_OK ) != 0 ) {
		if ( !MultiLogFiles::InitializeFile( filename.Value(),
					false, errstack ) ) {
			errstack.pushf( "ReadMultipleUserLogs", UTIL_ERR_LOG_FILE,
						"Error initializing log file %s", filename.Value() );
			return false;
		}
	}

#ifdef WIN32
	char *tmpRealPath = realpath( filename.Value(), NULL );
	if ( !tmpRealPath ) {
		errstack.pushf( "ReadMultipleUserLogs", UTIL_ERR_LOG_FILE,
					"Error (%d, %s) getting real path for specified path %s",
					errno, strerror( errno ), filename.Value() );
		return false;
	}

	fileID = tmpRealPath;
	free( tmpRealPath );
#else
	StatWrapper swrap;
	if ( swrap.Stat( filename.Value() ) != 0 ) {
		errstack.pushf( "ReadMultipleUserLogs", UTIL_ERR_LOG_FILE,
					"Error getting inode for log file %s",
					filename.Value() );
		return false;
	}
	fileID.formatstr( "%llu:%llu", (unsigned long long)swrap.GetBuf()->st_dev,
				(unsigned long long)swrap.GetBuf()->st_ino );
#endif

	return true;
}
Exemplo n.º 9
0
//---------------------------------------------------------------------------
// Open a connection to the job queue of the schedd stored in _schedd.
// Returns the connection, or NULL after logging a warning (and applying the
// DAG_STRICT_3 strictness check) when the connection cannot be made.
Qmgr_connection *
DagmanClassad::OpenConnection()
{
	CondorError errstack;
	Qmgr_connection *connection = ConnectQ( _schedd->addr(), 0, false,
				&errstack, NULL, _schedd->version() );

	if ( connection ) {
		return connection;
	}

	debug_printf( DEBUG_QUIET,
				"WARNING: failed to connect to queue manager (%s)\n",
				errstack.getFullText().c_str() );
	check_warning_strictness( DAG_STRICT_3 );
	return NULL;
}
Exemplo n.º 10
0
// Delegate an X.509 proxy to the starter at _addr.
//
// filename:               proxy file to delegate
// expiration_time:        passed through to put_x509_delegation()
// sec_session_id:         security session passed to startCommand()
// result_expiration_time: passed through to put_x509_delegation()
//
// Returns XUS_Okay or XUS_Declined according to the starter's reply, and
// XUS_Error for any local failure, a reply of 0, or an unrecognized reply.
DCStarter::X509UpdateStatus
DCStarter::delegateX509Proxy( const char * filename, time_t expiration_time, char const *sec_session_id, time_t *result_expiration_time)
{
	ReliSock rsock;
	rsock.timeout(60);

	const bool connected = rsock.connect(_addr);
	if( ! connected ) {
		dprintf(D_ALWAYS, "DCStarter::delegateX509Proxy: "
			"Failed to connect to starter %s\n", _addr);
		return XUS_Error;
	}

	CondorError errstack;
	const bool started = startCommand(DELEGATE_GSI_CRED_STARTER, &rsock, 0,
									  &errstack, NULL, false, sec_session_id);
	if( ! started ) {
		dprintf( D_ALWAYS, "DCStarter::delegateX509Proxy: "
				 "Failed send command to the starter: %s\n",
				 errstack.getFullText().c_str());
		return XUS_Error;
	}

		// Ship the proxy; bytes_sent receives the size of the file.
	filesize_t bytes_sent = 0;
	if ( rsock.put_x509_delegation(&bytes_sent,filename,expiration_time,result_expiration_time) < 0 ) {
		dprintf(D_ALWAYS,
			"DCStarter::delegateX509Proxy "
			"failed to delegate proxy file %s (size=%ld)\n",
			filename, (long int)bytes_sent);
		return XUS_Error;
	}

		// Read the starter's verdict.  It defaults to 0, which maps to
		// XUS_Error below.
	rsock.decode();
	int reply = 0;
	rsock.code(reply);
	rsock.end_of_message();

	if( reply == 1 ) {
		return XUS_Okay;
	}
	if( reply == 2 ) {
		return XUS_Declined;
	}
	if( reply != 0 ) {
		dprintf(D_ALWAYS, "DCStarter::delegateX509Proxy: "
			"remote side returned unknown code %d. Treating "
			"as an error.\n", reply);
	}
	return XUS_Error;
}
Exemplo n.º 11
0
/**
 * Process the history directory and maintain the history file map
 *
 * Only handle rotated history files, those history.* that are not an
 * index. For each one that is not in the history file map, create a
 * new HistoryFile, poll it for entries to process, and add it to the
 * map.
 */
void
aviary::history::processHistoryDirectory()
{
    const char *file = NULL;

    // each time through we rebuild our set of inodes
    if (force_reset) {
        m_historyFiles.clear();
    }

    Directory dir ( m_path.Value() );
    dir.Rewind();
    while ( ( file = dir.Next() ) )
    {
        // Skip all non-history files, e.g. history and history.*.idx
        if ( strncmp ( file, "history.", 8 ) ||
                !strncmp ( file + ( strlen ( file ) - 4 ), HISTORY_INDEX_SUFFIX, 4 ) ) continue;

        HistoryFile h_file ( ( m_path + DIR_DELIM_STRING + file ).Value() );
        CondorError errstack;
        if ( !h_file.init ( errstack ) )
        {
            dprintf ( D_ALWAYS, "%s\n", errstack.getFullText().c_str() );
            return;
        }
        errstack.clear();

        long unsigned int id;
        ASSERT ( h_file.getId ( id ) );
        HistoryFileListType::iterator entry = m_historyFiles.find ( id );
        if ( m_historyFiles.end() == entry )
        {
            HistoryFile::HistoryEntriesTypeIterators ij = h_file.poll ( errstack );
            for ( HistoryFile::HistoryEntriesTypeIterator i = ij.first;
                    i != ij.second;
                    i++ )
            {
                process ( ( *i ) );
            }

            m_historyFiles.insert ( id );
        }
    }
}
Exemplo n.º 12
0
// Copy-assignment: take over the other object's file name, release whatever
// this object currently holds, and re-initialize from the new name instead
// of copying the stat buffer and FILE* members.  An initialization failure
// is only logged.
ODSHistoryFile &
ODSHistoryFile::operator=(const ODSHistoryFile &base)
{
	// Self-assignment changes nothing.
	if (this == &base) {
		return *this;
	}

	m_name = base.m_name;
	cleanup();

	CondorError errstack;
	const bool initialized = init(errstack);
	if (!initialized) {
		// XXX: Should throw an exception here
		dprintf ( D_ALWAYS, "ODSHistoryFile::operator=: %s\n",
				errstack.getFullText(true).c_str());
	}

	return *this;
}
Exemplo n.º 13
0
// Prepare this history file for use: stat it, keep a private copy of the
// stat buffer in m_stat, verify it is a regular file, open it for reading
// into m_file, and create and initialize the ODS writer in m_writer.
//
// Returns true on success.  On failure returns false after pushing an
// "ODSHistoryFile::init" entry onto errstack with code 1 (stat failed),
// 2 (not a regular file), 4 (fopen failed) or 5 (writer init failed).
bool
ODSHistoryFile::init(CondorError &errstack)
{
	StatWrapper sw;
	if (sw.Stat(m_name.c_str()) != 0) {
		const int stat_errno = sw.GetErrno();
		errstack.pushf("ODSHistoryFile::init", 1,
					   "Failed to stat %s: %d (%s)\n",
					   m_name.c_str(),
					   stat_errno,
					   strerror(sw.GetErrno()));
		return false;
	}

	// Keep our own copy of the stat data; the wrapper is a local.
	m_stat = (StatStructType *) malloc(sizeof(StatStructType));
	ASSERT(m_stat);
	memcpy(m_stat, sw.GetBuf(), sizeof(StatStructType));

	const bool is_regular = S_ISREG(m_stat->st_mode);
	if (!is_regular) {
		errstack.pushf("ODSHistoryFile::init", 2,
					   "%s: not a regular file\n",
					   m_name.c_str());
		return false;
	}

	m_file = safe_fopen_wrapper(m_name.c_str(), "r");
	if (m_file == NULL) {
		errstack.pushf("ODSHistoryFile::init", 4,
					   "Failed to fopen %s: %d (%s)\n",
					   m_name.c_str(), errno, strerror(errno));
		return false;
	}

	m_writer = new ODSMongodbOps(DB_NAME);
	const bool writer_ready = m_writer->init("localhost");
	if (!writer_ready) {
		errstack.pushf("ODSHistoryFile::init", 5,
					   "Unable to init ODS writer\n");
		return false;
	}

	return true;
}
Exemplo n.º 14
0
// Apply the current action to every job in the queue by building the
// constraint "<ATTR_CLUSTER_ID> >= 0" and handing it to doWorkByConstraint(),
// then report the outcome to the user.
void
handleAll()
{
	char constraint[128];
		// Bounded formatting: never write past the end of the buffer,
		// whatever the attribute name expands to.
	snprintf( constraint, sizeof(constraint), "%s >= 0", ATTR_CLUSTER_ID );

	CondorError errstack;
	if( doWorkByConstraint(constraint, &errstack) ) {
		fprintf( stdout, "All jobs %s.\n",
				 (mode == JA_REMOVE_JOBS) ?
				 "marked for removal" :
				 (mode == JA_REMOVE_X_JOBS) ?
				 "removed locally (remote state unknown)" :
				 actionWord(mode,true) );
	} else {
			// Dump the error stack; add a summary line only when the
			// had_error flag is set.
		fprintf( stderr, "%s\n", errstack.getFullText(true).c_str() );
		if (had_error)
		{
			fprintf( stderr, "Could not %s all jobs.\n",
				 actionWord(mode,false) );
		}
	}
}
Exemplo n.º 15
0
/**
 * List the contents of this job's spool directory.
 *
 * @param file_list receives one heap-allocated FileInfo per directory entry
 * @param errstack  receives a "SOAP" entry describing any failure
 * @return 0 on success; 1 if spoolDirectory is not a directory; 2 if an
 *         entry could not be appended to file_list.
 */
int
Job::get_spool_list(List<FileInfo> &file_list,
					CondorError &errstack)
{
	StatInfo directoryInfo(spoolDirectory.Value());
	if (directoryInfo.IsDirectory()) {
		Directory directory(spoolDirectory.Value());
		const char * name;
		FileInfo *info;
		while (NULL != (name = directory.Next())) {
			info = new FileInfo();
				// Check the pointer before it is used, not after.
			ASSERT(info);
			info->initialize(name, directory.GetFileSize());

			if (!file_list.Append(info)) {
					// The list did not take the entry, so it is
					// still ours to release.
				delete info;
				errstack.pushf("SOAP",
							   FAIL,
							   "Error adding %s to file list.",
							   name);

				return 2;
			}
		}

		return 0;
	} else {
		dprintf(D_ALWAYS, "spoolDirectory == '%s'\n",
				spoolDirectory.Value());

		errstack.pushf("SOAP",
					   FAIL,
					   "spool directory '%s' is not actually a directory.",
					   spoolDirectory.Value());

		return 1;
	}
}
Exemplo n.º 16
0
//---------------------------------------------------------------------------
// Stop monitoring this node's log file.  The reader is chosen by job type:
// condorLogReader for TYPE_CONDOR nodes, storkLogReader otherwise.
// Returns true if the log was already unmonitored or was unmonitored
// successfully; a failure to unmonitor is treated as fatal (EXCEPT).
bool
Job::UnmonitorLogFile( ReadMultipleUserLogs &condorLogReader,
			ReadMultipleUserLogs &storkLogReader )
{
	debug_printf( DEBUG_DEBUG_2, "Unmonitoring log file <%s> for node %s\n",
				GetLogFile(), GetJobName() );

	// Nothing to do if we are not monitoring in the first place.
	if ( !_logIsMonitored ) {
		debug_printf( DEBUG_DEBUG_1, "Warning: log file for node "
					"%s is already unmonitored\n", GetJobName() );
		return true;
	}

	ReadMultipleUserLogs &reader = (_jobType == TYPE_CONDOR) ?
				condorLogReader : storkLogReader;

	debug_printf( DEBUG_DEBUG_1, "Unmonitoring log file <%s> for node %s\n",
				GetLogFile(), GetJobName() );

	CondorError errstack;
	const bool unmonitored = reader.unmonitorLogFile( GetLogFile(), errstack );

	if ( unmonitored ) {
		// Forget the log file name now that we no longer watch it.
		delete [] _logFile;
		_logFile = NULL;
		_logIsMonitored = false;
	} else {
		errstack.pushf( "DAGMan::Job", DAGMAN_ERR_LOG_FILE,
					"ERROR: Unable to unmonitor log file for node %s",
					GetJobName() );
		debug_printf( DEBUG_QUIET, "%s\n", errstack.getFullText().c_str() );
		EXCEPT( "Fatal log file monitoring error!\n" );
	}

	return unmonitored;
}
Exemplo n.º 17
0
// Fetch startd ads from this startd into adsList.
// Returns true when the query succeeds; returns false when the daemon cannot
// be located or the query fails (the failure is logged in the latter case).
bool 
DCStartd::getAds( ClassAdList &adsList )
{
	CondorError errstack;

	// instantiate query object
	CondorQuery* query = new CondorQuery (STARTD_AD);
	if ( !query ) {
		dprintf( D_ALWAYS, "Error:  Out of memory\n");
		return(false);
	}

	bool fetched = false;

	if( this->locate() ) {
		char* ad_addr = this->addr();
		const QueryResult outcome = query->fetchAds(adsList, ad_addr, &errstack);

		if ( outcome == Q_OK ) {
			fetched = true;
		} else if ( outcome == Q_COMMUNICATION_ERROR ) {
			dprintf( D_ALWAYS, "%s\n", errstack.getFullText(true).c_str() );
		} else {
			dprintf (D_ALWAYS, "Error:  Could not fetch ads --- %s\n",
					 getStrQueryResult(outcome));
		}
	}

	// Single cleanup point for every path that allocated the query.
	delete query;
	return fetched;
}
Exemplo n.º 18
0
// Make sure a log file exists, optionally truncating it.
//
// filename: path of the log file
// truncate: when true the file is opened with O_TRUNC
// errstack: receives a "MultiLogFiles" entry on failure
//
// Returns true if the file could be created/opened and then closed again,
// false otherwise.
bool
MultiLogFiles::InitializeFile(const char *filename, bool truncate,
			CondorError &errstack)
{
	dprintf( D_LOG_FILES, "MultiLogFiles::InitializeFile(%s, %d)\n",
				filename, (int)truncate );

	const int open_flags = truncate ? (O_WRONLY | O_TRUNC) : O_WRONLY;
	if ( truncate ) {
		dprintf( D_ALWAYS, "MultiLogFiles: truncating log file %s\n",
					filename );
	}

		// Two attempts: first try to create the file; if it already
		// exists, open it without creating.  This keeps things working
		// when the log file is a symlink to another file (see gittrac
		// #2704).
	int handle = safe_create_fail_if_exists( filename, open_flags );
	if ( handle < 0 && errno == EEXIST ) {
		handle = safe_open_no_create_follow( filename, open_flags );
	}

	if ( handle < 0 ) {
		errstack.pushf("MultiLogFiles", UTIL_ERR_OPEN_FILE,
					"Error (%d, %s) opening file %s for creation "
					"or truncation", errno, strerror( errno ), filename );
		return false;
	}

	const int close_rc = close( handle );
	if ( close_rc != 0 ) {
		errstack.pushf("MultiLogFiles", UTIL_ERR_CLOSE_FILE,
					"Error (%d, %s) closing file %s for creation "
					"or truncation", errno, strerror( errno ), filename );
		return false;
	}

	return true;
}
Exemplo n.º 19
0
// Turn filename into an absolute path, in place, by prefixing the current
// working directory when fullpath() says it is not one already.
// Returns false (with a "MultiLogFiles" entry pushed onto errstack) only
// when the current working directory cannot be determined.
bool
MultiLogFiles::makePathAbsolute(MyString &filename, CondorError &errstack)
{
	// Already absolute: leave it alone.
	if ( fullpath(filename.Value()) ) {
		return true;
	}

		// I'd like to use realpath() here, but I'm not sure
		// if that's portable across all platforms.  wenger 2009-01-09.
	MyString	cwd;
	const bool have_cwd = condor_getcwd(cwd);
	if ( !have_cwd ) {
		errstack.pushf( "MultiLogFiles", UTIL_ERR_GET_CWD,
					"ERROR: condor_getcwd() failed with errno %d (%s) at %s:%d",
					errno, strerror(errno), __FILE__, __LINE__);
		return false;
	}

	filename = cwd + DIR_DELIM_STRING + filename;
	return true;
}
Exemplo n.º 20
0
// Entry point of the credential listing tool.
//
// Options (only the first letter after '-' is examined):
//   -h         print usage and exit(0)
//   -d         send dprintf output to the console
//   -n <addr>  address of the credd to contact
//   -v         print version (version() exits)
//
// Locates the credd, asks it for the stored credentials and prints their
// names and types.  Returns 0 on success and 1 on any failure.
int main(int argc, char **argv)
{
	char * server_address = NULL;
	char ** ptr;
	const char * myName;

	// find our name
	myName = strrchr( argv[0], DIR_DELIM_CHAR );
	if( !myName ) {
		myName = argv[0];
	} else {
		myName++;
	}

	// read config file
	myDistro->Init (argc, argv);
	config ();

	for (ptr=argv+1,argc--; argc > 0; argc--,ptr++) {
		if ( ptr[0][0] == '-' ) {
			switch ( ptr[0][1] ) {
			case 'h':
				usage(myName);
				exit(0);
				break;
			case 'd':
					// dprintf to console
				Termlog = 1;
				dprintf_config ("TOOL", get_param_functions());
				break;
			case 'n':
					// Consume the option's value; complain if absent.
				if( !(--argc) || !(*(++ptr)) ) {
					fprintf( stderr, "%s: -n requires another argument\n",
							 myName );
					exit(1);
				}
	
				server_address = strdup (*ptr);

				break;
			case 'v':
				version();	// this function calls exit(0)
				break;
			default:
				fprintf( stderr, "%s: Unknown option %s\n",
						 myName, *ptr);
				usage(myName);
				exit(1);
			}
		} //fi
	} //rof


	CondorError errorstack;
	int number = 0;
	SimpleList <Credential*> result;

	DCCredd credd(server_address);

	// resolve server address
	if ( ! credd.locate() ) {
		fprintf (stderr, "%s\n", credd.error() );
		return 1;
	}

	if (!credd.listCredentials (result,
								number,
								errorstack)) {
			// Hand fprintf a C string, never the string object itself.
		fprintf (stderr, "Unable to retrieve credentials (%s)\n",
				 errorstack.getFullText(true).c_str());
		return 1;
	}
	 


	if (number > 0) {
		Credential * cred;
		result.Rewind();
		printf ("Name\tType\n-----\t-----\n");
		while (result.Next (cred)) {
			
			printf ("%s\t%s\n", cred->GetName(), cred->GetTypeString());
		}

		printf ("\nTotal %d\n", number);
	} else if (number == 0) {
		printf ("No credentials currently stored on this server\n");
	} else {
			// A negative count is not a valid answer.
		fprintf (stderr, "ERROR\n");
		return 1;
	}

	return 0;
}
Exemplo n.º 21
0
// Entry point of the tool that fetches job sandbox files.
//
// Options (dispatched on the first letter after '-'):
//   -d                 send dprintf output to the console
//   -c <expr>          constraint; flag and value are stored for later
//   -ad... <sinful>    schedd address (any other -a... selects all jobs)
//   -n <name>          schedd name
//   -p <pool>          central manager to query
//   -s <method>        sandbox transfer method
//   -v / -h            version / usage
// Remaining arguments are job specifications handed to procArg().
//
// After parsing, the schedd is located, the job constraint is built, and the
// files are fetched either directly from the schedd or through a transferd,
// depending on st_method.  Returns 0 on success; failures exit non-zero.
int
main(int argc, char *argv[])
{
	char	*arg;
	int		nArgs = 0;				// number of args 
	int	 i, result;
	char* pool = NULL;
	char* scheddName = NULL;
	char* scheddAddr = NULL;
	MyString method;
	char *tmp;

	myDistro->Init( argc, argv );
	MyName = condor_basename(argv[0]);
	config();

#if !defined(WIN32)
	install_sig_handler(SIGPIPE, SIG_IGN );
#endif

	// dig around in the config file looking for what the config file says
	// about getting files from Condor. This defaults with the global variable
	// initialization.
	tmp = param( "SANDBOX_TRANSFER_METHOD" );
	if ( tmp != NULL ) {
		method = tmp;
		free( tmp );
		string_to_stm( method, st_method );
	}

	// Deferred arguments; argc slots is enough because at most one entry
	// is stored per command-line argument.
	char **args = (char **)malloc(sizeof(char *) * argc); // args 
	if ( ! args) exit(2);

	// parse the arguments.
	for( argv++; (arg = *argv); argv++ ) {
		if( arg[0] == '-' ) {
			if( ! arg[1] ) {
				usage();
			}
			switch( arg[1] ) {
			case 'd':
				// dprintf to console
				dprintf_set_tool_debug("TOOL", 0);
				break;
			case 'c':
				// Keep both the flag and its value; they are turned into
				// a constraint once the schedd has been located.
				args[nArgs] = arg;
				nArgs++;
				argv++;
				if( ! *argv ) {
					fprintf( stderr, 
							 "%s: -constraint requires another argument\n", 
							 MyName);
					exit(1);
				}				
				args[nArgs] = *argv;
				nArgs++;
				break;
			case 'a':
				if( arg[2] && arg[2] == 'd' ) {
					argv++;
					if( ! *argv ) {
						fprintf( stderr, 
								 "%s: -addr requires another argument\n", 
								 MyName);
						exit(1);
					}				
					if( is_valid_sinful(*argv) ) {
						scheddAddr = strdup(*argv);
						if( ! scheddAddr ) {
							fprintf( stderr, "Out of Memory!\n" );
							exit(1);
						}
					} else {
						fprintf( stderr, 
								 "%s: \"%s\" is not a valid address\n",
								 MyName, *argv );
						fprintf( stderr, "Should be of the form "
								 "<ip.address.here:port>\n" );
						fprintf( stderr, 
								 "For example: <123.456.789.123:6789>\n" );
						exit( 1 );
					}
					break;
				}
				All = true;
				break;
			case 'n': 
				// use the given name as the schedd name to connect to
				argv++;
				if( ! *argv ) {
					fprintf( stderr, "%s: -name requires another argument\n", 
							 MyName);
					exit(1);
				}			
				if ( scheddName ) free(scheddName);
				scheddName = strdup(*argv);
				break;
			case 'p':
				// use the given name as the central manager to query
				argv++;
				if( ! *argv ) {
					fprintf( stderr, "%s: -pool requires another argument\n", 
							 MyName);
					exit(1);
				}				
				if( pool ) {
					free( pool );
				}
				pool = strdup( *argv );
				break;
			case 's':
				argv++;
				if( ! *argv ) {
					fprintf( stderr, "%s: -stm requires another argument\n", 
							 MyName);
					exit(1);
				}				
				method = *argv;
				string_to_stm(method, st_method);
				break;
			case 'v':
				version();
				break;
			case 'h':
				usage(0);
				break;
			default:
				fprintf( stderr, "Unrecognized option: %s\n", arg ); 
				usage();
				break;
			}
		} else {
			if( All ) {
					// If -all is set, there should be no other
					// constraint arguments.
				usage();
			}
			args[nArgs] = arg;
			nArgs++;
		}
	}

	// Check to make sure we have a valid sandbox transfer mechanism.
	if (st_method == STM_UNKNOWN) {
		fprintf( stderr,
			"%s: Unknown sandbox transfer method: %s\n", MyName,
			method.Value());
		usage();
		exit(1);
	}

	if( ! (All || nArgs) ) {
			// We got no indication of what to act on


		fprintf( stderr, "You did not specify any jobs\n" ); 
		usage();
	}

		// We're done parsing args, now make sure we know how to
		// contact the schedd. 
	if( ! scheddAddr ) {
			// This will always do the right thing, even if either or
			// both of scheddName or pool are NULL.
		schedd = new DCSchedd( scheddName, pool );
	} else {
		schedd = new DCSchedd( scheddAddr );
	}
	if( ! schedd->locate() ) {
		fprintf( stderr, "%s: %s\n", MyName, schedd->error() ); 
		exit( 1 );
	}

		// Process the args.
	if( All ) {
		handleAll();
	} else {
		for(i = 0; i < nArgs; i++) {
			if( match_prefix( args[i], "-constraint" ) ) {
					// The value was stored right after the flag.
				i++;
				addConstraint( args[i] );
			} else {
				procArg(args[i]);
			}
		}
	}

		// Sanity check: make certain we now have a constraint
	if ( global_constraint.Length() <= 0 ) {			
		fprintf( stderr, "Unable to create a job constraint!\n");
		exit(1);
	}

	fprintf(stdout,"Fetching data files...\n");

	switch(st_method) {
		case STM_USE_SCHEDD_ONLY:
			{ // start block

			// Get the sandbox directly from the schedd.
			// And now, do the work.
			CondorError errstack;
			result = schedd->receiveJobSandbox(global_constraint.Value(),
				&errstack);
			if ( !result ) {
				fprintf( stderr, "\n%s\n", errstack.getFullText(true).c_str() );
				fprintf( stderr, "ERROR: Failed to spool job files.\n" );
				exit(1);
			}
		
			// All done
			return 0;

			} //end block
			break;

		case STM_USE_TRANSFERD:
			{ // start block

			// NEW METHOD where we ask the schedd for a transferd, then get the
			// files from the transferd

			CondorError errstack;
			ClassAd respad;
			// Start from "not invalid" so a missing attribute in the
			// response does not leave us reading an indeterminate value.
			int invalid = 0;
			MyString reason;
			MyString td_sinful;
			MyString td_cap;

			result = schedd->requestSandboxLocation(FTPD_DOWNLOAD, 
				global_constraint, FTP_CFTP, &respad, &errstack);
			if ( !result ) {
				fprintf( stderr, "\n%s\n", errstack.getFullText(true).c_str() );
				fprintf( stderr, "ERROR: Failed to spool job files.\n" );
				exit(1);
			}

			respad.LookupInteger(ATTR_TREQ_INVALID_REQUEST, invalid);
			if (invalid == TRUE) {
				fprintf( stderr, "ERROR: Failed to spool job files.\n" );
				respad.LookupString(ATTR_TREQ_INVALID_REASON, reason);
				fprintf( stderr, "%s\n", reason.Value());
				exit(EXIT_FAILURE);
			}

			respad.LookupString(ATTR_TREQ_TD_SINFUL, td_sinful);
			respad.LookupString(ATTR_TREQ_CAPABILITY, td_cap);

			dprintf(D_ALWAYS, 
				"td: %s, cap: %s\n", td_sinful.Value(), td_cap.Value());

			DCTransferD dctd(td_sinful.Value());

			result = dctd.download_job_files(&respad, &errstack);
			if ( !result ) {
				fprintf( stderr, "\n%s\n", errstack.getFullText(true).c_str() );
				fprintf( stderr, "ERROR: Failed to spool job files.\n" );
				exit(1);
			}

			} // end block
		break;

		default:
			EXCEPT("PROGRAMMER ERROR: st_method must be known.");
			break;
		}

	// All done
	return 0;
}
Exemplo n.º 22
0
int main(int argc, char **argv)
{
	char ** ptr;
	const char * myName;

	// find our name
	myName = strrchr( argv[0], DIR_DELIM_CHAR );
	if( !myName ) {
		myName = argv[0];
	} else {
		myName++;
	}

	int cred_type = 0;
	char * cred_name = NULL;
	char * cred_file_name = NULL;
	char * myproxy_user = NULL;

	char * myproxy_host = NULL;
	int myproxy_port = 0;

	char * myproxy_dn = NULL;

	char * server_address= NULL;

	// read config file
	myDistro->Init (argc, argv);
	config();

	for (ptr=argv+1,argc--; argc > 0; argc--,ptr++) {
		if ( ptr[0][0] == '-' ) {
			switch ( ptr[0][1] ) {
			case 'h':
				usage(myName);
				exit(0);
				break;
			case 'd':

					// dprintf to console
				Termlog = 1;
				dprintf_config ("TOOL", get_param_functions());

				break;
			case 'S':

					// dprintf to console
				Termlog = 1;
				Read_Myproxy_pw_terminal = false;

				break;
			case 'n':
				if( !(--argc) || !(*(++ptr)) ) {
					fprintf( stderr, "%s: -n requires another argument\n",
							 myName );
					exit(1);
				}
	
				server_address = strdup (*ptr);

				break;
			case 't':
				if( !(--argc) || !(*(++ptr)) ) {
					fprintf( stderr, "%s: -t requires another argument\n",
							 myName );
					exit(1);
				}

				if (strcmp (*ptr, "x509") == 0) {
					cred_type = X509_CREDENTIAL_TYPE;
				} else {
					fprintf( stderr, "Invalid credential type %s\n",
							 *ptr );
					exit(1);
				}
				break;
			case 'f':
				if( !(--argc) || !(*(++ptr)) ) {
					fprintf( stderr, "%s: -f requires another argument\n",
							 myName );
					exit(1);
				}
				cred_file_name = strdup (*ptr);
				break;
			case 'N':
				if( !(--argc) || !(*(++ptr)) ) {
					fprintf( stderr, "%s: -N requires another argument\n",
							 myName );
					exit(1);
				}
				cred_name = strdup (*ptr);
				break;

			case 'm':
				if( !(--argc) || !(*(++ptr)) ) {
					fprintf( stderr, "%s: -m requires another argument\n",
							 myName );
					exit(1);
				}
	
				parseMyProxyArgument (*ptr, myproxy_user, myproxy_host, myproxy_port);
				break;
			case 'D':
				if( !(--argc) || !(*(++ptr)) ) {
					fprintf( stderr, "%s: -D requires another argument\n",
							 myName );
					exit(1);
				}
				myproxy_dn = strdup (*ptr);
				break;

			case 'v':
				version();	// this function calls exit(0)
				break;

			default:
				fprintf( stderr, "%s: Unknown option %s\n",
						 myName, *ptr);
				usage(myName);
				exit(1);
			}
		} //fi
	} //rof

	if (( cred_file_name == NULL ) || (cred_type == 0)) {
		fprintf ( stderr, "Credential filename or type not specified\n");
		exit (1);

	}

    Credential * cred = NULL;
	if (cred_type == X509_CREDENTIAL_TYPE) {
		cred = new X509Credential();
	} else {
		fprintf ( stderr, "Invalid credential type\n");
		exit (1);
	}

    
	char * data = NULL;
	int data_size;
	if (!read_file (cred_file_name, data, data_size)) {
		fprintf (stderr, "Can't open %s\n", cred_file_name);
		exit (1);
	}

	cred->SetData (data, data_size);

	if (cred_name !=NULL) {
		cred->SetName(cred_name);
	} else {
		cred->SetName(DEFAULT_CREDENTIAL_NAME);
	}

	char * username = my_username(0);
	cred->SetOwner (username);
  
	if (cred_type == X509_CREDENTIAL_TYPE && myproxy_host != NULL) {
		X509Credential * x509cred = (X509Credential*)cred;

		MyString str_host_port = myproxy_host;
		if (myproxy_port != 0) {
			str_host_port += ":";
			str_host_port += myproxy_port;
		}
		x509cred->SetMyProxyServerHost (str_host_port.Value());

		if (myproxy_user != NULL) {
			x509cred->SetMyProxyUser (myproxy_user);
		} else {
			x509cred->SetMyProxyUser (username);
		}

		if (myproxy_dn != NULL) {
			x509cred->SetMyProxyServerDN (myproxy_dn);
		}

		char * myproxy_password;
		if ( Read_Myproxy_pw_terminal ) {
			myproxy_password = 
				prompt_password(
					"Please enter the MyProxy password:"******"Please enter the MyProxy password from the standard input\n");
		}
		if (myproxy_password) {
			x509cred->SetRefreshPassword ( myproxy_password );
		}

		x509cred->display( D_FULLDEBUG );
	}

	CondorError errstack;
	DCCredd dc_credd (server_address);

	// resolve server address
	if ( ! dc_credd.locate() ) {
		fprintf (stderr, "%s\n", dc_credd.error() );
		return 1;
	}

	if (dc_credd.storeCredential(cred, errstack)) {
		printf ("Credential submitted successfully\n");
	} else {
		fprintf (stderr, "Unable to submit credential\n%s\n",
				 errstack.getFullText(true));
		return 1;
	}

	return 0;
}
Exemplo n.º 23
0
void
procArg(const char* arg)
{
	int		c, p;								// cluster/proc #
	char*	tmp;

	MyString constraint;

	if( str_isint(arg) || str_isreal(arg,true) )
	// process by cluster/proc #
	{
		c = strtol(arg, &tmp, 10);
		if(c <= 0)
		{
			fprintf(stderr, "Invalid cluster # from %s.\n", arg);
			had_error = true;
			return;
		}
		if(*tmp == '\0')
		// delete the cluster
		{
			CondorError errstack;
			constraint.formatstr( "%s == %d", ATTR_CLUSTER_ID, c );
			if( doWorkByConstraint(constraint.Value(), &errstack) ) {
				fprintf( stdout, 
						 "Cluster %d %s.\n", c,
						 (mode == JA_REMOVE_JOBS) ?
						 "has been marked for removal" :
						 (mode == JA_REMOVE_X_JOBS) ?
						 "has been removed locally (remote state unknown)" :
						 actionWord(mode,true) );
			} else {
				fprintf( stderr, "%s\n", errstack.getFullText(true).c_str() );
				if (had_error)
				{
					fprintf( stderr, 
						 "Couldn't find/%s all jobs in cluster %d.\n",
						 actionWord(mode,false), c );
				}
			}
			return;
		}
		if(*tmp == '.')
		{
			p = strtol(tmp + 1, &tmp, 10);
			if(p < 0)
			{
				fprintf( stderr, "Invalid proc # from %s.\n", arg);
				had_error = true;
				return;
			}
			if(*tmp == '\0')
			// process a proc
			{
				if( ! job_ids ) {
					job_ids = new StringList();
				}
				job_ids->append( arg );
				return;
			}
		}
		fprintf( stderr, "Warning: unrecognized \"%s\" skipped.\n", arg );
		return;
	}
	// process by user name
	else {
		CondorError errstack;
		constraint.formatstr("%s == \"%s\"", ATTR_OWNER, arg );
		if( doWorkByConstraint(constraint.Value(), &errstack) ) {
			fprintf( stdout, "User %s's job(s) %s.\n", arg,
					 (mode == JA_REMOVE_JOBS) ?
					 "have been marked for removal" :
					 (mode == JA_REMOVE_X_JOBS) ?
					 "have been removed locally (remote state unknown)" :
					 actionWord(mode,true) );
		} else {
			fprintf( stderr, "%s\n", errstack.getFullText(true).c_str() );
			if (had_error)
			{
				fprintf( stderr, 
					 "Couldn't find/%s all of user %s's job(s).\n",
					 actionWord(mode,false), arg );
			}
		}
	}
}
Exemplo n.º 24
0
// This handler is called when a client wishes to write (upload) files to
// the transferd.
//
// Protocol, as implemented below:
//   1. Authenticate the socket if that has not been attempted yet.
//   2. Read a request ClassAd from the client and extract the capability.
//   3. Reject the request (replying with ATTR_TREQ_INVALID_REQUEST = TRUE
//      and a reason) if the capability is unknown, the protocol is not
//      FTP_CFTP, or the matching transfer request is not an upload.
//   4. Otherwise reply with ATTR_TREQ_INVALID_REQUEST = FALSE and start a
//      thread running write_files_thread to do the transfer.
//
// cmd is unused.  sock is treated as a ReliSock.  Always returns
// CLOSE_STREAM.
int
TransferD::write_files_handler(int cmd, Stream *sock) 
{
	ReliSock *rsock = (ReliSock*)sock;
	MyString capability;
	int protocol = FTP_UNKNOWN;
	TransferRequest *treq = NULL;
	MyString fquser;
	static int transfer_reaper_id = -1;
	ThreadArg *thread_arg;
	int tid;
	ClassAd reqad;
	ClassAd respad;

	(void)cmd; // unused; quiet the compiler.

	dprintf(D_ALWAYS, "Got TRANSFERD_WRITE_FILES!\n");

	/////////////////////////////////////////////////////////////////////////
	// make sure we are authenticated
	/////////////////////////////////////////////////////////////////////////
	if( ! rsock->triedAuthentication() ) {
		CondorError errstack;
		if( ! SecMan::authenticate_sock(rsock, WRITE, &errstack) ) {
			// we failed to authenticate, we should bail out now
			// since we don't know what user is trying to perform
			// this action.
			// TODO: it'd be nice to print out what failed, but we
			// need better error propagation for that...
			// NOTE(review): the messages below name
			// setup_transfer_request_handler(); they look copied from
			// another handler. Left untouched in case logs are parsed.
			errstack.push( "TransferD::setup_transfer_request_handler()", 42,
				"Failure to register transferd - Authentication failed" );
			dprintf( D_ALWAYS, "setup_transfer_request_handler() "
				"aborting: %s\n",
				errstack.getFullText() );
			refuse( rsock );
			return CLOSE_STREAM;
		} 
	}

	fquser = rsock->getFullyQualifiedUser();


	/////////////////////////////////////////////////////////////////////////
	// Check to see if the capability the client tells us is something that
	// we have knowledge of. We ONLY check the capability and not the
	// identity of the person in question. This allows people of different
	// identities to write files here as long as they had the right 
	// capability. While this might not sound secure, they STILL had to have
	// authenticated as someone this daemon trusts. 
	// Similarly, check the protocol it wants to use as well as ensure that
	// the direction the transfer request was supposed to be is being honored.
	/////////////////////////////////////////////////////////////////////////
	rsock->decode();

	// soak the request ad from the client about what it wants to transfer
	reqad.initFromStream(*rsock);
	rsock->end_of_message();

	reqad.LookupString(ATTR_TREQ_CAPABILITY, capability);

	rsock->encode();

	// do I know of such a capability?
	if (m_treqs.lookup(capability, treq) != 0) {
		// didn't find it. Log it and tell them to leave and close up shop
		respad.Assign(ATTR_TREQ_INVALID_REQUEST, TRUE);
		respad.Assign(ATTR_TREQ_INVALID_REASON, "Invalid capability!");
		respad.put(*rsock);
		rsock->end_of_message();

		dprintf(D_ALWAYS, "Client identity '%s' tried to write some files "
			"using capability '%s', but there was no such capability. "
			"Access denied.\n", fquser.Value(), capability.Value());
		return CLOSE_STREAM;
	}

	reqad.LookupInteger(ATTR_TREQ_FTP, protocol);

	// am I willing to use this protocol?
	switch(protocol) {
		case FTP_CFTP: // FileTrans protocol, I'm happy.
			break;

		default:
			respad.Assign(ATTR_TREQ_INVALID_REQUEST, TRUE);
			respad.Assign(ATTR_TREQ_INVALID_REASON, 
				"Invalid file transfer protocol!");
			respad.put(*rsock);
			rsock->end_of_message();

			dprintf(D_ALWAYS, "Client identity '%s' tried to write some files "
				"using protocol '%d', but I don't support that protocol. "
				"Access denied.\n", fquser.Value(), protocol);
			return CLOSE_STREAM;
	}

	// Ensure that this transfer request was of the uploading variety
	if (treq->get_direction() != FTPD_UPLOAD) {
			respad.Assign(ATTR_TREQ_INVALID_REQUEST, TRUE);
			respad.Assign(ATTR_TREQ_INVALID_REASON, 
				"Transfer Request was not an uploading request!");
			respad.put(*rsock);
			rsock->end_of_message();

			dprintf(D_ALWAYS, "Client identity '%s' tried to write some files "
				"to a transfer request that wasn't expecting to be written. "
				"Access denied.\n", fquser.Value());

			// The request has been refused; stop here like the other
			// rejection paths instead of falling through, replying "OK"
			// and starting the transfer anyway.
			return CLOSE_STREAM;
	}

	/////////////////////////////////////////////////////////////////////////
	// Tell the client everything was ok.
	/////////////////////////////////////////////////////////////////////////

	respad.Assign(ATTR_TREQ_INVALID_REQUEST, FALSE);
	respad.put(*rsock);
	rsock->end_of_message();

	/////////////////////////////////////////////////////////////////////////
	// Set up a thread (a process under unix) to read ALL of the job files
	// for all of the ads in the TransferRequest.
	/////////////////////////////////////////////////////////////////////////

	// now create a thread, passing in the sock, which uses the file transfer
	// object to accept the files.

	if (transfer_reaper_id == -1) {
		// only set this up ONCE so each and every thread gets one.
		transfer_reaper_id = daemonCore->Register_Reaper(
						"write_files_reaper",
						(ReaperHandlercpp) &TransferD::write_files_reaper,
						"write_files_reaper",
						this
						);
	}

	thread_arg = new ThreadArg(protocol, treq);

	// Start a new thread (process on Unix) to do the work
	tid = daemonCore->Create_Thread(
		(ThreadStartFunc)&TransferD::write_files_thread,
		(void *)thread_arg,
		rsock,
		transfer_reaper_id
		);
	
	if (tid == FALSE) {
		// No thread was started, so there is nothing for the reaper to
		// find later; do not record a bogus tid in the tracking table.
		// NOTE(review): whether Create_Thread releases thread_arg on
		// failure is not visible here, so it is deliberately not freed.
		dprintf(D_ALWAYS, "Failed to create a thread to accept files from "
			"client identity '%s'.\n", fquser.Value());
		return CLOSE_STREAM;
	}


	// associate the tid with the request so I can deal with it properly in
	// the reaper
	m_client_to_transferd_threads.insert(tid, treq);

	// The stream is inherited to the thread, who does the transfer and
	// finishes the protocol, but in the parent, I'm closing it.
	return CLOSE_STREAM;
}
Exemplo n.º 25
0
// Evaluate every registered trigger and raise an event for each matching ad,
// then raise an event for every node the console reports as absent.
//
// For each trigger, its query string (with single quotes turned into double
// quotes) is installed as the only constraint on a CondorQuery, which is run
// against query_collector if one is set, otherwise against collectors.  For
// each returned ad the trigger text is expanded: every "$(name)" is replaced
// by the ad's expression for name when the ad has one, otherwise by the name
// itself.  A "$(" without a closing ")" or an unusable name abandons the
// trigger and discards its remaining result ads.
//
// Returns false if a query failed (processing of further triggers stops at
// that point), true otherwise.
bool
Triggerd::PerformQueries()
{
   ClassAdList result;
   CondorError errstack;
   QueryResult status;
   Trigger* trig = NULL;
   CondorQuery* query;
   bool ret_val = true;
   std::map<uint32_t,Trigger*>::iterator iter;
   ClassAd* ad = NULL;
   std::string eventText;
   char* token = NULL;
   std::string triggerText;
   char* queryString = NULL;
   ExprTree* attr = NULL;
   std::list<std::string> missing_nodes;
   size_t pos;
   size_t prev_pos;
   bool bad_trigger = false;

   if (0 < triggers.size())
   {
      dprintf(D_FULLDEBUG, "Triggerd: Evaluating %d triggers\n", (int)triggers.size());
      query = new CondorQuery(ANY_AD);
      for (iter = triggers.begin(); iter != triggers.end(); iter++)
      {
         // Clear any pre-existing custom constraints and add the constraint
         // for this trigger
         trig = iter->second;
         query->clearORCustomConstraints();
         query->clearANDCustomConstraints();
         queryString = strdup(trig->GetQuery().c_str());
         ReplaceAllChars(queryString, '\'', '"');
         query->addANDConstraint(queryString);
         free(queryString);

         // Perform the query and check the result
         if (NULL != query_collector)
         {
            status = query->fetchAds(result, query_collector->addr(), &errstack);
         }
         else
         {
            status = collectors->query(*query, result, &errstack);
         }
         if (Q_OK != status)
         {
            // Problem with the query
            if (Q_COMMUNICATION_ERROR == status)
            {
               dprintf(D_ALWAYS, "Triggerd Error: Error contacting the collecter - %s\n", errstack.getFullText(true).c_str());
               if (CEDAR_ERR_CONNECT_FAILED == errstack.code(0))
               {
                  dprintf(D_ALWAYS, "Triggerd Error: Couldn't contact the collector on the central manager\n");
               }
            }
            else
            {
               dprintf(D_ALWAYS, "Triggerd Error: Could not retrieve ads - %s\n", getStrQueryResult(status));
            }

            ret_val = false;
            break;
         }
         else
         {
            dprintf(D_FULLDEBUG, "Query successful.  Parsing results\n");

            // Query was successful, so parse the results
            result.Open();
            while ((ad = result.Next()))
            {
               if (true == bad_trigger)
               {
                  // Avoid processing a bad trigger multiple times.  Remove
                  // all result ads and reset the flag
                  dprintf(D_FULLDEBUG, "Cleaning up after a bad trigger\n");
                  result.Delete(ad);
                  while ((ad = result.Next()))
                  {
                     result.Delete(ad);
                  }
                  bad_trigger = false;
                  break;
               }
               eventText = "";
               triggerText = trig->GetText();
               dprintf(D_FULLDEBUG, "Parsing trigger text '%s'\n", triggerText.c_str());
               prev_pos = pos = 0;
               while (prev_pos < triggerText.length())
               {
                  pos = triggerText.find("$(", prev_pos, 2);
                  if (std::string::npos == pos)
                  {
                     // Didn't find the start of a variable, so append the
                     // remaining string
                     dprintf(D_FULLDEBUG, "Adding text string to event text\n");
                     eventText += triggerText.substr(prev_pos, std::string::npos);
                     prev_pos = triggerText.length();
                  }
                  else
                  {
                     // Found a variable for substitution.  Need to add
                     // text before it to the string, grab the variable
                     // to substitute for, and put its value in the text
                     eventText += triggerText.substr(prev_pos, pos - prev_pos);
                     dprintf(D_FULLDEBUG, "Adding text string prior to variable substitution to event text\n");

                     // Increment the position by 2 to skip the $(
                     prev_pos = pos + 2;
                     pos = triggerText.find(")", prev_pos, 1);

                     if (std::string::npos == pos)
                     {
                        // Uh-oh.  We have a start of a variable substitution
                        // but no closing marker.
                        dprintf(D_FULLDEBUG, "Error: Failed to find closing varable substitution marker ')'.  Aborting processing of the trigger\n");
                        bad_trigger = true;
                        break;
                     }
                     else
                     {
                        // Keep the substring in a named object: taking
                        // c_str() of the temporary returned by substr()
                        // would leave a dangling pointer once the
                        // statement ends.
                        const std::string raw_token = triggerText.substr(prev_pos, pos-prev_pos);
                        token = RemoveWS(raw_token.c_str());
                        if (NULL == token)
                        {
                           dprintf(D_ALWAYS, "Removing whitespace from %s produced unusable name.  Aborting processing of the trigger\n", raw_token.c_str());
                           bad_trigger = true;
                           break;
                        }
                        // Logged only after the NULL check so a null
                        // pointer is never handed to %s.
                        dprintf(D_FULLDEBUG, "token: '%s'\n", token);

                        attr = ad->LookupExpr(token);
                        if (NULL == attr)
                        {
                           // The token isn't found in the classad, so treat it
                           // like a string
                           dprintf(D_FULLDEBUG, "Adding text string to event text\n");
                           eventText += token;
                        }
                        else
                        {
                           dprintf(D_FULLDEBUG, "Adding classad value to event text\n");
                           eventText += ExprTreeToString(attr);
                        }
                        free(token);
                        token = NULL;
                        ++pos;
                     }
                     prev_pos = pos;
                  }
               }

               // Remove the trailing space.  If the text is empty or all
               // spaces, find_last_not_of() returns npos and npos+1 wraps
               // to 0, which erases the whole string.
               std::string::size_type notwhite = eventText.find_last_not_of(" ");
               eventText.erase(notwhite+1);

               // Send the event
               if (false == bad_trigger)
               {
                  EventCondorTriggerNotify event(eventText, time(NULL));
                  singleton->getInstance()->raiseEvent(event);
                  dprintf(D_FULLDEBUG, "Triggerd: Raised event with text '%s'\n", eventText.c_str());
               }
               result.Delete(ad);
            }
            bad_trigger = false;
            result.Close();
         }
      }
      delete query;
   }
   else
   {
      dprintf(D_FULLDEBUG, "Triggerd: No triggers to evaluate\n");
   }

   // Look for absent nodes (nodes expected to be in the pool but aren't)
   if (NULL != console)
   {
      missing_nodes = console->findAbsentNodes();
      if (0 < missing_nodes.size())
      {
         for (std::list<std::string>::iterator node = missing_nodes.begin();
              node != missing_nodes.end(); ++ node)
         {
            eventText = node->c_str();
            eventText += " is missing from the pool";
            EventCondorTriggerNotify event(eventText, time(NULL));
            singleton->getInstance()->raiseEvent(event);
            dprintf(D_FULLDEBUG, "Triggerd: Raised event with text '%s'\n", eventText.c_str());
         }
      }
   }

   return ret_val;
}
Exemplo n.º 26
0
void
doContactSchedd()
{
	int rc;
	Qmgr_connection *schedd;
	BaseJob *curr_job;
	ClassAd *next_ad;
	char expr_buf[12000];
	bool schedd_updates_complete = false;
	bool schedd_deletes_complete = false;
	bool add_remove_jobs_complete = false;
	bool update_jobs_complete = false;
	bool commit_transaction = true;
	int failure_line_num = 0;
	bool send_reschedule = false;
	std::string error_str = "";
	StringList dirty_job_ids;
	char *job_id_str;
	PROC_ID job_id;
	CondorError errstack;

	dprintf(D_FULLDEBUG,"in doContactSchedd()\n");

	initJobExprs();

	contactScheddTid = TIMER_UNSET;

	// vacateJobs
	/////////////////////////////////////////////////////
	if ( pendingScheddVacates.getNumElements() != 0 ) {
		std::string buff;
		StringList job_ids;
		VacateRequest curr_request;

		int result;
		ClassAd* rval;

		pendingScheddVacates.startIterations();
		while ( pendingScheddVacates.iterate( curr_request ) != 0 ) {
			formatstr( buff, "%d.%d", curr_request.job->procID.cluster,
						  curr_request.job->procID.proc );
			job_ids.append( buff.c_str() );
		}

		char *tmp = job_ids.print_to_string();
		if ( tmp ) {
			dprintf( D_FULLDEBUG, "Calling vacateJobs on %s\n", tmp );
			free(tmp);
			tmp = NULL;
		}

		rval = ScheddObj->vacateJobs( &job_ids, VACATE_FAST, &errstack );
		if ( rval == NULL ) {
			formatstr( error_str, "vacateJobs returned NULL, CondorError: %s!",
							   errstack.getFullText().c_str() );
			goto contact_schedd_failure;
		} else {
			pendingScheddVacates.startIterations();
			while ( pendingScheddVacates.iterate( curr_request ) != 0 ) {
				formatstr( buff, "job_%d_%d", curr_request.job->procID.cluster,
							  curr_request.job->procID.proc );
				if ( !rval->LookupInteger( buff.c_str(), result ) ) {
					dprintf( D_FULLDEBUG, "vacateJobs returned malformed ad\n" );
					EXCEPT( "vacateJobs returned malformed ad" );
				} else {
					dprintf( D_FULLDEBUG, "   %d.%d vacate result: %d\n",
							 curr_request.job->procID.cluster,
							 curr_request.job->procID.proc,result);
					pendingScheddVacates.remove( curr_request.job->procID );
					curr_request.result = (action_result_t)result;
					curr_request.job->SetEvaluateState();
					completedScheddVacates.insert( curr_request.job->procID,
												   curr_request );
				}
			}
			delete rval;
		}
	}


	schedd = ConnectQ( ScheddAddr, QMGMT_TIMEOUT, false, NULL, myUserName, CondorVersion() );
	if ( !schedd ) {
		error_str = "Failed to connect to schedd!";
		goto contact_schedd_failure;
	}


	// CheckLeases
	/////////////////////////////////////////////////////
	if ( checkLeasesSignaled ) {

		dprintf( D_FULLDEBUG, "querying for renewed leases\n" );

		// Grab the lease attributes of all the jobs in our global hashtable.

		BaseJob::JobsByProcId.startIterations();

		while ( BaseJob::JobsByProcId.iterate( curr_job ) != 0 ) {
			int new_expiration;

			rc = GetAttributeInt( curr_job->procID.cluster,
								  curr_job->procID.proc,
								  ATTR_TIMER_REMOVE_CHECK,
								  &new_expiration );
			if ( rc < 0 ) {
				if ( errno == ETIMEDOUT ) {
					failure_line_num = __LINE__;
					commit_transaction = false;
					goto contact_schedd_disconnect;
				} else {
						// This job doesn't have doesn't have a lease from
						// the submitter. Skip it.
					continue;
				}
			}
			curr_job->UpdateJobLeaseReceived( new_expiration );
		}

		checkLeasesSignaled = false;
	}	// end of handling check leases


	// AddJobs
	/////////////////////////////////////////////////////
	if ( addJobsSignaled || firstScheddContact ) {
		int num_ads = 0;

		dprintf( D_FULLDEBUG, "querying for new jobs\n" );

		// Make sure we grab all Globus Universe jobs (except held ones
		// that we previously indicated we were done with)
		// when we first start up in case we're recovering from a
		// shutdown/meltdown.
		// Otherwise, grab all jobs that are unheld and aren't marked as
		// currently being managed and aren't marked as not matched.
		// If JobManaged is undefined, equate it with false.
		// If Matched is undefined, equate it with true.
		// NOTE: Schedds from Condor 6.6 and earlier don't include
		//   "(Universe==9)" in the constraint they give to the gridmanager,
		//   so this gridmanager will pull down non-globus-universe ads,
		//   although it won't use them. This is inefficient but not
		//   incorrect behavior.
		if ( firstScheddContact ) {
			// Grab all jobs for us to manage. This expression is a
			// derivative of the expression below for new jobs. We add
			// "|| Managed =?= TRUE" to also get jobs our previous
			// incarnation was in the middle of managing when it died
			// (if it died unexpectedly). With the new term, the
			// "&& Managed =!= TRUE" from the new jobs expression becomes
			// superfluous (by boolean logic), so we drop it.
			sprintf( expr_buf,
					 "%s && %s && ((%s && %s) || %s)",
					 expr_schedd_job_constraint.c_str(), 
					 expr_not_completely_done.c_str(),
					 expr_matched_or_undef.c_str(),
					 expr_not_held.c_str(),
					 expr_managed.c_str()
					 );
		} else {
			// Grab new jobs for us to manage
			sprintf( expr_buf,
					 "%s && %s && %s && %s && %s",
					 expr_schedd_job_constraint.c_str(), 
					 expr_not_completely_done.c_str(),
					 expr_matched_or_undef.c_str(),
					 expr_not_held.c_str(),
					 expr_not_managed.c_str()
					 );
		}
		dprintf( D_FULLDEBUG,"Using constraint %s\n",expr_buf);
		next_ad = GetNextJobByConstraint( expr_buf, 1 );
		while ( next_ad != NULL ) {
			PROC_ID procID;
			BaseJob *old_job;
			int job_is_matched = 1;		// default to true if not in ClassAd

			next_ad->LookupInteger( ATTR_CLUSTER_ID, procID.cluster );
			next_ad->LookupInteger( ATTR_PROC_ID, procID.proc );
			bool job_is_managed = jobExternallyManaged(next_ad);
			next_ad->LookupBool(ATTR_JOB_MATCHED,job_is_matched);

			if ( BaseJob::JobsByProcId.lookup( procID, old_job ) != 0 ) {

				JobType *job_type = NULL;
				BaseJob *new_job = NULL;

				// job had better be either managed or matched! (or both)
				ASSERT( job_is_managed || job_is_matched );

				if ( MustExpandJobAd( next_ad ) ) {
					// Get the expanded ClassAd from the schedd, which
					// has the GridResource filled in with info from
					// the matched ad.
					delete next_ad;
					next_ad = NULL;
					next_ad = GetJobAd(procID.cluster,procID.proc);
					if ( next_ad == NULL && errno == ETIMEDOUT ) {
						failure_line_num = __LINE__;
						commit_transaction = false;
						goto contact_schedd_disconnect;
					}
					if ( next_ad == NULL ) {
						// We may get here if it was not possible to expand
						// one of the $$() expressions.  We don't want to
						// roll back the transaction and blow away the
						// hold that the schedd just put on the job, so
						// simply skip over this ad.
						dprintf(D_ALWAYS,"Failed to get expanded job ClassAd from Schedd for %d.%d.  errno=%d\n",procID.cluster,procID.proc,errno);
						goto contact_schedd_next_add_job;
					}
				}

				// Search our job types for one that'll handle this job
				jobTypes.Rewind();
				while ( jobTypes.Next( job_type ) ) {
					if ( job_type->AdMatchFunc( next_ad ) ) {

						// Found one!
						dprintf( D_FULLDEBUG, "Using job type %s for job %d.%d\n",
								 job_type->Name, procID.cluster, procID.proc );
						break;
					}
				}

				if ( job_type != NULL ) {
					new_job = job_type->CreateFunc( next_ad );
				} else {
					dprintf( D_ALWAYS, "No handlers for job %d.%d\n",
							 procID.cluster, procID.proc );
					new_job = new BaseJob( next_ad );
				}

				ASSERT(new_job);
				new_job->SetEvaluateState();
				dprintf(D_ALWAYS,"Found job %d.%d --- inserting\n",
						new_job->procID.cluster,new_job->procID.proc);
				num_ads++;

				if ( !job_is_managed ) {
					rc = tSetAttributeString( new_job->procID.cluster,
									   new_job->procID.proc,
									   ATTR_JOB_MANAGED,
									   MANAGED_EXTERNAL);
					if ( rc < 0 ) {
						failure_line_num = __LINE__;
						commit_transaction = false;
						goto contact_schedd_disconnect;
					}
				}

			} else {

				// We already know about this job, skip
				// But also set Managed=true on the schedd so that it won't
				// keep signalling us about it
				delete next_ad;
				rc = tSetAttributeString( procID.cluster, procID.proc,
								   ATTR_JOB_MANAGED, MANAGED_EXTERNAL );
				if ( rc < 0 ) {
					failure_line_num = __LINE__;
					commit_transaction = false;
					goto contact_schedd_disconnect;
				}

			}

contact_schedd_next_add_job:
			next_ad = GetNextJobByConstraint( expr_buf, 0 );
		}	// end of while next_ad
		if ( errno == ETIMEDOUT ) {
			failure_line_num = __LINE__;
			commit_transaction = false;
			goto contact_schedd_disconnect;
		}

		dprintf(D_FULLDEBUG,"Fetched %d new job ads from schedd\n",num_ads);
	}	// end of handling add jobs


	// RemoveJobs
	/////////////////////////////////////////////////////

	// We always want to perform this check. Otherwise, we may overwrite a
	// REMOVED/HELD/COMPLETED status with something else below.
	{
		int num_ads = 0;

		dprintf( D_FULLDEBUG, "querying for removed/held jobs\n" );

		// Grab jobs marked as REMOVED/COMPLETED or marked as HELD that we
		// haven't previously indicated that we're done with (by setting
		// JobManaged to "Schedd".
		sprintf( expr_buf, "(%s) && (%s) && (%s == %d || %s == %d || (%s == %d && %s =?= \"%s\"))",
				 ScheddJobConstraint, expr_not_completely_done.c_str(),
				 ATTR_JOB_STATUS, REMOVED,
				 ATTR_JOB_STATUS, COMPLETED, ATTR_JOB_STATUS, HELD,
				 ATTR_JOB_MANAGED, MANAGED_EXTERNAL );

		dprintf( D_FULLDEBUG,"Using constraint %s\n",expr_buf);
		next_ad = GetNextJobByConstraint( expr_buf, 1 );
		while ( next_ad != NULL ) {
			PROC_ID procID;
			BaseJob *next_job;
			int curr_status;

			next_ad->LookupInteger( ATTR_CLUSTER_ID, procID.cluster );
			next_ad->LookupInteger( ATTR_PROC_ID, procID.proc );
			next_ad->LookupInteger( ATTR_JOB_STATUS, curr_status );

			if ( BaseJob::JobsByProcId.lookup( procID, next_job ) == 0 ) {
				// Should probably skip jobs we already have marked as
				// held or removed

				next_job->JobAdUpdateFromSchedd( next_ad, true );
				num_ads++;

			} else if ( curr_status == REMOVED ) {

				// If we don't know about the job, act like we got an
				// ADD_JOBS signal from the schedd the next time we
				// connect, so that we'll create a Job object for it
				// and decide how it needs to be handled.
				// TODO The AddJobs and RemoveJobs queries shoule be
				//   combined into a single query.
				dprintf( D_ALWAYS, 
						 "Don't know about removed job %d.%d. "
						 "Will treat it as a new job to manage\n",
						 procID.cluster, procID.proc );
				addJobsSignaled = true;

			} else {

				dprintf( D_ALWAYS, "Don't know about held/completed job %d.%d. "
						 "Ignoring it\n",
						 procID.cluster, procID.proc );

			}

			delete next_ad;
			next_ad = GetNextJobByConstraint( expr_buf, 0 );
		}
		if ( errno == ETIMEDOUT ) {
			failure_line_num = __LINE__;
			commit_transaction = false;
			goto contact_schedd_disconnect;
		}

		dprintf(D_FULLDEBUG,"Fetched %d job ads from schedd\n",num_ads);
	}

	if ( RemoteCommitTransaction() < 0 ) {
		failure_line_num = __LINE__;
		commit_transaction = false;
		goto contact_schedd_disconnect;
	}

	add_remove_jobs_complete = true;


	// Retrieve dirty attributes
	/////////////////////////////////////////////////////
	if ( updateJobsSignaled ) {
		dprintf( D_FULLDEBUG, "querying for jobs with attribute updates\n" );

		sprintf( expr_buf, "%s && %s && %s && %s",
				 expr_schedd_job_constraint.c_str(), 
				 expr_not_completely_done.c_str(),
				 expr_not_held.c_str(),
				 expr_managed.c_str()
				 );
		dprintf( D_FULLDEBUG,"Using constraint %s\n",expr_buf);
		next_ad = GetNextDirtyJobByConstraint( expr_buf, 1 );
		while ( next_ad != NULL ) {
			ClassAd updates;
			char str[PROC_ID_STR_BUFLEN];
			next_ad->LookupInteger( ATTR_CLUSTER_ID, job_id.cluster );
			next_ad->LookupInteger( ATTR_PROC_ID, job_id.proc );
			if ( GetDirtyAttributes( job_id.cluster, job_id.proc, &updates ) < 0 ) {
				dprintf( D_ALWAYS, "Failed to retrieve dirty attributes for job %d.%d\n", job_id.cluster, job_id.proc );
				failure_line_num = __LINE__;
				delete next_ad;
				goto contact_schedd_disconnect;
		        }
			else {
				dprintf (D_FULLDEBUG, "Retrieved updated attributes for job %d.%d\n", job_id.cluster, job_id.proc);
				dPrintAd(D_JOB, updates);
			}
			if ( BaseJob::JobsByProcId.lookup( job_id, curr_job ) == 0 ) {
				curr_job->JobAdUpdateFromSchedd( &updates, false );
				ProcIdToStr( job_id, str );
				dirty_job_ids.append( str );
			}
			else {
				dprintf( D_ALWAYS, "Don't know about updated job %d.%d. "
						 "Ignoring it\n",
						 job_id.cluster, job_id.proc );
			}
			delete next_ad;
			next_ad = GetNextDirtyJobByConstraint( expr_buf, 0 );
		}
	}
	update_jobs_complete = true;

//	if ( BeginTransaction() < 0 ) {
	errno = 0;
	BeginTransaction();
	if ( errno == ETIMEDOUT ) {
		failure_line_num = __LINE__;
		commit_transaction = false;
		goto contact_schedd_disconnect;
	}


	// requestJobStatus
	/////////////////////////////////////////////////////
	if ( pendingJobStatus.getNumElements() != 0 ) {
		JobStatusRequest curr_request;

		pendingJobStatus.startIterations();
		while ( pendingJobStatus.iterate( curr_request ) != 0 ) {

			int status;

			rc = GetAttributeInt( curr_request.job_id.cluster,
								  curr_request.job_id.proc,
								  ATTR_JOB_STATUS, &status );
			if ( rc < 0 ) {
				if ( errno == ETIMEDOUT ) {
					failure_line_num = __LINE__;
					commit_transaction = false;
					goto contact_schedd_disconnect;
				} else {
						// The job is not in the schedd's job queue. This
						// probably means that the user did a condor_rm -f,
						// so return a job status of REMOVED.
					status = REMOVED;
				}
			}
				// return status
			dprintf( D_FULLDEBUG, "%d.%d job status: %d\n",
					 curr_request.job_id.cluster,
					 curr_request.job_id.proc, status );
			pendingJobStatus.remove( curr_request.job_id );
			curr_request.job_status = status;
			daemonCore->Reset_Timer( curr_request.tid, 0 );
			completedJobStatus.insert( curr_request.job_id,
									   curr_request );
		}

	}


	// Update existing jobs
	/////////////////////////////////////////////////////
	ScheddUpdateRequest *curr_request;
	pendingScheddUpdates.startIterations();

	while ( pendingScheddUpdates.iterate( curr_request ) != 0 ) {

		curr_job = curr_request->m_job;
		dprintf(D_FULLDEBUG,"Updating classad values for %d.%d:\n",
				curr_job->procID.cluster, curr_job->procID.proc);
		const char *attr_name;
		const char *attr_value;
		ExprTree *expr;
		bool fake_job_in_queue = false;
		curr_job->jobAd->ResetExpr();
		while ( curr_job->jobAd->NextDirtyExpr(attr_name, expr) == true &&
				fake_job_in_queue == false ) {
			attr_value = ExprTreeToString( expr );

			dprintf(D_FULLDEBUG,"   %s = %s\n",attr_name,attr_value);
			rc = SetAttribute( curr_job->procID.cluster,
							   curr_job->procID.proc,
							   attr_name,
							   attr_value);
			if ( rc < 0 ) {
				if ( errno == ETIMEDOUT ) {
					failure_line_num = __LINE__;
					commit_transaction = false;
					goto contact_schedd_disconnect;
				} else {
						// The job is not in the schedd's job queue. This
						// probably means that the user did a condor_rm -f,
						// so pretend that all updates for the job succeed.
						// Otherwise, we'll never make forward progress on
						// the job.
						// TODO We should also fake a job status of REMOVED
						//   to the job, so it can do what cleanup it can.
					fake_job_in_queue = true;
					break;
				}
			}
		}

	}

	if ( RemoteCommitTransaction() < 0 ) {
		failure_line_num = __LINE__;
		commit_transaction = false;
		goto contact_schedd_disconnect;
	}

	schedd_updates_complete = true;


	// Delete existing jobs
	/////////////////////////////////////////////////////
	errno = 0;
	BeginTransaction();
	if ( errno == ETIMEDOUT ) {
		failure_line_num = __LINE__;
		commit_transaction = false;
		goto contact_schedd_disconnect;
	}

	pendingScheddUpdates.startIterations();

	while ( pendingScheddUpdates.iterate( curr_request ) != 0 ) {

		curr_job = curr_request->m_job;
		if ( curr_job->deleteFromSchedd ) {
			dprintf(D_FULLDEBUG,"Deleting job %d.%d from schedd\n",
					curr_job->procID.cluster, curr_job->procID.proc);
			rc = DestroyProc(curr_job->procID.cluster,
							 curr_job->procID.proc);
				// NOENT means the job doesn't exist.  Good enough for us.
			if ( rc < 0 && rc != DESTROYPROC_ENOENT) {
				failure_line_num = __LINE__;
				commit_transaction = false;
				goto contact_schedd_disconnect;
			}
		}

	}

	if ( RemoteCommitTransaction() < 0 ) {
		failure_line_num = __LINE__;
		commit_transaction = false;
		goto contact_schedd_disconnect;
	}

	schedd_deletes_complete = true;


 contact_schedd_disconnect:
	DisconnectQ( schedd, commit_transaction );

	if ( add_remove_jobs_complete == true ) {
		firstScheddContact = false;
		addJobsSignaled = false;
	} else {
		formatstr( error_str, "Schedd connection error during Add/RemoveJobs at line %d!", failure_line_num );
		goto contact_schedd_failure;
	}

	if ( update_jobs_complete == true ) {
		updateJobsSignaled = false;
	} else {
		formatstr( error_str, "Schedd connection error during dirty attribute update at line %d!", failure_line_num );
		goto contact_schedd_failure;
	}

	if ( schedd_updates_complete == false ) {
		formatstr( error_str, "Schedd connection error during updates at line %d!", failure_line_num );
		goto contact_schedd_failure;
	}

	// Clear dirty bits for all jobs updated
	if ( !dirty_job_ids.isEmpty() ) {
		ClassAd *rval;
		dprintf( D_FULLDEBUG, "Calling clearDirtyAttrs on %d jobs\n",
				 dirty_job_ids.number() );
		dirty_job_ids.rewind();
		rval = ScheddObj->clearDirtyAttrs( &dirty_job_ids, &errstack );
		if ( rval == NULL ) {
			dprintf(D_ALWAYS, "Failed to notify schedd to clear dirty attributes.  CondorError: %s\n", errstack.getFullText().c_str() );
		}
		delete rval;
	}

	// Wake up jobs that had schedd updates pending and delete job
	// objects that wanted to be deleted
	pendingScheddUpdates.startIterations();

	while ( pendingScheddUpdates.iterate( curr_request ) != 0 ) {

		curr_job = curr_request->m_job;
		curr_job->jobAd->ClearAllDirtyFlags();

		if ( curr_job->deleteFromGridmanager ) {

				// If the Job object wants to delete the job from the
				// schedd but we failed to do so, don't delete the job
				// object yet; wait until we successfully delete the job
				// from the schedd.
			if ( curr_job->deleteFromSchedd == true &&
				 schedd_deletes_complete == false ) {
				continue;
			}

				// If wantRematch is set, send a reschedule now
			if ( curr_job->wantRematch ) {
				send_reschedule = true;
			}
			pendingScheddUpdates.remove( curr_job->procID );
			pendingScheddVacates.remove( curr_job->procID );
			pendingJobStatus.remove( curr_job->procID );
			completedJobStatus.remove( curr_job->procID );
			completedScheddVacates.remove( curr_job->procID );
			delete curr_job;

		} else {
			pendingScheddUpdates.remove( curr_job->procID );

			if ( curr_request->m_notify ) {
				curr_job->SetEvaluateState();
			}
		}

		delete curr_request;
	}

	// Poke objects that wanted to be notified when a schedd update completed
	// successfully (possibly minus deletes)
	int timer_id;
	scheddUpdateNotifications.Rewind();
	while ( scheddUpdateNotifications.Next( timer_id ) ) {
		daemonCore->Reset_Timer( timer_id, 0 );
	}
	scheddUpdateNotifications.Clear();

	if ( send_reschedule == true ) {
		ScheddObj->reschedule();
	}

	// Check if we have any jobs left to manage. If not, exit.
	if ( BaseJob::JobsByProcId.getNumElements() == 0 ) {
		dprintf( D_ALWAYS, "No jobs left, shutting down\n" );
		daemonCore->Send_Signal( daemonCore->getpid(), SIGTERM );
	}

	lastContactSchedd = time(NULL);

	if ( schedd_deletes_complete == false ) {
		error_str = "Problem using DestroyProc to delete jobs!";
		goto contact_schedd_failure;
	}

	scheddFailureCount = 0;

	// For each job that had dirty attributes, re-evaluate the policy
	dirty_job_ids.rewind();
	while ( (job_id_str = dirty_job_ids.next()) != NULL ) {
		StrToProcIdFixMe(job_id_str, job_id);
		if ( BaseJob::JobsByProcId.lookup( job_id, curr_job ) == 0 ) {
			curr_job->EvalPeriodicJobExpr();
		}
	}

dprintf(D_FULLDEBUG,"leaving doContactSchedd()\n");
	return;

 contact_schedd_failure:
	scheddFailureCount++;
	if ( error_str == "" ) {
		error_str = "Failure in doContactSchedd";
	}
	if ( scheddFailureCount >= maxScheddFailures ) {
		dprintf( D_ALWAYS, "%s\n", error_str.c_str() );
		EXCEPT( "Too many failures connecting to schedd!" );
	}
	dprintf( D_ALWAYS, "%s Will retry\n", error_str.c_str() );
	lastContactSchedd = time(NULL);
	RequestContactSchedd();
	return;
}
Exemplo n.º 27
0
int main(int argc, char **argv)
{
	int		result = 0;

	if ( argc <= 1 || (argc >= 2 && !strcmp("-usage", argv[1])) ) {
		printf("Usage: condor_check_userlogs <log file 1> "
				"[log file 2] ... [log file n]\n");
		exit(0);
	}

		// Set up dprintf.
	dprintf_set_tool_debug("condor_check_userlogs", 0);
	set_debug_flags(NULL, D_ALWAYS);

	StringList	logFiles;
	for ( int argnum = 1; argnum < argc; ++argnum ) {
		logFiles.append(argv[argnum]);
	}
	logFiles.rewind();

	ReadMultipleUserLogs	ru;
	char *filename;
	while ( (filename = logFiles.next()) ) {
		MyString filestring( filename );
		CondorError errstack;
		if ( !ru.monitorLogFile( filestring, false, errstack ) ) {
			fprintf( stderr, "Error monitoring log file %s: %s\n", filename,
						errstack.getFullText().c_str() );
			result = 1;
		}
	}

	bool logsMissing = false;

	CheckEvents		ce;
	int totalSubmitted = 0;
	int netSubmitted = 0;
	bool done = false;
	while( !done ) {

    	ULogEvent* e = NULL;
		MyString errorMsg;

        ULogEventOutcome outcome = ru.readEvent( e );

        switch (outcome) {

        case ULOG_RD_ERROR:
        case ULOG_UNK_ERROR:
			logsMissing = true;
        case ULOG_NO_EVENT:

			printf( "Log outcome: %s\n", ULogEventOutcomeNames[outcome] );
			done = true;
			break;
 
        case ULOG_OK:

			printf( "Log event: %s (%d.%d.%d)",
						ULogEventNumberNames[e->eventNumber],
						e->cluster, e->proc, e->subproc );

			if ( ce.CheckAnEvent(e, errorMsg) != CheckEvents::EVENT_OKAY ) {
				fprintf(stderr, "%s\n", errorMsg.Value());
				result = 1;
			}

			if( e->eventNumber == ULOG_SUBMIT ) {
				SubmitEvent* ee = (SubmitEvent*) e;
				printf( " (\"%s\")", ee->submitEventLogNotes );
				++totalSubmitted;
				++netSubmitted;
				printf( "\n Total submitted: %d; net submitted: %d\n",
						totalSubmitted, netSubmitted );
			}
			
			if( e->eventNumber == ULOG_JOB_HELD ) {
				JobHeldEvent* ee = (JobHeldEvent*) e;
				printf( " (code=%d subcode=%d)", ee->getReasonCode(),
						ee->getReasonSubCode());
			}

			if( e->eventNumber == ULOG_JOB_TERMINATED ) {
				--netSubmitted;
				printf( "\n Total submitted: %d; net submitted: %d\n",
						totalSubmitted, netSubmitted );
			}

			if( e->eventNumber == ULOG_JOB_ABORTED ) {
				--netSubmitted;
				printf( "\n Total submitted: %d; net submitted: %d\n",
						totalSubmitted, netSubmitted );
			}

			if( e->eventNumber == ULOG_EXECUTABLE_ERROR ) {
				--netSubmitted;
				printf( "\n Total submitted: %d; net submitted: %d\n",
						totalSubmitted, netSubmitted );
			}

			printf( "\n" );
			break;

		default:

			fprintf(stderr, "Unexpected read event outcome!\n");
			result = 1;
			break;
        }
	}

	logFiles.rewind();
	while ( (filename = logFiles.next()) ) {
		MyString filestring( filename );
		CondorError errstack;
		if ( !ru.unmonitorLogFile( filestring, errstack ) ) {
			fprintf( stderr, "Error unmonitoring log file %s: %s\n", filename,
						errstack.getFullText().c_str() );
			result = 1;
		}
	}

	MyString errorMsg;
	CheckEvents::check_event_result_t checkAllResult =
				ce.CheckAllJobs(errorMsg);
	if ( checkAllResult != CheckEvents::EVENT_OKAY ) {
		fprintf(stderr, "%s\n", errorMsg.Value());
		fprintf(stderr, "CheckAllJobs() result: %s\n",
					CheckEvents::ResultToString(checkAllResult));
		result = 1;
	}

	if ( result == 0 ) {
		if ( !logsMissing ) {
			printf("Log(s) are okay\n");
		} else {
			printf("Log(s) may be okay\n");
			printf(  "Some logs cannot be read\n");
		}
	} else {
		printf("Log(s) have error(s)\n");
	}
	return result;
}
Exemplo n.º 28
0
/**
 * Compute this job's spool directory name and make sure the directory
 * exists, creating it if it is missing.
 *
 * @param errstack  receives a "SOAP"/FAIL entry describing any failure
 * @return 0 on success (directory created or already present),
 *         1 if the spool directory could not be created,
 *         2 if stat() failed for a reason other than a missing directory
 *           (or the computed directory name is empty)
 */
int
Job::initialize(CondorError &errstack)
{
	char * Spool = param("SPOOL");
	ASSERT(Spool);

	char *ckpt_name = gen_ckpt_name(Spool, id.cluster, id.proc, 0);
	spoolDirectory = ckpt_name;
	free(ckpt_name); ckpt_name = NULL;

		// free() accepts NULL, so no extra check is needed here.
	free(Spool);
	Spool = NULL;

	struct stat stats;
	if (-1 != stat(spoolDirectory.Value(), &stats)) {
		dprintf(D_FULLDEBUG,
				"WARNING: Job '%d.%d''s spool '%s' already exists.\n",
				id.cluster,
				id.proc,
				spoolDirectory.Value());
		return 0;
	}

		// Capture errno right away: the calls below (logging, string
		// formatting) must not be able to change the error we report.
	const int stat_errno = errno;

	if (ENOENT == stat_errno && spoolDirectory.Length() != 0) {

			// We assume here that the job is not a standard universe
			// job.  Spooling works differently for standard universe.
			// Unfortunately, we might not know the job universe
			// yet, so standard universe is problematic with SOAP
			// (and always has been).

		if( !SpooledJobFiles::createJobSpoolDirectory_PRIV_CONDOR(id.cluster,id.proc,false) ) {
				// NOTE(review): this relies on the helper leaving a
				// meaningful errno behind on failure -- confirm.
			errstack.pushf("SOAP",
						   FAIL,
						   "Creation of spool directory '%s' failed, "
						   "reason: %s",
						   spoolDirectory.Value(),
						   strerror(errno));
			return 1;
		}

		dprintf(D_FULLDEBUG,
				"mkdir(%s) succeeded.\n",
				spoolDirectory.Value());
		return 0;
	}

	dprintf(D_FULLDEBUG, "ERROR: stat(%s) errno: %d (%s)\n",
			spoolDirectory.Value(),
			stat_errno,
			strerror(stat_errno));

	errstack.pushf("SOAP",
				   FAIL,
				   "stat(%s) failed, reason: %s",
				   spoolDirectory.Value(),
				   strerror(stat_errno));

	return 2;
}
Exemplo n.º 29
0
/**
 * Read part of a file from this job's spool directory.
 *
 * @param name      file name, relative to the spool directory
 * @param offset    byte offset at which to start reading
 * @param length    number of bytes to read
 * @param data      caller-supplied buffer that receives the bytes;
 *                  presumably at least `length` bytes long -- the size
 *                  is not checked here
 * @param errstack  receives a "SOAP"/FAIL entry describing any failure
 * @return 0 on success, 1 if the file could not be opened, 2 if the seek
 *         failed, 3 if the read failed, 4 if close() failed; on
 *         non-Windows builds also 5 if the job ad could not be fetched
 *         and 6 if the user ids could not be initialized from it
 */
int
Job::get_file(const MyString &name,
              int offset,
              int length,
              unsigned char *&data,
			  CondorError &errstack)
{
#if !defined(WIN32)
		// The sentry object is declared before the priv switch below so
		// that it is destroyed on every return path.
	TemporaryPrivSentry sentry( true );
	if ( param_boolean( "CHOWN_JOB_SPOOL_FILES", false ) == false ) {
			// NOTE(review): job_ad is never released in this function;
			// confirm whether GetJobAd_as_ClassAd() returns a copy the
			// caller has to free.
		ClassAd *job_ad = GetJobAd_as_ClassAd( id.cluster, id.proc );
		if ( job_ad == NULL ) {
			errstack.pushf("SOAP",
						   FAIL,
						   "Failed to retrieve job ad for file '%s'",
						   name.Value());
			return 5;
		}
		if ( !init_user_ids_from_ad( *job_ad ) ) {
			errstack.pushf("SOAP",
						   FAIL,
						   "Failed to init user ids for file '%s'",
						   name.Value());
			return 6;
		}
		set_user_priv();
	}
#endif
	int file = safe_open_wrapper_follow((spoolDirectory + DIR_DELIM_STRING + name).Value(),
					O_RDONLY | _O_BINARY,
					0);

	if (-1 == file) {
		errstack.pushf("SOAP",
					   FAIL,
					   "Failed to open file '%s', reason: %s",
					   name.Value(),
					   strerror(errno));

		return 1;
	}

	if (-1 == lseek(file, offset, SEEK_SET)) {
			// Save errno before close(): close() may overwrite it and
			// we want to report why the seek failed.
		const int seek_errno = errno;
		close(file);
		errstack.pushf("SOAP",
					   FAIL,
					   "Failed to lseek in file '%s', reason: %s",
					   name.Value(),
					   strerror(seek_errno));

		return 2;
	}

		// NOTE(review): a negative length is not rejected here and would
		// turn into a huge byte count below -- confirm that callers
		// validate it.
	int result = full_read(file, data, sizeof(unsigned char) * length);
	if (-1 == result) {
		close(file);
		errstack.pushf("SOAP",
					   FAIL,
					   "Failed to read from file '%s', wanted to "
					   "read %d bytes but received %d",
					   name.Value(),
					   length,
					   result);

		return 3;
	}

	if (-1 == close(file)) {
		errstack.pushf("SOAP",
					   FAIL,
					   "Failed to close file '%s', reason: %s",
					   name.Value(),
					   strerror(errno));

		return 4;
	}

	return 0;
}
Exemplo n.º 30
0
int
Job::submit(const struct condor__ClassAdStruct &jobAd,
			CondorError &errstack)
{
	int i, rval;

		// XXX: This is ugly, and only should happen when spooling,
		// i.e. not always with cedar.
	rval = SetAttributeString(id.cluster,
							  id.proc,
							  ATTR_JOB_IWD,
							  spoolDirectory.Value());
	if (rval < 0) {
		errstack.pushf("SOAP",
					   FAIL,
					   "Failed to set job %d.%d's %s attribute to '%s'.",
					   id.cluster,
					   id.proc,
					   ATTR_JOB_IWD,
					   spoolDirectory.Value());

		return rval;
	}

	StringList transferFiles;
	MyString currentKey;
	JobFile jobFile;
	declaredFiles.startIterations();
	while (declaredFiles.iterate(currentKey, jobFile)) {
		transferFiles.append(jobFile.name.Value());
	}

	char *fileList = NULL;
	if (0 == transferFiles.number()) {
		fileList = strdup("");
	} else {
		fileList = transferFiles.print_to_string();
		ASSERT(fileList);
	}

	rval = SetAttributeString(id.cluster,
							  id.proc,
							  ATTR_TRANSFER_INPUT_FILES,
							  fileList);

	if (fileList) {
		free(fileList);
		fileList = NULL;
	}

	if (rval < 0) {
		errstack.pushf("SOAP",
					   FAIL,
					   "Failed to set job %d.%d's %s attribute.",
					   id.cluster,
					   id.proc,
					   ATTR_TRANSFER_INPUT_FILES);

		return rval;
	}

	int found_iwd = 0;
	for (i = 0; i < jobAd.__size; i++) {
		const char* name = jobAd.__ptr[i].name;
		const char* value = jobAd.__ptr[i].value;
		if (!name) continue;
		if (!value) value="UNDEFINED";

			// XXX: This is a quick fix. If processing MyType or
			// TargetType they should be ignored. Ideally we could
			// convert the ClassAdStruct to a ClassAd and then iterate
			// the ClassAd.
		if (0 == strcmp(name, ATTR_MY_TYPE) ||
			0 == strcmp(name, ATTR_TARGET_TYPE)) {
			continue;
		}

		if ( jobAd.__ptr[i].type == STRING_ATTR ) {
				// string type - put value in quotes as hint for ClassAd parser

			found_iwd = found_iwd || !strcmp(name, ATTR_JOB_IWD);

			rval = SetAttributeString(id.cluster, id.proc, name, value);
		} else {
				// all other types can be deduced by the ClassAd parser
			rval = SetAttribute(id.cluster, id.proc, name, value);
		}
		if ( rval < 0 ) {
		errstack.pushf("SOAP",
					   FAIL,
					   "Failed to set job %d.%d's %s attribute.",
					   id.cluster,
					   id.proc,
					   name);

			return rval;
		}
	}

		// Trust the client knows what it is doing if there is an Iwd.
	if (!found_iwd) {
			// We need to make sure the Iwd is rewritten so files
			// in the spool directory can be found.
		rval = SetAttributeString(id.cluster,
								  id.proc,
								  ATTR_JOB_IWD,
								  spoolDirectory.Value());
		if (rval < 0) {
			errstack.pushf("SOAP",
						   FAIL,
						   "Failed to set %d.%d's %s attribute to '%s'.",
						   id.cluster,
						   id.proc,
						   ATTR_JOB_IWD,
						   spoolDirectory.Value());

			return rval;
		}
	}

	return 0;
}