예제 #1
0
/**
 * Send one update command to every collector in this list.
 *
 * The shared ad-sequence table (adSeq) is created lazily on first use, and the
 * sequence generator belonging to ad1 (if one is returned) is advanced exactly
 * once per call, before any collector is contacted.
 *
 * @param cmd          Command handed to each DCCollector::sendUpdate().
 * @param ad1          Primary ad. A NULL ad1 skips the sequence-advance step
 *                     here; whether sendUpdate() itself accepts NULL is not
 *                     visible from this function.
 * @param ad2          Second ad, passed through unchanged.
 * @param nonblocking  Passed through unchanged to sendUpdate().
 * @return Number of collectors for which sendUpdate() reported success.
 */
int
CollectorList::sendUpdates (int cmd, ClassAd * ad1, ClassAd* ad2, bool nonblocking) {
	int success_count = 0;

	if ( ! adSeq) {
		adSeq = new DCCollectorAdSequences();
	}

	// Advance the sequence number for this ad.  Guard against a NULL ad1 so
	// that we never bind a reference to a null pointer.
	if (ad1) {
		time_t now = time(NULL);
		DCCollectorAdSeq * seqgen = adSeq->getAdSeq(*ad1);
		if (seqgen) { seqgen->advance(now); }
	}

	this->rewind();
	DCCollector * daemon;
	while (this->next(daemon)) {
		// addr() can be NULL for a collector that could not be resolved
		// (query() checks for exactly that), and NULL must not reach "%s".
		const char * addr = daemon->addr();
		dprintf( D_FULLDEBUG,
				 "Trying to update collector %s\n",
				 addr ? addr : "(null)" );
		if( daemon->sendUpdate(cmd, ad1, *adSeq, ad2, nonblocking) ) {
			success_count++;
		}
	}

	return success_count;
}
예제 #2
0
/**
 * Run a query against the collectors in this list, trying them in random
 * order until one answers successfully.
 *
 * Collectors without an address are skipped (and remembered as a resolution
 * problem).  A blacklisted collector is skipped unless it is the only
 * candidate left.  Each candidate is tried at most once.
 *
 * @param cQuery    Query to execute via CondorQuery::fetchAds().
 * @param adList    Receives the ads fetched by a successful query.
 * @param errstack  Optional error stack.  If no collector succeeded, some
 *                  collector could not be resolved, and the stack holds no
 *                  error yet, a "CONDOR_STATUS" error is pushed.
 * @return Q_NO_COLLECTOR_HOST if the list is empty; Q_OK as soon as one
 *         collector answers; otherwise the result of the last attempted
 *         query, or Q_COMMUNICATION_ERROR if none was attempted.
 */
QueryResult
CollectorList::query(CondorQuery & cQuery, ClassAdList & adList, CondorError *errstack) {

	int num_collectors = this->number();
	if (num_collectors < 1) {
		return Q_NO_COLLECTOR_HOST;
	}

	std::vector<DCCollector *> vCollectors;
	DCCollector * daemon;
	QueryResult result = Q_COMMUNICATION_ERROR;

	bool problems_resolving = false;


	// switch containers for easier random access.
	this->rewind();
	while (this->next(daemon)) {
		vCollectors.push_back(daemon);
	}


	while ( vCollectors.size() ) {
		// choose a random collector in the list to query.
		unsigned int idx = get_random_int() % vCollectors.size() ;
		daemon = vCollectors[idx];

		if ( ! daemon->addr() ) {
			if ( daemon->name() ) {
				dprintf( D_ALWAYS,
						 "Can't resolve collector %s; skipping\n",
						 daemon->name() );
			} else {
				dprintf( D_ALWAYS,
						 "Can't resolve nameless collector; skipping\n" );
			}
			problems_resolving = true;
		} else if ( daemon->isBlacklisted() && vCollectors.size() > 1 ) {
			// name() may be NULL (see the branch above); addr() is known to
			// be non-NULL here, so fall back to it rather than hand NULL
			// to "%s".
			dprintf( D_ALWAYS,"Collector %s blacklisted; skipping\n",
					 daemon->name() ? daemon->name() : daemon->addr() );
		} else {
			dprintf (D_FULLDEBUG,
					 "Trying to query collector %s\n",
					 daemon->addr());

			// Blacklist bookkeeping only makes sense when there is an
			// alternative collector to fall back to.
			if( num_collectors > 1 ) {
				daemon->blacklistMonitorQueryStarted();
			}

			result = cQuery.fetchAds (adList, daemon->addr(), errstack);

			if( num_collectors > 1 ) {
				daemon->blacklistMonitorQueryFinished( result == Q_OK );
			}

			if (result == Q_OK) {
				return result;
			}
		}

		// if you got here remove it from the list of potential candidates.
		vCollectors.erase( vCollectors.begin()+idx );
	}

	// only push an error if the error stack exists and is currently empty
	if(problems_resolving && errstack && !errstack->code(0)) {
		char* tmplist = getCmHostFromConfig( "COLLECTOR" );
		errstack->pushf("CONDOR_STATUS",1,"Unable to resolve COLLECTOR_HOST (%s).",tmplist?tmplist:"(null)");
		// Release the configuration string instead of leaking it.
		// NOTE(review): assumes getCmHostFromConfig() returns malloc()-allocated
		// storage owned by the caller -- confirm against its definition.
		free( tmplist );
	}

		// If we've gotten here, there are no good collectors
	return result;
}
예제 #3
0
파일: rm.cpp 프로젝트: emaste/htcondor
/*
 * Entry point shared by the job-action tools (hold, release, suspend,
 * continue, rm, vacate_job).  The action is chosen from the text following
 * the first '_' in the program name, then the command line is parsed, the
 * schedd is located, and the requested action is applied to the selected
 * jobs (everything with -all, constraints, or explicit ids/user names).
 *
 * Returns the global had_error flag (0 when nothing failed).  Most argument
 * errors terminate the process through exit(1) or usage().
 * NOTE(review): the code relies on usage() not returning -- confirm.
 */
int
main( int argc, char *argv[] )
{
	char	*arg;
		// Saved job/constraint arguments.  Every argv entry after argv[0]
		// contributes at most one slot, so argc - 1 entries suffice; always
		// request at least one slot so that argc <= 1 does not turn into a
		// zero-sized (or negative, hence huge) allocation request.
	char	**args = (char **)malloc(sizeof(char *) * (argc > 1 ? argc - 1 : 1));
	int					nArgs = 0;				// number of args
	int					i;
	char*	cmd_str;
	DCCollector* pool = NULL;
	char* scheddName = NULL;
	char* scheddAddr = NULL;

	if( ! args ) {
		fprintf( stderr, "Out of memory!\n" );
		exit(1);
	}

		// Initialize our global variables
	has_constraint = false;

	myDistro->Init( argc, argv );
	MyName = strrchr( argv[0], DIR_DELIM_CHAR );
	if( !MyName ) {
		MyName = argv[0];
	} else {
		MyName++;
	}

	cmd_str = strchr( MyName, '_');

	// we match modes based on characters after the '_'. This means
	// 'condor_hold.exe' or 'condor_hold_wrapped' are all legal argv[0]'s
	// for condor_hold.

	if (cmd_str && strncasecmp( cmd_str, "_hold", strlen("_hold") ) == MATCH) {

		mode = JA_HOLD_JOBS;

	} else if ( cmd_str &&
			strncasecmp( cmd_str, "_release", strlen("_release") ) == MATCH ) {

		mode = JA_RELEASE_JOBS;

	} else if ( cmd_str &&
			strncasecmp( cmd_str, "_suspend", strlen("_suspend") ) == MATCH ) {

		mode = JA_SUSPEND_JOBS;

	} else if ( cmd_str &&
			strncasecmp( cmd_str, "_continue", strlen("_continue") ) == MATCH ) {

		mode = JA_CONTINUE_JOBS;

	} else if ( cmd_str &&
			strncasecmp( cmd_str, "_rm", strlen("_rm") ) == MATCH ) {

		mode = JA_REMOVE_JOBS;

	} else if( cmd_str && ! strncasecmp(cmd_str, "_vacate_job",
									strlen("_vacate_job")) ) {

		mode = JA_VACATE_JOBS;

	} else {
		// don't know what mode we're using, so bail.
		fprintf( stderr, "Unrecognized command name, \"%s\"\n", MyName );
		usage();
	}

	config();


	if( argc < 2 ) {
			// We got no indication of what to act on
		fprintf( stderr, "You did not specify any jobs\n" );
		usage();
	}

#if !defined(WIN32)
	install_sig_handler(SIGPIPE, SIG_IGN );
#endif

		// Walk the command line.  The order of the match_prefix() tests is
		// significant because abbreviated options are accepted.
	for( argv++; (arg = *argv); argv++ ) {
		if( arg[0] == '-' ) {
			if (match_prefix(arg, "-debug")) {
				// dprintf to console
				dprintf_set_tool_debug("TOOL", 0);
			} else if (match_prefix(arg, "-constraint")) {
					// Save both the flag and its value; the flag is
					// recognized again when args[] is processed below.
				args[nArgs] = arg;
				nArgs++;
				argv++;
				if( ! *argv ) {
					fprintf( stderr,
							 "%s: -constraint requires another argument\n",
							 MyName);
					exit(1);
				}
				args[nArgs] = *argv;
				nArgs++;
				ConstraintArg = true;
			} else if (match_prefix(arg, "-all")) {
				All = true;
			} else if (match_prefix(arg, "-addr")) {
				argv++;
				if( ! *argv ) {
					fprintf( stderr,
							 "%s: -addr requires another argument\n",
							 MyName);
					exit(1);
				}
				if( is_valid_sinful(*argv) ) {
					scheddAddr = strdup(*argv);
					if( ! scheddAddr ) {
						fprintf( stderr, "Out of memory!\n" );
						exit(1);
					}
				} else {
					fprintf( stderr,
							 "%s: \"%s\" is not a valid address\n",
							 MyName, *argv );
					fprintf( stderr, "Should be of the form "
							 "<ip.address.here:port>\n" );
					fprintf( stderr,
							 "For example: <123.456.789.123:6789>\n" );
					exit( 1 );
				}
			} else if (match_prefix(arg, "-reason")) {
				argv++;
				if( ! *argv ) {
					fprintf( stderr,
							 "%s: -reason requires another argument\n",
							 MyName);
					exit(1);
				}
				actionReason = strdup(*argv);
				if( ! actionReason ) {
					fprintf( stderr, "Out of memory!\n" );
					exit(1);
				}
			} else if (match_prefix(arg, "-subcode")) {
				argv++;
				if( ! *argv ) {
					fprintf( stderr,
							 "%s: -subcode requires another argument\n",
							 MyName);
					exit(1);
				}
				char *end = NULL;
				long code = strtol(*argv,&end,10);
					// strtol() saturates to LONG_MIN / LONG_MAX when the
					// value is out of range, so reject both limits along
					// with empty input and trailing garbage.
				if( code == LONG_MIN || code == LONG_MAX ||
					!end || *end || end==*argv ) {
					fprintf( stderr, "Invalid -subcode %s!\n", *argv );
					exit(1);
				}
				holdReasonSubCode = strdup(*argv);
				ASSERT( holdReasonSubCode );
			} else if (match_prefix(arg, "-forcex")) {
				if( mode == JA_REMOVE_JOBS ) {
					mode = JA_REMOVE_X_JOBS;
				} else {
					fprintf( stderr,
							 "-forcex is only valid with condor_rm\n" );
					usage();
				}
			} else if (match_prefix(arg, "-fast")) {
				if( mode == JA_VACATE_JOBS ) {
					mode = JA_VACATE_FAST_JOBS;
				} else {
					fprintf( stderr,
							 "-fast is only valid with condor_vacate_job\n" );
					usage();
				}
			} else if (match_prefix(arg, "-name")) {
				// use the given name as the schedd name to connect to
				argv++;
				if( ! *argv ) {
					fprintf( stderr, "%s: -name requires another argument\n",
							 MyName);
					exit(1);
				}
				if( !(scheddName = get_daemon_name(*argv)) ) {
					fprintf( stderr, "%s: unknown host %s\n",
							 MyName, get_host_part(*argv) );
					exit(1);
				}
			} else if (match_prefix(arg, "-pool")) {
				// use the given name as the central manager to query
				argv++;
				if( ! *argv ) {
					fprintf( stderr, "%s: -pool requires another argument\n",
							 MyName);
					exit(1);
				}
					// A later -pool overrides an earlier one.
				if( pool ) {
					delete pool;
				}
				pool = new DCCollector( *argv );
				if( ! pool->addr() ) {
					fprintf( stderr, "%s: %s\n", MyName, pool->error() );
					exit(1);
				}
			} else if (match_prefix(arg, "-version")) {
				version();
			} else if (match_prefix(arg, "-help")) {
				usage(0);
			} else {
				fprintf( stderr, "Unrecognized option: %s\n", arg );
				usage();
			}
		} else {
			if( All ) {
					// If -all is set, there should be no other
					// constraint arguments.
				usage();
			}
			args[nArgs] = arg;
			nArgs++;
			UserJobIdArg = true;
		}
	}

	if( ! (All || nArgs) ) {
			// We got no indication of what to act on
		fprintf( stderr, "You did not specify any jobs\n" );
		usage();
	}

	if ( ConstraintArg && UserJobIdArg ) {
		fprintf( stderr, "You can't use both -constraint and usernames or job ids\n" );
		usage();
	}

		// Pick the default reason if the user didn't specify one
	if( actionReason == NULL ) {
		switch( mode ) {
		case JA_RELEASE_JOBS:
			actionReason = strdup("via condor_release");
			break;
		case JA_REMOVE_X_JOBS:
			actionReason = strdup("via condor_rm -forcex");
			break;
		case JA_REMOVE_JOBS:
			actionReason = strdup("via condor_rm");
			break;
		case JA_HOLD_JOBS:
			actionReason = strdup("via condor_hold");
			break;
		case JA_SUSPEND_JOBS:
			actionReason = strdup("via condor_suspend");
			break;
		case JA_CONTINUE_JOBS:
			actionReason = strdup("via condor_continue");
			break;
		default:
			actionReason = NULL;
		}
	}

		// We're done parsing args, now make sure we know how to
		// contact the schedd.
	if( ! scheddAddr ) {
			// This will always do the right thing, even if either or
			// both of scheddName or pool are NULL.
		schedd = new DCSchedd( scheddName, pool ? pool->addr() : NULL );
	} else {
		schedd = new DCSchedd( scheddAddr );
	}
	if( ! schedd->locate() ) {
		fprintf( stderr, "%s: %s\n", MyName, schedd->error() );
		exit( 1 );
	}

		// Special case for condor_rm -forcex: a configuration
		// setting can disable this functionality.  The real
		// validation is done in the schedd, but we can catch
		// the most common cases here and give a useful error
		// message.
	if(mode == JA_REMOVE_X_JOBS) {
		if( mayUserForceRm() == false) {
			fprintf( stderr, "Remove aborted. condor_rm -forcex has been disabled by the administrator.\n" );
			exit( 1 );
		}
	}

		// Process the args so we do the work.
	if( All ) {
		handleAll();
	} else {
		for(i = 0; i < nArgs; i++) {
			if( match_prefix( args[i], "-constraint" ) ) {
					// The parser above always stores the constraint
					// expression right after its flag.
				i++;
				addConstraint( args[i] );
			} else {
				procArg(args[i]);
			}
		}
	}

		// The pointer table is no longer needed.  Only the table is
		// released; the strings it pointed at belong to argv.
	free( args );
	args = NULL;

		// Deal with all the -constraint constraints
	handleConstraints();

		// Finally, do the actual work for all our args which weren't
		// constraints...
	if( job_ids ) {
		CondorError errstack;
		ClassAd* result_ad = doWorkByList( job_ids, &errstack );
		if (had_error) {
			fprintf( stderr, "%s\n", errstack.getFullText(true).c_str() );
		}
		printNewMessages( result_ad, job_ids );
		delete( result_ad );
	}

		// If releasing jobs, and no errors happened, do a
		// reschedule command now.
	if ( mode == JA_RELEASE_JOBS && had_error == false ) {
		Daemon  my_schedd(DT_SCHEDD, NULL, NULL);
		CondorError errstack;
		if (!my_schedd.sendCommand(RESCHEDULE, Stream::safe_sock, 0, &errstack)) {
			fprintf( stderr, "%s\n", errstack.getFullText(true).c_str() );
		}
	}

	return had_error;
}
예제 #4
0
	static void startUpdateCallback(bool success,Sock *sock,CondorError * /* errstack */,void *misc_data) {
		UpdateData *ud = (UpdateData *)misc_data;

			// We got here because a nonblocking call to startCommand()
			// has called us back.  Now we will finish sending the update.

			// NOTE: it is possible that by the time we get here,
			// dc_collector has been deleted.  If that is the case,
			// dc_collector will be NULL.  We will go ahead and finish
			// the update anyway, but we will not do anything that
			// modifies dc_collector (such as saving the TCP sock for
			// future use).

		DCCollector *dc_collector = ud->dc_collector;
		if(!success) {
			char const *who = "unknown";
			if(sock) who = sock->get_sinful_peer();
			dprintf(D_ALWAYS,"Failed to start non-blocking update to %s.\n",who);
		}
		else if(sock && !DCCollector::finishUpdate(ud->dc_collector,sock,ud->ad1,ud->ad2)) {
			char const *who = "unknown";
			if(sock) who = sock->get_sinful_peer();
			dprintf(D_ALWAYS,"Failed to send non-blocking update to %s.\n",who);
		}
		else if(sock && sock->type() == Sock::reli_sock) {
			// We keep the TCP socket around for sending more updates.
			if(ud->dc_collector && ud->dc_collector->update_rsock == NULL) {
				ud->dc_collector->update_rsock = (ReliSock *)sock;
				sock = NULL;
			}
		}
		if(sock) {
			delete sock;
		}
		delete ud;

			// Now that we finished sending the update, we can start sequentially sending
			// the pending updates.  We send these updates synchronously in sequence
			// because if we did it all asynchronously, we may end up with many TCP
			// connections to the collector.  Instead we send the updates one at a time
			// via a single connection.

		if (dc_collector && dc_collector->pending_update_list.size())
		{

			// Here we handle pending updates by sending them over a stashed
			// TCP socket to the collector.
			//
			while (dc_collector->update_rsock && dc_collector->pending_update_list.size())
			{
				UpdateData *ud = dc_collector->pending_update_list.front();
				dc_collector->update_rsock->encode();
					// NOTE: If there's a valid TCP socket available, we always
					// push our updates over that (even if the update requested UDP).
					// I don't think mixing TCP/UDP to the same collector is supported, so
					// I believe this shortcut acceptable.
				if (!dc_collector->update_rsock->put( ud->cmd ) ||
					!DCCollector::finishUpdate(ud->dc_collector,dc_collector->update_rsock,ud->ad1,ud->ad2))
				{
					char const *who = "unknown";
					if(dc_collector->update_rsock) {
						who = dc_collector->update_rsock->get_sinful_peer();
					}
					dprintf(D_ALWAYS,"Failed to send update to %s.\n",who);
					delete dc_collector->update_rsock;
					dc_collector->update_rsock = NULL;
					// Notice we remove the element from the list of pending updates
					// even on failure.
				}
				delete ud;
			}

			// Here we handle pending updates in the event that we do not have
			// a stashed TCP socket to the collector.  This could occur if
			// our TCP socket to the collector was closed for some reason
			// (e.g. the collector was restarted), or it may occur in the
			// case of UDP.  Note that we just start handling the next pending
			// update here, then go back to daemonCore with a callback registered
			// to ensure we do not block in the event we need to re-establish
			// a new TCP socket.
			if (dc_collector->pending_update_list.size())
			{
				UpdateData *ud = dc_collector->pending_update_list.front();
				dc_collector->startCommand_nonblocking(ud->cmd, ud->sock_type, 20, NULL, UpdateData::startUpdateCallback, ud );
			}
		}
	}