Example #1
0
void Defrag::poll_cancel(MachineSet &cancelled_machines)
{
	if (!m_cancel_requirements.size())
	{
		return;
	}

	MachineSet draining_whole_machines;
	std::stringstream draining_whole_machines_ss;
	draining_whole_machines_ss << "(" <<  m_cancel_requirements << ") && (" << DRAINING_CONSTRAINT << ")";
	int num_draining_whole_machines = countMachines(draining_whole_machines_ss.str().c_str(),
		"<DEFRAG_CANCEL_REQUIREMENTS>", &draining_whole_machines);

	if (num_draining_whole_machines)
	{
		dprintf(D_ALWAYS, "Of the whole machines, %d are in the draining state.\n", num_draining_whole_machines);
	}
	else
	{	// Early exit: nothing to do.
		return;
	}

	ClassAdList startdAds;
	if (!queryMachines(DRAINING_CONSTRAINT, "DRAINING_CONSTRAINT <all draining slots>",startdAds))
	{
		return;
	}

	startdAds.Shuffle();
	startdAds.Sort(StartdSortFunc,&m_rank_ad);

	startdAds.Open();

	unsigned int cancel_count = 0;
	ClassAd *startd_ad_ptr;
	while ( (startd_ad_ptr=startdAds.Next()) )
	{
		if (!startd_ad_ptr) continue;

		ClassAd &startd_ad = *startd_ad_ptr;
		std::string machine;
		std::string name;
		startd_ad.LookupString(ATTR_NAME,name);
		slotNameToDaemonName(name,machine);

		if( !cancelled_machines.count(machine) && draining_whole_machines.count(machine) ) {
			cancel_drain(startd_ad);
			cancelled_machines.insert(machine);
			cancel_count ++;
		}
	}

	startdAds.Close();


	dprintf(D_ALWAYS, "Cancelled draining of %u whole machines.\n", cancel_count);
}
Example #2
0
void Defrag::poll()
{
	dprintf(D_FULLDEBUG,"Evaluating defragmentation policy.\n");

		// If we crash during this polling cycle, we will have saved
		// the time of the last poll, so the next cycle will be
		// scheduled on the false assumption that a cycle ran now.  In
		// this way, we error on the side of draining too little
		// rather than too much.

	time_t now = time(NULL);
	time_t prev = m_last_poll;
	m_last_poll = now;
	saveState();

	m_stats.Tick();

	int num_to_drain = m_draining_per_poll;

	time_t last_hour    = (prev / 3600)*3600;
	time_t current_hour = (now  / 3600)*3600;
	time_t last_day     = (prev / (3600*24))*3600*24;
	time_t current_day  = (now  / (3600*24))*3600*24;

	if( current_hour != last_hour ) {
		num_to_drain += prorate(m_draining_per_poll_hour,now-current_hour,3600,m_polling_interval);
	}
	if( current_day != last_day ) {
		num_to_drain += prorate(m_draining_per_poll_day,now-current_day,3600*24,m_polling_interval);
	}

	int num_draining = countMachines(DRAINING_CONSTRAINT,"<InternalDrainingConstraint>");
	m_stats.MachinesDraining = num_draining;

	MachineSet whole_machines;
	int num_whole_machines = countMachines(m_whole_machine_expr.c_str(),"DEFRAG_WHOLE_MACHINE_EXPR",&whole_machines);
	m_stats.WholeMachines = num_whole_machines;

	dprintf(D_ALWAYS,"There are currently %d draining and %d whole machines.\n",
			num_draining,num_whole_machines);

	queryDrainingCost();

	// If possible, cancel some drains.
	MachineSet cancelled_machines;
	poll_cancel(cancelled_machines);

	if( num_to_drain <= 0 ) {
		dprintf(D_ALWAYS,"Doing nothing, because number to drain in next %ds is calculated to be 0.\n",
				m_polling_interval);
		return;
	}

	if( (int)ceil(m_draining_per_hour) <= 0 ) {
		dprintf(D_ALWAYS,"Doing nothing, because DEFRAG_DRAINING_MACHINES_PER_HOUR=%f\n",
				m_draining_per_hour);
		return;
	}

	if( m_max_draining == 0 ) {
		dprintf(D_ALWAYS,"Doing nothing, because DEFRAG_MAX_CONCURRENT_DRAINING=0\n");
		return;
	}

	if( m_max_whole_machines == 0 ) {
		dprintf(D_ALWAYS,"Doing nothing, because DEFRAG_MAX_WHOLE_MACHINES=0\n");
		return;
	}

	if( m_max_draining >= 0 ) {
		if( num_draining >= m_max_draining ) {
			dprintf(D_ALWAYS,"Doing nothing, because DEFRAG_MAX_CONCURRENT_DRAINING=%d and there are %d draining machines.\n",
					m_max_draining, num_draining);
			return;
		}
		else if( num_draining < 0 ) {
			dprintf(D_ALWAYS,"Doing nothing, because DEFRAG_MAX_CONCURRENT_DRAINING=%d and the query to count draining machines failed.\n",
					m_max_draining);
			return;
		}
	}

	if( m_max_whole_machines >= 0 ) {
		if( num_whole_machines >= m_max_whole_machines ) {
			dprintf(D_ALWAYS,"Doing nothing, because DEFRAG_MAX_WHOLE_MACHINES=%d and there are %d whole machines.\n",
					m_max_whole_machines, num_whole_machines);
			return;
		}
	}

		// Even if m_max_whole_machines is -1 (infinite), we still need
		// the list of whole machines in order to filter them out in
		// the draining selection algorithm, so abort now if the
		// whole machine query failed.
	if( num_whole_machines < 0 ) {
		dprintf(D_ALWAYS,"Doing nothing, because the query to find whole machines failed.\n");
		return;
	}

	dprintf(D_ALWAYS,"Looking for %d machines to drain.\n",num_to_drain);

	ClassAdList startdAds;
	std::string requirements;
	sprintf(requirements,"(%s) && Draining =!= true",m_defrag_requirements.c_str());
	if( !queryMachines(requirements.c_str(),"DEFRAG_REQUIREMENTS",startdAds) ) {
		dprintf(D_ALWAYS,"Doing nothing, because the query to select machines matching DEFRAG_REQUIREMENTS failed.\n");
		return;
	}

	startdAds.Shuffle();
	startdAds.Sort(StartdSortFunc,&m_rank_ad);

	startdAds.Open();
	int num_drained = 0;
	ClassAd *startd_ad_ptr;
	MachineSet machines_done;
	while( (startd_ad_ptr=startdAds.Next()) ) {

		if (!startd_ad_ptr) continue;
		ClassAd &startd_ad = *startd_ad_ptr;

		std::string machine;
		std::string name;
		startd_ad.LookupString(ATTR_NAME,name);
		slotNameToDaemonName(name,machine);

		// If we have already cancelled draining on this machine, ignore it for this cycle.
		if( cancelled_machines.count(machine) ) {
			dprintf(D_FULLDEBUG,
					"Skipping %s: already cancelled draining of %s in this cycle.\n",
					name.c_str(),machine.c_str());
			continue;
		}

		if( machines_done.count(machine) ) {
			dprintf(D_FULLDEBUG,
					"Skipping %s: already attempted to drain %s in this cycle.\n",
					name.c_str(),machine.c_str());
			continue;
		}

		if( whole_machines.count(machine) ) {
			dprintf(D_FULLDEBUG,
					"Skipping %s: because it is already running as a whole machine.\n",
					name.c_str());
			continue;
		}

		if( drain(startd_ad) ) {
			machines_done.insert(machine);

			if( ++num_drained >= num_to_drain ) {
				dprintf(D_ALWAYS,
						"Drained maximum number of machines allowed in this cycle (%d).\n",
						num_to_drain);
				break;
			}
		}
	}
	startdAds.Close();

	dprintf(D_ALWAYS,"Drained %d machines (wanted to drain %d machines).\n",
			num_drained,num_drained);

	dprintf(D_FULLDEBUG,"Done evaluating defragmentation policy.\n");
}
Example #3
0
void Rooster::poll()
{
	dprintf(D_FULLDEBUG,"C**k-a-doodle-doo! (Time to look for machines to wake up.)\n");

	ClassAdList startdAds;
	CondorQuery unhibernateQuery(STARTD_AD);
	ExprTree *requirements = NULL;

	if( ParseClassAdRvalExpr( m_unhibernate_constraint.Value(), requirements )!=0 || requirements==NULL )
	{
		EXCEPT("Invalid expression for ROOSTER_UNHIBERNATE: %s\n",
			   m_unhibernate_constraint.Value());
	}

	unhibernateQuery.addANDConstraint(m_unhibernate_constraint.Value());

	CollectorList* collects = daemonCore->getCollectorList();
	ASSERT( collects );

	QueryResult result;
	result = collects->query(unhibernateQuery,startdAds);
	if( result != Q_OK ) {
		dprintf(D_ALWAYS,
				"Couldn't fetch startd ads using constraint "
				"ROOSTER_UNHIBERNATE=%s: %s\n",
				m_unhibernate_constraint.Value(), getStrQueryResult(result));
		return;
	}

	dprintf(D_FULLDEBUG,"Got %d startd ads matching ROOSTER_UNHIBERNATE=%s\n",
			startdAds.MyLength(), m_unhibernate_constraint.Value());

	startdAds.Sort(StartdSortFunc,&m_rank_ad);

	startdAds.Open();
	int num_woken = 0;
	ClassAd *startd_ad;
	HashTable<MyString,bool> machines_done(MyStringHash);
	while( (startd_ad=startdAds.Next()) ) {
		MyString machine;
		MyString name;
		startd_ad->LookupString(ATTR_MACHINE,machine);
		startd_ad->LookupString(ATTR_NAME,name);

		if( machines_done.exists(machine)==0 ) {
			dprintf(D_FULLDEBUG,
					"Skipping %s: already attempted to wake up %s in this cycle.\n",
					name.Value(),machine.Value());
			continue;
		}

			// in case the unhibernate expression is time-sensitive,
			// re-evaluate it now to make sure it still passes
		if( !EvalBool(startd_ad,requirements) ) {
			dprintf(D_ALWAYS,
					"Skipping %s: ROOSTER_UNHIBERNATE is no longer true.\n",
					name.Value());
			continue;
		}

		if( wakeUp(startd_ad) ) {
			machines_done.insert(machine,true);

			if( ++num_woken >= m_max_unhibernate && m_max_unhibernate > 0 ) {
				dprintf(D_ALWAYS,
						"Reached ROOSTER_MAX_UNHIBERNATE=%d in this cycle.\n",
						m_max_unhibernate);
				break;
			}
		}
	}
	startdAds.Close();

	delete requirements;
	requirements = NULL;

	if( startdAds.MyLength() ) {
		dprintf(D_FULLDEBUG,"Done sending wakeup calls.\n");
	}
}
Example #4
0
int
main (int argc, char *argv[])
{
#if !defined(WIN32)
	install_sig_handler(SIGPIPE, (SIG_HANDLER)SIG_IGN );
#endif

	// initialize to read from config file
	myDistro->Init( argc, argv );
	myName = argv[0];
	config();
	dprintf_config_tool_on_error(0);

	// The arguments take two passes to process --- the first pass
	// figures out the mode, after which we can instantiate the required
	// query object.  We add implied constraints from the command line in
	// the second pass.
	firstPass (argc, argv);
	
	// if the mode has not been set, it is STARTD_NORMAL
	if (mode == MODE_NOTSET) {
		setMode (MODE_STARTD_NORMAL, 0, DEFAULT);
	}

	// instantiate query object
	if (!(query = new CondorQuery (type))) {
		dprintf_WriteOnErrorBuffer(stderr, true);
		fprintf (stderr, "Error:  Out of memory\n");
		exit (1);
	}
	// if a first-pass setMode set a mode_constraint, apply it now to the query object
	if (mode_constraint && ! explicit_format) {
		query->addANDConstraint(mode_constraint);
	}

	// set pretty print style implied by the type of entity being queried
	// but do it with default priority, so that explicitly requested options
	// can override it
	switch (type)
	{
#ifdef HAVE_EXT_POSTGRESQL
	  case QUILL_AD:
		setPPstyle(PP_QUILL_NORMAL, 0, DEFAULT);
		break;
#endif /* HAVE_EXT_POSTGRESQL */


	  case DEFRAG_AD:
		setPPstyle(PP_GENERIC_NORMAL, 0, DEFAULT);
		break;

	  case STARTD_AD:
		setPPstyle(PP_STARTD_NORMAL, 0, DEFAULT);
		break;

	  case SCHEDD_AD:
		setPPstyle(PP_SCHEDD_NORMAL, 0, DEFAULT);
		break;

	  case MASTER_AD:
		setPPstyle(PP_MASTER_NORMAL, 0, DEFAULT);
		break;

	  case CKPT_SRVR_AD:
		setPPstyle(PP_CKPT_SRVR_NORMAL, 0, DEFAULT);
		break;

	  case COLLECTOR_AD:
		setPPstyle(PP_COLLECTOR_NORMAL, 0, DEFAULT);
		break;

	  case STORAGE_AD:
		setPPstyle(PP_STORAGE_NORMAL, 0, DEFAULT);
		break;

	  case NEGOTIATOR_AD:
		setPPstyle(PP_NEGOTIATOR_NORMAL, 0, DEFAULT);
		break;

      case GRID_AD:
        setPPstyle(PP_GRID_NORMAL, 0, DEFAULT);
		break;

	  case GENERIC_AD:
		setPPstyle(PP_GENERIC, 0, DEFAULT);
		break;

	  case ANY_AD:
		setPPstyle(PP_ANY_NORMAL, 0, DEFAULT);
		break;

	  default:
		setPPstyle(PP_VERBOSE, 0, DEFAULT);
	}

	// set the constraints implied by the mode
	switch (mode) {
#ifdef HAVE_EXT_POSTGRESQL
	  case MODE_QUILL_NORMAL:
#endif /* HAVE_EXT_POSTGRESQL */

	  case MODE_DEFRAG_NORMAL:
	  case MODE_STARTD_NORMAL:
	  case MODE_MASTER_NORMAL:
	  case MODE_CKPT_SRVR_NORMAL:
	  case MODE_SCHEDD_NORMAL:
	  case MODE_SCHEDD_SUBMITTORS:
	  case MODE_COLLECTOR_NORMAL:
	  case MODE_NEGOTIATOR_NORMAL:
	  case MODE_STORAGE_NORMAL:
	  case MODE_GENERIC_NORMAL:
	  case MODE_ANY_NORMAL:
	  case MODE_GRID_NORMAL:
	  case MODE_HAD_NORMAL:
		break;

	  case MODE_OTHER:
			// tell the query object what the type we're querying is
		query->setGenericQueryType(genericType);
		free(genericType);
		genericType = NULL;
		break;

	  case MODE_STARTD_AVAIL:
			  // For now, -avail shows you machines avail to anyone.
		sprintf (buffer, "%s == \"%s\"", ATTR_STATE,
					state_to_string(unclaimed_state));
		if (diagnose) {
			printf ("Adding constraint [%s]\n", buffer);
		}
		query->addORConstraint (buffer);
		break;


	  case MODE_STARTD_RUN:
		sprintf (buffer, "%s == \"%s\"", ATTR_STATE,
					state_to_string(claimed_state));
		if (diagnose) {
			printf ("Adding constraint [%s]\n", buffer);
		}
		query->addORConstraint (buffer);
		break;

	  case MODE_STARTD_COD:
	    sprintf (buffer, "%s > 0", ATTR_NUM_COD_CLAIMS );
		if (diagnose) {
			printf ("Adding constraint [%s]\n", buffer);
		}
		query->addORConstraint (buffer);
		break;

	  default:
		break;
	}	

	if(javaMode) {
		sprintf( buffer, "%s == TRUE", ATTR_HAS_JAVA );
		if (diagnose) {
			printf ("Adding constraint [%s]\n", buffer);
		}
		query->addANDConstraint (buffer);
		
		projList.AppendArg(ATTR_HAS_JAVA);
		projList.AppendArg(ATTR_JAVA_MFLOPS);
		projList.AppendArg(ATTR_JAVA_VENDOR);
		projList.AppendArg(ATTR_JAVA_VERSION);

	}

	if(offlineMode) {
		query->addANDConstraint( "size( OfflineUniverses ) != 0" );

		projList.AppendArg( "OfflineUniverses" );

		//
		// Since we can't add a regex to a projection, explicitly list all
		// the attributes we know about.
		//

		projList.AppendArg( "HasVM" );
		projList.AppendArg( "VMOfflineReason" );
		projList.AppendArg( "VMOfflineTime" );
	}

	if(absentMode) {
	    sprintf( buffer, "%s == TRUE", ATTR_ABSENT );
	    if (diagnose) {
	        printf( "Adding constraint %s\n", buffer );
	    }
	    query->addANDConstraint( buffer );
	    
	    projList.AppendArg( ATTR_ABSENT );
	    projList.AppendArg( ATTR_LAST_HEARD_FROM );
	    projList.AppendArg( ATTR_CLASSAD_LIFETIME );
	}

	if(vmMode) {
		sprintf( buffer, "%s == TRUE", ATTR_HAS_VM);
		if (diagnose) {
			printf ("Adding constraint [%s]\n", buffer);
		}
		query->addANDConstraint (buffer);

		projList.AppendArg(ATTR_VM_TYPE);
		projList.AppendArg(ATTR_VM_MEMORY);
		projList.AppendArg(ATTR_VM_NETWORKING);
		projList.AppendArg(ATTR_VM_NETWORKING_TYPES);
		projList.AppendArg(ATTR_VM_HARDWARE_VT);
		projList.AppendArg(ATTR_VM_AVAIL_NUM);
		projList.AppendArg(ATTR_VM_ALL_GUEST_MACS);
		projList.AppendArg(ATTR_VM_ALL_GUEST_IPS);
		projList.AppendArg(ATTR_VM_GUEST_MAC);
		projList.AppendArg(ATTR_VM_GUEST_IP);

	}

	// second pass:  add regular parameters and constraints
	if (diagnose) {
		printf ("----------\n");
	}

	secondPass (argc, argv);

	// initialize the totals object
	if (ppStyle == PP_CUSTOM && using_print_format) {
		if (pmHeadFoot & HF_NOSUMMARY) ppTotalStyle = PP_CUSTOM;
	} else {
		ppTotalStyle = ppStyle;
	}
	TrackTotals	totals(ppTotalStyle);

	// fetch the query
	QueryResult q;

	if ((mode == MODE_STARTD_NORMAL) && (ppStyle == PP_STARTD_NORMAL)) {
		projList.AppendArg("Name");
		projList.AppendArg("Machine");
		projList.AppendArg("Opsys");
		projList.AppendArg("Arch");
		projList.AppendArg("State");
		projList.AppendArg("Activity");
		projList.AppendArg("LoadAvg");
		projList.AppendArg("Memory");
		projList.AppendArg("ActvtyTime");
		projList.AppendArg("MyCurrentTime");
		projList.AppendArg("EnteredCurrentActivity");
	} else if( ppStyle == PP_VERBOSE ) {
	    // Remove everything from the projection list if we're displaying
	    // the "long form" of the ads.
	    projList.Clear();
		// but if -attributes was supplied, show only those attributes
		if ( ! dashAttributes.isEmpty()) {
			const char * s;
			dashAttributes.rewind();
			while ((s = dashAttributes.next())) {
				projList.AppendArg(s);
			}
		}
	}

	if( projList.Count() > 0 ) {
		char **attr_list = projList.GetStringArray();
		query->setDesiredAttrs(attr_list);
		deleteStringArray(attr_list);
	}

	// if diagnose was requested, just print the query ad
	if (diagnose) {
		ClassAd 	queryAd;

		// print diagnostic information about inferred internal state
		setMode ((Mode) 0, 0, NULL);
		setType (NULL, 0, NULL);
		setPPstyle ((ppOption) 0, 0, DEFAULT);
		printf ("----------\n");

		q = query->getQueryAd (queryAd);
		fPrintAd (stdout, queryAd);

		printf ("----------\n");
		fprintf (stderr, "Result of making query ad was:  %d\n", q);
		exit (1);
	}

        // Address (host:port) is taken from requested pool, if given.
	char* addr = (NULL != pool) ? pool->addr() : NULL;
        Daemon* requested_daemon = pool;

        // If we're in "direct" mode, then we attempt to locate the daemon
	// associated with the requested subsystem (here encoded by value of mode)
        // In this case the host:port of pool (if given) denotes which
        // pool is being consulted
	if( direct ) {
		Daemon *d = NULL;
		switch( mode ) {
		case MODE_MASTER_NORMAL:
			d = new Daemon( DT_MASTER, direct, addr );
			break;
		case MODE_STARTD_NORMAL:
		case MODE_STARTD_AVAIL:
		case MODE_STARTD_RUN:
		case MODE_STARTD_COD:
			d = new Daemon( DT_STARTD, direct, addr );
			break;

#ifdef HAVE_EXT_POSTGRESQL
		case MODE_QUILL_NORMAL:
			d = new Daemon( DT_QUILL, direct, addr );
			break;
#endif /* HAVE_EXT_POSTGRESQL */

		case MODE_SCHEDD_NORMAL:
		case MODE_SCHEDD_SUBMITTORS:
			d = new Daemon( DT_SCHEDD, direct, addr );
			break;
		case MODE_NEGOTIATOR_NORMAL:
			d = new Daemon( DT_NEGOTIATOR, direct, addr );
			break;
		case MODE_CKPT_SRVR_NORMAL:
		case MODE_COLLECTOR_NORMAL:
		case MODE_LICENSE_NORMAL:
		case MODE_STORAGE_NORMAL:
		case MODE_GENERIC_NORMAL:
		case MODE_ANY_NORMAL:
		case MODE_OTHER:
		case MODE_GRID_NORMAL:
		case MODE_HAD_NORMAL:
				// These have to go to the collector, anyway.
			break;
		default:
            fprintf( stderr, "Error:  Illegal mode %d\n", mode );
			exit( 1 );
			break;
		}

                // Here is where we actually override 'addr', if we can obtain
                // address of the requested daemon/subsys.  If it can't be
                // located, then fail with error msg.
                // 'd' will be null (unset) if mode is one of above that must go to
                // collector (MODE_ANY_NORMAL, MODE_COLLECTOR_NORMAL, etc)
		if (NULL != d) {
			if( d->locate() ) {
				addr = d->addr();
				requested_daemon = d;
			} else {
				const char* id = d->idStr();
				if (NULL == id) id = d->name();
				dprintf_WriteOnErrorBuffer(stderr, true);
				if (NULL == id) id = "daemon";
				fprintf(stderr, "Error: Failed to locate %s\n", id);
				fprintf(stderr, "%s\n", d->error());
				exit( 1 );
			}
		}
	}

	ClassAdList result;
	CondorError errstack;
	if (NULL != ads_file) {
		MyString req; // query requirements
		q = query->getRequirements(req);
		const char * constraint = req.empty() ? NULL : req.c_str();
		if (read_classad_file(ads_file, result, constraint)) {
			q = Q_OK;
		}
	} else if (NULL != addr) {
			// this case executes if pool was provided, or if in "direct" mode with
			// subsystem that corresponds to a daemon (above).
			// Here 'addr' represents either the host:port of requested pool, or
			// alternatively the host:port of daemon associated with requested subsystem (direct mode)
		q = query->fetchAds (result, addr, &errstack);
	} else {
			// otherwise obtain list of collectors and submit query that way
		CollectorList * collectors = CollectorList::create();
		q = collectors->query (*query, result, &errstack);
		delete collectors;
	}
		

	// if any error was encountered during the query, report it and exit 
	if (Q_OK != q) {

		dprintf_WriteOnErrorBuffer(stderr, true);
			// we can always provide these messages:
		fprintf( stderr, "Error: %s\n", getStrQueryResult(q) );
		fprintf( stderr, "%s\n", errstack.getFullText(true).c_str() );

        if ((NULL != requested_daemon) && ((Q_NO_COLLECTOR_HOST == q) ||
			(requested_daemon->type() == DT_COLLECTOR)))
		{
				// Specific long message if connection to collector failed.
			const char* fullhost = requested_daemon->fullHostname();
			if (NULL == fullhost) fullhost = "<unknown_host>";
			const char* daddr = requested_daemon->addr();
			if (NULL == daddr) daddr = "<unknown>";
			char info[1000];
			sprintf(info, "%s (%s)", fullhost, daddr);
	        printNoCollectorContact( stderr, info, !expert );
        } else if ((NULL != requested_daemon) && (Q_COMMUNICATION_ERROR == q)) {
				// more helpful message for failure to connect to some daemon/subsys
			const char* id = requested_daemon->idStr();
			if (NULL == id) id = requested_daemon->name();
			if (NULL == id) id = "daemon";
			const char* daddr = requested_daemon->addr();
			if (NULL == daddr) daddr = "<unknown>";
			fprintf(stderr, "Error: Failed to contact %s at %s\n", id, daddr);
		}

		// fail
		exit (1);
	}

	if (noSort) {
		// do nothing 
	} else if (sortSpecs.empty()) {
        // default classad sorting
		result.Sort((SortFunctionType)lessThanFunc);
	} else {
        // User requested custom sorting expressions:
        // insert attributes related to custom sorting
        result.Open();
        while (ClassAd* ad = result.Next()) {
            for (vector<SortSpec>::iterator ss(sortSpecs.begin());  ss != sortSpecs.end();  ++ss) {
                ss->expr->SetParentScope(ad);
                classad::Value v;
                ss->expr->Evaluate(v);
                stringstream vs;
                // This will properly render all supported value types,
                // including undefined and error, although current semantic
                // pre-filters classads where sort expressions are undef/err:
                vs << ((v.IsStringValue())?"\"":"") << v << ((v.IsStringValue())?"\"":"");
                ad->AssignExpr(ss->keyAttr.c_str(), vs.str().c_str());
                // Save the full expr in case user wants to examine on output:
                ad->AssignExpr(ss->keyExprAttr.c_str(), ss->arg.c_str());
            }
        }
        
        result.Open();
		result.Sort((SortFunctionType)customLessThanFunc);
	}

	
	// output result
	prettyPrint (result, &totals);
	
    delete query;

	return 0;
}
Example #5
0
void Defrag::poll()
{
	dprintf(D_FULLDEBUG,"Evaluating defragmentation policy.\n");

		// If we crash during this polling cycle, we will have saved
		// the time of the last poll, so the next cycle will be
		// scheduled on the false assumption that a cycle ran now.  In
		// this way, we error on the side of draining too little
		// rather than too much.

	time_t now = time(NULL);
	time_t prev = m_last_poll;
	m_last_poll = now;
	saveState();

	m_stats.Tick();

	int num_to_drain = m_draining_per_poll;

	time_t last_hour    = (prev / 3600)*3600;
	time_t current_hour = (now  / 3600)*3600;
	time_t last_day     = (prev / (3600*24))*3600*24;
	time_t current_day  = (now  / (3600*24))*3600*24;

	if( current_hour != last_hour ) {
		num_to_drain += prorate(m_draining_per_poll_hour,now-current_hour,3600,m_polling_interval);
	}
	if( current_day != last_day ) {
		num_to_drain += prorate(m_draining_per_poll_day,now-current_day,3600*24,m_polling_interval);
	}

	MachineSet draining_machines;
	int num_draining = countMachines(DRAINING_CONSTRAINT,"<InternalDrainingConstraint>", &draining_machines);
	m_stats.MachinesDraining = num_draining;

	MachineSet whole_machines;
	int num_whole_machines = countMachines(m_whole_machine_expr.c_str(),"DEFRAG_WHOLE_MACHINE_EXPR",&whole_machines);
	m_stats.WholeMachines = num_whole_machines;

	dprintf(D_ALWAYS,"There are currently %d draining and %d whole machines.\n",
			num_draining,num_whole_machines);

	// Calculate arrival rate of fully drained machines.  This is a bit tricky because we poll.

	// We count by finding the newly-arrived
	// fully drained machines, and add to that count machines which are no-longer draining.
	// This allows us to find machines that have fully drained, but were then claimed between
	// polling cycles.
	
	MachineSet new_machines;
	MachineSet no_longer_whole_machines;

	// Find newly-arrived machines
	std::set_difference(whole_machines.begin(), whole_machines.end(), 
						m_prev_whole_machines.begin(), m_prev_whole_machines.end(),
						std::inserter(new_machines, new_machines.begin()));
	
	// Now, newly-departed machines
	std::set_difference(m_prev_draining_machines.begin(), m_prev_draining_machines.end(),
					    draining_machines.begin(), draining_machines.end(),
						std::inserter(no_longer_whole_machines, no_longer_whole_machines.begin()));

	dprintf_set("Set of current whole machines is ", &whole_machines);
	dprintf_set("Set of current draining machine is ", &draining_machines);
	dprintf_set("Newly Arrived whole machines is ", &new_machines);
	dprintf_set("Newly departed draining machines is ", &no_longer_whole_machines);

	m_prev_draining_machines = draining_machines;
	m_prev_whole_machines   = whole_machines;

	int newly_drained = new_machines.size() + no_longer_whole_machines.size();
	double arrival_rate = 0.0;

	// If there is an arrival...
	if (newly_drained > 0) {
		time_t current = time(0);

		// And it isn't the first one since defrag boot...
		if (m_last_whole_machine_arrival > 0) {
			m_whole_machines_arrived += newly_drained;

			time_t arrival_time = current - m_last_whole_machine_arrival;
			if (arrival_time < 1) arrival_time = 1; // very unlikely, but just in case

			m_whole_machine_arrival_sum += newly_drained * arrival_time;

			arrival_rate = newly_drained / ((double)arrival_time);
			dprintf(D_ALWAYS, "Arrival rate is %g machines/hour\n", arrival_rate * 3600.0);
		}
		m_last_whole_machine_arrival = current;
	}

	dprintf(D_ALWAYS, "Lifetime whole machines arrived: %d\n", m_whole_machines_arrived);
	if (m_whole_machine_arrival_sum > 0) {
		double lifetime_mean = m_whole_machines_arrived / m_whole_machine_arrival_sum;
		dprintf(D_ALWAYS, "Lifetime mean arrival rate: %g machines / hour\n", 3600.0 * lifetime_mean);

		if (newly_drained > 0) {
			double diff = arrival_rate - lifetime_mean;
			m_whole_machine_arrival_mean_squared += diff * diff;
		}
		double sd = sqrt(m_whole_machine_arrival_mean_squared / m_whole_machines_arrived);
		dprintf(D_ALWAYS, "Lifetime mean arrival rate sd: %g\n", sd * 3600);

		m_stats.MeanDrainedArrival = lifetime_mean;
		m_stats.MeanDrainedArrivalSD = sd;
		m_stats.DrainedMachines = m_whole_machines_arrived;
	}

	queryDrainingCost();

	// If possible, cancel some drains.
	MachineSet cancelled_machines;
	poll_cancel(cancelled_machines);

	if( num_to_drain <= 0 ) {
		dprintf(D_ALWAYS,"Doing nothing, because number to drain in next %ds is calculated to be 0.\n",
				m_polling_interval);
		return;
	}

	if( (int)ceil(m_draining_per_hour) <= 0 ) {
		dprintf(D_ALWAYS,"Doing nothing, because DEFRAG_DRAINING_MACHINES_PER_HOUR=%f\n",
				m_draining_per_hour);
		return;
	}

	if( m_max_draining == 0 ) {
		dprintf(D_ALWAYS,"Doing nothing, because DEFRAG_MAX_CONCURRENT_DRAINING=0\n");
		return;
	}

	if( m_max_whole_machines == 0 ) {
		dprintf(D_ALWAYS,"Doing nothing, because DEFRAG_MAX_WHOLE_MACHINES=0\n");
		return;
	}

	if( m_max_draining >= 0 ) {
		if( num_draining >= m_max_draining ) {
			dprintf(D_ALWAYS,"Doing nothing, because DEFRAG_MAX_CONCURRENT_DRAINING=%d and there are %d draining machines.\n",
					m_max_draining, num_draining);
			return;
		}
		else if( num_draining < 0 ) {
			dprintf(D_ALWAYS,"Doing nothing, because DEFRAG_MAX_CONCURRENT_DRAINING=%d and the query to count draining machines failed.\n",
					m_max_draining);
			return;
		}
	}

	if( m_max_whole_machines >= 0 ) {
		if( num_whole_machines >= m_max_whole_machines ) {
			dprintf(D_ALWAYS,"Doing nothing, because DEFRAG_MAX_WHOLE_MACHINES=%d and there are %d whole machines.\n",
					m_max_whole_machines, num_whole_machines);
			return;
		}
	}

		// Even if m_max_whole_machines is -1 (infinite), we still need
		// the list of whole machines in order to filter them out in
		// the draining selection algorithm, so abort now if the
		// whole machine query failed.
	if( num_whole_machines < 0 ) {
		dprintf(D_ALWAYS,"Doing nothing, because the query to find whole machines failed.\n");
		return;
	}

	dprintf(D_ALWAYS,"Looking for %d machines to drain.\n",num_to_drain);

	ClassAdList startdAds;
	std::string requirements;
	formatstr(requirements,"(%s) && Draining =!= true",m_defrag_requirements.c_str());
	if( !queryMachines(requirements.c_str(),"DEFRAG_REQUIREMENTS",startdAds) ) {
		dprintf(D_ALWAYS,"Doing nothing, because the query to select machines matching DEFRAG_REQUIREMENTS failed.\n");
		return;
	}

	startdAds.Shuffle();
	startdAds.Sort(StartdSortFunc,&m_rank_ad);

	startdAds.Open();
	int num_drained = 0;
	ClassAd *startd_ad_ptr;
	MachineSet machines_done;
	while( (startd_ad_ptr=startdAds.Next()) ) {

		ClassAd &startd_ad = *startd_ad_ptr;

		std::string machine;
		std::string name;
		startd_ad.LookupString(ATTR_NAME,name);
		slotNameToDaemonName(name,machine);

		// If we have already cancelled draining on this machine, ignore it for this cycle.
		if( cancelled_machines.count(machine) ) {
			dprintf(D_FULLDEBUG,
					"Skipping %s: already cancelled draining of %s in this cycle.\n",
					name.c_str(),machine.c_str());
			continue;
		}

		if( machines_done.count(machine) ) {
			dprintf(D_FULLDEBUG,
					"Skipping %s: already attempted to drain %s in this cycle.\n",
					name.c_str(),machine.c_str());
			continue;
		}

		if( whole_machines.count(machine) ) {
			dprintf(D_FULLDEBUG,
					"Skipping %s: because it is already running as a whole machine.\n",
					name.c_str());
			continue;
		}

		if( drain(startd_ad) ) {
			machines_done.insert(machine);

			if( ++num_drained >= num_to_drain ) {
				dprintf(D_ALWAYS,
						"Drained maximum number of machines allowed in this cycle (%d).\n",
						num_to_drain);
				break;
			}
		}
	}
	startdAds.Close();

	dprintf(D_ALWAYS,"Drained %d machines (wanted to drain %d machines).\n",
			num_drained,num_to_drain);

	dprintf(D_FULLDEBUG,"Done evaluating defragmentation policy.\n");
}