Example #1
0
int Defrag::countMachines(char const *constraint,char const *constraint_source,	MachineSet *machines)
{
	ClassAdList startdAds;
	int count = 0;

	if( !queryMachines(constraint,constraint_source,startdAds) ) {
		return -1;
	}

	MachineSet my_machines;
	if( !machines ) {
		machines = &my_machines;
	}

	startdAds.Open();
	ClassAd *startd_ad;
	while( (startd_ad=startdAds.Next()) ) {
		std::string machine;
		std::string name;
		startd_ad->LookupString(ATTR_NAME,name);
		slotNameToDaemonName(name,machine);

		if( machines->count(machine) ) {
			continue;
		}

		machines->insert(machine);
		count++;
	}
	startdAds.Close();

	dprintf(D_FULLDEBUG,"Counted %d machines matching %s=%s\n",
			count,constraint_source,constraint);
	return count;
}
Example #2
0
void Defrag::poll_cancel(MachineSet &cancelled_machines)
{
	if (!m_cancel_requirements.size())
	{
		return;
	}

	MachineSet draining_whole_machines;
	std::stringstream draining_whole_machines_ss;
	draining_whole_machines_ss << "(" <<  m_cancel_requirements << ") && (" << DRAINING_CONSTRAINT << ")";
	int num_draining_whole_machines = countMachines(draining_whole_machines_ss.str().c_str(),
		"<DEFRAG_CANCEL_REQUIREMENTS>", &draining_whole_machines);

	if (num_draining_whole_machines)
	{
		dprintf(D_ALWAYS, "Of the whole machines, %d are in the draining state.\n", num_draining_whole_machines);
	}
	else
	{	// Early exit: nothing to do.
		return;
	}

	ClassAdList startdAds;
	if (!queryMachines(DRAINING_CONSTRAINT, "DRAINING_CONSTRAINT <all draining slots>",startdAds))
	{
		return;
	}

	startdAds.Shuffle();
	startdAds.Sort(StartdSortFunc,&m_rank_ad);

	startdAds.Open();

	unsigned int cancel_count = 0;
	ClassAd *startd_ad_ptr;
	while ( (startd_ad_ptr=startdAds.Next()) )
	{
		if (!startd_ad_ptr) continue;

		ClassAd &startd_ad = *startd_ad_ptr;
		std::string machine;
		std::string name;
		startd_ad.LookupString(ATTR_NAME,name);
		slotNameToDaemonName(name,machine);

		if( !cancelled_machines.count(machine) && draining_whole_machines.count(machine) ) {
			cancel_drain(startd_ad);
			cancelled_machines.insert(machine);
			cancel_count ++;
		}
	}

	startdAds.Close();


	dprintf(D_ALWAYS, "Cancelled draining of %u whole machines.\n", cancel_count);
}
Example #3
0
void Defrag::poll()
{
	dprintf(D_FULLDEBUG,"Evaluating defragmentation policy.\n");

		// If we crash during this polling cycle, we will have saved
		// the time of the last poll, so the next cycle will be
		// scheduled on the false assumption that a cycle ran now.  In
		// this way, we error on the side of draining too little
		// rather than too much.

	time_t now = time(NULL);
	time_t prev = m_last_poll;
	m_last_poll = now;
	saveState();

	m_stats.Tick();

	int num_to_drain = m_draining_per_poll;

	time_t last_hour    = (prev / 3600)*3600;
	time_t current_hour = (now  / 3600)*3600;
	time_t last_day     = (prev / (3600*24))*3600*24;
	time_t current_day  = (now  / (3600*24))*3600*24;

	if( current_hour != last_hour ) {
		num_to_drain += prorate(m_draining_per_poll_hour,now-current_hour,3600,m_polling_interval);
	}
	if( current_day != last_day ) {
		num_to_drain += prorate(m_draining_per_poll_day,now-current_day,3600*24,m_polling_interval);
	}

	int num_draining = countMachines(DRAINING_CONSTRAINT,"<InternalDrainingConstraint>");
	m_stats.MachinesDraining = num_draining;

	MachineSet whole_machines;
	int num_whole_machines = countMachines(m_whole_machine_expr.c_str(),"DEFRAG_WHOLE_MACHINE_EXPR",&whole_machines);
	m_stats.WholeMachines = num_whole_machines;

	dprintf(D_ALWAYS,"There are currently %d draining and %d whole machines.\n",
			num_draining,num_whole_machines);

	queryDrainingCost();

	// If possible, cancel some drains.
	MachineSet cancelled_machines;
	poll_cancel(cancelled_machines);

	if( num_to_drain <= 0 ) {
		dprintf(D_ALWAYS,"Doing nothing, because number to drain in next %ds is calculated to be 0.\n",
				m_polling_interval);
		return;
	}

	if( (int)ceil(m_draining_per_hour) <= 0 ) {
		dprintf(D_ALWAYS,"Doing nothing, because DEFRAG_DRAINING_MACHINES_PER_HOUR=%f\n",
				m_draining_per_hour);
		return;
	}

	if( m_max_draining == 0 ) {
		dprintf(D_ALWAYS,"Doing nothing, because DEFRAG_MAX_CONCURRENT_DRAINING=0\n");
		return;
	}

	if( m_max_whole_machines == 0 ) {
		dprintf(D_ALWAYS,"Doing nothing, because DEFRAG_MAX_WHOLE_MACHINES=0\n");
		return;
	}

	if( m_max_draining >= 0 ) {
		if( num_draining >= m_max_draining ) {
			dprintf(D_ALWAYS,"Doing nothing, because DEFRAG_MAX_CONCURRENT_DRAINING=%d and there are %d draining machines.\n",
					m_max_draining, num_draining);
			return;
		}
		else if( num_draining < 0 ) {
			dprintf(D_ALWAYS,"Doing nothing, because DEFRAG_MAX_CONCURRENT_DRAINING=%d and the query to count draining machines failed.\n",
					m_max_draining);
			return;
		}
	}

	if( m_max_whole_machines >= 0 ) {
		if( num_whole_machines >= m_max_whole_machines ) {
			dprintf(D_ALWAYS,"Doing nothing, because DEFRAG_MAX_WHOLE_MACHINES=%d and there are %d whole machines.\n",
					m_max_whole_machines, num_whole_machines);
			return;
		}
	}

		// Even if m_max_whole_machines is -1 (infinite), we still need
		// the list of whole machines in order to filter them out in
		// the draining selection algorithm, so abort now if the
		// whole machine query failed.
	if( num_whole_machines < 0 ) {
		dprintf(D_ALWAYS,"Doing nothing, because the query to find whole machines failed.\n");
		return;
	}

	dprintf(D_ALWAYS,"Looking for %d machines to drain.\n",num_to_drain);

	ClassAdList startdAds;
	std::string requirements;
	sprintf(requirements,"(%s) && Draining =!= true",m_defrag_requirements.c_str());
	if( !queryMachines(requirements.c_str(),"DEFRAG_REQUIREMENTS",startdAds) ) {
		dprintf(D_ALWAYS,"Doing nothing, because the query to select machines matching DEFRAG_REQUIREMENTS failed.\n");
		return;
	}

	startdAds.Shuffle();
	startdAds.Sort(StartdSortFunc,&m_rank_ad);

	startdAds.Open();
	int num_drained = 0;
	ClassAd *startd_ad_ptr;
	MachineSet machines_done;
	while( (startd_ad_ptr=startdAds.Next()) ) {

		if (!startd_ad_ptr) continue;
		ClassAd &startd_ad = *startd_ad_ptr;

		std::string machine;
		std::string name;
		startd_ad.LookupString(ATTR_NAME,name);
		slotNameToDaemonName(name,machine);

		// If we have already cancelled draining on this machine, ignore it for this cycle.
		if( cancelled_machines.count(machine) ) {
			dprintf(D_FULLDEBUG,
					"Skipping %s: already cancelled draining of %s in this cycle.\n",
					name.c_str(),machine.c_str());
			continue;
		}

		if( machines_done.count(machine) ) {
			dprintf(D_FULLDEBUG,
					"Skipping %s: already attempted to drain %s in this cycle.\n",
					name.c_str(),machine.c_str());
			continue;
		}

		if( whole_machines.count(machine) ) {
			dprintf(D_FULLDEBUG,
					"Skipping %s: because it is already running as a whole machine.\n",
					name.c_str());
			continue;
		}

		if( drain(startd_ad) ) {
			machines_done.insert(machine);

			if( ++num_drained >= num_to_drain ) {
				dprintf(D_ALWAYS,
						"Drained maximum number of machines allowed in this cycle (%d).\n",
						num_to_drain);
				break;
			}
		}
	}
	startdAds.Close();

	dprintf(D_ALWAYS,"Drained %d machines (wanted to drain %d machines).\n",
			num_drained,num_drained);

	dprintf(D_FULLDEBUG,"Done evaluating defragmentation policy.\n");
}
Example #4
0
void Defrag::poll()
{
	dprintf(D_FULLDEBUG,"Evaluating defragmentation policy.\n");

		// If we crash during this polling cycle, we will have saved
		// the time of the last poll, so the next cycle will be
		// scheduled on the false assumption that a cycle ran now.  In
		// this way, we error on the side of draining too little
		// rather than too much.

	time_t now = time(NULL);
	time_t prev = m_last_poll;
	m_last_poll = now;
	saveState();

	m_stats.Tick();

	int num_to_drain = m_draining_per_poll;

	time_t last_hour    = (prev / 3600)*3600;
	time_t current_hour = (now  / 3600)*3600;
	time_t last_day     = (prev / (3600*24))*3600*24;
	time_t current_day  = (now  / (3600*24))*3600*24;

	if( current_hour != last_hour ) {
		num_to_drain += prorate(m_draining_per_poll_hour,now-current_hour,3600,m_polling_interval);
	}
	if( current_day != last_day ) {
		num_to_drain += prorate(m_draining_per_poll_day,now-current_day,3600*24,m_polling_interval);
	}

	MachineSet draining_machines;
	int num_draining = countMachines(DRAINING_CONSTRAINT,"<InternalDrainingConstraint>", &draining_machines);
	m_stats.MachinesDraining = num_draining;

	MachineSet whole_machines;
	int num_whole_machines = countMachines(m_whole_machine_expr.c_str(),"DEFRAG_WHOLE_MACHINE_EXPR",&whole_machines);
	m_stats.WholeMachines = num_whole_machines;

	dprintf(D_ALWAYS,"There are currently %d draining and %d whole machines.\n",
			num_draining,num_whole_machines);

	// Calculate arrival rate of fully drained machines.  This is a bit tricky because we poll.

	// We count by finding the newly-arrived
	// fully drained machines, and add to that count machines which are no-longer draining.
	// This allows us to find machines that have fully drained, but were then claimed between
	// polling cycles.
	
	MachineSet new_machines;
	MachineSet no_longer_whole_machines;

	// Find newly-arrived machines
	std::set_difference(whole_machines.begin(), whole_machines.end(), 
						m_prev_whole_machines.begin(), m_prev_whole_machines.end(),
						std::inserter(new_machines, new_machines.begin()));
	
	// Now, newly-departed machines
	std::set_difference(m_prev_draining_machines.begin(), m_prev_draining_machines.end(),
					    draining_machines.begin(), draining_machines.end(),
						std::inserter(no_longer_whole_machines, no_longer_whole_machines.begin()));

	dprintf_set("Set of current whole machines is ", &whole_machines);
	dprintf_set("Set of current draining machine is ", &draining_machines);
	dprintf_set("Newly Arrived whole machines is ", &new_machines);
	dprintf_set("Newly departed draining machines is ", &no_longer_whole_machines);

	m_prev_draining_machines = draining_machines;
	m_prev_whole_machines   = whole_machines;

	int newly_drained = new_machines.size() + no_longer_whole_machines.size();
	double arrival_rate = 0.0;

	// If there is an arrival...
	if (newly_drained > 0) {
		time_t current = time(0);

		// And it isn't the first one since defrag boot...
		if (m_last_whole_machine_arrival > 0) {
			m_whole_machines_arrived += newly_drained;

			time_t arrival_time = current - m_last_whole_machine_arrival;
			if (arrival_time < 1) arrival_time = 1; // very unlikely, but just in case

			m_whole_machine_arrival_sum += newly_drained * arrival_time;

			arrival_rate = newly_drained / ((double)arrival_time);
			dprintf(D_ALWAYS, "Arrival rate is %g machines/hour\n", arrival_rate * 3600.0);
		}
		m_last_whole_machine_arrival = current;
	}

	dprintf(D_ALWAYS, "Lifetime whole machines arrived: %d\n", m_whole_machines_arrived);
	if (m_whole_machine_arrival_sum > 0) {
		double lifetime_mean = m_whole_machines_arrived / m_whole_machine_arrival_sum;
		dprintf(D_ALWAYS, "Lifetime mean arrival rate: %g machines / hour\n", 3600.0 * lifetime_mean);

		if (newly_drained > 0) {
			double diff = arrival_rate - lifetime_mean;
			m_whole_machine_arrival_mean_squared += diff * diff;
		}
		double sd = sqrt(m_whole_machine_arrival_mean_squared / m_whole_machines_arrived);
		dprintf(D_ALWAYS, "Lifetime mean arrival rate sd: %g\n", sd * 3600);

		m_stats.MeanDrainedArrival = lifetime_mean;
		m_stats.MeanDrainedArrivalSD = sd;
		m_stats.DrainedMachines = m_whole_machines_arrived;
	}

	queryDrainingCost();

	// If possible, cancel some drains.
	MachineSet cancelled_machines;
	poll_cancel(cancelled_machines);

	if( num_to_drain <= 0 ) {
		dprintf(D_ALWAYS,"Doing nothing, because number to drain in next %ds is calculated to be 0.\n",
				m_polling_interval);
		return;
	}

	if( (int)ceil(m_draining_per_hour) <= 0 ) {
		dprintf(D_ALWAYS,"Doing nothing, because DEFRAG_DRAINING_MACHINES_PER_HOUR=%f\n",
				m_draining_per_hour);
		return;
	}

	if( m_max_draining == 0 ) {
		dprintf(D_ALWAYS,"Doing nothing, because DEFRAG_MAX_CONCURRENT_DRAINING=0\n");
		return;
	}

	if( m_max_whole_machines == 0 ) {
		dprintf(D_ALWAYS,"Doing nothing, because DEFRAG_MAX_WHOLE_MACHINES=0\n");
		return;
	}

	if( m_max_draining >= 0 ) {
		if( num_draining >= m_max_draining ) {
			dprintf(D_ALWAYS,"Doing nothing, because DEFRAG_MAX_CONCURRENT_DRAINING=%d and there are %d draining machines.\n",
					m_max_draining, num_draining);
			return;
		}
		else if( num_draining < 0 ) {
			dprintf(D_ALWAYS,"Doing nothing, because DEFRAG_MAX_CONCURRENT_DRAINING=%d and the query to count draining machines failed.\n",
					m_max_draining);
			return;
		}
	}

	if( m_max_whole_machines >= 0 ) {
		if( num_whole_machines >= m_max_whole_machines ) {
			dprintf(D_ALWAYS,"Doing nothing, because DEFRAG_MAX_WHOLE_MACHINES=%d and there are %d whole machines.\n",
					m_max_whole_machines, num_whole_machines);
			return;
		}
	}

		// Even if m_max_whole_machines is -1 (infinite), we still need
		// the list of whole machines in order to filter them out in
		// the draining selection algorithm, so abort now if the
		// whole machine query failed.
	if( num_whole_machines < 0 ) {
		dprintf(D_ALWAYS,"Doing nothing, because the query to find whole machines failed.\n");
		return;
	}

	dprintf(D_ALWAYS,"Looking for %d machines to drain.\n",num_to_drain);

	ClassAdList startdAds;
	std::string requirements;
	formatstr(requirements,"(%s) && Draining =!= true",m_defrag_requirements.c_str());
	if( !queryMachines(requirements.c_str(),"DEFRAG_REQUIREMENTS",startdAds) ) {
		dprintf(D_ALWAYS,"Doing nothing, because the query to select machines matching DEFRAG_REQUIREMENTS failed.\n");
		return;
	}

	startdAds.Shuffle();
	startdAds.Sort(StartdSortFunc,&m_rank_ad);

	startdAds.Open();
	int num_drained = 0;
	ClassAd *startd_ad_ptr;
	MachineSet machines_done;
	while( (startd_ad_ptr=startdAds.Next()) ) {

		ClassAd &startd_ad = *startd_ad_ptr;

		std::string machine;
		std::string name;
		startd_ad.LookupString(ATTR_NAME,name);
		slotNameToDaemonName(name,machine);

		// If we have already cancelled draining on this machine, ignore it for this cycle.
		if( cancelled_machines.count(machine) ) {
			dprintf(D_FULLDEBUG,
					"Skipping %s: already cancelled draining of %s in this cycle.\n",
					name.c_str(),machine.c_str());
			continue;
		}

		if( machines_done.count(machine) ) {
			dprintf(D_FULLDEBUG,
					"Skipping %s: already attempted to drain %s in this cycle.\n",
					name.c_str(),machine.c_str());
			continue;
		}

		if( whole_machines.count(machine) ) {
			dprintf(D_FULLDEBUG,
					"Skipping %s: because it is already running as a whole machine.\n",
					name.c_str());
			continue;
		}

		if( drain(startd_ad) ) {
			machines_done.insert(machine);

			if( ++num_drained >= num_to_drain ) {
				dprintf(D_ALWAYS,
						"Drained maximum number of machines allowed in this cycle (%d).\n",
						num_to_drain);
				break;
			}
		}
	}
	startdAds.Close();

	dprintf(D_ALWAYS,"Drained %d machines (wanted to drain %d machines).\n",
			num_drained,num_to_drain);

	dprintf(D_FULLDEBUG,"Done evaluating defragmentation policy.\n");
}