Ejemplo n.º 1
0
bool ScheddNegotiate::getSatisfaction() {
	if( m_jobs_rejected > 0 ) {
		return false;
	}

		// no jobs were explicitly rejected, but did negotiation end
		// before we presented all of our jobs?
	if( m_current_job_id.cluster == -1 ) {
		nextJob();
	}

	if( m_current_job_id.cluster == -1 ) {
		return true; // no more jobs
	}
	return false;
}
Ejemplo n.º 2
0
bool
ScheddNegotiate::sendJobInfo(Sock *sock)
{
		// The Negotiator wants us to send it a job. 

	sock->encode();

	if( m_current_job_id.cluster == -1 && !nextJob() ) {
		if( !sock->snd_int(NO_MORE_JOBS,TRUE) ) {
			dprintf( D_ALWAYS, "Can't send NO_MORE_JOBS to mgr\n" );
			return false;
		}
		m_negotiation_finished = true;
		return true;
	}

	if( !sock->put(JOB_INFO) ) {
		dprintf( D_ALWAYS, "Can't send JOB_INFO to mgr\n" );
		return false;
	}

		// request match diagnostics
	m_current_job_ad.Assign(ATTR_WANT_MATCH_DIAGNOSTICS, true);

		// Send the ad to the negotiator
	if( !putClassAd(sock, m_current_job_ad) ) {
		dprintf( D_ALWAYS,
				 "Can't send job ad to mgr\n" );
		sock->end_of_message();
		return false;
	}
	if( !sock->end_of_message() ) {
		dprintf( D_ALWAYS,
				 "Can't send job eom to mgr\n" );
		return false;
	}

	m_current_resources_delivered = 0;
	m_current_resources_requested = 1;
	m_current_job_ad.LookupInteger(ATTR_RESOURCE_REQUEST_COUNT,m_current_resources_requested);

	dprintf( D_FULLDEBUG,
			 "Sent job %d.%d (autocluster=%d resources_requested=%d) to the negotiator\n",
			 m_current_job_id.cluster, m_current_job_id.proc,
			 m_current_auto_cluster_id, m_current_resources_requested );
	return true;
}
Ejemplo n.º 3
0
bool
ScheddNegotiate::sendResourceRequestList(Sock *sock)
{
	m_jobs_can_offer = scheduler_maxJobsToOffer();

	while (m_num_resource_reqs_to_send > 0) {

		nextJob();

		if ( !sendJobInfo(sock, true) ) {
			return false;
		}

		// If m_negotiation_finished==true, then no more jobs to send. But
		// if we already sent some jobs in response to this request, we
		// don't want to consider the negotitation finished since we still want
		// to receive responses (e.g. matches) back from the negotiator.
		if ( m_negotiation_finished ) {
			if (m_num_resource_reqs_sent > 0 ) {
				m_negotiation_finished = false;
			}
			break;
		}

		// When we call sendJobInfo next at the top of the loop,
		// we don't want it to send all the individual jobs in the current cluster since
		// we already sent an ad with a resource_request_count.  So we want
		// to skip ahead to the next cluster.
		if ( !m_jobs->empty() ) {
			ResourceRequestCluster *cluster = m_jobs->front();
			m_jobs->pop_front();
			delete cluster;
		}

		m_num_resource_reqs_sent++;
		m_num_resource_reqs_to_send--;

		extern void IncrementResourceRequestsSent();
		IncrementResourceRequestsSent();
	}

	// Set m_num_resource_reqs_to_send to zero, as we are not sending
	// any more reqs now, and this counter is inspected in nextJob()
	m_num_resource_reqs_to_send = 0; 

	return true;
}
Ejemplo n.º 4
0
void AvatarJobManager::runJob()
{
	QMutexLocker(&mutex());

	if (IsJobRunning)
		return;

	if (!hasJob())
		return;

	IsJobRunning = true;

	Contact contact = nextJob();
	AvatarJobRunner *runner = new AvatarJobRunner(contact, this);
	connect(runner, SIGNAL(jobFinished(bool)), this, SLOT(jobFinished()));
	runner->runJob();
}
Ejemplo n.º 5
0
DCMsg::MessageClosureEnum
ScheddNegotiate::messageReceived( DCMessenger *messenger, Sock *sock )
{
		// This is called when readMsg() returns true.
		// Now carry out the negotiator's request that we just read.

	switch( m_operation ) {
	case REJECTED:
		m_reject_reason = "Unknown reason";
	case REJECTED_WITH_REASON:
		scheduler_handleJobRejected( m_current_job_id, m_reject_reason.c_str() );

		m_jobs_rejected++;
		setAutoClusterRejected( m_current_auto_cluster_id );
		nextJob();
		break;

	case SEND_JOB_INFO:
		if( !sendJobInfo(sock) ) {
				// We failed to talk to the negotiator, so close the socket.
			return MESSAGE_FINISHED;
		}
		break;

	case PERMISSION_AND_AD: {
		// If the slot we matched is partitionable, edit it
		// so it will look like the resulting dynamic slot. 
		// NOTE: Seems like we no longer need to do this here,
		// since we also do the fixup at claim time in
		// contactStartd().  - Todd 1/12 <*****@*****.**>
		if( !fixupPartitionableSlot(&m_current_job_ad,&m_match_ad) )
		{
			nextJob();
			break;
		}

		std::string slot_name_buf;
		m_match_ad.LookupString(ATTR_NAME,slot_name_buf);
		char const *slot_name = slot_name_buf.c_str();

		int offline = false;
		m_match_ad.EvalBool(ATTR_OFFLINE,NULL,offline);

		if( offline ) {
			dprintf(D_ALWAYS,"Job %d.%d matched to offline machine %s.\n",
					m_current_job_id.cluster,m_current_job_id.proc,slot_name);
			nextJob();
			break;
		}

		if( scheduler_handleMatch(m_current_job_id,m_claim_id.c_str(),m_match_ad,slot_name) )
		{
			m_jobs_matched++;
		}
		nextJob();
		break;
	}

	case END_NEGOTIATE:
		dprintf( D_ALWAYS, "Lost priority - %d jobs matched\n",
				 m_jobs_matched );

		m_negotiation_finished = true;
		break;
	default:
		EXCEPT("should never get here (negotiation op %d)",m_operation);
	}


	if( m_negotiation_finished ) {
			// the following function takes ownership of sock
		scheduler_handleNegotiationFinished( sock );
		sock = NULL;
	}
	else {
			// wait for negotiator to write a response
		messenger->startReceiveMsg( this, sock );
	}

		// By returning MESSAGE_CONTINUING, we tell messenger not to
		// close the socket.  Either we have finished negotiating and
		// sock has been taken care of by the scheduler (e.g. by
		// registering it to wait for the next NEGOTIATE command), or
		// we are not yet done with negotiating and we are waiting for
		// the next operation within the current negotiation round.
	return MESSAGE_CONTINUING;
}
Ejemplo n.º 6
0
DCMsg::MessageClosureEnum
ScheddNegotiate::messageReceived( DCMessenger *messenger, Sock *sock )
{
		// This is called when readMsg() returns true.
		// Now carry out the negotiator's request that we just read.

	switch( m_operation ) {

	case REJECTED:
		m_reject_reason = "Unknown reason";

	case REJECTED_WITH_REASON: {
		// To support resource request lists, the
		// reject reason may end with "...|autocluster|cluster.proc|"
		// if so, reset m_current_auto_cluster_id and m_current_job_id
		// with the values contained in the reject reason, and truncate
		// this information out of m_reject_reason.
		int pos = m_reject_reason.FindChar('|');
		if ( pos >= 0 ) {
			m_reject_reason.Tokenize();
			/*const char *reason =*/ m_reject_reason.GetNextToken("|",false);
			const char *ac = m_reject_reason.GetNextToken("|",false);
			const char *jobid = m_reject_reason.GetNextToken("|",false);
			if (ac && jobid) {
				int rr_cluster, rr_proc;
				m_current_auto_cluster_id = atoi(ac);
				StrToProcId(jobid,rr_cluster,rr_proc);
				if (rr_cluster != m_current_job_id.cluster || rr_proc != m_current_job_id.proc) {
					m_current_resources_delivered = 0;
				}
				m_current_job_id.cluster = rr_cluster;
				m_current_job_id.proc = rr_proc;
			}
			m_reject_reason.setChar(pos,'\0');	// will truncate string at pos
		}
		scheduler_handleJobRejected( m_current_job_id, m_reject_reason.c_str() );
		m_jobs_rejected++;
		setAutoClusterRejected( m_current_auto_cluster_id );
		nextJob();
		break;
	}

	case SEND_JOB_INFO:
		m_num_resource_reqs_sent = 0;  // clear counter of reqs sent this round
		if( !sendJobInfo(sock) ) {
				// We failed to talk to the negotiator, so close the socket.
			return MESSAGE_FINISHED;
		}
		break;

	case SEND_RESOURCE_REQUEST_LIST:
		m_num_resource_reqs_sent = 0; // clear counter of reqs sent this round
		if( !sendResourceRequestList(sock) ) {
				// We failed to talk to the negotiator, so close the socket.
			return MESSAGE_FINISHED;
		}
		break;

	case PERMISSION_AND_AD: {

		// When using request lists, one single
		// "m_current_job_id" is kinda meaningless if we just sent a whole
		// pile of jobs to the negotiator.  So we want to 
		// reset m_current_job_id with the job id info embedded in the offer 
		// that comes back from the negotiator (if it exists).  This will
		// happen with an 8.3.0+ negotiator, and is needed when using
		// resource request lists.  
		int rr_cluster = -1;
		int rr_proc = -1;
		m_match_ad.LookupInteger(ATTR_RESOURCE_REQUEST_CLUSTER, rr_cluster);
		m_match_ad.LookupInteger(ATTR_RESOURCE_REQUEST_PROC, rr_proc);
		if (rr_cluster != -1 && rr_proc != -1) {
			if (rr_cluster != m_current_job_id.cluster || rr_proc != m_current_job_id.proc) {
				m_current_resources_delivered = 0;
			}
			m_current_job_id.cluster = rr_cluster;
			m_current_job_id.proc = rr_proc;
		}

		m_current_resources_delivered++;

		std::string slot_name_buf;
		m_match_ad.LookupString(ATTR_NAME,slot_name_buf);
		char const *slot_name = slot_name_buf.c_str();

		int offline = false;
		m_match_ad.EvalBool(ATTR_OFFLINE,NULL,offline);

		if( offline ) {
			dprintf(D_ALWAYS,"Job %d.%d (delivered=%d) matched to offline machine %s.\n",
					m_current_job_id.cluster,m_current_job_id.proc,m_current_resources_delivered,slot_name);
			nextJob();
			break;
		}

		if( scheduler_handleMatch(m_current_job_id,m_claim_id.c_str(),m_extra_claims.c_str(), m_match_ad,slot_name) )
		{
			m_jobs_matched++;
		}

		nextJob();

		break;
	}

	case END_NEGOTIATE:
		dprintf( D_ALWAYS, "Lost priority - %d jobs matched\n",
				 m_jobs_matched );

		m_negotiation_finished = true;
		break;

	default:
		EXCEPT("should never get here (negotiation op %d)",m_operation);

	} // end of switch on m_operation

	if( m_negotiation_finished ) {
			// the following function takes ownership of sock
		scheduler_handleNegotiationFinished( sock );
		sock = NULL;
	}
	else {
			// wait for negotiator to write a response
		messenger->startReceiveMsg( this, sock );
	}

		// By returning MESSAGE_CONTINUING, we tell messenger not to
		// close the socket.  Either we have finished negotiating and
		// sock has been taken care of by the scheduler (e.g. by
		// registering it to wait for the next NEGOTIATE command), or
		// we are not yet done with negotiating and we are waiting for
		// the next operation within the current negotiation round.
	return MESSAGE_CONTINUING;
}
Ejemplo n.º 7
0
bool
ScheddNegotiate::sendJobInfo(Sock *sock, bool just_sig_attrs)
{
		// The Negotiator wants us to send it a job. 

	sock->encode();

	if( m_current_job_id.cluster == -1 && !nextJob() ) {
		if( !sock->snd_int(NO_MORE_JOBS,TRUE) ) {
			dprintf( D_ALWAYS, "Can't send NO_MORE_JOBS to mgr\n" );
			return false;
		}
		m_negotiation_finished = true;
		return true;
	}

	if( !sock->put(JOB_INFO) ) {
		dprintf( D_ALWAYS, "Can't send JOB_INFO to mgr\n" );
		return false;
	}

		// If schedd wants pslot preemption, advertise here
	m_current_job_ad.Assign(ATTR_WANT_PSLOT_PREEMPTION,
		param_boolean("ALLOW_PSLOT_PREEMPTION", false));
		
		// request match diagnostics
		// 0 = no match diagnostics
		// 1 = match diagnostics string
		// 2 = match diagnostics string decorated w/ autocluster + jobid
	m_current_job_ad.Assign(ATTR_WANT_MATCH_DIAGNOSTICS, (int) 2);

	m_current_job_ad.Assign(ATTR_WANT_PSLOT_PREEMPTION,
		param_boolean("ALLOW_PSLOT_PREEMPTION", false));


		// Send the ad to the negotiator
	int putad_result = 0;
	std::string auto_cluster_attrs;
	if ( just_sig_attrs &&
		 m_current_job_ad.LookupString(ATTR_AUTO_CLUSTER_ATTRS, auto_cluster_attrs) )
	{
		// don't send the entire job ad; just send significant attrs
		classad::References sig_attrs;

		StringTokenIterator list(auto_cluster_attrs);
		const std::string *attr;
		while ((attr = list.next_string())) { sig_attrs.insert(*attr); }

		// besides significant attrs, we also always want to send these attrs cuz
		// the matchmaker explicitly looks for them (for dprintfs or whatever).
		sig_attrs.insert(ATTR_OWNER);
		sig_attrs.insert(ATTR_CLUSTER_ID);
		sig_attrs.insert(ATTR_PROC_ID);
		sig_attrs.insert(ATTR_RESOURCE_REQUEST_COUNT);
		sig_attrs.insert(ATTR_GLOBAL_JOB_ID);
		sig_attrs.insert(ATTR_AUTO_CLUSTER_ID);
		sig_attrs.insert(ATTR_WANT_MATCH_DIAGNOSTICS);
		sig_attrs.insert(ATTR_WANT_PSLOT_PREEMPTION);
		sig_attrs.insert(ATTR_WANT_CLAIMING);  // used for Condor-G matchmaking
		// ship it!
		putad_result = putClassAd(sock, m_current_job_ad, 0, &sig_attrs);
	} else {
		// send the entire classad.  perhaps we are doing this because the
		// ad does not have ATTR_AUTO_CLUSTER_ATTRS defined for some reason,
		// or perhaps we are doing this because we were explicitly told to do so.
		putad_result = putClassAd(sock, m_current_job_ad);
	}
	if( !putad_result ) {
		dprintf( D_ALWAYS,
				 "Can't send job ad to mgr\n" );
		sock->end_of_message();
		return false;
	}
	if( !sock->end_of_message() ) {
		dprintf( D_ALWAYS,
				 "Can't send job eom to mgr\n" );
		return false;
	}

	m_current_resources_delivered = 0;
	m_current_resources_requested = 1;
	m_current_job_ad.LookupInteger(ATTR_RESOURCE_REQUEST_COUNT,m_current_resources_requested);

	dprintf( D_FULLDEBUG,
			 "Sent job %d.%d (autocluster=%d resources_requested=%d) to the negotiator\n",
			 m_current_job_id.cluster, m_current_job_id.proc,
			 m_current_auto_cluster_id, m_current_resources_requested );
	return true;
}
Ejemplo n.º 8
0
// Boilerplate program options code from http://www.radmangames.com/programming/how-to-use-boost-program_options
int main(int argc, char** argv)
{
	bool enable_sorting = true;
	double seed = 0;

	double power_mean = 300.0;				// Watts
	double power_stdev = power_mean/10.0;
	double power_estimate_error_stdev = power_stdev/10.0;

	double arrival_rate = 1000.0;				// Jobs per second

	double completion_time_mean = 0.9*NUM_SERVERS/arrival_rate;	// Seconds
	double completion_time_stdev = completion_time_mean/10.0;

	double sorting_time_min = completion_time_mean/1000.0;
	double sorting_time_max = sorting_time_min*2.0;

	double routing_time_min = sorting_time_min;
	double routing_time_max = sorting_time_max;
	try
	{
	/** Define and parse the program options
	*/
	namespace po = boost::program_options;
	po::options_description desc("Options");
	desc.add_options()
	("help", "Print help messages")
	("seed", po::value<double>(&seed), "Seed for random number generator")
	("power_estimate_error_stdev", po::value<double>(&power_estimate_error_stdev), "Power estimate standard deviation")
	("completion_time_stdev", po::value<double>(&completion_time_stdev), "Completion time standard deviation")
	("enable_sorting", po::value<bool>(&enable_sorting), "Enable sorting")
	;

	po::variables_map vm;
	try
	{
		po::store(po::parse_command_line(argc, argv, desc),
					vm); // can throw

		/** --help option
		*/
		if ( vm.count("help")  )
		{
			std::cout << "Basic Command Line Parameter App" << std::endl
			<< desc << std::endl;
			return SUCCESS;
		}

		po::notify(vm); // throws on error, so do after help in case
		// there are any problems
	}
	catch(po::error& e)
	{
		std::cerr << "ERROR: " << e.what() << std::endl << std::endl;
		std::cerr << desc << std::endl;
		return ERROR_IN_COMMAND_LINE;
	}

    // BEGIN APPLICATION CODE //

	DataCenterRandomPtr rand(new DataCenterRandom(
			seed,
			power_mean,
			power_stdev,
			arrival_rate,
			completion_time_mean,
			completion_time_stdev,
			sorting_time_min,
			sorting_time_max,
			routing_time_min,
			routing_time_max,
			power_estimate_error_stdev));

	PriorityTypePtr sortOrder(new JobEvent::PriorityType(JobEvent::TIME));

	PriorityQueueEventListPtr eventList(new PriorityQueueEventList(
			EVENT_LIST_LEN,
			sortOrder));

	PriorityQueueWorkingServers::SortingDomain sortingDomain;
	if(enable_sorting){
		sortingDomain = PriorityQueueWorkingServers::POWER_AWARE;
	}
	else{
		sortingDomain = PriorityQueueWorkingServers::RANDOM;
	}
	std::ostringstream s;
	s << seed;
	AccumulatorStatistics statistics(s.str());
	PriorityQueueWorkingServersPtr workingServersQueue(new PriorityQueueWorkingServers(
			NUM_SERVERS,
			rand,
			eventList,
			statistics.getAccumulator(AccumulatorStatistics::LATENCY),
			statistics.getAccumulator(AccumulatorStatistics::TOTAL_ENERGY),
			sortOrder,
			s.str() + "server_currents.csv",
			sortingDomain));
	PriorityQueueJobSorterPtr sortedJobQueue(new PriorityQueueJobSorter(
			SORTED_JOBS_LIST_LEN,
			rand,
			workingServersQueue,
			eventList,
			sortOrder,
			enable_sorting));
	QueueJobBufferPtr unsortedJobQueue(new QueueJobBuffer(
			UNSORTED_JOBS_LIST_LEN,
			statistics.getAccumulator(AccumulatorStatistics::TIME_BETWEEN_REJECTED_JOBS),
			sortedJobQueue));


#ifndef UNITTEST
	double time = 0;
	
	JobEventPtr arrival(new JobEvent(0, Event::JOB_ARRIVAL, sortOrder));
	
	_NOTEL(0,"Welcome to the data center stacked server simulator.");
	_LOGL(1,"Initialization parameters: ");
	_LOGL(1,"Sorting enabled: " << enable_sorting);
	_LOGL(1,"Simulation time: " << MAX_TIME);
	_LOGL(1,"Event list length: " << EVENT_LIST_LEN);
	_LOGL(1,"Unsorted jobs list length: " << UNSORTED_JOBS_LIST_LEN);
	_LOGL(1,"Sorted jobs list length: " << SORTED_JOBS_LIST_LEN);
	_LOGL(1,"Number of servers: " << NUM_SERVERS);
	_LOGL(1, *rand);

	// Queue up initial arrival.
	_NOTEL(2,"Creating initial arrival event.");
	eventList->enqueue(arrival);
	
	while(time < MAX_TIME){
		EventPtr e = eventList->dequeue();
		time = e->time;
		_NOTEL(2, time);

		if(e->type == Event::JOB_ARRIVAL || e->type == Event::JOB_FINISHED){
			JobEventPtr job = boost::static_pointer_cast<JobEvent>(e);

			if(job->type == Event::JOB_ARRIVAL){
				double t = time + rand->sample_arrivalTime();
				_NOTEL(2,"Processing job arrival event. Scheduling next job arrival for time " << t);
				JobEventPtr nextJob(new JobEvent(t, Event::JOB_ARRIVAL, sortOrder));
				eventList->enqueue(nextJob);


				if(!unsortedJobQueue->enqueue(job)){
					if(!sortedJobQueue->enqueue(job,time)){
						workingServersQueue->enqueue(job,time);
					}
				}
			}

			else{ // if(job->type == Event::JOB_FINISHED){
				_NOTEL(2,"Processing job removal event.");
				workingServersQueue->remove(job,time);
				if(enable_sorting){
					if(!sortedJobQueue->is_busy() && !sortedJobQueue->is_empty()){
						JobEventPtr job = sortedJobQueue->dequeueJob();
						workingServersQueue->enqueue(job,time);
					}
				}
				else{
					if(!unsortedJobQueue->is_empty()){
						JobEventPtr job = unsortedJobQueue->dequeue();
						workingServersQueue->enqueue(job,time);
					}
				}
			}
		}

		else if(e->type == Event::SORTED_QUEUE_READY){
			if(enable_sorting){
				_NOTEL(2,"Processing sorted queue ready event.");
				sortedJobQueue->reset_busy();

				if(!sortedJobQueue->is_empty() && !workingServersQueue->is_busy() && !workingServersQueue->is_full()){
					JobEventPtr job = sortedJobQueue->dequeueJob();
					workingServersQueue->enqueue(job,time);
				}

				if(!unsortedJobQueue->is_empty()){
					JobEventPtr job = unsortedJobQueue->dequeue();
					if(!sortedJobQueue->enqueue(job,time)){
						workingServersQueue->enqueue(job,time);
					}
				}
				else{
					_NOTEL(2,"Nothing for sorted queue to do.");
				}
			}
			else{
				_NOTEL(0,"PROBLEM!!!");
			}
		}

		else{ // if(e->type == Event::WORKING_SERVERS_QUEUE_READY){
			_NOTEL(2,"Processing working servers queue ready event.");
			workingServersQueue->reset_busy();
			if(enable_sorting){
				if(!sortedJobQueue->is_busy() && !sortedJobQueue->is_empty()){
					JobEventPtr job = sortedJobQueue->dequeueJob();
					workingServersQueue->enqueue(job,time);
				}
			}
			else{
				if(!unsortedJobQueue->is_empty()){
					JobEventPtr job = unsortedJobQueue->dequeue();
					workingServersQueue->enqueue(job,time);
				}
			}
		}
		//_NOTE(3, (*eventList) << std::endl);
	}
	
	_NOTEL(1,"Finished simulation of " << time << " virtual seconds.");
	_LOGL(0,"Simulation results: " << std::endl << statistics);
	AccumulatorPtr latency = statistics.getAccumulator(AccumulatorStatistics::LATENCY);
	AccumulatorPtr total_energy = statistics.getAccumulator(AccumulatorStatistics::TOTAL_ENERGY);
	_LOGL(0, latency->getMean() << "$\\pm$" << latency->getCI(0.95) << " & " << total_energy->getMean() << "$\\pm$" << total_energy->getCI(0.95));
	return 0;

// Run the Unit tests
#else
	_NOTEL(0,"Welcome to the unit tests.");
	test_accumulator(rand,statistics);
	test_working_servers(workingServersQueue,sortOrder,statistics);
	return 0;
#endif
    // END APPLICATION CODE //

  }
  catch(std::exception& e)
  {
    std::cerr << "Unhandled Exception reached the top of main: "
              << e.what() << ", application will now exit" << std::endl;
    return ERROR_UNHANDLED_EXCEPTION;

  }

  return SUCCESS;

} // main