Пример #1
0
/**
 * Send a batch of resource requests to the negotiator.
 *
 * Keeps calling nextJob() / sendJobInfo() until either
 * m_num_resource_reqs_to_send requests have gone out or there is
 * nothing left to send.
 *
 * @param sock  socket handed through to sendJobInfo().
 * @return false as soon as sendJobInfo() reports failure, true otherwise.
 */
bool
ScheddNegotiate::sendResourceRequestList(Sock *sock)
{
	// Statistics hook defined elsewhere; declared once here rather than
	// on every pass through the loop.
	extern void IncrementResourceRequestsSent();

	m_jobs_can_offer = scheduler_maxJobsToOffer();

	while ( m_num_resource_reqs_to_send > 0 ) {

		nextJob();

		const bool sent_ok = sendJobInfo(sock, true);
		if ( !sent_ok ) {
			return false;
		}

		if ( m_negotiation_finished ) {
			// Nothing more to send.  If at least one request already
			// went out in response to this message, the negotiation is
			// not really finished: we still want to receive the
			// negotiator's responses (e.g. matches).  The flag only
			// stays set when nothing was sent at all.
			m_negotiation_finished = ( m_num_resource_reqs_sent <= 0 );
			break;
		}

		// The ad just sent carried a resource_request_count covering the
		// current cluster, so drop that cluster from the list; otherwise
		// the next sendJobInfo() call would send its individual jobs too.
		if ( !m_jobs->empty() ) {
			ResourceRequestCluster *sent_cluster = m_jobs->front();
			m_jobs->pop_front();
			delete sent_cluster;
		}

		++m_num_resource_reqs_sent;
		--m_num_resource_reqs_to_send;

		IncrementResourceRequestsSent();
	}

	// No further requests will be sent now; nextJob() inspects this
	// counter, so make sure it reads zero.
	m_num_resource_reqs_to_send = 0;

	return true;
}
Пример #2
0
/**
 * Carry out the negotiator request that was just read (this runs once
 * readMsg() has returned true), dispatching on m_operation.
 *
 * @param messenger  used to start waiting for the negotiator's next message.
 * @param sock       negotiation socket; handed to
 *                   scheduler_handleNegotiationFinished() once the
 *                   negotiation is finished.
 * @return MESSAGE_FINISHED if sendJobInfo() fails (so the socket gets
 *         closed), MESSAGE_CONTINUING otherwise.
 */
DCMsg::MessageClosureEnum
ScheddNegotiate::messageReceived( DCMessenger *messenger, Sock *sock )
{
	switch( m_operation ) {

	case REJECTED:
	case REJECTED_WITH_REASON:
			// A plain REJECTED carries no reason text, so supply one;
			// everything else is shared between the two operations.
		if( m_operation == REJECTED ) {
			m_reject_reason = "Unknown reason";
		}
		scheduler_handleJobRejected( m_current_job_id, m_reject_reason.c_str() );

		++m_jobs_rejected;
		setAutoClusterRejected( m_current_auto_cluster_id );
		nextJob();
		break;

	case SEND_JOB_INFO: {
		const bool sent_ok = sendJobInfo(sock);
		if( !sent_ok ) {
				// Could not talk to the negotiator: have the socket closed.
			return MESSAGE_FINISHED;
		}
		break;
	}

	case PERMISSION_AND_AD:
			// If the matched slot is partitionable, it is edited to look
			// like the resulting dynamic slot.  (Original note: this is
			// probably redundant, since the same fixup also happens at
			// claim time in contactStartd().)  When the fixup fails the
			// match is dropped and we simply move on to the next job.
		if( fixupPartitionableSlot(&m_current_job_ad,&m_match_ad) ) {
			std::string matched_slot;
			m_match_ad.LookupString(ATTR_NAME,matched_slot);

			int is_offline = false;
			m_match_ad.EvalBool(ATTR_OFFLINE,NULL,is_offline);

			if( is_offline ) {
				dprintf(D_ALWAYS,"Job %d.%d matched to offline machine %s.\n",
						m_current_job_id.cluster,m_current_job_id.proc,matched_slot.c_str());
			}
			else if( scheduler_handleMatch(m_current_job_id,m_claim_id.c_str(),m_match_ad,matched_slot.c_str()) ) {
				++m_jobs_matched;
			}
		}
		nextJob();
		break;

	case END_NEGOTIATE:
		dprintf( D_ALWAYS, "Lost priority - %d jobs matched\n",
				 m_jobs_matched );

		m_negotiation_finished = true;
		break;

	default:
		EXCEPT("should never get here (negotiation op %d)",m_operation);
	}

	if( !m_negotiation_finished ) {
			// Still negotiating: wait for the negotiator's next message.
		messenger->startReceiveMsg( this, sock );
	}
	else {
			// The callee takes ownership of sock from here on.
		scheduler_handleNegotiationFinished( sock );
		sock = NULL;
	}

		// MESSAGE_CONTINUING tells the messenger not to close the socket:
		// either the scheduler now owns it (e.g. it registered it to wait
		// for the next NEGOTIATE command), or we are waiting on it for
		// the next operation of the current negotiation round.
	return MESSAGE_CONTINUING;
}
Пример #3
0
/**
 * Carry out the negotiator request that was just read (this runs once
 * readMsg() has returned true), dispatching on m_operation.
 *
 * @param messenger  used to start waiting for the negotiator's next message.
 * @param sock       negotiation socket; handed to
 *                   scheduler_handleNegotiationFinished() once the
 *                   negotiation is finished.
 * @return MESSAGE_FINISHED if sendJobInfo() or sendResourceRequestList()
 *         fails (so the socket gets closed), MESSAGE_CONTINUING otherwise.
 */
DCMsg::MessageClosureEnum
ScheddNegotiate::messageReceived( DCMessenger *messenger, Sock *sock )
{
	switch( m_operation ) {

	case REJECTED:
	case REJECTED_WITH_REASON: {
			// A plain REJECTED carries no reason text, so supply one.
			// The two operations share the rest of the handling; sharing
			// the labels keeps that explicit instead of relying on an
			// unannotated fallthrough.
		if( m_operation == REJECTED ) {
			m_reject_reason = "Unknown reason";
		}

		// To support resource request lists, the
		// reject reason may end with "...|autocluster|cluster.proc|"
		// if so, reset m_current_auto_cluster_id and m_current_job_id
		// with the values contained in the reject reason, and truncate
		// this information out of m_reject_reason.
		int pos = m_reject_reason.FindChar('|');
		if ( pos >= 0 ) {
			m_reject_reason.Tokenize();
			/*const char *reason =*/ m_reject_reason.GetNextToken("|",false);
			const char *ac = m_reject_reason.GetNextToken("|",false);
			const char *jobid = m_reject_reason.GetNextToken("|",false);
			if (ac && jobid) {
				// Start from the same -1 sentinel the PERMISSION_AND_AD
				// path uses.  The result of StrToProcId() is not checked,
				// so this guarantees the values read below are never
				// indeterminate even if the helper leaves them untouched.
				int rr_cluster = -1;
				int rr_proc = -1;
				m_current_auto_cluster_id = atoi(ac);
				StrToProcId(jobid,rr_cluster,rr_proc);
				if (rr_cluster != m_current_job_id.cluster || rr_proc != m_current_job_id.proc) {
					// Switching to a different job: restart its delivery count.
					m_current_resources_delivered = 0;
				}
				m_current_job_id.cluster = rr_cluster;
				m_current_job_id.proc = rr_proc;
			}
			m_reject_reason.setChar(pos,'\0');	// will truncate string at pos
		}
		scheduler_handleJobRejected( m_current_job_id, m_reject_reason.c_str() );
		m_jobs_rejected++;
		setAutoClusterRejected( m_current_auto_cluster_id );
		nextJob();
		break;
	}

	case SEND_JOB_INFO:
		m_num_resource_reqs_sent = 0;  // clear counter of reqs sent this round
		if( !sendJobInfo(sock) ) {
				// We failed to talk to the negotiator, so close the socket.
			return MESSAGE_FINISHED;
		}
		break;

	case SEND_RESOURCE_REQUEST_LIST:
		m_num_resource_reqs_sent = 0; // clear counter of reqs sent this round
		if( !sendResourceRequestList(sock) ) {
				// We failed to talk to the negotiator, so close the socket.
			return MESSAGE_FINISHED;
		}
		break;

	case PERMISSION_AND_AD: {

		// When using request lists, one single
		// "m_current_job_id" is kinda meaningless if we just sent a whole
		// pile of jobs to the negotiator.  So we want to
		// reset m_current_job_id with the job id info embedded in the offer
		// that comes back from the negotiator (if it exists).  This will
		// happen with an 8.3.0+ negotiator, and is needed when using
		// resource request lists.
		int rr_cluster = -1;
		int rr_proc = -1;
		m_match_ad.LookupInteger(ATTR_RESOURCE_REQUEST_CLUSTER, rr_cluster);
		m_match_ad.LookupInteger(ATTR_RESOURCE_REQUEST_PROC, rr_proc);
		if (rr_cluster != -1 && rr_proc != -1) {
			if (rr_cluster != m_current_job_id.cluster || rr_proc != m_current_job_id.proc) {
				// Switching to a different job: restart its delivery count.
				m_current_resources_delivered = 0;
			}
			m_current_job_id.cluster = rr_cluster;
			m_current_job_id.proc = rr_proc;
		}

		// Counted before the offline check, so an offline match is
		// included in the "delivered" figure logged below.
		m_current_resources_delivered++;

		std::string slot_name_buf;
		m_match_ad.LookupString(ATTR_NAME,slot_name_buf);
		char const *slot_name = slot_name_buf.c_str();

		int offline = false;
		m_match_ad.EvalBool(ATTR_OFFLINE,NULL,offline);

		if( offline ) {
			dprintf(D_ALWAYS,"Job %d.%d (delivered=%d) matched to offline machine %s.\n",
					m_current_job_id.cluster,m_current_job_id.proc,m_current_resources_delivered,slot_name);
			nextJob();
			break;
		}

		if( scheduler_handleMatch(m_current_job_id,m_claim_id.c_str(),m_extra_claims.c_str(), m_match_ad,slot_name) )
		{
			m_jobs_matched++;
		}

		nextJob();

		break;
	}

	case END_NEGOTIATE:
		dprintf( D_ALWAYS, "Lost priority - %d jobs matched\n",
				 m_jobs_matched );

		m_negotiation_finished = true;
		break;

	default:
		EXCEPT("should never get here (negotiation op %d)",m_operation);

	} // end of switch on m_operation

	if( m_negotiation_finished ) {
			// the following function takes ownership of sock
		scheduler_handleNegotiationFinished( sock );
		sock = NULL;
	}
	else {
			// wait for negotiator to write a response
		messenger->startReceiveMsg( this, sock );
	}

		// By returning MESSAGE_CONTINUING, we tell messenger not to
		// close the socket.  Either we have finished negotiating and
		// sock has been taken care of by the scheduler (e.g. by
		// registering it to wait for the next NEGOTIATE command), or
		// we are not yet done with negotiating and we are waiting for
		// the next operation within the current negotiation round.
	return MESSAGE_CONTINUING;
}