/**
 * Carry out the negotiator request that readMsg() just decoded into
 * m_operation.  Invoked once readMsg() has returned true.
 *
 * @param messenger  messenger used to arm the next receive when the
 *                   negotiation round is still in progress
 * @param sock       connection to the negotiator
 * @return MESSAGE_FINISHED when sending job info failed, so that the
 *         socket gets closed; MESSAGE_CONTINUING in every other case.
 */
DCMsg::MessageClosureEnum
ScheddNegotiate::messageReceived( DCMessenger *messenger, Sock *sock )
{
    switch( m_operation ) {

    case REJECTED:
        // No reason was supplied; use a placeholder and share the
        // handling below.
        m_reject_reason = "Unknown reason";
        // fall through
    case REJECTED_WITH_REASON:
        scheduler_handleJobRejected( m_current_job_id, m_reject_reason.c_str() );
        ++m_jobs_rejected;
        setAutoClusterRejected( m_current_auto_cluster_id );
        nextJob();
        break;

    case SEND_JOB_INFO:
        if( !sendJobInfo(sock) ) {
            // Could not talk to the negotiator: ask for the socket
            // to be closed.
            return MESSAGE_FINISHED;
        }
        break;

    case PERMISSION_AND_AD: {
        // If the matched slot is partitionable, the fixup edits the
        // match ad so that it looks like the resulting dynamic slot.
        // (The original author noted this may be redundant, because
        // the same fixup is also done at claim time in contactStartd().)
        // Whatever happens in here, we move on to the next job after.
        if( fixupPartitionableSlot(&m_current_job_ad, &m_match_ad) ) {
            std::string machine_name;
            m_match_ad.LookupString(ATTR_NAME, machine_name);

            int is_offline = false;
            m_match_ad.EvalBool(ATTR_OFFLINE, NULL, is_offline);

            if( is_offline ) {
                // An offline machine is only logged; no match is handed
                // to the scheduler.
                dprintf(D_ALWAYS,"Job %d.%d matched to offline machine %s.\n",
                        m_current_job_id.cluster, m_current_job_id.proc,
                        machine_name.c_str());
            }
            else if( scheduler_handleMatch(m_current_job_id, m_claim_id.c_str(),
                                           m_match_ad, machine_name.c_str()) )
            {
                ++m_jobs_matched;
            }
        }
        nextJob();
        break;
    }

    case END_NEGOTIATE:
        dprintf( D_ALWAYS, "Lost priority - %d jobs matched\n", m_jobs_matched );
        m_negotiation_finished = true;
        break;

    default:
        EXCEPT("should never get here (negotiation op %d)",m_operation);
    }

    if( !m_negotiation_finished ) {
        // Still negotiating: wait for the negotiator to write its
        // next response.
        messenger->startReceiveMsg( this, sock );
    }
    else {
        // The scheduler takes ownership of sock from here on.
        scheduler_handleNegotiationFinished( sock );
        sock = NULL;
    }

    // MESSAGE_CONTINUING tells the messenger not to close the socket.
    // Either negotiation is over and the scheduler has taken care of
    // sock (e.g. by registering it to wait for the next NEGOTIATE
    // command), or we are waiting for the next operation within the
    // current negotiation round.
    return MESSAGE_CONTINUING;
}
/**
 * Carry out the negotiator request that readMsg() just decoded into
 * m_operation.  Invoked once readMsg() has returned true.
 *
 * @param messenger  messenger used to arm the next receive when the
 *                   negotiation round is still in progress
 * @param sock       connection to the negotiator
 * @return MESSAGE_FINISHED when sending job info or the resource
 *         request list failed, so that the socket gets closed;
 *         MESSAGE_CONTINUING in every other case.
 */
DCMsg::MessageClosureEnum
ScheddNegotiate::messageReceived( DCMessenger *messenger, Sock *sock )
{
    switch( m_operation ) {

    case REJECTED:
        // No reason was supplied; use a placeholder and share the
        // handling below.
        m_reject_reason = "Unknown reason";
        // fall through
    case REJECTED_WITH_REASON: {
        // To support resource request lists, the reject reason may end
        // with "...|autocluster|cluster.proc|".  If so, reset
        // m_current_auto_cluster_id and m_current_job_id from the values
        // embedded in the reason, and truncate that suffix out of
        // m_reject_reason.
        int pos = m_reject_reason.FindChar('|');
        if ( pos >= 0 ) {
            m_reject_reason.Tokenize();
            // The first token is the human-readable reason; skip it.
            /*const char *reason =*/ m_reject_reason.GetNextToken("|",false);
            const char *ac = m_reject_reason.GetNextToken("|",false);
            const char *jobid = m_reject_reason.GetNextToken("|",false);
            if (ac && jobid) {
                // Start from the same -1 sentinel the PERMISSION_AND_AD
                // branch uses, so these are never read while
                // indeterminate.
                // NOTE(review): the result of StrToProcId is not checked
                // here -- confirm how it reports a malformed job id.
                int rr_cluster = -1;
                int rr_proc = -1;
                m_current_auto_cluster_id = atoi(ac);
                StrToProcId(jobid,rr_cluster,rr_proc);
                if (rr_cluster != m_current_job_id.cluster ||
                    rr_proc != m_current_job_id.proc)
                {
                    // A different job than the one being tracked:
                    // restart the delivered-resources count.
                    m_current_resources_delivered = 0;
                }
                m_current_job_id.cluster = rr_cluster;
                m_current_job_id.proc = rr_proc;
            }
            m_reject_reason.setChar(pos,'\0');	// will truncate string at pos
        }
        scheduler_handleJobRejected( m_current_job_id, m_reject_reason.c_str() );
        m_jobs_rejected++;
        setAutoClusterRejected( m_current_auto_cluster_id );
        nextJob();
        break;
    }

    case SEND_JOB_INFO:
        m_num_resource_reqs_sent = 0;	// clear counter of reqs sent this round
        if( !sendJobInfo(sock) ) {
            // We failed to talk to the negotiator, so close the socket.
            return MESSAGE_FINISHED;
        }
        break;

    case SEND_RESOURCE_REQUEST_LIST:
        m_num_resource_reqs_sent = 0;	// clear counter of reqs sent this round
        if( !sendResourceRequestList(sock) ) {
            // We failed to talk to the negotiator, so close the socket.
            return MESSAGE_FINISHED;
        }
        break;

    case PERMISSION_AND_AD: {
        // When using request lists, one single "m_current_job_id" is
        // not meaningful if a whole pile of jobs was just sent to the
        // negotiator.  So reset m_current_job_id from the job id
        // embedded in the offer that comes back, if it exists.  This
        // happens with an 8.3.0+ negotiator and is needed when using
        // resource request lists.
        int rr_cluster = -1;
        int rr_proc = -1;
        m_match_ad.LookupInteger(ATTR_RESOURCE_REQUEST_CLUSTER, rr_cluster);
        m_match_ad.LookupInteger(ATTR_RESOURCE_REQUEST_PROC, rr_proc);
        if (rr_cluster != -1 && rr_proc != -1) {
            if (rr_cluster != m_current_job_id.cluster ||
                rr_proc != m_current_job_id.proc)
            {
                m_current_resources_delivered = 0;
            }
            m_current_job_id.cluster = rr_cluster;
            m_current_job_id.proc = rr_proc;
        }
        // Counted before the offline check below, so an offline match
        // is included in the count.
        m_current_resources_delivered++;

        std::string slot_name_buf;
        m_match_ad.LookupString(ATTR_NAME,slot_name_buf);
        char const *slot_name = slot_name_buf.c_str();

        int offline = false;
        m_match_ad.EvalBool(ATTR_OFFLINE,NULL,offline);

        if( offline ) {
            // An offline machine is only logged; no match is handed to
            // the scheduler.
            dprintf(D_ALWAYS,"Job %d.%d (delivered=%d) matched to offline machine %s.\n",
                    m_current_job_id.cluster,m_current_job_id.proc,m_current_resources_delivered,slot_name);
            nextJob();
            break;
        }

        if( scheduler_handleMatch(m_current_job_id,m_claim_id.c_str(),m_extra_claims.c_str(), m_match_ad,slot_name) )
        {
            m_jobs_matched++;
        }

        nextJob();
        break;
    }

    case END_NEGOTIATE:
        dprintf( D_ALWAYS, "Lost priority - %d jobs matched\n",
                 m_jobs_matched );
        m_negotiation_finished = true;
        break;

    default:
        EXCEPT("should never get here (negotiation op %d)",m_operation);

    } // end of switch on m_operation

    if( m_negotiation_finished ) {
        // the following function takes ownership of sock
        scheduler_handleNegotiationFinished( sock );
        sock = NULL;
    }
    else {
        // wait for negotiator to write a response
        messenger->startReceiveMsg( this, sock );
    }

    // By returning MESSAGE_CONTINUING, we tell messenger not to close
    // the socket.  Either we have finished negotiating and sock has
    // been taken care of by the scheduler (e.g. by registering it to
    // wait for the next NEGOTIATE command), or we are not yet done with
    // negotiating and we are waiting for the next operation within the
    // current negotiation round.
    return MESSAGE_CONTINUING;
}