Beispiel #1
0
ClassAd *
GetJobAd( int cluster_id, int proc_id, bool /*expStartdAttrs*/, bool /*persist_expansions*/ )
{
	int	rval = -1;

		CurrentSysCall = CONDOR_GetJobAd;

		qmgmt_sock->encode();
		null_on_error( qmgmt_sock->code(CurrentSysCall) );
		null_on_error( qmgmt_sock->code(cluster_id) );
		null_on_error( qmgmt_sock->code(proc_id) );
		null_on_error( qmgmt_sock->end_of_message() );

		qmgmt_sock->decode();
		null_on_error( qmgmt_sock->code(rval) );
		if( rval < 0 ) {
			null_on_error( qmgmt_sock->code(terrno) );
			null_on_error( qmgmt_sock->end_of_message() );
			errno = terrno;
			return NULL;
		}
		ClassAd *ad = new ClassAd;

		if ( !(getClassAd(qmgmt_sock, *ad)) ) {
			delete ad;
			errno = ETIMEDOUT;
			return NULL;
		}

		null_on_error( qmgmt_sock->end_of_message() );

	return ad;
}
Beispiel #2
0
ClassAd *
GetNextDirtyJobByConstraint( char const *constraint, int initScan )
{
	int	rval = -1;

	CurrentSysCall = CONDOR_GetNextDirtyJobByConstraint;

	qmgmt_sock->encode();
	null_on_error( qmgmt_sock->code(CurrentSysCall) );
	null_on_error( qmgmt_sock->code(initScan) );
	null_on_error( qmgmt_sock->put(constraint) );
	null_on_error( qmgmt_sock->end_of_message() );

	qmgmt_sock->decode();
	null_on_error( qmgmt_sock->code(rval) );
	if( rval < 0 ) {
		null_on_error( qmgmt_sock->code(terrno) );
		null_on_error( qmgmt_sock->end_of_message() );
		errno = terrno;
		return NULL;
	}

	ClassAd *ad = new ClassAd;

	if ( ! (getClassAd(qmgmt_sock, *ad)) ) {
		delete ad;
		errno = ETIMEDOUT;
		return NULL;
	}

	null_on_error( qmgmt_sock->end_of_message() );

	return ad;
}
Beispiel #3
0
/* 
   This function reads of a ClaimId string, optional classad, and eom from the
   given stream.  It looks up that ClaimId in the resmgr to find
   the corresponding Resource*.  If such a Resource is found, we
   return the pointer to it, otherwise, we return NULL.  */
Resource*
stream_to_rip( Stream* stream, ClassAd * pad )
{
	char* id = NULL;
	Resource* rip;

	stream->decode();
	if( ! stream->get_secret(id) ) {
		dprintf( D_ALWAYS, "Can't read ClaimId\n" );
		free( id );
		return NULL;
	}
	// if we are not ad end of message, then there may be a classad payload containing argument options.
	if (pad && ! stream->peek_end_of_message()) {
		if ( ! getClassAd(stream, *pad)) {
			dprintf( D_ALWAYS, "Can't read options ClassAd after ClaimId\n");
		}
	}
	if( ! stream->end_of_message() ) {
		dprintf( D_ALWAYS, "Can't read end_of_message\n" );
		free( id );
		return NULL;
	}
	rip = resmgr->get_by_cur_id( id );
	if( !rip ) {
		ClaimIdParser idp( id );
		dprintf( D_ALWAYS, 
				 "Error: can't find resource with ClaimId (%s) -- perhaps this claim was already removed?\n", idp.publicClaimId() );
		free( id );
		return NULL;
	}
	free( id );
	return rip;
}
Beispiel #4
0
int
GetDirtyAttributes(int cluster_id, int proc_id, ClassAd *updated_attrs)
{
	int	rval = -1;
	MyString errs;

	CurrentSysCall = CONDOR_GetDirtyAttributes;

	qmgmt_sock->encode();
	neg_on_error( qmgmt_sock->code(CurrentSysCall) );
	neg_on_error( qmgmt_sock->code(cluster_id) );
	neg_on_error( qmgmt_sock->code(proc_id) );
	neg_on_error( qmgmt_sock->end_of_message() );

	qmgmt_sock->decode();
	neg_on_error( qmgmt_sock->code(rval) );
	if( rval < 0 ) {
		neg_on_error( qmgmt_sock->code(terrno) );
		neg_on_error( qmgmt_sock->end_of_message() );
		errno = terrno;
		return rval;
	}

	if ( !(getClassAd(qmgmt_sock, *updated_attrs)) ) {
		errno = ETIMEDOUT;
		return 0;
	}
	neg_on_error( qmgmt_sock->end_of_message() != 0 );

	return rval;
}
Beispiel #5
0
bool
DCStartd::drainJobs(int how_fast,bool resume_on_completion,char const *check_expr,char const *start_expr,std::string &request_id)
{
	std::string error_msg;
	ClassAd request_ad;
	Sock *sock = startCommand( DRAIN_JOBS, Sock::reli_sock, 20 );
	if( !sock ) {
		formatstr(error_msg,"Failed to start DRAIN_JOBS command to %s",name());
		newError(CA_FAILURE,error_msg.c_str());
		return false;
	}

	request_ad.Assign(ATTR_HOW_FAST,how_fast);
	request_ad.Assign(ATTR_RESUME_ON_COMPLETION,resume_on_completion);
	if( check_expr ) {
		request_ad.AssignExpr(ATTR_CHECK_EXPR,check_expr);
	}
	if( start_expr ) {
		request_ad.AssignExpr(ATTR_START_EXPR,start_expr);
	}

	if( !putClassAd(sock, request_ad) || !sock->end_of_message() ) {
		formatstr(error_msg,"Failed to compose DRAIN_JOBS request to %s",name());
		newError(CA_FAILURE,error_msg.c_str());
		delete sock;
		return false;
	}

	sock->decode();
	ClassAd response_ad;
	if( !getClassAd(sock, response_ad) || !sock->end_of_message() ) {
		formatstr(error_msg,"Failed to get response to DRAIN_JOBS request to %s",name());
		newError(CA_FAILURE,error_msg.c_str());
		delete sock;
		return false;
	}

	response_ad.LookupString(ATTR_REQUEST_ID,request_id);

	bool result = false;
	int error_code = 0;
	response_ad.LookupBool(ATTR_RESULT,result);
	if( !result ) {
		std::string remote_error_msg;
		response_ad.LookupString(ATTR_ERROR_STRING,remote_error_msg);
		response_ad.LookupInteger(ATTR_ERROR_CODE,error_code);
		formatstr(error_msg,
				"Received failure from %s in response to DRAIN_JOBS request: error code %d: %s",
				name(),error_code,remote_error_msg.c_str());
		newError(CA_FAILURE,error_msg.c_str());
		delete sock;
		return false;
	}

	delete sock;
	return true;
}
Beispiel #6
0
bool
ClassAdMsg::readMsg( DCMessenger * /*messenger*/, Sock *sock )
{
	if( !getClassAd( sock, m_msg ) ) {
		sockFailed( sock );
		return false;
	}
	return true;
}
Beispiel #7
0
int TransferQueueManager::HandleRequest(int cmd,Stream *stream)
{
	ReliSock *sock = (ReliSock *)stream;
	ASSERT( cmd == TRANSFER_QUEUE_REQUEST );

	ClassAd msg;
	sock->decode();
	if( !getClassAd( sock, msg ) || !sock->end_of_message() ) {
		dprintf(D_ALWAYS,
				"TransferQueueManager: failed to receive transfer request "
				"from %s.\n", sock->peer_description() );
		return FALSE;
	}

	bool downloading = false;
	MyString fname;
	MyString jobid;
	MyString queue_user;
	filesize_t sandbox_size;
	if( !msg.LookupBool(ATTR_DOWNLOADING,downloading) ||
		!msg.LookupString(ATTR_FILE_NAME,fname) ||
		!msg.LookupString(ATTR_JOB_ID,jobid) ||
		!msg.LookupString(ATTR_USER,queue_user) ||
		!msg.LookupInteger(ATTR_SANDBOX_SIZE,sandbox_size))
	{
		MyString msg_str;
		sPrintAd(msg_str, msg);
		dprintf(D_ALWAYS,"TransferQueueManager: invalid request from %s: %s\n",
				sock->peer_description(), msg_str.Value());
		return FALSE;
	}

		// Currently, we just create the client with the default max queue
		// age.  If it becomes necessary to customize the maximum age
		// on a case-by-case basis, it should be easy to adjust.

	TransferQueueRequest *client =
		new TransferQueueRequest(
			sock,
			sandbox_size,
			fname.Value(),
			jobid.Value(),
			queue_user.Value(),
			downloading,
			m_default_max_queue_age);

	if( !AddRequest( client ) ) {
		delete client;
		return KEEP_STREAM; // we have already closed this socket
	}

	return KEEP_STREAM;
}
Beispiel #8
0
bool
DCStarter::createJobOwnerSecSession(int timeout,char const *job_claim_id,char const *starter_sec_session,char const *session_info,MyString &owner_claim_id,MyString &error_msg,MyString &starter_version,MyString &starter_addr)
{
	ReliSock sock;

	if (IsDebugLevel(D_COMMAND)) {
		dprintf (D_COMMAND, "DCStarter::createJobOwnerSecSession(%s,...) making connection to %s\n",
			getCommandStringSafe(CREATE_JOB_OWNER_SEC_SESSION), _addr ? _addr : "NULL");
	}

	if( !connectSock(&sock, timeout, NULL) ) {
		error_msg = "Failed to connect to starter";
		return false;
	}

	if( !startCommand(CREATE_JOB_OWNER_SEC_SESSION, &sock,timeout,NULL,NULL,false,starter_sec_session) ) {
		error_msg = "Failed to send CREATE_JOB_OWNER_SEC_SESSION to starter";
		return false;
	}

	ClassAd input;
	input.Assign(ATTR_CLAIM_ID,job_claim_id);
	input.Assign(ATTR_SESSION_INFO,session_info);

	sock.encode();
	if( !putClassAd(&sock, input) || !sock.end_of_message() ) {
		error_msg = "Failed to compose CREATE_JOB_OWNER_SEC_SESSION to starter";
		return false;
	}

	sock.decode();

	ClassAd reply;
	if( !getClassAd(&sock, reply) || !sock.end_of_message() ) {
		error_msg = "Failed to get response to CREATE_JOB_OWNER_SEC_SESSION from starter";
		return false;
	}

	bool success = false;
	reply.LookupBool(ATTR_RESULT,success);
	if( !success ) {
		reply.LookupString(ATTR_ERROR_STRING,error_msg);
		return false;
	}

	reply.LookupString(ATTR_CLAIM_ID,owner_claim_id);
	reply.LookupString(ATTR_VERSION,starter_version);
		// get the full starter address from the starter in case it contains
		// extra CCB info that we don't already know about
	reply.LookupString(ATTR_STARTER_IP_ADDR,starter_addr);
	return true;
}
Beispiel #9
0
bool
DCStartd::cancelDrainJobs(char const *request_id)
{
	std::string error_msg;
	ClassAd request_ad;
	Sock *sock = startCommand( CANCEL_DRAIN_JOBS, Sock::reli_sock, 20 );
	if( !sock ) {
		formatstr(error_msg,"Failed to start CANCEL_DRAIN_JOBS command to %s",name());
		newError(CA_FAILURE,error_msg.c_str());
		return false;
	}

	if( request_id ) {
		request_ad.Assign(ATTR_REQUEST_ID,request_id);
	}

	if( !putClassAd(sock, request_ad) || !sock->end_of_message() ) {
		formatstr(error_msg,"Failed to compose CANCEL_DRAIN_JOBS request to %s",name());
		newError(CA_FAILURE,error_msg.c_str());
		return false;
	}

	sock->decode();
	ClassAd response_ad;
	if( !getClassAd(sock, response_ad) || !sock->end_of_message() ) {
		formatstr(error_msg,"Failed to get response to CANCEL_DRAIN_JOBS request to %s",name());
		newError(CA_FAILURE,error_msg.c_str());
		delete sock;
		return false;
	}

	bool result = false;
	int error_code = 0;
	response_ad.LookupBool(ATTR_RESULT,result);
	if( !result ) {
		std::string remote_error_msg;
		response_ad.LookupString(ATTR_ERROR_STRING,remote_error_msg);
		response_ad.LookupInteger(ATTR_ERROR_CODE,error_code);
		formatstr(error_msg,
				"Received failure from %s in response to CANCEL_DRAIN_JOBS request: error code %d: %s",
				name(),error_code,remote_error_msg.c_str());
		newError(CA_FAILURE,error_msg.c_str());
		delete sock;
		return false;
	}

	delete sock;
	return true;
}
Beispiel #10
0
int
Starter::receiveJobClassAdUpdate( Stream *stream )
{
	ClassAd update_ad;
	int final_update = 0;

		// It is expected that we will get here when the stream is closed.
		// Unfortunately, log noise will be generated when we try to read
		// from it.

	stream->decode();
	stream->timeout(10);
	if( !stream->get( final_update) ||
		!getClassAd( stream, update_ad ) ||
		!stream->end_of_message() )
	{
		final_update = 1;
	}
	else {
		dprintf(D_FULLDEBUG, "Received job ClassAd update from starter.\n");
		dPrintAd( D_JOB, update_ad );

		// In addition to new info about the job, the starter also
		// inserts contact info for itself (important for CCB and
		// shadow-starter reconnect, because startd needs to relay
		// starter's full contact info to the shadow when queried).
		// It's a bit of a hack to do it through this channel, but
		// better than nothing.
		update_ad.LookupString(ATTR_STARTER_IP_ADDR,m_starter_addr);

		if( s_claim ) {
			s_claim->receiveJobClassAdUpdate( update_ad );
		}
	}

	if( final_update ) {
		dprintf(D_FULLDEBUG, "Closing job ClassAd update socket from starter.\n");
		daemonCore->Cancel_Socket(s_job_update_sock);
		delete s_job_update_sock;
		s_job_update_sock = NULL;
	}
	return KEEP_STREAM;
}
Beispiel #11
0
int
GetAllJobsByConstraint_Next( ClassAd &ad )
{
	int rval = -1;

	ASSERT( CurrentSysCall == CONDOR_GetAllJobsByConstraint );

	neg_on_error( qmgmt_sock->code(rval) );
	if( rval < 0 ) {
		neg_on_error( qmgmt_sock->code(terrno) );
		neg_on_error( qmgmt_sock->end_of_message() );
		errno = terrno;
		return -1;
	}

	if ( ! (getClassAd(qmgmt_sock, ad)) ) {
		errno = ETIMEDOUT;
		return -1;
	}

	return 0;
}
Beispiel #12
0
bool
CCBListener::ReadMsgFromCCB()
{
	if( !m_sock ) {
		return false;
	}
	m_sock->timeout(CCB_TIMEOUT);
	ClassAd msg;
	if( !getClassAd( m_sock, msg ) || !m_sock->end_of_message() ) {
		dprintf(D_ALWAYS,
				"CCBListener: failed to receive message from CCB server %s\n",
				m_ccb_address.Value());
		Disconnected();
		return false;
	}

	m_last_contact_from_peer = time(NULL);
	RescheduleHeartbeat();

	int cmd = -1;
	msg.LookupInteger( ATTR_COMMAND, cmd );
	switch( cmd ) {
	case CCB_REGISTER:
		return HandleCCBRegistrationReply( msg );
	case CCB_REQUEST:
		return HandleCCBRequest( msg );
	case ALIVE:
		dprintf(D_FULLDEBUG,"CCBListener: received heartbeat from server.\n");
		return true;
	}

	MyString msg_str;
	sPrintAd(msg_str, msg);
	dprintf( D_ALWAYS,
			 "CCBListener: Unexpected message received from CCB "
			 "server: %s\n",
			 msg_str.Value() );
	return false;
}
Beispiel #13
0
ClassAd *
GetAllJobsByConstraint_imp( char const *constraint, char const *projection, ClassAdList &list)
{
	int	rval = -1;

		CurrentSysCall = CONDOR_GetAllJobsByConstraint;

		qmgmt_sock->encode();
		null_on_error( qmgmt_sock->code(CurrentSysCall) );
		null_on_error( qmgmt_sock->put(constraint) );
		null_on_error( qmgmt_sock->put(projection) );
		null_on_error( qmgmt_sock->end_of_message() );


		qmgmt_sock->decode();
		while (true) {
			null_on_error( qmgmt_sock->code(rval) );
			if( rval < 0 ) {
				null_on_error( qmgmt_sock->code(terrno) );
				null_on_error( qmgmt_sock->end_of_message() );
				errno = terrno;
				return NULL;
			}

			ClassAd *ad = new ClassAd;

			if ( ! (getClassAd(qmgmt_sock, *ad)) ) {
				delete ad;
				errno = ETIMEDOUT;
				return NULL;
			}
			list.Insert(ad);

		};
		null_on_error( qmgmt_sock->end_of_message() );

	return 0;
}
Beispiel #14
0
bool 
DCSchedd::receiveJobSandbox(const char* constraint, CondorError * errstack, int * numdone /*=0*/)
{
	if(numdone) { *numdone = 0; }
	ExprTree *tree = NULL;
	const char *lhstr;
	int reply;
	int i;
	ReliSock rsock;
	int JobAdsArrayLen;
	bool use_new_command = true;

	if ( version() ) {
		CondorVersionInfo vi( version() );
		if ( vi.built_since_version(6,7,7) ) {
			use_new_command = true;
		} else {
			use_new_command = false;
		}
	}

		// // // // // // // //
		// On the wire protocol
		// // // // // // // //

	rsock.timeout(20);   // years of research... :)
	if( ! rsock.connect(_addr) ) {
		dprintf( D_ALWAYS, "DCSchedd::receiveJobSandbox: "
				 "Failed to connect to schedd (%s)\n", _addr );
		return false;
	}
	if ( use_new_command ) {
		if( ! startCommand(TRANSFER_DATA_WITH_PERMS, (Sock*)&rsock, 0,
						   errstack) ) {
			dprintf( D_ALWAYS, "DCSchedd::receiveJobSandbox: "
					 "Failed to send command (TRANSFER_DATA_WITH_PERMS) "
					 "to the schedd\n" );
			return false;
		}
	} else {
		if( ! startCommand(TRANSFER_DATA, (Sock*)&rsock, 0, errstack) ) {
			dprintf( D_ALWAYS, "DCSchedd::receiveJobSandbox: "
					 "Failed to send command (TRANSFER_DATA) "
					 "to the schedd\n" );
			return false;
		}
	}

		// First, if we're not already authenticated, force that now. 
	if (!forceAuthentication( &rsock, errstack )) {
		dprintf( D_ALWAYS, 
			"DCSchedd::receiveJobSandbox: authentication failure: %s\n",
			errstack ? errstack->getFullText().c_str() : "" );
		return false;
	}

	rsock.encode();

		// Send our version if using the new command
	if ( use_new_command ) {
			// Need to use a named variable, else the wrong version of	
			// code() is called.
		char *my_version = strdup( CondorVersion() );
		if ( !rsock.code(my_version) ) {
			dprintf(D_ALWAYS,"DCSchedd:receiveJobSandbox: "
					"Can't send version string to the schedd\n");
			free( my_version );
			return false;
		}
		free( my_version );
	}

		// Send the constraint
	char * nc_constraint = strdup( constraint );	// de-const
	if ( !rsock.code(nc_constraint) ) {
		free( nc_constraint );
		dprintf(D_ALWAYS,"DCSchedd:receiveJobSandbox: "
				"Can't send JobAdsArrayLen to the schedd\n");
		return false;
	}
	free( nc_constraint );

	if ( !rsock.end_of_message() ) {
		std::string errmsg;
		formatstr(errmsg,
				"Can't send initial message (version + constraint) to schedd (%s)",
				_addr);

		dprintf(D_ALWAYS,"DCSchedd::receiveJobSandbox: %s\n", errmsg.c_str());

		if( errstack ) {
			errstack->push(
				"DCSchedd::receiveJobSandbox",
				CEDAR_ERR_EOM_FAILED,
				errmsg.c_str());
		}
		return false;
	}

		// Now, read how many jobs matched the constraint.
	rsock.decode();
	if ( !rsock.code(JobAdsArrayLen) ) {
		std::string errmsg;
		formatstr(errmsg,
				"Can't receive JobAdsArrayLen from the schedd (%s)",
				_addr);

		dprintf(D_ALWAYS,"DCSchedd::receiveJobSandbox: %s\n", errmsg.c_str());

		if( errstack ) {
			errstack->push(
				"DCSchedd::receiveJobSandbox",
				CEDAR_ERR_GET_FAILED,
				errmsg.c_str());
		}
		return false;
	}

	rsock.end_of_message();

	dprintf(D_FULLDEBUG,"DCSchedd:receiveJobSandbox: "
		"%d jobs matched my constraint (%s)\n",
		JobAdsArrayLen, constraint);

		// Now read all the files via the file transfer object
	for (i=0; i<JobAdsArrayLen; i++) {
		FileTransfer ftrans;
		ClassAd job;

			// grab job ClassAd
		if ( !getClassAd(&rsock, job) ) {
			std::string errmsg;
			formatstr(errmsg, "Can't receive job ad %d from the schedd", i);

			dprintf(D_ALWAYS, "DCSchedd::receiveJobSandbox: %s\n", errmsg.c_str());

			if( errstack ) {
				errstack->push(
							   "DCSchedd::receiveJobSandbox",
							   CEDAR_ERR_GET_FAILED,
							   errmsg.c_str());
			}
			return false;
		}

		rsock.end_of_message();

			// translate the job ad by replacing the 
			// saved SUBMIT_ attributes
		job.ResetExpr();
		while( job.NextExpr(lhstr, tree) ) {
			if ( lhstr && strncasecmp("SUBMIT_",lhstr,7)==0 ) {
					// this attr name starts with SUBMIT_
					// compute new lhs (strip off the SUBMIT_)
				const char *new_attr_name = strchr(lhstr,'_');
				ExprTree * pTree;
				ASSERT(new_attr_name);
				new_attr_name++;
					// insert attribute
				pTree = tree->Copy();
				job.Insert(new_attr_name, pTree, false);
			}
		}	// while next expr

		if ( !ftrans.SimpleInit(&job,false,false,&rsock) ) {
			if( errstack ) {
				int cluster = -1, proc = -1;
				job.LookupInteger(ATTR_CLUSTER_ID,cluster);
				job.LookupInteger(ATTR_PROC_ID,proc);
				errstack->pushf(
					"DCSchedd::receiveJobSandbox",
					FILETRANSFER_INIT_FAILED,
					"File transfer initialization failed for target job %d.%d",
					cluster, proc );
			}
			return false;
		}
		// We want files to be copied to their final places, so apply
		// any filename remaps when downloading.
		if ( !ftrans.InitDownloadFilenameRemaps(&job) ) {
			return false;
		}
		if ( use_new_command ) {
			ftrans.setPeerVersion( version() );
		}
		if ( !ftrans.DownloadFiles() ) {
			if( errstack ) {
				FileTransfer::FileTransferInfo ft_info = ftrans.GetInfo();

				int cluster = -1, proc = -1;
				job.LookupInteger(ATTR_CLUSTER_ID,cluster);
				job.LookupInteger(ATTR_PROC_ID,proc);
				errstack->pushf(
					"DCSchedd::receiveJobSandbox",
					FILETRANSFER_DOWNLOAD_FAILED,
					"File transfer failed for target job %d.%d: %s",
					cluster, proc, ft_info.error_desc.Value() );
			}
			return false;
		}
	}	
		
	rsock.end_of_message();

	rsock.encode();

	reply = OK;
	rsock.code(reply);
	rsock.end_of_message();

	if(numdone) { *numdone = JobAdsArrayLen; }

	return true;
}
Beispiel #15
0
int
CCBServer::HandleRegistration(int cmd,Stream *stream)
{
	ReliSock *sock = (ReliSock *)stream;
	ASSERT( cmd == CCB_REGISTER );

		// Avoid lengthy blocking on communication with our peer.
		// This command-handler should not get called until data
		// is ready to read.
	sock->timeout(1);

	ClassAd msg;
	sock->decode();
	if( !getClassAd( sock, msg ) || !sock->end_of_message() ) {
		dprintf(D_ALWAYS,
				"CCB: failed to receive registration "
				"from %s.\n", sock->peer_description() );
		return FALSE;
	}

	SetSmallBuffers(sock);

	MyString name;
	if( msg.LookupString(ATTR_NAME,name) ) {
			// target daemon name is purely for debugging purposes
		name.formatstr_cat(" on %s",sock->peer_description());
		sock->set_peer_description(name.Value());
	}

	CCBTarget *target = new CCBTarget(sock);

	MyString reconnect_cookie_str,reconnect_ccbid_str;
	CCBID reconnect_cookie,reconnect_ccbid;
	bool reconnected = false;
	if( msg.LookupString(ATTR_CLAIM_ID,reconnect_cookie_str) &&
		CCBIDFromString(reconnect_cookie,reconnect_cookie_str.Value()) &&
		msg.LookupString( ATTR_CCBID,reconnect_ccbid_str) &&
		CCBIDFromContactString(reconnect_ccbid,reconnect_ccbid_str.Value()) )
	{
		target->setCCBID( reconnect_ccbid );
		reconnected = ReconnectTarget( target, reconnect_cookie );
	}

	if( !reconnected ) {
		AddTarget( target );
	}

	CCBReconnectInfo *reconnect_info = GetReconnectInfo( target->getCCBID() );
	ASSERT( reconnect_info );

	sock->encode();

	ClassAd reply_msg;
	MyString ccb_contact;
	CCBIDToString( reconnect_info->getReconnectCookie(),reconnect_cookie_str );
		// We send our address as part of the CCB contact string, rather
		// than letting the target daemon fill it in.  This is to give us
		// potential flexibility on the CCB server side to do things like
		// assign different targets to different CCB server sub-processes,
		// each with their own command port.
	CCBIDToContactString( m_address.Value(), target->getCCBID(), ccb_contact );

	reply_msg.Assign(ATTR_CCBID,ccb_contact.Value());
	reply_msg.Assign(ATTR_COMMAND,CCB_REGISTER);
	reply_msg.Assign(ATTR_CLAIM_ID,reconnect_cookie_str.Value());

	if( !putClassAd( sock, reply_msg ) || !sock->end_of_message() ) {
		dprintf(D_ALWAYS,
				"CCB: failed to send registration response "
				"to %s.\n", sock->peer_description() );

		RemoveTarget( target );
		return KEEP_STREAM; // we have already closed this socket
	}

	return KEEP_STREAM;
}
Beispiel #16
0
bool DCStarter::startSSHD(char const *known_hosts_file,char const *private_client_key_file,char const *preferred_shells,char const *slot_name,char const *ssh_keygen_args,ReliSock &sock,int timeout,char const *sec_session_id,MyString &remote_user,MyString &error_msg,bool &retry_is_sensible)
{

	retry_is_sensible = false;

#ifndef HAVE_SSH_TO_JOB
	error_msg = "This version of Condor does not support ssh key exchange.";
	return false;
#else
	if( !connectSock(&sock, timeout, NULL) ) {
		error_msg = "Failed to connect to starter";
		return false;
	}

	if( !startCommand(START_SSHD, &sock,timeout,NULL,NULL,false,sec_session_id) ) {
		error_msg = "Failed to send START_SSHD to starter";
		return false;
	}

	ClassAd input;

	if( preferred_shells && *preferred_shells ) {
		input.Assign(ATTR_SHELL,preferred_shells);
	}

	if( slot_name && *slot_name ) {
			// This is a little silly.
			// We are telling the remote side the name of the slot so
			// that it can put it in the welcome message.
		input.Assign(ATTR_NAME,slot_name);
	}

	if( ssh_keygen_args && *ssh_keygen_args ) {
		input.Assign(ATTR_SSH_KEYGEN_ARGS,ssh_keygen_args);
	}

	sock.encode();
	if( !putClassAd(&sock, input) || !sock.end_of_message() ) {
		error_msg = "Failed to send START_SSHD request to starter";
		return false;
	}

	ClassAd result;
	sock.decode();
	if( !getClassAd(&sock, result) || !sock.end_of_message() ) {
		error_msg = "Failed to read response to START_SSHD from starter";
		return false;
	}

	bool success = false;
	result.LookupBool(ATTR_RESULT,success);
	if( !success ) {
		std::string remote_error_msg;
		result.LookupString(ATTR_ERROR_STRING,remote_error_msg);
		error_msg.formatstr("%s: %s",slot_name,remote_error_msg.c_str());
		retry_is_sensible = false;
		result.LookupBool(ATTR_RETRY,retry_is_sensible);
		return false;
	}

	result.LookupString(ATTR_REMOTE_USER,remote_user);

	std::string public_server_key;
	if( !result.LookupString(ATTR_SSH_PUBLIC_SERVER_KEY,public_server_key) ) {
		error_msg = "No public ssh server key received in reply to START_SSHD";
		return false;
	}
	std::string private_client_key;
	if( !result.LookupString(ATTR_SSH_PRIVATE_CLIENT_KEY,private_client_key) ) {
		error_msg = "No ssh client key received in reply to START_SSHD";
		return false;
	}


		// store the private client key
	unsigned char *decode_buf = NULL;
	int length = -1;
	condor_base64_decode(private_client_key.c_str(),&decode_buf,&length);
	if( !decode_buf ) {
		error_msg = "Error decoding ssh client key.";
		return false;
	}
	FILE *fp = safe_fcreate_fail_if_exists(private_client_key_file,"a",0400);
	if( !fp ) {
		error_msg.formatstr("Failed to create %s: %s",
						  private_client_key_file,strerror(errno));
		free( decode_buf );
		return false;
	}
	if( fwrite(decode_buf,length,1,fp)!=1 ) {
		error_msg.formatstr("Failed to write to %s: %s",
						  private_client_key_file,strerror(errno));
		fclose( fp );
		free( decode_buf );
		return false;
	}
	if( fclose(fp)!=0 ) {
		error_msg.formatstr("Failed to close %s: %s",
						  private_client_key_file,strerror(errno));
		free( decode_buf );
		return false;
	}
	fp = NULL;
	free( decode_buf );
	decode_buf = NULL;


		// store the public server key in the known_hosts file
	length = -1;
	condor_base64_decode(public_server_key.c_str(),&decode_buf,&length);
	if( !decode_buf ) {
		error_msg = "Error decoding ssh server key.";
		return false;
	}
	fp = safe_fcreate_fail_if_exists(known_hosts_file,"a",0600);
	if( !fp ) {
		error_msg.formatstr("Failed to create %s: %s",
						  known_hosts_file,strerror(errno));
		free( decode_buf );
		return false;
	}

		// prepend a host name pattern (*) to the public key to make a valid
		// record in the known_hosts file
	fprintf(fp,"* ");

	if( fwrite(decode_buf,length,1,fp)!=1 ) {
		error_msg.formatstr("Failed to write to %s: %s",
						  known_hosts_file,strerror(errno));
		fclose( fp );
		free( decode_buf );
		return false;
	}

	if( fclose(fp)!=0 ) {
		error_msg.formatstr("Failed to close %s: %s",
						  known_hosts_file,strerror(errno));
		free( decode_buf );
		return false;
	}
	fp = NULL;
	free( decode_buf );
	decode_buf = NULL;

	return true;
#endif
}
Beispiel #17
0
bool
DCStarter::peek(bool transfer_stdout, ssize_t &stdout_offset, bool transfer_stderr, ssize_t &stderr_offset, const std::vector<std::string> &filenames, std::vector<ssize_t> &offsets, size_t max_bytes, bool &retry_sensible, PeekGetFD &next, std::string &error_msg, unsigned timeout, const std::string &sec_session_id, DCTransferQueue *xfer_q)
{
	compat_classad::ClassAd ad;
	ad.InsertAttr(ATTR_JOB_OUTPUT, transfer_stdout);
	ad.InsertAttr("OutOffset", stdout_offset);
	ad.InsertAttr(ATTR_JOB_ERROR, transfer_stderr);
	ad.InsertAttr("ErrOffset", stderr_offset);
	ad.InsertAttr(ATTR_VERSION, CondorVersion());

	size_t total_files = 0;
	total_files += transfer_stdout ? 1 : 0;
	total_files += transfer_stderr ? 1 : 0;

	if (filenames.size())
	{
		total_files += filenames.size();
		std::vector<classad::ExprTree *> filelist; filelist.reserve(filenames.size());
		std::vector<classad::ExprTree *> offsetlist; offsetlist.reserve(filenames.size());
		std::vector<ssize_t>::const_iterator it2 = offsets.begin();
		for (std::vector<std::string>::const_iterator it = filenames.begin();
			it != filenames.end() && it2 != offsets.end();
			it++, it2++)
		{
			classad::Value value;
			value.SetStringValue(*it);
			filelist.push_back(classad::Literal::MakeLiteral(value));
			value.SetIntegerValue(*it2);
			offsetlist.push_back(classad::Literal::MakeLiteral(value));
		}
		classad::ExprTree *list(classad::ExprList::MakeExprList(filelist));
		ad.Insert("TransferFiles", list);
		list = classad::ExprList::MakeExprList(offsetlist);
		ad.Insert("TransferOffsets", list);
	}

	ad.InsertAttr(ATTR_MAX_TRANSFER_BYTES, static_cast<long long>(max_bytes));

	ReliSock sock;

	if( !connectSock(&sock, timeout, NULL) ) {
		error_msg = "Failed to connect to starter";
		return false;
	}

	if( !startCommand(STARTER_PEEK, &sock, timeout, NULL, NULL, false, sec_session_id.c_str()) ) {
		error_msg = "Failed to send START_PEEK to starter";
		return false;
	}
	sock.encode();
	if (!putClassAd(&sock, ad) || !sock.end_of_message()) {
		error_msg = "Failed to send request to starter";
		return false;
	}

	compat_classad::ClassAd response;
	sock.decode();
	if (!getClassAd(&sock, response) || !sock.end_of_message())
	{
		error_msg = "Failed to read response for peeking at logs.";
		return false;
	}
	dPrintAd(D_FULLDEBUG, response);

	bool success = false;
	if (!response.EvaluateAttrBool(ATTR_RESULT, success) || !success)
	{
		response.EvaluateAttrBool(ATTR_RETRY, retry_sensible);
		error_msg = "Remote operation failed.";
		response.EvaluateAttrString(ATTR_ERROR_STRING, error_msg);
		return false;
	}
	classad::Value valueX;
	classad_shared_ptr<classad::ExprList> list;
	if (!response.EvaluateAttr("TransferFiles", valueX) || !valueX.IsSListValue(list))
	{
		error_msg = "Unable to evaluate starter response";
		return false;
	}
	classad_shared_ptr<classad::ExprList> offlist;
	if (!response.EvaluateAttr("TransferOffsets", valueX) || !valueX.IsSListValue(offlist))
	{
		error_msg = "Unable to evaluate starter response (missing offsets)";
		return false;
	}

	size_t remaining = max_bytes;
	size_t file_count = 0;
	classad::ExprList::const_iterator it2 = offlist->begin();
	for (classad::ExprList::const_iterator it = list->begin();
		it != list->end() && it2 != offlist->end();
		it++, it2++)
	{
		classad::Value value;
		(*it2)->Evaluate(value);
		off_t off = -1;
		value.IsIntegerValue(off);
		(*it)->Evaluate(value);
		std::string filename;
		int64_t xfer_fd = -1;
		if (!value.IsStringValue(filename) && value.IsIntegerValue(xfer_fd))
		{
			if (xfer_fd == 0) filename = "_condor_stdout";
			if (xfer_fd == 1) filename = "_condor_stderr";
		}
		int fd = next.getNextFD(filename);
		filesize_t size = -1;
		int retval;
		if ((retval = sock.get_file(&size, fd, false, false, remaining, xfer_q)) && (retval != GET_FILE_MAX_BYTES_EXCEEDED))
		{
			error_msg = "Internal error when transferring file " + filename;
		}
		else if (size >= 0)
		{
			remaining -= max_bytes;
			file_count++;
			off += size;
		}
		else
		{
			error_msg = "Failed to transfer file " + filename;
		}
		if (xfer_fd == 0)
		{
			stdout_offset = off;
			//dprintf(D_FULLDEBUG, "New stdout offset: %ld\n", stdout_offset);
		}
		else if (xfer_fd == 1)
		{
			stderr_offset = off;
		}
		else
		{
			std::vector<ssize_t>::iterator it4 = offsets.begin();
			for (std::vector<std::string>::const_iterator it3 = filenames.begin();
				it3 != filenames.end() && it4 != offsets.end();
				it3++, it4++)
			{
				if (*it3 == filename) *it4 = off;
			}
		}
	}
	size_t remote_file_count;
	if (!sock.get(remote_file_count) || !sock.end_of_message())
	{
		error_msg = "Unable to get remote file count.";
		return false;
	}
	if (file_count != remote_file_count)
	{
		std::stringstream ss;
		ss << "Recieved " << file_count << " files, but remote side thought it sent " << remote_file_count << " files";
		error_msg = ss.str();
		return false;
	}
	if ((total_files != file_count) && !error_msg.size())
	{
		error_msg = "At least one file transfer failed.";
		return false;
	}
	return true;
}
Beispiel #18
0
bool
ScheddNegotiate::readMsg( DCMessenger * /*messenger*/, Sock *sock )
{
		// Get the negotiator's request.
		// Note that end_of_message() is handled by our caller.

	if( !sock->code(m_operation) ) {
		dprintf( D_ALWAYS, "Can't receive request from negotiator\n" );
		return false;
	}

	switch( m_operation ) {
	case REJECTED_WITH_REASON:
		if( !sock->code(m_reject_reason) ) {
			dprintf( D_ALWAYS,
					 "Can't receive reject reason from negotiator\n" );
			return false;
		}
		break;

	case REJECTED:
		break;

	case SEND_JOB_INFO:
		break;

	case SEND_RESOURCE_REQUEST_LIST:
		if( !sock->code(m_num_resource_reqs_to_send) ) {
			dprintf( D_ALWAYS,
					 "Can't receive num_resource_reqs_to_send from negotiator\n" );
			return false;
		}
		break;

	case PERMISSION:
			// No negotiator since 7.1.3 should ever send this
			// command, and older ones should not send it either,
			// since we advertise WantResAd=True.
		dprintf( D_ALWAYS, "Negotiator sent PERMISSION rather than expected PERMISSION_AND_AD!  Aborting.\n");
		return false;
		break;

	case PERMISSION_AND_AD: {
		char *claim_id = NULL;
		if( !sock->get_secret(claim_id) || !claim_id ) {
			dprintf( D_ALWAYS,
					 "Can't receive ClaimId from negotiator\n" );
			return false;
		}
		m_claim_id = claim_id;
		free( claim_id );

		size_t space = m_claim_id.find(' ');
		if (space != std::string::npos) {
			m_extra_claims = m_claim_id.substr(space + 1, std::string::npos);
			m_claim_id = m_claim_id.substr(0, space);
		}

		m_match_ad.Clear();

			// get startd ad from negotiator
		if( !getClassAd(sock, m_match_ad) ) {
			dprintf( D_ALWAYS,
					 "Can't get my match ad from negotiator\n" );
			return false;
		}
#if defined(ADD_TARGET_SCOPING)
		m_match_ad.AddTargetRefs( TargetJobAttrs );
#endif

		break;
	}
	case END_NEGOTIATE:
		break;
	default:
		dprintf( D_ALWAYS, "Got unexpected request (%d)\n", m_operation );
		return false;
	}

	return true;
}
Beispiel #19
0
int
CCBServer::HandleRequest(int cmd,Stream *stream)
{
	ReliSock *sock = (ReliSock *)stream;
	ASSERT( cmd == CCB_REQUEST );

		// Avoid lengthy blocking on communication with our peer.
		// This command-handler should not get called until data
		// is ready to read.
	sock->timeout(1);

	ClassAd msg;
	sock->decode();
	if( !getClassAd( sock, msg ) || !sock->end_of_message() ) {
		dprintf(D_ALWAYS,
				"CCB: failed to receive request "
				"from %s.\n", sock->peer_description() );
		return FALSE;
	}

	MyString name;
	if( msg.LookupString(ATTR_NAME,name) ) {
			// client name is purely for debugging purposes
		name.formatstr_cat(" on %s",sock->peer_description());
		sock->set_peer_description(name.Value());
	}
	MyString target_ccbid_str;
	MyString return_addr;
	MyString connect_id; // id target daemon should present to requester
	CCBID target_ccbid;

		// NOTE: using ATTR_CLAIM_ID for connect id so that it is
		// automatically treated as a secret over the network.
		// It must be presented by the target daemon when connecting
		// to the requesting client, so the client can confirm that
		// the connection is in response to its request.

	if( !msg.LookupString(ATTR_CCBID,target_ccbid_str) ||
		!msg.LookupString(ATTR_MY_ADDRESS,return_addr) ||
		!msg.LookupString(ATTR_CLAIM_ID,connect_id) )
	{
		MyString ad_str;
		sPrintAd(ad_str, msg);
		dprintf(D_ALWAYS,
				"CCB: invalid request from %s: %s\n",
				sock->peer_description(), ad_str.Value() );
		return FALSE;
	}
	if( !CCBIDFromString(target_ccbid,target_ccbid_str.Value()) ) {
		dprintf(D_ALWAYS,
				"CCB: request from %s contains invalid CCBID %s\n",
				sock->peer_description(), target_ccbid_str.Value() );
		return FALSE;
	}

	CCBTarget *target = GetTarget( target_ccbid );
	if( !target ) {
		dprintf(D_ALWAYS,
			"CCB: rejecting request from %s for ccbid %s because no daemon is "
			"currently registered with that id "
			"(perhaps it recently disconnected).\n",
			sock->peer_description(), target_ccbid_str.Value());

		MyString error_msg;
		error_msg.formatstr(
			"CCB server rejecting request for ccbid %s because no daemon is "
			"currently registered with that id "
			"(perhaps it recently disconnected).", target_ccbid_str.Value());
		RequestReply( sock, false, error_msg.Value(), 0, target_ccbid );
		return FALSE;
	}

	SetSmallBuffers(sock);

	CCBServerRequest *request =
		new CCBServerRequest(
			sock,
			target_ccbid,
			return_addr.Value(),
			connect_id.Value() );
	AddRequest( request, target );

	dprintf(D_FULLDEBUG,
			"CCB: received request id %lu from %s for target ccbid %s "
			"(registered as %s)\n",
			request->getRequestID(),
			request->getSock()->peer_description(),
			target_ccbid_str.Value(),
			target->getSock()->peer_description());

	ForwardRequestToTarget( request, target );

	return KEEP_STREAM;
}
Beispiel #20
0
ClassAd *CollectorEngine::
collect (int command,ClassAd *clientAd,const condor_sockaddr& from,int &insert,Sock *sock)
{
	ClassAd		*retVal;
	ClassAd		*pvtAd;
	int		insPvt;
	AdNameHashKey		hk;
	HashString	hashString;
	static int repeatStartdAds = -1;		// for debugging
	ClassAd		*clientAdToRepeat = NULL;
	_condor_auto_accum_runtime<collector_runtime_probe> rt(CollectorEngine_rucc_runtime);
	double rt_last = rt.begin;

	if (repeatStartdAds == -1) {
		repeatStartdAds = param_integer("COLLECTOR_REPEAT_STARTD_ADS",0);
	}

	if( !ValidateClassAd(command,clientAd,sock) ) {
		return NULL;
	}

	CollectorEngine_rucc_validateAd_runtime.Add(rt.tick(rt_last));

	// mux on command
	switch (command)
	{
	  case UPDATE_STARTD_AD:
	  case UPDATE_STARTD_AD_WITH_ACK:
		if ( repeatStartdAds > 0 ) {
			clientAdToRepeat = new ClassAd(*clientAd);
		}
		if (!makeStartdAdHashKey (hk, clientAd))
		{
			dprintf (D_ALWAYS, "Could not make hashkey --- ignoring ad\n");
			insert = -3;
			retVal = 0;
			break;
		}
		hashString.Build( hk );

		CollectorEngine_rucc_makeHashKey_runtime.Add(rt.tick(rt_last));

		retVal=updateClassAd (StartdAds, "StartdAd     ", "Start",
							  clientAd, hk, hashString, insert, from );

		if (last_updateClassAd_was_insert) { CollectorEngine_rucc_insertAd_runtime.Add(rt.tick(rt_last));
		} else { CollectorEngine_rucc_updateAd_runtime.Add(rt.tick(rt_last)); }

		// if we want to store private ads
		if (!sock)
		{
			dprintf (D_ALWAYS, "Want private ads, but no socket given!\n");
			break;
		}
		else
		{
			if (!(pvtAd = new ClassAd))
			{
				EXCEPT ("Memory error!");
			}
			if( !getClassAd(sock, *pvtAd) )
			{
				dprintf(D_FULLDEBUG,"\t(Could not get startd's private ad)\n");
				delete pvtAd;
				break;
			}

				// Fix up some stuff in the private ad that we depend on.
				// We started doing this in 7.2.0, so once we no longer
				// care about compatibility with stuff from before then,
				// the startd could stop bothering to send these attributes.

				// Queries of private ads depend on the following:
			SetMyTypeName( *pvtAd, STARTD_ADTYPE );

				// Negotiator matches up private ad with public ad by
				// using the following.
			if( retVal ) {
				CopyAttribute( ATTR_MY_ADDRESS, *pvtAd, *retVal );
				CopyAttribute( ATTR_NAME, *pvtAd, *retVal );
			}

			CollectorEngine_rucc_getPvtAd_runtime.Add(rt.tick(rt_last));

			// insert the private ad into its hashtable --- use the same
			// hash key as the public ad
			(void) updateClassAd (StartdPrivateAds, "StartdPvtAd  ",
								  "StartdPvt", pvtAd, hk, hashString, insPvt,
								  from );
			if (last_updateClassAd_was_insert) { CollectorEngine_rucc_insertPvtAd_runtime.Add(rt.tick(rt_last));
			} else { CollectorEngine_rucc_updatePvtAd_runtime.Add(rt.tick(rt_last)); }
		}

		// create fake duplicates of this ad, each with a different name, if
		// we are told to do so.  this feature exists for developer
		// scalability testing.
		if ( repeatStartdAds > 0 && clientAdToRepeat ) {
			ClassAd *fakeAd;
			int n;
			char newname[150],oldname[130];
			oldname[0] = '\0';
			clientAdToRepeat->LookupString("Name",oldname,sizeof(oldname));
			for (n=0;n<repeatStartdAds;n++) {
				fakeAd = new ClassAd(*clientAdToRepeat);
				snprintf(newname,sizeof(newname),
						 "Name=\"fake%d-%s\"",n,oldname);
				fakeAd->Insert(newname);
				makeStartdAdHashKey (hk, fakeAd);
				hashString.Build( hk );
				if (! updateClassAd (StartdAds, "StartdAd     ", "Start",
							  fakeAd, hk, hashString, insert, from ) )
				{
					// don't leak memory if there is some failure
					delete fakeAd;
				}
			}
			delete clientAdToRepeat;
			clientAdToRepeat = NULL;
			CollectorEngine_rucc_repeatAd_runtime.Add(rt.tick(rt_last));
		}
		break;

	  case MERGE_STARTD_AD:
		if (!makeStartdAdHashKey (hk, clientAd))
		{
			dprintf (D_ALWAYS, "Could not make hashkey --- ignoring ad\n");
			insert = -3;
			retVal = 0;
			break;
		}
		hashString.Build( hk );
		retVal=mergeClassAd (StartdAds, "StartdAd     ", "Start",
							  clientAd, hk, hashString, insert, from );
		break;

	  case UPDATE_SCHEDD_AD:
		if (!makeScheddAdHashKey (hk, clientAd))
		{
			dprintf (D_ALWAYS, "Could not make hashkey --- ignoring ad\n");
			insert = -3;
			retVal = 0;
			break;
		}
		hashString.Build( hk );
		retVal=updateClassAd (ScheddAds, "ScheddAd     ", "Schedd",
							  clientAd, hk, hashString, insert, from );
		break;

	  case UPDATE_SUBMITTOR_AD:
		// use the same hashkey function as a schedd ad
		if (!makeScheddAdHashKey (hk, clientAd))
		{
			dprintf (D_ALWAYS, "Could not make hashkey --- ignoring ad\n");
			insert = -3;
			retVal = 0;
			break;
		}
		// since submittor ads always follow a schedd ad, and a master check is
		// performed for schedd ads, we don't need a master check in here
		hashString.Build( hk );
		retVal=updateClassAd (SubmittorAds, "SubmittorAd  ", "Submittor",
							  clientAd, hk, hashString, insert, from );
		break;

	  case UPDATE_LICENSE_AD:
		// use the same hashkey function as a schedd ad
		if (!makeLicenseAdHashKey (hk, clientAd))
		{
			dprintf (D_ALWAYS, "Could not make hashkey --- ignoring ad\n");
			insert = -3;
			retVal = 0;
			break;
		}
		// since submittor ads always follow a schedd ad, and a master check is
		// performed for schedd ads, we don't need a master check in here
		hashString.Build( hk );
		retVal=updateClassAd (LicenseAds, "LicenseAd  ", "License",
							  clientAd, hk, hashString, insert, from );
		break;

	  case UPDATE_MASTER_AD:
		if (!makeMasterAdHashKey (hk, clientAd))
		{
			dprintf (D_ALWAYS, "Could not make hashkey --- ignoring ad\n");
			insert = -3;
			retVal = 0;
			break;
		}
		hashString.Build( hk );
		retVal=updateClassAd (MasterAds, "MasterAd     ", "Master",
							  clientAd, hk, hashString, insert, from );
		break;

	  case UPDATE_CKPT_SRVR_AD:
		if (!makeCkptSrvrAdHashKey (hk, clientAd))
		{
			dprintf (D_ALWAYS, "Could not make hashkey --- ignoring ad\n");
			insert = -3;
			retVal = 0;
			break;
		}
		hashString.Build( hk );
		retVal=updateClassAd (CkptServerAds, "CkptSrvrAd   ", "CkptSrvr",
							  clientAd, hk, hashString, insert, from );
		break;

	  case UPDATE_COLLECTOR_AD:
		if (!makeCollectorAdHashKey (hk, clientAd))
		{
			dprintf (D_ALWAYS, "Could not make hashkey --- ignoring ad\n");
			insert = -3;
			retVal = 0;
			break;
		}
		hashString.Build( hk );
		retVal=updateClassAd (CollectorAds, "CollectorAd  ", "Collector",
							  clientAd, hk, hashString, insert, from );
		break;

	  case UPDATE_STORAGE_AD:
		if (!makeStorageAdHashKey (hk, clientAd))
		{
			dprintf (D_ALWAYS, "Could not make hashkey --- ignoring ad\n");
			insert = -3;
			retVal = 0;
			break;
		}
		hashString.Build( hk );
		retVal=updateClassAd (StorageAds, "StorageAd  ", "Storage",
							  clientAd, hk, hashString, insert, from );
		break;

	  case UPDATE_ACCOUNTING_AD:
		if (!makeAccountingAdHashKey (hk, clientAd))
		{
			dprintf (D_ALWAYS, "Could not make hashkey --- ignoring ad\n");
			insert = -3;
			retVal = 0;
			break;
		}
		hashString.Build( hk );
		retVal=updateClassAd (AccountingAds, "AccountingAd  ", "Accouting",
							  clientAd, hk, hashString, insert, from );
		break;

	  case UPDATE_NEGOTIATOR_AD:
		if (!makeNegotiatorAdHashKey (hk, clientAd))
		{
			dprintf (D_ALWAYS, "Could not make hashkey --- ignoring ad\n");
			insert = -3;
			retVal = 0;
			break;
		}
		hashString.Build( hk );
		if (m_allowOnlyOneNegotiator) {
			// first, purge all the existing negotiator ads, since we
			// want to enforce that *ONLY* 1 negotiator is in the
			// collector any given time.
			purgeHashTable( NegotiatorAds );
		}
		retVal=updateClassAd (NegotiatorAds, "NegotiatorAd  ", "Negotiator",
							  clientAd, hk, hashString, insert, from );
		break;

	  case UPDATE_HAD_AD:
		if (!makeHadAdHashKey (hk, clientAd))
		{
			dprintf (D_ALWAYS, "Could not make hashkey --- ignoring ad\n");
			insert = -3;
			retVal = 0;
			break;
		}
		hashString.Build( hk );
		retVal=updateClassAd (HadAds, "HadAd  ", "HAD",
							  clientAd, hk, hashString, insert, from );
		break;

	  case UPDATE_GRID_AD:
		if (!makeGridAdHashKey(hk, clientAd))
		{
			dprintf (D_ALWAYS, "Could not make hashkey --- ignoring ad\n");
			insert = -3;
			retVal = 0;
			break;
		}
		hashString.Build( hk );
		retVal=updateClassAd (GridAds, "GridAd  ", "Grid",
							  clientAd, hk, hashString, insert, from );
          break;

	  case UPDATE_AD_GENERIC:
	  {
		  const char *type_str = GetMyTypeName(*clientAd);
		  if (type_str == NULL) {
			  dprintf(D_ALWAYS, "collect: UPDATE_AD_GENERIC: ad has no type\n");
			  insert = -3;
			  retVal = 0;
			  break;
		  }
		  MyString type(type_str);
		  CollectorHashTable *cht = findOrCreateTable(type);
		  if (cht == NULL) {
			  dprintf(D_ALWAYS, "collect: findOrCreateTable failed\n");
			  insert = -3;
			  retVal = 0;
			  break;
		  }
		  if (!makeGenericAdHashKey (hk, clientAd))
		  {
			  dprintf(D_ALWAYS, "Could not make hashkey --- ignoring ad\n");
			  insert = -3;
			  retVal = 0;
			  break;
		  }
		  hashString.Build(hk);
		  retVal = updateClassAd(*cht, type_str, type_str, clientAd,
					 hk, hashString, insert, from);
		  break;
	  }

	  case QUERY_STARTD_ADS:
	  case QUERY_SCHEDD_ADS:
	  case QUERY_MASTER_ADS:
	  case QUERY_SUBMITTOR_ADS:
	  case QUERY_CKPT_SRVR_ADS:
	  case QUERY_STARTD_PVT_ADS:
	  case QUERY_COLLECTOR_ADS:
  	  case QUERY_NEGOTIATOR_ADS:
  	  case QUERY_HAD_ADS:
	  case QUERY_GENERIC_ADS:
	  case INVALIDATE_STARTD_ADS:
	  case INVALIDATE_SCHEDD_ADS:
	  case INVALIDATE_MASTER_ADS:
	  case INVALIDATE_CKPT_SRVR_ADS:
	  case INVALIDATE_SUBMITTOR_ADS:
	  case INVALIDATE_COLLECTOR_ADS:
	  case INVALIDATE_NEGOTIATOR_ADS:
	  case INVALIDATE_HAD_ADS:
	  case INVALIDATE_ADS_GENERIC:
		// these are not implemented in the engine, but we allow another
		// daemon to detect that these commands have been given
	    insert = -2;
		retVal = 0;
	    break;

	  default:
		dprintf (D_ALWAYS, "Received illegal command: %d\n", command);
		insert = -1;
		retVal = 0;
	}

	if (command != UPDATE_STARTD_AD && command != UPDATE_STARTD_AD_WITH_ACK) {
		CollectorEngine_rucc_other_runtime.Add(rt.tick(rt_last));
	}


	// return the updated ad
	return retVal;
}
Beispiel #21
0
void
CCBServer::HandleRequestResultsMsg( CCBTarget *target )
{
		// Reply from target daemon about whether it succeeded in
		// connecting to the requested client.

	Sock *sock = target->getSock();

	ClassAd msg;
	sock->decode();
	if( !getClassAd( sock, msg ) || !sock->end_of_message() ) {
			// disconnect
		dprintf(D_FULLDEBUG,
				"CCB: received disconnect from target daemon %s "
				"with ccbid %lu.\n",
				sock->peer_description(), target->getCCBID() );
		RemoveTarget( target );
		return;
	}

	int command = 0;
	if( msg.LookupInteger( ATTR_COMMAND, command ) && command == ALIVE ) {
		SendHeartbeatResponse( target );
		return;
	}

	target->decPendingRequestResults();

	bool success = false;
	MyString error_msg;
	MyString reqid_str;
	CCBID reqid;
	MyString connect_id;
	msg.LookupBool( ATTR_RESULT, success );
	msg.LookupString( ATTR_ERROR_STRING, error_msg );
	msg.LookupString( ATTR_REQUEST_ID, reqid_str );
	msg.LookupString( ATTR_CLAIM_ID, connect_id );

	if( !CCBIDFromString( reqid, reqid_str.Value() ) ) {
		MyString msg_str;
		sPrintAd(msg_str, msg);
		dprintf(D_ALWAYS,
				"CCB: received reply from target daemon %s with ccbid %lu "
				"without a valid request id: %s\n",
				sock->peer_description(),
				target->getCCBID(),
				msg_str.Value());
		RemoveTarget( target );
		return;
	}

	CCBServerRequest *request = GetRequest( reqid );
	if( request && request->getSock()->readReady() ) {
		// Request socket must have just closed.  To avoid noise in
		// logs when we fail to write to it, delete the request now.
		RemoveRequest( request );
		request = NULL;
	}

	char const *request_desc = "(client which has gone away)";
	if( request ) {
		request_desc = request->getSock()->peer_description();
	}

	if( success ) {
		dprintf(D_FULLDEBUG,"CCB: received 'success' from target daemon %s "
				"with ccbid %lu for "
				"request %s from %s.\n",
				sock->peer_description(),
				target->getCCBID(),
				reqid_str.Value(),
				request_desc);
	}
	else {
		dprintf(D_FULLDEBUG,"CCB: received error from target daemon %s "
				"with ccbid %lu for "
				"request %s from %s: %s\n",
				sock->peer_description(),
				target->getCCBID(),
				reqid_str.Value(),
				request_desc,
				error_msg.Value());
	}

	if( !request ) {
		if( success ) {
				// expected: the client has gone away; it got what it wanted
			return;
		}
		dprintf( D_FULLDEBUG,
				 "CCB: client for request %s to target daemon %s with ccbid "
				 "%lu disappeared before receiving error details.\n",
				 reqid_str.Value(),
				 sock->peer_description(),
				 target->getCCBID());
		return;
	}
	if( connect_id != request->getConnectID() ) {
		MyString msg_str;
		sPrintAd(msg_str, msg);
		dprintf( D_FULLDEBUG,
				 "CCB: received wrong connect id (%s) from target daemon %s "
				 "with ccbid %lu for "
				 "request %s\n",
				 connect_id.Value(),
				 sock->peer_description(),
				 target->getCCBID(),
				 reqid_str.Value());
		RemoveTarget( target );
		return;
	}

	RequestFinished( request, success, error_msg.Value() );
}
Beispiel #22
0
ClassAd*
DCSchedd::actOnJobs( JobAction action,
					 const char* constraint, StringList* ids, 
					 const char* reason, const char* reason_attr,
					 const char* reason_code, const char* reason_code_attr,
					 action_result_type_t result_type,
					 bool notify_scheduler,
					 CondorError * errstack )
{
	char* tmp = NULL;
	char buf[512];
	int size, reply;
	ReliSock rsock;

		// // // // // // // //
		// Construct the ad we want to send
		// // // // // // // //

	ClassAd cmd_ad;

	sprintf( buf, "%s = %d", ATTR_JOB_ACTION, action );
	cmd_ad.Insert( buf );
	
	sprintf( buf, "%s = %d", ATTR_ACTION_RESULT_TYPE, 
			 (int)result_type );
	cmd_ad.Insert( buf );

	sprintf( buf, "%s = %s", ATTR_NOTIFY_JOB_SCHEDULER, 
			 notify_scheduler ? "True" : "False" );
	cmd_ad.Insert( buf );

	if( constraint ) {
		if( ids ) {
				// This is a programming error, not a run-time one
			EXCEPT( "DCSchedd::actOnJobs has both constraint and ids!" );
		}
		size = strlen(constraint) + strlen(ATTR_ACTION_CONSTRAINT) + 4;  
		tmp = (char*) malloc( size*sizeof(char) );
		if( !tmp ) {
			EXCEPT( "Out of memory!" );
		}
		sprintf( tmp, "%s = %s", ATTR_ACTION_CONSTRAINT, constraint ); 
		if( ! cmd_ad.Insert(tmp) ) {
			dprintf( D_ALWAYS, "DCSchedd::actOnJobs: "
					 "Can't insert constraint (%s) into ClassAd!\n",
					 constraint );
			free( tmp );
			return NULL;
		}			
		free( tmp );
		tmp = NULL;
	} else if( ids ) {
		char* action_ids = ids->print_to_string();
		if ( action_ids ) {
			size = strlen(action_ids) + strlen(ATTR_ACTION_IDS) + 7;
			tmp = (char*) malloc( size*sizeof(char) );
			if( !tmp ) {
				EXCEPT( "Out of memory!" );
			}
			sprintf( tmp, "%s = \"%s\"", ATTR_ACTION_IDS, action_ids );
			cmd_ad.Insert( tmp );
			free( tmp );
			tmp = NULL;
			free(action_ids);
			action_ids = NULL;
		}
	} else {
		EXCEPT( "DCSchedd::actOnJobs called without constraint or ids" );
	}

	if( reason_attr && reason ) {
		size = strlen(reason_attr) + strlen(reason) + 7;
		tmp = (char*) malloc( size*sizeof(char) );
		if( !tmp ) {
			EXCEPT( "Out of memory!" );
		}
		sprintf( tmp, "%s = \"%s\"", reason_attr, reason );
		cmd_ad.Insert( tmp );
		free( tmp );
		tmp = NULL;
	}

	if( reason_code_attr && reason_code ) {
		cmd_ad.AssignExpr(reason_code_attr,reason_code);
	}

		// // // // // // // //
		// On the wire protocol
		// // // // // // // //

	rsock.timeout(20);   // years of research... :)
	if( ! rsock.connect(_addr) ) {
		dprintf( D_ALWAYS, "DCSchedd::actOnJobs: "
				 "Failed to connect to schedd (%s)\n", _addr );
		return NULL;
	}
	if( ! startCommand(ACT_ON_JOBS, (Sock*)&rsock, 0, errstack) ) {
		dprintf( D_ALWAYS, "DCSchedd::actOnJobs: "
				 "Failed to send command (ACT_ON_JOBS) to the schedd\n" );
		return NULL;
	}
		// First, if we're not already authenticated, force that now. 
	if (!forceAuthentication( &rsock, errstack )) {
		dprintf( D_ALWAYS, "DCSchedd: authentication failure: %s\n",
				 errstack->getFullText().c_str() );
		return NULL;
	}

		// Now, put the command classad on the wire
	if( ! (putClassAd(&rsock, cmd_ad) && rsock.end_of_message()) ) {
		dprintf( D_ALWAYS, "DCSchedd:actOnJobs: Can't send classad\n" );
		return NULL;
	}

		// Next, we need to read the reply from the schedd if things
		// are ok and it's going to go forward.  If the schedd can't
		// read our reply to this ClassAd, it assumes we got killed
		// and it should abort its transaction
	rsock.decode();
	ClassAd* result_ad = new ClassAd();
	if( ! (getClassAd(&rsock, *result_ad) && rsock.end_of_message()) ) {
		dprintf( D_ALWAYS, "DCSchedd:actOnJobs: "
				 "Can't read response ad from %s\n", _addr );
		delete( result_ad );
		return NULL;
	}

		// If the action totally failed, the schedd will already have
		// aborted the transaction and closed up shop, so there's no
		// reason trying to continue.  However, we still want to
		// return the result ad we got back so that our caller can
		// figure out what went wrong.
	reply = FALSE;
	result_ad->LookupInteger( ATTR_ACTION_RESULT, reply );
	if( reply != OK ) {
		dprintf( D_ALWAYS, "DCSchedd:actOnJobs: Action failed\n" );
		return result_ad;
	}

		// Tell the schedd we're still here and ready to go
	rsock.encode();
	int answer = OK;
	if( ! (rsock.code(answer) && rsock.end_of_message()) ) {
		dprintf( D_ALWAYS, "DCSchedd:actOnJobs: Can't send reply\n" );
		delete( result_ad );
		return NULL;
	}
	
		// finally, make sure the schedd didn't blow up trying to
		// commit these changes to the job queue...
	rsock.decode();
	if( ! (rsock.code(reply) && rsock.end_of_message()) ) {
		dprintf( D_ALWAYS, "DCSchedd:actOnJobs: "
				 "Can't read confirmation from %s\n", _addr );
		delete( result_ad );
		return NULL;
	}

	return result_ad;
}
Beispiel #23
0
// Read history from a remote schedd
static void readHistoryRemote(classad::ExprTree *constraintExpr)
{
	printHeader();
	if(longformat && use_xml) {
		std::string out;
		AddClassAdXMLFileHeader(out);
		printf("%s\n", out.c_str());
	}

	classad::ClassAd ad;
	classad::ExprList *projList(new classad::ExprList());
	classad::ExprTree *projTree = static_cast<classad::ExprTree*>(projList);
	ad.Insert(ATTR_PROJECTION, projTree);
	ad.Insert(ATTR_REQUIREMENTS, constraintExpr);
	ad.InsertAttr(ATTR_NUM_MATCHES, specifiedMatch <= 0 ? -1 : specifiedMatch);

	DCSchedd schedd(g_name.size() ? g_name.c_str() : NULL, g_pool.size() ? g_pool.c_str() : NULL);
	if (!schedd.locate(Daemon::LOCATE_FOR_LOOKUP)) {
		fprintf(stderr, "Unable to locate remote schedd (name=%s, pool=%s).\n", g_name.c_str(), g_pool.c_str());
		exit(1);
	}
	Sock* sock;
	if (!(sock = schedd.startCommand(QUERY_SCHEDD_HISTORY, Stream::reli_sock, 0))) {
		fprintf(stderr, "Unable to send history command to remote schedd;\n"
			"Typically, either the schedd is not responding, does not authorize you, or does not support remote history.\n");
		exit(1);
	}
	classad_shared_ptr<Sock> sock_sentry(sock);

	if (!putClassAd(sock, ad) || !sock->end_of_message()) {
		fprintf(stderr, "Unable to send request to remote schedd; likely a server or network error.\n");
		exit(1);
	}

	while (true) {
		compat_classad::ClassAd ad;
		if (!getClassAd(sock, ad)) {
			fprintf(stderr, "Failed to recieve remote ad.\n");
			exit(1);
		}
		long long intVal;
		if (ad.EvaluateAttrInt(ATTR_OWNER, intVal) && (intVal == 0)) { // Last ad.
			if (!sock->end_of_message()) {
				fprintf(stderr, "Unable to close remote socket.\n");
			}
			sock->close();
			std::string errorMsg;
			if (ad.EvaluateAttrInt(ATTR_ERROR_CODE, intVal) && intVal && ad.EvaluateAttrString(ATTR_ERROR_STRING, errorMsg)) {
				fprintf(stderr, "Error %lld: %s\n", intVal, errorMsg.c_str());
				exit(intVal);
			}
			if (ad.EvaluateAttrInt("MalformedAds", intVal) && intVal) {
				fprintf(stderr, "Remote side had parse errors on history file");
				exit(1);
			}
			if (!ad.EvaluateAttrInt(ATTR_NUM_MATCHES, intVal) || (intVal != matchCount)) {
				fprintf(stderr, "Client and server do not agree on number of ads sent;\n"
					"Indicates lost network packets or an internal error\n");
				exit(1);
			}
			break;
		}
		matchCount++;
		printJob(ad);
	}

	if(longformat && use_xml) {
		std::string out;
		AddClassAdXMLFileFooter(out);
		printf("%s\n", out.c_str());
	}
}
Beispiel #24
0
bool 
DCStartd::deactivateClaim( bool graceful, bool *claim_is_closing )
{
	dprintf( D_FULLDEBUG, "Entering DCStartd::deactivateClaim(%s)\n",
			 graceful ? "graceful" : "forceful" );

	if( claim_is_closing ) {
		*claim_is_closing = false;
	}

	setCmdStr( "deactivateClaim" );
	if( ! checkClaimId() ) {
		return false;
	}
	if( ! checkAddr() ) {
		return false;
	}

		// if this claim is associated with a security session
	ClaimIdParser cidp(claim_id);
	char const *sec_session = cidp.secSessionId();

	if (IsDebugLevel(D_COMMAND)) {
		int cmd = graceful ? DEACTIVATE_CLAIM : DEACTIVATE_CLAIM_FORCIBLY;
		dprintf (D_COMMAND, "DCStartd::deactivateClaim(%s,...) making connection to %s\n", getCommandStringSafe(cmd), _addr ? _addr : "NULL");
	}

	bool  result;
	ReliSock reli_sock;
	reli_sock.timeout(20);   // years of research... :)
	if( ! reli_sock.connect(_addr) ) {
		std::string err = "DCStartd::deactivateClaim: ";
		err += "Failed to connect to startd (";
		err += _addr ? _addr : "NULL";
		err += ')';
		newError( CA_CONNECT_FAILED, err.c_str() );
		return false;
	}
	int cmd;
	if( graceful ) {
		cmd = DEACTIVATE_CLAIM;
	} else {
		cmd = DEACTIVATE_CLAIM_FORCIBLY;
	}
	result = startCommand( cmd, (Sock*)&reli_sock, 20, NULL, NULL, false, sec_session ); 
	if( ! result ) {
		std::string err = "DCStartd::deactivateClaim: ";
		err += "Failed to send command ";
		if( graceful ) {
			err += "DEACTIVATE_CLAIM";
		} else {
			err += "DEACTIVATE_CLAIM_FORCIBLY";
		}
		err += " to the startd";
		newError( CA_COMMUNICATION_ERROR, err.c_str() );
		return false;
	}
		// Now, send the ClaimId
	if( ! reli_sock.put_secret(claim_id) ) {
		newError( CA_COMMUNICATION_ERROR,
				  "DCStartd::deactivateClaim: Failed to send ClaimId to the startd" );
		return false;
	}
	if( ! reli_sock.end_of_message() ) {
		newError( CA_COMMUNICATION_ERROR,
				  "DCStartd::deactivateClaim: Failed to send EOM to the startd" );
		return false;
	}

	reli_sock.decode();
	ClassAd response_ad;
	if( !getClassAd(&reli_sock, response_ad) || !reli_sock.end_of_message() ) {
		dprintf( D_FULLDEBUG, "DCStartd::deactivateClaim: failed to read response ad.\n");
			// The response ad is not critical and is expected to be missing
			// if the startd is from before 7.0.5.
	}
	else {
		bool start = true;
		response_ad.LookupBool(ATTR_START,start);
		if( claim_is_closing ) {
			*claim_is_closing = !start;
		}
	}

		// we're done
	dprintf( D_FULLDEBUG, "DCStartd::deactivateClaim: "
			 "successfully sent command\n" );
	return true;
}
Beispiel #25
0
bool DCSchedd::getJobConnectInfo(
	PROC_ID jobid,
	int subproc,
	char const *session_info,
	int timeout,
	CondorError *errstack,
	MyString &starter_addr,
	MyString &starter_claim_id,
	MyString &starter_version,
	MyString &slot_name,
	MyString &error_msg,
	bool &retry_is_sensible,
	int &job_status,
	MyString &hold_reason)
{
	ClassAd input;
	ClassAd output;

	input.Assign(ATTR_CLUSTER_ID,jobid.cluster);
	input.Assign(ATTR_PROC_ID,jobid.proc);
	if( subproc != -1 ) {
		input.Assign(ATTR_SUB_PROC_ID,subproc);
	}
	input.Assign(ATTR_SESSION_INFO,session_info);

	ReliSock sock;
	if( !connectSock(&sock,timeout,errstack) ) {
		error_msg = "Failed to connect to schedd";
		dprintf( D_ALWAYS, "%s\n",error_msg.Value());
		return false;
	}

	if( !startCommand(GET_JOB_CONNECT_INFO, &sock, timeout, errstack) ) {
		error_msg = "Failed to send GET_JOB_CONNECT_INFO to schedd";
		dprintf( D_ALWAYS, "%s\n",error_msg.Value());
		return false;
	}

	if( !forceAuthentication(&sock, errstack) ) {
		error_msg = "Failed to authenticate";
		dprintf( D_ALWAYS, "%s\n",error_msg.Value());
		return false;
	}

	sock.encode();
	if( !putClassAd(&sock, input) || !sock.end_of_message() ) {
		error_msg = "Failed to send GET_JOB_CONNECT_INFO to schedd";
		dprintf( D_ALWAYS, "%s\n",error_msg.Value());
		return false;
	}

	sock.decode();
	if( !getClassAd(&sock, output) || !sock.end_of_message() ) {
		error_msg = "Failed to get response from schedd";
		dprintf( D_ALWAYS, "%s\n",error_msg.Value());
		return false;
	}

	if( IsFulldebug(D_FULLDEBUG) ) {
		std::string adstr;
		sPrintAd(adstr, output, true);
		dprintf(D_FULLDEBUG,"Response for GET_JOB_CONNECT_INFO:\n%s\n",
				adstr.c_str());
	}

	bool result=false;
	output.LookupBool(ATTR_RESULT,result);

	if( !result ) {
		output.LookupString(ATTR_HOLD_REASON,hold_reason);
		output.LookupString(ATTR_ERROR_STRING,error_msg);
		retry_is_sensible = false;
		output.LookupBool(ATTR_RETRY,retry_is_sensible);
		output.LookupInteger(ATTR_JOB_STATUS,job_status);
	}
	else {
		output.LookupString(ATTR_STARTER_IP_ADDR,starter_addr);
		output.LookupString(ATTR_CLAIM_ID,starter_claim_id);
		output.LookupString(ATTR_VERSION,starter_version);
		output.LookupString(ATTR_REMOTE_HOST,slot_name);
	}

	return result;
}
Beispiel #26
0
bool DCSchedd::recycleShadow( int previous_job_exit_reason, ClassAd **new_job_ad, MyString &error_msg )
{
	int timeout = 300;
	CondorError errstack;

	ReliSock sock;
	if( !connectSock(&sock,timeout,&errstack) ) {
		error_msg.formatstr("Failed to connect to schedd: %s",
						  errstack.getFullText().c_str());
		return false;
	}

	if( !startCommand(RECYCLE_SHADOW, &sock, timeout, &errstack) ) {
		error_msg.formatstr("Failed to send RECYCLE_SHADOW to schedd: %s",
						  errstack.getFullText().c_str());
		return false;
	}

	if( !forceAuthentication(&sock, &errstack) ) {
		error_msg.formatstr("Failed to authenticate: %s",
						  errstack.getFullText().c_str());
		return false;
	}

	sock.encode();
	int mypid = getpid();
	if( !sock.put( mypid ) ||
		!sock.put( previous_job_exit_reason ) ||
		!sock.end_of_message() )
	{
		error_msg = "Failed to send job exit reason";
		return false;
	}

	sock.decode();

	int found_new_job = 0;
	sock.get( found_new_job );

	if( found_new_job ) {
		*new_job_ad = new ClassAd();
		if( !getClassAd( &sock, *(*new_job_ad) ) ) {
			error_msg = "Failed to receive new job ClassAd";
			delete *new_job_ad;
			*new_job_ad = NULL;
			return false;
		}
	}

	if( !sock.end_of_message() ) {
		error_msg = "Failed to receive end of message";
		delete *new_job_ad;
		*new_job_ad = NULL;
		return false;
	}

	if( *new_job_ad ) {
		sock.encode();
		int ok=1;
		if( !sock.put(ok) ||
			!sock.end_of_message() )
		{
			error_msg = "Failed to send ok";
			delete *new_job_ad;
			*new_job_ad = NULL;
			return false;
		}
	}

	return true;
}
Beispiel #27
0
bool
ClaimStartdMsg::readMsg( DCMessenger * /*messenger*/, Sock *sock ) {
	// Now, we set the timeout on the socket to 1 second.  Since we 
	// were called by as a Register_Socket callback, this should not 
	// block if things are working as expected.  
	// However, if the Startd wigged out and sent a 
	// partial int or some such, we cannot afford to block. -Todd 3/2000
	sock->timeout(1);

 	if( !sock->get(m_reply) ) {
		dprintf( failureDebugLevel(),
				 "Response problem from startd when requesting claim %s.\n",
				 description() );	
		sockFailed( sock );
		return false;
	}

	/* 
		Reply of 0 (NOT_OK) means claim rejected.
		Reply of 1 (OK) means claim accepted.
		Reply of 3 (REQUEST_CLAIM_LEFTOVERS) means claim accepted by a
		  partitionable slot, and the "leftovers" slot ad and claim id
		  will be sent next.
		Reply of 4 (REQUEST_CLAIM_PAIR) means claim accepted by a slot
		  that is paired, and the partner slot ad and claim id will be
		  sent next.
	*/

	if( m_reply == OK ) {
			// no need to log success, because DCMsg::reportSuccess() will
	} else if( m_reply == NOT_OK ) {
		dprintf( failureDebugLevel(), "Request was NOT accepted for claim %s\n", description() );
	} else if( m_reply == REQUEST_CLAIM_LEFTOVERS ) {
	 	if( !sock->get(m_leftover_claim_id) ||
			!getClassAd( sock, m_leftover_startd_ad )  ) 
		{
			// failed to read leftover partitionable slot info
			dprintf( failureDebugLevel(),
				 "Failed to read paritionable slot leftover from startd - claim %s.\n",
				 description() );
			// treat this failure same as NOT_OK, since this startd is screwed
			m_reply = NOT_OK;
		} else {
			// successfully read leftover partitionable slot info
			m_have_leftovers = true;
			// change reply to OK cuz claim was a success
			m_reply = OK;
		}
	} else if( m_reply == REQUEST_CLAIM_PAIR ) {
		if( !sock->get(m_paired_claim_id) ||
			!getClassAd( sock, m_paired_startd_ad ) )
		{
			// failed to read paired slot info
			dprintf( failureDebugLevel(),
				 "Failed to read paired slot info from startd - claim %s.\n",
				 description() );
			// treat this failure same as NOT_OK, since this startd is screwed
			m_reply = NOT_OK;
		} else {
			// successfully read paired slot info
			m_have_paired_slot = true;
			// change reply to OK cuz claim was a success
			m_reply = OK;
		}
	} else {
		dprintf( failureDebugLevel(), "Unknown reply from startd when requesting claim %s\n",description());
	}
		
	// end_of_message() is done by caller

	return true;
}
Beispiel #28
0
// when a transferd registers itself, it identifies who it is. The connection
// is then held open and the schedd periodically might send more transfer
// requests to the transferd. Also, if the transferd dies, the schedd is 
// informed quickly and reliably due to the closed connection.
bool
DCSchedd::register_transferd(MyString sinful, MyString id, int timeout, 
		ReliSock **regsock_ptr, CondorError *errstack) 
{
	ReliSock *rsock;
	int invalid_request = 0;
	ClassAd regad;
	ClassAd respad;
	std::string errstr;
	std::string reason;

	if (regsock_ptr != NULL) {
		// Our caller wants a pointer to the socket we used to succesfully
		// register the claim. The NULL pointer will represent failure and
		// this will only be set to something real if everything was ok.
		*regsock_ptr = NULL;
	}

	// This call with automatically connect to _addr, which was set in the
	// constructor of this object to be the schedd in question.
	rsock = (ReliSock*)startCommand(TRANSFERD_REGISTER, Stream::reli_sock,
		timeout, errstack);

	if( ! rsock ) {
		dprintf( D_ALWAYS, "DCSchedd::register_transferd: "
				 "Failed to send command (TRANSFERD_REGISTER) "
				 "to the schedd\n" );
		errstack->push("DC_SCHEDD", 1, 
			"Failed to start a TRANSFERD_REGISTER command.");
		return false;
	}

		// First, if we're not already authenticated, force that now. 
	if (!forceAuthentication( rsock, errstack )) {
		dprintf( D_ALWAYS, "DCSchedd::register_transferd authentication "
				"failure: %s\n", errstack->getFullText().c_str() );
		errstack->push("DC_SCHEDD", 1, 
			"Failed to authenticate properly.");
		return false;
	}

	rsock->encode();

	// set up my registration request.
	regad.Assign(ATTR_TREQ_TD_SINFUL, sinful);
	regad.Assign(ATTR_TREQ_TD_ID, id);

	// This is the initial registration identification ad to the schedd
	// It contains:
	//	ATTR_TREQ_TD_SINFUL
	//	ATTR_TREQ_TD_ID
	putClassAd(rsock, regad);
	rsock->end_of_message();

	// Get the response from the schedd.
	rsock->decode();

	// This is the response ad from the schedd:
	// It contains:
	//	ATTR_TREQ_INVALID_REQUEST
	//
	// OR
	// 
	//	ATTR_TREQ_INVALID_REQUEST
	//	ATTR_TREQ_INVALID_REASON
	getClassAd(rsock, respad);
	rsock->end_of_message();

	respad.LookupInteger(ATTR_TREQ_INVALID_REQUEST, invalid_request);

	if (invalid_request == FALSE) {
		// not an invalid request
		if (regsock_ptr)
			*regsock_ptr = rsock;
		return true;
	}

	respad.LookupString(ATTR_TREQ_INVALID_REASON, reason);
	errstack->pushf("DC_SCHEDD", 1, "Schedd refused registration: %s", reason.c_str());

	return false;
}
bool
ScheddNegotiate::readMsg( DCMessenger * /*messenger*/, Sock *sock )
{
		// Get the negotiator's request.
		// Note that end_of_message() is handled by our caller.

	if( !sock->code(m_operation) ) {
		dprintf( D_ALWAYS, "Can't receive request from negotiator\n" );
		return false;
	}

	switch( m_operation ) {
	case REJECTED_WITH_REASON:
		if( !sock->code(m_reject_reason) ) {
			dprintf( D_ALWAYS,
					 "Can't receive reject reason from negotiator\n" );
			return false;
		}
		break;
	case REJECTED:
		break;

	case SEND_JOB_INFO:
		break;

	case PERMISSION:
			// No negotiator since 7.1.3 should ever send this
			// command, and older ones should not send it either,
			// since we advertise WantResAd=True.
		dprintf( D_ALWAYS, "Negotiator sent PERMISSION rather than expected PERMISSION_AND_AD!  Aborting.\n");
		return false;
		break;

	case PERMISSION_AND_AD: {
		char *claim_id = NULL;
		if( !sock->get_secret(claim_id) || !claim_id ) {
			dprintf( D_ALWAYS,
					 "Can't receive ClaimId from negotiator\n" );
			return false;
		}
		m_claim_id = claim_id;
		free( claim_id );

		m_match_ad.Clear();

			// get startd ad from negotiator
		if( !getClassAd(sock, m_match_ad) ) {
			dprintf( D_ALWAYS,
					 "Can't get my match ad from negotiator\n" );
			return false;
		}
#if !defined(WANT_OLD_CLASSADS)
		m_match_ad.AddTargetRefs( TargetJobAttrs );
#endif

		break;
	}
	case END_NEGOTIATE:
		break;
	default:
		dprintf( D_ALWAYS, "Got unexpected request (%d)\n", m_operation );
		return false;
	}

	return true;
}
Beispiel #30
0
// I'm going to ask the schedd for where I can put the files for the jobs I've
// specified. The schedd is going to respond with A) a message telling me it
// has the answer right away, or B) an answer telling me I have to wait 
// an unknown length of time for the schedd to schedule me a place to put it.
bool 
DCSchedd::requestSandboxLocation(ClassAd *reqad, ClassAd *respad, 
	CondorError * errstack)
{
	ReliSock rsock;
	int will_block;
	ClassAd status_ad;

	rsock.timeout(20);   // years of research... :)
	if( ! rsock.connect(_addr) ) {
		dprintf( D_ALWAYS, "DCSchedd::requestSandboxLocation(): "
				 "Failed to connect to schedd (%s)\n", _addr );
		return false;
	}
	if( ! startCommand(REQUEST_SANDBOX_LOCATION, (Sock*)&rsock, 0,
						   errstack) ) {
		dprintf( D_ALWAYS, "DCSchedd::requestSandboxLocation(): "
				 "Failed to send command (REQUEST_SANDBOX_LOCATION) "
				 "to schedd (%s)\n", _addr );
		return false;
	}

		// First, if we're not already authenticated, force that now. 
	if (!forceAuthentication( &rsock, errstack )) {
		dprintf( D_ALWAYS, "DCSchedd: authentication failure: %s\n",
				 errstack->getFullText().c_str() );
		return false;
	}

	rsock.encode();

	///////////////////////////////////////////////////////////////////////
	// Send my sandbox location request packet to the schedd.
	///////////////////////////////////////////////////////////////////////

	// This request ad will either contain:
	//	ATTR_TREQ_PEER_VERSION
	//	ATTR_TREQ_HAS_CONSTRAINT
	//	ATTR_TREQ_JOBID_LIST
	//	ATTR_TREQ_FTP
	// 
	// OR
	//
	//	ATTR_TREQ_DIRECTION
	//	ATTR_TREQ_PEER_VERSION
	//	ATTR_TREQ_HAS_CONSTRAINT
	//	ATTR_TREQ_CONSTRAINT
	//	ATTR_TREQ_FTP
	dprintf(D_ALWAYS, "Sending request ad.\n");
	if (putClassAd(&rsock, *reqad) != 1) {
		dprintf(D_ALWAYS,"DCSchedd:requestSandboxLocation(): "
				"Can't send reqad to the schedd\n");
		return false;
	}
	rsock.end_of_message();

	rsock.decode();

	///////////////////////////////////////////////////////////////////////
	// Read back a response ad which will tell me which jobs the schedd
	// said I could modify and whether or not I'm am going to have to block
	// before getting the payload of the transferd location/capability ad.
	///////////////////////////////////////////////////////////////////////

	// This status ad will contain
	//	ATTR_TREQ_INVALID_REQUEST (set to true)
	//	ATTR_TREQ_INVALID_REASON
	//
	// OR
	//	ATTR_TREQ_INVALID_REQUEST (set to false)
	//	ATTR_TREQ_JOBID_ALLOW_LIST
	//	ATTR_TREQ_JOBID_DENY_LIST
	//	ATTR_TREQ_WILL_BLOCK

	dprintf(D_ALWAYS, "Receiving status ad.\n");
	if (getClassAd(&rsock, status_ad) == false) {
		dprintf(D_ALWAYS, "Schedd closed connection to me. Aborting sandbox "
			"submission.\n");
		return false;
	}
	rsock.end_of_message();

	status_ad.LookupInteger(ATTR_TREQ_WILL_BLOCK, will_block);

	dprintf(D_ALWAYS, "Client will %s\n", will_block==1?"block":"not block");

	if (will_block == 1) {
		// set to 20 minutes.
		rsock.timeout(60*20);
	}

	///////////////////////////////////////////////////////////////////////
	// Read back the payload ad from the schedd about the transferd location
	// and capability string I can use for the fileset I wish to transfer.
	///////////////////////////////////////////////////////////////////////

	// read back the response ad from the schedd which contains a 
	// td sinful string, and a capability. These represent my ability to
	// read/write a certain fileset somewhere.

	// This response ad from the schedd will contain:
	//
	//	ATTR_TREQ_INVALID_REQUEST (set to true)
	//	ATTR_TREQ_INVALID_REASON
	//
	// OR
	//
	//	ATTR_TREQ_INVALID_REQUEST (set to false)
	//	ATTR_TREQ_CAPABILITY
	//	ATTR_TREQ_TD_SINFUL
	//	ATTR_TREQ_JOBID_ALLOW_LIST

	dprintf(D_ALWAYS, "Receiving response ad.\n");
	if (getClassAd(&rsock, *respad) != true) {
		dprintf(D_ALWAYS,"DCSchedd:requestSandboxLocation(): "
				"Can't receive respond ad from the schedd\n");
		return false;
	}
	rsock.end_of_message();

	return true;
}