示例#1
0
// Forward a client's request to the target daemon.
//
// Builds a CCB_REQUEST ad containing the requester's return address,
// connect id, request id and (purely for debugging) the requester's peer
// description, then writes it to the target's socket.  If the write
// fails, the failure is logged and the request is finished with an error.
void
CCBServer::ForwardRequestToTarget( CCBServerRequest *request, CCBTarget *target )
{
	Sock *target_sock = target->getSock();

	ClassAd forward_ad;
	forward_ad.Assign( ATTR_COMMAND, CCB_REQUEST );
	forward_ad.Assign( ATTR_MY_ADDRESS, request->getReturnAddr() );
	forward_ad.Assign( ATTR_CLAIM_ID, request->getConnectID() );
		// Not needed by the protocol; included to make debugging easier.
	forward_ad.Assign( ATTR_NAME, request->getSock()->peer_description() );

		// The request id is sent in its string form.
	MyString request_id_text;
	CCBIDToString( request->getRequestID(), request_id_text );
	forward_ad.Assign( ATTR_REQUEST_ID, request_id_text );

	target_sock->encode();
	bool const forwarded =
		forward_ad.put( *target_sock ) && target_sock->end_of_message();

	if( forwarded ) {
			// Nothing more to do here.  The target's answer arrives
			// later (HandleRequestResultsMsg), the next time the
			// socket is polled.  For a faster response we _could_
			// register the socket now, if it is not registered yet.
		return;
	}

	dprintf(D_ALWAYS,
			"CCB: failed to forward request id %lu from %s to target "
			"daemon %s with ccbid %lu\n",
			request->getRequestID(),
			request->getSock()->peer_description(),
			target->getSock()->peer_description(),
			target->getCCBID());

	RequestFinished( request, false, "failed to forward request to target" );
}
示例#2
0
// Write one ClassAd message to the CCB server socket.
//
// Returns false without writing anything when there is no socket or the
// connection attempt is still in progress.  If the write itself fails,
// Disconnected() is invoked and false is returned.  Returns true once
// the message and its end-of-message marker were written.
bool
CCBListener::WriteMsgToCCB(ClassAd &msg)
{
	if( !m_sock ) {
		return false;
	}
	if( m_waiting_for_connect ) {
		return false;
	}

	m_sock->encode();

	bool const written = msg.put( *m_sock ) && m_sock->end_of_message();
	if( !written ) {
		Disconnected();
	}
	return written;
}
示例#3
0
bool
DCStarter::createJobOwnerSecSession(int timeout,char const *job_claim_id,char const *starter_sec_session,char const *session_info,MyString &owner_claim_id,MyString &error_msg,MyString &starter_version,MyString &starter_addr)
{
	ReliSock sock;

	if( !connectSock(&sock, timeout, NULL) ) {
		error_msg = "Failed to connect to starter";
		return false;
	}

	if( !startCommand(CREATE_JOB_OWNER_SEC_SESSION, &sock,timeout,NULL,NULL,false,starter_sec_session) ) {
		error_msg = "Failed to send CREATE_JOB_OWNER_SEC_SESSION to starter";
		return false;
	}

	ClassAd input;
	input.Assign(ATTR_CLAIM_ID,job_claim_id);
	input.Assign(ATTR_SESSION_INFO,session_info);

	sock.encode();
	if( !input.put(sock) || !sock.end_of_message() ) {
		error_msg = "Failed to compose CREATE_JOB_OWNER_SEC_SESSION to starter";
		return false;
	}

	sock.decode();

	ClassAd reply;
	if( !reply.initFromStream(sock) || !sock.end_of_message() ) {
		error_msg = "Failed to get response to CREATE_JOB_OWNER_SEC_SESSION from starter";
		return false;
	}

	bool success = false;
	reply.LookupBool(ATTR_RESULT,success);
	if( !success ) {
		reply.LookupString(ATTR_ERROR_STRING,error_msg);
		return false;
	}

	reply.LookupString(ATTR_CLAIM_ID,owner_claim_id);
	reply.LookupString(ATTR_VERSION,starter_version);
		// get the full starter address from the starter in case it contains
		// extra CCB info that we don't already know about
	reply.LookupString(ATTR_STARTER_IP_ADDR,starter_addr);
	return true;
}
示例#4
0
// Report the outcome of a request back to the requesting client.
//
// sock        - the client's socket
// success     - whether the request succeeded
// error_msg   - text placed in the reply's error string attribute
// request_cid - request id, used only in the log message
// target_cid  - target daemon's ccbid, used only in the log message
//
// Nothing is sent for a successful request whose socket is already
// read-ready.  A failed write is logged at D_FULLDEBUG for successful
// requests and at D_ALWAYS otherwise.
void
CCBServer::RequestReply( Sock *sock, bool success, char const *error_msg, CCBID request_cid, CCBID target_cid )
{
	if( success ) {
		if( sock->readReady() ) {
				// The client must have disconnected, which is expected
				// once it has received the reversed connection.
			return;
		}
	}

	ClassAd reply_ad;
	reply_ad.Assign( ATTR_RESULT, success );
	reply_ad.Assign( ATTR_ERROR_STRING, error_msg );

	sock->encode();
	bool const delivered = reply_ad.put( *sock ) && sock->end_of_message();
	if( delivered ) {
		return;
	}

		// Ideally we would stay silent when a successful request's client
		// has gone away, since that is normal.  The write calls above make
		// noise when they fail, though, so at least in FULLDEBUG we explain
		// what happened.  We rarely get here for successful requests: the
		// disconnect is usually noticed earlier, or caught by the socket
		// check at the top.  A reply is still sent on success because a
		// client that has not yet seen the reverse connect would otherwise
		// take our silent disconnect to mean something went wrong.
	char const *outcome = success ? "request succeeded" : "request failed";
	char const *expected_note =
		success ? "(since the request was successful, it is expected "
		          "that the client may disconnect before receiving "
		          "results)" : "";

	dprintf(success ? D_FULLDEBUG : D_ALWAYS,
			"CCB: failed to send result (%s) for request id %lu "
			"from %s requesting a reversed connection to target daemon "
			"with ccbid %lu: %s %s\n",
			outcome,
			request_cid,
			sock->peer_description(),
			target_cid,
			error_msg,
			expected_note );
}
示例#5
0
// Send an ALIVE command ad to the target daemon over its socket.
// If the ad cannot be written, the failure is logged and the target is
// removed; otherwise the heartbeat is noted at D_FULLDEBUG.
void
CCBServer::SendHeartbeatResponse( CCBTarget *target )
{
	Sock *target_sock = target->getSock();

	ClassAd heartbeat_ad;
	heartbeat_ad.Assign( ATTR_COMMAND, ALIVE );

	target_sock->encode();
	bool const delivered =
		heartbeat_ad.put( *target_sock ) && target_sock->end_of_message();

	if( delivered ) {
		dprintf(D_FULLDEBUG,"CCB: sent heartbeat to target %s\n",
				target_sock->peer_description());
		return;
	}

	dprintf(D_ALWAYS,
			"CCB: failed to send heartbeat to target "
			"daemon %s with ccbid %lu\n",
			target->getSock()->peer_description(),
			target->getCCBID());

	RemoveTarget( target );
}
示例#6
0
// This handler is called when a client wishes to write files into the
// transferd's storage.
//
// Protocol, as implemented below:
//   1. Authenticate the client if that has not been attempted yet.
//   2. Read a request ad; extract the capability and the file transfer
//      protocol from it.
//   3. Reject (response ad with ATTR_TREQ_INVALID_REQUEST = TRUE and a
//      reason) if the capability is unknown, the protocol is unsupported,
//      or the matching transfer request is not an upload.
//   4. Otherwise answer with ATTR_TREQ_INVALID_REQUEST = FALSE and hand
//      the socket to a worker thread (write_files_thread) which performs
//      the transfer; its exit is handled by write_files_reaper.
//
// Always returns CLOSE_STREAM: this side of the socket is closed whether
// or not a worker was started.
int
TransferD::write_files_handler(int cmd, Stream *sock) 
{
	ReliSock *rsock = (ReliSock*)sock;
	MyString capability;
	int protocol = FTP_UNKNOWN;
	TransferRequest *treq = NULL;
	MyString fquser;
	static int transfer_reaper_id = -1;
	ThreadArg *thread_arg;
	int tid;
	ClassAd reqad;
	ClassAd respad;

	(void)cmd; // unused; quiet the compiler.

	dprintf(D_ALWAYS, "Got TRANSFERD_WRITE_FILES!\n");

	/////////////////////////////////////////////////////////////////////////
	// make sure we are authenticated
	/////////////////////////////////////////////////////////////////////////
	if( ! rsock->triedAuthentication() ) {
		CondorError errstack;
		if( ! SecMan::authenticate_sock(rsock, WRITE, &errstack) ) {
			// we failed to authenticate, we should bail out now
			// since we don't know what user is trying to perform
			// this action.
			// TODO: it'd be nice to print out what failed, but we
			// need better error propagation for that...
			// NOTE(review): the messages below name
			// setup_transfer_request_handler(); they look copied from
			// another handler -- confirm before relying on them.
			errstack.push( "TransferD::setup_transfer_request_handler()", 42,
				"Failure to register transferd - Authentication failed" );
			dprintf( D_ALWAYS, "setup_transfer_request_handler() "
				"aborting: %s\n",
				errstack.getFullText() );
			refuse( rsock );
			return CLOSE_STREAM;
		} 
	}

	fquser = rsock->getFullyQualifiedUser();


	/////////////////////////////////////////////////////////////////////////
	// Check to see if the capability the client tells us is something that
	// we have knowledge of. We ONLY check the capability and not the
	// identity of the person in question. This allows people of different
	// identities to write files here as long as they had the right 
	// capability. While this might not sound secure, they STILL had to have
	// authenticated as someone this daemon trusts. 
	// Similarly, check the protocol it wants to use as well as ensure that
	// the direction the transfer request was supposed to be is being honored.
	/////////////////////////////////////////////////////////////////////////
	rsock->decode();

	// soak the request ad from the client about what it wants to transfer.
	// A failed read leaves the capability empty, which is then rejected by
	// the capability lookup below.
	reqad.initFromStream(*rsock);
	rsock->end_of_message();

	reqad.LookupString(ATTR_TREQ_CAPABILITY, capability);

	rsock->encode();

	// do I know of such a capability?
	if (m_treqs.lookup(capability, treq) != 0) {
		// didn't find it. Log it and tell them to leave and close up shop
		respad.Assign(ATTR_TREQ_INVALID_REQUEST, TRUE);
		respad.Assign(ATTR_TREQ_INVALID_REASON, "Invalid capability!");
		respad.put(*rsock);
		rsock->end_of_message();

		dprintf(D_ALWAYS, "Client identity '%s' tried to write some files "
			"using capability '%s', but there was no such capability. "
			"Access denied.\n", fquser.Value(), capability.Value());
		return CLOSE_STREAM;
	}

	reqad.LookupInteger(ATTR_TREQ_FTP, protocol);

	// am I willing to use this protocol?
	switch(protocol) {
		case FTP_CFTP: // FileTrans protocol, I'm happy.
			break;

		default:
			respad.Assign(ATTR_TREQ_INVALID_REQUEST, TRUE);
			respad.Assign(ATTR_TREQ_INVALID_REASON, 
				"Invalid file transfer protocol!");
			respad.put(*rsock);
			rsock->end_of_message();

			dprintf(D_ALWAYS, "Client identity '%s' tried to write some files "
				"using protocol '%d', but I don't support that protocol. "
				"Access denied.\n", fquser.Value(), protocol);
			return CLOSE_STREAM;
	}

	// ensure that this transfer request was of the uploading variety
	if (treq->get_direction() != FTPD_UPLOAD) {
		respad.Assign(ATTR_TREQ_INVALID_REQUEST, TRUE);
		respad.Assign(ATTR_TREQ_INVALID_REASON, 
			"Transfer Request was not an uploading request!");
		respad.put(*rsock);
		rsock->end_of_message();

		dprintf(D_ALWAYS, "Client identity '%s' tried to write some files "
			"to a transfer request that wasn't expecting to be written. "
			"Access denied.\n", fquser.Value());

		// The request was refused, so stop here. Without this return the
		// code would fall through, send a second "ok" response and start
		// the transfer anyway.
		return CLOSE_STREAM;
	}

	/////////////////////////////////////////////////////////////////////////
	// Tell the client everything was ok.
	/////////////////////////////////////////////////////////////////////////

	respad.Assign(ATTR_TREQ_INVALID_REQUEST, FALSE);
	respad.put(*rsock);
	rsock->end_of_message();

	/////////////////////////////////////////////////////////////////////////
	// Set up a thread (a process under unix) to read ALL of the job files
	// for all of the ads in the TransferRequest.
	/////////////////////////////////////////////////////////////////////////

	// now create a thread, passing in the sock, which uses the file transfer
	// object to accept the files.

	if (transfer_reaper_id == -1) {
		// only set this up ONCE so each and every thread gets one.
		transfer_reaper_id = daemonCore->Register_Reaper(
						"write_files_reaper",
						(ReaperHandlercpp) &TransferD::write_files_reaper,
						"write_files_reaper",
						this
						);
	}

	thread_arg = new ThreadArg(protocol, treq);

	// Start a new thread (process on Unix) to do the work
	tid = daemonCore->Create_Thread(
		(ThreadStartFunc)&TransferD::write_files_thread,
		(void *)thread_arg,
		rsock,
		transfer_reaper_id
		);
	
	if (tid == FALSE) {
		// No worker was started, so nothing will ever consume thread_arg
		// and the reaper will never be called for this tid. Free the
		// argument and do not record a bogus tid in the thread table.
		// The client sees the connection close.
		dprintf(D_ALWAYS, "TransferD::write_files_handler(): "
			"failed to create a thread to service the transfer for "
			"client identity '%s'.\n", fquser.Value());
		delete thread_arg;
		return CLOSE_STREAM;
	}


	// associate the tid with the request so I can deal with it properly in
	// the reaper
	m_client_to_transferd_threads.insert(tid, treq);

	// The stream is inherited to the thread, who does the transfer and
	// finishes the protocol, but in the parent, I'm closing it.
	return CLOSE_STREAM;
}
示例#7
0
int
TransferD::write_files_reaper(int tid, int exit_status)
{
	TransferRequest *treq = NULL;
	MyString str;
	ClassAd result;
	int exit_code;
	int signal;

	dprintf(D_ALWAYS, "TransferD::write_files_reaper(): "
		"A file transfer into the transferd has completed: "
		"tid %d, status: %d\n",
		tid, exit_status);
	
	/////////////////////////////////////////////////////////////////////////
	// Consistancy check to make sure I asked to do the transfer
	/////////////////////////////////////////////////////////////////////////
	if (m_client_to_transferd_threads.lookup((long)tid, treq) != 0) 
	{
		EXCEPT("TransferD::write_files_reaper(): "
			"Programmer error: I have no record of it! ");
	}
	// remove it from the thread hash now that I'm dealing with it.
	m_client_to_transferd_threads.remove((long)tid);

	/////////////////////////////////////////////////////////////////////////
	// Determine status ad.
	/////////////////////////////////////////////////////////////////////////

	// The schedd will know who I'm talking about cause it has the
	// same capability for this transfer request.
	str = treq->get_capability();
	result.Assign(ATTR_TREQ_CAPABILITY, str);

	// figure out what the exit_status means and encode it into the result ad
	if (WIFSIGNALED(exit_status)) {
		signal = WTERMSIG(exit_status);
		dprintf(D_ALWAYS, "Thread exited with signal: %d\n", signal);

		result.Assign(ATTR_TREQ_UPDATE_STATUS, "NOT OK");
		str.sprintf("Died with signal %d", signal);
		result.Assign(ATTR_TREQ_UPDATE_REASON, str);
		result.Assign(ATTR_TREQ_SIGNALED, TRUE);

	} else {
		exit_code = WEXITSTATUS(exit_status);
		dprintf(D_ALWAYS, "Thread exited with exit code: %d\n", exit_code);
		switch(exit_code) {
			case EXIT_SUCCESS:
				result.Assign(ATTR_TREQ_UPDATE_STATUS, "OK");
				result.Assign(ATTR_TREQ_UPDATE_REASON, "Successful transfer");
				result.Assign(ATTR_TREQ_SIGNALED, FALSE);
				result.Assign(ATTR_TREQ_EXIT_CODE, exit_code);

				break;

			default:
				result.Assign(ATTR_TREQ_UPDATE_STATUS, "NOT OK");
				str.sprintf("Exited with bad exit code %d", exit_code);
				result.Assign(ATTR_TREQ_UPDATE_REASON, str);
				result.Assign(ATTR_TREQ_SIGNALED, FALSE);
				result.Assign(ATTR_TREQ_EXIT_CODE, exit_code);

				break;
		}
	}

	/////////////////////////////////////////////////////////////////////////
	// Call back schedd with status ad. If failed, don't repeat
	// it, the schedd will send another transfer request if it wants it
	// done again.
	/////////////////////////////////////////////////////////////////////////
	m_update_sock->encode();
	result.put(*m_update_sock);
	m_update_sock->end_of_message();

	// now remove the treq forever from our knowledge
	m_treqs.remove(treq->get_capability());

	// bye bye.
	delete treq;

	// Now, if the hash is empty, mark it down as the start of our inactivity
	// timer
	if (m_treqs.getNumElements() == 0) {
		dprintf(D_ALWAYS, 
			"Last transfer request handled. Becoming inactive.\n");
		m_inactivity_timer = time(NULL);
	}

	return TRUE;
}
示例#8
0
// This function runs in a separate thread or process (it is the start
// function handed to Create_Thread by write_files_handler).
//
// targ - a heap-allocated ThreadArg; it is deleted here after the
//        transfer request pointer has been copied out of it.
// sock - the client's ReliSock.
//
// For every job ad in the transfer request's todo list, a FileTransfer
// object downloads that job's files from the client.  On the first
// failure a response ad with ATTR_TREQ_INVALID_REQUEST = TRUE and a reason
// is sent and EXIT_FAILURE is returned.  When all transfers succeed, a
// response ad with ATTR_TREQ_INVALID_REQUEST = FALSE is sent, the socket
// is deleted and EXIT_SUCCESS is returned.
int
TransferD::write_files_thread(void *targ, Stream *sock)
{	
	ThreadArg *thread_arg = (ThreadArg*)targ;
	ReliSock *rsock = (ReliSock*)sock;
	TransferRequest *treq = NULL;
	// int protocol;
	SimpleList<ClassAd*> *jad_list = NULL;
	ClassAd *jad = NULL;
	// Initialized so the log messages below never print indeterminate
	// values when a job ad lacks the cluster/proc attributes.
	int cluster = -1, proc = -1;
	int old_timeout;
	int result;
	ClassAd respad;

	// XXX This is a damn dirty hack whose solution resides in implementing
	// a checksum for the files.
	// Now we sleep here for one second.  Why?  So we are certain
	// to transfer back output files even if the job ran for less
	// than one second. This is because:
	// stat() can't tell the difference between:
	//   1) A job starts up, touches a file, and exits all in one second
	//   2) A job starts up, doesn't touch the file, and exits all in one
	//    second
	// So if we force the start time of the job to be one second later than
	// the time we know the files were written, stat() should be able
	// to perceive what happened, if anything.

	sleep(1);

	// even though I'm in a new process, I got here either through forking
	// or through a thread, so this memory is a copy.
	// protocol = thread_arg->protocol;
	treq = thread_arg->treq;
	delete thread_arg;

	// XXX deal with protocol value.

	////////////////////////////////////////////////////////////////////////
	// Sort the classads (XXX maybe put at a higher level in the protocol)
	////////////////////////////////////////////////////////////////////////
	
	// XXX TODO

	////////////////////////////////////////////////////////////////////////
	// Do the transfer.
	////////////////////////////////////////////////////////////////////////

	// file transfers can take a long time, so raise the socket timeout to
	// eight hours; the previous value is restored on the failure paths.
	old_timeout = rsock->timeout(60 * 60 * 8);

	jad_list = treq->todo_tasks();

	while(jad_list->Next(jad)) {
		FileTransfer ftrans;

		// Reset per job so a job ad without ids does not inherit the ids
		// of the previous job in the log output.
		cluster = -1;
		proc = -1;
		jad->LookupInteger(ATTR_CLUSTER_ID, cluster);
		jad->LookupInteger(ATTR_PROC_ID, proc);
		dprintf( D_ALWAYS, "TransferD::write_files_thread(): "
			"Transferring fileset for job %d.%d\n",
				cluster, proc);

		result = ftrans.SimpleInit(jad, true, true, rsock);
		if ( !result ) {
			dprintf( D_ALWAYS, "TransferD::write_files_thread(): "
				"failed to init file transfer for job %d.%d \n",
				cluster, proc );

			respad.Assign(ATTR_TREQ_INVALID_REQUEST, TRUE);
			respad.Assign(ATTR_TREQ_INVALID_REASON, 
				"FileTransfer Object failed to SimpleInit.");
			respad.put(*rsock);
			rsock->end_of_message();

			rsock->timeout(old_timeout);

			// NOTE(review): unlike the success path, rsock is not deleted
			// here -- confirm who owns the socket on failure.
			return EXIT_FAILURE;
		}

		ftrans.setPeerVersion(treq->get_peer_version().Value());

		// We're "downloading" from the client to here.
		result = ftrans.DownloadFiles();
		if ( !result ) {

			dprintf( D_ALWAYS, "TransferD::write_files_thread(): "
				"failed to transfer files for job %d.%d \n",
				cluster, proc );

			respad.Assign(ATTR_TREQ_INVALID_REQUEST, TRUE);
			respad.Assign(ATTR_TREQ_INVALID_REASON, 
				"FileTransfer Object failed to download.");
			respad.put(*rsock);
			rsock->end_of_message();

			rsock->timeout(old_timeout);
			return EXIT_FAILURE;
		}
	}

	rsock->end_of_message();

	//////////////////////////////////////////////////////////////////////////
	// Now that the file transfer is done, tell the client everything is ok.
	//////////////////////////////////////////////////////////////////////////

	dprintf(D_ALWAYS, "Informing client of finished transfer.\n");

	rsock->encode();

	respad.Assign(ATTR_TREQ_INVALID_REQUEST, FALSE);

	// This response ad to the client will contain:
	//
	//	ATTR_TREQ_INVALID_REQUEST (set to false)
	//
	respad.put(*rsock);
	rsock->end_of_message();

	delete rsock;

	return EXIT_SUCCESS;
}
示例#9
0
int main( int argc, char *argv[] )
{
	const char *filename=0;
	char *pool=0;
	int command=-1;
	int i;
	bool use_tcp = false;
	bool with_ack = false;
	bool allow_multiple = false;
	param_functions *p_funcs = NULL;


	myDistro->Init( argc, argv );
	config();
	p_funcs = get_param_functions();

	for( i=1; i<argc; i++ ) {
		if(!strcmp(argv[i],"-help")) {
			usage(argv[0]);
			exit(0);
		} else if(!strcmp(argv[i],"-pool")) {	
			i++;
			if(!argv[i]) {
				fprintf(stderr,"-pool requires an argument.\n\n");
				usage(argv[0]);
				exit(1);
			}
			pool = argv[i];
		} else if(!strncmp(argv[i],"-tcp",strlen(argv[i]))) {
			use_tcp = true;
		} else if(!strncmp(argv[i],"-multiple",strlen(argv[i]))) {
				// We don't set allow_multiple=true by default, because
				// existing users (e.g. glideinWMS) have stray blank lines
				// in the input file.
			allow_multiple = true;
		} else if(!strcmp(argv[i],"-version")) {
			version();
			exit(0);
		} else if(!strcmp(argv[i],"-debug")) {
				// dprintf to console
			Termlog = 1;
			p_funcs = get_param_functions();
			dprintf_config ("TOOL", p_funcs);
		} else if(argv[i][0]!='-' || !strcmp(argv[i],"-")) {
			if(command==-1) {
				command = getCollectorCommandNum(argv[i]);
				if(command==-1) {
					fprintf(stderr,"Unknown command name %s\n\n",argv[i]);
					usage(argv[0]);
					exit(1);
				}
			} else if(!filename) {
				filename = argv[i];
			} else {
				fprintf(stderr,"Extra argument: %s\n\n",argv[i]);
				usage(argv[0]);
				exit(1);
			}
		} else {
			fprintf(stderr,"Unknown argument: %s\n\n",argv[i]);
			usage(argv[0]);
			exit(1);
		}
	}

	FILE *file;
	ClassAdList ads;
	Daemon *collector;
	Sock *sock;

	switch( command ) {
	case UPDATE_STARTD_AD_WITH_ACK:
		with_ack = true;
		break;
	}

	if( with_ack ) {
		use_tcp =  true;
	}

	if(!filename || !strcmp(filename,"-")) {
		file = stdin;
		filename = "(stdin)";
	} else {
		file = safe_fopen_wrapper_follow(filename,"r");
	}
	if(!file) {
		fprintf(stderr,"couldn't open %s: %s\n",filename,strerror(errno));
		return 1;
	}

	while(!feof(file)) {
		int eof=0,error=0,empty=0;
		char const *delim = "\n";
		if( !allow_multiple ) {
			delim = "***";
		}
		ClassAd *ad = new ClassAd(file,const_cast<char *>(delim),eof,error,empty);
		if(error) {
			fprintf(stderr,"couldn't parse ClassAd in %s\n",filename);
			delete ad;
			return 1;
		}
		if( empty ) {
			delete ad;
			break;
		}
		if( !allow_multiple && ads.Length() > 0 ) {
			fprintf(stderr,"ERROR: failed to parse '%s' as a ClassAd attribute\n",delim);
			delete ad;
			return 1;
		}
		ads.Insert(ad);
	}

	if(ads.Length() == 0) {
		fprintf(stderr,"%s is empty\n",filename);
		return 1;
	}

	CollectorList * collectors;
	if ( pool ) {
		collector = new Daemon( DT_COLLECTOR, pool, 0 );
		collectors = new CollectorList();
		collectors->append (collector);
	} else {
		collectors = CollectorList::create();
	}

	bool had_error = false;

	collectors->rewind();
	while (collectors->next(collector)) {
		
		dprintf(D_FULLDEBUG,"locating collector %s...\n", collector->name());

		if(!collector->locate()) {
			fprintf(stderr,"couldn't locate collector: %s\n",collector->error());
			had_error = true;
			continue;
		}

		dprintf(D_FULLDEBUG,"collector is %s located at %s\n",
				collector->hostname(),collector->addr());

		sock = NULL;

		ClassAd *ad;
		int success_count = 0;
		int failure_count = 0;
		ads.Rewind();
		while( (ad=ads.Next()) ) {

				// If there's no "MyAddress", generate one..
			if( !ad->Lookup( ATTR_MY_ADDRESS ) ) {
				MyString tmp;
				tmp.formatstr( "<%s:0>", my_ip_string() );
				ad->Assign( ATTR_MY_ADDRESS, tmp.Value() );
			}

			if ( use_tcp ) {
				if( !sock ) {
					sock = collector->startCommand(command,Stream::reli_sock,20);
				}
				else {
						// Use existing connection.
					sock->encode();
					sock->put(command);
				}
			} else {
					// We must open a new UDP socket each time.
				delete sock;
				sock = collector->startCommand(command,Stream::safe_sock,20);
			}

			int result = 0;
			if ( sock ) {
				result += ad->put( *sock );
				result += sock->end_of_message();
			}
			if ( result != 2 ) {
				fprintf(stderr,"failed to send classad to %s\n",collector->addr());
				had_error = true;
				failure_count++;
				delete sock;
				sock = NULL;
				continue;
			}

			if( with_ack ) {
				sock->decode();
				int ok = 0;
				if( !sock->get(ok) || !sock->end_of_message() ) {
					fprintf(stderr,"failed to get ack from %s\n",collector->addr());
					had_error = true;
					failure_count++;
					delete sock;
					sock = NULL;
					continue;
				}

					// ack protocol does not allow for multiple updates,
					// so close the socket now
				delete sock;
				sock = NULL;
			}

			success_count++;
		}
		if( sock ) {
			CondorVersionInfo const *ver = sock->get_peer_version();
			if( !ver || ver->built_since_version(7,7,3) ) {
					// graceful hangup so the collector knows we are done
				sock->encode();
				command = DC_NOP;
				sock->put(command);
				sock->end_of_message();
			}

			delete sock;
			sock = NULL;
		}

		printf("Sent %d of %d ad%s to %s.\n",
			   success_count,
			   success_count + failure_count,
			   success_count+failure_count == 1 ? "" : "s",
			   collector->name());
	}

	delete collectors;

	return (had_error)?1:0;
}
示例#10
0
// Ask the starter to start an sshd for the job and store the keys it
// returns.
//
// known_hosts_file        - file to create; receives "* " followed by the
//                           decoded public server key
// private_client_key_file - file to create (mode 0400); receives the
//                           decoded private client key
// preferred_shells, slot_name, ssh_keygen_args
//                         - optional (NULL or empty means "not sent")
//                           attributes of the request ad
// sock                    - socket used for the exchange
// timeout, sec_session_id - passed through to connectSock/startCommand
// remote_user             - set from the reply's remote user attribute
// error_msg               - set to a description on failure
// retry_is_sensible       - reset to false on entry; set from the reply's
//                           retry attribute when the starter refuses
//
// Returns true only when both key files were written and closed
// successfully.  Both files must not exist beforehand.  Without
// HAVE_SSH_TO_JOB this always fails.
bool DCStarter::startSSHD(char const *known_hosts_file,char const *private_client_key_file,char const *preferred_shells,char const *slot_name,char const *ssh_keygen_args,ReliSock &sock,int timeout,char const *sec_session_id,MyString &remote_user,MyString &error_msg,bool &retry_is_sensible)
{

	retry_is_sensible = false;

#ifndef HAVE_SSH_TO_JOB
	error_msg = "This version of Condor does not support ssh key exchange.";
	return false;
#else
	if( !connectSock(&sock, timeout, NULL) ) {
		error_msg = "Failed to connect to starter";
		return false;
	}

	if( !startCommand(START_SSHD, &sock,timeout,NULL,NULL,false,sec_session_id) ) {
		error_msg = "Failed to send START_SSHD to starter";
		return false;
	}

	ClassAd input;

	if( preferred_shells && *preferred_shells ) {
		input.Assign(ATTR_SHELL,preferred_shells);
	}

	if( slot_name && *slot_name ) {
			// This is a little silly.
			// We are telling the remote side the name of the slot so
			// that it can put it in the welcome message.
		input.Assign(ATTR_NAME,slot_name);
	}

	if( ssh_keygen_args && *ssh_keygen_args ) {
		input.Assign(ATTR_SSH_KEYGEN_ARGS,ssh_keygen_args);
	}

	sock.encode();
	if( !input.put(sock) || !sock.end_of_message() ) {
		error_msg = "Failed to send START_SSHD request to starter";
		return false;
	}

	ClassAd result;
	sock.decode();
	if( !result.initFromStream(sock) || !sock.end_of_message() ) {
		error_msg = "Failed to read response to START_SSHD from starter";
		return false;
	}

	bool success = false;
	result.LookupBool(ATTR_RESULT,success);
	if( !success ) {
		std::string remote_error_msg;
		result.LookupString(ATTR_ERROR_STRING,remote_error_msg);
			// slot_name is optional (see above), and passing NULL for a
			// %s conversion is undefined behavior, so substitute a label.
		error_msg.sprintf("%s: %s",
						  slot_name ? slot_name : "(unnamed slot)",
						  remote_error_msg.c_str());
		retry_is_sensible = false;
		result.LookupBool(ATTR_RETRY,retry_is_sensible);
		return false;
	}

	result.LookupString(ATTR_REMOTE_USER,remote_user);

	std::string public_server_key;
	if( !result.LookupString(ATTR_SSH_PUBLIC_SERVER_KEY,public_server_key) ) {
		error_msg = "No public ssh server key received in reply to START_SSHD";
		return false;
	}
	std::string private_client_key;
	if( !result.LookupString(ATTR_SSH_PRIVATE_CLIENT_KEY,private_client_key) ) {
		error_msg = "No ssh client key received in reply to START_SSHD";
		return false;
	}


		// store the private client key (it arrives base64-encoded)
	unsigned char *decode_buf = NULL;
	int length = -1;
	condor_base64_decode(private_client_key.c_str(),&decode_buf,&length);
	if( !decode_buf ) {
		error_msg = "Error decoding ssh client key.";
		return false;
	}
	FILE *fp = safe_fcreate_fail_if_exists(private_client_key_file,"a",0400);
	if( !fp ) {
		error_msg.sprintf("Failed to create %s: %s",
						  private_client_key_file,strerror(errno));
		free( decode_buf );
		return false;
	}
	if( fwrite(decode_buf,length,1,fp)!=1 ) {
		error_msg.sprintf("Failed to write to %s: %s",
						  private_client_key_file,strerror(errno));
		fclose( fp );
		free( decode_buf );
		return false;
	}
		// fclose() is checked too: a failure here means the data may not
		// have been written out.
	if( fclose(fp)!=0 ) {
		error_msg.sprintf("Failed to close %s: %s",
						  private_client_key_file,strerror(errno));
		free( decode_buf );
		return false;
	}
	fp = NULL;
	free( decode_buf );
	decode_buf = NULL;


		// store the public server key in the known_hosts file
	length = -1;
	condor_base64_decode(public_server_key.c_str(),&decode_buf,&length);
	if( !decode_buf ) {
		error_msg = "Error decoding ssh server key.";
		return false;
	}
	fp = safe_fcreate_fail_if_exists(known_hosts_file,"a",0600);
	if( !fp ) {
		error_msg.sprintf("Failed to create %s: %s",
						  known_hosts_file,strerror(errno));
		free( decode_buf );
		return false;
	}

		// prepend a host name pattern (*) to the public key to make a valid
		// record in the known_hosts file
	fprintf(fp,"* ");

	if( fwrite(decode_buf,length,1,fp)!=1 ) {
		error_msg.sprintf("Failed to write to %s: %s",
						  known_hosts_file,strerror(errno));
		fclose( fp );
		free( decode_buf );
		return false;
	}

	if( fclose(fp)!=0 ) {
		error_msg.sprintf("Failed to close %s: %s",
						  known_hosts_file,strerror(errno));
		free( decode_buf );
		return false;
	}
	fp = NULL;
	free( decode_buf );
	decode_buf = NULL;

	return true;
#endif
}
示例#11
0
// When a transferd registers itself, it identifies who it is. The
// connection is then held open and the schedd periodically might send more
// transfer requests to the transferd. Also, if the transferd dies, the
// schedd is informed quickly and reliably due to the closed connection.
//
// sinful, id  - sent to the schedd in the registration ad
// timeout     - passed to startCommand
// regsock_ptr - if non-NULL, receives the registration socket on success
//               and is set to NULL on failure
// errstack    - if non-NULL, receives a description of any failure
//
// Returns true when the schedd's response ad does not mark the request as
// invalid.  On every failure path the socket created here is deleted.
bool
DCSchedd::register_transferd(MyString sinful, MyString id, int timeout, 
		ReliSock **regsock_ptr, CondorError *errstack) 
{
	ReliSock *rsock;
	int invalid_request = 0;
	ClassAd regad;
	ClassAd respad;
	std::string reason;

	if (regsock_ptr != NULL) {
		// Our caller wants a pointer to the socket we used to successfully
		// register the claim. The NULL pointer will represent failure and
		// this will only be set to something real if everything was ok.
		*regsock_ptr = NULL;
	}

	// This call will automatically connect to _addr, which was set in the
	// constructor of this object to be the schedd in question.
	rsock = (ReliSock*)startCommand(TRANSFERD_REGISTER, Stream::reli_sock,
		timeout, errstack);

	if( ! rsock ) {
		dprintf( D_ALWAYS, "DCSchedd::register_transferd: "
				 "Failed to send command (TRANSFERD_REGISTER) "
				 "to the schedd\n" );
		if (errstack) {
			errstack->push("DC_SCHEDD", 1, 
				"Failed to start a TRANSFERD_REGISTER command.");
		}
		return false;
	}

		// First, if we're not already authenticated, force that now. 
	if (!forceAuthentication( rsock, errstack )) {
		dprintf( D_ALWAYS, "DCSchedd::register_transferd authentication "
				"failure: %s\n",
				errstack ? errstack->getFullText().c_str() : "" );
		if (errstack) {
			errstack->push("DC_SCHEDD", 1, 
				"Failed to authenticate properly.");
		}
		delete rsock;
		return false;
	}

	rsock->encode();

	// set up my registration request.
	regad.Assign(ATTR_TREQ_TD_SINFUL, sinful);
	regad.Assign(ATTR_TREQ_TD_ID, id);

	// This is the initial registration identification ad to the schedd
	// It contains:
	//	ATTR_TREQ_TD_SINFUL
	//	ATTR_TREQ_TD_ID
	if( !regad.put(*rsock) || !rsock->end_of_message() ) {
		dprintf( D_ALWAYS, "DCSchedd::register_transferd: "
				 "Failed to send registration ad to the schedd\n" );
		if (errstack) {
			errstack->push("DC_SCHEDD", 1, 
				"Failed to send registration ad to the schedd.");
		}
		delete rsock;
		return false;
	}

	// Get the response from the schedd.
	rsock->decode();

	// This is the response ad from the schedd:
	// It contains:
	//	ATTR_TREQ_INVALID_REQUEST
	//
	// OR
	// 
	//	ATTR_TREQ_INVALID_REQUEST
	//	ATTR_TREQ_INVALID_REASON
	//
	// A failed read must be reported as a failure: otherwise
	// invalid_request keeps its initial value of 0 and a broken connection
	// would look like a successful registration.
	if( !respad.initFromStream(*rsock) || !rsock->end_of_message() ) {
		dprintf( D_ALWAYS, "DCSchedd::register_transferd: "
				 "Failed to read registration response from the schedd\n" );
		if (errstack) {
			errstack->push("DC_SCHEDD", 1, 
				"Failed to read registration response from the schedd.");
		}
		delete rsock;
		return false;
	}

	respad.LookupInteger(ATTR_TREQ_INVALID_REQUEST, invalid_request);

	if (invalid_request == FALSE) {
		// not an invalid request
		// NOTE(review): when regsock_ptr is NULL the socket is neither
		// returned nor deleted, so the connection stays open but is
		// unreachable -- confirm this is intended.
		if (regsock_ptr)
			*regsock_ptr = rsock;
		return true;
	}

	respad.LookupString(ATTR_TREQ_INVALID_REASON, reason);
	if (errstack) {
		errstack->pushf("DC_SCHEDD", 1, "Schedd refused registration: %s", reason.c_str());
	}

	// The registration was refused; nobody else holds this socket.
	delete rsock;
	return false;
}
示例#12
0
// Ask the schedd (GET_JOB_CONNECT_INFO) how to connect to a job's starter.
//
// jobid, subproc    - identify the job; subproc is only sent when != -1
// session_info      - sent to the schedd in the request ad
// timeout, errstack - passed to connectSock/startCommand
// starter_addr, starter_claim_id, starter_version, slot_name
//                   - filled from the reply when the schedd reports success
// error_msg         - describes the failure; taken from the reply's error
//                     string when the schedd itself reports failure
// retry_is_sensible - reset to false on entry; set from the reply's retry
//                     attribute when the schedd reports failure
//
// Returns the result attribute of the schedd's reply, or false if the
// exchange itself failed.
bool DCSchedd::getJobConnectInfo(
	PROC_ID jobid,
	int subproc,
	char const *session_info,
	int timeout,
	CondorError *errstack,
	MyString &starter_addr,
	MyString &starter_claim_id,
	MyString &starter_version,
	MyString &slot_name,
	MyString &error_msg,
	bool &retry_is_sensible)
{
	ClassAd input;
	ClassAd output;

		// Give the out-parameter a defined value on every path, including
		// the early connect/authenticate/I-O failures and success.
	retry_is_sensible = false;

	input.Assign(ATTR_CLUSTER_ID,jobid.cluster);
	input.Assign(ATTR_PROC_ID,jobid.proc);
	if( subproc != -1 ) {
		input.Assign(ATTR_SUB_PROC_ID,subproc);
	}
	input.Assign(ATTR_SESSION_INFO,session_info);

	ReliSock sock;
	if( !connectSock(&sock,timeout,errstack) ) {
		error_msg = "Failed to connect to schedd";
		dprintf( D_ALWAYS, "%s\n",error_msg.Value());
		return false;
	}

	if( !startCommand(GET_JOB_CONNECT_INFO, &sock, timeout, errstack) ) {
		error_msg = "Failed to send GET_JOB_CONNECT_INFO to schedd";
		dprintf( D_ALWAYS, "%s\n",error_msg.Value());
		return false;
	}

	if( !forceAuthentication(&sock, errstack) ) {
		error_msg = "Failed to authenticate";
		dprintf( D_ALWAYS, "%s\n",error_msg.Value());
		return false;
	}

	sock.encode();
	if( !input.put(sock) || !sock.end_of_message() ) {
		error_msg = "Failed to send GET_JOB_CONNECT_INFO to schedd";
		dprintf( D_ALWAYS, "%s\n",error_msg.Value());
		return false;
	}

	sock.decode();
	if( !output.initFromStream(sock) || !sock.end_of_message() ) {
		error_msg = "Failed to get response from schedd";
		dprintf( D_ALWAYS, "%s\n",error_msg.Value());
		return false;
	}

	if( IsFulldebug(D_FULLDEBUG) ) {
			// Private attributes are hidden while the reply is printed
			// and made visible again afterwards.
		std::string adstr;
		output.SetPrivateAttributesInvisible(true);
		output.sPrint(adstr);
		output.SetPrivateAttributesInvisible(false);
		dprintf(D_FULLDEBUG,"Response for GET_JOB_CONNECT_INFO:\n%s\n",
				adstr.c_str());
	}

		// A reply without a result attribute counts as failure.
	bool result=false;
	output.LookupBool(ATTR_RESULT,result);

	if( !result ) {
		output.LookupString(ATTR_ERROR_STRING,error_msg);
		retry_is_sensible = false;
		output.LookupBool(ATTR_RETRY,retry_is_sensible);
	}
	else {
		output.LookupString(ATTR_STARTER_IP_ADDR,starter_addr);
		output.LookupString(ATTR_CLAIM_ID,starter_claim_id);
		output.LookupString(ATTR_VERSION,starter_version);
		output.LookupString(ATTR_REMOTE_HOST,slot_name);
	}

	return result;
}
示例#13
0
// Start a request for a transfer queue slot.
//
// downloading - direction of the transfer
// fname       - file name sent with the request (must not be NULL)
// jobid       - job id sent with the request (must not be NULL)
// timeout     - seconds allowed for connecting and starting the command;
//               0 is passed through unchanged
// error_desc  - set to a description when false is returned
//
// Returns true when no request is needed (GoAheadAlways), when a request
// socket already exists, or when a new request was written successfully.
// In the last case the answer is still pending and the caller should use
// PollForTransferQueueSlot() to get it.  Returns false, with
// m_xfer_queue_sock deleted and reset to NULL, when connecting, starting
// the command, or writing the request fails.
bool
DCTransferQueue::RequestTransferQueueSlot(bool downloading,char const *fname,char const *jobid,int timeout,MyString &error_desc)
{
	ASSERT(fname);
	ASSERT(jobid);

	if( GoAheadAlways( downloading ) ) {
		m_xfer_downloading = downloading;
		m_xfer_fname = fname;
		m_xfer_jobid = jobid;
		return true;
	}
	CheckTransferQueueSlot();
	if( m_xfer_queue_sock ) {
			// A request has already been made.
			// Currently, this is a no-op, because any upload/download slot
			// is as good as any other.  In the future, there may be
			// different queues for different paths.

		ASSERT( m_xfer_downloading == downloading );
		m_xfer_fname = fname;
		m_xfer_jobid = jobid;
		return true;
	}

	time_t started = time(NULL);
	CondorError errstack;
		// Our caller has to finish this operation in the specified
		// amount of time or risk not responding to the file transfer
		// peer in time, so ignore the timeout multiplier and set the
		// timeout exactly as specified.
	m_xfer_queue_sock = reliSock( timeout, 0, &errstack, false, true );

	if( !m_xfer_queue_sock ) {
		formatstr(m_xfer_rejected_reason,
			"Failed to connect to transfer queue manager for job %s (%s): %s.",
			jobid, fname, errstack.getFullText().c_str() );
		error_desc = m_xfer_rejected_reason;
		dprintf(D_ALWAYS,"%s\n",m_xfer_rejected_reason.c_str());
		return false;
	}

		// Charge the time spent connecting against the caller's budget,
		// but always leave at least one second for the command itself.
	if( timeout ) {
		timeout -= time(NULL)-started;
		if( timeout <= 0 ) {
			timeout = 1;
		}
	}

	bool connected = startCommand(
		TRANSFER_QUEUE_REQUEST, m_xfer_queue_sock, timeout, &errstack );

	if( !connected )
	{
		delete m_xfer_queue_sock;
		m_xfer_queue_sock = NULL;
		formatstr(m_xfer_rejected_reason,
			"Failed to initiate transfer queue request for job %s (%s): %s.",
			jobid, fname, errstack.getFullText().c_str() );
		error_desc = m_xfer_rejected_reason;
		dprintf(D_ALWAYS,"%s\n",m_xfer_rejected_reason.c_str());
		return false;
	}

	m_xfer_downloading = downloading;
	m_xfer_fname = fname;
	m_xfer_jobid = jobid;

	ClassAd msg;
	msg.Assign(ATTR_DOWNLOADING,downloading);
	msg.Assign(ATTR_FILE_NAME,fname);
	msg.Assign(ATTR_JOB_ID,jobid);

	m_xfer_queue_sock->encode();

	if( !msg.put(*m_xfer_queue_sock) || !m_xfer_queue_sock->end_of_message() )
	{
			// Build the message first: it uses the socket's peer
			// description.
		formatstr(m_xfer_rejected_reason,
			"Failed to write transfer request to %s for job %s "
			"(initial file %s).",
			m_xfer_queue_sock->peer_description(),
			m_xfer_jobid.c_str(), m_xfer_fname.c_str());
		error_desc = m_xfer_rejected_reason;
		dprintf(D_ALWAYS,"%s\n",m_xfer_rejected_reason.c_str());

			// Drop the unusable socket, as the startCommand failure path
			// above does.  Keeping it would make a later call take the
			// "request has already been made" shortcut and report success
			// although no request ever reached the queue manager.
		delete m_xfer_queue_sock;
		m_xfer_queue_sock = NULL;
		return false;
	}

	m_xfer_queue_sock->decode();

		// Request has been initiated.  Now sender should call
		// PollForTransferQueueSlot() to get response.
	m_xfer_queue_pending = true;
	return true;
}