Пример #1
0
int
agent_starter( ReliSock * s )
{
	ReliSock* stream = (ReliSock*)s;
	char *subsys = NULL;

	stream->decode();
	if( ! stream->code(subsys) ||
		! stream->end_of_message() ) {
		dprintf( D_ALWAYS, "Can't read subsystem name\n" );
		free( subsys );
		return FALSE;
	}

	dprintf ( D_ALWAYS, "Starting agent '%s'\n", subsys );

	if( strcasecmp(subsys, "fetch_log") == 0 ) {
		free (subsys);
		return handle_agent_fetch_log( stream );
	}

	// default:

	free (subsys);
	dprintf( D_ALWAYS, "WARNING: unrecognized agent name\n" );
	return FALSE;
}
Пример #2
0
bool do_item(Daemon* d, MyString name, int num, int output_mode) {

	CondorError errstack;
	ClassAd authz_ad;
	bool sc_success;
	ReliSock *sock = NULL;
	bool fn_success = false;

	sock = (ReliSock*) d->makeConnectedSocket( Stream::reli_sock, 0, 0, &errstack );
	if (sock) {

		sc_success = d->startSubCommand(DC_SEC_QUERY, num, sock, 0, &errstack);
		if (sc_success) {

			sock->decode();
			if (getClassAd(sock, authz_ad) &&
				sock->end_of_message()) {
				fn_success = true;
			}
		}
	}

	print_info(fn_success, d->addr(), sock, name, num, &authz_ad, &errstack, output_mode);

	return fn_success;

}
Пример #3
0
bool do_item(Daemon* d, MyString name, int num, int output_mode) {

	CondorError errstack;
	ClassAd authz_ad;
	bool sc_success;
	ReliSock *sock = NULL;
	bool fn_success = false;

	sock = (ReliSock*) d->makeConnectedSocket( Stream::reli_sock, 0, 0, &errstack );
	if (sock) {

		sc_success = d->startSubCommand(DC_SEC_QUERY, num, sock, 0, &errstack);
		if (sc_success) {

			sock->decode();
			if (getClassAd(sock, authz_ad) &&
				sock->end_of_message()) {
				fn_success = true;
			}
		}
		print_info(fn_success, sock->get_connect_addr(), sock, name, num, &authz_ad, &errstack, output_mode);
	} else {
		// we know that d->addr() is not null because we checked before
		// calling do_item.  but i'll be paranoid and check again.
		fprintf(stderr, "ERROR: failed to make connection to %s\n", d->addr()?d->addr():"(null)");
	}

	return fn_success;

}
Пример #4
0
/*
  Takes sinful address of startd and sends it the given cmd, along
  with the capability and an end_of_message.
*/
int
send_cmd_to_startd(char *sin_host, char *capability, int cmd)
{
	ReliSock* sock = NULL;

	Daemon startd (DT_STARTD, sin_host, NULL);
	if (!(sock = (ReliSock*)startd.startCommand(cmd, Stream::reli_sock, 20))) {
		dprintf( D_ALWAYS, "Can't connect to startd at %s\n", sin_host );
		return -1;
	}

    
  // send the capability
  ClaimIdParser idp( capability );
  dprintf(D_FULLDEBUG, "send capability %s\n", idp.publicClaimId() );
  if(!sock->code(capability)){
    dprintf( D_ALWAYS, "sock->code(%s) failed.\n", idp.publicClaimId() );
    delete sock;
    return -3;
  }

  // send end of message
  if( !sock->end_of_message() ) {
    dprintf( D_ALWAYS, "end_of_message failed\n" );
    delete sock;
    return -4;
  }
  dprintf( D_FULLDEBUG, "Sent command %d to startd at %s with cap %s\n",
		   cmd, sin_host, idp.publicClaimId() );

  delete sock;

  return 0;
}
Пример #5
0
// when a transferd registers itself, it identifies who it is. The connection
// is then held open and the schedd periodically might send more transfer
// requests to the transferd. Also, if the transferd dies, the schedd is 
// informed quickly and reliably due to the closed connection.
bool
DCTransferD::setup_treq_channel(ReliSock **treq_sock_ptr, 
	int timeout, CondorError *errstack) 
{
	ReliSock *rsock;

	if (treq_sock_ptr != NULL) {
		// Our caller wants a pointer to the socket we used to succesfully
		// register the claim. The NULL pointer will represent failure and
		// this will only be set to something real if everything was ok.
		*treq_sock_ptr = NULL;
	}

	
	/////////////////////////////////////////////////////////////////////////
	// Connect to the transfer daemon
	/////////////////////////////////////////////////////////////////////////

	// This call with automatically connect to _addr, which was set in the
	// constructor of this object to be the transferd in question.
	rsock = (ReliSock*)startCommand(TRANSFERD_CONTROL_CHANNEL,
		Stream::reli_sock, timeout, errstack);

	if( ! rsock ) {
		dprintf( D_ALWAYS, "DCTransferD::setup_treq_channel: "
				 "Failed to send command (TRANSFERD_CONTROL_CHANNEL) "
				 "to the schedd\n" );
		errstack->push("DC_TRANSFERD", 1, 
			"Failed to start a TRANSFERD_CONTROL_CHANNEL command.");
		return false;
	}

	/////////////////////////////////////////////////////////////////////////
	// Make sure we are authenticated.
	/////////////////////////////////////////////////////////////////////////

		// First, if we're not already authenticated, force that now. 
	if (!forceAuthentication( rsock, errstack )) {
		dprintf( D_ALWAYS, "DCTransferD::setup_treq_channel() authentication "
				"failure: %s\n", errstack->getFullText().c_str() );
		errstack->push("DC_TRANSFERD", 1, 
			"Failed to authenticate properly.");
		return false;
	}

	rsock->encode();

	/////////////////////////////////////////////////////////////////////////
	// At this point, the socket passed all of the authentication protocols
	// so it is ready for use.
	/////////////////////////////////////////////////////////////////////////

	if (treq_sock_ptr)
		*treq_sock_ptr = rsock;

	return true;
}
Пример #6
0
int 
ReliSock::accept( ReliSock	&c )
{
	int c_sock;

	if (_state != sock_special || _special_state != relisock_listen ||
													c._state != sock_virgin)
	{
		return FALSE;
	}

	if (_timeout > 0) {
		Selector		selector;
		selector.set_timeout( _timeout );
		selector.add_fd( _sock, Selector::IO_READ );

		selector.execute();

		if( selector.timed_out() ) {
			return FALSE;
		} else if ( !selector.has_ready() ) {
			dprintf( D_ALWAYS, "select returns %d, connect failed\n",
				selector.select_retval() );
			return FALSE;
		}
	}

#ifndef WIN32 /* Unix */
	errno = 0;
#endif
	if ((c_sock = condor_accept(_sock, c._who)) < 0) {
#ifndef WIN32 /* Unix */
		if ( errno == EMFILE ) {
			_condor_fd_panic ( __LINE__, __FILE__ ); /* This calls dprintf_exit! */
		}
#endif
		return FALSE;

	}

	c.assign(c_sock);
	c.enter_connected_state("ACCEPT");
	c.decode();

	int on = 1;
	c.setsockopt(SOL_SOCKET, SO_KEEPALIVE, (char*)&on, sizeof(on));


		/* Set no delay to disable Nagle, since we buffer all our
		   relisock output and it degrades performance of our
		   various chatty protocols. -Todd T, 9/05
		*/
	c.setsockopt(IPPROTO_TCP, TCP_NODELAY, (char*)&on, sizeof(on));

	return TRUE;
}
Пример #7
0
int TransferQueueManager::HandleRequest(int cmd,Stream *stream)
{
	ReliSock *sock = (ReliSock *)stream;
	ASSERT( cmd == TRANSFER_QUEUE_REQUEST );

	ClassAd msg;
	sock->decode();
	if( !getClassAd( sock, msg ) || !sock->end_of_message() ) {
		dprintf(D_ALWAYS,
				"TransferQueueManager: failed to receive transfer request "
				"from %s.\n", sock->peer_description() );
		return FALSE;
	}

	bool downloading = false;
	MyString fname;
	MyString jobid;
	MyString queue_user;
	filesize_t sandbox_size;
	if( !msg.LookupBool(ATTR_DOWNLOADING,downloading) ||
		!msg.LookupString(ATTR_FILE_NAME,fname) ||
		!msg.LookupString(ATTR_JOB_ID,jobid) ||
		!msg.LookupString(ATTR_USER,queue_user) ||
		!msg.LookupInteger(ATTR_SANDBOX_SIZE,sandbox_size))
	{
		MyString msg_str;
		sPrintAd(msg_str, msg);
		dprintf(D_ALWAYS,"TransferQueueManager: invalid request from %s: %s\n",
				sock->peer_description(), msg_str.Value());
		return FALSE;
	}

		// Currently, we just create the client with the default max queue
		// age.  If it becomes necessary to customize the maximum age
		// on a case-by-case basis, it should be easy to adjust.

	TransferQueueRequest *client =
		new TransferQueueRequest(
			sock,
			sandbox_size,
			fname.Value(),
			jobid.Value(),
			queue_user.Value(),
			downloading,
			m_default_max_queue_age);

	if( !AddRequest( client ) ) {
		delete client;
		return KEEP_STREAM; // we have already closed this socket
	}

	return KEEP_STREAM;
}
Пример #8
0
// Called when the schedd initially connects to the transferd to finish
// the registration process.
int
TransferD::setup_transfer_request_handler(int  /*cmd*/, Stream *sock)
{
	ReliSock *rsock = (ReliSock*)sock;
	MyString sock_id;

	dprintf(D_ALWAYS, "Got TRANSFER_CONTROL_CHANNEL!\n");

	rsock->decode();

	///////////////////////////////////////////////////////////////
	// make sure we are authenticated
	///////////////////////////////////////////////////////////////
	if( ! rsock->triedAuthentication() ) {
		CondorError errstack;
		if( ! SecMan::authenticate_sock(rsock, WRITE, &errstack) ) {
			// we failed to authenticate, we should bail out now
			// since we don't know what user is trying to perform
			// this action.
			// TODO: it'd be nice to print out what failed, but we
			// need better error propagation for that...
			errstack.push( "TransferD::setup_transfer_request_handler()", 42,
				"Failure to register transferd - Authentication failed" );
			dprintf( D_ALWAYS, "setup_transfer_request_handler() "
				"aborting: %s\n",
				errstack.getFullText().c_str() );
			refuse(rsock);
			return CLOSE_STREAM;
		} 
	}

	rsock->decode();

	///////////////////////////////////////////////////////////////
	// Register this socket with a socket handler to handle incoming requests
	///////////////////////////////////////////////////////////////

	sock_id += "<TreqChannel-Socket>";

	char* _sock_id = strdup( sock_id.Value() );		//de-const

	// register the handler for any future transfer requests on this socket.
	daemonCore->Register_Socket((Sock*)rsock, _sock_id,
		(SocketHandlercpp)&TransferD::accept_transfer_request_handler,
		"TransferD::accept_transfer_request_handler", this, ALLOW);
	
	free( _sock_id );
	
	dprintf(D_ALWAYS, "Treq channel established.\n");
	dprintf(D_ALWAYS, "Accepting Transfer Requests.\n");

	return KEEP_STREAM;
}
Пример #9
0
bool
DCStarter::createJobOwnerSecSession(int timeout,char const *job_claim_id,char const *starter_sec_session,char const *session_info,MyString &owner_claim_id,MyString &error_msg,MyString &starter_version,MyString &starter_addr)
{
	ReliSock sock;

	if (IsDebugLevel(D_COMMAND)) {
		dprintf (D_COMMAND, "DCStarter::createJobOwnerSecSession(%s,...) making connection to %s\n",
			getCommandStringSafe(CREATE_JOB_OWNER_SEC_SESSION), _addr ? _addr : "NULL");
	}

	if( !connectSock(&sock, timeout, NULL) ) {
		error_msg = "Failed to connect to starter";
		return false;
	}

	if( !startCommand(CREATE_JOB_OWNER_SEC_SESSION, &sock,timeout,NULL,NULL,false,starter_sec_session) ) {
		error_msg = "Failed to send CREATE_JOB_OWNER_SEC_SESSION to starter";
		return false;
	}

	ClassAd input;
	input.Assign(ATTR_CLAIM_ID,job_claim_id);
	input.Assign(ATTR_SESSION_INFO,session_info);

	sock.encode();
	if( !putClassAd(&sock, input) || !sock.end_of_message() ) {
		error_msg = "Failed to compose CREATE_JOB_OWNER_SEC_SESSION to starter";
		return false;
	}

	sock.decode();

	ClassAd reply;
	if( !getClassAd(&sock, reply) || !sock.end_of_message() ) {
		error_msg = "Failed to get response to CREATE_JOB_OWNER_SEC_SESSION from starter";
		return false;
	}

	bool success = false;
	reply.LookupBool(ATTR_RESULT,success);
	if( !success ) {
		reply.LookupString(ATTR_ERROR_STRING,error_msg);
		return false;
	}

	reply.LookupString(ATTR_CLAIM_ID,owner_claim_id);
	reply.LookupString(ATTR_VERSION,starter_version);
		// get the full starter address from the starter in case it contains
		// extra CCB info that we don't already know about
	reply.LookupString(ATTR_STARTER_IP_ADDR,starter_addr);
	return true;
}
Пример #10
0
	void
	initialize()
	{
		char *tmp;
		string collName;

		dprintf(D_FULLDEBUG, "AviaryLocatorPlugin: Initializing...\n");

		tmp = param("COLLECTOR_NAME");
		if (NULL == tmp) {
			collName = getPoolName();
		} else {
			collName = tmp;
			free(tmp); tmp = NULL;
		}

		string log_name;
		formatstr(log_name,"aviary_locator.log");
		provider = AviaryProviderFactory::create(log_name, getPoolName(),CUSTOM,LOCATOR, "services/locator/");
		if (!provider) {
			EXCEPT("Unable to configure AviaryProvider. Exiting...");
		}

		ReliSock *sock = new ReliSock;
		if (!sock) {
			EXCEPT("Failed to allocate transport socket");
		}

		if (!sock->assign(provider->getListenerSocket())) {
			EXCEPT("Failed to bind transport socket");
		}
		int index;
		if (-1 == (index =
				daemonCore->Register_Socket((Stream *) sock,
											"Aviary Method Socket",
										   (SocketHandlercpp) ( &AviaryLocatorPlugin::handleTransportSocket ),
										   "Handler for Aviary Methods.", this))) {
			EXCEPT("Failed to register transport socket");
		}

		int pruning_interval = param_integer("AVIARY_LOCATOR_PRUNE_INTERVAL",20);
		if (-1 == (index = daemonCore->Register_Timer(
			pruning_interval,pruning_interval*2,
							(TimerHandlercpp)(&AviaryLocatorPlugin::handleTimerCallback),
							"Timer for pruning unresponsive endpoints", this))) {
        	EXCEPT("Failed to register pruning timer");
        }
	}
Пример #11
0
int IOProxyHandler::handle_request( Stream *s )
{
	char line[CHIRP_LINE_MAX];
	ReliSock *r = (ReliSock *) s;

	if(r->get_line_raw(line,CHIRP_LINE_MAX)>0)  {
		if( got_cookie ) {
			handle_standard_request(r,line);
		} else {
			handle_cookie_request(r,line);
		}
		return KEEP_STREAM;
	} else {
		dprintf(D_FULLDEBUG,"IOProxyHandler: closing connection to %s\n",r->peer_ip_str());
		delete this;
		return ~KEEP_STREAM;
	}
}
Пример #12
0
 void
 initialize()
 {
     char *tmp;
     string collName;
     
     dprintf(D_FULLDEBUG, "AviaryCollectorPlugin: Initializing...\n");
     
     tmp = param("COLLECTOR_NAME");
     if (NULL == tmp) {
         collName = getPoolName();
     } else {
         collName = tmp;
         free(tmp); tmp = NULL;
     }
     
     string log_name("aviary_collector.log");
     string id_name("collector"); id_name+=SEPARATOR; id_name+=getPoolName();
     provider = AviaryProviderFactory::create(log_name, id_name,"COLLECTOR","POOL","services/collector/");
     if (!provider) {
         EXCEPT("Unable to configure AviaryProvider. Exiting...");
     }
     
     collector = CollectorObject::getInstance();
     
     ReliSock *sock = new ReliSock;
     if (!sock) {
         EXCEPT("Failed to allocate transport socket");
     }
     
     if (!sock->assign(provider->getListenerSocket())) {
         EXCEPT("Failed to bind transport socket");
     }
     int index;
     if (-1 == (index =
         daemonCore->Register_Socket((Stream *) sock,
                                     "Aviary Method Socket",
                                     (SocketHandlercpp) ( &AviaryCollectorPlugin::handleTransportSocket ),
                                     "Handler for Aviary Methods.", this))) {
         EXCEPT("Failed to register transport socket");
                                     }
     collector->setMyAddress(daemonCore->publicNetworkIpAddr());
 }
Пример #13
0
void
AviaryScheddPlugin::earlyInitialize()
{

    // Since this plugin is registered with multiple
    // PluginManagers it may be initialized more than once,
    // and we don't want that
	static bool skip = false;
	if (skip) return; skip = true;

    string log_name("aviary_job.log");
    string id_name("job"); id_name+=SEPARATOR; id_name+=getScheddName();
    provider = AviaryProviderFactory::create(log_name,id_name,
											 "SCHEDULER","JOB","services/job/");
    if (!provider) {
        EXCEPT("Unable to configure AviaryProvider. Exiting...");
    }

	schedulerObj = SchedulerObject::getInstance();

	dirtyJobs = new DirtyJobsType();

	isHandlerRegistered = false;

	ReliSock *sock = new ReliSock;
	if (!sock) {
		EXCEPT("Failed to allocate transport socket");
	}
	if (!sock->assign(provider->getListenerSocket())) {
		EXCEPT("Failed to bind transport socket");
	}
	int index;
	if (-1 == (index =
			   daemonCore->Register_Socket((Stream *) sock,
										   "Aviary Method Socket",
										   (SocketHandlercpp) ( &AviaryScheddPlugin::handleTransportSocket ),
										   "Handler for Aviary Methods.",
										   this))) {
		EXCEPT("Failed to register transport socket");
	}

	m_initialized = false;
}
Пример #14
0
ReliSock::ReliSock(const ReliSock & orig) : Sock(orig)
{
	init();
	// now copy all cedar state info via the serialize() method
	char *buf = NULL;
	buf = orig.serialize();	// get state from orig sock
	ASSERT(buf);
	serialize(buf);			// put the state into the new sock
	delete [] buf;
}
Пример #15
0
SharedPortState::HandlerResult
SharedPortState::HandleHeader(Stream *&s)
{
	// First tell the target daemon that we are about to send the fd.

	ReliSock *sock = static_cast<ReliSock*>(s);
	sock->encode();
	if( !sock->put((int)SHARED_PORT_PASS_SOCK) ||
		!sock->end_of_message() )
	{
		dprintf(D_ALWAYS,"SharedPortClient: failed to send SHARED_PORT_PASS_FD to %s%s: %s\n",
			m_sock_name.c_str(),
			m_requested_by.c_str(),
			strerror(errno));
			return FAILED;
	}

	m_state = SEND_FD;
	return CONTINUE;
}
Пример #16
0
int IOProxy::connect_callback( Stream * /*stream*/ )
{
	ReliSock *client = new ReliSock;
	bool accept_client = false;
	int success;

	success = server->accept(*client);
	if(success) {
		if(get_local_ipaddr().compare_address(client->peer_addr())) {
			dprintf(D_ALWAYS,"IOProxy: accepting connection from %s\n",client->peer_ip_str());
			accept_client = true;
		} else {
			dprintf(D_ALWAYS,"IOProxy: rejecting connection from %s: invalid ip addr\n",client->peer_ip_str());
		}
	} else {
		dprintf(D_ALWAYS,"IOProxy: Couldn't accept connection: %s\n",strerror(errno));
	}
	
	if(accept_client) {
		IOProxyHandler *handler = new IOProxyHandler();
		if(!handler->init(client,cookie)) {
			dprintf(D_ALWAYS,"IOProxy: couldn't register request callback!\n");
			client->close();
			delete client;
		}

	} else {
		client->close();
		delete client;
	}

	return KEEP_STREAM;
}
// specific command function - sends local daemon's version and state over 
// the socket
bool
AbstractReplicatorStateMachine::versionAndStateCommand(ReliSock& socket)
{
    if( ! versionCommand( socket ) ) {
            return false;
    }
	int stateAsInteger = int( m_state );

    if( ! socket.code( stateAsInteger ) /*|| ! socket.end_of_message( )*/ ) {
        dprintf( D_NETWORK,
            "AbstractReplicatorStateMachine::versionAndStateCommand "
            "unable to code the state or eom%d\n", m_state );
        return false;
    }
    dprintf( D_ALWAYS, "AbstractReplicatorStateMachine::versionAndStateCommand "
                       "sent command successfully\n" );
    return true;
}
Пример #18
0
DCStarter::X509UpdateStatus
DCStarter::delegateX509Proxy( const char * filename, time_t expiration_time, char const *sec_session_id, time_t *result_expiration_time)
{
	ReliSock rsock;
	rsock.timeout(60);
	if( ! rsock.connect(_addr) ) {
		dprintf(D_ALWAYS, "DCStarter::delegateX509Proxy: "
			"Failed to connect to starter %s\n", _addr);
		return XUS_Error;
	}

	CondorError errstack;
	if( ! startCommand(DELEGATE_GSI_CRED_STARTER, &rsock, 0, &errstack, NULL, false, sec_session_id) ) {
		dprintf( D_ALWAYS, "DCStarter::delegateX509Proxy: "
				 "Failed send command to the starter: %s\n",
				 errstack.getFullText().c_str());
		return XUS_Error;
	}

		// Send the gsi proxy
	filesize_t file_size = 0;	// will receive the size of the file
	if ( rsock.put_x509_delegation(&file_size,filename,expiration_time,result_expiration_time) < 0 ) {
		dprintf(D_ALWAYS,
			"DCStarter::delegateX509Proxy "
			"failed to delegate proxy file %s (size=%ld)\n",
			filename, (long int)file_size);
		return XUS_Error;
	}

		// Fetch the result
	rsock.decode();
	int reply = 0;
	rsock.code(reply);
	rsock.end_of_message();

	switch(reply) {
		case 0: return XUS_Error;
		case 1: return XUS_Okay;
		case 2: return XUS_Declined;
	}
	dprintf(D_ALWAYS, "DCStarter::delegateX509Proxy: "
		"remote side returned unknown code %d. Treating "
		"as an error.\n", reply);
	return XUS_Error;
}
Пример #19
0
int
DCStartd::delegateX509Proxy( const char* proxy, time_t expiration_time, time_t *result_expiration_time )
{
	dprintf( D_FULLDEBUG, "Entering DCStartd::delegateX509Proxy()\n" );

	setCmdStr( "delegateX509Proxy" );

	if( ! claim_id ) {
		newError( CA_INVALID_REQUEST,
				  "DCStartd::delegateX509Proxy: Called with NULL claim_id" );
		return CONDOR_ERROR;
	}

		// if this claim is associated with a security session
	ClaimIdParser cidp(claim_id);

	//
	// 1) begin the DELEGATE_GSI_CRED_STARTD command
	//
	ReliSock* tmp = (ReliSock*)startCommand( DELEGATE_GSI_CRED_STARTD,
	                                         Stream::reli_sock,
	                                         20, NULL, NULL, false,
											 cidp.secSessionId() ); 
	if( ! tmp ) {
		newError( CA_COMMUNICATION_ERROR,
				  "DCStartd::delegateX509Proxy: Failed to send command DELEGATE_GSI_CRED_STARTD to the startd" );
		return CONDOR_ERROR;
	}

	//
	// 2) get reply from startd - OK means continue, NOT_OK means
	//    don't bother (the startd doesn't require a delegated
	//    proxy
	//
	tmp->decode();
	int reply;
	if( !tmp->code(reply) ) {
		newError( CA_COMMUNICATION_ERROR,
				  "DCStartd::delegateX509Proxy: failed to receive reply from startd (1)" );
		delete tmp;
		return CONDOR_ERROR;
	}
	if ( !tmp->end_of_message() ) {
		newError( CA_COMMUNICATION_ERROR,
				  "DCStartd::delegateX509Proxy: end of message error from startd (1)" );
		delete tmp;
		return CONDOR_ERROR;
	}
	if( reply == NOT_OK ) {
		delete tmp;
		return NOT_OK;
	}
		
	//
	// 3) send over the claim id and delegate (or copy) the given proxy
	//
	tmp->encode();
	int use_delegation =
		param_boolean( "DELEGATE_JOB_GSI_CREDENTIALS", true ) ? 1 : 0;
	if( !tmp->code( claim_id ) ) {
		newError( CA_COMMUNICATION_ERROR,
				  "DCStartd::delegateX509Proxy: Failed to send claim id to the startd" );
		delete tmp;
		return CONDOR_ERROR;
	}
	if ( !tmp->code( use_delegation ) ) {
		newError( CA_COMMUNICATION_ERROR,
				  "DCStartd::delegateX509Proxy: Failed to send use_delegation flag to the startd" );
		delete tmp;
		return CONDOR_ERROR;
	}
	int rv;
	filesize_t dont_care;
	if( use_delegation ) {
		rv = tmp->put_x509_delegation( &dont_care, proxy, expiration_time, result_expiration_time );
	}
	else {
		dprintf( D_FULLDEBUG,
		         "DELEGATE_JOB_GSI_CREDENTIALS is False; using direct copy\n");
		if( ! tmp->get_encryption() ) {
			newError( CA_COMMUNICATION_ERROR,
					  "DCStartd::delegateX509Proxy: Cannot copy: channel does not have encryption enabled" );
			delete tmp;
			return CONDOR_ERROR;
		}
		rv = tmp->put_file( &dont_care, proxy );
	}
	if( rv == -1 ) {
		newError( CA_FAILURE,
				  "DCStartd::delegateX509Proxy: Failed to delegate proxy" );
		delete tmp;
		return CONDOR_ERROR;
	}
	if ( !tmp->end_of_message() ) {
		newError( CA_FAILURE,
				  "DCStartd::delegateX509Proxy: end of message error to startd" );
		delete tmp;
		return CONDOR_ERROR;
	}

	// command successfully sent; now get the reply
	tmp->decode();
	if( !tmp->code(reply) ) {
		newError( CA_COMMUNICATION_ERROR,
				  "DCStartd::delegateX509Proxy: failed to receive reply from startd (2)" );
		delete tmp;
		return CONDOR_ERROR;
	}
	if ( !tmp->end_of_message() ) {
		newError( CA_COMMUNICATION_ERROR,
				  "DCStartd::delegateX509Proxy: end of message error from startd (2)" );
		delete tmp;
		return CONDOR_ERROR;
	}
	delete tmp;

	dprintf( D_FULLDEBUG,
	         "DCStartd::delegateX509Proxy: successfully sent command, reply is: %d\n",
	         reply );

	return reply;
}
Пример #20
0
int
CCBServer::HandleRequest(int cmd,Stream *stream)
{
	ReliSock *sock = (ReliSock *)stream;
	ASSERT( cmd == CCB_REQUEST );

		// Avoid lengthy blocking on communication with our peer.
		// This command-handler should not get called until data
		// is ready to read.
	sock->timeout(1);

	ClassAd msg;
	sock->decode();
	if( !msg.initFromStream( *sock ) || !sock->end_of_message() ) {
		dprintf(D_ALWAYS,
				"CCB: failed to receive request "
				"from %s.\n", sock->peer_description() );
		return FALSE;
	}

	MyString name;
	if( msg.LookupString(ATTR_NAME,name) ) {
			// client name is purely for debugging purposes
		name.formatstr_cat(" on %s",sock->peer_description());
		sock->set_peer_description(name.Value());
	}
	MyString target_ccbid_str;
	MyString return_addr;
	MyString connect_id; // id target daemon should present to requester
	CCBID target_ccbid;

		// NOTE: using ATTR_CLAIM_ID for connect id so that it is
		// automatically treated as a secret over the network.
		// It must be presented by the target daemon when connecting
		// to the requesting client, so the client can confirm that
		// the connection is in response to its request.

	if( !msg.LookupString(ATTR_CCBID,target_ccbid_str) ||
		!msg.LookupString(ATTR_MY_ADDRESS,return_addr) ||
		!msg.LookupString(ATTR_CLAIM_ID,connect_id) )
	{
		MyString ad_str;
		msg.sPrint(ad_str);
		dprintf(D_ALWAYS,
				"CCB: invalid request from %s: %s\n",
				sock->peer_description(), ad_str.Value() );
		return FALSE;
	}
	if( !CCBIDFromString(target_ccbid,target_ccbid_str.Value()) ) {
		dprintf(D_ALWAYS,
				"CCB: request from %s contains invalid CCBID %s\n",
				sock->peer_description(), target_ccbid_str.Value() );
		return FALSE;
	}

	CCBTarget *target = GetTarget( target_ccbid );
	if( !target ) {
		dprintf(D_ALWAYS,
			"CCB: rejecting request from %s for ccbid %s because no daemon is "
			"currently registered with that id "
			"(perhaps it recently disconnected).\n",
			sock->peer_description(), target_ccbid_str.Value());

		MyString error_msg;
		error_msg.formatstr(
			"CCB server rejecting request for ccbid %s because no daemon is "
			"currently registered with that id "
			"(perhaps it recently disconnected).", target_ccbid_str.Value());
		RequestReply( sock, false, error_msg.Value(), 0, target_ccbid );
		return FALSE;
	}

	SetSmallBuffers(sock);

	CCBServerRequest *request =
		new CCBServerRequest(
			sock,
			target_ccbid,
			return_addr.Value(),
			connect_id.Value() );
	AddRequest( request, target );

	dprintf(D_FULLDEBUG,
			"CCB: received request id %lu from %s for target ccbid %s "
			"(registered as %s)\n",
			request->getRequestID(),
			request->getSock()->peer_description(),
			target_ccbid_str.Value(),
			target->getSock()->peer_description());

	ForwardRequestToTarget( request, target );

	return KEEP_STREAM;
}
Пример #21
0
int
CCBServer::HandleRegistration(int cmd,Stream *stream)
{
	ReliSock *sock = (ReliSock *)stream;
	ASSERT( cmd == CCB_REGISTER );

		// Avoid lengthy blocking on communication with our peer.
		// This command-handler should not get called until data
		// is ready to read.
	sock->timeout(1);

	ClassAd msg;
	sock->decode();
	if( !msg.initFromStream( *sock ) || !sock->end_of_message() ) {
		dprintf(D_ALWAYS,
				"CCB: failed to receive registration "
				"from %s.\n", sock->peer_description() );
		return FALSE;
	}

	SetSmallBuffers(sock);

	MyString name;
	if( msg.LookupString(ATTR_NAME,name) ) {
			// target daemon name is purely for debugging purposes
		name.formatstr_cat(" on %s",sock->peer_description());
		sock->set_peer_description(name.Value());
	}

	CCBTarget *target = new CCBTarget(sock);

	MyString reconnect_cookie_str,reconnect_ccbid_str;
	CCBID reconnect_cookie,reconnect_ccbid;
	bool reconnected = false;
	if( msg.LookupString(ATTR_CLAIM_ID,reconnect_cookie_str) &&
		CCBIDFromString(reconnect_cookie,reconnect_cookie_str.Value()) &&
		msg.LookupString( ATTR_CCBID,reconnect_ccbid_str) &&
		CCBIDFromContactString(reconnect_ccbid,reconnect_ccbid_str.Value()) )
	{
		target->setCCBID( reconnect_ccbid );
		reconnected = ReconnectTarget( target, reconnect_cookie );
	}

	if( !reconnected ) {
		AddTarget( target );
	}

	CCBReconnectInfo *reconnect_info = GetReconnectInfo( target->getCCBID() );
	ASSERT( reconnect_info );

	sock->encode();

	ClassAd reply_msg;
	MyString ccb_contact;
	CCBIDToString( reconnect_info->getReconnectCookie(),reconnect_cookie_str );
		// We send our address as part of the CCB contact string, rather
		// than letting the target daemon fill it in.  This is to give us
		// potential flexibility on the CCB server side to do things like
		// assign different targets to different CCB server sub-processes,
		// each with their own command port.
	CCBIDToContactString( m_address.Value(), target->getCCBID(), ccb_contact );

	reply_msg.Assign(ATTR_CCBID,ccb_contact.Value());
	reply_msg.Assign(ATTR_COMMAND,CCB_REGISTER);
	reply_msg.Assign(ATTR_CLAIM_ID,reconnect_cookie_str.Value());

	if( !reply_msg.put( *sock ) || !sock->end_of_message() ) {
		dprintf(D_ALWAYS,
				"CCB: failed to send registration response "
				"to %s.\n", sock->peer_description() );

		RemoveTarget( target );
		return KEEP_STREAM; // we have already closed this socket
	}

	return KEEP_STREAM;
}
Пример #22
0
// download the files associated with the jobads to the sandbox at td_sinful
// with the supplied capability.
// The work_ad should contain:
//	ATTR_TREQ_CAPABILITY
//	ATTR_TREQ_FTP
//	ATTR_TREQ_JOBID_ALLOW_LIST
bool 
DCTransferD::download_job_files(ClassAd *work_ad, CondorError * errstack)
{
	ReliSock *rsock = NULL;
	int timeout = 60 * 60 * 8; // transfers take a long time...
	int i;
	ClassAd reqad, respad;
	std::string cap;
	int ftp;
	int invalid;
	int protocol;
	std::string reason;
	int num_transfers;
	ClassAd jad;
	const char *lhstr = NULL;
	ExprTree *tree = NULL;

	//////////////////////////////////////////////////////////////////////////
	// Connect to the transferd and authenticate
	//////////////////////////////////////////////////////////////////////////

	// This call with automatically connect to _addr, which was set in the
	// constructor of this object to be the transferd in question.
	rsock = (ReliSock*)startCommand(TRANSFERD_READ_FILES, Stream::reli_sock,
		timeout, errstack);
	if( ! rsock ) {
		dprintf( D_ALWAYS, "DCTransferD::download_job_files: "
				 "Failed to send command (TRANSFERD_READ_FILES) "
				 "to the schedd\n" );
		errstack->push("DC_TRANSFERD", 1, 
			"Failed to start a TRANSFERD_READ_FILES command.");
		return false;
	}

		// First, if we're not already authenticated, force that now. 
	if (!forceAuthentication( rsock, errstack )) {
		dprintf( D_ALWAYS, "DCTransferD::download_job_files() authentication "
				"failure: %s\n", errstack->getFullText().c_str() );
		errstack->push("DC_TRANSFERD", 1, 
			"Failed to authenticate properly.");
		return false;
	}

	rsock->encode();

	//////////////////////////////////////////////////////////////////////////
	// Query the transferd about the capability/protocol and see if I can 
	// download my files. It will respond with a classad saying good or bad.
	//////////////////////////////////////////////////////////////////////////

	work_ad->LookupString(ATTR_TREQ_CAPABILITY, cap);
	work_ad->LookupInteger(ATTR_TREQ_FTP, ftp);

	reqad.Assign(ATTR_TREQ_CAPABILITY, cap);
	reqad.Assign(ATTR_TREQ_FTP, ftp);

	// This request ad to the transferd should contain:
	//	ATTR_TREQ_CAPABILITY
	//	ATTR_TREQ_FTP
	reqad.put(*rsock);
	rsock->end_of_message();

	rsock->decode();

	// This response ad from the transferd should contain:
	// ATTR_TREQ_INVALID_REQUEST (set to true)
	// ATTR_TREQ_INVALID_REASON
	//
	// OR
	//
	//	ATTR_TREQ_INVALID_REQUEST (set to false)
	//	ATTR_TREQ_NUM_TRANSFERS
	//
	respad.initFromStream(*rsock);
	rsock->end_of_message();

	respad.LookupInteger(ATTR_TREQ_INVALID_REQUEST, invalid);
	
	if (invalid == TRUE) {
		// The transferd rejected my attempt to upload the fileset
		delete rsock;
		respad.LookupString(ATTR_TREQ_INVALID_REASON, reason);
		errstack->push("DC_TRANSFERD", 1, reason.c_str());
		return false;
	}

	respad.LookupInteger(ATTR_TREQ_NUM_TRANSFERS, num_transfers);

	//////////////////////////////////////////////////////////////////////////
	// Based upon the protocol I've chosen, use that method to download the
	// files. When using the FileTrans protocol, a child process on the
	// transferd side will be sending me individual job ads and then 
	// instantiating a filetransfer object for that ad.
	//////////////////////////////////////////////////////////////////////////

	dprintf(D_ALWAYS, "Receiving fileset");
	work_ad->LookupInteger(ATTR_TREQ_FTP, protocol);
	switch(protocol) {
		case FTP_CFTP: // download the files using the FileTransfer Object
			for (i = 0; i < num_transfers; i++) {

				// Grab a job ad the server is sending us so we know what
				// to receive.
				jad.initFromStream(*rsock);
				rsock->end_of_message();

				// translate the job ad by replacing the 
				// saved SUBMIT_ attributes so the download goes into the
				// correct place.
				jad.ResetExpr();
				while( jad.NextExpr(lhstr, tree) ) {
					if ( lhstr && strncasecmp("SUBMIT_",lhstr,7)==0 ) {
							// this attr name starts with SUBMIT_
							// compute new lhs (strip off the SUBMIT_)
						const char *new_attr_name = strchr(lhstr,'_');
						ExprTree * pTree;
						ASSERT(new_attr_name);
						new_attr_name++;
							// insert attribute
						pTree = tree->Copy();
						jad.Insert(new_attr_name, pTree, false);
					}
				}	// while next expr
		
				// instantiate a filetransfer object and have it accept the
				// files.
				FileTransfer ftrans;
				if ( !ftrans.SimpleInit(&jad, false, false, rsock) )
				{
					delete rsock;
					errstack->push("DC_TRANSFERD", 1, 
						"Failed to initate uploading of files.");
					return false;
				}

				// We want files to be copied to their final places, so apply
				// any filename remaps when downloading.
				if ( !ftrans.InitDownloadFilenameRemaps(&jad) ) {
					return false;
				}

				ftrans.setPeerVersion( version() );

				if ( !ftrans.DownloadFiles() ) {
					delete rsock;
					errstack->push("DC_TRANSFERD", 1, 
						"Failed to download files.");
					return false;
				}

				dprintf(D_ALWAYS | D_NOHEADER, ".");
			}	
			rsock->end_of_message();

			dprintf(D_ALWAYS | D_NOHEADER, "\n");

			break;

		default:
			// Bail due to user error. This client doesn't support the unknown
			// protocol.

			delete rsock;
			errstack->push("DC_TRANSFERD", 1, 
				"Unknown file transfer protocol selected.");
			return false;
			break;
	}

	//////////////////////////////////////////////////////////////////////////
	// Get the response from the transferd once it sees a completed 
	// movement of files to the child process.
	//////////////////////////////////////////////////////////////////////////

	rsock->decode();
	respad.initFromStream(*rsock);
	rsock->end_of_message();

	// close up shop
	delete rsock;

	respad.LookupInteger(ATTR_TREQ_INVALID_REQUEST, invalid);
	if ( invalid == TRUE ) {
		respad.LookupString(ATTR_TREQ_INVALID_REASON, reason);
		errstack->push("DC_TRANSFERD", 1, reason.c_str());
		return false;
	}

	return true;
}
Пример #23
0
// upload the files associated with the jobads to the sandbox at td_sinful
// with the supplied capability.
// The work_ad should contain:
//	ATTR_TREQ_CAPABILITY
//	ATTR_TREQ_FTP
//	ATTR_TREQ_JOBID_ALLOW_LIST
bool 
DCTransferD::upload_job_files(int JobAdsArrayLen, ClassAd* JobAdsArray[], 
		ClassAd *work_ad, CondorError * errstack)
{
	ReliSock *rsock = NULL;
	int timeout = 60 * 60 * 8; // transfers take a long time...
	int i;
	ClassAd reqad, respad;
	std::string cap;
	int ftp;
	int invalid;
	int protocol;
	std::string reason;

	//////////////////////////////////////////////////////////////////////////
	// Connect to the transferd and authenticate
	//////////////////////////////////////////////////////////////////////////

	// This call with automatically connect to _addr, which was set in the
	// constructor of this object to be the transferd in question.
	rsock = (ReliSock*)startCommand(TRANSFERD_WRITE_FILES, Stream::reli_sock,
		timeout, errstack);
	if( ! rsock ) {
		dprintf( D_ALWAYS, "DCTransferD::upload_job_files: "
				 "Failed to send command (TRANSFERD_WRITE_FILES) "
				 "to the schedd\n" );
		errstack->push("DC_TRANSFERD", 1, 
			"Failed to start a TRANSFERD_WRITE_FILES command.");
		return false;
	}

		// First, if we're not already authenticated, force that now. 
	if (!forceAuthentication( rsock, errstack )) {
		dprintf( D_ALWAYS, "DCTransferD::upload_job_files() authentication "
				"failure: %s\n", errstack->getFullText().c_str() );
		errstack->push("DC_TRANSFERD", 1, 
			"Failed to authenticate properly.");
		return false;
	}

	rsock->encode();

	//////////////////////////////////////////////////////////////////////////
	// Query the transferd about the capability/protocol and see if I can 
	// upload my files. It will respond with a classad saying good or bad.
	//////////////////////////////////////////////////////////////////////////

	work_ad->LookupString(ATTR_TREQ_CAPABILITY, cap);
	work_ad->LookupInteger(ATTR_TREQ_FTP, ftp);

	reqad.Assign(ATTR_TREQ_CAPABILITY, cap);
	reqad.Assign(ATTR_TREQ_FTP, ftp);

	// This request ad to the transferd should contain:
	//	ATTR_TREQ_CAPABILITY
	//	ATTR_TREQ_FTP
	reqad.put(*rsock);
	rsock->end_of_message();

	rsock->decode();

	// This response ad to the transferd should contain:
	// ATTR_TREQ_INVALID_REQUEST (set to true)
	// ATTR_TREQ_INVALID_REASON
	//
	// OR
	//
	//	ATTR_TREQ_INVALID_REQUEST (set to false)
	respad.initFromStream(*rsock);
	rsock->end_of_message();

	respad.LookupInteger(ATTR_TREQ_INVALID_REQUEST, invalid);
	
	if (invalid == TRUE) {
		// The transferd rejected my attempt to upload the fileset
		delete rsock;
		respad.LookupString(ATTR_TREQ_INVALID_REASON, reason);
		errstack->push("DC_TRANSFERD", 1, reason.c_str());
		return false;
	}

	//////////////////////////////////////////////////////////////////////////
	// Sort the job ads array into numerically sorted order. The transferd
	// must do the same.
	//////////////////////////////////////////////////////////////////////////

	// TODO

	//////////////////////////////////////////////////////////////////////////
	// Based upon the protocol I've chosen, use that method to upload the
	// files. When using the FileTrans protocol, a child process on the
	// transferd side will be inheriting a socket and accepting the
	// FileTransfer object's protocol.
	//////////////////////////////////////////////////////////////////////////

	// XXX Fix to only send jobads for allowed jobs

	dprintf(D_ALWAYS, "Sending fileset");
	work_ad->LookupInteger(ATTR_TREQ_FTP, protocol);
	switch(protocol) {
		case FTP_CFTP:
			// upload the files using the FileTransfer Object

			for (i=0; i<JobAdsArrayLen; i++) {
				FileTransfer ftrans;
				if ( !ftrans.SimpleInit(JobAdsArray[i], false, false, rsock) )
				{
					delete rsock;
					errstack->push("DC_TRANSFERD", 1, 
						"Failed to initate uploading of files.");
					return false;
				}

				ftrans.setPeerVersion( version() );

				if ( !ftrans.UploadFiles(true,false) ) {
					delete rsock;
					errstack->push("DC_TRANSFERD", 1, 
						"Failed to upload files.");
					return false;
				}

				dprintf(D_ALWAYS | D_NOHEADER, ".");
			}	
			rsock->end_of_message();
			dprintf(D_ALWAYS | D_NOHEADER, "\n");
			break;

		default:
			// Bail due to user error. This client doesn't support the uknown
			// protocol.

			delete rsock;
			errstack->push("DC_TRANSFERD", 1, 
				"Unknown file transfer protocol selected.");
			return false;
			break;
	}

	//////////////////////////////////////////////////////////////////////////
	// Get the response from the transferd once it sees a completed 
	// movement of files from the child process.
	//////////////////////////////////////////////////////////////////////////

	rsock->decode();
	respad.initFromStream(*rsock);
	rsock->end_of_message();

	// close up shop
	delete rsock;

	respad.LookupInteger(ATTR_TREQ_INVALID_REQUEST, invalid);
	if ( invalid == TRUE ) {
		respad.LookupString(ATTR_TREQ_INVALID_REASON, reason);
		errstack->push("DC_TRANSFERD", 1, reason.c_str());
		return false;
	}

	return true;
}
Пример #24
0
// The function occurs in a seperate thread or process
int
TransferD::write_files_thread(void *targ, Stream *sock)
{	
	ThreadArg *thread_arg = (ThreadArg*)targ;
	ReliSock *rsock = (ReliSock*)sock;
	TransferRequest *treq = NULL;
	// int protocol;
	SimpleList<ClassAd*> *jad_list = NULL;
	ClassAd *jad = NULL;
	int cluster, proc;
	int old_timeout;
	int result;
	ClassAd respad;

	// XXX This is a damn dirty hack whose solution resides in implementing
	// a checksum for the files.
	// Now we sleep here for one second.  Why?  So we are certain
	// to transfer back output files even if the job ran for less
	// than one second. This is because:
	// stat() can't tell the difference between:
	//   1) A job starts up, touches a file, and exits all in one second
	//   2) A job starts up, doesn't touch the file, and exits all in one
	//    second
	// So if we force the start time of the job to be one second later than
	// the time we know the files were written, stat() should be able
	// to perceive what happened, if anything.

	sleep(1);

	// even though I'm in a new process, I got here either through forking
	// or through a thread, so this memory is a copy.
	// protocol = thread_arg->protocol;
	treq = thread_arg->treq;
	delete thread_arg;

	// XXX deal with protocol value.

	////////////////////////////////////////////////////////////////////////
	// Sort the classads (XXX maybe put at a higher level in the protocol)
	////////////////////////////////////////////////////////////////////////
	
	// XXX TODO

	////////////////////////////////////////////////////////////////////////
	// Do the transfer.
	////////////////////////////////////////////////////////////////////////

	// file transfers can take a long time....
	old_timeout = rsock->timeout(60 * 60 * 8);

	jad_list = treq->todo_tasks();

	while(jad_list->Next(jad)) {
		FileTransfer ftrans;

		jad->LookupInteger(ATTR_CLUSTER_ID, cluster);
		jad->LookupInteger(ATTR_PROC_ID, proc);
		dprintf( D_ALWAYS, "TransferD::write_files_thread(): "
			"Transferring fileset for job %d.%d\n",
				cluster, proc);

		result = ftrans.SimpleInit(jad, true, true, rsock);
		if ( !result ) {
			dprintf( D_ALWAYS, "TransferD::write_files_thread(): "
				"failed to init file transfer for job %d.%d \n",
				cluster, proc );

			respad.Assign(ATTR_TREQ_INVALID_REQUEST, TRUE);
			respad.Assign(ATTR_TREQ_INVALID_REASON, 
				"FileTransfer Object failed to SimpleInit.");
			respad.put(*rsock);
			rsock->end_of_message();

			rsock->timeout(old_timeout);

			return EXIT_FAILURE;
		}

		ftrans.setPeerVersion(treq->get_peer_version().Value());

		// We're "downloading" from the client to here.
		result = ftrans.DownloadFiles();
		if ( !result ) {

			dprintf( D_ALWAYS, "TransferD::write_files_thread(): "
				"failed to transfer files for job %d.%d \n",
				cluster, proc );

			respad.Assign(ATTR_TREQ_INVALID_REQUEST, TRUE);
			respad.Assign(ATTR_TREQ_INVALID_REASON, 
				"FileTransfer Object failed to download.");
			respad.put(*rsock);
			rsock->end_of_message();

			rsock->timeout(old_timeout);
			return EXIT_FAILURE;
		}
	}

	rsock->end_of_message();

	//////////////////////////////////////////////////////////////////////////
	// Now that the file transfer is done, tell the client everything is ok.
	//////////////////////////////////////////////////////////////////////////

	dprintf(D_ALWAYS, "Informing client of finished transfer.\n");

	rsock->encode();

	respad.Assign(ATTR_TREQ_INVALID_REQUEST, FALSE);

	// This response ad to the client will contain:
	//
	//	ATTR_TREQ_INVALID_REQUEST (set to false)
	//
	respad.put(*rsock);
	rsock->end_of_message();

	delete rsock;

	return EXIT_SUCCESS;
}
Пример #25
0
// This handler is called when a client wishes to write files from the
// transferd's storage.
int
TransferD::write_files_handler(int cmd, Stream *sock) 
{
	ReliSock *rsock = (ReliSock*)sock;
	MyString capability;
	int protocol = FTP_UNKNOWN;
	TransferRequest *treq = NULL;
	MyString fquser;
	static int transfer_reaper_id = -1;
	ThreadArg *thread_arg;
	int tid;
	ClassAd reqad;
	ClassAd respad;

	cmd = cmd; // quiet the compiler.

	dprintf(D_ALWAYS, "Got TRANSFERD_WRITE_FILES!\n");

	/////////////////////////////////////////////////////////////////////////
	// make sure we are authenticated
	/////////////////////////////////////////////////////////////////////////
	if( ! rsock->triedAuthentication() ) {
		CondorError errstack;
		if( ! SecMan::authenticate_sock(rsock, WRITE, &errstack) ) {
			// we failed to authenticate, we should bail out now
			// since we don't know what user is trying to perform
			// this action.
			// TODO: it'd be nice to print out what failed, but we
			// need better error propagation for that...
			errstack.push( "TransferD::setup_transfer_request_handler()", 42,
				"Failure to register transferd - Authentication failed" );
			dprintf( D_ALWAYS, "setup_transfer_request_handler() "
				"aborting: %s\n",
				errstack.getFullText() );
			refuse( rsock );
			return CLOSE_STREAM;
		} 
	}

	fquser = rsock->getFullyQualifiedUser();


	/////////////////////////////////////////////////////////////////////////
	// Check to see if the capability the client tells us is something that
	// we have knowledge of. We ONLY check the capability and not the
	// identity of the person in question. This allows people of different
	// identities to write files here as long as they had the right 
	// capability. While this might not sound secure, they STILL had to have
	// authenticated as someone this daemon trusts. 
	// Similarly, check the protocol it wants to use as well as ensure that
	// the direction the transfer request was supposed to be is being honored.
	/////////////////////////////////////////////////////////////////////////
	rsock->decode();

	// soak the request ad from the client about what it wants to transfer
	reqad.initFromStream(*rsock);
	rsock->end_of_message();

	reqad.LookupString(ATTR_TREQ_CAPABILITY, capability);

	rsock->encode();

	// do I know of such a capability?
	if (m_treqs.lookup(capability, treq) != 0) {
		// didn't find it. Log it and tell them to leave and close up shop
		respad.Assign(ATTR_TREQ_INVALID_REQUEST, TRUE);
		respad.Assign(ATTR_TREQ_INVALID_REASON, "Invalid capability!");
		respad.put(*rsock);
		rsock->end_of_message();

		dprintf(D_ALWAYS, "Client identity '%s' tried to write some files "
			"using capability '%s', but there was no such capability. "
			"Access denied.\n", fquser.Value(), capability.Value());
		return CLOSE_STREAM;
	}

	reqad.LookupInteger(ATTR_TREQ_FTP, protocol);

	// am I willing to use this protocol?
	switch(protocol) {
		case FTP_CFTP: // FileTrans protocol, I'm happy.
			break;

		default:
			respad.Assign(ATTR_TREQ_INVALID_REQUEST, TRUE);
			respad.Assign(ATTR_TREQ_INVALID_REASON, 
				"Invalid file transfer protocol!");
			respad.put(*rsock);
			rsock->end_of_message();

			dprintf(D_ALWAYS, "Client identity '%s' tried to write some files "
				"using protocol '%d', but I don't support that protocol. "
				"Access denied.\n", fquser.Value(), protocol);
			return CLOSE_STREAM;
	}

	// nsure that this transfer request was of the uploading variety
	if (treq->get_direction() != FTPD_UPLOAD) {
			respad.Assign(ATTR_TREQ_INVALID_REQUEST, TRUE);
			respad.Assign(ATTR_TREQ_INVALID_REASON, 
				"Transfer Request was not an uploading request!");
			respad.put(*rsock);
			rsock->end_of_message();

			dprintf(D_ALWAYS, "Client identity '%s' tried to write some files "
				"to a transfer request that wasn't expecting to be written. "
				"Access denied.\n", fquser.Value());
	}

	/////////////////////////////////////////////////////////////////////////
	// Tell the client everything was ok.
	/////////////////////////////////////////////////////////////////////////

	respad.Assign(ATTR_TREQ_INVALID_REQUEST, FALSE);
	respad.put(*rsock);
	rsock->end_of_message();

	/////////////////////////////////////////////////////////////////////////
	// Set up a thread (a process under unix) to read ALL of the job files
	// for all of the ads in the TransferRequest.
	/////////////////////////////////////////////////////////////////////////

	// now create a thread, passing in the sock, which uses the file transfer
	// object to accept the files.

	if (transfer_reaper_id == -1) {
		// only set this up ONCE so each and every thread gets one.
		transfer_reaper_id = daemonCore->Register_Reaper(
						"write_files_reaper",
						(ReaperHandlercpp) &TransferD::write_files_reaper,
						"write_files_reaper",
						this
						);
	}

	thread_arg = new ThreadArg(protocol, treq);

	// Start a new thread (process on Unix) to do the work
	tid = daemonCore->Create_Thread(
		(ThreadStartFunc)&TransferD::write_files_thread,
		(void *)thread_arg,
		rsock,
		transfer_reaper_id
		);
	
	if (tid == FALSE) {
		// XXX How do I handle this failure?
	}


	// associate the tid with the request so I can deal with it propery in
	// the reaper
	m_client_to_transferd_threads.insert(tid, treq);

	// The stream is inherited to the thread, who does the transfer and
	// finishes the protocol, but in the parent, I'm closing it.
	return CLOSE_STREAM;
}
Пример #26
0
void
Triggerd::init()
{
   std::string trigger_log;
   ClassAd* ad;
   HashKey key;
   uint32_t key_value;
   ReliSock* sock;
   int index;
   char* host;
   char* tmp;
   char* dataDir = NULL;
   char* username;
   char* password;
   char* mechanism;
   int port, interval;
   std::string storefile;
   std::string error_text;
   std::stringstream int_str;
   qpid::management::ConnectionSettings settings;
   bool enable_console = true;

   dprintf(D_FULLDEBUG, "Triggerd::init called\n");

   char* name = param("TRIGGERD_NAME");
   if (name)
   {
      char* valid_name = build_valid_daemon_name(name);
      daemonName = valid_name;
      delete[] name;
      delete[] valid_name;
   }
   else
   {
      char* default_name = build_valid_daemon_name("triggerd");
      if(default_name)
      {
         daemonName = default_name;
         delete[] default_name;
      }
   }

   port = param_integer("QMF_BROKER_PORT", 5672);
   if (NULL == (host = param("QMF_BROKER_HOST")))
   {
      host = strdup("localhost");
   }

   if (NULL == (username = param("QMF_BROKER_USERNAME")))
   {
      username = strdup("");
   }

   if (NULL == (mechanism = param("QMF_BROKER_AUTH_MECH")))
   {
      mechanism = strdup("ANONYMOUS");
   }

   tmp = param("QMF_STOREFILE");
   if (NULL == tmp)
   {
      storefile = ".triggerd_storefile";
   }
   else
   {
      storefile = tmp;
      free(tmp);
      tmp = NULL;
   }
   interval = param_integer("QMF_UPDATE_INTERVAL", 10);

   password = getBrokerPassword();

   dataDir = param("DATA");
   ASSERT(dataDir);

   trigger_log = dataDir;
   trigger_log += "/triggers.log";
   triggerCollection = new ClassAdCollection(NULL, trigger_log.c_str());
   free(dataDir);

   settings.host = std::string(host);
   settings.port = port;
   settings.username = std::string(username);
   settings.password = std::string(password);
   settings.mechanism = std::string(mechanism);

   // Initialize the QMF agent
   singleton = new ManagementAgent::Singleton();
   ManagementAgent* agent = singleton->getInstance();

   CondorTriggerService::registerSelf(agent);
   CondorTrigger::registerSelf(agent);
   EventCondorTriggerNotify::registerSelf(agent);

   agent->setName("com.redhat.grid","condortriggerservice", daemonName.c_str());
   agent->init(settings, interval, true, storefile);
   mgmtObject = new CondorTriggerService(agent, this);

   // Initialize the QMF console, if desired
   enable_console = param_boolean("ENABLE_ABSENT_NODES_DETECTION", false);
   if (true == enable_console)
   {
      console = new TriggerConsole();
      console->config(host, port, username, password, mechanism);
   }

   free(host);
   free(username);
   free(password);
   free(mechanism);

   // Initialize the triggers if any already exist
   triggerCollection->StartIterateAllClassAds();
   while(true == triggerCollection->IterateAllClassAds(ad, key))
   {
      key_value = atoll(key.value());
      if (triggers.end() == triggers.find(key_value))
      {
         if (STATUS_OK != AddTriggerToCollection(key_value, ad, error_text))
         {
            dprintf(D_ALWAYS, "Triggerd Error: '%s'.  Removing trigger\n", error_text.c_str());
            int_str << key_value;
            triggerCollection->DestroyClassAd(int_str.str().c_str());
         }
      }
   }

   bool lifetime = param_boolean("QMF_IS_PERSISTENT", true);
   agent->addObject(mgmtObject, daemonName.c_str(), lifetime);

   // Create a socket to handle management method calls
   sock = new ReliSock;
   if (NULL == sock)
   {
      EXCEPT("Failed to create Managment socket");
   }
   if (0 == sock->assign(agent->getSignalFd()))
   {
      EXCEPT("Failed to bind Management socket");
   }
   if (-1 == (index = daemonCore->Register_Socket((Stream *) sock, 
                                                  "Management Method Socket",
                                                  (SocketHandlercpp) &Triggerd::HandleMgmtSocket,
                                                  "Handler for Management Methods",
                                                  this)))
   {
      EXCEPT("Failed to register Management socket");
   }

   config();
}
Пример #27
0
bool DCStarter::startSSHD(char const *known_hosts_file,char const *private_client_key_file,char const *preferred_shells,char const *slot_name,char const *ssh_keygen_args,ReliSock &sock,int timeout,char const *sec_session_id,MyString &remote_user,MyString &error_msg,bool &retry_is_sensible)
{

	retry_is_sensible = false;

#ifndef HAVE_SSH_TO_JOB
	error_msg = "This version of Condor does not support ssh key exchange.";
	return false;
#else
	if( !connectSock(&sock, timeout, NULL) ) {
		error_msg = "Failed to connect to starter";
		return false;
	}

	if( !startCommand(START_SSHD, &sock,timeout,NULL,NULL,false,sec_session_id) ) {
		error_msg = "Failed to send START_SSHD to starter";
		return false;
	}

	ClassAd input;

	if( preferred_shells && *preferred_shells ) {
		input.Assign(ATTR_SHELL,preferred_shells);
	}

	if( slot_name && *slot_name ) {
			// This is a little silly.
			// We are telling the remote side the name of the slot so
			// that it can put it in the welcome message.
		input.Assign(ATTR_NAME,slot_name);
	}

	if( ssh_keygen_args && *ssh_keygen_args ) {
		input.Assign(ATTR_SSH_KEYGEN_ARGS,ssh_keygen_args);
	}

	sock.encode();
	if( !putClassAd(&sock, input) || !sock.end_of_message() ) {
		error_msg = "Failed to send START_SSHD request to starter";
		return false;
	}

	ClassAd result;
	sock.decode();
	if( !getClassAd(&sock, result) || !sock.end_of_message() ) {
		error_msg = "Failed to read response to START_SSHD from starter";
		return false;
	}

	bool success = false;
	result.LookupBool(ATTR_RESULT,success);
	if( !success ) {
		std::string remote_error_msg;
		result.LookupString(ATTR_ERROR_STRING,remote_error_msg);
		error_msg.formatstr("%s: %s",slot_name,remote_error_msg.c_str());
		retry_is_sensible = false;
		result.LookupBool(ATTR_RETRY,retry_is_sensible);
		return false;
	}

	result.LookupString(ATTR_REMOTE_USER,remote_user);

	std::string public_server_key;
	if( !result.LookupString(ATTR_SSH_PUBLIC_SERVER_KEY,public_server_key) ) {
		error_msg = "No public ssh server key received in reply to START_SSHD";
		return false;
	}
	std::string private_client_key;
	if( !result.LookupString(ATTR_SSH_PRIVATE_CLIENT_KEY,private_client_key) ) {
		error_msg = "No ssh client key received in reply to START_SSHD";
		return false;
	}


		// store the private client key
	unsigned char *decode_buf = NULL;
	int length = -1;
	condor_base64_decode(private_client_key.c_str(),&decode_buf,&length);
	if( !decode_buf ) {
		error_msg = "Error decoding ssh client key.";
		return false;
	}
	FILE *fp = safe_fcreate_fail_if_exists(private_client_key_file,"a",0400);
	if( !fp ) {
		error_msg.formatstr("Failed to create %s: %s",
						  private_client_key_file,strerror(errno));
		free( decode_buf );
		return false;
	}
	if( fwrite(decode_buf,length,1,fp)!=1 ) {
		error_msg.formatstr("Failed to write to %s: %s",
						  private_client_key_file,strerror(errno));
		fclose( fp );
		free( decode_buf );
		return false;
	}
	if( fclose(fp)!=0 ) {
		error_msg.formatstr("Failed to close %s: %s",
						  private_client_key_file,strerror(errno));
		free( decode_buf );
		return false;
	}
	fp = NULL;
	free( decode_buf );
	decode_buf = NULL;


		// store the public server key in the known_hosts file
	length = -1;
	condor_base64_decode(public_server_key.c_str(),&decode_buf,&length);
	if( !decode_buf ) {
		error_msg = "Error decoding ssh server key.";
		return false;
	}
	fp = safe_fcreate_fail_if_exists(known_hosts_file,"a",0600);
	if( !fp ) {
		error_msg.formatstr("Failed to create %s: %s",
						  known_hosts_file,strerror(errno));
		free( decode_buf );
		return false;
	}

		// prepend a host name pattern (*) to the public key to make a valid
		// record in the known_hosts file
	fprintf(fp,"* ");

	if( fwrite(decode_buf,length,1,fp)!=1 ) {
		error_msg.formatstr("Failed to write to %s: %s",
						  known_hosts_file,strerror(errno));
		fclose( fp );
		free( decode_buf );
		return false;
	}

	if( fclose(fp)!=0 ) {
		error_msg.formatstr("Failed to close %s: %s",
						  known_hosts_file,strerror(errno));
		free( decode_buf );
		return false;
	}
	fp = NULL;
	free( decode_buf );
	decode_buf = NULL;

	return true;
#endif
}
// sending command to remote replication daemon; specified command function
// allows to specify which data is to be sent to the remote daemon
void
AbstractReplicatorStateMachine::sendCommand(
    int command, char* daemonSinfulString, CommandFunction function )
{
    dprintf( D_ALWAYS, "AbstractReplicatorStateMachine::sendCommand %s to %s\n",
               utilToString( command ), daemonSinfulString );
    Daemon  daemon( DT_ANY, daemonSinfulString );
    ReliSock socket;

    // no retries after 'm_connectionTimeout' seconds of unsuccessful connection
    socket.timeout( m_connectionTimeout );
    socket.doNotEnforceMinimalCONNECT_TIMEOUT( );

    if( ! socket.connect( daemonSinfulString, 0, false ) ) {
        dprintf( D_ALWAYS, "AbstractReplicatorStateMachine::sendCommand "
                            "unable to connect to %s\n",
                   daemonSinfulString );
		socket.close( );

        return ;
    }
// General actions for any command sending
    if( ! daemon.startCommand( command, &socket, m_connectionTimeout ) ) {
        dprintf( D_ALWAYS, "AbstractReplicatorStateMachine::sendCommand "
                            "cannot start command %s to %s\n",
                   utilToString( command ), daemonSinfulString );
		socket.close( );

        return ;
    }

    char const* sinfulString = daemonCore->InfoCommandSinfulString();
    if(! socket.put( sinfulString )/* || ! socket.end_of_message( )*/) {
        dprintf( D_ALWAYS, "AbstractReplicatorStateMachine::sendCommand "
                            "unable to code the local sinful string or eom%s\n",
                   sinfulString );
		socket.close( );

        return ;
    }
    else {
        dprintf( D_FULLDEBUG, "AbstractReplicatorStateMachine::sendCommand "
                              "local sinful string coded successfully\n" );
    }
// End of General actions for any command sending

// Command-specific actions
	if( ! ((*this).*(function))( socket ) ) {
    	socket.close( );

		return ;
	}
// End of Command-specific actions
	if( ! socket.end_of_message( ) ) {
		socket.close( );
       	dprintf( D_ALWAYS, "AbstractReplicatorStateMachine::sendCommand "
                            "unable to code the end of message\n" );
       	return ;
   	}

	socket.close( );
   	dprintf( D_ALWAYS, "AbstractReplicatorStateMachine::sendCommand "
                       "%s command sent to %s successfully\n",
             utilToString( command ), daemonSinfulString );
}
Пример #29
0
bool
DCStarter::peek(bool transfer_stdout, ssize_t &stdout_offset, bool transfer_stderr, ssize_t &stderr_offset, const std::vector<std::string> &filenames, std::vector<ssize_t> &offsets, size_t max_bytes, bool &retry_sensible, PeekGetFD &next, std::string &error_msg, unsigned timeout, const std::string &sec_session_id, DCTransferQueue *xfer_q)
{
	compat_classad::ClassAd ad;
	ad.InsertAttr(ATTR_JOB_OUTPUT, transfer_stdout);
	ad.InsertAttr("OutOffset", stdout_offset);
	ad.InsertAttr(ATTR_JOB_ERROR, transfer_stderr);
	ad.InsertAttr("ErrOffset", stderr_offset);
	ad.InsertAttr(ATTR_VERSION, CondorVersion());

	size_t total_files = 0;
	total_files += transfer_stdout ? 1 : 0;
	total_files += transfer_stderr ? 1 : 0;

	if (filenames.size())
	{
		total_files += filenames.size();
		std::vector<classad::ExprTree *> filelist; filelist.reserve(filenames.size());
		std::vector<classad::ExprTree *> offsetlist; offsetlist.reserve(filenames.size());
		std::vector<ssize_t>::const_iterator it2 = offsets.begin();
		for (std::vector<std::string>::const_iterator it = filenames.begin();
			it != filenames.end() && it2 != offsets.end();
			it++, it2++)
		{
			classad::Value value;
			value.SetStringValue(*it);
			filelist.push_back(classad::Literal::MakeLiteral(value));
			value.SetIntegerValue(*it2);
			offsetlist.push_back(classad::Literal::MakeLiteral(value));
		}
		classad::ExprTree *list(classad::ExprList::MakeExprList(filelist));
		ad.Insert("TransferFiles", list);
		list = classad::ExprList::MakeExprList(offsetlist);
		ad.Insert("TransferOffsets", list);
	}

	ad.InsertAttr(ATTR_MAX_TRANSFER_BYTES, static_cast<long long>(max_bytes));

	ReliSock sock;

	if( !connectSock(&sock, timeout, NULL) ) {
		error_msg = "Failed to connect to starter";
		return false;
	}

	if( !startCommand(STARTER_PEEK, &sock, timeout, NULL, NULL, false, sec_session_id.c_str()) ) {
		error_msg = "Failed to send START_PEEK to starter";
		return false;
	}
	sock.encode();
	if (!putClassAd(&sock, ad) || !sock.end_of_message()) {
		error_msg = "Failed to send request to starter";
		return false;
	}

	compat_classad::ClassAd response;
	sock.decode();
	if (!getClassAd(&sock, response) || !sock.end_of_message())
	{
		error_msg = "Failed to read response for peeking at logs.";
		return false;
	}
	dPrintAd(D_FULLDEBUG, response);

	bool success = false;
	if (!response.EvaluateAttrBool(ATTR_RESULT, success) || !success)
	{
		response.EvaluateAttrBool(ATTR_RETRY, retry_sensible);
		error_msg = "Remote operation failed.";
		response.EvaluateAttrString(ATTR_ERROR_STRING, error_msg);
		return false;
	}
	classad::Value valueX;
	classad_shared_ptr<classad::ExprList> list;
	if (!response.EvaluateAttr("TransferFiles", valueX) || !valueX.IsSListValue(list))
	{
		error_msg = "Unable to evaluate starter response";
		return false;
	}
	classad_shared_ptr<classad::ExprList> offlist;
	if (!response.EvaluateAttr("TransferOffsets", valueX) || !valueX.IsSListValue(offlist))
	{
		error_msg = "Unable to evaluate starter response (missing offsets)";
		return false;
	}

	size_t remaining = max_bytes;
	size_t file_count = 0;
	classad::ExprList::const_iterator it2 = offlist->begin();
	for (classad::ExprList::const_iterator it = list->begin();
		it != list->end() && it2 != offlist->end();
		it++, it2++)
	{
		classad::Value value;
		(*it2)->Evaluate(value);
		off_t off = -1;
		value.IsIntegerValue(off);
		(*it)->Evaluate(value);
		std::string filename;
		int64_t xfer_fd = -1;
		if (!value.IsStringValue(filename) && value.IsIntegerValue(xfer_fd))
		{
			if (xfer_fd == 0) filename = "_condor_stdout";
			if (xfer_fd == 1) filename = "_condor_stderr";
		}
		int fd = next.getNextFD(filename);
		filesize_t size = -1;
		int retval;
		if ((retval = sock.get_file(&size, fd, false, false, remaining, xfer_q)) && (retval != GET_FILE_MAX_BYTES_EXCEEDED))
		{
			error_msg = "Internal error when transferring file " + filename;
		}
		else if (size >= 0)
		{
			remaining -= max_bytes;
			file_count++;
			off += size;
		}
		else
		{
			error_msg = "Failed to transfer file " + filename;
		}
		if (xfer_fd == 0)
		{
			stdout_offset = off;
			//dprintf(D_FULLDEBUG, "New stdout offset: %ld\n", stdout_offset);
		}
		else if (xfer_fd == 1)
		{
			stderr_offset = off;
		}
		else
		{
			std::vector<ssize_t>::iterator it4 = offsets.begin();
			for (std::vector<std::string>::const_iterator it3 = filenames.begin();
				it3 != filenames.end() && it4 != offsets.end();
				it3++, it4++)
			{
				if (*it3 == filename) *it4 = off;
			}
		}
	}
	size_t remote_file_count;
	if (!sock.get(remote_file_count) || !sock.end_of_message())
	{
		error_msg = "Unable to get remote file count.";
		return false;
	}
	if (file_count != remote_file_count)
	{
		std::stringstream ss;
		ss << "Recieved " << file_count << " files, but remote side thought it sent " << remote_file_count << " files";
		error_msg = ss.str();
		return false;
	}
	if ((total_files != file_count) && !error_msg.size())
	{
		error_msg = "At least one file transfer failed.";
		return false;
	}
	return true;
}
Пример #30
0
int
part_send_job(
	      int test_starter,
	      char *host,
	      int &reason,
	      char *capability,
	      char * /*schedd*/,
	      PROC *proc,
	      int &sd1,
	      int &sd2,
	      char **name)
{
  int reply;
  ReliSock *sock = NULL;
  StartdRec stRec;
  PORTS ports;
  bool done = false;
  int retry_delay = 3;
  int num_retries = 0;

  // make sure we have the job classad
  InitJobAd(proc->id.cluster, proc->id.proc);

  while( !done ) {

	  Daemon startd(DT_STARTD, host, NULL);
	  if (!(sock = (ReliSock*)startd.startCommand ( ACTIVATE_CLAIM, Stream::reli_sock, 90))) {
		  dprintf( D_ALWAYS, "startCommand(ACTIVATE_CLAIM) to startd failed\n");
		  goto returnfailure;
	  }

		  // Send the capability
	  ClaimIdParser idp( capability );
	  dprintf(D_FULLDEBUG, "send capability %s\n", idp.publicClaimId() );
	  if( !sock->put_secret(capability) ) {
		  dprintf( D_ALWAYS, "sock->put(\"%s\") failed\n",idp.publicClaimId());
		  goto returnfailure;
	  }

	  // Send the starter number
	  if( test_starter ) {
		  dprintf( D_ALWAYS, "Requesting Alternate Starter %d\n", test_starter );
	  } else {
		  dprintf( D_ALWAYS, "Requesting Primary Starter\n" );
	  }
	  if( !sock->code(test_starter) ) {
		  dprintf( D_ALWAYS, "sock->code(%d) failed\n", test_starter );
		  goto returnfailure;
	  }

		  // Send the job info 
	  if( !JobAd->put(*sock) ) {
		  dprintf( D_ALWAYS, "failed to send job ad\n" );
		  goto returnfailure;
	  }	

	  if( !sock->end_of_message() ) {
		  dprintf( D_ALWAYS, "failed to send message to startd\n" );
		  goto returnfailure;
	  }

		  // We're done sending.  Now, get the reply.
	  sock->decode();
	  if( !sock->code(reply) || !sock->end_of_message() ) {
		  dprintf( D_ALWAYS, "failed to receive reply from startd\n" );
		  goto returnfailure;
	  }
	  
	  switch( reply ) {
	  case OK:
		  dprintf( D_ALWAYS, "Shadow: Request to run a job was ACCEPTED\n" );
		  done = true;
		  break;

	  case NOT_OK:
		  dprintf( D_ALWAYS, "Shadow: Request to run a job was REFUSED\n");
		  goto returnfailure;
		  break;

	  case CONDOR_TRY_AGAIN:
		  num_retries++;
		  dprintf( D_ALWAYS,
				   "Shadow: Request to run a job was TEMPORARILY REFUSED\n" );
		  if( num_retries > 20 ) {
			  dprintf( D_ALWAYS, "Shadow: Too many retries, giving up.\n" );
			  goto returnfailure;
		  }			  
		  delete sock;
		  dprintf( D_ALWAYS,
				   "Shadow: will try again in %d seconds\n", retry_delay );
		  sleep( retry_delay );
		  break;

	  default:
		  dprintf(D_ALWAYS,"Unknown reply from startd for command ACTIVATE_CLAIM\n");
		  dprintf(D_ALWAYS,"Shadow: Request to run a job was REFUSED\n");
		  goto returnfailure;
		  break;
	  }
  }

  /* start flock : dhruba */
  sock->decode();
  memset( &stRec, '\0', sizeof(stRec) );
  if( !sock->code(stRec) || !sock->end_of_message() ) {
	  dprintf(D_ALWAYS, "Can't read reply from startd.\n");
	  goto returnfailure;
  }
  ports = stRec.ports;
  if( stRec.ip_addr ) {
	host = stRec.server_name;
	if(name) {
		*name = strdup(stRec.server_name);
	}
    dprintf(D_FULLDEBUG,
	    "host = %s inet_addr = 0x%x port1 = %d port2 = %d\n",
	    host, stRec.ip_addr,ports.port1, ports.port2
	    );
  } else {
    dprintf(D_FULLDEBUG,
	    "host = %s port1 = %d port2 = %d\n",
	    host, ports.port1, ports.port2
	    );
  }    

  if( ports.port1 == 0 ) {
    dprintf( D_ALWAYS, "Shadow: Request to run a job on %s was REFUSED\n",
	     host );
	goto returnfailure;
  }
  /* end  flock ; dhruba */

	  // We don't use the server_name in the StartdRec, because our
	  // DNS query may fail or may give us the wrong IP address
	  // (either because it's stale or because we're talking to a
	  // machine with multiple network interfaces).  Sadly, we can't
	  // use the ip_addr either, because the startd doesn't send it in
	  // the correct byte ordering on little-endian machines.  So, we
	  // grab the IP address from the ReliSock, since we konw the
	  // startd always uses the same IP address for all of its
	  // communication.
  char sinfulstring[SINFUL_STRING_BUF_SIZE];

  generate_sinful(sinfulstring, SINFUL_STRING_BUF_SIZE, sock->peer_ip_str(), ports.port1);
  if( (sd1 = do_connect(sinfulstring, (char *)0, (u_short)ports.port1)) < 0 ) {
    dprintf( D_ALWAYS, "failed to connect to scheduler on %s\n", sinfulstring );
	goto returnfailure;
  }

  generate_sinful(sinfulstring, SINFUL_STRING_BUF_SIZE, sock->peer_ip_str(), ports.port2);
  if( (sd2 = do_connect(sinfulstring, (char *)0, (u_short)ports.port2)) < 0 ) {
    dprintf( D_ALWAYS, "failed to connect to scheduler on %s\n", sinfulstring );
	close(sd1);
	goto returnfailure;
  }

  delete sock;
  sock = NULL;

  if ( stRec.server_name ) {
	  free( stRec.server_name );
  }

  return 0;

returnfailure:
  reason = JOB_NOT_STARTED;
  delete sock;
  return -1;
}