예제 #1
0
DCStarter::X509UpdateStatus
DCStarter::delegateX509Proxy( const char * filename, time_t expiration_time, char const *sec_session_id, time_t *result_expiration_time)
{
	ReliSock rsock;
	rsock.timeout(60);
	if( ! rsock.connect(_addr) ) {
		dprintf(D_ALWAYS, "DCStarter::delegateX509Proxy: "
			"Failed to connect to starter %s\n", _addr);
		return XUS_Error;
	}

	CondorError errstack;
	if( ! startCommand(DELEGATE_GSI_CRED_STARTER, &rsock, 0, &errstack, NULL, false, sec_session_id) ) {
		dprintf( D_ALWAYS, "DCStarter::delegateX509Proxy: "
				 "Failed send command to the starter: %s\n",
				 errstack.getFullText().c_str());
		return XUS_Error;
	}

		// Send the gsi proxy
	filesize_t file_size = 0;	// will receive the size of the file
	if ( rsock.put_x509_delegation(&file_size,filename,expiration_time,result_expiration_time) < 0 ) {
		dprintf(D_ALWAYS,
			"DCStarter::delegateX509Proxy "
			"failed to delegate proxy file %s (size=%ld)\n",
			filename, (long int)file_size);
		return XUS_Error;
	}

		// Fetch the result
	rsock.decode();
	int reply = 0;
	rsock.code(reply);
	rsock.end_of_message();

	switch(reply) {
		case 0: return XUS_Error;
		case 1: return XUS_Okay;
		case 2: return XUS_Declined;
	}
	dprintf(D_ALWAYS, "DCStarter::delegateX509Proxy: "
		"remote side returned unknown code %d. Treating "
		"as an error.\n", reply);
	return XUS_Error;
}
예제 #2
0
// The function occurs in a seperate thread or process
int
TransferD::write_files_thread(void *targ, Stream *sock)
{	
	ThreadArg *thread_arg = (ThreadArg*)targ;
	ReliSock *rsock = (ReliSock*)sock;
	TransferRequest *treq = NULL;
	// int protocol;
	SimpleList<ClassAd*> *jad_list = NULL;
	ClassAd *jad = NULL;
	int cluster, proc;
	int old_timeout;
	int result;
	ClassAd respad;

	// XXX This is a damn dirty hack whose solution resides in implementing
	// a checksum for the files.
	// Now we sleep here for one second.  Why?  So we are certain
	// to transfer back output files even if the job ran for less
	// than one second. This is because:
	// stat() can't tell the difference between:
	//   1) A job starts up, touches a file, and exits all in one second
	//   2) A job starts up, doesn't touch the file, and exits all in one
	//    second
	// So if we force the start time of the job to be one second later than
	// the time we know the files were written, stat() should be able
	// to perceive what happened, if anything.

	sleep(1);

	// even though I'm in a new process, I got here either through forking
	// or through a thread, so this memory is a copy.
	// protocol = thread_arg->protocol;
	treq = thread_arg->treq;
	delete thread_arg;

	// XXX deal with protocol value.

	////////////////////////////////////////////////////////////////////////
	// Sort the classads (XXX maybe put at a higher level in the protocol)
	////////////////////////////////////////////////////////////////////////
	
	// XXX TODO

	////////////////////////////////////////////////////////////////////////
	// Do the transfer.
	////////////////////////////////////////////////////////////////////////

	// file transfers can take a long time....
	old_timeout = rsock->timeout(60 * 60 * 8);

	jad_list = treq->todo_tasks();

	while(jad_list->Next(jad)) {
		FileTransfer ftrans;

		jad->LookupInteger(ATTR_CLUSTER_ID, cluster);
		jad->LookupInteger(ATTR_PROC_ID, proc);
		dprintf( D_ALWAYS, "TransferD::write_files_thread(): "
			"Transferring fileset for job %d.%d\n",
				cluster, proc);

		result = ftrans.SimpleInit(jad, true, true, rsock);
		if ( !result ) {
			dprintf( D_ALWAYS, "TransferD::write_files_thread(): "
				"failed to init file transfer for job %d.%d \n",
				cluster, proc );

			respad.Assign(ATTR_TREQ_INVALID_REQUEST, TRUE);
			respad.Assign(ATTR_TREQ_INVALID_REASON, 
				"FileTransfer Object failed to SimpleInit.");
			respad.put(*rsock);
			rsock->end_of_message();

			rsock->timeout(old_timeout);

			return EXIT_FAILURE;
		}

		ftrans.setPeerVersion(treq->get_peer_version().Value());

		// We're "downloading" from the client to here.
		result = ftrans.DownloadFiles();
		if ( !result ) {

			dprintf( D_ALWAYS, "TransferD::write_files_thread(): "
				"failed to transfer files for job %d.%d \n",
				cluster, proc );

			respad.Assign(ATTR_TREQ_INVALID_REQUEST, TRUE);
			respad.Assign(ATTR_TREQ_INVALID_REASON, 
				"FileTransfer Object failed to download.");
			respad.put(*rsock);
			rsock->end_of_message();

			rsock->timeout(old_timeout);
			return EXIT_FAILURE;
		}
	}

	rsock->end_of_message();

	//////////////////////////////////////////////////////////////////////////
	// Now that the file transfer is done, tell the client everything is ok.
	//////////////////////////////////////////////////////////////////////////

	dprintf(D_ALWAYS, "Informing client of finished transfer.\n");

	rsock->encode();

	respad.Assign(ATTR_TREQ_INVALID_REQUEST, FALSE);

	// This response ad to the client will contain:
	//
	//	ATTR_TREQ_INVALID_REQUEST (set to false)
	//
	respad.put(*rsock);
	rsock->end_of_message();

	delete rsock;

	return EXIT_SUCCESS;
}
// sending command to remote replication daemon; specified command function
// allows to specify which data is to be sent to the remote daemon
void
AbstractReplicatorStateMachine::sendCommand(
    int command, char* daemonSinfulString, CommandFunction function )
{
    dprintf( D_ALWAYS, "AbstractReplicatorStateMachine::sendCommand %s to %s\n",
               utilToString( command ), daemonSinfulString );
    Daemon  daemon( DT_ANY, daemonSinfulString );
    ReliSock socket;

    // no retries after 'm_connectionTimeout' seconds of unsuccessful connection
    socket.timeout( m_connectionTimeout );
    socket.doNotEnforceMinimalCONNECT_TIMEOUT( );

    if( ! socket.connect( daemonSinfulString, 0, false ) ) {
        dprintf( D_ALWAYS, "AbstractReplicatorStateMachine::sendCommand "
                            "unable to connect to %s\n",
                   daemonSinfulString );
		socket.close( );

        return ;
    }
// General actions for any command sending
    if( ! daemon.startCommand( command, &socket, m_connectionTimeout ) ) {
        dprintf( D_ALWAYS, "AbstractReplicatorStateMachine::sendCommand "
                            "cannot start command %s to %s\n",
                   utilToString( command ), daemonSinfulString );
		socket.close( );

        return ;
    }

    char const* sinfulString = daemonCore->InfoCommandSinfulString();
    if(! socket.put( sinfulString )/* || ! socket.end_of_message( )*/) {
        dprintf( D_ALWAYS, "AbstractReplicatorStateMachine::sendCommand "
                            "unable to code the local sinful string or eom%s\n",
                   sinfulString );
		socket.close( );

        return ;
    }
    else {
        dprintf( D_FULLDEBUG, "AbstractReplicatorStateMachine::sendCommand "
                              "local sinful string coded successfully\n" );
    }
// End of General actions for any command sending

// Command-specific actions
	if( ! ((*this).*(function))( socket ) ) {
    	socket.close( );

		return ;
	}
// End of Command-specific actions
	if( ! socket.end_of_message( ) ) {
		socket.close( );
       	dprintf( D_ALWAYS, "AbstractReplicatorStateMachine::sendCommand "
                            "unable to code the end of message\n" );
       	return ;
   	}

	socket.close( );
   	dprintf( D_ALWAYS, "AbstractReplicatorStateMachine::sendCommand "
                       "%s command sent to %s successfully\n",
             utilToString( command ), daemonSinfulString );
}
예제 #4
0
int
CCBServer::HandleRequest(int cmd,Stream *stream)
{
	ReliSock *sock = (ReliSock *)stream;
	ASSERT( cmd == CCB_REQUEST );

		// Avoid lengthy blocking on communication with our peer.
		// This command-handler should not get called until data
		// is ready to read.
	sock->timeout(1);

	ClassAd msg;
	sock->decode();
	if( !msg.initFromStream( *sock ) || !sock->end_of_message() ) {
		dprintf(D_ALWAYS,
				"CCB: failed to receive request "
				"from %s.\n", sock->peer_description() );
		return FALSE;
	}

	MyString name;
	if( msg.LookupString(ATTR_NAME,name) ) {
			// client name is purely for debugging purposes
		name.formatstr_cat(" on %s",sock->peer_description());
		sock->set_peer_description(name.Value());
	}
	MyString target_ccbid_str;
	MyString return_addr;
	MyString connect_id; // id target daemon should present to requester
	CCBID target_ccbid;

		// NOTE: using ATTR_CLAIM_ID for connect id so that it is
		// automatically treated as a secret over the network.
		// It must be presented by the target daemon when connecting
		// to the requesting client, so the client can confirm that
		// the connection is in response to its request.

	if( !msg.LookupString(ATTR_CCBID,target_ccbid_str) ||
		!msg.LookupString(ATTR_MY_ADDRESS,return_addr) ||
		!msg.LookupString(ATTR_CLAIM_ID,connect_id) )
	{
		MyString ad_str;
		msg.sPrint(ad_str);
		dprintf(D_ALWAYS,
				"CCB: invalid request from %s: %s\n",
				sock->peer_description(), ad_str.Value() );
		return FALSE;
	}
	if( !CCBIDFromString(target_ccbid,target_ccbid_str.Value()) ) {
		dprintf(D_ALWAYS,
				"CCB: request from %s contains invalid CCBID %s\n",
				sock->peer_description(), target_ccbid_str.Value() );
		return FALSE;
	}

	CCBTarget *target = GetTarget( target_ccbid );
	if( !target ) {
		dprintf(D_ALWAYS,
			"CCB: rejecting request from %s for ccbid %s because no daemon is "
			"currently registered with that id "
			"(perhaps it recently disconnected).\n",
			sock->peer_description(), target_ccbid_str.Value());

		MyString error_msg;
		error_msg.formatstr(
			"CCB server rejecting request for ccbid %s because no daemon is "
			"currently registered with that id "
			"(perhaps it recently disconnected).", target_ccbid_str.Value());
		RequestReply( sock, false, error_msg.Value(), 0, target_ccbid );
		return FALSE;
	}

	SetSmallBuffers(sock);

	CCBServerRequest *request =
		new CCBServerRequest(
			sock,
			target_ccbid,
			return_addr.Value(),
			connect_id.Value() );
	AddRequest( request, target );

	dprintf(D_FULLDEBUG,
			"CCB: received request id %lu from %s for target ccbid %s "
			"(registered as %s)\n",
			request->getRequestID(),
			request->getSock()->peer_description(),
			target_ccbid_str.Value(),
			target->getSock()->peer_description());

	ForwardRequestToTarget( request, target );

	return KEEP_STREAM;
}
예제 #5
0
/* Function    : transferFileCommand
 * Return value: TRANSFERER_TRUE - upon success,
 *               TRANSFERER_FALSE - upon failure
 * Description : sends a transfer command to the remote replication daemon,
 *               which creates a uploading 'condor_transferer' process
 * Notes       : sends to the replication daemon a port number, on which it
 *               will be listening to the files uploading requests
 */
int
DownloadReplicaTransferer::transferFileCommand( )
{
    char* temporaryDaemonSinfulString =
        const_cast<char*>( m_daemonSinfulString.Value( ) );

    dprintf( D_ALWAYS, "DownloadReplicaTransferer::transferFileCommand "
                       "to %s started\n", temporaryDaemonSinfulString );
    Daemon daemon( DT_ANY, temporaryDaemonSinfulString );
    ReliSock temporarySocket;
    
    // no retries after 'm_connectionTimeout' seconds of unsuccessful connection
    temporarySocket.timeout( m_connectionTimeout );
    temporarySocket.doNotEnforceMinimalCONNECT_TIMEOUT( );

    if( ! temporarySocket.connect( temporaryDaemonSinfulString, 0, false ) ) {
        dprintf( D_NETWORK, "DownloadReplicaTransferer::transferFileCommand "
                            "unable to connect to %s, reason: %s\n",
                   temporaryDaemonSinfulString, strerror( errno ) );
        temporarySocket.close( );

		return TRANSFERER_FALSE;
    }
    if( ! daemon.startCommand( REPLICATION_TRANSFER_FILE, &temporarySocket,
                                                 m_connectionTimeout ) ) {
        dprintf( D_COMMAND, "DownloadReplicaTransferer::transferFileCommand "
							"unable to start command to addr %s\n",
                   temporaryDaemonSinfulString );
		temporarySocket.close( );

        return TRANSFERER_FALSE;
    }
    MyString sinfulString;
    // find and bind port of the socket, to which the uploading
    // 'condor_transferer' process will send the important files
    ReliSock listeningSocket;

	listeningSocket.timeout( m_maxTransferLifetime / 2);
	//listeningSocket.timeout( ACCEPT_TIMEOUT );
	//listeningSocket.timeout( m_connectionTimeout );
	// this setting is practically unnecessary, since we do not connect to
	// remote sockets with 'listeningSocket'
    listeningSocket.doNotEnforceMinimalCONNECT_TIMEOUT( );

    if( ! listeningSocket.bind( FALSE ) || ! listeningSocket.listen( ) ) {
		temporarySocket.close( );
		listeningSocket.close( );

        return TRANSFERER_FALSE;
    }
    sinfulString = listeningSocket.get_sinful_public();
    // after the socket for the downloading/uploading process is occupied,
    // its number is sent to the remote replication daemon
    char* temporarySinfulString = const_cast<char*>( sinfulString.Value( ) );
    if( ! temporarySocket.code( temporarySinfulString ) ||
        ! temporarySocket.end_of_message( ) ) {
        dprintf( D_NETWORK, "DownloadReplicaTransferer::transferFileCommand "
               "unable to code the sinful string %s\n", temporarySinfulString );
		temporarySocket.close( );
		listeningSocket.close( );

        return TRANSFERER_FALSE;
    } else {
        dprintf( D_ALWAYS, "DownloadReplicaTransferer::transferFileCommand "
               "sinful string %s coded successfully\n", temporarySinfulString );
    }
	temporarySocket.close( );
    m_socket = listeningSocket.accept( );
//	m_socket->set_timeout_multiplier( 1 );
    m_socket->timeout( INT_MAX ); //m_connectionTimeout );
    m_socket->doNotEnforceMinimalCONNECT_TIMEOUT( );

	listeningSocket.close( );
    dprintf( D_ALWAYS, "DownloadReplicaTransferer::transferFileCommand "
                       "sent transfer command successfully and accepted "
					   "request on port no. %d\n", m_socket->get_port( ) );
    return TRANSFERER_TRUE;
}
예제 #6
0
int
CCBServer::HandleRegistration(int cmd,Stream *stream)
{
	ReliSock *sock = (ReliSock *)stream;
	ASSERT( cmd == CCB_REGISTER );

		// Avoid lengthy blocking on communication with our peer.
		// This command-handler should not get called until data
		// is ready to read.
	sock->timeout(1);

	ClassAd msg;
	sock->decode();
	if( !msg.initFromStream( *sock ) || !sock->end_of_message() ) {
		dprintf(D_ALWAYS,
				"CCB: failed to receive registration "
				"from %s.\n", sock->peer_description() );
		return FALSE;
	}

	SetSmallBuffers(sock);

	MyString name;
	if( msg.LookupString(ATTR_NAME,name) ) {
			// target daemon name is purely for debugging purposes
		name.formatstr_cat(" on %s",sock->peer_description());
		sock->set_peer_description(name.Value());
	}

	CCBTarget *target = new CCBTarget(sock);

	MyString reconnect_cookie_str,reconnect_ccbid_str;
	CCBID reconnect_cookie,reconnect_ccbid;
	bool reconnected = false;
	if( msg.LookupString(ATTR_CLAIM_ID,reconnect_cookie_str) &&
		CCBIDFromString(reconnect_cookie,reconnect_cookie_str.Value()) &&
		msg.LookupString( ATTR_CCBID,reconnect_ccbid_str) &&
		CCBIDFromContactString(reconnect_ccbid,reconnect_ccbid_str.Value()) )
	{
		target->setCCBID( reconnect_ccbid );
		reconnected = ReconnectTarget( target, reconnect_cookie );
	}

	if( !reconnected ) {
		AddTarget( target );
	}

	CCBReconnectInfo *reconnect_info = GetReconnectInfo( target->getCCBID() );
	ASSERT( reconnect_info );

	sock->encode();

	ClassAd reply_msg;
	MyString ccb_contact;
	CCBIDToString( reconnect_info->getReconnectCookie(),reconnect_cookie_str );
		// We send our address as part of the CCB contact string, rather
		// than letting the target daemon fill it in.  This is to give us
		// potential flexibility on the CCB server side to do things like
		// assign different targets to different CCB server sub-processes,
		// each with their own command port.
	CCBIDToContactString( m_address.Value(), target->getCCBID(), ccb_contact );

	reply_msg.Assign(ATTR_CCBID,ccb_contact.Value());
	reply_msg.Assign(ATTR_COMMAND,CCB_REGISTER);
	reply_msg.Assign(ATTR_CLAIM_ID,reconnect_cookie_str.Value());

	if( !reply_msg.put( *sock ) || !sock->end_of_message() ) {
		dprintf(D_ALWAYS,
				"CCB: failed to send registration response "
				"to %s.\n", sock->peer_description() );

		RemoveTarget( target );
		return KEEP_STREAM; // we have already closed this socket
	}

	return KEEP_STREAM;
}
예제 #7
0
// I'm going to ask the schedd for where I can put the files for the jobs I've
// specified. The schedd is going to respond with A) a message telling me it
// has the answer right away, or B) an answer telling me I have to wait 
// an unknown length of time for the schedd to schedule me a place to put it.
bool 
DCSchedd::requestSandboxLocation(ClassAd *reqad, ClassAd *respad, 
	CondorError * errstack)
{
	ReliSock rsock;
	int will_block;
	ClassAd status_ad;

	rsock.timeout(20);   // years of research... :)
	if( ! rsock.connect(_addr) ) {
		dprintf( D_ALWAYS, "DCSchedd::requestSandboxLocation(): "
				 "Failed to connect to schedd (%s)\n", _addr );
		return false;
	}
	if( ! startCommand(REQUEST_SANDBOX_LOCATION, (Sock*)&rsock, 0,
						   errstack) ) {
		dprintf( D_ALWAYS, "DCSchedd::requestSandboxLocation(): "
				 "Failed to send command (REQUEST_SANDBOX_LOCATION) "
				 "to schedd (%s)\n", _addr );
		return false;
	}

		// First, if we're not already authenticated, force that now. 
	if (!forceAuthentication( &rsock, errstack )) {
		dprintf( D_ALWAYS, "DCSchedd: authentication failure: %s\n",
				 errstack->getFullText().c_str() );
		return false;
	}

	rsock.encode();

	///////////////////////////////////////////////////////////////////////
	// Send my sandbox location request packet to the schedd.
	///////////////////////////////////////////////////////////////////////

	// This request ad will either contain:
	//	ATTR_TREQ_PEER_VERSION
	//	ATTR_TREQ_HAS_CONSTRAINT
	//	ATTR_TREQ_JOBID_LIST
	//	ATTR_TREQ_FTP
	// 
	// OR
	//
	//	ATTR_TREQ_DIRECTION
	//	ATTR_TREQ_PEER_VERSION
	//	ATTR_TREQ_HAS_CONSTRAINT
	//	ATTR_TREQ_CONSTRAINT
	//	ATTR_TREQ_FTP
	dprintf(D_ALWAYS, "Sending request ad.\n");
	if (putClassAd(&rsock, *reqad) != 1) {
		dprintf(D_ALWAYS,"DCSchedd:requestSandboxLocation(): "
				"Can't send reqad to the schedd\n");
		return false;
	}
	rsock.end_of_message();

	rsock.decode();

	///////////////////////////////////////////////////////////////////////
	// Read back a response ad which will tell me which jobs the schedd
	// said I could modify and whether or not I'm am going to have to block
	// before getting the payload of the transferd location/capability ad.
	///////////////////////////////////////////////////////////////////////

	// This status ad will contain
	//	ATTR_TREQ_INVALID_REQUEST (set to true)
	//	ATTR_TREQ_INVALID_REASON
	//
	// OR
	//	ATTR_TREQ_INVALID_REQUEST (set to false)
	//	ATTR_TREQ_JOBID_ALLOW_LIST
	//	ATTR_TREQ_JOBID_DENY_LIST
	//	ATTR_TREQ_WILL_BLOCK

	dprintf(D_ALWAYS, "Receiving status ad.\n");
	if (getClassAd(&rsock, status_ad) == false) {
		dprintf(D_ALWAYS, "Schedd closed connection to me. Aborting sandbox "
			"submission.\n");
		return false;
	}
	rsock.end_of_message();

	status_ad.LookupInteger(ATTR_TREQ_WILL_BLOCK, will_block);

	dprintf(D_ALWAYS, "Client will %s\n", will_block==1?"block":"not block");

	if (will_block == 1) {
		// set to 20 minutes.
		rsock.timeout(60*20);
	}

	///////////////////////////////////////////////////////////////////////
	// Read back the payload ad from the schedd about the transferd location
	// and capability string I can use for the fileset I wish to transfer.
	///////////////////////////////////////////////////////////////////////

	// read back the response ad from the schedd which contains a 
	// td sinful string, and a capability. These represent my ability to
	// read/write a certain fileset somewhere.

	// This response ad from the schedd will contain:
	//
	//	ATTR_TREQ_INVALID_REQUEST (set to true)
	//	ATTR_TREQ_INVALID_REASON
	//
	// OR
	//
	//	ATTR_TREQ_INVALID_REQUEST (set to false)
	//	ATTR_TREQ_CAPABILITY
	//	ATTR_TREQ_TD_SINFUL
	//	ATTR_TREQ_JOBID_ALLOW_LIST

	dprintf(D_ALWAYS, "Receiving response ad.\n");
	if (getClassAd(&rsock, *respad) != true) {
		dprintf(D_ALWAYS,"DCSchedd:requestSandboxLocation(): "
				"Can't receive respond ad from the schedd\n");
		return false;
	}
	rsock.end_of_message();

	return true;
}
예제 #8
0
bool 
DCSchedd::spoolJobFiles(int JobAdsArrayLen, ClassAd* JobAdsArray[], CondorError * errstack)
{
	int reply;
	int i;
	ReliSock rsock;
	bool use_new_command = true;

	if ( version() ) {
		CondorVersionInfo vi( version() );
		if ( vi.built_since_version(6,7,7) ) {
			use_new_command = true;
		} else {
			use_new_command = false;
		}
	}

		// // // // // // // //
		// On the wire protocol
		// // // // // // // //

	rsock.timeout(20);   // years of research... :)
	if( ! rsock.connect(_addr) ) {
		std::string errmsg;
		formatstr(errmsg, "Failed to connect to schedd (%s)", _addr);

		dprintf( D_ALWAYS, "DCSchedd::spoolJobFiles: %s\n", errmsg.c_str() );

		if( errstack ) {
			errstack->push(
				"DCSchedd::spoolJobFiles",CEDAR_ERR_CONNECT_FAILED,
				errmsg.c_str() );
		}
		return false;
	}
	if ( use_new_command ) {
		if( ! startCommand(SPOOL_JOB_FILES_WITH_PERMS, (Sock*)&rsock, 0,
						   errstack) ) {

			dprintf( D_ALWAYS, "DCSchedd::spoolJobFiles: "
				"Failed to send command (SPOOL_JOB_FILES_WITH_PERMS) "
				"to the schedd (%s)\n", _addr );

			return false;
		}
	} else {
		if( ! startCommand(SPOOL_JOB_FILES, (Sock*)&rsock, 0, errstack) ) {
			dprintf( D_ALWAYS, "DCSchedd::spoolJobFiles: "
					 "Failed to send command (SPOOL_JOB_FILES) "
					 "to the schedd (%s)\n", _addr );

			return false;
		}
	}

		// First, if we're not already authenticated, force that now. 
	if (!forceAuthentication( &rsock, errstack )) {
		dprintf( D_ALWAYS, "DCSchedd: authentication failure: %s\n",
				 errstack ? errstack->getFullText().c_str() : "" );
		return false;
	}

	rsock.encode();

		// Send our version if using the new command
	if ( use_new_command ) {
			// Need to use a named variable, else the wrong version of	
			// code() is called.
		char *my_version = strdup( CondorVersion() );
		if ( !rsock.code(my_version) ) {
			dprintf(D_ALWAYS,"DCSchedd:spoolJobFiles: "
					"Can't send version string to the schedd\n");
			free( my_version );
			return false;
		}
		free( my_version );
	}

		// Send the number of jobs
	if ( !rsock.code(JobAdsArrayLen) ) {
		dprintf(D_ALWAYS,"DCSchedd:spoolJobFiles: "
				"Can't send JobAdsArrayLen to the schedd\n");
		return false;
	}

	if( !rsock.end_of_message() ) {
		std::string errmsg;
		formatstr(errmsg,
				"Can't send initial message (version + count) to schedd (%s)",
				_addr);

		dprintf(D_ALWAYS,"DCSchedd:spoolJobFiles: %s\n", errmsg.c_str());

		if( errstack ) {
			errstack->push(
				"DCSchedd::spoolJobFiles",
				CEDAR_ERR_EOM_FAILED,
				errmsg.c_str());
		}
		return false;
	}

		// Now, put the job ids onto the wire
	PROC_ID jobid;
	for (i=0; i<JobAdsArrayLen; i++) {
		if (!JobAdsArray[i]->LookupInteger(ATTR_CLUSTER_ID,jobid.cluster)) {
			dprintf(D_ALWAYS,"DCSchedd:spoolJobFiles: "
					"Job ad %d did not have a cluster id\n",i);
			return false;
		}
		if (!JobAdsArray[i]->LookupInteger(ATTR_PROC_ID,jobid.proc)) {
			dprintf(D_ALWAYS,"DCSchedd:spoolJobFiles: "
					"Job ad %d did not have a proc id\n",i);
			return false;
		}
		rsock.code(jobid);
	}

	if( !rsock.end_of_message() ) {
		std::string errmsg;
		formatstr(errmsg, "Failed while sending job ids to schedd (%s)", _addr);

		dprintf(D_ALWAYS,"DCSchedd:spoolJobFiles: %s\n", errmsg.c_str());

		if( errstack ) {
			errstack->push(
				"DCSchedd::spoolJobFiles",
				CEDAR_ERR_EOM_FAILED,
				errmsg.c_str());
		}
		return false;
	}

		// Now send all the files via the file transfer object
	for (i=0; i<JobAdsArrayLen; i++) {
		FileTransfer ftrans;
		if ( !ftrans.SimpleInit(JobAdsArray[i], false, false, &rsock, PRIV_UNKNOWN, false, true) ) {
			if( errstack ) {
				int cluster = -1, proc = -1;
				if(JobAdsArray[i]) {
					JobAdsArray[i]->LookupInteger(ATTR_CLUSTER_ID,cluster);
					JobAdsArray[i]->LookupInteger(ATTR_PROC_ID,proc);
				}
				errstack->pushf(
					"DCSchedd::spoolJobFiles",
					FILETRANSFER_INIT_FAILED,
					"File transfer initialization failed for target job %d.%d",
					cluster, proc );
			}
			return false;
		}
		if ( use_new_command ) {
			ftrans.setPeerVersion( version() );
		}
		if ( !ftrans.UploadFiles(true,false) ) {
			if( errstack ) {
				FileTransfer::FileTransferInfo ft_info = ftrans.GetInfo();

				int cluster = -1, proc = -1;
				if(JobAdsArray[i]) {
					JobAdsArray[i]->LookupInteger(ATTR_CLUSTER_ID,cluster);
					JobAdsArray[i]->LookupInteger(ATTR_PROC_ID,proc);
				}
				errstack->pushf(
					"DCSchedd::spoolJobFiles",
					FILETRANSFER_UPLOAD_FAILED,
					"File transfer failed for target job %d.%d: %s",
					cluster, proc, ft_info.error_desc.Value() );
			}
			return false;
		}
	}	
		
		
	rsock.end_of_message();

	rsock.decode();

	reply = 0;
	rsock.code(reply);
	rsock.end_of_message();

	if ( reply == 1 ) 
		return true;
	else
		return false;
}
예제 #9
0
bool 
DCSchedd::receiveJobSandbox(const char* constraint, CondorError * errstack, int * numdone /*=0*/)
{
	if(numdone) { *numdone = 0; }
	ExprTree *tree = NULL;
	const char *lhstr;
	int reply;
	int i;
	ReliSock rsock;
	int JobAdsArrayLen;
	bool use_new_command = true;

	if ( version() ) {
		CondorVersionInfo vi( version() );
		if ( vi.built_since_version(6,7,7) ) {
			use_new_command = true;
		} else {
			use_new_command = false;
		}
	}

		// // // // // // // //
		// On the wire protocol
		// // // // // // // //

	rsock.timeout(20);   // years of research... :)
	if( ! rsock.connect(_addr) ) {
		dprintf( D_ALWAYS, "DCSchedd::receiveJobSandbox: "
				 "Failed to connect to schedd (%s)\n", _addr );
		return false;
	}
	if ( use_new_command ) {
		if( ! startCommand(TRANSFER_DATA_WITH_PERMS, (Sock*)&rsock, 0,
						   errstack) ) {
			dprintf( D_ALWAYS, "DCSchedd::receiveJobSandbox: "
					 "Failed to send command (TRANSFER_DATA_WITH_PERMS) "
					 "to the schedd\n" );
			return false;
		}
	} else {
		if( ! startCommand(TRANSFER_DATA, (Sock*)&rsock, 0, errstack) ) {
			dprintf( D_ALWAYS, "DCSchedd::receiveJobSandbox: "
					 "Failed to send command (TRANSFER_DATA) "
					 "to the schedd\n" );
			return false;
		}
	}

		// First, if we're not already authenticated, force that now. 
	if (!forceAuthentication( &rsock, errstack )) {
		dprintf( D_ALWAYS, 
			"DCSchedd::receiveJobSandbox: authentication failure: %s\n",
			errstack ? errstack->getFullText().c_str() : "" );
		return false;
	}

	rsock.encode();

		// Send our version if using the new command
	if ( use_new_command ) {
			// Need to use a named variable, else the wrong version of	
			// code() is called.
		char *my_version = strdup( CondorVersion() );
		if ( !rsock.code(my_version) ) {
			dprintf(D_ALWAYS,"DCSchedd:receiveJobSandbox: "
					"Can't send version string to the schedd\n");
			free( my_version );
			return false;
		}
		free( my_version );
	}

		// Send the constraint
	char * nc_constraint = strdup( constraint );	// de-const
	if ( !rsock.code(nc_constraint) ) {
		free( nc_constraint );
		dprintf(D_ALWAYS,"DCSchedd:receiveJobSandbox: "
				"Can't send JobAdsArrayLen to the schedd\n");
		return false;
	}
	free( nc_constraint );

	if ( !rsock.end_of_message() ) {
		std::string errmsg;
		formatstr(errmsg,
				"Can't send initial message (version + constraint) to schedd (%s)",
				_addr);

		dprintf(D_ALWAYS,"DCSchedd::receiveJobSandbox: %s\n", errmsg.c_str());

		if( errstack ) {
			errstack->push(
				"DCSchedd::receiveJobSandbox",
				CEDAR_ERR_EOM_FAILED,
				errmsg.c_str());
		}
		return false;
	}

		// Now, read how many jobs matched the constraint.
	rsock.decode();
	if ( !rsock.code(JobAdsArrayLen) ) {
		std::string errmsg;
		formatstr(errmsg,
				"Can't receive JobAdsArrayLen from the schedd (%s)",
				_addr);

		dprintf(D_ALWAYS,"DCSchedd::receiveJobSandbox: %s\n", errmsg.c_str());

		if( errstack ) {
			errstack->push(
				"DCSchedd::receiveJobSandbox",
				CEDAR_ERR_GET_FAILED,
				errmsg.c_str());
		}
		return false;
	}

	rsock.end_of_message();

	dprintf(D_FULLDEBUG,"DCSchedd:receiveJobSandbox: "
		"%d jobs matched my constraint (%s)\n",
		JobAdsArrayLen, constraint);

		// Now read all the files via the file transfer object
	for (i=0; i<JobAdsArrayLen; i++) {
		FileTransfer ftrans;
		ClassAd job;

			// grab job ClassAd
		if ( !getClassAd(&rsock, job) ) {
			std::string errmsg;
			formatstr(errmsg, "Can't receive job ad %d from the schedd", i);

			dprintf(D_ALWAYS, "DCSchedd::receiveJobSandbox: %s\n", errmsg.c_str());

			if( errstack ) {
				errstack->push(
							   "DCSchedd::receiveJobSandbox",
							   CEDAR_ERR_GET_FAILED,
							   errmsg.c_str());
			}
			return false;
		}

		rsock.end_of_message();

			// translate the job ad by replacing the 
			// saved SUBMIT_ attributes
		job.ResetExpr();
		while( job.NextExpr(lhstr, tree) ) {
			if ( lhstr && strncasecmp("SUBMIT_",lhstr,7)==0 ) {
					// this attr name starts with SUBMIT_
					// compute new lhs (strip off the SUBMIT_)
				const char *new_attr_name = strchr(lhstr,'_');
				ExprTree * pTree;
				ASSERT(new_attr_name);
				new_attr_name++;
					// insert attribute
				pTree = tree->Copy();
				job.Insert(new_attr_name, pTree, false);
			}
		}	// while next expr

		if ( !ftrans.SimpleInit(&job,false,false,&rsock) ) {
			if( errstack ) {
				int cluster = -1, proc = -1;
				job.LookupInteger(ATTR_CLUSTER_ID,cluster);
				job.LookupInteger(ATTR_PROC_ID,proc);
				errstack->pushf(
					"DCSchedd::receiveJobSandbox",
					FILETRANSFER_INIT_FAILED,
					"File transfer initialization failed for target job %d.%d",
					cluster, proc );
			}
			return false;
		}
		// We want files to be copied to their final places, so apply
		// any filename remaps when downloading.
		if ( !ftrans.InitDownloadFilenameRemaps(&job) ) {
			return false;
		}
		if ( use_new_command ) {
			ftrans.setPeerVersion( version() );
		}
		if ( !ftrans.DownloadFiles() ) {
			if( errstack ) {
				FileTransfer::FileTransferInfo ft_info = ftrans.GetInfo();

				int cluster = -1, proc = -1;
				job.LookupInteger(ATTR_CLUSTER_ID,cluster);
				job.LookupInteger(ATTR_PROC_ID,proc);
				errstack->pushf(
					"DCSchedd::receiveJobSandbox",
					FILETRANSFER_DOWNLOAD_FAILED,
					"File transfer failed for target job %d.%d: %s",
					cluster, proc, ft_info.error_desc.Value() );
			}
			return false;
		}
	}	
		
	rsock.end_of_message();

	rsock.encode();

	reply = OK;
	rsock.code(reply);
	rsock.end_of_message();

	if(numdone) { *numdone = JobAdsArrayLen; }

	return true;
}
예제 #10
0
ClassAd*
DCSchedd::actOnJobs( JobAction action,
					 const char* constraint, StringList* ids, 
					 const char* reason, const char* reason_attr,
					 const char* reason_code, const char* reason_code_attr,
					 action_result_type_t result_type,
					 bool notify_scheduler,
					 CondorError * errstack )
{
	char* tmp = NULL;
	char buf[512];
	int size, reply;
	ReliSock rsock;

		// // // // // // // //
		// Construct the ad we want to send
		// // // // // // // //

	ClassAd cmd_ad;

	sprintf( buf, "%s = %d", ATTR_JOB_ACTION, action );
	cmd_ad.Insert( buf );
	
	sprintf( buf, "%s = %d", ATTR_ACTION_RESULT_TYPE, 
			 (int)result_type );
	cmd_ad.Insert( buf );

	sprintf( buf, "%s = %s", ATTR_NOTIFY_JOB_SCHEDULER, 
			 notify_scheduler ? "True" : "False" );
	cmd_ad.Insert( buf );

	if( constraint ) {
		if( ids ) {
				// This is a programming error, not a run-time one
			EXCEPT( "DCSchedd::actOnJobs has both constraint and ids!" );
		}
		size = strlen(constraint) + strlen(ATTR_ACTION_CONSTRAINT) + 4;  
		tmp = (char*) malloc( size*sizeof(char) );
		if( !tmp ) {
			EXCEPT( "Out of memory!" );
		}
		sprintf( tmp, "%s = %s", ATTR_ACTION_CONSTRAINT, constraint ); 
		if( ! cmd_ad.Insert(tmp) ) {
			dprintf( D_ALWAYS, "DCSchedd::actOnJobs: "
					 "Can't insert constraint (%s) into ClassAd!\n",
					 constraint );
			free( tmp );
			return NULL;
		}			
		free( tmp );
		tmp = NULL;
	} else if( ids ) {
		char* action_ids = ids->print_to_string();
		if ( action_ids ) {
			size = strlen(action_ids) + strlen(ATTR_ACTION_IDS) + 7;
			tmp = (char*) malloc( size*sizeof(char) );
			if( !tmp ) {
				EXCEPT( "Out of memory!" );
			}
			sprintf( tmp, "%s = \"%s\"", ATTR_ACTION_IDS, action_ids );
			cmd_ad.Insert( tmp );
			free( tmp );
			tmp = NULL;
			free(action_ids);
			action_ids = NULL;
		}
	} else {
		EXCEPT( "DCSchedd::actOnJobs called without constraint or ids" );
	}

	if( reason_attr && reason ) {
		size = strlen(reason_attr) + strlen(reason) + 7;
		tmp = (char*) malloc( size*sizeof(char) );
		if( !tmp ) {
			EXCEPT( "Out of memory!" );
		}
		sprintf( tmp, "%s = \"%s\"", reason_attr, reason );
		cmd_ad.Insert( tmp );
		free( tmp );
		tmp = NULL;
	}

	if( reason_code_attr && reason_code ) {
		cmd_ad.AssignExpr(reason_code_attr,reason_code);
	}

		// // // // // // // //
		// On the wire protocol
		// // // // // // // //

	rsock.timeout(20);   // years of research... :)
	if( ! rsock.connect(_addr) ) {
		dprintf( D_ALWAYS, "DCSchedd::actOnJobs: "
				 "Failed to connect to schedd (%s)\n", _addr );
		return NULL;
	}
	if( ! startCommand(ACT_ON_JOBS, (Sock*)&rsock, 0, errstack) ) {
		dprintf( D_ALWAYS, "DCSchedd::actOnJobs: "
				 "Failed to send command (ACT_ON_JOBS) to the schedd\n" );
		return NULL;
	}
		// First, if we're not already authenticated, force that now. 
	if (!forceAuthentication( &rsock, errstack )) {
		dprintf( D_ALWAYS, "DCSchedd: authentication failure: %s\n",
				 errstack->getFullText().c_str() );
		return NULL;
	}

		// Now, put the command classad on the wire
	if( ! (putClassAd(&rsock, cmd_ad) && rsock.end_of_message()) ) {
		dprintf( D_ALWAYS, "DCSchedd:actOnJobs: Can't send classad\n" );
		return NULL;
	}

		// Next, we need to read the reply from the schedd if things
		// are ok and it's going to go forward.  If the schedd can't
		// read our reply to this ClassAd, it assumes we got killed
		// and it should abort its transaction
	rsock.decode();
	ClassAd* result_ad = new ClassAd();
	if( ! (getClassAd(&rsock, *result_ad) && rsock.end_of_message()) ) {
		dprintf( D_ALWAYS, "DCSchedd:actOnJobs: "
				 "Can't read response ad from %s\n", _addr );
		delete( result_ad );
		return NULL;
	}

		// If the action totally failed, the schedd will already have
		// aborted the transaction and closed up shop, so there's no
		// reason trying to continue.  However, we still want to
		// return the result ad we got back so that our caller can
		// figure out what went wrong.
	reply = FALSE;
	result_ad->LookupInteger( ATTR_ACTION_RESULT, reply );
	if( reply != OK ) {
		dprintf( D_ALWAYS, "DCSchedd:actOnJobs: Action failed\n" );
		return result_ad;
	}

		// Tell the schedd we're still here and ready to go
	rsock.encode();
	int answer = OK;
	if( ! (rsock.code(answer) && rsock.end_of_message()) ) {
		dprintf( D_ALWAYS, "DCSchedd:actOnJobs: Can't send reply\n" );
		delete( result_ad );
		return NULL;
	}
	
		// finally, make sure the schedd didn't blow up trying to
		// commit these changes to the job queue...
	rsock.decode();
	if( ! (rsock.code(reply) && rsock.end_of_message()) ) {
		dprintf( D_ALWAYS, "DCSchedd:actOnJobs: "
				 "Can't read confirmation from %s\n", _addr );
		delete( result_ad );
		return NULL;
	}

	return result_ad;
}
예제 #11
0
bool 
DCSchedd::delegateGSIcredential(const int cluster, const int proc, 
								const char* path_to_proxy_file,
								time_t expiration_time,
								time_t *result_expiration_time,
								CondorError * errstack)
{
	int reply;
	ReliSock rsock;

		// check the parameters
	if ( cluster < 1 || proc < 0 || !path_to_proxy_file || !errstack ) {
		dprintf(D_FULLDEBUG,"DCSchedd::delegateGSIcredential: bad parameters\n");
		return false;
	}

		// connect to the schedd, send the DELEGATE_GSI_CRED_SCHEDD command
	rsock.timeout(20);   // years of research... :)
	if( ! rsock.connect(_addr) ) {
		dprintf( D_ALWAYS, "DCSchedd::delegateGSIcredential: "
				 "Failed to connect to schedd (%s)\n", _addr );
		return false;
	}
	if( ! startCommand(DELEGATE_GSI_CRED_SCHEDD, (Sock*)&rsock, 0, errstack) ) {
		dprintf( D_ALWAYS, "DCSchedd::delegateGSIcredential: "
				 "Failed send command to the schedd: %s\n",
				 errstack->getFullText().c_str());
		return false;
	}


		// If we're not already authenticated, force that now. 
	if (!forceAuthentication( &rsock, errstack )) {
		dprintf( D_ALWAYS, 
				"DCSchedd::delegateGSIcredential authentication failure: %s\n",
				 errstack->getFullText().c_str() );
		return false;
	}

		// Send the job id
	rsock.encode();
	PROC_ID jobid;
	jobid.cluster = cluster;
	jobid.proc = proc;	
	if ( !rsock.code(jobid) || !rsock.end_of_message() ) {
		dprintf(D_ALWAYS,"DCSchedd::delegateGSIcredential: "
				"Can't send jobid to the schedd\n");
		return false;
	}

		// Delegate the gsi proxy
	filesize_t file_size = 0;	// will receive the size of the file
	if ( rsock.put_x509_delegation(&file_size,path_to_proxy_file,expiration_time,result_expiration_time) < 0 ) {
		dprintf(D_ALWAYS,
			"DCSchedd::delegateGSIcredential "
			"failed to send proxy file %s\n",
			path_to_proxy_file);
		return false;
	}
		
		// Fetch the result
	rsock.decode();
	reply = 0;
	rsock.code(reply);
	rsock.end_of_message();

	if ( reply == 1 ) 
		return true;
	else
		return false;
}
예제 #12
0
static bool 
_requestVMRegister(char *addr)
{
	char *buffer = NULL;
	Daemon hstartd(DT_STARTD, addr);

	//Using TCP
	ReliSock ssock;

	ssock.timeout( VM_SOCKET_TIMEOUT );
	ssock.encode();

	if( !ssock.connect(addr) ) {
		dprintf( D_FULLDEBUG, "Failed to connect to host startd(%s)\n", addr);
		return FALSE;
	}

	if( !hstartd.startCommand(VM_REGISTER, &ssock) ) { 
		dprintf( D_FULLDEBUG, "Failed to send VM_REGISTER command to host startd(%s)\n", addr);
		return FALSE;
	}

	// Send <IP address:port> of virtual machine
	buffer = strdup(daemonCore->InfoCommandSinfulString());
	ASSERT(buffer);

	if ( !ssock.code(buffer) ) {
		dprintf( D_FULLDEBUG,
				 "Failed to send VM_REGISTER command's arguments to "
				 "host startd %s: %s\n",
				 addr, buffer );
		free(buffer);
		return FALSE;
	}

	if( !ssock.end_of_message() ) {
		dprintf( D_FULLDEBUG, "Failed to send EOM to host startd %s\n", addr );
		free(buffer);
		return FALSE;
	}

	free(buffer);

	//Now, read permission information
	ssock.timeout( VM_SOCKET_TIMEOUT );
	ssock.decode();

	int permission = 0;
	ssock.code(permission);

	if( !ssock.end_of_message() ) {
		dprintf( D_FULLDEBUG, "Failed to receive EOM from host startd(%s)\n", addr );
		return FALSE;
	}

	if( permission > 0 ) {
		// Since now this virtual machine can be used for Condor
		if( vmregister ) {
			if( vmregister->vm_usable == 0 ) {
				vmregister->vm_usable = 1;
				if( resmgr ) {
					resmgr->eval_and_update_all();
				}
			}
		}
	}else {
		// For now this virtual machine can't be used for Condor
		if( vmregister ) {
			if( vmregister->vm_usable == 1 ) {
				vmregister->vm_usable = 0;
				if( resmgr ) {
					resmgr->eval_and_update_all();
				}
			}
		}
	}

	return TRUE;
}
예제 #13
0
void
VMRegister::requestHostClassAds(void)
{
	// find host startd daemon
	if( !m_vm_host_daemon )
		m_vm_host_daemon = vmapi_findDaemon( m_vm_host_name, DT_STARTD);

	if( !m_vm_host_daemon ) {
		dprintf( D_FULLDEBUG, "Can't find host(%s) Startd daemon\n", m_vm_host_name );
		return;
	}

	ClassAd query_ad;
	query_ad.SetMyTypeName(QUERY_ADTYPE);
	query_ad.SetTargetTypeName(STARTD_ADTYPE);
	query_ad.Assign(ATTR_REQUIREMENTS, true);

	char *addr = m_vm_host_daemon->addr();
	Daemon hstartd(DT_STARTD, addr);
	ReliSock ssock;

	ssock.timeout( VM_SOCKET_TIMEOUT );
	ssock.encode();

	if( !ssock.connect(addr) ) {
		dprintf( D_FULLDEBUG, "Failed to connect to host startd(%s)\n to get host classAd", addr);
		return;
	}

	if(!hstartd.startCommand( QUERY_STARTD_ADS, &ssock )) {
		dprintf( D_FULLDEBUG, "Failed to send QUERY_STARTD_ADS command to host startd(%s)\n", addr);
		return;
	}

	if( !query_ad.put(ssock) ) {
		dprintf(D_FULLDEBUG, "Failed to send query Ad to host startd(%s)\n", addr);
	}

	if( !ssock.end_of_message() ) {
		dprintf(D_FULLDEBUG, "Failed to send query EOM to host startd(%s)\n", addr);
	}

	// Read host classAds
	ssock.timeout( VM_SOCKET_TIMEOUT );
	ssock.decode();
	int more = 1, num_ads = 0;
	ClassAdList adList;
	ClassAd *ad;

	while (more) {
		if( !ssock.code(more) ) {
			ssock.end_of_message();
			return;
		}

		if(more) {
			ad = new ClassAd;
			if( !ad->initFromStream(ssock) ) {
				ssock.end_of_message();
				delete ad;
				return;
			}

			adList.Insert(ad);
			num_ads++;
		}
	}

	ssock.end_of_message();

	dprintf(D_FULLDEBUG, "Got %d classAds from host\n", num_ads);

	// Although we can get more than one classAd from host machine, 
	// we use only the first one classAd
	adList.Rewind();
	ad = adList.Next();

#if !defined(WANT_OLD_CLASSADS)
	ad->AddTargetRefs( TargetJobAttrs );
#endif

	// Get each Attribute from the classAd
	// added "HOST_" in front of each Attribute name
	const char *name;
	ExprTree *expr;

	ad->ResetExpr();
	while( ad->NextExpr(name, expr) ) {
		MyString attr;
		attr += "HOST_";
		attr += name;

		// Insert or Update an attribute to host_classAd in a VMRegister object
		ExprTree * pTree = expr->Copy();
		host_classad->Insert(attr.Value(), pTree, true);
	}
}