// Delegate the X.509 proxy stored in `filename` to the starter at _addr.
//
//   filename               - path of the proxy file to delegate
//   expiration_time        - passed through to put_x509_delegation()
//   sec_session_id         - passed through to startCommand()
//   result_expiration_time - passed through to put_x509_delegation()
//
// Returns XUS_Okay when the starter replies 1, XUS_Declined when it replies
// 2, and XUS_Error for a reply of 0, an unknown reply, or any local failure
// (connect, command start, delegation, or reading the reply).
DCStarter::X509UpdateStatus
DCStarter::delegateX509Proxy( const char * filename, time_t expiration_time, char const *sec_session_id, time_t *result_expiration_time)
{
    ReliSock rsock;
    rsock.timeout(60);
    if( ! rsock.connect(_addr) ) {
        dprintf(D_ALWAYS, "DCStarter::delegateX509Proxy: "
            "Failed to connect to starter %s\n", _addr);
        return XUS_Error;
    }

    CondorError errstack;
    if( ! startCommand(DELEGATE_GSI_CRED_STARTER, &rsock, 0, &errstack, NULL, false, sec_session_id) ) {
        dprintf( D_ALWAYS, "DCStarter::delegateX509Proxy: "
            "Failed send command to the starter: %s\n",
            errstack.getFullText().c_str());
        return XUS_Error;
    }

    // Send the gsi proxy
    filesize_t file_size = 0;   // will receive the size of the file
    if ( rsock.put_x509_delegation(&file_size,filename,expiration_time,result_expiration_time) < 0 ) {
        dprintf(D_ALWAYS, "DCStarter::delegateX509Proxy "
            "failed to delegate proxy file %s (size=%ld)\n",
            filename, (long int)file_size);
        return XUS_Error;
    }

    // Fetch the result.  A failed read is reported explicitly instead of
    // being silently folded into the "remote replied 0" case.
    rsock.decode();
    int reply = 0;
    if ( ! rsock.code(reply) ) {
        dprintf(D_ALWAYS, "DCStarter::delegateX509Proxy: "
            "failed to read reply from starter %s\n", _addr);
        return XUS_Error;
    }
    rsock.end_of_message();

    switch(reply) {
        case 0:
            return XUS_Error;
        case 1:
            return XUS_Okay;
        case 2:
            return XUS_Declined;
    }

    dprintf(D_ALWAYS, "DCStarter::delegateX509Proxy: "
        "remote side returned unknown code %d. Treating "
        "as an error.\n", reply);
    return XUS_Error;
}
// The function occurs in a seperate thread or process int TransferD::write_files_thread(void *targ, Stream *sock) { ThreadArg *thread_arg = (ThreadArg*)targ; ReliSock *rsock = (ReliSock*)sock; TransferRequest *treq = NULL; // int protocol; SimpleList<ClassAd*> *jad_list = NULL; ClassAd *jad = NULL; int cluster, proc; int old_timeout; int result; ClassAd respad; // XXX This is a damn dirty hack whose solution resides in implementing // a checksum for the files. // Now we sleep here for one second. Why? So we are certain // to transfer back output files even if the job ran for less // than one second. This is because: // stat() can't tell the difference between: // 1) A job starts up, touches a file, and exits all in one second // 2) A job starts up, doesn't touch the file, and exits all in one // second // So if we force the start time of the job to be one second later than // the time we know the files were written, stat() should be able // to perceive what happened, if anything. sleep(1); // even though I'm in a new process, I got here either through forking // or through a thread, so this memory is a copy. // protocol = thread_arg->protocol; treq = thread_arg->treq; delete thread_arg; // XXX deal with protocol value. //////////////////////////////////////////////////////////////////////// // Sort the classads (XXX maybe put at a higher level in the protocol) //////////////////////////////////////////////////////////////////////// // XXX TODO //////////////////////////////////////////////////////////////////////// // Do the transfer. //////////////////////////////////////////////////////////////////////// // file transfers can take a long time.... 
old_timeout = rsock->timeout(60 * 60 * 8); jad_list = treq->todo_tasks(); while(jad_list->Next(jad)) { FileTransfer ftrans; jad->LookupInteger(ATTR_CLUSTER_ID, cluster); jad->LookupInteger(ATTR_PROC_ID, proc); dprintf( D_ALWAYS, "TransferD::write_files_thread(): " "Transferring fileset for job %d.%d\n", cluster, proc); result = ftrans.SimpleInit(jad, true, true, rsock); if ( !result ) { dprintf( D_ALWAYS, "TransferD::write_files_thread(): " "failed to init file transfer for job %d.%d \n", cluster, proc ); respad.Assign(ATTR_TREQ_INVALID_REQUEST, TRUE); respad.Assign(ATTR_TREQ_INVALID_REASON, "FileTransfer Object failed to SimpleInit."); respad.put(*rsock); rsock->end_of_message(); rsock->timeout(old_timeout); return EXIT_FAILURE; } ftrans.setPeerVersion(treq->get_peer_version().Value()); // We're "downloading" from the client to here. result = ftrans.DownloadFiles(); if ( !result ) { dprintf( D_ALWAYS, "TransferD::write_files_thread(): " "failed to transfer files for job %d.%d \n", cluster, proc ); respad.Assign(ATTR_TREQ_INVALID_REQUEST, TRUE); respad.Assign(ATTR_TREQ_INVALID_REASON, "FileTransfer Object failed to download."); respad.put(*rsock); rsock->end_of_message(); rsock->timeout(old_timeout); return EXIT_FAILURE; } } rsock->end_of_message(); ////////////////////////////////////////////////////////////////////////// // Now that the file transfer is done, tell the client everything is ok. ////////////////////////////////////////////////////////////////////////// dprintf(D_ALWAYS, "Informing client of finished transfer.\n"); rsock->encode(); respad.Assign(ATTR_TREQ_INVALID_REQUEST, FALSE); // This response ad to the client will contain: // // ATTR_TREQ_INVALID_REQUEST (set to false) // respad.put(*rsock); rsock->end_of_message(); delete rsock; return EXIT_SUCCESS; }
// sending command to remote replication daemon; specified command function // allows to specify which data is to be sent to the remote daemon void AbstractReplicatorStateMachine::sendCommand( int command, char* daemonSinfulString, CommandFunction function ) { dprintf( D_ALWAYS, "AbstractReplicatorStateMachine::sendCommand %s to %s\n", utilToString( command ), daemonSinfulString ); Daemon daemon( DT_ANY, daemonSinfulString ); ReliSock socket; // no retries after 'm_connectionTimeout' seconds of unsuccessful connection socket.timeout( m_connectionTimeout ); socket.doNotEnforceMinimalCONNECT_TIMEOUT( ); if( ! socket.connect( daemonSinfulString, 0, false ) ) { dprintf( D_ALWAYS, "AbstractReplicatorStateMachine::sendCommand " "unable to connect to %s\n", daemonSinfulString ); socket.close( ); return ; } // General actions for any command sending if( ! daemon.startCommand( command, &socket, m_connectionTimeout ) ) { dprintf( D_ALWAYS, "AbstractReplicatorStateMachine::sendCommand " "cannot start command %s to %s\n", utilToString( command ), daemonSinfulString ); socket.close( ); return ; } char const* sinfulString = daemonCore->InfoCommandSinfulString(); if(! socket.put( sinfulString )/* || ! socket.end_of_message( )*/) { dprintf( D_ALWAYS, "AbstractReplicatorStateMachine::sendCommand " "unable to code the local sinful string or eom%s\n", sinfulString ); socket.close( ); return ; } else { dprintf( D_FULLDEBUG, "AbstractReplicatorStateMachine::sendCommand " "local sinful string coded successfully\n" ); } // End of General actions for any command sending // Command-specific actions if( ! ((*this).*(function))( socket ) ) { socket.close( ); return ; } // End of Command-specific actions if( ! 
socket.end_of_message( ) ) { socket.close( ); dprintf( D_ALWAYS, "AbstractReplicatorStateMachine::sendCommand " "unable to code the end of message\n" ); return ; } socket.close( ); dprintf( D_ALWAYS, "AbstractReplicatorStateMachine::sendCommand " "%s command sent to %s successfully\n", utilToString( command ), daemonSinfulString ); }
int CCBServer::HandleRequest(int cmd,Stream *stream) { ReliSock *sock = (ReliSock *)stream; ASSERT( cmd == CCB_REQUEST ); // Avoid lengthy blocking on communication with our peer. // This command-handler should not get called until data // is ready to read. sock->timeout(1); ClassAd msg; sock->decode(); if( !msg.initFromStream( *sock ) || !sock->end_of_message() ) { dprintf(D_ALWAYS, "CCB: failed to receive request " "from %s.\n", sock->peer_description() ); return FALSE; } MyString name; if( msg.LookupString(ATTR_NAME,name) ) { // client name is purely for debugging purposes name.formatstr_cat(" on %s",sock->peer_description()); sock->set_peer_description(name.Value()); } MyString target_ccbid_str; MyString return_addr; MyString connect_id; // id target daemon should present to requester CCBID target_ccbid; // NOTE: using ATTR_CLAIM_ID for connect id so that it is // automatically treated as a secret over the network. // It must be presented by the target daemon when connecting // to the requesting client, so the client can confirm that // the connection is in response to its request. 
if( !msg.LookupString(ATTR_CCBID,target_ccbid_str) || !msg.LookupString(ATTR_MY_ADDRESS,return_addr) || !msg.LookupString(ATTR_CLAIM_ID,connect_id) ) { MyString ad_str; msg.sPrint(ad_str); dprintf(D_ALWAYS, "CCB: invalid request from %s: %s\n", sock->peer_description(), ad_str.Value() ); return FALSE; } if( !CCBIDFromString(target_ccbid,target_ccbid_str.Value()) ) { dprintf(D_ALWAYS, "CCB: request from %s contains invalid CCBID %s\n", sock->peer_description(), target_ccbid_str.Value() ); return FALSE; } CCBTarget *target = GetTarget( target_ccbid ); if( !target ) { dprintf(D_ALWAYS, "CCB: rejecting request from %s for ccbid %s because no daemon is " "currently registered with that id " "(perhaps it recently disconnected).\n", sock->peer_description(), target_ccbid_str.Value()); MyString error_msg; error_msg.formatstr( "CCB server rejecting request for ccbid %s because no daemon is " "currently registered with that id " "(perhaps it recently disconnected).", target_ccbid_str.Value()); RequestReply( sock, false, error_msg.Value(), 0, target_ccbid ); return FALSE; } SetSmallBuffers(sock); CCBServerRequest *request = new CCBServerRequest( sock, target_ccbid, return_addr.Value(), connect_id.Value() ); AddRequest( request, target ); dprintf(D_FULLDEBUG, "CCB: received request id %lu from %s for target ccbid %s " "(registered as %s)\n", request->getRequestID(), request->getSock()->peer_description(), target_ccbid_str.Value(), target->getSock()->peer_description()); ForwardRequestToTarget( request, target ); return KEEP_STREAM; }
/* Function : transferFileCommand * Return value: TRANSFERER_TRUE - upon success, * TRANSFERER_FALSE - upon failure * Description : sends a transfer command to the remote replication daemon, * which creates a uploading 'condor_transferer' process * Notes : sends to the replication daemon a port number, on which it * will be listening to the files uploading requests */ int DownloadReplicaTransferer::transferFileCommand( ) { char* temporaryDaemonSinfulString = const_cast<char*>( m_daemonSinfulString.Value( ) ); dprintf( D_ALWAYS, "DownloadReplicaTransferer::transferFileCommand " "to %s started\n", temporaryDaemonSinfulString ); Daemon daemon( DT_ANY, temporaryDaemonSinfulString ); ReliSock temporarySocket; // no retries after 'm_connectionTimeout' seconds of unsuccessful connection temporarySocket.timeout( m_connectionTimeout ); temporarySocket.doNotEnforceMinimalCONNECT_TIMEOUT( ); if( ! temporarySocket.connect( temporaryDaemonSinfulString, 0, false ) ) { dprintf( D_NETWORK, "DownloadReplicaTransferer::transferFileCommand " "unable to connect to %s, reason: %s\n", temporaryDaemonSinfulString, strerror( errno ) ); temporarySocket.close( ); return TRANSFERER_FALSE; } if( ! daemon.startCommand( REPLICATION_TRANSFER_FILE, &temporarySocket, m_connectionTimeout ) ) { dprintf( D_COMMAND, "DownloadReplicaTransferer::transferFileCommand " "unable to start command to addr %s\n", temporaryDaemonSinfulString ); temporarySocket.close( ); return TRANSFERER_FALSE; } MyString sinfulString; // find and bind port of the socket, to which the uploading // 'condor_transferer' process will send the important files ReliSock listeningSocket; listeningSocket.timeout( m_maxTransferLifetime / 2); //listeningSocket.timeout( ACCEPT_TIMEOUT ); //listeningSocket.timeout( m_connectionTimeout ); // this setting is practically unnecessary, since we do not connect to // remote sockets with 'listeningSocket' listeningSocket.doNotEnforceMinimalCONNECT_TIMEOUT( ); if( ! 
listeningSocket.bind( FALSE ) || ! listeningSocket.listen( ) ) { temporarySocket.close( ); listeningSocket.close( ); return TRANSFERER_FALSE; } sinfulString = listeningSocket.get_sinful_public(); // after the socket for the downloading/uploading process is occupied, // its number is sent to the remote replication daemon char* temporarySinfulString = const_cast<char*>( sinfulString.Value( ) ); if( ! temporarySocket.code( temporarySinfulString ) || ! temporarySocket.end_of_message( ) ) { dprintf( D_NETWORK, "DownloadReplicaTransferer::transferFileCommand " "unable to code the sinful string %s\n", temporarySinfulString ); temporarySocket.close( ); listeningSocket.close( ); return TRANSFERER_FALSE; } else { dprintf( D_ALWAYS, "DownloadReplicaTransferer::transferFileCommand " "sinful string %s coded successfully\n", temporarySinfulString ); } temporarySocket.close( ); m_socket = listeningSocket.accept( ); // m_socket->set_timeout_multiplier( 1 ); m_socket->timeout( INT_MAX ); //m_connectionTimeout ); m_socket->doNotEnforceMinimalCONNECT_TIMEOUT( ); listeningSocket.close( ); dprintf( D_ALWAYS, "DownloadReplicaTransferer::transferFileCommand " "sent transfer command successfully and accepted " "request on port no. %d\n", m_socket->get_port( ) ); return TRANSFERER_TRUE; }
int CCBServer::HandleRegistration(int cmd,Stream *stream) { ReliSock *sock = (ReliSock *)stream; ASSERT( cmd == CCB_REGISTER ); // Avoid lengthy blocking on communication with our peer. // This command-handler should not get called until data // is ready to read. sock->timeout(1); ClassAd msg; sock->decode(); if( !msg.initFromStream( *sock ) || !sock->end_of_message() ) { dprintf(D_ALWAYS, "CCB: failed to receive registration " "from %s.\n", sock->peer_description() ); return FALSE; } SetSmallBuffers(sock); MyString name; if( msg.LookupString(ATTR_NAME,name) ) { // target daemon name is purely for debugging purposes name.formatstr_cat(" on %s",sock->peer_description()); sock->set_peer_description(name.Value()); } CCBTarget *target = new CCBTarget(sock); MyString reconnect_cookie_str,reconnect_ccbid_str; CCBID reconnect_cookie,reconnect_ccbid; bool reconnected = false; if( msg.LookupString(ATTR_CLAIM_ID,reconnect_cookie_str) && CCBIDFromString(reconnect_cookie,reconnect_cookie_str.Value()) && msg.LookupString( ATTR_CCBID,reconnect_ccbid_str) && CCBIDFromContactString(reconnect_ccbid,reconnect_ccbid_str.Value()) ) { target->setCCBID( reconnect_ccbid ); reconnected = ReconnectTarget( target, reconnect_cookie ); } if( !reconnected ) { AddTarget( target ); } CCBReconnectInfo *reconnect_info = GetReconnectInfo( target->getCCBID() ); ASSERT( reconnect_info ); sock->encode(); ClassAd reply_msg; MyString ccb_contact; CCBIDToString( reconnect_info->getReconnectCookie(),reconnect_cookie_str ); // We send our address as part of the CCB contact string, rather // than letting the target daemon fill it in. This is to give us // potential flexibility on the CCB server side to do things like // assign different targets to different CCB server sub-processes, // each with their own command port. 
CCBIDToContactString( m_address.Value(), target->getCCBID(), ccb_contact ); reply_msg.Assign(ATTR_CCBID,ccb_contact.Value()); reply_msg.Assign(ATTR_COMMAND,CCB_REGISTER); reply_msg.Assign(ATTR_CLAIM_ID,reconnect_cookie_str.Value()); if( !reply_msg.put( *sock ) || !sock->end_of_message() ) { dprintf(D_ALWAYS, "CCB: failed to send registration response " "to %s.\n", sock->peer_description() ); RemoveTarget( target ); return KEEP_STREAM; // we have already closed this socket } return KEEP_STREAM; }
// I'm going to ask the schedd for where I can put the files for the jobs I've // specified. The schedd is going to respond with A) a message telling me it // has the answer right away, or B) an answer telling me I have to wait // an unknown length of time for the schedd to schedule me a place to put it. bool DCSchedd::requestSandboxLocation(ClassAd *reqad, ClassAd *respad, CondorError * errstack) { ReliSock rsock; int will_block; ClassAd status_ad; rsock.timeout(20); // years of research... :) if( ! rsock.connect(_addr) ) { dprintf( D_ALWAYS, "DCSchedd::requestSandboxLocation(): " "Failed to connect to schedd (%s)\n", _addr ); return false; } if( ! startCommand(REQUEST_SANDBOX_LOCATION, (Sock*)&rsock, 0, errstack) ) { dprintf( D_ALWAYS, "DCSchedd::requestSandboxLocation(): " "Failed to send command (REQUEST_SANDBOX_LOCATION) " "to schedd (%s)\n", _addr ); return false; } // First, if we're not already authenticated, force that now. if (!forceAuthentication( &rsock, errstack )) { dprintf( D_ALWAYS, "DCSchedd: authentication failure: %s\n", errstack->getFullText().c_str() ); return false; } rsock.encode(); /////////////////////////////////////////////////////////////////////// // Send my sandbox location request packet to the schedd. 
/////////////////////////////////////////////////////////////////////// // This request ad will either contain: // ATTR_TREQ_PEER_VERSION // ATTR_TREQ_HAS_CONSTRAINT // ATTR_TREQ_JOBID_LIST // ATTR_TREQ_FTP // // OR // // ATTR_TREQ_DIRECTION // ATTR_TREQ_PEER_VERSION // ATTR_TREQ_HAS_CONSTRAINT // ATTR_TREQ_CONSTRAINT // ATTR_TREQ_FTP dprintf(D_ALWAYS, "Sending request ad.\n"); if (putClassAd(&rsock, *reqad) != 1) { dprintf(D_ALWAYS,"DCSchedd:requestSandboxLocation(): " "Can't send reqad to the schedd\n"); return false; } rsock.end_of_message(); rsock.decode(); /////////////////////////////////////////////////////////////////////// // Read back a response ad which will tell me which jobs the schedd // said I could modify and whether or not I'm am going to have to block // before getting the payload of the transferd location/capability ad. /////////////////////////////////////////////////////////////////////// // This status ad will contain // ATTR_TREQ_INVALID_REQUEST (set to true) // ATTR_TREQ_INVALID_REASON // // OR // ATTR_TREQ_INVALID_REQUEST (set to false) // ATTR_TREQ_JOBID_ALLOW_LIST // ATTR_TREQ_JOBID_DENY_LIST // ATTR_TREQ_WILL_BLOCK dprintf(D_ALWAYS, "Receiving status ad.\n"); if (getClassAd(&rsock, status_ad) == false) { dprintf(D_ALWAYS, "Schedd closed connection to me. Aborting sandbox " "submission.\n"); return false; } rsock.end_of_message(); status_ad.LookupInteger(ATTR_TREQ_WILL_BLOCK, will_block); dprintf(D_ALWAYS, "Client will %s\n", will_block==1?"block":"not block"); if (will_block == 1) { // set to 20 minutes. rsock.timeout(60*20); } /////////////////////////////////////////////////////////////////////// // Read back the payload ad from the schedd about the transferd location // and capability string I can use for the fileset I wish to transfer. /////////////////////////////////////////////////////////////////////// // read back the response ad from the schedd which contains a // td sinful string, and a capability. 
These represent my ability to // read/write a certain fileset somewhere. // This response ad from the schedd will contain: // // ATTR_TREQ_INVALID_REQUEST (set to true) // ATTR_TREQ_INVALID_REASON // // OR // // ATTR_TREQ_INVALID_REQUEST (set to false) // ATTR_TREQ_CAPABILITY // ATTR_TREQ_TD_SINFUL // ATTR_TREQ_JOBID_ALLOW_LIST dprintf(D_ALWAYS, "Receiving response ad.\n"); if (getClassAd(&rsock, *respad) != true) { dprintf(D_ALWAYS,"DCSchedd:requestSandboxLocation(): " "Can't receive respond ad from the schedd\n"); return false; } rsock.end_of_message(); return true; }
bool DCSchedd::spoolJobFiles(int JobAdsArrayLen, ClassAd* JobAdsArray[], CondorError * errstack) { int reply; int i; ReliSock rsock; bool use_new_command = true; if ( version() ) { CondorVersionInfo vi( version() ); if ( vi.built_since_version(6,7,7) ) { use_new_command = true; } else { use_new_command = false; } } // // // // // // // // // On the wire protocol // // // // // // // // rsock.timeout(20); // years of research... :) if( ! rsock.connect(_addr) ) { std::string errmsg; formatstr(errmsg, "Failed to connect to schedd (%s)", _addr); dprintf( D_ALWAYS, "DCSchedd::spoolJobFiles: %s\n", errmsg.c_str() ); if( errstack ) { errstack->push( "DCSchedd::spoolJobFiles",CEDAR_ERR_CONNECT_FAILED, errmsg.c_str() ); } return false; } if ( use_new_command ) { if( ! startCommand(SPOOL_JOB_FILES_WITH_PERMS, (Sock*)&rsock, 0, errstack) ) { dprintf( D_ALWAYS, "DCSchedd::spoolJobFiles: " "Failed to send command (SPOOL_JOB_FILES_WITH_PERMS) " "to the schedd (%s)\n", _addr ); return false; } } else { if( ! startCommand(SPOOL_JOB_FILES, (Sock*)&rsock, 0, errstack) ) { dprintf( D_ALWAYS, "DCSchedd::spoolJobFiles: " "Failed to send command (SPOOL_JOB_FILES) " "to the schedd (%s)\n", _addr ); return false; } } // First, if we're not already authenticated, force that now. if (!forceAuthentication( &rsock, errstack )) { dprintf( D_ALWAYS, "DCSchedd: authentication failure: %s\n", errstack ? errstack->getFullText().c_str() : "" ); return false; } rsock.encode(); // Send our version if using the new command if ( use_new_command ) { // Need to use a named variable, else the wrong version of // code() is called. 
char *my_version = strdup( CondorVersion() ); if ( !rsock.code(my_version) ) { dprintf(D_ALWAYS,"DCSchedd:spoolJobFiles: " "Can't send version string to the schedd\n"); free( my_version ); return false; } free( my_version ); } // Send the number of jobs if ( !rsock.code(JobAdsArrayLen) ) { dprintf(D_ALWAYS,"DCSchedd:spoolJobFiles: " "Can't send JobAdsArrayLen to the schedd\n"); return false; } if( !rsock.end_of_message() ) { std::string errmsg; formatstr(errmsg, "Can't send initial message (version + count) to schedd (%s)", _addr); dprintf(D_ALWAYS,"DCSchedd:spoolJobFiles: %s\n", errmsg.c_str()); if( errstack ) { errstack->push( "DCSchedd::spoolJobFiles", CEDAR_ERR_EOM_FAILED, errmsg.c_str()); } return false; } // Now, put the job ids onto the wire PROC_ID jobid; for (i=0; i<JobAdsArrayLen; i++) { if (!JobAdsArray[i]->LookupInteger(ATTR_CLUSTER_ID,jobid.cluster)) { dprintf(D_ALWAYS,"DCSchedd:spoolJobFiles: " "Job ad %d did not have a cluster id\n",i); return false; } if (!JobAdsArray[i]->LookupInteger(ATTR_PROC_ID,jobid.proc)) { dprintf(D_ALWAYS,"DCSchedd:spoolJobFiles: " "Job ad %d did not have a proc id\n",i); return false; } rsock.code(jobid); } if( !rsock.end_of_message() ) { std::string errmsg; formatstr(errmsg, "Failed while sending job ids to schedd (%s)", _addr); dprintf(D_ALWAYS,"DCSchedd:spoolJobFiles: %s\n", errmsg.c_str()); if( errstack ) { errstack->push( "DCSchedd::spoolJobFiles", CEDAR_ERR_EOM_FAILED, errmsg.c_str()); } return false; } // Now send all the files via the file transfer object for (i=0; i<JobAdsArrayLen; i++) { FileTransfer ftrans; if ( !ftrans.SimpleInit(JobAdsArray[i], false, false, &rsock, PRIV_UNKNOWN, false, true) ) { if( errstack ) { int cluster = -1, proc = -1; if(JobAdsArray[i]) { JobAdsArray[i]->LookupInteger(ATTR_CLUSTER_ID,cluster); JobAdsArray[i]->LookupInteger(ATTR_PROC_ID,proc); } errstack->pushf( "DCSchedd::spoolJobFiles", FILETRANSFER_INIT_FAILED, "File transfer initialization failed for target job %d.%d", cluster, proc 
); } return false; } if ( use_new_command ) { ftrans.setPeerVersion( version() ); } if ( !ftrans.UploadFiles(true,false) ) { if( errstack ) { FileTransfer::FileTransferInfo ft_info = ftrans.GetInfo(); int cluster = -1, proc = -1; if(JobAdsArray[i]) { JobAdsArray[i]->LookupInteger(ATTR_CLUSTER_ID,cluster); JobAdsArray[i]->LookupInteger(ATTR_PROC_ID,proc); } errstack->pushf( "DCSchedd::spoolJobFiles", FILETRANSFER_UPLOAD_FAILED, "File transfer failed for target job %d.%d: %s", cluster, proc, ft_info.error_desc.Value() ); } return false; } } rsock.end_of_message(); rsock.decode(); reply = 0; rsock.code(reply); rsock.end_of_message(); if ( reply == 1 ) return true; else return false; }
bool DCSchedd::receiveJobSandbox(const char* constraint, CondorError * errstack, int * numdone /*=0*/) { if(numdone) { *numdone = 0; } ExprTree *tree = NULL; const char *lhstr; int reply; int i; ReliSock rsock; int JobAdsArrayLen; bool use_new_command = true; if ( version() ) { CondorVersionInfo vi( version() ); if ( vi.built_since_version(6,7,7) ) { use_new_command = true; } else { use_new_command = false; } } // // // // // // // // // On the wire protocol // // // // // // // // rsock.timeout(20); // years of research... :) if( ! rsock.connect(_addr) ) { dprintf( D_ALWAYS, "DCSchedd::receiveJobSandbox: " "Failed to connect to schedd (%s)\n", _addr ); return false; } if ( use_new_command ) { if( ! startCommand(TRANSFER_DATA_WITH_PERMS, (Sock*)&rsock, 0, errstack) ) { dprintf( D_ALWAYS, "DCSchedd::receiveJobSandbox: " "Failed to send command (TRANSFER_DATA_WITH_PERMS) " "to the schedd\n" ); return false; } } else { if( ! startCommand(TRANSFER_DATA, (Sock*)&rsock, 0, errstack) ) { dprintf( D_ALWAYS, "DCSchedd::receiveJobSandbox: " "Failed to send command (TRANSFER_DATA) " "to the schedd\n" ); return false; } } // First, if we're not already authenticated, force that now. if (!forceAuthentication( &rsock, errstack )) { dprintf( D_ALWAYS, "DCSchedd::receiveJobSandbox: authentication failure: %s\n", errstack ? errstack->getFullText().c_str() : "" ); return false; } rsock.encode(); // Send our version if using the new command if ( use_new_command ) { // Need to use a named variable, else the wrong version of // code() is called. 
char *my_version = strdup( CondorVersion() ); if ( !rsock.code(my_version) ) { dprintf(D_ALWAYS,"DCSchedd:receiveJobSandbox: " "Can't send version string to the schedd\n"); free( my_version ); return false; } free( my_version ); } // Send the constraint char * nc_constraint = strdup( constraint ); // de-const if ( !rsock.code(nc_constraint) ) { free( nc_constraint ); dprintf(D_ALWAYS,"DCSchedd:receiveJobSandbox: " "Can't send JobAdsArrayLen to the schedd\n"); return false; } free( nc_constraint ); if ( !rsock.end_of_message() ) { std::string errmsg; formatstr(errmsg, "Can't send initial message (version + constraint) to schedd (%s)", _addr); dprintf(D_ALWAYS,"DCSchedd::receiveJobSandbox: %s\n", errmsg.c_str()); if( errstack ) { errstack->push( "DCSchedd::receiveJobSandbox", CEDAR_ERR_EOM_FAILED, errmsg.c_str()); } return false; } // Now, read how many jobs matched the constraint. rsock.decode(); if ( !rsock.code(JobAdsArrayLen) ) { std::string errmsg; formatstr(errmsg, "Can't receive JobAdsArrayLen from the schedd (%s)", _addr); dprintf(D_ALWAYS,"DCSchedd::receiveJobSandbox: %s\n", errmsg.c_str()); if( errstack ) { errstack->push( "DCSchedd::receiveJobSandbox", CEDAR_ERR_GET_FAILED, errmsg.c_str()); } return false; } rsock.end_of_message(); dprintf(D_FULLDEBUG,"DCSchedd:receiveJobSandbox: " "%d jobs matched my constraint (%s)\n", JobAdsArrayLen, constraint); // Now read all the files via the file transfer object for (i=0; i<JobAdsArrayLen; i++) { FileTransfer ftrans; ClassAd job; // grab job ClassAd if ( !getClassAd(&rsock, job) ) { std::string errmsg; formatstr(errmsg, "Can't receive job ad %d from the schedd", i); dprintf(D_ALWAYS, "DCSchedd::receiveJobSandbox: %s\n", errmsg.c_str()); if( errstack ) { errstack->push( "DCSchedd::receiveJobSandbox", CEDAR_ERR_GET_FAILED, errmsg.c_str()); } return false; } rsock.end_of_message(); // translate the job ad by replacing the // saved SUBMIT_ attributes job.ResetExpr(); while( job.NextExpr(lhstr, tree) ) { if ( lhstr && 
strncasecmp("SUBMIT_",lhstr,7)==0 ) { // this attr name starts with SUBMIT_ // compute new lhs (strip off the SUBMIT_) const char *new_attr_name = strchr(lhstr,'_'); ExprTree * pTree; ASSERT(new_attr_name); new_attr_name++; // insert attribute pTree = tree->Copy(); job.Insert(new_attr_name, pTree, false); } } // while next expr if ( !ftrans.SimpleInit(&job,false,false,&rsock) ) { if( errstack ) { int cluster = -1, proc = -1; job.LookupInteger(ATTR_CLUSTER_ID,cluster); job.LookupInteger(ATTR_PROC_ID,proc); errstack->pushf( "DCSchedd::receiveJobSandbox", FILETRANSFER_INIT_FAILED, "File transfer initialization failed for target job %d.%d", cluster, proc ); } return false; } // We want files to be copied to their final places, so apply // any filename remaps when downloading. if ( !ftrans.InitDownloadFilenameRemaps(&job) ) { return false; } if ( use_new_command ) { ftrans.setPeerVersion( version() ); } if ( !ftrans.DownloadFiles() ) { if( errstack ) { FileTransfer::FileTransferInfo ft_info = ftrans.GetInfo(); int cluster = -1, proc = -1; job.LookupInteger(ATTR_CLUSTER_ID,cluster); job.LookupInteger(ATTR_PROC_ID,proc); errstack->pushf( "DCSchedd::receiveJobSandbox", FILETRANSFER_DOWNLOAD_FAILED, "File transfer failed for target job %d.%d: %s", cluster, proc, ft_info.error_desc.Value() ); } return false; } } rsock.end_of_message(); rsock.encode(); reply = OK; rsock.code(reply); rsock.end_of_message(); if(numdone) { *numdone = JobAdsArrayLen; } return true; }
// Ask the schedd to perform `action` on a set of jobs.
//
// Exactly one of `constraint` (a job constraint expression) or `ids`
// (a list of job ids) must be given; passing both or neither EXCEPTs.
//
//   reason / reason_attr           - optional; when both are set, `reason`
//                                    is stored as a string under `reason_attr`
//   reason_code / reason_code_attr - optional; when both are set,
//                                    `reason_code` is stored as an
//                                    expression under `reason_code_attr`
//   result_type, notify_scheduler  - copied into the command ad
//   errstack                       - passed to startCommand() and
//                                    forceAuthentication(); may be NULL as
//                                    far as this function's logging goes
//
// Returns a newly allocated result ad (the caller owns it) when the schedd
// answered, including when the action itself failed, so the caller can
// inspect what went wrong.  Returns NULL on any communication failure.
ClassAd*
DCSchedd::actOnJobs( JobAction action,
                     const char* constraint, StringList* ids,
                     const char* reason, const char* reason_attr,
                     const char* reason_code, const char* reason_code_attr,
                     action_result_type_t result_type,
                     bool notify_scheduler,
                     CondorError * errstack )
{
    char* tmp = NULL;
    char buf[512];
    int size, reply;
    ReliSock rsock;

        // // // // // // // //
        // Construct the ad we want to send
        // // // // // // // //

    ClassAd cmd_ad;

    snprintf( buf, sizeof(buf), "%s = %d", ATTR_JOB_ACTION, (int)action );
    cmd_ad.Insert( buf );

    snprintf( buf, sizeof(buf), "%s = %d", ATTR_ACTION_RESULT_TYPE,
              (int)result_type );
    cmd_ad.Insert( buf );

    snprintf( buf, sizeof(buf), "%s = %s", ATTR_NOTIFY_JOB_SCHEDULER,
              notify_scheduler ? "True" : "False" );
    cmd_ad.Insert( buf );

    if( constraint ) {
        if( ids ) {
                // This is a programming error, not a run-time one
            EXCEPT( "DCSchedd::actOnJobs has both constraint and ids!" );
        }
            // The constraint is an expression, so it has to go through the
            // parser: "<attr> = <constraint>" plus the terminating NUL.
        size = strlen(constraint) + strlen(ATTR_ACTION_CONSTRAINT) + 4;
        tmp = (char*) malloc( size*sizeof(char) );
        if( !tmp ) {
            EXCEPT( "Out of memory!" );
        }
        snprintf( tmp, size, "%s = %s", ATTR_ACTION_CONSTRAINT, constraint );
        if( ! cmd_ad.Insert(tmp) ) {
            dprintf( D_ALWAYS, "DCSchedd::actOnJobs: "
                     "Can't insert constraint (%s) into ClassAd!\n",
                     constraint );
            free( tmp );
            return NULL;
        }
        free( tmp );
        tmp = NULL;
    } else if( ids ) {
        char* action_ids = ids->print_to_string();
        if ( action_ids ) {
                // Store as a string value directly rather than splicing
                // the text between quotes and re-parsing it.
            cmd_ad.Assign( ATTR_ACTION_IDS, action_ids );
            free(action_ids);
            action_ids = NULL;
        }
    } else {
        EXCEPT( "DCSchedd::actOnJobs called without constraint or ids" );
    }

    if( reason_attr && reason ) {
            // The reason is free-form text; assigning it as a string value
            // keeps embedded quotes from breaking the attribute.
        cmd_ad.Assign( reason_attr, reason );
    }

    if( reason_code_attr && reason_code ) {
        cmd_ad.AssignExpr(reason_code_attr,reason_code);
    }

        // // // // // // // //
        // On the wire protocol
        // // // // // // // //

    rsock.timeout(20);   // years of research... :)
    if( ! rsock.connect(_addr) ) {
        dprintf( D_ALWAYS, "DCSchedd::actOnJobs: "
                 "Failed to connect to schedd (%s)\n", _addr );
        return NULL;
    }
    if( ! startCommand(ACT_ON_JOBS, (Sock*)&rsock, 0, errstack) ) {
        dprintf( D_ALWAYS, "DCSchedd::actOnJobs: "
                 "Failed to send command (ACT_ON_JOBS) to the schedd\n" );
        return NULL;
    }
        // First, if we're not already authenticated, force that now.
    if (!forceAuthentication( &rsock, errstack )) {
        dprintf( D_ALWAYS, "DCSchedd: authentication failure: %s\n",
                 errstack ? errstack->getFullText().c_str() : "" );
        return NULL;
    }

        // Now, put the command classad on the wire
    if( ! (putClassAd(&rsock, cmd_ad) && rsock.end_of_message()) ) {
        dprintf( D_ALWAYS, "DCSchedd:actOnJobs: Can't send classad\n" );
        return NULL;
    }

        // Next, we need to read the reply from the schedd if things
        // are ok and it's going to go forward.  If the schedd can't
        // read our reply to this ClassAd, it assumes we got killed
        // and it should abort its transaction
    rsock.decode();
    ClassAd* result_ad = new ClassAd();
    if( ! (getClassAd(&rsock, *result_ad) && rsock.end_of_message()) ) {
        dprintf( D_ALWAYS, "DCSchedd:actOnJobs: "
                 "Can't read response ad from %s\n", _addr );
        delete( result_ad );
        return NULL;
    }

        // If the action totally failed, the schedd will already have
        // aborted the transaction and closed up shop, so there's no
        // reason trying to continue.  However, we still want to
        // return the result ad we got back so that our caller can
        // figure out what went wrong.
    reply = FALSE;
    result_ad->LookupInteger( ATTR_ACTION_RESULT, reply );
    if( reply != OK ) {
        dprintf( D_ALWAYS, "DCSchedd:actOnJobs: Action failed\n" );
        return result_ad;
    }

        // Tell the schedd we're still here and ready to go
    rsock.encode();
    int answer = OK;
    if( ! (rsock.code(answer) && rsock.end_of_message()) ) {
        dprintf( D_ALWAYS, "DCSchedd:actOnJobs: Can't send reply\n" );
        delete( result_ad );
        return NULL;
    }

        // finally, make sure the schedd didn't blow up trying to
        // commit these changes to the job queue...
    rsock.decode();
    if( ! (rsock.code(reply) && rsock.end_of_message()) ) {
        dprintf( D_ALWAYS, "DCSchedd:actOnJobs: "
                 "Can't read confirmation from %s\n", _addr );
        delete( result_ad );
        return NULL;
    }

    return result_ad;
}
bool DCSchedd::delegateGSIcredential(const int cluster, const int proc, const char* path_to_proxy_file, time_t expiration_time, time_t *result_expiration_time, CondorError * errstack) { int reply; ReliSock rsock; // check the parameters if ( cluster < 1 || proc < 0 || !path_to_proxy_file || !errstack ) { dprintf(D_FULLDEBUG,"DCSchedd::delegateGSIcredential: bad parameters\n"); return false; } // connect to the schedd, send the DELEGATE_GSI_CRED_SCHEDD command rsock.timeout(20); // years of research... :) if( ! rsock.connect(_addr) ) { dprintf( D_ALWAYS, "DCSchedd::delegateGSIcredential: " "Failed to connect to schedd (%s)\n", _addr ); return false; } if( ! startCommand(DELEGATE_GSI_CRED_SCHEDD, (Sock*)&rsock, 0, errstack) ) { dprintf( D_ALWAYS, "DCSchedd::delegateGSIcredential: " "Failed send command to the schedd: %s\n", errstack->getFullText().c_str()); return false; } // If we're not already authenticated, force that now. if (!forceAuthentication( &rsock, errstack )) { dprintf( D_ALWAYS, "DCSchedd::delegateGSIcredential authentication failure: %s\n", errstack->getFullText().c_str() ); return false; } // Send the job id rsock.encode(); PROC_ID jobid; jobid.cluster = cluster; jobid.proc = proc; if ( !rsock.code(jobid) || !rsock.end_of_message() ) { dprintf(D_ALWAYS,"DCSchedd::delegateGSIcredential: " "Can't send jobid to the schedd\n"); return false; } // Delegate the gsi proxy filesize_t file_size = 0; // will receive the size of the file if ( rsock.put_x509_delegation(&file_size,path_to_proxy_file,expiration_time,result_expiration_time) < 0 ) { dprintf(D_ALWAYS, "DCSchedd::delegateGSIcredential " "failed to send proxy file %s\n", path_to_proxy_file); return false; } // Fetch the result rsock.decode(); reply = 0; rsock.code(reply); rsock.end_of_message(); if ( reply == 1 ) return true; else return false; }
static bool _requestVMRegister(char *addr) { char *buffer = NULL; Daemon hstartd(DT_STARTD, addr); //Using TCP ReliSock ssock; ssock.timeout( VM_SOCKET_TIMEOUT ); ssock.encode(); if( !ssock.connect(addr) ) { dprintf( D_FULLDEBUG, "Failed to connect to host startd(%s)\n", addr); return FALSE; } if( !hstartd.startCommand(VM_REGISTER, &ssock) ) { dprintf( D_FULLDEBUG, "Failed to send VM_REGISTER command to host startd(%s)\n", addr); return FALSE; } // Send <IP address:port> of virtual machine buffer = strdup(daemonCore->InfoCommandSinfulString()); ASSERT(buffer); if ( !ssock.code(buffer) ) { dprintf( D_FULLDEBUG, "Failed to send VM_REGISTER command's arguments to " "host startd %s: %s\n", addr, buffer ); free(buffer); return FALSE; } if( !ssock.end_of_message() ) { dprintf( D_FULLDEBUG, "Failed to send EOM to host startd %s\n", addr ); free(buffer); return FALSE; } free(buffer); //Now, read permission information ssock.timeout( VM_SOCKET_TIMEOUT ); ssock.decode(); int permission = 0; ssock.code(permission); if( !ssock.end_of_message() ) { dprintf( D_FULLDEBUG, "Failed to receive EOM from host startd(%s)\n", addr ); return FALSE; } if( permission > 0 ) { // Since now this virtual machine can be used for Condor if( vmregister ) { if( vmregister->vm_usable == 0 ) { vmregister->vm_usable = 1; if( resmgr ) { resmgr->eval_and_update_all(); } } } }else { // For now this virtual machine can't be used for Condor if( vmregister ) { if( vmregister->vm_usable == 1 ) { vmregister->vm_usable = 0; if( resmgr ) { resmgr->eval_and_update_all(); } } } } return TRUE; }
void VMRegister::requestHostClassAds(void) { // find host startd daemon if( !m_vm_host_daemon ) m_vm_host_daemon = vmapi_findDaemon( m_vm_host_name, DT_STARTD); if( !m_vm_host_daemon ) { dprintf( D_FULLDEBUG, "Can't find host(%s) Startd daemon\n", m_vm_host_name ); return; } ClassAd query_ad; query_ad.SetMyTypeName(QUERY_ADTYPE); query_ad.SetTargetTypeName(STARTD_ADTYPE); query_ad.Assign(ATTR_REQUIREMENTS, true); char *addr = m_vm_host_daemon->addr(); Daemon hstartd(DT_STARTD, addr); ReliSock ssock; ssock.timeout( VM_SOCKET_TIMEOUT ); ssock.encode(); if( !ssock.connect(addr) ) { dprintf( D_FULLDEBUG, "Failed to connect to host startd(%s)\n to get host classAd", addr); return; } if(!hstartd.startCommand( QUERY_STARTD_ADS, &ssock )) { dprintf( D_FULLDEBUG, "Failed to send QUERY_STARTD_ADS command to host startd(%s)\n", addr); return; } if( !query_ad.put(ssock) ) { dprintf(D_FULLDEBUG, "Failed to send query Ad to host startd(%s)\n", addr); } if( !ssock.end_of_message() ) { dprintf(D_FULLDEBUG, "Failed to send query EOM to host startd(%s)\n", addr); } // Read host classAds ssock.timeout( VM_SOCKET_TIMEOUT ); ssock.decode(); int more = 1, num_ads = 0; ClassAdList adList; ClassAd *ad; while (more) { if( !ssock.code(more) ) { ssock.end_of_message(); return; } if(more) { ad = new ClassAd; if( !ad->initFromStream(ssock) ) { ssock.end_of_message(); delete ad; return; } adList.Insert(ad); num_ads++; } } ssock.end_of_message(); dprintf(D_FULLDEBUG, "Got %d classAds from host\n", num_ads); // Although we can get more than one classAd from host machine, // we use only the first one classAd adList.Rewind(); ad = adList.Next(); #if !defined(WANT_OLD_CLASSADS) ad->AddTargetRefs( TargetJobAttrs ); #endif // Get each Attribute from the classAd // added "HOST_" in front of each Attribute name const char *name; ExprTree *expr; ad->ResetExpr(); while( ad->NextExpr(name, expr) ) { MyString attr; attr += "HOST_"; attr += name; // Insert or Update an attribute to host_classAd in a 
VMRegister object ExprTree * pTree = expr->Copy(); host_classad->Insert(attr.Value(), pTree, true); } }