// Relay a client's request to the target daemon over the target's socket.
//
// The forwarded ad carries the CCB_REQUEST command, the requester's return
// address and connect id, the request id (as a string), and the requester's
// peer description.  If the ad cannot be written, the request is finished
// with a failure; otherwise nothing more happens here.
//
// request - the pending client request to forward.
// target  - the daemon the request is addressed to.
void
CCBServer::ForwardRequestToTarget( CCBServerRequest *request, CCBTarget *target )
{
	Sock *target_sock = target->getSock();

	MyString request_id_text;
	CCBIDToString( request->getRequestID(), request_id_text );

	ClassAd forward_ad;
	forward_ad.Assign( ATTR_COMMAND, CCB_REQUEST );
	forward_ad.Assign( ATTR_MY_ADDRESS, request->getReturnAddr() );
	forward_ad.Assign( ATTR_CLAIM_ID, request->getConnectID() );
		// the requester's description is included only to ease debugging
	forward_ad.Assign( ATTR_NAME, request->getSock()->peer_description() );
	forward_ad.Assign( ATTR_REQUEST_ID, request_id_text );

	target_sock->encode();
	if( forward_ad.put( *target_sock ) && target_sock->end_of_message() ) {
			// The target's answer is handled by HandleRequestResultsMsg
			// the next time its socket is polled.  Registering the socket
			// right now (if it is not registered yet) _could_ make the
			// response arrive sooner.
		return;
	}

	dprintf(D_ALWAYS,
			"CCB: failed to forward request id %lu from %s to target "
			"daemon %s with ccbid %lu\n",
			request->getRequestID(),
			request->getSock()->peer_description(),
			target_sock->peer_description(),
			target->getCCBID());

	RequestFinished( request, false, "failed to forward request to target" );
}
// Write one ClassAd message to the CCB socket.
//
// Returns false without writing when there is no socket or a connect is
// still in progress.  A failed write triggers Disconnected() and also
// yields false.  Returns true once the message has been sent.
bool
CCBListener::WriteMsgToCCB(ClassAd &msg)
{
	if( m_waiting_for_connect || !m_sock ) {
		return false;
	}

	m_sock->encode();
	if( msg.put( *m_sock ) && m_sock->end_of_message() ) {
		return true;
	}

	Disconnected();
	return false;
}
bool DCStarter::createJobOwnerSecSession(int timeout,char const *job_claim_id,char const *starter_sec_session,char const *session_info,MyString &owner_claim_id,MyString &error_msg,MyString &starter_version,MyString &starter_addr) { ReliSock sock; if( !connectSock(&sock, timeout, NULL) ) { error_msg = "Failed to connect to starter"; return false; } if( !startCommand(CREATE_JOB_OWNER_SEC_SESSION, &sock,timeout,NULL,NULL,false,starter_sec_session) ) { error_msg = "Failed to send CREATE_JOB_OWNER_SEC_SESSION to starter"; return false; } ClassAd input; input.Assign(ATTR_CLAIM_ID,job_claim_id); input.Assign(ATTR_SESSION_INFO,session_info); sock.encode(); if( !input.put(sock) || !sock.end_of_message() ) { error_msg = "Failed to compose CREATE_JOB_OWNER_SEC_SESSION to starter"; return false; } sock.decode(); ClassAd reply; if( !reply.initFromStream(sock) || !sock.end_of_message() ) { error_msg = "Failed to get response to CREATE_JOB_OWNER_SEC_SESSION from starter"; return false; } bool success = false; reply.LookupBool(ATTR_RESULT,success); if( !success ) { reply.LookupString(ATTR_ERROR_STRING,error_msg); return false; } reply.LookupString(ATTR_CLAIM_ID,owner_claim_id); reply.LookupString(ATTR_VERSION,starter_version); // get the full starter address from the starter in case it contains // extra CCB info that we don't already know about reply.LookupString(ATTR_STARTER_IP_ADDR,starter_addr); return true; }
void CCBServer::RequestReply( Sock *sock, bool success, char const *error_msg, CCBID request_cid, CCBID target_cid ) { if( success && sock->readReady() ) { // the client must have disconnected (which is expected if // the client has already received the reversed connection) return; } ClassAd msg; msg.Assign( ATTR_RESULT, success ); msg.Assign( ATTR_ERROR_STRING, error_msg ); sock->encode(); if( !msg.put( *sock ) || !sock->end_of_message() ) { // Would like to be completely quiet if success and the // client has disconnected, since this is normal; however, // the above write operations will generate noise when // they fail, so at least in FULLDEBUG, we explain what's // going on. Note that most of the time, we should not get // here for successful requests, because we either observe // the client disconnect earlier, or the above check on // the socket catches it. Why bother sending a reply on // success at all? Because if the client has not yet // seen the reverse connect and we just disconnect without // telling it the request was successful, then it will // think something has gone wrong. dprintf(success ? D_FULLDEBUG : D_ALWAYS, "CCB: failed to send result (%s) for request id %lu " "from %s requesting a reversed connection to target daemon " "with ccbid %lu: %s %s\n", success ? "request succeeded" : "request failed", request_cid, sock->peer_description(), target_cid, error_msg, success ? "(since the request was successful, it is expected " "that the client may disconnect before receiving " "results)" : "" ); } }
// Send an ALIVE command ad to the given target.  If the ad cannot be
// written, the failure is logged and the target is removed.
void
CCBServer::SendHeartbeatResponse( CCBTarget *target )
{
	Sock *target_sock = target->getSock();

	ClassAd heartbeat_ad;
	heartbeat_ad.Assign( ATTR_COMMAND, ALIVE );

	target_sock->encode();
	bool const delivered =
		heartbeat_ad.put( *target_sock ) && target_sock->end_of_message();

	if( delivered ) {
		dprintf(D_FULLDEBUG,"CCB: sent heartbeat to target %s\n",
				target_sock->peer_description());
		return;
	}

	dprintf(D_ALWAYS,
			"CCB: failed to send heartbeat to target "
			"daemon %s with ccbid %lu\n",
			target_sock->peer_description(),
			target->getCCBID());

	RemoveTarget( target );
}
// This handler is called when a client wishes to write files from the // transferd's storage. int TransferD::write_files_handler(int cmd, Stream *sock) { ReliSock *rsock = (ReliSock*)sock; MyString capability; int protocol = FTP_UNKNOWN; TransferRequest *treq = NULL; MyString fquser; static int transfer_reaper_id = -1; ThreadArg *thread_arg; int tid; ClassAd reqad; ClassAd respad; cmd = cmd; // quiet the compiler. dprintf(D_ALWAYS, "Got TRANSFERD_WRITE_FILES!\n"); ///////////////////////////////////////////////////////////////////////// // make sure we are authenticated ///////////////////////////////////////////////////////////////////////// if( ! rsock->triedAuthentication() ) { CondorError errstack; if( ! SecMan::authenticate_sock(rsock, WRITE, &errstack) ) { // we failed to authenticate, we should bail out now // since we don't know what user is trying to perform // this action. // TODO: it'd be nice to print out what failed, but we // need better error propagation for that... errstack.push( "TransferD::setup_transfer_request_handler()", 42, "Failure to register transferd - Authentication failed" ); dprintf( D_ALWAYS, "setup_transfer_request_handler() " "aborting: %s\n", errstack.getFullText() ); refuse( rsock ); return CLOSE_STREAM; } } fquser = rsock->getFullyQualifiedUser(); ///////////////////////////////////////////////////////////////////////// // Check to see if the capability the client tells us is something that // we have knowledge of. We ONLY check the capability and not the // identity of the person in question. This allows people of different // identities to write files here as long as they had the right // capability. While this might not sound secure, they STILL had to have // authenticated as someone this daemon trusts. // Similarly, check the protocol it wants to use as well as ensure that // the direction the transfer request was supposed to be is being honored. 
///////////////////////////////////////////////////////////////////////// rsock->decode(); // soak the request ad from the client about what it wants to transfer reqad.initFromStream(*rsock); rsock->end_of_message(); reqad.LookupString(ATTR_TREQ_CAPABILITY, capability); rsock->encode(); // do I know of such a capability? if (m_treqs.lookup(capability, treq) != 0) { // didn't find it. Log it and tell them to leave and close up shop respad.Assign(ATTR_TREQ_INVALID_REQUEST, TRUE); respad.Assign(ATTR_TREQ_INVALID_REASON, "Invalid capability!"); respad.put(*rsock); rsock->end_of_message(); dprintf(D_ALWAYS, "Client identity '%s' tried to write some files " "using capability '%s', but there was no such capability. " "Access denied.\n", fquser.Value(), capability.Value()); return CLOSE_STREAM; } reqad.LookupInteger(ATTR_TREQ_FTP, protocol); // am I willing to use this protocol? switch(protocol) { case FTP_CFTP: // FileTrans protocol, I'm happy. break; default: respad.Assign(ATTR_TREQ_INVALID_REQUEST, TRUE); respad.Assign(ATTR_TREQ_INVALID_REASON, "Invalid file transfer protocol!"); respad.put(*rsock); rsock->end_of_message(); dprintf(D_ALWAYS, "Client identity '%s' tried to write some files " "using protocol '%d', but I don't support that protocol. " "Access denied.\n", fquser.Value(), protocol); return CLOSE_STREAM; } // nsure that this transfer request was of the uploading variety if (treq->get_direction() != FTPD_UPLOAD) { respad.Assign(ATTR_TREQ_INVALID_REQUEST, TRUE); respad.Assign(ATTR_TREQ_INVALID_REASON, "Transfer Request was not an uploading request!"); respad.put(*rsock); rsock->end_of_message(); dprintf(D_ALWAYS, "Client identity '%s' tried to write some files " "to a transfer request that wasn't expecting to be written. " "Access denied.\n", fquser.Value()); } ///////////////////////////////////////////////////////////////////////// // Tell the client everything was ok. 
///////////////////////////////////////////////////////////////////////// respad.Assign(ATTR_TREQ_INVALID_REQUEST, FALSE); respad.put(*rsock); rsock->end_of_message(); ///////////////////////////////////////////////////////////////////////// // Set up a thread (a process under unix) to read ALL of the job files // for all of the ads in the TransferRequest. ///////////////////////////////////////////////////////////////////////// // now create a thread, passing in the sock, which uses the file transfer // object to accept the files. if (transfer_reaper_id == -1) { // only set this up ONCE so each and every thread gets one. transfer_reaper_id = daemonCore->Register_Reaper( "write_files_reaper", (ReaperHandlercpp) &TransferD::write_files_reaper, "write_files_reaper", this ); } thread_arg = new ThreadArg(protocol, treq); // Start a new thread (process on Unix) to do the work tid = daemonCore->Create_Thread( (ThreadStartFunc)&TransferD::write_files_thread, (void *)thread_arg, rsock, transfer_reaper_id ); if (tid == FALSE) { // XXX How do I handle this failure? } // associate the tid with the request so I can deal with it propery in // the reaper m_client_to_transferd_threads.insert(tid, treq); // The stream is inherited to the thread, who does the transfer and // finishes the protocol, but in the parent, I'm closing it. return CLOSE_STREAM; }
int TransferD::write_files_reaper(int tid, int exit_status) { TransferRequest *treq = NULL; MyString str; ClassAd result; int exit_code; int signal; dprintf(D_ALWAYS, "TransferD::write_files_reaper(): " "A file transfer into the transferd has completed: " "tid %d, status: %d\n", tid, exit_status); ///////////////////////////////////////////////////////////////////////// // Consistancy check to make sure I asked to do the transfer ///////////////////////////////////////////////////////////////////////// if (m_client_to_transferd_threads.lookup((long)tid, treq) != 0) { EXCEPT("TransferD::write_files_reaper(): " "Programmer error: I have no record of it! "); } // remove it from the thread hash now that I'm dealing with it. m_client_to_transferd_threads.remove((long)tid); ///////////////////////////////////////////////////////////////////////// // Determine status ad. ///////////////////////////////////////////////////////////////////////// // The schedd will know who I'm talking about cause it has the // same capability for this transfer request. 
str = treq->get_capability(); result.Assign(ATTR_TREQ_CAPABILITY, str); // figure out what the exit_status means and encode it into the result ad if (WIFSIGNALED(exit_status)) { signal = WTERMSIG(exit_status); dprintf(D_ALWAYS, "Thread exited with signal: %d\n", signal); result.Assign(ATTR_TREQ_UPDATE_STATUS, "NOT OK"); str.sprintf("Died with signal %d", signal); result.Assign(ATTR_TREQ_UPDATE_REASON, str); result.Assign(ATTR_TREQ_SIGNALED, TRUE); } else { exit_code = WEXITSTATUS(exit_status); dprintf(D_ALWAYS, "Thread exited with exit code: %d\n", exit_code); switch(exit_code) { case EXIT_SUCCESS: result.Assign(ATTR_TREQ_UPDATE_STATUS, "OK"); result.Assign(ATTR_TREQ_UPDATE_REASON, "Successful transfer"); result.Assign(ATTR_TREQ_SIGNALED, FALSE); result.Assign(ATTR_TREQ_EXIT_CODE, exit_code); break; default: result.Assign(ATTR_TREQ_UPDATE_STATUS, "NOT OK"); str.sprintf("Exited with bad exit code %d", exit_code); result.Assign(ATTR_TREQ_UPDATE_REASON, str); result.Assign(ATTR_TREQ_SIGNALED, FALSE); result.Assign(ATTR_TREQ_EXIT_CODE, exit_code); break; } } ///////////////////////////////////////////////////////////////////////// // Call back schedd with status ad. If failed, don't repeat // it, the schedd will send another transfer request if it wants it // done again. ///////////////////////////////////////////////////////////////////////// m_update_sock->encode(); result.put(*m_update_sock); m_update_sock->end_of_message(); // now remove the treq forever from our knowledge m_treqs.remove(treq->get_capability()); // bye bye. delete treq; // Now, if the hash is empty, mark it down as the start of our inactivity // timer if (m_treqs.getNumElements() == 0) { dprintf(D_ALWAYS, "Last transfer request handled. Becoming inactive.\n"); m_inactivity_timer = time(NULL); } return TRUE; }
// The function occurs in a seperate thread or process int TransferD::write_files_thread(void *targ, Stream *sock) { ThreadArg *thread_arg = (ThreadArg*)targ; ReliSock *rsock = (ReliSock*)sock; TransferRequest *treq = NULL; // int protocol; SimpleList<ClassAd*> *jad_list = NULL; ClassAd *jad = NULL; int cluster, proc; int old_timeout; int result; ClassAd respad; // XXX This is a damn dirty hack whose solution resides in implementing // a checksum for the files. // Now we sleep here for one second. Why? So we are certain // to transfer back output files even if the job ran for less // than one second. This is because: // stat() can't tell the difference between: // 1) A job starts up, touches a file, and exits all in one second // 2) A job starts up, doesn't touch the file, and exits all in one // second // So if we force the start time of the job to be one second later than // the time we know the files were written, stat() should be able // to perceive what happened, if anything. sleep(1); // even though I'm in a new process, I got here either through forking // or through a thread, so this memory is a copy. // protocol = thread_arg->protocol; treq = thread_arg->treq; delete thread_arg; // XXX deal with protocol value. //////////////////////////////////////////////////////////////////////// // Sort the classads (XXX maybe put at a higher level in the protocol) //////////////////////////////////////////////////////////////////////// // XXX TODO //////////////////////////////////////////////////////////////////////// // Do the transfer. //////////////////////////////////////////////////////////////////////// // file transfers can take a long time.... 
old_timeout = rsock->timeout(60 * 60 * 8); jad_list = treq->todo_tasks(); while(jad_list->Next(jad)) { FileTransfer ftrans; jad->LookupInteger(ATTR_CLUSTER_ID, cluster); jad->LookupInteger(ATTR_PROC_ID, proc); dprintf( D_ALWAYS, "TransferD::write_files_thread(): " "Transferring fileset for job %d.%d\n", cluster, proc); result = ftrans.SimpleInit(jad, true, true, rsock); if ( !result ) { dprintf( D_ALWAYS, "TransferD::write_files_thread(): " "failed to init file transfer for job %d.%d \n", cluster, proc ); respad.Assign(ATTR_TREQ_INVALID_REQUEST, TRUE); respad.Assign(ATTR_TREQ_INVALID_REASON, "FileTransfer Object failed to SimpleInit."); respad.put(*rsock); rsock->end_of_message(); rsock->timeout(old_timeout); return EXIT_FAILURE; } ftrans.setPeerVersion(treq->get_peer_version().Value()); // We're "downloading" from the client to here. result = ftrans.DownloadFiles(); if ( !result ) { dprintf( D_ALWAYS, "TransferD::write_files_thread(): " "failed to transfer files for job %d.%d \n", cluster, proc ); respad.Assign(ATTR_TREQ_INVALID_REQUEST, TRUE); respad.Assign(ATTR_TREQ_INVALID_REASON, "FileTransfer Object failed to download."); respad.put(*rsock); rsock->end_of_message(); rsock->timeout(old_timeout); return EXIT_FAILURE; } } rsock->end_of_message(); ////////////////////////////////////////////////////////////////////////// // Now that the file transfer is done, tell the client everything is ok. ////////////////////////////////////////////////////////////////////////// dprintf(D_ALWAYS, "Informing client of finished transfer.\n"); rsock->encode(); respad.Assign(ATTR_TREQ_INVALID_REQUEST, FALSE); // This response ad to the client will contain: // // ATTR_TREQ_INVALID_REQUEST (set to false) // respad.put(*rsock); rsock->end_of_message(); delete rsock; return EXIT_SUCCESS; }
int main( int argc, char *argv[] ) { const char *filename=0; char *pool=0; int command=-1; int i; bool use_tcp = false; bool with_ack = false; bool allow_multiple = false; param_functions *p_funcs = NULL; myDistro->Init( argc, argv ); config(); p_funcs = get_param_functions(); for( i=1; i<argc; i++ ) { if(!strcmp(argv[i],"-help")) { usage(argv[0]); exit(0); } else if(!strcmp(argv[i],"-pool")) { i++; if(!argv[i]) { fprintf(stderr,"-pool requires an argument.\n\n"); usage(argv[0]); exit(1); } pool = argv[i]; } else if(!strncmp(argv[i],"-tcp",strlen(argv[i]))) { use_tcp = true; } else if(!strncmp(argv[i],"-multiple",strlen(argv[i]))) { // We don't set allow_multiple=true by default, because // existing users (e.g. glideinWMS) have stray blank lines // in the input file. allow_multiple = true; } else if(!strcmp(argv[i],"-version")) { version(); exit(0); } else if(!strcmp(argv[i],"-debug")) { // dprintf to console Termlog = 1; p_funcs = get_param_functions(); dprintf_config ("TOOL", p_funcs); } else if(argv[i][0]!='-' || !strcmp(argv[i],"-")) { if(command==-1) { command = getCollectorCommandNum(argv[i]); if(command==-1) { fprintf(stderr,"Unknown command name %s\n\n",argv[i]); usage(argv[0]); exit(1); } } else if(!filename) { filename = argv[i]; } else { fprintf(stderr,"Extra argument: %s\n\n",argv[i]); usage(argv[0]); exit(1); } } else { fprintf(stderr,"Unknown argument: %s\n\n",argv[i]); usage(argv[0]); exit(1); } } FILE *file; ClassAdList ads; Daemon *collector; Sock *sock; switch( command ) { case UPDATE_STARTD_AD_WITH_ACK: with_ack = true; break; } if( with_ack ) { use_tcp = true; } if(!filename || !strcmp(filename,"-")) { file = stdin; filename = "(stdin)"; } else { file = safe_fopen_wrapper_follow(filename,"r"); } if(!file) { fprintf(stderr,"couldn't open %s: %s\n",filename,strerror(errno)); return 1; } while(!feof(file)) { int eof=0,error=0,empty=0; char const *delim = "\n"; if( !allow_multiple ) { delim = "***"; } ClassAd *ad = new ClassAd(file,const_cast<char 
*>(delim),eof,error,empty); if(error) { fprintf(stderr,"couldn't parse ClassAd in %s\n",filename); delete ad; return 1; } if( empty ) { delete ad; break; } if( !allow_multiple && ads.Length() > 0 ) { fprintf(stderr,"ERROR: failed to parse '%s' as a ClassAd attribute\n",delim); delete ad; return 1; } ads.Insert(ad); } if(ads.Length() == 0) { fprintf(stderr,"%s is empty\n",filename); return 1; } CollectorList * collectors; if ( pool ) { collector = new Daemon( DT_COLLECTOR, pool, 0 ); collectors = new CollectorList(); collectors->append (collector); } else { collectors = CollectorList::create(); } bool had_error = false; collectors->rewind(); while (collectors->next(collector)) { dprintf(D_FULLDEBUG,"locating collector %s...\n", collector->name()); if(!collector->locate()) { fprintf(stderr,"couldn't locate collector: %s\n",collector->error()); had_error = true; continue; } dprintf(D_FULLDEBUG,"collector is %s located at %s\n", collector->hostname(),collector->addr()); sock = NULL; ClassAd *ad; int success_count = 0; int failure_count = 0; ads.Rewind(); while( (ad=ads.Next()) ) { // If there's no "MyAddress", generate one.. if( !ad->Lookup( ATTR_MY_ADDRESS ) ) { MyString tmp; tmp.formatstr( "<%s:0>", my_ip_string() ); ad->Assign( ATTR_MY_ADDRESS, tmp.Value() ); } if ( use_tcp ) { if( !sock ) { sock = collector->startCommand(command,Stream::reli_sock,20); } else { // Use existing connection. sock->encode(); sock->put(command); } } else { // We must open a new UDP socket each time. 
delete sock; sock = collector->startCommand(command,Stream::safe_sock,20); } int result = 0; if ( sock ) { result += ad->put( *sock ); result += sock->end_of_message(); } if ( result != 2 ) { fprintf(stderr,"failed to send classad to %s\n",collector->addr()); had_error = true; failure_count++; delete sock; sock = NULL; continue; } if( with_ack ) { sock->decode(); int ok = 0; if( !sock->get(ok) || !sock->end_of_message() ) { fprintf(stderr,"failed to get ack from %s\n",collector->addr()); had_error = true; failure_count++; delete sock; sock = NULL; continue; } // ack protocol does not allow for multiple updates, // so close the socket now delete sock; sock = NULL; } success_count++; } if( sock ) { CondorVersionInfo const *ver = sock->get_peer_version(); if( !ver || ver->built_since_version(7,7,3) ) { // graceful hangup so the collector knows we are done sock->encode(); command = DC_NOP; sock->put(command); sock->end_of_message(); } delete sock; sock = NULL; } printf("Sent %d of %d ad%s to %s.\n", success_count, success_count + failure_count, success_count+failure_count == 1 ? "" : "s", collector->name()); } delete collectors; return (had_error)?1:0; }
bool DCStarter::startSSHD(char const *known_hosts_file,char const *private_client_key_file,char const *preferred_shells,char const *slot_name,char const *ssh_keygen_args,ReliSock &sock,int timeout,char const *sec_session_id,MyString &remote_user,MyString &error_msg,bool &retry_is_sensible) { retry_is_sensible = false; #ifndef HAVE_SSH_TO_JOB error_msg = "This version of Condor does not support ssh key exchange."; return false; #else if( !connectSock(&sock, timeout, NULL) ) { error_msg = "Failed to connect to starter"; return false; } if( !startCommand(START_SSHD, &sock,timeout,NULL,NULL,false,sec_session_id) ) { error_msg = "Failed to send START_SSHD to starter"; return false; } ClassAd input; if( preferred_shells && *preferred_shells ) { input.Assign(ATTR_SHELL,preferred_shells); } if( slot_name && *slot_name ) { // This is a little silly. // We are telling the remote side the name of the slot so // that it can put it in the welcome message. input.Assign(ATTR_NAME,slot_name); } if( ssh_keygen_args && *ssh_keygen_args ) { input.Assign(ATTR_SSH_KEYGEN_ARGS,ssh_keygen_args); } sock.encode(); if( !input.put(sock) || !sock.end_of_message() ) { error_msg = "Failed to send START_SSHD request to starter"; return false; } ClassAd result; sock.decode(); if( !result.initFromStream(sock) || !sock.end_of_message() ) { error_msg = "Failed to read response to START_SSHD from starter"; return false; } bool success = false; result.LookupBool(ATTR_RESULT,success); if( !success ) { std::string remote_error_msg; result.LookupString(ATTR_ERROR_STRING,remote_error_msg); error_msg.sprintf("%s: %s",slot_name,remote_error_msg.c_str()); retry_is_sensible = false; result.LookupBool(ATTR_RETRY,retry_is_sensible); return false; } result.LookupString(ATTR_REMOTE_USER,remote_user); std::string public_server_key; if( !result.LookupString(ATTR_SSH_PUBLIC_SERVER_KEY,public_server_key) ) { error_msg = "No public ssh server key received in reply to START_SSHD"; return false; } std::string 
private_client_key; if( !result.LookupString(ATTR_SSH_PRIVATE_CLIENT_KEY,private_client_key) ) { error_msg = "No ssh client key received in reply to START_SSHD"; return false; } // store the private client key unsigned char *decode_buf = NULL; int length = -1; condor_base64_decode(private_client_key.c_str(),&decode_buf,&length); if( !decode_buf ) { error_msg = "Error decoding ssh client key."; return false; } FILE *fp = safe_fcreate_fail_if_exists(private_client_key_file,"a",0400); if( !fp ) { error_msg.sprintf("Failed to create %s: %s", private_client_key_file,strerror(errno)); free( decode_buf ); return false; } if( fwrite(decode_buf,length,1,fp)!=1 ) { error_msg.sprintf("Failed to write to %s: %s", private_client_key_file,strerror(errno)); fclose( fp ); free( decode_buf ); return false; } if( fclose(fp)!=0 ) { error_msg.sprintf("Failed to close %s: %s", private_client_key_file,strerror(errno)); free( decode_buf ); return false; } fp = NULL; free( decode_buf ); decode_buf = NULL; // store the public server key in the known_hosts file length = -1; condor_base64_decode(public_server_key.c_str(),&decode_buf,&length); if( !decode_buf ) { error_msg = "Error decoding ssh server key."; return false; } fp = safe_fcreate_fail_if_exists(known_hosts_file,"a",0600); if( !fp ) { error_msg.sprintf("Failed to create %s: %s", known_hosts_file,strerror(errno)); free( decode_buf ); return false; } // prepend a host name pattern (*) to the public key to make a valid // record in the known_hosts file fprintf(fp,"* "); if( fwrite(decode_buf,length,1,fp)!=1 ) { error_msg.sprintf("Failed to write to %s: %s", known_hosts_file,strerror(errno)); fclose( fp ); free( decode_buf ); return false; } if( fclose(fp)!=0 ) { error_msg.sprintf("Failed to close %s: %s", known_hosts_file,strerror(errno)); free( decode_buf ); return false; } fp = NULL; free( decode_buf ); decode_buf = NULL; return true; #endif }
// when a transferd registers itself, it identifies who it is. The connection // is then held open and the schedd periodically might send more transfer // requests to the transferd. Also, if the transferd dies, the schedd is // informed quickly and reliably due to the closed connection. bool DCSchedd::register_transferd(MyString sinful, MyString id, int timeout, ReliSock **regsock_ptr, CondorError *errstack) { ReliSock *rsock; int invalid_request = 0; ClassAd regad; ClassAd respad; std::string errstr; std::string reason; if (regsock_ptr != NULL) { // Our caller wants a pointer to the socket we used to succesfully // register the claim. The NULL pointer will represent failure and // this will only be set to something real if everything was ok. *regsock_ptr = NULL; } // This call with automatically connect to _addr, which was set in the // constructor of this object to be the schedd in question. rsock = (ReliSock*)startCommand(TRANSFERD_REGISTER, Stream::reli_sock, timeout, errstack); if( ! rsock ) { dprintf( D_ALWAYS, "DCSchedd::register_transferd: " "Failed to send command (TRANSFERD_REGISTER) " "to the schedd\n" ); errstack->push("DC_SCHEDD", 1, "Failed to start a TRANSFERD_REGISTER command."); return false; } // First, if we're not already authenticated, force that now. if (!forceAuthentication( rsock, errstack )) { dprintf( D_ALWAYS, "DCSchedd::register_transferd authentication " "failure: %s\n", errstack->getFullText().c_str() ); errstack->push("DC_SCHEDD", 1, "Failed to authenticate properly."); return false; } rsock->encode(); // set up my registration request. regad.Assign(ATTR_TREQ_TD_SINFUL, sinful); regad.Assign(ATTR_TREQ_TD_ID, id); // This is the initial registration identification ad to the schedd // It contains: // ATTR_TREQ_TD_SINFUL // ATTR_TREQ_TD_ID regad.put(*rsock); rsock->end_of_message(); // Get the response from the schedd. 
rsock->decode(); // This is the response ad from the schedd: // It contains: // ATTR_TREQ_INVALID_REQUEST // // OR // // ATTR_TREQ_INVALID_REQUEST // ATTR_TREQ_INVALID_REASON respad.initFromStream(*rsock); rsock->end_of_message(); respad.LookupInteger(ATTR_TREQ_INVALID_REQUEST, invalid_request); if (invalid_request == FALSE) { // not an invalid request if (regsock_ptr) *regsock_ptr = rsock; return true; } respad.LookupString(ATTR_TREQ_INVALID_REASON, reason); errstack->pushf("DC_SCHEDD", 1, "Schedd refused registration: %s", reason.c_str()); return false; }
bool DCSchedd::getJobConnectInfo( PROC_ID jobid, int subproc, char const *session_info, int timeout, CondorError *errstack, MyString &starter_addr, MyString &starter_claim_id, MyString &starter_version, MyString &slot_name, MyString &error_msg, bool &retry_is_sensible) { ClassAd input; ClassAd output; input.Assign(ATTR_CLUSTER_ID,jobid.cluster); input.Assign(ATTR_PROC_ID,jobid.proc); if( subproc != -1 ) { input.Assign(ATTR_SUB_PROC_ID,subproc); } input.Assign(ATTR_SESSION_INFO,session_info); ReliSock sock; if( !connectSock(&sock,timeout,errstack) ) { error_msg = "Failed to connect to schedd"; dprintf( D_ALWAYS, "%s\n",error_msg.Value()); return false; } if( !startCommand(GET_JOB_CONNECT_INFO, &sock, timeout, errstack) ) { error_msg = "Failed to send GET_JOB_CONNECT_INFO to schedd"; dprintf( D_ALWAYS, "%s\n",error_msg.Value()); return false; } if( !forceAuthentication(&sock, errstack) ) { error_msg = "Failed to authenticate"; dprintf( D_ALWAYS, "%s\n",error_msg.Value()); return false; } sock.encode(); if( !input.put(sock) || !sock.end_of_message() ) { error_msg = "Failed to send GET_JOB_CONNECT_INFO to schedd"; dprintf( D_ALWAYS, "%s\n",error_msg.Value()); return false; } sock.decode(); if( !output.initFromStream(sock) || !sock.end_of_message() ) { error_msg = "Failed to get response from schedd"; dprintf( D_ALWAYS, "%s\n",error_msg.Value()); return false; } if( IsFulldebug(D_FULLDEBUG) ) { std::string adstr; output.SetPrivateAttributesInvisible(true); output.sPrint(adstr); output.SetPrivateAttributesInvisible(false); dprintf(D_FULLDEBUG,"Response for GET_JOB_CONNECT_INFO:\n%s\n", adstr.c_str()); } bool result=false; output.LookupBool(ATTR_RESULT,result); if( !result ) { output.LookupString(ATTR_ERROR_STRING,error_msg); retry_is_sensible = false; output.LookupBool(ATTR_RETRY,retry_is_sensible); } else { output.LookupString(ATTR_STARTER_IP_ADDR,starter_addr); output.LookupString(ATTR_CLAIM_ID,starter_claim_id); output.LookupString(ATTR_VERSION,starter_version); 
output.LookupString(ATTR_REMOTE_HOST,slot_name); } return result; }
bool DCTransferQueue::RequestTransferQueueSlot(bool downloading,char const *fname,char const *jobid,int timeout,MyString &error_desc) { ASSERT(fname); ASSERT(jobid); if( GoAheadAlways( downloading ) ) { m_xfer_downloading = downloading; m_xfer_fname = fname; m_xfer_jobid = jobid; return true; } CheckTransferQueueSlot(); if( m_xfer_queue_sock ) { // A request has already been made. // Currently, this is a no-op, because any upload/download slot // is as good as any other. In the future, there may be // different queues for different paths. ASSERT( m_xfer_downloading == downloading ); m_xfer_fname = fname; m_xfer_jobid = jobid; return true; } time_t started = time(NULL); CondorError errstack; // Our caller has to finish this operation in the specified // amount of time or risk not responding to the file transfer // peer in time, so ignore the timeout multiplier and set the // timeout exactly as specified. m_xfer_queue_sock = reliSock( timeout, 0, &errstack, false, true ); if( !m_xfer_queue_sock ) { formatstr(m_xfer_rejected_reason, "Failed to connect to transfer queue manager for job %s (%s): %s.", jobid, fname, errstack.getFullText().c_str() ); error_desc = m_xfer_rejected_reason; dprintf(D_ALWAYS,"%s\n",m_xfer_rejected_reason.c_str()); return false; } if( timeout ) { timeout -= time(NULL)-started; if( timeout <= 0 ) { timeout = 1; } } bool connected = startCommand( TRANSFER_QUEUE_REQUEST, m_xfer_queue_sock, timeout, &errstack ); if( !connected ) { delete m_xfer_queue_sock; m_xfer_queue_sock = NULL; formatstr(m_xfer_rejected_reason, "Failed to initiate transfer queue request for job %s (%s): %s.", jobid, fname, errstack.getFullText().c_str() ); error_desc = m_xfer_rejected_reason; dprintf(D_ALWAYS,"%s\n",m_xfer_rejected_reason.c_str()); return false; } m_xfer_downloading = downloading; m_xfer_fname = fname; m_xfer_jobid = jobid; ClassAd msg; msg.Assign(ATTR_DOWNLOADING,downloading); msg.Assign(ATTR_FILE_NAME,fname); msg.Assign(ATTR_JOB_ID,jobid); 
m_xfer_queue_sock->encode(); if( !msg.put(*m_xfer_queue_sock) || !m_xfer_queue_sock->end_of_message() ) { formatstr(m_xfer_rejected_reason, "Failed to write transfer request to %s for job %s " "(initial file %s).", m_xfer_queue_sock->peer_description(), m_xfer_jobid.c_str(), m_xfer_fname.c_str()); error_desc = m_xfer_rejected_reason; dprintf(D_ALWAYS,"%s\n",m_xfer_rejected_reason.c_str()); return false; } m_xfer_queue_sock->decode(); // Request has been initiated. Now sender should call // PollForTransferQueueSlot() to get response. m_xfer_queue_pending = true; return true; }