/*
  Remote qmgmt call: fetch the full ClassAd of job <cluster_id>.<proc_id>
  from the schedd over the already-established qmgmt_sock connection.

  Returns a heap-allocated ClassAd that the caller must delete, or NULL on
  failure.  On failure, errno is set from the schedd's transmitted errno
  (terrno), or to ETIMEDOUT if the ad payload itself could not be read.
  The two trailing bool parameters are part of the common signature but
  are ignored by this remote implementation.
*/
ClassAd *
GetJobAd( int cluster_id, int proc_id, bool /*expStartdAttrs*/, bool /*persist_expansions*/ )
{
	int rval = -1;

	CurrentSysCall = CONDOR_GetJobAd;

		// Request phase: syscall id followed by the job id.
	qmgmt_sock->encode();
	null_on_error( qmgmt_sock->code(CurrentSysCall) );
	null_on_error( qmgmt_sock->code(cluster_id) );
	null_on_error( qmgmt_sock->code(proc_id) );
	null_on_error( qmgmt_sock->end_of_message() );

		// Reply phase: negative rval means failure, and is followed by
		// the remote errno instead of an ad.
	qmgmt_sock->decode();
	null_on_error( qmgmt_sock->code(rval) );
	if( rval < 0 ) {
		null_on_error( qmgmt_sock->code(terrno) );
		null_on_error( qmgmt_sock->end_of_message() );
		errno = terrno;
		return NULL;
	}

	ClassAd *ad = new ClassAd;
	if ( !(getClassAd(qmgmt_sock, *ad)) ) {
			// Could not stream the ad off the wire; report as a timeout.
		delete ad;
		errno = ETIMEDOUT;
		return NULL;
	}
	null_on_error( qmgmt_sock->end_of_message() );

	return ad;
}
/*
  Remote qmgmt call: advance the schedd-side iteration over jobs with
  uncommitted ("dirty") attributes matching the given constraint.
  A non-zero initScan restarts the scan from the beginning.

  Returns a heap-allocated ClassAd the caller must delete, or NULL when
  there is no matching job or on error (errno is set from the remote
  terrno, or to ETIMEDOUT if the ad payload could not be read).
*/
ClassAd *
GetNextDirtyJobByConstraint( char const *constraint, int initScan )
{
	int rval = -1;

	CurrentSysCall = CONDOR_GetNextDirtyJobByConstraint;

		// Request phase: syscall id, scan-restart flag, constraint string.
	qmgmt_sock->encode();
	null_on_error( qmgmt_sock->code(CurrentSysCall) );
	null_on_error( qmgmt_sock->code(initScan) );
	null_on_error( qmgmt_sock->put(constraint) );
	null_on_error( qmgmt_sock->end_of_message() );

		// Reply phase: negative rval means failure / end of iteration,
		// followed by the remote errno.
	qmgmt_sock->decode();
	null_on_error( qmgmt_sock->code(rval) );
	if( rval < 0 ) {
		null_on_error( qmgmt_sock->code(terrno) );
		null_on_error( qmgmt_sock->end_of_message() );
		errno = terrno;
		return NULL;
	}

	ClassAd *ad = new ClassAd;
	if ( ! (getClassAd(qmgmt_sock, *ad)) ) {
			// Could not stream the ad off the wire; report as a timeout.
		delete ad;
		errno = ETIMEDOUT;
		return NULL;
	}
	null_on_error( qmgmt_sock->end_of_message() );

	return ad;
}
/*
  Read a ClaimId string (sent as a secret), an optional options ClassAd,
  and the end-of-message from the given stream, then look the ClaimId up
  in the resmgr.  Returns the matching Resource*, or NULL if the stream
  could not be read or no resource is registered under that claim.
  The options ad, if present on the wire and pad is non-NULL, is
  deserialized into *pad.
*/
Resource*
stream_to_rip( Stream* stream, ClassAd * pad )
{
	char* claim_id = NULL;

	stream->decode();
	if( ! stream->get_secret(claim_id) ) {
		dprintf( D_ALWAYS, "Can't read ClaimId\n" );
		free( claim_id );
		return NULL;
	}

		// If we are not at end of message, an optional ClassAd payload
		// with argument options may follow the ClaimId.
	if (pad && ! stream->peek_end_of_message()) {
		if ( ! getClassAd(stream, *pad)) {
			dprintf( D_ALWAYS, "Can't read options ClassAd after ClaimId\n");
		}
	}

	if( ! stream->end_of_message() ) {
		dprintf( D_ALWAYS, "Can't read end_of_message\n" );
		free( claim_id );
		return NULL;
	}

	Resource* resource = resmgr->get_by_cur_id( claim_id );
	if( !resource ) {
		ClaimIdParser idp( claim_id );
		dprintf( D_ALWAYS, "Error: can't find resource with ClaimId (%s) -- perhaps this claim was already removed?\n", idp.publicClaimId() );
		free( claim_id );
		return NULL;
	}

	free( claim_id );
	return resource;
}
/*
  Remote qmgmt call: retrieve the dirty (uncommitted) attributes of job
  <cluster_id>.<proc_id> into *updated_attrs.

  Returns the schedd's rval (negative on remote failure, with errno set
  from the remote terrno).  If the attribute ad itself cannot be read,
  errno is set to ETIMEDOUT and 0 is returned (NOTE(review): returning 0
  on a read failure looks odd but is the long-standing behavior callers
  may rely on — preserved as-is).

  Fixes: removed the unused local `MyString errs;` and made the final
  end_of_message() check consistent with every other neg_on_error() use
  in this family (the old `!= 0` comparison was redundant).
*/
int
GetDirtyAttributes(int cluster_id, int proc_id, ClassAd *updated_attrs)
{
	int rval = -1;

	CurrentSysCall = CONDOR_GetDirtyAttributes;

		// Request phase: syscall id and job id.
	qmgmt_sock->encode();
	neg_on_error( qmgmt_sock->code(CurrentSysCall) );
	neg_on_error( qmgmt_sock->code(cluster_id) );
	neg_on_error( qmgmt_sock->code(proc_id) );
	neg_on_error( qmgmt_sock->end_of_message() );

		// Reply phase: negative rval is followed by the remote errno.
	qmgmt_sock->decode();
	neg_on_error( qmgmt_sock->code(rval) );
	if( rval < 0 ) {
		neg_on_error( qmgmt_sock->code(terrno) );
		neg_on_error( qmgmt_sock->end_of_message() );
		errno = terrno;
		return rval;
	}

	if ( !(getClassAd(qmgmt_sock, *updated_attrs)) ) {
		errno = ETIMEDOUT;
		return 0;
	}
	neg_on_error( qmgmt_sock->end_of_message() );

	return rval;
}
/*
  Send a DRAIN_JOBS command to the startd, asking it to begin draining.

  how_fast              - drain speed constant (e.g. graceful vs. fast)
  resume_on_completion  - whether the startd should resume accepting jobs
                          once draining completes
  check_expr/start_expr - optional expressions forwarded verbatim in the
                          request ad (may be NULL to omit)
  request_id            - out: the drain request id assigned by the startd

  Returns true on success; on failure records the reason via newError()
  and returns false.  The command socket is always cleaned up before
  returning.
*/
bool DCStartd::drainJobs(int how_fast,bool resume_on_completion,char const *check_expr,char const *start_expr,std::string &request_id)
{
	std::string error_msg;
	ClassAd request_ad;
	Sock *sock = startCommand( DRAIN_JOBS, Sock::reli_sock, 20 );
	if( !sock ) {
		formatstr(error_msg,"Failed to start DRAIN_JOBS command to %s",name());
		newError(CA_FAILURE,error_msg.c_str());
		return false;
	}

		// Build the request ad; the two expressions are optional.
	request_ad.Assign(ATTR_HOW_FAST,how_fast);
	request_ad.Assign(ATTR_RESUME_ON_COMPLETION,resume_on_completion);
	if( check_expr ) {
		request_ad.AssignExpr(ATTR_CHECK_EXPR,check_expr);
	}
	if( start_expr ) {
		request_ad.AssignExpr(ATTR_START_EXPR,start_expr);
	}

	if( !putClassAd(sock, request_ad) || !sock->end_of_message() ) {
		formatstr(error_msg,"Failed to compose DRAIN_JOBS request to %s",name());
		newError(CA_FAILURE,error_msg.c_str());
		delete sock;
		return false;
	}

	sock->decode();
	ClassAd response_ad;
	if( !getClassAd(sock, response_ad) || !sock->end_of_message() ) {
		formatstr(error_msg,"Failed to get response to DRAIN_JOBS request to %s",name());
		newError(CA_FAILURE,error_msg.c_str());
		delete sock;
		return false;
	}

		// Grab the request id even before checking the result flag.
	response_ad.LookupString(ATTR_REQUEST_ID,request_id);

	bool result = false;
	int error_code = 0;
	response_ad.LookupBool(ATTR_RESULT,result);
	if( !result ) {
			// Surface the startd's error string and code to our caller.
		std::string remote_error_msg;
		response_ad.LookupString(ATTR_ERROR_STRING,remote_error_msg);
		response_ad.LookupInteger(ATTR_ERROR_CODE,error_code);
		formatstr(error_msg,
				  "Received failure from %s in response to DRAIN_JOBS request: error code %d: %s",
				  name(),error_code,remote_error_msg.c_str());
		newError(CA_FAILURE,error_msg.c_str());
		delete sock;
		return false;
	}

	delete sock;
	return true;
}
/*
  Deserialize this message's ClassAd payload (m_msg) from the socket.
  Returns true on success; on failure records the socket failure via
  sockFailed() and returns false.  The messenger argument is unused.
*/
bool
ClassAdMsg::readMsg( DCMessenger * /*messenger*/, Sock *sock )
{
	if( getClassAd( sock, m_msg ) ) {
		return true;
	}
	sockFailed( sock );
	return false;
}
/*
  Command handler for TRANSFER_QUEUE_REQUEST: read the request ad off the
  socket, validate the required attributes, and enqueue a
  TransferQueueRequest for this client.

  Returns FALSE if the request could not be read or is malformed.
  Returns KEEP_STREAM otherwise: either the request was queued (and the
  queue now owns the socket), or AddRequest failed and the socket has
  already been closed by the cleanup path.
*/
int TransferQueueManager::HandleRequest(int cmd,Stream *stream)
{
	ReliSock *sock = (ReliSock *)stream;
	ASSERT( cmd == TRANSFER_QUEUE_REQUEST );

	ClassAd msg;
	sock->decode();
	if( !getClassAd( sock, msg ) || !sock->end_of_message() ) {
		dprintf(D_ALWAYS,
				"TransferQueueManager: failed to receive transfer request "
				"from %s.\n", sock->peer_description() );
		return FALSE;
	}

		// All five attributes are mandatory; reject the request if any
		// is missing.
	bool downloading = false;
	MyString fname;
	MyString jobid;
	MyString queue_user;
	filesize_t sandbox_size;
	if( !msg.LookupBool(ATTR_DOWNLOADING,downloading) ||
		!msg.LookupString(ATTR_FILE_NAME,fname) ||
		!msg.LookupString(ATTR_JOB_ID,jobid) ||
		!msg.LookupString(ATTR_USER,queue_user) ||
		!msg.LookupInteger(ATTR_SANDBOX_SIZE,sandbox_size))
	{
		MyString msg_str;
		sPrintAd(msg_str, msg);
		dprintf(D_ALWAYS,"TransferQueueManager: invalid request from %s: %s\n",
				sock->peer_description(), msg_str.Value());
		return FALSE;
	}

		// Currently, we just create the client with the default max queue
		// age. If it becomes necessary to customize the maximum age
		// on a case-by-case basis, it should be easy to adjust.
	TransferQueueRequest *client =
		new TransferQueueRequest(
			sock,
			sandbox_size,
			fname.Value(),
			jobid.Value(),
			queue_user.Value(),
			downloading,
			m_default_max_queue_age);

	if( !AddRequest( client ) ) {
		delete client;
		return KEEP_STREAM; // we have already closed this socket
	}

	return KEEP_STREAM;
}
/*
  Ask the starter (CREATE_JOB_OWNER_SEC_SESSION) to create a security
  session for the job owner, authenticated via the job claim id.

  job_claim_id        - claim id proving we are entitled to this job
  starter_sec_session - security session to use for the command itself
  session_info        - requested session parameters, forwarded verbatim
  owner_claim_id      - out: claim id for the newly created owner session
  starter_version     - out: the starter's version string
  starter_addr        - out: the starter's full address (may include CCB
                        info we did not already know about)

  Returns true on success; on failure fills in error_msg and returns false.
*/
bool DCStarter::createJobOwnerSecSession(int timeout,char const *job_claim_id,char const *starter_sec_session,char const *session_info,MyString &owner_claim_id,MyString &error_msg,MyString &starter_version,MyString &starter_addr)
{
	ReliSock sock;

	if (IsDebugLevel(D_COMMAND)) {
		dprintf (D_COMMAND, "DCStarter::createJobOwnerSecSession(%s,...) making connection to %s\n",
			getCommandStringSafe(CREATE_JOB_OWNER_SEC_SESSION), _addr ? _addr : "NULL");
	}

	if( !connectSock(&sock, timeout, NULL) ) {
		error_msg = "Failed to connect to starter";
		return false;
	}

	if( !startCommand(CREATE_JOB_OWNER_SEC_SESSION, &sock,timeout,NULL,NULL,false,starter_sec_session) ) {
		error_msg = "Failed to send CREATE_JOB_OWNER_SEC_SESSION to starter";
		return false;
	}

		// Request: claim id + requested session parameters.
	ClassAd input;
	input.Assign(ATTR_CLAIM_ID,job_claim_id);
	input.Assign(ATTR_SESSION_INFO,session_info);

	sock.encode();
	if( !putClassAd(&sock, input) || !sock.end_of_message() ) {
		error_msg = "Failed to compose CREATE_JOB_OWNER_SEC_SESSION to starter";
		return false;
	}

	sock.decode();
	ClassAd reply;
	if( !getClassAd(&sock, reply) || !sock.end_of_message() ) {
		error_msg = "Failed to get response to CREATE_JOB_OWNER_SEC_SESSION from starter";
		return false;
	}

	bool success = false;
	reply.LookupBool(ATTR_RESULT,success);
	if( !success ) {
		reply.LookupString(ATTR_ERROR_STRING,error_msg);
		return false;
	}

	reply.LookupString(ATTR_CLAIM_ID,owner_claim_id);
	reply.LookupString(ATTR_VERSION,starter_version);
		// get the full starter address from the starter in case it contains
		// extra CCB info that we don't already know about
	reply.LookupString(ATTR_STARTER_IP_ADDR,starter_addr);
	return true;
}
bool DCStartd::cancelDrainJobs(char const *request_id) { std::string error_msg; ClassAd request_ad; Sock *sock = startCommand( CANCEL_DRAIN_JOBS, Sock::reli_sock, 20 ); if( !sock ) { formatstr(error_msg,"Failed to start CANCEL_DRAIN_JOBS command to %s",name()); newError(CA_FAILURE,error_msg.c_str()); return false; } if( request_id ) { request_ad.Assign(ATTR_REQUEST_ID,request_id); } if( !putClassAd(sock, request_ad) || !sock->end_of_message() ) { formatstr(error_msg,"Failed to compose CANCEL_DRAIN_JOBS request to %s",name()); newError(CA_FAILURE,error_msg.c_str()); return false; } sock->decode(); ClassAd response_ad; if( !getClassAd(sock, response_ad) || !sock->end_of_message() ) { formatstr(error_msg,"Failed to get response to CANCEL_DRAIN_JOBS request to %s",name()); newError(CA_FAILURE,error_msg.c_str()); delete sock; return false; } bool result = false; int error_code = 0; response_ad.LookupBool(ATTR_RESULT,result); if( !result ) { std::string remote_error_msg; response_ad.LookupString(ATTR_ERROR_STRING,remote_error_msg); response_ad.LookupInteger(ATTR_ERROR_CODE,error_code); formatstr(error_msg, "Received failure from %s in response to CANCEL_DRAIN_JOBS request: error code %d: %s", name(),error_code,remote_error_msg.c_str()); newError(CA_FAILURE,error_msg.c_str()); delete sock; return false; } delete sock; return true; }
/*
  Socket handler for job ClassAd updates arriving from the starter on
  s_job_update_sock.  Reads a "final update" flag plus an update ad;
  any read failure is treated as the final update (the expected way this
  handler learns the starter has gone away is the stream closing).

  On a successful read, the update is forwarded to the claim, and the
  starter's self-reported contact address is captured (needed so the
  startd can relay the starter's full contact info to the shadow for
  CCB / shadow-starter reconnect).

  On the final update (explicit or implied by read failure), the update
  socket is cancelled and destroyed.  Always returns KEEP_STREAM.
*/
int Starter::receiveJobClassAdUpdate( Stream *stream )
{
	ClassAd update_ad;
	int final_update = 0;

		// It is expected that we will get here when the stream is closed.
		// Unfortunately, log noise will be generated when we try to read
		// from it.

	stream->decode();
	stream->timeout(10);
	if( !stream->get( final_update) ||
		!getClassAd( stream, update_ad ) ||
		!stream->end_of_message() )
	{
			// Read failure == the starter is gone; treat as final.
		final_update = 1;
	}
	else {
		dprintf(D_FULLDEBUG, "Received job ClassAd update from starter.\n");
		dPrintAd( D_JOB, update_ad );

			// In addition to new info about the job, the starter also
			// inserts contact info for itself (important for CCB and
			// shadow-starter reconnect, because startd needs to relay
			// starter's full contact info to the shadow when queried).
			// It's a bit of a hack to do it through this channel, but
			// better than nothing.
		update_ad.LookupString(ATTR_STARTER_IP_ADDR,m_starter_addr);

		if( s_claim ) {
			s_claim->receiveJobClassAdUpdate( update_ad );
		}
	}

	if( final_update ) {
		dprintf(D_FULLDEBUG, "Closing job ClassAd update socket from starter.\n");
		daemonCore->Cancel_Socket(s_job_update_sock);
		delete s_job_update_sock;
		s_job_update_sock = NULL;
	}
	return KEEP_STREAM;
}
/*
  Streaming companion to GetAllJobsByConstraint_Start: read the next job
  ad of the in-progress CONDOR_GetAllJobsByConstraint reply into `ad`.

  Returns 0 on success.  Returns -1 when the server signals the end of
  the stream / an error (errno set from the remote terrno) or when the
  ad payload cannot be read (errno = ETIMEDOUT).
*/
int GetAllJobsByConstraint_Next( ClassAd &ad )
{
	int rval = -1;

		// Must only be called while a GetAllJobsByConstraint reply is
		// being streamed.
	ASSERT( CurrentSysCall == CONDOR_GetAllJobsByConstraint );

	neg_on_error( qmgmt_sock->code(rval) );
	if( rval < 0 ) {
			// Negative rval terminates the stream; the remote errno follows.
		neg_on_error( qmgmt_sock->code(terrno) );
		neg_on_error( qmgmt_sock->end_of_message() );
		errno = terrno;
		return -1;
	}

	if ( ! (getClassAd(qmgmt_sock, ad)) ) {
		errno = ETIMEDOUT;
		return -1;
	}
	return 0;
}
bool CCBListener::ReadMsgFromCCB() { if( !m_sock ) { return false; } m_sock->timeout(CCB_TIMEOUT); ClassAd msg; if( !getClassAd( m_sock, msg ) || !m_sock->end_of_message() ) { dprintf(D_ALWAYS, "CCBListener: failed to receive message from CCB server %s\n", m_ccb_address.Value()); Disconnected(); return false; } m_last_contact_from_peer = time(NULL); RescheduleHeartbeat(); int cmd = -1; msg.LookupInteger( ATTR_COMMAND, cmd ); switch( cmd ) { case CCB_REGISTER: return HandleCCBRegistrationReply( msg ); case CCB_REQUEST: return HandleCCBRequest( msg ); case ALIVE: dprintf(D_FULLDEBUG,"CCBListener: received heartbeat from server.\n"); return true; } MyString msg_str; sPrintAd(msg_str, msg); dprintf( D_ALWAYS, "CCBListener: Unexpected message received from CCB " "server: %s\n", msg_str.Value() ); return false; }
/*
  Remote qmgmt call: fetch all job ads matching `constraint`, limited to
  the attributes in `projection`, appending each ad into `list`.

  Protocol note: the server streams (rval, ad) pairs and terminates the
  stream with a negative rval followed by its errno.  Consequently this
  loop ALWAYS exits through one of the `return NULL` branches — errno
  tells the caller whether iteration ended normally or failed — and the
  trailing end_of_message()/`return 0` below the loop are unreachable.
  Ads inserted into `list` before a failure remain there (caller owns
  the list contents).
*/
ClassAd *
GetAllJobsByConstraint_imp( char const *constraint, char const *projection, ClassAdList &list)
{
	int rval = -1;

	CurrentSysCall = CONDOR_GetAllJobsByConstraint;

		// Request phase: syscall id, constraint, and projection.
	qmgmt_sock->encode();
	null_on_error( qmgmt_sock->code(CurrentSysCall) );
	null_on_error( qmgmt_sock->put(constraint) );
	null_on_error( qmgmt_sock->put(projection) );
	null_on_error( qmgmt_sock->end_of_message() );

	qmgmt_sock->decode();

		// Reply phase: one rval per ad; negative rval ends the stream.
	while (true) {
		null_on_error( qmgmt_sock->code(rval) );
		if( rval < 0 ) {
			null_on_error( qmgmt_sock->code(terrno) );
			null_on_error( qmgmt_sock->end_of_message() );
			errno = terrno;
			return NULL;
		}

		ClassAd *ad = new ClassAd;
		if ( ! (getClassAd(qmgmt_sock, *ad)) ) {
			delete ad;
			errno = ETIMEDOUT;
			return NULL;
		}
		list.Insert(ad);
	};

		// NOTE(review): unreachable — the loop above only exits via the
		// return statements inside it.  Kept for historical fidelity.
	null_on_error( qmgmt_sock->end_of_message() );
	return 0;
}
/*
  Pull the sandboxes of all jobs matching `constraint` from the schedd.

  Uses TRANSFER_DATA_WITH_PERMS against schedds >= 6.7.7 (which also
  expects our version string on the wire), falling back to TRANSFER_DATA
  for older schedds.  For each matching job the schedd sends the job ad
  followed by a FileTransfer download of its files.

  errstack - optional; failure details are pushed here when non-NULL.
  numdone  - optional out-param; set to the number of jobs whose
             sandboxes were received (0 until the very end).

  Returns true if every matching sandbox was downloaded.
*/
bool
DCSchedd::receiveJobSandbox(const char* constraint, CondorError * errstack, int * numdone /*=0*/)
{
	if(numdone) { *numdone = 0; }
	ExprTree *tree = NULL;
	const char *lhstr;
	int reply;
	int i;
	ReliSock rsock;
	int JobAdsArrayLen;
	bool use_new_command = true;

		// Schedds built since 6.7.7 understand the newer command that
		// preserves file permissions (and expects our version string).
	if ( version() ) {
		CondorVersionInfo vi( version() );
		if ( vi.built_since_version(6,7,7) ) {
			use_new_command = true;
		} else {
			use_new_command = false;
		}
	}

		// ----- On-the-wire protocol begins here -----

	rsock.timeout(20);   // years of research... :)
	if( ! rsock.connect(_addr) ) {
		dprintf( D_ALWAYS, "DCSchedd::receiveJobSandbox: "
				 "Failed to connect to schedd (%s)\n", _addr );
		return false;
	}
	if ( use_new_command ) {
		if( ! startCommand(TRANSFER_DATA_WITH_PERMS, (Sock*)&rsock, 0,
						   errstack) ) {
			dprintf( D_ALWAYS, "DCSchedd::receiveJobSandbox: "
					 "Failed to send command (TRANSFER_DATA_WITH_PERMS) "
					 "to the schedd\n" );
			return false;
		}
	} else {
		if( ! startCommand(TRANSFER_DATA, (Sock*)&rsock, 0, errstack) ) {
			dprintf( D_ALWAYS, "DCSchedd::receiveJobSandbox: "
					 "Failed to send command (TRANSFER_DATA) "
					 "to the schedd\n" );
			return false;
		}
	}

		// First, if we're not already authenticated, force that now.
	if (!forceAuthentication( &rsock, errstack )) {
		dprintf( D_ALWAYS, "DCSchedd::receiveJobSandbox: authentication failure: %s\n",
				 errstack ? errstack->getFullText().c_str() : "" );
		return false;
	}

	rsock.encode();

		// Send our version if using the new command
	if ( use_new_command ) {
			// Need to use a named variable, else the wrong version of
			// code() is called.
		char *my_version = strdup( CondorVersion() );
		if ( !rsock.code(my_version) ) {
			dprintf(D_ALWAYS,"DCSchedd:receiveJobSandbox: "
					"Can't send version string to the schedd\n");
			free( my_version );
			return false;
		}
		free( my_version );
	}

		// Send the constraint
	char * nc_constraint = strdup( constraint );	// de-const
	if ( !rsock.code(nc_constraint) ) {
		free( nc_constraint );
		dprintf(D_ALWAYS,"DCSchedd:receiveJobSandbox: "
				"Can't send JobAdsArrayLen to the schedd\n");
		return false;
	}
	free( nc_constraint );

	if ( !rsock.end_of_message() ) {
		std::string errmsg;
		formatstr(errmsg,
				  "Can't send initial message (version + constraint) to schedd (%s)",
				  _addr);
		dprintf(D_ALWAYS,"DCSchedd::receiveJobSandbox: %s\n", errmsg.c_str());
		if( errstack ) {
			errstack->push(
				"DCSchedd::receiveJobSandbox",
				CEDAR_ERR_EOM_FAILED,
				errmsg.c_str());
		}
		return false;
	}

		// Now, read how many jobs matched the constraint.
	rsock.decode();
	if ( !rsock.code(JobAdsArrayLen) ) {
		std::string errmsg;
		formatstr(errmsg,
				  "Can't receive JobAdsArrayLen from the schedd (%s)", _addr);
		dprintf(D_ALWAYS,"DCSchedd::receiveJobSandbox: %s\n", errmsg.c_str());
		if( errstack ) {
			errstack->push(
				"DCSchedd::receiveJobSandbox",
				CEDAR_ERR_GET_FAILED,
				errmsg.c_str());
		}
		return false;
	}
	rsock.end_of_message();

	dprintf(D_FULLDEBUG,"DCSchedd:receiveJobSandbox: "
			"%d jobs matched my constraint (%s)\n",
			JobAdsArrayLen, constraint);

		// Now read all the files via the file transfer object
	for (i=0; i<JobAdsArrayLen; i++) {
		FileTransfer ftrans;
		ClassAd job;

			// grab job ClassAd
		if ( !getClassAd(&rsock, job) ) {
			std::string errmsg;
			formatstr(errmsg, "Can't receive job ad %d from the schedd", i);
			dprintf(D_ALWAYS, "DCSchedd::receiveJobSandbox: %s\n", errmsg.c_str());
			if( errstack ) {
				errstack->push(
					"DCSchedd::receiveJobSandbox",
					CEDAR_ERR_GET_FAILED,
					errmsg.c_str());
			}
			return false;
		}
		rsock.end_of_message();

			// translate the job ad by replacing the
			// saved SUBMIT_ attributes
			// NOTE(review): this Inserts into `job` while iterating it with
			// NextExpr — appears to rely on the classic ClassAd iterator
			// tolerating in-place replacement; confirm before refactoring.
		job.ResetExpr();
		while( job.NextExpr(lhstr, tree) ) {
			if ( lhstr && strncasecmp("SUBMIT_",lhstr,7)==0 ) {
					// this attr name starts with SUBMIT_
					// compute new lhs (strip off the SUBMIT_)
				const char *new_attr_name = strchr(lhstr,'_');
				ExprTree * pTree;
				ASSERT(new_attr_name);
				new_attr_name++;
					// insert attribute
				pTree = tree->Copy();
				job.Insert(new_attr_name, pTree, false);
			}
		}	// while next expr

		if ( !ftrans.SimpleInit(&job,false,false,&rsock) ) {
			if( errstack ) {
				int cluster = -1, proc = -1;
				job.LookupInteger(ATTR_CLUSTER_ID,cluster);
				job.LookupInteger(ATTR_PROC_ID,proc);
				errstack->pushf(
					"DCSchedd::receiveJobSandbox",
					FILETRANSFER_INIT_FAILED,
					"File transfer initialization failed for target job %d.%d",
					cluster, proc );
			}
			return false;
		}
			// We want files to be copied to their final places, so apply
			// any filename remaps when downloading.
		if ( !ftrans.InitDownloadFilenameRemaps(&job) ) {
			return false;
		}
		if ( use_new_command ) {
			ftrans.setPeerVersion( version() );
		}
		if ( !ftrans.DownloadFiles() ) {
			if( errstack ) {
				FileTransfer::FileTransferInfo ft_info = ftrans.GetInfo();
				int cluster = -1, proc = -1;
				job.LookupInteger(ATTR_CLUSTER_ID,cluster);
				job.LookupInteger(ATTR_PROC_ID,proc);
				errstack->pushf(
					"DCSchedd::receiveJobSandbox",
					FILETRANSFER_DOWNLOAD_FAILED,
					"File transfer failed for target job %d.%d: %s",
					cluster, proc, ft_info.error_desc.Value() );
			}
			return false;
		}
	}
	rsock.end_of_message();

		// Acknowledge the transfer.
	rsock.encode();
	reply = OK;
	rsock.code(reply);
	rsock.end_of_message();

	if(numdone) { *numdone = JobAdsArrayLen; }

	return true;
}
/*
  Command handler for CCB_REGISTER: a target daemon wants to register
  with this CCB server (or reconnect under a previous CCBID).

  Flow: read the registration ad; if it carries a valid reconnect cookie
  (ATTR_CLAIM_ID) and prior CCBID (ATTR_CCBID), attempt to resume the old
  registration, otherwise register the daemon as a brand-new target.
  Reply with the assigned CCB contact string and reconnect cookie.

  Returns FALSE if the registration could not be read.  Returns
  KEEP_STREAM otherwise; on a reply-send failure the target is removed
  first (which closes the socket).  The CCBTarget allocated here is
  owned by the server's target table after AddTarget/ReconnectTarget.
*/
int CCBServer::HandleRegistration(int cmd,Stream *stream)
{
	ReliSock *sock = (ReliSock *)stream;
	ASSERT( cmd == CCB_REGISTER );

		// Avoid lengthy blocking on communication with our peer.
		// This command-handler should not get called until data
		// is ready to read.
	sock->timeout(1);

	ClassAd msg;
	sock->decode();
	if( !getClassAd( sock, msg ) || !sock->end_of_message() ) {
		dprintf(D_ALWAYS,
				"CCB: failed to receive registration "
				"from %s.\n", sock->peer_description() );
		return FALSE;
	}

	SetSmallBuffers(sock);

	MyString name;
	if( msg.LookupString(ATTR_NAME,name) ) {
			// target daemon name is purely for debugging purposes
		name.formatstr_cat(" on %s",sock->peer_description());
		sock->set_peer_description(name.Value());
	}

	CCBTarget *target = new CCBTarget(sock);

		// If the ad carries a prior CCBID plus the matching reconnect
		// cookie, try to resume the old registration under the same id.
	MyString reconnect_cookie_str,reconnect_ccbid_str;
	CCBID reconnect_cookie,reconnect_ccbid;
	bool reconnected = false;
	if( msg.LookupString(ATTR_CLAIM_ID,reconnect_cookie_str) &&
		CCBIDFromString(reconnect_cookie,reconnect_cookie_str.Value()) &&
		msg.LookupString( ATTR_CCBID,reconnect_ccbid_str) &&
		CCBIDFromContactString(reconnect_ccbid,reconnect_ccbid_str.Value()) )
	{
		target->setCCBID( reconnect_ccbid );
		reconnected = ReconnectTarget( target, reconnect_cookie );
	}

	if( !reconnected ) {
		AddTarget( target );
	}

	CCBReconnectInfo *reconnect_info = GetReconnectInfo( target->getCCBID() );
	ASSERT( reconnect_info );

	sock->encode();
	ClassAd reply_msg;
	MyString ccb_contact;

	CCBIDToString( reconnect_info->getReconnectCookie(),reconnect_cookie_str );
		// We send our address as part of the CCB contact string, rather
		// than letting the target daemon fill it in.  This is to give us
		// potential flexibility on the CCB server side to do things like
		// assign different targets to different CCB server sub-processes,
		// each with their own command port.
	CCBIDToContactString( m_address.Value(), target->getCCBID(), ccb_contact );

	reply_msg.Assign(ATTR_CCBID,ccb_contact.Value());
	reply_msg.Assign(ATTR_COMMAND,CCB_REGISTER);
	reply_msg.Assign(ATTR_CLAIM_ID,reconnect_cookie_str.Value());

	if( !putClassAd( sock, reply_msg ) || !sock->end_of_message() ) {
		dprintf(D_ALWAYS,
				"CCB: failed to send registration response "
				"to %s.\n", sock->peer_description() );

		RemoveTarget( target );
		return KEEP_STREAM; // we have already closed this socket
	}

	return KEEP_STREAM;
}
/*
  Ask the starter (START_SSHD) to launch an sshd inside the job sandbox
  for ssh-to-job, then store the key material it returns:
    - the private client key is written (base64-decoded) to
      private_client_key_file (created, must not already exist, mode 0400)
    - the public server key is appended to known_hosts_file with a "*"
      host pattern so it forms a valid known_hosts record.

  remote_user        - out: account the ssh session will run as
  retry_is_sensible  - out: whether the caller may retry a failure
  sock               - caller-supplied socket; left connected for the
                       ssh traffic itself on success

  Returns true on success; on failure fills in error_msg.  On platforms
  built without HAVE_SSH_TO_JOB this always fails.
*/
bool DCStarter::startSSHD(char const *known_hosts_file,char const *private_client_key_file,char const *preferred_shells,char const *slot_name,char const *ssh_keygen_args,ReliSock &sock,int timeout,char const *sec_session_id,MyString &remote_user,MyString &error_msg,bool &retry_is_sensible)
{
	retry_is_sensible = false;

#ifndef HAVE_SSH_TO_JOB
	error_msg = "This version of Condor does not support ssh key exchange.";
	return false;
#else
	if( !connectSock(&sock, timeout, NULL) ) {
		error_msg = "Failed to connect to starter";
		return false;
	}

	if( !startCommand(START_SSHD, &sock,timeout,NULL,NULL,false,sec_session_id) ) {
		error_msg = "Failed to send START_SSHD to starter";
		return false;
	}

		// Request ad: all fields optional.
	ClassAd input;

	if( preferred_shells && *preferred_shells ) {
		input.Assign(ATTR_SHELL,preferred_shells);
	}

	if( slot_name && *slot_name ) {
			// This is a little silly.
			// We are telling the remote side the name of the slot so
			// that it can put it in the welcome message.
		input.Assign(ATTR_NAME,slot_name);
	}

	if( ssh_keygen_args && *ssh_keygen_args ) {
		input.Assign(ATTR_SSH_KEYGEN_ARGS,ssh_keygen_args);
	}

	sock.encode();
	if( !putClassAd(&sock, input) || !sock.end_of_message() ) {
		error_msg = "Failed to send START_SSHD request to starter";
		return false;
	}

	ClassAd result;
	sock.decode();
	if( !getClassAd(&sock, result) || !sock.end_of_message() ) {
		error_msg = "Failed to read response to START_SSHD from starter";
		return false;
	}

	bool success = false;
	result.LookupBool(ATTR_RESULT,success);
	if( !success ) {
			// Report the starter's error, and let it tell us whether a
			// retry could succeed.
		std::string remote_error_msg;
		result.LookupString(ATTR_ERROR_STRING,remote_error_msg);
		error_msg.formatstr("%s: %s",slot_name,remote_error_msg.c_str());
		retry_is_sensible = false;
		result.LookupBool(ATTR_RETRY,retry_is_sensible);
		return false;
	}

	result.LookupString(ATTR_REMOTE_USER,remote_user);

	std::string public_server_key;
	if( !result.LookupString(ATTR_SSH_PUBLIC_SERVER_KEY,public_server_key) ) {
		error_msg = "No public ssh server key received in reply to START_SSHD";
		return false;
	}
	std::string private_client_key;
	if( !result.LookupString(ATTR_SSH_PRIVATE_CLIENT_KEY,private_client_key) ) {
		error_msg = "No ssh client key received in reply to START_SSHD";
		return false;
	}

		// store the private client key
	unsigned char *decode_buf = NULL;
	int length = -1;
	condor_base64_decode(private_client_key.c_str(),&decode_buf,&length);
	if( !decode_buf ) {
		error_msg = "Error decoding ssh client key.";
		return false;
	}
		// Key file must be fresh and private to the user (mode 0400).
	FILE *fp = safe_fcreate_fail_if_exists(private_client_key_file,"a",0400);
	if( !fp ) {
		error_msg.formatstr("Failed to create %s: %s",
							private_client_key_file,strerror(errno));
		free( decode_buf );
		return false;
	}
	if( fwrite(decode_buf,length,1,fp)!=1 ) {
		error_msg.formatstr("Failed to write to %s: %s",
							private_client_key_file,strerror(errno));
		fclose( fp );
		free( decode_buf );
		return false;
	}
	if( fclose(fp)!=0 ) {
		error_msg.formatstr("Failed to close %s: %s",
							private_client_key_file,strerror(errno));
		free( decode_buf );
		return false;
	}
	fp = NULL;
	free( decode_buf );
	decode_buf = NULL;

		// store the public server key in the known_hosts file
	length = -1;
	condor_base64_decode(public_server_key.c_str(),&decode_buf,&length);
	if( !decode_buf ) {
		error_msg = "Error decoding ssh server key.";
		return false;
	}
	fp = safe_fcreate_fail_if_exists(known_hosts_file,"a",0600);
	if( !fp ) {
		error_msg.formatstr("Failed to create %s: %s",
							known_hosts_file,strerror(errno));
		free( decode_buf );
		return false;
	}
		// prepend a host name pattern (*) to the public key to make a valid
		// record in the known_hosts file
	fprintf(fp,"* ");
	if( fwrite(decode_buf,length,1,fp)!=1 ) {
		error_msg.formatstr("Failed to write to %s: %s",
							known_hosts_file,strerror(errno));
		fclose( fp );
		free( decode_buf );
		return false;
	}
	if( fclose(fp)!=0 ) {
		error_msg.formatstr("Failed to close %s: %s",
							known_hosts_file,strerror(errno));
		free( decode_buf );
		return false;
	}
	fp = NULL;
	free( decode_buf );
	decode_buf = NULL;

	return true;
#endif
}
bool DCStarter::peek(bool transfer_stdout, ssize_t &stdout_offset, bool transfer_stderr, ssize_t &stderr_offset, const std::vector<std::string> &filenames, std::vector<ssize_t> &offsets, size_t max_bytes, bool &retry_sensible, PeekGetFD &next, std::string &error_msg, unsigned timeout, const std::string &sec_session_id, DCTransferQueue *xfer_q) { compat_classad::ClassAd ad; ad.InsertAttr(ATTR_JOB_OUTPUT, transfer_stdout); ad.InsertAttr("OutOffset", stdout_offset); ad.InsertAttr(ATTR_JOB_ERROR, transfer_stderr); ad.InsertAttr("ErrOffset", stderr_offset); ad.InsertAttr(ATTR_VERSION, CondorVersion()); size_t total_files = 0; total_files += transfer_stdout ? 1 : 0; total_files += transfer_stderr ? 1 : 0; if (filenames.size()) { total_files += filenames.size(); std::vector<classad::ExprTree *> filelist; filelist.reserve(filenames.size()); std::vector<classad::ExprTree *> offsetlist; offsetlist.reserve(filenames.size()); std::vector<ssize_t>::const_iterator it2 = offsets.begin(); for (std::vector<std::string>::const_iterator it = filenames.begin(); it != filenames.end() && it2 != offsets.end(); it++, it2++) { classad::Value value; value.SetStringValue(*it); filelist.push_back(classad::Literal::MakeLiteral(value)); value.SetIntegerValue(*it2); offsetlist.push_back(classad::Literal::MakeLiteral(value)); } classad::ExprTree *list(classad::ExprList::MakeExprList(filelist)); ad.Insert("TransferFiles", list); list = classad::ExprList::MakeExprList(offsetlist); ad.Insert("TransferOffsets", list); } ad.InsertAttr(ATTR_MAX_TRANSFER_BYTES, static_cast<long long>(max_bytes)); ReliSock sock; if( !connectSock(&sock, timeout, NULL) ) { error_msg = "Failed to connect to starter"; return false; } if( !startCommand(STARTER_PEEK, &sock, timeout, NULL, NULL, false, sec_session_id.c_str()) ) { error_msg = "Failed to send START_PEEK to starter"; return false; } sock.encode(); if (!putClassAd(&sock, ad) || !sock.end_of_message()) { error_msg = "Failed to send request to starter"; return false; 
} compat_classad::ClassAd response; sock.decode(); if (!getClassAd(&sock, response) || !sock.end_of_message()) { error_msg = "Failed to read response for peeking at logs."; return false; } dPrintAd(D_FULLDEBUG, response); bool success = false; if (!response.EvaluateAttrBool(ATTR_RESULT, success) || !success) { response.EvaluateAttrBool(ATTR_RETRY, retry_sensible); error_msg = "Remote operation failed."; response.EvaluateAttrString(ATTR_ERROR_STRING, error_msg); return false; } classad::Value valueX; classad_shared_ptr<classad::ExprList> list; if (!response.EvaluateAttr("TransferFiles", valueX) || !valueX.IsSListValue(list)) { error_msg = "Unable to evaluate starter response"; return false; } classad_shared_ptr<classad::ExprList> offlist; if (!response.EvaluateAttr("TransferOffsets", valueX) || !valueX.IsSListValue(offlist)) { error_msg = "Unable to evaluate starter response (missing offsets)"; return false; } size_t remaining = max_bytes; size_t file_count = 0; classad::ExprList::const_iterator it2 = offlist->begin(); for (classad::ExprList::const_iterator it = list->begin(); it != list->end() && it2 != offlist->end(); it++, it2++) { classad::Value value; (*it2)->Evaluate(value); off_t off = -1; value.IsIntegerValue(off); (*it)->Evaluate(value); std::string filename; int64_t xfer_fd = -1; if (!value.IsStringValue(filename) && value.IsIntegerValue(xfer_fd)) { if (xfer_fd == 0) filename = "_condor_stdout"; if (xfer_fd == 1) filename = "_condor_stderr"; } int fd = next.getNextFD(filename); filesize_t size = -1; int retval; if ((retval = sock.get_file(&size, fd, false, false, remaining, xfer_q)) && (retval != GET_FILE_MAX_BYTES_EXCEEDED)) { error_msg = "Internal error when transferring file " + filename; } else if (size >= 0) { remaining -= max_bytes; file_count++; off += size; } else { error_msg = "Failed to transfer file " + filename; } if (xfer_fd == 0) { stdout_offset = off; //dprintf(D_FULLDEBUG, "New stdout offset: %ld\n", stdout_offset); } else if (xfer_fd == 1) 
{ stderr_offset = off; } else { std::vector<ssize_t>::iterator it4 = offsets.begin(); for (std::vector<std::string>::const_iterator it3 = filenames.begin(); it3 != filenames.end() && it4 != offsets.end(); it3++, it4++) { if (*it3 == filename) *it4 = off; } } } size_t remote_file_count; if (!sock.get(remote_file_count) || !sock.end_of_message()) { error_msg = "Unable to get remote file count."; return false; } if (file_count != remote_file_count) { std::stringstream ss; ss << "Recieved " << file_count << " files, but remote side thought it sent " << remote_file_count << " files"; error_msg = ss.str(); return false; } if ((total_files != file_count) && !error_msg.size()) { error_msg = "At least one file transfer failed."; return false; } return true; }
/*
  Read one negotiation message from the negotiator into this object's
  state (m_operation plus per-operation fields).  The operation code
  determines what, if anything, follows on the wire:
    REJECTED_WITH_REASON       -> reject reason string
    SEND_RESOURCE_REQUEST_LIST -> count of resource requests to send
    PERMISSION_AND_AD          -> secret ClaimId + the matched startd ad
    REJECTED / SEND_JOB_INFO / END_NEGOTIATE -> nothing further
  PERMISSION (without the ad) is a protocol violation and is rejected.

  Returns true if a complete, recognized message was read.
  Note that end_of_message() is handled by our caller.
*/
bool
ScheddNegotiate::readMsg( DCMessenger * /*messenger*/, Sock *sock )
{
		// Get the negotiator's request.
	if( !sock->code(m_operation) ) {
		dprintf( D_ALWAYS, "Can't receive request from negotiator\n" );
		return false;
	}

	switch( m_operation ) {
	case REJECTED_WITH_REASON:
		if( !sock->code(m_reject_reason) ) {
			dprintf( D_ALWAYS, "Can't receive reject reason from negotiator\n" );
			return false;
		}
		break;
	case REJECTED:
		break;

	case SEND_JOB_INFO:
		break;

	case SEND_RESOURCE_REQUEST_LIST:
		if( !sock->code(m_num_resource_reqs_to_send) ) {
			dprintf( D_ALWAYS, "Can't receive num_resource_reqs_to_send from negotiator\n" );
			return false;
		}
		break;

	case PERMISSION:
			// No negotiator since 7.1.3 should ever send this
			// command, and older ones should not send it either,
			// since we advertise WantResAd=True.
		dprintf( D_ALWAYS, "Negotiator sent PERMISSION rather than expected PERMISSION_AND_AD! Aborting.\n");
		return false;
		break;

	case PERMISSION_AND_AD: {
			// The ClaimId travels as a secret; anything after the first
			// space in it is side-channel "extra claims" data.
		char *claim_id = NULL;
		if( !sock->get_secret(claim_id) || !claim_id ) {
			dprintf( D_ALWAYS, "Can't receive ClaimId from negotiator\n" );
			return false;
		}
		m_claim_id = claim_id;
		free( claim_id );

		size_t space = m_claim_id.find(' ');
		if (space != std::string::npos) {
			m_extra_claims = m_claim_id.substr(space + 1, std::string::npos);
			m_claim_id = m_claim_id.substr(0, space);
		}

		m_match_ad.Clear();

			// get startd ad from negotiator
		if( !getClassAd(sock, m_match_ad) ) {
			dprintf( D_ALWAYS, "Can't get my match ad from negotiator\n" );
			return false;
		}
#if defined(ADD_TARGET_SCOPING)
		m_match_ad.AddTargetRefs( TargetJobAttrs );
#endif

		break;
	}

	case END_NEGOTIATE:
		break;

	default:
		dprintf( D_ALWAYS, "Got unexpected request (%d)\n", m_operation );
		return false;
	}

	return true;
}
// Command handler for CCB_REQUEST.  Reads the request ad from a client that
// wants to be connected to a registered target daemon, validates it, and
// forwards the request to the target.  Returns KEEP_STREAM on success (the
// request object takes over the socket); FALSE closes the connection.
int CCBServer::HandleRequest(int cmd,Stream *stream)
{
	ReliSock *sock = (ReliSock *)stream;
	ASSERT( cmd == CCB_REQUEST );

		// Avoid lengthy blocking on communication with our peer.
		// This command-handler should not get called until data
		// is ready to read.
	sock->timeout(1);

	ClassAd msg;
	sock->decode();
	if( !getClassAd( sock, msg ) || !sock->end_of_message() ) {
		dprintf(D_ALWAYS,
				"CCB: failed to receive request "
				"from %s.\n", sock->peer_description() );
		return FALSE;
	}

	MyString name;
	if( msg.LookupString(ATTR_NAME,name) ) {
			// client name is purely for debugging purposes
		name.formatstr_cat(" on %s",sock->peer_description());
		sock->set_peer_description(name.Value());
	}
	MyString target_ccbid_str;
	MyString return_addr;
	MyString connect_id; // id target daemon should present to requester
	CCBID target_ccbid;

		// NOTE: using ATTR_CLAIM_ID for connect id so that it is
		// automatically treated as a secret over the network.
		// It must be presented by the target daemon when connecting
		// to the requesting client, so the client can confirm that
		// the connection is in response to its request.
	if( !msg.LookupString(ATTR_CCBID,target_ccbid_str) ||
		!msg.LookupString(ATTR_MY_ADDRESS,return_addr) ||
		!msg.LookupString(ATTR_CLAIM_ID,connect_id) )
	{
		MyString ad_str;
		sPrintAd(ad_str, msg);
		dprintf(D_ALWAYS,
				"CCB: invalid request from %s: %s\n",
				sock->peer_description(), ad_str.Value() );
		return FALSE;
	}
	if( !CCBIDFromString(target_ccbid,target_ccbid_str.Value()) ) {
		dprintf(D_ALWAYS,
				"CCB: request from %s contains invalid CCBID %s\n",
				sock->peer_description(), target_ccbid_str.Value() );
		return FALSE;
	}

		// look up the daemon the client wants to reach
	CCBTarget *target = GetTarget( target_ccbid );
	if( !target ) {
		dprintf(D_ALWAYS,
			"CCB: rejecting request from %s for ccbid %s because no daemon is "
			"currently registered with that id "
			"(perhaps it recently disconnected).\n",
			sock->peer_description(), target_ccbid_str.Value());

		MyString error_msg;
		error_msg.formatstr(
			"CCB server rejecting request for ccbid %s because no daemon is "
			"currently registered with that id "
			"(perhaps it recently disconnected).", target_ccbid_str.Value());
		RequestReply( sock, false, error_msg.Value(), 0, target_ccbid );
		return FALSE;
	}

	SetSmallBuffers(sock);

		// the request object takes ownership of the socket; AddRequest
		// registers it so the target's reply can be matched back up
	CCBServerRequest *request =
		new CCBServerRequest(
			sock,
			target_ccbid,
			return_addr.Value(),
			connect_id.Value() );
	AddRequest( request, target );

	dprintf(D_FULLDEBUG,
			"CCB: received request id %lu from %s for target ccbid %s "
			"(registered as %s)\n",
			request->getRequestID(),
			request->getSock()->peer_description(),
			target_ccbid_str.Value(),
			target->getSock()->peer_description());

	ForwardRequestToTarget( request, target );

	return KEEP_STREAM;
}
// Main ad-intake dispatch for the collector: validates the incoming ad,
// hashes it according to the command type, and inserts/updates/merges it
// into the appropriate hash table.
//
// Parameters:
//   command  - the collector command (UPDATE_*, MERGE_*, QUERY_*, ...)
//   clientAd - the ad received from the remote daemon (not owned here)
//   from     - address the ad came from
//   insert   - out: >0/0 insert/update, -1 bad command, -2 unimplemented
//              here (query/invalidate), -3 bad hash key
//   sock     - socket the ad arrived on; for startd updates the startd's
//              private ad is read from it as well
// Returns the ad now stored in the table, or NULL/0 on failure.
//
// Fixes vs. previous version:
//   * clientAdToRepeat was leaked whenever the startd-update case broke
//     out early (bad hash key, missing socket, or failure to read the
//     private ad); it is now freed before returning.
//   * "Accouting" label typo corrected to "Accounting".
ClassAd *CollectorEngine::
collect (int command,ClassAd *clientAd,const condor_sockaddr& from,int &insert,Sock *sock)
{
	ClassAd		*retVal;
	ClassAd		*pvtAd;
	int		insPvt;
	AdNameHashKey		hk;
	HashString	hashString;
	static int repeatStartdAds = -1;		// for debugging
	ClassAd		*clientAdToRepeat = NULL;
	_condor_auto_accum_runtime<collector_runtime_probe> rt(CollectorEngine_rucc_runtime);
	double rt_last = rt.begin;

	if (repeatStartdAds == -1) {
		repeatStartdAds = param_integer("COLLECTOR_REPEAT_STARTD_ADS",0);
	}

	if( !ValidateClassAd(command,clientAd,sock) ) {
		return NULL;
	}

	CollectorEngine_rucc_validateAd_runtime.Add(rt.tick(rt_last));

	// mux on command
	switch (command)
	{
	  case UPDATE_STARTD_AD:
	  case UPDATE_STARTD_AD_WITH_ACK:
		if ( repeatStartdAds > 0 ) {
			clientAdToRepeat = new ClassAd(*clientAd);
		}
		if (!makeStartdAdHashKey (hk, clientAd))
		{
			dprintf (D_ALWAYS, "Could not make hashkey --- ignoring ad\n");
			insert = -3;
			retVal = 0;
			break;
		}
		hashString.Build( hk );

		CollectorEngine_rucc_makeHashKey_runtime.Add(rt.tick(rt_last));

		retVal=updateClassAd (StartdAds, "StartdAd ", "Start",
							  clientAd, hk, hashString, insert, from );

		if (last_updateClassAd_was_insert) {
			CollectorEngine_rucc_insertAd_runtime.Add(rt.tick(rt_last));
		} else {
			CollectorEngine_rucc_updateAd_runtime.Add(rt.tick(rt_last));
		}

			// if we want to store private ads
		if (!sock)
		{
			dprintf (D_ALWAYS, "Want private ads, but no socket given!\n");
			break;
		}
		else
		{
			if (!(pvtAd = new ClassAd))
			{
				EXCEPT ("Memory error!");
			}
			if( !getClassAd(sock, *pvtAd) )
			{
				dprintf(D_FULLDEBUG,"\t(Could not get startd's private ad)\n");
				delete pvtAd;
				break;
			}

				// Fix up some stuff in the private ad that we depend on.
				// We started doing this in 7.2.0, so once we no longer
				// care about compatibility with stuff from before then,
				// the startd could stop bothering to send these attributes.

				// Queries of private ads depend on the following:
			SetMyTypeName( *pvtAd, STARTD_ADTYPE );

				// Negotiator matches up private ad with public ad by
				// using the following.
			if( retVal ) {
				CopyAttribute( ATTR_MY_ADDRESS, *pvtAd, *retVal );
				CopyAttribute( ATTR_NAME, *pvtAd, *retVal );
			}

			CollectorEngine_rucc_getPvtAd_runtime.Add(rt.tick(rt_last));

				// insert the private ad into its hashtable --- use the same
				// hash key as the public ad
			(void) updateClassAd (StartdPrivateAds, "StartdPvtAd ",
								  "StartdPvt", pvtAd, hk, hashString, insPvt,
								  from );

			if (last_updateClassAd_was_insert) {
				CollectorEngine_rucc_insertPvtAd_runtime.Add(rt.tick(rt_last));
			} else {
				CollectorEngine_rucc_updatePvtAd_runtime.Add(rt.tick(rt_last));
			}
		}

			// create fake duplicates of this ad, each with a different name, if
			// we are told to do so.  this feature exists for developer
			// scalability testing.
		if ( repeatStartdAds > 0 && clientAdToRepeat ) {
			ClassAd *fakeAd;
			int n;
			char newname[150],oldname[130];
			oldname[0] = '\0';
			clientAdToRepeat->LookupString("Name",oldname,sizeof(oldname));
			for (n=0;n<repeatStartdAds;n++) {
				fakeAd = new ClassAd(*clientAdToRepeat);
				snprintf(newname,sizeof(newname),
						 "Name=\"fake%d-%s\"",n,oldname);
				fakeAd->Insert(newname);
				makeStartdAdHashKey (hk, fakeAd);
				hashString.Build( hk );
				if (! updateClassAd (StartdAds, "StartdAd ", "Start",
									 fakeAd, hk, hashString, insert, from ) )
				{
					// don't leak memory if there is some failure
					delete fakeAd;
				}
			}
			delete clientAdToRepeat;
			clientAdToRepeat = NULL;

			CollectorEngine_rucc_repeatAd_runtime.Add(rt.tick(rt_last));
		}
		break;

	  case MERGE_STARTD_AD:
		if (!makeStartdAdHashKey (hk, clientAd))
		{
			dprintf (D_ALWAYS, "Could not make hashkey --- ignoring ad\n");
			insert = -3;
			retVal = 0;
			break;
		}
		hashString.Build( hk );
		retVal=mergeClassAd (StartdAds, "StartdAd ", "Start",
							 clientAd, hk, hashString, insert, from );
		break;

	  case UPDATE_SCHEDD_AD:
		if (!makeScheddAdHashKey (hk, clientAd))
		{
			dprintf (D_ALWAYS, "Could not make hashkey --- ignoring ad\n");
			insert = -3;
			retVal = 0;
			break;
		}
		hashString.Build( hk );
		retVal=updateClassAd (ScheddAds, "ScheddAd ", "Schedd",
							  clientAd, hk, hashString, insert, from );
		break;

	  case UPDATE_SUBMITTOR_AD:
		// use the same hashkey function as a schedd ad
		if (!makeScheddAdHashKey (hk, clientAd))
		{
			dprintf (D_ALWAYS, "Could not make hashkey --- ignoring ad\n");
			insert = -3;
			retVal = 0;
			break;
		}
		// since submittor ads always follow a schedd ad, and a master check is
		// performed for schedd ads, we don't need a master check in here
		hashString.Build( hk );
		retVal=updateClassAd (SubmittorAds, "SubmittorAd ", "Submittor",
							  clientAd, hk, hashString, insert, from );
		break;

	  case UPDATE_LICENSE_AD:
		// use the same hashkey function as a schedd ad
		if (!makeLicenseAdHashKey (hk, clientAd))
		{
			dprintf (D_ALWAYS, "Could not make hashkey --- ignoring ad\n");
			insert = -3;
			retVal = 0;
			break;
		}
		// since submittor ads always follow a schedd ad, and a master check is
		// performed for schedd ads, we don't need a master check in here
		hashString.Build( hk );
		retVal=updateClassAd (LicenseAds, "LicenseAd ", "License",
							  clientAd, hk, hashString, insert, from );
		break;

	  case UPDATE_MASTER_AD:
		if (!makeMasterAdHashKey (hk, clientAd))
		{
			dprintf (D_ALWAYS, "Could not make hashkey --- ignoring ad\n");
			insert = -3;
			retVal = 0;
			break;
		}
		hashString.Build( hk );
		retVal=updateClassAd (MasterAds, "MasterAd ", "Master",
							  clientAd, hk, hashString, insert, from );
		break;

	  case UPDATE_CKPT_SRVR_AD:
		if (!makeCkptSrvrAdHashKey (hk, clientAd))
		{
			dprintf (D_ALWAYS, "Could not make hashkey --- ignoring ad\n");
			insert = -3;
			retVal = 0;
			break;
		}
		hashString.Build( hk );
		retVal=updateClassAd (CkptServerAds, "CkptSrvrAd ", "CkptSrvr",
							  clientAd, hk, hashString, insert, from );
		break;

	  case UPDATE_COLLECTOR_AD:
		if (!makeCollectorAdHashKey (hk, clientAd))
		{
			dprintf (D_ALWAYS, "Could not make hashkey --- ignoring ad\n");
			insert = -3;
			retVal = 0;
			break;
		}
		hashString.Build( hk );
		retVal=updateClassAd (CollectorAds, "CollectorAd ", "Collector",
							  clientAd, hk, hashString, insert, from );
		break;

	  case UPDATE_STORAGE_AD:
		if (!makeStorageAdHashKey (hk, clientAd))
		{
			dprintf (D_ALWAYS, "Could not make hashkey --- ignoring ad\n");
			insert = -3;
			retVal = 0;
			break;
		}
		hashString.Build( hk );
		retVal=updateClassAd (StorageAds, "StorageAd ", "Storage",
							  clientAd, hk, hashString, insert, from );
		break;

	  case UPDATE_ACCOUNTING_AD:
		if (!makeAccountingAdHashKey (hk, clientAd))
		{
			dprintf (D_ALWAYS, "Could not make hashkey --- ignoring ad\n");
			insert = -3;
			retVal = 0;
			break;
		}
		hashString.Build( hk );
		// note: label typo "Accouting" fixed here
		retVal=updateClassAd (AccountingAds, "AccountingAd ", "Accounting",
							  clientAd, hk, hashString, insert, from );
		break;

	  case UPDATE_NEGOTIATOR_AD:
		if (!makeNegotiatorAdHashKey (hk, clientAd))
		{
			dprintf (D_ALWAYS, "Could not make hashkey --- ignoring ad\n");
			insert = -3;
			retVal = 0;
			break;
		}
		hashString.Build( hk );
		if (m_allowOnlyOneNegotiator) {
			// first, purge all the existing negotiator ads, since we
			// want to enforce that *ONLY* 1 negotiator is in the
			// collector any given time.
			purgeHashTable( NegotiatorAds );
		}
		retVal=updateClassAd (NegotiatorAds, "NegotiatorAd ", "Negotiator",
							  clientAd, hk, hashString, insert, from );
		break;

	  case UPDATE_HAD_AD:
		if (!makeHadAdHashKey (hk, clientAd))
		{
			dprintf (D_ALWAYS, "Could not make hashkey --- ignoring ad\n");
			insert = -3;
			retVal = 0;
			break;
		}
		hashString.Build( hk );
		retVal=updateClassAd (HadAds, "HadAd ", "HAD",
							  clientAd, hk, hashString, insert, from );
		break;

	  case UPDATE_GRID_AD:
		if (!makeGridAdHashKey(hk, clientAd))
		{
			dprintf (D_ALWAYS, "Could not make hashkey --- ignoring ad\n");
			insert = -3;
			retVal = 0;
			break;
		}
		hashString.Build( hk );
		retVal=updateClassAd (GridAds, "GridAd ", "Grid",
							  clientAd, hk, hashString, insert, from );
		break;

	  case UPDATE_AD_GENERIC:
	  {
		  // generic ads are keyed by their MyType; each type gets its
		  // own hash table, created on demand
		  const char *type_str = GetMyTypeName(*clientAd);
		  if (type_str == NULL) {
			  dprintf(D_ALWAYS, "collect: UPDATE_AD_GENERIC: ad has no type\n");
			  insert = -3;
			  retVal = 0;
			  break;
		  }
		  MyString type(type_str);
		  CollectorHashTable *cht = findOrCreateTable(type);
		  if (cht == NULL) {
			  dprintf(D_ALWAYS, "collect: findOrCreateTable failed\n");
			  insert = -3;
			  retVal = 0;
			  break;
		  }
		  if (!makeGenericAdHashKey (hk, clientAd))
		  {
			  dprintf(D_ALWAYS, "Could not make hashkey --- ignoring ad\n");
			  insert = -3;
			  retVal = 0;
			  break;
		  }
		  hashString.Build(hk);
		  retVal = updateClassAd(*cht, type_str, type_str,
								 clientAd, hk, hashString, insert, from);
		  break;
	  }

	  case QUERY_STARTD_ADS:
	  case QUERY_SCHEDD_ADS:
	  case QUERY_MASTER_ADS:
	  case QUERY_SUBMITTOR_ADS:
	  case QUERY_CKPT_SRVR_ADS:
	  case QUERY_STARTD_PVT_ADS:
	  case QUERY_COLLECTOR_ADS:
	  case QUERY_NEGOTIATOR_ADS:
	  case QUERY_HAD_ADS:
	  case QUERY_GENERIC_ADS:
	  case INVALIDATE_STARTD_ADS:
	  case INVALIDATE_SCHEDD_ADS:
	  case INVALIDATE_MASTER_ADS:
	  case INVALIDATE_CKPT_SRVR_ADS:
	  case INVALIDATE_SUBMITTOR_ADS:
	  case INVALIDATE_COLLECTOR_ADS:
	  case INVALIDATE_NEGOTIATOR_ADS:
	  case INVALIDATE_HAD_ADS:
	  case INVALIDATE_ADS_GENERIC:
		// these are not implemented in the engine, but we allow another
		// daemon to detect that these commands have been given
	    insert = -2;
		retVal = 0;
	    break;

	  default:
		dprintf (D_ALWAYS, "Received illegal command: %d\n", command);
		insert = -1;
		retVal = 0;
	}

	if (command != UPDATE_STARTD_AD && command != UPDATE_STARTD_AD_WITH_ACK) {
		CollectorEngine_rucc_other_runtime.Add(rt.tick(rt_last));
	}

	// FIX: if the startd-update case bailed out before the repeat loop
	// consumed clientAdToRepeat, free it here instead of leaking it.
	if ( clientAdToRepeat ) {
		delete clientAdToRepeat;
		clientAdToRepeat = NULL;
	}

	// return the updated ad
	return retVal;
}
// Handle a reply (or heartbeat) arriving from a registered target daemon.
// A failed read means the target disconnected, so its registration is
// removed.  Otherwise the reply is matched to the pending client request
// by request id and the result (success or error) is forwarded to it.
void CCBServer::HandleRequestResultsMsg( CCBTarget *target )
{
		// Reply from target daemon about whether it succeeded in
		// connecting to the requested client.

	Sock *sock = target->getSock();

	ClassAd msg;
	sock->decode();
	if( !getClassAd( sock, msg ) || !sock->end_of_message() ) {
			// disconnect
		dprintf(D_FULLDEBUG,
				"CCB: received disconnect from target daemon %s "
				"with ccbid %lu.\n",
				sock->peer_description(), target->getCCBID() );
		RemoveTarget( target );
		return;
	}

		// an ALIVE command is just a heartbeat, not a request result
	int command = 0;
	if( msg.LookupInteger( ATTR_COMMAND, command ) && command == ALIVE ) {
		SendHeartbeatResponse( target );
		return;
	}

	target->decPendingRequestResults();

	bool success = false;
	MyString error_msg;
	MyString reqid_str;
	CCBID reqid;
	MyString connect_id;
	msg.LookupBool( ATTR_RESULT, success );
	msg.LookupString( ATTR_ERROR_STRING, error_msg );
	msg.LookupString( ATTR_REQUEST_ID, reqid_str );
	msg.LookupString( ATTR_CLAIM_ID, connect_id );

	if( !CCBIDFromString( reqid, reqid_str.Value() ) ) {
		MyString msg_str;
		sPrintAd(msg_str, msg);
		dprintf(D_ALWAYS,
				"CCB: received reply from target daemon %s with ccbid %lu "
				"without a valid request id: %s\n",
				sock->peer_description(),
				target->getCCBID(),
				msg_str.Value());
		RemoveTarget( target );
		return;
	}

	CCBServerRequest *request = GetRequest( reqid );
	if( request && request->getSock()->readReady() ) {
		// Request socket must have just closed.  To avoid noise in
		// logs when we fail to write to it, delete the request now.
		RemoveRequest( request );
		request = NULL;
	}

	char const *request_desc = "(client which has gone away)";
	if( request ) {
		request_desc = request->getSock()->peer_description();
	}

	if( success ) {
		dprintf(D_FULLDEBUG,"CCB: received 'success' from target daemon %s "
				"with ccbid %lu for "
				"request %s from %s.\n",
				sock->peer_description(),
				target->getCCBID(),
				reqid_str.Value(),
				request_desc);
	}
	else {
		dprintf(D_FULLDEBUG,"CCB: received error from target daemon %s "
				"with ccbid %lu for "
				"request %s from %s: %s\n",
				sock->peer_description(),
				target->getCCBID(),
				reqid_str.Value(),
				request_desc,
				error_msg.Value());
	}

	if( !request ) {
		if( success ) {
				// expected: the client has gone away; it got what it wanted
			return;
		}
		dprintf( D_FULLDEBUG,
				 "CCB: client for request %s to target daemon %s with ccbid "
				 "%lu disappeared before receiving error details.\n",
				 reqid_str.Value(),
				 sock->peer_description(),
				 target->getCCBID());
		return;
	}

		// the connect id acts as a shared secret proving the reply really
		// corresponds to this request; a mismatch is a protocol violation
	if( connect_id != request->getConnectID() ) {
		MyString msg_str;
		sPrintAd(msg_str, msg);
		dprintf( D_FULLDEBUG,
				 "CCB: received wrong connect id (%s) from target daemon %s "
				 "with ccbid %lu for "
				 "request %s\n",
				 connect_id.Value(),
				 sock->peer_description(),
				 target->getCCBID(),
				 reqid_str.Value());
		RemoveTarget( target );
		return;
	}

	RequestFinished( request, success, error_msg.Value() );
}
// Ask the schedd to perform the given action (hold, release, remove, ...)
// on a set of jobs selected either by a constraint expression or by an
// explicit id list (exactly one of the two must be given).
//
// Returns a freshly allocated result ad from the schedd (caller deletes),
// or NULL on communication/authentication failure.  If the schedd reports
// the action failed, the result ad is still returned so the caller can
// inspect ATTR_ACTION_RESULT and friends.
//
// Improvement vs. previous version: the command ad is now built with
// ClassAd::Assign()/AssignExpr() (already used below for reason_code)
// instead of hand-rolled malloc/sprintf/Insert("attr = value") strings.
// This removes the manual buffer-size arithmetic and correctly escapes
// the reason string even if it contains quote characters.
ClassAd*
DCSchedd::actOnJobs( JobAction action, const char* constraint,
					 StringList* ids, const char* reason,
					 const char* reason_attr, const char* reason_code,
					 const char* reason_code_attr,
					 action_result_type_t result_type,
					 bool notify_scheduler, CondorError * errstack )
{
	int reply;
	ReliSock rsock;

		// // // // // // // //
		// Construct the ad we want to send
		// // // // // // // //

	ClassAd cmd_ad;

	cmd_ad.Assign( ATTR_JOB_ACTION, (int)action );
	cmd_ad.Assign( ATTR_ACTION_RESULT_TYPE, (int)result_type );
	cmd_ad.Assign( ATTR_NOTIFY_JOB_SCHEDULER, notify_scheduler );

	if( constraint ) {
		if( ids ) {
				// This is a programming error, not a run-time one
			EXCEPT( "DCSchedd::actOnJobs has both constraint and ids!" );
		}
			// the constraint is an expression, not a string literal,
			// so insert it unquoted
		if( ! cmd_ad.AssignExpr(ATTR_ACTION_CONSTRAINT, constraint) ) {
			dprintf( D_ALWAYS, "DCSchedd::actOnJobs: "
					 "Can't insert constraint (%s) into ClassAd!\n",
					 constraint );
			return NULL;
		}
	} else if( ids ) {
		char* action_ids = ids->print_to_string();
		if ( action_ids ) {
				// id list goes in as a quoted string
			cmd_ad.Assign( ATTR_ACTION_IDS, action_ids );
			free(action_ids);
			action_ids = NULL;
		}
	} else {
		EXCEPT( "DCSchedd::actOnJobs called without constraint or ids" );
	}

	if( reason_attr && reason ) {
			// Assign() quotes and escapes the reason string for us
		cmd_ad.Assign( reason_attr, reason );
	}

	if( reason_code_attr && reason_code ) {
			// reason_code is already a valid ClassAd expression
		cmd_ad.AssignExpr(reason_code_attr,reason_code);
	}

		// // // // // // // //
		// On the wire protocol
		// // // // // // // //

	rsock.timeout(20);   // years of research... :)
	if( ! rsock.connect(_addr) ) {
		dprintf( D_ALWAYS, "DCSchedd::actOnJobs: "
				 "Failed to connect to schedd (%s)\n", _addr );
		return NULL;
	}
	if( ! startCommand(ACT_ON_JOBS, (Sock*)&rsock, 0, errstack) ) {
		dprintf( D_ALWAYS, "DCSchedd::actOnJobs: "
				 "Failed to send command (ACT_ON_JOBS) to the schedd\n" );
		return NULL;
	}
		// First, if we're not already authenticated, force that now.
	if (!forceAuthentication( &rsock, errstack )) {
		dprintf( D_ALWAYS, "DCSchedd: authentication failure: %s\n",
				 errstack->getFullText().c_str() );
		return NULL;
	}

		// Now, put the command classad on the wire
	if( ! (putClassAd(&rsock, cmd_ad) && rsock.end_of_message()) ) {
		dprintf( D_ALWAYS, "DCSchedd:actOnJobs: Can't send classad\n" );
		return NULL;
	}

		// Next, we need to read the reply from the schedd if things
		// are ok and it's going to go forward.  If the schedd can't
		// read our reply to this ClassAd, it assumes we got killed
		// and it should abort its transaction
	rsock.decode();
	ClassAd* result_ad = new ClassAd();
	if( ! (getClassAd(&rsock, *result_ad) && rsock.end_of_message()) ) {
		dprintf( D_ALWAYS, "DCSchedd:actOnJobs: "
				 "Can't read response ad from %s\n", _addr );
		delete( result_ad );
		return NULL;
	}

		// If the action totally failed, the schedd will already have
		// aborted the transaction and closed up shop, so there's no
		// reason trying to continue.  However, we still want to
		// return the result ad we got back so that our caller can
		// figure out what went wrong.
	reply = FALSE;
	result_ad->LookupInteger( ATTR_ACTION_RESULT, reply );
	if( reply != OK ) {
		dprintf( D_ALWAYS, "DCSchedd:actOnJobs: Action failed\n" );
		return result_ad;
	}

		// Tell the schedd we're still here and ready to go
	rsock.encode();
	int answer = OK;
	if( ! (rsock.code(answer) && rsock.end_of_message()) ) {
		dprintf( D_ALWAYS, "DCSchedd:actOnJobs: Can't send reply\n" );
		delete( result_ad );
		return NULL;
	}

		// finally, make sure the schedd didn't blow up trying to
		// commit these changes to the job queue...
	rsock.decode();
	if( ! (rsock.code(reply) && rsock.end_of_message()) ) {
		dprintf( D_ALWAYS, "DCSchedd:actOnJobs: "
				 "Can't read confirmation from %s\n", _addr );
		delete( result_ad );
		return NULL;
	}

	return result_ad;
}
// Read history from a remote schedd static void readHistoryRemote(classad::ExprTree *constraintExpr) { printHeader(); if(longformat && use_xml) { std::string out; AddClassAdXMLFileHeader(out); printf("%s\n", out.c_str()); } classad::ClassAd ad; classad::ExprList *projList(new classad::ExprList()); classad::ExprTree *projTree = static_cast<classad::ExprTree*>(projList); ad.Insert(ATTR_PROJECTION, projTree); ad.Insert(ATTR_REQUIREMENTS, constraintExpr); ad.InsertAttr(ATTR_NUM_MATCHES, specifiedMatch <= 0 ? -1 : specifiedMatch); DCSchedd schedd(g_name.size() ? g_name.c_str() : NULL, g_pool.size() ? g_pool.c_str() : NULL); if (!schedd.locate(Daemon::LOCATE_FOR_LOOKUP)) { fprintf(stderr, "Unable to locate remote schedd (name=%s, pool=%s).\n", g_name.c_str(), g_pool.c_str()); exit(1); } Sock* sock; if (!(sock = schedd.startCommand(QUERY_SCHEDD_HISTORY, Stream::reli_sock, 0))) { fprintf(stderr, "Unable to send history command to remote schedd;\n" "Typically, either the schedd is not responding, does not authorize you, or does not support remote history.\n"); exit(1); } classad_shared_ptr<Sock> sock_sentry(sock); if (!putClassAd(sock, ad) || !sock->end_of_message()) { fprintf(stderr, "Unable to send request to remote schedd; likely a server or network error.\n"); exit(1); } while (true) { compat_classad::ClassAd ad; if (!getClassAd(sock, ad)) { fprintf(stderr, "Failed to recieve remote ad.\n"); exit(1); } long long intVal; if (ad.EvaluateAttrInt(ATTR_OWNER, intVal) && (intVal == 0)) { // Last ad. 
if (!sock->end_of_message()) { fprintf(stderr, "Unable to close remote socket.\n"); } sock->close(); std::string errorMsg; if (ad.EvaluateAttrInt(ATTR_ERROR_CODE, intVal) && intVal && ad.EvaluateAttrString(ATTR_ERROR_STRING, errorMsg)) { fprintf(stderr, "Error %lld: %s\n", intVal, errorMsg.c_str()); exit(intVal); } if (ad.EvaluateAttrInt("MalformedAds", intVal) && intVal) { fprintf(stderr, "Remote side had parse errors on history file"); exit(1); } if (!ad.EvaluateAttrInt(ATTR_NUM_MATCHES, intVal) || (intVal != matchCount)) { fprintf(stderr, "Client and server do not agree on number of ads sent;\n" "Indicates lost network packets or an internal error\n"); exit(1); } break; } matchCount++; printJob(ad); } if(longformat && use_xml) { std::string out; AddClassAdXMLFileFooter(out); printf("%s\n", out.c_str()); } }
// Send DEACTIVATE_CLAIM (graceful) or DEACTIVATE_CLAIM_FORCIBLY to the
// startd for this object's claim.  On success, *claim_is_closing (if
// non-NULL) is set from the optional response ad: true when the startd
// reports it will not accept further activations (START == false).
// Returns true if the command and ClaimId were delivered; the response ad
// read is best-effort (older startds don't send one).
bool DCStartd::deactivateClaim( bool graceful, bool *claim_is_closing )
{
	dprintf( D_FULLDEBUG, "Entering DCStartd::deactivateClaim(%s)\n",
			 graceful ? "graceful" : "forceful" );

	if( claim_is_closing ) {
		*claim_is_closing = false;
	}

	setCmdStr( "deactivateClaim" );
	if( ! checkClaimId() ) {
		return false;
	}
	if( ! checkAddr() ) {
		return false;
	}

		// if this claim is associated with a security session
	ClaimIdParser cidp(claim_id);
	char const *sec_session = cidp.secSessionId();

	if (IsDebugLevel(D_COMMAND)) {
		int cmd = graceful ? DEACTIVATE_CLAIM : DEACTIVATE_CLAIM_FORCIBLY;
		dprintf (D_COMMAND, "DCStartd::deactivateClaim(%s,...) making connection to %s\n",
				 getCommandStringSafe(cmd), _addr ? _addr : "NULL");
	}

	bool result;
	ReliSock reli_sock;
	reli_sock.timeout(20);   // years of research... :)
	if( ! reli_sock.connect(_addr) ) {
		std::string err = "DCStartd::deactivateClaim: ";
		err += "Failed to connect to startd (";
		err += _addr ? _addr : "NULL";
		err += ')';
		newError( CA_CONNECT_FAILED, err.c_str() );
		return false;
	}
	int cmd;
	if( graceful ) {
		cmd = DEACTIVATE_CLAIM;
	} else {
		cmd = DEACTIVATE_CLAIM_FORCIBLY;
	}
	result = startCommand( cmd, (Sock*)&reli_sock, 20, NULL, NULL, false, sec_session );
	if( ! result ) {
		std::string err = "DCStartd::deactivateClaim: ";
		err += "Failed to send command ";
		if( graceful ) {
			err += "DEACTIVATE_CLAIM";
		} else {
			err += "DEACTIVATE_CLAIM_FORCIBLY";
		}
		err += " to the startd";
		newError( CA_COMMUNICATION_ERROR, err.c_str() );
		return false;
	}

		// Now, send the ClaimId
	if( ! reli_sock.put_secret(claim_id) ) {
		newError( CA_COMMUNICATION_ERROR,
				  "DCStartd::deactivateClaim: Failed to send ClaimId to the startd" );
		return false;
	}
	if( ! reli_sock.end_of_message() ) {
		newError( CA_COMMUNICATION_ERROR,
				  "DCStartd::deactivateClaim: Failed to send EOM to the startd" );
		return false;
	}

		// Read the optional response ad.
	reli_sock.decode();
	ClassAd response_ad;
	if( !getClassAd(&reli_sock, response_ad) || !reli_sock.end_of_message() ) {
		dprintf( D_FULLDEBUG, "DCStartd::deactivateClaim: failed to read response ad.\n");
			// The response ad is not critical and is expected to be missing
			// if the startd is from before 7.0.5.
	}
	else {
		bool start = true;
		response_ad.LookupBool(ATTR_START,start);
		if( claim_is_closing ) {
			*claim_is_closing = !start;
		}
	}

		// we're done
	dprintf( D_FULLDEBUG, "DCStartd::deactivateClaim: "
			 "successfully sent command\n" );
	return true;
}
// Ask the schedd for the information needed to connect directly to the
// starter running the given job (GET_JOB_CONNECT_INFO).  On success the
// starter address/claim id/version and slot name output parameters are
// filled in; on a negative response, hold_reason/error_msg/job_status and
// retry_is_sensible are filled in instead.  Returns the schedd's
// ATTR_RESULT (false also for any communication failure).
bool DCSchedd::getJobConnectInfo(
	PROC_ID jobid,
	int subproc,
	char const *session_info,
	int timeout,
	CondorError *errstack,
	MyString &starter_addr,
	MyString &starter_claim_id,
	MyString &starter_version,
	MyString &slot_name,
	MyString &error_msg,
	bool &retry_is_sensible,
	int &job_status,
	MyString &hold_reason)
{
	ClassAd input;
	ClassAd output;

	input.Assign(ATTR_CLUSTER_ID,jobid.cluster);
	input.Assign(ATTR_PROC_ID,jobid.proc);
	if( subproc != -1 ) {
		input.Assign(ATTR_SUB_PROC_ID,subproc);
	}
	input.Assign(ATTR_SESSION_INFO,session_info);

	ReliSock sock;
	if( !connectSock(&sock,timeout,errstack) ) {
		error_msg = "Failed to connect to schedd";
		dprintf( D_ALWAYS, "%s\n",error_msg.Value());
		return false;
	}

	if( !startCommand(GET_JOB_CONNECT_INFO, &sock, timeout, errstack) ) {
		error_msg = "Failed to send GET_JOB_CONNECT_INFO to schedd";
		dprintf( D_ALWAYS, "%s\n",error_msg.Value());
		return false;
	}

		// this command requires an authenticated connection
	if( !forceAuthentication(&sock, errstack) ) {
		error_msg = "Failed to authenticate";
		dprintf( D_ALWAYS, "%s\n",error_msg.Value());
		return false;
	}

	sock.encode();
	if( !putClassAd(&sock, input) || !sock.end_of_message() ) {
		error_msg = "Failed to send GET_JOB_CONNECT_INFO to schedd";
		dprintf( D_ALWAYS, "%s\n",error_msg.Value());
		return false;
	}

	sock.decode();
	if( !getClassAd(&sock, output) || !sock.end_of_message() ) {
		error_msg = "Failed to get response from schedd";
		dprintf( D_ALWAYS, "%s\n",error_msg.Value());
		return false;
	}

	if( IsFulldebug(D_FULLDEBUG) ) {
		std::string adstr;
		sPrintAd(adstr, output, true);
		dprintf(D_FULLDEBUG,"Response for GET_JOB_CONNECT_INFO:\n%s\n",
				adstr.c_str());
	}

	bool result=false;
	output.LookupBool(ATTR_RESULT,result);

	if( !result ) {
			// request refused: extract the diagnostic attributes
		output.LookupString(ATTR_HOLD_REASON,hold_reason);
		output.LookupString(ATTR_ERROR_STRING,error_msg);
		retry_is_sensible = false;
		output.LookupBool(ATTR_RETRY,retry_is_sensible);
		output.LookupInteger(ATTR_JOB_STATUS,job_status);
	}
	else {
			// success: extract the starter contact info
		output.LookupString(ATTR_STARTER_IP_ADDR,starter_addr);
		output.LookupString(ATTR_CLAIM_ID,starter_claim_id);
		output.LookupString(ATTR_VERSION,starter_version);
		output.LookupString(ATTR_REMOTE_HOST,slot_name);
	}

	return result;
}
bool DCSchedd::recycleShadow( int previous_job_exit_reason, ClassAd **new_job_ad, MyString &error_msg ) { int timeout = 300; CondorError errstack; ReliSock sock; if( !connectSock(&sock,timeout,&errstack) ) { error_msg.formatstr("Failed to connect to schedd: %s", errstack.getFullText().c_str()); return false; } if( !startCommand(RECYCLE_SHADOW, &sock, timeout, &errstack) ) { error_msg.formatstr("Failed to send RECYCLE_SHADOW to schedd: %s", errstack.getFullText().c_str()); return false; } if( !forceAuthentication(&sock, &errstack) ) { error_msg.formatstr("Failed to authenticate: %s", errstack.getFullText().c_str()); return false; } sock.encode(); int mypid = getpid(); if( !sock.put( mypid ) || !sock.put( previous_job_exit_reason ) || !sock.end_of_message() ) { error_msg = "Failed to send job exit reason"; return false; } sock.decode(); int found_new_job = 0; sock.get( found_new_job ); if( found_new_job ) { *new_job_ad = new ClassAd(); if( !getClassAd( &sock, *(*new_job_ad) ) ) { error_msg = "Failed to receive new job ClassAd"; delete *new_job_ad; *new_job_ad = NULL; return false; } } if( !sock.end_of_message() ) { error_msg = "Failed to receive end of message"; delete *new_job_ad; *new_job_ad = NULL; return false; } if( *new_job_ad ) { sock.encode(); int ok=1; if( !sock.put(ok) || !sock.end_of_message() ) { error_msg = "Failed to send ok"; delete *new_job_ad; *new_job_ad = NULL; return false; } } return true; }
// Read the startd's reply to our claim request.  Besides plain OK/NOT_OK,
// the startd may answer REQUEST_CLAIM_LEFTOVERS (partitionable slot: the
// leftover slot's claim id and ad follow) or REQUEST_CLAIM_PAIR (paired
// slot: the partner's claim id and ad follow); in both cases the extra
// payload is read here and m_reply is normalized to OK on success or
// NOT_OK on a payload read failure.  end_of_message() is done by caller.
bool
ClaimStartdMsg::readMsg( DCMessenger * /*messenger*/, Sock *sock )
{
		// Now, we set the timeout on the socket to 1 second.  Since we
		// were called by as a Register_Socket callback, this should not
		// block if things are working as expected.
		// However, if the Startd wigged out and sent a
		// partial int or some such, we cannot afford to block. -Todd 3/2000
	sock->timeout(1);

 	if( !sock->get(m_reply) ) {
		dprintf( failureDebugLevel(),
				 "Response problem from startd when requesting claim %s.\n",
				 description() );
		sockFailed( sock );
		return false;
	}

	/*
		Reply of 0 (NOT_OK) means claim rejected.
		Reply of 1 (OK) means claim accepted.
		Reply of 3 (REQUEST_CLAIM_LEFTOVERS) means claim accepted by a
		  partitionable slot, and the "leftovers" slot ad and claim id
		  will be sent next.
		Reply of 4 (REQUEST_CLAIM_PAIR) means claim accepted by a slot
		  that is paired, and the partner slot ad and claim id will be
		  sent next.
	*/

	if( m_reply == OK ) {
			// no need to log success, because DCMsg::reportSuccess() will
	} else if( m_reply == NOT_OK ) {
		dprintf( failureDebugLevel(), "Request was NOT accepted for claim %s\n", description() );
	} else if( m_reply == REQUEST_CLAIM_LEFTOVERS ) {
		if( !sock->get(m_leftover_claim_id) ||
			!getClassAd( sock, m_leftover_startd_ad )  )
		{
			// failed to read leftover partitionable slot info
			dprintf( failureDebugLevel(),
				 "Failed to read paritionable slot leftover from startd - claim %s.\n",
				 description() );
			// treat this failure same as NOT_OK, since this startd is screwed
			m_reply = NOT_OK;
		} else {
			// successfully read leftover partitionable slot info
			m_have_leftovers = true;
			// change reply to OK cuz claim was a success
			m_reply = OK;
		}
	} else if( m_reply == REQUEST_CLAIM_PAIR ) {
		if( !sock->get(m_paired_claim_id) ||
			!getClassAd( sock, m_paired_startd_ad ) )
		{
			// failed to read paired slot info
			dprintf( failureDebugLevel(),
				 "Failed to read paired slot info from startd - claim %s.\n",
				 description() );
			// treat this failure same as NOT_OK, since this startd is screwed
			m_reply = NOT_OK;
		} else {
			// successfully read paired slot info
			m_have_paired_slot = true;
			// change reply to OK cuz claim was a success
			m_reply = OK;
		}
	} else {
		dprintf( failureDebugLevel(),
				 "Unknown reply from startd when requesting claim %s\n",description());
	}

	// end_of_message() is done by caller

	return true;
}
// when a transferd registers itself, it identifies who it is. The connection // is then held open and the schedd periodically might send more transfer // requests to the transferd. Also, if the transferd dies, the schedd is // informed quickly and reliably due to the closed connection. bool DCSchedd::register_transferd(MyString sinful, MyString id, int timeout, ReliSock **regsock_ptr, CondorError *errstack) { ReliSock *rsock; int invalid_request = 0; ClassAd regad; ClassAd respad; std::string errstr; std::string reason; if (regsock_ptr != NULL) { // Our caller wants a pointer to the socket we used to succesfully // register the claim. The NULL pointer will represent failure and // this will only be set to something real if everything was ok. *regsock_ptr = NULL; } // This call with automatically connect to _addr, which was set in the // constructor of this object to be the schedd in question. rsock = (ReliSock*)startCommand(TRANSFERD_REGISTER, Stream::reli_sock, timeout, errstack); if( ! rsock ) { dprintf( D_ALWAYS, "DCSchedd::register_transferd: " "Failed to send command (TRANSFERD_REGISTER) " "to the schedd\n" ); errstack->push("DC_SCHEDD", 1, "Failed to start a TRANSFERD_REGISTER command."); return false; } // First, if we're not already authenticated, force that now. if (!forceAuthentication( rsock, errstack )) { dprintf( D_ALWAYS, "DCSchedd::register_transferd authentication " "failure: %s\n", errstack->getFullText().c_str() ); errstack->push("DC_SCHEDD", 1, "Failed to authenticate properly."); return false; } rsock->encode(); // set up my registration request. regad.Assign(ATTR_TREQ_TD_SINFUL, sinful); regad.Assign(ATTR_TREQ_TD_ID, id); // This is the initial registration identification ad to the schedd // It contains: // ATTR_TREQ_TD_SINFUL // ATTR_TREQ_TD_ID putClassAd(rsock, regad); rsock->end_of_message(); // Get the response from the schedd. 
rsock->decode(); // This is the response ad from the schedd: // It contains: // ATTR_TREQ_INVALID_REQUEST // // OR // // ATTR_TREQ_INVALID_REQUEST // ATTR_TREQ_INVALID_REASON getClassAd(rsock, respad); rsock->end_of_message(); respad.LookupInteger(ATTR_TREQ_INVALID_REQUEST, invalid_request); if (invalid_request == FALSE) { // not an invalid request if (regsock_ptr) *regsock_ptr = rsock; return true; } respad.LookupString(ATTR_TREQ_INVALID_REASON, reason); errstack->pushf("DC_SCHEDD", 1, "Schedd refused registration: %s", reason.c_str()); return false; }
// Read one request from the negotiator and stash its payload in the
// corresponding member variables (m_operation plus operation-specific
// fields).  This is an older variant: no SEND_RESOURCE_REQUEST_LIST case
// and no extra-claims parsing.  Returns false on read error or unexpected
// operation; end_of_message() is handled by our caller.
bool ScheddNegotiate::readMsg( DCMessenger * /*messenger*/, Sock *sock )
{
		// Get the negotiator's request.
		// Note that end_of_message() is handled by our caller.
	if( !sock->code(m_operation) ) {
		dprintf( D_ALWAYS, "Can't receive request from negotiator\n" );
		return false;
	}

	switch( m_operation ) {
	case REJECTED_WITH_REASON:
			// reject carries a human-readable reason string
		if( !sock->code(m_reject_reason) ) {
			dprintf( D_ALWAYS,
					 "Can't receive reject reason from negotiator\n" );
			return false;
		}
		break;
	case REJECTED:
		break;
	case SEND_JOB_INFO:
		break;
	case PERMISSION:
			// No negotiator since 7.1.3 should ever send this
			// command, and older ones should not send it either,
			// since we advertise WantResAd=True.
		dprintf( D_ALWAYS, "Negotiator sent PERMISSION rather than expected PERMISSION_AND_AD! Aborting.\n");
		return false;
		break;
	case PERMISSION_AND_AD: {
			// claim id arrives first, as an encrypted "secret"
		char *claim_id = NULL;
		if( !sock->get_secret(claim_id) || !claim_id ) {
			dprintf( D_ALWAYS, "Can't receive ClaimId from negotiator\n" );
			return false;
		}
		m_claim_id = claim_id;
		free( claim_id );

		m_match_ad.Clear();

			// get startd ad from negotiator
		if( !getClassAd(sock, m_match_ad) ) {
			dprintf( D_ALWAYS, "Can't get my match ad from negotiator\n" );
			return false;
		}
#if !defined(WANT_OLD_CLASSADS)
		m_match_ad.AddTargetRefs( TargetJobAttrs );
#endif
		break;
	}
	case END_NEGOTIATE:
		break;
	default:
		dprintf( D_ALWAYS, "Got unexpected request (%d)\n", m_operation );
		return false;
	}

	return true;
}
// I'm going to ask the schedd for where I can put the files for the jobs I've // specified. The schedd is going to respond with A) a message telling me it // has the answer right away, or B) an answer telling me I have to wait // an unknown length of time for the schedd to schedule me a place to put it. bool DCSchedd::requestSandboxLocation(ClassAd *reqad, ClassAd *respad, CondorError * errstack) { ReliSock rsock; int will_block; ClassAd status_ad; rsock.timeout(20); // years of research... :) if( ! rsock.connect(_addr) ) { dprintf( D_ALWAYS, "DCSchedd::requestSandboxLocation(): " "Failed to connect to schedd (%s)\n", _addr ); return false; } if( ! startCommand(REQUEST_SANDBOX_LOCATION, (Sock*)&rsock, 0, errstack) ) { dprintf( D_ALWAYS, "DCSchedd::requestSandboxLocation(): " "Failed to send command (REQUEST_SANDBOX_LOCATION) " "to schedd (%s)\n", _addr ); return false; } // First, if we're not already authenticated, force that now. if (!forceAuthentication( &rsock, errstack )) { dprintf( D_ALWAYS, "DCSchedd: authentication failure: %s\n", errstack->getFullText().c_str() ); return false; } rsock.encode(); /////////////////////////////////////////////////////////////////////// // Send my sandbox location request packet to the schedd. 
/////////////////////////////////////////////////////////////////////// // This request ad will either contain: // ATTR_TREQ_PEER_VERSION // ATTR_TREQ_HAS_CONSTRAINT // ATTR_TREQ_JOBID_LIST // ATTR_TREQ_FTP // // OR // // ATTR_TREQ_DIRECTION // ATTR_TREQ_PEER_VERSION // ATTR_TREQ_HAS_CONSTRAINT // ATTR_TREQ_CONSTRAINT // ATTR_TREQ_FTP dprintf(D_ALWAYS, "Sending request ad.\n"); if (putClassAd(&rsock, *reqad) != 1) { dprintf(D_ALWAYS,"DCSchedd:requestSandboxLocation(): " "Can't send reqad to the schedd\n"); return false; } rsock.end_of_message(); rsock.decode(); /////////////////////////////////////////////////////////////////////// // Read back a response ad which will tell me which jobs the schedd // said I could modify and whether or not I'm am going to have to block // before getting the payload of the transferd location/capability ad. /////////////////////////////////////////////////////////////////////// // This status ad will contain // ATTR_TREQ_INVALID_REQUEST (set to true) // ATTR_TREQ_INVALID_REASON // // OR // ATTR_TREQ_INVALID_REQUEST (set to false) // ATTR_TREQ_JOBID_ALLOW_LIST // ATTR_TREQ_JOBID_DENY_LIST // ATTR_TREQ_WILL_BLOCK dprintf(D_ALWAYS, "Receiving status ad.\n"); if (getClassAd(&rsock, status_ad) == false) { dprintf(D_ALWAYS, "Schedd closed connection to me. Aborting sandbox " "submission.\n"); return false; } rsock.end_of_message(); status_ad.LookupInteger(ATTR_TREQ_WILL_BLOCK, will_block); dprintf(D_ALWAYS, "Client will %s\n", will_block==1?"block":"not block"); if (will_block == 1) { // set to 20 minutes. rsock.timeout(60*20); } /////////////////////////////////////////////////////////////////////// // Read back the payload ad from the schedd about the transferd location // and capability string I can use for the fileset I wish to transfer. /////////////////////////////////////////////////////////////////////// // read back the response ad from the schedd which contains a // td sinful string, and a capability. 
These represent my ability to // read/write a certain fileset somewhere. // This response ad from the schedd will contain: // // ATTR_TREQ_INVALID_REQUEST (set to true) // ATTR_TREQ_INVALID_REASON // // OR // // ATTR_TREQ_INVALID_REQUEST (set to false) // ATTR_TREQ_CAPABILITY // ATTR_TREQ_TD_SINFUL // ATTR_TREQ_JOBID_ALLOW_LIST dprintf(D_ALWAYS, "Receiving response ad.\n"); if (getClassAd(&rsock, *respad) != true) { dprintf(D_ALWAYS,"DCSchedd:requestSandboxLocation(): " "Can't receive respond ad from the schedd\n"); return false; } rsock.end_of_message(); return true; }