bool DCStarter::createJobOwnerSecSession(int timeout,char const *job_claim_id,char const *starter_sec_session,char const *session_info,MyString &owner_claim_id,MyString &error_msg,MyString &starter_version,MyString &starter_addr) { ReliSock sock; if (IsDebugLevel(D_COMMAND)) { dprintf (D_COMMAND, "DCStarter::createJobOwnerSecSession(%s,...) making connection to %s\n", getCommandStringSafe(CREATE_JOB_OWNER_SEC_SESSION), _addr ? _addr : "NULL"); } if( !connectSock(&sock, timeout, NULL) ) { error_msg = "Failed to connect to starter"; return false; } if( !startCommand(CREATE_JOB_OWNER_SEC_SESSION, &sock,timeout,NULL,NULL,false,starter_sec_session) ) { error_msg = "Failed to send CREATE_JOB_OWNER_SEC_SESSION to starter"; return false; } ClassAd input; input.Assign(ATTR_CLAIM_ID,job_claim_id); input.Assign(ATTR_SESSION_INFO,session_info); sock.encode(); if( !putClassAd(&sock, input) || !sock.end_of_message() ) { error_msg = "Failed to compose CREATE_JOB_OWNER_SEC_SESSION to starter"; return false; } sock.decode(); ClassAd reply; if( !getClassAd(&sock, reply) || !sock.end_of_message() ) { error_msg = "Failed to get response to CREATE_JOB_OWNER_SEC_SESSION from starter"; return false; } bool success = false; reply.LookupBool(ATTR_RESULT,success); if( !success ) { reply.LookupString(ATTR_ERROR_STRING,error_msg); return false; } reply.LookupString(ATTR_CLAIM_ID,owner_claim_id); reply.LookupString(ATTR_VERSION,starter_version); // get the full starter address from the starter in case it contains // extra CCB info that we don't already know about reply.LookupString(ATTR_STARTER_IP_ADDR,starter_addr); return true; }
SOCKET connectUDP(const char *host , const char *service) //连接UDP套接字 { return connectSock(host ,service ,"udp"); }
SOCKET connectTCP(const char *host , const char *service) //连接TCP套接字 { return connectSock(host, service, "tcp"); }
bool DCStarter::peek(bool transfer_stdout, ssize_t &stdout_offset, bool transfer_stderr, ssize_t &stderr_offset, const std::vector<std::string> &filenames, std::vector<ssize_t> &offsets, size_t max_bytes, bool &retry_sensible, PeekGetFD &next, std::string &error_msg, unsigned timeout, const std::string &sec_session_id, DCTransferQueue *xfer_q) { compat_classad::ClassAd ad; ad.InsertAttr(ATTR_JOB_OUTPUT, transfer_stdout); ad.InsertAttr("OutOffset", stdout_offset); ad.InsertAttr(ATTR_JOB_ERROR, transfer_stderr); ad.InsertAttr("ErrOffset", stderr_offset); ad.InsertAttr(ATTR_VERSION, CondorVersion()); size_t total_files = 0; total_files += transfer_stdout ? 1 : 0; total_files += transfer_stderr ? 1 : 0; if (filenames.size()) { total_files += filenames.size(); std::vector<classad::ExprTree *> filelist; filelist.reserve(filenames.size()); std::vector<classad::ExprTree *> offsetlist; offsetlist.reserve(filenames.size()); std::vector<ssize_t>::const_iterator it2 = offsets.begin(); for (std::vector<std::string>::const_iterator it = filenames.begin(); it != filenames.end() && it2 != offsets.end(); it++, it2++) { classad::Value value; value.SetStringValue(*it); filelist.push_back(classad::Literal::MakeLiteral(value)); value.SetIntegerValue(*it2); offsetlist.push_back(classad::Literal::MakeLiteral(value)); } classad::ExprTree *list(classad::ExprList::MakeExprList(filelist)); ad.Insert("TransferFiles", list); list = classad::ExprList::MakeExprList(offsetlist); ad.Insert("TransferOffsets", list); } ad.InsertAttr(ATTR_MAX_TRANSFER_BYTES, static_cast<long long>(max_bytes)); ReliSock sock; if( !connectSock(&sock, timeout, NULL) ) { error_msg = "Failed to connect to starter"; return false; } if( !startCommand(STARTER_PEEK, &sock, timeout, NULL, NULL, false, sec_session_id.c_str()) ) { error_msg = "Failed to send START_PEEK to starter"; return false; } sock.encode(); if (!putClassAd(&sock, ad) || !sock.end_of_message()) { error_msg = "Failed to send request to starter"; return false; } compat_classad::ClassAd response; sock.decode(); if (!getClassAd(&sock, response) || !sock.end_of_message()) { error_msg = "Failed to read response for peeking at logs."; return false; } dPrintAd(D_FULLDEBUG, response); bool success = false; if (!response.EvaluateAttrBool(ATTR_RESULT, success) || !success) { response.EvaluateAttrBool(ATTR_RETRY, retry_sensible); error_msg = "Remote operation failed."; response.EvaluateAttrString(ATTR_ERROR_STRING, error_msg); return false; } classad::Value valueX; classad_shared_ptr<classad::ExprList> list; if (!response.EvaluateAttr("TransferFiles", valueX) || !valueX.IsSListValue(list)) { error_msg = "Unable to evaluate starter response"; return false; } classad_shared_ptr<classad::ExprList> offlist; if (!response.EvaluateAttr("TransferOffsets", valueX) || !valueX.IsSListValue(offlist)) { error_msg = "Unable to evaluate starter response (missing offsets)"; return false; } size_t remaining = max_bytes; size_t file_count = 0; classad::ExprList::const_iterator it2 = offlist->begin(); for (classad::ExprList::const_iterator it = list->begin(); it != list->end() && it2 != offlist->end(); it++, it2++) { classad::Value value; (*it2)->Evaluate(value); off_t off = -1; value.IsIntegerValue(off); (*it)->Evaluate(value); std::string filename; int64_t xfer_fd = -1; if (!value.IsStringValue(filename) && value.IsIntegerValue(xfer_fd)) { if (xfer_fd == 0) filename = "_condor_stdout"; if (xfer_fd == 1) filename = "_condor_stderr"; } int fd = next.getNextFD(filename); filesize_t size = -1; int retval; if ((retval = sock.get_file(&size, fd, false, false, remaining, xfer_q)) && (retval != GET_FILE_MAX_BYTES_EXCEEDED)) { error_msg = "Internal error when transferring file " + filename; } else if (size >= 0) { remaining -= max_bytes; file_count++; off += size; } else { error_msg = "Failed to transfer file " + filename; } if (xfer_fd == 0) { stdout_offset = off; //dprintf(D_FULLDEBUG, "New stdout offset: %ld\n", stdout_offset); } else if (xfer_fd == 1) { stderr_offset = off; } else { std::vector<ssize_t>::iterator it4 = offsets.begin(); for (std::vector<std::string>::const_iterator it3 = filenames.begin(); it3 != filenames.end() && it4 != offsets.end(); it3++, it4++) { if (*it3 == filename) *it4 = off; } } } size_t remote_file_count; if (!sock.get(remote_file_count) || !sock.end_of_message()) { error_msg = "Unable to get remote file count."; return false; } if (file_count != remote_file_count) { std::stringstream ss; ss << "Recieved " << file_count << " files, but remote side thought it sent " << remote_file_count << " files"; error_msg = ss.str(); return false; } if ((total_files != file_count) && !error_msg.size()) { error_msg = "At least one file transfer failed."; return false; } return true; }
bool DCStarter::startSSHD(char const *known_hosts_file,char const *private_client_key_file,char const *preferred_shells,char const *slot_name,char const *ssh_keygen_args,ReliSock &sock,int timeout,char const *sec_session_id,MyString &remote_user,MyString &error_msg,bool &retry_is_sensible) { retry_is_sensible = false; #ifndef HAVE_SSH_TO_JOB error_msg = "This version of Condor does not support ssh key exchange."; return false; #else if( !connectSock(&sock, timeout, NULL) ) { error_msg = "Failed to connect to starter"; return false; } if( !startCommand(START_SSHD, &sock,timeout,NULL,NULL,false,sec_session_id) ) { error_msg = "Failed to send START_SSHD to starter"; return false; } ClassAd input; if( preferred_shells && *preferred_shells ) { input.Assign(ATTR_SHELL,preferred_shells); } if( slot_name && *slot_name ) { // This is a little silly. // We are telling the remote side the name of the slot so // that it can put it in the welcome message. input.Assign(ATTR_NAME,slot_name); } if( ssh_keygen_args && *ssh_keygen_args ) { input.Assign(ATTR_SSH_KEYGEN_ARGS,ssh_keygen_args); } sock.encode(); if( !putClassAd(&sock, input) || !sock.end_of_message() ) { error_msg = "Failed to send START_SSHD request to starter"; return false; } ClassAd result; sock.decode(); if( !getClassAd(&sock, result) || !sock.end_of_message() ) { error_msg = "Failed to read response to START_SSHD from starter"; return false; } bool success = false; result.LookupBool(ATTR_RESULT,success); if( !success ) { std::string remote_error_msg; result.LookupString(ATTR_ERROR_STRING,remote_error_msg); error_msg.formatstr("%s: %s",slot_name,remote_error_msg.c_str()); retry_is_sensible = false; result.LookupBool(ATTR_RETRY,retry_is_sensible); return false; } result.LookupString(ATTR_REMOTE_USER,remote_user); std::string public_server_key; if( !result.LookupString(ATTR_SSH_PUBLIC_SERVER_KEY,public_server_key) ) { error_msg = "No public ssh server key received in reply to START_SSHD"; return false; } std::string private_client_key; if( !result.LookupString(ATTR_SSH_PRIVATE_CLIENT_KEY,private_client_key) ) { error_msg = "No ssh client key received in reply to START_SSHD"; return false; } // store the private client key unsigned char *decode_buf = NULL; int length = -1; condor_base64_decode(private_client_key.c_str(),&decode_buf,&length); if( !decode_buf ) { error_msg = "Error decoding ssh client key."; return false; } FILE *fp = safe_fcreate_fail_if_exists(private_client_key_file,"a",0400); if( !fp ) { error_msg.formatstr("Failed to create %s: %s", private_client_key_file,strerror(errno)); free( decode_buf ); return false; } if( fwrite(decode_buf,length,1,fp)!=1 ) { error_msg.formatstr("Failed to write to %s: %s", private_client_key_file,strerror(errno)); fclose( fp ); free( decode_buf ); return false; } if( fclose(fp)!=0 ) { error_msg.formatstr("Failed to close %s: %s", private_client_key_file,strerror(errno)); free( decode_buf ); return false; } fp = NULL; free( decode_buf ); decode_buf = NULL; // store the public server key in the known_hosts file length = -1; condor_base64_decode(public_server_key.c_str(),&decode_buf,&length); if( !decode_buf ) { error_msg = "Error decoding ssh server key."; return false; } fp = safe_fcreate_fail_if_exists(known_hosts_file,"a",0600); if( !fp ) { error_msg.formatstr("Failed to create %s: %s", known_hosts_file,strerror(errno)); free( decode_buf ); return false; } // prepend a host name pattern (*) to the public key to make a valid // record in the known_hosts file fprintf(fp,"* "); if( fwrite(decode_buf,length,1,fp)!=1 ) { error_msg.formatstr("Failed to write to %s: %s", known_hosts_file,strerror(errno)); fclose( fp ); free( decode_buf ); return false; } if( fclose(fp)!=0 ) { error_msg.formatstr("Failed to close %s: %s", known_hosts_file,strerror(errno)); free( decode_buf ); return false; } fp = NULL; free( decode_buf ); decode_buf = NULL; return true; #endif }
bool DCSchedd::recycleShadow( int previous_job_exit_reason, ClassAd **new_job_ad, MyString &error_msg ) { int timeout = 300; CondorError errstack; ReliSock sock; if( !connectSock(&sock,timeout,&errstack) ) { error_msg.formatstr("Failed to connect to schedd: %s", errstack.getFullText().c_str()); return false; } if( !startCommand(RECYCLE_SHADOW, &sock, timeout, &errstack) ) { error_msg.formatstr("Failed to send RECYCLE_SHADOW to schedd: %s", errstack.getFullText().c_str()); return false; } if( !forceAuthentication(&sock, &errstack) ) { error_msg.formatstr("Failed to authenticate: %s", errstack.getFullText().c_str()); return false; } sock.encode(); int mypid = getpid(); if( !sock.put( mypid ) || !sock.put( previous_job_exit_reason ) || !sock.end_of_message() ) { error_msg = "Failed to send job exit reason"; return false; } sock.decode(); int found_new_job = 0; sock.get( found_new_job ); if( found_new_job ) { *new_job_ad = new ClassAd(); if( !getClassAd( &sock, *(*new_job_ad) ) ) { error_msg = "Failed to receive new job ClassAd"; delete *new_job_ad; *new_job_ad = NULL; return false; } } if( !sock.end_of_message() ) { error_msg = "Failed to receive end of message"; delete *new_job_ad; *new_job_ad = NULL; return false; } if( *new_job_ad ) { sock.encode(); int ok=1; if( !sock.put(ok) || !sock.end_of_message() ) { error_msg = "Failed to send ok"; delete *new_job_ad; *new_job_ad = NULL; return false; } } return true; }
bool DCSchedd::getJobConnectInfo( PROC_ID jobid, int subproc, char const *session_info, int timeout, CondorError *errstack, MyString &starter_addr, MyString &starter_claim_id, MyString &starter_version, MyString &slot_name, MyString &error_msg, bool &retry_is_sensible, int &job_status, MyString &hold_reason) { ClassAd input; ClassAd output; input.Assign(ATTR_CLUSTER_ID,jobid.cluster); input.Assign(ATTR_PROC_ID,jobid.proc); if( subproc != -1 ) { input.Assign(ATTR_SUB_PROC_ID,subproc); } input.Assign(ATTR_SESSION_INFO,session_info); ReliSock sock; if( !connectSock(&sock,timeout,errstack) ) { error_msg = "Failed to connect to schedd"; dprintf( D_ALWAYS, "%s\n",error_msg.Value()); return false; } if( !startCommand(GET_JOB_CONNECT_INFO, &sock, timeout, errstack) ) { error_msg = "Failed to send GET_JOB_CONNECT_INFO to schedd"; dprintf( D_ALWAYS, "%s\n",error_msg.Value()); return false; } if( !forceAuthentication(&sock, errstack) ) { error_msg = "Failed to authenticate"; dprintf( D_ALWAYS, "%s\n",error_msg.Value()); return false; } sock.encode(); if( !putClassAd(&sock, input) || !sock.end_of_message() ) { error_msg = "Failed to send GET_JOB_CONNECT_INFO to schedd"; dprintf( D_ALWAYS, "%s\n",error_msg.Value()); return false; } sock.decode(); if( !getClassAd(&sock, output) || !sock.end_of_message() ) { error_msg = "Failed to get response from schedd"; dprintf( D_ALWAYS, "%s\n",error_msg.Value()); return false; } if( IsFulldebug(D_FULLDEBUG) ) { std::string adstr; sPrintAd(adstr, output, true); dprintf(D_FULLDEBUG,"Response for GET_JOB_CONNECT_INFO:\n%s\n", adstr.c_str()); } bool result=false; output.LookupBool(ATTR_RESULT,result); if( !result ) { output.LookupString(ATTR_HOLD_REASON,hold_reason); output.LookupString(ATTR_ERROR_STRING,error_msg); retry_is_sensible = false; output.LookupBool(ATTR_RETRY,retry_is_sensible); output.LookupInteger(ATTR_JOB_STATUS,job_status); } else { output.LookupString(ATTR_STARTER_IP_ADDR,starter_addr); output.LookupString(ATTR_CLAIM_ID,starter_claim_id); output.LookupString(ATTR_VERSION,starter_version); output.LookupString(ATTR_REMOTE_HOST,slot_name); } return result; }