bool DCSchedd::recycleShadow( int previous_job_exit_reason, ClassAd **new_job_ad, MyString &error_msg ) { int timeout = 300; CondorError errstack; ReliSock sock; if( !connectSock(&sock,timeout,&errstack) ) { error_msg.formatstr("Failed to connect to schedd: %s", errstack.getFullText().c_str()); return false; } if( !startCommand(RECYCLE_SHADOW, &sock, timeout, &errstack) ) { error_msg.formatstr("Failed to send RECYCLE_SHADOW to schedd: %s", errstack.getFullText().c_str()); return false; } if( !forceAuthentication(&sock, &errstack) ) { error_msg.formatstr("Failed to authenticate: %s", errstack.getFullText().c_str()); return false; } sock.encode(); int mypid = getpid(); if( !sock.put( mypid ) || !sock.put( previous_job_exit_reason ) || !sock.end_of_message() ) { error_msg = "Failed to send job exit reason"; return false; } sock.decode(); int found_new_job = 0; sock.get( found_new_job ); if( found_new_job ) { *new_job_ad = new ClassAd(); if( !getClassAd( &sock, *(*new_job_ad) ) ) { error_msg = "Failed to receive new job ClassAd"; delete *new_job_ad; *new_job_ad = NULL; return false; } } if( !sock.end_of_message() ) { error_msg = "Failed to receive end of message"; delete *new_job_ad; *new_job_ad = NULL; return false; } if( *new_job_ad ) { sock.encode(); int ok=1; if( !sock.put(ok) || !sock.end_of_message() ) { error_msg = "Failed to send ok"; delete *new_job_ad; *new_job_ad = NULL; return false; } } return true; }
bool DCSchedd::receiveJobSandbox(const char* constraint, CondorError * errstack, int * numdone /*=0*/) { if(numdone) { *numdone = 0; } ExprTree *tree = NULL; const char *lhstr; int reply; int i; ReliSock rsock; int JobAdsArrayLen; bool use_new_command = true; if ( version() ) { CondorVersionInfo vi( version() ); if ( vi.built_since_version(6,7,7) ) { use_new_command = true; } else { use_new_command = false; } } // // // // // // // // // On the wire protocol // // // // // // // // rsock.timeout(20); // years of research... :) if( ! rsock.connect(_addr) ) { dprintf( D_ALWAYS, "DCSchedd::receiveJobSandbox: " "Failed to connect to schedd (%s)\n", _addr ); return false; } if ( use_new_command ) { if( ! startCommand(TRANSFER_DATA_WITH_PERMS, (Sock*)&rsock, 0, errstack) ) { dprintf( D_ALWAYS, "DCSchedd::receiveJobSandbox: " "Failed to send command (TRANSFER_DATA_WITH_PERMS) " "to the schedd\n" ); return false; } } else { if( ! startCommand(TRANSFER_DATA, (Sock*)&rsock, 0, errstack) ) { dprintf( D_ALWAYS, "DCSchedd::receiveJobSandbox: " "Failed to send command (TRANSFER_DATA) " "to the schedd\n" ); return false; } } // First, if we're not already authenticated, force that now. if (!forceAuthentication( &rsock, errstack )) { dprintf( D_ALWAYS, "DCSchedd::receiveJobSandbox: authentication failure: %s\n", errstack ? errstack->getFullText().c_str() : "" ); return false; } rsock.encode(); // Send our version if using the new command if ( use_new_command ) { // Need to use a named variable, else the wrong version of // code() is called. 
char *my_version = strdup( CondorVersion() ); if ( !rsock.code(my_version) ) { dprintf(D_ALWAYS,"DCSchedd:receiveJobSandbox: " "Can't send version string to the schedd\n"); free( my_version ); return false; } free( my_version ); } // Send the constraint char * nc_constraint = strdup( constraint ); // de-const if ( !rsock.code(nc_constraint) ) { free( nc_constraint ); dprintf(D_ALWAYS,"DCSchedd:receiveJobSandbox: " "Can't send JobAdsArrayLen to the schedd\n"); return false; } free( nc_constraint ); if ( !rsock.end_of_message() ) { std::string errmsg; formatstr(errmsg, "Can't send initial message (version + constraint) to schedd (%s)", _addr); dprintf(D_ALWAYS,"DCSchedd::receiveJobSandbox: %s\n", errmsg.c_str()); if( errstack ) { errstack->push( "DCSchedd::receiveJobSandbox", CEDAR_ERR_EOM_FAILED, errmsg.c_str()); } return false; } // Now, read how many jobs matched the constraint. rsock.decode(); if ( !rsock.code(JobAdsArrayLen) ) { std::string errmsg; formatstr(errmsg, "Can't receive JobAdsArrayLen from the schedd (%s)", _addr); dprintf(D_ALWAYS,"DCSchedd::receiveJobSandbox: %s\n", errmsg.c_str()); if( errstack ) { errstack->push( "DCSchedd::receiveJobSandbox", CEDAR_ERR_GET_FAILED, errmsg.c_str()); } return false; } rsock.end_of_message(); dprintf(D_FULLDEBUG,"DCSchedd:receiveJobSandbox: " "%d jobs matched my constraint (%s)\n", JobAdsArrayLen, constraint); // Now read all the files via the file transfer object for (i=0; i<JobAdsArrayLen; i++) { FileTransfer ftrans; ClassAd job; // grab job ClassAd if ( !getClassAd(&rsock, job) ) { std::string errmsg; formatstr(errmsg, "Can't receive job ad %d from the schedd", i); dprintf(D_ALWAYS, "DCSchedd::receiveJobSandbox: %s\n", errmsg.c_str()); if( errstack ) { errstack->push( "DCSchedd::receiveJobSandbox", CEDAR_ERR_GET_FAILED, errmsg.c_str()); } return false; } rsock.end_of_message(); // translate the job ad by replacing the // saved SUBMIT_ attributes job.ResetExpr(); while( job.NextExpr(lhstr, tree) ) { if ( lhstr && 
strncasecmp("SUBMIT_",lhstr,7)==0 ) { // this attr name starts with SUBMIT_ // compute new lhs (strip off the SUBMIT_) const char *new_attr_name = strchr(lhstr,'_'); ExprTree * pTree; ASSERT(new_attr_name); new_attr_name++; // insert attribute pTree = tree->Copy(); job.Insert(new_attr_name, pTree, false); } } // while next expr if ( !ftrans.SimpleInit(&job,false,false,&rsock) ) { if( errstack ) { int cluster = -1, proc = -1; job.LookupInteger(ATTR_CLUSTER_ID,cluster); job.LookupInteger(ATTR_PROC_ID,proc); errstack->pushf( "DCSchedd::receiveJobSandbox", FILETRANSFER_INIT_FAILED, "File transfer initialization failed for target job %d.%d", cluster, proc ); } return false; } // We want files to be copied to their final places, so apply // any filename remaps when downloading. if ( !ftrans.InitDownloadFilenameRemaps(&job) ) { return false; } if ( use_new_command ) { ftrans.setPeerVersion( version() ); } if ( !ftrans.DownloadFiles() ) { if( errstack ) { FileTransfer::FileTransferInfo ft_info = ftrans.GetInfo(); int cluster = -1, proc = -1; job.LookupInteger(ATTR_CLUSTER_ID,cluster); job.LookupInteger(ATTR_PROC_ID,proc); errstack->pushf( "DCSchedd::receiveJobSandbox", FILETRANSFER_DOWNLOAD_FAILED, "File transfer failed for target job %d.%d: %s", cluster, proc, ft_info.error_desc.Value() ); } return false; } } rsock.end_of_message(); rsock.encode(); reply = OK; rsock.code(reply); rsock.end_of_message(); if(numdone) { *numdone = JobAdsArrayLen; } return true; }
// upload the files associated with the jobads to the sandbox at td_sinful // with the supplied capability. // The work_ad should contain: // ATTR_TREQ_CAPABILITY // ATTR_TREQ_FTP // ATTR_TREQ_JOBID_ALLOW_LIST bool DCTransferD::upload_job_files(int JobAdsArrayLen, ClassAd* JobAdsArray[], ClassAd *work_ad, CondorError * errstack) { ReliSock *rsock = NULL; int timeout = 60 * 60 * 8; // transfers take a long time... int i; ClassAd reqad, respad; std::string cap; int ftp; int invalid; int protocol; std::string reason; ////////////////////////////////////////////////////////////////////////// // Connect to the transferd and authenticate ////////////////////////////////////////////////////////////////////////// // This call with automatically connect to _addr, which was set in the // constructor of this object to be the transferd in question. rsock = (ReliSock*)startCommand(TRANSFERD_WRITE_FILES, Stream::reli_sock, timeout, errstack); if( ! rsock ) { dprintf( D_ALWAYS, "DCTransferD::upload_job_files: " "Failed to send command (TRANSFERD_WRITE_FILES) " "to the schedd\n" ); errstack->push("DC_TRANSFERD", 1, "Failed to start a TRANSFERD_WRITE_FILES command."); return false; } // First, if we're not already authenticated, force that now. if (!forceAuthentication( rsock, errstack )) { dprintf( D_ALWAYS, "DCTransferD::upload_job_files() authentication " "failure: %s\n", errstack->getFullText().c_str() ); errstack->push("DC_TRANSFERD", 1, "Failed to authenticate properly."); return false; } rsock->encode(); ////////////////////////////////////////////////////////////////////////// // Query the transferd about the capability/protocol and see if I can // upload my files. It will respond with a classad saying good or bad. 
////////////////////////////////////////////////////////////////////////// work_ad->LookupString(ATTR_TREQ_CAPABILITY, cap); work_ad->LookupInteger(ATTR_TREQ_FTP, ftp); reqad.Assign(ATTR_TREQ_CAPABILITY, cap); reqad.Assign(ATTR_TREQ_FTP, ftp); // This request ad to the transferd should contain: // ATTR_TREQ_CAPABILITY // ATTR_TREQ_FTP reqad.put(*rsock); rsock->end_of_message(); rsock->decode(); // This response ad to the transferd should contain: // ATTR_TREQ_INVALID_REQUEST (set to true) // ATTR_TREQ_INVALID_REASON // // OR // // ATTR_TREQ_INVALID_REQUEST (set to false) respad.initFromStream(*rsock); rsock->end_of_message(); respad.LookupInteger(ATTR_TREQ_INVALID_REQUEST, invalid); if (invalid == TRUE) { // The transferd rejected my attempt to upload the fileset delete rsock; respad.LookupString(ATTR_TREQ_INVALID_REASON, reason); errstack->push("DC_TRANSFERD", 1, reason.c_str()); return false; } ////////////////////////////////////////////////////////////////////////// // Sort the job ads array into numerically sorted order. The transferd // must do the same. ////////////////////////////////////////////////////////////////////////// // TODO ////////////////////////////////////////////////////////////////////////// // Based upon the protocol I've chosen, use that method to upload the // files. When using the FileTrans protocol, a child process on the // transferd side will be inheriting a socket and accepting the // FileTransfer object's protocol. 
////////////////////////////////////////////////////////////////////////// // XXX Fix to only send jobads for allowed jobs dprintf(D_ALWAYS, "Sending fileset"); work_ad->LookupInteger(ATTR_TREQ_FTP, protocol); switch(protocol) { case FTP_CFTP: // upload the files using the FileTransfer Object for (i=0; i<JobAdsArrayLen; i++) { FileTransfer ftrans; if ( !ftrans.SimpleInit(JobAdsArray[i], false, false, rsock) ) { delete rsock; errstack->push("DC_TRANSFERD", 1, "Failed to initate uploading of files."); return false; } ftrans.setPeerVersion( version() ); if ( !ftrans.UploadFiles(true,false) ) { delete rsock; errstack->push("DC_TRANSFERD", 1, "Failed to upload files."); return false; } dprintf(D_ALWAYS | D_NOHEADER, "."); } rsock->end_of_message(); dprintf(D_ALWAYS | D_NOHEADER, "\n"); break; default: // Bail due to user error. This client doesn't support the uknown // protocol. delete rsock; errstack->push("DC_TRANSFERD", 1, "Unknown file transfer protocol selected."); return false; break; } ////////////////////////////////////////////////////////////////////////// // Get the response from the transferd once it sees a completed // movement of files from the child process. ////////////////////////////////////////////////////////////////////////// rsock->decode(); respad.initFromStream(*rsock); rsock->end_of_message(); // close up shop delete rsock; respad.LookupInteger(ATTR_TREQ_INVALID_REQUEST, invalid); if ( invalid == TRUE ) { respad.LookupString(ATTR_TREQ_INVALID_REASON, reason); errstack->push("DC_TRANSFERD", 1, reason.c_str()); return false; } return true; }
static bool _requestVMRegister(char *addr) { char *buffer = NULL; Daemon hstartd(DT_STARTD, addr); //Using TCP ReliSock ssock; ssock.timeout( VM_SOCKET_TIMEOUT ); ssock.encode(); if( !ssock.connect(addr) ) { dprintf( D_FULLDEBUG, "Failed to connect to host startd(%s)\n", addr); return FALSE; } if( !hstartd.startCommand(VM_REGISTER, &ssock) ) { dprintf( D_FULLDEBUG, "Failed to send VM_REGISTER command to host startd(%s)\n", addr); return FALSE; } // Send <IP address:port> of virtual machine buffer = strdup(daemonCore->InfoCommandSinfulString()); ASSERT(buffer); if ( !ssock.code(buffer) ) { dprintf( D_FULLDEBUG, "Failed to send VM_REGISTER command's arguments to " "host startd %s: %s\n", addr, buffer ); free(buffer); return FALSE; } if( !ssock.end_of_message() ) { dprintf( D_FULLDEBUG, "Failed to send EOM to host startd %s\n", addr ); free(buffer); return FALSE; } free(buffer); //Now, read permission information ssock.timeout( VM_SOCKET_TIMEOUT ); ssock.decode(); int permission = 0; ssock.code(permission); if( !ssock.end_of_message() ) { dprintf( D_FULLDEBUG, "Failed to receive EOM from host startd(%s)\n", addr ); return FALSE; } if( permission > 0 ) { // Since now this virtual machine can be used for Condor if( vmregister ) { if( vmregister->vm_usable == 0 ) { vmregister->vm_usable = 1; if( resmgr ) { resmgr->eval_and_update_all(); } } } }else { // For now this virtual machine can't be used for Condor if( vmregister ) { if( vmregister->vm_usable == 1 ) { vmregister->vm_usable = 0; if( resmgr ) { resmgr->eval_and_update_all(); } } } } return TRUE; }
int CCBServer::HandleRequest(int cmd,Stream *stream) { ReliSock *sock = (ReliSock *)stream; ASSERT( cmd == CCB_REQUEST ); // Avoid lengthy blocking on communication with our peer. // This command-handler should not get called until data // is ready to read. sock->timeout(1); ClassAd msg; sock->decode(); if( !msg.initFromStream( *sock ) || !sock->end_of_message() ) { dprintf(D_ALWAYS, "CCB: failed to receive request " "from %s.\n", sock->peer_description() ); return FALSE; } MyString name; if( msg.LookupString(ATTR_NAME,name) ) { // client name is purely for debugging purposes name.formatstr_cat(" on %s",sock->peer_description()); sock->set_peer_description(name.Value()); } MyString target_ccbid_str; MyString return_addr; MyString connect_id; // id target daemon should present to requester CCBID target_ccbid; // NOTE: using ATTR_CLAIM_ID for connect id so that it is // automatically treated as a secret over the network. // It must be presented by the target daemon when connecting // to the requesting client, so the client can confirm that // the connection is in response to its request. 
if( !msg.LookupString(ATTR_CCBID,target_ccbid_str) || !msg.LookupString(ATTR_MY_ADDRESS,return_addr) || !msg.LookupString(ATTR_CLAIM_ID,connect_id) ) { MyString ad_str; msg.sPrint(ad_str); dprintf(D_ALWAYS, "CCB: invalid request from %s: %s\n", sock->peer_description(), ad_str.Value() ); return FALSE; } if( !CCBIDFromString(target_ccbid,target_ccbid_str.Value()) ) { dprintf(D_ALWAYS, "CCB: request from %s contains invalid CCBID %s\n", sock->peer_description(), target_ccbid_str.Value() ); return FALSE; } CCBTarget *target = GetTarget( target_ccbid ); if( !target ) { dprintf(D_ALWAYS, "CCB: rejecting request from %s for ccbid %s because no daemon is " "currently registered with that id " "(perhaps it recently disconnected).\n", sock->peer_description(), target_ccbid_str.Value()); MyString error_msg; error_msg.formatstr( "CCB server rejecting request for ccbid %s because no daemon is " "currently registered with that id " "(perhaps it recently disconnected).", target_ccbid_str.Value()); RequestReply( sock, false, error_msg.Value(), 0, target_ccbid ); return FALSE; } SetSmallBuffers(sock); CCBServerRequest *request = new CCBServerRequest( sock, target_ccbid, return_addr.Value(), connect_id.Value() ); AddRequest( request, target ); dprintf(D_FULLDEBUG, "CCB: received request id %lu from %s for target ccbid %s " "(registered as %s)\n", request->getRequestID(), request->getSock()->peer_description(), target_ccbid_str.Value(), target->getSock()->peer_description()); ForwardRequestToTarget( request, target ); return KEEP_STREAM; }
bool DCStarter::startSSHD(char const *known_hosts_file,char const *private_client_key_file,char const *preferred_shells,char const *slot_name,char const *ssh_keygen_args,ReliSock &sock,int timeout,char const *sec_session_id,MyString &remote_user,MyString &error_msg,bool &retry_is_sensible) { retry_is_sensible = false; #ifndef HAVE_SSH_TO_JOB error_msg = "This version of Condor does not support ssh key exchange."; return false; #else if (IsDebugLevel(D_COMMAND)) { dprintf (D_COMMAND, "DCStarter::startSSHD(%s,...) making connection to %s\n", getCommandStringSafe(START_SSHD), _addr ? _addr : "NULL"); } if( !connectSock(&sock, timeout, NULL) ) { error_msg = "Failed to connect to starter"; return false; } if( !startCommand(START_SSHD, &sock,timeout,NULL,NULL,false,sec_session_id) ) { error_msg = "Failed to send START_SSHD to starter"; return false; } ClassAd input; if( preferred_shells && *preferred_shells ) { input.Assign(ATTR_SHELL,preferred_shells); } if( slot_name && *slot_name ) { // This is a little silly. // We are telling the remote side the name of the slot so // that it can put it in the welcome message. 
input.Assign(ATTR_NAME,slot_name); } if( ssh_keygen_args && *ssh_keygen_args ) { input.Assign(ATTR_SSH_KEYGEN_ARGS,ssh_keygen_args); } sock.encode(); if( !putClassAd(&sock, input) || !sock.end_of_message() ) { error_msg = "Failed to send START_SSHD request to starter"; return false; } ClassAd result; sock.decode(); if( !getClassAd(&sock, result) || !sock.end_of_message() ) { error_msg = "Failed to read response to START_SSHD from starter"; return false; } bool success = false; result.LookupBool(ATTR_RESULT,success); if( !success ) { std::string remote_error_msg; result.LookupString(ATTR_ERROR_STRING,remote_error_msg); error_msg.formatstr("%s: %s",slot_name,remote_error_msg.c_str()); retry_is_sensible = false; result.LookupBool(ATTR_RETRY,retry_is_sensible); return false; } result.LookupString(ATTR_REMOTE_USER,remote_user); std::string public_server_key; if( !result.LookupString(ATTR_SSH_PUBLIC_SERVER_KEY,public_server_key) ) { error_msg = "No public ssh server key received in reply to START_SSHD"; return false; } std::string private_client_key; if( !result.LookupString(ATTR_SSH_PRIVATE_CLIENT_KEY,private_client_key) ) { error_msg = "No ssh client key received in reply to START_SSHD"; return false; } // store the private client key unsigned char *decode_buf = NULL; int length = -1; condor_base64_decode(private_client_key.c_str(),&decode_buf,&length); if( !decode_buf ) { error_msg = "Error decoding ssh client key."; return false; } FILE *fp = safe_fcreate_fail_if_exists(private_client_key_file,"a",0400); if( !fp ) { error_msg.formatstr("Failed to create %s: %s", private_client_key_file,strerror(errno)); free( decode_buf ); return false; } if( fwrite(decode_buf,length,1,fp)!=1 ) { error_msg.formatstr("Failed to write to %s: %s", private_client_key_file,strerror(errno)); fclose( fp ); free( decode_buf ); return false; } if( fclose(fp)!=0 ) { error_msg.formatstr("Failed to close %s: %s", private_client_key_file,strerror(errno)); free( decode_buf ); return false; } fp = 
NULL; free( decode_buf ); decode_buf = NULL; // store the public server key in the known_hosts file length = -1; condor_base64_decode(public_server_key.c_str(),&decode_buf,&length); if( !decode_buf ) { error_msg = "Error decoding ssh server key."; return false; } fp = safe_fcreate_fail_if_exists(known_hosts_file,"a",0600); if( !fp ) { error_msg.formatstr("Failed to create %s: %s", known_hosts_file,strerror(errno)); free( decode_buf ); return false; } // prepend a host name pattern (*) to the public key to make a valid // record in the known_hosts file fprintf(fp,"* "); if( fwrite(decode_buf,length,1,fp)!=1 ) { error_msg.formatstr("Failed to write to %s: %s", known_hosts_file,strerror(errno)); fclose( fp ); free( decode_buf ); return false; } if( fclose(fp)!=0 ) { error_msg.formatstr("Failed to close %s: %s", known_hosts_file,strerror(errno)); free( decode_buf ); return false; } fp = NULL; free( decode_buf ); decode_buf = NULL; return true; #endif }
int rm_cred_handler(Service * /*service*/, int /*i*/, Stream *stream) { char * name = NULL; int rtnVal = FALSE; int rc; bool found_cred; CredentialWrapper * cred_wrapper = NULL; char * owner = NULL; const char * user; ReliSock * socket = (ReliSock*)stream; if (!socket->triedAuthentication()) { CondorError errstack; if( ! SecMan::authenticate_sock(socket, READ, &errstack) ) { dprintf (D_ALWAYS, "Unable to authenticate, qutting\n"); goto EXIT; } } socket->decode(); if (!socket->code(name)) { dprintf (D_ALWAYS, "Error receiving credential name\n"); goto EXIT; } user = socket->getFullyQualifiedUser(); dprintf (D_ALWAYS, "Authenticated as %s\n", user); if (strchr (name, ':')) { // The name is of the form user:name // This better be a super-user! // TODO: Check super-user's list // Owner is the first part owner = strdup (name); char * pColon = strchr (owner, ':'); *pColon = '\0'; // Name is the second part sprintf (name, "%s", (char*)(pColon+sizeof(char))); if (strcmp (owner, user) != 0) { dprintf (D_ALWAYS, "Requesting another user's (%s) credential %s\n", owner, name); if (!isSuperUser (user)) { dprintf (D_ALWAYS, "User %s is NOT super user, request DENIED\n", user); goto EXIT; } else { dprintf (D_FULLDEBUG, "User %s is super user, request GRANTED\n", user); } } } else { owner = strdup (user); } dprintf (D_ALWAYS, "Attempting to delete cred %s for user %s\n", name, owner); found_cred=false; credentials.Rewind(); while (credentials.Next(cred_wrapper)) { if (cred_wrapper->cred->GetType() == X509_CREDENTIAL_TYPE) { if ((strcmp(cred_wrapper->cred->GetName(), name) == 0) && (strcmp(cred_wrapper->cred->GetOwner(), owner) == 0)) { credentials.DeleteCurrent(); found_cred=true; break; // found it } } } if (found_cred) { priv_state priv = set_root_priv(); // Remove credential data unlink (cred_wrapper->GetStorageName()); // Save the metadata list SaveCredentialList(); set_priv(priv); delete cred_wrapper; dprintf (D_ALWAYS, "Removed credential %s for owner %s\n", name, owner); } 
else { dprintf (D_ALWAYS, "Unable to remove credential %s:%s (not found)\n", owner, name); } free (owner); socket->encode(); rc = (found_cred)?CREDD_SUCCESS:CREDD_CREDENTIAL_NOT_FOUND; socket->code(rc); rtnVal = TRUE; EXIT: if (name != NULL) { free (name); } return rtnVal; }
bool DCSchedd::spoolJobFiles(int JobAdsArrayLen, ClassAd* JobAdsArray[], CondorError * errstack) { int reply; int i; ReliSock rsock; bool use_new_command = true; if ( version() ) { CondorVersionInfo vi( version() ); if ( vi.built_since_version(6,7,7) ) { use_new_command = true; } else { use_new_command = false; } } // // // // // // // // // On the wire protocol // // // // // // // // rsock.timeout(20); // years of research... :) if( ! rsock.connect(_addr) ) { std::string errmsg; formatstr(errmsg, "Failed to connect to schedd (%s)", _addr); dprintf( D_ALWAYS, "DCSchedd::spoolJobFiles: %s\n", errmsg.c_str() ); if( errstack ) { errstack->push( "DCSchedd::spoolJobFiles",CEDAR_ERR_CONNECT_FAILED, errmsg.c_str() ); } return false; } if ( use_new_command ) { if( ! startCommand(SPOOL_JOB_FILES_WITH_PERMS, (Sock*)&rsock, 0, errstack) ) { dprintf( D_ALWAYS, "DCSchedd::spoolJobFiles: " "Failed to send command (SPOOL_JOB_FILES_WITH_PERMS) " "to the schedd (%s)\n", _addr ); return false; } } else { if( ! startCommand(SPOOL_JOB_FILES, (Sock*)&rsock, 0, errstack) ) { dprintf( D_ALWAYS, "DCSchedd::spoolJobFiles: " "Failed to send command (SPOOL_JOB_FILES) " "to the schedd (%s)\n", _addr ); return false; } } // First, if we're not already authenticated, force that now. if (!forceAuthentication( &rsock, errstack )) { dprintf( D_ALWAYS, "DCSchedd: authentication failure: %s\n", errstack ? errstack->getFullText().c_str() : "" ); return false; } rsock.encode(); // Send our version if using the new command if ( use_new_command ) { // Need to use a named variable, else the wrong version of // code() is called. 
char *my_version = strdup( CondorVersion() ); if ( !rsock.code(my_version) ) { dprintf(D_ALWAYS,"DCSchedd:spoolJobFiles: " "Can't send version string to the schedd\n"); free( my_version ); return false; } free( my_version ); } // Send the number of jobs if ( !rsock.code(JobAdsArrayLen) ) { dprintf(D_ALWAYS,"DCSchedd:spoolJobFiles: " "Can't send JobAdsArrayLen to the schedd\n"); return false; } if( !rsock.end_of_message() ) { std::string errmsg; formatstr(errmsg, "Can't send initial message (version + count) to schedd (%s)", _addr); dprintf(D_ALWAYS,"DCSchedd:spoolJobFiles: %s\n", errmsg.c_str()); if( errstack ) { errstack->push( "DCSchedd::spoolJobFiles", CEDAR_ERR_EOM_FAILED, errmsg.c_str()); } return false; } // Now, put the job ids onto the wire PROC_ID jobid; for (i=0; i<JobAdsArrayLen; i++) { if (!JobAdsArray[i]->LookupInteger(ATTR_CLUSTER_ID,jobid.cluster)) { dprintf(D_ALWAYS,"DCSchedd:spoolJobFiles: " "Job ad %d did not have a cluster id\n",i); return false; } if (!JobAdsArray[i]->LookupInteger(ATTR_PROC_ID,jobid.proc)) { dprintf(D_ALWAYS,"DCSchedd:spoolJobFiles: " "Job ad %d did not have a proc id\n",i); return false; } rsock.code(jobid); } if( !rsock.end_of_message() ) { std::string errmsg; formatstr(errmsg, "Failed while sending job ids to schedd (%s)", _addr); dprintf(D_ALWAYS,"DCSchedd:spoolJobFiles: %s\n", errmsg.c_str()); if( errstack ) { errstack->push( "DCSchedd::spoolJobFiles", CEDAR_ERR_EOM_FAILED, errmsg.c_str()); } return false; } // Now send all the files via the file transfer object for (i=0; i<JobAdsArrayLen; i++) { FileTransfer ftrans; if ( !ftrans.SimpleInit(JobAdsArray[i], false, false, &rsock, PRIV_UNKNOWN, false, true) ) { if( errstack ) { int cluster = -1, proc = -1; if(JobAdsArray[i]) { JobAdsArray[i]->LookupInteger(ATTR_CLUSTER_ID,cluster); JobAdsArray[i]->LookupInteger(ATTR_PROC_ID,proc); } errstack->pushf( "DCSchedd::spoolJobFiles", FILETRANSFER_INIT_FAILED, "File transfer initialization failed for target job %d.%d", cluster, proc 
); } return false; } if ( use_new_command ) { ftrans.setPeerVersion( version() ); } if ( !ftrans.UploadFiles(true,false) ) { if( errstack ) { FileTransfer::FileTransferInfo ft_info = ftrans.GetInfo(); int cluster = -1, proc = -1; if(JobAdsArray[i]) { JobAdsArray[i]->LookupInteger(ATTR_CLUSTER_ID,cluster); JobAdsArray[i]->LookupInteger(ATTR_PROC_ID,proc); } errstack->pushf( "DCSchedd::spoolJobFiles", FILETRANSFER_UPLOAD_FAILED, "File transfer failed for target job %d.%d: %s", cluster, proc, ft_info.error_desc.Value() ); } return false; } } rsock.end_of_message(); rsock.decode(); reply = 0; rsock.code(reply); rsock.end_of_message(); if ( reply == 1 ) return true; else return false; }
int get_cred_handler(Service * /*service*/, int /*i*/, Stream *stream) { char * name = NULL; int rtnVal = FALSE; bool found_cred=false; CredentialWrapper * cred = NULL; char * owner = NULL; const char * user = NULL; void * data = NULL; ReliSock * socket = (ReliSock*)stream; // Authenticate if (!socket->triedAuthentication()) { CondorError errstack; if( ! SecMan::authenticate_sock(socket, READ, &errstack) ) { dprintf (D_ALWAYS, "Unable to authenticate, qutting\n"); goto EXIT; } } socket->decode(); if (!socket->code(name)) { dprintf (D_ALWAYS, "Error receiving credential name\n"); goto EXIT; } user = socket->getFullyQualifiedUser(); dprintf (D_ALWAYS, "Authenticated as %s\n", user); if (strchr (name, ':')) { // The name is of the form user:name // This better be a super-user! // TODO: Check super-user's list // Owner is the first part owner = strdup (name); char * pColon = strchr (owner, ':'); *pColon = '\0'; // Name is the second part sprintf (name, "%s", (char*)(pColon+sizeof(char))); if (strcmp (owner, user) != 0) { dprintf (D_ALWAYS, "Requesting another user's (%s) credential %s\n", owner, name); if (!isSuperUser (user)) { dprintf (D_ALWAYS, "User %s is NOT super user, request DENIED\n", user); goto EXIT; } else { dprintf (D_FULLDEBUG, "User %s is super user, request GRANTED\n", user); } } } else { owner = strdup (user); } dprintf (D_ALWAYS, "sending cred %s for user %s\n", name, owner); credentials.Rewind(); while (credentials.Next(cred)) { if (cred->cred->GetType() == X509_CREDENTIAL_TYPE) { if ((strcmp(cred->cred->GetName(), name) == 0) && (strcmp(cred->cred->GetOwner(), owner) == 0)) { found_cred=true; break; // found it } } } socket->encode(); if (found_cred) { dprintf (D_FULLDEBUG, "Found cred %s\n", cred->GetStorageName()); int data_size; int rc = LoadData (cred->GetStorageName(), data, data_size); dprintf (D_FULLDEBUG, "Credential::LoadData returned %d\n", rc); if (rc == 0) { goto EXIT; } socket->code (data_size); socket->code_bytes (data, data_size); 
dprintf (D_ALWAYS, "Credential name %s for owner %s returned to user %s\n", name, owner, user); } else { dprintf (D_ALWAYS, "Cannot find cred %s\n", name); int rc = CREDD_CREDENTIAL_NOT_FOUND; socket->code (rc); } rtnVal = TRUE; EXIT: if ( name != NULL) { free (name); } if ( owner != NULL) { free (owner); } if ( data != NULL) { free (data); } return rtnVal; }
int query_cred_handler(Service * /*service*/, int /*i*/, Stream *stream) { classad::ClassAdUnParser unparser; std::string adbuffer; char * request = NULL; int rtnVal = FALSE; int length; CredentialWrapper * cred = NULL; SimpleList <Credential*> result_list; ReliSock * socket = (ReliSock*)stream; const char * user; if (!socket->triedAuthentication()) { CondorError errstack; if( ! SecMan::authenticate_sock(socket, READ, &errstack) ) { dprintf (D_ALWAYS, "Unable to authenticate, qutting\n"); goto EXIT; } } user = socket->getFullyQualifiedUser(); dprintf (D_ALWAYS, "Authenticated as %s\n", user); socket->decode(); if (!socket->code(request)) { dprintf (D_ALWAYS, "Error receiving request\n"); goto EXIT; } if ((request != NULL) && (strcmp (request, "*") == 0)) { if (!isSuperUser (user)) { dprintf (D_ALWAYS, "User %s is NOT super user, request DENIED\n", user); goto EXIT; } } // Find credentials for this user credentials.Rewind(); while (credentials.Next(cred)) { if (cred->cred->GetType() == X509_CREDENTIAL_TYPE) { if (strcmp(cred->cred->GetOwner(), user) == 0) { result_list.Append (cred->cred); } } } socket->encode(); length = result_list.Length(); dprintf (D_FULLDEBUG, "User has %d credentials\n", length); socket->code (length); Credential * _cred; result_list.Rewind(); while (result_list.Next(_cred)) { classad::ClassAd * _temp = cred->GetMetadata(); unparser.Unparse(adbuffer,_temp); char * classad_str = strdup(adbuffer.c_str()); socket->code (classad_str); free (classad_str); delete _temp; } rtnVal = TRUE; EXIT: if (request != NULL) { free (request); } return rtnVal; }
/**
 * Run a security handshake against a daemon for a given command and return
 * the resulting authorization information.
 *
 * @param locate_obj   either a ClassAd containing ATTR_MY_ADDRESS or a
 *                     string holding the daemon's address.
 * @param command_obj  the command to negotiate for (default "DC_NOP"),
 *                     resolved through getCommand().
 * @return the ad received from the daemon, updated with the policy of the
 *         security session that was established.
 * @throws ValueError    when an ad is supplied without an address.
 * @throws RuntimeError  when the daemon cannot be located or contacted, the
 *                       reply cannot be read, or no matching session is
 *                       found in the security caches.
 */
boost::shared_ptr<ClassAdWrapper> ping(object locate_obj, object command_obj=object("DC_NOP"))
{
    int num = getCommand(command_obj);

    // Work out the daemon's address from either an ad or a plain string.
    extract<ClassAdWrapper&> ad_extract(locate_obj);
    std::string addr;
    if (ad_extract.check())
    {
        ClassAdWrapper& ad = ad_extract();
        if (!ad.EvaluateAttrString(ATTR_MY_ADDRESS, addr))
        {
            THROW_EX(ValueError, "Daemon address not specified.");
        }
    }
    else
    {
        addr = extract<std::string>(locate_obj);
    }

    Daemon daemon(DT_ANY, addr.c_str(), NULL);
    if (!daemon.locate())
    {
        THROW_EX(RuntimeError, "Unable to find daemon.");
    }

    CondorError errstack;
    boost::shared_ptr<ClassAdWrapper> authz_ad(new ClassAdWrapper());

    ReliSock *raw_sock = (ReliSock*) daemon.makeConnectedSocket( Stream::reli_sock, 0, 0, &errstack );
    if (!raw_sock)
    {
        THROW_EX(RuntimeError, "Unable to connect to daemon.");
    }
    // Own the socket from here on so that it is released on every exit
    // path, including the exceptions thrown below. It used to be leaked.
    boost::shared_ptr<ReliSock> sock(raw_sock);

    if (!(daemon.startSubCommand(DC_SEC_QUERY, num, sock.get(), 0, &errstack)))
    {
        THROW_EX(RuntimeError, "Unable to send security query to daemon.");
    }

    sock->decode();
    if (!getClassAd(sock.get(), *authz_ad.get()) || !sock->end_of_message())
    {
        THROW_EX(RuntimeError, "Failed to get security session information from remote daemon.");
    }

    // Find the session that was just negotiated for this address/command.
    MyString cmd_map_ent;
    cmd_map_ent.formatstr ("{%s,<%i>}", addr.c_str(), num);

    MyString session_id;
    KeyCacheEntry *k = NULL;
    ClassAd *policy = NULL;

    // IMPORTANT: this hashtable returns 0 on success!
    if ((SecMan::command_map).lookup(cmd_map_ent, session_id))
    {
        THROW_EX(RuntimeError, "No valid entry in command map hash table!");
    }

    // IMPORTANT: this hashtable returns 1 on success!
    if (!(SecMan::session_cache).lookup(session_id.Value(), k) || !k)
    {
        THROW_EX(RuntimeError, "No valid entry in session map hash table!");
    }

    policy = k->policy();
    if (!policy)
    {
        THROW_EX(RuntimeError, "Security session has no policy ad.");
    }
    authz_ad->Update(*policy);

    return authz_ad;
}
/**
 * Read the final acknowledgement of the SHARED_PORT_PASS_FD exchange.
 *
 * Why an ACK at all: it was found to be necessary on Mac OS X 10.5.8 (other
 * versions unverified) and not on any tested version of Linux. Without it
 * the endpoint randomly sees the passed socket close right away. The cost is
 * that the PassSocket() protocol blocks here, so we could temporarily
 * deadlock if the endpoint receiving the socket is itself blocked on us over
 * some other channel. If that becomes a problem the ACK could be limited to
 * the platforms that need it; the protocol is always local to one machine.
 *
 * @param s  the stream to read the ACK from, treated as a ReliSock.
 * @return WAIT   when running non-blocking and the read would block before
 *                the deadline,
 *         FAILED when the deadline has passed, the read fails, or the peer
 *                reports a non-zero status,
 *         DONE   when a zero status was received.
 */
SharedPortState::HandlerResult SharedPortState::HandleResp(Stream *&s)
{
    ReliSock *ack_sock = static_cast<ReliSock*>(s);
    ack_sock->decode();

    int ack_status = 0;
    bool got_status;
    bool would_block = false;
    {
        // The guard only needs to cover the read itself.
        BlockingModeGuard guard(ack_sock, m_non_blocking);
        got_status = ack_sock->code(ack_status);
        if ( m_non_blocking ) {
            would_block = ack_sock->clear_read_block_flag();
        }
    }

    if (would_block)
    {
        if (ack_sock->deadline_expired())
        {
            dprintf(D_ALWAYS,
                    "SharedPortClient - server response deadline has passed for %s%s\n",
                    m_sock_name.c_str(), m_requested_by.c_str());
            return FAILED;
        }
        dprintf(D_ALWAYS,
                "SharedPortClient read would block; waiting for result for SHARED_PORT_PASS_FD to %s%s.\n",
                m_sock_name.c_str(), m_requested_by.c_str());
        return WAIT;
    }

    // end_of_message() is only attempted when the status itself was read.
    const bool reply_complete = got_status && ack_sock->end_of_message();
    if( !reply_complete ) {
        dprintf(D_ALWAYS,
                "SharedPortClient: failed to receive result for SHARED_PORT_PASS_FD to %s%s: %s\n",
                m_sock_name.c_str(),
                m_requested_by.c_str(),
                strerror(errno));
        return FAILED;
    }

    if( ack_status != 0 ) {
        dprintf(D_ALWAYS,
                "SharedPortClient: received failure response for SHARED_PORT_PASS_FD to %s%s\n",
                m_sock_name.c_str(),
                m_requested_by.c_str());
        return FAILED;
    }

    dprintf(D_FULLDEBUG,
            "SharedPortClient: passed socket to %s%s\n",
            m_sock_name.c_str(),
            m_requested_by.c_str());
    return DONE;
}
int main( int argc, char *argv[] ) { char *machine_name = 0; char *log_name = 0; char *pool=0; int i; daemon_t type = DT_MASTER; myDistro->Init( argc, argv ); config(); for( i=1; i<argc; i++ ) { if(!strcmp(argv[i],"-help")) { usage(argv[0]); exit(0); } else if(!strcmp(argv[i],"-pool")) { i++; if(!argv[i]) { fprintf(stderr,"-pool requires an argument.\n\n"); usage(argv[0]); exit(1); } pool = argv[i]; } else if(!strcmp(argv[i],"-version")) { version(); exit(0); } else if(!strcmp(argv[i],"-debug")) { dprintf_set_tool_debug("TOOL", 0); } else if(argv[i][0]=='-') { type = stringToDaemonType(&argv[i][1]); if( type == DT_NONE || type == DT_DAGMAN) { usage(argv[0]); exit(1); } } else if(argv[i][0]!='-') { if(!machine_name) { machine_name = argv[i]; } else if(!log_name) { log_name = argv[i]; } else { fprintf(stderr,"Extra argument: %s\n\n",argv[i]); usage(argv[0]); exit(1); } } else { usage(argv[0]); exit(1); } } if( !machine_name || !log_name ) { usage(argv[0]); exit(1); } Daemon *daemon; ReliSock *sock; if (pool) { DCCollector col( pool ); if( ! 
col.addr() ) { fprintf( stderr, "Error: %s\n", col.error() ); exit(1); } daemon = new Daemon( type, machine_name, col.addr() ); } else { daemon = new Daemon( type, machine_name ); } dprintf(D_FULLDEBUG,"Locating daemon process on %s...\n",machine_name); if(!daemon->locate()) { fprintf(stderr,"Couldn't locate daemon on %s: %s\n",machine_name,daemon->error()); exit(1); } dprintf(D_FULLDEBUG,"Daemon %s is %s\n",daemon->hostname(),daemon->addr()); sock = (ReliSock*)daemon->startCommand( DC_FETCH_LOG, Sock::reli_sock); if(!sock) { fprintf(stderr,"couldn't connect to daemon %s at %s\n",daemon->hostname(),daemon->addr()); return 1; } int commandType = DC_FETCH_LOG_TYPE_PLAIN; if ((strcmp(log_name, "HISTORY") == 0) || (strcmp(log_name, "STARTD_HISTORY") == 0)) { commandType = DC_FETCH_LOG_TYPE_HISTORY; } if ((strcmp(log_name, "STARTD.PER_JOB_HISTORY_DIR") == 0) || (strcmp(log_name, "STARTD.PER_JOB_HISTORY_DIR") == 0)) { commandType = DC_FETCH_LOG_TYPE_HISTORY_DIR; } sock->put( commandType ); sock->put( log_name ); sock->end_of_message(); if (commandType == DC_FETCH_LOG_TYPE_HISTORY_DIR) { return handleHistoryDir(sock); } int result = -1; int exitcode = 1; const char *reason=NULL; filesize_t filesize; sock->decode(); sock->code(result); switch(result) { case -1: reason = "permission denied"; break; case DC_FETCH_LOG_RESULT_SUCCESS: result = sock->get_file(&filesize,1,0); if(result>=0) { exitcode = 0; } else { reason = "connection lost"; } break; case DC_FETCH_LOG_RESULT_NO_NAME: reason = "no log file by that name"; break; case DC_FETCH_LOG_RESULT_CANT_OPEN: reason = "server was unable to access it"; break; case DC_FETCH_LOG_RESULT_BAD_TYPE: reason = "server can't provide that type of log"; break; default: reason = "unknown error"; break; } if(exitcode!=0) { fprintf(stderr,"Couldn't fetch log: %s.\n",reason); } return exitcode; }
// when a transferd registers itself, it identifies who it is. The connection // is then held open and the schedd periodically might send more transfer // requests to the transferd. Also, if the transferd dies, the schedd is // informed quickly and reliably due to the closed connection. bool DCSchedd::register_transferd(MyString sinful, MyString id, int timeout, ReliSock **regsock_ptr, CondorError *errstack) { ReliSock *rsock; int invalid_request = 0; ClassAd regad; ClassAd respad; std::string errstr; std::string reason; if (regsock_ptr != NULL) { // Our caller wants a pointer to the socket we used to succesfully // register the claim. The NULL pointer will represent failure and // this will only be set to something real if everything was ok. *regsock_ptr = NULL; } // This call with automatically connect to _addr, which was set in the // constructor of this object to be the schedd in question. rsock = (ReliSock*)startCommand(TRANSFERD_REGISTER, Stream::reli_sock, timeout, errstack); if( ! rsock ) { dprintf( D_ALWAYS, "DCSchedd::register_transferd: " "Failed to send command (TRANSFERD_REGISTER) " "to the schedd\n" ); errstack->push("DC_SCHEDD", 1, "Failed to start a TRANSFERD_REGISTER command."); return false; } // First, if we're not already authenticated, force that now. if (!forceAuthentication( rsock, errstack )) { dprintf( D_ALWAYS, "DCSchedd::register_transferd authentication " "failure: %s\n", errstack->getFullText().c_str() ); errstack->push("DC_SCHEDD", 1, "Failed to authenticate properly."); return false; } rsock->encode(); // set up my registration request. regad.Assign(ATTR_TREQ_TD_SINFUL, sinful); regad.Assign(ATTR_TREQ_TD_ID, id); // This is the initial registration identification ad to the schedd // It contains: // ATTR_TREQ_TD_SINFUL // ATTR_TREQ_TD_ID putClassAd(rsock, regad); rsock->end_of_message(); // Get the response from the schedd. 
rsock->decode(); // This is the response ad from the schedd: // It contains: // ATTR_TREQ_INVALID_REQUEST // // OR // // ATTR_TREQ_INVALID_REQUEST // ATTR_TREQ_INVALID_REASON getClassAd(rsock, respad); rsock->end_of_message(); respad.LookupInteger(ATTR_TREQ_INVALID_REQUEST, invalid_request); if (invalid_request == FALSE) { // not an invalid request if (regsock_ptr) *regsock_ptr = rsock; return true; } respad.LookupString(ATTR_TREQ_INVALID_REASON, reason); errstack->pushf("DC_SCHEDD", 1, "Schedd refused registration: %s", reason.c_str()); return false; }
int store_cred_handler(Service * /*service*/, int /*i*/, Stream *stream) { void * data = NULL; int rtnVal = FALSE; int rc; char * temp_file_name = NULL; bool found_cred; CredentialWrapper * temp_cred = NULL; int data_size = -1; classad::ClassAd * _classad = NULL; classad::ClassAd classad; std::string classad_cstr; char * classad_str = NULL; classad::ClassAdParser parser; ReliSock * socket = (ReliSock*)stream; const char * user = NULL; CredentialWrapper * cred_wrapper = NULL; if (!socket->triedAuthentication()) { CondorError errstack; if( ! SecMan::authenticate_sock(socket, WRITE, &errstack) ) { dprintf (D_ALWAYS, "Unable to authenticate, qutting\n"); goto EXIT; } } user = socket->getFullyQualifiedUser(); dprintf (D_FULLDEBUG, "Request by: %s, %s\n", socket->getOwner(), user); socket->decode(); if (!socket->code (classad_str)) { dprintf (D_ALWAYS, "Error receiving credential metadata\n"); goto EXIT; } classad_cstr = classad_str; free (classad_str); _classad = parser.ParseClassAd(classad_cstr); if (!_classad) { dprintf (D_ALWAYS, "Error: invalid credential metadata %s\n", classad_cstr.c_str()); goto EXIT; } classad = *_classad; delete _classad; int type; if (!classad.EvaluateAttrInt ("Type", type)) { dprintf (D_ALWAYS, "Missing Type attribute in classad!\n"); goto EXIT; } if (type == X509_CREDENTIAL_TYPE) { cred_wrapper = new X509CredentialWrapper (classad); dprintf (D_ALWAYS, "Name=%s Size=%d\n", cred_wrapper->cred->GetName(), cred_wrapper->cred->GetDataSize()); } else { dprintf (D_ALWAYS, "Unsupported credential type %d\n", type); goto EXIT; } cred_wrapper->cred->SetOrigOwner (socket->getOwner()); // original remote uname cred_wrapper->cred->SetOwner (user); // mapped uname // Receive credential data data_size = cred_wrapper->cred->GetDataSize(); if (data_size > MAX_CRED_DATA_SIZE) { dprintf (D_ALWAYS, "ERROR: Credential data size %d > maximum allowed (%d)\n", data_size, MAX_CRED_DATA_SIZE); goto EXIT; } data = malloc (data_size); if (data == NULL) { EXCEPT("Out of 
memory. Aborting."); } if (!socket->code_bytes(data,data_size)) { dprintf (D_ALWAYS, "Error receiving credential data\n"); goto EXIT; } cred_wrapper->cred->SetData (data, data_size); // Check whether credential under this name already exists found_cred=false; credentials.Rewind(); while (credentials.Next(temp_cred)) { if ((strcmp(cred_wrapper->cred->GetName(), temp_cred->cred->GetName()) == 0) && (strcmp(cred_wrapper->cred->GetOwner(), temp_cred->cred->GetOwner()) == 0)) { found_cred=true; break; // found it } } if (found_cred) { dprintf (D_ALWAYS, "Credential %s for owner %s already exists!\n", cred_wrapper->cred->GetName(), cred_wrapper->cred->GetOwner()); socket->encode(); int rcred=CREDD_ERROR_CREDENTIAL_ALREADY_EXISTS; socket->code(rcred); goto EXIT; } // Write data to a file temp_file_name = dircat (cred_store_dir, "credXXXXXX"); condor_mkstemp (temp_file_name); cred_wrapper->SetStorageName (temp_file_name); init_user_id_from_FQN (user); if (!StoreData(temp_file_name,data,data_size)) { socket->encode(); int rcred = CREDD_UNABLE_TO_STORE; socket->code(rcred); goto EXIT; } ((X509CredentialWrapper*)cred_wrapper)->cred->SetRealExpirationTime ( x509_proxy_expiration_time(temp_file_name)); // Write metadata to a file credentials.Append (cred_wrapper); SaveCredentialList(); // Write response to the client socket->encode(); rc = CREDD_SUCCESS; socket->code(rc); dprintf( D_ALWAYS, "Credential name %s owner %s successfully stored\n", cred_wrapper->cred->GetName(), cred_wrapper->cred->GetOwner() ); if (type == X509_CREDENTIAL_TYPE) { ((X509Credential*)cred_wrapper->cred)->display( D_FULLDEBUG ); } rtnVal = TRUE; EXIT: if ( data != NULL ) { free (data); } if ( temp_file_name != NULL ) { delete [] temp_file_name; } if ( cred_wrapper != NULL) { delete cred_wrapper; } return rtnVal; }
// I'm going to ask the schedd for where I can put the files for the jobs I've // specified. The schedd is going to respond with A) a message telling me it // has the answer right away, or B) an answer telling me I have to wait // an unknown length of time for the schedd to schedule me a place to put it. bool DCSchedd::requestSandboxLocation(ClassAd *reqad, ClassAd *respad, CondorError * errstack) { ReliSock rsock; int will_block; ClassAd status_ad; rsock.timeout(20); // years of research... :) if( ! rsock.connect(_addr) ) { dprintf( D_ALWAYS, "DCSchedd::requestSandboxLocation(): " "Failed to connect to schedd (%s)\n", _addr ); return false; } if( ! startCommand(REQUEST_SANDBOX_LOCATION, (Sock*)&rsock, 0, errstack) ) { dprintf( D_ALWAYS, "DCSchedd::requestSandboxLocation(): " "Failed to send command (REQUEST_SANDBOX_LOCATION) " "to schedd (%s)\n", _addr ); return false; } // First, if we're not already authenticated, force that now. if (!forceAuthentication( &rsock, errstack )) { dprintf( D_ALWAYS, "DCSchedd: authentication failure: %s\n", errstack->getFullText().c_str() ); return false; } rsock.encode(); /////////////////////////////////////////////////////////////////////// // Send my sandbox location request packet to the schedd. 
/////////////////////////////////////////////////////////////////////// // This request ad will either contain: // ATTR_TREQ_PEER_VERSION // ATTR_TREQ_HAS_CONSTRAINT // ATTR_TREQ_JOBID_LIST // ATTR_TREQ_FTP // // OR // // ATTR_TREQ_DIRECTION // ATTR_TREQ_PEER_VERSION // ATTR_TREQ_HAS_CONSTRAINT // ATTR_TREQ_CONSTRAINT // ATTR_TREQ_FTP dprintf(D_ALWAYS, "Sending request ad.\n"); if (putClassAd(&rsock, *reqad) != 1) { dprintf(D_ALWAYS,"DCSchedd:requestSandboxLocation(): " "Can't send reqad to the schedd\n"); return false; } rsock.end_of_message(); rsock.decode(); /////////////////////////////////////////////////////////////////////// // Read back a response ad which will tell me which jobs the schedd // said I could modify and whether or not I'm am going to have to block // before getting the payload of the transferd location/capability ad. /////////////////////////////////////////////////////////////////////// // This status ad will contain // ATTR_TREQ_INVALID_REQUEST (set to true) // ATTR_TREQ_INVALID_REASON // // OR // ATTR_TREQ_INVALID_REQUEST (set to false) // ATTR_TREQ_JOBID_ALLOW_LIST // ATTR_TREQ_JOBID_DENY_LIST // ATTR_TREQ_WILL_BLOCK dprintf(D_ALWAYS, "Receiving status ad.\n"); if (getClassAd(&rsock, status_ad) == false) { dprintf(D_ALWAYS, "Schedd closed connection to me. Aborting sandbox " "submission.\n"); return false; } rsock.end_of_message(); status_ad.LookupInteger(ATTR_TREQ_WILL_BLOCK, will_block); dprintf(D_ALWAYS, "Client will %s\n", will_block==1?"block":"not block"); if (will_block == 1) { // set to 20 minutes. rsock.timeout(60*20); } /////////////////////////////////////////////////////////////////////// // Read back the payload ad from the schedd about the transferd location // and capability string I can use for the fileset I wish to transfer. /////////////////////////////////////////////////////////////////////// // read back the response ad from the schedd which contains a // td sinful string, and a capability. 
These represent my ability to // read/write a certain fileset somewhere. // This response ad from the schedd will contain: // // ATTR_TREQ_INVALID_REQUEST (set to true) // ATTR_TREQ_INVALID_REASON // // OR // // ATTR_TREQ_INVALID_REQUEST (set to false) // ATTR_TREQ_CAPABILITY // ATTR_TREQ_TD_SINFUL // ATTR_TREQ_JOBID_ALLOW_LIST dprintf(D_ALWAYS, "Receiving response ad.\n"); if (getClassAd(&rsock, *respad) != true) { dprintf(D_ALWAYS,"DCSchedd:requestSandboxLocation(): " "Can't receive respond ad from the schedd\n"); return false; } rsock.end_of_message(); return true; }
int part_send_job( int test_starter, char *host, int &reason, char *capability, char * /*schedd*/, PROC *proc, int &sd1, int &sd2, char **name) { int reply; ReliSock *sock = NULL; StartdRec stRec; PORTS ports; bool done = false; int retry_delay = 3; int num_retries = 0; // make sure we have the job classad InitJobAd(proc->id.cluster, proc->id.proc); while( !done ) { Daemon startd(DT_STARTD, host, NULL); if (!(sock = (ReliSock*)startd.startCommand ( ACTIVATE_CLAIM, Stream::reli_sock, 90))) { dprintf( D_ALWAYS, "startCommand(ACTIVATE_CLAIM) to startd failed\n"); goto returnfailure; } // Send the capability ClaimIdParser idp( capability ); dprintf(D_FULLDEBUG, "send capability %s\n", idp.publicClaimId() ); if( !sock->put_secret(capability) ) { dprintf( D_ALWAYS, "sock->put(\"%s\") failed\n",idp.publicClaimId()); goto returnfailure; } // Send the starter number if( test_starter ) { dprintf( D_ALWAYS, "Requesting Alternate Starter %d\n", test_starter ); } else { dprintf( D_ALWAYS, "Requesting Primary Starter\n" ); } if( !sock->code(test_starter) ) { dprintf( D_ALWAYS, "sock->code(%d) failed\n", test_starter ); goto returnfailure; } // Send the job info if( !putClassAd(sock, *JobAd) ) { dprintf( D_ALWAYS, "failed to send job ad\n" ); goto returnfailure; } if( !sock->end_of_message() ) { dprintf( D_ALWAYS, "failed to send message to startd\n" ); goto returnfailure; } // We're done sending. Now, get the reply. 
sock->decode(); if( !sock->code(reply) || !sock->end_of_message() ) { dprintf( D_ALWAYS, "failed to receive reply from startd\n" ); goto returnfailure; } switch( reply ) { case OK: dprintf( D_ALWAYS, "Shadow: Request to run a job was ACCEPTED\n" ); done = true; break; case NOT_OK: dprintf( D_ALWAYS, "Shadow: Request to run a job was REFUSED\n"); goto returnfailure; break; case CONDOR_TRY_AGAIN: num_retries++; dprintf( D_ALWAYS, "Shadow: Request to run a job was TEMPORARILY REFUSED\n" ); if( num_retries > 20 ) { dprintf( D_ALWAYS, "Shadow: Too many retries, giving up.\n" ); goto returnfailure; } delete sock; dprintf( D_ALWAYS, "Shadow: will try again in %d seconds\n", retry_delay ); sleep( retry_delay ); break; default: dprintf(D_ALWAYS,"Unknown reply from startd for command ACTIVATE_CLAIM\n"); dprintf(D_ALWAYS,"Shadow: Request to run a job was REFUSED\n"); goto returnfailure; break; } } /* start flock : dhruba */ sock->decode(); memset( &stRec, '\0', sizeof(stRec) ); if( !sock->code(stRec) || !sock->end_of_message() ) { dprintf(D_ALWAYS, "Can't read reply from startd.\n"); goto returnfailure; } ports = stRec.ports; if( stRec.ip_addr ) { host = stRec.server_name; if(name) { *name = strdup(stRec.server_name); } dprintf(D_FULLDEBUG, "host = %s inet_addr = 0x%x port1 = %d port2 = %d\n", host, stRec.ip_addr,ports.port1, ports.port2 ); } else { dprintf(D_FULLDEBUG, "host = %s port1 = %d port2 = %d\n", host, ports.port1, ports.port2 ); } if( ports.port1 == 0 ) { dprintf( D_ALWAYS, "Shadow: Request to run a job on %s was REFUSED\n", host ); goto returnfailure; } /* end flock ; dhruba */ // We don't use the server_name in the StartdRec, because our // DNS query may fail or may give us the wrong IP address // (either because it's stale or because we're talking to a // machine with multiple network interfaces). Sadly, we can't // use the ip_addr either, because the startd doesn't send it in // the correct byte ordering on little-endian machines. 
So, we // grab the IP address from the ReliSock, since we konw the // startd always uses the same IP address for all of its // communication. char sinfulstring[SINFUL_STRING_BUF_SIZE]; generate_sinful(sinfulstring, SINFUL_STRING_BUF_SIZE, sock->peer_ip_str(), ports.port1); if( (sd1 = do_connect(sinfulstring, (char *)0, (u_short)ports.port1)) < 0 ) { dprintf( D_ALWAYS, "failed to connect to scheduler on %s\n", sinfulstring ); goto returnfailure; } generate_sinful(sinfulstring, SINFUL_STRING_BUF_SIZE, sock->peer_ip_str(), ports.port2); if( (sd2 = do_connect(sinfulstring, (char *)0, (u_short)ports.port2)) < 0 ) { dprintf( D_ALWAYS, "failed to connect to scheduler on %s\n", sinfulstring ); close(sd1); goto returnfailure; } delete sock; sock = NULL; if ( stRec.server_name ) { free( stRec.server_name ); } return 0; returnfailure: reason = JOB_NOT_STARTED; delete sock; return -1; }
int CCBServer::HandleRegistration(int cmd,Stream *stream) { ReliSock *sock = (ReliSock *)stream; ASSERT( cmd == CCB_REGISTER ); // Avoid lengthy blocking on communication with our peer. // This command-handler should not get called until data // is ready to read. sock->timeout(1); ClassAd msg; sock->decode(); if( !msg.initFromStream( *sock ) || !sock->end_of_message() ) { dprintf(D_ALWAYS, "CCB: failed to receive registration " "from %s.\n", sock->peer_description() ); return FALSE; } SetSmallBuffers(sock); MyString name; if( msg.LookupString(ATTR_NAME,name) ) { // target daemon name is purely for debugging purposes name.formatstr_cat(" on %s",sock->peer_description()); sock->set_peer_description(name.Value()); } CCBTarget *target = new CCBTarget(sock); MyString reconnect_cookie_str,reconnect_ccbid_str; CCBID reconnect_cookie,reconnect_ccbid; bool reconnected = false; if( msg.LookupString(ATTR_CLAIM_ID,reconnect_cookie_str) && CCBIDFromString(reconnect_cookie,reconnect_cookie_str.Value()) && msg.LookupString( ATTR_CCBID,reconnect_ccbid_str) && CCBIDFromContactString(reconnect_ccbid,reconnect_ccbid_str.Value()) ) { target->setCCBID( reconnect_ccbid ); reconnected = ReconnectTarget( target, reconnect_cookie ); } if( !reconnected ) { AddTarget( target ); } CCBReconnectInfo *reconnect_info = GetReconnectInfo( target->getCCBID() ); ASSERT( reconnect_info ); sock->encode(); ClassAd reply_msg; MyString ccb_contact; CCBIDToString( reconnect_info->getReconnectCookie(),reconnect_cookie_str ); // We send our address as part of the CCB contact string, rather // than letting the target daemon fill it in. This is to give us // potential flexibility on the CCB server side to do things like // assign different targets to different CCB server sub-processes, // each with their own command port. 
CCBIDToContactString( m_address.Value(), target->getCCBID(), ccb_contact ); reply_msg.Assign(ATTR_CCBID,ccb_contact.Value()); reply_msg.Assign(ATTR_COMMAND,CCB_REGISTER); reply_msg.Assign(ATTR_CLAIM_ID,reconnect_cookie_str.Value()); if( !reply_msg.put( *sock ) || !sock->end_of_message() ) { dprintf(D_ALWAYS, "CCB: failed to send registration response " "to %s.\n", sock->peer_description() ); RemoveTarget( target ); return KEEP_STREAM; // we have already closed this socket } return KEEP_STREAM; }
bool DCSchedd::delegateGSIcredential(const int cluster, const int proc, const char* path_to_proxy_file, time_t expiration_time, time_t *result_expiration_time, CondorError * errstack) { int reply; ReliSock rsock; // check the parameters if ( cluster < 1 || proc < 0 || !path_to_proxy_file || !errstack ) { dprintf(D_FULLDEBUG,"DCSchedd::delegateGSIcredential: bad parameters\n"); return false; } // connect to the schedd, send the DELEGATE_GSI_CRED_SCHEDD command rsock.timeout(20); // years of research... :) if( ! rsock.connect(_addr) ) { dprintf( D_ALWAYS, "DCSchedd::delegateGSIcredential: " "Failed to connect to schedd (%s)\n", _addr ); return false; } if( ! startCommand(DELEGATE_GSI_CRED_SCHEDD, (Sock*)&rsock, 0, errstack) ) { dprintf( D_ALWAYS, "DCSchedd::delegateGSIcredential: " "Failed send command to the schedd: %s\n", errstack->getFullText().c_str()); return false; } // If we're not already authenticated, force that now. if (!forceAuthentication( &rsock, errstack )) { dprintf( D_ALWAYS, "DCSchedd::delegateGSIcredential authentication failure: %s\n", errstack->getFullText().c_str() ); return false; } // Send the job id rsock.encode(); PROC_ID jobid; jobid.cluster = cluster; jobid.proc = proc; if ( !rsock.code(jobid) || !rsock.end_of_message() ) { dprintf(D_ALWAYS,"DCSchedd::delegateGSIcredential: " "Can't send jobid to the schedd\n"); return false; } // Delegate the gsi proxy filesize_t file_size = 0; // will receive the size of the file if ( rsock.put_x509_delegation(&file_size,path_to_proxy_file,expiration_time,result_expiration_time) < 0 ) { dprintf(D_ALWAYS, "DCSchedd::delegateGSIcredential " "failed to send proxy file %s\n", path_to_proxy_file); return false; } // Fetch the result rsock.decode(); reply = 0; rsock.code(reply); rsock.end_of_message(); if ( reply == 1 ) return true; else return false; }
int DCStartd::delegateX509Proxy( const char* proxy, time_t expiration_time, time_t *result_expiration_time ) { dprintf( D_FULLDEBUG, "Entering DCStartd::delegateX509Proxy()\n" ); setCmdStr( "delegateX509Proxy" ); if( ! claim_id ) { newError( CA_INVALID_REQUEST, "DCStartd::delegateX509Proxy: Called with NULL claim_id" ); return CONDOR_ERROR; } // if this claim is associated with a security session ClaimIdParser cidp(claim_id); // // 1) begin the DELEGATE_GSI_CRED_STARTD command // ReliSock* tmp = (ReliSock*)startCommand( DELEGATE_GSI_CRED_STARTD, Stream::reli_sock, 20, NULL, NULL, false, cidp.secSessionId() ); if( ! tmp ) { newError( CA_COMMUNICATION_ERROR, "DCStartd::delegateX509Proxy: Failed to send command DELEGATE_GSI_CRED_STARTD to the startd" ); return CONDOR_ERROR; } // // 2) get reply from startd - OK means continue, NOT_OK means // don't bother (the startd doesn't require a delegated // proxy // tmp->decode(); int reply; if( !tmp->code(reply) ) { newError( CA_COMMUNICATION_ERROR, "DCStartd::delegateX509Proxy: failed to receive reply from startd (1)" ); delete tmp; return CONDOR_ERROR; } if ( !tmp->end_of_message() ) { newError( CA_COMMUNICATION_ERROR, "DCStartd::delegateX509Proxy: end of message error from startd (1)" ); delete tmp; return CONDOR_ERROR; } if( reply == NOT_OK ) { delete tmp; return NOT_OK; } // // 3) send over the claim id and delegate (or copy) the given proxy // tmp->encode(); int use_delegation = param_boolean( "DELEGATE_JOB_GSI_CREDENTIALS", true ) ? 
1 : 0; if( !tmp->code( claim_id ) ) { newError( CA_COMMUNICATION_ERROR, "DCStartd::delegateX509Proxy: Failed to send claim id to the startd" ); delete tmp; return CONDOR_ERROR; } if ( !tmp->code( use_delegation ) ) { newError( CA_COMMUNICATION_ERROR, "DCStartd::delegateX509Proxy: Failed to send use_delegation flag to the startd" ); delete tmp; return CONDOR_ERROR; } int rv; filesize_t dont_care; if( use_delegation ) { rv = tmp->put_x509_delegation( &dont_care, proxy, expiration_time, result_expiration_time ); } else { dprintf( D_FULLDEBUG, "DELEGATE_JOB_GSI_CREDENTIALS is False; using direct copy\n"); if( ! tmp->get_encryption() ) { newError( CA_COMMUNICATION_ERROR, "DCStartd::delegateX509Proxy: Cannot copy: channel does not have encryption enabled" ); delete tmp; return CONDOR_ERROR; } rv = tmp->put_file( &dont_care, proxy ); } if( rv == -1 ) { newError( CA_FAILURE, "DCStartd::delegateX509Proxy: Failed to delegate proxy" ); delete tmp; return CONDOR_ERROR; } if ( !tmp->end_of_message() ) { newError( CA_FAILURE, "DCStartd::delegateX509Proxy: end of message error to startd" ); delete tmp; return CONDOR_ERROR; } // command successfully sent; now get the reply tmp->decode(); if( !tmp->code(reply) ) { newError( CA_COMMUNICATION_ERROR, "DCStartd::delegateX509Proxy: failed to receive reply from startd (2)" ); delete tmp; return CONDOR_ERROR; } if ( !tmp->end_of_message() ) { newError( CA_COMMUNICATION_ERROR, "DCStartd::delegateX509Proxy: end of message error from startd (2)" ); delete tmp; return CONDOR_ERROR; } delete tmp; dprintf( D_FULLDEBUG, "DCStartd::delegateX509Proxy: successfully sent command, reply is: %d\n", reply ); return reply; }
// Ask the schedd to perform an action on a set of jobs.
//
// Exactly one of `constraint` and `ids` must be given; passing both or
// neither is treated as a programming error (EXCEPT).
//
// Parameters:
//   action            - sent as ATTR_JOB_ACTION
//   constraint        - expression sent as ATTR_ACTION_CONSTRAINT
//   ids               - job ids, sent as the string ATTR_ACTION_IDS
//   reason/reason_attr           - optional string attribute to attach
//   reason_code/reason_code_attr - optional expression attribute to attach
//   result_type       - sent as ATTR_ACTION_RESULT_TYPE
//   notify_scheduler  - sent as ATTR_NOTIFY_JOB_SCHEDULER
//   errstack          - optional; passed through to startCommand() and
//                       forceAuthentication()
//
// Returns a newly allocated result ad that the caller must delete, or NULL
// on a local or communication failure.  When the schedd reports that the
// action failed, the result ad is still returned so the caller can inspect
// it.
ClassAd*
DCSchedd::actOnJobs( JobAction action,
					 const char* constraint, StringList* ids,
					 const char* reason, const char* reason_attr,
					 const char* reason_code, const char* reason_code_attr,
					 action_result_type_t result_type,
					 bool notify_scheduler,
					 CondorError * errstack )
{
	char* tmp = NULL;
	int size, reply;
	ReliSock rsock;

		// // // // // // // //
		// Construct the ad we want to send
		// // // // // // // //

	ClassAd cmd_ad;

	cmd_ad.Assign( ATTR_JOB_ACTION, (int)action );
	cmd_ad.Assign( ATTR_ACTION_RESULT_TYPE, (int)result_type );
	cmd_ad.AssignExpr( ATTR_NOTIFY_JOB_SCHEDULER,
					   notify_scheduler ? "True" : "False" );

	if( constraint ) {
		if( ids ) {
				// This is a programming error, not a run-time one
			EXCEPT( "DCSchedd::actOnJobs has both constraint and ids!" );
		}
			// The constraint is an expression, so it is inserted as
			// "attr = expr" and the parse result is checked.
			// 4 == strlen(" = ") plus the terminating NUL.
		size = strlen(constraint) + strlen(ATTR_ACTION_CONSTRAINT) + 4;
		tmp = (char*) malloc( size*sizeof(char) );
		if( !tmp ) {
			EXCEPT( "Out of memory!" );
		}
		sprintf( tmp, "%s = %s", ATTR_ACTION_CONSTRAINT, constraint );
		if( ! cmd_ad.Insert(tmp) ) {
			dprintf( D_ALWAYS, "DCSchedd::actOnJobs: "
					 "Can't insert constraint (%s) into ClassAd!\n",
					 constraint );
			free( tmp );
			return NULL;
		}
		free( tmp );
		tmp = NULL;
	} else if( ids ) {
		char* action_ids = ids->print_to_string();
		if ( action_ids ) {
				// Assign() stores the text as a string value, so nothing
				// in it can be misread as ClassAd syntax.
			cmd_ad.Assign( ATTR_ACTION_IDS, action_ids );
			free(action_ids);
			action_ids = NULL;
		}
	} else {
		EXCEPT( "DCSchedd::actOnJobs called without constraint or ids" );
	}

	if( reason_attr && reason ) {
			// Previously built as  attr = "reason"  with sprintf, which
			// broke (and was silently dropped) when the reason contained
			// a quote or backslash.
		cmd_ad.Assign( reason_attr, reason );
	}

	if( reason_code_attr && reason_code ) {
		cmd_ad.AssignExpr(reason_code_attr,reason_code);
	}

		// // // // // // // //
		// On the wire protocol
		// // // // // // // //

	rsock.timeout(20);   // years of research... :)
	if( ! rsock.connect(_addr) ) {
		dprintf( D_ALWAYS, "DCSchedd::actOnJobs: "
				 "Failed to connect to schedd (%s)\n", _addr );
		return NULL;
	}
	if( ! startCommand(ACT_ON_JOBS, (Sock*)&rsock, 0, errstack) ) {
		dprintf( D_ALWAYS, "DCSchedd::actOnJobs: "
				 "Failed to send command (ACT_ON_JOBS) to the schedd\n" );
		return NULL;
	}
		// First, if we're not already authenticated, force that now.
	if (!forceAuthentication( &rsock, errstack )) {
		dprintf( D_ALWAYS, "DCSchedd: authentication failure: %s\n",
				 errstack ? errstack->getFullText().c_str() : "" );
		return NULL;
	}

		// Now, put the command classad on the wire
	if( ! (putClassAd(&rsock, cmd_ad) && rsock.end_of_message()) ) {
		dprintf( D_ALWAYS, "DCSchedd:actOnJobs: Can't send classad\n" );
		return NULL;
	}

		// Next, we need to read the reply from the schedd if things
		// are ok and it's going to go forward.  If the schedd can't
		// read our reply to this ClassAd, it assumes we got killed
		// and it should abort its transaction
	rsock.decode();
	ClassAd* result_ad = new ClassAd();
	if( ! (getClassAd(&rsock, *result_ad) && rsock.end_of_message()) ) {
		dprintf( D_ALWAYS, "DCSchedd:actOnJobs: "
				 "Can't read response ad from %s\n", _addr );
		delete( result_ad );
		return NULL;
	}

		// If the action totally failed, the schedd will already have
		// aborted the transaction and closed up shop, so there's no
		// reason trying to continue.  However, we still want to
		// return the result ad we got back so that our caller can
		// figure out what went wrong.
	reply = FALSE;
	result_ad->LookupInteger( ATTR_ACTION_RESULT, reply );
	if( reply != OK ) {
		dprintf( D_ALWAYS, "DCSchedd:actOnJobs: Action failed\n" );
		return result_ad;
	}

		// Tell the schedd we're still here and ready to go
	rsock.encode();
	int answer = OK;
	if( ! (rsock.code(answer) && rsock.end_of_message()) ) {
		dprintf( D_ALWAYS, "DCSchedd:actOnJobs: Can't send reply\n" );
		delete( result_ad );
		return NULL;
	}

		// Finally, make sure the schedd didn't blow up trying to
		// commit these changes to the job queue...
		// NOTE(review): only the arrival of the confirmation is checked;
		// its value is read into `reply` but not examined.
	rsock.decode();
	if( ! (rsock.code(reply) && rsock.end_of_message()) ) {
		dprintf( D_ALWAYS, "DCSchedd:actOnJobs: "
				 "Can't read confirmation from %s\n", _addr );
		delete( result_ad );
		return NULL;
	}

	return result_ad;
}
bool DCStarter::peek(bool transfer_stdout, ssize_t &stdout_offset, bool transfer_stderr, ssize_t &stderr_offset, const std::vector<std::string> &filenames, std::vector<ssize_t> &offsets, size_t max_bytes, bool &retry_sensible, PeekGetFD &next, std::string &error_msg, unsigned timeout, const std::string &sec_session_id, DCTransferQueue *xfer_q) { compat_classad::ClassAd ad; ad.InsertAttr(ATTR_JOB_OUTPUT, transfer_stdout); ad.InsertAttr("OutOffset", stdout_offset); ad.InsertAttr(ATTR_JOB_ERROR, transfer_stderr); ad.InsertAttr("ErrOffset", stderr_offset); ad.InsertAttr(ATTR_VERSION, CondorVersion()); size_t total_files = 0; total_files += transfer_stdout ? 1 : 0; total_files += transfer_stderr ? 1 : 0; if (filenames.size()) { total_files += filenames.size(); std::vector<classad::ExprTree *> filelist; filelist.reserve(filenames.size()); std::vector<classad::ExprTree *> offsetlist; offsetlist.reserve(filenames.size()); std::vector<ssize_t>::const_iterator it2 = offsets.begin(); for (std::vector<std::string>::const_iterator it = filenames.begin(); it != filenames.end() && it2 != offsets.end(); it++, it2++) { classad::Value value; value.SetStringValue(*it); filelist.push_back(classad::Literal::MakeLiteral(value)); value.SetIntegerValue(*it2); offsetlist.push_back(classad::Literal::MakeLiteral(value)); } classad::ExprTree *list(classad::ExprList::MakeExprList(filelist)); ad.Insert("TransferFiles", list); list = classad::ExprList::MakeExprList(offsetlist); ad.Insert("TransferOffsets", list); } ad.InsertAttr(ATTR_MAX_TRANSFER_BYTES, static_cast<long long>(max_bytes)); ReliSock sock; if (IsDebugLevel(D_COMMAND)) { dprintf (D_COMMAND, "DCStarter::peek(%s,...) making connection to %s\n", getCommandStringSafe(STARTER_PEEK), _addr ? 
_addr : "NULL"); } if( !connectSock(&sock, timeout, NULL) ) { error_msg = "Failed to connect to starter"; return false; } if( !startCommand(STARTER_PEEK, &sock, timeout, NULL, NULL, false, sec_session_id.c_str()) ) { error_msg = "Failed to send START_PEEK to starter"; return false; } sock.encode(); if (!putClassAd(&sock, ad) || !sock.end_of_message()) { error_msg = "Failed to send request to starter"; return false; } compat_classad::ClassAd response; sock.decode(); if (!getClassAd(&sock, response) || !sock.end_of_message()) { error_msg = "Failed to read response for peeking at logs."; return false; } dPrintAd(D_FULLDEBUG, response); bool success = false; if (!response.EvaluateAttrBool(ATTR_RESULT, success) || !success) { response.EvaluateAttrBool(ATTR_RETRY, retry_sensible); error_msg = "Remote operation failed."; response.EvaluateAttrString(ATTR_ERROR_STRING, error_msg); return false; } classad::Value valueX; classad_shared_ptr<classad::ExprList> list; if (!response.EvaluateAttr("TransferFiles", valueX) || !valueX.IsSListValue(list)) { error_msg = "Unable to evaluate starter response"; return false; } classad_shared_ptr<classad::ExprList> offlist; if (!response.EvaluateAttr("TransferOffsets", valueX) || !valueX.IsSListValue(offlist)) { error_msg = "Unable to evaluate starter response (missing offsets)"; return false; } size_t remaining = max_bytes; size_t file_count = 0; classad::ExprList::const_iterator it2 = offlist->begin(); for (classad::ExprList::const_iterator it = list->begin(); it != list->end() && it2 != offlist->end(); it++, it2++) { classad::Value value; (*it2)->Evaluate(value); off_t off = -1; value.IsIntegerValue(off); (*it)->Evaluate(value); std::string filename; int64_t xfer_fd = -1; if (!value.IsStringValue(filename) && value.IsIntegerValue(xfer_fd)) { if (xfer_fd == 0) filename = "_condor_stdout"; if (xfer_fd == 1) filename = "_condor_stderr"; } int fd = next.getNextFD(filename); filesize_t size = -1; int retval; if ((retval = sock.get_file(&size, 
fd, false, false, remaining, xfer_q)) && (retval != GET_FILE_MAX_BYTES_EXCEEDED)) { error_msg = "Internal error when transferring file " + filename; } else if (size >= 0) { remaining -= max_bytes; file_count++; off += size; } else { error_msg = "Failed to transfer file " + filename; } if (xfer_fd == 0) { stdout_offset = off; //dprintf(D_FULLDEBUG, "New stdout offset: %ld\n", stdout_offset); } else if (xfer_fd == 1) { stderr_offset = off; } else { std::vector<ssize_t>::iterator it4 = offsets.begin(); for (std::vector<std::string>::const_iterator it3 = filenames.begin(); it3 != filenames.end() && it4 != offsets.end(); it3++, it4++) { if (*it3 == filename) *it4 = off; } } } size_t remote_file_count; if (!sock.get(remote_file_count) || !sock.end_of_message()) { error_msg = "Unable to get remote file count."; return false; } if (file_count != remote_file_count) { formatstr(error_msg, "Received %ld files, but remote side thought it sent %ld files\n", file_count, remote_file_count); return false; } if ((total_files != file_count) && !error_msg.size()) { error_msg = "At least one file transfer failed."; return false; } return true; }
bool DCSchedd::getJobConnectInfo( PROC_ID jobid, int subproc, char const *session_info, int timeout, CondorError *errstack, MyString &starter_addr, MyString &starter_claim_id, MyString &starter_version, MyString &slot_name, MyString &error_msg, bool &retry_is_sensible, int &job_status, MyString &hold_reason) { ClassAd input; ClassAd output; input.Assign(ATTR_CLUSTER_ID,jobid.cluster); input.Assign(ATTR_PROC_ID,jobid.proc); if( subproc != -1 ) { input.Assign(ATTR_SUB_PROC_ID,subproc); } input.Assign(ATTR_SESSION_INFO,session_info); ReliSock sock; if( !connectSock(&sock,timeout,errstack) ) { error_msg = "Failed to connect to schedd"; dprintf( D_ALWAYS, "%s\n",error_msg.Value()); return false; } if( !startCommand(GET_JOB_CONNECT_INFO, &sock, timeout, errstack) ) { error_msg = "Failed to send GET_JOB_CONNECT_INFO to schedd"; dprintf( D_ALWAYS, "%s\n",error_msg.Value()); return false; } if( !forceAuthentication(&sock, errstack) ) { error_msg = "Failed to authenticate"; dprintf( D_ALWAYS, "%s\n",error_msg.Value()); return false; } sock.encode(); if( !putClassAd(&sock, input) || !sock.end_of_message() ) { error_msg = "Failed to send GET_JOB_CONNECT_INFO to schedd"; dprintf( D_ALWAYS, "%s\n",error_msg.Value()); return false; } sock.decode(); if( !getClassAd(&sock, output) || !sock.end_of_message() ) { error_msg = "Failed to get response from schedd"; dprintf( D_ALWAYS, "%s\n",error_msg.Value()); return false; } if( IsFulldebug(D_FULLDEBUG) ) { std::string adstr; sPrintAd(adstr, output, true); dprintf(D_FULLDEBUG,"Response for GET_JOB_CONNECT_INFO:\n%s\n", adstr.c_str()); } bool result=false; output.LookupBool(ATTR_RESULT,result); if( !result ) { output.LookupString(ATTR_HOLD_REASON,hold_reason); output.LookupString(ATTR_ERROR_STRING,error_msg); retry_is_sensible = false; output.LookupBool(ATTR_RETRY,retry_is_sensible); output.LookupInteger(ATTR_JOB_STATUS,job_status); } else { output.LookupString(ATTR_STARTER_IP_ADDR,starter_addr); 
output.LookupString(ATTR_CLAIM_ID,starter_claim_id); output.LookupString(ATTR_VERSION,starter_version); output.LookupString(ATTR_REMOTE_HOST,slot_name); } return result; }
// download the files associated with the jobads to the sandbox at td_sinful // with the supplied capability. // The work_ad should contain: // ATTR_TREQ_CAPABILITY // ATTR_TREQ_FTP // ATTR_TREQ_JOBID_ALLOW_LIST bool DCTransferD::download_job_files(ClassAd *work_ad, CondorError * errstack) { ReliSock *rsock = NULL; int timeout = 60 * 60 * 8; // transfers take a long time... int i; ClassAd reqad, respad; std::string cap; int ftp; int invalid; int protocol; std::string reason; int num_transfers; ClassAd jad; const char *lhstr = NULL; ExprTree *tree = NULL; ////////////////////////////////////////////////////////////////////////// // Connect to the transferd and authenticate ////////////////////////////////////////////////////////////////////////// // This call with automatically connect to _addr, which was set in the // constructor of this object to be the transferd in question. rsock = (ReliSock*)startCommand(TRANSFERD_READ_FILES, Stream::reli_sock, timeout, errstack); if( ! rsock ) { dprintf( D_ALWAYS, "DCTransferD::download_job_files: " "Failed to send command (TRANSFERD_READ_FILES) " "to the schedd\n" ); errstack->push("DC_TRANSFERD", 1, "Failed to start a TRANSFERD_READ_FILES command."); return false; } // First, if we're not already authenticated, force that now. if (!forceAuthentication( rsock, errstack )) { dprintf( D_ALWAYS, "DCTransferD::download_job_files() authentication " "failure: %s\n", errstack->getFullText().c_str() ); errstack->push("DC_TRANSFERD", 1, "Failed to authenticate properly."); return false; } rsock->encode(); ////////////////////////////////////////////////////////////////////////// // Query the transferd about the capability/protocol and see if I can // download my files. It will respond with a classad saying good or bad. 
////////////////////////////////////////////////////////////////////////// work_ad->LookupString(ATTR_TREQ_CAPABILITY, cap); work_ad->LookupInteger(ATTR_TREQ_FTP, ftp); reqad.Assign(ATTR_TREQ_CAPABILITY, cap); reqad.Assign(ATTR_TREQ_FTP, ftp); // This request ad to the transferd should contain: // ATTR_TREQ_CAPABILITY // ATTR_TREQ_FTP reqad.put(*rsock); rsock->end_of_message(); rsock->decode(); // This response ad from the transferd should contain: // ATTR_TREQ_INVALID_REQUEST (set to true) // ATTR_TREQ_INVALID_REASON // // OR // // ATTR_TREQ_INVALID_REQUEST (set to false) // ATTR_TREQ_NUM_TRANSFERS // respad.initFromStream(*rsock); rsock->end_of_message(); respad.LookupInteger(ATTR_TREQ_INVALID_REQUEST, invalid); if (invalid == TRUE) { // The transferd rejected my attempt to upload the fileset delete rsock; respad.LookupString(ATTR_TREQ_INVALID_REASON, reason); errstack->push("DC_TRANSFERD", 1, reason.c_str()); return false; } respad.LookupInteger(ATTR_TREQ_NUM_TRANSFERS, num_transfers); ////////////////////////////////////////////////////////////////////////// // Based upon the protocol I've chosen, use that method to download the // files. When using the FileTrans protocol, a child process on the // transferd side will be sending me individual job ads and then // instantiating a filetransfer object for that ad. ////////////////////////////////////////////////////////////////////////// dprintf(D_ALWAYS, "Receiving fileset"); work_ad->LookupInteger(ATTR_TREQ_FTP, protocol); switch(protocol) { case FTP_CFTP: // download the files using the FileTransfer Object for (i = 0; i < num_transfers; i++) { // Grab a job ad the server is sending us so we know what // to receive. jad.initFromStream(*rsock); rsock->end_of_message(); // translate the job ad by replacing the // saved SUBMIT_ attributes so the download goes into the // correct place. 
jad.ResetExpr(); while( jad.NextExpr(lhstr, tree) ) { if ( lhstr && strncasecmp("SUBMIT_",lhstr,7)==0 ) { // this attr name starts with SUBMIT_ // compute new lhs (strip off the SUBMIT_) const char *new_attr_name = strchr(lhstr,'_'); ExprTree * pTree; ASSERT(new_attr_name); new_attr_name++; // insert attribute pTree = tree->Copy(); jad.Insert(new_attr_name, pTree, false); } } // while next expr // instantiate a filetransfer object and have it accept the // files. FileTransfer ftrans; if ( !ftrans.SimpleInit(&jad, false, false, rsock) ) { delete rsock; errstack->push("DC_TRANSFERD", 1, "Failed to initate uploading of files."); return false; } // We want files to be copied to their final places, so apply // any filename remaps when downloading. if ( !ftrans.InitDownloadFilenameRemaps(&jad) ) { return false; } ftrans.setPeerVersion( version() ); if ( !ftrans.DownloadFiles() ) { delete rsock; errstack->push("DC_TRANSFERD", 1, "Failed to download files."); return false; } dprintf(D_ALWAYS | D_NOHEADER, "."); } rsock->end_of_message(); dprintf(D_ALWAYS | D_NOHEADER, "\n"); break; default: // Bail due to user error. This client doesn't support the unknown // protocol. delete rsock; errstack->push("DC_TRANSFERD", 1, "Unknown file transfer protocol selected."); return false; break; } ////////////////////////////////////////////////////////////////////////// // Get the response from the transferd once it sees a completed // movement of files to the child process. ////////////////////////////////////////////////////////////////////////// rsock->decode(); respad.initFromStream(*rsock); rsock->end_of_message(); // close up shop delete rsock; respad.LookupInteger(ATTR_TREQ_INVALID_REQUEST, invalid); if ( invalid == TRUE ) { respad.LookupString(ATTR_TREQ_INVALID_REASON, reason); errstack->push("DC_TRANSFERD", 1, reason.c_str()); return false; } return true; }
void VMRegister::requestHostClassAds(void) { // find host startd daemon if( !m_vm_host_daemon ) m_vm_host_daemon = vmapi_findDaemon( m_vm_host_name, DT_STARTD); if( !m_vm_host_daemon ) { dprintf( D_FULLDEBUG, "Can't find host(%s) Startd daemon\n", m_vm_host_name ); return; } ClassAd query_ad; query_ad.SetMyTypeName(QUERY_ADTYPE); query_ad.SetTargetTypeName(STARTD_ADTYPE); query_ad.Assign(ATTR_REQUIREMENTS, true); char *addr = m_vm_host_daemon->addr(); Daemon hstartd(DT_STARTD, addr); ReliSock ssock; ssock.timeout( VM_SOCKET_TIMEOUT ); ssock.encode(); if( !ssock.connect(addr) ) { dprintf( D_FULLDEBUG, "Failed to connect to host startd(%s)\n to get host classAd", addr); return; } if(!hstartd.startCommand( QUERY_STARTD_ADS, &ssock )) { dprintf( D_FULLDEBUG, "Failed to send QUERY_STARTD_ADS command to host startd(%s)\n", addr); return; } if( !query_ad.put(ssock) ) { dprintf(D_FULLDEBUG, "Failed to send query Ad to host startd(%s)\n", addr); } if( !ssock.end_of_message() ) { dprintf(D_FULLDEBUG, "Failed to send query EOM to host startd(%s)\n", addr); } // Read host classAds ssock.timeout( VM_SOCKET_TIMEOUT ); ssock.decode(); int more = 1, num_ads = 0; ClassAdList adList; ClassAd *ad; while (more) { if( !ssock.code(more) ) { ssock.end_of_message(); return; } if(more) { ad = new ClassAd; if( !ad->initFromStream(ssock) ) { ssock.end_of_message(); delete ad; return; } adList.Insert(ad); num_ads++; } } ssock.end_of_message(); dprintf(D_FULLDEBUG, "Got %d classAds from host\n", num_ads); // Although we can get more than one classAd from host machine, // we use only the first one classAd adList.Rewind(); ad = adList.Next(); #if !defined(WANT_OLD_CLASSADS) ad->AddTargetRefs( TargetJobAttrs ); #endif // Get each Attribute from the classAd // added "HOST_" in front of each Attribute name const char *name; ExprTree *expr; ad->ResetExpr(); while( ad->NextExpr(name, expr) ) { MyString attr; attr += "HOST_"; attr += name; // Insert or Update an attribute to host_classAd in a 
VMRegister object ExprTree * pTree = expr->Copy(); host_classad->Insert(attr.Value(), pTree, true); } }