void PandadClassAdLogPlugin::setAttribute( const char * key, const char * attribute, const char * value ) { int cluster = 0, proc = 0; if( shouldIgnoreJob( key, cluster, proc ) ) { return; } dprintf( D_FULLDEBUG, "PANDA: setAttribute( %s, %s, %s ).\n", key, attribute, value ); std::string globalJobID; if( ! getGlobalJobID( cluster, proc, globalJobID ) ) { return; } // See comment in newClassAd(), above. if( strcmp( attribute, "ProcId" ) == 0 ) { ClassAd * clusterAd = ScheddGetJobAd( cluster, -1 ); if( clusterAd != NULL ) { ExprTree * valueExpr = NULL; const char * attribute = NULL; clusterAd->ResetExpr(); while( clusterAd->NextExpr( attribute, valueExpr ) ) { dprintf( D_FULLDEBUG, "PANDA: found %s in cluster ad.\n", attribute ); if( shouldIgnoreAttribute( attribute ) ) { continue; } std::string valueString; classad::ClassAdUnParser unparser; unparser.Unparse( valueString, valueExpr ); updatePandaJob( globalJobID.c_str(), attribute, valueString.c_str() ); } } else { dprintf( D_FULLDEBUG, "PANDA: Failed to find cluster ad for %d.%d\n", cluster, proc ); } } if( shouldIgnoreAttribute( attribute ) ) { return; } updatePandaJob( globalJobID.c_str(), attribute, value ); }
// Merge the attributes of `ad` into the stored ad found under `key`.
//
// The work is wrapped in a transaction on _ads.  If no ad is stored under
// `key` the transaction is aborted and nothing changes.  Otherwise each
// attribute of `ad` whose unparsed value differs from the stored one (or that
// is absent from the stored ad) is written with SetAttribute(), except for
// the type and authenticated-identity attributes, which are never touched.
void OfflineCollectorPlugin::mergeClassAd ( ClassAd &ad, char const *key )
{
    if ( !_ads ) {
        return;
    }

    ClassAd *stored_ad = NULL;

    _ads->BeginTransaction ();

    if ( !_ads->LookupClassAd ( key, stored_ad ) ) {
        _ads->AbortTransaction ();
        return;
    }

    ExprTree *expr;
    const char *attr_name;

    ad.ResetExpr();
    while ( ad.NextExpr ( attr_name, expr ) ) {
        MyString incoming;
        MyString existing;

        ASSERT( attr_name && expr );

        incoming = ExprTreeToString( expr );

        // Skip attributes whose stored value is already identical.
        expr = stored_ad->LookupExpr( attr_name );
        if ( expr ) {
            existing = ExprTreeToString( expr );
            if ( incoming == existing ) {
                continue;
            }
        }

        // filter out stuff we never want to mess with
        const bool is_protected =
            strcasecmp( attr_name, ATTR_MY_TYPE ) == 0 ||
            strcasecmp( attr_name, ATTR_TARGET_TYPE ) == 0 ||
            strcasecmp( attr_name, ATTR_AUTHENTICATED_IDENTITY ) == 0;
        if ( is_protected ) {
            continue;
        }

        _ads->SetAttribute( key, attr_name, incoming.Value() );
    }

    _ads->CommitTransaction ();
}
void HookPrepareJobClient::hookExited(int exit_status) { HookClient::hookExited(exit_status); if (WIFSIGNALED(exit_status) || WEXITSTATUS(exit_status) != 0) { MyString status_msg = ""; statusString(exit_status, status_msg); int subcode; if (WIFSIGNALED(exit_status)) { subcode = -1 * WTERMSIG(exit_status); } else { subcode = WEXITSTATUS(exit_status); } MyString err_msg; err_msg.sprintf("HOOK_PREPARE_JOB (%s) failed (%s)", m_hook_path, status_msg.Value()); dprintf(D_ALWAYS|D_FAILURE, "ERROR in StarterHookMgr::tryHookPrepareJob: %s\n", err_msg.Value()); Starter->jic->notifyStarterError(err_msg.Value(), true, CONDOR_HOLD_CODE_HookPrepareJobFailure, subcode); Starter->RemoteShutdownFast(0); } else { // Make an update ad from the stdout of the hook MyString out(*getStdOut()); ClassAd updateAd; updateAd.initFromString(out.Value(), NULL); dprintf(D_FULLDEBUG, "Prepare hook output classad\n"); updateAd.dPrint(D_FULLDEBUG); // Insert each expr from the update ad into the job ad updateAd.ResetExpr(); ClassAd* job_ad = Starter->jic->jobClassAd(); const char *name; ExprTree *et; while (updateAd.NextExpr(name, et)) { ExprTree *pCopy = et->Copy(); job_ad->Insert(name, pCopy, false); } dprintf(D_FULLDEBUG, "After Prepare hook: merged job classad:\n"); job_ad->dPrint(D_FULLDEBUG); Starter->jobEnvironmentReady(); } }
// Rebuild `_map` from the attributes of `ad`.
//
// `_map` is cleared first, then addAttributeToMap() is called once per
// attribute name yielded by the ad's iterator.  Returns false as soon as one
// attribute cannot be added (the map is left partially filled), true if all
// attributes were added.
bool BaseCodec::classAdToMap(ClassAd& ad, AttributeMapType& _map)
{
    const char *attr_name;
    ExprTree *attr_expr;   // required by NextExpr(); not used otherwise

    ad.ResetExpr();
    _map.clear();

    bool all_added = true;
    while (all_added && ad.NextExpr(attr_name, attr_expr)) {
        all_added = addAttributeToMap(ad, attr_name, _map);
    }

    //  //debug
    //  if (IsFulldebug(D_FULLDEBUG)) {
    //      ad.dPrint(D_FULLDEBUG|D_NOHEADER);
    //  }

    return all_added;
}
// download the files associated with the jobads to the sandbox at td_sinful // with the supplied capability. // The work_ad should contain: // ATTR_TREQ_CAPABILITY // ATTR_TREQ_FTP // ATTR_TREQ_JOBID_ALLOW_LIST bool DCTransferD::download_job_files(ClassAd *work_ad, CondorError * errstack) { ReliSock *rsock = NULL; int timeout = 60 * 60 * 8; // transfers take a long time... int i; ClassAd reqad, respad; std::string cap; int ftp; int invalid; int protocol; std::string reason; int num_transfers; ClassAd jad; const char *lhstr = NULL; ExprTree *tree = NULL; ////////////////////////////////////////////////////////////////////////// // Connect to the transferd and authenticate ////////////////////////////////////////////////////////////////////////// // This call with automatically connect to _addr, which was set in the // constructor of this object to be the transferd in question. rsock = (ReliSock*)startCommand(TRANSFERD_READ_FILES, Stream::reli_sock, timeout, errstack); if( ! rsock ) { dprintf( D_ALWAYS, "DCTransferD::download_job_files: " "Failed to send command (TRANSFERD_READ_FILES) " "to the schedd\n" ); errstack->push("DC_TRANSFERD", 1, "Failed to start a TRANSFERD_READ_FILES command."); return false; } // First, if we're not already authenticated, force that now. if (!forceAuthentication( rsock, errstack )) { dprintf( D_ALWAYS, "DCTransferD::download_job_files() authentication " "failure: %s\n", errstack->getFullText().c_str() ); errstack->push("DC_TRANSFERD", 1, "Failed to authenticate properly."); return false; } rsock->encode(); ////////////////////////////////////////////////////////////////////////// // Query the transferd about the capability/protocol and see if I can // download my files. It will respond with a classad saying good or bad. 
////////////////////////////////////////////////////////////////////////// work_ad->LookupString(ATTR_TREQ_CAPABILITY, cap); work_ad->LookupInteger(ATTR_TREQ_FTP, ftp); reqad.Assign(ATTR_TREQ_CAPABILITY, cap); reqad.Assign(ATTR_TREQ_FTP, ftp); // This request ad to the transferd should contain: // ATTR_TREQ_CAPABILITY // ATTR_TREQ_FTP reqad.put(*rsock); rsock->end_of_message(); rsock->decode(); // This response ad from the transferd should contain: // ATTR_TREQ_INVALID_REQUEST (set to true) // ATTR_TREQ_INVALID_REASON // // OR // // ATTR_TREQ_INVALID_REQUEST (set to false) // ATTR_TREQ_NUM_TRANSFERS // respad.initFromStream(*rsock); rsock->end_of_message(); respad.LookupInteger(ATTR_TREQ_INVALID_REQUEST, invalid); if (invalid == TRUE) { // The transferd rejected my attempt to upload the fileset delete rsock; respad.LookupString(ATTR_TREQ_INVALID_REASON, reason); errstack->push("DC_TRANSFERD", 1, reason.c_str()); return false; } respad.LookupInteger(ATTR_TREQ_NUM_TRANSFERS, num_transfers); ////////////////////////////////////////////////////////////////////////// // Based upon the protocol I've chosen, use that method to download the // files. When using the FileTrans protocol, a child process on the // transferd side will be sending me individual job ads and then // instantiating a filetransfer object for that ad. ////////////////////////////////////////////////////////////////////////// dprintf(D_ALWAYS, "Receiving fileset"); work_ad->LookupInteger(ATTR_TREQ_FTP, protocol); switch(protocol) { case FTP_CFTP: // download the files using the FileTransfer Object for (i = 0; i < num_transfers; i++) { // Grab a job ad the server is sending us so we know what // to receive. jad.initFromStream(*rsock); rsock->end_of_message(); // translate the job ad by replacing the // saved SUBMIT_ attributes so the download goes into the // correct place. 
jad.ResetExpr(); while( jad.NextExpr(lhstr, tree) ) { if ( lhstr && strncasecmp("SUBMIT_",lhstr,7)==0 ) { // this attr name starts with SUBMIT_ // compute new lhs (strip off the SUBMIT_) const char *new_attr_name = strchr(lhstr,'_'); ExprTree * pTree; ASSERT(new_attr_name); new_attr_name++; // insert attribute pTree = tree->Copy(); jad.Insert(new_attr_name, pTree, false); } } // while next expr // instantiate a filetransfer object and have it accept the // files. FileTransfer ftrans; if ( !ftrans.SimpleInit(&jad, false, false, rsock) ) { delete rsock; errstack->push("DC_TRANSFERD", 1, "Failed to initate uploading of files."); return false; } // We want files to be copied to their final places, so apply // any filename remaps when downloading. if ( !ftrans.InitDownloadFilenameRemaps(&jad) ) { return false; } ftrans.setPeerVersion( version() ); if ( !ftrans.DownloadFiles() ) { delete rsock; errstack->push("DC_TRANSFERD", 1, "Failed to download files."); return false; } dprintf(D_ALWAYS | D_NOHEADER, "."); } rsock->end_of_message(); dprintf(D_ALWAYS | D_NOHEADER, "\n"); break; default: // Bail due to user error. This client doesn't support the unknown // protocol. delete rsock; errstack->push("DC_TRANSFERD", 1, "Unknown file transfer protocol selected."); return false; break; } ////////////////////////////////////////////////////////////////////////// // Get the response from the transferd once it sees a completed // movement of files to the child process. ////////////////////////////////////////////////////////////////////////// rsock->decode(); respad.initFromStream(*rsock); rsock->end_of_message(); // close up shop delete rsock; respad.LookupInteger(ATTR_TREQ_INVALID_REQUEST, invalid); if ( invalid == TRUE ) { respad.LookupString(ATTR_TREQ_INVALID_REASON, reason); errstack->push("DC_TRANSFERD", 1, reason.c_str()); return false; } return true; }
bool SchedulerObject::submit(AttributeMapType &jobAdMap, std::string &id, std::string &text) { int cluster; int proc; if (!m_codec) { text = "Codec has not been initialized"; return false; } // our mandatory set of attributes for a submit const char* required[] = { ATTR_JOB_CMD, ATTR_REQUIREMENTS, ATTR_OWNER, ATTR_JOB_IWD, NULL }; // 1. Create transaction BeginTransaction(); // 2. Create cluster if (-1 == (cluster = NewCluster())) { AbortTransaction(); text = "Failed to create new cluster"; return false; } // 3. Create proc if (-1 == (proc = NewProc(cluster))) { AbortTransaction(); text = "Failed to create new proc"; return false; } // 4. Submit job ad // Schema: (vanilla job) // Schedd demands - Owner, JobUniverse // To run - JobStatus, Requirements // Schedd excepts if no Owner // Schedd prunes on startup if no Owner or JobUniverse // Schedd won't run job without JobStatus // Job cannot match without Requirements // Shadow rejects jobs without an Iwd // Shadow: Job has no CondorVersion, assuming pre version 6.3.3 // Shadow: Unix Vanilla job is pre version 6.3.3, setting 'TransferFiles = "NEVER"' // Starter won't run job without Cmd // Starter needs a valid Owner (local account name) if not using nobody // condor_q requires ClusterId (int), ProcId (int), QDate (int), RemoteUserCpu (float), JobStatus (int), JobPrio (int), ImageSize (int), Owner (str) and Cmd (str) // Schema: (vm job) // ShouldTransferFiles - unset by default, must be set ClassAd ad; int universe; // ShouldTransferFiles - unset by default, must be set // shadow will try to setup local transfer sandbox otherwise // without good priv ad.Assign(ATTR_SHOULD_TRANSFER_FILES, "NO"); if (!m_codec->mapToClassAd(jobAdMap, ad, text)) { AbortTransaction(); return false; } std::string missing; if (!checkRequiredAttrs(ad, required, missing)) { AbortTransaction(); text = "Job ad is missing required attributes: " + missing; return false; } // EARLY SET: These attribute are set early so the incoming ad // has a 
change to override them. ::SetAttribute(cluster, proc, ATTR_JOB_STATUS, "1"); // 1 = idle // Junk that condor_q wants, but really shouldn't be necessary ::SetAttribute(cluster, proc, ATTR_JOB_REMOTE_USER_CPU, "0.0"); // float ::SetAttribute(cluster, proc, ATTR_JOB_PRIO, "0"); // int ::SetAttribute(cluster, proc, ATTR_IMAGE_SIZE, "0"); // int if (!ad.LookupInteger(ATTR_JOB_UNIVERSE, universe)) { char* uni_str = param("DEFAULT_UNIVERSE"); if (!uni_str) { universe = CONDOR_UNIVERSE_VANILLA; } else { universe = CondorUniverseNumber(uni_str); } ::SetAttributeInt(cluster, proc, ATTR_JOB_UNIVERSE, universe ); } // more stuff - without these our idle stats are whack if ( universe != CONDOR_UNIVERSE_MPI && universe != CONDOR_UNIVERSE_PVM ) { ::SetAttribute(cluster, proc, ATTR_MAX_HOSTS, "1"); // int ::SetAttribute(cluster, proc, ATTR_MIN_HOSTS, "1"); // int } ::SetAttribute(cluster, proc, ATTR_CURRENT_HOSTS, "0"); // int ExprTree *expr; const char *name; std::string value; ad.ResetExpr(); while (ad.NextExpr(name,expr)) { // All these extra lookups are horrible. They have to // be there because the ClassAd may have multiple // copies of the same attribute name! This means that // the last attribute with a given name will set the // value, but the last attribute tends to be the // attribute with the oldest (wrong) value. How // annoying is that! if (!(expr = ad.Lookup(name))) { dprintf(D_ALWAYS, "Failed to lookup %s\n", name); AbortTransaction(); text = "Failed to parse job ad attribute"; return false; } value = ExprTreeToString(expr); ::SetAttribute(cluster, proc, name, value.c_str()); } // LATE SET: These attributes are set late, after the incoming // ad, so they override whatever the incoming ad set. char buf[22]; // 22 is max size for an id, 2^32 + . 
+ 2^32 + \0 snprintf(buf, 22, "%d", cluster); ::SetAttribute(cluster, proc, ATTR_CLUSTER_ID, buf); snprintf(buf, 22, "%d", proc); ::SetAttribute(cluster, proc, ATTR_PROC_ID, buf); snprintf(buf, 22, "%ld", time(NULL)); ::SetAttribute(cluster, proc, ATTR_Q_DATE, buf); // Could check for some invalid attributes, e.g // JobUniverse <= CONDOR_UNIVERSE_MIN or >= CONDOR_UNIVERSE_MAX // 5. Commit transaction CommitTransaction(); // 6. Reschedule scheduler.needReschedule(); // 7. Return identifier // TODO: dag ids? string tmp; //tmp.sprintf("%s#%d.%d", Name, cluster, proc); // we have other API compositions for job id and submission id // so let's return raw cluster.proc aviUtilFmt(tmp,"%d.%d", cluster, proc); id = tmp.c_str(); return true; }
bool DCSchedd::receiveJobSandbox(const char* constraint, CondorError * errstack, int * numdone /*=0*/) { if(numdone) { *numdone = 0; } ExprTree *tree = NULL; const char *lhstr; int reply; int i; ReliSock rsock; int JobAdsArrayLen; bool use_new_command = true; if ( version() ) { CondorVersionInfo vi( version() ); if ( vi.built_since_version(6,7,7) ) { use_new_command = true; } else { use_new_command = false; } } // // // // // // // // // On the wire protocol // // // // // // // // rsock.timeout(20); // years of research... :) if( ! rsock.connect(_addr) ) { dprintf( D_ALWAYS, "DCSchedd::receiveJobSandbox: " "Failed to connect to schedd (%s)\n", _addr ); return false; } if ( use_new_command ) { if( ! startCommand(TRANSFER_DATA_WITH_PERMS, (Sock*)&rsock, 0, errstack) ) { dprintf( D_ALWAYS, "DCSchedd::receiveJobSandbox: " "Failed to send command (TRANSFER_DATA_WITH_PERMS) " "to the schedd\n" ); return false; } } else { if( ! startCommand(TRANSFER_DATA, (Sock*)&rsock, 0, errstack) ) { dprintf( D_ALWAYS, "DCSchedd::receiveJobSandbox: " "Failed to send command (TRANSFER_DATA) " "to the schedd\n" ); return false; } } // First, if we're not already authenticated, force that now. if (!forceAuthentication( &rsock, errstack )) { dprintf( D_ALWAYS, "DCSchedd::receiveJobSandbox: authentication failure: %s\n", errstack ? errstack->getFullText().c_str() : "" ); return false; } rsock.encode(); // Send our version if using the new command if ( use_new_command ) { // Need to use a named variable, else the wrong version of // code() is called. 
char *my_version = strdup( CondorVersion() ); if ( !rsock.code(my_version) ) { dprintf(D_ALWAYS,"DCSchedd:receiveJobSandbox: " "Can't send version string to the schedd\n"); free( my_version ); return false; } free( my_version ); } // Send the constraint char * nc_constraint = strdup( constraint ); // de-const if ( !rsock.code(nc_constraint) ) { free( nc_constraint ); dprintf(D_ALWAYS,"DCSchedd:receiveJobSandbox: " "Can't send JobAdsArrayLen to the schedd\n"); return false; } free( nc_constraint ); if ( !rsock.end_of_message() ) { std::string errmsg; formatstr(errmsg, "Can't send initial message (version + constraint) to schedd (%s)", _addr); dprintf(D_ALWAYS,"DCSchedd::receiveJobSandbox: %s\n", errmsg.c_str()); if( errstack ) { errstack->push( "DCSchedd::receiveJobSandbox", CEDAR_ERR_EOM_FAILED, errmsg.c_str()); } return false; } // Now, read how many jobs matched the constraint. rsock.decode(); if ( !rsock.code(JobAdsArrayLen) ) { std::string errmsg; formatstr(errmsg, "Can't receive JobAdsArrayLen from the schedd (%s)", _addr); dprintf(D_ALWAYS,"DCSchedd::receiveJobSandbox: %s\n", errmsg.c_str()); if( errstack ) { errstack->push( "DCSchedd::receiveJobSandbox", CEDAR_ERR_GET_FAILED, errmsg.c_str()); } return false; } rsock.end_of_message(); dprintf(D_FULLDEBUG,"DCSchedd:receiveJobSandbox: " "%d jobs matched my constraint (%s)\n", JobAdsArrayLen, constraint); // Now read all the files via the file transfer object for (i=0; i<JobAdsArrayLen; i++) { FileTransfer ftrans; ClassAd job; // grab job ClassAd if ( !getClassAd(&rsock, job) ) { std::string errmsg; formatstr(errmsg, "Can't receive job ad %d from the schedd", i); dprintf(D_ALWAYS, "DCSchedd::receiveJobSandbox: %s\n", errmsg.c_str()); if( errstack ) { errstack->push( "DCSchedd::receiveJobSandbox", CEDAR_ERR_GET_FAILED, errmsg.c_str()); } return false; } rsock.end_of_message(); // translate the job ad by replacing the // saved SUBMIT_ attributes job.ResetExpr(); while( job.NextExpr(lhstr, tree) ) { if ( lhstr && 
strncasecmp("SUBMIT_",lhstr,7)==0 ) { // this attr name starts with SUBMIT_ // compute new lhs (strip off the SUBMIT_) const char *new_attr_name = strchr(lhstr,'_'); ExprTree * pTree; ASSERT(new_attr_name); new_attr_name++; // insert attribute pTree = tree->Copy(); job.Insert(new_attr_name, pTree, false); } } // while next expr if ( !ftrans.SimpleInit(&job,false,false,&rsock) ) { if( errstack ) { int cluster = -1, proc = -1; job.LookupInteger(ATTR_CLUSTER_ID,cluster); job.LookupInteger(ATTR_PROC_ID,proc); errstack->pushf( "DCSchedd::receiveJobSandbox", FILETRANSFER_INIT_FAILED, "File transfer initialization failed for target job %d.%d", cluster, proc ); } return false; } // We want files to be copied to their final places, so apply // any filename remaps when downloading. if ( !ftrans.InitDownloadFilenameRemaps(&job) ) { return false; } if ( use_new_command ) { ftrans.setPeerVersion( version() ); } if ( !ftrans.DownloadFiles() ) { if( errstack ) { FileTransfer::FileTransferInfo ft_info = ftrans.GetInfo(); int cluster = -1, proc = -1; job.LookupInteger(ATTR_CLUSTER_ID,cluster); job.LookupInteger(ATTR_PROC_ID,proc); errstack->pushf( "DCSchedd::receiveJobSandbox", FILETRANSFER_DOWNLOAD_FAILED, "File transfer failed for target job %d.%d: %s", cluster, proc, ft_info.error_desc.Value() ); } return false; } } rsock.end_of_message(); rsock.encode(); reply = OK; rsock.code(reply); rsock.end_of_message(); if(numdone) { *numdone = JobAdsArrayLen; } return true; }
void VMRegister::requestHostClassAds(void) { // find host startd daemon if( !m_vm_host_daemon ) m_vm_host_daemon = vmapi_findDaemon( m_vm_host_name, DT_STARTD); if( !m_vm_host_daemon ) { dprintf( D_FULLDEBUG, "Can't find host(%s) Startd daemon\n", m_vm_host_name ); return; } ClassAd query_ad; query_ad.SetMyTypeName(QUERY_ADTYPE); query_ad.SetTargetTypeName(STARTD_ADTYPE); query_ad.Assign(ATTR_REQUIREMENTS, true); char *addr = m_vm_host_daemon->addr(); Daemon hstartd(DT_STARTD, addr); ReliSock ssock; ssock.timeout( VM_SOCKET_TIMEOUT ); ssock.encode(); if( !ssock.connect(addr) ) { dprintf( D_FULLDEBUG, "Failed to connect to host startd(%s)\n to get host classAd", addr); return; } if(!hstartd.startCommand( QUERY_STARTD_ADS, &ssock )) { dprintf( D_FULLDEBUG, "Failed to send QUERY_STARTD_ADS command to host startd(%s)\n", addr); return; } if( !query_ad.put(ssock) ) { dprintf(D_FULLDEBUG, "Failed to send query Ad to host startd(%s)\n", addr); } if( !ssock.end_of_message() ) { dprintf(D_FULLDEBUG, "Failed to send query EOM to host startd(%s)\n", addr); } // Read host classAds ssock.timeout( VM_SOCKET_TIMEOUT ); ssock.decode(); int more = 1, num_ads = 0; ClassAdList adList; ClassAd *ad; while (more) { if( !ssock.code(more) ) { ssock.end_of_message(); return; } if(more) { ad = new ClassAd; if( !ad->initFromStream(ssock) ) { ssock.end_of_message(); delete ad; return; } adList.Insert(ad); num_ads++; } } ssock.end_of_message(); dprintf(D_FULLDEBUG, "Got %d classAds from host\n", num_ads); // Although we can get more than one classAd from host machine, // we use only the first one classAd adList.Rewind(); ad = adList.Next(); #if !defined(WANT_OLD_CLASSADS) ad->AddTargetRefs( TargetJobAttrs ); #endif // Get each Attribute from the classAd // added "HOST_" in front of each Attribute name const char *name; ExprTree *expr; ad->ResetExpr(); while( ad->NextExpr(name, expr) ) { MyString attr; attr += "HOST_"; attr += name; // Insert or Update an attribute to host_classAd in a 
VMRegister object ExprTree * pTree = expr->Copy(); host_classad->Insert(attr.Value(), pTree, true); } }