void handleConstraints( void ) { if( ! has_constraint ) { return; } const char* tmp = global_constraint.Value(); CondorError errstack; if( doWorkByConstraint(tmp, &errstack) ) { fprintf( stdout, "Jobs matching constraint %s %s\n", tmp, (mode == JA_REMOVE_JOBS) ? "have been marked for removal" : (mode == JA_REMOVE_X_JOBS) ? "have been removed locally (remote state unknown)" : actionWord(mode,true) ); } else { fprintf( stderr, "%s\n", errstack.getFullText(true).c_str() ); if (had_error) { fprintf( stderr, "Couldn't find/%s all jobs matching constraint %s\n", actionWord(mode,false), tmp ); } } }
int Job::declare_file(const MyString &name, filesize_t size, CondorError &errstack) { JobFile *ignored; JobFile jobFile; jobFile.size = size; jobFile.currentOffset = 0; jobFile.name = name; jobFile.file = safe_open_wrapper_follow((spoolDirectory + DIR_DELIM_STRING + jobFile.name).Value(), O_WRONLY | O_CREAT | _O_BINARY, 0600); if (-1 != jobFile.file) { if (0 == declaredFiles.lookup(name, ignored)) { close(jobFile.file); errstack.pushf("SOAP", ALREADYEXISTS, "File '%s' already declared.", name.Value()); return 4; } if (declaredFiles.insert(name, jobFile)) { close(jobFile.file); errstack.pushf("SOAP", FAIL, "Failed to record file '%s'.", name.Value()); return 2; } } else { // If there is a path delimiter in the name we assume that // the client knows what she is doing and will set a // proper Iwd later on. If there is no path delimiter we // have a problem. if (-1 != name.FindChar(DIR_DELIM_CHAR)) { dprintf(D_FULLDEBUG, "Failed to open '%s' for writing, reason: %s\n", (spoolDirectory+DIR_DELIM_STRING+jobFile.name).Value(), strerror(errno)); errstack.pushf("SOAP", FAIL, "Failed to open '%s' for writing, reason: %s", name.Value(), strerror(errno)); return 3; } } return 0; }
int Job::get_file(const MyString &name, int offset, int length, unsigned char *&data, CondorError &errstack) { memset(data, 0, length); int file = safe_open_wrapper_follow((spoolDirectory + DIR_DELIM_STRING + name).Value(), O_RDONLY | _O_BINARY, 0); if (-1 != file) { if (-1 == lseek(file, offset, SEEK_SET)) { close(file); errstack.pushf("SOAP", FAIL, "Failed to lseek in file '%s', reason: %s", name.Value(), strerror(errno)); return 2; } int result; if (-1 == (result = full_read(file, data, sizeof(unsigned char) * length))) { close(file); errstack.pushf("SOAP", FAIL, "Failed to read from file '%s', wanted to " "read %d bytes but received %d", name.Value(), length, result); return 3; } if (-1 == close(file)) { errstack.pushf("SOAP", FAIL, "Failed to close file '%s', reason: %s", name.Value(), strerror(errno)); return 4; } } else { errstack.pushf("SOAP", FAIL, "Failed to open file '%s', reason: %s", name.Value(), strerror(errno)); return 1; } return 0; }
// This function calls up the schedd passed in on the command line and // registers the transferd as being available for the schedd's use. RegisterResult TransferD::register_to_schedd(ReliSock **regsock_ptr) { CondorError errstack; MyString sname; MyString id; MyString sinful; bool rval; if (*regsock_ptr != NULL) { *regsock_ptr = NULL; } sname = m_features.get_schedd_sinful(); id = m_features.get_id(); if (sname == "N/A") { // no schedd supplied with which to register dprintf(D_ALWAYS, "No schedd specified to which to register.\n"); return REG_RESULT_NO_SCHEDD; } // what is my sinful string? sinful = daemonCore->InfoCommandSinfulString(-1); dprintf(D_FULLDEBUG, "Registering myself(%s) to schedd(%s)\n", sinful.Value(), sname.Value()); // hook up to the schedd. DCSchedd schedd(sname.Value(), NULL); // register myself, give myself 1 minute to connect. rval = schedd.register_transferd(sinful, id, 20*3, regsock_ptr, &errstack); if (rval == false) { // emit why dprintf(D_ALWAYS, "TransferRequest::register_to_schedd(): Failed to " "register. Schedd gave reason '%s'\n", errstack.getFullText().c_str()); return REG_RESULT_FAILED; } // WARNING WARNING WARNING WARNING // // WARNING WARNING WARNING WARNING // // WARNING WARNING WARNING WARNING // // WARNING WARNING WARNING WARNING // // WARNING WARNING WARNING WARNING // // Here, I must infact go back to daemon core without closing or doing // anything with the socket. This is because the schedd is going to // reconnect back to me, and I can't deadlock. dprintf(D_FULLDEBUG, "Succesfully registered, awaiting treq channel message....\n"); return REG_RESULT_SUCCESS; }
bool DCMaster::sendMasterCommand( bool insure_update, int my_cmd ) { CondorError errstack; int master_cmd = my_cmd; dprintf( D_FULLDEBUG, "DCMaster::sendMasterCommand: Just starting... \n"); /* have we located the required master yet? */ if( ! _addr ) { locate(); } if( ! m_master_safesock && ! insure_update ) { m_master_safesock = new SafeSock; m_master_safesock->timeout(20); // years of research... :) if( ! m_master_safesock->connect(_addr) ) { dprintf( D_ALWAYS, "sendMasterCommand: Failed to connect to master " "(%s)\n", _addr ); delete m_master_safesock; m_master_safesock = NULL; return false; } } ReliSock reli_sock; bool result; if( insure_update ) { // For now, if we have to ensure that the update gets // there, we use a ReliSock (TCP). reli_sock.timeout(20); // years of research... :) if( ! reli_sock.connect(_addr) ) { dprintf( D_ALWAYS, "sendMasterCommand: Failed to connect to master " "(%s)\n", _addr ); return false; } result = sendCommand( master_cmd, (Sock*)&reli_sock, 0, &errstack ); } else { result = sendCommand( master_cmd, (Sock*)m_master_safesock, 0, &errstack ); } if( ! result ) { dprintf( D_FULLDEBUG, "Failed to send %d command to master\n",master_cmd ); if( m_master_safesock ) { delete m_master_safesock; m_master_safesock = NULL; } if( errstack.code() != 0 ) { dprintf( D_ALWAYS, "ERROR: %s\n", errstack.getFullText() ); } return false; } return true; }
// Called when the schedd initially connects to the transferd to finish // the registration process. int TransferD::setup_transfer_request_handler(int /*cmd*/, Stream *sock) { ReliSock *rsock = (ReliSock*)sock; MyString sock_id; dprintf(D_ALWAYS, "Got TRANSFER_CONTROL_CHANNEL!\n"); rsock->decode(); /////////////////////////////////////////////////////////////// // make sure we are authenticated /////////////////////////////////////////////////////////////// if( ! rsock->triedAuthentication() ) { CondorError errstack; if( ! SecMan::authenticate_sock(rsock, WRITE, &errstack) ) { // we failed to authenticate, we should bail out now // since we don't know what user is trying to perform // this action. // TODO: it'd be nice to print out what failed, but we // need better error propagation for that... errstack.push( "TransferD::setup_transfer_request_handler()", 42, "Failure to register transferd - Authentication failed" ); dprintf( D_ALWAYS, "setup_transfer_request_handler() " "aborting: %s\n", errstack.getFullText().c_str() ); refuse(rsock); return CLOSE_STREAM; } } rsock->decode(); /////////////////////////////////////////////////////////////// // Register this socket with a socket handler to handle incoming requests /////////////////////////////////////////////////////////////// sock_id += "<TreqChannel-Socket>"; char* _sock_id = strdup( sock_id.Value() ); //de-const // register the handler for any future transfer requests on this socket. daemonCore->Register_Socket((Sock*)rsock, _sock_id, (SocketHandlercpp)&TransferD::accept_transfer_request_handler, "TransferD::accept_transfer_request_handler", this, ALLOW); free( _sock_id ); dprintf(D_ALWAYS, "Treq channel established.\n"); dprintf(D_ALWAYS, "Accepting Transfer Requests.\n"); return KEEP_STREAM; }
int Job::put_file(const MyString &name, int offset, char * data, int data_length, CondorError &errstack) { JobFile jobFile; if (-1 == declaredFiles.lookup(name, jobFile)) { errstack.pushf("SOAP", FAIL, "File '%s' has not been declared.", name.Value()); return 1; } if (-1 != jobFile.file) { if (-1 == lseek(jobFile.file, offset, SEEK_SET)) { errstack.pushf("SOAP", FAIL, "Failed to lseek in file '%s', reason: %s", name.Value(), strerror(errno)); return 2; } int result; if (data_length != (result = full_write(jobFile.file, data, data_length))) { errstack.pushf("SOAP", FAIL, "Failed to write to from file '%s', wanted to write %d bytes but was only able to write %d", name.Value(), data_length, result); return 3; } } else { errstack.pushf("SOAP", FAIL, "Failed to open file '%s', it should not " "contain any path separators.", name.Value()); return 5; } return 0; }
/////////////////////////////////////////////////////////////////////////////// // Note: on Unix/Linux, the file ID is a string encoding the combination of // device number and inode; on Windows the file ID is simply the value // _fullpath() returns on the path we're given. The Unix/Linux version // is preferable because it will work correctly even if there are hard // links to log files; but there are no inodes on Windows, so we're // doing what we can. bool GetFileID( const MyString &filename, MyString &fileID, CondorError &errstack ) { // Make sure the log file exists. Even though we may later call // InitializeFile(), we have to make sure the file exists here // first so we make sure that the file exists and we can therefore // get an inode or real path for it. // We *don't* want to truncate the file here, though, because // we don't know for sure whether it's the first time we're seeing // it. if ( access( filename.Value(), F_OK ) != 0 ) { if ( !MultiLogFiles::InitializeFile( filename.Value(), false, errstack ) ) { errstack.pushf( "ReadMultipleUserLogs", UTIL_ERR_LOG_FILE, "Error initializing log file %s", filename.Value() ); return false; } } #ifdef WIN32 char *tmpRealPath = realpath( filename.Value(), NULL ); if ( !tmpRealPath ) { errstack.pushf( "ReadMultipleUserLogs", UTIL_ERR_LOG_FILE, "Error (%d, %s) getting real path for specified path %s", errno, strerror( errno ), filename.Value() ); return false; } fileID = tmpRealPath; free( tmpRealPath ); #else StatWrapper swrap; if ( swrap.Stat( filename.Value() ) != 0 ) { errstack.pushf( "ReadMultipleUserLogs", UTIL_ERR_LOG_FILE, "Error getting inode for log file %s", filename.Value() ); return false; } fileID.formatstr( "%llu:%llu", (unsigned long long)swrap.GetBuf()->st_dev, (unsigned long long)swrap.GetBuf()->st_ino ); #endif return true; }
//--------------------------------------------------------------------------- Qmgr_connection * DagmanClassad::OpenConnection() { // Open job queue CondorError errstack; Qmgr_connection *queue = ConnectQ( _schedd->addr(), 0, false, &errstack, NULL, _schedd->version() ); if ( !queue ) { debug_printf( DEBUG_QUIET, "WARNING: failed to connect to queue manager (%s)\n", errstack.getFullText().c_str() ); check_warning_strictness( DAG_STRICT_3 ); return NULL; } return queue; }
// Delegate the X.509 proxy stored in `filename` to the starter at _addr.
//
// Connects with a 60 second timeout, issues DELEGATE_GSI_CRED_STARTER
// (passing sec_session_id through to startCommand), delegates the proxy with
// the requested expiration_time, then reads back an integer reply code.
//
// Returns XUS_Okay for reply 1, XUS_Declined for reply 2, and XUS_Error for
// reply 0, for any other reply, and for any failure along the way.
DCStarter::X509UpdateStatus
DCStarter::delegateX509Proxy( const char * filename,
                              time_t expiration_time,
                              char const *sec_session_id,
                              time_t *result_expiration_time)
{
    ReliSock rsock;
    rsock.timeout(60);
    if( ! rsock.connect(_addr) ) {
        dprintf(D_ALWAYS, "DCStarter::delegateX509Proxy: "
                "Failed to connect to starter %s\n", _addr);
        return XUS_Error;
    }

    CondorError errstack;
    if( ! startCommand(DELEGATE_GSI_CRED_STARTER, &rsock, 0, &errstack,
                       NULL, false, sec_session_id) ) {
        dprintf( D_ALWAYS, "DCStarter::delegateX509Proxy: "
                 "Failed send command to the starter: %s\n",
                 errstack.getFullText().c_str());
        return XUS_Error;
    }

    // Send the gsi proxy
    filesize_t file_size = 0;   // will receive the size of the file
    const bool delegation_failed =
        rsock.put_x509_delegation(&file_size, filename, expiration_time,
                                  result_expiration_time) < 0;
    if ( delegation_failed ) {
        dprintf(D_ALWAYS, "DCStarter::delegateX509Proxy "
                "failed to delegate proxy file %s (size=%ld)\n",
                filename, (long int)file_size);
        return XUS_Error;
    }

    // Fetch the result
    rsock.decode();
    int reply = 0;
    rsock.code(reply);
    rsock.end_of_message();

    switch(reply) {
        case 0:
            return XUS_Error;
        case 1:
            return XUS_Okay;
        case 2:
            return XUS_Declined;
        default:
            break;
    }

    dprintf(D_ALWAYS, "DCStarter::delegateX509Proxy: "
            "remote side returned unknown code %d. Treating "
            "as an error.\n", reply);
    return XUS_Error;
}
/** * Process the history directory and maintain the history file map * * Only handle rotated history files, those history.* that are not an * index. For each one that is not in the history file map, create a * new HistoryFile, poll it for entries to process, and add it to the * map. */ void aviary::history::processHistoryDirectory() { const char *file = NULL; // each time through we rebuild our set of inodes if (force_reset) { m_historyFiles.clear(); } Directory dir ( m_path.Value() ); dir.Rewind(); while ( ( file = dir.Next() ) ) { // Skip all non-history files, e.g. history and history.*.idx if ( strncmp ( file, "history.", 8 ) || !strncmp ( file + ( strlen ( file ) - 4 ), HISTORY_INDEX_SUFFIX, 4 ) ) continue; HistoryFile h_file ( ( m_path + DIR_DELIM_STRING + file ).Value() ); CondorError errstack; if ( !h_file.init ( errstack ) ) { dprintf ( D_ALWAYS, "%s\n", errstack.getFullText().c_str() ); return; } errstack.clear(); long unsigned int id; ASSERT ( h_file.getId ( id ) ); HistoryFileListType::iterator entry = m_historyFiles.find ( id ); if ( m_historyFiles.end() == entry ) { HistoryFile::HistoryEntriesTypeIterators ij = h_file.poll ( errstack ); for ( HistoryFile::HistoryEntriesTypeIterator i = ij.first; i != ij.second; i++ ) { process ( ( *i ) ); } m_historyFiles.insert ( id ); } } }
// Copy assignment: takes over base's file name, then calls cleanup() and
// init() so this object gets its own freshly initialized state instead of
// copies of base's stat and FILE* members.  An init() failure is only
// logged; the assignment still returns *this.
ODSHistoryFile &
ODSHistoryFile::operator=(const ODSHistoryFile &base)
{
    // Self-assignment leaves the object untouched.
    if (this == &base) {
        return *this;
    }

    m_name = base.m_name;

    cleanup();

    // Don't just copy the stat and FILE* members, initialize them
    CondorError errstack;
    const bool initialized = init(errstack);
    if (!initialized) {
        // XXX: Should throw an exception here
        dprintf ( D_ALWAYS, "ODSHistoryFile::operator=: %s\n",
                  errstack.getFullText(true).c_str());
    }

    return *this;
}
// Initialize this history file from m_name: stat it and keep a heap copy of
// the stat buffer in m_stat, require a regular file, open it for reading
// into m_file, and create and initialize the ODS writer in m_writer.
//
// Returns true on success.  On failure pushes one of the codes 1 (stat),
// 2 (not a regular file), 4 (fopen) or 5 (writer init) onto errstack and
// returns false.
bool
ODSHistoryFile::init(CondorError &errstack)
{
    StatWrapper probe;

    if (probe.Stat(m_name.c_str()) != 0) {
        const int stat_errno = probe.GetErrno();
        errstack.pushf("ODSHistoryFile::init", 1,
                       "Failed to stat %s: %d (%s)\n",
                       m_name.c_str(),
                       stat_errno,
                       strerror(stat_errno));
        return false;
    }

    // Keep our own copy of the stat result.
    m_stat = (StatStructType *) malloc(sizeof(StatStructType));
    ASSERT(m_stat);
    memcpy(m_stat, probe.GetBuf(), sizeof(StatStructType));

    if (!S_ISREG(m_stat->st_mode)) {
        errstack.pushf("ODSHistoryFile::init", 2,
                       "%s: not a regular file\n",
                       m_name.c_str());
        return false;
    }

    m_file = safe_fopen_wrapper(m_name.c_str(), "r");
    if (m_file == NULL) {
        const int open_errno = errno;
        errstack.pushf("ODSHistoryFile::init", 4,
                       "Failed to fopen %s: %d (%s)\n",
                       m_name.c_str(), open_errno, strerror(open_errno));
        return false;
    }

    m_writer = new ODSMongodbOps(DB_NAME);
    if (!m_writer->init("localhost")) {
        errstack.pushf("ODSHistoryFile::init", 5,
                       "Unable to init ODS writer\n");
        return false;
    }

    return true;
}
// Apply the current action (selected by the global `mode`) to every job, by
// running it against the constraint "<ATTR_CLUSTER_ID> >= 0", and report the
// outcome on stdout or stderr.
void
handleAll()
{
    char constraint[128];
    // Bounded formatting: never write past the fixed-size buffer.
    snprintf( constraint, sizeof(constraint), "%s >= 0", ATTR_CLUSTER_ID );

    CondorError errstack;
    if( doWorkByConstraint(constraint, &errstack) ) {
        // The two removal modes get fixed wording; every other mode uses
        // the past-tense word supplied by actionWord().
        fprintf( stdout, "All jobs %s.\n",
                 (mode == JA_REMOVE_JOBS) ?
                 "marked for removal" :
                 (mode == JA_REMOVE_X_JOBS) ?
                 "removed locally (remote state unknown)" :
                 actionWord(mode,true) );
    } else {
        fprintf( stderr, "%s\n", errstack.getFullText(true).c_str() );
        // The summary line is only printed when had_error is set.
        if (had_error)
        {
            fprintf( stderr, "Could not %s all jobs.\n",
                     actionWord(mode,false) );
        }
    }
}
// Append one heap-allocated FileInfo (name and size) to file_list for every
// entry that Directory::Next() yields for the job's spool directory.
//
// Returns 0 on success, 1 if spoolDirectory is not a directory, and 2 if an
// entry could not be appended.  Failures also push onto errstack.
int
Job::get_spool_list(List<FileInfo> &file_list,
                    CondorError &errstack)
{
    StatInfo directoryInfo(spoolDirectory.Value());
    if (!directoryInfo.IsDirectory()) {
        dprintf(D_ALWAYS, "spoolDirectory == '%s'\n",
                spoolDirectory.Value());

        errstack.pushf("SOAP",
                       FAIL,
                       "spool directory '%s' is not actually a directory.",
                       spoolDirectory.Value());
        return 1;
    }

    Directory directory(spoolDirectory.Value());
    const char * name;
    while (NULL != (name = directory.Next())) {
        FileInfo *info = new FileInfo();
        // Check the pointer before it is dereferenced.
        ASSERT(info);
        info->initialize(name, directory.GetFileSize());

        if (!file_list.Append(info)) {
            // The list did not take the entry, so it is still ours to free.
            delete info;
            errstack.pushf("SOAP",
                           FAIL,
                           "Error adding %s to file list.",
                           name);
            return 2;
        }
    }

    return 0;
}
//--------------------------------------------------------------------------- bool Job::UnmonitorLogFile( ReadMultipleUserLogs &condorLogReader, ReadMultipleUserLogs &storkLogReader ) { debug_printf( DEBUG_DEBUG_2, "Unmonitoring log file <%s> for node %s\n", GetLogFile(), GetJobName() ); if ( !_logIsMonitored ) { debug_printf( DEBUG_DEBUG_1, "Warning: log file for node " "%s is already unmonitored\n", GetJobName() ); return true; } ReadMultipleUserLogs &logReader = (_jobType == TYPE_CONDOR) ? condorLogReader : storkLogReader; debug_printf( DEBUG_DEBUG_1, "Unmonitoring log file <%s> for node %s\n", GetLogFile(), GetJobName() ); CondorError errstack; bool result = logReader.unmonitorLogFile( GetLogFile(), errstack ); if ( !result ) { errstack.pushf( "DAGMan::Job", DAGMAN_ERR_LOG_FILE, "ERROR: Unable to unmonitor log " "file for node %s", GetJobName() ); debug_printf( DEBUG_QUIET, "%s\n", errstack.getFullText().c_str() ); EXCEPT( "Fatal log file monitoring error!\n" ); } if ( result ) { delete [] _logFile; _logFile = NULL; _logIsMonitored = false; } return result; }
// Fetch this startd's ads into adsList by running a STARTD_AD query against
// the address obtained from locate()/addr().
//
// Returns true when the query result is Q_OK.  Returns false when locate()
// fails or the query fails; a query failure is logged (the CondorError text
// for Q_COMMUNICATION_ERROR, the query-result string otherwise).
bool
DCStartd::getAds( ClassAdList &adsList )
{
    CondorError errstack;

    // The query object lives on the stack, so it is released on every
    // exit path without manual delete calls.
    CondorQuery query( STARTD_AD );

    if( ! this->locate() ) {
        return false;
    }

    char *ad_addr = this->addr();
    QueryResult q = query.fetchAds( adsList, ad_addr, &errstack );
    if( q == Q_OK ) {
        return true;
    }

    if( q == Q_COMMUNICATION_ERROR ) {
        dprintf( D_ALWAYS, "%s\n", errstack.getFullText(true).c_str() );
    } else {
        dprintf( D_ALWAYS, "Error: Could not fetch ads --- %s\n",
                 getStrQueryResult(q) );
    }
    return false;
}
bool MultiLogFiles::InitializeFile(const char *filename, bool truncate, CondorError &errstack) { dprintf( D_LOG_FILES, "MultiLogFiles::InitializeFile(%s, %d)\n", filename, (int)truncate ); int flags = O_WRONLY; if ( truncate ) { flags |= O_TRUNC; dprintf( D_ALWAYS, "MultiLogFiles: truncating log file %s\n", filename ); } // Two-phase attempt at open here is to make things work if // a log file is a symlink to another file (see gittrac #2704). int fd = safe_create_fail_if_exists( filename, flags ); if ( fd < 0 && errno == EEXIST ) { fd = safe_open_no_create_follow( filename, flags ); } if ( fd < 0 ) { errstack.pushf("MultiLogFiles", UTIL_ERR_OPEN_FILE, "Error (%d, %s) opening file %s for creation " "or truncation", errno, strerror( errno ), filename ); return false; } if ( close( fd ) != 0 ) { errstack.pushf("MultiLogFiles", UTIL_ERR_CLOSE_FILE, "Error (%d, %s) closing file %s for creation " "or truncation", errno, strerror( errno ), filename ); return false; } return true; }
// Turn `filename` into an absolute path in place by prefixing the current
// working directory when fullpath() says it is not already a full path.
//
// Returns true on success (including when nothing had to change); returns
// false after pushing onto errstack when the working directory cannot be
// obtained.
bool
MultiLogFiles::makePathAbsolute(MyString &filename, CondorError &errstack)
{
    if ( fullpath(filename.Value()) ) {
        return true;
    }

    // I'd like to use realpath() here, but I'm not sure
    // if that's portable across all platforms.  wenger 2009-01-09.
    MyString cwd;
    if ( !condor_getcwd(cwd) ) {
        errstack.pushf( "MultiLogFiles", UTIL_ERR_GET_CWD,
                        "ERROR: condor_getcwd() failed with errno %d (%s) at %s:%d",
                        errno, strerror(errno), __FILE__, __LINE__);
        return false;
    }

    filename = cwd + DIR_DELIM_STRING + filename;
    return true;
}
int main(int argc, char **argv) { char * server_address = NULL; char ** ptr; const char * myName; // find our name myName = strrchr( argv[0], DIR_DELIM_CHAR ); if( !myName ) { myName = argv[0]; } else { myName++; } // read config file myDistro->Init (argc, argv); config (); for (ptr=argv+1,argc--; argc > 0; argc--,ptr++) { if ( ptr[0][0] == '-' ) { switch ( ptr[0][1] ) { case 'h': usage(myName); exit(0); break; case 'd': // dprintf to console Termlog = 1; dprintf_config ("TOOL", get_param_functions()); break; case 'n': if( !(--argc) || !(*(++ptr)) ) { fprintf( stderr, "%s: -n requires another argument\n", myName ); exit(1); } server_address = strdup (*ptr); break; case 'v': version(); // this function calls exit(0) break; default: fprintf( stderr, "%s: Unknown option %s\n", myName, *ptr); usage(myName); exit(1); } } //fi } //rof CondorError errorstack; int number = 0; SimpleList <Credential*> result; DCCredd credd(server_address); // resolve server address if ( ! credd.locate() ) { fprintf (stderr, "%s\n", credd.error() ); return 1; } if (!credd.listCredentials (result, number, errorstack)) { fprintf (stderr, "Unable to retrieve credentials (%s)\n", errorstack.getFullText(true)); return 1; } if (number > 0) { Credential * cred; result.Rewind(); printf ("Name\tType\n-----\t-----\n"); while (result.Next (cred)) { printf ("%s\t%s\n", cred->GetName(), cred->GetTypeString()); } printf ("\nTotal %d\n", number); } else if (number == 0) { printf ("No credentials currently stored on this server\n"); } else { fprintf (stderr, "ERROR\n"); return 1; } return 0; }
int main(int argc, char *argv[]) { char *arg; int nArgs = 0; // number of args int i, result; char* pool = NULL; char* scheddName = NULL; char* scheddAddr = NULL; MyString method; char *tmp; myDistro->Init( argc, argv ); MyName = condor_basename(argv[0]); config(); #if !defined(WIN32) install_sig_handler(SIGPIPE, SIG_IGN ); #endif // dig around in the config file looking for what the config file says // about getting files from Condor. This defaults with the global variable // initialization. tmp = param( "SANDBOX_TRANSFER_METHOD" ); if ( tmp != NULL ) { method = tmp; free( tmp ); string_to_stm( method, st_method ); } char **args = (char **)malloc(sizeof(char *) * argc); // args if ( ! args) exit(2); // parse the arguments. for( argv++; (arg = *argv); argv++ ) { if( arg[0] == '-' ) { if( ! arg[1] ) { usage(); } switch( arg[1] ) { case 'd': // dprintf to console dprintf_set_tool_debug("TOOL", 0); break; case 'c': args[nArgs] = arg; nArgs++; argv++; if( ! *argv ) { fprintf( stderr, "%s: -constraint requires another argument\n", MyName); exit(1); } args[nArgs] = *argv; nArgs++; break; case 'a': if( arg[2] && arg[2] == 'd' ) { argv++; if( ! *argv ) { fprintf( stderr, "%s: -addr requires another argument\n", MyName); exit(1); } if( is_valid_sinful(*argv) ) { scheddAddr = strdup(*argv); if( ! scheddAddr ) { fprintf( stderr, "Out of Memory!\n" ); exit(1); } } else { fprintf( stderr, "%s: \"%s\" is not a valid address\n", MyName, *argv ); fprintf( stderr, "Should be of the form " "<ip.address.here:port>\n" ); fprintf( stderr, "For example: <123.456.789.123:6789>\n" ); exit( 1 ); } break; } All = true; break; case 'n': // use the given name as the schedd name to connect to argv++; if( ! *argv ) { fprintf( stderr, "%s: -name requires another argument\n", MyName); exit(1); } if ( scheddName ) free(scheddName); scheddName = strdup(*argv); break; case 'p': // use the given name as the central manager to query argv++; if( ! 
*argv ) { fprintf( stderr, "%s: -pool requires another argument\n", MyName); exit(1); } if( pool ) { free( pool ); } pool = strdup( *argv ); break; case 's': argv++; if( ! *argv ) { fprintf( stderr, "%s: -stm requires another argument\n", MyName); exit(1); } method = *argv; string_to_stm(method, st_method); break; case 'v': version(); break; case 'h': usage(0); break; default: fprintf( stderr, "Unrecognized option: %s\n", arg ); usage(); break; } } else { if( All ) { // If -all is set, there should be no other // constraint arguments. usage(); } args[nArgs] = arg; nArgs++; } } // Check to make sure we have a valid sandbox transfer mechanism. if (st_method == STM_UNKNOWN) { fprintf( stderr, "%s: Unknown sandbox transfer method: %s\n", MyName, method.Value()); usage(); exit(1); } if( ! (All || nArgs) ) { // We got no indication of what to act on fprintf( stderr, "You did not specify any jobs\n" ); usage(); } // We're done parsing args, now make sure we know how to // contact the schedd. if( ! scheddAddr ) { // This will always do the right thing, even if either or // both of scheddName or pool are NULL. schedd = new DCSchedd( scheddName, pool ); } else { schedd = new DCSchedd( scheddAddr ); } if( ! schedd->locate() ) { fprintf( stderr, "%s: %s\n", MyName, schedd->error() ); exit( 1 ); } // Process the args. if( All ) { handleAll(); } else { for(i = 0; i < nArgs; i++) { if( match_prefix( args[i], "-constraint" ) ) { i++; addConstraint( args[i] ); } else { procArg(args[i]); } } } // Sanity check: make certain we now have a constraint if ( global_constraint.Length() <= 0 ) { fprintf( stderr, "Unable to create a job constraint!\n"); exit(1); } fprintf(stdout,"Fetching data files...\n"); switch(st_method) { case STM_USE_SCHEDD_ONLY: { // start block // Get the sandbox directly from the schedd. // And now, do the work. 
CondorError errstack; result = schedd->receiveJobSandbox(global_constraint.Value(), &errstack); if ( !result ) { fprintf( stderr, "\n%s\n", errstack.getFullText(true).c_str() ); fprintf( stderr, "ERROR: Failed to spool job files.\n" ); exit(1); } // All done return 0; } //end block break; case STM_USE_TRANSFERD: { // start block // NEW METHOD where we ask the schedd for a transferd, then get the // files from the transferd CondorError errstack; ClassAd respad; int invalid; MyString reason; MyString td_sinful; MyString td_cap; result = schedd->requestSandboxLocation(FTPD_DOWNLOAD, global_constraint, FTP_CFTP, &respad, &errstack); if ( !result ) { fprintf( stderr, "\n%s\n", errstack.getFullText(true).c_str() ); fprintf( stderr, "ERROR: Failed to spool job files.\n" ); exit(1); } respad.LookupInteger(ATTR_TREQ_INVALID_REQUEST, invalid); if (invalid == TRUE) { fprintf( stderr, "ERROR: Failed to spool job files.\n" ); respad.LookupString(ATTR_TREQ_INVALID_REASON, reason); fprintf( stderr, "%s\n", reason.Value()); exit(EXIT_FAILURE); } respad.LookupString(ATTR_TREQ_TD_SINFUL, td_sinful); respad.LookupString(ATTR_TREQ_CAPABILITY, td_cap); dprintf(D_ALWAYS, "td: %s, cap: %s\n", td_sinful.Value(), td_cap.Value()); DCTransferD dctd(td_sinful.Value()); result = dctd.download_job_files(&respad, &errstack); if ( !result ) { fprintf( stderr, "\n%s\n", errstack.getFullText(true).c_str() ); fprintf( stderr, "ERROR: Failed to spool job files.\n" ); exit(1); } } // end block break; default: EXCEPT("PROGRAMMER ERROR: st_method must be known."); break; } // All done return 0; }
// Command-line tool entry point: read a credential from a file and submit
// it to a credd.
//
// Options handled below: -h (usage), -d (log to the terminal), -S (clears
// Read_Myproxy_pw_terminal), -n <credd address>, -t <type, only "x509">,
// -f <credential file>, -N <credential name>, -m <MyProxy argument parsed by
// parseMyProxyArgument()>, -D <MyProxy server DN>, -v (version).
// Returns 0 when the credential is stored, 1 when the credd cannot be
// located or the store fails; argument and file errors exit(1).
int main(int argc, char **argv) {
    char ** ptr;
    const char * myName;

    // find our name: the part of argv[0] after the last directory delimiter
    myName = strrchr( argv[0], DIR_DELIM_CHAR );
    if( !myName ) {
        myName = argv[0];
    } else {
        myName++;
    }

    int cred_type = 0;
    char * cred_name = NULL;
    char * cred_file_name = NULL;
    char * myproxy_user = NULL;
    char * myproxy_host = NULL;
    int myproxy_port = 0;
    char * myproxy_dn = NULL;
    char * server_address= NULL;

    // read config file
    myDistro->Init (argc, argv);
    config();

    // argc is used as the count of arguments still to be examined; options
    // that take a value consume one extra slot via --argc / ++ptr.
    for (ptr=argv+1,argc--; argc > 0; argc--,ptr++) {
        if ( ptr[0][0] == '-' ) {
            switch ( ptr[0][1] ) {
            case 'h':
                usage(myName);
                exit(0);
                break;
            case 'd':
                // dprintf to console
                Termlog = 1;
                dprintf_config ("TOOL", get_param_functions());
                break;
            case 'S':
                // dprintf to console; also clears Read_Myproxy_pw_terminal,
                // which gates the prompt_password() call further down
                Termlog = 1;
                Read_Myproxy_pw_terminal = false;
                break;
            case 'n':
                if( !(--argc) || !(*(++ptr)) ) {
                    fprintf( stderr, "%s: -n requires another argument\n", myName );
                    exit(1);
                }
                server_address = strdup (*ptr);
                break;
            case 't':
                if( !(--argc) || !(*(++ptr)) ) {
                    fprintf( stderr, "%s: -t requires another argument\n", myName );
                    exit(1);
                }
                // "x509" is the only credential type accepted here
                if (strcmp (*ptr, "x509") == 0) {
                    cred_type = X509_CREDENTIAL_TYPE;
                } else {
                    fprintf( stderr, "Invalid credential type %s\n", *ptr );
                    exit(1);
                }
                break;
            case 'f':
                if( !(--argc) || !(*(++ptr)) ) {
                    fprintf( stderr, "%s: -f requires another argument\n", myName );
                    exit(1);
                }
                cred_file_name = strdup (*ptr);
                break;
            case 'N':
                if( !(--argc) || !(*(++ptr)) ) {
                    fprintf( stderr, "%s: -N requires another argument\n", myName );
                    exit(1);
                }
                cred_name = strdup (*ptr);
                break;
            case 'm':
                if( !(--argc) || !(*(++ptr)) ) {
                    fprintf( stderr, "%s: -m requires another argument\n", myName );
                    exit(1);
                }
                parseMyProxyArgument (*ptr, myproxy_user, myproxy_host, myproxy_port);
                break;
            case 'D':
                if( !(--argc) || !(*(++ptr)) ) {
                    fprintf( stderr, "%s: -D requires another argument\n", myName );
                    exit(1);
                }
                myproxy_dn = strdup (*ptr);
                break;
            case 'v':
                version(); // this function calls exit(0)
                break;
            default:
                fprintf( stderr, "%s: Unknown option %s\n", myName, *ptr);
                usage(myName);
                exit(1);
            }
        } //fi
    } //rof

    // Both a credential file and a credential type are mandatory.
    if (( cred_file_name == NULL ) || (cred_type == 0)) {
        fprintf ( stderr, "Credential filename or type not specified\n");
        exit (1);
    }

    Credential * cred = NULL;
    if (cred_type == X509_CREDENTIAL_TYPE) {
        cred = new X509Credential();
    } else {
        fprintf ( stderr, "Invalid credential type\n");
        exit (1);
    }

    // Load the credential bytes from the file into the credential object.
    char * data = NULL;
    int data_size;
    if (!read_file (cred_file_name, data, data_size)) {
        fprintf (stderr, "Can't open %s\n", cred_file_name);
        exit (1);
    }

    cred->SetData (data, data_size);

    // Fall back to the default credential name when -N was not given.
    if (cred_name !=NULL) {
        cred->SetName(cred_name);
    } else {
        cred->SetName(DEFAULT_CREDENTIAL_NAME);
    }

    char * username = my_username(0);
    cred->SetOwner (username);

    // MyProxy settings only apply to X509 credentials with a host given.
    if (cred_type == X509_CREDENTIAL_TYPE && myproxy_host != NULL) {
        X509Credential * x509cred = (X509Credential*)cred;

        // Build "host" or "host:port" for the MyProxy server.
        MyString str_host_port = myproxy_host;
        if (myproxy_port != 0) {
            str_host_port += ":";
            str_host_port += myproxy_port;
        }
        x509cred->SetMyProxyServerHost (str_host_port.Value());

        // The MyProxy user defaults to the local user name.
        if (myproxy_user != NULL) {
            x509cred->SetMyProxyUser (myproxy_user);
        } else {
            x509cred->SetMyProxyUser (username);
        }

        if (myproxy_dn != NULL) {
            x509cred->SetMyProxyServerDN (myproxy_dn);
        }

        char * myproxy_password;
        // NOTE(review): the argument of the call below looks corrupted or
        // redacted -- two string literals joined by "******".  As written
        // there is also no branch that assigns myproxy_password when
        // Read_Myproxy_pw_terminal is false.  Restore this statement from
        // the upstream source before building.
        if ( Read_Myproxy_pw_terminal ) {
            myproxy_password = prompt_password( "Please enter the MyProxy password:"******"Please enter the MyProxy password from the standard input\n");
        }
        if (myproxy_password) {
            x509cred->SetRefreshPassword ( myproxy_password );
        }

        x509cred->display( D_FULLDEBUG );
    }

    CondorError errstack;
    DCCredd dc_credd (server_address);

    // resolve server address
    if ( ! dc_credd.locate() ) {
        fprintf (stderr, "%s\n", dc_credd.error() );
        return 1;
    }

    if (dc_credd.storeCredential(cred, errstack)) {
        printf ("Credential submitted successfully\n");
    } else {
        // NOTE(review): getFullText() is handed straight to a %s
        // conversion; confirm it yields a C string in this build.
        fprintf (stderr, "Unable to submit credential\n%s\n", errstack.getFullText(true));
        return 1;
    }

    return 0;
}
void procArg(const char* arg) { int c, p; // cluster/proc # char* tmp; MyString constraint; if( str_isint(arg) || str_isreal(arg,true) ) // process by cluster/proc # { c = strtol(arg, &tmp, 10); if(c <= 0) { fprintf(stderr, "Invalid cluster # from %s.\n", arg); had_error = true; return; } if(*tmp == '\0') // delete the cluster { CondorError errstack; constraint.formatstr( "%s == %d", ATTR_CLUSTER_ID, c ); if( doWorkByConstraint(constraint.Value(), &errstack) ) { fprintf( stdout, "Cluster %d %s.\n", c, (mode == JA_REMOVE_JOBS) ? "has been marked for removal" : (mode == JA_REMOVE_X_JOBS) ? "has been removed locally (remote state unknown)" : actionWord(mode,true) ); } else { fprintf( stderr, "%s\n", errstack.getFullText(true).c_str() ); if (had_error) { fprintf( stderr, "Couldn't find/%s all jobs in cluster %d.\n", actionWord(mode,false), c ); } } return; } if(*tmp == '.') { p = strtol(tmp + 1, &tmp, 10); if(p < 0) { fprintf( stderr, "Invalid proc # from %s.\n", arg); had_error = true; return; } if(*tmp == '\0') // process a proc { if( ! job_ids ) { job_ids = new StringList(); } job_ids->append( arg ); return; } } fprintf( stderr, "Warning: unrecognized \"%s\" skipped.\n", arg ); return; } // process by user name else { CondorError errstack; constraint.formatstr("%s == \"%s\"", ATTR_OWNER, arg ); if( doWorkByConstraint(constraint.Value(), &errstack) ) { fprintf( stdout, "User %s's job(s) %s.\n", arg, (mode == JA_REMOVE_JOBS) ? "have been marked for removal" : (mode == JA_REMOVE_X_JOBS) ? "have been removed locally (remote state unknown)" : actionWord(mode,true) ); } else { fprintf( stderr, "%s\n", errstack.getFullText(true).c_str() ); if (had_error) { fprintf( stderr, "Couldn't find/%s all of user %s's job(s).\n", actionWord(mode,false), arg ); } } } }
// This handler is called when a client wishes to write files from the // transferd's storage. int TransferD::write_files_handler(int cmd, Stream *sock) { ReliSock *rsock = (ReliSock*)sock; MyString capability; int protocol = FTP_UNKNOWN; TransferRequest *treq = NULL; MyString fquser; static int transfer_reaper_id = -1; ThreadArg *thread_arg; int tid; ClassAd reqad; ClassAd respad; cmd = cmd; // quiet the compiler. dprintf(D_ALWAYS, "Got TRANSFERD_WRITE_FILES!\n"); ///////////////////////////////////////////////////////////////////////// // make sure we are authenticated ///////////////////////////////////////////////////////////////////////// if( ! rsock->triedAuthentication() ) { CondorError errstack; if( ! SecMan::authenticate_sock(rsock, WRITE, &errstack) ) { // we failed to authenticate, we should bail out now // since we don't know what user is trying to perform // this action. // TODO: it'd be nice to print out what failed, but we // need better error propagation for that... errstack.push( "TransferD::setup_transfer_request_handler()", 42, "Failure to register transferd - Authentication failed" ); dprintf( D_ALWAYS, "setup_transfer_request_handler() " "aborting: %s\n", errstack.getFullText() ); refuse( rsock ); return CLOSE_STREAM; } } fquser = rsock->getFullyQualifiedUser(); ///////////////////////////////////////////////////////////////////////// // Check to see if the capability the client tells us is something that // we have knowledge of. We ONLY check the capability and not the // identity of the person in question. This allows people of different // identities to write files here as long as they had the right // capability. While this might not sound secure, they STILL had to have // authenticated as someone this daemon trusts. // Similarly, check the protocol it wants to use as well as ensure that // the direction the transfer request was supposed to be is being honored. 
///////////////////////////////////////////////////////////////////////// rsock->decode(); // soak the request ad from the client about what it wants to transfer reqad.initFromStream(*rsock); rsock->end_of_message(); reqad.LookupString(ATTR_TREQ_CAPABILITY, capability); rsock->encode(); // do I know of such a capability? if (m_treqs.lookup(capability, treq) != 0) { // didn't find it. Log it and tell them to leave and close up shop respad.Assign(ATTR_TREQ_INVALID_REQUEST, TRUE); respad.Assign(ATTR_TREQ_INVALID_REASON, "Invalid capability!"); respad.put(*rsock); rsock->end_of_message(); dprintf(D_ALWAYS, "Client identity '%s' tried to write some files " "using capability '%s', but there was no such capability. " "Access denied.\n", fquser.Value(), capability.Value()); return CLOSE_STREAM; } reqad.LookupInteger(ATTR_TREQ_FTP, protocol); // am I willing to use this protocol? switch(protocol) { case FTP_CFTP: // FileTrans protocol, I'm happy. break; default: respad.Assign(ATTR_TREQ_INVALID_REQUEST, TRUE); respad.Assign(ATTR_TREQ_INVALID_REASON, "Invalid file transfer protocol!"); respad.put(*rsock); rsock->end_of_message(); dprintf(D_ALWAYS, "Client identity '%s' tried to write some files " "using protocol '%d', but I don't support that protocol. " "Access denied.\n", fquser.Value(), protocol); return CLOSE_STREAM; } // nsure that this transfer request was of the uploading variety if (treq->get_direction() != FTPD_UPLOAD) { respad.Assign(ATTR_TREQ_INVALID_REQUEST, TRUE); respad.Assign(ATTR_TREQ_INVALID_REASON, "Transfer Request was not an uploading request!"); respad.put(*rsock); rsock->end_of_message(); dprintf(D_ALWAYS, "Client identity '%s' tried to write some files " "to a transfer request that wasn't expecting to be written. " "Access denied.\n", fquser.Value()); } ///////////////////////////////////////////////////////////////////////// // Tell the client everything was ok. 
///////////////////////////////////////////////////////////////////////// respad.Assign(ATTR_TREQ_INVALID_REQUEST, FALSE); respad.put(*rsock); rsock->end_of_message(); ///////////////////////////////////////////////////////////////////////// // Set up a thread (a process under unix) to read ALL of the job files // for all of the ads in the TransferRequest. ///////////////////////////////////////////////////////////////////////// // now create a thread, passing in the sock, which uses the file transfer // object to accept the files. if (transfer_reaper_id == -1) { // only set this up ONCE so each and every thread gets one. transfer_reaper_id = daemonCore->Register_Reaper( "write_files_reaper", (ReaperHandlercpp) &TransferD::write_files_reaper, "write_files_reaper", this ); } thread_arg = new ThreadArg(protocol, treq); // Start a new thread (process on Unix) to do the work tid = daemonCore->Create_Thread( (ThreadStartFunc)&TransferD::write_files_thread, (void *)thread_arg, rsock, transfer_reaper_id ); if (tid == FALSE) { // XXX How do I handle this failure? } // associate the tid with the request so I can deal with it propery in // the reaper m_client_to_transferd_threads.insert(tid, treq); // The stream is inherited to the thread, who does the transfer and // finishes the protocol, but in the parent, I'm closing it. return CLOSE_STREAM; }
bool Triggerd::PerformQueries() { ClassAdList result; CondorError errstack; QueryResult status; Trigger* trig = NULL; CondorQuery* query; bool ret_val = true; std::map<uint32_t,Trigger*>::iterator iter; ClassAd* ad = NULL; std::string eventText; char* token = NULL; std::string triggerText; char* queryString = NULL; ExprTree* attr = NULL; std::list<std::string> missing_nodes; size_t pos; size_t prev_pos; bool bad_trigger = false; const char* token_str = NULL; if (0 < triggers.size()) { dprintf(D_FULLDEBUG, "Triggerd: Evaluating %d triggers\n", (int)triggers.size()); query = new CondorQuery(ANY_AD); for (iter = triggers.begin(); iter != triggers.end(); iter++) { // Clear any pre-exhisting custom contraints and add the constraint // for this trigger trig = iter->second; query->clearORCustomConstraints(); query->clearANDCustomConstraints(); queryString = strdup(trig->GetQuery().c_str()); ReplaceAllChars(queryString, '\'', '"'); query->addANDConstraint(queryString); free(queryString); // Perform the query and check the result if (NULL != query_collector) { status = query->fetchAds(result, query_collector->addr(), &errstack); } else { status = collectors->query(*query, result, &errstack); } if (Q_OK != status) { // Problem with the query if (Q_COMMUNICATION_ERROR == status) { dprintf(D_ALWAYS, "Triggerd Error: Error contacting the collecter - %s\n", errstack.getFullText(true).c_str()); if (CEDAR_ERR_CONNECT_FAILED == errstack.code(0)) { dprintf(D_ALWAYS, "Triggerd Error: Couldn't contact the collector on the central manager\n"); } } else { dprintf(D_ALWAYS, "Triggerd Error: Could not retrieve ads - %s\n", getStrQueryResult(status)); } ret_val = false; break; } else { dprintf(D_FULLDEBUG, "Query successful. Parsing results\n"); // Query was successful, so parse the results result.Open(); while ((ad = result.Next())) { if (true == bad_trigger) { // Avoid processing a bad trigger multiple times. 
Remove // all result ads and reset the flag dprintf(D_FULLDEBUG, "Cleaning up after a bad trigger\n"); result.Delete(ad); while ((ad = result.Next())) { result.Delete(ad); } bad_trigger = false; break; } eventText = ""; triggerText = trig->GetText(); dprintf(D_FULLDEBUG, "Parsing trigger text '%s'\n", triggerText.c_str()); prev_pos = pos = 0; while (prev_pos < triggerText.length()) { pos = triggerText.find("$(", prev_pos, 2); if (std::string::npos == pos) { // Didn't find the start of a varible, so append the // remaining string dprintf(D_FULLDEBUG, "Adding text string to event text\n"); eventText += triggerText.substr(prev_pos, std::string::npos); prev_pos = triggerText.length(); } else { // Found a variable for substitution. Need to add // text before it to the string, grab the variable // to substitute for, and put its value in the text eventText += triggerText.substr(prev_pos, pos - prev_pos); dprintf(D_FULLDEBUG, "Adding text string prior to variable substitution to event text\n"); // Increment the position by 2 to skip the $( prev_pos = pos + 2; pos = triggerText.find(")", prev_pos, 1); if (std::string::npos == pos) { // Uh-oh. We have a start of a variable substitution // but no closing marker. dprintf(D_FULLDEBUG, "Error: Failed to find closing varable substitution marker ')'. Aborting processing of the trigger\n"); bad_trigger = true; break; } else { token_str = triggerText.substr(prev_pos, pos-prev_pos).c_str(); token = RemoveWS(token_str); dprintf(D_FULLDEBUG, "token: '%s'\n", token); if (NULL == token) { dprintf(D_ALWAYS, "Removing whitespace from %s produced unusable name. 
Aborting processing of the trigger\n", token_str); bad_trigger = true; break; } attr = ad->LookupExpr(token); if (NULL == attr) { // The token isn't found in the classad, so treat it // like a string dprintf(D_FULLDEBUG, "Adding text string to event text\n"); eventText += token; } else { dprintf(D_FULLDEBUG, "Adding classad value to event text\n"); eventText += ExprTreeToString(attr); } if (NULL != token) { free(token); token = NULL; } ++pos; } prev_pos = pos; } } // Remove the trailing space std::string::size_type notwhite = eventText.find_last_not_of(" "); eventText.erase(notwhite+1); // Send the event if (false == bad_trigger) { EventCondorTriggerNotify event(eventText, time(NULL)); singleton->getInstance()->raiseEvent(event); dprintf(D_FULLDEBUG, "Triggerd: Raised event with text '%s'\n", eventText.c_str()); } result.Delete(ad); } bad_trigger = false; result.Close(); } } delete query; } else { dprintf(D_FULLDEBUG, "Triggerd: No triggers to evaluate\n"); } // Look for absent nodes (nodes expected to be in the pool but aren't) if (NULL != console) { missing_nodes = console->findAbsentNodes(); if (0 < missing_nodes.size()) { for (std::list<std::string>::iterator node = missing_nodes.begin(); node != missing_nodes.end(); ++ node) { eventText = node->c_str(); eventText += " is missing from the pool"; EventCondorTriggerNotify event(eventText, time(NULL)); singleton->getInstance()->raiseEvent(event); dprintf(D_FULLDEBUG, "Triggerd: Raised event with text '%s'\n", eventText.c_str()); } } } return ret_val; }
void doContactSchedd() { int rc; Qmgr_connection *schedd; BaseJob *curr_job; ClassAd *next_ad; char expr_buf[12000]; bool schedd_updates_complete = false; bool schedd_deletes_complete = false; bool add_remove_jobs_complete = false; bool update_jobs_complete = false; bool commit_transaction = true; int failure_line_num = 0; bool send_reschedule = false; std::string error_str = ""; StringList dirty_job_ids; char *job_id_str; PROC_ID job_id; CondorError errstack; dprintf(D_FULLDEBUG,"in doContactSchedd()\n"); initJobExprs(); contactScheddTid = TIMER_UNSET; // vacateJobs ///////////////////////////////////////////////////// if ( pendingScheddVacates.getNumElements() != 0 ) { std::string buff; StringList job_ids; VacateRequest curr_request; int result; ClassAd* rval; pendingScheddVacates.startIterations(); while ( pendingScheddVacates.iterate( curr_request ) != 0 ) { formatstr( buff, "%d.%d", curr_request.job->procID.cluster, curr_request.job->procID.proc ); job_ids.append( buff.c_str() ); } char *tmp = job_ids.print_to_string(); if ( tmp ) { dprintf( D_FULLDEBUG, "Calling vacateJobs on %s\n", tmp ); free(tmp); tmp = NULL; } rval = ScheddObj->vacateJobs( &job_ids, VACATE_FAST, &errstack ); if ( rval == NULL ) { formatstr( error_str, "vacateJobs returned NULL, CondorError: %s!", errstack.getFullText().c_str() ); goto contact_schedd_failure; } else { pendingScheddVacates.startIterations(); while ( pendingScheddVacates.iterate( curr_request ) != 0 ) { formatstr( buff, "job_%d_%d", curr_request.job->procID.cluster, curr_request.job->procID.proc ); if ( !rval->LookupInteger( buff.c_str(), result ) ) { dprintf( D_FULLDEBUG, "vacateJobs returned malformed ad\n" ); EXCEPT( "vacateJobs returned malformed ad" ); } else { dprintf( D_FULLDEBUG, " %d.%d vacate result: %d\n", curr_request.job->procID.cluster, curr_request.job->procID.proc,result); pendingScheddVacates.remove( curr_request.job->procID ); curr_request.result = (action_result_t)result; 
curr_request.job->SetEvaluateState(); completedScheddVacates.insert( curr_request.job->procID, curr_request ); } } delete rval; } } schedd = ConnectQ( ScheddAddr, QMGMT_TIMEOUT, false, NULL, myUserName, CondorVersion() ); if ( !schedd ) { error_str = "Failed to connect to schedd!"; goto contact_schedd_failure; } // CheckLeases ///////////////////////////////////////////////////// if ( checkLeasesSignaled ) { dprintf( D_FULLDEBUG, "querying for renewed leases\n" ); // Grab the lease attributes of all the jobs in our global hashtable. BaseJob::JobsByProcId.startIterations(); while ( BaseJob::JobsByProcId.iterate( curr_job ) != 0 ) { int new_expiration; rc = GetAttributeInt( curr_job->procID.cluster, curr_job->procID.proc, ATTR_TIMER_REMOVE_CHECK, &new_expiration ); if ( rc < 0 ) { if ( errno == ETIMEDOUT ) { failure_line_num = __LINE__; commit_transaction = false; goto contact_schedd_disconnect; } else { // This job doesn't have doesn't have a lease from // the submitter. Skip it. continue; } } curr_job->UpdateJobLeaseReceived( new_expiration ); } checkLeasesSignaled = false; } // end of handling check leases // AddJobs ///////////////////////////////////////////////////// if ( addJobsSignaled || firstScheddContact ) { int num_ads = 0; dprintf( D_FULLDEBUG, "querying for new jobs\n" ); // Make sure we grab all Globus Universe jobs (except held ones // that we previously indicated we were done with) // when we first start up in case we're recovering from a // shutdown/meltdown. // Otherwise, grab all jobs that are unheld and aren't marked as // currently being managed and aren't marked as not matched. // If JobManaged is undefined, equate it with false. // If Matched is undefined, equate it with true. // NOTE: Schedds from Condor 6.6 and earlier don't include // "(Universe==9)" in the constraint they give to the gridmanager, // so this gridmanager will pull down non-globus-universe ads, // although it won't use them. This is inefficient but not // incorrect behavior. 
if ( firstScheddContact ) { // Grab all jobs for us to manage. This expression is a // derivative of the expression below for new jobs. We add // "|| Managed =?= TRUE" to also get jobs our previous // incarnation was in the middle of managing when it died // (if it died unexpectedly). With the new term, the // "&& Managed =!= TRUE" from the new jobs expression becomes // superfluous (by boolean logic), so we drop it. sprintf( expr_buf, "%s && %s && ((%s && %s) || %s)", expr_schedd_job_constraint.c_str(), expr_not_completely_done.c_str(), expr_matched_or_undef.c_str(), expr_not_held.c_str(), expr_managed.c_str() ); } else { // Grab new jobs for us to manage sprintf( expr_buf, "%s && %s && %s && %s && %s", expr_schedd_job_constraint.c_str(), expr_not_completely_done.c_str(), expr_matched_or_undef.c_str(), expr_not_held.c_str(), expr_not_managed.c_str() ); } dprintf( D_FULLDEBUG,"Using constraint %s\n",expr_buf); next_ad = GetNextJobByConstraint( expr_buf, 1 ); while ( next_ad != NULL ) { PROC_ID procID; BaseJob *old_job; int job_is_matched = 1; // default to true if not in ClassAd next_ad->LookupInteger( ATTR_CLUSTER_ID, procID.cluster ); next_ad->LookupInteger( ATTR_PROC_ID, procID.proc ); bool job_is_managed = jobExternallyManaged(next_ad); next_ad->LookupBool(ATTR_JOB_MATCHED,job_is_matched); if ( BaseJob::JobsByProcId.lookup( procID, old_job ) != 0 ) { JobType *job_type = NULL; BaseJob *new_job = NULL; // job had better be either managed or matched! (or both) ASSERT( job_is_managed || job_is_matched ); if ( MustExpandJobAd( next_ad ) ) { // Get the expanded ClassAd from the schedd, which // has the GridResource filled in with info from // the matched ad. 
delete next_ad; next_ad = NULL; next_ad = GetJobAd(procID.cluster,procID.proc); if ( next_ad == NULL && errno == ETIMEDOUT ) { failure_line_num = __LINE__; commit_transaction = false; goto contact_schedd_disconnect; } if ( next_ad == NULL ) { // We may get here if it was not possible to expand // one of the $$() expressions. We don't want to // roll back the transaction and blow away the // hold that the schedd just put on the job, so // simply skip over this ad. dprintf(D_ALWAYS,"Failed to get expanded job ClassAd from Schedd for %d.%d. errno=%d\n",procID.cluster,procID.proc,errno); goto contact_schedd_next_add_job; } } // Search our job types for one that'll handle this job jobTypes.Rewind(); while ( jobTypes.Next( job_type ) ) { if ( job_type->AdMatchFunc( next_ad ) ) { // Found one! dprintf( D_FULLDEBUG, "Using job type %s for job %d.%d\n", job_type->Name, procID.cluster, procID.proc ); break; } } if ( job_type != NULL ) { new_job = job_type->CreateFunc( next_ad ); } else { dprintf( D_ALWAYS, "No handlers for job %d.%d\n", procID.cluster, procID.proc ); new_job = new BaseJob( next_ad ); } ASSERT(new_job); new_job->SetEvaluateState(); dprintf(D_ALWAYS,"Found job %d.%d --- inserting\n", new_job->procID.cluster,new_job->procID.proc); num_ads++; if ( !job_is_managed ) { rc = tSetAttributeString( new_job->procID.cluster, new_job->procID.proc, ATTR_JOB_MANAGED, MANAGED_EXTERNAL); if ( rc < 0 ) { failure_line_num = __LINE__; commit_transaction = false; goto contact_schedd_disconnect; } } } else { // We already know about this job, skip // But also set Managed=true on the schedd so that it won't // keep signalling us about it delete next_ad; rc = tSetAttributeString( procID.cluster, procID.proc, ATTR_JOB_MANAGED, MANAGED_EXTERNAL ); if ( rc < 0 ) { failure_line_num = __LINE__; commit_transaction = false; goto contact_schedd_disconnect; } } contact_schedd_next_add_job: next_ad = GetNextJobByConstraint( expr_buf, 0 ); } // end of while next_ad if ( errno == ETIMEDOUT ) { 
failure_line_num = __LINE__; commit_transaction = false; goto contact_schedd_disconnect; } dprintf(D_FULLDEBUG,"Fetched %d new job ads from schedd\n",num_ads); } // end of handling add jobs // RemoveJobs ///////////////////////////////////////////////////// // We always want to perform this check. Otherwise, we may overwrite a // REMOVED/HELD/COMPLETED status with something else below. { int num_ads = 0; dprintf( D_FULLDEBUG, "querying for removed/held jobs\n" ); // Grab jobs marked as REMOVED/COMPLETED or marked as HELD that we // haven't previously indicated that we're done with (by setting // JobManaged to "Schedd". sprintf( expr_buf, "(%s) && (%s) && (%s == %d || %s == %d || (%s == %d && %s =?= \"%s\"))", ScheddJobConstraint, expr_not_completely_done.c_str(), ATTR_JOB_STATUS, REMOVED, ATTR_JOB_STATUS, COMPLETED, ATTR_JOB_STATUS, HELD, ATTR_JOB_MANAGED, MANAGED_EXTERNAL ); dprintf( D_FULLDEBUG,"Using constraint %s\n",expr_buf); next_ad = GetNextJobByConstraint( expr_buf, 1 ); while ( next_ad != NULL ) { PROC_ID procID; BaseJob *next_job; int curr_status; next_ad->LookupInteger( ATTR_CLUSTER_ID, procID.cluster ); next_ad->LookupInteger( ATTR_PROC_ID, procID.proc ); next_ad->LookupInteger( ATTR_JOB_STATUS, curr_status ); if ( BaseJob::JobsByProcId.lookup( procID, next_job ) == 0 ) { // Should probably skip jobs we already have marked as // held or removed next_job->JobAdUpdateFromSchedd( next_ad, true ); num_ads++; } else if ( curr_status == REMOVED ) { // If we don't know about the job, act like we got an // ADD_JOBS signal from the schedd the next time we // connect, so that we'll create a Job object for it // and decide how it needs to be handled. // TODO The AddJobs and RemoveJobs queries shoule be // combined into a single query. dprintf( D_ALWAYS, "Don't know about removed job %d.%d. " "Will treat it as a new job to manage\n", procID.cluster, procID.proc ); addJobsSignaled = true; } else { dprintf( D_ALWAYS, "Don't know about held/completed job %d.%d. 
" "Ignoring it\n", procID.cluster, procID.proc ); } delete next_ad; next_ad = GetNextJobByConstraint( expr_buf, 0 ); } if ( errno == ETIMEDOUT ) { failure_line_num = __LINE__; commit_transaction = false; goto contact_schedd_disconnect; } dprintf(D_FULLDEBUG,"Fetched %d job ads from schedd\n",num_ads); } if ( RemoteCommitTransaction() < 0 ) { failure_line_num = __LINE__; commit_transaction = false; goto contact_schedd_disconnect; } add_remove_jobs_complete = true; // Retrieve dirty attributes ///////////////////////////////////////////////////// if ( updateJobsSignaled ) { dprintf( D_FULLDEBUG, "querying for jobs with attribute updates\n" ); sprintf( expr_buf, "%s && %s && %s && %s", expr_schedd_job_constraint.c_str(), expr_not_completely_done.c_str(), expr_not_held.c_str(), expr_managed.c_str() ); dprintf( D_FULLDEBUG,"Using constraint %s\n",expr_buf); next_ad = GetNextDirtyJobByConstraint( expr_buf, 1 ); while ( next_ad != NULL ) { ClassAd updates; char str[PROC_ID_STR_BUFLEN]; next_ad->LookupInteger( ATTR_CLUSTER_ID, job_id.cluster ); next_ad->LookupInteger( ATTR_PROC_ID, job_id.proc ); if ( GetDirtyAttributes( job_id.cluster, job_id.proc, &updates ) < 0 ) { dprintf( D_ALWAYS, "Failed to retrieve dirty attributes for job %d.%d\n", job_id.cluster, job_id.proc ); failure_line_num = __LINE__; delete next_ad; goto contact_schedd_disconnect; } else { dprintf (D_FULLDEBUG, "Retrieved updated attributes for job %d.%d\n", job_id.cluster, job_id.proc); dPrintAd(D_JOB, updates); } if ( BaseJob::JobsByProcId.lookup( job_id, curr_job ) == 0 ) { curr_job->JobAdUpdateFromSchedd( &updates, false ); ProcIdToStr( job_id, str ); dirty_job_ids.append( str ); } else { dprintf( D_ALWAYS, "Don't know about updated job %d.%d. 
" "Ignoring it\n", job_id.cluster, job_id.proc ); } delete next_ad; next_ad = GetNextDirtyJobByConstraint( expr_buf, 0 ); } } update_jobs_complete = true; // if ( BeginTransaction() < 0 ) { errno = 0; BeginTransaction(); if ( errno == ETIMEDOUT ) { failure_line_num = __LINE__; commit_transaction = false; goto contact_schedd_disconnect; } // requestJobStatus ///////////////////////////////////////////////////// if ( pendingJobStatus.getNumElements() != 0 ) { JobStatusRequest curr_request; pendingJobStatus.startIterations(); while ( pendingJobStatus.iterate( curr_request ) != 0 ) { int status; rc = GetAttributeInt( curr_request.job_id.cluster, curr_request.job_id.proc, ATTR_JOB_STATUS, &status ); if ( rc < 0 ) { if ( errno == ETIMEDOUT ) { failure_line_num = __LINE__; commit_transaction = false; goto contact_schedd_disconnect; } else { // The job is not in the schedd's job queue. This // probably means that the user did a condor_rm -f, // so return a job status of REMOVED. status = REMOVED; } } // return status dprintf( D_FULLDEBUG, "%d.%d job status: %d\n", curr_request.job_id.cluster, curr_request.job_id.proc, status ); pendingJobStatus.remove( curr_request.job_id ); curr_request.job_status = status; daemonCore->Reset_Timer( curr_request.tid, 0 ); completedJobStatus.insert( curr_request.job_id, curr_request ); } } // Update existing jobs ///////////////////////////////////////////////////// ScheddUpdateRequest *curr_request; pendingScheddUpdates.startIterations(); while ( pendingScheddUpdates.iterate( curr_request ) != 0 ) { curr_job = curr_request->m_job; dprintf(D_FULLDEBUG,"Updating classad values for %d.%d:\n", curr_job->procID.cluster, curr_job->procID.proc); const char *attr_name; const char *attr_value; ExprTree *expr; bool fake_job_in_queue = false; curr_job->jobAd->ResetExpr(); while ( curr_job->jobAd->NextDirtyExpr(attr_name, expr) == true && fake_job_in_queue == false ) { attr_value = ExprTreeToString( expr ); dprintf(D_FULLDEBUG," %s = 
%s\n",attr_name,attr_value); rc = SetAttribute( curr_job->procID.cluster, curr_job->procID.proc, attr_name, attr_value); if ( rc < 0 ) { if ( errno == ETIMEDOUT ) { failure_line_num = __LINE__; commit_transaction = false; goto contact_schedd_disconnect; } else { // The job is not in the schedd's job queue. This // probably means that the user did a condor_rm -f, // so pretend that all updates for the job succeed. // Otherwise, we'll never make forward progress on // the job. // TODO We should also fake a job status of REMOVED // to the job, so it can do what cleanup it can. fake_job_in_queue = true; break; } } } } if ( RemoteCommitTransaction() < 0 ) { failure_line_num = __LINE__; commit_transaction = false; goto contact_schedd_disconnect; } schedd_updates_complete = true; // Delete existing jobs ///////////////////////////////////////////////////// errno = 0; BeginTransaction(); if ( errno == ETIMEDOUT ) { failure_line_num = __LINE__; commit_transaction = false; goto contact_schedd_disconnect; } pendingScheddUpdates.startIterations(); while ( pendingScheddUpdates.iterate( curr_request ) != 0 ) { curr_job = curr_request->m_job; if ( curr_job->deleteFromSchedd ) { dprintf(D_FULLDEBUG,"Deleting job %d.%d from schedd\n", curr_job->procID.cluster, curr_job->procID.proc); rc = DestroyProc(curr_job->procID.cluster, curr_job->procID.proc); // NOENT means the job doesn't exist. Good enough for us. 
if ( rc < 0 && rc != DESTROYPROC_ENOENT) { failure_line_num = __LINE__; commit_transaction = false; goto contact_schedd_disconnect; } } } if ( RemoteCommitTransaction() < 0 ) { failure_line_num = __LINE__; commit_transaction = false; goto contact_schedd_disconnect; } schedd_deletes_complete = true; contact_schedd_disconnect: DisconnectQ( schedd, commit_transaction ); if ( add_remove_jobs_complete == true ) { firstScheddContact = false; addJobsSignaled = false; } else { formatstr( error_str, "Schedd connection error during Add/RemoveJobs at line %d!", failure_line_num ); goto contact_schedd_failure; } if ( update_jobs_complete == true ) { updateJobsSignaled = false; } else { formatstr( error_str, "Schedd connection error during dirty attribute update at line %d!", failure_line_num ); goto contact_schedd_failure; } if ( schedd_updates_complete == false ) { formatstr( error_str, "Schedd connection error during updates at line %d!", failure_line_num ); goto contact_schedd_failure; } // Clear dirty bits for all jobs updated if ( !dirty_job_ids.isEmpty() ) { ClassAd *rval; dprintf( D_FULLDEBUG, "Calling clearDirtyAttrs on %d jobs\n", dirty_job_ids.number() ); dirty_job_ids.rewind(); rval = ScheddObj->clearDirtyAttrs( &dirty_job_ids, &errstack ); if ( rval == NULL ) { dprintf(D_ALWAYS, "Failed to notify schedd to clear dirty attributes. CondorError: %s\n", errstack.getFullText().c_str() ); } delete rval; } // Wake up jobs that had schedd updates pending and delete job // objects that wanted to be deleted pendingScheddUpdates.startIterations(); while ( pendingScheddUpdates.iterate( curr_request ) != 0 ) { curr_job = curr_request->m_job; curr_job->jobAd->ClearAllDirtyFlags(); if ( curr_job->deleteFromGridmanager ) { // If the Job object wants to delete the job from the // schedd but we failed to do so, don't delete the job // object yet; wait until we successfully delete the job // from the schedd. 
if ( curr_job->deleteFromSchedd == true && schedd_deletes_complete == false ) { continue; } // If wantRematch is set, send a reschedule now if ( curr_job->wantRematch ) { send_reschedule = true; } pendingScheddUpdates.remove( curr_job->procID ); pendingScheddVacates.remove( curr_job->procID ); pendingJobStatus.remove( curr_job->procID ); completedJobStatus.remove( curr_job->procID ); completedScheddVacates.remove( curr_job->procID ); delete curr_job; } else { pendingScheddUpdates.remove( curr_job->procID ); if ( curr_request->m_notify ) { curr_job->SetEvaluateState(); } } delete curr_request; } // Poke objects that wanted to be notified when a schedd update completed // successfully (possibly minus deletes) int timer_id; scheddUpdateNotifications.Rewind(); while ( scheddUpdateNotifications.Next( timer_id ) ) { daemonCore->Reset_Timer( timer_id, 0 ); } scheddUpdateNotifications.Clear(); if ( send_reschedule == true ) { ScheddObj->reschedule(); } // Check if we have any jobs left to manage. If not, exit. if ( BaseJob::JobsByProcId.getNumElements() == 0 ) { dprintf( D_ALWAYS, "No jobs left, shutting down\n" ); daemonCore->Send_Signal( daemonCore->getpid(), SIGTERM ); } lastContactSchedd = time(NULL); if ( schedd_deletes_complete == false ) { error_str = "Problem using DestroyProc to delete jobs!"; goto contact_schedd_failure; } scheddFailureCount = 0; // For each job that had dirty attributes, re-evaluate the policy dirty_job_ids.rewind(); while ( (job_id_str = dirty_job_ids.next()) != NULL ) { StrToProcIdFixMe(job_id_str, job_id); if ( BaseJob::JobsByProcId.lookup( job_id, curr_job ) == 0 ) { curr_job->EvalPeriodicJobExpr(); } } dprintf(D_FULLDEBUG,"leaving doContactSchedd()\n"); return; contact_schedd_failure: scheddFailureCount++; if ( error_str == "" ) { error_str = "Failure in doContactSchedd"; } if ( scheddFailureCount >= maxScheddFailures ) { dprintf( D_ALWAYS, "%s\n", error_str.c_str() ); EXCEPT( "Too many failures connecting to schedd!" 
); } dprintf( D_ALWAYS, "%s Will retry\n", error_str.c_str() ); lastContactSchedd = time(NULL); RequestContactSchedd(); return; }
int main(int argc, char **argv) { int result = 0; if ( argc <= 1 || (argc >= 2 && !strcmp("-usage", argv[1])) ) { printf("Usage: condor_check_userlogs <log file 1> " "[log file 2] ... [log file n]\n"); exit(0); } // Set up dprintf. dprintf_set_tool_debug("condor_check_userlogs", 0); set_debug_flags(NULL, D_ALWAYS); StringList logFiles; for ( int argnum = 1; argnum < argc; ++argnum ) { logFiles.append(argv[argnum]); } logFiles.rewind(); ReadMultipleUserLogs ru; char *filename; while ( (filename = logFiles.next()) ) { MyString filestring( filename ); CondorError errstack; if ( !ru.monitorLogFile( filestring, false, errstack ) ) { fprintf( stderr, "Error monitoring log file %s: %s\n", filename, errstack.getFullText().c_str() ); result = 1; } } bool logsMissing = false; CheckEvents ce; int totalSubmitted = 0; int netSubmitted = 0; bool done = false; while( !done ) { ULogEvent* e = NULL; MyString errorMsg; ULogEventOutcome outcome = ru.readEvent( e ); switch (outcome) { case ULOG_RD_ERROR: case ULOG_UNK_ERROR: logsMissing = true; case ULOG_NO_EVENT: printf( "Log outcome: %s\n", ULogEventOutcomeNames[outcome] ); done = true; break; case ULOG_OK: printf( "Log event: %s (%d.%d.%d)", ULogEventNumberNames[e->eventNumber], e->cluster, e->proc, e->subproc ); if ( ce.CheckAnEvent(e, errorMsg) != CheckEvents::EVENT_OKAY ) { fprintf(stderr, "%s\n", errorMsg.Value()); result = 1; } if( e->eventNumber == ULOG_SUBMIT ) { SubmitEvent* ee = (SubmitEvent*) e; printf( " (\"%s\")", ee->submitEventLogNotes ); ++totalSubmitted; ++netSubmitted; printf( "\n Total submitted: %d; net submitted: %d\n", totalSubmitted, netSubmitted ); } if( e->eventNumber == ULOG_JOB_HELD ) { JobHeldEvent* ee = (JobHeldEvent*) e; printf( " (code=%d subcode=%d)", ee->getReasonCode(), ee->getReasonSubCode()); } if( e->eventNumber == ULOG_JOB_TERMINATED ) { --netSubmitted; printf( "\n Total submitted: %d; net submitted: %d\n", totalSubmitted, netSubmitted ); } if( e->eventNumber == ULOG_JOB_ABORTED ) { 
--netSubmitted; printf( "\n Total submitted: %d; net submitted: %d\n", totalSubmitted, netSubmitted ); } if( e->eventNumber == ULOG_EXECUTABLE_ERROR ) { --netSubmitted; printf( "\n Total submitted: %d; net submitted: %d\n", totalSubmitted, netSubmitted ); } printf( "\n" ); break; default: fprintf(stderr, "Unexpected read event outcome!\n"); result = 1; break; } } logFiles.rewind(); while ( (filename = logFiles.next()) ) { MyString filestring( filename ); CondorError errstack; if ( !ru.unmonitorLogFile( filestring, errstack ) ) { fprintf( stderr, "Error unmonitoring log file %s: %s\n", filename, errstack.getFullText().c_str() ); result = 1; } } MyString errorMsg; CheckEvents::check_event_result_t checkAllResult = ce.CheckAllJobs(errorMsg); if ( checkAllResult != CheckEvents::EVENT_OKAY ) { fprintf(stderr, "%s\n", errorMsg.Value()); fprintf(stderr, "CheckAllJobs() result: %s\n", CheckEvents::ResultToString(checkAllResult)); result = 1; } if ( result == 0 ) { if ( !logsMissing ) { printf("Log(s) are okay\n"); } else { printf("Log(s) may be okay\n"); printf( "Some logs cannot be read\n"); } } else { printf("Log(s) have error(s)\n"); } return result; }
/**
 * Work out this job's spool directory and make sure it exists.
 *
 * spoolDirectory is set from gen_ckpt_name() applied to the SPOOL
 * configuration value and the job's cluster/proc.  If the directory does
 * not exist yet it is created through
 * SpooledJobFiles::createJobSpoolDirectory_PRIV_CONDOR().
 *
 * @param errstack  receives a "SOAP"/FAIL entry on failure
 * @return 0 on success (directory created or already present),
 *         1 if creating the directory failed,
 *         2 if stat() failed for any reason other than a missing,
 *           non-empty path.
 */
int
Job::initialize(CondorError &errstack)
{
    char * Spool = param("SPOOL");
    ASSERT(Spool);

    char *ckpt_name = gen_ckpt_name(Spool, id.cluster, id.proc, 0);
    spoolDirectory = ckpt_name;
    free(ckpt_name);
    ckpt_name = NULL;

    // Spool is known to be non-NULL here (see the ASSERT above).
    free(Spool);
    Spool = NULL;

    struct stat stats;
    if (-1 != stat(spoolDirectory.Value(), &stats)) {
        dprintf(D_FULLDEBUG,
                "WARNING: Job '%d.%d''s spool '%s' already exists.\n",
                id.cluster,
                id.proc,
                spoolDirectory.Value());
        return 0;
    }

    // Remember why stat() failed before calling anything else that might
    // overwrite errno.
    const int stat_errno = errno;

    if (ENOENT == stat_errno && spoolDirectory.Length() != 0) {

        // We assume here that the job is not a standard universe
        // job. Spooling works differently for standard universe.
        // Unfortunately, we might not know the job universe
        // yet, so standard universe is problematic with SOAP
        // (and always has been).
        if( !SpooledJobFiles::createJobSpoolDirectory_PRIV_CONDOR(id.cluster,id.proc,false) ) {
            errstack.pushf("SOAP",
                           FAIL,
                           "Creation of spool directory '%s' failed, "
                           "reason: %s",
                           spoolDirectory.Value(),
                           strerror(errno));
            return 1;
        }

        dprintf(D_FULLDEBUG,
                "mkdir(%s) succeeded.\n",
                spoolDirectory.Value());
        return 0;
    }

    dprintf(D_FULLDEBUG, "ERROR: stat(%s) errno: %d (%s)\n",
            spoolDirectory.Value(),
            stat_errno,
            strerror(stat_errno));

    errstack.pushf("SOAP",
                   FAIL,
                   "stat(%s) failed, reason: %s",
                   spoolDirectory.Value(),
                   strerror(stat_errno));

    return 2;
}
int Job::get_file(const MyString &name, int offset, int length, unsigned char *&data, CondorError &errstack) { #if !defined(WIN32) TemporaryPrivSentry sentry( true ); if ( param_boolean( "CHOWN_JOB_SPOOL_FILES", false ) == false ) { ClassAd *job_ad = GetJobAd_as_ClassAd( id.cluster, id.proc ); if ( job_ad == NULL ) { errstack.pushf("SOAP", FAIL, "Failed to retrieve job ad for file '%s'", name.Value()); return 5; } if ( !init_user_ids_from_ad( *job_ad ) ) { errstack.pushf("SOAP", FAIL, "Failed to init user ids for file '%s'", name.Value()); return 6; } set_user_priv(); } #endif int file = safe_open_wrapper_follow((spoolDirectory + DIR_DELIM_STRING + name).Value(), O_RDONLY | _O_BINARY, 0); if (-1 != file) { if (-1 == lseek(file, offset, SEEK_SET)) { close(file); errstack.pushf("SOAP", FAIL, "Failed to lseek in file '%s', reason: %s", name.Value(), strerror(errno)); return 2; } int result; if (-1 == (result = full_read(file, data, sizeof(unsigned char) * length))) { close(file); errstack.pushf("SOAP", FAIL, "Failed to read from file '%s', wanted to " "read %d bytes but received %d", name.Value(), length, result); return 3; } if (-1 == close(file)) { errstack.pushf("SOAP", FAIL, "Failed to close file '%s', reason: %s", name.Value(), strerror(errno)); return 4; } } else { errstack.pushf("SOAP", FAIL, "Failed to open file '%s', reason: %s", name.Value(), strerror(errno)); return 1; } return 0; }
int Job::submit(const struct condor__ClassAdStruct &jobAd, CondorError &errstack) { int i, rval; // XXX: This is ugly, and only should happen when spooling, // i.e. not always with cedar. rval = SetAttributeString(id.cluster, id.proc, ATTR_JOB_IWD, spoolDirectory.Value()); if (rval < 0) { errstack.pushf("SOAP", FAIL, "Failed to set job %d.%d's %s attribute to '%s'.", id.cluster, id.proc, ATTR_JOB_IWD, spoolDirectory.Value()); return rval; } StringList transferFiles; MyString currentKey; JobFile jobFile; declaredFiles.startIterations(); while (declaredFiles.iterate(currentKey, jobFile)) { transferFiles.append(jobFile.name.Value()); } char *fileList = NULL; if (0 == transferFiles.number()) { fileList = strdup(""); } else { fileList = transferFiles.print_to_string(); ASSERT(fileList); } rval = SetAttributeString(id.cluster, id.proc, ATTR_TRANSFER_INPUT_FILES, fileList); if (fileList) { free(fileList); fileList = NULL; } if (rval < 0) { errstack.pushf("SOAP", FAIL, "Failed to set job %d.%d's %s attribute.", id.cluster, id.proc, ATTR_TRANSFER_INPUT_FILES); return rval; } int found_iwd = 0; for (i = 0; i < jobAd.__size; i++) { const char* name = jobAd.__ptr[i].name; const char* value = jobAd.__ptr[i].value; if (!name) continue; if (!value) value="UNDEFINED"; // XXX: This is a quick fix. If processing MyType or // TargetType they should be ignored. Ideally we could // convert the ClassAdStruct to a ClassAd and then iterate // the ClassAd. 
if (0 == strcmp(name, ATTR_MY_TYPE) || 0 == strcmp(name, ATTR_TARGET_TYPE)) { continue; } if ( jobAd.__ptr[i].type == STRING_ATTR ) { // string type - put value in quotes as hint for ClassAd parser found_iwd = found_iwd || !strcmp(name, ATTR_JOB_IWD); rval = SetAttributeString(id.cluster, id.proc, name, value); } else { // all other types can be deduced by the ClassAd parser rval = SetAttribute(id.cluster, id.proc, name, value); } if ( rval < 0 ) { errstack.pushf("SOAP", FAIL, "Failed to set job %d.%d's %s attribute.", id.cluster, id.proc, name); return rval; } } // Trust the client knows what it is doing if there is an Iwd. if (!found_iwd) { // We need to make sure the Iwd is rewritten so files // in the spool directory can be found. rval = SetAttributeString(id.cluster, id.proc, ATTR_JOB_IWD, spoolDirectory.Value()); if (rval < 0) { errstack.pushf("SOAP", FAIL, "Failed to set %d.%d's %s attribute to '%s'.", id.cluster, id.proc, ATTR_JOB_IWD, spoolDirectory.Value()); return rval; } } return 0; }