// Replace the master proxy's file with a copy of copy_src's file, adopt
// copy_src's expiration data, and then invoke every callback registered
// on the master proxy.
//
// Returns true on success.  Returns false if either the copy or the
// rotate step fails; in that case the master's expiration fields are not
// modified and no callbacks are run.
static bool SetMasterProxy( Proxy *master, const Proxy *copy_src )
{
    // Stage the new contents in "<master file>.tmp" and let rotate_file()
    // move the staged copy into place.
    std::string staging_path;
    formatstr( staging_path, "%s.tmp", master->proxy_filename );

    if ( copy_file( copy_src->proxy_filename, staging_path.c_str() ) != 0 ) {
        return false;
    }

    if ( rotate_file( staging_path.c_str(), master->proxy_filename ) != 0 ) {
        // Best-effort removal of the staged copy.
        MSC_SUPPRESS_WARNING_FIXME(6031) // warning: return value of 'unlink' ignored.
        unlink( staging_path.c_str() );
        return false;
    }

    master->expiration_time = copy_src->expiration_time;
    master->near_expired = copy_src->near_expired;

    // Notify everyone registered on the master proxy.
    Callback notify;
    for ( master->m_callbacks.Rewind(); master->m_callbacks.Next( notify ); ) {
        ((notify.m_data)->*(notify.m_func_ptr))();
    }

    return true;
}
void main_shutdown_rescue( int exitVal, Dag::dag_status dagStatus ) { // Avoid possible infinite recursion if you hit a fatal error // while writing a rescue DAG. static bool inShutdownRescue = false; if ( inShutdownRescue ) { return; } inShutdownRescue = true; dagman.dag->_dagStatus = dagStatus; debug_printf( DEBUG_QUIET, "Aborting DAG...\n" ); // Avoid writing two different rescue DAGs if the "main" DAG and // the final node (if any) both fail. static bool wroteRescue = false; if( dagman.dag ) { // We write the rescue DAG *before* removing jobs because // otherwise if we crashed, failed, or were killed while // removing them, we would leave the DAG in an // unrecoverable state... if( exitVal != 0 ) { if ( dagman.maxRescueDagNum > 0 ) { dagman.dag->Rescue( dagman.primaryDagFile.Value(), dagman.multiDags, dagman.maxRescueDagNum, wroteRescue, false, dagman._writePartialRescueDag ); wroteRescue = true; } else { debug_printf( DEBUG_QUIET, "No rescue DAG written because " "DAGMAN_MAX_RESCUE_NUM is 0\n" ); } } debug_printf( DEBUG_DEBUG_1, "We have %d running jobs to remove\n", dagman.dag->NumJobsSubmitted() ); if( dagman.dag->NumJobsSubmitted() > 0 ) { debug_printf( DEBUG_NORMAL, "Removing submitted jobs...\n" ); dagman.dag->RemoveRunningJobs(dagman); } if ( dagman.dag->NumScriptsRunning() > 0 ) { debug_printf( DEBUG_NORMAL, "Removing running scripts...\n" ); dagman.dag->RemoveRunningScripts(); } dagman.dag->PrintDeferrals( DEBUG_NORMAL, true ); // Start the final node if we have one. if ( dagman.dag->StartFinalNode() ) { // We started a final node; return here so we wait for the // final node to finish, instead of exiting immediately. inShutdownRescue = false; return; } dagman.dag->DumpNodeStatus( false, true ); dagman.dag->GetJobstateLog().WriteDagmanFinished( exitVal ); } MSC_SUPPRESS_WARNING_FIXME(6031) // return falue of unlink ignored. unlink( lockFileName ); dagman.CleanUp(); inShutdownRescue = false; DC_Exit( exitVal ); }
void ExitSuccess() { dagman.dag->DumpNodeStatus( false, false ); dagman.dag->GetJobstateLog().WriteDagmanFinished( EXIT_OKAY ); MSC_SUPPRESS_WARNING_FIXME(6031) // return falue of unlink ignored. unlink( lockFileName ); dagman.CleanUp(); DC_Exit( EXIT_OKAY ); }
static void add_child(FILE* fp, child_handle_t ch) { struct popen_entry *pe = (struct popen_entry *)malloc(sizeof(struct popen_entry)); MSC_SUPPRESS_WARNING_FIXME(6011) // Dereferencing a null pointer, malloc can return NULL. pe->fp = fp; pe->ch = ch; pe->next = popen_entry_head; popen_entry_head = pe; }
void GlobusResource::CleanupMonitorJob() { if ( monitorGramJobId ) { monitorGahp->globus_gram_client_job_cancel( monitorGramJobId ); free( monitorGramJobId ); monitorGramJobId = NULL; monitorGramJobStatus = GLOBUS_GRAM_PROTOCOL_JOB_STATE_UNKNOWN; monitorGramErrorCode = 0; } if ( monitorDirectory ) { std::string tmp_dir; formatstr( tmp_dir, "%s.remove", monitorDirectory ); MSC_SUPPRESS_WARNING_FIXME(6031) // warning: return value of 'rename' ignored. rename( monitorDirectory, tmp_dir.c_str() ); free( monitorDirectory ); monitorDirectory = NULL; Directory tmp( tmp_dir.c_str() ); tmp.Remove_Entire_Directory(); MSC_SUPPRESS_WARNING_FIXME(6031) // warning: return value of 'rmdir' ignored. rmdir( tmp_dir.c_str() ); } if(monitorJobStatusFile) { free(monitorJobStatusFile); monitorJobStatusFile = NULL; } if(monitorLogFile) { free(monitorLogFile); monitorLogFile = NULL; } }
int LogHistoricalSequenceNumber::ReadBody(FILE *fp) { int rval,rval1; char *buf = NULL; rval = readword(fp, buf); if (rval < 0) return rval; MSC_SUPPRESS_WARNING_FIXME(6031)// return value of scanf ignored. int64 does not match %lu sscanf(buf,"%lu",&historical_sequence_number); free(buf); rval1 = readword(fp, buf); //the label of the attribute //we ignore it if (rval1 < 0) return rval1; free(buf); rval1 = readword(fp, buf); if (rval1 < 0) return rval1; MSC_SUPPRESS_WARNING_FIXME(6031 6328)// return value of scanf ignored. int64 does not match %lu sscanf(buf,"%lu",×tamp); free(buf); return rval + rval1; }
int MyProxyGetDelegationReaper(Service *, int exitPid, int exitStatus) { // Find the right MyProxyEntry Proxy *proxy=NULL; MyProxyEntry *matched_entry=NULL; int found = FALSE; // Iterate through each proxy ProxiesByFilename.startIterations(); while ( ProxiesByFilename.iterate( proxy ) != 0 ) { // Iterate through all myproxy entries for the proxy proxy->myproxy_entries.Rewind(); while (proxy->myproxy_entries.Next(matched_entry)) { if (matched_entry->get_delegation_pid == exitPid) { found = TRUE; break; } } if (found) { break; } } if (!found) { dprintf (D_ALWAYS, "WEIRD! MyProxyManager::GetDelegationReaper unable to find entry for pid %d", exitPid); return FALSE; } if (exitStatus == 0) { dprintf (D_ALWAYS, "myproxy-get-delegation for proxy %s exited successfully\n", proxy->proxy_filename); close (matched_entry->get_delegation_err_fd); } else { // This myproxyEntry is no good, move it to the back of the list MyProxyEntry * myProxyEntry = NULL; proxy->myproxy_entries.Rewind(); if (proxy->myproxy_entries.Next (myProxyEntry)) { proxy->myproxy_entries.DeleteCurrent(); proxy->myproxy_entries.Append (myProxyEntry); } // In the case of an error, append the stderr stream of myproxy-get-delegation to log close (matched_entry->get_delegation_err_fd); char buff[500]; buff[0]='\0'; std::string output; int fd = safe_open_wrapper_follow(matched_entry->get_delegation_err_filename, O_RDONLY); if (fd != -1) { int bytes_read; do { bytes_read = read( fd, buff, 499 ); if ( bytes_read > 0 ) { buff[bytes_read] = '\0'; output += buff; } else if ( bytes_read < 0 ) { dprintf( D_ALWAYS, "WEIRD! Cannot read err file %s, " "errno=%d (%s)\n", matched_entry->get_delegation_err_filename, errno, strerror( errno ) ); } } while ( bytes_read > 0 ); close (fd); } else { dprintf( D_ALWAYS, "WEIRD! 
Cannot open err file %s, " "errno=%d (%s)\n", matched_entry->get_delegation_err_filename, errno, strerror( errno ) ); } dprintf (D_ALWAYS, "myproxy-get-delegation for proxy %s, for job (%d.%d) exited with code %d, output (top):\n%s\n", proxy->proxy_filename, matched_entry->cluster_id, matched_entry->proc_id, WEXITSTATUS(exitStatus), output.c_str()); } // Clean up close (matched_entry->get_delegation_password_pipe[0]); close (matched_entry->get_delegation_password_pipe[1]); matched_entry->get_delegation_password_pipe[0]=-1; matched_entry->get_delegation_password_pipe[1]=-1; matched_entry->get_delegation_err_fd=-1; matched_entry->get_delegation_pid=FALSE; MSC_SUPPRESS_WARNING_FIXME(6031) // warning: return value of 'unlink' ignored. unlink (matched_entry->get_delegation_err_filename);// Remove the temporary file free (matched_entry->get_delegation_err_filename); matched_entry->get_delegation_err_filename=NULL; return TRUE; }
// Close both ends of a pipe descriptor pair that are still open and mark
// them as closed (-1).
static void CloseMyProxyPasswordPipe( int pipe_fds[2] )
{
    for ( int idx = 0; idx < 2; idx++ ) {
        if ( pipe_fds[idx] >= 0 ) {
            close( pipe_fds[idx] );
            pipe_fds[idx] = -1;
        }
    }
}

// Launch myproxy-get-delegation to refresh the given proxy.
//
// Picks the first MyProxy entry of the proxy for which a password is
// available, moves it to the front of the entry list, writes the
// password into a pipe that becomes the child's stdin, and spawns the
// program named by the MYPROXY_GET_DELEGATION config parameter with its
// stdout/stderr redirected to a temporary file.
//
// Returns TRUE if the child process was started (its pid is stored in
// the entry's get_delegation_pid), FALSE otherwise.
int RefreshProxyThruMyProxy(Proxy * proxy)
{
    char * proxy_filename = proxy->proxy_filename;
    MyProxyEntry * myProxyEntry = NULL;
    // Declared up front so that the gotos below do not jump over any
    // initialization.
    MyString args_string;
    int pid;

    // Starting from the most recent myproxy entry,
    // find an entry with a password.
    int found = FALSE;
    proxy->myproxy_entries.Rewind();
    while (proxy->myproxy_entries.Next (myProxyEntry)) {
        if (myProxyEntry->myproxy_password ||
            GetMyProxyPasswordFromSchedD (myProxyEntry->cluster_id,
                                          myProxyEntry->proc_id,
                                          &(myProxyEntry->myproxy_password))) {
            found = TRUE;

            // Now move it to the front of the list.
            proxy->myproxy_entries.DeleteCurrent();
            proxy->myproxy_entries.Prepend(myProxyEntry);
            break;
        }
    }

    if (!found) {
        // We're screwed - can't get MyProxy passwords for any entry.
        return FALSE;
    }

    // Make sure we're not called more often than necessary, and that a
    // myproxy-get-delegation is not already running for this entry.
    time_t now = time(NULL);
    if ((myProxyEntry->get_delegation_pid != FALSE) ||
        (now - myProxyEntry->last_invoked_time < 30)) {
        dprintf (D_ALWAYS,
                 "proxy %s too soon or myproxy-get-delegation already started\n",
                 proxy_filename);
        return FALSE;
    }
    myProxyEntry->last_invoked_time = now;

    // Without a password there is nothing to send to the child.
    if (!myProxyEntry->myproxy_password) {
        // Will there ever be a case when there is no MyProxy password
        // needed at all?
        return FALSE;
    }

    // Initialize reaper, if needed.
    if (myproxyGetDelegationReaperId == 0 ) {
        myproxyGetDelegationReaperId = daemonCore->Register_Reaper(
                    "GetDelegationReaper",
                    (ReaperHandler) &MyProxyGetDelegationReaper,
                    "GetDelegation Reaper");
    }

    // Set up environment for myproxy-get-delegation.
    Env myEnv;
    std::string buff;

    if (myProxyEntry->myproxy_server_dn) {
        formatstr( buff, "MYPROXY_SERVER_DN=%s",
                   myProxyEntry->myproxy_server_dn);
        myEnv.SetEnv(buff.c_str());
        dprintf (D_FULLDEBUG, "%s\n", buff.c_str());
    }

    formatstr(buff, "X509_USER_PROXY=%s", proxy_filename);
    myEnv.SetEnv (buff.c_str());
    dprintf (D_FULLDEBUG, "%s\n", buff.c_str());

    // Print password (this will end up in stdin for
    // myproxy-get-delegation).
    if (pipe (myProxyEntry->get_delegation_password_pipe)) {
        dprintf(D_ALWAYS, "Failed to pipe(2) in RefreshProxyThruMyProxy "
                "for writing password, aborting\n");
        return FALSE;
    }

    const size_t password_len = strlen (myProxyEntry->myproxy_password);
    int written = (int) write (myProxyEntry->get_delegation_password_pipe[1],
                               myProxyEntry->myproxy_password,
                               password_len);
    if (written < (int) password_len) {
        dprintf(D_ALWAYS,
                "Failed to write to pipe in RefreshProxyThruMyProxy %d\n",
                errno);
        // Don't leak the pipe we just created.
        CloseMyProxyPasswordPipe( myProxyEntry->get_delegation_password_pipe );
        return FALSE;
    }

    written = (int) write (myProxyEntry->get_delegation_password_pipe[1],
                           "\n", 1);
    if (written < 1) {
        dprintf(D_ALWAYS,
                "Failed to write to pipe in RefreshProxyThruMyProxy %d\n",
                errno);
        // Don't leak the pipe we just created.
        CloseMyProxyPasswordPipe( myProxyEntry->get_delegation_password_pipe );
        return FALSE;
    }

    // Figure out user name.
    char * username = my_username(0);

    // Figure out myproxy host and port.
    char * myproxy_host = getHostFromAddr (myProxyEntry->myproxy_host);
    int myproxy_port = getPortFromAddr (myProxyEntry->myproxy_host);

    // Build the argument list.
    ArgList args;
    args.AppendArg(proxy_filename);
    args.AppendArg("-v");
    args.AppendArg("-o");
    args.AppendArg(proxy_filename);
    args.AppendArg("-s");
    args.AppendArg(myproxy_host);
    args.AppendArg("-d");
    args.AppendArg("-t");
    args.AppendArg(myProxyEntry->new_proxy_lifetime);
    args.AppendArg("-S");
    args.AppendArg("-l");
    args.AppendArg(username);

    // Optional port argument.
    if (myproxy_port) {
        args.AppendArg("-p");
        args.AppendArg(myproxy_port);
    }

    // Optional credential name argument.
    if (myProxyEntry->myproxy_credential_name) {
        args.AppendArg("-k");
        args.AppendArg(myProxyEntry->myproxy_credential_name);
    }

    free (username);
    free (myproxy_host);

    // Create temporary file to store myproxy-get-delegation's stderr.
    myProxyEntry->get_delegation_err_filename = create_temp_file();
    if (!myProxyEntry->get_delegation_err_filename) {
        dprintf( D_ALWAYS, "Failed to create temp file\n");
        // Make sure a stale descriptor value is not handed to the child
        // or closed later.
        myProxyEntry->get_delegation_err_fd = -1;
    } else {
        MSC_SUPPRESS_WARNING_FIXME(6031) // warning: return value of 'chmod' ignored.
        chmod (myProxyEntry->get_delegation_err_filename, 0600);
        myProxyEntry->get_delegation_err_fd = safe_open_wrapper_follow(
                    myProxyEntry->get_delegation_err_filename, O_RDWR);
        if (myProxyEntry->get_delegation_err_fd == -1) {
            dprintf (D_ALWAYS, "Error opening file %s\n",
                     myProxyEntry->get_delegation_err_filename);
        }
    }

    int arrIO[3];
    arrIO[0] = myProxyEntry->get_delegation_password_pipe[0]; // stdin
    arrIO[1] = myProxyEntry->get_delegation_err_fd;            // stdout
    arrIO[2] = myProxyEntry->get_delegation_err_fd;            // stderr

    char * myproxy_get_delegation_pgm = param ("MYPROXY_GET_DELEGATION");
    if (!myproxy_get_delegation_pgm) {
        dprintf (D_ALWAYS,
                 "MYPROXY_GET_DELEGATION not defined in config file\n");
        goto error_exit;
    }

    args.GetArgsStringForDisplay(&args_string);
    dprintf (D_ALWAYS, "Calling %s %s\n", myproxy_get_delegation_pgm,
             args_string.Value());

    pid = daemonCore->Create_Process (
                    myproxy_get_delegation_pgm,
                    args,
                    PRIV_USER_FINAL,
                    myproxyGetDelegationReaperId,
                    FALSE,
                    &myEnv,
                    NULL,    // cwd
                    NULL,    // process family info
                    NULL,    // socket inherit
                    arrIO);  // in/out/err streams
    free (myproxy_get_delegation_pgm);

    if (pid == FALSE) {
        dprintf (D_ALWAYS, "Failed to run myproxy-get-delegation\n");
        goto error_exit;
    }

    myProxyEntry->get_delegation_pid = pid;
    return TRUE;

 error_exit:
    // Undo everything set up for this invocation.
    myProxyEntry->get_delegation_pid = FALSE;

    if (myProxyEntry->get_delegation_err_fd >= 0) {
        close (myProxyEntry->get_delegation_err_fd);
        myProxyEntry->get_delegation_err_fd = -1;
    }

    if (myProxyEntry->get_delegation_err_filename) {
        // Remove the temporary file (best effort).
        MSC_SUPPRESS_WARNING_FIXME(6031) // warning: return value of 'unlink' ignored.
        unlink (myProxyEntry->get_delegation_err_filename);
        free (myProxyEntry->get_delegation_err_filename);
        myProxyEntry->get_delegation_err_filename = NULL;
    }

    CloseMyProxyPasswordPipe( myProxyEntry->get_delegation_password_pipe );

    return FALSE;
}
// Decide how a finished JVM should be classified, based on its wait
// status and on the start/end record files left by the Java wrapper.
//
// status - wait status of the JVM process.
//
// Returns one of JAVA_EXIT_NORMAL, JAVA_EXIT_EXCEPTION or
// JAVA_EXIT_SYSTEM_ERROR.  Both record files are unlinked (as
// PRIV_ROOT) before returning.
java_exit_mode_t JavaProc::ClassifyExit( int status )
{
    char verdict[11]; // enough for "abnormal"
    verdict[0] = 0;

    const int normal_exit = WIFEXITED(status);
    const int exit_code = WEXITSTATUS(status);
    const int sig_num = WTERMSIG(status);

    java_exit_mode_t exit_mode;

    if( !normal_exit ) {
        dprintf(D_FAILURE|D_ALWAYS,
                "JavaProc: JVM exited abnormally with signal %d\n", sig_num);
        exit_mode = JAVA_EXIT_SYSTEM_ERROR;
    } else {
        dprintf(D_ALWAYS,
                "JavaProc: JVM exited normally with code %d\n", exit_code);

        FILE *start_fp = safe_fopen_wrapper_follow(startfile.Value(),"r");
        if( !start_fp ) {
            dprintf(D_FAILURE|D_ALWAYS,
                    "JavaProc: Wrapper did not leave start record.\n");
            dprintf(D_ALWAYS,
                    "JavaProc: I'll assume Java is misconfigured here.\n");
            exit_mode = JAVA_EXIT_SYSTEM_ERROR;
        } else {
            dprintf(D_ALWAYS,
                    "JavaProc: Wrapper left start record %s\n",
                    startfile.Value());
            fclose(start_fp);

            FILE *end_fp = safe_fopen_wrapper_follow(endfile.Value(),"r");
            if( !end_fp ) {
                dprintf(D_FAILURE|D_ALWAYS,
                        "JavaProc: Wrapper did not leave end record %s\n",
                        endfile.Value());
                dprintf(D_ALWAYS,
                        "JavaProc: Thus, job called System.exit(%d)\n",
                        exit_code);
                exit_mode = JAVA_EXIT_NORMAL;
            } else {
                dprintf(D_ALWAYS,
                        "JavaProc: Wrapper left end record %s\n",
                        endfile.Value());

                // Read at most sizeof(verdict)-1 characters of the
                // wrapper's result word.
                const int fields = fscanf(end_fp,"%10s",verdict);

                if( fields != 1 ) {
                    dprintf(D_FAILURE|D_ALWAYS,
                            "JavaProc: Job called System.exit(%d)\n",
                            exit_code);
                    exit_mode = JAVA_EXIT_NORMAL;
                } else if( strcmp(verdict,"normal") == 0 ) {
                    dprintf(D_FAILURE|D_ALWAYS,
                            "JavaProc: Job returned from main()\n");
                    exit_mode = JAVA_EXIT_NORMAL;
                } else if( strcmp(verdict,"abnormal") == 0 ) {
                    ParseExceptionFile(end_fp);
                    if( strcmp(ex_type.Value(),"java.lang.Error") == 0 ) {
                        dprintf(D_FAILURE|D_ALWAYS,
                                "JavaProc: Job threw a %s (%s), "
                                "will retry it later.\n",
                                ex_name.Value(), ex_type.Value());
                        exit_mode = JAVA_EXIT_SYSTEM_ERROR;
                    } else {
                        dprintf(D_FAILURE|D_ALWAYS,
                                "JavaProc: Job threw a %s (%s), "
                                "will return it to the user.\n",
                                ex_name.Value(), ex_type.Value());
                        exit_mode = JAVA_EXIT_EXCEPTION;
                    }
                } else if( strcmp(verdict,"noexec") == 0 ) {
                    ParseExceptionFile(end_fp);
                    dprintf(D_FAILURE|D_ALWAYS,
                            "JavaProc: Job could not be executed\n");
                    exit_mode = JAVA_EXIT_EXCEPTION;
                } else {
                    dprintf(D_FAILURE|D_ALWAYS,
                            "JavaProc: Unknown wrapper result '%s'\n",
                            verdict);
                    exit_mode = JAVA_EXIT_SYSTEM_ERROR;
                }

                fclose(end_fp);
            }
        }
    }

    dprintf(D_ALWAYS,"JavaProc: unlinking %s and %s\n",
            startfile.Value(),endfile.Value());

    // Remove both record files with root privileges, then restore the
    // previous privilege state.
    priv_state saved_priv = set_priv(PRIV_ROOT);
    MSC_SUPPRESS_WARNING_FIXME(6031) // return value of unlink ignored.
    unlink(startfile.Value());
    MSC_SUPPRESS_WARNING_FIXME(6031) // return value of unlink ignored.
    unlink(endfile.Value());
    set_priv(saved_priv);

    return exit_mode;
}
//--------------------------------------------------------------------------- void main_init (int argc, char ** const argv) { printf ("Executing condor dagman ... \n"); // flag used if DAGMan is invoked with -WaitForDebug so we // wait for a developer to attach with a debugger... volatile int wait_for_debug = 0; // process any config vars -- this happens before we process // argv[], since arguments should override config settings dagman.Config(); // The DCpermission (last parm) should probably be PARENT, if it existed daemonCore->Register_Signal( SIGUSR1, "SIGUSR1", (SignalHandler) main_shutdown_remove, "main_shutdown_remove", NULL); /****** FOR TESTING ******* daemonCore->Register_Signal( SIGUSR2, "SIGUSR2", (SignalHandler) main_testing_stub, "main_testing_stub", NULL); ****** FOR TESTING ********/ debug_progname = condor_basename(argv[0]); // condor_submit_dag version from .condor.sub bool allowVerMismatch = false; const char *csdVersion = "undefined"; int i; for (i = 0 ; i < argc ; i++) { debug_printf( DEBUG_NORMAL, "argv[%d] == \"%s\"\n", i, argv[i] ); } if (argc < 2) Usage(); // Make sure an input file was specified // get dagman job id from environment, if it's there // (otherwise it will be set to "-1.-1.-1") dagman.DAGManJobId.SetFromString( getenv( EnvGetName( ENV_ID ) ) ); //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ // Minimum legal version for a .condor.sub file to be compatible // with this condor_dagman binary. // !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! // Be sure to change this if the arguments or environment // passed to condor_dagman change in an incompatible way!! // !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! struct DagVersionData { int majorVer; int minorVer; int subMinorVer; }; const DagVersionData MIN_SUBMIT_FILE_VERSION = { 7, 1, 2 }; // Construct a string of the minimum submit file version. 
MyString minSubmitVersionStr; minSubmitVersionStr.formatstr( "%d.%d.%d", MIN_SUBMIT_FILE_VERSION.majorVer, MIN_SUBMIT_FILE_VERSION.minorVer, MIN_SUBMIT_FILE_VERSION.subMinorVer ); //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ // // Process command-line arguments // for (i = 1; i < argc; i++) { if( !strcasecmp( "-Debug", argv[i] ) ) { i++; if( argc <= i || strcmp( argv[i], "" ) == 0 ) { debug_printf( DEBUG_SILENT, "No debug level specified\n" ); Usage(); } debug_level = (debug_level_t) atoi (argv[i]); } else if( !strcasecmp( "-Lockfile", argv[i] ) ) { i++; if( argc <= i || strcmp( argv[i], "" ) == 0 ) { debug_printf( DEBUG_SILENT, "No DagMan lockfile specified\n" ); Usage(); } lockFileName = argv[i]; } else if( !strcasecmp( "-Help", argv[i] ) ) { Usage(); } else if (!strcasecmp( "-Dag", argv[i] ) ) { i++; if( argc <= i || strcmp( argv[i], "" ) == 0 ) { debug_printf( DEBUG_SILENT, "No DAG specified\n" ); Usage(); } dagman.dagFiles.append( argv[i] ); } else if( !strcasecmp( "-MaxIdle", argv[i] ) ) { i++; if( argc <= i || strcmp( argv[i], "" ) == 0 ) { debug_printf( DEBUG_SILENT, "Integer missing after -MaxIdle\n" ); Usage(); } dagman.maxIdle = atoi( argv[i] ); } else if( !strcasecmp( "-MaxJobs", argv[i] ) ) { i++; if( argc <= i || strcmp( argv[i], "" ) == 0 ) { debug_printf( DEBUG_SILENT, "Integer missing after -MaxJobs\n" ); Usage(); } dagman.maxJobs = atoi( argv[i] ); } else if( !strcasecmp( "-MaxScripts", argv[i] ) ) { debug_printf( DEBUG_SILENT, "-MaxScripts has been replaced with " "-MaxPre and -MaxPost arguments\n" ); Usage(); } else if( !strcasecmp( "-MaxPre", argv[i] ) ) { i++; if( argc <= i || strcmp( argv[i], "" ) == 0 ) { debug_printf( DEBUG_SILENT, "Integer missing after -MaxPre\n" ); Usage(); } dagman.maxPreScripts = atoi( argv[i] ); } else if( !strcasecmp( "-MaxPost", argv[i] ) ) { i++; if( argc <= i || strcmp( argv[i], "" ) == 0 ) { debug_printf( DEBUG_SILENT, "Integer missing after -MaxPost\n" ); Usage(); } dagman.maxPostScripts = 
atoi( argv[i] ); } else if( !strcasecmp( "-NoEventChecks", argv[i] ) ) { debug_printf( DEBUG_QUIET, "Warning: -NoEventChecks is " "ignored; please use the DAGMAN_ALLOW_EVENTS " "config parameter instead\n"); check_warning_strictness( DAG_STRICT_1 ); } else if( !strcasecmp( "-AllowLogError", argv[i] ) ) { dagman.allowLogError = true; } else if( !strcasecmp( "-DontAlwaysRunPost",argv[i] ) ) { dagman._runPost = false; } else if( !strcasecmp( "-WaitForDebug", argv[i] ) ) { wait_for_debug = 1; } else if( !strcasecmp( "-UseDagDir", argv[i] ) ) { dagman.useDagDir = true; } else if( !strcasecmp( "-AutoRescue", argv[i] ) ) { i++; if( argc <= i || strcmp( argv[i], "" ) == 0 ) { debug_printf( DEBUG_SILENT, "No AutoRescue value specified\n" ); Usage(); } dagman.autoRescue = (atoi( argv[i] ) != 0); } else if( !strcasecmp( "-DoRescueFrom", argv[i] ) ) { i++; if( argc <= i || strcmp( argv[i], "" ) == 0 ) { debug_printf( DEBUG_SILENT, "No rescue DAG number specified\n" ); Usage(); } dagman.doRescueFrom = atoi (argv[i]); } else if( !strcasecmp( "-CsdVersion", argv[i] ) ) { i++; if( argc <= i || strcmp( argv[i], "" ) == 0 ) { debug_printf( DEBUG_SILENT, "No CsdVersion value specified\n" ); Usage(); } csdVersion = argv[i]; } else if( !strcasecmp( "-AllowVersionMismatch", argv[i] ) ) { allowVerMismatch = true; } else if( !strcasecmp( "-DumpRescue", argv[i] ) ) { dagman.dumpRescueDag = true; } else if( !strcasecmp( "-verbose", argv[i] ) ) { dagman._submitDagDeepOpts.bVerbose = true; } else if( !strcasecmp( "-force", argv[i] ) ) { dagman._submitDagDeepOpts.bForce = true; } else if( !strcasecmp( "-notification", argv[i] ) ) { i++; if( argc <= i || strcmp( argv[i], "" ) == 0 ) { debug_printf( DEBUG_SILENT, "No notification value specified\n" ); Usage(); } dagman._submitDagDeepOpts.strNotification = argv[i]; } else if( !strcasecmp( "-dagman", argv[i] ) ) { i++; if( argc <= i || strcmp( argv[i], "" ) == 0 ) { debug_printf( DEBUG_SILENT, "No dagman value specified\n" ); Usage(); } 
dagman._submitDagDeepOpts.strDagmanPath = argv[i]; } else if( !strcasecmp( "-outfile_dir", argv[i] ) ) { i++; if( argc <= i || strcmp( argv[i], "" ) == 0 ) { debug_printf( DEBUG_SILENT, "No outfile_dir value specified\n" ); Usage(); } dagman._submitDagDeepOpts.strOutfileDir = argv[i]; } else if( !strcasecmp( "-update_submit", argv[i] ) ) { dagman._submitDagDeepOpts.updateSubmit = true; } else if( !strcasecmp( "-import_env", argv[i] ) ) { dagman._submitDagDeepOpts.importEnv = true; } else if( !strcasecmp( "-priority", argv[i] ) ) { ++i; if( i >= argc || strcmp( argv[i], "" ) == 0 ) { debug_printf( DEBUG_NORMAL, "No priority value specified\n"); Usage(); } dagman._submitDagDeepOpts.priority = atoi(argv[i]); } else if( !strcasecmp( "-dont_use_default_node_log", argv[i] ) ) { dagman._submitDagDeepOpts.always_use_node_log = false; } else { debug_printf( DEBUG_SILENT, "\nUnrecognized argument: %s\n", argv[i] ); Usage(); } } dagman.dagFiles.rewind(); dagman.primaryDagFile = dagman.dagFiles.next(); dagman.multiDags = (dagman.dagFiles.number() > 1); MyString tmpDefaultLog; if ( dagman._defaultNodeLog != NULL ) { tmpDefaultLog = dagman._defaultNodeLog; free( dagman._defaultNodeLog ); } else { tmpDefaultLog = dagman.primaryDagFile + ".nodes.log"; } // Force default log file path to be absolute so it works // with -usedagdir and DIR nodes. 
CondorError errstack; if ( !MultiLogFiles::makePathAbsolute( tmpDefaultLog, errstack) ) { debug_printf( DEBUG_QUIET, "Unable to convert default log " "file name to absolute path: %s\n", errstack.getFullText().c_str() ); dagman.dag->GetJobstateLog().WriteDagmanFinished( EXIT_ERROR ); DC_Exit( EXIT_ERROR ); } dagman._defaultNodeLog = strdup( tmpDefaultLog.Value() ); debug_printf( DEBUG_NORMAL, "Default node log file is: <%s>\n", dagman._defaultNodeLog); // // Check the arguments // //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ // Checking for version compatibility between the .condor.sub // file and this condor_dagman binary... // Note: if we're in recovery mode and the submit file version // causes us to quit, we leave any existing node jobs still // running -- may want to change that eventually. wenger 2009-10-13. // Version of the condor_submit_dag that created our submit file. CondorVersionInfo submitFileVersion( csdVersion ); // Version of this condor_dagman binary. CondorVersionInfo dagmanVersion; // Just generate this message fragment in one place. MyString versionMsg; versionMsg.formatstr("the version (%s) of this DAG's Condor submit " "file (created by condor_submit_dag)", csdVersion ); // Make sure version in submit file is valid. if( !submitFileVersion.is_valid() ) { if ( !allowVerMismatch ) { debug_printf( DEBUG_QUIET, "Error: %s is invalid!\n", versionMsg.Value() ); DC_Exit( EXIT_ERROR ); } else { debug_printf( DEBUG_NORMAL, "Warning: %s is invalid; " "continuing because of -AllowVersionMismatch flag\n", versionMsg.Value() ); } // Make sure .condor.sub file is recent enough. 
} else if ( submitFileVersion.compare_versions( CondorVersion() ) != 0 ) { if( !submitFileVersion.built_since_version( MIN_SUBMIT_FILE_VERSION.majorVer, MIN_SUBMIT_FILE_VERSION.minorVer, MIN_SUBMIT_FILE_VERSION.subMinorVer ) ) { if ( !allowVerMismatch ) { debug_printf( DEBUG_QUIET, "Error: %s is older than " "oldest permissible version (%s)\n", versionMsg.Value(), minSubmitVersionStr.Value() ); DC_Exit( EXIT_ERROR ); } else { debug_printf( DEBUG_NORMAL, "Warning: %s is older than " "oldest permissible version (%s); continuing " "because of -AllowVersionMismatch flag\n", versionMsg.Value(), minSubmitVersionStr.Value() ); } // Warn if .condor.sub file is a newer version than this binary. } else if (dagmanVersion.compare_versions( csdVersion ) > 0 ) { debug_printf( DEBUG_NORMAL, "Warning: %s is newer than " "condor_dagman version (%s)\n", versionMsg.Value(), CondorVersion() ); check_warning_strictness( DAG_STRICT_3 ); } else { debug_printf( DEBUG_NORMAL, "Note: %s differs from " "condor_dagman version (%s), but the " "difference is permissible\n", versionMsg.Value(), CondorVersion() ); } } //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if( dagman.primaryDagFile == "" ) { debug_printf( DEBUG_SILENT, "No DAG file was specified\n" ); Usage(); } if (lockFileName == NULL) { debug_printf( DEBUG_SILENT, "No DAG lock file was specified\n" ); Usage(); } if( dagman.maxJobs < 0 ) { debug_printf( DEBUG_SILENT, "-MaxJobs must be non-negative\n"); Usage(); } if( dagman.maxPreScripts < 0 ) { debug_printf( DEBUG_SILENT, "-MaxPre must be non-negative\n" ); Usage(); } if( dagman.maxPostScripts < 0 ) { debug_printf( DEBUG_SILENT, "-MaxPost must be non-negative\n" ); Usage(); } if( dagman.doRescueFrom < 0 ) { debug_printf( DEBUG_SILENT, "-DoRescueFrom must be non-negative\n" ); Usage(); } debug_printf( DEBUG_VERBOSE, "DAG Lockfile will be written to %s\n", lockFileName ); if ( dagman.dagFiles.number() == 1 ) { debug_printf( DEBUG_VERBOSE, "DAG Input file is %s\n", 
dagman.primaryDagFile.Value() ); } else { MyString msg = "DAG Input files are "; dagman.dagFiles.rewind(); const char *dagFile; while ( (dagFile = dagman.dagFiles.next()) != NULL ) { msg += dagFile; msg += " "; } msg += "\n"; debug_printf( DEBUG_VERBOSE, "%s", msg.Value() ); } // if requested, wait for someone to attach with a debugger... while( wait_for_debug ) { } { MyString cwd; if( !condor_getcwd(cwd) ) { cwd = "<null>"; } debug_printf( DEBUG_DEBUG_1, "Current path is %s\n",cwd.Value()); char *temp = my_username(); debug_printf( DEBUG_DEBUG_1, "Current user is %s\n", temp ? temp : "<null>" ); if( temp ) { free( temp ); } } // // Figure out the rescue DAG to run, if any (this is with "new- // style" rescue DAGs). // int rescueDagNum = 0; MyString rescueDagMsg; if ( dagman.doRescueFrom != 0 ) { rescueDagNum = dagman.doRescueFrom; rescueDagMsg.formatstr( "Rescue DAG number %d specified", rescueDagNum ); RenameRescueDagsAfter( dagman.primaryDagFile.Value(), dagman.multiDags, rescueDagNum, dagman.maxRescueDagNum ); } else if ( dagman.autoRescue ) { rescueDagNum = FindLastRescueDagNum( dagman.primaryDagFile.Value(), dagman.multiDags, dagman.maxRescueDagNum ); rescueDagMsg.formatstr( "Found rescue DAG number %d", rescueDagNum ); } // // Fill in values in the deep submit options that we haven't // already set. // dagman._submitDagDeepOpts.bAllowLogError = dagman.allowLogError; dagman._submitDagDeepOpts.useDagDir = dagman.useDagDir; dagman._submitDagDeepOpts.autoRescue = dagman.autoRescue; dagman._submitDagDeepOpts.doRescueFrom = dagman.doRescueFrom; dagman._submitDagDeepOpts.allowVerMismatch = allowVerMismatch; dagman._submitDagDeepOpts.recurse = false; // // Create the DAG // // Note: a bunch of the parameters we pass here duplicate things // in submitDagOpts, but I'm keeping them separate so we don't have to // bother to construct a new SubmitDagOtions object for splices. 
// wenger 2010-03-25 dagman.dag = new Dag( dagman.dagFiles, dagman.maxJobs, dagman.maxPreScripts, dagman.maxPostScripts, dagman.allowLogError, dagman.useDagDir, dagman.maxIdle, dagman.retrySubmitFirst, dagman.retryNodeFirst, dagman.condorRmExe, dagman.storkRmExe, &dagman.DAGManJobId, dagman.prohibitMultiJobs, dagman.submitDepthFirst, dagman._defaultNodeLog, dagman._generateSubdagSubmits, &dagman._submitDagDeepOpts, false ); /* toplevel dag! */ if( dagman.dag == NULL ) { EXCEPT( "ERROR: out of memory!\n"); } dagman.dag->SetAbortOnScarySubmit( dagman.abortOnScarySubmit ); dagman.dag->SetAllowEvents( dagman.allow_events ); dagman.dag->SetConfigFile( dagman._dagmanConfigFile ); dagman.dag->SetMaxJobHolds( dagman._maxJobHolds ); dagman.dag->SetPostRun(dagman._runPost); if( dagman._submitDagDeepOpts.priority != 0 ) { // From command line dagman.dag->SetDefaultPriority(dagman._submitDagDeepOpts.priority); } else if( dagman._defaultPriority != 0 ) { // From config file dagman.dag->SetDefaultPriority(dagman._defaultPriority); dagman._submitDagDeepOpts.priority = dagman._defaultPriority; } // // Parse the input files. The parse() routine // takes care of adding jobs and dependencies to the DagMan // dagman.mungeNodeNames = (dagman.dagFiles.number() > 1); parseSetDoNameMunge( dagman.mungeNodeNames ); debug_printf( DEBUG_VERBOSE, "Parsing %d dagfiles\n", dagman.dagFiles.number() ); dagman.dagFiles.rewind(); char *dagFile; // Here we make a copy of the dagFiles for iteration purposes. Deep inside // of the parsing, copies of the dagman.dagFile string list happen which // mess up the iteration of this list. StringList sl( dagman.dagFiles ); sl.rewind(); while ( (dagFile = sl.next()) != NULL ) { debug_printf( DEBUG_VERBOSE, "Parsing %s ...\n", dagFile ); if( !parse( dagman.dag, dagFile, dagman.useDagDir ) ) { if ( dagman.dumpRescueDag ) { // Dump the rescue DAG so we can see what we got // in the failed parse attempt. 
debug_printf( DEBUG_QUIET, "Dumping rescue DAG " "because of -DumpRescue flag\n" ); dagman.dag->Rescue( dagman.primaryDagFile.Value(), dagman.multiDags, dagman.maxRescueDagNum, false, true, false ); } dagman.dag->RemoveRunningJobs(dagman, true); MSC_SUPPRESS_WARNING_FIXME(6031) // return falue of unlink ignored. unlink( lockFileName ); dagman.CleanUp(); // Note: debug_error calls DC_Exit(). debug_error( 1, DEBUG_QUIET, "Failed to parse %s\n", dagFile ); } } if( dagman.dag->GetDefaultPriority() != 0 ) { dagman.dag->SetDefaultPriorities(); // Applies to the nodes of the dag } dagman.dag->GetJobstateLog().WriteDagmanStarted( dagman.DAGManJobId ); if ( rescueDagNum > 0 ) { // Get our Pegasus sequence numbers set correctly. dagman.dag->GetJobstateLog().InitializeRescue(); } // lift the final set of splices into the main dag. dagman.dag->LiftSplices(SELF); // // Actually parse the "new-new" style (partial DAG info only) // rescue DAG here. Note: this *must* be done after splices // are lifted! // if ( rescueDagNum > 0 ) { dagman.rescueFileToRun = RescueDagName( dagman.primaryDagFile.Value(), dagman.multiDags, rescueDagNum ); debug_printf ( DEBUG_QUIET, "%s; running %s in combination with " "normal DAG file%s\n", rescueDagMsg.Value(), dagman.rescueFileToRun.Value(), dagman.multiDags ? "s" : ""); debug_printf ( DEBUG_QUIET, "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n"); debug_printf ( DEBUG_QUIET, "USING RESCUE DAG %s\n", dagman.rescueFileToRun.Value() ); // Turn off node name munging for the rescue DAG, because // it will already have munged node names. parseSetDoNameMunge( false ); if( !parse( dagman.dag, dagman.rescueFileToRun.Value(), dagman.useDagDir ) ) { if ( dagman.dumpRescueDag ) { // Dump the rescue DAG so we can see what we got // in the failed parse attempt. 
debug_printf( DEBUG_QUIET, "Dumping rescue DAG " "because of -DumpRescue flag\n" ); dagman.dag->Rescue( dagman.primaryDagFile.Value(), dagman.multiDags, dagman.maxRescueDagNum, true, false ); } dagman.dag->RemoveRunningJobs(dagman, true); MSC_SUPPRESS_WARNING_FIXME(6031) // return falue of unlink ignored. unlink( lockFileName ); dagman.CleanUp(); // Note: debug_error calls DC_Exit(). debug_error( 1, DEBUG_QUIET, "Failed to parse %s\n", dagFile ); } } dagman.dag->CheckThrottleCats(); // fix up any use of $(JOB) in the vars values for any node dagman.dag->ResolveVarsInterpolations(); /* debug_printf(DEBUG_QUIET, "COMPLETED DAG!\n");*/ /* dagman.dag->PrintJobList();*/ #ifndef NOT_DETECT_CYCLE if( dagman.startup_cycle_detect && dagman.dag->isCycle() ) { // Note: maybe we should run the final node here, if there is one. // wenger 2011-12-19. debug_error (1, DEBUG_QUIET, "ERROR: a cycle exists in the dag, please check input\n"); } #endif debug_printf( DEBUG_VERBOSE, "Dag contains %d total jobs\n", dagman.dag->NumNodes( true ) ); MyString firstLocation; if ( dagman.dag->GetReject( firstLocation ) ) { debug_printf( DEBUG_QUIET, "Exiting because of REJECT " "specification in %s. This most likely means " "that the DAG file was produced with the -DumpRescue " "flag when parsing the original DAG failed.\n", firstLocation.Value() ); DC_Exit( EXIT_ERROR ); return; } dagman.dag->DumpDotFile(); if ( dagman.dumpRescueDag ) { debug_printf( DEBUG_QUIET, "Dumping rescue DAG and exiting " "because of -DumpRescue flag\n" ); dagman.dag->Rescue( dagman.primaryDagFile.Value(), dagman.multiDags, dagman.maxRescueDagNum, false, false, false ); ExitSuccess(); return; } //------------------------------------------------------------------------ // Bootstrap and Recovery // // If the Lockfile exists, this indicates a premature termination // of a previous run of Dagman. 
If condor log is also present, // we run in recovery mode // If the Daglog is not present, then we do not run in recovery // mode { bool recovery = access(lockFileName, F_OK) == 0; if (recovery) { debug_printf( DEBUG_VERBOSE, "Lock file %s detected, \n", lockFileName); if (dagman.abortDuplicates) { if (util_check_lock_file(lockFileName) == 1) { debug_printf( DEBUG_QUIET, "Aborting because it " "looks like another instance of DAGMan is " "currently running on this DAG; if that is " "not the case, delete the lock file (%s) " "and re-submit the DAG.\n", lockFileName ); dagman.dag->GetJobstateLog(). WriteDagmanFinished( EXIT_RESTART ); dagman.CleanUp(); DC_Exit( EXIT_ERROR ); // We should never get to here! } } } // // If this DAGMan continues, it should overwrite the lock // file if it exists. // util_create_lock_file(lockFileName, dagman.abortDuplicates); debug_printf( DEBUG_VERBOSE, "Bootstrapping...\n"); if( !dagman.dag->Bootstrap( recovery ) ) { dagman.dag->PrintReadyQ( DEBUG_DEBUG_1 ); debug_error( 1, DEBUG_QUIET, "ERROR while bootstrapping\n"); } } debug_printf( DEBUG_VERBOSE, "Registering condor_event_timer...\n" ); daemonCore->Register_Timer( 1, dagman.m_user_log_scan_interval, condor_event_timer, "condor_event_timer" ); dagman.dag->SetPendingNodeReportInterval( dagman.pendingReportInterval ); }