//--------------------------------------------------------------------------- void ensureOutputFilesExist(const SubmitDagDeepOptions &deepOpts, SubmitDagShallowOptions &shallowOpts) { int maxRescueDagNum = param_integer("DAGMAN_MAX_RESCUE_NUM", MAX_RESCUE_DAG_DEFAULT, 0, ABS_MAX_RESCUE_DAG_NUM); if (deepOpts.doRescueFrom > 0) { MyString rescueDagName = RescueDagName(shallowOpts.primaryDagFile.Value(), shallowOpts.dagFiles.number() > 1, deepOpts.doRescueFrom); if (!fileExists(rescueDagName)) { fprintf( stderr, "-dorescuefrom %d specified, but rescue " "DAG file %s does not exist!\n", deepOpts.doRescueFrom, rescueDagName.Value() ); exit( 1 ); } } // Get rid of the halt file (if one exists). tolerant_unlink( HaltFileName( shallowOpts.primaryDagFile ).Value() ); if (deepOpts.bForce) { tolerant_unlink(shallowOpts.strSubFile.Value()); tolerant_unlink(shallowOpts.strSchedLog.Value()); tolerant_unlink(shallowOpts.strLibOut.Value()); tolerant_unlink(shallowOpts.strLibErr.Value()); RenameRescueDagsAfter(shallowOpts.primaryDagFile.Value(), shallowOpts.dagFiles.number() > 1, 0, maxRescueDagNum); } // Check whether we're automatically running a rescue DAG -- if // so, allow things to continue even if the files generated // by condor_submit_dag already exist. bool autoRunningRescue = false; if (deepOpts.autoRescue) { int rescueDagNum = FindLastRescueDagNum(shallowOpts.primaryDagFile.Value(), shallowOpts.dagFiles.number() > 1, maxRescueDagNum); if (rescueDagNum > 0) { printf("Running rescue DAG %d\n", rescueDagNum); autoRunningRescue = true; } } bool bHadError = false; // If not running a rescue DAG, check for existing files // generated by condor_submit_dag... if (!autoRunningRescue && deepOpts.doRescueFrom < 1 && !deepOpts.updateSubmit) { if (fileExists(shallowOpts.strSubFile)) { fprintf( stderr, "ERROR: \"%s\" already exists.\n", shallowOpts.strSubFile.Value() ); bHadError = true; } if (fileExists(shallowOpts.strLibOut)) { fprintf( stderr, "ERROR: \"%s\" already exists.\n", shallowOpts.strLibOut.Value() ); bHadError = true; } if (fileExists(shallowOpts.strLibErr)) { fprintf( stderr, "ERROR: \"%s\" already exists.\n", shallowOpts.strLibErr.Value() ); bHadError = true; } if (fileExists(shallowOpts.strSchedLog)) { fprintf( stderr, "ERROR: \"%s\" already exists.\n", shallowOpts.strSchedLog.Value() ); bHadError = true; } } // This is checking for the existance of an "old-style" rescue // DAG file. if (!deepOpts.autoRescue && deepOpts.doRescueFrom < 1 && fileExists(shallowOpts.strRescueFile)) { fprintf( stderr, "ERROR: \"%s\" already exists.\n", shallowOpts.strRescueFile.Value() ); fprintf( stderr, " You may want to resubmit your DAG using that " "file, instead of \"%s\"\n", shallowOpts.primaryDagFile.Value()); fprintf( stderr, " Look at the Condor manual for details about DAG " "rescue files.\n" ); fprintf( stderr, " Please investigate and either remove \"%s\",\n", shallowOpts.strRescueFile.Value() ); fprintf( stderr, " or use it as the input to condor_submit_dag.\n" ); bHadError = true; } if (bHadError) { fprintf( stderr, "\nSome file(s) needed by %s already exist. ", dagman_exe ); fprintf( stderr, "Either rename them,\nuse the \"-f\" option to " "force them to be overwritten, or use\n" "the \"-update_submit\" option to update the submit " "file and continue.\n" ); exit( 1 ); } }
//--------------------------------------------------------------------------- void main_init (int argc, char ** const argv) { printf ("Executing condor dagman ... \n"); // flag used if DAGMan is invoked with -WaitForDebug so we // wait for a developer to attach with a debugger... volatile int wait_for_debug = 0; // process any config vars -- this happens before we process // argv[], since arguments should override config settings dagman.Config(); // The DCpermission (last parm) should probably be PARENT, if it existed daemonCore->Register_Signal( SIGUSR1, "SIGUSR1", (SignalHandler) main_shutdown_remove, "main_shutdown_remove", NULL); /****** FOR TESTING ******* daemonCore->Register_Signal( SIGUSR2, "SIGUSR2", (SignalHandler) main_testing_stub, "main_testing_stub", NULL); ****** FOR TESTING ********/ debug_progname = condor_basename(argv[0]); // condor_submit_dag version from .condor.sub bool allowVerMismatch = false; const char *csdVersion = "undefined"; int i; for (i = 0 ; i < argc ; i++) { debug_printf( DEBUG_NORMAL, "argv[%d] == \"%s\"\n", i, argv[i] ); } if (argc < 2) Usage(); // Make sure an input file was specified // get dagman job id from environment, if it's there // (otherwise it will be set to "-1.-1.-1") dagman.DAGManJobId.SetFromString( getenv( EnvGetName( ENV_ID ) ) ); //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ // Minimum legal version for a .condor.sub file to be compatible // with this condor_dagman binary. // !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! // Be sure to change this if the arguments or environment // passed to condor_dagman change in an incompatible way!! // !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! struct DagVersionData { int majorVer; int minorVer; int subMinorVer; }; const DagVersionData MIN_SUBMIT_FILE_VERSION = { 7, 1, 2 }; // Construct a string of the minimum submit file version. MyString minSubmitVersionStr; minSubmitVersionStr.formatstr( "%d.%d.%d", MIN_SUBMIT_FILE_VERSION.majorVer, MIN_SUBMIT_FILE_VERSION.minorVer, MIN_SUBMIT_FILE_VERSION.subMinorVer ); //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ // // Process command-line arguments // for (i = 1; i < argc; i++) { if( !strcasecmp( "-Debug", argv[i] ) ) { i++; if( argc <= i || strcmp( argv[i], "" ) == 0 ) { debug_printf( DEBUG_SILENT, "No debug level specified\n" ); Usage(); } debug_level = (debug_level_t) atoi (argv[i]); } else if( !strcasecmp( "-Lockfile", argv[i] ) ) { i++; if( argc <= i || strcmp( argv[i], "" ) == 0 ) { debug_printf( DEBUG_SILENT, "No DagMan lockfile specified\n" ); Usage(); } lockFileName = argv[i]; } else if( !strcasecmp( "-Help", argv[i] ) ) { Usage(); } else if (!strcasecmp( "-Dag", argv[i] ) ) { i++; if( argc <= i || strcmp( argv[i], "" ) == 0 ) { debug_printf( DEBUG_SILENT, "No DAG specified\n" ); Usage(); } dagman.dagFiles.append( argv[i] ); } else if( !strcasecmp( "-MaxIdle", argv[i] ) ) { i++; if( argc <= i || strcmp( argv[i], "" ) == 0 ) { debug_printf( DEBUG_SILENT, "Integer missing after -MaxIdle\n" ); Usage(); } dagman.maxIdle = atoi( argv[i] ); } else if( !strcasecmp( "-MaxJobs", argv[i] ) ) { i++; if( argc <= i || strcmp( argv[i], "" ) == 0 ) { debug_printf( DEBUG_SILENT, "Integer missing after -MaxJobs\n" ); Usage(); } dagman.maxJobs = atoi( argv[i] ); } else if( !strcasecmp( "-MaxScripts", argv[i] ) ) { debug_printf( DEBUG_SILENT, "-MaxScripts has been replaced with " "-MaxPre and -MaxPost arguments\n" ); Usage(); } else if( !strcasecmp( "-MaxPre", argv[i] ) ) { i++; if( argc <= i || strcmp( argv[i], "" ) == 0 ) { debug_printf( DEBUG_SILENT, "Integer missing after -MaxPre\n" ); Usage(); } dagman.maxPreScripts = atoi( argv[i] ); } else if( !strcasecmp( "-MaxPost", argv[i] ) ) { i++; if( argc <= i || strcmp( argv[i], "" ) == 0 ) { debug_printf( DEBUG_SILENT, "Integer missing after -MaxPost\n" ); Usage(); } dagman.maxPostScripts = atoi( argv[i] ); } else if( !strcasecmp( "-NoEventChecks", argv[i] ) ) { debug_printf( DEBUG_QUIET, "Warning: -NoEventChecks is " "ignored; please use the DAGMAN_ALLOW_EVENTS " "config parameter instead\n"); check_warning_strictness( DAG_STRICT_1 ); } else if( !strcasecmp( "-AllowLogError", argv[i] ) ) { dagman.allowLogError = true; } else if( !strcasecmp( "-DontAlwaysRunPost",argv[i] ) ) { dagman._runPost = false; } else if( !strcasecmp( "-WaitForDebug", argv[i] ) ) { wait_for_debug = 1; } else if( !strcasecmp( "-UseDagDir", argv[i] ) ) { dagman.useDagDir = true; } else if( !strcasecmp( "-AutoRescue", argv[i] ) ) { i++; if( argc <= i || strcmp( argv[i], "" ) == 0 ) { debug_printf( DEBUG_SILENT, "No AutoRescue value specified\n" ); Usage(); } dagman.autoRescue = (atoi( argv[i] ) != 0); } else if( !strcasecmp( "-DoRescueFrom", argv[i] ) ) { i++; if( argc <= i || strcmp( argv[i], "" ) == 0 ) { debug_printf( DEBUG_SILENT, "No rescue DAG number specified\n" ); Usage(); } dagman.doRescueFrom = atoi (argv[i]); } else if( !strcasecmp( "-CsdVersion", argv[i] ) ) { i++; if( argc <= i || strcmp( argv[i], "" ) == 0 ) { debug_printf( DEBUG_SILENT, "No CsdVersion value specified\n" ); Usage(); } csdVersion = argv[i]; } else if( !strcasecmp( "-AllowVersionMismatch", argv[i] ) ) { allowVerMismatch = true; } else if( !strcasecmp( "-DumpRescue", argv[i] ) ) { dagman.dumpRescueDag = true; } else if( !strcasecmp( "-verbose", argv[i] ) ) { dagman._submitDagDeepOpts.bVerbose = true; } else if( !strcasecmp( "-force", argv[i] ) ) { dagman._submitDagDeepOpts.bForce = true; } else if( !strcasecmp( "-notification", argv[i] ) ) { i++; if( argc <= i || strcmp( argv[i], "" ) == 0 ) { debug_printf( DEBUG_SILENT, "No notification value specified\n" ); Usage(); } dagman._submitDagDeepOpts.strNotification = argv[i]; } else if( !strcasecmp( "-dagman", argv[i] ) ) { i++; if( argc <= i || strcmp( argv[i], "" ) == 0 ) { debug_printf( DEBUG_SILENT, "No dagman value specified\n" ); Usage(); } dagman._submitDagDeepOpts.strDagmanPath = argv[i]; } else if( !strcasecmp( "-outfile_dir", argv[i] ) ) { i++; if( argc <= i || strcmp( argv[i], "" ) == 0 ) { debug_printf( DEBUG_SILENT, "No outfile_dir value specified\n" ); Usage(); } dagman._submitDagDeepOpts.strOutfileDir = argv[i]; } else if( !strcasecmp( "-update_submit", argv[i] ) ) { dagman._submitDagDeepOpts.updateSubmit = true; } else if( !strcasecmp( "-import_env", argv[i] ) ) { dagman._submitDagDeepOpts.importEnv = true; } else if( !strcasecmp( "-priority", argv[i] ) ) { ++i; if( i >= argc || strcmp( argv[i], "" ) == 0 ) { debug_printf( DEBUG_NORMAL, "No priority value specified\n"); Usage(); } dagman._submitDagDeepOpts.priority = atoi(argv[i]); } else if( !strcasecmp( "-dont_use_default_node_log", argv[i] ) ) { dagman._submitDagDeepOpts.always_use_node_log = false; } else { debug_printf( DEBUG_SILENT, "\nUnrecognized argument: %s\n", argv[i] ); Usage(); } } dagman.dagFiles.rewind(); dagman.primaryDagFile = dagman.dagFiles.next(); dagman.multiDags = (dagman.dagFiles.number() > 1); MyString tmpDefaultLog; if ( dagman._defaultNodeLog != NULL ) { tmpDefaultLog = dagman._defaultNodeLog; free( dagman._defaultNodeLog ); } else { tmpDefaultLog = dagman.primaryDagFile + ".nodes.log"; } // Force default log file path to be absolute so it works // with -usedagdir and DIR nodes. CondorError errstack; if ( !MultiLogFiles::makePathAbsolute( tmpDefaultLog, errstack) ) { debug_printf( DEBUG_QUIET, "Unable to convert default log " "file name to absolute path: %s\n", errstack.getFullText().c_str() ); dagman.dag->GetJobstateLog().WriteDagmanFinished( EXIT_ERROR ); DC_Exit( EXIT_ERROR ); } dagman._defaultNodeLog = strdup( tmpDefaultLog.Value() ); debug_printf( DEBUG_NORMAL, "Default node log file is: <%s>\n", dagman._defaultNodeLog); // // Check the arguments // //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ // Checking for version compatibility between the .condor.sub // file and this condor_dagman binary... // Note: if we're in recovery mode and the submit file version // causes us to quit, we leave any existing node jobs still // running -- may want to change that eventually. wenger 2009-10-13. // Version of the condor_submit_dag that created our submit file. CondorVersionInfo submitFileVersion( csdVersion ); // Version of this condor_dagman binary. CondorVersionInfo dagmanVersion; // Just generate this message fragment in one place. MyString versionMsg; versionMsg.formatstr("the version (%s) of this DAG's Condor submit " "file (created by condor_submit_dag)", csdVersion ); // Make sure version in submit file is valid. if( !submitFileVersion.is_valid() ) { if ( !allowVerMismatch ) { debug_printf( DEBUG_QUIET, "Error: %s is invalid!\n", versionMsg.Value() ); DC_Exit( EXIT_ERROR ); } else { debug_printf( DEBUG_NORMAL, "Warning: %s is invalid; " "continuing because of -AllowVersionMismatch flag\n", versionMsg.Value() ); } // Make sure .condor.sub file is recent enough. } else if ( submitFileVersion.compare_versions( CondorVersion() ) != 0 ) { if( !submitFileVersion.built_since_version( MIN_SUBMIT_FILE_VERSION.majorVer, MIN_SUBMIT_FILE_VERSION.minorVer, MIN_SUBMIT_FILE_VERSION.subMinorVer ) ) { if ( !allowVerMismatch ) { debug_printf( DEBUG_QUIET, "Error: %s is older than " "oldest permissible version (%s)\n", versionMsg.Value(), minSubmitVersionStr.Value() ); DC_Exit( EXIT_ERROR ); } else { debug_printf( DEBUG_NORMAL, "Warning: %s is older than " "oldest permissible version (%s); continuing " "because of -AllowVersionMismatch flag\n", versionMsg.Value(), minSubmitVersionStr.Value() ); } // Warn if .condor.sub file is a newer version than this binary. } else if (dagmanVersion.compare_versions( csdVersion ) > 0 ) { debug_printf( DEBUG_NORMAL, "Warning: %s is newer than " "condor_dagman version (%s)\n", versionMsg.Value(), CondorVersion() ); check_warning_strictness( DAG_STRICT_3 ); } else { debug_printf( DEBUG_NORMAL, "Note: %s differs from " "condor_dagman version (%s), but the " "difference is permissible\n", versionMsg.Value(), CondorVersion() ); } } //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if( dagman.primaryDagFile == "" ) { debug_printf( DEBUG_SILENT, "No DAG file was specified\n" ); Usage(); } if (lockFileName == NULL) { debug_printf( DEBUG_SILENT, "No DAG lock file was specified\n" ); Usage(); } if( dagman.maxJobs < 0 ) { debug_printf( DEBUG_SILENT, "-MaxJobs must be non-negative\n"); Usage(); } if( dagman.maxPreScripts < 0 ) { debug_printf( DEBUG_SILENT, "-MaxPre must be non-negative\n" ); Usage(); } if( dagman.maxPostScripts < 0 ) { debug_printf( DEBUG_SILENT, "-MaxPost must be non-negative\n" ); Usage(); } if( dagman.doRescueFrom < 0 ) { debug_printf( DEBUG_SILENT, "-DoRescueFrom must be non-negative\n" ); Usage(); } debug_printf( DEBUG_VERBOSE, "DAG Lockfile will be written to %s\n", lockFileName ); if ( dagman.dagFiles.number() == 1 ) { debug_printf( DEBUG_VERBOSE, "DAG Input file is %s\n", dagman.primaryDagFile.Value() ); } else { MyString msg = "DAG Input files are "; dagman.dagFiles.rewind(); const char *dagFile; while ( (dagFile = dagman.dagFiles.next()) != NULL ) { msg += dagFile; msg += " "; } msg += "\n"; debug_printf( DEBUG_VERBOSE, "%s", msg.Value() ); } // if requested, wait for someone to attach with a debugger... while( wait_for_debug ) { } { MyString cwd; if( !condor_getcwd(cwd) ) { cwd = "<null>"; } debug_printf( DEBUG_DEBUG_1, "Current path is %s\n",cwd.Value()); char *temp = my_username(); debug_printf( DEBUG_DEBUG_1, "Current user is %s\n", temp ? temp : "<null>" ); if( temp ) { free( temp ); } } // // Figure out the rescue DAG to run, if any (this is with "new- // style" rescue DAGs). // int rescueDagNum = 0; MyString rescueDagMsg; if ( dagman.doRescueFrom != 0 ) { rescueDagNum = dagman.doRescueFrom; rescueDagMsg.formatstr( "Rescue DAG number %d specified", rescueDagNum ); RenameRescueDagsAfter( dagman.primaryDagFile.Value(), dagman.multiDags, rescueDagNum, dagman.maxRescueDagNum ); } else if ( dagman.autoRescue ) { rescueDagNum = FindLastRescueDagNum( dagman.primaryDagFile.Value(), dagman.multiDags, dagman.maxRescueDagNum ); rescueDagMsg.formatstr( "Found rescue DAG number %d", rescueDagNum ); } // // Fill in values in the deep submit options that we haven't // already set. // dagman._submitDagDeepOpts.bAllowLogError = dagman.allowLogError; dagman._submitDagDeepOpts.useDagDir = dagman.useDagDir; dagman._submitDagDeepOpts.autoRescue = dagman.autoRescue; dagman._submitDagDeepOpts.doRescueFrom = dagman.doRescueFrom; dagman._submitDagDeepOpts.allowVerMismatch = allowVerMismatch; dagman._submitDagDeepOpts.recurse = false; // // Create the DAG // // Note: a bunch of the parameters we pass here duplicate things // in submitDagOpts, but I'm keeping them separate so we don't have to // bother to construct a new SubmitDagOtions object for splices. // wenger 2010-03-25 dagman.dag = new Dag( dagman.dagFiles, dagman.maxJobs, dagman.maxPreScripts, dagman.maxPostScripts, dagman.allowLogError, dagman.useDagDir, dagman.maxIdle, dagman.retrySubmitFirst, dagman.retryNodeFirst, dagman.condorRmExe, dagman.storkRmExe, &dagman.DAGManJobId, dagman.prohibitMultiJobs, dagman.submitDepthFirst, dagman._defaultNodeLog, dagman._generateSubdagSubmits, &dagman._submitDagDeepOpts, false ); /* toplevel dag! */ if( dagman.dag == NULL ) { EXCEPT( "ERROR: out of memory!\n"); } dagman.dag->SetAbortOnScarySubmit( dagman.abortOnScarySubmit ); dagman.dag->SetAllowEvents( dagman.allow_events ); dagman.dag->SetConfigFile( dagman._dagmanConfigFile ); dagman.dag->SetMaxJobHolds( dagman._maxJobHolds ); dagman.dag->SetPostRun(dagman._runPost); if( dagman._submitDagDeepOpts.priority != 0 ) { // From command line dagman.dag->SetDefaultPriority(dagman._submitDagDeepOpts.priority); } else if( dagman._defaultPriority != 0 ) { // From config file dagman.dag->SetDefaultPriority(dagman._defaultPriority); dagman._submitDagDeepOpts.priority = dagman._defaultPriority; } // // Parse the input files. The parse() routine // takes care of adding jobs and dependencies to the DagMan // dagman.mungeNodeNames = (dagman.dagFiles.number() > 1); parseSetDoNameMunge( dagman.mungeNodeNames ); debug_printf( DEBUG_VERBOSE, "Parsing %d dagfiles\n", dagman.dagFiles.number() ); dagman.dagFiles.rewind(); char *dagFile; // Here we make a copy of the dagFiles for iteration purposes. Deep inside // of the parsing, copies of the dagman.dagFile string list happen which // mess up the iteration of this list. StringList sl( dagman.dagFiles ); sl.rewind(); while ( (dagFile = sl.next()) != NULL ) { debug_printf( DEBUG_VERBOSE, "Parsing %s ...\n", dagFile ); if( !parse( dagman.dag, dagFile, dagman.useDagDir ) ) { if ( dagman.dumpRescueDag ) { // Dump the rescue DAG so we can see what we got // in the failed parse attempt. debug_printf( DEBUG_QUIET, "Dumping rescue DAG " "because of -DumpRescue flag\n" ); dagman.dag->Rescue( dagman.primaryDagFile.Value(), dagman.multiDags, dagman.maxRescueDagNum, false, true, false ); } dagman.dag->RemoveRunningJobs(dagman, true); MSC_SUPPRESS_WARNING_FIXME(6031) // return falue of unlink ignored. unlink( lockFileName ); dagman.CleanUp(); // Note: debug_error calls DC_Exit(). debug_error( 1, DEBUG_QUIET, "Failed to parse %s\n", dagFile ); } } if( dagman.dag->GetDefaultPriority() != 0 ) { dagman.dag->SetDefaultPriorities(); // Applies to the nodes of the dag } dagman.dag->GetJobstateLog().WriteDagmanStarted( dagman.DAGManJobId ); if ( rescueDagNum > 0 ) { // Get our Pegasus sequence numbers set correctly. dagman.dag->GetJobstateLog().InitializeRescue(); } // lift the final set of splices into the main dag. dagman.dag->LiftSplices(SELF); // // Actually parse the "new-new" style (partial DAG info only) // rescue DAG here. Note: this *must* be done after splices // are lifted! // if ( rescueDagNum > 0 ) { dagman.rescueFileToRun = RescueDagName( dagman.primaryDagFile.Value(), dagman.multiDags, rescueDagNum ); debug_printf ( DEBUG_QUIET, "%s; running %s in combination with " "normal DAG file%s\n", rescueDagMsg.Value(), dagman.rescueFileToRun.Value(), dagman.multiDags ? "s" : ""); debug_printf ( DEBUG_QUIET, "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n"); debug_printf ( DEBUG_QUIET, "USING RESCUE DAG %s\n", dagman.rescueFileToRun.Value() ); // Turn off node name munging for the rescue DAG, because // it will already have munged node names. parseSetDoNameMunge( false ); if( !parse( dagman.dag, dagman.rescueFileToRun.Value(), dagman.useDagDir ) ) { if ( dagman.dumpRescueDag ) { // Dump the rescue DAG so we can see what we got // in the failed parse attempt. debug_printf( DEBUG_QUIET, "Dumping rescue DAG " "because of -DumpRescue flag\n" ); dagman.dag->Rescue( dagman.primaryDagFile.Value(), dagman.multiDags, dagman.maxRescueDagNum, true, false ); } dagman.dag->RemoveRunningJobs(dagman, true); MSC_SUPPRESS_WARNING_FIXME(6031) // return falue of unlink ignored. unlink( lockFileName ); dagman.CleanUp(); // Note: debug_error calls DC_Exit(). debug_error( 1, DEBUG_QUIET, "Failed to parse %s\n", dagFile ); } } dagman.dag->CheckThrottleCats(); // fix up any use of $(JOB) in the vars values for any node dagman.dag->ResolveVarsInterpolations(); /* debug_printf(DEBUG_QUIET, "COMPLETED DAG!\n");*/ /* dagman.dag->PrintJobList();*/ #ifndef NOT_DETECT_CYCLE if( dagman.startup_cycle_detect && dagman.dag->isCycle() ) { // Note: maybe we should run the final node here, if there is one. // wenger 2011-12-19. debug_error (1, DEBUG_QUIET, "ERROR: a cycle exists in the dag, please check input\n"); } #endif debug_printf( DEBUG_VERBOSE, "Dag contains %d total jobs\n", dagman.dag->NumNodes( true ) ); MyString firstLocation; if ( dagman.dag->GetReject( firstLocation ) ) { debug_printf( DEBUG_QUIET, "Exiting because of REJECT " "specification in %s. This most likely means " "that the DAG file was produced with the -DumpRescue " "flag when parsing the original DAG failed.\n", firstLocation.Value() ); DC_Exit( EXIT_ERROR ); return; } dagman.dag->DumpDotFile(); if ( dagman.dumpRescueDag ) { debug_printf( DEBUG_QUIET, "Dumping rescue DAG and exiting " "because of -DumpRescue flag\n" ); dagman.dag->Rescue( dagman.primaryDagFile.Value(), dagman.multiDags, dagman.maxRescueDagNum, false, false, false ); ExitSuccess(); return; } //------------------------------------------------------------------------ // Bootstrap and Recovery // // If the Lockfile exists, this indicates a premature termination // of a previous run of Dagman. If condor log is also present, // we run in recovery mode // If the Daglog is not present, then we do not run in recovery // mode { bool recovery = access(lockFileName, F_OK) == 0; if (recovery) { debug_printf( DEBUG_VERBOSE, "Lock file %s detected, \n", lockFileName); if (dagman.abortDuplicates) { if (util_check_lock_file(lockFileName) == 1) { debug_printf( DEBUG_QUIET, "Aborting because it " "looks like another instance of DAGMan is " "currently running on this DAG; if that is " "not the case, delete the lock file (%s) " "and re-submit the DAG.\n", lockFileName ); dagman.dag->GetJobstateLog(). WriteDagmanFinished( EXIT_RESTART ); dagman.CleanUp(); DC_Exit( EXIT_ERROR ); // We should never get to here! } } } // // If this DAGMan continues, it should overwrite the lock // file if it exists. // util_create_lock_file(lockFileName, dagman.abortDuplicates); debug_printf( DEBUG_VERBOSE, "Bootstrapping...\n"); if( !dagman.dag->Bootstrap( recovery ) ) { dagman.dag->PrintReadyQ( DEBUG_DEBUG_1 ); debug_error( 1, DEBUG_QUIET, "ERROR while bootstrapping\n"); } } debug_printf( DEBUG_VERBOSE, "Registering condor_event_timer...\n" ); daemonCore->Register_Timer( 1, dagman.m_user_log_scan_interval, condor_event_timer, "condor_event_timer" ); dagman.dag->SetPendingNodeReportInterval( dagman.pendingReportInterval ); }