void main_init( int argc, char* argv[] ) { extern int runfor; char **ptr; if ( argc > 3 ) { usage( argv[0] ); } int argc_count = 1; for( ptr=argv+1, argc_count = 1; argc_count<argc && *ptr; ptr++,argc_count++) { if( ptr[0][0] != '-' ) { usage( argv[0] ); } switch( ptr[0][1] ) { case 'n': ptr++; if( !(ptr && *ptr) ) { EXCEPT( "-n requires another argument" ); } MasterName = build_valid_daemon_name( *ptr ); dprintf( D_ALWAYS, "Using name: %s\n", MasterName ); break; default: usage( argv[0] ); } } if (runfor != 0) { // We will construct an environment variable that // tells the daemon what time it will be shut down. // We'll give it an absolute time, though runfor is a // relative time. This means that we don't have to update // the time each time we restart the daemon. MyString runfor_env; runfor_env.formatstr("%s=%ld", EnvGetName(ENV_DAEMON_DEATHTIME), time(NULL) + (runfor * 60)); SetEnv(runfor_env.Value()); } daemons.SetDefaultReaper(); // Grab all parameters needed by the master. init_params(); // param() for DAEMON_LIST and initialize our daemons object. init_daemon_list(); if ( daemons.SetupControllers() < 0 ) { EXCEPT( "Daemon initialization failed" ); } // Lookup the paths to all the daemons we now care about. daemons.InitParams(); // Initialize our classad; init_classad(); // Initialize the master entry in the daemons data structure. daemons.InitMaster(); // Make sure if PrivSep is on we're not running as root check_uid_for_privsep(); // open up the windows firewall init_firewall_exceptions(); #if defined(WANT_CONTRIB) && defined(WITH_MANAGEMENT) #if defined(HAVE_DLOPEN) MasterPluginManager::Load(); #elif defined(WIN32) load_master_mgmt(); #endif MasterPluginManager::Initialize(); #endif // Register admin commands daemonCore->Register_Command( RESTART, "RESTART", (CommandHandler)admin_command_handler, "admin_command_handler", 0, ADMINISTRATOR ); daemonCore->Register_Command( RESTART_PEACEFUL, "RESTART_PEACEFUL", (CommandHandler)admin_command_handler, "admin_command_handler", 0, ADMINISTRATOR ); daemonCore->Register_Command( DAEMONS_OFF, "DAEMONS_OFF", (CommandHandler)admin_command_handler, "admin_command_handler", 0, ADMINISTRATOR ); daemonCore->Register_Command( DAEMONS_OFF_FAST, "DAEMONS_OFF_FAST", (CommandHandler)admin_command_handler, "admin_command_handler", 0, ADMINISTRATOR ); daemonCore->Register_Command( DAEMONS_OFF_PEACEFUL, "DAEMONS_OFF_PEACEFUL", (CommandHandler)admin_command_handler, "admin_command_handler", 0, ADMINISTRATOR ); daemonCore->Register_Command( DAEMONS_ON, "DAEMONS_ON", (CommandHandler)admin_command_handler, "admin_command_handler", 0, ADMINISTRATOR ); daemonCore->Register_Command( MASTER_OFF, "MASTER_OFF", (CommandHandler)admin_command_handler, "admin_command_handler", 0, ADMINISTRATOR ); daemonCore->Register_Command( MASTER_OFF_FAST, "MASTER_OFF_FAST", (CommandHandler)admin_command_handler, "admin_command_handler", 0, ADMINISTRATOR ); daemonCore->Register_Command( DAEMON_ON, "DAEMON_ON", (CommandHandler)admin_command_handler, "admin_command_handler", 0, ADMINISTRATOR ); daemonCore->Register_Command( DAEMON_OFF, "DAEMON_OFF", (CommandHandler)admin_command_handler, "admin_command_handler", 0, ADMINISTRATOR ); daemonCore->Register_Command( DAEMON_OFF_FAST, "DAEMON_OFF_FAST", (CommandHandler)admin_command_handler, "admin_command_handler", 0, ADMINISTRATOR ); daemonCore->Register_Command( DAEMON_OFF_PEACEFUL, "DAEMON_OFF_PEACEFUL", (CommandHandler)admin_command_handler, "admin_command_handler", 0, ADMINISTRATOR ); daemonCore->Register_Command( CHILD_ON, "CHILD_ON", (CommandHandler)admin_command_handler, "admin_command_handler", 0, ADMINISTRATOR ); daemonCore->Register_Command( CHILD_OFF, "CHILD_OFF", (CommandHandler)admin_command_handler, "admin_command_handler", 0, ADMINISTRATOR ); daemonCore->Register_Command( CHILD_OFF_FAST, "CHILD_OFF_FAST", (CommandHandler)admin_command_handler, "admin_command_handler", 0, ADMINISTRATOR ); daemonCore->Register_Command( SET_SHUTDOWN_PROGRAM, "SET_SHUTDOWN_PROGRAM", (CommandHandler)admin_command_handler, "admin_command_handler", 0, ADMINISTRATOR ); // Command handler for stashing the pool password daemonCore->Register_Command( STORE_POOL_CRED, "STORE_POOL_CRED", (CommandHandler)&store_pool_cred_handler, "store_pool_cred_handler", NULL, CONFIG_PERM, D_FULLDEBUG ); /* daemonCore->Register_Command( START_AGENT, "START_AGENT", (CommandHandler)admin_command_handler, "admin_command_handler", 0, ADMINISTRATOR ); */ daemonCore->RegisterTimeSkipCallback(time_skip_handler,0); _EXCEPT_Cleanup = DoCleanup; #if !defined(WIN32) if( !dprintf_to_term_check() && param_boolean( "USE_PROCESS_GROUPS", true ) ) { // If we're not connected to a terminal, start our own // process group, unless the config file says not to. setsid(); } #endif if( StartDaemons ) { daemons.StartAllDaemons(); } daemons.StartTimers(); }
int main( int argc, char *argv[] ) { int i; char *log_file_name = 0; char *job_name = 0; time_t waittime=0, stoptime=0; int minjobs = 0; int print_status = false; int echo_events = false; int debug_print_rescue = false; myDistro->Init( argc, argv ); config(); for( i=1; i<argc; i++ ) { if(!strcmp(argv[i],"-help")) { usage(argv[0]); EXIT_SUCCESS; } else if(!strcmp(argv[i],"-version")) { version(); EXIT_FAILURE; } else if(!strcmp(argv[i],"-debug")) { // dprintf to console dprintf_set_tool_debug("TOOL", 0); print_status = false; } else if(!strcmp(argv[i],"-status")) { if (dprintf_to_term_check()) { fprintf(stderr,"-status is implied by -debug\n"); } else { print_status = true; } } else if(!strcmp(argv[i],"-echo")) { echo_events = true; } else if(!strcmp(argv[i],"-wait")) { i++; if(i>=argc) { fprintf(stderr,"-wait requires an argument\n"); usage(argv[0]); EXIT_FAILURE; } waittime = atoi(argv[i]); stoptime = time(0) + waittime; dprintf(D_FULLDEBUG,"Will wait until %s\n",ctime(&stoptime)); } else if( !strcmp( argv[i], "-num" ) ) { i++; if( i >= argc ) { fprintf( stderr, "-num requires an argument\n" ); usage( argv[0] ); EXIT_FAILURE; } minjobs = atoi( argv[i] ); if( minjobs < 1 ) { fprintf( stderr, "-num must be greater than zero\n" ); usage( argv[0] ); EXIT_FAILURE; } dprintf( D_FULLDEBUG, "Will wait until %d jobs end\n", minjobs ); } else if(argv[i][0]!='-') { if(!log_file_name) { log_file_name = argv[i]; } else if(!job_name) { job_name = argv[i]; } else { fprintf(stderr,"Extra argument: %s\n\n",argv[i]); usage(argv[0]); EXIT_FAILURE; } } else { usage(argv[0]); EXIT_FAILURE; } } if( !log_file_name ) { usage(argv[0]); EXIT_FAILURE; } int cluster=ANY_NUMBER; int process=ANY_NUMBER; int subproc=ANY_NUMBER; if( job_name ) { int fields = sscanf(job_name,"%d.%d.%d",&cluster,&process,&subproc); if(fields>=1 && fields<=3) { /* number is fine */ } else { fprintf(stderr,"Couldn't understand job number: %s\n",job_name); EXIT_FAILURE; } } dprintf(D_FULLDEBUG,"Reading log file %s\n",log_file_name); int submitted, aborted, completed, flagged; FILE *sec_fp = NULL; int pos, nPos; rescue : submitted=0; aborted=0; completed=0; flagged = 0; ReadUserLog log ; HashTable<MyString,MyString> table(127,MyStringHash); if(log.initialize(log_file_name,false,false,true)) { sec_fp = safe_fopen_wrapper_follow(log_file_name, "r", 0644); fseek (sec_fp, 0, SEEK_END); pos = ftell(sec_fp); nPos = pos; if (debug_print_rescue) printf("begin:%d ", nPos); while(1) { fseek(sec_fp, 0, SEEK_END); int tmp_pos = ftell(sec_fp); ULogEventOutcome outcome; ULogEvent *event; outcome = log.readEvent(event); if(outcome==ULOG_OK) { flagged = 0; pos = nPos = tmp_pos; if (debug_print_rescue) printf("top:%d ", nPos); char key[1024]; sprintf(key,"%d.%d.%d",event->cluster,event->proc,event->subproc); MyString str(key); if( jobnum_matches( event, cluster, process, subproc ) ) { if (echo_events) { event->putEvent(stdout); printf("...\n"); } if(event->eventNumber==ULOG_SUBMIT) { dprintf(D_FULLDEBUG,"%s submitted\n",key); if (print_status) printf("%s submitted\n", key); table.insert(str,str); submitted++; } else if(event->eventNumber==ULOG_JOB_TERMINATED) { dprintf(D_FULLDEBUG,"%s completed\n",key); if (print_status) printf("%s completed\n", key); table.remove(str); completed++; } else if(event->eventNumber==ULOG_JOB_ABORTED) { dprintf(D_FULLDEBUG,"%s aborted\n",key); if (print_status) printf("%s aborted\n", key); table.remove(str); aborted++; } else if (event->eventNumber==ULOG_EXECUTE) { if (print_status) { printf("%s executing on host %s\n", key, ((ExecuteEvent*)event)->getExecuteHost()); } } else { /* nothing to do */ } } if (event != NULL) delete event; if( minjobs && (completed + aborted >= minjobs ) ) { printf( "Specifed number of jobs (%d) done.\n", minjobs ); EXIT_SUCCESS; } } else { // did something change in the file since our last visit? fseek(sec_fp, 0, SEEK_END); nPos = ftell(sec_fp); if (flagged == 1) { fclose(sec_fp); dprintf(D_FULLDEBUG, "INFO: File %s changed but userLog reader could not read another event. We are reinitializing userLog reader. \n", log_file_name); if (debug_print_rescue) printf("rescue:%d ", nPos); if (print_status) printf("<reinitializing userLog reader>\n"); // reinitialize the user log, we ended up here a second time goto rescue; } if ( nPos != pos ){ if (debug_print_rescue) printf("lagging:%d!=%d ", nPos, pos); pos = nPos; // we do not want to retry every time we are in a waiting sleep cycle, therefore flag a change flagged = 1; } dprintf(D_FULLDEBUG,"%d submitted %d completed %d aborted %d remaining\n",submitted,completed,aborted,submitted-completed-aborted); if(table.getNumElements()==0) { if(submitted>0) { if( !minjobs ) { printf("All jobs done.\n"); EXIT_SUCCESS; } } else { if(cluster==ANY_NUMBER) { fprintf(stderr,"This log does not mention any jobs!\n"); } else { fprintf(stderr,"This log does not mention that job!\n"); } EXIT_FAILURE; } } else if(stoptime && time(0)>stoptime) { printf("Time expired.\n"); EXIT_FAILURE; } else { time_t sleeptime; if(stoptime) { sleeptime = stoptime-time(0); } else { sleeptime = 5; } if(sleeptime>5) { sleeptime = 5; } else if(sleeptime<1) { sleeptime = 1; } log.synchronize(); dprintf(D_FULLDEBUG,"No more events, sleeping for %ld seconds\n", (long)sleeptime); sleep(sleeptime); } } } fclose(sec_fp); } else { fprintf(stderr,"Couldn't open %s: %s\n",log_file_name,strerror(errno)); } EXIT_FAILURE; return 1; /* meaningless, but it makes Windows happy */ }