/**
 * Populate the ClassAd that describes this defrag daemon.
 *
 * The daemon name is rebuilt from m_defrag_name on every call and cached in
 * m_daemon_name.  The ad is then stamped with its type and name, after which
 * the statistics pool and daemonCore add their own attributes.
 *
 * @param ad  ad to fill in; dereferenced unconditionally.
 */
void
Defrag::publish(ClassAd *ad)
{
	// The temporary buffer is copied into m_daemon_name and released with
	// delete [] immediately afterwards.
	char *name_buf = build_valid_daemon_name(m_defrag_name.c_str());
	ASSERT( name_buf );
	m_daemon_name = name_buf;
	delete [] name_buf;

	// Identity of the ad.
	ad->SetMyTypeName("Defrag");
	ad->SetTargetTypeName("");
	ad->Assign(ATTR_NAME, m_daemon_name.c_str());

	// Advance the statistics before publishing them into the ad.
	m_stats.Tick();
	m_stats.Publish(*ad);

	daemonCore->publish(ad);
}
void JobQueueDBManager::config(bool reconfig) { char *tmp; MyString sql_str; int bndcnt = 0; const char *data_arr[3]; QuillAttrDataType data_typ[3]; if (param_boolean("QUILL_ENABLED", false) == false) { EXCEPT("Quill++ is currently disabled. Please set QUILL_ENABLED to " "TRUE if you want this functionality and read the manual " "about this feature since it requires other attributes to be " "set properly."); } //bail out if no SPOOL variable is defined since its used to //figure out the location of the job_queue.log file char *spool = param("SPOOL"); if(!spool) { EXCEPT("No SPOOL variable found in config file\n"); } jobQueueLogFile = (char *) malloc(_POSIX_PATH_MAX * sizeof(char)); snprintf(jobQueueLogFile,_POSIX_PATH_MAX * sizeof(char), "%s/job_queue.log", spool); /* Here we try to read the database parameters in config the db ip address format is <ipaddress:port> */ dt = getConfigDBType(); jobQueueDBIpAddress = param("QUILL_DB_IP_ADDR"); jobQueueDBName = param("QUILL_DB_NAME"); jobQueueDBUser = param("QUILL_DB_USER"); jobQueueDBConn = getDBConnStr(jobQueueDBIpAddress, jobQueueDBName, jobQueueDBUser, spool); dprintf(D_ALWAYS, "Using Job Queue File %s\n", jobQueueLogFile); dprintf(D_ALWAYS, "Using Database Type = Postgres\n"); dprintf(D_ALWAYS, "Using Database IpAddress = %s\n", jobQueueDBIpAddress?jobQueueDBIpAddress:""); dprintf(D_ALWAYS, "Using Database Name = %s\n", jobQueueDBName?jobQueueDBName:""); dprintf(D_ALWAYS, "Using Database User = %s\n", jobQueueDBUser?jobQueueDBUser:""); if(spool) { free(spool); spool = NULL; } // this function is also called when condor_reconfig is issued // and so we dont want to recreate all essential objects if(!reconfig) { prober = new ClassAdLogProber(); caLogParser = new ClassAdLogParser(); switch (dt) { case T_PGSQL: DBObj = new PGSQLDatabase(jobQueueDBConn); break; default: break;; } xactState = NOT_IN_XACT; QuillErrCode ret_st; ret_st = DBObj->connectDB(); if (ret_st == QUILL_FAILURE) { displayErrorMsg("config: unable to 
connect to DB--- ERROR"); EXCEPT("config: unable to connect to DB\n"); } /* the following will also throw an exception if the schema version is not correct */ DBObj->assertSchemaVersion(); tmp = param( "SCHEDD_NAME" ); if( tmp ) { scheddname = build_valid_daemon_name( tmp ); dprintf(D_FULLDEBUG, "scheddname %s built from param value %s\n", scheddname, tmp); free(tmp); } else { scheddname = default_daemon_name(); dprintf(D_FULLDEBUG, "scheddname built from default daemon name: %s\n", scheddname); } { /* create an entry in jobqueuepollinginfo if this schedd is the * first time being logged to database */ sql_str.formatstr("INSERT INTO jobqueuepollinginfo (scheddname, last_file_mtime, last_file_size) SELECT '%s', 0, 0 FROM dummy_single_row_table WHERE NOT EXISTS (SELECT * FROM jobqueuepollinginfo WHERE scheddname = '%s')", scheddname, scheddname); ret_st = DBObj->execCommand(sql_str.Value()); if (ret_st == QUILL_FAILURE) { dprintf(D_ALWAYS, "Insert JobQueuePollInfo --- ERROR [SQL] %s\n", sql_str.Value()); } } { /* create an entry in currency table if this schedd is the first * time being logged to database */ sql_str.formatstr("INSERT INTO currencies (datasource) SELECT '%s' FROM dummy_single_row_table WHERE NOT EXISTS (SELECT * FROM currencies WHERE datasource = '%s')", scheddname, scheddname); ret_st = DBObj->execCommand(sql_str.Value()); if (ret_st == QUILL_FAILURE) { dprintf(D_ALWAYS, "Insert Currency --- ERROR [SQL] %s\n", sql_str.Value()); } } ret_st = DBObj->commitTransaction(); if (ret_st == QUILL_FAILURE) { dprintf(D_ALWAYS, "Commit transaction failed in JobQueueDBManager::config\n"); } if (param_boolean("QUILL_MAINTAIN_DB_CONN", true) == false) { ret_st = DBObj->disconnectDB(); if (ret_st == QUILL_FAILURE) { dprintf(D_ALWAYS, "JobQueueDBManager:config: unable to disconnect database --- ERROR\n"); } } } //this function assumes that certain members have been initialized // (specifically prober and caLogParser) and so the order is important. 
setJobQueueFileName(jobQueueLogFile); }
void main_init( int argc, char* argv[] ) { extern int runfor; char **ptr; if ( argc > 3 ) { usage( argv[0] ); } int argc_count = 1; for( ptr=argv+1, argc_count = 1; argc_count<argc && *ptr; ptr++,argc_count++) { if( ptr[0][0] != '-' ) { usage( argv[0] ); } switch( ptr[0][1] ) { case 'n': ptr++; if( !(ptr && *ptr) ) { EXCEPT( "-n requires another argument" ); } MasterName = build_valid_daemon_name( *ptr ); dprintf( D_ALWAYS, "Using name: %s\n", MasterName ); break; default: usage( argv[0] ); } } if (runfor != 0) { // We will construct an environment variable that // tells the daemon what time it will be shut down. // We'll give it an absolute time, though runfor is a // relative time. This means that we don't have to update // the time each time we restart the daemon. MyString runfor_env; runfor_env.formatstr("%s=%ld", EnvGetName(ENV_DAEMON_DEATHTIME), time(NULL) + (runfor * 60)); SetEnv(runfor_env.Value()); } daemons.SetDefaultReaper(); // Grab all parameters needed by the master. init_params(); // param() for DAEMON_LIST and initialize our daemons object. init_daemon_list(); if ( daemons.SetupControllers() < 0 ) { EXCEPT( "Daemon initialization failed" ); } // Lookup the paths to all the daemons we now care about. daemons.InitParams(); // Initialize our classad; init_classad(); // Initialize the master entry in the daemons data structure. 
daemons.InitMaster(); // Make sure if PrivSep is on we're not running as root check_uid_for_privsep(); // open up the windows firewall init_firewall_exceptions(); #if defined(WANT_CONTRIB) && defined(WITH_MANAGEMENT) #if defined(HAVE_DLOPEN) MasterPluginManager::Load(); #elif defined(WIN32) load_master_mgmt(); #endif MasterPluginManager::Initialize(); #endif // Register admin commands daemonCore->Register_Command( RESTART, "RESTART", (CommandHandler)admin_command_handler, "admin_command_handler", 0, ADMINISTRATOR ); daemonCore->Register_Command( RESTART_PEACEFUL, "RESTART_PEACEFUL", (CommandHandler)admin_command_handler, "admin_command_handler", 0, ADMINISTRATOR ); daemonCore->Register_Command( DAEMONS_OFF, "DAEMONS_OFF", (CommandHandler)admin_command_handler, "admin_command_handler", 0, ADMINISTRATOR ); daemonCore->Register_Command( DAEMONS_OFF_FAST, "DAEMONS_OFF_FAST", (CommandHandler)admin_command_handler, "admin_command_handler", 0, ADMINISTRATOR ); daemonCore->Register_Command( DAEMONS_OFF_PEACEFUL, "DAEMONS_OFF_PEACEFUL", (CommandHandler)admin_command_handler, "admin_command_handler", 0, ADMINISTRATOR ); daemonCore->Register_Command( DAEMONS_ON, "DAEMONS_ON", (CommandHandler)admin_command_handler, "admin_command_handler", 0, ADMINISTRATOR ); daemonCore->Register_Command( MASTER_OFF, "MASTER_OFF", (CommandHandler)admin_command_handler, "admin_command_handler", 0, ADMINISTRATOR ); daemonCore->Register_Command( MASTER_OFF_FAST, "MASTER_OFF_FAST", (CommandHandler)admin_command_handler, "admin_command_handler", 0, ADMINISTRATOR ); daemonCore->Register_Command( DAEMON_ON, "DAEMON_ON", (CommandHandler)admin_command_handler, "admin_command_handler", 0, ADMINISTRATOR ); daemonCore->Register_Command( DAEMON_OFF, "DAEMON_OFF", (CommandHandler)admin_command_handler, "admin_command_handler", 0, ADMINISTRATOR ); daemonCore->Register_Command( DAEMON_OFF_FAST, "DAEMON_OFF_FAST", (CommandHandler)admin_command_handler, "admin_command_handler", 0, ADMINISTRATOR ); 
daemonCore->Register_Command( DAEMON_OFF_PEACEFUL, "DAEMON_OFF_PEACEFUL", (CommandHandler)admin_command_handler, "admin_command_handler", 0, ADMINISTRATOR ); daemonCore->Register_Command( CHILD_ON, "CHILD_ON", (CommandHandler)admin_command_handler, "admin_command_handler", 0, ADMINISTRATOR ); daemonCore->Register_Command( CHILD_OFF, "CHILD_OFF", (CommandHandler)admin_command_handler, "admin_command_handler", 0, ADMINISTRATOR ); daemonCore->Register_Command( CHILD_OFF_FAST, "CHILD_OFF_FAST", (CommandHandler)admin_command_handler, "admin_command_handler", 0, ADMINISTRATOR ); daemonCore->Register_Command( SET_SHUTDOWN_PROGRAM, "SET_SHUTDOWN_PROGRAM", (CommandHandler)admin_command_handler, "admin_command_handler", 0, ADMINISTRATOR ); // Command handler for stashing the pool password daemonCore->Register_Command( STORE_POOL_CRED, "STORE_POOL_CRED", (CommandHandler)&store_pool_cred_handler, "store_pool_cred_handler", NULL, CONFIG_PERM, D_FULLDEBUG ); /* daemonCore->Register_Command( START_AGENT, "START_AGENT", (CommandHandler)admin_command_handler, "admin_command_handler", 0, ADMINISTRATOR ); */ daemonCore->RegisterTimeSkipCallback(time_skip_handler,0); _EXCEPT_Cleanup = DoCleanup; #if !defined(WIN32) if( !dprintf_to_term_check() && param_boolean( "USE_PROCESS_GROUPS", true ) ) { // If we're not connected to a terminal, start our own // process group, unless the config file says not to. setsid(); } #endif if( StartDaemons ) { daemons.StartAllDaemons(); } daemons.StartTimers(); }
void init_params() { char *tmp; static int master_name_in_config = 0; if( ! master_name_in_config ) { // First time, or we know it's not in the config file. if( ! MasterName ) { // Not set on command line tmp = param( "MASTER_NAME" ); if( tmp ) { MasterName = build_valid_daemon_name( tmp ); master_name_in_config = 1; free( tmp ); } } } else { delete [] MasterName; tmp = param( "MASTER_NAME" ); MasterName = build_valid_daemon_name( tmp ); free( tmp ); } if( MasterName ) { dprintf( D_FULLDEBUG, "Using name: %s\n", MasterName ); } if (!param_boolean_crufty("START_MASTER", true)) { dprintf( D_ALWAYS, "START_MASTER was set to FALSE, shutting down.\n" ); StartDaemons = FALSE; main_shutdown_graceful(); } StartDaemons = TRUE; if (!param_boolean_crufty("START_DAEMONS", true)) { dprintf( D_ALWAYS, "START_DAEMONS flag was set to FALSE. Not starting daemons.\n" ); StartDaemons = FALSE; } // If we were sent the daemons_off command, don't forget that // here. if( GotDaemonsOff ) { StartDaemons = FALSE; } PublishObituaries = param_boolean_crufty("PUBLISH_OBITUARIES", true) ? 
TRUE : FALSE; Lines = param_integer("OBITUARY_LOG_LENGTH",20); master_backoff_constant = param_integer( "MASTER_BACKOFF_CONSTANT", 9, 1 ); master_backoff_ceiling = param_integer( "MASTER_BACKOFF_CEILING", 3600,1 ); master_backoff_factor = param_double( "MASTER_BACKOFF_FACTOR", 2.0, 0 ); if( master_backoff_factor <= 0.0 ) { master_backoff_factor = 2.0; } master_recover_time = param_integer( "MASTER_RECOVER_FACTOR", 300, 1 ); update_interval = param_integer( "MASTER_UPDATE_INTERVAL", 5 * MINUTE, 1 ); check_new_exec_interval = param_integer( "MASTER_CHECK_NEW_EXEC_INTERVAL", 5*MINUTE ); new_bin_delay = param_integer( "MASTER_NEW_BINARY_DELAY", 2*MINUTE, 1 ); new_bin_restart_mode = GRACEFUL; char * restart_mode = param("MASTER_NEW_BINARY_RESTART"); if (restart_mode) { #if 1 StopStateT mode = StringToStopState(restart_mode); #else static const struct { const char * text; StopStateT mode; } modes[] = { { "GRACEFUL", GRACEFUL }, { "PEACEFUL", PEACEFUL }, { "NEVER", NONE }, { "NONE", NONE }, { "NO", NONE }, // { "FAST", FAST }, // { "KILL", KILL }, }; StopStateT mode = (StopStateT)-1; // prime with -1 so we can detect bad input. for (int ii = 0; ii < (int)COUNTOF(modes); ++ii) { if (MATCH == strcasecmp(restart_mode, modes[ii].text)) { mode = modes[ii].mode; break; } } #endif if (mode == (StopStateT)-1) { dprintf(D_ALWAYS, "%s is not a valid value for MASTER_NEW_BINARY_RESTART. using GRACEFUL\n", restart_mode); } if (mode >= 0 && mode <= NONE) new_bin_restart_mode = mode; free(restart_mode); } preen_interval = param_integer( "PREEN_INTERVAL", 24*HOUR, 0 ); if(preen_interval == 0) { EXCEPT("PREEN_INTERVAL in the condor configuration is too low (0). Please set it to an integer in the range 1 to %d (default %d). 
To disable condor_preen entirely, comment out PREEN.", INT_MAX, 24*HOUR); } shutdown_fast_timeout = param_integer( "SHUTDOWN_FAST_TIMEOUT", 5*MINUTE, 1 ); shutdown_graceful_timeout = param_integer( "SHUTDOWN_GRACEFUL_TIMEOUT", 30*MINUTE, 1 ); AllowAdminCommands = param_boolean( "ALLOW_ADMIN_COMMANDS", true ); if( FS_Preen ) { free( FS_Preen ); } FS_Preen = param( "PREEN" ); }
void DBMSManager::config() { char *name = param("DBMSMANAGER_NAME"); if(name) { char *valid_name = build_valid_daemon_name(name); m_name = valid_name; free(name); delete [] valid_name; } else { char *default_name = default_daemon_name(); if(default_name) { m_name = default_name; delete [] default_name; } } InitPublicAd(); int update_interval = param_integer("UPDATE_INTERVAL", 60); if(m_public_ad_update_interval != update_interval) { m_public_ad_update_interval = update_interval; if(m_public_ad_update_timer >= 0) { daemonCore->Cancel_Timer(m_public_ad_update_timer); m_public_ad_update_timer = -1; } dprintf(D_FULLDEBUG, "Setting update interval to %d\n", m_public_ad_update_interval); m_public_ad_update_timer = daemonCore->Register_Timer( 0, m_public_ad_update_interval, (TimerHandlercpp)&DBMSManager::TimerHandler_UpdateCollector, "DBMSManager::TimerHandler_UpdateCollector", this); } /* register the database purging callback */ int purge_interval = param_integer("DATABASE_PURGE_INTERVAL", 86400); // 24 hours if(m_database_purge_interval != purge_interval) { m_database_purge_interval = purge_interval; if(m_database_purge_timer >= 0) { daemonCore->Cancel_Timer(m_database_purge_timer); m_database_purge_timer = -1; } dprintf(D_FULLDEBUG, "Setting database purge interval to %d\n", m_database_purge_interval); m_database_purge_timer = daemonCore->Register_Timer( 0, m_database_purge_interval, (TimerHandlercpp)&DBMSManager::TimerHandler_PurgeDatabase, "DBMSManager::TimerHandler_PurgeDatabase", this); } /* register the database reindexing callback */ int reindex_interval = param_integer("DATABASE_REINDEX_INTERVAL", 86400); // 24 hours if(m_database_reindex_interval != reindex_interval) { m_database_reindex_interval = reindex_interval; if(m_database_reindex_timer >= 0) { daemonCore->Cancel_Timer(m_database_reindex_timer); m_database_reindex_timer = -1; } dprintf(D_FULLDEBUG, "Setting database reindex interval to %d\n", m_database_reindex_interval); m_database_reindex_timer = 
daemonCore->Register_Timer( 0, m_database_reindex_interval, (TimerHandlercpp)&DBMSManager::TimerHandler_ReindexDatabase, "DBMSManager::TimerHandler_ReindexDatabase", this); } }
void main_init(int argc, char* argv[]) { char** ptr; MyString job_queue_name; int argc_count = 1; for(ptr = argv + 1, argc_count = 1; argc_count<argc && *ptr; ptr++,argc_count++) { if(ptr[0][0] != '-') { usage(argv[0]); } switch(ptr[0][1]) { case 'n': if (Name) { free(Name); } Name = build_valid_daemon_name( *(++ptr) ); break; default: usage(argv[0]); } } // Tell Attrlist to publish the server time AttrList_setPublishServerTime( true ); // Initialize DaemonCore's use of ProcFamily. We do this so that we // launch a ProcD if necessary so that any Starters that we launch // for Local Universe jobs can share a single ProcD, instead of // each creating their own daemonCore->Proc_Family_Init(); #if defined(HAVE_DLOPEN) ClassAdLogPluginManager::Load(); ScheddPluginManager::Load(); ScheddPluginManager::EarlyInitialize(); ClassAdLogPluginManager::EarlyInitialize(); #endif /* schedd doesn't care about other daemons. only that it has the ability * to run jobs. so the following code is for now not needed. // ZKM HACK TO MAKE SURE SCHEDD HAS USER CREDENTIALS // // if we are using the credd and credmon, we need to init them before // doing anything! char* p = param("SEC_CREDENTIAL_DIRECTORY"); if(p) { free(p); dprintf(D_ALWAYS, "SCHEDD: INITIALIZING USER CREDS\n"); Daemon *my_credd; // we will abort if we can't locate the credd, so let's try a // few times. locate() caches the result so we have to destroy // the object and make a new one each time. 
int retries = 20; bool success = false; do { // allocate a credd my_credd = new Daemon(DT_CREDD); if(my_credd) { // call locate bool loc_rc = my_credd->locate(); if(loc_rc) { // get a connected relisock CondorError errstack; ReliSock* r = (ReliSock*)my_credd->startCommand( CREDD_REFRESH_ALL, Stream::reli_sock, 20, &errstack); if ( r ) { // ask the credd to get us some fresh user creds ClassAd ad; putClassAd(r, ad); r->end_of_message(); r->decode(); getClassAd(r, ad); r->end_of_message(); dprintf(D_SECURITY | D_FULLDEBUG, "SCHEDD: received ad from CREDD:\n"); dPrintAd(D_SECURITY | D_FULLDEBUG, ad); MyString result; ad.LookupString("Result", result); if(result == "success") { success = true; } else { dprintf(D_FULLDEBUG, "SCHEDD: warning, creddmon returned failure.\n"); } // clean up. delete r; } else { dprintf(D_FULLDEBUG, "SCHEDD: warning, startCommand failed, %s\n", errstack.getFullText(true).c_str()); } } else { dprintf(D_FULLDEBUG, "SCHEDD: warning, locate failed.\n"); } // clean up. delete my_credd; } else { dprintf(D_FULLDEBUG, "SCHEDD: warning, new Daemon(DT_CREDD) failed.\n"); } // if something went wrong, sleep and retry (finit number of times) if(!success) { dprintf(D_FULLDEBUG, "SCHEDD: sleeping and trying again %i times.\n", retries); sleep(1); retries--; } } while ((retries > 0) && (success == false)); // except if fail if (!success) { EXCEPT("FAILED TO INITIALIZE USER CREDS (locate failed)"); } } // END ZKM HACK */ #ifndef WIN32 // if using the SEC_CREDENTIAL_DIRECTORY, confirm we are "up-to-date". // at the moment, we take an "all-or-nothing" approach. ultimately, this // should be per-user, and the SchedD should start normally and run jobs // for users who DO have valid credentials, and simply holding on to jobs // in idle state for users who do NOT have valid credentials. // char* p = param("SEC_CREDENTIAL_DIRECTORY"); if(p) { free(p); bool success = false; int retries = 60; do { // look for existence of file that says everything is up-to-date. 
success = credmon_poll(NULL, false, false); if(!success) { dprintf(D_ALWAYS, "SCHEDD: User credentials not up-to-date. Start-up delayed. Waiting 10 seconds and trying %i more times.\n", retries); sleep(10); retries--; } } while ((!success) && (retries > 0)); // we tried, we give up. if(!success) { EXCEPT("User credentials unavailable after 10 minutes"); } } // User creds good to go, let's start this thing up! #endif // WIN32 // Initialize all the modules scheduler.Init(); scheduler.Register(); // Initialize the job queue char *job_queue_param_name = param("JOB_QUEUE_LOG"); if (job_queue_param_name == NULL) { // the default place for the job_queue.log is in spool job_queue_name.formatstr( "%s/job_queue.log", Spool); } else { job_queue_name = job_queue_param_name; // convert char * to MyString free(job_queue_param_name); } // Make a backup of the job queue? if ( param_boolean_crufty("SCHEDD_BACKUP_SPOOL", false) ) { MyString hostname; hostname = get_local_hostname(); MyString job_queue_backup; job_queue_backup.formatstr( "%s/job_queue.bak.%s.%ld", Spool, hostname.Value(), (long)time(NULL) ); if ( copy_file( job_queue_name.Value(), job_queue_backup.Value() ) ) { dprintf( D_ALWAYS, "Failed to backup spool to '%s'\n", job_queue_backup.Value() ); } else { dprintf( D_FULLDEBUG, "Spool backed up to '%s'\n", job_queue_backup.Value() ); } } int max_historical_logs = param_integer( "MAX_JOB_QUEUE_LOG_ROTATIONS", DEFAULT_MAX_JOB_QUEUE_LOG_ROTATIONS ); InitJobQueue(job_queue_name.Value(),max_historical_logs); PostInitJobQueue(); // Initialize the dedicated scheduler stuff dedicated_scheduler.initialize(); // Do a timeout now at startup to get the ball rolling... scheduler.timeout(); #if defined(HAVE_DLOPEN) ScheddPluginManager::Initialize(); ClassAdLogPluginManager::Initialize(); #endif daemonCore->InstallAuditingCallback( AuditLogNewConnection ); }
void main_init(int argc, char* argv[]) { char** ptr; MyString job_queue_name; int argc_count = 1; for(ptr = argv + 1, argc_count = 1; argc_count<argc && *ptr; ptr++,argc_count++) { if(ptr[0][0] != '-') { usage(argv[0]); } switch(ptr[0][1]) { case 'n': Name = build_valid_daemon_name( *(++ptr) ); break; default: usage(argv[0]); } } // Tell Attrlist to publish the server time AttrList_setPublishServerTime( true ); // Initialize DaemonCore's use of ProcFamily. We do this so that we // launch a ProcD if necessary so that any Starters that we launch // for Local Universe jobs can share a single ProcD, instead of // each creating their own daemonCore->Proc_Family_Init(); #if defined(WANT_CONTRIB) && defined(WITH_MANAGEMENT) #if defined(HAVE_DLOPEN) // Intialization of the plugin manager, i.e. loading all // plugins, should be performed before the job queue log is // read so plugins have a chance to learn about all jobs // already in the queue ClassAdLogPluginManager::Load(); // Load all ScheddPlugins. In reality this doesn't do much // since initializing any plugin manager loads plugins for all // plugin manager. ScheddPluginManager::Load(); // Tell all ScheddPlugins to initialze themselves ScheddPluginManager::EarlyInitialize(); // Tell all plugins to initialize themselves ClassAdLogPluginManager::EarlyInitialize(); #endif #endif // Initialize all the modules scheduler.Init(); scheduler.Register(); // Initialize the job queue char *job_queue_param_name = param("JOB_QUEUE_LOG"); if (job_queue_param_name == NULL) { // the default place for the job_queue.log is in spool job_queue_name.sprintf( "%s/job_queue.log", Spool); } else { job_queue_name = job_queue_param_name; // convert char * to MyString free(job_queue_param_name); } // Make a backup of the job queue? 
if ( param_boolean_crufty("SCHEDD_BACKUP_SPOOL", false) ) { MyString hostname; UtcTime now(true); hostname = get_local_hostname(); MyString job_queue_backup; job_queue_backup.sprintf( "%s/job_queue.bak.%s.%ld", Spool, hostname.Value(), now.seconds() ); if ( copy_file( job_queue_name.Value(), job_queue_backup.Value() ) ) { dprintf( D_ALWAYS, "Failed to backup spool to '%s'\n", job_queue_backup.Value() ); } else { dprintf( D_FULLDEBUG, "Spool backed up to '%s'\n", job_queue_backup.Value() ); } } int max_historical_logs = param_integer( "MAX_JOB_QUEUE_LOG_ROTATIONS", DEFAULT_MAX_JOB_QUEUE_LOG_ROTATIONS ); InitJobQueue(job_queue_name.Value(),max_historical_logs); mark_jobs_idle(); // The below must happen _after_ InitJobQueue is called. if ( scheduler.autocluster.config() ) { // clear out auto cluster id attributes WalkJobQueue( (int(*)(ClassAd *))clear_autocluster_id ); } // // Update the SchedDInterval attributes in jobs if they // have it defined. This will be for JobDeferral and // CronTab jobs // WalkJobQueue( (int(*)(ClassAd *))::updateSchedDInterval ); // Initialize the dedicated scheduler stuff dedicated_scheduler.initialize(); // Do a timeout now at startup to get the ball rolling... scheduler.timeout(); #if defined(WANT_CONTRIB) && defined(WITH_MANAGEMENT) #if defined(HAVE_DLOPEN) // Tell all ScheddPlugins to initialze themselves ScheddPluginManager::Initialize(); // Tell all plugins to initialize themselves ClassAdLogPluginManager::Initialize(); #endif #endif }
int init_params( int /* first_time */) { char *tmp; resmgr->init_config_classad(); polling_interval = param_integer( "POLLING_INTERVAL", 5 ); update_interval = param_integer( "UPDATE_INTERVAL", 300, 1 ); update_offset = param_integer( "UPDATE_OFFSET", 0, 0 ); if( accountant_host ) { free( accountant_host ); } accountant_host = param("ACCOUNTANT_HOST"); match_timeout = param_integer( "MATCH_TIMEOUT", 120 ); killing_timeout = param_integer( "KILLING_TIMEOUT", 30 ); max_claim_alives_missed = param_integer( "MAX_CLAIM_ALIVES_MISSED", 6 ); sysapi_reconfig(); if( startd_job_exprs ) { delete( startd_job_exprs ); startd_job_exprs = NULL; } tmp = param( "STARTD_JOB_EXPRS" ); if( tmp ) { startd_job_exprs = new StringList(); startd_job_exprs->initializeFromString( tmp ); free( tmp ); } else { startd_job_exprs = new StringList(); startd_job_exprs->initializeFromString( ATTR_JOB_UNIVERSE ); } if( startd_slot_attrs ) { delete( startd_slot_attrs ); startd_slot_attrs = NULL; } tmp = param( "STARTD_SLOT_ATTRS" ); if (!tmp) { tmp = param( "STARTD_SLOT_EXPRS" ); } if (param_boolean("ALLOW_VM_CRUFT", false) && !tmp) { tmp = param( "STARTD_VM_ATTRS" ); if (!tmp) { tmp = param( "STARTD_VM_EXPRS" ); } } if( tmp ) { startd_slot_attrs = new StringList(); startd_slot_attrs->initializeFromString( tmp ); free( tmp ); } console_slots = param_integer( "SLOTS_CONNECTED_TO_CONSOLE", -12345); if (console_slots == -12345) { // if no value set, try the old names... console_slots = resmgr->m_attr->num_cpus(); console_slots = param_integer( "VIRTUAL_MACHINES_CONNECTED_TO_CONSOLE", param_integer( "CONSOLE_VMS", param_integer( "CONSOLE_CPUS", console_slots))); } keyboard_slots = param_integer( "SLOTS_CONNECTED_TO_KEYBOARD", -12345); if (keyboard_slots == -12345) { // if no value set, try the old names... 
keyboard_slots = resmgr->m_attr->num_cpus(); keyboard_slots = param_integer( "VIRTUAL_MACHINES_CONNECTED_TO_KEYBOARD", param_integer( "KEYBOARD_VMS", param_integer( "KEYBOARD_CPUS", 1))); } disconnected_keyboard_boost = param_integer( "DISCONNECTED_KEYBOARD_IDLE_BOOST", 1200 ); startd_noclaim_shutdown = param_integer( "STARTD_NOCLAIM_SHUTDOWN", 0 ); compute_avail_stats = false; compute_avail_stats = param_boolean( "STARTD_COMPUTE_AVAIL_STATS", false ); tmp = param( "STARTD_NAME" ); if( tmp ) { if( Name ) { delete [] Name; } Name = build_valid_daemon_name( tmp ); dprintf( D_FULLDEBUG, "Using %s for name\n", Name ); free( tmp ); } pid_snapshot_interval = param_integer( "PID_SNAPSHOT_INTERVAL", DEFAULT_PID_SNAPSHOT_INTERVAL ); if( valid_cod_users ) { delete( valid_cod_users ); valid_cod_users = NULL; } tmp = param( "VALID_COD_USERS" ); if( tmp ) { valid_cod_users = new StringList(); valid_cod_users->initializeFromString( tmp ); free( tmp ); } if( vmapi_is_virtual_machine() == TRUE ) { vmapi_destroy_vmregister(); } tmp = param( "VMP_HOST_MACHINE" ); if( tmp ) { if( vmapi_is_my_machine(tmp) ) { dprintf( D_ALWAYS, "WARNING: VMP_HOST_MACHINE should be the hostname of host machine. In host machine, it doesn't need to be defined\n"); } else { vmapi_create_vmregister(tmp); } free(tmp); } if( vmapi_is_host_machine() == TRUE ) { vmapi_destroy_vmmanager(); } tmp = param( "VMP_VM_LIST" ); if( tmp ) { if( vmapi_is_virtual_machine() == TRUE ) { dprintf( D_ALWAYS, "WARNING: both VMP_HOST_MACHINE and VMP_VM_LIST are defined. Assuming this machine is a virtual machine\n"); }else { vmapi_create_vmmanager(tmp); } free(tmp); } InitJobHistoryFile( "STARTD_HISTORY" , "STARTD_PER_JOB_HISTORY_DIR"); return TRUE; }
void main_init( int, char* argv[] ) { char** ptr; // Reset the cron & benchmark managers to a known state cron_job_mgr = NULL; bench_job_mgr = NULL; // Process command line args. for(ptr = argv + 1; *ptr; ptr++) { if(ptr[0][0] != '-') { usage( argv[0] ); } switch( ptr[0][1] ) { case 's': break; case 'n': ptr++; if( !(ptr && *ptr) ) { EXCEPT( "-n requires another arugment" ); } if (Name) { free(Name); } Name = build_valid_daemon_name( *ptr ); dprintf( D_ALWAYS, "Using name: %s\n", Name ); break; default: fprintf( stderr, "Error: Unknown option %s\n", *ptr ); usage( argv[0] ); } } // Record the time we started up for use in determining // keyboard idle time on SMP machines, etc. startd_startup = time( 0 ); // Instantiate the Resource Manager object. resmgr = new ResMgr; // find all the starters we care about and get their classads. resmgr->starter_mgr.init(); ClassAd tmp_classad; MyString starter_ability_list; resmgr->starter_mgr.publish(&tmp_classad, A_STATIC | A_PUBLIC); tmp_classad.LookupString(ATTR_STARTER_ABILITY_LIST, starter_ability_list); if( starter_ability_list.find(ATTR_HAS_VM) >= 0 ) { // Now starter has codes for vm universe. resmgr->m_vmuniverse_mgr.setStarterAbility(true); // check whether vm universe is available through vmgahp server resmgr->m_vmuniverse_mgr.checkVMUniverse(); } // Read in global parameters from the config file. // We do this after we instantiate the resmgr, so we can know // what num_cpus is, but before init_resources(), so we can // use polling_interval to figure out how big to make each // Resource's LoadQueue object. init_params(1); // The 1 indicates that this is the first time #if defined(WIN32) // We do this on Win32 since Win32 uses last_x_event // variable in a similar fasion to the X11 condor_kbdd, and // thus it must be initialized. 
command_x_event( 0, 0, 0 ); #endif // Instantiate Resource objects in the ResMgr resmgr->init_resources(); // Do a little sanity checking and cleanup StringList execute_dirs; resmgr->FillExecuteDirsList( &execute_dirs ); check_execute_dir_perms( execute_dirs ); cleanup_execute_dirs( execute_dirs ); // Compute all attributes resmgr->compute( A_ALL ); resmgr->walk( &Resource::init_classad ); // Startup Cron cron_job_mgr = new StartdCronJobMgr( ); cron_job_mgr->Initialize( "startd" ); // Startup benchmarking bench_job_mgr = new StartdBenchJobMgr( ); bench_job_mgr->Initialize( "benchmarks" ); // Now that we have our classads, we can compute things that // need to be evaluated resmgr->walk( &Resource::compute, A_EVALUATED ); resmgr->walk( &Resource::refresh_classad, A_PUBLIC | A_EVALUATED ); // Now that everything is computed and published, we can // finally put in the attrs shared across the different slots resmgr->walk( &Resource::refresh_classad, A_PUBLIC | A_SHARED_SLOT ); // If we EXCEPT, don't leave any starters lying around. _EXCEPT_Cleanup = do_cleanup; // register daemoncore stuff ////////////////////////////////////////////////// // Commands ////////////////////////////////////////////////// // These commands all read the ClaimId off the wire, find // the resource with that ClaimId, and call appropriate // action on that resource. Plus, all of these commands only // make sense when we're in the claimed state. So, we can // handle them all with a common handler. For all of them, // you need WRITE permission. 
daemonCore->Register_Command( ALIVE, "ALIVE", (CommandHandler)command_handler, "command_handler", 0, DAEMON, D_FULLDEBUG ); daemonCore->Register_Command( DEACTIVATE_CLAIM, "DEACTIVATE_CLAIM", (CommandHandler)command_handler, "command_handler", 0, DAEMON ); daemonCore->Register_Command( DEACTIVATE_CLAIM_FORCIBLY, "DEACTIVATE_CLAIM_FORCIBLY", (CommandHandler)command_handler, "command_handler", 0, DAEMON ); daemonCore->Register_Command( PCKPT_FRGN_JOB, "PCKPT_FRGN_JOB", (CommandHandler)command_handler, "command_handler", 0, DAEMON ); daemonCore->Register_Command( REQ_NEW_PROC, "REQ_NEW_PROC", (CommandHandler)command_handler, "command_handler", 0, DAEMON ); if (param_boolean("ALLOW_SLOT_PAIRING", false)) { daemonCore->Register_Command( SWAP_CLAIM_AND_ACTIVATION, "SWAP_CLAIM_AND_ACTIVATION", (CommandHandler)command_with_opts_handler, "command_handler", 0, DAEMON ); } // These commands are special and need their own handlers // READ permission commands daemonCore->Register_Command( GIVE_STATE, "GIVE_STATE", (CommandHandler)command_give_state, "command_give_state", 0, READ ); daemonCore->Register_Command( GIVE_TOTALS_CLASSAD, "GIVE_TOTALS_CLASSAD", (CommandHandler)command_give_totals_classad, "command_give_totals_classad", 0, READ ); daemonCore->Register_Command( QUERY_STARTD_ADS, "QUERY_STARTD_ADS", (CommandHandler)command_query_ads, "command_query_ads", 0, READ ); // DAEMON permission commands daemonCore->Register_Command( ACTIVATE_CLAIM, "ACTIVATE_CLAIM", (CommandHandler)command_activate_claim, "command_activate_claim", 0, DAEMON ); daemonCore->Register_Command( REQUEST_CLAIM, "REQUEST_CLAIM", (CommandHandler)command_request_claim, "command_request_claim", 0, DAEMON ); daemonCore->Register_Command( RELEASE_CLAIM, "RELEASE_CLAIM", (CommandHandler)command_release_claim, "command_release_claim", 0, DAEMON ); daemonCore->Register_Command( SUSPEND_CLAIM, "SUSPEND_CLAIM", (CommandHandler)command_suspend_claim, "command_suspend_claim", 0, DAEMON ); 
daemonCore->Register_Command( CONTINUE_CLAIM, "CONTINUE_CLAIM", (CommandHandler)command_continue_claim, "command_continue_claim", 0, DAEMON ); daemonCore->Register_Command( X_EVENT_NOTIFICATION, "X_EVENT_NOTIFICATION", (CommandHandler)command_x_event, "command_x_event", 0, ALLOW, D_FULLDEBUG ); daemonCore->Register_Command( PCKPT_ALL_JOBS, "PCKPT_ALL_JOBS", (CommandHandler)command_pckpt_all, "command_pckpt_all", 0, DAEMON ); daemonCore->Register_Command( PCKPT_JOB, "PCKPT_JOB", (CommandHandler)command_name_handler, "command_name_handler", 0, DAEMON ); #if !defined(WIN32) daemonCore->Register_Command( DELEGATE_GSI_CRED_STARTD, "DELEGATE_GSI_CRED_STARTD", (CommandHandler)command_delegate_gsi_cred, "command_delegate_gsi_cred", 0, DAEMON ); #endif // OWNER permission commands daemonCore->Register_Command( VACATE_ALL_CLAIMS, "VACATE_ALL_CLAIMS", (CommandHandler)command_vacate_all, "command_vacate_all", 0, OWNER ); daemonCore->Register_Command( VACATE_ALL_FAST, "VACATE_ALL_FAST", (CommandHandler)command_vacate_all, "command_vacate_all", 0, OWNER ); daemonCore->Register_Command( VACATE_CLAIM, "VACATE_CLAIM", (CommandHandler)command_name_handler, "command_name_handler", 0, OWNER ); daemonCore->Register_Command( VACATE_CLAIM_FAST, "VACATE_CLAIM_FAST", (CommandHandler)command_name_handler, "command_name_handler", 0, OWNER ); // NEGOTIATOR permission commands daemonCore->Register_Command( MATCH_INFO, "MATCH_INFO", (CommandHandler)command_match_info, "command_match_info", 0, NEGOTIATOR ); // the ClassAd-only command daemonCore->Register_Command( CA_AUTH_CMD, "CA_AUTH_CMD", (CommandHandler)command_classad_handler, "command_classad_handler", 0, WRITE ); daemonCore->Register_Command( CA_CMD, "CA_CMD", (CommandHandler)command_classad_handler, "command_classad_handler", 0, WRITE ); // Virtual Machine commands if( vmapi_is_host_machine() == TRUE ) { daemonCore->Register_Command( VM_REGISTER, "VM_REGISTER", (CommandHandler)command_vm_register, "command_vm_register", 0, DAEMON, 
D_FULLDEBUG ); } // Commands from starter for VM universe daemonCore->Register_Command( VM_UNIV_GAHP_ERROR, "VM_UNIV_GAHP_ERROR", (CommandHandler)command_vm_universe, "command_vm_universe", 0, DAEMON, D_FULLDEBUG ); daemonCore->Register_Command( VM_UNIV_VMPID, "VM_UNIV_VMPID", (CommandHandler)command_vm_universe, "command_vm_universe", 0, DAEMON, D_FULLDEBUG ); daemonCore->Register_Command( VM_UNIV_GUEST_IP, "VM_UNIV_GUEST_IP", (CommandHandler)command_vm_universe, "command_vm_universe", 0, DAEMON, D_FULLDEBUG ); daemonCore->Register_Command( VM_UNIV_GUEST_MAC, "VM_UNIV_GUEST_MAC", (CommandHandler)command_vm_universe, "command_vm_universe", 0, DAEMON, D_FULLDEBUG ); daemonCore->Register_CommandWithPayload( DRAIN_JOBS, "DRAIN_JOBS", (CommandHandler)command_drain_jobs, "command_drain_jobs", 0, ADMINISTRATOR); daemonCore->Register_CommandWithPayload( CANCEL_DRAIN_JOBS, "CANCEL_DRAIN_JOBS", (CommandHandler)command_cancel_drain_jobs, "command_cancel_drain_jobs", 0, ADMINISTRATOR); ////////////////////////////////////////////////// // Reapers ////////////////////////////////////////////////// main_reaper = daemonCore->Register_Reaper( "reaper_starters", (ReaperHandler)reaper, "reaper" ); ASSERT(main_reaper != FALSE); daemonCore->Set_Default_Reaper( main_reaper ); #if defined(WIN32) // Pretend we just got an X event so we think our console idle // is something, even if we haven't heard from the kbdd yet. // We do this on Win32 since Win32 uses last_x_event // variable in a similar fasion to the X11 condor_kbdd, and // thus it must be initialized. command_x_event( 0, 0, 0 ); #endif resmgr->start_update_timer(); #if HAVE_HIBERNATION resmgr->updateHibernateConfiguration(); #endif /* HAVE_HIBERNATION */ // Evaluate the state of all resources and update CM // We don't just call eval_and_update_all() b/c we don't need // to recompute anything. 
// This is now called by a timer registered by start_update_timer() //resmgr->update_all(); #if defined(WANT_CONTRIB) && defined(WITH_MANAGEMENT) #if defined(HAVE_DLOPEN) StartdPluginManager::Load(); #elif defined(WIN32) load_startd_mgmt(); #endif StartdPluginManager::Initialize(); #endif }