示例#1
0
bool
OsProc::JobExit( void )
{
	int reason;	

	dprintf( D_FULLDEBUG, "Inside OsProc::JobExit()\n" );

	if( requested_exit == true ) {
		if( Starter->jic->hadHold() || Starter->jic->hadRemove() ) {
			reason = JOB_KILLED;
		} else {
			reason = JOB_NOT_CKPTED;
		}
	} else if( dumped_core ) {
		reason = JOB_COREDUMPED;
	} else if( job_not_started ) {
		reason = JOB_NOT_STARTED;
	} else {
		reason = JOB_EXITED;
	}

#if defined ( WIN32 )
    
    /* If we loaded the user's profile, then we should dump it now */
    if ( owner_profile_.loaded () ) {
        owner_profile_.unload ();
        
        /* !!!! DO NOT DO THIS IN THE FUTURE !!!! */
        owner_profile_.destroy ();
        /* !!!! DO NOT DO THIS IN THE FUTURE !!!! */
        
    }

    priv_state old = set_user_priv ();
    HANDLE user_token = priv_state_get_handle ();
    ASSERT ( user_token );
    
    // Check USE_VISIBLE_DESKTOP in condor_config.  If set to TRUE,
    // then removed our users priveleges from the visible desktop.
	if (param_boolean_crufty("USE_VISIBLE_DESKTOP", false)) {
        /* at this point we can revoke the user's access to the visible desktop */
        RevokeDesktopAccess ( user_token );
    }

    set_priv ( old );

#endif

	return Starter->jic->notifyJobExit( exit_status, reason, this );
}
示例#2
0
void
init_params()
{
	char	*tmp;
	static	int	master_name_in_config = 0;

	if( ! master_name_in_config ) {
			// First time, or we know it's not in the config file. 
		if( ! MasterName ) {
				// Not set on command line
			tmp = param( "MASTER_NAME" );
			if( tmp ) {
				MasterName = build_valid_daemon_name( tmp );
				master_name_in_config = 1;
				free( tmp );
			} 
		}
	} else {
		delete [] MasterName;
		tmp = param( "MASTER_NAME" );
		MasterName = build_valid_daemon_name( tmp );
		free( tmp );
	}
	if( MasterName ) {
		dprintf( D_FULLDEBUG, "Using name: %s\n", MasterName );
	}
			
	if (!param_boolean_crufty("START_MASTER", true)) {
			dprintf( D_ALWAYS, "START_MASTER was set to FALSE, shutting down.\n" );
			StartDaemons = FALSE;
			main_shutdown_graceful();
	}

		
	StartDaemons = TRUE;
	if (!param_boolean_crufty("START_DAEMONS", true)) {
			dprintf( D_ALWAYS, 
					 "START_DAEMONS flag was set to FALSE.  Not starting daemons.\n" );
			StartDaemons = FALSE;
	} 
		// If we were sent the daemons_off command, don't forget that
		// here. 
	if( GotDaemonsOff ) {
		StartDaemons = FALSE;
	}

	PublishObituaries = param_boolean_crufty("PUBLISH_OBITUARIES", true) ? TRUE : FALSE;

	Lines = param_integer("OBITUARY_LOG_LENGTH",20);

	master_backoff_constant = param_integer( "MASTER_BACKOFF_CONSTANT", 9, 1 );

	master_backoff_ceiling = param_integer( "MASTER_BACKOFF_CEILING", 3600,1 );

	master_backoff_factor = param_double( "MASTER_BACKOFF_FACTOR", 2.0, 0 );
	if( master_backoff_factor <= 0.0 ) {
    	master_backoff_factor = 2.0;
    }
	
	master_recover_time = param_integer( "MASTER_RECOVER_FACTOR", 300, 1 );

	update_interval = param_integer( "MASTER_UPDATE_INTERVAL", 5 * MINUTE, 1 );

	check_new_exec_interval = param_integer( "MASTER_CHECK_NEW_EXEC_INTERVAL", 5*MINUTE );

	new_bin_delay = param_integer( "MASTER_NEW_BINARY_DELAY", 2*MINUTE, 1 );

	new_bin_restart_mode = GRACEFUL;
	char * restart_mode = param("MASTER_NEW_BINARY_RESTART");
	if (restart_mode) {
#if 1
		StopStateT mode = StringToStopState(restart_mode);
#else
		static const struct {
			const char * text;
			StopStateT   mode;
			} modes[] = {
				{ "GRACEFUL", GRACEFUL },
				{ "PEACEFUL", PEACEFUL },
				{ "NEVER", NONE }, { "NONE", NONE }, { "NO", NONE },
			//	{ "FAST", FAST },
			//	{ "KILL", KILL },
			};
		StopStateT mode = (StopStateT)-1; // prime with -1 so we can detect bad input.
		for (int ii = 0; ii < (int)COUNTOF(modes); ++ii) {
			if (MATCH == strcasecmp(restart_mode, modes[ii].text)) {
				mode = modes[ii].mode;
				break;
			}
		}
#endif
		if (mode == (StopStateT)-1)	{
			dprintf(D_ALWAYS, "%s is not a valid value for MASTER_NEW_BINARY_RESTART. using GRACEFUL\n", restart_mode);
		}
		if (mode >= 0 && mode <= NONE)
			new_bin_restart_mode = mode;
		free(restart_mode);
	}

	preen_interval = param_integer( "PREEN_INTERVAL", 24*HOUR, 0 );
	if(preen_interval == 0) {
		EXCEPT("PREEN_INTERVAL in the condor configuration is too low (0).  Please set it to an integer in the range 1 to %d (default %d).  To disable condor_preen entirely, comment out PREEN.", INT_MAX, 24*HOUR);

	}

	shutdown_fast_timeout = param_integer( "SHUTDOWN_FAST_TIMEOUT", 5*MINUTE, 1 );

	shutdown_graceful_timeout = param_integer( "SHUTDOWN_GRACEFUL_TIMEOUT", 30*MINUTE, 1 );

	AllowAdminCommands = param_boolean( "ALLOW_ADMIN_COMMANDS", true );

	if( FS_Preen ) {
		free( FS_Preen );
	}
	FS_Preen = param( "PREEN" );
}
示例#3
0
int
VanillaProc::StartJob()
{
	dprintf(D_FULLDEBUG,"in VanillaProc::StartJob()\n");

	// vanilla jobs, unlike standard jobs, are allowed to run 
	// shell scripts (or as is the case on NT, batch files).  so
	// edit the ad so we start up a shell, pass the executable as
	// an argument to the shell, if we are asked to run a .bat file.
#ifdef WIN32

	CHAR		interpreter[MAX_PATH+1],
				systemshell[MAX_PATH+1];    
	const char* jobtmp				= Starter->jic->origJobName();
	int			joblen				= strlen(jobtmp);
	const char	*extension			= joblen > 0 ? &(jobtmp[joblen-4]) : NULL;
	bool		binary_executable	= ( extension && 
										( MATCH == strcasecmp ( ".exe", extension ) || 
										  MATCH == strcasecmp ( ".com", extension ) ) ),
				java_universe		= ( CONDOR_UNIVERSE_JAVA == job_universe );
	ArgList		arguments;
	MyString	filename,
				jobname, 
				error;
	
	if ( extension && !java_universe && !binary_executable ) {

		/** since we do not actually know how long the extension of
			the file is, we'll need to hunt down the '.' in the path,
			if it exists */
		extension = strrchr ( jobtmp, '.' );

		if ( !extension ) {

			dprintf ( 
				D_ALWAYS, 
				"VanillaProc::StartJob(): Failed to extract "
				"the file's extension.\n" );

			/** don't fail here, since we want executables to run
				as usual.  That is, some condor jobs submit 
				executables that do not have the '.exe' extension,
				but are, nonetheless, executable binaries.  For
				instance, a submit script may contain:

				executable = executable$(OPSYS) */

		} else {

			/** pull out the path to the executable */
			if ( !JobAd->LookupString ( 
				ATTR_JOB_CMD, 
				jobname ) ) {
				
				/** fall back on Starter->jic->origJobName() */
				jobname = jobtmp;

			}

			/** If we transferred the job, it may have been
				renamed to condor_exec.exe even though it is
				not an executable. Here we rename it back to
				a the correct extension before it will run. */
			if ( MATCH == strcasecmp ( 
					CONDOR_EXEC, 
					condor_basename ( jobname.Value () ) ) ) {
				filename.formatstr ( "condor_exec%s", extension );
				if (rename(CONDOR_EXEC, filename.Value()) != 0) {
					dprintf (D_ALWAYS, "VanillaProc::StartJob(): ERROR: "
							"failed to rename executable from %s to %s\n", 
							CONDOR_EXEC, filename.Value() );
				}
			} else {
				filename = jobname;
			}
			
			/** Since we've renamed our executable, we need to
				update the job ad to reflect this change. */
			if ( !JobAd->Assign ( 
				ATTR_JOB_CMD, 
				filename ) ) {

				dprintf (
					D_ALWAYS,
					"VanillaProc::StartJob(): ERROR: failed to "
					"set new executable name.\n" );

				return FALSE;

			}

			/** We've moved the script to argv[1], so we need to 
				add	the remaining arguments to positions argv[2]..
				argv[/n/]. */
			if ( !arguments.AppendArgsFromClassAd ( JobAd, &error ) ||
				 !arguments.InsertArgsIntoClassAd ( JobAd, NULL, 
				&error ) ) {

				dprintf (
					D_ALWAYS,
					"VanillaProc::StartJob(): ERROR: failed to "
					"get arguments from job ad: %s\n",
					error.Value () );

				return FALSE;

			}

			/** Since we know already we don't want this file returned
				to us, we explicitly add it to an exception list which
				will stop the file transfer mechanism from considering
				it for transfer back to its submitter */
			Starter->jic->removeFromOutputFiles (
				filename.Value () );

		}
			
	}
#endif

	// set up a FamilyInfo structure to tell OsProc to register a family
	// with the ProcD in its call to DaemonCore::Create_Process
	//
	FamilyInfo fi;

	// take snapshots at no more than 15 seconds in between, by default
	//
	fi.max_snapshot_interval = param_integer("PID_SNAPSHOT_INTERVAL", 15);

	m_dedicated_account = Starter->jic->getExecuteAccountIsDedicated();
	if( ThisProcRunsAlongsideMainProc() ) {
			// If we track a secondary proc's family tree (such as
			// sshd) using the same dedicated account as the job's
			// family tree, we could end up killing the job when we
			// clean up the secondary family.
		m_dedicated_account = NULL;
	}
	if (m_dedicated_account) {
			// using login-based family tracking
		fi.login = m_dedicated_account;
			// The following message is documented in the manual as the
			// way to tell whether the dedicated execution account
			// configuration is being used.
		dprintf(D_ALWAYS,
		        "Tracking process family by login \"%s\"\n",
		        fi.login);
	}

	FilesystemRemap * fs_remap = NULL;
#if defined(LINUX)
	// on Linux, we also have the ability to track processes via
	// a phony supplementary group ID
	//
	gid_t tracking_gid = 0;
	if (param_boolean("USE_GID_PROCESS_TRACKING", false)) {
		if (!can_switch_ids() &&
		    (Starter->condorPrivSepHelper() == NULL))
		{
			EXCEPT("USE_GID_PROCESS_TRACKING enabled, but can't modify "
			           "the group list of our children unless running as "
			           "root or using PrivSep");
		}
		fi.group_ptr = &tracking_gid;
	}

	// Increase the OOM score of this process; the child will inherit it.
	// This way, the job will be heavily preferred to be killed over a normal process.
	// OOM score is currently exponential - a score of 4 is a factor-16 increase in
	// the OOM score.
	setupOOMScore(4,800);
#endif

#if defined(HAVE_EXT_LIBCGROUP)
	// Determine the cgroup
	std::string cgroup_base;
	param(cgroup_base, "BASE_CGROUP", "");
	MyString cgroup_str;
	const char *cgroup = NULL;
		/* Note on CONDOR_UNIVERSE_LOCAL - The cgroup setup code below
		 *  requires a unique name for the cgroup. It relies on
		 *  uniqueness of the MachineAd's Name
		 *  attribute. Unfortunately, in the local universe the
		 *  MachineAd (mach_ad elsewhere) is never populated, because
		 *  there is no machine. As a result the ASSERT on
		 *  starter_name fails. This means that the local universe
		 *  will not work on any machine that has BASE_CGROUP
		 *  configured. A potential workaround is to set
		 *  STARTER.BASE_CGROUP on any machine that is also running a
		 *  schedd, but that disables cgroup support from a
		 *  co-resident startd. Instead, I'm disabling cgroup support
		 *  from within the local universe until the intraction of
		 *  local universe and cgroups can be properly worked
		 *  out. -matt 7 nov '12
		 */
	if (CONDOR_UNIVERSE_LOCAL != job_universe && cgroup_base.length()) {
		MyString cgroup_uniq;
		std::string starter_name, execute_str;
		param(execute_str, "EXECUTE", "EXECUTE_UNKNOWN");
			// Note: Starter is a global variable from os_proc.cpp
		Starter->jic->machClassAd()->EvalString(ATTR_NAME, NULL, starter_name);
		if (starter_name.size() == 0) {
			char buf[16];
			sprintf(buf, "%d", getpid());
			starter_name = buf;
		}
		//ASSERT (starter_name.size());
		cgroup_uniq.formatstr("%s_%s", execute_str.c_str(), starter_name.c_str());
		const char dir_delim[2] = {DIR_DELIM_CHAR, '\0'};
		cgroup_uniq.replaceString(dir_delim, "_");
		cgroup_str.formatstr("%s%ccondor%s", cgroup_base.c_str(), DIR_DELIM_CHAR,
			cgroup_uniq.Value());
		cgroup_str += this->CgroupSuffix();
		
		cgroup = cgroup_str.Value();
		ASSERT (cgroup != NULL);
		fi.cgroup = cgroup;
		dprintf(D_FULLDEBUG, "Requesting cgroup %s for job.\n", cgroup);
	}

#endif

// The chroot stuff really only works on linux
#ifdef LINUX
	{
        // Have Condor manage a chroot
       std::string requested_chroot_name;
       JobAd->EvalString("RequestedChroot", NULL, requested_chroot_name);
       const char * allowed_root_dirs = param("NAMED_CHROOT");
       if (requested_chroot_name.size()) {
               dprintf(D_FULLDEBUG, "Checking for chroot: %s\n", requested_chroot_name.c_str());
               StringList chroot_list(allowed_root_dirs);
               chroot_list.rewind();
               const char * next_chroot;
               bool acceptable_chroot = false;
               std::string requested_chroot;
               while ( (next_chroot=chroot_list.next()) ) {
                       MyString chroot_spec(next_chroot);
                       chroot_spec.Tokenize();
                       const char * chroot_name = chroot_spec.GetNextToken("=", false);
                       if (chroot_name == NULL) {
                               dprintf(D_ALWAYS, "Invalid named chroot: %s\n", chroot_spec.Value());
                       }
                       const char * next_dir = chroot_spec.GetNextToken("=", false);
                       if (chroot_name == NULL) {
                               dprintf(D_ALWAYS, "Invalid named chroot: %s\n", chroot_spec.Value());
                       }
                       dprintf(D_FULLDEBUG, "Considering directory %s for chroot %s.\n", next_dir, chroot_spec.Value());
                       if (IsDirectory(next_dir) && chroot_name && (strcmp(requested_chroot_name.c_str(), chroot_name) == 0)) {
                               acceptable_chroot = true;
                               requested_chroot = next_dir;
                       }
               }
               // TODO: path to chroot MUST be all root-owned, or we have a nice security exploit.
               // Is this the responsibility of Condor to check, or the sysadmin who set it up?
               if (!acceptable_chroot) {
                       return FALSE;
               }
               dprintf(D_FULLDEBUG, "Will attempt to set the chroot to %s.\n", requested_chroot.c_str());

               std::stringstream ss;
               std::stringstream ss2;
               ss2 << Starter->GetExecuteDir() << DIR_DELIM_CHAR << "dir_" << getpid();
               std::string execute_dir = ss2.str();
               ss << requested_chroot << DIR_DELIM_CHAR << ss2.str();
               std::string full_dir_str = ss.str();
               if (is_trivial_rootdir(requested_chroot)) {
                   dprintf(D_FULLDEBUG, "Requested a trivial chroot %s; this is a no-op.\n", requested_chroot.c_str());
               } else if (IsDirectory(execute_dir.c_str())) {
                       {
                           TemporaryPrivSentry sentry(PRIV_ROOT);
                           if( mkdir(full_dir_str.c_str(), S_IRWXU) < 0 ) {
                               dprintf( D_FAILURE|D_ALWAYS,
                                   "Failed to create sandbox directory in chroot (%s): %s\n",
                                   full_dir_str.c_str(),
                                   strerror(errno) );
                               return FALSE;
                           }
                           if (chown(full_dir_str.c_str(),
                                     get_user_uid(),
                                     get_user_gid()) == -1)
                           {
                               EXCEPT("chown error on %s: %s",
                                      full_dir_str.c_str(),
                                      strerror(errno));
                           }
                       }
                       if (!fs_remap) {
                               fs_remap = new FilesystemRemap();
                       }
                       dprintf(D_FULLDEBUG, "Adding mapping: %s -> %s.\n", execute_dir.c_str(), full_dir_str.c_str());
                       if (fs_remap->AddMapping(execute_dir, full_dir_str)) {
                               // FilesystemRemap object prints out an error message for us.
                               return FALSE;
                       }
                       dprintf(D_FULLDEBUG, "Adding mapping %s -> %s.\n", requested_chroot.c_str(), "/");
                       std::string root_str("/");
                       if (fs_remap->AddMapping(requested_chroot, root_str)) {
                               return FALSE;
                       }
               } else {
                       dprintf(D_ALWAYS, "Unable to do chroot because working dir %s does not exist.\n", execute_dir.c_str());
               }
       } else {
               dprintf(D_FULLDEBUG, "Value of RequestedChroot is unset.\n");
       }
	}
// End of chroot 
#endif


	// On Linux kernel 2.4.19 and later, we can give each job its
	// own FS mounts.
	auto_free_ptr mount_under_scratch(param("MOUNT_UNDER_SCRATCH"));
	if (mount_under_scratch) {
		// try evaluating mount_under_scratch as a classad expression, if it is
		// an expression it must return a string. if it's not an expression, just
		// use it as a string (as we did before 8.3.6)
		classad::Value value;
		if (JobAd->EvaluateExpr(mount_under_scratch.ptr(), value)) {
			const char * pval = NULL;
			if (value.IsStringValue(pval)) {
				mount_under_scratch.set(strdup(pval));
			} else {
				// was an expression, but not a string, so report and error and fail.
				dprintf(D_ALWAYS | D_ERROR,
					"ERROR: MOUNT_UNDER_SCRATCH does not evaluate to a string, it is : %s\n",
					ClassAdValueToString(value));
				return FALSE;
			}
		}
	}

	// if execute dir is encrypted, add /tmp and /var/tmp to mount_under_scratch
	bool encrypt_execdir = false;
	JobAd->LookupBool(ATTR_ENCRYPT_EXECUTE_DIRECTORY,encrypt_execdir);
	if (encrypt_execdir || param_boolean_crufty("ENCRYPT_EXECUTE_DIRECTORY",false)) {
		// prepend /tmp, /var/tmp to whatever admin wanted. don't worry
		// if admin already listed /tmp etc - subdirs can appear twice
		// in this list because AddMapping() ok w/ duplicate entries
		MyString buf("/tmp,/var/tmp,");
		buf += mount_under_scratch.ptr();
		mount_under_scratch.set(buf.StrDup());
	}
	if (mount_under_scratch) {
		std::string working_dir = Starter->GetWorkingDir();

		if (IsDirectory(working_dir.c_str())) {
			StringList mount_list(mount_under_scratch);

			mount_list.rewind();
			if (!fs_remap) {
				fs_remap = new FilesystemRemap();
			}
			char * next_dir;
			while ( (next_dir=mount_list.next()) ) {
				if (!*next_dir) {
					// empty string?
					mount_list.deleteCurrent();
					continue;
				}
				std::string next_dir_str(next_dir);
				// Gah, I wish I could throw an exception to clean up these nested if statements.
				if (IsDirectory(next_dir)) {
					char * full_dir = dirscat(working_dir, next_dir_str);
					if (full_dir) {
						std::string full_dir_str(full_dir);
						delete [] full_dir; full_dir = NULL;
						if (!mkdir_and_parents_if_needed( full_dir_str.c_str(), S_IRWXU, PRIV_USER )) {
							dprintf(D_ALWAYS, "Failed to create scratch directory %s\n", full_dir_str.c_str());
							delete fs_remap;
							return FALSE;
						}
						dprintf(D_FULLDEBUG, "Adding mapping: %s -> %s.\n", full_dir_str.c_str(), next_dir_str.c_str());
						if (fs_remap->AddMapping(full_dir_str, next_dir_str)) {
							// FilesystemRemap object prints out an error message for us.
							delete fs_remap;
							return FALSE;
						}
					} else {
						dprintf(D_ALWAYS, "Unable to concatenate %s and %s.\n", working_dir.c_str(), next_dir_str.c_str());
						delete fs_remap;
						return FALSE;
					}
				} else {
					dprintf(D_ALWAYS, "Unable to add mapping %s -> %s because %s doesn't exist.\n", working_dir.c_str(), next_dir, next_dir);
				}
			}
		} else {
			dprintf(D_ALWAYS, "Unable to perform mappings because %s doesn't exist.\n", working_dir.c_str());
			delete fs_remap;
			return FALSE;
		}
		mount_under_scratch.clear();
	}

#if defined(LINUX)
	// On Linux kernel 2.6.24 and later, we can give each
	// job its own PID namespace
	if (param_boolean("USE_PID_NAMESPACES", false)) {
		if (!can_switch_ids()) {
			EXCEPT("USE_PID_NAMESPACES enabled, but can't perform this "
				"call in Linux unless running as root.");
		}
		fi.want_pid_namespace = this->SupportsPIDNamespace();
		if (fi.want_pid_namespace) {
			if (!fs_remap) {
				fs_remap = new FilesystemRemap();
			}
			fs_remap->RemapProc();
		}

		// When PID Namespaces are enabled, need to run the job
		// under the condor_pid_ns_init program, so that signals
		// propagate through to the child.  

		// First tell the program where to log output status
		// via an environment variable
		if (param_boolean("USE_PID_NAMESPACE_INIT", true)) {
			Env env;
			MyString env_errors;
			MyString arg_errors;
			std::string filename;

			filename = Starter->GetWorkingDir();
			filename += "/.condor_pid_ns_status";
		
			env.MergeFrom(JobAd, &env_errors);
			env.SetEnv("_CONDOR_PID_NS_INIT_STATUS_FILENAME", filename);
			env.InsertEnvIntoClassAd(JobAd, &env_errors);

			Starter->jic->removeFromOutputFiles(condor_basename(filename.c_str()));
			this->m_pid_ns_status_filename = filename;
			
			// Now, set the job's CMD to the wrapper, and shift
			// over the arguments by one

			ArgList args;
			std::string cmd;

			JobAd->LookupString(ATTR_JOB_CMD, cmd);
			args.AppendArg(cmd);
			args.AppendArgsFromClassAd(JobAd, &arg_errors);
			args.InsertArgsIntoClassAd(JobAd, NULL, & arg_errors);
	
			std::string libexec;
			if( !param(libexec,"LIBEXEC") ) {
				dprintf(D_ALWAYS, "Cannot find LIBEXEC so can not run condor_pid_ns_init\n");
				return 0;
			}
			std::string c_p_n_i = libexec + "/condor_pid_ns_init";
			JobAd->Assign(ATTR_JOB_CMD, c_p_n_i);
		}
	}
	dprintf(D_FULLDEBUG, "PID namespace option: %s\n", fi.want_pid_namespace ? "true" : "false");
#endif


	// have OsProc start the job
	//
	int retval = OsProc::StartJob(&fi, fs_remap);

	if (fs_remap != NULL) {
		delete fs_remap;
	}

#if defined(HAVE_EXT_LIBCGROUP)

	// Set fairshare limits.  Note that retval == 1 indicates success, 0 is failure.
	// See Note near setup of param(BASE_CGROUP)
	if (CONDOR_UNIVERSE_LOCAL != job_universe && cgroup && retval) {
		std::string mem_limit;
		param(mem_limit, "CGROUP_MEMORY_LIMIT_POLICY", "soft");
		bool mem_is_soft = mem_limit == "soft";
		std::string cgroup_string = cgroup;
		CgroupLimits climits(cgroup_string);
		if (mem_is_soft || (mem_limit == "hard")) {
			ClassAd * MachineAd = Starter->jic->machClassAd();
			int MemMb;
			if (MachineAd->LookupInteger(ATTR_MEMORY, MemMb)) {
				uint64_t MemMb_big = MemMb;
				m_memory_limit = MemMb_big;
				climits.set_memory_limit_bytes(1024*1024*MemMb_big, mem_is_soft);

				// Note that ATTR_VIRTUAL_MEMORY on Linux
				// is sum of memory and swap, in Kilobytes

				int VMemKb;
				if (MachineAd->LookupInteger(ATTR_VIRTUAL_MEMORY, VMemKb)) {

					uint64_t memsw_limit = ((uint64_t)1024) * VMemKb;
					if (VMemKb > 0) {
						// we're not allowed to set memsw limit <
						// the hard memory limit.  If we haven't set the hard
						// memory limit, the default may be infinity.
						// So, if we've set soft, set hard limit to memsw - one page
						if (mem_is_soft) {
							uint64_t hard_limit = memsw_limit - 4096;
							climits.set_memory_limit_bytes(hard_limit, false);
						}
						climits.set_memsw_limit_bytes(memsw_limit);
					}
				} else {
					dprintf(D_ALWAYS, "Not setting virtual memory limit in cgroup because "
						"Virtual Memory attribute missing in machine ad.\n");
				}
			} else {
				dprintf(D_ALWAYS, "Not setting memory limit in cgroup because "
					"Memory attribute missing in machine ad.\n");
			}
		} else if (mem_limit == "none") {
			dprintf(D_FULLDEBUG, "Not enforcing memory limit.\n");
		} else {
			dprintf(D_ALWAYS, "Invalid value of CGROUP_MEMORY_LIMIT_POLICY: %s.  Ignoring.\n", mem_limit.c_str());
		}

		// Now, set the CPU shares
		ClassAd * MachineAd = Starter->jic->machClassAd();
		int numCores = 1;
		if (MachineAd->LookupInteger(ATTR_CPUS, numCores)) {
			climits.set_cpu_shares(numCores*100);
		} else {
			dprintf(D_FULLDEBUG, "Invalid value of Cpus in machine ClassAd; ignoring.\n");
		}
		setupOOMEvent(cgroup);
	}

    m_statistics.Reconfig();

	// Now that the job is started, decrease the likelihood that the starter
	// is killed instead of the job itself.
	if (retval)
	{
		setupOOMScore(0,0);
	}

#endif

	return retval;
}
示例#4
0
/*ARGSUSED*/
int
main(int argc, char *argv[] )
{
	char	*tmp = NULL;
	int		reserved_swap, free_swap;
	char	*host = NULL, *cluster = NULL, *proc = NULL;
	char	*bogus_capability;
	int		i;

	set_mySubSystem( "SHADOW", SUBSYSTEM_TYPE_SHADOW );

	myDistro->Init( argc, argv );
	if( argc == 2 && strncasecmp(argv[1], "-cl", 3) == MATCH ) {
		printClassAd();
		exit( 0 );
	}

#if defined(SYSCALL_DEBUG)
	SyscallLabel = argv[0] + 7;
#endif

#if !defined(WIN32)
	install_sig_handler(SIGPIPE, (SIG_HANDLER)SIG_IGN );
#endif

	if( argc > 1 ) {
		if( strcmp("-t",argv[1]) == MATCH ) {
			Termlog = 1;
			argv++;
			argc--;
		}
	}

	ShadowBDate = LastRestartTime = time(0);

	_EXCEPT_Cleanup = ExceptCleanup;

	MyPid = getpid();
	
	config();

	/* Start up with condor.condor privileges. */
	/*
	  we need to do this AFTER we call config() so that if CONDOR_IDS
	  is being defined in the config file, we'll get the right value
	*/ 
	set_condor_priv();

	if(Termlog)
		dprintf_set_tool_debug(get_mySubSystem()->getName(), 0);
	else
		dprintf_config( get_mySubSystem()->getName() );
	DebugId = whoami;

	// create a database connection object
	
	dprintf( D_ALWAYS, "******* Standard Shadow starting up *******\n" );
	dprintf( D_ALWAYS, "** %s\n", CondorVersion() );
	dprintf( D_ALWAYS, "** %s\n", CondorPlatform() );
	dprintf( D_ALWAYS, "*******************************************\n" );

	reserved_swap = param_integer("RESERVED_SWAP", 0);
	reserved_swap *= 1024; /* megabytes -> kb */

	bool use_sql_log = param_boolean("QUILL_USE_SQL_LOG", false);
    FILEObj = FILESQL::createInstance(use_sql_log);
	
	free_swap = sysapi_swap_space();

	dprintf( D_FULLDEBUG, "*** Reserved Swap = %d\n", reserved_swap );
	dprintf( D_FULLDEBUG, "*** Free Swap = %d\n", free_swap );
	if( reserved_swap && free_swap < reserved_swap ) {
		dprintf( D_ALWAYS, "Not enough reserved swap space\n" );
		if(FILEObj) {
		  delete FILEObj;
		}

		exit( JOB_NO_MEM );
	}

	dprintf(D_ALWAYS, "uid=%d, euid=%d, gid=%d, egid=%d\n",
		getuid(), geteuid(), getgid(), getegid());

    dprintf(D_FULLDEBUG, "argc = %d\n", argc);
    for(i = 0; i < argc; i++)
    {
        dprintf(D_FULLDEBUG, "argv[%d] = %s\n", i, argv[i]);
    }
	if( argc < 6 ) {
		usage();
	}

	if (param_boolean("SHADOW_DEBUG_WAIT", false, false)) {
		int debug_wait = 1;
		dprintf(D_ALWAYS,
				"SHADOW_DEBUG_WAIT is TRUE, waiting for debugger to attach to pid %d.\n", 
				(int)::getpid());
		while (debug_wait) {
			sleep(1);
		}
	}

	CheckSpoolVersion(SPOOL_MIN_VERSION_SHADOW_SUPPORTS,SPOOL_CUR_VERSION_SHADOW_SUPPORTS);

	if( strcmp("-pipe",argv[1]) == 0 ) {
		bogus_capability = argv[2];
		cluster = argv[3];
		proc = argv[4];
		// read the big comment in the function for why this is here.
		RemoveNewShadowDroppings(cluster, proc);
		pipe_setup( cluster, proc, bogus_capability );
	} else {
		schedd = argv[1];
		host = argv[2];
		bogus_capability = argv[3];
		cluster = argv[4];
		proc = argv[5];
		if ( argc > 6 ) {
			IpcFile = argv[6];
			dprintf(D_FULLDEBUG,"Setting IpcFile to %s\n",IpcFile);
		} else {
			IpcFile = NULL;
		}
		// read the big comment in the function for why this is here.
		RemoveNewShadowDroppings(cluster, proc);
		regular_setup( host, cluster, proc );
	}
	scheddName = getenv( EnvGetName( ENV_SCHEDD_NAME ) );

#if 0
		/* Don't want to share log file lock between child and pnarent */
	(void)close( LockFd );
	LockFd = -1;
#endif

	// initialize the user log
	initializeUserLog();

	My_Filesystem_Domain = param( "FILESYSTEM_DOMAIN" ); 
	dprintf( D_ALWAYS, "My_Filesystem_Domain = \"%s\"\n", 
			 My_Filesystem_Domain );

	My_UID_Domain = param( "UID_DOMAIN" ); 
	dprintf( D_ALWAYS, "My_UID_Domain = \"%s\"\n", My_UID_Domain );

	UseAFS = param_boolean_crufty( "USE_AFS", false ) ? TRUE : FALSE;

	UseNFS = param_boolean_crufty( "USE_NFS", false ) ? TRUE : FALSE;

	// if job specifies a checkpoint server host, this overrides
	// the config file parameters
	tmp = NULL;
	if (JobAd->LookupString(ATTR_CKPT_SERVER, &tmp) == 1) {
		if (CkptServerHost) free(CkptServerHost);
		UseCkptServer = TRUE;
		CkptServerHost = strdup(tmp);
		StarterChoosesCkptServer = FALSE;
		free(tmp);
	} else {
		free(tmp);
		if (CkptServerHost) {
            free(CkptServerHost);
        }
		CkptServerHost = param( "CKPT_SERVER_HOST" );
		UseCkptServer = FALSE;
		if( CkptServerHost && param_boolean_crufty( "USE_CKPT_SERVER", true ) ) {
			UseCkptServer = TRUE;
		}

		StarterChoosesCkptServer =
			param_boolean_crufty("STARTER_CHOOSES_CKPT_SERVER", true) ? TRUE : FALSE;
	}

		// Initialize location of our checkpoint file.  If we stored it
		// on a checkpoint server then set LastCkptServer.  Otherwise,
		// LastCkptServer should be NULL to indicate that we should
		// look on the local disk for our checkpoint file.
	LastCkptServer = NULL;
	if (JobAd->LookupString(ATTR_LAST_CKPT_SERVER,
							&LastCkptServer) == 0) {
		free(LastCkptServer);
		LastCkptServer = NULL;
	}

	// LIGO
	if (param_boolean("ALWAYS_USE_LOCAL_CKPT_SERVER", false)) {
		if (LastCkptServer) {
			char *remoteHost = NULL;
			JobAd->LookupString(ATTR_REMOTE_HOST, &remoteHost);

			char *machineName = strrchr(remoteHost, '@');
			if (machineName == NULL) {
				machineName = remoteHost;
			} else {
				machineName++;
			}

			LastCkptServer = strdup(machineName);
			CkptServerHost = strdup(machineName);

			dprintf(D_ALWAYS, "ALWAYS_USE_LOCAL_CKPT_SERVER is true, forcing LastCkptServer to %s\n", LastCkptServer);
		} else {
			dprintf(D_ALWAYS, "ALWAYS_USE_LOCAL_CKPT_SERVER is true, but checkpoint is not on server, restoring file local file\n");
		}
	}

	MaxDiscardedRunTime = param_integer( "MAX_DISCARDED_RUN_TIME", 3600 );

	CompressPeriodicCkpt = param_boolean( "COMPRESS_PERIODIC_CKPT", false );

	PeriodicSync = param_boolean( "PERIODIC_MEMORY_SYNC", false );

	CompressVacateCkpt = param_boolean( "COMPRESS_VACATE_CKPT", false );

	SlowCkptSpeed = param_integer( "SLOW_CKPT_SPEED", 0 );

	// Install signal handlers such that all signals are blocked when inside
	// the handler.
	sigset_t fullset;
	sigfillset(&fullset);
	install_sig_handler_with_mask( SIGCHLD,&fullset, reaper );

		// SIGUSR1 is sent by the schedd when a job is removed with
		// condor_rm.
	install_sig_handler_with_mask( SIGUSR1, &fullset, handle_sigusr1 );

		// SIGQUIT is sent for a fast shutdow.
	install_sig_handler_with_mask( SIGQUIT, &fullset, handle_sigquit );

		// SIGTERM is sent for a graceful shutdow.
	install_sig_handler_with_mask( SIGTERM, &fullset, handle_sigterm );


	/* Here we block the async signals.  We do this mainly because on HPUX,
	 * XDR wigs out if it is interrupted by a signal.  We do it on all
	 * platforms because it seems like a safe idea.  We unblock the signals
	 * during before select(), which is where we spend most of our time. */
	block_signal(SIGCHLD);
	block_signal(SIGUSR1);      

	/* If the completed job had been committed to the job queue,
		but for some reason the shadow exited wierdly and the
		schedd is trying to run it again, then simply write
		the job termination events and send the email as if the job had
		just ended. */
	if (terminate_is_pending() == TRUE) {
		/* This function will exit()! */
		handle_terminate_pending();
	}

	HandleSyscalls();

	Wrapup();

	/* HACK! WHOOO!!!!! Throw the old shadow away already! */
	/* This will figure out whether or not the job should go on hold AFTER the
		job has exited for whatever reason, or if the job should be allowed
		to exit. It modifies ExitReason approriately for job holding, or, get
		this, just EXCEPTs if the jobs is supposed to go into idle state and
		not leave. :) */
	/* Small change by Todd : only check the static policy if the job really
	   exited of its own accord -- we don't want to even evaluate the static
	   policy if the job exited because it was preempted, for instance */
	if (check_static_policy && 
		(ExitReason == JOB_EXITED || ExitReason == JOB_KILLED 
		     	|| ExitReason == JOB_COREDUMPED)) 
	{
		static_policy();
	}
    if( My_UID_Domain ) {
        free( My_UID_Domain );
    }
    if( My_Filesystem_Domain ) {
        free( My_Filesystem_Domain );
    }
        if(FILEObj) {
                delete FILEObj;
        }

	dprintf( D_ALWAYS, "********** Shadow Exiting(%d) **********\n",
		ExitReason );
	exit( ExitReason );
}
示例#5
0
/* param_boolean() and param_boolean_int() are C++-only functions, so
 * we roll our own version for checking NO_DNS. :-(
 */
static int
nodns_enabled( void )
{
	return param_boolean_crufty("NO_DNS", false) ? 1 : 0;
}
示例#6
0
int
Starter::execDCStarter( ArgList const &args, Env const *env, 
						int* std_fds, Stream* s )
{
	Stream *inherit_list[] =
		{ 0 /*ClassAd update stream (assigned below)*/,
		  s /*shadow syscall sock*/,
		  0 /*terminal NULL*/ };

	const ArgList* final_args = &args;
	const char* final_path = s_path;
	Env new_env;

	if( env ) {
		new_env.MergeFrom( *env );
	}

		// The starter figures out its execute directory by paraming
		// for EXECUTE, which we override in the environment here.
		// This way, all the logic about choosing a directory to use
		// is in only one place.
	ASSERT( executeDir() );
	new_env.SetEnv( "_CONDOR_EXECUTE", executeDir() );

		// Handle encrypted execute directory
	FilesystemRemap  fs_remap_obj;	// put on stack so destroyed when leave this method
	FilesystemRemap* fs_remap = NULL;
	// If admin desires encrypted exec dir in config, do it
	bool encrypt_execdir = param_boolean_crufty("ENCRYPT_EXECUTE_DIRECTORY",false);
	// Or if user wants encrypted exec in job ad, do it
	if (!encrypt_execdir && s_claim->ad()) {
		s_claim->ad()->LookupBool(ATTR_ENCRYPT_EXECUTE_DIRECTORY,encrypt_execdir);
	}
	if ( encrypt_execdir ) {
#ifdef LINUX
		// On linux, setup a directory $EXECUTE/encryptedX subdirectory
		// to serve as an ecryptfs mount point; pass this directory
		// down to the condor_starter as if it were $EXECUTE so
		// that the starter creates its dir_<pid> directory on the
		// ecryptfs filesystem setup by doing an AddEncryptedMapping.
		static int unsigned long privdirnum = 0;
		TemporaryPrivSentry sentry(PRIV_CONDOR);
		s_encrypted_execute_dir.formatstr("%s%cencrypted%lu",executeDir(),
				DIR_DELIM_CHAR,privdirnum++);
		if( mkdir(encryptedExecuteDir(), 0755) < 0 ) {
			dprintf( D_FAILURE|D_ALWAYS,
			         "Failed to create encrypted dir %s: %s\n",
			         encryptedExecuteDir(),
			         strerror(errno) );
			return 0;
		}
		dprintf( D_ALWAYS,
		         "Created encrypted dir %s\n", encryptedExecuteDir() );
		fs_remap = &fs_remap_obj;
		if ( fs_remap->AddEncryptedMapping(encryptedExecuteDir()) ) {
			// FilesystemRemap object dprintfs out an error message for us
			return 0;
		}
		new_env.SetEnv( "_CONDOR_EXECUTE", encryptedExecuteDir() );
#endif
	}

	env = &new_env;


		// Build the affinity string to pass to the starter via env

	std::string affinityString;
	if (s_claim && s_claim->rip() && s_claim->rip()->get_affinity_set()) {
		std::list<int> *l = s_claim->rip()->get_affinity_set();
		bool needComma = false;
		for (std::list<int>::iterator it = l->begin(); it != l->end(); it++) {
			if (needComma) {
				formatstr_cat(affinityString, ", %d", *it);
			} else {
				formatstr_cat(affinityString, "%d ", *it);
				needComma = true;
			}
		}
	}

	bool affinityBool = false;
	if ( ! s_claim || ! s_claim->ad()) {
		affinityBool = param_boolean("ASSIGN_CPU_AFFINITY", false);
	} else {
		auto_free_ptr assign_cpu_affinity(param("ASSIGN_CPU_AFFINITY"));
		if ( ! assign_cpu_affinity.empty()) {
			classad::Value value;
			if (s_claim->ad()->EvaluateExpr(assign_cpu_affinity.ptr(), value)) {
				if ( ! value.IsBooleanValueEquiv(affinityBool)) {
					// was an expression, but not a bool, so report it and continue
					EXCEPT("ASSIGN_CPU_AFFINITY does not evaluate to a boolean, it is : %s", ClassAdValueToString(value));
				}
			}
		}
	}

	if (affinityBool) {
		new_env.SetEnv("_CONDOR_STARTD_ASSIGNED_AFFINITY", affinityString.c_str());
		new_env.SetEnv("_CONDOR_ENFORCE_CPU_AFFINITY", "true");
		dprintf(D_ALWAYS, "Setting affinity env to %s\n", affinityString.c_str());
	}


	ReliSock child_job_update_sock;   // child inherits this socket
	ASSERT( !s_job_update_sock );
	s_job_update_sock = new ReliSock; // parent (yours truly) keeps this socket
	ASSERT( s_job_update_sock );

		// Connect parent and child sockets together so child can send us
		// udpates to the job ClassAd.
	if( !s_job_update_sock->connect_socketpair( child_job_update_sock ) ) {
		dprintf( D_ALWAYS, "ERROR: Failed to create job ClassAd update socket!\n");
		s_pid = 0;
		return s_pid;
	}
	inherit_list[0] = &child_job_update_sock;

	// Pass the machine ad to the starter
	if (s_claim) 
		s_claim->writeMachAd( s_job_update_sock );

	if( daemonCore->Register_Socket(
			s_job_update_sock,
			"starter ClassAd update socket",
			(SocketHandlercpp)&Starter::receiveJobClassAdUpdate,
			"receiveJobClassAdUpdate",
			this) < 0 )
	{
		EXCEPT("Failed to register ClassAd update socket.");
	}

#if defined(LINUX)
	// see if we should be using glexec to spawn the starter.
	// if we are, the cmd, args, env, and stdin to use will be
	// modified
	ArgList glexec_args;
	Env glexec_env;
	int glexec_std_fds[3];
	if( param_boolean( "GLEXEC_STARTER", false ) ) {
		if( ! glexec_starter_prepare( s_path,
		                              s_claim->client()->proxyFile(),
		                              args,
		                              env,
		                              std_fds,
		                              glexec_args,
		                              glexec_env,
		                              glexec_std_fds ) )
		{
			// something went wrong; prepareForGlexec will
			// have already logged it
			cleanupAfterGlexec();
			return 0;
		}
		final_path = glexec_args.GetArg(0);
		final_args = &glexec_args;
		env = &glexec_env;
		std_fds = glexec_std_fds;
	}
#endif
								   
	int reaper_id;
	if( s_reaper_id > 0 ) {
		reaper_id = s_reaper_id;
	} else {
		reaper_id = main_reaper;
	}

	if(IsFulldebug(D_FULLDEBUG)) {
		MyString args_string;
		final_args->GetArgsStringForDisplay(&args_string);
		dprintf( D_FULLDEBUG, "About to Create_Process \"%s\"\n",
				 args_string.Value() );
	}

	FamilyInfo fi;
	fi.max_snapshot_interval = pid_snapshot_interval;

	s_pid = daemonCore->
		Create_Process( final_path, *final_args, PRIV_ROOT, reaper_id,
		                TRUE, TRUE, env, NULL, &fi, inherit_list, std_fds,
						NULL, 0, NULL, 0, NULL, NULL, NULL, NULL, fs_remap);
	if( s_pid == FALSE ) {
		dprintf( D_ALWAYS, "ERROR: exec_starter failed!\n");
		s_pid = 0;
	}

#if defined(LINUX)
	if( param_boolean( "GLEXEC_STARTER", false ) ) {
		// if we used glexec to spawn the Starter, we now need to send
		// the Starter's environment to our glexec wrapper script so it
		// can exec the Starter with all the environment variablew we rely
		// on it inheriting
		//
		if ( !glexec_starter_handle_env(s_pid) ) {
			// something went wrong; handleGlexecEnvironment will
			// have already logged it
			cleanupAfterGlexec();
			return 0;
		}
	}
#endif

	return s_pid;
}
示例#7
0
void
main_init(int argc, char* argv[])
{
	char**		ptr; 
	MyString		job_queue_name;
 
	int argc_count = 1;
	for(ptr = argv + 1, argc_count = 1; argc_count<argc && *ptr; ptr++,argc_count++)
	{
		if(ptr[0][0] != '-')
		{
			usage(argv[0]);
		}
		switch(ptr[0][1])
		{
		  case 'n':
			if (Name) {
				free(Name);
			}
			Name = build_valid_daemon_name( *(++ptr) );
			break;
		  default:
			usage(argv[0]);
		}
	}

		// Tell Attrlist to publish the server time
	AttrList_setPublishServerTime( true );

		// Initialize DaemonCore's use of ProcFamily. We do this so that we
		// launch a ProcD if necessary so that any Starters that we launch
		// for Local Universe jobs can share a single ProcD, instead of
		// each creating their own
	daemonCore->Proc_Family_Init();

#if defined(HAVE_DLOPEN)
	ClassAdLogPluginManager::Load();
	ScheddPluginManager::Load();

	ScheddPluginManager::EarlyInitialize();
	ClassAdLogPluginManager::EarlyInitialize();
#endif

/* schedd doesn't care about other daemons.  only that it has the ability
 * to run jobs.  so the following code is for now not needed.

	// ZKM HACK TO MAKE SURE SCHEDD HAS USER CREDENTIALS
	//
	// if we are using the credd and credmon, we need to init them before
	// doing anything!
	char* p = param("SEC_CREDENTIAL_DIRECTORY");
	if(p) {
		free(p);
		dprintf(D_ALWAYS, "SCHEDD: INITIALIZING USER CREDS\n");
		Daemon *my_credd;

		// we will abort if we can't locate the credd, so let's try a
		// few times. locate() caches the result so we have to destroy
		// the object and make a new one each time.
		int retries = 20;
		bool success = false;
		do {
			// allocate a credd
			my_credd = new Daemon(DT_CREDD);
			if(my_credd) {
				// call locate
				bool loc_rc = my_credd->locate();
				if(loc_rc) {
					// get a connected relisock
					CondorError errstack;
					ReliSock* r = (ReliSock*)my_credd->startCommand(
						CREDD_REFRESH_ALL, Stream::reli_sock, 20, &errstack);
					if ( r ) {
						// ask the credd to get us some fresh user creds
						ClassAd ad;
						putClassAd(r, ad);
						r->end_of_message();
						r->decode();
						getClassAd(r, ad);
						r->end_of_message();
						dprintf(D_SECURITY | D_FULLDEBUG, "SCHEDD: received ad from CREDD:\n");
						dPrintAd(D_SECURITY | D_FULLDEBUG, ad);
						MyString result;
						ad.LookupString("Result", result);
						if(result == "success") {
							success = true;
						} else {
							dprintf(D_FULLDEBUG, "SCHEDD: warning, creddmon returned failure.\n");
						}

						// clean up.
						delete r;
					} else {
						dprintf(D_FULLDEBUG, "SCHEDD: warning, startCommand failed, %s\n", errstack.getFullText(true).c_str());
					}
				} else {
					dprintf(D_FULLDEBUG, "SCHEDD: warning, locate failed.\n");
				}

				// clean up.
				delete my_credd;
			} else {
				dprintf(D_FULLDEBUG, "SCHEDD: warning, new Daemon(DT_CREDD) failed.\n");
			}

			// if something went wrong, sleep and retry (finit number of times)
			if(!success) {
				dprintf(D_FULLDEBUG, "SCHEDD: sleeping and trying again %i times.\n", retries);
				sleep(1);
				retries--;
			}
		} while ((retries > 0) && (success == false));

		// except if fail
		if (!success) {
			EXCEPT("FAILED TO INITIALIZE USER CREDS (locate failed)");
		}
	}
	// END ZKM HACK
*/

#ifndef WIN32
	// if using the SEC_CREDENTIAL_DIRECTORY, confirm we are "up-to-date".
	// at the moment, we take an "all-or-nothing" approach.  ultimately, this
	// should be per-user, and the SchedD should start normally and run jobs
	// for users who DO have valid credentials, and simply holding on to jobs
	// in idle state for users who do NOT have valid credentials.
	//
	char* p = param("SEC_CREDENTIAL_DIRECTORY");
	if(p) {
		free(p);
		bool success = false;
		int retries = 60;
		do {
			// look for existence of file that says everything is up-to-date.
			success = credmon_poll(NULL, false, false);
			if(!success) {
				dprintf(D_ALWAYS, "SCHEDD: User credentials not up-to-date.  Start-up delayed.  Waiting 10 seconds and trying %i more times.\n", retries);
				sleep(10);
				retries--;
			}
		} while ((!success) && (retries > 0));

		// we tried, we give up.
		if(!success) {
			EXCEPT("User credentials unavailable after 10 minutes");
		}
	}
	// User creds good to go, let's start this thing up!
#endif  // WIN32

		// Initialize all the modules
	scheduler.Init();
	scheduler.Register();

		// Initialize the job queue
	char *job_queue_param_name = param("JOB_QUEUE_LOG");

	if (job_queue_param_name == NULL) {
		// the default place for the job_queue.log is in spool
		job_queue_name.formatstr( "%s/job_queue.log", Spool);
	} else {
		job_queue_name = job_queue_param_name; // convert char * to MyString
		free(job_queue_param_name);
	}

		// Make a backup of the job queue?
	if ( param_boolean_crufty("SCHEDD_BACKUP_SPOOL", false) ) {
			MyString hostname;
			hostname = get_local_hostname();
			MyString		job_queue_backup;
			job_queue_backup.formatstr( "%s/job_queue.bak.%s.%ld",
			                            Spool, hostname.Value(), (long)time(NULL) );
			if ( copy_file( job_queue_name.Value(), job_queue_backup.Value() ) ) {
				dprintf( D_ALWAYS, "Failed to backup spool to '%s'\n",
						 job_queue_backup.Value() );
			} else {
				dprintf( D_FULLDEBUG, "Spool backed up to '%s'\n",
						 job_queue_backup.Value() );
			}
	}

	int max_historical_logs = param_integer( "MAX_JOB_QUEUE_LOG_ROTATIONS", DEFAULT_MAX_JOB_QUEUE_LOG_ROTATIONS );

	InitJobQueue(job_queue_name.Value(),max_historical_logs);
	PostInitJobQueue();

		// Initialize the dedicated scheduler stuff
	dedicated_scheduler.initialize();

		// Do a timeout now at startup to get the ball rolling...
	scheduler.timeout();

#if defined(HAVE_DLOPEN)
	ScheddPluginManager::Initialize();
	ClassAdLogPluginManager::Initialize();
#endif

	daemonCore->InstallAuditingCallback( AuditLogNewConnection );
} 
示例#8
0
void
main_init(int argc, char* argv[])
{
	char**		ptr; 
	MyString		job_queue_name;
 
	int argc_count = 1;
	for(ptr = argv + 1, argc_count = 1; argc_count<argc && *ptr; ptr++,argc_count++)
	{
		if(ptr[0][0] != '-')
		{
			usage(argv[0]);
		}
		switch(ptr[0][1])
		{
		  case 'n':
			Name = build_valid_daemon_name( *(++ptr) );
			break;
		  default:
			usage(argv[0]);
		}
	}

		// Tell Attrlist to publish the server time
	AttrList_setPublishServerTime( true );

		// Initialize DaemonCore's use of ProcFamily. We do this so that we
		// launch a ProcD if necessary so that any Starters that we launch
		// for Local Universe jobs can share a single ProcD, instead of
		// each creating their own
	daemonCore->Proc_Family_Init();

#if defined(WANT_CONTRIB) && defined(WITH_MANAGEMENT)
#if defined(HAVE_DLOPEN)
		// Intialization of the plugin manager, i.e. loading all
		// plugins, should be performed before the job queue log is
		// read so plugins have a chance to learn about all jobs
		// already in the queue
	ClassAdLogPluginManager::Load();

		// Load all ScheddPlugins. In reality this doesn't do much
		// since initializing any plugin manager loads plugins for all
		// plugin manager.
	ScheddPluginManager::Load();

		// Tell all ScheddPlugins to initialze themselves
	ScheddPluginManager::EarlyInitialize();

		// Tell all plugins to initialize themselves
	ClassAdLogPluginManager::EarlyInitialize();
#endif
#endif
	
		// Initialize all the modules
	scheduler.Init();
	scheduler.Register();

		// Initialize the job queue
	char *job_queue_param_name = param("JOB_QUEUE_LOG");

	if (job_queue_param_name == NULL) {
		// the default place for the job_queue.log is in spool
		job_queue_name.sprintf( "%s/job_queue.log", Spool);
	} else {
		job_queue_name = job_queue_param_name; // convert char * to MyString
		free(job_queue_param_name);
	}

		// Make a backup of the job queue?
	if ( param_boolean_crufty("SCHEDD_BACKUP_SPOOL", false) ) {
			MyString hostname;
			UtcTime now(true);
			hostname = get_local_hostname();
			MyString		job_queue_backup;
			job_queue_backup.sprintf( "%s/job_queue.bak.%s.%ld",
									  Spool, hostname.Value(), now.seconds() );
			if ( copy_file( job_queue_name.Value(), job_queue_backup.Value() ) ) {
				dprintf( D_ALWAYS, "Failed to backup spool to '%s'\n",
						 job_queue_backup.Value() );
			} else {
				dprintf( D_FULLDEBUG, "Spool backed up to '%s'\n",
						 job_queue_backup.Value() );
			}
	}

	int max_historical_logs = param_integer( "MAX_JOB_QUEUE_LOG_ROTATIONS", DEFAULT_MAX_JOB_QUEUE_LOG_ROTATIONS );

	InitJobQueue(job_queue_name.Value(),max_historical_logs);
	mark_jobs_idle();

		// The below must happen _after_ InitJobQueue is called.
	if ( scheduler.autocluster.config() ) {
		// clear out auto cluster id attributes
		WalkJobQueue( (int(*)(ClassAd *))clear_autocluster_id );
	}
	
		//
		// Update the SchedDInterval attributes in jobs if they
		// have it defined. This will be for JobDeferral and
		// CronTab jobs
		//
	WalkJobQueue( (int(*)(ClassAd *))::updateSchedDInterval );

		// Initialize the dedicated scheduler stuff
	dedicated_scheduler.initialize();

		// Do a timeout now at startup to get the ball rolling...
	scheduler.timeout();

#if defined(WANT_CONTRIB) && defined(WITH_MANAGEMENT)
#if defined(HAVE_DLOPEN)
		// Tell all ScheddPlugins to initialze themselves
	ScheddPluginManager::Initialize();

		// Tell all plugins to initialize themselves
	ClassAdLogPluginManager::Initialize();
#endif
#endif
}