Beispiel #1
0
/**
 * Run one request: map the requested command through the
 * SOAPSHELL_AUTHORIZED_COMMANDS config knob, set up its arguments,
 * environment, input files and stdio files, spawn it via
 * DaemonCore::Create_Process, block until it exits, and record the
 * exit status plus the captured stdout/stderr in resultAd.
 *
 * Every failure is reported through handle_process_request_error() and
 * the function then returns without spawning (or without stashing output).
 *
 * @param inputAd     request ad; must carry ATTR_JOB_CMD, may carry
 *                    base64-encoded stdin in ATTR_JOB_INPUT
 * @param resultAd    ad that receives "EXIT_STATUS" and the output attributes
 * @param req_number  request number, forwarded to the error handler
 * @param iwd         working directory for the job and its input files
 * @param stdio_iwd   directory holding the stdin/stdout/stderr files
 */
void
do_process_request(const ClassAd *inputAd, ClassAd *resultAd, const int req_number, 
				   const char *iwd, const char *stdio_iwd)
{
		// Check for inputAd
	if ( !inputAd ) {
		handle_process_request_error("No input ad",req_number,resultAd);
		return;
	}

		// Map the CMD specified in the input via the config file.
	MyString UnmappedJobName,JobName;
	if (inputAd->LookupString(ATTR_JOB_CMD,UnmappedJobName) == 0 ) {
			// no CMD specified.
		handle_process_request_error("No CMD specified",req_number,resultAd);
		return;
	}
	char *auth_commands = param("SOAPSHELL_AUTHORIZED_COMMANDS");
	StringList auth_list(auth_commands,",");
	if ( auth_commands ) free(auth_commands);
		// Each command needs four tuples; anything else is a misconfiguration
	if ( auth_list.number() % 4 != 0 ) {
		handle_process_request_error("Service is misconfigured: SOAPSHELL_AUTHORIZED_COMMANDS malformed",req_number,resultAd);
		return;
	}

		// NOTE(review): presumably contains_anycase() leaves the list
		// cursor on the matching entry, so that the following next()
		// calls yield the mapped command, its args and its environment
		// in that order -- confirm against StringList.
	if ( auth_list.contains_anycase(UnmappedJobName.Value()) == TRUE ) {
		JobName = auth_list.next();
	}
	if ( JobName.IsEmpty() ) {
			// the CMD not authorized
		handle_process_request_error("Requested CMD not authorized via SOAPSHELL_AUTHORIZED_COMMANDS",req_number,resultAd);
		return;
	}

		// handle command line arguments.  A config entry other than "*"
		// overrides whatever arguments the request supplied.
	ArgList args;
	args.SetArgV1SyntaxToCurrentPlatform();
	args.AppendArg(JobName.Value());	// set argv[0] to command
	char *soapshell_args = auth_list.next();
	if ( soapshell_args && strcmp(soapshell_args,"*") ) {
		if(!args.AppendArgsV1RawOrV2Quoted(soapshell_args,NULL)) {
				// only logged; the request still proceeds
			dprintf( D_ALWAYS, "ERROR: SOAPSHELL_ARGS config macro invalid\n" );
		}
	} else if(!args.AppendArgsFromClassAd(inputAd,NULL)) {
		handle_process_request_error("Failed to setup CMD arguments",req_number,resultAd);
		return;
	}
		
		// handle the environment, with the same "*" convention as above.
	Env job_env;
	char *env_str = auth_list.next();
	if ( env_str && strcmp(env_str,"*") ) {
		if(!job_env.MergeFromV1RawOrV2Quoted(env_str,NULL) ) {
				// only logged; the request still proceeds
			dprintf(D_ALWAYS,"ERROR: SOAPSHELL_ENVIRONMENT config macro invalid\n");
		}
	} else if(!job_env.MergeFrom(inputAd,NULL)) {
		// bad environment string in job ad!
		handle_process_request_error("Request has faulty environment string",req_number,resultAd);
		return;
	}

		// Write input files into iwd (we will write stdin later)
	if ( !write_input_files(inputAd, iwd) ) {
		// failed to write input files
		handle_process_request_error("Failed to write input files",req_number,resultAd);
		return;
	}

		// handle stdin, stdout, and stderr redirection.
		// dircat() results are released with delete [] once copied.
	const char* jobstdin_ = dircat(stdio_iwd,"stdin");
	MyString jobstdin(jobstdin_);
	const char* jobstdout_ = dircat(stdio_iwd,"stdout");
	MyString jobstdout(jobstdout_);
	const char* jobstderr_ = dircat(stdio_iwd,"stderr");
	MyString jobstderr(jobstderr_);
	delete [] jobstdin_;
	delete [] jobstdout_;
	delete [] jobstderr_;
	int flags = O_WRONLY | O_CREAT | O_TRUNC | O_APPEND | O_LARGEFILE;
		// write stdin file if the request carries one
	{
		char *input = NULL;
		unsigned char *output = NULL;
		int output_length = 0;
		inputAd->LookupString(ATTR_JOB_INPUT,&input);
		if ( input ) {
			bool stdin_written = false;
			// Caller needs to free *output if non-NULL
			condor_base64_decode(input,&output,&output_length);
			if ( output ) {
				int fd = safe_open_wrapper_follow( jobstdin.Value(), flags, 0666 );
				if ( fd > -1 ) {
						// write() may return short or be interrupted;
						// keep going until every byte is on disk so the
						// job never sees a silently truncated stdin.
					stdin_written = true;
					int written = 0;
					while ( written < output_length ) {
						ssize_t rc = write(fd, output + written,
										   output_length - written);
						if ( rc < 0 && errno == EINTR ) {
							continue;
						}
						if ( rc <= 0 ) {
							stdin_written = false;
							break;
						}
						written += (int)rc;
					}
					close(fd);
				}
				free(output);
			}
			free(input);
			if ( !stdin_written ) {
				handle_process_request_error("Failed to write stdin",req_number,resultAd);
				return;
			}
		}
	}
		// NOTE(review): the stdin open fails (fds[0] == -1) when no stdin
		// file was written above; only stdout/stderr failures are treated
		// as fatal below.
	int fds[3]; 
	fds[0] = safe_open_wrapper_follow( jobstdin.Value(), O_RDONLY | O_LARGEFILE ); // stdin	
	fds[1] = safe_open_wrapper_follow( jobstdout.Value(), flags, 0666 );	// stdout
	fds[2] = safe_open_wrapper_follow( jobstderr.Value(), flags, 0666 );	// stderr
	/* Bail out if we couldn't open stdout/err files correctly */
	if( fds[1]==-1 || fds[2]==-1 ) {
		/* only close ones that had been opened correctly */
		for ( int i = 0; i <= 2; i++ ) {
			if ( fds[i] >= 0 ) {
				daemonCore->Close_FD ( fds[i] );
			}
		}
		handle_process_request_error("Failed to write stdout/err files",req_number,resultAd);
		return;
	}

		// Print what we are about to do to the log
	MyString args_string;
	args.GetArgsStringForDisplay(&args_string,1);
	dprintf( D_ALWAYS, "About to exec %s %s\n", JobName.Value(),
				 args_string.Value() );

		// Spawn a process, baby!!!
	int JobPid = daemonCore->Create_Process( JobName.Value(),	// executable
		                                     args,				// args
		                                     PRIV_UNKNOWN,		// priv_state - TODO
		                                     0,					// reaper id - TODO
		                                     FALSE,				// want_command_port
		                                     &job_env,			// job environment
		                                     iwd,				// job iwd
		                                     NULL,				// family_info - TODO
		                                     NULL,				// sock_inherit_list
		                                     fds				// stdio redirection
										);

		// NOTE: Create_Process() saves the errno for us if it is an
		// "interesting" error.  Capture the text now, before the
		// Close_FD() calls below get a chance to change errno.
	char const *create_process_error = NULL;
	if(JobPid == FALSE && errno) create_process_error = strerror(errno);

		// now close the descriptors in fds array.  our child has inherited
		// them already, so we should close them so we do not leak descriptors.
	for ( int i = 0; i <= 2; i++ ) {
		if ( fds[i] >= 0 ) {
			daemonCore->Close_FD ( fds[i] );
		}
	}

	if ( JobPid == FALSE ) {
		JobPid = -1;
		MyString errormsg;
		errormsg.formatstr("Create_Process failed %s",create_process_error ? create_process_error : "");
		handle_process_request_error(errormsg.Value(),req_number,resultAd);
		return;
	}


	dprintf(D_ALWAYS,"Create_Process succeeded, pid=%d\n",JobPid);

		// TODO - For now, just deal w/ one at a time. :(
		// So for now just wait for the child to exit.
#ifdef WIN32
#error This service does not yet work on Windows
#else
	{
		int exit_status;
		pid_t pid;
		for (;;) {
			pid = wait(&exit_status);
				// Save errno right away: the dprintf() below may
				// change it before we test for EINTR.
			const int wait_errno = errno;
			dprintf(D_FULLDEBUG,"WAIT returned %d, errno=%d\n",pid,wait_errno);
			if (pid == JobPid ) break;
			if (pid == -1 && wait_errno != EINTR) {
				EXCEPT("waitpid failed errno=%d",wait_errno);
			}
		}
			// Only a normal exit yields an EXIT_STATUS attribute.
		if ( WIFEXITED(exit_status) ) {
			int status = WEXITSTATUS(exit_status);
			resultAd->Assign("EXIT_STATUS",status);
		}		
	}
#endif

		// Job has completed, exit status is in the ad.  Now put
		// the output files into the result ad.
	stash_output_file(resultAd, jobstdout.Value(), ATTR_JOB_OUTPUT);
	stash_output_file(resultAd, jobstderr.Value(), ATTR_JOB_ERROR);

}
Beispiel #2
0
/**
 * Prepare and launch a vanilla-universe job.
 *
 * Steps, in order: (WIN32 only) fix up the job ad when the executable is
 * a script rather than a .exe/.com binary; fill in a FamilyInfo for
 * process-family tracking (login, GID, cgroup, PID namespace); build an
 * optional FilesystemRemap for a named chroot, MOUNT_UNDER_SCRATCH
 * directories and /proc; hand off to OsProc::StartJob(); finally apply
 * cgroup memory/CPU limits when cgroups are in use.
 *
 * @return the value returned by OsProc::StartJob(), or FALSE/0 if any
 *         of the setup steps above fails before the job is started.
 */
int
VanillaProc::StartJob()
{
	dprintf(D_FULLDEBUG,"in VanillaProc::StartJob()\n");

	// vanilla jobs, unlike standard jobs, are allowed to run 
	// shell scripts (or as is the case on NT, batch files).  so
	// edit the ad so we start up a shell, pass the executable as
	// an argument to the shell, if we are asked to run a .bat file.
#ifdef WIN32

	CHAR		interpreter[MAX_PATH+1],
				systemshell[MAX_PATH+1];    
	const char* jobtmp				= Starter->jic->origJobName();
	int			joblen				= strlen(jobtmp);
	// Look at the last four characters for ".exe"/".com".  Names shorter
	// than four characters are compared from their first character so we
	// never form a pointer in front of the string.
	int			ext_offset			= joblen > 4 ? joblen - 4 : 0;
	const char	*extension			= joblen > 0 ? &(jobtmp[ext_offset]) : NULL;
	bool		binary_executable	= ( extension && 
										( MATCH == strcasecmp ( ".exe", extension ) || 
										  MATCH == strcasecmp ( ".com", extension ) ) ),
				java_universe		= ( CONDOR_UNIVERSE_JAVA == job_universe );
	ArgList		arguments;
	MyString	filename,
				jobname, 
				error;
	
	if ( extension && !java_universe && !binary_executable ) {

		/** since we do not actually know how long the extension of
			the file is, we'll need to hunt down the '.' in the path,
			if it exists */
		extension = strrchr ( jobtmp, '.' );

		if ( !extension ) {

			dprintf ( 
				D_ALWAYS, 
				"VanillaProc::StartJob(): Failed to extract "
				"the file's extension.\n" );

			/** don't fail here, since we want executables to run
				as usual.  That is, some condor jobs submit 
				executables that do not have the '.exe' extension,
				but are, nonetheless, executable binaries.  For
				instance, a submit script may contain:

				executable = executable$(OPSYS) */

		} else {

			/** pull out the path to the executable */
			if ( !JobAd->LookupString ( 
				ATTR_JOB_CMD, 
				jobname ) ) {
				
				/** fall back on Starter->jic->origJobName() */
				jobname = jobtmp;

			}

			/** If we transferred the job, it may have been
				renamed to condor_exec.exe even though it is
				not an executable. Here we rename it back to
				the correct extension before it will run. */
			if ( MATCH == strcasecmp ( 
					CONDOR_EXEC, 
					condor_basename ( jobname.Value () ) ) ) {
				filename.formatstr ( "condor_exec%s", extension );
				if (rename(CONDOR_EXEC, filename.Value()) != 0) {
					dprintf (D_ALWAYS, "VanillaProc::StartJob(): ERROR: "
							"failed to rename executable from %s to %s\n", 
							CONDOR_EXEC, filename.Value() );
				}
			} else {
				filename = jobname;
			}
			
			/** Since we've renamed our executable, we need to
				update the job ad to reflect this change. */
			if ( !JobAd->Assign ( 
				ATTR_JOB_CMD, 
				filename ) ) {

				dprintf (
					D_ALWAYS,
					"VanillaProc::StartJob(): ERROR: failed to "
					"set new executable name.\n" );

				return FALSE;

			}

			/** We've moved the script to argv[1], so we need to 
				add	the remaining arguments to positions argv[2]..
				argv[/n/]. */
			if ( !arguments.AppendArgsFromClassAd ( JobAd, &error ) ||
				 !arguments.InsertArgsIntoClassAd ( JobAd, NULL, 
				&error ) ) {

				dprintf (
					D_ALWAYS,
					"VanillaProc::StartJob(): ERROR: failed to "
					"get arguments from job ad: %s\n",
					error.Value () );

				return FALSE;

			}

			/** Since we know already we don't want this file returned
				to us, we explicitly add it to an exception list which
				will stop the file transfer mechanism from considering
				it for transfer back to its submitter */
			Starter->jic->removeFromOutputFiles (
				filename.Value () );

		}
			
	}
#endif

	// set up a FamilyInfo structure to tell OsProc to register a family
	// with the ProcD in its call to DaemonCore::Create_Process
	//
	FamilyInfo fi;

	// take snapshots at no more than 15 seconds in between, by default
	//
	fi.max_snapshot_interval = param_integer("PID_SNAPSHOT_INTERVAL", 15);

	m_dedicated_account = Starter->jic->getExecuteAccountIsDedicated();
	if( ThisProcRunsAlongsideMainProc() ) {
			// If we track a secondary proc's family tree (such as
			// sshd) using the same dedicated account as the job's
			// family tree, we could end up killing the job when we
			// clean up the secondary family.
		m_dedicated_account = NULL;
	}
	if (m_dedicated_account) {
			// using login-based family tracking
		fi.login = m_dedicated_account;
			// The following message is documented in the manual as the
			// way to tell whether the dedicated execution account
			// configuration is being used.
		dprintf(D_ALWAYS,
		        "Tracking process family by login \"%s\"\n",
		        fi.login);
	}

	// Owned by this function: deleted after OsProc::StartJob() returns
	// and on every early error return once it has been allocated.
	FilesystemRemap * fs_remap = NULL;
#if defined(LINUX)
	// on Linux, we also have the ability to track processes via
	// a phony supplementary group ID
	//
	gid_t tracking_gid = 0;
	if (param_boolean("USE_GID_PROCESS_TRACKING", false)) {
		if (!can_switch_ids() &&
		    (Starter->condorPrivSepHelper() == NULL))
		{
			EXCEPT("USE_GID_PROCESS_TRACKING enabled, but can't modify "
			           "the group list of our children unless running as "
			           "root or using PrivSep");
		}
		fi.group_ptr = &tracking_gid;
	}

	// Increase the OOM score of this process; the child will inherit it.
	// This way, the job will be heavily preferred to be killed over a normal process.
	// OOM score is currently exponential - a score of 4 is a factor-16 increase in
	// the OOM score.
	setupOOMScore(4);
#endif

#if defined(HAVE_EXT_LIBCGROUP)
	// Determine the cgroup
	std::string cgroup_base;
	param(cgroup_base, "BASE_CGROUP", "");
	MyString cgroup_str;
	const char *cgroup = NULL;
		/* Note on CONDOR_UNIVERSE_LOCAL - The cgroup setup code below
		 *  requires a unique name for the cgroup. It relies on
		 *  uniqueness of the MachineAd's Name
		 *  attribute. Unfortunately, in the local universe the
		 *  MachineAd (mach_ad elsewhere) is never populated, because
		 *  there is no machine. As a result the ASSERT on
		 *  starter_name fails. This means that the local universe
		 *  will not work on any machine that has BASE_CGROUP
		 *  configured. A potential workaround is to set
		 *  STARTER.BASE_CGROUP on any machine that is also running a
		 *  schedd, but that disables cgroup support from a
		 *  co-resident startd. Instead, I'm disabling cgroup support
		 *  from within the local universe until the interaction of
		 *  local universe and cgroups can be properly worked
		 *  out. -matt 7 nov '12
		 */
	if (CONDOR_UNIVERSE_LOCAL != job_universe && cgroup_base.length()) {
		MyString cgroup_uniq;
		std::string starter_name, execute_str;
		param(execute_str, "EXECUTE", "EXECUTE_UNKNOWN");
			// Note: Starter is a global variable from os_proc.cpp
		Starter->jic->machClassAd()->EvalString(ATTR_NAME, NULL, starter_name);
		if (starter_name.size() == 0) {
				// no machine name available: fall back to our pid
			char buf[16];
			sprintf(buf, "%d", getpid());
			starter_name = buf;
		}
		//ASSERT (starter_name.size());
		cgroup_uniq.formatstr("%s_%s", execute_str.c_str(), starter_name.c_str());
		const char dir_delim[2] = {DIR_DELIM_CHAR, '\0'};
		cgroup_uniq.replaceString(dir_delim, "_");
		cgroup_str.formatstr("%s%ccondor%s", cgroup_base.c_str(), DIR_DELIM_CHAR,
			cgroup_uniq.Value());
		cgroup_str += this->CgroupSuffix();
		
			// cgroup aliases cgroup_str's buffer; cgroup_str lives until
			// the end of this function.
		cgroup = cgroup_str.Value();
		ASSERT (cgroup != NULL);
		fi.cgroup = cgroup;
		dprintf(D_FULLDEBUG, "Requesting cgroup %s for job.\n", cgroup);
	}

#endif

// The chroot stuff really only works on linux
#ifdef LINUX
	{
        // Have Condor manage a chroot
       std::string requested_chroot_name;
       JobAd->EvalString("RequestedChroot", NULL, requested_chroot_name);
       // param() hands back a malloc'ed string.  The StringList keeps its
       // own copy of the entries, so release the buffer right away rather
       // than leaking it.
       char * allowed_root_dirs = param("NAMED_CHROOT");
       StringList chroot_list(allowed_root_dirs);
       if (allowed_root_dirs) {
               free(allowed_root_dirs);
               allowed_root_dirs = NULL;
       }
       if (requested_chroot_name.size()) {
               dprintf(D_FULLDEBUG, "Checking for chroot: %s\n", requested_chroot_name.c_str());
               chroot_list.rewind();
               const char * next_chroot;
               bool acceptable_chroot = false;
               std::string requested_chroot;
               // Each entry is expected to look like NAME=DIRECTORY.
               while ( (next_chroot=chroot_list.next()) ) {
                       MyString chroot_spec(next_chroot);
                       chroot_spec.Tokenize();
                       const char * chroot_name = chroot_spec.GetNextToken("=", false);
                       if (chroot_name == NULL) {
                               dprintf(D_ALWAYS, "Invalid named chroot: %s\n", chroot_spec.Value());
                               continue;
                       }
                       const char * next_dir = chroot_spec.GetNextToken("=", false);
                       if (next_dir == NULL) {
                               // no directory part: skip instead of passing
                               // NULL on to dprintf()/IsDirectory()
                               dprintf(D_ALWAYS, "Invalid named chroot: %s\n", chroot_spec.Value());
                               continue;
                       }
                       dprintf(D_FULLDEBUG, "Considering directory %s for chroot %s.\n", next_dir, chroot_spec.Value());
                       if (IsDirectory(next_dir) && (strcmp(requested_chroot_name.c_str(), chroot_name) == 0)) {
                               acceptable_chroot = true;
                               requested_chroot = next_dir;
                       }
               }
               // TODO: path to chroot MUST be all root-owned, or we have a nice security exploit.
               // Is this the responsibility of Condor to check, or the sysadmin who set it up?
               if (!acceptable_chroot) {
                       return FALSE;
               }
               dprintf(D_FULLDEBUG, "Will attempt to set the chroot to %s.\n", requested_chroot.c_str());

               // execute_dir  = <execute dir>/dir_<pid>
               // full_dir_str = <chroot>/<execute dir>/dir_<pid>
               std::stringstream ss;
               std::stringstream ss2;
               ss2 << Starter->GetExecuteDir() << DIR_DELIM_CHAR << "dir_" << getpid();
               std::string execute_dir = ss2.str();
               ss << requested_chroot << DIR_DELIM_CHAR << ss2.str();
               std::string full_dir_str = ss.str();
               if (is_trivial_rootdir(requested_chroot)) {
                   dprintf(D_FULLDEBUG, "Requested a trivial chroot %s; this is a no-op.\n", requested_chroot.c_str());
               } else if (IsDirectory(execute_dir.c_str())) {
                       {
                           // root privileges only for the mkdir/chown pair
                           TemporaryPrivSentry sentry(PRIV_ROOT);
                           if( mkdir(full_dir_str.c_str(), S_IRWXU) < 0 ) {
                               dprintf( D_FAILURE|D_ALWAYS,
                                   "Failed to create sandbox directory in chroot (%s): %s\n",
                                   full_dir_str.c_str(),
                                   strerror(errno) );
                               return FALSE;
                           }
                           if (chown(full_dir_str.c_str(),
                                     get_user_uid(),
                                     get_user_gid()) == -1)
                           {
                               EXCEPT("chown error on %s: %s",
                                      full_dir_str.c_str(),
                                      strerror(errno));
                           }
                       }
                       if (!fs_remap) {
                               fs_remap = new FilesystemRemap();
                       }
                       dprintf(D_FULLDEBUG, "Adding mapping: %s -> %s.\n", execute_dir.c_str(), full_dir_str.c_str());
                       if (fs_remap->AddMapping(execute_dir, full_dir_str)) {
                               // FilesystemRemap object prints out an error message for us.
                               delete fs_remap;
                               return FALSE;
                       }
                       dprintf(D_FULLDEBUG, "Adding mapping %s -> %s.\n", requested_chroot.c_str(), "/");
                       std::string root_str("/");
                       if (fs_remap->AddMapping(requested_chroot, root_str)) {
                               delete fs_remap;
                               return FALSE;
                       }
               } else {
                       dprintf(D_ALWAYS, "Unable to do chroot because working dir %s does not exist.\n", execute_dir.c_str());
               }
       } else {
               dprintf(D_FULLDEBUG, "Value of RequestedChroot is unset.\n");
       }
	}
// End of chroot 
#endif


	// On Linux kernel 2.4.19 and later, we can give each job its
	// own FS mounts.
	char * mount_under_scratch = param("MOUNT_UNDER_SCRATCH");
	if (mount_under_scratch) {

		// Copy the list and release the param() buffer up front so that
		// no return path below can leak it.
		StringList mount_list(mount_under_scratch);
		free(mount_under_scratch);
		mount_under_scratch = NULL;

		std::string working_dir = Starter->GetWorkingDir();

		if (IsDirectory(working_dir.c_str())) {
			mount_list.rewind();
			if (!fs_remap) {
				fs_remap = new FilesystemRemap();
			}
			char * next_dir;
			while ( (next_dir=mount_list.next()) ) {
				if (!*next_dir) {
					// empty string?
					mount_list.deleteCurrent();
					continue;
				}
				std::string next_dir_str(next_dir);
				// Gah, I wish I could throw an exception to clean up these nested if statements.
				if (IsDirectory(next_dir)) {
					char * full_dir = dirscat(working_dir, next_dir_str);
					if (full_dir) {
						std::string full_dir_str(full_dir);
						delete [] full_dir; full_dir = NULL;
						if (!mkdir_and_parents_if_needed( full_dir_str.c_str(), S_IRWXU, PRIV_USER )) {
							dprintf(D_ALWAYS, "Failed to create scratch directory %s\n", full_dir_str.c_str());
							delete fs_remap;
							return FALSE;
						}
						dprintf(D_FULLDEBUG, "Adding mapping: %s -> %s.\n", full_dir_str.c_str(), next_dir_str.c_str());
						if (fs_remap->AddMapping(full_dir_str, next_dir_str)) {
							// FilesystemRemap object prints out an error message for us.
							delete fs_remap;
							return FALSE;
						}
					} else {
						dprintf(D_ALWAYS, "Unable to concatenate %s and %s.\n", working_dir.c_str(), next_dir_str.c_str());
						delete fs_remap;
						return FALSE;
					}
				} else {
					dprintf(D_ALWAYS, "Unable to add mapping %s -> %s because %s doesn't exist.\n", working_dir.c_str(), next_dir, next_dir);
				}
			}
		} else {
			dprintf(D_ALWAYS, "Unable to perform mappings because %s doesn't exist.\n", working_dir.c_str());
			// fs_remap may already exist from the chroot setup above
			delete fs_remap;
			return FALSE;
		}
	}

#if defined(LINUX)
	// On Linux kernel 2.6.24 and later, we can give each
	// job its own PID namespace
	if (param_boolean("USE_PID_NAMESPACES", false)) {
		if (!can_switch_ids()) {
			EXCEPT("USE_PID_NAMESPACES enabled, but can't perform this "
				"call in Linux unless running as root.");
		}
		fi.want_pid_namespace = this->SupportsPIDNamespace();
		if (fi.want_pid_namespace) {
			if (!fs_remap) {
				fs_remap = new FilesystemRemap();
			}
			fs_remap->RemapProc();
		}

		// When PID Namespaces are enabled, need to run the job
		// under the condor_pid_ns_init program, so that signals
		// propagate through to the child.  

		// First tell the program where to log output status
		// via an environment variable
		if (param_boolean("USE_PID_NAMESPACE_INIT", true)) {
			Env env;
			MyString env_errors;
			MyString arg_errors;
			std::string filename;

			filename = Starter->GetWorkingDir();
			filename += "/.condor_pid_ns_status";
		
			env.MergeFrom(JobAd, &env_errors);
			env.SetEnv("_CONDOR_PID_NS_INIT_STATUS_FILENAME", filename);
			env.InsertEnvIntoClassAd(JobAd, &env_errors);

			Starter->jic->removeFromOutputFiles(filename.c_str());
			this->m_pid_ns_init_filename = filename;
			
			// Now, set the job's CMD to the wrapper, and shift
			// over the arguments by one

			ArgList args;
			std::string cmd;

			JobAd->LookupString(ATTR_JOB_CMD, cmd);
			args.AppendArg(cmd);
			args.AppendArgsFromClassAd(JobAd, &arg_errors);
			args.InsertArgsIntoClassAd(JobAd, NULL, & arg_errors);
	
			std::string libexec;
			if( !param(libexec,"LIBEXEC") ) {
				dprintf(D_ALWAYS, "Cannot find LIBEXEC so can not run condor_pid_ns_init\n");
				delete fs_remap;
				return 0;
			}
			std::string c_p_n_i = libexec + "/condor_pid_ns_init";
			JobAd->Assign(ATTR_JOB_CMD, c_p_n_i);
		}
	}
	dprintf(D_FULLDEBUG, "PID namespace option: %s\n", fi.want_pid_namespace ? "true" : "false");
#endif


	// have OsProc start the job
	//
	int retval = OsProc::StartJob(&fi, fs_remap);

	if (fs_remap != NULL) {
		delete fs_remap;
	}

#if defined(HAVE_EXT_LIBCGROUP)

	// Set fairshare limits.  Note that retval == 1 indicates success, 0 is failure.
	// See Note near setup of param(BASE_CGROUP)
	if (CONDOR_UNIVERSE_LOCAL != job_universe && cgroup && retval) {
		std::string mem_limit;
		param(mem_limit, "CGROUP_MEMORY_LIMIT_POLICY", "soft");
		bool mem_is_soft = mem_limit == "soft";
		std::string cgroup_string = cgroup;
		CgroupLimits climits(cgroup_string);
		if (mem_is_soft || (mem_limit == "hard")) {
			ClassAd * MachineAd = Starter->jic->machClassAd();
			int MemMb;
			if (MachineAd->LookupInteger(ATTR_MEMORY, MemMb)) {
					// widen before multiplying to bytes
				uint64_t MemMb_big = MemMb;
				m_memory_limit = MemMb_big;
				climits.set_memory_limit_bytes(1024*1024*MemMb_big, mem_is_soft);
			} else {
				dprintf(D_ALWAYS, "Not setting memory soft limit in cgroup because "
					"Memory attribute missing in machine ad.\n");
			}
		} else if (mem_limit == "none") {
			dprintf(D_FULLDEBUG, "Not enforcing memory soft limit.\n");
		} else {
			dprintf(D_ALWAYS, "Invalid value of CGROUP_MEMORY_LIMIT_POLICY: %s.  Ignoring.\n", mem_limit.c_str());
		}

		// Now, set the CPU shares
		ClassAd * MachineAd = Starter->jic->machClassAd();
		int numCores = 1;
		if (MachineAd->LookupInteger(ATTR_CPUS, numCores)) {
			climits.set_cpu_shares(numCores*100);
		} else {
			dprintf(D_FULLDEBUG, "Invalid value of Cpus in machine ClassAd; ignoring.\n");
		}
		setupOOMEvent(cgroup);
	}

    m_statistics.Reconfig();

	// Now that the job is started, decrease the likelihood that the starter
	// is killed instead of the job itself.
	if (retval)
	{
		setupOOMScore(-4);
	}

#endif

	return retval;
}
Beispiel #3
0
/**
 * Return the gridmanager node serving the given owner (and optional
 * attribute value / job id), starting a new condor_gridmanager process
 * when none is registered yet.
 *
 * A new gridmanager gets its own scratch directory (created as the user),
 * is spawned through DaemonCore::Create_Process and is recorded in
 * gman_pid_table under a key built from owner, attr_value and, when
 * cluster is non-zero, "-cluster.proc".
 *
 * @param owner       job owner; required, and on non-Windows builds
 *                    must not be "root"
 * @param domain      optional domain, appended to the owner as "@domain"
 * @param attr_value  optional attribute value; "" is treated like NULL
 * @param attr_name   attribute name paired with attr_value
 * @param cluster     job cluster id, used for lookup and the table key
 * @param proc        job proc id, used for lookup and the table key
 * @return the existing or newly created node, or NULL on any failure
 *         (shutdown in progress, bad configuration, mkdir or spawn error)
 */
GridUniverseLogic::gman_node_t *
GridUniverseLogic::StartOrFindGManager(const char* owner, const char* domain,
	   	const char* attr_value, const char* attr_name, int cluster, int proc)
{
	gman_node_t* gman_node;

		// If attr_value is an empty string, convert to NULL since code
		// after this point expects that.
	if ( attr_value && strlen(attr_value)==0 ) {
		attr_value = NULL;
		attr_name = NULL;
	}

	if ( (gman_node=lookupGmanByOwner(owner, attr_value, cluster, proc)) ) {
		// found it
		return gman_node;
	}

	// not found.  fire one up!  we want to run the GManager as the user.

	// but first, make certain we are not shutting down...
	if (!gman_pid_table) {
		// destructor has already been called; we are probably
		// closing down.
		return NULL;
	}

	// An owner is mandatory.  Reject a missing one here, before owner is
	// formatted into any log message or constraint below.
	if ( !owner ) {
		dprintf(D_ALWAYS,"ERROR - missing owner field\n");
		return NULL;
	}

#ifndef WIN32
	if (strcasecmp(owner, "root") == 0 ) {
		dprintf(D_ALWAYS, "Tried to start condor_gmanager as root.\n");
		return NULL;
	}
#endif

	dprintf( D_FULLDEBUG, "Starting condor_gmanager for owner %s (%d.%d)\n",
			owner, cluster, proc);

	// gman_binary is malloc'ed by param(); every return below frees it.
	char *gman_binary;
	gman_binary = param("GRIDMANAGER");
	if ( !gman_binary ) {
		dprintf(D_ALWAYS,"ERROR - GRIDMANAGER not defined in config file\n");
		return NULL;
	}

	ArgList args;
	MyString error_msg;

	args.AppendArg("condor_gridmanager");
	args.AppendArg("-f");

	char *gman_args = param("GRIDMANAGER_ARGS");

	if(!args.AppendArgsV1RawOrV2Quoted(gman_args,&error_msg)) {
		dprintf( D_ALWAYS, "ERROR: failed to parse gridmanager args: %s\n",
				 error_msg.Value());
		free(gman_binary);
		free(gman_args);
		return NULL;
	}
	free(gman_args);

	// build a constraint selecting this owner's grid-universe jobs
	MyString constraint;
	if ( !attr_name  ) {
		constraint.formatstr("(%s=?=\"%s\"&&%s==%d)",
						   ATTR_OWNER,owner,
						   ATTR_JOB_UNIVERSE,CONDOR_UNIVERSE_GRID);
	} else {
		constraint.formatstr("(%s=?=\"%s\"&&%s=?=\"%s\"&&%s==%d)",
						   ATTR_OWNER,owner,
						   attr_name,attr_value,
						   ATTR_JOB_UNIVERSE,CONDOR_UNIVERSE_GRID);

		args.AppendArg("-A");
		args.AppendArg(attr_value);
	}
	args.AppendArg("-C");
	args.AppendArg(constraint.Value());

	MyString full_owner_name(owner);
	if ( domain && *domain ) {
		full_owner_name.formatstr_cat( "@%s", domain );
	}
	args.AppendArg("-o");
	args.AppendArg(full_owner_name.Value());

	// Paired with the uninit_user_ids() call after the scratch directory
	// has been created; there is no return in between.
	if (!init_user_ids(owner, domain)) {
		dprintf(D_ALWAYS,"ERROR - init_user_ids() failed in GRIDMANAGER\n");
		free(gman_binary);
		return NULL;
	}

	static bool first_time_through = true;
	if ( first_time_through ) {
		// Note: Because first_time_through is static, this block runs only 
		// once per schedd invocation.
		first_time_through = false;

		// Clean up any old / abandoned scratch dirs.
		dprintf(D_FULLDEBUG,"Checking for old gridmanager scratch dirs\n");
		char *prefix = temp_dir_path();
		ASSERT(prefix);
		Directory tmp( prefix, PRIV_USER );
		const char *f;
		char const *dot;
		int fname_pid;
		int mypid = daemonCore->getpid();
		int scratch_pre_len = strlen(scratch_prefix);
		while ( (f=tmp.Next()) ) {
				// skip regular files -- we only need to inspect subdirs
			if ( !tmp.IsDirectory() ) {
				continue;
			}
				// skip if it does not start with our prefix
			if ( strncmp(scratch_prefix,f,scratch_pre_len) ) {
				continue;
			}
				// skip if does not end w/ a pid
			dot = strrchr(f,'.');
			if ( !dot ) {
				continue;
			}
				// skip if this pid is still alive and not ours
			dot++;	// skip over period
			fname_pid = atoi(dot);
			if ( fname_pid != mypid && daemonCore->Is_Pid_Alive(fname_pid) ) {
					continue;
			}
				// if we made it here, blow away this subdir
			if ( tmp.Remove_Current_File() ) {
				dprintf(D_ALWAYS,"Removed old scratch dir %s\n",
				tmp.GetFullPath());
			}
		}	// end of while for cleanup of old scratch dirs

		dprintf(D_FULLDEBUG,"Done checking for old scratch dirs\n");			

		free(prefix);
		prefix = NULL;

	}	// end of once-per-schedd invocation block

	// Create a temp dir for the gridmanager and append proper
	// command-line arguments to tell where it is.
	bool failed = false;
	gman_node = new gman_node_t;
	char *finalpath = scratchFilePath(gman_node);
	priv_state saved_priv = set_user_priv();
	if ( (mkdir(finalpath,0700)) < 0 ) {
		// mkdir failed.  
		dprintf(D_ALWAYS,"ERROR - mkdir(%s,0700) failed in GRIDMANAGER, errno=%d (%s)\n",
				finalpath, errno, strerror(errno));
		failed = true;
	}
	set_priv(saved_priv);
	uninit_user_ids();
	args.AppendArg("-S");	// -S = "ScratchDir" argument
	args.AppendArg(finalpath);
	delete [] finalpath;
	if ( failed ) {
		// we already did dprintf reason to the log...
		free(gman_binary);
		delete gman_node;
		return NULL;
	}

	if(IsFulldebug(D_FULLDEBUG)) {
		MyString args_string;
		args.GetArgsStringForDisplay(&args_string);
		dprintf(D_FULLDEBUG,"Really Execing %s\n",args_string.Value());
	}

	const int pid = daemonCore->Create_Process( 
		gman_binary,			// Program to exec
		args,					// Command-line args
		PRIV_ROOT,				// Run as root, so it can switch to
		                        //   PRIV_CONDOR
		rid						// Reaper ID
		);

	free(gman_binary);

	if ( pid <= 0 ) {
		dprintf ( D_ALWAYS, "StartOrFindGManager: Create_Process problems!\n" );
		// NOTE(review): the scratch directory created above is left
		// behind on this path; presumably the once-per-schedd cleanup
		// removes it on a later run -- confirm.
		delete gman_node;
		return NULL;
	}

	// If we made it here, we happily started up a new gridmanager process

	dprintf( D_ALWAYS, "Started condor_gmanager for owner %s pid=%d\n",
			owner,pid);

	// Fill in the gman_node entry for our hashtable & insert it
	gman_node->pid = pid;
	gman_node->owner[0] = '\0';
	gman_node->domain[0] = '\0';
	// NOTE(review): unbounded copies -- the sizes of gman_node_t::owner
	// and ::domain are not visible here; confirm they can hold any
	// owner/domain string a caller may pass.
	strcpy(gman_node->owner,owner);
	if ( domain ) {
		strcpy(gman_node->domain,domain);
	}
	MyString owner_key(owner);
	if(attr_value){
		owner_key += attr_value;
	}
	if (cluster) {
		owner_key.formatstr_cat( "-%d.%d", cluster, proc );
	}

	ASSERT( gman_pid_table->insert(owner_key,gman_node) == 0 );

	// start timer to signal gridmanager if we haven't already
	// (presumably a freshly constructed gman_node_t starts with
	// add_timer_id == -1 -- confirm against its constructor)
	if ( gman_node->add_timer_id == -1 ) {  // == -1 means no timer set
		gman_node->add_timer_id = daemonCore->Register_Timer(job_added_delay,
			GridUniverseLogic::SendAddSignal,
			"GridUniverseLogic::SendAddSignal");
		daemonCore->Register_DataPtr(gman_node);
	}

	// All done
	return gman_node;
}
Beispiel #4
0
// I really need a good way to determine the type of a classad
// attribute.  Right now I just try all four possibilities, which is a
// horrible mess...
// Produce the libvirt XML description of this VM by running the helper
// program named by the LIBVIRT_XML_SCRIPT config knob.
//
// The job ClassAd (m_classAd), extended with a few VM parameters, is handed
// to systemCommand() as its input string list; every line the helper
// returns in the output list is appended to m_xml.
//
// The filename parameter is unused.
//
// Returns true when systemCommand() reports 0, false when
// LIBVIRT_XML_SCRIPT is undefined or systemCommand() reports non-zero.
bool VirshType::CreateVirshConfigFile(const char*  /*filename*/)
{
  vmprintf(D_FULLDEBUG, "In VirshType::CreateVirshConfigFile\n");
  char * tmp = param("LIBVIRT_XML_SCRIPT");
  if(tmp == NULL)
    {
      vmprintf(D_ALWAYS, "LIBVIRT_XML_SCRIPT not defined\n");
      return false;
    }
  // This probably needs some work...
  ArgList args;
  args.AppendArg(tmp);
  free(tmp);

  // We might want to have specific debugging output enabled in the
  // helper script; however, it is not clear where that output should
  // go.  This gives us a way to do so even in cases where the script
  // is unable to read from condor_config (why would this ever
  // happen?)
  tmp = param("LIBVIRT_XML_SCRIPT_ARGS");
  if(tmp != NULL)
    {
      MyString errormsg;
      // A malformed knob used to be dropped silently; report it so the
      // administrator can see why the helper was run without its
      // extra arguments.  We still run the helper (best effort).
      if(!args.AppendArgsV1RawOrV2Quoted(tmp,&errormsg))
        {
          vmprintf(D_ALWAYS, "Failed to parse LIBVIRT_XML_SCRIPT_ARGS: %s\n",
                   errormsg.Value());
        }
      free(tmp);
    }

  // Build the text that is handed to the helper: the printed job ad
  // followed by extra "name = \"value\"" lines.
  StringList input_strings, output_strings, error_strings;
  MyString classad_string;
  m_classAd.sPrint(classad_string);
  classad_string += VMPARAM_XEN_BOOTLOADER;
  classad_string += " = \"";
  classad_string += m_xen_bootloader;
  classad_string += "\"\n";
  // find() yields a negative value when the text is absent; a match at
  // offset 0 is still a match, so test "< 0" rather than "< 1".
  if(classad_string.find(VMPARAM_XEN_INITRD) < 0)
    {
      classad_string += VMPARAM_XEN_INITRD;
      classad_string += " = \"";
      classad_string += m_xen_initrd_file;
      classad_string += "\"\n";
    }
  if(!m_vm_bridge_interface.empty())
    {
      classad_string += VMPARAM_BRIDGE_INTERFACE;
      classad_string += " = \"";
      classad_string += m_vm_bridge_interface.c_str();
      classad_string += "\"\n";
    }
  if(classad_string.find(ATTR_JOB_VM_NETWORKING_TYPE) < 0)
    {
      classad_string += ATTR_JOB_VM_NETWORKING_TYPE;
      classad_string += " = \"";
      classad_string += m_vm_networking_type.Value();
      classad_string += "\"\n";
    }
  input_strings.append(classad_string.Value());

  tmp = input_strings.print_to_string();
  vmprintf(D_FULLDEBUG, "LIBVIRT_XML_SCRIPT_ARGS input_strings= %s\n", tmp);
  free(tmp);

  int ret = systemCommand(args, PRIV_ROOT, &output_strings, &input_strings, &error_strings, false);

  // Both outcomes walk the helper's error list from the beginning.
  error_strings.rewind();
  if(ret != 0)
    {
      vmprintf(D_ALWAYS, "XML helper script could not be executed\n");
      // If there is any output from the helper, write it to the debug
      // log.  Presumably, this is separate from the script's own
      // debug log.
      while((tmp = error_strings.next()) != NULL)
        {
          vmprintf(D_FULLDEBUG, "Helper stderr output: %s\n", tmp);
        }
      return false;
    }
  while((tmp = error_strings.next()) != NULL)
    {
      vmprintf(D_ALWAYS, "Helper stderr output: %s\n", tmp);
    }

  // Everything the helper produced becomes the domain XML.
  output_strings.rewind();
  while((tmp = output_strings.next()) != NULL)
    {
      m_xml += tmp;
    }
  return true;
}
Beispiel #5
0
int JavaProc::StartJob()
{
	
	MyString java_cmd;
	char* jarfiles = NULL;
	ArgList args;
	MyString arg_buf;

	// Since we are adding to the argument list, we may need to deal
	// with platform-specific arg syntax in the user's args in order
	// to successfully merge them with the additional java VM args.
	args.SetArgV1SyntaxToCurrentPlatform();

	// Construct the list of jar files for the command line
	// If a jar file is transferred locally, use its local name
	// (in the execute directory)
	// otherwise use the original name

	StringList jarfiles_orig_list;
	StringList jarfiles_local_list;
	StringList* jarfiles_final_list = NULL;

	if( JobAd->LookupString(ATTR_JAR_FILES,&jarfiles) ) {
		jarfiles_orig_list.initializeFromString( jarfiles );
		free( jarfiles );
		jarfiles = NULL;

		char * jarfile_name;
		const char * base_name;
		struct stat stat_buff;
		if( Starter->jic->iwdIsChanged() ) {
				// If the job's IWD has been changed (because we're
				// running in the sandbox due to file transfer), we
				// need to use a local version of the path to the jar
				// files, not the full paths from the submit machine. 
			jarfiles_orig_list.rewind();
			while( (jarfile_name = jarfiles_orig_list.next()) ) {
					// Construct the local name
				base_name = condor_basename( jarfile_name );
				MyString local_name = execute_dir;
				local_name += DIR_DELIM_CHAR;
				local_name += base_name; 

				if( stat(local_name.Value(), &stat_buff) == 0 ) {
						// Jar file exists locally, use local name
					jarfiles_local_list.append( local_name.Value() );
				} else {
						// Use the original name
					jarfiles_local_list.append (jarfile_name);
				}
			} // while(jarfiles_orig_list)

				// jarfiles_local_list is our real copy...
			jarfiles_final_list = &jarfiles_local_list;

		} else {  // !iwdIsChanged()

				// just use jarfiles_orig_list as our real copy...
			jarfiles_final_list = &jarfiles_orig_list;
		}			
	}

	startfile.formatstr("%s%cjvm.start",execute_dir,DIR_DELIM_CHAR);
	endfile.formatstr("%s%cjvm.end",execute_dir,DIR_DELIM_CHAR);

	if( !java_config(java_cmd,&args,jarfiles_final_list) ) {
		dprintf(D_FAILURE|D_ALWAYS,"JavaProc: Java is not configured!\n");
		return 0;
	}

	JobAd->Assign(ATTR_JOB_CMD, java_cmd.Value());

	arg_buf.formatstr("-Dchirp.config=%s%cchirp.config",execute_dir,DIR_DELIM_CHAR);
	args.AppendArg(arg_buf.Value());

	char *jvm_args1 = NULL;
	char *jvm_args2 = NULL;
	MyString jvm_args_error;
	bool jvm_args_success = true;
	JobAd->LookupString(ATTR_JOB_JAVA_VM_ARGS1, &jvm_args1);
	JobAd->LookupString(ATTR_JOB_JAVA_VM_ARGS2, &jvm_args2);
	if(jvm_args2) {
		jvm_args_success = args.AppendArgsV2Raw(jvm_args2, &jvm_args_error);
	}
	else if(jvm_args1) {
		jvm_args_success = args.AppendArgsV1Raw(jvm_args1, &jvm_args_error);
	}
	free(jvm_args1);
	free(jvm_args2);
	if (!jvm_args_success) {
		dprintf(D_ALWAYS, "JavaProc: failed to parse JVM args: %s\n",
				jvm_args_error.Value());
		return 0;
	}

	args.AppendArg("CondorJavaWrapper");
	args.AppendArg(startfile.Value());
	args.AppendArg(endfile.Value());

	MyString args_error;
	if(!args.AppendArgsFromClassAd(JobAd,&args_error)) {
		dprintf(D_ALWAYS,"JavaProc: failed to read job arguments: %s\n",
				args_error.Value());
		return 0;
	}

	// We are just talking to ourselves, so it is fine to use argument
	// syntax compatible with this current version of Condor.
	CondorVersionInfo ver_info;
	if(!args.InsertArgsIntoClassAd(JobAd,&ver_info,&args_error)) {
		dprintf(D_ALWAYS,"JavaProc: failed to insert java job arguments: %s\n",
				args_error.Value());
		return 0;
	}

	dprintf(D_ALWAYS,"JavaProc: Cmd=%s\n",java_cmd.Value());
	MyString args_string;
	args.GetArgsStringForDisplay(&args_string);
	dprintf(D_ALWAYS,"JavaProc: Args=%s\n",args_string.Value());

	return VanillaProc::StartJob();
}
/** Print a summary of the files belonging to this DAG and then, unless
	-no_submit was requested, run condor_submit on the generated submit
	file.
	@param shallowOpts: the condor_submit_dag shallow options
	@return 0 if successful (or if submission was skipped), 1 if
		condor_submit failed
*/
int
submitDag( SubmitDagShallowOptions &shallowOpts )
{
	printf("-----------------------------------------------------------------------\n");
	printf("File for submitting this DAG to Condor           : %s\n", 
			shallowOpts.strSubFile.Value());
	printf("Log of DAGMan debugging messages                 : %s\n",
			shallowOpts.strDebugLog.Value());
	printf("Log of Condor library output                     : %s\n", 
			shallowOpts.strLibOut.Value());
	printf("Log of Condor library error messages             : %s\n", 
			shallowOpts.strLibErr.Value());
	printf("Log of the life of condor_dagman itself          : %s\n",
			shallowOpts.strSchedLog.Value());
	printf("\n");

	if ( !shallowOpts.bSubmit ) {
			// Nothing to run; just tell the user how to do it by hand.
		printf("-no_submit given, not submitting DAG to Condor.  "
					"You can do this with:\n");
		printf("\"condor_submit %s\"\n", shallowOpts.strSubFile.Value());
	} else {
			// Command line: condor_submit [-r <schedd>] <submit file>
		ArgList submit_cmd;
		submit_cmd.AppendArg( "condor_submit" );
		if( shallowOpts.strRemoteSchedd != "" ) {
			submit_cmd.AppendArg( "-r" );
			submit_cmd.AppendArg( shallowOpts.strRemoteSchedd );
		}
		submit_cmd.AppendArg( shallowOpts.strSubFile );

			// The destination Schedd has to be selected before
			// condor_submit runs, or the DAG may land on the wrong
			// Schedd.
			//
			// my_system() has a variant that accepts an Env, but that
			// variant ends in execve with no path search, which would
			// break the relative "condor_submit" above.  So the
			// variables are placed in our own environment ahead of the
			// execvp instead.  (Arguably my_system should inject the
			// Env between fork() and execvp().)
		if ( shallowOpts.strScheddDaemonAdFile != "" ) {
			SetEnv("_CONDOR_SCHEDD_DAEMON_AD_FILE",
				   shallowOpts.strScheddDaemonAdFile.Value());
		}
		if ( shallowOpts.strScheddAddressFile != "" ) {
			SetEnv("_CONDOR_SCHEDD_ADDRESS_FILE",
				   shallowOpts.strScheddAddressFile.Value());
		}

		if( my_system( submit_cmd ) != 0 ) {
			fprintf( stderr, "ERROR: condor_submit failed; aborting.\n" );
			return 1;
		}
	}

	printf("-----------------------------------------------------------------------\n");
	return 0;
}
Beispiel #7
0
// Launch a myproxy-get-delegation child process to refresh the given proxy.
//
// The MyProxy password is written into a pipe whose read end becomes the
// child's stdin, and the child's stderr is redirected into a freshly
// created temp file.  On success the child's pid is recorded in
// proxy->get_delegation_pid.
//
// Returns TRUE when the child was started.  Returns FALSE when the
// credential has no MyProxy server, when an earlier refresh process is
// still recorded for this proxy (it is sent SIGKILL once it is more than
// 500 seconds old), or when any setup step fails.
int RefreshProxyThruMyProxy(X509CredentialWrapper * proxy)
{
  const char * proxy_filename = proxy->GetStorageName();
  X509Credential * x509_cred = (X509Credential*)proxy->cred;
  char * myproxy_host = NULL;
  int status;

  if (x509_cred->GetMyProxyServerHost() == NULL) {
    dprintf (D_ALWAYS, "Skipping %s\n", proxy->cred->GetName());
    return FALSE;
  }

  // First check if a refresh process is already running
  time_t now = time(NULL);

  if (proxy->get_delegation_pid != GET_DELEGATION_PID_NONE) {
    time_t time_started = proxy->get_delegation_proc_start_time;

    // If the old "refresh proxy" proc is still running, kill it
    if (now - time_started > 500) {
      dprintf (D_FULLDEBUG, "MyProxy refresh process pid=%d still running, "
			  "sending signal %d\n",
			   proxy->get_delegation_pid, SIGKILL);
      daemonCore->Send_Signal (proxy->get_delegation_pid, SIGKILL);
	  // Wait for reaper to cleanup.
    } else {
      dprintf (D_FULLDEBUG, "MyProxy refresh process pid=%d still running, "
			  "letting it finish\n",
			   proxy->get_delegation_pid);
	}
	return FALSE;
  }

  proxy->get_delegation_proc_start_time = now;

  // Set up environment for myproxy-get-delegation
  Env myEnv;
  MyString strBuff;

  if (x509_cred->GetMyProxyServerDN()) {
    strBuff="MYPROXY_SERVER_DN=";
    strBuff+= x509_cred->GetMyProxyServerDN();
    myEnv.SetEnv (strBuff.Value());
    dprintf (D_FULLDEBUG, "%s\n", strBuff.Value());
  }

  // NOTE(review): this value is only logged, it is never added to myEnv.
  // Confirm whether the child is meant to see X509_USER_PROXY.
  strBuff="X509_USER_PROXY=";
  strBuff+=proxy->GetStorageName();
  dprintf (D_FULLDEBUG, "%s\n", strBuff.Value());

  // Get password (this will end up in stdin for myproxy-get-delegation)
  const char * myproxy_password = x509_cred->GetRefreshPassword();
  if (myproxy_password == NULL ) {
    dprintf (D_ALWAYS, "No MyProxy password specified for %s:%s\n",
	     proxy->cred->GetName(),
	     proxy->cred->GetOwner());
    myproxy_password = "";
  }
  const size_t password_len = strlen (myproxy_password);

  status = pipe (proxy->get_delegation_password_pipe);
  if (status == -1) {
	dprintf (D_ALWAYS, "get_delegation pipe() failed: %s\n", strerror(errno) );
	proxy->get_delegation_reset();
	return FALSE;
  }

  // Password first, then the terminating newline; a short write counts
  // as a failure.
  int written = write (proxy->get_delegation_password_pipe[1],
	 myproxy_password,
	 password_len);

  if (written < (long)password_len) {
	dprintf (D_ALWAYS, "Write to proxy delegation pipe failed (%s)\n", strerror(errno));
	proxy->get_delegation_reset();
	return FALSE;
  }

  written = write (proxy->get_delegation_password_pipe[1], "\n", 1);
  if (written < 1) {
	dprintf (D_ALWAYS, "Write newline to proxy delegation pipe failed (%s)\n", strerror(errno) );
	proxy->get_delegation_reset();
	return FALSE;
  }


  // Figure out user name;
  const char * username = proxy->cred->GetOrigOwner();

  // Figure out myproxy host and port
  myproxy_host = getHostFromAddr (x509_cred->GetMyProxyServerHost());
  int myproxy_port = getPortFromAddr (x509_cred->GetMyProxyServerHost());
  if (myproxy_host == NULL) {
	// Without a host there is nothing to pass to --pshost; bail out
	// instead of handing a NULL string to the argument list.
	dprintf (D_ALWAYS, "Unable to determine MyProxy host for %s\n",
			proxy->cred->GetName());
	proxy->get_delegation_reset();
	return FALSE;
  }

  // construct arguments
  ArgList args;
  args.AppendArg("--verbose ");

  args.AppendArg("--out");
  args.AppendArg(proxy_filename);

  args.AppendArg("--pshost");
  args.AppendArg(myproxy_host);
  free ( myproxy_host );
  myproxy_host = NULL;

  args.AppendArg("--dn_as_username");

  args.AppendArg("--proxy_lifetime");	// hours
  args.AppendArg(6);

  args.AppendArg("--stdin_pass");

  args.AppendArg("--username");
  args.AppendArg(username);

  // Optional port argument
  if (myproxy_port) {
	  args.AppendArg("--psport");
	  args.AppendArg(myproxy_port);
  }

  // Optional credential name
  if (x509_cred->GetCredentialName() &&
      (x509_cred->GetCredentialName())[0]) {
	  args.AppendArg("--credname");
	  args.AppendArg(x509_cred->GetCredentialName());
  }


  // Create temporary file to store myproxy-get-delegation's stderr
  // The file will be owned by the "condor" user

  // Every exit between here and the set_priv() call below must put the
  // saved privilege state back, otherwise the caller keeps running with
  // the switched identity.
  priv_state priv = set_condor_priv();
  proxy->get_delegation_err_filename = create_temp_file();
  if (proxy->get_delegation_err_filename == NULL) {
	dprintf (D_ALWAYS, "get_delegation create_temp_file() failed: %s\n",
			strerror(errno) );
	proxy->get_delegation_reset();
	set_priv (priv);
	return FALSE;
  }
  status = chmod (proxy->get_delegation_err_filename, 0600);
  if (status == -1) {
	dprintf (D_ALWAYS, "chmod() get_delegation_err_filename %s failed: %s\n",
			proxy->get_delegation_err_filename, strerror(errno) );
	proxy->get_delegation_reset();
	set_priv (priv);
	return FALSE;
  }


  proxy->get_delegation_err_fd = safe_open_wrapper_follow(proxy->get_delegation_err_filename,O_RDWR);
  if (proxy->get_delegation_err_fd == -1) {
    dprintf (D_ALWAYS, "Error opening get_delegation file %s: %s\n",
	     proxy->get_delegation_err_filename, strerror(errno) );
	proxy->get_delegation_reset();
	set_priv (priv);
	return FALSE;
  }
  set_priv (priv);


  // Child's standard streams: stdin reads the password pipe, stdout is
  // not redirected (-1), stderr goes to the temp file.
  int arrIO[3];
  arrIO[0]=proxy->get_delegation_password_pipe[0]; //stdin
  arrIO[1]=-1; //proxy->get_delegation_err_fd;
  arrIO[2]=proxy->get_delegation_err_fd; // stderr


  char * myproxy_get_delegation_pgm = param ("MYPROXY_GET_DELEGATION");
  if (!myproxy_get_delegation_pgm) {
    dprintf (D_ALWAYS, "MYPROXY_GET_DELEGATION not defined in config file\n");
	// Same cleanup as every other failure path: the pipe and the
	// error file have already been set up at this point.
	proxy->get_delegation_reset();
    return FALSE;
  }
  MyString args_string;
  args.GetArgsStringForDisplay(&args_string);
  dprintf (D_ALWAYS, "Calling %s %s\n", myproxy_get_delegation_pgm, args_string.Value());

  int pid = daemonCore->Create_Process (
					myproxy_get_delegation_pgm,		// name
					args,				 			// args
					PRIV_USER_FINAL,				// priv
					myproxyGetDelegationReaperId,	// reaper_id
					FALSE,							// want_command_port
					FALSE,							// want_command_port
					&myEnv,							// env
					NULL,							// cwd		
					NULL,							// family_info
					NULL,							// sock_inherit_list
					arrIO);							// in/out/err streams
  													// nice_inc
													// job_opt_mask
  free (myproxy_get_delegation_pgm);
  myproxy_get_delegation_pgm = NULL;

  if (pid == FALSE) {
    dprintf (D_ALWAYS, "Failed to run myproxy-get-delegation\n");
	proxy->get_delegation_reset();
    return FALSE;
  }

  proxy->get_delegation_pid = pid;

  return TRUE;
}
Beispiel #8
0
//
// FIXME: We have a lot of boilerplate code in this function and file.
//
int DockerAPI::version( std::string & version, CondorError & /* err */ ) {

	ArgList versionArgs;
	if ( ! add_docker_arg(versionArgs))
		return -1;
	versionArgs.AppendArg( "-v" );

	MyString displayString;
	versionArgs.GetArgsStringForLogging( & displayString );
	dprintf( D_FULLDEBUG, "Attempting to run: '%s'.\n", displayString.c_str() );

#if 1
	MyPopenTimer pgm;
	if (pgm.start_program(versionArgs, true, NULL, false) < 0) {
		// treat 'file not found' as not really error
		int d_level = (pgm.error_code() == ENOENT) ? D_FULLDEBUG : (D_ALWAYS | D_FAILURE);
		dprintf(d_level, "Failed to run '%s' errno=%d %s.\n", displayString.c_str(), pgm.error_code(), pgm.error_str() );
		return -2;
	}

	int exitCode;
	if ( ! pgm.wait_for_exit(default_timeout, &exitCode)) {
		pgm.close_program(1);
		dprintf( D_ALWAYS | D_FAILURE, "Failed to read results from '%s': '%s' (%d)\n", displayString.c_str(), pgm.error_str(), pgm.error_code() );
		return -3;
	}

	if (pgm.output_size() <= 0) {
		dprintf( D_ALWAYS | D_FAILURE, "'%s' returned nothing.\n", displayString.c_str() );
		return -3;
	}

	MyStringSource * src = &pgm.output();
	MyString line;
	if (line.readLine(*src, false)) {
		line.chomp();
		bool jansens = strstr( line.c_str(), "Jansens" ) != NULL;
		bool bad_size = ! src->isEof() || line.size() > 1024 || line.size() < (int)sizeof("Docker version ");
		if (bad_size && ! jansens) {
			// check second line of output for the word Jansens also.
			MyString tmp; tmp.readLine(*src, false);
			jansens = strstr( tmp.c_str(), "Jansens" ) != NULL;
		}
		if (jansens) {
			dprintf( D_ALWAYS | D_FAILURE, "The DOCKER configuration setting appears to point to OpenBox's docker.  If you want to use Docker.IO, please set DOCKER appropriately in your configuration.\n" );
			return -5;
		} else if (bad_size) {
			dprintf( D_ALWAYS | D_FAILURE, "Read more than one line (or a very long line) from '%s', which we think means it's not Docker.  The (first line of the) trailing text was '%s'.\n", displayString.c_str(), line.c_str() );
			return -5;
		}
	}

	if( exitCode != 0 ) {
		dprintf( D_ALWAYS, "'%s' did not exit successfully (code %d); the first line of output was '%s'.\n", displayString.c_str(), exitCode, line.c_str() );
		return -4;
	}

	version = line.c_str();

#else
	FILE * dockerResults = my_popen( versionArgs, "r", 1 , 0, false);
	if( dockerResults == NULL ) {
		dprintf( D_ALWAYS | D_FAILURE, "Failed to run '%s'.\n", displayString.c_str() );
		return -2;
	}

	char buffer[1024];
	if( NULL == fgets( buffer, 1024, dockerResults ) ) {
		if( errno ) {
			dprintf( D_ALWAYS | D_FAILURE, "Failed to read results from '%s': '%s' (%d)\n", displayString.c_str(), strerror( errno ), errno );
		} else {
			dprintf( D_ALWAYS | D_FAILURE, "'%s' returned nothing.\n", displayString.c_str() );
		}
		my_pclose( dockerResults );
		return -3;
	}

	if( NULL != fgets( buffer, 1024, dockerResults ) ) {
		if( strstr( buffer, "Jansens" ) != NULL ) {
			dprintf( D_ALWAYS | D_FAILURE, "The DOCKER configuration setting appears to point to OpenBox's docker.  If you want to use Docker.IO, please set DOCKER appropriately in your configuration.\n" );
		} else {
			dprintf( D_ALWAYS | D_FAILURE, "Read more than one line (or a very long line) from '%s', which we think means it's not Docker.  The (first line of the) trailing text was '%s'.\n", displayString.c_str(), buffer );
		}
		my_pclose( dockerResults );
		return -5;
	}

	int exitCode = my_pclose( dockerResults );
	if( exitCode != 0 ) {
		dprintf( D_ALWAYS, "'%s' did not exit successfully (code %d); the first line of output was '%s'.\n", displayString.c_str(), exitCode, buffer );
		return -4;
	}

	size_t end = strlen(buffer);
	if (end > 0 && buffer[end-1] == '\n') { buffer[end-1] = '\0'; }
	version = buffer;
#endif
	sscanf(version.c_str(), "Docker version %d.%d", &DockerAPI::majorVersion, &DockerAPI::minorVersion);
	return 0;
}
Beispiel #9
0
// Run "docker inspect --format ..." on containerID and insert each
// "Attr=Value" line of the reply into dockerAd.
//
// The format string asks Docker for one attribute per line.  Inside each
// line, every double quote between the first quote and the final
// character is turned into a single quote before the line is inserted
// into the ad.
//
// Return values:
//    0  all expected attributes were inserted
//   -1  add_docker_arg() failed
//   -2  dockerAd is NULL
//   -4  fewer usable lines than expected, or an insert failed
//   -6  the program could not be started
int DockerAPI::inspect( const std::string & containerID, ClassAd * dockerAd, CondorError & /* err */ ) {
	if( dockerAd == NULL ) {
		dprintf( D_ALWAYS | D_FAILURE, "dockerAd is NULL.\n" );
		return -2;
	}

	ArgList inspectArgs;
	if ( ! add_docker_arg(inspectArgs))
		return -1;
	inspectArgs.AppendArg( "inspect" );
	inspectArgs.AppendArg( "--format" );
	StringList formatElements(	"ContainerId=\"{{.Id}}\" "
								"Pid={{.State.Pid}} "
								"Name=\"{{.Name}}\" "
								"Running={{.State.Running}} "
								"ExitCode={{.State.ExitCode}} "
								"StartedAt=\"{{.State.StartedAt}}\" "
								"FinishedAt=\"{{.State.FinishedAt}}\" "
								"DockerError=\"{{.State.Error}}\" "
								"OOMKilled=\"{{.State.OOMKilled}}\" " );
	// One format element per output line.
	char * formatArg = formatElements.print_to_delimed_string( "\n" );
	inspectArgs.AppendArg( formatArg );
	free( formatArg );
	inspectArgs.AppendArg( containerID );

	MyString displayString;
	inspectArgs.GetArgsStringForLogging( & displayString );
	dprintf( D_FULLDEBUG, "Attempting to run: %s\n", displayString.c_str() );

#if 1
	MyPopenTimer pgm;
	if (pgm.start_program(inspectArgs, true, NULL, false) < 0) {
		dprintf( D_ALWAYS | D_FAILURE, "Failed to run '%s'.\n", displayString.c_str() );
		return -6;
	}

	// src stays NULL when waiting for the program fails; the checks
	// below then see only empty rows and report -4.
	MyStringSource * src = NULL;
	if (pgm.wait_and_close(default_timeout)) {
		src = &pgm.output();
	}

	int expected_rows = formatElements.number();
	dprintf( D_FULLDEBUG, "exit_status=%d, error=%d, %d bytes. expecting %d lines\n",
		pgm.exit_status(), pgm.error_code(), pgm.output_size(), expected_rows );

	// If the output isn't exactly formatElements.number() lines long,
	// something has gone wrong and we'll at least be able to print out
	// the error message(s).
	std::vector<std::string> correctOutput(expected_rows);
	if (src) {
		MyString line;
		int i=0;
		while (line.readLine(*src,false)) {
			line.chomp();
			if (i >= expected_rows) {
				// Surplus lines: keep non-blank ones so that row i
				// always exists below.
				if (line.empty()) continue;
				correctOutput.push_back(line.c_str());
			} else {
				correctOutput[i] = line.c_str();
			}

			// Replace the inner double quotes with single quotes.  The
			// range is (first quote, last character); when the first
			// quote IS the last character there is nothing in between,
			// and building the range anyway would hand std::replace a
			// begin iterator that lies past its end iterator.
			std::string & row = correctOutput[i];
			std::string::iterator first =
				std::find(row.begin(), row.end(), '\"');
			if (first != row.end() && (first + 1) != row.end()) {
				std::replace(first + 1, row.end() - 1, '\"','\'');
			}
			++i;
		}
	}
#else
	FILE * dockerResults = my_popen( inspectArgs, "r", 1 , 0, false);
	if( dockerResults == NULL ) {
		dprintf( D_ALWAYS | D_FAILURE, "Unable to run '%s'.\n", displayString.c_str() );
		return -6;
	}

	// If the output isn't exactly formatElements.number() lines long,
	// something has gone wrong and we'll at least be able to print out
	// the error message(s).
	char buffer[1024];
	std::vector<std::string> correctOutput(formatElements.number());
	for( int i = 0; i < formatElements.number(); ++i ) {
		if( fgets( buffer, 1024, dockerResults ) != NULL ) {
			correctOutput[i] = buffer;
			std::string::iterator first = 
				std::find(correctOutput[i].begin(),
					correctOutput[i].end(),
					'\"');
			if (first != correctOutput[i].end()) {
				std::replace(++first,
					-- --correctOutput[i].end(), '\"','\'');
			}
		}
	}
	my_pclose( dockerResults );
#endif

	// Insert rows until the first empty row or failed insert.
	int attrCount = 0;
	for( int i = 0; i < formatElements.number(); ++i ) {
		if( correctOutput[i].empty() || dockerAd->Insert( correctOutput[i].c_str() ) == FALSE ) {
			break;
		}
		++attrCount;
	}

	if( attrCount != formatElements.number() ) {
		dprintf( D_ALWAYS | D_FAILURE, "Failed to create classad from Docker output (%d).  Printing up to the first %d (nonblank) lines.\n", attrCount, formatElements.number() );
		for( int i = 0; i < formatElements.number() && ! correctOutput[i].empty(); ++i ) {
			// The rows were chomped when they were read, so terminate
			// each one explicitly to keep them on separate log lines.
			dprintf( D_ALWAYS | D_FAILURE, "%s\n", correctOutput[i].c_str() );
		}
		return -4;
	}

	dprintf( D_FULLDEBUG, "docker inspect printed:\n" );
	for( int i = 0; i < formatElements.number() && ! correctOutput[i].empty(); ++i ) {
		dprintf( D_FULLDEBUG, "\t%s\n", correctOutput[i].c_str() );
	}
	return 0;
}
Beispiel #10
0
//
// Because we fork before calling docker, we don't actually
// care if the image is stored locally or not (except to the extent that
// remote image pull violates the principle of least astonishment).
//
// Assemble a "docker run ..." command line for the job and start it with
// daemonCore->Create_Process().
//
// Parameters:
//   machineAd, jobAd  consulted for Cpus/Memory, for the
//                     DOCKER_DROP_ALL_CAPABILITIES knob and for the
//                     container hostname
//   containerName     passed to --name
//   imageID           image to run (also handed to gc_image())
//   command, args     command and arguments run inside the image; an
//                     empty command lets the image default apply
//   env               forwarded through add_env_to_args_for_docker()
//   sandboxPath       mounted at the same path inside the container and
//                     used as the working directory
//   extraVolumes      each entry is passed to an additional --volume
//   pid               receives the pid returned by Create_Process()
//   childFDs          passed through to Create_Process()
//
// Return values:
//    0  the process was created
//   -1  add_docker_arg() or Create_Process() failed
//   -8  the environment could not be turned into arguments
//   -9  the uid or gid to run as is 0
int DockerAPI::run(
	ClassAd &machineAd,
	ClassAd &jobAd,
	const std::string & containerName,
	const std::string & imageID,
	const std::string & command,
	const ArgList & args,
	const Env & env,
	const std::string & sandboxPath,
	const std::list<std::string> extraVolumes,
	int & pid,
	int * childFDs,
	CondorError & /* err */ )
{
	gc_image(imageID);
	//
	// We currently assume that the system has been configured so that
	// anyone (user) who can run an HTCondor job can also run docker.  It's
	// also apparently a security worry to run Docker as root, so let's not.
	//
	ArgList runArgs;
	if ( ! add_docker_arg(runArgs))
		return -1;
	runArgs.AppendArg( "run" );

	// Write out a file with the container ID.
	// FIXME: The startd can check this to clean up after us.
	// This needs to go into a directory that condor user
	// can write to.

/*
	std::string cidFileName = sandboxPath + "/.cidfile";
	runArgs.AppendArg( "--cidfile=" + cidFileName );
*/

	
	// Configure resource limits.
	
	// First cpus: 10 shares per machine-ad CPU, or 10 when the ad has
	// no Cpus attribute.
	int  cpus;
	int cpuShare;

	if (machineAd.LookupInteger(ATTR_CPUS, cpus)) {
		cpuShare = 10 * cpus;
	} else {
		cpuShare = 10;
	}
	std::string cpuShareStr;
	formatstr(cpuShareStr, "--cpu-shares=%d", cpuShare);
	runArgs.AppendArg(cpuShareStr);

	// Now memory
	int memory; // in Megabytes
	if (machineAd.LookupInteger(ATTR_MEMORY, memory)) {
		std::string mem;
		formatstr(mem, "--memory=%dm", memory);
		runArgs.AppendArg(mem);
	} 

	// drop unneeded Linux capabilities
	if (param_boolean("DOCKER_DROP_ALL_CAPABILITIES", true /*default*/,
		true /*do_log*/, &machineAd, &jobAd)) {
		runArgs.AppendArg("--cap-drop=all");
			
		// --no-new-privileges flag appears in docker 1.11, so require
		// version >= 1.11.  Comparing the minor number on its own would
		// also accept 0.11 and 0.12.
		// NOTE(review): Docker's run reference spells this option
		// "--security-opt no-new-privileges"; confirm the bare flag is
		// accepted by the docker versions in use.
		const bool atLeast_1_11 =
			DockerAPI::majorVersion > 1 ||
			(DockerAPI::majorVersion == 1 && DockerAPI::minorVersion > 10);
		if (atLeast_1_11) {
			runArgs.AppendArg("--no-new-privileges");
		}
	}

	// Give the container a useful name
	std::string hname = makeHostname(&machineAd, &jobAd);
	runArgs.AppendArg("--hostname");
	runArgs.AppendArg(hname.c_str());

		// Now the container name
	runArgs.AppendArg( "--name" );
	runArgs.AppendArg( containerName );

	if ( ! add_env_to_args_for_docker(runArgs, env)) {
		dprintf( D_ALWAYS | D_FAILURE, "Failed to pass enviroment to docker.\n" );
		return -8;
	}

	// Map the external sandbox to the internal sandbox.
	runArgs.AppendArg( "--volume" );
	runArgs.AppendArg( sandboxPath + ":" + sandboxPath );

	// Now any extra volumes
	for (std::list<std::string>::const_iterator it = extraVolumes.begin(); it != extraVolumes.end(); ++it) {
		runArgs.AppendArg("--volume");
		runArgs.AppendArg(*it);
	}
	
	// Start in the sandbox.
	runArgs.AppendArg( "--workdir" );
	runArgs.AppendArg( sandboxPath );

	// Run with the uid that condor selects for the user
	// either a slot user or submitting user or nobody
	uid_t uid = 0;
	uid_t gid = 0;

	// Docker doesn't actually run on Windows, but we compile
	// on Windows because...
#ifndef WIN32
	uid = get_user_uid();
	gid = get_user_gid();
#endif
	
	// Refuse to start the container as uid 0 or gid 0.
	if ((uid == 0) || (gid == 0)) {
		dprintf(D_ALWAYS|D_FAILURE, "Failed to get userid to run docker job\n");
		return -9;
	}

	runArgs.AppendArg("--user");
	std::string uidgidarg;
	formatstr(uidgidarg, "%d:%d", uid, gid);
	runArgs.AppendArg(uidgidarg);

	// Run the command with its arguments in the image.
	runArgs.AppendArg( imageID );

	
	// If no command given, the default command in the image will run
	if (command.length() > 0) {
		runArgs.AppendArg( command );
	}

	runArgs.AppendArgsFromArgList( args );

	MyString displayString;
	runArgs.GetArgsStringForLogging( & displayString );
	dprintf( D_ALWAYS, "Attempting to run: %s\n", displayString.c_str() );

	//
	// If we run Docker attached, we avoid a race condition where
	// 'docker logs --follow' returns before 'docker rm' knows that the
	// container is gone (and refuses to remove it).  Of course, we
	// can't block, so we have a proxy process run attached for us.
	//
	FamilyInfo fi;
	fi.max_snapshot_interval = param_integer( "PID_SNAPSHOT_INTERVAL", 15 );
	int childPID = daemonCore->Create_Process( runArgs.GetArg(0), runArgs,
		PRIV_CONDOR_FINAL, 1, FALSE, FALSE, NULL, "/",
		& fi, NULL, childFDs );

	if( childPID == FALSE ) {
		dprintf( D_ALWAYS | D_FAILURE, "Create_Process() failed.\n" );
		return -1;
	}
	pid = childPID;

	return 0;
}
Beispiel #11
0
// Check that a usable Docker is present: first obtain its version through
// DockerAPI::version(), then run "docker info" and require a zero exit
// code.  At full-debug level the "docker info" output is copied to the
// log.
//
// Return values:
//    0  "docker info" ran and exited with code 0
//   -1  add_docker_arg() failed
//   -2  "docker info" could not be started
//   -3  waiting for "docker info" failed or it exited non-zero
//   -4  DockerAPI::version() failed
int DockerAPI::detect( CondorError & err ) {
	// FIXME: Remove ::version() as a public API and return it from here,
	// because there's no point in doing this twice.
	std::string version;
	int rval = DockerAPI::version( version, err );
	if( rval  != 0 ) {
		dprintf(D_ALWAYS, "DockerAPI::detect() failed to detect the Docker version; assuming absent.\n" );
		return -4;
	}

	ArgList infoArgs;
	if ( ! add_docker_arg(infoArgs))
		return -1;
	infoArgs.AppendArg( "info" );

	MyString displayString;
	infoArgs.GetArgsStringForLogging( & displayString );
	dprintf( D_FULLDEBUG, "Attempting to run: '%s'.\n", displayString.c_str() );

#if 1
	MyPopenTimer pgm;
	if (pgm.start_program(infoArgs, true, NULL, false) < 0) {
		dprintf( D_ALWAYS | D_FAILURE, "Failed to run '%s'.\n", displayString.c_str() );
		return -2;
	}

	// Start from a known value: the message below prints exitCode even
	// when wait_for_exit() itself reports failure, and nothing here
	// guarantees that it stored a code in that case.
	int exitCode = -1;
	if ( ! pgm.wait_for_exit(default_timeout, &exitCode) || exitCode != 0) {
		pgm.close_program(1);
		MyString line;
		line.readLine(pgm.output(), false); line.chomp();
		dprintf( D_ALWAYS, "'%s' did not exit successfully (code %d); the first line of output was '%s'.\n", displayString.c_str(), exitCode, line.c_str());
		return -3;
	}

	if (IsFulldebug(D_ALWAYS)) {
		// Read exactly one line per iteration.  The previous do/while
		// called readLine() in both the body and the condition, so the
		// line fetched by the condition was overwritten before it was
		// logged and every second line went missing.
		MyString line;
		while (line.readLine(pgm.output(), false)) {
			line.chomp();
			dprintf( D_FULLDEBUG, "[docker info] %s\n", line.c_str() );
		}
	}

#else
	FILE * dockerResults = my_popen( infoArgs, "r", 1 , 0, false);
	if( dockerResults == NULL ) {
		dprintf( D_ALWAYS | D_FAILURE, "Failed to run '%s'.\n", displayString.c_str() );
		return -2;
	}

	// Even if we don't care about the success output, the failure output
	// can be handy for debugging...
	char buffer[1024];
	std::vector< std::string > output;
	while( fgets( buffer, 1024, dockerResults ) != NULL ) {
		size_t end = strlen(buffer);
		if (end > 0 && buffer[end-1] == '\n') { buffer[end-1] = '\0'; }
		output.push_back( buffer );
	}
	for( unsigned i = 0; i < output.size(); ++i ) {
		dprintf( D_FULLDEBUG, "[docker info] %s\n", output[i].c_str() );
	}

	int exitCode = my_pclose( dockerResults );
	if( exitCode != 0 ) {
		dprintf( D_ALWAYS, "'%s' did not exit successfully (code %d); the first line of output was '%s'.\n", displayString.c_str(), exitCode, output[0].c_str() );
		return -3;
	}
#endif
	return 0;
}
Beispiel #12
0
// Try to remove a Docker image, then report whether it is still listed.
//
// The result of the "rmi" command itself is deliberately not examined;
// instead "docker images -q <image>" is run afterwards.
//
// Return values:
//    0  "docker images -q" printed nothing (image is gone)
//    1  "docker images -q" printed something (image still present)
//   -1  add_docker_arg() failed
//   -2  "docker images" could not be started
//   -3  waiting for "docker images" failed or it exited non-zero
int
DockerAPI::rmi(const std::string &image, CondorError &err) {
		// First, try to remove the named image
	run_simple_docker_command("rmi", image, default_timeout, err, true);
		
		// That may have succeeded or failed.  It could have
		// failed if the image doesn't exist (anymore), or
		// if someone else deleted it outside of condor.
		// Check to see if the image still exists.  If it
		// has been removed, return 0.

	ArgList args;
	if ( ! add_docker_arg(args))
		return -1;
	args.AppendArg( "images" );
	args.AppendArg( "-q" );
	args.AppendArg( image );

	MyString displayString;
	args.GetArgsStringForLogging( & displayString );
	dprintf( D_FULLDEBUG, "Attempting to run: '%s'.\n", displayString.c_str() );

#if 1
	MyPopenTimer pgm;
	if (pgm.start_program(args, true, NULL, false) < 0) {
		dprintf( D_ALWAYS | D_FAILURE, "Failed to run '%s'.\n", displayString.c_str() );
		return -2;
	}

	// Start from a known value: the message below prints exitCode even
	// when wait_for_exit() itself reports failure, and nothing here
	// guarantees that it stored a code in that case.
	int exitCode = -1;
	if ( ! pgm.wait_for_exit(default_timeout, &exitCode) || exitCode != 0) {
		pgm.close_program(1);
		MyString line;
		line.readLine(pgm.output(), false); line.chomp();
		dprintf( D_ALWAYS, "'%s' did not exit successfully (code %d); the first line of output was '%s'.\n", displayString.c_str(), exitCode, line.c_str());
		return -3;
	}

	// Any output at all means the image is still listed.
	return pgm.output_size() > 0;
#else
	FILE * dockerResults = my_popen( args, "r", 1 , 0, false);
	if( dockerResults == NULL ) {
		dprintf( D_ALWAYS | D_FAILURE, "Failed to run '%s'.\n", displayString.c_str() );
		return -2;
	}

	char buffer[1024];
	std::vector< std::string > output;
	while( fgets( buffer, 1024, dockerResults ) != NULL ) {
		size_t end = strlen(buffer);
		if (end > 0 && buffer[end-1] == '\n') { buffer[end-1] = '\0'; }
		output.push_back( buffer );
	}

	int exitCode = my_pclose( dockerResults );
	if( exitCode != 0 ) {
		dprintf( D_ALWAYS, "'%s' did not exit successfully (code %d); the first line of output was '%s'.\n", displayString.c_str(), exitCode, output[0].c_str() );
		return -3;
	}

	if (output.size() == 0) {
		return 0;
	} else {
		return 1;
	}
#endif
}
Beispiel #13
0
int DockerAPI::rm( const std::string & containerID, CondorError & /* err */ ) {

	ArgList rmArgs;
	if ( ! add_docker_arg(rmArgs))
		return -1;
	rmArgs.AppendArg( "rm" );
	rmArgs.AppendArg( "-f" );  // if for some reason still running, kill first
	rmArgs.AppendArg( "-v" );  // also remove the volume
	rmArgs.AppendArg( containerID.c_str() );

	MyString displayString;
	rmArgs.GetArgsStringForLogging( & displayString );
	dprintf( D_FULLDEBUG, "Attempting to run: %s\n", displayString.c_str() );

	// Read from Docker's combined output and error streams.
#if 1
	MyPopenTimer pgm;
	if (pgm.start_program( rmArgs, true, NULL, false ) < 0) {
		dprintf( D_ALWAYS | D_FAILURE, "Failed to run '%s'.\n", displayString.c_str() );
		return -2;
	}
	const char * got_output = pgm.wait_and_close(default_timeout);

	// On a success, Docker writes the containerID back out.
	MyString line;
	if ( ! got_output || ! line.readLine(pgm.output(), false)) {
		int error = pgm.error_code();
		if( error ) {
			dprintf( D_ALWAYS | D_FAILURE, "Failed to read results from '%s': '%s' (%d)\n", displayString.c_str(), pgm.error_str(), error );
			if (pgm.was_timeout()) {
				dprintf( D_ALWAYS | D_FAILURE, "Declaring a hung docker\n");
				return docker_hung;
			}
		} else {
			dprintf( D_ALWAYS | D_FAILURE, "'%s' returned nothing.\n", displayString.c_str() );
		}
		return -3;
	}

	line.chomp(); line.trim();
	if (line != containerID.c_str()) {
		// Didn't get back the result I expected, report the error and check to see if docker is hung.
		return check_if_docker_offline(pgm, "Docker remove", -4);
	}
#else
	FILE * dockerResults = my_popen( rmArgs, "r", 1 , 0, false);
	if( dockerResults == NULL ) {
		dprintf( D_ALWAYS | D_FAILURE, "Failed to run '%s'.\n", displayString.c_str() );
		return -2;
	}

	// On a success, Docker writes the containerID back out.
	char buffer[1024];
	if( NULL == fgets( buffer, 1024, dockerResults ) ) {
		if( errno ) {
			dprintf( D_ALWAYS | D_FAILURE, "Failed to read results from '%s': '%s' (%d)\n", displayString.c_str(), strerror( errno ), errno );
		} else {
			dprintf( D_ALWAYS | D_FAILURE, "'%s' returned nothing.\n", displayString.c_str() );
		}
		my_pclose( dockerResults );
		return -3;
	}

	int length = strlen( buffer );
	if( length < 1 || strncmp( buffer, containerID.c_str(), length - 1 ) != 0 ) {
		dprintf( D_ALWAYS | D_FAILURE, "Docker remove failed, printing first few lines of output.\n" );
		dprintf( D_ALWAYS | D_FAILURE, "%s", buffer );
		while( NULL != fgets( buffer, 1024, dockerResults ) ) {
			dprintf( D_ALWAYS | D_FAILURE, "%s", buffer );
		}
		my_pclose( dockerResults );
		return -4;
	}

	my_pclose( dockerResults );
#endif
	return 0;
}
Beispiel #14
0
// Log the output of a failed docker command and, when that output gives no
// sign that Docker answered, run 'docker info' to see whether Docker responds.
//
//   pgmIn                a finished (closed) MyPopenTimer holding the failed command's output
//   cmd_str              human-readable name of the failed command, used in log messages
//   original_error_code  value to return when Docker is not judged to be hung
//
// Returns DockerAPI::docker_hung if the 'docker info' probe cannot be started
// or yields no output in time; otherwise returns original_error_code.
static int check_if_docker_offline(MyPopenTimer & pgmIn, const char * cmd_str, int original_error_code)
{
	int rval = original_error_code;
	// this should not be called with a program that is still running.
	ASSERT(pgmIn.is_closed());

	MyString line;
	MyStringCharSource * src = NULL;
	if (pgmIn.output_size() > 0) {
		src = &pgmIn.output();
		// the caller may already have consumed part of the output; start over.
		src->rewind();
	}

	bool check_for_hung_docker = true; // if no output, we should check for hung docker.
	dprintf( D_ALWAYS | D_FAILURE, "%s failed, %s output.\n", cmd_str, src ? "printing first few lines of" : "no" );
	if (src) {
		check_for_hung_docker = false; // if we got output, assume docker is not hung.
		for (int ii = 0; ii < 10; ++ii) {
			if ( ! line.readLine(*src, false)) break;
			// strip the line terminator so the log does not get blank lines,
			// matching how the 'docker info' output is logged below.
			line.chomp();
			dprintf( D_ALWAYS | D_FAILURE, "%s\n", line.c_str() );

			// if we got something resembling "/var/run/docker.sock: resource temporarily unavailable"
			// then we should check for a hung docker.
			const char * p = strstr(line.c_str(), ".sock: resource ");
			if (p && strstr(p, "unavailable")) {
				check_for_hung_docker = true;
			}
		}
	}

	if (check_for_hung_docker) {
		dprintf( D_ALWAYS, "Checking to see if Docker is offline\n");

		ArgList infoArgs;
		if ( ! add_docker_arg(infoArgs)) {
			// Without a docker command line there is nothing to probe, so
			// keep the caller's error code rather than guessing at a hang.
			dprintf( D_ALWAYS | D_FAILURE, "Unable to build the docker command line; skipping the offline check.\n" );
			return rval;
		}
		infoArgs.AppendArg( "info" );
		MyString displayString;
		infoArgs.GetArgsStringForLogging( & displayString );

		MyPopenTimer pgm2;
		if (pgm2.start_program(infoArgs, true, NULL, false) < 0) {
			dprintf( D_ALWAYS | D_FAILURE, "Failed to run '%s'.\n", displayString.c_str() );
			rval = DockerAPI::docker_hung;
		} else {
			int exitCode = 0;
			// give 'docker info' up to 60 seconds to answer.
			if ( ! pgm2.wait_for_exit(60, &exitCode) || pgm2.output_size() <= 0) {
				dprintf( D_ALWAYS | D_FAILURE, "Failed to get output from '%s' : %s.\n", displayString.c_str(), pgm2.error_str() );
				rval = DockerAPI::docker_hung;
			} else {
				while (line.readLine(pgm2.output(),false)) {
					line.chomp();
					dprintf( D_FULLDEBUG, "[Docker Info] %s\n", line.c_str() );
				}
			}
		}

		if (rval == DockerAPI::docker_hung) {
			dprintf( D_ALWAYS | D_FAILURE, "Docker is not responding. returning docker_hung error code.\n");
		}
	}

	return rval;
}
Beispiel #15
0
// Second pass over the command line: with the query object already created,
// register output formats, extend the attribute projection list and add the
// constraints implied by each argument.  Switches that only matter to the
// first pass are skipped here together with their parameters.
//
// NOTE(review): several branches read argv[i+1] / argv[i+2] without a bounds
// check; presumably firstPass() has already rejected command lines with
// missing parameters -- confirm.
// NOTE(review): the sprintf() calls below write user-supplied text into the
// file-level 'buffer', whose size is not visible here -- confirm it is large
// enough or switch to a bounded formatter.
void
secondPass (int argc, char *argv[])
{
	const char * pcolon = NULL;
	char *daemonname;
	for (int i = 1; i < argc; i++) {
		// omit parameters which qualify switches
		if( matchPrefix(argv[i],"-pool", 2) || matchPrefix(argv[i],"-direct", 4) ) {
			i++;
			continue;
		}
		if( matchPrefix(argv[i],"-subsystem", 5) ) {
			i++;
			continue;
		}
		if (matchPrefix (argv[i], "-format", 2)) {
			// -format <fmt> <expr>
			pm.registerFormat (argv[i+1], argv[i+2]);

			// every attribute referenced by the expression must be fetched
			StringList attributes;
			ClassAd ad;
			if(!ad.GetExprReferences(argv[i+2],NULL,&attributes)){
				fprintf( stderr, "Error:  Parse error of: %s\n", argv[i+2]);
				exit(1);
			}

			attributes.rewind();
			char const *s;
			while( (s=attributes.next()) ) {
				projList.AppendArg(s);
			}

			if (diagnose) {
				printf ("Arg %d --- register format [%s] for [%s]\n",
						i, argv[i+1], argv[i+2]);
			}
			i += 2;
			continue;
		}
		if (*argv[i] == '-' &&
			(is_arg_colon_prefix(argv[i]+1, "autoformat", &pcolon, 5) || 
			 is_arg_colon_prefix(argv[i]+1, "af", &pcolon, 2)) ) {
				// make sure we have at least one more argument
			if ( !argv[i+1] || *(argv[i+1]) == '-') {
				fprintf( stderr, "Error: Argument %s requires "
						 "at least one attribute parameter\n", argv[i] );
				fprintf( stderr, "Use \"%s -help\" for details\n", myName );
				exit( 1 );
			}

			// option letters after the ':' select separators, labels,
			// headings and the value rendering.
			bool flabel = false;
			bool fCapV  = false;
			bool fRaw = false;
			bool fheadings = false;
			const char * prowpre = NULL;
			const char * pcolpre = " ";
			const char * pcolsux = NULL;
			if (pcolon) {
				++pcolon;
				while (*pcolon) {
					switch (*pcolon)
					{
						case ',': pcolsux = ","; break;
						case 'n': pcolsux = "\n"; break;
						case 'g': pcolpre = NULL; prowpre = "\n"; break;
						case 't': pcolpre = "\t"; break;
						case 'l': flabel = true; break;
						case 'V': fCapV = true; break;
						case 'r': case 'o': fRaw = true; break;
						case 'h': fheadings = true; break;
					}
					++pcolon;
				}
			}
			pm.SetAutoSep(prowpre, pcolpre, pcolsux, "\n");

			// consume every following argument up to the next one that
			// starts with '-'; each is an attribute or expression to print.
			while (argv[i+1] && *(argv[i+1]) != '-') {
				++i;
				ClassAd ad;
				StringList attributes;
				if(!ad.GetExprReferences(argv[i],NULL,&attributes)){
					fprintf( stderr, "Error:  Parse error of: %s\n", argv[i]);
					exit(1);
				}

				attributes.rewind();
				char const *s;
				while ((s = attributes.next())) {
					projList.AppendArg(s);
				}

				MyString lbl = "";
				int wid = 0;
				int opts = FormatOptionNoTruncate;
				if (fheadings || pm_head.Length() > 0) { 
					// once any heading exists every column needs one, so that
					// headings stay aligned with their columns.
					const char * hd = fheadings ? argv[i] : "(expr)";
					wid = 0 - (int)strlen(hd); 
					opts = FormatOptionAutoWidth | FormatOptionNoTruncate; 
					pm_head.Append(hd);
				}
				else if (flabel) { lbl.formatstr("%s = ", argv[i]); wid = 0; opts = 0; }
				lbl += fRaw ? "%r" : (fCapV ? "%V" : "%v");
				if (diagnose) {
					printf ("Arg %d --- register format [%s] width=%d, opt=0x%x for [%s]\n",
							i, lbl.Value(), wid, opts,  argv[i]);
				}
				pm.registerFormat(lbl.Value(), wid, opts, argv[i]);
			}
			// if autoformat list ends in a '-' without any characters after it, just eat the arg and keep going.
			if (i+1 < argc && '-' == (argv[i+1])[0] && 0 == (argv[i+1])[1]) {
				++i;
			}
			continue;
		}
		if (is_dash_arg_colon_prefix(argv[i], "print-format", &pcolon, 2)) {
			// a lone "-" is accepted as the file name; any other dash argument is not
			if ( (i+1 >= argc)  || (*(argv[i+1]) == '-' && (argv[i+1])[1] != 0)) {
				fprintf( stderr, "Error: Argument -print-format requires a filename argument\n");
				exit( 1 );
			}
			// hack allow -pr ! to disable use of user-default print format files.
			if (MATCH == strcmp(argv[i+1], "!")) {
				++i;
				disable_user_print_files = true;
				continue;
			}
			ppTotalStyle = ppStyle;
			setPPstyle (PP_CUSTOM, i, argv[i]);
			setPPwidth();
			++i; // skip to the next argument.
			if (set_status_print_mask_from_stream(argv[i], true, &mode_constraint) < 0) {
				fprintf(stderr, "Error: invalid select file %s\n", argv[i]);
				exit (1);
			}
			if (mode_constraint) {
				query->addANDConstraint(mode_constraint);
			}
			using_print_format = true; // so we can hack totals.
			continue;
		}
		if (matchPrefix (argv[i], "-target", 5)) {
			i++;
			continue;
		}
		if (is_dash_arg_prefix(argv[i], "ads", 2)) {
			++i;
			continue;
		}
		if( matchPrefix(argv[i], "-sort", 3) ) {
			i++;
			if ( ! noSort) {
				// only ads in which the sort expression is defined are requested
				sprintf( buffer, "%s =!= UNDEFINED", argv[i] );
				query->addANDConstraint( buffer );
			}
			continue;
		}
		
		if (matchPrefix (argv[i], "-statistics", 6)) {
			// skips two parameters; the published value comes from the
			// file-level 'statistics' variable rather than from argv here.
			i += 2;
			sprintf(buffer,"STATISTICS_TO_PUBLISH = \"%s\"", statistics);
			if (diagnose) {
				printf ("[%s]\n", buffer);
			}
			query->addExtraAttribute(buffer);
			continue;
		}

		if (matchPrefix (argv[i], "-attributes", 3) ) {
			// parse attributes to be selected and split them along ","
			StringList more_attrs(argv[i+1],",");
			char const *s;
			more_attrs.rewind();
			while( (s=more_attrs.next()) ) {
				projList.AppendArg(s);
				dashAttributes.append(s);
			}
			i++;
			continue;
		}
		


		// figure out what the other parameters should do
		if (*argv[i] != '-') {
			// display extra information for diagnosis
			if (diagnose) {
				printf ("Arg %d (%s) --- adding constraint", i, argv[i]);
			}

			if( !(daemonname = get_daemon_name(argv[i])) ) {
				if ( (mode==MODE_SCHEDD_SUBMITTORS) && strchr(argv[i],'@') ) {
					// For a submittor query, it is possible that the
					// hostname is really a UID_DOMAIN.  And there is
					// no requirement that UID_DOMAIN actually have
					// an inverse lookup in DNS...  so if get_daemon_name()
					// fails with a fully qualified submittor lookup, just
					// use what we are given and do not flag an error.
					daemonname = strnewp(argv[i]);
				} else {
					dprintf_WriteOnErrorBuffer(stderr, true);
					fprintf( stderr, "%s: unknown host %s\n",
								 argv[0], get_host_part(argv[i]) );
					exit(1);
				}
			}

			switch (mode) {
			  case MODE_DEFRAG_NORMAL:
			  case MODE_STARTD_NORMAL:
			  case MODE_STARTD_COD:
#ifdef HAVE_EXT_POSTGRESQL
			  case MODE_QUILL_NORMAL:
#endif /* HAVE_EXT_POSTGRESQL */
			  case MODE_SCHEDD_NORMAL:
			  case MODE_SCHEDD_SUBMITTORS:
			  case MODE_MASTER_NORMAL:
			  case MODE_COLLECTOR_NORMAL:
			  case MODE_CKPT_SRVR_NORMAL:
			  case MODE_NEGOTIATOR_NORMAL:
			  case MODE_STORAGE_NORMAL:
			  case MODE_ANY_NORMAL:
			  case MODE_GENERIC_NORMAL:
			  case MODE_STARTD_AVAIL:
			  case MODE_OTHER:
			  case MODE_GRID_NORMAL:
			  case MODE_HAD_NORMAL:
				// match the ad either by its name or by its machine
			  	sprintf(buffer,"(%s==\"%s\") || (%s==\"%s\")",
						ATTR_NAME, daemonname, ATTR_MACHINE, daemonname );
				if (diagnose) {
					printf ("[%s]\n", buffer);
				}
				query->addORConstraint (buffer);
				break;

			  case MODE_STARTD_RUN:
				// here the argument is matched against the remote user, as given
				sprintf (buffer,"%s == \"%s\"",ATTR_REMOTE_USER,argv[i]);
				if (diagnose) {
					printf ("[%s]\n", buffer);
				}
				query->addORConstraint (buffer);
				break;

			  default:
				fprintf(stderr,"Error: Don't know how to process %s\n",argv[i]);
			}
			delete [] daemonname;
			daemonname = NULL;
		} else
		if (matchPrefix (argv[i], "-constraint", 4)) {
			if (diagnose) {
				printf ("[%s]\n", argv[i+1]);
			}
			query->addANDConstraint (argv[i+1]);
			i++;
		}
	}
}
Beispiel #16
0
// Run "<docker> <command> <container>" and wait up to 'timeout' for it.
//
//   command        docker sub-command to run
//   container      its single argument (a container ID, or an image name)
//   timeout        how long to wait for the command, passed to wait_and_close()
//   ignore_output  when false, the first line of output must equal 'container'
//
// Returns 0 on success, -1 if the docker command line cannot be built, -2 if
// the command cannot be started, -3 if it produced no usable output,
// DockerAPI::docker_hung if it timed out, and -4 if the output was not the
// expected echo of 'container'.
int
run_simple_docker_command(const std::string &command, const std::string &container, int timeout, CondorError &, bool ignore_output)
{
	ArgList args;
	if ( ! add_docker_arg(args))
		return -1;
	args.AppendArg( command );
	args.AppendArg( container.c_str() );

	MyString displayString;
	args.GetArgsStringForLogging( & displayString );
	dprintf( D_FULLDEBUG, "Attempting to run: %s\n", displayString.c_str() );

#if 1
	MyPopenTimer pgm;
	if (pgm.start_program( args, true, NULL, false ) < 0) {
		dprintf( D_ALWAYS | D_FAILURE, "Failed to run '%s'.\n", displayString.c_str() );
		return -2;
	}

	// Note that empty output counts as a failure even when ignore_output is set.
	if ( ! pgm.wait_and_close(timeout) || pgm.output_size() <= 0) {
		int error = pgm.error_code();
		if( error ) {
			dprintf( D_ALWAYS | D_FAILURE, "Failed to read results from '%s': '%s' (%d)\n", displayString.c_str(), pgm.error_str(), error );
			if (pgm.was_timeout()) {
				dprintf( D_ALWAYS | D_FAILURE, "Declaring a hung docker\n");
				return DockerAPI::docker_hung;
			}
		} else {
			dprintf( D_ALWAYS | D_FAILURE, "'%s' returned nothing.\n", displayString.c_str() );
		}
		return -3;
	}

	// On a success, Docker writes the containerID back out.
	MyString line;
	line.readLine(pgm.output());
	line.chomp(); line.trim();
	if (!ignore_output && line != container.c_str()) {
		// Didn't get back the result I expected; report the first few lines of output.
		dprintf( D_ALWAYS | D_FAILURE, "Docker %s failed, printing first few lines of output.\n", command.c_str());
		// The first line has already been consumed above, and it usually
		// carries the actual error message, so log it before reading on.
		dprintf( D_ALWAYS | D_FAILURE, "%s\n", line.c_str() );
		for (int ii = 1; ii < 10; ++ii) {
			if ( ! line.readLine(pgm.output(), false)) break;
			line.chomp();
			dprintf( D_ALWAYS | D_FAILURE, "%s\n", line.c_str() );
		}
		return -4;
	}

#else
	// Disabled fallback that uses my_popen() directly.
	// Read from Docker's combined output and error streams.
	FILE * dockerResults = my_popen( args, "r", 1 , 0, false);
	if( dockerResults == NULL ) {
		dprintf( D_ALWAYS | D_FAILURE, "Failed to run '%s'.\n", displayString.c_str() );
		return -2;
	}

	// On a success, Docker writes the containerID back out.
	char buffer[1024];
	if( NULL == fgets( buffer, 1024, dockerResults ) ) {
		if( errno ) {
			dprintf( D_ALWAYS | D_FAILURE, "Failed to read results from '%s': '%s' (%d)\n", displayString.c_str(), strerror( errno ), errno );
		} else {
			dprintf( D_ALWAYS | D_FAILURE, "'%s' returned nothing.\n", displayString.c_str() );
		}
		my_pclose( dockerResults );
		return -3;
	}

	size_t length = strlen( buffer );
	if (!ignore_output) {
		if( length < 1 || strncmp( buffer, container.c_str(), length - 1 ) != 0 ) {
			dprintf( D_ALWAYS | D_FAILURE, "Docker %s failed, printing first few lines of output.\n", command.c_str() );
			dprintf( D_ALWAYS | D_FAILURE, "%s", buffer );
			while( NULL != fgets( buffer, 1024, dockerResults ) ) {
				dprintf( D_ALWAYS | D_FAILURE, "%s", buffer );
			}
			my_pclose( dockerResults );
			return -4;
		}
	}

	my_pclose( dockerResults );
#endif
	return 0;
}
Beispiel #17
0
// Entry point of the status query tool.
//
// Outline, as implemented below:
//   1. read the configuration and run firstPass() to settle the query mode;
//   2. create the query object and pick a default pretty-print style;
//   3. add the constraints implied by the mode and by the java / offline /
//      absent / vm switches;
//   4. run secondPass() for formats, projections and per-argument constraints;
//   5. fetch the ads from a file, a specific daemon or pool, or the
//      configured collectors;
//   6. sort the result (unless disabled) and print it.
//
// Returns 0 on success; every failure path calls exit(1), and so does the
// diagnose path after printing the query ad.
int
main (int argc, char *argv[])
{
#if !defined(WIN32)
	install_sig_handler(SIGPIPE, (SIG_HANDLER)SIG_IGN );
#endif

	// initialize to read from config file
	myDistro->Init( argc, argv );
	myName = argv[0];
	config();
	dprintf_config_tool_on_error(0);

	// The arguments take two passes to process --- the first pass
	// figures out the mode, after which we can instantiate the required
	// query object.  We add implied constraints from the command line in
	// the second pass.
	firstPass (argc, argv);
	
	// if the mode has not been set, it is STARTD_NORMAL
	if (mode == MODE_NOTSET) {
		setMode (MODE_STARTD_NORMAL, 0, DEFAULT);
	}

	// instantiate query object
	if (!(query = new CondorQuery (type))) {
		dprintf_WriteOnErrorBuffer(stderr, true);
		fprintf (stderr, "Error:  Out of memory\n");
		exit (1);
	}
	// if a first-pass setMode set a mode_constraint, apply it now to the query object
	if (mode_constraint && ! explicit_format) {
		query->addANDConstraint(mode_constraint);
	}

	// set pretty print style implied by the type of entity being queried
	// but do it with default priority, so that explicitly requested options
	// can override it
	switch (type)
	{
#ifdef HAVE_EXT_POSTGRESQL
	  case QUILL_AD:
		setPPstyle(PP_QUILL_NORMAL, 0, DEFAULT);
		break;
#endif /* HAVE_EXT_POSTGRESQL */


	  case DEFRAG_AD:
		setPPstyle(PP_GENERIC_NORMAL, 0, DEFAULT);
		break;

	  case STARTD_AD:
		setPPstyle(PP_STARTD_NORMAL, 0, DEFAULT);
		break;

	  case SCHEDD_AD:
		setPPstyle(PP_SCHEDD_NORMAL, 0, DEFAULT);
		break;

	  case MASTER_AD:
		setPPstyle(PP_MASTER_NORMAL, 0, DEFAULT);
		break;

	  case CKPT_SRVR_AD:
		setPPstyle(PP_CKPT_SRVR_NORMAL, 0, DEFAULT);
		break;

	  case COLLECTOR_AD:
		setPPstyle(PP_COLLECTOR_NORMAL, 0, DEFAULT);
		break;

	  case STORAGE_AD:
		setPPstyle(PP_STORAGE_NORMAL, 0, DEFAULT);
		break;

	  case NEGOTIATOR_AD:
		setPPstyle(PP_NEGOTIATOR_NORMAL, 0, DEFAULT);
		break;

	  case GRID_AD:
		setPPstyle(PP_GRID_NORMAL, 0, DEFAULT);
		break;

	  case GENERIC_AD:
		setPPstyle(PP_GENERIC, 0, DEFAULT);
		break;

	  case ANY_AD:
		setPPstyle(PP_ANY_NORMAL, 0, DEFAULT);
		break;

	  default:
		setPPstyle(PP_VERBOSE, 0, DEFAULT);
	}

	// set the constraints implied by the mode
	switch (mode) {
#ifdef HAVE_EXT_POSTGRESQL
	  case MODE_QUILL_NORMAL:
#endif /* HAVE_EXT_POSTGRESQL */

	  case MODE_DEFRAG_NORMAL:
	  case MODE_STARTD_NORMAL:
	  case MODE_MASTER_NORMAL:
	  case MODE_CKPT_SRVR_NORMAL:
	  case MODE_SCHEDD_NORMAL:
	  case MODE_SCHEDD_SUBMITTORS:
	  case MODE_COLLECTOR_NORMAL:
	  case MODE_NEGOTIATOR_NORMAL:
	  case MODE_STORAGE_NORMAL:
	  case MODE_GENERIC_NORMAL:
	  case MODE_ANY_NORMAL:
	  case MODE_GRID_NORMAL:
	  case MODE_HAD_NORMAL:
		// these modes imply no extra constraint
		break;

	  case MODE_OTHER:
			// tell the query object what the type we're querying is
		query->setGenericQueryType(genericType);
		free(genericType);
		genericType = NULL;
		break;

	  case MODE_STARTD_AVAIL:
			  // For now, -avail shows you machines avail to anyone.
		sprintf (buffer, "%s == \"%s\"", ATTR_STATE,
					state_to_string(unclaimed_state));
		if (diagnose) {
			printf ("Adding constraint [%s]\n", buffer);
		}
		query->addORConstraint (buffer);
		break;


	  case MODE_STARTD_RUN:
		sprintf (buffer, "%s == \"%s\"", ATTR_STATE,
					state_to_string(claimed_state));
		if (diagnose) {
			printf ("Adding constraint [%s]\n", buffer);
		}
		query->addORConstraint (buffer);
		break;

	  case MODE_STARTD_COD:
		sprintf (buffer, "%s > 0", ATTR_NUM_COD_CLAIMS );
		if (diagnose) {
			printf ("Adding constraint [%s]\n", buffer);
		}
		query->addORConstraint (buffer);
		break;

	  default:
		break;
	}	

	if(javaMode) {
		sprintf( buffer, "%s == TRUE", ATTR_HAS_JAVA );
		if (diagnose) {
			printf ("Adding constraint [%s]\n", buffer);
		}
		query->addANDConstraint (buffer);
		
		projList.AppendArg(ATTR_HAS_JAVA);
		projList.AppendArg(ATTR_JAVA_MFLOPS);
		projList.AppendArg(ATTR_JAVA_VENDOR);
		projList.AppendArg(ATTR_JAVA_VERSION);

	}

	if(offlineMode) {
		query->addANDConstraint( "size( OfflineUniverses ) != 0" );

		projList.AppendArg( "OfflineUniverses" );

		//
		// Since we can't add a regex to a projection, explicitly list all
		// the attributes we know about.
		//

		projList.AppendArg( "HasVM" );
		projList.AppendArg( "VMOfflineReason" );
		projList.AppendArg( "VMOfflineTime" );
	}

	if(absentMode) {
		sprintf( buffer, "%s == TRUE", ATTR_ABSENT );
		if (diagnose) {
			printf( "Adding constraint %s\n", buffer );
		}
		query->addANDConstraint( buffer );

		projList.AppendArg( ATTR_ABSENT );
		projList.AppendArg( ATTR_LAST_HEARD_FROM );
		projList.AppendArg( ATTR_CLASSAD_LIFETIME );
	}

	if(vmMode) {
		sprintf( buffer, "%s == TRUE", ATTR_HAS_VM);
		if (diagnose) {
			printf ("Adding constraint [%s]\n", buffer);
		}
		query->addANDConstraint (buffer);

		projList.AppendArg(ATTR_VM_TYPE);
		projList.AppendArg(ATTR_VM_MEMORY);
		projList.AppendArg(ATTR_VM_NETWORKING);
		projList.AppendArg(ATTR_VM_NETWORKING_TYPES);
		projList.AppendArg(ATTR_VM_HARDWARE_VT);
		projList.AppendArg(ATTR_VM_AVAIL_NUM);
		projList.AppendArg(ATTR_VM_ALL_GUEST_MACS);
		projList.AppendArg(ATTR_VM_ALL_GUEST_IPS);
		projList.AppendArg(ATTR_VM_GUEST_MAC);
		projList.AppendArg(ATTR_VM_GUEST_IP);

	}

	// second pass:  add regular parameters and constraints
	if (diagnose) {
		printf ("----------\n");
	}

	secondPass (argc, argv);

	// initialize the totals object
	if (ppStyle == PP_CUSTOM && using_print_format) {
		if (pmHeadFoot & HF_NOSUMMARY) ppTotalStyle = PP_CUSTOM;
	} else {
		ppTotalStyle = ppStyle;
	}
	TrackTotals	totals(ppTotalStyle);

	// fetch the query
	QueryResult q;

	if ((mode == MODE_STARTD_NORMAL) && (ppStyle == PP_STARTD_NORMAL)) {
		// the normal startd table needs exactly these attributes
		projList.AppendArg("Name");
		projList.AppendArg("Machine");
		projList.AppendArg("Opsys");
		projList.AppendArg("Arch");
		projList.AppendArg("State");
		projList.AppendArg("Activity");
		projList.AppendArg("LoadAvg");
		projList.AppendArg("Memory");
		projList.AppendArg("ActvtyTime");
		projList.AppendArg("MyCurrentTime");
		projList.AppendArg("EnteredCurrentActivity");
	} else if( ppStyle == PP_VERBOSE ) {
		// Remove everything from the projection list if we're displaying
		// the "long form" of the ads.
		projList.Clear();
		// but if -attributes was supplied, show only those attributes
		if ( ! dashAttributes.isEmpty()) {
			const char * s;
			dashAttributes.rewind();
			while ((s = dashAttributes.next())) {
				projList.AppendArg(s);
			}
		}
	}

	if( projList.Count() > 0 ) {
		char **attr_list = projList.GetStringArray();
		query->setDesiredAttrs(attr_list);
		deleteStringArray(attr_list);
	}

	// if diagnose was requested, just print the query ad
	if (diagnose) {
		ClassAd 	queryAd;

		// print diagnostic information about inferred internal state
		setMode ((Mode) 0, 0, NULL);
		setType (NULL, 0, NULL);
		setPPstyle ((ppOption) 0, 0, DEFAULT);
		printf ("----------\n");

		q = query->getQueryAd (queryAd);
		fPrintAd (stdout, queryAd);

		printf ("----------\n");
		fprintf (stderr, "Result of making query ad was:  %d\n", q);
		exit (1);
	}

	// Address (host:port) is taken from requested pool, if given.
	char* addr = (NULL != pool) ? pool->addr() : NULL;
	Daemon* requested_daemon = pool;

	// If we're in "direct" mode, then we attempt to locate the daemon
	// associated with the requested subsystem (here encoded by value of mode)
	// In this case the host:port of pool (if given) denotes which
	// pool is being consulted
	if( direct ) {
		Daemon *d = NULL;
		switch( mode ) {
		case MODE_MASTER_NORMAL:
			d = new Daemon( DT_MASTER, direct, addr );
			break;
		case MODE_STARTD_NORMAL:
		case MODE_STARTD_AVAIL:
		case MODE_STARTD_RUN:
		case MODE_STARTD_COD:
			d = new Daemon( DT_STARTD, direct, addr );
			break;

#ifdef HAVE_EXT_POSTGRESQL
		case MODE_QUILL_NORMAL:
			d = new Daemon( DT_QUILL, direct, addr );
			break;
#endif /* HAVE_EXT_POSTGRESQL */

		case MODE_SCHEDD_NORMAL:
		case MODE_SCHEDD_SUBMITTORS:
			d = new Daemon( DT_SCHEDD, direct, addr );
			break;
		case MODE_NEGOTIATOR_NORMAL:
			d = new Daemon( DT_NEGOTIATOR, direct, addr );
			break;
		case MODE_CKPT_SRVR_NORMAL:
		case MODE_COLLECTOR_NORMAL:
		case MODE_LICENSE_NORMAL:
		case MODE_STORAGE_NORMAL:
		case MODE_GENERIC_NORMAL:
		case MODE_ANY_NORMAL:
		case MODE_OTHER:
		case MODE_GRID_NORMAL:
		case MODE_HAD_NORMAL:
				// These have to go to the collector, anyway.
			break;
		default:
			fprintf( stderr, "Error:  Illegal mode %d\n", mode );
			exit( 1 );
			break;
		}

		// Here is where we actually override 'addr', if we can obtain
		// address of the requested daemon/subsys.  If it can't be
		// located, then fail with error msg.
		// 'd' will be null (unset) if mode is one of above that must go to
		// collector (MODE_ANY_NORMAL, MODE_COLLECTOR_NORMAL, etc)
		if (NULL != d) {
			if( d->locate() ) {
				addr = d->addr();
				requested_daemon = d;
			} else {
				const char* id = d->idStr();
				if (NULL == id) id = d->name();
				dprintf_WriteOnErrorBuffer(stderr, true);
				if (NULL == id) id = "daemon";
				fprintf(stderr, "Error: Failed to locate %s\n", id);
				fprintf(stderr, "%s\n", d->error());
				exit( 1 );
			}
		}
	}

	ClassAdList result;
	CondorError errstack;
	if (NULL != ads_file) {
		// read the ads from a file, filtered by the query's requirements
		MyString req; // query requirements
		q = query->getRequirements(req);
		const char * constraint = req.empty() ? NULL : req.c_str();
		if (read_classad_file(ads_file, result, constraint)) {
			q = Q_OK;
		}
	} else if (NULL != addr) {
			// this case executes if pool was provided, or if in "direct" mode with
			// subsystem that corresponds to a daemon (above).
			// Here 'addr' represents either the host:port of requested pool, or
			// alternatively the host:port of daemon associated with requested subsystem (direct mode)
		q = query->fetchAds (result, addr, &errstack);
	} else {
			// otherwise obtain list of collectors and submit query that way
		CollectorList * collectors = CollectorList::create();
		q = collectors->query (*query, result, &errstack);
		delete collectors;
	}
		

	// if any error was encountered during the query, report it and exit 
	if (Q_OK != q) {

		dprintf_WriteOnErrorBuffer(stderr, true);
			// we can always provide these messages:
		fprintf( stderr, "Error: %s\n", getStrQueryResult(q) );
		fprintf( stderr, "%s\n", errstack.getFullText(true).c_str() );

		if ((NULL != requested_daemon) && ((Q_NO_COLLECTOR_HOST == q) ||
			(requested_daemon->type() == DT_COLLECTOR)))
		{
				// Specific long message if connection to collector failed.
			const char* fullhost = requested_daemon->fullHostname();
			if (NULL == fullhost) fullhost = "<unknown_host>";
			const char* daddr = requested_daemon->addr();
			if (NULL == daddr) daddr = "<unknown>";
			char info[1000];
				// bounded formatting: host and address strings are not
				// limited in length here, so never write past 'info'.
			snprintf(info, sizeof(info), "%s (%s)", fullhost, daddr);
			printNoCollectorContact( stderr, info, !expert );
		} else if ((NULL != requested_daemon) && (Q_COMMUNICATION_ERROR == q)) {
				// more helpful message for failure to connect to some daemon/subsys
			const char* id = requested_daemon->idStr();
			if (NULL == id) id = requested_daemon->name();
			if (NULL == id) id = "daemon";
			const char* daddr = requested_daemon->addr();
			if (NULL == daddr) daddr = "<unknown>";
			fprintf(stderr, "Error: Failed to contact %s at %s\n", id, daddr);
		}

		// fail
		exit (1);
	}

	if (noSort) {
		// do nothing 
	} else if (sortSpecs.empty()) {
		// default classad sorting
		result.Sort((SortFunctionType)lessThanFunc);
	} else {
		// User requested custom sorting expressions:
		// insert attributes related to custom sorting
		result.Open();
		while (ClassAd* ad = result.Next()) {
			for (vector<SortSpec>::iterator ss(sortSpecs.begin());  ss != sortSpecs.end();  ++ss) {
				ss->expr->SetParentScope(ad);
				classad::Value v;
				ss->expr->Evaluate(v);
				stringstream vs;
				// This will properly render all supported value types,
				// including undefined and error, although current semantic
				// pre-filters classads where sort expressions are undef/err:
				vs << ((v.IsStringValue())?"\"":"") << v << ((v.IsStringValue())?"\"":"");
				ad->AssignExpr(ss->keyAttr.c_str(), vs.str().c_str());
				// Save the full expr in case user wants to examine on output:
				ad->AssignExpr(ss->keyExprAttr.c_str(), ss->arg.c_str());
			}
		}

		result.Open();
		result.Sort((SortFunctionType)customLessThanFunc);
	}

	
	// output result
	prettyPrint (result, &totals);
	
	delete query;

	return 0;
}
Beispiel #18
0
int
OsProc::StartJob(FamilyInfo* family_info, NetworkNamespaceManager * network_manager = NULL, FilesystemRemap* fs_remap=NULL)
{
	int nice_inc = 0;
	bool has_wrapper = false;

	dprintf(D_FULLDEBUG,"in OsProc::StartJob()\n");

	if ( !JobAd ) {
		dprintf ( D_ALWAYS, "No JobAd in OsProc::StartJob()!\n" );
		return 0;
	}

	MyString JobName;
	if ( JobAd->LookupString( ATTR_JOB_CMD, JobName ) != 1 ) {
		dprintf( D_ALWAYS, "%s not found in JobAd.  Aborting StartJob.\n", 
				 ATTR_JOB_CMD );
		return 0;
	}

	const char* job_iwd = Starter->jic->jobRemoteIWD();
	dprintf( D_ALWAYS, "IWD: %s\n", job_iwd );

		// some operations below will require a PrivSepHelper if
		// PrivSep is enabled (if it's not, privsep_helper will be
		// NULL)
	PrivSepHelper* privsep_helper = Starter->privSepHelper();

		// // // // // // 
		// Arguments
		// // // // // // 

		// prepend the full path to this name so that we
		// don't have to rely on the PATH inside the
		// USER_JOB_WRAPPER or for exec().

    bool transfer_exe = false;
    if (!JobAd->LookupBool(ATTR_TRANSFER_EXECUTABLE, transfer_exe)) {
        transfer_exe = false;
    }

    bool preserve_rel = false;
    if (!JobAd->LookupBool(ATTR_PRESERVE_RELATIVE_EXECUTABLE, preserve_rel)) {
        preserve_rel = false;
    }

    bool relative_exe = is_relative_to_cwd(JobName.Value());

    if (relative_exe && preserve_rel && !transfer_exe) {
        dprintf(D_ALWAYS, "Preserving relative executable path: %s\n", JobName.Value());
    }
	else if ( strcmp(CONDOR_EXEC,JobName.Value()) == 0 ) {
		JobName.sprintf( "%s%c%s",
		                 Starter->GetWorkingDir(),
		                 DIR_DELIM_CHAR,
		                 CONDOR_EXEC );
    }
	else if (relative_exe && job_iwd && *job_iwd) {
		MyString full_name;
		full_name.sprintf("%s%c%s",
		                  job_iwd,
		                  DIR_DELIM_CHAR,
		                  JobName.Value());
		JobName = full_name;

	}

	if( Starter->isGridshell() ) {
			// if we're a gridshell, just try to chmod our job, since
			// globus probably transfered it for us and left it with
			// bad permissions...
		priv_state old_priv = set_user_priv();
		int retval = chmod( JobName.Value(), S_IRWXU | S_IRWXO | S_IRWXG );
		set_priv( old_priv );
		if( retval < 0 ) {
			dprintf ( D_ALWAYS, "Failed to chmod %s!\n", JobName.Value() );
			return 0;
		}
	} 

	ArgList args;

		// Since we may be adding to the argument list, we may need to deal
		// with platform-specific arg syntax in the user's args in order
		// to successfully merge them with the additional wrapper args.
	args.SetArgV1SyntaxToCurrentPlatform();

		// First, put "condor_exec" or whatever at the front of Args,
		// since that will become argv[0] of what we exec(), either
		// the wrapper or the actual job.

	if( !getArgv0() ) {
		args.AppendArg(JobName.Value());
	} else {
		args.AppendArg(getArgv0());
	}
	
		// Support USER_JOB_WRAPPER parameter...
	char *wrapper = NULL;
	if( (wrapper=param("USER_JOB_WRAPPER")) ) {

			// make certain this wrapper program exists and is executable
		if( access(wrapper,X_OK) < 0 ) {
			dprintf( D_ALWAYS, 
					 "Cannot find/execute USER_JOB_WRAPPER file %s\n",
					 wrapper );
			free( wrapper );
			return 0;
		}
		has_wrapper = true;
			// Now, we've got a valid wrapper.  We want that to become
			// "JobName" so we exec it directly, and we want to put
			// what was the JobName (with the full path) as the first
			// argument to the wrapper
		args.AppendArg(JobName.Value());
		JobName = wrapper;
		free(wrapper);
	}
	
		// Support USE_PARROT 
	bool use_parrot = false;
	if( JobAd->LookupBool( ATTR_USE_PARROT, use_parrot) ) {
			// Check for parrot executable
		char *parrot = NULL;
		if( (parrot=param("PARROT")) ) {
			if( access(parrot,X_OK) < 0 ) {
				dprintf( D_ALWAYS, "Unable to use parrot(Cannot find/execute "
					"at %s(%s)).\n", parrot, strerror(errno) );
				free( parrot );
				return 0;
			} else {
				args.AppendArg(JobName.Value());
				JobName = parrot;
				free( parrot );
			}
		} else {
			dprintf( D_ALWAYS, "Unable to use parrot(Undefined path in config"
			" file)" );
			return 0;
		}
	}

		// Either way, we now have to add the user-specified args as
		// the rest of the Args string.
	MyString args_error;
	if(!args.AppendArgsFromClassAd(JobAd,&args_error)) {
		dprintf(D_ALWAYS, "Failed to read job arguments from JobAd.  "
				"Aborting OsProc::StartJob: %s\n",args_error.Value());
		return 0;
	}

		// // // // // // 
		// Environment 
		// // // // // // 

		// Now, instantiate an Env object so we can manipulate the
		// environment as needed.
	Env job_env;

	MyString env_errors;
	if( !Starter->GetJobEnv(JobAd,&job_env,&env_errors) ) {
		dprintf( D_ALWAYS, "Aborting OSProc::StartJob: %s\n",
				 env_errors.Value());
		return 0;
	}


		// // // // // // 
		// Standard Files
		// // // // // // 

	// handle stdin, stdout, and stderr redirection
	int fds[3];
		// initialize these to -2 to mean they're not specified.
		// -1 will be treated as an error.
	fds[0] = -2; fds[1] = -2; fds[2] = -2;

		// in order to open these files we must have the user's privs:
	priv_state priv;
	priv = set_user_priv();

		// if we're in PrivSep mode, we won't necessarily be able to
		// open the files for the job. getStdFile will return us an
		// open FD in some situations, but otherwise will give us
		// a filename that we'll pass to the PrivSep Switchboard
		//
	bool stdin_ok;
	bool stdout_ok;
	bool stderr_ok;
	MyString privsep_stdin_name;
	MyString privsep_stdout_name;
	MyString privsep_stderr_name;
	if (privsep_helper != NULL) {
		stdin_ok = getStdFile(SFT_IN,
		                      NULL,
		                      true,
		                      "Input file",
		                      &fds[0],
		                      &privsep_stdin_name);
		stdout_ok = getStdFile(SFT_OUT,
		                       NULL,
		                       true,
		                       "Output file",
		                       &fds[1],
		                       &privsep_stdout_name);
		stderr_ok = getStdFile(SFT_ERR,
		                       NULL,
		                       true,
		                       "Error file",
		                       &fds[2],
		                       &privsep_stderr_name);
	}
	else {
		fds[0] = openStdFile( SFT_IN,
		                      NULL,
		                      true,
		                      "Input file");
		stdin_ok = (fds[0] != -1);
		fds[1] = openStdFile( SFT_OUT,
		                      NULL,
		                      true,
		                      "Output file");
		stdout_ok = (fds[1] != -1);
		fds[2] = openStdFile( SFT_ERR,
		                      NULL,
		                      true,
		                      "Error file");
		stderr_ok = (fds[2] != -1);
	}

	/* Bail out if we couldn't open the std files correctly */
	if( !stdin_ok || !stdout_ok || !stderr_ok ) {
		/* only close ones that had been opened correctly */
		for ( int i = 0; i <= 2; i++ ) {
			if ( fds[i] >= 0 ) {
				daemonCore->Close_FD ( fds[i] );
			}
		}
		dprintf(D_ALWAYS, "Failed to open some/all of the std files...\n");
		dprintf(D_ALWAYS, "Aborting OsProc::StartJob.\n");
		set_priv(priv); /* go back to original priv state before leaving */
		return 0;
	}

		// // // // // // 
		// Misc + Exec
		// // // // // // 

	if( !ThisProcRunsAlongsideMainProc() ) {
		Starter->jic->notifyJobPreSpawn();
	}

	// compute job's renice value by evaluating the machine's
	// JOB_RENICE_INCREMENT in the context of the job ad...

    char* ptmp = param( "JOB_RENICE_INCREMENT" );
	if( ptmp ) {
			// insert renice expr into our copy of the job ad
		MyString reniceAttr = "Renice = ";
		reniceAttr += ptmp;
		if( !JobAd->Insert( reniceAttr.Value() ) ) {
			dprintf( D_ALWAYS, "ERROR: failed to insert JOB_RENICE_INCREMENT "
				"into job ad, Aborting OsProc::StartJob...\n" );
			free( ptmp );
			return 0;
		}
			// evaluate
		if( JobAd->EvalInteger( "Renice", NULL, nice_inc ) ) {
			dprintf( D_ALWAYS, "Renice expr \"%s\" evaluated to %d\n",
					 ptmp, nice_inc );
		} else {
			dprintf( D_ALWAYS, "WARNING: job renice expr (\"%s\") doesn't "
					 "eval to int!  Using default of 10...\n", ptmp );
			nice_inc = 10;
		}

			// enforce valid ranges for nice_inc
		if( nice_inc < 0 ) {
			dprintf( D_FULLDEBUG, "WARNING: job renice value (%d) is too "
					 "low: adjusted to 0\n", nice_inc );
			nice_inc = 0;
		}
		else if( nice_inc > 19 ) {
			dprintf( D_FULLDEBUG, "WARNING: job renice value (%d) is too "
					 "high: adjusted to 19\n", nice_inc );
			nice_inc = 19;
		}

		ASSERT( ptmp );
		free( ptmp );
		ptmp = NULL;
	} else {
			// if JOB_RENICE_INCREMENT is undefined, default to 10
		nice_inc = 10;
	}

		// in the below dprintfs, we want to skip past argv[0], which
		// is sometimes condor_exec, in the Args string. 

	MyString args_string;
	args.GetArgsStringForDisplay(&args_string, 1);
	if( has_wrapper ) { 
			// print out exactly what we're doing so folks can debug
			// it, if they need to.
		dprintf( D_ALWAYS, "Using wrapper %s to exec %s\n", JobName.Value(), 
				 args_string.Value() );

		MyString wrapper_err;
		wrapper_err.sprintf("%s%c%s", Starter->GetWorkingDir(),
				 	DIR_DELIM_CHAR,
					JOB_WRAPPER_FAILURE_FILE);
		if( ! job_env.SetEnv("_CONDOR_WRAPPER_ERROR_FILE", wrapper_err.Value()) ) {
			dprintf( D_ALWAYS, "Failed to set _CONDOR_WRAPPER_ERROR_FILE environment variable\n");
		}
	} else {
		dprintf( D_ALWAYS, "About to exec %s %s\n", JobName.Value(),
				 args_string.Value() );
	}

	MyString path;
	path.sprintf("%s%c%s", Starter->GetWorkingDir(),
			 	DIR_DELIM_CHAR,
				MACHINE_AD_FILENAME);
	if( ! job_env.SetEnv("_CONDOR_MACHINE_AD", path.Value()) ) {
		dprintf( D_ALWAYS, "Failed to set _CONDOR_MACHINE_AD environment variable\n");
	}

	path.sprintf("%s%c%s", Starter->GetWorkingDir(),
			 	DIR_DELIM_CHAR,
				JOB_AD_FILENAME);
	if( ! job_env.SetEnv("_CONDOR_JOB_AD", path.Value()) ) {
		dprintf( D_ALWAYS, "Failed to set _CONDOR_JOB_AD environment variable\n");
	}

		// Grab the full environment back out of the Env object 
	if(IsFulldebug(D_FULLDEBUG)) {
		MyString env_string;
		job_env.getDelimitedStringForDisplay(&env_string);
		dprintf(D_FULLDEBUG, "Env = %s\n", env_string.Value());
	}

	// Check to see if we need to start this process paused, and if
	// so, pass the right flag to DC::Create_Process().
	int job_opt_mask = DCJOBOPT_NO_CONDOR_ENV_INHERIT;
	if (!param_boolean("JOB_INHERITS_STARTER_ENVIRONMENT",false)) {
		job_opt_mask |= DCJOBOPT_NO_ENV_INHERIT;
	}
	int suspend_job_at_exec = 0;
	JobAd->LookupBool( ATTR_SUSPEND_JOB_AT_EXEC, suspend_job_at_exec);
	if( suspend_job_at_exec ) {
		dprintf( D_FULLDEBUG, "OsProc::StartJob(): "
				 "Job wants to be suspended at exec\n" );
		job_opt_mask |= DCJOBOPT_SUSPEND_ON_EXEC;
	}

	// If there is a requested coresize for this job, enforce it.
	// It is truncated because you can't put an unsigned integer
	// into a classad. I could rewrite condor's use of ATTR_CORE_SIZE to
	// be a float, but then when that attribute is read/written to the
	// job queue log by/or shared between versions of Condor which view the
	// type of that attribute differently, calamity would arise.
	int core_size_truncated;
	size_t core_size;
	size_t *core_size_ptr = NULL;
	if ( JobAd->LookupInteger( ATTR_CORE_SIZE, core_size_truncated ) ) {
		core_size = (size_t)core_size_truncated;
		core_size_ptr = &core_size;
	}

	long rlimit_as_hard_limit = 0;
	char *rlimit_expr = param("STARTER_RLIMIT_AS");
	if (rlimit_expr) {
		classad::ClassAdParser parser;

		classad::ExprTree *tree = parser.ParseExpression(rlimit_expr);
		if (tree) {
			classad::Value val;
			int result;

			if (EvalExprTree(tree, Starter->jic->machClassAd(), JobAd, val) && 
				val.IsIntegerValue(result)) {
					rlimit_as_hard_limit = ((long)result) * 1024 * 1024;
					dprintf(D_ALWAYS, "Setting job's virtual memory rlimit to %ld megabytes\n", rlimit_as_hard_limit);
			} else {
				dprintf(D_ALWAYS, "Can't evaluate STARTER_RLIMIT_AS expression %s\n", rlimit_expr);
			}
		} else {
			dprintf(D_ALWAYS, "Can't parse STARTER_RLIMIT_AS expression: %s\n", rlimit_expr);
		}
	}

	int *affinity_mask = makeCpuAffinityMask(Starter->getMySlotNumber());

#if defined ( WIN32 )
    owner_profile_.update ();
    /*************************************************************
    NOTE: We currently *ONLY* support loading slot-user profiles.
    This limitation will be addressed shortly, by allowing regular 
    users to load their registry hive - Ben [2008-09-31]
    **************************************************************/
    bool load_profile = false,
         run_as_owner = false;
    JobAd->LookupBool ( ATTR_JOB_LOAD_PROFILE, load_profile );
    JobAd->LookupBool ( ATTR_JOB_RUNAS_OWNER,  run_as_owner );
    if ( load_profile && !run_as_owner ) {
        if ( owner_profile_.load () ) {
            /* publish the users environment into that of the main 

            job's environment */
            if ( !owner_profile_.environment ( job_env ) ) {
                dprintf ( D_ALWAYS, "OsProc::StartJob(): Failed to "
                    "export owner's environment.\n" );
            }            
        } else {
            dprintf ( D_ALWAYS, "OsProc::StartJob(): Failed to load "
                "owner's profile.\n" );
        }
    }
#endif

		// While we are still in user priv, print out the username
#if defined(LINUX)
	if( Starter->glexecPrivSepHelper() ) {
			// TODO: if there is some way to figure out the final username,
			// print it out here or after starting the job.
		dprintf(D_ALWAYS,"Running job via glexec\n");
	}
#else
	if( false ) {
	}
#endif
	else {
		char const *username = NULL;
		char const *how = "";
		CondorPrivSepHelper* cpsh = Starter->condorPrivSepHelper();
		if( cpsh ) {
			username = cpsh->get_user_name();
			how = "via privsep switchboard ";
		}
		else {
			username = get_real_username();
		}
		if( !username ) {
			username = "******";
		}
		dprintf(D_ALWAYS,"Running job %sas user %s\n",how,username);
	}

	set_priv ( priv );

    // use this to return more detailed and reliable error message info
    // from create-process operation.
    MyString create_process_err_msg;

	if (privsep_helper != NULL) {
		const char* std_file_names[3] = {
			privsep_stdin_name.Value(),
			privsep_stdout_name.Value(),
			privsep_stderr_name.Value()
		};
		JobPid = privsep_helper->create_process(JobName.Value(),
		                                        args,
		                                        job_env,
		                                        job_iwd,
		                                        fds,
		                                        std_file_names,
		                                        nice_inc,
		                                        core_size_ptr,
		                                        1,
		                                        job_opt_mask,
		                                        family_info,
												affinity_mask,
												&create_process_err_msg);
	}
	else {
		JobPid = daemonCore->Create_Process( JobName.Value(),
		                                     args,
		                                     PRIV_USER_FINAL,
		                                     1,
		                                     FALSE,
		                                     &job_env,
		                                     job_iwd,
		                                     family_info,
		                                     NULL,
		                                     fds,
		                                     NULL,
		                                     nice_inc,
		                                     NULL,
		                                     job_opt_mask, 
		                                     core_size_ptr,
                                             affinity_mask,
											 NULL,
                                             &create_process_err_msg,
					     fs_remap,
					     rlimit_as_hard_limit,
                                             network_manager);
	}

	// Create_Process() saves the errno for us if it is an "interesting" error.
	int create_process_errno = errno;

    // errno is 0 in the privsep case.  This executes for the daemon core create-process logic
    if ((FALSE == JobPid) && (0 != create_process_errno)) {
        if (create_process_err_msg != "") create_process_err_msg += " ";
        MyString errbuf;
        errbuf.sprintf("(errno=%d: '%s')", create_process_errno, strerror(create_process_errno));
        create_process_err_msg += errbuf;
    }

	// now close the descriptors in fds array.  our child has inherited
	// them already, so we should close them so we do not leak descriptors.
	// NOTE, we want to use a special method to close the starter's
	// versions, if that's what we're using, so we don't think we've
	// still got those available in other parts of the code for any
	// reason.
	for ( int i = 0; i <= 2; i++ ) {
		if ( fds[i] >= 0 ) {
			daemonCore->Close_FD ( fds[i] );
		}
	}

	if ( JobPid == FALSE ) {
		JobPid = -1;

		if(!create_process_err_msg.IsEmpty()) {

			// if the reason Create_Process failed was that registering
			// a family with the ProcD failed, it is indicative of a
			// problem regarding this execute machine, not the job. in
			// this case, we'll want to EXCEPT instead of telling the
			// Shadow to put the job on hold. there are probably other
			// error conditions where EXCEPTing would be more appropriate
			// as well...
			//
			if (create_process_errno == DaemonCore::ERRNO_REGISTRATION_FAILED) {
				EXCEPT("Create_Process failed to register the job with the ProcD");
			}

			MyString err_msg = "Failed to execute '";
			err_msg += JobName;
			err_msg += "'";
			if(!args_string.IsEmpty()) {
				err_msg += " with arguments ";
				err_msg += args_string.Value();
			}
			err_msg += ": ";
			err_msg += create_process_err_msg;
			if( !ThisProcRunsAlongsideMainProc() ) {
				Starter->jic->notifyStarterError( err_msg.Value(),
			    	                              true,
			        	                          CONDOR_HOLD_CODE_FailedToCreateProcess,
			            	                      create_process_errno );
			}
		}

		dprintf(D_ALWAYS,"Create_Process(%s,%s, ...) failed: %s\n",
			JobName.Value(), args_string.Value(), create_process_err_msg.Value());
		return 0;
	}

	num_pids++;

	dprintf(D_ALWAYS,"Create_Process succeeded, pid=%d\n",JobPid);

	job_start_time.getTime();

	return 1;
}
Beispiel #19
0
//---------------------------------------------------------------------------
// Write the submit description file that runs condor_dagman for the
// DAG file(s) named in shallowOpts.dagFiles.
//
// The file is created at shallowOpts.strSubFile and contains, in order:
// a comment header, the fixed scheduler-universe settings, the
// on_exit_remove expression (DAGMAN_ON_EXIT_REMOVE from the config, or
// the built-in default), the condor_dagman command line, the
// environment, and finally any user-supplied lines (from
// shallowOpts.appendFile and shallowOpts.appendLines) followed by
// "queue".
//
// deepOpts     options whose values are also forwarded on the
//              condor_dagman command line (-AutoRescue, -Dagman, ...).
// shallowOpts  options for this submit file only.  Not const because
//              its string lists (dagFiles, appendLines) are rewound
//              and iterated here.
//
// Any failure (cannot create or write the submit file, valgrind not in
// PATH, config file inaccessible, append file unreadable, arguments or
// environment cannot be serialized) prints a message to stderr and
// calls exit(1); the function does not return in that case.
void writeSubmitFile(/* const */ SubmitDagDeepOptions &deepOpts,
			/* const */ SubmitDagShallowOptions &shallowOpts)
{
	FILE *pSubFile = safe_fopen_wrapper_follow(shallowOpts.strSubFile.Value(), "w");
	if (!pSubFile)
	{
		fprintf( stderr, "ERROR: unable to create submit file %s\n",
				 shallowOpts.strSubFile.Value() );
		exit( 1 );
	}

		// When running under valgrind, valgrind itself becomes the
		// executable and condor_dagman is passed to it as an argument
		// (see the argument construction below).
	const char *executable = NULL;
	MyString valgrindPath; // outside if so executable is valid!
	if ( shallowOpts.runValgrind ) {
		valgrindPath = which( valgrind_exe );
		if ( valgrindPath == "" ) {
			fprintf( stderr, "ERROR: can't find %s in PATH, aborting.\n",
						valgrind_exe );
			exit( 1 );
		} else {
			executable = valgrindPath.Value();
		}
	} else {
		executable = deepOpts.strDagmanPath.Value();
	}

		// Comment header: file name and the DAG files it was made from.
	fprintf(pSubFile, "# Filename: %s\n", shallowOpts.strSubFile.Value());

	fprintf(pSubFile, "# Generated by condor_submit_dag ");
	shallowOpts.dagFiles.rewind();
	char *dagFile;
	while ( (dagFile = shallowOpts.dagFiles.next()) != NULL ) {
		fprintf(pSubFile, "%s ", dagFile);
	}
	fprintf(pSubFile, "\n");

		// Fixed job settings.
	fprintf(pSubFile, "universe\t= scheduler\n");
	fprintf(pSubFile, "executable\t= %s\n", executable);
	fprintf(pSubFile, "getenv\t\t= True\n");
	fprintf(pSubFile, "output\t\t= %s\n", shallowOpts.strLibOut.Value());
	fprintf(pSubFile, "error\t\t= %s\n", shallowOpts.strLibErr.Value());
	fprintf(pSubFile, "log\t\t= %s\n", shallowOpts.strSchedLog.Value());
#if !defined ( WIN32 )
	fprintf(pSubFile, "remove_kill_sig\t= SIGUSR1\n" );
#endif
	fprintf(pSubFile, "+%s\t= \"%s =?= $(cluster)\"\n",
				ATTR_OTHER_JOB_REMOVE_REQUIREMENTS, ATTR_DAGMAN_JOB_ID );

		// ensure DAGMan is automatically requeued by the schedd if it
		// exits abnormally or is killed (e.g., during a reboot)
	const char *defaultRemoveExpr = "( ExitSignal =?= 11 || "
				"(ExitCode =!= UNDEFINED && ExitCode >=0 && ExitCode <= 2))";
	MyString removeExpr(defaultRemoveExpr);
	char *tmpRemoveExpr = param("DAGMAN_ON_EXIT_REMOVE");
	if ( tmpRemoveExpr ) {
			// The configured expression overrides the default; the
			// default is still written out below as a comment.
		removeExpr = tmpRemoveExpr;
		free(tmpRemoveExpr);
	}
	fprintf(pSubFile, "# Note: default on_exit_remove expression:\n");
	fprintf(pSubFile, "# %s\n", defaultRemoveExpr);
	fprintf(pSubFile, "# attempts to ensure that DAGMan is automatically\n");
	fprintf(pSubFile, "# requeued by the schedd if it exits abnormally or\n");
	fprintf(pSubFile, "# is killed (e.g., during a reboot).\n");
	fprintf(pSubFile, "on_exit_remove\t= %s\n", removeExpr.Value() );

	fprintf(pSubFile, "copy_to_spool\t= %s\n", shallowOpts.copyToSpool ?
				"True" : "False" );

	//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
	// Be sure to change MIN_SUBMIT_FILE_VERSION in dagman_main.cpp
	// if the arguments passed to condor_dagman change in an
	// incompatible way!!
	//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
	ArgList args;

		// Under valgrind, the valgrind options come first and the real
		// condor_dagman binary is the first non-option argument.
	if ( shallowOpts.runValgrind ) {
		args.AppendArg("--tool=memcheck");
		args.AppendArg("--leak-check=yes");
		args.AppendArg("--show-reachable=yes");
		args.AppendArg(deepOpts.strDagmanPath.Value());
	}

	args.AppendArg("-f");
	args.AppendArg("-l");
	args.AppendArg(".");
	if ( shallowOpts.iDebugLevel != DEBUG_UNSET ) {
		args.AppendArg("-Debug");
		args.AppendArg(shallowOpts.iDebugLevel);
	}
	args.AppendArg("-Lockfile");
	args.AppendArg(shallowOpts.strLockFile.Value());
	args.AppendArg("-AutoRescue");
	args.AppendArg(deepOpts.autoRescue);
	args.AppendArg("-DoRescueFrom");
	args.AppendArg(deepOpts.doRescueFrom);
	if(!deepOpts.always_use_node_log) {
		args.AppendArg("-dont_use_default_node_log");
	}

		// One "-Dag <file>" pair per DAG file.
	shallowOpts.dagFiles.rewind();
	while ( (dagFile = shallowOpts.dagFiles.next()) != NULL ) {
		args.AppendArg("-Dag");
		args.AppendArg(dagFile);
	}

		// Throttles are only passed when they are non-zero.
	if(shallowOpts.iMaxIdle != 0)
	{
		args.AppendArg("-MaxIdle");
		args.AppendArg(shallowOpts.iMaxIdle);
	}

	if(shallowOpts.iMaxJobs != 0)
	{
		args.AppendArg("-MaxJobs");
		args.AppendArg(shallowOpts.iMaxJobs);
	}

	if(shallowOpts.iMaxPre != 0)
	{
		args.AppendArg("-MaxPre");
		args.AppendArg(shallowOpts.iMaxPre);
	}

	if(shallowOpts.iMaxPost != 0)
	{
		args.AppendArg("-MaxPost");
		args.AppendArg(shallowOpts.iMaxPost);
	}

	if(shallowOpts.bNoEventChecks)
	{
		// strArgs += " -NoEventChecks";
		printf( "Warning: -NoEventChecks is ignored; please use "
					"the DAGMAN_ALLOW_EVENTS config parameter instead\n");
	}

	if(!shallowOpts.bPostRun)
	{
		args.AppendArg("-DontAlwaysRunPost");
	}

	if(deepOpts.bAllowLogError)
	{
		args.AppendArg("-AllowLogError");
	}

	if(deepOpts.useDagDir)
	{
		args.AppendArg("-UseDagDir");
	}

	if(deepOpts.suppress_notification)
	{
		args.AppendArg("-Suppress_notification");
	}
	else
	{
		args.AppendArg("-Dont_Suppress_notification");
	}

	if ( shallowOpts.doRecovery ) {
		args.AppendArg( "-DoRecov" );
	}

	args.AppendArg("-CsdVersion");
	args.AppendArg(CondorVersion());

	if(deepOpts.allowVerMismatch) {
		args.AppendArg("-AllowVersionMismatch");
	}

	if(shallowOpts.dumpRescueDag) {
		args.AppendArg("-DumpRescue");
	}

	if(deepOpts.bVerbose) {
		args.AppendArg("-Verbose");
	}

	if(deepOpts.bForce) {
		args.AppendArg("-Force");
	}

	if(deepOpts.strNotification != "") {
		args.AppendArg("-Notification");
		args.AppendArg(deepOpts.strNotification);
	}

	if(deepOpts.strDagmanPath != "") {
		args.AppendArg("-Dagman");
		args.AppendArg(deepOpts.strDagmanPath);
	}

	if(deepOpts.strOutfileDir != "") {
		args.AppendArg("-Outfile_dir");
		args.AppendArg(deepOpts.strOutfileDir);
	}

	if(deepOpts.updateSubmit) {
		args.AppendArg("-Update_submit");
	}

	if(deepOpts.importEnv) {
		args.AppendArg("-Import_env");
	}

	if( deepOpts.priority != 0 ) {
		args.AppendArg("-Priority");
		args.AppendArg(deepOpts.priority);
	}

	MyString arg_str,args_error;
	if(!args.GetArgsStringV1WackedOrV2Quoted(&arg_str,&args_error)) {
			// Terminate the message with a newline like every other
			// diagnostic in this function.
		fprintf(stderr,"Failed to insert arguments: %s\n",args_error.Value());
		exit(1);
	}
	fprintf(pSubFile, "arguments\t= %s\n", arg_str.Value());

	//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
	// Be sure to change MIN_SUBMIT_FILE_VERSION in dagman_main.cpp
	// if the environment passed to condor_dagman changes in an
	// incompatible way!!
	//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
	EnvFilter env;
	if ( deepOpts.importEnv ) {
		env.Import( );
	}
	env.SetEnv("_CONDOR_DAGMAN_LOG", shallowOpts.strDebugLog.Value());
	env.SetEnv("_CONDOR_MAX_DAGMAN_LOG=0");
	if ( shallowOpts.strScheddDaemonAdFile != "" ) {
		env.SetEnv("_CONDOR_SCHEDD_DAEMON_AD_FILE",
				   shallowOpts.strScheddDaemonAdFile.Value());
	}
	if ( shallowOpts.strScheddAddressFile != "" ) {
		env.SetEnv("_CONDOR_SCHEDD_ADDRESS_FILE",
				   shallowOpts.strScheddAddressFile.Value());
	}
	if ( shallowOpts.strConfigFile != "" ) {
			// Only existence (F_OK) is checked here, not readability.
		if ( access( shallowOpts.strConfigFile.Value(), F_OK ) != 0 ) {
			fprintf( stderr, "ERROR: unable to read config file %s "
						"(error %d, %s)\n",
						shallowOpts.strConfigFile.Value(), errno, strerror(errno) );
			exit(1);
		}
		env.SetEnv("_CONDOR_DAGMAN_CONFIG_FILE", shallowOpts.strConfigFile.Value());
	}

	MyString env_str;
	MyString env_errors;
	if(!env.getDelimitedStringV1RawOrV2Quoted(&env_str,&env_errors)) {
		fprintf(stderr,"Failed to insert environment: %s\n",env_errors.Value());
		exit(1);
	}
	fprintf(pSubFile, "environment\t= %s\n",env_str.Value());

	if(deepOpts.strNotification != "")
	{
		fprintf(pSubFile, "notification\t= %s\n", deepOpts.strNotification.Value());
	}

		// Append user-specified stuff to submit file...
		// ...first, the insert file, if any...
	if (shallowOpts.appendFile != "") {
		FILE *aFile = safe_fopen_wrapper_follow(shallowOpts.appendFile.Value(), "r");
		if (!aFile)
		{
			fprintf( stderr, "ERROR: unable to read submit append file (%s)\n",
					shallowOpts.appendFile.Value() );
			exit( 1 );
		}

			// Copy the append file through line by line.
		char *line;
		while ((line = getline(aFile)) != NULL) {
			fprintf(pSubFile, "%s\n", line);
		}

		fclose(aFile);
	}

		// ...now things specified directly on the command line.
	shallowOpts.appendLines.rewind();
	char *command;
	while ((command = shallowOpts.appendLines.next()) != NULL) {
		fprintf(pSubFile, "%s\n", command);
	}

	fprintf(pSubFile, "queue\n");

		// Output is buffered, so a write failure (e.g. a full disk) may
		// only show up in the stream's error flag or when fclose()
		// flushes.  Check both so that a truncated submit file is never
		// reported as success.
	bool writeFailed = ( ferror( pSubFile ) != 0 );
	if ( fclose( pSubFile ) != 0 ) {
		writeFailed = true;
	}
	if ( writeFailed ) {
		fprintf( stderr, "ERROR: failed to write submit file %s\n",
				 shallowOpts.strSubFile.Value() );
		exit( 1 );
	}
}
Beispiel #20
0
//-------------------------------------------------------------------------
// Submit one DAG node's job by building a condor_submit command line
// and handing it to do_submit().
//
// dm                  supplies the condor_submit executable, DAGMan's own
//                     job ID, and the options read below
//                     (_suppressJobLogs, _claim_hold_time,
//                     _submitDagDeepOpts, prohibitMultiJobs, dag).
// cmdFile             submit file for the node; appended as the last
//                     argument.
// condorID            passed through to do_submit() (presumably filled
//                     in with the submitted job's ID -- confirm against
//                     do_submit()).
// DAGNodeName         node name recorded in the job ad and in
//                     submit_event_notes.
// DAGParentNodeNames  parent node names, added as the quoted
//                     +DAGParentNodeNames attribute.
// vars                VARS for this node; dereferenced unconditionally,
//                     so it must not be NULL.
// priority            added as "priority=<n>" when non-zero.
// retry               substituted for "$(RETRY)" inside VARS values.
// directory           node directory to change into for the submit.
// workflowLogFile     DAGMan workflow log; must be non-NULL (ASSERTed).
// hold_claim          when true, adds +ATTR_JOB_KEEP_CLAIM_IDLE with
//                     dm._claim_hold_time.
// batchName           passed via -batch-name when non-empty.
//
// Returns false if changing into `directory` fails (nothing is
// submitted) or if changing back afterwards fails; otherwise returns
// the result of do_submit().
bool
condor_submit( const Dagman &dm, const char* cmdFile, CondorID& condorID,
			   const char* DAGNodeName, MyString &DAGParentNodeNames,
			   List<Job::NodeVar> *vars, int priority, int retry,
			   const char* directory, const char *workflowLogFile,
			   bool hold_claim, const MyString &batchName )
{
		// Run the submit from the node's directory; the matching
		// Cd2MainDir() call is at the bottom of this function.
	TmpDir		tmpDir;
	MyString	errMsg;
	if ( !tmpDir.Cd2TmpDir( directory, errMsg ) ) {
		debug_printf( DEBUG_QUIET,
				"Could not change to node directory %s: %s\n",
				directory, errMsg.Value() );
		return false;
	}

	ArgList args;

	// construct arguments to condor_submit to add attributes to the
	// job classad which identify the job's node name in the DAG, the
	// node names of its parents in the DAG, and the job ID of DAGMan
	// itself; then, define submit_event_notes to print the job's node
	// name inside the submit event in the userlog

	// NOTE: we specify the job ID of DAGMan using only its cluster ID
	// so that it may be referenced by jobs in their priority
	// attribute (which needs an int, not a string).  Doing so allows
	// users to effectively "batch" jobs by DAG so that when they
	// submit many DAGs to the same schedd, all the ready jobs from
	// one DAG complete before any jobs from another begin.

	args.AppendArg( dm.condorSubmitExe );

	args.AppendArg( "-a" ); // -a == -append; using -a to save chars
	MyString nodeName = MyString(ATTR_DAG_NODE_NAME_ALT) + " = " + DAGNodeName;
	args.AppendArg( nodeName.Value() );

		// append a line adding the parent DAGMan's cluster ID to the job ad
	args.AppendArg( "-a" ); // -a == -append; using -a to save chars
	MyString dagJobId = MyString( "+" ) + ATTR_DAGMAN_JOB_ID + " = " +
				IntToStr( dm.DAGManJobId._cluster );
	args.AppendArg( dagJobId.Value() );

		// now we append a line setting the same thing as a submit-file macro
		// (this is necessary so the user can reference it in the priority)
	args.AppendArg( "-a" ); // -a == -append; using -a to save chars
	MyString dagJobIdMacro = MyString( "" ) + ATTR_DAGMAN_JOB_ID + " = " +
				IntToStr( dm.DAGManJobId._cluster );
	args.AppendArg( dagJobIdMacro.Value() );

		// Pass the batch name to lower levels.
	if ( batchName != "" ) {
		args.AppendArg( "-batch-name" );
		args.AppendArg( batchName.Value() );
	}

	args.AppendArg( "-a" ); // -a == -append; using -a to save chars
	MyString submitEventNotes = MyString(
				"submit_event_notes = DAG Node: " ) + DAGNodeName;
	args.AppendArg( submitEventNotes.Value() );

	ASSERT( workflowLogFile );

		// We need to append the DAGman default log file to
		// the log file list
	args.AppendArg( "-a" ); // -a == -append; using -a to save chars
	std::string dlog( "dagman_log = " );
	dlog += workflowLogFile;
	args.AppendArg( dlog.c_str() );
	debug_printf( DEBUG_VERBOSE, "Adding a DAGMan workflow log %s\n",
				workflowLogFile );

		// Now append the mask
	debug_printf( DEBUG_VERBOSE, "Masking the events recorded in the DAGMAN workflow log\n" );
	args.AppendArg( "-a" ); // -a == -append; using -a to save chars
	std::string dmask("+");
	dmask += ATTR_DAGMAN_WORKFLOW_MASK;
	dmask += " = \"";
	const char *eventMask = getEventMask();
	debug_printf( DEBUG_VERBOSE, "Mask for workflow log is %s\n",
				eventMask );
	dmask += eventMask;
	dmask += "\"";
	args.AppendArg( dmask.c_str() );

		// Append the priority, if we have one.
	if ( priority != 0 ) {
		args.AppendArg( "-a" ); // -a == -append; using -a to save chars
		MyString prioStr = "priority=";
		prioStr += IntToStr( priority );
		args.AppendArg( prioStr.Value() );
	}


		// Suppress the job's log file if that option is enabled.
	if ( dm._suppressJobLogs ) {
		debug_printf( DEBUG_VERBOSE, "Suppressing node job log file\n" );
		args.AppendArg( "-a" ); // -a == -append; using -a to save chars
		args.AppendArg( "log=" );
	}

		// The parent-name attribute is built in its own list so that it
		// can be appended to (or left out of) args near the end of the
		// function.
	ArgList parentNameArgs;
	parentNameArgs.AppendArg( "-a" ); // -a == -append; using -a to save chars
	MyString parentNodeNames = MyString( "+DAGParentNodeNames = " ) +
	                        "\"" + DAGParentNodeNames + "\"";
	parentNameArgs.AppendArg( parentNodeNames.Value() );

		// set any VARS specified in the DAG file
		// NOTE(review): anotherLine is not referenced anywhere in the
		// visible body.
	MyString anotherLine;
	ListIterator<Job::NodeVar> varsIter(*vars);
	Job::NodeVar nodeVar;
	while ( varsIter.Next(nodeVar) ) {

			// Substitute the node retry count if necessary.  Note that
			// we can't do this in Job::ResolveVarsInterpolations()
			// because that's only called at parse time.
		MyString value = nodeVar._value;
		MyString retryStr = IntToStr( retry );
		value.replaceString( "$(RETRY)", retryStr.Value() );
		MyString varStr = nodeVar._name + " = " + value;

		args.AppendArg( "-a" ); // -a == -append; using -a to save chars
		args.AppendArg( varStr.Value() );
	}

		// Set the special DAG_STATUS variable (mainly for use by
		// "final" nodes).
	args.AppendArg( "-a" ); // -a == -append; using -a to save chars
	MyString var = "DAG_STATUS = ";
	var += IntToStr( (int)dm.dag->_dagStatus );
	args.AppendArg( var.Value() );

		// Set the special FAILED_COUNT variable (mainly for use by
		// "final" nodes).
	args.AppendArg( "-a" ); // -a == -append; using -a to save chars
	var = "FAILED_COUNT = ";
	var += IntToStr( dm.dag->NumNodesFailed() );
	args.AppendArg( var.Value() );

		// Ask for the claim to be kept idle for dm._claim_hold_time.
	if( hold_claim ){
		args.AppendArg( "-a" ); // -a == -append; using -a to save chars
		MyString holdit = MyString("+") + MyString(ATTR_JOB_KEEP_CLAIM_IDLE) + " = "
			+ IntToStr( dm._claim_hold_time );
		args.AppendArg( holdit.Value() );	
	}
	
		// Turn off notification for the node job when DAGMan was
		// submitted with notification suppressed.
	if (dm._submitDagDeepOpts.suppress_notification) {
		args.AppendArg( "-a" ); // -a == -append; using -a to save chars
		MyString notify = MyString("notification = never");
		args.AppendArg( notify.Value() );
	}

		//
		// Add accounting group and user if we have them.
		//
	if ( dm._submitDagDeepOpts.acctGroup != "" ) {
		args.AppendArg( "-a" ); // -a == -append; using -a to save chars
		MyString arg = "accounting_group=";
		arg += dm._submitDagDeepOpts.acctGroup;
		args.AppendArg( arg );
	}

	if ( dm._submitDagDeepOpts.acctGroupUser != "" ) {
		args.AppendArg( "-a" ); // -a == -append; using -a to save chars
			// NOTE(review): the next line looks corrupted in this copy of
			// the file.  A "******" run joins the accounting_group_user
			// string to the tail of a later warning message, and this
			// if-block is never closed before the "} else {" below.  The
			// statements that originally sat between them (presumably
			// the rest of this block and the check that decides whether
			// the parent names fit on the command line) are missing and
			// must be restored from an intact copy before this compiles.
		MyString arg = "accounting_group_user="******"Warning: node %s has too many parents "
					  "to list in its classad; leaving its DAGParentNodeNames "
					  "attribute undefined\n", DAGNodeName );
		check_warning_strictness( DAG_STRICT_3 );
	} else {
		args.AppendArgsFromArgList( parentNameArgs );
	}

		// The submit file itself is always the final argument.
	args.AppendArg( cmdFile );

	bool success = do_submit( args, condorID, dm.prohibitMultiJobs );

		// Failing to return to the original directory is reported as a
		// failure even if the submit itself succeeded.
	if ( !tmpDir.Cd2MainDir( errMsg ) ) {
		debug_printf( DEBUG_QUIET,
				"Could not change to original directory: %s\n",
				errMsg.Value() );
		success = false;
	}

	return success;
}
Beispiel #21
0
void Pigeon::initialize() {
  /*        if (m_state == STATE_RUNNING) {
  */
  MyString* qpidPort;
  char *path = NULL;
  //notify us when our process is down.
  m_reaper = daemonCore->Register_Reaper(
      "reaperQpid",
      (ReaperHandlercpp) &Pigeon::reaperResponse,
      "Qpid process reaper", (Service*) this);		

  ASSERT(m_reaper != FALSE);

  //ClassAd Initialization
  //cleanup metafile
  ArgList argClean;
  clean();

  char* proc= param("QPID_EXEC");
  if (!proc) {
  	dprintf(D_ALWAYS, "You need to specify the QPID executable as QPID_EXEC in your condor config \n");
  	EXCEPT("No qpid executable (QPID_EXEC) specified!");
  }
  const char *hostname = my_full_hostname() ;
  
  ArgList arglist; 
  arglist.AppendArg("qpidd");
  char *qpidConf = param("QPID_CONF");
  if (qpidConf) {
  	arglist.AppendArg("--config");
  	arglist.AppendArg(qpidConf);
  	free(qpidConf);
  } else {
  	
  	arglist.AppendArg("-p0");
  	arglist.AppendArg("--auth");
  	arglist.AppendArg("no");
  }
  
  
  MyString argString;
  arglist.GetArgsStringForDisplay(&argString);
  dprintf(D_ALWAYS, "\n Invoking: %s\n", argString.Value());
  path = getPortPath();
  int fd_stdout = safe_open_wrapper(path, O_RDWR|O_CREAT, 0666);
  free(path);
  int fds[3] = {-1, fd_stdout, -1};
  int mm_pid = daemonCore->Create_Process(proc,arglist,PRIV_CONDOR_FINAL, 0,FALSE,FALSE,NULL,NULL,NULL,NULL,fds);
  if (mm_pid <= 0) 
    EXCEPT("Failed to launch qpid process using Create_Process.");

  dprintf(D_ALWAYS, "Launched qpid process pid=%d \n", mm_pid);
  sleep(10);
  close(fd_stdout);
 
  char *portChr = getPort(false);
  string portStr = string(portChr);
  free(portChr);
  free(proc);
  if(strcmp(portStr.c_str(),"") != 0){
    m_qpidAd.Assign("PORT", portStr.c_str());
    dprintf(D_ALWAYS,"qpid process started on port number %s \n", portStr.c_str());
  }  
  SetMyTypeName(m_qpidAd, "pigeon");
  SetTargetTypeName(m_qpidAd, "");
  std::string hostAddr = "pigeon@";
  hostAddr += hostname;
  m_qpidAd.Assign(ATTR_NAME, "pigeon"); //hostAddr.c_str());
  m_qpidAd.Assign("Key", "qpidKey");
  m_qpidAd.Assign("IP","128" );
  daemonCore->publish(&m_qpidAd); 

  //Register a timer for periodically pushing classads.
  //TODO: Make these rate and interval configurable
  dprintf(D_ALWAYS, "Calling the classAd publish()\n");
  daemonCore->Register_Timer(1, m_adPubInterval, (TimerHandlercpp) &Pigeon::publishClassAd, 
      "publishClassAd", this);

  dprintf(D_ALWAYS, "Launched qpid process pid=%d at port=|%s|\n", mm_pid,portStr.c_str());
  
  
  char *execDir = param("SBIN");
  if (execDir) {
  	dprintf(D_ALWAYS, "Declaring queues...  \n");
  	ArgList qArglist;
  	proc = (char*)malloc(strlen(execDir) + 15);
  	sprintf(proc, "%s%c%s",execDir, DIR_DELIM_CHAR, "declareQueues");
  	qArglist.AppendArg(proc);
  	qArglist.AppendArg(hostname);
  	qArglist.AppendArg(portStr.c_str());
  	mm_pid = daemonCore->Create_Process(proc,qArglist,PRIV_CONDOR_FINAL, 0,FALSE,FALSE,NULL,NULL,NULL,NULL);
  	if (mm_pid <= 0) 
		EXCEPT("Failed to launch declareQueues process using Create_Process.");
    free(proc);
    free(execDir);
	dprintf(D_ALWAYS, "QPID queues declared. \n");
   }
}
Beispiel #22
0
bool
VMUniverseMgr::testVMGahp(const char* gahppath, const char* vmtype)
{
	m_needCheck = false;

	if( !m_starter_has_vmcode ) {
		return false;
	}

	if( !gahppath || !vmtype ) {
		return false;
	}

#if defined(WIN32)
		// On Windows machine, the option that Starter log file includes 
		// logs from vmgahp causes deadlock even if the option works well 
		// on Linux machine. I guess that is due to Windows Pipes but 
		// I don't know the exact reason.
		// Until the problem is solved, 
		// this option will be disabled on Windows machine.
	char *need_log_file = param("VM_GAHP_LOG");
	if( need_log_file ) {
		free(need_log_file);
	}else {
		dprintf( D_ALWAYS, "To support vm universe, '%s' must be defined "
				"in condor config file, which is a log file for vmgahp.\n", 
				"VM_GAHP_LOG"); 
		return false;
	}
#endif

	// vmgahp is daemonCore, so we need to add -f -t options of daemonCore.
	// Then, try to execute vmgahp with 
	// vmtype <vmtype>"
	// and grab the output as a ClassAd
	ArgList systemcmd;
	systemcmd.AppendArg(gahppath);
	systemcmd.AppendArg("-f");
	char *gahp_log_file = param("VM_GAHP_LOG");
	if( gahp_log_file ) {
		free(gahp_log_file);
	}else {
		systemcmd.AppendArg("-t");
	}
	systemcmd.AppendArg("-M");
	systemcmd.AppendArg(VMGAHP_TEST_MODE);
	systemcmd.AppendArg("vmtype");
	systemcmd.AppendArg(vmtype);

#if !defined(WIN32)
	if( can_switch_ids() ) {
		MyString tmp_str;
		tmp_str.formatstr("%d", (int)get_condor_uid());
		SetEnv("VMGAHP_USER_UID", tmp_str.Value());
	}
#endif

	priv_state prev_priv;
	if( (strcasecmp(vmtype, CONDOR_VM_UNIVERSE_XEN) == MATCH) || (strcasecmp(vmtype, CONDOR_VM_UNIVERSE_KVM) == MATCH) ) {
		// Xen requires root privilege
		prev_priv = set_root_priv();
	}else {
		prev_priv = set_condor_priv();

	}
	FILE* fp = NULL;
	fp = my_popen(systemcmd, "r", FALSE );
	set_priv(prev_priv);

	if( !fp ) {
		dprintf( D_ALWAYS, "Failed to execute %s, ignoring\n", gahppath );
		return false;
	}

	bool read_something = false;
	char buf[2048];

	m_vmgahp_info.Clear();
	while( fgets(buf, 2048, fp) ) {
		if( !m_vmgahp_info.Insert(buf) ) {
			dprintf( D_ALWAYS, "Failed to insert \"%s\" into VMInfo, "
					 "ignoring invalid parameter\n", buf );
			continue;
		}
		read_something = true;
	}
	my_pclose( fp );
	if( !read_something ) {
		MyString args_string;
		systemcmd.GetArgsStringForDisplay(&args_string,0);
		dprintf( D_ALWAYS, 
				 "Warning: '%s' did not produce any valid output.\n", 
				 args_string.Value());
		if( (strcasecmp(vmtype, CONDOR_VM_UNIVERSE_XEN) == 0) ) {
			MyString err_msg;
			err_msg += "\n#######################################################\n";
			err_msg += "##### Make sure the followings ";
			err_msg += "to use VM universe for Xen\n";
			err_msg += "### - The owner of script progrm like ";
			err_msg += "'condor_vm_xen.sh' must be root\n";
			err_msg += "### - The script program must be executable\n";
			err_msg += "### - Other writable bit for the above files is ";
			err_msg += "not allowed.\n";
			err_msg += "#########################################################\n";
			dprintf( D_ALWAYS, "%s", err_msg.Value());
		} else if( (strcasecmp(vmtype, CONDOR_VM_UNIVERSE_KVM) == 0)) {
		        MyString err_msg;
			err_msg += "\n#######################################################\n";
			err_msg += "##### Make sure the followings ";
			err_msg += "to use VM universe for KVM\n";
			err_msg += "### - The owner of script progrm like ";
			err_msg += "'condor_vm_xen.sh' must be root\n";
			err_msg += "### - The script program must be executable\n";
			err_msg += "### - Other writable bit for the above files is ";
			err_msg += "not allowed.\n";
			err_msg += "#########################################################\n";
			dprintf( D_ALWAYS, "%s", err_msg.Value());
		}else if( strcasecmp(vmtype, CONDOR_VM_UNIVERSE_VMWARE ) == 0 ) {
			MyString err_msg;
			MyString err_msg2;
			err_msg += "\n#######################################################\n";
			err_msg += "##### Make sure the followings ";
			err_msg += "to use VM universe for VMware\n";

			if( can_switch_ids() ) {
				// Condor runs as root
				err_msg += "### - The script program like 'condor_vm_vmware'";
				err_msg += " must be readable for anybody.\n";
			}

			err_msg += "### - Check the path of vmware-cmd, vmrun, and mkisofs ";
			err_msg += "in 'condor_vm_vmware\n'";
			err_msg += "#########################################################\n";
			dprintf( D_ALWAYS, "%s", err_msg.Value());
		}
		return false;
	}

	// For debug
	printVMGahpInfo(D_ALWAYS);

	// Read vm_type
	MyString tmp_vmtype;
	if( m_vmgahp_info.LookupString( ATTR_VM_TYPE, tmp_vmtype) != 1 ) {
		dprintf( D_ALWAYS, "There is no %s in the output of vmgahp. "
				"So VM Universe will be disabled\n", ATTR_VM_TYPE);
		return false;
	}
	if( strcasecmp(tmp_vmtype.Value(), vmtype) != 0 ) {
		dprintf( D_ALWAYS, "The vmgahp(%s) doesn't support this vmtype(%s)\n",
				gahppath, vmtype);
		return false;
	}
	dprintf( D_ALWAYS, "VMType('%s') is supported\n", vmtype);

	// Read vm_memory
	if( m_vmgahp_info.LookupInteger(ATTR_VM_MEMORY, m_vm_max_memory) != 1 ) {
		dprintf( D_ALWAYS, "There is no %s in the output of vmgahp\n",ATTR_VM_MEMORY);
		return false;
	}
	if( m_vm_max_memory == 0 ) {
		dprintf( D_ALWAYS, "There is no sufficient memory for virtual machines\n");
		return false;
	}

	dprintf( D_ALWAYS, "The maximum available memory for vm universe is "
			"set to %d MB\n", m_vm_max_memory);

	// Read vm_networking
	bool tmp_networking = false;
	MyString tmp_networking_types;

	m_vmgahp_info.LookupBool(ATTR_VM_NETWORKING, tmp_networking);
	if( tmp_networking ) {
		if( m_vmgahp_info.LookupString( ATTR_VM_NETWORKING_TYPES, 
					tmp_networking_types) != 1 ) {
			tmp_networking = false;
			m_vmgahp_info.Assign(ATTR_VM_NETWORKING, false);
		}
	}

	m_vm_networking = param_boolean("VM_NETWORKING",false);
	if( m_vm_networking ) {
		if( !tmp_networking ) {
			dprintf( D_ALWAYS, "Even if VM_NETWORKING is TRUE in condor config,"
					" VM_NETWORKING is disabled because vmgahp doesn't "
					"support VM_NETWORKING\n");
			m_vm_networking = false;
		}
	}
	if( m_vm_networking == false ) {
		dprintf( D_ALWAYS, "VM networking is disabled\n");
	}else {
		dprintf( D_ALWAYS, "VM networking is enabled\n");
		dprintf( D_ALWAYS, "Supported networking types are %s\n", 
				tmp_networking_types.Value());
	}
			
	// Now, we received correct information from vmgahp
	m_vm_type = tmp_vmtype;
	m_vmgahp_server = gahppath;

	return true;
}