Example #1
0
int DockerProc::StartJob() {
	std::string imageID;

	if( ! JobAd->LookupString( ATTR_DOCKER_IMAGE, imageID ) ) {
		dprintf( D_ALWAYS | D_FAILURE, "%s not defined in job ad, unable to start job.\n", ATTR_DOCKER_IMAGE );
		return FALSE;
	}

	std::string command;
	JobAd->LookupString( ATTR_JOB_CMD, command );
	dprintf( D_FULLDEBUG, "%s: '%s'\n", ATTR_JOB_CMD, command.c_str() );

	std::string sandboxPath = Starter->jic->jobRemoteIWD();

	//
	// This code is deliberately wrong, probably for backwards-compability.
	// (See the code in JICShadow::beginFileTransfer(), which assumes that
	// we transferred the executable if ATTR_TRANSFER_EXECUTABLE is unset.)
	// Rather than risk breaking anything by fixing condor_submit (which
	// does not set ATTR_TRANSFER_EXECUTABLE unless it's false) -- and
	// introducing a version dependency -- assume the executable was
	// transferred unless it was explicitly noted otherwise.
	//
	bool transferExecutable = true;
	JobAd->LookupBool( ATTR_TRANSFER_EXECUTABLE, transferExecutable );
	if( transferExecutable ) {
		command = sandboxPath + "/" + command;
	}

	ArgList args;
	args.SetArgV1SyntaxToCurrentPlatform();
	MyString argsError;
	if( ! args.AppendArgsFromClassAd( JobAd, & argsError ) ) {
		dprintf( D_ALWAYS | D_FAILURE, "Failed to read job arguments from job ad: '%s'.\n", argsError.c_str() );
		return FALSE;
	}

	Env job_env;
	MyString env_errors;
	if( !Starter->GetJobEnv(JobAd,&job_env,&env_errors) ) {
		dprintf( D_ALWAYS, "Aborting DockerProc::StartJob: %s\n", env_errors.Value());
		return 0;
	}

	// The GlobalJobID is unsuitable by virtue its octothorpes.  This
	// construction is informative, but could be made even less likely
	// to collide if it had a timestamp.
	formatstr( containerName, "HTCJob%d_%d_%s_PID%d",
		Starter->jic->jobCluster(),
		Starter->jic->jobProc(),
		Starter->getMySlotName().c_str(), // note: this can be "" for single slot machines.
		getpid() );


	//
	// Do I/O redirection (includes streaming).
	//

	int childFDs[3] = { -2, -2, -2 };
	{
	TemporaryPrivSentry sentry(PRIV_USER);
	// getStdFile() returns -1 on error.

	if( -1 == (childFDs[0] = openStdFile( SFT_IN, NULL, true, "Input file" )) ) {
		dprintf( D_ALWAYS | D_FAILURE, "DockerProc::StartJob(): failed to open stdin.\n" );
		return FALSE;
	}
	if( -1 == (childFDs[1] = openStdFile( SFT_OUT, NULL, true, "Output file" )) ) {
		dprintf( D_ALWAYS | D_FAILURE, "DockerProc::StartJob(): failed to open stdout.\n" );
		daemonCore->Close_FD( childFDs[0] );
		return FALSE;
	}
	if( -1 == (childFDs[2] = openStdFile( SFT_ERR, NULL, true, "Error file" )) ) {
		dprintf( D_ALWAYS | D_FAILURE, "DockerProc::StartJob(): failed to open stderr.\n" );
		daemonCore->Close_FD( childFDs[0] );
		daemonCore->Close_FD( childFDs[1] );
		return FALSE;
	}
	}

	  // Ulog the execute event
	Starter->jic->notifyJobPreSpawn();

	CondorError err;
	// DockerAPI::run() returns a PID from daemonCore->Create_Process(), which
	// makes it suitable for passing up into VanillaProc.  This combination
	// will trigger the reaper(s) when the container terminates.
	
	ClassAd *machineAd = Starter->jic->machClassAd();

	std::list<std::string> extras;
	buildExtraVolumes(extras);

	int rv = DockerAPI::run( *machineAd, containerName, imageID, command, args, job_env, sandboxPath, extras, JobPid, childFDs, err );
	if( rv < 0 ) {
		dprintf( D_ALWAYS | D_FAILURE, "DockerAPI::run( %s, %s, ... ) failed with return value %d\n", imageID.c_str(), command.c_str(), rv );
		return FALSE;
	}
	dprintf( D_FULLDEBUG, "DockerAPI::run() returned pid %d\n", JobPid );


	// TODO: Start a timer to poll for job usage updates.

	++num_pids; // Used by OsProc::PublishUpdateAd().
	return TRUE;
}
Example #2
0
int
ScriptProc::StartJob()
{
	dprintf(D_FULLDEBUG,"in ScriptProc::StartJob()\n");

	if ( !JobAd ) {
		dprintf ( D_ALWAYS, "No JobAd in ScriptProc::StartJob()!\n" );
		return 0;
	}

	MyString attr;

	attr = name;
	attr += ATTR_JOB_CMD;
	char* tmp = NULL;
	if( ! JobAd->LookupString( attr.Value(), &tmp ) ) {
		dprintf( D_ALWAYS, "%s not found in JobAd.  Aborting StartJob.\n", 
				 attr.Value() );
		return 0;
	}

		// // // // // // 
		// executable
		// // // // // // 

		// TODO: make it smart in cases we're not the gridshell and/or
		// didn't transfer files so that we don't prepend the wrong
		// path to the binary, and don't try to chmod it.
	MyString exe_path = Starter->GetWorkingDir();
	exe_path += DIR_DELIM_CHAR;
	exe_path += tmp;
	free( tmp ); 
	tmp = NULL;

	if( Starter->isGridshell() ) {
			// if we're a gridshell, chmod() the binary, since globus
			// probably transfered it for us and left it with bad
			// permissions...
		priv_state old_priv = set_user_priv();
		int retval = chmod( exe_path.Value(), 0755 );
		set_priv( old_priv );
		if( retval < 0 ) {
			dprintf( D_ALWAYS, "Failed to chmod %s: %s (errno %d)\n", 
					 exe_path.Value(), strerror(errno), errno );
			return 0;
		}
	} 


		// // // // // // 
		// Args
		// // // // // // 

	char *args1 = NULL;
	char *args2 = NULL;
	MyString args1_attr;
	MyString args2_attr;
	args1_attr = name;
	args1_attr += ATTR_JOB_ARGUMENTS1;
	args2_attr = name;
	args2_attr += ATTR_JOB_ARGUMENTS2;

	JobAd->LookupString(args1_attr.Value(), &args1);
	JobAd->LookupString(args2_attr.Value(), &args2);

	ArgList args;

		// Since we are adding to the argument list, we may need to deal
		// with platform-specific arg syntax in the user's args in order
		// to successfully merge them with the additional args.
	args.SetArgV1SyntaxToCurrentPlatform();

		// First, put "condor_<name>script" at the front of Args,
		// since that will become argv[0] of what we exec(), either
		// the wrapper or the actual job.
	MyString arg0;
	arg0 = "condor_";
	arg0 += name;
	arg0 += "script";
	args.AppendArg(arg0.Value());

	MyString args_error;
	if(args2 && *args2) {
		args.AppendArgsV2Raw(args2,&args_error);
	}
	else if(args1 && *args1) {
		args.AppendArgsV1Raw(args1,&args_error);
	}
	else {
		dprintf( D_FULLDEBUG, "neither %s nor %s could be found in JobAd\n",
				 args1_attr.Value(), args2_attr.Value());
	}

	free( args1 );
	free( args2 );

		// // // // // // 
		// Environment 
		// // // // // // 

	char *env1 = NULL;
	char *env2 = NULL;
	MyString env1_attr;
	MyString env2_attr;
	env1_attr = name;
	env1_attr += ATTR_JOB_ENVIRONMENT1;
	env2_attr = name;
	env2_attr += ATTR_JOB_ENVIRONMENT2;
	JobAd->LookupString( env1_attr.Value(), &env1 );
	JobAd->LookupString( env2_attr.Value(), &env2 );
			// TODO do we want to use the regular ATTR_JOB_ENVIRONMENT
			// if there's nothing specific for this script?

		// Now, instantiate an Env object so we can manipulate the
		// environment as needed.
	Env job_env;
	MyString env_errors;
	if( env2 && *env2 ) { 
		if( ! job_env.MergeFromV2Raw(env2,&env_errors) ) {
			dprintf( D_ALWAYS, "Invalid %s found in JobAd (%s).  "
					 "Aborting ScriptProc::StartJob.\n",
					 env2_attr.Value(),env_errors.Value() );  
			free( env1 );
			free( env2 );
			return 0;
		}
	}
	else if( env1 && *env1 ) { 
		if( ! job_env.MergeFromV1Raw(env1,&env_errors) ) {
			dprintf( D_ALWAYS, "Invalid %s found in JobAd (%s).  "
					 "Aborting ScriptProc::StartJob.\n",
					 env1_attr.Value(),env_errors.Value() );  
			free( env1 );
			free( env2 );
			return 0;
		}
	}

	free(env1);
	free(env2);

		// Now, let the starter publish any env vars it wants to add
	Starter->PublishToEnv( &job_env );


		// TODO: Deal with port regulation stuff?

		// Grab the full environment back out of the Env object 
	if(IsFulldebug(D_FULLDEBUG)) {
		MyString env_str;
		job_env.getDelimitedStringForDisplay(&env_str);
		dprintf(D_FULLDEBUG, "%sEnv = %s\n", name, env_str.Value() );
	}



		// // // // // // 
		// Standard Files
		// // // // // // 

		// TODO???


		// // // // // // 
		// Misc + Exec
		// // // // // // 

		// TODO?
		// Starter->jic->notifyJobPreSpawn( name );

		// compute job's renice value by evaluating the machine's
		// JOB_RENICE_INCREMENT in the context of the job ad...
		// TODO?
	int nice_inc = 10;


		// in the below dprintfs, we want to skip past argv[0], which
		// is sometimes condor_exec, in the Args string. 

	MyString args_string;
	args.GetArgsStringForDisplay(&args_string,1);
	dprintf( D_ALWAYS, "About to exec %s script: %s %s\n", 
			 name, exe_path.Value(), 
			 args_string.Value() );
		
	// If there is a requested coresize for this job, enforce it.
	// It is truncated because you can't put an unsigned integer
	// into a classad. I could rewrite condor's use of ATTR_CORE_SIZE to
	// be a float, but then when that attribute is read/written to the
	// job queue log by/or shared between versions of Condor which view the
	// type of that attribute differently, calamity would arise.
	int core_size_truncated;
	size_t core_size;
	size_t *core_size_ptr = NULL;
	if ( JobAd->LookupInteger(ATTR_CORE_SIZE, core_size_truncated) ) {
		core_size = (size_t)core_size_truncated;
		core_size_ptr = &core_size;
	}

	JobPid = daemonCore->Create_Process(exe_path.Value(), 
	                                    args,
	                                    PRIV_USER_FINAL,
	                                    1,
	                                    FALSE,
	                                    FALSE,
	                                    &job_env,
	                                    Starter->jic->jobIWD(),
	                                    NULL,
	                                    NULL,
	                                    NULL,
	                                    NULL,
	                                    nice_inc,
	                                    NULL,
	                                    DCJOBOPT_NO_ENV_INHERIT,
	                                    core_size_ptr );

	//NOTE: Create_Process() saves the errno for us if it is an
	//"interesting" error.
	char const *create_process_error = NULL;
	int create_process_errno = errno;
	if( JobPid == FALSE && errno ) {
		create_process_error = strerror( errno );
	}

	if( JobPid == FALSE ) {
		JobPid = -1;

		if( create_process_error ) {
			MyString err_msg = "Failed to execute '";
			err_msg += exe_path.Value();
			err_msg += "'";
			if(!args_string.IsEmpty()) {
				err_msg += " with arguments ";
				err_msg += args_string.Value();
			}
			err_msg += ": ";
			err_msg += create_process_error;
			Starter->jic->notifyStarterError( err_msg.Value(), true, CONDOR_HOLD_CODE_FailedToCreateProcess, create_process_errno );
		}

		EXCEPT( "Create_Process(%s,%s, ...) failed",
				exe_path.Value(), args_string.Value() );
		return 0;
	}

	dprintf( D_ALWAYS, "Create_Process succeeded, pid=%d\n", JobPid );

	job_start_time.getTime();

	return 1;
}
Example #3
0
void
do_process_request(const ClassAd *inputAd, ClassAd *resultAd, const int req_number, 
				   const char *iwd, const char *stdio_iwd)
{
		// Check for inputAd
	if ( !inputAd ) {
		handle_process_request_error("No input ad",req_number,resultAd);
		return;
	}

		// Map the CMD specified in the input via the config file.
	MyString UnmappedJobName,JobName;
	if (inputAd->LookupString(ATTR_JOB_CMD,UnmappedJobName) == 0 ) {
			// no CMD specified.
		handle_process_request_error("No CMD specified",req_number,resultAd);
		return;
	}
	char *auth_commands = param("SOAPSHELL_AUTHORIZED_COMMANDS");
	StringList auth_list(auth_commands,",");
	if ( auth_commands ) free(auth_commands);
		// Each command needs four tuples; anything else is a misconfiguration
	if ( auth_list.number() % 4 != 0 ) {
		handle_process_request_error("Service is misconfigured: SOAPSHELL_AUTHORIZED_COMMANDS malformed",req_number,resultAd);
		return;
	}

	if ( auth_list.contains_anycase(UnmappedJobName.Value()) == TRUE ) {
		JobName = auth_list.next();
	}
	if ( JobName.IsEmpty() ) {
			// the CMD not authorized
		handle_process_request_error("Requested CMD not authorized via SOAPSHELL_AUTHORIZED_COMMANDS",req_number,resultAd);
		return;
	}

		// handle command line arguments.
	ArgList args;
	args.SetArgV1SyntaxToCurrentPlatform();
	args.AppendArg(JobName.Value());	// set argv[0] to command
	char *soapshell_args = auth_list.next();
	if ( soapshell_args && strcmp(soapshell_args,"*") ) {
		if(!args.AppendArgsV1RawOrV2Quoted(soapshell_args,NULL)) {
			dprintf( D_ALWAYS, "ERROR: SOAPSHELL_ARGS config macro invalid\n" );
		}
	} else if(!args.AppendArgsFromClassAd(inputAd,NULL)) {
		handle_process_request_error("Failed to setup CMD arguments",req_number,resultAd);
		return;
	}
		
		// handle the environment.
	Env job_env;
	char *env_str = auth_list.next();
	if ( env_str && strcmp(env_str,"*") ) {
		if(!job_env.MergeFromV1RawOrV2Quoted(env_str,NULL) ) {
			dprintf(D_ALWAYS,"ERROR: SOAPSHELL_ENVIRONMENT config macro invalid\n");
		}
	} else if(!job_env.MergeFrom(inputAd,NULL)) {
		// bad environment string in job ad!
		handle_process_request_error("Request has faulty environment string",req_number,resultAd);
		return;
	}

		// Write input files into iwd (we will write stdin later)
	if ( !write_input_files(inputAd, iwd) ) {
		// failed to write input files
		handle_process_request_error("Failed to write input files",req_number,resultAd);
		return;
	}

		// handle stdin, stdout, and stderr redirection
	const char* jobstdin_ = dircat(stdio_iwd,"stdin");
	MyString jobstdin(jobstdin_);
	const char* jobstdout_ = dircat(stdio_iwd,"stdout");
	MyString jobstdout(jobstdout_);
	const char* jobstderr_ = dircat(stdio_iwd,"stderr");
	MyString jobstderr(jobstderr_);
	delete [] jobstdin_;
	delete [] jobstdout_;
	delete [] jobstderr_;
	int flags = O_WRONLY | O_CREAT | O_TRUNC | O_APPEND | O_LARGEFILE;
		// write stdin file is needed
	{
		char *input = NULL;
		unsigned char *output = NULL;
		int output_length = 0;
		int fd = -1;
		inputAd->LookupString(ATTR_JOB_INPUT,&input);
		if ( input ) {
			// Caller needs to free *output if non-NULL
			condor_base64_decode(input,&output,&output_length);
			if ( output ) {
				fd = safe_open_wrapper_follow( jobstdin.Value(), flags, 0666 );
				if ( fd > -1 ) {
					write(fd,output,output_length);
					close(fd);
				}
				free(output);
			}
			free(input);
			if ( fd < 0 ) {
				handle_process_request_error("Failed to write stdin",req_number,resultAd);
				return;
			}
		}
	}
	int fds[3]; 
		// initialize these to -2 to mean they're not specified.
		// -1 will be treated as an error.
	fds[0] = -2; fds[1] = -2; fds[2] = -2;	
	fds[0] = safe_open_wrapper_follow( jobstdin.Value(), O_RDONLY | O_LARGEFILE ); // stdin	
	fds[1] = safe_open_wrapper_follow( jobstdout.Value(), flags, 0666 );	// stdout
	fds[2] = safe_open_wrapper_follow( jobstderr.Value(), flags, 0666 );	// stderr
	/* Bail out if we couldn't open stdout/err files correctly */
	if( fds[1]==-1 || fds[2]==-1 ) {
		/* only close ones that had been opened correctly */
		for ( int i = 0; i <= 2; i++ ) {
			if ( fds[i] >= 0 ) {
				daemonCore->Close_FD ( fds[i] );
			}
		}
		handle_process_request_error("Failed to write stdout/err files",req_number,resultAd);
		return;
	}

		// Print what we are about to do to the log
	MyString args_string;
	args.GetArgsStringForDisplay(&args_string,1);
	dprintf( D_ALWAYS, "About to exec %s %s\n", JobName.Value(),
				 args_string.Value() );

		// Spawn a process, baby!!!
	int JobPid = daemonCore->Create_Process( JobName.Value(),	// executable
		                                     args,				// args
		                                     PRIV_UNKNOWN,		// priv_state - TODO
		                                     0,					// reaper id - TODO
		                                     FALSE,				// want_command_port
		                                     &job_env,			// job environment
		                                     iwd,				// job iwd
		                                     NULL,				// family_info - TODO
		                                     NULL,				// sock_inherit_list
		                                     fds				// stdio redirection
										);

		// NOTE: Create_Process() saves the errno for us if it is an
		// "interesting" error.
	char const *create_process_error = NULL;
	if(JobPid == FALSE && errno) create_process_error = strerror(errno);

		// now close the descriptors in fds array.  our child has inherited
		// them already, so we should close them so we do not leak descriptors.
	for ( int i = 0; i <= 2; i++ ) {
		if ( fds[i] >= 0 ) {
			daemonCore->Close_FD ( fds[i] );
		}
	}

	if ( JobPid == FALSE ) {
		JobPid = -1;
		MyString errormsg;
		errormsg.formatstr("Create_Process failed %s",create_process_error ? create_process_error : "");
		handle_process_request_error(errormsg.Value(),req_number,resultAd);
		return;
	}


	dprintf(D_ALWAYS,"Create_Process succeeded, pid=%d\n",JobPid);

		// TODO - For now, just deal w/ one at a time. :(
		// So for now just wait for the child to exit.
#ifdef WIN32
#error This service does not yet work on Windows
#else
	{
		int exit_status;
		pid_t pid;
		for (;;) {
			pid = wait(&exit_status);
			dprintf(D_FULLDEBUG,"WAIT returned %d, errno=%d\n",pid,errno);
			if (pid == JobPid ) break;
			if (pid == -1 && errno != EINTR) {
				EXCEPT("waitpid failed errno=%d",errno);
			}
		}
		if ( WIFEXITED(exit_status) ) {
			int status = WEXITSTATUS(exit_status);
			resultAd->Assign("EXIT_STATUS",status);
		}		
	}
#endif

		// Job has completed, exit status is in the ad.  Now put
		// the output files into the result ad.
	stash_output_file(resultAd, jobstdout.Value(), ATTR_JOB_OUTPUT);
	stash_output_file(resultAd, jobstderr.Value(), ATTR_JOB_ERROR);

}
Example #4
0
int
OsProc::StartJob(FamilyInfo* family_info, FilesystemRemap* fs_remap=NULL)
{
	int nice_inc = 0;
	bool has_wrapper = false;

	dprintf(D_FULLDEBUG,"in OsProc::StartJob()\n");

	if ( !JobAd ) {
		dprintf ( D_ALWAYS, "No JobAd in OsProc::StartJob()!\n" );
		return 0;
	}

	MyString JobName;
	if ( JobAd->LookupString( ATTR_JOB_CMD, JobName ) != 1 ) {
		dprintf( D_ALWAYS, "%s not found in JobAd.  Aborting StartJob.\n", 
				 ATTR_JOB_CMD );
		return 0;
	}

	const char* job_iwd = Starter->jic->jobRemoteIWD();
	dprintf( D_ALWAYS, "IWD: %s\n", job_iwd );

		// some operations below will require a PrivSepHelper if
		// PrivSep is enabled (if it's not, privsep_helper will be
		// NULL)
	PrivSepHelper* privsep_helper = Starter->privSepHelper();

		// // // // // // 
		// Arguments
		// // // // // // 

		// prepend the full path to this name so that we
		// don't have to rely on the PATH inside the
		// USER_JOB_WRAPPER or for exec().

    bool transfer_exe = false;
    if (!JobAd->LookupBool(ATTR_TRANSFER_EXECUTABLE, transfer_exe)) {
        transfer_exe = false;
    }

    bool preserve_rel = false;
    if (!JobAd->LookupBool(ATTR_PRESERVE_RELATIVE_EXECUTABLE, preserve_rel)) {
        preserve_rel = false;
    }

    bool relative_exe = is_relative_to_cwd(JobName.Value());

    if (relative_exe && preserve_rel && !transfer_exe) {
        dprintf(D_ALWAYS, "Preserving relative executable path: %s\n", JobName.Value());
    }
	else if ( strcmp(CONDOR_EXEC,JobName.Value()) == 0 ) {
		JobName.formatstr( "%s%c%s",
		                 Starter->GetWorkingDir(),
		                 DIR_DELIM_CHAR,
		                 CONDOR_EXEC );
    }
	else if (relative_exe && job_iwd && *job_iwd) {
		MyString full_name;
		full_name.formatstr("%s%c%s",
		                  job_iwd,
		                  DIR_DELIM_CHAR,
		                  JobName.Value());
		JobName = full_name;

	}

	if( Starter->isGridshell() ) {
			// if we're a gridshell, just try to chmod our job, since
			// globus probably transfered it for us and left it with
			// bad permissions...
		priv_state old_priv = set_user_priv();
		int retval = chmod( JobName.Value(), S_IRWXU | S_IRWXO | S_IRWXG );
		set_priv( old_priv );
		if( retval < 0 ) {
			dprintf ( D_ALWAYS, "Failed to chmod %s!\n", JobName.Value() );
			return 0;
		}
	} 

	ArgList args;

		// Since we may be adding to the argument list, we may need to deal
		// with platform-specific arg syntax in the user's args in order
		// to successfully merge them with the additional wrapper args.
	args.SetArgV1SyntaxToCurrentPlatform();

		// First, put "condor_exec" or whatever at the front of Args,
		// since that will become argv[0] of what we exec(), either
		// the wrapper or the actual job.

	if( !getArgv0() ) {
		args.AppendArg(JobName.Value());
	} else {
		args.AppendArg(getArgv0());
	}
	
		// Support USER_JOB_WRAPPER parameter...
	char *wrapper = NULL;
	if( (wrapper=param("USER_JOB_WRAPPER")) ) {

			// make certain this wrapper program exists and is executable
		if( access(wrapper,X_OK) < 0 ) {
			dprintf( D_ALWAYS, 
					 "Cannot find/execute USER_JOB_WRAPPER file %s\n",
					 wrapper );
			free( wrapper );
			return 0;
		}
		has_wrapper = true;
			// Now, we've got a valid wrapper.  We want that to become
			// "JobName" so we exec it directly, and we want to put
			// what was the JobName (with the full path) as the first
			// argument to the wrapper
		args.AppendArg(JobName.Value());
		JobName = wrapper;
		free(wrapper);
	}
	
		// Support USE_PARROT 
	bool use_parrot = false;
	if( JobAd->LookupBool( ATTR_USE_PARROT, use_parrot) ) {
			// Check for parrot executable
		char *parrot = NULL;
		if( (parrot=param("PARROT")) ) {
			if( access(parrot,X_OK) < 0 ) {
				dprintf( D_ALWAYS, "Unable to use parrot(Cannot find/execute "
					"at %s(%s)).\n", parrot, strerror(errno) );
				free( parrot );
				return 0;
			} else {
				args.AppendArg(JobName.Value());
				JobName = parrot;
				free( parrot );
			}
		} else {
			dprintf( D_ALWAYS, "Unable to use parrot(Undefined path in config"
			" file)" );
			return 0;
		}
	}

		// Either way, we now have to add the user-specified args as
		// the rest of the Args string.
	MyString args_error;
	if(!args.AppendArgsFromClassAd(JobAd,&args_error)) {
		dprintf(D_ALWAYS, "Failed to read job arguments from JobAd.  "
				"Aborting OsProc::StartJob: %s\n",args_error.Value());
		return 0;
	}

		// // // // // // 
		// Environment 
		// // // // // // 

		// Now, instantiate an Env object so we can manipulate the
		// environment as needed.
	Env job_env;

	MyString env_errors;
	if( !Starter->GetJobEnv(JobAd,&job_env,&env_errors) ) {
		dprintf( D_ALWAYS, "Aborting OSProc::StartJob: %s\n",
				 env_errors.Value());
		return 0;
	}


		// // // // // // 
		// Standard Files
		// // // // // // 

	// handle stdin, stdout, and stderr redirection
	int fds[3];
		// initialize these to -2 to mean they're not specified.
		// -1 will be treated as an error.
	fds[0] = -2; fds[1] = -2; fds[2] = -2;

		// in order to open these files we must have the user's privs:
	priv_state priv;
	priv = set_user_priv();

		// if we're in PrivSep mode, we won't necessarily be able to
		// open the files for the job. getStdFile will return us an
		// open FD in some situations, but otherwise will give us
		// a filename that we'll pass to the PrivSep Switchboard
		//
	bool stdin_ok;
	bool stdout_ok;
	bool stderr_ok;
	MyString privsep_stdin_name;
	MyString privsep_stdout_name;
	MyString privsep_stderr_name;
	if (privsep_helper != NULL) {
		stdin_ok = getStdFile(SFT_IN,
		                      NULL,
		                      true,
		                      "Input file",
		                      &fds[0],
		                      &privsep_stdin_name);
		stdout_ok = getStdFile(SFT_OUT,
		                       NULL,
		                       true,
		                       "Output file",
		                       &fds[1],
		                       &privsep_stdout_name);
		stderr_ok = getStdFile(SFT_ERR,
		                       NULL,
		                       true,
		                       "Error file",
		                       &fds[2],
		                       &privsep_stderr_name);
	}
	else {
		fds[0] = openStdFile( SFT_IN,
		                      NULL,
		                      true,
		                      "Input file");
		stdin_ok = (fds[0] != -1);
		fds[1] = openStdFile( SFT_OUT,
		                      NULL,
		                      true,
		                      "Output file");
		stdout_ok = (fds[1] != -1);
		fds[2] = openStdFile( SFT_ERR,
		                      NULL,
		                      true,
		                      "Error file");
		stderr_ok = (fds[2] != -1);
	}

	/* Bail out if we couldn't open the std files correctly */
	if( !stdin_ok || !stdout_ok || !stderr_ok ) {
		/* only close ones that had been opened correctly */
		for ( int i = 0; i <= 2; i++ ) {
			if ( fds[i] >= 0 ) {
				daemonCore->Close_FD ( fds[i] );
			}
		}
		dprintf(D_ALWAYS, "Failed to open some/all of the std files...\n");
		dprintf(D_ALWAYS, "Aborting OsProc::StartJob.\n");
		set_priv(priv); /* go back to original priv state before leaving */
		return 0;
	}

		// // // // // // 
		// Misc + Exec
		// // // // // // 

	if( !ThisProcRunsAlongsideMainProc() ) {
		Starter->jic->notifyJobPreSpawn();
	}

	// compute job's renice value by evaluating the machine's
	// JOB_RENICE_INCREMENT in the context of the job ad...

    char* ptmp = param( "JOB_RENICE_INCREMENT" );
	if( ptmp ) {
			// insert renice expr into our copy of the job ad
		MyString reniceAttr = "Renice = ";
		reniceAttr += ptmp;
		if( !JobAd->Insert( reniceAttr.Value() ) ) {
			dprintf( D_ALWAYS, "ERROR: failed to insert JOB_RENICE_INCREMENT "
				"into job ad, Aborting OsProc::StartJob...\n" );
			free( ptmp );
			return 0;
		}
			// evaluate
		if( JobAd->EvalInteger( "Renice", NULL, nice_inc ) ) {
			dprintf( D_ALWAYS, "Renice expr \"%s\" evaluated to %d\n",
					 ptmp, nice_inc );
		} else {
			dprintf( D_ALWAYS, "WARNING: job renice expr (\"%s\") doesn't "
					 "eval to int!  Using default of 10...\n", ptmp );
			nice_inc = 10;
		}

			// enforce valid ranges for nice_inc
		if( nice_inc < 0 ) {
			dprintf( D_FULLDEBUG, "WARNING: job renice value (%d) is too "
					 "low: adjusted to 0\n", nice_inc );
			nice_inc = 0;
		}
		else if( nice_inc > 19 ) {
			dprintf( D_FULLDEBUG, "WARNING: job renice value (%d) is too "
					 "high: adjusted to 19\n", nice_inc );
			nice_inc = 19;
		}

		ASSERT( ptmp );
		free( ptmp );
		ptmp = NULL;
	} else {
			// if JOB_RENICE_INCREMENT is undefined, default to 0
		nice_inc = 0;
	}

		// in the below dprintfs, we want to skip past argv[0], which
		// is sometimes condor_exec, in the Args string. 

	MyString args_string;
	args.GetArgsStringForDisplay(&args_string, 1);
	if( has_wrapper ) { 
			// print out exactly what we're doing so folks can debug
			// it, if they need to.
		dprintf( D_ALWAYS, "Using wrapper %s to exec %s\n", JobName.Value(), 
				 args_string.Value() );
	} else {
		dprintf( D_ALWAYS, "About to exec %s %s\n", JobName.Value(),
				 args_string.Value() );
	}

		// Grab the full environment back out of the Env object 
	if(IsFulldebug(D_FULLDEBUG)) {
		MyString env_string;
		job_env.getDelimitedStringForDisplay(&env_string);
		dprintf(D_FULLDEBUG, "Env = %s\n", env_string.Value());
	}

	// Check to see if we need to start this process paused, and if
	// so, pass the right flag to DC::Create_Process().
	int job_opt_mask = DCJOBOPT_NO_CONDOR_ENV_INHERIT;
	if (!param_boolean("JOB_INHERITS_STARTER_ENVIRONMENT",false)) {
		job_opt_mask |= DCJOBOPT_NO_ENV_INHERIT;
	}
	int suspend_job_at_exec = 0;
	JobAd->LookupBool( ATTR_SUSPEND_JOB_AT_EXEC, suspend_job_at_exec);
	if( suspend_job_at_exec ) {
		dprintf( D_FULLDEBUG, "OsProc::StartJob(): "
				 "Job wants to be suspended at exec\n" );
		job_opt_mask |= DCJOBOPT_SUSPEND_ON_EXEC;
	}

	// If there is a requested coresize for this job, enforce it.
	// Convert negative and very large values to RLIM_INFINITY, meaning
	// no size limit.
	// RLIM_INFINITY is unsigned, but its value and type size vary.
	long long core_size_ad;
	size_t core_size;
	size_t *core_size_ptr = NULL;
#if !defined(WIN32)
	if ( JobAd->LookupInteger( ATTR_CORE_SIZE, core_size_ad ) ) {
		if ( core_size_ad < 0 || (unsigned long long)core_size_ad > RLIM_INFINITY ) {
			core_size = RLIM_INFINITY;
		} else {
			core_size = (size_t)core_size_ad;
		}
		core_size_ptr = &core_size;
	}
#endif // !defined(WIN32)

	long rlimit_as_hard_limit = 0;
	char *rlimit_expr = param("STARTER_RLIMIT_AS");
	if (rlimit_expr) {
		classad::ClassAdParser parser;

		classad::ExprTree *tree = parser.ParseExpression(rlimit_expr);
		if (tree) {
			classad::Value val;
			long long result;

			if (EvalExprTree(tree, Starter->jic->machClassAd(), JobAd, val) && 
				val.IsIntegerValue(result)) {
					result *= 1024 * 1024; // convert to megabytes
					rlimit_as_hard_limit = (long)result; // truncate for Create_Process
					if (result > rlimit_as_hard_limit) {
						// if truncation to long results in a change in the value, then
						// the requested limit must be > 2 GB and we are on a 32 bit platform
						// in that case, the requested limit is > than what the process can get anyway
						// so just don't set a limit.
						rlimit_as_hard_limit = 0;
					}
					if (rlimit_as_hard_limit > 0) {
						dprintf(D_ALWAYS, "Setting job's virtual memory rlimit to %ld megabytes\n", rlimit_as_hard_limit);
					}
			} else {
				dprintf(D_ALWAYS, "Can't evaluate STARTER_RLIMIT_AS expression %s\n", rlimit_expr);
			}
		} else {
			dprintf(D_ALWAYS, "Can't parse STARTER_RLIMIT_AS expression: %s\n", rlimit_expr);
		}
	}

	int *affinity_mask = makeCpuAffinityMask(Starter->getMySlotNumber());

#if defined ( WIN32 )
    owner_profile_.update ();
    /*************************************************************
    NOTE: We currently *ONLY* support loading slot-user profiles.
    This limitation will be addressed shortly, by allowing regular 
    users to load their registry hive - Ben [2008-09-31]
    **************************************************************/
    bool load_profile = false,
         run_as_owner = false;
    JobAd->LookupBool ( ATTR_JOB_LOAD_PROFILE, load_profile );
    JobAd->LookupBool ( ATTR_JOB_RUNAS_OWNER,  run_as_owner );
    if ( load_profile && !run_as_owner ) {
        if ( owner_profile_.load () ) {
            /* publish the users environment into that of the main 

            job's environment */
            if ( !owner_profile_.environment ( job_env ) ) {
                dprintf ( D_ALWAYS, "OsProc::StartJob(): Failed to "
                    "export owner's environment.\n" );
            }            
        } else {
            dprintf ( D_ALWAYS, "OsProc::StartJob(): Failed to load "
                "owner's profile.\n" );
        }
    }
#endif

		// While we are still in user priv, print out the username
#if defined(LINUX)
	if( Starter->glexecPrivSepHelper() ) {
			// TODO: if there is some way to figure out the final username,
			// print it out here or after starting the job.
		dprintf(D_ALWAYS,"Running job via glexec\n");
	}
#else
	if( false ) {
	}
#endif
	else {
		char const *username = NULL;
		char const *how = "";
		CondorPrivSepHelper* cpsh = Starter->condorPrivSepHelper();
		if( cpsh ) {
			username = cpsh->get_user_name();
			how = "via privsep switchboard ";
		}
		else {
			username = get_user_loginname();
		}
		if( !username ) {
			username = "******";
		}
		dprintf(D_ALWAYS,"Running job %sas user %s\n",how,username);
	}

	set_priv ( priv );

    // use this to return more detailed and reliable error message info
    // from create-process operation.
    MyString create_process_err_msg;

	if (privsep_helper != NULL) {
		const char* std_file_names[3] = {
			privsep_stdin_name.Value(),
			privsep_stdout_name.Value(),
			privsep_stderr_name.Value()
		};
		JobPid = privsep_helper->create_process(JobName.Value(),
		                                        args,
		                                        job_env,
		                                        job_iwd,
		                                        fds,
		                                        std_file_names,
		                                        nice_inc,
		                                        core_size_ptr,
		                                        1,
		                                        job_opt_mask,
		                                        family_info,
												affinity_mask,
												&create_process_err_msg);
	}
	else {
		JobPid = daemonCore->Create_Process( JobName.Value(),
		                                     args,
		                                     PRIV_USER_FINAL,
		                                     1,
		                                     FALSE,
		                                     FALSE,
		                                     &job_env,
		                                     job_iwd,
		                                     family_info,
		                                     NULL,
		                                     fds,
		                                     NULL,
		                                     nice_inc,
		                                     NULL,
		                                     job_opt_mask, 
		                                     core_size_ptr,
                                             affinity_mask,
											 NULL,
                                             &create_process_err_msg,
                                             fs_remap,
											 rlimit_as_hard_limit);
	}

	// Create_Process() saves the errno for us if it is an "interesting" error.
	int create_process_errno = errno;

    // errno is 0 in the privsep case.  This executes for the daemon core create-process logic
    if ((FALSE == JobPid) && (0 != create_process_errno)) {
        if (create_process_err_msg != "") create_process_err_msg += " ";
        MyString errbuf;
        errbuf.formatstr("(errno=%d: '%s')", create_process_errno, strerror(create_process_errno));
        create_process_err_msg += errbuf;
    }

	// now close the descriptors in fds array.  our child has inherited
	// them already, so we should close them so we do not leak descriptors.
	// NOTE, we want to use a special method to close the starter's
	// versions, if that's what we're using, so we don't think we've
	// still got those available in other parts of the code for any
	// reason.
	for ( int i = 0; i <= 2; i++ ) {
		if ( fds[i] >= 0 ) {
			daemonCore->Close_FD ( fds[i] );
		}
	}

	if ( JobPid == FALSE ) {
		JobPid = -1;

		if(!create_process_err_msg.IsEmpty()) {

			// if the reason Create_Process failed was that registering
			// a family with the ProcD failed, it is indicative of a
			// problem regarding this execute machine, not the job. in
			// this case, we'll want to EXCEPT instead of telling the
			// Shadow to put the job on hold. there are probably other
			// error conditions where EXCEPTing would be more appropriate
			// as well...
			//
			if (create_process_errno == DaemonCore::ERRNO_REGISTRATION_FAILED) {
				EXCEPT("Create_Process failed to register the job with the ProcD");
			}

			MyString err_msg = "Failed to execute '";
			err_msg += JobName;
			err_msg += "'";
			if(!args_string.IsEmpty()) {
				err_msg += " with arguments ";
				err_msg += args_string.Value();
			}
			err_msg += ": ";
			err_msg += create_process_err_msg;
			if( !ThisProcRunsAlongsideMainProc() ) {
				Starter->jic->notifyStarterError( err_msg.Value(),
			    	                              true,
			        	                          CONDOR_HOLD_CODE_FailedToCreateProcess,
			            	                      create_process_errno );
			}
		}

		dprintf(D_ALWAYS,"Create_Process(%s,%s, ...) failed: %s\n",
			JobName.Value(), args_string.Value(), create_process_err_msg.Value());
		return 0;
	}

	num_pids++;

	dprintf(D_ALWAYS,"Create_Process succeeded, pid=%d\n",JobPid);

	job_start_time.getTime();

	return 1;
}
Example #5
0
int JavaProc::StartJob()
{
	
	MyString java_cmd;
	char* jarfiles = NULL;
	ArgList args;
	MyString arg_buf;

	// Since we are adding to the argument list, we may need to deal
	// with platform-specific arg syntax in the user's args in order
	// to successfully merge them with the additional java VM args.
	args.SetArgV1SyntaxToCurrentPlatform();

	// Construct the list of jar files for the command line
	// If a jar file is transferred locally, use its local name
	// (in the execute directory)
	// otherwise use the original name

	StringList jarfiles_orig_list;
	StringList jarfiles_local_list;
	StringList* jarfiles_final_list = NULL;

	if( JobAd->LookupString(ATTR_JAR_FILES,&jarfiles) ) {
		jarfiles_orig_list.initializeFromString( jarfiles );
		free( jarfiles );
		jarfiles = NULL;

		char * jarfile_name;
		const char * base_name;
		struct stat stat_buff;
		if( Starter->jic->iwdIsChanged() ) {
				// If the job's IWD has been changed (because we're
				// running in the sandbox due to file transfer), we
				// need to use a local version of the path to the jar
				// files, not the full paths from the submit machine. 
			jarfiles_orig_list.rewind();
			while( (jarfile_name = jarfiles_orig_list.next()) ) {
					// Construct the local name
				base_name = condor_basename( jarfile_name );
				MyString local_name = execute_dir;
				local_name += DIR_DELIM_CHAR;
				local_name += base_name; 

				if( stat(local_name.Value(), &stat_buff) == 0 ) {
						// Jar file exists locally, use local name
					jarfiles_local_list.append( local_name.Value() );
				} else {
						// Use the original name
					jarfiles_local_list.append (jarfile_name);
				}
			} // while(jarfiles_orig_list)

				// jarfiles_local_list is our real copy...
			jarfiles_final_list = &jarfiles_local_list;

		} else {  // !iwdIsChanged()

				// just use jarfiles_orig_list as our real copy...
			jarfiles_final_list = &jarfiles_orig_list;
		}			
	}

	startfile.formatstr("%s%cjvm.start",execute_dir,DIR_DELIM_CHAR);
	endfile.formatstr("%s%cjvm.end",execute_dir,DIR_DELIM_CHAR);

	if( !java_config(java_cmd,&args,jarfiles_final_list) ) {
		dprintf(D_FAILURE|D_ALWAYS,"JavaProc: Java is not configured!\n");
		return 0;
	}

	JobAd->Assign(ATTR_JOB_CMD, java_cmd.Value());

	arg_buf.formatstr("-Dchirp.config=%s%cchirp.config",execute_dir,DIR_DELIM_CHAR);
	args.AppendArg(arg_buf.Value());

	char *jvm_args1 = NULL;
	char *jvm_args2 = NULL;
	MyString jvm_args_error;
	bool jvm_args_success = true;
	JobAd->LookupString(ATTR_JOB_JAVA_VM_ARGS1, &jvm_args1);
	JobAd->LookupString(ATTR_JOB_JAVA_VM_ARGS2, &jvm_args2);
	if(jvm_args2) {
		jvm_args_success = args.AppendArgsV2Raw(jvm_args2, &jvm_args_error);
	}
	else if(jvm_args1) {
		jvm_args_success = args.AppendArgsV1Raw(jvm_args1, &jvm_args_error);
	}
	free(jvm_args1);
	free(jvm_args2);
	if (!jvm_args_success) {
		dprintf(D_ALWAYS, "JavaProc: failed to parse JVM args: %s\n",
				jvm_args_error.Value());
		return 0;
	}

	args.AppendArg("CondorJavaWrapper");
	args.AppendArg(startfile.Value());
	args.AppendArg(endfile.Value());

	MyString args_error;
	if(!args.AppendArgsFromClassAd(JobAd,&args_error)) {
		dprintf(D_ALWAYS,"JavaProc: failed to read job arguments: %s\n",
				args_error.Value());
		return 0;
	}

	// We are just talking to ourselves, so it is fine to use argument
	// syntax compatible with this current version of Condor.
	CondorVersionInfo ver_info;
	if(!args.InsertArgsIntoClassAd(JobAd,&ver_info,&args_error)) {
		dprintf(D_ALWAYS,"JavaProc: failed to insert java job arguments: %s\n",
				args_error.Value());
		return 0;
	}

	dprintf(D_ALWAYS,"JavaProc: Cmd=%s\n",java_cmd.Value());
	MyString args_string;
	args.GetArgsStringForDisplay(&args_string);
	dprintf(D_ALWAYS,"JavaProc: Args=%s\n",args_string.Value());

	return VanillaProc::StartJob();
}