Beispiel #1
0
bool
JobInfoCommunicator::initUserPrivNoOwner( void )
{
    // first, bale out if we really need ATTR_OWNER...
#ifdef WIN32
    return false;
#else
    // if we're root, we need ATTR_OWNER...
    if( getuid() == 0 ) {
        return false;
    }
#endif

    // if we're using PrivSep, we need ATTR_OWNER
    if (Starter->privSepHelper() != NULL) {
        return false;
    }

    // otherwise, we can't switch privs anyway, so consider
    // ourselves done. :)
    dprintf( D_FULLDEBUG,
             "Starter running as '%s', no uid switching possible\n",
             get_real_username() );
    user_priv_is_initialized = true;
    return true;
}
Beispiel #2
0
bool
HTCondorPeek::get_transfer_queue_slot()
{
	if( !m_xfer_q ) {
		return true;
	}

	char jobid[PROC_ID_STR_BUFLEN];
	ProcIdToStr(m_id,jobid);

		// queue_user determines which transfer queue we line up in.
		// Rather than using the same queue used by the shadow when it
		// transfers files for this job, we use a queue that is
		// specific to condor_tail for this user.  That way,
		// interactive use does not have to wait behind a potentially
		// large queue of shadow transfers.
	std::string queue_user;
	char const *username = get_real_username();
	if( !username ) {
		username = "******";
	}
	formatstr(queue_user,"%s:condor_tail",username);

		// for logging purposes, tell the xfer queue which file we are
		// initially accessing
	char const *fname = "condor_tail";
	if( m_transfer_stdout ) {
		fname = "stdout";
	}
	else if( m_transfer_stderr ) {
		fname = "stderr";
	}
	else if( m_filenames.size() > 0 ) {
		fname = m_filenames[0].c_str();
	}

	dprintf(D_ALWAYS,"Requesting GoAhead from the transfer queue manager.\n");

	int timeout = 60;
	MyString error_msg;
	if( !m_xfer_q->RequestTransferQueueSlot(true,0,fname,jobid,queue_user.c_str(),timeout,error_msg) ) {
		std::cerr << error_msg.Value() << std::endl;
		return false;
	}

	while( true ) {
		bool pending = true;

		if( !m_xfer_q->PollForTransferQueueSlot(timeout,pending,error_msg) && !pending ) {
			std::cerr << error_msg.Value() << std::endl;
			return false;
		}

		if( !pending ) break;
	}

	dprintf(D_ALWAYS,"Received GoAhead from the transfer queue manager.\n");
	return true;
}
Beispiel #3
0
int
OsProc::StartJob(FamilyInfo* family_info, NetworkNamespaceManager * network_manager = NULL, FilesystemRemap* fs_remap=NULL)
{
	int nice_inc = 0;
	bool has_wrapper = false;

	dprintf(D_FULLDEBUG,"in OsProc::StartJob()\n");

	if ( !JobAd ) {
		dprintf ( D_ALWAYS, "No JobAd in OsProc::StartJob()!\n" );
		return 0;
	}

	MyString JobName;
	if ( JobAd->LookupString( ATTR_JOB_CMD, JobName ) != 1 ) {
		dprintf( D_ALWAYS, "%s not found in JobAd.  Aborting StartJob.\n", 
				 ATTR_JOB_CMD );
		return 0;
	}

	const char* job_iwd = Starter->jic->jobRemoteIWD();
	dprintf( D_ALWAYS, "IWD: %s\n", job_iwd );

		// some operations below will require a PrivSepHelper if
		// PrivSep is enabled (if it's not, privsep_helper will be
		// NULL)
	PrivSepHelper* privsep_helper = Starter->privSepHelper();

		// // // // // // 
		// Arguments
		// // // // // // 

		// prepend the full path to this name so that we
		// don't have to rely on the PATH inside the
		// USER_JOB_WRAPPER or for exec().

    bool transfer_exe = false;
    if (!JobAd->LookupBool(ATTR_TRANSFER_EXECUTABLE, transfer_exe)) {
        transfer_exe = false;
    }

    bool preserve_rel = false;
    if (!JobAd->LookupBool(ATTR_PRESERVE_RELATIVE_EXECUTABLE, preserve_rel)) {
        preserve_rel = false;
    }

    bool relative_exe = is_relative_to_cwd(JobName.Value());

    if (relative_exe && preserve_rel && !transfer_exe) {
        dprintf(D_ALWAYS, "Preserving relative executable path: %s\n", JobName.Value());
    }
	else if ( strcmp(CONDOR_EXEC,JobName.Value()) == 0 ) {
		JobName.sprintf( "%s%c%s",
		                 Starter->GetWorkingDir(),
		                 DIR_DELIM_CHAR,
		                 CONDOR_EXEC );
    }
	else if (relative_exe && job_iwd && *job_iwd) {
		MyString full_name;
		full_name.sprintf("%s%c%s",
		                  job_iwd,
		                  DIR_DELIM_CHAR,
		                  JobName.Value());
		JobName = full_name;

	}

	if( Starter->isGridshell() ) {
			// if we're a gridshell, just try to chmod our job, since
			// globus probably transfered it for us and left it with
			// bad permissions...
		priv_state old_priv = set_user_priv();
		int retval = chmod( JobName.Value(), S_IRWXU | S_IRWXO | S_IRWXG );
		set_priv( old_priv );
		if( retval < 0 ) {
			dprintf ( D_ALWAYS, "Failed to chmod %s!\n", JobName.Value() );
			return 0;
		}
	} 

	ArgList args;

		// Since we may be adding to the argument list, we may need to deal
		// with platform-specific arg syntax in the user's args in order
		// to successfully merge them with the additional wrapper args.
	args.SetArgV1SyntaxToCurrentPlatform();

		// First, put "condor_exec" or whatever at the front of Args,
		// since that will become argv[0] of what we exec(), either
		// the wrapper or the actual job.

	if( !getArgv0() ) {
		args.AppendArg(JobName.Value());
	} else {
		args.AppendArg(getArgv0());
	}
	
		// Support USER_JOB_WRAPPER parameter...
	char *wrapper = NULL;
	if( (wrapper=param("USER_JOB_WRAPPER")) ) {

			// make certain this wrapper program exists and is executable
		if( access(wrapper,X_OK) < 0 ) {
			dprintf( D_ALWAYS, 
					 "Cannot find/execute USER_JOB_WRAPPER file %s\n",
					 wrapper );
			free( wrapper );
			return 0;
		}
		has_wrapper = true;
			// Now, we've got a valid wrapper.  We want that to become
			// "JobName" so we exec it directly, and we want to put
			// what was the JobName (with the full path) as the first
			// argument to the wrapper
		args.AppendArg(JobName.Value());
		JobName = wrapper;
		free(wrapper);
	}
	
		// Support USE_PARROT 
	bool use_parrot = false;
	if( JobAd->LookupBool( ATTR_USE_PARROT, use_parrot) ) {
			// Check for parrot executable
		char *parrot = NULL;
		if( (parrot=param("PARROT")) ) {
			if( access(parrot,X_OK) < 0 ) {
				dprintf( D_ALWAYS, "Unable to use parrot(Cannot find/execute "
					"at %s(%s)).\n", parrot, strerror(errno) );
				free( parrot );
				return 0;
			} else {
				args.AppendArg(JobName.Value());
				JobName = parrot;
				free( parrot );
			}
		} else {
			dprintf( D_ALWAYS, "Unable to use parrot(Undefined path in config"
			" file)" );
			return 0;
		}
	}

		// Either way, we now have to add the user-specified args as
		// the rest of the Args string.
	MyString args_error;
	if(!args.AppendArgsFromClassAd(JobAd,&args_error)) {
		dprintf(D_ALWAYS, "Failed to read job arguments from JobAd.  "
				"Aborting OsProc::StartJob: %s\n",args_error.Value());
		return 0;
	}

		// // // // // // 
		// Environment 
		// // // // // // 

		// Now, instantiate an Env object so we can manipulate the
		// environment as needed.
	Env job_env;

	MyString env_errors;
	if( !Starter->GetJobEnv(JobAd,&job_env,&env_errors) ) {
		dprintf( D_ALWAYS, "Aborting OSProc::StartJob: %s\n",
				 env_errors.Value());
		return 0;
	}


		// // // // // // 
		// Standard Files
		// // // // // // 

	// handle stdin, stdout, and stderr redirection
	int fds[3];
		// initialize these to -2 to mean they're not specified.
		// -1 will be treated as an error.
	fds[0] = -2; fds[1] = -2; fds[2] = -2;

		// in order to open these files we must have the user's privs:
	priv_state priv;
	priv = set_user_priv();

		// if we're in PrivSep mode, we won't necessarily be able to
		// open the files for the job. getStdFile will return us an
		// open FD in some situations, but otherwise will give us
		// a filename that we'll pass to the PrivSep Switchboard
		//
	bool stdin_ok;
	bool stdout_ok;
	bool stderr_ok;
	MyString privsep_stdin_name;
	MyString privsep_stdout_name;
	MyString privsep_stderr_name;
	if (privsep_helper != NULL) {
		stdin_ok = getStdFile(SFT_IN,
		                      NULL,
		                      true,
		                      "Input file",
		                      &fds[0],
		                      &privsep_stdin_name);
		stdout_ok = getStdFile(SFT_OUT,
		                       NULL,
		                       true,
		                       "Output file",
		                       &fds[1],
		                       &privsep_stdout_name);
		stderr_ok = getStdFile(SFT_ERR,
		                       NULL,
		                       true,
		                       "Error file",
		                       &fds[2],
		                       &privsep_stderr_name);
	}
	else {
		fds[0] = openStdFile( SFT_IN,
		                      NULL,
		                      true,
		                      "Input file");
		stdin_ok = (fds[0] != -1);
		fds[1] = openStdFile( SFT_OUT,
		                      NULL,
		                      true,
		                      "Output file");
		stdout_ok = (fds[1] != -1);
		fds[2] = openStdFile( SFT_ERR,
		                      NULL,
		                      true,
		                      "Error file");
		stderr_ok = (fds[2] != -1);
	}

	/* Bail out if we couldn't open the std files correctly */
	if( !stdin_ok || !stdout_ok || !stderr_ok ) {
		/* only close ones that had been opened correctly */
		for ( int i = 0; i <= 2; i++ ) {
			if ( fds[i] >= 0 ) {
				daemonCore->Close_FD ( fds[i] );
			}
		}
		dprintf(D_ALWAYS, "Failed to open some/all of the std files...\n");
		dprintf(D_ALWAYS, "Aborting OsProc::StartJob.\n");
		set_priv(priv); /* go back to original priv state before leaving */
		return 0;
	}

		// // // // // // 
		// Misc + Exec
		// // // // // // 

	if( !ThisProcRunsAlongsideMainProc() ) {
		Starter->jic->notifyJobPreSpawn();
	}

	// compute job's renice value by evaluating the machine's
	// JOB_RENICE_INCREMENT in the context of the job ad...

    char* ptmp = param( "JOB_RENICE_INCREMENT" );
	if( ptmp ) {
			// insert renice expr into our copy of the job ad
		MyString reniceAttr = "Renice = ";
		reniceAttr += ptmp;
		if( !JobAd->Insert( reniceAttr.Value() ) ) {
			dprintf( D_ALWAYS, "ERROR: failed to insert JOB_RENICE_INCREMENT "
				"into job ad, Aborting OsProc::StartJob...\n" );
			free( ptmp );
			return 0;
		}
			// evaluate
		if( JobAd->EvalInteger( "Renice", NULL, nice_inc ) ) {
			dprintf( D_ALWAYS, "Renice expr \"%s\" evaluated to %d\n",
					 ptmp, nice_inc );
		} else {
			dprintf( D_ALWAYS, "WARNING: job renice expr (\"%s\") doesn't "
					 "eval to int!  Using default of 10...\n", ptmp );
			nice_inc = 10;
		}

			// enforce valid ranges for nice_inc
		if( nice_inc < 0 ) {
			dprintf( D_FULLDEBUG, "WARNING: job renice value (%d) is too "
					 "low: adjusted to 0\n", nice_inc );
			nice_inc = 0;
		}
		else if( nice_inc > 19 ) {
			dprintf( D_FULLDEBUG, "WARNING: job renice value (%d) is too "
					 "high: adjusted to 19\n", nice_inc );
			nice_inc = 19;
		}

		ASSERT( ptmp );
		free( ptmp );
		ptmp = NULL;
	} else {
			// if JOB_RENICE_INCREMENT is undefined, default to 10
		nice_inc = 10;
	}

		// in the below dprintfs, we want to skip past argv[0], which
		// is sometimes condor_exec, in the Args string. 

	MyString args_string;
	args.GetArgsStringForDisplay(&args_string, 1);
	if( has_wrapper ) { 
			// print out exactly what we're doing so folks can debug
			// it, if they need to.
		dprintf( D_ALWAYS, "Using wrapper %s to exec %s\n", JobName.Value(), 
				 args_string.Value() );

		MyString wrapper_err;
		wrapper_err.sprintf("%s%c%s", Starter->GetWorkingDir(),
				 	DIR_DELIM_CHAR,
					JOB_WRAPPER_FAILURE_FILE);
		if( ! job_env.SetEnv("_CONDOR_WRAPPER_ERROR_FILE", wrapper_err.Value()) ) {
			dprintf( D_ALWAYS, "Failed to set _CONDOR_WRAPPER_ERROR_FILE environment variable\n");
		}
	} else {
		dprintf( D_ALWAYS, "About to exec %s %s\n", JobName.Value(),
				 args_string.Value() );
	}

	MyString path;
	path.sprintf("%s%c%s", Starter->GetWorkingDir(),
			 	DIR_DELIM_CHAR,
				MACHINE_AD_FILENAME);
	if( ! job_env.SetEnv("_CONDOR_MACHINE_AD", path.Value()) ) {
		dprintf( D_ALWAYS, "Failed to set _CONDOR_MACHINE_AD environment variable\n");
	}

	path.sprintf("%s%c%s", Starter->GetWorkingDir(),
			 	DIR_DELIM_CHAR,
				JOB_AD_FILENAME);
	if( ! job_env.SetEnv("_CONDOR_JOB_AD", path.Value()) ) {
		dprintf( D_ALWAYS, "Failed to set _CONDOR_JOB_AD environment variable\n");
	}

		// Grab the full environment back out of the Env object 
	if(IsFulldebug(D_FULLDEBUG)) {
		MyString env_string;
		job_env.getDelimitedStringForDisplay(&env_string);
		dprintf(D_FULLDEBUG, "Env = %s\n", env_string.Value());
	}

	// Check to see if we need to start this process paused, and if
	// so, pass the right flag to DC::Create_Process().
	int job_opt_mask = DCJOBOPT_NO_CONDOR_ENV_INHERIT;
	if (!param_boolean("JOB_INHERITS_STARTER_ENVIRONMENT",false)) {
		job_opt_mask |= DCJOBOPT_NO_ENV_INHERIT;
	}
	int suspend_job_at_exec = 0;
	JobAd->LookupBool( ATTR_SUSPEND_JOB_AT_EXEC, suspend_job_at_exec);
	if( suspend_job_at_exec ) {
		dprintf( D_FULLDEBUG, "OsProc::StartJob(): "
				 "Job wants to be suspended at exec\n" );
		job_opt_mask |= DCJOBOPT_SUSPEND_ON_EXEC;
	}

	// If there is a requested coresize for this job, enforce it.
	// It is truncated because you can't put an unsigned integer
	// into a classad. I could rewrite condor's use of ATTR_CORE_SIZE to
	// be a float, but then when that attribute is read/written to the
	// job queue log by/or shared between versions of Condor which view the
	// type of that attribute differently, calamity would arise.
	int core_size_truncated;
	size_t core_size;
	size_t *core_size_ptr = NULL;
	if ( JobAd->LookupInteger( ATTR_CORE_SIZE, core_size_truncated ) ) {
		core_size = (size_t)core_size_truncated;
		core_size_ptr = &core_size;
	}

	long rlimit_as_hard_limit = 0;
	char *rlimit_expr = param("STARTER_RLIMIT_AS");
	if (rlimit_expr) {
		classad::ClassAdParser parser;

		classad::ExprTree *tree = parser.ParseExpression(rlimit_expr);
		if (tree) {
			classad::Value val;
			int result;

			if (EvalExprTree(tree, Starter->jic->machClassAd(), JobAd, val) && 
				val.IsIntegerValue(result)) {
					rlimit_as_hard_limit = ((long)result) * 1024 * 1024;
					dprintf(D_ALWAYS, "Setting job's virtual memory rlimit to %ld megabytes\n", rlimit_as_hard_limit);
			} else {
				dprintf(D_ALWAYS, "Can't evaluate STARTER_RLIMIT_AS expression %s\n", rlimit_expr);
			}
		} else {
			dprintf(D_ALWAYS, "Can't parse STARTER_RLIMIT_AS expression: %s\n", rlimit_expr);
		}
	}

	int *affinity_mask = makeCpuAffinityMask(Starter->getMySlotNumber());

#if defined ( WIN32 )
    owner_profile_.update ();
    /*************************************************************
    NOTE: We currently *ONLY* support loading slot-user profiles.
    This limitation will be addressed shortly, by allowing regular 
    users to load their registry hive - Ben [2008-09-31]
    **************************************************************/
    bool load_profile = false,
         run_as_owner = false;
    JobAd->LookupBool ( ATTR_JOB_LOAD_PROFILE, load_profile );
    JobAd->LookupBool ( ATTR_JOB_RUNAS_OWNER,  run_as_owner );
    if ( load_profile && !run_as_owner ) {
        if ( owner_profile_.load () ) {
            /* publish the users environment into that of the main 

            job's environment */
            if ( !owner_profile_.environment ( job_env ) ) {
                dprintf ( D_ALWAYS, "OsProc::StartJob(): Failed to "
                    "export owner's environment.\n" );
            }            
        } else {
            dprintf ( D_ALWAYS, "OsProc::StartJob(): Failed to load "
                "owner's profile.\n" );
        }
    }
#endif

		// While we are still in user priv, print out the username
#if defined(LINUX)
	if( Starter->glexecPrivSepHelper() ) {
			// TODO: if there is some way to figure out the final username,
			// print it out here or after starting the job.
		dprintf(D_ALWAYS,"Running job via glexec\n");
	}
#else
	if( false ) {
	}
#endif
	else {
		char const *username = NULL;
		char const *how = "";
		CondorPrivSepHelper* cpsh = Starter->condorPrivSepHelper();
		if( cpsh ) {
			username = cpsh->get_user_name();
			how = "via privsep switchboard ";
		}
		else {
			username = get_real_username();
		}
		if( !username ) {
			username = "******";
		}
		dprintf(D_ALWAYS,"Running job %sas user %s\n",how,username);
	}

	set_priv ( priv );

    // use this to return more detailed and reliable error message info
    // from create-process operation.
    MyString create_process_err_msg;

	if (privsep_helper != NULL) {
		const char* std_file_names[3] = {
			privsep_stdin_name.Value(),
			privsep_stdout_name.Value(),
			privsep_stderr_name.Value()
		};
		JobPid = privsep_helper->create_process(JobName.Value(),
		                                        args,
		                                        job_env,
		                                        job_iwd,
		                                        fds,
		                                        std_file_names,
		                                        nice_inc,
		                                        core_size_ptr,
		                                        1,
		                                        job_opt_mask,
		                                        family_info,
												affinity_mask,
												&create_process_err_msg);
	}
	else {
		JobPid = daemonCore->Create_Process( JobName.Value(),
		                                     args,
		                                     PRIV_USER_FINAL,
		                                     1,
		                                     FALSE,
		                                     &job_env,
		                                     job_iwd,
		                                     family_info,
		                                     NULL,
		                                     fds,
		                                     NULL,
		                                     nice_inc,
		                                     NULL,
		                                     job_opt_mask, 
		                                     core_size_ptr,
                                             affinity_mask,
											 NULL,
                                             &create_process_err_msg,
					     fs_remap,
					     rlimit_as_hard_limit,
                                             network_manager);
	}

	// Create_Process() saves the errno for us if it is an "interesting" error.
	int create_process_errno = errno;

    // errno is 0 in the privsep case.  This executes for the daemon core create-process logic
    if ((FALSE == JobPid) && (0 != create_process_errno)) {
        if (create_process_err_msg != "") create_process_err_msg += " ";
        MyString errbuf;
        errbuf.sprintf("(errno=%d: '%s')", create_process_errno, strerror(create_process_errno));
        create_process_err_msg += errbuf;
    }

	// now close the descriptors in fds array.  our child has inherited
	// them already, so we should close them so we do not leak descriptors.
	// NOTE, we want to use a special method to close the starter's
	// versions, if that's what we're using, so we don't think we've
	// still got those available in other parts of the code for any
	// reason.
	for ( int i = 0; i <= 2; i++ ) {
		if ( fds[i] >= 0 ) {
			daemonCore->Close_FD ( fds[i] );
		}
	}

	if ( JobPid == FALSE ) {
		JobPid = -1;

		if(!create_process_err_msg.IsEmpty()) {

			// if the reason Create_Process failed was that registering
			// a family with the ProcD failed, it is indicative of a
			// problem regarding this execute machine, not the job. in
			// this case, we'll want to EXCEPT instead of telling the
			// Shadow to put the job on hold. there are probably other
			// error conditions where EXCEPTing would be more appropriate
			// as well...
			//
			if (create_process_errno == DaemonCore::ERRNO_REGISTRATION_FAILED) {
				EXCEPT("Create_Process failed to register the job with the ProcD");
			}

			MyString err_msg = "Failed to execute '";
			err_msg += JobName;
			err_msg += "'";
			if(!args_string.IsEmpty()) {
				err_msg += " with arguments ";
				err_msg += args_string.Value();
			}
			err_msg += ": ";
			err_msg += create_process_err_msg;
			if( !ThisProcRunsAlongsideMainProc() ) {
				Starter->jic->notifyStarterError( err_msg.Value(),
			    	                              true,
			        	                          CONDOR_HOLD_CODE_FailedToCreateProcess,
			            	                      create_process_errno );
			}
		}

		dprintf(D_ALWAYS,"Create_Process(%s,%s, ...) failed: %s\n",
			JobName.Value(), args_string.Value(), create_process_err_msg.Value());
		return 0;
	}

	num_pids++;

	dprintf(D_ALWAYS,"Create_Process succeeded, pid=%d\n",JobPid);

	job_start_time.getTime();

	return 1;
}