Пример #1
0
void
UserProc::execute()
{
	ArgList new_args;
	char    **argv;
	char	**argp;
	char	**envp;
	sigset_t	sigmask;
	MyString	a_out_name;
	MyString	shortname;
	int		user_syscall_fd = -1;
	const	int READ_END = 0;
	const	int WRITE_END = 1;
	int		pipe_fds[2];
	FILE	*cmd_fp;
	char	buf[128];
	ReliSock	*new_reli = NULL;

	pipe_fds[0] = -1;
	pipe_fds[1] = -1;

	shortname.formatstr( "condor_exec.%d.%d", cluster, proc );
	a_out_name.formatstr( "%s/%s/%s", Execute, local_dir, shortname.Value() );

		// Set up arg vector according to class of job
	switch( job_class ) {

	  case CONDOR_UNIVERSE_STANDARD:
		if( pipe(pipe_fds) < 0 ) {
			EXCEPT( "pipe()" );}

			dprintf( D_ALWAYS, "Pipe built\n" );
		
			// The user process should not try to read commands from
			// 0, 1, or 2 since we'll be using the commands to redirect
			// those.
		if( pipe_fds[READ_END] < 14 ) {
			dup2( pipe_fds[READ_END], 14 );
			close( pipe_fds[READ_END] );
			pipe_fds[READ_END] = 14;
		}
		dprintf( D_ALWAYS, "New pipe_fds[%d,%d]\n", pipe_fds[0], pipe_fds[1] );
		sprintf( buf, "%d", pipe_fds[READ_END] );
		dprintf( D_ALWAYS, "cmd_fd = %s\n", buf );

		new_args.AppendArg(shortname);
		new_args.AppendArg("-_condor_cmd_fd");
		new_args.AppendArg(buf);
		break;

	  case CONDOR_UNIVERSE_PVM:
#if 1
		EXCEPT( "Don't know how to deal with PVM jobs" );
#else
		new_args.AppendArg(shortname);
		new_args.AppendArg("-1");
		new_args.AppendArg(in);
		new_args.AppendArg(out);
		new_args.AppendArg(err);
#endif
		break;

	  case CONDOR_UNIVERSE_VANILLA:
	  	if (privsep_enabled()) {
			EXCEPT("Don't know how to deal with Vanilla jobs");
		}
		new_args.AppendArg(shortname.Value());
		break;
	}

	new_args.AppendArgsFromArgList(args);

		// take care of USER_JOB_WRAPPER
	support_job_wrapper(a_out_name,&new_args);

	MyString exec_name;
	exec_name = a_out_name;

	// If privsep is turned on, then we need to use the PrivSep
	// Switchboard to launch the job
	//
	FILE* switchboard_in_fp;
	FILE* switchboard_err_fp;
	int switchboard_child_in_fd;
	int switchboard_child_err_fd;
	if (privsep_enabled()) {

		// create the pipes that we'll use to communicate
		//
		if (!privsep_create_pipes(switchboard_in_fp,
		                          switchboard_child_in_fd,
		                          switchboard_err_fp,
		                          switchboard_child_err_fd)) {
			EXCEPT("can't launch job: privsep_create_pipes failure");
		}
	}

	argv = new_args.GetStringArray();

		// Set an environment variable that tells the job where it may put scratch data
		// even if it moves to a different directory.

		// get the environment vector
	envp = env_obj.getStringArray();

		// We may run more than one of these, so each needs its own
		// remote system call connection to the shadow
	if( job_class == CONDOR_UNIVERSE_PVM ) {
		new_reli = NewConnection( v_pid );
		user_syscall_fd = new_reli->get_file_desc();
	}

		// print out arguments to execve
	dprintf( D_ALWAYS, "Calling execve( \"%s\"", exec_name.Value() );
	for( argp = argv; *argp; argp++ ) {							// argv
		dprintf( D_ALWAYS | D_NOHEADER, ", \"%s\"", *argp );
	}
	dprintf( D_ALWAYS | D_NOHEADER, ", 0" );
	for( argp = envp; *argp; argp++ ) {							// envp
		dprintf( D_ALWAYS | D_NOHEADER, ", \"%s\"", *argp );
	}
	dprintf( D_ALWAYS | D_NOHEADER, ", 0 )\n" );


	if( (pid = fork()) < 0 ) {
		EXCEPT( "fork" );
	}

	if( pid == 0 ) {	// the child

			// Block only these 3 signals which have special meaning for
			// checkpoint/restart purposes.  Leave other signals ublocked
			// so that if we get an exception during the restart process,
			// we will get a core file to debug.
		sigemptyset( &sigmask );
		// for some reason if we block these, the user process is unable
		// to unblock some or all of them.
#if 0
		sigaddset( &sigmask, SIGUSR1 );
		sigaddset( &sigmask, SIGUSR2 );
		sigaddset( &sigmask, SIGTSTP );
#endif
		sigprocmask( SIG_SETMASK, &sigmask, 0 );

			// renice
		renice_self( "JOB_RENICE_INCREMENT" );

			// make certain the syscall sockets which are being passed
			// to the user job are setup to be blocking sockets.  this
			// is done by calling timeout(0) CEDAR method.
			// we must do this because the syscall lib does _not_ 
			// expect to see any failures due to errno EAGAIN...
		if ( SyscallStream ) {
			SyscallStream->timeout(0);
		}
		if ( new_reli ) {
			new_reli->timeout(0);
		}

			// If I'm using privledge separation, connect to the procd.
			// we need to register a family with the procd for the newly
			// created process, so that the ProcD will allow us to send
			// signals to it
			//
		if (privsep_enabled() == true) {
			MyString procd_address;
			bool response;
			bool ret;
			ProcFamilyClient pfc;

			procd_address = get_procd_address();
			ret = pfc.initialize(procd_address.Value());
			if (ret == false) {
				EXCEPT("Failure to initialize the ProcFamilyClient object");
			}

			ret = pfc.register_subfamily(getpid(), getppid(), 60, response);

			if (ret == false) {
				EXCEPT("Could not communicate with procd. Aborting.");
			}

			if (response == false) {
				EXCEPT("Procd refused to register job subfamily. Aborting.");
			}
		}

			// If there is a requested coresize for this job, enforce it.
			// Do it before the set_priv_final to ensure root can alter 
			// the coresize to the requested amount. Otherwise, just
			// use whatever the current default is.
		if (coredump_limit_exists == TRUE) {
			limit( RLIMIT_CORE, coredump_limit, CONDOR_HARD_LIMIT, "max core size" );
		}

			// child process should have only it's submitting uid, and cannot
			// switch back to root or some other uid.  
			// It'd be nice to check for errors here, but
			// unfortunately, we can't, since this only returns the
			// previous priv state, not whether it worked or not. 
			//  -Derek Wright 4/30/98
		set_user_priv_final();

		switch( job_class ) {
		  
		  case CONDOR_UNIVERSE_STANDARD:
			// if we're using PrivSep, the chdir here could fail. instead,
			// we pass the job's IWD to the switchboard via pipe
			//
		  	if (!privsep_enabled()) {
				if( chdir(local_dir) < 0 ) {
					EXCEPT( "chdir(%s)", local_dir );
				}
			}
			close( pipe_fds[WRITE_END] );
			break;

		  case CONDOR_UNIVERSE_PVM:
			if( chdir(local_dir) < 0 ) {
				EXCEPT( "chdir(%s)", local_dir );
			}
			close( pipe_fds[WRITE_END] );
			dup2( user_syscall_fd, RSC_SOCK );
			break;

		  case CONDOR_UNIVERSE_VANILLA:
			set_iwd();
			open_std_file( 0 );
			open_std_file( 1 );
			open_std_file( 2 );

			(void)close( RSC_SOCK );
			(void)close( CLIENT_LOG );

			break;
		}

			// Make sure we're not root
		if( getuid() == 0 ) {
				// EXCEPT( "We're about to start as root, aborting." );
				// You can't see this error message at all.  So, just
				// exit(4), which is what EXCEPT normally gives. 
			exit( 4 );
		}

#if defined( LINUX ) && (defined(I386) || defined(X86_64))
		// adjust the execution domain of the child to be suitable for
		// checkpointing.
		patch_personality();
#endif 

			// if we're using privsep, we'll exec the PrivSep Switchboard
			// first, which is setuid; it will then setuid to the user we
			// give it and exec the real job
			//
		if (privsep_enabled()) {
			close(fileno(switchboard_in_fp));
			close(fileno(switchboard_err_fp));
			privsep_get_switchboard_command("exec",
			                                switchboard_child_in_fd,
			                                switchboard_child_err_fd,
			                                exec_name,
			                                new_args);
			deleteStringArray(argv);
			argv = new_args.GetStringArray();
		}

			// Everything's ready, start it up...
		errno = 0;
		execve( exec_name.Value(), argv, envp );

			// A successful call to execve() never returns, so it is an
			// error if we get here.  A number of errors are possible
			// but the most likely is that there is insufficient swap
			// space to start the new process.  We don't try to log
			// anything, since we have the UID/GID of the job's owner
			// and cannot write into the log files...
		exit( JOB_EXEC_FAILED );
	}

		// The parent

		// PrivSep - we have at this point only spawned the switchboard
		// with the "exec" command. we need to use our pipe to it in
		// order to tell it how to execute the user job, and then use
		// the error pipe to make sure everything worked
		//
	if (privsep_enabled()) {

		close(switchboard_child_in_fd);
		close(switchboard_child_err_fd);

		privsep_exec_set_uid(switchboard_in_fp, uid);
		privsep_exec_set_path(switchboard_in_fp, exec_name.Value());
		privsep_exec_set_args(switchboard_in_fp, new_args);
		privsep_exec_set_env(switchboard_in_fp, env_obj);
		privsep_exec_set_iwd(switchboard_in_fp, local_dir);
		privsep_exec_set_inherit_fd(switchboard_in_fp, pipe_fds[0]);
		privsep_exec_set_inherit_fd(switchboard_in_fp, RSC_SOCK);
		privsep_exec_set_inherit_fd(switchboard_in_fp, CLIENT_LOG);
		privsep_exec_set_is_std_univ(switchboard_in_fp);
		fclose(switchboard_in_fp);

		if (!privsep_get_switchboard_response(switchboard_err_fp)) {
			EXCEPT("error starting job: "
			           "privsep get_switchboard_response failure");
		}
	}

	dprintf( D_ALWAYS, "Started user job - PID = %d\n", pid );
	if( job_class != CONDOR_UNIVERSE_VANILLA ) {
			// Send the user process its startup environment conditions
		close( pipe_fds[READ_END] );
		cmd_fp = fdopen( pipe_fds[WRITE_END], "w" );
		dprintf( D_ALWAYS, "cmd_fp = %p\n", cmd_fp );

		if( is_restart() ) {
#if 1
			fprintf( cmd_fp, "restart\n" );
			dprintf( D_ALWAYS, "restart\n" );
#else
			fprintf( cmd_fp, "restart %s\n", target_ckpt );
			dprintf( D_ALWAYS, "restart %s\n", target_ckpt );
#endif
			fprintf( cmd_fp, "end\n" );
			dprintf( D_ALWAYS, "end\n" );
		} else {
			fprintf( cmd_fp, "end\n" );
			dprintf( D_ALWAYS, "end\n" );
		}
		fclose( cmd_fp );
	}

	deleteStringArray(argv);
	deleteStringArray(envp);
	state = EXECUTING;

	if( new_reli ) {
		delete new_reli;
	}

	// removed some vanilla-specific code here
	//
	ASSERT(job_class != CONDOR_UNIVERSE_VANILLA);
}
Пример #2
0
// Construct the process-wide ProcFamilyProxy.
//
// address_suffix: optional (may be NULL) string appended, after a '.', to
// the configured ProcD address and to the PROCD_LOG path.
//
// The constructor EXCEPTs if a proxy has already been instantiated, if a
// needed ProcD cannot be started, or if the environment advertises a ProcD
// base address without the matching full address.  A ProcFamilyClient
// initialization failure is logged and handed to recover_from_procd_error().
ProcFamilyProxy::ProcFamilyProxy(const char* address_suffix) :
	m_procd_pid(-1),
	m_reaper_id(FALSE)
{
	const bool have_suffix = (address_suffix != NULL);

	// Singleton guard: a second instance is a fatal error.
	if (s_instantiated) {
		EXCEPT("ProcFamilyProxy: multiple instantiations");
	}
	s_instantiated = true;

	// Start from the configured ProcD address.  Keep an unsuffixed copy
	// (the "base") before appending the suffix: the suffix exists so that
	// several daemons sharing one PROCD_ADDRESS setting do not end up with
	// ProcDs fighting over the same command pipe.
	m_procd_addr = get_procd_address();
	MyString configured_base = m_procd_addr;
	if (have_suffix) {
		m_procd_addr.formatstr_cat(".%s", address_suffix);
	}

	// Work out the log file a freshly started ProcD would use, if one is
	// configured; it gets the same suffix to avoid collisions.
	char* log_setting = param("PROCD_LOG");
	if (log_setting != NULL) {
		m_procd_log = log_setting;
		free(log_setting);
		if (have_suffix) {
			m_procd_log.formatstr_cat(".%s", address_suffix);
		}
	}

	// The reaper helper has to exist before any ProcD is started.
	m_reaper_helper = new ProcFamilyProxyReaperHelper(this);
	ASSERT(m_reaper_helper != NULL);

	// Decide whether to start our own ProcD.  A parent daemon that already
	// runs one advertises its base address through the environment; we
	// reuse that ProcD only when the advertised base equals our configured
	// base.  Otherwise we start one and publish both addresses so that our
	// own children can share it.
	const char* inherited_base = GetEnv("CONDOR_PROCD_ADDRESS_BASE");
	const bool must_spawn =
		(inherited_base == NULL) || (configured_base != inherited_base);

	if (!must_spawn) {
		// Reuse the parent's ProcD: its full address must be present too.
		const char* inherited_addr = GetEnv("CONDOR_PROCD_ADDRESS");
		if (inherited_addr == NULL) {
			EXCEPT("CONDOR_PROCD_ADDRESS_BASE in environment "
			           "but not CONDOR_PROCD_ADDRESS");
		}
		m_procd_addr = inherited_addr;
	}
	else {
		const bool started = start_procd();
		if (!started) {
			EXCEPT("unable to spawn the ProcD");
		}
		SetEnv("CONDOR_PROCD_ADDRESS_BASE", configured_base.Value());
		SetEnv("CONDOR_PROCD_ADDRESS", m_procd_addr.Value());
	}

	// Finally, build the client used to talk to whichever ProcD we chose.
	m_client = new ProcFamilyClient;
	ASSERT(m_client != NULL);
	const bool client_ready = m_client->initialize(m_procd_addr.Value());
	if (!client_ready) {
		dprintf(D_ALWAYS,
		        "ProcFamilyProxy: error initializing ProcFamilyClient\n");
		recover_from_procd_error();
	}
}