void UserProc::execute() { ArgList new_args; char **argv; char **argp; char **envp; sigset_t sigmask; MyString a_out_name; MyString shortname; int user_syscall_fd = -1; const int READ_END = 0; const int WRITE_END = 1; int pipe_fds[2]; FILE *cmd_fp; char buf[128]; ReliSock *new_reli = NULL; pipe_fds[0] = -1; pipe_fds[1] = -1; shortname.formatstr( "condor_exec.%d.%d", cluster, proc ); a_out_name.formatstr( "%s/%s/%s", Execute, local_dir, shortname.Value() ); // Set up arg vector according to class of job switch( job_class ) { case CONDOR_UNIVERSE_STANDARD: if( pipe(pipe_fds) < 0 ) { EXCEPT( "pipe()" );} dprintf( D_ALWAYS, "Pipe built\n" ); // The user process should not try to read commands from // 0, 1, or 2 since we'll be using the commands to redirect // those. if( pipe_fds[READ_END] < 14 ) { dup2( pipe_fds[READ_END], 14 ); close( pipe_fds[READ_END] ); pipe_fds[READ_END] = 14; } dprintf( D_ALWAYS, "New pipe_fds[%d,%d]\n", pipe_fds[0], pipe_fds[1] ); sprintf( buf, "%d", pipe_fds[READ_END] ); dprintf( D_ALWAYS, "cmd_fd = %s\n", buf ); new_args.AppendArg(shortname); new_args.AppendArg("-_condor_cmd_fd"); new_args.AppendArg(buf); break; case CONDOR_UNIVERSE_PVM: #if 1 EXCEPT( "Don't know how to deal with PVM jobs" ); #else new_args.AppendArg(shortname); new_args.AppendArg("-1"); new_args.AppendArg(in); new_args.AppendArg(out); new_args.AppendArg(err); #endif break; case CONDOR_UNIVERSE_VANILLA: if (privsep_enabled()) { EXCEPT("Don't know how to deal with Vanilla jobs"); } new_args.AppendArg(shortname.Value()); break; } new_args.AppendArgsFromArgList(args); // take care of USER_JOB_WRAPPER support_job_wrapper(a_out_name,&new_args); MyString exec_name; exec_name = a_out_name; // If privsep is turned on, then we need to use the PrivSep // Switchboard to launch the job // FILE* switchboard_in_fp; FILE* switchboard_err_fp; int switchboard_child_in_fd; int switchboard_child_err_fd; if (privsep_enabled()) { // create the pipes that we'll use to communicate // if 
(!privsep_create_pipes(switchboard_in_fp, switchboard_child_in_fd, switchboard_err_fp, switchboard_child_err_fd)) { EXCEPT("can't launch job: privsep_create_pipes failure"); } } argv = new_args.GetStringArray(); // Set an environment variable that tells the job where it may put scratch data // even if it moves to a different directory. // get the environment vector envp = env_obj.getStringArray(); // We may run more than one of these, so each needs its own // remote system call connection to the shadow if( job_class == CONDOR_UNIVERSE_PVM ) { new_reli = NewConnection( v_pid ); user_syscall_fd = new_reli->get_file_desc(); } // print out arguments to execve dprintf( D_ALWAYS, "Calling execve( \"%s\"", exec_name.Value() ); for( argp = argv; *argp; argp++ ) { // argv dprintf( D_ALWAYS | D_NOHEADER, ", \"%s\"", *argp ); } dprintf( D_ALWAYS | D_NOHEADER, ", 0" ); for( argp = envp; *argp; argp++ ) { // envp dprintf( D_ALWAYS | D_NOHEADER, ", \"%s\"", *argp ); } dprintf( D_ALWAYS | D_NOHEADER, ", 0 )\n" ); if( (pid = fork()) < 0 ) { EXCEPT( "fork" ); } if( pid == 0 ) { // the child // Block only these 3 signals which have special meaning for // checkpoint/restart purposes. Leave other signals ublocked // so that if we get an exception during the restart process, // we will get a core file to debug. sigemptyset( &sigmask ); // for some reason if we block these, the user process is unable // to unblock some or all of them. #if 0 sigaddset( &sigmask, SIGUSR1 ); sigaddset( &sigmask, SIGUSR2 ); sigaddset( &sigmask, SIGTSTP ); #endif sigprocmask( SIG_SETMASK, &sigmask, 0 ); // renice renice_self( "JOB_RENICE_INCREMENT" ); // make certain the syscall sockets which are being passed // to the user job are setup to be blocking sockets. this // is done by calling timeout(0) CEDAR method. // we must do this because the syscall lib does _not_ // expect to see any failures due to errno EAGAIN... 
if ( SyscallStream ) { SyscallStream->timeout(0); } if ( new_reli ) { new_reli->timeout(0); } // If I'm using privledge separation, connect to the procd. // we need to register a family with the procd for the newly // created process, so that the ProcD will allow us to send // signals to it // if (privsep_enabled() == true) { MyString procd_address; bool response; bool ret; ProcFamilyClient pfc; procd_address = get_procd_address(); ret = pfc.initialize(procd_address.Value()); if (ret == false) { EXCEPT("Failure to initialize the ProcFamilyClient object"); } ret = pfc.register_subfamily(getpid(), getppid(), 60, response); if (ret == false) { EXCEPT("Could not communicate with procd. Aborting."); } if (response == false) { EXCEPT("Procd refused to register job subfamily. Aborting."); } } // If there is a requested coresize for this job, enforce it. // Do it before the set_priv_final to ensure root can alter // the coresize to the requested amount. Otherwise, just // use whatever the current default is. if (coredump_limit_exists == TRUE) { limit( RLIMIT_CORE, coredump_limit, CONDOR_HARD_LIMIT, "max core size" ); } // child process should have only it's submitting uid, and cannot // switch back to root or some other uid. // It'd be nice to check for errors here, but // unfortunately, we can't, since this only returns the // previous priv state, not whether it worked or not. // -Derek Wright 4/30/98 set_user_priv_final(); switch( job_class ) { case CONDOR_UNIVERSE_STANDARD: // if we're using PrivSep, the chdir here could fail. 
instead, // we pass the job's IWD to the switchboard via pipe // if (!privsep_enabled()) { if( chdir(local_dir) < 0 ) { EXCEPT( "chdir(%s)", local_dir ); } } close( pipe_fds[WRITE_END] ); break; case CONDOR_UNIVERSE_PVM: if( chdir(local_dir) < 0 ) { EXCEPT( "chdir(%s)", local_dir ); } close( pipe_fds[WRITE_END] ); dup2( user_syscall_fd, RSC_SOCK ); break; case CONDOR_UNIVERSE_VANILLA: set_iwd(); open_std_file( 0 ); open_std_file( 1 ); open_std_file( 2 ); (void)close( RSC_SOCK ); (void)close( CLIENT_LOG ); break; } // Make sure we're not root if( getuid() == 0 ) { // EXCEPT( "We're about to start as root, aborting." ); // You can't see this error message at all. So, just // exit(4), which is what EXCEPT normally gives. exit( 4 ); } #if defined( LINUX ) && (defined(I386) || defined(X86_64)) // adjust the execution domain of the child to be suitable for // checkpointing. patch_personality(); #endif // if we're using privsep, we'll exec the PrivSep Switchboard // first, which is setuid; it will then setuid to the user we // give it and exec the real job // if (privsep_enabled()) { close(fileno(switchboard_in_fp)); close(fileno(switchboard_err_fp)); privsep_get_switchboard_command("exec", switchboard_child_in_fd, switchboard_child_err_fd, exec_name, new_args); deleteStringArray(argv); argv = new_args.GetStringArray(); } // Everything's ready, start it up... errno = 0; execve( exec_name.Value(), argv, envp ); // A successful call to execve() never returns, so it is an // error if we get here. A number of errors are possible // but the most likely is that there is insufficient swap // space to start the new process. We don't try to log // anything, since we have the UID/GID of the job's owner // and cannot write into the log files... exit( JOB_EXEC_FAILED ); } // The parent // PrivSep - we have at this point only spawned the switchboard // with the "exec" command. 
we need to use our pipe to it in // order to tell it how to execute the user job, and then use // the error pipe to make sure everything worked // if (privsep_enabled()) { close(switchboard_child_in_fd); close(switchboard_child_err_fd); privsep_exec_set_uid(switchboard_in_fp, uid); privsep_exec_set_path(switchboard_in_fp, exec_name.Value()); privsep_exec_set_args(switchboard_in_fp, new_args); privsep_exec_set_env(switchboard_in_fp, env_obj); privsep_exec_set_iwd(switchboard_in_fp, local_dir); privsep_exec_set_inherit_fd(switchboard_in_fp, pipe_fds[0]); privsep_exec_set_inherit_fd(switchboard_in_fp, RSC_SOCK); privsep_exec_set_inherit_fd(switchboard_in_fp, CLIENT_LOG); privsep_exec_set_is_std_univ(switchboard_in_fp); fclose(switchboard_in_fp); if (!privsep_get_switchboard_response(switchboard_err_fp)) { EXCEPT("error starting job: " "privsep get_switchboard_response failure"); } } dprintf( D_ALWAYS, "Started user job - PID = %d\n", pid ); if( job_class != CONDOR_UNIVERSE_VANILLA ) { // Send the user process its startup environment conditions close( pipe_fds[READ_END] ); cmd_fp = fdopen( pipe_fds[WRITE_END], "w" ); dprintf( D_ALWAYS, "cmd_fp = %p\n", cmd_fp ); if( is_restart() ) { #if 1 fprintf( cmd_fp, "restart\n" ); dprintf( D_ALWAYS, "restart\n" ); #else fprintf( cmd_fp, "restart %s\n", target_ckpt ); dprintf( D_ALWAYS, "restart %s\n", target_ckpt ); #endif fprintf( cmd_fp, "end\n" ); dprintf( D_ALWAYS, "end\n" ); } else { fprintf( cmd_fp, "end\n" ); dprintf( D_ALWAYS, "end\n" ); } fclose( cmd_fp ); } deleteStringArray(argv); deleteStringArray(envp); state = EXECUTING; if( new_reli ) { delete new_reli; } // removed some vanilla-specific code here // ASSERT(job_class != CONDOR_UNIVERSE_VANILLA); }
// Construct the (single) ProcFamilyProxy for this process.
//
// address_suffix: optional string appended (after a '.') to both the
//     ProcD address and the PROCD_LOG path, so that several daemons
//     sharing one PROCD_ADDRESS setting do not collide on the same
//     "command pipe" or log file. May be NULL.
//
// The constructor either launches a ProcD of its own or adopts the one
// advertised by a parent through the CONDOR_PROCD_ADDRESS_BASE /
// CONDOR_PROCD_ADDRESS environment variables, then sets up a
// ProcFamilyClient to talk to it. Unrecoverable problems go through
// EXCEPT().
ProcFamilyProxy::ProcFamilyProxy(const char* address_suffix)
	: m_procd_pid(-1),
	  m_reaper_id(FALSE)
{
	// this class is a singleton; a second instance is a fatal error
	//
	if (s_instantiated) {
		EXCEPT("ProcFamilyProxy: multiple instantiations");
	}
	s_instantiated = true;

	const bool have_suffix = (address_suffix != NULL);

	// start from the configured ProcD address and remember it in its
	// un-suffixed form; the suffixed form is what we actually connect to
	//
	m_procd_addr = get_procd_address();
	MyString configured_base = m_procd_addr;
	if (have_suffix) {
		m_procd_addr.formatstr_cat(".%s", address_suffix);
	}

	// pick up the ProcD log file setting, if there is one, applying the
	// same suffix so that logs do not collide either
	//
	char* log_setting = param("PROCD_LOG");
	if (log_setting != NULL) {
		m_procd_log = log_setting;
		free(log_setting);
		if (have_suffix) {
			m_procd_log.formatstr_cat(".%s", address_suffix);
		}
	}

	// the "reaper helper" has to exist before any ProcD is started
	//
	m_reaper_helper = new ProcFamilyProxyReaperHelper(this);
	ASSERT(m_reaper_helper != NULL);

	// decide whether we have to launch our own ProcD.
	//
	// a parent daemon that already runs a usable ProcD advertises its
	// base address in the environment. when that base address equals the
	// one we are configured with, we share the parent's ProcD; when the
	// variable is missing or differs, we start our own and export both
	// variables so that our own DC children can share it
	//
	const char* env_base = GetEnv("CONDOR_PROCD_ADDRESS_BASE");
	const bool must_spawn =
		(env_base == NULL) || (configured_base != env_base);

	if (!must_spawn) {
		const char* env_addr = GetEnv("CONDOR_PROCD_ADDRESS");
		if (env_addr == NULL) {
			EXCEPT("CONDOR_PROCD_ADDRESS_BASE in environment "
			           "but not CONDOR_PROCD_ADDRESS");
		}
		m_procd_addr = env_addr;
	}
	else {
		if (!start_procd()) {
			EXCEPT("unable to spawn the ProcD");
		}
		SetEnv("CONDOR_PROCD_ADDRESS_BASE", configured_base.Value());
		SetEnv("CONDOR_PROCD_ADDRESS", m_procd_addr.Value());
	}

	// finally, build the client object used to talk to the ProcD
	//
	m_client = new ProcFamilyClient;
	ASSERT(m_client != NULL);
	const bool client_ready = m_client->initialize(m_procd_addr.Value());
	if (!client_ready) {
		dprintf(D_ALWAYS,
		        "ProcFamilyProxy: error initializing ProcFamilyClient\n");
		recover_from_procd_error();
	}
}