void UserProc::delete_files() { do_unlink( cur_ckpt ); if (core_name != NULL) { do_unlink( core_name ); } dprintf( D_ALWAYS, "Removing directory \"%s\"\n", local_dir ); if (privsep_enabled()) { // again, the PrivSep Switchboard expects a full path for // the "remove dir" operation // MyString local_dir_path; local_dir_path.formatstr("%s/%s", Execute, local_dir); if (!privsep_remove_dir(local_dir_path.Value())) { dprintf(D_ALWAYS, "privsep_remove_dir failed to remove %s\n", local_dir_path.Value()); } } else { if( rmdir(local_dir) < 0 ) { dprintf( D_ALWAYS, "Can't remove directory \"%s\" - errno = %d\n", local_dir, errno); } } }
/*
  Deliver signal `sig` to the user job.  Nothing is sent when no job pid
  has been recorded yet.  The delivery mechanism depends on whether
  PrivSep is in use; afterwards the action is logged, by name when the
  signal number is known to SigNames.
*/
void
UserProc::send_sig( int sig )
{
	if( pid == 0 ) {
		dprintf( D_FULLDEBUG,
				 "UserProc::send_sig(): Can't send signal to user job pid 0!\n" );
		return;
	}

	if( privsep_enabled() ) {
		send_sig_privsep( sig );
	} else {
		send_sig_no_privsep( sig );
	}

	/* record the fact we unsuspended the job. */
	if( sig == SIGCONT ) {
		pids_suspended = 0;
	}

	const char *sig_name = SigNames.get_name( sig );
	if( sig_name == NULL ) {
		dprintf( D_ALWAYS,
				 "UserProc::send_sig(): Unknown signum %d sent to user job %d\n",
				 sig, pid );
		return;
	}
	dprintf( D_ALWAYS,
			 "UserProc::send_sig(): Sent signal %s to user job %d\n",
			 sig_name, pid );
}
/*
  Empty every execute directory named in `list`.

  For each directory, every entry is first handed to
  check_recovery_file(), then the contents are removed.  On Windows the
  leftover "condor-run-" accounts are cleaned up as well.  On UNIX the
  directory is resolved against each root returned by root_dir_list().
*/
void
cleanup_execute_dirs( StringList &list )
{
	list.rewind();

	for( char const *dir_name = list.next(); dir_name; dir_name = list.next() ) {
#if defined(WIN32)
		// remove all users matching this prefix
		dynuser nobody_login;
		nobody_login.cleanup_condor_users("condor-run-");

		// get rid of everything in the execute directory
		Directory execute_dir(dir_name);

		for( execute_dir.Rewind(); execute_dir.Next(); ) {
			check_recovery_file( execute_dir.GetFullPath() );
		}

		execute_dir.Remove_Entire_Directory();
#else
		pair_strings_vector roots = root_dir_list();
		pair_strings_vector::const_iterator root = roots.begin();
		for( ; root != roots.end(); ++root ) {
			const char *full_path = dirscat(root->second.c_str(), dir_name);
			if( full_path != NULL ) {
				dprintf(D_FULLDEBUG, "Looking at %s\n", full_path);
			}

			Directory execute_dir( full_path, PRIV_ROOT );

			for( execute_dir.Rewind(); execute_dir.Next(); ) {
				check_recovery_file( execute_dir.GetFullPath() );
			}

			if( !privsep_enabled() ) {
				execute_dir.Remove_Entire_Directory();
			} else {
				// With PrivSep, the Switchboard will only allow us to
				// remove subdirectories of EXECUTE - so list them and
				// ask the Switchboard to delete each one.
				for( execute_dir.Rewind(); execute_dir.Next(); ) {
					dprintf(D_FULLDEBUG, "Attempting to remove %s\n",
							execute_dir.GetFullPath());
					privsep_remove_dir(execute_dir.GetFullPath());
				}
			}

			delete [] full_path;
		}
#endif
	}
}
/*
 * Fork to create privileged process connected by a pipe.
 *
 * The child keeps the read end of the pipe in `pfd`, detaches from stdio
 * and runs privsep_read_loop() until it returns, then exits.  The parent
 * keeps the write end in `pfd` and returns.
 *
 * Returns 0 on success (or if privsep is already enabled), -1 if the pipe
 * or the fork could not be created.
 */
int privsep_init(void)
{
	int pipefds[2];
	pid_t pid;

	if (privsep_enabled())
		return 0;

	if (pipe(pipefds) != 0) {
		flog(LOG_ERR, "Couldn't create privsep pipe.");
		return (-1);
	}

	pid = fork();
	if (pid == -1) {
		flog(LOG_ERR, "Couldn't fork for privsep.");
		/* Don't leak the pipe when there is nobody to talk to */
		close(pipefds[0]);
		close(pipefds[1]);
		return (-1);
	}

	if (pid == 0) {
		int nullfd;

		/* This will be the privileged child */
		close(pipefds[1]);
		pfd = pipefds[0];

		/*
		 * Detach from stdio.  The descriptor is duplicated onto stdout
		 * (and possibly stderr) as well as stdin, so it must be open
		 * for writing too; otherwise every write would fail with EBADF.
		 */
		nullfd = open("/dev/null", O_RDWR);
		if (nullfd < 0) {
			perror("/dev/null");
			close(pfd);
			_exit(1);
		}
		dup2(nullfd, 0);
		dup2(nullfd, 1);
		/* XXX: we'll keep stderr open in debug mode for better logging */
		if (get_debuglevel() == 0)
			dup2(nullfd, 2);
		/* The original descriptor is no longer needed once duplicated */
		if (nullfd > 2)
			close(nullfd);

		privsep_read_loop();
		close(pfd);
		_exit(0);
	}

	/* Continue execution (will drop privileges soon) */
	close(pipefds[0]);
	pfd = pipefds[1];

	return 0;
}
/*
  Build a VM description in its initial (stopped) state.

  prog_for_script / scriptname / workingpath are stored as given, and
  the job ClassAd pointed to by `ad` is copied.  Files are owned by the
  condor user when PrivSep is enabled, otherwise by the job user.
*/
VMType::VMType(const char* prog_for_script, const char* scriptname, const char* workingpath, ClassAd *ad)
{
	// A fresh id is requested from the gahp before anything else.
	m_vm_id = vmgahp->getNewVMId();

	// Caller-supplied configuration
	m_prog_for_script = prog_for_script;
	m_scriptname = scriptname;
	m_workingpath = workingpath;
	m_classAd = *ad;

	// Resource numbers
	m_vm_pid = 0;
	m_vm_mem = 0;
	m_vcpus = 1;
	m_cpu_time = 0;

	// Feature flags, all off until configured
	m_vm_networking = false;
	m_vm_checkpoint = false;
	m_vm_no_output_vm = false;
	m_vm_hardware_vt = false;
	m_delete_working_files = false;

	// Run-time state
	m_status = VM_STOPPED;
	m_is_soft_suspended = false;
	m_self_shutdown = false;
	m_is_checkpointed = false;

	m_file_owner = privsep_enabled() ? PRIV_CONDOR : PRIV_USER;

	vmprintf(D_FULLDEBUG, "Constructed VM_Type.\n");

	// Should the script program create the VM configuration file?
	m_use_script_to_create_config =
		param_boolean("USE_SCRIPT_TO_CREATE_CONFIG", false);

	// Build the list of initially transferred files
	createInitialFileList();
}
void UserProc::execute() { ArgList new_args; char **argv; char **argp; char **envp; sigset_t sigmask; MyString a_out_name; MyString shortname; int user_syscall_fd = -1; const int READ_END = 0; const int WRITE_END = 1; int pipe_fds[2]; FILE *cmd_fp; char buf[128]; ReliSock *new_reli = NULL; pipe_fds[0] = -1; pipe_fds[1] = -1; shortname.formatstr( "condor_exec.%d.%d", cluster, proc ); a_out_name.formatstr( "%s/%s/%s", Execute, local_dir, shortname.Value() ); // Set up arg vector according to class of job switch( job_class ) { case CONDOR_UNIVERSE_STANDARD: if( pipe(pipe_fds) < 0 ) { EXCEPT( "pipe()" );} dprintf( D_ALWAYS, "Pipe built\n" ); // The user process should not try to read commands from // 0, 1, or 2 since we'll be using the commands to redirect // those. if( pipe_fds[READ_END] < 14 ) { dup2( pipe_fds[READ_END], 14 ); close( pipe_fds[READ_END] ); pipe_fds[READ_END] = 14; } dprintf( D_ALWAYS, "New pipe_fds[%d,%d]\n", pipe_fds[0], pipe_fds[1] ); sprintf( buf, "%d", pipe_fds[READ_END] ); dprintf( D_ALWAYS, "cmd_fd = %s\n", buf ); new_args.AppendArg(shortname); new_args.AppendArg("-_condor_cmd_fd"); new_args.AppendArg(buf); break; case CONDOR_UNIVERSE_PVM: #if 1 EXCEPT( "Don't know how to deal with PVM jobs" ); #else new_args.AppendArg(shortname); new_args.AppendArg("-1"); new_args.AppendArg(in); new_args.AppendArg(out); new_args.AppendArg(err); #endif break; case CONDOR_UNIVERSE_VANILLA: if (privsep_enabled()) { EXCEPT("Don't know how to deal with Vanilla jobs"); } new_args.AppendArg(shortname.Value()); break; } new_args.AppendArgsFromArgList(args); // take care of USER_JOB_WRAPPER support_job_wrapper(a_out_name,&new_args); MyString exec_name; exec_name = a_out_name; // If privsep is turned on, then we need to use the PrivSep // Switchboard to launch the job // FILE* switchboard_in_fp; FILE* switchboard_err_fp; int switchboard_child_in_fd; int switchboard_child_err_fd; if (privsep_enabled()) { // create the pipes that we'll use to communicate // if 
(!privsep_create_pipes(switchboard_in_fp, switchboard_child_in_fd, switchboard_err_fp, switchboard_child_err_fd)) { EXCEPT("can't launch job: privsep_create_pipes failure"); } } argv = new_args.GetStringArray(); // Set an environment variable that tells the job where it may put scratch data // even if it moves to a different directory. // get the environment vector envp = env_obj.getStringArray(); // We may run more than one of these, so each needs its own // remote system call connection to the shadow if( job_class == CONDOR_UNIVERSE_PVM ) { new_reli = NewConnection( v_pid ); user_syscall_fd = new_reli->get_file_desc(); } // print out arguments to execve dprintf( D_ALWAYS, "Calling execve( \"%s\"", exec_name.Value() ); for( argp = argv; *argp; argp++ ) { // argv dprintf( D_ALWAYS | D_NOHEADER, ", \"%s\"", *argp ); } dprintf( D_ALWAYS | D_NOHEADER, ", 0" ); for( argp = envp; *argp; argp++ ) { // envp dprintf( D_ALWAYS | D_NOHEADER, ", \"%s\"", *argp ); } dprintf( D_ALWAYS | D_NOHEADER, ", 0 )\n" ); if( (pid = fork()) < 0 ) { EXCEPT( "fork" ); } if( pid == 0 ) { // the child // Block only these 3 signals which have special meaning for // checkpoint/restart purposes. Leave other signals ublocked // so that if we get an exception during the restart process, // we will get a core file to debug. sigemptyset( &sigmask ); // for some reason if we block these, the user process is unable // to unblock some or all of them. #if 0 sigaddset( &sigmask, SIGUSR1 ); sigaddset( &sigmask, SIGUSR2 ); sigaddset( &sigmask, SIGTSTP ); #endif sigprocmask( SIG_SETMASK, &sigmask, 0 ); // renice renice_self( "JOB_RENICE_INCREMENT" ); // make certain the syscall sockets which are being passed // to the user job are setup to be blocking sockets. this // is done by calling timeout(0) CEDAR method. // we must do this because the syscall lib does _not_ // expect to see any failures due to errno EAGAIN... 
if ( SyscallStream ) { SyscallStream->timeout(0); } if ( new_reli ) { new_reli->timeout(0); } // If I'm using privledge separation, connect to the procd. // we need to register a family with the procd for the newly // created process, so that the ProcD will allow us to send // signals to it // if (privsep_enabled() == true) { MyString procd_address; bool response; bool ret; ProcFamilyClient pfc; procd_address = get_procd_address(); ret = pfc.initialize(procd_address.Value()); if (ret == false) { EXCEPT("Failure to initialize the ProcFamilyClient object"); } ret = pfc.register_subfamily(getpid(), getppid(), 60, response); if (ret == false) { EXCEPT("Could not communicate with procd. Aborting."); } if (response == false) { EXCEPT("Procd refused to register job subfamily. Aborting."); } } // If there is a requested coresize for this job, enforce it. // Do it before the set_priv_final to ensure root can alter // the coresize to the requested amount. Otherwise, just // use whatever the current default is. if (coredump_limit_exists == TRUE) { limit( RLIMIT_CORE, coredump_limit, CONDOR_HARD_LIMIT, "max core size" ); } // child process should have only it's submitting uid, and cannot // switch back to root or some other uid. // It'd be nice to check for errors here, but // unfortunately, we can't, since this only returns the // previous priv state, not whether it worked or not. // -Derek Wright 4/30/98 set_user_priv_final(); switch( job_class ) { case CONDOR_UNIVERSE_STANDARD: // if we're using PrivSep, the chdir here could fail. 
instead, // we pass the job's IWD to the switchboard via pipe // if (!privsep_enabled()) { if( chdir(local_dir) < 0 ) { EXCEPT( "chdir(%s)", local_dir ); } } close( pipe_fds[WRITE_END] ); break; case CONDOR_UNIVERSE_PVM: if( chdir(local_dir) < 0 ) { EXCEPT( "chdir(%s)", local_dir ); } close( pipe_fds[WRITE_END] ); dup2( user_syscall_fd, RSC_SOCK ); break; case CONDOR_UNIVERSE_VANILLA: set_iwd(); open_std_file( 0 ); open_std_file( 1 ); open_std_file( 2 ); (void)close( RSC_SOCK ); (void)close( CLIENT_LOG ); break; } // Make sure we're not root if( getuid() == 0 ) { // EXCEPT( "We're about to start as root, aborting." ); // You can't see this error message at all. So, just // exit(4), which is what EXCEPT normally gives. exit( 4 ); } #if defined( LINUX ) && (defined(I386) || defined(X86_64)) // adjust the execution domain of the child to be suitable for // checkpointing. patch_personality(); #endif // if we're using privsep, we'll exec the PrivSep Switchboard // first, which is setuid; it will then setuid to the user we // give it and exec the real job // if (privsep_enabled()) { close(fileno(switchboard_in_fp)); close(fileno(switchboard_err_fp)); privsep_get_switchboard_command("exec", switchboard_child_in_fd, switchboard_child_err_fd, exec_name, new_args); deleteStringArray(argv); argv = new_args.GetStringArray(); } // Everything's ready, start it up... errno = 0; execve( exec_name.Value(), argv, envp ); // A successful call to execve() never returns, so it is an // error if we get here. A number of errors are possible // but the most likely is that there is insufficient swap // space to start the new process. We don't try to log // anything, since we have the UID/GID of the job's owner // and cannot write into the log files... exit( JOB_EXEC_FAILED ); } // The parent // PrivSep - we have at this point only spawned the switchboard // with the "exec" command. 
we need to use our pipe to it in // order to tell it how to execute the user job, and then use // the error pipe to make sure everything worked // if (privsep_enabled()) { close(switchboard_child_in_fd); close(switchboard_child_err_fd); privsep_exec_set_uid(switchboard_in_fp, uid); privsep_exec_set_path(switchboard_in_fp, exec_name.Value()); privsep_exec_set_args(switchboard_in_fp, new_args); privsep_exec_set_env(switchboard_in_fp, env_obj); privsep_exec_set_iwd(switchboard_in_fp, local_dir); privsep_exec_set_inherit_fd(switchboard_in_fp, pipe_fds[0]); privsep_exec_set_inherit_fd(switchboard_in_fp, RSC_SOCK); privsep_exec_set_inherit_fd(switchboard_in_fp, CLIENT_LOG); privsep_exec_set_is_std_univ(switchboard_in_fp); fclose(switchboard_in_fp); if (!privsep_get_switchboard_response(switchboard_err_fp)) { EXCEPT("error starting job: " "privsep get_switchboard_response failure"); } } dprintf( D_ALWAYS, "Started user job - PID = %d\n", pid ); if( job_class != CONDOR_UNIVERSE_VANILLA ) { // Send the user process its startup environment conditions close( pipe_fds[READ_END] ); cmd_fp = fdopen( pipe_fds[WRITE_END], "w" ); dprintf( D_ALWAYS, "cmd_fp = %p\n", cmd_fp ); if( is_restart() ) { #if 1 fprintf( cmd_fp, "restart\n" ); dprintf( D_ALWAYS, "restart\n" ); #else fprintf( cmd_fp, "restart %s\n", target_ckpt ); dprintf( D_ALWAYS, "restart %s\n", target_ckpt ); #endif fprintf( cmd_fp, "end\n" ); dprintf( D_ALWAYS, "end\n" ); } else { fprintf( cmd_fp, "end\n" ); dprintf( D_ALWAYS, "end\n" ); } fclose( cmd_fp ); } deleteStringArray(argv); deleteStringArray(envp); state = EXECUTING; if( new_reli ) { delete new_reli; } // removed some vanilla-specific code here // ASSERT(job_class != CONDOR_UNIVERSE_VANILLA); }
/*
  Construct a user process description from the startup information `s`.

  Besides copying the scalar fields, this parses the job's arguments and
  environment, adds the slot, port-range, interface-binding and scratch
  directory settings to the environment, creates the per-process scratch
  directory "dir_<pid>", and records the checkpoint file name.
  Any failure along the way is fatal (EXCEPT).
*/
UserProc::UserProc( STARTUP_INFO &s ) :
	cluster( s.cluster ),
	proc( s.proc ),
	m_a_out( NULL ),
	core_name( NULL ),
	uid( s.uid ),
	gid( s.gid ),
	v_pid( s.virt_pid ),
	pid( 0 ),
	job_class( s.job_class ),
	state( NEW ),
	user_time( 0 ),
	sys_time( 0 ),
	exit_status_valid( FALSE ),
	exit_status( 0 ),
	ckpt_wanted( s.ckpt_wanted ),
	soft_kill_sig( s.soft_kill_sig ),
	new_ckpt_created( FALSE ),
	ckpt_transferred( FALSE ),
	core_created( FALSE ),
	core_transferred( FALSE ),
	exit_requested( FALSE ),
	image_size( -1 ),
	guaranteed_user_time( 0 ),
	guaranteed_sys_time( 0 ),
	pids_suspended( -1 )
{
		// Keep a private copy of the command name
	size_t cmd_bytes = strlen(s.cmd) + 1;
	cmd = new char [ cmd_bytes ];
	memcpy( cmd, s.cmd, cmd_bytes );

		// Since we are adding to the argument list, we may need to deal
		// with platform-specific arg syntax in the user's args in order
		// to successfully merge them with the additional args.
	args.SetArgV1SyntaxToCurrentPlatform();

	MyString args_errors;
	bool args_ok = args.AppendArgsV1or2Raw(s.args_v1or2,&args_errors);
	if( !args_ok ) {
		EXCEPT("ERROR: Failed to parse arguments string: %s\n%s",
			   args_errors.Value(),s.args_v1or2);
	}

		// set up environment as an object
	MyString env_errors;
	bool env_ok = env_obj.MergeFromV1or2Raw( s.env_v1or2,&env_errors );
	if( !env_ok ) {
		EXCEPT("ERROR: Failed to parse environment string: %s\n%s",
			   env_errors.Value(),s.env_v1or2);
	}

		// add name of SMP slot (from startd) into environment
	setSlotEnv(&env_obj);

	/* Port regulation for user job */
	int low_port, high_port;

		// assume outgoing port range
	if (get_port_range(TRUE, &low_port, &high_port) == TRUE) {
		MyString port_env;
		port_env.formatstr( "_condor_LOWPORT=%d", low_port);
		env_obj.SetEnv(port_env.Value());
		port_env.formatstr( "_condor_HIGHPORT=%d", high_port);
		env_obj.SetEnv(port_env.Value());
	}
	/* end - Port regulation for user job */

	env_obj.SetEnv( param_boolean("BIND_ALL_INTERFACES", true)
						? "_condor_BIND_ALL_INTERFACES=TRUE"
						: "_condor_BIND_ALL_INTERFACES=FALSE" );

		// Generate a directory where process can run and do its checkpointing
	mode_t saved_umask = umask(0);

	MyString dir_name;
	dir_name.formatstr( "dir_%d", getpid() );
	local_dir = new char [ dir_name.Length() + 1 ];
	strcpy( local_dir, dir_name.Value() );

	if (!privsep_enabled()) {
		if( mkdir(local_dir,LOCAL_DIR_MODE) < 0 ) {
			EXCEPT( "mkdir(%s,0%o)", local_dir, LOCAL_DIR_MODE );
		}
	}
	else {
		// the Switchboard expects a full path to privsep_create_dir
		MyString full_dir;
		full_dir.formatstr("%s/%s", Execute, local_dir);
		if (!privsep_create_dir(get_condor_uid(), full_dir.Value())) {
			EXCEPT("privsep_create_dir failure");
		}
		if (chmod(full_dir.Value(), LOCAL_DIR_MODE) == -1) {
			EXCEPT("chmod failure after privsep_create_dir");
		}
	}
	(void)umask(saved_umask);

		// Now that we know what the local_dir is, put the path into
		// the environment so the job knows where it is
	MyString scratch_env;
	scratch_env.formatstr("CONDOR_SCRATCH_DIR=%s/%s",Execute,local_dir);
	env_obj.SetEnv(scratch_env.Value());

		// Name of the checkpoint file inside the scratch directory
	MyString ckpt_name;
	ckpt_name.formatstr( "%s/condor_exec.%d.%d", local_dir, cluster, proc );
	cur_ckpt = new char [ ckpt_name.Length() + 1 ];
	strcpy( cur_ckpt, ckpt_name.Value() );

		// Find out if user wants checkpointing
#if defined(NO_CKPT)
	ckpt_wanted = FALSE;
	dprintf(D_ALWAYS,
			"This platform doesn't implement checkpointing yet\n"
	);
#else
	ckpt_wanted = s.ckpt_wanted;
#endif

	restart = s.is_restart;
	coredump_limit_exists = s.coredump_limit_exists;
	coredump_limit = s.coredump_limit;
}
static void check_execute_dir_perms( char const *exec_path ) { struct stat st; if (stat(exec_path, &st) < 0) { EXCEPT( "stat exec path (%s), errno: %d (%s)", exec_path, errno, strerror( errno ) ); } // in PrivSep mode, the EXECUTE directory must be trusted by // the PrivSep kernel. we can't determine this ourselves in general // (since the PrivSep Switchboard can be recompiled to trust // non-root users), so we'll have to be satisfied for now that we // could stat its path // if (privsep_enabled()) { return; } // the following logic sets up the new_mode variable, depending // on the execute dir's current perms. if new_mode is set non-zero, // it means we need to do a chmod // mode_t new_mode = 0; #if defined(WIN32) mode_t desired_mode = _S_IREAD | _S_IWRITE; if ((st.st_mode & desired_mode) != desired_mode) { new_mode = st.st_mode | desired_mode; } #else // we want to avoid having our execute directory world-writable // if possible. it's possible if the execute directory is owned // by condor and either: // - we're not switching UIDs. 
in this case, job sandbox dirs // will just be owned by the condor UID, so we just need // owner-writability // - there's no root squash on the execute dir (since then we // can do a mkdir as the condor UID then a chown to the job // owner UID) // // additionally, the GLEXEC_JOB feature requires world-writability // on the execute dir // bool glexec_job = param_boolean("GLEXEC_JOB", false); if ((st.st_uid == get_condor_uid()) && (!can_switch_ids() || not_root_squashed(exec_path)) && !glexec_job) { // do the chown unless the current mode is exactly 755 // if ((st.st_mode & 07777) != 0755) { new_mode = 0755; } } else { // do the chown if the mode doesn't already include 1777 // if ((st.st_mode & 01777) != 01777) { new_mode = 01777; } if (!glexec_job) { dprintf(D_ALWAYS, "WARNING: %s root-squashed or not condor-owned: " "requiring world-writability\n", exec_path); } } #endif // now do a chmod if needed // if (new_mode != 0) { dprintf(D_FULLDEBUG, "Changing permission on %s\n", exec_path); if (chmod(exec_path, new_mode) < 0) { EXCEPT( "chmod exec path (%s), errno: %d (%s)", exec_path, errno, strerror( errno ) ); } } }
void cleanup_execute_dir(int pid, char const *exec_path, bool remove_exec_subdir) { ASSERT( pid ); #if defined(WIN32) MyString buf; dynuser nobody_login; if ( nobody_login.reuse_accounts() == false ) { // before removing subdir, remove any nobody-user account associated // with this starter pid. this account might have been left around // if the starter did not clean up completely. //sprintf(buf,"condor-run-dir_%d",pid); buf.formatstr("condor-run-%d",pid); if ( nobody_login.deleteuser(buf.Value()) ) { dprintf(D_FULLDEBUG,"Removed account %s left by starter\n",buf.Value()); } } // now remove the subdirectory. NOTE: we only remove the // subdirectory _after_ removing the nobody account, because the // existence of the subdirectory persistantly tells us that the // account may still exist [in case the startd blows up as well]. buf.formatstr( "%s\\dir_%d", exec_path, pid ); check_recovery_file( buf.Value() ); Directory dir( buf.Value() ); dir.Remove_Full_Path(buf.Value()); #else /* UNIX */ MyString pid_dir; MyString pid_dir_path; // We're trying to delete a specific subdirectory, either // b/c a starter just exited and we might need to clean up // after it, or because we're in a recursive call. 
pid_dir.formatstr( "dir_%d", pid ); pid_dir_path.formatstr( "%s/%s", exec_path, pid_dir.Value() ); check_recovery_file( pid_dir_path.Value() ); // if we're using PrivSep, we won't have the permissions // needed to clean up - ask the Switchboard to do it; but // before we do that, use stat to see if there's anything // to clean up and save the Switchboard invocation if not if (privsep_enabled()) { struct stat stat_buf; if (stat(pid_dir_path.Value(), &stat_buf) == -1) { return; } if (!privsep_remove_dir(pid_dir_path.Value())) { dprintf(D_ALWAYS, "privsep_remove_dir failed to remove %s\n", pid_dir_path.Value()); } return; } // Instantiate a directory object pointing at the execute directory pair_strings_vector root_dirs = root_dir_list(); for (pair_strings_vector::const_iterator it=root_dirs.begin(); it != root_dirs.end(); ++it) { const char * exec_path_full = dirscat(it->second.c_str(), exec_path); Directory execute_dir( exec_path_full, PRIV_ROOT ); if (remove_exec_subdir) { // Remove entire subdirectory; used to remove // an encrypted execute directory execute_dir.Remove_Full_Path(exec_path_full); } else { // Look for specific pid_dir subdir if ( execute_dir.Find_Named_Entry( pid_dir.Value() ) ) { // Remove the execute directory execute_dir.Remove_Current_File(); } } delete [] exec_path_full; } #endif /* UNIX */ }
// Launch the condor_procd and wait until it signals that it is ready.
//
// The command line is assembled from configuration parameters, a reaper
// is registered, and the procd is spawned with a pipe attached to its
// stderr.  The call then blocks on that pipe: end-of-file without data
// means the procd is up, any data is treated as an error message.
//
// Returns true when the procd was started; false on any failure, in
// which case m_procd_pid is left at (or reset to) -1.  Some
// misconfigurations are fatal and reported through EXCEPT.
//
bool
ProcFamilyProxy::start_procd()
{
	// we'll only start one ProcD
	//
	ASSERT(m_procd_pid == -1);

	// now, we build up an ArgList for the procd
	//
	MyString exe;
	ArgList args;

	// path to the executable
	//
	char* path = param("PROCD");
	if (path == NULL) {
		dprintf(D_ALWAYS, "start_procd: PROCD not defined in configuration\n");
		return false;
	}
	exe = path;
	args.AppendArg(condor_basename(path));
	free(path);

	// the procd's address
	//
	args.AppendArg("-A");
	args.AppendArg(m_procd_addr);

	// the (optional) procd log file
	//
	if (m_procd_log.Length() > 0) {
		args.AppendArg("-L");
		args.AppendArg(m_procd_log);
	}

	// the (optional) procd log file size
	//
	char *procd_log_size = param("MAX_PROCD_LOG");
	if (procd_log_size != NULL) {
		args.AppendArg("-R");
		args.AppendArg(procd_log_size);
		free(procd_log_size);
	}

	Env env;
	// The procd can't param, so pass this via the environment
	if (param_boolean("USE_PSS", false)) {
		env.SetEnv("_condor_USE_PSS=TRUE");
	}

	// the (optional) maximum snapshot interval
	// (the procd will default to every minute)
	//
	char* max_snapshot_interval = param("PROCD_MAX_SNAPSHOT_INTERVAL");
	if (max_snapshot_interval != NULL) {
		args.AppendArg("-S");
		args.AppendArg(max_snapshot_interval);
		free(max_snapshot_interval);
	}

	// (optional) make the procd sleep on startup so a
	// debugger can attach
	//
	bool debug = param_boolean("PROCD_DEBUG", false);
	if (debug) {
		args.AppendArg("-D");
	}

#if !defined(WIN32)
	// On UNIX, we need to tell the procd to allow connections from the
	// condor user
	//
	args.AppendArg("-C");
	args.AppendArg(get_condor_uid());
#endif

#if defined(WIN32)
	// on Windows, we need to tell the procd what program to use to send
	// softkills
	//
	char* softkill_path = param("WINDOWS_SOFTKILL");
	if ( softkill_path == NULL ) {
		dprintf(D_ALWAYS,
		        "WINDOWS_SOFTKILL undefined; "
		        "ProcD won't be able to send WM_CLOSE to jobs\n");
	}
	else {
		args.AppendArg("-K");
		args.AppendArg(softkill_path);
		free(softkill_path);
	}
#endif

#if defined(LINUX)
	// enable group-based tracking if a group ID range is given in the
	// config file
	//
	if (param_boolean("USE_GID_PROCESS_TRACKING", false)) {
		if (!can_switch_ids() && !privsep_enabled()) {
			EXCEPT("USE_GID_PROCESS_TRACKING enabled, but can't modify "
			           "the group list of our children unless running as "
			           "root or using PrivSep");
		}
		// a value of 0 (also the default used here) is rejected for
		// both ends of the range
		int min_tracking_gid = param_integer("MIN_TRACKING_GID", 0);
		if (min_tracking_gid == 0) {
			EXCEPT("USE_GID_PROCESS_TRACKING enabled, "
			           "but MIN_TRACKING_GID is %d\n",
			       min_tracking_gid);
		}
		int max_tracking_gid = param_integer("MAX_TRACKING_GID", 0);
		if (max_tracking_gid == 0) {
			EXCEPT("USE_GID_PROCESS_TRACKING enabled, "
			           "but MAX_TRACKING_GID is %d\n",
			       max_tracking_gid);
		}
		if (min_tracking_gid > max_tracking_gid) {
			EXCEPT("invalid tracking gid range: %d - %d\n",
			       min_tracking_gid,
			       max_tracking_gid);
		}
		args.AppendArg("-G");
		args.AppendArg(min_tracking_gid);
		args.AppendArg(max_tracking_gid);
	}
#endif

	// for the GLEXEC_JOB feature, we'll need to pass the ProcD paths
	// to glexec and the condor_glexec_kill script
	//
	if (param_boolean("GLEXEC_JOB", false)) {
		args.AppendArg("-I");
		char* libexec = param("LIBEXEC");
		if (libexec == NULL) {
			EXCEPT("GLEXEC_JOB is defined, but LIBEXEC not configured");
		}
		MyString glexec_kill;
		glexec_kill.formatstr("%s/condor_glexec_kill", libexec);
		free(libexec);
		args.AppendArg(glexec_kill.Value());
		char* glexec = param("GLEXEC");
		if (glexec == NULL) {
			EXCEPT("GLEXEC_JOB is defined, but GLEXEC not configured");
		}
		args.AppendArg(glexec);
		free(glexec);
		// the retry count and retry delay follow the two paths on the
		// command line
		int glexec_retries = param_integer("GLEXEC_RETRIES",3,0);
		int glexec_retry_delay = param_integer("GLEXEC_RETRY_DELAY",5,0);
		args.AppendArg(glexec_retries);
		args.AppendArg(glexec_retry_delay);
	}

	// done constructing the argument list; now register a reaper for
	// notification when the procd exits (only if one has not been
	// registered already)
	//
	if (m_reaper_id == FALSE) {
		m_reaper_id = daemonCore->Register_Reaper(
			"condor_procd reaper",
			(ReaperHandlercpp)&ProcFamilyProxyReaperHelper::procd_reaper,
			"condor_procd reaper",
			m_reaper_helper
		);
	}
	if (m_reaper_id == FALSE) {
		dprintf(D_ALWAYS,
		        "start_procd: unable to register a reaper for the procd\n");
		return false;
	}

	// we start the procd with a pipe coming back to us on its stderr.
	// the procd will close this pipe after it starts listening for
	// commands.
	//
	int pipe_ends[2];
	if (daemonCore->Create_Pipe(pipe_ends) == FALSE) {
		dprintf(D_ALWAYS, "start_procd: error creating pipe for the procd\n");
		return false;
	}
	// only stderr (index 2) is redirected; -1 leaves stdin and stdout
	// unset in this array
	int std_io[3];
	std_io[0] = -1;
	std_io[1] = -1;
	std_io[2] = pipe_ends[1];

	// use Create_Process to start the procd
	//
	// NOTE(review): `env` (which may carry _condor_USE_PSS) is handed
	// only to Create_Process; the privsep_spawn_procd call below does
	// not receive it - confirm whether that is intended.
	//
	if (privsep_enabled()) {
		m_procd_pid = privsep_spawn_procd(exe.Value(),
		                                  args,
		                                  std_io,
		                                  m_reaper_id);
	}
	else {
		m_procd_pid = daemonCore->Create_Process(exe.Value(),
		                                         args,
		                                         PRIV_ROOT,
		                                         m_reaper_id,
		                                         FALSE,
		                                         &env,
		                                         NULL,
		                                         NULL,
		                                         NULL,
		                                         std_io);
	}
	if (m_procd_pid == FALSE) {
		dprintf(D_ALWAYS, "start_procd: unable to execute the procd\n");
		daemonCore->Close_Pipe(pipe_ends[0]);
		daemonCore->Close_Pipe(pipe_ends[1]);
		m_procd_pid = -1;
		return false;
	}

	// now close the pipe end we handed to the child and then block on the
	// pipe until it closes (which tells us the procd is listening for
	// commands)
	//
	if (daemonCore->Close_Pipe(pipe_ends[1]) == FALSE) {
		dprintf(D_ALWAYS, "error closing procd's pipe end\n");
		daemonCore->Shutdown_Graceful(m_procd_pid);
		daemonCore->Close_Pipe(pipe_ends[0]);
		m_procd_pid = -1;
		return false;
	}
	const int MAX_PROCD_ERR_LEN = 80;
	// one extra byte so the message can always be NUL-terminated
	char err_msg[MAX_PROCD_ERR_LEN + 1];
	int ret = daemonCore->Read_Pipe(pipe_ends[0], err_msg, MAX_PROCD_ERR_LEN);
	// a return of 0 is the success case; anything else is either a
	// read error (-1) or the length of an error message from the procd
	if (ret != 0) {
		daemonCore->Shutdown_Graceful(m_procd_pid);
		daemonCore->Close_Pipe(pipe_ends[0]);
		m_procd_pid = -1;
		if (ret == -1) {
			dprintf(D_ALWAYS, "start_procd: error reading pipe from procd\n");
			return false;
		}
		err_msg[ret] = '\0';
		dprintf(D_ALWAYS,
		        "start_procd: error received from procd: %s\n",
		        err_msg);
		return false;
	}
	if (daemonCore->Close_Pipe(pipe_ends[0]) == FALSE) {
		dprintf(D_ALWAYS, "start_procd: error closing pipe to procd\n");
		daemonCore->Shutdown_Graceful(m_procd_pid);
		m_procd_pid = -1;
		return false;
	}

	// OK, the ProcD's up and running!
	//
	return true;
}
/** * merge_stderr_with_stdout is intended for clients of this function * that wish to have the old behavior, where stderr and stdout were * both added to the same StringList. */ int systemCommand( ArgList &args, priv_state priv, StringList *cmd_out, StringList * cmd_in, StringList *cmd_err, bool merge_stderr_with_stdout) { int result = 0; FILE *fp = NULL; FILE * fp_for_stdin = NULL; FILE * childerr = NULL; MyString line; char buff[1024]; StringList *my_cmd_out = cmd_out; priv_state prev = PRIV_UNKNOWN; int stdout_pipes[2]; int stdin_pipes[2]; int pid; bool use_privsep = false; switch ( priv ) { case PRIV_ROOT: prev = set_root_priv(); break; case PRIV_USER: case PRIV_USER_FINAL: prev = set_user_priv(); #if !defined(WIN32) if ( privsep_enabled() && (job_user_uid != get_condor_uid()) ) { use_privsep = true; } #endif break; default: // Stay as Condor user ; } #if defined(WIN32) if((cmd_in != NULL) || (cmd_err != NULL)) { vmprintf(D_ALWAYS, "Invalid use of systemCommand() in Windows.\n"); return -1; } //if ( use_privsep ) { // fp = privsep_popen(args, "r", want_stderr, job_user_uid); //} //else { fp = my_popen( args, "r", merge_stderr_with_stdout ); //} #else // The old way of doing things (and the Win32 way of doing // things) // fp = my_popen( args, "r", want_stderr ); if((cmd_err != NULL) && merge_stderr_with_stdout) { vmprintf(D_ALWAYS, "Invalid use of systemCommand().\n"); return -1; } PrivSepForkExec psforkexec; char ** args_array = args.GetStringArray(); int error_pipe[2]; // AIX 5.2, Solaris 5.9, HPUX 11 don't have AF_LOCAL if(pipe(stdin_pipes) < 0) { vmprintf(D_ALWAYS, "Error creating pipe: %s\n", strerror(errno)); deleteStringArray( args_array ); return -1; } if(pipe(stdout_pipes) < 0) { vmprintf(D_ALWAYS, "Error creating pipe: %s\n", strerror(errno)); close(stdin_pipes[0]); close(stdin_pipes[1]); deleteStringArray( args_array ); return -1; } if ( use_privsep ) { if(!psforkexec.init()) { vmprintf(D_ALWAYS, "my_popenv failure on %s\n", args_array[0]); 
close(stdin_pipes[0]); close(stdin_pipes[1]); close(stdout_pipes[0]); close(stdout_pipes[1]); deleteStringArray( args_array ); return -1; } } if(cmd_err != NULL) { if(pipe(error_pipe) < 0) { vmprintf(D_ALWAYS, "Could not open pipe for error output: %s\n", strerror(errno)); close(stdin_pipes[0]); close(stdin_pipes[1]); close(stdout_pipes[0]); close(stdout_pipes[1]); deleteStringArray( args_array ); return -1; } } // Now fork and do what my_popen used to do pid = fork(); if(pid < 0) { vmprintf(D_ALWAYS, "Error forking: %s\n", strerror(errno)); close(stdin_pipes[0]); close(stdin_pipes[1]); close(stdout_pipes[0]); close(stdout_pipes[1]); if(cmd_err != NULL) { close(error_pipe[0]); close(error_pipe[1]); } deleteStringArray( args_array ); return -1; } if(pid == 0) { close(stdout_pipes[0]); close(stdin_pipes[1]); dup2(stdout_pipes[1], STDOUT_FILENO); dup2(stdin_pipes[0], STDIN_FILENO); if(merge_stderr_with_stdout) dup2(stdout_pipes[1], STDERR_FILENO); else if(cmd_err != NULL) { close(error_pipe[0]); dup2(error_pipe[1], STDERR_FILENO); } uid_t euid = geteuid(); gid_t egid = getegid(); seteuid( 0 ); setgroups( 1, &egid ); setgid( egid ); setuid( euid ); install_sig_handler(SIGPIPE, SIG_DFL); sigset_t sigs; sigfillset(&sigs); sigprocmask(SIG_UNBLOCK, &sigs, NULL); MyString cmd = args_array[0]; if ( use_privsep ) { ArgList al; psforkexec.in_child(cmd, al); deleteStringArray( args_array ); args_array = al.GetStringArray(); } execvp(cmd.Value(), args_array); vmprintf(D_ALWAYS, "Could not execute %s: %s\n", args_array[0], strerror(errno)); exit(-1); } close(stdin_pipes[0]); close(stdout_pipes[1]); fp_for_stdin = fdopen(stdin_pipes[1], "w"); fp = fdopen(stdout_pipes[0], "r"); if(cmd_err != NULL) { close(error_pipe[1]); childerr = fdopen(error_pipe[0],"r"); if(childerr == 0) { vmprintf(D_ALWAYS, "Could not open pipe for reading child error output: %s\n", strerror(errno)); close(error_pipe[0]); close(stdin_pipes[1]); close(stdout_pipes[0]); fclose(fp); fclose(fp_for_stdin); 
deleteStringArray( args_array ); return -1; } } if ( use_privsep ) { FILE* _fp = psforkexec.parent_begin(); privsep_exec_set_uid(_fp, job_user_uid); privsep_exec_set_path(_fp, args_array[0]); privsep_exec_set_args(_fp, args); Env env; env.MergeFrom(environ); privsep_exec_set_env(_fp, env); privsep_exec_set_iwd(_fp, "."); privsep_exec_set_inherit_fd(_fp, 1); privsep_exec_set_inherit_fd(_fp, 2); privsep_exec_set_inherit_fd(_fp, 0); if (!psforkexec.parent_end()) { vmprintf(D_ALWAYS, "my_popenv failure on %s\n", args_array[0]); fclose(fp); fclose(fp_for_stdin); if (childerr) { fclose(childerr); } deleteStringArray( args_array ); return -1; } } deleteStringArray( args_array ); #endif set_priv( prev ); if ( fp == NULL ) { MyString args_string; args.GetArgsStringForDisplay( &args_string, 0 ); vmprintf( D_ALWAYS, "Failed to execute command: %s\n", args_string.Value() ); if (childerr) fclose(childerr); return -1; } if(cmd_in != NULL) { cmd_in->rewind(); char * tmp; while((tmp = cmd_in->next()) != NULL) { fprintf(fp_for_stdin, "%s\n", tmp); fflush(fp_for_stdin); } } if (fp_for_stdin) { // So that we will not be waiting for output while the // script waits for stdin to be closed. fclose(fp_for_stdin); } if ( my_cmd_out == NULL ) { my_cmd_out = new StringList(); } while ( fgets( buff, sizeof(buff), fp ) != NULL ) { line += buff; if ( line.chomp() ) { my_cmd_out->append( line.Value() ); line = ""; } } if(cmd_err != NULL) { while(fgets(buff, sizeof(buff), childerr) != NULL) { line += buff; if(line.chomp()) { cmd_err->append(line.Value()); line = ""; } } fclose(childerr); } #if defined(WIN32) result = my_pclose( fp ); #else // Why close first? Just in case the child process is waiting // on a read, and we have nothing more to send it. It will // now receive a SIGPIPE. 
fclose(fp); if(waitpid(pid, &result, 0) < 0) { vmprintf(D_ALWAYS, "Unable to wait: %s\n", strerror(errno)); if ( cmd_out == NULL ) { delete my_cmd_out; } return -1; } #endif if( result != 0 ) { MyString args_string; args.GetArgsStringForDisplay(&args_string,0); vmprintf(D_ALWAYS, "Command returned non-zero: %s\n", args_string.Value()); my_cmd_out->rewind(); const char *next_line; while ( (next_line = my_cmd_out->next()) ) { vmprintf( D_ALWAYS, " %s\n", next_line ); } } if ( cmd_out == NULL ) { delete my_cmd_out; } return result; }