int Condor_Auth_Kerberos :: map_domain_name(const char * domain) { if (RealmMap == 0) { init_realm_mapping(); // it's okay if it returns false } // two cases, if domain is the same as the current uid domain, // then we are okay, other wise, see if we have a map if (RealmMap) { MyString from(domain), to; if (RealmMap->lookup(from, to) != -1) { if (IsFulldebug(D_SECURITY)) { dprintf (D_SECURITY, "KERBEROS: mapping realm %s to domain %s.\n", from.Value(), to.Value()); } setRemoteDomain(to.Value()); return TRUE; } else { // if the map exists, they must be listed. and they're NOT! return FALSE; } } // if there is no map, we just allow realm -> domain. if (IsDebugVerbose(D_SECURITY)) { dprintf (D_SECURITY, "KERBEROS: mapping realm %s to domain %s.\n", domain, domain); setRemoteDomain(domain); } return TRUE; }
int IpVerify::add_hash_entry(const struct in6_addr & sin6_addr, const char * user, perm_mask_t new_mask) { UserPerm_t * perm = NULL; perm_mask_t old_mask = 0; // must init old_mask to zero!!! MyString user_key = user; // assert(PermHashTable); if ( PermHashTable->lookup(sin6_addr, perm) != -1 ) { // found an existing entry. if (has_user(perm, user, old_mask)) { // remove it because we are going to edit the mask below // and re-insert it. perm->remove(user_key); } } else { perm = new UserPerm_t(42, compute_host_hash); if (PermHashTable->insert(sin6_addr, perm) != 0) { delete perm; return FALSE; } } perm->insert(user_key, old_mask | new_mask); if( IsFulldebug(D_FULLDEBUG) || IsDebugLevel(D_SECURITY) ) { MyString auth_str; AuthEntryToString(sin6_addr,user,new_mask, auth_str); dprintf(D_FULLDEBUG|D_SECURITY, "Adding to resolved authorization table: %s\n", auth_str.Value()); } return TRUE; }
// Build the environment that this parallel-universe node will run with:
// start from the environment stored in the JobAd, layer on the bookkeeping
// variables (_CONDOR_REMOTE_SPOOL_DIR, _CONDOR_PROCNO, _CONDOR_NPROCS),
// prepend Condor's BIN directory to PATH, and write the result back into
// the JobAd.  Returns 1 on success, 0 on any failure (logged via dprintf).
int ParallelProc::addEnvVars()
{
    dprintf ( D_FULLDEBUG, "ParallelProc::addEnvVars()\n" );

        // Pull the environment out of the job ad...
    Env env;
    MyString env_errors;
    if ( !env.MergeFrom(JobAd,&env_errors) ) {
        dprintf( D_ALWAYS, "Failed to read environment from JobAd: %s\n",
                 env_errors.Value() );
        return 0;
    }

        // Add the remote spool dir, the "server" directory for
        // condor_chirp to stage files to/from
    MyString spool;
    if ( JobAd->LookupString( ATTR_REMOTE_SPOOL_DIR, spool ) < 1 ) {
        dprintf( D_ALWAYS, "%s not found in JobAd.  Aborting.\n",
                 ATTR_REMOTE_SPOOL_DIR);
        return 0;
    }
    env.SetEnv( "_CONDOR_REMOTE_SPOOL_DIR", spool.Value() );

        // Add this node's number to CONDOR_PROCNO.
        // (buf[128] is ample for any int rendered with %d)
    char buf[128];
    sprintf(buf, "%d", Node);
    env.SetEnv("_CONDOR_PROCNO", buf);

        // And put the total number of nodes into CONDOR_NPROC
    int machine_count;
    if ( JobAd->LookupInteger( ATTR_CURRENT_HOSTS, machine_count ) != 1 ) {
        dprintf( D_ALWAYS, "%s not found in JobAd.  Aborting.\n",
                 ATTR_CURRENT_HOSTS);
        return 0;
    }
    sprintf(buf, "%d", machine_count);
    env.SetEnv("_CONDOR_NPROCS", buf);

        // Now stick the condor bin directory in front of the path,
        // so user scripts can call condor_config_val
    char *bin = param( "BIN" );
    if ( !bin ) {
        dprintf ( D_ALWAYS, "Can't find BIN "
                  "in config file! Aborting!\n" );
        return 0;
    }

        // PATH resolution precedence: (1) a PATH already in the job's
        // environment, else (2) the starter's own $PATH, else (3) just BIN.
    MyString path;
    MyString new_path;
    char *tmp;

    new_path = bin;
    new_path += ":";

    if(env.GetEnv("PATH",path)) {
            // The user gave us a path in env.  Find & alter:
        dprintf ( D_FULLDEBUG, "$PATH in ad:%s\n", path.Value() );
        new_path += path;
    }
    else {
            // User did not specify any env, or there is no 'PATH'
            // in env sent along.  We get $PATH and alter it.
        tmp = getenv( "PATH" );
        if ( tmp ) {
            dprintf ( D_FULLDEBUG, "No Path in ad, $PATH in env\n" );
            dprintf ( D_FULLDEBUG, "before: %s\n", tmp );
            new_path += tmp;
        }
        else {
                // no PATH in env.  Make one.  (note: this deliberately
                // discards the trailing ":" set above — PATH becomes BIN alone)
            dprintf ( D_FULLDEBUG, "No Path in ad, no $PATH in env\n" );
            new_path = bin;
        }
    }
    free(bin);
    env.SetEnv("PATH",new_path.Value());

        // Propagate CONDOR_CONFIG so child tools find the same config file.
    char *condor_config = getenv( "CONDOR_CONFIG");
    if (condor_config) {
        env.SetEnv("CONDOR_CONFIG", condor_config);
    }

    if(IsFulldebug(D_FULLDEBUG)) {
        MyString env_str;
        env.getDelimitedStringForDisplay(&env_str);
        dprintf ( D_FULLDEBUG, "New env: %s\n", env_str.Value() );
    }

        // now put the env back into the JobAd:
    if(!env.InsertEnvIntoClassAd(JobAd,&env_errors)) {
        dprintf( D_ALWAYS, "Unable to update env! Aborting: %s\n",
                 env_errors.Value());
        return 0;
    }

    return 1;
}
// Spawn the condor_starter as a DaemonCore process.
//
// Sets up: (1) an environment overriding _CONDOR_EXECUTE (and optionally
// CPU-affinity knobs), (2) a socketpair so the child can stream job-ClassAd
// updates back to us, (3) on Linux, optional glexec wrapping which replaces
// the path/args/env/std fds.  Returns the child pid, or 0 on failure
// (s_pid is also set to 0 in that case).
int Starter::execDCStarter( ArgList const &args, Env const *env,
                            int* std_fds, Stream* s )
{
	Stream *inherit_list[] =
		{ 0 /*ClassAd update stream (assigned below)*/,
		  s /*shadow syscall sock*/,
		  0 /*terminal NULL*/ };

	const ArgList* final_args = &args;
	const char* final_path = s_path;
	Env new_env;

	if( env ) {
		new_env.MergeFrom( *env );
	}

		// The starter figures out its execute directory by paraming
		// for EXECUTE, which we override in the environment here.
		// This way, all the logic about choosing a directory to use
		// is in only one place.
	ASSERT( executeDir() );
	new_env.SetEnv( "_CONDOR_EXECUTE", executeDir() );

	env = &new_env;

	// Build the affinity string to pass to the starter via env
	// (comma-separated list of CPU ids assigned to this claim).
	std::string affinityString;
	if (s_claim && s_claim->rip() && s_claim->rip()->get_affinity_set()) {
		std::list<int> *l = s_claim->rip()->get_affinity_set();
		bool needComma = false;
		for (std::list<int>::iterator it = l->begin(); it != l->end(); it++) {
			if (needComma) {
				formatstr_cat(affinityString, ", %d", *it);
			} else {
				formatstr_cat(affinityString, "%d ", *it);
				needComma = true;
			}
		}
	}

		// NOTE(review): s_claim is dereferenced unconditionally here even
		// though it was null-checked just above — confirm s_claim is
		// guaranteed non-NULL by this point.
	int slotId = s_claim->rip()->r_sub_id;
	if (slotId == 0) {
		// Isn't a paritionable slot, use the main id
		slotId = s_claim->rip()->r_id;
	}
	std::string affinityKnob;
	formatstr(affinityKnob, "_CONDOR_SLOT%d_CPU_AFFINITY", slotId);

	if (param_boolean("ASSIGN_CPU_AFFINITY", false)) {
		new_env.SetEnv(affinityKnob.c_str(), affinityString.c_str());
		new_env.SetEnv("_CONDOR_ENFORCE_CPU_AFFINITY", "true");
		dprintf(D_FULLDEBUG, "Setting affinity env to %s = %s\n",
				affinityKnob.c_str(), affinityString.c_str());
	}

	ReliSock child_job_update_sock;   // child inherits this socket
	ASSERT( !s_job_update_sock );
	s_job_update_sock = new ReliSock; // parent (yours truly) keeps this socket
	ASSERT( s_job_update_sock );
		// Connect parent and child sockets together so child can send us
		// updates to the job ClassAd.
	if( !s_job_update_sock->connect_socketpair( child_job_update_sock ) ) {
		dprintf( D_ALWAYS,
				 "ERROR: Failed to create job ClassAd update socket!\n");
		s_pid = 0;
		return s_pid;
	}
	inherit_list[0] = &child_job_update_sock;

	// Pass the machine ad to the starter
	if (s_claim)
		s_claim->writeMachAd( s_job_update_sock );

	if( daemonCore->Register_Socket(
			s_job_update_sock,
			"starter ClassAd update socket",
			(SocketHandlercpp)&Starter::receiveJobClassAdUpdate,
			"receiveJobClassAdUpdate",
			this) < 0 )
	{
		EXCEPT("Failed to register ClassAd update socket.");
	}

#if defined(LINUX)
	// see if we should be using glexec to spawn the starter.
	// if we are, the cmd, args, env, and stdin to use will be
	// modified
	ArgList glexec_args;
	Env glexec_env;
	int glexec_std_fds[3];
	if( param_boolean( "GLEXEC_STARTER", false ) ) {
		if( ! glexec_starter_prepare( s_path,
		                              s_claim->client()->proxyFile(),
		                              args,
		                              env,
		                              std_fds,
		                              glexec_args,
		                              glexec_env,
		                              glexec_std_fds ) )
		{
			// something went wrong; prepareForGlexec will
			// have already logged it
			cleanupAfterGlexec();
			return 0;
		}
		final_path = glexec_args.GetArg(0);
		final_args = &glexec_args;
		env = &glexec_env;
		std_fds = glexec_std_fds;
	}
#endif

		// Use the dedicated reaper if one was registered, else the default.
	int reaper_id;
	if( s_reaper_id > 0 ) {
		reaper_id = s_reaper_id;
	} else {
		reaper_id = main_reaper;
	}

	if(IsFulldebug(D_FULLDEBUG)) {
		MyString args_string;
		final_args->GetArgsStringForDisplay(&args_string);
		dprintf( D_FULLDEBUG, "About to Create_Process \"%s\"\n",
				 args_string.Value() );
	}

	FamilyInfo fi;
	fi.max_snapshot_interval = pid_snapshot_interval;

	s_pid = daemonCore->
		Create_Process( final_path, *final_args, PRIV_ROOT, reaper_id,
		                TRUE, env, NULL, &fi, inherit_list, std_fds );
	if( s_pid == FALSE ) {
		dprintf( D_ALWAYS, "ERROR: exec_starter failed!\n");
		s_pid = 0;
	}

#if defined(LINUX)
	if( param_boolean( "GLEXEC_STARTER", false ) ) {
		// if we used glexec to spawn the Starter, we now need to send
		// the Starter's environment to our glexec wrapper script so it
		// can exec the Starter with all the environment variables we rely
		// on it inheriting
		//
		if ( !glexec_starter_handle_env(s_pid) ) {
			// something went wrong; handleGlexecEnvironment will
			// have already logged it
			cleanupAfterGlexec();
			return 0;
		}
	}
#endif

	return s_pid;
}
// Launch the vm-gahp helper process and complete its startup handshake.
//
// Steps: validate preconditions; create stdin/stdout (and optionally
// stderr) pipes; build the gahp command line; (non-Windows) choose the
// uid/gid the gahp should use and export them via the job environment;
// spawn the process with Create_Process; close the child's pipe ends;
// then read the version, command list, and supported-VM list from the
// gahp, and enable async notification (or fall back to polling).
// Returns true on success; on failure sets start_err_msg and returns false.
bool VMGahpServer::startUp(Env *job_env, const char *workingdir,
                           int nice_inc, FamilyInfo *family_info)
{
	//check if we already have spawned a vmgahp server
	if( m_vmgahp_pid > 0 ) {
		//vmgahp is already running
		return true;
	}

	if( !m_job_ad ) {
		start_err_msg = "No JobAd in VMGahpServer::startUp()";
		dprintf(D_ALWAYS,"%s\n", start_err_msg.Value());
		return false;
	}

	MyString JobName;
	if( m_vmgahp_server.IsEmpty() ) {
		start_err_msg = "No path for vmgahp in VMGahpServer::startUp()";
		dprintf(D_ALWAYS,"%s\n", start_err_msg.Value());
		return false;
	}

	JobName = m_vmgahp_server;

	// Create two pairs of pipes which we will use to
	// communicate with the gahp (plus stderr if logging is enabled).
	// NOTE(review): if a later Create_Pipe call fails, the fds from the
	// earlier successful call(s) are leaked on the early return — confirm
	// whether daemonCore reclaims them elsewhere.
	int stdin_pipefds[2];
	int stdout_pipefds[2];
	int stderr_pipefds[2];

	if(!daemonCore->Create_Pipe(stdin_pipefds,
				true, // read end registerable
				false, // write end not registerable
				false, // read end blocking
				false // write end blocking
				)) {
		start_err_msg = "unable to create pipe to stdin of VM gahp";
		dprintf(D_ALWAYS,"%s\n", start_err_msg.Value());
		return false;
	}
	if(!daemonCore->Create_Pipe(stdout_pipefds,
				true, //read end registerable
				false, // write end not registerable
				false, // read end blocking
				false // write end blocking
				)) {
		// blocking read
		start_err_msg = "unable to create pipe to stdout of VM gahp";
		dprintf(D_ALWAYS,"%s\n", start_err_msg.Value());
		return false;
	}
	if( m_include_gahp_log ) {
		if(!daemonCore->Create_Pipe(stderr_pipefds,
					true, // read end registerable
					false, // write end not registerable
					true, // read end non-blocking
					true // write end non-blocking
					)) {
			// nonblocking read
			start_err_msg = "unable to create pipe to stderr of VM gahp";
			dprintf(D_ALWAYS,"%s\n", start_err_msg.Value());
			return false;
		}
	}

		// Child-side fds for stdin/stdout/stderr redirection.
	int io_redirect[3];
	io_redirect[0] = stdin_pipefds[0]; //stdin gets read side of in pipe
	io_redirect[1] = stdout_pipefds[1]; //stdout gets write side of out pipe
	if( m_include_gahp_log ) {
		io_redirect[2] = stderr_pipefds[1]; //stderr gets write side of err pipe
	} else {
			// No gahp log: send the child's stderr to the null device.
		int null_fd = safe_open_wrapper_follow(NULL_FILE, O_WRONLY | O_APPEND, 0666);
		if( null_fd < 0 ) {
			start_err_msg = "unable to open null file for stderr of VM gahp";
			dprintf(D_ALWAYS,"Failed to open '%s':%s (errno %d)\n",
					NULL_FILE, strerror(errno), errno);
			return false;
		}
		io_redirect[2] = null_fd;
	}

	// Set Arguments
	ArgList vmgahp_args;

	vmgahp_args.SetArgV1SyntaxToCurrentPlatform();
	vmgahp_args.AppendArg(m_vmgahp_server.Value());

	// Add daemonCore options
	vmgahp_args.AppendArg("-f");
	if( m_include_gahp_log ) {
		vmgahp_args.AppendArg("-t");
	}
	vmgahp_args.AppendArg("-M");
	vmgahp_args.AppendArg(VMGAHP_STANDALONE_MODE);

	MyString args_string;
	vmgahp_args.GetArgsStringForDisplay(&args_string, 1);
	dprintf( D_ALWAYS, "About to exec %s %s\n", JobName.Value(),
			args_string.Value() );

#if !defined(WIN32)
		// Decide which uid/gid the gahp should run the VM as:
		// root Condor -> the job user; privsep -> the privsep user;
		// otherwise the condor user (the gahp itself may be setuid-root).
	uid_t vmgahp_user_uid = (uid_t) -1;
	gid_t vmgahp_user_gid = (gid_t) -1;

	if( can_switch_ids() ) {
		// Condor runs as root
		vmgahp_user_uid = get_user_uid();
		vmgahp_user_gid = get_user_gid();
	}
	else if (Starter->condorPrivSepHelper() != NULL) {
		vmgahp_user_uid = Starter->condorPrivSepHelper()->get_uid();
		char* user_name;
		if (!pcache()->get_user_name(vmgahp_user_uid, user_name)) {
			EXCEPT("unable to get user name for UID %u", vmgahp_user_uid);
		}
		if (!pcache()->get_user_ids(user_name, vmgahp_user_uid, vmgahp_user_gid)) {
			EXCEPT("unable to get GID for UID %u", vmgahp_user_uid);
		}
		free(user_name);
	}
	else {
		// vmgahp may have setuid-root (e.g. vmgahp for Xen)
		vmgahp_user_uid = get_condor_uid();
		vmgahp_user_gid = get_condor_gid();
	}

	// Setup vmgahp user uid/gid
	// NOTE(review): uid_t/gid_t are typically unsigned, so the `> 0` /
	// `<= 0` comparisons only distinguish 0 (root) and (uid_t)-1 — confirm
	// this matches the intent on all supported platforms.
	if( vmgahp_user_uid > 0 ) {
		if( vmgahp_user_gid <= 0 ) {
			vmgahp_user_gid = vmgahp_user_uid;
		}
		MyString tmp_str;
		tmp_str.sprintf("%d", (int)vmgahp_user_uid);
		job_env->SetEnv("VMGAHP_USER_UID", tmp_str.Value());
		tmp_str.sprintf("%d", (int)vmgahp_user_gid);
		job_env->SetEnv("VMGAHP_USER_GID", tmp_str.Value());
	}
#endif

	job_env->SetEnv("VMGAHP_VMTYPE", m_vm_type.Value());
	job_env->SetEnv("VMGAHP_WORKING_DIR", workingdir);

	// Grab the full environment back out of the Env object
	if(IsFulldebug(D_FULLDEBUG)) {
		MyString env_str;
		job_env->getDelimitedStringForDisplay(&env_str);
		dprintf(D_FULLDEBUG, "Env = %s\n", env_str.Value());
	}

	priv_state vmgahp_priv = PRIV_ROOT;
#if defined(WIN32)
	// TODO..
	// Currently vmgahp for VMware VM universe can't run as user on Windows.
	// It seems like a bug of VMware. VMware command line tool such as "vmrun"
	// requires Administrator privilege.
	// -jaeyoung 06/15/07
	if( strcasecmp(m_vm_type.Value(), CONDOR_VM_UNIVERSE_VMWARE ) == MATCH ) {
		vmgahp_priv = PRIV_UNKNOWN;
	}
#endif

	m_vmgahp_pid = daemonCore->Create_Process(
			JobName.Value(), //Name of executable
			vmgahp_args,     //Args
			vmgahp_priv,     //Priv state
			1,               //id for our registered reaper
			FALSE,           //do not want a command port
			job_env,         //env
			workingdir,      //cwd
			family_info,     //family_info
			NULL,            //network sockets to inherit
			io_redirect,     //redirect stdin/out/err
			NULL, nice_inc );

	//NOTE: Create_Process() saves the errno for us if it is an
	//"interesting" error.
	char const *create_process_error = NULL;
	if(m_vmgahp_pid == FALSE && errno) create_process_error = strerror(errno);

	// Now that the VMGAHP server is running, close the sides of
	// the pipes we gave away to the server, and stash the ones
	// we want to keep in an object data member.
	daemonCore->Close_Pipe(io_redirect[0]);
	daemonCore->Close_Pipe(io_redirect[1]);
	if( m_include_gahp_log ) {
		daemonCore->Close_Pipe(io_redirect[2]);
	} else {
			// the null fd came from open(), not Create_Pipe
		close(io_redirect[2]);
	}

	if ( m_vmgahp_pid == FALSE ) {
		m_vmgahp_pid = -1;
		start_err_msg = "Failed to start vm-gahp server";
		dprintf(D_ALWAYS, "%s (%s)\n", start_err_msg.Value(),
				m_vmgahp_server.Value());
		if(create_process_error) {
			MyString err_msg = "Failed to execute '";
			// NOTE(review): comma operator on the next line — behaves the
			// same as ';' here, but is almost certainly a typo.
			err_msg += m_vmgahp_server.Value(),
			err_msg += "'";
			if(!args_string.IsEmpty()) {
				err_msg += " with arguments ";
				err_msg += args_string.Value();
			}
			err_msg += ": ";
			err_msg += create_process_error;
			dprintf(D_ALWAYS, "Failed to start vmgahp server (%s)\n",
					err_msg.Value());
		}
		return false;
	}

	dprintf(D_ALWAYS, "VMGAHP server pid=%d\n", m_vmgahp_pid);

		// Parent keeps the write side of stdin and the read side of
		// stdout (and of stderr when logging).
	m_vmgahp_writefd = stdin_pipefds[1];
	m_vmgahp_readfd = stdout_pipefds[0];
	if( m_include_gahp_log ) {
		m_vmgahp_errorfd = stderr_pipefds[0];
	}

	// Now initialization is done
	m_is_initialized = true;

	// print initial stderr messages from vmgahp
	printSystemErrorMsg();

	// Read the initial greeting from the vm-gahp, which is the version
	if( command_version() == false ) {
		start_err_msg = "Internal vmgahp server error";
		dprintf(D_ALWAYS,"Failed to read vmgahp server version\n");
		printSystemErrorMsg();
		cleanup();
		return false;
	}

	dprintf(D_FULLDEBUG,"VMGAHP server version: %s\n",
			m_vmgahp_version.Value());

	// Now see what commands this server supports.
	if( command_commands() == false ) {
		start_err_msg = "Internal vmgahp server error";
		dprintf(D_ALWAYS,"Failed to read supported commands from vmgahp server\n");
		printSystemErrorMsg();
		cleanup();
		return false;
	}

	// Now see what virtual machine types this server supports
	if( command_support_vms() == false ) {
		start_err_msg = "Internal vmgahp server error";
		dprintf(D_ALWAYS,"Failed to read supported vm types from vmgahp server\n");
		printSystemErrorMsg();
		cleanup();
		return false;
	}

	int result = -1;
	if( m_include_gahp_log ) {
			// Prefer event-driven stderr handling; fall back to a
			// 2-second polling timer if pipe registration fails.
		result = daemonCore->Register_Pipe(m_vmgahp_errorfd,
				"m_vmgahp_errorfd",
				static_cast<PipeHandlercpp>(&VMGahpServer::err_pipe_ready),
				"VMGahpServer::err_pipe_ready",this);

		if( result == -1 ) {
			dprintf(D_ALWAYS,"Failed to register vmgahp stderr pipe\n");
			if(m_stderr_tid != -1) {
				daemonCore->Cancel_Timer(m_stderr_tid);
				m_stderr_tid = -1;
			}
			m_stderr_tid = daemonCore->Register_Timer(2,
					2, (TimerHandlercpp)&VMGahpServer::err_pipe_ready,
					"VMGahpServer::err_pipe_ready",this);
			if( m_stderr_tid == -1 ) {
				start_err_msg = "Internal vmgahp server error";
				dprintf(D_ALWAYS,"Failed to register stderr timer\n");
				printSystemErrorMsg();
				cleanup();
				return false;
			}
		}
	}

	// try to turn on vmgahp async notification mode
	if ( !command_async_mode_on() ) {
		// not supported, set a poll interval
		m_is_async_mode = false;
		setPollInterval(m_pollInterval);
	} else {
		// command worked... register the pipe and stop polling
		result = daemonCore->Register_Pipe(m_vmgahp_readfd,
				"m_vmgahp_readfd",
				static_cast<PipeHandlercpp>(&VMGahpServer::pipe_ready),
				"VMGahpServer::pipe_ready",this);
		if( result == -1 ) {
			// failed to register the pipe for some reason; fall
			// back on polling (yuck).
			dprintf(D_ALWAYS,"Failed to register vmgahp Read pipe\n");
			m_is_async_mode = false;
			setPollInterval(m_pollInterval);
		} else {
			// pipe is registered.  stop polling.
			setPollInterval(0);
			m_is_async_mode = true;
		}
	}

	return true;
}
// Spawn the condor_starter as a DaemonCore process (variant with encrypted
// execute-directory and expression-based CPU-affinity support).
//
// Sets up: (1) an environment overriding _CONDOR_EXECUTE, possibly pointing
// it at an ecryptfs-backed subdirectory; (2) a socketpair for job-ClassAd
// updates from the child; (3) on Linux, optional glexec wrapping which
// replaces the path/args/env/std fds.  Returns the child pid, or 0 on
// failure (s_pid is also set to 0 in that case).
int Starter::execDCStarter( ArgList const &args, Env const *env,
                            int* std_fds, Stream* s )
{
	Stream *inherit_list[] =
		{ 0 /*ClassAd update stream (assigned below)*/,
		  s /*shadow syscall sock*/,
		  0 /*terminal NULL*/ };

	const ArgList* final_args = &args;
	const char* final_path = s_path;
	Env new_env;

	if( env ) {
		new_env.MergeFrom( *env );
	}

		// The starter figures out its execute directory by paraming
		// for EXECUTE, which we override in the environment here.
		// This way, all the logic about choosing a directory to use
		// is in only one place.
	ASSERT( executeDir() );
	new_env.SetEnv( "_CONDOR_EXECUTE", executeDir() );

	// Handle encrypted execute directory
	FilesystemRemap fs_remap_obj;	// put on stack so destroyed when leave this method
	FilesystemRemap* fs_remap = NULL;
	// If admin desires encrypted exec dir in config, do it
	bool encrypt_execdir = param_boolean_crufty("ENCRYPT_EXECUTE_DIRECTORY",false);
	// Or if user wants encrypted exec in job ad, do it
	if (!encrypt_execdir && s_claim->ad()) {
		s_claim->ad()->LookupBool(ATTR_ENCRYPT_EXECUTE_DIRECTORY,encrypt_execdir);
	}
	if ( encrypt_execdir ) {
#ifdef LINUX
		// On linux, setup a directory $EXECUTE/encryptedX subdirectory
		// to serve as an ecryptfs mount point; pass this directory
		// down to the condor_starter as if it were $EXECUTE so
		// that the starter creates its dir_<pid> directory on the
		// ecryptfs filesystem setup by doing an AddEncryptedMapping.
		static int unsigned long privdirnum = 0;	// monotonically-unique suffix per spawn
		TemporaryPrivSentry sentry(PRIV_CONDOR);
		s_encrypted_execute_dir.formatstr("%s%cencrypted%lu",executeDir(),
				DIR_DELIM_CHAR,privdirnum++);
		if( mkdir(encryptedExecuteDir(), 0755) < 0 ) {
			dprintf( D_FAILURE|D_ALWAYS,
					 "Failed to create encrypted dir %s: %s\n",
					 encryptedExecuteDir(), strerror(errno) );
			return 0;
		}
		dprintf( D_ALWAYS, "Created encrypted dir %s\n",
				 encryptedExecuteDir() );
		fs_remap = &fs_remap_obj;
		if ( fs_remap->AddEncryptedMapping(encryptedExecuteDir()) ) {
			// FilesystemRemap object dprintfs out an error message for us
			return 0;
		}
		new_env.SetEnv( "_CONDOR_EXECUTE", encryptedExecuteDir() );
#endif
	}

	env = &new_env;

	// Build the affinity string to pass to the starter via env
	// (comma-separated list of CPU ids assigned to this claim).
	std::string affinityString;
	if (s_claim && s_claim->rip() && s_claim->rip()->get_affinity_set()) {
		std::list<int> *l = s_claim->rip()->get_affinity_set();
		bool needComma = false;
		for (std::list<int>::iterator it = l->begin(); it != l->end(); it++) {
			if (needComma) {
				formatstr_cat(affinityString, ", %d", *it);
			} else {
				formatstr_cat(affinityString, "%d ", *it);
				needComma = true;
			}
		}
	}

		// ASSIGN_CPU_AFFINITY may be a plain boolean (param) or a ClassAd
		// expression evaluated against the claim's job ad.
	bool affinityBool = false;
	if ( ! s_claim || ! s_claim->ad()) {
		affinityBool = param_boolean("ASSIGN_CPU_AFFINITY", false);
	} else {
		auto_free_ptr assign_cpu_affinity(param("ASSIGN_CPU_AFFINITY"));
		if ( ! assign_cpu_affinity.empty()) {
			classad::Value value;
			if (s_claim->ad()->EvaluateExpr(assign_cpu_affinity.ptr(), value)) {
				if ( ! value.IsBooleanValueEquiv(affinityBool)) {
					// was an expression, but not a bool, so report it and continue
					EXCEPT("ASSIGN_CPU_AFFINITY does not evaluate to a boolean, it is : %s",
					       ClassAdValueToString(value));
				}
			}
		}
	}

	if (affinityBool) {
		new_env.SetEnv("_CONDOR_STARTD_ASSIGNED_AFFINITY", affinityString.c_str());
		new_env.SetEnv("_CONDOR_ENFORCE_CPU_AFFINITY", "true");
		dprintf(D_ALWAYS, "Setting affinity env to %s\n", affinityString.c_str());
	}

	ReliSock child_job_update_sock;   // child inherits this socket
	ASSERT( !s_job_update_sock );
	s_job_update_sock = new ReliSock; // parent (yours truly) keeps this socket
	ASSERT( s_job_update_sock );
		// Connect parent and child sockets together so child can send us
		// updates to the job ClassAd.
	if( !s_job_update_sock->connect_socketpair( child_job_update_sock ) ) {
		dprintf( D_ALWAYS,
				 "ERROR: Failed to create job ClassAd update socket!\n");
		s_pid = 0;
		return s_pid;
	}
	inherit_list[0] = &child_job_update_sock;

	// Pass the machine ad to the starter
	if (s_claim)
		s_claim->writeMachAd( s_job_update_sock );

	if( daemonCore->Register_Socket(
			s_job_update_sock,
			"starter ClassAd update socket",
			(SocketHandlercpp)&Starter::receiveJobClassAdUpdate,
			"receiveJobClassAdUpdate",
			this) < 0 )
	{
		EXCEPT("Failed to register ClassAd update socket.");
	}

#if defined(LINUX)
	// see if we should be using glexec to spawn the starter.
	// if we are, the cmd, args, env, and stdin to use will be
	// modified
	ArgList glexec_args;
	Env glexec_env;
	int glexec_std_fds[3];
	if( param_boolean( "GLEXEC_STARTER", false ) ) {
		if( ! glexec_starter_prepare( s_path,
		                              s_claim->client()->proxyFile(),
		                              args,
		                              env,
		                              std_fds,
		                              glexec_args,
		                              glexec_env,
		                              glexec_std_fds ) )
		{
			// something went wrong; prepareForGlexec will
			// have already logged it
			cleanupAfterGlexec();
			return 0;
		}
		final_path = glexec_args.GetArg(0);
		final_args = &glexec_args;
		env = &glexec_env;
		std_fds = glexec_std_fds;
	}
#endif

		// Use the dedicated reaper if one was registered, else the default.
	int reaper_id;
	if( s_reaper_id > 0 ) {
		reaper_id = s_reaper_id;
	} else {
		reaper_id = main_reaper;
	}

	if(IsFulldebug(D_FULLDEBUG)) {
		MyString args_string;
		final_args->GetArgsStringForDisplay(&args_string);
		dprintf( D_FULLDEBUG, "About to Create_Process \"%s\"\n",
				 args_string.Value() );
	}

	FamilyInfo fi;
	fi.max_snapshot_interval = pid_snapshot_interval;

	s_pid = daemonCore->
		Create_Process( final_path, *final_args, PRIV_ROOT, reaper_id,
		                TRUE, TRUE, env, NULL, &fi, inherit_list, std_fds,
		                NULL, 0, NULL, 0, NULL, NULL, NULL, NULL, fs_remap);
	if( s_pid == FALSE ) {
		dprintf( D_ALWAYS, "ERROR: exec_starter failed!\n");
		s_pid = 0;
	}

#if defined(LINUX)
	if( param_boolean( "GLEXEC_STARTER", false ) ) {
		// if we used glexec to spawn the Starter, we now need to send
		// the Starter's environment to our glexec wrapper script so it
		// can exec the Starter with all the environment variables we rely
		// on it inheriting
		//
		if ( !glexec_starter_handle_env(s_pid) ) {
			// something went wrong; handleGlexecEnvironment will
			// have already logged it
			cleanupAfterGlexec();
			return 0;
		}
	}
#endif

	return s_pid;
}
// Start the pre/post script for this job.  The script's command, arguments
// and environment are read from JobAd attributes prefixed with this proc's
// `name` (e.g. <name>Cmd, <name>Args2, <name>Env2); the command is resolved
// relative to the starter's working dir, argv[0] is forced to
// "condor_<name>script", and the process is spawned via Create_Process.
// Returns 1 on success, 0 on failure (some failures EXCEPT instead).
int ScriptProc::StartJob()
{
	dprintf(D_FULLDEBUG,"in ScriptProc::StartJob()\n");

	if ( !JobAd ) {
		dprintf ( D_ALWAYS, "No JobAd in ScriptProc::StartJob()!\n" );
		return 0;
	}

	MyString attr;

		// Look up "<name>Cmd" — the script's executable.
	attr = name;
	attr += ATTR_JOB_CMD;
	char* tmp = NULL;
	if( ! JobAd->LookupString( attr.Value(), &tmp ) ) {
		dprintf( D_ALWAYS, "%s not found in JobAd.  Aborting StartJob.\n",
				 attr.Value() );
		return 0;
	}

	// // // // // //
	// executable
	// // // // // //

		// TODO: make it smart in cases we're not the gridshell and/or
		// didn't transfer files so that we don't prepend the wrong
		// path to the binary, and don't try to chmod it.
	MyString exe_path = Starter->GetWorkingDir();
	exe_path += DIR_DELIM_CHAR;
	exe_path += tmp;
	free( tmp );
	tmp = NULL;

	if( Starter->isGridshell() ) {
			// if we're a gridshell, chmod() the binary, since globus
			// probably transfered it for us and left it with bad
			// permissions...
		priv_state old_priv = set_user_priv();
		int retval = chmod( exe_path.Value(), 0755 );
		set_priv( old_priv );
		if( retval < 0 ) {
			dprintf( D_ALWAYS, "Failed to chmod %s: %s (errno %d)\n",
					 exe_path.Value(), strerror(errno), errno );
			return 0;
		}
	}

	// // // // // //
	// Args
	// // // // // //

		// Two argument syntaxes exist; V2 ("<name>Args2") wins over V1.
	char *args1 = NULL;
	char *args2 = NULL;
	MyString args1_attr;
	MyString args2_attr;
	args1_attr = name;
	args1_attr += ATTR_JOB_ARGUMENTS1;
	args2_attr = name;
	args2_attr += ATTR_JOB_ARGUMENTS2;

	JobAd->LookupString(args1_attr.Value(), &args1);
	JobAd->LookupString(args2_attr.Value(), &args2);

	ArgList args;

	// Since we are adding to the argument list, we may need to deal
	// with platform-specific arg syntax in the user's args in order
	// to successfully merge them with the additional args.
	args.SetArgV1SyntaxToCurrentPlatform();

	// First, put "condor_<name>script" at the front of Args,
	// since that will become argv[0] of what we exec(), either
	// the wrapper or the actual job.
	MyString arg0;
	arg0 = "condor_";
	arg0 += name;
	arg0 += "script";
	args.AppendArg(arg0.Value());

	MyString args_error;
	if(args2 && *args2) {
		args.AppendArgsV2Raw(args2,&args_error);
	} else if(args1 && *args1) {
		args.AppendArgsV1Raw(args1,&args_error);
	} else {
		dprintf( D_FULLDEBUG,
				 "neither %s nor %s could be found in JobAd\n",
				 args1_attr.Value(), args2_attr.Value());
	}

	free( args1 );
	free( args2 );

	// // // // // //
	// Environment
	// // // // // //

		// Same V2-over-V1 precedence for the environment attributes.
	char *env1 = NULL;
	char *env2 = NULL;
	MyString env1_attr;
	MyString env2_attr;
	env1_attr = name;
	env1_attr += ATTR_JOB_ENVIRONMENT1;
	env2_attr = name;
	env2_attr += ATTR_JOB_ENVIRONMENT2;
	JobAd->LookupString( env1_attr.Value(), &env1 );
	JobAd->LookupString( env2_attr.Value(), &env2 );
		// TODO do we want to use the regular ATTR_JOB_ENVIRONMENT
		// if there's nothing specific for this script?

	// Now, instantiate an Env object so we can manipulate the
	// environment as needed.
	Env job_env;

	MyString env_errors;
	if( env2 && *env2 ) {
		if( ! job_env.MergeFromV2Raw(env2,&env_errors) ) {
			dprintf( D_ALWAYS, "Invalid %s found in JobAd (%s). "
					 "Aborting ScriptProc::StartJob.\n",
					 env2_attr.Value(),env_errors.Value() );
			free( env1 );
			free( env2 );
			return 0;
		}
	} else if( env1 && *env1 ) {
		if( ! job_env.MergeFromV1Raw(env1,&env_errors) ) {
			dprintf( D_ALWAYS, "Invalid %s found in JobAd (%s). "
					 "Aborting ScriptProc::StartJob.\n",
					 env1_attr.Value(),env_errors.Value() );
			free( env1 );
			free( env2 );
			return 0;
		}
	}
	free(env1);
	free(env2);

	// Now, let the starter publish any env vars it wants to add
	Starter->PublishToEnv( &job_env );

	// TODO: Deal with port regulation stuff?

	// Grab the full environment back out of the Env object
	if(IsFulldebug(D_FULLDEBUG)) {
		MyString env_str;
		job_env.getDelimitedStringForDisplay(&env_str);
		dprintf(D_FULLDEBUG, "%sEnv = %s\n", name, env_str.Value() );
	}

	// // // // // //
	// Standard Files
	// // // // // //

	// TODO???

	// // // // // //
	// Misc + Exec
	// // // // // //

	// TODO?
	// Starter->jic->notifyJobPreSpawn( name );

	// compute job's renice value by evaluating the machine's
	// JOB_RENICE_INCREMENT in the context of the job ad...
	// TODO?
	int nice_inc = 10;

	// in the below dprintfs, we want to skip past argv[0], which
	// is sometimes condor_exec, in the Args string.
	MyString args_string;
	args.GetArgsStringForDisplay(&args_string,1);
	dprintf( D_ALWAYS, "About to exec %s script: %s %s\n", name,
			 exe_path.Value(), args_string.Value() );

	// If there is a requested coresize for this job, enforce it.
	// It is truncated because you can't put an unsigned integer
	// into a classad. I could rewrite condor's use of ATTR_CORE_SIZE to
	// be a float, but then when that attribute is read/written to the
	// job queue log by/or shared between versions of Condor which view the
	// type of that attribute differently, calamity would arise.
	int core_size_truncated;
	size_t core_size;
	size_t *core_size_ptr = NULL;
	if ( JobAd->LookupInteger(ATTR_CORE_SIZE, core_size_truncated) ) {
		core_size = (size_t)core_size_truncated;
		core_size_ptr = &core_size;
	}

	JobPid = daemonCore->Create_Process(exe_path.Value(),
	                                    args,
	                                    PRIV_USER_FINAL,
	                                    1,
	                                    FALSE,
	                                    FALSE,
	                                    &job_env,
	                                    Starter->jic->jobIWD(),
	                                    NULL,
	                                    NULL,
	                                    NULL,
	                                    NULL,
	                                    nice_inc,
	                                    NULL,
	                                    DCJOBOPT_NO_ENV_INHERIT,
	                                    core_size_ptr );

	//NOTE: Create_Process() saves the errno for us if it is an
	//"interesting" error.
	char const *create_process_error = NULL;
	int create_process_errno = errno;
	if( JobPid == FALSE && errno ) {
		create_process_error = strerror( errno );
	}

	if( JobPid == FALSE ) {
		JobPid = -1;
		if( create_process_error ) {
				// Put the job on hold with a descriptive message before
				// we EXCEPT out of the starter.
			MyString err_msg = "Failed to execute '";
			err_msg += exe_path.Value();
			err_msg += "'";
			if(!args_string.IsEmpty()) {
				err_msg += " with arguments ";
				err_msg += args_string.Value();
			}
			err_msg += ": ";
			err_msg += create_process_error;
			Starter->jic->notifyStarterError( err_msg.Value(), true,
					CONDOR_HOLD_CODE_FailedToCreateProcess,
					create_process_errno );
		}
		EXCEPT( "Create_Process(%s,%s, ...) failed", exe_path.Value(),
				args_string.Value() );
		return 0;
	}

	dprintf( D_ALWAYS, "Create_Process succeeded, pid=%d\n", JobPid );

	job_start_time.getTime();

	return 1;
}
bool CollectorEngine::ValidateClassAd(int command,ClassAd *clientAd,Sock *sock) { if( !m_collector_requirements ) { // no need to do any of the following checks if the admin has // not configured any COLLECTOR_REQUIREMENTS return true; } char const *ipattr = NULL; switch( command ) { case MERGE_STARTD_AD: case UPDATE_STARTD_AD: case UPDATE_STARTD_AD_WITH_ACK: ipattr = ATTR_STARTD_IP_ADDR; break; case UPDATE_SCHEDD_AD: case UPDATE_SUBMITTOR_AD: ipattr = ATTR_SCHEDD_IP_ADDR; break; case UPDATE_MASTER_AD: ipattr = ATTR_MASTER_IP_ADDR; break; case UPDATE_NEGOTIATOR_AD: ipattr = ATTR_NEGOTIATOR_IP_ADDR; break; case UPDATE_COLLECTOR_AD: ipattr = ATTR_COLLECTOR_IP_ADDR; break; case UPDATE_LICENSE_AD: case UPDATE_CKPT_SRVR_AD: case UPDATE_STORAGE_AD: case UPDATE_HAD_AD: case UPDATE_AD_GENERIC: case UPDATE_GRID_AD: case UPDATE_ACCOUNTING_AD: default: break; } if(ipattr) { MyString my_address; MyString subsys_ipaddr; // Some ClassAds contain two copies of the IP address, // one named "MyAddress" and one named "<SUBSYS>IpAddr". // If the latter exists, then it _must_ match the former, // because people may be filtering in COLLECTOR_REQUIREMENTS // on MyAddress, and we don't want them to have to worry // about filtering on the older cruftier <SUBSYS>IpAddr. if( clientAd->LookupString( ipattr, subsys_ipaddr ) ) { clientAd->LookupString( ATTR_MY_ADDRESS, my_address ); if( my_address != subsys_ipaddr ) { dprintf(D_ALWAYS, "%s VIOLATION: ClassAd from %s advertises inconsistent" " IP addresses: %s=%s, %s=%s\n", COLLECTOR_REQUIREMENTS, (sock ? 
sock->get_sinful_peer() : "(NULL)"), ipattr, subsys_ipaddr.Value(), ATTR_MY_ADDRESS, my_address.Value()); return false; } } } // Now verify COLLECTOR_REQUIREMENTS bool collector_req_result = false; if( !EvalBool(COLLECTOR_REQUIREMENTS,m_collector_requirements,clientAd,collector_req_result) ) { dprintf(D_ALWAYS,"WARNING: %s did not evaluate to a boolean result.\n",COLLECTOR_REQUIREMENTS); collector_req_result = false; } if( !collector_req_result ) { static int details_shown=0; bool show_details = (details_shown<10) || IsFulldebug(D_FULLDEBUG); dprintf(D_ALWAYS,"%s VIOLATION: requirements do not match ad from %s.%s\n", COLLECTOR_REQUIREMENTS, sock ? sock->get_sinful_peer() : "(null)", show_details ? " Contents of the ClassAd:" : " (turn on D_FULLDEBUG to see details)"); if( show_details ) { details_shown += 1; dPrintAd(D_ALWAYS, *clientAd); } return false; } return true; }
int OsProc::StartJob(FamilyInfo* family_info, FilesystemRemap* fs_remap=NULL) { int nice_inc = 0; bool has_wrapper = false; dprintf(D_FULLDEBUG,"in OsProc::StartJob()\n"); if ( !JobAd ) { dprintf ( D_ALWAYS, "No JobAd in OsProc::StartJob()!\n" ); return 0; } MyString JobName; if ( JobAd->LookupString( ATTR_JOB_CMD, JobName ) != 1 ) { dprintf( D_ALWAYS, "%s not found in JobAd. Aborting StartJob.\n", ATTR_JOB_CMD ); return 0; } const char* job_iwd = Starter->jic->jobRemoteIWD(); dprintf( D_ALWAYS, "IWD: %s\n", job_iwd ); // some operations below will require a PrivSepHelper if // PrivSep is enabled (if it's not, privsep_helper will be // NULL) PrivSepHelper* privsep_helper = Starter->privSepHelper(); // // // // // // // Arguments // // // // // // // prepend the full path to this name so that we // don't have to rely on the PATH inside the // USER_JOB_WRAPPER or for exec(). bool transfer_exe = false; if (!JobAd->LookupBool(ATTR_TRANSFER_EXECUTABLE, transfer_exe)) { transfer_exe = false; } bool preserve_rel = false; if (!JobAd->LookupBool(ATTR_PRESERVE_RELATIVE_EXECUTABLE, preserve_rel)) { preserve_rel = false; } bool relative_exe = is_relative_to_cwd(JobName.Value()); if (relative_exe && preserve_rel && !transfer_exe) { dprintf(D_ALWAYS, "Preserving relative executable path: %s\n", JobName.Value()); } else if ( strcmp(CONDOR_EXEC,JobName.Value()) == 0 ) { JobName.formatstr( "%s%c%s", Starter->GetWorkingDir(), DIR_DELIM_CHAR, CONDOR_EXEC ); } else if (relative_exe && job_iwd && *job_iwd) { MyString full_name; full_name.formatstr("%s%c%s", job_iwd, DIR_DELIM_CHAR, JobName.Value()); JobName = full_name; } if( Starter->isGridshell() ) { // if we're a gridshell, just try to chmod our job, since // globus probably transfered it for us and left it with // bad permissions... 
priv_state old_priv = set_user_priv(); int retval = chmod( JobName.Value(), S_IRWXU | S_IRWXO | S_IRWXG ); set_priv( old_priv ); if( retval < 0 ) { dprintf ( D_ALWAYS, "Failed to chmod %s!\n", JobName.Value() ); return 0; } } ArgList args; // Since we may be adding to the argument list, we may need to deal // with platform-specific arg syntax in the user's args in order // to successfully merge them with the additional wrapper args. args.SetArgV1SyntaxToCurrentPlatform(); // First, put "condor_exec" or whatever at the front of Args, // since that will become argv[0] of what we exec(), either // the wrapper or the actual job. if( !getArgv0() ) { args.AppendArg(JobName.Value()); } else { args.AppendArg(getArgv0()); } // Support USER_JOB_WRAPPER parameter... char *wrapper = NULL; if( (wrapper=param("USER_JOB_WRAPPER")) ) { // make certain this wrapper program exists and is executable if( access(wrapper,X_OK) < 0 ) { dprintf( D_ALWAYS, "Cannot find/execute USER_JOB_WRAPPER file %s\n", wrapper ); free( wrapper ); return 0; } has_wrapper = true; // Now, we've got a valid wrapper. We want that to become // "JobName" so we exec it directly, and we want to put // what was the JobName (with the full path) as the first // argument to the wrapper args.AppendArg(JobName.Value()); JobName = wrapper; free(wrapper); } // Support USE_PARROT bool use_parrot = false; if( JobAd->LookupBool( ATTR_USE_PARROT, use_parrot) ) { // Check for parrot executable char *parrot = NULL; if( (parrot=param("PARROT")) ) { if( access(parrot,X_OK) < 0 ) { dprintf( D_ALWAYS, "Unable to use parrot(Cannot find/execute " "at %s(%s)).\n", parrot, strerror(errno) ); free( parrot ); return 0; } else { args.AppendArg(JobName.Value()); JobName = parrot; free( parrot ); } } else { dprintf( D_ALWAYS, "Unable to use parrot(Undefined path in config" " file)" ); return 0; } } // Either way, we now have to add the user-specified args as // the rest of the Args string. 
MyString args_error; if(!args.AppendArgsFromClassAd(JobAd,&args_error)) { dprintf(D_ALWAYS, "Failed to read job arguments from JobAd. " "Aborting OsProc::StartJob: %s\n",args_error.Value()); return 0; } // // // // // // // Environment // // // // // // // Now, instantiate an Env object so we can manipulate the // environment as needed. Env job_env; MyString env_errors; if( !Starter->GetJobEnv(JobAd,&job_env,&env_errors) ) { dprintf( D_ALWAYS, "Aborting OSProc::StartJob: %s\n", env_errors.Value()); return 0; } // // // // // // // Standard Files // // // // // // // handle stdin, stdout, and stderr redirection int fds[3]; // initialize these to -2 to mean they're not specified. // -1 will be treated as an error. fds[0] = -2; fds[1] = -2; fds[2] = -2; // in order to open these files we must have the user's privs: priv_state priv; priv = set_user_priv(); // if we're in PrivSep mode, we won't necessarily be able to // open the files for the job. getStdFile will return us an // open FD in some situations, but otherwise will give us // a filename that we'll pass to the PrivSep Switchboard // bool stdin_ok; bool stdout_ok; bool stderr_ok; MyString privsep_stdin_name; MyString privsep_stdout_name; MyString privsep_stderr_name; if (privsep_helper != NULL) { stdin_ok = getStdFile(SFT_IN, NULL, true, "Input file", &fds[0], &privsep_stdin_name); stdout_ok = getStdFile(SFT_OUT, NULL, true, "Output file", &fds[1], &privsep_stdout_name); stderr_ok = getStdFile(SFT_ERR, NULL, true, "Error file", &fds[2], &privsep_stderr_name); } else { fds[0] = openStdFile( SFT_IN, NULL, true, "Input file"); stdin_ok = (fds[0] != -1); fds[1] = openStdFile( SFT_OUT, NULL, true, "Output file"); stdout_ok = (fds[1] != -1); fds[2] = openStdFile( SFT_ERR, NULL, true, "Error file"); stderr_ok = (fds[2] != -1); } /* Bail out if we couldn't open the std files correctly */ if( !stdin_ok || !stdout_ok || !stderr_ok ) { /* only close ones that had been opened correctly */ for ( int i = 0; i <= 2; i++ ) { if 
( fds[i] >= 0 ) { daemonCore->Close_FD ( fds[i] ); } } dprintf(D_ALWAYS, "Failed to open some/all of the std files...\n"); dprintf(D_ALWAYS, "Aborting OsProc::StartJob.\n"); set_priv(priv); /* go back to original priv state before leaving */ return 0; } // // // // // // // Misc + Exec // // // // // // if( !ThisProcRunsAlongsideMainProc() ) { Starter->jic->notifyJobPreSpawn(); } // compute job's renice value by evaluating the machine's // JOB_RENICE_INCREMENT in the context of the job ad... char* ptmp = param( "JOB_RENICE_INCREMENT" ); if( ptmp ) { // insert renice expr into our copy of the job ad MyString reniceAttr = "Renice = "; reniceAttr += ptmp; if( !JobAd->Insert( reniceAttr.Value() ) ) { dprintf( D_ALWAYS, "ERROR: failed to insert JOB_RENICE_INCREMENT " "into job ad, Aborting OsProc::StartJob...\n" ); free( ptmp ); return 0; } // evaluate if( JobAd->EvalInteger( "Renice", NULL, nice_inc ) ) { dprintf( D_ALWAYS, "Renice expr \"%s\" evaluated to %d\n", ptmp, nice_inc ); } else { dprintf( D_ALWAYS, "WARNING: job renice expr (\"%s\") doesn't " "eval to int! Using default of 10...\n", ptmp ); nice_inc = 10; } // enforce valid ranges for nice_inc if( nice_inc < 0 ) { dprintf( D_FULLDEBUG, "WARNING: job renice value (%d) is too " "low: adjusted to 0\n", nice_inc ); nice_inc = 0; } else if( nice_inc > 19 ) { dprintf( D_FULLDEBUG, "WARNING: job renice value (%d) is too " "high: adjusted to 19\n", nice_inc ); nice_inc = 19; } ASSERT( ptmp ); free( ptmp ); ptmp = NULL; } else { // if JOB_RENICE_INCREMENT is undefined, default to 0 nice_inc = 0; } // in the below dprintfs, we want to skip past argv[0], which // is sometimes condor_exec, in the Args string. MyString args_string; args.GetArgsStringForDisplay(&args_string, 1); if( has_wrapper ) { // print out exactly what we're doing so folks can debug // it, if they need to. 
dprintf( D_ALWAYS, "Using wrapper %s to exec %s\n", JobName.Value(), args_string.Value() ); } else { dprintf( D_ALWAYS, "About to exec %s %s\n", JobName.Value(), args_string.Value() ); } // Grab the full environment back out of the Env object if(IsFulldebug(D_FULLDEBUG)) { MyString env_string; job_env.getDelimitedStringForDisplay(&env_string); dprintf(D_FULLDEBUG, "Env = %s\n", env_string.Value()); } // Check to see if we need to start this process paused, and if // so, pass the right flag to DC::Create_Process(). int job_opt_mask = DCJOBOPT_NO_CONDOR_ENV_INHERIT; if (!param_boolean("JOB_INHERITS_STARTER_ENVIRONMENT",false)) { job_opt_mask |= DCJOBOPT_NO_ENV_INHERIT; } int suspend_job_at_exec = 0; JobAd->LookupBool( ATTR_SUSPEND_JOB_AT_EXEC, suspend_job_at_exec); if( suspend_job_at_exec ) { dprintf( D_FULLDEBUG, "OsProc::StartJob(): " "Job wants to be suspended at exec\n" ); job_opt_mask |= DCJOBOPT_SUSPEND_ON_EXEC; } // If there is a requested coresize for this job, enforce it. // Convert negative and very large values to RLIM_INFINITY, meaning // no size limit. // RLIM_INFINITY is unsigned, but its value and type size vary. 
long long core_size_ad; size_t core_size; size_t *core_size_ptr = NULL; #if !defined(WIN32) if ( JobAd->LookupInteger( ATTR_CORE_SIZE, core_size_ad ) ) { if ( core_size_ad < 0 || (unsigned long long)core_size_ad > RLIM_INFINITY ) { core_size = RLIM_INFINITY; } else { core_size = (size_t)core_size_ad; } core_size_ptr = &core_size; } #endif // !defined(WIN32) long rlimit_as_hard_limit = 0; char *rlimit_expr = param("STARTER_RLIMIT_AS"); if (rlimit_expr) { classad::ClassAdParser parser; classad::ExprTree *tree = parser.ParseExpression(rlimit_expr); if (tree) { classad::Value val; long long result; if (EvalExprTree(tree, Starter->jic->machClassAd(), JobAd, val) && val.IsIntegerValue(result)) { result *= 1024 * 1024; // convert to megabytes rlimit_as_hard_limit = (long)result; // truncate for Create_Process if (result > rlimit_as_hard_limit) { // if truncation to long results in a change in the value, then // the requested limit must be > 2 GB and we are on a 32 bit platform // in that case, the requested limit is > than what the process can get anyway // so just don't set a limit. rlimit_as_hard_limit = 0; } if (rlimit_as_hard_limit > 0) { dprintf(D_ALWAYS, "Setting job's virtual memory rlimit to %ld megabytes\n", rlimit_as_hard_limit); } } else { dprintf(D_ALWAYS, "Can't evaluate STARTER_RLIMIT_AS expression %s\n", rlimit_expr); } } else { dprintf(D_ALWAYS, "Can't parse STARTER_RLIMIT_AS expression: %s\n", rlimit_expr); } } int *affinity_mask = makeCpuAffinityMask(Starter->getMySlotNumber()); #if defined ( WIN32 ) owner_profile_.update (); /************************************************************* NOTE: We currently *ONLY* support loading slot-user profiles. 
This limitation will be addressed shortly, by allowing regular users to load their registry hive - Ben [2008-09-31] **************************************************************/ bool load_profile = false, run_as_owner = false; JobAd->LookupBool ( ATTR_JOB_LOAD_PROFILE, load_profile ); JobAd->LookupBool ( ATTR_JOB_RUNAS_OWNER, run_as_owner ); if ( load_profile && !run_as_owner ) { if ( owner_profile_.load () ) { /* publish the users environment into that of the main job's environment */ if ( !owner_profile_.environment ( job_env ) ) { dprintf ( D_ALWAYS, "OsProc::StartJob(): Failed to " "export owner's environment.\n" ); } } else { dprintf ( D_ALWAYS, "OsProc::StartJob(): Failed to load " "owner's profile.\n" ); } } #endif // While we are still in user priv, print out the username #if defined(LINUX) if( Starter->glexecPrivSepHelper() ) { // TODO: if there is some way to figure out the final username, // print it out here or after starting the job. dprintf(D_ALWAYS,"Running job via glexec\n"); } #else if( false ) { } #endif else { char const *username = NULL; char const *how = ""; CondorPrivSepHelper* cpsh = Starter->condorPrivSepHelper(); if( cpsh ) { username = cpsh->get_user_name(); how = "via privsep switchboard "; } else { username = get_user_loginname(); } if( !username ) { username = "******"; } dprintf(D_ALWAYS,"Running job %sas user %s\n",how,username); } set_priv ( priv ); // use this to return more detailed and reliable error message info // from create-process operation. 
MyString create_process_err_msg; if (privsep_helper != NULL) { const char* std_file_names[3] = { privsep_stdin_name.Value(), privsep_stdout_name.Value(), privsep_stderr_name.Value() }; JobPid = privsep_helper->create_process(JobName.Value(), args, job_env, job_iwd, fds, std_file_names, nice_inc, core_size_ptr, 1, job_opt_mask, family_info, affinity_mask, &create_process_err_msg); } else { JobPid = daemonCore->Create_Process( JobName.Value(), args, PRIV_USER_FINAL, 1, FALSE, FALSE, &job_env, job_iwd, family_info, NULL, fds, NULL, nice_inc, NULL, job_opt_mask, core_size_ptr, affinity_mask, NULL, &create_process_err_msg, fs_remap, rlimit_as_hard_limit); } // Create_Process() saves the errno for us if it is an "interesting" error. int create_process_errno = errno; // errno is 0 in the privsep case. This executes for the daemon core create-process logic if ((FALSE == JobPid) && (0 != create_process_errno)) { if (create_process_err_msg != "") create_process_err_msg += " "; MyString errbuf; errbuf.formatstr("(errno=%d: '%s')", create_process_errno, strerror(create_process_errno)); create_process_err_msg += errbuf; } // now close the descriptors in fds array. our child has inherited // them already, so we should close them so we do not leak descriptors. // NOTE, we want to use a special method to close the starter's // versions, if that's what we're using, so we don't think we've // still got those available in other parts of the code for any // reason. for ( int i = 0; i <= 2; i++ ) { if ( fds[i] >= 0 ) { daemonCore->Close_FD ( fds[i] ); } } if ( JobPid == FALSE ) { JobPid = -1; if(!create_process_err_msg.IsEmpty()) { // if the reason Create_Process failed was that registering // a family with the ProcD failed, it is indicative of a // problem regarding this execute machine, not the job. in // this case, we'll want to EXCEPT instead of telling the // Shadow to put the job on hold. 
there are probably other // error conditions where EXCEPTing would be more appropriate // as well... // if (create_process_errno == DaemonCore::ERRNO_REGISTRATION_FAILED) { EXCEPT("Create_Process failed to register the job with the ProcD"); } MyString err_msg = "Failed to execute '"; err_msg += JobName; err_msg += "'"; if(!args_string.IsEmpty()) { err_msg += " with arguments "; err_msg += args_string.Value(); } err_msg += ": "; err_msg += create_process_err_msg; if( !ThisProcRunsAlongsideMainProc() ) { Starter->jic->notifyStarterError( err_msg.Value(), true, CONDOR_HOLD_CODE_FailedToCreateProcess, create_process_errno ); } } dprintf(D_ALWAYS,"Create_Process(%s,%s, ...) failed: %s\n", JobName.Value(), args_string.Value(), create_process_err_msg.Value()); return 0; } num_pids++; dprintf(D_ALWAYS,"Create_Process succeeded, pid=%d\n",JobPid); job_start_time.getTime(); return 1; }
// Detect whether a usable docker daemon is present by first querying the
// docker version and then running "docker info", logging its output.
//
// @param err  CondorError stack for failure details (passed to ::version)
// @return 0 on success; -1 if the docker binary can't be configured,
//         -2 if "docker info" can't be launched, -3 if it exits non-zero,
//         -4 if the version probe fails.
int DockerAPI::detect( CondorError & err ) {
	// FIXME: Remove ::version() as a public API and return it from here,
	// because there's no point in doing this twice.
	std::string version;
	int rval = DockerAPI::version( version, err );
	if( rval != 0 ) {
		dprintf(D_ALWAYS, "DockerAPI::detect() failed to detect the Docker version; assuming absent.\n" );
		return -4;
	}

	ArgList infoArgs;
	if ( ! add_docker_arg(infoArgs))
		return -1;
	infoArgs.AppendArg( "info" );

	MyString displayString;
	infoArgs.GetArgsStringForLogging( & displayString );
	dprintf( D_FULLDEBUG, "Attempting to run: '%s'.\n", displayString.c_str() );

#if 1
	MyPopenTimer pgm;
	if (pgm.start_program(infoArgs, true, NULL, false) < 0) {
		dprintf( D_ALWAYS | D_FAILURE, "Failed to run '%s'.\n", displayString.c_str() );
		return -2;
	}

	int exitCode;
	if ( ! pgm.wait_for_exit(default_timeout, &exitCode) || exitCode != 0) {
		pgm.close_program(1);
		MyString line;
		line.readLine(pgm.output(), false); line.chomp();
		dprintf( D_ALWAYS, "'%s' did not exit successfully (code %d); the first line of output was '%s'.\n", displayString.c_str(), exitCode, line.c_str());
		return -3;
	}

	if (IsFulldebug(D_ALWAYS)) {
		MyString line;
		// BUGFIX: the original do/while loop called readLine() both in the
		// loop body and in the loop condition, which silently discarded
		// every other line of "docker info" output. Read each line once.
		while (line.readLine(pgm.output(), false)) {
			line.chomp();
			dprintf( D_FULLDEBUG, "[docker info] %s\n", line.c_str() );
		}
	}
#else
	FILE * dockerResults = my_popen( infoArgs, "r", 1 , 0, false);
	if( dockerResults == NULL ) {
		dprintf( D_ALWAYS | D_FAILURE, "Failed to run '%s'.\n", displayString.c_str() );
		return -2;
	}

	// Even if we don't care about the success output, the failure output
	// can be handy for debugging...
	char buffer[1024];
	std::vector< std::string > output;
	while( fgets( buffer, 1024, dockerResults ) != NULL ) {
		size_t end = strlen(buffer);
		if (end > 0 && buffer[end-1] == '\n') { buffer[end-1] = '\0'; }
		output.push_back( buffer );
	}
	for( unsigned i = 0; i < output.size(); ++i ) {
		dprintf( D_FULLDEBUG, "[docker info] %s\n", output[i].c_str() );
	}

	int exitCode = my_pclose( dockerResults );
	if( exitCode != 0 ) {
		// BUGFIX: guard against empty output before dereferencing output[0].
		dprintf( D_ALWAYS, "'%s' did not exit successfully (code %d); the first line of output was '%s'.\n",
			displayString.c_str(), exitCode,
			output.empty() ? "" : output[0].c_str() );
		return -3;
	}
#endif
	return 0;
}
// Ask the schedd (GET_JOB_CONNECT_INFO command) for the information
// needed to connect directly to the starter running a given job:
// starter address, claim id, starter version, and slot name.
//
// @param jobid / subproc    job to query (subproc==-1 means "not set")
// @param session_info       opaque session description forwarded to the schedd
// @param timeout            socket connect/command timeout, seconds
// @param errstack           receives connection/authentication errors
// @param starter_addr, starter_claim_id, starter_version, slot_name
//                           outputs, filled on success
// @param error_msg          human-readable failure reason on false return
// @param retry_is_sensible  on failure: whether the caller should retry
// @param job_status, hold_reason  on failure: job state reported by schedd
// @return true iff the schedd replied with a success result
bool DCSchedd::getJobConnectInfo(
	PROC_ID jobid,
	int subproc,
	char const *session_info,
	int timeout,
	CondorError *errstack,
	MyString &starter_addr,
	MyString &starter_claim_id,
	MyString &starter_version,
	MyString &slot_name,
	MyString &error_msg,
	bool &retry_is_sensible,
	int &job_status,
	MyString &hold_reason)
{
	ClassAd input;
	ClassAd output;

		// Build the request ad.
	input.Assign(ATTR_CLUSTER_ID,jobid.cluster);
	input.Assign(ATTR_PROC_ID,jobid.proc);
	if( subproc != -1 ) {
		input.Assign(ATTR_SUB_PROC_ID,subproc);
	}
	input.Assign(ATTR_SESSION_INFO,session_info);

	ReliSock sock;
	if( !connectSock(&sock,timeout,errstack) ) {
		error_msg = "Failed to connect to schedd";
		dprintf( D_ALWAYS, "%s\n",error_msg.Value());
		return false;
	}

	if( !startCommand(GET_JOB_CONNECT_INFO, &sock, timeout, errstack) ) {
		error_msg = "Failed to send GET_JOB_CONNECT_INFO to schedd";
		dprintf( D_ALWAYS, "%s\n",error_msg.Value());
		return false;
	}

		// This command hands out claim info, so the caller's identity
		// must be authenticated first.
	if( !forceAuthentication(&sock, errstack) ) {
		error_msg = "Failed to authenticate";
		dprintf( D_ALWAYS, "%s\n",error_msg.Value());
		return false;
	}

		// Send the request ad...
	sock.encode();
	if( !putClassAd(&sock, input) || !sock.end_of_message() ) {
		error_msg = "Failed to send GET_JOB_CONNECT_INFO to schedd";
		dprintf( D_ALWAYS, "%s\n",error_msg.Value());
		return false;
	}

		// ...and read the response ad.
	sock.decode();
	if( !getClassAd(&sock, output) || !sock.end_of_message() ) {
		error_msg = "Failed to get response from schedd";
		dprintf( D_ALWAYS, "%s\n",error_msg.Value());
		return false;
	}

	if( IsFulldebug(D_FULLDEBUG) ) {
		std::string adstr;
		sPrintAd(adstr, output, true);
		dprintf(D_FULLDEBUG,"Response for GET_JOB_CONNECT_INFO:\n%s\n",
				adstr.c_str());
	}

	bool result=false;
	output.LookupBool(ATTR_RESULT,result);

	if( !result ) {
			// Failure: extract the diagnostic attributes for the caller.
		output.LookupString(ATTR_HOLD_REASON,hold_reason);
		output.LookupString(ATTR_ERROR_STRING,error_msg);
		retry_is_sensible = false;
		output.LookupBool(ATTR_RETRY,retry_is_sensible);
		output.LookupInteger(ATTR_JOB_STATUS,job_status);
	}
	else {
			// Success: extract the starter connection info.
		output.LookupString(ATTR_STARTER_IP_ADDR,starter_addr);
		output.LookupString(ATTR_CLAIM_ID,starter_claim_id);
		output.LookupString(ATTR_VERSION,starter_version);
		output.LookupString(ATTR_REMOTE_HOST,slot_name);
	}

	return result;
}
// Return the gridmanager node serving (owner, attr_value, cluster, proc),
// starting a new condor_gridmanager process if one is not already running.
// Returns NULL on any failure (shutdown in progress, config errors, mkdir
// or Create_Process failures, or an attempt to run as root).
GridUniverseLogic::gman_node_t *
GridUniverseLogic::StartOrFindGManager(const char* owner, const char* domain,
		const char* attr_value, const char* attr_name, int cluster, int proc)
{
	gman_node_t* gman_node;
	int pid;

		// If attr_value is an empty string, convert to NULL since code
		// after this point expects that.
	if ( attr_value && strlen(attr_value)==0 ) {
		attr_value = NULL;
		attr_name = NULL;
	}

	if ( (gman_node=lookupGmanByOwner(owner, attr_value, cluster, proc)) ) {
		// found it
		return gman_node;
	}

	// not found. fire one up! we want to run the GManager as the user.

	// but first, make certain we are not shutting down...
	if (!gman_pid_table) {
		// destructor has already been called; we are probably
		// closing down.
		return NULL;
	}

#ifndef WIN32
		// Refuse to spawn a gridmanager for root.
	if (owner && strcasecmp(owner, "root") == 0 ) {
		dprintf(D_ALWAYS, "Tried to start condor_gmanager as root.\n");
		return NULL;
	}
#endif

	dprintf( D_FULLDEBUG, "Starting condor_gmanager for owner %s (%d.%d)\n",
			owner, cluster, proc);

	char *gman_binary;
	gman_binary = param("GRIDMANAGER");
	if ( !gman_binary ) {
		dprintf(D_ALWAYS,"ERROR - GRIDMANAGER not defined in config file\n");
		return NULL;
	}

	ArgList args;
	MyString error_msg;

	args.AppendArg("condor_gridmanager");
	args.AppendArg("-f");

		// Append any admin-configured extra arguments.
	char *gman_args = param("GRIDMANAGER_ARGS");
	if(!args.AppendArgsV1RawOrV2Quoted(gman_args,&error_msg)) {
		dprintf( D_ALWAYS, "ERROR: failed to parse gridmanager args: %s\n",
				 error_msg.Value());
		free(gman_binary);
		free(gman_args);
		return NULL;
	}
	free(gman_args);

	// build a constraint
	if ( !owner ) {
		dprintf(D_ALWAYS,"ERROR - missing owner field\n");
		free(gman_binary);
		return NULL;
	}
	MyString constraint;
	if ( !attr_name ) {
			// constraint by owner + grid universe only
		constraint.formatstr("(%s=?=\"%s\"&&%s==%d)",
				ATTR_OWNER,owner,
				ATTR_JOB_UNIVERSE,CONDOR_UNIVERSE_GRID);
	} else {
			// additionally constrain by the given attribute/value pair
		constraint.formatstr("(%s=?=\"%s\"&&%s=?=\"%s\"&&%s==%d)",
				ATTR_OWNER,owner,
				attr_name,attr_value,
				ATTR_JOB_UNIVERSE,CONDOR_UNIVERSE_GRID);

		args.AppendArg("-A");
		args.AppendArg(attr_value);
	}
	args.AppendArg("-C");
	args.AppendArg(constraint.Value());

	MyString full_owner_name(owner);
	if ( domain && *domain ) {
		full_owner_name.formatstr_cat( "@%s", domain );
	}
	args.AppendArg("-o");
	args.AppendArg(full_owner_name.Value());

	if (!init_user_ids(owner, domain)) {
		dprintf(D_ALWAYS,"ERROR - init_user_ids() failed in GRIDMANAGER\n");
		free(gman_binary);
		return NULL;
	}

	static bool first_time_through = true;
	if ( first_time_through ) {
		// Note: Because first_time_through is static, this block runs only
		// once per schedd invocation.
		first_time_through = false;

		// Clean up any old / abandoned scratch dirs.
		dprintf(D_FULLDEBUG,"Checking for old gridmanager scratch dirs\n");
		char *prefix = temp_dir_path();
		ASSERT(prefix);
		Directory tmp( prefix, PRIV_USER );
		const char *f;
		char const *dot;
		int fname_pid;
		int mypid = daemonCore->getpid();
		int scratch_pre_len = strlen(scratch_prefix);
		while ( (f=tmp.Next()) ) {
				// skip regular files -- we only need to inspect subdirs
			if ( !tmp.IsDirectory() ) {
				continue;
			}
				// skip if it does not start with our prefix
			if ( strncmp(scratch_prefix,f,scratch_pre_len) ) {
				continue;
			}
				// skip if does not end w/ a pid
			dot = strrchr(f,'.');
			if ( !dot ) {
				continue;
			}
				// skip if this pid is still alive and not ours
			dot++;	// skip over period
			fname_pid = atoi(dot);
			if ( fname_pid != mypid && daemonCore->Is_Pid_Alive(fname_pid) ) {
					continue;
			}
				// if we made it here, blow away this subdir
			if ( tmp.Remove_Current_File() ) {
				dprintf(D_ALWAYS,"Removed old scratch dir %s\n",
						tmp.GetFullPath());
			}
		}	// end of while for cleanup of old scratch dirs

		dprintf(D_FULLDEBUG,"Done checking for old scratch dirs\n");

		if (prefix != NULL) {
			free(prefix);
			prefix = NULL;
		}
	}	// end of once-per-schedd invocation block

	// Create a temp dir for the gridmanager and append proper
	// command-line arguments to tell where it is.
	bool failed = false;
	gman_node = new gman_node_t;
	char *finalpath = scratchFilePath(gman_node);
	priv_state saved_priv = set_user_priv();
	if ( (mkdir(finalpath,0700)) < 0 ) {
		// mkdir failed.
		dprintf(D_ALWAYS,"ERROR - mkdir(%s,0700) failed in GRIDMANAGER, errno=%d (%s)\n",
				finalpath, errno, strerror(errno));
		failed = true;
	}
	set_priv(saved_priv);
	uninit_user_ids();
	args.AppendArg("-S");	// -S = "ScratchDir" argument
	args.AppendArg(finalpath);
		// NOTE(review): presumably scratchFilePath() returns a new[]'d
		// buffer — confirm the allocator matches this delete [].
	delete [] finalpath;
	if ( failed ) {
		// we already did dprintf reason to the log...
		free(gman_binary);
		delete gman_node;
		return NULL;
	}

	if(IsFulldebug(D_FULLDEBUG)) {
		MyString args_string;
		args.GetArgsStringForDisplay(&args_string);
		dprintf(D_FULLDEBUG,"Really Execing %s\n",args_string.Value());
	}

	pid = daemonCore->Create_Process(
			gman_binary,	// Program to exec
			args,			// Command-line args
			PRIV_ROOT,		// Run as root, so it can switch to
							//   PRIV_CONDOR
			rid				// Reaper ID
			);

	free(gman_binary);

	if ( pid <= 0 ) {
		dprintf ( D_ALWAYS, "StartOrFindGManager: Create_Process problems!\n" );
		if (gman_node) delete gman_node;
		return NULL;
	}

	// If we made it here, we happily started up a new gridmanager process

	dprintf( D_ALWAYS, "Started condor_gmanager for owner %s pid=%d\n",
			owner,pid);

	// Make a new gman_node entry for our hashtable & insert it
	if ( !gman_node ) {
			// defensive only: gman_node was already allocated above,
			// so this branch is normally dead.
		gman_node = new gman_node_t;
	}
	gman_node->pid = pid;
	gman_node->owner[0] = '\0';
	gman_node->domain[0] = '\0';
	if ( owner ) {
			// NOTE(review): strcpy into the fixed-size owner/domain
			// buffers assumes the names fit — confirm upstream limits.
		strcpy(gman_node->owner,owner);
	}
	if ( domain ) {
		strcpy(gman_node->domain,domain);
	}
	MyString owner_key(owner);
	if(attr_value){
		owner_key += attr_value;
	}
	if (cluster) {
		owner_key.formatstr_cat( "-%d.%d", cluster, proc );
	}

	ASSERT( gman_pid_table->insert(owner_key,gman_node) == 0 );

	// start timer to signal gridmanager if we haven't already
	if ( gman_node->add_timer_id == -1 ) {	// == -1 means no timer set
		gman_node->add_timer_id = daemonCore->Register_Timer(job_added_delay,
				GridUniverseLogic::SendAddSignal,
				"GridUniverseLogic::SendAddSignal");
		daemonCore->Register_DataPtr(gman_node);
	}

	// All done
	return gman_node;
}
// Extract info from an event int UserLogHeader::ExtractEvent( const ULogEvent *event ) { // Not a generic event -- ignore it if ( ULOG_GENERIC != event->eventNumber ) { return ULOG_NO_EVENT; } const GenericEvent *generic = dynamic_cast <const GenericEvent*>( event ); if ( ! generic ) { dprintf( D_ALWAYS, "Can't pointer cast generic event!\n" ); return ULOG_UNK_ERROR; } { char buf[1024]; memset( buf, 0, sizeof(buf) ); strncpy( buf, generic->info, sizeof(buf)-1 ); buf[COUNTOF(buf)-1] = 0; // make sure it's null terminated. int size = strlen( buf ); while( isspace(buf[size-1]) ) buf[--size] = '\0'; ::dprintf( D_FULLDEBUG, "UserLogHeader::ExtractEvent(): parsing '%s'\n", buf ); } char id[256]; char name[256]; int ctime; id[0] = '\0'; name[0] = '\0'; int n = sscanf( generic->info, "Global JobLog:" " ctime=%d" " id=%255s" " sequence=%d" " size=" FILESIZE_T_FORMAT"" " events=%" PRId64"" " offset=" FILESIZE_T_FORMAT"" " event_off=%" PRId64"" " max_rotation=%d" " creator_name=<%255[^>]>", &ctime, id, &m_sequence, &m_size, &m_num_events, &m_file_offset, &m_event_offset, &m_max_rotation, name ); if ( n >= 3 ) { m_ctime = ctime; m_id = id; m_valid = true; if ( n >= 8 ) { m_creator_name = name; } else { m_creator_name = ""; m_max_rotation = -1; } if (IsFulldebug(D_FULLDEBUG)) { dprint( D_FULLDEBUG, "UserLogHeader::ExtractEvent(): parsed ->" ); } return ULOG_OK; } else { ::dprintf( D_FULLDEBUG, "UserLogHeader::ExtractEvent(): can't parse '%s' => %d\n", generic->info, n ); return ULOG_NO_EVENT; } }