// Service a single "run this command" request on behalf of the SOAP shell.
//
//   inputAd    - request ClassAd carrying the CMD name, arguments,
//                environment, input files, and base64-encoded stdin;
//                must be non-NULL.
//   resultAd   - output ClassAd: on success receives EXIT_STATUS plus the
//                captured stdout/stderr; on any failure an error is recorded
//                via handle_process_request_error() and we return early.
//   req_number - request identifier, used only for error reporting.
//   iwd        - working directory for the child and its input files.
//   stdio_iwd  - directory holding the stdin/stdout/stderr files.
//
// The executable actually run is NOT taken verbatim from the request: the
// request's CMD is looked up (case-insensitively) in the
// SOAPSHELL_AUTHORIZED_COMMANDS config list, which is a flat list of
// 4-tuples (unmapped-name, real-path, args, environment).  NOTE: this
// function blocks until the spawned child exits.
void do_process_request(const ClassAd *inputAd, ClassAd *resultAd, const int req_number, const char *iwd, const char *stdio_iwd)
{
	// Check for inputAd
	if ( !inputAd ) {
		handle_process_request_error("No input ad",req_number,resultAd);
		return;
	}

	// Map the CMD specified in the input via the config file.
	MyString UnmappedJobName,JobName;
	if (inputAd->LookupString(ATTR_JOB_CMD,UnmappedJobName) == 0 ) {
		// no CMD specified.
		handle_process_request_error("No CMD specified",req_number,resultAd);
		return;
	}
	char *auth_commands = param("SOAPSHELL_AUTHORIZED_COMMANDS");
	StringList auth_list(auth_commands,",");
	if ( auth_commands ) free(auth_commands);
	// Each command needs four tuples; anything else is a misconfiguration
	if ( auth_list.number() % 4 != 0 ) {
		handle_process_request_error("Service is misconfigured: SOAPSHELL_AUTHORIZED_COMMANDS malformed",req_number,resultAd);
		return;
	}
	// contains_anycase() leaves the list cursor on the matched element, so
	// the next() call below yields the element immediately after the
	// unmapped name: the real executable path.
	if ( auth_list.contains_anycase(UnmappedJobName.Value()) == TRUE ) {
		JobName = auth_list.next();
	}
	if ( JobName.IsEmpty() ) {
		// the CMD not authorized
		handle_process_request_error("Requested CMD not authorized via SOAPSHELL_AUTHORIZED_COMMANDS",req_number,resultAd);
		return;
	}

	// handle command line arguments.
	ArgList args;
	args.SetArgV1SyntaxToCurrentPlatform();
	args.AppendArg(JobName.Value());	// set argv[0] to command
	// Third tuple element: fixed arguments from the config; a literal "*"
	// means "use the arguments supplied in the request ad" instead.
	char *soapshell_args = auth_list.next();
	if ( soapshell_args && strcmp(soapshell_args,"*") ) {
		if(!args.AppendArgsV1RawOrV2Quoted(soapshell_args,NULL)) {
			dprintf( D_ALWAYS, "ERROR: SOAPSHELL_ARGS config macro invalid\n" );
		}
	} else if(!args.AppendArgsFromClassAd(inputAd,NULL)) {
		handle_process_request_error("Failed to setup CMD arguments",req_number,resultAd);
		return;
	}

	// handle the environment.  Fourth tuple element: fixed environment from
	// the config; "*" again means "take the environment from the request ad".
	Env job_env;
	char *env_str = auth_list.next();
	if ( env_str && strcmp(env_str,"*") ) {
		if(!job_env.MergeFromV1RawOrV2Quoted(env_str,NULL) ) {
			dprintf(D_ALWAYS,"ERROR: SOAPSHELL_ENVIRONMENT config macro invalid\n");
		}
	} else if(!job_env.MergeFrom(inputAd,NULL)) {
		// bad environment string in job ad!
		handle_process_request_error("Request has faulty environment string",req_number,resultAd);
		return;
	}

	// Write input files into iwd (we will write stdin later)
	if ( !write_input_files(inputAd, iwd) ) {
		// failed to write input files
		handle_process_request_error("Failed to write input files",req_number,resultAd);
		return;
	}

	// handle stdin, stdout, and stderr redirection.  dircat() returns a
	// heap-allocated string; copy each into a MyString and free immediately.
	const char* jobstdin_ = dircat(stdio_iwd,"stdin");
	MyString jobstdin(jobstdin_);
	const char* jobstdout_ = dircat(stdio_iwd,"stdout");
	MyString jobstdout(jobstdout_);
	const char* jobstderr_ = dircat(stdio_iwd,"stderr");
	MyString jobstderr(jobstderr_);
	delete [] jobstdin_;
	delete [] jobstdout_;
	delete [] jobstderr_;
	int flags = O_WRONLY | O_CREAT | O_TRUNC | O_APPEND | O_LARGEFILE;

	// write stdin file if needed (the request may carry base64 stdin data)
	{
		char *input = NULL;
		unsigned char *output = NULL;
		int output_length = 0;
		int fd = -1;
		inputAd->LookupString(ATTR_JOB_INPUT,&input);
		if ( input ) {
			// Caller needs to free *output if non-NULL
			condor_base64_decode(input,&output,&output_length);
			if ( output ) {
				fd = safe_open_wrapper_follow( jobstdin.Value(), flags, 0666 );
				if ( fd > -1 ) {
					// NOTE(review): write() return value is ignored here; a
					// short or failed write would go undetected.
					write(fd,output,output_length);
					close(fd);
				}
				free(output);
			}
			free(input);
			if ( fd < 0 ) {
				handle_process_request_error("Failed to write stdin",req_number,resultAd);
				return;
			}
		}
	}

	int fds[3];
	// initialize these to -2 to mean they're not specified.
	// -1 will be treated as an error.
	// (All three sentinels are immediately overwritten by the opens below.)
	fds[0] = -2; fds[1] = -2; fds[2] = -2;
	fds[0] = safe_open_wrapper_follow( jobstdin.Value(), O_RDONLY | O_LARGEFILE );	// stdin
	fds[1] = safe_open_wrapper_follow( jobstdout.Value(), flags, 0666 );	// stdout
	fds[2] = safe_open_wrapper_follow( jobstderr.Value(), flags, 0666 );	// stderr

	/* Bail out if we couldn't open stdout/err files correctly.  A failed
	   stdin open (fds[0] == -1) is tolerated -- the request may simply
	   not have supplied any stdin data. */
	if( fds[1]==-1 || fds[2]==-1 ) {
		/* only close ones that had been opened correctly */
		for ( int i = 0; i <= 2; i++ ) {
			if ( fds[i] >= 0 ) {
				daemonCore->Close_FD ( fds[i] );
			}
		}
		handle_process_request_error("Failed to write stdout/err files",req_number,resultAd);
		return;
	}

	// Print what we are about to do to the log
	MyString args_string;
	args.GetArgsStringForDisplay(&args_string,1);
	dprintf( D_ALWAYS, "About to exec %s %s\n", JobName.Value(),
			 args_string.Value() );

	// Spawn a process, baby!!!
	int JobPid = daemonCore->Create_Process(
		JobName.Value(),	// executable
		args,				// args
		PRIV_UNKNOWN,		// priv_state - TODO
		0,					// reaper id - TODO
		FALSE,				// want_command_port
		&job_env,			// job environment
		iwd,				// job iwd
		NULL,				// family_info - TODO
		NULL,				// sock_inherit_list
		fds					// stdio redirection
		);

	// NOTE: Create_Process() saves the errno for us if it is an
	// "interesting" error.
	char const *create_process_error = NULL;
	if(JobPid == FALSE && errno) create_process_error = strerror(errno);

	// now close the descriptors in fds array.  our child has inherited
	// them already, so we should close them so we do not leak descriptors.
	for ( int i = 0; i <= 2; i++ ) {
		if ( fds[i] >= 0 ) {
			daemonCore->Close_FD ( fds[i] );
		}
	}

	if ( JobPid == FALSE ) {
		JobPid = -1;
		MyString errormsg;
		errormsg.formatstr("Create_Process failed %s",
						   create_process_error ? create_process_error : "");
		handle_process_request_error(errormsg.Value(),req_number,resultAd);
		return;
	}

	dprintf(D_ALWAYS,"Create_Process succeeded, pid=%d\n",JobPid);

	// TODO - For now, just deal w/ one at a time. :(
	// So for now just wait for the child to exit.
#ifdef WIN32
#error This service does not yet work on Windows
#else
	{
		int exit_status;
		pid_t pid;
		// Loop until wait() reaps our child; keep going on EINTR or if
		// some other (unexpected) child pid is reaped first.
		for (;;) {
			pid = wait(&exit_status);
			dprintf(D_FULLDEBUG,"WAIT returned %d, errno=%d\n",pid,errno);
			if (pid == JobPid ) break;
			if (pid == -1 && errno != EINTR) {
				EXCEPT("waitpid failed errno=%d",errno);
			}
		}
		if ( WIFEXITED(exit_status) ) {
			// NOTE(review): if the child was killed by a signal,
			// WIFEXITED is false and EXIT_STATUS is never set in the
			// result ad -- confirm callers tolerate its absence.
			int status = WEXITSTATUS(exit_status);
			resultAd->Assign("EXIT_STATUS",status);
		}
	}
#endif

	// Job has completed, exit status is in the ad.  Now put
	// the output files into the result ad.
	stash_output_file(resultAd, jobstdout.Value(), ATTR_JOB_OUTPUT);
	stash_output_file(resultAd, jobstderr.Value(), ATTR_JOB_ERROR);
}
// Start the vanilla-universe job.  Returns the result of OsProc::StartJob()
// (1 on success, 0 on failure); early failures return FALSE/0 directly.
//
// Before delegating to OsProc::StartJob(), this routine:
//   - (Windows) rewrites the job ad so non-binary "condor_exec" transfers
//     are renamed back to their real extension and re-argumented;
//   - fills in a FamilyInfo for process-family tracking (snapshot interval,
//     dedicated-account login, optional tracking GID, optional cgroup);
//   - (Linux) arranges named-chroot and MOUNT_UNDER_SCRATCH filesystem
//     remappings via a FilesystemRemap, and optional PID namespaces with
//     the condor_pid_ns_init wrapper;
//   - (libcgroup builds) applies memory/CPU limits after the job starts.
int VanillaProc::StartJob()
{
	dprintf(D_FULLDEBUG,"in VanillaProc::StartJob()\n");

	// vanilla jobs, unlike standard jobs, are allowed to run
	// shell scripts (or as is the case on NT, batch files). so
	// edit the ad so we start up a shell, pass the executable as
	// an argument to the shell, if we are asked to run a .bat file.
#ifdef WIN32

	CHAR interpreter[MAX_PATH+1], systemshell[MAX_PATH+1];
	const char* jobtmp = Starter->jic->origJobName();
	int joblen = strlen(jobtmp);
	// Peek at the last 4 characters as a candidate extension.
	const char *extension = joblen > 0 ? &(jobtmp[joblen-4]) : NULL;
	bool binary_executable = ( extension &&
		( MATCH == strcasecmp ( ".exe", extension ) ||
		  MATCH == strcasecmp ( ".com", extension ) ) ),
		java_universe = ( CONDOR_UNIVERSE_JAVA == job_universe );
	ArgList arguments;
	MyString filename, jobname, error;

	if ( extension && !java_universe && !binary_executable ) {

		/** since we do not actually know how long the extension of
			the file is, we'll need to hunt down the '.' in the path,
			if it exists */
		extension = strrchr ( jobtmp, '.' );

		if ( !extension ) {

			dprintf (
				D_ALWAYS,
				"VanillaProc::StartJob(): Failed to extract "
				"the file's extension.\n" );

			/** don't fail here, since we want executables to run
				as usual.  That is, some condor jobs submit
				executables that do not have the '.exe' extension,
				but are, nonetheless, executable binaries.  For
				instance, a submit script may contain:

				executable = executable$(OPSYS) */

		} else {

			/** pull out the path to the executable */
			if ( !JobAd->LookupString (
				ATTR_JOB_CMD,
				jobname ) ) {

				/** fall back on Starter->jic->origJobName() */
				jobname = jobtmp;
			}

			/** If we transferred the job, it may have been
				renamed to condor_exec.exe even though it is
				not an executable. Here we rename it back to
				a the correct extension before it will run. */
			if ( MATCH == strcasecmp (
					CONDOR_EXEC,
					condor_basename ( jobname.Value () ) ) ) {
				filename.formatstr ( "condor_exec%s", extension );
				if (rename(CONDOR_EXEC, filename.Value()) != 0) {
					dprintf (
						D_ALWAYS,
						"VanillaProc::StartJob(): ERROR: "
						"failed to rename executable from %s to %s\n",
						CONDOR_EXEC,
						filename.Value() );
				}
			} else {
				filename = jobname;
			}

			/** Since we've renamed our executable, we need to
				update the job ad to reflect this change. */
			if ( !JobAd->Assign (
				ATTR_JOB_CMD,
				filename ) ) {

				dprintf (
					D_ALWAYS,
					"VanillaProc::StartJob(): ERROR: failed to "
					"set new executable name.\n" );

				return FALSE;
			}

			/** We've moved the script to argv[1], so we need to
				add the remaining arguments to positions argv[2]..
				argv[/n/]. */
			if ( !arguments.AppendArgsFromClassAd ( JobAd, &error ) ||
				 !arguments.InsertArgsIntoClassAd ( JobAd, NULL, &error ) ) {

				dprintf (
					D_ALWAYS,
					"VanillaProc::StartJob(): ERROR: failed to "
					"get arguments from job ad: %s\n",
					error.Value () );

				return FALSE;
			}

			/** Since we know already we don't want this file returned
				to us, we explicitly add it to an exception list which
				will stop the file transfer mechanism from considering
				it for transfer back to its submitter */
			Starter->jic->removeFromOutputFiles ( filename.Value () );
		}
	}
#endif

	// set up a FamilyInfo structure to tell OsProc to register a family
	// with the ProcD in its call to DaemonCore::Create_Process
	//
	FamilyInfo fi;

	// take snapshots at no more than 15 seconds in between, by default
	//
	fi.max_snapshot_interval = param_integer("PID_SNAPSHOT_INTERVAL", 15);

	m_dedicated_account = Starter->jic->getExecuteAccountIsDedicated();
	if( ThisProcRunsAlongsideMainProc() ) {
			// If we track a secondary proc's family tree (such as
			// sshd) using the same dedicated account as the job's
			// family tree, we could end up killing the job when we
			// clean up the secondary family.
		m_dedicated_account = NULL;
	}
	if (m_dedicated_account) {
			// using login-based family tracking
		fi.login = m_dedicated_account;
			// The following message is documented in the manual as the
			// way to tell whether the dedicated execution account
			// configuration is being used.
		dprintf(D_ALWAYS,
				"Tracking process family by login \"%s\"\n",
				fi.login);
	}

	FilesystemRemap * fs_remap = NULL;
#if defined(LINUX)
	// on Linux, we also have the ability to track processes via
	// a phony supplementary group ID
	//
	gid_t tracking_gid = 0;
	if (param_boolean("USE_GID_PROCESS_TRACKING", false)) {
		if (!can_switch_ids() &&
			(Starter->condorPrivSepHelper() == NULL))
		{
			EXCEPT("USE_GID_PROCESS_TRACKING enabled, but can't modify "
			       "the group list of our children unless running as "
			       "root or using PrivSep");
		}
		fi.group_ptr = &tracking_gid;
	}

	// Increase the OOM score of this process; the child will inherit it.
	// This way, the job will be heavily preferred to be killed over a normal process.
	// OOM score is currently exponential - a score of 4 is a factor-16 increase in
	// the OOM score.
	setupOOMScore(4);
#endif

#if defined(HAVE_EXT_LIBCGROUP)
	// Determine the cgroup
	std::string cgroup_base;
	param(cgroup_base, "BASE_CGROUP", "");
	MyString cgroup_str;
	const char *cgroup = NULL;
		/* Note on CONDOR_UNIVERSE_LOCAL - The cgroup setup code below
		 * requires a unique name for the cgroup. It relies on
		 * uniqueness of the MachineAd's Name
		 * attribute. Unfortunately, in the local universe the
		 * MachineAd (mach_ad elsewhere) is never populated, because
		 * there is no machine. As a result the ASSERT on
		 * starter_name fails. This means that the local universe
		 * will not work on any machine that has BASE_CGROUP
		 * configured. A potential workaround is to set
		 * STARTER.BASE_CGROUP on any machine that is also running a
		 * schedd, but that disables cgroup support from a
		 * co-resident startd. Instead, I'm disabling cgroup support
		 * from within the local universe until the intraction of
		 * local universe and cgroups can be properly worked
		 * out. -matt 7 nov '12
		 */
	if (CONDOR_UNIVERSE_LOCAL != job_universe && cgroup_base.length()) {
		MyString cgroup_uniq;
		std::string starter_name, execute_str;
		param(execute_str, "EXECUTE", "EXECUTE_UNKNOWN");
			// Note: Starter is a global variable from os_proc.cpp
		Starter->jic->machClassAd()->EvalString(ATTR_NAME, NULL, starter_name);
		if (starter_name.size() == 0) {
			// No machine name available; fall back on our pid to keep
			// the cgroup name unique.
			char buf[16];
			sprintf(buf, "%d", getpid());
			starter_name = buf;
		}
		//ASSERT (starter_name.size());
		cgroup_uniq.formatstr("%s_%s", execute_str.c_str(), starter_name.c_str());
		// Replace path separators so the name is a single cgroup component.
		const char dir_delim[2] = {DIR_DELIM_CHAR, '\0'};
		cgroup_uniq.replaceString(dir_delim, "_");
		cgroup_str.formatstr("%s%ccondor%s", cgroup_base.c_str(), DIR_DELIM_CHAR,
			cgroup_uniq.Value());
		cgroup_str += this->CgroupSuffix();
		cgroup = cgroup_str.Value();
		ASSERT (cgroup != NULL);
		fi.cgroup = cgroup;
		dprintf(D_FULLDEBUG, "Requesting cgroup %s for job.\n", cgroup);
	}

#endif

// The chroot stuff really only works on linux
#ifdef LINUX
	{
		// Have Condor manage a chroot
		std::string requested_chroot_name;
		JobAd->EvalString("RequestedChroot", NULL, requested_chroot_name);
		// NOTE(review): the param() result below is never freed on any
		// path through this scope -- looks like a small memory leak.
		const char * allowed_root_dirs = param("NAMED_CHROOT");
		if (requested_chroot_name.size()) {
			dprintf(D_FULLDEBUG, "Checking for chroot: %s\n", requested_chroot_name.c_str());
			// NAMED_CHROOT is a list of "name=directory" specs.
			StringList chroot_list(allowed_root_dirs);
			chroot_list.rewind();
			const char * next_chroot;
			bool acceptable_chroot = false;
			std::string requested_chroot;
			while ( (next_chroot=chroot_list.next()) ) {
				MyString chroot_spec(next_chroot);
				chroot_spec.Tokenize();
				const char * chroot_name = chroot_spec.GetNextToken("=", false);
				if (chroot_name == NULL) {
					dprintf(D_ALWAYS, "Invalid named chroot: %s\n",
						chroot_spec.Value());
				}
				const char * next_dir = chroot_spec.GetNextToken("=", false);
				// NOTE(review): this second check re-tests chroot_name
				// instead of next_dir -- looks like a copy-paste slip, so
				// a spec with a name but no directory is never reported
				// here.  TODO confirm and fix to test next_dir.
				if (chroot_name == NULL) {
					dprintf(D_ALWAYS, "Invalid named chroot: %s\n",
						chroot_spec.Value());
				}
				dprintf(D_FULLDEBUG, "Considering directory %s for chroot %s.\n", next_dir, chroot_spec.Value());
				if (IsDirectory(next_dir) && chroot_name && (strcmp(requested_chroot_name.c_str(), chroot_name) == 0)) {
					acceptable_chroot = true;
					requested_chroot = next_dir;
				}
			}
			// TODO: path to chroot MUST be all root-owned, or we have a nice security exploit.
			// Is this the responsibility of Condor to check, or the sysadmin who set it up?
			if (!acceptable_chroot) {
				return FALSE;
			}
			dprintf(D_FULLDEBUG, "Will attempt to set the chroot to %s.\n", requested_chroot.c_str());

			// Build <requested_chroot>/dir_<pid> -- the sandbox dir as it
			// will appear inside the chroot.
			std::stringstream ss;
			std::stringstream ss2;
			ss2 << Starter->GetExecuteDir() << DIR_DELIM_CHAR << "dir_" << getpid();
			std::string execute_dir = ss2.str();
			ss << requested_chroot << DIR_DELIM_CHAR << ss2.str();
			std::string full_dir_str = ss.str();
			if (is_trivial_rootdir(requested_chroot)) {
				dprintf(D_FULLDEBUG, "Requested a trivial chroot %s; this is a no-op.\n", requested_chroot.c_str());
			} else if (IsDirectory(execute_dir.c_str())) {
				{
					// Need root to create and chown the sandbox inside
					// the chroot tree.
					TemporaryPrivSentry sentry(PRIV_ROOT);
					if( mkdir(full_dir_str.c_str(), S_IRWXU) < 0 ) {
						dprintf( D_FAILURE|D_ALWAYS,
							"Failed to create sandbox directory in chroot (%s): %s\n",
							full_dir_str.c_str(),
							strerror(errno) );
						return FALSE;
					}
					if (chown(full_dir_str.c_str(),
							  get_user_uid(),
							  get_user_gid()) == -1)
					{
						EXCEPT("chown error on %s: %s",
							   full_dir_str.c_str(),
							   strerror(errno));
					}
				}
				if (!fs_remap) {
					fs_remap = new FilesystemRemap();
				}
				// NOTE(review): on the AddMapping failure returns below
				// (and the chroot/mkdir failure above) fs_remap is never
				// deleted -- leaked on these early-exit paths.
				dprintf(D_FULLDEBUG, "Adding mapping: %s -> %s.\n", execute_dir.c_str(), full_dir_str.c_str());
				if (fs_remap->AddMapping(execute_dir, full_dir_str)) {
					// FilesystemRemap object prints out an error message for us.
					return FALSE;
				}
				dprintf(D_FULLDEBUG, "Adding mapping %s -> %s.\n", requested_chroot.c_str(), "/");
				std::string root_str("/");
				if (fs_remap->AddMapping(requested_chroot, root_str)) {
					return FALSE;
				}
			} else {
				dprintf(D_ALWAYS, "Unable to do chroot because working dir %s does not exist.\n", execute_dir.c_str());
			}
		} else {
			dprintf(D_FULLDEBUG, "Value of RequestedChroot is unset.\n");
		}
	}
// End of chroot
#endif

	// On Linux kernel 2.4.19 and later, we can give each job its
	// own FS mounts.
	char * mount_under_scratch = param("MOUNT_UNDER_SCRATCH");
	if (mount_under_scratch) {

		std::string working_dir = Starter->GetWorkingDir();

		if (IsDirectory(working_dir.c_str())) {
			StringList mount_list(mount_under_scratch);
			free(mount_under_scratch);

			mount_list.rewind();
			if (!fs_remap) {
				fs_remap = new FilesystemRemap();
			}
			char * next_dir;
			while ( (next_dir=mount_list.next()) ) {
				if (!*next_dir) {
					// empty string?
					mount_list.deleteCurrent();
					continue;
				}
				std::string next_dir_str(next_dir);
				// Gah, I wish I could throw an exception to clean up these nested if statements.
				if (IsDirectory(next_dir)) {
					// Create <working_dir>/<next_dir> and remap it over
					// the system path so the job gets a private copy.
					char * full_dir = dirscat(working_dir, next_dir_str);
					if (full_dir) {
						std::string full_dir_str(full_dir);
						delete [] full_dir; full_dir = NULL;
						if (!mkdir_and_parents_if_needed( full_dir_str.c_str(), S_IRWXU, PRIV_USER )) {
							dprintf(D_ALWAYS,
								"Failed to create scratch directory %s\n",
								full_dir_str.c_str());
							return FALSE;
						}
						dprintf(D_FULLDEBUG, "Adding mapping: %s -> %s.\n",
							full_dir_str.c_str(), next_dir_str.c_str());
						if (fs_remap->AddMapping(full_dir_str, next_dir_str)) {
							// FilesystemRemap object prints out an error message for us.
							return FALSE;
						}
					} else {
						dprintf(D_ALWAYS, "Unable to concatenate %s and %s.\n",
							working_dir.c_str(), next_dir_str.c_str());
						return FALSE;
					}
				} else {
					dprintf(D_ALWAYS, "Unable to add mapping %s -> %s because %s doesn't exist.\n", working_dir.c_str(), next_dir, next_dir);
				}
			}
		} else {
			// NOTE(review): mount_under_scratch is not freed on this
			// branch -- small leak before the early return.
			dprintf(D_ALWAYS, "Unable to perform mappings because %s doesn't exist.\n", working_dir.c_str());
			return FALSE;
		}
	}

#if defined(LINUX)
	// On Linux kernel 2.6.24 and later, we can give each
	// job its own PID namespace
	if (param_boolean("USE_PID_NAMESPACES", false)) {
		if (!can_switch_ids()) {
			EXCEPT("USE_PID_NAMESPACES enabled, but can't perform this "
				"call in Linux unless running as root.");
		}
		fi.want_pid_namespace = this->SupportsPIDNamespace();
		if (fi.want_pid_namespace) {
			if (!fs_remap) {
				fs_remap = new FilesystemRemap();
			}
			fs_remap->RemapProc();
		}

		// When PID Namespaces are enabled, need to run the job
		// under the condor_pid_ns_init program, so that signals
		// propagate through to the child.

		// First tell the program where to log output status
		// via an environment variable
		if (param_boolean("USE_PID_NAMESPACE_INIT", true)) {
			Env env;
			MyString env_errors;
			MyString arg_errors;
			std::string filename;

			filename = Starter->GetWorkingDir();
			filename += "/.condor_pid_ns_status";

			env.MergeFrom(JobAd, &env_errors);
			env.SetEnv("_CONDOR_PID_NS_INIT_STATUS_FILENAME", filename);
			env.InsertEnvIntoClassAd(JobAd, &env_errors);

			// The status file is starter-internal; don't transfer it back.
			Starter->jic->removeFromOutputFiles(filename.c_str());
			this->m_pid_ns_init_filename = filename;

			// Now, set the job's CMD to the wrapper, and shift
			// over the arguments by one

			ArgList args;
			std::string cmd;

			JobAd->LookupString(ATTR_JOB_CMD, cmd);
			args.AppendArg(cmd);
			args.AppendArgsFromClassAd(JobAd, &arg_errors);
			args.InsertArgsIntoClassAd(JobAd, NULL, & arg_errors);

			std::string libexec;
			if( !param(libexec,"LIBEXEC") ) {
				dprintf(D_ALWAYS, "Cannot find LIBEXEC so can not run condor_pid_ns_init\n");
				return 0;
			}
			std::string c_p_n_i = libexec + "/condor_pid_ns_init";
			JobAd->Assign(ATTR_JOB_CMD, c_p_n_i);
		}
	}
	dprintf(D_FULLDEBUG, "PID namespace option: %s\n", fi.want_pid_namespace ? "true" : "false");
#endif

	// have OsProc start the job
	//
	int retval = OsProc::StartJob(&fi, fs_remap);

	if (fs_remap != NULL) {
		delete fs_remap;
	}

#if defined(HAVE_EXT_LIBCGROUP)

	// Set fairshare limits.  Note that retval == 1 indicates success, 0 is failure.
	// See Note near setup of param(BASE_CGROUP)
	if (CONDOR_UNIVERSE_LOCAL != job_universe && cgroup && retval) {
		std::string mem_limit;
		param(mem_limit, "CGROUP_MEMORY_LIMIT_POLICY", "soft");
		bool mem_is_soft = mem_limit == "soft";
		std::string cgroup_string = cgroup;
		CgroupLimits climits(cgroup_string);
		if (mem_is_soft || (mem_limit == "hard")) {
			ClassAd * MachineAd = Starter->jic->machClassAd();
			int MemMb;
			if (MachineAd->LookupInteger(ATTR_MEMORY, MemMb)) {
				uint64_t MemMb_big = MemMb;
				m_memory_limit = MemMb_big;
				climits.set_memory_limit_bytes(1024*1024*MemMb_big, mem_is_soft);
			} else {
				dprintf(D_ALWAYS, "Not setting memory soft limit in cgroup because "
					"Memory attribute missing in machine ad.\n");
			}
		} else if (mem_limit == "none") {
			dprintf(D_FULLDEBUG, "Not enforcing memory soft limit.\n");
		} else {
			dprintf(D_ALWAYS, "Invalid value of CGROUP_MEMORY_LIMIT_POLICY: %s. Ignoring.\n", mem_limit.c_str());
		}

		// Now, set the CPU shares
		ClassAd * MachineAd = Starter->jic->machClassAd();
		int numCores = 1;
		if (MachineAd->LookupInteger(ATTR_CPUS, numCores)) {
			climits.set_cpu_shares(numCores*100);
		} else {
			dprintf(D_FULLDEBUG, "Invalid value of Cpus in machine ClassAd; ignoring.\n");
		}
		setupOOMEvent(cgroup);
	}

	m_statistics.Reconfig();

	// Now that the job is started, decrease the likelihood that the starter
	// is killed instead of the job itself.
	if (retval) {
		setupOOMScore(-4);
	}

#endif

	return retval;
}
// Return the gridmanager node serving (owner, attr_value, cluster, proc),
// starting a new condor_gridmanager process if none exists yet.
//
//   owner/domain         - user the gridmanager serves (and runs as, via
//                          init_user_ids / the scratch dir).
//   attr_value/attr_name - optional job-ad attribute used to further
//                          partition gridmanagers; an empty attr_value is
//                          normalized to NULL (both fields).
//   cluster/proc         - job id, used in lookup/hash keys and logging.
//
// Returns the (possibly new) hashtable node, or NULL on any failure
// (shutdown in progress, owner is root, config/param errors, scratch-dir
// creation failure, Create_Process failure).
GridUniverseLogic::gman_node_t *
GridUniverseLogic::StartOrFindGManager(const char* owner, const char* domain,
		const char* attr_value, const char* attr_name, int cluster, int proc)
{
	gman_node_t* gman_node;
	int pid;

	// If attr_value is an empty string, convert to NULL since code
	// after this point expects that.
	if ( attr_value && strlen(attr_value)==0 ) {
		attr_value = NULL;
		attr_name = NULL;
	}

	if ( (gman_node=lookupGmanByOwner(owner, attr_value, cluster, proc)) ) {
		// found it
		return gman_node;
	}

	// not found. fire one up!  we want to run the GManager as the user.

	// but first, make certain we are not shutting down...
	if (!gman_pid_table) {
		// destructor has already been called; we are probably
		// closing down.
		return NULL;
	}

#ifndef WIN32
	// Refuse to run a gridmanager as root.
	if (owner && strcasecmp(owner, "root") == 0 ) {
		dprintf(D_ALWAYS, "Tried to start condor_gmanager as root.\n");
		return NULL;
	}
#endif

	dprintf( D_FULLDEBUG, "Starting condor_gmanager for owner %s (%d.%d)\n",
			owner, cluster, proc);

	char *gman_binary;
	gman_binary = param("GRIDMANAGER");
	if ( !gman_binary ) {
		dprintf(D_ALWAYS,"ERROR - GRIDMANAGER not defined in config file\n");
		return NULL;
	}

	ArgList args;
	MyString error_msg;

	args.AppendArg("condor_gridmanager");
	args.AppendArg("-f");

	// Extra admin-configured arguments, in V1-raw or V2-quoted syntax.
	char *gman_args = param("GRIDMANAGER_ARGS");

	if(!args.AppendArgsV1RawOrV2Quoted(gman_args,&error_msg)) {
		dprintf( D_ALWAYS, "ERROR: failed to parse gridmanager args: %s\n",
				 error_msg.Value());
		free(gman_binary);
		free(gman_args);
		return NULL;
	}
	free(gman_args);

	// build a constraint limiting this gridmanager to this owner's (and
	// optionally this attribute's) grid-universe jobs
	if ( !owner ) {
		dprintf(D_ALWAYS,"ERROR - missing owner field\n");
		free(gman_binary);
		return NULL;
	}
	MyString constraint;
	if ( !attr_name ) {
		constraint.formatstr("(%s=?=\"%s\"&&%s==%d)",
				ATTR_OWNER,owner,
				ATTR_JOB_UNIVERSE,CONDOR_UNIVERSE_GRID);
	} else {
		constraint.formatstr("(%s=?=\"%s\"&&%s=?=\"%s\"&&%s==%d)",
				ATTR_OWNER,owner,
				attr_name,attr_value,
				ATTR_JOB_UNIVERSE,CONDOR_UNIVERSE_GRID);

		args.AppendArg("-A");
		args.AppendArg(attr_value);
	}
	args.AppendArg("-C");
	args.AppendArg(constraint.Value());

	MyString full_owner_name(owner);
	if ( domain && *domain ) {
		full_owner_name.formatstr_cat( "@%s", domain );
	}
	args.AppendArg("-o");
	args.AppendArg(full_owner_name.Value());

	// Switch to the user's ids so the scratch dir below is made as them.
	if (!init_user_ids(owner, domain)) {
		dprintf(D_ALWAYS,"ERROR - init_user_ids() failed in GRIDMANAGER\n");
		free(gman_binary);
		return NULL;
	}

	static bool first_time_through = true;
	if ( first_time_through ) {
		// Note: Because first_time_through is static, this block runs only
		// once per schedd invocation.
		first_time_through = false;

		// Clean up any old / abandoned scratch dirs.
		dprintf(D_FULLDEBUG,"Checking for old gridmanager scratch dirs\n");
		char *prefix = temp_dir_path();
		ASSERT(prefix);
		Directory tmp( prefix, PRIV_USER );
		const char *f;
		char const *dot;
		int fname_pid;
		int mypid = daemonCore->getpid();
		int scratch_pre_len = strlen(scratch_prefix);
		while ( (f=tmp.Next()) ) {
			// skip regular files -- we only need to inspect subdirs
			if ( !tmp.IsDirectory() ) {
				continue;
			}
			// skip if it does not start with our prefix
			if ( strncmp(scratch_prefix,f,scratch_pre_len) ) {
				continue;
			}
			// skip if does not end w/ a pid
			dot = strrchr(f,'.');
			if ( !dot ) {
				continue;
			}
			// skip if this pid is still alive and not ours
			dot++;	// skip over period
			fname_pid = atoi(dot);
			if ( fname_pid != mypid && daemonCore->Is_Pid_Alive(fname_pid) ) {
				continue;
			}
			// if we made it here, blow away this subdir
			if ( tmp.Remove_Current_File() ) {
				dprintf(D_ALWAYS,"Removed old scratch dir %s\n",
						tmp.GetFullPath());
			}
		}	// end of while for cleanup of old scratch dirs

		dprintf(D_FULLDEBUG,"Done checking for old scratch dirs\n");

		if (prefix != NULL) {
			free(prefix);
			prefix = NULL;
		}
	}	// end of once-per-schedd invocation block

	// Create a temp dir for the gridmanager and append proper
	// command-line arguments to tell where it is.
	bool failed = false;
	gman_node = new gman_node_t;
	char *finalpath = scratchFilePath(gman_node);
	priv_state saved_priv = set_user_priv();
	if ( (mkdir(finalpath,0700)) < 0 ) {
		// mkdir failed.
		dprintf(D_ALWAYS,"ERROR - mkdir(%s,0700) failed in GRIDMANAGER, errno=%d (%s)\n",
				finalpath, errno, strerror(errno));
		failed = true;
	}
	set_priv(saved_priv);
	uninit_user_ids();
	args.AppendArg("-S");	// -S = "ScratchDir" argument
	args.AppendArg(finalpath);
	delete [] finalpath;
	if ( failed ) {
		// we already did dprintf reason to the log...
		free(gman_binary);
		delete gman_node;
		return NULL;
	}

	if(IsFulldebug(D_FULLDEBUG)) {
		MyString args_string;
		args.GetArgsStringForDisplay(&args_string);
		dprintf(D_FULLDEBUG,"Really Execing %s\n",args_string.Value());
	}

	pid = daemonCore->Create_Process(
			gman_binary,	// Program to exec
			args,			// Command-line args
			PRIV_ROOT,		// Run as root, so it can switch to
							//   PRIV_CONDOR
			rid				// Reaper ID
			);
	free(gman_binary);

	if ( pid <= 0 ) {
		dprintf ( D_ALWAYS, "StartOrFindGManager: Create_Process problems!\n" );
		if (gman_node) delete gman_node;
		return NULL;
	}

	// If we made it here, we happily started up a new gridmanager process

	dprintf( D_ALWAYS, "Started condor_gmanager for owner %s pid=%d\n",
			owner,pid);

	// Make a new gman_node entry for our hashtable & insert it
	// NOTE(review): gman_node was already allocated above (for the scratch
	// dir), so this null-check branch appears unreachable here.
	if ( !gman_node ) {
		gman_node = new gman_node_t;
	}
	gman_node->pid = pid;
	gman_node->owner[0] = '\0';
	gman_node->domain[0] = '\0';
	// NOTE(review): unbounded strcpy into fixed-size node buffers --
	// presumably owner/domain are known to fit; verify the buffer sizes.
	if ( owner ) {
		strcpy(gman_node->owner,owner);
	}
	if ( domain ) {
		strcpy(gman_node->domain,domain);
	}
	MyString owner_key(owner);
	if(attr_value){
		owner_key += attr_value;
	}
	if (cluster) {
		owner_key.formatstr_cat( "-%d.%d", cluster, proc );
	}

	ASSERT( gman_pid_table->insert(owner_key,gman_node) == 0 );

	// start timer to signal gridmanager if we haven't already
	if ( gman_node->add_timer_id == -1 ) {	// == -1 means no timer set
		gman_node->add_timer_id = daemonCore->Register_Timer(job_added_delay,
				GridUniverseLogic::SendAddSignal,
				"GridUniverseLogic::SendAddSignal");
		daemonCore->Register_DataPtr(gman_node);
	}

	// All done
	return gman_node;
}
// I really need a good way to determine the type of a classad
// attribute. Right now I just try all four possibilities, which is a
// horrible mess...
//
// Build the libvirt domain XML for this VM by piping an augmented copy of
// the job ClassAd into the LIBVIRT_XML_SCRIPT helper and collecting its
// stdout into m_xml.  The filename parameter is unused.  Returns true on
// success, false if the helper is unconfigured or exits non-zero.
bool VirshType::CreateVirshConfigFile(const char* /*filename*/)
{
	vmprintf(D_FULLDEBUG, "In VirshType::CreateVirshConfigFile\n");
	// std::string name;

	char * tmp = param("LIBVIRT_XML_SCRIPT");
	if(tmp == NULL) {
		vmprintf(D_ALWAYS, "LIBVIRT_XML_SCRIPT not defined\n");
		return false;
	}
	// This probably needs some work...
	ArgList args;
	args.AppendArg(tmp);
	free(tmp);

	// We might want to have specific debugging output enabled in the
	// helper script; however, it is not clear where that output should
	// go.  This gives us a way to do so even in cases where the script
	// is unable to read from condor_config (why would this ever
	// happen?)
	tmp = param("LIBVIRT_XML_SCRIPT_ARGS");
	if(tmp != NULL) {
		MyString errormsg;
		args.AppendArgsV1RawOrV2Quoted(tmp,&errormsg);
		free(tmp);
	}

	// Serialize the job ad and append the VM-specific attributes the
	// helper needs; this text becomes the helper's stdin.
	StringList input_strings, output_strings, error_strings;
	MyString classad_string;
	m_classAd.sPrint(classad_string);
	classad_string += VMPARAM_XEN_BOOTLOADER;
	classad_string += " = \"";
	classad_string += m_xen_bootloader;
	classad_string += "\"\n";
	// NOTE(review): find() < 1 treats both "not found" and "found at
	// position 0" as absent -- presumably intentional since the sPrint'd
	// ad never starts with the attribute name, but worth confirming.
	if(classad_string.find(VMPARAM_XEN_INITRD) < 1) {
		classad_string += VMPARAM_XEN_INITRD;
		classad_string += " = \"";
		classad_string += m_xen_initrd_file;
		classad_string += "\"\n";
	}
	if(!m_vm_bridge_interface.empty()) {
		classad_string += VMPARAM_BRIDGE_INTERFACE;
		classad_string += " = \"";
		classad_string += m_vm_bridge_interface.c_str();
		classad_string += "\"\n";
	}
	if(classad_string.find(ATTR_JOB_VM_NETWORKING_TYPE) < 1) {
		classad_string += ATTR_JOB_VM_NETWORKING_TYPE;
		classad_string += " = \"";
		classad_string += m_vm_networking_type.Value();
		classad_string += "\"\n";
	}
	input_strings.append(classad_string.Value());

	// Log exactly what we are feeding the helper.
	tmp = input_strings.print_to_string();
	vmprintf(D_FULLDEBUG, "LIBVIRT_XML_SCRIPT_ARGS input_strings= %s\n", tmp);
	free(tmp);

	int ret = systemCommand(args, PRIV_ROOT, &output_strings, &input_strings,
		&error_strings, false);
	error_strings.rewind();
	if(ret != 0) {
		vmprintf(D_ALWAYS, "XML helper script could not be executed\n");
		// NOTE(review): output_strings is rewound here but the loop below
		// iterates error_strings -- looks like a rewind/iterate mismatch;
		// confirm whether stdout was also meant to be logged on failure.
		output_strings.rewind();
		// If there is any output from the helper, write it to the debug
		// log.  Presumably, this is separate from the script's own
		// debug log.
		while((tmp = error_strings.next()) != NULL) {
			vmprintf(D_FULLDEBUG, "Helper stderr output: %s\n", tmp);
		}
		return false;
	}
	// Helper succeeded: still surface anything it wrote to stderr.
	error_strings.rewind();
	while((tmp = error_strings.next()) != NULL) {
		vmprintf(D_ALWAYS, "Helper stderr output: %s\n", tmp);
	}

	// The helper's stdout lines are concatenated into the domain XML.
	output_strings.rewind();
	while((tmp = output_strings.next()) != NULL) {
		m_xml += tmp;
	}
	return true;
}
// Prepare and launch a Java universe job: resolve the jar-file list
// (preferring sandbox-local copies when the IWD was changed by file
// transfer), obtain the configured JVM command line, splice in chirp
// config, user JVM args, the CondorJavaWrapper and the job's own
// arguments, write the merged command/arguments back into the job ad,
// then delegate the actual spawn to VanillaProc::StartJob().
// Returns 0 on any setup failure, otherwise VanillaProc::StartJob()'s result.
int JavaProc::StartJob()
{
	MyString java_cmd;
	char* jarfiles = NULL;
	ArgList args;
	MyString arg_buf;

	// Since we are adding to the argument list, we may need to deal
	// with platform-specific arg syntax in the user's args in order
	// to successfully merge them with the additional java VM args.
	args.SetArgV1SyntaxToCurrentPlatform();

	// Construct the list of jar files for the command line
	// If a jar file is transferred locally, use its local name
	// (in the execute directory)
	// otherwise use the original name
	StringList jarfiles_orig_list;
	StringList jarfiles_local_list;
	StringList* jarfiles_final_list = NULL;

	if( JobAd->LookupString(ATTR_JAR_FILES,&jarfiles) ) {
		jarfiles_orig_list.initializeFromString( jarfiles );
		free( jarfiles );
		jarfiles = NULL;
		char * jarfile_name;
		const char * base_name;
		struct stat stat_buff;
		if( Starter->jic->iwdIsChanged() ) {
			// If the job's IWD has been changed (because we're
			// running in the sandbox due to file transfer), we
			// need to use a local version of the path to the jar
			// files, not the full paths from the submit machine.
			jarfiles_orig_list.rewind();
			while( (jarfile_name = jarfiles_orig_list.next()) ) {
				// Construct the local name
				base_name = condor_basename( jarfile_name );
				MyString local_name = execute_dir;
				local_name += DIR_DELIM_CHAR;
				local_name += base_name;
				if( stat(local_name.Value(), &stat_buff) == 0 ) {
					// Jar file exists locally, use local name
					jarfiles_local_list.append( local_name.Value() );
				} else {
					// Use the original name
					jarfiles_local_list.append (jarfile_name);
				}
			} // while(jarfiles_orig_list)
			// jarfiles_local_list is our real copy...
			jarfiles_final_list = &jarfiles_local_list;
		} else { // !iwdIsChanged()
			// just use jarfiles_orig_list as our real copy...
			jarfiles_final_list = &jarfiles_orig_list;
		}
	}

	// Sentinel files the wrapper touches so we can tell how far the JVM got.
	startfile.formatstr("%s%cjvm.start",execute_dir,DIR_DELIM_CHAR);
	endfile.formatstr("%s%cjvm.end",execute_dir,DIR_DELIM_CHAR);

	// java_config() supplies the JVM binary path plus configured VM args.
	if( !java_config(java_cmd,&args,jarfiles_final_list) ) {
		dprintf(D_FAILURE|D_ALWAYS,"JavaProc: Java is not configured!\n");
		return 0;
	}

	// Replace the job's Cmd with the real JVM binary.
	JobAd->Assign(ATTR_JOB_CMD, java_cmd.Value());

	// Point the in-JVM chirp client at its config file in the sandbox.
	arg_buf.formatstr("-Dchirp.config=%s%cchirp.config",execute_dir,DIR_DELIM_CHAR);
	args.AppendArg(arg_buf.Value());

	// User-supplied JVM arguments: prefer the V2 attribute, fall back to V1.
	char *jvm_args1 = NULL;
	char *jvm_args2 = NULL;
	MyString jvm_args_error;
	bool jvm_args_success = true;
	JobAd->LookupString(ATTR_JOB_JAVA_VM_ARGS1, &jvm_args1);
	JobAd->LookupString(ATTR_JOB_JAVA_VM_ARGS2, &jvm_args2);
	if(jvm_args2) {
		jvm_args_success = args.AppendArgsV2Raw(jvm_args2, &jvm_args_error);
	} else if(jvm_args1) {
		jvm_args_success = args.AppendArgsV1Raw(jvm_args1, &jvm_args_error);
	}
	free(jvm_args1);
	free(jvm_args2);
	if (!jvm_args_success) {
		dprintf(D_ALWAYS, "JavaProc: failed to parse JVM args: %s\n", jvm_args_error.Value());
		return 0;
	}

	// The wrapper runs the user's main class and records start/end sentinels.
	args.AppendArg("CondorJavaWrapper");
	args.AppendArg(startfile.Value());
	args.AppendArg(endfile.Value());

	// Finally, the job's own arguments from the ad.
	MyString args_error;
	if(!args.AppendArgsFromClassAd(JobAd,&args_error)) {
		dprintf(D_ALWAYS,"JavaProc: failed to read job arguments: %s\n", args_error.Value());
		return 0;
	}

	// We are just talking to ourselves, so it is fine to use argument
	// syntax compatible with this current version of Condor.
	CondorVersionInfo ver_info;
	if(!args.InsertArgsIntoClassAd(JobAd,&ver_info,&args_error)) {
		dprintf(D_ALWAYS,"JavaProc: failed to insert java job arguments: %s\n", args_error.Value());
		return 0;
	}

	dprintf(D_ALWAYS,"JavaProc: Cmd=%s\n",java_cmd.Value());
	MyString args_string;
	args.GetArgsStringForDisplay(&args_string);
	dprintf(D_ALWAYS,"JavaProc: Args=%s\n",args_string.Value());

	// The rewritten JobAd now describes a plain vanilla-universe exec.
	return VanillaProc::StartJob();
}
/** Submit the DAGMan submit file unless the -no_submit option was given.
	@param shallowOpts: the condor_submit_dag shallow options
	@return 0 if successful, 1 if failed
*/
int
submitDag( SubmitDagShallowOptions &shallowOpts )
{
	// Tell the user where all of the generated files live.
	printf("-----------------------------------------------------------------------\n");
	printf("File for submitting this DAG to Condor : %s\n",
			shallowOpts.strSubFile.Value());
	printf("Log of DAGMan debugging messages : %s\n",
			shallowOpts.strDebugLog.Value());
	printf("Log of Condor library output : %s\n",
			shallowOpts.strLibOut.Value());
	printf("Log of Condor library error messages : %s\n",
			shallowOpts.strLibErr.Value());
	printf("Log of the life of condor_dagman itself : %s\n",
			shallowOpts.strSchedLog.Value());
	printf("\n");

	if ( !shallowOpts.bSubmit ) {
		// Dry run requested: just show the user how to submit by hand.
		printf("-no_submit given, not submitting DAG to Condor. You can do this with:\n");
		printf("\"condor_submit %s\"\n", shallowOpts.strSubFile.Value());
		printf("-----------------------------------------------------------------------\n");
		return 0;
	}

	// Assemble the condor_submit command line.
	ArgList submitCmd;
	submitCmd.AppendArg( "condor_submit" );
	if( shallowOpts.strRemoteSchedd != "" )
	{
		submitCmd.AppendArg( "-r" );
		submitCmd.AppendArg( shallowOpts.strRemoteSchedd );
	}
	submitCmd.AppendArg( shallowOpts.strSubFile );

	// It is important to set the destination Schedd before
	// calling condor_submit, otherwise it may submit to the
	// wrong Schedd.
	//
	// my_system() has a variant that takes an Env.
	// Unfortunately, it results in an execve and no path
	// searching, which makes the relative path to
	// "condor_submit" above not work. Instead, we'll set the
	// env before execvp is called. It may be more correct to
	// fix my_system to inject the Env after the fork() and
	// before the execvp().
	if ( shallowOpts.strScheddDaemonAdFile != "" )
	{
		SetEnv("_CONDOR_SCHEDD_DAEMON_AD_FILE",
				shallowOpts.strScheddDaemonAdFile.Value());
	}
	if ( shallowOpts.strScheddAddressFile != "" )
	{
		SetEnv("_CONDOR_SCHEDD_ADDRESS_FILE",
				shallowOpts.strScheddAddressFile.Value());
	}

	int status = my_system( submitCmd );
	if( status != 0 )
	{
		fprintf( stderr, "ERROR: condor_submit failed; aborting.\n" );
		return 1;
	}

	printf("-----------------------------------------------------------------------\n");
	return 0;
}
// Spawn a myproxy-get-delegation child process to refresh the given
// X.509 proxy credential against its configured MyProxy server.
//
// The password is pre-written into a pipe that becomes the child's
// stdin; stderr is captured to a condor-owned temp file for the reaper
// to report. Returns TRUE if the child was successfully spawned (the
// reaper completes the refresh), FALSE if a refresh is already in
// flight or any setup step failed.
int RefreshProxyThruMyProxy(X509CredentialWrapper * proxy)
{
	const char * proxy_filename = proxy->GetStorageName();
	char * myproxy_host = NULL;
	int status;

	// Without a MyProxy server there is nothing to refresh against.
	if (((X509Credential*)proxy->cred)->GetMyProxyServerHost() == NULL) {
		dprintf (D_ALWAYS, "Skipping %s\n", proxy->cred->GetName());
		return FALSE;
	}

	// First check if a refresh process is already running
	time_t now = time(NULL);
	if (proxy->get_delegation_pid != GET_DELEGATION_PID_NONE) {
		time_t time_started = proxy->get_delegation_proc_start_time;

		// If the old "refresh proxy" proc has been running too long, kill it;
		// the reaper then cleans up its state.
		if (now - time_started > 500) {
			dprintf (D_FULLDEBUG, "MyProxy refresh process pid=%d still running, sending signal %d\n", proxy->get_delegation_pid, SIGKILL);
			daemonCore->Send_Signal (proxy->get_delegation_pid, SIGKILL);
			// Wait for reaper to cleanup.
		} else {
			dprintf (D_FULLDEBUG, "MyProxy refresh process pid=%d still running, letting it finish\n", proxy->get_delegation_pid);
		}
		return FALSE;
	}

	proxy->get_delegation_proc_start_time = now;

	// Set up environment for myproxy-get-delegation
	Env myEnv;
	MyString strBuff;

	if (((X509Credential*)proxy->cred)->GetMyProxyServerDN()) {
		strBuff="MYPROXY_SERVER_DN=";
		strBuff+= ((X509Credential*)proxy->cred)->GetMyProxyServerDN();
		myEnv.SetEnv (strBuff.Value());
		dprintf (D_FULLDEBUG, "%s\n", strBuff.Value());
	}

	strBuff="X509_USER_PROXY=";
	strBuff+=proxy->GetStorageName();
	// BUGFIX: this string was previously built and logged but never
	// exported to the child's environment (unlike MYPROXY_SERVER_DN
	// above), so the helper did not see which proxy file to operate on.
	myEnv.SetEnv (strBuff.Value());
	dprintf (D_FULLDEBUG, "%s\n", strBuff.Value());

	// Get password (this will end up in stdin for myproxy-get-delegation)
	const char * myproxy_password =((X509Credential*)proxy->cred)->GetRefreshPassword();
	if (myproxy_password == NULL ) {
		dprintf (D_ALWAYS, "No MyProxy password specified for %s:%s\n", proxy->cred->GetName(), proxy->cred->GetOwner());
		myproxy_password = "";
	}

	// Pre-load the password (plus newline) into a pipe; the read end
	// becomes the child's stdin.
	status = pipe (proxy->get_delegation_password_pipe);
	if (status == -1) {
		dprintf (D_ALWAYS, "get_delegation pipe() failed: %s\n", strerror(errno) );
		proxy->get_delegation_reset();
		return FALSE;
	}
	// TODO: check write() return values for errors, short writes.
	int written = write (proxy->get_delegation_password_pipe[1], myproxy_password, strlen (myproxy_password));
	if (written < (long)strlen(myproxy_password)) {
		dprintf (D_ALWAYS, "Write to proxy delegation pipe failed (%s)", strerror(errno));
		proxy->get_delegation_reset();
		return FALSE;
	}
	written = write (proxy->get_delegation_password_pipe[1], "\n", 1);
	if (written < 1) {
		dprintf (D_ALWAYS, "Write newline to proxy delegation pipe failed (%s)", strerror(errno) );
		proxy->get_delegation_reset();
		return FALSE;
	}

	// Figure out user name;
	const char * username = proxy->cred->GetOrigOwner();

	// Figure out myproxy host and port
	myproxy_host = getHostFromAddr (((X509Credential*)proxy->cred)->GetMyProxyServerHost());
	int myproxy_port = getPortFromAddr (((X509Credential*)proxy->cred)->GetMyProxyServerHost());

	// construct arguments
	ArgList args;
	args.AppendArg("--verbose"); // BUGFIX: was "--verbose " with a trailing space, an invalid option token
	args.AppendArg("--out");
	args.AppendArg(proxy_filename);
	args.AppendArg("--pshost");
	// BUGFIX: guard against a NULL host before appending; previously the
	// (possibly NULL) pointer was appended and only the free() was guarded.
	args.AppendArg(myproxy_host ? myproxy_host : "");
	free (myproxy_host); // free(NULL) is a no-op
	myproxy_host = NULL;
	args.AppendArg("--dn_as_username");
	args.AppendArg("--proxy_lifetime"); // hours
	args.AppendArg(6);
	args.AppendArg("--stdin_pass");
	args.AppendArg("--username");
	args.AppendArg(username);

	// Optional port argument
	if (myproxy_port) {
		args.AppendArg("--psport");
		args.AppendArg(myproxy_port);
	}

	// Optional credential name
	if ( ((X509Credential*)proxy->cred)->GetCredentialName() && ( ((X509Credential*)proxy->cred)->GetCredentialName() )[0] ) {
		args.AppendArg("--credname");
		args.AppendArg(((X509Credential*)proxy->cred)->GetCredentialName());
	}

	// Create temporary file to store myproxy-get-delegation's stderr
	// The file will be owned by the "condor" user
	priv_state priv = set_condor_priv();
	proxy->get_delegation_err_filename = create_temp_file();
	if (proxy->get_delegation_err_filename == NULL) {
		dprintf (D_ALWAYS, "get_delegation create_temp_file() failed: %s\n", strerror(errno) );
		set_priv (priv); // BUGFIX: restore priv state on error path
		proxy->get_delegation_reset();
		return FALSE;
	}
	status = chmod (proxy->get_delegation_err_filename, 0600);
	if (status == -1) {
		dprintf (D_ALWAYS, "chmod() get_delegation_err_filename %s failed: %s\n", proxy->get_delegation_err_filename, strerror(errno) );
		set_priv (priv); // BUGFIX: restore priv state on error path
		proxy->get_delegation_reset();
		return FALSE;
	}
	proxy->get_delegation_err_fd = safe_open_wrapper_follow(proxy->get_delegation_err_filename,O_RDWR);
	if (proxy->get_delegation_err_fd == -1) {
		dprintf (D_ALWAYS, "Error opening get_delegation file %s: %s\n", proxy->get_delegation_err_filename, strerror(errno) );
		set_priv (priv); // BUGFIX: restore priv state on error path
		proxy->get_delegation_reset();
		return FALSE;
	}
	set_priv (priv);

	// stdin comes from the password pipe, stdout is discarded,
	// stderr goes to the temp file for the reaper to examine.
	int arrIO[3];
	arrIO[0]=proxy->get_delegation_password_pipe[0]; //stdin
	arrIO[1]=-1; //proxy->get_delegation_err_fd;
	arrIO[2]=proxy->get_delegation_err_fd; // stderr

	char * myproxy_get_delegation_pgm = param ("MYPROXY_GET_DELEGATION");
	if (!myproxy_get_delegation_pgm) {
		dprintf (D_ALWAYS, "MYPROXY_GET_DELEGATION not defined in config file\n");
		proxy->get_delegation_reset(); // BUGFIX: release pipe/file state like the other error paths
		return FALSE;
	}

	MyString args_string;
	args.GetArgsStringForDisplay(&args_string);
	dprintf (D_ALWAYS, "Calling %s %s\n", myproxy_get_delegation_pgm, args_string.Value());

	int pid = daemonCore->Create_Process (
					myproxy_get_delegation_pgm,     // name
					args,                           // args
					PRIV_USER_FINAL,                // priv
					myproxyGetDelegationReaperId,   // reaper_id
					FALSE,                          // want_command_port
					FALSE,                          // want_udp_command_port
					&myEnv,                         // env
					NULL,                           // cwd
					NULL,                           // family_info
					NULL,                           // sock_inherit_list
					arrIO);                         // in/out/err streams
					                                // nice_inc
					                                // job_opt_mask

	free (myproxy_get_delegation_pgm);
	myproxy_get_delegation_pgm = NULL;

	if (pid == FALSE) {
		dprintf (D_ALWAYS, "Failed to run myproxy-get-delegation\n");
		proxy->get_delegation_reset();
		return FALSE;
	}

	proxy->get_delegation_pid = pid;

	return TRUE;
}
//
// FIXME: We have a lot of boilerplate code in this function and file.
//
// Run `docker -v`, sanity-check that the thing we invoked really is
// Docker (and not, e.g., OpenBox's unrelated "docker" tool), and return
// its version string. Also caches the parsed major/minor version in
// DockerAPI::majorVersion / minorVersion as a side effect.
// Returns 0 on success, a distinct negative code per failure mode.
int DockerAPI::version( std::string & version, CondorError & /* err */ ) {

	ArgList versionArgs;
	if ( ! add_docker_arg(versionArgs))
		return -1;
	versionArgs.AppendArg( "-v" );

	MyString displayString;
	versionArgs.GetArgsStringForLogging( & displayString );
	dprintf( D_FULLDEBUG, "Attempting to run: '%s'.\n", displayString.c_str() );

#if 1
	MyPopenTimer pgm;
	if (pgm.start_program(versionArgs, true, NULL, false) < 0) {
		// treat 'file not found' as not really error
		int d_level = (pgm.error_code() == ENOENT) ? D_FULLDEBUG : (D_ALWAYS | D_FAILURE);
		dprintf(d_level, "Failed to run '%s' errno=%d %s.\n", displayString.c_str(), pgm.error_code(), pgm.error_str() );
		return -2;
	}

	int exitCode;
	if ( ! pgm.wait_for_exit(default_timeout, &exitCode)) {
		pgm.close_program(1);
		dprintf( D_ALWAYS | D_FAILURE, "Failed to read results from '%s': '%s' (%d)\n", displayString.c_str(), pgm.error_str(), pgm.error_code() );
		return -3;
	}

	if (pgm.output_size() <= 0) {
		dprintf( D_ALWAYS | D_FAILURE, "'%s' returned nothing.\n", displayString.c_str() );
		return -3;
	}

	// Real Docker prints exactly one line: "Docker version X.Y...".
	// Anything else (extra lines, too short, too long, or mentioning
	// "Jansens") is treated as an impostor.
	MyStringSource * src = &pgm.output();
	MyString line;
	if (line.readLine(*src, false)) {
		line.chomp();
		bool jansens = strstr( line.c_str(), "Jansens" ) != NULL;
		bool bad_size = ! src->isEof() || line.size() > 1024 || line.size() < (int)sizeof("Docker version ");
		if (bad_size && ! jansens) {
			// check second line of output for the word Jansens also.
			MyString tmp; tmp.readLine(*src, false);
			jansens = strstr( tmp.c_str(), "Jansens" ) != NULL;
		}
		if (jansens) {
			dprintf( D_ALWAYS | D_FAILURE, "The DOCKER configuration setting appears to point to OpenBox's docker. If you want to use Docker.IO, please set DOCKER appropriately in your configuration.\n" );
			return -5;
		} else if (bad_size) {
			dprintf( D_ALWAYS | D_FAILURE, "Read more than one line (or a very long line) from '%s', which we think means it's not Docker. The (first line of the) trailing text was '%s'.\n", displayString.c_str(), line.c_str() );
			return -5;
		}
	}

	if( exitCode != 0 ) {
		dprintf( D_ALWAYS, "'%s' did not exit successfully (code %d); the first line of output was '%s'.\n", displayString.c_str(), exitCode, line.c_str() );
		return -4;
	}

	version = line.c_str();
#else
	// Older my_popen-based implementation, kept (disabled) for reference.
	FILE * dockerResults = my_popen( versionArgs, "r", 1 , 0, false);
	if( dockerResults == NULL ) {
		dprintf( D_ALWAYS | D_FAILURE, "Failed to run '%s'.\n", displayString.c_str() );
		return -2;
	}

	char buffer[1024];
	if( NULL == fgets( buffer, 1024, dockerResults ) ) {
		if( errno ) {
			dprintf( D_ALWAYS | D_FAILURE, "Failed to read results from '%s': '%s' (%d)\n", displayString.c_str(), strerror( errno ), errno );
		} else {
			dprintf( D_ALWAYS | D_FAILURE, "'%s' returned nothing.\n", displayString.c_str() );
		}
		my_pclose( dockerResults );
		return -3;
	}

	// A second line of output means this probably isn't Docker.
	if( NULL != fgets( buffer, 1024, dockerResults ) ) {
		if( strstr( buffer, "Jansens" ) != NULL ) {
			dprintf( D_ALWAYS | D_FAILURE, "The DOCKER configuration setting appears to point to OpenBox's docker. If you want to use Docker.IO, please set DOCKER appropriately in your configuration.\n" );
		} else {
			dprintf( D_ALWAYS | D_FAILURE, "Read more than one line (or a very long line) from '%s', which we think means it's not Docker. The (first line of the) trailing text was '%s'.\n", displayString.c_str(), buffer );
		}
		my_pclose( dockerResults );
		return -5;
	}

	int exitCode = my_pclose( dockerResults );
	if( exitCode != 0 ) {
		dprintf( D_ALWAYS, "'%s' did not exit successfully (code %d); the first line of output was '%s'.\n", displayString.c_str(), exitCode, buffer );
		return -4;
	}

	size_t end = strlen(buffer);
	if (end > 0 && buffer[end-1] == '\n') { buffer[end-1] = '\0'; }
	version = buffer;
#endif
	// Cache the numeric version for feature checks elsewhere (e.g. --no-new-privileges).
	sscanf(version.c_str(), "Docker version %d.%d", &DockerAPI::majorVersion, &DockerAPI::minorVersion);

	return 0;
}
// Run `docker inspect` on the given container and populate dockerAd with
// one classad attribute per line of templated output (ContainerId, Pid,
// Name, Running, ExitCode, StartedAt, FinishedAt, DockerError, OOMKilled).
// Inner double-quotes in each value are rewritten to single quotes so the
// line parses as a classad assignment. Returns 0 on success, a negative
// code otherwise.
int DockerAPI::inspect( const std::string & containerID, ClassAd * dockerAd, CondorError & /* err */ ) {
	if( dockerAd == NULL ) {
		dprintf( D_ALWAYS | D_FAILURE, "dockerAd is NULL.\n" );
		return -2;
	}

	ArgList inspectArgs;
	if ( ! add_docker_arg(inspectArgs))
		return -1;
	inspectArgs.AppendArg( "inspect" );
	inspectArgs.AppendArg( "--format" );
	// One Go-template element per desired classad attribute; they are
	// joined below with newlines so each prints on its own output line.
	StringList formatElements( "ContainerId=\"{{.Id}}\" "
		"Pid={{.State.Pid}} "
		"Name=\"{{.Name}}\" "
		"Running={{.State.Running}} "
		"ExitCode={{.State.ExitCode}} "
		"StartedAt=\"{{.State.StartedAt}}\" "
		"FinishedAt=\"{{.State.FinishedAt}}\" "
		"DockerError=\"{{.State.Error}}\" "
		"OOMKilled=\"{{.State.OOMKilled}}\" " );
	char * formatArg = formatElements.print_to_delimed_string( "\n" );
	inspectArgs.AppendArg( formatArg );
	free( formatArg );
	inspectArgs.AppendArg( containerID );

	MyString displayString;
	inspectArgs.GetArgsStringForLogging( & displayString );
	dprintf( D_FULLDEBUG, "Attempting to run: %s\n", displayString.c_str() );

#if 1
	MyPopenTimer pgm;
	if (pgm.start_program(inspectArgs, true, NULL, false) < 0) {
		dprintf( D_ALWAYS | D_FAILURE, "Failed to run '%s'.\n", displayString.c_str() );
		return -6;
	}

	MyStringSource * src = NULL;
	if (pgm.wait_and_close(default_timeout)) {
		src = &pgm.output();
	}

	int expected_rows = formatElements.number();
	dprintf( D_FULLDEBUG, "exit_status=%d, error=%d, %d bytes. expecting %d lines\n", pgm.exit_status(), pgm.error_code(), pgm.output_size(), expected_rows );

	// If the output isn't exactly formatElements.number() lines long,
	// something has gone wrong and we'll at least be able to print out
	// the error message(s).
	std::vector<std::string> correctOutput(expected_rows);
	if (src) {
		MyString line;
		int i=0;
		while (line.readLine(*src,false)) {
			line.chomp();
			//dprintf( D_FULLDEBUG, "\t[%2d] %s\n", i, line.c_str() );
			if (i >= expected_rows) {
				// Extra (error) lines beyond the expected count are kept
				// too, skipping blanks, so they can be logged below.
				if (line.empty()) continue;
				correctOutput.push_back(line.c_str());
			} else {
				correctOutput[i] = line.c_str();
			}
			// Convert any embedded double-quotes after the first one to
			// single quotes so the classad string literal stays intact.
			std::string::iterator first = std::find(correctOutput[i].begin(), correctOutput[i].end(), '\"');
			if (first != correctOutput[i].end()) {
				std::replace(++first, --correctOutput[i].end(), '\"','\'');
			}
			//dprintf( D_FULLDEBUG, "\tfix: %s\n", correctOutput[i].c_str() );
			++i;
		}
	}
#else
	// Older my_popen-based implementation, kept (disabled) for reference.
	FILE * dockerResults = my_popen( inspectArgs, "r", 1 , 0, false);
	if( dockerResults == NULL ) {
		dprintf( D_ALWAYS | D_FAILURE, "Unable to run '%s'.\n", displayString.c_str() );
		return -6;
	}

	// If the output isn't exactly formatElements.number() lines long,
	// something has gone wrong and we'll at least be able to print out
	// the error message(s).
	char buffer[1024];
	std::vector<std::string> correctOutput(formatElements.number());
	for( int i = 0; i < formatElements.number(); ++i ) {
		if( fgets( buffer, 1024, dockerResults ) != NULL ) {
			correctOutput[i] = buffer;
			std::string::iterator first = std::find(correctOutput[i].begin(), correctOutput[i].end(), '\"');
			if (first != correctOutput[i].end()) {
				// NOTE(review): the doubled "-- --" below double-decrements
				// the end iterator, unlike the single decrement in the
				// active branch above — looks like a typo, but this branch
				// is compiled out; confirm before enabling.
				std::replace(++first, -- --correctOutput[i].end(), '\"','\'');
			}
		}
	}
	my_pclose( dockerResults );
#endif

	// Insert each "Attr=value" line into the ad; stop at the first blank
	// or unparsable line.
	int attrCount = 0;
	for( int i = 0; i < formatElements.number(); ++i ) {
		if( correctOutput[i].empty() || dockerAd->Insert( correctOutput[i].c_str() ) == FALSE ) {
			break;
		}
		++attrCount;
	}

	if( attrCount != formatElements.number() ) {
		dprintf( D_ALWAYS | D_FAILURE, "Failed to create classad from Docker output (%d). Printing up to the first %d (nonblank) lines.\n", attrCount, formatElements.number() );
		for( int i = 0; i < formatElements.number() && ! correctOutput[i].empty(); ++i ) {
			dprintf( D_ALWAYS | D_FAILURE, "%s", correctOutput[i].c_str() );
		}
		return -4;
	}

	dprintf( D_FULLDEBUG, "docker inspect printed:\n" );
	for( int i = 0; i < formatElements.number() && ! correctOutput[i].empty(); ++i ) {
		dprintf( D_FULLDEBUG, "\t%s\n", correctOutput[i].c_str() );
	}
	return 0;
}
//
// Because we fork before calling docker, we don't actually
// care if the image is stored locally or not (except to the extent that
// remote image pull violates the principle of least astonishment).
//
// Launch a docker container for a job: builds the full `docker run`
// command line (cpu/memory limits, capability drops, hostname, container
// name, environment, volume mounts, workdir, uid:gid, image, command and
// args) and spawns it via daemonCore->Create_Process. On success, sets
// `pid` to the child's pid and returns 0; returns a distinct negative
// code per failure mode.
int DockerAPI::run(
	ClassAd &machineAd,
	ClassAd &jobAd,
	const std::string & containerName,
	const std::string & imageID,
	const std::string & command,
	const ArgList & args,
	const Env & env,
	const std::string & sandboxPath,
	const std::list<std::string> extraVolumes,
	int & pid,
	int * childFDs,
	CondorError & /* err */ )
{
	gc_image(imageID);
	//
	// We currently assume that the system has been configured so that
	// anyone (user) who can run an HTCondor job can also run docker. It's
	// also apparently a security worry to run Docker as root, so let's not.
	//
	ArgList runArgs;
	if ( ! add_docker_arg(runArgs))
		return -1;
	runArgs.AppendArg( "run" );

	// Write out a file with the container ID.
	// FIXME: The startd can check this to clean up after us.
	// This needs to go into a directory that condor user
	// can write to.
/*
	std::string cidFileName = sandboxPath + "/.cidfile";
	runArgs.AppendArg( "--cidfile=" + cidFileName );
*/

	// Configure resource limits.

	// First cpus: 10 shares per provisioned cpu (10 if unknown).
	int cpus;
	int cpuShare;

	if (machineAd.LookupInteger(ATTR_CPUS, cpus)) {
		cpuShare = 10 * cpus;
	} else {
		cpuShare = 10;
	}
	std::string cpuShareStr;
	formatstr(cpuShareStr, "--cpu-shares=%d", cpuShare);
	runArgs.AppendArg(cpuShareStr);

	// Now memory
	int memory; // in Megabytes
	if (machineAd.LookupInteger(ATTR_MEMORY, memory)) {
		std::string mem;
		formatstr(mem, "--memory=%dm", memory);
		runArgs.AppendArg(mem);
	}

	// drop unneeded Linux capabilities
	if (param_boolean("DOCKER_DROP_ALL_CAPABILITIES", true /*default*/, true /*do_log*/, &machineAd, &jobAd)) {
		runArgs.AppendArg("--cap-drop=all");

		// --no-new-privileges flag appears in docker 1.11
		// (version was cached by DockerAPI::version()).
		if (DockerAPI::majorVersion > 1 || DockerAPI::minorVersion > 10) {
			runArgs.AppendArg("--no-new-privileges");
		}
	}

	// Give the container a useful name
	std::string hname = makeHostname(&machineAd, &jobAd);
	runArgs.AppendArg("--hostname");
	runArgs.AppendArg(hname.c_str());

	// Now the container name
	runArgs.AppendArg( "--name" );
	runArgs.AppendArg( containerName );

	if ( ! add_env_to_args_for_docker(runArgs, env)) {
		dprintf( D_ALWAYS | D_FAILURE, "Failed to pass enviroment to docker.\n" );
		return -8;
	}

	// Map the external sanbox to the internal sandbox.
	runArgs.AppendArg( "--volume" );
	runArgs.AppendArg( sandboxPath + ":" + sandboxPath );

	// Now any extra volumes
	for (std::list<std::string>::const_iterator it = extraVolumes.begin(); it != extraVolumes.end(); it++) {
		runArgs.AppendArg("--volume");
		std::string volume = *it;
		runArgs.AppendArg(volume);
	}

	// Start in the sandbox.
	runArgs.AppendArg( "--workdir" );
	runArgs.AppendArg( sandboxPath );

	// Run with the uid that condor selects for the user
	// either a slot user or submitting user or nobody
	uid_t uid = 0;
	uid_t gid = 0;

	// Docker doesn't actually run on Windows, but we compile
	// on Windows because...
#ifndef WIN32
	uid = get_user_uid();
	gid = get_user_gid();
#endif

	// Refuse to run the container as root.
	if ((uid == 0) || (gid == 0)) {
		dprintf(D_ALWAYS|D_FAILURE, "Failed to get userid to run docker job\n");
		return -9;
	}

	runArgs.AppendArg("--user");
	std::string uidgidarg;
	// NOTE(review): %d with uid_t/gid_t (typically unsigned) — fine for
	// ordinary uids, but would misprint very large ids; confirm.
	formatstr(uidgidarg, "%d:%d", uid, gid);
	runArgs.AppendArg(uidgidarg);

	// Run the command with its arguments in the image.
	runArgs.AppendArg( imageID );

	// If no command given, the default command in the image will run
	if (command.length() > 0) {
		runArgs.AppendArg( command );
	}

	runArgs.AppendArgsFromArgList( args );

	MyString displayString;
	runArgs.GetArgsStringForLogging( & displayString );
	dprintf( D_ALWAYS, "Attempting to run: %s\n", displayString.c_str() );

	//
	// If we run Docker attached, we avoid a race condition where
	// 'docker logs --follow' returns before 'docker rm' knows that the
	// container is gone (and refuses to remove it). Of course, we
	// can't block, so we have a proxy process run attached for us.
	//
	FamilyInfo fi;
	fi.max_snapshot_interval = param_integer( "PID_SNAPSHOT_INTERVAL", 15 );
	int childPID = daemonCore->Create_Process( runArgs.GetArg(0), runArgs,
		PRIV_CONDOR_FINAL, 1, FALSE, FALSE, NULL, "/",
		& fi, NULL, childFDs );

	if( childPID == FALSE ) {
		dprintf( D_ALWAYS | D_FAILURE, "Create_Process() failed.\n" );
		return -1;
	}
	pid = childPID;

	return 0;
}
// Probe whether a working Docker daemon is reachable: first verifies the
// binary via DockerAPI::version(), then runs `docker info` and checks it
// exits 0. The info output is echoed to the debug log. Returns 0 when
// Docker looks usable, a negative code otherwise.
int DockerAPI::detect( CondorError & err ) {
	// FIXME: Remove ::version() as a public API and return it from here,
	// because there's no point in doing this twice.
	std::string version;
	int rval = DockerAPI::version( version, err );
	if( rval != 0 ) {
		dprintf(D_ALWAYS, "DockerAPI::detect() failed to detect the Docker version; assuming absent.\n" );
		return -4;
	}

	ArgList infoArgs;
	if ( ! add_docker_arg(infoArgs))
		return -1;
	infoArgs.AppendArg( "info" );

	MyString displayString;
	infoArgs.GetArgsStringForLogging( & displayString );
	dprintf( D_FULLDEBUG, "Attempting to run: '%s'.\n", displayString.c_str() );

#if 1
	MyPopenTimer pgm;
	if (pgm.start_program(infoArgs, true, NULL, false) < 0) {
		dprintf( D_ALWAYS | D_FAILURE, "Failed to run '%s'.\n", displayString.c_str() );
		return -2;
	}

	// BUGFIX: initialize exitCode — when wait_for_exit() fails it is not
	// written, but the dprintf below still reads it.
	int exitCode = -1;
	if ( ! pgm.wait_for_exit(default_timeout, &exitCode) || exitCode != 0) {
		pgm.close_program(1);
		MyString line;
		line.readLine(pgm.output(), false); line.chomp();
		dprintf( D_ALWAYS, "'%s' did not exit successfully (code %d); the first line of output was '%s'.\n", displayString.c_str(), exitCode, line.c_str());
		return -3;
	}

	// NOTE(review): IsFulldebug(D_ALWAYS) looks odd for gating full-debug
	// echoing, but is preserved as-is.
	if (IsFulldebug(D_ALWAYS)) {
		// BUGFIX: plain while loop — the original do/while also called
		// readLine() in its condition, silently discarding every other
		// line of the info output.
		MyString line;
		while (line.readLine(pgm.output(), false)) {
			line.chomp();
			dprintf( D_FULLDEBUG, "[docker info] %s\n", line.c_str() );
		}
	}
#else
	// Older my_popen-based implementation, kept (disabled) for reference.
	FILE * dockerResults = my_popen( infoArgs, "r", 1 , 0, false);
	if( dockerResults == NULL ) {
		dprintf( D_ALWAYS | D_FAILURE, "Failed to run '%s'.\n", displayString.c_str() );
		return -2;
	}

	// Even if we don't care about the success output, the failure output
	// can be handy for debugging...
	char buffer[1024];
	std::vector< std::string > output;
	while( fgets( buffer, 1024, dockerResults ) != NULL ) {
		size_t end = strlen(buffer);
		if (end > 0 && buffer[end-1] == '\n') { buffer[end-1] = '\0'; }
		output.push_back( buffer );
	}
	for( unsigned i = 0; i < output.size(); ++i ) {
		dprintf( D_FULLDEBUG, "[docker info] %s\n", output[i].c_str() );
	}

	int exitCode = my_pclose( dockerResults );
	if( exitCode != 0 ) {
		// BUGFIX: guard output[0] — the command may have produced no
		// output at all, and output[0] on an empty vector is UB.
		dprintf( D_ALWAYS, "'%s' did not exit successfully (code %d); the first line of output was '%s'.\n", displayString.c_str(), exitCode, output.empty() ? "" : output[0].c_str() );
		return -3;
	}
#endif

	return 0;
}
int DockerAPI::rmi(const std::string &image, CondorError &err) { // First, try to remove the named image run_simple_docker_command("rmi", image, default_timeout, err, true); // That may have succeed or failed. It could have // failed if the image doesn't exist (anymore), or // if someone else deleted it outside of condor. // Check to see if the image still exists. If it // has been removed, return 0. ArgList args; if ( ! add_docker_arg(args)) return -1; args.AppendArg( "images" ); args.AppendArg( "-q" ); args.AppendArg( image ); MyString displayString; args.GetArgsStringForLogging( & displayString ); dprintf( D_FULLDEBUG, "Attempting to run: '%s'.\n", displayString.c_str() ); #if 1 MyPopenTimer pgm; if (pgm.start_program(args, true, NULL, false) < 0) { dprintf( D_ALWAYS | D_FAILURE, "Failed to run '%s'.\n", displayString.c_str() ); return -2; } int exitCode; if ( ! pgm.wait_for_exit(default_timeout, &exitCode) || exitCode != 0) { pgm.close_program(1); MyString line; line.readLine(pgm.output(), false); line.chomp(); dprintf( D_ALWAYS, "'%s' did not exit successfully (code %d); the first line of output was '%s'.\n", displayString.c_str(), exitCode, line.c_str()); return -3; } return pgm.output_size() > 0; #else FILE * dockerResults = my_popen( args, "r", 1 , 0, false); if( dockerResults == NULL ) { dprintf( D_ALWAYS | D_FAILURE, "Failed to run '%s'.\n", displayString.c_str() ); return -2; } char buffer[1024]; std::vector< std::string > output; while( fgets( buffer, 1024, dockerResults ) != NULL ) { size_t end = strlen(buffer); if (end > 0 && buffer[end-1] == '\n') { buffer[end-1] = '\0'; } output.push_back( buffer ); } int exitCode = my_pclose( dockerResults ); if( exitCode != 0 ) { dprintf( D_ALWAYS, "'%s' did not exit successfully (code %d); the first line of output was '%s'.\n", displayString.c_str(), exitCode, output[0].c_str() ); return -3; } if (output.size() == 0) { return 0; } else { return 1; } #endif }
// Force-remove a container (and its volume) with `docker rm -f -v`.
// Docker's success protocol is to echo the containerID back; anything
// else is treated as failure and triggers a check for a hung daemon.
// Returns 0 on success, docker_hung on timeout, other negatives on error.
int DockerAPI::rm( const std::string & containerID, CondorError & /* err */ ) {

	ArgList rmArgs;
	if ( ! add_docker_arg(rmArgs))
		return -1;
	rmArgs.AppendArg( "rm" );
	rmArgs.AppendArg( "-f" );  // if for some reason still running, kill first
	rmArgs.AppendArg( "-v" );  // also remove the volume
	rmArgs.AppendArg( containerID.c_str() );

	MyString displayString;
	rmArgs.GetArgsStringForLogging( & displayString );
	dprintf( D_FULLDEBUG, "Attempting to run: %s\n", displayString.c_str() );

	// Read from Docker's combined output and error streams.
#if 1
	MyPopenTimer pgm;
	if (pgm.start_program( rmArgs, true, NULL, false ) < 0) {
		dprintf( D_ALWAYS | D_FAILURE, "Failed to run '%s'.\n", displayString.c_str() );
		return -2;
	}
	const char * got_output = pgm.wait_and_close(default_timeout);

	// On a success, Docker writes the containerID back out.
	MyString line;
	if ( ! got_output || ! line.readLine(pgm.output(), false)) {
		int error = pgm.error_code();
		if( error ) {
			dprintf( D_ALWAYS | D_FAILURE, "Failed to read results from '%s': '%s' (%d)\n", displayString.c_str(), pgm.error_str(), error );
			if (pgm.was_timeout()) {
				dprintf( D_ALWAYS | D_FAILURE, "Declaring a hung docker\n");
				return docker_hung;
			}
		} else {
			dprintf( D_ALWAYS | D_FAILURE, "'%s' returned nothing.\n", displayString.c_str() );
		}
		return -3;
	}

	line.chomp(); line.trim();
	if (line != containerID.c_str()) {
		// Didn't get back the result I expected, report the error and check to see if docker is hung.
		return check_if_docker_offline(pgm, "Docker remove", -4);
	}
#else
	// Older my_popen-based implementation, kept (disabled) for reference.
	FILE * dockerResults = my_popen( rmArgs, "r", 1 , 0, false);
	if( dockerResults == NULL ) {
		dprintf( D_ALWAYS | D_FAILURE, "Failed to run '%s'.\n", displayString.c_str() );
		return -2;
	}

	// On a success, Docker writes the containerID back out.
	char buffer[1024];
	if( NULL == fgets( buffer, 1024, dockerResults ) ) {
		if( errno ) {
			dprintf( D_ALWAYS | D_FAILURE, "Failed to read results from '%s': '%s' (%d)\n", displayString.c_str(), strerror( errno ), errno );
		} else {
			dprintf( D_ALWAYS | D_FAILURE, "'%s' returned nothing.\n", displayString.c_str() );
		}
		my_pclose( dockerResults );
		return -3;
	}

	// Compare ignoring the trailing newline fgets left in the buffer.
	int length = strlen( buffer );
	if( length < 1 || strncmp( buffer, containerID.c_str(), length - 1 ) != 0 ) {
		dprintf( D_ALWAYS | D_FAILURE, "Docker remove failed, printing first few lines of output.\n" );
		dprintf( D_ALWAYS | D_FAILURE, "%s", buffer );
		while( NULL != fgets( buffer, 1024, dockerResults ) ) {
			dprintf( D_ALWAYS | D_FAILURE, "%s", buffer );
		}
		my_pclose( dockerResults );
		return -4;
	}

	my_pclose( dockerResults );
#endif
	return 0;
}
// After a docker command has failed (and its MyPopenTimer has been
// closed), log the command's output and decide whether the docker daemon
// itself is hung. If the output is empty, or mentions a busy docker
// socket, a fresh `docker info` probe is run: failure of that probe
// converts the caller's error code into DockerAPI::docker_hung.
// Returns original_error_code, or docker_hung when the daemon looks dead.
static int check_if_docker_offline(MyPopenTimer & pgmIn, const char * cmd_str, int original_error_code)
{
	int rval = original_error_code;
	// this should not be called with a program that is still running.
	ASSERT(pgmIn.is_closed());

	MyString line;
	MyStringCharSource * src = NULL;
	if (pgmIn.output_size() > 0) {
		src = &pgmIn.output();
		src->rewind();
	}

	bool check_for_hung_docker = true; // if no output, we should check for hung docker.
	dprintf( D_ALWAYS | D_FAILURE, "%s failed, %s output.\n", cmd_str, src ? "printing first few lines of" : "no" );
	if (src) {
		check_for_hung_docker = false; // if we got output, assume docker is not hung.
		// Echo at most the first 10 lines of the failed command's output.
		for (int ii = 0; ii < 10; ++ii) {
			if ( ! line.readLine(*src, false)) break;
			dprintf( D_ALWAYS | D_FAILURE, "%s\n", line.c_str() );
			// if we got something resembling "/var/run/docker.sock: resource temporarily unavailable"
			// then we should check for a hung docker.
			const char * p = strstr(line.c_str(), ".sock: resource ");
			if (p && strstr(p, "unavailable")) {
				check_for_hung_docker = true;
			}
		}
	}

	if (check_for_hung_docker) {
		dprintf( D_ALWAYS, "Checking to see if Docker is offline\n");

		// Probe the daemon directly with `docker info`.
		ArgList infoArgs;
		add_docker_arg(infoArgs);
		infoArgs.AppendArg( "info" );
		MyString displayString;
		infoArgs.GetArgsStringForLogging( & displayString );

		MyPopenTimer pgm2;
		if (pgm2.start_program(infoArgs, true, NULL, false) < 0) {
			dprintf( D_ALWAYS | D_FAILURE, "Failed to run '%s'.\n", displayString.c_str() );
			rval = DockerAPI::docker_hung;
		} else {
			int exitCode = 0;
			// NOTE(review): 60-second probe timeout is hard-coded here
			// rather than using default_timeout — presumably deliberate.
			if ( ! pgm2.wait_for_exit(60, &exitCode) || pgm2.output_size() <= 0) {
				dprintf( D_ALWAYS | D_FAILURE, "Failed to get output from '%s' : %s.\n", displayString.c_str(), pgm2.error_str() );
				rval = DockerAPI::docker_hung;
			} else {
				// Daemon answered; log its info output for diagnostics.
				while (line.readLine(pgm2.output(),false)) {
					line.chomp();
					dprintf( D_FULLDEBUG, "[Docker Info] %s\n", line.c_str() );
				}
			}
		}

		if (rval == DockerAPI::docker_hung) {
			dprintf( D_ALWAYS | D_FAILURE, "Docker is not responding. returning docker_hung error code.\n");
		}
	}

	return rval;
}
void secondPass (int argc, char *argv[]) { const char * pcolon = NULL; char *daemonname; for (int i = 1; i < argc; i++) { // omit parameters which qualify switches if( matchPrefix(argv[i],"-pool", 2) || matchPrefix(argv[i],"-direct", 4) ) { i++; continue; } if( matchPrefix(argv[i],"-subsystem", 5) ) { i++; continue; } if (matchPrefix (argv[i], "-format", 2)) { pm.registerFormat (argv[i+1], argv[i+2]); StringList attributes; ClassAd ad; if(!ad.GetExprReferences(argv[i+2],NULL,&attributes)){ fprintf( stderr, "Error: Parse error of: %s\n", argv[i+2]); exit(1); } attributes.rewind(); char const *s; while( (s=attributes.next()) ) { projList.AppendArg(s); } if (diagnose) { printf ("Arg %d --- register format [%s] for [%s]\n", i, argv[i+1], argv[i+2]); } i += 2; continue; } if (*argv[i] == '-' && (is_arg_colon_prefix(argv[i]+1, "autoformat", &pcolon, 5) || is_arg_colon_prefix(argv[i]+1, "af", &pcolon, 2)) ) { // make sure we have at least one more argument if ( !argv[i+1] || *(argv[i+1]) == '-') { fprintf( stderr, "Error: Argument %s requires " "at last one attribute parameter\n", argv[i] ); fprintf( stderr, "Use \"%s -help\" for details\n", myName ); exit( 1 ); } bool flabel = false; bool fCapV = false; bool fRaw = false; bool fheadings = false; const char * prowpre = NULL; const char * pcolpre = " "; const char * pcolsux = NULL; if (pcolon) { ++pcolon; while (*pcolon) { switch (*pcolon) { case ',': pcolsux = ","; break; case 'n': pcolsux = "\n"; break; case 'g': pcolpre = NULL; prowpre = "\n"; break; case 't': pcolpre = "\t"; break; case 'l': flabel = true; break; case 'V': fCapV = true; break; case 'r': case 'o': fRaw = true; break; case 'h': fheadings = true; break; } ++pcolon; } } pm.SetAutoSep(prowpre, pcolpre, pcolsux, "\n"); while (argv[i+1] && *(argv[i+1]) != '-') { ++i; ClassAd ad; StringList attributes; if(!ad.GetExprReferences(argv[i],NULL,&attributes)){ fprintf( stderr, "Error: Parse error of: %s\n", argv[i]); exit(1); } attributes.rewind(); char const *s; 
while ((s = attributes.next())) { projList.AppendArg(s); } MyString lbl = ""; int wid = 0; int opts = FormatOptionNoTruncate; if (fheadings || pm_head.Length() > 0) { const char * hd = fheadings ? argv[i] : "(expr)"; wid = 0 - (int)strlen(hd); opts = FormatOptionAutoWidth | FormatOptionNoTruncate; pm_head.Append(hd); } else if (flabel) { lbl.formatstr("%s = ", argv[i]); wid = 0; opts = 0; } lbl += fRaw ? "%r" : (fCapV ? "%V" : "%v"); if (diagnose) { printf ("Arg %d --- register format [%s] width=%d, opt=0x%x for [%s]\n", i, lbl.Value(), wid, opts, argv[i]); } pm.registerFormat(lbl.Value(), wid, opts, argv[i]); } // if autoformat list ends in a '-' without any characters after it, just eat the arg and keep going. if (i+1 < argc && '-' == (argv[i+1])[0] && 0 == (argv[i+1])[1]) { ++i; } continue; } if (is_dash_arg_colon_prefix(argv[i], "print-format", &pcolon, 2)) { if ( (i+1 >= argc) || (*(argv[i+1]) == '-' && (argv[i+1])[1] != 0)) { fprintf( stderr, "Error: Argument -print-format requires a filename argument\n"); exit( 1 ); } // hack allow -pr ! to disable use of user-default print format files. if (MATCH == strcmp(argv[i+1], "!")) { ++i; disable_user_print_files = true; continue; } ppTotalStyle = ppStyle; setPPstyle (PP_CUSTOM, i, argv[i]); setPPwidth(); ++i; // skip to the next argument. if (set_status_print_mask_from_stream(argv[i], true, &mode_constraint) < 0) { fprintf(stderr, "Error: invalid select file %s\n", argv[i]); exit (1); } if (mode_constraint) { query->addANDConstraint(mode_constraint); } using_print_format = true; // so we can hack totals. continue; } if (matchPrefix (argv[i], "-target", 5)) { i++; continue; } if (is_dash_arg_prefix(argv[i], "ads", 2)) { ++i; continue; } if( matchPrefix(argv[i], "-sort", 3) ) { i++; if ( ! 
noSort) { sprintf( buffer, "%s =!= UNDEFINED", argv[i] ); query->addANDConstraint( buffer ); } continue; } if (matchPrefix (argv[i], "-statistics", 6)) { i += 2; sprintf(buffer,"STATISTICS_TO_PUBLISH = \"%s\"", statistics); if (diagnose) { printf ("[%s]\n", buffer); } query->addExtraAttribute(buffer); continue; } if (matchPrefix (argv[i], "-attributes", 3) ) { // parse attributes to be selected and split them along "," StringList more_attrs(argv[i+1],","); char const *s; more_attrs.rewind(); while( (s=more_attrs.next()) ) { projList.AppendArg(s); dashAttributes.append(s); } i++; continue; } // figure out what the other parameters should do if (*argv[i] != '-') { // display extra information for diagnosis if (diagnose) { printf ("Arg %d (%s) --- adding constraint", i, argv[i]); } if( !(daemonname = get_daemon_name(argv[i])) ) { if ( (mode==MODE_SCHEDD_SUBMITTORS) && strchr(argv[i],'@') ) { // For a submittor query, it is possible that the // hostname is really a UID_DOMAIN. And there is // no requirement that UID_DOMAIN actually have // an inverse lookup in DNS... so if get_daemon_name() // fails with a fully qualified submittor lookup, just // use what we are given and do not flag an error. 
daemonname = strnewp(argv[i]); } else { dprintf_WriteOnErrorBuffer(stderr, true); fprintf( stderr, "%s: unknown host %s\n", argv[0], get_host_part(argv[i]) ); exit(1); } } switch (mode) { case MODE_DEFRAG_NORMAL: case MODE_STARTD_NORMAL: case MODE_STARTD_COD: #ifdef HAVE_EXT_POSTGRESQL case MODE_QUILL_NORMAL: #endif /* HAVE_EXT_POSTGRESQL */ case MODE_SCHEDD_NORMAL: case MODE_SCHEDD_SUBMITTORS: case MODE_MASTER_NORMAL: case MODE_COLLECTOR_NORMAL: case MODE_CKPT_SRVR_NORMAL: case MODE_NEGOTIATOR_NORMAL: case MODE_STORAGE_NORMAL: case MODE_ANY_NORMAL: case MODE_GENERIC_NORMAL: case MODE_STARTD_AVAIL: case MODE_OTHER: case MODE_GRID_NORMAL: case MODE_HAD_NORMAL: sprintf(buffer,"(%s==\"%s\") || (%s==\"%s\")", ATTR_NAME, daemonname, ATTR_MACHINE, daemonname ); if (diagnose) { printf ("[%s]\n", buffer); } query->addORConstraint (buffer); break; case MODE_STARTD_RUN: sprintf (buffer,"%s == \"%s\"",ATTR_REMOTE_USER,argv[i]); if (diagnose) { printf ("[%s]\n", buffer); } query->addORConstraint (buffer); break; default: fprintf(stderr,"Error: Don't know how to process %s\n",argv[i]); } delete [] daemonname; daemonname = NULL; } else if (matchPrefix (argv[i], "-constraint", 4)) { if (diagnose) { printf ("[%s]\n", argv[i+1]); } query->addANDConstraint (argv[i+1]); i++; } } }
// Run a simple "docker <command> <container>" invocation (e.g. pause,
// unpause, rm), waiting at most `timeout` seconds for it to complete.
//
// Returns:
//    0  success
//   -1  could not build the docker command line
//   -2  could not launch docker
//   -3  docker produced no output (or reading it failed)
//   -4  docker's output did not echo back the container name
//   DockerAPI::docker_hung  when docker timed out
//
// Unless ignore_output is true, success requires that the first line of
// docker's output match `container` (docker echoes the name of the container
// it operated on).  NOTE(review): the CondorError parameter is accepted but
// never populated.
int run_simple_docker_command(const std::string &command, const std::string &container, int timeout, CondorError &, bool ignore_output)
{
	ArgList args;
	if ( ! add_docker_arg(args))
		return -1;
	args.AppendArg( command );
	args.AppendArg( container.c_str() );

	MyString displayString;
	args.GetArgsStringForLogging( & displayString );
	dprintf( D_FULLDEBUG, "Attempting to run: %s\n", displayString.c_str() );

#if 1
	// current implementation: run docker under a timer so a hang can be detected.
	MyPopenTimer pgm;
	if (pgm.start_program( args, true, NULL, false ) < 0) {
		dprintf( D_ALWAYS | D_FAILURE, "Failed to run '%s'.\n", displayString.c_str() );
		return -2;
	}

	if ( ! pgm.wait_and_close(timeout) || pgm.output_size() <= 0) {
		int error = pgm.error_code();
		if( error ) {
			dprintf( D_ALWAYS | D_FAILURE, "Failed to read results from '%s': '%s' (%d)\n", displayString.c_str(), pgm.error_str(), error );
			if (pgm.was_timeout()) {
				// docker did not finish within `timeout` seconds
				dprintf( D_ALWAYS | D_FAILURE, "Declaring a hung docker\n");
				return DockerAPI::docker_hung;
			}
		} else {
			dprintf( D_ALWAYS | D_FAILURE, "'%s' returned nothing.\n", displayString.c_str() );
		}
		return -3;
	}

	// On a success, Docker writes the containerID back out.
	MyString line;
	line.readLine(pgm.output());
	line.chomp(); line.trim();
	if (!ignore_output && line != container.c_str()) {
		// Didn't get back the result I expected, report the error and check to see if docker is hung.
		dprintf( D_ALWAYS | D_FAILURE, "Docker %s failed, printing first few lines of output.\n", command.c_str());
		for (int ii = 0; ii < 10; ++ii) {
			if ( ! line.readLine(pgm.output(), false)) break;
			dprintf( D_ALWAYS | D_FAILURE, "%s\n", line.c_str() );
		}
		return -4;
	}

#else
	// Older my_popen()-based implementation; disabled by the "#if 1" above
	// but kept for reference.
	// Read from Docker's combined output and error streams.
	FILE * dockerResults = my_popen( args, "r", 1 , 0, false);
	if( dockerResults == NULL ) {
		dprintf( D_ALWAYS | D_FAILURE, "Failed to run '%s'.\n", displayString.c_str() );
		return -2;
	}

	// On a success, Docker writes the containerID back out.
	char buffer[1024];
	if( NULL == fgets( buffer, 1024, dockerResults ) ) {
		if( errno ) {
			dprintf( D_ALWAYS | D_FAILURE, "Failed to read results from '%s': '%s' (%d)\n", displayString.c_str(), strerror( errno ), errno );
		} else {
			dprintf( D_ALWAYS | D_FAILURE, "'%s' returned nothing.\n", displayString.c_str() );
		}
		my_pclose( dockerResults );
		return -3;
	}

	size_t length = strlen( buffer );
	if (!ignore_output) {
		// compare ignoring the trailing newline fgets leaves in buffer
		if( length < 1 || strncmp( buffer, container.c_str(), length - 1 ) != 0 ) {
			dprintf( D_ALWAYS | D_FAILURE, "Docker %s failed, printing first few lines of output.\n", command.c_str() );
			dprintf( D_ALWAYS | D_FAILURE, "%s", buffer );
			while( NULL != fgets( buffer, 1024, dockerResults ) ) {
				dprintf( D_ALWAYS | D_FAILURE, "%s", buffer );
			}
			my_pclose( dockerResults );
			return -4;
		}
	}
	my_pclose( dockerResults );
#endif
	return 0;
}
// Tool entry point: processes arguments in two passes (firstPass determines
// the query mode; secondPass adds constraints and format registrations),
// builds a CondorQuery, fetches matching ClassAds from a collector, a
// specific daemon (-direct), or a file (-ads), then sorts and pretty-prints
// them.  Exits with status 1 on any configuration, parse, or query error.
int main (int argc, char *argv[])
{
#if !defined(WIN32)
	// don't die when output is piped into something that exits early (e.g. head)
	install_sig_handler(SIGPIPE, (SIG_HANDLER)SIG_IGN );
#endif

	// initialize to read from config file
	myDistro->Init( argc, argv );
	myName = argv[0];
	config();
	dprintf_config_tool_on_error(0);

	// The arguments take two passes to process --- the first pass
	// figures out the mode, after which we can instantiate the required
	// query object. We add implied constraints from the command line in
	// the second pass.
	firstPass (argc, argv);

	// if the mode has not been set, it is STARTD_NORMAL
	if (mode == MODE_NOTSET) {
		setMode (MODE_STARTD_NORMAL, 0, DEFAULT);
	}

	// instantiate query object
	if (!(query = new CondorQuery (type))) {
		dprintf_WriteOnErrorBuffer(stderr, true);
		fprintf (stderr, "Error: Out of memory\n");
		exit (1);
	}
	// if a first-pass setMode set a mode_constraint, apply it now to the query object
	if (mode_constraint && ! explicit_format) {
		query->addANDConstraint(mode_constraint);
	}

	// set pretty print style implied by the type of entity being queried
	// but do it with default priority, so that explicitly requested options
	// can override it
	switch (type)
	{
#ifdef HAVE_EXT_POSTGRESQL
	  case QUILL_AD:
		setPPstyle(PP_QUILL_NORMAL, 0, DEFAULT);
		break;
#endif /* HAVE_EXT_POSTGRESQL */

	  case DEFRAG_AD:
		setPPstyle(PP_GENERIC_NORMAL, 0, DEFAULT);
		break;

	  case STARTD_AD:
		setPPstyle(PP_STARTD_NORMAL, 0, DEFAULT);
		break;

	  case SCHEDD_AD:
		setPPstyle(PP_SCHEDD_NORMAL, 0, DEFAULT);
		break;

	  case MASTER_AD:
		setPPstyle(PP_MASTER_NORMAL, 0, DEFAULT);
		break;

	  case CKPT_SRVR_AD:
		setPPstyle(PP_CKPT_SRVR_NORMAL, 0, DEFAULT);
		break;

	  case COLLECTOR_AD:
		setPPstyle(PP_COLLECTOR_NORMAL, 0, DEFAULT);
		break;

	  case STORAGE_AD:
		setPPstyle(PP_STORAGE_NORMAL, 0, DEFAULT);
		break;

	  case NEGOTIATOR_AD:
		setPPstyle(PP_NEGOTIATOR_NORMAL, 0, DEFAULT);
		break;

	  case GRID_AD:
		setPPstyle(PP_GRID_NORMAL, 0, DEFAULT);
		break;

	  case GENERIC_AD:
		setPPstyle(PP_GENERIC, 0, DEFAULT);
		break;

	  case ANY_AD:
		setPPstyle(PP_ANY_NORMAL, 0, DEFAULT);
		break;

	  default:
		setPPstyle(PP_VERBOSE, 0, DEFAULT);
	}

	// set the constraints implied by the mode
	switch (mode) {
#ifdef HAVE_EXT_POSTGRESQL
	  case MODE_QUILL_NORMAL:
#endif /* HAVE_EXT_POSTGRESQL */
	  case MODE_DEFRAG_NORMAL:
	  case MODE_STARTD_NORMAL:
	  case MODE_MASTER_NORMAL:
	  case MODE_CKPT_SRVR_NORMAL:
	  case MODE_SCHEDD_NORMAL:
	  case MODE_SCHEDD_SUBMITTORS:
	  case MODE_COLLECTOR_NORMAL:
	  case MODE_NEGOTIATOR_NORMAL:
	  case MODE_STORAGE_NORMAL:
	  case MODE_GENERIC_NORMAL:
	  case MODE_ANY_NORMAL:
	  case MODE_GRID_NORMAL:
	  case MODE_HAD_NORMAL:
		// no mode-implied constraint for these
		break;

	  case MODE_OTHER:
		// tell the query object what the type we're querying is
		query->setGenericQueryType(genericType);
		free(genericType);
		genericType = NULL;
		break;

	  case MODE_STARTD_AVAIL:
		// For now, -avail shows you machines avail to anyone.
		sprintf (buffer, "%s == \"%s\"", ATTR_STATE,
					state_to_string(unclaimed_state));
		if (diagnose) {
			printf ("Adding constraint [%s]\n", buffer);
		}
		query->addORConstraint (buffer);
		break;

	  case MODE_STARTD_RUN:
		sprintf (buffer, "%s == \"%s\"", ATTR_STATE,
					state_to_string(claimed_state));
		if (diagnose) {
			printf ("Adding constraint [%s]\n", buffer);
		}
		query->addORConstraint (buffer);
		break;

	  case MODE_STARTD_COD:
		sprintf (buffer, "%s > 0", ATTR_NUM_COD_CLAIMS );
		if (diagnose) {
			printf ("Adding constraint [%s]\n", buffer);
		}
		query->addORConstraint (buffer);
		break;

	  default:
		break;
	}

	// -java: restrict to machines with a working JVM and project its attributes
	if(javaMode) {
		sprintf( buffer, "%s == TRUE", ATTR_HAS_JAVA );
		if (diagnose) {
			printf ("Adding constraint [%s]\n", buffer);
		}
		query->addANDConstraint (buffer);

		projList.AppendArg(ATTR_HAS_JAVA);
		projList.AppendArg(ATTR_JAVA_MFLOPS);
		projList.AppendArg(ATTR_JAVA_VENDOR);
		projList.AppendArg(ATTR_JAVA_VERSION);
	}

	// -offline: restrict to ads with offline universes
	if(offlineMode) {
		query->addANDConstraint( "size( OfflineUniverses ) != 0" );

		projList.AppendArg( "OfflineUniverses" );

		//
		// Since we can't add a regex to a projection, explicitly list all
		// the attributes we know about.
		//
		projList.AppendArg( "HasVM" );
		projList.AppendArg( "VMOfflineReason" );
		projList.AppendArg( "VMOfflineTime" );
	}

	// -absent: restrict to absent ads
	if(absentMode) {
		sprintf( buffer, "%s == TRUE", ATTR_ABSENT );
		if (diagnose) {
			printf( "Adding constraint %s\n", buffer );
		}
		query->addANDConstraint( buffer );

		projList.AppendArg( ATTR_ABSENT );
		projList.AppendArg( ATTR_LAST_HEARD_FROM );
		projList.AppendArg( ATTR_CLASSAD_LIFETIME );
	}

	// -vm: restrict to machines with VM capability and project VM attributes
	if(vmMode) {
		sprintf( buffer, "%s == TRUE", ATTR_HAS_VM);
		if (diagnose) {
			printf ("Adding constraint [%s]\n", buffer);
		}
		query->addANDConstraint (buffer);

		projList.AppendArg(ATTR_VM_TYPE);
		projList.AppendArg(ATTR_VM_MEMORY);
		projList.AppendArg(ATTR_VM_NETWORKING);
		projList.AppendArg(ATTR_VM_NETWORKING_TYPES);
		projList.AppendArg(ATTR_VM_HARDWARE_VT);
		projList.AppendArg(ATTR_VM_AVAIL_NUM);
		projList.AppendArg(ATTR_VM_ALL_GUEST_MACS);
		projList.AppendArg(ATTR_VM_ALL_GUEST_IPS);
		projList.AppendArg(ATTR_VM_GUEST_MAC);
		projList.AppendArg(ATTR_VM_GUEST_IP);
	}

	// second pass: add regular parameters and constraints
	if (diagnose) {
		printf ("----------\n");
	}

	secondPass (argc, argv);

	// initialize the totals object
	if (ppStyle == PP_CUSTOM && using_print_format) {
		if (pmHeadFoot & HF_NOSUMMARY) ppTotalStyle = PP_CUSTOM;
	} else {
		ppTotalStyle = ppStyle;
	}
	TrackTotals	totals(ppTotalStyle);

	// fetch the query
	QueryResult q;

	if ((mode == MODE_STARTD_NORMAL) && (ppStyle == PP_STARTD_NORMAL)) {
		// normal startd table: project only the columns we will display
		projList.AppendArg("Name");
		projList.AppendArg("Machine");
		projList.AppendArg("Opsys");
		projList.AppendArg("Arch");
		projList.AppendArg("State");
		projList.AppendArg("Activity");
		projList.AppendArg("LoadAvg");
		projList.AppendArg("Memory");
		projList.AppendArg("ActvtyTime");
		projList.AppendArg("MyCurrentTime");
		projList.AppendArg("EnteredCurrentActivity");
	} else if( ppStyle == PP_VERBOSE ) {
		// Remove everything from the projection list if we're displaying
		// the "long form" of the ads.
		projList.Clear();
		// but if -attributes was supplied, show only those attributes
		if ( ! dashAttributes.isEmpty()) {
			const char * s;
			dashAttributes.rewind();
			while ((s = dashAttributes.next())) {
				projList.AppendArg(s);
			}
		}
	}

	if( projList.Count() > 0 ) {
		char **attr_list = projList.GetStringArray();
		query->setDesiredAttrs(attr_list);
		deleteStringArray(attr_list);
	}

	// if diagnose was requested, just print the query ad
	if (diagnose) {
		ClassAd 	queryAd;

		// print diagnostic information about inferred internal state
		setMode ((Mode) 0, 0, NULL);
		setType (NULL, 0, NULL);
		setPPstyle ((ppOption) 0, 0, DEFAULT);
		printf ("----------\n");

		q = query->getQueryAd (queryAd);
		fPrintAd (stdout, queryAd);

		printf ("----------\n");
		fprintf (stderr, "Result of making query ad was:  %d\n", q);
		exit (1);
	}

	// Address (host:port) is taken from requested pool, if given.
	char* addr = (NULL != pool) ? pool->addr() : NULL;
	Daemon* requested_daemon = pool;

	// If we're in "direct" mode, then we attempt to locate the daemon
	// associated with the requested subsystem (here encoded by value of mode)
	// In this case the host:port of pool (if given) denotes which
	// pool is being consulted
	if( direct ) {
		Daemon *d = NULL;
		switch( mode ) {
		  case MODE_MASTER_NORMAL:
			d = new Daemon( DT_MASTER, direct, addr );
			break;
		  case MODE_STARTD_NORMAL:
		  case MODE_STARTD_AVAIL:
		  case MODE_STARTD_RUN:
		  case MODE_STARTD_COD:
			d = new Daemon( DT_STARTD, direct, addr );
			break;
#ifdef HAVE_EXT_POSTGRESQL
		  case MODE_QUILL_NORMAL:
			d = new Daemon( DT_QUILL, direct, addr );
			break;
#endif /* HAVE_EXT_POSTGRESQL */
		  case MODE_SCHEDD_NORMAL:
		  case MODE_SCHEDD_SUBMITTORS:
			d = new Daemon( DT_SCHEDD, direct, addr );
			break;
		  case MODE_NEGOTIATOR_NORMAL:
			d = new Daemon( DT_NEGOTIATOR, direct, addr );
			break;
		  case MODE_CKPT_SRVR_NORMAL:
		  case MODE_COLLECTOR_NORMAL:
		  case MODE_LICENSE_NORMAL:
		  case MODE_STORAGE_NORMAL:
		  case MODE_GENERIC_NORMAL:
		  case MODE_ANY_NORMAL:
		  case MODE_OTHER:
		  case MODE_GRID_NORMAL:
		  case MODE_HAD_NORMAL:
			// These have to go to the collector, anyway.
			break;
		  default:
			fprintf( stderr, "Error:  Illegal mode %d\n", mode );
			exit( 1 );
			break;
		}

		// Here is where we actually override 'addr', if we can obtain
		// address of the requested daemon/subsys.  If it can't be
		// located, then fail with error msg.
		// 'd' will be null (unset) if mode is one of above that must go to
		// collector (MODE_ANY_NORMAL, MODE_COLLECTOR_NORMAL, etc)
		if (NULL != d) {
			if( d->locate() ) {
				addr = d->addr();
				requested_daemon = d;
			} else {
				const char* id = d->idStr();
				if (NULL == id) id = d->name();
				dprintf_WriteOnErrorBuffer(stderr, true);
				if (NULL == id) id = "daemon";
				fprintf(stderr, "Error: Failed to locate %s\n", id);
				fprintf(stderr, "%s\n", d->error());
				exit( 1 );
			}
		}
	}

	ClassAdList result;
	CondorError errstack;
	if (NULL != ads_file) {
		// -ads: read ads from a file instead of querying anything
		MyString req; // query requirements
		q = query->getRequirements(req);
		const char * constraint = req.empty() ? NULL : req.c_str();
		if (read_classad_file(ads_file, result, constraint)) {
			q = Q_OK;
		}
	} else if (NULL != addr) {
		// this case executes if pool was provided, or if in "direct" mode with
		// subsystem that corresponds to a daemon (above).
		// Here 'addr' represents either the host:port of requested pool, or
		// alternatively the host:port of daemon associated with requested subsystem (direct mode)
		q = query->fetchAds (result, addr, &errstack);
	} else {
		// otherwise obtain list of collectors and submit query that way
		CollectorList * collectors = CollectorList::create();
		q = collectors->query (*query, result, &errstack);
		delete collectors;
	}

	// if any error was encountered during the query, report it and exit
	if (Q_OK != q) {

		dprintf_WriteOnErrorBuffer(stderr, true);
			// we can always provide these messages:
		fprintf( stderr, "Error: %s\n", getStrQueryResult(q) );
		fprintf( stderr, "%s\n", errstack.getFullText(true).c_str() );

		if ((NULL != requested_daemon) && ((Q_NO_COLLECTOR_HOST == q) ||
			(requested_daemon->type() == DT_COLLECTOR)))
		{
				// Specific long message if connection to collector failed.
			const char* fullhost = requested_daemon->fullHostname();
			if (NULL == fullhost) fullhost = "<unknown_host>";
			const char* daddr = requested_daemon->addr();
			if (NULL == daddr) daddr = "<unknown>";
			// NOTE(review): fixed-size buffer built with sprintf; a very long
			// hostname could overflow — consider snprintf.
			char info[1000];
			sprintf(info, "%s (%s)", fullhost, daddr);
			printNoCollectorContact( stderr, info, !expert );
		} else if ((NULL != requested_daemon) && (Q_COMMUNICATION_ERROR == q)) {
				// more helpful message for failure to connect to some daemon/subsys
			const char* id = requested_daemon->idStr();
			if (NULL == id) id = requested_daemon->name();
			if (NULL == id) id = "daemon";
			const char* daddr = requested_daemon->addr();
			if (NULL == daddr) daddr = "<unknown>";
			fprintf(stderr, "Error: Failed to contact %s at %s\n", id, daddr);
		}

		// fail
		exit (1);
	}

	if (noSort) {
		// do nothing
	} else if (sortSpecs.empty()) {
		// default classad sorting
		result.Sort((SortFunctionType)lessThanFunc);
	} else {
		// User requested custom sorting expressions:
		// insert attributes related to custom sorting
		result.Open();
		while (ClassAd* ad = result.Next()) {
			for (vector<SortSpec>::iterator ss(sortSpecs.begin());  ss != sortSpecs.end();  ++ss) {
				ss->expr->SetParentScope(ad);
				classad::Value v;
				ss->expr->Evaluate(v);
				stringstream vs;
				// This will properly render all supported value types,
				// including undefined and error, although current semantic
				// pre-filters classads where sort expressions are undef/err:
				vs << ((v.IsStringValue())?"\"":"") << v << ((v.IsStringValue())?"\"":"");
				ad->AssignExpr(ss->keyAttr.c_str(), vs.str().c_str());
				// Save the full expr in case user wants to examine on output:
				ad->AssignExpr(ss->keyExprAttr.c_str(), ss->arg.c_str());
			}
		}

		result.Open();
		result.Sort((SortFunctionType)customLessThanFunc);
	}

	// output result
	prettyPrint (result, &totals);

	delete query;

	return 0;
}
// Launch the job described by JobAd: resolve the executable path (wrapper,
// parrot, or raw command), build argument list and environment, open the
// std{in,out,err} files with user privileges, compute the renice value, and
// hand everything to Create_Process (or the privsep helper).
// Returns 1 if the job was spawned (JobPid set), 0 on any failure.
// NOTE(review): default arguments on an out-of-line member definition — valid
// only if the in-class declaration omits them; confirm against the header.
int OsProc::StartJob(FamilyInfo* family_info, NetworkNamespaceManager * network_manager = NULL, FilesystemRemap* fs_remap=NULL)
{
	int nice_inc = 0;
	bool has_wrapper = false;

	dprintf(D_FULLDEBUG,"in OsProc::StartJob()\n");

	if ( !JobAd ) {
		dprintf ( D_ALWAYS, "No JobAd in OsProc::StartJob()!\n" );
		return 0;
	}

	MyString JobName;
	if ( JobAd->LookupString( ATTR_JOB_CMD, JobName ) != 1 ) {
		dprintf( D_ALWAYS, "%s not found in JobAd.  Aborting StartJob.\n",
				 ATTR_JOB_CMD );
		return 0;
	}

	const char* job_iwd = Starter->jic->jobRemoteIWD();
	dprintf( D_ALWAYS, "IWD: %s\n", job_iwd );

		// some operations below will require a PrivSepHelper if
		// PrivSep is enabled (if it's not, privsep_helper will be
		// NULL)
	PrivSepHelper* privsep_helper = Starter->privSepHelper();

		// // // // // //
		// Arguments
		// // // // // //

		// prepend the full path to this name so that we
		// don't have to rely on the PATH inside the
		// USER_JOB_WRAPPER or for exec().

	bool transfer_exe = false;
	if (!JobAd->LookupBool(ATTR_TRANSFER_EXECUTABLE, transfer_exe)) {
		transfer_exe = false;
	}

	bool preserve_rel = false;
	if (!JobAd->LookupBool(ATTR_PRESERVE_RELATIVE_EXECUTABLE, preserve_rel)) {
		preserve_rel = false;
	}

	bool relative_exe = is_relative_to_cwd(JobName.Value());

	if (relative_exe && preserve_rel && !transfer_exe) {
		dprintf(D_ALWAYS, "Preserving relative executable path: %s\n", JobName.Value());
	}
	else if ( strcmp(CONDOR_EXEC,JobName.Value()) == 0 ) {
		// transferred executable lives in the working dir under its standard name
		JobName.sprintf( "%s%c%s",
		                 Starter->GetWorkingDir(),
		                 DIR_DELIM_CHAR,
		                 CONDOR_EXEC );
	}
	else if (relative_exe && job_iwd && *job_iwd) {
		// make the executable path absolute relative to the job's IWD
		MyString full_name;
		full_name.sprintf("%s%c%s", job_iwd, DIR_DELIM_CHAR, JobName.Value());
		JobName = full_name;
	}

	if( Starter->isGridshell() ) {
			// if we're a gridshell, just try to chmod our job, since
			// globus probably transfered it for us and left it with
			// bad permissions...
		priv_state old_priv = set_user_priv();
		int retval = chmod( JobName.Value(), S_IRWXU | S_IRWXO | S_IRWXG );
		set_priv( old_priv );
		if( retval < 0 ) {
			dprintf ( D_ALWAYS, "Failed to chmod %s!\n", JobName.Value() );
			return 0;
		}
	}

	ArgList args;

		// Since we may be adding to the argument list, we may need to deal
		// with platform-specific arg syntax in the user's args in order
		// to successfully merge them with the additional wrapper args.
	args.SetArgV1SyntaxToCurrentPlatform();

		// First, put "condor_exec" or whatever at the front of Args,
		// since that will become argv[0] of what we exec(), either
		// the wrapper or the actual job.
	if( !getArgv0() ) {
		args.AppendArg(JobName.Value());
	} else {
		args.AppendArg(getArgv0());
	}

		// Support USER_JOB_WRAPPER parameter...
	char *wrapper = NULL;
	if( (wrapper=param("USER_JOB_WRAPPER")) ) {

			// make certain this wrapper program exists and is executable
		if( access(wrapper,X_OK) < 0 ) {
			dprintf( D_ALWAYS,
					 "Cannot find/execute USER_JOB_WRAPPER file %s\n",
					 wrapper );
			free( wrapper );
			return 0;
		}
		has_wrapper = true;
			// Now, we've got a valid wrapper.  We want that to become
			// "JobName" so we exec it directly, and we want to put
			// what was the JobName (with the full path) as the first
			// argument to the wrapper
		args.AppendArg(JobName.Value());
		JobName = wrapper;
		free(wrapper);
	}

		// Support USE_PARROT
	bool use_parrot = false;
	if( JobAd->LookupBool( ATTR_USE_PARROT, use_parrot) ) {
			// Check for parrot executable
		char *parrot = NULL;
		if( (parrot=param("PARROT")) ) {
			if( access(parrot,X_OK) < 0 ) {
				dprintf( D_ALWAYS, "Unable to use parrot(Cannot find/execute "
					"at %s(%s)).\n", parrot, strerror(errno) );
				free( parrot );
				return 0;
			} else {
				// run the job under parrot: the original JobName becomes
				// parrot's first argument
				args.AppendArg(JobName.Value());
				JobName = parrot;
				free( parrot );
			}
		} else {
			dprintf( D_ALWAYS, "Unable to use parrot(Undefined path in config"
			" file)" );
			return 0;
		}
	}

		// Either way, we now have to add the user-specified args as
		// the rest of the Args string.
	MyString args_error;
	if(!args.AppendArgsFromClassAd(JobAd,&args_error)) {
		dprintf(D_ALWAYS, "Failed to read job arguments from JobAd.  "
				"Aborting OsProc::StartJob: %s\n",args_error.Value());
		return 0;
	}

		// // // // // //
		// Environment
		// // // // // //

		// Now, instantiate an Env object so we can manipulate the
		// environment as needed.
	Env job_env;

	MyString env_errors;
	if( !Starter->GetJobEnv(JobAd,&job_env,&env_errors) ) {
		dprintf( D_ALWAYS, "Aborting OSProc::StartJob: %s\n",
				 env_errors.Value());
		return 0;
	}

		// // // // // //
		// Standard Files
		// // // // // //

	// handle stdin, stdout, and stderr redirection
	int fds[3];
		// initialize these to -2 to mean they're not specified.
		// -1 will be treated as an error.
	fds[0] = -2; fds[1] = -2; fds[2] = -2;

		// in order to open these files we must have the user's privs:
	priv_state priv;
	priv = set_user_priv();

		// if we're in PrivSep mode, we won't necessarily be able to
		// open the files for the job. getStdFile will return us an
		// open FD in some situations, but otherwise will give us
		// a filename that we'll pass to the PrivSep Switchboard
		//
	bool stdin_ok;
	bool stdout_ok;
	bool stderr_ok;
	MyString privsep_stdin_name;
	MyString privsep_stdout_name;
	MyString privsep_stderr_name;
	if (privsep_helper != NULL) {
		stdin_ok = getStdFile(SFT_IN,
		                      NULL,
		                      true,
		                      "Input file",
		                      &fds[0],
		                      &privsep_stdin_name);
		stdout_ok = getStdFile(SFT_OUT,
		                       NULL,
		                       true,
		                       "Output file",
		                       &fds[1],
		                       &privsep_stdout_name);
		stderr_ok = getStdFile(SFT_ERR,
		                       NULL,
		                       true,
		                       "Error file",
		                       &fds[2],
		                       &privsep_stderr_name);
	}
	else {
		fds[0] = openStdFile( SFT_IN,
		                      NULL,
		                      true,
		                      "Input file");
		stdin_ok = (fds[0] != -1);
		fds[1] = openStdFile( SFT_OUT,
		                      NULL,
		                      true,
		                      "Output file");
		stdout_ok = (fds[1] != -1);
		fds[2] = openStdFile( SFT_ERR,
		                      NULL,
		                      true,
		                      "Error file");
		stderr_ok = (fds[2] != -1);
	}

	/* Bail out if we couldn't open the std files correctly */
	if( !stdin_ok || !stdout_ok || !stderr_ok ) {
		/* only close ones that had been opened correctly */
		for ( int i = 0; i <= 2; i++ ) {
			if ( fds[i] >= 0 ) {
				daemonCore->Close_FD ( fds[i] );
			}
		}
		dprintf(D_ALWAYS, "Failed to open some/all of the std files...\n");
		dprintf(D_ALWAYS, "Aborting OsProc::StartJob.\n");
		set_priv(priv); /* go back to original priv state before leaving */
		return 0;
	}

		// // // // // //
		// Misc + Exec
		// // // // // //

	if( !ThisProcRunsAlongsideMainProc() ) {
		Starter->jic->notifyJobPreSpawn();
	}

	// compute job's renice value by evaluating the machine's
	// JOB_RENICE_INCREMENT in the context of the job ad...

    char* ptmp = param( "JOB_RENICE_INCREMENT" );
	if( ptmp ) {
			// insert renice expr into our copy of the job ad
		MyString reniceAttr = "Renice = ";
		reniceAttr += ptmp;
		if( !JobAd->Insert( reniceAttr.Value() ) ) {
			dprintf( D_ALWAYS, "ERROR: failed to insert JOB_RENICE_INCREMENT "
				"into job ad, Aborting OsProc::StartJob...\n" );
			free( ptmp );
			return 0;
		}
			// evaluate
		if( JobAd->EvalInteger( "Renice", NULL, nice_inc ) ) {
			dprintf( D_ALWAYS, "Renice expr \"%s\" evaluated to %d\n",
					 ptmp, nice_inc );
		} else {
			dprintf( D_ALWAYS, "WARNING: job renice expr (\"%s\") doesn't "
					 "eval to int!  Using default of 10...\n", ptmp );
			nice_inc = 10;
		}

			// enforce valid ranges for nice_inc
		if( nice_inc < 0 ) {
			dprintf( D_FULLDEBUG, "WARNING: job renice value (%d) is too "
					 "low: adjusted to 0\n", nice_inc );
			nice_inc = 0;
		}
		else if( nice_inc > 19 ) {
			dprintf( D_FULLDEBUG, "WARNING: job renice value (%d) is too "
					 "high: adjusted to 19\n", nice_inc );
			nice_inc = 19;
		}

		ASSERT( ptmp );
		free( ptmp );
		ptmp = NULL;
	} else {
			// if JOB_RENICE_INCREMENT is undefined, default to 10
		nice_inc = 10;
	}

		// in the below dprintfs, we want to skip past argv[0], which
		// is sometimes condor_exec, in the Args string.

	MyString args_string;
	args.GetArgsStringForDisplay(&args_string, 1);
	if( has_wrapper ) {
			// print out exactly what we're doing so folks can debug
			// it, if they need to.
		dprintf( D_ALWAYS, "Using wrapper %s to exec %s\n", JobName.Value(),
				 args_string.Value() );

		MyString wrapper_err;
		wrapper_err.sprintf("%s%c%s", Starter->GetWorkingDir(),
				 	DIR_DELIM_CHAR,
					JOB_WRAPPER_FAILURE_FILE);
		if( ! job_env.SetEnv("_CONDOR_WRAPPER_ERROR_FILE", wrapper_err.Value()) ) {
			dprintf( D_ALWAYS, "Failed to set _CONDOR_WRAPPER_ERROR_FILE environment variable\n");
		}
	} else {
		dprintf( D_ALWAYS, "About to exec %s %s\n", JobName.Value(),
				 args_string.Value() );
	}

		// tell the job where it can find its machine and job ads
	MyString path;
	path.sprintf("%s%c%s", Starter->GetWorkingDir(),
			 	DIR_DELIM_CHAR,
				MACHINE_AD_FILENAME);
	if( ! job_env.SetEnv("_CONDOR_MACHINE_AD", path.Value()) ) {
		dprintf( D_ALWAYS, "Failed to set _CONDOR_MACHINE_AD environment variable\n");
	}

	path.sprintf("%s%c%s", Starter->GetWorkingDir(),
			 	DIR_DELIM_CHAR,
				JOB_AD_FILENAME);
	if( ! job_env.SetEnv("_CONDOR_JOB_AD", path.Value()) ) {
		dprintf( D_ALWAYS, "Failed to set _CONDOR_JOB_AD environment variable\n");
	}

		// Grab the full environment back out of the Env object
	if(IsFulldebug(D_FULLDEBUG)) {
		MyString env_string;
		job_env.getDelimitedStringForDisplay(&env_string);
		dprintf(D_FULLDEBUG, "Env = %s\n", env_string.Value());
	}

	// Check to see if we need to start this process paused, and if
	// so, pass the right flag to DC::Create_Process().
	int job_opt_mask = DCJOBOPT_NO_CONDOR_ENV_INHERIT;
	if (!param_boolean("JOB_INHERITS_STARTER_ENVIRONMENT",false)) {
		job_opt_mask |= DCJOBOPT_NO_ENV_INHERIT;
	}
	int suspend_job_at_exec = 0;
	JobAd->LookupBool( ATTR_SUSPEND_JOB_AT_EXEC, suspend_job_at_exec);
	if( suspend_job_at_exec ) {
		dprintf( D_FULLDEBUG, "OsProc::StartJob(): "
				 "Job wants to be suspended at exec\n" );
		job_opt_mask |= DCJOBOPT_SUSPEND_ON_EXEC;
	}

	// If there is a requested coresize for this job, enforce it.
	// It is truncated because you can't put an unsigned integer
	// into a classad. I could rewrite condor's use of ATTR_CORE_SIZE to
	// be a float, but then when that attribute is read/written to the
	// job queue log by/or shared between versions of Condor which view the
	// type of that attribute differently, calamity would arise.
	int core_size_truncated;
	size_t core_size;
	size_t *core_size_ptr = NULL;
	if ( JobAd->LookupInteger( ATTR_CORE_SIZE, core_size_truncated ) ) {
		core_size = (size_t)core_size_truncated;
		core_size_ptr = &core_size;
	}

	// optional hard cap on the job's address space, from STARTER_RLIMIT_AS
	// (evaluated in the context of machine ad + job ad; value in megabytes)
	long rlimit_as_hard_limit = 0;
	char *rlimit_expr = param("STARTER_RLIMIT_AS");
	if (rlimit_expr) {
		classad::ClassAdParser parser;
		classad::ExprTree *tree = parser.ParseExpression(rlimit_expr);
		if (tree) {
			classad::Value val;
			int result;
			if (EvalExprTree(tree, Starter->jic->machClassAd(), JobAd, val) &&
				val.IsIntegerValue(result)) {
				rlimit_as_hard_limit = ((long)result) * 1024 * 1024;
				dprintf(D_ALWAYS, "Setting job's virtual memory rlimit to %ld megabytes\n", rlimit_as_hard_limit);
			} else {
				dprintf(D_ALWAYS, "Can't evaluate STARTER_RLIMIT_AS expression %s\n", rlimit_expr);
			}
		} else {
			dprintf(D_ALWAYS, "Can't parse STARTER_RLIMIT_AS expression: %s\n", rlimit_expr);
		}
	}

	int *affinity_mask = makeCpuAffinityMask(Starter->getMySlotNumber());

#if defined ( WIN32 )
    owner_profile_.update ();
    /*************************************************************
    NOTE: We currently *ONLY* support loading slot-user profiles.
    This limitation will be addressed shortly, by allowing regular
    users to load their registry hive - Ben [2008-09-31]
    **************************************************************/
    bool load_profile = false,
         run_as_owner = false;
    JobAd->LookupBool ( ATTR_JOB_LOAD_PROFILE, load_profile );
    JobAd->LookupBool ( ATTR_JOB_RUNAS_OWNER,  run_as_owner );
    if ( load_profile && !run_as_owner ) {
        if ( owner_profile_.load () ) {
            /* publish the users environment into that of the main

            job's environment */
            if ( !owner_profile_.environment ( job_env ) ) {
                dprintf ( D_ALWAYS, "OsProc::StartJob(): Failed to "
                    "export owner's environment.\n" );
            }
        } else {
            dprintf ( D_ALWAYS, "OsProc::StartJob(): Failed to load "
                "owner's profile.\n" );
        }
    }
#endif

		// While we are still in user priv, print out the username
#if defined(LINUX)
	if( Starter->glexecPrivSepHelper() ) {
			// TODO: if there is some way to figure out the final username,
			// print it out here or after starting the job.
		dprintf(D_ALWAYS,"Running job via glexec\n");
	}
#else
	if( false ) {
	}
#endif
	else {
		char const *username = NULL;
		char const *how = "";
		CondorPrivSepHelper* cpsh = Starter->condorPrivSepHelper();
		if( cpsh ) {
			username = cpsh->get_user_name();
			how = "via privsep switchboard ";
		}
		else {
			username = get_real_username();
		}
		if( !username ) {
			username = "******";
		}
		dprintf(D_ALWAYS,"Running job %sas user %s\n",how,username);
	}

	set_priv ( priv );

	// use this to return more detailed and reliable error message info
	// from create-process operation.
	MyString create_process_err_msg;

	if (privsep_helper != NULL) {
		const char* std_file_names[3] = {
			privsep_stdin_name.Value(),
			privsep_stdout_name.Value(),
			privsep_stderr_name.Value()
		};
		JobPid = privsep_helper->create_process(JobName.Value(),
		                                        args,
		                                        job_env,
		                                        job_iwd,
		                                        fds,
		                                        std_file_names,
		                                        nice_inc,
		                                        core_size_ptr,
		                                        1,
		                                        job_opt_mask,
		                                        family_info,
		                                        affinity_mask,
		                                        &create_process_err_msg);
	}
	else {
		JobPid = daemonCore->Create_Process( JobName.Value(),
		                                     args,
		                                     PRIV_USER_FINAL,
		                                     1,
		                                     FALSE,
		                                     &job_env,
		                                     job_iwd,
		                                     family_info,
		                                     NULL,
		                                     fds,
		                                     NULL,
		                                     nice_inc,
		                                     NULL,
		                                     job_opt_mask, 
		                                     core_size_ptr,
		                                     affinity_mask,
		                                     NULL,
		                                     &create_process_err_msg,
		                                     fs_remap,
		                                     rlimit_as_hard_limit,
		                                     network_manager);
	}

	// Create_Process() saves the errno for us if it is an "interesting" error.
	// capture errno immediately, before any other call can clobber it
	int create_process_errno = errno;

	// errno is 0 in the privsep case.  This executes for the daemon core create-process logic
	if ((FALSE == JobPid) && (0 != create_process_errno)) {
		if (create_process_err_msg != "") create_process_err_msg += " ";
		MyString errbuf;
		errbuf.sprintf("(errno=%d: '%s')", create_process_errno, strerror(create_process_errno));
		create_process_err_msg += errbuf;
	}

	// now close the descriptors in fds array.  our child has inherited
	// them already, so we should close them so we do not leak descriptors.
	// NOTE, we want to use a special method to close the starter's
	// versions, if that's what we're using, so we don't think we've
	// still got those available in other parts of the code for any
	// reason.
	for ( int i = 0; i <= 2; i++ ) {
		if ( fds[i] >= 0 ) {
			daemonCore->Close_FD ( fds[i] );
		}
	}

	if ( JobPid == FALSE ) {
		JobPid = -1;

		if(!create_process_err_msg.IsEmpty()) {

			// if the reason Create_Process failed was that registering
			// a family with the ProcD failed, it is indicative of a
			// problem regarding this execute machine, not the job. in
			// this case, we'll want to EXCEPT instead of telling the
			// Shadow to put the job on hold. there are probably other
			// error conditions where EXCEPTing would be more appropriate
			// as well...
			//
			if (create_process_errno == DaemonCore::ERRNO_REGISTRATION_FAILED) {
				EXCEPT("Create_Process failed to register the job with the ProcD");
			}

			MyString err_msg = "Failed to execute '";
			err_msg += JobName;
			err_msg += "'";
			if(!args_string.IsEmpty()) {
				err_msg += " with arguments ";
				err_msg += args_string.Value();
			}
			err_msg += ": ";
			err_msg += create_process_err_msg;
			if( !ThisProcRunsAlongsideMainProc() ) {
				Starter->jic->notifyStarterError( err_msg.Value(),
			    	                              true,
			        	                          CONDOR_HOLD_CODE_FailedToCreateProcess,
			            	                      create_process_errno );
			}
		}

		dprintf(D_ALWAYS,"Create_Process(%s,%s, ...) failed: %s\n",
			JobName.Value(), args_string.Value(), create_process_err_msg.Value());
		return 0;
	}

	num_pids++;

	dprintf(D_ALWAYS,"Create_Process succeeded, pid=%d\n",JobPid);

	job_start_time.getTime();

	return 1;
}
//---------------------------------------------------------------------------
// writeSubmitFile: generate the condor_submit description file that runs
// condor_dagman itself (optionally under valgrind) as a scheduler-universe
// job.
//
//   deepOpts    - options that propagate into nested DAG submits; written
//                 into the dagman argument list.
//   shallowOpts - options applying only to this DAG (file names, throttles,
//                 append lines, etc.).
//
// The parameters are conceptually const (see the commented-out
// "/* const */") but presumably some accessors are not const-qualified —
// TODO confirm.  On any error this function prints to stderr and calls
// exit(1); there is no error return.
void writeSubmitFile(/* const */ SubmitDagDeepOptions &deepOpts,
			/* const */ SubmitDagShallowOptions &shallowOpts)
{
	FILE *pSubFile = safe_fopen_wrapper_follow(shallowOpts.strSubFile.Value(), "w");
	if (!pSubFile)
	{
		fprintf( stderr, "ERROR: unable to create submit file %s\n",
					shallowOpts.strSubFile.Value() );
		exit( 1 );
	}

		// Decide which executable the submit file will run: valgrind
		// (which then runs condor_dagman) or condor_dagman directly.
	const char *executable = NULL;
	MyString valgrindPath; // outside if so executable is valid!
	if ( shallowOpts.runValgrind ) {
		valgrindPath = which( valgrind_exe );
		if ( valgrindPath == "" ) {
			fprintf( stderr, "ERROR: can't find %s in PATH, aborting.\n",
						valgrind_exe );
			exit( 1 );
		} else {
			executable = valgrindPath.Value();
		}
	} else {
		executable = deepOpts.strDagmanPath.Value();
	}

		// Header comments recording how this submit file was produced.
	fprintf(pSubFile, "# Filename: %s\n", shallowOpts.strSubFile.Value());
	fprintf(pSubFile, "# Generated by condor_submit_dag ");
	shallowOpts.dagFiles.rewind();
	char *dagFile;
	while ( (dagFile = shallowOpts.dagFiles.next()) != NULL ) {
		fprintf(pSubFile, "%s ", dagFile);
	}
	fprintf(pSubFile, "\n");

	fprintf(pSubFile, "universe\t= scheduler\n");
	fprintf(pSubFile, "executable\t= %s\n", executable);
	fprintf(pSubFile, "getenv\t\t= True\n");
	fprintf(pSubFile, "output\t\t= %s\n", shallowOpts.strLibOut.Value());
	fprintf(pSubFile, "error\t\t= %s\n", shallowOpts.strLibErr.Value());
	fprintf(pSubFile, "log\t\t= %s\n", shallowOpts.strSchedLog.Value());
#if !defined ( WIN32 )
	fprintf(pSubFile, "remove_kill_sig\t= SIGUSR1\n" );
#endif
	fprintf(pSubFile, "+%s\t= \"%s =?= $(cluster)\"\n",
				ATTR_OTHER_JOB_REMOVE_REQUIREMENTS, ATTR_DAGMAN_JOB_ID );

		// ensure DAGMan is automatically requeued by the schedd if it
		// exits abnormally or is killed (e.g., during a reboot)
	const char *defaultRemoveExpr = "( ExitSignal =?= 11 || "
				"(ExitCode =!= UNDEFINED && ExitCode >=0 && ExitCode <= 2))";
	MyString removeExpr(defaultRemoveExpr);
		// The DAGMAN_ON_EXIT_REMOVE config macro overrides the default.
	char *tmpRemoveExpr = param("DAGMAN_ON_EXIT_REMOVE");
	if ( tmpRemoveExpr ) {
		removeExpr = tmpRemoveExpr;
		free(tmpRemoveExpr);
	}
	fprintf(pSubFile, "# Note: default on_exit_remove expression:\n");
	fprintf(pSubFile, "# %s\n", defaultRemoveExpr);
	fprintf(pSubFile, "# attempts to ensure that DAGMan is automatically\n");
	fprintf(pSubFile, "# requeued by the schedd if it exits abnormally or\n");
	fprintf(pSubFile, "# is killed (e.g., during a reboot).\n");
	fprintf(pSubFile, "on_exit_remove\t= %s\n", removeExpr.Value() );

	fprintf(pSubFile, "copy_to_spool\t= %s\n",
				shallowOpts.copyToSpool ? "True" : "False" );

	//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
	// Be sure to change MIN_SUBMIT_FILE_VERSION in dagman_main.cpp
	// if the arguments passed to condor_dagman change in an
	// incompatible way!!
	//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
	ArgList args;

	if ( shallowOpts.runValgrind ) {
			// Valgrind's own options come first; the real dagman binary
			// becomes valgrind's final (program) argument.
		args.AppendArg("--tool=memcheck");
		args.AppendArg("--leak-check=yes");
		args.AppendArg("--show-reachable=yes");
		args.AppendArg(deepOpts.strDagmanPath.Value());
	}

		// -f (foreground) and -l . (log dir) are daemon-core options.
	args.AppendArg("-f");
	args.AppendArg("-l");
	args.AppendArg(".");
	if ( shallowOpts.iDebugLevel != DEBUG_UNSET ) {
		args.AppendArg("-Debug");
		args.AppendArg(shallowOpts.iDebugLevel);
	}
	args.AppendArg("-Lockfile");
	args.AppendArg(shallowOpts.strLockFile.Value());
	args.AppendArg("-AutoRescue");
	args.AppendArg(deepOpts.autoRescue);
	args.AppendArg("-DoRescueFrom");
	args.AppendArg(deepOpts.doRescueFrom);
	if(!deepOpts.always_use_node_log) {
		args.AppendArg("-dont_use_default_node_log");
	}

		// One -Dag flag per DAG file.
	shallowOpts.dagFiles.rewind();
	while ( (dagFile = shallowOpts.dagFiles.next()) != NULL ) {
		args.AppendArg("-Dag");
		args.AppendArg(dagFile);
	}

		// Throttles: zero means "not set", so only non-zero values are
		// passed along.
	if(shallowOpts.iMaxIdle != 0) {
		args.AppendArg("-MaxIdle");
		args.AppendArg(shallowOpts.iMaxIdle);
	}
	if(shallowOpts.iMaxJobs != 0) {
		args.AppendArg("-MaxJobs");
		args.AppendArg(shallowOpts.iMaxJobs);
	}
	if(shallowOpts.iMaxPre != 0) {
		args.AppendArg("-MaxPre");
		args.AppendArg(shallowOpts.iMaxPre);
	}
	if(shallowOpts.iMaxPost != 0) {
		args.AppendArg("-MaxPost");
		args.AppendArg(shallowOpts.iMaxPost);
	}
	if(shallowOpts.bNoEventChecks) {
			// -NoEventChecks is deliberately NOT forwarded any more.
		// strArgs += " -NoEventChecks";
		printf( "Warning: -NoEventChecks is ignored; please use "
					"the DAGMAN_ALLOW_EVENTS config parameter instead\n");
	}
	if(!shallowOpts.bPostRun) {
		args.AppendArg("-DontAlwaysRunPost");
	}
	if(deepOpts.bAllowLogError) {
		args.AppendArg("-AllowLogError");
	}
	if(deepOpts.useDagDir) {
		args.AppendArg("-UseDagDir");
	}
	if(deepOpts.suppress_notification) {
		args.AppendArg("-Suppress_notification");
	} else {
		args.AppendArg("-Dont_Suppress_notification");
	}
	if ( shallowOpts.doRecovery ) {
		args.AppendArg( "-DoRecov" );
	}

		// Record this tool's version so dagman can detect mismatches.
	args.AppendArg("-CsdVersion");
	args.AppendArg(CondorVersion());

	if(deepOpts.allowVerMismatch) {
		args.AppendArg("-AllowVersionMismatch");
	}
	if(shallowOpts.dumpRescueDag) {
		args.AppendArg("-DumpRescue");
	}
	if(deepOpts.bVerbose) {
		args.AppendArg("-Verbose");
	}
	if(deepOpts.bForce) {
		args.AppendArg("-Force");
	}
	if(deepOpts.strNotification != "") {
		args.AppendArg("-Notification");
		args.AppendArg(deepOpts.strNotification);
	}
	if(deepOpts.strDagmanPath != "") {
		args.AppendArg("-Dagman");
		args.AppendArg(deepOpts.strDagmanPath);
	}
	if(deepOpts.strOutfileDir != "") {
		args.AppendArg("-Outfile_dir");
		args.AppendArg(deepOpts.strOutfileDir);
	}
	if(deepOpts.updateSubmit) {
		args.AppendArg("-Update_submit");
	}
	if(deepOpts.importEnv) {
		args.AppendArg("-Import_env");
	}
	if( deepOpts.priority != 0 ) {
		args.AppendArg("-Priority");
		args.AppendArg(deepOpts.priority);
	}

		// Serialize the argument list into the submit file's
		// "arguments" line.
	MyString arg_str,args_error;
	if(!args.GetArgsStringV1WackedOrV2Quoted(&arg_str,&args_error)) {
		fprintf(stderr,"Failed to insert arguments: %s",args_error.Value());
		exit(1);
	}
	fprintf(pSubFile, "arguments\t= %s\n", arg_str.Value());

	//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
	// Be sure to change MIN_SUBMIT_FILE_VERSION in dagman_main.cpp
	// if the environment passed to condor_dagman changes in an
	// incompatible way!!
	//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
	EnvFilter env;
	if ( deepOpts.importEnv ) {
		env.Import( );
	}
	env.SetEnv("_CONDOR_DAGMAN_LOG", shallowOpts.strDebugLog.Value());
		// Single-string "NAME=value" overload of SetEnv.
	env.SetEnv("_CONDOR_MAX_DAGMAN_LOG=0");
	if ( shallowOpts.strScheddDaemonAdFile != "" ) {
		env.SetEnv("_CONDOR_SCHEDD_DAEMON_AD_FILE",
					shallowOpts.strScheddDaemonAdFile.Value());
	}
	if ( shallowOpts.strScheddAddressFile != "" ) {
		env.SetEnv("_CONDOR_SCHEDD_ADDRESS_FILE",
					shallowOpts.strScheddAddressFile.Value());
	}
	if ( shallowOpts.strConfigFile != "" ) {
			// Fail early if the custom config file is not accessible.
		if ( access( shallowOpts.strConfigFile.Value(), F_OK ) != 0 ) {
			fprintf( stderr, "ERROR: unable to read config file %s "
						"(error %d, %s)\n",
						shallowOpts.strConfigFile.Value(), errno,
						strerror(errno) );
			exit(1);
		}
		env.SetEnv("_CONDOR_DAGMAN_CONFIG_FILE",
					shallowOpts.strConfigFile.Value());
	}

		// Serialize the environment into the submit file.
	MyString env_str;
	MyString env_errors;
	if(!env.getDelimitedStringV1RawOrV2Quoted(&env_str,&env_errors)) {
		fprintf(stderr,"Failed to insert environment: %s",
					env_errors.Value());
		exit(1);
	}
	fprintf(pSubFile, "environment\t= %s\n",env_str.Value());

	if(deepOpts.strNotification != "") {
		fprintf(pSubFile, "notification\t= %s\n",
					deepOpts.strNotification.Value());
	}

		// Append user-specified stuff to submit file...
		// ...first, the insert file, if any...
	if (shallowOpts.appendFile != "") {
		FILE *aFile = safe_fopen_wrapper_follow(
					shallowOpts.appendFile.Value(), "r");
		if (!aFile) {
			fprintf( stderr, "ERROR: unable to read submit append file (%s)\n",
						shallowOpts.appendFile.Value() );
			exit( 1 );
		}
			// NOTE: getline() here is presumably HTCondor's own line
			// reader (returns char*), not POSIX getline(3) — TODO confirm.
		char *line;
		while ((line = getline(aFile)) != NULL) {
			fprintf(pSubFile, "%s\n", line);
		}
		fclose(aFile);
	}

		// ...now things specified directly on the command line.
	shallowOpts.appendLines.rewind();
	char *command;
	while ((command = shallowOpts.appendLines.next()) != NULL) {
		fprintf(pSubFile, "%s\n", command);
	}

	fprintf(pSubFile, "queue\n");
	fclose(pSubFile);
}
//-------------------------------------------------------------------------
// condor_submit: submit one DAG node's job by invoking condor_submit,
// decorating the job's ClassAd (via repeated "-a"/-append arguments) with
// DAG bookkeeping attributes: the node name, the parent DAGMan's cluster
// ID, parent node names, DAG VARS, DAG_STATUS, FAILED_COUNT, etc.
//
//   dm                 - the DAGMan state (submit exe path, job ID, options)
//   cmdFile            - the node's submit file
//   condorID           - [out] receives the submitted job's Condor ID
//   DAGNodeName        - this node's name in the DAG
//   DAGParentNodeNames - space-separated names of this node's parents
//   vars               - VARS entries from the DAG file for this node
//   priority/retry     - node priority and current retry count
//   directory          - node directory to cd into for the submit
//   workflowLogFile    - the default DAGMan nodes log to append
//   hold_claim/batchName - claim-idle and batch-name options
//
// Returns true on successful submission, false otherwise.
bool condor_submit( const Dagman &dm, const char* cmdFile, CondorID& condorID,
			const char* DAGNodeName, MyString &DAGParentNodeNames,
			List<Job::NodeVar> *vars, int priority, int retry,
			const char* directory, const char *workflowLogFile,
			bool hold_claim, const MyString &batchName )
{
	TmpDir tmpDir;
	MyString errMsg;
	if ( !tmpDir.Cd2TmpDir( directory, errMsg ) ) {
		debug_printf( DEBUG_QUIET,
				"Could not change to node directory %s: %s\n",
				directory, errMsg.Value() );
		return false;
	}

	ArgList args;

		// construct arguments to condor_submit to add attributes to the
		// job classad which identify the job's node name in the DAG, the
		// node names of its parents in the DAG, and the job ID of DAGMan
		// itself; then, define submit_event_notes to print the job's node
		// name inside the submit event in the userlog
		// NOTE: we specify the job ID of DAGMan using only its cluster ID
		// so that it may be referenced by jobs in their priority
		// attribute (which needs an int, not a string).  Doing so allows
		// users to effectively "batch" jobs by DAG so that when they
		// submit many DAGs to the same schedd, all the ready jobs from
		// one DAG complete before any jobs from another begin.
	args.AppendArg( dm.condorSubmitExe );

	args.AppendArg( "-a" ); // -a == -append; using -a to save chars
	MyString nodeName = MyString(ATTR_DAG_NODE_NAME_ALT) + " = " + DAGNodeName;
	args.AppendArg( nodeName.Value() );

		// append a line adding the parent DAGMan's cluster ID to the job ad
	args.AppendArg( "-a" ); // -a == -append; using -a to save chars
	MyString dagJobId = MyString( "+" ) + ATTR_DAGMAN_JOB_ID + " = " +
				IntToStr( dm.DAGManJobId._cluster );
	args.AppendArg( dagJobId.Value() );

		// now we append a line setting the same thing as a submit-file macro
		// (this is necessary so the user can reference it in the priority)
	args.AppendArg( "-a" ); // -a == -append; using -a to save chars
	MyString dagJobIdMacro = MyString( "" ) + ATTR_DAGMAN_JOB_ID + " = " +
				IntToStr( dm.DAGManJobId._cluster );
	args.AppendArg( dagJobIdMacro.Value() );

		// Pass the batch name to lower levels.
	if ( batchName != "" ) {
		args.AppendArg( "-batch-name" );
		args.AppendArg( batchName.Value() );
	}

		// Put the node name into the userlog submit event.
	args.AppendArg( "-a" ); // -a == -append; using -a to save chars
	MyString submitEventNotes = MyString(
				"submit_event_notes = DAG Node: " ) + DAGNodeName;
	args.AppendArg( submitEventNotes.Value() );

	ASSERT( workflowLogFile );

		// We need to append the DAGman default log file to
		// the log file list
	args.AppendArg( "-a" ); // -a == -append; using -a to save chars
	std::string dlog( "dagman_log = " );
	dlog += workflowLogFile;
	args.AppendArg( dlog.c_str() );
	debug_printf( DEBUG_VERBOSE, "Adding a DAGMan workflow log %s\n",
				workflowLogFile );

		// Now append the mask
	debug_printf( DEBUG_VERBOSE,
				"Masking the events recorded in the DAGMAN workflow log\n" );
	args.AppendArg( "-a" ); // -a == -append; using -a to save chars
	std::string dmask("+");
	dmask += ATTR_DAGMAN_WORKFLOW_MASK;
	dmask += " = \"";
	const char *eventMask = getEventMask();
	debug_printf( DEBUG_VERBOSE, "Mask for workflow log is %s\n", eventMask );
	dmask += eventMask;
	dmask += "\"";
	args.AppendArg( dmask.c_str() );

		// Append the priority, if we have one.
	if ( priority != 0 ) {
		args.AppendArg( "-a" ); // -a == -append; using -a to save chars
		MyString prioStr = "priority=";
		prioStr += IntToStr( priority );
		args.AppendArg( prioStr.Value() );
	}

		// Suppress the job's log file if that option is enabled.
	if ( dm._suppressJobLogs ) {
		debug_printf( DEBUG_VERBOSE, "Suppressing node job log file\n" );
		args.AppendArg( "-a" ); // -a == -append; using -a to save chars
		args.AppendArg( "log=" );
	}

		// Parent node names are kept in a separate ArgList; whether they
		// get appended is decided below (see the "too many parents"
		// branch near the end).
	ArgList parentNameArgs;
	parentNameArgs.AppendArg( "-a" ); // -a == -append; using -a to save chars
	MyString parentNodeNames = MyString( "+DAGParentNodeNames = " ) +
				"\"" + DAGParentNodeNames + "\"";
	parentNameArgs.AppendArg( parentNodeNames.Value() );

		// set any VARS specified in the DAG file
	MyString anotherLine; // NOTE(review): appears unused in the visible code
	ListIterator<Job::NodeVar> varsIter(*vars);
	Job::NodeVar nodeVar;
	while ( varsIter.Next(nodeVar) ) {
			// Substitute the node retry count if necessary.  Note that
			// we can't do this in Job::ResolveVarsInterpolations()
			// because that's only called at parse time.
		MyString value = nodeVar._value;
		MyString retryStr = IntToStr( retry );
		value.replaceString( "$(RETRY)", retryStr.Value() );
		MyString varStr = nodeVar._name + " = " + value;

		args.AppendArg( "-a" ); // -a == -append; using -a to save chars
		args.AppendArg( varStr.Value() );
	}

		// Set the special DAG_STATUS variable (mainly for use by
		// "final" nodes).
	args.AppendArg( "-a" ); // -a == -append; using -a to save chars
	MyString var = "DAG_STATUS = ";
	var += IntToStr( (int)dm.dag->_dagStatus );
	args.AppendArg( var.Value() );

		// Set the special FAILED_COUNT variable (mainly for use by
		// "final" nodes).
	args.AppendArg( "-a" ); // -a == -append; using -a to save chars
	var = "FAILED_COUNT = ";
	var += IntToStr( dm.dag->NumNodesFailed() );
	args.AppendArg( var.Value() );

		// Ask the schedd to keep the claim idle between node jobs.
	if( hold_claim ){
		args.AppendArg( "-a" ); // -a == -append; using -a to save chars
		MyString holdit = MyString("+") + MyString(ATTR_JOB_KEEP_CLAIM_IDLE) +
					" = " + IntToStr( dm._claim_hold_time );
		args.AppendArg( holdit.Value() );
	}

	if (dm._submitDagDeepOpts.suppress_notification) {
		args.AppendArg( "-a" ); // -a == -append; using -a to save chars
		MyString notify = MyString("notification = never");
		args.AppendArg( notify.Value() );
	}

		//
		// Add accounting group and user if we have them.
		//
	if ( dm._submitDagDeepOpts.acctGroup != "" ) {
		args.AppendArg( "-a" ); // -a == -append; using -a to save chars
		MyString arg = "accounting_group=";
		arg += dm._submitDagDeepOpts.acctGroup;
		args.AppendArg( arg );
	}

	if ( dm._submitDagDeepOpts.acctGroupUser != "" ) {
		args.AppendArg( "-a" ); // -a == -append; using -a to save chars
			// NOTE(review): the span between the start of this string
			// literal and the "too many parents" warning below was
			// redacted to "******" in this copy of the source.  The
			// missing code (appending acctGroupUser to the arg, closing
			// this if-block, and the check of whether DAGParentNodeNames
			// fits in the ClassAd) must be restored from the upstream
			// HTCondor sources — do not ship this block as-is.
		MyString arg = "accounting_group_user="******"Warning: node %s has too many parents "
				"to list in its classad; leaving its DAGParentNodeNames "
				"attribute undefined\n", DAGNodeName );
		check_warning_strictness( DAG_STRICT_3 );
	} else {
		args.AppendArgsFromArgList( parentNameArgs );
	}

	args.AppendArg( cmdFile );

	bool success = do_submit( args, condorID, dm.prohibitMultiJobs );

		// Always try to return to the original working directory, even
		// if the submit itself failed.
	if ( !tmpDir.Cd2MainDir( errMsg ) ) {
		debug_printf( DEBUG_QUIET,
				"Could not change to original directory: %s\n",
				errMsg.Value() );
		success = false;
	}

	return success;
}
void Pigeon::initialize() { /* if (m_state == STATE_RUNNING) { */ MyString* qpidPort; char *path = NULL; //notify us when our process is down. m_reaper = daemonCore->Register_Reaper( "reaperQpid", (ReaperHandlercpp) &Pigeon::reaperResponse, "Qpid process reaper", (Service*) this); ASSERT(m_reaper != FALSE); //ClassAd Initialization //cleanup metafile ArgList argClean; clean(); char* proc= param("QPID_EXEC"); if (!proc) { dprintf(D_ALWAYS, "You need to specify the QPID executable as QPID_EXEC in your condor config \n"); EXCEPT("No qpid executable (QPID_EXEC) specified!"); } const char *hostname = my_full_hostname() ; ArgList arglist; arglist.AppendArg("qpidd"); char *qpidConf = param("QPID_CONF"); if (qpidConf) { arglist.AppendArg("--config"); arglist.AppendArg(qpidConf); free(qpidConf); } else { arglist.AppendArg("-p0"); arglist.AppendArg("--auth"); arglist.AppendArg("no"); } MyString argString; arglist.GetArgsStringForDisplay(&argString); dprintf(D_ALWAYS, "\n Invoking: %s\n", argString.Value()); path = getPortPath(); int fd_stdout = safe_open_wrapper(path, O_RDWR|O_CREAT, 0666); free(path); int fds[3] = {-1, fd_stdout, -1}; int mm_pid = daemonCore->Create_Process(proc,arglist,PRIV_CONDOR_FINAL, 0,FALSE,FALSE,NULL,NULL,NULL,NULL,fds); if (mm_pid <= 0) EXCEPT("Failed to launch qpid process using Create_Process."); dprintf(D_ALWAYS, "Launched qpid process pid=%d \n", mm_pid); sleep(10); close(fd_stdout); char *portChr = getPort(false); string portStr = string(portChr); free(portChr); free(proc); if(strcmp(portStr.c_str(),"") != 0){ m_qpidAd.Assign("PORT", portStr.c_str()); dprintf(D_ALWAYS,"qpid process started on port number %s \n", portStr.c_str()); } SetMyTypeName(m_qpidAd, "pigeon"); SetTargetTypeName(m_qpidAd, ""); std::string hostAddr = "pigeon@"; hostAddr += hostname; m_qpidAd.Assign(ATTR_NAME, "pigeon"); //hostAddr.c_str()); m_qpidAd.Assign("Key", "qpidKey"); m_qpidAd.Assign("IP","128" ); daemonCore->publish(&m_qpidAd); //Register a timer for periodically 
pushing classads. //TODO: Make these rate and interval configurable dprintf(D_ALWAYS, "Calling the classAd publish()\n"); daemonCore->Register_Timer(1, m_adPubInterval, (TimerHandlercpp) &Pigeon::publishClassAd, "publishClassAd", this); dprintf(D_ALWAYS, "Launched qpid process pid=%d at port=|%s|\n", mm_pid,portStr.c_str()); char *execDir = param("SBIN"); if (execDir) { dprintf(D_ALWAYS, "Declaring queues... \n"); ArgList qArglist; proc = (char*)malloc(strlen(execDir) + 15); sprintf(proc, "%s%c%s",execDir, DIR_DELIM_CHAR, "declareQueues"); qArglist.AppendArg(proc); qArglist.AppendArg(hostname); qArglist.AppendArg(portStr.c_str()); mm_pid = daemonCore->Create_Process(proc,qArglist,PRIV_CONDOR_FINAL, 0,FALSE,FALSE,NULL,NULL,NULL,NULL); if (mm_pid <= 0) EXCEPT("Failed to launch declareQueues process using Create_Process."); free(proc); free(execDir); dprintf(D_ALWAYS, "QPID queues declared. \n"); } }
// Probe the given vmgahp binary to determine whether it supports the given
// VM type.  Runs the gahp in VMGAHP_TEST_MODE via my_popen(), parses its
// stdout into m_vmgahp_info (a name=value ClassAd-style collection), and
// caches the reported capabilities (max memory, networking support) in
// this VMUniverseMgr.
//
//   gahppath - path to the vmgahp executable
//   vmtype   - VM type to test (xen / kvm / vmware)
//
// Returns true iff the gahp runs, produces parseable output, and reports
// support for vmtype.  Side effects on success: sets m_vm_max_memory,
// m_vm_networking, m_vm_type, and m_vmgahp_server.  Always clears
// m_needCheck.
bool VMUniverseMgr::testVMGahp(const char* gahppath, const char* vmtype)
{
	m_needCheck = false;

	if( !m_starter_has_vmcode ) {
		return false;
	}

	if( !gahppath || !vmtype ) {
		return false;
	}

#if defined(WIN32)
	// On Windows machine, the option that Starter log file includes
	// logs from vmgahp causes deadlock even if the option works well
	// on Linux machine.  I guess that is due to Windows Pipes but
	// I don't know the exact reason.
	// Until the problem is solved,
	// this option will be disabled on Windows machine.
	char *need_log_file = param("VM_GAHP_LOG");
	if( need_log_file ) {
		free(need_log_file);
	}else {
		dprintf( D_ALWAYS, "To support vm universe, '%s' must be defined "
				"in condor config file, which is a log file for vmgahp.\n",
				"VM_GAHP_LOG");
		return false;
	}
#endif

	// vmgahp is daemonCore, so we need to add -f -t options of daemonCore.
	// Then, try to execute vmgahp with
	// vmtype <vmtype>"
	// and grab the output as a ClassAd
	ArgList systemcmd;
	systemcmd.AppendArg(gahppath);
	systemcmd.AppendArg("-f");
		// When no VM_GAHP_LOG is configured, -t sends gahp logging to
		// the terminal instead.
	char *gahp_log_file = param("VM_GAHP_LOG");
	if( gahp_log_file ) {
		free(gahp_log_file);
	}else {
		systemcmd.AppendArg("-t");
	}
	systemcmd.AppendArg("-M");
	systemcmd.AppendArg(VMGAHP_TEST_MODE);
	systemcmd.AppendArg("vmtype");
	systemcmd.AppendArg(vmtype);

#if !defined(WIN32)
		// Tell the gahp which uid to run the user process as when
		// Condor is able to switch ids.
	if( can_switch_ids() ) {
		MyString tmp_str;
		tmp_str.formatstr("%d", (int)get_condor_uid());
		SetEnv("VMGAHP_USER_UID", tmp_str.Value());
	}
#endif

		// Choose the privilege level for the probe, restored right
		// after my_popen() below.
	priv_state prev_priv;
	if( (strcasecmp(vmtype, CONDOR_VM_UNIVERSE_XEN) == MATCH) ||
		(strcasecmp(vmtype, CONDOR_VM_UNIVERSE_KVM) == MATCH) ) {
		// Xen requires root privilege
		prev_priv = set_root_priv();
	}else {
		prev_priv = set_condor_priv();
	}
	FILE* fp = NULL;
	fp = my_popen(systemcmd, "r", FALSE );
	set_priv(prev_priv);

	if( !fp ) {
		dprintf( D_ALWAYS, "Failed to execute %s, ignoring\n", gahppath );
		return false;
	}

		// Collect the gahp's stdout, one "name = value" line at a
		// time, into m_vmgahp_info.
	bool read_something = false;
	char buf[2048];

	m_vmgahp_info.Clear();
	while( fgets(buf, 2048, fp) ) {
		if( !m_vmgahp_info.Insert(buf) ) {
			dprintf( D_ALWAYS, "Failed to insert \"%s\" into VMInfo, "
					"ignoring invalid parameter\n", buf );
			continue;
		}
		read_something = true;
	}
	my_pclose( fp );

		// No valid output at all: print per-vmtype setup hints and bail.
		// NOTE(review): these user-visible messages contain typos
		// ("progrm", "the followings") and the KVM branch names
		// 'condor_vm_xen.sh'; left byte-identical here — fix upstream.
	if( !read_something ) {
		MyString args_string;
		systemcmd.GetArgsStringForDisplay(&args_string,0);
		dprintf( D_ALWAYS,
				"Warning: '%s' did not produce any valid output.\n",
				args_string.Value());
		if( (strcasecmp(vmtype, CONDOR_VM_UNIVERSE_XEN) == 0) ) {
			MyString err_msg;
			err_msg += "\n#######################################################\n";
			err_msg += "##### Make sure the followings ";
			err_msg += "to use VM universe for Xen\n";
			err_msg += "### - The owner of script progrm like ";
			err_msg += "'condor_vm_xen.sh' must be root\n";
			err_msg += "### - The script program must be executable\n";
			err_msg += "### - Other writable bit for the above files is ";
			err_msg += "not allowed.\n";
			err_msg += "#########################################################\n";
			dprintf( D_ALWAYS, "%s", err_msg.Value());
		} else if( (strcasecmp(vmtype, CONDOR_VM_UNIVERSE_KVM) == 0)) {
			MyString err_msg;
			err_msg += "\n#######################################################\n";
			err_msg += "##### Make sure the followings ";
			err_msg += "to use VM universe for KVM\n";
			err_msg += "### - The owner of script progrm like ";
			err_msg += "'condor_vm_xen.sh' must be root\n";
			err_msg += "### - The script program must be executable\n";
			err_msg += "### - Other writable bit for the above files is ";
			err_msg += "not allowed.\n";
			err_msg += "#########################################################\n";
			dprintf( D_ALWAYS, "%s", err_msg.Value());
		}else if( strcasecmp(vmtype, CONDOR_VM_UNIVERSE_VMWARE ) == 0 ) {
			MyString err_msg;
			MyString err_msg2; // NOTE(review): appears unused
			err_msg += "\n#######################################################\n";
			err_msg += "##### Make sure the followings ";
			err_msg += "to use VM universe for VMware\n";
			if( can_switch_ids() ) {
				// Condor runs as root
				err_msg += "### - The script program like 'condor_vm_vmware'";
				err_msg += " must be readable for anybody.\n";
			}
			err_msg += "### - Check the path of vmware-cmd, vmrun, and mkisofs ";
			err_msg += "in 'condor_vm_vmware\n'";
			err_msg += "#########################################################\n";
			dprintf( D_ALWAYS, "%s", err_msg.Value());
		}
		return false;
	}

	// For debug
	printVMGahpInfo(D_ALWAYS);

	// Read vm_type
	MyString tmp_vmtype;
	if( m_vmgahp_info.LookupString( ATTR_VM_TYPE, tmp_vmtype) != 1 ) {
		dprintf( D_ALWAYS, "There is no %s in the output of vmgahp. "
				"So VM Universe will be disabled\n", ATTR_VM_TYPE);
		return false;
	}
	if( strcasecmp(tmp_vmtype.Value(), vmtype) != 0 ) {
		dprintf( D_ALWAYS,
				"The vmgahp(%s) doesn't support this vmtype(%s)\n",
				gahppath, vmtype);
		return false;
	}
	dprintf( D_ALWAYS, "VMType('%s') is supported\n", vmtype);

	// Read vm_memory
	if( m_vmgahp_info.LookupInteger(ATTR_VM_MEMORY, m_vm_max_memory) != 1 ) {
		dprintf( D_ALWAYS,
				"There is no %s in the output of vmgahp\n",ATTR_VM_MEMORY);
		return false;
	}
	if( m_vm_max_memory == 0 ) {
		dprintf( D_ALWAYS,
				"There is no sufficient memory for virtual machines\n");
		return false;
	}

	dprintf( D_ALWAYS, "The maximum available memory for vm universe is "
			"set to %d MB\n", m_vm_max_memory);

	// Read vm_networking
		// Networking is enabled only if BOTH the gahp reports support
		// (with at least one networking type) AND VM_NETWORKING is set
		// in the condor config.
	bool tmp_networking = false;
	MyString tmp_networking_types;

	m_vmgahp_info.LookupBool(ATTR_VM_NETWORKING, tmp_networking);
	if( tmp_networking ) {
		if( m_vmgahp_info.LookupString( ATTR_VM_NETWORKING_TYPES,
				tmp_networking_types) != 1 ) {
			tmp_networking = false;
			m_vmgahp_info.Assign(ATTR_VM_NETWORKING, false);
		}
	}

	m_vm_networking = param_boolean("VM_NETWORKING",false);
	if( m_vm_networking ) {
		if( !tmp_networking ) {
			dprintf( D_ALWAYS,
					"Even if VM_NETWORKING is TRUE in condor config,"
					" VM_NETWORKING is disabled because vmgahp doesn't "
					"support VM_NETWORKING\n");
			m_vm_networking = false;
		}
	}
	if( m_vm_networking == false ) {
		dprintf( D_ALWAYS, "VM networking is disabled\n");
	}else {
		dprintf( D_ALWAYS, "VM networking is enabled\n");
		dprintf( D_ALWAYS, "Supported networking types are %s\n",
				tmp_networking_types.Value());
	}

	// Now, we received correct information from vmgahp
	m_vm_type = tmp_vmtype;
	m_vmgahp_server = gahppath;

	return true;
}