// // Because we fork before calling docker, we don't actually // care if the image is stored locally or not (except to the extent that // remote image pull violates the principle of least astonishment). // int DockerAPI::run( ClassAd &machineAd, ClassAd &jobAd, const std::string & containerName, const std::string & imageID, const std::string & command, const ArgList & args, const Env & env, const std::string & sandboxPath, const std::list<std::string> extraVolumes, int & pid, int * childFDs, CondorError & /* err */ ) { gc_image(imageID); // // We currently assume that the system has been configured so that // anyone (user) who can run an HTCondor job can also run docker. It's // also apparently a security worry to run Docker as root, so let's not. // ArgList runArgs; if ( ! add_docker_arg(runArgs)) return -1; runArgs.AppendArg( "run" ); // Write out a file with the container ID. // FIXME: The startd can check this to clean up after us. // This needs to go into a directory that condor user // can write to. /* std::string cidFileName = sandboxPath + "/.cidfile"; runArgs.AppendArg( "--cidfile=" + cidFileName ); */ // Configure resource limits. // First cpus int cpus; int cpuShare; if (machineAd.LookupInteger(ATTR_CPUS, cpus)) { cpuShare = 10 * cpus; } else { cpuShare = 10; } std::string cpuShareStr; formatstr(cpuShareStr, "--cpu-shares=%d", cpuShare); runArgs.AppendArg(cpuShareStr); // Now memory int memory; // in Megabytes if (machineAd.LookupInteger(ATTR_MEMORY, memory)) { std::string mem; formatstr(mem, "--memory=%dm", memory); runArgs.AppendArg(mem); } // drop unneeded Linux capabilities if (param_boolean("DOCKER_DROP_ALL_CAPABILITIES", true /*default*/, true /*do_log*/, &machineAd, &jobAd)) { runArgs.AppendArg("--cap-drop=all"); // --no-new-privileges flag appears in docker 1.11 if (DockerAPI::majorVersion > 1 || DockerAPI::minorVersion > 10) { runArgs.AppendArg("--no-new-privileges"); } } // Give the container a useful name std::string hname = makeHostname(&machineAd, &jobAd); runArgs.AppendArg("--hostname"); runArgs.AppendArg(hname.c_str()); // Now the container name runArgs.AppendArg( "--name" ); runArgs.AppendArg( containerName ); if ( ! add_env_to_args_for_docker(runArgs, env)) { dprintf( D_ALWAYS | D_FAILURE, "Failed to pass enviroment to docker.\n" ); return -8; } // Map the external sanbox to the internal sandbox. runArgs.AppendArg( "--volume" ); runArgs.AppendArg( sandboxPath + ":" + sandboxPath ); // Now any extra volumes for (std::list<std::string>::const_iterator it = extraVolumes.begin(); it != extraVolumes.end(); it++) { runArgs.AppendArg("--volume"); std::string volume = *it; runArgs.AppendArg(volume); } // Start in the sandbox. runArgs.AppendArg( "--workdir" ); runArgs.AppendArg( sandboxPath ); // Run with the uid that condor selects for the user // either a slot user or submitting user or nobody uid_t uid = 0; uid_t gid = 0; // Docker doesn't actually run on Windows, but we compile // on Windows because... #ifndef WIN32 uid = get_user_uid(); gid = get_user_gid(); #endif if ((uid == 0) || (gid == 0)) { dprintf(D_ALWAYS|D_FAILURE, "Failed to get userid to run docker job\n"); return -9; } runArgs.AppendArg("--user"); std::string uidgidarg; formatstr(uidgidarg, "%d:%d", uid, gid); runArgs.AppendArg(uidgidarg); // Run the command with its arguments in the image. runArgs.AppendArg( imageID ); // If no command given, the default command in the image will run if (command.length() > 0) { runArgs.AppendArg( command ); } runArgs.AppendArgsFromArgList( args ); MyString displayString; runArgs.GetArgsStringForLogging( & displayString ); dprintf( D_ALWAYS, "Attempting to run: %s\n", displayString.c_str() ); // // If we run Docker attached, we avoid a race condition where // 'docker logs --follow' returns before 'docker rm' knows that the // container is gone (and refuses to remove it). Of course, we // can't block, so we have a proxy process run attached for us. // FamilyInfo fi; fi.max_snapshot_interval = param_integer( "PID_SNAPSHOT_INTERVAL", 15 ); int childPID = daemonCore->Create_Process( runArgs.GetArg(0), runArgs, PRIV_CONDOR_FINAL, 1, FALSE, FALSE, NULL, "/", & fi, NULL, childFDs ); if( childPID == FALSE ) { dprintf( D_ALWAYS | D_FAILURE, "Create_Process() failed.\n" ); return -1; } pid = childPID; return 0; }
//------------------------------------------------------------------------- bool condor_submit( const Dagman &dm, const char* cmdFile, CondorID& condorID, const char* DAGNodeName, MyString &DAGParentNodeNames, List<Job::NodeVar> *vars, int retry, const char* directory, const char *workflowLogFile, bool hold_claim ) { TmpDir tmpDir; MyString errMsg; if ( !tmpDir.Cd2TmpDir( directory, errMsg ) ) { debug_printf( DEBUG_QUIET, "Could not change to node directory %s: %s\n", directory, errMsg.Value() ); return false; } ArgList args; // construct arguments to condor_submit to add attributes to the // job classad which identify the job's node name in the DAG, the // node names of its parents in the DAG, and the job ID of DAGMan // itself; then, define submit_event_notes to print the job's node // name inside the submit event in the userlog // NOTE: we specify the job ID of DAGMan using only its cluster ID // so that it may be referenced by jobs in their priority // attribute (which needs an int, not a string). Doing so allows // users to effectively "batch" jobs by DAG so that when they // submit many DAGs to the same schedd, all the ready jobs from // one DAG complete before any jobs from another begin. args.AppendArg( dm.condorSubmitExe ); args.AppendArg( "-a" ); MyString nodeName = MyString(ATTR_DAG_NODE_NAME_ALT) + " = " + DAGNodeName; args.AppendArg( nodeName.Value() ); // append a line adding the parent DAGMan's cluster ID to the job ad args.AppendArg( "-a" ); MyString dagJobId = MyString( "+" ) + ATTR_DAGMAN_JOB_ID + " = " + dm.DAGManJobId._cluster; args.AppendArg( dagJobId.Value() ); // now we append a line setting the same thing as a submit-file macro // (this is necessary so the user can reference it in the priority) args.AppendArg( "-a" ); MyString dagJobIdMacro = MyString( "" ) + ATTR_DAGMAN_JOB_ID + " = " + dm.DAGManJobId._cluster; args.AppendArg( dagJobIdMacro.Value() ); args.AppendArg( "-a" ); MyString submitEventNotes = MyString( "submit_event_notes = DAG Node: " ) + DAGNodeName; args.AppendArg( submitEventNotes.Value() ); ASSERT( workflowLogFile ); // We need to append the DAGman default log file to // the log file list args.AppendArg( "-a" ); std::string dlog( "dagman_log = " ); dlog += workflowLogFile; args.AppendArg( dlog.c_str() ); debug_printf( DEBUG_VERBOSE, "Adding a DAGMan workflow log %s\n", workflowLogFile ); // Now append the mask debug_printf( DEBUG_VERBOSE, "Masking the events recorded in the DAGMAN workflow log\n" ); args.AppendArg( "-a" ); std::string dmask("+"); dmask += ATTR_DAGMAN_WORKFLOW_MASK; dmask += " = \""; const char *eventMask = getEventMask(); debug_printf( DEBUG_VERBOSE, "Mask for workflow log is %s\n", eventMask ); dmask += eventMask; dmask += "\""; args.AppendArg( dmask.c_str() ); // Suppress the job's log file if that option is enabled. if ( dm._suppressJobLogs ) { debug_printf( DEBUG_VERBOSE, "Suppressing node job log file\n" ); args.AppendArg( "-a" ); args.AppendArg( "log = ''" ); } ArgList parentNameArgs; parentNameArgs.AppendArg( "-a" ); MyString parentNodeNames = MyString( "+DAGParentNodeNames = " ) + "\"" + DAGParentNodeNames + "\""; parentNameArgs.AppendArg( parentNodeNames.Value() ); // set any VARS specified in the DAG file MyString anotherLine; ListIterator<Job::NodeVar> varsIter(*vars); Job::NodeVar nodeVar; while ( varsIter.Next(nodeVar) ) { // Substitute the node retry count if necessary. Note that // we can't do this in Job::ResolveVarsInterpolations() // because that's only called at parse time. MyString value = nodeVar._value; MyString retryStr( retry ); value.replaceString( "$(RETRY)", retryStr.Value() ); MyString varStr = nodeVar._name + " = " + value; args.AppendArg( "-a" ); args.AppendArg( varStr.Value() ); } // Set the special DAG_STATUS variable (mainly for use by // "final" nodes). args.AppendArg( "-a" ); MyString var = "DAG_STATUS = "; var += dm.dag->_dagStatus; args.AppendArg( var.Value() ); // Set the special FAILED_COUNT variable (mainly for use by // "final" nodes). args.AppendArg( "-a" ); var = "FAILED_COUNT = "; var += dm.dag->NumNodesFailed(); args.AppendArg( var.Value() ); // how big is the command line so far MyString display; args.GetArgsStringForDisplay( &display ); int cmdLineSize = display.Length(); parentNameArgs.GetArgsStringForDisplay( &display ); int DAGParentNodeNamesLen = display.Length(); // how many additional chars must we still add to command line // NOTE: according to the POSIX spec, the args + // environ given to exec() cannot exceed // _POSIX_ARG_MAX, so we also need to calculate & add // the size of environ** to reserveNeeded int reserveNeeded = strlen( cmdFile ); int maxCmdLine = _POSIX_ARG_MAX; // if we don't have room for DAGParentNodeNames, leave it unset if( cmdLineSize + reserveNeeded + DAGParentNodeNamesLen > maxCmdLine ) { debug_printf( DEBUG_NORMAL, "Warning: node %s has too many parents " "to list in its classad; leaving its DAGParentNodeNames " "attribute undefined\n", DAGNodeName ); check_warning_strictness( DAG_STRICT_3 ); } else { args.AppendArgsFromArgList( parentNameArgs ); } if( hold_claim ){ args.AppendArg( "-a" ); MyString holdit = MyString("+") + MyString(ATTR_JOB_KEEP_CLAIM_IDLE) + " = " + dm._claim_hold_time; args.AppendArg( holdit.Value() ); } if (dm._submitDagDeepOpts.suppress_notification) { args.AppendArg( "-a" ); MyString notify = MyString("notification = never"); args.AppendArg( notify.Value() ); } args.AppendArg( cmdFile ); bool success = do_submit( args, condorID, dm.prohibitMultiJobs ); if ( !tmpDir.Cd2MainDir( errMsg ) ) { debug_printf( DEBUG_QUIET, "Could not change to original directory: %s\n", errMsg.Value() ); success = false; } return success; }
bool glexec_starter_prepare(const char* starter_path, const char* proxy_file, const ArgList& orig_args, const Env* orig_env, const int orig_std_fds[3], ArgList& glexec_args, Env& glexec_env, int glexec_std_fds[3]) { // if GLEXEC_STARTER is set, use glexec to invoke the // starter (or fail if we can't). this involves: // - verifying that we have a delegated proxy from // the user stored, since we need to hand it to // glexec so it can look up the UID/GID // - invoking 'glexec_starter_setup.sh' via glexec to // setup the starter's "private directory" for a copy // of the job's proxy to go into, as well as the StarterLog // and execute dir // - adding the contents of the GLEXEC and config param // and the path to 'condor_glexec_wrapper' to the front // of the command line // - setting up glexec's environment (setting the // mode, handing off the proxy, etc.) // - creating a UNIX-domain socket to use to communicate // with our wrapper script, and munging the std_fds // array // verify that we have a stored proxy if( proxy_file == NULL ) { dprintf( D_ALWAYS, "cannot use glexec to spawn starter: no proxy " "(is GLEXEC_STARTER set in the shadow?)\n" ); return false; } // using the file name of the proxy that was stashed ealier, construct // the name of the starter's "private directory". the naming scheme is // (where XXXXXX is randomly generated via condor_mkstemp): // - $(GLEXEC_USER_DIR)/startd-tmp-proxy-XXXXXX // - startd's copy of the job's proxy // - $(GLEXEC_USER_DIR)/starter-tmp-dir-XXXXXX // - starter's private dir // MyString glexec_private_dir; char* dir_part = condor_dirname(proxy_file); ASSERT(dir_part != NULL); glexec_private_dir = dir_part; free(dir_part); glexec_private_dir += "/starter-tmp-dir-"; const char* random_part = proxy_file; random_part += strlen(random_part) - 6; glexec_private_dir += random_part; dprintf(D_ALWAYS, "GLEXEC: starter private dir is '%s'\n", glexec_private_dir.Value()); // get the glexec command line prefix from config char* glexec_argstr = param( "GLEXEC" ); if ( ! glexec_argstr ) { dprintf( D_ALWAYS, "cannot use glexec to spawn starter: " "GLEXEC not given in config\n" ); return false; } // cons up a command line for my_system. we'll run the // script $(LIBEXEC)/glexec_starter_setup.sh, which // will create the starter's "private directory" (and // its log and execute subdirectories). the value of // glexec_private_dir will be passed as an argument to // the script // parse the glexec args for invoking glexec_starter_setup.sh. // do not free them yet, except on an error, as we use them // again below. MyString setup_err; ArgList glexec_setup_args; glexec_setup_args.SetArgV1SyntaxToCurrentPlatform(); if( ! glexec_setup_args.AppendArgsV1RawOrV2Quoted( glexec_argstr, &setup_err ) ) { dprintf( D_ALWAYS, "GLEXEC: failed to parse GLEXEC from config: %s\n", setup_err.Value() ); free( glexec_argstr ); return 0; } // set up the rest of the arguments for the glexec setup script char* tmp = param("LIBEXEC"); if (tmp == NULL) { dprintf( D_ALWAYS, "GLEXEC: LIBEXEC not defined; can't find setup script\n" ); free( glexec_argstr ); return 0; } MyString libexec = tmp; free(tmp); MyString setup_script = libexec; setup_script += "/glexec_starter_setup.sh"; glexec_setup_args.AppendArg(setup_script.Value()); glexec_setup_args.AppendArg(glexec_private_dir.Value()); // debug info. this display format totally screws up the quoting, but // my_system gets it right. MyString disp_args; glexec_setup_args.GetArgsStringForDisplay(&disp_args, 0); dprintf (D_ALWAYS, "GLEXEC: about to glexec: ** %s **\n", disp_args.Value()); // the only thing actually needed by glexec at this point is the cert, so // that it knows who to map to. the pipe outputs the username that glexec // ended up using, on a single text line by itself. SetEnv( "GLEXEC_CLIENT_CERT", proxy_file ); // create the starter's private dir int ret = my_system(glexec_setup_args); // clean up UnsetEnv( "GLEXEC_CLIENT_CERT"); if ( ret != 0 ) { dprintf(D_ALWAYS, "GLEXEC: error creating private dir: my_system returned %d\n", ret); free( glexec_argstr ); return 0; } // now prepare the starter command line, starting with glexec and its // options (if any), then condor_glexec_wrapper. MyString err; if( ! glexec_args.AppendArgsV1RawOrV2Quoted( glexec_argstr, &err ) ) { dprintf( D_ALWAYS, "failed to parse GLEXEC from config: %s\n", err.Value() ); free( glexec_argstr ); return 0; } free( glexec_argstr ); MyString wrapper_path = libexec; wrapper_path += "/condor_glexec_wrapper"; glexec_args.AppendArg(wrapper_path.Value()); // complete the command line by adding in the original // arguments. we also make sure that the full path to the // starter is given int starter_path_pos = glexec_args.Count(); glexec_args.AppendArgsFromArgList( orig_args ); glexec_args.RemoveArg( starter_path_pos ); glexec_args.InsertArg( starter_path, starter_path_pos ); // set up the environment stuff if( orig_env ) { // first merge in the original glexec_env.MergeFrom( *orig_env ); } // GLEXEC_MODE - get account from lcmaps glexec_env.SetEnv( "GLEXEC_MODE", "lcmaps_get_account" ); // GLEXEC_CLIENT_CERT - cert to use for the mapping glexec_env.SetEnv( "GLEXEC_CLIENT_CERT", proxy_file ); #if defined(HAVE_EXT_GLOBUS) && !defined(SKIP_AUTHENTICATION) // GLEXEC_SOURCE_PROXY - proxy to provide to the child // (file is owned by us) glexec_env.SetEnv( "GLEXEC_SOURCE_PROXY", proxy_file ); dprintf (D_ALWAYS, "GLEXEC: setting GLEXEC_SOURCE_PROXY to %s\n", proxy_file); // GLEXEC_TARGET_PROXY - child-owned file to copy its proxy to. // this needs to be in a directory owned by that user, and not world // writable. glexec enforces this. hence, all the whoami/mkdir mojo // above. MyString child_proxy_file = glexec_private_dir; child_proxy_file += "/glexec_starter_proxy"; dprintf (D_ALWAYS, "GLEXEC: setting GLEXEC_TARGET_PROXY to %s\n", child_proxy_file.Value()); glexec_env.SetEnv( "GLEXEC_TARGET_PROXY", child_proxy_file.Value() ); // _CONDOR_GSI_DAEMON_PROXY - starter's proxy MyString var_name; var_name.sprintf("_CONDOR_%s", STR_GSI_DAEMON_PROXY); glexec_env.SetEnv( var_name.Value(), child_proxy_file.Value() ); var_name.sprintf("_condor_%s", STR_GSI_DAEMON_PROXY); glexec_env.SetEnv( var_name.Value(), child_proxy_file.Value() ); #endif // the EXECUTE dir should be owned by the mapped user. we created this // earlier, and now we override it in the condor_config via the // environment. MyString execute_dir = glexec_private_dir; execute_dir += "/execute"; glexec_env.SetEnv ( "_CONDOR_EXECUTE", execute_dir.Value()); glexec_env.SetEnv ( "_condor_EXECUTE", execute_dir.Value()); // the LOG dir should be owned by the mapped user. we created this // earlier, and now we override it in the condor_config via the // environment. MyString log_dir = glexec_private_dir; log_dir += "/log"; glexec_env.SetEnv ( "_CONDOR_LOG", log_dir.Value()); glexec_env.SetEnv ( "_condor_LOG", log_dir.Value()); glexec_env.SetEnv ( "_CONDOR_LOCK", log_dir.Value()); glexec_env.SetEnv ( "_condor_LOCK", log_dir.Value()); // PROCD_ADDRESS: the Starter that we are about to create will // not have access to our ProcD. we'll explicitly set PROCD_ADDRESS // to be in its LOG directory. the Starter will see that its // PROCD_ADDRESS knob is different from what it inherits in // CONDOR_PROCD_ADDRESS, and know it needs to create its own ProcD // MyString procd_address = log_dir; procd_address += "/procd_pipe"; glexec_env.SetEnv( "_CONDOR_PROCD_ADDRESS", procd_address.Value() ); glexec_env.SetEnv( "_condor_PROCD_ADDRESS", procd_address.Value() ); // CONDOR_GLEXEC_STARTER_CLEANUP_FLAG: this serves as a flag in the // Starter's environment that it will check for in order to determine // whether to do GLEXEC_STARTER-specific cleanup // glexec_env.SetEnv( "CONDOR_GLEXEC_STARTER_CLEANUP_FLAG", "CONDOR_GLEXEC_STARTER_CLEANUP_FLAG" ); // now set up a socket pair for communication with // condor_glexec_wrapper // if (socketpair(PF_UNIX, SOCK_STREAM, 0, s_saved_sock_fds) == -1) { dprintf(D_ALWAYS, "GLEXEC: socketpair error: %s\n", strerror(errno)); return false; } glexec_std_fds[0] = s_saved_sock_fds[1]; if (orig_std_fds == NULL) { s_saved_starter_stdin = -1; glexec_std_fds[1] = glexec_std_fds[2] = -1; } else { s_saved_starter_stdin = orig_std_fds[0]; glexec_std_fds[1] = orig_std_fds[1]; glexec_std_fds[2] = orig_std_fds[2]; } // save the environment we're handing back to the caller for use in // glexec_starter_handle_env() // s_saved_env.Clear(); s_saved_env.MergeFrom(glexec_env); return true; }