Example #1
0
//
// Build a 'docker run' command line for a job and launch it as a
// DaemonCore child process.
//
// Because we fork before calling docker, we don't actually
// care if the image is stored locally or not (except to the extent that
// remote image pull violates the principle of least astonishment).
//
// Parameters:
//   machineAd     - read for ATTR_CPUS and ATTR_MEMORY (resource limits);
//                   also handed to param_boolean() and makeHostname()
//   jobAd         - handed to param_boolean() and makeHostname()
//   containerName - value given to 'docker run --name'
//   imageID       - image to run; also passed to gc_image() first
//   command       - command to run in the image; if empty, nothing is
//                   appended and the image's default command runs
//   args          - arguments appended after the command
//   env           - environment forwarded via add_env_to_args_for_docker()
//   sandboxPath   - mounted into the container at the same path and used
//                   as the working directory
//   extraVolumes  - each entry is passed verbatim as a '--volume' value
//   pid           - [out] set to the child pid on success only
//   childFDs      - handed unchanged to Create_Process()
//   (CondorError) - unused
//
// Returns:
//    0  on success
//   -1  if add_docker_arg() fails or Create_Process() fails
//   -8  if the environment could not be added to the command line
//   -9  if the uid or gid to run as is 0 (always the case on WIN32,
//       where they are never looked up)
//
int DockerAPI::run(
	ClassAd &machineAd,
	ClassAd &jobAd,
	const std::string & containerName,
	const std::string & imageID,
	const std::string & command,
	const ArgList & args,
	const Env & env,
	const std::string & sandboxPath,
	const std::list<std::string> extraVolumes,
	int & pid,
	int * childFDs,
	CondorError & /* err */ )
{
	gc_image(imageID);
	//
	// We currently assume that the system has been configured so that
	// anyone (user) who can run an HTCondor job can also run docker.  It's
	// also apparently a security worry to run Docker as root, so let's not.
	//
	ArgList runArgs;
	if ( ! add_docker_arg(runArgs))
		return -1;
	runArgs.AppendArg( "run" );

	// Write out a file with the container ID.
	// FIXME: The startd can check this to clean up after us.
	// This needs to go into a directory that condor user
	// can write to.

/*
	std::string cidFileName = sandboxPath + "/.cidfile";
	runArgs.AppendArg( "--cidfile=" + cidFileName );
*/


	// Configure resource limits.

	// First cpus: 10 shares per cpu, or 10 shares if the machine ad
	// does not say how many cpus there are.
	int cpus = 0;
	int cpuShare = 10;
	if (machineAd.LookupInteger(ATTR_CPUS, cpus)) {
		cpuShare = 10 * cpus;
	}
	std::string cpuShareStr;
	formatstr(cpuShareStr, "--cpu-shares=%d", cpuShare);
	runArgs.AppendArg(cpuShareStr);

	// Now memory; no limit is passed if the machine ad has none.
	int memory = 0; // in Megabytes
	if (machineAd.LookupInteger(ATTR_MEMORY, memory)) {
		std::string mem;
		formatstr(mem, "--memory=%dm", memory);
		runArgs.AppendArg(mem);
	}

	// drop unneeded Linux capabilities
	if (param_boolean("DOCKER_DROP_ALL_CAPABILITIES", true /*default*/,
		true /*do_log*/, &machineAd, &jobAd)) {
		runArgs.AppendArg("--cap-drop=all");

		// no-new-privileges support appears in docker 1.11.  The major
		// version has to be part of the minor-version test, otherwise
		// pre-1.0 releases such as 0.11 and 0.12 would match as well.
		// 'docker run' spells the option '--security-opt no-new-privileges'.
		if (DockerAPI::majorVersion > 1 ||
		    (DockerAPI::majorVersion == 1 && DockerAPI::minorVersion > 10)) {
			runArgs.AppendArg("--security-opt");
			runArgs.AppendArg("no-new-privileges");
		}
	}

	// Give the container a useful name
	std::string hname = makeHostname(&machineAd, &jobAd);
	runArgs.AppendArg("--hostname");
	runArgs.AppendArg(hname.c_str());

	// Now the container name
	runArgs.AppendArg( "--name" );
	runArgs.AppendArg( containerName );

	if ( ! add_env_to_args_for_docker(runArgs, env)) {
		dprintf( D_ALWAYS | D_FAILURE, "Failed to pass enviroment to docker.\n" );
		return -8;
	}

	// Map the external sandbox to the internal sandbox.
	runArgs.AppendArg( "--volume" );
	runArgs.AppendArg( sandboxPath + ":" + sandboxPath );

	// Now any extra volumes
	for (std::list<std::string>::const_iterator it = extraVolumes.begin();
		 it != extraVolumes.end(); ++it) {
		runArgs.AppendArg("--volume");
		runArgs.AppendArg(*it);
	}

	// Start in the sandbox.
	runArgs.AppendArg( "--workdir" );
	runArgs.AppendArg( sandboxPath );

	// Run with the uid that condor selects for the user
	// either a slot user or submitting user or nobody
	uid_t uid = 0;
	uid_t gid = 0;

	// Docker doesn't actually run on Windows, but we compile
	// on Windows because...
#ifndef WIN32
	uid = get_user_uid();
	gid = get_user_gid();
#endif

	// Refuse to run the container as root (or with root's group).
	if ((uid == 0) || (gid == 0)) {
		dprintf(D_ALWAYS|D_FAILURE, "Failed to get userid to run docker job\n");
		return -9;
	}

	// Format the ids as unsigned so that values above INT_MAX are not
	// printed as negative numbers.
	runArgs.AppendArg("--user");
	std::string uidgidarg;
	formatstr(uidgidarg, "%u:%u",
		static_cast<unsigned int>(uid), static_cast<unsigned int>(gid));
	runArgs.AppendArg(uidgidarg);

	// Run the command with its arguments in the image.
	runArgs.AppendArg( imageID );

	// If no command given, the default command in the image will run
	if (command.length() > 0) {
		runArgs.AppendArg( command );
	}

	runArgs.AppendArgsFromArgList( args );

	MyString displayString;
	runArgs.GetArgsStringForLogging( & displayString );
	dprintf( D_ALWAYS, "Attempting to run: %s\n", displayString.c_str() );

	//
	// If we run Docker attached, we avoid a race condition where
	// 'docker logs --follow' returns before 'docker rm' knows that the
	// container is gone (and refuses to remove it).  Of course, we
	// can't block, so we have a proxy process run attached for us.
	//
	FamilyInfo fi;
	fi.max_snapshot_interval = param_integer( "PID_SNAPSHOT_INTERVAL", 15 );
	int childPID = daemonCore->Create_Process( runArgs.GetArg(0), runArgs,
		PRIV_CONDOR_FINAL, 1, FALSE, FALSE, NULL, "/",
		& fi, NULL, childFDs );

	if( childPID == FALSE ) {
		dprintf( D_ALWAYS | D_FAILURE, "Create_Process() failed.\n" );
		return -1;
	}
	pid = childPID;

	return 0;
}
Example #2
0
//-------------------------------------------------------------------------
bool
condor_submit( const Dagman &dm, const char* cmdFile, CondorID& condorID,
			   const char* DAGNodeName, MyString &DAGParentNodeNames,
			   List<Job::NodeVar> *vars, int retry,
			   const char* directory, const char *workflowLogFile,
			   bool hold_claim )
{
	TmpDir		tmpDir;
	MyString	errMsg;
	if ( !tmpDir.Cd2TmpDir( directory, errMsg ) ) {
		debug_printf( DEBUG_QUIET,
				"Could not change to node directory %s: %s\n",
				directory, errMsg.Value() );
		return false;
	}

	ArgList args;

	// construct arguments to condor_submit to add attributes to the
	// job classad which identify the job's node name in the DAG, the
	// node names of its parents in the DAG, and the job ID of DAGMan
	// itself; then, define submit_event_notes to print the job's node
	// name inside the submit event in the userlog

	// NOTE: we specify the job ID of DAGMan using only its cluster ID
	// so that it may be referenced by jobs in their priority
	// attribute (which needs an int, not a string).  Doing so allows
	// users to effectively "batch" jobs by DAG so that when they
	// submit many DAGs to the same schedd, all the ready jobs from
	// one DAG complete before any jobs from another begin.

	args.AppendArg( dm.condorSubmitExe );

	args.AppendArg( "-a" );
	MyString nodeName = MyString(ATTR_DAG_NODE_NAME_ALT) + " = " + DAGNodeName;
	args.AppendArg( nodeName.Value() );

		// append a line adding the parent DAGMan's cluster ID to the job ad
	args.AppendArg( "-a" );
	MyString dagJobId = MyString( "+" ) + ATTR_DAGMAN_JOB_ID + " = " +
				dm.DAGManJobId._cluster;
	args.AppendArg( dagJobId.Value() );

		// now we append a line setting the same thing as a submit-file macro
		// (this is necessary so the user can reference it in the priority)
	args.AppendArg( "-a" );
	MyString dagJobIdMacro = MyString( "" ) + ATTR_DAGMAN_JOB_ID + " = " +
				dm.DAGManJobId._cluster;
	args.AppendArg( dagJobIdMacro.Value() );

	args.AppendArg( "-a" );
	MyString submitEventNotes = MyString(
				"submit_event_notes = DAG Node: " ) + DAGNodeName;
	args.AppendArg( submitEventNotes.Value() );

	ASSERT( workflowLogFile );

		// We need to append the DAGman default log file to
		// the log file list
	args.AppendArg( "-a" );
	std::string dlog( "dagman_log = " );
	dlog += workflowLogFile;
	args.AppendArg( dlog.c_str() );
	debug_printf( DEBUG_VERBOSE, "Adding a DAGMan workflow log %s\n",
				workflowLogFile );

		// Now append the mask
	debug_printf( DEBUG_VERBOSE, "Masking the events recorded in the DAGMAN workflow log\n" );
	args.AppendArg( "-a" );
	std::string dmask("+");
	dmask += ATTR_DAGMAN_WORKFLOW_MASK;
	dmask += " = \"";
	const char *eventMask = getEventMask();
	debug_printf( DEBUG_VERBOSE, "Mask for workflow log is %s\n",
				eventMask );
	dmask += eventMask;
	dmask += "\"";
	args.AppendArg( dmask.c_str() );

		// Suppress the job's log file if that option is enabled.
	if ( dm._suppressJobLogs ) {
		debug_printf( DEBUG_VERBOSE, "Suppressing node job log file\n" );
		args.AppendArg( "-a" );
		args.AppendArg( "log = ''" );
	}

	ArgList parentNameArgs;
	parentNameArgs.AppendArg( "-a" );
	MyString parentNodeNames = MyString( "+DAGParentNodeNames = " ) +
	                        "\"" + DAGParentNodeNames + "\"";
	parentNameArgs.AppendArg( parentNodeNames.Value() );

		// set any VARS specified in the DAG file
	MyString anotherLine;
	ListIterator<Job::NodeVar> varsIter(*vars);
	Job::NodeVar nodeVar;
	while ( varsIter.Next(nodeVar) ) {

			// Substitute the node retry count if necessary.  Note that
			// we can't do this in Job::ResolveVarsInterpolations()
			// because that's only called at parse time.
		MyString value = nodeVar._value;
		MyString retryStr( retry );
		value.replaceString( "$(RETRY)", retryStr.Value() );
		MyString varStr = nodeVar._name + " = " + value;

		args.AppendArg( "-a" );
		args.AppendArg( varStr.Value() );
	}

		// Set the special DAG_STATUS variable (mainly for use by
		// "final" nodes).
	args.AppendArg( "-a" );
	MyString var = "DAG_STATUS = ";
	var += dm.dag->_dagStatus;
	args.AppendArg( var.Value() );

		// Set the special FAILED_COUNT variable (mainly for use by
		// "final" nodes).
	args.AppendArg( "-a" );
	var = "FAILED_COUNT = ";
	var += dm.dag->NumNodesFailed();
	args.AppendArg( var.Value() );

		// how big is the command line so far
	MyString display;
	args.GetArgsStringForDisplay( &display );
	int cmdLineSize = display.Length();

	parentNameArgs.GetArgsStringForDisplay( &display );
	int DAGParentNodeNamesLen = display.Length();
		// how many additional chars must we still add to command line
	        // NOTE: according to the POSIX spec, the args +
   	        // environ given to exec() cannot exceed
   	        // _POSIX_ARG_MAX, so we also need to calculate & add
   	        // the size of environ** to reserveNeeded
	int reserveNeeded = strlen( cmdFile );
	int maxCmdLine = _POSIX_ARG_MAX;

		// if we don't have room for DAGParentNodeNames, leave it unset
	if( cmdLineSize + reserveNeeded + DAGParentNodeNamesLen > maxCmdLine ) {
		debug_printf( DEBUG_NORMAL, "Warning: node %s has too many parents "
					  "to list in its classad; leaving its DAGParentNodeNames "
					  "attribute undefined\n", DAGNodeName );
		check_warning_strictness( DAG_STRICT_3 );
	} else {
		args.AppendArgsFromArgList( parentNameArgs );
	}

	if( hold_claim ){
		args.AppendArg( "-a" );
		MyString holdit = MyString("+") + MyString(ATTR_JOB_KEEP_CLAIM_IDLE) + " = "
			+ dm._claim_hold_time;
		args.AppendArg( holdit.Value() );	
	}
	
	if (dm._submitDagDeepOpts.suppress_notification) {
		args.AppendArg( "-a" );
		MyString notify = MyString("notification = never");
		args.AppendArg( notify.Value() );
	}

	args.AppendArg( cmdFile );

	bool success = do_submit( args, condorID, dm.prohibitMultiJobs );

	if ( !tmpDir.Cd2MainDir( errMsg ) ) {
		debug_printf( DEBUG_QUIET,
				"Could not change to original directory: %s\n",
				errMsg.Value() );
		success = false;
	}

	return success;
}
Example #3
0
bool
glexec_starter_prepare(const char* starter_path,
                       const char* proxy_file,
                       const ArgList& orig_args,
                       const Env* orig_env,
                       const int orig_std_fds[3],
                       ArgList& glexec_args,
                       Env& glexec_env,
                       int glexec_std_fds[3])
{
    // if GLEXEC_STARTER is set, use glexec to invoke the
    // starter (or fail if we can't). this involves:
    //   - verifying that we have a delegated proxy from
    //     the user stored, since we need to hand it to
    //     glexec so it can look up the UID/GID
    //   - invoking 'glexec_starter_setup.sh' via glexec to
    //     setup the starter's "private directory" for a copy
    //     of the job's proxy to go into, as well as the StarterLog
    //     and execute dir
    //   - adding the contents of the GLEXEC and config param
    //     and the path to 'condor_glexec_wrapper' to the front
    //     of the command line
    //   - setting up glexec's environment (setting the
    //     mode, handing off the proxy, etc.)
    //   - creating a UNIX-domain socket to use to communicate
    //     with our wrapper script, and munging the std_fds
    //     array

    // verify that we have a stored proxy
    if( proxy_file == NULL ) {
        dprintf( D_ALWAYS,
                 "cannot use glexec to spawn starter: no proxy "
                 "(is GLEXEC_STARTER set in the shadow?)\n" );
        return false;
    }

    // using the file name of the proxy that was stashed ealier, construct
    // the name of the starter's "private directory". the naming scheme is
    // (where XXXXXX is randomly generated via condor_mkstemp):
    //   - $(GLEXEC_USER_DIR)/startd-tmp-proxy-XXXXXX
    //       - startd's copy of the job's proxy
    //   - $(GLEXEC_USER_DIR)/starter-tmp-dir-XXXXXX
    //       - starter's private dir
    //
    MyString glexec_private_dir;
    char* dir_part = condor_dirname(proxy_file);
    ASSERT(dir_part != NULL);
    glexec_private_dir = dir_part;
    free(dir_part);
    glexec_private_dir += "/starter-tmp-dir-";
    const char* random_part = proxy_file;
    random_part += strlen(random_part) - 6;
    glexec_private_dir += random_part;
    dprintf(D_ALWAYS,
            "GLEXEC: starter private dir is '%s'\n",
            glexec_private_dir.Value());

    // get the glexec command line prefix from config
    char* glexec_argstr = param( "GLEXEC" );
    if ( ! glexec_argstr ) {
        dprintf( D_ALWAYS,
                 "cannot use glexec to spawn starter: "
                 "GLEXEC not given in config\n" );
        return false;
    }

    // cons up a command line for my_system. we'll run the
    // script $(LIBEXEC)/glexec_starter_setup.sh, which
    // will create the starter's "private directory" (and
    // its log and execute subdirectories). the value of
    // glexec_private_dir will be passed as an argument to
    // the script

    // parse the glexec args for invoking glexec_starter_setup.sh.
    // do not free them yet, except on an error, as we use them
    // again below.
    MyString setup_err;
    ArgList  glexec_setup_args;
    glexec_setup_args.SetArgV1SyntaxToCurrentPlatform();
    if( ! glexec_setup_args.AppendArgsV1RawOrV2Quoted( glexec_argstr,
            &setup_err ) ) {
        dprintf( D_ALWAYS,
                 "GLEXEC: failed to parse GLEXEC from config: %s\n",
                 setup_err.Value() );
        free( glexec_argstr );
        return 0;
    }

    // set up the rest of the arguments for the glexec setup script
    char* tmp = param("LIBEXEC");
    if (tmp == NULL) {
        dprintf( D_ALWAYS,
                 "GLEXEC: LIBEXEC not defined; can't find setup script\n" );
        free( glexec_argstr );
        return 0;
    }
    MyString libexec = tmp;
    free(tmp);
    MyString setup_script = libexec;
    setup_script += "/glexec_starter_setup.sh";
    glexec_setup_args.AppendArg(setup_script.Value());
    glexec_setup_args.AppendArg(glexec_private_dir.Value());

    // debug info.  this display format totally screws up the quoting, but
    // my_system gets it right.
    MyString disp_args;
    glexec_setup_args.GetArgsStringForDisplay(&disp_args, 0);
    dprintf (D_ALWAYS, "GLEXEC: about to glexec: ** %s **\n",
             disp_args.Value());

    // the only thing actually needed by glexec at this point is the cert, so
    // that it knows who to map to.  the pipe outputs the username that glexec
    // ended up using, on a single text line by itself.
    SetEnv( "GLEXEC_CLIENT_CERT", proxy_file );

    // create the starter's private dir
    int ret = my_system(glexec_setup_args);

    // clean up
    UnsetEnv( "GLEXEC_CLIENT_CERT");

    if ( ret != 0 ) {
        dprintf(D_ALWAYS,
                "GLEXEC: error creating private dir: my_system returned %d\n",
                ret);
        free( glexec_argstr );
        return 0;
    }

    // now prepare the starter command line, starting with glexec and its
    // options (if any), then condor_glexec_wrapper.
    MyString err;
    if( ! glexec_args.AppendArgsV1RawOrV2Quoted( glexec_argstr,
            &err ) ) {
        dprintf( D_ALWAYS,
                 "failed to parse GLEXEC from config: %s\n",
                 err.Value() );
        free( glexec_argstr );
        return 0;
    }
    free( glexec_argstr );
    MyString wrapper_path = libexec;
    wrapper_path += "/condor_glexec_wrapper";
    glexec_args.AppendArg(wrapper_path.Value());

    // complete the command line by adding in the original
    // arguments. we also make sure that the full path to the
    // starter is given
    int starter_path_pos = glexec_args.Count();
    glexec_args.AppendArgsFromArgList( orig_args );
    glexec_args.RemoveArg( starter_path_pos );
    glexec_args.InsertArg( starter_path, starter_path_pos );

    // set up the environment stuff
    if( orig_env ) {
        // first merge in the original
        glexec_env.MergeFrom( *orig_env );
    }

    // GLEXEC_MODE - get account from lcmaps
    glexec_env.SetEnv( "GLEXEC_MODE", "lcmaps_get_account" );

    // GLEXEC_CLIENT_CERT - cert to use for the mapping
    glexec_env.SetEnv( "GLEXEC_CLIENT_CERT", proxy_file );

#if defined(HAVE_EXT_GLOBUS) && !defined(SKIP_AUTHENTICATION)
    // GLEXEC_SOURCE_PROXY -  proxy to provide to the child
    //                        (file is owned by us)
    glexec_env.SetEnv( "GLEXEC_SOURCE_PROXY", proxy_file );
    dprintf (D_ALWAYS,
             "GLEXEC: setting GLEXEC_SOURCE_PROXY to %s\n",
             proxy_file);

    // GLEXEC_TARGET_PROXY - child-owned file to copy its proxy to.
    // this needs to be in a directory owned by that user, and not world
    // writable.  glexec enforces this.  hence, all the whoami/mkdir mojo
    // above.
    MyString child_proxy_file = glexec_private_dir;
    child_proxy_file += "/glexec_starter_proxy";
    dprintf (D_ALWAYS, "GLEXEC: setting GLEXEC_TARGET_PROXY to %s\n",
             child_proxy_file.Value());
    glexec_env.SetEnv( "GLEXEC_TARGET_PROXY", child_proxy_file.Value() );

    // _CONDOR_GSI_DAEMON_PROXY - starter's proxy
    MyString var_name;
    var_name.sprintf("_CONDOR_%s", STR_GSI_DAEMON_PROXY);
    glexec_env.SetEnv( var_name.Value(), child_proxy_file.Value() );
    var_name.sprintf("_condor_%s", STR_GSI_DAEMON_PROXY);
    glexec_env.SetEnv( var_name.Value(), child_proxy_file.Value() );
#endif

    // the EXECUTE dir should be owned by the mapped user.  we created this
    // earlier, and now we override it in the condor_config via the
    // environment.
    MyString execute_dir = glexec_private_dir;
    execute_dir += "/execute";
    glexec_env.SetEnv ( "_CONDOR_EXECUTE", execute_dir.Value());
    glexec_env.SetEnv ( "_condor_EXECUTE", execute_dir.Value());

    // the LOG dir should be owned by the mapped user.  we created this
    // earlier, and now we override it in the condor_config via the
    // environment.
    MyString log_dir = glexec_private_dir;
    log_dir += "/log";
    glexec_env.SetEnv ( "_CONDOR_LOG", log_dir.Value());
    glexec_env.SetEnv ( "_condor_LOG", log_dir.Value());
    glexec_env.SetEnv ( "_CONDOR_LOCK", log_dir.Value());
    glexec_env.SetEnv ( "_condor_LOCK", log_dir.Value());

    // PROCD_ADDRESS: the Starter that we are about to create will
    // not have access to our ProcD. we'll explicitly set PROCD_ADDRESS
    // to be in its LOG directory. the Starter will see that its
    // PROCD_ADDRESS knob is different from what it inherits in
    // CONDOR_PROCD_ADDRESS, and know it needs to create its own ProcD
    //
    MyString procd_address = log_dir;
    procd_address += "/procd_pipe";
    glexec_env.SetEnv( "_CONDOR_PROCD_ADDRESS", procd_address.Value() );
    glexec_env.SetEnv( "_condor_PROCD_ADDRESS", procd_address.Value() );

    // CONDOR_GLEXEC_STARTER_CLEANUP_FLAG: this serves as a flag in the
    // Starter's environment that it will check for in order to determine
    // whether to do GLEXEC_STARTER-specific cleanup
    //
    glexec_env.SetEnv( "CONDOR_GLEXEC_STARTER_CLEANUP_FLAG",
                       "CONDOR_GLEXEC_STARTER_CLEANUP_FLAG" );

    // now set up a socket pair for communication with
    // condor_glexec_wrapper
    //
    if (socketpair(PF_UNIX, SOCK_STREAM, 0, s_saved_sock_fds) == -1)
    {
        dprintf(D_ALWAYS,
                "GLEXEC: socketpair error: %s\n",
                strerror(errno));
        return false;
    }
    glexec_std_fds[0] = s_saved_sock_fds[1];
    if (orig_std_fds == NULL) {
        s_saved_starter_stdin = -1;
        glexec_std_fds[1] = glexec_std_fds[2] = -1;
    }
    else {
        s_saved_starter_stdin = orig_std_fds[0];
        glexec_std_fds[1] = orig_std_fds[1];
        glexec_std_fds[2] = orig_std_fds[2];
    }

    // save the environment we're handing back to the caller for use in
    // glexec_starter_handle_env()
    //
    s_saved_env.Clear();
    s_saved_env.MergeFrom(glexec_env);

    return true;
}