int DockerProc::StartJob() { std::string imageID; if( ! JobAd->LookupString( ATTR_DOCKER_IMAGE, imageID ) ) { dprintf( D_ALWAYS | D_FAILURE, "%s not defined in job ad, unable to start job.\n", ATTR_DOCKER_IMAGE ); return FALSE; } std::string command; JobAd->LookupString( ATTR_JOB_CMD, command ); dprintf( D_FULLDEBUG, "%s: '%s'\n", ATTR_JOB_CMD, command.c_str() ); std::string sandboxPath = Starter->jic->jobRemoteIWD(); // // This code is deliberately wrong, probably for backwards-compability. // (See the code in JICShadow::beginFileTransfer(), which assumes that // we transferred the executable if ATTR_TRANSFER_EXECUTABLE is unset.) // Rather than risk breaking anything by fixing condor_submit (which // does not set ATTR_TRANSFER_EXECUTABLE unless it's false) -- and // introducing a version dependency -- assume the executable was // transferred unless it was explicitly noted otherwise. // bool transferExecutable = true; JobAd->LookupBool( ATTR_TRANSFER_EXECUTABLE, transferExecutable ); if( transferExecutable ) { command = sandboxPath + "/" + command; } ArgList args; args.SetArgV1SyntaxToCurrentPlatform(); MyString argsError; if( ! args.AppendArgsFromClassAd( JobAd, & argsError ) ) { dprintf( D_ALWAYS | D_FAILURE, "Failed to read job arguments from job ad: '%s'.\n", argsError.c_str() ); return FALSE; } Env job_env; MyString env_errors; if( !Starter->GetJobEnv(JobAd,&job_env,&env_errors) ) { dprintf( D_ALWAYS, "Aborting DockerProc::StartJob: %s\n", env_errors.Value()); return 0; } // The GlobalJobID is unsuitable by virtue its octothorpes. This // construction is informative, but could be made even less likely // to collide if it had a timestamp. formatstr( containerName, "HTCJob%d_%d_%s_PID%d", Starter->jic->jobCluster(), Starter->jic->jobProc(), Starter->getMySlotName().c_str(), // note: this can be "" for single slot machines. getpid() ); // // Do I/O redirection (includes streaming). // int childFDs[3] = { -2, -2, -2 }; { TemporaryPrivSentry sentry(PRIV_USER); // getStdFile() returns -1 on error. if( -1 == (childFDs[0] = openStdFile( SFT_IN, NULL, true, "Input file" )) ) { dprintf( D_ALWAYS | D_FAILURE, "DockerProc::StartJob(): failed to open stdin.\n" ); return FALSE; } if( -1 == (childFDs[1] = openStdFile( SFT_OUT, NULL, true, "Output file" )) ) { dprintf( D_ALWAYS | D_FAILURE, "DockerProc::StartJob(): failed to open stdout.\n" ); daemonCore->Close_FD( childFDs[0] ); return FALSE; } if( -1 == (childFDs[2] = openStdFile( SFT_ERR, NULL, true, "Error file" )) ) { dprintf( D_ALWAYS | D_FAILURE, "DockerProc::StartJob(): failed to open stderr.\n" ); daemonCore->Close_FD( childFDs[0] ); daemonCore->Close_FD( childFDs[1] ); return FALSE; } } // Ulog the execute event Starter->jic->notifyJobPreSpawn(); CondorError err; // DockerAPI::run() returns a PID from daemonCore->Create_Process(), which // makes it suitable for passing up into VanillaProc. This combination // will trigger the reaper(s) when the container terminates. ClassAd *machineAd = Starter->jic->machClassAd(); std::list<std::string> extras; buildExtraVolumes(extras); int rv = DockerAPI::run( *machineAd, containerName, imageID, command, args, job_env, sandboxPath, extras, JobPid, childFDs, err ); if( rv < 0 ) { dprintf( D_ALWAYS | D_FAILURE, "DockerAPI::run( %s, %s, ... ) failed with return value %d\n", imageID.c_str(), command.c_str(), rv ); return FALSE; } dprintf( D_FULLDEBUG, "DockerAPI::run() returned pid %d\n", JobPid ); // TODO: Start a timer to poll for job usage updates. ++num_pids; // Used by OsProc::PublishUpdateAd(). return TRUE; }
int OsProc::StartJob(FamilyInfo* family_info, FilesystemRemap* fs_remap=NULL) { int nice_inc = 0; bool has_wrapper = false; dprintf(D_FULLDEBUG,"in OsProc::StartJob()\n"); if ( !JobAd ) { dprintf ( D_ALWAYS, "No JobAd in OsProc::StartJob()!\n" ); return 0; } MyString JobName; if ( JobAd->LookupString( ATTR_JOB_CMD, JobName ) != 1 ) { dprintf( D_ALWAYS, "%s not found in JobAd. Aborting StartJob.\n", ATTR_JOB_CMD ); return 0; } const char* job_iwd = Starter->jic->jobRemoteIWD(); dprintf( D_ALWAYS, "IWD: %s\n", job_iwd ); // some operations below will require a PrivSepHelper if // PrivSep is enabled (if it's not, privsep_helper will be // NULL) PrivSepHelper* privsep_helper = Starter->privSepHelper(); // // // // // // // Arguments // // // // // // // prepend the full path to this name so that we // don't have to rely on the PATH inside the // USER_JOB_WRAPPER or for exec(). bool transfer_exe = false; if (!JobAd->LookupBool(ATTR_TRANSFER_EXECUTABLE, transfer_exe)) { transfer_exe = false; } bool preserve_rel = false; if (!JobAd->LookupBool(ATTR_PRESERVE_RELATIVE_EXECUTABLE, preserve_rel)) { preserve_rel = false; } bool relative_exe = is_relative_to_cwd(JobName.Value()); if (relative_exe && preserve_rel && !transfer_exe) { dprintf(D_ALWAYS, "Preserving relative executable path: %s\n", JobName.Value()); } else if ( strcmp(CONDOR_EXEC,JobName.Value()) == 0 ) { JobName.formatstr( "%s%c%s", Starter->GetWorkingDir(), DIR_DELIM_CHAR, CONDOR_EXEC ); } else if (relative_exe && job_iwd && *job_iwd) { MyString full_name; full_name.formatstr("%s%c%s", job_iwd, DIR_DELIM_CHAR, JobName.Value()); JobName = full_name; } if( Starter->isGridshell() ) { // if we're a gridshell, just try to chmod our job, since // globus probably transfered it for us and left it with // bad permissions... priv_state old_priv = set_user_priv(); int retval = chmod( JobName.Value(), S_IRWXU | S_IRWXO | S_IRWXG ); set_priv( old_priv ); if( retval < 0 ) { dprintf ( D_ALWAYS, "Failed to chmod %s!\n", JobName.Value() ); return 0; } } ArgList args; // Since we may be adding to the argument list, we may need to deal // with platform-specific arg syntax in the user's args in order // to successfully merge them with the additional wrapper args. args.SetArgV1SyntaxToCurrentPlatform(); // First, put "condor_exec" or whatever at the front of Args, // since that will become argv[0] of what we exec(), either // the wrapper or the actual job. if( !getArgv0() ) { args.AppendArg(JobName.Value()); } else { args.AppendArg(getArgv0()); } // Support USER_JOB_WRAPPER parameter... char *wrapper = NULL; if( (wrapper=param("USER_JOB_WRAPPER")) ) { // make certain this wrapper program exists and is executable if( access(wrapper,X_OK) < 0 ) { dprintf( D_ALWAYS, "Cannot find/execute USER_JOB_WRAPPER file %s\n", wrapper ); free( wrapper ); return 0; } has_wrapper = true; // Now, we've got a valid wrapper. We want that to become // "JobName" so we exec it directly, and we want to put // what was the JobName (with the full path) as the first // argument to the wrapper args.AppendArg(JobName.Value()); JobName = wrapper; free(wrapper); } // Support USE_PARROT bool use_parrot = false; if( JobAd->LookupBool( ATTR_USE_PARROT, use_parrot) ) { // Check for parrot executable char *parrot = NULL; if( (parrot=param("PARROT")) ) { if( access(parrot,X_OK) < 0 ) { dprintf( D_ALWAYS, "Unable to use parrot(Cannot find/execute " "at %s(%s)).\n", parrot, strerror(errno) ); free( parrot ); return 0; } else { args.AppendArg(JobName.Value()); JobName = parrot; free( parrot ); } } else { dprintf( D_ALWAYS, "Unable to use parrot(Undefined path in config" " file)" ); return 0; } } // Either way, we now have to add the user-specified args as // the rest of the Args string. MyString args_error; if(!args.AppendArgsFromClassAd(JobAd,&args_error)) { dprintf(D_ALWAYS, "Failed to read job arguments from JobAd. " "Aborting OsProc::StartJob: %s\n",args_error.Value()); return 0; } // // // // // // // Environment // // // // // // // Now, instantiate an Env object so we can manipulate the // environment as needed. Env job_env; MyString env_errors; if( !Starter->GetJobEnv(JobAd,&job_env,&env_errors) ) { dprintf( D_ALWAYS, "Aborting OSProc::StartJob: %s\n", env_errors.Value()); return 0; } // // // // // // // Standard Files // // // // // // // handle stdin, stdout, and stderr redirection int fds[3]; // initialize these to -2 to mean they're not specified. // -1 will be treated as an error. fds[0] = -2; fds[1] = -2; fds[2] = -2; // in order to open these files we must have the user's privs: priv_state priv; priv = set_user_priv(); // if we're in PrivSep mode, we won't necessarily be able to // open the files for the job. getStdFile will return us an // open FD in some situations, but otherwise will give us // a filename that we'll pass to the PrivSep Switchboard // bool stdin_ok; bool stdout_ok; bool stderr_ok; MyString privsep_stdin_name; MyString privsep_stdout_name; MyString privsep_stderr_name; if (privsep_helper != NULL) { stdin_ok = getStdFile(SFT_IN, NULL, true, "Input file", &fds[0], &privsep_stdin_name); stdout_ok = getStdFile(SFT_OUT, NULL, true, "Output file", &fds[1], &privsep_stdout_name); stderr_ok = getStdFile(SFT_ERR, NULL, true, "Error file", &fds[2], &privsep_stderr_name); } else { fds[0] = openStdFile( SFT_IN, NULL, true, "Input file"); stdin_ok = (fds[0] != -1); fds[1] = openStdFile( SFT_OUT, NULL, true, "Output file"); stdout_ok = (fds[1] != -1); fds[2] = openStdFile( SFT_ERR, NULL, true, "Error file"); stderr_ok = (fds[2] != -1); } /* Bail out if we couldn't open the std files correctly */ if( !stdin_ok || !stdout_ok || !stderr_ok ) { /* only close ones that had been opened correctly */ for ( int i = 0; i <= 2; i++ ) { if ( fds[i] >= 0 ) { daemonCore->Close_FD ( fds[i] ); } } dprintf(D_ALWAYS, "Failed to open some/all of the std files...\n"); dprintf(D_ALWAYS, "Aborting OsProc::StartJob.\n"); set_priv(priv); /* go back to original priv state before leaving */ return 0; } // // // // // // // Misc + Exec // // // // // // if( !ThisProcRunsAlongsideMainProc() ) { Starter->jic->notifyJobPreSpawn(); } // compute job's renice value by evaluating the machine's // JOB_RENICE_INCREMENT in the context of the job ad... char* ptmp = param( "JOB_RENICE_INCREMENT" ); if( ptmp ) { // insert renice expr into our copy of the job ad MyString reniceAttr = "Renice = "; reniceAttr += ptmp; if( !JobAd->Insert( reniceAttr.Value() ) ) { dprintf( D_ALWAYS, "ERROR: failed to insert JOB_RENICE_INCREMENT " "into job ad, Aborting OsProc::StartJob...\n" ); free( ptmp ); return 0; } // evaluate if( JobAd->EvalInteger( "Renice", NULL, nice_inc ) ) { dprintf( D_ALWAYS, "Renice expr \"%s\" evaluated to %d\n", ptmp, nice_inc ); } else { dprintf( D_ALWAYS, "WARNING: job renice expr (\"%s\") doesn't " "eval to int! Using default of 10...\n", ptmp ); nice_inc = 10; } // enforce valid ranges for nice_inc if( nice_inc < 0 ) { dprintf( D_FULLDEBUG, "WARNING: job renice value (%d) is too " "low: adjusted to 0\n", nice_inc ); nice_inc = 0; } else if( nice_inc > 19 ) { dprintf( D_FULLDEBUG, "WARNING: job renice value (%d) is too " "high: adjusted to 19\n", nice_inc ); nice_inc = 19; } ASSERT( ptmp ); free( ptmp ); ptmp = NULL; } else { // if JOB_RENICE_INCREMENT is undefined, default to 0 nice_inc = 0; } // in the below dprintfs, we want to skip past argv[0], which // is sometimes condor_exec, in the Args string. MyString args_string; args.GetArgsStringForDisplay(&args_string, 1); if( has_wrapper ) { // print out exactly what we're doing so folks can debug // it, if they need to. dprintf( D_ALWAYS, "Using wrapper %s to exec %s\n", JobName.Value(), args_string.Value() ); } else { dprintf( D_ALWAYS, "About to exec %s %s\n", JobName.Value(), args_string.Value() ); } // Grab the full environment back out of the Env object if(IsFulldebug(D_FULLDEBUG)) { MyString env_string; job_env.getDelimitedStringForDisplay(&env_string); dprintf(D_FULLDEBUG, "Env = %s\n", env_string.Value()); } // Check to see if we need to start this process paused, and if // so, pass the right flag to DC::Create_Process(). int job_opt_mask = DCJOBOPT_NO_CONDOR_ENV_INHERIT; if (!param_boolean("JOB_INHERITS_STARTER_ENVIRONMENT",false)) { job_opt_mask |= DCJOBOPT_NO_ENV_INHERIT; } int suspend_job_at_exec = 0; JobAd->LookupBool( ATTR_SUSPEND_JOB_AT_EXEC, suspend_job_at_exec); if( suspend_job_at_exec ) { dprintf( D_FULLDEBUG, "OsProc::StartJob(): " "Job wants to be suspended at exec\n" ); job_opt_mask |= DCJOBOPT_SUSPEND_ON_EXEC; } // If there is a requested coresize for this job, enforce it. // Convert negative and very large values to RLIM_INFINITY, meaning // no size limit. // RLIM_INFINITY is unsigned, but its value and type size vary. long long core_size_ad; size_t core_size; size_t *core_size_ptr = NULL; #if !defined(WIN32) if ( JobAd->LookupInteger( ATTR_CORE_SIZE, core_size_ad ) ) { if ( core_size_ad < 0 || (unsigned long long)core_size_ad > RLIM_INFINITY ) { core_size = RLIM_INFINITY; } else { core_size = (size_t)core_size_ad; } core_size_ptr = &core_size; } #endif // !defined(WIN32) long rlimit_as_hard_limit = 0; char *rlimit_expr = param("STARTER_RLIMIT_AS"); if (rlimit_expr) { classad::ClassAdParser parser; classad::ExprTree *tree = parser.ParseExpression(rlimit_expr); if (tree) { classad::Value val; long long result; if (EvalExprTree(tree, Starter->jic->machClassAd(), JobAd, val) && val.IsIntegerValue(result)) { result *= 1024 * 1024; // convert to megabytes rlimit_as_hard_limit = (long)result; // truncate for Create_Process if (result > rlimit_as_hard_limit) { // if truncation to long results in a change in the value, then // the requested limit must be > 2 GB and we are on a 32 bit platform // in that case, the requested limit is > than what the process can get anyway // so just don't set a limit. rlimit_as_hard_limit = 0; } if (rlimit_as_hard_limit > 0) { dprintf(D_ALWAYS, "Setting job's virtual memory rlimit to %ld megabytes\n", rlimit_as_hard_limit); } } else { dprintf(D_ALWAYS, "Can't evaluate STARTER_RLIMIT_AS expression %s\n", rlimit_expr); } } else { dprintf(D_ALWAYS, "Can't parse STARTER_RLIMIT_AS expression: %s\n", rlimit_expr); } } int *affinity_mask = makeCpuAffinityMask(Starter->getMySlotNumber()); #if defined ( WIN32 ) owner_profile_.update (); /************************************************************* NOTE: We currently *ONLY* support loading slot-user profiles. This limitation will be addressed shortly, by allowing regular users to load their registry hive - Ben [2008-09-31] **************************************************************/ bool load_profile = false, run_as_owner = false; JobAd->LookupBool ( ATTR_JOB_LOAD_PROFILE, load_profile ); JobAd->LookupBool ( ATTR_JOB_RUNAS_OWNER, run_as_owner ); if ( load_profile && !run_as_owner ) { if ( owner_profile_.load () ) { /* publish the users environment into that of the main job's environment */ if ( !owner_profile_.environment ( job_env ) ) { dprintf ( D_ALWAYS, "OsProc::StartJob(): Failed to " "export owner's environment.\n" ); } } else { dprintf ( D_ALWAYS, "OsProc::StartJob(): Failed to load " "owner's profile.\n" ); } } #endif // While we are still in user priv, print out the username #if defined(LINUX) if( Starter->glexecPrivSepHelper() ) { // TODO: if there is some way to figure out the final username, // print it out here or after starting the job. dprintf(D_ALWAYS,"Running job via glexec\n"); } #else if( false ) { } #endif else { char const *username = NULL; char const *how = ""; CondorPrivSepHelper* cpsh = Starter->condorPrivSepHelper(); if( cpsh ) { username = cpsh->get_user_name(); how = "via privsep switchboard "; } else { username = get_user_loginname(); } if( !username ) { username = "******"; } dprintf(D_ALWAYS,"Running job %sas user %s\n",how,username); } set_priv ( priv ); // use this to return more detailed and reliable error message info // from create-process operation. MyString create_process_err_msg; if (privsep_helper != NULL) { const char* std_file_names[3] = { privsep_stdin_name.Value(), privsep_stdout_name.Value(), privsep_stderr_name.Value() }; JobPid = privsep_helper->create_process(JobName.Value(), args, job_env, job_iwd, fds, std_file_names, nice_inc, core_size_ptr, 1, job_opt_mask, family_info, affinity_mask, &create_process_err_msg); } else { JobPid = daemonCore->Create_Process( JobName.Value(), args, PRIV_USER_FINAL, 1, FALSE, FALSE, &job_env, job_iwd, family_info, NULL, fds, NULL, nice_inc, NULL, job_opt_mask, core_size_ptr, affinity_mask, NULL, &create_process_err_msg, fs_remap, rlimit_as_hard_limit); } // Create_Process() saves the errno for us if it is an "interesting" error. int create_process_errno = errno; // errno is 0 in the privsep case. This executes for the daemon core create-process logic if ((FALSE == JobPid) && (0 != create_process_errno)) { if (create_process_err_msg != "") create_process_err_msg += " "; MyString errbuf; errbuf.formatstr("(errno=%d: '%s')", create_process_errno, strerror(create_process_errno)); create_process_err_msg += errbuf; } // now close the descriptors in fds array. our child has inherited // them already, so we should close them so we do not leak descriptors. // NOTE, we want to use a special method to close the starter's // versions, if that's what we're using, so we don't think we've // still got those available in other parts of the code for any // reason. for ( int i = 0; i <= 2; i++ ) { if ( fds[i] >= 0 ) { daemonCore->Close_FD ( fds[i] ); } } if ( JobPid == FALSE ) { JobPid = -1; if(!create_process_err_msg.IsEmpty()) { // if the reason Create_Process failed was that registering // a family with the ProcD failed, it is indicative of a // problem regarding this execute machine, not the job. in // this case, we'll want to EXCEPT instead of telling the // Shadow to put the job on hold. there are probably other // error conditions where EXCEPTing would be more appropriate // as well... // if (create_process_errno == DaemonCore::ERRNO_REGISTRATION_FAILED) { EXCEPT("Create_Process failed to register the job with the ProcD"); } MyString err_msg = "Failed to execute '"; err_msg += JobName; err_msg += "'"; if(!args_string.IsEmpty()) { err_msg += " with arguments "; err_msg += args_string.Value(); } err_msg += ": "; err_msg += create_process_err_msg; if( !ThisProcRunsAlongsideMainProc() ) { Starter->jic->notifyStarterError( err_msg.Value(), true, CONDOR_HOLD_CODE_FailedToCreateProcess, create_process_errno ); } } dprintf(D_ALWAYS,"Create_Process(%s,%s, ...) failed: %s\n", JobName.Value(), args_string.Value(), create_process_err_msg.Value()); return 0; } num_pids++; dprintf(D_ALWAYS,"Create_Process succeeded, pid=%d\n",JobPid); job_start_time.getTime(); return 1; }