int DockerProc::StartJob() { std::string imageID; if( ! JobAd->LookupString( ATTR_DOCKER_IMAGE, imageID ) ) { dprintf( D_ALWAYS | D_FAILURE, "%s not defined in job ad, unable to start job.\n", ATTR_DOCKER_IMAGE ); return FALSE; } std::string command; JobAd->LookupString( ATTR_JOB_CMD, command ); dprintf( D_FULLDEBUG, "%s: '%s'\n", ATTR_JOB_CMD, command.c_str() ); std::string sandboxPath = Starter->jic->jobRemoteIWD(); // // This code is deliberately wrong, probably for backwards-compability. // (See the code in JICShadow::beginFileTransfer(), which assumes that // we transferred the executable if ATTR_TRANSFER_EXECUTABLE is unset.) // Rather than risk breaking anything by fixing condor_submit (which // does not set ATTR_TRANSFER_EXECUTABLE unless it's false) -- and // introducing a version dependency -- assume the executable was // transferred unless it was explicitly noted otherwise. // bool transferExecutable = true; JobAd->LookupBool( ATTR_TRANSFER_EXECUTABLE, transferExecutable ); if( transferExecutable ) { command = sandboxPath + "/" + command; } ArgList args; args.SetArgV1SyntaxToCurrentPlatform(); MyString argsError; if( ! args.AppendArgsFromClassAd( JobAd, & argsError ) ) { dprintf( D_ALWAYS | D_FAILURE, "Failed to read job arguments from job ad: '%s'.\n", argsError.c_str() ); return FALSE; } Env job_env; MyString env_errors; if( !Starter->GetJobEnv(JobAd,&job_env,&env_errors) ) { dprintf( D_ALWAYS, "Aborting DockerProc::StartJob: %s\n", env_errors.Value()); return 0; } // The GlobalJobID is unsuitable by virtue its octothorpes. This // construction is informative, but could be made even less likely // to collide if it had a timestamp. formatstr( containerName, "HTCJob%d_%d_%s_PID%d", Starter->jic->jobCluster(), Starter->jic->jobProc(), Starter->getMySlotName().c_str(), // note: this can be "" for single slot machines. getpid() ); // // Do I/O redirection (includes streaming). // int childFDs[3] = { -2, -2, -2 }; { TemporaryPrivSentry sentry(PRIV_USER); // getStdFile() returns -1 on error. if( -1 == (childFDs[0] = openStdFile( SFT_IN, NULL, true, "Input file" )) ) { dprintf( D_ALWAYS | D_FAILURE, "DockerProc::StartJob(): failed to open stdin.\n" ); return FALSE; } if( -1 == (childFDs[1] = openStdFile( SFT_OUT, NULL, true, "Output file" )) ) { dprintf( D_ALWAYS | D_FAILURE, "DockerProc::StartJob(): failed to open stdout.\n" ); daemonCore->Close_FD( childFDs[0] ); return FALSE; } if( -1 == (childFDs[2] = openStdFile( SFT_ERR, NULL, true, "Error file" )) ) { dprintf( D_ALWAYS | D_FAILURE, "DockerProc::StartJob(): failed to open stderr.\n" ); daemonCore->Close_FD( childFDs[0] ); daemonCore->Close_FD( childFDs[1] ); return FALSE; } } // Ulog the execute event Starter->jic->notifyJobPreSpawn(); CondorError err; // DockerAPI::run() returns a PID from daemonCore->Create_Process(), which // makes it suitable for passing up into VanillaProc. This combination // will trigger the reaper(s) when the container terminates. ClassAd *machineAd = Starter->jic->machClassAd(); std::list<std::string> extras; buildExtraVolumes(extras); int rv = DockerAPI::run( *machineAd, containerName, imageID, command, args, job_env, sandboxPath, extras, JobPid, childFDs, err ); if( rv < 0 ) { dprintf( D_ALWAYS | D_FAILURE, "DockerAPI::run( %s, %s, ... ) failed with return value %d\n", imageID.c_str(), command.c_str(), rv ); return FALSE; } dprintf( D_FULLDEBUG, "DockerAPI::run() returned pid %d\n", JobPid ); // TODO: Start a timer to poll for job usage updates. ++num_pids; // Used by OsProc::PublishUpdateAd(). return TRUE; }
int ScriptProc::StartJob() { dprintf(D_FULLDEBUG,"in ScriptProc::StartJob()\n"); if ( !JobAd ) { dprintf ( D_ALWAYS, "No JobAd in ScriptProc::StartJob()!\n" ); return 0; } MyString attr; attr = name; attr += ATTR_JOB_CMD; char* tmp = NULL; if( ! JobAd->LookupString( attr.Value(), &tmp ) ) { dprintf( D_ALWAYS, "%s not found in JobAd. Aborting StartJob.\n", attr.Value() ); return 0; } // // // // // // // executable // // // // // // // TODO: make it smart in cases we're not the gridshell and/or // didn't transfer files so that we don't prepend the wrong // path to the binary, and don't try to chmod it. MyString exe_path = Starter->GetWorkingDir(); exe_path += DIR_DELIM_CHAR; exe_path += tmp; free( tmp ); tmp = NULL; if( Starter->isGridshell() ) { // if we're a gridshell, chmod() the binary, since globus // probably transfered it for us and left it with bad // permissions... priv_state old_priv = set_user_priv(); int retval = chmod( exe_path.Value(), 0755 ); set_priv( old_priv ); if( retval < 0 ) { dprintf( D_ALWAYS, "Failed to chmod %s: %s (errno %d)\n", exe_path.Value(), strerror(errno), errno ); return 0; } } // // // // // // // Args // // // // // // char *args1 = NULL; char *args2 = NULL; MyString args1_attr; MyString args2_attr; args1_attr = name; args1_attr += ATTR_JOB_ARGUMENTS1; args2_attr = name; args2_attr += ATTR_JOB_ARGUMENTS2; JobAd->LookupString(args1_attr.Value(), &args1); JobAd->LookupString(args2_attr.Value(), &args2); ArgList args; // Since we are adding to the argument list, we may need to deal // with platform-specific arg syntax in the user's args in order // to successfully merge them with the additional args. args.SetArgV1SyntaxToCurrentPlatform(); // First, put "condor_<name>script" at the front of Args, // since that will become argv[0] of what we exec(), either // the wrapper or the actual job. MyString arg0; arg0 = "condor_"; arg0 += name; arg0 += "script"; args.AppendArg(arg0.Value()); MyString args_error; if(args2 && *args2) { args.AppendArgsV2Raw(args2,&args_error); } else if(args1 && *args1) { args.AppendArgsV1Raw(args1,&args_error); } else { dprintf( D_FULLDEBUG, "neither %s nor %s could be found in JobAd\n", args1_attr.Value(), args2_attr.Value()); } free( args1 ); free( args2 ); // // // // // // // Environment // // // // // // char *env1 = NULL; char *env2 = NULL; MyString env1_attr; MyString env2_attr; env1_attr = name; env1_attr += ATTR_JOB_ENVIRONMENT1; env2_attr = name; env2_attr += ATTR_JOB_ENVIRONMENT2; JobAd->LookupString( env1_attr.Value(), &env1 ); JobAd->LookupString( env2_attr.Value(), &env2 ); // TODO do we want to use the regular ATTR_JOB_ENVIRONMENT // if there's nothing specific for this script? // Now, instantiate an Env object so we can manipulate the // environment as needed. Env job_env; MyString env_errors; if( env2 && *env2 ) { if( ! job_env.MergeFromV2Raw(env2,&env_errors) ) { dprintf( D_ALWAYS, "Invalid %s found in JobAd (%s). " "Aborting ScriptProc::StartJob.\n", env2_attr.Value(),env_errors.Value() ); free( env1 ); free( env2 ); return 0; } } else if( env1 && *env1 ) { if( ! job_env.MergeFromV1Raw(env1,&env_errors) ) { dprintf( D_ALWAYS, "Invalid %s found in JobAd (%s). " "Aborting ScriptProc::StartJob.\n", env1_attr.Value(),env_errors.Value() ); free( env1 ); free( env2 ); return 0; } } free(env1); free(env2); // Now, let the starter publish any env vars it wants to add Starter->PublishToEnv( &job_env ); // TODO: Deal with port regulation stuff? // Grab the full environment back out of the Env object if(IsFulldebug(D_FULLDEBUG)) { MyString env_str; job_env.getDelimitedStringForDisplay(&env_str); dprintf(D_FULLDEBUG, "%sEnv = %s\n", name, env_str.Value() ); } // // // // // // // Standard Files // // // // // // // TODO??? // // // // // // // Misc + Exec // // // // // // // TODO? // Starter->jic->notifyJobPreSpawn( name ); // compute job's renice value by evaluating the machine's // JOB_RENICE_INCREMENT in the context of the job ad... // TODO? int nice_inc = 10; // in the below dprintfs, we want to skip past argv[0], which // is sometimes condor_exec, in the Args string. MyString args_string; args.GetArgsStringForDisplay(&args_string,1); dprintf( D_ALWAYS, "About to exec %s script: %s %s\n", name, exe_path.Value(), args_string.Value() ); // If there is a requested coresize for this job, enforce it. // It is truncated because you can't put an unsigned integer // into a classad. I could rewrite condor's use of ATTR_CORE_SIZE to // be a float, but then when that attribute is read/written to the // job queue log by/or shared between versions of Condor which view the // type of that attribute differently, calamity would arise. int core_size_truncated; size_t core_size; size_t *core_size_ptr = NULL; if ( JobAd->LookupInteger(ATTR_CORE_SIZE, core_size_truncated) ) { core_size = (size_t)core_size_truncated; core_size_ptr = &core_size; } JobPid = daemonCore->Create_Process(exe_path.Value(), args, PRIV_USER_FINAL, 1, FALSE, FALSE, &job_env, Starter->jic->jobIWD(), NULL, NULL, NULL, NULL, nice_inc, NULL, DCJOBOPT_NO_ENV_INHERIT, core_size_ptr ); //NOTE: Create_Process() saves the errno for us if it is an //"interesting" error. char const *create_process_error = NULL; int create_process_errno = errno; if( JobPid == FALSE && errno ) { create_process_error = strerror( errno ); } if( JobPid == FALSE ) { JobPid = -1; if( create_process_error ) { MyString err_msg = "Failed to execute '"; err_msg += exe_path.Value(); err_msg += "'"; if(!args_string.IsEmpty()) { err_msg += " with arguments "; err_msg += args_string.Value(); } err_msg += ": "; err_msg += create_process_error; Starter->jic->notifyStarterError( err_msg.Value(), true, CONDOR_HOLD_CODE_FailedToCreateProcess, create_process_errno ); } EXCEPT( "Create_Process(%s,%s, ...) failed", exe_path.Value(), args_string.Value() ); return 0; } dprintf( D_ALWAYS, "Create_Process succeeded, pid=%d\n", JobPid ); job_start_time.getTime(); return 1; }
void do_process_request(const ClassAd *inputAd, ClassAd *resultAd, const int req_number, const char *iwd, const char *stdio_iwd) { // Check for inputAd if ( !inputAd ) { handle_process_request_error("No input ad",req_number,resultAd); return; } // Map the CMD specified in the input via the config file. MyString UnmappedJobName,JobName; if (inputAd->LookupString(ATTR_JOB_CMD,UnmappedJobName) == 0 ) { // no CMD specified. handle_process_request_error("No CMD specified",req_number,resultAd); return; } char *auth_commands = param("SOAPSHELL_AUTHORIZED_COMMANDS"); StringList auth_list(auth_commands,","); if ( auth_commands ) free(auth_commands); // Each command needs four tuples; anything else is a misconfiguration if ( auth_list.number() % 4 != 0 ) { handle_process_request_error("Service is misconfigured: SOAPSHELL_AUTHORIZED_COMMANDS malformed",req_number,resultAd); return; } if ( auth_list.contains_anycase(UnmappedJobName.Value()) == TRUE ) { JobName = auth_list.next(); } if ( JobName.IsEmpty() ) { // the CMD not authorized handle_process_request_error("Requested CMD not authorized via SOAPSHELL_AUTHORIZED_COMMANDS",req_number,resultAd); return; } // handle command line arguments. ArgList args; args.SetArgV1SyntaxToCurrentPlatform(); args.AppendArg(JobName.Value()); // set argv[0] to command char *soapshell_args = auth_list.next(); if ( soapshell_args && strcmp(soapshell_args,"*") ) { if(!args.AppendArgsV1RawOrV2Quoted(soapshell_args,NULL)) { dprintf( D_ALWAYS, "ERROR: SOAPSHELL_ARGS config macro invalid\n" ); } } else if(!args.AppendArgsFromClassAd(inputAd,NULL)) { handle_process_request_error("Failed to setup CMD arguments",req_number,resultAd); return; } // handle the environment. Env job_env; char *env_str = auth_list.next(); if ( env_str && strcmp(env_str,"*") ) { if(!job_env.MergeFromV1RawOrV2Quoted(env_str,NULL) ) { dprintf(D_ALWAYS,"ERROR: SOAPSHELL_ENVIRONMENT config macro invalid\n"); } } else if(!job_env.MergeFrom(inputAd,NULL)) { // bad environment string in job ad! handle_process_request_error("Request has faulty environment string",req_number,resultAd); return; } // Write input files into iwd (we will write stdin later) if ( !write_input_files(inputAd, iwd) ) { // failed to write input files handle_process_request_error("Failed to write input files",req_number,resultAd); return; } // handle stdin, stdout, and stderr redirection const char* jobstdin_ = dircat(stdio_iwd,"stdin"); MyString jobstdin(jobstdin_); const char* jobstdout_ = dircat(stdio_iwd,"stdout"); MyString jobstdout(jobstdout_); const char* jobstderr_ = dircat(stdio_iwd,"stderr"); MyString jobstderr(jobstderr_); delete [] jobstdin_; delete [] jobstdout_; delete [] jobstderr_; int flags = O_WRONLY | O_CREAT | O_TRUNC | O_APPEND | O_LARGEFILE; // write stdin file is needed { char *input = NULL; unsigned char *output = NULL; int output_length = 0; int fd = -1; inputAd->LookupString(ATTR_JOB_INPUT,&input); if ( input ) { // Caller needs to free *output if non-NULL condor_base64_decode(input,&output,&output_length); if ( output ) { fd = safe_open_wrapper_follow( jobstdin.Value(), flags, 0666 ); if ( fd > -1 ) { write(fd,output,output_length); close(fd); } free(output); } free(input); if ( fd < 0 ) { handle_process_request_error("Failed to write stdin",req_number,resultAd); return; } } } int fds[3]; // initialize these to -2 to mean they're not specified. // -1 will be treated as an error. fds[0] = -2; fds[1] = -2; fds[2] = -2; fds[0] = safe_open_wrapper_follow( jobstdin.Value(), O_RDONLY | O_LARGEFILE ); // stdin fds[1] = safe_open_wrapper_follow( jobstdout.Value(), flags, 0666 ); // stdout fds[2] = safe_open_wrapper_follow( jobstderr.Value(), flags, 0666 ); // stderr /* Bail out if we couldn't open stdout/err files correctly */ if( fds[1]==-1 || fds[2]==-1 ) { /* only close ones that had been opened correctly */ for ( int i = 0; i <= 2; i++ ) { if ( fds[i] >= 0 ) { daemonCore->Close_FD ( fds[i] ); } } handle_process_request_error("Failed to write stdout/err files",req_number,resultAd); return; } // Print what we are about to do to the log MyString args_string; args.GetArgsStringForDisplay(&args_string,1); dprintf( D_ALWAYS, "About to exec %s %s\n", JobName.Value(), args_string.Value() ); // Spawn a process, baby!!! int JobPid = daemonCore->Create_Process( JobName.Value(), // executable args, // args PRIV_UNKNOWN, // priv_state - TODO 0, // reaper id - TODO FALSE, // want_command_port &job_env, // job environment iwd, // job iwd NULL, // family_info - TODO NULL, // sock_inherit_list fds // stdio redirection ); // NOTE: Create_Process() saves the errno for us if it is an // "interesting" error. char const *create_process_error = NULL; if(JobPid == FALSE && errno) create_process_error = strerror(errno); // now close the descriptors in fds array. our child has inherited // them already, so we should close them so we do not leak descriptors. for ( int i = 0; i <= 2; i++ ) { if ( fds[i] >= 0 ) { daemonCore->Close_FD ( fds[i] ); } } if ( JobPid == FALSE ) { JobPid = -1; MyString errormsg; errormsg.formatstr("Create_Process failed %s",create_process_error ? create_process_error : ""); handle_process_request_error(errormsg.Value(),req_number,resultAd); return; } dprintf(D_ALWAYS,"Create_Process succeeded, pid=%d\n",JobPid); // TODO - For now, just deal w/ one at a time. :( // So for now just wait for the child to exit. #ifdef WIN32 #error This service does not yet work on Windows #else { int exit_status; pid_t pid; for (;;) { pid = wait(&exit_status); dprintf(D_FULLDEBUG,"WAIT returned %d, errno=%d\n",pid,errno); if (pid == JobPid ) break; if (pid == -1 && errno != EINTR) { EXCEPT("waitpid failed errno=%d",errno); } } if ( WIFEXITED(exit_status) ) { int status = WEXITSTATUS(exit_status); resultAd->Assign("EXIT_STATUS",status); } } #endif // Job has completed, exit status is in the ad. Now put // the output files into the result ad. stash_output_file(resultAd, jobstdout.Value(), ATTR_JOB_OUTPUT); stash_output_file(resultAd, jobstderr.Value(), ATTR_JOB_ERROR); }
int OsProc::StartJob(FamilyInfo* family_info, FilesystemRemap* fs_remap=NULL) { int nice_inc = 0; bool has_wrapper = false; dprintf(D_FULLDEBUG,"in OsProc::StartJob()\n"); if ( !JobAd ) { dprintf ( D_ALWAYS, "No JobAd in OsProc::StartJob()!\n" ); return 0; } MyString JobName; if ( JobAd->LookupString( ATTR_JOB_CMD, JobName ) != 1 ) { dprintf( D_ALWAYS, "%s not found in JobAd. Aborting StartJob.\n", ATTR_JOB_CMD ); return 0; } const char* job_iwd = Starter->jic->jobRemoteIWD(); dprintf( D_ALWAYS, "IWD: %s\n", job_iwd ); // some operations below will require a PrivSepHelper if // PrivSep is enabled (if it's not, privsep_helper will be // NULL) PrivSepHelper* privsep_helper = Starter->privSepHelper(); // // // // // // // Arguments // // // // // // // prepend the full path to this name so that we // don't have to rely on the PATH inside the // USER_JOB_WRAPPER or for exec(). bool transfer_exe = false; if (!JobAd->LookupBool(ATTR_TRANSFER_EXECUTABLE, transfer_exe)) { transfer_exe = false; } bool preserve_rel = false; if (!JobAd->LookupBool(ATTR_PRESERVE_RELATIVE_EXECUTABLE, preserve_rel)) { preserve_rel = false; } bool relative_exe = is_relative_to_cwd(JobName.Value()); if (relative_exe && preserve_rel && !transfer_exe) { dprintf(D_ALWAYS, "Preserving relative executable path: %s\n", JobName.Value()); } else if ( strcmp(CONDOR_EXEC,JobName.Value()) == 0 ) { JobName.formatstr( "%s%c%s", Starter->GetWorkingDir(), DIR_DELIM_CHAR, CONDOR_EXEC ); } else if (relative_exe && job_iwd && *job_iwd) { MyString full_name; full_name.formatstr("%s%c%s", job_iwd, DIR_DELIM_CHAR, JobName.Value()); JobName = full_name; } if( Starter->isGridshell() ) { // if we're a gridshell, just try to chmod our job, since // globus probably transfered it for us and left it with // bad permissions... priv_state old_priv = set_user_priv(); int retval = chmod( JobName.Value(), S_IRWXU | S_IRWXO | S_IRWXG ); set_priv( old_priv ); if( retval < 0 ) { dprintf ( D_ALWAYS, "Failed to chmod %s!\n", JobName.Value() ); return 0; } } ArgList args; // Since we may be adding to the argument list, we may need to deal // with platform-specific arg syntax in the user's args in order // to successfully merge them with the additional wrapper args. args.SetArgV1SyntaxToCurrentPlatform(); // First, put "condor_exec" or whatever at the front of Args, // since that will become argv[0] of what we exec(), either // the wrapper or the actual job. if( !getArgv0() ) { args.AppendArg(JobName.Value()); } else { args.AppendArg(getArgv0()); } // Support USER_JOB_WRAPPER parameter... char *wrapper = NULL; if( (wrapper=param("USER_JOB_WRAPPER")) ) { // make certain this wrapper program exists and is executable if( access(wrapper,X_OK) < 0 ) { dprintf( D_ALWAYS, "Cannot find/execute USER_JOB_WRAPPER file %s\n", wrapper ); free( wrapper ); return 0; } has_wrapper = true; // Now, we've got a valid wrapper. We want that to become // "JobName" so we exec it directly, and we want to put // what was the JobName (with the full path) as the first // argument to the wrapper args.AppendArg(JobName.Value()); JobName = wrapper; free(wrapper); } // Support USE_PARROT bool use_parrot = false; if( JobAd->LookupBool( ATTR_USE_PARROT, use_parrot) ) { // Check for parrot executable char *parrot = NULL; if( (parrot=param("PARROT")) ) { if( access(parrot,X_OK) < 0 ) { dprintf( D_ALWAYS, "Unable to use parrot(Cannot find/execute " "at %s(%s)).\n", parrot, strerror(errno) ); free( parrot ); return 0; } else { args.AppendArg(JobName.Value()); JobName = parrot; free( parrot ); } } else { dprintf( D_ALWAYS, "Unable to use parrot(Undefined path in config" " file)" ); return 0; } } // Either way, we now have to add the user-specified args as // the rest of the Args string. MyString args_error; if(!args.AppendArgsFromClassAd(JobAd,&args_error)) { dprintf(D_ALWAYS, "Failed to read job arguments from JobAd. " "Aborting OsProc::StartJob: %s\n",args_error.Value()); return 0; } // // // // // // // Environment // // // // // // // Now, instantiate an Env object so we can manipulate the // environment as needed. Env job_env; MyString env_errors; if( !Starter->GetJobEnv(JobAd,&job_env,&env_errors) ) { dprintf( D_ALWAYS, "Aborting OSProc::StartJob: %s\n", env_errors.Value()); return 0; } // // // // // // // Standard Files // // // // // // // handle stdin, stdout, and stderr redirection int fds[3]; // initialize these to -2 to mean they're not specified. // -1 will be treated as an error. fds[0] = -2; fds[1] = -2; fds[2] = -2; // in order to open these files we must have the user's privs: priv_state priv; priv = set_user_priv(); // if we're in PrivSep mode, we won't necessarily be able to // open the files for the job. getStdFile will return us an // open FD in some situations, but otherwise will give us // a filename that we'll pass to the PrivSep Switchboard // bool stdin_ok; bool stdout_ok; bool stderr_ok; MyString privsep_stdin_name; MyString privsep_stdout_name; MyString privsep_stderr_name; if (privsep_helper != NULL) { stdin_ok = getStdFile(SFT_IN, NULL, true, "Input file", &fds[0], &privsep_stdin_name); stdout_ok = getStdFile(SFT_OUT, NULL, true, "Output file", &fds[1], &privsep_stdout_name); stderr_ok = getStdFile(SFT_ERR, NULL, true, "Error file", &fds[2], &privsep_stderr_name); } else { fds[0] = openStdFile( SFT_IN, NULL, true, "Input file"); stdin_ok = (fds[0] != -1); fds[1] = openStdFile( SFT_OUT, NULL, true, "Output file"); stdout_ok = (fds[1] != -1); fds[2] = openStdFile( SFT_ERR, NULL, true, "Error file"); stderr_ok = (fds[2] != -1); } /* Bail out if we couldn't open the std files correctly */ if( !stdin_ok || !stdout_ok || !stderr_ok ) { /* only close ones that had been opened correctly */ for ( int i = 0; i <= 2; i++ ) { if ( fds[i] >= 0 ) { daemonCore->Close_FD ( fds[i] ); } } dprintf(D_ALWAYS, "Failed to open some/all of the std files...\n"); dprintf(D_ALWAYS, "Aborting OsProc::StartJob.\n"); set_priv(priv); /* go back to original priv state before leaving */ return 0; } // // // // // // // Misc + Exec // // // // // // if( !ThisProcRunsAlongsideMainProc() ) { Starter->jic->notifyJobPreSpawn(); } // compute job's renice value by evaluating the machine's // JOB_RENICE_INCREMENT in the context of the job ad... char* ptmp = param( "JOB_RENICE_INCREMENT" ); if( ptmp ) { // insert renice expr into our copy of the job ad MyString reniceAttr = "Renice = "; reniceAttr += ptmp; if( !JobAd->Insert( reniceAttr.Value() ) ) { dprintf( D_ALWAYS, "ERROR: failed to insert JOB_RENICE_INCREMENT " "into job ad, Aborting OsProc::StartJob...\n" ); free( ptmp ); return 0; } // evaluate if( JobAd->EvalInteger( "Renice", NULL, nice_inc ) ) { dprintf( D_ALWAYS, "Renice expr \"%s\" evaluated to %d\n", ptmp, nice_inc ); } else { dprintf( D_ALWAYS, "WARNING: job renice expr (\"%s\") doesn't " "eval to int! Using default of 10...\n", ptmp ); nice_inc = 10; } // enforce valid ranges for nice_inc if( nice_inc < 0 ) { dprintf( D_FULLDEBUG, "WARNING: job renice value (%d) is too " "low: adjusted to 0\n", nice_inc ); nice_inc = 0; } else if( nice_inc > 19 ) { dprintf( D_FULLDEBUG, "WARNING: job renice value (%d) is too " "high: adjusted to 19\n", nice_inc ); nice_inc = 19; } ASSERT( ptmp ); free( ptmp ); ptmp = NULL; } else { // if JOB_RENICE_INCREMENT is undefined, default to 0 nice_inc = 0; } // in the below dprintfs, we want to skip past argv[0], which // is sometimes condor_exec, in the Args string. MyString args_string; args.GetArgsStringForDisplay(&args_string, 1); if( has_wrapper ) { // print out exactly what we're doing so folks can debug // it, if they need to. dprintf( D_ALWAYS, "Using wrapper %s to exec %s\n", JobName.Value(), args_string.Value() ); } else { dprintf( D_ALWAYS, "About to exec %s %s\n", JobName.Value(), args_string.Value() ); } // Grab the full environment back out of the Env object if(IsFulldebug(D_FULLDEBUG)) { MyString env_string; job_env.getDelimitedStringForDisplay(&env_string); dprintf(D_FULLDEBUG, "Env = %s\n", env_string.Value()); } // Check to see if we need to start this process paused, and if // so, pass the right flag to DC::Create_Process(). int job_opt_mask = DCJOBOPT_NO_CONDOR_ENV_INHERIT; if (!param_boolean("JOB_INHERITS_STARTER_ENVIRONMENT",false)) { job_opt_mask |= DCJOBOPT_NO_ENV_INHERIT; } int suspend_job_at_exec = 0; JobAd->LookupBool( ATTR_SUSPEND_JOB_AT_EXEC, suspend_job_at_exec); if( suspend_job_at_exec ) { dprintf( D_FULLDEBUG, "OsProc::StartJob(): " "Job wants to be suspended at exec\n" ); job_opt_mask |= DCJOBOPT_SUSPEND_ON_EXEC; } // If there is a requested coresize for this job, enforce it. // Convert negative and very large values to RLIM_INFINITY, meaning // no size limit. // RLIM_INFINITY is unsigned, but its value and type size vary. long long core_size_ad; size_t core_size; size_t *core_size_ptr = NULL; #if !defined(WIN32) if ( JobAd->LookupInteger( ATTR_CORE_SIZE, core_size_ad ) ) { if ( core_size_ad < 0 || (unsigned long long)core_size_ad > RLIM_INFINITY ) { core_size = RLIM_INFINITY; } else { core_size = (size_t)core_size_ad; } core_size_ptr = &core_size; } #endif // !defined(WIN32) long rlimit_as_hard_limit = 0; char *rlimit_expr = param("STARTER_RLIMIT_AS"); if (rlimit_expr) { classad::ClassAdParser parser; classad::ExprTree *tree = parser.ParseExpression(rlimit_expr); if (tree) { classad::Value val; long long result; if (EvalExprTree(tree, Starter->jic->machClassAd(), JobAd, val) && val.IsIntegerValue(result)) { result *= 1024 * 1024; // convert to megabytes rlimit_as_hard_limit = (long)result; // truncate for Create_Process if (result > rlimit_as_hard_limit) { // if truncation to long results in a change in the value, then // the requested limit must be > 2 GB and we are on a 32 bit platform // in that case, the requested limit is > than what the process can get anyway // so just don't set a limit. rlimit_as_hard_limit = 0; } if (rlimit_as_hard_limit > 0) { dprintf(D_ALWAYS, "Setting job's virtual memory rlimit to %ld megabytes\n", rlimit_as_hard_limit); } } else { dprintf(D_ALWAYS, "Can't evaluate STARTER_RLIMIT_AS expression %s\n", rlimit_expr); } } else { dprintf(D_ALWAYS, "Can't parse STARTER_RLIMIT_AS expression: %s\n", rlimit_expr); } } int *affinity_mask = makeCpuAffinityMask(Starter->getMySlotNumber()); #if defined ( WIN32 ) owner_profile_.update (); /************************************************************* NOTE: We currently *ONLY* support loading slot-user profiles. This limitation will be addressed shortly, by allowing regular users to load their registry hive - Ben [2008-09-31] **************************************************************/ bool load_profile = false, run_as_owner = false; JobAd->LookupBool ( ATTR_JOB_LOAD_PROFILE, load_profile ); JobAd->LookupBool ( ATTR_JOB_RUNAS_OWNER, run_as_owner ); if ( load_profile && !run_as_owner ) { if ( owner_profile_.load () ) { /* publish the users environment into that of the main job's environment */ if ( !owner_profile_.environment ( job_env ) ) { dprintf ( D_ALWAYS, "OsProc::StartJob(): Failed to " "export owner's environment.\n" ); } } else { dprintf ( D_ALWAYS, "OsProc::StartJob(): Failed to load " "owner's profile.\n" ); } } #endif // While we are still in user priv, print out the username #if defined(LINUX) if( Starter->glexecPrivSepHelper() ) { // TODO: if there is some way to figure out the final username, // print it out here or after starting the job. dprintf(D_ALWAYS,"Running job via glexec\n"); } #else if( false ) { } #endif else { char const *username = NULL; char const *how = ""; CondorPrivSepHelper* cpsh = Starter->condorPrivSepHelper(); if( cpsh ) { username = cpsh->get_user_name(); how = "via privsep switchboard "; } else { username = get_user_loginname(); } if( !username ) { username = "******"; } dprintf(D_ALWAYS,"Running job %sas user %s\n",how,username); } set_priv ( priv ); // use this to return more detailed and reliable error message info // from create-process operation. MyString create_process_err_msg; if (privsep_helper != NULL) { const char* std_file_names[3] = { privsep_stdin_name.Value(), privsep_stdout_name.Value(), privsep_stderr_name.Value() }; JobPid = privsep_helper->create_process(JobName.Value(), args, job_env, job_iwd, fds, std_file_names, nice_inc, core_size_ptr, 1, job_opt_mask, family_info, affinity_mask, &create_process_err_msg); } else { JobPid = daemonCore->Create_Process( JobName.Value(), args, PRIV_USER_FINAL, 1, FALSE, FALSE, &job_env, job_iwd, family_info, NULL, fds, NULL, nice_inc, NULL, job_opt_mask, core_size_ptr, affinity_mask, NULL, &create_process_err_msg, fs_remap, rlimit_as_hard_limit); } // Create_Process() saves the errno for us if it is an "interesting" error. int create_process_errno = errno; // errno is 0 in the privsep case. This executes for the daemon core create-process logic if ((FALSE == JobPid) && (0 != create_process_errno)) { if (create_process_err_msg != "") create_process_err_msg += " "; MyString errbuf; errbuf.formatstr("(errno=%d: '%s')", create_process_errno, strerror(create_process_errno)); create_process_err_msg += errbuf; } // now close the descriptors in fds array. our child has inherited // them already, so we should close them so we do not leak descriptors. // NOTE, we want to use a special method to close the starter's // versions, if that's what we're using, so we don't think we've // still got those available in other parts of the code for any // reason. for ( int i = 0; i <= 2; i++ ) { if ( fds[i] >= 0 ) { daemonCore->Close_FD ( fds[i] ); } } if ( JobPid == FALSE ) { JobPid = -1; if(!create_process_err_msg.IsEmpty()) { // if the reason Create_Process failed was that registering // a family with the ProcD failed, it is indicative of a // problem regarding this execute machine, not the job. in // this case, we'll want to EXCEPT instead of telling the // Shadow to put the job on hold. there are probably other // error conditions where EXCEPTing would be more appropriate // as well... // if (create_process_errno == DaemonCore::ERRNO_REGISTRATION_FAILED) { EXCEPT("Create_Process failed to register the job with the ProcD"); } MyString err_msg = "Failed to execute '"; err_msg += JobName; err_msg += "'"; if(!args_string.IsEmpty()) { err_msg += " with arguments "; err_msg += args_string.Value(); } err_msg += ": "; err_msg += create_process_err_msg; if( !ThisProcRunsAlongsideMainProc() ) { Starter->jic->notifyStarterError( err_msg.Value(), true, CONDOR_HOLD_CODE_FailedToCreateProcess, create_process_errno ); } } dprintf(D_ALWAYS,"Create_Process(%s,%s, ...) failed: %s\n", JobName.Value(), args_string.Value(), create_process_err_msg.Value()); return 0; } num_pids++; dprintf(D_ALWAYS,"Create_Process succeeded, pid=%d\n",JobPid); job_start_time.getTime(); return 1; }
int JavaProc::StartJob() { MyString java_cmd; char* jarfiles = NULL; ArgList args; MyString arg_buf; // Since we are adding to the argument list, we may need to deal // with platform-specific arg syntax in the user's args in order // to successfully merge them with the additional java VM args. args.SetArgV1SyntaxToCurrentPlatform(); // Construct the list of jar files for the command line // If a jar file is transferred locally, use its local name // (in the execute directory) // otherwise use the original name StringList jarfiles_orig_list; StringList jarfiles_local_list; StringList* jarfiles_final_list = NULL; if( JobAd->LookupString(ATTR_JAR_FILES,&jarfiles) ) { jarfiles_orig_list.initializeFromString( jarfiles ); free( jarfiles ); jarfiles = NULL; char * jarfile_name; const char * base_name; struct stat stat_buff; if( Starter->jic->iwdIsChanged() ) { // If the job's IWD has been changed (because we're // running in the sandbox due to file transfer), we // need to use a local version of the path to the jar // files, not the full paths from the submit machine. jarfiles_orig_list.rewind(); while( (jarfile_name = jarfiles_orig_list.next()) ) { // Construct the local name base_name = condor_basename( jarfile_name ); MyString local_name = execute_dir; local_name += DIR_DELIM_CHAR; local_name += base_name; if( stat(local_name.Value(), &stat_buff) == 0 ) { // Jar file exists locally, use local name jarfiles_local_list.append( local_name.Value() ); } else { // Use the original name jarfiles_local_list.append (jarfile_name); } } // while(jarfiles_orig_list) // jarfiles_local_list is our real copy... jarfiles_final_list = &jarfiles_local_list; } else { // !iwdIsChanged() // just use jarfiles_orig_list as our real copy... jarfiles_final_list = &jarfiles_orig_list; } } startfile.formatstr("%s%cjvm.start",execute_dir,DIR_DELIM_CHAR); endfile.formatstr("%s%cjvm.end",execute_dir,DIR_DELIM_CHAR); if( !java_config(java_cmd,&args,jarfiles_final_list) ) { dprintf(D_FAILURE|D_ALWAYS,"JavaProc: Java is not configured!\n"); return 0; } JobAd->Assign(ATTR_JOB_CMD, java_cmd.Value()); arg_buf.formatstr("-Dchirp.config=%s%cchirp.config",execute_dir,DIR_DELIM_CHAR); args.AppendArg(arg_buf.Value()); char *jvm_args1 = NULL; char *jvm_args2 = NULL; MyString jvm_args_error; bool jvm_args_success = true; JobAd->LookupString(ATTR_JOB_JAVA_VM_ARGS1, &jvm_args1); JobAd->LookupString(ATTR_JOB_JAVA_VM_ARGS2, &jvm_args2); if(jvm_args2) { jvm_args_success = args.AppendArgsV2Raw(jvm_args2, &jvm_args_error); } else if(jvm_args1) { jvm_args_success = args.AppendArgsV1Raw(jvm_args1, &jvm_args_error); } free(jvm_args1); free(jvm_args2); if (!jvm_args_success) { dprintf(D_ALWAYS, "JavaProc: failed to parse JVM args: %s\n", jvm_args_error.Value()); return 0; } args.AppendArg("CondorJavaWrapper"); args.AppendArg(startfile.Value()); args.AppendArg(endfile.Value()); MyString args_error; if(!args.AppendArgsFromClassAd(JobAd,&args_error)) { dprintf(D_ALWAYS,"JavaProc: failed to read job arguments: %s\n", args_error.Value()); return 0; } // We are just talking to ourselves, so it is fine to use argument // syntax compatible with this current version of Condor. CondorVersionInfo ver_info; if(!args.InsertArgsIntoClassAd(JobAd,&ver_info,&args_error)) { dprintf(D_ALWAYS,"JavaProc: failed to insert java job arguments: %s\n", args_error.Value()); return 0; } dprintf(D_ALWAYS,"JavaProc: Cmd=%s\n",java_cmd.Value()); MyString args_string; args.GetArgsStringForDisplay(&args_string); dprintf(D_ALWAYS,"JavaProc: Args=%s\n",args_string.Value()); return VanillaProc::StartJob(); }