/* * Run a prolog or epilog script (does NOT drop privileges) * name IN: class of program (prolog, epilog, etc.), * path IN: pathname of program to run * job_id IN: info on associated job * max_wait IN: maximum time to wait in seconds, -1 for no limit * env IN: environment variables to use on exec, sets minimal environment * if NULL * uid IN: user ID of job owner * RET 0 on success, -1 on failure. */ static int _run_one_script(const char *name, const char *path, uint32_t job_id, int max_wait, char **env, uid_t uid) { int status; pid_t cpid; xassert(env); if (path == NULL || path[0] == '\0') return 0; if (job_id) { debug("[job %u] attempting to run %s [%s]", job_id, name, path); } else debug("attempting to run %s [%s]", name, path); if (access(path, R_OK | X_OK) < 0) { error("Can not run %s [%s]: %m", name, path); return -1; } if ((cpid = fork()) < 0) { error ("executing %s: fork: %m", name); return -1; } if (cpid == 0) { char *argv[2]; /* container_g_add_pid needs to be called in the forked process part of the fork to avoid a race condition where if this process makes a file or detacts itself from a child before we add the pid to the container in the parent of the fork. */ if (container_g_add_pid(job_id, getpid(), getuid()) != SLURM_SUCCESS) error("container_g_add_pid(%u): %m", job_id); argv[0] = (char *)xstrdup(path); argv[1] = NULL; setpgid(0, 0); execve(path, argv, env); error("execve(%s): %m", path); exit(127); } if (waitpid_timeout(name, cpid, &status, max_wait) < 0) return (-1); return status; }
/*
 * Run the external program named by the file-level program_name on behalf
 * of the step terminate monitor.
 *
 * Nothing is run unless the step is in state SLURMSTEPD_STEP_RUNNING and
 * program_name is a non-empty string. The child exports SLURM_JOBID,
 * SLURM_JOB_ID, SLURM_STEPID and SLURM_STEP_ID (from recorded_jobid and
 * recorded_stepid) before exec'ing the program. The parent polls for the
 * child once per second for up to 300 seconds, then SIGKILLs the child's
 * process group and waits for it without a timeout.
 *
 * job IN: step record; only job->state is read here
 * RET 0 if nothing was run, the program is not readable/executable, or
 *       waitpid() failed for a reason other than EINTR,
 *     -1 if fork() failed,
 *     otherwise the raw wait status reported by waitpid().
 */
static int _call_external_program(stepd_step_rec_t *job)
{
	int status, rc, opt;
	pid_t cpid;
	int max_wait = 300; /* seconds */
	int time_remaining;

	if ((job->state != SLURMSTEPD_STEP_RUNNING) ||
	    program_name == NULL || program_name[0] == '\0')
		return 0;

	debug("step_terminate_monitor: unkillable after %d sec, calling: %s",
	      timeout, program_name);

	if (access(program_name, R_OK | X_OK) < 0) {
		debug("step_terminate_monitor not running %s: %m",
		      program_name);
		return 0;
	}

	if ((cpid = fork()) < 0) {
		error("step_terminate_monitor executing %s: fork: %m",
		      program_name);
		return -1;
	}

	if (cpid == 0) {
		/* child */
		char *argv[2];
		char buf[16];

		/*
		 * container_g_add_pid must be called on the child side of
		 * the fork. If the parent added the pid instead, the child
		 * could create a file or detach itself before its pid had
		 * been placed in the container.
		 */
		if (container_g_add_pid(recorded_jobid, getpid(), getuid()) !=
		    SLURM_SUCCESS)
			error("container_g_add_pid(%u): %m", recorded_jobid);

		/* Export the job and step ids under both spellings. */
		snprintf(buf, sizeof(buf), "%u", recorded_jobid);
		setenv("SLURM_JOBID", buf, 1);
		setenv("SLURM_JOB_ID", buf, 1);
		snprintf(buf, sizeof(buf), "%u", recorded_stepid);
		setenv("SLURM_STEPID", buf, 1);
		setenv("SLURM_STEP_ID", buf, 1);

		argv[0] = program_name;
		argv[1] = NULL;

		/*
		 * Own process group (pgid == child pid), so the parent's
		 * killpg(cpid, ...) targets this program's group.
		 */
		setpgid(0, 0);
		execv(program_name, argv);

		/* Only reached when execv() failed. */
		error("step_terminate_monitor execv(): %m");
		/*
		 * Use _exit(), not exit(): this is a forked copy of the
		 * parent, so exit() would run the parent's atexit handlers
		 * and flush stdio buffers inherited from it.
		 */
		_exit(127);
	}

	/* parent: poll without blocking until the time budget is spent */
	opt = WNOHANG;
	time_remaining = max_wait;
	while (1) {
		rc = waitpid(cpid, &status, opt);
		if (rc < 0) {
			if (errno == EINTR)
				continue;
			/*
			 * waitpid may very well fail under normal conditions
			 * because the wait3() in mgr.c:_wait_for_any_task()
			 * may have reaped the return code.
			 */
			return 0;
		} else if (rc == 0) {
			/* Child still running (only possible with WNOHANG). */
			sleep(1);
			if ((--time_remaining) == 0) {
				error("step_terminate_monitor: %s still running"
				      " after %d seconds.  Killing.",
				      program_name, max_wait);
				killpg(cpid, SIGKILL);
				/* Block in waitpid() from now on. */
				opt = 0;
			}
		} else {
			return status;
		}
	}

	/* NOTREACHED */
}