Example #1
/*
 *  Current process is running as the user when this is called.
 */
void
exec_task(slurmd_job_t *job, int i)
{
	uint32_t *gtids;		/* pointer to array of ranks */
	int fd, j;
	slurmd_task_info_t *task = job->task[i];
	char **tmp_env;

	if (i == 0)
		_make_tmpdir(job);

	gtids = xmalloc(job->node_tasks * sizeof(uint32_t));
	for (j = 0; j < job->node_tasks; j++)
		gtids[j] = job->task[j]->gtid;
	job->envtp->sgtids = _uint32_array_to_str(job->node_tasks, gtids);
	xfree(gtids);

	job->envtp->jobid = job->jobid;
	job->envtp->stepid = job->stepid;
	job->envtp->nodeid = job->nodeid;
	job->envtp->cpus_on_node = job->cpus;
	job->envtp->procid = task->gtid;
	job->envtp->localid = task->id;
	job->envtp->task_pid = getpid();
	job->envtp->distribution = job->task_dist;
	job->envtp->cpu_bind = xstrdup(job->cpu_bind);
	job->envtp->cpu_bind_type = job->cpu_bind_type;
	job->envtp->cpu_freq = job->cpu_freq;
	job->envtp->mem_bind = xstrdup(job->mem_bind);
	job->envtp->mem_bind_type = job->mem_bind_type;
	job->envtp->distribution = -1;
	job->envtp->ckpt_dir = xstrdup(job->ckpt_dir);
	job->envtp->batch_flag = job->batch;

	/* Modify copy of job's environment. Do not alter in place or
	 * concurrent searches of the environment can generate invalid memory
	 * references. */
	job->envtp->env = env_array_copy((const char **) job->env);
	setup_env(job->envtp, false);
	setenvf(&job->envtp->env, "SLURMD_NODENAME", "%s", conf->node_name);
	tmp_env = job->env;
	job->env = job->envtp->env;
	env_array_free(tmp_env);
	job->envtp->env = NULL;

	xfree(job->envtp->task_count);

	if (task->argv[0] && *task->argv[0] != '/') {
		/*
		 * Normally the client (srun) expands the command name
		 * to a fully qualified path, but in --multi-prog mode it
		 * is left up to the server to search the PATH for the
		 * executable.
		 */
		task->argv[0] = _build_path(task->argv[0], job->env);
	}

	if (!job->batch) {
		if (interconnect_attach(job->switch_job, &job->env,
				job->nodeid, (uint32_t) i, job->nnodes,
				job->ntasks, task->gtid) < 0) {
			error("Unable to attach to interconnect: %m");
			log_fini();
			exit(1);
		}

		if (_setup_mpi(job, i) != SLURM_SUCCESS) {
			error("Unable to configure MPI plugin: %m");
			log_fini();
			exit(1);
		}
	}

	/* task-specific pre-launch activities */

	if (spank_user_task (job, i) < 0) {
		error ("Failed to invoke task plugin stack");
		exit (1);
	}

	/* task plugin hook */
	if (pre_launch(job)) {
		error ("Failed task affinity setup");
		exit (1);
	}

	if (conf->task_prolog) {
		char *my_prolog;
		slurm_mutex_lock(&conf->config_mutex);
		my_prolog = xstrdup(conf->task_prolog);
		slurm_mutex_unlock(&conf->config_mutex);
		_run_script_and_set_env("slurm task_prolog",
					my_prolog, job);
		xfree(my_prolog);
	}
	if (job->task_prolog) {
		_run_script_and_set_env("user task_prolog",
					job->task_prolog, job);
	}

	if (!job->batch)
		pdebug_stop_current(job);
	if (job->env == NULL) {
		debug("job->env is NULL");
		job->env = (char **)xmalloc(sizeof(char *));
		job->env[0] = (char *)NULL;
	}

	if (job->restart_dir) {
		info("restart from %s", job->restart_dir);
		/* no return on success */
		checkpoint_restart_task(job, job->restart_dir, task->gtid);
		error("Restart task failed: %m");
		exit(errno);
	}

	if (task->argv[0] == NULL) {
		error("No executable program specified for this task");
		exit(2);
	}

	/*
	 * Do this last so the user's resource limits do not also
	 * constrain slurmstepd itself any longer than necessary.
	 */
	if (set_user_limits(job) < 0) {
		debug("Unable to set user limits");
		log_fini();
		exit(5);
	}

	execve(task->argv[0], task->argv, job->env);

	/*
	 * print error message and clean up if execve() returns:
	 */
	if ((errno == ENOENT) &&
	    ((fd = open(task->argv[0], O_RDONLY)) >= 0)) {
		char buf[256], *eol;
		int sz;
		sz = read(fd, buf, sizeof(buf));
		if ((sz >= 3) && (strncmp(buf, "#!", 2) == 0)) {
			/* NUL-terminate before strchr() scans the buffer */
			buf[sizeof(buf)-1] = '\0';
			eol = strchr(buf, '\n');
			if (eol)
				eol[0] = '\0';
			error("execve(): bad interpreter(%s): %m", buf+2);
			exit(errno);
		}
	}
	error("execve(): %s: %m", task->argv[0]);
	exit(errno);
}
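
The comment in exec_task about not altering the environment in place is the key idea of this example: build a private copy of the environment, modify it, swap the pointer, then free the old array, so a concurrent reader never sees a half-built array. Below is a minimal, self-contained sketch of that copy-then-swap pattern using plain libc calls; copy_env and free_env are illustrative stand-ins for SLURM's env_array_copy()/env_array_free(), not the real API.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Illustrative stand-ins for env_array_copy()/env_array_free(). */
static char **copy_env(char *const *env)
{
	size_t n = 0, i;
	char **copy;

	while (env && env[n])
		n++;
	copy = calloc(n + 1, sizeof(char *));
	for (i = 0; copy && (i < n); i++)
		copy[i] = strdup(env[i]);
	return copy;			/* trailing NULL left by calloc() */
}

static void free_env(char **env)
{
	size_t i;

	if (!env)
		return;
	for (i = 0; env[i]; i++)
		free(env[i]);
	free(env);
}

int main(void)
{
	char *initial[] = { "SLURM_EXAMPLE=1", NULL };
	char **job_env = copy_env(initial);
	char **new_env, **tmp_env;

	/* Modify a private copy rather than job_env itself, so another
	 * thread scanning job_env never sees a half-built array. */
	new_env = copy_env(job_env);
	/* ... setenvf()-style additions would go into new_env here ... */

	/* Swap the finished copy in, then free the old array (same order
	 * as exec_task: tmp_env = job->env; job->env = new; free old). */
	tmp_env = job_env;
	job_env = new_env;
	free_env(tmp_env);

	if (job_env && job_env[0])
		printf("%s\n", job_env[0]);
	free_env(job_env);
	return 0;
}
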
Example #2
/* execute archive script */
extern int archive_run_script(slurmdb_archive_cond_t *arch_cond,
		   char *cluster_name, time_t last_submit)
{
	char * args[] = {arch_cond->archive_script, NULL};
	struct stat st;
	char **env = NULL;
	time_t curr_end;

	if (stat(arch_cond->archive_script, &st) < 0) {
		error("archive_run_script: failed to stat %s: %m",
		      arch_cond->archive_script);
		return SLURM_ERROR;
	}

	if (!S_ISREG(st.st_mode)) {
		errno = EACCES;
		error("archive_run_script: %s isn't a regular file",
		      arch_cond->archive_script);
		return SLURM_ERROR;
	}

	if (access(arch_cond->archive_script, X_OK) < 0) {
		errno = EACCES;
		error("archive_run_script: %s is not executable",
		      arch_cond->archive_script);
		return SLURM_ERROR;
	}

	env = env_array_create();
	env_array_append_fmt(&env, "SLURM_ARCHIVE_CLUSTER", "%s",
			     cluster_name);

	if (arch_cond->purge_event != NO_VAL) {
		if (!(curr_end = archive_setup_end_time(
			     last_submit, arch_cond->purge_event))) {
			error("Parsing purge events failed");
			return SLURM_ERROR;
		}

		env_array_append_fmt(&env, "SLURM_ARCHIVE_EVENTS", "%u",
				     SLURMDB_PURGE_ARCHIVE_SET(
					     arch_cond->purge_event));
		env_array_append_fmt(&env, "SLURM_ARCHIVE_LAST_EVENT", "%ld",
				     (long)curr_end);
	}

	if (arch_cond->purge_job != NO_VAL) {
		if (!(curr_end = archive_setup_end_time(
			     last_submit, arch_cond->purge_job))) {
			error("Parsing purge job failed");
			return SLURM_ERROR;
		}

		env_array_append_fmt(&env, "SLURM_ARCHIVE_JOBS", "%u",
				     SLURMDB_PURGE_ARCHIVE_SET(
					     arch_cond->purge_job));
		env_array_append_fmt(&env, "SLURM_ARCHIVE_LAST_JOB", "%ld",
				     (long)curr_end);
	}

	if (arch_cond->purge_resv != NO_VAL) {
		if (!(curr_end = archive_setup_end_time(
			     last_submit, arch_cond->purge_resv))) {
			error("Parsing purge resv failed");
			return SLURM_ERROR;
		}

		env_array_append_fmt(&env, "SLURM_ARCHIVE_RESV", "%u",
				     SLURMDB_PURGE_ARCHIVE_SET(
					     arch_cond->purge_resv));
		env_array_append_fmt(&env, "SLURM_ARCHIVE_LAST_RESV", "%ld",
				     (long)curr_end);
	}

	if (arch_cond->purge_step != NO_VAL) {
		if (!(curr_end = archive_setup_end_time(
			     last_submit, arch_cond->purge_step))) {
			error("Parsing purge step");
			return SLURM_ERROR;
		}

		env_array_append_fmt(&env, "SLURM_ARCHIVE_STEPS", "%u",
				     SLURMDB_PURGE_ARCHIVE_SET(
					     arch_cond->purge_step));
		env_array_append_fmt(&env, "SLURM_ARCHIVE_LAST_STEP", "%ld",
				     (long)curr_end);
	}

	if (arch_cond->purge_suspend != NO_VAL) {
		if (!(curr_end = archive_setup_end_time(
			     last_submit, arch_cond->purge_suspend))) {
			error("Parsing purge suspend");
			return SLURM_ERROR;
		}

		env_array_append_fmt(&env, "SLURM_ARCHIVE_SUSPEND", "%u",
				     SLURMDB_PURGE_ARCHIVE_SET(
					     arch_cond->purge_suspend));
		env_array_append_fmt(&env, "SLURM_ARCHIVE_LAST_SUSPEND", "%ld",
				     (long)curr_end);
	}

	if (arch_cond->purge_txn != NO_VAL) {
		if (!(curr_end = archive_setup_end_time(
			     last_submit, arch_cond->purge_txn))) {
			error("Parsing purge txn");
			return SLURM_ERROR;
		}

		env_array_append_fmt(&env, "SLURM_ARCHIVE_TXN", "%u",
				     SLURMDB_PURGE_ARCHIVE_SET(
					     arch_cond->purge_txn));
		env_array_append_fmt(&env, "SLURM_ARCHIVE_LAST_TXN", "%ld",
				     (long)curr_end);
	}

	if (arch_cond->purge_usage != NO_VAL) {
		if (!(curr_end = archive_setup_end_time(
			     last_submit, arch_cond->purge_usage))) {
			error("Parsing purge usage");
			return SLURM_ERROR;
		}

		env_array_append_fmt(&env, "SLURM_ARCHIVE_USAGE", "%u",
				     SLURMDB_PURGE_ARCHIVE_SET(
					     arch_cond->purge_usage));
		env_array_append_fmt(&env, "SLURM_ARCHIVE_LAST_USAGE", "%ld",
				     (long)curr_end);
	}

#ifdef _PATH_STDPATH
	env_array_append (&env, "PATH", _PATH_STDPATH);
#else
	env_array_append (&env, "PATH", "/bin:/usr/bin");
#endif
	execve(arch_cond->archive_script, args, env);

	/* execve() only returns on failure */
	error("archive_run_script: execve(%s): %m",
	      arch_cond->archive_script);
	env_array_free(env);

	return SLURM_ERROR;
}
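
Note that archive_run_script() calls execve() directly: on success it replaces the calling process and never returns, so the trailing cleanup and return above run only when the exec fails. The caller is not shown here; a process that needs to keep running after launching such a script would typically fork() first. The sketch below is a generic fork/exec/wait illustration, not SLURM's actual caller; run_script, the /bin/true path, and the envp contents are assumptions for the example.

#include <stdio.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>

/* Generic helper: run a script with a given environment and return its
 * exit status; nothing here is SLURM API. */
static int run_script(const char *script, char *const envp[])
{
	char *const argv[] = { (char *) script, NULL };
	pid_t pid = fork();
	int status;

	if (pid < 0) {
		perror("fork");
		return -1;
	}
	if (pid == 0) {
		/* Child: replace this process with the script. */
		execve(script, argv, envp);
		/* Reached only if execve() failed. */
		perror("execve");
		_exit(127);
	}
	/* Parent: wait for the child and decode its exit status. */
	if (waitpid(pid, &status, 0) < 0) {
		perror("waitpid");
		return -1;
	}
	return WIFEXITED(status) ? WEXITSTATUS(status) : -1;
}

int main(void)
{
	char *envp[] = { "PATH=/bin:/usr/bin", NULL };
	int rc = run_script("/bin/true", envp);

	printf("script exit status: %d\n", rc);
	return (rc == 0) ? 0 : 1;
}
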
Example #3
File: task.c Project: mej/slurm
/*
 *  Current process is running as the user when this is called.
 */
extern void exec_task(stepd_step_rec_t *job, int local_proc_id)
{
	uint32_t *gtids;		/* pointer to array of ranks */
	int fd, j;
	stepd_step_task_info_t *task = job->task[local_proc_id];
	char **tmp_env;
	int saved_errno;
	uint32_t node_offset = 0, task_offset = 0;

	if (job->node_offset != NO_VAL)
		node_offset = job->node_offset;
	if (job->pack_task_offset != NO_VAL)
		task_offset = job->pack_task_offset;

	gtids = xmalloc(job->node_tasks * sizeof(uint32_t));
	for (j = 0; j < job->node_tasks; j++)
		gtids[j] = job->task[j]->gtid + task_offset;
	job->envtp->sgtids = _uint32_array_to_str(job->node_tasks, gtids);
	xfree(gtids);

	if (job->pack_jobid != NO_VAL)
		job->envtp->jobid = job->pack_jobid;
	else
		job->envtp->jobid = job->jobid;
	job->envtp->stepid = job->stepid;
	job->envtp->nodeid = job->nodeid + node_offset;
	job->envtp->cpus_on_node = job->cpus;
	job->envtp->procid = task->gtid + task_offset;
	job->envtp->localid = task->id;
	job->envtp->task_pid = getpid();
	job->envtp->distribution = job->task_dist;
	job->envtp->cpu_bind = xstrdup(job->cpu_bind);
	job->envtp->cpu_bind_type = job->cpu_bind_type;
	job->envtp->cpu_freq_min = job->cpu_freq_min;
	job->envtp->cpu_freq_max = job->cpu_freq_max;
	job->envtp->cpu_freq_gov = job->cpu_freq_gov;
	job->envtp->mem_bind = xstrdup(job->mem_bind);
	job->envtp->mem_bind_type = job->mem_bind_type;
	job->envtp->distribution = -1;
	job->envtp->ckpt_dir = xstrdup(job->ckpt_dir);
	job->envtp->batch_flag = job->batch;
	job->envtp->uid = job->uid;
	job->envtp->user_name = xstrdup(job->user_name);

	/*
	 * Modify copy of job's environment. Do not alter in place or
	 * concurrent searches of the environment can generate invalid memory
	 * references.
	 */
	job->envtp->env = env_array_copy((const char **) job->env);
	setup_env(job->envtp, false);
	setenvf(&job->envtp->env, "SLURM_JOB_GID", "%d", job->gid);
	setenvf(&job->envtp->env, "SLURMD_NODENAME", "%s", conf->node_name);
	if (job->tres_bind) {
		setenvf(&job->envtp->env, "SLURMD_TRES_BIND", "%s",
			job->tres_bind);
	}
	if (job->tres_freq) {
		setenvf(&job->envtp->env, "SLURMD_TRES_FREQ", "%s",
			job->tres_freq);
	}
	tmp_env = job->env;
	job->env = job->envtp->env;
	env_array_free(tmp_env);
	job->envtp->env = NULL;

	xfree(job->envtp->task_count);

	if (task->argv[0] && *task->argv[0] != '/') {
		/*
		 * Normally the client (srun) expands the command name
		 * to a fully qualified path, but in --multi-prog mode it
		 * is left up to the server to search the PATH for the
		 * executable.
		 */
		task->argv[0] = _build_path(task->argv[0], job->env, NULL);
	}

	if (!job->batch && (job->stepid != SLURM_EXTERN_CONT)) {
		if (switch_g_job_attach(job->switch_job, &job->env,
					job->nodeid, (uint32_t) local_proc_id,
					job->nnodes, job->ntasks,
					task->gtid) < 0) {
			error("Unable to attach to interconnect: %m");
			log_fini();
			exit(1);
		}

		if (_setup_mpi(job, local_proc_id) != SLURM_SUCCESS) {
			error("Unable to configure MPI plugin: %m");
			log_fini();
			exit(1);
		}
	}

	/* task-specific pre-launch activities */

	/* task plugin hook */
	if (task_g_pre_launch(job)) {
		error("Failed to invoke task plugins: task_p_pre_launch error");
		exit(1);
	}
	if (!job->batch &&
	    (job->accel_bind_type || job->tres_bind || job->tres_freq)) {
		/*
		 * Modify copy of job's environment. Do not alter in place or
		 * concurrent searches of the environment can generate invalid
		 * memory references.
		 *
		 * Also sets GRES frequency as needed.
		 */
		job->envtp->env = env_array_copy((const char **) job->env);
		gres_plugin_step_set_env(&job->envtp->env, job->step_gres_list,
					 job->accel_bind_type, job->tres_bind,
					 job->tres_freq, local_proc_id);
		tmp_env = job->env;
		job->env = job->envtp->env;
		env_array_free(tmp_env);
	}

	if (spank_user_task(job, local_proc_id) < 0) {
		error("Failed to invoke spank plugin stack");
		exit(1);
	}

	if (conf->task_prolog) {
		char *my_prolog;
		slurm_mutex_lock(&conf->config_mutex);
		my_prolog = xstrdup(conf->task_prolog);
		slurm_mutex_unlock(&conf->config_mutex);
		_run_script_and_set_env("slurm task_prolog",
					my_prolog, job);
		xfree(my_prolog);
	}
	if (job->task_prolog) {
		_run_script_and_set_env("user task_prolog",
					job->task_prolog, job);
	}

	/*
	 * Set TMPDIR after running prolog scripts, since TMPDIR
	 * might be set or changed in one of the prolog scripts.
	 */
	if (local_proc_id == 0)
		_make_tmpdir(job);

	if (!job->batch)
		pdebug_stop_current(job);
	if (job->env == NULL) {
		debug("job->env is NULL");
		job->env = (char **)xmalloc(sizeof(char *));
		job->env[0] = (char *)NULL;
	}

	if (job->restart_dir) {
		info("restart from %s", job->restart_dir);
		/* no return on success */
		checkpoint_restart_task(job, job->restart_dir, task->gtid);
		error("Restart task failed: %m");
		exit(errno);
	}

	if (task->argv[0] == NULL) {
		error("No executable program specified for this task");
		exit(2);
	}

	/*
	 * Do this last so the user's resource limits do not also
	 * constrain slurmstepd itself any longer than necessary.
	 */
	if (set_user_limits(job) < 0) {
		debug("Unable to set user limits");
		log_fini();
		exit(5);
	}

	execve(task->argv[0], task->argv, job->env);
	saved_errno = errno;

	/*
	 * print error message and clean up if execve() returns:
	 */
	if ((errno == ENOENT) &&
	    ((fd = open(task->argv[0], O_RDONLY)) >= 0)) {
		char buf[256], *eol;
		int sz;
		sz = read(fd, buf, sizeof(buf));
		if ((sz >= 3) && (xstrncmp(buf, "#!", 2) == 0)) {
			buf[sizeof(buf)-1] = '\0';
			eol = strchr(buf, '\n');
			if (eol)
				eol[0] = '\0';
			slurm_seterrno(saved_errno);
			error("execve(): bad interpreter(%s): %m", buf+2);
			exit(errno);
		}
	}
	slurm_seterrno(saved_errno);
	error("execve(): %s: %m", task->argv[0]);
	exit(errno);
}
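
Example #3 adds one detail over Example #1 that is easy to miss: errno is saved immediately after execve(), because the open()/read() calls used to build the "bad interpreter" diagnostic can overwrite it. A minimal sketch of that saved-errno pattern follows; the /nonexistent/prog path is only a placeholder.

#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
	char *const argv[] = { (char *) "/nonexistent/prog", NULL };
	int saved_errno, fd;

	execv(argv[0], argv);

	/* execv() returned, so it failed; capture errno before any other
	 * call (open, read, fprintf) can clobber it. */
	saved_errno = errno;

	/* Diagnostic work that may change errno, as in exec_task()'s
	 * "#!" interpreter check. */
	fd = open(argv[0], O_RDONLY);
	if (fd >= 0)
		close(fd);

	/* Report the original exec failure, not the later errno. */
	fprintf(stderr, "execv(%s): %s\n", argv[0], strerror(saved_errno));
	return 1;
}
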
Example #4
int main(int argc, char *argv[])
{
	log_options_t logopt = LOG_OPTS_STDERR_ONLY;
	job_desc_msg_t desc;
	resource_allocation_response_msg_t *alloc;
	time_t before, after;
	allocation_msg_thread_t *msg_thr;
	char **env = NULL;
	int status = 0;
	int retries = 0;
	pid_t pid  = getpid();
	pid_t tpgid = 0;
	pid_t rc_pid = 0;
	int i, rc = 0;
	static char *msg = "Slurm job queue full, sleeping and retrying.";
	slurm_allocation_callbacks_t callbacks;

	log_init(xbasename(argv[0]), logopt, 0, NULL);
	_set_exit_code();

	if (spank_init_allocator() < 0) {
		error("Failed to initialize plugin stack");
		exit(error_exit);
	}

	/* Be sure to call spank_fini when salloc exits
	 */
	if (atexit((void (*) (void)) spank_fini) < 0)
		error("Failed to register atexit handler for plugins: %m");


	if (initialize_and_process_args(argc, argv) < 0) {
		error("salloc parameter parsing");
		exit(error_exit);
	}
	/* reinit log with new verbosity (if changed by command line) */
	if (opt.verbose || opt.quiet) {
		logopt.stderr_level += opt.verbose;
		logopt.stderr_level -= opt.quiet;
		logopt.prefix_level = 1;
		log_alter(logopt, 0, NULL);
	}

	if (spank_init_post_opt() < 0) {
		error("Plugin stack post-option processing failed");
		exit(error_exit);
	}

	_set_spank_env();
	_set_submit_dir_env();
	if (opt.cwd && chdir(opt.cwd)) {
		error("chdir(%s): %m", opt.cwd);
		exit(error_exit);
	}

	if (opt.get_user_env_time >= 0) {
		char *user = uid_to_string(opt.uid);
		if (strcmp(user, "nobody") == 0) {
			error("Invalid user id %u: %m", (uint32_t)opt.uid);
			exit(error_exit);
		}
		env = env_array_user_default(user,
					     opt.get_user_env_time,
					     opt.get_user_env_mode);
		xfree(user);
		if (env == NULL)
			exit(error_exit);    /* error already logged */
		_set_rlimits(env);
	}

	/*
	 * Job control for interactive salloc sessions: only if ...
	 *
	 * a) input is from a terminal (stdin has valid termios attributes),
	 * b) controlling terminal exists (non-negative tpgid),
	 * c) salloc is not run in allocation-only (--no-shell) mode,
	 * d) salloc runs in its own process group (true in interactive
	 *    shells that support job control),
	 * e) salloc has been configured at compile-time to support background
	 *    execution and is not currently in the background process group.
	 */
	if (tcgetattr(STDIN_FILENO, &saved_tty_attributes) < 0) {
		/*
		 * Test existence of controlling terminal (tpgid > 0)
		 * after first making sure stdin is not redirected.
		 */
	} else if ((tpgid = tcgetpgrp(STDIN_FILENO)) < 0) {
		if (!opt.no_shell) {
			error("no controlling terminal: please set --no-shell");
			exit(error_exit);
		}
	} else if ((!opt.no_shell) && (pid == getpgrp())) {
		if (tpgid == pid)
			is_interactive = true;
#ifdef SALLOC_RUN_FOREGROUND
		while (tcgetpgrp(STDIN_FILENO) != pid) {
			if (!is_interactive) {
				error("Waiting for program to be placed in "
				      "the foreground");
				is_interactive = true;
			}
			killpg(pid, SIGTTIN);
		}
#endif
	}
	/*
	 * Reset saved tty attributes at exit, in case a child
	 * process died before properly resetting terminal.
	 */
	if (is_interactive)
		atexit (_reset_input_mode);

	/*
	 * Request a job allocation
	 */
	slurm_init_job_desc_msg(&desc);
	if (_fill_job_desc_from_opts(&desc) == -1) {
		exit(error_exit);
	}
	if (opt.gid != (gid_t) -1) {
		if (setgid(opt.gid) < 0) {
			error("setgid: %m");
			exit(error_exit);
		}
	}

	callbacks.ping = _ping_handler;
	callbacks.timeout = _timeout_handler;
	callbacks.job_complete = _job_complete_handler;
	callbacks.user_msg = _user_msg_handler;
	callbacks.node_fail = _node_fail_handler;
	/* create message thread to handle pings and such from slurmctld */
	msg_thr = slurm_allocation_msg_thr_create(&desc.other_port,
						  &callbacks);

	/* NOTE: Do not process signals in separate pthread. The signal will
	 * cause slurm_allocate_resources_blocking() to exit immediately. */
	for (i = 0; sig_array[i]; i++)
		xsignal(sig_array[i], _signal_while_allocating);

	before = time(NULL);
	while ((alloc = slurm_allocate_resources_blocking(&desc, opt.immediate,
					_pending_callback)) == NULL) {
		if ((errno != ESLURM_ERROR_ON_DESC_TO_RECORD_COPY) ||
		    (retries >= MAX_RETRIES))
			break;
		if (retries == 0)
			error("%s", msg);
		else
			debug("%s", msg);
		sleep (++retries);
	}

	/* become the user after the allocation has been requested. */
	if (opt.uid != (uid_t) -1) {
		if (setuid(opt.uid) < 0) {
			error("setuid: %m");
			exit(error_exit);
		}
	}
	if (alloc == NULL) {
		if (allocation_interrupted) {
			/* cancelled by signal */
			info("Job aborted due to signal");
		} else if (errno == EINTR) {
			error("Interrupted by signal."
			      "  Allocation request rescinded.");
		} else if (opt.immediate &&
			   ((errno == ETIMEDOUT) ||
			    (errno == ESLURM_NOT_TOP_PRIORITY) ||
			    (errno == ESLURM_NODES_BUSY))) {
			error("Unable to allocate resources: %m");
			error_exit = immediate_exit;
		} else {
			error("Failed to allocate resources: %m");
		}
		slurm_allocation_msg_thr_destroy(msg_thr);
		exit(error_exit);
	} else if (!allocation_interrupted) {
		/*
		 * Allocation granted!
		 */
		info("Granted job allocation %u", alloc->job_id);
		pending_job_id = alloc->job_id;
#ifdef HAVE_BG
		if (!_wait_bluegene_block_ready(alloc)) {
			if(!allocation_interrupted)
				error("Something is wrong with the "
				      "boot of the block.");
			goto relinquish;
		}
#else
		if (!_wait_nodes_ready(alloc)) {
			if(!allocation_interrupted)
				error("Something is wrong with the "
				      "boot of the nodes.");
			goto relinquish;
		}
#endif
	}

	after = time(NULL);
	if (opt.bell == BELL_ALWAYS
	    || (opt.bell == BELL_AFTER_DELAY
		&& ((after - before) > DEFAULT_BELL_DELAY))) {
		_ring_terminal_bell();
	}
	if (opt.no_shell)
		exit(0);
	if (allocation_interrupted) {
		/* salloc process received a signal after
		 * slurm_allocate_resources_blocking returned with the
		 * allocation, but before the new signal handlers were
		 * registered.
		 */
		goto relinquish;
	}

	/*
	 * Run the user's command.
	 */
	if (env_array_for_job(&env, alloc, &desc) != SLURM_SUCCESS)
		goto relinquish;

	/* Pass the task count through to srun when the user specified one */
	if (opt.ntasks_set) {
		env_array_append_fmt(&env, "SLURM_NTASKS", "%d", opt.ntasks);
		/* keep around for old scripts */
		env_array_append_fmt(&env, "SLURM_NPROCS", "%d", opt.ntasks);
	}
	if (opt.cpus_per_task > 1) {
		env_array_append_fmt(&env, "SLURM_CPUS_PER_TASK", "%d",
				     opt.cpus_per_task);
	}
	if (opt.overcommit) {
		env_array_append_fmt(&env, "SLURM_OVERCOMMIT", "%d",
			opt.overcommit);
	}
	if (opt.acctg_freq >= 0) {
		env_array_append_fmt(&env, "SLURM_ACCTG_FREQ", "%d",
			opt.acctg_freq);
	}
	if (opt.network)
		env_array_append_fmt(&env, "SLURM_NETWORK", "%s", opt.network);

	env_array_set_environment(env);
	env_array_free(env);
	pthread_mutex_lock(&allocation_state_lock);
	if (allocation_state == REVOKED) {
		error("Allocation was revoked for job %u before command could "
		      "be run", alloc->job_id);
		pthread_mutex_unlock(&allocation_state_lock);
		if (slurm_complete_job(alloc->job_id, status) != 0) {
			error("Unable to clean up allocation for job %u: %m",
			      alloc->job_id);
		}
		return 1;
	}
	allocation_state = GRANTED;
	pthread_mutex_unlock(&allocation_state_lock);

	/*  Ensure that salloc has initial terminal foreground control.  */
	if (is_interactive) {
		/*
		 * Ignore remaining job-control signals (other than those in
		 * sig_array, which at this state act like SIG_IGN).
		 */
		xsignal(SIGTSTP, SIG_IGN);
		xsignal(SIGTTIN, SIG_IGN);
		xsignal(SIGTTOU, SIG_IGN);

		pid = getpid();
		setpgid(pid, pid);

		tcsetpgrp(STDIN_FILENO, pid);
	}

	command_pid = _fork_command(command_argv);
	/*
	 * Wait for command to exit, OR for waitpid to be interrupted by a
	 * signal.  Either way, we are going to release the allocation next.
	 */
	if (command_pid > 0) {
		setpgid(command_pid, command_pid);
		if (is_interactive)
			tcsetpgrp(STDIN_FILENO, command_pid);

		/* NOTE: Do not process signals in separate pthread.
		 * The signal will cause waitpid() to exit immediately. */
		xsignal(SIGHUP,  _exit_on_signal);

		/* Use WUNTRACED to treat stopped children like terminated ones */
		do {
			rc_pid = waitpid(command_pid, &status, WUNTRACED);
		} while ((rc_pid == -1) && (!exit_flag));

		if ((rc_pid == -1) && (errno != EINTR))
			error("waitpid for %s failed: %m", command_argv[0]);
	}

	if (is_interactive)
		tcsetpgrp(STDIN_FILENO, pid);

	/*
	 * Relinquish the job allocation (if not already revoked).
	 */
relinquish:
	pthread_mutex_lock(&allocation_state_lock);
	if (allocation_state != REVOKED) {
		pthread_mutex_unlock(&allocation_state_lock);

		info("Relinquishing job allocation %d", alloc->job_id);
		if ((slurm_complete_job(alloc->job_id, status) != 0) &&
		    (slurm_get_errno() != ESLURM_ALREADY_DONE))
			error("Unable to clean up job allocation %d: %m",
			      alloc->job_id);
		pthread_mutex_lock(&allocation_state_lock);
		allocation_state = REVOKED;
	}
	pthread_mutex_unlock(&allocation_state_lock);

	slurm_free_resource_allocation_response_msg(alloc);
	slurm_allocation_msg_thr_destroy(msg_thr);

	/*
	 * Figure out what return code we should use.  If the user's command
	 * exited normally, return the user's return code.
	 */
	rc = 1;
	if (rc_pid != -1) {

		if (WIFEXITED(status)) {
			rc = WEXITSTATUS(status);
		} else if (WIFSIGNALED(status)) {
			verbose("Command \"%s\" was terminated by signal %d",
				command_argv[0], WTERMSIG(status));
			/*
			 * If the command was killed by one of these signals,
			 * return a normal exit code, since the signal was
			 * most likely sent by the user.
			 */
			switch(WTERMSIG(status)) {
			case SIGHUP:
			case SIGINT:
			case SIGQUIT:
			case SIGKILL:
				rc = 0;
				break;
			default:
				break;
			}
		}
	}
	return rc;
}
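
The return-code logic at the end of main() is the most reusable piece of this example: propagate WEXITSTATUS() when the command exits normally, and treat user-sent signals as a clean exit. Below is a small standalone sketch of the same wait-status decoding; /bin/sh -c "exit 3" stands in for the user's command and is only an example.

#include <signal.h>
#include <stdio.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>

int main(void)
{
	pid_t pid = fork();
	int status, rc = 1;

	if (pid < 0)
		return 1;
	if (pid == 0) {
		/* Child: stand-in for the user's command. */
		execl("/bin/sh", "sh", "-c", "exit 3", (char *) NULL);
		_exit(127);
	}
	/* WUNTRACED, as in salloc, also reports stopped children. */
	if (waitpid(pid, &status, WUNTRACED) < 0)
		return 1;

	if (WIFEXITED(status)) {
		/* Normal exit: propagate the command's own exit code. */
		rc = WEXITSTATUS(status);
	} else if (WIFSIGNALED(status)) {
		int sig = WTERMSIG(status);

		fprintf(stderr, "command terminated by signal %d\n", sig);
		/* As in salloc: signals the user typically sends count
		 * as a normal exit. */
		if ((sig == SIGHUP) || (sig == SIGINT) ||
		    (sig == SIGQUIT) || (sig == SIGKILL))
			rc = 0;
	}
	printf("exit code: %d\n", rc);
	return rc;
}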