Beispiel #1
0
extern int stepd_cleanup(slurm_msg_t *msg, stepd_step_rec_t *job,
			 slurm_addr_t *cli, slurm_addr_t *self,
			 int rc, bool only_mem)
{
	if (!only_mem) {
		if (job->batch)
			batch_finish(job, rc); /* sends batch complete message */

		/* signal the message thread to shutdown, and wait for it */
		eio_signal_shutdown(job->msg_handle);
		pthread_join(job->msgid, NULL);
	}

	mpi_fini();	/* Remove stale PMI2 sockets */
#ifdef MEMORY_LEAK_DEBUG
	acct_gather_conf_destroy();
	(void) core_spec_g_fini();
	_step_cleanup(job, msg, rc);

	fini_setproctitle();

	xfree(cli);
	xfree(self);
	xfree(conf->block_map);
	xfree(conf->block_map_inv);
	xfree(conf->hostname);
	xfree(conf->job_acct_gather_freq);
	xfree(conf->job_acct_gather_type);
	xfree(conf->logfile);
	xfree(conf->node_name);
	xfree(conf->node_topo_addr);
	xfree(conf->node_topo_pattern);
	xfree(conf->spooldir);
	xfree(conf->task_epilog);
	xfree(conf->task_prolog);
	xfree(conf);
#endif
	info("done with job");
	return rc;
}
Beispiel #2
0
int
main (int argc, char *argv[])
{
	slurm_addr_t *cli;
	slurm_addr_t *self;
	slurm_msg_t *msg;
	slurmd_job_t *job;
	int ngids;
	gid_t *gids;
	int rc = 0;

	if (process_cmdline (argc, argv) < 0)
		fatal ("Error in slurmstepd command line");

	xsignal_block(slurmstepd_blocked_signals);
	conf = xmalloc(sizeof(*conf));
	conf->argv = &argv;
	conf->argc = &argc;
	init_setproctitle(argc, argv);
	if (slurm_select_init(1) != SLURM_SUCCESS )
		fatal( "failed to initialize node selection plugin" );

	/* Receive job parameters from the slurmd */
	_init_from_slurmd(STDIN_FILENO, argv, &cli, &self, &msg,
			  &ngids, &gids);

	/* Fancy way of closing stdin that keeps STDIN_FILENO from being
	 * allocated to any random file.  The slurmd already opened /dev/null
	 * on STDERR_FILENO for us. */
	dup2(STDERR_FILENO, STDIN_FILENO);

	/* Create the slurmd_job_t, mostly from info in a
	   launch_tasks_request_msg_t or a batch_job_launch_msg_t */
	if(!(job = _step_setup(cli, self, msg))) {
		_send_fail_to_slurmd(STDOUT_FILENO);
		rc = SLURM_FAILURE;
		goto ending;
	}
	job->ngids = ngids;
	job->gids = gids;

	/* fork handlers cause mutexes on some global data structures
	   to be re-initialized after the fork. */
	list_install_fork_handlers();
	slurm_conf_install_fork_handlers();

	/* sets job->msg_handle and job->msgid */
	if (msg_thr_create(job) == SLURM_ERROR) {
		_send_fail_to_slurmd(STDOUT_FILENO);
		rc = SLURM_FAILURE;
		goto ending;
	}

	_send_ok_to_slurmd(STDOUT_FILENO);

	/* Fancy way of closing stdout that keeps STDOUT_FILENO from being
	 * allocated to any random file.  The slurmd already opened /dev/null
	 * on STDERR_FILENO for us. */
	dup2(STDERR_FILENO, STDOUT_FILENO);

	/* This does most of the stdio setup, then launches all the tasks,
	   and blocks until the step is complete */
	rc = job_manager(job);

	/* signal the message thread to shutdown, and wait for it */
	eio_signal_shutdown(job->msg_handle);
	pthread_join(job->msgid, NULL);

	if (job->batch)
		batch_finish(job, rc); /* sends batch complete message */

ending:
#ifdef MEMORY_LEAK_DEBUG
	_step_cleanup(job, msg, rc);

	xfree(cli);
	xfree(self);
	xfree(conf->hostname);
	xfree(conf->block_map);
	xfree(conf->block_map_inv);
	xfree(conf->spooldir);
	xfree(conf->node_name);
	xfree(conf->node_topo_addr);
	xfree(conf->node_topo_pattern);
	xfree(conf->logfile);
	xfree(conf);
#endif
	info("done with job");
	return rc;
}