Example #1
/*
 * Initialize context for plugin
 */
extern int launch_init(void)
{
	int retval = SLURM_SUCCESS;
	char *plugin_type = "launch";
	char *type = NULL;

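	/* nothing to do if the plugin context has already been created */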
	if (init_run && plugin_context)
		return retval;

	slurm_mutex_lock(&plugin_context_lock);

	if (plugin_context)
		goto done;

	type = slurm_get_launch_type();
	plugin_context = plugin_context_create(
		plugin_type, type, (void **)&ops, syms, sizeof(syms));

	if (!plugin_context) {
		error("cannot create %s context for %s", plugin_type, type);
		retval = SLURM_ERROR;
		goto done;
	}
	init_run = true;

done:
	slurm_mutex_unlock(&plugin_context_lock);
	xfree(type);

	return retval;
}
Example #2
/*
 * opt_default(): used by initialize_and_process_args to set defaults
 */
static void _opt_default(void)
{
	opt.account	= NULL;
	opt.batch	= false;
	opt.clusters    = NULL;
#ifdef HAVE_FRONT_END
	opt.ctld	= true;
#else
{
	char *launch_type = slurm_get_launch_type();
	/* do this for all but slurm (poe, aprun, etc...) */
	if (strcmp(launch_type, "launch/slurm"))
		opt.ctld	= true;
	else
		opt.ctld	= false;
	xfree(launch_type);
}
#endif
	opt.interactive	= false;
	opt.job_cnt	= 0;
	opt.job_list    = NULL;
	opt.job_name	= NULL;
	opt.nodelist	= NULL;
	opt.partition	= NULL;
	opt.qos		= NULL;
	opt.reservation	= NULL;
	opt.signal	= (uint16_t) NO_VAL;
	opt.state	= JOB_END;
	opt.user_id	= 0;
	opt.user_name	= NULL;
	opt.verbose	= 0;
	opt.wckey	= NULL;
}
Example #3
/*
 * opt_default(): used by initialize_and_process_args to set defaults
 */
static void _opt_default(void)
{
	opt.account	= NULL;
	opt.batch	= false;
	opt.clusters    = NULL;
#ifdef HAVE_FRONT_END
	opt.ctld	= true;
#else
{
	char *launch_type = slurm_get_launch_type();
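	/* set opt.ctld only when the POE launch plugin is configured */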
	if (!strcmp(launch_type, "launch/poe"))
		opt.ctld	= true;
	else
		opt.ctld	= false;
	xfree(launch_type);
}
#endif
	opt.interactive	= false;
	opt.job_cnt	= 0;
	opt.job_name	= NULL;
	opt.nodelist	= NULL;
	opt.partition	= NULL;
	opt.qos		= NULL;
	opt.reservation	= NULL;
	opt.signal	= (uint16_t)-1; /* no signal specified */
	opt.state	= JOB_END;
	opt.user_id	= 0;
	opt.user_name	= NULL;
	opt.verbose	= 0;
	opt.wckey	= NULL;
}
Example #4
static int
_handle_suspend(int fd, stepd_step_rec_t *job, uid_t uid)
{
	static int launch_poe = -1;
	int rc = SLURM_SUCCESS;
	int errnum = 0;
	uint16_t job_core_spec = (uint16_t) NO_VAL;

	safe_read(fd, &job_core_spec, sizeof(uint16_t));

	debug("_handle_suspend for step:%u.%u uid:%ld core_spec:%u",
	      job->jobid, job->stepid, (long)uid, job_core_spec);

	if (!_slurm_authorized_user(uid)) {
		debug("job step suspend request from uid %ld for job %u.%u ",
		      (long)uid, job->jobid, job->stepid);
		rc = -1;
		errnum = EPERM;
		goto done;
	}

	if (job->cont_id == 0) {
		debug ("step %u.%u invalid container [cont_id:%"PRIu64"]",
			job->jobid, job->stepid, job->cont_id);
		rc = -1;
		errnum = ESLURMD_JOB_NOTRUNNING;
		goto done;
	}

	acct_gather_suspend_poll();
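	/* determine once (and cache) whether the POE launch plugin is in use */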
	if (launch_poe == -1) {
		char *launch_type = slurm_get_launch_type();
		if (!strcmp(launch_type, "launch/poe"))
			launch_poe = 1;
		else
			launch_poe = 0;
		xfree(launch_type);
	}

	/*
	 * Signal the container
	 */
	pthread_mutex_lock(&suspend_mutex);
	if (suspended) {
		rc = -1;
		errnum = ESLURMD_STEP_SUSPENDED;
		pthread_mutex_unlock(&suspend_mutex);
		goto done;
	} else {
		if (!job->batch && switch_g_job_step_pre_suspend(job))
			error("switch_g_job_step_pre_suspend: %m");

		/* SIGTSTP is sent first to let MPI daemons stop their tasks,
		 * then wait 2 seconds, then send SIGSTOP to the spawned
		 * process's container to stop everything else.
		 *
		 * In some cases, 1 second has proven insufficient. Longer
		 * delays may help ensure that all MPI tasks have been stopped
		 * (that depends upon the MPI implementation used), but will
		 * also lengthen the window in which more than one job can be
		 * running on each resource (not good). */
		if (launch_poe == 0) {
			/* IBM MPI seems to periodically hang upon receipt
			 * of SIGTSTP. */
			if (proctrack_g_signal(job->cont_id, SIGTSTP) < 0) {
				verbose("Error suspending %u.%u (SIGTSTP): %m",
					job->jobid, job->stepid);
			} else
				sleep(2);
		}

		if (proctrack_g_signal(job->cont_id, SIGSTOP) < 0) {
			verbose("Error suspending %u.%u (SIGSTOP): %m",
				job->jobid, job->stepid);
		} else {
			verbose("Suspended %u.%u", job->jobid, job->stepid);
		}
		suspended = true;
	}
	if (!job->batch && switch_g_job_step_post_suspend(job))
		error("switch_g_job_step_post_suspend: %m");
	if (!job->batch && core_spec_g_suspend(job->cont_id, job_core_spec))
		error("core_spec_g_suspend: %m");

	pthread_mutex_unlock(&suspend_mutex);

done:
	/* Send the return code and errno */
	safe_write(fd, &rc, sizeof(int));
	safe_write(fd, &errnum, sizeof(int));
	return SLURM_SUCCESS;
rwfail:
	return SLURM_FAILURE;
}
Example #5
static void _launch_app(srun_job_t *job, List srun_job_list, bool got_alloc)
{
	ListIterator opt_iter, job_iter;
	opt_t *opt_local = NULL;
	_launch_app_data_t *opts;
	int total_ntasks = 0, total_nnodes = 0, step_cnt = 0, node_offset = 0;
	pthread_mutex_t step_mutex = PTHREAD_MUTEX_INITIALIZER;
	pthread_cond_t step_cond   = PTHREAD_COND_INITIALIZER;
	srun_job_t *first_job = NULL;
	char *launch_type, *pack_node_list = NULL;
	bool need_mpir = false;
	uint16_t *tmp_task_cnt = NULL, *pack_task_cnts = NULL;
	uint32_t **tmp_tids = NULL, **pack_tids = NULL;

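	/* the MPIR debugger tables are only built for the launch/slurm plugin */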
	launch_type = slurm_get_launch_type();
	if (launch_type && strstr(launch_type, "slurm"))
		need_mpir = true;
	xfree(launch_type);

	if (srun_job_list) {
		int pack_step_cnt = list_count(srun_job_list);
		first_job = (srun_job_t *) list_peek(srun_job_list);
		if (!opt_list) {
			if (first_job)
				fini_srun(first_job, got_alloc, &global_rc, 0);
			fatal("%s: have srun_job_list, but no opt_list",
			      __func__);
		}

		job_iter = list_iterator_create(srun_job_list);
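		/* accumulate task counts, task IDs and node names across all
		 * components of the pack (heterogeneous) job */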
		while ((job = (srun_job_t *) list_next(job_iter))) {
			char *node_list = NULL;
			int i, node_inx;
			total_ntasks += job->ntasks;
			total_nnodes += job->nhosts;

			xrealloc(pack_task_cnts, sizeof(uint16_t)*total_nnodes);
			(void) slurm_step_ctx_get(job->step_ctx,
						  SLURM_STEP_CTX_TASKS,
						  &tmp_task_cnt);
			if (!tmp_task_cnt) {
				fatal("%s: job %u has NULL task array",
				      __func__, job->jobid);
				break;	/* To eliminate CLANG error */
			}
			memcpy(pack_task_cnts + node_offset, tmp_task_cnt,
			       sizeof(uint16_t) * job->nhosts);

			xrealloc(pack_tids, sizeof(uint32_t *) * total_nnodes);
			(void) slurm_step_ctx_get(job->step_ctx,
						  SLURM_STEP_CTX_TIDS,
						  &tmp_tids);
			if (!tmp_tids) {
				fatal("%s: job %u has NULL task ID array",
				      __func__, job->jobid);
				break;	/* To eliminate CLANG error */
			}
			for (node_inx = 0; node_inx < job->nhosts; node_inx++) {
				uint32_t *node_tids;
				node_tids = xmalloc(sizeof(uint32_t) *
						    tmp_task_cnt[node_inx]);
				for (i = 0; i < tmp_task_cnt[node_inx]; i++) {
					node_tids[i] = tmp_tids[node_inx][i] +
						       job->pack_task_offset;
				}
				pack_tids[node_offset + node_inx] =
					node_tids;
			}

			(void) slurm_step_ctx_get(job->step_ctx,
						  SLURM_STEP_CTX_NODE_LIST,
						  &node_list);
			if (!node_list) {
				fatal("%s: job %u has NULL hostname",
				      __func__, job->jobid);
			}
			if (pack_node_list)
				xstrfmtcat(pack_node_list, ",%s", node_list);
			else
				pack_node_list = xstrdup(node_list);
			xfree(node_list);
			node_offset += job->nhosts;
		}
		list_iterator_reset(job_iter);
		_reorder_pack_recs(&pack_node_list, &pack_task_cnts,
				   &pack_tids, total_nnodes);

		if (need_mpir)
			mpir_init(total_ntasks);

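		/* spawn one detached launch thread per pack job component */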
		opt_iter = list_iterator_create(opt_list);
		while ((opt_local = (opt_t *) list_next(opt_iter))) {
			job = (srun_job_t *) list_next(job_iter);
			if (!job) {
				slurm_mutex_lock(&step_mutex);
				while (step_cnt > 0)
					slurm_cond_wait(&step_cond, &step_mutex);
				slurm_mutex_unlock(&step_mutex);
				if (first_job) {
					fini_srun(first_job, got_alloc,
						  &global_rc, 0);
				}
				fatal("%s: job allocation count does not match request count (%d != %d)",
				      __func__, list_count(srun_job_list),
				      list_count(opt_list));
				break;	/* To eliminate CLANG error */
			}

			slurm_mutex_lock(&step_mutex);
			step_cnt++;
			slurm_mutex_unlock(&step_mutex);
			job->pack_node_list = xstrdup(pack_node_list);
			if ((pack_step_cnt > 1) && pack_task_cnts) {
				xassert(node_offset == job->pack_nnodes);
				job->pack_task_cnts = xmalloc(sizeof(uint16_t) *
							      job->pack_nnodes);
				memcpy(job->pack_task_cnts, pack_task_cnts,
				       sizeof(uint16_t) * job->pack_nnodes);
				job->pack_tids = xmalloc(sizeof(uint32_t *) *
							 job->pack_nnodes);
				memcpy(job->pack_tids, pack_tids,
				       sizeof(uint32_t *) * job->pack_nnodes);
			}
			opts = xmalloc(sizeof(_launch_app_data_t));
			opts->got_alloc   = got_alloc;
			opts->job         = job;
			opts->opt_local   = opt_local;
			opts->step_cond   = &step_cond;
			opts->step_cnt    = &step_cnt;
			opts->step_mutex  = &step_mutex;
			opt_local->pack_step_cnt = pack_step_cnt;

			slurm_thread_create_detached(NULL, _launch_one_app,
						     opts);
		}
		xfree(pack_node_list);
		xfree(pack_task_cnts);
		list_iterator_destroy(job_iter);
		list_iterator_destroy(opt_iter);
		slurm_mutex_lock(&step_mutex);
		while (step_cnt > 0)
			slurm_cond_wait(&step_cond, &step_mutex);
		slurm_mutex_unlock(&step_mutex);

		if (first_job)
			fini_srun(first_job, got_alloc, &global_rc, 0);
	} else {
		if (need_mpir)
			mpir_init(job->ntasks);
		opts = xmalloc(sizeof(_launch_app_data_t));
		opts->got_alloc   = got_alloc;
		opts->job         = job;
		opts->opt_local   = &opt;
		opt.pack_step_cnt = 1;
		_launch_one_app(opts);
		fini_srun(job, got_alloc, &global_rc, 0);
	}
}