Example #1
int p_mpi_hook_slurmstepd_task (const mpi_plugin_task_info_t *job,
				char ***env)
{
	int i;
	
	env_array_overwrite_fmt(env, "PMI_FD", "%u",
				TASK_PMI_SOCK(job->ltaskid));
	
	env_array_overwrite_fmt(env, "PMI_JOBID", "%s",
				job_info.pmi_jobid);
	env_array_overwrite_fmt(env, "PMI_RANK", "%u", job->gtaskid);
	env_array_overwrite_fmt(env, "PMI_SIZE", "%u", job->ntasks);
	if (job_info.spawn_seq) { /* PMI1.1 needs this env-var */
		env_array_overwrite_fmt(env, "PMI_SPAWNED", "%u", 1);
	}
	/* close unused sockets in task */
	close(tree_sock);
	tree_sock = 0;
	for (i = 0; i < job->ltasks; i ++) {
		close(STEPD_PMI_SOCK(i));
		STEPD_PMI_SOCK(i) = 0;
		if (i != job->ltaskid) {
			close(TASK_PMI_SOCK(i));
			TASK_PMI_SOCK(i) = 0;
		}
	}
	return SLURM_SUCCESS;
}
Example #2
/*
 * task_p_pre_launch() is called prior to exec of application task.
 *	It is followed by TaskProlog program (from slurm.conf) and
 *	--task-prolog (from srun command line).
 */
extern int task_p_pre_launch (stepd_step_rec_t *job)
{
#ifdef HAVE_NATIVE_CRAY
	int rc;
	uint64_t apid;
	DEF_TIMERS;

	START_TIMER;
	apid = SLURM_ID_HASH(job->jobid, job->stepid);
	debug2("task_p_pre_launch: %u.%u, apid %"PRIu64", task %d",
	       job->jobid, job->stepid, apid, job->envtp->procid);

	/*
	 * Send the rank to the application's PMI layer via an environment
	 * variable.
	 */
	rc = env_array_overwrite_fmt(&job->env, ALPS_APP_PE_ENV,
				     "%d", job->envtp->procid);
	if (rc == 0) {
		CRAY_ERR("Failed to set env variable %s", ALPS_APP_PE_ENV);
		return SLURM_ERROR;
	}

	/*
	 * Set the PMI_NO_FORK environment variable.
	 */
	rc = env_array_overwrite(&job->env, PMI_NO_FORK_ENV, "1");
	if (rc == 0) {
		CRAY_ERR("Failed to set env variable %s", PMI_NO_FORK_ENV);
		return SLURM_ERROR;
	}

	/*
	 *  Notify the task which offset to use
	 */
	rc = env_array_overwrite_fmt(&job->env, LLI_STATUS_OFFS_ENV,
				     "%d", job->envtp->localid + 1);
	if (rc == 0) {
		CRAY_ERR("Failed to set env variable %s",
			 LLI_STATUS_OFFS_ENV);
		return SLURM_ERROR;
	}

	/*
	 * Set the ALPS_APP_ID environment variable for use by
	 * Cray tools.
	 */
	rc = env_array_overwrite_fmt(&job->env, ALPS_APP_ID_ENV, "%"PRIu64,
				     apid);
	if (rc == 0) {
		CRAY_ERR("Failed to set env variable %s",
			 ALPS_APP_ID_ENV);
	}
	END_TIMER;
	if (debug_flags & DEBUG_FLAG_TIME_CRAY)
		INFO_LINE("call took: %s", TIME_STR);
#endif
	return SLURM_SUCCESS;
}
Example #3
int p_mpi_hook_slurmstepd_task (const mpi_plugin_client_info_t *job,
				char ***env)
{
	char *nodelist, *task_cnt;

	nodelist = getenvp(*env, "SLURM_NODELIST");
	if (nodelist) {
		char *host_str = NULL, *tmp;
		hostlist_t hl = hostlist_create(nodelist);
		while ((tmp = hostlist_shift(hl))) {
			if (host_str)
				xstrcat(host_str, ",");
			xstrcat(host_str, tmp);
			free(tmp);
		}
		hostlist_destroy(hl);
		env_array_overwrite_fmt(env, "SLURM_MPICH_NODELIST", "%s",
			host_str);
		xfree(host_str);
	}

	task_cnt = getenvp(*env, "SLURM_TASKS_PER_NODE");
	if (task_cnt) {
		char *task_str = NULL, tmp_str[32];
		int i=0, val, reps;
		while (task_cnt[i]) {
			if ((task_cnt[i] >= '0') && (task_cnt[i] <= '9'))
				val = atoi(&task_cnt[i]);
			else
				break;	/* bad parse */
			i++;
			while (task_cnt[i]
			&&     (task_cnt[i] != 'x') && (task_cnt[i] != ','))
				i++;
			if (task_cnt[i] == 'x') {
				i++;
				reps = atoi(&task_cnt[i]);
				while (task_cnt[i] && (task_cnt[i] != ','))
					i++;
			} else
				reps = 1;
			if (task_cnt[i] == ',')
				i++;
			while (reps) {
				if (task_str)
					xstrcat(task_str, ",");
				snprintf(tmp_str, sizeof(tmp_str), "%d", val);
				xstrcat(task_str, tmp_str);
				reps--;
			}
		}
		env_array_overwrite_fmt(env, "SLURM_MPICH_TASKS", "%s",
			task_str);
		xfree(task_str);
	}

	return SLURM_SUCCESS;
}
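To make the parsing above concrete: SLURM_TASKS_PER_NODE carries a compressed form such as "2(x3),1" (two tasks on each of the first three nodes, then one task on the last node), and the loop expands it into the flat list "2,2,2,1" for SLURM_MPICH_TASKS. Below is a minimal stand-alone sketch of that same expansion; it is illustrative only, using plain libc string routines instead of the xstrcat()/env_array helpers so it compiles by itself.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

int main(void)
{
	const char *task_cnt = "2(x3),1";	/* compressed tasks-per-node */
	char task_str[256] = "";		/* expanded result */
	char tmp_str[32];
	int i = 0, val = 0, reps = 0;

	while (task_cnt[i]) {
		if ((task_cnt[i] >= '0') && (task_cnt[i] <= '9'))
			val = atoi(&task_cnt[i]);
		else
			break;	/* bad parse */
		i++;
		while (task_cnt[i] && (task_cnt[i] != 'x') &&
		       (task_cnt[i] != ','))
			i++;
		if (task_cnt[i] == 'x') {	/* repetition count follows */
			i++;
			reps = atoi(&task_cnt[i]);
			while (task_cnt[i] && (task_cnt[i] != ','))
				i++;
		} else
			reps = 1;
		if (task_cnt[i] == ',')
			i++;
		while (reps) {			/* emit "val" reps times */
			if (task_str[0])
				strncat(task_str, ",",
					sizeof(task_str) - strlen(task_str) - 1);
			snprintf(tmp_str, sizeof(tmp_str), "%d", val);
			strncat(task_str, tmp_str,
				sizeof(task_str) - strlen(task_str) - 1);
			reps--;
		}
	}
	printf("%s -> %s\n", task_cnt, task_str);	/* 2(x3),1 -> 2,2,2,1 */
	return 0;
}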
Example #4
File: setup.c Project: Cray/slurm
static int
_setup_srun_environ(const mpi_plugin_client_info_t *job, char ***env)
{
	/* ifhn will be set in SLURM_SRUN_COMM_HOST by slurmd */
	env_array_overwrite_fmt(env, PMI2_SRUN_PORT_ENV, "%hu",
				tree_info.pmi_port);
	env_array_overwrite_fmt(env, PMI2_STEP_NODES_ENV, "%s",
				job_info.step_nodelist);
	env_array_overwrite_fmt(env, PMI2_PROC_MAPPING_ENV, "%s",
				job_info.proc_mapping);
	return SLURM_SUCCESS;
}
Example #5
int p_mpi_hook_slurmstepd_task(const mpi_plugin_task_info_t *job,
			       char ***env)
{
	char addrbuf[1024];
	char *p;
	char *addr = getenvp(*env, "SLURM_LAUNCH_NODE_IPADDR");

	debug("Using mpi/mpich-gm");
	slurm_print_slurm_addr (job->self, addrbuf, sizeof(addrbuf));

	if ((p = strchr (addrbuf, ':')) != NULL)
		*p = '\0';

	env_array_overwrite_fmt(env, "GMPI_MASTER", "%s", addr);
	env_array_overwrite_fmt(env, "GMPI_SLAVE",  "%s", addrbuf);
	env_array_overwrite_fmt(env, "GMPI_ID",  "%u", job->gtaskid);
	if (!getenv("GMPI_RECV")) {
		env_array_overwrite_fmt(env, "GMPI_RECV",  "%s", "hybrid");
	}

	env_array_overwrite_fmt(env, "MXMPI_MASTER", "%s", addr);
	env_array_overwrite_fmt(env, "MXMPI_ID", "%u", job->gtaskid);
	env_array_overwrite_fmt(env, "MXMPI_SLAVE", "%s", addrbuf);
	if (!getenv("MXMPI_RECV")) {
		env_array_overwrite_fmt(env, "MXMPI_RECV",  "%s", "hybrid");
	}
	debug2("init for mpi rank %u", job->gtaskid);

	return SLURM_SUCCESS;
}
Example #6
static void
_setup_exec_srun(spawn_req_t *req)
{
	char **env, env_key[32];
	int i, rc;
	spawn_resp_t *resp;

	debug3("mpi/pmi2: in _setup_exec_srun");

	/* setup environments */
	env = env_array_copy((const char **)job_info.job_env);
	/* TODO: unset some env-vars */

	env_array_overwrite_fmt(&env, "SLURM_JOB_ID", "%u", job_info.jobid);
	env_array_overwrite_fmt(&env, PMI2_SPAWNER_JOBID_ENV, "%s",
				job_info.pmi_jobid);
	env_array_overwrite_fmt(&env, PMI2_PMI_JOBID_ENV, "%s-%u",
				job_info.pmi_jobid, req->seq);
	env_array_overwrite_fmt(&env, PMI2_SPAWN_SEQ_ENV, "%u", req->seq);
	env_array_overwrite_fmt(&env, PMI2_SPAWNER_PORT_ENV, "%hu",
				tree_info.pmi_port);
	/* preput kvs */
	env_array_overwrite_fmt(&env, PMI2_PREPUT_CNT_ENV, "%d",
				req->preput_cnt);
	for (i = 0; i < req->preput_cnt; i ++) {
		snprintf(env_key, 32, PMI2_PPKEY_ENV"%d", i);
		env_array_overwrite_fmt(&env, env_key, "%s", req->pp_keys[i]);
		snprintf(env_key, 32, PMI2_PPVAL_ENV"%d", i);
		env_array_overwrite_fmt(&env, env_key, "%s", req->pp_vals[i]);
	}

	if (req->subcmd_cnt == 1) {
		/* does not return on success */
		rc = _exec_srun_single(req, env);
	} else {
		/* does not return on success */
		rc = _exec_srun_multiple(req, env);
	}

	resp = spawn_resp_new();
	resp->seq = req->seq;
	xstrfmtcat(resp->jobid, "%s-%u", job_info.pmi_jobid, req->seq);
	resp->error_cnt = 0;
	resp->rc = rc;

	/* fake a srun address */
	tree_info.srun_addr = xmalloc(sizeof(slurm_addr_t));
	slurm_set_addr(tree_info.srun_addr, tree_info.pmi_port,
		       "127.0.0.1");
	spawn_resp_send_to_srun(resp);
	spawn_resp_free(resp);
	exit(errno);
}
Example #7
/*
 * task_p_pre_launch() is called prior to exec of application task.
 *	It is followed by TaskProlog program (from slurm.conf) and
 *	--task-prolog (from srun command line).
 */
extern int task_p_pre_launch (stepd_step_rec_t *job)
{
#ifdef HAVE_NATIVE_CRAY
	int rc;

	debug("task_p_pre_launch: %u.%u, task %d",
	      job->jobid, job->stepid, job->envtp->procid);

	/*
	 * Send the rank to the application's PMI layer via an environment
	 * variable.
	 */
	rc = env_array_overwrite_fmt(&job->env, ALPS_APP_PE_ENV,
				     "%d", job->envtp->procid);
	if (rc == 0) {
		CRAY_ERR("Failed to set env variable %s", ALPS_APP_PE_ENV);
		return SLURM_ERROR;
	}

	/*
	 * Set the PMI_NO_FORK environment variable.
	 */
	rc = env_array_overwrite(&job->env, PMI_NO_FORK_ENV, "1");
	if (rc == 0) {
		CRAY_ERR("Failed to set env variable %s", PMI_NO_FORK_ENV);
		return SLURM_ERROR;
	}

	/*
	 *  Notify the task which offset to use
	 */
	rc = env_array_overwrite_fmt(&job->env, LLI_STATUS_OFFS_ENV,
				     "%d", job->envtp->localid + 1);
	if (rc == 0) {
		CRAY_ERR("Failed to set env variable %s",
			 LLI_STATUS_OFFS_ENV);
		return SLURM_ERROR;
	}
#endif
	return SLURM_SUCCESS;
}
Example #8
int p_mpi_hook_slurmstepd_task (const mpi_plugin_task_info_t *job,
				char ***env)
{
	int i;
	char *processes = NULL;
	char *addr = getenvp (*env, "SLURM_LAUNCH_NODE_IPADDR");

	debug("Using mpi/mvapich");
	env_array_overwrite_fmt(env, "MPIRUN_HOST", "%s", addr);
	env_array_overwrite_fmt(env, "MPIRUN_RANK", "%u", job->gtaskid);
	env_array_overwrite_fmt(env, "MPIRUN_MPD", "0");

	debug2("init for mpi rank %u", job->gtaskid);

	if (getenvp (*env, "SLURM_NEED_MVAPICH_MPIRUN_PROCESSES")) {
		/*
		 * Fake MPIRUN_PROCESSES env var -- we don't need this for
		 *  SLURM at this time. (what a waste)
		 */
		for (i = 0; i < job->ntasks; i++)
			xstrcat (processes, "x:");

		env_array_overwrite_fmt(env, "MPIRUN_PROCESSES", "%s",
			processes);
	}

	/*
	 * Some mvapich versions will ignore MPIRUN_PROCESSES if
	 *  the following env var is set.
	 */
	env_array_overwrite_fmt(env, "NOT_USE_TOTALVIEW", "1");

	/*
	 * Set VIADEV_ENABLE_AFFINITY=0 so that mvapich doesn't
	 *  override SLURM's CPU affinity. (Unless this var is
	 *  already set in user env)
	 */
	if (!getenvp (*env, "VIADEV_ENABLE_AFFINITY"))
		env_array_overwrite_fmt(env, "VIADEV_ENABLE_AFFINITY", "0");

	return SLURM_SUCCESS;
}
Example #9
extern gmpi_state_t *
gmpi_thr_create(const mpi_plugin_client_info_t *job, char ***env)
{
	uint16_t port;
	pthread_attr_t attr;
	gmpi_state_t *st = NULL;

	st = gmpi_state_create(job);

	/*
	 * It is possible for one to modify the mpirun command in
	 * MPICH-GM distribution so that it calls srun, instead of
	 * rsh, for remote process invocations.  In that case, we
	 * should not override envs nor open the master port.
	 */
	if (getenv("GMPI_PORT"))
		return st;

	if (net_stream_listen (&st->fd, &port) < 0) {
		error ("Unable to create GMPI listen port: %m");
		gmpi_state_destroy(st);
		return NULL;
	}

	/*
	 * Accept in a separate thread.
	 */
	slurm_attr_init(&attr);
	pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED);
	if (pthread_create(&st->tid, &attr, &_gmpi_thr, (void *)st)) {
		slurm_attr_destroy(&attr);
		gmpi_state_destroy(st);
		return NULL;
	}
	slurm_attr_destroy(&attr);

	env_array_overwrite_fmt(env, "GMPI_PORT",  "%hu", port);
	env_array_overwrite_fmt(env, "GMPI_MAGIC", "%u", job->jobid);
	env_array_overwrite_fmt(env, "GMPI_NP",    "%d",
				job->step_layout->task_cnt);
	env_array_overwrite_fmt(env, "GMPI_SHMEM", "1");
	/* FIXME for multi-board config. */
	env_array_overwrite_fmt(env, "GMPI_BOARD", "-1");


	/* For new MX version */
	env_array_overwrite_fmt(env, "MXMPI_PORT",  "%hu", port);
	env_array_overwrite_fmt(env, "MXMPI_MAGIC", "%u", job->jobid);
	env_array_overwrite_fmt(env, "MXMPI_NP",    "%d",
				job->step_layout->task_cnt);
	/* FIXME for multi-board config. */
	env_array_overwrite_fmt(env, "MXMPI_BOARD", "-1");


	/* for MACOSX to override default malloc */
	env_array_overwrite_fmt(env, "DYLD_FORCE_FLAT_NAMESPACE", "1");


	debug("Started GMPI master thread (%lu)", (unsigned long) st->tid);

	return st;
}
Example #10
File: env.c Project: donaghy1/slurm
/*
 * Set in "dest" the environment variables relevant to a SLURM job
 * allocation, overwriting any environment variables of the same name.
 * If the address pointed to by "dest" is NULL, memory will automatically be
 * xmalloc'ed.  The array is terminated by a NULL pointer, and thus is
 * suitable for use by execle() and other env_array_* functions.
 *
 * Sets the variables:
 *	SLURM_JOB_ID
 *	SLURM_JOB_NUM_NODES
 *	SLURM_JOB_NODELIST
 *	SLURM_JOB_CPUS_PER_NODE
 *	LOADLBATCH (AIX only)
 *	SLURM_BG_NUM_NODES, MPIRUN_PARTITION, MPIRUN_NOFREE, and
 *	MPIRUN_NOALLOCATE (BG only)
 *
 * Sets OBSOLETE variables (needed for MPI, do not remove):
 *	SLURM_JOBID
 *	SLURM_NNODES
 *	SLURM_NODELIST
 *	SLURM_TASKS_PER_NODE
 */
int
env_array_for_job(char ***dest, const resource_allocation_response_msg_t *alloc,
		  const job_desc_msg_t *desc)
{
	char *tmp = NULL;
	char *dist = NULL, *lllp_dist = NULL;
	slurm_step_layout_t *step_layout = NULL;
	uint32_t num_tasks = desc->num_tasks;
	int rc = SLURM_SUCCESS;
	uint32_t node_cnt = alloc->node_cnt;
	uint32_t cluster_flags = slurmdb_setup_cluster_flags();

	_setup_particulars(cluster_flags, dest, alloc->select_jobinfo);

	if (cluster_flags & CLUSTER_FLAG_BG) {
		select_g_select_jobinfo_get(alloc->select_jobinfo,
					    SELECT_JOBDATA_NODE_CNT,
					    &node_cnt);
		if (!node_cnt)
			node_cnt = alloc->node_cnt;

		env_array_overwrite_fmt(dest, "SLURM_BG_NUM_NODES",
					"%u", node_cnt);
	}

	env_array_overwrite_fmt(dest, "SLURM_JOB_ID", "%u", alloc->job_id);
	env_array_overwrite_fmt(dest, "SLURM_JOB_NUM_NODES", "%u", node_cnt);
	env_array_overwrite_fmt(dest, "SLURM_JOB_NODELIST", "%s",
				alloc->node_list);

	_set_distribution(desc->task_dist, &dist, &lllp_dist);
	if(dist)
		env_array_overwrite_fmt(dest, "SLURM_DISTRIBUTION", "%s",
					dist);

	if(desc->task_dist == SLURM_DIST_PLANE)
		env_array_overwrite_fmt(dest, "SLURM_DIST_PLANESIZE",
					"%u", desc->plane_size);

	if(lllp_dist)
		env_array_overwrite_fmt(dest, "SLURM_DIST_LLLP", "%s",
					lllp_dist);

	tmp = uint32_compressed_to_str(alloc->num_cpu_groups,
					alloc->cpus_per_node,
					alloc->cpu_count_reps);
	env_array_overwrite_fmt(dest, "SLURM_JOB_CPUS_PER_NODE", "%s", tmp);
	xfree(tmp);

	/* OBSOLETE, but needed by MPI, do not remove */
	env_array_overwrite_fmt(dest, "SLURM_JOBID", "%u", alloc->job_id);
	env_array_overwrite_fmt(dest, "SLURM_NNODES", "%u", node_cnt);
	env_array_overwrite_fmt(dest, "SLURM_NODELIST", "%s", alloc->node_list);

	if(num_tasks == NO_VAL) {
		/* No task count was given, so derive one here by
		 * totalling up the cpus and then dividing by the
		 * number of cpus per task; the result is used below
		 * to build SLURM_TASKS_PER_NODE. */
		int i=0;

		num_tasks = 0;
		for (i = 0; i < alloc->num_cpu_groups; i++) {
			num_tasks += alloc->cpu_count_reps[i]
				* alloc->cpus_per_node[i];
		}
		if((int)desc->cpus_per_task > 1
		   && desc->cpus_per_task != (uint16_t)NO_VAL)
			num_tasks /= desc->cpus_per_task;
		//num_tasks = desc->min_cpus;
	}

	if(desc->task_dist == SLURM_DIST_ARBITRARY) {
		tmp = desc->req_nodes;
		env_array_overwrite_fmt(dest, "SLURM_ARBITRARY_NODELIST",
					"%s", tmp);
	} else
		tmp = alloc->node_list;

	if(!(step_layout = slurm_step_layout_create(tmp,
						    alloc->cpus_per_node,
						    alloc->cpu_count_reps,
						    node_cnt,
						    num_tasks,
						    desc->cpus_per_task,
						    desc->task_dist,
						    desc->plane_size)))
		return SLURM_ERROR;

	tmp = _uint16_array_to_str(step_layout->node_cnt,
				   step_layout->tasks);
	slurm_step_layout_destroy(step_layout);
	env_array_overwrite_fmt(dest, "SLURM_TASKS_PER_NODE", "%s", tmp);
	xfree(tmp);
	return rc;
}
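A minimal caller-side sketch of env_array_for_job() (the helper name is hypothetical; it assumes the SLURM-internal declarations from src/common/env.h and an allocation/descriptor pair obtained elsewhere, e.g. via slurm_allocate_resources()):

static void _use_job_env(const resource_allocation_response_msg_t *alloc,
			 const job_desc_msg_t *desc)
{
	char **dest = NULL;	/* env_array_for_job() xmalloc's on demand */

	if (env_array_for_job(&dest, alloc, desc) != SLURM_SUCCESS) {
		error("failed to build job environment");
		return;
	}
	/* the array is NULL-terminated, so it can go straight to execve() */
	debug("SLURM_JOB_ID=%s", getenvp(dest, "SLURM_JOB_ID"));
	env_array_free(dest);
}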
Example #11
File: env.c Project: donaghy1/slurm
/*
 * Set in "dest" the environment variables relevant to a SLURM job step,
 * overwriting any environment variables of the same name.  If the address
 * pointed to by "dest" is NULL, memory will automatically be xmalloc'ed.
 * The array is terminated by a NULL pointer, and thus is suitable for
 * use by execle() and other env_array_* functions.  If preserve_env is
 * true, the variables SLURM_NNODES, SLURM_NTASKS and SLURM_TASKS_PER_NODE
 * remain unchanged.
 *
 * Sets variables:
 *	SLURM_STEP_ID
 *	SLURM_STEP_NUM_NODES
 *	SLURM_STEP_NUM_TASKS
 *	SLURM_STEP_TASKS_PER_NODE
 *	SLURM_STEP_LAUNCHER_PORT
 *	SLURM_STEP_LAUNCHER_IPADDR
 *	SLURM_STEP_RESV_PORTS
 *
 * Sets OBSOLETE variables:
 *	SLURM_STEPID
 *	SLURM_NNODES
 *	SLURM_NTASKS
 *	SLURM_NODELIST
 *	SLURM_TASKS_PER_NODE
 *	SLURM_SRUN_COMM_PORT
 *	SLURM_LAUNCH_NODE_IPADDR
 *
 */
void
env_array_for_step(char ***dest,
		   const job_step_create_response_msg_t *step,
		   uint16_t launcher_port,
		   bool preserve_env)
{
	char *tmp;

	tmp = _uint16_array_to_str(step->step_layout->node_cnt,
				   step->step_layout->tasks);
	env_array_overwrite_fmt(dest, "SLURM_STEP_ID", "%u", step->job_step_id);
	env_array_overwrite_fmt(dest, "SLURM_STEP_NODELIST",
				"%s", step->step_layout->node_list);
	env_array_overwrite_fmt(dest, "SLURM_STEP_NUM_NODES",
				"%hu", step->step_layout->node_cnt);
	env_array_overwrite_fmt(dest, "SLURM_STEP_NUM_TASKS",
				"%u", step->step_layout->task_cnt);
	env_array_overwrite_fmt(dest, "SLURM_STEP_TASKS_PER_NODE", "%s", tmp);
	env_array_overwrite_fmt(dest, "SLURM_STEP_LAUNCHER_PORT",
				"%hu", launcher_port);
	if (step->resv_ports) {
		env_array_overwrite_fmt(dest, "SLURM_STEP_RESV_PORTS",
					"%s", step->resv_ports);
	}

	/* OBSOLETE, but needed by MPI, do not remove */
	env_array_overwrite_fmt(dest, "SLURM_STEPID", "%u", step->job_step_id);
	if (!preserve_env) {
		env_array_overwrite_fmt(dest, "SLURM_NNODES",
					"%hu", step->step_layout->node_cnt);
		env_array_overwrite_fmt(dest, "SLURM_NTASKS", "%u",
					step->step_layout->task_cnt);
		/* keep around for old scripts */
		env_array_overwrite_fmt(dest, "SLURM_NPROCS",
					"%u", step->step_layout->task_cnt);
		env_array_overwrite_fmt(dest, "SLURM_TASKS_PER_NODE", "%s",
					tmp);
	}
	env_array_overwrite_fmt(dest, "SLURM_SRUN_COMM_PORT",
				"%hu", launcher_port);

	xfree(tmp);
}
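A brief caller sketch for the step variant (hypothetical helper; assumes the same SLURM-internal headers plus a step creation response and task argv built by the surrounding launch code). Passing preserve_env = true leaves SLURM_NNODES, SLURM_NTASKS and SLURM_TASKS_PER_NODE from the enclosing allocation untouched, as described above:

static void _exec_with_step_env(const job_step_create_response_msg_t *step_resp,
				uint16_t launcher_port, bool preserve_env,
				char **task_argv)
{
	extern char **environ;
	char **env = env_array_copy((const char **) environ);

	env_array_for_step(&env, step_resp, launcher_port, preserve_env);
	execve(task_argv[0], task_argv, env);	/* only returns on error */
}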
Example #12
File: env.c Project: donaghy1/slurm
/*
 * Set in "dest" the environment variables strings relevant to a SLURM batch
 * job allocation, overwriting any environment variables of the same name.
 * If the address pointed to by "dest" is NULL, memory will automatically be
 * xmalloc'ed.  The array is terminated by a NULL pointer, and thus is
 * suitable for use by execle() and other env_array_* functions.
 *
 * Sets the variables:
 *	SLURM_JOB_ID
 *	SLURM_JOB_NUM_NODES
 *	SLURM_JOB_NODELIST
 *	SLURM_JOB_CPUS_PER_NODE
 *	ENVIRONMENT=BATCH
 *	HOSTNAME
 *	LOADLBATCH (AIX only)
 *
 * Sets OBSOLETE variables (needed for MPI, do not remove):
 *	SLURM_JOBID
 *	SLURM_NNODES
 *	SLURM_NODELIST
 *	SLURM_NTASKS
 *	SLURM_TASKS_PER_NODE
 */
extern int
env_array_for_batch_job(char ***dest, const batch_job_launch_msg_t *batch,
			const char *node_name)
{
	char *tmp = NULL;
	uint32_t num_nodes = 0;
	uint32_t num_cpus = 0;
	int i;
	slurm_step_layout_t *step_layout = NULL;
	uint32_t num_tasks = batch->ntasks;
	uint16_t cpus_per_task;
	uint16_t task_dist;
	uint32_t cluster_flags = slurmdb_setup_cluster_flags();

	_setup_particulars(cluster_flags, dest, batch->select_jobinfo);

	/* There is no explicit node count in the batch structure,
	 * so we need to calculate the node count. */
	for (i = 0; i < batch->num_cpu_groups; i++) {
		num_nodes += batch->cpu_count_reps[i];
		num_cpus += batch->cpu_count_reps[i] * batch->cpus_per_node[i];
	}

	env_array_overwrite_fmt(dest, "SLURM_JOB_ID", "%u", batch->job_id);
	env_array_overwrite_fmt(dest, "SLURM_JOB_NUM_NODES", "%u", num_nodes);
	if(cluster_flags & CLUSTER_FLAG_BG)
		env_array_overwrite_fmt(dest, "SLURM_BG_NUM_NODES",
					"%u", num_nodes);

	env_array_overwrite_fmt(dest, "SLURM_JOB_NODELIST", "%s", batch->nodes);

	tmp = uint32_compressed_to_str(batch->num_cpu_groups,
				       batch->cpus_per_node,
				       batch->cpu_count_reps);
	env_array_overwrite_fmt(dest, "SLURM_JOB_CPUS_PER_NODE", "%s", tmp);
	xfree(tmp);

	env_array_overwrite_fmt(dest, "ENVIRONMENT", "BATCH");
	if (node_name)
		env_array_overwrite_fmt(dest, "HOSTNAME", "%s", node_name);

	/* OBSOLETE, but needed by MPI, do not remove */
	env_array_overwrite_fmt(dest, "SLURM_JOBID", "%u", batch->job_id);
	env_array_overwrite_fmt(dest, "SLURM_NNODES", "%u", num_nodes);
	env_array_overwrite_fmt(dest, "SLURM_NODELIST", "%s", batch->nodes);

	if((batch->cpus_per_task != 0) &&
	   (batch->cpus_per_task != (uint16_t) NO_VAL))
		cpus_per_task = batch->cpus_per_task;
	else
		cpus_per_task = 1;	/* default value */
	if (cpus_per_task > 1) {
		env_array_overwrite_fmt(dest, "SLURM_CPUS_PER_TASK", "%u",
					cpus_per_task);
	}

	if(num_tasks) {
		env_array_overwrite_fmt(dest, "SLURM_NTASKS", "%u",
					num_tasks);
		/* keep around for old scripts */
		env_array_overwrite_fmt(dest, "SLURM_NPROCS", "%u",
					num_tasks);
	} else {
		num_tasks = num_cpus / cpus_per_task;
	}

	if((tmp = getenvp(*dest, "SLURM_ARBITRARY_NODELIST"))) {
		task_dist = SLURM_DIST_ARBITRARY;
	} else {
		tmp = batch->nodes;
		task_dist = SLURM_DIST_BLOCK;
	}

	if(!(step_layout = slurm_step_layout_create(tmp,
						    batch->cpus_per_node,
						    batch->cpu_count_reps,
						    num_nodes,
						    num_tasks,
						    cpus_per_task,
						    task_dist,
						    (uint16_t)NO_VAL)))
		return SLURM_ERROR;

	tmp = _uint16_array_to_str(step_layout->node_cnt,
				   step_layout->tasks);
	slurm_step_layout_destroy(step_layout);
	env_array_overwrite_fmt(dest, "SLURM_TASKS_PER_NODE", "%s", tmp);
	xfree(tmp);
	return SLURM_SUCCESS;

}
Example #13
mpi_plugin_client_state_t *
p_mpi_hook_client_prelaunch(mpi_plugin_client_info_t *job, char ***env)
{
	struct sockaddr_in sin;
	pthread_attr_t attr;
	socklen_t len = sizeof(sin);
	short port1, port2;

	debug("Using mpi/mpich1_p4");
	if ((p4_fd1 = socket(PF_INET, SOCK_DGRAM, 0)) < 0) {
		error("socket: %m");
		return NULL;
	}
	memset(&sin, 0, sizeof(sin));
	sin.sin_family = PF_INET;
	if (bind(p4_fd1, (struct sockaddr *) &sin, len) < 0) {
		error("bind: %m");
		return NULL;
	}
	if (getsockname(p4_fd1, (struct sockaddr *) &sin, &len) < 0) {
		error("getsockname: %m");
		return NULL;
	}
	port1 = ntohs(sin.sin_port);

	if ((p4_fd2 = socket(PF_INET, SOCK_STREAM, 0)) < 0) {
		error("socket: %m");
		return NULL;
	}
	memset(&sin, 0, sizeof(sin));
	sin.sin_family = PF_INET;
	sin.sin_addr.s_addr = htonl(INADDR_ANY);
	if (bind(p4_fd2, (struct sockaddr *) &sin, len) < 0) {
		error("bind: %m");
		return NULL;
	}
	if (listen(p4_fd2, 64) < 0)
		error("listen: %m");
	if (getsockname(p4_fd2, (struct sockaddr *) &sin, &len) < 0) {
		error("getsockname: %m");
		return NULL;
	}
	port2 = ntohs(sin.sin_port);

	if (pipe(shutdown_pipe) < 0) {
		error ("pipe: %m");
		return (NULL);
	}
	shutdown_complete = false;
	shutdown_timeout = 5;
	slurm_mutex_init(&shutdown_lock);
	pthread_cond_init(&shutdown_cond, NULL);

	/* Process messages in a separate thread */
	slurm_attr_init(&attr);
	pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED);
	if (pthread_create(&p4_tid, &attr, &mpich1_thr, NULL)) {
		error("pthread_create: %m");
		slurm_attr_destroy(&attr);
		return NULL;
	}
	slurm_attr_destroy(&attr);
	env_array_overwrite_fmt(env, "SLURM_MPICH_PORT1", "%hu", port1);
	env_array_overwrite_fmt(env, "SLURM_MPICH_PORT2", "%hu", port2);
	debug("mpich_p4 plugin listening on fd=%d,%d ports=%d,%d",
		p4_fd1, p4_fd2, port1, port2);

	/* only return NULL on error */
	return (void *)0xdeadbeef;
}
Example #14
static int
_exec_srun_single(spawn_req_t *req, char **env)
{
	int argc, i, j;
	char **argv = NULL;
	spawn_subcmd_t *subcmd;

	debug3("mpi/mpi2: in _exec_srun_single");
	subcmd = req->subcmds[0];
	argc = subcmd->argc + 7;
	xrealloc(argv, (argc + 1) * sizeof(char *));

	j = 0;
	argv[j ++] = "srun";
	argv[j ++] = "--mpi=pmi2";
	if (job_info.srun_opt && job_info.srun_opt->no_alloc) {
		argv[j ++] = "--no-alloc";
		xstrfmtcat(argv[j ++], "--nodelist=%s",
			   job_info.srun_opt->nodelist);
	}

	xstrfmtcat(argv[j ++], "--ntasks=%d", subcmd->max_procs);
	/* TODO: inherit options from srun_opt. */
	for (i = 0; i < subcmd->info_cnt; i ++) {
		if (! strcmp(subcmd->info_keys[i], "host")) {
			xstrfmtcat(argv[j ++], "--nodelist=%s",
				   subcmd->info_vals[i]);

		} else if (! strcmp(subcmd->info_keys[i], "arch")) {
			error("mpi/pmi2: spawn info key 'arch' not supported");

		} else if (! strcmp(subcmd->info_keys[i], "wdir")) {
			xstrfmtcat(argv[j ++], "--chdir=%s",
				   subcmd->info_vals[i]);

		} else if (! strcmp(subcmd->info_keys[i], "path")) {
			env_array_overwrite_fmt(&env, "PATH", "%s",
						subcmd->info_vals[i]);

		} else if (! strcmp(subcmd->info_keys[i], "file")) {
			error("mpi/pmi2: spawn info key 'file' not supported");

		} else if (! strcmp(subcmd->info_keys[i], "soft")) {
			error("mpi/pmi2: spawn info key 'soft' not supported");

		} else {
			error("mpi/pmi2: unknown spawn info key '%s' ignored",
				subcmd->info_keys[i]);
		}
	}
	argv[j ++] = subcmd->cmd;
	for (i = 0; i < subcmd->argc; i ++) {
		argv[j ++] = subcmd->argv[i];
	}
	argv[j ++] = NULL;

	{
		debug3("mpi/mpi2: to execve");
		for (i = 0; i < j; i ++) {
			debug3("mpi/pmi2:   argv[%d]=%s", i, argv[i]);
		}
	}
	execve(SLURM_PREFIX"/bin/srun", argv, env);
	error("mpi/pmi2: failed to exec srun: %m");
	return SLURM_ERROR;
}
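For a single-command spawn request with max_procs = 4, cmd = "./a.out" and no info keys (and no --no-alloc handling), the vector built above comes out roughly as (illustrative values only):

	argv = { "srun", "--mpi=pmi2", "--ntasks=4", "./a.out", <spawn argv...>, NULL };

and the execve() call replaces the calling process image with that srun, which in turn launches the spawned PMI2 job step.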