Example #1
File: env.c  Project: donaghy1/slurm
/*
 * Set in "dest" the environment variables relevant to a SLURM job
 * allocation, overwriting any environment variables of the same name.
 * If the address pointed to by "dest" is NULL, memory will automatically be
 * xmalloc'ed.  The array is terminated by a NULL pointer, and thus is
 * suitable for use by execle() and other env_array_* functions.
 *
 * Sets the variables:
 *	SLURM_JOB_ID
 *	SLURM_JOB_NUM_NODES
 *	SLURM_JOB_NODELIST
 *	SLURM_JOB_CPUS_PER_NODE
 *	LOADLBATCH (AIX only)
 *	SLURM_BG_NUM_NODES, MPIRUN_PARTITION, MPIRUN_NOFREE, and
 *	MPIRUN_NOALLOCATE (BG only)
 *
 * Sets OBSOLETE variables (needed for MPI, do not remove):
 *	SLURM_JOBID
 *	SLURM_NNODES
 *	SLURM_NODELIST
 *	SLURM_TASKS_PER_NODE
 */
int
env_array_for_job(char ***dest, const resource_allocation_response_msg_t *alloc,
		  const job_desc_msg_t *desc)
{
	char *tmp = NULL;
	char *dist = NULL, *lllp_dist = NULL;
	slurm_step_layout_t *step_layout = NULL;
	uint32_t num_tasks = desc->num_tasks;
	int rc = SLURM_SUCCESS;
	uint32_t node_cnt = alloc->node_cnt;
	uint32_t cluster_flags = slurmdb_setup_cluster_flags();

	_setup_particulars(cluster_flags, dest, alloc->select_jobinfo);

	if (cluster_flags & CLUSTER_FLAG_BG) {
		select_g_select_jobinfo_get(alloc->select_jobinfo,
					    SELECT_JOBDATA_NODE_CNT,
					    &node_cnt);
		if (!node_cnt)
			node_cnt = alloc->node_cnt;

		env_array_overwrite_fmt(dest, "SLURM_BG_NUM_NODES",
					"%u", node_cnt);
	}

	env_array_overwrite_fmt(dest, "SLURM_JOB_ID", "%u", alloc->job_id);
	env_array_overwrite_fmt(dest, "SLURM_JOB_NUM_NODES", "%u", node_cnt);
	env_array_overwrite_fmt(dest, "SLURM_JOB_NODELIST", "%s",
				alloc->node_list);

	_set_distribution(desc->task_dist, &dist, &lllp_dist);
	if (dist)
		env_array_overwrite_fmt(dest, "SLURM_DISTRIBUTION", "%s",
					dist);

	if (desc->task_dist == SLURM_DIST_PLANE)
		env_array_overwrite_fmt(dest, "SLURM_DIST_PLANESIZE",
					"%u", desc->plane_size);

	if (lllp_dist)
		env_array_overwrite_fmt(dest, "SLURM_DIST_LLLP", "%s",
					lllp_dist);

	tmp = uint32_compressed_to_str(alloc->num_cpu_groups,
					alloc->cpus_per_node,
					alloc->cpu_count_reps);
	env_array_overwrite_fmt(dest, "SLURM_JOB_CPUS_PER_NODE", "%s", tmp);
	xfree(tmp);

	/* OBSOLETE, but needed by MPI, do not remove */
	env_array_overwrite_fmt(dest, "SLURM_JOBID", "%u", alloc->job_id);
	env_array_overwrite_fmt(dest, "SLURM_NNODES", "%u", node_cnt);
	env_array_overwrite_fmt(dest, "SLURM_NODELIST", "%s", alloc->node_list);

	if (num_tasks == NO_VAL) {
		/* No task count was given, so derive one here by
		 * totalling up the CPUs and dividing by the number of
		 * CPUs per task; the result feeds the step layout that
		 * sets SLURM_TASKS_PER_NODE below. */
		int i = 0;

		num_tasks = 0;
		for (i = 0; i < alloc->num_cpu_groups; i++) {
			num_tasks += alloc->cpu_count_reps[i]
				* alloc->cpus_per_node[i];
		}
		if (((int)desc->cpus_per_task > 1) &&
		    (desc->cpus_per_task != (uint16_t)NO_VAL))
			num_tasks /= desc->cpus_per_task;
		//num_tasks = desc->min_cpus;
	}

	if (desc->task_dist == SLURM_DIST_ARBITRARY) {
		tmp = desc->req_nodes;
		env_array_overwrite_fmt(dest, "SLURM_ARBITRARY_NODELIST",
					"%s", tmp);
	} else
		tmp = alloc->node_list;

	if(!(step_layout = slurm_step_layout_create(tmp,
						    alloc->cpus_per_node,
						    alloc->cpu_count_reps,
						    node_cnt,
						    num_tasks,
						    desc->cpus_per_task,
						    desc->task_dist,
						    desc->plane_size)))
		return SLURM_ERROR;

	tmp = _uint16_array_to_str(step_layout->node_cnt,
				   step_layout->tasks);
	slurm_step_layout_destroy(step_layout);
	env_array_overwrite_fmt(dest, "SLURM_TASKS_PER_NODE", "%s", tmp);
	xfree(tmp);
	return rc;
}
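
The header comment above promises a NULL-terminated array that can be handed directly to execle()/execve(). A minimal caller sketch under that assumption (the wrapper name _launch_with_job_env is hypothetical, alloc/desc are assumed to come from the usual slurm_allocate_resources()/slurm_init_job_desc_msg() calls, and env_array_free() is another helper from the same env_array_* family):

/* Hypothetical caller sketch, not part of env.c.  Passing a NULL
 * pointer lets env_array_for_job() xmalloc the array for us. */
#include <unistd.h>	/* execve() */

static int _launch_with_job_env(const resource_allocation_response_msg_t *alloc,
				const job_desc_msg_t *desc)
{
	char **env = NULL;
	char *argv[] = { "printenv", NULL };

	if (env_array_for_job(&env, alloc, desc) != SLURM_SUCCESS)
		return SLURM_ERROR;

	/* The array is NULL-terminated, so it is valid as an envp */
	execve("/usr/bin/printenv", argv, env);

	/* Only reached if the exec failed */
	env_array_free(env);
	return SLURM_ERROR;
}

Running this against a fresh allocation would print only the SLURM_* variables set above, which is a quick way to verify the generated values.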
Example #2
static void _update_job_size(uint32_t job_id)
{
	resource_allocation_response_msg_t *alloc_info;
	char *fname_csh = NULL, *fname_sh = NULL;
	FILE *resize_csh = NULL, *resize_sh = NULL;

	if (!getenv("SLURM_JOBID"))
		return;		/* No job environment here to update */

	if (slurm_allocation_lookup_lite(job_id, &alloc_info) !=
	    SLURM_SUCCESS) {
		slurm_perror("slurm_allocation_lookup_lite");
		return;
	}

	xstrfmtcat(fname_csh, "slurm_job_%u_resize.csh", job_id);
	xstrfmtcat(fname_sh,  "slurm_job_%u_resize.sh", job_id);
	(void) unlink(fname_csh);
	(void) unlink(fname_sh);
	if (!(resize_csh = fopen(fname_csh, "w"))) {
		fprintf(stderr, "Could not create file %s: %s\n", fname_csh, 
			strerror(errno));
		goto fini;
	}
	if (!(resize_sh = fopen(fname_sh, "w"))) {
		fprintf(stderr, "Could not create file %s: %s\n", fname_sh, 
			strerror(errno));
		goto fini;
	}
	chmod(fname_csh, 0700);	/* Make file executable */
	chmod(fname_sh,  0700);

	if (getenv("SLURM_NODELIST")) {
		fprintf(resize_sh, "export SLURM_NODELIST=\"%s\"\n",
			alloc_info->node_list);
		fprintf(resize_csh, "setenv SLURM_NODELIST \"%s\"\n",
			alloc_info->node_list);
	}
	if (getenv("SLURM_JOB_NODELIST")) {
		fprintf(resize_sh, "export SLURM_JOB_NODELIST=\"%s\"\n", 
			alloc_info->node_list);
		fprintf(resize_csh, "setenv SLURM_JOB_NODELIST \"%s\"\n", 
			alloc_info->node_list);
	}
	if (getenv("SLURM_NNODES")) {
		fprintf(resize_sh, "export SLURM_NNODES=%u\n",
			alloc_info->node_cnt);
		fprintf(resize_csh, "setenv SLURM_NNODES %u\n",
			alloc_info->node_cnt);
	}
	if (getenv("SLURM_JOB_NUM_NODES")) {
		fprintf(resize_sh, "export SLURM_JOB_NUM_NODES=%u\n",
			alloc_info->node_cnt);
		fprintf(resize_csh, "setenv SLURM_JOB_NUM_NODES %u\n",
			alloc_info->node_cnt);
	}
	if (getenv("SLURM_JOB_CPUS_PER_NODE")) {
		char *tmp;
		tmp = uint32_compressed_to_str(alloc_info->num_cpu_groups,
					       alloc_info->cpus_per_node,
					       alloc_info->cpu_count_reps);
		fprintf(resize_sh, "export SLURM_JOB_CPUS_PER_NODE=\"%s\"\n",
			tmp);
		fprintf(resize_csh, "setenv SLURM_JOB_CPUS_PER_NODE \"%s\"\n",
			tmp);
		xfree(tmp);
	}
	if (getenv("SLURM_TASKS_PER_NODE")) {
		/* We don't have sufficient information to recreate this */
		fprintf(resize_sh, "unset SLURM_TASKS_PER_NODE\n");
		fprintf(resize_csh, "unsetenv SLURM_TASKS_PER_NODE\n");
	}

	printf("To reset SLURM environment variables, execute\n");
	printf("  For bash or sh shells:  . ./%s\n", fname_sh);
	printf("  For csh shells:         source ./%s\n", fname_csh);

fini:	slurm_free_resource_allocation_response_msg(alloc_info);
	xfree(fname_csh);
	xfree(fname_sh);
	if (resize_csh)
		fclose(resize_csh);
	if (resize_sh)
		fclose(resize_sh);
}
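
A resize script only makes sense once the allocation has actually changed size, so a caller would normally run this right after a successful slurm_update_job(). A hedged sketch of such a caller (the name _shrink_job and the choice of pinning min_nodes/max_nodes to the new count are illustrative; slurm_init_job_desc_msg() and slurm_update_job() are the public slurm.h calls):

/* Hypothetical caller sketch: shrink job_id to new_node_cnt, then
 * regenerate slurm_job_<id>_resize.sh/.csh so the user can source
 * one of them and refresh the now-stale SLURM_* variables. */
static void _shrink_job(uint32_t job_id, uint32_t new_node_cnt)
{
	job_desc_msg_t job_msg;

	slurm_init_job_desc_msg(&job_msg);
	job_msg.job_id    = job_id;
	job_msg.min_nodes = new_node_cnt;
	job_msg.max_nodes = new_node_cnt;

	if (slurm_update_job(&job_msg) != SLURM_SUCCESS) {
		slurm_perror("slurm_update_job");
		return;
	}
	_update_job_size(job_id);
}

As the printf() messages at the end of _update_job_size() explain, the generated file is then sourced manually in the user's shell.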
Example #3
File: env.c  Project: donaghy1/slurm
/*
 * Set in "dest" the environment variable strings relevant to a SLURM batch
 * job allocation, overwriting any environment variables of the same name.
 * If the address pointed to by "dest" is NULL, memory will automatically be
 * xmalloc'ed.  The array is terminated by a NULL pointer, and thus is
 * suitable for use by execle() and other env_array_* functions.
 *
 * Sets the variables:
 *	SLURM_JOB_ID
 *	SLURM_JOB_NUM_NODES
 *	SLURM_JOB_NODELIST
 *	SLURM_JOB_CPUS_PER_NODE
 *	ENVIRONMENT=BATCH
 *	HOSTNAME
 *	LOADLBATCH (AIX only)
 *
 * Sets OBSOLETE variables (needed for MPI, do not remove):
 *	SLURM_JOBID
 *	SLURM_NNODES
 *	SLURM_NODELIST
 *	SLURM_NTASKS
 *	SLURM_TASKS_PER_NODE
 */
extern int
env_array_for_batch_job(char ***dest, const batch_job_launch_msg_t *batch,
			const char *node_name)
{
	char *tmp = NULL;
	uint32_t num_nodes = 0;
	uint32_t num_cpus = 0;
	int i;
	slurm_step_layout_t *step_layout = NULL;
	uint32_t num_tasks = batch->ntasks;
	uint16_t cpus_per_task;
	uint16_t task_dist;
	uint32_t cluster_flags = slurmdb_setup_cluster_flags();

	_setup_particulars(cluster_flags, dest, batch->select_jobinfo);

	/* There is no explicit node count in the batch structure,
	 * so we need to calculate the node count. */
	for (i = 0; i < batch->num_cpu_groups; i++) {
		num_nodes += batch->cpu_count_reps[i];
		num_cpus += batch->cpu_count_reps[i] * batch->cpus_per_node[i];
	}

	env_array_overwrite_fmt(dest, "SLURM_JOB_ID", "%u", batch->job_id);
	env_array_overwrite_fmt(dest, "SLURM_JOB_NUM_NODES", "%u", num_nodes);
	if(cluster_flags & CLUSTER_FLAG_BG)
		env_array_overwrite_fmt(dest, "SLURM_BG_NUM_NODES",
					"%u", num_nodes);

	env_array_overwrite_fmt(dest, "SLURM_JOB_NODELIST", "%s", batch->nodes);

	tmp = uint32_compressed_to_str(batch->num_cpu_groups,
				       batch->cpus_per_node,
				       batch->cpu_count_reps);
	env_array_overwrite_fmt(dest, "SLURM_JOB_CPUS_PER_NODE", "%s", tmp);
	xfree(tmp);

	env_array_overwrite_fmt(dest, "ENVIRONMENT", "BATCH");
	if (node_name)
		env_array_overwrite_fmt(dest, "HOSTNAME", "%s", node_name);

	/* OBSOLETE, but needed by MPI, do not remove */
	env_array_overwrite_fmt(dest, "SLURM_JOBID", "%u", batch->job_id);
	env_array_overwrite_fmt(dest, "SLURM_NNODES", "%u", num_nodes);
	env_array_overwrite_fmt(dest, "SLURM_NODELIST", "%s", batch->nodes);

	if((batch->cpus_per_task != 0) &&
	   (batch->cpus_per_task != (uint16_t) NO_VAL))
		cpus_per_task = batch->cpus_per_task;
	else
		cpus_per_task = 1;	/* default value */
	if (cpus_per_task > 1) {
		env_array_overwrite_fmt(dest, "SLURM_CPUS_PER_TASK", "%u",
					cpus_per_task);
	}

	if (num_tasks) {
		env_array_overwrite_fmt(dest, "SLURM_NTASKS", "%u",
					num_tasks);
		/* keep around for old scripts */
		env_array_overwrite_fmt(dest, "SLURM_NPROCS", "%u",
					num_tasks);
	} else {
		num_tasks = num_cpus / cpus_per_task;
	}

	if ((tmp = getenvp(*dest, "SLURM_ARBITRARY_NODELIST"))) {
		task_dist = SLURM_DIST_ARBITRARY;
	} else {
		tmp = batch->nodes;
		task_dist = SLURM_DIST_BLOCK;
	}

	if(!(step_layout = slurm_step_layout_create(tmp,
						    batch->cpus_per_node,
						    batch->cpu_count_reps,
						    num_nodes,
						    num_tasks,
						    cpus_per_task,
						    task_dist,
						    (uint16_t)NO_VAL)))
		return SLURM_ERROR;

	tmp = _uint16_array_to_str(step_layout->node_cnt,
				   step_layout->tasks);
	slurm_step_layout_destroy(step_layout);
	env_array_overwrite_fmt(dest, "SLURM_TASKS_PER_NODE", "%s", tmp);
	xfree(tmp);
	return SLURM_SUCCESS;
}
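
A hedged sketch of how a launcher might consume env_array_for_batch_job() (the wrapper name _build_batch_env is hypothetical; env_array_create() and env_array_free() are other members of the env_array_* family referred to in the header comment):

/* Hypothetical wrapper: build the environment for a batch launch
 * request, returning NULL on failure so the caller can bail out. */
static char **_build_batch_env(const batch_job_launch_msg_t *batch,
			       const char *node_name)
{
	char **env = env_array_create();	/* empty, NULL-terminated */

	if (env_array_for_batch_job(&env, batch, node_name) != SLURM_SUCCESS) {
		env_array_free(env);
		return NULL;
	}
	return env;	/* caller frees with env_array_free() */
}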