Example #1
/*
 * Set environment variables as appropriate for a job (i.e. all tasks) based
 * upon the job step's GRES state.
 */
extern void step_set_env(char ***job_env_ptr, void *gres_ptr)
{
	int i, len;
	char *dev_list = NULL;
	gres_step_state_t *gres_step_ptr = (gres_step_state_t *) gres_ptr;

	if ((gres_step_ptr != NULL) &&
	    (gres_step_ptr->node_cnt == 1) &&
	    (gres_step_ptr->gres_bit_alloc != NULL) &&
	    (gres_step_ptr->gres_bit_alloc[0] != NULL)) {
		len = bit_size(gres_step_ptr->gres_bit_alloc[0]);
		for (i = 0; i < len; i++) {
			if (!bit_test(gres_step_ptr->gres_bit_alloc[0], i))
				continue;
			if (!dev_list)
				dev_list = xmalloc(128);
			else
				xstrcat(dev_list, ",");
			xstrfmtcat(dev_list, "%d", i);
		}
	}
	if (dev_list) {
		env_array_overwrite(job_env_ptr,"CUDA_VISIBLE_DEVICES",
				    dev_list);
		xfree(dev_list);
	} else {
		/* The gres.conf file must identify specific device files
		 * in order to set the CUDA_VISIBLE_DEVICES env var */
		env_array_overwrite(job_env_ptr,"CUDA_VISIBLE_DEVICES",
				    "NoDevFiles");
	}
}
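The pattern above recurs throughout these examples: scan an allocation bitmap, collect the set indices into a comma-separated string, then export it. Below is a minimal standalone sketch of the same idea in plain C; the byte-per-bit bitmap and the helper names are illustrative assumptions, not Slurm's bitstr_t or xstr API.

#include <stdio.h>
#include <stdlib.h>

/* Illustrative stand-in for Slurm's bitstr_t: one byte per bit. */
static int bit_is_set(const unsigned char *bits, int i)
{
	return bits[i] != 0;
}

/* Build a comma-separated list of set bit indices, e.g. "0,2,3".
 * Slurm's xstrfmtcat grows the string for us; here we grow by hand. */
static char *build_dev_list(const unsigned char *bits, int len)
{
	char *list = NULL;
	size_t used = 0, cap = 0;
	int i;

	for (i = 0; i < len; i++) {
		if (!bit_is_set(bits, i))
			continue;
		if (cap - used < 16) {
			char *tmp = realloc(list, cap += 128);
			if (!tmp) {
				free(list);
				return NULL;
			}
			list = tmp;
		}
		used += snprintf(list + used, cap - used, "%s%d",
				 used ? "," : "", i);
	}
	return list;	/* NULL if no bits were set */
}

int main(void)
{
	unsigned char alloc[4] = { 1, 0, 1, 1 };
	char *devs = build_dev_list(alloc, 4);

	if (devs) {
		setenv("CUDA_VISIBLE_DEVICES", devs, 1);
		printf("CUDA_VISIBLE_DEVICES=%s\n", devs);
		free(devs);
	}
	return 0;
}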
Example #2
static stepd_step_rec_t *
_step_setup(slurm_addr_t *cli, slurm_addr_t *self, slurm_msg_t *msg)
{
	stepd_step_rec_t *job = NULL;

	switch (msg->msg_type) {
	case REQUEST_BATCH_JOB_LAUNCH:
		debug2("setup for a batch_job");
		job = mgr_launch_batch_job_setup(msg->data, cli);
		break;
	case REQUEST_LAUNCH_TASKS:
		debug2("setup for a launch_task");
		job = mgr_launch_tasks_setup(msg->data, cli, self,
					     msg->protocol_version);
		break;
	default:
		fatal("handle_launch_message: Unrecognized launch RPC");
		break;
	}

	if (!job) {
		error("_step_setup: no job returned");
		return NULL;
	}

	job->jmgr_pid = getpid();
	job->jobacct = jobacctinfo_create(NULL);

	/* Establish GRES environment variables */
	if (conf->debug_flags & DEBUG_FLAG_GRES) {
		gres_plugin_job_state_log(job->job_gres_list, job->jobid);
		gres_plugin_step_state_log(job->step_gres_list, job->jobid,
					   job->stepid);
	}
	if (msg->msg_type == REQUEST_BATCH_JOB_LAUNCH)
		gres_plugin_job_set_env(&job->env, job->job_gres_list, 0);
	else if (msg->msg_type == REQUEST_LAUNCH_TASKS)
		gres_plugin_step_set_env(&job->env, job->step_gres_list, 0);

	/*
	 * Add slurmd node topology information to the job's env array
	 */
	env_array_overwrite(&job->env, "SLURM_TOPOLOGY_ADDR",
			    conf->node_topo_addr);
	env_array_overwrite(&job->env, "SLURM_TOPOLOGY_ADDR_PATTERN",
			    conf->node_topo_pattern);

	set_msg_node_id(job);

	return job;
}
Example #3
/*
 * task_p_pre_launch() is called prior to exec of application task.
 *	It is followed by TaskProlog program (from slurm.conf) and
 *	--task-prolog (from srun command line).
 */
extern int task_p_pre_launch (stepd_step_rec_t *job)
{
#ifdef HAVE_NATIVE_CRAY
	int rc;
	uint64_t apid;
	DEF_TIMERS;

	START_TIMER;
	apid = SLURM_ID_HASH(job->jobid, job->stepid);
	debug2("task_p_pre_launch: %u.%u, apid %"PRIu64", task %d",
	       job->jobid, job->stepid, apid, job->envtp->procid);

	/*
	 * Send the rank to the application's PMI layer via an environment
	 * variable.
	 */
	rc = env_array_overwrite_fmt(&job->env, ALPS_APP_PE_ENV,
				     "%d", job->envtp->procid);
	if (rc == 0) {
		CRAY_ERR("Failed to set env variable %s", ALPS_APP_PE_ENV);
		return SLURM_ERROR;
	}

	/*
	 * Set the PMI_NO_FORK environment variable.
	 */
	rc = env_array_overwrite(&job->env, PMI_NO_FORK_ENV, "1");
	if (rc == 0) {
		CRAY_ERR("Failed to set env variable %s", PMI_NO_FORK_ENV);
		return SLURM_ERROR;
	}

	/*
	 *  Notify the task which offset to use
	 */
	rc = env_array_overwrite_fmt(&job->env, LLI_STATUS_OFFS_ENV,
				     "%d", job->envtp->localid + 1);
	if (rc == 0) {
		CRAY_ERR("Failed to set env variable %s",
			 LLI_STATUS_OFFS_ENV);
		return SLURM_ERROR;
	}

	/*
	 * Set the ALPS_APP_ID environment variable for use by
	 * Cray tools.
	 */
	rc = env_array_overwrite_fmt(&job->env, ALPS_APP_ID_ENV, "%"PRIu64,
				     apid);
	if (rc == 0) {
		CRAY_ERR("Failed to set env variable %s",
			 ALPS_APP_ID_ENV);
	}
	END_TIMER;
	if (debug_flags & DEBUG_FLAG_TIME_CRAY)
		INFO_LINE("call took: %s", TIME_STR);
#endif
	return SLURM_SUCCESS;
}
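DEF_TIMERS, START_TIMER, END_TIMER and TIME_STR above are Slurm convenience macros that bracket a call and format its elapsed time for logging. A rough standalone equivalent with POSIX clock_gettime is sketched below; the names and units are illustrative, not Slurm's actual macro implementation.

#include <stdio.h>
#include <time.h>

int main(void)
{
	struct timespec t0, t1;
	long usec;

	clock_gettime(CLOCK_MONOTONIC, &t0);
	/* ... the call being timed would go here ... */
	clock_gettime(CLOCK_MONOTONIC, &t1);
	usec = (t1.tv_sec - t0.tv_sec) * 1000000L +
	       (t1.tv_nsec - t0.tv_nsec) / 1000L;
	printf("call took: %ld usec\n", usec);
	return 0;
}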
Example #4
/*
 * Reset environment variables as appropriate for a job (i.e. this one task)
 * based upon the job step's GRES state and assigned CPUs.
 */
extern void step_reset_env(char ***job_env_ptr, void *gres_ptr,
			   bitstr_t *usable_gres)
{
	int i, len, first_match = -1;
	char *dev_list = NULL;
	gres_step_state_t *gres_step_ptr = (gres_step_state_t *) gres_ptr;

	if ((gres_step_ptr != NULL) &&
	    (gres_step_ptr->node_cnt == 1) &&
	    (gres_step_ptr->gres_bit_alloc != NULL) &&
	    (gres_step_ptr->gres_bit_alloc[0] != NULL) &&
	    (usable_gres != NULL)) {
		len = MIN(bit_size(gres_step_ptr->gres_bit_alloc[0]),
			  bit_size(usable_gres));
		for (i = 0; i < len; i++) {
			if (!bit_test(gres_step_ptr->gres_bit_alloc[0], i))
				continue;
			if (first_match == -1)
				first_match = i;
			if (!bit_test(usable_gres, i))
				continue;
			if (!dev_list)
				dev_list = xmalloc(128);
			else
				xstrcat(dev_list, ",");
			if (nic_devices && (i < nb_available_files) &&
			    (nic_devices[i] >= 0)) {
				xstrfmtcat(dev_list, "mlx4_%d", nic_devices[i]);
			} else {
				xstrfmtcat(dev_list, "mlx4_%d", i);
			}
		}
		if (!dev_list && (first_match != -1)) {
			i = first_match;
			dev_list = xmalloc(128);
			if (nic_devices && (i < nb_available_files) &&
			    (nic_devices[i] >= 0)) {
				xstrfmtcat(dev_list, "mlx4_%d", nic_devices[i]);
			} else {
				xstrfmtcat(dev_list, "mlx4_%d", i);
			}
		}
	}

	if (dev_list) {
		/* We assume Mellanox cards and an Open MPI program */
		env_array_overwrite(job_env_ptr,
				    "OMPI_MCA_btl_openib_if_include",
				    dev_list);
		xfree(dev_list);
	}
}
Example #5
static void _set_env(char ***env_ptr, void *gres_ptr, int node_inx,
		     bitstr_t *usable_gres,
		     bool *already_seen, int *local_inx,
		     bool reset, bool is_job)
{
	char *global_list = NULL, *local_list = NULL;
	char *slurm_env_var = NULL;

	if (is_job)
		slurm_env_var = "SLURM_JOB_NICS";
	else
		slurm_env_var = "SLURM_STEP_NICS";

	if (*already_seen) {
		global_list = xstrdup(getenvp(*env_ptr, slurm_env_var));
		local_list = xstrdup(getenvp(*env_ptr,
					     "OMPI_MCA_btl_openib_if_include"));
	}

	common_gres_set_env(gres_devices, env_ptr, gres_ptr, node_inx,
			    usable_gres, "mlx4_", local_inx,
			    &local_list, &global_list,
			    reset, is_job);

	if (global_list) {
		env_array_overwrite(env_ptr, slurm_env_var, global_list);
		xfree(global_list);
	}

	if (local_list) {
		env_array_overwrite(
			env_ptr, "OMPI_MCA_btl_openib_if_include", local_list);
		xfree(local_list);
		*already_seen = true;
	}
}
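The already_seen flag above handles repeated calls for the same step: values exported by an earlier call are read back with getenvp and handed to common_gres_set_env so it can extend them rather than overwrite them. A minimal standalone sketch of that read-then-extend shape, in plain C with an illustrative helper name:

#include <stdio.h>
#include <stdlib.h>

/* Illustrative helper: append to an env var, comma-separated,
 * preserving any value exported by an earlier call. */
static void env_extend(const char *name, const char *addition)
{
	const char *old = getenv(name);
	char buf[4096];

	if (old)
		snprintf(buf, sizeof(buf), "%s,%s", old, addition);
	else
		snprintf(buf, sizeof(buf), "%s", addition);
	setenv(name, buf, 1);
}

int main(void)
{
	env_extend("OMPI_MCA_btl_openib_if_include", "mlx4_0");
	env_extend("OMPI_MCA_btl_openib_if_include", "mlx4_1");
	printf("%s\n", getenv("OMPI_MCA_btl_openib_if_include"));
	return 0;
}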
Example #6
/*
 * Merge all of the environment variables in src_array into the
 * array dest_array.  Any variables already found in dest_array
 * will be overwritten with the value from src_array.
 */
void env_array_merge(char ***dest_array, const char **src_array)
{
	char **ptr;
	char name[256], *value;

	if (src_array == NULL)
		return;

	value = xmalloc(ENV_BUFSIZE);
	for (ptr = (char **)src_array; *ptr != NULL; ptr++) {
		if (_env_array_entry_splitter(*ptr, name, sizeof(name),
					      value, ENV_BUFSIZE))
			env_array_overwrite(dest_array, name, value);
	}
	xfree(value);
}
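env_array_merge depends on _env_array_entry_splitter to break each "NAME=value" entry into its two halves. A plausible standalone equivalent is sketched below; this is an assumption about its behavior, not Slurm's actual implementation.

#include <stdio.h>
#include <string.h>

/* Split "NAME=value" into separate buffers.
 * Returns 1 on success, 0 if there is no '=' or the name is too long. */
static int entry_splitter(const char *entry, char *name, size_t name_len,
			  char *value, size_t value_len)
{
	const char *eq = strchr(entry, '=');

	if (!eq || (size_t)(eq - entry) >= name_len)
		return 0;
	memcpy(name, entry, eq - entry);
	name[eq - entry] = '\0';
	snprintf(value, value_len, "%s", eq + 1);
	return 1;
}

int main(void)
{
	char name[256], value[1024];

	if (entry_splitter("PATH=/usr/bin:/bin", name, sizeof(name),
			   value, sizeof(value)))
		printf("%s -> %s\n", name, value);
	return 0;
}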
Example #7
/*
 * Append a single environment variable to an environment variable array
 * if a variable by that name does not already exist.  If a variable
 * by the same name is found in the array, it is overwritten with the
 * new value.
 *
 * "value_fmt" supports printf-style formatting.
 *
 * Return 1 on success, and 0 on error.
 */
int env_array_overwrite_fmt(char ***array_ptr, const char *name,
			    const char *value_fmt, ...)
{
	int rc;
	char *value;
	va_list ap;

	value = xmalloc(ENV_BUFSIZE);
	va_start(ap, value_fmt);
	vsnprintf (value, ENV_BUFSIZE, value_fmt, ap);
	va_end(ap);
	rc = env_array_overwrite(array_ptr, name, value);
	xfree(value);

	return rc;
}
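Note the return convention: env_array_overwrite_fmt returns 1 on success and 0 on error, which is why callers in these examples test rc == 0 for the failure path. The same two-step shape, format into a bounded buffer and then delegate to an overwrite call, can be sketched in standard C as below; setenv_fmt and its fixed ENV_BUFSIZE are illustrative assumptions.

#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>

#define ENV_BUFSIZE 4096	/* assumed size; Slurm defines its own */

/* Illustrative analogue: printf-style setenv wrapper,
 * returning 1 on success and 0 on error like the function above. */
static int setenv_fmt(const char *name, const char *fmt, ...)
{
	char value[ENV_BUFSIZE];
	va_list ap;

	va_start(ap, fmt);
	vsnprintf(value, sizeof(value), fmt, ap);
	va_end(ap);
	return (setenv(name, value, 1) == 0) ? 1 : 0;
}

int main(void)
{
	if (setenv_fmt("ALPS_APP_PE", "%d", 7) == 0)
		return 1;
	printf("ALPS_APP_PE=%s\n", getenv("ALPS_APP_PE"));
	return 0;
}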
Example #8
/*
 * Set environment variables as appropriate for a job (i.e. all tasks) based
 * upon the job step's GRES state.
 */
extern void step_set_env(char ***job_env_ptr, void *gres_ptr)
{
	int i, len, local_inx = 0;
	char *dev_list = NULL;
	gres_step_state_t *gres_step_ptr = (gres_step_state_t *) gres_ptr;
	bool use_local_dev_index = _use_local_device_index();

	if ((gres_step_ptr != NULL) &&
	    (gres_step_ptr->node_cnt == 1) &&
	    (gres_step_ptr->gres_bit_alloc != NULL) &&
	    (gres_step_ptr->gres_bit_alloc[0] != NULL)) {
		len = bit_size(gres_step_ptr->gres_bit_alloc[0]);
		for (i = 0; i < len; i++) {
			if (!bit_test(gres_step_ptr->gres_bit_alloc[0], i))
				continue;
			if (!dev_list)
				dev_list = xmalloc(128);
			else
				xstrcat(dev_list, ",");
			if (use_local_dev_index) {
				xstrfmtcat(dev_list, "mlx4_%d", local_inx++);
			} else if (nic_devices && (i < nb_available_files) &&
				   (nic_devices[i] >= 0)) {
				xstrfmtcat(dev_list, "mlx4_%d", nic_devices[i]);
			} else {
				xstrfmtcat(dev_list, "mlx4_%d", i);
			}
		}
	} else if (gres_step_ptr && (gres_step_ptr->gres_cnt_alloc > 0)) {
		/* The gres.conf file must identify specific device files
		 * in order to set the OMPI_MCA_btl_openib_if_include env var */
		error("gres/nic unable to set OMPI_MCA_btl_openib_if_include, "
		      "no device files configured");
	} else {
		xstrcat(dev_list, "NoDevFiles");
	}

	if (dev_list) {
		/* We assume Mellanox cards and an Open MPI program */
		env_array_overwrite(job_env_ptr,
				    "OMPI_MCA_btl_openib_if_include",
				    dev_list);
		xfree(dev_list);
	}
}
Example #9
/*
 * task_p_pre_launch() is called prior to exec of application task.
 *	It is followed by TaskProlog program (from slurm.conf) and
 *	--task-prolog (from srun command line).
 */
extern int task_p_pre_launch (stepd_step_rec_t *job)
{
#ifdef HAVE_NATIVE_CRAY
	int rc;

	debug("task_p_pre_launch: %u.%u, task %d",
	      job->jobid, job->stepid, job->envtp->procid);

	/*
	 * Send the rank to the application's PMI layer via an environment
	 * variable.
	 */
	rc = env_array_overwrite_fmt(&job->env, ALPS_APP_PE_ENV,
				     "%d", job->envtp->procid);
	if (rc == 0) {
		CRAY_ERR("Failed to set env variable %s", ALPS_APP_PE_ENV);
		return SLURM_ERROR;
	}

	/*
	 * Set the PMI_NO_FORK environment variable.
	 */
	rc = env_array_overwrite(&job->env, PMI_NO_FORK_ENV, "1");
	if (rc == 0) {
		CRAY_ERR("Failed to set env variable %s", PMI_NO_FORK_ENV);
		return SLURM_ERROR;
	}

	/*
	 *  Notify the task which offset to use
	 */
	rc = env_array_overwrite_fmt(&job->env, LLI_STATUS_OFFS_ENV,
				     "%d", job->envtp->localid + 1);
	if (rc == 0) {
		CRAY_ERR("Failed to set env variable %s",
			 LLI_STATUS_OFFS_ENV);
		return SLURM_ERROR;
	}
#endif
	return SLURM_SUCCESS;
}
Example #10
/*
 * Set environment variables as appropriate for a job (i.e. all tasks) based
 * upon the job's GRES state.
 */
extern void job_set_env(char ***job_env_ptr, void *gres_ptr)
{
	int i, len;
	char *dev_list = NULL;
	gres_job_state_t *gres_job_ptr = (gres_job_state_t *) gres_ptr;

	if ((gres_job_ptr != NULL) &&
	    (gres_job_ptr->node_cnt == 1) &&
	    (gres_job_ptr->gres_bit_alloc != NULL) &&
	    (gres_job_ptr->gres_bit_alloc[0] != NULL)) {
		len = bit_size(gres_job_ptr->gres_bit_alloc[0]);
		for (i = 0; i < len; i++) {
			if (!bit_test(gres_job_ptr->gres_bit_alloc[0], i))
				continue;
			if (!dev_list)
				dev_list = xmalloc(128);
			else
				xstrcat(dev_list, ",");
			if (gpu_devices && (i < nb_available_files) &&
			    (gpu_devices[i] >= 0))
				xstrfmtcat(dev_list, "%d", gpu_devices[i]);
			else
				xstrfmtcat(dev_list, "%d", i);
		}
	} else if (gres_job_ptr && (gres_job_ptr->gres_cnt_alloc > 0)) {
		/* The gres.conf file must identify specific device files
		 * in order to set the CUDA_VISIBLE_DEVICES env var */
		error("gres/gpu unable to set CUDA_VISIBLE_DEVICES, "
		      "no device files configured");
	} else {
		xstrcat(dev_list, "NoDevFiles");
	}

	if (dev_list) {
		env_array_overwrite(job_env_ptr,"CUDA_VISIBLE_DEVICES",
				    dev_list);
		xfree(dev_list);
	}
}
Example #11
/*
 * Write the IAA file and set the filename in the job's environment
 */
int write_iaa_file(stepd_step_rec_t *job, slurm_cray_jobinfo_t *sw_job,
		   int *ptags, int num_ptags, alpsc_peInfo_t *alpsc_pe_info)
{
	char *fname = xstrdup_printf(CRAY_IAA_FILE, sw_job->apid);
	int rc, ret = SLURM_ERROR;
	char *err_msg = NULL;

	do {
		// Write the file
		rc = alpsc_write_iaa_info(&err_msg, fname, sw_job->num_cookies,
					  (const char **)sw_job->cookies,
					  num_ptags, ptags, alpsc_pe_info);
		ALPSC_CN_DEBUG("alpsc_write_iaa_info");
		if (rc != 1) {
			break;
		}

		// chown the file to the job user
		rc = chown(fname, job->uid, job->gid);
		if (rc == -1) {
			CRAY_ERR("chown(%s, %d, %d) failed: %m",
				 fname, (int)job->uid, (int)job->gid);
			break;
		}

		// Write the environment variable
		rc = env_array_overwrite(&job->env, CRAY_IAA_INFO_FILE_ENV,
					 fname);
		if (rc == 0) {
			CRAY_ERR("Failed to set env variable %s",
				 CRAY_IAA_INFO_FILE_ENV);
			break;
		}
		ret = SLURM_SUCCESS;
	} while (0);

	xfree(fname);
	return ret;
}
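The do { ... } while (0) above is a single-pass loop used only so that break can jump to the shared cleanup at the bottom. A minimal sketch of the same error-handling shape, with illustrative names:

#include <stdio.h>

static int do_work(const char *path)
{
	int ret = -1;		/* pessimistic default, like SLURM_ERROR */
	FILE *fp = NULL;

	do {
		fp = fopen(path, "w");
		if (!fp)
			break;		/* jump straight to cleanup */
		if (fputs("data\n", fp) == EOF)
			break;
		ret = 0;		/* success only if every step passed */
	} while (0);

	if (fp)
		fclose(fp);		/* cleanup runs on every path */
	return ret;
}

int main(void)
{
	return (do_work("/tmp/iaa_example") == 0) ? 0 : 1;
}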
Example #12
int setenvf(char ***envp, const char *name, const char *fmt, ...)
{
	char *str = NULL, *value;
	va_list ap;
	int rc;

	value = xmalloc(ENV_BUFSIZE);
	va_start(ap, fmt);
	vsnprintf (value, ENV_BUFSIZE, fmt, ap);
	va_end(ap);

	if (envp && *envp) {
		if (env_array_overwrite(envp, name, value) == 1)
			rc = 0;
		else
			rc = 1;
	} else {
		xstrfmtcat(str, "%s=%s", name, value);
		rc = putenv(str);
	}
	xfree(value);
	return rc;
}
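setenvf inverts env_array_overwrite's convention (it returns 0 on success, matching putenv) and falls back to putenv(3) when no array is supplied. Since putenv keeps a pointer to the string it is handed, the xstrfmtcat buffer is deliberately not freed. A standalone sketch of just that fallback path:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Build "NAME=value" on the heap and hand it to putenv.
 * The buffer becomes part of the environment, so it must not be freed. */
static int putenv_pair(const char *name, const char *value)
{
	size_t len = strlen(name) + strlen(value) + 2;
	char *str = malloc(len);

	if (!str)
		return -1;
	snprintf(str, len, "%s=%s", name, value);
	return putenv(str);	/* 0 on success, matching setenvf */
}

int main(void)
{
	if (putenv_pair("MY_VAR", "42") == 0)
		printf("MY_VAR=%s\n", getenv("MY_VAR"));
	return 0;
}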
Example #13
static void _set_env(char ***env_ptr, void *gres_ptr, int node_inx,
		     bitstr_t *usable_gres,
		     bool *already_seen, int *local_inx,
		     bool reset, bool is_job)
{
	char *global_list = NULL, *local_list = NULL, *perc_env = NULL;
	char perc_str[64], *slurm_env_var = NULL;
	uint64_t count_on_dev, gres_per_node = 0, percentage;
	int global_id = -1;

	if (is_job)
		slurm_env_var = "SLURM_JOB_GRES";
	else
		slurm_env_var = "SLURM_STEP_GRES";

	if (*already_seen) {
		global_list = xstrdup(getenvp(*env_ptr, slurm_env_var));
		local_list = xstrdup(getenvp(*env_ptr,
					     "CUDA_VISIBLE_DEVICES"));
		perc_env = xstrdup(getenvp(*env_ptr,
					  "CUDA_MPS_ACTIVE_THREAD_PERCENTAGE"));
	}

	common_gres_set_env(gres_devices, env_ptr, gres_ptr, node_inx,
			    usable_gres, "", local_inx,
			    &gres_per_node, &local_list, &global_list,
			    reset, is_job, &global_id);

	if (perc_env) {
		env_array_overwrite(env_ptr,
				    "CUDA_MPS_ACTIVE_THREAD_perc_str",
				    perc_env);
		xfree(perc_env);
	} else if (gres_per_node && mps_info) {
		count_on_dev = _get_dev_count(global_id);
		if (count_on_dev > 0) {
			percentage = (gres_per_node * 100) / count_on_dev;
			percentage = MAX(percentage, 1);
		} else
			percentage = 0;
		snprintf(perc_str, sizeof(perc_str), "%"PRIu64, percentage);
		env_array_overwrite(env_ptr,
				    "CUDA_MPS_ACTIVE_THREAD_PERCENTAGE",
				    perc_str);
	} else if (gres_per_node) {
		error("%s: mps_info list is NULL", __func__);
		snprintf(perc_str, sizeof(perc_str), "%"PRIu64, gres_per_node);
		env_array_overwrite(env_ptr,
				    "CUDA_MPS_ACTIVE_THREAD_PERCENTAGE",
				    perc_str);
	}

	if (global_list) {
		env_array_overwrite(env_ptr, slurm_env_var, global_list);
		xfree(global_list);
	}

	if (local_list) {
		/*
		 * CUDA_VISIBLE_DEVICES is relative to the MPS server.
		 * With only one GPU under the control of MPS, the device
		 * number will always be "0".
		 */
		env_array_overwrite(env_ptr, "CUDA_VISIBLE_DEVICES", "0");
		env_array_overwrite(env_ptr, "GPU_DEVICE_ORDINAL", "0");
		xfree(local_list);
		*already_seen = true;
	}
}
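The MPS percentage computed above is the step's share of the selected device: gres_per_node units out of the count_on_dev units configured on that GPU, scaled to 100 and clamped to at least 1. A worked example with assumed figures:

#include <inttypes.h>
#include <stdio.h>

int main(void)
{
	/* Assumed figures: the step requests 25 MPS units on a GPU
	 * configured with 100 units in gres.conf. */
	uint64_t gres_per_node = 25, count_on_dev = 100, percentage;

	percentage = (gres_per_node * 100) / count_on_dev;
	if (percentage < 1)
		percentage = 1;	/* same clamp as MAX(percentage, 1) above */
	printf("CUDA_MPS_ACTIVE_THREAD_PERCENTAGE=%" PRIu64 "\n", percentage);
	return 0;
}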