Ejemplo n.º 1
0
Archivo: setup.c Proyecto: Cray/slurm
/*
 * Set up the KVS state for the srun case.
 *
 * IN job - client info for the step (not referenced by this function)
 * RET whatever temp_kvs_init() returns
 */
static int
_setup_srun_kvs(const mpi_plugin_client_info_t *job)
{
	/* the only setup performed here is initializing the temp KVS */
	return temp_kvs_init();
}
Ejemplo n.º 2
0
/*
 * Set up the KVS state for the stepd case.
 *
 * Resets kvs_seq to 1, initializes the temporary KVS and the main KVS,
 * loads any "preput" key/value pairs found in the environment and then
 * stores the process mapping under "PMI_process_mapping".
 *
 * IN job - step record (not referenced by this function)
 * IN env - pointer to the environment array searched with getenvp()
 * RET SLURM_SUCCESS, or the error code returned by temp_kvs_init() or
 *     kvs_init() if either of them fails
 */
static int
_setup_stepd_kvs(const stepd_step_rec_t *job, char ***env)
{
	int rc = SLURM_SUCCESS, i = 0, pp_cnt = 0;
	char *p, env_key[32], *ppkey, *ppval;

	kvs_seq = 1;
	rc = temp_kvs_init();
	if (rc != SLURM_SUCCESS)
		return rc;

	rc = kvs_init();
	if (rc != SLURM_SUCCESS)
		return rc;

	/* preput: number of key/value pairs passed through the environment */
	p = getenvp(*env, PMI2_PREPUT_CNT_ENV);
	if (p) {
		pp_cnt = atoi(p);
	}

	for (i = 0; i < pp_cnt; i ++) {
		snprintf(env_key, sizeof(env_key), PMI2_PPKEY_ENV"%d", i);
		ppkey = getenvp(*env, env_key);
		snprintf(env_key, sizeof(env_key), PMI2_PPVAL_ENV"%d", i);
		ppval = getenvp(*env, env_key);

		/*
		 * getenvp() returns NULL for a variable that is not set (see
		 * the count lookup above), so the advertised count may exceed
		 * the pairs actually present. Skip incomplete pairs rather
		 * than handing a NULL pointer to kvs_put().
		 * NOTE(review): kvs_put() is defined elsewhere; presumably it
		 * expects valid strings -- confirm.
		 */
		if (!ppkey || !ppval)
			continue;

		kvs_put(ppkey, ppval);
	}

	/*
	 * For PMI 1.1.
	 * A better logic would be to put PMI_process_mapping in KVS only if
	 * the task distribution method is not "arbitrary", because in
	 * "arbitrary" distribution the process mapping variable is not
	 * correct. MPICH2 may deduce the clique info from the hostnames.
	 * But that is rather costly.
	 */
	kvs_put("PMI_process_mapping", job_info.proc_mapping);

	return SLURM_SUCCESS;
}
Ejemplo n.º 3
0
Archivo: kvs.c Proyecto: SchedMD/slurm
/*
 * Send the contents of the temporary KVS buffer to its destination,
 * retrying up to MAX_RETRIES attempts with a doubling sleep in between.
 *
 * The destination is chosen once, up front:
 *   - not in a stepd (srun): forward to job_info.step_nodelist
 *   - stepd with a parent node: forward to tree_info.parent_node
 *   - stepd without a parent node: send directly to srun
 *
 * kvs_seq is incremented before sending, and the temporary KVS is
 * re-initialized afterwards whether or not the send succeeded.
 *
 * RET SLURM_SUCCESS if an attempt succeeded, otherwise the return code
 *     of the last failed attempt
 */
extern int
temp_kvs_send(void)
{
	int rc = SLURM_ERROR, attempts = 0;
	unsigned int backoff = 1;
	char *dest = NULL;

	if (in_stepd()) {
		if (tree_info.parent_node)
			dest = xstrdup(tree_info.parent_node);
	} else {
		/* srun */
		dest = xstrdup(job_info.step_nodelist);
	}

	/* cmd included in temp_kvs_buf */
	kvs_seq++; /* expecting new kvs after now */

	for (;;) {
		/* log only once, right before the first retry */
		if (attempts == 1) {
			verbose("failed to send temp kvs, rc=%d, retrying", rc);
		}

		if (dest) {
			/* srun or non-first-level stepds */
			rc = slurm_forward_data(&dest, tree_sock_addr,
						temp_kvs_cnt, temp_kvs_buf);
		} else {
			/* first level stepds */
			rc = tree_msg_to_srun(temp_kvs_cnt, temp_kvs_buf);
		}

		/* stop on success or once the retry budget is used up */
		if ((rc == SLURM_SUCCESS) || (++attempts >= MAX_RETRIES))
			break;

		/* wait, in case parent stepd / srun not ready */
		sleep(backoff);
		backoff *= 2;
	}

	temp_kvs_init();	/* clear old temp kvs */
	xfree(dest);

	return rc;
}
Ejemplo n.º 4
0
Archivo: kvs.c Proyecto: BYUHPC/slurm
/*
 * Send the contents of the temporary KVS buffer to its destination,
 * retrying up to MAX_RETRIES attempts with a doubling sleep in between.
 *
 * The destination is re-evaluated on every attempt:
 *   - not in a stepd (srun): send to the stepds in job_info.step_nodelist
 *   - stepd with a parent node: send to tree_info.parent_node
 *   - stepd without a parent node: send directly to srun
 *
 * kvs_seq is incremented before sending, and the temporary KVS is
 * re-initialized afterwards whether or not the send succeeded.
 *
 * RET SLURM_SUCCESS if an attempt succeeded, otherwise the return code
 *     of the last failed attempt
 */
extern int
temp_kvs_send(void)
{
	int rc = SLURM_ERROR, attempts = 0;
	unsigned int backoff = 1;

	/* cmd included in temp_kvs_buf */
	kvs_seq++; /* expecting new kvs after now */

	for (;;) {
		/* log only once, right before the first retry */
		if (attempts == 1)
			verbose("failed to send temp kvs, rc=%d, retrying", rc);

		if (in_stepd()) {
			if (tree_info.parent_node != NULL) {
				/* non-first-level stepds */
				rc = tree_msg_to_stepds(tree_info.parent_node,
							temp_kvs_cnt,
							temp_kvs_buf);
			} else {
				/* first level stepds */
				rc = tree_msg_to_srun(temp_kvs_cnt,
						      temp_kvs_buf);
			}
		} else {
			/* srun */
			rc = tree_msg_to_stepds(job_info.step_nodelist,
						temp_kvs_cnt,
						temp_kvs_buf);
		}

		/* stop on success or once the retry budget is used up */
		if ((rc == SLURM_SUCCESS) || (++attempts >= MAX_RETRIES))
			break;

		/* wait, in case parent stepd / srun not ready */
		sleep(backoff);
		backoff *= 2;
	}

	temp_kvs_init();	/* clear old temp kvs */
	return rc;
}