Esempio n. 1
0
// Dumb brute force function
static int dumb_direct_children(int *children, int width, int id,
				int max_node_id)
{
	int child;
	int count = 0;
	for(child = id+1; child < max_node_id; child++){
		int parent_id, child_num, depth, max_depth;
		reverse_tree_info(child, max_node_id, width,
				  &parent_id, &child_num,
				  &depth, &max_depth);
		if( parent_id == id ){
			children[count++] = child;
		}
	}
	return count;
}
Esempio n. 2
0
int
main(int argc, char **argv)
{
	int i, j;
	int n = 8192;
	int w = 5;

	int parent, children, depth, maxdepth;

	for (i = 0; i < n; i++) {
		int children1[w], children2[w];
		int cnt1, cnt2;

		reverse_tree_info(i, n, w, &parent,
				  &children, &depth, &maxdepth);
		printf("\
%d : par: %d nchild: %d depth: %d, maxdepth: %d\n",
		       i, parent, children, depth, maxdepth);
		cnt1 = dumb_direct_children(children1, w, i, n);
		cnt2 = reverce_tree_direct_children(i, n, w, depth, children2);
		if (cnt1 != cnt2 ) {
			printf("\
Direct children sanity check error: cnt1 = %d, cnt2 = %d\n", cnt1, cnt2);
			return -1;
		}

		for(j = 0; j < cnt1; j++){

			if (children1[j] != children2[j]) {
				printf("\
Direct children sanity check error: cnt1 = %d, cnt2 = %d\n", cnt1, cnt2);
				printf("\
Failed on %d'th element: children1[%d] = %d, children2[%d] = %d\n",
				       j, j, children1[j], j, children2[j]);
				return -1;
			}
		}
Esempio n. 3
0
File: setup.c Progetto: Cray/slurm
static int
_setup_stepd_tree_info(const stepd_step_rec_t *job, char ***env)
{
	hostlist_t hl;
	char srun_host[64];
	uint16_t port;
	char *p;
	int tree_width;

	/* job info available */

	memset(&tree_info, 0, sizeof(tree_info));

	hl = hostlist_create(job_info.step_nodelist);
	p = hostlist_nth(hl, job_info.nodeid); /* strdup-ed */
	tree_info.this_node = xstrdup(p);
	free(p);

	/* this only controls the upward communication tree width */
	p = getenvp(*env, PMI2_TREE_WIDTH_ENV);
	if (p) {
		tree_width = atoi(p);
		if (tree_width < 2) {
			info("invalid PMI2 tree width value (%d) detected. "
			     "fallback to default value.", tree_width);
			tree_width = slurm_get_tree_width();
		}
	} else {
		tree_width = slurm_get_tree_width();
	}

	/* TODO: cannot launch 0 tasks on node */

	/*
	 * In tree position calculation, root of the tree is srun with id 0.
	 * Stepd's id will be its nodeid plus 1.
	 */
	reverse_tree_info(job_info.nodeid + 1, job_info.nnodes + 1,
			  tree_width, &tree_info.parent_id,
			  &tree_info.num_children, &tree_info.depth,
			  &tree_info.max_depth);
	tree_info.parent_id --;	       /* restore real nodeid */
	if (tree_info.parent_id < 0) {	/* parent is srun */
		tree_info.parent_node = NULL;
	} else {
		p = hostlist_nth(hl, tree_info.parent_id);
		tree_info.parent_node = xstrdup(p);
		free(p);
	}
	hostlist_destroy(hl);

	tree_info.pmi_port = 0;	/* not used */

	p = getenvp(*env, "SLURM_SRUN_COMM_HOST");
	if (!p) {
		error("mpi/pmi2: unable to find srun comm ifhn in env");
		return SLURM_ERROR;
	} else {
		strncpy(srun_host, p, 64);
	}
	p = getenvp(*env, PMI2_SRUN_PORT_ENV);
	if (!p) {
		error("mpi/pmi2: unable to find srun pmi2 port in env");
		return SLURM_ERROR;
	} else {
		port = atoi(p);
		unsetenvp(*env, PMI2_SRUN_PORT_ENV);
	}
	tree_info.srun_addr = xmalloc(sizeof(slurm_addr_t));
	slurm_set_addr(tree_info.srun_addr, port, srun_host);

	/* init kvs seq to 0. TODO: reduce array size */
	tree_info.children_kvs_seq = xmalloc(sizeof(uint32_t) *
					     job_info.nnodes);

	return SLURM_SUCCESS;
}
Esempio n. 4
0
/*
 * Based on ideas provided by Hongjia Cao <*****@*****.**> in PMI2 plugin
 */
int pmixp_coll_init(pmixp_coll_t *coll, const pmix_proc_t *procs,
		    size_t nprocs, pmixp_coll_type_t type)
{
	hostlist_t hl;
	int max_depth, width, depth, i;
	char *p;

#ifndef NDEBUG
	coll->magic = PMIXP_COLL_STATE_MAGIC;
#endif
	coll->type = type;
	coll->state = PMIXP_COLL_SYNC;
	coll->pset.procs = xmalloc(sizeof(*procs) * nprocs);
	coll->pset.nprocs = nprocs;
	memcpy(coll->pset.procs, procs, sizeof(*procs) * nprocs);

	if (SLURM_SUCCESS != _hostset_from_ranges(procs, nprocs, &hl)) {
		/* TODO: provide ranges output routine */
		PMIXP_ERROR("Bad ranges information");
		goto err_exit;
	}
#ifdef PMIXP_COLL_DEBUG
	/* if we debug collectives - store a copy of a full
	 * hostlist to resolve participant id to the hostname */
	coll->peers_hl = hostlist_copy(hl);
#endif

	width = slurm_get_tree_width();
	coll->peers_cnt = hostlist_count(hl);
	coll->my_peerid = hostlist_find(hl, pmixp_info_hostname());
	reverse_tree_info(coll->my_peerid, coll->peers_cnt, width,
			  &coll->prnt_peerid, &coll->chldrn_cnt, &depth,
			  &max_depth);

	/* We interested in amount of direct childs */
	coll->seq = 0;
	coll->contrib_children = 0;
	coll->contrib_local = false;
	coll->chldrn_ids = xmalloc(sizeof(int) * width);
	coll->contrib_chld = xmalloc(sizeof(int) * width);
	coll->chldrn_cnt = reverse_tree_direct_children(coll->my_peerid,
							coll->peers_cnt,
							  width, depth,
							  coll->chldrn_ids);
	if (coll->prnt_peerid == -1) {
		/* if we are the root of the tree:
		 * - we don't have a parent;
		 * - we have large list of all_childrens (we don't want
		 * ourselfs there)
		 */
		coll->prnt_host = NULL;
		coll->all_chldrn_hl = hostlist_copy(hl);
		hostlist_delete_host(coll->all_chldrn_hl,
				     pmixp_info_hostname());
		coll->chldrn_str =
			hostlist_ranged_string_xmalloc(coll->all_chldrn_hl);
	} else {
		/* for all other nodes in the tree we need to know:
		 * - nodename of our parent;
		 * - we don't need a list of all_childrens and hl anymore
		 */

		/*
		 * setup parent id's
		 */
		p = hostlist_nth(hl, coll->prnt_peerid);
		coll->prnt_host = xstrdup(p);
		free(p);
		/* reset prnt_peerid to the global peer */
		coll->prnt_peerid = pmixp_info_job_hostid(coll->prnt_host);

		/*
		 * setup root id's
		 * (we need this for the SLURM API communication case)
		 */
		p = hostlist_nth(hl, 0);
		coll->root_host = xstrdup(p);
		free(p);
		/* reset prnt_peerid to the global peer */
		coll->root_peerid = pmixp_info_job_hostid(coll->root_host);

		/* use empty hostlist here */
		coll->all_chldrn_hl = hostlist_create("");
		coll->chldrn_str = NULL;
	}

	/* fixup children peer ids to te global ones */
	for(i=0; i<coll->chldrn_cnt; i++){
		p = hostlist_nth(hl, coll->chldrn_ids[i]);
		coll->chldrn_ids[i] = pmixp_info_job_hostid(p);
		free(p);
	}
	hostlist_destroy(hl);

	/* Collective state */
	coll->ufwd_buf = pmixp_server_buf_new();
	coll->dfwd_buf = pmixp_server_buf_new();
	_reset_coll_ufwd(coll);
	_reset_coll_dfwd(coll);
	coll->cbdata = NULL;
	coll->cbfunc = NULL;

	/* init fine grained lock */
	slurm_mutex_init(&coll->lock);

	return SLURM_SUCCESS;
err_exit:
	return SLURM_ERROR;
}
Esempio n. 5
0
/*
 * Based on ideas provided by Hongjia Cao <*****@*****.**> in PMI2 plugin
 */
int pmixp_coll_init(pmixp_coll_t *coll, const pmix_proc_t *procs,
		size_t nprocs, pmixp_coll_type_t type)
{
	hostlist_t hl;
	uint32_t nodeid = 0, nodes = 0;
	int parent_id, depth, max_depth, tmp;
	int width, my_nspace = -1;
	char *p;
	int i, *ch_nodeids = NULL;

#ifndef NDEBUG
	coll->magic = PMIXP_COLL_STATE_MAGIC;
#endif
	coll->type = type;
	coll->state = PMIXP_COLL_SYNC;
	coll->procs = xmalloc(sizeof(*procs) * nprocs);
	memcpy(coll->procs, procs, sizeof(*procs) * nprocs);
	coll->nprocs = nprocs;
	coll->my_nspace = my_nspace;

	if (SLURM_SUCCESS != _hostset_from_ranges(procs, nprocs, &hl)) {
		/* TODO: provide ranges output routine */
		PMIXP_ERROR("Bad ranges information");
		goto err_exit;
	}

	width = slurm_get_tree_width();
	nodes = hostlist_count(hl);
	nodeid = hostlist_find(hl, pmixp_info_hostname());
	reverse_tree_info(nodeid, nodes, width, &parent_id, &tmp, &depth,
			&max_depth);
	coll->children_cnt = tmp;
	coll->nodeid = nodeid;

	/* We interested in amount of direct childs */
	coll->seq = 0;
	coll->contrib_cntr = 0;
	coll->contrib_local = false;
	ch_nodeids = xmalloc(sizeof(int) * width);
	coll->ch_contribs = xmalloc(sizeof(int) * width);
	coll->children_cnt = reverse_tree_direct_children(nodeid, nodes, width,
			depth, ch_nodeids);

	/* create the hostlist with extract direct children's hostnames */
	coll->ch_hosts = hostlist_create("");
	for (i = 0; i < coll->children_cnt; i++) {
		char *hname = hostlist_nth(hl, ch_nodeids[i]);
		hostlist_push(coll->ch_hosts, hname);
	}
	/* just in case, shouldn't be needed */
	hostlist_uniq(coll->ch_hosts);
	xfree(ch_nodeids);

	if (parent_id == -1) {
		/* if we are the root of the tree:
		 * - we don't have a parent;
		 * - we have large list of all_childrens (we don't want ourselfs there)
		 */
		coll->parent_host = NULL;
		hostlist_delete_host(hl, pmixp_info_hostname());
		coll->all_children = hl;
	} else if (parent_id >= 0) {
		/* for all other nodes in the tree we need to know:
		 * - nodename of our parent;
		 * - we don't need a list of all_childrens and hl anymore
		 */
		p = hostlist_nth(hl, parent_id);
		coll->parent_host = xstrdup(p);
		/* use empty hostlist here */
		coll->all_children = hostlist_create("");
		free(p);
		hostlist_destroy(hl);
	}

	/* Collective data */
	coll->buf = pmixp_server_new_buf();
	coll->serv_offs = get_buf_offset(coll->buf);

	if (SLURM_SUCCESS != _pack_ranges(coll)) {
		PMIXP_ERROR("Cannot pack ranges to coll message header!");
		goto err_exit;
	}

	/* Callback information */
	coll->cbdata = NULL;
	coll->cbfunc = NULL;

	/* init fine grained lock */
	slurm_mutex_init(&coll->lock);

	return SLURM_SUCCESS;
err_exit:
	return SLURM_ERROR;
}