Example #1
static int _hostset_from_ranges(const pmix_proc_t *procs, size_t nprocs,
				hostlist_t *hl_out)
{
	int i;
	hostlist_t hl = hostlist_create("");
	pmixp_namespace_t *nsptr = NULL;
	for (i = 0; i < nprocs; i++) {
		char *node = NULL;
		hostlist_t tmp;
		nsptr = pmixp_nspaces_find(procs[i].nspace);
		if (NULL == nsptr) {
			goto err_exit;
		}
		if (procs[i].rank == PMIX_RANK_WILDCARD) {
			tmp = hostlist_copy(nsptr->hl);
		} else {
			tmp = pmixp_nspace_rankhosts(nsptr, &procs[i].rank, 1);
		}
		while (NULL != (node = hostlist_pop(tmp))) {
			hostlist_push(hl, node);
			free(node);
		}
		hostlist_destroy(tmp);
	}
	hostlist_uniq(hl);
	*hl_out = hl;
	return SLURM_SUCCESS;
err_exit:
	hostlist_destroy(hl);
	return SLURM_ERROR;
}
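
A minimal caller sketch for the helper above (the caller name and the PMIXP_DEBUG logging macro are illustrative assumptions; the hostlist/xmalloc calls are the ones used in the example). The out-parameter convention means the caller owns *hl_out and must destroy it:

/* Hypothetical caller: build the unique host set for a process array and
 * log it as a compact range string (e.g. "node[01-04]"). */
static void _log_hostset(const pmix_proc_t *procs, size_t nprocs)
{
	hostlist_t hl = NULL;
	char *ranged;

	if (SLURM_SUCCESS != _hostset_from_ranges(procs, nprocs, &hl))
		return;
	ranged = hostlist_ranged_string_xmalloc(hl);
	PMIXP_DEBUG("host set: %s", ranged);	/* assumed debug macro */
	xfree(ranged);
	hostlist_destroy(hl);
}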
Example #2
File: pmixp_coll.c Project: SchedMD/slurm
int pmixp_coll_init(pmixp_coll_t *coll, pmixp_coll_type_t type,
		    const pmixp_proc_t *procs, size_t nprocs)
{
	int rc = SLURM_SUCCESS;
	hostlist_t hl;

	coll->seq = 0;
#ifndef NDEBUG
	coll->magic = PMIXP_COLL_STATE_MAGIC;
#endif
	coll->type = type;
	coll->pset.procs = xmalloc(sizeof(*procs) * nprocs);
	coll->pset.nprocs = nprocs;
	memcpy(coll->pset.procs, procs, sizeof(*procs) * nprocs);

	if (SLURM_SUCCESS != pmixp_hostset_from_ranges(procs, nprocs, &hl)) {
		/* TODO: provide ranges output routine */
		PMIXP_ERROR("Bad ranges information");
		rc = SLURM_ERROR;
		goto exit;
	}
	coll->peers_cnt = hostlist_count(hl);
	coll->my_peerid = hostlist_find(hl, pmixp_info_hostname());
#ifdef PMIXP_COLL_DEBUG
	/* if we debug collectives, store a copy of the full
	 * hostlist to resolve participant ids to hostnames */
	coll->peers_hl = hostlist_copy(hl);
#endif

	switch(type) {
	case PMIXP_COLL_TYPE_FENCE_TREE:
		rc = pmixp_coll_tree_init(coll, &hl);
		break;
	case PMIXP_COLL_TYPE_FENCE_RING:
		rc = pmixp_coll_ring_init(coll, &hl);
		break;
	default:
		PMIXP_ERROR("Unknown coll type");
		rc = SLURM_ERROR;
	}
	hostlist_destroy(hl);
	if (rc) {
		goto exit;
	}

exit:
	return rc;
}
Example #3
File: slurm_route.c Project: c02y/slurm
/*
 * _set_collectors calls the split_hostlist API on the all-nodes hostlist
 * to select the node to be used as a collector for unsolicited node
 * aggregation.
 *
 * If this node is a forwarding node (the first node in any hostlist),
 * then its collector and backup are the ControlMachine and its backup.
 *
 * Otherwise, we find the hostlist containing this node.
 * The forwarding node in that hostlist becomes the collector, and the next
 * node which is not this node becomes the backup.
 * That list is then split again, and we search the result for a list in
 * which this node is the forwarding node. If found, we set the collector
 * and backup; otherwise this process is repeated.
 */
static void _set_collectors(char *this_node_name)
{
	slurm_ctl_conf_t *conf;
	hostlist_t  nodes;
	hostlist_t* hll = NULL;
	char *parent = NULL, *backup = NULL;
	char addrbuf[32];
	int i, j, f = -1;
	int hl_count = 0;
	uint16_t parent_port;
	uint16_t backup_port;
	bool found = false;
	bool ctldparent = true;

#ifdef HAVE_FRONT_END
	return; /* on a FrontEnd system this would never be useful. */
#endif

	if (!run_in_daemon("slurmd"))
		return; /* Only compute nodes have collectors */

	/* Set the initial iteration, collector is controller,
	 * full list is split */
	xassert(this_node_name);

	conf = slurm_conf_lock();
	nodes = _get_all_nodes();
	parent = strdup(conf->control_addr);
	if (conf->backup_addr) {
		backup = strdup(conf->backup_addr);
	}
	parent_port = conf->slurmctld_port;
	backup_port = parent_port;
	slurm_conf_unlock();
	while (!found) {
		if ( route_g_split_hostlist(nodes, &hll, &hl_count) ) {
			error("unable to split forward hostlist");
			goto clean; /* collector addrs remain NULL */
		}
		/* Find which hostlist contains this node */
		for (i=0; i < hl_count; i++) {
			f = hostlist_find(hll[i], this_node_name);
			if (f != -1)
				break;
		}
		if (i == hl_count) {
			fatal("ROUTE -- %s not found in node_record_table",
			      this_node_name);
		}
		if (f == 0) {
			/* this node is the forwarding node of this list,
			 * so its collector is the current parent */
			if (hostlist_count(hll[i]) > 1)
				this_is_collector = true;
			xfree(msg_collect_node);
			msg_collect_node = xmalloc(sizeof(slurm_addr_t));
			if (ctldparent)
				slurm_set_addr(msg_collect_node, parent_port,
					       parent);
			else {
				slurm_conf_get_addr(parent, msg_collect_node);
				msg_collect_node->sin_port = htons(parent_port);
			}
			if (debug_flags & DEBUG_FLAG_ROUTE) {
				slurm_print_slurm_addr(msg_collect_node,
						       addrbuf, 32);
				info("ROUTE -- message collector address is %s",
				     addrbuf);
			}
			xfree(msg_collect_backup);
			if (backup) {
				msg_collect_backup =
					xmalloc(sizeof(slurm_addr_t));
				if (ctldparent) {
					slurm_set_addr(msg_collect_backup,
						       backup_port, backup);
				} else {
					slurm_conf_get_addr(backup,
							    msg_collect_backup);
					msg_collect_backup->sin_port =
						htons(backup_port);
				}
				if (debug_flags & DEBUG_FLAG_ROUTE) {
					slurm_print_slurm_addr(
						msg_collect_backup,
						addrbuf, 32);
					info("ROUTE -- message collector backup"
					     " address is %s", addrbuf);
				}
			} else {
				if (debug_flags & DEBUG_FLAG_ROUTE) {
					info("ROUTE -- no message collector "
					     "backup");
				}

			}
			found = true;
			goto clean;
		}

		/* We are not a forwarding node; the first node in this list
		 * will split the forward list further.
		 * We also know that the forwarding node is not a controller.
		 *
		 * Clean up the parent context before the next iteration. */
		ctldparent = false;
		hostlist_destroy(nodes);
		if (parent)
			free(parent);
		if (backup)
			free(backup);
		nodes = hostlist_copy(hll[i]);
		for (j=0; j < hl_count; j++) {
			hostlist_destroy(hll[j]);
		}
		xfree(hll);

		/* set our parent, backup, and continue search */
		parent = hostlist_shift(nodes);
		backup = hostlist_nth(nodes, 0);
		if (strcmp(backup, this_node_name) == 0) {
			free(backup);
			backup = NULL;
			if (hostlist_count(nodes) > 1)
				backup = hostlist_nth(nodes, 1);
		}
		parent_port =  slurm_conf_get_port(parent);
		if (backup) {
			backup_port = slurm_conf_get_port(backup);
		} else
			backup_port = 0;

	}
clean:
	if (debug_flags & DEBUG_FLAG_ROUTE) {
		if (this_is_collector)
			info("ROUTE -- %s is a collector node", this_node_name);
		else
			info("ROUTE -- %s is a leaf node", this_node_name);
	}
	hostlist_destroy(nodes);
	if (parent)
		free(parent);
	if (backup)
		free(backup);
	for (i=0; i < hl_count; i++) {
		hostlist_destroy(hll[i]);
	}
	xfree(hll);
}
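
The parent/backup selection at the bottom of the search loop is worth isolating. The sketch below is a hypothetical helper (not part of slurm_route.c) that assumes the same hostlist API; hostlist_shift() and hostlist_nth() return malloc'd strings that the caller must free(), exactly as in the example:

/* Hypothetical helper: pick the parent (first host of the sub-list) and a
 * backup (first host that is not this node). Caller free()s the results. */
static void _pick_parent_backup(hostlist_t nodes, const char *this_node_name,
				char **parent, char **backup)
{
	*parent = hostlist_shift(nodes);
	*backup = hostlist_nth(nodes, 0);
	if (*backup && !strcmp(*backup, this_node_name)) {
		free(*backup);
		*backup = NULL;
		if (hostlist_count(nodes) > 1)
			*backup = hostlist_nth(nodes, 1);
	}
}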
Example #4
/*
 * Based on ideas provided by Hongjia Cao <*****@*****.**> in PMI2 plugin
 */
int pmixp_coll_init(pmixp_coll_t *coll, const pmix_proc_t *procs,
		    size_t nprocs, pmixp_coll_type_t type)
{
	hostlist_t hl;
	int max_depth, width, depth, i;
	char *p;

#ifndef NDEBUG
	coll->magic = PMIXP_COLL_STATE_MAGIC;
#endif
	coll->type = type;
	coll->state = PMIXP_COLL_SYNC;
	coll->pset.procs = xmalloc(sizeof(*procs) * nprocs);
	coll->pset.nprocs = nprocs;
	memcpy(coll->pset.procs, procs, sizeof(*procs) * nprocs);

	if (SLURM_SUCCESS != _hostset_from_ranges(procs, nprocs, &hl)) {
		/* TODO: provide ranges output routine */
		PMIXP_ERROR("Bad ranges information");
		goto err_exit;
	}
#ifdef PMIXP_COLL_DEBUG
	/* if we debug collectives, store a copy of the full
	 * hostlist to resolve participant ids to hostnames */
	coll->peers_hl = hostlist_copy(hl);
#endif

	width = slurm_get_tree_width();
	coll->peers_cnt = hostlist_count(hl);
	coll->my_peerid = hostlist_find(hl, pmixp_info_hostname());
	reverse_tree_info(coll->my_peerid, coll->peers_cnt, width,
			  &coll->prnt_peerid, &coll->chldrn_cnt, &depth,
			  &max_depth);

	/* We are interested in the number of direct children */
	coll->seq = 0;
	coll->contrib_children = 0;
	coll->contrib_local = false;
	coll->chldrn_ids = xmalloc(sizeof(int) * width);
	coll->contrib_chld = xmalloc(sizeof(int) * width);
	coll->chldrn_cnt = reverse_tree_direct_children(coll->my_peerid,
							coll->peers_cnt,
							width, depth,
							coll->chldrn_ids);
	if (coll->prnt_peerid == -1) {
		/* if we are the root of the tree:
		 * - we don't have a parent;
		 * - we have the full list of all children (we don't want
		 *   ourselves there)
		 */
		coll->prnt_host = NULL;
		coll->all_chldrn_hl = hostlist_copy(hl);
		hostlist_delete_host(coll->all_chldrn_hl,
				     pmixp_info_hostname());
		coll->chldrn_str =
			hostlist_ranged_string_xmalloc(coll->all_chldrn_hl);
	} else {
		/* for all other nodes in the tree we need to know:
		 * - the nodename of our parent;
		 * - we don't need the list of all children or hl anymore
		 */

		/*
		 * setup parent id's
		 */
		p = hostlist_nth(hl, coll->prnt_peerid);
		coll->prnt_host = xstrdup(p);
		free(p);
		/* reset prnt_peerid to the global peer */
		coll->prnt_peerid = pmixp_info_job_hostid(coll->prnt_host);

		/*
		 * setup root id's
		 * (we need this for the SLURM API communication case)
		 */
		p = hostlist_nth(hl, 0);
		coll->root_host = xstrdup(p);
		free(p);
		/* reset root_peerid to the global peer id */
		coll->root_peerid = pmixp_info_job_hostid(coll->root_host);

		/* use empty hostlist here */
		coll->all_chldrn_hl = hostlist_create("");
		coll->chldrn_str = NULL;
	}

	/* fix up children peer ids to the global ones */
	for (i = 0; i < coll->chldrn_cnt; i++) {
		p = hostlist_nth(hl, coll->chldrn_ids[i]);
		coll->chldrn_ids[i] = pmixp_info_job_hostid(p);
		free(p);
	}
	hostlist_destroy(hl);

	/* Collective state */
	coll->ufwd_buf = pmixp_server_buf_new();
	coll->dfwd_buf = pmixp_server_buf_new();
	_reset_coll_ufwd(coll);
	_reset_coll_dfwd(coll);
	coll->cbdata = NULL;
	coll->cbfunc = NULL;

	/* init fine grained lock */
	slurm_mutex_init(&coll->lock);

	return SLURM_SUCCESS;
err_exit:
	return SLURM_ERROR;
}
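
The tree setup above repeats one translation three times: a position in the collective's hostlist is turned into a hostname with hostlist_nth() and then into a job-global host id with pmixp_info_job_hostid(). A minimal sketch of that step as a hypothetical helper (the name is an assumption; the calls are the ones used in the example):

/* Hypothetical helper: translate a peer's position in the collective
 * hostlist into the job-global host id used for Slurm-level messaging. */
static int _peerid_to_hostid(hostlist_t hl, int peerid)
{
	char *host = hostlist_nth(hl, peerid);	/* malloc'd by hostlist API */
	int hostid = pmixp_info_job_hostid(host);

	free(host);
	return hostid;
}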
Example #5
void pmixp_coll_ring_log(pmixp_coll_t *coll)
{
	int i;
	pmixp_coll_ring_t *ring = &coll->state.ring;
	char *nodename, *next, *prev;
	char *out_str = NULL;

	PMIXP_ERROR("%p: %s state seq=%d",
		    coll, pmixp_coll_type2str(coll->type), coll->seq);
	nodename = pmixp_info_job_host(coll->my_peerid);
	PMIXP_ERROR("my peerid: %d:%s", coll->my_peerid, nodename);
	xfree(nodename);

	next = pmixp_info_job_host(_ring_next_id(coll));
	prev = pmixp_info_job_host(_ring_prev_id(coll));
	xstrfmtcat(out_str,"neighbor id: next %d:%s, prev %d:%s",
		   _ring_next_id(coll), next, _ring_prev_id(coll), prev);
	PMIXP_ERROR("%s", out_str);
	xfree(next);
	xfree(prev);
	xfree(out_str);

	for (i = 0; i < PMIXP_COLL_RING_CTX_NUM; i++) {
		pmixp_coll_ring_ctx_t *coll_ctx = &ring->ctx_array[i];

		PMIXP_ERROR("Context ptr=%p, #%d, in-use=%d",
			    coll_ctx, i, coll_ctx->in_use);

		if (coll_ctx->in_use) {
			int id;
			char *done_contrib, *wait_contrib;
			hostlist_t hl_done_contrib, hl_wait_contrib;

			pmixp_hostset_from_ranges(coll->pset.procs,
						  coll->pset.nprocs,
						  &hl_done_contrib);
			hl_wait_contrib = hostlist_copy(hl_done_contrib);

			PMIXP_ERROR("\t seq=%d contribs: loc=%d/prev=%d/fwd=%d",
				    coll_ctx->seq, coll_ctx->contrib_local,
				    coll_ctx->contrib_prev,
				    coll_ctx->forward_cnt);
			PMIXP_ERROR("\t neighbor contribs [%d]:",
				    coll->peers_cnt);

			for (id = 0; id < coll->peers_cnt; id++) {
				char *nodename = pmixp_info_job_host(id);

				if(coll_ctx->contrib_map[id]) {
					hostlist_delete_host(hl_wait_contrib,
							     nodename);
				} else {
					hostlist_delete_host(hl_done_contrib,
							     nodename);
				}
				xfree(nodename);
			}
			done_contrib = slurm_hostlist_ranged_string_xmalloc(
				hl_done_contrib);
			wait_contrib = slurm_hostlist_ranged_string_xmalloc(
				hl_wait_contrib);
			PMIXP_ERROR("\t done contrib: %s",
				    strlen(done_contrib) ? done_contrib : "-");
			PMIXP_ERROR("\t wait contrib: %s",
				    strlen(wait_contrib) ? wait_contrib : "-");
			PMIXP_ERROR("\t status=%s",
				    pmixp_coll_ring_state2str(coll_ctx->state));
			PMIXP_ERROR("\t buf size=%u, remain=%u",
				    size_buf(coll_ctx->ring_buf),
				    remaining_buf(coll_ctx->ring_buf));
			xfree(done_contrib);
			xfree(wait_contrib);
			hostlist_destroy(hl_done_contrib);
			hostlist_destroy(hl_wait_contrib);
		}
	}
}
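
The done/wait split above is a small reusable pattern: start from two copies of the full peer host set and delete every host from the list it does not belong to. A minimal sketch as a hypothetical helper (assumes the same hostlist API and a boolean contribution map indexed by peer id, as in the example; the caller destroys both output lists):

/* Hypothetical helper: partition the peers into "done" (contrib_map[id] set)
 * and "wait" (the rest) hostlists. */
static void _split_by_contrib(hostlist_t all, const bool *contrib_map,
			      int npeers, hostlist_t *done, hostlist_t *wait)
{
	int id;

	*done = hostlist_copy(all);
	*wait = hostlist_copy(all);
	for (id = 0; id < npeers; id++) {
		char *nodename = pmixp_info_job_host(id);

		if (contrib_map[id])
			hostlist_delete_host(*wait, nodename);	/* already done */
		else
			hostlist_delete_host(*done, nodename);	/* still waiting */
		xfree(nodename);
	}
}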
Example #6
File: srun_job.c Project: VURM/slurm
/*
 * Create an srun job structure for a step w/out an allocation response msg.
 * (i.e. inside an allocation)
 */
srun_job_t *
job_step_create_allocation(resource_allocation_response_msg_t *resp)
{
	uint32_t job_id = resp->job_id;
	srun_job_t *job = NULL;
	allocation_info_t *ai = xmalloc(sizeof(*ai));
	hostlist_t hl = NULL;
	char *buf = NULL;
	int count = 0;
	uint32_t alloc_count = 0;

	ai->jobid          = job_id;
	ai->stepid         = NO_VAL;
	ai->nodelist = opt.alloc_nodelist;
	hl = hostlist_create(ai->nodelist);
	hostlist_uniq(hl);
	alloc_count = hostlist_count(hl);
	ai->nnodes = alloc_count;
	hostlist_destroy(hl);

	if (opt.exc_nodes) {
		hostlist_t exc_hl = hostlist_create(opt.exc_nodes);
		hostlist_t inc_hl = NULL;
		char *node_name = NULL;

		hl = hostlist_create(ai->nodelist);
		if(opt.nodelist) {
			inc_hl = hostlist_create(opt.nodelist);
		}
		hostlist_uniq(hl);
		//info("using %s or %s", opt.nodelist, ai->nodelist);
		while ((node_name = hostlist_shift(exc_hl))) {
			int inx = hostlist_find(hl, node_name);
			if (inx >= 0) {
				debug("excluding node %s", node_name);
				hostlist_delete_nth(hl, inx);
				ai->nnodes--;	/* decrement node count */
			}
			if(inc_hl) {
				inx = hostlist_find(inc_hl, node_name);
				if (inx >= 0) {
					error("Requested node %s is also "
					      "in the excluded list.",
					      node_name);
					error("Job not submitted.");
					hostlist_destroy(exc_hl);
					hostlist_destroy(inc_hl);
					goto error;
				}
			}
			free(node_name);
		}
		hostlist_destroy(exc_hl);

		/* We need to set this here so that if more nodes are
		 * available than requested we can set the count straight.
		 * If there is no exclude list, the same variables are set
		 * in the else branch below.
		 */
		if (!opt.nodes_set) {
			/* we don't want to set the number of nodes equal
			 * to the number of requested processes unless we
			 * know it is less than the number of nodes
			 * in the allocation
			 */
			if(opt.ntasks_set && (opt.ntasks < ai->nnodes))
				opt.min_nodes = opt.ntasks;
			else
				opt.min_nodes = ai->nnodes;
			opt.nodes_set = true;
		}
		if(!opt.max_nodes)
			opt.max_nodes = opt.min_nodes;
		if((opt.max_nodes > 0) && (opt.max_nodes < ai->nnodes))
			ai->nnodes = opt.max_nodes;

		count = hostlist_count(hl);
		if(!count) {
			error("Hostlist is now nothing!  Can't run job.");
			hostlist_destroy(hl);
			goto error;
		}
		if(inc_hl) {
			count = hostlist_count(inc_hl);
			if(count < ai->nnodes) {
				/* add more nodes to get correct number for
				   allocation */
				hostlist_t tmp_hl = hostlist_copy(hl);
				int i=0;
				int diff = ai->nnodes - count;
				buf = hostlist_ranged_string_xmalloc(inc_hl);
				hostlist_delete(tmp_hl, buf);
				xfree(buf);
				while ((node_name = hostlist_shift(tmp_hl)) &&
				       (i < diff)) {
					hostlist_push(inc_hl, node_name);
					i++;
				}
				hostlist_destroy(tmp_hl);
			}
			buf = hostlist_ranged_string_xmalloc(inc_hl);
			hostlist_destroy(inc_hl);
			xfree(opt.nodelist);
			opt.nodelist = buf;
		} else {
			if (count > ai->nnodes) {
				/* remove more nodes than needed for
				   allocation */
				int i=0;
				for (i=count; i>ai->nnodes; i--)
					hostlist_delete_nth(hl, i);
			}
			xfree(opt.nodelist);
			opt.nodelist = hostlist_ranged_string_xmalloc(hl);
		}

		hostlist_destroy(hl);
	} else {
		if (!opt.nodes_set) {
			/* we don't want to set the number of nodes equal
			 * to the number of requested processes unless we
			 * know it is less than the number of nodes
			 * in the allocation
			 */
			if(opt.ntasks_set && (opt.ntasks < ai->nnodes))
				opt.min_nodes = opt.ntasks;
			else
				opt.min_nodes = ai->nnodes;
			opt.nodes_set = true;
		}
		if(!opt.max_nodes)
			opt.max_nodes = opt.min_nodes;
		if((opt.max_nodes > 0) && (opt.max_nodes < ai->nnodes))
			ai->nnodes = opt.max_nodes;
		/* Don't reset the ai->nodelist because that is the
		 * nodelist we want to say the allocation is under
		 * opt.nodelist is what is used for the allocation.
		 */
		/* xfree(ai->nodelist); */
		/* ai->nodelist = xstrdup(buf); */
	}

	/* get the correct number of hosts to run tasks on */
	if (opt.nodelist) {
		hl = hostlist_create(opt.nodelist);
		if (opt.distribution != SLURM_DIST_ARBITRARY)
			hostlist_uniq(hl);
		if (!hostlist_count(hl)) {
			error("Hostlist is now nothing!  Can not run job.");
			hostlist_destroy(hl);
			goto error;
		}

		buf = hostlist_ranged_string_xmalloc(hl);
		count = hostlist_count(hl);
		hostlist_destroy(hl);
		/* Don't reset the ai->nodelist because that is the
		 * nodelist we want to say the allocation is under
		 * opt.nodelist is what is used for the allocation.
		 */
		/* xfree(ai->nodelist); */
		/* ai->nodelist = xstrdup(buf); */
		xfree(opt.nodelist);
		opt.nodelist = buf;
	}

	if (opt.distribution == SLURM_DIST_ARBITRARY) {
		if (count != opt.ntasks) {
			error("You asked for %d tasks but specified %d nodes",
			      opt.ntasks, count);
			goto error;
		}
	}

	if (ai->nnodes == 0) {
		error("No nodes in allocation, can't run job");
		goto error;
	}

	ai->num_cpu_groups = resp->num_cpu_groups;
	ai->cpus_per_node  = resp->cpus_per_node;
	ai->cpu_count_reps = resp->cpu_count_reps;

/* 	info("looking for %d nodes out of %s with a must list of %s", */
/* 	     ai->nnodes, ai->nodelist, opt.nodelist); */
	/*
	 * Create job
	 */
	job = _job_create_structure(ai);
error:
	xfree(ai);
	return (job);

}
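
The exclude handling near the top of the function follows a common hostlist idiom: shift hosts off the exclude list one by one and delete each from the target list. A stand-alone sketch of that idiom as a hypothetical helper (same hostlist API as the example; exc_spec is a range string such as "node[3-4]"):

/* Hypothetical helper: remove every host named in exc_spec from hl;
 * returns the number of hosts actually removed. */
static int _exclude_hosts(hostlist_t hl, const char *exc_spec)
{
	hostlist_t exc_hl = hostlist_create(exc_spec);
	char *node_name;
	int removed = 0;

	while ((node_name = hostlist_shift(exc_hl))) {
		int inx = hostlist_find(hl, node_name);
		if (inx >= 0) {
			hostlist_delete_nth(hl, inx);
			removed++;
		}
		free(node_name);
	}
	hostlist_destroy(exc_hl);
	return removed;
}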
Example #7
File: srun.c Project: supermanue/slurm
/*
 * The pack_node_list may not be ordered across multiple components, which can
 * cause problems for some MPI implementations. Put the pack_node_list records
 * in alphabetic order and reorder pack_task_cnts and pack_tids to match.
 */
static void _reorder_pack_recs(char **in_node_list, uint16_t **in_task_cnts,
			       uint32_t ***in_tids, int total_nnodes)
{
	hostlist_t in_hl, out_hl;
	uint16_t *out_task_cnts = NULL;
	uint32_t **out_tids = NULL;
	char *hostname;
	int i, j;

	in_hl = hostlist_create(*in_node_list);
	if (!in_hl) {
		error("%s: Invalid hostlist(%s)", __func__, *in_node_list);
		return;
	}
	out_hl = hostlist_copy(in_hl);
	hostlist_sort(out_hl);
	hostlist_uniq(out_hl);
	i = hostlist_count(out_hl);
	if (i != total_nnodes) {
		error("%s: Invalid hostlist(%s) count(%d)", __func__,
		      *in_node_list, total_nnodes);
		goto fini;
	}

	out_task_cnts = xmalloc(sizeof(uint16_t) * total_nnodes);
	out_tids = xmalloc(sizeof(uint32_t *) * total_nnodes);
	for (i = 0; i < total_nnodes; i++) {
		hostname = hostlist_nth(out_hl, i);
		if (!hostname) {
			error("%s: Invalid hostlist(%s) count(%d)", __func__,
			      *in_node_list, total_nnodes);
			break;
		}
		j = hostlist_find(in_hl, hostname);
		if (j == -1) {
			error("%s: Invalid hostlist(%s) parsing", __func__,
			      *in_node_list);
			free(hostname);
			break;
		}
		out_task_cnts[i] = in_task_cnts[0][j];
		out_tids[i] = in_tids[0][j];
		free(hostname);
	}

	if (i >= total_nnodes) {	/* Success */
		xfree(*in_node_list);
		*in_node_list = hostlist_ranged_string_xmalloc(out_hl);
		xfree(*in_task_cnts);
		*in_task_cnts = out_task_cnts;
		out_task_cnts = NULL;
		xfree(*in_tids);
		*in_tids = out_tids;
		out_tids = NULL;
	}

#if 0
	info("NODE_LIST[%d]:%s", total_nnodes, *in_node_list);
	for (i = 0; i < total_nnodes; i++) {
		info("TASK_CNT[%d]:%u", i, in_task_cnts[0][i]);
		for (j = 0; j < in_task_cnts[0][i]; j++) {
			info("TIDS[%d][%d]: %u", i, j, in_tids[0][i][j]);
		}
	}
#endif

fini:	hostlist_destroy(in_hl);
	hostlist_destroy(out_hl);
	xfree(out_task_cnts);
	xfree(out_tids);
}
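
The reordering hinges on one idiom: build a sorted, de-duplicated copy of the hostlist, then call hostlist_find() against the original list to get the old index of each host in its new (alphabetic) position. A minimal sketch of just that index map, as a hypothetical helper (same hostlist/xmalloc API as the example; the caller xfree()s the returned array):

/* Hypothetical helper: map[i] is the position in the original list of the
 * i-th host in sorted order; returns NULL if any lookup fails. */
static int *_sorted_index_map(hostlist_t in_hl)
{
	hostlist_t out_hl = hostlist_copy(in_hl);
	int i, n, *map;

	hostlist_sort(out_hl);
	hostlist_uniq(out_hl);
	n = hostlist_count(out_hl);
	map = xmalloc(sizeof(int) * n);
	for (i = 0; i < n; i++) {
		char *hostname = hostlist_nth(out_hl, i);

		map[i] = hostlist_find(in_hl, hostname);
		free(hostname);
		if (map[i] == -1) {	/* should not happen for a copy */
			xfree(map);
			map = NULL;
			break;
		}
	}
	hostlist_destroy(out_hl);
	return map;
}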