Beispiel #1
0
/* FIXME: client_id could be hostname or IP.
 * We probably want both to work for an exports match.
 */
static int
_match_export_hosts (Export *x, Npconn *conn)
{
    char *client_id = np_conn_get_client_id (conn);
    hostlist_t hl = NULL;
    int res = 0; /* no match */

    /* no client_id restrictions */
    if (!x->hosts) {
        res = 1;
        goto done;
    }
    if (!(hl = hostlist_create (x->hosts))) {
        np_uerror (ENOMEM);
        goto done;
    }
    /* client_id found in exports */
    if (hostlist_find (hl, client_id) != -1) {
        res = 1;
        goto done;
    }
done:
    if (hl)
        hostlist_destroy (hl);
    return res;
}
Beispiel #2
0
/* FIXME: client_id could be hostname or IP.
 * We probably want both to work for an exports match.
 */
static int
_match_export_hosts (Export *x, Npconn *conn)
{
    char *client_id = np_conn_get_client_id (conn);
    hostlist_t hl = NULL;
    int res = 0; /* no match */

    /* privport is required */
    if (x->oflags & XFLAGS_PRIVPORT && !(conn->flags & CONN_FLAGS_PRIVPORT)) {
        np_uerror (EPERM);
        goto done;
    }

    /* no client_id restrictions */
    if (!x->hosts) {
        res = 1;
        goto done;
    }
    if (!(hl = hostlist_create (x->hosts))) {
        np_uerror (ENOMEM);
        goto done;
    }
    /* client_id found in exports */
    if (hostlist_find (hl, client_id) != -1) {
        res = 1;
        goto done;
    }
done:
    if (hl)
        hostlist_destroy (hl);
    return res;
}
Beispiel #3
0
int pmixp_coll_ring_init(pmixp_coll_t *coll, hostlist_t *hl)
{
#ifdef PMIXP_COLL_DEBUG
	PMIXP_DEBUG("called");
#endif
	int i;
	pmixp_coll_ring_ctx_t *coll_ctx = NULL;
	pmixp_coll_ring_t *ring = &coll->state.ring;
	char *p;
	int rel_id = hostlist_find(*hl, pmixp_info_hostname());

	/* compute the next absolute id of the neighbor */
	p = hostlist_nth(*hl, (rel_id + 1) % coll->peers_cnt);
	ring->next_peerid = pmixp_info_job_hostid(p);
	free(p);

	ring->fwrd_buf_pool = list_create(pmixp_free_buf);
	ring->ring_buf_pool = list_create(pmixp_free_buf);

	for (i = 0; i < PMIXP_COLL_RING_CTX_NUM; i++) {
		coll_ctx = &ring->ctx_array[i];
		coll_ctx->coll = coll;
		coll_ctx->in_use = false;
		coll_ctx->seq = coll->seq;
		coll_ctx->contrib_local = false;
		coll_ctx->contrib_prev = 0;
		coll_ctx->state = PMIXP_COLL_RING_SYNC;
		// TODO bit vector
		coll_ctx->contrib_map = xmalloc(sizeof(bool) * coll->peers_cnt);
	}

	return SLURM_SUCCESS;
}
int 
wrap_hostlist_find(WRAPPERS_ARGS, hostlist_t hl, const char *hostname)
{
  assert(file && function);

  if (!hl || !hostname)
    WRAPPERS_ERR_INVALID_PARAMETERS("hostlist_find");

  /* -1 isn't an error, it indicates the host isn't found */
  return hostlist_find(hl, hostname);
}
Beispiel #5
0
int pmixp_coll_init(pmixp_coll_t *coll, pmixp_coll_type_t type,
		    const pmixp_proc_t *procs, size_t nprocs)
{
	int rc = SLURM_SUCCESS;
	hostlist_t hl;

	coll->seq = 0;
#ifndef NDEBUG
	coll->magic = PMIXP_COLL_STATE_MAGIC;
#endif
	coll->type = type;
	coll->pset.procs = xmalloc(sizeof(*procs) * nprocs);
	coll->pset.nprocs = nprocs;
	memcpy(coll->pset.procs, procs, sizeof(*procs) * nprocs);

	if (SLURM_SUCCESS != pmixp_hostset_from_ranges(procs, nprocs, &hl)) {
		/* TODO: provide ranges output routine */
		PMIXP_ERROR("Bad ranges information");
		rc = SLURM_ERROR;
		goto exit;
	}
	coll->peers_cnt = hostlist_count(hl);
	coll->my_peerid = hostlist_find(hl, pmixp_info_hostname());
#ifdef PMIXP_COLL_DEBUG
	/* if we debug collectives - store a copy of a full
	 * hostlist to resolve participant id to the hostname */
	coll->peers_hl = hostlist_copy(hl);
#endif

	switch(type) {
	case PMIXP_COLL_TYPE_FENCE_TREE:
		rc = pmixp_coll_tree_init(coll, &hl);
		break;
	case PMIXP_COLL_TYPE_FENCE_RING:
		rc = pmixp_coll_ring_init(coll, &hl);
		break;
	default:
		PMIXP_ERROR("Unknown coll type");
		rc = SLURM_ERROR;
	}
	hostlist_destroy(hl);
	if (rc) {
		goto exit;
	}

exit:
	return rc;
}
Beispiel #6
0
extern int slurm_job_cpus_allocated_on_node(job_resources_t *job_resrcs_ptr,
					    const char *node)
{
	hostlist_t node_hl;
	int node_id;

	if (!job_resrcs_ptr || !node || !job_resrcs_ptr->nodes)
		slurm_seterrno_ret(EINVAL);

	node_hl = hostlist_create(job_resrcs_ptr->nodes);
	node_id = hostlist_find(node_hl, node);
	hostlist_destroy(node_hl);
	if (node_id == -1)
		return (0); /* No cpus allocated on this node */

	return slurm_job_cpus_allocated_on_node_id(job_resrcs_ptr, node_id);
}
Beispiel #7
0
bitstr_t *get_requested_node_bitmap(void)
{
	static bitstr_t *bitmap = NULL;
	static node_info_msg_t *old_node_ptr = NULL, *new_node_ptr;
	int error_code;
	int i = 0;
	node_info_t *node_ptr = NULL;

	if (!params.hl)
		return NULL;

	if (old_node_ptr) {
		error_code = slurm_load_node(old_node_ptr->last_update,
					     &new_node_ptr, SHOW_ALL);
		if (error_code == SLURM_SUCCESS)
			slurm_free_node_info_msg(old_node_ptr);
		else if (slurm_get_errno() == SLURM_NO_CHANGE_IN_DATA)
			return bitmap;
	} else {
		error_code = slurm_load_node((time_t) NULL, &new_node_ptr,
					     SHOW_ALL);
	}

	if (bitmap)
		FREE_NULL_BITMAP(bitmap);

	if (error_code) {
		slurm_perror("slurm_load_node");
		return NULL;
	}

	old_node_ptr = new_node_ptr;

	bitmap = bit_alloc(old_node_ptr->record_count);
	for (i = 0; i < old_node_ptr->record_count; i++) {
		node_ptr = &(old_node_ptr->node_array[i]);
		if (hostlist_find(params.hl, node_ptr->name) != -1)
			bit_set(bitmap, i);
	}
	return bitmap;
}
Beispiel #8
0
/*
 * validate_alloc_node - validate that the allocating node
 * is allowed to use this partition
 * IN part_ptr - pointer to a partition
 * IN alloc_node - allocting node of the request
 * RET 1 if permitted to run, 0 otherwise
 */
extern int validate_alloc_node(struct part_record *part_ptr, char* alloc_node)
{
	int status;

 	if (part_ptr->allow_alloc_nodes == NULL)
 		return 1;	/* all allocating nodes allowed */
 	if (alloc_node == NULL)
 		return 1;	/* if no allocating node defined
				 * let it go */

 	hostlist_t hl = hostlist_create(part_ptr->allow_alloc_nodes);
 	status=hostlist_find(hl,alloc_node);
 	hostlist_destroy(hl);

 	if (status == -1)
		status = 0;
 	else
		status = 1;

 	return status;
}
Beispiel #9
0
extern void
scontrol_print_completing_job(job_info_t *job_ptr,
			      node_info_msg_t *node_info_msg)
{
	int i;
	node_info_t *node_info;
	hostlist_t all_nodes, comp_nodes, down_nodes;
	char *node_buf;

	all_nodes  = hostlist_create(job_ptr->nodes);
	comp_nodes = hostlist_create("");
	down_nodes = hostlist_create("");

	for (i=0; i<node_info_msg->record_count; i++) {
		node_info = &(node_info_msg->node_array[i]);
		if (IS_NODE_COMPLETING(node_info) &&
		    (_in_node_bit_list(i, job_ptr->node_inx)))
			hostlist_push_host(comp_nodes, node_info->name);
		else if (IS_NODE_DOWN(node_info) &&
			 (hostlist_find(all_nodes, node_info->name) != -1))
			hostlist_push_host(down_nodes, node_info->name);
	}

	fprintf(stdout, "JobId=%u ", job_ptr->job_id);

	node_buf = hostlist_ranged_string_xmalloc(comp_nodes);
	if (node_buf && node_buf[0])
		fprintf(stdout, "Nodes(COMPLETING)=%s ", node_buf);
	xfree(node_buf);

	node_buf = hostlist_ranged_string_xmalloc(down_nodes);
	if (node_buf && node_buf[0])
		fprintf(stdout, "Nodes(DOWN)=%s ", node_buf);
	xfree(node_buf);
	fprintf(stdout, "\n");

	hostlist_destroy(all_nodes);
	hostlist_destroy(comp_nodes);
	hostlist_destroy(down_nodes);
}
Beispiel #10
0
int slurm_job_cpus_allocated_str_on_node(char *cpus,
					 size_t cpus_len,
					 job_resources_t *job_resrcs_ptr,
					 const char *node)
{
	hostlist_t node_hl;
	int node_id;

	if (!job_resrcs_ptr || !node || !job_resrcs_ptr->nodes)
		slurm_seterrno_ret(EINVAL);

	node_hl = hostlist_create(job_resrcs_ptr->nodes);
	node_id = hostlist_find(node_hl, node);
	hostlist_destroy(node_hl);
	if (node_id == -1)
		return SLURM_ERROR;

	return slurm_job_cpus_allocated_str_on_node_id(cpus,
						       cpus_len,
						       job_resrcs_ptr,
						       node_id);
}
Beispiel #11
0
/*
 * setup_cluster_nodes - get cluster record list within requested
 *   time period with used nodes. Used for deciding whether a nodelist is
 *   overlapping with the required nodes.
 */
extern cluster_nodes_t *
setup_cluster_nodes(pgsql_conn_t *pg_conn, slurmdb_job_cond_t *job_cond)
{
	DEF_VARS;
	cluster_nodes_t *cnodes = NULL;
	time_t now = time(NULL);
	hostlist_t temp_hl = NULL;
	hostlist_iterator_t h_itr = NULL;

	if (!job_cond || !job_cond->used_nodes)
		return NULL;

	if (!job_cond->cluster_list || list_count(job_cond->cluster_list) != 1) {
		error("If you are doing a query against nodes "
		      "you must only have 1 cluster "
		      "you are asking for.");
		return NULL;
	}

	temp_hl = hostlist_create(job_cond->used_nodes);
	if (!hostlist_count(temp_hl)) {
		error("we didn't get any real hosts to look for.");
		hostlist_destroy(temp_hl);
		return NULL;
	}

	query = xstrdup_printf("SELECT cluster_nodes, time_start, "
			       "time_end FROM %s.%s WHERE node_name='' "
			       "AND cluster_nodes !=''",
			       (char *)list_peek(job_cond->cluster_list),
			       event_table);

	if (job_cond->usage_start) {
		if (!job_cond->usage_end)
			job_cond->usage_end = now;

		xstrfmtcat(query, " AND ((time_start<%ld) "
			   "AND (time_end>=%ld OR time_end=0))",
			   job_cond->usage_end, job_cond->usage_start);
	}

	result = DEF_QUERY_RET;
	if (!result) {
		hostlist_destroy(temp_hl);
		return NULL;
	}

	h_itr = hostlist_iterator_create(temp_hl);
	cnodes = xmalloc(sizeof(cluster_nodes_t));
	cnodes->cluster_list = list_create(_destroy_local_cluster);
	FOR_EACH_ROW {
		char *host = NULL;
		int loc = 0;
		local_cluster_t *local_cluster =
			xmalloc(sizeof(local_cluster_t));
		local_cluster->hl = hostlist_create(ROW(0));
		local_cluster->start = atoi(ROW(1));
		local_cluster->end   = atoi(ROW(2));
		local_cluster->asked_bitmap =
			bit_alloc(hostlist_count(local_cluster->hl));
		while((host = hostlist_next(h_itr))) {
			if ((loc = hostlist_find(
				    local_cluster->hl, host)) != -1)
				bit_set(local_cluster->asked_bitmap, loc);
			free(host);
		}
		hostlist_iterator_reset(h_itr);
		if (bit_ffs(local_cluster->asked_bitmap) != -1) {
			list_append(cnodes->cluster_list, local_cluster);
			if (local_cluster->end == 0) {
				local_cluster->end = now;
				cnodes->curr_cluster = local_cluster;
			}
		} else
			_destroy_local_cluster(local_cluster);
	} END_EACH_ROW;
	PQclear(result);
	hostlist_iterator_destroy(h_itr);
	if (!list_count(cnodes->cluster_list)) {
		destroy_cluster_nodes(cnodes);
		cnodes = NULL;
	}

	hostlist_destroy(temp_hl);
	return cnodes;
}
Beispiel #12
0
static void _update_sinfo(sinfo_data_t *sinfo_ptr, node_info_t *node_ptr,
			  uint32_t node_scaling)
{
	uint16_t base_state;
	uint16_t used_cpus = 0, error_cpus = 0;
	int total_cpus = 0, total_nodes = 0;
	/* since node_scaling could be less here, we need to use the
	 * global node scaling which should never change. */
	int single_node_cpus = (node_ptr->cpus / g_node_scaling);

 	base_state = node_ptr->node_state & NODE_STATE_BASE;

	if (sinfo_ptr->nodes_total == 0) {	/* first node added */
		sinfo_ptr->node_state = node_ptr->node_state;
		sinfo_ptr->features   = node_ptr->features;
		sinfo_ptr->gres       = node_ptr->gres;
		sinfo_ptr->reason     = node_ptr->reason;
		sinfo_ptr->reason_time= node_ptr->reason_time;
		sinfo_ptr->reason_uid = node_ptr->reason_uid;
		sinfo_ptr->min_cpus    = node_ptr->cpus;
		sinfo_ptr->max_cpus    = node_ptr->cpus;
		sinfo_ptr->min_sockets = node_ptr->sockets;
		sinfo_ptr->max_sockets = node_ptr->sockets;
		sinfo_ptr->min_cores   = node_ptr->cores;
		sinfo_ptr->max_cores   = node_ptr->cores;
		sinfo_ptr->min_threads = node_ptr->threads;
		sinfo_ptr->max_threads = node_ptr->threads;
		sinfo_ptr->min_disk   = node_ptr->tmp_disk;
		sinfo_ptr->max_disk   = node_ptr->tmp_disk;
		sinfo_ptr->min_mem    = node_ptr->real_memory;
		sinfo_ptr->max_mem    = node_ptr->real_memory;
		sinfo_ptr->min_weight = node_ptr->weight;
		sinfo_ptr->max_weight = node_ptr->weight;
		sinfo_ptr->min_cpu_load = node_ptr->cpu_load;
		sinfo_ptr->max_cpu_load = node_ptr->cpu_load;
		sinfo_ptr->max_cpus_per_node = sinfo_ptr->part_info->
					       max_cpus_per_node;
		sinfo_ptr->version    = node_ptr->version;
	} else if (hostlist_find(sinfo_ptr->nodes, node_ptr->name) != -1) {
		/* we already have this node in this record,
		 * just return, don't duplicate */
		return;
	} else {
		if (sinfo_ptr->min_cpus > node_ptr->cpus)
			sinfo_ptr->min_cpus = node_ptr->cpus;
		if (sinfo_ptr->max_cpus < node_ptr->cpus)
			sinfo_ptr->max_cpus = node_ptr->cpus;

		if (sinfo_ptr->min_sockets > node_ptr->sockets)
			sinfo_ptr->min_sockets = node_ptr->sockets;
		if (sinfo_ptr->max_sockets < node_ptr->sockets)
			sinfo_ptr->max_sockets = node_ptr->sockets;

		if (sinfo_ptr->min_cores > node_ptr->cores)
			sinfo_ptr->min_cores = node_ptr->cores;
		if (sinfo_ptr->max_cores < node_ptr->cores)
			sinfo_ptr->max_cores = node_ptr->cores;

		if (sinfo_ptr->min_threads > node_ptr->threads)
			sinfo_ptr->min_threads = node_ptr->threads;
		if (sinfo_ptr->max_threads < node_ptr->threads)
			sinfo_ptr->max_threads = node_ptr->threads;

		if (sinfo_ptr->min_disk > node_ptr->tmp_disk)
			sinfo_ptr->min_disk = node_ptr->tmp_disk;
		if (sinfo_ptr->max_disk < node_ptr->tmp_disk)
			sinfo_ptr->max_disk = node_ptr->tmp_disk;

		if (sinfo_ptr->min_mem > node_ptr->real_memory)
			sinfo_ptr->min_mem = node_ptr->real_memory;
		if (sinfo_ptr->max_mem < node_ptr->real_memory)
			sinfo_ptr->max_mem = node_ptr->real_memory;

		if (sinfo_ptr->min_weight> node_ptr->weight)
			sinfo_ptr->min_weight = node_ptr->weight;
		if (sinfo_ptr->max_weight < node_ptr->weight)
			sinfo_ptr->max_weight = node_ptr->weight;

		if (sinfo_ptr->min_cpu_load > node_ptr->cpu_load)
			sinfo_ptr->min_cpu_load = node_ptr->cpu_load;
		if (sinfo_ptr->max_cpu_load < node_ptr->cpu_load)
			sinfo_ptr->max_cpu_load = node_ptr->cpu_load;
	}

	hostlist_push_host(sinfo_ptr->nodes, node_ptr->name);
	if (params.match_flags.node_addr_flag)
		hostlist_push_host(sinfo_ptr->node_addr, node_ptr->node_addr);
	if (params.match_flags.hostnames_flag)
		hostlist_push_host(sinfo_ptr->hostnames, node_ptr->node_hostname);

	total_cpus = node_ptr->cpus;
	total_nodes = node_scaling;

	select_g_select_nodeinfo_get(node_ptr->select_nodeinfo,
				     SELECT_NODEDATA_SUBCNT,
				     NODE_STATE_ALLOCATED,
				     &used_cpus);
	select_g_select_nodeinfo_get(node_ptr->select_nodeinfo,
				     SELECT_NODEDATA_SUBCNT,
				     NODE_STATE_ERROR,
				     &error_cpus);

	if (params.cluster_flags & CLUSTER_FLAG_BG) {
		if (!params.match_flags.state_flag &&
		    (used_cpus || error_cpus)) {
			/* We only get one shot at this (because all states
			 * are combined together), so we need to make
			 * sure we get all the subgrps accounted. (So use
			 * g_node_scaling for safe measure) */
			total_nodes = g_node_scaling;

			sinfo_ptr->nodes_alloc += used_cpus;
			sinfo_ptr->nodes_other += error_cpus;
			sinfo_ptr->nodes_idle +=
				(total_nodes - (used_cpus + error_cpus));
			used_cpus  *= single_node_cpus;
			error_cpus *= single_node_cpus;
		} else {
			/* process only for this subgrp and then return */
			total_cpus = total_nodes * single_node_cpus;

			if ((base_state == NODE_STATE_ALLOCATED) ||
			    (base_state == NODE_STATE_MIXED) ||
			    (node_ptr->node_state & NODE_STATE_COMPLETING)) {
				sinfo_ptr->nodes_alloc += total_nodes;
				sinfo_ptr->cpus_alloc += total_cpus;
			} else if (IS_NODE_DRAIN(node_ptr) ||
				   (base_state == NODE_STATE_DOWN)) {
				sinfo_ptr->nodes_other += total_nodes;
				sinfo_ptr->cpus_other += total_cpus;
			} else {
				sinfo_ptr->nodes_idle += total_nodes;
				sinfo_ptr->cpus_idle += total_cpus;
			}

			sinfo_ptr->nodes_total += total_nodes;
			sinfo_ptr->cpus_total += total_cpus;

			return;
		}
	} else {
		if ((base_state == NODE_STATE_ALLOCATED) ||
		    (base_state == NODE_STATE_MIXED) ||
		    IS_NODE_COMPLETING(node_ptr))
			sinfo_ptr->nodes_alloc += total_nodes;
		else if (IS_NODE_DRAIN(node_ptr)
			 || (base_state == NODE_STATE_DOWN))
			sinfo_ptr->nodes_other += total_nodes;
		else
			sinfo_ptr->nodes_idle += total_nodes;
	}

	sinfo_ptr->nodes_total += total_nodes;


	sinfo_ptr->cpus_alloc += used_cpus;
	sinfo_ptr->cpus_total += total_cpus;
	total_cpus -= used_cpus + error_cpus;

	if (error_cpus) {
		sinfo_ptr->cpus_idle += total_cpus;
		sinfo_ptr->cpus_other += error_cpus;
	} else if (IS_NODE_DRAIN(node_ptr) ||
		   (base_state == NODE_STATE_DOWN)) {
		sinfo_ptr->cpus_other += total_cpus;
	} else
		sinfo_ptr->cpus_idle += total_cpus;
}
Beispiel #13
0
/*
 * _filter_out - Determine if the specified node should be filtered out or
 *	reported.
 * node_ptr IN - node to consider filtering out
 * RET - true if node should not be reported, false otherwise
 */
static bool _filter_out(node_info_t *node_ptr)
{
	static hostlist_t host_list = NULL;

	if (params.nodes) {
		if (host_list == NULL)
			host_list = hostlist_create(params.nodes);
		if (hostlist_find (host_list, node_ptr->name) == -1)
			return true;
	}

	if (params.dead_nodes && !IS_NODE_NO_RESPOND(node_ptr))
		return true;

	if (params.responding_nodes && IS_NODE_NO_RESPOND(node_ptr))
		return true;

	if (params.state_list) {
		int *node_state;
		bool match = false;
		uint16_t base_state;
		ListIterator iterator;
		uint16_t cpus = 0;
		node_info_t tmp_node, *tmp_node_ptr = &tmp_node;

		iterator = list_iterator_create(params.state_list);
		while ((node_state = list_next(iterator))) {
			tmp_node_ptr->node_state = *node_state;
			if (*node_state == NODE_STATE_DRAIN) {
				/* We search for anything that has the
				 * drain flag set */
				if (IS_NODE_DRAIN(node_ptr)) {
					match = true;
					break;
				}
			} else if (IS_NODE_DRAINING(tmp_node_ptr)) {
				/* We search for anything that gets mapped to
				 * DRAINING in node_state_string */
				if (IS_NODE_DRAINING(node_ptr)) {
					match = true;
					break;
				}
			} else if (IS_NODE_DRAINED(tmp_node_ptr)) {
				/* We search for anything that gets mapped to
				 * DRAINED in node_state_string */
				if (IS_NODE_DRAINED(node_ptr)) {
					match = true;
					break;
				}
			} else if (*node_state & NODE_STATE_FLAGS) {
				if (*node_state & node_ptr->node_state) {
					match = true;
					break;
				}
			} else if (*node_state == NODE_STATE_ERROR) {
				slurm_get_select_nodeinfo(
					node_ptr->select_nodeinfo,
					SELECT_NODEDATA_SUBCNT,
					NODE_STATE_ERROR,
					&cpus);
				if (cpus) {
					match = true;
					break;
				}
			} else if (*node_state == NODE_STATE_ALLOCATED) {
				slurm_get_select_nodeinfo(
					node_ptr->select_nodeinfo,
					SELECT_NODEDATA_SUBCNT,
					NODE_STATE_ALLOCATED,
					&cpus);
				if (params.cluster_flags & CLUSTER_FLAG_BG
				    && !cpus &&
				    (IS_NODE_ALLOCATED(node_ptr) ||
				     IS_NODE_COMPLETING(node_ptr)))
					cpus = node_ptr->cpus;
				if (cpus) {
					match = true;
					break;
				}
			} else if (*node_state == NODE_STATE_IDLE) {
				base_state = node_ptr->node_state &
					(~NODE_STATE_NO_RESPOND);
				if (base_state == NODE_STATE_IDLE) {
					match = true;
					break;
				}
			} else {
				base_state =
					node_ptr->node_state & NODE_STATE_BASE;
				if (base_state == *node_state) {
					match = true;
					break;
				}
			}
		}
		list_iterator_destroy(iterator);
		if (!match)
			return true;
	}

	return false;
}
Beispiel #14
0
/*
 * _build_sinfo_data - make a sinfo_data entry for each unique node
 *	configuration and add it to the sinfo_list for later printing.
 * sinfo_list IN/OUT - list of unique sinfo_data records to report
 * partition_msg IN - partition info message
 * node_msg IN - node info message
 * RET zero or error code
 */
static int _build_sinfo_data(List sinfo_list,
			     partition_info_msg_t *partition_msg,
			     node_info_msg_t *node_msg)
{
	pthread_attr_t attr_sinfo;
	pthread_t thread_sinfo;
	build_part_info_t *build_struct_ptr;
	node_info_t *node_ptr = NULL;
	partition_info_t *part_ptr = NULL;
	int j;

	g_node_scaling = node_msg->node_scaling;

	/* by default every partition is shown, even if no nodes */
	if ((!params.node_flag) && params.match_flags.partition_flag) {
		part_ptr = partition_msg->partition_array;
		for (j=0; j<partition_msg->record_count; j++, part_ptr++) {
			if ((!params.partition) ||
			    (_strcmp(params.partition, part_ptr->name) == 0)) {
				list_append(sinfo_list, _create_sinfo(
						    part_ptr, (uint16_t) j,
						    NULL,
						    node_msg->node_scaling));
			}
		}
	}

	if (params.filtering) {
		for (j = 0; j < node_msg->record_count; j++) {
			node_ptr = &(node_msg->node_array[j]);
			if (node_ptr->name && _filter_out(node_ptr))
				xfree(node_ptr->name);
		}
	}

	/* make sinfo_list entries for every node in every partition */
	for (j=0; j<partition_msg->record_count; j++, part_ptr++) {
		part_ptr = &(partition_msg->partition_array[j]);

		if (params.filtering && params.partition &&
		    _strcmp(part_ptr->name, params.partition))
			continue;

		if (node_msg->record_count == 1) { /* node_name_single */
			int pos = -1;
			uint16_t subgrp_size = 0;
			hostlist_t hl;

			node_ptr = &(node_msg->node_array[0]);
			if ((node_ptr->name == NULL) ||
			    (part_ptr->nodes == NULL))
				continue;
			hl = hostlist_create(part_ptr->nodes);
			pos = hostlist_find(hl, node_msg->node_array[0].name);
			hostlist_destroy(hl);
			if (pos < 0)
				continue;
			if (select_g_select_nodeinfo_get(
				   node_ptr->select_nodeinfo,
				   SELECT_NODEDATA_SUBGRP_SIZE,
				   0,
				   &subgrp_size) == SLURM_SUCCESS
			    && subgrp_size) {
				_handle_subgrps(sinfo_list,
						(uint16_t) j,
						part_ptr,
						node_ptr,
						node_msg->
						node_scaling);
			} else {
				_insert_node_ptr(sinfo_list,
						 (uint16_t) j,
						 part_ptr,
						 node_ptr,
						 node_msg->
						 node_scaling);
			}
			continue;
		}

		/* Process each partition using a separate thread */
		build_struct_ptr = xmalloc(sizeof(build_part_info_t));
		build_struct_ptr->node_msg   = node_msg;
		build_struct_ptr->part_num   = (uint16_t) j;
		build_struct_ptr->part_ptr   = part_ptr;
		build_struct_ptr->sinfo_list = sinfo_list;

		slurm_mutex_lock(&sinfo_cnt_mutex);
		sinfo_cnt++;
		slurm_mutex_unlock(&sinfo_cnt_mutex);

		slurm_attr_init(&attr_sinfo);
		if (pthread_attr_setdetachstate
		    (&attr_sinfo, PTHREAD_CREATE_DETACHED))
			error("pthread_attr_setdetachstate error %m");
		while (pthread_create(&thread_sinfo, &attr_sinfo,
				      _build_part_info,
				      (void *) build_struct_ptr)) {
			error("pthread_create error %m");
			usleep(10000);	/* sleep and retry */
		}
		slurm_attr_destroy(&attr_sinfo);
	}

	slurm_mutex_lock(&sinfo_cnt_mutex);
	while (sinfo_cnt) {
		pthread_cond_wait(&sinfo_cnt_cond, &sinfo_cnt_mutex);
	}
	slurm_mutex_unlock(&sinfo_cnt_mutex);

	_sort_hostlist(sinfo_list);
	return SLURM_SUCCESS;
}
Beispiel #15
0
/*
 * Based on ideas provided by Hongjia Cao <*****@*****.**> in PMI2 plugin
 */
int pmixp_coll_init(pmixp_coll_t *coll, const pmix_proc_t *procs,
		size_t nprocs, pmixp_coll_type_t type)
{
	hostlist_t hl;
	uint32_t nodeid = 0, nodes = 0;
	int parent_id, depth, max_depth, tmp;
	int width, my_nspace = -1;
	char *p;
	int i, *ch_nodeids = NULL;

#ifndef NDEBUG
	coll->magic = PMIXP_COLL_STATE_MAGIC;
#endif
	coll->type = type;
	coll->state = PMIXP_COLL_SYNC;
	coll->procs = xmalloc(sizeof(*procs) * nprocs);
	memcpy(coll->procs, procs, sizeof(*procs) * nprocs);
	coll->nprocs = nprocs;
	coll->my_nspace = my_nspace;

	if (SLURM_SUCCESS != _hostset_from_ranges(procs, nprocs, &hl)) {
		/* TODO: provide ranges output routine */
		PMIXP_ERROR("Bad ranges information");
		goto err_exit;
	}

	width = slurm_get_tree_width();
	nodes = hostlist_count(hl);
	nodeid = hostlist_find(hl, pmixp_info_hostname());
	reverse_tree_info(nodeid, nodes, width, &parent_id, &tmp, &depth,
			&max_depth);
	coll->children_cnt = tmp;
	coll->nodeid = nodeid;

	/* We interested in amount of direct childs */
	coll->seq = 0;
	coll->contrib_cntr = 0;
	coll->contrib_local = false;
	ch_nodeids = xmalloc(sizeof(int) * width);
	coll->ch_contribs = xmalloc(sizeof(int) * width);
	coll->children_cnt = reverse_tree_direct_children(nodeid, nodes, width,
			depth, ch_nodeids);

	/* create the hostlist with extract direct children's hostnames */
	coll->ch_hosts = hostlist_create("");
	for (i = 0; i < coll->children_cnt; i++) {
		char *hname = hostlist_nth(hl, ch_nodeids[i]);
		hostlist_push(coll->ch_hosts, hname);
	}
	/* just in case, shouldn't be needed */
	hostlist_uniq(coll->ch_hosts);
	xfree(ch_nodeids);

	if (parent_id == -1) {
		/* if we are the root of the tree:
		 * - we don't have a parent;
		 * - we have large list of all_childrens (we don't want ourselfs there)
		 */
		coll->parent_host = NULL;
		hostlist_delete_host(hl, pmixp_info_hostname());
		coll->all_children = hl;
	} else if (parent_id >= 0) {
		/* for all other nodes in the tree we need to know:
		 * - nodename of our parent;
		 * - we don't need a list of all_childrens and hl anymore
		 */
		p = hostlist_nth(hl, parent_id);
		coll->parent_host = xstrdup(p);
		/* use empty hostlist here */
		coll->all_children = hostlist_create("");
		free(p);
		hostlist_destroy(hl);
	}

	/* Collective data */
	coll->buf = pmixp_server_new_buf();
	coll->serv_offs = get_buf_offset(coll->buf);

	if (SLURM_SUCCESS != _pack_ranges(coll)) {
		PMIXP_ERROR("Cannot pack ranges to coll message header!");
		goto err_exit;
	}

	/* Callback information */
	coll->cbdata = NULL;
	coll->cbfunc = NULL;

	/* init fine grained lock */
	slurm_mutex_init(&coll->lock);

	return SLURM_SUCCESS;
err_exit:
	return SLURM_ERROR;
}
Beispiel #16
0
/*
 * The pack_node_list may not be ordered across multiple components, which can
 * cause problems for some MPI implementations. Put the pack_node_list records
 * in alphabetic order and reorder pack_task_cnts pack_tids to match
 */
static void _reorder_pack_recs(char **in_node_list, uint16_t **in_task_cnts,
			       uint32_t ***in_tids, int total_nnodes)
{
	hostlist_t in_hl, out_hl;
	uint16_t *out_task_cnts = NULL;
	uint32_t **out_tids = NULL;
	char *hostname;
	int i, j;

	in_hl = hostlist_create(*in_node_list);
	if (!in_hl) {
		error("%s: Invalid hostlist(%s)", __func__, *in_node_list);
		return;
	}
	out_hl = hostlist_copy(in_hl);
	hostlist_sort(out_hl);
	hostlist_uniq(out_hl);
	i = hostlist_count(out_hl);
	if (i != total_nnodes) {
		error("%s: Invalid hostlist(%s) count(%d)", __func__,
		      *in_node_list, total_nnodes);
		goto fini;
	}

	out_task_cnts = xmalloc(sizeof(uint16_t) * total_nnodes);
	out_tids = xmalloc(sizeof(uint32_t *) * total_nnodes);
	for (i = 0; i < total_nnodes; i++) {
		hostname = hostlist_nth(out_hl, i);
		if (!hostname) {
			error("%s: Invalid hostlist(%s) count(%d)", __func__,
			      *in_node_list, total_nnodes);
			break;
		}
		j = hostlist_find(in_hl, hostname);
		if (j == -1) {
			error("%s: Invalid hostlist(%s) parsing", __func__,
			      *in_node_list);
			free(hostname);
			break;
		}
		out_task_cnts[i] = in_task_cnts[0][j];
		out_tids[i] = in_tids[0][j];
		free(hostname);
	}

	if (i >= total_nnodes) {	/* Success */
		xfree(*in_node_list);
		*in_node_list = hostlist_ranged_string_xmalloc(out_hl);
		xfree(*in_task_cnts);
		*in_task_cnts = out_task_cnts;
		out_task_cnts = NULL;
		xfree(*in_tids);
		*in_tids = out_tids;
		out_tids = NULL;
	}

#if 0
	info("NODE_LIST[%d]:%s", total_nnodes, *in_node_list);
	for (i = 0; i < total_nnodes; i++) {
		info("TASK_CNT[%d]:%u", i, in_task_cnts[0][i]);
		for (j = 0; j < in_task_cnts[0][i]; j++) {
			info("TIDS[%d][%d]: %u", i, j, in_tids[0][i][j]);
		}
	}
#endif

fini:	hostlist_destroy(in_hl);
	hostlist_destroy(out_hl);
	xfree(out_task_cnts);
	xfree(out_tids);
}
Beispiel #17
0
int pmixp_coll_contrib_node(pmixp_coll_t *coll, char *nodename, Buf buf)
{
	int nodeid;
	char *data = NULL;
	uint32_t size;
	char *state = NULL;

	PMIXP_DEBUG("%s:%d: get contribution from node %s",
			pmixp_info_namespace(), pmixp_info_nodeid(), nodename);

	/* lock the structure */
	slurm_mutex_lock(&coll->lock);

	pmixp_coll_sanity_check(coll);

	/* fix the collective status if need */
	if (PMIXP_COLL_SYNC == coll->state) {
		PMIXP_DEBUG("%s:%d: get contribution from node %s: switch to PMIXP_COLL_FAN_IN",
			    pmixp_info_namespace(), pmixp_info_nodeid(),
			    nodename);
		coll->state = PMIXP_COLL_FAN_IN;
		coll->ts = time(NULL);
	} else if( PMIXP_COLL_FAN_OUT == coll->state) {
		PMIXP_DEBUG("%s:%d: get contribution from node %s: switch to PMIXP_COLL_FAN_OUT_IN"
			    " (next collective!)",
			    pmixp_info_namespace(), pmixp_info_nodeid(),
			    nodename);
		coll->state = PMIXP_COLL_FAN_OUT_IN;
		coll->ts_next = time(NULL);
	}
	xassert(PMIXP_COLL_FAN_IN == coll->state || PMIXP_COLL_FAN_OUT_IN == coll->state);

	/* Because of possible timeouts/delays in transmission we
	 * can receive a contribution second time. Avoid duplications
	 * by checking our records. */
	nodeid = hostlist_find(coll->ch_hosts, nodename);
	xassert(0 <= nodeid);
	if (0 > nodeid) {
		/* protect ourselfs if we are running with no asserts */
		goto proceed;
	}

	if (0 < coll->ch_contribs[nodeid]) {
		/* May be 0 or 1. If grater - transmission skew, ignore. */
		PMIXP_DEBUG("Multiple contributions from child_id=%d, hostname=%s",
			    nodeid, nodename);
		/* this is duplication, skip. */
		goto proceed;
	}

	data = get_buf_data(buf) + get_buf_offset(buf);
	size = remaining_buf(buf);
	grow_buf(coll->buf, size);
	memcpy(get_buf_data(coll->buf) + get_buf_offset(coll->buf), data, size);
	set_buf_offset(coll->buf, get_buf_offset(coll->buf) + size);

	/* increase number of individual contributions */
	coll->ch_contribs[nodeid]++;

	/* increase number of total contributions */
	coll->contrib_cntr++;

proceed:
	/* unlock the structure */
	slurm_mutex_unlock(&coll->lock);

	if( PMIXP_COLL_FAN_IN == coll->state ){
		/* make a progress if we are in fan-in state */
		_progress_fan_in(coll);
	}

	switch( coll->state ){
	case PMIXP_COLL_SYNC:
		state = "sync";
		break;
	case PMIXP_COLL_FAN_IN:
		state = "fan-in";
		break;
	case PMIXP_COLL_FAN_OUT:
		state = "fan-out";
		break;
	case PMIXP_COLL_FAN_OUT_IN:
		state = "fan-out-in";
		break;
	}

	PMIXP_DEBUG("%s:%d: get contribution from node %s: finish. State = %s",
		    pmixp_info_namespace(), pmixp_info_nodeid(), nodename,
		    state);

	return SLURM_SUCCESS;
}
Beispiel #18
0
void *
cerebrod_listener(void *arg)
{
  char buf[CEREBRO_MAX_PACKET_LEN];

  _cerebrod_listener_initialize();

  for (;;)
    {
      struct cerebrod_message *msg;
      char nodename_buf[CEREBRO_MAX_NODENAME_LEN+1];
      char nodename_key[CEREBRO_MAX_NODENAME_LEN+1];
      struct timeval tv;
      int in_cluster_flag, i, count;
      fd_set readfds;
      int recv_len = 0;
      int maxfd = 0;

      FD_ZERO(&readfds);
      Pthread_mutex_lock(&listener_fds_lock);
      for (i = 0; i < conf.listen_message_config_len; i++)
        {
          if (listener_fds[i] > maxfd)
            maxfd = listener_fds[i];
          FD_SET(listener_fds[i], &readfds);
        }

      count = Select(maxfd + 1, &readfds, NULL, NULL, NULL);

      for (i = 0; i < conf.listen_message_config_len; i++)
        {
          if (FD_ISSET(listener_fds[i], &readfds))
            {
              if ((recv_len = recvfrom(listener_fds[i], 
                                       buf, 
                                       CEREBRO_MAX_PACKET_LEN, 
                                       0, 
                                       NULL, 
                                       NULL)) < 0)
                listener_fds[i] = cerebrod_reinit_socket(listener_fds[i], 
                                                         i,
                                                         _listener_setup_socket, 
                                                         "listener: recvfrom");
              break;
            }
        }
      Pthread_mutex_unlock(&listener_fds_lock);

      /* No packet read */
      if (recv_len <= 0)
	continue;

      if (recv_len >= CEREBRO_MAX_PACKET_LEN)
        {
	  CEREBROD_DBG(("received truncated packet"));
          continue;
        }

      if (_cerebrod_message_check_version(buf, recv_len) < 0)
        {
	  CEREBROD_DBG(("received invalid version packet"));
          continue;
        }

      if (!(msg = _cerebrod_message_unmarshall(buf, recv_len)))
        {
	  CEREBROD_DBG(("received unmarshallable packet"));
          continue;
        }

      _cerebrod_message_dump(msg, "Received Message");
      
      /* Guarantee ending '\0' character */
      memset(nodename_buf, '\0', CEREBRO_MAX_NODENAME_LEN+1);
      memcpy(nodename_buf, msg->nodename, CEREBRO_MAX_NODENAME_LEN);

      if (!strlen(nodename_buf))
        {
          CEREBROD_DBG(("received null nodename"));
          cerebrod_message_destroy(msg);
          continue;
        }

      if (found_clusterlist_module)
        {
          if ((in_cluster_flag = clusterlist_module_node_in_cluster(clusterlist_handle,
                                                                    nodename_buf)) < 0)
            CEREBROD_EXIT(("clusterlist_module_node_in_cluster: %s", nodename_buf));

          /* Second chance, is this data being forwarded from another host */
          if (!in_cluster_flag)
            {
              if (Hostlist_find(conf.forward_host_accept, nodename_buf) >= 0)
                in_cluster_flag++;
            }
        }
      else
        /* must assume it is in the cluster */
        /* Note, there is no need to handle 'forward_host_accept' under this case, 
         * since we don't know if it is in the cluster or not anyways.
         */
        in_cluster_flag = 1;
      
      if (!in_cluster_flag)
	{
	  CEREBROD_DBG(("received non-cluster packet: %s", nodename_buf));
          cerebrod_message_destroy(msg);
	  continue;
	}
      
      memset(nodename_key, '\0', CEREBRO_MAX_NODENAME_LEN+1);

      if (found_clusterlist_module)
        {
          if (clusterlist_module_get_nodename(clusterlist_handle,
                                              nodename_buf,
                                              nodename_key, 
                                              CEREBRO_MAX_NODENAME_LEN+1) < 0)
            {
              CEREBROD_DBG(("clusterlist_module_get_nodename: %s", nodename_buf));
              cerebrod_message_destroy(msg);
              continue;
            }
        }
      else
        memcpy(nodename_key, nodename_buf, CEREBRO_MAX_NODENAME_LEN+1);

      Gettimeofday(&tv, NULL);
      cerebrod_listener_data_update(nodename_key, msg, tv.tv_sec);

      /* Forward data as necessary.  Note, there is no need to
       * marshall data, it should already be marshalled when we
       * read it earlier.
       */
      for (i = 0; i < conf.forward_message_config_len; i++)
        {
          /* if the forward destination is local to the machine, don't forward */
          if (conf.forward_message_config[i].ip_is_local)
            continue;

          if (!forwarding_info[i].hosts
              || hostlist_find(forwarding_info[i].hosts, nodename_key) >= 0)
            {
              struct sockaddr *addr;
              struct sockaddr_in msgaddr;
              unsigned int addrlen;
              int rv;
              
              memset(&msgaddr, '\0', sizeof(struct sockaddr_in));
              msgaddr.sin_family = AF_INET;
              msgaddr.sin_port = htons(conf.forward_message_config[i].destination_port);
              memcpy(&msgaddr.sin_addr,
                     &conf.forward_message_config[i].ip_in_addr,
                     sizeof(struct in_addr));
              
              addr = (struct sockaddr *)&msgaddr;
              addrlen = sizeof(struct sockaddr_in);
              
              _cerebrod_message_dump(msg, "Forwarding Message");

              Pthread_mutex_lock(&forwarding_info[i].lock);
              
              if ((rv = sendto(forwarding_info[i].fd,
                               buf,
                               recv_len,
                               0,
                               addr,
                               addrlen)) != recv_len)
                {
                  if (rv < 0)
                    forwarding_info[i].fd = cerebrod_reinit_socket(forwarding_info[i].fd,
                                                                   i,
                                                                   _forwarding_setup_socket,
                                                                   "forwarding: sendto");
                  else
                    CEREBROD_ERR(("sendto: invalid bytes sent"));
                }
              
              Pthread_mutex_unlock(&forwarding_info[i].lock);
            }
        }

      cerebrod_message_destroy(msg);
    }

  return NULL;			/* NOT REACHED */
}
Beispiel #19
0
/*
 * Based on ideas provided by Hongjia Cao <*****@*****.**> in PMI2 plugin
 */
int pmixp_coll_init(pmixp_coll_t *coll, const pmix_proc_t *procs,
		    size_t nprocs, pmixp_coll_type_t type)
{
	hostlist_t hl;
	int max_depth, width, depth, i;
	char *p;

#ifndef NDEBUG
	coll->magic = PMIXP_COLL_STATE_MAGIC;
#endif
	coll->type = type;
	coll->state = PMIXP_COLL_SYNC;
	coll->pset.procs = xmalloc(sizeof(*procs) * nprocs);
	coll->pset.nprocs = nprocs;
	memcpy(coll->pset.procs, procs, sizeof(*procs) * nprocs);

	if (SLURM_SUCCESS != _hostset_from_ranges(procs, nprocs, &hl)) {
		/* TODO: provide ranges output routine */
		PMIXP_ERROR("Bad ranges information");
		goto err_exit;
	}
#ifdef PMIXP_COLL_DEBUG
	/* if we debug collectives - store a copy of a full
	 * hostlist to resolve participant id to the hostname */
	coll->peers_hl = hostlist_copy(hl);
#endif

	width = slurm_get_tree_width();
	coll->peers_cnt = hostlist_count(hl);
	coll->my_peerid = hostlist_find(hl, pmixp_info_hostname());
	reverse_tree_info(coll->my_peerid, coll->peers_cnt, width,
			  &coll->prnt_peerid, &coll->chldrn_cnt, &depth,
			  &max_depth);

	/* We interested in amount of direct childs */
	coll->seq = 0;
	coll->contrib_children = 0;
	coll->contrib_local = false;
	coll->chldrn_ids = xmalloc(sizeof(int) * width);
	coll->contrib_chld = xmalloc(sizeof(int) * width);
	coll->chldrn_cnt = reverse_tree_direct_children(coll->my_peerid,
							coll->peers_cnt,
							  width, depth,
							  coll->chldrn_ids);
	if (coll->prnt_peerid == -1) {
		/* if we are the root of the tree:
		 * - we don't have a parent;
		 * - we have large list of all_childrens (we don't want
		 * ourselfs there)
		 */
		coll->prnt_host = NULL;
		coll->all_chldrn_hl = hostlist_copy(hl);
		hostlist_delete_host(coll->all_chldrn_hl,
				     pmixp_info_hostname());
		coll->chldrn_str =
			hostlist_ranged_string_xmalloc(coll->all_chldrn_hl);
	} else {
		/* for all other nodes in the tree we need to know:
		 * - nodename of our parent;
		 * - we don't need a list of all_childrens and hl anymore
		 */

		/*
		 * setup parent id's
		 */
		p = hostlist_nth(hl, coll->prnt_peerid);
		coll->prnt_host = xstrdup(p);
		free(p);
		/* reset prnt_peerid to the global peer */
		coll->prnt_peerid = pmixp_info_job_hostid(coll->prnt_host);

		/*
		 * setup root id's
		 * (we need this for the SLURM API communication case)
		 */
		p = hostlist_nth(hl, 0);
		coll->root_host = xstrdup(p);
		free(p);
		/* reset prnt_peerid to the global peer */
		coll->root_peerid = pmixp_info_job_hostid(coll->root_host);

		/* use empty hostlist here */
		coll->all_chldrn_hl = hostlist_create("");
		coll->chldrn_str = NULL;
	}

	/* fixup children peer ids to te global ones */
	for(i=0; i<coll->chldrn_cnt; i++){
		p = hostlist_nth(hl, coll->chldrn_ids[i]);
		coll->chldrn_ids[i] = pmixp_info_job_hostid(p);
		free(p);
	}
	hostlist_destroy(hl);

	/* Collective state */
	coll->ufwd_buf = pmixp_server_buf_new();
	coll->dfwd_buf = pmixp_server_buf_new();
	_reset_coll_ufwd(coll);
	_reset_coll_dfwd(coll);
	coll->cbdata = NULL;
	coll->cbfunc = NULL;

	/* init fine grained lock */
	slurm_mutex_init(&coll->lock);

	return SLURM_SUCCESS;
err_exit:
	return SLURM_ERROR;
}
Beispiel #20
0
int pmixp_coll_contrib_parent(pmixp_coll_t *coll, uint32_t peerid,
			     uint32_t seq, Buf buf)
{
#ifdef PMIXP_COLL_DEBUG
	char *nodename = NULL;
	int lpeerid = -1;
#endif
	char *data_src = NULL, *data_dst = NULL;
	uint32_t size;
	int expected_peerid;

	/* lock the structure */
	slurm_mutex_lock(&coll->lock);

	if (pmixp_info_srv_direct_conn()) {
		expected_peerid = coll->prnt_peerid;
	} else {
		expected_peerid = coll->root_peerid;
	}

	/* Sanity check */
	pmixp_coll_sanity_check(coll);
	if (expected_peerid != peerid) {
		char *nodename = pmixp_info_job_host(peerid);
		/* protect ourselfs if we are running with no asserts */
		PMIXP_ERROR("%p: parent contrib from bad nodeid=%s:%u, "
			    "expect=%d",
			    coll, nodename, peerid, expected_peerid);
		xfree(nodename);
		goto proceed;
	}

#ifdef PMIXP_COLL_DEBUG
	nodename = pmixp_info_job_host(peerid);
	lpeerid = hostlist_find(coll->peers_hl, nodename);
	/* Mark this event */
	PMIXP_DEBUG("%p: contrib/rem from %s:%d(%d): state=%s, size=%u",
		    coll, nodename, peerid, lpeerid,
		    pmixp_coll_state2str(coll->state), remaining_buf(buf));
#endif

	switch (coll->state) {
	case PMIXP_COLL_SYNC:
	case PMIXP_COLL_COLLECT:
		/* It looks like a retransmission attempt when remote side
		 * identified transmission failure, but we actually successfuly
		 * received the message */
#ifdef PMIXP_COLL_DEBUG
		PMIXP_DEBUG("%p: prev contrib from %s:%d(%d): "
			    "seq=%u, cur_seq=%u, state=%s",
			    coll, nodename, peerid, lpeerid,
			    seq, coll->seq,
			    pmixp_coll_state2str(coll->state));
#endif
		/* sanity check */
		if ((coll->seq - 1) != seq) {
			/* FATAL: should not happen in normal workflow */
			char *nodename = pmixp_info_job_host(peerid);
			PMIXP_ERROR("%p: unexpected contrib from %s:%d: "
				    "contrib_seq = %d, coll->seq = %d, "
				    "state=%s",
				    coll, nodename, peerid,
				    seq, coll->seq,
				    pmixp_coll_state2str(coll->state));
			xfree(nodename);
			xassert((coll->seq - 1) == seq);
			abort();
		}
		goto proceed;
	case PMIXP_COLL_UPFWD_WSC:{
		/* we are not actually ready to receive this contribution as
		 * the upward portion of the collective wasn't received yet.
		 * This should not happen as SAPI (SLURM API) is blocking and
		 * we chould transit to PMIXP_COLL_UPFWD_WPC immediately */
		/* FATAL: should not happen in normal workflow */
		char *nodename = pmixp_info_job_host(peerid);
		PMIXP_ERROR("%p: unexpected contrib from %s:%d: "
			    "contrib_seq = %d, coll->seq = %d, "
			    "state=%s",
			    coll, nodename, peerid,
			    seq, coll->seq,
			    pmixp_coll_state2str(coll->state));
		xfree(nodename);
		xassert((coll->seq - 1) == seq);
		abort();
	}
	case PMIXP_COLL_UPFWD:
	case PMIXP_COLL_UPFWD_WPC:
		/* we were waiting for this */
		break;
	case PMIXP_COLL_DOWNFWD:
		/* It looks like a retransmission attempt when remote side
		 * identified transmission failure, but we actually successfuly
		 * received the message */
#ifdef PMIXP_COLL_DEBUG
		PMIXP_DEBUG("%p: double contrib from %s:%d(%d) "
			    "seq=%u, cur_seq=%u, state=%s",
			    coll, nodename, peerid, lpeerid,
			    seq, coll->seq, pmixp_coll_state2str(coll->state));
#endif
		/* sanity check */
		if (coll->seq != seq) {
			char *nodename = pmixp_info_job_host(peerid);
			/* FATAL: should not happen in normal workflow */
			PMIXP_ERROR("%p: unexpected contrib from %s:%d: "
				    "seq = %d, coll->seq = %d, state=%s",
				    coll, nodename, peerid,
				    seq, coll->seq,
				    pmixp_coll_state2str(coll->state));
			xassert((coll->seq - 1) == seq);
			xfree(nodename);
			abort();
		}
		goto proceed;
	default:
		/* should not happen in normal workflow */
		PMIXP_ERROR("%p: unknown collective state %s",
			    coll, pmixp_coll_state2str(coll->state));
		abort();
	}

	/* Because of possible timeouts/delays in transmission we
	 * can receive a contribution second time. Avoid duplications
	 * by checking our records. */
	if (coll->contrib_prnt) {
		char *nodename = pmixp_info_job_host(peerid);
		/* May be 0 or 1. If grater - transmission skew, ignore.
		 * NOTE: this output is not on the critical path -
		 * don't preprocess it out */
		PMIXP_DEBUG("%p: multiple contributions from parent %s:%d",
			    coll, nodename, peerid);
		xfree(nodename);
		/* this is duplication, skip. */
		goto proceed;
	}
	coll->contrib_prnt = true;

	data_src = get_buf_data(buf) + get_buf_offset(buf);
	size = remaining_buf(buf);
	pmixp_server_buf_reserve(coll->dfwd_buf, size);

	data_dst = get_buf_data(coll->dfwd_buf) +
			get_buf_offset(coll->dfwd_buf);
	memcpy(data_dst, data_src, size);
	set_buf_offset(coll->dfwd_buf,
		       get_buf_offset(coll->dfwd_buf) + size);
proceed:
	_progress_coll(coll);

#ifdef PMIXP_COLL_DEBUG
	if (nodename) {
		PMIXP_DEBUG("%p: finish: node=%s:%d(%d), state=%s",
			    coll, nodename, peerid, lpeerid,
			    pmixp_coll_state2str(coll->state));
		xfree(nodename);
	}
#endif
	/* unlock the structure */
	slurm_mutex_unlock(&coll->lock);

	return SLURM_SUCCESS;
}
Beispiel #21
0
int pmixp_coll_contrib_child(pmixp_coll_t *coll, uint32_t peerid,
			     uint32_t seq, Buf buf)
{
	char *data_src = NULL, *data_dst = NULL;
	uint32_t size;
	int chld_id;

	/* lock the structure */
	slurm_mutex_lock(&coll->lock);
	pmixp_coll_sanity_check(coll);
	if (0 > (chld_id = _chld_id(coll, peerid))) {
		char *nodename = pmixp_info_job_host(peerid);
		char *avail_ids = _chld_ids_str(coll);
		PMIXP_DEBUG("%p: contribution from the non-child node "
			    "%s:%d, acceptable ids: %s",
			    coll, nodename, peerid, avail_ids);
		xfree(nodename);
		xfree(avail_ids);
	}

#ifdef PMIXP_COLL_DEBUG
	char *nodename = pmixp_info_job_host(peerid);
	int lpeerid = hostlist_find(coll->peers_hl, nodename);
	PMIXP_DEBUG("%p: contrib/rem from %s:%d(%d:%d):, state=%s, size=%u",
		    coll, nodename, peerid, lpeerid, chld_id,
		    pmixp_coll_state2str(coll->state),
		    remaining_buf(buf));
#endif

	switch (coll->state) {
	case PMIXP_COLL_SYNC:
		/* change the state */
		coll->ts = time(NULL);
		/* fall-thru */
	case PMIXP_COLL_COLLECT:
		/* sanity check */
		if (coll->seq != seq) {
			char *nodename = pmixp_info_job_host(peerid);
			/* FATAL: should not happen in normal workflow */
			PMIXP_ERROR("%p: unexpected contrib from %s:%d "
				    "(child #%d) seq = %d, coll->seq = %d, "
				    "state=%s",
				    coll, nodename, peerid, chld_id,
				    seq, coll->seq,
				    pmixp_coll_state2str(coll->state));
			xassert(coll->seq == seq);
			abort();
		}
		break;
	case PMIXP_COLL_UPFWD:
	case PMIXP_COLL_UPFWD_WSC:
		/* FATAL: should not happen in normal workflow */
		PMIXP_ERROR("%p: unexpected contrib from %s:%d, state = %s",
			    coll, nodename, peerid,
			    pmixp_coll_state2str(coll->state));
		xassert(0);
		abort();
	case PMIXP_COLL_UPFWD_WPC:
	case PMIXP_COLL_DOWNFWD:
#ifdef PMIXP_COLL_DEBUG
		/* It looks like a retransmission attempt when remote side
		 * identified transmission failure, but we actually successfuly
		 * received the message */
		PMIXP_DEBUG("%p: contrib for the next collective "
			    "from=%s:%d(%d:%d) contrib_seq=%u, coll->seq=%u, "
			    "state=%s",
			    coll, nodename, peerid, lpeerid, chld_id,
			    seq, coll->seq, pmixp_coll_state2str(coll->state));
#endif
		if ((coll->seq +1) != seq) {
			char *nodename = pmixp_info_job_host(peerid);
			/* should not happen in normal workflow */
			PMIXP_ERROR("%p: unexpected contrib from %s:%d(x:%d) "
				    "seq = %d, coll->seq = %d, "
				    "state=%s",
				    coll, nodename, peerid, chld_id,
				    seq, coll->seq,
				    pmixp_coll_state2str(coll->state));
			xfree(nodename);
			xassert((coll->seq +1) == seq);
			abort();
		}
		break;
	default:
		/* should not happen in normal workflow */
		PMIXP_ERROR("%p: unknown collective state %s",
			    coll, pmixp_coll_state2str(coll->state));
		abort();
	}

	/* Because of possible timeouts/delays in transmission we
	 * can receive a contribution second time. Avoid duplications
	 * by checking our records. */
	if (coll->contrib_chld[chld_id]) {
		char *nodename = pmixp_info_job_host(peerid);
		/* May be 0 or 1. If grater - transmission skew, ignore.
		 * NOTE: this output is not on the critical path -
		 * don't preprocess it out */
		PMIXP_DEBUG("%p: multiple contribs from %s:%d(x:%d)",
			    coll, nodename, peerid, chld_id);
		/* this is duplication, skip. */
		xfree(nodename);
		goto proceed;
	}

	data_src = get_buf_data(buf) + get_buf_offset(buf);
	size = remaining_buf(buf);
	pmixp_server_buf_reserve(coll->ufwd_buf, size);
	data_dst = get_buf_data(coll->ufwd_buf) +
			get_buf_offset(coll->ufwd_buf);
	memcpy(data_dst, data_src, size);
	set_buf_offset(coll->ufwd_buf, get_buf_offset(coll->ufwd_buf) + size);

	/* increase number of individual contributions */
	coll->contrib_chld[chld_id] = true;
	/* increase number of total contributions */
	coll->contrib_children++;

proceed:
	_progress_coll(coll);

#ifdef PMIXP_COLL_DEBUG
	PMIXP_DEBUG("%p: finish: node=%s:%d(%d:%d), state=%s",
		    coll, nodename, peerid, lpeerid, chld_id,
		    pmixp_coll_state2str(coll->state));
	xfree(nodename);
#endif
	/* unlock the structure */
	slurm_mutex_unlock(&coll->lock);

	return SLURM_SUCCESS;
}
extern List setup_cluster_list_with_inx(mysql_conn_t *mysql_conn,
					slurmdb_job_cond_t *job_cond,
					void **curr_cluster)
{
	List local_cluster_list = NULL;
	time_t now = time(NULL);
	MYSQL_RES *result = NULL;
	MYSQL_ROW row;
	hostlist_t temp_hl = NULL;
	hostlist_iterator_t h_itr = NULL;
	char *query = NULL;
	int dims = 0;

	if (!job_cond || !job_cond->used_nodes)
		return NULL;

	if (!job_cond->cluster_list
	    || list_count(job_cond->cluster_list) != 1) {
		error("If you are doing a query against nodes "
		      "you must only have 1 cluster "
		      "you are asking for.");
		return NULL;
	}

	/* get the dimensions of this cluster so we know how to deal
	   with the hostlists */
	query = xstrdup_printf("select dimensions, flags from %s where "
			       "name='%s'",
			       cluster_table,
			       (char *)list_peek(job_cond->cluster_list));

	debug4("%d(%s:%d) query\n%s",
	       mysql_conn->conn, THIS_FILE, __LINE__, query);
	if (!(result = mysql_db_query_ret(mysql_conn, query, 0))) {
		xfree(query);
		return NULL;
	}
	xfree(query);

	if (!(row = mysql_fetch_row(result))) {
		error("Couldn't get the dimensions of cluster '%s'.",
		      (char *)list_peek(job_cond->cluster_list));
		mysql_free_result(result);
		return NULL;
	}

	/* On a Cray System when dealing with hostlists as we are here
	   this always needs to be 1.
	*/
	if (slurm_atoul(row[1]) & CLUSTER_FLAG_CRAY_A)
		dims = 1;
	else
		dims = atoi(row[0]);

	mysql_free_result(result);

	temp_hl = hostlist_create_dims(job_cond->used_nodes, dims);
	if (hostlist_count(temp_hl) <= 0) {
		error("we didn't get any real hosts to look for.");
		goto no_hosts;
	}
	h_itr = hostlist_iterator_create(temp_hl);

	query = xstrdup_printf("select cluster_nodes, time_start, "
			       "time_end from \"%s_%s\" where node_name='' "
			       "&& cluster_nodes !=''",
			       (char *)list_peek(job_cond->cluster_list),
			       event_table);

	if (job_cond->usage_start) {
		if (!job_cond->usage_end)
			job_cond->usage_end = now;

		xstrfmtcat(query,
			   " && ((time_start < %ld) "
			   "&& (time_end >= %ld || time_end = 0))",
			   job_cond->usage_end, job_cond->usage_start);
	}

	if (debug_flags & DEBUG_FLAG_DB_JOB)
		DB_DEBUG(mysql_conn->conn, "query\n%s", query);
	if (!(result = mysql_db_query_ret(mysql_conn, query, 0))) {
		xfree(query);
		goto no_hosts;
	}
	xfree(query);

	local_cluster_list = list_create(_destroy_local_cluster);
	while ((row = mysql_fetch_row(result))) {
		char *host = NULL;
		int loc = 0;
		local_cluster_t *local_cluster =
			xmalloc(sizeof(local_cluster_t));
		local_cluster->hl = hostlist_create_dims(row[0], dims);
		local_cluster->start = slurm_atoul(row[1]);
		local_cluster->end   = slurm_atoul(row[2]);
		local_cluster->asked_bitmap =
			bit_alloc(hostlist_count(local_cluster->hl));
		while ((host = hostlist_next_dims(h_itr, dims))) {
			if ((loc = hostlist_find(
				     local_cluster->hl, host)) != -1)
				bit_set(local_cluster->asked_bitmap, loc);
			free(host);
		}
		hostlist_iterator_reset(h_itr);
		if (bit_ffs(local_cluster->asked_bitmap) != -1) {
			list_append(local_cluster_list, local_cluster);
			if (local_cluster->end == 0) {
				local_cluster->end = now;
				(*curr_cluster) = local_cluster;
			} else if (!(*curr_cluster)
				   || (((local_cluster_t *)(*curr_cluster))->end
				       < local_cluster->end)) {
				(*curr_cluster) = local_cluster;
			}
		} else
			_destroy_local_cluster(local_cluster);
	}
	mysql_free_result(result);

	if (!list_count(local_cluster_list)) {
		FREE_NULL_LIST(local_cluster_list);
		local_cluster_list = NULL;
		goto no_hosts;
	}

no_hosts:

	hostlist_iterator_destroy(h_itr);
	hostlist_destroy(temp_hl);

	return local_cluster_list;
}
Beispiel #23
0
/* Return false if this node's data needs to be added to sinfo's table of
 * data to print. Return true if it is duplicate/redundant data. */
static bool _match_node_data(sinfo_data_t *sinfo_ptr, node_info_t *node_ptr)
{
	uint32_t tmp = 0;

	if (params.node_flag)
		return false;

	if (params.match_flags.hostnames_flag &&
	    (hostlist_find(sinfo_ptr->hostnames,
			   node_ptr->node_hostname) == -1))
		return false;

	if (params.match_flags.node_addr_flag &&
	    (hostlist_find(sinfo_ptr->node_addr, node_ptr->node_addr) == -1))
		return false;

	if (sinfo_ptr->nodes &&
	    params.match_flags.features_flag &&
	    (xstrcmp(node_ptr->features, sinfo_ptr->features)))
		return false;

	if (sinfo_ptr->nodes &&
	    params.match_flags.features_act_flag &&
	    (xstrcmp(node_ptr->features_act, sinfo_ptr->features_act)))
		return false;

	if (sinfo_ptr->nodes &&
	    params.match_flags.gres_flag &&
	    (xstrcmp(node_ptr->gres, sinfo_ptr->gres)))
		return false;

	if (sinfo_ptr->nodes &&
	    params.match_flags.reason_flag &&
	    (xstrcmp(node_ptr->reason, sinfo_ptr->reason)))
		return false;

	if (sinfo_ptr->nodes &&
	    params.match_flags.reason_timestamp_flag &&
	    (node_ptr->reason_time != sinfo_ptr->reason_time))
		return false;

	if (sinfo_ptr->nodes &&
	    params.match_flags.reason_user_flag &&
	    node_ptr->reason_uid != sinfo_ptr->reason_uid) {
		return false;
	}

	if (params.match_flags.state_flag) {
		char *state1, *state2;
		state1 = node_state_string(node_ptr->node_state);
		state2 = node_state_string(sinfo_ptr->node_state);
		if (xstrcmp(state1, state2))
			return false;
	}

	select_g_select_nodeinfo_get(node_ptr->select_nodeinfo,
				     SELECT_NODEDATA_MEM_ALLOC,
				     NODE_STATE_ALLOCATED,
				     &tmp);
	if (params.match_flags.alloc_mem_flag &&
	    (tmp != sinfo_ptr->alloc_memory))
		return false;

	/* If no need to exactly match sizes, just return here
	 * otherwise check cpus, disk, memory and weigth individually */
	if (!params.exact_match)
		return true;

	if (params.match_flags.cpus_flag &&
	    (node_ptr->cpus        != sinfo_ptr->min_cpus))
		return false;

	if (params.match_flags.sockets_flag &&
	    (node_ptr->sockets     != sinfo_ptr->min_sockets))
		return false;
	if (params.match_flags.cores_flag &&
	    (node_ptr->cores       != sinfo_ptr->min_cores))
		return false;
	if (params.match_flags.threads_flag &&
	    (node_ptr->threads     != sinfo_ptr->min_threads))
		return false;
	if (params.match_flags.sct_flag &&
	    ((node_ptr->sockets     != sinfo_ptr->min_sockets) ||
	     (node_ptr->cores       != sinfo_ptr->min_cores) ||
	     (node_ptr->threads     != sinfo_ptr->min_threads)))
		return false;
	if (params.match_flags.disk_flag &&
	    (node_ptr->tmp_disk    != sinfo_ptr->min_disk))
		return false;
	if (params.match_flags.memory_flag &&
	    (node_ptr->real_memory != sinfo_ptr->min_mem))
		return false;
	if (params.match_flags.weight_flag &&
	    (node_ptr->weight      != sinfo_ptr->min_weight))
		return false;
	if (params.match_flags.cpu_load_flag &&
	    (node_ptr->cpu_load        != sinfo_ptr->min_cpu_load))
		return false;
	if (params.match_flags.free_mem_flag &&
	    (node_ptr->free_mem        != sinfo_ptr->min_free_mem))
		return false;
	if (params.match_flags.version_flag &&
	    (node_ptr->version     != sinfo_ptr->version))
		return false;

	return true;
}
Beispiel #24
0
/*
 * _set_collectors call the split_hostlist API on the all nodes hostlist
 * to set the node to be used as a collector for unsolicited node aggregation.
 *
 * If this node is a forwarding node (first node in any hostlist),
 * then its collector and backup are the ControlMachine and it's backup.
 *
 * Otherwise, we find the hostlist containing this node.
 * The forwarding node in that hostlist becomes a collector, the next node
 * which is not this node becomes the backup.
 * That list is split, we iterate through it and searching for a list in
 * which this node is a forwarding node. If found, we set the collector and
 * backup, else this process is repeated.
 */
static void _set_collectors(char *this_node_name)
{
	slurm_ctl_conf_t *conf;
	hostlist_t  nodes;
	hostlist_t* hll = NULL;
	char *parent = NULL, *backup = NULL;
	char addrbuf[32];
	int i, j, f = -1;
	int hl_count = 0;
	uint16_t parent_port;
	uint16_t backup_port;
	bool found = false;
	bool ctldparent = true;

#ifdef HAVE_FRONT_END
	return; /* on a FrontEnd system this would never be useful. */
#endif

	if (!run_in_daemon("slurmd"))
		return; /* Only compute nodes have collectors */

	/* Set the initial iteration, collector is controller,
	 * full list is split */
	xassert(this_node_name);

	conf = slurm_conf_lock();
	nodes = _get_all_nodes();
	parent = strdup(conf->control_addr);
	if (conf->backup_addr) {
		backup = strdup(conf->backup_addr);
	}
	parent_port = conf->slurmctld_port;
	backup_port = parent_port;
	slurm_conf_unlock();
	while (!found) {
		if ( route_g_split_hostlist(nodes, &hll, &hl_count) ) {
			error("unable to split forward hostlist");
			goto clean; /* collector addrs remains null */
		}
		/* Find which hostlist contains this node */
		for (i=0; i < hl_count; i++) {
			f = hostlist_find(hll[i], this_node_name);
			if (f != -1)
				break;
		}
		if (i == hl_count) {
			fatal("ROUTE -- %s not found in node_record_table",
			      this_node_name);
		}
		if (f == 0) {
			/* we are a forwarded to node,
			 * so our parent is parent */
			if (hostlist_count(hll[i]) > 1)
				this_is_collector = true;
			xfree(msg_collect_node);
			msg_collect_node = xmalloc(sizeof(slurm_addr_t));
			if (ctldparent)
				slurm_set_addr(msg_collect_node, parent_port,
					       parent);
			else {
				slurm_conf_get_addr(parent, msg_collect_node);
				msg_collect_node->sin_port = htons(parent_port);
			}
			if (debug_flags & DEBUG_FLAG_ROUTE) {
				slurm_print_slurm_addr(msg_collect_node,
						       addrbuf, 32);
				info("ROUTE -- message collector address is %s",
				     addrbuf);
			}
			xfree(msg_collect_backup);
			if (backup) {
				msg_collect_backup =
					xmalloc(sizeof(slurm_addr_t));
				if (ctldparent) {
					slurm_set_addr(msg_collect_backup,
						       backup_port, backup);
				} else {
					slurm_conf_get_addr(backup,
							    msg_collect_backup);
					msg_collect_backup->sin_port =
						htons(backup_port);
				}
				if (debug_flags & DEBUG_FLAG_ROUTE) {
					slurm_print_slurm_addr(
						msg_collect_backup,
						addrbuf, 32);
					info("ROUTE -- message collector backup"
					     " address is %s", addrbuf);
				}
			} else {
				if (debug_flags & DEBUG_FLAG_ROUTE) {
					info("ROUTE -- no message collector "
					     "backup");
				}

			}
			found = true;
			goto clean;
		}

		/* We are not a forwarding node, the first node in this list
		 * will split the forward_list.
		 * We also know that the forwarding node is not a controller.
		 *
		 * clean up parent context */
		ctldparent = false;
		hostlist_destroy(nodes);
		if (parent)
			free(parent);
		if (backup)
			free(backup);
		nodes = hostlist_copy(hll[i]);
		for (j=0; j < hl_count; j++) {
			hostlist_destroy(hll[j]);
		}
		xfree(hll);

		/* set our parent, backup, and continue search */
		parent = hostlist_shift(nodes);
		backup = hostlist_nth(nodes, 0);
		if (strcmp(backup, this_node_name) == 0) {
			free(backup);
			backup = NULL;
			if (hostlist_count(nodes) > 1)
				backup = hostlist_nth(nodes, 1);
		}
		parent_port =  slurm_conf_get_port(parent);
		if (backup) {
			backup_port = slurm_conf_get_port(backup);
		} else
			backup_port = 0;

	}
clean:
	if (debug_flags & DEBUG_FLAG_ROUTE) {
		if (this_is_collector)
			info("ROUTE -- %s is a collector node", this_node_name);
		else
			info("ROUTE -- %s is a leaf node", this_node_name);
	}
	hostlist_destroy(nodes);
	if (parent)
		free(parent);
	if (backup)
		free(backup);
	for (i=0; i < hl_count; i++) {
		hostlist_destroy(hll[i]);
	}
	xfree(hll);
}
Beispiel #25
0
static int _resources_set(char ***env)
{
	char *p = NULL;

	/* Initialize all memory pointers that would be allocated to NULL
	 * So in case of error exit we will know what to xfree
	 */
	_pmixp_job_info.job_hl = hostlist_create("");
	_pmixp_job_info.step_hl = hostlist_create("");
	_pmixp_job_info.hostname = NULL;

	/* Save step host list */
	p = getenvp(*env, PMIXP_STEP_NODES_ENV);
	if (!p) {
		PMIXP_ERROR_NO(ENOENT, "Environment variable %s not found",
				PMIXP_STEP_NODES_ENV);
		goto err_exit;
	}
	hostlist_push(_pmixp_job_info.step_hl, p);

	/* Extract our node name */
	p = hostlist_nth(_pmixp_job_info.step_hl, _pmixp_job_info.node_id);
	_pmixp_job_info.hostname = xstrdup(p);
	free(p);

	/* Determine job-wide node id and job-wide node count */
	p = getenvp(*env, PMIXP_JOB_NODES_ENV);
	if (p == NULL) {
		p = getenvp(*env, PMIXP_JOB_NODES_ENV_DEP);
		if (p == NULL) {
			/* shouldn't happen if we are under SLURM! */
			PMIXP_ERROR_NO(ENOENT, "Neither of nodelist environment variables: %s OR %s was found!",
					PMIXP_JOB_NODES_ENV, PMIXP_JOB_NODES_ENV_DEP);
			goto err_exit;
		}
	}
	hostlist_push(_pmixp_job_info.job_hl, p);
	_pmixp_job_info.nnodes_job = hostlist_count(_pmixp_job_info.job_hl);
	_pmixp_job_info.node_id_job = hostlist_find(_pmixp_job_info.job_hl,
			_pmixp_job_info.hostname);

	/* FIXME!! ------------------------------------------------------------- */
	/* TODO: _get_task_count not always works well.
	 if (_get_task_count(env, &_pmixp_job_info.ntasks_job, &_pmixp_job_info.ncpus_job) < 0) {
	 _pmixp_job_info.ntasks_job  = _pmixp_job_info.ntasks;
	 _pmixp_job_info.ncpus_job  = _pmixp_job_info.ntasks;
	 }
	 xassert(_pmixp_job_info.ntasks <= _pmixp_job_info.ntasks_job);
	 */
	_pmixp_job_info.ntasks_job = _pmixp_job_info.ntasks;
	_pmixp_job_info.ncpus_job = _pmixp_job_info.ntasks;

	/* Save task-to-node mapping */
	p = getenvp(*env, PMIXP_SLURM_MAPPING_ENV);
	if (p == NULL) {
		/* Direct modex won't work */
		PMIXP_ERROR_NO(ENOENT, "No %s environment variable found!",
				PMIXP_SLURM_MAPPING_ENV);
		goto err_exit;
	}

	_pmixp_job_info.task_map_packed = xstrdup(p);

	return SLURM_SUCCESS;
err_exit:
	hostlist_destroy(_pmixp_job_info.job_hl);
	hostlist_destroy(_pmixp_job_info.step_hl);
	if (NULL != _pmixp_job_info.hostname) {
		xfree(_pmixp_job_info.hostname);
	}
	return SLURM_ERROR;
}
Beispiel #26
0
extern int slurm_hostlist_find(hostlist_t hl, const char *hostname)
{
	return hostlist_find(hl, hostname);
}
Beispiel #27
0
/*
 * Create an srun job structure for a step w/out an allocation response msg.
 * (i.e. inside an allocation)
 */
srun_job_t *
job_step_create_allocation(resource_allocation_response_msg_t *resp)
{
	uint32_t job_id = resp->job_id;
	srun_job_t *job = NULL;
	allocation_info_t *ai = xmalloc(sizeof(*ai));
	hostlist_t hl = NULL;
	char *buf = NULL;
	int count = 0;
	uint32_t alloc_count = 0;

	ai->jobid          = job_id;
	ai->stepid         = NO_VAL;
	ai->nodelist = opt.alloc_nodelist;
	hl = hostlist_create(ai->nodelist);
	hostlist_uniq(hl);
	alloc_count = hostlist_count(hl);
	ai->nnodes = alloc_count;
	hostlist_destroy(hl);

	if (opt.exc_nodes) {
		hostlist_t exc_hl = hostlist_create(opt.exc_nodes);
		hostlist_t inc_hl = NULL;
		char *node_name = NULL;

		hl = hostlist_create(ai->nodelist);
		if(opt.nodelist) {
			inc_hl = hostlist_create(opt.nodelist);
		}
		hostlist_uniq(hl);
		//info("using %s or %s", opt.nodelist, ai->nodelist);
		while ((node_name = hostlist_shift(exc_hl))) {
			int inx = hostlist_find(hl, node_name);
			if (inx >= 0) {
				debug("excluding node %s", node_name);
				hostlist_delete_nth(hl, inx);
				ai->nnodes--;	/* decrement node count */
			}
			if(inc_hl) {
				inx = hostlist_find(inc_hl, node_name);
				if (inx >= 0) {
					error("Requested node %s is also "
					      "in the excluded list.",
					      node_name);
					error("Job not submitted.");
					hostlist_destroy(exc_hl);
					hostlist_destroy(inc_hl);
					goto error;
				}
			}
			free(node_name);
		}
		hostlist_destroy(exc_hl);

		/* we need to set this here so if there are more nodes
		 * available than we requested we can set it
		 * straight. If there is no exclude list then we set
		 * the vars then.
		 */
		if (!opt.nodes_set) {
			/* we don't want to set the number of nodes =
			 * to the number of requested processes unless we
			 * know it is less than the number of nodes
			 * in the allocation
			 */
			if(opt.ntasks_set && (opt.ntasks < ai->nnodes))
				opt.min_nodes = opt.ntasks;
			else
				opt.min_nodes = ai->nnodes;
			opt.nodes_set = true;
		}
		if(!opt.max_nodes)
			opt.max_nodes = opt.min_nodes;
		if((opt.max_nodes > 0) && (opt.max_nodes < ai->nnodes))
			ai->nnodes = opt.max_nodes;

		count = hostlist_count(hl);
		if(!count) {
			error("Hostlist is now nothing!  Can't run job.");
			hostlist_destroy(hl);
			goto error;
		}
		if(inc_hl) {
			count = hostlist_count(inc_hl);
			if(count < ai->nnodes) {
				/* add more nodes to get correct number for
				   allocation */
				hostlist_t tmp_hl = hostlist_copy(hl);
				int i=0;
				int diff = ai->nnodes - count;
				buf = hostlist_ranged_string_xmalloc(inc_hl);
				hostlist_delete(tmp_hl, buf);
				xfree(buf);
				while ((node_name = hostlist_shift(tmp_hl)) &&
				       (i < diff)) {
					hostlist_push(inc_hl, node_name);
					i++;
				}
				hostlist_destroy(tmp_hl);
			}
			buf = hostlist_ranged_string_xmalloc(inc_hl);
			hostlist_destroy(inc_hl);
			xfree(opt.nodelist);
			opt.nodelist = buf;
		} else {
			if (count > ai->nnodes) {
				/* remove more nodes than needed for
				   allocation */
				int i=0;
				for (i=count; i>ai->nnodes; i--)
					hostlist_delete_nth(hl, i);
			}
			xfree(opt.nodelist);
			opt.nodelist = hostlist_ranged_string_xmalloc(hl);
		}

		hostlist_destroy(hl);
	} else {
		if (!opt.nodes_set) {
			/* we don't want to set the number of nodes =
			 * to the number of requested processes unless we
			 * know it is less than the number of nodes
			 * in the allocation
			 */
			if(opt.ntasks_set && (opt.ntasks < ai->nnodes))
				opt.min_nodes = opt.ntasks;
			else
				opt.min_nodes = ai->nnodes;
			opt.nodes_set = true;
		}
		if(!opt.max_nodes)
			opt.max_nodes = opt.min_nodes;
		if((opt.max_nodes > 0) && (opt.max_nodes < ai->nnodes))
			ai->nnodes = opt.max_nodes;
		/* Don't reset the ai->nodelist because that is the
		 * nodelist we want to say the allocation is under
		 * opt.nodelist is what is used for the allocation.
		 */
		/* xfree(ai->nodelist); */
		/* ai->nodelist = xstrdup(buf); */
	}

	/* get the correct number of hosts to run tasks on */
	if (opt.nodelist) {
		hl = hostlist_create(opt.nodelist);
		if (opt.distribution != SLURM_DIST_ARBITRARY)
			hostlist_uniq(hl);
		if (!hostlist_count(hl)) {
			error("Hostlist is now nothing!  Can not run job.");
			hostlist_destroy(hl);
			goto error;
		}

		buf = hostlist_ranged_string_xmalloc(hl);
		count = hostlist_count(hl);
		hostlist_destroy(hl);
		/* Don't reset the ai->nodelist because that is the
		 * nodelist we want to say the allocation is under
		 * opt.nodelist is what is used for the allocation.
		 */
		/* xfree(ai->nodelist); */
		/* ai->nodelist = xstrdup(buf); */
		xfree(opt.nodelist);
		opt.nodelist = buf;
	}

	if (opt.distribution == SLURM_DIST_ARBITRARY) {
		if (count != opt.ntasks) {
			error("You asked for %d tasks but specified %d nodes",
			      opt.ntasks, count);
			goto error;
		}
	}

	if (ai->nnodes == 0) {
		error("No nodes in allocation, can't run job");
		goto error;
	}

	ai->num_cpu_groups = resp->num_cpu_groups;
	ai->cpus_per_node  = resp->cpus_per_node;
	ai->cpu_count_reps = resp->cpu_count_reps;

/* 	info("looking for %d nodes out of %s with a must list of %s", */
/* 	     ai->nnodes, ai->nodelist, opt.nodelist); */
	/*
	 * Create job
	 */
	job = _job_create_structure(ai);
error:
   	xfree(ai);
	return (job);

}