Exemplo n.º 1
0
/*
 * _handle_subgrps - Report one node as separate sinfo entries, one per
 *	sub-state.
 *
 * For NODE_STATE_ALLOCATED and then NODE_STATE_ERROR the select plugin is
 * queried (SELECT_NODEDATA_SUBCNT) for the count in that state.  A non-zero
 * count is subtracted from node_scaling and handed to _insert_node_ptr()
 * with the node's base state replaced by that state (flag bits are kept).
 * Whatever remains of node_scaling is then inserted as NODE_STATE_IDLE.
 * When params.state_list is set, a state is only reported if some entry of
 * that list selects it (see the tests inside the loops).
 *
 * sinfo_list IN - forwarded to _insert_node_ptr()
 * part_num IN - forwarded to _insert_node_ptr()
 * part_ptr IN - forwarded to _insert_node_ptr()
 * node_ptr IN/OUT - node being reported; its node_state is overwritten
 * node_scaling IN - total count to distribute over the sub-states
 * RET - always SLURM_SUCCESS
 */
static int _handle_subgrps(List sinfo_list, uint16_t part_num,
			   partition_info_t *part_ptr,
			   node_info_t *node_ptr, uint32_t node_scaling)
{
	enum node_states sub_states[] = {
		NODE_STATE_ALLOCATED,
		NODE_STATE_ERROR
	};
	int sub_state_cnt = 2;
	int inx;
	int *wanted = NULL;
	uint16_t sub_cnt;
	ListIterator filter_itr = NULL;

	/* If we ever update the hostlist stuff to support this stuff
	 * then we can use this to tack on the end of the node name
	 * the subgrp stuff.  On bluegene systems this would be nice
	 * to see the ionodes in certain states.
	 * When asking for nodes that are reserved, we need to return
	 * all states of those nodes.
	 */
	if (params.state_list)
		filter_itr = list_iterator_create(params.state_list);

	for (inx = 0; inx < sub_state_cnt; inx++) {
		if (filter_itr) {
			/* Scratch record so the IS_NODE_* macros can be
			 * applied to a requested state value. */
			node_info_t probe, *probe_ptr = &probe;

			for (wanted = list_next(filter_itr); wanted;
			     wanted = list_next(filter_itr)) {
				probe_ptr->node_state = *wanted;
				if (*wanted == sub_states[inx])
					break;
				if (*wanted == NODE_STATE_RES)
					break;
				if (*wanted == NODE_STATE_DRAIN)
					break;
				if ((sub_states[inx] == NODE_STATE_ALLOCATED)
				    && IS_NODE_DRAINING(probe_ptr))
					break;
			}
			/* Rewind so the next pass scans the full list. */
			list_iterator_reset(filter_itr);
			if (!wanted)
				continue;	/* state was not requested */
		}

		if (select_g_select_nodeinfo_get(node_ptr->select_nodeinfo,
						 SELECT_NODEDATA_SUBCNT,
						 sub_states[inx],
						 &sub_cnt) != SLURM_SUCCESS)
			continue;
		if (!sub_cnt)
			continue;

		node_scaling -= sub_cnt;
		/* keep the flag bits, swap in this sub-state as base state */
		node_ptr->node_state =
			(node_ptr->node_state & NODE_STATE_FLAGS)
			| sub_states[inx];
		_insert_node_ptr(sinfo_list, part_num, part_ptr,
				 node_ptr, sub_cnt);
	}

	/* now handle the idle */
	if (filter_itr) {
		node_info_t probe, *probe_ptr = &probe;

		while ((wanted = list_next(filter_itr))) {
			probe_ptr->node_state = *wanted;
			if ((*wanted == NODE_STATE_IDLE)
			    || (*wanted == NODE_STATE_RES)
			    || (*wanted == NODE_STATE_DRAIN)
			    || IS_NODE_DRAINED(probe_ptr))
				break;
		}
		list_iterator_destroy(filter_itr);
		if (!wanted)
			return SLURM_SUCCESS;
	}

	node_ptr->node_state =
		(node_ptr->node_state & NODE_STATE_FLAGS) | NODE_STATE_IDLE;
	/* The signed cast catches the case where the subtractions above
	 * wrapped the unsigned counter. */
	if ((int)node_scaling > 0)
		_insert_node_ptr(sinfo_list, part_num, part_ptr,
				 node_ptr, node_scaling);

	return SLURM_SUCCESS;
}
Exemplo n.º 2
0
/*
 * _filter_out - Decide whether a node is excluded from the report by the
 *	active filters in params (nodes, dead_nodes, responding_nodes,
 *	state_list).
 * node_ptr IN - node to test against the filters
 * RET - true if the node must NOT be reported, false if it should be
 */
static bool _filter_out(node_info_t *node_ptr)
{
	/* Built once from params.nodes on first use and kept for later
	 * calls. */
	static hostlist_t wanted_hosts = NULL;
	/* Scratch record so the IS_NODE_* macros can be applied to a
	 * requested state value. */
	node_info_t probe, *probe_ptr = &probe;
	ListIterator state_itr;
	int *wanted;
	uint16_t base_state;
	uint16_t cpus = 0;
	bool found = false;

	if (params.nodes) {
		if (!wanted_hosts)
			wanted_hosts = hostlist_create(params.nodes);
		if (hostlist_find(wanted_hosts, node_ptr->name) == -1)
			return true;
	}

	if (params.dead_nodes && !IS_NODE_NO_RESPOND(node_ptr))
		return true;

	if (params.responding_nodes && IS_NODE_NO_RESPOND(node_ptr))
		return true;

	/* No state filter: nothing else can exclude the node. */
	if (!params.state_list)
		return false;

	/* The node passes if it matches any one of the requested states;
	 * scanning stops at the first match. */
	state_itr = list_iterator_create(params.state_list);
	while (!found && (wanted = list_next(state_itr))) {
		probe_ptr->node_state = *wanted;

		if (*wanted == NODE_STATE_DRAIN) {
			/* We search for anything that has the
			 * drain flag set */
			if (IS_NODE_DRAIN(node_ptr))
				found = true;
		} else if (IS_NODE_DRAINING(probe_ptr)) {
			/* We search for anything that gets mapped to
			 * DRAINING in node_state_string */
			if (IS_NODE_DRAINING(node_ptr))
				found = true;
		} else if (IS_NODE_DRAINED(probe_ptr)) {
			/* We search for anything that gets mapped to
			 * DRAINED in node_state_string */
			if (IS_NODE_DRAINED(node_ptr))
				found = true;
		} else if (*wanted & NODE_STATE_FLAGS) {
			/* Requested state carries flag bits: any bit in
			 * common with the node's state is a match. */
			if (*wanted & node_ptr->node_state)
				found = true;
		} else if (*wanted == NODE_STATE_ERROR) {
			slurm_get_select_nodeinfo(node_ptr->select_nodeinfo,
						  SELECT_NODEDATA_SUBCNT,
						  NODE_STATE_ERROR,
						  &cpus);
			if (cpus)
				found = true;
		} else if (*wanted == NODE_STATE_ALLOCATED) {
			slurm_get_select_nodeinfo(node_ptr->select_nodeinfo,
						  SELECT_NODEDATA_SUBCNT,
						  NODE_STATE_ALLOCATED,
						  &cpus);
			/* With CLUSTER_FLAG_BG set and a zero sub-count,
			 * use the node's own cpu count when the node is
			 * allocated or completing. */
			if ((params.cluster_flags & CLUSTER_FLAG_BG)
			    && !cpus
			    && (IS_NODE_ALLOCATED(node_ptr)
				|| IS_NODE_COMPLETING(node_ptr)))
				cpus = node_ptr->cpus;
			if (cpus)
				found = true;
		} else if (*wanted == NODE_STATE_IDLE) {
			/* Only the NO_RESPOND bit is ignored here; any
			 * other flag bit keeps the node from matching. */
			base_state = node_ptr->node_state &
				(~NODE_STATE_NO_RESPOND);
			if (base_state == NODE_STATE_IDLE)
				found = true;
		} else {
			/* Plain base-state comparison. */
			base_state = node_ptr->node_state & NODE_STATE_BASE;
			if (base_state == *wanted)
				found = true;
		}
	}
	list_iterator_destroy(state_itr);

	return !found;
}
Exemplo n.º 3
0
/* block_state_mutex should be locked before calling */
static int _check_all_blocks_error(int node_inx, time_t event_time,
				   char *reason)
{
	bg_record_t *bg_record = NULL;
	ListIterator itr = NULL;
	struct node_record send_node, *node_ptr;
	struct config_record config_rec;
	int total_cpus = 0;
	int rc = SLURM_SUCCESS;

	xassert(node_inx <= node_record_count);
	node_ptr = &node_record_table_ptr[node_inx];

	/* only do this if the node isn't in the DRAINED state.
	   DRAINING is ok */
	if (IS_NODE_DRAINED(node_ptr))
		return rc;

	memset(&send_node, 0, sizeof(struct node_record));
	memset(&config_rec, 0, sizeof(struct config_record));
	send_node.name = xstrdup(node_ptr->name);
	send_node.config_ptr = &config_rec;

	/* here we need to check if there are any other blocks on this
	   midplane and adjust things correctly */
	itr = list_iterator_create(bg_lists->main);
	while ((bg_record = list_next(itr))) {
		/* only look at other nodes in error state */
		if (!(bg_record->state & BG_BLOCK_ERROR_FLAG))
			continue;
		if (!bit_test(bg_record->mp_bitmap, node_inx))
			continue;
		if (bg_record->cpu_cnt >= bg_conf->cpus_per_mp) {
			total_cpus = bg_conf->cpus_per_mp;
			break;
		} else
			total_cpus += bg_record->cpu_cnt;
	}
	list_iterator_destroy(itr);

	send_node.cpus = total_cpus;
	config_rec.cpus = total_cpus;

	if (send_node.cpus) {
		if (!reason)
			reason = "update block: setting partial node down.";
		if (!node_ptr->reason)
			node_ptr->reason = xstrdup(reason);
		node_ptr->reason_time = event_time;
		node_ptr->reason_uid = slurm_get_slurm_user_id();

		send_node.node_state = NODE_STATE_ERROR;
		rc = clusteracct_storage_g_node_down(acct_db_conn,
						     &send_node, event_time,
						     reason,
						     node_ptr->reason_uid);
	} else {
		if (node_ptr->reason)
			xfree(node_ptr->reason);
		node_ptr->reason_time = 0;
		node_ptr->reason_uid = NO_VAL;

		send_node.node_state = NODE_STATE_IDLE;
		rc = clusteracct_storage_g_node_up(acct_db_conn,
						   &send_node, event_time);
	}

	xfree(send_node.name);

	return rc;
}