Example 1
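/* Set NODE_STATE_MIXED on any node that the select plugin reports as
 * having some, but not all, of its CPUs allocated. */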
static void _set_node_mixed(node_info_msg_t *resp)
{
	node_info_t *node_ptr = NULL;
	int i;

	if (!resp)
		return;

	for (i = 0, node_ptr = resp->node_array;
	     i < resp->record_count; i++, node_ptr++) {
		uint16_t used_cpus = 0;
		select_g_select_nodeinfo_get(node_ptr->select_nodeinfo,
					     SELECT_NODEDATA_SUBCNT,
					     NODE_STATE_ALLOCATED, &used_cpus);
		if ((used_cpus != 0) && (used_cpus != node_ptr->cpus)) {
			node_ptr->node_state &= NODE_STATE_FLAGS;
			node_ptr->node_state |= NODE_STATE_MIXED;
		}
	}
}
Example 2
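/* Record a node-up event in the accounting storage plugin. If the select
 * plugin reports CPUs in an error state, record a partial node-down event
 * for those CPUs instead of marking the whole node up. */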
extern int clusteracct_storage_g_node_up(void *db_conn,
					 struct node_record *node_ptr,
					 time_t event_time)
{
	if (slurm_acct_storage_init(NULL) < 0)
		return SLURM_ERROR;

	/* on some systems we need to make sure we don't say something
	   is completely up if there are cpus in an error state */
	if (node_ptr->select_nodeinfo) {
		uint16_t err_cpus = 0;
		select_g_select_nodeinfo_get(node_ptr->select_nodeinfo,
					     SELECT_NODEDATA_SUBCNT,
					     NODE_STATE_ERROR,
					     &err_cpus);
		if (err_cpus) {
			char *reason = "Setting partial node down.";
			struct node_record send_node;
			struct config_record config_rec;
			uint16_t cpu_cnt = 0;
			select_g_alter_node_cnt(SELECT_GET_NODE_CPU_CNT,
						&cpu_cnt);
			err_cpus *= cpu_cnt;
			memset(&send_node, 0, sizeof(struct node_record));
			memset(&config_rec, 0, sizeof(struct config_record));
			send_node.name = node_ptr->name;
			send_node.config_ptr = &config_rec;
			send_node.cpus = err_cpus;
			config_rec.cpus = err_cpus;

			send_node.node_state = NODE_STATE_ERROR;

			return (*(g_acct_storage_context->ops.node_down))
				(db_conn, &send_node,
				 event_time, reason, slurm_get_slurm_user_id());
		}
	}

	return (*(g_acct_storage_context->ops.node_up))
		(db_conn, node_ptr, event_time);
}
Example 3
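/* Add a node to the first sinfo_data record in sinfo_list whose partition
 * and node data match, or append a newly created record if none does. */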
static int _insert_node_ptr(List sinfo_list, uint16_t part_num,
			    partition_info_t *part_ptr,
			    node_info_t *node_ptr, uint32_t node_scaling)
{
	int rc = SLURM_SUCCESS;
	sinfo_data_t *sinfo_ptr = NULL;
	ListIterator itr = NULL;

	if (params.cluster_flags & CLUSTER_FLAG_BG) {
		uint16_t error_cpus = 0;
		select_g_select_nodeinfo_get(node_ptr->select_nodeinfo,
					     SELECT_NODEDATA_SUBCNT,
					     NODE_STATE_ERROR,
					     &error_cpus);

		if (error_cpus && !node_ptr->reason)
			node_ptr->reason = xstrdup("Block(s) in error state");
	}

	itr = list_iterator_create(sinfo_list);
	while ((sinfo_ptr = list_next(itr))) {
		if (!_match_part_data(sinfo_ptr, part_ptr))
			continue;
		if (sinfo_ptr->nodes_total &&
		    (!_match_node_data(sinfo_ptr, node_ptr)))
			continue;
		_update_sinfo(sinfo_ptr, node_ptr, node_scaling);
		break;
	}
	list_iterator_destroy(itr);

	/* if no match, create new sinfo_data entry */
	if (!sinfo_ptr) {
		list_append(sinfo_list,
			    _create_sinfo(part_ptr, part_num,
					  node_ptr, node_scaling));
	}

	return rc;
}
Example 4
extern int slurm_get_select_nodeinfo(dynamic_plugin_data_t *nodeinfo,
				     enum select_nodedata_type data_type,
				     enum node_states state, void *data)
{
	return select_g_select_nodeinfo_get(nodeinfo, data_type, state, data);
}
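Examples 1 through 3 retrieve per-node sub-counts through select_g_select_nodeinfo_get(), and Example 4 above shows the thin public wrapper slurm_get_select_nodeinfo() around it. As a minimal standalone sketch of the same call pattern (assuming the public client headers <slurm/slurm.h> and <slurm/slurm_errno.h> are available and the cluster's select plugin populates the ALLOCATED subcount), a program could load the node table and print the allocated CPU count reported for each node:

#include <stdio.h>
#include <stdint.h>
#include <slurm/slurm.h>
#include <slurm/slurm_errno.h>

int main(void)
{
	node_info_msg_t *node_msg = NULL;
	uint32_t i;

	/* Download the current node table from the controller. */
	if (slurm_load_node((time_t) NULL, &node_msg, SHOW_ALL)
	    != SLURM_SUCCESS) {
		slurm_perror("slurm_load_node");
		return 1;
	}

	for (i = 0; i < node_msg->record_count; i++) {
		node_info_t *node_ptr = &node_msg->node_array[i];
		uint16_t alloc_cpus = 0;

		/* Ask the select plugin for this node's ALLOCATED subcount. */
		slurm_get_select_nodeinfo(node_ptr->select_nodeinfo,
					  SELECT_NODEDATA_SUBCNT,
					  NODE_STATE_ALLOCATED,
					  &alloc_cpus);
		printf("%s: %u of %u CPUs allocated\n", node_ptr->name,
		       (unsigned) alloc_cpus, (unsigned) node_ptr->cpus);
	}

	slurm_free_node_info_msg(node_msg);
	return 0;
}

Note that on BlueGene-flagged clusters the examples above additionally scale the subcount by the per-midplane CPU count, so the raw value printed by this sketch would need the same adjustment there.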
Example 5
/* Spawn health check function for every node that is not DOWN */
extern void run_health_check(void)
{
#ifdef HAVE_FRONT_END
	front_end_record_t *front_end_ptr;
#else
	struct node_record *node_ptr;
	int node_test_cnt = 0, node_limit, node_states, run_cyclic;
	static int base_node_loc = -1;
	static time_t cycle_start_time = (time_t) 0;
#endif
	int i;
	char *host_str = NULL;
	agent_arg_t *check_agent_args = NULL;

	/* Sync plugin internal data with
	 * node select_nodeinfo. This is important
	 * after reconfig otherwise select_nodeinfo
	 * will not return the correct number of
	 * allocated cpus.
	 */
	select_g_select_nodeinfo_set_all();

	check_agent_args = xmalloc (sizeof (agent_arg_t));
	check_agent_args->msg_type = REQUEST_HEALTH_CHECK;
	check_agent_args->retry = 0;
	check_agent_args->hostlist = hostlist_create(NULL);
#ifdef HAVE_FRONT_END
	for (i = 0, front_end_ptr = front_end_nodes;
	     i < front_end_node_cnt; i++, front_end_ptr++) {
		if (IS_NODE_NO_RESPOND(front_end_ptr))
			continue;
		hostlist_push_host(check_agent_args->hostlist,
				   front_end_ptr->name);
		check_agent_args->node_count++;
	}
#else
	run_cyclic = slurmctld_conf.health_check_node_state &
		     HEALTH_CHECK_CYCLE;
	node_states = slurmctld_conf.health_check_node_state &
		      (~HEALTH_CHECK_CYCLE);
	if (run_cyclic) {
		time_t now = time(NULL);
		if (cycle_start_time == (time_t) 0)
			cycle_start_time = now;
		else if (base_node_loc >= 0)
			;	/* mid-cycle */
		else if (difftime(now, cycle_start_time) <
			 slurmctld_conf.health_check_interval) {
			return;	/* Wait to start next cycle */
		}
		cycle_start_time = now;
		/* Determine how many nodes we want to test on each call of
		 * run_health_check() to spread out the work. */
		node_limit = (node_record_count * 2) /
			     slurmctld_conf.health_check_interval;
		node_limit = MAX(node_limit, 10);
	}
	if ((node_states != HEALTH_CHECK_NODE_ANY) &&
	    (node_states != HEALTH_CHECK_NODE_IDLE)) {
		/* Update each node's alloc_cpus count */
		select_g_select_nodeinfo_set_all();
	}

	for (i = 0; i < node_record_count; i++) {
		if (run_cyclic) {
			if (node_test_cnt++ >= node_limit)
				break;
			base_node_loc++;
			if (base_node_loc >= node_record_count) {
				base_node_loc = -1;
				break;
			}
			node_ptr = node_record_table_ptr + base_node_loc;
		} else {
			node_ptr = node_record_table_ptr + i;
		}
		if (IS_NODE_NO_RESPOND(node_ptr) || IS_NODE_FUTURE(node_ptr) ||
		    IS_NODE_POWER_SAVE(node_ptr))
			continue;
		if (node_states != HEALTH_CHECK_NODE_ANY) {
			uint16_t cpus_total, cpus_used = 0;
			if (slurmctld_conf.fast_schedule) {
				cpus_total = node_ptr->config_ptr->cpus;
			} else {
				cpus_total = node_ptr->cpus;
			}
			if (!IS_NODE_IDLE(node_ptr)) {
				select_g_select_nodeinfo_get(
						node_ptr->select_nodeinfo,
						SELECT_NODEDATA_SUBCNT,
						NODE_STATE_ALLOCATED,
						&cpus_used);
			}
			/* Here the node state is inferred from
			 * the cpus allocated on it.
			 * - cpus_used == 0
			 *       means node is idle
			 * - cpus_used < cpus_total
			 *       means the node is in mixed state
			 * else cpus_used == cpus_total
			 *       means the node is allocated
			 */
			if (cpus_used == 0) {
				if (!(node_states & HEALTH_CHECK_NODE_IDLE))
					continue;
				if (!IS_NODE_IDLE(node_ptr))
					continue;
			} else if (cpus_used < cpus_total) {
				if (!(node_states & HEALTH_CHECK_NODE_MIXED))
					continue;
			} else {
				if (!(node_states & HEALTH_CHECK_NODE_ALLOC))
					continue;
			}
		}
		hostlist_push_host(check_agent_args->hostlist, node_ptr->name);
		check_agent_args->node_count++;
	}
	if (run_cyclic && (i >= node_record_count))
		base_node_loc = -1;
#endif

	if (check_agent_args->node_count == 0) {
		hostlist_destroy(check_agent_args->hostlist);
		xfree (check_agent_args);
	} else {
		hostlist_uniq(check_agent_args->hostlist);
		host_str = hostlist_ranged_string_xmalloc(
				check_agent_args->hostlist);
		debug("Spawning health check agent for %s", host_str);
		xfree(host_str);
		ping_begin();
		agent_queue_request(check_agent_args);
	}
}
Example 6
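/* Merge one node's data into an existing sinfo_data record: update the
 * min/max hardware values and accumulate per-state node and CPU counts,
 * with special handling for BlueGene subgroup scaling. */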
static void _update_sinfo(sinfo_data_t *sinfo_ptr, node_info_t *node_ptr,
			  uint32_t node_scaling)
{
	uint16_t base_state;
	uint16_t used_cpus = 0, error_cpus = 0;
	int total_cpus = 0, total_nodes = 0;
	/* since node_scaling could be less here, we need to use the
	 * global node scaling which should never change. */
	int single_node_cpus = (node_ptr->cpus / g_node_scaling);

	base_state = node_ptr->node_state & NODE_STATE_BASE;

	if (sinfo_ptr->nodes_total == 0) {	/* first node added */
		sinfo_ptr->node_state = node_ptr->node_state;
		sinfo_ptr->features   = node_ptr->features;
		sinfo_ptr->gres       = node_ptr->gres;
		sinfo_ptr->reason     = node_ptr->reason;
		sinfo_ptr->reason_time = node_ptr->reason_time;
		sinfo_ptr->reason_uid = node_ptr->reason_uid;
		sinfo_ptr->min_cpus    = node_ptr->cpus;
		sinfo_ptr->max_cpus    = node_ptr->cpus;
		sinfo_ptr->min_sockets = node_ptr->sockets;
		sinfo_ptr->max_sockets = node_ptr->sockets;
		sinfo_ptr->min_cores   = node_ptr->cores;
		sinfo_ptr->max_cores   = node_ptr->cores;
		sinfo_ptr->min_threads = node_ptr->threads;
		sinfo_ptr->max_threads = node_ptr->threads;
		sinfo_ptr->min_disk   = node_ptr->tmp_disk;
		sinfo_ptr->max_disk   = node_ptr->tmp_disk;
		sinfo_ptr->min_mem    = node_ptr->real_memory;
		sinfo_ptr->max_mem    = node_ptr->real_memory;
		sinfo_ptr->min_weight = node_ptr->weight;
		sinfo_ptr->max_weight = node_ptr->weight;
		sinfo_ptr->min_cpu_load = node_ptr->cpu_load;
		sinfo_ptr->max_cpu_load = node_ptr->cpu_load;
		sinfo_ptr->max_cpus_per_node = sinfo_ptr->part_info->
					       max_cpus_per_node;
		sinfo_ptr->version    = node_ptr->version;
	} else if (hostlist_find(sinfo_ptr->nodes, node_ptr->name) != -1) {
		/* we already have this node in this record,
		 * just return, don't duplicate */
		return;
	} else {
		if (sinfo_ptr->min_cpus > node_ptr->cpus)
			sinfo_ptr->min_cpus = node_ptr->cpus;
		if (sinfo_ptr->max_cpus < node_ptr->cpus)
			sinfo_ptr->max_cpus = node_ptr->cpus;

		if (sinfo_ptr->min_sockets > node_ptr->sockets)
			sinfo_ptr->min_sockets = node_ptr->sockets;
		if (sinfo_ptr->max_sockets < node_ptr->sockets)
			sinfo_ptr->max_sockets = node_ptr->sockets;

		if (sinfo_ptr->min_cores > node_ptr->cores)
			sinfo_ptr->min_cores = node_ptr->cores;
		if (sinfo_ptr->max_cores < node_ptr->cores)
			sinfo_ptr->max_cores = node_ptr->cores;

		if (sinfo_ptr->min_threads > node_ptr->threads)
			sinfo_ptr->min_threads = node_ptr->threads;
		if (sinfo_ptr->max_threads < node_ptr->threads)
			sinfo_ptr->max_threads = node_ptr->threads;

		if (sinfo_ptr->min_disk > node_ptr->tmp_disk)
			sinfo_ptr->min_disk = node_ptr->tmp_disk;
		if (sinfo_ptr->max_disk < node_ptr->tmp_disk)
			sinfo_ptr->max_disk = node_ptr->tmp_disk;

		if (sinfo_ptr->min_mem > node_ptr->real_memory)
			sinfo_ptr->min_mem = node_ptr->real_memory;
		if (sinfo_ptr->max_mem < node_ptr->real_memory)
			sinfo_ptr->max_mem = node_ptr->real_memory;

		if (sinfo_ptr->min_weight > node_ptr->weight)
			sinfo_ptr->min_weight = node_ptr->weight;
		if (sinfo_ptr->max_weight < node_ptr->weight)
			sinfo_ptr->max_weight = node_ptr->weight;

		if (sinfo_ptr->min_cpu_load > node_ptr->cpu_load)
			sinfo_ptr->min_cpu_load = node_ptr->cpu_load;
		if (sinfo_ptr->max_cpu_load < node_ptr->cpu_load)
			sinfo_ptr->max_cpu_load = node_ptr->cpu_load;
	}

	hostlist_push_host(sinfo_ptr->nodes, node_ptr->name);
	if (params.match_flags.node_addr_flag)
		hostlist_push_host(sinfo_ptr->node_addr, node_ptr->node_addr);
	if (params.match_flags.hostnames_flag)
		hostlist_push_host(sinfo_ptr->hostnames, node_ptr->node_hostname);

	total_cpus = node_ptr->cpus;
	total_nodes = node_scaling;

	select_g_select_nodeinfo_get(node_ptr->select_nodeinfo,
				     SELECT_NODEDATA_SUBCNT,
				     NODE_STATE_ALLOCATED,
				     &used_cpus);
	select_g_select_nodeinfo_get(node_ptr->select_nodeinfo,
				     SELECT_NODEDATA_SUBCNT,
				     NODE_STATE_ERROR,
				     &error_cpus);

	if (params.cluster_flags & CLUSTER_FLAG_BG) {
		if (!params.match_flags.state_flag &&
		    (used_cpus || error_cpus)) {
			/* We only get one shot at this (because all states
			 * are combined together), so we need to make
			 * sure we get all the subgrps accounted. (So use
			 * g_node_scaling for safe measure) */
			total_nodes = g_node_scaling;

			sinfo_ptr->nodes_alloc += used_cpus;
			sinfo_ptr->nodes_other += error_cpus;
			sinfo_ptr->nodes_idle +=
				(total_nodes - (used_cpus + error_cpus));
			used_cpus  *= single_node_cpus;
			error_cpus *= single_node_cpus;
		} else {
			/* process only for this subgrp and then return */
			total_cpus = total_nodes * single_node_cpus;

			if ((base_state == NODE_STATE_ALLOCATED) ||
			    (base_state == NODE_STATE_MIXED) ||
			    (node_ptr->node_state & NODE_STATE_COMPLETING)) {
				sinfo_ptr->nodes_alloc += total_nodes;
				sinfo_ptr->cpus_alloc += total_cpus;
			} else if (IS_NODE_DRAIN(node_ptr) ||
				   (base_state == NODE_STATE_DOWN)) {
				sinfo_ptr->nodes_other += total_nodes;
				sinfo_ptr->cpus_other += total_cpus;
			} else {
				sinfo_ptr->nodes_idle += total_nodes;
				sinfo_ptr->cpus_idle += total_cpus;
			}

			sinfo_ptr->nodes_total += total_nodes;
			sinfo_ptr->cpus_total += total_cpus;

			return;
		}
	} else {
		if ((base_state == NODE_STATE_ALLOCATED) ||
		    (base_state == NODE_STATE_MIXED) ||
		    IS_NODE_COMPLETING(node_ptr))
			sinfo_ptr->nodes_alloc += total_nodes;
		else if (IS_NODE_DRAIN(node_ptr)
			 || (base_state == NODE_STATE_DOWN))
			sinfo_ptr->nodes_other += total_nodes;
		else
			sinfo_ptr->nodes_idle += total_nodes;
	}

	sinfo_ptr->nodes_total += total_nodes;

	sinfo_ptr->cpus_alloc += used_cpus;
	sinfo_ptr->cpus_total += total_cpus;
	total_cpus -= used_cpus + error_cpus;

	if (error_cpus) {
		sinfo_ptr->cpus_idle += total_cpus;
		sinfo_ptr->cpus_other += error_cpus;
	} else if (IS_NODE_DRAIN(node_ptr) ||
		   (base_state == NODE_STATE_DOWN)) {
		sinfo_ptr->cpus_other += total_cpus;
	} else
		sinfo_ptr->cpus_idle += total_cpus;
}
Example 7
/*
 * _build_sinfo_data - make a sinfo_data entry for each unique node
 *	configuration and add it to the sinfo_list for later printing.
 * sinfo_list IN/OUT - list of unique sinfo_data records to report
 * partition_msg IN - partition info message
 * node_msg IN - node info message
 * RET zero or error code
 */
static int _build_sinfo_data(List sinfo_list,
			     partition_info_msg_t *partition_msg,
			     node_info_msg_t *node_msg)
{
	pthread_attr_t attr_sinfo;
	pthread_t thread_sinfo;
	build_part_info_t *build_struct_ptr;
	node_info_t *node_ptr = NULL;
	partition_info_t *part_ptr = NULL;
	int j;

	g_node_scaling = node_msg->node_scaling;

	/* by default every partition is shown, even if no nodes */
	if ((!params.node_flag) && params.match_flags.partition_flag) {
		part_ptr = partition_msg->partition_array;
		for (j=0; j<partition_msg->record_count; j++, part_ptr++) {
			if ((!params.partition) ||
			    (_strcmp(params.partition, part_ptr->name) == 0)) {
				list_append(sinfo_list, _create_sinfo(
						    part_ptr, (uint16_t) j,
						    NULL,
						    node_msg->node_scaling));
			}
		}
	}

	if (params.filtering) {
		for (j = 0; j < node_msg->record_count; j++) {
			node_ptr = &(node_msg->node_array[j]);
			if (node_ptr->name && _filter_out(node_ptr))
				xfree(node_ptr->name);
		}
	}

	/* make sinfo_list entries for every node in every partition */
	for (j=0; j<partition_msg->record_count; j++, part_ptr++) {
		part_ptr = &(partition_msg->partition_array[j]);

		if (params.filtering && params.partition &&
		    _strcmp(part_ptr->name, params.partition))
			continue;

		if (node_msg->record_count == 1) { /* node_name_single */
			int pos = -1;
			uint16_t subgrp_size = 0;
			hostlist_t hl;

			node_ptr = &(node_msg->node_array[0]);
			if ((node_ptr->name == NULL) ||
			    (part_ptr->nodes == NULL))
				continue;
			hl = hostlist_create(part_ptr->nodes);
			pos = hostlist_find(hl, node_msg->node_array[0].name);
			hostlist_destroy(hl);
			if (pos < 0)
				continue;
			if (select_g_select_nodeinfo_get(
				   node_ptr->select_nodeinfo,
				   SELECT_NODEDATA_SUBGRP_SIZE,
				   0,
				   &subgrp_size) == SLURM_SUCCESS
			    && subgrp_size) {
				_handle_subgrps(sinfo_list,
						(uint16_t) j,
						part_ptr,
						node_ptr,
						node_msg->
						node_scaling);
			} else {
				_insert_node_ptr(sinfo_list,
						 (uint16_t) j,
						 part_ptr,
						 node_ptr,
						 node_msg->
						 node_scaling);
			}
			continue;
		}

		/* Process each partition using a separate thread */
		build_struct_ptr = xmalloc(sizeof(build_part_info_t));
		build_struct_ptr->node_msg   = node_msg;
		build_struct_ptr->part_num   = (uint16_t) j;
		build_struct_ptr->part_ptr   = part_ptr;
		build_struct_ptr->sinfo_list = sinfo_list;

		slurm_mutex_lock(&sinfo_cnt_mutex);
		sinfo_cnt++;
		slurm_mutex_unlock(&sinfo_cnt_mutex);

		slurm_attr_init(&attr_sinfo);
		if (pthread_attr_setdetachstate
		    (&attr_sinfo, PTHREAD_CREATE_DETACHED))
			error("pthread_attr_setdetachstate error %m");
		while (pthread_create(&thread_sinfo, &attr_sinfo,
				      _build_part_info,
				      (void *) build_struct_ptr)) {
			error("pthread_create error %m");
			usleep(10000);	/* sleep and retry */
		}
		slurm_attr_destroy(&attr_sinfo);
	}

	slurm_mutex_lock(&sinfo_cnt_mutex);
	while (sinfo_cnt) {
		pthread_cond_wait(&sinfo_cnt_cond, &sinfo_cnt_mutex);
	}
	slurm_mutex_unlock(&sinfo_cnt_mutex);

	_sort_hostlist(sinfo_list);
	return SLURM_SUCCESS;
}
Example 8
/* Build information about a partition using one pthread per partition */
void *_build_part_info(void *args)
{
	build_part_info_t *build_struct_ptr;
	List sinfo_list;
	partition_info_t *part_ptr;
	node_info_msg_t *node_msg;
	node_info_t *node_ptr = NULL;
	uint16_t part_num;
	int j = 0;

	if (_serial_part_data())
		slurm_mutex_lock(&sinfo_list_mutex);
	build_struct_ptr = (build_part_info_t *) args;
	sinfo_list = build_struct_ptr->sinfo_list;
	part_num = build_struct_ptr->part_num;
	part_ptr = build_struct_ptr->part_ptr;
	node_msg = build_struct_ptr->node_msg;

	while (part_ptr->node_inx[j] >= 0) {
		int i = 0;
		uint16_t subgrp_size = 0;
		for (i = part_ptr->node_inx[j];
		     i <= part_ptr->node_inx[j+1]; i++) {
			if (i >= node_msg->record_count) {
				/* If info for single node name is loaded */
				break;
			}
			node_ptr = &(node_msg->node_array[i]);
			if (node_ptr->name == NULL)
				continue;

			if (select_g_select_nodeinfo_get(
				   node_ptr->select_nodeinfo,
				   SELECT_NODEDATA_SUBGRP_SIZE, 0,
				   &subgrp_size) == SLURM_SUCCESS
			    && subgrp_size) {
				_handle_subgrps(sinfo_list, part_num,
						part_ptr, node_ptr,
						node_msg->node_scaling);
			} else {
				_insert_node_ptr(sinfo_list, part_num,
						 part_ptr, node_ptr,
						 node_msg->node_scaling);
			}
		}
		j += 2;
	}

	xfree(args);
	if (_serial_part_data())
		slurm_mutex_unlock(&sinfo_list_mutex);
	slurm_mutex_lock(&sinfo_cnt_mutex);
	if (sinfo_cnt > 0) {
		sinfo_cnt--;
	} else {
		error("sinfo_cnt underflow");
		sinfo_cnt = 0;
	}
	pthread_cond_broadcast(&sinfo_cnt_cond);
	slurm_mutex_unlock(&sinfo_cnt_mutex);
	return NULL;
}
Example 9
/*
 * _query_server - download the current server state
 * part_pptr IN/OUT - partition information message
 * node_pptr IN/OUT - node information message
 * block_pptr IN/OUT - BlueGene block data
 * reserv_pptr IN/OUT - reservation information message
 * clear_old IN - If set, then always replace old data, needed when going
 *		  between clusters.
 * RET zero or error code
 */
static int
_query_server(partition_info_msg_t ** part_pptr,
	      node_info_msg_t ** node_pptr,
	      block_info_msg_t ** block_pptr,
	      reserve_info_msg_t ** reserv_pptr,
	      bool clear_old)
{
	static partition_info_msg_t *old_part_ptr = NULL, *new_part_ptr;
	static node_info_msg_t *old_node_ptr = NULL, *new_node_ptr;
	static block_info_msg_t *old_bg_ptr = NULL, *new_bg_ptr;
	static reserve_info_msg_t *old_resv_ptr = NULL, *new_resv_ptr;
	int error_code;
	uint16_t show_flags = 0;
	int cc;
	node_info_t *node_ptr;

	if (params.all_flag)
		show_flags |= SHOW_ALL;

	if (old_part_ptr) {
		if (clear_old)
			old_part_ptr->last_update = 0;
		error_code = slurm_load_partitions(old_part_ptr->last_update,
						   &new_part_ptr, show_flags);
		if (error_code == SLURM_SUCCESS)
			slurm_free_partition_info_msg(old_part_ptr);
		else if (slurm_get_errno() == SLURM_NO_CHANGE_IN_DATA) {
			error_code = SLURM_SUCCESS;
			new_part_ptr = old_part_ptr;
		}
	} else {
		error_code = slurm_load_partitions((time_t) NULL, &new_part_ptr,
						   show_flags);
	}
	if (error_code) {
		slurm_perror("slurm_load_partitions");
		return error_code;
	}

	old_part_ptr = new_part_ptr;
	*part_pptr = new_part_ptr;

	if (old_node_ptr) {
		if (clear_old)
			old_node_ptr->last_update = 0;
		if (params.node_name_single) {
			error_code = slurm_load_node_single(&new_node_ptr,
							    params.nodes,
							    show_flags);
		} else {
			error_code = slurm_load_node(old_node_ptr->last_update,
						     &new_node_ptr, show_flags);
		}
		if (error_code == SLURM_SUCCESS)
			slurm_free_node_info_msg(old_node_ptr);
		else if (slurm_get_errno() == SLURM_NO_CHANGE_IN_DATA) {
			error_code = SLURM_SUCCESS;
			new_node_ptr = old_node_ptr;
		}
	} else if (params.node_name_single) {
		error_code = slurm_load_node_single(&new_node_ptr, params.nodes,
						    show_flags);
	} else {
		error_code = slurm_load_node((time_t) NULL, &new_node_ptr,
					     show_flags);
	}

	if (error_code) {
		slurm_perror("slurm_load_node");
		return error_code;
	}
	old_node_ptr = new_node_ptr;
	*node_pptr = new_node_ptr;

	/* Set the node state as NODE_STATE_MIXED. */
	for (cc = 0; cc < new_node_ptr->record_count; cc++) {
		node_ptr = &(new_node_ptr->node_array[cc]);
		if (IS_NODE_DRAIN(node_ptr)) {
			/* don't worry about mixed since the
			 * whole node is being drained. */
		} else {
			uint16_t alloc_cpus = 0, err_cpus = 0, idle_cpus;
			int single_node_cpus =
				(node_ptr->cpus / g_node_scaling);

			select_g_select_nodeinfo_get(node_ptr->select_nodeinfo,
						     SELECT_NODEDATA_SUBCNT,
						     NODE_STATE_ALLOCATED,
						     &alloc_cpus);
			if (params.cluster_flags & CLUSTER_FLAG_BG) {
				if (!alloc_cpus &&
				    (IS_NODE_ALLOCATED(node_ptr) ||
				     IS_NODE_COMPLETING(node_ptr)))
					alloc_cpus = node_ptr->cpus;
				else
					alloc_cpus *= single_node_cpus;
			}
			idle_cpus = node_ptr->cpus - alloc_cpus;
			select_g_select_nodeinfo_get(node_ptr->select_nodeinfo,
						     SELECT_NODEDATA_SUBCNT,
						     NODE_STATE_ERROR,
						     &err_cpus);
			if (params.cluster_flags & CLUSTER_FLAG_BG)
				err_cpus *= single_node_cpus;
			idle_cpus -= err_cpus;

			if ((alloc_cpus && err_cpus) ||
			    (idle_cpus  && (idle_cpus != node_ptr->cpus))) {
				node_ptr->node_state &= NODE_STATE_FLAGS;
				node_ptr->node_state |= NODE_STATE_MIXED;
			}
		}
	}

	if (old_resv_ptr) {
		if (clear_old)
			old_resv_ptr->last_update = 0;
		error_code = slurm_load_reservations(old_resv_ptr->last_update,
						     &new_resv_ptr);
		if (error_code == SLURM_SUCCESS)
			slurm_free_reservation_info_msg(old_resv_ptr);
		else if (slurm_get_errno() == SLURM_NO_CHANGE_IN_DATA) {
			error_code = SLURM_SUCCESS;
			new_resv_ptr = old_resv_ptr;
		}
	} else {
		error_code = slurm_load_reservations((time_t) NULL,
						     &new_resv_ptr);
	}

	if (error_code) {
		slurm_perror("slurm_load_reservations");
		return error_code;
	}
	old_resv_ptr = new_resv_ptr;
	*reserv_pptr = new_resv_ptr;

	if (!params.bg_flag)
		return SLURM_SUCCESS;

	if (params.cluster_flags & CLUSTER_FLAG_BG) {
		if (old_bg_ptr) {
			if (clear_old)
				old_bg_ptr->last_update = 0;
			error_code = slurm_load_block_info(
				old_bg_ptr->last_update,
				&new_bg_ptr, show_flags);
			if (error_code == SLURM_SUCCESS)
				slurm_free_block_info_msg(old_bg_ptr);
			else if (slurm_get_errno() == SLURM_NO_CHANGE_IN_DATA) {
				error_code = SLURM_SUCCESS;
				new_bg_ptr = old_bg_ptr;
			}
		} else {
			error_code = slurm_load_block_info((time_t) NULL,
							   &new_bg_ptr,
							   show_flags);
		}
	}

	if (error_code) {
		slurm_perror("slurm_load_block");
		return error_code;
	}
	old_bg_ptr = new_bg_ptr;
	*block_pptr = new_bg_ptr;
	return SLURM_SUCCESS;
}
Example 10
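/* Split a node into separate sinfo entries per subgroup state (ALLOCATED,
 * ERROR, then the remaining IDLE portion), honoring any state filter
 * supplied in params.state_list. */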
static int _handle_subgrps(List sinfo_list, uint16_t part_num,
			   partition_info_t *part_ptr,
			   node_info_t *node_ptr, uint32_t node_scaling)
{
	uint16_t size;
	int *node_state;
	int i=0, state_cnt = 2;
	ListIterator iterator = NULL;
	enum node_states state[] =
		{ NODE_STATE_ALLOCATED, NODE_STATE_ERROR };

	/* If we ever update the hostlist stuff to support this stuff
	 * then we can use this to tack on the end of the node name
	 * the subgrp stuff.  On bluegene systems this would be nice
	 * to see the ionodes in certain states.
	 * When asking for nodes that are reserved, we need to return
	 * all states of those nodes.
	 */
	if (params.state_list)
		iterator = list_iterator_create(params.state_list);

	for (i = 0; i < state_cnt; i++) {
		if (iterator) {
			node_info_t tmp_node, *tmp_node_ptr = &tmp_node;
			while ((node_state = list_next(iterator))) {
				tmp_node_ptr->node_state = *node_state;
				if ((((state[i] == NODE_STATE_ALLOCATED)
				      && IS_NODE_DRAINING(tmp_node_ptr))
				     || (*node_state == NODE_STATE_DRAIN))
				    || (*node_state == state[i])
				    || (*node_state == NODE_STATE_RES))
					break;
			}
			list_iterator_reset(iterator);
			if (!node_state)
				continue;
		}
		if (select_g_select_nodeinfo_get(node_ptr->select_nodeinfo,
						SELECT_NODEDATA_SUBCNT,
						state[i],
						&size) == SLURM_SUCCESS
		   && size) {
			node_scaling -= size;
			node_ptr->node_state &= NODE_STATE_FLAGS;
			node_ptr->node_state |= state[i];
			_insert_node_ptr(sinfo_list, part_num, part_ptr,
					 node_ptr, size);
		}
	}

	/* now handle the idle */
	if (iterator) {
		while ((node_state = list_next(iterator))) {
			node_info_t tmp_node, *tmp_node_ptr = &tmp_node;
			tmp_node_ptr->node_state = *node_state;
			if (((*node_state == NODE_STATE_DRAIN)
			     || IS_NODE_DRAINED(tmp_node_ptr))
			    || (*node_state == NODE_STATE_IDLE)
			    || (*node_state == NODE_STATE_RES))
				break;
		}
		list_iterator_destroy(iterator);
		if (!node_state)
			return SLURM_SUCCESS;
	}
	node_ptr->node_state &= NODE_STATE_FLAGS;
	node_ptr->node_state |= NODE_STATE_IDLE;
	if ((int)node_scaling > 0)
		_insert_node_ptr(sinfo_list, part_num, part_ptr,
				 node_ptr, node_scaling);

	return SLURM_SUCCESS;
}
Example 11
/*
 * slurm_sprint_node_table - output information about a specific Slurm node
 *	based upon the message loaded using slurm_load_node
 * IN node_ptr - an individual node information record pointer
 * IN node_scaling - number of nodes each node represents
 * IN one_liner - print as a single line if true
 * RET out - char * containing formatted output (must be freed after call)
 *           NULL is returned on failure.
 */
char *
slurm_sprint_node_table (node_info_t * node_ptr,
			 int node_scaling, int one_liner )
{
	uint32_t my_state = node_ptr->node_state;
	char *cloud_str = "", *comp_str = "", *drain_str = "", *power_str = "";
	char time_str[32];
	char *out = NULL, *reason_str = NULL, *select_reason_str = NULL;
	uint16_t err_cpus = 0, alloc_cpus = 0;
	int cpus_per_node = 1;
	int idle_cpus;
	uint32_t cluster_flags = slurmdb_setup_cluster_flags();
	uint64_t alloc_memory;
	char *node_alloc_tres = NULL;
	char *line_end = (one_liner) ? " " : "\n   ";

	if (node_scaling)
		cpus_per_node = node_ptr->cpus / node_scaling;

	if (my_state & NODE_STATE_CLOUD) {
		my_state &= (~NODE_STATE_CLOUD);
		cloud_str = "+CLOUD";
	}
	if (my_state & NODE_STATE_COMPLETING) {
		my_state &= (~NODE_STATE_COMPLETING);
		comp_str = "+COMPLETING";
	}
	if (my_state & NODE_STATE_DRAIN) {
		my_state &= (~NODE_STATE_DRAIN);
		drain_str = "+DRAIN";
	}
	if (my_state & NODE_STATE_FAIL) {
		my_state &= (~NODE_STATE_FAIL);
		drain_str = "+FAIL";
	}
	if (my_state & NODE_STATE_POWER_SAVE) {
		my_state &= (~NODE_STATE_POWER_SAVE);
		power_str = "+POWER";
	}
	slurm_get_select_nodeinfo(node_ptr->select_nodeinfo,
				  SELECT_NODEDATA_SUBCNT,
				  NODE_STATE_ALLOCATED,
				  &alloc_cpus);
	if (cluster_flags & CLUSTER_FLAG_BG) {
		if (!alloc_cpus &&
		    (IS_NODE_ALLOCATED(node_ptr) ||
		     IS_NODE_COMPLETING(node_ptr)))
			alloc_cpus = node_ptr->cpus;
		else
			alloc_cpus *= cpus_per_node;
	}
	idle_cpus = node_ptr->cpus - alloc_cpus;

	slurm_get_select_nodeinfo(node_ptr->select_nodeinfo,
				  SELECT_NODEDATA_SUBCNT,
				  NODE_STATE_ERROR,
				  &err_cpus);
	if (cluster_flags & CLUSTER_FLAG_BG)
		err_cpus *= cpus_per_node;
	idle_cpus -= err_cpus;

	if ((alloc_cpus && err_cpus) ||
	    (idle_cpus  && (idle_cpus != node_ptr->cpus))) {
		my_state &= NODE_STATE_FLAGS;
		my_state |= NODE_STATE_MIXED;
	}

	/****** Line 1 ******/
	xstrfmtcat(out, "NodeName=%s ", node_ptr->name);
	if (cluster_flags & CLUSTER_FLAG_BG) {
		slurm_get_select_nodeinfo(node_ptr->select_nodeinfo,
					  SELECT_NODEDATA_RACK_MP,
					  0, &select_reason_str);
		if (select_reason_str) {
			xstrfmtcat(out, "RackMidplane=%s ", select_reason_str);
			xfree(select_reason_str);
		}
	}

	if (node_ptr->arch)
		xstrfmtcat(out, "Arch=%s ", node_ptr->arch);

	xstrfmtcat(out, "CoresPerSocket=%u", node_ptr->cores);

	xstrcat(out, line_end);

	/****** Line ******/
	xstrfmtcat(out, "CPUAlloc=%u CPUErr=%u CPUTot=%u ",
		   alloc_cpus, err_cpus, node_ptr->cpus);

	if (node_ptr->cpu_load == NO_VAL)
		xstrcat(out, "CPULoad=N/A");
	else
		xstrfmtcat(out, "CPULoad=%.2f", (node_ptr->cpu_load / 100.0));

	xstrcat(out, line_end);

	/****** Line ******/
	xstrfmtcat(out, "AvailableFeatures=%s", node_ptr->features);
	xstrcat(out, line_end);

	/****** Line ******/
	xstrfmtcat(out, "ActiveFeatures=%s", node_ptr->features_act);
	xstrcat(out, line_end);

	/****** Line ******/
	xstrfmtcat(out, "Gres=%s", node_ptr->gres);
	xstrcat(out, line_end);

	/****** Line (optional) ******/
	if (node_ptr->gres_drain) {
		xstrfmtcat(out, "GresDrain=%s", node_ptr->gres_drain);
		xstrcat(out, line_end);
	}

	/****** Line (optional) ******/
	if (node_ptr->gres_used) {
		xstrfmtcat(out, "GresUsed=%s", node_ptr->gres_used);
		xstrcat(out, line_end);
	}

	/****** Line (optional) ******/
	if (node_ptr->node_hostname || node_ptr->node_addr) {
		xstrfmtcat(out, "NodeAddr=%s NodeHostName=%s Version=%s",
			   node_ptr->node_addr, node_ptr->node_hostname,
			   node_ptr->version);
		xstrcat(out, line_end);
	}

	/****** Line ******/
	if (node_ptr->os)
		xstrfmtcat(out, "OS=%s ", node_ptr->os);

	slurm_get_select_nodeinfo(node_ptr->select_nodeinfo,
				  SELECT_NODEDATA_MEM_ALLOC,
				  NODE_STATE_ALLOCATED,
				  &alloc_memory);
	xstrfmtcat(out, "RealMemory=%"PRIu64" AllocMem=%"PRIu64" ",
		   node_ptr->real_memory, alloc_memory);

	if (node_ptr->free_mem == NO_VAL64)
		xstrcat(out, "FreeMem=N/A ");
	else
		xstrfmtcat(out, "FreeMem=%"PRIu64" ", node_ptr->free_mem);

	xstrfmtcat(out, "Sockets=%u Boards=%u",
		   node_ptr->sockets, node_ptr->boards);

	xstrcat(out, line_end);

	/****** core & memory specialization Line (optional) ******/
	if (node_ptr->core_spec_cnt || node_ptr->cpu_spec_list ||
	    node_ptr->mem_spec_limit) {
		if (node_ptr->core_spec_cnt) {
			xstrfmtcat(out, "CoreSpecCount=%u ",
				   node_ptr->core_spec_cnt);
		}
		if (node_ptr->cpu_spec_list) {
			xstrfmtcat(out, "CPUSpecList=%s ",
				   node_ptr->cpu_spec_list);
		}
		if (node_ptr->mem_spec_limit) {
			xstrfmtcat(out, "MemSpecLimit=%"PRIu64"",
				   node_ptr->mem_spec_limit);
		}
		xstrcat(out, line_end);
	}

	/****** Line ******/
	xstrfmtcat(out, "State=%s%s%s%s%s ThreadsPerCore=%u TmpDisk=%u Weight=%u ",
		   node_state_string(my_state),
		   cloud_str, comp_str, drain_str, power_str,
		   node_ptr->threads, node_ptr->tmp_disk, node_ptr->weight);

	if (node_ptr->owner == NO_VAL) {
		xstrcat(out, "Owner=N/A ");
	} else {
		char *user_name = uid_to_string((uid_t) node_ptr->owner);
		xstrfmtcat(out, "Owner=%s(%u) ", user_name, node_ptr->owner);
		xfree(user_name);
	}

	xstrfmtcat(out, "MCS_label=%s",
		   (node_ptr->mcs_label == NULL) ? "N/A" : node_ptr->mcs_label);

	xstrcat(out, line_end);

	/****** Line ******/
	if (node_ptr->partitions) {
		xstrfmtcat(out, "Partitions=%s ", node_ptr->partitions);
		xstrcat(out, line_end);
	}

	/****** Line ******/
	if (node_ptr->boot_time) {
		slurm_make_time_str((time_t *)&node_ptr->boot_time,
				    time_str, sizeof(time_str));
		xstrfmtcat(out, "BootTime=%s ", time_str);
	} else {
		xstrcat(out, "BootTime=None ");
	}

	if (node_ptr->slurmd_start_time) {
		slurm_make_time_str ((time_t *)&node_ptr->slurmd_start_time,
				     time_str, sizeof(time_str));
		xstrfmtcat(out, "SlurmdStartTime=%s", time_str);
	} else {
		xstrcat(out, "SlurmdStartTime=None");
	}
	xstrcat(out, line_end);

	/****** TRES Line ******/
	select_g_select_nodeinfo_get(node_ptr->select_nodeinfo,
				     SELECT_NODEDATA_TRES_ALLOC_FMT_STR,
				     NODE_STATE_ALLOCATED, &node_alloc_tres);
	xstrfmtcat(out, "CfgTRES=%s", node_ptr->tres_fmt_str);
	xstrcat(out, line_end);
	xstrfmtcat(out, "AllocTRES=%s",
		   (node_alloc_tres) ?  node_alloc_tres : "");
	xfree(node_alloc_tres);
	xstrcat(out, line_end);

	/****** Power Management Line ******/
	if (!node_ptr->power || (node_ptr->power->cap_watts == NO_VAL))
		xstrcat(out, "CapWatts=n/a");
	else
		xstrfmtcat(out, "CapWatts=%u", node_ptr->power->cap_watts);

	xstrcat(out, line_end);

	/****** Power Consumption Line ******/
	if (!node_ptr->energy || node_ptr->energy->current_watts == NO_VAL)
		xstrcat(out, "CurrentWatts=n/s LowestJoules=n/s ConsumedJoules=n/s");
	else
		xstrfmtcat(out, "CurrentWatts=%u "
				"LowestJoules=%"PRIu64" "
				"ConsumedJoules=%"PRIu64"",
				node_ptr->energy->current_watts,
				node_ptr->energy->base_consumed_energy,
				node_ptr->energy->consumed_energy);

	xstrcat(out, line_end);

	/****** external sensors Line ******/
	if (!node_ptr->ext_sensors
	    || node_ptr->ext_sensors->consumed_energy == NO_VAL)
		xstrcat(out, "ExtSensorsJoules=n/s ");
	else
		xstrfmtcat(out, "ExtSensorsJoules=%"PRIu64" ",
			   node_ptr->ext_sensors->consumed_energy);

	if (!node_ptr->ext_sensors
	    || node_ptr->ext_sensors->current_watts == NO_VAL)
		xstrcat(out, "ExtSensorsWatts=n/s ");
	else
		xstrfmtcat(out, "ExtSensorsWatts=%u ",
			   node_ptr->ext_sensors->current_watts);

	if (!node_ptr->ext_sensors
	    || node_ptr->ext_sensors->temperature == NO_VAL)
		xstrcat(out, "ExtSensorsTemp=n/s");
	else
		xstrfmtcat(out, "ExtSensorsTemp=%u",
			   node_ptr->ext_sensors->temperature);

	xstrcat(out, line_end);

	/****** Line ******/
	if (node_ptr->reason && node_ptr->reason[0])
		xstrcat(reason_str, node_ptr->reason);
	slurm_get_select_nodeinfo(node_ptr->select_nodeinfo,
				  SELECT_NODEDATA_EXTRA_INFO,
				  0, &select_reason_str);
	if (select_reason_str && select_reason_str[0]) {
		if (reason_str)
			xstrcat(reason_str, "\n");
		xstrcat(reason_str, select_reason_str);
	}
	xfree(select_reason_str);
	if (reason_str) {
		int inx = 1;
		char *save_ptr = NULL, *tok, *user_name;
		tok = strtok_r(reason_str, "\n", &save_ptr);
		while (tok) {
			if (inx == 1) {
				xstrcat(out, "Reason=");
			} else {
				xstrcat(out, line_end);
				xstrcat(out, "       ");
			}
			xstrfmtcat(out, "%s", tok);
			if ((inx++ == 1) && node_ptr->reason_time) {
				user_name = uid_to_string(node_ptr->reason_uid);
				slurm_make_time_str((time_t *)&node_ptr->reason_time,
						    time_str, sizeof(time_str));
				xstrfmtcat(out, " [%s@%s]", user_name, time_str);
				xfree(user_name);
			}
			tok = strtok_r(NULL, "\n", &save_ptr);
		}
		xfree(reason_str);
	}
	if (one_liner)
		xstrcat(out, "\n");
	else
		xstrcat(out, "\n\n");

	return out;
}
Example 12
File: sinfo.c Project: IFCA/slurm
/*
 * _build_sinfo_data - make a sinfo_data entry for each unique node
 *	configuration and add it to the sinfo_list for later printing.
 * sinfo_list IN/OUT - list of unique sinfo_data records to report
 * partition_msg IN - partition info message
 * node_msg IN - node info message
 * RET zero or error code
 */
static int _build_sinfo_data(List sinfo_list,
			     partition_info_msg_t *partition_msg,
			     node_info_msg_t *node_msg)
{
	node_info_t *node_ptr = NULL;
	partition_info_t *part_ptr = NULL;
	int j, j2;

	g_node_scaling = node_msg->node_scaling;

	/* by default every partition is shown, even if no nodes */
	if ((!params.node_flag) && params.match_flags.partition_flag) {
		part_ptr = partition_msg->partition_array;
		for (j=0; j<partition_msg->record_count; j++, part_ptr++) {
			if ((!params.partition) ||
			    (_strcmp(params.partition, part_ptr->name) == 0)) {
				list_append(sinfo_list, _create_sinfo(
						    part_ptr, (uint16_t) j,
						    NULL,
						    node_msg->node_scaling));
			}
		}
	}

	/* make sinfo_list entries for every node in every partition */
	for (j=0; j<partition_msg->record_count; j++, part_ptr++) {
		part_ptr = &(partition_msg->partition_array[j]);

		if (params.filtering && params.partition &&
		    _strcmp(part_ptr->name, params.partition))
			continue;

		j2 = 0;
		while (part_ptr->node_inx[j2] >= 0) {
			int i2 = 0;
			uint16_t subgrp_size = 0;
			for (i2 = part_ptr->node_inx[j2];
			     i2 <= part_ptr->node_inx[j2+1];
			     i2++) {
				if (i2 >= node_msg->record_count) {
					/* This can happen if info for single
					 * node name is loaded */
					break;
				}
				node_ptr = &(node_msg->node_array[i2]);

				if ((node_ptr->name == NULL) ||
				    (params.filtering &&
				     _filter_out(node_ptr)))
					continue;

				if (select_g_select_nodeinfo_get(
					   node_ptr->select_nodeinfo,
					   SELECT_NODEDATA_SUBGRP_SIZE,
					   0,
					   &subgrp_size) == SLURM_SUCCESS
				    && subgrp_size) {
					_handle_subgrps(sinfo_list,
							(uint16_t) j,
							part_ptr,
							node_ptr,
							node_msg->
							node_scaling);
				} else {
					_insert_node_ptr(sinfo_list,
							 (uint16_t) j,
							 part_ptr,
							 node_ptr,
							 node_msg->
							 node_scaling);
				}
			}
			j2 += 2;
		}
	}
	_sort_hostlist(sinfo_list);
	return SLURM_SUCCESS;
}
Example 13
/*
 * slurm_sprint_node_table - output information about a specific Slurm node
 *	based upon the message loaded using slurm_load_node
 * IN node_ptr - an individual node information record pointer
 * IN one_liner - print as a single line if true
 * RET out - char * containing formatted output (must be freed after call)
 *           NULL is returned on failure.
 */
char *slurm_sprint_node_table(node_info_t *node_ptr, int one_liner)
{
	uint32_t my_state = node_ptr->node_state;
	char *cloud_str = "", *comp_str = "", *drain_str = "", *power_str = "";
	char time_str[32];
	char *out = NULL, *reason_str = NULL;
	uint16_t alloc_cpus = 0;
	int idle_cpus;
	uint64_t alloc_memory;
	char *node_alloc_tres = NULL;
	char *line_end = (one_liner) ? " " : "\n   ";
	slurm_ctl_conf_info_msg_t  *slurm_ctl_conf_ptr = NULL;

	if (slurm_load_ctl_conf((time_t) NULL, &slurm_ctl_conf_ptr)
	    != SLURM_SUCCESS)
		fatal("Cannot load slurmctld conf file");

	if (my_state & NODE_STATE_CLOUD) {
		my_state &= (~NODE_STATE_CLOUD);
		cloud_str = "+CLOUD";
	}
	if (my_state & NODE_STATE_COMPLETING) {
		my_state &= (~NODE_STATE_COMPLETING);
		comp_str = "+COMPLETING";
	}
	if (my_state & NODE_STATE_DRAIN) {
		my_state &= (~NODE_STATE_DRAIN);
		drain_str = "+DRAIN";
	}
	if (my_state & NODE_STATE_FAIL) {
		my_state &= (~NODE_STATE_FAIL);
		drain_str = "+FAIL";
	}
	if (my_state & NODE_STATE_POWER_SAVE) {
		my_state &= (~NODE_STATE_POWER_SAVE);
		power_str = "+POWER";
	}
	if (my_state & NODE_STATE_POWERING_DOWN) {
		my_state &= (~NODE_STATE_POWERING_DOWN);
		power_str = "+POWERING_DOWN";
	}
	slurm_get_select_nodeinfo(node_ptr->select_nodeinfo,
				  SELECT_NODEDATA_SUBCNT,
				  NODE_STATE_ALLOCATED,
				  &alloc_cpus);
	idle_cpus = node_ptr->cpus - alloc_cpus;

	if (idle_cpus  && (idle_cpus != node_ptr->cpus)) {
		my_state &= NODE_STATE_FLAGS;
		my_state |= NODE_STATE_MIXED;
	}

	/****** Line 1 ******/
	xstrfmtcat(out, "NodeName=%s ", node_ptr->name);

	if (node_ptr->arch)
		xstrfmtcat(out, "Arch=%s ", node_ptr->arch);

	if (node_ptr->cpu_bind) {
		char tmp_str[128];
		slurm_sprint_cpu_bind_type(tmp_str, node_ptr->cpu_bind);
		xstrfmtcat(out, "CpuBind=%s ", tmp_str);
	}

	xstrfmtcat(out, "CoresPerSocket=%u ", node_ptr->cores);

	xstrcat(out, line_end);

	/****** Line ******/
	xstrfmtcat(out, "CPUAlloc=%u CPUTot=%u ",
		   alloc_cpus, node_ptr->cpus);

	if (node_ptr->cpu_load == NO_VAL)
		xstrcat(out, "CPULoad=N/A");
	else
		xstrfmtcat(out, "CPULoad=%.2f", (node_ptr->cpu_load / 100.0));

	xstrcat(out, line_end);

	/****** Line ******/
	xstrfmtcat(out, "AvailableFeatures=%s", node_ptr->features);
	xstrcat(out, line_end);

	/****** Line ******/
	xstrfmtcat(out, "ActiveFeatures=%s", node_ptr->features_act);
	xstrcat(out, line_end);

	/****** Line ******/
	xstrfmtcat(out, "Gres=%s", node_ptr->gres);
	xstrcat(out, line_end);

	/****** Line (optional) ******/
	if (node_ptr->gres_drain) {
		xstrfmtcat(out, "GresDrain=%s", node_ptr->gres_drain);
		xstrcat(out, line_end);
	}

	/****** Line (optional) ******/
	if (node_ptr->gres_used) {
		xstrfmtcat(out, "GresUsed=%s", node_ptr->gres_used);
		xstrcat(out, line_end);
	}

	/****** Line (optional) ******/
	{
		bool line_used = false;

		if (node_ptr->node_addr) {
			xstrfmtcat(out, "NodeAddr=%s ", node_ptr->node_addr);
			line_used = true;
		}

		if (node_ptr->node_hostname) {
			xstrfmtcat(out, "NodeHostName=%s ",
				   node_ptr->node_hostname);
			line_used = true;
		}

		if (node_ptr->port != slurm_get_slurmd_port()) {
			xstrfmtcat(out, "Port=%u ", node_ptr->port);
			line_used = true;
		}

		if (node_ptr->version &&
		    xstrcmp(node_ptr->version, slurm_ctl_conf_ptr->version)) {
			xstrfmtcat(out, "Version=%s", node_ptr->version);
			line_used = true;
		}

		if (line_used)
			xstrcat(out, line_end);
	}

	/****** Line ******/
	if (node_ptr->os) {
		xstrfmtcat(out, "OS=%s ", node_ptr->os);
		xstrcat(out, line_end);
	}

	/****** Line ******/
	slurm_get_select_nodeinfo(node_ptr->select_nodeinfo,
				  SELECT_NODEDATA_MEM_ALLOC,
				  NODE_STATE_ALLOCATED,
				  &alloc_memory);
	xstrfmtcat(out, "RealMemory=%"PRIu64" AllocMem=%"PRIu64" ",
		   node_ptr->real_memory, alloc_memory);

	if (node_ptr->free_mem == NO_VAL64)
		xstrcat(out, "FreeMem=N/A ");
	else
		xstrfmtcat(out, "FreeMem=%"PRIu64" ", node_ptr->free_mem);

	xstrfmtcat(out, "Sockets=%u Boards=%u",
		   node_ptr->sockets, node_ptr->boards);
	xstrcat(out, line_end);

	/****** core & memory specialization Line (optional) ******/
	if (node_ptr->core_spec_cnt || node_ptr->cpu_spec_list ||
	    node_ptr->mem_spec_limit) {
		if (node_ptr->core_spec_cnt) {
			xstrfmtcat(out, "CoreSpecCount=%u ",
				   node_ptr->core_spec_cnt);
		}
		if (node_ptr->cpu_spec_list) {
			xstrfmtcat(out, "CPUSpecList=%s ",
				   node_ptr->cpu_spec_list);
		}
		if (node_ptr->mem_spec_limit) {
			xstrfmtcat(out, "MemSpecLimit=%"PRIu64"",
				   node_ptr->mem_spec_limit);
		}
		xstrcat(out, line_end);
	}

	/****** Line ******/
	xstrfmtcat(out, "State=%s%s%s%s%s ThreadsPerCore=%u TmpDisk=%u Weight=%u ",
		   node_state_string(my_state),
		   cloud_str, comp_str, drain_str, power_str,
		   node_ptr->threads, node_ptr->tmp_disk, node_ptr->weight);

	if (node_ptr->owner == NO_VAL) {
		xstrcat(out, "Owner=N/A ");
	} else {
		char *user_name = uid_to_string((uid_t) node_ptr->owner);
		xstrfmtcat(out, "Owner=%s(%u) ", user_name, node_ptr->owner);
		xfree(user_name);
	}

	xstrfmtcat(out, "MCS_label=%s",
		   (node_ptr->mcs_label == NULL) ? "N/A" : node_ptr->mcs_label);

	xstrcat(out, line_end);

	/****** Line ******/
	if ((node_ptr->next_state != NO_VAL) &&
	    (my_state & NODE_STATE_REBOOT)) {
		xstrfmtcat(out, "NextState=%s",
			   node_state_string(node_ptr->next_state));
		xstrcat(out, line_end);
	}

	/****** Line ******/
	if (node_ptr->partitions) {
		xstrfmtcat(out, "Partitions=%s ", node_ptr->partitions);
		xstrcat(out, line_end);
	}

	/****** Line ******/
	if (node_ptr->boot_time) {
		slurm_make_time_str((time_t *)&node_ptr->boot_time,
				    time_str, sizeof(time_str));
		xstrfmtcat(out, "BootTime=%s ", time_str);
	} else {
		xstrcat(out, "BootTime=None ");
	}

	if (node_ptr->slurmd_start_time) {
		slurm_make_time_str ((time_t *)&node_ptr->slurmd_start_time,
				     time_str, sizeof(time_str));
		xstrfmtcat(out, "SlurmdStartTime=%s", time_str);
	} else {
		xstrcat(out, "SlurmdStartTime=None");
	}
	xstrcat(out, line_end);

	/****** TRES Line ******/
	select_g_select_nodeinfo_get(node_ptr->select_nodeinfo,
				     SELECT_NODEDATA_TRES_ALLOC_FMT_STR,
				     NODE_STATE_ALLOCATED, &node_alloc_tres);
	xstrfmtcat(out, "CfgTRES=%s", node_ptr->tres_fmt_str);
	xstrcat(out, line_end);
	xstrfmtcat(out, "AllocTRES=%s",
		   (node_alloc_tres) ?  node_alloc_tres : "");
	xfree(node_alloc_tres);
	xstrcat(out, line_end);

	/****** Power Management Line ******/
	if (!node_ptr->power || (node_ptr->power->cap_watts == NO_VAL))
		xstrcat(out, "CapWatts=n/a");
	else
		xstrfmtcat(out, "CapWatts=%u", node_ptr->power->cap_watts);

	xstrcat(out, line_end);

	/****** Power Consumption Line ******/
	if (!node_ptr->energy || node_ptr->energy->current_watts == NO_VAL)
		xstrcat(out, "CurrentWatts=n/s AveWatts=n/s");
	else
		xstrfmtcat(out, "CurrentWatts=%u AveWatts=%u",
				node_ptr->energy->current_watts,
				node_ptr->energy->ave_watts);

	xstrcat(out, line_end);

	/****** external sensors Line ******/
	if (!node_ptr->ext_sensors
	    || node_ptr->ext_sensors->consumed_energy == NO_VAL64)
		xstrcat(out, "ExtSensorsJoules=n/s ");
	else
		xstrfmtcat(out, "ExtSensorsJoules=%"PRIu64" ",
			   node_ptr->ext_sensors->consumed_energy);

	if (!node_ptr->ext_sensors
	    || node_ptr->ext_sensors->current_watts == NO_VAL)
		xstrcat(out, "ExtSensorsWatts=n/s ");
	else
		xstrfmtcat(out, "ExtSensorsWatts=%u ",
			   node_ptr->ext_sensors->current_watts);

	if (!node_ptr->ext_sensors
	    || node_ptr->ext_sensors->temperature == NO_VAL)
		xstrcat(out, "ExtSensorsTemp=n/s");
	else
		xstrfmtcat(out, "ExtSensorsTemp=%u",
			   node_ptr->ext_sensors->temperature);

	xstrcat(out, line_end);

	/****** Line ******/
	if (node_ptr->reason && node_ptr->reason[0])
		xstrcat(reason_str, node_ptr->reason);
	if (reason_str) {
		int inx = 1;
		char *save_ptr = NULL, *tok, *user_name;
		tok = strtok_r(reason_str, "\n", &save_ptr);
		while (tok) {
			if (inx == 1) {
				xstrcat(out, "Reason=");
			} else {
				xstrcat(out, line_end);
				xstrcat(out, "       ");
			}
			xstrfmtcat(out, "%s", tok);
			if ((inx++ == 1) && node_ptr->reason_time) {
				user_name = uid_to_string(node_ptr->reason_uid);
				slurm_make_time_str((time_t *)&node_ptr->reason_time,
						    time_str, sizeof(time_str));
				xstrfmtcat(out, " [%s@%s]", user_name, time_str);
				xfree(user_name);
			}
			tok = strtok_r(NULL, "\n", &save_ptr);
		}
		xfree(reason_str);
	}
	if (one_liner)
		xstrcat(out, "\n");
	else
		xstrcat(out, "\n\n");

	slurm_free_ctl_conf(slurm_ctl_conf_ptr);
	return out;
}
Example 14
/* Return false if this node's data needs to be added to sinfo's table of
 * data to print. Return true if it is duplicate/redundant data. */
static bool _match_node_data(sinfo_data_t *sinfo_ptr, node_info_t *node_ptr)
{
	uint32_t tmp = 0;

	if (params.node_flag)
		return false;

	if (params.match_flags.hostnames_flag &&
	    (hostlist_find(sinfo_ptr->hostnames,
			   node_ptr->node_hostname) == -1))
		return false;

	if (params.match_flags.node_addr_flag &&
	    (hostlist_find(sinfo_ptr->node_addr, node_ptr->node_addr) == -1))
		return false;

	if (sinfo_ptr->nodes &&
	    params.match_flags.features_flag &&
	    (xstrcmp(node_ptr->features, sinfo_ptr->features)))
		return false;

	if (sinfo_ptr->nodes &&
	    params.match_flags.features_act_flag &&
	    (xstrcmp(node_ptr->features_act, sinfo_ptr->features_act)))
		return false;

	if (sinfo_ptr->nodes &&
	    params.match_flags.gres_flag &&
	    (xstrcmp(node_ptr->gres, sinfo_ptr->gres)))
		return false;

	if (sinfo_ptr->nodes &&
	    params.match_flags.reason_flag &&
	    (xstrcmp(node_ptr->reason, sinfo_ptr->reason)))
		return false;

	if (sinfo_ptr->nodes &&
	    params.match_flags.reason_timestamp_flag &&
	    (node_ptr->reason_time != sinfo_ptr->reason_time))
		return false;

	if (sinfo_ptr->nodes &&
	    params.match_flags.reason_user_flag &&
	    node_ptr->reason_uid != sinfo_ptr->reason_uid) {
		return false;
	}

	if (params.match_flags.state_flag) {
		char *state1, *state2;
		state1 = node_state_string(node_ptr->node_state);
		state2 = node_state_string(sinfo_ptr->node_state);
		if (xstrcmp(state1, state2))
			return false;
	}

	select_g_select_nodeinfo_get(node_ptr->select_nodeinfo,
				     SELECT_NODEDATA_MEM_ALLOC,
				     NODE_STATE_ALLOCATED,
				     &tmp);
	if (params.match_flags.alloc_mem_flag &&
	    (tmp != sinfo_ptr->alloc_memory))
		return false;

	/* If there is no need to exactly match sizes, just return here;
	 * otherwise check cpus, disk, memory and weight individually */
	if (!params.exact_match)
		return true;

	if (params.match_flags.cpus_flag &&
	    (node_ptr->cpus        != sinfo_ptr->min_cpus))
		return false;

	if (params.match_flags.sockets_flag &&
	    (node_ptr->sockets     != sinfo_ptr->min_sockets))
		return false;
	if (params.match_flags.cores_flag &&
	    (node_ptr->cores       != sinfo_ptr->min_cores))
		return false;
	if (params.match_flags.threads_flag &&
	    (node_ptr->threads     != sinfo_ptr->min_threads))
		return false;
	if (params.match_flags.sct_flag &&
	    ((node_ptr->sockets     != sinfo_ptr->min_sockets) ||
	     (node_ptr->cores       != sinfo_ptr->min_cores) ||
	     (node_ptr->threads     != sinfo_ptr->min_threads)))
		return false;
	if (params.match_flags.disk_flag &&
	    (node_ptr->tmp_disk    != sinfo_ptr->min_disk))
		return false;
	if (params.match_flags.memory_flag &&
	    (node_ptr->real_memory != sinfo_ptr->min_mem))
		return false;
	if (params.match_flags.weight_flag &&
	    (node_ptr->weight      != sinfo_ptr->min_weight))
		return false;
	if (params.match_flags.cpu_load_flag &&
	    (node_ptr->cpu_load        != sinfo_ptr->min_cpu_load))
		return false;
	if (params.match_flags.free_mem_flag &&
	    (node_ptr->free_mem        != sinfo_ptr->min_free_mem))
		return false;
	if (params.match_flags.version_flag &&
	    (node_ptr->version     != sinfo_ptr->version))
		return false;

	return true;
}