예제 #1
0
static char *	_get_node_state(struct node_record *node_ptr)
{
	static bool got_select_type = false;
	static bool node_allocations;

	if (!got_select_type) {
		char * select_type = slurm_get_select_type();
		if (select_type &&
		    (strcasecmp(select_type, "select/linear") == 0))
			node_allocations = true;
		else
			node_allocations = false;
		xfree(select_type);
		got_select_type = true;
	}

	if (IS_NODE_DRAIN(node_ptr) || IS_NODE_FAIL(node_ptr))
		return "Draining";
	if (IS_NODE_COMPLETING(node_ptr))
		return "Busy";

	if (IS_NODE_DOWN(node_ptr))
		return "Down";
	if (IS_NODE_ALLOCATED(node_ptr)) {
		if (node_allocations)
			return "Busy";
		else
			return "Running";
	}
	if (IS_NODE_IDLE(node_ptr))
		return "Idle";

	return "Unknown";
}
예제 #2
0
파일: info_job.c 프로젝트: cread/slurm
/*
 * scontrol_print_completing_job - print, on one stdout line, the nodes of a
 *	job that are still COMPLETING and those that are DOWN.
 * job_ptr IN - job whose node_inx ranges are walked
 * node_info_msg IN - node table the indices refer to
 *
 * node_inx is read as consecutive (first, last) index pairs terminated by -1.
 * For a federated, non-local query the indices are shifted by the offset
 * returned from get_cluster_node_offset().
 */
extern void
scontrol_print_completing_job(job_info_t *job_ptr,
			      node_info_msg_t *node_info_msg)
{
	int pair, cluster_offset = 0;
	hostlist_t completing_hl, down_hl;
	char *ranged;

	completing_hl = hostlist_create(NULL);
	down_hl = hostlist_create(NULL);

	if (job_ptr->cluster && federation_flag && !local_flag) {
		cluster_offset = get_cluster_node_offset(job_ptr->cluster,
							 node_info_msg);
	}

	for (pair = 0; job_ptr->node_inx[pair] != -1; pair += 2) {
		int rel_inx;

		for (rel_inx = job_ptr->node_inx[pair];
		     rel_inx <= job_ptr->node_inx[pair + 1]; rel_inx++) {
			int node_inx = rel_inx + cluster_offset;
			node_info_t *node;

			/* Stop this range once past the end of the table */
			if (node_inx >= node_info_msg->record_count)
				break;

			node = &(node_info_msg->node_array[node_inx]);
			if (IS_NODE_COMPLETING(node))
				hostlist_push_host(completing_hl, node->name);
			else if (IS_NODE_DOWN(node))
				hostlist_push_host(down_hl, node->name);
		}
	}

	fprintf(stdout, "JobId=%u ", job_ptr->job_id);

	/* Each list is only printed when its ranged string is non-empty */
	ranged = hostlist_ranged_string_xmalloc(completing_hl);
	if (ranged && ranged[0])
		fprintf(stdout, "Nodes(COMPLETING)=%s ", ranged);
	xfree(ranged);

	ranged = hostlist_ranged_string_xmalloc(down_hl);
	if (ranged && ranged[0])
		fprintf(stdout, "Nodes(DOWN)=%s ", ranged);
	xfree(ranged);

	fputc('\n', stdout);

	hostlist_destroy(completing_hl);
	hostlist_destroy(down_hl);
}
예제 #3
0
/*
 * scontrol_print_completing_job - print, on one stdout line, the nodes of a
 *	job that are still COMPLETING and those that are DOWN.
 * job_ptr IN - job being reported (uses job_id, nodes and node_inx)
 * node_info_msg IN - full node table to scan
 *
 * A COMPLETING node is attributed to the job through _in_node_bit_list() on
 * its table index; a DOWN node is attributed by looking its name up in the
 * hostlist built from job_ptr->nodes.
 */
extern void
scontrol_print_completing_job(job_info_t *job_ptr,
			      node_info_msg_t *node_info_msg)
{
	int inx;
	hostlist_t job_hl, completing_hl, down_hl;
	char *ranged;

	job_hl        = hostlist_create(job_ptr->nodes);
	completing_hl = hostlist_create("");
	down_hl       = hostlist_create("");

	for (inx = 0; inx < node_info_msg->record_count; inx++) {
		node_info_t *node = &(node_info_msg->node_array[inx]);

		if (IS_NODE_COMPLETING(node) &&
		    (_in_node_bit_list(inx, job_ptr->node_inx))) {
			hostlist_push_host(completing_hl, node->name);
			continue;
		}
		/* Reached also for COMPLETING nodes not in the job's list */
		if (IS_NODE_DOWN(node) &&
		    (hostlist_find(job_hl, node->name) != -1))
			hostlist_push_host(down_hl, node->name);
	}

	fprintf(stdout, "JobId=%u ", job_ptr->job_id);

	/* Each list is only printed when its ranged string is non-empty */
	ranged = hostlist_ranged_string_xmalloc(completing_hl);
	if (ranged && ranged[0])
		fprintf(stdout, "Nodes(COMPLETING)=%s ", ranged);
	xfree(ranged);

	ranged = hostlist_ranged_string_xmalloc(down_hl);
	if (ranged && ranged[0])
		fprintf(stdout, "Nodes(DOWN)=%s ", ranged);
	xfree(ranged);

	fputc('\n', stdout);

	hostlist_destroy(job_hl);
	hostlist_destroy(completing_hl);
	hostlist_destroy(down_hl);
}
예제 #4
0
파일: power_save.c 프로젝트: edsw/slurm
/*
 * _do_power_work - perform any power change work to nodes
 * now IN - current time, used for every timeout and rate comparison below
 *
 * Decays the running suspend/resume counters according to the time elapsed
 * since the previous scan, walks the whole node table building one bitmap of
 * nodes to wake and one of nodes to put to sleep (updating node state flags
 * and the global bitmaps as it goes), then passes the resulting node name
 * lists to _do_resume() and _do_suspend().
 */
static void _do_power_work(time_t now)
{
	static time_t last_log = 0, last_work_scan = 0;
	int i, wake_cnt = 0, sleep_cnt = 0, susp_total = 0;
	time_t delta_t;
	uint32_t susp_state;
	bitstr_t *wake_node_bitmap = NULL, *sleep_node_bitmap = NULL;
	struct node_record *node_ptr;
	bool run_suspend = false;

	/* Set limit on counts of nodes to have state changed */
	delta_t = now - last_work_scan;
	if (delta_t >= 60) {
		/* A full 60 seconds (or more) has passed: start from zero */
		suspend_cnt_f = 0.0;
		resume_cnt_f  = 0.0;
	} else {
		/* Scale the counters by the fraction of the 60 second
		 * window that has not yet elapsed */
		float rate = (60 - delta_t) / 60.0;
		suspend_cnt_f *= rate;
		resume_cnt_f  *= rate;
	}
	/* Adding 0.5 rounds to nearest if the targets are integer types
	 * (they are declared outside this function -- TODO confirm) */
	suspend_cnt = (suspend_cnt_f + 0.5);
	resume_cnt  = (resume_cnt_f  + 0.5);

	if (now > (last_suspend + suspend_timeout)) {
		/* ready to start another round of node suspends */
		run_suspend = true;
		if (last_suspend) {
			/* Forget which nodes were handled in the last round */
			bit_nclear(suspend_node_bitmap, 0,
				   (node_record_count - 1));
			bit_nclear(resume_node_bitmap, 0,
				   (node_record_count - 1));
			last_suspend = (time_t) 0;
		}
	}

	last_work_scan = now;

	/* Build bitmaps identifying each node which should change state */
	for (i = 0, node_ptr = node_record_table_ptr;
	     i < node_record_count; i++, node_ptr++) {
		susp_state = IS_NODE_POWER_SAVE(node_ptr);

		if (susp_state)
			susp_total++;

		/* Resume nodes as appropriate: the node is in power save,
		 * the resume rate limit (0 disables the check) is not yet
		 * reached, the node is not flagged in suspend_node_bitmap,
		 * and it is either allocated or was idle more recently than
		 * idle_time seconds ago */
		if (susp_state &&
		    ((resume_rate == 0) || (resume_cnt < resume_rate))	&&
		    (bit_test(suspend_node_bitmap, i) == 0)		&&
		    (IS_NODE_ALLOCATED(node_ptr) ||
		     (node_ptr->last_idle > (now - idle_time)))) {
			if (wake_node_bitmap == NULL) {
				wake_node_bitmap =
					bit_alloc(node_record_count);
			}
			wake_cnt++;
			resume_cnt++;
			resume_cnt_f++;
			/* Swap POWER_SAVE for POWER_UP and mark the node as
			 * not responding and unavailable while it resumes */
			node_ptr->node_state &= (~NODE_STATE_POWER_SAVE);
			node_ptr->node_state |=   NODE_STATE_POWER_UP;
			node_ptr->node_state |=   NODE_STATE_NO_RESPOND;
			bit_clear(power_node_bitmap, i);
			bit_clear(avail_node_bitmap, i);
			/* Push last_response resume_timeout into the future */
			node_ptr->last_response = now + resume_timeout;
			bit_set(wake_node_bitmap,    i);
			bit_set(resume_node_bitmap,  i);
		}

		/* Suspend nodes as appropriate: only during a suspend round,
		 * for nodes not in power save at the start of this iteration,
		 * under the suspend rate limit (0 disables the check), idle
		 * or down, with no suspended jobs, not completing, not
		 * powering up, idle for longer than idle_time, and not in
		 * the exclusion bitmap (when one exists) */
		if (run_suspend 					&&
		    (susp_state == 0)					&&
		    ((suspend_rate == 0) || (suspend_cnt < suspend_rate)) &&
		    (IS_NODE_IDLE(node_ptr) || IS_NODE_DOWN(node_ptr))	&&
		    (node_ptr->sus_job_cnt == 0)			&&
		    (!IS_NODE_COMPLETING(node_ptr))			&&
		    (!IS_NODE_POWER_UP(node_ptr))			&&
		    (node_ptr->last_idle != 0)				&&
		    (node_ptr->last_idle < (now - idle_time))		&&
		    ((exc_node_bitmap == NULL) ||
		     (bit_test(exc_node_bitmap, i) == 0))) {
			if (sleep_node_bitmap == NULL) {
				sleep_node_bitmap =
					bit_alloc(node_record_count);
			}
			sleep_cnt++;
			suspend_cnt++;
			suspend_cnt_f++;
			node_ptr->node_state |= NODE_STATE_POWER_SAVE;
			node_ptr->node_state &= (~NODE_STATE_NO_RESPOND);
			/* A powered-down node stays marked available unless
			 * it is down or draining */
			if (!IS_NODE_DOWN(node_ptr) &&
			    !IS_NODE_DRAIN(node_ptr))
				bit_set(avail_node_bitmap,   i);
			bit_set(power_node_bitmap,   i);
			bit_set(sleep_node_bitmap,   i);
			bit_set(suspend_node_bitmap, i);
			/* Records when the current suspend round started */
			last_suspend = now;
		}
	}
	/* Log the power save total only when more than 600 seconds have
	 * passed since the last such log */
	if (((now - last_log) > 600) && (susp_total > 0)) {
		info("Power save mode: %d nodes", susp_total);
		last_log = now;
	}

	/* Hand the collected node names to the suspend routine */
	if (sleep_node_bitmap) {
		char *nodes;
		nodes = bitmap2node_name(sleep_node_bitmap);
		if (nodes)
			_do_suspend(nodes);
		else
			error("power_save: bitmap2nodename");
		xfree(nodes);
		FREE_NULL_BITMAP(sleep_node_bitmap);
		/* last_node_update could be changed already by another thread!
		last_node_update = now; */
	}

	/* Hand the collected node names to the resume routine */
	if (wake_node_bitmap) {
		char *nodes;
		nodes = bitmap2node_name(wake_node_bitmap);
		if (nodes)
			_do_resume(nodes);
		else
			error("power_save: bitmap2nodename");
		xfree(nodes);
		FREE_NULL_BITMAP(wake_node_bitmap);
		/* last_node_update could be changed already by another thread!
		last_node_update = now; */
	}
}
예제 #5
0
파일: node.c 프로젝트: cread/slurm
/*
 * node_info_to_hv - convert node_info_t to perl HV
 * node_info IN - node record whose fields are copied
 * node_scaling IN - only read when HAVE_BG is defined, where it divides
 *	node_info->cpus to obtain the per-node CPU multiplier
 * hv IN/OUT - hash receiving one entry per stored field
 * RET 0 when the end of the function is reached, -1 when node_info->name
 *	is NULL (fields stored before that check remain in hv)
 *
 * NOTE(review): STORE_FIELD and STORE_PTR_FIELD are macros defined outside
 * this block; they may contain their own early return on failure -- confirm
 * against their definition.
 */
int
node_info_to_hv(node_info_t *node_info, uint16_t node_scaling, HV *hv)
{
	uint16_t err_cpus = 0, alloc_cpus = 0;
#ifdef HAVE_BG
	int cpus_per_node = 1;

	if(node_scaling)
		cpus_per_node = node_info->cpus / node_scaling;
#endif
	/* String fields are only stored when the pointer is non-NULL */
	if(node_info->arch)
		STORE_FIELD(hv, node_info, arch, charp);
	STORE_FIELD(hv, node_info, boot_time, time_t);
	STORE_FIELD(hv, node_info, cores, uint16_t);
	STORE_FIELD(hv, node_info, cpu_load, uint32_t);
	STORE_FIELD(hv, node_info, cpus, uint16_t);
	if (node_info->features)
		STORE_FIELD(hv, node_info, features, charp);
	if (node_info->features_act)
		STORE_FIELD(hv, node_info, features_act, charp);
	if (node_info->gres)
		STORE_FIELD(hv, node_info, gres, charp);
	/* The node name is the only mandatory string field */
	if (node_info->name)
		STORE_FIELD(hv, node_info, name, charp);
	else {
		Perl_warn (aTHX_ "node name missing in node_info_t");
		return -1;
	}
	STORE_FIELD(hv, node_info, node_state, uint32_t);
	if(node_info->os)
		STORE_FIELD(hv, node_info, os, charp);
	STORE_FIELD(hv, node_info, real_memory, uint64_t);
	if(node_info->reason)
		STORE_FIELD(hv, node_info, reason, charp);
	STORE_FIELD(hv, node_info, reason_time, time_t);
	STORE_FIELD(hv, node_info, reason_uid, uint32_t);
	STORE_FIELD(hv, node_info, slurmd_start_time, time_t);
	STORE_FIELD(hv, node_info, boards, uint16_t);
	STORE_FIELD(hv, node_info, sockets, uint16_t);
	STORE_FIELD(hv, node_info, threads, uint16_t);
	STORE_FIELD(hv, node_info, tmp_disk, uint32_t);

	/* Ask the select plugin data for the allocated sub-count */
	slurm_get_select_nodeinfo(node_info->select_nodeinfo,
				  SELECT_NODEDATA_SUBCNT,
				  NODE_STATE_ALLOCATED,
				  &alloc_cpus);
#ifdef HAVE_BG
	/* With no sub-count reported, an allocated or completing node is
	 * counted as fully allocated; otherwise scale the sub-count */
	if(!alloc_cpus
	   && (IS_NODE_ALLOCATED(node_info) || IS_NODE_COMPLETING(node_info)))
		alloc_cpus = node_info->cpus;
	else
		alloc_cpus *= cpus_per_node;
#endif

	/* Same query for the sub-count in the error state */
	slurm_get_select_nodeinfo(node_info->select_nodeinfo,
				  SELECT_NODEDATA_SUBCNT,
				  NODE_STATE_ERROR,
				  &err_cpus);
#ifdef HAVE_BG
	err_cpus *= cpus_per_node;
#endif

	/* Derived values are stored under explicit keys */
	hv_store_uint16_t(hv, "alloc_cpus", alloc_cpus);
	hv_store_uint16_t(hv, "err_cpus", err_cpus);

	STORE_PTR_FIELD(hv, node_info, select_nodeinfo, "Slurm::dynamic_plugin_data_t");

	STORE_FIELD(hv, node_info, weight, uint32_t);
	return 0;
}
예제 #6
0
/*
 * slurm_sprint_node_table - output information about a specific Slurm nodes
 *	based upon message as loaded using slurm_load_node
 * IN node_ptr - an individual node information record pointer
 * IN node_scaling - number of nodes each node represents
 * IN one_liner - print as a single line if true
 * RET out - char * containing formatted output (must be freed after call)
 *           NULL is returned on failure.
 *
 * The text is built incrementally in "out" through xstrcat()/xstrfmtcat().
 * Logical lines are separated by a single space when one_liner is set and by
 * a newline plus three spaces otherwise.
 */
char *
slurm_sprint_node_table (node_info_t * node_ptr,
			 int node_scaling, int one_liner )
{
	uint16_t my_state = node_ptr->node_state;
	char *cloud_str = "", *comp_str = "", *drain_str = "", *power_str = "";
	char load_str[32], tmp_line[512], time_str[32];
	char *out = NULL, *reason_str = NULL, *select_reason_str = NULL;
	uint16_t err_cpus = 0, alloc_cpus = 0;
	int cpus_per_node = 1;
	int idle_cpus = node_ptr->cpus;
	uint32_t cluster_flags = slurmdb_setup_cluster_flags();
	/* Separator emitted between logical output lines */
	char *line_end = (one_liner) ? " " : "\n   ";

	if (node_scaling)
		cpus_per_node = node_ptr->cpus / node_scaling;

	/* Strip each flag from the state and remember it as a "+SUFFIX" */
	if (my_state & NODE_STATE_CLOUD) {
		my_state &= (~NODE_STATE_CLOUD);
		cloud_str = "+CLOUD";
	}
	if (my_state & NODE_STATE_COMPLETING) {
		my_state &= (~NODE_STATE_COMPLETING);
		comp_str = "+COMPLETING";
	}
	if (my_state & NODE_STATE_DRAIN) {
		my_state &= (~NODE_STATE_DRAIN);
		drain_str = "+DRAIN";
	}
	if (my_state & NODE_STATE_POWER_SAVE) {
		my_state &= (~NODE_STATE_POWER_SAVE);
		power_str = "+POWER";
	}

	/* CPUs allocated, as reported through the select plugin data */
	slurm_get_select_nodeinfo(node_ptr->select_nodeinfo,
				  SELECT_NODEDATA_SUBCNT,
				  NODE_STATE_ALLOCATED,
				  &alloc_cpus);
	if (cluster_flags & CLUSTER_FLAG_BG) {
		/* No sub-count but the node is allocated/completing: count
		 * the whole node, otherwise scale the sub-count */
		if (!alloc_cpus &&
		    (IS_NODE_ALLOCATED(node_ptr) ||
		     IS_NODE_COMPLETING(node_ptr)))
			alloc_cpus = node_ptr->cpus;
		else
			alloc_cpus *= cpus_per_node;
	}
	idle_cpus -= alloc_cpus;

	/* CPUs in an error state */
	slurm_get_select_nodeinfo(node_ptr->select_nodeinfo,
				  SELECT_NODEDATA_SUBCNT,
				  NODE_STATE_ERROR,
				  &err_cpus);
	if (cluster_flags & CLUSTER_FLAG_BG)
		err_cpus *= cpus_per_node;
	idle_cpus -= err_cpus;

	/* Partially used node: keep only the flag bits and report MIXED */
	if ((alloc_cpus && err_cpus) ||
	    (idle_cpus  && (idle_cpus != node_ptr->cpus))) {
		my_state &= NODE_STATE_FLAGS;
		my_state |= NODE_STATE_MIXED;
	}

	/****** Line 1 ******/
	snprintf(tmp_line, sizeof(tmp_line), "NodeName=%s ", node_ptr->name);
	xstrcat(out, tmp_line);
	if (cluster_flags & CLUSTER_FLAG_BG) {
		slurm_get_select_nodeinfo(node_ptr->select_nodeinfo,
					  SELECT_NODEDATA_RACK_MP,
					  0, &select_reason_str);
		if (select_reason_str) {
			xstrfmtcat(out, "RackMidplane=%s ", select_reason_str);
			xfree(select_reason_str);
		}
	}

	if (node_ptr->arch) {
		snprintf(tmp_line, sizeof(tmp_line), "Arch=%s ",
			 node_ptr->arch);
		xstrcat(out, tmp_line);
	}
	snprintf(tmp_line, sizeof(tmp_line), "CoresPerSocket=%u",
		 node_ptr->cores);
	xstrcat(out, tmp_line);
	xstrcat(out, line_end);

	/****** Line 2 ******/
	if (node_ptr->cpu_load == NO_VAL)
		strcpy(load_str, "N/A");
	else {
		snprintf(load_str, sizeof(load_str), "%.2f",
			 (node_ptr->cpu_load / 100.0));
	}
	snprintf(tmp_line, sizeof(tmp_line),
		 "CPUAlloc=%u CPUErr=%u CPUTot=%u CPULoad=%s Features=%s",
		 alloc_cpus, err_cpus, node_ptr->cpus, load_str,
		 node_ptr->features);
	xstrcat(out, tmp_line);
	xstrcat(out, line_end);

	/****** Line 3 ******/
	snprintf(tmp_line, sizeof(tmp_line), "Gres=%s",node_ptr->gres);
	xstrcat(out, tmp_line);
	xstrcat(out, line_end);

	/****** Line 4 (optional) ******/
	if (node_ptr->node_hostname || node_ptr->node_addr) {
		snprintf(tmp_line, sizeof(tmp_line),
			 "NodeAddr=%s NodeHostName=%s",
			 node_ptr->node_addr, node_ptr->node_hostname);
		xstrcat(out, tmp_line);
		xstrcat(out, line_end);
	}

	/****** Line 5 ******/
	if (node_ptr->os) {
		snprintf(tmp_line, sizeof(tmp_line), "OS=%s ", node_ptr->os);
		xstrcat(out, tmp_line);
	}
	snprintf(tmp_line, sizeof(tmp_line),
		 "RealMemory=%u Sockets=%u Boards=%u",
		 node_ptr->real_memory, node_ptr->sockets, node_ptr->boards);
	xstrcat(out, tmp_line);
	xstrcat(out, line_end);

	/****** Line 6 ******/

	snprintf(tmp_line, sizeof(tmp_line),
		 "State=%s%s%s%s%s ThreadsPerCore=%u TmpDisk=%u Weight=%u",
		 node_state_string(my_state),
		 cloud_str, comp_str, drain_str, power_str,
		 node_ptr->threads, node_ptr->tmp_disk, node_ptr->weight);
	xstrcat(out, tmp_line);
	xstrcat(out, line_end);

	/****** Line 7 ******/
	if (node_ptr->boot_time) {
		slurm_make_time_str ((time_t *)&node_ptr->boot_time,
				     time_str, sizeof(time_str));
	} else {
		snprintf(time_str, sizeof(time_str), "%s", "None");
	}
	snprintf(tmp_line, sizeof(tmp_line), "BootTime=%s ", time_str);
	xstrcat(out, tmp_line);

	if (node_ptr->slurmd_start_time) {
		slurm_make_time_str ((time_t *)&node_ptr->slurmd_start_time,
				     time_str, sizeof(time_str));
	} else {
		snprintf(time_str, sizeof(time_str), "%s", "None");
	}
	snprintf(tmp_line, sizeof(tmp_line), "SlurmdStartTime=%s", time_str);
	xstrcat(out, tmp_line);
	xstrcat(out, line_end);

	/****** power Line ******/
	if (node_ptr->energy->current_watts == NO_VAL)
		snprintf(tmp_line, sizeof(tmp_line), "CurrentWatts=n/s "
				"LowestJoules=n/s ConsumedJoules=n/s");
	else
		snprintf(tmp_line, sizeof(tmp_line), "CurrentWatts=%u "
				"LowestJoules=%u ConsumedJoules=%u",
				node_ptr->energy->current_watts,
				node_ptr->energy->base_watts,
				node_ptr->energy->consumed_energy);
	xstrcat(out, tmp_line);
	xstrcat(out, line_end);

	/****** Line 8 ******/
	/* Merge the node's own reason and the select plugin's extra info
	 * into one newline-separated string, then print it token by token */
	if (node_ptr->reason && node_ptr->reason[0])
		xstrcat(reason_str, node_ptr->reason);
	slurm_get_select_nodeinfo(node_ptr->select_nodeinfo,
				  SELECT_NODEDATA_EXTRA_INFO,
				  0, &select_reason_str);
	if (select_reason_str && select_reason_str[0]) {
		if (reason_str)
			xstrcat(reason_str, "\n");
		xstrcat(reason_str, select_reason_str);
	}
	xfree(select_reason_str);
	if (reason_str) {
		int inx = 1;
		char *save_ptr = NULL, *tok, *user_name;
		tok = strtok_r(reason_str, "\n", &save_ptr);
		while (tok) {
			if (inx == 1) {
				xstrcat(out, "Reason=");
			} else {
				/* Continuation lines are indented to line up
				 * under the text following "Reason=" */
				xstrcat(out, line_end);
				xstrcat(out, "       ");
			}
			snprintf(tmp_line, sizeof(tmp_line), "%s", tok);
			xstrcat(out, tmp_line);
			/* Only the first reason line carries [user@time] */
			if ((inx++ == 1) && node_ptr->reason_time) {
				user_name = uid_to_string(node_ptr->reason_uid);
				slurm_make_time_str((time_t *)&node_ptr->reason_time,
						    time_str,sizeof(time_str));
				snprintf(tmp_line, sizeof(tmp_line),
					 " [%s@%s]", user_name, time_str);
				xstrcat(out, tmp_line);
				/* NOTE(review): uid_to_string() is documented
				 * in Slurm's uid.h as returning a string the
				 * caller must xfree; without this the name
				 * leaks on every call -- confirm for this
				 * tree. */
				xfree(user_name);
			}
			tok = strtok_r(NULL, "\n", &save_ptr);
		}
		xfree(reason_str);
	}
	if (one_liner)
		xstrcat(out, "\n");
	else
		xstrcat(out, "\n\n");

	return out;
}
예제 #7
0
/*
 * _update_sinfo - fold one node's data into an sinfo summary record.
 * sinfo_ptr IN/OUT - summary record being accumulated
 * node_ptr IN - node to add to the record
 * node_scaling IN - node count this call adds to the nodes_* totals
 *	(replaced by g_node_scaling in one BlueGene case, see below)
 *
 * The first node added seeds every min/max field and the descriptive fields;
 * later nodes only widen the min/max ranges. A node whose name is already in
 * sinfo_ptr->nodes is ignored. After that the node and CPU counters
 * (alloc / idle / other / total) are updated.
 */
static void _update_sinfo(sinfo_data_t *sinfo_ptr, node_info_t *node_ptr,
			  uint32_t node_scaling)
{
	uint16_t base_state;
	uint16_t used_cpus = 0, error_cpus = 0;
	int total_cpus = 0, total_nodes = 0;
	/* since node_scaling could be less here, we need to use the
	 * global node scaling which should never change. */
	int single_node_cpus = (node_ptr->cpus / g_node_scaling);

 	base_state = node_ptr->node_state & NODE_STATE_BASE;

	if (sinfo_ptr->nodes_total == 0) {	/* first node added */
		/* String fields are shared by pointer, not copied */
		sinfo_ptr->node_state = node_ptr->node_state;
		sinfo_ptr->features   = node_ptr->features;
		sinfo_ptr->gres       = node_ptr->gres;
		sinfo_ptr->reason     = node_ptr->reason;
		sinfo_ptr->reason_time= node_ptr->reason_time;
		sinfo_ptr->reason_uid = node_ptr->reason_uid;
		sinfo_ptr->min_cpus    = node_ptr->cpus;
		sinfo_ptr->max_cpus    = node_ptr->cpus;
		sinfo_ptr->min_sockets = node_ptr->sockets;
		sinfo_ptr->max_sockets = node_ptr->sockets;
		sinfo_ptr->min_cores   = node_ptr->cores;
		sinfo_ptr->max_cores   = node_ptr->cores;
		sinfo_ptr->min_threads = node_ptr->threads;
		sinfo_ptr->max_threads = node_ptr->threads;
		sinfo_ptr->min_disk   = node_ptr->tmp_disk;
		sinfo_ptr->max_disk   = node_ptr->tmp_disk;
		sinfo_ptr->min_mem    = node_ptr->real_memory;
		sinfo_ptr->max_mem    = node_ptr->real_memory;
		sinfo_ptr->min_weight = node_ptr->weight;
		sinfo_ptr->max_weight = node_ptr->weight;
		sinfo_ptr->min_cpu_load = node_ptr->cpu_load;
		sinfo_ptr->max_cpu_load = node_ptr->cpu_load;
		sinfo_ptr->max_cpus_per_node = sinfo_ptr->part_info->
					       max_cpus_per_node;
		sinfo_ptr->version    = node_ptr->version;
	} else if (hostlist_find(sinfo_ptr->nodes, node_ptr->name) != -1) {
		/* we already have this node in this record,
		 * just return, don't duplicate */
		return;
	} else {
		/* Widen each min/max range to include this node */
		if (sinfo_ptr->min_cpus > node_ptr->cpus)
			sinfo_ptr->min_cpus = node_ptr->cpus;
		if (sinfo_ptr->max_cpus < node_ptr->cpus)
			sinfo_ptr->max_cpus = node_ptr->cpus;

		if (sinfo_ptr->min_sockets > node_ptr->sockets)
			sinfo_ptr->min_sockets = node_ptr->sockets;
		if (sinfo_ptr->max_sockets < node_ptr->sockets)
			sinfo_ptr->max_sockets = node_ptr->sockets;

		if (sinfo_ptr->min_cores > node_ptr->cores)
			sinfo_ptr->min_cores = node_ptr->cores;
		if (sinfo_ptr->max_cores < node_ptr->cores)
			sinfo_ptr->max_cores = node_ptr->cores;

		if (sinfo_ptr->min_threads > node_ptr->threads)
			sinfo_ptr->min_threads = node_ptr->threads;
		if (sinfo_ptr->max_threads < node_ptr->threads)
			sinfo_ptr->max_threads = node_ptr->threads;

		if (sinfo_ptr->min_disk > node_ptr->tmp_disk)
			sinfo_ptr->min_disk = node_ptr->tmp_disk;
		if (sinfo_ptr->max_disk < node_ptr->tmp_disk)
			sinfo_ptr->max_disk = node_ptr->tmp_disk;

		if (sinfo_ptr->min_mem > node_ptr->real_memory)
			sinfo_ptr->min_mem = node_ptr->real_memory;
		if (sinfo_ptr->max_mem < node_ptr->real_memory)
			sinfo_ptr->max_mem = node_ptr->real_memory;

		if (sinfo_ptr->min_weight> node_ptr->weight)
			sinfo_ptr->min_weight = node_ptr->weight;
		if (sinfo_ptr->max_weight < node_ptr->weight)
			sinfo_ptr->max_weight = node_ptr->weight;

		if (sinfo_ptr->min_cpu_load > node_ptr->cpu_load)
			sinfo_ptr->min_cpu_load = node_ptr->cpu_load;
		if (sinfo_ptr->max_cpu_load < node_ptr->cpu_load)
			sinfo_ptr->max_cpu_load = node_ptr->cpu_load;
	}

	/* Record the node name, plus address/hostname when requested */
	hostlist_push_host(sinfo_ptr->nodes, node_ptr->name);
	if (params.match_flags.node_addr_flag)
		hostlist_push_host(sinfo_ptr->node_addr, node_ptr->node_addr);
	if (params.match_flags.hostnames_flag)
		hostlist_push_host(sinfo_ptr->hostnames, node_ptr->node_hostname);

	total_cpus = node_ptr->cpus;
	total_nodes = node_scaling;

	/* Allocated and error sub-counts from the select plugin data */
	select_g_select_nodeinfo_get(node_ptr->select_nodeinfo,
				     SELECT_NODEDATA_SUBCNT,
				     NODE_STATE_ALLOCATED,
				     &used_cpus);
	select_g_select_nodeinfo_get(node_ptr->select_nodeinfo,
				     SELECT_NODEDATA_SUBCNT,
				     NODE_STATE_ERROR,
				     &error_cpus);

	if (params.cluster_flags & CLUSTER_FLAG_BG) {
		if (!params.match_flags.state_flag &&
		    (used_cpus || error_cpus)) {
			/* We only get one shot at this (because all states
			 * are combined together), so we need to make
			 * sure we get all the subgrps accounted. (So use
			 * g_node_scaling for safe measure) */
			total_nodes = g_node_scaling;

			/* Here the sub-counts are added to the node
			 * counters first, then scaled for the CPU counters
			 * updated at the end of the function */
			sinfo_ptr->nodes_alloc += used_cpus;
			sinfo_ptr->nodes_other += error_cpus;
			sinfo_ptr->nodes_idle +=
				(total_nodes - (used_cpus + error_cpus));
			used_cpus  *= single_node_cpus;
			error_cpus *= single_node_cpus;
		} else {
			/* process only for this subgrp and then return */
			total_cpus = total_nodes * single_node_cpus;

			if ((base_state == NODE_STATE_ALLOCATED) ||
			    (base_state == NODE_STATE_MIXED) ||
			    (node_ptr->node_state & NODE_STATE_COMPLETING)) {
				sinfo_ptr->nodes_alloc += total_nodes;
				sinfo_ptr->cpus_alloc += total_cpus;
			} else if (IS_NODE_DRAIN(node_ptr) ||
				   (base_state == NODE_STATE_DOWN)) {
				sinfo_ptr->nodes_other += total_nodes;
				sinfo_ptr->cpus_other += total_cpus;
			} else {
				sinfo_ptr->nodes_idle += total_nodes;
				sinfo_ptr->cpus_idle += total_cpus;
			}

			sinfo_ptr->nodes_total += total_nodes;
			sinfo_ptr->cpus_total += total_cpus;

			return;
		}
	} else {
		/* Non-BlueGene: classify the whole node by its state */
		if ((base_state == NODE_STATE_ALLOCATED) ||
		    (base_state == NODE_STATE_MIXED) ||
		    IS_NODE_COMPLETING(node_ptr))
			sinfo_ptr->nodes_alloc += total_nodes;
		else if (IS_NODE_DRAIN(node_ptr)
			 || (base_state == NODE_STATE_DOWN))
			sinfo_ptr->nodes_other += total_nodes;
		else
			sinfo_ptr->nodes_idle += total_nodes;
	}

	sinfo_ptr->nodes_total += total_nodes;


	/* CPU accounting: after this subtraction total_cpus holds the CPUs
	 * that are neither allocated nor in error */
	sinfo_ptr->cpus_alloc += used_cpus;
	sinfo_ptr->cpus_total += total_cpus;
	total_cpus -= used_cpus + error_cpus;

	if (error_cpus) {
		sinfo_ptr->cpus_idle += total_cpus;
		sinfo_ptr->cpus_other += error_cpus;
	} else if (IS_NODE_DRAIN(node_ptr) ||
		   (base_state == NODE_STATE_DOWN)) {
		/* Remaining CPUs of a drained or down node count as other */
		sinfo_ptr->cpus_other += total_cpus;
	} else
		sinfo_ptr->cpus_idle += total_cpus;
}
예제 #8
0
/*
 * _filter_out - decide whether a node is excluded from the report.
 * node_ptr IN - node being examined
 * RET - true when the node fails one of the active filters in params,
 *	false when it should be reported
 *
 * Filters are applied in order: node name list, dead/responding selection,
 * then the requested state list (a node passes if any listed state matches).
 * The hostlist built from params.nodes is created once and kept in a static.
 */
static bool _filter_out(node_info_t *node_ptr)
{
	static hostlist_t host_list = NULL;
	ListIterator state_iter;
	/* Scratch record so the IS_NODE_* macros can test a requested state;
	 * only its node_state member is ever set */
	node_info_t probe, *probe_ptr = &probe;
	uint16_t sub_cnt = 0, masked_state;
	bool found = false;
	int *want_ptr;

	if (params.nodes) {
		if (!host_list)
			host_list = hostlist_create(params.nodes);
		if (hostlist_find(host_list, node_ptr->name) == -1)
			return true;
	}

	if (params.dead_nodes && !IS_NODE_NO_RESPOND(node_ptr))
		return true;

	if (params.responding_nodes && IS_NODE_NO_RESPOND(node_ptr))
		return true;

	/* No state filter requested: the node is reported */
	if (!params.state_list)
		return false;

	state_iter = list_iterator_create(params.state_list);
	/* Stop pulling states from the list as soon as one matches */
	while (!found && (want_ptr = list_next(state_iter))) {
		probe_ptr->node_state = *want_ptr;

		if (*want_ptr == NODE_STATE_DRAIN) {
			/* Anything with the drain flag set */
			if (IS_NODE_DRAIN(node_ptr))
				found = true;
		} else if (IS_NODE_DRAINING(probe_ptr)) {
			/* Anything that node_state_string maps to DRAINING */
			if (IS_NODE_DRAINING(node_ptr))
				found = true;
		} else if (IS_NODE_DRAINED(probe_ptr)) {
			/* Anything that node_state_string maps to DRAINED */
			if (IS_NODE_DRAINED(node_ptr))
				found = true;
		} else if (*want_ptr & NODE_STATE_FLAGS) {
			/* Requested value carries flag bits: any shared bit */
			if (*want_ptr & node_ptr->node_state)
				found = true;
		} else if (*want_ptr == NODE_STATE_ERROR) {
			slurm_get_select_nodeinfo(node_ptr->select_nodeinfo,
						  SELECT_NODEDATA_SUBCNT,
						  NODE_STATE_ERROR,
						  &sub_cnt);
			if (sub_cnt)
				found = true;
		} else if (*want_ptr == NODE_STATE_ALLOCATED) {
			slurm_get_select_nodeinfo(node_ptr->select_nodeinfo,
						  SELECT_NODEDATA_SUBCNT,
						  NODE_STATE_ALLOCATED,
						  &sub_cnt);
			/* On BlueGene a zero sub-count on an allocated or
			 * completing node counts as the whole node */
			if ((params.cluster_flags & CLUSTER_FLAG_BG) &&
			    !sub_cnt &&
			    (IS_NODE_ALLOCATED(node_ptr) ||
			     IS_NODE_COMPLETING(node_ptr)))
				sub_cnt = node_ptr->cpus;
			if (sub_cnt)
				found = true;
		} else if (*want_ptr == NODE_STATE_IDLE) {
			/* Idle must match exactly once NO_RESPOND is masked */
			masked_state = node_ptr->node_state &
				(~NODE_STATE_NO_RESPOND);
			if (masked_state == NODE_STATE_IDLE)
				found = true;
		} else {
			/* Plain base state comparison */
			masked_state = node_ptr->node_state & NODE_STATE_BASE;
			if (masked_state == *want_ptr)
				found = true;
		}
	}
	list_iterator_destroy(state_iter);

	return !found;
}
예제 #9
0
/*
 * _query_server - download the current server state
 * part_pptr IN/OUT - partition information message
 * node_pptr IN/OUT - node information message
 * block_pptr IN/OUT - BlueGene block data
 * reserv_pptr IN/OUT - reservation information message
 * clear_old IN - If set, then always replace old data, needed when going
 *		  between clusters.
 * RET zero or error code
 *
 * The most recent message of each kind is kept in a function-level static.
 * On a later call the cached message's last_update time is passed to the
 * load routine; a SLURM_NO_CHANGE_IN_DATA result keeps the cached message,
 * a successful load frees it and replaces it. The function returns at the
 * first load that fails, leaving the remaining output pointers unset.
 */
static int
_query_server(partition_info_msg_t ** part_pptr,
	      node_info_msg_t ** node_pptr,
	      block_info_msg_t ** block_pptr,
	      reserve_info_msg_t ** reserv_pptr,
	      bool clear_old)
{
	static partition_info_msg_t *old_part_ptr = NULL, *new_part_ptr;
	static node_info_msg_t *old_node_ptr = NULL, *new_node_ptr;
	static block_info_msg_t *old_bg_ptr = NULL, *new_bg_ptr;
	static reserve_info_msg_t *old_resv_ptr = NULL, *new_resv_ptr;
	int error_code;
	uint16_t show_flags = 0;
	int cc;
	node_info_t *node_ptr;

	if (params.all_flag)
		show_flags |= SHOW_ALL;

	/* ---- partitions ---- */
	if (old_part_ptr) {
		/* Zeroing last_update forces the load to return fresh data */
		if (clear_old)
			old_part_ptr->last_update = 0;
		error_code = slurm_load_partitions(old_part_ptr->last_update,
						   &new_part_ptr, show_flags);
		if (error_code == SLURM_SUCCESS)
			slurm_free_partition_info_msg(old_part_ptr);
		else if (slurm_get_errno() == SLURM_NO_CHANGE_IN_DATA) {
			/* Nothing newer on the server: reuse the cache */
			error_code = SLURM_SUCCESS;
			new_part_ptr = old_part_ptr;
		}
	} else {
		/* First call: (time_t) NULL is used as a zero timestamp */
		error_code = slurm_load_partitions((time_t) NULL, &new_part_ptr,
						   show_flags);
	}
	if (error_code) {
		slurm_perror("slurm_load_partitions");
		return error_code;
	}

	old_part_ptr = new_part_ptr;
	*part_pptr = new_part_ptr;

	/* ---- nodes ---- */
	if (old_node_ptr) {
		if (clear_old)
			old_node_ptr->last_update = 0;
		/* A single-node query does not take a last_update time */
		if (params.node_name_single) {
			error_code = slurm_load_node_single(&new_node_ptr,
							    params.nodes,
							    show_flags);
		} else {
			error_code = slurm_load_node(old_node_ptr->last_update,
						     &new_node_ptr, show_flags);
		}
		if (error_code == SLURM_SUCCESS)
			slurm_free_node_info_msg(old_node_ptr);
		else if (slurm_get_errno() == SLURM_NO_CHANGE_IN_DATA) {
			error_code = SLURM_SUCCESS;
			new_node_ptr = old_node_ptr;
		}
	} else if (params.node_name_single) {
		error_code = slurm_load_node_single(&new_node_ptr, params.nodes,
						    show_flags);
	} else {
		error_code = slurm_load_node((time_t) NULL, &new_node_ptr,
					     show_flags);
	}

	if (error_code) {
		slurm_perror("slurm_load_node");
		return error_code;
	}
	old_node_ptr = new_node_ptr;
	*node_pptr = new_node_ptr;

	/* Set the node state as NODE_STATE_MIXED. */
	for (cc = 0; cc < new_node_ptr->record_count; cc++) {
		node_ptr = &(new_node_ptr->node_array[cc]);
		if (IS_NODE_DRAIN(node_ptr)) {
			/* don't worry about mixed since the
			 * whole node is being drained. */
		} else {
			uint16_t alloc_cpus = 0, err_cpus = 0, idle_cpus;
			int single_node_cpus =
				(node_ptr->cpus / g_node_scaling);

			select_g_select_nodeinfo_get(node_ptr->select_nodeinfo,
						     SELECT_NODEDATA_SUBCNT,
						     NODE_STATE_ALLOCATED,
						     &alloc_cpus);
			if (params.cluster_flags & CLUSTER_FLAG_BG) {
				/* No sub-count on an allocated/completing
				 * node means the whole node; otherwise scale
				 * the sub-count */
				if (!alloc_cpus &&
				    (IS_NODE_ALLOCATED(node_ptr) ||
				     IS_NODE_COMPLETING(node_ptr)))
					alloc_cpus = node_ptr->cpus;
				else
					alloc_cpus *= single_node_cpus;
			}
			idle_cpus = node_ptr->cpus - alloc_cpus;
			select_g_select_nodeinfo_get(node_ptr->select_nodeinfo,
						     SELECT_NODEDATA_SUBCNT,
						     NODE_STATE_ERROR,
						     &err_cpus);
			if (params.cluster_flags & CLUSTER_FLAG_BG)
				err_cpus *= single_node_cpus;
			idle_cpus -= err_cpus;

			/* Partially used node: keep only the flag bits and
			 * replace the base state with MIXED. This edits the
			 * loaded (and cached) message in place. */
			if ((alloc_cpus && err_cpus) ||
			    (idle_cpus  && (idle_cpus != node_ptr->cpus))) {
				node_ptr->node_state &= NODE_STATE_FLAGS;
				node_ptr->node_state |= NODE_STATE_MIXED;
			}
		}
	}

	/* ---- reservations ---- */
	if (old_resv_ptr) {
		if (clear_old)
			old_resv_ptr->last_update = 0;
		error_code = slurm_load_reservations(old_resv_ptr->last_update,
						     &new_resv_ptr);
		if (error_code == SLURM_SUCCESS)
			slurm_free_reservation_info_msg(old_resv_ptr);
		else if (slurm_get_errno() == SLURM_NO_CHANGE_IN_DATA) {
			error_code = SLURM_SUCCESS;
			new_resv_ptr = old_resv_ptr;
		}
	} else {
		error_code = slurm_load_reservations((time_t) NULL,
						     &new_resv_ptr);
	}

	if (error_code) {
		slurm_perror("slurm_load_reservations");
		return error_code;
	}
	old_resv_ptr = new_resv_ptr;
	*reserv_pptr = new_resv_ptr;

	/* Block data is only fetched when bg_flag is set */
	if (!params.bg_flag)
		return SLURM_SUCCESS;

	/* ---- BlueGene blocks ---- */
	if (params.cluster_flags & CLUSTER_FLAG_BG) {
		if (old_bg_ptr) {
			if (clear_old)
				old_bg_ptr->last_update = 0;
			error_code = slurm_load_block_info(
				old_bg_ptr->last_update,
				&new_bg_ptr, show_flags);
			if (error_code == SLURM_SUCCESS)
				slurm_free_block_info_msg(old_bg_ptr);
			else if (slurm_get_errno() == SLURM_NO_CHANGE_IN_DATA) {
				error_code = SLURM_SUCCESS;
				new_bg_ptr = old_bg_ptr;
			}
		} else {
			error_code = slurm_load_block_info((time_t) NULL,
							   &new_bg_ptr,
							   show_flags);
		}
	}

	/* When the cluster is not BlueGene, error_code still holds the
	 * success value from the reservation load above */
	if (error_code) {
		slurm_perror("slurm_load_block");
		return error_code;
	}
	old_bg_ptr = new_bg_ptr;
	*block_pptr = new_bg_ptr;
	return SLURM_SUCCESS;
}
예제 #10
0
/*
 * slurm_sprint_node_table - format information about one specific Slurm node
 *	based upon a message as loaded using slurm_load_node
 * IN node_ptr - an individual node information record pointer
 * IN node_scaling - number of nodes each node represents
 * IN one_liner - print as a single line if true
 * RET out - char * containing formatted output (must be freed after call)
 *           NULL is returned on failure.
 *
 * The text is accumulated with xstrcat()/xstrfmtcat(). Logical output lines
 * are separated by a single space when one_liner is set, otherwise by a
 * newline followed by a three-space indent.
 */
char *
slurm_sprint_node_table (node_info_t * node_ptr,
			 int node_scaling, int one_liner )
{
	uint32_t my_state = node_ptr->node_state;
	char *cloud_str = "", *comp_str = "", *drain_str = "", *power_str = "";
	char time_str[32];
	char *out = NULL, *reason_str = NULL, *select_reason_str = NULL;
	uint16_t err_cpus = 0, alloc_cpus = 0;
	int cpus_per_node = 1;
	int idle_cpus;
	uint32_t cluster_flags = slurmdb_setup_cluster_flags();
	/* Zero-initialized like the other select-plugin out-parameters so that
	 * "AllocMem=" never prints an indeterminate value if the query below
	 * leaves it untouched. */
	uint64_t alloc_memory = 0;
	char *node_alloc_tres = NULL;
	char *line_end = (one_liner) ? " " : "\n   ";

	if (node_scaling)
		cpus_per_node = node_ptr->cpus / node_scaling;

	/*
	 * Strip the flag bits from the local state copy and remember each one
	 * as a "+SUFFIX" string; they are appended after the state name in the
	 * "State=" field below.
	 */
	if (my_state & NODE_STATE_CLOUD) {
		my_state &= (~NODE_STATE_CLOUD);
		cloud_str = "+CLOUD";
	}
	if (my_state & NODE_STATE_COMPLETING) {
		my_state &= (~NODE_STATE_COMPLETING);
		comp_str = "+COMPLETING";
	}
	if (my_state & NODE_STATE_DRAIN) {
		my_state &= (~NODE_STATE_DRAIN);
		drain_str = "+DRAIN";
	}
	/* FAIL shares drain_str, so "+FAIL" replaces "+DRAIN" if both are set */
	if (my_state & NODE_STATE_FAIL) {
		my_state &= (~NODE_STATE_FAIL);
		drain_str = "+FAIL";
	}
	if (my_state & NODE_STATE_POWER_SAVE) {
		my_state &= (~NODE_STATE_POWER_SAVE);
		power_str = "+POWER";
	}

	/* Allocated CPU count as reported by the select plugin */
	slurm_get_select_nodeinfo(node_ptr->select_nodeinfo,
				  SELECT_NODEDATA_SUBCNT,
				  NODE_STATE_ALLOCATED,
				  &alloc_cpus);
	if (cluster_flags & CLUSTER_FLAG_BG) {
		/* No sub-count but node is allocated/completing: count every
		 * CPU as allocated. Otherwise scale the sub-count to CPUs. */
		if (!alloc_cpus &&
		    (IS_NODE_ALLOCATED(node_ptr) ||
		     IS_NODE_COMPLETING(node_ptr)))
			alloc_cpus = node_ptr->cpus;
		else
			alloc_cpus *= cpus_per_node;
	}
	idle_cpus = node_ptr->cpus - alloc_cpus;

	/* CPU count in error state as reported by the select plugin */
	slurm_get_select_nodeinfo(node_ptr->select_nodeinfo,
				  SELECT_NODEDATA_SUBCNT,
				  NODE_STATE_ERROR,
				  &err_cpus);
	if (cluster_flags & CLUSTER_FLAG_BG)
		err_cpus *= cpus_per_node;
	idle_cpus -= err_cpus;

	/* Partially used node (or allocated and error CPUs together):
	 * keep only the NODE_STATE_FLAGS bits and report it as MIXED */
	if ((alloc_cpus && err_cpus) ||
	    (idle_cpus  && (idle_cpus != node_ptr->cpus))) {
		my_state &= NODE_STATE_FLAGS;
		my_state |= NODE_STATE_MIXED;
	}

	/****** Line 1 ******/
	xstrfmtcat(out, "NodeName=%s ", node_ptr->name);
	if (cluster_flags & CLUSTER_FLAG_BG) {
		/* select_reason_str is borrowed here as a scratch pointer */
		slurm_get_select_nodeinfo(node_ptr->select_nodeinfo,
					  SELECT_NODEDATA_RACK_MP,
					  0, &select_reason_str);
		if (select_reason_str) {
			xstrfmtcat(out, "RackMidplane=%s ", select_reason_str);
			xfree(select_reason_str);
		}
	}

	if (node_ptr->arch)
		xstrfmtcat(out, "Arch=%s ", node_ptr->arch);

	xstrfmtcat(out, "CoresPerSocket=%u", node_ptr->cores);

	xstrcat(out, line_end);

	/****** Line ******/
	xstrfmtcat(out, "CPUAlloc=%u CPUErr=%u CPUTot=%u ",
		   alloc_cpus, err_cpus, node_ptr->cpus);

	/* cpu_load is divided by 100 for display with two decimals */
	if (node_ptr->cpu_load == NO_VAL)
		xstrcat(out, "CPULoad=N/A");
	else
		xstrfmtcat(out, "CPULoad=%.2f", (node_ptr->cpu_load / 100.0));

	xstrcat(out, line_end);

	/****** Line ******/
	xstrfmtcat(out, "AvailableFeatures=%s", node_ptr->features);
	xstrcat(out, line_end);

	/****** Line ******/
	xstrfmtcat(out, "ActiveFeatures=%s", node_ptr->features_act);
	xstrcat(out, line_end);

	/****** Line ******/
	xstrfmtcat(out, "Gres=%s", node_ptr->gres);
	xstrcat(out, line_end);

	/****** Line (optional) ******/
	if (node_ptr->gres_drain) {
		xstrfmtcat(out, "GresDrain=%s", node_ptr->gres_drain);
		xstrcat(out, line_end);
	}

	/****** Line (optional) ******/
	if (node_ptr->gres_used) {
		xstrfmtcat(out, "GresUsed=%s", node_ptr->gres_used);
		xstrcat(out, line_end);
	}

	/****** Line (optional) ******/
	if (node_ptr->node_hostname || node_ptr->node_addr) {
		xstrfmtcat(out, "NodeAddr=%s NodeHostName=%s Version=%s",
			   node_ptr->node_addr, node_ptr->node_hostname,
			   node_ptr->version);
		xstrcat(out, line_end);
	}

	/****** Line ******/
	if (node_ptr->os)
		xstrfmtcat(out, "OS=%s ", node_ptr->os);

	slurm_get_select_nodeinfo(node_ptr->select_nodeinfo,
				  SELECT_NODEDATA_MEM_ALLOC,
				  NODE_STATE_ALLOCATED,
				  &alloc_memory);
	xstrfmtcat(out, "RealMemory=%"PRIu64" AllocMem=%"PRIu64" ",
		   node_ptr->real_memory, alloc_memory);

	if (node_ptr->free_mem == NO_VAL64)
		xstrcat(out, "FreeMem=N/A ");
	else
		xstrfmtcat(out, "FreeMem=%"PRIu64" ", node_ptr->free_mem);

	xstrfmtcat(out, "Sockets=%u Boards=%u",
		   node_ptr->sockets, node_ptr->boards);

	xstrcat(out, line_end);

	/****** core & memory specialization Line (optional) ******/
	if (node_ptr->core_spec_cnt || node_ptr->cpu_spec_list ||
	    node_ptr->mem_spec_limit) {
		if (node_ptr->core_spec_cnt) {
			xstrfmtcat(out, "CoreSpecCount=%u ",
				   node_ptr->core_spec_cnt);
		}
		if (node_ptr->cpu_spec_list) {
			xstrfmtcat(out, "CPUSpecList=%s ",
				   node_ptr->cpu_spec_list);
		}
		if (node_ptr->mem_spec_limit) {
			xstrfmtcat(out, "MemSpecLimit=%"PRIu64"",
				   node_ptr->mem_spec_limit);
		}
		xstrcat(out, line_end);
	}

	/****** Line ******/
	/* Base (or MIXED) state name followed by the flag suffixes */
	xstrfmtcat(out, "State=%s%s%s%s%s ThreadsPerCore=%u TmpDisk=%u Weight=%u ",
		   node_state_string(my_state),
		   cloud_str, comp_str, drain_str, power_str,
		   node_ptr->threads, node_ptr->tmp_disk, node_ptr->weight);

	if (node_ptr->owner == NO_VAL) {
		xstrcat(out, "Owner=N/A ");
	} else {
		char *user_name = uid_to_string((uid_t) node_ptr->owner);
		xstrfmtcat(out, "Owner=%s(%u) ", user_name, node_ptr->owner);
		xfree(user_name);
	}

	xstrfmtcat(out, "MCS_label=%s",
		   (node_ptr->mcs_label == NULL) ? "N/A" : node_ptr->mcs_label);

	xstrcat(out, line_end);

	/****** Line (optional) ******/
	if (node_ptr->partitions) {
		xstrfmtcat(out, "Partitions=%s ", node_ptr->partitions);
		xstrcat(out, line_end);
	}

	/****** Line ******/
	if (node_ptr->boot_time) {
		slurm_make_time_str((time_t *)&node_ptr->boot_time,
				    time_str, sizeof(time_str));
		xstrfmtcat(out, "BootTime=%s ", time_str);
	} else {
		xstrcat(out, "BootTime=None ");
	}

	if (node_ptr->slurmd_start_time) {
		slurm_make_time_str ((time_t *)&node_ptr->slurmd_start_time,
				     time_str, sizeof(time_str));
		xstrfmtcat(out, "SlurmdStartTime=%s", time_str);
	} else {
		xstrcat(out, "SlurmdStartTime=None");
	}
	xstrcat(out, line_end);

	/****** TRES Line ******/
	select_g_select_nodeinfo_get(node_ptr->select_nodeinfo,
				     SELECT_NODEDATA_TRES_ALLOC_FMT_STR,
				     NODE_STATE_ALLOCATED, &node_alloc_tres);
	xstrfmtcat(out, "CfgTRES=%s", node_ptr->tres_fmt_str);
	xstrcat(out, line_end);
	xstrfmtcat(out, "AllocTRES=%s",
		   (node_alloc_tres) ?  node_alloc_tres : "");
	xfree(node_alloc_tres);
	xstrcat(out, line_end);

	/****** Power Management Line ******/
	if (!node_ptr->power || (node_ptr->power->cap_watts == NO_VAL))
		xstrcat(out, "CapWatts=n/a");
	else
		xstrfmtcat(out, "CapWatts=%u", node_ptr->power->cap_watts);

	xstrcat(out, line_end);

	/****** Power Consumption Line ******/
	if (!node_ptr->energy || node_ptr->energy->current_watts == NO_VAL)
		xstrcat(out, "CurrentWatts=n/s LowestJoules=n/s ConsumedJoules=n/s");
	else
		xstrfmtcat(out, "CurrentWatts=%u "
				"LowestJoules=%"PRIu64" "
				"ConsumedJoules=%"PRIu64"",
				node_ptr->energy->current_watts,
				node_ptr->energy->base_consumed_energy,
				node_ptr->energy->consumed_energy);

	xstrcat(out, line_end);

	/****** external sensors Line ******/
	/* Each field is reported as "n/s" when the sensor record is absent
	 * or the individual value equals NO_VAL */
	if (!node_ptr->ext_sensors
	    || node_ptr->ext_sensors->consumed_energy == NO_VAL)
		xstrcat(out, "ExtSensorsJoules=n/s ");
	else
		xstrfmtcat(out, "ExtSensorsJoules=%"PRIu64" ",
			   node_ptr->ext_sensors->consumed_energy);

	if (!node_ptr->ext_sensors
	    || node_ptr->ext_sensors->current_watts == NO_VAL)
		xstrcat(out, "ExtSensorsWatts=n/s ");
	else
		xstrfmtcat(out, "ExtSensorsWatts=%u ",
			   node_ptr->ext_sensors->current_watts);

	if (!node_ptr->ext_sensors
	    || node_ptr->ext_sensors->temperature == NO_VAL)
		xstrcat(out, "ExtSensorsTemp=n/s");
	else
		xstrfmtcat(out, "ExtSensorsTemp=%u",
			   node_ptr->ext_sensors->temperature);

	xstrcat(out, line_end);

	/****** Reason Line(s) (optional) ******/
	/* Merge the node's own reason with the select plugin's extra info,
	 * one per line, into reason_str */
	if (node_ptr->reason && node_ptr->reason[0])
		xstrcat(reason_str, node_ptr->reason);
	slurm_get_select_nodeinfo(node_ptr->select_nodeinfo,
				  SELECT_NODEDATA_EXTRA_INFO,
				  0, &select_reason_str);
	if (select_reason_str && select_reason_str[0]) {
		if (reason_str)
			xstrcat(reason_str, "\n");
		xstrcat(reason_str, select_reason_str);
	}
	xfree(select_reason_str);
	if (reason_str) {
		int inx = 1;
		char *save_ptr = NULL, *tok, *user_name;
		/* strtok_r() modifies reason_str in place; it is a private
		 * copy that is released below */
		tok = strtok_r(reason_str, "\n", &save_ptr);
		while (tok) {
			if (inx == 1) {
				xstrcat(out, "Reason=");
			} else {
				/* continuation lines are indented */
				xstrcat(out, line_end);
				xstrcat(out, "       ");
			}
			xstrfmtcat(out, "%s", tok);
			/* Only the first reason line carries the
			 * [user@time] stamp, and only if a time is set */
			if ((inx++ == 1) && node_ptr->reason_time) {
				user_name = uid_to_string(node_ptr->reason_uid);
				slurm_make_time_str((time_t *)&node_ptr->reason_time,
						    time_str, sizeof(time_str));
				xstrfmtcat(out, " [%s@%s]", user_name, time_str);
				xfree(user_name);
			}
			tok = strtok_r(NULL, "\n", &save_ptr);
		}
		xfree(reason_str);
	}
	if (one_liner)
		xstrcat(out, "\n");
	else
		xstrcat(out, "\n\n");

	return out;
}
예제 #11
0
파일: node_info.c 프로젝트: VURM/slurm
/*
 * slurm_sprint_node_table - build a printable description of one Slurm node
 *	from a node record as loaded using slurm_load_node
 * IN node_ptr - an individual node information record pointer
 * IN node_scaling - number of nodes each node represents
 * IN one_liner - print as a single line if true
 * RET char * containing formatted output (must be freed after call)
 *           NULL is returned on failure.
 */
char *
slurm_sprint_node_table (node_info_t * node_ptr,
			 int node_scaling, int one_liner )
{
	uint16_t my_state = node_ptr->node_state;
	char *comp_str = "", *drain_str = "", *power_str = "";
	char buf[512], time_str[32];
	char *text = NULL;
	uint16_t err_cpus = 0, alloc_cpus = 0;
	int cpus_per_node = 1;
	int unused_cpus = node_ptr->cpus;
	uint32_t cluster_flags = slurmdb_setup_cluster_flags();
	/* Separator between logical lines and terminator of the record */
	char *sep  = one_liner ? " "  : "\n   ";
	char *tail = one_liner ? "\n" : "\n\n";

	if (node_scaling)
		cpus_per_node = node_ptr->cpus / node_scaling;

	/* Peel the flag bits off the state; each becomes a "+SUFFIX" */
	if (my_state & NODE_STATE_COMPLETING) {
		comp_str = "+COMPLETING";
		my_state &= (~NODE_STATE_COMPLETING);
	}
	if (my_state & NODE_STATE_DRAIN) {
		drain_str = "+DRAIN";
		my_state &= (~NODE_STATE_DRAIN);
	}
	if (my_state & NODE_STATE_POWER_SAVE) {
		power_str = "+POWER";
		my_state &= (~NODE_STATE_POWER_SAVE);
	}

	/* Allocated CPU count from the select plugin */
	slurm_get_select_nodeinfo(node_ptr->select_nodeinfo,
				  SELECT_NODEDATA_SUBCNT,
				  NODE_STATE_ALLOCATED,
				  &alloc_cpus);
	if (cluster_flags & CLUSTER_FLAG_BG) {
		if (alloc_cpus ||
		    (!IS_NODE_ALLOCATED(node_ptr) &&
		     !IS_NODE_COMPLETING(node_ptr)))
			alloc_cpus *= cpus_per_node;
		else
			alloc_cpus = node_ptr->cpus;
	}

	/* CPU count in error state from the select plugin */
	slurm_get_select_nodeinfo(node_ptr->select_nodeinfo,
				  SELECT_NODEDATA_SUBCNT,
				  NODE_STATE_ERROR,
				  &err_cpus);
	if (cluster_flags & CLUSTER_FLAG_BG)
		err_cpus *= cpus_per_node;

	/* CPUs that are neither allocated nor in error */
	unused_cpus -= (alloc_cpus + err_cpus);

	/* Partially used node: report MIXED, keeping only the flag bits */
	if ((alloc_cpus && err_cpus) ||
	    (unused_cpus && (unused_cpus != node_ptr->cpus))) {
		my_state &= NODE_STATE_FLAGS;
		my_state |= NODE_STATE_MIXED;
	}

	/****** Line 1: name, architecture, cores ******/
	snprintf(buf, sizeof(buf), "NodeName=%s ", node_ptr->name);
	xstrcat(text, buf);
	if (node_ptr->arch) {
		snprintf(buf, sizeof(buf), "Arch=%s ", node_ptr->arch);
		xstrcat(text, buf);
	}
	snprintf(buf, sizeof(buf), "CoresPerSocket=%u", node_ptr->cores);
	xstrcat(text, buf);
	xstrcat(text, sep);

	/****** Line 2: CPU counts and features ******/
	snprintf(buf, sizeof(buf),
		 "CPUAlloc=%u CPUErr=%u CPUTot=%u Features=%s",
		 alloc_cpus, err_cpus, node_ptr->cpus, node_ptr->features);
	xstrcat(text, buf);
	xstrcat(text, sep);

	/****** Line 3: generic resources ******/
	snprintf(buf, sizeof(buf), "Gres=%s",node_ptr->gres);
	xstrcat(text, buf);
	xstrcat(text, sep);

	/****** Line 4 (optional): addresses ******/
	if (node_ptr->node_hostname || node_ptr->node_addr) {
		snprintf(buf, sizeof(buf),
			 "NodeAddr=%s NodeHostName=%s",
			 node_ptr->node_addr, node_ptr->node_hostname);
		xstrcat(text, buf);
		xstrcat(text, sep);
	}

	/****** Line 5: OS, memory, sockets ******/
	if (node_ptr->os) {
		snprintf(buf, sizeof(buf), "OS=%s ", node_ptr->os);
		xstrcat(text, buf);
	}
	snprintf(buf, sizeof(buf), "RealMemory=%u Sockets=%u",
		 node_ptr->real_memory, node_ptr->sockets);
	xstrcat(text, buf);
	xstrcat(text, sep);

	/****** Line 6: state with flag suffixes ******/
	snprintf(buf, sizeof(buf),
		 "State=%s%s%s%s ThreadsPerCore=%u TmpDisk=%u Weight=%u",
		 node_state_string(my_state), comp_str, drain_str, power_str,
		 node_ptr->threads, node_ptr->tmp_disk, node_ptr->weight);
	xstrcat(text, buf);
	xstrcat(text, sep);

	/****** Line 7: boot and slurmd start times ******/
	if (node_ptr->boot_time) {
		slurm_make_time_str ((time_t *)&node_ptr->boot_time,
				     time_str, sizeof(time_str));
		snprintf(buf, sizeof(buf), "BootTime=%s ", time_str);
		xstrcat(text, buf);
	} else {
		xstrcat(text, "BootTime=None ");
	}

	if (node_ptr->slurmd_start_time) {
		slurm_make_time_str ((time_t *)&node_ptr->slurmd_start_time,
				     time_str, sizeof(time_str));
		snprintf(buf, sizeof(buf), "SlurmdStartTime=%s", time_str);
		xstrcat(text, buf);
	} else {
		xstrcat(text, "SlurmdStartTime=None");
	}
	xstrcat(text, sep);

	/****** Line 8: reason, stamped with user and time when known ******/
	if (node_ptr->reason_time) {
		char *user_name = uid_to_string(node_ptr->reason_uid);
		slurm_make_time_str ((time_t *)&node_ptr->reason_time,
				     time_str, sizeof(time_str));
		snprintf(buf, sizeof(buf), "Reason=%s [%s@%s]",
			 node_ptr->reason, user_name, time_str);
		xfree(user_name);
	} else {
		snprintf(buf, sizeof(buf), "Reason=%s", node_ptr->reason);
	}
	xstrcat(text, buf);
	xstrcat(text, tail);

	return text;
}
예제 #12
0
파일: front_end.c 프로젝트: Cray/slurm
/*
 * sync_front_end_state - synchronize job pointers and front-end node state
 *
 * Recounts the running and completing jobs on every front-end node,
 * refreshes each job's front_end_ptr from its batch_host, then adjusts
 * the front-end node states so they agree with the new counts.
 * Compiles to an empty function unless HAVE_FRONT_END is defined.
 */
extern void sync_front_end_state(void)
{
#ifdef HAVE_FRONT_END
	ListIterator job_iter;
	struct job_record *job_ptr;
	front_end_record_t *fe_ptr;
	uint16_t flags;
	int inx;

	/* Start the per-node job counters from zero */
	for (inx = 0; inx < front_end_node_cnt; inx++) {
		fe_ptr = &front_end_nodes[inx];
		fe_ptr->job_cnt_comp = 0;
		fe_ptr->job_cnt_run  = 0;
	}

	/* Re-link every job to its front-end record and tally it */
	job_iter = list_iterator_create(job_list);
	while ((job_ptr = (struct job_record *) list_next(job_iter))) {
		if (!job_ptr->batch_host) {
			job_ptr->front_end_ptr = NULL;
			continue;
		}

		job_ptr->front_end_ptr =
			find_front_end_record(job_ptr->batch_host);
		if (job_ptr->front_end_ptr) {
			if (IS_JOB_COMPLETING(job_ptr))
				job_ptr->front_end_ptr->job_cnt_comp++;
			else if (IS_JOB_RUNNING(job_ptr))
				job_ptr->front_end_ptr->job_cnt_run++;
			continue;
		}

		/* The job's front-end node is no longer known */
		if (IS_JOB_RUNNING(job_ptr)) {
			error("front end node %s has vanished, "
			      "killing job %u",
			      job_ptr->batch_host, job_ptr->job_id);
			job_ptr->job_state = JOB_NODE_FAIL |
					     JOB_COMPLETING;
		} else {
			info("front end node %s has vanished",
			     job_ptr->batch_host);
		}
	}
	list_iterator_destroy(job_iter);

	/* Make each node's state agree with its job counters */
	for (inx = 0; inx < front_end_node_cnt; inx++) {
		fe_ptr = &front_end_nodes[inx];

		if (fe_ptr->job_cnt_run != 0) {
			/* Has running jobs: IDLE/UNKNOWN becomes ALLOCATED */
			if (IS_NODE_IDLE(fe_ptr) ||
			    IS_NODE_UNKNOWN(fe_ptr)) {
				flags = fe_ptr->node_state &
					NODE_STATE_FLAGS;
				fe_ptr->node_state = NODE_STATE_ALLOCATED |
						     flags;
			}
		} else if (IS_NODE_ALLOCATED(fe_ptr)) {
			/* No running jobs: ALLOCATED drops back to IDLE */
			flags = fe_ptr->node_state & NODE_STATE_FLAGS;
			fe_ptr->node_state = NODE_STATE_IDLE | flags;
		}

		if (fe_ptr->job_cnt_comp == 0) {
			if (IS_NODE_COMPLETING(fe_ptr))
				fe_ptr->node_state &=
					(~NODE_STATE_COMPLETING);
		} else if (!IS_NODE_COMPLETING(fe_ptr)) {
			fe_ptr->node_state |= NODE_STATE_COMPLETING;
		}
	}

	if (slurmctld_conf.debug_flags & DEBUG_FLAG_FRONT_END)
		log_front_end_state();
#endif
}
예제 #13
0
파일: job_test.c 프로젝트: BYUHPC/slurm
/*
 * _can_job_run_on_node - Given the job requirements, determine which
 *                        resources from the given node (if any) can be
 *                        allocated to this job. Returns the number of
 *                        cpus that can be used by this node and a bitmap
 *                        of available resources for allocation.
 *       NOTE: This process does NOT support overcommitting resources
 *
 * IN job_ptr       - pointer to job requirements
 * IN/OUT core_map  - core_bitmap of available cores
 * IN node_i        - index of node to be evaluated
 * IN node_usage    - per-node usage records, indexed by node_i
 * IN cr_type       - Consumable Resource setting
 * IN test_only     - ignore allocated memory check
 * RET number of cpus usable on the node, 0 if the job can not run there
 *
 * NOTE: The returned cpu_count may be less than the number of set bits in
 *       core_map for the given node. The cr_dist functions will determine
 *       which bits to deselect from the core_map to match the cpu_count.
 */
uint16_t _can_job_run_on_node(struct job_record *job_ptr, bitstr_t *core_map,
			      const uint32_t node_i,
			      struct node_use_record *node_usage,
			      uint16_t cr_type,
			      bool test_only)
{
	uint16_t cpus;
	uint32_t avail_mem, req_mem, gres_cpus, gres_cores, cpus_per_core;
	int core_start_bit, core_end_bit;
	struct node_record *node_ptr = node_record_table_ptr + node_i;
	List gres_list;

	if (!test_only && IS_NODE_COMPLETING(node_ptr)) {
		/* Do not allocate more jobs to nodes with completing jobs */
		return 0;
	}

	cpus = _allocate_cores(job_ptr, core_map, node_i);

	/* This node's cores occupy [core_start_bit, core_end_bit] in core_map */
	core_start_bit = cr_get_coremap_offset(node_i);
	core_end_bit   = cr_get_coremap_offset(node_i + 1) - 1;
	node_ptr = select_node_record[node_i].node_ptr;
	cpus_per_core  = select_node_record[node_i].cpus /
			 (core_end_bit - core_start_bit + 1);

	/* Prefer the GRES list from the usage record, else the node's own */
	if (node_usage[node_i].gres_list)
		gres_list = node_usage[node_i].gres_list;
	else
		gres_list = node_ptr->gres_list;

	gres_plugin_job_core_filter(job_ptr->gres_list, gres_list, test_only,
				    core_map, core_start_bit, core_end_bit,
				    node_ptr->name);

	if ((cr_type & CR_MEMORY) && cpus) {
		req_mem   = job_ptr->details->pn_min_memory & ~MEM_PER_CPU;
		avail_mem = select_node_record[node_i].real_memory;
		if (!test_only) {
			/* Clamp at zero: an unsigned subtraction would wrap
			 * when allocated memory is at least the real memory,
			 * making an over-committed node look nearly empty. */
			if (node_usage[node_i].alloc_memory < avail_mem)
				avail_mem -= node_usage[node_i].alloc_memory;
			else
				avail_mem = 0;
		}
		if (req_mem > avail_mem)
			cpus = 0;
	}

	gres_cores = gres_plugin_job_test(job_ptr->gres_list,
					  gres_list, test_only,
					  core_map, core_start_bit,
					  core_end_bit, job_ptr->job_id,
					  node_ptr->name);
	gres_cpus = gres_cores;
	/* NO_VAL is left unscaled -- presumably it means GRES imposes no
	 * core limit; confirm against gres_plugin_job_test() */
	if (gres_cpus != NO_VAL)
		gres_cpus *= cpus_per_core;
	/* Too few GRES-usable cpus for one node's tasks or for one task */
	if ((gres_cpus < job_ptr->details->ntasks_per_node) ||
	    ((job_ptr->details->cpus_per_task > 1) &&
	     (gres_cpus < job_ptr->details->cpus_per_task)))
		gres_cpus = 0;
	if (gres_cpus < cpus)
		cpus = gres_cpus;

	/* Nothing usable: withdraw all of this node's cores from the map */
	if (cpus == 0)
		bit_nclear(core_map, core_start_bit, core_end_bit);

	if (select_debug_flags & DEBUG_FLAG_SELECT_TYPE) {
		info("select/serial: _can_job_run_on_node: %u cpus on %s(%d), "
		     "mem %u/%u",
		     cpus, select_node_record[node_i].node_ptr->name,
		     node_usage[node_i].node_state,
		     node_usage[node_i].alloc_memory,
		     select_node_record[node_i].real_memory);
	}

	return cpus;
}