Example no. 1
/*
 * assign_front_end - assign a front end node for starting a job
 * job_ptr IN - job to assign a front end node (tests access control lists)
 * RET pointer to the front end node to use or NULL if none found
 */
extern front_end_record_t *assign_front_end(struct job_record *job_ptr)
{
#ifdef HAVE_FRONT_END
	front_end_record_t *front_end_ptr, *best_front_end = NULL;
	uint32_t state_flags;
	int i;

	if (!job_ptr->batch_host && (job_ptr->batch_flag == 0) &&
	    (front_end_ptr = find_front_end_record(job_ptr->alloc_node))) {
		/* Use submit host for interactive job */
		if (!IS_NODE_DOWN(front_end_ptr)  &&
		    !IS_NODE_DRAIN(front_end_ptr) &&
		    !IS_NODE_NO_RESPOND(front_end_ptr) &&
		    _front_end_access(front_end_ptr, job_ptr)) {
			best_front_end = front_end_ptr;
		} else {
			info("%s: front-end node %s not available for job %u",
			     __func__, job_ptr->alloc_node, job_ptr->job_id);
			return NULL;
		}
	} else {
		for (i = 0, front_end_ptr = front_end_nodes;
		     i < front_end_node_cnt; i++, front_end_ptr++) {
			if (job_ptr->batch_host) { /* Find specific front-end */
				if (xstrcmp(job_ptr->batch_host,
					   front_end_ptr->name))
					continue;
				if (!_front_end_access(front_end_ptr, job_ptr))
					break;
			} else {	      /* Find a usable front-end node */
				if (IS_NODE_DOWN(front_end_ptr) ||
				    IS_NODE_DRAIN(front_end_ptr) ||
				    IS_NODE_NO_RESPOND(front_end_ptr))
					continue;
				if (!_front_end_access(front_end_ptr, job_ptr))
					continue;
			}
			if ((best_front_end == NULL) ||
			    (front_end_ptr->job_cnt_run <
			     best_front_end->job_cnt_run))
				best_front_end = front_end_ptr;
		}
	}

	if (best_front_end) {
		state_flags = best_front_end->node_state & NODE_STATE_FLAGS;
		best_front_end->node_state = NODE_STATE_ALLOCATED | state_flags;
		best_front_end->job_cnt_run++;
		return best_front_end;
	} else if (job_ptr->batch_host) {    /* Find specific front-end node */
		error("assign_front_end: front end node %s not found",
		      job_ptr->batch_host);
	} else {		/* Find some usable front-end node */
		error("assign_front_end: no available front end nodes found");
	}
#endif
	return NULL;
}
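A hedged caller sketch for the function above; the surrounding scheduler context (job_ptr and the launch path) is assumed, not part of this excerpt:

/* Hypothetical caller: place a job on a front end or leave it pending.
 * On success assign_front_end() has already marked the node ALLOCATED
 * and incremented its job_cnt_run. */
front_end_record_t *fe_ptr = assign_front_end(job_ptr);
if (fe_ptr == NULL) {
	/* no usable front end (state or access lists); keep job pending */
} else {
	/* launch the batch script via fe_ptr->name */
}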
Example no. 2
static char *	_get_node_state(struct node_record *node_ptr)
{
	static bool got_select_type = false;
	static bool node_allocations;

	if (!got_select_type) {
		char * select_type = slurm_get_select_type();
		if (select_type &&
		    (strcasecmp(select_type, "select/linear") == 0))
			node_allocations = true;
		else
			node_allocations = false;
		xfree(select_type);
		got_select_type = true;
	}

	if (IS_NODE_DRAIN(node_ptr) || IS_NODE_FAIL(node_ptr))
		return "Draining";
	if (IS_NODE_COMPLETING(node_ptr))
		return "Busy";

	if (IS_NODE_DOWN(node_ptr))
		return "Down";
	if (IS_NODE_ALLOCATED(node_ptr)) {
		if (node_allocations)
			return "Busy";
		else
			return "Running";
	}
	if (IS_NODE_IDLE(node_ptr))
		return "Idle";

	return "Unknown";
}
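A minimal reporting sketch, assuming the slurmctld globals node_record_table_ptr and node_record_count that appear in later examples:

/* sketch: log the state string reported for every node */
int i;
for (i = 0; i < node_record_count; i++) {
	struct node_record *node_ptr = node_record_table_ptr + i;
	info("node %s state=%s", node_ptr->name,
	     _get_node_state(node_ptr));
}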
Example no. 3
/*
 * assign_front_end - assign a front end node for starting a job
 * RET pointer to the front end node to use or NULL if none available
 */
extern front_end_record_t *assign_front_end(void)
{
#ifdef HAVE_FRONT_END
	static int last_assigned = -1;
	front_end_record_t *front_end_ptr;
	uint16_t state_flags;
	int i;

	for (i = 0; i < front_end_node_cnt; i++) {
		last_assigned = (last_assigned + 1) % front_end_node_cnt;
		front_end_ptr = front_end_nodes + last_assigned;
		if (IS_NODE_DOWN(front_end_ptr) ||
		    IS_NODE_DRAIN(front_end_ptr) ||
		    IS_NODE_NO_RESPOND(front_end_ptr))
			continue;
		state_flags = front_end_nodes[last_assigned].node_state &
			      NODE_STATE_FLAGS;
		front_end_nodes[last_assigned].node_state =
				NODE_STATE_ALLOCATED | state_flags;
		front_end_nodes[last_assigned].job_cnt_run++;
		return front_end_ptr;
	}
	fatal("assign_front_end: no available front end nodes found");
#endif
	return NULL;
}
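Unlike the least-loaded search in Example no. 1, this variant rotates through the table. The rotation step in isolation, as a hypothetical helper:

/* hypothetical helper showing the round-robin step used above: with
 * front_end_node_cnt == 3, successive calls return 0, 1, 2, 0, ... */
static int last_assigned = -1;

static int _next_front_end_index(void)
{
	last_assigned = (last_assigned + 1) % front_end_node_cnt;
	return last_assigned;
}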
Example no. 4
/* Determine if specific slurm node is already in DOWN or DRAIN state */
extern int node_already_down(char *node_name)
{
	struct node_record *node_ptr = find_node_record(node_name);

	if (node_ptr) {
		if (IS_NODE_DRAIN(node_ptr))
			return 2;
		else if (IS_NODE_DOWN(node_ptr))
			return 1;
		else
			return 0;
	}

	return 0;
}
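A hypothetical caller sketch using the return codes above (0 = usable, 1 = already DOWN, 2 = already DRAIN); node_name and the follow-up action are assumed context:

/* hypothetical caller: avoid redundant state changes */
switch (node_already_down(node_name)) {
case 2:
	debug("%s is already draining, nothing to do", node_name);
	break;
case 1:
	debug("%s is already down, nothing to do", node_name);
	break;
default:
	/* proceed to drain or down the node */
	break;
}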
Example no. 5
/*
 * avail_front_end - test if any front end nodes are available for starting job
 */
extern bool avail_front_end(void)
{
#ifdef HAVE_FRONT_END
	front_end_record_t *front_end_ptr;
	int i;

	for (i = 0, front_end_ptr = front_end_nodes;
	     i < front_end_node_cnt; i++, front_end_ptr++) {
		if (IS_NODE_DOWN(front_end_ptr)  ||
		    IS_NODE_DRAIN(front_end_ptr) ||
		    IS_NODE_NO_RESPOND(front_end_ptr))
			continue;
		return true;
	}
	return false;
#else
	return true;
#endif
}
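A hypothetical scheduler-side guard built on this test:

/* hypothetical caller: skip the scheduling pass outright when no
 * front end could launch anything anyway */
if (!avail_front_end())
	return;
/* ... attempt to start pending jobs ... */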
Example no. 6
/*
 * assign_front_end - assign a front end node for starting a job
 * job_ptr IN - job to assign a front end node (tests access control lists)
 * RET pointer to the front end node to use or NULL if none found
 */
extern front_end_record_t *assign_front_end(struct job_record *job_ptr)
{
#ifdef HAVE_FRONT_END
	front_end_record_t *front_end_ptr, *best_front_end = NULL;
	uint32_t state_flags;
	int i;

	for (i = 0, front_end_ptr = front_end_nodes; i < front_end_node_cnt;
	     i++, front_end_ptr++) {
		if (job_ptr->batch_host) {   /* Find specific front-end node */
			if (strcmp(job_ptr->batch_host, front_end_ptr->name))
				continue;
			if (!_front_end_access(front_end_ptr, job_ptr))
				break;
		} else {		/* Find some usable front-end node */
			if (IS_NODE_DOWN(front_end_ptr) ||
			    IS_NODE_DRAIN(front_end_ptr) ||
			    IS_NODE_NO_RESPOND(front_end_ptr))
				continue;
			if (!_front_end_access(front_end_ptr, job_ptr))
				continue;
		}
		if ((best_front_end == NULL) ||
		    (front_end_ptr->job_cnt_run < best_front_end->job_cnt_run))
			best_front_end = front_end_ptr;
	}

	if (best_front_end) {
		state_flags = best_front_end->node_state & NODE_STATE_FLAGS;
		best_front_end->node_state = NODE_STATE_ALLOCATED | state_flags;
		best_front_end->job_cnt_run++;
		return best_front_end;
	} else if (job_ptr->batch_host) {    /* Find specific front-end node */
		error("assign_front_end: front end node %s not found",
		      job_ptr->batch_host);
	} else {		/* Find some usable front-end node */
		error("assign_front_end: no available front end nodes found");
	}
#endif
	return NULL;
}
Example no. 7
/*
 * assign_front_end - assign a front end node for starting a job
 * job_ptr IN - job to assign a front end node (tests access control lists)
 * RET pointer to the front end node to use or NULL if none found
 */
extern front_end_record_t *assign_front_end(struct job_record *job_ptr)
{
#ifdef HAVE_FRONT_END
	static int last_assigned = -1;
	front_end_record_t *front_end_ptr;
	uint16_t state_flags;
	int i;

	for (i = 0; i < front_end_node_cnt; i++) {
		last_assigned = (last_assigned + 1) % front_end_node_cnt;
		front_end_ptr = front_end_nodes + last_assigned;
		if (job_ptr->batch_host) {   /* Find specific front-end node */
			if (strcmp(job_ptr->batch_host, front_end_ptr->name))
				continue;
			if (!_front_end_access(front_end_ptr, job_ptr))
				break;
		} else {		/* Find some usable front-end node */
			if (IS_NODE_DOWN(front_end_ptr) ||
			    IS_NODE_DRAIN(front_end_ptr) ||
			    IS_NODE_NO_RESPOND(front_end_ptr))
				continue;
			if (!_front_end_access(front_end_ptr, job_ptr))
				continue;
		}
		state_flags = front_end_nodes[last_assigned].node_state &
			      NODE_STATE_FLAGS;
		front_end_nodes[last_assigned].node_state =
				NODE_STATE_ALLOCATED | state_flags;
		front_end_nodes[last_assigned].job_cnt_run++;
		return front_end_ptr;
	}
	if (job_ptr->batch_host) {	/* Find specific front-end node */
		error("assign_front_end: front end node %s not found",
		      job_ptr->batch_host);
	} else {		/* Find some usable front-end node */
		error("assign_front_end: no available front end nodes found");
	}
#endif
	return NULL;
}
Example no. 8
/* Perform any power change work to nodes */
static void _do_power_work(time_t now)
{
	static time_t last_log = 0, last_work_scan = 0;
	int i, wake_cnt = 0, sleep_cnt = 0, susp_total = 0;
	time_t delta_t;
	uint32_t susp_state;
	bitstr_t *wake_node_bitmap = NULL, *sleep_node_bitmap = NULL;
	struct node_record *node_ptr;
	bool run_suspend = false;

	/* Set limit on counts of nodes to have state changed */
	delta_t = now - last_work_scan;
	if (delta_t >= 60) {
		suspend_cnt_f = 0.0;
		resume_cnt_f  = 0.0;
	} else {
		float rate = (60 - delta_t) / 60.0;
		suspend_cnt_f *= rate;
		resume_cnt_f  *= rate;
	}
	suspend_cnt = (suspend_cnt_f + 0.5);
	resume_cnt  = (resume_cnt_f  + 0.5);

	if (now > (last_suspend + suspend_timeout)) {
		/* ready to start another round of node suspends */
		run_suspend = true;
		if (last_suspend) {
			bit_nclear(suspend_node_bitmap, 0,
				   (node_record_count - 1));
			bit_nclear(resume_node_bitmap, 0,
				   (node_record_count - 1));
			last_suspend = (time_t) 0;
		}
	}

	last_work_scan = now;

	/* Build bitmaps identifying each node which should change state */
	for (i = 0, node_ptr = node_record_table_ptr;
	     i < node_record_count; i++, node_ptr++) {
		susp_state = IS_NODE_POWER_SAVE(node_ptr);

		if (susp_state)
			susp_total++;

		/* Resume nodes as appropriate */
		if (susp_state &&
		    ((resume_rate == 0) || (resume_cnt < resume_rate))	&&
		    (bit_test(suspend_node_bitmap, i) == 0)		&&
		    (IS_NODE_ALLOCATED(node_ptr) ||
		     (node_ptr->last_idle > (now - idle_time)))) {
			if (wake_node_bitmap == NULL) {
				wake_node_bitmap =
					bit_alloc(node_record_count);
			}
			wake_cnt++;
			resume_cnt++;
			resume_cnt_f++;
			node_ptr->node_state &= (~NODE_STATE_POWER_SAVE);
			node_ptr->node_state |=   NODE_STATE_POWER_UP;
			node_ptr->node_state |=   NODE_STATE_NO_RESPOND;
			bit_clear(power_node_bitmap, i);
			bit_clear(avail_node_bitmap, i);
			node_ptr->last_response = now + resume_timeout;
			bit_set(wake_node_bitmap,    i);
			bit_set(resume_node_bitmap,  i);
		}

		/* Suspend nodes as appropriate */
		if (run_suspend 					&&
		    (susp_state == 0)					&&
		    ((suspend_rate == 0) || (suspend_cnt < suspend_rate)) &&
		    (IS_NODE_IDLE(node_ptr) || IS_NODE_DOWN(node_ptr))	&&
		    (node_ptr->sus_job_cnt == 0)			&&
		    (!IS_NODE_COMPLETING(node_ptr))			&&
		    (!IS_NODE_POWER_UP(node_ptr))			&&
		    (node_ptr->last_idle != 0)				&&
		    (node_ptr->last_idle < (now - idle_time))		&&
		    ((exc_node_bitmap == NULL) ||
		     (bit_test(exc_node_bitmap, i) == 0))) {
			if (sleep_node_bitmap == NULL) {
				sleep_node_bitmap =
					bit_alloc(node_record_count);
			}
			sleep_cnt++;
			suspend_cnt++;
			suspend_cnt_f++;
			node_ptr->node_state |= NODE_STATE_POWER_SAVE;
			node_ptr->node_state &= (~NODE_STATE_NO_RESPOND);
			if (!IS_NODE_DOWN(node_ptr) &&
			    !IS_NODE_DRAIN(node_ptr))
				bit_set(avail_node_bitmap,   i);
			bit_set(power_node_bitmap,   i);
			bit_set(sleep_node_bitmap,   i);
			bit_set(suspend_node_bitmap, i);
			last_suspend = now;
		}
	}
	if (((now - last_log) > 600) && (susp_total > 0)) {
		info("Power save mode: %d nodes", susp_total);
		last_log = now;
	}

	if (sleep_node_bitmap) {
		char *nodes;
		nodes = bitmap2node_name(sleep_node_bitmap);
		if (nodes)
			_do_suspend(nodes);
		else
			error("power_save: bitmap2nodename");
		xfree(nodes);
		FREE_NULL_BITMAP(sleep_node_bitmap);
		/* last_node_update could be changed already by another thread!
		last_node_update = now; */
	}

	if (wake_node_bitmap) {
		char *nodes;
		nodes = bitmap2node_name(wake_node_bitmap);
		if (nodes)
			_do_resume(nodes);
		else
			error("power_save: bitmap2nodename");
		xfree(nodes);
		FREE_NULL_BITMAP(wake_node_bitmap);
		/* last_node_update could be changed already by another thread!
		last_node_update = now; */
	}
}
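The rate limiter at the top decays the running suspend/resume counts linearly to zero over a 60-second window. A worked instance with assumed values:

/* assumed values: 15 seconds elapsed since the last work scan */
float suspend_cnt_f = 8.0;
time_t delta_t = 15;
float rate = (60 - delta_t) / 60.0;	 /* 0.75: keep 75 % of the count */
suspend_cnt_f *= rate;			 /* 8.0 -> 6.0 */
int suspend_cnt = (suspend_cnt_f + 0.5); /* int conversion truncates 6.5 to 6 */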
Example no. 9
/**
 * basil_inventory - Periodic node-state query via ALPS XML-RPC.
 * This should be run immediately before each scheduling cycle.
 * Returns non-SLURM_SUCCESS if
 * - INVENTORY method failed (error)
 * - no nodes are available (no point in scheduling)
 * - orphaned ALPS reservation exists (wait until ALPS resynchronizes)
 */
extern int basil_inventory(void)
{
	enum basil_version version = get_basil_version();
	struct basil_inventory *inv;
	struct basil_node *node;
	struct basil_rsvn *rsvn;
	int slurm_alps_mismatch = 0;
	int rc = SLURM_SUCCESS;
	int rel_rc;
	time_t now = time(NULL);
	static time_t slurm_alps_mismatch_time = (time_t) 0;
	static bool logged_sync_timeout = false;
	static time_t last_inv_run = 0;

	if ((now - last_inv_run) < inv_interval)
		return SLURM_SUCCESS;

	last_inv_run = now;

	inv = get_full_inventory(version);
	if (inv == NULL) {
		error("BASIL %s INVENTORY failed", bv_names_long[version]);
		return SLURM_ERROR;
	}

	debug("BASIL %s INVENTORY: %d/%d batch nodes available",
	      bv_names_long[version], inv->batch_avail, inv->batch_total);

	/* Avoid checking inv->batch_avail here: when gang scheduling,
	 * returning an error for a full system would be the wrong thing
	 * to do, since the schedule() function in the slurmctld would
	 * then never run. */
	if (!inv->f->node_head || !inv->batch_total)
		rc = ESLURM_REQUESTED_NODE_CONFIG_UNAVAILABLE;

	for (node = inv->f->node_head; node; node = node->next) {
		int node_inx;
		struct node_record *node_ptr;
		char *reason = NULL;

		/* This will ignore interactive nodes when iterating through
		 * the apbasil inventory.  If we don't do this, SLURM is
		 * unable to resolve the ID to a nidXXX name since it's not in
		 * the slurm.conf file.  (Chris North)
		 */
		if (node->role == BNR_INTER)
			continue;

		node_ptr = _find_node_by_basil_id(node->node_id);
		if (node_ptr == NULL) {
			error("nid%05u (%s node in state %s) not in slurm.conf",
			      node->node_id, nam_noderole[node->role],
			      nam_nodestate[node->state]);
			continue;
		}
		node_inx = node_ptr - node_record_table_ptr;

		if (node_is_allocated(node) && !IS_NODE_ALLOCATED(node_ptr)) {
			/*
			 * ALPS still hangs on to the node while SLURM considers
			 * it already unallocated. Possible causes are partition
			 * cleanup taking too long (can be 10sec ... minutes),
			 * and orphaned ALPS reservations (caught below).
			 *
			 * The converse case (SLURM hanging on to the node while
			 * ALPS has already freed it) happens frequently during
			 * job completion: select_g_job_fini() is called before
			 * make_node_comp(). Rely on SLURM logic for this case.
			 */
			slurm_alps_mismatch++;
		}

		if (node->state == BNS_DOWN) {
			reason = "ALPS marked it DOWN";
		} else if (node->state == BNS_UNAVAIL) {
			reason = "node is UNAVAILABLE";
		} else if (node->state == BNS_ROUTE) {
			reason = "node does ROUTING";
		} else if (node->state == BNS_SUSPECT) {
			reason = "entered SUSPECT mode";
		} else if (node->state == BNS_ADMINDOWN) {
			reason = "node is ADMINDOWN";
		} else if (node->state != BNS_UP) {
			reason = "state not UP";
		} else if (node->role != BNR_BATCH) {
			reason = "mode not BATCH";
		} else if (node->arch != BNA_XT) {
			reason = "arch not XT/XE";
		}

		/* Base state entirely derives from ALPS */
		if (reason) {
			if (node_ptr->down_time == 0)
				node_ptr->down_time = now;
			if (IS_NODE_DOWN(node_ptr)) {
				/* node still down */
			} else if ((slurmctld_conf.slurmd_timeout == 0) ||
				   ((now - node_ptr->down_time) <
				    slurmctld_conf.slurmd_timeout)) {
				node_ptr->node_state |= NODE_STATE_NO_RESPOND;
				bit_clear(avail_node_bitmap, node_inx);
			} else {
				xfree(node_ptr->reason);
				info("MARKING %s DOWN (%s)",
				     node_ptr->name, reason);
				/* set_node_down also kills any running jobs */
				set_node_down_ptr(node_ptr, reason);
			}
		} else if (IS_NODE_DOWN(node_ptr)) {
			xfree(node_ptr->reason);
			node_ptr->down_time = 0;
			info("MARKING %s UP", node_ptr->name);

			/* Reset state, make_node_idle figures out the rest */
			node_ptr->node_state &= NODE_STATE_FLAGS;
			node_ptr->node_state &= (~NODE_STATE_NO_RESPOND);
			node_ptr->node_state |= NODE_STATE_UNKNOWN;

			make_node_idle(node_ptr, NULL);
			if (!IS_NODE_DRAIN(node_ptr) &&
			    !IS_NODE_FAIL(node_ptr)) {
				xfree(node_ptr->reason);
				node_ptr->reason_time = 0;
				node_ptr->reason_uid = NO_VAL;
				clusteracct_storage_g_node_up(
					acct_db_conn, node_ptr, now);
			}
		} else if (IS_NODE_NO_RESPOND(node_ptr)) {
			node_ptr->node_state &= (~NODE_STATE_NO_RESPOND);
			if (!IS_NODE_DRAIN(node_ptr) &&
			    !IS_NODE_FAIL(node_ptr)) {
				bit_set(avail_node_bitmap, node_inx);
			}
		}
	}

	if (slurm_alps_mismatch)
		debug("ALPS: %d node(s) still held", slurm_alps_mismatch);

	/*
	 * Check that each ALPS reservation corresponds to a SLURM job.
	 * Purge orphaned reservations, which may result from stale or
	 * messed up system state, or are indicative of ALPS problems
	 * (stuck in pending cancel calls).
	 */
	for (rsvn = inv->f->rsvn_head; rsvn; rsvn = rsvn->next) {
		ListIterator job_iter = list_iterator_create(job_list);
		struct job_record *job_ptr;
		uint32_t resv_id;

		while ((job_ptr = (struct job_record *)list_next(job_iter))) {
			if (_get_select_jobinfo(job_ptr->select_jobinfo->data,
						SELECT_JOBDATA_RESV_ID,
						&resv_id) == SLURM_SUCCESS
			    && resv_id == rsvn->rsvn_id)
				break;
		}
		list_iterator_destroy(job_iter);

		/*
		 * Changed to ignore reservations for "UNKNOWN" batch
		 * ids (e.g. the interactive region) (Chris North)
		 */

		if ((job_ptr == NULL) && (xstrcmp(rsvn->batch_id, "UNKNOWN"))) {
			error("orphaned ALPS reservation %u, trying to remove",
			      rsvn->rsvn_id);
			rel_rc = basil_safe_release(rsvn->rsvn_id, inv);
			if (rel_rc) {
				error("ALPS reservation %u removal FAILED: %s",
				      rsvn->rsvn_id, basil_strerror(rel_rc));
			} else {
				debug("ALPS reservation %u removed",
				      rsvn->rsvn_id);
			}
			slurm_alps_mismatch = true;
		}
	}
	free_inv(inv);

	if (slurm_alps_mismatch) {
		/* If SLURM and ALPS state are not in synchronization,
		 * do not schedule any more jobs until waiting at least
		 * SyncTimeout seconds. */
		if (slurm_alps_mismatch_time == 0) {
			slurm_alps_mismatch_time = now;
		} else if (cray_conf->sync_timeout == 0) {
			/* Wait indefinitely */
		} else if (difftime(now, slurm_alps_mismatch_time) <
			   cray_conf->sync_timeout) {
			return ESLURM_REQUESTED_NODE_CONFIG_UNAVAILABLE;
		} else if (!logged_sync_timeout) {
			error("Could not synchronize SLURM with ALPS for %u "
			      "seconds, proceeding with job scheduling",
			      cray_conf->sync_timeout);
			logged_sync_timeout = true;
		}
	} else {
		slurm_alps_mismatch_time = 0;
		logged_sync_timeout = false;
	}
	return rc;
}
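Per the doc comment, a hypothetical sketch of how a scheduling cycle might gate on the result:

/* hypothetical caller: run the inventory immediately before
 * scheduling and skip the cycle while SLURM and ALPS disagree */
if (basil_inventory() != SLURM_SUCCESS)
	return;		/* retry on the next scheduling cycle */
/* ... proceed with job scheduling ... */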
Example no. 10
static void _update_sinfo(sinfo_data_t *sinfo_ptr, node_info_t *node_ptr,
			  uint32_t node_scaling)
{
	uint16_t base_state;
	uint16_t used_cpus = 0, error_cpus = 0;
	int total_cpus = 0, total_nodes = 0;
	/* since node_scaling could be less here, we need to use the
	 * global node scaling which should never change. */
	int single_node_cpus = (node_ptr->cpus / g_node_scaling);

	base_state = node_ptr->node_state & NODE_STATE_BASE;

	if (sinfo_ptr->nodes_total == 0) {	/* first node added */
		sinfo_ptr->node_state = node_ptr->node_state;
		sinfo_ptr->features   = node_ptr->features;
		sinfo_ptr->gres       = node_ptr->gres;
		sinfo_ptr->reason     = node_ptr->reason;
		sinfo_ptr->reason_time = node_ptr->reason_time;
		sinfo_ptr->reason_uid = node_ptr->reason_uid;
		sinfo_ptr->min_cpus    = node_ptr->cpus;
		sinfo_ptr->max_cpus    = node_ptr->cpus;
		sinfo_ptr->min_sockets = node_ptr->sockets;
		sinfo_ptr->max_sockets = node_ptr->sockets;
		sinfo_ptr->min_cores   = node_ptr->cores;
		sinfo_ptr->max_cores   = node_ptr->cores;
		sinfo_ptr->min_threads = node_ptr->threads;
		sinfo_ptr->max_threads = node_ptr->threads;
		sinfo_ptr->min_disk   = node_ptr->tmp_disk;
		sinfo_ptr->max_disk   = node_ptr->tmp_disk;
		sinfo_ptr->min_mem    = node_ptr->real_memory;
		sinfo_ptr->max_mem    = node_ptr->real_memory;
		sinfo_ptr->min_weight = node_ptr->weight;
		sinfo_ptr->max_weight = node_ptr->weight;
		sinfo_ptr->min_cpu_load = node_ptr->cpu_load;
		sinfo_ptr->max_cpu_load = node_ptr->cpu_load;
		sinfo_ptr->max_cpus_per_node = sinfo_ptr->part_info->
					       max_cpus_per_node;
		sinfo_ptr->version    = node_ptr->version;
	} else if (hostlist_find(sinfo_ptr->nodes, node_ptr->name) != -1) {
		/* we already have this node in this record,
		 * just return, don't duplicate */
		return;
	} else {
		if (sinfo_ptr->min_cpus > node_ptr->cpus)
			sinfo_ptr->min_cpus = node_ptr->cpus;
		if (sinfo_ptr->max_cpus < node_ptr->cpus)
			sinfo_ptr->max_cpus = node_ptr->cpus;

		if (sinfo_ptr->min_sockets > node_ptr->sockets)
			sinfo_ptr->min_sockets = node_ptr->sockets;
		if (sinfo_ptr->max_sockets < node_ptr->sockets)
			sinfo_ptr->max_sockets = node_ptr->sockets;

		if (sinfo_ptr->min_cores > node_ptr->cores)
			sinfo_ptr->min_cores = node_ptr->cores;
		if (sinfo_ptr->max_cores < node_ptr->cores)
			sinfo_ptr->max_cores = node_ptr->cores;

		if (sinfo_ptr->min_threads > node_ptr->threads)
			sinfo_ptr->min_threads = node_ptr->threads;
		if (sinfo_ptr->max_threads < node_ptr->threads)
			sinfo_ptr->max_threads = node_ptr->threads;

		if (sinfo_ptr->min_disk > node_ptr->tmp_disk)
			sinfo_ptr->min_disk = node_ptr->tmp_disk;
		if (sinfo_ptr->max_disk < node_ptr->tmp_disk)
			sinfo_ptr->max_disk = node_ptr->tmp_disk;

		if (sinfo_ptr->min_mem > node_ptr->real_memory)
			sinfo_ptr->min_mem = node_ptr->real_memory;
		if (sinfo_ptr->max_mem < node_ptr->real_memory)
			sinfo_ptr->max_mem = node_ptr->real_memory;

		if (sinfo_ptr->min_weight > node_ptr->weight)
			sinfo_ptr->min_weight = node_ptr->weight;
		if (sinfo_ptr->max_weight < node_ptr->weight)
			sinfo_ptr->max_weight = node_ptr->weight;

		if (sinfo_ptr->min_cpu_load > node_ptr->cpu_load)
			sinfo_ptr->min_cpu_load = node_ptr->cpu_load;
		if (sinfo_ptr->max_cpu_load < node_ptr->cpu_load)
			sinfo_ptr->max_cpu_load = node_ptr->cpu_load;
	}

	hostlist_push_host(sinfo_ptr->nodes, node_ptr->name);
	if (params.match_flags.node_addr_flag)
		hostlist_push_host(sinfo_ptr->node_addr, node_ptr->node_addr);
	if (params.match_flags.hostnames_flag)
		hostlist_push_host(sinfo_ptr->hostnames, node_ptr->node_hostname);

	total_cpus = node_ptr->cpus;
	total_nodes = node_scaling;

	select_g_select_nodeinfo_get(node_ptr->select_nodeinfo,
				     SELECT_NODEDATA_SUBCNT,
				     NODE_STATE_ALLOCATED,
				     &used_cpus);
	select_g_select_nodeinfo_get(node_ptr->select_nodeinfo,
				     SELECT_NODEDATA_SUBCNT,
				     NODE_STATE_ERROR,
				     &error_cpus);

	if (params.cluster_flags & CLUSTER_FLAG_BG) {
		if (!params.match_flags.state_flag &&
		    (used_cpus || error_cpus)) {
			/* We only get one shot at this (because all states
			 * are combined together), so we need to make
			 * sure we get all the subgrps accounted. (So use
			 * g_node_scaling for safe measure) */
			total_nodes = g_node_scaling;

			sinfo_ptr->nodes_alloc += used_cpus;
			sinfo_ptr->nodes_other += error_cpus;
			sinfo_ptr->nodes_idle +=
				(total_nodes - (used_cpus + error_cpus));
			used_cpus  *= single_node_cpus;
			error_cpus *= single_node_cpus;
		} else {
			/* process only for this subgrp and then return */
			total_cpus = total_nodes * single_node_cpus;

			if ((base_state == NODE_STATE_ALLOCATED) ||
			    (base_state == NODE_STATE_MIXED) ||
			    (node_ptr->node_state & NODE_STATE_COMPLETING)) {
				sinfo_ptr->nodes_alloc += total_nodes;
				sinfo_ptr->cpus_alloc += total_cpus;
			} else if (IS_NODE_DRAIN(node_ptr) ||
				   (base_state == NODE_STATE_DOWN)) {
				sinfo_ptr->nodes_other += total_nodes;
				sinfo_ptr->cpus_other += total_cpus;
			} else {
				sinfo_ptr->nodes_idle += total_nodes;
				sinfo_ptr->cpus_idle += total_cpus;
			}

			sinfo_ptr->nodes_total += total_nodes;
			sinfo_ptr->cpus_total += total_cpus;

			return;
		}
	} else {
		if ((base_state == NODE_STATE_ALLOCATED) ||
		    (base_state == NODE_STATE_MIXED) ||
		    IS_NODE_COMPLETING(node_ptr))
			sinfo_ptr->nodes_alloc += total_nodes;
		else if (IS_NODE_DRAIN(node_ptr)
			 || (base_state == NODE_STATE_DOWN))
			sinfo_ptr->nodes_other += total_nodes;
		else
			sinfo_ptr->nodes_idle += total_nodes;
	}

	sinfo_ptr->nodes_total += total_nodes;

	sinfo_ptr->cpus_alloc += used_cpus;
	sinfo_ptr->cpus_total += total_cpus;
	total_cpus -= used_cpus + error_cpus;

	if (error_cpus) {
		sinfo_ptr->cpus_idle += total_cpus;
		sinfo_ptr->cpus_other += error_cpus;
	} else if (IS_NODE_DRAIN(node_ptr) ||
		   (base_state == NODE_STATE_DOWN)) {
		sinfo_ptr->cpus_other += total_cpus;
	} else
		sinfo_ptr->cpus_idle += total_cpus;
}
Example no. 11
/*
 * _filter_out - Determine if the specified node should be filtered out or
 *	reported.
 * node_ptr IN - node to consider filtering out
 * RET - true if node should not be reported, false otherwise
 */
static bool _filter_out(node_info_t *node_ptr)
{
	static hostlist_t host_list = NULL;

	if (params.nodes) {
		if (host_list == NULL)
			host_list = hostlist_create(params.nodes);
		if (hostlist_find (host_list, node_ptr->name) == -1)
			return true;
	}

	if (params.dead_nodes && !IS_NODE_NO_RESPOND(node_ptr))
		return true;

	if (params.responding_nodes && IS_NODE_NO_RESPOND(node_ptr))
		return true;

	if (params.state_list) {
		int *node_state;
		bool match = false;
		uint16_t base_state;
		ListIterator iterator;
		uint16_t cpus = 0;
		node_info_t tmp_node, *tmp_node_ptr = &tmp_node;

		iterator = list_iterator_create(params.state_list);
		while ((node_state = list_next(iterator))) {
			tmp_node_ptr->node_state = *node_state;
			if (*node_state == NODE_STATE_DRAIN) {
				/* We search for anything that has the
				 * drain flag set */
				if (IS_NODE_DRAIN(node_ptr)) {
					match = true;
					break;
				}
			} else if (IS_NODE_DRAINING(tmp_node_ptr)) {
				/* We search for anything that gets mapped to
				 * DRAINING in node_state_string */
				if (IS_NODE_DRAINING(node_ptr)) {
					match = true;
					break;
				}
			} else if (IS_NODE_DRAINED(tmp_node_ptr)) {
				/* We search for anything that gets mapped to
				 * DRAINED in node_state_string */
				if (IS_NODE_DRAINED(node_ptr)) {
					match = true;
					break;
				}
			} else if (*node_state & NODE_STATE_FLAGS) {
				if (*node_state & node_ptr->node_state) {
					match = true;
					break;
				}
			} else if (*node_state == NODE_STATE_ERROR) {
				slurm_get_select_nodeinfo(
					node_ptr->select_nodeinfo,
					SELECT_NODEDATA_SUBCNT,
					NODE_STATE_ERROR,
					&cpus);
				if (cpus) {
					match = true;
					break;
				}
			} else if (*node_state == NODE_STATE_ALLOCATED) {
				slurm_get_select_nodeinfo(
					node_ptr->select_nodeinfo,
					SELECT_NODEDATA_SUBCNT,
					NODE_STATE_ALLOCATED,
					&cpus);
				if (params.cluster_flags & CLUSTER_FLAG_BG
				    && !cpus &&
				    (IS_NODE_ALLOCATED(node_ptr) ||
				     IS_NODE_COMPLETING(node_ptr)))
					cpus = node_ptr->cpus;
				if (cpus) {
					match = true;
					break;
				}
			} else if (*node_state == NODE_STATE_IDLE) {
				base_state = node_ptr->node_state &
					(~NODE_STATE_NO_RESPOND);
				if (base_state == NODE_STATE_IDLE) {
					match = true;
					break;
				}
			} else {
				base_state =
					node_ptr->node_state & NODE_STATE_BASE;
				if (base_state == *node_state) {
					match = true;
					break;
				}
			}
		}
		list_iterator_destroy(iterator);
		if (!match)
			return true;
	}

	return false;
}
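A hypothetical sketch of the reporting loop this filter serves, assuming the node_info_msg_t layout used in the next example:

/* hypothetical caller: drop filtered nodes before aggregation */
int i;
for (i = 0; i < node_msg->record_count; i++) {
	node_info_t *node_ptr = &node_msg->node_array[i];
	if (_filter_out(node_ptr))
		continue;
	/* ... fold node_ptr into the sinfo output ... */
}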
Example no. 12
/*
 * _query_server - download the current server state
 * part_pptr IN/OUT - partition information message
 * node_pptr IN/OUT - node information message
 * block_pptr IN/OUT - BlueGene block data
 * reserv_pptr IN/OUT - reservation information message
 * clear_old IN - If set, then always replace old data, needed when going
 *		  between clusters.
 * RET zero or error code
 */
static int
_query_server(partition_info_msg_t ** part_pptr,
	      node_info_msg_t ** node_pptr,
	      block_info_msg_t ** block_pptr,
	      reserve_info_msg_t ** reserv_pptr,
	      bool clear_old)
{
	static partition_info_msg_t *old_part_ptr = NULL, *new_part_ptr;
	static node_info_msg_t *old_node_ptr = NULL, *new_node_ptr;
	static block_info_msg_t *old_bg_ptr = NULL, *new_bg_ptr;
	static reserve_info_msg_t *old_resv_ptr = NULL, *new_resv_ptr;
	int error_code;
	uint16_t show_flags = 0;
	int cc;
	node_info_t *node_ptr;

	if (params.all_flag)
		show_flags |= SHOW_ALL;

	if (old_part_ptr) {
		if (clear_old)
			old_part_ptr->last_update = 0;
		error_code = slurm_load_partitions(old_part_ptr->last_update,
						   &new_part_ptr, show_flags);
		if (error_code == SLURM_SUCCESS)
			slurm_free_partition_info_msg(old_part_ptr);
		else if (slurm_get_errno() == SLURM_NO_CHANGE_IN_DATA) {
			error_code = SLURM_SUCCESS;
			new_part_ptr = old_part_ptr;
		}
	} else {
		error_code = slurm_load_partitions((time_t) NULL, &new_part_ptr,
						   show_flags);
	}
	if (error_code) {
		slurm_perror("slurm_load_partitions");
		return error_code;
	}

	old_part_ptr = new_part_ptr;
	*part_pptr = new_part_ptr;

	if (old_node_ptr) {
		if (clear_old)
			old_node_ptr->last_update = 0;
		if (params.node_name_single) {
			error_code = slurm_load_node_single(&new_node_ptr,
							    params.nodes,
							    show_flags);
		} else {
			error_code = slurm_load_node(old_node_ptr->last_update,
						     &new_node_ptr, show_flags);
		}
		if (error_code == SLURM_SUCCESS)
			slurm_free_node_info_msg(old_node_ptr);
		else if (slurm_get_errno() == SLURM_NO_CHANGE_IN_DATA) {
			error_code = SLURM_SUCCESS;
			new_node_ptr = old_node_ptr;
		}
	} else if (params.node_name_single) {
		error_code = slurm_load_node_single(&new_node_ptr, params.nodes,
						    show_flags);
	} else {
		error_code = slurm_load_node((time_t) NULL, &new_node_ptr,
					     show_flags);
	}

	if (error_code) {
		slurm_perror("slurm_load_node");
		return error_code;
	}
	old_node_ptr = new_node_ptr;
	*node_pptr = new_node_ptr;

	/* Set the node state to NODE_STATE_MIXED where only part of
	 * the node is in use. */
	for (cc = 0; cc < new_node_ptr->record_count; cc++) {
		node_ptr = &(new_node_ptr->node_array[cc]);
		if (IS_NODE_DRAIN(node_ptr)) {
			/* don't worry about mixed since the
			 * whole node is being drained. */
		} else {
			uint16_t alloc_cpus = 0, err_cpus = 0, idle_cpus;
			int single_node_cpus =
				(node_ptr->cpus / g_node_scaling);

			select_g_select_nodeinfo_get(node_ptr->select_nodeinfo,
						     SELECT_NODEDATA_SUBCNT,
						     NODE_STATE_ALLOCATED,
						     &alloc_cpus);
			if (params.cluster_flags & CLUSTER_FLAG_BG) {
				if (!alloc_cpus &&
				    (IS_NODE_ALLOCATED(node_ptr) ||
				     IS_NODE_COMPLETING(node_ptr)))
					alloc_cpus = node_ptr->cpus;
				else
					alloc_cpus *= single_node_cpus;
			}
			idle_cpus = node_ptr->cpus - alloc_cpus;
			select_g_select_nodeinfo_get(node_ptr->select_nodeinfo,
						     SELECT_NODEDATA_SUBCNT,
						     NODE_STATE_ERROR,
						     &err_cpus);
			if (params.cluster_flags & CLUSTER_FLAG_BG)
				err_cpus *= single_node_cpus;
			idle_cpus -= err_cpus;

			if ((alloc_cpus && err_cpus) ||
			    (idle_cpus  && (idle_cpus != node_ptr->cpus))) {
				node_ptr->node_state &= NODE_STATE_FLAGS;
				node_ptr->node_state |= NODE_STATE_MIXED;
			}
		}
	}

	if (old_resv_ptr) {
		if (clear_old)
			old_resv_ptr->last_update = 0;
		error_code = slurm_load_reservations(old_resv_ptr->last_update,
						     &new_resv_ptr);
		if (error_code == SLURM_SUCCESS)
			slurm_free_reservation_info_msg(old_resv_ptr);
		else if (slurm_get_errno() == SLURM_NO_CHANGE_IN_DATA) {
			error_code = SLURM_SUCCESS;
			new_resv_ptr = old_resv_ptr;
		}
	} else {
		error_code = slurm_load_reservations((time_t) NULL,
						     &new_resv_ptr);
	}

	if (error_code) {
		slurm_perror("slurm_load_reservations");
		return error_code;
	}
	old_resv_ptr = new_resv_ptr;
	*reserv_pptr = new_resv_ptr;

	if (!params.bg_flag)
		return SLURM_SUCCESS;

	if (params.cluster_flags & CLUSTER_FLAG_BG) {
		if (old_bg_ptr) {
			if (clear_old)
				old_bg_ptr->last_update = 0;
			error_code = slurm_load_block_info(
				old_bg_ptr->last_update,
				&new_bg_ptr, show_flags);
			if (error_code == SLURM_SUCCESS)
				slurm_free_block_info_msg(old_bg_ptr);
			else if (slurm_get_errno() == SLURM_NO_CHANGE_IN_DATA) {
				error_code = SLURM_SUCCESS;
				new_bg_ptr = old_bg_ptr;
			}
		} else {
			error_code = slurm_load_block_info((time_t) NULL,
							   &new_bg_ptr,
							   show_flags);
		}
	}

	if (error_code) {
		slurm_perror("slurm_load_block");
		return error_code;
	}
	old_bg_ptr = new_bg_ptr;
	*block_pptr = new_bg_ptr;
	return SLURM_SUCCESS;
}
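The same load-or-reuse idiom appears four times above (partitions, nodes, reservations, blocks). A condensed sketch of the pattern, with the partition call standing in for the rest and error paths trimmed:

/* request only data newer than what we already hold; on "no change",
 * keep the old message instead of replacing it */
static partition_info_msg_t *old_ptr = NULL;
partition_info_msg_t *new_ptr;
int rc;

if (old_ptr) {
	rc = slurm_load_partitions(old_ptr->last_update, &new_ptr, 0);
	if (rc == SLURM_SUCCESS)
		slurm_free_partition_info_msg(old_ptr);	/* replaced */
	else if (slurm_get_errno() == SLURM_NO_CHANGE_IN_DATA) {
		rc = SLURM_SUCCESS;
		new_ptr = old_ptr;	/* unchanged: reuse old message */
	}
} else {
	rc = slurm_load_partitions((time_t) NULL, &new_ptr, 0);
}
old_ptr = new_ptr;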
Example no. 13
/**
 * basil_geometry - Check node attributes, resolve (X,Y,Z) coordinates.
 *
 * Checks both SDB database and ALPS inventory for consistency. The inventory
 * part is identical to basil_inventory(), with the difference of being called
 * before valid bitmaps exist, from select_g_node_init().
 * Its dependencies are:
 * - it needs reset_job_bitmaps() in order to rebuild node_bitmap fields,
 * - it relies on _sync_nodes_to_jobs() to
 *   o kill active jobs on nodes now marked DOWN,
 *   o reset node state to ALLOCATED if it has been marked IDLE here (which is
 *     an error case, since there is no longer an ALPS reservation for the job,
 *     this is caught by the subsequent basil_inventory()).
 */
extern int basil_geometry(struct node_record *node_ptr_array, int node_cnt)
{
    struct node_record *node_ptr, *end = node_ptr_array + node_cnt;
    enum basil_version version = get_basil_version();
    struct basil_inventory *inv;

    /* General MySQL */
    MYSQL		*handle;
    MYSQL_STMT	*stmt = NULL;
    /* Input parameters */
    unsigned int	node_id;
    /*
     * Use a left outer join here since the attributes table may not be
     * populated for a given nodeid (e.g. when the node has been disabled
     * on the SMW via 'xtcli disable').
     * The processor table has more authoritative information; if a nodeid
     * is not listed there, it does not exist.
     */
    const char query[] =	"SELECT x_coord, y_coord, z_coord,"
                            "       cab_position, cab_row, cage, slot, cpu,"
                            "	LOG2(coremask+1), availmem, "
                            "       processor_type  "
                            "FROM  processor LEFT JOIN attributes "
                            "ON    processor_id = nodeid "
                            "WHERE processor_id = ? ";
    const int	PARAM_COUNT = 1;	/* node id */
    MYSQL_BIND	params[PARAM_COUNT];

    int		x_coord, y_coord, z_coord;
    int		cab, row, cage, slot, cpu;
    unsigned int	node_cpus, node_mem;
    char		proc_type[BASIL_STRING_SHORT];
    MYSQL_BIND	bind_cols[COLUMN_COUNT];
    my_bool		is_null[COLUMN_COUNT];
    my_bool		is_error[COLUMN_COUNT];
    int		is_gemini, i;
    time_t		now = time(NULL);

    memset(params, 0, sizeof(params));
    params[0].buffer_type = MYSQL_TYPE_LONG;
    params[0].is_unsigned = true;
    params[0].is_null     = (my_bool *)0;
    params[0].buffer      = (char *)&node_id;

    memset(bind_cols, 0, sizeof(bind_cols));
    for (i = 0; i < COLUMN_COUNT; i ++) {
        bind_cols[i].is_null = &is_null[i];
        bind_cols[i].error   = &is_error[i];

        if (i == COL_TYPE) {
            bind_cols[i].buffer_type   = MYSQL_TYPE_STRING;
            bind_cols[i].buffer_length = sizeof(proc_type);
            bind_cols[i].buffer	   = proc_type;
        } else {
            bind_cols[i].buffer_type   = MYSQL_TYPE_LONG;
            bind_cols[i].is_unsigned   = (i >= COL_CORES);
        }
    }
    bind_cols[COL_X].buffer	     = (char *)&x_coord;
    bind_cols[COL_Y].buffer	     = (char *)&y_coord;
    bind_cols[COL_Z].buffer	     = (char *)&z_coord;
    bind_cols[COL_CAB].buffer    = (char *)&cab;
    bind_cols[COL_ROW].buffer    = (char *)&row;
    bind_cols[COL_CAGE].buffer   = (char *)&cage;
    bind_cols[COL_SLOT].buffer   = (char *)&slot;
    bind_cols[COL_CPU].buffer    = (char *)&cpu;
    bind_cols[COL_CORES].buffer  = (char *)&node_cpus;
    bind_cols[COL_MEMORY].buffer = (char *)&node_mem;

    inv = get_full_inventory(version);
    if (inv == NULL)
        fatal("failed to get initial BASIL inventory");

    info("BASIL %s initial INVENTORY: %d/%d batch nodes available",
         bv_names_long[version], inv->batch_avail, inv->batch_total);

    handle = cray_connect_sdb();
    if (handle == NULL)
        fatal("can not connect to XTAdmin database on the SDB");

    is_gemini = cray_is_gemini_system(handle);
    if (is_gemini < 0)
        fatal("can not determine Cray XT/XE system type");

    stmt = prepare_stmt(handle, query, params, PARAM_COUNT,
                        bind_cols, COLUMN_COUNT);
    if (stmt == NULL)
        fatal("can not prepare statement to resolve Cray coordinates");

    for (node_ptr = node_ptr_array; node_ptr < end; node_ptr++) {
        struct basil_node *node;
        char *reason = NULL;

        if ((node_ptr->name == NULL) ||
                (sscanf(node_ptr->name, "nid%05u", &node_id) != 1)) {
            error("can not read basil_node_id from %s",
                  node_ptr->name);
            continue;
        }

        if (exec_stmt(stmt, query, bind_cols, COLUMN_COUNT) < 0)
            fatal("can not resolve %s coordinates", node_ptr->name);

        if (fetch_stmt(stmt) == 0) {
#if _DEBUG
            info("proc_type:%s cpus:%u memory:%u",
                 proc_type, node_cpus, node_mem);
            info("row:%u cage:%u slot:%u cpu:%u xyz:%u:%u:%u",
                 row, cage, slot, cpu, x_coord, y_coord, z_coord);
#endif
            if (strcmp(proc_type, "compute") != 0) {
                /*
                 * Switching a compute node to be a service node
                 * can not happen at runtime: requires a reboot.
                 */
                fatal("Node '%s' is a %s node. "
                      "Only compute nodes can appear in slurm.conf.",
                      node_ptr->name, proc_type);
            } else if (is_null[COL_CORES] || is_null[COL_MEMORY]) {
                /*
                 * This can happen if a node has been disabled
                 * on the SMW (using 'xtcli disable <nid>'). The
                 * node will still be listed in the 'processor'
                 * table, but have no 'attributes' entry (NULL
                 * values for CPUs/memory). Also, the node will
                 * be invisible to ALPS, which is why we need to
                 * set it down here already.
                 */
                node_cpus = node_mem = 0;
                reason = "node data unknown - disabled on SMW?";
            } else if (is_null[COL_X] || is_null[COL_Y]
                       || is_null[COL_Z]) {
                /*
                 * Similar case to the one above, observed when
                 * a blade has been removed. Node will not
                 * likely show up in ALPS.
                 */
                x_coord = y_coord = z_coord = 0;
                reason = "unknown coordinates - hardware failure?";
            } else if (node_cpus < node_ptr->config_ptr->cpus) {
                /*
                 * FIXME: Might reconsider this policy.
                 *
                 * FastSchedule is ignored here, it requires the
                 * slurm.conf to be consistent with hardware.
                 *
                 * Assumption is that CPU/Memory do not change
                 * at runtime (Cray has no hot-swappable parts).
                 *
                 * Hence checking it in basil_inventory() would
                 * mean a lot of runtime overhead.
                 */
                fatal("slurm.conf: node %s has only Procs=%d",
                      node_ptr->name, node_cpus);
            } else if (node_mem < node_ptr->config_ptr->real_memory) {
                fatal("slurm.conf: node %s has RealMemory=%d",
                      node_ptr->name, node_mem);
            }

        } else if (is_gemini) {
            fatal("Non-existing Gemini node '%s' in slurm.conf",
                  node_ptr->name);
        } else {
            fatal("Non-existing SeaStar node '%s' in slurm.conf",
                  node_ptr->name);
        }

        if (!is_gemini) {
            /*
             * SeaStar: each node has unique coordinates
             */
            if (node_ptr->arch == NULL)
                node_ptr->arch = xstrdup("XT");
        } else {
            /*
             * Gemini: each 2 nodes share the same network
             * interface (i.e., nodes 0/1 and 2/3 each have
             * the same coordinates).
             */
            if (node_ptr->arch == NULL)
                node_ptr->arch = xstrdup("XE");
        }

        xfree(node_ptr->node_hostname);
        xfree(node_ptr->comm_name);
        /*
         * Convention: since we are using SLURM in frontend-mode,
         *             we use Node{Addr,HostName} as follows.
         *
         * NodeAddr:      <X><Y><Z> coordinates in base-36 encoding
         *
         * NodeHostName:  c#-#c#s#n# using the  NID convention
         *                <cabinet>-<row><chassis><slot><node>
         * - each cabinet can accommodate 3 chassis (c1..c3)
         * - each chassis has 8 slots               (s0..s7)
         * - each slot contains 2 or 4 nodes        (n0..n3)
         *   o either 2 service nodes (n0/n3)
         *   o or 4 compute nodes     (n0..n3)
         *   o or 2 gemini chips      (g0/g1 serving n0..n3)
         *
         * Example: c0-0c1s0n1
         *          - c0- = cabinet 0
         *          - 0   = row     0
         *          - c1  = chassis 1
         *          - s0  = slot    0
         *          - n1  = node    1
         */
        node_ptr->node_hostname = xstrdup_printf("c%u-%uc%us%un%u", cab,
                                  row, cage, slot, cpu);
        node_ptr->comm_name = xstrdup_printf("%c%c%c",
                                             _enc_coord(x_coord),
                                             _enc_coord(y_coord),
                                             _enc_coord(z_coord));
        dim_size[0] = MAX(dim_size[0], (x_coord - 1));
        dim_size[1] = MAX(dim_size[1], (y_coord - 1));
        dim_size[2] = MAX(dim_size[2], (z_coord - 1));
#if _DEBUG
        info("%s  %s  %s  cpus=%u, mem=%u reason=%s", node_ptr->name,
             node_ptr->node_hostname, node_ptr->comm_name,
             node_cpus, node_mem, reason);
#endif
        /*
         * Check the current state reported by ALPS inventory, unless it
         * is already evident that the node has some other problem.
         */
        if (reason == NULL) {
            for (node = inv->f->node_head; node; node = node->next)
                if (node->node_id == node_id)
                    break;
            if (node == NULL) {
                reason = "not visible to ALPS - check hardware";
            } else if (node->state == BNS_DOWN) {
                reason = "ALPS marked it DOWN";
            } else if (node->state == BNS_UNAVAIL) {
                reason = "node is UNAVAILABLE";
            } else if (node->state == BNS_ROUTE) {
                reason = "node does ROUTING";
            } else if (node->state == BNS_SUSPECT) {
                reason = "entered SUSPECT mode";
            } else if (node->state == BNS_ADMINDOWN) {
                reason = "node is ADMINDOWN";
            } else if (node->state != BNS_UP) {
                reason = "state not UP";
            } else if (node->role != BNR_BATCH) {
                reason = "mode not BATCH";
            } else if (node->arch != BNA_XT) {
                reason = "arch not XT/XE";
            }
        }

        /* Base state entirely derives from ALPS
         * NOTE: The node bitmaps are not defined when this code is
         * initially executed. */
        node_ptr->node_state &= NODE_STATE_FLAGS;
        if (reason) {
            if (node_ptr->down_time == 0)
                node_ptr->down_time = now;
            if (IS_NODE_DOWN(node_ptr)) {
                /* node still down */
                debug("Initial DOWN node %s - %s",
                      node_ptr->name, node_ptr->reason);
            } else if (slurmctld_conf.slurmd_timeout &&
                       ((now - node_ptr->down_time) <
                        slurmctld_conf.slurmd_timeout)) {
                node_ptr->node_state |= NODE_STATE_NO_RESPOND;
            } else {
                info("Initial DOWN node %s - %s",
                     node_ptr->name, reason);
                node_ptr->reason = xstrdup(reason);
                /* Node state flags preserved above */
                node_ptr->node_state |= NODE_STATE_DOWN;
                clusteracct_storage_g_node_down(acct_db_conn,
                                                node_ptr,
                                                now, NULL,
                                                slurm_get_slurm_user_id());
            }
        } else {
            bool node_up_flag = IS_NODE_DOWN(node_ptr) &&
                                !IS_NODE_DRAIN(node_ptr) &&
                                !IS_NODE_FAIL(node_ptr);
            node_ptr->down_time = 0;
            if (node_is_allocated(node))
                node_ptr->node_state |= NODE_STATE_ALLOCATED;
            else
                node_ptr->node_state |= NODE_STATE_IDLE;
            node_ptr->node_state &= (~NODE_STATE_NO_RESPOND);
            xfree(node_ptr->reason);
            if (node_up_flag) {
                info("ALPS returned node %s to service",
                     node_ptr->name);
                clusteracct_storage_g_node_up(acct_db_conn,
                                              node_ptr, now);
            }
        }

        free_stmt_result(stmt);
    }

    if (stmt_close(stmt))
        error("error closing statement: %s", mysql_stmt_error(stmt));
    cray_close_sdb(handle);
    free_inv(inv);

    return SLURM_SUCCESS;
}
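_enc_coord() is called above but defined outside this excerpt; given the "base-36 encoding" convention in the NodeAddr comment, a plausible sketch:

/* hypothetical sketch of _enc_coord(): one base-36 digit per
 * coordinate, 0-9 then a-z */
static char _enc_coord(int coord)
{
    return coord + (coord < 10 ? '0' : 'a' - 10);
}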