Example No. 1
/* Send a signal RPC to a list of nodes */
static void _send_sig(uint32_t job_id, uint32_t step_id, uint16_t signal,
		      char *nodelist)
{
	agent_arg_t *agent_args;
	signal_tasks_msg_t *signal_tasks_msg;
	hostlist_iterator_t hi;
	char *host;
	struct node_record *node_ptr;

	signal_tasks_msg = xmalloc(sizeof(signal_tasks_msg_t));
	signal_tasks_msg->job_id		= job_id;
	signal_tasks_msg->job_step_id	= step_id;
	signal_tasks_msg->signal		= signal;

	agent_args = xmalloc(sizeof(agent_arg_t));
	agent_args->msg_type		= REQUEST_SIGNAL_TASKS;
	agent_args->retry		= 1;
	agent_args->msg_args		= signal_tasks_msg;
	agent_args->hostlist		= hostlist_create(nodelist);
	agent_args->node_count		= hostlist_count(agent_args->hostlist);
	agent_args->protocol_version = SLURM_PROTOCOL_VERSION;
	hi = hostlist_iterator_create(agent_args->hostlist);
	while ((host = hostlist_next(hi))) {
		if ((node_ptr = find_node_record(host)) &&
		    (agent_args->protocol_version > node_ptr->protocol_version))
			agent_args->protocol_version =
				node_ptr->protocol_version;
		free(host);
	}
	hostlist_iterator_destroy(hi);

	agent_queue_request(agent_args);
}
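For context, a hypothetical caller might look like this (the job ID, step ID, and node range are made-up values; SIGKILL comes from <signal.h>):

#include <signal.h>

/* Hypothetical usage sketch: signal step 0 of job 1234 on nodes tux[0-15]. */
_send_sig(1234, 0, (uint16_t) SIGKILL, "tux[0-15]");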
Example No. 2
/* Send a signal RPC to a specific node */
static void _send_sig(uint32_t job_id, uint32_t step_id, uint16_t signal,
		char *node_name, slurm_addr_t node_addr)
{
	agent_arg_t *agent_args;
	kill_tasks_msg_t *kill_tasks_msg;
	struct node_record *node_ptr;

	kill_tasks_msg = xmalloc(sizeof(kill_tasks_msg_t));
	kill_tasks_msg->job_id		= job_id;
	kill_tasks_msg->job_step_id	= step_id;
	kill_tasks_msg->signal		= signal;

	agent_args = xmalloc(sizeof(agent_arg_t));
	agent_args->msg_type		= REQUEST_SIGNAL_TASKS;
	agent_args->retry		= 1;
	agent_args->msg_args		= kill_tasks_msg;
	agent_args->hostlist		= hostlist_create(node_name);
	agent_args->node_count		= 1;
	agent_args->protocol_version	= SLURM_PROTOCOL_VERSION;

	if ((node_ptr = find_node_record(node_name)))
		agent_args->protocol_version = node_ptr->protocol_version;

	agent_queue_request(agent_args);
}
Example No. 3
/** Convert between Cray NID and slurm nodename format */
static struct node_record *_find_node_by_basil_id(uint32_t node_id)
{
	char nid[9];	/* nid%05d\0 */

	snprintf(nid, sizeof(nid), "nid%05u", node_id);

	return find_node_record(nid);
}
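A self-contained sketch of the NID formatting used above, including the reverse parse (hypothetical code outside Slurm; note the 9-byte buffer only fits NIDs below 100000):

#include <stdio.h>

int main(void)
{
	char nid[9];		/* "nid" + 5 digits + '\0' */
	unsigned int node_id = 42, parsed = 0;

	snprintf(nid, sizeof(nid), "nid%05u", node_id);
	printf("%s\n", nid);		/* prints "nid00042" */

	/* Reverse conversion: recover the numeric NID. */
	if (sscanf(nid, "nid%5u", &parsed) == 1)
		printf("%u\n", parsed);	/* prints "42" */
	return 0;
}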
Example No. 4
/*
 * srun_node_fail - notify srun of a node's failure
 * IN job_ptr - job to notify
 * IN node_name - name of failed node
 */
extern void srun_node_fail(struct job_record *job_ptr, char *node_name)
{
#ifndef HAVE_FRONT_END
	struct node_record *node_ptr;
#endif
	int bit_position = -1;
	slurm_addr_t * addr;
	srun_node_fail_msg_t *msg_arg;
	ListIterator step_iterator;
	struct step_record *step_ptr;

	xassert(job_ptr);
	xassert(node_name);
	if (!job_ptr || !IS_JOB_RUNNING(job_ptr))
		return;

#ifdef HAVE_FRONT_END
	/* Purge all job steps in front end mode */
#else
	if (!node_name || (node_ptr = find_node_record(node_name)) == NULL)
		return;
	bit_position = node_ptr - node_record_table_ptr;
#endif

	step_iterator = list_iterator_create(job_ptr->step_list);
	while ((step_ptr = (struct step_record *) list_next(step_iterator))) {
		if (step_ptr->step_node_bitmap == NULL)   /* pending step */
			continue;
		if ((bit_position >= 0) &&
		    (!bit_test(step_ptr->step_node_bitmap, bit_position)))
			continue;	/* job step not on this node */
		if ( (step_ptr->port    == 0)    ||
		     (step_ptr->host    == NULL) ||
		     (step_ptr->batch_step)      ||
		     (step_ptr->host[0] == '\0') )
			continue;
		addr = xmalloc(sizeof(struct sockaddr_in));
		slurm_set_addr(addr, step_ptr->port, step_ptr->host);
		msg_arg = xmalloc(sizeof(srun_node_fail_msg_t));
		msg_arg->job_id   = job_ptr->job_id;
		msg_arg->step_id  = step_ptr->step_id;
		msg_arg->nodelist = xstrdup(node_name);
		_srun_agent_launch(addr, step_ptr->host, SRUN_NODE_FAIL,
				   msg_arg, step_ptr->start_protocol_ver);
	}
	list_iterator_destroy(step_iterator);

	if (job_ptr->other_port && job_ptr->alloc_node && job_ptr->resp_host) {
		addr = xmalloc(sizeof(struct sockaddr_in));
		slurm_set_addr(addr, job_ptr->other_port, job_ptr->resp_host);
		msg_arg = xmalloc(sizeof(srun_node_fail_msg_t));
		msg_arg->job_id   = job_ptr->job_id;
		msg_arg->step_id  = NO_VAL;
		msg_arg->nodelist = xstrdup(node_name);
		_srun_agent_launch(addr, job_ptr->alloc_node, SRUN_NODE_FAIL,
				   msg_arg, job_ptr->start_protocol_ver);
	}
}
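The bit_position computation above relies on C pointer subtraction yielding an array index; a minimal stand-alone illustration (hypothetical struct, not Slurm's):

#include <stddef.h>
#include <stdio.h>

struct node_record { int unused; };

int main(void)
{
	struct node_record table[8];
	struct node_record *node_ptr = &table[5];

	/* Subtracting the table base from an element pointer gives the
	 * element's index, i.e. its position in a node bitmap. */
	ptrdiff_t bit_position = node_ptr - table;
	printf("%td\n", bit_position);	/* prints 5 */
	return 0;
}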
Example No. 5
/*
 * topo_get_node_addr - build node address and the associated pattern
 *      based on the topology information
 *
 * example of output :
 *      address : s0.s4.s8.tux1
 *      pattern : switch.switch.switch.node
 */
extern int topo_get_node_addr(char* node_name, char** paddr, char** ppattern)
{
#ifndef HAVE_FRONT_END
	if (find_node_record(node_name) == NULL)
		return SLURM_ERROR;
#endif

	*paddr = xstrdup(node_name);
	*ppattern = xstrdup("node");
	return SLURM_SUCCESS;
}
Example No. 6
extern int up_nodecard(char *mp_name, bitstr_t *ionode_bitmap)
{
	ListIterator itr = NULL;
	bg_record_t *bg_record = NULL;
	struct node_record *node_ptr = NULL;
	int mp_bit = 0;
	int ret = 0;

	xassert(mp_name);
	xassert(ionode_bitmap);

	node_ptr = find_node_record(mp_name);
	if (!node_ptr) {
		error ("down_sub_node_blocks: invalid node specified %s",
		       mp_name);
		return EINVAL;
	}
	mp_bit = (node_ptr - node_record_table_ptr);

	slurm_mutex_lock(&block_state_mutex);
	itr = list_iterator_create(bg_lists->main);
	while ((bg_record = list_next(itr))) {
		if (bg_record->job_running != BLOCK_ERROR_STATE)
			continue;
		if (!bit_test(bg_record->mp_bitmap, mp_bit))
			continue;

		if (!bit_overlap(bg_record->ionode_bitmap, ionode_bitmap)) {
			continue;
		}
		resume_block(bg_record);
	}
	list_iterator_destroy(itr);
	slurm_mutex_unlock(&block_state_mutex);

	/* FIX ME: This needs to call the opposite of
	   slurm_drain_nodes which does not yet exist.
	*/
	if ((ret = node_already_down(mp_name))) {
		/* means it was drained */
		if (ret == 2) {
			/* debug("node %s put back into service after " */
/* 			      "being in an error state", */
/* 			      mp_name); */
		}
	}

	return SLURM_SUCCESS;
}
Example No. 7
/* Determine if specific slurm node is already in DOWN or DRAIN state */
extern int node_already_down(char *node_name)
{
	struct node_record *node_ptr = find_node_record(node_name);

	if (node_ptr) {
		if (IS_NODE_DRAIN(node_ptr))
			return 2;
		else if (IS_NODE_DOWN(node_ptr))
			return 1;
		else
			return 0;
	}

	return 0;
}
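A hypothetical caller sketch showing how the three return values are distinguished (the node name is made up):

switch (node_already_down("tux3")) {
case 2:
	/* node is drained or draining */
	break;
case 1:
	/* node is down */
	break;
default:
	/* node is usable, or the name was unknown */
	break;
}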
Example No. 8
/*
 * _node_name2bitmap - given a node name regular expression, build a bitmap
 *	representation; any invalid hostnames are added to a hostlist
 * IN node_names  - set of node names
 * OUT bitmap     - set to bitmap, may not have all bits set on error
 * IN/OUT invalid_hostlist - hostlist of invalid host names, initialize to NULL
 * RET 0 if no error, otherwise EINVAL
 * NOTE: call FREE_NULL_BITMAP(bitmap) and hostlist_destroy(invalid_hostlist)
 *       to free memory when variables are no longer required
 */
static int _node_name2bitmap(char *node_names, bitstr_t **bitmap, 
			     hostlist_t *invalid_hostlist)
{
	char *this_node_name;
	bitstr_t *my_bitmap;
	hostlist_t host_list;

	my_bitmap = (bitstr_t *) bit_alloc(node_record_count);
	*bitmap = my_bitmap;

	if (node_names == NULL) {
		error("_node_name2bitmap: node_names is NULL");
		return EINVAL;
	}

	if ( (host_list = hostlist_create(node_names)) == NULL) {
		/* likely a badly formatted hostlist */
		error("_node_name2bitmap: hostlist_create(%s) error", 
		      node_names);
		return EINVAL;
	}

	while ( (this_node_name = hostlist_shift(host_list)) ) {
		struct node_record *node_ptr;
		node_ptr = find_node_record(this_node_name);
		if (node_ptr) {
			bit_set(my_bitmap, 
				(bitoff_t) (node_ptr - node_record_table_ptr));
		} else {
			debug2("_node_name2bitmap: invalid node specified %s",
			       this_node_name);
			if (*invalid_hostlist) {
				hostlist_push_host(*invalid_hostlist,
						   this_node_name);
			} else {
				*invalid_hostlist = 
					hostlist_create(this_node_name);
			}
		}
		free (this_node_name);
	}
	hostlist_destroy(host_list);

	return SLURM_SUCCESS;
}
Example No. 9
/*
 * node_name2bitmap - given a node name regular expression, build a bitmap
 *	representation
 * IN node_names  - list of nodes
 * IN best_effort - if set don't return an error on invalid node name entries
 * OUT bitmap     - set to bitmap, may not have all bits set on error
 * RET 0 if no error, otherwise EINVAL
 * NOTE: call FREE_NULL_BITMAP() to free bitmap memory when no longer required
 */
extern int node_name2bitmap (char *node_names, bool best_effort,
			     bitstr_t **bitmap)
{
	int rc = SLURM_SUCCESS;
	char *this_node_name;
	bitstr_t *my_bitmap;
	hostlist_t host_list;

	my_bitmap = (bitstr_t *) bit_alloc (node_record_count);
	if (my_bitmap == NULL)
		fatal("bit_alloc malloc failure");
	*bitmap = my_bitmap;

	if (node_names == NULL) {
		info("node_name2bitmap: node_names is NULL");
		return rc;
	}

	if ( (host_list = hostlist_create (node_names)) == NULL) {
		/* likely a badly formatted hostlist */
		error ("hostlist_create on %s error:", node_names);
		if (!best_effort)
			rc = EINVAL;
		return rc;
	}

	while ( (this_node_name = hostlist_shift (host_list)) ) {
		struct node_record *node_ptr;
		node_ptr = find_node_record (this_node_name);
		if (node_ptr) {
			bit_set (my_bitmap, (bitoff_t) (node_ptr -
							node_record_table_ptr));
		} else {
			error ("node_name2bitmap: invalid node specified %s",
			       this_node_name);
			if (!best_effort)
				rc = EINVAL;
		}
		free (this_node_name);
	}
	hostlist_destroy (host_list);

	return rc;
}
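A hypothetical caller sketch; as the header comment notes, the bitmap must be released with FREE_NULL_BITMAP(), which is safe even when the function returned an error:

bitstr_t *bitmap = NULL;

if (node_name2bitmap("tux[0-3]", false, &bitmap) == SLURM_SUCCESS) {
	/* e.g. count the nodes that were resolved */
	debug("selected %d nodes", bit_set_count(bitmap));
}
FREE_NULL_BITMAP(bitmap);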
Example No. 10
/* Tries to find a node fast using the hash table
 *
 * Used by: slurmctld
 */
static sw_gen_node_info_t *
_find_node(char *node_name)
{
	int i;
	sw_gen_node_info_t *n;
	struct node_record *node_ptr;

	if (node_name == NULL) {
		error("%s: _find_node node name is NULL", plugin_type);
		return NULL;
	}
	if (libstate->node_count == 0)
		return NULL;
	xassert(libstate->magic == SW_GEN_LIBSTATE_MAGIC);
	if (libstate->hash_table) {
		i = _hash_index(node_name);
		n = libstate->hash_table[i];
		while (n) {
			xassert(n->magic == SW_GEN_NODE_INFO_MAGIC);
			if (!strcmp(n->node_name, node_name))
				return n;
			n = n->next;
		}
	}

	/* This code is only needed if NodeName and NodeHostName differ */
	node_ptr = find_node_record(node_name);
	if (node_ptr && libstate->hash_table) {
		i = _hash_index(node_ptr->node_hostname);
		n = libstate->hash_table[i];
		while (n) {
			xassert(n->magic == SW_GEN_NODE_INFO_MAGIC);
			if (!strcmp(n->node_name, node_name))
				return n;
			n = n->next;
		}
	}

	return NULL;
}
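The bucket-then-chain walk above is a classic chained hash lookup; below is a minimal stand-alone sketch of the same pattern (the hash function and entry type are hypothetical, not the plugin's):

#include <stdio.h>
#include <string.h>

#define TABLE_SIZE 8

struct entry {
	const char *name;
	struct entry *next;	/* collision chain */
};

static unsigned _hash_index(const char *name)
{
	unsigned h = 0;
	while (*name)
		h = h * 31 + (unsigned char) *name++;
	return h % TABLE_SIZE;
}

static struct entry *_find(struct entry **table, const char *name)
{
	/* hash to a bucket, then walk the chain comparing names */
	for (struct entry *e = table[_hash_index(name)]; e; e = e->next)
		if (!strcmp(e->name, name))
			return e;
	return NULL;
}

int main(void)
{
	struct entry tux1 = { "tux1", NULL };
	struct entry *table[TABLE_SIZE] = { NULL };

	table[_hash_index("tux1")] = &tux1;
	printf("%s\n", _find(table, "tux1") ? "found" : "missing");
	return 0;
}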
Example No. 11
/*
 * This could potentially lock the node lock in the slurmctld with
 * slurm_drain_node, or slurm_fail_job so if slurmctld_locked is called we
 * will call the functions without locking the locks again.
 */
extern int down_nodecard(char *mp_name, bitoff_t io_start,
			 bool slurmctld_locked)
{
	List requests = NULL;
	List delete_list = NULL;
	ListIterator itr = NULL;
	bg_record_t *bg_record = NULL, *found_record = NULL, tmp_record;
	bg_record_t *smallest_bg_record = NULL;
	struct node_record *node_ptr = NULL;
	int mp_bit = 0;
	static int io_cnt = NO_VAL;
	static int create_size = NO_VAL;
	static select_ba_request_t blockreq;
	int rc = SLURM_SUCCESS;
	char *reason = "select_bluegene: nodecard down";

	xassert(mp_name);

	if (io_cnt == NO_VAL) {
		io_cnt = 1;
		/* Translate 1 nodecard count to ionode count */
		if ((io_cnt *= bg_conf->io_ratio))
			io_cnt--;

		/* make sure we create something that is able to be
		   created */
		if (bg_conf->smallest_block < bg_conf->nodecard_cnode_cnt)
			create_size = bg_conf->nodecard_cnode_cnt;
		else
			create_size = bg_conf->smallest_block;
	}

	node_ptr = find_node_record(mp_name);
	if (!node_ptr) {
		error ("down_sub_node_blocks: invalid node specified '%s'",
		       mp_name);
		return EINVAL;
	}

	/* this is here for sanity check to make sure we don't core on
	   these bits when we set them below. */
	if (io_start >= bg_conf->ionodes_per_mp
	    || (io_start+io_cnt) >= bg_conf->ionodes_per_mp) {
		debug("io %d-%d not configured on this "
		      "system, only %d ionodes per midplane",
		      io_start, io_start+io_cnt, bg_conf->ionodes_per_mp);
		return EINVAL;
	}
	mp_bit = (node_ptr - node_record_table_ptr);

	memset(&blockreq, 0, sizeof(select_ba_request_t));

	blockreq.conn_type[0] = SELECT_SMALL;
	blockreq.save_name = mp_name;

	debug3("here setting node %d of %d and ionodes %d-%d of %d",
	       mp_bit, node_record_count, io_start,
	       io_start+io_cnt, bg_conf->ionodes_per_mp);

	memset(&tmp_record, 0, sizeof(bg_record_t));
	tmp_record.mp_count = 1;
	tmp_record.cnode_cnt = bg_conf->nodecard_cnode_cnt;
	tmp_record.mp_bitmap = bit_alloc(node_record_count);
	bit_set(tmp_record.mp_bitmap, mp_bit);

	tmp_record.ionode_bitmap = bit_alloc(bg_conf->ionodes_per_mp);
	bit_nset(tmp_record.ionode_bitmap, io_start, io_start+io_cnt);

	slurm_mutex_lock(&block_state_mutex);
	itr = list_iterator_create(bg_lists->main);
	while ((bg_record = list_next(itr))) {
		if (!bit_test(bg_record->mp_bitmap, mp_bit))
			continue;

		if (!blocks_overlap(bg_record, &tmp_record))
			continue;

		if (bg_record->job_running > NO_JOB_RUNNING) {
			if (slurmctld_locked)
				job_fail(bg_record->job_running);
			else
				slurm_fail_job(bg_record->job_running);

		}
		/* If Running Dynamic mode and the block is
		   smaller than the create size just continue on.
		*/
		if ((bg_conf->layout_mode == LAYOUT_DYNAMIC)
		    && (bg_record->cnode_cnt < create_size)) {
			if (!delete_list)
				delete_list = list_create(NULL);
			list_append(delete_list, bg_record);
			continue;
		}

		/* keep track of the smallest size that is at least
		   the size of create_size. */
		if (!smallest_bg_record ||
		    (smallest_bg_record->cnode_cnt > bg_record->cnode_cnt))
			smallest_bg_record = bg_record;
	}
	list_iterator_destroy(itr);
	slurm_mutex_unlock(&block_state_mutex);

	if (bg_conf->layout_mode != LAYOUT_DYNAMIC) {
		debug3("running non-dynamic mode");
		/* This should never happen, but just in case... */
		if (delete_list)
			list_destroy(delete_list);

		/* If we found a block that is smaller or equal to a
		   midplane we will just mark it in an error state as
		   opposed to draining the node.
		*/
		if (smallest_bg_record
		    && (smallest_bg_record->cnode_cnt < bg_conf->mp_cnode_cnt)){
			if (smallest_bg_record->state & BG_BLOCK_ERROR_FLAG) {
				rc = SLURM_NO_CHANGE_IN_DATA;
				goto cleanup;
			}

			rc = put_block_in_error_state(
				smallest_bg_record, reason);
			goto cleanup;
		}

		debug("No block under 1 midplane available for this nodecard.  "
		      "Draining the whole node.");
		if (!node_already_down(mp_name)) {
			if (slurmctld_locked)
				drain_nodes(mp_name, reason,
					    slurm_get_slurm_user_id());
			else
				slurm_drain_nodes(mp_name, reason,
						  slurm_get_slurm_user_id());
		}
		rc = SLURM_SUCCESS;
		goto cleanup;
	}

	/* below is only for Dynamic mode */

	if (delete_list) {
		int cnt_set = 0;
		bitstr_t *iobitmap = bit_alloc(bg_conf->ionodes_per_mp);
		/* don't lock here since it is handled inside
		   the put_block_in_error_state
		*/
		itr = list_iterator_create(delete_list);
		while ((bg_record = list_next(itr))) {
			debug2("combining smaller than nodecard "
			       "dynamic block %s",
			       bg_record->bg_block_id);
			while (bg_record->job_running > NO_JOB_RUNNING)
				sleep(1);

			bit_or(iobitmap, bg_record->ionode_bitmap);
			cnt_set++;
		}
		list_iterator_destroy(itr);
		list_destroy(delete_list);
		if (!cnt_set) {
			FREE_NULL_BITMAP(iobitmap);
			rc = SLURM_ERROR;
			goto cleanup;
		}
		/* set the start to be the same as the start of the
		   ionode_bitmap.  If no ionodes set (not a small
		   block) set io_start = 0. */
		if ((io_start = bit_ffs(iobitmap)) == -1) {
			io_start = 0;
			if (create_size > bg_conf->nodecard_cnode_cnt)
				blockreq.small128 = 4;
			else
				blockreq.small32 = 16;
		} else if (create_size <= bg_conf->nodecard_cnode_cnt)
			blockreq.small32 = 1;
		else
			/* this should never happen */
			blockreq.small128 = 1;

		FREE_NULL_BITMAP(iobitmap);
	} else if (smallest_bg_record) {
		debug2("smallest dynamic block is %s",
		       smallest_bg_record->bg_block_id);
		if (smallest_bg_record->state & BG_BLOCK_ERROR_FLAG) {
			rc = SLURM_NO_CHANGE_IN_DATA;
			goto cleanup;
		}

		while (smallest_bg_record->job_running > NO_JOB_RUNNING)
			sleep(1);

		if (smallest_bg_record->cnode_cnt == create_size) {
			rc = put_block_in_error_state(
				smallest_bg_record, reason);
			goto cleanup;
		}

		if (create_size > smallest_bg_record->cnode_cnt) {
			/* we should never get here.  This means we
			 * have a create_size that is bigger than a
			 * block that is already made.
			 */
			rc = put_block_in_error_state(
				smallest_bg_record, reason);
			goto cleanup;
		}
		debug3("node count is %d", smallest_bg_record->cnode_cnt);
		switch(smallest_bg_record->cnode_cnt) {
#ifndef HAVE_BGL
		case 64:
			blockreq.small32 = 2;
			break;
		case 256:
			blockreq.small32 = 8;
			break;
#endif
		case 128:
			blockreq.small32 = 4;
			break;
		case 512:
		default:
			blockreq.small32 = 16;
			break;
		}

		if (create_size != bg_conf->nodecard_cnode_cnt) {
			blockreq.small128 = blockreq.small32 / 4;
			blockreq.small32 = 0;
			io_start = 0;
		} else if ((io_start =
			    bit_ffs(smallest_bg_record->ionode_bitmap)) == -1)
			/* set the start to be the same as the start of the
			   ionode_bitmap.  If no ionodes set (not a small
			   block) set io_start = 0. */
			io_start = 0;
	} else {
		switch(create_size) {
#ifndef HAVE_BGL
		case 64:
			blockreq.small64 = 8;
			break;
		case 256:
			blockreq.small256 = 2;
			break;
#endif
		case 32:
			blockreq.small32 = 16;
			break;
		case 128:
			blockreq.small128 = 4;
			break;
		case 512:
			if (!node_already_down(mp_name)) {
				char *reason = "select_bluegene: nodecard down";
				if (slurmctld_locked)
					drain_nodes(mp_name, reason,
						    slurm_get_slurm_user_id());
				else
					slurm_drain_nodes(
						mp_name, reason,
						slurm_get_slurm_user_id());
			}
			rc = SLURM_SUCCESS;
			goto cleanup;
			break;
		default:
			error("Unknown create size of %d", create_size);
			break;
		}
		/* since we don't have a block in this midplane
		   we need to start at the beginning. */
		io_start = 0;
		/* we also need a bg_block to pretend to be the
		   smallest block that takes up the entire midplane. */
	}


	/* Here we need to add blocks that take up nodecards on this
	   midplane.  Since Slurm only keeps track of midplanes
	   natively this is the only way to handle this case.
	*/
	requests = list_create(destroy_bg_record);
	add_bg_record(requests, NULL, &blockreq, 1, io_start);

	slurm_mutex_lock(&block_state_mutex);
	delete_list = list_create(NULL);
	while ((bg_record = list_pop(requests))) {
		itr = list_iterator_create(bg_lists->main);
		while ((found_record = list_next(itr))) {
			if (!blocks_overlap(bg_record, found_record))
				continue;
			list_push(delete_list, found_record);
			list_remove(itr);
		}
		list_iterator_destroy(itr);

		/* we need to add this record since it doesn't exist */
		if (bridge_block_create(bg_record) == SLURM_ERROR) {
			destroy_bg_record(bg_record);
			error("down_sub_node_blocks: "
			      "unable to configure block in api");
			continue;
		}

		debug("adding block %s to fill in small blocks "
		      "around bad nodecards",
		      bg_record->bg_block_id);
		print_bg_record(bg_record);
		list_append(bg_lists->main, bg_record);
		if (bit_overlap(bg_record->ionode_bitmap,
				tmp_record.ionode_bitmap)) {
			/* here we know the error block doesn't exist
			   so just set the state here */
			slurm_mutex_unlock(&block_state_mutex);
			rc = put_block_in_error_state(bg_record, reason);
			slurm_mutex_lock(&block_state_mutex);
		}
	}
	list_destroy(requests);

	if (delete_list) {
		slurm_mutex_unlock(&block_state_mutex);
		free_block_list(NO_VAL, delete_list, 0, 0);
		list_destroy(delete_list);
	}
	slurm_mutex_lock(&block_state_mutex);
	sort_bg_record_inc_size(bg_lists->main);
	slurm_mutex_unlock(&block_state_mutex);
	last_bg_update = time(NULL);

cleanup:
	FREE_NULL_BITMAP(tmp_record.mp_bitmap);
	FREE_NULL_BITMAP(tmp_record.ionode_bitmap);

	return rc;

}
Example No. 12
/*
 * topo_get_node_addr - build node address and the associated pattern
 *      based on the topology information
 *
 * example of output :
 *      address : s0.s4.s8.tux1
 *      pattern : switch.switch.switch.node
 */
extern int topo_get_node_addr(char* node_name, char** paddr, char** ppattern)
{
	struct node_record *node_ptr;
	int node_inx;
	hostlist_t sl = NULL;

	int s_max_level = 0;
	int i, j;

	/* no switches found, return */
	if ( switch_record_cnt == 0 ) {
		*paddr = xstrdup(node_name);
		*ppattern = xstrdup("node");
		return SLURM_SUCCESS;
	}

	node_ptr = find_node_record(node_name);
	/* node not found in configuration */
	if ( node_ptr == NULL )
		return SLURM_ERROR;
	node_inx = node_ptr - node_record_table_ptr;

	/* look for switches max level */
	for (i=0; i<switch_record_cnt; i++) {
		if ( switch_record_table[i].level > s_max_level )
			s_max_level = switch_record_table[i].level;
	}

	/* initialize output parameters */
	*paddr = xstrdup("");
	*ppattern = xstrdup("");

	/* build node topology address and the associated pattern */
	for (j=s_max_level; j>=0; j--) {
		for (i=0; i<switch_record_cnt; i++) {
			if ( switch_record_table[i].level != j )
				continue;
			if ( !bit_test(switch_record_table[i].node_bitmap,
				       node_inx) )
				continue;
			if ( sl == NULL ) {
				sl = hostlist_create(
					switch_record_table[i].name);
			} else {
				hostlist_push_host(
					sl, switch_record_table[i].name);
			}
		}
		if ( sl ) {
			char *buf = hostlist_ranged_string_xmalloc(sl);
			xstrcat(*paddr,buf);
			xfree(buf);
			hostlist_destroy(sl);
			sl = NULL;
		}
		xstrcat(*paddr, ".");
		xstrcat(*ppattern, "switch.");
	}

	/* append node name */
	xstrcat(*paddr, node_name);
	xstrcat(*ppattern, "node");

	return SLURM_SUCCESS;
}
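A hypothetical caller sketch matching the example output in the header comment (node name is made up; xfree releases the xstrdup'd strings):

char *addr = NULL, *pattern = NULL;

if (topo_get_node_addr("tux1", &addr, &pattern) == SLURM_SUCCESS) {
	/* e.g. addr = "s0.s4.s8.tux1", pattern = "switch.switch.switch.node" */
	xfree(addr);
	xfree(pattern);
}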
Example No. 13
/* Use a specific task layout to run tasks on each host listed in the hostfile.
 * XXX: Need to handle over-subscription.
 */
static int _task_layout_hostfile(slurm_step_layout_t *step_layout,
				 const char *arbitrary_nodes)
{
	int i=0, j, taskid = 0, task_cnt=0;
	hostlist_iterator_t itr = NULL, itr_task = NULL;
	char *host = NULL;

	hostlist_t job_alloc_hosts = NULL;
	hostlist_t step_alloc_hosts = NULL;

	int step_inx = 0, step_hosts_cnt = 0;
	struct node_record **step_hosts_ptrs = NULL;
	struct node_record *host_ptr = NULL;

	debug2("job list is %s", step_layout->node_list);
	if (!arbitrary_nodes) {
		error("no hostlist given for arbitrary dist");
		return SLURM_ERROR;
	}

	debug2("list is %s", arbitrary_nodes);
	step_alloc_hosts = hostlist_create(arbitrary_nodes);
	if (hostlist_count(step_alloc_hosts) != step_layout->task_cnt) {
		error("Asked for %u tasks have %d in the nodelist.  "
		      "Check your nodelist, or set the -n option to be %d",
		      step_layout->task_cnt,
		      hostlist_count(step_alloc_hosts),
		      hostlist_count(step_alloc_hosts));
		hostlist_destroy(step_alloc_hosts);
		return SLURM_ERROR;
	}

	job_alloc_hosts = hostlist_create(step_layout->node_list);
	itr             = hostlist_iterator_create(job_alloc_hosts);
	itr_task        = hostlist_iterator_create(step_alloc_hosts);

	/*
	 * Build array of pointers so that we can do pointer comparisons rather
	 * than strcmp's on nodes.
	 */
	step_hosts_cnt  = hostlist_count(step_alloc_hosts);
	step_hosts_ptrs = xmalloc(sizeof(struct node_record *) *
				  step_hosts_cnt);

	step_inx = 0;
	while((host = hostlist_next(itr_task))) {
		step_hosts_ptrs[step_inx++] = find_node_record_no_alias(host);
		free(host);
	}

	while((host = hostlist_next(itr))) {
		host_ptr = find_node_record(host);
		step_layout->tasks[i] = 0;

		for (step_inx = 0; step_inx < step_hosts_cnt; step_inx++) {
			if (host_ptr == step_hosts_ptrs[step_inx]) {
				step_layout->tasks[i]++;
				task_cnt++;
			}
			if (task_cnt >= step_layout->task_cnt)
				break;
		}
		debug3("%s got %u tasks", host, step_layout->tasks[i]);
		if (step_layout->tasks[i] == 0)
			goto reset_hosts;
		step_layout->tids[i] = xmalloc(sizeof(uint32_t)
					       * step_layout->tasks[i]);
		taskid = 0;
		j = 0;

		for (step_inx = 0; step_inx < step_hosts_cnt; step_inx++) {
			if (host_ptr == step_hosts_ptrs[step_inx]) {
				step_layout->tids[i][j] = taskid;
				j++;
			}
			taskid++;
			if (j >= step_layout->tasks[i])
				break;
		}
		i++;
	reset_hosts:
		free(host);
		if (i > step_layout->task_cnt)
			break;
	}
	hostlist_iterator_destroy(itr);
	hostlist_iterator_destroy(itr_task);
	hostlist_destroy(job_alloc_hosts);
	hostlist_destroy(step_alloc_hosts);
	xfree(step_hosts_ptrs);

	if (task_cnt != step_layout->task_cnt) {
		error("Asked for %u tasks but placed %d. Check your nodelist",
		      step_layout->task_cnt, task_cnt);
		return SLURM_ERROR;
	}

	return SLURM_SUCCESS;
}
Example No. 14
/*
 * _build_part_bitmap - update the total_cpus, total_nodes, and node_bitmap
 *	for the specified partition, also reset the partition pointers in
 *	the node back to this partition.
 * IN part_ptr - pointer to the partition
 * RET 0 if no error, errno otherwise
 * global: node_record_table_ptr - pointer to global node table
 * NOTE: this does not report nodes defined in more than one partition. This
 *	is checked only upon reading the configuration file, not on an update
 */
static int _build_part_bitmap(struct part_record *part_ptr)
{
	char *this_node_name;
	bitstr_t *old_bitmap;
	struct node_record *node_ptr;	/* pointer to node_record */
	hostlist_t host_list;

	part_ptr->total_cpus = 0;
	part_ptr->total_nodes = 0;

	if (part_ptr->node_bitmap == NULL) {
		part_ptr->node_bitmap = bit_alloc(node_record_count);
		old_bitmap = NULL;
	} else {
		old_bitmap = bit_copy(part_ptr->node_bitmap);
		bit_nclear(part_ptr->node_bitmap, 0,
			   node_record_count - 1);
	}

	if (part_ptr->nodes == NULL) {	/* no nodes in partition */
		_unlink_free_nodes(old_bitmap, part_ptr);
		FREE_NULL_BITMAP(old_bitmap);
		return 0;
	}

	if ((host_list = hostlist_create(part_ptr->nodes)) == NULL) {
		FREE_NULL_BITMAP(old_bitmap);
		error("hostlist_create error on %s, %m",
		      part_ptr->nodes);
		return ESLURM_INVALID_NODE_NAME;
	}

	while ((this_node_name = hostlist_shift(host_list))) {
		node_ptr = find_node_record(this_node_name);
		if (node_ptr == NULL) {
			error("_build_part_bitmap: invalid node name %s",
				this_node_name);
			free(this_node_name);
			FREE_NULL_BITMAP(old_bitmap);
			hostlist_destroy(host_list);
			return ESLURM_INVALID_NODE_NAME;
		}
		part_ptr->total_nodes++;
		if (slurmctld_conf.fast_schedule)
			part_ptr->total_cpus += node_ptr->config_ptr->cpus;
		else
			part_ptr->total_cpus += node_ptr->cpus;
		node_ptr->part_cnt++;
		xrealloc(node_ptr->part_pptr, (node_ptr->part_cnt *
			sizeof(struct part_record *)));
		node_ptr->part_pptr[node_ptr->part_cnt-1] = part_ptr;
		if (old_bitmap)
			bit_clear(old_bitmap,
				  (int) (node_ptr -
					 node_record_table_ptr));
		bit_set(part_ptr->node_bitmap,
			(int) (node_ptr - node_record_table_ptr));
		free(this_node_name);
	}
	hostlist_destroy(host_list);

	_unlink_free_nodes(old_bitmap, part_ptr);
	last_node_update = time(NULL);
	FREE_NULL_BITMAP(old_bitmap);
	return 0;
}
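The part_cnt++/xrealloc pair above is a grow-by-one append; a stand-alone sketch of the same pattern with plain realloc (Slurm's xrealloc additionally aborts on allocation failure, so the sketch needs its own error handling):

#include <stdio.h>
#include <stdlib.h>

int main(void)
{
	int *arr = NULL;
	size_t cnt = 0;

	/* Grow-by-one append, mirroring the part_pptr bookkeeping above. */
	for (int v = 10; v <= 30; v += 10) {
		int *tmp = realloc(arr, ++cnt * sizeof(*arr));
		if (!tmp) {
			free(arr);
			return 1;
		}
		arr = tmp;
		arr[cnt - 1] = v;	/* fill the newly added last slot */
	}
	for (size_t i = 0; i < cnt; i++)
		printf("%d\n", arr[i]);
	free(arr);
	return 0;
}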
Example No. 15
/*
 * _build_single_nodeline_info - From the slurm.conf reader, build table,
 * 	and set values
 * RET 0 if no error, error code otherwise
 * Note: Operates on common variables
 *	default_node_record - default node configuration values
 */
static int _build_single_nodeline_info(slurm_conf_node_t *node_ptr,
				       struct config_record *config_ptr)
{
	int error_code = SLURM_SUCCESS;
	struct node_record *node_rec = NULL;
	hostlist_t address_list = NULL;
	hostlist_t alias_list = NULL;
	hostlist_t hostname_list = NULL;
	hostlist_t port_list = NULL;
	char *address = NULL;
	char *alias = NULL;
	char *hostname = NULL;
	char *port_str = NULL;
	int state_val = NODE_STATE_UNKNOWN;
	int address_count, alias_count, hostname_count, port_count;
	uint16_t port = 0;

	if (node_ptr->state != NULL) {
		state_val = state_str2int(node_ptr->state, node_ptr->nodenames);
		if (state_val == NO_VAL)
			goto cleanup;
	}

	if ((address_list = hostlist_create(node_ptr->addresses)) == NULL) {
		fatal("Unable to create NodeAddr list from %s",
		      node_ptr->addresses);
		error_code = errno;
		goto cleanup;
	}
	if ((alias_list = hostlist_create(node_ptr->nodenames)) == NULL) {
		fatal("Unable to create NodeName list from %s",
		      node_ptr->nodenames);
		error_code = errno;
		goto cleanup;
	}
	if ((hostname_list = hostlist_create(node_ptr->hostnames)) == NULL) {
		fatal("Unable to create NodeHostname list from %s",
		      node_ptr->hostnames);
		error_code = errno;
		goto cleanup;
	}
	if (node_ptr->port_str && node_ptr->port_str[0] &&
	    (node_ptr->port_str[0] != '[') &&
	    (strchr(node_ptr->port_str, '-') ||
	     strchr(node_ptr->port_str, ','))) {
		xstrfmtcat(port_str, "[%s]", node_ptr->port_str);
		port_list = hostlist_create(port_str);
		xfree(port_str);
	} else {
		port_list = hostlist_create(node_ptr->port_str);
	}
	if (port_list == NULL) {
		error("Unable to create Port list from %s",
		      node_ptr->port_str);
		error_code = errno;
		goto cleanup;
	}

	/* some sanity checks */
	address_count  = hostlist_count(address_list);
	alias_count    = hostlist_count(alias_list);
	hostname_count = hostlist_count(hostname_list);
	port_count     = hostlist_count(port_list);
#ifdef HAVE_FRONT_END
	if ((hostname_count != alias_count) && (hostname_count != 1)) {
		error("NodeHostname count must equal that of NodeName "
		      "records of there must be no more than one");
		goto cleanup;
	}
	if ((address_count != alias_count) && (address_count != 1)) {
		error("NodeAddr count must equal that of NodeName "
		      "records of there must be no more than one");
		goto cleanup;
	}
#else
#ifdef MULTIPLE_SLURMD
	if ((address_count != alias_count) && (address_count != 1)) {
		error("NodeAddr count must equal that of NodeName "
		      "records of there must be no more than one");
		goto cleanup;
	}
#else
	if (address_count < alias_count) {
		error("At least as many NodeAddr are required as NodeName");
		goto cleanup;
	}
	if (hostname_count < alias_count) {
		error("At least as many NodeHostname are required "
		      "as NodeName");
		goto cleanup;
	}
#endif	/* MULTIPLE_SLURMD */
#endif	/* HAVE_FRONT_END */
	if ((port_count != alias_count) && (port_count > 1)) {
		error("Port count must equal that of NodeName "
		      "records or there must be no more than one");
		goto cleanup;
	}

	/* now build the individual node structures */
	while ((alias = hostlist_shift(alias_list))) {
		if (address_count > 0) {
			address_count--;
			if (address)
				free(address);
			address = hostlist_shift(address_list);
		}
		if (hostname_count > 0) {
			hostname_count--;
			if (hostname)
				free(hostname);
			hostname = hostlist_shift(hostname_list);
		}
		if (port_count > 0) {
			int port_int;
			port_count--;
			if (port_str)
				free(port_str);
			port_str = hostlist_shift(port_list);
			port_int = atoi(port_str);
			if ((port_int <= 0) || (port_int > 0xffff))
				fatal("Invalid Port %s", node_ptr->port_str);
			port = port_int;
		}
		/* find_node_record locks this to get the
		 * alias so we need to unlock */
		node_rec = find_node_record(alias);

		if (node_rec == NULL) {
			node_rec = create_node_record(config_ptr, alias);
			if ((state_val != NO_VAL) &&
			    (state_val != NODE_STATE_UNKNOWN))
				node_rec->node_state = state_val;
			node_rec->last_response = (time_t) 0;
			node_rec->comm_name = xstrdup(address);
			node_rec->node_hostname = xstrdup(hostname);
			node_rec->port      = port;
			node_rec->weight    = node_ptr->weight;
			node_rec->features  = xstrdup(node_ptr->feature);
			node_rec->reason    = xstrdup(node_ptr->reason);
		} else {
			/* FIXME - maybe should be fatal? */
			error("Reconfiguration for node %s, ignoring!", alias);
		}
		free(alias);
	}

	/* free allocated storage */
cleanup:
	if (address)
		free(address);
	if (hostname)
		free(hostname);
	if (port_str)
		free(port_str);
	if (address_list)
		hostlist_destroy(address_list);
	if (alias_list)
		hostlist_destroy(alias_list);
	if (hostname_list)
		hostlist_destroy(hostname_list);
	if (port_list)
		hostlist_destroy(port_list);
	return error_code;
}
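The bracket-wrapping of port_str above exists so that hostlist_create() expands the numeric range the same way it expands node ranges; a hedged sketch of the effect (assuming the standard hostlist range syntax):

hostlist_t hl = hostlist_create("[6817-6819]");
char *p;

while ((p = hostlist_shift(hl))) {
	printf("%s\n", p);	/* "6817", "6818", "6819" */
	free(p);
}
hostlist_destroy(hl);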
Example No. 16
/* Test if a batch launch request should be deferred
 * RET -1: abort the request, pending job cancelled
 *      0: execute the request now
 *      1: defer the request
 */
static int _batch_launch_defer(queued_request_t *queued_req_ptr)
{
	agent_arg_t *agent_arg_ptr;
	batch_job_launch_msg_t *launch_msg_ptr;
	time_t now = time(NULL);
	struct job_record  *job_ptr;
	int delay_time, nodes_ready = 0;

	agent_arg_ptr = queued_req_ptr->agent_arg_ptr;
	if (agent_arg_ptr->msg_type != REQUEST_BATCH_JOB_LAUNCH)
		return 0;

	if (difftime(now, queued_req_ptr->last_attempt) < 10) {
		/* Reduce overhead by only testing once every 10 secs */
		return 1;
	}

	launch_msg_ptr = (batch_job_launch_msg_t *)agent_arg_ptr->msg_args;
	job_ptr = find_job_record(launch_msg_ptr->job_id);
	if ((job_ptr == NULL) ||
	    (!IS_JOB_RUNNING(job_ptr) && !IS_JOB_SUSPENDED(job_ptr))) {
		info("agent(batch_launch): removed pending request for "
		     "cancelled job %u",
		     launch_msg_ptr->job_id);
		return -1;	/* job cancelled while waiting */
	}

	if (job_ptr->wait_all_nodes) {
		(void) job_node_ready(launch_msg_ptr->job_id, &nodes_ready);
	} else {
#ifdef HAVE_FRONT_END
		nodes_ready = 1;
#else
		struct node_record *node_ptr;
		char *hostname;

		hostname = hostlist_deranged_string_xmalloc(
					agent_arg_ptr->hostlist);
		node_ptr = find_node_record(hostname);
		if (node_ptr == NULL) {
			error("agent(batch_launch) removed pending request for "
			      "job %u, missing node %s",
			      launch_msg_ptr->job_id, hostname);
			xfree(hostname);
			return -1;	/* invalid request?? */
		}
		xfree(hostname);
		if (!IS_NODE_POWER_SAVE(node_ptr) &&
		    !IS_NODE_NO_RESPOND(node_ptr)) {
			nodes_ready = 1;
		}
#endif
	}

	delay_time = difftime(now, job_ptr->start_time);
	if (nodes_ready) {
		/* ready to launch, adjust time limit for boot time */
		if (delay_time && (job_ptr->time_limit != INFINITE) &&
		    (!wiki2_sched)) {
			info("Job %u launch delayed by %d secs, "
			     "updating end_time",
			     launch_msg_ptr->job_id, delay_time);
			job_ptr->end_time += delay_time;
		}
		queued_req_ptr->last_attempt = (time_t) 0;
		return 0;
	}

	if (queued_req_ptr->last_attempt == 0) {
		queued_req_ptr->first_attempt = now;
		queued_req_ptr->last_attempt  = now;
	} else if (difftime(now, queued_req_ptr->first_attempt) >=
				 slurm_get_resume_timeout()) {
		error("agent waited too long for nodes to respond, "
		      "sending batch request anyway...");
		if (delay_time && (job_ptr->time_limit != INFINITE) &&
		    (!wiki2_sched)) {
			info("Job %u launch delayed by %d secs, "
			     "updating end_time",
			     launch_msg_ptr->job_id, delay_time);
			job_ptr->end_time += delay_time;
		}
		queued_req_ptr->last_attempt = (time_t) 0;
		return 0;
	}

	queued_req_ptr->last_attempt  = now;
	return 1;
}
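A hypothetical caller sketch dispatching on the three documented return codes:

switch (_batch_launch_defer(queued_req_ptr)) {
case -1:
	/* job vanished while queued: drop the request */
	break;
case 0:
	/* launch the batch request now */
	break;
case 1:
	/* leave it queued and retry on the next pass */
	break;
}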
Example No. 17
/*
 * get_nodes - get information on specific node(s) changed since some time
 * cmd_ptr IN   - CMD=GETNODES ARG=[<UPDATETIME>:<NODEID>[:<NODEID>]...]
 *                               [<UPDATETIME>:ALL]
 * err_code OUT - 0 or an error code
 * err_msg OUT  - response message
 * NOTE: xfree() err_msg if err_code is zero
 * RET 0 on success, -1 on failure
 *
 * Response format
 * ARG=<cnt>#<NODEID>:
 *	STATE=<state>;		 Moab equivalent node state
 *	[CAT=<reason>];		 Reason for a node being down or drained
 *				 colon separator
 *	CCLASS=<[part:cpus]>;	 SLURM partition with CPU count of node,
 *				 may have more than one partition
 *	[CPULOAD=<load_ave>;]	 One minute BSD load average
 *	[ARCH=<architecture>;]	 Computer architecture
 *	[OS=<operating_system>;] Operating system
 *	CMEMORY=<MB>;		 MB of memory on node
 *	CDISK=<MB>;		 MB of disk space on node
 *	CPROC=<cpus>;		 CPU count on node
 *	[FEATURE=<feature>;]	 Features associated with node, if any
 *	[GRES=<name>[:<count>],...;] generic resources on the node
 *  [#<NODEID>:...];
 */
extern int	get_nodes(char *cmd_ptr, int *err_code, char **err_msg)
{
	char *arg_ptr = NULL, *tmp_char = NULL, *tmp_buf = NULL, *buf = NULL;
	time_t update_time;
	/* Locks: read node, read partition */
	slurmctld_lock_t node_read_lock = {
		NO_LOCK, NO_LOCK, READ_LOCK, READ_LOCK };
	int node_rec_cnt = 0, buf_size = 0;

#ifdef HAVE_ALPS_CRAY
	/* Locks: write node */
	slurmctld_lock_t node_write_lock = {
		NO_LOCK, NO_LOCK, WRITE_LOCK, NO_LOCK };

	/*
	 * Run a Basil Inventory immediately before scheduling, to avoid
	 * race conditions caused by ALPS node state change (caused e.g.
	 * by the node health checker).
	 * This relies on the above write lock for the node state.
	 */
	lock_slurmctld(node_write_lock);
	if (select_g_reconfigure()) {
		unlock_slurmctld(node_write_lock);
		*err_code = -720;
		*err_msg = "Unable to run ALPS inventory";
		error("wiki: Unable to run ALPS inventory");
		return -1;
	}
	unlock_slurmctld(node_write_lock);
#endif

	arg_ptr = strstr(cmd_ptr, "ARG=");
	if (arg_ptr == NULL) {
		*err_code = -300;
		*err_msg = "GETNODES lacks ARG";
		error("wiki: GETNODES lacks ARG");
		return -1;
	}
	update_time = (time_t) strtoul(arg_ptr+4, &tmp_char, 10);
	if (tmp_char[0] != ':') {
		*err_code = -300;
		*err_msg = "Invalid ARG value";
		error("wiki: GETNODES has invalid ARG value");
		return -1;
	}
	tmp_char++;
	lock_slurmctld(node_read_lock);
	if (strncmp(tmp_char, "ALL", 3) == 0) {
		/* report all nodes */
		buf = _dump_all_nodes(&node_rec_cnt, update_time);
	} else {
		struct node_record *node_ptr = NULL;
		char *node_name, *slurm_hosts;
		int node_cnt;
		hostset_t slurm_hostset;

		slurm_hosts = moab2slurm_task_list(tmp_char, &node_cnt);
		if ((slurm_hostset = hostset_create(slurm_hosts))) {
			while ((node_name = hostset_shift(slurm_hostset))) {
				node_ptr = find_node_record(node_name);
				if (node_ptr == NULL) {
					error("sched/wiki2: bad hostname %s",
					      node_name);
					continue;
				}
				if (_hidden_node(node_ptr))
					continue;
				tmp_buf = _dump_node(node_ptr, NULL,
						     update_time);
				if (node_rec_cnt > 0)
					xstrcat(buf, "#");
				xstrcat(buf, tmp_buf);
				xfree(tmp_buf);
				node_rec_cnt++;
			}
			hostset_destroy(slurm_hostset);
		} else {
			error("hostset_create(%s): %m", slurm_hosts);
		}
		xfree(slurm_hosts);
	}
	unlock_slurmctld(node_read_lock);

	/* Prepend ("ARG=%d", node_rec_cnt) to reply message */
	if (buf)
		buf_size = strlen(buf);
	tmp_buf = xmalloc(buf_size + 32);
	if (node_rec_cnt)
		sprintf(tmp_buf, "SC=0 ARG=%d#%s", node_rec_cnt, buf);
	else
		sprintf(tmp_buf, "SC=0 ARG=0#");
	xfree(buf);
	*err_code = 0;
	*err_msg = tmp_buf;
	return 0;
}