Пример #1
0
static int _dynamically_request(List block_list, int *blocks_added,
				select_ba_request_t *request,
				char *user_req_nodes,
				uint16_t query_mode)
{
	List list_of_lists = NULL;
	List temp_list = NULL;
	List new_blocks = NULL;
	List job_list = NULL, booted_list = NULL;
	ListIterator itr = NULL;
	int rc = SLURM_ERROR;
	int create_try = 0;

	if (bg_conf->slurm_debug_flags & DEBUG_FLAG_BG_PICK)
		info("going to create %d", request->size);
	list_of_lists = list_create(NULL);

	/* If preempt is set and we are checking full system it means
	   we altered the block list so only look at it.
	*/
	if (SELECT_IS_PREEMPT_SET(query_mode)
	    && SELECT_IS_CHECK_FULL_SET(query_mode)) {
		list_append(list_of_lists, block_list);
	} else if (user_req_nodes) {
		slurm_mutex_lock(&block_state_mutex);
		job_list = copy_bg_list(bg_lists->job_running);
		list_append(list_of_lists, job_list);
		slurm_mutex_unlock(&block_state_mutex);
	} else {
		slurm_mutex_lock(&block_state_mutex);
		list_append(list_of_lists, block_list);
		if (list_count(block_list) != list_count(bg_lists->booted)) {
			booted_list = copy_bg_list(bg_lists->booted);
			list_append(list_of_lists, booted_list);
			if (list_count(bg_lists->booted)
			    != list_count(bg_lists->job_running)) {
				job_list = copy_bg_list(bg_lists->job_running);
				list_append(list_of_lists, job_list);
			}
		} else if (list_count(block_list)
			   != list_count(bg_lists->job_running)) {
			job_list = copy_bg_list(bg_lists->job_running);
			list_append(list_of_lists, job_list);
		}
		slurm_mutex_unlock(&block_state_mutex);
	}
	itr = list_iterator_create(list_of_lists);
	while ((temp_list = (List)list_next(itr))) {
		create_try++;

		/* 1- try empty space
		   2- we see if we can create one in the
		   unused mps
		   3- see if we can create one in the non
		   job running mps
		*/
		if (bg_conf->slurm_debug_flags & DEBUG_FLAG_BG_PICK)
			info("trying with %d", create_try);
		if ((new_blocks = create_dynamic_block(
			     block_list, request, temp_list, true))) {
			bg_record_t *bg_record = NULL;
			while ((bg_record = list_pop(new_blocks))) {
				if (block_exist_in_list(block_list, bg_record))
					destroy_bg_record(bg_record);
				else if (SELECT_IS_TEST(query_mode)
					 || SELECT_IS_PREEMPT_ON_FULL_TEST(
						 query_mode)) {
					/* Here we don't really want
					   to create the block if we
					   are testing.

					   The second test here is to
					   make sure If we are able to
					   run here but we just
					   preempted we should wait a
					   bit to make sure the
					   preempted blocks have time
					   to clear out.
					*/
					list_append(block_list, bg_record);
					(*blocks_added) = 1;
				} else {
					if (bridge_block_create(bg_record)
					    == SLURM_ERROR) {
						destroy_bg_record(bg_record);
						error("_dynamically_request: "
						      "unable to configure "
						      "block");
						rc = SLURM_ERROR;
						break;
					}
					list_append(block_list, bg_record);
					print_bg_record(bg_record);
					(*blocks_added) = 1;
				}
			}
			list_destroy(new_blocks);
			if (!*blocks_added) {
				rc = SLURM_ERROR;
				continue;
			}
			list_sort(block_list,
				  (ListCmpF)bg_record_sort_aval_inc);

			rc = SLURM_SUCCESS;
			break;
		} else if (errno == ESLURM_INTERCONNECT_FAILURE) {
			rc = SLURM_ERROR;
			break;
		}
	}
	list_iterator_destroy(itr);

	if (list_of_lists)
		list_destroy(list_of_lists);
	if (job_list)
		list_destroy(job_list);
	if (booted_list)
		list_destroy(booted_list);

	return rc;
}
Пример #2
0
extern int create_full_system_block(List bg_found_block_list)
{
	int rc = SLURM_SUCCESS;
	ListIterator itr;
	bg_record_t *bg_record = NULL;
	char *name = NULL;
	List records = NULL;
	uint16_t geo[SYSTEM_DIMENSIONS];
	int i;
	select_ba_request_t blockreq;
	List results = NULL;
	struct part_record *part_ptr = NULL;
	bitstr_t *bitmap = bit_alloc(node_record_count);
	static int *dims = NULL;
	bool larger = 0;
	char start_char[SYSTEM_DIMENSIONS+1];
	char geo_char[SYSTEM_DIMENSIONS+1];

	if (!dims) {
		dims = select_g_ba_get_dims();
		memset(start_char, 0, sizeof(start_char));
		memset(geo_char, 0, sizeof(geo_char));
	}

	/* Locks are already in place to protect part_list here */
	itr = list_iterator_create(part_list);
	while ((part_ptr = list_next(itr))) {
		/* we only want to use mps that are in
		 * partitions
		 */
		if (!part_ptr->node_bitmap) {
			debug4("Partition %s doesn't have any nodes in it.",
			       part_ptr->name);
			continue;
		}
		bit_or(bitmap, part_ptr->node_bitmap);
	}
	list_iterator_destroy(itr);

	bit_not(bitmap);
	if (bit_ffs(bitmap) != -1) {
		error("We don't have the entire system covered by partitions, "
		      "can't create full system block");
		FREE_NULL_BITMAP(bitmap);
		return SLURM_ERROR;
	}
	FREE_NULL_BITMAP(bitmap);

	/* Here we are adding a block that in for the entire machine
	   just in case it isn't in the bluegene.conf file.
	*/
	slurm_mutex_lock(&block_state_mutex);

	for (i=0; i<SYSTEM_DIMENSIONS; i++) {
		geo[i] = dims[i] - 1;
		if (geo[i] > 0)
			larger = 1;
		geo_char[i] = alpha_num[geo[i]];
		start_char[i] = alpha_num[0];
	}

	i = (10+strlen(bg_conf->slurm_node_prefix));
	name = xmalloc(i);

	if (!larger)
		snprintf(name, i, "%s%s",
			 bg_conf->slurm_node_prefix, start_char);
	else
		snprintf(name, i, "%s[%sx%s]",
			 bg_conf->slurm_node_prefix, start_char, geo_char);


	if (bg_found_block_list) {
		itr = list_iterator_create(bg_found_block_list);
		while ((bg_record = (bg_record_t *) list_next(itr)) != NULL) {
			if (!strcmp(name, bg_record->mp_str)) {
				xfree(name);
				list_iterator_destroy(itr);
				/* don't create total already there */
				goto no_total;
			}
		}
		list_iterator_destroy(itr);
	} else {
		error("create_full_system_block: no bg_found_block_list 2");
	}

	if (bg_lists->main) {
		itr = list_iterator_create(bg_lists->main);
		while ((bg_record = (bg_record_t *) list_next(itr))
		       != NULL) {
			if (!strcmp(name, bg_record->mp_str)) {
				xfree(name);
				list_iterator_destroy(itr);
				/* don't create total already there */
				goto no_total;
			}
		}
		list_iterator_destroy(itr);
	} else {
		xfree(name);
		error("create_overlapped_blocks: no bg_lists->main 3");
		rc = SLURM_ERROR;
		goto no_total;
	}

	records = list_create(destroy_bg_record);

	memset(&blockreq, 0, sizeof(select_ba_request_t));
	blockreq.save_name = name;
	for (i=0; i<SYSTEM_DIMENSIONS; i++)
		blockreq.conn_type[i] = SELECT_TORUS;

	add_bg_record(records, NULL, &blockreq, 0 , 0);
	xfree(name);

	bg_record = (bg_record_t *) list_pop(records);
	if (!bg_record) {
		error("Nothing was returned from full system create");
		rc = SLURM_ERROR;
		goto no_total;
	}
	reset_ba_system(false);
	for(i=0; i<SYSTEM_DIMENSIONS; i++) {
		geo_char[i] = alpha_num[bg_record->geo[i]];
		start_char[i] = alpha_num[bg_record->start[i]];
	}
	debug2("adding %s %s %s",  bg_record->mp_str, start_char, geo_char);
	if (bg_record->ba_mp_list)
		list_flush(bg_record->ba_mp_list);
	else
		bg_record->ba_mp_list = list_create(destroy_ba_mp);
#ifdef HAVE_BGQ
	results = list_create(destroy_ba_mp);
#else
	results = list_create(NULL);
#endif
	name = set_bg_block(results,
			    bg_record->start,
			    bg_record->geo,
			    bg_record->conn_type);
	if (!name) {
		error("I was unable to make the full system block.");
		list_destroy(results);
		list_iterator_destroy(itr);
		slurm_mutex_unlock(&block_state_mutex);
		return SLURM_ERROR;
	}
	xfree(name);
	if (bg_record->ba_mp_list)
		list_destroy(bg_record->ba_mp_list);
#ifdef HAVE_BGQ
	bg_record->ba_mp_list = results;
	results = NULL;
#else
	bg_record->ba_mp_list = list_create(destroy_ba_mp);
	copy_node_path(results, &bg_record->ba_mp_list);
	list_destroy(results);
#endif
	if ((rc = bridge_block_create(bg_record)) == SLURM_ERROR) {
		error("create_full_system_block: "
		      "unable to configure block in api");
		destroy_bg_record(bg_record);
		goto no_total;
	}

	print_bg_record(bg_record);
	list_append(bg_lists->main, bg_record);

no_total:
	if (records)
		list_destroy(records);
	slurm_mutex_unlock(&block_state_mutex);
	return rc;
}
Пример #3
0
/*
 * create_defined_blocks - create the static blocks that will be used
 * for scheduling, all partitions must be able to be created and booted
 * at once.
 * IN - int overlapped, 1 if partitions are to be overlapped, 0 if they are
 * static.
 * RET - success of fitting all configurations
 */
extern int create_defined_blocks(bg_layout_t overlapped,
				 List bg_found_block_list)
{
	int rc = SLURM_SUCCESS;

	ListIterator itr;
	bg_record_t *bg_record = NULL;
	int i;
	uint16_t geo[SYSTEM_DIMENSIONS];
	char temp[256];
	struct part_record *part_ptr = NULL;
	bitstr_t *usable_mp_bitmap = bit_alloc(node_record_count);

	/* Locks are already in place to protect part_list here */
	itr = list_iterator_create(part_list);
	while ((part_ptr = list_next(itr))) {
		/* we only want to use mps that are in
		 * partitions
		 */
		if (!part_ptr->node_bitmap) {
			debug4("Partition %s doesn't have any nodes in it.",
			       part_ptr->name);
			continue;
		}
		bit_or(usable_mp_bitmap, part_ptr->node_bitmap);
	}
	list_iterator_destroy(itr);

	if (bit_ffs(usable_mp_bitmap) == -1) {
		fatal("We don't have any nodes in any partitions.  "
		      "Can't create blocks.  "
		      "Please check your slurm.conf.");
	}

	slurm_mutex_lock(&block_state_mutex);
	reset_ba_system(false);
	ba_set_removable_mps(usable_mp_bitmap, 1);
	if (bg_lists->main) {
		itr = list_iterator_create(bg_lists->main);
		while ((bg_record = list_next(itr))) {
			if (bg_record->mp_count > 0
			    && !bg_record->full_block
			    && bg_record->cpu_cnt >= bg_conf->cpus_per_mp) {
				char *name = NULL;
				char start_char[SYSTEM_DIMENSIONS+1];
				char geo_char[SYSTEM_DIMENSIONS+1];

				if (overlapped == LAYOUT_OVERLAP) {
					reset_ba_system(false);
					ba_set_removable_mps(usable_mp_bitmap,
							     1);
				}

				/* we want the mps that aren't
				 * in this record to mark them as used
				 */
				if (ba_set_removable_mps(
					    bg_record->mp_bitmap, 1)
				    != SLURM_SUCCESS)
					fatal("It doesn't seem we have a "
					      "bitmap for %s",
					      bg_record->bg_block_id);

				for (i=0; i<SYSTEM_DIMENSIONS; i++) {
					geo[i] = bg_record->geo[i];
					start_char[i] = alpha_num[
						bg_record->start[i]];
					geo_char[i] = alpha_num[geo[i]];
				}
				start_char[i] = '\0';
				geo_char[i] = '\0';

				debug2("adding %s %s %s",
				       bg_record->mp_str,
				       start_char, geo_char);
				if (bg_record->ba_mp_list
				    && list_count(bg_record->ba_mp_list)) {
					if ((rc = check_and_set_mp_list(
						     bg_record->ba_mp_list))
					    != SLURM_SUCCESS) {
						debug2("something happened in "
						       "the load of %s"
						       "Did you use smap to "
						       "make the "
						       "bluegene.conf file?",
						       bg_record->bg_block_id);
						break;
					}
				} else {
#ifdef HAVE_BGQ
					List results =
						list_create(destroy_ba_mp);
#else
					List results = list_create(NULL);
#endif
					name = set_bg_block(
						results,
						bg_record->start,
						geo,
						bg_record->conn_type);
					ba_reset_all_removed_mps();
					if (!name) {
						error("I was unable to "
						      "make the "
						      "requested block.");
						list_destroy(results);
						rc = SLURM_ERROR;
						break;
					}

					snprintf(temp, sizeof(temp), "%s%s",
						 bg_conf->slurm_node_prefix,
						 name);

					xfree(name);
					if (strcmp(temp, bg_record->mp_str)) {
						fatal("given list of %s "
						      "but allocated %s, "
						      "your order might be "
						      "wrong in bluegene.conf",
						      bg_record->mp_str,
						      temp);
					}
					if (bg_record->ba_mp_list)
						list_destroy(
							bg_record->ba_mp_list);
#ifdef HAVE_BGQ
					bg_record->ba_mp_list = results;
					results = NULL;
#else
					bg_record->ba_mp_list =
						list_create(destroy_ba_mp);
					copy_node_path(results,
						       &bg_record->ba_mp_list);
					list_destroy(results);
#endif
				}
			}
			if (!block_exist_in_list(
				    bg_found_block_list, bg_record)) {
				if (bg_record->full_block) {
					/* if this is defined we need
					   to remove it since we are
					   going to try to create it
					   later on overlap systems
					   this doesn't matter, but
					   since we don't clear the
					   table on static mode we
					   can't do it here or it just
					   won't work since other
					   wires will be or are
					   already set
					*/
					list_remove(itr);
					continue;
				}
				if ((rc = bridge_block_create(bg_record))
				    != SLURM_SUCCESS)
					break;
				print_bg_record(bg_record);
			}
		}
		list_iterator_destroy(itr);
		if (rc != SLURM_SUCCESS)
			goto end_it;
	} else {
		error("create_defined_blocks: no bg_lists->main 2");
		rc = SLURM_ERROR;
		goto end_it;
	}
	slurm_mutex_unlock(&block_state_mutex);
	create_full_system_block(bg_found_block_list);

	slurm_mutex_lock(&block_state_mutex);
	sort_bg_record_inc_size(bg_lists->main);

end_it:
	ba_reset_all_removed_mps();
	FREE_NULL_BITMAP(usable_mp_bitmap);
	slurm_mutex_unlock(&block_state_mutex);

#ifdef _PRINT_BLOCKS_AND_EXIT
	if (bg_lists->main) {
		itr = list_iterator_create(bg_lists->main);
		debug("\n\n");
		while ((found_record = (bg_record_t *) list_next(itr))
		       != NULL) {
			print_bg_record(found_record);
		}
		list_iterator_destroy(itr);
	} else {
		error("create_defined_blocks: no bg_lists->main 5");
	}
 	exit(0);
#endif	/* _PRINT_BLOCKS_AND_EXIT */
	//exit(0);
	return rc;
}
Пример #4
0
/*
 * This could potentially lock the node lock in the slurmctld with
 * slurm_drain_node, or slurm_fail_job so if slurmctld_locked is called we
 * will call the functions without locking the locks again.
 */
extern int down_nodecard(char *mp_name, bitoff_t io_start,
			 bool slurmctld_locked)
{
	List requests = NULL;
	List delete_list = NULL;
	ListIterator itr = NULL;
	bg_record_t *bg_record = NULL, *found_record = NULL, tmp_record;
	bg_record_t *smallest_bg_record = NULL;
	struct node_record *node_ptr = NULL;
	int mp_bit = 0;
	static int io_cnt = NO_VAL;
	static int create_size = NO_VAL;
	static select_ba_request_t blockreq;
	int rc = SLURM_SUCCESS;
	char *reason = "select_bluegene: nodecard down";

	xassert(mp_name);

	if (io_cnt == NO_VAL) {
		io_cnt = 1;
		/* Translate 1 nodecard count to ionode count */
		if ((io_cnt *= bg_conf->io_ratio))
			io_cnt--;

		/* make sure we create something that is able to be
		   created */
		if (bg_conf->smallest_block < bg_conf->nodecard_cnode_cnt)
			create_size = bg_conf->nodecard_cnode_cnt;
		else
			create_size = bg_conf->smallest_block;
	}

	node_ptr = find_node_record(mp_name);
	if (!node_ptr) {
		error ("down_sub_node_blocks: invalid node specified '%s'",
		       mp_name);
		return EINVAL;
	}

	/* this is here for sanity check to make sure we don't core on
	   these bits when we set them below. */
	if (io_start >= bg_conf->ionodes_per_mp
	    || (io_start+io_cnt) >= bg_conf->ionodes_per_mp) {
		debug("io %d-%d not configured on this "
		      "system, only %d ionodes per midplane",
		      io_start, io_start+io_cnt, bg_conf->ionodes_per_mp);
		return EINVAL;
	}
	mp_bit = (node_ptr - node_record_table_ptr);

	memset(&blockreq, 0, sizeof(select_ba_request_t));

	blockreq.conn_type[0] = SELECT_SMALL;
	blockreq.save_name = mp_name;

	debug3("here setting node %d of %d and ionodes %d-%d of %d",
	       mp_bit, node_record_count, io_start,
	       io_start+io_cnt, bg_conf->ionodes_per_mp);

	memset(&tmp_record, 0, sizeof(bg_record_t));
	tmp_record.mp_count = 1;
	tmp_record.cnode_cnt = bg_conf->nodecard_cnode_cnt;
	tmp_record.mp_bitmap = bit_alloc(node_record_count);
	bit_set(tmp_record.mp_bitmap, mp_bit);

	tmp_record.ionode_bitmap = bit_alloc(bg_conf->ionodes_per_mp);
	bit_nset(tmp_record.ionode_bitmap, io_start, io_start+io_cnt);

	slurm_mutex_lock(&block_state_mutex);
	itr = list_iterator_create(bg_lists->main);
	while ((bg_record = list_next(itr))) {
		if (!bit_test(bg_record->mp_bitmap, mp_bit))
			continue;

		if (!blocks_overlap(bg_record, &tmp_record))
			continue;

		if (bg_record->job_running > NO_JOB_RUNNING) {
			if (slurmctld_locked)
				job_fail(bg_record->job_running);
			else
				slurm_fail_job(bg_record->job_running);

		}
		/* If Running Dynamic mode and the block is
		   smaller than the create size just continue on.
		*/
		if ((bg_conf->layout_mode == LAYOUT_DYNAMIC)
		    && (bg_record->cnode_cnt < create_size)) {
			if (!delete_list)
				delete_list = list_create(NULL);
			list_append(delete_list, bg_record);
			continue;
		}

		/* keep track of the smallest size that is at least
		   the size of create_size. */
		if (!smallest_bg_record ||
		    (smallest_bg_record->cnode_cnt > bg_record->cnode_cnt))
			smallest_bg_record = bg_record;
	}
	list_iterator_destroy(itr);
	slurm_mutex_unlock(&block_state_mutex);

	if (bg_conf->layout_mode != LAYOUT_DYNAMIC) {
		debug3("running non-dynamic mode");
		/* This should never happen, but just in case... */
		if (delete_list)
			list_destroy(delete_list);

		/* If we found a block that is smaller or equal to a
		   midplane we will just mark it in an error state as
		   opposed to draining the node.
		*/
		if (smallest_bg_record
		    && (smallest_bg_record->cnode_cnt < bg_conf->mp_cnode_cnt)){
			if (smallest_bg_record->state & BG_BLOCK_ERROR_FLAG) {
				rc = SLURM_NO_CHANGE_IN_DATA;
				goto cleanup;
			}

			rc = put_block_in_error_state(
				smallest_bg_record, reason);
			goto cleanup;
		}

		debug("No block under 1 midplane available for this nodecard.  "
		      "Draining the whole node.");
		if (!node_already_down(mp_name)) {
			if (slurmctld_locked)
				drain_nodes(mp_name, reason,
					    slurm_get_slurm_user_id());
			else
				slurm_drain_nodes(mp_name, reason,
						  slurm_get_slurm_user_id());
		}
		rc = SLURM_SUCCESS;
		goto cleanup;
	}

	/* below is only for Dynamic mode */

	if (delete_list) {
		int cnt_set = 0;
		bitstr_t *iobitmap = bit_alloc(bg_conf->ionodes_per_mp);
		/* don't lock here since it is handled inside
		   the put_block_in_error_state
		*/
		itr = list_iterator_create(delete_list);
		while ((bg_record = list_next(itr))) {
			debug2("combining smaller than nodecard "
			       "dynamic block %s",
			       bg_record->bg_block_id);
			while (bg_record->job_running > NO_JOB_RUNNING)
				sleep(1);

			bit_or(iobitmap, bg_record->ionode_bitmap);
			cnt_set++;
		}
		list_iterator_destroy(itr);
		list_destroy(delete_list);
		if (!cnt_set) {
			FREE_NULL_BITMAP(iobitmap);
			rc = SLURM_ERROR;
			goto cleanup;
		}
		/* set the start to be the same as the start of the
		   ionode_bitmap.  If no ionodes set (not a small
		   block) set io_start = 0. */
		if ((io_start = bit_ffs(iobitmap)) == -1) {
			io_start = 0;
			if (create_size > bg_conf->nodecard_cnode_cnt)
				blockreq.small128 = 4;
			else
				blockreq.small32 = 16;
		} else if (create_size <= bg_conf->nodecard_cnode_cnt)
			blockreq.small32 = 1;
		else
			/* this should never happen */
			blockreq.small128 = 1;

		FREE_NULL_BITMAP(iobitmap);
	} else if (smallest_bg_record) {
		debug2("smallest dynamic block is %s",
		       smallest_bg_record->bg_block_id);
		if (smallest_bg_record->state & BG_BLOCK_ERROR_FLAG) {
			rc = SLURM_NO_CHANGE_IN_DATA;
			goto cleanup;
		}

		while (smallest_bg_record->job_running > NO_JOB_RUNNING)
			sleep(1);

		if (smallest_bg_record->cnode_cnt == create_size) {
			rc = put_block_in_error_state(
				smallest_bg_record, reason);
			goto cleanup;
		}

		if (create_size > smallest_bg_record->cnode_cnt) {
			/* we should never get here.  This means we
			 * have a create_size that is bigger than a
			 * block that is already made.
			 */
			rc = put_block_in_error_state(
				smallest_bg_record, reason);
			goto cleanup;
		}
		debug3("node count is %d", smallest_bg_record->cnode_cnt);
		switch(smallest_bg_record->cnode_cnt) {
#ifndef HAVE_BGL
		case 64:
			blockreq.small32 = 2;
			break;
		case 256:
			blockreq.small32 = 8;
			break;
#endif
		case 128:
			blockreq.small32 = 4;
			break;
		case 512:
		default:
			blockreq.small32 = 16;
			break;
		}

		if (create_size != bg_conf->nodecard_cnode_cnt) {
			blockreq.small128 = blockreq.small32 / 4;
			blockreq.small32 = 0;
			io_start = 0;
		} else if ((io_start =
			    bit_ffs(smallest_bg_record->ionode_bitmap)) == -1)
			/* set the start to be the same as the start of the
			   ionode_bitmap.  If no ionodes set (not a small
			   block) set io_start = 0. */
			io_start = 0;
	} else {
		switch(create_size) {
#ifndef HAVE_BGL
		case 64:
			blockreq.small64 = 8;
			break;
		case 256:
			blockreq.small256 = 2;
#endif
		case 32:
			blockreq.small32 = 16;
			break;
		case 128:
			blockreq.small128 = 4;
			break;
		case 512:
			if (!node_already_down(mp_name)) {
				char *reason = "select_bluegene: nodecard down";
				if (slurmctld_locked)
					drain_nodes(mp_name, reason,
						    slurm_get_slurm_user_id());
				else
					slurm_drain_nodes(
						mp_name, reason,
						slurm_get_slurm_user_id());
			}
			rc = SLURM_SUCCESS;
			goto cleanup;
			break;
		default:
			error("Unknown create size of %d", create_size);
			break;
		}
		/* since we don't have a block in this midplane
		   we need to start at the beginning. */
		io_start = 0;
		/* we also need a bg_block to pretend to be the
		   smallest block that takes up the entire midplane. */
	}


	/* Here we need to add blocks that take up nodecards on this
	   midplane.  Since Slurm only keeps track of midplanes
	   natively this is the only want to handle this case.
	*/
	requests = list_create(destroy_bg_record);
	add_bg_record(requests, NULL, &blockreq, 1, io_start);

	slurm_mutex_lock(&block_state_mutex);
	delete_list = list_create(NULL);
	while ((bg_record = list_pop(requests))) {
		itr = list_iterator_create(bg_lists->main);
		while ((found_record = list_next(itr))) {
			if (!blocks_overlap(bg_record, found_record))
				continue;
			list_push(delete_list, found_record);
			list_remove(itr);
		}
		list_iterator_destroy(itr);

		/* we need to add this record since it doesn't exist */
		if (bridge_block_create(bg_record) == SLURM_ERROR) {
			destroy_bg_record(bg_record);
			error("down_sub_node_blocks: "
			      "unable to configure block in api");
			continue;
		}

		debug("adding block %s to fill in small blocks "
		      "around bad nodecards",
		      bg_record->bg_block_id);
		print_bg_record(bg_record);
		list_append(bg_lists->main, bg_record);
		if (bit_overlap(bg_record->ionode_bitmap,
				tmp_record.ionode_bitmap)) {
			/* here we know the error block doesn't exist
			   so just set the state here */
			slurm_mutex_unlock(&block_state_mutex);
			rc = put_block_in_error_state(bg_record, reason);
			slurm_mutex_lock(&block_state_mutex);
		}
	}
	list_destroy(requests);

	if (delete_list) {
		slurm_mutex_unlock(&block_state_mutex);
		free_block_list(NO_VAL, delete_list, 0, 0);
		list_destroy(delete_list);
	}
	slurm_mutex_lock(&block_state_mutex);
	sort_bg_record_inc_size(bg_lists->main);
	slurm_mutex_unlock(&block_state_mutex);
	last_bg_update = time(NULL);

cleanup:
	FREE_NULL_BITMAP(tmp_record.mp_bitmap);
	FREE_NULL_BITMAP(tmp_record.ionode_bitmap);

	return rc;

}