コード例 #1
0
ファイル: bg_read_config.c プロジェクト: masteraxl/slurm
/*
 * Read and process the bluegene.conf configuration file so to interpret what
 * blocks are static/dynamic, torus/mesh, etc.
 */
extern int read_bg_conf(void)
{
	int i;
	int count = 0;
	s_p_hashtbl_t *tbl = NULL;
	char *layout = NULL;
	select_ba_request_t **blockreq_array = NULL;
	image_t **image_array = NULL;
	image_t *image = NULL;
	static time_t last_config_update = (time_t) 0;
	struct stat config_stat;
	ListIterator itr = NULL;
	char* bg_conf_file = NULL;
	static int *dims = NULL;

	if (!dims)
		dims = select_g_ba_get_dims();

	if (bg_conf->slurm_debug_flags & DEBUG_FLAG_SELECT_TYPE)
		info("Reading the bluegene.conf file");

	/* check if config file has changed */
	bg_conf_file = _get_bg_conf();

	if (stat(bg_conf_file, &config_stat) < 0)
		fatal("can't stat bluegene.conf file %s: %m", bg_conf_file);
	if (last_config_update) {
		_reopen_bridge_log();
		if (last_config_update == config_stat.st_mtime) {
			if (bg_conf->slurm_debug_flags & DEBUG_FLAG_SELECT_TYPE)
				info("%s unchanged", bg_conf_file);
		} else {
			info("Restart slurmctld for %s changes "
			     "to take effect",
			     bg_conf_file);
		}
		last_config_update = config_stat.st_mtime;
		xfree(bg_conf_file);
		return SLURM_SUCCESS;
	}
	last_config_update = config_stat.st_mtime;

	/* initialization */
	/* bg_conf defined in bg_node_alloc.h */
	tbl = s_p_hashtbl_create(bg_conf_file_options);

	if (s_p_parse_file(tbl, NULL, bg_conf_file, false) == SLURM_ERROR)
		fatal("something wrong with opening/reading bluegene "
		      "conf file");
	xfree(bg_conf_file);

#ifdef HAVE_BGL
	if (s_p_get_array((void ***)&image_array,
			  &count, "AltBlrtsImage", tbl)) {
		for (i = 0; i < count; i++) {
			list_append(bg_conf->blrts_list, image_array[i]);
			image_array[i] = NULL;
		}
	}
	if (!s_p_get_string(&bg_conf->default_blrtsimage, "BlrtsImage", tbl)) {
		if (!list_count(bg_conf->blrts_list))
			fatal("BlrtsImage not configured "
			      "in bluegene.conf");
		itr = list_iterator_create(bg_conf->blrts_list);
		image = list_next(itr);
		image->def = true;
		list_iterator_destroy(itr);
		bg_conf->default_blrtsimage = xstrdup(image->name);
		info("Warning: using %s as the default BlrtsImage.  "
		     "If this isn't correct please set BlrtsImage",
		     bg_conf->default_blrtsimage);
	} else {
		if (bg_conf->slurm_debug_flags & DEBUG_FLAG_SELECT_TYPE)
			info("default BlrtsImage %s",
			     bg_conf->default_blrtsimage);
		image = xmalloc(sizeof(image_t));
		image->name = xstrdup(bg_conf->default_blrtsimage);
		image->def = true;
		image->groups = NULL;
		/* we want it to be first */
		list_push(bg_conf->blrts_list, image);
	}

	if (s_p_get_array((void ***)&image_array,
			  &count, "AltLinuxImage", tbl)) {
		for (i = 0; i < count; i++) {
			list_append(bg_conf->linux_list, image_array[i]);
			image_array[i] = NULL;
		}
	}
	if (!s_p_get_string(&bg_conf->default_linuximage, "LinuxImage", tbl)) {
		if (!list_count(bg_conf->linux_list))
			fatal("LinuxImage not configured "
			      "in bluegene.conf");
		itr = list_iterator_create(bg_conf->linux_list);
		image = list_next(itr);
		image->def = true;
		list_iterator_destroy(itr);
		bg_conf->default_linuximage = xstrdup(image->name);
		info("Warning: using %s as the default LinuxImage.  "
		     "If this isn't correct please set LinuxImage",
		     bg_conf->default_linuximage);
	} else {
		if (bg_conf->slurm_debug_flags & DEBUG_FLAG_SELECT_TYPE)
			info("default LinuxImage %s",
			     bg_conf->default_linuximage);
		image = xmalloc(sizeof(image_t));
		image->name = xstrdup(bg_conf->default_linuximage);
		image->def = true;
		image->groups = NULL;
		/* we want it to be first */
		list_push(bg_conf->linux_list, image);
	}

	if (s_p_get_array((void ***)&image_array,
			  &count, "AltRamDiskImage", tbl)) {
		for (i = 0; i < count; i++) {
			list_append(bg_conf->ramdisk_list, image_array[i]);
			image_array[i] = NULL;
		}
	}
	if (!s_p_get_string(&bg_conf->default_ramdiskimage,
			    "RamDiskImage", tbl)) {
		if (!list_count(bg_conf->ramdisk_list))
			fatal("RamDiskImage not configured "
			      "in bluegene.conf");
		itr = list_iterator_create(bg_conf->ramdisk_list);
		image = list_next(itr);
		image->def = true;
		list_iterator_destroy(itr);
		bg_conf->default_ramdiskimage = xstrdup(image->name);
		info("Warning: using %s as the default RamDiskImage.  "
		     "If this isn't correct please set RamDiskImage",
		     bg_conf->default_ramdiskimage);
	} else {
		if (bg_conf->slurm_debug_flags & DEBUG_FLAG_SELECT_TYPE)
			info("default RamDiskImage %s",
			     bg_conf->default_ramdiskimage);
		image = xmalloc(sizeof(image_t));
		image->name = xstrdup(bg_conf->default_ramdiskimage);
		image->def = true;
		image->groups = NULL;
		/* we want it to be first */
		list_push(bg_conf->ramdisk_list, image);
	}
#elif defined HAVE_BGP

	if (s_p_get_array((void ***)&image_array,
			  &count, "AltCnloadImage", tbl)) {
		for (i = 0; i < count; i++) {
			list_append(bg_conf->linux_list, image_array[i]);
			image_array[i] = NULL;
		}
	}
	if (!s_p_get_string(&bg_conf->default_linuximage, "CnloadImage", tbl)) {
		if (!list_count(bg_conf->linux_list))
			fatal("CnloadImage not configured "
			      "in bluegene.conf");
		itr = list_iterator_create(bg_conf->linux_list);
		image = list_next(itr);
		image->def = true;
		list_iterator_destroy(itr);
		bg_conf->default_linuximage = xstrdup(image->name);
		info("Warning: using %s as the default CnloadImage.  "
		     "If this isn't correct please set CnloadImage",
		     bg_conf->default_linuximage);
	} else {
		if (bg_conf->slurm_debug_flags & DEBUG_FLAG_SELECT_TYPE)
			info("default CnloadImage %s",
			     bg_conf->default_linuximage);
		image = xmalloc(sizeof(image_t));
		image->name = xstrdup(bg_conf->default_linuximage);
		image->def = true;
		image->groups = NULL;
		/* we want it to be first */
		list_push(bg_conf->linux_list, image);
	}

	if (s_p_get_array((void ***)&image_array,
			  &count, "AltIoloadImage", tbl)) {
		for (i = 0; i < count; i++) {
			list_append(bg_conf->ramdisk_list, image_array[i]);
			image_array[i] = NULL;
		}
	}
	if (!s_p_get_string(&bg_conf->default_ramdiskimage,
			    "IoloadImage", tbl)) {
		if (!list_count(bg_conf->ramdisk_list))
			fatal("IoloadImage not configured "
			      "in bluegene.conf");
		itr = list_iterator_create(bg_conf->ramdisk_list);
		image = list_next(itr);
		image->def = true;
		list_iterator_destroy(itr);
		bg_conf->default_ramdiskimage = xstrdup(image->name);
		info("Warning: using %s as the default IoloadImage.  "
		     "If this isn't correct please set IoloadImage",
		     bg_conf->default_ramdiskimage);
	} else {
		if (bg_conf->slurm_debug_flags & DEBUG_FLAG_SELECT_TYPE)
			info("default IoloadImage %s",
			     bg_conf->default_ramdiskimage);
		image = xmalloc(sizeof(image_t));
		image->name = xstrdup(bg_conf->default_ramdiskimage);
		image->def = true;
		image->groups = NULL;
		/* we want it to be first */
		list_push(bg_conf->ramdisk_list, image);
	}

#endif
	if (s_p_get_array((void ***)&image_array,
			  &count, "AltMloaderImage", tbl)) {
		for (i = 0; i < count; i++) {
			list_append(bg_conf->mloader_list, image_array[i]);
			image_array[i] = NULL;
		}
	}
	if (!s_p_get_string(&bg_conf->default_mloaderimage,
			    "MloaderImage", tbl)) {
		if (!list_count(bg_conf->mloader_list))
			fatal("MloaderImage not configured "
			      "in bluegene.conf");
		itr = list_iterator_create(bg_conf->mloader_list);
		image = list_next(itr);
		image->def = true;
		list_iterator_destroy(itr);
		bg_conf->default_mloaderimage = xstrdup(image->name);
		info("Warning: using %s as the default MloaderImage.  "
		     "If this isn't correct please set MloaderImage",
		     bg_conf->default_mloaderimage);
	} else {
		if (bg_conf->slurm_debug_flags & DEBUG_FLAG_SELECT_TYPE)
			info("default MloaderImage %s",
			     bg_conf->default_mloaderimage);
		image = xmalloc(sizeof(image_t));
		image->name = xstrdup(bg_conf->default_mloaderimage);
		image->def = true;
		image->groups = NULL;
		/* we want it to be first */
		list_push(bg_conf->mloader_list, image);
	}

	if (!s_p_get_uint16(
		    &bg_conf->mp_cnode_cnt, "BasePartitionNodeCnt", tbl)) {
		error("BasePartitionNodeCnt not configured in bluegene.conf "
		      "defaulting to 512 as BasePartitionNodeCnt");
		bg_conf->mp_cnode_cnt = 512;
		bg_conf->quarter_cnode_cnt = 128;
	} else {
		if (bg_conf->mp_cnode_cnt <= 0)
			fatal("You should have more than 0 nodes "
			      "per base partition");

		bg_conf->quarter_cnode_cnt = bg_conf->mp_cnode_cnt/4;
	}
	/* bg_conf->cpus_per_mp should had already been set from the
	 * node_init */
	if (bg_conf->cpus_per_mp < bg_conf->mp_cnode_cnt) {
		fatal("For some reason we have only %u cpus per mp, but "
		      "have %u cnodes per mp.  You need at least the same "
		      "number of cpus as you have cnodes per mp.  "
		      "Check the NodeName Procs= "
		      "definition in the slurm.conf.",
		      bg_conf->cpus_per_mp, bg_conf->mp_cnode_cnt);
	}

	bg_conf->cpu_ratio = bg_conf->cpus_per_mp/bg_conf->mp_cnode_cnt;
	if (!bg_conf->cpu_ratio)
		fatal("We appear to have less than 1 cpu on a cnode.  "
		      "You specified %u for BasePartitionNodeCnt "
		      "in the blugene.conf and %u cpus "
		      "for each node in the slurm.conf",
		      bg_conf->mp_cnode_cnt, bg_conf->cpus_per_mp);

	num_unused_cpus = 1;
	for (i = 0; i<SYSTEM_DIMENSIONS; i++)
		num_unused_cpus *= dims[i];
	num_unused_cpus *= bg_conf->cpus_per_mp;

	if (!s_p_get_uint16(
		    &bg_conf->nodecard_cnode_cnt, "NodeCardNodeCnt", tbl)) {
		error("NodeCardNodeCnt not configured in bluegene.conf "
		      "defaulting to 32 as NodeCardNodeCnt");
		bg_conf->nodecard_cnode_cnt = 32;
	}

	if (bg_conf->nodecard_cnode_cnt<=0)
		fatal("You should have more than 0 nodes per nodecard");

	bg_conf->mp_nodecard_cnt =
		bg_conf->mp_cnode_cnt / bg_conf->nodecard_cnode_cnt;

	if (!s_p_get_uint16(&bg_conf->ionodes_per_mp, "Numpsets", tbl))
		fatal("Warning: Numpsets not configured in bluegene.conf");
	if (!bg_conf->ionodes_per_mp) {
		if (!s_p_get_uint16(&bg_conf->ionodes_per_mp,
				    "IONodesPerMP", tbl))
			fatal("Warning: IONodesPerMP not configured "
			      "in bluegene.conf");
	}

#ifdef HAVE_BGQ
	/* You can only have 16 ionodes per midplane */
	if (bg_conf->ionodes_per_mp > bg_conf->mp_nodecard_cnt)
		bg_conf->ionodes_per_mp = bg_conf->mp_nodecard_cnt;
#endif

	if (bg_conf->ionodes_per_mp) {
		bitstr_t *tmp_bitmap = NULL;
		int small_size = 1;

		/* THIS IS A HACK TO MAKE A 1 NODECARD SYSTEM WORK */
		if (bg_conf->mp_cnode_cnt == bg_conf->nodecard_cnode_cnt) {
			bg_conf->quarter_ionode_cnt = 2;
			bg_conf->nodecard_ionode_cnt = 2;
		} else {
			bg_conf->quarter_ionode_cnt = bg_conf->ionodes_per_mp/4;
			bg_conf->nodecard_ionode_cnt =
				bg_conf->quarter_ionode_cnt/4;
		}

		/* How many nodecards per ionode */
		bg_conf->nc_ratio =
			((double)bg_conf->mp_cnode_cnt
			 / (double)bg_conf->nodecard_cnode_cnt)
			/ (double)bg_conf->ionodes_per_mp;
		/* How many ionodes per nodecard */
		bg_conf->io_ratio =
			(double)bg_conf->ionodes_per_mp /
			((double)bg_conf->mp_cnode_cnt
			 / (double)bg_conf->nodecard_cnode_cnt);
		//info("got %f %f", bg_conf->nc_ratio, bg_conf->io_ratio);
		/* figure out the smallest block we can have on the
		   system */
#ifdef HAVE_BGL
		if (bg_conf->io_ratio >= 1)
			bg_conf->smallest_block=32;
		else
			bg_conf->smallest_block=128;
#else

		if (bg_conf->io_ratio >= 2)
			bg_conf->smallest_block=16;
		else if (bg_conf->io_ratio == 1)
			bg_conf->smallest_block=32;
		else if (bg_conf->io_ratio == .5)
			bg_conf->smallest_block=64;
		else if (bg_conf->io_ratio == .25)
			bg_conf->smallest_block=128;
		else if (bg_conf->io_ratio == .125)
			bg_conf->smallest_block=256;
		else {
			error("unknown ioratio %f.  Can't figure out "
			      "smallest block size, setting it to midplane",
			      bg_conf->io_ratio);
			bg_conf->smallest_block = 512;
		}
#endif
		if (bg_conf->slurm_debug_flags & DEBUG_FLAG_SELECT_TYPE)
			info("Smallest block possible on this system is %u",
			     bg_conf->smallest_block);
		/* below we are creating all the possible bitmaps for
		 * each size of small block
		 */
		if ((int)bg_conf->nodecard_ionode_cnt < 1) {
			bg_conf->nodecard_ionode_cnt = 0;
		} else {
			bg_lists->valid_small32 = list_create(_destroy_bitmap);
			if ((small_size = bg_conf->nodecard_ionode_cnt))
				small_size--;
			i = 0;
			while (i<bg_conf->ionodes_per_mp) {
				tmp_bitmap = bit_alloc(bg_conf->ionodes_per_mp);
				bit_nset(tmp_bitmap, i, i+small_size);
				i += small_size+1;
				list_append(bg_lists->valid_small32,
					    tmp_bitmap);
			}
		}
		/* If we only have 1 nodecard just jump to the end
		   since this will never need to happen below.
		   Pretty much a hack to avoid seg fault;). */
		if (bg_conf->mp_cnode_cnt == bg_conf->nodecard_cnode_cnt)
			goto no_calc;

		bg_lists->valid_small128 = list_create(_destroy_bitmap);
		if ((small_size = bg_conf->quarter_ionode_cnt))
			small_size--;
		i = 0;
		while (i<bg_conf->ionodes_per_mp) {
			tmp_bitmap = bit_alloc(bg_conf->ionodes_per_mp);
			bit_nset(tmp_bitmap, i, i+small_size);
			i += small_size+1;
			list_append(bg_lists->valid_small128, tmp_bitmap);
		}

#ifndef HAVE_BGL
		bg_lists->valid_small64 = list_create(_destroy_bitmap);
		if ((small_size = bg_conf->nodecard_ionode_cnt * 2))
			small_size--;
		i = 0;
		while (i<bg_conf->ionodes_per_mp) {
			tmp_bitmap = bit_alloc(bg_conf->ionodes_per_mp);
			bit_nset(tmp_bitmap, i, i+small_size);
			i += small_size+1;
			list_append(bg_lists->valid_small64, tmp_bitmap);
		}

		bg_lists->valid_small256 = list_create(_destroy_bitmap);
		if ((small_size = bg_conf->quarter_ionode_cnt * 2))
			small_size--;
		i = 0;
		while (i<bg_conf->ionodes_per_mp) {
			tmp_bitmap = bit_alloc(bg_conf->ionodes_per_mp);
			bit_nset(tmp_bitmap, i, i+small_size);
			i += small_size+1;
			list_append(bg_lists->valid_small256, tmp_bitmap);
		}
#endif
	} else {
		fatal("your ionodes_per_mp is 0");
	}

no_calc:

	if (!s_p_get_uint16(&bg_conf->bridge_api_verb, "BridgeAPIVerbose", tbl))
		info("Warning: BridgeAPIVerbose not configured "
		     "in bluegene.conf");
	if (!s_p_get_string(&bg_conf->bridge_api_file,
			    "BridgeAPILogFile", tbl))
		info("BridgeAPILogFile not configured in bluegene.conf");
	else
		_reopen_bridge_log();

	if (s_p_get_string(&layout, "DenyPassthrough", tbl)) {
		if (strstr(layout, "A"))
			ba_deny_pass |= PASS_DENY_A;
		if (strstr(layout, "X"))
			ba_deny_pass |= PASS_DENY_X;
		if (strstr(layout, "Y"))
			ba_deny_pass |= PASS_DENY_Y;
		if (strstr(layout, "Z"))
			ba_deny_pass |= PASS_DENY_Z;
		if (!strcasecmp(layout, "ALL"))
			ba_deny_pass |= PASS_DENY_ALL;
		bg_conf->deny_pass = ba_deny_pass;
		xfree(layout);
	}

	if (!s_p_get_string(&layout, "LayoutMode", tbl)) {
		info("Warning: LayoutMode was not specified in bluegene.conf "
		     "defaulting to STATIC partitioning");
		bg_conf->layout_mode = LAYOUT_STATIC;
	} else {
		if (!strcasecmp(layout,"STATIC"))
			bg_conf->layout_mode = LAYOUT_STATIC;
		else if (!strcasecmp(layout,"OVERLAP"))
			bg_conf->layout_mode = LAYOUT_OVERLAP;
		else if (!strcasecmp(layout,"DYNAMIC"))
			bg_conf->layout_mode = LAYOUT_DYNAMIC;
		else {
			fatal("I don't understand this LayoutMode = %s",
			      layout);
		}
		xfree(layout);
	}

	/* add blocks defined in file */
	if (bg_conf->layout_mode != LAYOUT_DYNAMIC) {
		if (!s_p_get_array((void ***)&blockreq_array,
				   &count, "BPs", tbl)) {
			info("WARNING: no blocks defined in bluegene.conf, "
			     "only making full system block");
			/* create_full_system_block(NULL); */
		}

		for (i = 0; i < count; i++) {
			add_bg_record(bg_lists->main, NULL,
				      blockreq_array[i], 0, 0);
		}
	}
	s_p_hashtbl_destroy(tbl);

	return SLURM_SUCCESS;
}
コード例 #2
0
ファイル: bg_dynamic_block.c プロジェクト: masteraxl/slurm
/*
 * create_dynamic_block - create new block(s) to be used for a new
 * job allocation.
 * RET - a list of created block(s) or NULL on failure errno is set.
 */
extern List create_dynamic_block(List block_list,
				 select_ba_request_t *request,
				 List my_block_list,
				 bool track_down_nodes)
{
	int rc = SLURM_SUCCESS;

	ListIterator itr, itr2;
	bg_record_t *bg_record = NULL, *found_record = NULL;
	List results = NULL;
	List new_blocks = NULL;
	bitstr_t *my_bitmap = NULL;
	select_ba_request_t blockreq;
	int cnodes = request->procs / bg_conf->cpu_ratio;
	uint16_t start_geo[SYSTEM_DIMENSIONS];

	if (cnodes < bg_conf->smallest_block) {
		error("Can't create this size %d "
		      "on this system ionodes_per_mp is %d",
		      request->procs,
		      bg_conf->ionodes_per_mp);
		goto finished;
	}
	memset(&blockreq, 0, sizeof(select_ba_request_t));
	memcpy(start_geo, request->geometry, sizeof(start_geo));

	/* We need to lock this just incase a blocks_overlap is called
	   which will in turn reset and set the system as it sees fit.
	*/
	slurm_mutex_lock(&block_state_mutex);
	if (my_block_list) {
		reset_ba_system(track_down_nodes);
		itr = list_iterator_create(my_block_list);
		while ((bg_record = list_next(itr))) {
			if (bg_record->magic != BLOCK_MAGIC) {
				/* This should never happen since we
				   only call this on copies of blocks
				   and we check on this during the
				   copy.
				*/
				error("create_dynamic_block: "
				      "got a block with bad magic?");
				continue;
			}
			if (bg_record->free_cnt) {
				if (bg_conf->slurm_debug_flags
				    & DEBUG_FLAG_BG_PICK) {
					int dim;
					char start_geo[SYSTEM_DIMENSIONS+1];
					char geo[SYSTEM_DIMENSIONS+1];
					for (dim=0; dim<SYSTEM_DIMENSIONS;
					     dim++) {
						start_geo[dim] = alpha_num[
							bg_record->start[dim]];
						geo[dim] = alpha_num[
							bg_record->geo[dim]];
					}
					start_geo[dim] = '\0';
					geo[dim] = '\0';
					info("not adding %s(%s) %s %s %s %u "
					     "(free_cnt)",
					     bg_record->bg_block_id,
					     bg_record->mp_str,
					     bg_block_state_string(
						     bg_record->state),
					     start_geo,
					     geo,
					     bg_record->cnode_cnt);
				}
				continue;
			}

			if (!my_bitmap) {
				my_bitmap =
					bit_alloc(bit_size(bg_record->bitmap));
			}

			if (!bit_super_set(bg_record->bitmap, my_bitmap)) {
				bit_or(my_bitmap, bg_record->bitmap);

				if (bg_conf->slurm_debug_flags
				    & DEBUG_FLAG_BG_PICK) {
					int dim;
					char start_geo[SYSTEM_DIMENSIONS+1];
					char geo[SYSTEM_DIMENSIONS+1];
					for (dim=0; dim<SYSTEM_DIMENSIONS;
					     dim++) {
						start_geo[dim] = alpha_num[
							bg_record->start[dim]];
						geo[dim] = alpha_num[
							bg_record->geo[dim]];
					}
					start_geo[dim] = '\0';
					geo[dim] = '\0';
					info("adding %s(%s) %s %s %s %u",
					     bg_record->bg_block_id,
					     bg_record->mp_str,
					     bg_block_state_string(
						     bg_record->state),
					     start_geo, geo,
					     bg_record->cnode_cnt);
				}
				if (check_and_set_mp_list(
					    bg_record->ba_mp_list)
				    == SLURM_ERROR) {
					if (bg_conf->slurm_debug_flags
					    & DEBUG_FLAG_BG_PICK)
						info("something happened in "
						     "the load of %s",
						     bg_record->bg_block_id);
					list_iterator_destroy(itr);
					FREE_NULL_BITMAP(my_bitmap);
					rc = SLURM_ERROR;
					goto finished;
				}
			} else {
				if (bg_conf->slurm_debug_flags
				    & DEBUG_FLAG_BG_PICK) {
					int dim;
					char start_geo[SYSTEM_DIMENSIONS+1];
					char geo[SYSTEM_DIMENSIONS+1];
					for (dim=0; dim<SYSTEM_DIMENSIONS;
					     dim++) {
						start_geo[dim] = alpha_num[
							bg_record->start[dim]];
						geo[dim] = alpha_num[
							bg_record->geo[dim]];
					}
					start_geo[dim] = '\0';
					geo[dim] = '\0';
					info("not adding %s(%s) %s %s %s %u ",
					     bg_record->bg_block_id,
					     bg_record->mp_str,
					     bg_block_state_string(
						     bg_record->state),
					     start_geo,
					     geo,
					     bg_record->cnode_cnt);
				}
				/* just so we don't look at it later */
				bg_record->free_cnt = -1;
			}
		}
		list_iterator_destroy(itr);
		FREE_NULL_BITMAP(my_bitmap);
	} else {
		reset_ba_system(false);
		if (bg_conf->slurm_debug_flags & DEBUG_FLAG_BG_PICK)
			info("No list was given");
	}

	if (request->avail_mp_bitmap)
		ba_set_removable_mps(request->avail_mp_bitmap, 1);

	if (request->size==1 && cnodes < bg_conf->mp_cnode_cnt) {
		switch(cnodes) {
#ifdef HAVE_BGL
		case 32:
			blockreq.small32 = 4;
			blockreq.small128 = 3;
			break;
		case 128:
			blockreq.small128 = 4;
			break;
#else
		case 16:
			blockreq.small16 = 2;
			blockreq.small32 = 1;
			blockreq.small64 = 1;
			blockreq.small128 = 1;
			blockreq.small256 = 1;
			break;
		case 32:
			blockreq.small32 = 2;
			blockreq.small64 = 1;
			blockreq.small128 = 1;
			blockreq.small256 = 1;
			break;
		case 64:
			blockreq.small64 = 2;
			blockreq.small128 = 1;
			blockreq.small256 = 1;
			break;
		case 128:
			blockreq.small128 = 2;
			blockreq.small256 = 1;
			break;
		case 256:
			blockreq.small256 = 2;
			break;
#endif
		default:
			error("This size %d is unknown on this system", cnodes);
			goto finished;
			break;
		}

		/* Sort the list so the small blocks are in the order
		 * of ionodes. */
		list_sort(block_list, (ListCmpF)bg_record_cmpf_inc);
		request->conn_type[0] = SELECT_SMALL;
		new_blocks = list_create(destroy_bg_record);
		/* check only blocks that are free and small */
		if (_breakup_blocks(block_list, new_blocks,
				    request, my_block_list,
				    true, true)
		    == SLURM_SUCCESS)
			goto finished;

		/* check only blocks that are free and any size */
		if (_breakup_blocks(block_list, new_blocks,
				    request, my_block_list,
				    true, false)
		    == SLURM_SUCCESS)
			goto finished;

		/* check usable blocks that are small with any state */
		if (_breakup_blocks(block_list, new_blocks,
				    request, my_block_list,
				    false, true)
		    == SLURM_SUCCESS)
			goto finished;

		/* check all usable blocks */
		if (_breakup_blocks(block_list, new_blocks,
				    request, my_block_list,
				    false, false)
		    == SLURM_SUCCESS)
			goto finished;

		/* Re-sort the list back to the original order. */
		list_sort(block_list, (ListCmpF)bg_record_sort_aval_inc);
		list_destroy(new_blocks);
		new_blocks = NULL;
		if (bg_conf->slurm_debug_flags & DEBUG_FLAG_BG_PICK)
			info("small block not able to be placed inside others");
	}

	if (request->conn_type[0] == SELECT_NAV)
		request->conn_type[0] = SELECT_TORUS;

	//debug("going to create %d", request->size);
	if (!new_ba_request(request)) {
		if (request->geometry[0] != (uint16_t)NO_VAL) {
			char *geo = give_geo(request->geometry);
			error("Problems with request for size %d geo %s",
			      request->size, geo);
			xfree(geo);
		} else {
			error("Problems with request for size %d.  "
			      "No geo given.",
			      request->size);
		}
		rc = ESLURM_INTERCONNECT_FAILURE;
		goto finished;
	}

	/* try on free midplanes */
	rc = SLURM_SUCCESS;
	if (results)
		list_flush(results);
	else {
#ifdef HAVE_BGQ
		results = list_create(destroy_ba_mp);
#else
		results = list_create(NULL);
#endif
	}

	rc = allocate_block(request, results);
	/* This could be changed in allocate_block so set it back up */
	memcpy(request->geometry, start_geo, sizeof(start_geo));

	if (rc) {
		rc = SLURM_SUCCESS;
		goto setup_records;
	}

	if (bg_conf->slurm_debug_flags & DEBUG_FLAG_BG_PICK)
		info("allocate failure for size %d base "
		     "partitions of free midplanes",
		     request->size);
	rc = SLURM_ERROR;

	if (!list_count(my_block_list) || !my_block_list)
		goto finished;

	/*Try to put block starting in the smallest of the exisiting blocks*/
	itr = list_iterator_create(my_block_list);
	itr2 = list_iterator_create(my_block_list);
	while ((bg_record = (bg_record_t *) list_next(itr)) != NULL) {
		bool is_small = 0;
		/* never check a block with a job running */
		if (bg_record->free_cnt
		    || bg_record->job_running != NO_JOB_RUNNING)
			continue;

		/* Here we are only looking for the first
		   block on the midplane.  So either the count
		   is greater or equal than
		   bg_conf->mp_cnode_cnt or the first bit is
		   set in the ionode_bitmap.
		*/
		if (bg_record->cnode_cnt < bg_conf->mp_cnode_cnt) {
			bool found = 0;
			if (bit_ffs(bg_record->ionode_bitmap) != 0)
				continue;
			/* Check to see if we have other blocks in
			   this midplane that have jobs running.
			*/
			while ((found_record = list_next(itr2))) {
				if (!found_record->free_cnt
				    && (found_record->job_running
					!= NO_JOB_RUNNING)
				    && bit_overlap(bg_record->bitmap,
						   found_record->bitmap)) {
					found = 1;
					break;
				}
			}
			list_iterator_reset(itr2);
			if (found)
				continue;
			is_small = 1;
		}

		if (bg_conf->slurm_debug_flags & DEBUG_FLAG_BG_PICK)
			info("removing %s(%s) for request %d",
			     bg_record->bg_block_id,
			     bg_record->mp_str, request->size);

		remove_block(bg_record->ba_mp_list, is_small);
		rc = SLURM_SUCCESS;
		if (results)
			list_flush(results);
		else {
#ifdef HAVE_BGQ
			results = list_create(destroy_ba_mp);
#else
			results = list_create(NULL);
#endif
		}

		rc = allocate_block(request, results);
		/* This could be changed in allocate_block so set it back up */
		memcpy(request->geometry, start_geo, sizeof(start_geo));
		if (rc) {
			rc = SLURM_SUCCESS;
			break;
		}

		if (bg_conf->slurm_debug_flags & DEBUG_FLAG_BG_PICK)
			info("allocate failure for size %d base partitions",
			     request->size);
		rc = SLURM_ERROR;
	}
	list_iterator_destroy(itr);
	list_iterator_destroy(itr2);

setup_records:
	if (rc == SLURM_SUCCESS) {
		/*set up bg_record(s) here */
		new_blocks = list_create(destroy_bg_record);

		blockreq.save_name = request->save_name;
#ifdef HAVE_BGL
		blockreq.blrtsimage = request->blrtsimage;
#endif
		blockreq.linuximage = request->linuximage;
		blockreq.mloaderimage = request->mloaderimage;
		blockreq.ramdiskimage = request->ramdiskimage;
		memcpy(blockreq.conn_type, request->conn_type,
		       sizeof(blockreq.conn_type));

		add_bg_record(new_blocks, &results, &blockreq, 0, 0);
	}

finished:
	if (request->avail_mp_bitmap
	    && (bit_ffc(request->avail_mp_bitmap) == -1))
		ba_reset_all_removed_mps();
	slurm_mutex_unlock(&block_state_mutex);

	/* reset the ones we mucked with */
	itr = list_iterator_create(my_block_list);
	while ((bg_record = (bg_record_t *) list_next(itr))) {
		if (bg_record->free_cnt == -1)
			bg_record->free_cnt = 0;
	}
	list_iterator_destroy(itr);


	xfree(request->save_name);

	if (results)
		list_destroy(results);
	errno = rc;
	return new_blocks;
}
コード例 #3
0
ファイル: bg_defined_block.c プロジェクト: VURM/slurm
extern int create_full_system_block(List bg_found_block_list)
{
	int rc = SLURM_SUCCESS;
	ListIterator itr;
	bg_record_t *bg_record = NULL;
	char *name = NULL;
	List records = NULL;
	uint16_t geo[SYSTEM_DIMENSIONS];
	int i;
	select_ba_request_t blockreq;
	List results = NULL;
	struct part_record *part_ptr = NULL;
	bitstr_t *bitmap = bit_alloc(node_record_count);
	static int *dims = NULL;
	bool larger = 0;
	char start_char[SYSTEM_DIMENSIONS+1];
	char geo_char[SYSTEM_DIMENSIONS+1];

	if (!dims) {
		dims = select_g_ba_get_dims();
		memset(start_char, 0, sizeof(start_char));
		memset(geo_char, 0, sizeof(geo_char));
	}

	/* Locks are already in place to protect part_list here */
	itr = list_iterator_create(part_list);
	while ((part_ptr = list_next(itr))) {
		/* we only want to use mps that are in
		 * partitions
		 */
		if (!part_ptr->node_bitmap) {
			debug4("Partition %s doesn't have any nodes in it.",
			       part_ptr->name);
			continue;
		}
		bit_or(bitmap, part_ptr->node_bitmap);
	}
	list_iterator_destroy(itr);

	bit_not(bitmap);
	if (bit_ffs(bitmap) != -1) {
		error("We don't have the entire system covered by partitions, "
		      "can't create full system block");
		FREE_NULL_BITMAP(bitmap);
		return SLURM_ERROR;
	}
	FREE_NULL_BITMAP(bitmap);

	/* Here we are adding a block that in for the entire machine
	   just in case it isn't in the bluegene.conf file.
	*/
	slurm_mutex_lock(&block_state_mutex);

	for (i=0; i<SYSTEM_DIMENSIONS; i++) {
		geo[i] = dims[i] - 1;
		if (geo[i] > 0)
			larger = 1;
		geo_char[i] = alpha_num[geo[i]];
		start_char[i] = alpha_num[0];
	}

	i = (10+strlen(bg_conf->slurm_node_prefix));
	name = xmalloc(i);

	if (!larger)
		snprintf(name, i, "%s%s",
			 bg_conf->slurm_node_prefix, start_char);
	else
		snprintf(name, i, "%s[%sx%s]",
			 bg_conf->slurm_node_prefix, start_char, geo_char);


	if (bg_found_block_list) {
		itr = list_iterator_create(bg_found_block_list);
		while ((bg_record = (bg_record_t *) list_next(itr)) != NULL) {
			if (!strcmp(name, bg_record->mp_str)) {
				xfree(name);
				list_iterator_destroy(itr);
				/* don't create total already there */
				goto no_total;
			}
		}
		list_iterator_destroy(itr);
	} else {
		error("create_full_system_block: no bg_found_block_list 2");
	}

	if (bg_lists->main) {
		itr = list_iterator_create(bg_lists->main);
		while ((bg_record = (bg_record_t *) list_next(itr))
		       != NULL) {
			if (!strcmp(name, bg_record->mp_str)) {
				xfree(name);
				list_iterator_destroy(itr);
				/* don't create total already there */
				goto no_total;
			}
		}
		list_iterator_destroy(itr);
	} else {
		xfree(name);
		error("create_overlapped_blocks: no bg_lists->main 3");
		rc = SLURM_ERROR;
		goto no_total;
	}

	records = list_create(destroy_bg_record);

	memset(&blockreq, 0, sizeof(select_ba_request_t));
	blockreq.save_name = name;
	for (i=0; i<SYSTEM_DIMENSIONS; i++)
		blockreq.conn_type[i] = SELECT_TORUS;

	add_bg_record(records, NULL, &blockreq, 0 , 0);
	xfree(name);

	bg_record = (bg_record_t *) list_pop(records);
	if (!bg_record) {
		error("Nothing was returned from full system create");
		rc = SLURM_ERROR;
		goto no_total;
	}
	reset_ba_system(false);
	for(i=0; i<SYSTEM_DIMENSIONS; i++) {
		geo_char[i] = alpha_num[bg_record->geo[i]];
		start_char[i] = alpha_num[bg_record->start[i]];
	}
	debug2("adding %s %s %s",  bg_record->mp_str, start_char, geo_char);
	if (bg_record->ba_mp_list)
		list_flush(bg_record->ba_mp_list);
	else
		bg_record->ba_mp_list = list_create(destroy_ba_mp);
#ifdef HAVE_BGQ
	results = list_create(destroy_ba_mp);
#else
	results = list_create(NULL);
#endif
	name = set_bg_block(results,
			    bg_record->start,
			    bg_record->geo,
			    bg_record->conn_type);
	if (!name) {
		error("I was unable to make the full system block.");
		list_destroy(results);
		list_iterator_destroy(itr);
		slurm_mutex_unlock(&block_state_mutex);
		return SLURM_ERROR;
	}
	xfree(name);
	if (bg_record->ba_mp_list)
		list_destroy(bg_record->ba_mp_list);
#ifdef HAVE_BGQ
	bg_record->ba_mp_list = results;
	results = NULL;
#else
	bg_record->ba_mp_list = list_create(destroy_ba_mp);
	copy_node_path(results, &bg_record->ba_mp_list);
	list_destroy(results);
#endif
	if ((rc = bridge_block_create(bg_record)) == SLURM_ERROR) {
		error("create_full_system_block: "
		      "unable to configure block in api");
		destroy_bg_record(bg_record);
		goto no_total;
	}

	print_bg_record(bg_record);
	list_append(bg_lists->main, bg_record);

no_total:
	if (records)
		list_destroy(records);
	slurm_mutex_unlock(&block_state_mutex);
	return rc;
}
コード例 #4
0
ファイル: bg_read_config.c プロジェクト: A1ve5/slurm
/*
 * Read and process the bluegene.conf configuration file so to interpret what
 * blocks are static/dynamic, torus/mesh, etc.
 */
extern int read_bg_conf(void)
{
	int i;
	bool tmp_bool = 0;
	int count = 0;
	s_p_hashtbl_t *tbl = NULL;
	char *tmp_char = NULL;
	select_ba_request_t **blockreq_array = NULL;
	image_t **image_array = NULL;
	image_t *image = NULL;
	static time_t last_config_update = (time_t) 0;
	struct stat config_stat;
	ListIterator itr = NULL;
	char* bg_conf_file = NULL;
	static int *dims = NULL;

	if (!dims)
		dims = select_g_ba_get_dims();

	if (bg_conf->slurm_debug_flags & DEBUG_FLAG_SELECT_TYPE)
		info("Reading the bluegene.conf file");

	/* check if config file has changed */
	bg_conf_file = get_extra_conf_path("bluegene.conf");

	if (stat(bg_conf_file, &config_stat) < 0)
		fatal("can't stat bluegene.conf file %s: %m", bg_conf_file);
	if (last_config_update) {
		_reopen_bridge_log();
		if (last_config_update == config_stat.st_mtime) {
			if (bg_conf->slurm_debug_flags & DEBUG_FLAG_SELECT_TYPE)
				info("%s unchanged", bg_conf_file);
		} else {
			info("Restart slurmctld for %s changes "
			     "to take effect",
			     bg_conf_file);
		}
		last_config_update = config_stat.st_mtime;
		xfree(bg_conf_file);
		return SLURM_SUCCESS;
	}
	last_config_update = config_stat.st_mtime;

	/* initialization */
	/* bg_conf defined in bg_node_alloc.h */
	if (!(tbl = config_make_tbl(bg_conf_file)))
		fatal("something wrong with opening/reading bluegene "
		      "conf file");
	xfree(bg_conf_file);

#ifdef HAVE_BGL
	if (s_p_get_array((void ***)&image_array,
			  &count, "AltBlrtsImage", tbl)) {
		for (i = 0; i < count; i++) {
			list_append(bg_conf->blrts_list, image_array[i]);
			image_array[i] = NULL;
		}
	}
	if (!s_p_get_string(&bg_conf->default_blrtsimage, "BlrtsImage", tbl)) {
		if (!list_count(bg_conf->blrts_list))
			fatal("BlrtsImage not configured "
			      "in bluegene.conf");
		itr = list_iterator_create(bg_conf->blrts_list);
		image = list_next(itr);
		image->def = true;
		list_iterator_destroy(itr);
		bg_conf->default_blrtsimage = xstrdup(image->name);
		info("Warning: using %s as the default BlrtsImage.  "
		     "If this isn't correct please set BlrtsImage",
		     bg_conf->default_blrtsimage);
	} else {
		if (bg_conf->slurm_debug_flags & DEBUG_FLAG_SELECT_TYPE)
			info("default BlrtsImage %s",
			     bg_conf->default_blrtsimage);
		image = xmalloc(sizeof(image_t));
		image->name = xstrdup(bg_conf->default_blrtsimage);
		image->def = true;
		image->groups = NULL;
		/* we want it to be first */
		list_push(bg_conf->blrts_list, image);
	}

	if (s_p_get_array((void ***)&image_array,
			  &count, "AltLinuxImage", tbl)) {
		for (i = 0; i < count; i++) {
			list_append(bg_conf->linux_list, image_array[i]);
			image_array[i] = NULL;
		}
	}
	if (!s_p_get_string(&bg_conf->default_linuximage, "LinuxImage", tbl)) {
		if (!list_count(bg_conf->linux_list))
			fatal("LinuxImage not configured "
			      "in bluegene.conf");
		itr = list_iterator_create(bg_conf->linux_list);
		image = list_next(itr);
		image->def = true;
		list_iterator_destroy(itr);
		bg_conf->default_linuximage = xstrdup(image->name);
		info("Warning: using %s as the default LinuxImage.  "
		     "If this isn't correct please set LinuxImage",
		     bg_conf->default_linuximage);
	} else {
		if (bg_conf->slurm_debug_flags & DEBUG_FLAG_SELECT_TYPE)
			info("default LinuxImage %s",
			     bg_conf->default_linuximage);
		image = xmalloc(sizeof(image_t));
		image->name = xstrdup(bg_conf->default_linuximage);
		image->def = true;
		image->groups = NULL;
		/* we want it to be first */
		list_push(bg_conf->linux_list, image);
	}

	if (s_p_get_array((void ***)&image_array,
			  &count, "AltRamDiskImage", tbl)) {
		for (i = 0; i < count; i++) {
			list_append(bg_conf->ramdisk_list, image_array[i]);
			image_array[i] = NULL;
		}
	}
	if (!s_p_get_string(&bg_conf->default_ramdiskimage,
			    "RamDiskImage", tbl)) {
		if (!list_count(bg_conf->ramdisk_list))
			fatal("RamDiskImage not configured "
			      "in bluegene.conf");
		itr = list_iterator_create(bg_conf->ramdisk_list);
		image = list_next(itr);
		image->def = true;
		list_iterator_destroy(itr);
		bg_conf->default_ramdiskimage = xstrdup(image->name);
		info("Warning: using %s as the default RamDiskImage.  "
		     "If this isn't correct please set RamDiskImage",
		     bg_conf->default_ramdiskimage);
	} else {
		if (bg_conf->slurm_debug_flags & DEBUG_FLAG_SELECT_TYPE)
			info("default RamDiskImage %s",
			     bg_conf->default_ramdiskimage);
		image = xmalloc(sizeof(image_t));
		image->name = xstrdup(bg_conf->default_ramdiskimage);
		image->def = true;
		image->groups = NULL;
		/* we want it to be first */
		list_push(bg_conf->ramdisk_list, image);
	}
#elif defined HAVE_BGP

	if (s_p_get_array((void ***)&image_array,
			  &count, "AltCnloadImage", tbl)) {
		for (i = 0; i < count; i++) {
			list_append(bg_conf->linux_list, image_array[i]);
			image_array[i] = NULL;
		}
	}
	if (!s_p_get_string(&bg_conf->default_linuximage, "CnloadImage", tbl)) {
		if (!list_count(bg_conf->linux_list))
			fatal("CnloadImage not configured "
			      "in bluegene.conf");
		itr = list_iterator_create(bg_conf->linux_list);
		image = list_next(itr);
		image->def = true;
		list_iterator_destroy(itr);
		bg_conf->default_linuximage = xstrdup(image->name);
		info("Warning: using %s as the default CnloadImage.  "
		     "If this isn't correct please set CnloadImage",
		     bg_conf->default_linuximage);
	} else {
		if (bg_conf->slurm_debug_flags & DEBUG_FLAG_SELECT_TYPE)
			info("default CnloadImage %s",
			     bg_conf->default_linuximage);
		image = xmalloc(sizeof(image_t));
		image->name = xstrdup(bg_conf->default_linuximage);
		image->def = true;
		image->groups = NULL;
		/* we want it to be first */
		list_push(bg_conf->linux_list, image);
	}

	if (s_p_get_array((void ***)&image_array,
			  &count, "AltIoloadImage", tbl)) {
		for (i = 0; i < count; i++) {
			list_append(bg_conf->ramdisk_list, image_array[i]);
			image_array[i] = NULL;
		}
	}
	if (!s_p_get_string(&bg_conf->default_ramdiskimage,
			    "IoloadImage", tbl)) {
		if (!list_count(bg_conf->ramdisk_list))
			fatal("IoloadImage not configured "
			      "in bluegene.conf");
		itr = list_iterator_create(bg_conf->ramdisk_list);
		image = list_next(itr);
		image->def = true;
		list_iterator_destroy(itr);
		bg_conf->default_ramdiskimage = xstrdup(image->name);
		info("Warning: using %s as the default IoloadImage.  "
		     "If this isn't correct please set IoloadImage",
		     bg_conf->default_ramdiskimage);
	} else {
		if (bg_conf->slurm_debug_flags & DEBUG_FLAG_SELECT_TYPE)
			info("default IoloadImage %s",
			     bg_conf->default_ramdiskimage);
		image = xmalloc(sizeof(image_t));
		image->name = xstrdup(bg_conf->default_ramdiskimage);
		image->def = true;
		image->groups = NULL;
		/* we want it to be first */
		list_push(bg_conf->ramdisk_list, image);
	}

#endif
	if (s_p_get_array((void ***)&image_array,
			  &count, "AltMloaderImage", tbl)) {
		for (i = 0; i < count; i++) {
			list_append(bg_conf->mloader_list, image_array[i]);
			image_array[i] = NULL;
		}
	}
	if (!s_p_get_string(&bg_conf->default_mloaderimage,
			    "MloaderImage", tbl)) {
		if (!list_count(bg_conf->mloader_list))
			fatal("MloaderImage not configured "
			      "in bluegene.conf");
		itr = list_iterator_create(bg_conf->mloader_list);
		image = list_next(itr);
		image->def = true;
		list_iterator_destroy(itr);
		bg_conf->default_mloaderimage = xstrdup(image->name);
		info("Warning: using %s as the default MloaderImage.  "
		     "If this isn't correct please set MloaderImage",
		     bg_conf->default_mloaderimage);
	} else {
		if (bg_conf->slurm_debug_flags & DEBUG_FLAG_SELECT_TYPE)
			info("default MloaderImage %s",
			     bg_conf->default_mloaderimage);
		image = xmalloc(sizeof(image_t));
		image->name = xstrdup(bg_conf->default_mloaderimage);
		image->def = true;
		image->groups = NULL;
		/* we want it to be first */
		list_push(bg_conf->mloader_list, image);
	}

	if (!s_p_get_uint16(&bg_conf->mp_cnode_cnt, "MidplaneNodeCnt", tbl)) {
		if (!s_p_get_uint16(&bg_conf->mp_cnode_cnt,
				    "BasePartitionNodeCnt", tbl)) {
			error("MidplaneNodeCnt not configured in bluegene.conf "
			      "defaulting to 512 as MidplaneNodeCnt");
			bg_conf->mp_cnode_cnt = 512;
		}
	}

	if (bg_conf->mp_cnode_cnt <= 0)
		fatal("You should have more than 0 nodes "
		      "per midplane");
	bg_conf->actual_cnodes_per_mp = bg_conf->mp_cnode_cnt;
	bg_conf->quarter_cnode_cnt = bg_conf->mp_cnode_cnt/4;

	/* bg_conf->cpus_per_mp should had already been set from the
	 * node_init */
	if (bg_conf->cpus_per_mp < bg_conf->mp_cnode_cnt) {
		fatal("For some reason we have only %u cpus per mp, but "
		      "have %u cnodes per mp.  You need at least the same "
		      "number of cpus as you have cnodes per mp.  "
		      "Check the NodeName CPUs= "
		      "definition in the slurm.conf.",
		      bg_conf->cpus_per_mp, bg_conf->mp_cnode_cnt);
	}

	bg_conf->cpu_ratio = bg_conf->cpus_per_mp/bg_conf->mp_cnode_cnt;
	if (!bg_conf->cpu_ratio)
		fatal("We appear to have less than 1 cpu on a cnode.  "
		      "You specified %u for MidplaneNodeCnt "
		      "in the blugene.conf and %u cpus "
		      "for each node in the slurm.conf",
		      bg_conf->mp_cnode_cnt, bg_conf->cpus_per_mp);

	num_unused_cpus = 1;
	for (i = 0; i<SYSTEM_DIMENSIONS; i++)
		num_unused_cpus *= dims[i];
	num_unused_cpus *= bg_conf->cpus_per_mp;
	num_possible_unused_cpus = num_unused_cpus;

	if (!s_p_get_uint16(&bg_conf->nodecard_cnode_cnt,
			    "NodeBoardNodeCnt", tbl)) {
		if (!s_p_get_uint16(&bg_conf->nodecard_cnode_cnt,
				    "NodeCardNodeCnt", tbl)) {
			error("NodeCardNodeCnt not configured in bluegene.conf "
			      "defaulting to 32 as NodeCardNodeCnt");
			bg_conf->nodecard_cnode_cnt = 32;
		}
	}

	if (bg_conf->nodecard_cnode_cnt <= 0)
		fatal("You should have more than 0 nodes per nodecard");

	bg_conf->mp_nodecard_cnt =
		bg_conf->mp_cnode_cnt / bg_conf->nodecard_cnode_cnt;

	if (!s_p_get_uint16(&bg_conf->ionodes_per_mp, "IONodesPerMP", tbl))
		if (!s_p_get_uint16(&bg_conf->ionodes_per_mp, "Numpsets", tbl))
			fatal("Warning: IONodesPerMP not configured "
			      "in bluegene.conf");

	s_p_get_uint16(&bg_conf->max_block_err, "MaxBlockInError", tbl);

	tmp_bool = 0;
	s_p_get_boolean(&tmp_bool, "SubMidplaneSystem", tbl);
	bg_conf->sub_mp_sys = tmp_bool;

#ifdef HAVE_BGQ
	tmp_bool = 0;
	s_p_get_boolean(&tmp_bool, "AllowSubBlockAllocations", tbl);
	bg_conf->sub_blocks = tmp_bool;

	/* You can only have 16 ionodes per midplane */
	if (bg_conf->ionodes_per_mp > bg_conf->mp_nodecard_cnt)
		bg_conf->ionodes_per_mp = bg_conf->mp_nodecard_cnt;
#endif

	for (i=0; i<SYSTEM_DIMENSIONS; i++)
		bg_conf->default_conn_type[i] = (uint16_t)NO_VAL;
	s_p_get_string(&tmp_char, "DefaultConnType", tbl);
	if (tmp_char) {
		verify_conn_type(tmp_char, bg_conf->default_conn_type);
		if ((bg_conf->default_conn_type[0] != SELECT_MESH)
		    && (bg_conf->default_conn_type[0] != SELECT_TORUS))
			fatal("Can't have a DefaultConnType of %s "
			      "(only Mesh or Torus values are valid).",
			      tmp_char);
		xfree(tmp_char);
	} else
		bg_conf->default_conn_type[0] = SELECT_TORUS;

#ifndef HAVE_BG_L_P
	int first_conn_type = bg_conf->default_conn_type[0];
	for (i=1; i<SYSTEM_DIMENSIONS; i++) {
		if (bg_conf->default_conn_type[i] == (uint16_t)NO_VAL)
			bg_conf->default_conn_type[i] = first_conn_type;
		else if (bg_conf->default_conn_type[i] >= SELECT_SMALL)
			fatal("Can't have a DefaultConnType of %s "
			      "(only Mesh or Torus values are valid).",
			      tmp_char);
	}
#endif

	if (bg_conf->ionodes_per_mp) {
		bitstr_t *tmp_bitmap = NULL;
		int small_size = 1;

		/* THIS IS A HACK TO MAKE A 1 NODECARD SYSTEM WORK,
		 * Sometime on a Q system the nodecard isn't in the 0
		 * spot so only do this if you know it is in that
		 * spot.  Otherwise say the whole midplane is there
		 * and just make blocks over the whole thing.  They
		 * you can error out the blocks that aren't usable. */
		if (bg_conf->sub_mp_sys
		    && bg_conf->mp_cnode_cnt == bg_conf->nodecard_cnode_cnt) {
#ifdef HAVE_BGQ
			bg_conf->quarter_ionode_cnt = 1;
			bg_conf->nodecard_ionode_cnt = 1;
#else
			bg_conf->quarter_ionode_cnt = 2;
			bg_conf->nodecard_ionode_cnt = 2;
#endif
		} else {
			bg_conf->quarter_ionode_cnt = bg_conf->ionodes_per_mp/4;
			bg_conf->nodecard_ionode_cnt =
				bg_conf->quarter_ionode_cnt/4;
		}

		/* How many nodecards per ionode */
		bg_conf->nc_ratio =
			((double)bg_conf->mp_cnode_cnt
			 / (double)bg_conf->nodecard_cnode_cnt)
			/ (double)bg_conf->ionodes_per_mp;
		/* How many ionodes per nodecard */
		bg_conf->io_ratio =
			(double)bg_conf->ionodes_per_mp /
			((double)bg_conf->mp_cnode_cnt
			 / (double)bg_conf->nodecard_cnode_cnt);

		/* How many cnodes per ionode */
		bg_conf->ionode_cnode_cnt =
			bg_conf->nodecard_cnode_cnt * bg_conf->nc_ratio;

		//info("got %f %f", bg_conf->nc_ratio, bg_conf->io_ratio);
		/* figure out the smallest block we can have on the
		   system */
#ifdef HAVE_BGL
		if (bg_conf->io_ratio >= 1)
			bg_conf->smallest_block=32;
		else
			bg_conf->smallest_block=128;
#else

		if (bg_conf->io_ratio >= 2)
			bg_conf->smallest_block=16;
		else if (bg_conf->io_ratio == 1)
			bg_conf->smallest_block=32;
		else if (bg_conf->io_ratio == .5)
			bg_conf->smallest_block=64;
		else if (bg_conf->io_ratio == .25)
			bg_conf->smallest_block=128;
		else if (bg_conf->io_ratio == .125)
			bg_conf->smallest_block=256;
		else {
			error("unknown ioratio %f.  Can't figure out "
			      "smallest block size, setting it to midplane",
			      bg_conf->io_ratio);
			bg_conf->smallest_block = 512;
		}
#endif
		if (bg_conf->slurm_debug_flags & DEBUG_FLAG_SELECT_TYPE)
			info("Smallest block possible on this system is %u",
			     bg_conf->smallest_block);
		/* below we are creating all the possible bitmaps for
		 * each size of small block
		 */
		if ((int)bg_conf->nodecard_ionode_cnt < 1) {
			bg_conf->nodecard_ionode_cnt = 0;
		} else {
			bg_lists->valid_small32 = list_create(_destroy_bitmap);
			/* This is suppose to be = and not ==, we only
			   want to decrement when small_size equals
			   something.
			*/
			if ((small_size = bg_conf->nodecard_ionode_cnt))
				small_size--;
			i = 0;
			while (i<bg_conf->ionodes_per_mp) {
				tmp_bitmap = bit_alloc(bg_conf->ionodes_per_mp);
				bit_nset(tmp_bitmap, i, i+small_size);
				i += small_size+1;
				list_append(bg_lists->valid_small32,
					    tmp_bitmap);
			}
		}
		/* If we only have 1 nodecard just jump to the end
		   since this will never need to happen below.
		   Pretty much a hack to avoid seg fault;). */
		if (bg_conf->mp_cnode_cnt == bg_conf->nodecard_cnode_cnt)
			goto no_calc;

		bg_lists->valid_small128 = list_create(_destroy_bitmap);
		if ((small_size = bg_conf->quarter_ionode_cnt))
			small_size--;
		i = 0;
		while (i<bg_conf->ionodes_per_mp) {
			tmp_bitmap = bit_alloc(bg_conf->ionodes_per_mp);
			bit_nset(tmp_bitmap, i, i+small_size);
			i += small_size+1;
			list_append(bg_lists->valid_small128, tmp_bitmap);
		}

#ifndef HAVE_BGL
		bg_lists->valid_small64 = list_create(_destroy_bitmap);
		if ((small_size = bg_conf->nodecard_ionode_cnt * 2))
			small_size--;
		i = 0;
		while (i<bg_conf->ionodes_per_mp) {
			tmp_bitmap = bit_alloc(bg_conf->ionodes_per_mp);
			bit_nset(tmp_bitmap, i, i+small_size);
			i += small_size+1;
			list_append(bg_lists->valid_small64, tmp_bitmap);
		}

		bg_lists->valid_small256 = list_create(_destroy_bitmap);
		if ((small_size = bg_conf->quarter_ionode_cnt * 2))
			small_size--;
		i = 0;
		while (i<bg_conf->ionodes_per_mp) {
			tmp_bitmap = bit_alloc(bg_conf->ionodes_per_mp);
			bit_nset(tmp_bitmap, i, i+small_size);
			i += small_size+1;
			list_append(bg_lists->valid_small256, tmp_bitmap);
		}
#endif
	} else {
		fatal("your ionodes_per_mp is 0");
	}

no_calc:

	if (!s_p_get_uint16(&bg_conf->bridge_api_verb, "BridgeAPIVerbose", tbl))
		info("Warning: BridgeAPIVerbose not configured "
		     "in bluegene.conf");
	if (!s_p_get_string(&bg_conf->bridge_api_file,
			    "BridgeAPILogFile", tbl))
		info("BridgeAPILogFile not configured in bluegene.conf");
	else
		_reopen_bridge_log();

	if (s_p_get_string(&tmp_char, "DenyPassthrough", tbl)) {
		if (strstr(tmp_char, "A"))
			ba_deny_pass |= PASS_DENY_A;
		if (strstr(tmp_char, "X"))
			ba_deny_pass |= PASS_DENY_X;
		if (strstr(tmp_char, "Y"))
			ba_deny_pass |= PASS_DENY_Y;
		if (strstr(tmp_char, "Z"))
			ba_deny_pass |= PASS_DENY_Z;
		if (!xstrcasecmp(tmp_char, "ALL"))
			ba_deny_pass |= PASS_DENY_ALL;
		bg_conf->deny_pass = ba_deny_pass;
		xfree(tmp_char);
	}

	if (!s_p_get_string(&tmp_char, "LayoutMode", tbl)) {
		info("Warning: LayoutMode was not specified in bluegene.conf "
		     "defaulting to STATIC partitioning");
		bg_conf->layout_mode = LAYOUT_STATIC;
	} else {
		if (!xstrcasecmp(tmp_char,"STATIC"))
			bg_conf->layout_mode = LAYOUT_STATIC;
		else if (!xstrcasecmp(tmp_char,"OVERLAP"))
			bg_conf->layout_mode = LAYOUT_OVERLAP;
		else if (!xstrcasecmp(tmp_char,"DYNAMIC"))
			bg_conf->layout_mode = LAYOUT_DYNAMIC;
		else {
			fatal("I don't understand this LayoutMode = %s",
			      tmp_char);
		}
		xfree(tmp_char);
	}

	/* add blocks defined in file */
	if (bg_conf->layout_mode != LAYOUT_DYNAMIC) {
		if (!s_p_get_array((void ***)&blockreq_array,
				   &count, "MPs", tbl)) {
			if (!s_p_get_array((void ***)&blockreq_array,
					   &count, "BPs", tbl)) {
				info("WARNING: no blocks defined in "
				     "bluegene.conf, "
				     "only making full system block");
				/* create_full_system_block(NULL); */
				if (bg_conf->sub_mp_sys ||
				    (bg_conf->mp_cnode_cnt ==
				     bg_conf->nodecard_cnode_cnt))
					fatal("On a sub-midplane system you "
					      "need to define the blocks you "
					      "want on your system.");
			}
		}

		for (i = 0; i < count; i++) {
			add_bg_record(bg_lists->main, NULL,
				      blockreq_array[i], 0, 0);
		}
	} else if (bg_conf->sub_mp_sys ||
		   (bg_conf->mp_cnode_cnt == bg_conf->nodecard_cnode_cnt))
		/* we can't do dynamic here on a sub-midplane system */
		fatal("On a sub-midplane system we can only do OVERLAP or "
		      "STATIC LayoutMode.  Please update your bluegene.conf.");

#ifdef HAVE_BGQ
	if ((bg_recover != NOT_FROM_CONTROLLER) && assoc_mgr_qos_list
	    && s_p_get_string(&tmp_char, "RebootQOSList", tbl)) {
		bool valid;
		char *token, *last = NULL;
		slurmdb_qos_rec_t *qos = NULL;
		assoc_mgr_lock_t locks = { NO_LOCK, NO_LOCK, NO_LOCK,
					   READ_LOCK, NO_LOCK,
					   NO_LOCK, NO_LOCK };

		/* Lock here to avoid g_qos_count changing under us */
		assoc_mgr_lock(&locks);
		bg_conf->reboot_qos_bitmap = bit_alloc(g_qos_count);
		itr = list_iterator_create(assoc_mgr_qos_list);

		token = strtok_r(tmp_char, ",", &last);
		while (token) {
			valid = false;
			while((qos = list_next(itr))) {
				if (!xstrcasecmp(token, qos->name)) {
					bit_set(bg_conf->reboot_qos_bitmap,
						qos->id);
					valid = true;
					break;
				}
			}
			if (!valid)
				error("Invalid RebootQOSList value: %s", token);
			list_iterator_reset(itr);
			token = strtok_r(NULL, ",", &last);
		}
		list_iterator_destroy(itr);
		xfree(tmp_char);
		assoc_mgr_unlock(&locks);
	}
#endif

	s_p_hashtbl_destroy(tbl);

	return SLURM_SUCCESS;
}
コード例 #5
0
ファイル: bg_record_functions.c プロジェクト: VURM/slurm
/*
 * This could potentially lock the node lock in the slurmctld with
 * slurm_drain_node, or slurm_fail_job so if slurmctld_locked is called we
 * will call the functions without locking the locks again.
 */
extern int down_nodecard(char *mp_name, bitoff_t io_start,
			 bool slurmctld_locked)
{
	List requests = NULL;
	List delete_list = NULL;
	ListIterator itr = NULL;
	bg_record_t *bg_record = NULL, *found_record = NULL, tmp_record;
	bg_record_t *smallest_bg_record = NULL;
	struct node_record *node_ptr = NULL;
	int mp_bit = 0;
	static int io_cnt = NO_VAL;
	static int create_size = NO_VAL;
	static select_ba_request_t blockreq;
	int rc = SLURM_SUCCESS;
	char *reason = "select_bluegene: nodecard down";

	xassert(mp_name);

	if (io_cnt == NO_VAL) {
		io_cnt = 1;
		/* Translate 1 nodecard count to ionode count */
		if ((io_cnt *= bg_conf->io_ratio))
			io_cnt--;

		/* make sure we create something that is able to be
		   created */
		if (bg_conf->smallest_block < bg_conf->nodecard_cnode_cnt)
			create_size = bg_conf->nodecard_cnode_cnt;
		else
			create_size = bg_conf->smallest_block;
	}

	node_ptr = find_node_record(mp_name);
	if (!node_ptr) {
		error ("down_sub_node_blocks: invalid node specified '%s'",
		       mp_name);
		return EINVAL;
	}

	/* this is here for sanity check to make sure we don't core on
	   these bits when we set them below. */
	if (io_start >= bg_conf->ionodes_per_mp
	    || (io_start+io_cnt) >= bg_conf->ionodes_per_mp) {
		debug("io %d-%d not configured on this "
		      "system, only %d ionodes per midplane",
		      io_start, io_start+io_cnt, bg_conf->ionodes_per_mp);
		return EINVAL;
	}
	mp_bit = (node_ptr - node_record_table_ptr);

	memset(&blockreq, 0, sizeof(select_ba_request_t));

	blockreq.conn_type[0] = SELECT_SMALL;
	blockreq.save_name = mp_name;

	debug3("here setting node %d of %d and ionodes %d-%d of %d",
	       mp_bit, node_record_count, io_start,
	       io_start+io_cnt, bg_conf->ionodes_per_mp);

	memset(&tmp_record, 0, sizeof(bg_record_t));
	tmp_record.mp_count = 1;
	tmp_record.cnode_cnt = bg_conf->nodecard_cnode_cnt;
	tmp_record.mp_bitmap = bit_alloc(node_record_count);
	bit_set(tmp_record.mp_bitmap, mp_bit);

	tmp_record.ionode_bitmap = bit_alloc(bg_conf->ionodes_per_mp);
	bit_nset(tmp_record.ionode_bitmap, io_start, io_start+io_cnt);

	slurm_mutex_lock(&block_state_mutex);
	itr = list_iterator_create(bg_lists->main);
	while ((bg_record = list_next(itr))) {
		if (!bit_test(bg_record->mp_bitmap, mp_bit))
			continue;

		if (!blocks_overlap(bg_record, &tmp_record))
			continue;

		if (bg_record->job_running > NO_JOB_RUNNING) {
			if (slurmctld_locked)
				job_fail(bg_record->job_running);
			else
				slurm_fail_job(bg_record->job_running);

		}
		/* If Running Dynamic mode and the block is
		   smaller than the create size just continue on.
		*/
		if ((bg_conf->layout_mode == LAYOUT_DYNAMIC)
		    && (bg_record->cnode_cnt < create_size)) {
			if (!delete_list)
				delete_list = list_create(NULL);
			list_append(delete_list, bg_record);
			continue;
		}

		/* keep track of the smallest size that is at least
		   the size of create_size. */
		if (!smallest_bg_record ||
		    (smallest_bg_record->cnode_cnt > bg_record->cnode_cnt))
			smallest_bg_record = bg_record;
	}
	list_iterator_destroy(itr);
	slurm_mutex_unlock(&block_state_mutex);

	if (bg_conf->layout_mode != LAYOUT_DYNAMIC) {
		debug3("running non-dynamic mode");
		/* This should never happen, but just in case... */
		if (delete_list)
			list_destroy(delete_list);

		/* If we found a block that is smaller or equal to a
		   midplane we will just mark it in an error state as
		   opposed to draining the node.
		*/
		if (smallest_bg_record
		    && (smallest_bg_record->cnode_cnt < bg_conf->mp_cnode_cnt)){
			if (smallest_bg_record->state & BG_BLOCK_ERROR_FLAG) {
				rc = SLURM_NO_CHANGE_IN_DATA;
				goto cleanup;
			}

			rc = put_block_in_error_state(
				smallest_bg_record, reason);
			goto cleanup;
		}

		debug("No block under 1 midplane available for this nodecard.  "
		      "Draining the whole node.");
		if (!node_already_down(mp_name)) {
			if (slurmctld_locked)
				drain_nodes(mp_name, reason,
					    slurm_get_slurm_user_id());
			else
				slurm_drain_nodes(mp_name, reason,
						  slurm_get_slurm_user_id());
		}
		rc = SLURM_SUCCESS;
		goto cleanup;
	}

	/* below is only for Dynamic mode */

	if (delete_list) {
		int cnt_set = 0;
		bitstr_t *iobitmap = bit_alloc(bg_conf->ionodes_per_mp);
		/* don't lock here since it is handled inside
		   the put_block_in_error_state
		*/
		itr = list_iterator_create(delete_list);
		while ((bg_record = list_next(itr))) {
			debug2("combining smaller than nodecard "
			       "dynamic block %s",
			       bg_record->bg_block_id);
			while (bg_record->job_running > NO_JOB_RUNNING)
				sleep(1);

			bit_or(iobitmap, bg_record->ionode_bitmap);
			cnt_set++;
		}
		list_iterator_destroy(itr);
		list_destroy(delete_list);
		if (!cnt_set) {
			FREE_NULL_BITMAP(iobitmap);
			rc = SLURM_ERROR;
			goto cleanup;
		}
		/* set the start to be the same as the start of the
		   ionode_bitmap.  If no ionodes set (not a small
		   block) set io_start = 0. */
		if ((io_start = bit_ffs(iobitmap)) == -1) {
			io_start = 0;
			if (create_size > bg_conf->nodecard_cnode_cnt)
				blockreq.small128 = 4;
			else
				blockreq.small32 = 16;
		} else if (create_size <= bg_conf->nodecard_cnode_cnt)
			blockreq.small32 = 1;
		else
			/* this should never happen */
			blockreq.small128 = 1;

		FREE_NULL_BITMAP(iobitmap);
	} else if (smallest_bg_record) {
		debug2("smallest dynamic block is %s",
		       smallest_bg_record->bg_block_id);
		if (smallest_bg_record->state & BG_BLOCK_ERROR_FLAG) {
			rc = SLURM_NO_CHANGE_IN_DATA;
			goto cleanup;
		}

		while (smallest_bg_record->job_running > NO_JOB_RUNNING)
			sleep(1);

		if (smallest_bg_record->cnode_cnt == create_size) {
			rc = put_block_in_error_state(
				smallest_bg_record, reason);
			goto cleanup;
		}

		if (create_size > smallest_bg_record->cnode_cnt) {
			/* we should never get here.  This means we
			 * have a create_size that is bigger than a
			 * block that is already made.
			 */
			rc = put_block_in_error_state(
				smallest_bg_record, reason);
			goto cleanup;
		}
		debug3("node count is %d", smallest_bg_record->cnode_cnt);
		switch(smallest_bg_record->cnode_cnt) {
#ifndef HAVE_BGL
		case 64:
			blockreq.small32 = 2;
			break;
		case 256:
			blockreq.small32 = 8;
			break;
#endif
		case 128:
			blockreq.small32 = 4;
			break;
		case 512:
		default:
			blockreq.small32 = 16;
			break;
		}

		if (create_size != bg_conf->nodecard_cnode_cnt) {
			blockreq.small128 = blockreq.small32 / 4;
			blockreq.small32 = 0;
			io_start = 0;
		} else if ((io_start =
			    bit_ffs(smallest_bg_record->ionode_bitmap)) == -1)
			/* set the start to be the same as the start of the
			   ionode_bitmap.  If no ionodes set (not a small
			   block) set io_start = 0. */
			io_start = 0;
	} else {
		switch(create_size) {
#ifndef HAVE_BGL
		case 64:
			blockreq.small64 = 8;
			break;
		case 256:
			blockreq.small256 = 2;
#endif
		case 32:
			blockreq.small32 = 16;
			break;
		case 128:
			blockreq.small128 = 4;
			break;
		case 512:
			if (!node_already_down(mp_name)) {
				char *reason = "select_bluegene: nodecard down";
				if (slurmctld_locked)
					drain_nodes(mp_name, reason,
						    slurm_get_slurm_user_id());
				else
					slurm_drain_nodes(
						mp_name, reason,
						slurm_get_slurm_user_id());
			}
			rc = SLURM_SUCCESS;
			goto cleanup;
			break;
		default:
			error("Unknown create size of %d", create_size);
			break;
		}
		/* since we don't have a block in this midplane
		   we need to start at the beginning. */
		io_start = 0;
		/* we also need a bg_block to pretend to be the
		   smallest block that takes up the entire midplane. */
	}


	/* Here we need to add blocks that take up nodecards on this
	   midplane.  Since Slurm only keeps track of midplanes
	   natively this is the only want to handle this case.
	*/
	requests = list_create(destroy_bg_record);
	add_bg_record(requests, NULL, &blockreq, 1, io_start);

	slurm_mutex_lock(&block_state_mutex);
	delete_list = list_create(NULL);
	while ((bg_record = list_pop(requests))) {
		itr = list_iterator_create(bg_lists->main);
		while ((found_record = list_next(itr))) {
			if (!blocks_overlap(bg_record, found_record))
				continue;
			list_push(delete_list, found_record);
			list_remove(itr);
		}
		list_iterator_destroy(itr);

		/* we need to add this record since it doesn't exist */
		if (bridge_block_create(bg_record) == SLURM_ERROR) {
			destroy_bg_record(bg_record);
			error("down_sub_node_blocks: "
			      "unable to configure block in api");
			continue;
		}

		debug("adding block %s to fill in small blocks "
		      "around bad nodecards",
		      bg_record->bg_block_id);
		print_bg_record(bg_record);
		list_append(bg_lists->main, bg_record);
		if (bit_overlap(bg_record->ionode_bitmap,
				tmp_record.ionode_bitmap)) {
			/* here we know the error block doesn't exist
			   so just set the state here */
			slurm_mutex_unlock(&block_state_mutex);
			rc = put_block_in_error_state(bg_record, reason);
			slurm_mutex_lock(&block_state_mutex);
		}
	}
	list_destroy(requests);

	if (delete_list) {
		slurm_mutex_unlock(&block_state_mutex);
		free_block_list(NO_VAL, delete_list, 0, 0);
		list_destroy(delete_list);
	}
	slurm_mutex_lock(&block_state_mutex);
	sort_bg_record_inc_size(bg_lists->main);
	slurm_mutex_unlock(&block_state_mutex);
	last_bg_update = time(NULL);

cleanup:
	FREE_NULL_BITMAP(tmp_record.mp_bitmap);
	FREE_NULL_BITMAP(tmp_record.ionode_bitmap);

	return rc;

}