Example #1
void
taskset_cycle( taskset_t *ts )
{
    hb_lock( ts->task_cond_lock );

    /*
     * Signal all threads that their work is available.
     */
    bit_nset( ts->task_begin_bitmap, 0, ts->thread_count - 1 );
    hb_cond_broadcast( ts->task_begin );

    /*
     * Wait until all threads have completed.  Note that we must
     * loop here as hb_cond_wait() on some platforms (e.g. pthread_cond_wait())
     * may unblock prematurely.
     */
    do
    {
        hb_cond_wait( ts->task_complete, ts->task_cond_lock );
    } while ( !allbits_set( ts->task_complete_bitmap, ts->bitmap_elements ) );

    /*
     * Clear completion indications for next time.
     */
    bit_nclear( ts->task_complete_bitmap, 0, ts->thread_count - 1 );

    hb_unlock( ts->task_cond_lock );
}
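The do/while guard above matters because hb_cond_wait() can wake spuriously, so completion is only trusted once every bit in the completion bitmap is set. A minimal sketch of an allbits_set() predicate, assuming the bitmap is an array of full machine words with no unused trailing bits (the word type and names here are assumptions, not HandBrake's actual definitions):

#include <stdbool.h>

typedef unsigned long bitstr_t;   /* hypothetical word type */

static bool
allbits_set( const bitstr_t *bitmap, int num_elements )
{
    int i;

    for ( i = 0; i < num_elements; i++ )
    {
        /* any zero bit means at least one task has not finished */
        if ( bitmap[i] != ~(bitstr_t)0 )
            return false;
    }
    return true;
}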
Example #2
static void cachedev_setbits (bitstr_t *bitmap, ioreq_event *req)
{
#ifdef DEBUG_CACHEDEV
   fprintf(outputfile, "*** %f: Entered cachedev::cachedev_setbits from block %d to %d\n", simtime, req->blkno, req->blkno+req->bcount-1  );
   fflush(outputfile);
#endif

   bit_nset (bitmap, req->blkno, (req->blkno+req->bcount-1));
}
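Here bit_nset() marks the inclusive block range [req->blkno, req->blkno + req->bcount - 1] as cached. As a reference for the semantics only, a naive byte-wise equivalent is sketched below; real bitstring implementations set whole bytes or words at a time:

/* Naive sketch of bit_nset() semantics: set bits start..stop inclusive. */
static void
naive_bit_nset (unsigned char *bitmap, int start, int stop)
{
   int i;

   for (i = start; i <= stop; i++)
      bitmap[i / 8] |= (unsigned char)(1u << (i % 8));
}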
Example #3
void
taskset_fini( taskset_t *ts )
{
    int i;

    hb_lock( ts->task_cond_lock );
    /*
     * Tell each thread to stop, and then cleanup.
     */
    bit_nset( ts->task_stop_bitmap, 0, ts->thread_count - 1 );
    bit_nset( ts->task_begin_bitmap, 0, ts->thread_count - 1 );
    hb_cond_broadcast( ts->task_begin );

    /*
     * Wait for all threads to exit.
     */
    hb_cond_wait( ts->task_complete, ts->task_cond_lock );
    hb_unlock( ts->task_cond_lock );

    /*
     * Clean up taskset memory.
     */
    for( i = 0; i < ts->thread_count; i++)
    {
        hb_thread_close( &ts->task_threads[i] );
    }
    hb_lock_close( &ts->task_cond_lock );
    hb_cond_close( &ts->task_begin );
    hb_cond_close( &ts->task_complete );
    free( ts->task_threads );
    if( ts->task_threads_args != NULL )
        free( ts->task_threads_args );
    free( ts->task_begin_bitmap );
    free( ts->task_complete_bitmap );
    free( ts->task_stop_bitmap );
}
Example #4
/* Remove any specialized cores from those allocated to the job */
static void _clear_spec_cores(struct job_record *job_ptr,
			      bitstr_t *avail_core_bitmap)
{
	int first_node, last_node, i_node;
	int first_core, last_core, i_core;
	int alloc_node = -1, alloc_core = -1, size;
	job_resources_t *job_res = job_ptr->job_resrcs;
	multi_core_data_t *mc_ptr = NULL;

	if (job_ptr->details && job_ptr->details->mc_ptr)
		mc_ptr = job_ptr->details->mc_ptr;

	size = bit_size(job_res->core_bitmap);
	bit_nset(job_res->core_bitmap, 0, size - 1);

	first_node = bit_ffs(job_res->node_bitmap);
	if (first_node >= 0)
		last_node = bit_fls(job_res->node_bitmap);
	else
		last_node = first_node - 1;

	for (i_node = first_node; i_node <= last_node; i_node++) {
		if (!bit_test(job_res->node_bitmap, i_node))
			continue;
		job_res->cpus[++alloc_node] = 0;
		first_core = cr_get_coremap_offset(i_node);
		last_core  = cr_get_coremap_offset(i_node + 1) - 1;
		for (i_core = first_core; i_core <= last_core; i_core++) {
			alloc_core++;
			if (bit_test(avail_core_bitmap, i_core)) {
				uint16_t tpc = select_node_record[i_node].vpus;
				if (mc_ptr &&
				    (mc_ptr->threads_per_core != NO_VAL16) &&
				    (mc_ptr->threads_per_core < tpc))
					tpc = mc_ptr->threads_per_core;

				job_res->cpus[alloc_node] += tpc;
			} else {
				bit_clear(job_res->core_bitmap, alloc_core);
			}
		}
	}
}
Example #5
/* helper function for _expand_masks() */
static void _blot_mask(bitstr_t *mask, uint16_t blot)
{
	uint16_t i, size = 0;
	int prev = -1;

	if (!mask)
		return;
	size = bit_size(mask);
	for (i = 0; i < size; i++) {
		if (bit_test(mask, i)) {
			/* fill in this blot */
			uint16_t start = (i / blot) * blot;
			if (start != prev) {
				bit_nset(mask, start, start+blot-1);
				prev = start;
			}
		}
	}
}
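For illustration, using the same Slurm bitstring calls that appear throughout these examples (values assumed): with blot = 4, one set bit is expanded to cover its whole 4-bit block.

bitstr_t *mask = bit_alloc(16);   /* all bits clear */
bit_set(mask, 5);                 /* mark one CPU */
_blot_mask(mask, 4);              /* bits 4..7 are now set */
FREE_NULL_BITMAP(mask);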
Example #6
/* Convert node name string to equivalent nid string */
static char *_node_names_2_nid_list(char *node_names)
{
	char *nid_list = NULL;
	int i, last_nid_index = -1;
	bool is_dash = false;
	bitstr_t *node_bitmap;

	node_bitmap = bit_alloc(100000);
	for (i = 0; node_names[i]; i++) {
		int nid_index = 0;
		/* skip "nid[" */
		if ((node_names[i] < '0') || (node_names[i] > '9'))
			continue;
		/* skip leading zeros */
		while (node_names[i] == '0')
			i++;
		if (node_names[i] == '[')
			i++;
		while ((node_names[i] >= '0') && (node_names[i] <= '9')) {
			nid_index *= 10;
			nid_index += (node_names[i++] - '0');
		}
		if (is_dash && (nid_index >= last_nid_index)) {
			bit_nset(node_bitmap, last_nid_index, nid_index);
		} else {
			bit_set(node_bitmap, nid_index);
		}
		if ((is_dash = (node_names[i] == '-')))
			last_nid_index = nid_index;
		else if (node_names[i] == '\0')
			break;
	}

	i = strlen(node_names) + 1;
	nid_list = xmalloc(i);
	bit_fmt(nid_list, i, node_bitmap);
	bit_free(node_bitmap);

	return nid_list;
}
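A hypothetical call tracing the parser above: leading zeros are skipped, a dash triggers bit_nset() over the range, and bit_fmt() compresses the bitmap back into a range string.

char *nids = _node_names_2_nid_list("nid[00010-00012,00020]");
/* expected result: "10-12,20" */
info("nid list: %s", nids);
xfree(nids);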
Example #7
static int _unpack_node_subgrp(node_subgrp_t **subgrp_pptr, Buf buffer,
			       uint16_t bitmap_size, uint16_t protocol_version)
{
	node_subgrp_t *subgrp = xmalloc(sizeof(node_subgrp_t));
	int j;
	uint32_t uint32_tmp;
	uint16_t uint16_tmp;

	*subgrp_pptr = subgrp;

	if (protocol_version >= SLURM_MIN_PROTOCOL_VERSION) {
		safe_unpackstr_xmalloc(&subgrp->str, &uint32_tmp, buffer);
		if (!subgrp->str)
			subgrp->inx = bitfmt2int("");
		else
			subgrp->inx = bitfmt2int(subgrp->str);

		subgrp->bitmap = bit_alloc(bitmap_size);

		j = 0;
		while (subgrp->inx[j] >= 0) {
			bit_nset(subgrp->bitmap, subgrp->inx[j],
				 subgrp->inx[j+1]);
			j+=2;
		}

		safe_unpack16(&subgrp->cnode_cnt, buffer);
		safe_unpack16(&uint16_tmp, buffer);
		subgrp->state = uint16_tmp;
	}
	return SLURM_SUCCESS;

unpack_error:
	_free_node_subgrp(subgrp);
	*subgrp_pptr = NULL;
	return SLURM_ERROR;
}
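Worked example of the range loop (input assumed): if subgrp->str is "0-3,8", bitfmt2int() yields the start/end pair list {0, 3, 8, 8, -1}, so the loop calls bit_nset(subgrp->bitmap, 0, 3) and then bit_nset(subgrp->bitmap, 8, 8) before hitting the -1 terminator.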
Example #8
File: gang.c Project: corburn/slurm
/* a helper function for _add_job_to_active() in GS_SOCKET mode:
 * a job has just been added to p_ptr->active_resmap, so set all cores of
 * each used socket to avoid activating another job on the same socket */
static void _fill_sockets(bitstr_t *job_nodemap, struct gs_part *p_ptr)
{
	uint32_t c, i;
	int n, first_bit, last_bit;

	if (!job_nodemap || !p_ptr || !p_ptr->active_resmap)
		return;
	first_bit = bit_ffs(job_nodemap);
	last_bit  = bit_fls(job_nodemap);
	if ((first_bit < 0) || (last_bit < 0))
		fatal("gang: _afill_sockets: nodeless job?");

	for (c = 0, n = 0; n < first_bit; n++) {
		c += _get_phys_bit_cnt(n);
	}
	for (n = first_bit; n <= last_bit; n++) {
		uint16_t s, socks, cps, cores_per_node;
		cores_per_node = _get_phys_bit_cnt(n);
		if (bit_test(job_nodemap, n) == 0) {
			c += cores_per_node;
			continue;
		}
		socks = _get_socket_cnt(n);
		cps = cores_per_node / socks;
		for (s = 0; s < socks; s++) {
			for (i = c; i < c+cps; i++) {
				if (bit_test(p_ptr->active_resmap, i))
					break;
			}
			if (i < c+cps) {
				/* set all bits on this used socket */
				bit_nset(p_ptr->active_resmap, c, c+cps-1);
			}
			c += cps;
		}
	}
}
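Worked example (layout assumed): on a node with 2 sockets of 4 cores each, cps = 4. If any bit in [c, c+3] is already set in active_resmap, the inner loop breaks with i < c+cps and bit_nset() marks the entire range [c, c+3], so the whole socket is treated as busy and no second job is gang-scheduled onto it.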
Example #9
/*
 * Read and process the bluegene.conf configuration file to determine which
 * blocks are static/dynamic, torus/mesh, etc.
 */
extern int read_bg_conf(void)
{
	int i;
	bool tmp_bool = 0;
	int count = 0;
	s_p_hashtbl_t *tbl = NULL;
	char *tmp_char = NULL;
	select_ba_request_t **blockreq_array = NULL;
	image_t **image_array = NULL;
	image_t *image = NULL;
	static time_t last_config_update = (time_t) 0;
	struct stat config_stat;
	ListIterator itr = NULL;
	char* bg_conf_file = NULL;
	static int *dims = NULL;

	if (!dims)
		dims = select_g_ba_get_dims();

	if (bg_conf->slurm_debug_flags & DEBUG_FLAG_SELECT_TYPE)
		info("Reading the bluegene.conf file");

	/* check if config file has changed */
	bg_conf_file = get_extra_conf_path("bluegene.conf");

	if (stat(bg_conf_file, &config_stat) < 0)
		fatal("can't stat bluegene.conf file %s: %m", bg_conf_file);
	if (last_config_update) {
		_reopen_bridge_log();
		if (last_config_update == config_stat.st_mtime) {
			if (bg_conf->slurm_debug_flags & DEBUG_FLAG_SELECT_TYPE)
				info("%s unchanged", bg_conf_file);
		} else {
			info("Restart slurmctld for %s changes "
			     "to take effect",
			     bg_conf_file);
		}
		last_config_update = config_stat.st_mtime;
		xfree(bg_conf_file);
		return SLURM_SUCCESS;
	}
	last_config_update = config_stat.st_mtime;

	/* initialization */
	/* bg_conf defined in bg_node_alloc.h */
	if (!(tbl = config_make_tbl(bg_conf_file)))
		fatal("something wrong with opening/reading bluegene "
		      "conf file");
	xfree(bg_conf_file);

#ifdef HAVE_BGL
	if (s_p_get_array((void ***)&image_array,
			  &count, "AltBlrtsImage", tbl)) {
		for (i = 0; i < count; i++) {
			list_append(bg_conf->blrts_list, image_array[i]);
			image_array[i] = NULL;
		}
	}
	if (!s_p_get_string(&bg_conf->default_blrtsimage, "BlrtsImage", tbl)) {
		if (!list_count(bg_conf->blrts_list))
			fatal("BlrtsImage not configured "
			      "in bluegene.conf");
		itr = list_iterator_create(bg_conf->blrts_list);
		image = list_next(itr);
		image->def = true;
		list_iterator_destroy(itr);
		bg_conf->default_blrtsimage = xstrdup(image->name);
		info("Warning: using %s as the default BlrtsImage.  "
		     "If this isn't correct please set BlrtsImage",
		     bg_conf->default_blrtsimage);
	} else {
		if (bg_conf->slurm_debug_flags & DEBUG_FLAG_SELECT_TYPE)
			info("default BlrtsImage %s",
			     bg_conf->default_blrtsimage);
		image = xmalloc(sizeof(image_t));
		image->name = xstrdup(bg_conf->default_blrtsimage);
		image->def = true;
		image->groups = NULL;
		/* we want it to be first */
		list_push(bg_conf->blrts_list, image);
	}

	if (s_p_get_array((void ***)&image_array,
			  &count, "AltLinuxImage", tbl)) {
		for (i = 0; i < count; i++) {
			list_append(bg_conf->linux_list, image_array[i]);
			image_array[i] = NULL;
		}
	}
	if (!s_p_get_string(&bg_conf->default_linuximage, "LinuxImage", tbl)) {
		if (!list_count(bg_conf->linux_list))
			fatal("LinuxImage not configured "
			      "in bluegene.conf");
		itr = list_iterator_create(bg_conf->linux_list);
		image = list_next(itr);
		image->def = true;
		list_iterator_destroy(itr);
		bg_conf->default_linuximage = xstrdup(image->name);
		info("Warning: using %s as the default LinuxImage.  "
		     "If this isn't correct please set LinuxImage",
		     bg_conf->default_linuximage);
	} else {
		if (bg_conf->slurm_debug_flags & DEBUG_FLAG_SELECT_TYPE)
			info("default LinuxImage %s",
			     bg_conf->default_linuximage);
		image = xmalloc(sizeof(image_t));
		image->name = xstrdup(bg_conf->default_linuximage);
		image->def = true;
		image->groups = NULL;
		/* we want it to be first */
		list_push(bg_conf->linux_list, image);
	}

	if (s_p_get_array((void ***)&image_array,
			  &count, "AltRamDiskImage", tbl)) {
		for (i = 0; i < count; i++) {
			list_append(bg_conf->ramdisk_list, image_array[i]);
			image_array[i] = NULL;
		}
	}
	if (!s_p_get_string(&bg_conf->default_ramdiskimage,
			    "RamDiskImage", tbl)) {
		if (!list_count(bg_conf->ramdisk_list))
			fatal("RamDiskImage not configured "
			      "in bluegene.conf");
		itr = list_iterator_create(bg_conf->ramdisk_list);
		image = list_next(itr);
		image->def = true;
		list_iterator_destroy(itr);
		bg_conf->default_ramdiskimage = xstrdup(image->name);
		info("Warning: using %s as the default RamDiskImage.  "
		     "If this isn't correct please set RamDiskImage",
		     bg_conf->default_ramdiskimage);
	} else {
		if (bg_conf->slurm_debug_flags & DEBUG_FLAG_SELECT_TYPE)
			info("default RamDiskImage %s",
			     bg_conf->default_ramdiskimage);
		image = xmalloc(sizeof(image_t));
		image->name = xstrdup(bg_conf->default_ramdiskimage);
		image->def = true;
		image->groups = NULL;
		/* we want it to be first */
		list_push(bg_conf->ramdisk_list, image);
	}
#elif defined HAVE_BGP

	if (s_p_get_array((void ***)&image_array,
			  &count, "AltCnloadImage", tbl)) {
		for (i = 0; i < count; i++) {
			list_append(bg_conf->linux_list, image_array[i]);
			image_array[i] = NULL;
		}
	}
	if (!s_p_get_string(&bg_conf->default_linuximage, "CnloadImage", tbl)) {
		if (!list_count(bg_conf->linux_list))
			fatal("CnloadImage not configured "
			      "in bluegene.conf");
		itr = list_iterator_create(bg_conf->linux_list);
		image = list_next(itr);
		image->def = true;
		list_iterator_destroy(itr);
		bg_conf->default_linuximage = xstrdup(image->name);
		info("Warning: using %s as the default CnloadImage.  "
		     "If this isn't correct please set CnloadImage",
		     bg_conf->default_linuximage);
	} else {
		if (bg_conf->slurm_debug_flags & DEBUG_FLAG_SELECT_TYPE)
			info("default CnloadImage %s",
			     bg_conf->default_linuximage);
		image = xmalloc(sizeof(image_t));
		image->name = xstrdup(bg_conf->default_linuximage);
		image->def = true;
		image->groups = NULL;
		/* we want it to be first */
		list_push(bg_conf->linux_list, image);
	}

	if (s_p_get_array((void ***)&image_array,
			  &count, "AltIoloadImage", tbl)) {
		for (i = 0; i < count; i++) {
			list_append(bg_conf->ramdisk_list, image_array[i]);
			image_array[i] = NULL;
		}
	}
	if (!s_p_get_string(&bg_conf->default_ramdiskimage,
			    "IoloadImage", tbl)) {
		if (!list_count(bg_conf->ramdisk_list))
			fatal("IoloadImage not configured "
			      "in bluegene.conf");
		itr = list_iterator_create(bg_conf->ramdisk_list);
		image = list_next(itr);
		image->def = true;
		list_iterator_destroy(itr);
		bg_conf->default_ramdiskimage = xstrdup(image->name);
		info("Warning: using %s as the default IoloadImage.  "
		     "If this isn't correct please set IoloadImage",
		     bg_conf->default_ramdiskimage);
	} else {
		if (bg_conf->slurm_debug_flags & DEBUG_FLAG_SELECT_TYPE)
			info("default IoloadImage %s",
			     bg_conf->default_ramdiskimage);
		image = xmalloc(sizeof(image_t));
		image->name = xstrdup(bg_conf->default_ramdiskimage);
		image->def = true;
		image->groups = NULL;
		/* we want it to be first */
		list_push(bg_conf->ramdisk_list, image);
	}

#endif
	if (s_p_get_array((void ***)&image_array,
			  &count, "AltMloaderImage", tbl)) {
		for (i = 0; i < count; i++) {
			list_append(bg_conf->mloader_list, image_array[i]);
			image_array[i] = NULL;
		}
	}
	if (!s_p_get_string(&bg_conf->default_mloaderimage,
			    "MloaderImage", tbl)) {
		if (!list_count(bg_conf->mloader_list))
			fatal("MloaderImage not configured "
			      "in bluegene.conf");
		itr = list_iterator_create(bg_conf->mloader_list);
		image = list_next(itr);
		image->def = true;
		list_iterator_destroy(itr);
		bg_conf->default_mloaderimage = xstrdup(image->name);
		info("Warning: using %s as the default MloaderImage.  "
		     "If this isn't correct please set MloaderImage",
		     bg_conf->default_mloaderimage);
	} else {
		if (bg_conf->slurm_debug_flags & DEBUG_FLAG_SELECT_TYPE)
			info("default MloaderImage %s",
			     bg_conf->default_mloaderimage);
		image = xmalloc(sizeof(image_t));
		image->name = xstrdup(bg_conf->default_mloaderimage);
		image->def = true;
		image->groups = NULL;
		/* we want it to be first */
		list_push(bg_conf->mloader_list, image);
	}

	if (!s_p_get_uint16(&bg_conf->mp_cnode_cnt, "MidplaneNodeCnt", tbl)) {
		if (!s_p_get_uint16(&bg_conf->mp_cnode_cnt,
				    "BasePartitionNodeCnt", tbl)) {
			error("MidplaneNodeCnt not configured in bluegene.conf "
			      "defaulting to 512 as MidplaneNodeCnt");
			bg_conf->mp_cnode_cnt = 512;
		}
	}

	if (bg_conf->mp_cnode_cnt <= 0)
		fatal("You should have more than 0 nodes "
		      "per midplane");
	bg_conf->actual_cnodes_per_mp = bg_conf->mp_cnode_cnt;
	bg_conf->quarter_cnode_cnt = bg_conf->mp_cnode_cnt/4;

	/* bg_conf->cpus_per_mp should have already been set from the
	 * node_init */
	if (bg_conf->cpus_per_mp < bg_conf->mp_cnode_cnt) {
		fatal("For some reason we have only %u cpus per mp, but "
		      "have %u cnodes per mp.  You need at least the same "
		      "number of cpus as you have cnodes per mp.  "
		      "Check the NodeName CPUs= "
		      "definition in the slurm.conf.",
		      bg_conf->cpus_per_mp, bg_conf->mp_cnode_cnt);
	}

	bg_conf->cpu_ratio = bg_conf->cpus_per_mp/bg_conf->mp_cnode_cnt;
	if (!bg_conf->cpu_ratio)
		fatal("We appear to have less than 1 cpu on a cnode.  "
		      "You specified %u for MidplaneNodeCnt "
		      "in the blugene.conf and %u cpus "
		      "for each node in the slurm.conf",
		      bg_conf->mp_cnode_cnt, bg_conf->cpus_per_mp);

	num_unused_cpus = 1;
	for (i = 0; i<SYSTEM_DIMENSIONS; i++)
		num_unused_cpus *= dims[i];
	num_unused_cpus *= bg_conf->cpus_per_mp;
	num_possible_unused_cpus = num_unused_cpus;

	if (!s_p_get_uint16(&bg_conf->nodecard_cnode_cnt,
			    "NodeBoardNodeCnt", tbl)) {
		if (!s_p_get_uint16(&bg_conf->nodecard_cnode_cnt,
				    "NodeCardNodeCnt", tbl)) {
			error("NodeCardNodeCnt not configured in bluegene.conf "
			      "defaulting to 32 as NodeCardNodeCnt");
			bg_conf->nodecard_cnode_cnt = 32;
		}
	}

	if (bg_conf->nodecard_cnode_cnt <= 0)
		fatal("You should have more than 0 nodes per nodecard");

	bg_conf->mp_nodecard_cnt =
		bg_conf->mp_cnode_cnt / bg_conf->nodecard_cnode_cnt;

	if (!s_p_get_uint16(&bg_conf->ionodes_per_mp, "IONodesPerMP", tbl))
		if (!s_p_get_uint16(&bg_conf->ionodes_per_mp, "Numpsets", tbl))
			fatal("Warning: IONodesPerMP not configured "
			      "in bluegene.conf");

	s_p_get_uint16(&bg_conf->max_block_err, "MaxBlockInError", tbl);

	tmp_bool = 0;
	s_p_get_boolean(&tmp_bool, "SubMidplaneSystem", tbl);
	bg_conf->sub_mp_sys = tmp_bool;

#ifdef HAVE_BGQ
	tmp_bool = 0;
	s_p_get_boolean(&tmp_bool, "AllowSubBlockAllocations", tbl);
	bg_conf->sub_blocks = tmp_bool;

	/* You can only have 16 ionodes per midplane */
	if (bg_conf->ionodes_per_mp > bg_conf->mp_nodecard_cnt)
		bg_conf->ionodes_per_mp = bg_conf->mp_nodecard_cnt;
#endif

	for (i=0; i<SYSTEM_DIMENSIONS; i++)
		bg_conf->default_conn_type[i] = (uint16_t)NO_VAL;
	s_p_get_string(&tmp_char, "DefaultConnType", tbl);
	if (tmp_char) {
		verify_conn_type(tmp_char, bg_conf->default_conn_type);
		if ((bg_conf->default_conn_type[0] != SELECT_MESH)
		    && (bg_conf->default_conn_type[0] != SELECT_TORUS))
			fatal("Can't have a DefaultConnType of %s "
			      "(only Mesh or Torus values are valid).",
			      tmp_char);
		xfree(tmp_char);
	} else
		bg_conf->default_conn_type[0] = SELECT_TORUS;

#ifndef HAVE_BG_L_P
	int first_conn_type = bg_conf->default_conn_type[0];
	for (i=1; i<SYSTEM_DIMENSIONS; i++) {
		if (bg_conf->default_conn_type[i] == (uint16_t)NO_VAL)
			bg_conf->default_conn_type[i] = first_conn_type;
		else if (bg_conf->default_conn_type[i] >= SELECT_SMALL)
			fatal("Can't have a DefaultConnType of %s "
			      "(only Mesh or Torus values are valid).",
			      tmp_char);
	}
#endif

	if (bg_conf->ionodes_per_mp) {
		bitstr_t *tmp_bitmap = NULL;
		int small_size = 1;

		/* THIS IS A HACK TO MAKE A 1 NODECARD SYSTEM WORK,
		 * Sometimes on a Q system the nodecard isn't in the 0
		 * spot, so only do this if you know it is in that
		 * spot.  Otherwise say the whole midplane is there
		 * and just make blocks over the whole thing.  Then
		 * you can error out the blocks that aren't usable. */
		if (bg_conf->sub_mp_sys
		    && bg_conf->mp_cnode_cnt == bg_conf->nodecard_cnode_cnt) {
#ifdef HAVE_BGQ
			bg_conf->quarter_ionode_cnt = 1;
			bg_conf->nodecard_ionode_cnt = 1;
#else
			bg_conf->quarter_ionode_cnt = 2;
			bg_conf->nodecard_ionode_cnt = 2;
#endif
		} else {
			bg_conf->quarter_ionode_cnt = bg_conf->ionodes_per_mp/4;
			bg_conf->nodecard_ionode_cnt =
				bg_conf->quarter_ionode_cnt/4;
		}

		/* How many nodecards per ionode */
		bg_conf->nc_ratio =
			((double)bg_conf->mp_cnode_cnt
			 / (double)bg_conf->nodecard_cnode_cnt)
			/ (double)bg_conf->ionodes_per_mp;
		/* How many ionodes per nodecard */
		bg_conf->io_ratio =
			(double)bg_conf->ionodes_per_mp /
			((double)bg_conf->mp_cnode_cnt
			 / (double)bg_conf->nodecard_cnode_cnt);

		/* How many cnodes per ionode */
		bg_conf->ionode_cnode_cnt =
			bg_conf->nodecard_cnode_cnt * bg_conf->nc_ratio;

		//info("got %f %f", bg_conf->nc_ratio, bg_conf->io_ratio);
		/* figure out the smallest block we can have on the
		   system */
#ifdef HAVE_BGL
		if (bg_conf->io_ratio >= 1)
			bg_conf->smallest_block=32;
		else
			bg_conf->smallest_block=128;
#else

		if (bg_conf->io_ratio >= 2)
			bg_conf->smallest_block=16;
		else if (bg_conf->io_ratio == 1)
			bg_conf->smallest_block=32;
		else if (bg_conf->io_ratio == .5)
			bg_conf->smallest_block=64;
		else if (bg_conf->io_ratio == .25)
			bg_conf->smallest_block=128;
		else if (bg_conf->io_ratio == .125)
			bg_conf->smallest_block=256;
		else {
			error("unknown ioratio %f.  Can't figure out "
			      "smallest block size, setting it to midplane",
			      bg_conf->io_ratio);
			bg_conf->smallest_block = 512;
		}
#endif
		if (bg_conf->slurm_debug_flags & DEBUG_FLAG_SELECT_TYPE)
			info("Smallest block possible on this system is %u",
			     bg_conf->smallest_block);
		/* below we are creating all the possible bitmaps for
		 * each size of small block
		 */
		if ((int)bg_conf->nodecard_ionode_cnt < 1) {
			bg_conf->nodecard_ionode_cnt = 0;
		} else {
			bg_lists->valid_small32 = list_create(_destroy_bitmap);
			/* This is supposed to be = and not ==; we only
			   want to decrement when small_size is assigned
			   something non-zero.
			*/
			if ((small_size = bg_conf->nodecard_ionode_cnt))
				small_size--;
			i = 0;
			while (i<bg_conf->ionodes_per_mp) {
				tmp_bitmap = bit_alloc(bg_conf->ionodes_per_mp);
				bit_nset(tmp_bitmap, i, i+small_size);
				i += small_size+1;
				list_append(bg_lists->valid_small32,
					    tmp_bitmap);
			}
		}
		/* If we only have 1 nodecard just jump to the end
		   since this will never need to happen below.
		   Pretty much a hack to avoid a seg fault ;). */
		if (bg_conf->mp_cnode_cnt == bg_conf->nodecard_cnode_cnt)
			goto no_calc;

		bg_lists->valid_small128 = list_create(_destroy_bitmap);
		if ((small_size = bg_conf->quarter_ionode_cnt))
			small_size--;
		i = 0;
		while (i<bg_conf->ionodes_per_mp) {
			tmp_bitmap = bit_alloc(bg_conf->ionodes_per_mp);
			bit_nset(tmp_bitmap, i, i+small_size);
			i += small_size+1;
			list_append(bg_lists->valid_small128, tmp_bitmap);
		}

#ifndef HAVE_BGL
		bg_lists->valid_small64 = list_create(_destroy_bitmap);
		if ((small_size = bg_conf->nodecard_ionode_cnt * 2))
			small_size--;
		i = 0;
		while (i<bg_conf->ionodes_per_mp) {
			tmp_bitmap = bit_alloc(bg_conf->ionodes_per_mp);
			bit_nset(tmp_bitmap, i, i+small_size);
			i += small_size+1;
			list_append(bg_lists->valid_small64, tmp_bitmap);
		}

		bg_lists->valid_small256 = list_create(_destroy_bitmap);
		if ((small_size = bg_conf->quarter_ionode_cnt * 2))
			small_size--;
		i = 0;
		while (i<bg_conf->ionodes_per_mp) {
			tmp_bitmap = bit_alloc(bg_conf->ionodes_per_mp);
			bit_nset(tmp_bitmap, i, i+small_size);
			i += small_size+1;
			list_append(bg_lists->valid_small256, tmp_bitmap);
		}
#endif
	} else {
		fatal("your ionodes_per_mp is 0");
	}

no_calc:

	if (!s_p_get_uint16(&bg_conf->bridge_api_verb, "BridgeAPIVerbose", tbl))
		info("Warning: BridgeAPIVerbose not configured "
		     "in bluegene.conf");
	if (!s_p_get_string(&bg_conf->bridge_api_file,
			    "BridgeAPILogFile", tbl))
		info("BridgeAPILogFile not configured in bluegene.conf");
	else
		_reopen_bridge_log();

	if (s_p_get_string(&tmp_char, "DenyPassthrough", tbl)) {
		if (strstr(tmp_char, "A"))
			ba_deny_pass |= PASS_DENY_A;
		if (strstr(tmp_char, "X"))
			ba_deny_pass |= PASS_DENY_X;
		if (strstr(tmp_char, "Y"))
			ba_deny_pass |= PASS_DENY_Y;
		if (strstr(tmp_char, "Z"))
			ba_deny_pass |= PASS_DENY_Z;
		if (!xstrcasecmp(tmp_char, "ALL"))
			ba_deny_pass |= PASS_DENY_ALL;
		bg_conf->deny_pass = ba_deny_pass;
		xfree(tmp_char);
	}

	if (!s_p_get_string(&tmp_char, "LayoutMode", tbl)) {
		info("Warning: LayoutMode was not specified in bluegene.conf "
		     "defaulting to STATIC partitioning");
		bg_conf->layout_mode = LAYOUT_STATIC;
	} else {
		if (!xstrcasecmp(tmp_char,"STATIC"))
			bg_conf->layout_mode = LAYOUT_STATIC;
		else if (!xstrcasecmp(tmp_char,"OVERLAP"))
			bg_conf->layout_mode = LAYOUT_OVERLAP;
		else if (!xstrcasecmp(tmp_char,"DYNAMIC"))
			bg_conf->layout_mode = LAYOUT_DYNAMIC;
		else {
			fatal("I don't understand this LayoutMode = %s",
			      tmp_char);
		}
		xfree(tmp_char);
	}

	/* add blocks defined in file */
	if (bg_conf->layout_mode != LAYOUT_DYNAMIC) {
		if (!s_p_get_array((void ***)&blockreq_array,
				   &count, "MPs", tbl)) {
			if (!s_p_get_array((void ***)&blockreq_array,
					   &count, "BPs", tbl)) {
				info("WARNING: no blocks defined in "
				     "bluegene.conf, "
				     "only making full system block");
				/* create_full_system_block(NULL); */
				if (bg_conf->sub_mp_sys ||
				    (bg_conf->mp_cnode_cnt ==
				     bg_conf->nodecard_cnode_cnt))
					fatal("On a sub-midplane system you "
					      "need to define the blocks you "
					      "want on your system.");
			}
		}

		for (i = 0; i < count; i++) {
			add_bg_record(bg_lists->main, NULL,
				      blockreq_array[i], 0, 0);
		}
	} else if (bg_conf->sub_mp_sys ||
		   (bg_conf->mp_cnode_cnt == bg_conf->nodecard_cnode_cnt))
		/* we can't do dynamic here on a sub-midplane system */
		fatal("On a sub-midplane system we can only do OVERLAP or "
		      "STATIC LayoutMode.  Please update your bluegene.conf.");

#ifdef HAVE_BGQ
	if ((bg_recover != NOT_FROM_CONTROLLER) && assoc_mgr_qos_list
	    && s_p_get_string(&tmp_char, "RebootQOSList", tbl)) {
		bool valid;
		char *token, *last = NULL;
		slurmdb_qos_rec_t *qos = NULL;
		assoc_mgr_lock_t locks = { NO_LOCK, NO_LOCK, NO_LOCK,
					   READ_LOCK, NO_LOCK,
					   NO_LOCK, NO_LOCK };

		/* Lock here to avoid g_qos_count changing under us */
		assoc_mgr_lock(&locks);
		bg_conf->reboot_qos_bitmap = bit_alloc(g_qos_count);
		itr = list_iterator_create(assoc_mgr_qos_list);

		token = strtok_r(tmp_char, ",", &last);
		while (token) {
			valid = false;
			while((qos = list_next(itr))) {
				if (!xstrcasecmp(token, qos->name)) {
					bit_set(bg_conf->reboot_qos_bitmap,
						qos->id);
					valid = true;
					break;
				}
			}
			if (!valid)
				error("Invalid RebootQOSList value: %s", token);
			list_iterator_reset(itr);
			token = strtok_r(NULL, ",", &last);
		}
		list_iterator_destroy(itr);
		xfree(tmp_char);
		assoc_mgr_unlock(&locks);
	}
#endif

	s_p_hashtbl_destroy(tbl);

	return SLURM_SUCCESS;
}
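For reference, a hypothetical bluegene.conf fragment exercising the keys parsed above; every value (and the image path) is illustrative only:

MloaderImage=/bgsys/drivers/ppcfloor/boot/uloader
MidplaneNodeCnt=512
NodeCardNodeCnt=32
IONodesPerMP=16
MaxBlockInError=0
LayoutMode=STATIC
DefaultConnType=TORUS
DenyPassthrough=X,Y,Z
BridgeAPIVerbose=0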
Example #10
extern int select_nodeinfo_set_all(void)
{
	ListIterator itr = NULL;
	struct node_record *node_ptr = NULL;
	int i=0;
	bg_record_t *bg_record = NULL;
	static time_t last_set_all = 0;
	ba_mp_t *ba_mp;
	node_subgrp_t *subgrp = NULL;
	int bit_count;

	//uint32_t cluster_flags = slurmdb_setup_cluster_flags();

	if (!blocks_are_created)
		return SLURM_NO_CHANGE_IN_DATA;

	if (!g_bitmap_size) {
		/* if (cluster_flags & CLUSTER_FLAG_BGQ) */
		/* 	g_bitmap_size = bg_conf->mp_cnode_cnt; */
		/* else */
			g_bitmap_size = bg_conf->ionodes_per_mp;
	}

	/* only set this once when the last_bg_update is newer than
	   the last time we set things up. */
	if (last_set_all && (last_bg_update-1 < last_set_all)) {
		debug2("Node select info for set all hasn't "
		       "changed since %ld",
		       last_set_all);
		return SLURM_NO_CHANGE_IN_DATA;
	}
	last_set_all = last_bg_update;

	/* set this here so we know things have changed */
	last_node_update = time(NULL);

	slurm_mutex_lock(&block_state_mutex);
	for (i=0; i<node_record_count; i++) {
		select_nodeinfo_t *nodeinfo;

		node_ptr = &(node_record_table_ptr[i]);
		xassert(node_ptr->select_nodeinfo);
		nodeinfo = node_ptr->select_nodeinfo->data;
		xassert(nodeinfo);
		xassert(nodeinfo->subgrp_list);
		list_flush(nodeinfo->subgrp_list);
		if (nodeinfo->bitmap_size != g_bitmap_size)
			nodeinfo->bitmap_size = g_bitmap_size;
	}

	itr = list_iterator_create(bg_lists->main);
	while ((bg_record = list_next(itr))) {
		enum node_states state = NODE_STATE_UNKNOWN;
		select_nodeinfo_t *nodeinfo;
		bitstr_t *bitmap;
		ListIterator itr2 = NULL;

		/* Only mark unidle blocks */
		if (bg_record->job_list && list_count(bg_record->job_list)) {
			struct job_record *job_ptr;
			select_jobinfo_t *jobinfo;
			ListIterator itr =
				list_iterator_create(bg_record->job_list);
			ba_mp = list_peek(bg_record->ba_mp_list);
			node_ptr = &(node_record_table_ptr[ba_mp->index]);
			xassert(node_ptr->select_nodeinfo);
			nodeinfo = node_ptr->select_nodeinfo->data;
			xassert(nodeinfo);
			xassert(nodeinfo->subgrp_list);
			if (ba_mp->cnode_err_bitmap
			    && (bit_count =
				bit_set_count(ba_mp->cnode_err_bitmap))) {
				subgrp = _find_subgrp(nodeinfo->subgrp_list,
						      NODE_STATE_ERROR,
						      g_bitmap_size);
				/* FIXME: the subgrp->bitmap isn't set here. */
				subgrp->cnode_cnt += bit_count;
			}

			subgrp = _find_subgrp(nodeinfo->subgrp_list,
					      NODE_STATE_ALLOCATED,
					      g_bitmap_size);
			while ((job_ptr = list_next(itr))) {
				jobinfo = job_ptr->select_jobinfo->data;
				/* FIXME: the subgrp->bitmap isn't set here. */
				subgrp->cnode_cnt += jobinfo->cnode_cnt;
			}
			list_iterator_destroy(itr);
			continue;
		} else if (bg_record->job_running == NO_JOB_RUNNING)
			continue;

		if (bg_record->state & BG_BLOCK_ERROR_FLAG)
			state = NODE_STATE_ERROR;
		else if (bg_record->job_running > NO_JOB_RUNNING) {
			/* we don't need to set the allocated here
			 * since the whole midplane is allocated */
			if (bg_record->conn_type[0] < SELECT_SMALL)
				continue;
			state = NODE_STATE_ALLOCATED;
		} else {
			error("not sure why we got here with block %s %s",
			      bg_record->bg_block_id,
			      bg_block_state_string(bg_record->state));
			continue;
		}
		/* if ((cluster_flags & CLUSTER_FLAG_BGQ) */
		/*     && (state != NODE_STATE_ERROR)) */
		/* 	bitmap = bg_record->cnodes_used_bitmap; */
		/* else */
		bitmap = bg_record->ionode_bitmap;

		itr2 = list_iterator_create(bg_record->ba_mp_list);
		while ((ba_mp = list_next(itr2))) {
			if (!ba_mp->used)
				continue;

			node_ptr = &(node_record_table_ptr[ba_mp->index]);

			xassert(node_ptr->select_nodeinfo);
			nodeinfo = node_ptr->select_nodeinfo->data;
			xassert(nodeinfo);
			xassert(nodeinfo->subgrp_list);

			if (ba_mp->cnode_err_bitmap
			    && (state == NODE_STATE_ALLOCATED)
			    && (bit_count =
				bit_set_count(ba_mp->cnode_err_bitmap))) {
				subgrp = _find_subgrp(nodeinfo->subgrp_list,
						      NODE_STATE_ERROR,
						      g_bitmap_size);
				/* FIXME: the subgrp->bitmap isn't set here. */
				subgrp->cnode_cnt += bit_count;
			}

			subgrp = _find_subgrp(nodeinfo->subgrp_list,
					      state, g_bitmap_size);

			if (subgrp->cnode_cnt < bg_conf->mp_cnode_cnt) {
				/* if (cluster_flags & CLUSTER_FLAG_BGQ) { */
				/* 	bit_or(subgrp->bitmap, bitmap); */
				/* 	subgrp->cnode_cnt += */
				/* 		bit_set_count(bitmap); */
				/* } else */ if (bg_record->cnode_cnt
					   < bg_conf->mp_cnode_cnt) {
					bit_or(subgrp->bitmap, bitmap);
					subgrp->cnode_cnt +=
						bg_record->cnode_cnt;
				} else {
					bit_nset(subgrp->bitmap,
						 0,
						 (g_bitmap_size-1));
					subgrp->cnode_cnt =
						bg_conf->mp_cnode_cnt;
				}
			}
		}
		list_iterator_destroy(itr2);
	}
	list_iterator_destroy(itr);
	slurm_mutex_unlock(&block_state_mutex);

	return SLURM_SUCCESS;
}
Example #11
int
main(int argc, char **argv)
{
	int c;
	int count, waittime;
	int set_lun;
	int fd, retval;
	struct ctlstat_context ctx;

	/* default values */
	retval = 0;
	waittime = 1;
	count = -1;
	memset(&ctx, 0, sizeof(ctx));
	ctx.numdevs = 3;
	ctx.mode = CTLSTAT_MODE_STANDARD;
	ctx.flags |= CTLSTAT_FLAG_CPU;
	ctx.flags |= CTLSTAT_FLAG_FIRST_RUN;
	ctx.flags |= CTLSTAT_FLAG_HEADER;

	while ((c = getopt(argc, argv, ctlstat_opts)) != -1) {
		switch (c) {
		case 'C':
			ctx.flags &= ~CTLSTAT_FLAG_CPU;
			break;
		case 'c':
			count = atoi(optarg);
			break;
		case 'd':
			ctx.flags |= CTLSTAT_FLAG_DMA_TIME;
			break;
		case 'D':
			ctx.mode = CTLSTAT_MODE_DUMP;
			waittime = 30;
			break;
		case 'h':
			ctx.flags &= ~CTLSTAT_FLAG_HEADER;
			break;
		case 'j':
			ctx.mode = CTLSTAT_MODE_JSON;
			waittime = 30;
			break;
		case 'l': {
			int cur_lun;

			cur_lun = atoi(optarg);
			if (cur_lun > CTL_STAT_LUN_BITS)
				errx(1, "Invalid LUN number %d", cur_lun);

			bit_ffs(ctx.lun_mask, CTL_STAT_LUN_BITS, &set_lun);
			if (set_lun == -1)
				ctx.numdevs = 1;
			else
				ctx.numdevs++;
			bit_set(ctx.lun_mask, cur_lun);
			break;
		}
		case 'n':
			ctx.numdevs = atoi(optarg);
			break;
		case 't':
			ctx.flags |= CTLSTAT_FLAG_TOTALS;
			ctx.numdevs = 3;
			break;
		case 'w':
			waittime = atoi(optarg);
			break;
		default:
			retval = 1;
			usage(retval);
			exit(retval);
			break;
		}
	}

	bit_ffs(ctx.lun_mask, CTL_STAT_LUN_BITS, &set_lun);

	if ((F_TOTALS(&ctx))
	 && (set_lun != -1)) {
		errx(1, "Total Mode (-t) is incompatible with individual "
		     "LUN mode (-l)");
	} else if (set_lun == -1) {
		/*
		 * Note that this just selects the first N LUNs to display,
		 * but at this point we have no knowledge of which LUN
		 * numbers actually exist.  So we may select LUNs that
		 * aren't there.
		 */
		bit_nset(ctx.lun_mask, 0, min(ctx.numdevs - 1,
			 CTL_STAT_LUN_BITS - 1));
	}

	if ((fd = open(CTL_DEFAULT_DEV, O_RDWR)) == -1)
		err(1, "cannot open %s", CTL_DEFAULT_DEV);

	for (;count != 0;) {
		ctx.tmp_lun_stats = ctx.prev_lun_stats;
		ctx.prev_lun_stats = ctx.cur_lun_stats;
		ctx.cur_lun_stats = ctx.tmp_lun_stats;
		ctx.prev_time = ctx.cur_time;
		ctx.prev_cpu = ctx.cur_cpu;
		if (getstats(fd, &ctx.num_luns, &ctx.cur_lun_stats,
			     &ctx.cur_time, &ctx.flags) != 0)
			errx(1, "error returned from getstats()");

		switch(ctx.mode) {
		case CTLSTAT_MODE_STANDARD:
			ctlstat_standard(&ctx);
			break;
		case CTLSTAT_MODE_DUMP:
			ctlstat_dump(&ctx);
			break;
		case CTLSTAT_MODE_JSON:
			ctlstat_json(&ctx);
			break;
		default:
			break;
		}

		fprintf(stdout, "\n");
		ctx.flags &= ~CTLSTAT_FLAG_FIRST_RUN;
		if (count != 1)
			sleep(waittime);
		if (count > 0)
			count--;
	}

	exit (retval);
}
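The FreeBSD <bitstring.h> variants used here take the bitmap length: bit_ffs(mask, nbits, &result) stores the first set bit (or -1) through a pointer, and bit_nset() sets an inclusive range. A minimal standalone demo, with an illustrative 64-bit mask:

#include <bitstring.h>
#include <stdio.h>

int
main(void)
{
	static bitstr_t bit_decl(mask, 64);	/* static => zero-filled */
	int first;

	bit_ffs(mask, 64, &first);
	printf("first set bit: %d\n", first);	/* -1: nothing selected */
	bit_nset(mask, 0, 2);			/* select LUNs 0..2 */
	bit_ffs(mask, 64, &first);
	printf("first set bit: %d\n", first);	/* 0 */
	return (0);
}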
Example #12
File: req.c Project: VURM/slurm
static int
_handle_completion(int fd, slurmd_job_t *job, uid_t uid)
{
	int rc = SLURM_SUCCESS;
	int errnum = 0;
	int first;
	int last;
	jobacctinfo_t *jobacct = NULL;
	int step_rc;

	debug("_handle_completion for job %u.%u",
	      job->jobid, job->stepid);

	debug3("  uid = %d", uid);
	if (!_slurm_authorized_user(uid)) {
		debug("step completion message from uid %ld for job %u.%u ",
		      (long)uid, job->jobid, job->stepid);
		rc = -1;
		errnum = EPERM;
		/* Send the return code and errno */
		safe_write(fd, &rc, sizeof(int));
		safe_write(fd, &errnum, sizeof(int));
		return SLURM_SUCCESS;
	}

	safe_read(fd, &first, sizeof(int));
	safe_read(fd, &last, sizeof(int));
	safe_read(fd, &step_rc, sizeof(int));
	jobacct = jobacct_gather_g_create(NULL);
	jobacct_gather_g_getinfo(jobacct, JOBACCT_DATA_PIPE, &fd);

	/*
	 * Record the completed nodes
	 */
	pthread_mutex_lock(&step_complete.lock);
	if (! step_complete.wait_children) {
		rc = -1;
		errnum = ETIMEDOUT; /* not used anyway */
		goto timeout;
	}

	/* SlurmUser or root can craft a launch without a valid credential
	 * ("srun --no-alloc ...") and no tree information can be built
	 *  without the hostlist from the credential. */
	if (step_complete.rank >= 0) {
#if 0
		char bits_string[128];
		debug2("Setting range %d (bit %d) through %d(bit %d)",
		       first, first-(step_complete.rank+1),
		       last, last-(step_complete.rank+1));
		bit_fmt(bits_string, sizeof(bits_string), step_complete.bits);
		debug2("  before bits: %s", bits_string);
#endif
		bit_nset(step_complete.bits,
			 first - (step_complete.rank+1),
			 last - (step_complete.rank+1));
#if 0
		bit_fmt(bits_string, sizeof(bits_string), step_complete.bits);
		debug2("  after bits: %s", bits_string);
#endif
	}
	step_complete.step_rc = MAX(step_complete.step_rc, step_rc);

	/************* acct stuff ********************/
	jobacct_gather_g_aggregate(step_complete.jobacct, jobacct);
timeout:
	jobacct_gather_g_destroy(jobacct);
	/*********************************************/

	/* Send the return code and errno, we do this within the locked
	 * region to ensure that the stepd doesn't exit before we can
	 * perform this send. */
	safe_write(fd, &rc, sizeof(int));
	safe_write(fd, &errnum, sizeof(int));
	pthread_cond_signal(&step_complete.cond);
	pthread_mutex_unlock(&step_complete.lock);

	return SLURM_SUCCESS;
rwfail:
	return SLURM_FAILURE;
}
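Worked example of the offset arithmetic (values assumed): a stepd at tree rank 2 owns the subtree starting at node rank 3, so when children covering ranks 3..5 report completion, bit_nset() sets bits first - (rank+1) = 0 through last - (rank+1) = 2 in step_complete.bits.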
Example #13
/* return NULL if eof or syntax error occurs;
 * otherwise return a pointer to a new entry.
 */
entry *
load_entry(FILE *file, void (*error_func)(const char *), struct passwd *pw,
    char **envp) {
	/* this function reads one crontab entry -- the next -- from a file.
	 * it skips any leading blank lines, ignores comments, and returns
	 * NULL if for any reason the entry can't be read and parsed.
	 *
	 * the entry is also parsed here.
	 *
	 * syntax:
	 *   user crontab:
	 *	minutes hours doms months dows cmd\n
	 *   system crontab (/etc/crontab):
	 *	minutes hours doms months dows USERNAME cmd\n
	 */

	ecode_e	ecode = e_none;
	entry *e;
	int ch;
	char cmd[MAX_COMMAND];
	char envstr[MAX_ENVSTR];
	char **tenvp;

	Debug(DPARS, ("load_entry()...about to eat comments\n"));

	skip_comments(file);

	ch = get_char(file);
	if (ch == EOF)
		return (NULL);

	/* ch is now the first useful character of a useful line.
	 * it may be an @special or it may be the first character
	 * of a list of minutes.
	 */

	e = calloc(sizeof(*e), sizeof(char));

	if (ch == '@') {
		/* all of these should be flagged and load-limited; i.e.,
		 * instead of @hourly meaning "0 * * * *" it should mean
		 * "close to the front of every hour but not 'til the
		 * system load is low".  Problems are: how do you know
		 * what "low" means? (save me from /etc/cron.conf!) and:
		 * how to guarantee low variance (how low is low?), which
	 * means how do we run roughly every hour -- seems like
		 * we need to keep a history or let the first hour set
		 * the schedule, which means we aren't load-limited
		 * anymore.  too much for my overloaded brain. (vix, jan90)
		 * HINT
		 */
		ch = get_string(cmd, MAX_COMMAND, file, " \t\n");
		if (!strcmp("reboot", cmd)) {
			e->flags |= WHEN_REBOOT;
		} else if (!strcmp("yearly", cmd) || !strcmp("annually", cmd)){
			bit_set(e->minute, 0);
			bit_set(e->hour, 0);
			bit_set(e->dom, 0);
			bit_set(e->month, 0);
			bit_nset(e->dow, 0, (LAST_DOW-FIRST_DOW+1));
			e->flags |= DOW_STAR;
		} else if (!strcmp("monthly", cmd)) {
			bit_set(e->minute, 0);
			bit_set(e->hour, 0);
			bit_set(e->dom, 0);
			bit_nset(e->month, 0, (LAST_MONTH-FIRST_MONTH+1));
			bit_nset(e->dow, 0, (LAST_DOW-FIRST_DOW+1));
			e->flags |= DOW_STAR;
		} else if (!strcmp("weekly", cmd)) {
			bit_set(e->minute, 0);
			bit_set(e->hour, 0);
			bit_nset(e->dom, 0, (LAST_DOM-FIRST_DOM+1));
			bit_nset(e->month, 0, (LAST_MONTH-FIRST_MONTH+1));
			bit_set(e->dow, 0);
			e->flags |= DOM_STAR;
		} else if (!strcmp("daily", cmd) || !strcmp("midnight", cmd)) {
			bit_set(e->minute, 0);
			bit_set(e->hour, 0);
			bit_nset(e->dom, 0, (LAST_DOM-FIRST_DOM+1));
			bit_nset(e->month, 0, (LAST_MONTH-FIRST_MONTH+1));
			bit_nset(e->dow, 0, (LAST_DOW-FIRST_DOW+1));
			e->flags |= DOM_STAR | DOW_STAR;
		} else if (!strcmp("hourly", cmd)) {
			bit_set(e->minute, 0);
			bit_nset(e->hour, 0, (LAST_HOUR-FIRST_HOUR+1));
			bit_nset(e->dom, 0, (LAST_DOM-FIRST_DOM+1));
			bit_nset(e->month, 0, (LAST_MONTH-FIRST_MONTH+1));
			bit_nset(e->dow, 0, (LAST_DOW-FIRST_DOW+1));
			e->flags |= DOM_STAR | DOW_STAR;
		} else {
			ecode = e_timespec;
			goto eof;
		}
		/* Advance past whitespace between shortcut and
		 * username/command.
		 */
		Skip_Blanks(ch, file);
		if (ch == EOF || ch == '\n') {
			ecode = e_cmd;
			goto eof;
		}
	} else {
		Debug(DPARS, ("load_entry()...about to parse numerics\n"));

		if (ch == '*')
			e->flags |= MIN_STAR;
		ch = get_list(e->minute, FIRST_MINUTE, LAST_MINUTE,
			      PPC_NULL, ch, file);
		if (ch == EOF) {
			ecode = e_minute;
			goto eof;
		}

		/* hours
		 */

		if (ch == '*')
			e->flags |= HR_STAR;
		ch = get_list(e->hour, FIRST_HOUR, LAST_HOUR,
			      PPC_NULL, ch, file);
		if (ch == EOF) {
			ecode = e_hour;
			goto eof;
		}

		/* DOM (days of month)
		 */

		if (ch == '*')
			e->flags |= DOM_STAR;
		ch = get_list(e->dom, FIRST_DOM, LAST_DOM,
			      PPC_NULL, ch, file);
		if (ch == EOF) {
			ecode = e_dom;
			goto eof;
		}

		/* month
		 */

		ch = get_list(e->month, FIRST_MONTH, LAST_MONTH,
			      MonthNames, ch, file);
		if (ch == EOF) {
			ecode = e_month;
			goto eof;
		}

		/* DOW (days of week)
		 */

		if (ch == '*')
			e->flags |= DOW_STAR;
		ch = get_list(e->dow, FIRST_DOW, LAST_DOW,
			      DowNames, ch, file);
		if (ch == EOF) {
			ecode = e_dow;
			goto eof;
		}
	}

	/* make sundays equivalent */
	if (bit_test(e->dow, 0) || bit_test(e->dow, 7)) {
		bit_set(e->dow, 0);
		bit_set(e->dow, 7);
	}

	/* check for premature EOL and catch a common typo */
	if (ch == '\n' || ch == '*') {
		ecode = e_cmd;
		goto eof;
	}

	/* ch is the first character of a command, or a username */
	unget_char(ch, file);

	if (!pw) {
		char		*username = cmd;	/* temp buffer */

		Debug(DPARS, ("load_entry()...about to parse username\n"));
		ch = get_string(username, MAX_COMMAND, file, " \t\n");

		Debug(DPARS, ("load_entry()...got %s\n",username));
		if (ch == EOF || ch == '\n' || ch == '*') {
			ecode = e_cmd;
			goto eof;
		}

		pw = getpwnam(username);
		if (pw == NULL) {
			ecode = e_username;
			goto eof;
		}
		Debug(DPARS, ("load_entry()...uid %ld, gid %ld\n",
			      (long)pw->pw_uid, (long)pw->pw_gid));
	}

	if ((e->pwd = pw_dup(pw)) == NULL) {
		ecode = e_memory;
		goto eof;
	}
	(void)memset(e->pwd->pw_passwd, 0, strlen(e->pwd->pw_passwd));

	/* copy and fix up environment.  some variables are just defaults and
	 * others are overrides.
	 */
	if ((e->envp = env_copy(envp)) == NULL) {
		ecode = e_memory;
		goto eof;
	}
	if (!env_get("SHELL", e->envp)) {
		if (glue_strings(envstr, sizeof envstr, "SHELL",
				 _PATH_BSHELL, '=')) {
			if ((tenvp = env_set(e->envp, envstr)) == NULL) {
				ecode = e_memory;
				goto eof;
			}
			e->envp = tenvp;
		} else
			log_it("CRON", getpid(), "error", "can't set SHELL");
	}
	if (!env_get("HOME", e->envp)) {
		if (glue_strings(envstr, sizeof envstr, "HOME",
				 pw->pw_dir, '=')) {
			if ((tenvp = env_set(e->envp, envstr)) == NULL) {
				ecode = e_memory;
				goto eof;
			}
			e->envp = tenvp;
		} else
			log_it("CRON", getpid(), "error", "can't set HOME");
	}
	/* If login.conf is in use we will get the default PATH later. */
	if (!env_get("PATH", e->envp)) {
		if (glue_strings(envstr, sizeof envstr, "PATH",
				 _PATH_DEFPATH, '=')) {
			if ((tenvp = env_set(e->envp, envstr)) == NULL) {
				ecode = e_memory;
				goto eof;
			}
			e->envp = tenvp;
		} else
			log_it("CRON", getpid(), "error", "can't set PATH");
	}
	if (glue_strings(envstr, sizeof envstr, "LOGNAME",
			 pw->pw_name, '=')) {
		if ((tenvp = env_set(e->envp, envstr)) == NULL) {
			ecode = e_memory;
			goto eof;
		}
		e->envp = tenvp;
	} else
		log_it("CRON", getpid(), "error", "can't set LOGNAME");
#if defined(BSD) || defined(__linux)
	if (glue_strings(envstr, sizeof envstr, "USER",
			 pw->pw_name, '=')) {
		if ((tenvp = env_set(e->envp, envstr)) == NULL) {
			ecode = e_memory;
			goto eof;
		}
		e->envp = tenvp;
	} else
		log_it("CRON", getpid(), "error", "can't set USER");
#endif

	Debug(DPARS, ("load_entry()...about to parse command\n"));

	/* If the first character of the command is '-' it is a cron option.
	 */
	while ((ch = get_char(file)) == '-') {
		switch (ch = get_char(file)) {
		case 'q':
			e->flags |= DONT_LOG;
			Skip_Nonblanks(ch, file);
			break;
		default:
			ecode = e_option;
			goto eof;
		}
		Skip_Blanks(ch, file);
		if (ch == EOF || ch == '\n') {
			ecode = e_cmd;
			goto eof;
		}
	}
	unget_char(ch, file);

	/* Everything up to the next \n or EOF is part of the command...
	 * too bad we don't know in advance how long it will be, since we
	 * need to malloc a string for it... so, we limit it to MAX_COMMAND.
	 */ 
	ch = get_string(cmd, MAX_COMMAND, file, "\n");

	/* a file without a \n before the EOF is rude, so we'll complain...
	 */
	if (ch == EOF) {
		ecode = e_cmd;
		goto eof;
	}

	/* got the command in the 'cmd' string; save it in *e.
	 */
	if ((e->cmd = strdup(cmd)) == NULL) {
		ecode = e_memory;
		goto eof;
	}

	Debug(DPARS, ("load_entry()...returning successfully\n"));

	/* success, fini, return pointer to the entry we just created...
	 */
	return (e);

 eof:
	if (e->envp)
		env_free(e->envp);
	if (e->pwd)
		free(e->pwd);
	if (e->cmd)
		free(e->cmd);
	free(e);
	while (ch != '\n' && !feof(file))
		ch = get_char(file);
	if (ecode != e_none && error_func)
		(*error_func)(ecodes[(int)ecode]);
	return (NULL);
}
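Per the shortcut handling above, @daily pins minute and hour to 0 and fills every day-of-month, month, and day-of-week bit, so these two crontab lines (command path hypothetical) parse to the same schedule:

@daily      /usr/local/bin/nightly-job
0 0 * * *   /usr/local/bin/nightly-job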
Example #14
File: req.c Project: birc-aeh/slurm
static int
_handle_completion(int fd, stepd_step_rec_t *job, uid_t uid)
{
	int rc = SLURM_SUCCESS;
	int errnum = 0;
	int first;
	int last;
	jobacctinfo_t *jobacct = NULL;
	int step_rc;
	char* buf;
	int len;
	Buf buffer;
	int version;	/* For future use */
	bool lock_set = false;

	debug("_handle_completion for job %u.%u",
	      job->jobid, job->stepid);

	debug3("  uid = %d", uid);
	if (!_slurm_authorized_user(uid)) {
		debug("step completion message from uid %ld for job %u.%u ",
		      (long)uid, job->jobid, job->stepid);
		rc = -1;
		errnum = EPERM;
		/* Send the return code and errno */
		safe_write(fd, &rc, sizeof(int));
		safe_write(fd, &errnum, sizeof(int));
		return SLURM_SUCCESS;
	}

	safe_read(fd, &version, sizeof(int));
	safe_read(fd, &first, sizeof(int));
	safe_read(fd, &last, sizeof(int));
	safe_read(fd, &step_rc, sizeof(int));

	/*
	 * We must not use getinfo over a pipe with slurmd here.
	 * slurmstepd makes heavy use of setinfo over a pipe with
	 * slurmd, and doing the reverse can result in a deadlock
	 * scenario with slurmd:
	 * slurmd(lockforread,write)/slurmstepd(write,lockforread)
	 * Do pack/unpack instead so that slurmd and slurmstepd
	 * stay independent.
	 */
	safe_read(fd, &len, sizeof(int));
	buf = xmalloc(len);
	safe_read(fd, buf, len);
	buffer = create_buf(buf, len);
	jobacctinfo_unpack(&jobacct, SLURM_PROTOCOL_VERSION,
			   PROTOCOL_TYPE_SLURM, buffer, 1);
	free_buf(buffer);

	/*
	 * Record the completed nodes
	 */
	pthread_mutex_lock(&step_complete.lock);
	lock_set = true;
	if (! step_complete.wait_children) {
		rc = -1;
		errnum = ETIMEDOUT; /* not used anyway */
		goto timeout;
	}

	/* SlurmUser or root can craft a launch without a valid credential
	 * ("srun --no-alloc ...") and no tree information can be built
	 *  without the hostlist from the credential. */
	if (step_complete.rank >= 0) {
#if 0
		char bits_string[128];
		debug2("Setting range %d (bit %d) through %d(bit %d)",
		       first, first-(step_complete.rank+1),
		       last, last-(step_complete.rank+1));
		bit_fmt(bits_string, sizeof(bits_string), step_complete.bits);
		debug2("  before bits: %s", bits_string);
#endif
		bit_nset(step_complete.bits,
			 first - (step_complete.rank+1),
			 last - (step_complete.rank+1));
#if 0
		bit_fmt(bits_string, sizeof(bits_string), step_complete.bits);
		debug2("  after bits: %s", bits_string);
#endif
	}
	step_complete.step_rc = MAX(step_complete.step_rc, step_rc);

	/************* acct stuff ********************/
	jobacctinfo_aggregate(step_complete.jobacct, jobacct);
timeout:
	jobacctinfo_destroy(jobacct);
	/*********************************************/

	/* Send the return code and errno, we do this within the locked
	 * region to ensure that the stepd doesn't exit before we can
	 * perform this send. */
	safe_write(fd, &rc, sizeof(int));
	safe_write(fd, &errnum, sizeof(int));
	pthread_cond_signal(&step_complete.cond);
	pthread_mutex_unlock(&step_complete.lock);

	return SLURM_SUCCESS;


rwfail:	if (lock_set) {
		pthread_cond_signal(&step_complete.cond);
		pthread_mutex_unlock(&step_complete.lock);
	}
	return SLURM_FAILURE;
}
Example #15
extern int handle_small_record_request(List records,
				       select_ba_request_t *blockreq,
				       bg_record_t *bg_record, bitoff_t start)
{
	bitstr_t *ionodes = bit_alloc(bg_conf->ionodes_per_mp);
	int i=0, ionode_cnt = 0;
	bg_record_t *found_record = NULL;

	xassert(records);
	xassert(blockreq);
	xassert(bg_record);

	xassert(start >= 0);
	xassert(start < bg_conf->ionodes_per_mp);

#ifndef HAVE_BGL
	for(i=0; i<blockreq->small16; i++) {
		bit_nset(ionodes, start, start);
		found_record = create_small_record(bg_record, ionodes, 16);
		/* this needs to be an append so we
		   keep things in the order we got
		   them, they will be sorted later */
		list_append(records, found_record);
		bit_nclear(ionodes, start, start);
		start++;
	}
#endif
	if ((ionode_cnt = bg_conf->nodecard_ionode_cnt))
		ionode_cnt--;
	for(i=0; i<blockreq->small32; i++) {
		bit_nset(ionodes, start, start+ionode_cnt);
		found_record = create_small_record(bg_record, ionodes, 32);
		/* this needs to be an append so we
		   keep things in the order we got
		   them, they will be sorted later */
		list_append(records, found_record);
		bit_nclear(ionodes, start, start+ionode_cnt);
		start+=ionode_cnt+1;
	}

#ifndef HAVE_BGL
	if ((ionode_cnt = bg_conf->nodecard_ionode_cnt * 2))
		ionode_cnt--;
	for(i=0; i<blockreq->small64; i++) {
		bit_nset(ionodes, start, start+ionode_cnt);
		found_record = create_small_record(bg_record, ionodes, 64);
		/* this needs to be an append so we
		   keep things in the order we got
		   them, they will be sorted later */
		list_append(records, found_record);
		bit_nclear(ionodes, start, start+ionode_cnt);
		start+=ionode_cnt+1;
	}
#endif
	if ((ionode_cnt = bg_conf->quarter_ionode_cnt))
		ionode_cnt--;
	for(i=0; i<blockreq->small128; i++) {
		bit_nset(ionodes, start, start+ionode_cnt);
		found_record = create_small_record(bg_record, ionodes, 128);
		/* this needs to be an append so we
		   keep things in the order we got
		   them, they will be sorted later */
		list_append(records, found_record);
		bit_nclear(ionodes, start, start+ionode_cnt);
		start+=ionode_cnt+1;
	}

#ifndef HAVE_BGL
	if ((ionode_cnt = bg_conf->quarter_ionode_cnt * 2))
		ionode_cnt--;
	for(i=0; i<blockreq->small256; i++) {
		bit_nset(ionodes, start, start+ionode_cnt);
		found_record = create_small_record(bg_record, ionodes, 256);
		/* this needs to be an append so we
		   keep things in the order we got
		   them, they will be sorted later */
		list_append(records, found_record);
		bit_nclear(ionodes, start, start+ionode_cnt);
		start+=ionode_cnt+1;
	}
#endif

	FREE_NULL_BITMAP(ionodes);

	return SLURM_SUCCESS;
}
Example #16
extern int
get_cpuinfo(uint16_t *p_cpus, uint16_t *p_boards,
	    uint16_t *p_sockets, uint16_t *p_cores, uint16_t *p_threads,
	    uint16_t *p_block_map_size,
	    uint16_t **p_block_map, uint16_t **p_block_map_inv)
{
	enum { SOCKET=0, CORE=1, PU=2, LAST_OBJ=3 };
	hwloc_topology_t topology;
	hwloc_obj_t obj;
	hwloc_obj_type_t objtype[LAST_OBJ];
	unsigned idx[LAST_OBJ];
	int nobj[LAST_OBJ];
	bitstr_t *used_socket = NULL;
	int *cores_per_socket;
	int actual_cpus;
	int macid;
	int absid;
	int actual_boards = 1, depth, sock_cnt, tot_socks = 0;
	int i, used_core_idx, used_sock_idx;

	debug2("hwloc_topology_init");
	if (hwloc_topology_init(&topology)) {
		/* error initializing the hwloc library */
		debug("hwloc_topology_init() failed.");
		return 1;
	}

	/* parse the full system */
	hwloc_topology_set_flags(topology, HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM);

	/* ignore caches and misc objects */
#if HWLOC_API_VERSION < 0x00020000
	hwloc_topology_ignore_type(topology, HWLOC_OBJ_CACHE);
	hwloc_topology_ignore_type(topology, HWLOC_OBJ_MISC);
#else
	hwloc_topology_set_type_filter(topology, HWLOC_OBJ_L1CACHE,
				       HWLOC_TYPE_FILTER_KEEP_NONE);
	hwloc_topology_set_type_filter(topology, HWLOC_OBJ_L2CACHE,
				       HWLOC_TYPE_FILTER_KEEP_NONE);
	hwloc_topology_set_type_filter(topology, HWLOC_OBJ_L3CACHE,
				       HWLOC_TYPE_FILTER_KEEP_NONE);
	hwloc_topology_set_type_filter(topology, HWLOC_OBJ_L4CACHE,
				       HWLOC_TYPE_FILTER_KEEP_NONE);
	hwloc_topology_set_type_filter(topology, HWLOC_OBJ_L5CACHE,
				       HWLOC_TYPE_FILTER_KEEP_NONE);
	hwloc_topology_set_type_filter(topology, HWLOC_OBJ_MISC,
				       HWLOC_TYPE_FILTER_KEEP_NONE);
#endif

	/* load topology */
	debug2("hwloc_topology_load");
	if (hwloc_topology_load(topology)) {
		/* error loading the hardware topology */
		debug("hwloc_topology_load() failed.");
		hwloc_topology_destroy(topology);
		return 2;
	}
#if _DEBUG
	_hwloc_children(topology, hwloc_get_root_obj(topology), 0);
#endif
	/*
	 * Some processors (e.g. AMD Opteron 6000 series) contain multiple
	 * NUMA nodes per socket. This is a configuration which does not map
	 * into the hardware entities that Slurm optimizes resource allocation
	 * for (PU/thread, core, socket, baseboard, node and network switch).
	 * In order to optimize resource allocations on such hardware, Slurm
	 * will consider each NUMA node within the socket as a separate socket.
	 * You can disable this by configuring "SchedulerParameters=Ignore_NUMA",
	 * in which case Slurm will report the correct socket count on the node,
	 * but not be able to optimize resource allocations on the NUMA nodes.
	 */
	objtype[SOCKET] = HWLOC_OBJ_SOCKET;
	objtype[CORE]   = HWLOC_OBJ_CORE;
	objtype[PU]     = HWLOC_OBJ_PU;
	if (hwloc_get_type_depth(topology, HWLOC_OBJ_NODE) >
	    hwloc_get_type_depth(topology, HWLOC_OBJ_SOCKET)) {
		char *sched_params = slurm_get_sched_params();
		if (sched_params &&
		    strcasestr(sched_params, "Ignore_NUMA")) {
			info("Ignoring NUMA nodes within a socket");
		} else {
			info("Considering each NUMA node as a socket");
			objtype[SOCKET] = HWLOC_OBJ_NODE;
		}
		xfree(sched_params);
	}

	/* number of objects */
	depth = hwloc_get_type_depth(topology, HWLOC_OBJ_GROUP);
	if (depth != HWLOC_TYPE_DEPTH_UNKNOWN) {
		actual_boards = MAX(hwloc_get_nbobjs_by_depth(topology, depth),
				    1);
	}

	/*
	 * Count sockets/NUMA containing any cores.
	 * KNL NUMA with no cores are NOT counted.
	 */
	nobj[SOCKET] = 0;
	depth = hwloc_get_type_depth(topology, objtype[SOCKET]);
	used_socket = bit_alloc(_MAX_SOCKET_INX);
	cores_per_socket = xmalloc(sizeof(int) * _MAX_SOCKET_INX);
	sock_cnt = hwloc_get_nbobjs_by_depth(topology, depth);
	for (i = 0; i < sock_cnt; i++) {
		obj = hwloc_get_obj_by_depth(topology, depth, i);
		if (obj->type == objtype[SOCKET]) {
			cores_per_socket[i] = _core_child_count(topology, obj);
			if (cores_per_socket[i] > 0) {
				nobj[SOCKET]++;
				bit_set(used_socket, tot_socks);
			}
			if (++tot_socks >= _MAX_SOCKET_INX) {	/* Bitmap size */
				fatal("Socket count exceeds %d, expand data structure size",
				      _MAX_SOCKET_INX);
				break;
			}
		}
	}

	nobj[CORE] = hwloc_get_nbobjs_by_type(topology, objtype[CORE]);

	/*
	 * Workaround for an hwloc bug: in some cases the topology "children"
	 * array does not get populated, so _core_child_count() always returns 0.
	 */
	if (nobj[SOCKET] == 0) {
		nobj[SOCKET] = hwloc_get_nbobjs_by_type(topology,
							objtype[SOCKET]);
		if (nobj[SOCKET] == 0) {
			debug("get_cpuinfo() fudging nobj[SOCKET] from 0 to 1");
			nobj[SOCKET] = 1;
		}
		if (nobj[SOCKET] >= _MAX_SOCKET_INX) {	/* Bitmap size */
			fatal("Socket count exceeds %d, expand data structure size",
			      _MAX_SOCKET_INX);
		}
		bit_nset(used_socket, 0, nobj[SOCKET] - 1);
	}

	/*
	 * Workaround for hwloc
	 * hwloc_get_nbobjs_by_type() returns 0 on some architectures.
	 */
	if ( nobj[CORE] == 0 ) {
		debug("get_cpuinfo() fudging nobj[CORE] from 0 to 1");
		nobj[CORE] = 1;
	}
	if ( nobj[SOCKET] == -1 )
		fatal("get_cpuinfo() can not handle nobj[SOCKET] = -1");
	if ( nobj[CORE] == -1 )
		fatal("get_cpuinfo() can not handle nobj[CORE] = -1");
	actual_cpus  = hwloc_get_nbobjs_by_type(topology, objtype[PU]);
#if 0
	/* Used to find workaround above */
	info("CORE = %d SOCKET = %d actual_cpus = %d nobj[CORE] = %d",
	     CORE, SOCKET, actual_cpus, nobj[CORE]);
#endif
	if ((actual_cpus % nobj[CORE]) != 0) {
		error("Thread count (%d) not multiple of core count (%d)",
		      actual_cpus, nobj[CORE]);
	}
	nobj[PU] = actual_cpus / nobj[CORE];	/* threads per core */

	if ((nobj[CORE] % nobj[SOCKET]) != 0) {
		error("Core count (%d) not multiple of socket count (%d)",
		      nobj[CORE], nobj[SOCKET]);
	}
	nobj[CORE] /= nobj[SOCKET];		/* cores per socket */

	debug("CPUs:%d Boards:%d Sockets:%d CoresPerSocket:%d ThreadsPerCore:%d",
	      actual_cpus, actual_boards, nobj[SOCKET], nobj[CORE], nobj[PU]);

	/* allocate block_map */
	if (p_block_map_size)
		*p_block_map_size = (uint16_t)actual_cpus;
	if (p_block_map && p_block_map_inv) {
		*p_block_map     = xmalloc(actual_cpus * sizeof(uint16_t));
		*p_block_map_inv = xmalloc(actual_cpus * sizeof(uint16_t));

		/* initialize default as linear mapping */
		for (i = 0; i < actual_cpus; i++) {
			(*p_block_map)[i]     = i;
			(*p_block_map_inv)[i] = i;
		}
		/* create map with hwloc */
		used_sock_idx = -1;
		used_core_idx = -1;
		for (idx[SOCKET] = 0; (used_sock_idx + 1) < nobj[SOCKET];
		     idx[SOCKET]++) {
			if (!bit_test(used_socket, idx[SOCKET]))
				continue;
			used_sock_idx++;
			for (idx[CORE] = 0;
			     idx[CORE] < cores_per_socket[idx[SOCKET]];
			     idx[CORE]++) {
				used_core_idx++;
				for (idx[PU]=0; idx[PU]<nobj[PU]; ++idx[PU]) {
					/* get hwloc_obj by indexes */
					obj=hwloc_get_obj_below_array_by_type(
					            topology, 3, objtype, idx);
					if (!obj)
						continue;
					macid = obj->os_index;
					absid = used_core_idx * nobj[PU] + idx[PU];

					if ((macid >= actual_cpus) ||
					    (absid >= actual_cpus)) {
						/* physical or logical ID are
						 * out of range */
						continue;
					}
					debug4("CPU map[%d]=>%d S:C:T %d:%d:%d", absid, macid,
					       used_sock_idx, idx[CORE], idx[PU]);
					(*p_block_map)[absid]     = macid;
					(*p_block_map_inv)[macid] = absid;
				}
			}
		}
	}
	FREE_NULL_BITMAP(used_socket);
	xfree(cores_per_socket);
	hwloc_topology_destroy(topology);

	/* update output parameters */
	*p_cpus    = actual_cpus;
	*p_boards  = actual_boards;
	*p_sockets = nobj[SOCKET];
	*p_cores   = nobj[CORE];
	*p_threads = nobj[PU];

#if _DEBUG
	/*** Display raw data ***/
	debug("CPUs:%u Boards:%u Sockets:%u CoresPerSocket:%u ThreadsPerCore:%u",
	      *p_cpus, *p_boards, *p_sockets, *p_cores, *p_threads);

	/* Display the mapping tables */
	if (p_block_map && p_block_map_inv) {
		debug("------");
		debug("Abstract -> Machine logical CPU ID block mapping:");
		debug("AbstractId PhysicalId Inverse");
		for (i = 0; i < *p_cpus; i++) {
			debug3("   %4d      %4u       %4u",
				i, (*p_block_map)[i], (*p_block_map_inv)[i]);
		}
		debug("------");
	}
#endif
	return SLURM_SUCCESS;

}
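A minimal sketch of how the two tables built above are typically consumed: translating an abstract (block-ordered) CPU index to the machine's logical CPU id and back. The topology and table contents below are invented for illustration, not produced by hwloc.

#include <stdint.h>
#include <stdio.h>

/* Hypothetical 4-CPU box: 1 socket, 2 cores, 2 threads per core, where
 * the OS interleaves hardware threads across cores (core 0 holds PUs 0
 * and 2, core 1 holds PUs 1 and 3). */
static const uint16_t block_map[]     = { 0, 2, 1, 3 };	/* abstract -> machine */
static const uint16_t block_map_inv[] = { 0, 2, 1, 3 };	/* machine -> abstract */

int main(void)
{
	for (int abs_id = 0; abs_id < 4; abs_id++) {
		uint16_t mac_id = block_map[abs_id];
		printf("abstract %d -> machine %u -> abstract %u\n",
		       abs_id, mac_id, block_map_inv[mac_id]);
	}
	return 0;
}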
Example No. 17
/*
 * Read and process the bluegene.conf configuration file to determine which
 * blocks are static/dynamic, torus/mesh, etc.
 */
extern int read_bg_conf(void)
{
	int i;
	int count = 0;
	s_p_hashtbl_t *tbl = NULL;
	char *layout = NULL;
	select_ba_request_t **blockreq_array = NULL;
	image_t **image_array = NULL;
	image_t *image = NULL;
	static time_t last_config_update = (time_t) 0;
	struct stat config_stat;
	ListIterator itr = NULL;
	char* bg_conf_file = NULL;
	static int *dims = NULL;

	if (!dims)
		dims = select_g_ba_get_dims();

	if (bg_conf->slurm_debug_flags & DEBUG_FLAG_SELECT_TYPE)
		info("Reading the bluegene.conf file");

	/* check if config file has changed */
	bg_conf_file = _get_bg_conf();

	if (stat(bg_conf_file, &config_stat) < 0)
		fatal("can't stat bluegene.conf file %s: %m", bg_conf_file);
	if (last_config_update) {
		_reopen_bridge_log();
		if (last_config_update == config_stat.st_mtime) {
			if (bg_conf->slurm_debug_flags & DEBUG_FLAG_SELECT_TYPE)
				info("%s unchanged", bg_conf_file);
		} else {
			info("Restart slurmctld for %s changes "
			     "to take effect",
			     bg_conf_file);
		}
		last_config_update = config_stat.st_mtime;
		xfree(bg_conf_file);
		return SLURM_SUCCESS;
	}
	last_config_update = config_stat.st_mtime;

	/* initialization */
	/* bg_conf defined in bg_node_alloc.h */
	tbl = s_p_hashtbl_create(bg_conf_file_options);

	if (s_p_parse_file(tbl, NULL, bg_conf_file, false) == SLURM_ERROR)
		fatal("something wrong with opening/reading bluegene "
		      "conf file");
	xfree(bg_conf_file);

#ifdef HAVE_BGL
	if (s_p_get_array((void ***)&image_array,
			  &count, "AltBlrtsImage", tbl)) {
		for (i = 0; i < count; i++) {
			list_append(bg_conf->blrts_list, image_array[i]);
			image_array[i] = NULL;
		}
	}
	if (!s_p_get_string(&bg_conf->default_blrtsimage, "BlrtsImage", tbl)) {
		if (!list_count(bg_conf->blrts_list))
			fatal("BlrtsImage not configured "
			      "in bluegene.conf");
		itr = list_iterator_create(bg_conf->blrts_list);
		image = list_next(itr);
		image->def = true;
		list_iterator_destroy(itr);
		bg_conf->default_blrtsimage = xstrdup(image->name);
		info("Warning: using %s as the default BlrtsImage.  "
		     "If this isn't correct please set BlrtsImage",
		     bg_conf->default_blrtsimage);
	} else {
		if (bg_conf->slurm_debug_flags & DEBUG_FLAG_SELECT_TYPE)
			info("default BlrtsImage %s",
			     bg_conf->default_blrtsimage);
		image = xmalloc(sizeof(image_t));
		image->name = xstrdup(bg_conf->default_blrtsimage);
		image->def = true;
		image->groups = NULL;
		/* we want it to be first */
		list_push(bg_conf->blrts_list, image);
	}

	if (s_p_get_array((void ***)&image_array,
			  &count, "AltLinuxImage", tbl)) {
		for (i = 0; i < count; i++) {
			list_append(bg_conf->linux_list, image_array[i]);
			image_array[i] = NULL;
		}
	}
	if (!s_p_get_string(&bg_conf->default_linuximage, "LinuxImage", tbl)) {
		if (!list_count(bg_conf->linux_list))
			fatal("LinuxImage not configured "
			      "in bluegene.conf");
		itr = list_iterator_create(bg_conf->linux_list);
		image = list_next(itr);
		image->def = true;
		list_iterator_destroy(itr);
		bg_conf->default_linuximage = xstrdup(image->name);
		info("Warning: using %s as the default LinuxImage.  "
		     "If this isn't correct please set LinuxImage",
		     bg_conf->default_linuximage);
	} else {
		if (bg_conf->slurm_debug_flags & DEBUG_FLAG_SELECT_TYPE)
			info("default LinuxImage %s",
			     bg_conf->default_linuximage);
		image = xmalloc(sizeof(image_t));
		image->name = xstrdup(bg_conf->default_linuximage);
		image->def = true;
		image->groups = NULL;
		/* we want it to be first */
		list_push(bg_conf->linux_list, image);
	}

	if (s_p_get_array((void ***)&image_array,
			  &count, "AltRamDiskImage", tbl)) {
		for (i = 0; i < count; i++) {
			list_append(bg_conf->ramdisk_list, image_array[i]);
			image_array[i] = NULL;
		}
	}
	if (!s_p_get_string(&bg_conf->default_ramdiskimage,
			    "RamDiskImage", tbl)) {
		if (!list_count(bg_conf->ramdisk_list))
			fatal("RamDiskImage not configured "
			      "in bluegene.conf");
		itr = list_iterator_create(bg_conf->ramdisk_list);
		image = list_next(itr);
		image->def = true;
		list_iterator_destroy(itr);
		bg_conf->default_ramdiskimage = xstrdup(image->name);
		info("Warning: using %s as the default RamDiskImage.  "
		     "If this isn't correct please set RamDiskImage",
		     bg_conf->default_ramdiskimage);
	} else {
		if (bg_conf->slurm_debug_flags & DEBUG_FLAG_SELECT_TYPE)
			info("default RamDiskImage %s",
			     bg_conf->default_ramdiskimage);
		image = xmalloc(sizeof(image_t));
		image->name = xstrdup(bg_conf->default_ramdiskimage);
		image->def = true;
		image->groups = NULL;
		/* we want it to be first */
		list_push(bg_conf->ramdisk_list, image);
	}
#elif defined HAVE_BGP

	if (s_p_get_array((void ***)&image_array,
			  &count, "AltCnloadImage", tbl)) {
		for (i = 0; i < count; i++) {
			list_append(bg_conf->linux_list, image_array[i]);
			image_array[i] = NULL;
		}
	}
	if (!s_p_get_string(&bg_conf->default_linuximage, "CnloadImage", tbl)) {
		if (!list_count(bg_conf->linux_list))
			fatal("CnloadImage not configured "
			      "in bluegene.conf");
		itr = list_iterator_create(bg_conf->linux_list);
		image = list_next(itr);
		image->def = true;
		list_iterator_destroy(itr);
		bg_conf->default_linuximage = xstrdup(image->name);
		info("Warning: using %s as the default CnloadImage.  "
		     "If this isn't correct please set CnloadImage",
		     bg_conf->default_linuximage);
	} else {
		if (bg_conf->slurm_debug_flags & DEBUG_FLAG_SELECT_TYPE)
			info("default CnloadImage %s",
			     bg_conf->default_linuximage);
		image = xmalloc(sizeof(image_t));
		image->name = xstrdup(bg_conf->default_linuximage);
		image->def = true;
		image->groups = NULL;
		/* we want it to be first */
		list_push(bg_conf->linux_list, image);
	}

	if (s_p_get_array((void ***)&image_array,
			  &count, "AltIoloadImage", tbl)) {
		for (i = 0; i < count; i++) {
			list_append(bg_conf->ramdisk_list, image_array[i]);
			image_array[i] = NULL;
		}
	}
	if (!s_p_get_string(&bg_conf->default_ramdiskimage,
			    "IoloadImage", tbl)) {
		if (!list_count(bg_conf->ramdisk_list))
			fatal("IoloadImage not configured "
			      "in bluegene.conf");
		itr = list_iterator_create(bg_conf->ramdisk_list);
		image = list_next(itr);
		image->def = true;
		list_iterator_destroy(itr);
		bg_conf->default_ramdiskimage = xstrdup(image->name);
		info("Warning: using %s as the default IoloadImage.  "
		     "If this isn't correct please set IoloadImage",
		     bg_conf->default_ramdiskimage);
	} else {
		if (bg_conf->slurm_debug_flags & DEBUG_FLAG_SELECT_TYPE)
			info("default IoloadImage %s",
			     bg_conf->default_ramdiskimage);
		image = xmalloc(sizeof(image_t));
		image->name = xstrdup(bg_conf->default_ramdiskimage);
		image->def = true;
		image->groups = NULL;
		/* we want it to be first */
		list_push(bg_conf->ramdisk_list, image);
	}

#endif
	if (s_p_get_array((void ***)&image_array,
			  &count, "AltMloaderImage", tbl)) {
		for (i = 0; i < count; i++) {
			list_append(bg_conf->mloader_list, image_array[i]);
			image_array[i] = NULL;
		}
	}
	if (!s_p_get_string(&bg_conf->default_mloaderimage,
			    "MloaderImage", tbl)) {
		if (!list_count(bg_conf->mloader_list))
			fatal("MloaderImage not configured "
			      "in bluegene.conf");
		itr = list_iterator_create(bg_conf->mloader_list);
		image = list_next(itr);
		image->def = true;
		list_iterator_destroy(itr);
		bg_conf->default_mloaderimage = xstrdup(image->name);
		info("Warning: using %s as the default MloaderImage.  "
		     "If this isn't correct please set MloaderImage",
		     bg_conf->default_mloaderimage);
	} else {
		if (bg_conf->slurm_debug_flags & DEBUG_FLAG_SELECT_TYPE)
			info("default MloaderImage %s",
			     bg_conf->default_mloaderimage);
		image = xmalloc(sizeof(image_t));
		image->name = xstrdup(bg_conf->default_mloaderimage);
		image->def = true;
		image->groups = NULL;
		/* we want it to be first */
		list_push(bg_conf->mloader_list, image);
	}

	if (!s_p_get_uint16(
		    &bg_conf->mp_cnode_cnt, "BasePartitionNodeCnt", tbl)) {
		error("BasePartitionNodeCnt not configured in bluegene.conf "
		      "defaulting to 512 as BasePartitionNodeCnt");
		bg_conf->mp_cnode_cnt = 512;
		bg_conf->quarter_cnode_cnt = 128;
	} else {
		if (bg_conf->mp_cnode_cnt <= 0)
			fatal("BasePartitionNodeCnt must be greater than 0");

		bg_conf->quarter_cnode_cnt = bg_conf->mp_cnode_cnt/4;
	}
	/* bg_conf->cpus_per_mp should have already been set from the
	 * node_init */
	if (bg_conf->cpus_per_mp < bg_conf->mp_cnode_cnt) {
		fatal("For some reason we have only %u cpus per mp, but "
		      "have %u cnodes per mp.  You need at least the same "
		      "number of cpus as you have cnodes per mp.  "
		      "Check the NodeName Procs= "
		      "definition in the slurm.conf.",
		      bg_conf->cpus_per_mp, bg_conf->mp_cnode_cnt);
	}

	bg_conf->cpu_ratio = bg_conf->cpus_per_mp/bg_conf->mp_cnode_cnt;
	if (!bg_conf->cpu_ratio)
		fatal("We appear to have less than 1 cpu on a cnode.  "
		      "You specified %u for BasePartitionNodeCnt "
		      "in the blugene.conf and %u cpus "
		      "for each node in the slurm.conf",
		      bg_conf->mp_cnode_cnt, bg_conf->cpus_per_mp);

	num_unused_cpus = 1;
	for (i = 0; i<SYSTEM_DIMENSIONS; i++)
		num_unused_cpus *= dims[i];
	num_unused_cpus *= bg_conf->cpus_per_mp;

	if (!s_p_get_uint16(
		    &bg_conf->nodecard_cnode_cnt, "NodeCardNodeCnt", tbl)) {
		error("NodeCardNodeCnt not configured in bluegene.conf "
		      "defaulting to 32 as NodeCardNodeCnt");
		bg_conf->nodecard_cnode_cnt = 32;
	}

	if (bg_conf->nodecard_cnode_cnt <= 0)
		fatal("NodeCardNodeCnt must be greater than 0");

	bg_conf->mp_nodecard_cnt =
		bg_conf->mp_cnode_cnt / bg_conf->nodecard_cnode_cnt;

	if (!s_p_get_uint16(&bg_conf->ionodes_per_mp, "Numpsets", tbl))
		fatal("Warning: Numpsets not configured in bluegene.conf");
	if (!bg_conf->ionodes_per_mp) {
		if (!s_p_get_uint16(&bg_conf->ionodes_per_mp,
				    "IONodesPerMP", tbl))
			fatal("Warning: IONodesPerMP not configured "
			      "in bluegene.conf");
	}

#ifdef HAVE_BGQ
	/* You can only have 16 ionodes per midplane */
	if (bg_conf->ionodes_per_mp > bg_conf->mp_nodecard_cnt)
		bg_conf->ionodes_per_mp = bg_conf->mp_nodecard_cnt;
#endif

	if (bg_conf->ionodes_per_mp) {
		bitstr_t *tmp_bitmap = NULL;
		int small_size = 1;

		/* THIS IS A HACK TO MAKE A 1 NODECARD SYSTEM WORK */
		if (bg_conf->mp_cnode_cnt == bg_conf->nodecard_cnode_cnt) {
			bg_conf->quarter_ionode_cnt = 2;
			bg_conf->nodecard_ionode_cnt = 2;
		} else {
			bg_conf->quarter_ionode_cnt = bg_conf->ionodes_per_mp/4;
			bg_conf->nodecard_ionode_cnt =
				bg_conf->quarter_ionode_cnt/4;
		}

		/* How many nodecards per ionode */
		bg_conf->nc_ratio =
			((double)bg_conf->mp_cnode_cnt
			 / (double)bg_conf->nodecard_cnode_cnt)
			/ (double)bg_conf->ionodes_per_mp;
		/* How many ionodes per nodecard */
		bg_conf->io_ratio =
			(double)bg_conf->ionodes_per_mp /
			((double)bg_conf->mp_cnode_cnt
			 / (double)bg_conf->nodecard_cnode_cnt);
		//info("got %f %f", bg_conf->nc_ratio, bg_conf->io_ratio);
		/* figure out the smallest block we can have on the
		   system */
#ifdef HAVE_BGL
		if (bg_conf->io_ratio >= 1)
			bg_conf->smallest_block=32;
		else
			bg_conf->smallest_block=128;
#else

		if (bg_conf->io_ratio >= 2)
			bg_conf->smallest_block=16;
		else if (bg_conf->io_ratio == 1)
			bg_conf->smallest_block=32;
		else if (bg_conf->io_ratio == .5)
			bg_conf->smallest_block=64;
		else if (bg_conf->io_ratio == .25)
			bg_conf->smallest_block=128;
		else if (bg_conf->io_ratio == .125)
			bg_conf->smallest_block=256;
		else {
			error("unknown ioratio %f.  Can't figure out "
			      "smallest block size, setting it to midplane",
			      bg_conf->io_ratio);
			bg_conf->smallest_block = 512;
		}
#endif
		if (bg_conf->slurm_debug_flags & DEBUG_FLAG_SELECT_TYPE)
			info("Smallest block possible on this system is %u",
			     bg_conf->smallest_block);
		/* below we are creating all the possible bitmaps for
		 * each size of small block
		 */
		if ((int)bg_conf->nodecard_ionode_cnt < 1) {
			bg_conf->nodecard_ionode_cnt = 0;
		} else {
			bg_lists->valid_small32 = list_create(_destroy_bitmap);
			if ((small_size = bg_conf->nodecard_ionode_cnt))
				small_size--;
			i = 0;
			while (i<bg_conf->ionodes_per_mp) {
				tmp_bitmap = bit_alloc(bg_conf->ionodes_per_mp);
				bit_nset(tmp_bitmap, i, i+small_size);
				i += small_size+1;
				list_append(bg_lists->valid_small32,
					    tmp_bitmap);
			}
		}
		/* If we only have 1 nodecard just jump to the end
		   since the calculations below will never be needed.
		   Pretty much a hack to avoid a segfault. */
		if (bg_conf->mp_cnode_cnt == bg_conf->nodecard_cnode_cnt)
			goto no_calc;

		bg_lists->valid_small128 = list_create(_destroy_bitmap);
		if ((small_size = bg_conf->quarter_ionode_cnt))
			small_size--;
		i = 0;
		while (i<bg_conf->ionodes_per_mp) {
			tmp_bitmap = bit_alloc(bg_conf->ionodes_per_mp);
			bit_nset(tmp_bitmap, i, i+small_size);
			i += small_size+1;
			list_append(bg_lists->valid_small128, tmp_bitmap);
		}

#ifndef HAVE_BGL
		bg_lists->valid_small64 = list_create(_destroy_bitmap);
		if ((small_size = bg_conf->nodecard_ionode_cnt * 2))
			small_size--;
		i = 0;
		while (i<bg_conf->ionodes_per_mp) {
			tmp_bitmap = bit_alloc(bg_conf->ionodes_per_mp);
			bit_nset(tmp_bitmap, i, i+small_size);
			i += small_size+1;
			list_append(bg_lists->valid_small64, tmp_bitmap);
		}

		bg_lists->valid_small256 = list_create(_destroy_bitmap);
		if ((small_size = bg_conf->quarter_ionode_cnt * 2))
			small_size--;
		i = 0;
		while (i<bg_conf->ionodes_per_mp) {
			tmp_bitmap = bit_alloc(bg_conf->ionodes_per_mp);
			bit_nset(tmp_bitmap, i, i+small_size);
			i += small_size+1;
			list_append(bg_lists->valid_small256, tmp_bitmap);
		}
#endif
	} else {
		fatal("your ionodes_per_mp is 0");
	}

no_calc:

	if (!s_p_get_uint16(&bg_conf->bridge_api_verb, "BridgeAPIVerbose", tbl))
		info("Warning: BridgeAPIVerbose not configured "
		     "in bluegene.conf");
	if (!s_p_get_string(&bg_conf->bridge_api_file,
			    "BridgeAPILogFile", tbl))
		info("BridgeAPILogFile not configured in bluegene.conf");
	else
		_reopen_bridge_log();

	if (s_p_get_string(&layout, "DenyPassthrough", tbl)) {
		if (strstr(layout, "A"))
			ba_deny_pass |= PASS_DENY_A;
		if (strstr(layout, "X"))
			ba_deny_pass |= PASS_DENY_X;
		if (strstr(layout, "Y"))
			ba_deny_pass |= PASS_DENY_Y;
		if (strstr(layout, "Z"))
			ba_deny_pass |= PASS_DENY_Z;
		if (!strcasecmp(layout, "ALL"))
			ba_deny_pass |= PASS_DENY_ALL;
		bg_conf->deny_pass = ba_deny_pass;
		xfree(layout);
	}

	if (!s_p_get_string(&layout, "LayoutMode", tbl)) {
		info("Warning: LayoutMode was not specified in bluegene.conf "
		     "defaulting to STATIC partitioning");
		bg_conf->layout_mode = LAYOUT_STATIC;
	} else {
		if (!strcasecmp(layout,"STATIC"))
			bg_conf->layout_mode = LAYOUT_STATIC;
		else if (!strcasecmp(layout,"OVERLAP"))
			bg_conf->layout_mode = LAYOUT_OVERLAP;
		else if (!strcasecmp(layout,"DYNAMIC"))
			bg_conf->layout_mode = LAYOUT_DYNAMIC;
		else {
			fatal("I don't understand this LayoutMode = %s",
			      layout);
		}
		xfree(layout);
	}

	/* add blocks defined in file */
	if (bg_conf->layout_mode != LAYOUT_DYNAMIC) {
		if (!s_p_get_array((void ***)&blockreq_array,
				   &count, "BPs", tbl)) {
			info("WARNING: no blocks defined in bluegene.conf, "
			     "only making full system block");
			/* create_full_system_block(NULL); */
		}

		for (i = 0; i < count; i++) {
			add_bg_record(bg_lists->main, NULL,
				      blockreq_array[i], 0, 0);
		}
	}
	s_p_hashtbl_destroy(tbl);

	return SLURM_SUCCESS;
}
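The valid_small* lists above all follow the same pattern: carve the midplane's ionode bitmap into consecutive, equal-sized inclusive ranges, one bitmap per possible small block. Below is a standalone sketch of that pattern using the BSD <bitstring.h> macros (Slurm ships its own extended bitstring library with the same core calls); the ionode and chunk counts are assumed values.

#include <bitstring.h>	/* BSD bitstring macros; on Linux via libbsd */
#include <stdio.h>
#include <stdlib.h>

#define IONODES_PER_MP 16	/* assumed ionode count per midplane */

int main(void)
{
	int chunk = 4;	/* assumed ionodes covered by one small block */

	for (int i = 0; i < IONODES_PER_MP; i += chunk) {
		bitstr_t *bm = bit_alloc(IONODES_PER_MP);
		bit_nset(bm, i, i + chunk - 1);	/* inclusive range, as above */
		printf("small block ionodes %2d-%2d (bit %d set: %s)\n",
		       i, i + chunk - 1, i, bit_test(bm, i) ? "yes" : "no");
		free(bm);	/* BSD bit_alloc() is calloc-backed */
	}
	return 0;
}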
Example No. 18
int
main(int argc, char *argv[])
{
	note("Testing static decl");
	{
		bitstr_t bit_decl(bs, 65);
		/*bitstr_t *bsp = bs;*/

		bit_set(bs,9);
		bit_set(bs,14);
		TEST(bit_test(bs,9), "bit 9 set"); 
		TEST(!bit_test(bs,12), "bit 12 not set");
		TEST(bit_test(bs,14), "bit 14 set" );
		/*bit_free(bsp);*/	/* triggers TEST in bit_free - OK */
	}
	note("Testing basic vixie functions");
	{
		bitstr_t *bs = bit_alloc(16), *bs2;

		/*bit_set(bs, 42);*/ 	/* triggers TEST in bit_set - OK */
		bit_set(bs,9);
		bit_set(bs,14);
		TEST(bit_test(bs,9), "bit 9 set"); 
		TEST(!bit_test(bs,12), "bit 12 not set" );
		TEST(bit_test(bs,14), "bit 14 set");

		bs2 = bit_copy(bs);
		bit_fill_gaps(bs2);
		TEST(bit_ffs(bs2) == 9, "first bit set = 9 ");
		TEST(bit_fls(bs2) == 14, "last bit set = 14");
		TEST(bit_set_count(bs2) == 6, "bitstring");
		TEST(bit_test(bs2,12), "bitstring");
		TEST(bit_super_set(bs,bs2) == 1, "bitstring");
		TEST(bit_super_set(bs2,bs) == 0, "bitstring");

		bit_clear(bs,14);
		TEST(!bit_test(bs,14), "bitstring");

		bit_nclear(bs,9,14);
		TEST(!bit_test(bs,9), "bitstring");
		TEST(!bit_test(bs,12), "bitstring");
		TEST(!bit_test(bs,14), "bitstring");

		bit_nset(bs,9,14);
		TEST(bit_test(bs,9), "bitstring");
		TEST(bit_test(bs,12), "bitstring");
		TEST(bit_test(bs,14), "bitstring");

		TEST(bit_ffs(bs) == 9, "ffs");
		TEST(bit_ffc(bs) == 0, "ffc");
		bit_nset(bs,0,8);
		TEST(bit_ffc(bs) == 15, "ffc");

		bit_free(bs);
		/*bit_set(bs,9); */	/* triggers TEST in bit_set - OK */
	}
	note("Testing and/or/not");
	{
		bitstr_t *bs1 = bit_alloc(128);
		bitstr_t *bs2 = bit_alloc(128);

		bit_set(bs1, 100);
		bit_set(bs1, 104);
		bit_set(bs2, 100);

		bit_and(bs1, bs2);
		TEST(bit_test(bs1, 100), "and");
		TEST(!bit_test(bs1, 104), "and");

		bit_set(bs2, 110);
		bit_set(bs2, 111);
		bit_set(bs2, 112);
		bit_or(bs1, bs2);
		TEST(bit_test(bs1, 100), "or");
		TEST(bit_test(bs1, 110), "or");
		TEST(bit_test(bs1, 111), "or");
		TEST(bit_test(bs1, 112), "or");

		bit_not(bs1);
		TEST(!bit_test(bs1, 100), "not");
		TEST(bit_test(bs1, 12), "not");

		bit_free(bs1);
		bit_free(bs2);
	}

	note("testing bit selection");
	{
		bitstr_t *bs1 = bit_alloc(128), *bs2;
		bit_set(bs1, 21);
		bit_set(bs1, 100);
		bit_fill_gaps(bs1);
		bs2 = bit_pick_cnt(bs1,20);

		if (bs2) {
			TEST(bit_set_count(bs2) == 20, "pick");
			TEST(bit_ffs(bs2) == 21, "pick");
			TEST(bit_fls(bs2) == 40, "pick");
			bit_free(bs2);
		}
		else
			TEST(0, "alloc fail");

		bit_free(bs1);
	}
	note("Testing realloc");
	{
		bitstr_t *bs = bit_alloc(1);

		TEST(bit_ffs(bs) == -1, "bitstring");
		bit_set(bs,0);
		/*bit_set(bs, 1000);*/	/* triggers TEST in bit_set - OK */
		bs = bit_realloc(bs,1048576);
		bit_set(bs,1000);
		bit_set(bs,1048575);
		TEST(bit_test(bs, 0), "bitstring");
		TEST(bit_test(bs, 1000), "bitstring");
		TEST(bit_test(bs, 1048575), "bitstring");
		TEST(bit_set_count(bs) == 3, "bitstring");
		bit_clear(bs,0);
		bit_clear(bs,1000);
		TEST(bit_set_count(bs) == 1, "bitstring");
		TEST(bit_ffs(bs) == 1048575, "bitstring");
		bit_free(bs);
	}
	note("Testing bit_fmt");
	{
		char tmpstr[1024];
		bitstr_t *bs = bit_alloc(1024);

		TEST(!strcmp(bit_fmt(tmpstr,sizeof(tmpstr),bs), ""), "bitstring");
		bit_set(bs,42);
		TEST(!strcmp(bit_fmt(tmpstr,sizeof(tmpstr),bs), "42"), "bitstring");
		bit_set(bs,102);
		TEST(!strcmp(bit_fmt(tmpstr,sizeof(tmpstr),bs), "42,102"), "bitstring");
		bit_nset(bs,9,14);
		TEST(!strcmp(bit_fmt(tmpstr,sizeof(tmpstr), bs), 
					"9-14,42,102"), "bitstring");
	}

	note("Testing bit_nffc/bit_nffs");
	{
		bitstr_t *bs = bit_alloc(1024);

		bit_set(bs, 2);
		bit_set(bs, 6);
		bit_set(bs, 7);
		bit_nset(bs,12,1018); 

		TEST(bit_nffc(bs, 2) == 0, "bitstring");
		TEST(bit_nffc(bs, 3) == 3, "bitstring");
		TEST(bit_nffc(bs, 4) == 8, "bitstring");
		TEST(bit_nffc(bs, 5) == 1019, "bitstring");
		TEST(bit_nffc(bs, 6) == -1, "bitstring");

		TEST(bit_nffs(bs, 1) == 2, "bitstring");
		TEST(bit_nffs(bs, 2) == 6, "bitstring");
		TEST(bit_nffs(bs, 100) == 12, "bitstring");
		TEST(bit_nffs(bs, 1023) == -1, "bitstring");

		bit_free(bs);
	}

	note("Testing bit_unfmt");
	{
		bitstr_t *bs = bit_alloc(1024);
		bitstr_t *bs2 = bit_alloc(1024);
		char tmpstr[4096];

		bit_set(bs,1);
		bit_set(bs,3);
		bit_set(bs,30);
		bit_nset(bs,42,64);
		bit_nset(bs,97,1000);

		bit_fmt(tmpstr, sizeof(tmpstr), bs);
		TEST(bit_unfmt(bs2, tmpstr) != -1, "bitstring");
		TEST(bit_equal(bs, bs2), "bitstring");

		bit_free(bs);
		bit_free(bs2);
	}

	totals();
	return failed;
}
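The TEST(), note(), and totals() helpers and the failed counter used by this program come from its test harness and are not shown. A minimal stand-in with assumed semantics, enough to compile and run checks in the same style, might look like this:

#include <stdio.h>

static int failed = 0;

/* Assumed harness semantics: report each check, count failures. */
#define TEST(cond, msg)					\
	do {						\
		if (cond) {				\
			printf("ok:   %s\n", msg);	\
		} else {				\
			printf("FAIL: %s\n", msg);	\
			failed++;			\
		}					\
	} while (0)

static void note(const char *msg) { printf("--- %s\n", msg); }
static void totals(void) { printf("%d check(s) failed\n", failed); }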
Example No. 19
/*
 * main - test driver exercising the GRES plugin logic normally run by
 * the slurmd and slurmctld daemons.
 * Usage: test7.17.prog <TRES_PER_NODE> <CONFIG_DIR_HEAD> <CONFIG_SUB_DIR> <CPU_COUNT>
 */
int main(int argc, char *argv[])
{
	log_options_t opts = LOG_OPTS_STDERR_ONLY;
	int rc;
	uint32_t cpu_count, cpu_alloc, job_id = 12345;
	char *node_name, *reason_down = NULL;
	char *orig_config, *new_config = NULL, *tres_per_node = NULL;
	Buf buffer;
	List job_gres_list = NULL, node_gres_list = NULL;
	bitstr_t *cpu_bitmap;
	char config_dir[10000], test[1000];
	char slurm_conf[1000];
	uint32_t num_tasks = 1;
	uint32_t min_nodes = 1;
	uint32_t max_nodes = 1;
	uint16_t ntasks_per_node = NO_VAL16;
	uint16_t ntasks_per_socket = NO_VAL16;
	uint16_t sockets_per_node = NO_VAL16;
	uint16_t cpus_per_task = NO_VAL16;
	int core_count, sock_count;

	/* Set up slurm.conf and gres.conf test paths.  The original code
	 * used strcpy() with overlapping source/destination, which is
	 * undefined behavior; build the paths step by step instead. */
	strcpy(config_dir, argv[2]);
	strcat(config_dir, "/test7.17_configs");
	strcat(config_dir, argv[3]);
	strcpy(test, config_dir);
	strcpy(slurm_conf, test);
	strcat(slurm_conf, "/slurm.conf");

	/* Enable detailed logging for now */
	opts.stderr_level = LOG_LEVEL_DEBUG;
	log_init(argv[0], opts, SYSLOG_FACILITY_USER, NULL);

	/*
	 * Logic normally executed by slurmd daemon
	 */
	setenv("SLURM_CONF", slurm_conf, 1);
	rc = gres_plugin_init();
	if (rc != SLURM_SUCCESS) {
		slurm_perror("failure: gres_plugin_init");
		exit(1);
	}

	setenv("SLURM_CONFIG_DIR", config_dir, 1);

	cpu_count = strtol(argv[4], NULL, 10);
	node_name = "test_node";
	rc = gres_plugin_node_config_load(cpu_count, node_name, NULL, NULL,
					  NULL);
	if (rc != SLURM_SUCCESS) {
		slurm_perror("failure: gres_plugin_node_config_load");
		exit(1);
	}

	buffer = init_buf(1024);
	rc = gres_plugin_node_config_pack(buffer);
	if (rc != SLURM_SUCCESS) {
		slurm_perror("failure: gres_plugin_node_config_pack");
		exit(1);
	}

	/*
	 * Logic normally executed by slurmctld daemon
	 */
	orig_config = "gpu:8";
	rc = gres_plugin_init_node_config(node_name, orig_config,
					  &node_gres_list);
	if (rc != SLURM_SUCCESS) {
		slurm_perror("failure: gres_plugin_init_node_config");
		exit(1);
	}

	set_buf_offset(buffer, 0);
	rc = gres_plugin_node_config_unpack(buffer, node_name);
	if (rc != SLURM_SUCCESS) {
		slurm_perror("failure: gres_plugin_node_config_unpack");
		exit(1);
	}

	core_count = cpu_count;
	sock_count = 1;
	rc = gres_plugin_node_config_validate(node_name, orig_config,
					      &new_config, &node_gres_list,
					      cpu_count, core_count, sock_count,
					      0, &reason_down);
	if (rc != SLURM_SUCCESS) {
		slurm_perror("failure: gres_plugin_node_config_validate");
		exit(1);
	}

	if (argc > 1)
		tres_per_node = xstrdup(argv[1]);

	rc = gres_plugin_job_state_validate(NULL,	/* cpus_per_tres */
					    NULL,	/* tres_freq */
					    NULL,	/* tres_per_job */
					    tres_per_node,
					    NULL,	/* tres_per_socket */
					    NULL,	/* tres_per_task */
					    NULL,	/* mem_per_tres */
					    &num_tasks,
					    &min_nodes,
					    &max_nodes,
					    &ntasks_per_node,
					    &ntasks_per_socket,
					    &sockets_per_node,
					    &cpus_per_task,
					    &job_gres_list);
	if (rc != SLURM_SUCCESS) {
		slurm_seterrno(rc);
		slurm_perror("failure: gres_plugin_job_state_validate");
		exit(1);
	}

	gres_plugin_node_state_log(node_gres_list, node_name);
	gres_plugin_job_state_log(job_gres_list, job_id);

	cpu_bitmap = bit_alloc(cpu_count);
	bit_nset(cpu_bitmap, 0, cpu_count - 1);
	cpu_alloc = gres_plugin_job_test(job_gres_list, node_gres_list, true,
					 cpu_bitmap, 0, cpu_count - 1,
					 job_id, node_name);
	if (cpu_alloc == NO_VAL)
		printf("cpu_alloc=ALL\n");
	else
		printf("cpu_alloc=%u\n", cpu_alloc);

	rc = gres_plugin_fini();
	if (rc != SLURM_SUCCESS) {
		slurm_perror("failure: gres_plugin_fini");
		exit(1);
	}

	printf("Test %s ran to completion\n\n", argv[3]);
	exit(0);
}
Example No. 20
/*
 * This could potentially take the node lock in the slurmctld via
 * slurm_drain_nodes() or slurm_fail_job(), so if slurmctld_locked is set
 * we call the underlying functions directly without taking the locks again.
 */
extern int down_nodecard(char *mp_name, bitoff_t io_start,
			 bool slurmctld_locked)
{
	List requests = NULL;
	List delete_list = NULL;
	ListIterator itr = NULL;
	bg_record_t *bg_record = NULL, *found_record = NULL, tmp_record;
	bg_record_t *smallest_bg_record = NULL;
	struct node_record *node_ptr = NULL;
	int mp_bit = 0;
	static int io_cnt = NO_VAL;
	static int create_size = NO_VAL;
	static select_ba_request_t blockreq;
	int rc = SLURM_SUCCESS;
	char *reason = "select_bluegene: nodecard down";

	xassert(mp_name);

	if (io_cnt == NO_VAL) {
		io_cnt = 1;
		/* Translate 1 nodecard count to ionode count */
		if ((io_cnt *= bg_conf->io_ratio))
			io_cnt--;

		/* make sure the block we create is a size the
		   system can actually create */
		if (bg_conf->smallest_block < bg_conf->nodecard_cnode_cnt)
			create_size = bg_conf->nodecard_cnode_cnt;
		else
			create_size = bg_conf->smallest_block;
	}

	node_ptr = find_node_record(mp_name);
	if (!node_ptr) {
		error ("down_sub_node_blocks: invalid node specified '%s'",
		       mp_name);
		return EINVAL;
	}

	/* This is a sanity check to make sure we don't core dump on
	   these bits when we set them below. */
	if (io_start >= bg_conf->ionodes_per_mp
	    || (io_start+io_cnt) >= bg_conf->ionodes_per_mp) {
		debug("io %d-%d not configured on this "
		      "system, only %d ionodes per midplane",
		      io_start, io_start+io_cnt, bg_conf->ionodes_per_mp);
		return EINVAL;
	}
	mp_bit = (node_ptr - node_record_table_ptr);

	memset(&blockreq, 0, sizeof(select_ba_request_t));

	blockreq.conn_type[0] = SELECT_SMALL;
	blockreq.save_name = mp_name;

	debug3("here setting node %d of %d and ionodes %d-%d of %d",
	       mp_bit, node_record_count, io_start,
	       io_start+io_cnt, bg_conf->ionodes_per_mp);

	memset(&tmp_record, 0, sizeof(bg_record_t));
	tmp_record.mp_count = 1;
	tmp_record.cnode_cnt = bg_conf->nodecard_cnode_cnt;
	tmp_record.mp_bitmap = bit_alloc(node_record_count);
	bit_set(tmp_record.mp_bitmap, mp_bit);

	tmp_record.ionode_bitmap = bit_alloc(bg_conf->ionodes_per_mp);
	bit_nset(tmp_record.ionode_bitmap, io_start, io_start+io_cnt);

	slurm_mutex_lock(&block_state_mutex);
	itr = list_iterator_create(bg_lists->main);
	while ((bg_record = list_next(itr))) {
		if (!bit_test(bg_record->mp_bitmap, mp_bit))
			continue;

		if (!blocks_overlap(bg_record, &tmp_record))
			continue;

		if (bg_record->job_running > NO_JOB_RUNNING) {
			if (slurmctld_locked)
				job_fail(bg_record->job_running);
			else
				slurm_fail_job(bg_record->job_running);

		}
		/* If running in Dynamic mode and the block is
		   smaller than the create size, just continue on.
		*/
		if ((bg_conf->layout_mode == LAYOUT_DYNAMIC)
		    && (bg_record->cnode_cnt < create_size)) {
			if (!delete_list)
				delete_list = list_create(NULL);
			list_append(delete_list, bg_record);
			continue;
		}

		/* keep track of the smallest size that is at least
		   the size of create_size. */
		if (!smallest_bg_record ||
		    (smallest_bg_record->cnode_cnt > bg_record->cnode_cnt))
			smallest_bg_record = bg_record;
	}
	list_iterator_destroy(itr);
	slurm_mutex_unlock(&block_state_mutex);

	if (bg_conf->layout_mode != LAYOUT_DYNAMIC) {
		debug3("running non-dynamic mode");
		/* This should never happen, but just in case... */
		if (delete_list)
			list_destroy(delete_list);

		/* If we found a block that is smaller or equal to a
		   midplane we will just mark it in an error state as
		   opposed to draining the node.
		*/
		if (smallest_bg_record
		    && (smallest_bg_record->cnode_cnt < bg_conf->mp_cnode_cnt)){
			if (smallest_bg_record->state & BG_BLOCK_ERROR_FLAG) {
				rc = SLURM_NO_CHANGE_IN_DATA;
				goto cleanup;
			}

			rc = put_block_in_error_state(
				smallest_bg_record, reason);
			goto cleanup;
		}

		debug("No block under 1 midplane available for this nodecard.  "
		      "Draining the whole node.");
		if (!node_already_down(mp_name)) {
			if (slurmctld_locked)
				drain_nodes(mp_name, reason,
					    slurm_get_slurm_user_id());
			else
				slurm_drain_nodes(mp_name, reason,
						  slurm_get_slurm_user_id());
		}
		rc = SLURM_SUCCESS;
		goto cleanup;
	}

	/* below is only for Dynamic mode */

	if (delete_list) {
		int cnt_set = 0;
		bitstr_t *iobitmap = bit_alloc(bg_conf->ionodes_per_mp);
		/* don't lock here since it is handled inside
		   the put_block_in_error_state
		*/
		itr = list_iterator_create(delete_list);
		while ((bg_record = list_next(itr))) {
			debug2("combining smaller than nodecard "
			       "dynamic block %s",
			       bg_record->bg_block_id);
			while (bg_record->job_running > NO_JOB_RUNNING)
				sleep(1);

			bit_or(iobitmap, bg_record->ionode_bitmap);
			cnt_set++;
		}
		list_iterator_destroy(itr);
		list_destroy(delete_list);
		if (!cnt_set) {
			FREE_NULL_BITMAP(iobitmap);
			rc = SLURM_ERROR;
			goto cleanup;
		}
		/* set the start to be the same as the start of the
		   ionode_bitmap.  If no ionodes set (not a small
		   block) set io_start = 0. */
		if ((io_start = bit_ffs(iobitmap)) == -1) {
			io_start = 0;
			if (create_size > bg_conf->nodecard_cnode_cnt)
				blockreq.small128 = 4;
			else
				blockreq.small32 = 16;
		} else if (create_size <= bg_conf->nodecard_cnode_cnt)
			blockreq.small32 = 1;
		else
			/* this should never happen */
			blockreq.small128 = 1;

		FREE_NULL_BITMAP(iobitmap);
	} else if (smallest_bg_record) {
		debug2("smallest dynamic block is %s",
		       smallest_bg_record->bg_block_id);
		if (smallest_bg_record->state & BG_BLOCK_ERROR_FLAG) {
			rc = SLURM_NO_CHANGE_IN_DATA;
			goto cleanup;
		}

		while (smallest_bg_record->job_running > NO_JOB_RUNNING)
			sleep(1);

		if (smallest_bg_record->cnode_cnt == create_size) {
			rc = put_block_in_error_state(
				smallest_bg_record, reason);
			goto cleanup;
		}

		if (create_size > smallest_bg_record->cnode_cnt) {
			/* we should never get here.  This means we
			 * have a create_size that is bigger than a
			 * block that is already made.
			 */
			rc = put_block_in_error_state(
				smallest_bg_record, reason);
			goto cleanup;
		}
		debug3("node count is %d", smallest_bg_record->cnode_cnt);
		switch(smallest_bg_record->cnode_cnt) {
#ifndef HAVE_BGL
		case 64:
			blockreq.small32 = 2;
			break;
		case 256:
			blockreq.small32 = 8;
			break;
#endif
		case 128:
			blockreq.small32 = 4;
			break;
		case 512:
		default:
			blockreq.small32 = 16;
			break;
		}

		if (create_size != bg_conf->nodecard_cnode_cnt) {
			blockreq.small128 = blockreq.small32 / 4;
			blockreq.small32 = 0;
			io_start = 0;
		} else if ((io_start =
			    bit_ffs(smallest_bg_record->ionode_bitmap)) == -1)
			/* set the start to be the same as the start of the
			   ionode_bitmap.  If no ionodes set (not a small
			   block) set io_start = 0. */
			io_start = 0;
	} else {
		switch(create_size) {
#ifndef HAVE_BGL
		case 64:
			blockreq.small64 = 8;
			break;
		case 256:
			blockreq.small256 = 2;
			break;
#endif
		case 32:
			blockreq.small32 = 16;
			break;
		case 128:
			blockreq.small128 = 4;
			break;
		case 512:
			if (!node_already_down(mp_name)) {
				char *reason = "select_bluegene: nodecard down";
				if (slurmctld_locked)
					drain_nodes(mp_name, reason,
						    slurm_get_slurm_user_id());
				else
					slurm_drain_nodes(
						mp_name, reason,
						slurm_get_slurm_user_id());
			}
			rc = SLURM_SUCCESS;
			goto cleanup;
			break;
		default:
			error("Unknown create size of %d", create_size);
			break;
		}
		/* since we don't have a block in this midplane
		   we need to start at the beginning. */
		io_start = 0;
		/* we also need a bg_block to pretend to be the
		   smallest block that takes up the entire midplane. */
	}


	/* Here we need to add blocks that take up nodecards on this
	   midplane.  Since Slurm only keeps track of midplanes
	   natively, this is the only way to handle this case.
	*/
	requests = list_create(destroy_bg_record);
	add_bg_record(requests, NULL, &blockreq, 1, io_start);

	slurm_mutex_lock(&block_state_mutex);
	delete_list = list_create(NULL);
	while ((bg_record = list_pop(requests))) {
		itr = list_iterator_create(bg_lists->main);
		while ((found_record = list_next(itr))) {
			if (!blocks_overlap(bg_record, found_record))
				continue;
			list_push(delete_list, found_record);
			list_remove(itr);
		}
		list_iterator_destroy(itr);

		/* we need to add this record since it doesn't exist */
		if (bridge_block_create(bg_record) == SLURM_ERROR) {
			destroy_bg_record(bg_record);
			error("down_sub_node_blocks: "
			      "unable to configure block in api");
			continue;
		}

		debug("adding block %s to fill in small blocks "
		      "around bad nodecards",
		      bg_record->bg_block_id);
		print_bg_record(bg_record);
		list_append(bg_lists->main, bg_record);
		if (bit_overlap(bg_record->ionode_bitmap,
				tmp_record.ionode_bitmap)) {
			/* here we know the error block doesn't exist
			   so just set the state here */
			slurm_mutex_unlock(&block_state_mutex);
			rc = put_block_in_error_state(bg_record, reason);
			slurm_mutex_lock(&block_state_mutex);
		}
	}
	list_destroy(requests);

	if (delete_list) {
		slurm_mutex_unlock(&block_state_mutex);
		free_block_list(NO_VAL, delete_list, 0, 0);
		list_destroy(delete_list);
	}
	slurm_mutex_lock(&block_state_mutex);
	sort_bg_record_inc_size(bg_lists->main);
	slurm_mutex_unlock(&block_state_mutex);
	last_bg_update = time(NULL);

cleanup:
	FREE_NULL_BITMAP(tmp_record.mp_bitmap);
	FREE_NULL_BITMAP(tmp_record.ionode_bitmap);

	return rc;

}
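down_nodecard() addresses the failed nodecard as an inclusive ionode bit range [io_start, io_start + io_cnt], with io_cnt derived from io_ratio as in the setup at the top of the function; io_start itself is supplied by the caller. A toy sketch of that translation follows; the per-nodecard derivation of io_start, the ratio, and the nodecard count (2 ionodes per nodecard, 16 nodecards per midplane) are all assumptions for illustration.

#include <stdio.h>

int main(void)
{
	double io_ratio = 2.0;		/* assumed ionodes per nodecard */
	int nodecards_per_mp = 16;	/* assumed nodecards per midplane */

	for (int nc = 0; nc < nodecards_per_mp; nc++) {
		/* One nodecard worth of ionodes, as in down_nodecard():
		 * io_cnt ends up as (ionodes per nodecard) - 1. */
		int io_cnt = (int)(1 * io_ratio);
		if (io_cnt)
			io_cnt--;
		int io_start = (int)(nc * io_ratio);
		printf("nodecard %2d -> ionodes %2d-%2d\n",
		       nc, io_start, io_start + io_cnt);
	}
	return 0;
}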
Example No. 21
/* To effectively deal with heterogeneous nodes, we fake a cyclic
 * distribution to figure out how many cpus are needed on each node.
 *
 * This routine is a slightly modified "version" of the routine
 * _task_layout_block in src/common/dist_tasks.c. We do not need to
 * assign tasks to job->hostid[] and job->tids[][] at this point so
 * the cpu allocation is the same for cyclic and block.
 *
 * For the consumable resources support we need to determine what
 * "node/CPU/Core/thread"-tuplets will be allocated for a given job.
 * In the past we assumed that we only allocated one task per CPU (at
 * that point the lowest level of logical processor) and didn't allow
 * the use of overcommit. We have changed this philosophy and are now
 * allowing people to overcommit their resources and expect the system
 * administrator to enable the task/affinity plug-in which will then
 * bind all of a job's tasks to its allocated resources thereby
 * avoiding interference between co-allocated running jobs.
 *
 * In the consumable resources environment we need to determine the
 * layout schema within slurmctld.
 *
 * We have a core_bitmap of all available cores. All we're doing here
 * is removing cores that are not needed based on the task count, and
 * the choice of cores to remove is based on the distribution:
 * - "cyclic" removes cores "evenly", starting from the last socket,
 * - "block" removes cores from the "last" socket(s)
 * - "plane" removes cores "in chunks"
 */
extern int cr_dist(struct job_record *job_ptr, const uint16_t cr_type)
{
	int error_code, cr_cpu = 1;

	if (((job_ptr->job_resrcs->node_req == NODE_CR_RESERVED) ||
	     (job_ptr->details->whole_node != 0)) &&
	    (job_ptr->details->core_spec == 0)) {
		/* the job has been allocated an EXCLUSIVE set of nodes,
		 * so it gets all of the bits in the core_bitmap and
		 * all of the available CPUs in the cpus array */
		int size = bit_size(job_ptr->job_resrcs->core_bitmap);
		bit_nset(job_ptr->job_resrcs->core_bitmap, 0, size-1);
		return SLURM_SUCCESS;
	}

	_log_select_maps("cr_dist/start", job_ptr->job_resrcs->node_bitmap,
			 job_ptr->job_resrcs->core_bitmap);
	if (job_ptr->details->task_dist == SLURM_DIST_PLANE) {
		/* perform a plane distribution on the 'cpus' array */
		error_code = _compute_plane_dist(job_ptr);
		if (error_code != SLURM_SUCCESS) {
			error("cons_res: cr_dist: Error in "
			      "_compute_plane_dist");
			return error_code;
		}
	} else {
		/* perform a cyclic distribution on the 'cpus' array */
		error_code = _compute_c_b_task_dist(job_ptr);
		if (error_code != SLURM_SUCCESS) {
			error("cons_res: cr_dist: Error in "
			      "_compute_c_b_task_dist");
			return error_code;
		}
	}

	/* now sync up the core_bitmap with the allocated 'cpus' array
	 * based on the given distribution AND resource setting */
	if ((cr_type & CR_CORE) || (cr_type & CR_SOCKET))
		cr_cpu = 0;

	if (cr_cpu) {
		_block_sync_core_bitmap(job_ptr, cr_type);
		return SLURM_SUCCESS;
	}

	/*
	 * If SelectTypeParameters requests a block distribution for cores
	 * by default, use that distribution when no particular core
	 * distribution is specified.
	 * Note: the cyclic core distribution, which is the default, is
	 * handled by the next code block.
	 */
	if (slurmctld_conf.select_type_param & CR_CORE_DEFAULT_DIST_BLOCK) {
		switch(job_ptr->details->task_dist) {
		case SLURM_DIST_ARBITRARY:
		case SLURM_DIST_BLOCK:
		case SLURM_DIST_CYCLIC:
		case SLURM_DIST_UNKNOWN:
			_block_sync_core_bitmap(job_ptr, cr_type);
			return SLURM_SUCCESS;
		}
	}

	/* Determine the number of logical processors per node needed
	 * for this job. Make sure below matches the layouts in
	 * lllp_distribution in plugins/task/affinity/dist_task.c (FIXME) */
	switch(job_ptr->details->task_dist) {
	case SLURM_DIST_BLOCK_BLOCK:
	case SLURM_DIST_CYCLIC_BLOCK:
	case SLURM_DIST_PLANE:
		_block_sync_core_bitmap(job_ptr, cr_type);
		break;
	case SLURM_DIST_ARBITRARY:
	case SLURM_DIST_BLOCK:
	case SLURM_DIST_CYCLIC:
	case SLURM_DIST_BLOCK_CYCLIC:
	case SLURM_DIST_CYCLIC_CYCLIC:
	case SLURM_DIST_BLOCK_CFULL:
	case SLURM_DIST_CYCLIC_CFULL:
	case SLURM_DIST_UNKNOWN:
		error_code = _cyclic_sync_core_bitmap(job_ptr, cr_type);
		break;
	default:
		error("select/cons_res: invalid task_dist entry");
		return SLURM_ERROR;
	}

	_log_select_maps("cr_dist/fini", job_ptr->job_resrcs->node_bitmap,
			 job_ptr->job_resrcs->core_bitmap);
	return error_code;
}
Example No. 22
/* To effectively deal with heterogeneous nodes, we fake a cyclic
 * distribution to figure out how many cpus are needed on each node.
 *
 * This routine is a slightly modified "version" of the routine
 * _task_layout_block in src/common/dist_tasks.c. We do not need to
 * assign tasks to job->hostid[] and job->tids[][] at this point so
 * the cpu allocation is the same for cyclic and block.
 *
 * For the consumable resources support we need to determine what
 * "node/CPU/Core/thread"-tuplets will be allocated for a given job.
 * In the past we assumed that we only allocated one task per CPU (at
 * that point the lowest level of logical processor) and didn't allow
 * the use of overcommit. We have changed this philosophy and are now
 * allowing people to overcommit their resources and expect the system
 * administrator to enable the task/affinity plug-in which will then
 * bind all of a job's tasks to its allocated resources thereby
 * avoiding interference between co-allocated running jobs.
 *
 * In the consumable resources environment we need to determine the
 * layout schema within slurmctld.
 *
 * We have a core_bitmap of all available cores. All we're doing here
 * is removing cores that are not needed based on the task count, and
 * the choice of cores to remove is based on the distribution:
 * - "cyclic" removes cores "evenly", starting from the last socket,
 * - "block" removes cores from the "last" socket(s)
 * - "plane" removes cores "in chunks"
 *
 * IN job_ptr - job to be allocated resources
 * IN cr_type - allocation type (sockets, cores, etc.)
 * IN preempt_mode - true if testing with simulated preempted jobs
 */
extern int cr_dist(struct job_record *job_ptr, const uint16_t cr_type,
		   bool preempt_mode)
{
	int error_code, cr_cpu = 1;

	if (job_ptr->details->core_spec != (uint16_t) NO_VAL) {
		/* The job has been allocated all non-specialized cores,
		 * so we don't need to select specific CPUs. */
		return SLURM_SUCCESS;
	}

	if ((job_ptr->job_resrcs->node_req == NODE_CR_RESERVED) ||
	    (job_ptr->details->whole_node == 1)) {
		int n, i;
		job_resources_t *job_res = job_ptr->job_resrcs;
		/* The job has been allocated an EXCLUSIVE set of nodes,
		 * so it gets all of the bits in the core_bitmap and
		 * all of the available CPUs in the cpus array. */
		int size = bit_size(job_res->core_bitmap);
		bit_nset(job_res->core_bitmap, 0, size-1);

		/* Up to this point job_res->cpus may not hold the right
		 * CPU count; it is most likely a core count.  Fix it here
		 * so tasks can be laid out correctly.
		 */
		size = bit_size(job_res->node_bitmap);
		for (i = 0, n = bit_ffs(job_res->node_bitmap); n < size; n++) {
			if (bit_test(job_res->node_bitmap, n) == 0)
				continue;
			job_res->cpus[i++] = select_node_record[n].cpus;
		}
		return SLURM_SUCCESS;
	}

	_log_select_maps("cr_dist/start", job_ptr->job_resrcs->node_bitmap,
			 job_ptr->job_resrcs->core_bitmap);
	if ((job_ptr->details->task_dist & SLURM_DIST_STATE_BASE) ==
	    SLURM_DIST_PLANE) {
		/* perform a plane distribution on the 'cpus' array */
		error_code = _compute_plane_dist(job_ptr);
		if (error_code != SLURM_SUCCESS) {
			error("cons_res: cr_dist: Error in "
			      "_compute_plane_dist");
			return error_code;
		}
	} else {
		/* perform a cyclic distribution on the 'cpus' array */
		error_code = _compute_c_b_task_dist(job_ptr);
		if (error_code != SLURM_SUCCESS) {
			error("cons_res: cr_dist: Error in "
			      "_compute_c_b_task_dist");
			return error_code;
		}
	}

	/* now sync up the core_bitmap with the allocated 'cpus' array
	 * based on the given distribution AND resource setting */
	if ((cr_type & CR_CORE) || (cr_type & CR_SOCKET))
		cr_cpu = 0;

	if (cr_cpu) {
		_block_sync_core_bitmap(job_ptr, cr_type);
		return SLURM_SUCCESS;
	}

	/*
	 * If SelectTypeParameters requests a block distribution for cores
	 * by default, use that distribution when no particular core
	 * distribution is specified.
	 * Note: the cyclic core distribution, which is the default, is
	 * handled by the next code block.
	 */
	if (slurmctld_conf.select_type_param & CR_CORE_DEFAULT_DIST_BLOCK) {
		switch(job_ptr->details->task_dist & SLURM_DIST_NODEMASK) {
		case SLURM_DIST_ARBITRARY:
		case SLURM_DIST_BLOCK:
		case SLURM_DIST_CYCLIC:
		case SLURM_DIST_UNKNOWN:
			_block_sync_core_bitmap(job_ptr, cr_type);
			return SLURM_SUCCESS;
		}
	}

	/* Determine the number of logical processors per node needed
	 * for this job. Make sure below matches the layouts in
	 * lllp_distribution in plugins/task/affinity/dist_task.c (FIXME) */
	switch(job_ptr->details->task_dist & SLURM_DIST_NODESOCKMASK) {
	case SLURM_DIST_BLOCK_BLOCK:
	case SLURM_DIST_CYCLIC_BLOCK:
	case SLURM_DIST_PLANE:
		_block_sync_core_bitmap(job_ptr, cr_type);
		break;
	case SLURM_DIST_ARBITRARY:
	case SLURM_DIST_BLOCK:
	case SLURM_DIST_CYCLIC:
	case SLURM_DIST_BLOCK_CYCLIC:
	case SLURM_DIST_CYCLIC_CYCLIC:
	case SLURM_DIST_BLOCK_CFULL:
	case SLURM_DIST_CYCLIC_CFULL:
	case SLURM_DIST_UNKNOWN:
		error_code = _cyclic_sync_core_bitmap(job_ptr, cr_type,
						      preempt_mode);
		break;
	default:
		error("select/cons_res: invalid task_dist entry");
		return SLURM_ERROR;
	}

	_log_select_maps("cr_dist/fini", job_ptr->job_resrcs->node_bitmap,
			 job_ptr->job_resrcs->core_bitmap);
	return error_code;
}
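A toy illustration of the difference the comment above describes: removing surplus cores in a "block" versus a "cyclic" pattern. It operates on a flat 2-socket by 4-core bitstring and is not the plugin's actual selection logic; the socket layout, core counts, and the keep-oriented formulation (keeping the needed cores instead of removing the surplus) are assumptions.

#include <bitstring.h>
#include <stdio.h>
#include <stdlib.h>

#define SOCKETS		2
#define CORES_PER_SOCK	4

/* Keep 'need' cores out of SOCKETS*CORES_PER_SOCK available ones:
 * block  = fill sockets in order (surplus comes off the last socket),
 * cyclic = round-robin across sockets (surplus removed "evenly").
 * Assumes need <= SOCKETS*CORES_PER_SOCK. */
static void keep_cores(bitstr_t *cores, int need, int cyclic)
{
	int total = SOCKETS * CORES_PER_SOCK;

	bit_nclear(cores, 0, total - 1);
	if (cyclic) {
		for (int i = 0; i < need; i++)
			bit_set(cores, (i % SOCKETS) * CORES_PER_SOCK +
				(i / SOCKETS));
	} else {
		bit_nset(cores, 0, need - 1);
	}
}

static void show(const char *label, bitstr_t *cores)
{
	printf("%s: ", label);
	for (int i = 0; i < SOCKETS * CORES_PER_SOCK; i++)
		putchar(bit_test(cores, i) ? '1' : '0');
	putchar('\n');
}

int main(void)
{
	bitstr_t *cores = bit_alloc(SOCKETS * CORES_PER_SOCK);

	keep_cores(cores, 5, 0);
	show("block ", cores);	/* prints 11111000 */
	keep_cores(cores, 5, 1);
	show("cyclic", cores);	/* prints 11101100 */
	free(cores);
	return 0;
}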