Example #1
static void _load_config(void)
{
    char *sched_params, *tmp_ptr;

    sched_params = slurm_get_sched_params();
    debug_flags  = slurm_get_debug_flags();

    if (sched_params && (tmp_ptr=strstr(sched_params, "interval=")))
        backfill_interval = atoi(tmp_ptr + 9);
    if (backfill_interval < 1) {
        fatal("Invalid backfill scheduler interval: %d",
              backfill_interval);
    }

    if (sched_params && (tmp_ptr=strstr(sched_params, "bf_window=")))
        backfill_window = atoi(tmp_ptr + 10) * 60;  /* mins to secs */
    if (backfill_window < 1) {
        fatal("Invalid backfill scheduler window: %d",
              backfill_window);
    }
    if (sched_params && (tmp_ptr=strstr(sched_params, "max_job_bf=")))
        max_backfill_job_cnt = atoi(tmp_ptr + 11);
    if (max_backfill_job_cnt < 1) {
        fatal("Invalid backfill scheduler max_job_bf: %d",
              max_backfill_job_cnt);
    }
    xfree(sched_params);
}
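All of these variants repeat the same strstr-plus-offset idiom: find "key=" in the SchedulerParameters string and atoi() whatever follows it, where the magic offsets ("+ 9", "+ 10", "+ 11") are just strlen(key). A minimal standalone sketch of that idiom as a helper (hypothetical; the Slurm sources inline the pattern at each call site). Note that a bare "interval=" search also matches inside longer keys such as "bf_interval=", a quirk the later examples avoid by using fully prefixed names.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Hypothetical helper: return the integer following "key" in params,
 * or dflt when params is NULL or the key is absent. */
static int _get_int_param(const char *params, const char *key, int dflt)
{
	const char *tmp_ptr;

	if (params && (tmp_ptr = strstr(params, key)))
		return atoi(tmp_ptr + strlen(key));
	return dflt;
}

int main(void)
{
	const char *params = "interval=30,bf_window=1440";

	printf("%d\n", _get_int_param(params, "interval=", 1));   /* 30 */
	printf("%d\n", _get_int_param(params, "bf_window=", 1));  /* 1440 */
	return 0;
}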
Example #2
File: builtin.c  Project: VURM/slurm
static void _load_config(void)
{
	char *sched_params, *tmp_ptr;

	sched_timeout = slurm_get_msg_timeout() / 2;
	sched_timeout = MAX(sched_timeout, 1);
	sched_timeout = MIN(sched_timeout, 10);

	sched_params = slurm_get_sched_params();

	if (sched_params && (tmp_ptr=strstr(sched_params, "interval=")))
		backfill_interval = atoi(tmp_ptr + 9);
	if (backfill_interval < 1) {
		fatal("Invalid backfill scheduler interval: %d",
		      backfill_interval);
	}

	if (sched_params && (tmp_ptr=strstr(sched_params, "max_job_bf=")))
		max_backfill_job_cnt = atoi(tmp_ptr + 11);
	if (max_backfill_job_cnt < 1) {
		fatal("Invalid backfill scheduler max_job_bf: %d",
		      max_backfill_job_cnt);
	}
	xfree(sched_params);
}
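Note the clamp at the top: the scheduler timeout defaults to half of the configured message timeout, then the MAX()/MIN() pair bounds it to the range of 1 to 10 seconds.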
Example #3
static void _load_config(void)
{
	char *sched_params, *tmp_ptr;

	sched_params = slurm_get_sched_params();
	debug_flags  = slurm_get_debug_flags();

	if (sched_params && (tmp_ptr=strstr(sched_params, "bf_interval=")))
		backfill_interval = atoi(tmp_ptr + 12);
	if (backfill_interval < 1) {
		fatal("Invalid backfill scheduler bf_interval: %d",
		      backfill_interval);
	}

	if (sched_params && (tmp_ptr=strstr(sched_params, "bf_window=")))
		backfill_window = atoi(tmp_ptr + 10) * 60;  /* mins to secs */
	if (backfill_window < 1) {
		fatal("Invalid backfill scheduler window: %d",
		      backfill_window);
	}
	if (sched_params && (tmp_ptr=strstr(sched_params, "max_job_bf=")))
		max_backfill_job_cnt = atoi(tmp_ptr + 11);
	if (max_backfill_job_cnt < 1) {
		fatal("Invalid backfill scheduler max_job_bf: %d",
		      max_backfill_job_cnt);
	}
	/* "bf_res=" is vestigial from version 2.3 and can be removed later.
	 * Only "bf_resolution=" is documented. */
	if (sched_params && (tmp_ptr=strstr(sched_params, "bf_res=")))
		backfill_resolution = atoi(tmp_ptr + 7);
	if (sched_params && (tmp_ptr=strstr(sched_params, "bf_resolution=")))
		backfill_resolution = atoi(tmp_ptr + 14);
	if (backfill_resolution < 1) {
		fatal("Invalid backfill scheduler resolution: %d",
		      backfill_resolution);
	}

	if (sched_params && (tmp_ptr=strstr(sched_params, "bf_max_job_part=")))
		max_backfill_job_per_part = atoi(tmp_ptr + 16);
	if (max_backfill_job_per_part < 0) {
		fatal("Invalid backfill scheduler bf_max_job_part: %d",
		      max_backfill_job_per_part);
	}

	if (sched_params && (tmp_ptr=strstr(sched_params, "bf_max_job_user="******"Invalid backfill scheduler bf_max_job_user: %d",
		      max_backfill_job_per_user);
	}

	/* bf_continue makes backfill resume where it left off if interrupted */
	if (sched_params && (strstr(sched_params, "bf_continue"))) {
		backfill_continue = true;
	}

	xfree(sched_params);
}
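When both the vestigial "bf_res=" and the documented "bf_resolution=" appear in SchedulerParameters, the documented spelling wins simply because it is parsed second and overwrites the earlier value.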
Example #4
extern int init(void)
{
	char *sched_params;
	verbose("preempt/partition_prio loaded");
	sched_params = slurm_get_sched_params();
	if (xstrcasestr(sched_params, "preempt_youngest_first"))
		youngest_order = true;
	xfree(sched_params);
	return SLURM_SUCCESS;
}
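Unlike the strstr() examples, this one passes sched_params to xstrcasestr() without a NULL check, which suggests Slurm's xstrcasestr() tolerates NULL arguments. A portable sketch of a search with that contract (an assumption inferred from this call site, not Slurm's actual implementation):

#include <ctype.h>
#include <string.h>

/* Sketch: case-insensitive substring search that, like the assumed
 * contract of xstrcasestr(), returns NULL if either argument is NULL. */
static char *_casestr(const char *haystack, const char *needle)
{
	size_t i, nlen;

	if (!haystack || !needle)
		return NULL;
	nlen = strlen(needle);
	for (; *haystack; haystack++) {
		for (i = 0; i < nlen; i++) {
			if (tolower((unsigned char)haystack[i]) !=
			    tolower((unsigned char)needle[i]))
				break;
		}
		if (i == nlen)
			return (char *)haystack;
	}
	return NULL;
}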
Example #5
static bool _enable_pack_steps(void)
{
	bool enabled = false;
	char *sched_params = slurm_get_sched_params();

	if (sched_params && strstr(sched_params, "enable_hetero_steps"))
		enabled = true;
	else if (mpi_type && strstr(mpi_type, "none"))
		enabled = true;
	xfree(sched_params);
	return enabled;
}
Example #6
static void _get_config(void)
{
	char *opt;
	char *params = slurm_get_sched_params();

	if (params) {
		/*                    01234567890123456789012 */
		opt = strstr(params, "jobs_per_user_per_hour=");
		if (opt)
			jobs_per_user_per_hour = atoi(opt + 23);
		info("job_submit/throttle: jobs_per_user_per_hour=%d",
		     jobs_per_user_per_hour);
		xfree(params);
	}
}
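For context, the value parsed here comes from slurm.conf; an illustrative configuration line (the limit value is arbitrary) would be:

	SchedulerParameters=jobs_per_user_per_hour=100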
Example #7
static void _set_inv_interval(void)
{
	char *tmp_ptr, *sched_params = slurm_get_sched_params();
	int i;

	if (sched_params) {
		if (sched_params &&
		    (tmp_ptr = xstrcasestr(sched_params,
						"inventory_interval="))) {
		/*                              0123456789012345678 */
			i = atoi(tmp_ptr + 19);
			if (i < 0)
				error("ignoring SchedulerParameters: "
				      "inventory_interval of %d", i);
			else
				inv_interval = i;
		}
		xfree(sched_params);
	}
}
Example #8
static void _load_config(void)
{
	char *sched_params, *select_type, *tmp_ptr;

	sched_timeout = slurm_get_msg_timeout() / 2;
	sched_timeout = MAX(sched_timeout, 1);
	sched_timeout = MIN(sched_timeout, 10);

	sched_params = slurm_get_sched_params();

	if (sched_params && (tmp_ptr=strstr(sched_params, "interval=")))
		builtin_interval = atoi(tmp_ptr + 9);
	if (builtin_interval < 1) {
		error("Invalid SchedulerParameters interval: %d",
		      builtin_interval);
		builtin_interval = BACKFILL_INTERVAL;
	}

	if (sched_params && (tmp_ptr=strstr(sched_params, "max_job_bf=")))
		max_sched_job_cnt = atoi(tmp_ptr + 11);
	if (sched_params && (tmp_ptr=strstr(sched_params, "bf_max_job_test=")))
		max_sched_job_cnt = atoi(tmp_ptr + 16);
	if (max_sched_job_cnt < 1) {
		error("Invalid SchedulerParameters bf_max_job_test: %d",
		      max_sched_job_cnt);
		max_sched_job_cnt = 50;
	}
	xfree(sched_params);

	select_type = slurm_get_select_type();
	if (!xstrcmp(select_type, "select/serial")) {
		/* Do not spend time computing expected start time for
		 * pending jobs */
		max_sched_job_cnt = 0;
		stop_builtin_agent();
	}
	xfree(select_type);
}
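Unlike the fatal() calls in the backfill variants above, this builtin-scheduler version treats a bad value as recoverable: it logs an error() and falls back to a compiled-in default (BACKFILL_INTERVAL, or 50 jobs).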
Example #9
int main(int argc, char **argv)
{
	log_options_t logopt = LOG_OPTS_STDERR_ONLY;
	job_desc_msg_t desc;
	resource_allocation_response_msg_t *alloc;
	time_t before, after;
	allocation_msg_thread_t *msg_thr;
	char **env = NULL, *cluster_name;
	int status = 0;
	int retries = 0;
	pid_t pid  = getpid();
	pid_t tpgid = 0;
	pid_t rc_pid = 0;
	int i, rc = 0;
	static char *msg = "Slurm job queue full, sleeping and retrying.";
	slurm_allocation_callbacks_t callbacks;

	slurm_conf_init(NULL);
	log_init(xbasename(argv[0]), logopt, 0, NULL);
	_set_exit_code();

	if (spank_init_allocator() < 0) {
		error("Failed to initialize plugin stack");
		exit(error_exit);
	}

	/* Be sure to call spank_fini when salloc exits
	 */
	if (atexit((void (*) (void)) spank_fini) < 0)
		error("Failed to register atexit handler for plugins: %m");


	if (initialize_and_process_args(argc, argv) < 0) {
		error("salloc parameter parsing");
		exit(error_exit);
	}
	/* reinit log with new verbosity (if changed by command line) */
	if (opt.verbose || opt.quiet) {
		logopt.stderr_level += opt.verbose;
		logopt.stderr_level -= opt.quiet;
		logopt.prefix_level = 1;
		log_alter(logopt, 0, NULL);
	}

	if (spank_init_post_opt() < 0) {
		error("Plugin stack post-option processing failed");
		exit(error_exit);
	}

	_set_spank_env();
	_set_submit_dir_env();
	if (opt.cwd && chdir(opt.cwd)) {
		error("chdir(%s): %m", opt.cwd);
		exit(error_exit);
	}

	if (opt.get_user_env_time >= 0) {
		bool no_env_cache = false;
		char *sched_params;
		char *user = uid_to_string(opt.uid);

		if (xstrcmp(user, "nobody") == 0) {
			error("Invalid user id %u: %m", (uint32_t)opt.uid);
			exit(error_exit);
		}

		sched_params = slurm_get_sched_params();
		no_env_cache = (sched_params &&
				strstr(sched_params, "no_env_cache"));
		xfree(sched_params);

		env = env_array_user_default(user,
					     opt.get_user_env_time,
					     opt.get_user_env_mode,
					     no_env_cache);
		xfree(user);
		if (env == NULL)
			exit(error_exit);    /* error already logged */
		_set_rlimits(env);
	}

	/*
	 * Job control for interactive salloc sessions: only if ...
	 *
	 * a) input is from a terminal (stdin has valid termios attributes),
	 * b) controlling terminal exists (non-negative tpgid),
	 * c) salloc is not run in allocation-only (--no-shell) mode,
	 * NOTE: d and e below are configuration dependent
	 * d) salloc runs in its own process group (true in interactive
	 *    shells that support job control),
	 * e) salloc has been configured at compile-time to support background
	 *    execution and is not currently in the background process group.
	 */
	if (tcgetattr(STDIN_FILENO, &saved_tty_attributes) < 0) {
		/*
		 * Test existence of controlling terminal (tpgid > 0)
		 * after first making sure stdin is not redirected.
		 */
	} else if ((tpgid = tcgetpgrp(STDIN_FILENO)) < 0) {
#ifdef HAVE_ALPS_CRAY
		verbose("no controlling terminal");
#else
		if (!opt.no_shell) {
			error("no controlling terminal: please set --no-shell");
			exit(error_exit);
		}
#endif
#ifdef SALLOC_RUN_FOREGROUND
	} else if ((!opt.no_shell) && (pid == getpgrp())) {
		if (tpgid == pid)
			is_interactive = true;
		while (tcgetpgrp(STDIN_FILENO) != pid) {
			if (!is_interactive) {
				error("Waiting for program to be placed in "
				      "the foreground");
				is_interactive = true;
			}
			killpg(pid, SIGTTIN);
		}
	}
#else
	} else if ((!opt.no_shell) && (getpgrp() == tcgetpgrp(STDIN_FILENO))) {
Example #10
extern int
get_cpuinfo(uint16_t *p_cpus, uint16_t *p_boards,
	    uint16_t *p_sockets, uint16_t *p_cores, uint16_t *p_threads,
	    uint16_t *p_block_map_size,
	    uint16_t **p_block_map, uint16_t **p_block_map_inv)
{
	enum { SOCKET=0, CORE=1, PU=2, LAST_OBJ=3 };
	hwloc_topology_t topology;
	hwloc_obj_t obj;
	hwloc_obj_type_t objtype[LAST_OBJ];
	unsigned idx[LAST_OBJ];
	int nobj[LAST_OBJ];
	int actual_cpus;
	int macid;
	int absid;
	int actual_boards = 1, depth;
	int i;

	debug2("hwloc_topology_init");
	if (hwloc_topology_init(&topology)) {
		/* error initializing the hwloc library */
		debug("hwloc_topology_init() failed.");
		return 1;
	}

	/* parse all system */
	hwloc_topology_set_flags(topology, HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM);

	/* ignores cache, misc */
	hwloc_topology_ignore_type (topology, HWLOC_OBJ_CACHE);
	hwloc_topology_ignore_type (topology, HWLOC_OBJ_MISC);

	/* load topology */
	debug2("hwloc_topology_load");
	if (hwloc_topology_load(topology)) {
		/* error loading the hardware topology */
		debug("hwloc_topology_load() failed.");
		hwloc_topology_destroy(topology);
		return 2;
	}

	/* Some processors (e.g. AMD Opteron 6000 series) contain multiple
	 * NUMA nodes per socket. This is a configuration which does not map
	 * into the hardware entities that Slurm optimizes resource allocation
	 * for (PU/thread, core, socket, baseboard, node and network switch).
	 * In order to optimize resource allocations on such hardware, Slurm
	 * will consider each NUMA node within the socket as a separate socket.
	 * You can disable this by configuring "SchedulerParameters=Ignore_NUMA",
	 * in which case Slurm will report the correct socket count on the node,
	 * but not be able to optimize resource allocations on the NUMA nodes.
	 */
	objtype[SOCKET] = HWLOC_OBJ_SOCKET;
	objtype[CORE]   = HWLOC_OBJ_CORE;
	objtype[PU]     = HWLOC_OBJ_PU;
	if (hwloc_get_type_depth(topology, HWLOC_OBJ_NODE) >
	    hwloc_get_type_depth(topology, HWLOC_OBJ_SOCKET)) {
		char *sched_params = slurm_get_sched_params();
		if (sched_params &&
		    strcasestr(sched_params, "Ignore_NUMA")) {
			info("Ignoring NUMA nodes within a socket");
		} else {
			info("Considering each NUMA node as a socket");
			objtype[SOCKET] = HWLOC_OBJ_NODE;
		}
		xfree(sched_params);
	}

	/* number of objects */
	depth = hwloc_get_type_depth(topology, HWLOC_OBJ_GROUP);
	if (depth != HWLOC_TYPE_DEPTH_UNKNOWN) {
		actual_boards = MAX(hwloc_get_nbobjs_by_depth(topology, depth),
				    1);
	}
	nobj[SOCKET] = hwloc_get_nbobjs_by_type(topology, objtype[SOCKET]);
	nobj[CORE]   = hwloc_get_nbobjs_by_type(topology, objtype[CORE]);
	/*
	 * Workaround for hwloc
	 * hwloc_get_nbobjs_by_type() returns 0 on some architectures.
	 */
	if ( nobj[SOCKET] == 0 ) {
		debug("get_cpuinfo() fudging nobj[SOCKET] from 0 to 1");
		nobj[SOCKET] = 1;
	}
	if ( nobj[CORE] == 0 ) {
		debug("get_cpuinfo() fudging nobj[CORE] from 0 to 1");
		nobj[CORE] = 1;
	}
	if ( nobj[SOCKET] == -1 )
		fatal("get_cpuinfo() can not handle nobj[SOCKET] = -1");
	if ( nobj[CORE] == -1 )
		fatal("get_cpuinfo() can not handle nobj[CORE] = -1");
	actual_cpus  = hwloc_get_nbobjs_by_type(topology, objtype[PU]);
#if 0
	/* Used to find workaround above */
	info("CORE = %d SOCKET = %d actual_cpus = %d nobj[CORE] = %d",
	     CORE, SOCKET, actual_cpus, nobj[CORE]);
#endif
	nobj[PU]     = actual_cpus/nobj[CORE];  /* threads per core */
	nobj[CORE]  /= nobj[SOCKET];            /* cores per socket */

	debug("CPUs:%d Boards:%u Sockets:%d CoresPerSocket:%d ThreadsPerCore:%d",
	      actual_cpus, actual_boards, nobj[SOCKET], nobj[CORE], nobj[PU]);

	/* allocate block_map */
	*p_block_map_size = (uint16_t)actual_cpus;
	if (p_block_map && p_block_map_inv) {
		*p_block_map     = xmalloc(actual_cpus * sizeof(uint16_t));
		*p_block_map_inv = xmalloc(actual_cpus * sizeof(uint16_t));

		/* initialize default as linear mapping */
		for (i = 0; i < actual_cpus; i++) {
			(*p_block_map)[i]     = i;
			(*p_block_map_inv)[i] = i;
		}
		/* create map with hwloc */
		for (idx[SOCKET]=0; idx[SOCKET]<nobj[SOCKET]; ++idx[SOCKET]) {
			for (idx[CORE]=0; idx[CORE]<nobj[CORE]; ++idx[CORE]) {
				for (idx[PU]=0; idx[PU]<nobj[PU]; ++idx[PU]) {
					/* get hwloc_obj by indexes */
					obj=hwloc_get_obj_below_array_by_type(
					            topology, 3, objtype, idx);
					if (!obj)
						continue;
					macid = obj->os_index;
					absid = idx[SOCKET]*nobj[CORE]*nobj[PU]
					      + idx[CORE]*nobj[PU]
					      + idx[PU];

					if ((macid >= actual_cpus) ||
					    (absid >= actual_cpus)) {
						/* physical or logical ID are
						 * out of range */
						continue;
					}
					debug4("CPU map[%d]=>%d", absid, macid);
					(*p_block_map)[absid]     = macid;
					(*p_block_map_inv)[macid] = absid;
				}
			 }
		}
	}

	hwloc_topology_destroy(topology);

	/* update output parameters */
	*p_cpus    = actual_cpus;
	*p_boards  = actual_boards;
	*p_sockets = nobj[SOCKET];
	*p_cores   = nobj[CORE];
	*p_threads = nobj[PU];

#if DEBUG_DETAIL
	/*** Display raw data ***/
	debug("CPUs:%u Boards:%u Sockets:%u CoresPerSocket:%u ThreadsPerCore:%u",
	      *p_cpus, *p_boards, *p_sockets, *p_cores, *p_threads);

	/* Display the mapping tables */
	if (p_block_map && p_block_map_inv) {
		debug("------");
		debug("Abstract -> Machine logical CPU ID block mapping:");
		debug("AbstractId PhysicalId Inverse");
		for (i = 0; i < *p_cpus; i++) {
			debug3("   %4d      %4u       %4u",
				i, (*p_block_map)[i], (*p_block_map_inv)[i]);
		}
		debug("------");
	}
#endif
	return 0;

}
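Stripped of the Slurm specifics, the hwloc pattern used here is: initialize a topology, load it, count objects by type, destroy it. A minimal self-contained sketch (hwloc 1.x API, matching the calls above):

#include <hwloc.h>
#include <stdio.h>

/* Minimal sketch of the counting pattern from get_cpuinfo(). */
int main(void)
{
	hwloc_topology_t topology;
	int sockets, cores, pus;

	if (hwloc_topology_init(&topology))
		return 1;
	if (hwloc_topology_load(topology)) {
		hwloc_topology_destroy(topology);
		return 2;
	}
	sockets = hwloc_get_nbobjs_by_type(topology, HWLOC_OBJ_SOCKET);
	cores   = hwloc_get_nbobjs_by_type(topology, HWLOC_OBJ_CORE);
	pus     = hwloc_get_nbobjs_by_type(topology, HWLOC_OBJ_PU);
	printf("Sockets:%d Cores:%d PUs:%d\n", sockets, cores, pus);
	hwloc_topology_destroy(topology);
	return 0;
}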
Example #11
static void _load_config(void)
{
	char *sched_params, *tmp_ptr;

	sched_params = slurm_get_sched_params();
	debug_flags  = slurm_get_debug_flags();

	if (sched_params && (tmp_ptr=strstr(sched_params, "bf_interval=")))
		backfill_interval = atoi(tmp_ptr + 12);
	if (backfill_interval < 1) {
		error("Invalid SchedulerParameters bf_interval: %d",
		      backfill_interval);
		backfill_interval = BACKFILL_INTERVAL;
	}

	if (sched_params && (tmp_ptr=strstr(sched_params, "bf_window=")))
		backfill_window = atoi(tmp_ptr + 10) * 60;  /* mins to secs */
	if (backfill_window < 1) {
		error("Invalid SchedulerParameters bf_window: %d",
		      backfill_window);
		backfill_window = BACKFILL_WINDOW;
	}

	/* "max_job_bf" replaced by "bf_max_job_test" in version 14.03 and
	 * can be removed later. Only "bf_max_job_test" is documented. */
	if (sched_params && (tmp_ptr=strstr(sched_params, "max_job_bf=")))
		max_backfill_job_cnt = atoi(tmp_ptr + 11);
	if (sched_params && (tmp_ptr=strstr(sched_params, "bf_max_job_test=")))
		max_backfill_job_cnt = atoi(tmp_ptr + 16);
	if (max_backfill_job_cnt < 1) {
		error("Invalid SchedulerParameters bf_max_job_test: %d",
		      max_backfill_job_cnt);
		max_backfill_job_cnt = 50;
	}
	/* "bf_res=" is vestigial from version 2.3 and can be removed later.
	 * Only "bf_resolution=" is documented. */
	if (sched_params && (tmp_ptr=strstr(sched_params, "bf_res=")))
		backfill_resolution = atoi(tmp_ptr + 7);
	if (sched_params && (tmp_ptr=strstr(sched_params, "bf_resolution=")))
		backfill_resolution = atoi(tmp_ptr + 14);
	if (backfill_resolution < 1) {
		error("Invalid SchedulerParameters bf_resolution: %d",
		      backfill_resolution);
		backfill_resolution = BACKFILL_RESOLUTION;
	}

	if (sched_params && (tmp_ptr=strstr(sched_params, "bf_max_job_part=")))
		max_backfill_job_per_part = atoi(tmp_ptr + 16);
	if (max_backfill_job_per_part < 0) {
		error("Invalid SchedulerParameters bf_max_job_part: %d",
		      max_backfill_job_per_part);
		max_backfill_job_per_part = 0;
	}

	if (sched_params && (tmp_ptr=strstr(sched_params, "bf_max_job_start=")))
		max_backfill_jobs_start = atoi(tmp_ptr + 17);
	if (max_backfill_jobs_start < 0) {
		error("Invalid SchedulerParameters bf_max_job_start: %d",
		      max_backfill_jobs_start);
		max_backfill_jobs_start = 0;
	}

	if (sched_params && (tmp_ptr=strstr(sched_params, "bf_max_job_user="******"Invalid SchedulerParameters bf_max_job_user: %d",
		      max_backfill_job_per_user);
		max_backfill_job_per_user = 0;
	}

	/* bf_continue makes backfill resume where it left off if interrupted */
	if (sched_params && (strstr(sched_params, "bf_continue"))) {
		backfill_continue = true;
	}

	if (sched_params && (tmp_ptr=strstr(sched_params, "bf_yield_interval=")))
		sched_timeout = atoi(tmp_ptr + 18);
	if (sched_timeout <= 0) {
		error("Invalid backfill scheduler bf_sched_timeout: %d",
		      sched_timeout);
		sched_timeout = SCHED_TIMEOUT;
	}

	if (sched_params && (tmp_ptr=strstr(sched_params, "bf_yield_sleep=")))
		yield_sleep = atoi(tmp_ptr + 15);
	if (yield_sleep <= 0) {
		error("Invalid backfill scheduler bf_yield_sleep: %d",
		      yield_sleep);
		yield_sleep = YIELD_SLEEP;
	}

	if (sched_params && (tmp_ptr=strstr(sched_params, "max_rpc_cnt=")))
		defer_rpc_cnt = atoi(tmp_ptr + 12);
	if (defer_rpc_cnt < 0) {
		error("Invalid SchedulerParameters max_rpc_cnt: %d",
		      defer_rpc_cnt);
		defer_rpc_cnt = 0;
	}

	xfree(sched_params);
}
Example #12
extern int
get_cpuinfo(uint16_t *p_cpus, uint16_t *p_boards,
	    uint16_t *p_sockets, uint16_t *p_cores, uint16_t *p_threads,
	    uint16_t *p_block_map_size,
	    uint16_t **p_block_map, uint16_t **p_block_map_inv)
{
	enum { SOCKET=0, CORE=1, PU=2, LAST_OBJ=3 };
	hwloc_topology_t topology;
	hwloc_obj_t obj;
	hwloc_obj_type_t objtype[LAST_OBJ];
	unsigned idx[LAST_OBJ];
	int nobj[LAST_OBJ];
	bitstr_t *used_socket = NULL;
	int *cores_per_socket;
	int actual_cpus;
	int macid;
	int absid;
	int actual_boards = 1, depth, sock_cnt, tot_socks = 0;
	int i, used_core_idx, used_sock_idx;

	debug2("hwloc_topology_init");
	if (hwloc_topology_init(&topology)) {
		/* error initializing the hwloc library */
		debug("hwloc_topology_init() failed.");
		return 1;
	}

	/* parse all system */
	hwloc_topology_set_flags(topology, HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM);

	/* ignores cache, misc */
#if HWLOC_API_VERSION < 0x00020000
	hwloc_topology_ignore_type(topology, HWLOC_OBJ_CACHE);
	hwloc_topology_ignore_type(topology, HWLOC_OBJ_MISC);
#else
	hwloc_topology_set_type_filter(topology, HWLOC_OBJ_L1CACHE,
				       HWLOC_TYPE_FILTER_KEEP_NONE);
	hwloc_topology_set_type_filter(topology, HWLOC_OBJ_L2CACHE,
				       HWLOC_TYPE_FILTER_KEEP_NONE);
	hwloc_topology_set_type_filter(topology, HWLOC_OBJ_L3CACHE,
				       HWLOC_TYPE_FILTER_KEEP_NONE);
	hwloc_topology_set_type_filter(topology, HWLOC_OBJ_L4CACHE,
				       HWLOC_TYPE_FILTER_KEEP_NONE);
	hwloc_topology_set_type_filter(topology, HWLOC_OBJ_L5CACHE,
				       HWLOC_TYPE_FILTER_KEEP_NONE);
	hwloc_topology_set_type_filter(topology, HWLOC_OBJ_MISC,
				       HWLOC_TYPE_FILTER_KEEP_NONE);
#endif

	/* load topology */
	debug2("hwloc_topology_load");
	if (hwloc_topology_load(topology)) {
		/* error loading the hardware topology */
		debug("hwloc_topology_load() failed.");
		hwloc_topology_destroy(topology);
		return 2;
	}
#if _DEBUG
	_hwloc_children(topology, hwloc_get_root_obj(topology), 0);
#endif
	/*
	 * Some processors (e.g. AMD Opteron 6000 series) contain multiple
	 * NUMA nodes per socket. This is a configuration which does not map
	 * into the hardware entities that Slurm optimizes resource allocation
	 * for (PU/thread, core, socket, baseboard, node and network switch).
	 * In order to optimize resource allocations on such hardware, Slurm
	 * will consider each NUMA node within the socket as a separate socket.
	 * You can disable this by configuring "SchedulerParameters=Ignore_NUMA",
	 * in which case Slurm will report the correct socket count on the node,
	 * but not be able to optimize resource allocations on the NUMA nodes.
	 */
	objtype[SOCKET] = HWLOC_OBJ_SOCKET;
	objtype[CORE]   = HWLOC_OBJ_CORE;
	objtype[PU]     = HWLOC_OBJ_PU;
	if (hwloc_get_type_depth(topology, HWLOC_OBJ_NODE) >
	    hwloc_get_type_depth(topology, HWLOC_OBJ_SOCKET)) {
		char *sched_params = slurm_get_sched_params();
		if (sched_params &&
		    strcasestr(sched_params, "Ignore_NUMA")) {
			info("Ignoring NUMA nodes within a socket");
		} else {
			info("Considering each NUMA node as a socket");
			objtype[SOCKET] = HWLOC_OBJ_NODE;
		}
		xfree(sched_params);
	}

	/* number of objects */
	depth = hwloc_get_type_depth(topology, HWLOC_OBJ_GROUP);
	if (depth != HWLOC_TYPE_DEPTH_UNKNOWN) {
		actual_boards = MAX(hwloc_get_nbobjs_by_depth(topology, depth),
				    1);
	}

	/*
	 * Count sockets/NUMA containing any cores.
	 * KNL NUMA with no cores are NOT counted.
	 */
	nobj[SOCKET] = 0;
	depth = hwloc_get_type_depth(topology, objtype[SOCKET]);
	used_socket = bit_alloc(_MAX_SOCKET_INX);
	cores_per_socket = xmalloc(sizeof(int) * _MAX_SOCKET_INX);
	sock_cnt = hwloc_get_nbobjs_by_depth(topology, depth);
	for (i = 0; i < sock_cnt; i++) {
		obj = hwloc_get_obj_by_depth(topology, depth, i);
		if (obj->type == objtype[SOCKET]) {
			cores_per_socket[i] = _core_child_count(topology, obj);
			if (cores_per_socket[i] > 0) {
				nobj[SOCKET]++;
				bit_set(used_socket, tot_socks);
			}
			if (++tot_socks >= _MAX_SOCKET_INX) {	/* Bitmap size */
				fatal("Socket count exceeds %d, expand data structure size",
				      _MAX_SOCKET_INX);
				break;
			}
		}
	}

	nobj[CORE] = hwloc_get_nbobjs_by_type(topology, objtype[CORE]);

	/*
	 * Workaround for hwloc bug, in some cases the topology "children" array
	 * does not get populated, so _core_child_count() always returns 0
	 */
	if (nobj[SOCKET] == 0) {
		nobj[SOCKET] = hwloc_get_nbobjs_by_type(topology,
							objtype[SOCKET]);
		if (nobj[SOCKET] == 0) {
			debug("get_cpuinfo() fudging nobj[SOCKET] from 0 to 1");
			nobj[SOCKET] = 1;
		}
		if (nobj[SOCKET] >= _MAX_SOCKET_INX) {	/* Bitmap size */
			fatal("Socket count exceeds %d, expand data structure size",
			      _MAX_SOCKET_INX);
		}
		bit_nset(used_socket, 0, nobj[SOCKET] - 1);
	}

	/*
	 * Workaround for hwloc
	 * hwloc_get_nbobjs_by_type() returns 0 on some architectures.
	 */
	if ( nobj[CORE] == 0 ) {
		debug("get_cpuinfo() fudging nobj[CORE] from 0 to 1");
		nobj[CORE] = 1;
	}
	if ( nobj[SOCKET] == -1 )
		fatal("get_cpuinfo() can not handle nobj[SOCKET] = -1");
	if ( nobj[CORE] == -1 )
		fatal("get_cpuinfo() can not handle nobj[CORE] = -1");
	actual_cpus  = hwloc_get_nbobjs_by_type(topology, objtype[PU]);
#if 0
	/* Used to find workaround above */
	info("CORE = %d SOCKET = %d actual_cpus = %d nobj[CORE] = %d",
	     CORE, SOCKET, actual_cpus, nobj[CORE]);
#endif
	if ((actual_cpus % nobj[CORE]) != 0) {
		error("Thread count (%d) not multiple of core count (%d)",
		      actual_cpus, nobj[CORE]);
	}
	nobj[PU] = actual_cpus / nobj[CORE];	/* threads per core */

	if ((nobj[CORE] % nobj[SOCKET]) != 0) {
		error("Core count (%d) not multiple of socket count (%d)",
		      nobj[CORE], nobj[SOCKET]);
	}
	nobj[CORE] /= nobj[SOCKET];		/* cores per socket */

	debug("CPUs:%d Boards:%d Sockets:%d CoresPerSocket:%d ThreadsPerCore:%d",
	      actual_cpus, actual_boards, nobj[SOCKET], nobj[CORE], nobj[PU]);

	/* allocate block_map */
	if (p_block_map_size)
		*p_block_map_size = (uint16_t)actual_cpus;
	if (p_block_map && p_block_map_inv) {
		*p_block_map     = xmalloc(actual_cpus * sizeof(uint16_t));
		*p_block_map_inv = xmalloc(actual_cpus * sizeof(uint16_t));

		/* initialize default as linear mapping */
		for (i = 0; i < actual_cpus; i++) {
			(*p_block_map)[i]     = i;
			(*p_block_map_inv)[i] = i;
		}
		/* create map with hwloc */
		used_sock_idx = -1;
		used_core_idx = -1;
		for (idx[SOCKET] = 0; (used_sock_idx + 1) < nobj[SOCKET];
		     idx[SOCKET]++) {
			if (!bit_test(used_socket, idx[SOCKET]))
				continue;
			used_sock_idx++;
			for (idx[CORE] = 0;
			     idx[CORE] < cores_per_socket[idx[SOCKET]];
			     idx[CORE]++) {
				used_core_idx++;
				for (idx[PU]=0; idx[PU]<nobj[PU]; ++idx[PU]) {
					/* get hwloc_obj by indexes */
					obj=hwloc_get_obj_below_array_by_type(
					            topology, 3, objtype, idx);
					if (!obj)
						continue;
					macid = obj->os_index;
					absid = used_core_idx * nobj[PU] + idx[PU];

					if ((macid >= actual_cpus) ||
					    (absid >= actual_cpus)) {
						/* physical or logical ID are
						 * out of range */
						continue;
					}
					debug4("CPU map[%d]=>%d S:C:T %d:%d:%d", absid, macid,
					       used_sock_idx, idx[CORE], idx[PU]);
					(*p_block_map)[absid]     = macid;
					(*p_block_map_inv)[macid] = absid;
				}
			}
		}
	}
	FREE_NULL_BITMAP(used_socket);
	xfree(cores_per_socket);
	hwloc_topology_destroy(topology);

	/* update output parameters */
	*p_cpus    = actual_cpus;
	*p_boards  = actual_boards;
	*p_sockets = nobj[SOCKET];
	*p_cores   = nobj[CORE];
	*p_threads = nobj[PU];

#if _DEBUG
	/*** Display raw data ***/
	debug("CPUs:%u Boards:%u Sockets:%u CoresPerSocket:%u ThreadsPerCore:%u",
	      *p_cpus, *p_boards, *p_sockets, *p_cores, *p_threads);

	/* Display the mapping tables */
	if (p_block_map && p_block_map_inv) {
		debug("------");
		debug("Abstract -> Machine logical CPU ID block mapping:");
		debug("AbstractId PhysicalId Inverse");
		for (i = 0; i < *p_cpus; i++) {
			debug3("   %4d      %4u       %4u",
				i, (*p_block_map)[i], (*p_block_map_inv)[i]);
		}
		debug("------");
	}
#endif
	return SLURM_SUCCESS;

}
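The HWLOC_API_VERSION conditional near the top of this variant reflects the hwloc 2.0 API change: hwloc_topology_ignore_type() was removed in favor of per-type filters set with hwloc_topology_set_type_filter(), so dropping the cache and misc objects needs two different spellings depending on the library version.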