Example #1
static int _parse_resv_core_cnt(resv_desc_msg_t *resv_msg_ptr, char *val,
                                bool from_tres)
{

    char *endptr = NULL, *core_cnt, *tok, *ptrptr = NULL;
    char *type;
    int node_inx = 0;

    type = slurm_get_select_type();
    if (strcasestr(type, "cray")) {
        int param;
        param = slurm_get_select_type_param();
        if (! (param & CR_OTHER_CONS_RES)) {
            error("CoreCnt or CPUCnt is only "
                  "suported when "
                  "SelectTypeParameters "
                  "includes OTHER_CONS_RES");
            xfree(type);
            return SLURM_ERROR;
        }
    } else if (strcasestr(type, "cons_res") == NULL) {
        error("CoreCnt or CPUCnt is only "
              "suported when "
              "SelectType includes "
              "select/cons_res");
        xfree(type);
        return SLURM_ERROR;
    }

    xfree(type);
    core_cnt = xstrdup(val);
    tok = strtok_r(core_cnt, ",", &ptrptr);
    while (tok) {
        xrealloc(resv_msg_ptr->core_cnt,
                 sizeof(uint32_t) * (node_inx + 2));
        resv_msg_ptr->core_cnt[node_inx] =
            strtol(tok, &endptr, 10);
        if ((endptr == NULL) ||
                (endptr[0] != '\0') ||
                (tok[0] == '\0')) {
            exit_code = 1;
            if (from_tres)
                error("Invalid TRES core count %s", val);
            else
                error("Invalid core count %s", val);
            xfree(core_cnt);
            return SLURM_ERROR;
        }
        node_inx++;
        tok = strtok_r(NULL, ",", &ptrptr);
    }

    xfree(core_cnt);
    return SLURM_SUCCESS;
}
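
The loop above leans on Slurm's allocators: xrealloc() zero-fills new memory, so growing the array to node_inx + 2 entries keeps a zeroed terminator slot after the last count. A minimal standalone sketch of the same parse using only the C standard library (the function name and the zero-terminator convention here are illustrative, not Slurm API):

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Parse a comma-separated list such as "4,8,16" into a
 * zero-terminated array of counts, mirroring the strtok_r/strtol
 * loop in _parse_resv_core_cnt(). Returns NULL on malformed input. */
static unsigned *parse_count_list(const char *val, int *cnt_out)
{
	char *copy = strdup(val), *tok, *saveptr = NULL, *endptr;
	unsigned *counts = NULL;
	int n = 0;

	for (tok = strtok_r(copy, ",", &saveptr); tok;
	     tok = strtok_r(NULL, ",", &saveptr)) {
		/* n + 2: one new slot plus a terminator slot; plain
		 * realloc() does not zero, so clear it explicitly */
		counts = realloc(counts, sizeof(unsigned) * (n + 2));
		counts[n] = (unsigned) strtol(tok, &endptr, 10);
		counts[n + 1] = 0;
		if ((endptr[0] != '\0') || (tok[0] == '\0')) {
			free(counts);	/* reject non-numeric tokens like "4x" */
			free(copy);
			return NULL;
		}
		n++;
	}
	free(copy);
	*cnt_out = n;
	return counts;
}

int main(void)
{
	int n;
	unsigned *c = parse_count_list("4,8,16", &n);
	for (int i = 0; c && i < n; i++)
		printf("node %d: %u cores\n", i, c[i]);
	free(c);
	return 0;
}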
Example #2
/*
 * init() is called when the plugin is loaded, before any other functions
 * are called.  Put global initialization here.
 */
extern int init ( void )
{
	/* We must call the API here since this is called from
	 * places other than the slurmctld.
	 */
	uint16_t select_type_param = slurm_get_select_type_param();
	if (select_type_param & CR_OTHER_CONS_RES)
		plugin_id = 108;
	debug_flags = slurm_get_debug_flags();
	verbose("%s loaded", plugin_name);
	return SLURM_SUCCESS;
}
Example #3
/*
 * init() is called when the plugin is loaded, before any other functions
 * are called.  Put global initialization here.
 */
extern int init ( void )
{
	/* We must call the API here since this is called from
	 * places other than the slurmctld.
	 */
	uint16_t select_type_param = slurm_get_select_type_param();
	if (select_type_param & CR_OTHER_CONS_RES)
		plugin_id = 108;
	debug_flags = slurm_get_debug_flags();

#ifdef HAVE_NATIVE_CRAY
	// Spawn the aeld thread, only in slurmctld.
	if (run_in_daemon("slurmctld")) {
		_spawn_cleanup_thread(NULL, _aeld_event_loop);
	}
#endif

	verbose("%s loaded", plugin_name);
	return SLURM_SUCCESS;
}
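
Relative to Example #2, this init() additionally spawns the aeld event loop in a background thread on native Cray systems. _spawn_cleanup_thread() is internal to the plugin; below is a hedged sketch of how such a detached-thread helper could look with plain pthreads, with the argument order inferred from the call site above (the real implementation may differ):

#include <pthread.h>

/* Hypothetical sketch of a detached-thread spawner matching the
 * _spawn_cleanup_thread(arg, routine) call above. A detached thread
 * releases its own resources on exit, so no join is needed. */
static int _spawn_cleanup_thread(void *arg, void *(*start_routine)(void *))
{
	pthread_attr_t attr;
	pthread_t tid;
	int rc;

	pthread_attr_init(&attr);
	pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED);
	rc = pthread_create(&tid, &attr, start_routine, arg);
	pthread_attr_destroy(&attr);
	return rc ? -1 : 0;
}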
Example #4
/*
 * Initialize context for node selection plugin
 */
extern int slurm_select_init(bool only_default)
{
	int retval = SLURM_SUCCESS;
	char *type = NULL;
	int i, j, len;
	DIR *dirp;
	struct dirent *e;
	char *dir_array = NULL, *head = NULL;
	char *plugin_type = "select";

	if ( init_run && select_context )
		return retval;

	slurm_mutex_lock( &select_context_lock );

	if ( select_context )
		goto done;

	type = slurm_get_select_type();
	if (working_cluster_rec) {
		/* just ignore warnings here */
	} else {
#ifdef HAVE_XCPU
		if (strcasecmp(type, "select/linear")) {
			error("%s is incompatible with XCPU use", type);
			fatal("Use SelectType=select/linear");
		}
#endif
		if (!strcasecmp(type, "select/linear")) {
			uint16_t cr_type = slurm_get_select_type_param();
			if ((cr_type & CR_SOCKET) || (cr_type & CR_CORE) ||
			    (cr_type & CR_CPU))
				fatal("Invalid SelectTypeParameter "
				      "for select/linear");
		}

#ifdef HAVE_BG
		if (strcasecmp(type, "select/bluegene")) {
			error("%s is incompatible with BlueGene", type);
			fatal("Use SelectType=select/bluegene");
		}
#else
		if (!strcasecmp(type, "select/bluegene")) {
			fatal("Requested SelectType=select/bluegene "
			      "in slurm.conf, but not running on a BG[L|P|Q] "
			      "system.  If looking to emulate a BG[L|P|Q] "
			      "system use --enable-bgl-emulation or "
			      "--enable-bgp-emulation respectively.");
		}
#endif

#ifdef HAVE_ALPS_CRAY
		if (strcasecmp(type, "select/alps")) {
			error("%s is incompatible with Cray system "
			      "running alps", type);
			fatal("Use SelectType=select/alps");
		}
#else
		if (!strcasecmp(type, "select/alps")) {
			fatal("Requested SelectType=select/alps "
			      "in slurm.conf, but not running on a ALPS Cray "
			      "system.  If looking to emulate a Alps Cray "
			      "system use --enable-alps-cray-emulation.");
		}
#endif
	}

	select_context_cnt = 0;
	if (only_default) {
		ops = xmalloc(sizeof(slurm_select_ops_t));
		select_context = xmalloc(sizeof(plugin_context_t));
		if ((select_context[0] = plugin_context_create(
			     plugin_type, type, (void **)&ops[0],
			     node_select_syms, sizeof(node_select_syms)))) {
			select_context_default = 0;
			select_context_cnt++;
		}
		goto skip_load_all;
	}

	if (!(dir_array = slurm_get_plugin_dir())) {
		error("plugin_load_and_link: No plugin dir given");
		goto done;
	}

	head = dir_array;
	for (i=0; ; i++) {
		bool got_colon = 0;
		if (dir_array[i] == ':') {
			dir_array[i] = '\0';
			got_colon = 1;
		} else if (dir_array[i] != '\0')
			continue;

		/* Open the directory. */
		if (!(dirp = opendir(head))) {
			error("cannot open plugin directory %s", head);
			goto done;
		}

		while (1) {
			char full_name[128];

			if (!(e = readdir( dirp )))
				break;
			/* Check only files with select_ in them. */
			if (strncmp(e->d_name, "select_", 7))
				continue;

			len = strlen(e->d_name);
#if defined(__CYGWIN__)
			len -= 4;
#else
			len -= 3;
#endif
			/* Check only shared object files */
			if (strcmp(e->d_name+len,
#if defined(__CYGWIN__)
				   ".dll"
#else
				   ".so"
#endif
				    ))
				continue;
			/* add one for the / */
			len++;
			xassert(len<sizeof(full_name));
			snprintf(full_name, len, "select/%s", e->d_name+7);
			for (j=0; j<select_context_cnt; j++) {
				if (!strcmp(full_name,
					    select_context[j]->type))
					break;
			}
			if (j >= select_context_cnt) {
				xrealloc(ops,
					 (sizeof(slurm_select_ops_t) *
					  (select_context_cnt + 1)));
				xrealloc(select_context,
					 (sizeof(plugin_context_t) *
					  (select_context_cnt + 1)));

				select_context[select_context_cnt] =
					plugin_context_create(
						plugin_type, full_name,
						(void **)&ops[
							select_context_cnt],
						node_select_syms,
						sizeof(node_select_syms));
				if (select_context[select_context_cnt]) {
					/* set the default */
					if (!strcmp(full_name, type))
						select_context_default =
							select_context_cnt;
					select_context_cnt++;
				}
			}
		}

		closedir(dirp);

		if (got_colon) {
			head = dir_array + i + 1;
		} else
			break;
	}

skip_load_all:
	if (select_context_default == -1)
		fatal("Can't find plugin for %s", type);

	/* Ensure that plugin_id is valid and unique */
	for (i=0; i<select_context_cnt; i++) {
		for (j=i+1; j<select_context_cnt; j++) {
			if (*(ops[i].plugin_id) !=
			    *(ops[j].plugin_id))
				continue;
			fatal("SelectPlugins: Duplicate plugin_id %u for "
			      "%s and %s",
			      *(ops[i].plugin_id),
			      select_context[i]->type,
			      select_context[j]->type);
		}
		if (*(ops[i].plugin_id) < 100) {
			fatal("SelectPlugins: Invalid plugin_id %u (<100) %s",
			      *(ops[i].plugin_id),
			      select_context[i]->type);
		}

	}
	init_run = true;

done:
	slurm_mutex_unlock( &select_context_lock );
	xfree(type);
	xfree(dir_array);
	return retval;
}
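
The scan above walks the colon-separated PluginDir list in place, temporarily NUL-terminating each component before opening it. A compact standalone sketch of that walk (the directory paths in main() are illustrative):

#include <dirent.h>
#include <stdio.h>
#include <string.h>

/* Walk a colon-separated directory list and print every entry whose
 * name starts with "select_", mimicking the scan in
 * slurm_select_init(). The list is modified in place, as in the
 * original. */
static void scan_plugin_dirs(char *dir_list)
{
	char *head = dir_list;

	for (int i = 0; ; i++) {
		int got_colon = 0;

		if (dir_list[i] == ':') {
			dir_list[i] = '\0';
			got_colon = 1;
		} else if (dir_list[i] != '\0')
			continue;

		DIR *dirp = opendir(head);
		if (dirp) {
			struct dirent *e;
			while ((e = readdir(dirp))) {
				if (!strncmp(e->d_name, "select_", 7))
					printf("%s/%s\n", head, e->d_name);
			}
			closedir(dirp);
		}

		if (!got_colon)
			break;
		head = dir_list + i + 1;	/* next path component */
	}
}

int main(void)
{
	char dirs[] = "/usr/lib64/slurm:/usr/local/lib/slurm"; /* illustrative */
	scan_plugin_dirs(dirs);
	return 0;
}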
Example #5
/*
 * lllp_distribution
 *
 * Note: lllp stands for Lowest Level of Logical Processors.
 *
 * When automatic binding is enabled:
 *      - no binding flags set >= CPU_BIND_NONE, and
 *      - an auto binding level selected CPU_BIND_TO_{SOCKETS,CORES,THREADS}
 * Otherwise limit job step to the allocated CPUs
 *
 * generate the appropriate cpu_bind type and string which results in
 * the specified lllp distribution.
 *
 * IN/OUT- job launch request (cpu_bind_type and cpu_bind updated)
 * IN- global task id array
 */
void lllp_distribution(launch_tasks_request_msg_t *req, uint32_t node_id)
{
	int rc = SLURM_SUCCESS;
	bitstr_t **masks = NULL;
	char buf_type[100];
	int maxtasks = req->tasks_to_launch[(int)node_id];
	int whole_nodes, whole_sockets, whole_cores, whole_threads;
	int part_sockets, part_cores;
	const uint32_t *gtid = req->global_task_ids[(int)node_id];
	static uint16_t bind_entity = CPU_BIND_TO_THREADS | CPU_BIND_TO_CORES |
				      CPU_BIND_TO_SOCKETS | CPU_BIND_TO_LDOMS;
	static uint16_t bind_mode = CPU_BIND_NONE   | CPU_BIND_MASK   |
				    CPU_BIND_RANK   | CPU_BIND_MAP    |
				    CPU_BIND_LDMASK | CPU_BIND_LDRANK |
				    CPU_BIND_LDMAP;
	static int only_one_thread_per_core = -1;

	if (only_one_thread_per_core == -1) {
		if (conf->cpus == (conf->sockets * conf->cores))
			only_one_thread_per_core = 1;
		else
			only_one_thread_per_core = 0;
	}

	/* If we are telling the system we only want to use 1 thread
	 * per core with the CPUs node option this is the easiest way
	 * to portray that to the affinity plugin.
	 */
	if (only_one_thread_per_core)
		req->cpu_bind_type |= CPU_BIND_ONE_THREAD_PER_CORE;

	if (req->cpu_bind_type & bind_mode) {
		/* Explicit step binding specified by user */
		char *avail_mask = _alloc_mask(req,
					       &whole_nodes,  &whole_sockets,
					       &whole_cores,  &whole_threads,
					       &part_sockets, &part_cores);
		if ((whole_nodes == 0) && avail_mask &&
		    (req->job_core_spec == (uint16_t) NO_VAL)) {
			info("task/affinity: entire node must be allocated, "
			     "disabling affinity");
			xfree(req->cpu_bind);
			req->cpu_bind = avail_mask;
			req->cpu_bind_type &= (~bind_mode);
			req->cpu_bind_type |= CPU_BIND_MASK;
		} else {
			if (req->job_core_spec == (uint16_t) NO_VAL) {
				if (req->cpu_bind_type & CPU_BIND_MASK)
					_validate_mask(req, avail_mask);
				else if (req->cpu_bind_type & CPU_BIND_MAP)
					_validate_map(req, avail_mask);
			}
			xfree(avail_mask);
		}
		slurm_sprint_cpu_bind_type(buf_type, req->cpu_bind_type);
		info("lllp_distribution jobid [%u] manual binding: %s",
		     req->job_id, buf_type);
		return;
	}

	if (!(req->cpu_bind_type & bind_entity)) {
		/* No bind unit (sockets, cores) specified by user,
		 * pick something reasonable */
		uint32_t task_plugin_param = slurm_get_task_plugin_param();
		bool auto_def_set = false;
		int spec_thread_cnt = 0;
		int max_tasks = req->tasks_to_launch[(int)node_id] *
			req->cpus_per_task;
		char *avail_mask = _alloc_mask(req,
					       &whole_nodes,  &whole_sockets,
					       &whole_cores,  &whole_threads,
					       &part_sockets, &part_cores);
		debug("binding tasks:%d to "
		      "nodes:%d sockets:%d:%d cores:%d:%d threads:%d",
		      max_tasks, whole_nodes, whole_sockets ,part_sockets,
		      whole_cores, part_cores, whole_threads);
		if ((req->job_core_spec != (uint16_t) NO_VAL) &&
		    (req->job_core_spec &  CORE_SPEC_THREAD)  &&
		    (req->job_core_spec != CORE_SPEC_THREAD)) {
			spec_thread_cnt = req->job_core_spec &
					  (~CORE_SPEC_THREAD);
		}
		if (((max_tasks == whole_sockets) && (part_sockets == 0)) ||
		    (spec_thread_cnt &&
		     (max_tasks == (whole_sockets + part_sockets)))) {
			req->cpu_bind_type |= CPU_BIND_TO_SOCKETS;
			goto make_auto;
		}
		if (((max_tasks == whole_cores) && (part_cores == 0)) ||
		    (spec_thread_cnt &&
		     (max_tasks == (whole_cores + part_cores)))) {
			req->cpu_bind_type |= CPU_BIND_TO_CORES;
			goto make_auto;
		}
		if (max_tasks == whole_threads) {
			req->cpu_bind_type |= CPU_BIND_TO_THREADS;
			goto make_auto;
		}

		if (task_plugin_param & CPU_AUTO_BIND_TO_THREADS) {
			auto_def_set = true;
			req->cpu_bind_type |= CPU_BIND_TO_THREADS;
			goto make_auto;
		} else if (task_plugin_param & CPU_AUTO_BIND_TO_CORES) {
			auto_def_set = true;
			req->cpu_bind_type |= CPU_BIND_TO_CORES;
			goto make_auto;
		} else if (task_plugin_param & CPU_AUTO_BIND_TO_SOCKETS) {
			auto_def_set = true;
			req->cpu_bind_type |= CPU_BIND_TO_SOCKETS;
			goto make_auto;
		}

		if (avail_mask) {
			xfree(req->cpu_bind);
			req->cpu_bind = avail_mask;
			req->cpu_bind_type |= CPU_BIND_MASK;
		}

		slurm_sprint_cpu_bind_type(buf_type, req->cpu_bind_type);
		info("lllp_distribution jobid [%u] auto binding off: %s",
		     req->job_id, buf_type);
		return;

  make_auto:	xfree(avail_mask);
		slurm_sprint_cpu_bind_type(buf_type, req->cpu_bind_type);
		info("lllp_distribution jobid [%u] %s auto binding: "
		     "%s, dist %d", req->job_id,
		     (auto_def_set) ? "default" : "implicit",
		     buf_type, req->task_dist);
	} else {
		/* Explicit bind unit (sockets, cores) specified by user */
		slurm_sprint_cpu_bind_type(buf_type, req->cpu_bind_type);
		info("lllp_distribution jobid [%u] binding: %s, dist %d",
		     req->job_id, buf_type, req->task_dist);
	}

	switch (req->task_dist & SLURM_DIST_STATE_BASE) {
	case SLURM_DIST_BLOCK_BLOCK:
	case SLURM_DIST_CYCLIC_BLOCK:
	case SLURM_DIST_PLANE:
		/* tasks are distributed in blocks within a plane */
		rc = _task_layout_lllp_block(req, node_id, &masks);
		break;
	case SLURM_DIST_ARBITRARY:
	case SLURM_DIST_BLOCK:
	case SLURM_DIST_CYCLIC:
	case SLURM_DIST_UNKNOWN:
		if (slurm_get_select_type_param()
		    & CR_CORE_DEFAULT_DIST_BLOCK) {
			rc = _task_layout_lllp_block(req, node_id, &masks);
			break;
		}
		/* We want to fall through here if we aren't doing a
		   default dist block.
		*/
	default:
		rc = _task_layout_lllp_cyclic(req, node_id, &masks);
		break;
	}

	/* FIXME: I'm worried about core_bitmap with CPU_BIND_TO_SOCKETS &
	 * max_cores - does select/cons_res plugin allocate whole
	 * socket??? Maybe not. Check srun man page.
	 */

	if (rc == SLURM_SUCCESS) {
		_task_layout_display_masks(req, gtid, maxtasks, masks);
	    	/* translate abstract masks to actual hardware layout */
		_lllp_map_abstract_masks(maxtasks, masks);
		_task_layout_display_masks(req, gtid, maxtasks, masks);
#ifdef HAVE_NUMA
		if (req->cpu_bind_type & CPU_BIND_TO_LDOMS) {
			_match_masks_to_ldom(maxtasks, masks);
			_task_layout_display_masks(req, gtid, maxtasks, masks);
		}
#endif
	    	 /* convert masks into cpu_bind mask string */
		 _lllp_generate_cpu_bind(req, maxtasks, masks);
	} else {
		char *avail_mask = _alloc_mask(req,
					       &whole_nodes,  &whole_sockets,
					       &whole_cores,  &whole_threads,
					       &part_sockets, &part_cores);
		if (avail_mask) {
			xfree(req->cpu_bind);
			req->cpu_bind = avail_mask;
			req->cpu_bind_type &= (~bind_mode);
			req->cpu_bind_type |= CPU_BIND_MASK;
		}
		slurm_sprint_cpu_bind_type(buf_type, req->cpu_bind_type);
		error("lllp_distribution jobid [%u] overriding binding: %s",
		      req->job_id, buf_type);
		error("Verify socket/core/thread counts in configuration");
	}
	if (masks)
		_lllp_free_masks(maxtasks, masks);
}
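
Stripped of the core-spec and TaskPluginParam handling, the auto-binding pick above reduces to matching the task count against whole sockets, then cores, then threads. A distilled sketch of that decision as a pure function (names are illustrative, not Slurm API):

/* Distilled version of the bind-level pick in lllp_distribution():
 * bind to the coarsest entity whose whole-unit count equals the task
 * count. Returns a label here instead of a CPU_BIND_* flag. */
static const char *pick_bind_level(int max_tasks,
				   int whole_sockets, int part_sockets,
				   int whole_cores, int part_cores,
				   int whole_threads)
{
	if ((max_tasks == whole_sockets) && (part_sockets == 0))
		return "sockets";
	if ((max_tasks == whole_cores) && (part_cores == 0))
		return "cores";
	if (max_tasks == whole_threads)
		return "threads";
	return "mask";	/* fall back to binding on the availability mask */
}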
Example #6
/* user to have to play with the cgroup hierarchy to modify it */
extern int task_cgroup_cpuset_set_task_affinity(stepd_step_rec_t *job)
{
	int fstatus = SLURM_ERROR;

#ifndef HAVE_HWLOC

	error("task/cgroup: plugin not compiled with hwloc support, "
	      "skipping affinity.");
	return fstatus;

#else
	char mstr[1 + CPU_SETSIZE / 4];
	cpu_bind_type_t bind_type;
	cpu_set_t ts;
	hwloc_obj_t obj;
	hwloc_obj_type_t socket_or_node;
	hwloc_topology_t topology;
	hwloc_bitmap_t cpuset;
	hwloc_obj_type_t hwtype;
	hwloc_obj_type_t req_hwtype;
	int bind_verbose = 0;
	int rc = SLURM_SUCCESS, match;
	pid_t    pid = job->envtp->task_pid;
	size_t tssize;
	uint32_t nldoms;
	uint32_t nsockets;
	uint32_t ncores;
	uint32_t npus;
	uint32_t nobj;
	uint32_t taskid = job->envtp->localid;
	uint32_t jntasks = job->node_tasks;
	uint32_t jnpus;

	/* Allocate and initialize hwloc objects */
	hwloc_topology_init(&topology);
	hwloc_topology_load(topology);
	cpuset = hwloc_bitmap_alloc();

	int spec_threads = 0;

	if (job->batch) {
		jnpus = job->cpus;
		job->cpus_per_task = job->cpus;
	} else
		jnpus = jntasks * job->cpus_per_task;

	bind_type = job->cpu_bind_type;
	if ((conf->task_plugin_param & CPU_BIND_VERBOSE) ||
	    (bind_type & CPU_BIND_VERBOSE))
		bind_verbose = 1;

	if ( hwloc_get_type_depth(topology, HWLOC_OBJ_NODE) >
	     hwloc_get_type_depth(topology, HWLOC_OBJ_SOCKET) ) {
		/* One socket contains multiple NUMA-nodes
		 * like AMD Opteron 6000 series etc.
		 * In such case, use NUMA-node instead of socket. */
		socket_or_node = HWLOC_OBJ_NODE;
	} else {
		socket_or_node = HWLOC_OBJ_SOCKET;
	}

	if (bind_type & CPU_BIND_NONE) {
		if (bind_verbose)
			info("task/cgroup: task[%u] is requesting no affinity",
			     taskid);
		/* free the topology objects before this early return to
		 * avoid leaking them */
		hwloc_bitmap_free(cpuset);
		hwloc_topology_destroy(topology);
		return 0;
	} else if (bind_type & CPU_BIND_TO_THREADS) {
		if (bind_verbose)
			info("task/cgroup: task[%u] is requesting "
			     "thread level binding",taskid);
		req_hwtype = HWLOC_OBJ_PU;
	} else if (bind_type & CPU_BIND_TO_CORES) {
		if (bind_verbose)
			info("task/cgroup: task[%u] is requesting "
			     "core level binding",taskid);
		req_hwtype = HWLOC_OBJ_CORE;
	} else if (bind_type & CPU_BIND_TO_SOCKETS) {
		if (bind_verbose)
			info("task/cgroup: task[%u] is requesting "
			     "socket level binding",taskid);
		req_hwtype = socket_or_node;
	} else if (bind_type & CPU_BIND_TO_LDOMS) {
		if (bind_verbose)
			info("task/cgroup: task[%u] is requesting "
			     "ldom level binding",taskid);
		req_hwtype = HWLOC_OBJ_NODE;
	} else if (bind_type & CPU_BIND_TO_BOARDS) {
		if (bind_verbose)
			info("task/cgroup: task[%u] is requesting "
			     "board level binding",taskid);
		req_hwtype = HWLOC_OBJ_GROUP;
	} else if (bind_type & bind_mode_ldom) {
		req_hwtype = HWLOC_OBJ_NODE;
	} else {
		if (bind_verbose)
			info("task/cgroup: task[%u] using core level binding"
			     " by default",taskid);
		req_hwtype = HWLOC_OBJ_CORE;
	}

	/*
	 * Perform the topology detection. It will only get allowed PUs.
	 * Detect in the same time the granularity to use for binding.
	 * The granularity can be relaxed from threads to cores if enough
	 * cores are available as with hyperthread support, ntasks-per-core
	 * param can let us have access to more threads per core for each
	 * task
	 * Revert back to machine granularity if no finer-grained granularity
	 * matching the request is found. This will result in no affinity
	 * applied.
	 * The detected granularity will be used to find where to best place
	 * the task, then the cpu_bind option will be used to relax the
	 * affinity constraint and use more PUs. (i.e. use a core granularity
	 * to dispatch the tasks across the sockets and then provide access
	 * to each task to the cores of its socket.)
	 */
	npus = (uint32_t) hwloc_get_nbobjs_by_type(topology,
						   HWLOC_OBJ_PU);
	ncores = (uint32_t) hwloc_get_nbobjs_by_type(topology,
						     HWLOC_OBJ_CORE);
	nsockets = (uint32_t) hwloc_get_nbobjs_by_type(topology,
						       socket_or_node);
	nldoms = (uint32_t) hwloc_get_nbobjs_by_type(topology,
						     HWLOC_OBJ_NODE);
	//info("PU:%d CORE:%d SOCK:%d LDOM:%d", npus, ncores, nsockets, nldoms);

	hwtype = HWLOC_OBJ_MACHINE;
	nobj = 1;
	if ((job->job_core_spec != (uint16_t) NO_VAL) &&
	    (job->job_core_spec &  CORE_SPEC_THREAD)  &&
	    (job->job_core_spec != CORE_SPEC_THREAD)) {
		spec_threads = job->job_core_spec & (~CORE_SPEC_THREAD);
	}
	if (npus >= (jnpus + spec_threads) || bind_type & CPU_BIND_TO_THREADS) {
		hwtype = HWLOC_OBJ_PU;
		nobj = npus;
	}
	if (ncores >= jnpus || bind_type & CPU_BIND_TO_CORES) {
		hwtype = HWLOC_OBJ_CORE;
		nobj = ncores;
	}
	if (nsockets >= jntasks &&
	    bind_type & CPU_BIND_TO_SOCKETS) {
		hwtype = socket_or_node;
		nobj = nsockets;
	}
	/*
	 * HWLOC returns all the NUMA nodes available regardless of the
	 * number of underlying sockets available (regardless of the allowed
	 * resources). So there is no guarantee that each ldom will be populated
	 * with usable sockets. So add a simple check that at least ensure that
	 * we have as many sockets as ldoms before moving to ldoms granularity
	 */
	if (nldoms >= jntasks &&
	    nsockets >= nldoms &&
	    bind_type & (CPU_BIND_TO_LDOMS | bind_mode_ldom)) {
		hwtype = HWLOC_OBJ_NODE;
		nobj = nldoms;
	}

	/*
	 * If not enough objects to do the job, revert to no affinity mode
	 */
	if (hwloc_compare_types(hwtype, HWLOC_OBJ_MACHINE) == 0) {
		info("task/cgroup: task[%u] disabling affinity because of %s "
		     "granularity",taskid, hwloc_obj_type_string(hwtype));

	} else if ((hwloc_compare_types(hwtype, HWLOC_OBJ_CORE) >= 0) &&
		   (nobj < jnpus)) {
		info("task/cgroup: task[%u] not enough %s objects (%d < %d), "
		     "disabling affinity",
		     taskid, hwloc_obj_type_string(hwtype), nobj, jnpus);

	} else if (bind_type & bind_mode) {
		/* Explicit binding mode specified by the user
		 * Bind the taskid in accordance with the specified mode
		 */
		obj = hwloc_get_obj_by_type(topology, HWLOC_OBJ_MACHINE, 0);
		match = hwloc_bitmap_isequal(obj->complete_cpuset,
					     obj->allowed_cpuset);
		if ((job->job_core_spec == (uint16_t) NO_VAL) && !match) {
			info("task/cgroup: entire node must be allocated, "
			     "disabling affinity, task[%u]", taskid);
			fprintf(stderr, "Requested cpu_bind option requires "
				"entire node to be allocated; disabling "
				"affinity\n");
		} else {
			if (bind_verbose) {
				info("task/cgroup: task[%u] is requesting "
				     "explicit binding mode", taskid);
			}
			_get_sched_cpuset(topology, hwtype, req_hwtype, &ts,
					  job);
			tssize = sizeof(cpu_set_t);
			fstatus = SLURM_SUCCESS;
			if (job->job_core_spec != (uint16_t) NO_VAL)
				_validate_mask(taskid, obj, &ts);
			if ((rc = sched_setaffinity(pid, tssize, &ts))) {
				error("task/cgroup: task[%u] unable to set "
				      "mask 0x%s", taskid,
				      cpuset_to_str(&ts, mstr));
				error("sched_setaffinity rc = %d", rc);
				fstatus = SLURM_ERROR;
			} else if (bind_verbose) {
				info("task/cgroup: task[%u] mask 0x%s",
				     taskid, cpuset_to_str(&ts, mstr));
			}
			_slurm_chkaffinity(&ts, job, rc);
		}
	} else {
		/* Bind the detected object to the taskid, respecting the
		 * granularity, using the designated or default distribution
		 * method (block or cyclic). */
		char *str;

		if (bind_verbose) {
			info("task/cgroup: task[%u] using %s granularity dist %u",
			     taskid, hwloc_obj_type_string(hwtype),
			     job->task_dist);
		}

		/* See srun man page for detailed information on --distribution
		 * option.
		 *
		 * You can see the equivalent code for the
		 * task/affinity plugin in
		 * src/plugins/task/affinity/dist_tasks.c, around line 368
		 */
		switch (job->task_dist & SLURM_DIST_NODESOCKMASK) {
		case SLURM_DIST_BLOCK_BLOCK:
		case SLURM_DIST_CYCLIC_BLOCK:
		case SLURM_DIST_PLANE:
			/* tasks are distributed in blocks within a plane */
			_task_cgroup_cpuset_dist_block(topology,
				hwtype, req_hwtype,
				nobj, job, bind_verbose, cpuset);
			break;
		case SLURM_DIST_ARBITRARY:
		case SLURM_DIST_BLOCK:
		case SLURM_DIST_CYCLIC:
		case SLURM_DIST_UNKNOWN:
			if (slurm_get_select_type_param()
			    & CR_CORE_DEFAULT_DIST_BLOCK) {
				_task_cgroup_cpuset_dist_block(topology,
					hwtype, req_hwtype,
					nobj, job, bind_verbose, cpuset);
				break;
			}
			/* We want to fall through here if we aren't doing a
			   default dist block.
			*/
		default:
			_task_cgroup_cpuset_dist_cyclic(topology,
				hwtype, req_hwtype,
				job, bind_verbose, cpuset);
			break;
		}

		hwloc_bitmap_asprintf(&str, cpuset);

		tssize = sizeof(cpu_set_t);
		if (hwloc_cpuset_to_glibc_sched_affinity(topology, cpuset,
							 &ts, tssize) == 0) {
			fstatus = SLURM_SUCCESS;
			if ((rc = sched_setaffinity(pid, tssize, &ts))) {
				error("task/cgroup: task[%u] unable to set "
				      "taskset '%s'", taskid, str);
				fstatus = SLURM_ERROR;
			} else if (bind_verbose) {
				info("task/cgroup: task[%u] set taskset '%s'",
				     taskid, str);
			}
			_slurm_chkaffinity(&ts, job, rc);
		} else {
			error("task/cgroup: task[%u] unable to build "
			      "taskset '%s'",taskid,str);
			fstatus = SLURM_ERROR;
		}
		free(str);
	}

	/* Destroy hwloc objects */
	hwloc_bitmap_free(cpuset);
	hwloc_topology_destroy(topology);

	return fstatus;
#endif

}
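
The granularity detection above asks hwloc how many PUs, cores, sockets, and NUMA nodes are visible. A minimal standalone program doing the same counts, assuming the hwloc 1.x object names used in this example (HWLOC_OBJ_SOCKET and HWLOC_OBJ_NODE were later renamed HWLOC_OBJ_PACKAGE and HWLOC_OBJ_NUMANODE in hwloc 2.x):

#include <hwloc.h>
#include <stdio.h>

/* Build with: cc demo.c -lhwloc */
int main(void)
{
	hwloc_topology_t topology;

	hwloc_topology_init(&topology);
	hwloc_topology_load(topology);

	/* Same counts as the npus/ncores/nsockets/nldoms block above */
	printf("PU:%d CORE:%d SOCK:%d LDOM:%d\n",
	       hwloc_get_nbobjs_by_type(topology, HWLOC_OBJ_PU),
	       hwloc_get_nbobjs_by_type(topology, HWLOC_OBJ_CORE),
	       hwloc_get_nbobjs_by_type(topology, HWLOC_OBJ_SOCKET),
	       hwloc_get_nbobjs_by_type(topology, HWLOC_OBJ_NODE));

	hwloc_topology_destroy(topology);
	return 0;
}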
Example #7
/* Get this plugin's sequence number in Slurm's internal tables */
extern int select_get_plugin_id_pos(uint32_t plugin_id)
{
	int i;
	static bool cray_other_cons_res = false;

	if (slurm_select_init(0) < 0)
		return SLURM_ERROR;
again:
	for (i = 0; i < select_context_cnt; i++) {
		if (*(ops[i].plugin_id) == plugin_id)
			break;
	}
	if (i >= select_context_cnt) {
		/*
		 * Put on the extra Cray select plugins that do not get
		 * generated automatically.
		 */
		if (!cray_other_cons_res &&
		    ((plugin_id == SELECT_PLUGIN_CRAY_CONS_RES)  ||
		     (plugin_id == SELECT_PLUGIN_CRAY_CONS_TRES) ||
		     (plugin_id == SELECT_PLUGIN_CRAY_LINEAR))) {
			char *type = "select", *name = "select/cray";
			uint16_t save_params = slurm_get_select_type_param();
			uint16_t params[2];
			int cray_plugin_id[2], cray_offset;

			cray_other_cons_res = true;

			if (plugin_id == SELECT_PLUGIN_CRAY_LINEAR) {
				params[0] = save_params & ~CR_OTHER_CONS_RES;
				cray_plugin_id[0] = SELECT_PLUGIN_CRAY_CONS_RES;
				params[1] = save_params & ~CR_OTHER_CONS_TRES;
				cray_plugin_id[1] = SELECT_PLUGIN_CRAY_CONS_TRES;
			} else if (plugin_id == SELECT_PLUGIN_CRAY_CONS_RES) {
				params[0] = save_params | CR_OTHER_CONS_RES;
				cray_plugin_id[0] = SELECT_PLUGIN_CRAY_LINEAR;
				params[1] = save_params & ~CR_OTHER_CONS_RES;
				cray_plugin_id[1] = SELECT_PLUGIN_CRAY_CONS_TRES;
			} else {	/* SELECT_PLUGIN_CRAY_CONS_TRES */
				params[0] = save_params | CR_OTHER_CONS_TRES;
				cray_plugin_id[0] = SELECT_PLUGIN_CRAY_LINEAR;
				params[1] = save_params & ~CR_OTHER_CONS_RES;
				cray_plugin_id[1] = SELECT_PLUGIN_CRAY_CONS_RES;
			}

			for (cray_offset = 0; cray_offset < 2; cray_offset++) {
				for (i = 0; i < select_context_cnt; i++) {
					if (*(ops[i].plugin_id) ==
					    cray_plugin_id[cray_offset])
						break;
				}
				if (i < select_context_cnt)
					break;	/* Found match */
			}
			if (i >= select_context_cnt)
				goto end_it;	/* No match */

			slurm_mutex_lock(&select_context_lock);
			slurm_set_select_type_param(params[cray_offset]);
			plugin_context_destroy(select_context[i]);
			select_context[i] =
				plugin_context_create(type, name,
						      (void **)&ops[i],
						      node_select_syms,
						      sizeof(node_select_syms));
			slurm_set_select_type_param(save_params);
			slurm_mutex_unlock(&select_context_lock);
			goto again;
		}

	end_it:
		return SLURM_ERROR;
	}
	return i;
}
Example #8
/*
 * Initialize context for node selection plugin
 */
extern int slurm_select_init(bool only_default)
{
	int retval = SLURM_SUCCESS;
	char *select_type = NULL;
	int i, j, plugin_cnt;
	char *plugin_type = "select";
	List plugin_names = NULL;
	_plugin_args_t plugin_args = {0};

	if ( init_run && select_context )
		return retval;

	slurm_mutex_lock( &select_context_lock );

	if ( select_context )
		goto done;

	select_type = slurm_get_select_type();
	if (working_cluster_rec) {
		/* just ignore warnings here */
	} else {
#ifdef HAVE_NATIVE_CRAY
		if (xstrcasecmp(select_type, "select/cray")) {
			error("%s is incompatible with a native Cray system.",
			      select_type);
			fatal("Use SelectType=select/cray");
		}
#else
		/* if (!xstrcasecmp(select_type, "select/cray")) { */
		/* 	fatal("Requested SelectType=select/cray " */
		/* 	      "in slurm.conf, but not running on a native Cray " */
		/* 	      "system.  If looking to run on a Cray " */
		/* 	      "system natively use --enable-native-cray."); */
		/* } */
#endif
	}

	select_context_cnt = 0;

	plugin_args.plugin_type    = plugin_type;
	plugin_args.default_plugin = select_type;

	if (only_default) {
		plugin_names = list_create(slurm_destroy_char);
		list_append(plugin_names, xstrdup(select_type));
	} else {
		plugin_names = plugin_get_plugins_of_type(plugin_type);
	}
	if (plugin_names && (plugin_cnt = list_count(plugin_names))) {
		ops = xcalloc(plugin_cnt, sizeof(slurm_select_ops_t));
		select_context = xcalloc(plugin_cnt,
					 sizeof(plugin_context_t *));
		list_for_each(plugin_names, _load_plugins, &plugin_args);
	}


	if (select_context_default == -1)
		fatal("Can't find plugin for %s", select_type);

	/* Ensure that plugin_id is valid and unique */
	for (i=0; i<select_context_cnt; i++) {
		for (j=i+1; j<select_context_cnt; j++) {
			if (*(ops[i].plugin_id) !=
			    *(ops[j].plugin_id))
				continue;
			fatal("SelectPlugins: Duplicate plugin_id %u for "
			      "%s and %s",
			      *(ops[i].plugin_id),
			      select_context[i]->type,
			      select_context[j]->type);
		}
		if (*(ops[i].plugin_id) < 100) {
			fatal("SelectPlugins: Invalid plugin_id %u (<100) %s",
			      *(ops[i].plugin_id),
			      select_context[i]->type);
		}

	}
	init_run = true;
done:
	slurm_mutex_unlock( &select_context_lock );
	if (!working_cluster_rec) {
		if (select_running_linear_based()) {
			uint16_t cr_type = slurm_get_select_type_param();
			if (cr_type & (CR_CPU | CR_CORE | CR_SOCKET)) {
				fatal("Invalid SelectTypeParameters for "
				      "%s: %s (%u), it can't contain "
				      "CR_(CPU|CORE|SOCKET).",
				      select_type,
				      select_type_param_string(cr_type),
				      cr_type);
			}
		}
	}

	xfree(select_type);
	FREE_NULL_LIST(plugin_names);

	return retval;
}
Example #9
static int _task_layout_block(slurm_step_layout_t *step_layout, uint16_t *cpus)
{
	static uint16_t select_params = NO_VAL16;
	int i, j, task_id = 0;
	bool pack_nodes;

	if (select_params == NO_VAL16)
		select_params = slurm_get_select_type_param();
	if (step_layout->task_dist & SLURM_DIST_PACK_NODES)
		pack_nodes = true;
	else if (step_layout->task_dist & SLURM_DIST_NO_PACK_NODES)
		pack_nodes = false;
	else if (select_params & CR_PACK_NODES)
		pack_nodes = true;
	else
		pack_nodes = false;

	if (pack_nodes) {
		/* Pass 1: Put one task on each node */
		for (i = 0; ((i < step_layout->node_cnt) &&
			     (task_id < step_layout->task_cnt)); i++) {
			/* cpus has already been altered for cpus_per_task */
			if (step_layout->tasks[i] < cpus[i]) {
				step_layout->tasks[i]++;
				task_id++;
			}
		}

		/* Pass 2: Fill remaining CPUs on a node-by-node basis */
		for (i = 0; ((i < step_layout->node_cnt) &&
			     (task_id < step_layout->task_cnt)); i++) {
			/* cpus has already been altered for cpus_per_task */
			while ((step_layout->tasks[i] < cpus[i]) &&
			       (task_id < step_layout->task_cnt)) {
				step_layout->tasks[i]++;
				task_id++;
			}
		}

		/* Pass 3: Spread remaining tasks across all nodes */
		while (task_id < step_layout->task_cnt) {
			for (i = 0; ((i < step_layout->node_cnt) &&
				     (task_id < step_layout->task_cnt)); i++) {
				step_layout->tasks[i]++;
				task_id++;
			}
		}
	} else {
		/* To effectively deal with heterogeneous nodes, we fake a
		 * cyclic distribution to determine how many tasks go on each
		 * node and then make those assignments in a block fashion. */
		bool over_subscribe = false;
		for (j = 0; task_id < step_layout->task_cnt; j++) {
			bool space_remaining = false;
			for (i = 0; ((i < step_layout->node_cnt) &&
				     (task_id < step_layout->task_cnt)); i++) {
				if ((j < cpus[i]) || over_subscribe) {
					step_layout->tasks[i]++;
					task_id++;
					if ((j + 1) < cpus[i])
						space_remaining = true;
				}
			}
			if (!space_remaining)
				over_subscribe = true;
		}
	}

	/* Now distribute the tasks */
	task_id = 0;
	for (i = 0; i < step_layout->node_cnt; i++) {
		step_layout->tids[i] = xmalloc(sizeof(uint32_t)
					       * step_layout->tasks[i]);
		for (j = 0; j < step_layout->tasks[i]; j++) {
			step_layout->tids[i][j] = task_id;
			task_id++;
		}
	}
	return SLURM_SUCCESS;
}
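
For the non-packing branch, the sizing logic above can be isolated: a fake cyclic sweep hands out one task per node per round while that node still has CPUs, then oversubscribes evenly once every node is full. A standalone sketch of that sizing loop with illustrative counts:

#include <stdio.h>

/* Reproduce the "fake cyclic" sizing from _task_layout_block(): hand
 * out tasks round-robin against each node's CPU count, then allow
 * oversubscription once no node has CPUs left. */
static void layout_block(int node_cnt, const int *cpus, int task_cnt,
			 int *tasks)
{
	int assigned = 0, over_subscribe = 0;

	for (int i = 0; i < node_cnt; i++)
		tasks[i] = 0;

	for (int j = 0; assigned < task_cnt; j++) {
		int space_remaining = 0;
		for (int i = 0; ((i < node_cnt) &&
				 (assigned < task_cnt)); i++) {
			if ((j < cpus[i]) || over_subscribe) {
				tasks[i]++;
				assigned++;
				if ((j + 1) < cpus[i])
					space_remaining = 1;
			}
		}
		if (!space_remaining)
			over_subscribe = 1;
	}
}

int main(void)
{
	int cpus[] = { 4, 2, 2 };	/* heterogeneous node sizes */
	int tasks[3];

	layout_block(3, cpus, 10, tasks);	/* 10 tasks on 8 CPUs */
	for (int i = 0; i < 3; i++)
		printf("node %d: %d tasks\n", i, tasks[i]);
	return 0;
}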
Example #10
/*
 * scontrol_parse_res_options   parse options for creating or updating a
 *                              reservation
 * IN argc - count of arguments
 * IN argv - list of arguments
 * IN msg  - a string to append to any error message
 * OUT resv_msg_ptr - struct holding reservation parameters
 * OUT free_user_str - bool indicating that resv_msg_ptr->users should be freed
 * OUT free_acct_str - bool indicating that resv_msg_ptr->accounts should be
 *		       freed
 * RET 0 on success, -1 on err and prints message
 */
extern int
scontrol_parse_res_options(int argc, char *argv[], const char *msg,
			   resv_desc_msg_t  *resv_msg_ptr,
			   int *free_user_str, int *free_acct_str)
{
	int i;
	int duration = -3;   /* -1 == INFINITE, -2 == error, -3 == not set */

	*free_user_str = 0;
	*free_acct_str = 0;

	for (i=0; i<argc; i++) {
		char *tag = argv[i];
		int taglen = 0;
		char plus_minus = '\0';

		char *val = strchr(argv[i], '=');
		taglen = val - argv[i];

		if (!val && strncasecmp(argv[i], "res", 3) == 0) {
			continue;
		} else if (!val || taglen == 0) {
			exit_code = 1;
			error("Unknown parameter %s.  %s", argv[i], msg);
			return -1;
		}
		if (val[-1] == '+' || val[-1] == '-') {
			plus_minus = val[-1];
			taglen--;
		}
		val++;

		if (strncasecmp(tag, "ReservationName", MAX(taglen, 1)) == 0) {
			resv_msg_ptr->name = val;

		} else if (strncasecmp(tag, "Accounts", MAX(taglen, 1)) == 0) {
			if (plus_minus) {
				resv_msg_ptr->accounts =
					_process_plus_minus(plus_minus, val);
				*free_acct_str = 1;
			} else {
				resv_msg_ptr->accounts = val;
			}
		} else if (strncasecmp(tag, "BurstBuffer", MAX(taglen, 2))
			   == 0) {
			resv_msg_ptr->burst_buffer = val;
		} else if (strncasecmp(tag, "StartTime", MAX(taglen, 1)) == 0){
			time_t  t = parse_time(val, 0);
			if (errno == ESLURM_INVALID_TIME_VALUE) {
				exit_code = 1;
				error("Invalid start time %s.  %s",
				      argv[i], msg);
				return -1;
			}
			resv_msg_ptr->start_time = t;

		} else if (strncasecmp(tag, "EndTime", MAX(taglen, 1)) == 0) {
			time_t  t = parse_time(val, 0);
			if (errno == ESLURM_INVALID_TIME_VALUE) {
				exit_code = 1;
				error("Invalid end time %s.  %s", argv[i],msg);
				return -1;
			}
			resv_msg_ptr->end_time = t;

		} else if (strncasecmp(tag, "Duration", MAX(taglen, 1)) == 0) {
			/* -1 == INFINITE, -2 == error, -3 == not set */
			duration = time_str2mins(val);
			if (duration < 0 && duration != INFINITE) {
				exit_code = 1;
				error("Invalid duration %s.  %s", argv[i],msg);
				return -1;
			}
			resv_msg_ptr->duration = (uint32_t)duration;

		} else if (strncasecmp(tag, "Flags", MAX(taglen, 2)) == 0) {
			uint32_t f;
			if (plus_minus) {
				char *tmp =
					_process_plus_minus(plus_minus, val);
				f = parse_resv_flags(tmp, msg);
				xfree(tmp);
			} else {
				f = parse_resv_flags(val, msg);
			}
			if (f == 0xffffffff) {
				return -1;
			} else {
				resv_msg_ptr->flags = f;
			}
		} else if (strncasecmp(tag, "NodeCnt", MAX(taglen,5)) == 0 ||
			   strncasecmp(tag, "NodeCount", MAX(taglen,5)) == 0) {
			char *endptr = NULL, *node_cnt, *tok, *ptrptr = NULL;
			int node_inx = 0;
			node_cnt = xstrdup(val);
			tok = strtok_r(node_cnt, ",", &ptrptr);
			while (tok) {
				xrealloc(resv_msg_ptr->node_cnt,
					 sizeof(uint32_t) * (node_inx + 2));
				resv_msg_ptr->node_cnt[node_inx] =
					strtol(tok, &endptr, 10);
				if ((endptr != NULL) &&
				    ((endptr[0] == 'k') ||
				     (endptr[0] == 'K'))) {
					resv_msg_ptr->node_cnt[node_inx] *=
						1024;
				} else if ((endptr != NULL) &&
					   ((endptr[0] == 'm') ||
					    (endptr[0] == 'M'))) {
					resv_msg_ptr->node_cnt[node_inx] *=
						1024 * 1024;
				} else if ((endptr == NULL) ||
					   (endptr[0] != '\0') ||
					   (tok[0] == '\0')) {
					exit_code = 1;
					error("Invalid node count %s.  %s",
					      argv[i], msg);
					xfree(node_cnt);
					return -1;
				}
				node_inx++;
				tok = strtok_r(NULL, ",", &ptrptr);
			}
			xfree(node_cnt);

		} else if (strncasecmp(tag, "CoreCnt",   MAX(taglen,5)) == 0 ||
		           strncasecmp(tag, "CoreCount", MAX(taglen,5)) == 0 ||
		           strncasecmp(tag, "CPUCnt",    MAX(taglen,5)) == 0 ||
			   strncasecmp(tag, "CPUCount",  MAX(taglen,5)) == 0) {

			char *endptr = NULL, *core_cnt, *tok, *ptrptr = NULL;
			char *type;
			int node_inx = 0;

			type = slurm_get_select_type();
			if (strcasestr(type, "cray")) {
				int param;
				param = slurm_get_select_type_param();
				if (! (param & CR_OTHER_CONS_RES)) {
					error("CoreCnt or CPUCnt is only "
					      "suported when "
					      "SelectTypeParameters "
					      "includes OTHER_CONS_RES");
					xfree(type);
					return -1;
				}
			} else {
				if (strcasestr(type, "cons_res") == NULL) {
					error("CoreCnt or CPUCnt is only "
					      "suported when "
					      "SelectType includes "
					      "select/cons_res");
					xfree(type);
					return -1;
				}
			}
			xfree(type);
			core_cnt = xstrdup(val);
			tok = strtok_r(core_cnt, ",", &ptrptr);
			while (tok) {
				xrealloc(resv_msg_ptr->core_cnt,
					 sizeof(uint32_t) * (node_inx + 2));
				resv_msg_ptr->core_cnt[node_inx] =
					strtol(tok, &endptr, 10);
				if ((endptr == NULL) ||
				   (endptr[0] != '\0') ||
				   (tok[0] == '\0')) {
					exit_code = 1;
					error("Invalid core count %s. %s",
					      argv[i], msg);
					xfree(core_cnt);
					return -1;
				}
				node_inx++;
				tok = strtok_r(NULL, ",", &ptrptr);
			}
			xfree(core_cnt);

		} else if (strncasecmp(tag, "Nodes", MAX(taglen, 5)) == 0) {
			resv_msg_ptr->node_list = val;

		} else if (strncasecmp(tag, "Features", MAX(taglen, 2)) == 0) {
			resv_msg_ptr->features = val;

		} else if (strncasecmp(tag, "Licenses", MAX(taglen, 2)) == 0) {
			resv_msg_ptr->licenses = val;

		} else if (strncasecmp(tag, "PartitionName", MAX(taglen, 1))
			   == 0) {
			resv_msg_ptr->partition = val;

		} else if (strncasecmp(tag, "Users", MAX(taglen, 1)) == 0) {
			if (plus_minus) {
				resv_msg_ptr->users =
					_process_plus_minus(plus_minus, val);
				*free_user_str = 1;
			} else {
				resv_msg_ptr->users = val;
			}
		} else if (strncasecmp(tag, "Watts", MAX(taglen, 1)) == 0) {
			if (parse_uint32(val, &(resv_msg_ptr->resv_watts))) {
				error("Invalid Watts value: %s", val);
				return -1;
			}
		} else if (strncasecmp(tag, "res", 3) == 0) {
			continue;
		} else {
			exit_code = 1;
			error("Unknown parameter %s.  %s", argv[i], msg);
			return -1;
		}
	}
	return 0;
}
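
Each argument above is split at the first '=', with a '+' or '-' immediately before it treated as an append/remove modifier and excluded from the tag length. A standalone sketch of that split (the function name is illustrative):

#include <stdio.h>
#include <string.h>

/* Split "Tag+=value" style arguments the way
 * scontrol_parse_res_options() does: find '=', record a '+'/'-'
 * modifier just before it, and shorten the tag accordingly. */
static int split_tag(char *arg, char **tag, int *taglen,
		     char *plus_minus, char **val)
{
	char *eq = strchr(arg, '=');

	if (!eq || eq == arg)
		return -1;		/* no '=' or empty tag */
	*tag = arg;
	*taglen = (int)(eq - arg);
	*plus_minus = '\0';
	if (eq[-1] == '+' || eq[-1] == '-') {
		*plus_minus = eq[-1];
		(*taglen)--;
	}
	*val = eq + 1;
	return 0;
}

int main(void)
{
	char arg[] = "Users+=alice,bob";
	char *tag, *val, pm;
	int taglen;

	if (!split_tag(arg, &tag, &taglen, &pm, &val))
		printf("tag=%.*s mod=%c val=%s\n", taglen, tag,
		       pm ? pm : ' ', val);
	return 0;
}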
Example #11
/*
 * lllp_distribution
 *
 * Note: lllp stands for Lowest Level of Logical Processors.
 *
 * When automatic binding is enabled:
 *      - no binding flags set >= CPU_BIND_NONE, and
 *      - an auto binding level selected CPU_BIND_TO_{SOCKETS,CORES,THREADS}
 * Otherwise limit job step to the allocated CPUs
 *
 * generate the appropriate cpu_bind type and string which results in
 * the specified lllp distribution.
 *
 * IN/OUT- job launch request (cpu_bind_type and cpu_bind updated)
 * IN- global task id array
 */
void lllp_distribution(launch_tasks_request_msg_t *req, uint32_t node_id)
{
	int rc = SLURM_SUCCESS;
	bitstr_t **masks = NULL;
	char buf_type[100];
	int maxtasks = req->tasks_to_launch[(int)node_id];
	int whole_nodes, whole_sockets, whole_cores, whole_threads;
	int part_sockets, part_cores;
	const uint32_t *gtid = req->global_task_ids[(int)node_id];
	static uint16_t bind_entity = CPU_BIND_TO_THREADS | CPU_BIND_TO_CORES |
				      CPU_BIND_TO_SOCKETS | CPU_BIND_TO_LDOMS;
	static uint16_t bind_mode = CPU_BIND_NONE   | CPU_BIND_MASK   |
				    CPU_BIND_RANK   | CPU_BIND_MAP    |
				    CPU_BIND_LDMASK | CPU_BIND_LDRANK |
				    CPU_BIND_LDMAP;

	if (req->cpu_bind_type & bind_mode) {
		/* Explicit step binding specified by user */
		char *avail_mask = _alloc_mask(req,
					       &whole_nodes,  &whole_sockets,
					       &whole_cores,  &whole_threads,
					       &part_sockets, &part_cores);
		if ((whole_nodes == 0) && avail_mask &&
		    (req->job_core_spec == 0)) {
			info("task/affinity: entire node must be allocated, "
			     "disabling affinity");
			xfree(req->cpu_bind);
			req->cpu_bind = avail_mask;
			req->cpu_bind_type &= (~bind_mode);
			req->cpu_bind_type |= CPU_BIND_MASK;
		} else {
			if (req->job_core_spec) {
				if (req->cpu_bind_type & CPU_BIND_MASK)
					_validate_mask(req, avail_mask);
				else if (req->cpu_bind_type & CPU_BIND_MAP)
					_validate_map(req, avail_mask);
			}
			xfree(avail_mask);
		}
		slurm_sprint_cpu_bind_type(buf_type, req->cpu_bind_type);
		info("lllp_distribution jobid [%u] manual binding: %s",
		     req->job_id, buf_type);
		return;
	}

	if (!(req->cpu_bind_type & bind_entity)) {
		/* No bind unit (sockets, cores) specified by user,
		 * pick something reasonable */
		int max_tasks = req->tasks_to_launch[(int)node_id];
		char *avail_mask = _alloc_mask(req,
					       &whole_nodes,  &whole_sockets,
					       &whole_cores,  &whole_threads,
					       &part_sockets, &part_cores);
		debug("binding tasks:%d to "
		      "nodes:%d sockets:%d:%d cores:%d:%d threads:%d",
		      max_tasks, whole_nodes, whole_sockets ,part_sockets,
		      whole_cores, part_cores, whole_threads);

		if ((max_tasks == whole_sockets) && (part_sockets == 0)) {
			req->cpu_bind_type |= CPU_BIND_TO_SOCKETS;
			goto make_auto;
		}
		if ((max_tasks == whole_cores) && (part_cores == 0)) {
			req->cpu_bind_type |= CPU_BIND_TO_CORES;
			goto make_auto;
		}
		if (max_tasks == whole_threads) {
			req->cpu_bind_type |= CPU_BIND_TO_THREADS;
			goto make_auto;
		}
		if (avail_mask) {
			xfree(req->cpu_bind);
			req->cpu_bind = avail_mask;
			req->cpu_bind_type |= CPU_BIND_MASK;
		}
		slurm_sprint_cpu_bind_type(buf_type, req->cpu_bind_type);
		info("lllp_distribution jobid [%u] auto binding off: %s",
		     req->job_id, buf_type);
		return;

  make_auto:	xfree(avail_mask);
		slurm_sprint_cpu_bind_type(buf_type, req->cpu_bind_type);
		info("lllp_distribution jobid [%u] implicit auto binding: "
		     "%s, dist %d", req->job_id, buf_type, req->task_dist);
	} else {
		/* Explicit bind unit (sockets, cores) specified by user */
		slurm_sprint_cpu_bind_type(buf_type, req->cpu_bind_type);
		info("lllp_distribution jobid [%u] binding: %s, dist %d",
		     req->job_id, buf_type, req->task_dist);
	}

	switch (req->task_dist) {
	case SLURM_DIST_BLOCK_BLOCK:
	case SLURM_DIST_CYCLIC_BLOCK:
	case SLURM_DIST_PLANE:
		/* tasks are distributed in blocks within a plane */
		rc = _task_layout_lllp_block(req, node_id, &masks);
		break;
	case SLURM_DIST_ARBITRARY:
	case SLURM_DIST_BLOCK:
	case SLURM_DIST_CYCLIC:
	case SLURM_DIST_UNKNOWN:
		if (slurm_get_select_type_param()
		    & CR_CORE_DEFAULT_DIST_BLOCK) {
			rc = _task_layout_lllp_block(req, node_id, &masks);
			break;
		}
		/* We want to fall through here if we aren't doing a
		   default dist block.
		*/
	default:
		rc = _task_layout_lllp_cyclic(req, node_id, &masks);
		break;
	}

	/* FIXME: I'm worried about core_bitmap with CPU_BIND_TO_SOCKETS &
	 * max_cores - does select/cons_res plugin allocate whole
	 * socket??? Maybe not. Check srun man page.
	 */

	if (rc == SLURM_SUCCESS) {
		_task_layout_display_masks(req, gtid, maxtasks, masks);
	    	/* translate abstract masks to actual hardware layout */
		_lllp_map_abstract_masks(maxtasks, masks);
		_task_layout_display_masks(req, gtid, maxtasks, masks);
#ifdef HAVE_NUMA
		if (req->cpu_bind_type & CPU_BIND_TO_LDOMS) {
			_match_masks_to_ldom(maxtasks, masks);
			_task_layout_display_masks(req, gtid, maxtasks, masks);
		}
#endif
	    	 /* convert masks into cpu_bind mask string */
		 _lllp_generate_cpu_bind(req, maxtasks, masks);
	} else {
		char *avail_mask = _alloc_mask(req,
					       &whole_nodes,  &whole_sockets,
					       &whole_cores,  &whole_threads,
					       &part_sockets, &part_cores);
		if (avail_mask) {
			xfree(req->cpu_bind);
			req->cpu_bind = avail_mask;
			req->cpu_bind_type &= (~bind_mode);
			req->cpu_bind_type |= CPU_BIND_MASK;
		}
		slurm_sprint_cpu_bind_type(buf_type, req->cpu_bind_type);
		error("lllp_distribution jobid [%u] overriding binding: %s",
		      req->job_id, buf_type);
		error("Verify socket/core/thread counts in configuration");
	}
	if (masks)
		_lllp_free_masks(maxtasks, masks);
}