Example #1
/* Determine which CPUs a job step can use.
 * OUT whole_<entity>_cnt - returns count of whole <entities> in this
 *                          allocation for this node
 * OUT part_<entity>_cnt  - returns count of partial <entities> in this
 *                          allocation for this node
 * RET - a string representation of the available mask or NULL on error
 * NOTE: Caller must xfree() the return value. */
static char *_alloc_mask(launch_tasks_request_msg_t *req,
			 int *whole_node_cnt,  int *whole_socket_cnt,
			 int *whole_core_cnt,  int *whole_thread_cnt,
			 int *part_socket_cnt, int *part_core_cnt)
{
	uint16_t sockets, cores, threads;
	int c, s, t, i;
	int c_miss, s_miss, t_miss, c_hit, t_hit;
	bitstr_t *alloc_bitmap;
	char *str_mask;
	bitstr_t *alloc_mask;

	*whole_node_cnt   = 0;
	*whole_socket_cnt = 0;
	*whole_core_cnt   = 0;
	*whole_thread_cnt = 0;
	*part_socket_cnt  = 0;
	*part_core_cnt    = 0;

	alloc_bitmap = _get_avail_map(req, &sockets, &cores, &threads);
	if (!alloc_bitmap)
		return NULL;

	alloc_mask = bit_alloc(bit_size(alloc_bitmap));

	i = 0;
	for (s=0, s_miss=false; s<sockets; s++) {
		for (c=0, c_hit=c_miss=false; c<cores; c++) {
			for (t=0, t_hit=t_miss=false; t<threads; t++) {
				/* If we are pretending to have a
				   larger system than we really do,
				   wrap the index so we don't run
				   past the end of the bitmap.
				*/
				if (i >= bit_size(alloc_bitmap))
					i = 0;
				if (bit_test(alloc_bitmap, i)) {
					bit_set(alloc_mask, i);
					(*whole_thread_cnt)++;
					t_hit = true;
					c_hit = true;
				} else
					t_miss = true;
				i++;
			}
			if (!t_miss)
				(*whole_core_cnt)++;
			else {
				if (t_hit)
					(*part_core_cnt)++;
				c_miss = true;
			}
		}
		if (!c_miss)
			(*whole_socket_cnt)++;
		else {
			if (c_hit)
				(*part_socket_cnt)++;
			s_miss = true;
		}
	}
	if (!s_miss)
		(*whole_node_cnt)++;
	FREE_NULL_BITMAP(alloc_bitmap);

	if ((req->job_core_spec != (uint16_t) NO_VAL) &&
	    (req->job_core_spec &  CORE_SPEC_THREAD)  &&
	    (req->job_core_spec != CORE_SPEC_THREAD)) {
		int spec_thread_cnt;
		spec_thread_cnt = req->job_core_spec & (~CORE_SPEC_THREAD);
		for (t = threads - 1;
		     ((t > 0) && (spec_thread_cnt > 0)); t--) {
			for (c = cores - 1;
			     ((c > 0) && (spec_thread_cnt > 0)); c--) {
				for (s = sockets - 1;
				     ((s >= 0) && (spec_thread_cnt > 0)); s--) {
					i = s * cores + c;
					i = (i * threads) + t;
					bit_clear(alloc_mask, i);
					spec_thread_cnt--;
				}
			}
		}
	}

	/* translate abstract masks to actual hardware layout */
	_lllp_map_abstract_masks(1, &alloc_mask);

#ifdef HAVE_NUMA
	if (req->cpu_bind_type & CPU_BIND_TO_LDOMS) {
		_match_masks_to_ldom(1, &alloc_mask);
	}
#endif

	str_mask = bit_fmt_hexmask(alloc_mask);
	FREE_NULL_BITMAP(alloc_mask);
	return str_mask;
}
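The heart of _alloc_mask is the socket/core/thread walk above: the triple (s, c, t) is flattened to a single bit index as i = (s * cores + c) * threads + t, and hit/miss flags at each level decide whether an entity counts as whole or partial. The following is a minimal standalone sketch of that counting pass, using a plain bool array in place of SLURM's bitstr_t; the topology sizes and the allocation pattern are invented for illustration.

#include <stdbool.h>
#include <stdio.h>

/* Toy topology: 2 sockets x 2 cores x 2 threads = 8 logical CPUs. */
#define SOCKETS 2
#define CORES   2
#define THREADS 2

int main(void)
{
	/* Hypothetical allocation: socket 0 fully allocated, socket 1
	 * holds only one thread of its first core. */
	bool alloc[SOCKETS * CORES * THREADS] = {
		true, true, true, true,		/* socket 0 */
		true, false, false, false	/* socket 1 */
	};
	int whole_sockets = 0, part_sockets = 0;
	int whole_cores = 0, part_cores = 0, whole_threads = 0;

	for (int s = 0; s < SOCKETS; s++) {
		bool c_hit = false, c_miss = false;
		for (int c = 0; c < CORES; c++) {
			bool t_hit = false, t_miss = false;
			for (int t = 0; t < THREADS; t++) {
				/* Flatten (socket, core, thread) to a bit index. */
				int i = (s * CORES + c) * THREADS + t;
				if (alloc[i]) {
					whole_threads++;
					t_hit = true;
					c_hit = true;
				} else
					t_miss = true;
			}
			if (!t_miss)
				whole_cores++;
			else {
				if (t_hit)
					part_cores++;
				c_miss = true;
			}
		}
		if (!c_miss)
			whole_sockets++;
		else if (c_hit)
			part_sockets++;
	}
	printf("whole sockets %d, partial sockets %d, whole cores %d, "
	       "partial cores %d, whole threads %d\n",
	       whole_sockets, part_sockets, whole_cores, part_cores,
	       whole_threads);
	return 0;
}

With the sample allocation this prints 1 whole socket, 1 partial socket, 2 whole cores, 1 partial core, and 5 whole threads, mirroring how _alloc_mask reports counts back to its caller.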
Example #2
/*
 * batch_bind - Set the batch request message so as to bind the shell to the
 *	proper resources
 */
void batch_bind(batch_job_launch_msg_t *req)
{
	bitstr_t *req_map, *hw_map;
	slurm_cred_arg_t arg;
	uint16_t sockets=0, cores=0, num_cpus;
	int start, task_cnt=0;

	if (slurm_cred_get_args(req->cred, &arg) != SLURM_SUCCESS) {
		error("task/affinity: job lacks a credential");
		return;
	}
	start = _get_local_node_info(&arg, 0, &sockets, &cores);
	if (start != 0) {
		error("task/affinity: missing node 0 in job credential");
		slurm_cred_free_args(&arg);
		return;
	}
	if ((sockets * cores) == 0) {
		error("task/affinity: socket and core count both zero");
		slurm_cred_free_args(&arg);
		return;
	}

	num_cpus  = MIN((sockets * cores),
			 (conf->sockets * conf->cores));
	req_map = (bitstr_t *) bit_alloc(num_cpus);
	hw_map  = (bitstr_t *) bit_alloc(conf->block_map_size);

#ifdef HAVE_FRONT_END
{
	/* Since the front-end nodes are a shared resource, we limit each job
	 * to one CPU based upon a monotonically increasing sequence number */
	static int last_id = 0;
	bit_set(hw_map, ((last_id++) % conf->block_map_size));
	task_cnt = 1;
}
#else
{
	char *str;
	int t, p;

	/* Transfer core_bitmap data to local req_map.
	 * The MOD function handles the case where fewer processors
	 * physically exist than are configured (slurmd is out of
	 * sync with the slurmctld daemon). */
	for (p = 0; p < (sockets * cores); p++) {
		if (bit_test(arg.job_core_bitmap, p))
			bit_set(req_map, (p % num_cpus));
	}

	str = (char *)bit_fmt_hexmask(req_map);
	debug3("task/affinity: job %u core mask from slurmctld: %s",
		req->job_id, str);
	xfree(str);

	for (p = 0; p < num_cpus; p++) {
		if (bit_test(req_map, p) == 0)
			continue;
		/* core_bitmap does not include threads, so we
		 * add them here but limit them to what the job
		 * requested */
		for (t = 0; t < conf->threads; t++) {
			uint16_t pos = p * conf->threads + t;
			if (pos >= conf->block_map_size) {
				info("more resources configured than exist");
				p = num_cpus;
				break;
			}
			bit_set(hw_map, pos);
			task_cnt++;
		}
	}
}
#endif
	if (task_cnt) {
		req->cpu_bind_type = CPU_BIND_MASK;
		if (conf->task_plugin_param & CPU_BIND_VERBOSE)
			req->cpu_bind_type |= CPU_BIND_VERBOSE;
		xfree(req->cpu_bind);
		req->cpu_bind = (char *)bit_fmt_hexmask(hw_map);
		info("task/affinity: job %u CPU input mask for node: %s",
		     req->job_id, req->cpu_bind);
		/* translate abstract masks to actual hardware layout */
		_lllp_map_abstract_masks(1, &hw_map);
#ifdef HAVE_NUMA
		if (req->cpu_bind_type & CPU_BIND_TO_LDOMS) {
			_match_masks_to_ldom(1, &hw_map);
		}
#endif
		xfree(req->cpu_bind);
		req->cpu_bind = (char *)bit_fmt_hexmask(hw_map);
		info("task/affinity: job %u CPU final HW mask for node: %s",
		     req->job_id, req->cpu_bind);
	} else {
		error("task/affinity: job %u allocated no CPUs",
		      req->job_id);
	}
	FREE_NULL_BITMAP(hw_map);
	FREE_NULL_BITMAP(req_map);
	slurm_cred_free_args(&arg);
}
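On the non-front-end path, batch_bind expands each allocated core p into its hardware threads at positions p * conf->threads + t and then emits the result as a hex mask via bit_fmt_hexmask(). Below is a self-contained sketch of that expansion together with a naive hex formatter standing in for bit_fmt_hexmask(); the core count, thread count, and request map are hypothetical.

#include <stdbool.h>
#include <stdio.h>

#define NUM_CORES 4	/* cores allocated to the job (toy value) */
#define THREADS   2	/* hardware threads per core (toy value) */
#define NUM_CPUS  (NUM_CORES * THREADS)

/* Print a mask of nbits bits as hex with CPU 0 as the least-significant
 * bit, roughly the shape of what bit_fmt_hexmask() returns. */
static void print_hexmask(const bool *map, int nbits)
{
	printf("0x");
	for (int nibble = (nbits + 3) / 4 - 1; nibble >= 0; nibble--) {
		int val = 0;
		for (int b = 0; b < 4; b++) {
			int pos = nibble * 4 + b;
			if (pos < nbits && map[pos])
				val |= 1 << b;
		}
		printf("%x", val);
	}
	printf("\n");
}

int main(void)
{
	/* Hypothetical per-core request: cores 0 and 2 allocated. */
	bool req_map[NUM_CORES] = { true, false, true, false };
	bool hw_map[NUM_CPUS] = { false };

	/* Expand each allocated core into its hardware threads,
	 * as batch_bind does with pos = p * conf->threads + t. */
	for (int p = 0; p < NUM_CORES; p++) {
		if (!req_map[p])
			continue;
		for (int t = 0; t < THREADS; t++)
			hw_map[p * THREADS + t] = true;
	}
	print_hexmask(hw_map, NUM_CPUS);	/* prints 0x33 */
	return 0;
}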
Example #3
/*
 * lllp_distribution
 *
 * Note: lllp stands for Lowest Level of Logical Processors.
 *
 * When automatic binding is enabled:
 *      - no binding flags set >= CPU_BIND_NONE, and
 *      - an auto binding level selected: CPU_BIND_TO_{SOCKETS,CORES,THREADS}
 * Otherwise limit the job step to the allocated CPUs.
 *
 * Generate the appropriate cpu_bind type and string which result in
 * the specified lllp distribution.
 *
 * IN/OUT- job launch request (cpu_bind_type and cpu_bind updated)
 * IN- global task id array
 */
void lllp_distribution(launch_tasks_request_msg_t *req, uint32_t node_id)
{
	int rc = SLURM_SUCCESS;
	bitstr_t **masks = NULL;
	char buf_type[100];
	int maxtasks = req->tasks_to_launch[(int)node_id];
	int whole_nodes, whole_sockets, whole_cores, whole_threads;
	int part_sockets, part_cores;
	const uint32_t *gtid = req->global_task_ids[(int)node_id];
	static uint16_t bind_entity = CPU_BIND_TO_THREADS | CPU_BIND_TO_CORES |
				      CPU_BIND_TO_SOCKETS | CPU_BIND_TO_LDOMS;
	static uint16_t bind_mode = CPU_BIND_NONE   | CPU_BIND_MASK   |
				    CPU_BIND_RANK   | CPU_BIND_MAP    |
				    CPU_BIND_LDMASK | CPU_BIND_LDRANK |
				    CPU_BIND_LDMAP;
	static int only_one_thread_per_core = -1;

	if (only_one_thread_per_core == -1) {
		if (conf->cpus == (conf->sockets * conf->cores))
			only_one_thread_per_core = 1;
		else
			only_one_thread_per_core = 0;
	}

	/* If we are telling the system we only want to use 1 thread
	 * per core (via the node's CPUs option), this is the easiest
	 * way to convey that to the affinity plugin.
	 */
	if (only_one_thread_per_core)
		req->cpu_bind_type |= CPU_BIND_ONE_THREAD_PER_CORE;

	if (req->cpu_bind_type & bind_mode) {
		/* Explicit step binding specified by user */
		char *avail_mask = _alloc_mask(req,
					       &whole_nodes,  &whole_sockets,
					       &whole_cores,  &whole_threads,
					       &part_sockets, &part_cores);
		if ((whole_nodes == 0) && avail_mask &&
		    (req->job_core_spec == (uint16_t) NO_VAL)) {
			info("task/affinity: entire node must be allocated, "
			     "disabling affinity");
			xfree(req->cpu_bind);
			req->cpu_bind = avail_mask;
			req->cpu_bind_type &= (~bind_mode);
			req->cpu_bind_type |= CPU_BIND_MASK;
		} else {
			if (req->job_core_spec == (uint16_t) NO_VAL) {
				if (req->cpu_bind_type & CPU_BIND_MASK)
					_validate_mask(req, avail_mask);
				else if (req->cpu_bind_type & CPU_BIND_MAP)
					_validate_map(req, avail_mask);
			}
			xfree(avail_mask);
		}
		slurm_sprint_cpu_bind_type(buf_type, req->cpu_bind_type);
		info("lllp_distribution jobid [%u] manual binding: %s",
		     req->job_id, buf_type);
		return;
	}

	if (!(req->cpu_bind_type & bind_entity)) {
		/* No bind unit (sockets, cores) specified by user,
		 * pick something reasonable */
		uint32_t task_plugin_param = slurm_get_task_plugin_param();
		bool auto_def_set = false;
		int spec_thread_cnt = 0;
		int max_tasks = req->tasks_to_launch[(int)node_id] *
			req->cpus_per_task;
		char *avail_mask = _alloc_mask(req,
					       &whole_nodes,  &whole_sockets,
					       &whole_cores,  &whole_threads,
					       &part_sockets, &part_cores);
		debug("binding tasks:%d to "
		      "nodes:%d sockets:%d:%d cores:%d:%d threads:%d",
		      max_tasks, whole_nodes, whole_sockets, part_sockets,
		      whole_cores, part_cores, whole_threads);
		if ((req->job_core_spec != (uint16_t) NO_VAL) &&
		    (req->job_core_spec &  CORE_SPEC_THREAD)  &&
		    (req->job_core_spec != CORE_SPEC_THREAD)) {
			spec_thread_cnt = req->job_core_spec &
					  (~CORE_SPEC_THREAD);
		}
		if (((max_tasks == whole_sockets) && (part_sockets == 0)) ||
		    (spec_thread_cnt &&
		     (max_tasks == (whole_sockets + part_sockets)))) {
			req->cpu_bind_type |= CPU_BIND_TO_SOCKETS;
			goto make_auto;
		}
		if (((max_tasks == whole_cores) && (part_cores == 0)) ||
		    (spec_thread_cnt &&
		     (max_tasks == (whole_cores + part_cores)))) {
			req->cpu_bind_type |= CPU_BIND_TO_CORES;
			goto make_auto;
		}
		if (max_tasks == whole_threads) {
			req->cpu_bind_type |= CPU_BIND_TO_THREADS;
			goto make_auto;
		}

		if (task_plugin_param & CPU_AUTO_BIND_TO_THREADS) {
			auto_def_set = true;
			req->cpu_bind_type |= CPU_BIND_TO_THREADS;
			goto make_auto;
		} else if (task_plugin_param & CPU_AUTO_BIND_TO_CORES) {
			auto_def_set = true;
			req->cpu_bind_type |= CPU_BIND_TO_CORES;
			goto make_auto;
		} else if (task_plugin_param & CPU_AUTO_BIND_TO_SOCKETS) {
			auto_def_set = true;
			req->cpu_bind_type |= CPU_BIND_TO_SOCKETS;
			goto make_auto;
		}

		if (avail_mask) {
			xfree(req->cpu_bind);
			req->cpu_bind = avail_mask;
			req->cpu_bind_type |= CPU_BIND_MASK;
		}

		slurm_sprint_cpu_bind_type(buf_type, req->cpu_bind_type);
		info("lllp_distribution jobid [%u] auto binding off: %s",
		     req->job_id, buf_type);
		return;

  make_auto:	xfree(avail_mask);
		slurm_sprint_cpu_bind_type(buf_type, req->cpu_bind_type);
		info("lllp_distribution jobid [%u] %s auto binding: "
		     "%s, dist %d", req->job_id,
		     (auto_def_set) ? "default" : "implicit",
		     buf_type, req->task_dist);
	} else {
		/* Explicit bind unit (sockets, cores) specified by user */
		slurm_sprint_cpu_bind_type(buf_type, req->cpu_bind_type);
		info("lllp_distribution jobid [%u] binding: %s, dist %d",
		     req->job_id, buf_type, req->task_dist);
	}

	switch (req->task_dist & SLURM_DIST_STATE_BASE) {
	case SLURM_DIST_BLOCK_BLOCK:
	case SLURM_DIST_CYCLIC_BLOCK:
	case SLURM_DIST_PLANE:
		/* tasks are distributed in blocks within a plane */
		rc = _task_layout_lllp_block(req, node_id, &masks);
		break;
	case SLURM_DIST_ARBITRARY:
	case SLURM_DIST_BLOCK:
	case SLURM_DIST_CYCLIC:
	case SLURM_DIST_UNKNOWN:
		if (slurm_get_select_type_param()
		    & CR_CORE_DEFAULT_DIST_BLOCK) {
			rc = _task_layout_lllp_block(req, node_id, &masks);
			break;
		}
		/* We want to fall through here if we aren't doing a
		   default dist block.
		*/
	default:
		rc = _task_layout_lllp_cyclic(req, node_id, &masks);
		break;
	}

	/* FIXME: I'm worried about core_bitmap with CPU_BIND_TO_SOCKETS &
	 * max_cores - does select/cons_res plugin allocate whole
	 * socket??? Maybe not. Check srun man page.
	 */

	if (rc == SLURM_SUCCESS) {
		_task_layout_display_masks(req, gtid, maxtasks, masks);
	    	/* translate abstract masks to actual hardware layout */
		_lllp_map_abstract_masks(maxtasks, masks);
		_task_layout_display_masks(req, gtid, maxtasks, masks);
#ifdef HAVE_NUMA
		if (req->cpu_bind_type & CPU_BIND_TO_LDOMS) {
			_match_masks_to_ldom(maxtasks, masks);
			_task_layout_display_masks(req, gtid, maxtasks, masks);
		}
#endif
		/* convert masks into cpu_bind mask string */
		_lllp_generate_cpu_bind(req, maxtasks, masks);
	} else {
		char *avail_mask = _alloc_mask(req,
					       &whole_nodes,  &whole_sockets,
					       &whole_cores,  &whole_threads,
					       &part_sockets, &part_cores);
		if (avail_mask) {
			xfree(req->cpu_bind);
			req->cpu_bind = avail_mask;
			req->cpu_bind_type &= (~bind_mode);
			req->cpu_bind_type |= CPU_BIND_MASK;
		}
		slurm_sprint_cpu_bind_type(buf_type, req->cpu_bind_type);
		error("lllp_distribution jobid [%u] overriding binding: %s",
		      req->job_id, buf_type);
		error("Verify socket/core/thread counts in configuration");
	}
	if (masks)
		_lllp_free_masks(maxtasks, masks);
}
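The auto-binding choice above reduces to a ladder of comparisons: the task count is checked against the whole-socket, whole-core, and whole-thread counts from _alloc_mask, and the coarsest exact match wins (the spec_thread_cnt and TaskPluginParam branches refine this further). Here is a hedged standalone sketch of just that ladder; the enum and the sample counts are invented for illustration and are not the real CPU_BIND_TO_* flag values.

#include <stdio.h>

/* Illustrative stand-ins for the CPU_BIND_TO_* bind levels. */
enum bind_level { BIND_NONE = 0, BIND_SOCKETS, BIND_CORES, BIND_THREADS };

/* Mirror of the selection ladder in lllp_distribution: bind to the
 * coarsest entity whose whole-unit count matches the task count. */
static enum bind_level pick_bind_level(int max_tasks,
				       int whole_sockets, int part_sockets,
				       int whole_cores, int part_cores,
				       int whole_threads)
{
	if ((max_tasks == whole_sockets) && (part_sockets == 0))
		return BIND_SOCKETS;
	if ((max_tasks == whole_cores) && (part_cores == 0))
		return BIND_CORES;
	if (max_tasks == whole_threads)
		return BIND_THREADS;
	return BIND_NONE;	/* fall back to a plain CPU mask */
}

int main(void)
{
	/* Toy case: 4 tasks, 2 whole sockets, 4 whole cores, 8 threads. */
	enum bind_level lvl = pick_bind_level(4, 2, 0, 4, 0, 8);
	printf("chosen level: %d (expect %d = BIND_CORES)\n",
	       lvl, BIND_CORES);
	return 0;
}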
Example #4
/*
 * lllp_distribution
 *
 * Note: lllp stands for Lowest Level of Logical Processors.
 *
 * When automatic binding is enabled:
 *      - no binding flags set >= CPU_BIND_NONE, and
 *      - an auto binding level selected: CPU_BIND_TO_{SOCKETS,CORES,THREADS}
 * Otherwise limit the job step to the allocated CPUs.
 *
 * Generate the appropriate cpu_bind type and string which result in
 * the specified lllp distribution.
 *
 * IN/OUT- job launch request (cpu_bind_type and cpu_bind updated)
 * IN- global task id array
 */
void lllp_distribution(launch_tasks_request_msg_t *req, uint32_t node_id)
{
	int rc = SLURM_SUCCESS;
	bitstr_t **masks = NULL;
	char buf_type[100];
	int maxtasks = req->tasks_to_launch[(int)node_id];
	int whole_nodes, whole_sockets, whole_cores, whole_threads;
	int part_sockets, part_cores;
	const uint32_t *gtid = req->global_task_ids[(int)node_id];
	static uint16_t bind_entity = CPU_BIND_TO_THREADS | CPU_BIND_TO_CORES |
				      CPU_BIND_TO_SOCKETS | CPU_BIND_TO_LDOMS;
	static uint16_t bind_mode = CPU_BIND_NONE   | CPU_BIND_MASK   |
				    CPU_BIND_RANK   | CPU_BIND_MAP    |
				    CPU_BIND_LDMASK | CPU_BIND_LDRANK |
				    CPU_BIND_LDMAP;

	if (req->cpu_bind_type & bind_mode) {
		/* Explicit step binding specified by user */
		char *avail_mask = _alloc_mask(req,
					       &whole_nodes,  &whole_sockets,
					       &whole_cores,  &whole_threads,
					       &part_sockets, &part_cores);
		if ((whole_nodes == 0) && avail_mask) {
			/* Step does NOT have access to whole node,
			 * bind to full mask of available processors */
			xfree(req->cpu_bind);
			req->cpu_bind = avail_mask;
			req->cpu_bind_type &= (~bind_mode);
			req->cpu_bind_type |= CPU_BIND_MASK;
		} else {
			/* Step does have access to whole node,
			 * bind to whatever step wants */
			xfree(avail_mask);
		}
		slurm_sprint_cpu_bind_type(buf_type, req->cpu_bind_type);
		info("lllp_distribution jobid [%u] manual binding: %s",
		     req->job_id, buf_type);
		return;
	}

	if (!(req->cpu_bind_type & bind_entity)) {
		/* No bind unit (sockets, cores) specified by user,
		 * pick something reasonable */
		int max_tasks = req->tasks_to_launch[(int)node_id];
		char *avail_mask = _alloc_mask(req,
					       &whole_nodes,  &whole_sockets,
					       &whole_cores,  &whole_threads,
					       &part_sockets, &part_cores);
		debug("binding tasks:%d to "
		      "nodes:%d sockets:%d:%d cores:%d:%d threads:%d",
		      max_tasks, whole_nodes, whole_sockets, part_sockets,
		      whole_cores, part_cores, whole_threads);

		if ((max_tasks == whole_sockets) && (part_sockets == 0)) {
			req->cpu_bind_type |= CPU_BIND_TO_SOCKETS;
			goto make_auto;
		}
		if ((max_tasks == whole_cores) && (part_cores == 0)) {
			req->cpu_bind_type |= CPU_BIND_TO_CORES;
			goto make_auto;
		}
		if (max_tasks == whole_threads) {
			req->cpu_bind_type |= CPU_BIND_TO_THREADS;
			goto make_auto;
		}
		if (avail_mask) {
			xfree(req->cpu_bind);
			req->cpu_bind = avail_mask;
			req->cpu_bind_type |= CPU_BIND_MASK;
		}
		slurm_sprint_cpu_bind_type(buf_type, req->cpu_bind_type);
		info("lllp_distribution jobid [%u] auto binding off: %s",
		     req->job_id, buf_type);
		return;

  make_auto:	xfree(avail_mask);
		slurm_sprint_cpu_bind_type(buf_type, req->cpu_bind_type);
		info("lllp_distribution jobid [%u] implicit auto binding: "
		     "%s, dist %d", req->job_id, buf_type, req->task_dist);
	} else {
		/* Explicit bind unit (sockets, cores) specified by user */
		slurm_sprint_cpu_bind_type(buf_type, req->cpu_bind_type);
		info("lllp_distribution jobid [%u] binding: %s, dist %d",
		     req->job_id, buf_type, req->task_dist);
	}

	switch (req->task_dist) {
	case SLURM_DIST_BLOCK_BLOCK:
	case SLURM_DIST_CYCLIC_BLOCK:
	case SLURM_DIST_PLANE:
		/* tasks are distributed in blocks within a plane */
		rc = _task_layout_lllp_block(req, node_id, &masks);
		break;
	case SLURM_DIST_CYCLIC:
	case SLURM_DIST_BLOCK:
	case SLURM_DIST_CYCLIC_CYCLIC:
	case SLURM_DIST_BLOCK_CYCLIC:
		rc = _task_layout_lllp_cyclic(req, node_id, &masks);
		break;
	default:
		if (req->cpus_per_task > 1)
			rc = _task_layout_lllp_multi(req, node_id, &masks);
		else
			rc = _task_layout_lllp_cyclic(req, node_id, &masks);
		req->task_dist = SLURM_DIST_BLOCK_CYCLIC;
		break;
	}

	/* FIXME: I'm worried about core_bitmap with CPU_BIND_TO_SOCKETS &
	 * max_cores - does select/cons_res plugin allocate whole
	 * socket??? Maybe not. Check srun man page.
	 */

	if (rc == SLURM_SUCCESS) {
		_task_layout_display_masks(req, gtid, maxtasks, masks);
	    	/* translate abstract masks to actual hardware layout */
		_lllp_map_abstract_masks(maxtasks, masks);
		_task_layout_display_masks(req, gtid, maxtasks, masks);
#ifdef HAVE_NUMA
		if (req->cpu_bind_type & CPU_BIND_TO_LDOMS) {
			_match_masks_to_ldom(maxtasks, masks);
			_task_layout_display_masks(req, gtid, maxtasks, masks);
		}
#endif
		/* convert masks into cpu_bind mask string */
		_lllp_generate_cpu_bind(req, maxtasks, masks);
	} else {
		char *avail_mask = _alloc_mask(req,
					       &whole_nodes,  &whole_sockets,
					       &whole_cores,  &whole_threads,
					       &part_sockets, &part_cores);
		if (avail_mask) {
			xfree(req->cpu_bind);
			req->cpu_bind = avail_mask;
			req->cpu_bind_type &= (~bind_mode);
			req->cpu_bind_type |= CPU_BIND_MASK;
		}
		slurm_sprint_cpu_bind_type(buf_type, req->cpu_bind_type);
		error("lllp_distribution jobid [%u] overriding binding: %s",
		      req->job_id, buf_type);
		error("Verify socket/core/thread counts in configuration");
	}
	if (masks)
		_lllp_free_masks(maxtasks, masks);
}
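The task_dist switch at the end selects between a block layout, where each task receives a run of consecutive logical CPUs, and a cyclic layout, where successive tasks are striped round-robin across sockets. The sketch below only contrasts the two assignment orders over a flat CPU index; it is not the actual _task_layout_lllp_block/_task_layout_lllp_cyclic implementations, and the topology numbers are made up.

#include <stdio.h>

#define NTASKS   4
#define NSOCKETS 2
#define NCPUS    8	/* toy: 2 sockets x 4 logical CPUs each */

int main(void)
{
	int cpus_per_socket = NCPUS / NSOCKETS;

	/* Block: each task gets a run of consecutive CPUs. */
	printf("block layout:\n");
	for (int task = 0; task < NTASKS; task++) {
		int per_task = NCPUS / NTASKS;
		printf("  task %d -> cpus", task);
		for (int k = 0; k < per_task; k++)
			printf(" %d", task * per_task + k);
		printf("\n");
	}

	/* Cyclic: successive tasks are striped across sockets. */
	printf("cyclic layout:\n");
	for (int task = 0; task < NTASKS; task++) {
		int socket = task % NSOCKETS;
		int slot   = task / NSOCKETS;
		printf("  task %d -> cpu %d\n", task,
		       socket * cpus_per_socket + slot);
	}
	return 0;
}

With four tasks on this toy topology, the block order assigns CPUs {0,1}, {2,3}, {4,5}, {6,7} to tasks 0..3, while the cyclic order places tasks 0..3 on CPUs 0, 4, 1, 5, alternating sockets.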