Exemple #1
0
/*
 * verify cpu_bind arguments
 *
 * we support different launch policy names
 * we also allow a verbose setting to be specified
 *     --cpu_bind=threads
 *     --cpu_bind=cores
 *     --cpu_bind=sockets
 *     --cpu_bind=v
 *     --cpu_bind=rank,v
 *     --cpu_bind=rank
 *     --cpu_bind={MAP_CPU|MASK_CPU}:0,1,2,3,4
 *
 *
 * returns -1 on error, 0 otherwise
 */
int slurm_verify_cpu_bind(const char *arg, char **cpu_bind,
			  cpu_bind_type_t *flags)
{
	char *buf, *p, *tok;
	int bind_bits =
		CPU_BIND_NONE|CPU_BIND_RANK|CPU_BIND_MAP|CPU_BIND_MASK;
	int bind_to_bits =
		CPU_BIND_TO_SOCKETS|CPU_BIND_TO_CORES|CPU_BIND_TO_THREADS;
	uint16_t task_plugin_param = slurm_get_task_plugin_param();
	bool have_binding = _have_task_affinity();
	bool log_binding = true;

	bind_bits    |= CPU_BIND_LDRANK|CPU_BIND_LDMAP|CPU_BIND_LDMASK;
	bind_to_bits |= CPU_BIND_TO_LDOMS;

	if (arg == NULL) {
		if ((*flags != 0) || 		/* already set values */
		    (task_plugin_param == 0))	/* no system defaults */
			return 0;

		/* set system defaults */
		xfree(*cpu_bind);
		if (task_plugin_param & CPU_BIND_NONE)
			*flags = CPU_BIND_NONE;
		else if (task_plugin_param & CPU_BIND_TO_SOCKETS)
			*flags = CPU_BIND_TO_SOCKETS;
		else if (task_plugin_param & CPU_BIND_TO_CORES)
			*flags = CPU_BIND_TO_CORES;
		else if (task_plugin_param & CPU_BIND_TO_THREADS)
			*flags |= CPU_BIND_TO_THREADS;
		else if (task_plugin_param & CPU_BIND_TO_LDOMS)
			*flags |= CPU_BIND_TO_LDOMS;
		if (task_plugin_param & CPU_BIND_VERBOSE)
			*flags |= CPU_BIND_VERBOSE;
	    	return 0;
	}

	/* Start with system default verbose flag (if set) */
	if (task_plugin_param & CPU_BIND_VERBOSE)
		*flags |= CPU_BIND_VERBOSE;

    	buf = xstrdup(arg);
    	p = buf;
	/* change all ',' delimiters not followed by a digit to ';'  */
	/* simplifies parsing tokens while keeping map/mask together */
	while (p[0] != '\0') {
	    	if ((p[0] == ',') && (!_isvalue(&(p[1]))))
			p[0] = ';';
		p++;
	}

	p = buf;
	while ((tok = strsep(&p, ";"))) {
		if (strcasecmp(tok, "help") == 0) {
			slurm_print_cpu_bind_help();
			return 1;
		}
		if (!have_binding && log_binding) {
			info("cluster configuration lacks support for cpu "
			     "binding");
			log_binding = false;
		}
		if ((strcasecmp(tok, "q") == 0) ||
			   (strcasecmp(tok, "quiet") == 0)) {
		        *flags &= ~CPU_BIND_VERBOSE;
		} else if ((strcasecmp(tok, "v") == 0) ||
			   (strcasecmp(tok, "verbose") == 0)) {
		        *flags |= CPU_BIND_VERBOSE;
		} else if ((strcasecmp(tok, "no") == 0) ||
			   (strcasecmp(tok, "none") == 0)) {
			_clear_then_set((int *)flags, bind_bits, CPU_BIND_NONE);
			xfree(*cpu_bind);
		} else if (strcasecmp(tok, "rank") == 0) {
			_clear_then_set((int *)flags, bind_bits, CPU_BIND_RANK);
			xfree(*cpu_bind);
		} else if ((strncasecmp(tok, "map_cpu", 7) == 0) ||
		           (strncasecmp(tok, "mapcpu", 6) == 0)) {
			char *list;
			list = strsep(&tok, ":=");
			list = strsep(&tok, ":=");
			_clear_then_set((int *)flags, bind_bits, CPU_BIND_MAP);
			xfree(*cpu_bind);
			if (list && *list) {
				*cpu_bind = xstrdup(list);
			} else {
				error("missing list for \"--cpu_bind="
				      "map_cpu:<list>\"");
				xfree(buf);
				return 1;
			}
		} else if ((strncasecmp(tok, "mask_cpu", 8) == 0) ||
		           (strncasecmp(tok, "maskcpu", 7) == 0)) {
			char *list;
			list = strsep(&tok, ":=");
			list = strsep(&tok, ":=");
			_clear_then_set((int *)flags, bind_bits, CPU_BIND_MASK);
			xfree(*cpu_bind);
			if (list && *list) {
				*cpu_bind = xstrdup(list);
			} else {
				error("missing list for \"--cpu_bind="
				      "mask_cpu:<list>\"");
				xfree(buf);
				return -1;
			}
		} else if (strcasecmp(tok, "rank_ldom") == 0) {
			_clear_then_set((int *)flags, bind_bits,
					CPU_BIND_LDRANK);
			xfree(*cpu_bind);
		} else if ((strncasecmp(tok, "map_ldom", 8) == 0) ||
		           (strncasecmp(tok, "mapldom", 7) == 0)) {
			char *list;
			list = strsep(&tok, ":=");
			list = strsep(&tok, ":=");
			_clear_then_set((int *)flags, bind_bits,
					CPU_BIND_LDMAP);
			xfree(*cpu_bind);
			if (list && *list) {
				*cpu_bind = xstrdup(list);
			} else {
				error("missing list for \"--cpu_bind="
				      "map_ldom:<list>\"");
				xfree(buf);
				return 1;
			}
		} else if ((strncasecmp(tok, "mask_ldom", 9) == 0) ||
		           (strncasecmp(tok, "maskldom", 8) == 0)) {
			char *list;
			list = strsep(&tok, ":=");
			list = strsep(&tok, ":=");
			_clear_then_set((int *)flags, bind_bits,
					CPU_BIND_LDMASK);
			xfree(*cpu_bind);
			if (list && *list) {
				*cpu_bind = xstrdup(list);
			} else {
				error("missing list for \"--cpu_bind="
				      "mask_ldom:<list>\"");
				xfree(buf);
				return -1;
			}
		} else if ((strcasecmp(tok, "socket") == 0) ||
		           (strcasecmp(tok, "sockets") == 0)) {
			if (task_plugin_param &
			    (CPU_BIND_NONE | CPU_BIND_TO_CORES |
			     CPU_BIND_TO_THREADS | CPU_BIND_TO_LDOMS)) {
				error("--cpu_bind=sockets incompatible with "
				      "TaskPluginParam configuration "
				      "parameter");
				return -1;
			}
			_clear_then_set((int *)flags, bind_to_bits,
				       CPU_BIND_TO_SOCKETS);
		} else if ((strcasecmp(tok, "core") == 0) ||
		           (strcasecmp(tok, "cores") == 0)) {
			if (task_plugin_param &
			    (CPU_BIND_NONE | CPU_BIND_TO_SOCKETS |
			     CPU_BIND_TO_THREADS | CPU_BIND_TO_LDOMS)) {
				error("--cpu_bind=cores incompatible with "
				      "TaskPluginParam configuration "
				      "parameter");
				return -1;
			}
			_clear_then_set((int *)flags, bind_to_bits,
				       CPU_BIND_TO_CORES);
		} else if ((strcasecmp(tok, "thread") == 0) ||
		           (strcasecmp(tok, "threads") == 0)) {
			if (task_plugin_param &
			    (CPU_BIND_NONE | CPU_BIND_TO_SOCKETS |
			     CPU_BIND_TO_CORES | CPU_BIND_TO_LDOMS)) {
				error("--cpu_bind=threads incompatible with "
				      "TaskPluginParam configuration "
				      "parameter");
				return -1;
			}
			_clear_then_set((int *)flags, bind_to_bits,
				       CPU_BIND_TO_THREADS);
		} else if ((strcasecmp(tok, "ldom") == 0) ||
		           (strcasecmp(tok, "ldoms") == 0)) {
			if (task_plugin_param &
			    (CPU_BIND_NONE | CPU_BIND_TO_SOCKETS |
			     CPU_BIND_TO_CORES | CPU_BIND_TO_THREADS)) {
				error("--cpu_bind=threads incompatible with "
				      "TaskPluginParam configuration "
				      "parameter");
				return -1;
			}
			_clear_then_set((int *)flags, bind_to_bits,
				       CPU_BIND_TO_LDOMS);
		} else {
			error("unrecognized --cpu_bind argument \"%s\"", tok);
			xfree(buf);
			return -1;
		}
	}
	xfree(buf);

	return 0;
}
/*
 * lllp_distribution
 *
 * Note: lllp stands for Lowest Level of Logical Processors.
 *
 * When automatic binding is enabled:
 *      - no binding flags set >= CPU_BIND_NONE, and
 *      - a auto binding level selected CPU_BIND_TO_{SOCKETS,CORES,THREADS}
 * Otherwise limit job step to the allocated CPUs
 *
 * generate the appropriate cpu_bind type and string which results in
 * the specified lllp distribution.
 *
 * IN/OUT- job launch request (cpu_bind_type and cpu_bind updated)
 * IN- global task id array
 */
void lllp_distribution(launch_tasks_request_msg_t *req, uint32_t node_id)
{
	int rc = SLURM_SUCCESS;
	bitstr_t **masks = NULL;
	char buf_type[100];
	int maxtasks = req->tasks_to_launch[(int)node_id];
	int whole_nodes, whole_sockets, whole_cores, whole_threads;
	int part_sockets, part_cores;
	const uint32_t *gtid = req->global_task_ids[(int)node_id];
	static uint16_t bind_entity = CPU_BIND_TO_THREADS | CPU_BIND_TO_CORES |
				      CPU_BIND_TO_SOCKETS | CPU_BIND_TO_LDOMS;
	static uint16_t bind_mode = CPU_BIND_NONE   | CPU_BIND_MASK   |
				    CPU_BIND_RANK   | CPU_BIND_MAP    |
				    CPU_BIND_LDMASK | CPU_BIND_LDRANK |
				    CPU_BIND_LDMAP;
	static int only_one_thread_per_core = -1;

	if (only_one_thread_per_core == -1) {
		if (conf->cpus == (conf->sockets * conf->cores))
			only_one_thread_per_core = 1;
		else
			only_one_thread_per_core = 0;
	}

	/* If we are telling the system we only want to use 1 thread
	 * per core with the CPUs node option this is the easiest way
	 * to portray that to the affinity plugin.
	 */
	if (only_one_thread_per_core)
		req->cpu_bind_type |= CPU_BIND_ONE_THREAD_PER_CORE;

	if (req->cpu_bind_type & bind_mode) {
		/* Explicit step binding specified by user */
		char *avail_mask = _alloc_mask(req,
					       &whole_nodes,  &whole_sockets,
					       &whole_cores,  &whole_threads,
					       &part_sockets, &part_cores);
		if ((whole_nodes == 0) && avail_mask &&
		    (req->job_core_spec == (uint16_t) NO_VAL)) {
			info("task/affinity: entire node must be allocated, "
			     "disabling affinity");
			xfree(req->cpu_bind);
			req->cpu_bind = avail_mask;
			req->cpu_bind_type &= (~bind_mode);
			req->cpu_bind_type |= CPU_BIND_MASK;
		} else {
			if (req->job_core_spec == (uint16_t) NO_VAL) {
				if (req->cpu_bind_type & CPU_BIND_MASK)
					_validate_mask(req, avail_mask);
				else if (req->cpu_bind_type & CPU_BIND_MAP)
					_validate_map(req, avail_mask);
			}
			xfree(avail_mask);
		}
		slurm_sprint_cpu_bind_type(buf_type, req->cpu_bind_type);
		info("lllp_distribution jobid [%u] manual binding: %s",
		     req->job_id, buf_type);
		return;
	}

	if (!(req->cpu_bind_type & bind_entity)) {
		/* No bind unit (sockets, cores) specified by user,
		 * pick something reasonable */
		uint32_t task_plugin_param = slurm_get_task_plugin_param();
		bool auto_def_set = false;
		int spec_thread_cnt = 0;
		int max_tasks = req->tasks_to_launch[(int)node_id] *
			req->cpus_per_task;
		char *avail_mask = _alloc_mask(req,
					       &whole_nodes,  &whole_sockets,
					       &whole_cores,  &whole_threads,
					       &part_sockets, &part_cores);
		debug("binding tasks:%d to "
		      "nodes:%d sockets:%d:%d cores:%d:%d threads:%d",
		      max_tasks, whole_nodes, whole_sockets ,part_sockets,
		      whole_cores, part_cores, whole_threads);
		if ((req->job_core_spec != (uint16_t) NO_VAL) &&
		    (req->job_core_spec &  CORE_SPEC_THREAD)  &&
		    (req->job_core_spec != CORE_SPEC_THREAD)) {
			spec_thread_cnt = req->job_core_spec &
					  (~CORE_SPEC_THREAD);
		}
		if (((max_tasks == whole_sockets) && (part_sockets == 0)) ||
		    (spec_thread_cnt &&
		     (max_tasks == (whole_sockets + part_sockets)))) {
			req->cpu_bind_type |= CPU_BIND_TO_SOCKETS;
			goto make_auto;
		}
		if (((max_tasks == whole_cores) && (part_cores == 0)) ||
		    (spec_thread_cnt &&
		     (max_tasks == (whole_cores + part_cores)))) {
			req->cpu_bind_type |= CPU_BIND_TO_CORES;
			goto make_auto;
		}
		if (max_tasks == whole_threads) {
			req->cpu_bind_type |= CPU_BIND_TO_THREADS;
			goto make_auto;
		}

		if (task_plugin_param & CPU_AUTO_BIND_TO_THREADS) {
			auto_def_set = true;
			req->cpu_bind_type |= CPU_BIND_TO_THREADS;
			goto make_auto;
		} else if (task_plugin_param & CPU_AUTO_BIND_TO_CORES) {
			auto_def_set = true;
			req->cpu_bind_type |= CPU_BIND_TO_CORES;
			goto make_auto;
		} else if (task_plugin_param & CPU_AUTO_BIND_TO_SOCKETS) {
			auto_def_set = true;
			req->cpu_bind_type |= CPU_BIND_TO_SOCKETS;
			goto make_auto;
		}

		if (avail_mask) {
			xfree(req->cpu_bind);
			req->cpu_bind = avail_mask;
			req->cpu_bind_type |= CPU_BIND_MASK;
		}

		slurm_sprint_cpu_bind_type(buf_type, req->cpu_bind_type);
		info("lllp_distribution jobid [%u] auto binding off: %s",
		     req->job_id, buf_type);
		return;

  make_auto:	xfree(avail_mask);
		slurm_sprint_cpu_bind_type(buf_type, req->cpu_bind_type);
		info("lllp_distribution jobid [%u] %s auto binding: "
		     "%s, dist %d", req->job_id,
		     (auto_def_set) ? "default" : "implicit",
		     buf_type, req->task_dist);
	} else {
		/* Explicit bind unit (sockets, cores) specified by user */
		slurm_sprint_cpu_bind_type(buf_type, req->cpu_bind_type);
		info("lllp_distribution jobid [%u] binding: %s, dist %d",
		     req->job_id, buf_type, req->task_dist);
	}

	switch (req->task_dist & SLURM_DIST_STATE_BASE) {
	case SLURM_DIST_BLOCK_BLOCK:
	case SLURM_DIST_CYCLIC_BLOCK:
	case SLURM_DIST_PLANE:
		/* tasks are distributed in blocks within a plane */
		rc = _task_layout_lllp_block(req, node_id, &masks);
		break;
	case SLURM_DIST_ARBITRARY:
	case SLURM_DIST_BLOCK:
	case SLURM_DIST_CYCLIC:
	case SLURM_DIST_UNKNOWN:
		if (slurm_get_select_type_param()
		    & CR_CORE_DEFAULT_DIST_BLOCK) {
			rc = _task_layout_lllp_block(req, node_id, &masks);
			break;
		}
		/* We want to fall through here if we aren't doing a
		   default dist block.
		*/
	default:
		rc = _task_layout_lllp_cyclic(req, node_id, &masks);
		break;
	}

	/* FIXME: I'm worried about core_bitmap with CPU_BIND_TO_SOCKETS &
	 * max_cores - does select/cons_res plugin allocate whole
	 * socket??? Maybe not. Check srun man page.
	 */

	if (rc == SLURM_SUCCESS) {
		_task_layout_display_masks(req, gtid, maxtasks, masks);
	    	/* translate abstract masks to actual hardware layout */
		_lllp_map_abstract_masks(maxtasks, masks);
		_task_layout_display_masks(req, gtid, maxtasks, masks);
#ifdef HAVE_NUMA
		if (req->cpu_bind_type & CPU_BIND_TO_LDOMS) {
			_match_masks_to_ldom(maxtasks, masks);
			_task_layout_display_masks(req, gtid, maxtasks, masks);
		}
#endif
	    	 /* convert masks into cpu_bind mask string */
		 _lllp_generate_cpu_bind(req, maxtasks, masks);
	} else {
		char *avail_mask = _alloc_mask(req,
					       &whole_nodes,  &whole_sockets,
					       &whole_cores,  &whole_threads,
					       &part_sockets, &part_cores);
		if (avail_mask) {
			xfree(req->cpu_bind);
			req->cpu_bind = avail_mask;
			req->cpu_bind_type &= (~bind_mode);
			req->cpu_bind_type |= CPU_BIND_MASK;
		}
		slurm_sprint_cpu_bind_type(buf_type, req->cpu_bind_type);
		error("lllp_distribution jobid [%u] overriding binding: %s",
		      req->job_id, buf_type);
		error("Verify socket/core/thread counts in configuration");
	}
	if (masks)
		_lllp_free_masks(maxtasks, masks);
}