/*
 * Parse a "<name>[:=]<list>" token (e.g. "map_cpu:0,1,2"), record the
 * bind type and duplicate the list into *cpu_bind.
 *
 * IN tok       - token to parse (modified in place by strsep())
 * IN opt_name  - canonical option name, used only for the error message
 * IN bind_bits - full set of mutually-exclusive bind-method bits to clear
 * IN bind_type - the single CPU_BIND_* bit to set
 * IN/OUT cpu_bind - replaced with an xstrdup'd copy of the list
 * IN/OUT flags - bind flags, updated via _clear_then_set()
 * RET 0 on success, -1 if the list is missing or empty
 */
static int _parse_bind_list(char *tok, const char *opt_name, int bind_bits,
			    int bind_type, char **cpu_bind,
			    cpu_bind_type_t *flags)
{
	char *list;

	(void) strsep(&tok, ":=");	/* skip the option keyword */
	list = strsep(&tok, ":=");	/* the CPU/mask list itself */
	_clear_then_set((int *)flags, bind_bits, bind_type);
	xfree(*cpu_bind);
	if (list && *list) {
		*cpu_bind = xstrdup(list);
		return 0;
	}
	error("missing list for \"--cpu_bind=%s:<list>\"", opt_name);
	return -1;
}

/*
 * Select an automatic binding level (sockets/cores/threads/ldoms) after
 * verifying it does not conflict with the TaskPluginParam system default.
 *
 * IN task_plugin_param - system TaskPluginParam bits
 * IN bind_to_bits - full set of mutually-exclusive bind-to bits to clear
 * IN bind_to   - the single CPU_BIND_TO_* bit being requested
 * IN opt_name  - canonical option name, used only for the error message
 * IN/OUT flags - bind flags, updated via _clear_then_set()
 * RET 0 on success, -1 on conflict with TaskPluginParam
 */
static int _set_bind_to(uint16_t task_plugin_param, int bind_to_bits,
			int bind_to, const char *opt_name,
			cpu_bind_type_t *flags)
{
	int all_bind_to = CPU_BIND_TO_SOCKETS | CPU_BIND_TO_CORES |
			  CPU_BIND_TO_THREADS | CPU_BIND_TO_LDOMS;

	/* Conflict if the system forces "none" or a DIFFERENT bind level */
	if (task_plugin_param & (CPU_BIND_NONE | (all_bind_to & ~bind_to))) {
		error("--cpu_bind=%s incompatible with TaskPluginParam "
		      "configuration parameter", opt_name);
		return -1;
	}
	_clear_then_set((int *)flags, bind_to_bits, bind_to);
	return 0;
}

/*
 * Verify cpu_bind arguments
 *
 * We support different launch policy names; a verbose setting may also
 * be specified:
 *	--cpu_bind=threads
 *	--cpu_bind=cores
 *	--cpu_bind=sockets
 *	--cpu_bind=v
 *	--cpu_bind=rank,v
 *	--cpu_bind=rank
 *	--cpu_bind={MAP_CPU|MASK_CPU}:0,1,2,3,4
 *
 * IN arg       - option value string, or NULL to apply system defaults
 *                from TaskPluginParam (only when *flags is still 0)
 * IN/OUT cpu_bind - xmalloc'd CPU list/mask string, replaced as needed
 * IN/OUT flags - cpu_bind_type_t bits, updated in place
 * RET 0 on success, 1 if help was printed (caller should exit),
 *     -1 on error
 */
int slurm_verify_cpu_bind(const char *arg, char **cpu_bind,
			  cpu_bind_type_t *flags)
{
	char *buf, *p, *tok;
	int bind_bits =
		CPU_BIND_NONE|CPU_BIND_RANK|CPU_BIND_MAP|CPU_BIND_MASK;
	int bind_to_bits =
		CPU_BIND_TO_SOCKETS|CPU_BIND_TO_CORES|CPU_BIND_TO_THREADS;
	uint16_t task_plugin_param = slurm_get_task_plugin_param();
	bool have_binding = _have_task_affinity();
	bool log_binding = true;

	bind_bits    |= CPU_BIND_LDRANK|CPU_BIND_LDMAP|CPU_BIND_LDMASK;
	bind_to_bits |= CPU_BIND_TO_LDOMS;

	if (arg == NULL) {
		if ((*flags != 0) ||		/* already set values */
		    (task_plugin_param == 0))	/* no system defaults */
			return 0;

		/* set system defaults; *flags is 0 here, so at most one
		 * bind level plus the verbose bit ends up set */
		xfree(*cpu_bind);
		if (task_plugin_param & CPU_BIND_NONE)
			*flags |= CPU_BIND_NONE;
		else if (task_plugin_param & CPU_BIND_TO_SOCKETS)
			*flags |= CPU_BIND_TO_SOCKETS;
		else if (task_plugin_param & CPU_BIND_TO_CORES)
			*flags |= CPU_BIND_TO_CORES;
		else if (task_plugin_param & CPU_BIND_TO_THREADS)
			*flags |= CPU_BIND_TO_THREADS;
		else if (task_plugin_param & CPU_BIND_TO_LDOMS)
			*flags |= CPU_BIND_TO_LDOMS;
		if (task_plugin_param & CPU_BIND_VERBOSE)
			*flags |= CPU_BIND_VERBOSE;
		return 0;
	}

	/* Start with system default verbose flag (if set) */
	if (task_plugin_param & CPU_BIND_VERBOSE)
		*flags |= CPU_BIND_VERBOSE;

	buf = xstrdup(arg);
	p = buf;
	/* change all ',' delimiters not followed by a digit to ';'
	 * simplifies parsing tokens while keeping map/mask lists intact */
	while (p[0] != '\0') {
		if ((p[0] == ',') && (!_isvalue(&(p[1]))))
			p[0] = ';';
		p++;
	}

	p = buf;
	while ((tok = strsep(&p, ";"))) {
		if (strcasecmp(tok, "help") == 0) {
			slurm_print_cpu_bind_help();
			xfree(buf);	/* was leaked here */
			return 1;
		}
		if (!have_binding && log_binding) {
			info("cluster configuration lacks support for cpu "
			     "binding");
			log_binding = false;	/* log at most once */
		}
		if ((strcasecmp(tok, "q") == 0) ||
		    (strcasecmp(tok, "quiet") == 0)) {
			*flags &= ~CPU_BIND_VERBOSE;
		} else if ((strcasecmp(tok, "v") == 0) ||
			   (strcasecmp(tok, "verbose") == 0)) {
			*flags |= CPU_BIND_VERBOSE;
		} else if ((strcasecmp(tok, "no") == 0) ||
			   (strcasecmp(tok, "none") == 0)) {
			_clear_then_set((int *)flags, bind_bits,
					CPU_BIND_NONE);
			xfree(*cpu_bind);
		} else if (strcasecmp(tok, "rank") == 0) {
			_clear_then_set((int *)flags, bind_bits,
					CPU_BIND_RANK);
			xfree(*cpu_bind);
		} else if ((strncasecmp(tok, "map_cpu", 7) == 0) ||
			   (strncasecmp(tok, "mapcpu", 6) == 0)) {
			if (_parse_bind_list(tok, "map_cpu", bind_bits,
					     CPU_BIND_MAP, cpu_bind, flags))
				goto fail;	/* was "return 1" */
		} else if ((strncasecmp(tok, "mask_cpu", 8) == 0) ||
			   (strncasecmp(tok, "maskcpu", 7) == 0)) {
			if (_parse_bind_list(tok, "mask_cpu", bind_bits,
					     CPU_BIND_MASK, cpu_bind, flags))
				goto fail;
		} else if (strcasecmp(tok, "rank_ldom") == 0) {
			_clear_then_set((int *)flags, bind_bits,
					CPU_BIND_LDRANK);
			xfree(*cpu_bind);
		} else if ((strncasecmp(tok, "map_ldom", 8) == 0) ||
			   (strncasecmp(tok, "mapldom", 7) == 0)) {
			if (_parse_bind_list(tok, "map_ldom", bind_bits,
					     CPU_BIND_LDMAP, cpu_bind, flags))
				goto fail;	/* was "return 1" */
		} else if ((strncasecmp(tok, "mask_ldom", 9) == 0) ||
			   (strncasecmp(tok, "maskldom", 8) == 0)) {
			if (_parse_bind_list(tok, "mask_ldom", bind_bits,
					     CPU_BIND_LDMASK, cpu_bind, flags))
				goto fail;
		} else if ((strcasecmp(tok, "socket") == 0) ||
			   (strcasecmp(tok, "sockets") == 0)) {
			if (_set_bind_to(task_plugin_param, bind_to_bits,
					 CPU_BIND_TO_SOCKETS, "sockets",
					 flags))
				goto fail;	/* buf was leaked here */
		} else if ((strcasecmp(tok, "core") == 0) ||
			   (strcasecmp(tok, "cores") == 0)) {
			if (_set_bind_to(task_plugin_param, bind_to_bits,
					 CPU_BIND_TO_CORES, "cores", flags))
				goto fail;	/* buf was leaked here */
		} else if ((strcasecmp(tok, "thread") == 0) ||
			   (strcasecmp(tok, "threads") == 0)) {
			if (_set_bind_to(task_plugin_param, bind_to_bits,
					 CPU_BIND_TO_THREADS, "threads",
					 flags))
				goto fail;	/* buf was leaked here */
		} else if ((strcasecmp(tok, "ldom") == 0) ||
			   (strcasecmp(tok, "ldoms") == 0)) {
			/* error message previously said "threads" here */
			if (_set_bind_to(task_plugin_param, bind_to_bits,
					 CPU_BIND_TO_LDOMS, "ldoms", flags))
				goto fail;	/* buf was leaked here */
		} else {
			error("unrecognized --cpu_bind argument \"%s\"", tok);
			goto fail;
		}
	}
	xfree(buf);
	return 0;

fail:
	xfree(buf);
	return -1;
}
/*
 * lllp_distribution
 *
 * Note: lllp stands for Lowest Level of Logical Processors.
 *
 * When automatic binding is enabled:
 *	- no binding flags set >= CPU_BIND_NONE, and
 *	- an auto binding level selected CPU_BIND_TO_{SOCKETS,CORES,THREADS}
 * Otherwise limit job step to the allocated CPUs
 *
 * Generate the appropriate cpu_bind type and string which results in
 * the specified lllp distribution.
 *
 * IN/OUT req     - job launch request (cpu_bind_type and cpu_bind updated)
 * IN     node_id - index of this node within the launch request; selects
 *                  the tasks_to_launch / global_task_ids entries used
 */
void lllp_distribution(launch_tasks_request_msg_t *req, uint32_t node_id)
{
	int rc = SLURM_SUCCESS;
	bitstr_t **masks = NULL;
	char buf_type[100];
	int maxtasks = req->tasks_to_launch[(int)node_id];
	int whole_nodes, whole_sockets, whole_cores, whole_threads;
	int part_sockets, part_cores;
	const uint32_t *gtid = req->global_task_ids[(int)node_id];
	/* All "bind to entity" bits: set means the user (or a default)
	 * selected an automatic binding level */
	static uint16_t bind_entity = CPU_BIND_TO_THREADS | CPU_BIND_TO_CORES |
				      CPU_BIND_TO_SOCKETS | CPU_BIND_TO_LDOMS;
	/* All explicit binding-method bits: set means the user supplied an
	 * explicit map/mask/rank/none binding */
	static uint16_t bind_mode = CPU_BIND_NONE | CPU_BIND_MASK |
				    CPU_BIND_RANK | CPU_BIND_MAP |
				    CPU_BIND_LDMASK | CPU_BIND_LDRANK |
				    CPU_BIND_LDMAP;
	/* Cached across calls: -1 until first evaluation of the node's
	 * hardware configuration */
	static int only_one_thread_per_core = -1;

	if (only_one_thread_per_core == -1) {
		/* cpus == sockets*cores implies the config exposes exactly
		 * one usable thread per core */
		if (conf->cpus == (conf->sockets * conf->cores))
			only_one_thread_per_core = 1;
		else
			only_one_thread_per_core = 0;
	}

	/* If we are telling the system we only want to use 1 thread
	 * per core with the CPUs node option this is the easiest way
	 * to portray that to the affinity plugin.
	 */
	if (only_one_thread_per_core)
		req->cpu_bind_type |= CPU_BIND_ONE_THREAD_PER_CORE;

	if (req->cpu_bind_type & bind_mode) {
		/* Explicit step binding specified by user */
		char *avail_mask = _alloc_mask(req,
					       &whole_nodes, &whole_sockets,
					       &whole_cores, &whole_threads,
					       &part_sockets, &part_cores);
		if ((whole_nodes == 0) && avail_mask &&
		    (req->job_core_spec == (uint16_t) NO_VAL)) {
			/* Less than a whole node allocated: ignore the
			 * user's explicit binding and bind to the
			 * allocation mask instead */
			info("task/affinity: entire node must be allocated, "
			     "disabling affinity");
			xfree(req->cpu_bind);
			req->cpu_bind = avail_mask; /* ownership transferred */
			req->cpu_bind_type &= (~bind_mode);
			req->cpu_bind_type |= CPU_BIND_MASK;
		} else {
			if (req->job_core_spec == (uint16_t) NO_VAL) {
				/* Clip user-supplied mask/map against the
				 * CPUs actually allocated */
				if (req->cpu_bind_type & CPU_BIND_MASK)
					_validate_mask(req, avail_mask);
				else if (req->cpu_bind_type & CPU_BIND_MAP)
					_validate_map(req, avail_mask);
			}
			xfree(avail_mask);
		}
		slurm_sprint_cpu_bind_type(buf_type, req->cpu_bind_type);
		info("lllp_distribution jobid [%u] manual binding: %s",
		     req->job_id, buf_type);
		return;
	}

	if (!(req->cpu_bind_type & bind_entity)) {
		/* No bind unit (sockets, cores) specified by user,
		 * pick something reasonable */
		uint32_t task_plugin_param = slurm_get_task_plugin_param();
		bool auto_def_set = false;
		int spec_thread_cnt = 0;
		int max_tasks = req->tasks_to_launch[(int)node_id] *
				req->cpus_per_task;
		char *avail_mask = _alloc_mask(req,
					       &whole_nodes, &whole_sockets,
					       &whole_cores, &whole_threads,
					       &part_sockets, &part_cores);
		debug("binding tasks:%d to "
		      "nodes:%d sockets:%d:%d cores:%d:%d threads:%d",
		      max_tasks, whole_nodes, whole_sockets ,part_sockets,
		      whole_cores, part_cores, whole_threads);
		/* Extract the reserved-thread count when job_core_spec
		 * encodes "spec threads" (CORE_SPEC_THREAD flag set but not
		 * equal to the flag alone) */
		if ((req->job_core_spec != (uint16_t) NO_VAL) &&
		    (req->job_core_spec & CORE_SPEC_THREAD) &&
		    (req->job_core_spec != CORE_SPEC_THREAD)) {
			spec_thread_cnt = req->job_core_spec &
					  (~CORE_SPEC_THREAD);
		}
		/* Prefer the coarsest entity whose count matches the task
		 * count: sockets, then cores, then threads */
		if (((max_tasks == whole_sockets) && (part_sockets == 0)) ||
		    (spec_thread_cnt &&
		     (max_tasks == (whole_sockets + part_sockets)))) {
			req->cpu_bind_type |= CPU_BIND_TO_SOCKETS;
			goto make_auto;
		}
		if (((max_tasks == whole_cores) && (part_cores == 0)) ||
		    (spec_thread_cnt &&
		     (max_tasks == (whole_cores + part_cores)))) {
			req->cpu_bind_type |= CPU_BIND_TO_CORES;
			goto make_auto;
		}
		if (max_tasks == whole_threads) {
			req->cpu_bind_type |= CPU_BIND_TO_THREADS;
			goto make_auto;
		}

		/* No natural fit: fall back to the administrator's
		 * TaskPluginParam auto-bind default, if any */
		if (task_plugin_param & CPU_AUTO_BIND_TO_THREADS) {
			auto_def_set = true;
			req->cpu_bind_type |= CPU_BIND_TO_THREADS;
			goto make_auto;
		} else if (task_plugin_param & CPU_AUTO_BIND_TO_CORES) {
			auto_def_set = true;
			req->cpu_bind_type |= CPU_BIND_TO_CORES;
			goto make_auto;
		} else if (task_plugin_param & CPU_AUTO_BIND_TO_SOCKETS) {
			auto_def_set = true;
			req->cpu_bind_type |= CPU_BIND_TO_SOCKETS;
			goto make_auto;
		}

		/* Auto binding stays off: just confine the step to its
		 * allocated CPUs via the availability mask */
		if (avail_mask) {
			xfree(req->cpu_bind);
			req->cpu_bind = avail_mask; /* ownership transferred */
			req->cpu_bind_type |= CPU_BIND_MASK;
		}
		slurm_sprint_cpu_bind_type(buf_type, req->cpu_bind_type);
		info("lllp_distribution jobid [%u] auto binding off: %s",
		     req->job_id, buf_type);
		return;

make_auto:
		xfree(avail_mask);
		slurm_sprint_cpu_bind_type(buf_type, req->cpu_bind_type);
		info("lllp_distribution jobid [%u] %s auto binding: "
		     "%s, dist %d",
		     req->job_id, (auto_def_set) ? "default" : "implicit",
		     buf_type, req->task_dist);
	} else {
		/* Explicit bind unit (sockets, cores) specified by user */
		slurm_sprint_cpu_bind_type(buf_type, req->cpu_bind_type);
		info("lllp_distribution jobid [%u] binding: %s, dist %d",
		     req->job_id, buf_type, req->task_dist);
	}

	/* Build per-task abstract masks according to the requested task
	 * distribution */
	switch (req->task_dist & SLURM_DIST_STATE_BASE) {
	case SLURM_DIST_BLOCK_BLOCK:
	case SLURM_DIST_CYCLIC_BLOCK:
	case SLURM_DIST_PLANE:
		/* tasks are distributed in blocks within a plane */
		rc = _task_layout_lllp_block(req, node_id, &masks);
		break;
	case SLURM_DIST_ARBITRARY:
	case SLURM_DIST_BLOCK:
	case SLURM_DIST_CYCLIC:
	case SLURM_DIST_UNKNOWN:
		if (slurm_get_select_type_param()
		    & CR_CORE_DEFAULT_DIST_BLOCK) {
			rc = _task_layout_lllp_block(req, node_id, &masks);
			break;
		}
		/* We want to fall through here if we aren't doing a
		   default dist block.
		*/
	default:
		rc = _task_layout_lllp_cyclic(req, node_id, &masks);
		break;
	}

	/* FIXME: I'm worried about core_bitmap with CPU_BIND_TO_SOCKETS &
	 * max_cores - does select/cons_res plugin allocate whole
	 * socket??? Maybe not. Check srun man page.
	 */
	if (rc == SLURM_SUCCESS) {
		_task_layout_display_masks(req, gtid, maxtasks, masks);
		/* translate abstract masks to actual hardware layout */
		_lllp_map_abstract_masks(maxtasks, masks);
		_task_layout_display_masks(req, gtid, maxtasks, masks);
#ifdef HAVE_NUMA
		if (req->cpu_bind_type & CPU_BIND_TO_LDOMS) {
			_match_masks_to_ldom(maxtasks, masks);
			_task_layout_display_masks(req, gtid, maxtasks,
						   masks);
		}
#endif
		/* convert masks into cpu_bind mask string */
		_lllp_generate_cpu_bind(req, maxtasks, masks);
	} else {
		/* Layout failed (likely configured socket/core/thread
		 * counts disagree with the allocation): fall back to
		 * binding each task to the full allocation mask */
		char *avail_mask = _alloc_mask(req,
					       &whole_nodes, &whole_sockets,
					       &whole_cores, &whole_threads,
					       &part_sockets, &part_cores);
		if (avail_mask) {
			xfree(req->cpu_bind);
			req->cpu_bind = avail_mask; /* ownership transferred */
			req->cpu_bind_type &= (~bind_mode);
			req->cpu_bind_type |= CPU_BIND_MASK;
		}
		slurm_sprint_cpu_bind_type(buf_type, req->cpu_bind_type);
		error("lllp_distribution jobid [%u] overriding binding: %s",
		      req->job_id, buf_type);
		error("Verify socket/core/thread counts in configuration");
	}
	if (masks)
		_lllp_free_masks(maxtasks, masks);
}