/*
 * task_slurmd_launch_request()
 */
extern int task_slurmd_launch_request (uint32_t job_id,
				       launch_tasks_request_msg_t *req,
				       uint32_t node_id)
{
	char buf_type[100];

	debug("task_slurmd_launch_request: %u.%u %u",
	      job_id, req->job_step_id, node_id);

	if (((conf->sockets >= 1)
	     && ((conf->cores > 1) || (conf->threads > 1)))
	    || (!(req->cpu_bind_type & CPU_BIND_NONE))) {
		_update_bind_type(req);

		slurm_sprint_cpu_bind_type(buf_type, req->cpu_bind_type);
		debug("task affinity : before lllp distribution cpu bind "
		      "method is '%s' (%s)", buf_type, req->cpu_bind);

		lllp_distribution(req, node_id);

		slurm_sprint_cpu_bind_type(buf_type, req->cpu_bind_type);
		debug("task affinity : after lllp distribution cpu bind "
		      "method is '%s' (%s)", buf_type, req->cpu_bind);
	}

	return SLURM_SUCCESS;
}
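/*
 * Illustrative sketch, not part of the original source: the condition in
 * task_slurmd_launch_request() above reduces to "this node exposes more
 * than one logical CPU per socket, or the request did not explicitly ask
 * for CPU_BIND_NONE".  The helper name below is hypothetical; conf and the
 * CPU_BIND_* constants are the real slurmd/slurm.h symbols.
 */
static bool _affinity_needed(launch_tasks_request_msg_t *req)
{
	bool multi_cpu = (conf->sockets >= 1) &&
			 ((conf->cores > 1) || (conf->threads > 1));
	bool bind_none = (req->cpu_bind_type & CPU_BIND_NONE) != 0;

	return multi_cpu || !bind_none;
}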
/* cpu bind enforcement, update binding type based upon the
 * TaskPluginParam configuration parameter */
static void _update_bind_type(launch_tasks_request_msg_t *req)
{
	bool set_bind = false;

	if ((req->cpu_bind_type & (~CPU_BIND_VERBOSE)) == 0) {
		if (conf->task_plugin_param & CPU_BIND_NONE) {
			req->cpu_bind_type |= CPU_BIND_NONE;
			req->cpu_bind_type &= (~CPU_BIND_TO_SOCKETS);
			req->cpu_bind_type &= (~CPU_BIND_TO_CORES);
			req->cpu_bind_type &= (~CPU_BIND_TO_THREADS);
			req->cpu_bind_type &= (~CPU_BIND_TO_LDOMS);
			set_bind = true;
		} else if (conf->task_plugin_param & CPU_BIND_TO_SOCKETS) {
			req->cpu_bind_type &= (~CPU_BIND_NONE);
			req->cpu_bind_type |= CPU_BIND_TO_SOCKETS;
			req->cpu_bind_type &= (~CPU_BIND_TO_CORES);
			req->cpu_bind_type &= (~CPU_BIND_TO_THREADS);
			req->cpu_bind_type &= (~CPU_BIND_TO_LDOMS);
			set_bind = true;
		} else if (conf->task_plugin_param & CPU_BIND_TO_CORES) {
			req->cpu_bind_type &= (~CPU_BIND_NONE);
			req->cpu_bind_type &= (~CPU_BIND_TO_SOCKETS);
			req->cpu_bind_type |= CPU_BIND_TO_CORES;
			req->cpu_bind_type &= (~CPU_BIND_TO_THREADS);
			req->cpu_bind_type &= (~CPU_BIND_TO_LDOMS);
			set_bind = true;
		} else if (conf->task_plugin_param & CPU_BIND_TO_THREADS) {
			req->cpu_bind_type &= (~CPU_BIND_NONE);
			req->cpu_bind_type &= (~CPU_BIND_TO_SOCKETS);
			req->cpu_bind_type &= (~CPU_BIND_TO_CORES);
			req->cpu_bind_type |= CPU_BIND_TO_THREADS;
			req->cpu_bind_type &= (~CPU_BIND_TO_LDOMS);
			set_bind = true;
		} else if (conf->task_plugin_param & CPU_BIND_TO_LDOMS) {
			req->cpu_bind_type &= (~CPU_BIND_NONE);
			req->cpu_bind_type &= (~CPU_BIND_TO_SOCKETS);
			req->cpu_bind_type &= (~CPU_BIND_TO_CORES);
			req->cpu_bind_type &= (~CPU_BIND_TO_THREADS);
			req->cpu_bind_type |= CPU_BIND_TO_LDOMS;
			set_bind = true;
		}
	}
	if (conf->task_plugin_param & CPU_BIND_VERBOSE) {
		req->cpu_bind_type |= CPU_BIND_VERBOSE;
		set_bind = true;
	}

	if (set_bind) {
		char bind_str[128];
		slurm_sprint_cpu_bind_type(bind_str, req->cpu_bind_type);
		info("task affinity : enforcing '%s' cpu bind method",
		     bind_str);
	}
}
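/*
 * Illustrative sketch, not part of the original source: every branch in
 * _update_bind_type() clears the mutually exclusive automatic-bind bits and
 * then sets exactly one of them.  A hypothetical helper could express that
 * pattern once; the CPU_BIND_* constants are the real values from slurm.h,
 * the helper name is an assumption for illustration only.
 */
static void _set_exclusive_bind_bit(launch_tasks_request_msg_t *req,
				    uint16_t bit)
{
	/* clear all mutually exclusive bind bits ... */
	req->cpu_bind_type &= ~(CPU_BIND_NONE | CPU_BIND_TO_SOCKETS |
				CPU_BIND_TO_CORES | CPU_BIND_TO_THREADS |
				CPU_BIND_TO_LDOMS);
	/* ... then set the one selected via TaskPluginParam */
	req->cpu_bind_type |= bit;
}
/* e.g. _set_exclusive_bind_bit(req, CPU_BIND_TO_CORES); */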
int get_cpuset(cpu_set_t *mask, stepd_step_rec_t *job) { int nummasks, maskid, i, threads; char *curstr, *selstr; char mstr[1 + CPU_SETSIZE / 4]; uint32_t local_id = job->envtp->localid; char buftype[1024]; slurm_sprint_cpu_bind_type(buftype, job->cpu_bind_type); debug3("get_cpuset (%s[%d]) %s", buftype, job->cpu_bind_type, job->cpu_bind); CPU_ZERO(mask); if (job->cpu_bind_type & CPU_BIND_NONE) { return true; } if (job->cpu_bind_type & CPU_BIND_RANK) { threads = MAX(conf->threads, 1); CPU_SET(job->envtp->localid % (job->cpus*threads), mask); return true; } if (job->cpu_bind_type & CPU_BIND_LDRANK) { /* if HAVE_NUMA then bind this task ID to it's corresponding * locality domain ID. Otherwise, bind this task ID to it's * corresponding socket ID */ return _bind_ldom(local_id, mask); } if (!job->cpu_bind) return false; nummasks = 1; selstr = NULL; /* get number of strings present in cpu_bind */ curstr = job->cpu_bind; while (*curstr) { if (nummasks == local_id+1) { selstr = curstr; break; } if (*curstr == ',') nummasks++; curstr++; } /* if we didn't already find the mask... */ if (!selstr) { /* ...select mask string by wrapping task ID into list */ maskid = local_id % nummasks; i = maskid; curstr = job->cpu_bind; while (*curstr && i) { if (*curstr == ',') i--; curstr++; } if (!*curstr) { return false; } selstr = curstr; } /* extract the selected mask from the list */ i = 0; curstr = mstr; while (*selstr && *selstr != ',' && i++ < (CPU_SETSIZE/4)) *curstr++ = *selstr++; *curstr = '\0'; if (job->cpu_bind_type & CPU_BIND_MASK) { /* convert mask string into cpu_set_t mask */ if (task_str_to_cpuset(mask, mstr) < 0) { error("task_str_to_cpuset %s", mstr); return false; } return true; } if (job->cpu_bind_type & CPU_BIND_MAP) { unsigned int mycpu = 0; if (xstrncmp(mstr, "0x", 2) == 0) { mycpu = strtoul (&(mstr[2]), NULL, 16); } else { mycpu = strtoul (mstr, NULL, 10); } CPU_SET(mycpu, mask); return true; } if (job->cpu_bind_type & CPU_BIND_LDMASK) { /* if HAVE_NUMA bind this task to the locality domains * identified in mstr. Otherwise bind this task to the * sockets identified in mstr */ int len = strlen(mstr); char *ptr = mstr + len - 1; uint32_t base = 0; curstr = mstr; /* skip 0x, it's all hex anyway */ if (len > 1 && !memcmp(mstr, "0x", 2L)) curstr += 2; while (ptr >= curstr) { char val = slurm_char_to_hex(*ptr); if (val == (char) -1) return false; if (val & 1) _bind_ldom(base, mask); if (val & 2) _bind_ldom(base + 1, mask); if (val & 4) _bind_ldom(base + 2, mask); if (val & 8) _bind_ldom(base + 3, mask); len--; ptr--; base += 4; } return true; } if (job->cpu_bind_type & CPU_BIND_LDMAP) { /* if HAVE_NUMA bind this task to the given locality * domain. Otherwise bind this task to the given * socket */ uint32_t myldom = 0; if (xstrncmp(mstr, "0x", 2) == 0) { myldom = strtoul (&(mstr[2]), NULL, 16); } else { myldom = strtoul (mstr, NULL, 10); } return _bind_ldom(myldom, mask); } return false; }
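/*
 * Illustrative sketch, not part of the original source: how the
 * comma-separated cpu_bind list is indexed above.  The task's local ID is
 * wrapped into the list, so with cpu_bind = "0x3,0xc,0x30" task 4 selects
 * entry 4 % 3 = 1, i.e. "0xc".  The helper name is hypothetical and the
 * sketch assumes the surrounding file's headers.
 */
static void _pick_mask(const char *cpu_bind, unsigned int local_id,
		       char *out, unsigned int outlen)
{
	unsigned int nummasks = 1, i;
	const char *p;

	/* count comma-separated mask strings */
	for (p = cpu_bind; *p; p++) {
		if (*p == ',')
			nummasks++;
	}
	/* wrap the task ID into the list, then skip to that entry */
	i = local_id % nummasks;
	for (p = cpu_bind; *p && i; p++) {
		if (*p == ',')
			i--;
	}
	/* copy the selected entry */
	for (i = 0; *p && (*p != ',') && (i + 1 < outlen); i++, p++)
		out[i] = *p;
	out[i] = '\0';
}
/* e.g. _pick_mask("0x3,0xc,0x30", 4, buf, sizeof(buf)) leaves "0xc" in buf */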
/* * lllp_distribution * * Note: lllp stands for Lowest Level of Logical Processors. * * When automatic binding is enabled: * - no binding flags set >= CPU_BIND_NONE, and * - a auto binding level selected CPU_BIND_TO_{SOCKETS,CORES,THREADS} * Otherwise limit job step to the allocated CPUs * * generate the appropriate cpu_bind type and string which results in * the specified lllp distribution. * * IN/OUT- job launch request (cpu_bind_type and cpu_bind updated) * IN- global task id array */ void lllp_distribution(launch_tasks_request_msg_t *req, uint32_t node_id) { int rc = SLURM_SUCCESS; bitstr_t **masks = NULL; char buf_type[100]; int maxtasks = req->tasks_to_launch[(int)node_id]; int whole_nodes, whole_sockets, whole_cores, whole_threads; int part_sockets, part_cores; const uint32_t *gtid = req->global_task_ids[(int)node_id]; static uint16_t bind_entity = CPU_BIND_TO_THREADS | CPU_BIND_TO_CORES | CPU_BIND_TO_SOCKETS | CPU_BIND_TO_LDOMS; static uint16_t bind_mode = CPU_BIND_NONE | CPU_BIND_MASK | CPU_BIND_RANK | CPU_BIND_MAP | CPU_BIND_LDMASK | CPU_BIND_LDRANK | CPU_BIND_LDMAP; static int only_one_thread_per_core = -1; if (only_one_thread_per_core == -1) { if (conf->cpus == (conf->sockets * conf->cores)) only_one_thread_per_core = 1; else only_one_thread_per_core = 0; } /* If we are telling the system we only want to use 1 thread * per core with the CPUs node option this is the easiest way * to portray that to the affinity plugin. */ if (only_one_thread_per_core) req->cpu_bind_type |= CPU_BIND_ONE_THREAD_PER_CORE; if (req->cpu_bind_type & bind_mode) { /* Explicit step binding specified by user */ char *avail_mask = _alloc_mask(req, &whole_nodes, &whole_sockets, &whole_cores, &whole_threads, &part_sockets, &part_cores); if ((whole_nodes == 0) && avail_mask && (req->job_core_spec == (uint16_t) NO_VAL)) { info("task/affinity: entire node must be allocated, " "disabling affinity"); xfree(req->cpu_bind); req->cpu_bind = avail_mask; req->cpu_bind_type &= (~bind_mode); req->cpu_bind_type |= CPU_BIND_MASK; } else { if (req->job_core_spec == (uint16_t) NO_VAL) { if (req->cpu_bind_type & CPU_BIND_MASK) _validate_mask(req, avail_mask); else if (req->cpu_bind_type & CPU_BIND_MAP) _validate_map(req, avail_mask); } xfree(avail_mask); } slurm_sprint_cpu_bind_type(buf_type, req->cpu_bind_type); info("lllp_distribution jobid [%u] manual binding: %s", req->job_id, buf_type); return; } if (!(req->cpu_bind_type & bind_entity)) { /* No bind unit (sockets, cores) specified by user, * pick something reasonable */ uint32_t task_plugin_param = slurm_get_task_plugin_param(); bool auto_def_set = false; int spec_thread_cnt = 0; int max_tasks = req->tasks_to_launch[(int)node_id] * req->cpus_per_task; char *avail_mask = _alloc_mask(req, &whole_nodes, &whole_sockets, &whole_cores, &whole_threads, &part_sockets, &part_cores); debug("binding tasks:%d to " "nodes:%d sockets:%d:%d cores:%d:%d threads:%d", max_tasks, whole_nodes, whole_sockets ,part_sockets, whole_cores, part_cores, whole_threads); if ((req->job_core_spec != (uint16_t) NO_VAL) && (req->job_core_spec & CORE_SPEC_THREAD) && (req->job_core_spec != CORE_SPEC_THREAD)) { spec_thread_cnt = req->job_core_spec & (~CORE_SPEC_THREAD); } if (((max_tasks == whole_sockets) && (part_sockets == 0)) || (spec_thread_cnt && (max_tasks == (whole_sockets + part_sockets)))) { req->cpu_bind_type |= CPU_BIND_TO_SOCKETS; goto make_auto; } if (((max_tasks == whole_cores) && (part_cores == 0)) || (spec_thread_cnt && (max_tasks == (whole_cores + part_cores)))) { req->cpu_bind_type |= 
CPU_BIND_TO_CORES; goto make_auto; } if (max_tasks == whole_threads) { req->cpu_bind_type |= CPU_BIND_TO_THREADS; goto make_auto; } if (task_plugin_param & CPU_AUTO_BIND_TO_THREADS) { auto_def_set = true; req->cpu_bind_type |= CPU_BIND_TO_THREADS; goto make_auto; } else if (task_plugin_param & CPU_AUTO_BIND_TO_CORES) { auto_def_set = true; req->cpu_bind_type |= CPU_BIND_TO_CORES; goto make_auto; } else if (task_plugin_param & CPU_AUTO_BIND_TO_SOCKETS) { auto_def_set = true; req->cpu_bind_type |= CPU_BIND_TO_SOCKETS; goto make_auto; } if (avail_mask) { xfree(req->cpu_bind); req->cpu_bind = avail_mask; req->cpu_bind_type |= CPU_BIND_MASK; } slurm_sprint_cpu_bind_type(buf_type, req->cpu_bind_type); info("lllp_distribution jobid [%u] auto binding off: %s", req->job_id, buf_type); return; make_auto: xfree(avail_mask); slurm_sprint_cpu_bind_type(buf_type, req->cpu_bind_type); info("lllp_distribution jobid [%u] %s auto binding: " "%s, dist %d", req->job_id, (auto_def_set) ? "default" : "implicit", buf_type, req->task_dist); } else { /* Explicit bind unit (sockets, cores) specified by user */ slurm_sprint_cpu_bind_type(buf_type, req->cpu_bind_type); info("lllp_distribution jobid [%u] binding: %s, dist %d", req->job_id, buf_type, req->task_dist); } switch (req->task_dist & SLURM_DIST_STATE_BASE) { case SLURM_DIST_BLOCK_BLOCK: case SLURM_DIST_CYCLIC_BLOCK: case SLURM_DIST_PLANE: /* tasks are distributed in blocks within a plane */ rc = _task_layout_lllp_block(req, node_id, &masks); break; case SLURM_DIST_ARBITRARY: case SLURM_DIST_BLOCK: case SLURM_DIST_CYCLIC: case SLURM_DIST_UNKNOWN: if (slurm_get_select_type_param() & CR_CORE_DEFAULT_DIST_BLOCK) { rc = _task_layout_lllp_block(req, node_id, &masks); break; } /* We want to fall through here if we aren't doing a default dist block. */ default: rc = _task_layout_lllp_cyclic(req, node_id, &masks); break; } /* FIXME: I'm worried about core_bitmap with CPU_BIND_TO_SOCKETS & * max_cores - does select/cons_res plugin allocate whole * socket??? Maybe not. Check srun man page. */ if (rc == SLURM_SUCCESS) { _task_layout_display_masks(req, gtid, maxtasks, masks); /* translate abstract masks to actual hardware layout */ _lllp_map_abstract_masks(maxtasks, masks); _task_layout_display_masks(req, gtid, maxtasks, masks); #ifdef HAVE_NUMA if (req->cpu_bind_type & CPU_BIND_TO_LDOMS) { _match_masks_to_ldom(maxtasks, masks); _task_layout_display_masks(req, gtid, maxtasks, masks); } #endif /* convert masks into cpu_bind mask string */ _lllp_generate_cpu_bind(req, maxtasks, masks); } else { char *avail_mask = _alloc_mask(req, &whole_nodes, &whole_sockets, &whole_cores, &whole_threads, &part_sockets, &part_cores); if (avail_mask) { xfree(req->cpu_bind); req->cpu_bind = avail_mask; req->cpu_bind_type &= (~bind_mode); req->cpu_bind_type |= CPU_BIND_MASK; } slurm_sprint_cpu_bind_type(buf_type, req->cpu_bind_type); error("lllp_distribution jobid [%u] overriding binding: %s", req->job_id, buf_type); error("Verify socket/core/thread counts in configuration"); } if (masks) _lllp_free_masks(maxtasks, masks); }
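/*
 * Illustrative sketch, not part of the original source: with no core
 * specialization and no TaskPluginParam auto-bind default, the automatic
 * bind-level choice above amounts to "bind to the first entity type whose
 * count equals the task count".  The helper name is hypothetical; the
 * CPU_BIND_TO_* constants are the real slurm.h values.
 */
static uint16_t _pick_auto_bind_level(int max_tasks,
				      int whole_sockets, int part_sockets,
				      int whole_cores, int part_cores,
				      int whole_threads)
{
	if ((max_tasks == whole_sockets) && (part_sockets == 0))
		return CPU_BIND_TO_SOCKETS;	/* one task per whole socket */
	if ((max_tasks == whole_cores) && (part_cores == 0))
		return CPU_BIND_TO_CORES;	/* one task per whole core */
	if (max_tasks == whole_threads)
		return CPU_BIND_TO_THREADS;	/* one task per thread */
	return 0;				/* fall back to CPU_BIND_MASK */
}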
/*
 * _lllp_generate_cpu_bind
 *
 * Generate the cpu_bind type and string given an array of bitstr_t masks
 *
 * IN/OUT- job launch request (cpu_bind_type and cpu_bind updated)
 * IN- maximum number of tasks
 * IN- array of masks
 */
static void _lllp_generate_cpu_bind(launch_tasks_request_msg_t *req,
				    const uint32_t maxtasks, bitstr_t **masks)
{
	int i, num_bits = 0, masks_len;
	bitstr_t *bitmask;
	bitoff_t charsize;
	char *masks_str = NULL;
	char buf_type[100];

	for (i = 0; i < maxtasks; i++) {
		bitmask = masks[i];
		if (bitmask) {
			num_bits = bit_size(bitmask);
			break;
		}
	}
	charsize = (num_bits + 3) / 4;		/* ASCII hex digits */
	charsize += 3;				/* "0x" and trailing "," */
	masks_len = maxtasks * charsize + 1;	/* number of masks + null */

	debug3("%s %d %"BITSTR_FMT" %d", __func__, maxtasks, charsize,
	       masks_len);

	masks_str = xmalloc(masks_len);
	masks_len = 0;
	for (i = 0; i < maxtasks; i++) {
		char *str;
		int curlen;
		bitmask = masks[i];
		if (bitmask == NULL) {
			continue;
		}
		str = (char *)bit_fmt_hexmask(bitmask);
		curlen = strlen(str) + 1;

		if (masks_len > 0)
			masks_str[masks_len - 1] = ',';
		strncpy(&masks_str[masks_len], str, curlen);
		masks_len += curlen;
		xassert(masks_str[masks_len] == '\0');
		xfree(str);
	}

	if (req->cpu_bind) {
		xfree(req->cpu_bind);
	}
	if (masks_str[0] != '\0') {
		req->cpu_bind = masks_str;
		req->cpu_bind_type |= CPU_BIND_MASK;
	} else {
		req->cpu_bind = NULL;
		req->cpu_bind_type &= ~CPU_BIND_VERBOSE;
	}

	/* clear mask generation bits */
	req->cpu_bind_type &= ~CPU_BIND_TO_THREADS;
	req->cpu_bind_type &= ~CPU_BIND_TO_CORES;
	req->cpu_bind_type &= ~CPU_BIND_TO_SOCKETS;
	req->cpu_bind_type &= ~CPU_BIND_TO_LDOMS;

	slurm_sprint_cpu_bind_type(buf_type, req->cpu_bind_type);
	info("_lllp_generate_cpu_bind jobid [%u]: %s, %s",
	     req->job_id, buf_type, masks_str);
}
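/*
 * Illustrative sketch, not part of the original source: the string that
 * _lllp_generate_cpu_bind() builds is the per-task hex masks joined with
 * commas.  For two tasks on a 4-CPU node, task 0 on CPUs {0,1} and task 1
 * on CPUs {2,3}, the result is of the form "0x3,0xC" and CPU_BIND_MASK is
 * set.  bit_alloc(), bit_set(), bit_fmt_hexmask(), bit_free(), xstrfmtcat()
 * and xfree() are the real helpers from src/common; the function name is
 * hypothetical.
 */
static char *_example_cpu_bind_string(void)
{
	bitstr_t *m0 = bit_alloc(4), *m1 = bit_alloc(4);
	char *s0, *s1, *joined = NULL;

	bit_set(m0, 0); bit_set(m0, 1);		/* task 0 -> CPUs 0,1 */
	bit_set(m1, 2); bit_set(m1, 3);		/* task 1 -> CPUs 2,3 */

	s0 = bit_fmt_hexmask(m0);
	s1 = bit_fmt_hexmask(m1);
	xstrfmtcat(joined, "%s,%s", s0, s1);	/* e.g. "0x3,0xC" */

	xfree(s0);
	xfree(s1);
	bit_free(m0);
	bit_free(m1);
	return joined;				/* caller must xfree() */
}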
extern void *slurm_ctl_conf_2_key_pairs (slurm_ctl_conf_t* slurm_ctl_conf_ptr) { List ret_list = NULL; config_key_pair_t *key_pair; char tmp_str[128]; uint32_t cluster_flags = slurmdb_setup_cluster_flags(); if ( slurm_ctl_conf_ptr == NULL ) return NULL; ret_list = list_create(destroy_config_key_pair); key_pair = xmalloc(sizeof(config_key_pair_t)); key_pair->name = xstrdup("AccountingStorageBackupHost"); key_pair->value = xstrdup(slurm_ctl_conf_ptr->accounting_storage_backup_host); list_append(ret_list, key_pair); accounting_enforce_string(slurm_ctl_conf_ptr-> accounting_storage_enforce, tmp_str, sizeof(tmp_str)); key_pair = xmalloc(sizeof(config_key_pair_t)); key_pair->name = xstrdup("AccountingStorageEnforce"); key_pair->value = xstrdup(tmp_str); list_append(ret_list, key_pair); key_pair = xmalloc(sizeof(config_key_pair_t)); key_pair->name = xstrdup("AccountingStorageHost"); key_pair->value = xstrdup(slurm_ctl_conf_ptr->accounting_storage_host); list_append(ret_list, key_pair); key_pair = xmalloc(sizeof(config_key_pair_t)); key_pair->name = xstrdup("AccountingStorageLoc"); key_pair->value = xstrdup(slurm_ctl_conf_ptr->accounting_storage_loc); list_append(ret_list, key_pair); snprintf(tmp_str, sizeof(tmp_str), "%u", slurm_ctl_conf_ptr->accounting_storage_port); key_pair = xmalloc(sizeof(config_key_pair_t)); key_pair->name = xstrdup("AccountingStoragePort"); key_pair->value = xstrdup(tmp_str); list_append(ret_list, key_pair); key_pair = xmalloc(sizeof(config_key_pair_t)); key_pair->name = xstrdup("AccountingStorageType"); key_pair->value = xstrdup(slurm_ctl_conf_ptr->accounting_storage_type); list_append(ret_list, key_pair); key_pair = xmalloc(sizeof(config_key_pair_t)); key_pair->name = xstrdup("AccountingStorageUser"); key_pair->value = xstrdup(slurm_ctl_conf_ptr->accounting_storage_user); list_append(ret_list, key_pair); key_pair = xmalloc(sizeof(config_key_pair_t)); key_pair->name = xstrdup("AccountingStoreJobComment"); if (slurm_ctl_conf_ptr->acctng_store_job_comment) key_pair->value = xstrdup("YES"); else key_pair->value = xstrdup("NO"); list_append(ret_list, key_pair); key_pair = xmalloc(sizeof(config_key_pair_t)); key_pair->name = xstrdup("AcctGatherEnergyType"); key_pair->value = xstrdup(slurm_ctl_conf_ptr->acct_gather_energy_type); list_append(ret_list, key_pair); key_pair = xmalloc(sizeof(config_key_pair_t)); key_pair->name = xstrdup("AcctGatherFilesystemType"); key_pair->value = xstrdup(slurm_ctl_conf_ptr->acct_gather_filesystem_type); list_append(ret_list, key_pair); key_pair = xmalloc(sizeof(config_key_pair_t)); key_pair->name = xstrdup("AcctGatherInfinibandType"); key_pair->value = xstrdup(slurm_ctl_conf_ptr->acct_gather_infiniband_type); list_append(ret_list, key_pair); snprintf(tmp_str, sizeof(tmp_str), "%u sec", slurm_ctl_conf_ptr->acct_gather_node_freq); key_pair = xmalloc(sizeof(config_key_pair_t)); key_pair->name = xstrdup("AcctGatherNodeFreq"); key_pair->value = xstrdup(tmp_str); list_append(ret_list, key_pair); key_pair = xmalloc(sizeof(config_key_pair_t)); key_pair->name = xstrdup("AcctGatherProfileType"); key_pair->value = xstrdup(slurm_ctl_conf_ptr->acct_gather_profile_type); list_append(ret_list, key_pair); key_pair = xmalloc(sizeof(config_key_pair_t)); key_pair->name = xstrdup("AuthInfo"); key_pair->value = xstrdup(slurm_ctl_conf_ptr->authinfo); list_append(ret_list, key_pair); key_pair = xmalloc(sizeof(config_key_pair_t)); key_pair->name = xstrdup("AuthType"); key_pair->value = xstrdup(slurm_ctl_conf_ptr->authtype); list_append(ret_list, key_pair); key_pair = 
xmalloc(sizeof(config_key_pair_t)); key_pair->name = xstrdup("BackupAddr"); key_pair->value = xstrdup(slurm_ctl_conf_ptr->backup_addr); list_append(ret_list, key_pair); key_pair = xmalloc(sizeof(config_key_pair_t)); key_pair->name = xstrdup("BackupController"); key_pair->value = xstrdup(slurm_ctl_conf_ptr->backup_controller); list_append(ret_list, key_pair); snprintf(tmp_str, sizeof(tmp_str), "%u sec", slurm_ctl_conf_ptr->batch_start_timeout); key_pair = xmalloc(sizeof(config_key_pair_t)); key_pair->name = xstrdup("BatchStartTimeout"); key_pair->value = xstrdup(tmp_str); list_append(ret_list, key_pair); slurm_make_time_str((time_t *)&slurm_ctl_conf_ptr->boot_time, tmp_str, sizeof(tmp_str)); key_pair = xmalloc(sizeof(config_key_pair_t)); key_pair->name = xstrdup("BOOT_TIME"); key_pair->value = xstrdup(tmp_str); list_append(ret_list, key_pair); key_pair = xmalloc(sizeof(config_key_pair_t)); key_pair->name = xstrdup("CacheGroups"); if (slurm_ctl_conf_ptr->group_info & GROUP_CACHE) key_pair->value = xstrdup("1"); else key_pair->value = xstrdup("0"); list_append(ret_list, key_pair); key_pair = xmalloc(sizeof(config_key_pair_t)); key_pair->name = xstrdup("CheckpointType"); key_pair->value = xstrdup(slurm_ctl_conf_ptr->checkpoint_type); list_append(ret_list, key_pair); key_pair = xmalloc(sizeof(config_key_pair_t)); key_pair->name = xstrdup("ClusterName"); key_pair->value = xstrdup(slurm_ctl_conf_ptr->cluster_name); list_append(ret_list, key_pair); snprintf(tmp_str, sizeof(tmp_str), "%u sec", slurm_ctl_conf_ptr->complete_wait); key_pair = xmalloc(sizeof(config_key_pair_t)); key_pair->name = xstrdup("CompleteWait"); key_pair->value = xstrdup(tmp_str); list_append(ret_list, key_pair); key_pair = xmalloc(sizeof(config_key_pair_t)); key_pair->name = xstrdup("ControlAddr"); key_pair->value = xstrdup(slurm_ctl_conf_ptr->control_addr); list_append(ret_list, key_pair); key_pair = xmalloc(sizeof(config_key_pair_t)); key_pair->name = xstrdup("ControlMachine"); key_pair->value = xstrdup(slurm_ctl_conf_ptr->control_machine); list_append(ret_list, key_pair); key_pair = xmalloc(sizeof(config_key_pair_t)); key_pair->name = xstrdup("CryptoType"); key_pair->value = xstrdup(slurm_ctl_conf_ptr->crypto_type); list_append(ret_list, key_pair); key_pair = xmalloc(sizeof(config_key_pair_t)); key_pair->name = xstrdup("DebugFlags"); key_pair->value = debug_flags2str(slurm_ctl_conf_ptr->debug_flags); list_append(ret_list, key_pair); key_pair = xmalloc(sizeof(config_key_pair_t)); list_append(ret_list, key_pair); if (slurm_ctl_conf_ptr->def_mem_per_cpu & MEM_PER_CPU) { key_pair->name = xstrdup("DefMemPerCPU"); snprintf(tmp_str, sizeof(tmp_str), "%u", slurm_ctl_conf_ptr->def_mem_per_cpu & (~MEM_PER_CPU)); key_pair->value = xstrdup(tmp_str); } else if (slurm_ctl_conf_ptr->def_mem_per_cpu) { key_pair->name = xstrdup("DefMemPerNode"); snprintf(tmp_str, sizeof(tmp_str), "%u", slurm_ctl_conf_ptr->def_mem_per_cpu); key_pair->value = xstrdup(tmp_str); } else { key_pair->name = xstrdup("DefMemPerNode"); key_pair->value = xstrdup("UNLIMITED"); } key_pair = xmalloc(sizeof(config_key_pair_t)); list_append(ret_list, key_pair); key_pair->name = xstrdup("DisableRootJobs"); if (slurm_ctl_conf_ptr->disable_root_jobs) key_pair->value = xstrdup("YES"); else key_pair->value = xstrdup("NO"); snprintf(tmp_str, sizeof(tmp_str), "%u", slurm_ctl_conf_ptr->dynalloc_port); key_pair = xmalloc(sizeof(config_key_pair_t)); key_pair->name = xstrdup("DynAllocPort"); key_pair->value = xstrdup(tmp_str); list_append(ret_list, key_pair); key_pair = 
xmalloc(sizeof(config_key_pair_t)); list_append(ret_list, key_pair); key_pair->name = xstrdup("EnforcePartLimits"); if (slurm_ctl_conf_ptr->enforce_part_limits) key_pair->value = xstrdup("YES"); else key_pair->value = xstrdup("NO"); key_pair = xmalloc(sizeof(config_key_pair_t)); key_pair->name = xstrdup("Epilog"); key_pair->value = xstrdup(slurm_ctl_conf_ptr->epilog); list_append(ret_list, key_pair); snprintf(tmp_str, sizeof(tmp_str), "%u usec", slurm_ctl_conf_ptr->epilog_msg_time); key_pair = xmalloc(sizeof(config_key_pair_t)); key_pair->name = xstrdup("EpilogMsgTime"); key_pair->value = xstrdup(tmp_str); list_append(ret_list, key_pair); key_pair = xmalloc(sizeof(config_key_pair_t)); key_pair->name = xstrdup("EpilogSlurmctld"); key_pair->value = xstrdup(slurm_ctl_conf_ptr->epilog_slurmctld); list_append(ret_list, key_pair); key_pair = xmalloc(sizeof(config_key_pair_t)); key_pair->name = xstrdup("ExtSensorsType"); key_pair->value = xstrdup(slurm_ctl_conf_ptr->ext_sensors_type); list_append(ret_list, key_pair); snprintf(tmp_str, sizeof(tmp_str), "%u sec", slurm_ctl_conf_ptr->ext_sensors_freq); key_pair = xmalloc(sizeof(config_key_pair_t)); key_pair->name = xstrdup("ExtSensorsFreq"); key_pair->value = xstrdup(tmp_str); list_append(ret_list, key_pair); if (strcmp(slurm_ctl_conf_ptr->priority_type, "priority/basic")) { snprintf(tmp_str, sizeof(tmp_str), "%u", slurm_ctl_conf_ptr->fs_dampening_factor); key_pair = xmalloc(sizeof(config_key_pair_t)); key_pair->name = xstrdup("FairShareDampeningFactor"); key_pair->value = xstrdup(tmp_str); list_append(ret_list, key_pair); } snprintf(tmp_str, sizeof(tmp_str), "%u", slurm_ctl_conf_ptr->fast_schedule); key_pair = xmalloc(sizeof(config_key_pair_t)); key_pair->name = xstrdup("FastSchedule"); key_pair->value = xstrdup(tmp_str); list_append(ret_list, key_pair); snprintf(tmp_str, sizeof(tmp_str), "%u", slurm_ctl_conf_ptr->first_job_id); key_pair = xmalloc(sizeof(config_key_pair_t)); key_pair->name = xstrdup("FirstJobId"); key_pair->value = xstrdup(tmp_str); list_append(ret_list, key_pair); snprintf(tmp_str, sizeof(tmp_str), "%u sec", slurm_ctl_conf_ptr->get_env_timeout); key_pair = xmalloc(sizeof(config_key_pair_t)); key_pair->name = xstrdup("GetEnvTimeout"); key_pair->value = xstrdup(tmp_str); list_append(ret_list, key_pair); key_pair = xmalloc(sizeof(config_key_pair_t)); key_pair->name = xstrdup("GresTypes"); key_pair->value = xstrdup(slurm_ctl_conf_ptr->gres_plugins); list_append(ret_list, key_pair); key_pair = xmalloc(sizeof(config_key_pair_t)); key_pair->name = xstrdup("GroupUpdateForce"); if (slurm_ctl_conf_ptr->group_info & GROUP_FORCE) key_pair->value = xstrdup("1"); else key_pair->value = xstrdup("0"); list_append(ret_list, key_pair); snprintf(tmp_str, sizeof(tmp_str), "%u sec", slurm_ctl_conf_ptr->group_info & GROUP_TIME_MASK); key_pair = xmalloc(sizeof(config_key_pair_t)); key_pair->name = xstrdup("GroupUpdateTime"); key_pair->value = xstrdup(tmp_str); list_append(ret_list, key_pair); if (slurm_ctl_conf_ptr->hash_val != NO_VAL) { if (slurm_ctl_conf_ptr->hash_val == slurm_get_hash_val()) snprintf(tmp_str, sizeof(tmp_str), "Match"); else { snprintf(tmp_str, sizeof(tmp_str), "Different Ours=0x%x Slurmctld=0x%x", slurm_get_hash_val(), slurm_ctl_conf_ptr->hash_val); } key_pair = xmalloc(sizeof(config_key_pair_t)); key_pair->name = xstrdup("HASH_VAL"); key_pair->value = xstrdup(tmp_str); list_append(ret_list, key_pair); } snprintf(tmp_str, sizeof(tmp_str), "%u sec", slurm_ctl_conf_ptr->health_check_interval); key_pair = 
xmalloc(sizeof(config_key_pair_t)); key_pair->name = xstrdup("HealthCheckInterval"); key_pair->value = xstrdup(tmp_str); list_append(ret_list, key_pair); key_pair = xmalloc(sizeof(config_key_pair_t)); key_pair->name = xstrdup("HealthCheckNodeState"); key_pair->value = health_check_node_state_str(slurm_ctl_conf_ptr-> health_check_node_state); list_append(ret_list, key_pair); key_pair = xmalloc(sizeof(config_key_pair_t)); key_pair->name = xstrdup("HealthCheckProgram"); key_pair->value = xstrdup(slurm_ctl_conf_ptr->health_check_program); list_append(ret_list, key_pair); if (cluster_flags & CLUSTER_FLAG_XCPU) { key_pair = xmalloc(sizeof(config_key_pair_t)); key_pair->name = xstrdup("HAVE_XCPU"); key_pair->value = xstrdup("1"); list_append(ret_list, key_pair); } snprintf(tmp_str, sizeof(tmp_str), "%u sec", slurm_ctl_conf_ptr->inactive_limit); key_pair = xmalloc(sizeof(config_key_pair_t)); key_pair->name = xstrdup("InactiveLimit"); key_pair->value = xstrdup(tmp_str); list_append(ret_list, key_pair); key_pair = xmalloc(sizeof(config_key_pair_t)); key_pair->name = xstrdup("JobAcctGatherFrequency"); key_pair->value = xstrdup(slurm_ctl_conf_ptr->job_acct_gather_freq); list_append(ret_list, key_pair); key_pair = xmalloc(sizeof(config_key_pair_t)); key_pair->name = xstrdup("JobAcctGatherType"); key_pair->value = xstrdup(slurm_ctl_conf_ptr->job_acct_gather_type); list_append(ret_list, key_pair); key_pair = xmalloc(sizeof(config_key_pair_t)); key_pair->name = xstrdup("JobAcctGatherParams"); key_pair->value = xstrdup(slurm_ctl_conf_ptr->job_acct_gather_params); list_append(ret_list, key_pair); key_pair = xmalloc(sizeof(config_key_pair_t)); key_pair->name = xstrdup("JobCheckpointDir"); key_pair->value = xstrdup(slurm_ctl_conf_ptr->job_ckpt_dir); list_append(ret_list, key_pair); key_pair = xmalloc(sizeof(config_key_pair_t)); key_pair->name = xstrdup("JobCompHost"); key_pair->value = xstrdup(slurm_ctl_conf_ptr->job_comp_host); list_append(ret_list, key_pair); key_pair = xmalloc(sizeof(config_key_pair_t)); key_pair->name = xstrdup("JobCompLoc"); key_pair->value = xstrdup(slurm_ctl_conf_ptr->job_comp_loc); list_append(ret_list, key_pair); snprintf(tmp_str, sizeof(tmp_str), "%u", slurm_ctl_conf_ptr->job_comp_port); key_pair = xmalloc(sizeof(config_key_pair_t)); key_pair->name = xstrdup("JobCompPort"); key_pair->value = xstrdup(tmp_str); list_append(ret_list, key_pair); key_pair = xmalloc(sizeof(config_key_pair_t)); key_pair->name = xstrdup("JobCompType"); key_pair->value = xstrdup(slurm_ctl_conf_ptr->job_comp_type); list_append(ret_list, key_pair); key_pair = xmalloc(sizeof(config_key_pair_t)); key_pair->name = xstrdup("JobCompUser"); key_pair->value = xstrdup(slurm_ctl_conf_ptr->job_comp_user); list_append(ret_list, key_pair); key_pair = xmalloc(sizeof(config_key_pair_t)); key_pair->name = xstrdup("JobContainerPlugin"); key_pair->value = xstrdup(slurm_ctl_conf_ptr->job_container_plugin); list_append(ret_list, key_pair); key_pair = xmalloc(sizeof(config_key_pair_t)); key_pair->name = xstrdup("JobCredentialPrivateKey"); key_pair->value = xstrdup(slurm_ctl_conf_ptr-> job_credential_private_key); list_append(ret_list, key_pair); key_pair = xmalloc(sizeof(config_key_pair_t)); key_pair->name = xstrdup("JobCredentialPublicCertificate"); key_pair->value = xstrdup( slurm_ctl_conf_ptr->job_credential_public_certificate); list_append(ret_list, key_pair); snprintf(tmp_str, sizeof(tmp_str), "%u", slurm_ctl_conf_ptr->job_file_append); key_pair = xmalloc(sizeof(config_key_pair_t)); key_pair->name = xstrdup("JobFileAppend"); 
key_pair->value = xstrdup(tmp_str); list_append(ret_list, key_pair); snprintf(tmp_str, sizeof(tmp_str), "%u", slurm_ctl_conf_ptr->job_requeue); key_pair = xmalloc(sizeof(config_key_pair_t)); key_pair->name = xstrdup("JobRequeue"); key_pair->value = xstrdup(tmp_str); list_append(ret_list, key_pair); key_pair = xmalloc(sizeof(config_key_pair_t)); key_pair->name = xstrdup("JobSubmitPlugins"); key_pair->value = xstrdup(slurm_ctl_conf_ptr->job_submit_plugins); list_append(ret_list, key_pair); if (slurm_ctl_conf_ptr->keep_alive_time == (uint16_t) NO_VAL) snprintf(tmp_str, sizeof(tmp_str), "SYSTEM_DEFAULT"); else { snprintf(tmp_str, sizeof(tmp_str), "%u sec", slurm_ctl_conf_ptr->keep_alive_time); } key_pair = xmalloc(sizeof(config_key_pair_t)); key_pair->name = xstrdup("KeepAliveTime"); key_pair->value = xstrdup(tmp_str); list_append(ret_list, key_pair); snprintf(tmp_str, sizeof(tmp_str), "%u", slurm_ctl_conf_ptr->kill_on_bad_exit); key_pair = xmalloc(sizeof(config_key_pair_t)); key_pair->name = xstrdup("KillOnBadExit"); key_pair->value = xstrdup(tmp_str); list_append(ret_list, key_pair); snprintf(tmp_str, sizeof(tmp_str), "%u sec", slurm_ctl_conf_ptr->kill_wait); key_pair = xmalloc(sizeof(config_key_pair_t)); key_pair->name = xstrdup("KillWait"); key_pair->value = xstrdup(tmp_str); list_append(ret_list, key_pair); key_pair = xmalloc(sizeof(config_key_pair_t)); key_pair->name = xstrdup("LaunchType"); key_pair->value = xstrdup(slurm_ctl_conf_ptr->launch_type); list_append(ret_list, key_pair); key_pair = xmalloc(sizeof(config_key_pair_t)); key_pair->name = xstrdup("Licenses"); key_pair->value = xstrdup(slurm_ctl_conf_ptr->licenses); list_append(ret_list, key_pair); key_pair = xmalloc(sizeof(config_key_pair_t)); key_pair->name = xstrdup("LicensesUsed"); key_pair->value = xstrdup(slurm_ctl_conf_ptr->licenses_used); list_append(ret_list, key_pair); key_pair = xmalloc(sizeof(config_key_pair_t)); key_pair->name = xstrdup("MailProg"); key_pair->value = xstrdup(slurm_ctl_conf_ptr->mail_prog); list_append(ret_list, key_pair); snprintf(tmp_str, sizeof(tmp_str), "%u", slurm_ctl_conf_ptr->max_array_sz); key_pair = xmalloc(sizeof(config_key_pair_t)); key_pair->name = xstrdup("MaxArraySize"); key_pair->value = xstrdup(tmp_str); list_append(ret_list, key_pair); snprintf(tmp_str, sizeof(tmp_str), "%u", slurm_ctl_conf_ptr->max_job_cnt); key_pair = xmalloc(sizeof(config_key_pair_t)); key_pair->name = xstrdup("MaxJobCount"); key_pair->value = xstrdup(tmp_str); list_append(ret_list, key_pair); snprintf(tmp_str, sizeof(tmp_str), "%u", slurm_ctl_conf_ptr->max_job_id); key_pair = xmalloc(sizeof(config_key_pair_t)); key_pair->name = xstrdup("MaxJobId"); key_pair->value = xstrdup(tmp_str); list_append(ret_list, key_pair); key_pair = xmalloc(sizeof(config_key_pair_t)); list_append(ret_list, key_pair); if (slurm_ctl_conf_ptr->max_mem_per_cpu & MEM_PER_CPU) { key_pair->name = xstrdup("MaxMemPerCPU"); snprintf(tmp_str, sizeof(tmp_str), "%u", slurm_ctl_conf_ptr->max_mem_per_cpu & (~MEM_PER_CPU)); key_pair->value = xstrdup(tmp_str); } else if (slurm_ctl_conf_ptr->max_mem_per_cpu) { key_pair->name = xstrdup("MaxMemPerNode"); snprintf(tmp_str, sizeof(tmp_str), "%u", slurm_ctl_conf_ptr->max_mem_per_cpu); key_pair->value = xstrdup(tmp_str); } else { key_pair->name = xstrdup("MaxMemPerNode"); key_pair->value = xstrdup("UNLIMITED"); } snprintf(tmp_str, sizeof(tmp_str), "%u", slurm_ctl_conf_ptr->max_step_cnt); key_pair = xmalloc(sizeof(config_key_pair_t)); key_pair->name = xstrdup("MaxStepCount"); key_pair->value = xstrdup(tmp_str); 
list_append(ret_list, key_pair); snprintf(tmp_str, sizeof(tmp_str), "%u", slurm_ctl_conf_ptr->max_tasks_per_node); key_pair = xmalloc(sizeof(config_key_pair_t)); key_pair->name = xstrdup("MaxTasksPerNode"); key_pair->value = xstrdup(tmp_str); list_append(ret_list, key_pair); snprintf(tmp_str, sizeof(tmp_str), "%u sec", slurm_ctl_conf_ptr->msg_timeout); key_pair = xmalloc(sizeof(config_key_pair_t)); key_pair->name = xstrdup("MessageTimeout"); key_pair->value = xstrdup(tmp_str); list_append(ret_list, key_pair); snprintf(tmp_str, sizeof(tmp_str), "%u sec", slurm_ctl_conf_ptr->min_job_age); key_pair = xmalloc(sizeof(config_key_pair_t)); key_pair->name = xstrdup("MinJobAge"); key_pair->value = xstrdup(tmp_str); list_append(ret_list, key_pair); key_pair = xmalloc(sizeof(config_key_pair_t)); key_pair->name = xstrdup("MpiDefault"); key_pair->value = xstrdup(slurm_ctl_conf_ptr->mpi_default); list_append(ret_list, key_pair); key_pair = xmalloc(sizeof(config_key_pair_t)); key_pair->name = xstrdup("MpiParams"); key_pair->value = xstrdup(slurm_ctl_conf_ptr->mpi_params); list_append(ret_list, key_pair); if (cluster_flags & CLUSTER_FLAG_MULTSD) { key_pair = xmalloc(sizeof(config_key_pair_t)); key_pair->name = xstrdup("MULTIPLE_SLURMD"); key_pair->value = xstrdup("1"); list_append(ret_list, key_pair); } snprintf(tmp_str, sizeof(tmp_str), "%u", slurm_ctl_conf_ptr->next_job_id); key_pair = xmalloc(sizeof(config_key_pair_t)); key_pair->name = xstrdup("NEXT_JOB_ID"); key_pair->value = xstrdup(tmp_str); list_append(ret_list, key_pair); if (slurm_ctl_conf_ptr->over_time_limit == (uint16_t) INFINITE) snprintf(tmp_str, sizeof(tmp_str), "UNLIMITED"); else snprintf(tmp_str, sizeof(tmp_str), "%u min", slurm_ctl_conf_ptr->over_time_limit); key_pair = xmalloc(sizeof(config_key_pair_t)); key_pair->name = xstrdup("OverTimeLimit"); key_pair->value = xstrdup(tmp_str); list_append(ret_list, key_pair); key_pair = xmalloc(sizeof(config_key_pair_t)); key_pair->name = xstrdup("PluginDir"); key_pair->value = xstrdup(slurm_ctl_conf_ptr->plugindir); list_append(ret_list, key_pair); key_pair = xmalloc(sizeof(config_key_pair_t)); key_pair->name = xstrdup("PlugStackConfig"); key_pair->value = xstrdup(slurm_ctl_conf_ptr->plugstack); list_append(ret_list, key_pair); key_pair = xmalloc(sizeof(config_key_pair_t)); key_pair->name = xstrdup("PreemptMode"); key_pair->value = xstrdup(preempt_mode_string(slurm_ctl_conf_ptr-> preempt_mode)); list_append(ret_list, key_pair); key_pair = xmalloc(sizeof(config_key_pair_t)); key_pair->name = xstrdup("PreemptType"); key_pair->value = xstrdup(slurm_ctl_conf_ptr->preempt_type); list_append(ret_list, key_pair); if (strcmp(slurm_ctl_conf_ptr->priority_type, "priority/basic") == 0) { key_pair = xmalloc(sizeof(config_key_pair_t)); key_pair->name = xstrdup("PriorityType"); key_pair->value = xstrdup(slurm_ctl_conf_ptr->priority_type); list_append(ret_list, key_pair); } else { secs2time_str((time_t) slurm_ctl_conf_ptr->priority_decay_hl, tmp_str, sizeof(tmp_str)); key_pair = xmalloc(sizeof(config_key_pair_t)); key_pair->name = xstrdup("PriorityDecayHalfLife"); key_pair->value = xstrdup(tmp_str); list_append(ret_list, key_pair); secs2time_str((time_t)slurm_ctl_conf_ptr->priority_calc_period, tmp_str, sizeof(tmp_str)); key_pair = xmalloc(sizeof(config_key_pair_t)); key_pair->name = xstrdup("PriorityCalcPeriod"); key_pair->value = xstrdup(tmp_str); list_append(ret_list, key_pair); snprintf(tmp_str, sizeof(tmp_str), "%u", slurm_ctl_conf_ptr->priority_favor_small); key_pair = xmalloc(sizeof(config_key_pair_t)); 
key_pair->name = xstrdup("PriorityFavorSmall"); key_pair->value = xstrdup(tmp_str); list_append(ret_list, key_pair); snprintf(tmp_str, sizeof(tmp_str), "%u", slurm_ctl_conf_ptr->priority_flags); key_pair = xmalloc(sizeof(config_key_pair_t)); key_pair->name = xstrdup("PriorityFlags"); key_pair->value = xstrdup(tmp_str); list_append(ret_list, key_pair); secs2time_str((time_t) slurm_ctl_conf_ptr->priority_max_age, tmp_str, sizeof(tmp_str)); key_pair = xmalloc(sizeof(config_key_pair_t)); key_pair->name = xstrdup("PriorityMaxAge"); key_pair->value = xstrdup(tmp_str); list_append(ret_list, key_pair); key_pair = xmalloc(sizeof(config_key_pair_t)); key_pair->name = xstrdup("PriorityUsageResetPeriod"); key_pair->value = xstrdup(_reset_period_str( slurm_ctl_conf_ptr-> priority_reset_period)); list_append(ret_list, key_pair); key_pair = xmalloc(sizeof(config_key_pair_t)); key_pair->name = xstrdup("PriorityType"); key_pair->value = xstrdup(slurm_ctl_conf_ptr->priority_type); list_append(ret_list, key_pair); snprintf(tmp_str, sizeof(tmp_str), "%u", slurm_ctl_conf_ptr->priority_weight_age); key_pair = xmalloc(sizeof(config_key_pair_t)); key_pair->name = xstrdup("PriorityWeightAge"); key_pair->value = xstrdup(tmp_str); list_append(ret_list, key_pair); snprintf(tmp_str, sizeof(tmp_str), "%u", slurm_ctl_conf_ptr->priority_weight_fs); key_pair = xmalloc(sizeof(config_key_pair_t)); key_pair->name = xstrdup("PriorityWeightFairShare"); key_pair->value = xstrdup(tmp_str); list_append(ret_list, key_pair); snprintf(tmp_str, sizeof(tmp_str), "%u", slurm_ctl_conf_ptr->priority_weight_js); key_pair = xmalloc(sizeof(config_key_pair_t)); key_pair->name = xstrdup("PriorityWeightJobSize"); key_pair->value = xstrdup(tmp_str); list_append(ret_list, key_pair); snprintf(tmp_str, sizeof(tmp_str), "%u", slurm_ctl_conf_ptr->priority_weight_part); key_pair = xmalloc(sizeof(config_key_pair_t)); key_pair->name = xstrdup("PriorityWeightPartition"); key_pair->value = xstrdup(tmp_str); list_append(ret_list, key_pair); snprintf(tmp_str, sizeof(tmp_str), "%u", slurm_ctl_conf_ptr->priority_weight_qos); key_pair = xmalloc(sizeof(config_key_pair_t)); key_pair->name = xstrdup("PriorityWeightQOS"); key_pair->value = xstrdup(tmp_str); list_append(ret_list, key_pair); } private_data_string(slurm_ctl_conf_ptr->private_data, tmp_str, sizeof(tmp_str)); key_pair = xmalloc(sizeof(config_key_pair_t)); key_pair->name = xstrdup("PrivateData"); key_pair->value = xstrdup(tmp_str); list_append(ret_list, key_pair); key_pair = xmalloc(sizeof(config_key_pair_t)); key_pair->name = xstrdup("ProctrackType"); key_pair->value = xstrdup(slurm_ctl_conf_ptr->proctrack_type); list_append(ret_list, key_pair); key_pair = xmalloc(sizeof(config_key_pair_t)); key_pair->name = xstrdup("Prolog"); key_pair->value = xstrdup(slurm_ctl_conf_ptr->prolog); list_append(ret_list, key_pair); key_pair = xmalloc(sizeof(config_key_pair_t)); key_pair->name = xstrdup("PrologSlurmctld"); key_pair->value = xstrdup(slurm_ctl_conf_ptr->prolog_slurmctld); list_append(ret_list, key_pair); snprintf(tmp_str, sizeof(tmp_str), "%u", slurm_ctl_conf_ptr->propagate_prio_process); key_pair = xmalloc(sizeof(config_key_pair_t)); key_pair->name = xstrdup("PropagatePrioProcess"); key_pair->value = xstrdup(tmp_str); list_append(ret_list, key_pair); key_pair = xmalloc(sizeof(config_key_pair_t)); key_pair->name = xstrdup("PropagateResourceLimits"); key_pair->value = xstrdup(slurm_ctl_conf_ptr->propagate_rlimits); list_append(ret_list, key_pair); key_pair = xmalloc(sizeof(config_key_pair_t)); 
key_pair->name = xstrdup("PropagateResourceLimitsExcept"); key_pair->value = xstrdup(slurm_ctl_conf_ptr-> propagate_rlimits_except); list_append(ret_list, key_pair); key_pair = xmalloc(sizeof(config_key_pair_t)); key_pair->name = xstrdup("RebootProgram"); key_pair->value = xstrdup(slurm_ctl_conf_ptr->reboot_program); list_append(ret_list, key_pair); key_pair = xmalloc(sizeof(config_key_pair_t)); key_pair->name = xstrdup("ReconfigFlags"); key_pair->value = reconfig_flags2str(slurm_ctl_conf_ptr->reconfig_flags); list_append(ret_list, key_pair); key_pair = xmalloc(sizeof(config_key_pair_t)); key_pair->name = xstrdup("ResumeProgram"); key_pair->value = xstrdup(slurm_ctl_conf_ptr->resume_program); list_append(ret_list, key_pair); snprintf(tmp_str, sizeof(tmp_str), "%u nodes/min", slurm_ctl_conf_ptr->resume_rate); key_pair = xmalloc(sizeof(config_key_pair_t)); key_pair->name = xstrdup("ResumeRate"); key_pair->value = xstrdup(tmp_str); list_append(ret_list, key_pair); snprintf(tmp_str, sizeof(tmp_str), "%u sec", slurm_ctl_conf_ptr->resume_timeout); key_pair = xmalloc(sizeof(config_key_pair_t)); key_pair->name = xstrdup("ResumeTimeout"); key_pair->value = xstrdup(tmp_str); list_append(ret_list, key_pair); key_pair = xmalloc(sizeof(config_key_pair_t)); key_pair->name = xstrdup("ResvEpilog"); key_pair->value = xstrdup(slurm_ctl_conf_ptr->resv_epilog); list_append(ret_list, key_pair); if (slurm_ctl_conf_ptr->resv_over_run == (uint16_t) INFINITE) snprintf(tmp_str, sizeof(tmp_str), "UNLIMITED"); else snprintf(tmp_str, sizeof(tmp_str), "%u min", slurm_ctl_conf_ptr->resv_over_run); key_pair = xmalloc(sizeof(config_key_pair_t)); key_pair->name = xstrdup("ResvOverRun"); key_pair->value = xstrdup(tmp_str); list_append(ret_list, key_pair); key_pair = xmalloc(sizeof(config_key_pair_t)); key_pair->name = xstrdup("ResvProlog"); key_pair->value = xstrdup(slurm_ctl_conf_ptr->resv_prolog); list_append(ret_list, key_pair); snprintf(tmp_str, sizeof(tmp_str), "%u", slurm_ctl_conf_ptr->ret2service); key_pair = xmalloc(sizeof(config_key_pair_t)); key_pair->name = xstrdup("ReturnToService"); key_pair->value = xstrdup(tmp_str); list_append(ret_list, key_pair); key_pair = xmalloc(sizeof(config_key_pair_t)); key_pair->name = xstrdup("SallocDefaultCommand"); key_pair->value = xstrdup(slurm_ctl_conf_ptr->salloc_default_command); list_append(ret_list, key_pair); key_pair = xmalloc(sizeof(config_key_pair_t)); key_pair->name = xstrdup("SchedulerParameters"); key_pair->value = xstrdup(slurm_ctl_conf_ptr->sched_params); list_append(ret_list, key_pair); snprintf(tmp_str, sizeof(tmp_str), "%u", slurm_ctl_conf_ptr->schedport); key_pair = xmalloc(sizeof(config_key_pair_t)); key_pair->name = xstrdup("SchedulerPort"); key_pair->value = xstrdup(tmp_str); list_append(ret_list, key_pair); snprintf(tmp_str, sizeof(tmp_str), "%u", slurm_ctl_conf_ptr->schedrootfltr); key_pair = xmalloc(sizeof(config_key_pair_t)); key_pair->name = xstrdup("SchedulerRootFilter"); key_pair->value = xstrdup(tmp_str); list_append(ret_list, key_pair); snprintf(tmp_str, sizeof(tmp_str), "%u sec", slurm_ctl_conf_ptr->sched_time_slice); key_pair = xmalloc(sizeof(config_key_pair_t)); key_pair->name = xstrdup("SchedulerTimeSlice"); key_pair->value = xstrdup(tmp_str); list_append(ret_list, key_pair); key_pair = xmalloc(sizeof(config_key_pair_t)); key_pair->name = xstrdup("SchedulerType"); key_pair->value = xstrdup(slurm_ctl_conf_ptr->schedtype); list_append(ret_list, key_pair); key_pair = xmalloc(sizeof(config_key_pair_t)); key_pair->name = xstrdup("SelectType"); 
key_pair->value = xstrdup(slurm_ctl_conf_ptr->select_type); list_append(ret_list, key_pair); if (slurm_ctl_conf_ptr->select_type_param) { key_pair = xmalloc(sizeof(config_key_pair_t)); key_pair->name = xstrdup("SelectTypeParameters"); key_pair->value = xstrdup( select_type_param_string(slurm_ctl_conf_ptr-> select_type_param)); list_append(ret_list, key_pair); } snprintf(tmp_str, sizeof(tmp_str), "%s(%u)", slurm_ctl_conf_ptr->slurm_user_name, slurm_ctl_conf_ptr->slurm_user_id); key_pair = xmalloc(sizeof(config_key_pair_t)); key_pair->name = xstrdup("SlurmUser"); key_pair->value = xstrdup(tmp_str); list_append(ret_list, key_pair); snprintf(tmp_str, sizeof(tmp_str), "%s", log_num2string(slurm_ctl_conf_ptr->slurmctld_debug)); key_pair = xmalloc(sizeof(config_key_pair_t)); key_pair->name = xstrdup("SlurmctldDebug"); key_pair->value = xstrdup(tmp_str); list_append(ret_list, key_pair); key_pair = xmalloc(sizeof(config_key_pair_t)); key_pair->name = xstrdup("SlurmctldLogFile"); key_pair->value = xstrdup(slurm_ctl_conf_ptr->slurmctld_logfile); list_append(ret_list, key_pair); key_pair = xmalloc(sizeof(config_key_pair_t)); key_pair->name = xstrdup("SlurmSchedLogFile"); key_pair->value = xstrdup(slurm_ctl_conf_ptr->sched_logfile); list_append(ret_list, key_pair); if (slurm_ctl_conf_ptr->slurmctld_port_count > 1) { uint32_t high_port = slurm_ctl_conf_ptr->slurmctld_port; high_port += (slurm_ctl_conf_ptr->slurmctld_port_count - 1); snprintf(tmp_str, sizeof(tmp_str), "%u-%u", slurm_ctl_conf_ptr->slurmctld_port, high_port); } else { snprintf(tmp_str, sizeof(tmp_str), "%u", slurm_ctl_conf_ptr->slurmctld_port); } key_pair = xmalloc(sizeof(config_key_pair_t)); key_pair->name = xstrdup("SlurmctldPort"); key_pair->value = xstrdup(tmp_str); list_append(ret_list, key_pair); snprintf(tmp_str, sizeof(tmp_str), "%u sec", slurm_ctl_conf_ptr->slurmctld_timeout); key_pair = xmalloc(sizeof(config_key_pair_t)); key_pair->name = xstrdup("SlurmctldTimeout"); key_pair->value = xstrdup(tmp_str); list_append(ret_list, key_pair); snprintf(tmp_str, sizeof(tmp_str), "%s", log_num2string(slurm_ctl_conf_ptr->slurmd_debug)); key_pair = xmalloc(sizeof(config_key_pair_t)); key_pair->name = xstrdup("SlurmdDebug"); key_pair->value = xstrdup(tmp_str); list_append(ret_list, key_pair); key_pair = xmalloc(sizeof(config_key_pair_t)); key_pair->name = xstrdup("SlurmdLogFile"); key_pair->value = xstrdup(slurm_ctl_conf_ptr->slurmd_logfile); list_append(ret_list, key_pair); key_pair = xmalloc(sizeof(config_key_pair_t)); key_pair->name = xstrdup("SlurmdPidFile"); key_pair->value = xstrdup(slurm_ctl_conf_ptr->slurmd_pidfile); list_append(ret_list, key_pair); key_pair = xmalloc(sizeof(config_key_pair_t)); key_pair->name = xstrdup("SlurmdPlugstack"); key_pair->value = xstrdup(slurm_ctl_conf_ptr->slurmd_plugstack); list_append(ret_list, key_pair); #ifndef MULTIPLE_SLURMD snprintf(tmp_str, sizeof(tmp_str), "%u", slurm_ctl_conf_ptr->slurmd_port); key_pair = xmalloc(sizeof(config_key_pair_t)); key_pair->name = xstrdup("SlurmdPort"); key_pair->value = xstrdup(tmp_str); list_append(ret_list, key_pair); #endif key_pair = xmalloc(sizeof(config_key_pair_t)); key_pair->name = xstrdup("SlurmdSpoolDir"); key_pair->value = xstrdup(slurm_ctl_conf_ptr->slurmd_spooldir); list_append(ret_list, key_pair); snprintf(tmp_str, sizeof(tmp_str), "%u sec", slurm_ctl_conf_ptr->slurmd_timeout); key_pair = xmalloc(sizeof(config_key_pair_t)); key_pair->name = xstrdup("SlurmdTimeout"); key_pair->value = xstrdup(tmp_str); list_append(ret_list, key_pair); snprintf(tmp_str, 
sizeof(tmp_str), "%s(%u)", slurm_ctl_conf_ptr->slurmd_user_name, slurm_ctl_conf_ptr->slurmd_user_id); key_pair = xmalloc(sizeof(config_key_pair_t)); key_pair->name = xstrdup("SlurmdUser"); key_pair->value = xstrdup(tmp_str); list_append(ret_list, key_pair); snprintf(tmp_str, sizeof(tmp_str), "%u", slurm_ctl_conf_ptr->sched_log_level); key_pair = xmalloc(sizeof(config_key_pair_t)); key_pair->name = xstrdup("SlurmSchedLogLevel"); key_pair->value = xstrdup(tmp_str); list_append(ret_list, key_pair); key_pair = xmalloc(sizeof(config_key_pair_t)); key_pair->name = xstrdup("SlurmctldPidFile"); key_pair->value = xstrdup(slurm_ctl_conf_ptr->slurmctld_pidfile); list_append(ret_list, key_pair); key_pair = xmalloc(sizeof(config_key_pair_t)); key_pair->name = xstrdup("SlurmctldPlugstack"); key_pair->value = xstrdup(slurm_ctl_conf_ptr->slurmctld_plugstack); list_append(ret_list, key_pair); key_pair = xmalloc(sizeof(config_key_pair_t)); key_pair->name = xstrdup("SLURM_CONF"); key_pair->value = xstrdup(slurm_ctl_conf_ptr->slurm_conf); list_append(ret_list, key_pair); key_pair = xmalloc(sizeof(config_key_pair_t)); key_pair->name = xstrdup("SLURM_VERSION"); key_pair->value = xstrdup(slurm_ctl_conf_ptr->version); list_append(ret_list, key_pair); key_pair = xmalloc(sizeof(config_key_pair_t)); key_pair->name = xstrdup("SrunEpilog"); key_pair->value = xstrdup(slurm_ctl_conf_ptr->srun_epilog); list_append(ret_list, key_pair); key_pair = xmalloc(sizeof(config_key_pair_t)); key_pair->name = xstrdup("SrunProlog"); key_pair->value = xstrdup(slurm_ctl_conf_ptr->srun_prolog); list_append(ret_list, key_pair); key_pair = xmalloc(sizeof(config_key_pair_t)); key_pair->name = xstrdup("StateSaveLocation"); key_pair->value = xstrdup(slurm_ctl_conf_ptr->state_save_location); list_append(ret_list, key_pair); key_pair = xmalloc(sizeof(config_key_pair_t)); key_pair->name = xstrdup("SuspendExcNodes"); key_pair->value = xstrdup(slurm_ctl_conf_ptr->suspend_exc_nodes); list_append(ret_list, key_pair); key_pair = xmalloc(sizeof(config_key_pair_t)); key_pair->name = xstrdup("SuspendExcParts"); key_pair->value = xstrdup(slurm_ctl_conf_ptr->suspend_exc_parts); list_append(ret_list, key_pair); key_pair = xmalloc(sizeof(config_key_pair_t)); key_pair->name = xstrdup("SuspendProgram"); key_pair->value = xstrdup(slurm_ctl_conf_ptr->suspend_program); list_append(ret_list, key_pair); snprintf(tmp_str, sizeof(tmp_str), "%u nodes/min", slurm_ctl_conf_ptr->suspend_rate); key_pair = xmalloc(sizeof(config_key_pair_t)); key_pair->name = xstrdup("SuspendRate"); key_pair->value = xstrdup(tmp_str); list_append(ret_list, key_pair); if (!slurm_ctl_conf_ptr->suspend_time) snprintf(tmp_str, sizeof(tmp_str), "NONE"); else snprintf(tmp_str, sizeof(tmp_str), "%d sec", ((int)slurm_ctl_conf_ptr->suspend_time - 1)); key_pair = xmalloc(sizeof(config_key_pair_t)); key_pair->name = xstrdup("SuspendTime"); key_pair->value = xstrdup(tmp_str); list_append(ret_list, key_pair); snprintf(tmp_str, sizeof(tmp_str), "%u sec", slurm_ctl_conf_ptr->suspend_timeout); key_pair = xmalloc(sizeof(config_key_pair_t)); key_pair->name = xstrdup("SuspendTimeout"); key_pair->value = xstrdup(tmp_str); list_append(ret_list, key_pair); key_pair = xmalloc(sizeof(config_key_pair_t)); key_pair->name = xstrdup("SwitchType"); key_pair->value = xstrdup(slurm_ctl_conf_ptr->switch_type); list_append(ret_list, key_pair); key_pair = xmalloc(sizeof(config_key_pair_t)); key_pair->name = xstrdup("TaskEpilog"); key_pair->value = xstrdup(slurm_ctl_conf_ptr->task_epilog); list_append(ret_list, key_pair); 
key_pair = xmalloc(sizeof(config_key_pair_t)); key_pair->name = xstrdup("TaskPlugin"); key_pair->value = xstrdup(slurm_ctl_conf_ptr->task_plugin); list_append(ret_list, key_pair); slurm_sprint_cpu_bind_type(tmp_str, slurm_ctl_conf_ptr->task_plugin_param); key_pair = xmalloc(sizeof(config_key_pair_t)); key_pair->name = xstrdup("TaskPluginParam"); key_pair->value = xstrdup(tmp_str); list_append(ret_list, key_pair); key_pair = xmalloc(sizeof(config_key_pair_t)); key_pair->name = xstrdup("TaskProlog"); key_pair->value = xstrdup(slurm_ctl_conf_ptr->task_prolog); list_append(ret_list, key_pair); key_pair = xmalloc(sizeof(config_key_pair_t)); key_pair->name = xstrdup("TmpFS"); key_pair->value = xstrdup(slurm_ctl_conf_ptr->tmp_fs); list_append(ret_list, key_pair); key_pair = xmalloc(sizeof(config_key_pair_t)); key_pair->name = xstrdup("TopologyPlugin"); key_pair->value = xstrdup(slurm_ctl_conf_ptr->topology_plugin); list_append(ret_list, key_pair); snprintf(tmp_str, sizeof(tmp_str), "%u", slurm_ctl_conf_ptr->track_wckey); key_pair = xmalloc(sizeof(config_key_pair_t)); key_pair->name = xstrdup("TrackWCKey"); key_pair->value = xstrdup(tmp_str); list_append(ret_list, key_pair); snprintf(tmp_str, sizeof(tmp_str), "%u", slurm_ctl_conf_ptr->tree_width); key_pair = xmalloc(sizeof(config_key_pair_t)); key_pair->name = xstrdup("TreeWidth"); key_pair->value = xstrdup(tmp_str); list_append(ret_list, key_pair); snprintf(tmp_str, sizeof(tmp_str), "%u", slurm_ctl_conf_ptr->use_pam); key_pair = xmalloc(sizeof(config_key_pair_t)); key_pair->name = xstrdup("UsePam"); key_pair->value = xstrdup(tmp_str); list_append(ret_list, key_pair); key_pair = xmalloc(sizeof(config_key_pair_t)); key_pair->name = xstrdup("UnkillableStepProgram"); key_pair->value = xstrdup(slurm_ctl_conf_ptr->unkillable_program); list_append(ret_list, key_pair); snprintf(tmp_str, sizeof(tmp_str), "%u sec", slurm_ctl_conf_ptr->unkillable_timeout); key_pair = xmalloc(sizeof(config_key_pair_t)); key_pair->name = xstrdup("UnkillableStepTimeout"); key_pair->value = xstrdup(tmp_str); list_append(ret_list, key_pair); snprintf(tmp_str, sizeof(tmp_str), "%u percent", slurm_ctl_conf_ptr->vsize_factor); key_pair = xmalloc(sizeof(config_key_pair_t)); key_pair->name = xstrdup("VSizeFactor"); key_pair->value = xstrdup(tmp_str); list_append(ret_list, key_pair); snprintf(tmp_str, sizeof(tmp_str), "%u sec", slurm_ctl_conf_ptr->wait_time); key_pair = xmalloc(sizeof(config_key_pair_t)); key_pair->name = xstrdup("WaitTime"); key_pair->value = xstrdup(tmp_str); list_append(ret_list, key_pair); return (void *)ret_list; }
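/*
 * Illustrative sketch, not part of the original source: consuming the List
 * returned by slurm_ctl_conf_2_key_pairs().  list_iterator_create(),
 * list_next(), list_iterator_destroy() and FREE_NULL_LIST() are the real
 * helpers from src/common/list.h; config_key_pair_t carries the name/value
 * strings filled in above.  The function name is hypothetical.
 */
static void _print_config_key_pairs(slurm_ctl_conf_t *conf_ptr)
{
	List key_pairs = (List) slurm_ctl_conf_2_key_pairs(conf_ptr);
	ListIterator iter;
	config_key_pair_t *key_pair;

	if (!key_pairs)
		return;
	iter = list_iterator_create(key_pairs);
	while ((key_pair = list_next(iter)))
		info("%-32s = %s", key_pair->name, key_pair->value);
	list_iterator_destroy(iter);
	FREE_NULL_LIST(key_pairs);
}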
int _get_sched_cpuset(hwloc_topology_t topology, hwloc_obj_type_t hwtype, hwloc_obj_type_t req_hwtype, cpu_set_t *mask, stepd_step_rec_t *job) { int nummasks, maskid, i, threads; char *curstr, *selstr; char mstr[1 + CPU_SETSIZE / 4]; uint32_t local_id = job->envtp->localid; char buftype[1024]; /* For CPU_BIND_RANK, CPU_BIND_MASK and CPU_BIND_MAP, generate sched * cpuset directly from cpu numbers. * For CPU_BIND_LDRANK, CPU_BIND_LDMASK and CPU_BIND_LDMAP, generate * sched cpuset from hwloc topology. */ slurm_sprint_cpu_bind_type(buftype, job->cpu_bind_type); debug3("task/cgroup: (%s[%d]) %s", buftype, job->cpu_bind_type, job->cpu_bind); CPU_ZERO(mask); if (job->cpu_bind_type & CPU_BIND_NONE) { return true; } if (job->cpu_bind_type & CPU_BIND_RANK) { threads = MAX(conf->threads, 1); CPU_SET(job->envtp->localid % (job->cpus*threads), mask); return true; } if (job->cpu_bind_type & CPU_BIND_LDRANK) { return _get_ldom_sched_cpuset(topology, hwtype, req_hwtype, local_id, mask); } if (!job->cpu_bind) return false; nummasks = 1; selstr = NULL; /* get number of strings present in cpu_bind */ curstr = job->cpu_bind; while (*curstr) { if (nummasks == local_id+1) { selstr = curstr; break; } if (*curstr == ',') nummasks++; curstr++; } /* if we didn't already find the mask... */ if (!selstr) { /* ...select mask string by wrapping task ID into list */ maskid = local_id % nummasks; i = maskid; curstr = job->cpu_bind; while (*curstr && i) { if (*curstr == ',') i--; curstr++; } if (!*curstr) { return false; } selstr = curstr; } /* extract the selected mask from the list */ i = 0; curstr = mstr; while (*selstr && *selstr != ',' && i++ < (CPU_SETSIZE/4)) *curstr++ = *selstr++; *curstr = '\0'; if (job->cpu_bind_type & CPU_BIND_MASK) { /* convert mask string into cpu_set_t mask */ if (str_to_cpuset(mask, mstr) < 0) { error("task/cgroup: str_to_cpuset %s", mstr); return false; } return true; } if (job->cpu_bind_type & CPU_BIND_MAP) { unsigned int mycpu = 0; if (strncmp(mstr, "0x", 2) == 0) { mycpu = strtoul (&(mstr[2]), NULL, 16); } else { mycpu = strtoul (mstr, NULL, 10); } CPU_SET(mycpu, mask); return true; } if (job->cpu_bind_type & CPU_BIND_LDMASK) { int len = strlen(mstr); char *ptr = mstr + len - 1; uint32_t base = 0; curstr = mstr; /* skip 0x, it's all hex anyway */ if (len > 1 && !memcmp(mstr, "0x", 2L)) curstr += 2; while (ptr >= curstr) { char val = char_to_val(*ptr); if (val == (char) -1) return false; if (val & 1) _get_ldom_sched_cpuset(topology, hwtype, req_hwtype, base, mask); if (val & 2) _get_ldom_sched_cpuset(topology, hwtype, req_hwtype, base + 1, mask); if (val & 4) _get_ldom_sched_cpuset(topology, hwtype, req_hwtype, base + 2, mask); if (val & 8) _get_ldom_sched_cpuset(topology, hwtype, req_hwtype, base + 3, mask); len--; ptr--; base += 4; } return true; } if (job->cpu_bind_type & CPU_BIND_LDMAP) { uint32_t myldom = 0; if (strncmp(mstr, "0x", 2) == 0) { myldom = strtoul (&(mstr[2]), NULL, 16); } else { myldom = strtoul (mstr, NULL, 10); } return _get_ldom_sched_cpuset(topology, hwtype, req_hwtype, myldom, mask); } return false; }
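/*
 * Illustrative sketch, not part of the original source: how the
 * CPU_BIND_LDMASK loop above walks a hex mask from its least significant
 * digit, covering four locality domains per digit.  For "0x5" it visits
 * ldoms 0 and 2.  The helper and callback names are hypothetical (the real
 * code calls _get_ldom_sched_cpuset() here and _bind_ldom() in the affinity
 * plugin); the sketch assumes a well-formed hex string and the surrounding
 * file's headers.
 */
static void _walk_ldom_mask(const char *mstr,
			    void (*bind_ldom)(uint32_t ldom))
{
	int len = strlen(mstr);
	const char *start = mstr;
	const char *ptr = mstr + len - 1;
	uint32_t base = 0;

	if ((len > 1) && !strncmp(mstr, "0x", 2))
		start += 2;			/* skip 0x, it's all hex anyway */

	/* scan hex digits right-to-left; each digit covers 4 ldoms */
	for (; ptr >= start; ptr--, base += 4) {
		int val = (*ptr <= '9') ? (*ptr - '0')
					: ((*ptr | 0x20) - 'a' + 10);
		if (val & 1) bind_ldom(base);
		if (val & 2) bind_ldom(base + 1);
		if (val & 4) bind_ldom(base + 2);
		if (val & 8) bind_ldom(base + 3);
	}
}
/* e.g. _walk_ldom_mask("0x5", my_bind_fn) calls my_bind_fn(0) and my_bind_fn(2) */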
/*
 * lllp_distribution
 *
 * Note: lllp stands for Lowest Level of Logical Processors.
 *
 * When automatic binding is enabled:
 *	- no binding flags set >= CPU_BIND_NONE, and
 *	- an auto binding level selected CPU_BIND_TO_{SOCKETS,CORES,THREADS}
 * Otherwise limit job step to the allocated CPUs
 *
 * generate the appropriate cpu_bind type and string which results in
 * the specified lllp distribution.
 *
 * IN/OUT- job launch request (cpu_bind_type and cpu_bind updated)
 * IN- node_id (index used to select this node's task counts and
 *	global task ids from the request)
 */
void lllp_distribution(launch_tasks_request_msg_t *req, uint32_t node_id)
{
	int rc = SLURM_SUCCESS;
	bitstr_t **masks = NULL;
	char buf_type[100];
	int maxtasks = req->tasks_to_launch[(int)node_id];
	int whole_nodes, whole_sockets, whole_cores, whole_threads;
	int part_sockets, part_cores;
	const uint32_t *gtid = req->global_task_ids[(int)node_id];
	static uint16_t bind_entity = CPU_BIND_TO_THREADS | CPU_BIND_TO_CORES |
				      CPU_BIND_TO_SOCKETS | CPU_BIND_TO_LDOMS;
	static uint16_t bind_mode = CPU_BIND_NONE | CPU_BIND_MASK |
				    CPU_BIND_RANK | CPU_BIND_MAP |
				    CPU_BIND_LDMASK | CPU_BIND_LDRANK |
				    CPU_BIND_LDMAP;

	if (req->cpu_bind_type & bind_mode) {
		/* Explicit step binding specified by user */
		char *avail_mask = _alloc_mask(req,
					       &whole_nodes, &whole_sockets,
					       &whole_cores, &whole_threads,
					       &part_sockets, &part_cores);
		if ((whole_nodes == 0) && avail_mask) {
			/* Step does NOT have access to whole node,
			 * bind to full mask of available processors */
			xfree(req->cpu_bind);
			req->cpu_bind = avail_mask;
			req->cpu_bind_type &= (~bind_mode);
			req->cpu_bind_type |= CPU_BIND_MASK;
		} else {
			/* Step does have access to whole node,
			 * bind to whatever step wants */
			xfree(avail_mask);
		}
		slurm_sprint_cpu_bind_type(buf_type, req->cpu_bind_type);
		info("lllp_distribution jobid [%u] manual binding: %s",
		     req->job_id, buf_type);
		return;
	}

	if (!(req->cpu_bind_type & bind_entity)) {
		/* No bind unit (sockets, cores) specified by user,
		 * pick something reasonable */
		int max_tasks = req->tasks_to_launch[(int)node_id];
		char *avail_mask = _alloc_mask(req,
					       &whole_nodes, &whole_sockets,
					       &whole_cores, &whole_threads,
					       &part_sockets, &part_cores);
		debug("binding tasks:%d to "
		      "nodes:%d sockets:%d:%d cores:%d:%d threads:%d",
		      max_tasks, whole_nodes, whole_sockets, part_sockets,
		      whole_cores, part_cores, whole_threads);
		if ((max_tasks == whole_sockets) && (part_sockets == 0)) {
			req->cpu_bind_type |= CPU_BIND_TO_SOCKETS;
			goto make_auto;
		}
		if ((max_tasks == whole_cores) && (part_cores == 0)) {
			req->cpu_bind_type |= CPU_BIND_TO_CORES;
			goto make_auto;
		}
		if (max_tasks == whole_threads) {
			req->cpu_bind_type |= CPU_BIND_TO_THREADS;
			goto make_auto;
		}
		if (avail_mask) {
			xfree(req->cpu_bind);
			req->cpu_bind = avail_mask;
			req->cpu_bind_type |= CPU_BIND_MASK;
		}
		slurm_sprint_cpu_bind_type(buf_type, req->cpu_bind_type);
		info("lllp_distribution jobid [%u] auto binding off: %s",
		     req->job_id, buf_type);
		return;

	make_auto:
		xfree(avail_mask);
		slurm_sprint_cpu_bind_type(buf_type, req->cpu_bind_type);
		info("lllp_distribution jobid [%u] implicit auto binding: "
		     "%s, dist %d", req->job_id, buf_type, req->task_dist);
	} else {
		/* Explicit bind unit (sockets, cores) specified by user */
		slurm_sprint_cpu_bind_type(buf_type, req->cpu_bind_type);
		info("lllp_distribution jobid [%u] binding: %s, dist %d",
		     req->job_id, buf_type, req->task_dist);
	}

	switch (req->task_dist) {
	case SLURM_DIST_BLOCK_BLOCK:
	case SLURM_DIST_CYCLIC_BLOCK:
	case SLURM_DIST_PLANE:
		/* tasks are distributed in blocks within a plane */
		rc = _task_layout_lllp_block(req, node_id, &masks);
		break;
	case SLURM_DIST_CYCLIC:
	case SLURM_DIST_BLOCK:
	case SLURM_DIST_CYCLIC_CYCLIC:
	case SLURM_DIST_BLOCK_CYCLIC:
		rc = _task_layout_lllp_cyclic(req, node_id, &masks);
		break;
	default:
		if (req->cpus_per_task > 1)
			rc = _task_layout_lllp_multi(req, node_id, &masks);
		else
			rc = _task_layout_lllp_cyclic(req, node_id, &masks);
		req->task_dist = SLURM_DIST_BLOCK_CYCLIC;
		break;
	}

	/* FIXME: I'm worried about core_bitmap with CPU_BIND_TO_SOCKETS &
	 * max_cores - does select/cons_res plugin allocate whole
	 * socket??? Maybe not. Check srun man page. */

	if (rc == SLURM_SUCCESS) {
		_task_layout_display_masks(req, gtid, maxtasks, masks);
		/* translate abstract masks to actual hardware layout */
		_lllp_map_abstract_masks(maxtasks, masks);
		_task_layout_display_masks(req, gtid, maxtasks, masks);
#ifdef HAVE_NUMA
		if (req->cpu_bind_type & CPU_BIND_TO_LDOMS) {
			_match_masks_to_ldom(maxtasks, masks);
			_task_layout_display_masks(req, gtid, maxtasks, masks);
		}
#endif
		/* convert masks into cpu_bind mask string */
		_lllp_generate_cpu_bind(req, maxtasks, masks);
	} else {
		char *avail_mask = _alloc_mask(req,
					       &whole_nodes, &whole_sockets,
					       &whole_cores, &whole_threads,
					       &part_sockets, &part_cores);
		if (avail_mask) {
			xfree(req->cpu_bind);
			req->cpu_bind = avail_mask;
			req->cpu_bind_type &= (~bind_mode);
			req->cpu_bind_type |= CPU_BIND_MASK;
		}
		slurm_sprint_cpu_bind_type(buf_type, req->cpu_bind_type);
		error("lllp_distribution jobid [%u] overriding binding: %s",
		      req->job_id, buf_type);
		error("Verify socket/core/thread counts in configuration");
	}
	if (masks)
		_lllp_free_masks(maxtasks, masks);
}
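/*
 * [Editor's illustrative sketch - not part of the plugin source.]
 * The automatic bind-level choice in lllp_distribution() above compares
 * the step's task count with the number of whole sockets/cores/threads
 * it owns (counts produced by _alloc_mask()).  The small program below
 * distills that decision chain and works a few examples;
 * pick_auto_bind_level() is an invented name used only here.
 */
#include <stdio.h>

typedef enum { BIND_NONE, BIND_SOCKETS, BIND_CORES, BIND_THREADS } bind_level_t;

static bind_level_t pick_auto_bind_level(int max_tasks,
					 int whole_sockets, int part_sockets,
					 int whole_cores, int part_cores,
					 int whole_threads)
{
	if ((max_tasks == whole_sockets) && (part_sockets == 0))
		return BIND_SOCKETS;	/* one task per fully-allocated socket */
	if ((max_tasks == whole_cores) && (part_cores == 0))
		return BIND_CORES;	/* one task per fully-allocated core */
	if (max_tasks == whole_threads)
		return BIND_THREADS;	/* one task per hardware thread */
	return BIND_NONE;		/* fall back to the available-CPU mask */
}

int main(void)
{
	/* e.g. a step owning 2 whole sockets, 8 whole cores, 16 threads */
	const char *name[] = { "none", "sockets", "cores", "threads" };
	printf("8 tasks  -> bind to %s\n",
	       name[pick_auto_bind_level(8, 2, 0, 8, 0, 16)]);   /* cores */
	printf("16 tasks -> bind to %s\n",
	       name[pick_auto_bind_level(16, 2, 0, 8, 0, 16)]);  /* threads */
	printf("2 tasks  -> bind to %s\n",
	       name[pick_auto_bind_level(2, 2, 0, 8, 0, 16)]);   /* sockets */
	return 0;
}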
/* * slurm_sprint_node_table - output information about a specific Slurm nodes * based upon message as loaded using slurm_load_node * IN node_ptr - an individual node information record pointer * IN one_liner - print as a single line if true * RET out - char * containing formatted output (must be freed after call) * NULL is returned on failure. */ char *slurm_sprint_node_table(node_info_t *node_ptr, int one_liner) { uint32_t my_state = node_ptr->node_state; char *cloud_str = "", *comp_str = "", *drain_str = "", *power_str = ""; char time_str[32]; char *out = NULL, *reason_str = NULL; uint16_t alloc_cpus = 0; int idle_cpus; uint64_t alloc_memory; char *node_alloc_tres = NULL; char *line_end = (one_liner) ? " " : "\n "; slurm_ctl_conf_info_msg_t *slurm_ctl_conf_ptr = NULL; if (slurm_load_ctl_conf((time_t) NULL, &slurm_ctl_conf_ptr) != SLURM_SUCCESS) fatal("Cannot load slurmctld conf file"); if (my_state & NODE_STATE_CLOUD) { my_state &= (~NODE_STATE_CLOUD); cloud_str = "+CLOUD"; } if (my_state & NODE_STATE_COMPLETING) { my_state &= (~NODE_STATE_COMPLETING); comp_str = "+COMPLETING"; } if (my_state & NODE_STATE_DRAIN) { my_state &= (~NODE_STATE_DRAIN); drain_str = "+DRAIN"; } if (my_state & NODE_STATE_FAIL) { my_state &= (~NODE_STATE_FAIL); drain_str = "+FAIL"; } if (my_state & NODE_STATE_POWER_SAVE) { my_state &= (~NODE_STATE_POWER_SAVE); power_str = "+POWER"; } if (my_state & NODE_STATE_POWERING_DOWN) { my_state &= (~NODE_STATE_POWERING_DOWN); power_str = "+POWERING_DOWN"; } slurm_get_select_nodeinfo(node_ptr->select_nodeinfo, SELECT_NODEDATA_SUBCNT, NODE_STATE_ALLOCATED, &alloc_cpus); idle_cpus = node_ptr->cpus - alloc_cpus; if (idle_cpus && (idle_cpus != node_ptr->cpus)) { my_state &= NODE_STATE_FLAGS; my_state |= NODE_STATE_MIXED; } /****** Line 1 ******/ xstrfmtcat(out, "NodeName=%s ", node_ptr->name); if (node_ptr->arch) xstrfmtcat(out, "Arch=%s ", node_ptr->arch); if (node_ptr->cpu_bind) { char tmp_str[128]; slurm_sprint_cpu_bind_type(tmp_str, node_ptr->cpu_bind); xstrfmtcat(out, "CpuBind=%s ", tmp_str); } xstrfmtcat(out, "CoresPerSocket=%u ", node_ptr->cores); xstrcat(out, line_end); /****** Line ******/ xstrfmtcat(out, "CPUAlloc=%u CPUTot=%u ", alloc_cpus, node_ptr->cpus); if (node_ptr->cpu_load == NO_VAL) xstrcat(out, "CPULoad=N/A"); else xstrfmtcat(out, "CPULoad=%.2f", (node_ptr->cpu_load / 100.0)); xstrcat(out, line_end); /****** Line ******/ xstrfmtcat(out, "AvailableFeatures=%s", node_ptr->features); xstrcat(out, line_end); /****** Line ******/ xstrfmtcat(out, "ActiveFeatures=%s", node_ptr->features_act); xstrcat(out, line_end); /****** Line ******/ xstrfmtcat(out, "Gres=%s", node_ptr->gres); xstrcat(out, line_end); /****** Line (optional) ******/ if (node_ptr->gres_drain) { xstrfmtcat(out, "GresDrain=%s", node_ptr->gres_drain); xstrcat(out, line_end); } /****** Line (optional) ******/ if (node_ptr->gres_used) { xstrfmtcat(out, "GresUsed=%s", node_ptr->gres_used); xstrcat(out, line_end); } /****** Line (optional) ******/ { bool line_used = false; if (node_ptr->node_addr) { xstrfmtcat(out, "NodeAddr=%s ", node_ptr->node_addr); line_used = true; } if (node_ptr->node_hostname) { xstrfmtcat(out, "NodeHostName=%s ", node_ptr->node_hostname); line_used = true; } if (node_ptr->port != slurm_get_slurmd_port()) { xstrfmtcat(out, "Port=%u ", node_ptr->port); line_used = true; } if (node_ptr->version && xstrcmp(node_ptr->version, slurm_ctl_conf_ptr->version)) { xstrfmtcat(out, "Version=%s", node_ptr->version); line_used = true; } if (line_used) xstrcat(out, line_end); } /****** Line 
******/ if (node_ptr->os) { xstrfmtcat(out, "OS=%s ", node_ptr->os); xstrcat(out, line_end); } /****** Line ******/ slurm_get_select_nodeinfo(node_ptr->select_nodeinfo, SELECT_NODEDATA_MEM_ALLOC, NODE_STATE_ALLOCATED, &alloc_memory); xstrfmtcat(out, "RealMemory=%"PRIu64" AllocMem=%"PRIu64" ", node_ptr->real_memory, alloc_memory); if (node_ptr->free_mem == NO_VAL64) xstrcat(out, "FreeMem=N/A "); else xstrfmtcat(out, "FreeMem=%"PRIu64" ", node_ptr->free_mem); xstrfmtcat(out, "Sockets=%u Boards=%u", node_ptr->sockets, node_ptr->boards); xstrcat(out, line_end); /****** core & memory specialization Line (optional) ******/ if (node_ptr->core_spec_cnt || node_ptr->cpu_spec_list || node_ptr->mem_spec_limit) { if (node_ptr->core_spec_cnt) { xstrfmtcat(out, "CoreSpecCount=%u ", node_ptr->core_spec_cnt); } if (node_ptr->cpu_spec_list) { xstrfmtcat(out, "CPUSpecList=%s ", node_ptr->cpu_spec_list); } if (node_ptr->mem_spec_limit) { xstrfmtcat(out, "MemSpecLimit=%"PRIu64"", node_ptr->mem_spec_limit); } xstrcat(out, line_end); } /****** Line ******/ xstrfmtcat(out, "State=%s%s%s%s%s ThreadsPerCore=%u TmpDisk=%u Weight=%u ", node_state_string(my_state), cloud_str, comp_str, drain_str, power_str, node_ptr->threads, node_ptr->tmp_disk, node_ptr->weight); if (node_ptr->owner == NO_VAL) { xstrcat(out, "Owner=N/A "); } else { char *user_name = uid_to_string((uid_t) node_ptr->owner); xstrfmtcat(out, "Owner=%s(%u) ", user_name, node_ptr->owner); xfree(user_name); } xstrfmtcat(out, "MCS_label=%s", (node_ptr->mcs_label == NULL) ? "N/A" : node_ptr->mcs_label); xstrcat(out, line_end); /****** Line ******/ if ((node_ptr->next_state != NO_VAL) && (my_state & NODE_STATE_REBOOT)) { xstrfmtcat(out, "NextState=%s", node_state_string(node_ptr->next_state)); xstrcat(out, line_end); } /****** Line ******/ if (node_ptr->partitions) { xstrfmtcat(out, "Partitions=%s ", node_ptr->partitions); xstrcat(out, line_end); } /****** Line ******/ if (node_ptr->boot_time) { slurm_make_time_str((time_t *)&node_ptr->boot_time, time_str, sizeof(time_str)); xstrfmtcat(out, "BootTime=%s ", time_str); } else { xstrcat(out, "BootTime=None "); } if (node_ptr->slurmd_start_time) { slurm_make_time_str ((time_t *)&node_ptr->slurmd_start_time, time_str, sizeof(time_str)); xstrfmtcat(out, "SlurmdStartTime=%s", time_str); } else { xstrcat(out, "SlurmdStartTime=None"); } xstrcat(out, line_end); /****** TRES Line ******/ select_g_select_nodeinfo_get(node_ptr->select_nodeinfo, SELECT_NODEDATA_TRES_ALLOC_FMT_STR, NODE_STATE_ALLOCATED, &node_alloc_tres); xstrfmtcat(out, "CfgTRES=%s", node_ptr->tres_fmt_str); xstrcat(out, line_end); xstrfmtcat(out, "AllocTRES=%s", (node_alloc_tres) ? 
node_alloc_tres : ""); xfree(node_alloc_tres); xstrcat(out, line_end); /****** Power Management Line ******/ if (!node_ptr->power || (node_ptr->power->cap_watts == NO_VAL)) xstrcat(out, "CapWatts=n/a"); else xstrfmtcat(out, "CapWatts=%u", node_ptr->power->cap_watts); xstrcat(out, line_end); /****** Power Consumption Line ******/ if (!node_ptr->energy || node_ptr->energy->current_watts == NO_VAL) xstrcat(out, "CurrentWatts=n/s AveWatts=n/s"); else xstrfmtcat(out, "CurrentWatts=%u AveWatts=%u", node_ptr->energy->current_watts, node_ptr->energy->ave_watts); xstrcat(out, line_end); /****** external sensors Line ******/ if (!node_ptr->ext_sensors || node_ptr->ext_sensors->consumed_energy == NO_VAL64) xstrcat(out, "ExtSensorsJoules=n/s "); else xstrfmtcat(out, "ExtSensorsJoules=%"PRIu64" ", node_ptr->ext_sensors->consumed_energy); if (!node_ptr->ext_sensors || node_ptr->ext_sensors->current_watts == NO_VAL) xstrcat(out, "ExtSensorsWatts=n/s "); else xstrfmtcat(out, "ExtSensorsWatts=%u ", node_ptr->ext_sensors->current_watts); if (!node_ptr->ext_sensors || node_ptr->ext_sensors->temperature == NO_VAL) xstrcat(out, "ExtSensorsTemp=n/s"); else xstrfmtcat(out, "ExtSensorsTemp=%u", node_ptr->ext_sensors->temperature); xstrcat(out, line_end); /****** Line ******/ if (node_ptr->reason && node_ptr->reason[0]) xstrcat(reason_str, node_ptr->reason); if (reason_str) { int inx = 1; char *save_ptr = NULL, *tok, *user_name; tok = strtok_r(reason_str, "\n", &save_ptr); while (tok) { if (inx == 1) { xstrcat(out, "Reason="); } else { xstrcat(out, line_end); xstrcat(out, " "); } xstrfmtcat(out, "%s", tok); if ((inx++ == 1) && node_ptr->reason_time) { user_name = uid_to_string(node_ptr->reason_uid); slurm_make_time_str((time_t *)&node_ptr->reason_time, time_str, sizeof(time_str)); xstrfmtcat(out, " [%s@%s]", user_name, time_str); xfree(user_name); } tok = strtok_r(NULL, "\n", &save_ptr); } xfree(reason_str); } if (one_liner) xstrcat(out, "\n"); else xstrcat(out, "\n\n"); slurm_free_ctl_conf(slurm_ctl_conf_ptr); return out; }
/* * slurm_sprint_partition_info - output information about a specific Slurm * partition based upon message as loaded using slurm_load_partitions * IN part_ptr - an individual partition information record pointer * IN one_liner - print as a single line if true * RET out - char * containing formatted output (must be freed after call) * NULL is returned on failure. */ char *slurm_sprint_partition_info ( partition_info_t * part_ptr, int one_liner ) { char *out = NULL; char *allow_deny, *value; uint16_t force, preempt_mode, val; char *line_end = (one_liner) ? " " : "\n "; /****** Line 1 ******/ xstrfmtcat(out, "PartitionName=%s", part_ptr->name); xstrcat(out, line_end); /****** Line 2 ******/ if ((part_ptr->allow_groups == NULL) || (part_ptr->allow_groups[0] == '\0')) xstrcat(out, "AllowGroups=ALL"); else { xstrfmtcat(out, "AllowGroups=%s", part_ptr->allow_groups); } if (part_ptr->allow_accounts || !part_ptr->deny_accounts) { allow_deny = "Allow"; if ((part_ptr->allow_accounts == NULL) || (part_ptr->allow_accounts[0] == '\0')) value = "ALL"; else value = part_ptr->allow_accounts; } else { allow_deny = "Deny"; value = part_ptr->deny_accounts; } xstrfmtcat(out, " %sAccounts=%s", allow_deny, value); if (part_ptr->allow_qos || !part_ptr->deny_qos) { allow_deny = "Allow"; if ((part_ptr->allow_qos == NULL) || (part_ptr->allow_qos[0] == '\0')) value = "ALL"; else value = part_ptr->allow_qos; } else { allow_deny = "Deny"; value = part_ptr->deny_qos; } xstrfmtcat(out, " %sQos=%s", allow_deny, value); xstrcat(out, line_end); /****** Line 3 ******/ if (part_ptr->allow_alloc_nodes == NULL) xstrcat(out, "AllocNodes=ALL"); else xstrfmtcat(out, "AllocNodes=%s", part_ptr->allow_alloc_nodes); if (part_ptr->alternate != NULL) { xstrfmtcat(out, " Alternate=%s", part_ptr->alternate); } if (part_ptr->flags & PART_FLAG_DEFAULT) xstrcat(out, " Default=YES"); else xstrcat(out, " Default=NO"); if (part_ptr->cpu_bind) { char tmp_str[128]; slurm_sprint_cpu_bind_type(tmp_str, part_ptr->cpu_bind); xstrfmtcat(out, " CpuBind=%s ", tmp_str); } if (part_ptr->qos_char) xstrfmtcat(out, " QoS=%s", part_ptr->qos_char); else xstrcat(out, " QoS=N/A"); xstrcat(out, line_end); /****** Line 5 ******/ if (part_ptr->default_time == INFINITE) xstrcat(out, "DefaultTime=UNLIMITED"); else if (part_ptr->default_time == NO_VAL) xstrcat(out, "DefaultTime=NONE"); else { char time_line[32]; secs2time_str(part_ptr->default_time * 60, time_line, sizeof(time_line)); xstrfmtcat(out, "DefaultTime=%s", time_line); } if (part_ptr->flags & PART_FLAG_NO_ROOT) xstrcat(out, " DisableRootJobs=YES"); else xstrcat(out, " DisableRootJobs=NO"); if (part_ptr->flags & PART_FLAG_EXCLUSIVE_USER) xstrcat(out, " ExclusiveUser=YES"); else xstrcat(out, " ExclusiveUser=NO"); xstrfmtcat(out, " GraceTime=%u", part_ptr->grace_time); if (part_ptr->flags & PART_FLAG_HIDDEN) xstrcat(out, " Hidden=YES"); else xstrcat(out, " Hidden=NO"); xstrcat(out, line_end); /****** Line 6 ******/ if (part_ptr->max_nodes == INFINITE) xstrcat(out, "MaxNodes=UNLIMITED"); else xstrfmtcat(out, "MaxNodes=%u", part_ptr->max_nodes); if (part_ptr->max_time == INFINITE) xstrcat(out, " MaxTime=UNLIMITED"); else { char time_line[32]; secs2time_str(part_ptr->max_time * 60, time_line, sizeof(time_line)); xstrfmtcat(out, " MaxTime=%s", time_line); } xstrfmtcat(out, " MinNodes=%u", part_ptr->min_nodes); if (part_ptr->flags & PART_FLAG_LLN) xstrcat(out, " LLN=YES"); else xstrcat(out, " LLN=NO"); if (part_ptr->max_cpus_per_node == INFINITE) xstrcat(out, " MaxCPUsPerNode=UNLIMITED"); else { xstrfmtcat(out, " 
MaxCPUsPerNode=%u", part_ptr->max_cpus_per_node); } xstrcat(out, line_end); /****** Line 7 ******/ xstrfmtcat(out, "Nodes=%s", part_ptr->nodes); xstrcat(out, line_end); /****** Line 7 ******/ xstrfmtcat(out, "PriorityJobFactor=%u", part_ptr->priority_job_factor); xstrfmtcat(out, " PriorityTier=%u", part_ptr->priority_tier); if (part_ptr->flags & PART_FLAG_ROOT_ONLY) xstrcat(out, " RootOnly=YES"); else xstrcat(out, " RootOnly=NO"); if (part_ptr->flags & PART_FLAG_REQ_RESV) xstrcat(out, " ReqResv=YES"); else xstrcat(out, " ReqResv=NO"); force = part_ptr->max_share & SHARED_FORCE; val = part_ptr->max_share & (~SHARED_FORCE); if (val == 0) xstrcat(out, " OverSubscribe=EXCLUSIVE"); else if (force) xstrfmtcat(out, " OverSubscribe=FORCE:%u", val); else if (val == 1) xstrcat(out, " OverSubscribe=NO"); else xstrfmtcat(out, " OverSubscribe=YES:%u", val); xstrcat(out, line_end); /****** Line ******/ if (part_ptr->over_time_limit == NO_VAL16) xstrfmtcat(out, "OverTimeLimit=NONE"); else if (part_ptr->over_time_limit == INFINITE16) xstrfmtcat(out, "OverTimeLimit=UNLIMITED"); else xstrfmtcat(out, "OverTimeLimit=%u", part_ptr->over_time_limit); preempt_mode = part_ptr->preempt_mode; if (preempt_mode == NO_VAL16) preempt_mode = slurm_get_preempt_mode(); /* use cluster param */ xstrfmtcat(out, " PreemptMode=%s", preempt_mode_string(preempt_mode)); xstrcat(out, line_end); /****** Line ******/ if (part_ptr->state_up == PARTITION_UP) xstrcat(out, "State=UP"); else if (part_ptr->state_up == PARTITION_DOWN) xstrcat(out, "State=DOWN"); else if (part_ptr->state_up == PARTITION_INACTIVE) xstrcat(out, "State=INACTIVE"); else if (part_ptr->state_up == PARTITION_DRAIN) xstrcat(out, "State=DRAIN"); else xstrcat(out, "State=UNKNOWN"); xstrfmtcat(out, " TotalCPUs=%u", part_ptr->total_cpus); xstrfmtcat(out, " TotalNodes=%u", part_ptr->total_nodes); xstrfmtcat(out, " SelectTypeParameters=%s", select_type_param_string(part_ptr->cr_type)); xstrcat(out, line_end); /****** Line ******/ value = job_defaults_str(part_ptr->job_defaults_list); xstrfmtcat(out, "JobDefaults=%s", value); xfree(value); xstrcat(out, line_end); /****** Line ******/ if (part_ptr->def_mem_per_cpu & MEM_PER_CPU) { if (part_ptr->def_mem_per_cpu == MEM_PER_CPU) { xstrcat(out, "DefMemPerCPU=UNLIMITED"); } else { xstrfmtcat(out, "DefMemPerCPU=%"PRIu64"", part_ptr->def_mem_per_cpu & (~MEM_PER_CPU)); } } else if (part_ptr->def_mem_per_cpu == 0) { xstrcat(out, "DefMemPerNode=UNLIMITED"); } else { xstrfmtcat(out, "DefMemPerNode=%"PRIu64"", part_ptr->def_mem_per_cpu); } if (part_ptr->max_mem_per_cpu & MEM_PER_CPU) { if (part_ptr->max_mem_per_cpu == MEM_PER_CPU) { xstrcat(out, " MaxMemPerCPU=UNLIMITED"); } else { xstrfmtcat(out, " MaxMemPerCPU=%"PRIu64"", part_ptr->max_mem_per_cpu & (~MEM_PER_CPU)); } } else if (part_ptr->max_mem_per_cpu == 0) { xstrcat(out, " MaxMemPerNode=UNLIMITED"); } else { xstrfmtcat(out, " MaxMemPerNode=%"PRIu64"", part_ptr->max_mem_per_cpu); } /****** Line 10 ******/ if (part_ptr->billing_weights_str) { xstrcat(out, line_end); xstrfmtcat(out, "TRESBillingWeights=%s", part_ptr->billing_weights_str); } if (one_liner) xstrcat(out, "\n"); else xstrcat(out, "\n\n"); return out; }