// Find the last non-zero word, last bit set in word has index // [bit_fls(word)]. unsigned bitmap_fls(const uint32_t *bitmap, unsigned from, unsigned size) { unsigned from_word = BITMAP_WORD(from); unsigned index_last = BITMAP_WORD(size - 1); uint32_t last = bitmap[index_last] & BITMAP_MASK_LAST(size); if (index_last == from_word) { uint32_t mask = SLICE(~0u, (size - 1) % BITS_PER_WORD, from % BITS_PER_WORD) << (from % BITS_PER_WORD); last = last & mask; } if (last != 0) return index_last * BITS_PER_WORD + bit_fls(last); for (int w = index_last - 1; w >= (int)from_word; --w) { if (bitmap[w] != 0) return w * BITS_PER_WORD + bit_fls(bitmap[w]); } if (index_last != from_word && bitmap[from_word] != 0) { uint32_t first_mask = SLICE(~0u, BITS_PER_WORD - 1, from % BITS_PER_WORD) << (from % BITS_PER_WORD); uint32_t first_word = bitmap[from_word] & first_mask; if (first_word != 0) return from_word * BITS_PER_WORD + bit_fls(first_word); } return size; }
/* Unit tests for the 32-bit bit_fls() (find-last-set) helper. */
void test_fls32()
{
	uint32_t bit;

	/* Edge cases and extreme values first. */
	sput_fail_unless(bit_fls(0u) == -1, "Edge case: arg=0");
	sput_fail_unless(bit_fls(1u) == 0, "arg=1");
	sput_fail_unless(bit_fls(0xFFFFFFFFu) == 31, "arg=0xFFFFFFFF");
	sput_fail_unless(bit_fls(0x80000000u) == 31, "arg=0x80000000");

	/* Sweep a single walking bit and a shrinking all-ones mask. */
	for (bit = 0; bit < 32; ++bit) {
		sput_fail_unless(bit_fls(1u << bit) == bit, "General Test0");
		sput_fail_unless(bit_fls(0xFFFFFFFFu >> bit) == 31 - bit,
				 "General test1");
	}
}
/* Unit tests for the 64-bit bit_fls() (find-last-set) helper.
 * FIXES: the 0x8000000000000000 case called bit_ffs() instead of bit_fls()
 * (it passed only because ffs==fls for a single set bit), and the all-ones
 * test message was copy-pasted from the 32-bit suite. */
void test_fls64()
{
	sput_fail_unless(bit_fls(0ull) == -1, "Edge case: arg=0");
	sput_fail_unless(bit_fls(1ull) == 0, "arg=1");
	sput_fail_unless(bit_fls(0xFFFFFFFFFFFFFFFFull) == 63,
			 "arg=0xFFFFFFFFFFFFFFFF");
	sput_fail_unless(bit_fls(0x8000000000000000ull) == 63,
			 "arg=0x8000000000000000");
	for (uint64_t i = 0; i < 64; ++i) {
		sput_fail_unless(bit_fls(1ull << i) == i, "General Test");
		sput_fail_unless(bit_fls(0xFFFFFFFFFFFFFFFFull >> i) == 63 - i,
				 "General test1");
	}
}
/* For a newly starting job, set "new_job_time" in each of it's nodes * NOTE: The job and node data structures must be locked on function entry */ extern void set_node_new_job(struct job_record *job_ptr, struct node_record *node_record_table_ptr) { int i, i_first, i_last; struct node_record *node_ptr; time_t now = time(NULL); if (!job_ptr || !job_ptr->node_bitmap) { error("%s: job_ptr node_bitmap is NULL", __func__); return; } i_first = bit_ffs(job_ptr->node_bitmap); if (i_first >= 0) i_last = bit_fls(job_ptr->node_bitmap); else i_last = i_first - 1; for (i = i_first; i <= i_last; i++) { if (!bit_test(job_ptr->node_bitmap, i)) continue; node_ptr = node_record_table_ptr + i; if (node_ptr->power) node_ptr->power->new_job_time = now; } }
/* Compute resource usage for the given job on all available resources
 *
 * IN: job_ptr - pointer to the job requesting resources
 * IN: node_map - bitmap of available nodes
 * IN/OUT: core_map - bitmap of available cores
 * IN: cr_node_cnt - total number of nodes in the cluster
 * IN: cr_type - resource type
 * OUT: cpu_cnt - number of cpus that can be used by this job
 * IN: test_only - ignore allocated memory check
 * RET SLURM_SUCCESS index of selected node or -1 if none
 */
static int _get_res_usage(struct job_record *job_ptr, bitstr_t *node_map,
			  bitstr_t *core_map, uint32_t cr_node_cnt,
			  struct node_use_record *node_usage,
			  uint16_t cr_type, uint16_t **cpu_cnt_ptr,
			  bool test_only)
{
	uint16_t *cpu_cnt, max_cpu_cnt = 0, part_lln_flag = 0;
	int i, i_first, i_last;
	int node_inx = -1;

	if (cr_node_cnt != node_record_count) {
		error("select/serial: node count inconsistent with slurmctld");
		return SLURM_ERROR;
	}
	if (!job_ptr) {
		error("select/serial: NULL job pointer");
		return SLURM_ERROR;
	}
	/* LLN (least-loaded-node) selection may be forced per-partition */
	if (job_ptr->part_ptr && (job_ptr->part_ptr->flags & PART_FLAG_LLN))
		part_lln_flag = 1;
	/* Restrict candidates to any nodes the job explicitly requires */
	if (job_ptr->details && job_ptr->details->req_node_bitmap)
		bit_and(node_map, job_ptr->details->req_node_bitmap);
	cpu_cnt = xmalloc(cr_node_cnt * sizeof(uint16_t));
	i_first = bit_ffs(node_map);
	if (i_first >= 0)
		i_last = bit_fls(node_map);
	else
		i_last = -2;	/* empty bitmap: make the loop a no-op */
	for (i = i_first; i <= i_last; i++) {
		if (!bit_test(node_map, i))
			continue;
		cpu_cnt[i] = _can_job_run_on_node(job_ptr, core_map, i,
						  node_usage, cr_type,
						  test_only);
		/* Not in LLN mode: take the first node that can run the job
		 * and clear every other candidate from node_map */
		if (!(cr_type & CR_LLN) && !part_lln_flag && cpu_cnt[i]) {
			bit_nclear(node_map, 0, (node_record_count - 1));
			bit_set(node_map, i);
			node_inx = i;
			break;	/* select/serial: only need one node */
		}
	}
	if ((cr_type & CR_LLN) || part_lln_flag) {
		/* LLN mode: pick the node offering the most usable CPUs */
		for (i = i_first; i <= i_last; i++) {
			if (cpu_cnt[i] > max_cpu_cnt) {
				max_cpu_cnt = cpu_cnt[i];
				node_inx = i;
			}
		}
		if (node_inx >= 0) {
			bit_nclear(node_map, 0, (node_record_count - 1));
			bit_set(node_map, node_inx);
		}
	}
	*cpu_cnt_ptr = cpu_cnt;	/* caller takes ownership; must xfree() */
	return node_inx;
}
/*
 * bitmap2node_name_sortable - given a bitmap, build a list of comma
 *	separated node names. names may include regular expressions
 *	(e.g. "lx[01-10]")
 * IN bitmap - bitmap pointer
 * IN sort - returned sorted list or not
 * RET pointer to node list or NULL on error
 * globals: node_record_table_ptr - pointer to node table
 * NOTE: the caller must xfree the memory at node_list when no longer required
 */
char * bitmap2node_name_sortable (bitstr_t *bitmap, bool sort)
{
	hostlist_t host_list;
	char *name_list;
	int inx, first_inx, last_inx;

	if (bitmap == NULL)
		return xstrdup("");

	/* An empty bitmap also yields an empty (but allocated) string */
	first_inx = bit_ffs(bitmap);
	if (first_inx == -1)
		return xstrdup("");
	last_inx = bit_fls(bitmap);

	host_list = hostlist_create("");
	for (inx = first_inx; inx <= last_inx; inx++) {
		if (!bit_test(bitmap, inx))
			continue;
		hostlist_push(host_list, node_record_table_ptr[inx].name);
	}
	if (sort)
		hostlist_sort(host_list);
	name_list = hostlist_ranged_string_xmalloc(host_list);
	hostlist_destroy(host_list);
	return name_list;
}
/* given an "avail" node_bitmap, return a corresponding "avail" core_bitmap
 *
 * FIX: the loop counter was uint32_t but initialized from bit_ffs(), which
 * returns -1 for an empty bitmap; termination then relied on unsigned
 * wrap-around plus a signed/unsigned comparison against i_last. The counter
 * is now signed, so the empty-bitmap case skips the loop by design. */
bitstr_t *_make_core_bitmap(bitstr_t *node_map)
{
	uint32_t c, nodes, size;
	uint32_t coff;
	int n, i_first, i_last;
	bitstr_t *core_map;

	nodes = bit_size(node_map);
	size = cr_get_coremap_offset(nodes);
	core_map = bit_alloc(size);

	i_first = bit_ffs(node_map);
	if (i_first >= 0)
		i_last = bit_fls(node_map);
	else
		i_last = -2;	/* empty bitmap: loop is a no-op */

	/* NOTE(review): 'c' only catches up to the per-node core offset when
	 * a node is set, so cores belonging to preceding unset nodes are also
	 * marked available here — presumably intentional for this selector;
	 * verify against callers. Behavior unchanged. */
	for (n = i_first, c = 0; n <= i_last; n++) {
		if (bit_test(node_map, n)) {
			coff = cr_get_coremap_offset(n + 1);
			while (c < coff) {
				bit_set(core_map, c++);
			}
		}
	}
	return core_map;
}
/* For each running job, return power allocation/use information in a List
 * containing elements of type power_by_job_t.
 * NOTE: Job data structure must be locked on function entry
 * NOTE: Call list_delete() to free return value
 * NOTE: This function is currently unused. */
extern List get_job_power(List job_list,
			  struct node_record *node_record_table_ptr)
{
	struct node_record *node_ptr;
	struct job_record *job_ptr;
	ListIterator job_iterator;
	power_by_job_t *power_ptr;
	char jobid_buf[64] = "";
	int i, i_first, i_last;
	uint64_t debug_flag = slurm_get_debug_flags();
	List job_power_list = list_create(_job_power_del);
	time_t now = time(NULL);

	job_iterator = list_iterator_create(job_list);
	while ((job_ptr = (struct job_record *) list_next(job_iterator))) {
		if (!IS_JOB_RUNNING(job_ptr))
			continue;
		/* One record per running job, even if it has no usable
		 * node bitmap (watts then remain zero) */
		power_ptr = xmalloc(sizeof(power_by_job_t));
		power_ptr->job_id = job_ptr->job_id;
		power_ptr->start_time = job_ptr->start_time;
		list_append(job_power_list, power_ptr);
		if (!job_ptr->node_bitmap) {
			error("%s: %s node_bitmap is NULL", __func__,
			      jobid2fmt(job_ptr, jobid_buf,
					sizeof(jobid_buf)));
			continue;
		}
		i_first = bit_ffs(job_ptr->node_bitmap);
		if (i_first < 0)	/* no nodes allocated */
			continue;
		i_last = bit_fls(job_ptr->node_bitmap);
		/* Sum the per-node power cap (allocation) and latest
		 * sampled consumption (use) across the job's nodes */
		for (i = i_first; i <= i_last; i++) {
			if (!bit_test(job_ptr->node_bitmap, i))
				continue;
			node_ptr = node_record_table_ptr + i;
			if (node_ptr->power) {
				power_ptr->alloc_watts +=
					node_ptr->power->cap_watts;
			}
			if (node_ptr->energy) {
				power_ptr->used_watts +=
					node_ptr->energy->current_watts;
			}
		}
		if (debug_flag & DEBUG_FLAG_POWER) {
			info("%s: %s Age=%ld(sec) AllocWatts=%u UsedWatts=%u",
			     __func__,
			     jobid2fmt(job_ptr, jobid_buf,
				       sizeof(jobid_buf)),
			     (long int) difftime(now, power_ptr->start_time),
			     power_ptr->alloc_watts, power_ptr->used_watts);
		}
	}
	list_iterator_destroy(job_iterator);
	return job_power_list;
}
/* Initialize an event-capture context.
 *
 * freq     - tick frequency of the captured clock; must be non-zero
 * log2res  - desired log2 resolution; ticks are quantized (right-shifted)
 *            by (floor(log2(freq)) - log2res) when freq exceeds it
 *
 * FIX: maxPeriod was computed with a '1ul' literal; on platforms where
 * 'long' is 32 bits, (1ul << TIME_BITS) << quantShift can overflow before
 * the widening store. Use an explicit 64-bit operand.
 * Also renamed the local 'log2', which shadowed the libm function. */
void event_capture_init(event_capture_t* capture, uint64_t freq, uint32_t log2res)
{
	assert(capture);
	assert(freq);

	/* bit_fls(freq) == floor(log2(freq)); freq != 0 so result >= 0 */
	uint32_t log2_freq = bit_fls(freq);
	uint32_t quantShift = log2_freq > log2res ? log2_freq - log2res : 0;

	capture->freq = freq;
	capture->quantShift = quantShift;
	capture->maxPeriod = ((uint64_t)1 << TIME_BITS) << quantShift;
}
/* power_job_reboot - Reboot compute nodes for a job from the head node */ extern int power_job_reboot(struct job_record *job_ptr) { int rc = SLURM_SUCCESS; int i, i_first, i_last; struct node_record *node_ptr; bitstr_t *wake_node_bitmap = NULL; time_t now = time(NULL); char *nodes, *features = NULL; wake_node_bitmap = bit_alloc(node_record_count); i_first = bit_ffs(job_ptr->node_bitmap); i_last = bit_fls(job_ptr->node_bitmap); for (i = i_first; i <= i_last; i++) { if (!bit_test(job_ptr->node_bitmap, i)) continue; node_ptr = node_record_table_ptr + i; resume_cnt++; resume_cnt_f++; node_ptr->node_state &= (~NODE_STATE_POWER_SAVE); node_ptr->node_state |= NODE_STATE_POWER_UP; node_ptr->node_state |= NODE_STATE_NO_RESPOND; bit_clear(power_node_bitmap, i); bit_clear(avail_node_bitmap, i); node_ptr->last_response = now + resume_timeout; bit_set(wake_node_bitmap, i); bit_set(resume_node_bitmap, i); } nodes = bitmap2node_name(wake_node_bitmap); if (nodes) { #if _DEBUG info("power_save: reboot nodes %s", nodes); #else verbose("power_save: reboot nodes %s", nodes); #endif if (job_ptr->details && job_ptr->details->features) features = xlate_features(job_ptr->details->features); _run_prog(resume_prog, nodes, features); xfree(features); } else { error("power_save: bitmap2nodename"); rc = SLURM_ERROR; } xfree(nodes); FREE_NULL_BITMAP(wake_node_bitmap); last_node_update = now; return rc; }
/* Remove any specialized cores from those allocated to the job */
static void _clear_spec_cores(struct job_record *job_ptr,
			      bitstr_t *avail_core_bitmap)
{
	int first_node, last_node, i_node;
	int first_core, last_core, i_core;
	/* alloc_node/alloc_core index the job's own cpus[] array and
	 * core_bitmap (which cover only allocated nodes/cores); they are
	 * advanced in lock step with the cluster-wide i_node/i_core */
	int alloc_node = -1, alloc_core = -1, size;
	job_resources_t *job_res = job_ptr->job_resrcs;
	multi_core_data_t *mc_ptr = NULL;

	if (job_ptr->details && job_ptr->details->mc_ptr)
		mc_ptr = job_ptr->details->mc_ptr;

	/* Start with every allocated core set, then clear the ones not
	 * present in avail_core_bitmap */
	size = bit_size(job_res->core_bitmap);
	bit_nset(job_res->core_bitmap, 0, size - 1);

	first_node = bit_ffs(job_res->node_bitmap);
	if (first_node >= 0)
		last_node = bit_fls(job_res->node_bitmap);
	else
		last_node = first_node - 1;	/* empty bitmap: loop no-op */
	for (i_node = first_node; i_node <= last_node; i_node++) {
		if (!bit_test(job_res->node_bitmap, i_node))
			continue;
		/* Recompute this node's CPU count from scratch */
		job_res->cpus[++alloc_node] = 0;
		first_core = cr_get_coremap_offset(i_node);
		last_core = cr_get_coremap_offset(i_node + 1) - 1;
		for (i_core = first_core; i_core <= last_core; i_core++) {
			alloc_core++;
			if (bit_test(avail_core_bitmap, i_core)) {
				/* Usable core: credit its threads, capped
				 * by any --threads-per-core limit */
				uint16_t tpc =
					select_node_record[i_node].vpus;
				if (mc_ptr &&
				    (mc_ptr->threads_per_core != NO_VAL16) &&
				    (mc_ptr->threads_per_core < tpc))
					tpc = mc_ptr->threads_per_core;
				job_res->cpus[alloc_node] += tpc;
			} else {
				/* Specialized core: drop it from the job */
				bit_clear(job_res->core_bitmap, alloc_core);
			}
		}
	}
}
/*
 * bitmap2hostlist - given a bitmap, build a hostlist
 * IN bitmap - bitmap pointer
 * RET pointer to hostlist or NULL on error
 * globals: node_record_table_ptr - pointer to node table
 * NOTE: the caller must xfree the memory at node_list when no longer required
 */
hostlist_t bitmap2hostlist (bitstr_t *bitmap)
{
	hostlist_t host_list;
	int inx, first_inx, last_inx;

	if (bitmap == NULL)
		return NULL;

	/* NULL is also returned when the bitmap has no bits set */
	first_inx = bit_ffs(bitmap);
	if (first_inx == -1)
		return NULL;
	last_inx = bit_fls(bitmap);

	host_list = hostlist_create(NULL);
	for (inx = first_inx; inx <= last_inx; inx++) {
		if (!bit_test(bitmap, inx))
			continue;
		hostlist_push_host(host_list,
				   node_record_table_ptr[inx].name);
	}
	return host_list;
}
// Allocate a GL texture object of the requested type/size, upload the
// initial pixel data if provided, and build a mipmap chain when
// NVG_IMAGE_GENERATE_MIPMAPS is set. Returns the texture id, or 0 if the
// texture slot could not be allocated.
static int glnvg__renderCreateTexture(void* uptr, int type, int w, int h, int imageFlags, const unsigned char* data)
{
	GLNVGcontext* gl = (GLNVGcontext*)uptr;
	GLNVGtexture* tex = glnvg__allocTexture(gl);
	if (tex == NULL) return 0;
	const bool formatRGBA8 = type == NVG_TEXTURE_RGBA;
	const bool useMipmaps = (imageFlags & NVG_IMAGE_GENERATE_MIPMAPS) != 0;
	glCreateTextures(GL_TEXTURE_2D, 1, &tex->tex);
	// NOTE(review): bit_fls yields the index of the highest set bit,
	// i.e. floor(log2(x)); a full mip chain for a 2^k-sized texture
	// needs k+1 levels. Confirm whether dropping the final 1x1 level
	// here is intentional.
	const uint32_t mipCount = useMipmaps ? bit_fls(core::max<uint32_t>(w, h)) : 1;
	// Immutable storage: level count and format are fixed up front
	glTextureStorage2D(tex->tex, mipCount, formatRGBA8 ? GL_RGBA8 : GL_R8, w, h);
	tex->width = w;
	tex->height = h;
	tex->type = type;
	if (data) {
		// Tightly packed rows; reset the unpack state afterwards
		glPixelStorei(GL_UNPACK_ALIGNMENT, 1);
		glPixelStorei(GL_UNPACK_ROW_LENGTH, tex->width);
		glPixelStorei(GL_UNPACK_SKIP_PIXELS, 0);
		glPixelStorei(GL_UNPACK_SKIP_ROWS, 0);
		glTextureSubImage2D(tex->tex, 0, 0, 0, w, h, formatRGBA8 ? GL_RGBA : GL_RED, GL_UNSIGNED_BYTE, data);
		glPixelStorei(GL_UNPACK_ALIGNMENT, 4);
		glPixelStorei(GL_UNPACK_ROW_LENGTH, 0);
		glPixelStorei(GL_UNPACK_SKIP_PIXELS, 0);
		glPixelStorei(GL_UNPACK_SKIP_ROWS, 0);
		// The new way to build mipmaps on GLES and GL3
		if (useMipmaps) {
			glGenerateTextureMipmap(tex->tex);
		}
	}
	glTextureParameteri(tex->tex, GL_TEXTURE_MIN_FILTER, useMipmaps ? GL_LINEAR_MIPMAP_LINEAR : GL_LINEAR);
	glTextureParameteri(tex->tex, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
	return tex->id;
}
/* Compute resource usage for the given job on all available resources
 *
 * IN: job_ptr - pointer to the job requesting resources
 * IN: node_map - bitmap of available nodes
 * IN/OUT: core_map - bitmap of available cores
 * IN: cr_node_cnt - total number of nodes in the cluster
 * IN: cr_type - resource type
 * OUT: cpu_cnt - number of cpus that can be used by this job
 * IN: test_only - ignore allocated memory check
 * RET SLURM_SUCCESS index of selected node or -1 if none
 *
 * FIXES: the loop counter was uint32_t while bit_ffs() can return -1 for an
 * empty bitmap (unsigned wrap plus a signed/unsigned comparison); it is now
 * signed. Also added the NULL job_ptr guard present in the LLN-aware
 * variant of this function.
 */
static int _get_res_usage(struct job_record *job_ptr, bitstr_t *node_map,
			  bitstr_t *core_map, uint32_t cr_node_cnt,
			  struct node_use_record *node_usage,
			  uint16_t cr_type, uint16_t **cpu_cnt_ptr,
			  bool test_only)
{
	uint16_t *cpu_cnt;
	int n, i_first, i_last;
	int node_inx = -1;

	if (cr_node_cnt != node_record_count) {
		error("select/serial: node count inconsistent with slurmctld");
		return SLURM_ERROR;
	}
	if (!job_ptr) {
		error("select/serial: NULL job pointer");
		return SLURM_ERROR;
	}
	/* Restrict candidates to any nodes the job explicitly requires */
	if (job_ptr->details && job_ptr->details->req_node_bitmap)
		bit_and(node_map, job_ptr->details->req_node_bitmap);
	cpu_cnt = xmalloc(cr_node_cnt * sizeof(uint16_t));
	i_first = bit_ffs(node_map);
	if (i_first >= 0)
		i_last = bit_fls(node_map);
	else
		i_last = -2;	/* empty bitmap: loop is a no-op */
	for (n = i_first; n <= i_last; n++) {
		if (!bit_test(node_map, n))
			continue;
		cpu_cnt[n] = _can_job_run_on_node(job_ptr, core_map, n,
						  node_usage, cr_type,
						  test_only);
		if (cpu_cnt[n]) {
			/* Keep only the selected node in node_map */
			bit_nclear(node_map, 0, (node_record_count - 1));
			bit_set(node_map, n);
			node_inx = n;
			break;	/* select/serial: only need one node */
		}
	}
	*cpu_cnt_ptr = cpu_cnt;	/* caller takes ownership; must xfree() */
	return node_inx;
}
/* a helper function for _add_job_to_active when GS_SOCKET
 * a job has just been added to p_ptr->active_resmap, so set all cores of
 * each used socket to avoid activating another job on the same socket
 *
 * FIX: the fatal() message named a nonexistent "_afill_sockets"; it now
 * reports the correct function name. */
static void _fill_sockets(bitstr_t *job_nodemap, struct gs_part *p_ptr)
{
	uint32_t c, i;
	int n, first_bit, last_bit;

	if (!job_nodemap || !p_ptr || !p_ptr->active_resmap)
		return;
	first_bit = bit_ffs(job_nodemap);
	last_bit = bit_fls(job_nodemap);
	if ((first_bit < 0) || (last_bit < 0))
		fatal("gang: _fill_sockets: nodeless job?");

	/* Advance 'c' past the core offsets of nodes before the first
	 * node used by this job */
	for (c = 0, n = 0; n < first_bit; n++) {
		c += _get_phys_bit_cnt(n);
	}
	for (n = first_bit; n <= last_bit; n++) {
		uint16_t s, socks, cps, cores_per_node;
		cores_per_node = _get_phys_bit_cnt(n);
		if (bit_test(job_nodemap, n) == 0) {
			c += cores_per_node;	/* node unused: skip cores */
			continue;
		}
		socks = _get_socket_cnt(n);
		cps = cores_per_node / socks;	/* cores per socket */
		for (s = 0; s < socks; s++) {
			/* Look for any active core on this socket */
			for (i = c; i < c+cps; i++) {
				if (bit_test(p_ptr->active_resmap, i))
					break;
			}
			if (i < c+cps) {
				/* set all bits on this used socket */
				bit_nset(p_ptr->active_resmap, c, c+cps-1);
			}
			c += cps;
		}
	}
}
/**
 * do_basil_reserve - create a BASIL reservation.
 * IN job_ptr - pointer to job which has just been allocated resources
 * RET 0 or error code, job will abort or be requeued on failure
 */
extern int do_basil_reserve(struct job_record *job_ptr)
{
	struct nodespec *ns_head = NULL;
	uint16_t mppwidth = 0, mppdepth, mppnppn;
	uint32_t mppmem = 0, node_min_mem = 0;
	uint32_t resv_id;
	int i, first_bit, last_bit;
	hostlist_t hl;
	long rc;
	char *user, batch_id[16];

	if (!job_ptr->job_resrcs || job_ptr->job_resrcs->nhosts == 0)
		return SLURM_SUCCESS;

	debug3("job #%u: %u nodes = %s, cpus=%u" , job_ptr->job_id,
	       job_ptr->job_resrcs->nhosts,
	       job_ptr->job_resrcs->nodes,
	       job_ptr->job_resrcs->ncpus
	);

	if (job_ptr->job_resrcs->node_bitmap == NULL) {
		error("job %u node_bitmap not set", job_ptr->job_id);
		return SLURM_SUCCESS;
	}

	first_bit = bit_ffs(job_ptr->job_resrcs->node_bitmap);
	last_bit = bit_fls(job_ptr->job_resrcs->node_bitmap);
	if (first_bit == -1 || last_bit == -1)
		return SLURM_SUCCESS;	/* no nodes allocated */

	/* aprun -d equivalent: depth = CPUs per task, at least 1 */
	mppdepth = MAX(1, job_ptr->details->cpus_per_task);
	mppnppn = job_ptr->details->ntasks_per_node;

	/* mppmem */
	if (job_ptr->details->pn_min_memory & MEM_PER_CPU) {
		/* Only honour --mem-per-cpu if --ntasks has been given */
		if (job_ptr->details->num_tasks)
			mppmem = job_ptr->details->pn_min_memory &
				 ~MEM_PER_CPU;
	} else if (job_ptr->details->pn_min_memory) {
		node_min_mem = job_ptr->details->pn_min_memory;
	}

	/* NOTE(review): 'hl' is created here but never pushed to, used, or
	 * destroyed in this version — looks like a leftover/leak; verify */
	hl = hostlist_create("");
	if (hl == NULL)
		fatal("hostlist_create: malloc error");

	for (i = first_bit; i <= last_bit; i++) {
		struct node_record *node_ptr = node_record_table_ptr + i;
		uint32_t basil_node_id;

		if (!bit_test(job_ptr->job_resrcs->node_bitmap, i))
			continue;

		if (!node_ptr->name || node_ptr->name[0] == '\0')
			continue;	/* bad node */

		/* ALPS node ids are encoded in host names as nidNNNNN */
		if (sscanf(node_ptr->name, "nid%05u", &basil_node_id) != 1)
			fatal("can not read basil_node_id from %s",
			      node_ptr->name);

		if (ns_add_node(&ns_head, basil_node_id) != 0) {
			error("can not add node %s (nid%05u)", node_ptr->name,
			      basil_node_id);
			free_nodespec(ns_head);
			return SLURM_ERROR;
		}

		if (node_min_mem) {
			uint32_t node_cpus, node_mem;

			if (slurmctld_conf.fast_schedule) {
				node_cpus = node_ptr->config_ptr->cpus;
				node_mem = node_ptr->config_ptr->real_memory;
			} else {
				node_cpus = node_ptr->cpus;
				node_mem = node_ptr->real_memory;
			}
			/*
			 * ALPS 'Processing Elements per Node' value (aprun -N),
			 * which in slurm is --ntasks-per-node and 'mppnppn' in
			 * PBS: if --ntasks is specified, default to the number
			 * of cores per node (also the default for 'aprun -N').
			 */
			node_mem /= mppnppn ? mppnppn : node_cpus;
			mppmem = node_min_mem = MIN(node_mem, node_min_mem);
		}
	}

	/* mppwidth */
	for (i = 0; i < job_ptr->job_resrcs->nhosts; i++) {
		uint16_t node_tasks = job_ptr->job_resrcs->cpus[i] / mppdepth;

		if (mppnppn && mppnppn < node_tasks)
			node_tasks = mppnppn;
		mppwidth += node_tasks;
	}

	snprintf(batch_id, sizeof(batch_id), "%u", job_ptr->job_id);
	user = uid_to_string(job_ptr->user_id);
	rc = basil_reserve(user, batch_id, mppwidth, mppdepth, mppnppn,
			   mppmem, ns_head);
	xfree(user);
	if (rc <= 0) {
		/* errno value will be resolved by select_g_job_begin() */
		errno = is_transient_error(rc) ? EAGAIN : ECONNABORTED;
		return SLURM_ERROR;
	}

	resv_id = rc;
	if (_set_select_jobinfo(job_ptr->select_jobinfo->data,
				SELECT_JOBDATA_RESV_ID,
				&resv_id) != SLURM_SUCCESS) {
		/*
		 * This is a fatal error since it means we will not be able to
		 * confirm the reservation; no step will be able to run in it.
		 */
		error("job %u: can not set resId %u", job_ptr->job_id,
		      resv_id);
		basil_release(resv_id);
		return SLURM_ERROR;
	}

	info("ALPS RESERVATION #%u, JobId %u: BASIL -n %d -N %d -d %d -m %d",
	     resv_id, job_ptr->job_id, mppwidth, mppnppn, mppdepth, mppmem);

	return SLURM_SUCCESS;
}
/* Build the grid of node buttons grouped by leaf switch: the nodes hanging
 * off each level-0 switch are placed together, with a forced row break
 * after a switch's last node. Returns SLURM_SUCCESS or an error code from
 * _add_button_to_list(). */
static int _grid_table_by_switch(button_processor_t *button_processor,
				 List node_list)
{
	int rc = SLURM_SUCCESS;
	int inx = 0, ii = 0;
	switch_record_bitmaps_t *sw_nodes_bitmaps_ptr = g_switch_nodes_maps;
#if TOPO_DEBUG
	/* engage if want original display below switched */
	ListIterator itr = list_iterator_create(node_list);
	sview_node_info_t *sview_node_info_ptr = NULL;
#endif
	button_processor->inx = &inx;

	for (ii=0; ii<g_topo_info_msg_ptr->record_count;
	     ii++, sw_nodes_bitmaps_ptr++) {
		int j = 0, first, last;
		/* Only leaf switches (level 0) carry nodes directly */
		if (g_topo_info_msg_ptr->topo_array[ii].level)
			continue;
		first = bit_ffs(sw_nodes_bitmaps_ptr->node_bitmap);
		if (first == -1)
			continue;
		last = bit_fls(sw_nodes_bitmaps_ptr->node_bitmap);
		/* Buttons for this switch are indexed by node index 'j' */
		button_processor->inx = &j;
		button_processor->force_row_break = false;
		for (j = first; j <= last; j++) {
			if (TOPO_DEBUG)
				g_print("allocated node = %s button# %d\n",
					g_node_info_ptr->node_array[j].name,
					j);
			if (!bit_test(sw_nodes_bitmaps_ptr->node_bitmap, j))
				continue;
			/* if (!working_sview_config.show_hidden) { */
			/*	if (!check_part_includes_node(j)) */
			/*		continue; */
			/* } */
			/* Break the row after this switch's last node */
			if (j == last)
				button_processor->force_row_break = true;
			if ((rc = _add_button_to_list(
				     &g_node_info_ptr->node_array[j],
				     button_processor)) != SLURM_SUCCESS)
				break;
			button_processor->force_row_break = false;
		}
		/* NULL entry flushes/terminates the current button row */
		rc = _add_button_to_list(NULL, button_processor);
	}
#if TOPO_DEBUG
	/* engage this if want original display below
	 * switched grid */
	button_processor->inx = &inx;
	while ((sview_node_info_ptr = list_next(itr))) {
		if ((rc = _add_button_to_list(
			     sview_node_info_ptr->node_ptr,
			     button_processor)) != SLURM_SUCCESS)
			break;
		inx++;
	}
	list_iterator_destroy(itr);
#endif
	/* This is needed to get the correct width of the grid window.
	 * If it is not given then we get a really narrow window. */
	gtk_table_set_row_spacing(button_processor->table,
				  (*button_processor->coord_y)?
				  ((*button_processor->coord_y)-1):0, 1);
	return rc;
}
/* power_job_reboot - Reboot compute nodes for a job from the head node */
extern int power_job_reboot(struct job_record *job_ptr)
{
	int rc = SLURM_SUCCESS;
	int i, i_first, i_last;
	struct node_record *node_ptr;
	bitstr_t *boot_node_bitmap = NULL;
	time_t now = time(NULL);
	char *nodes, *features = NULL;
	pid_t pid;

	/* Determine which of the job's nodes actually need a reboot */
	boot_node_bitmap = node_features_reboot(job_ptr);
	if (boot_node_bitmap == NULL)	/* no reboot needed */
		return SLURM_SUCCESS;

	i_first = bit_ffs(boot_node_bitmap);
	if (i_first >= 0)
		i_last = bit_fls(boot_node_bitmap);
	else
		i_last = i_first - 1;	/* empty bitmap: loop is a no-op */
	for (i = i_first; i <= i_last; i++) {
		if (!bit_test(boot_node_bitmap, i))
			continue;
		node_ptr = node_record_table_ptr + i;
		/* Mark each node powering up and not yet responding */
		resume_cnt++;
		resume_cnt_f++;
		node_ptr->node_state &= (~NODE_STATE_POWER_SAVE);
		node_ptr->node_state |= NODE_STATE_POWER_UP;
		node_ptr->node_state |= NODE_STATE_NO_RESPOND;
		bit_clear(power_node_bitmap, i);
		bit_clear(avail_node_bitmap, i);
		/* Allow until now+resume_timeout for the node to respond */
		node_ptr->last_response = now + resume_timeout;
		bit_set(resume_node_bitmap, i);
	}

	nodes = bitmap2node_name(boot_node_bitmap);
	if (nodes) {
		/* Hold the job until all nodes finish rebooting */
		job_ptr->job_state |= JOB_CONFIGURING;
		job_ptr->wait_all_nodes = 1;
		if (job_ptr->details && job_ptr->details->features &&
		    node_features_g_user_update(job_ptr->user_id)) {
			features = node_features_g_job_xlate(
				job_ptr->details->features);
		}
		pid = _run_prog(resume_prog, nodes, features);
#if _DEBUG
		info("power_save: pid %d reboot nodes %s features %s",
		     (int) pid, nodes, features);
#else
		verbose("power_save: pid %d reboot nodes %s features %s",
			(int) pid, nodes, features);
#endif
		xfree(features);
	} else {
		error("power_save: bitmap2nodename");
		rc = SLURM_ERROR;
	}
	xfree(nodes);
	FREE_NULL_BITMAP(boot_node_bitmap);
	last_node_update = now;
	return rc;
}
/**
 * do_basil_reserve - create a BASIL reservation.
 * IN job_ptr - pointer to job which has just been allocated resources
 * RET 0 or error code, job will abort or be requeued on failure
 */
extern int do_basil_reserve(struct job_record *job_ptr)
{
	struct nodespec *ns_head = NULL;
	uint16_t mppwidth = 0, mppdepth, mppnppn;
	/* mppmem must be at least 1 for gang scheduling to work so
	 * if you are wondering why gang scheduling isn't working you
	 * should check your slurm.conf for DefMemPerNode */
	uint32_t mppmem = 0, node_min_mem = 0;
	uint32_t resv_id;
	int i, first_bit, last_bit;
	long rc;
	char *user, batch_id[16];
	struct basil_accel_param* bap;

	if (!job_ptr->job_resrcs || job_ptr->job_resrcs->nhosts == 0)
		return SLURM_SUCCESS;

	debug3("job #%u: %u nodes = %s, cpus=%u" , job_ptr->job_id,
	       job_ptr->job_resrcs->nhosts,
	       job_ptr->job_resrcs->nodes,
	       job_ptr->job_resrcs->ncpus
	);

	if (job_ptr->job_resrcs->node_bitmap == NULL) {
		error("job %u node_bitmap not set", job_ptr->job_id);
		return SLURM_SUCCESS;
	}

	first_bit = bit_ffs(job_ptr->job_resrcs->node_bitmap);
	last_bit = bit_fls(job_ptr->job_resrcs->node_bitmap);
	if (first_bit == -1 || last_bit == -1)
		return SLURM_SUCCESS;	/* no nodes allocated */

	/* aprun -d equivalent: depth = CPUs per task, at least 1 */
	mppdepth = MAX(1, job_ptr->details->cpus_per_task);
	mppnppn = job_ptr->details->ntasks_per_node;

	/* mppmem */
	if (job_ptr->details->pn_min_memory & MEM_PER_CPU) {
		/* Only honour --mem-per-cpu if --ntasks has been given */
		if (job_ptr->details->num_tasks)
			mppmem = job_ptr->details->pn_min_memory &
				 ~MEM_PER_CPU;
	} else if (job_ptr->details->pn_min_memory) {
		node_min_mem = job_ptr->details->pn_min_memory;
	}

	for (i = first_bit; i <= last_bit; i++) {
		struct node_record *node_ptr = node_record_table_ptr + i;
		uint32_t basil_node_id;

		if (!bit_test(job_ptr->job_resrcs->node_bitmap, i))
			continue;

		if (!node_ptr->name || node_ptr->name[0] == '\0')
			continue;	/* bad node */

		/* ALPS node ids are encoded in host names as nidNNNNN */
		if (sscanf(node_ptr->name, "nid%05u", &basil_node_id) != 1)
			fatal("can not read basil_node_id from %s",
			      node_ptr->name);

		if (ns_add_node(&ns_head, basil_node_id, false) != 0) {
			error("can not add node %s (nid%05u)", node_ptr->name,
			      basil_node_id);
			free_nodespec(ns_head);
			return SLURM_ERROR;
		}

		if (node_min_mem) {
			uint32_t node_cpus, node_mem;
			int32_t tmp_mppmem;

			if (slurmctld_conf.fast_schedule) {
				node_cpus = node_ptr->config_ptr->cpus;
				node_mem = node_ptr->config_ptr->real_memory;
			} else {
				node_cpus = node_ptr->cpus;
				node_mem = node_ptr->real_memory;
			}
			/*
			 * ALPS 'Processing Elements per Node' value (aprun -N),
			 * which in slurm is --ntasks-per-node and 'mppnppn' in
			 * PBS: if --ntasks is specified, default to the number
			 * of cores per node (also the default for 'aprun -N').
			 * On a heterogeneous system the nodes aren't
			 * always the same so keep track of the lowest
			 * mppmem and use it as the level for all
			 * nodes (mppmem is 0 when coming in).
			 */
			node_mem /= mppnppn ? mppnppn : node_cpus;
			tmp_mppmem = node_min_mem = MIN(node_mem,
							node_min_mem);
			/* If less than or equal to 0 make sure you
			   have 1 at least since 0 means give all the
			   memory to the job. */
			if (tmp_mppmem <= 0)
				tmp_mppmem = 1;
			if (mppmem)
				mppmem = MIN(mppmem, tmp_mppmem);
			else
				mppmem = tmp_mppmem;
		}
	}

	/* mppwidth */
	for (i = 0; i < job_ptr->job_resrcs->nhosts; i++) {
		uint16_t node_tasks = job_ptr->job_resrcs->cpus[i] / mppdepth;

		if (mppnppn && mppnppn < node_tasks)
			node_tasks = mppnppn;
		mppwidth += node_tasks;
	}

	snprintf(batch_id, sizeof(batch_id), "%u", job_ptr->job_id);
	user = uid_to_string(job_ptr->user_id);
	/* Accelerator (GPU) parameters only when GRES were requested */
	if (job_ptr->gres_list)
		bap = build_accel_param(job_ptr);
	else
		bap = NULL;
	rc = basil_reserve(user, batch_id, mppwidth, mppdepth, mppnppn,
			   mppmem, ns_head, bap);
	xfree(user);
	if (rc <= 0) {
		/* errno value will be resolved by select_g_job_begin() */
		errno = is_transient_error(rc) ? EAGAIN : ECONNABORTED;
		return SLURM_ERROR;
	}

	resv_id = rc;
	if (_set_select_jobinfo(job_ptr->select_jobinfo->data,
				SELECT_JOBDATA_RESV_ID,
				&resv_id) != SLURM_SUCCESS) {
		/*
		 * This is a fatal error since it means we will not be able to
		 * confirm the reservation; no step will be able to run in it.
		 */
		error("job %u: can not set resId %u", job_ptr->job_id,
		      resv_id);
		basil_release(resv_id);
		return SLURM_ERROR;
	}

	if (mppmem)
		job_ptr->details->pn_min_memory = mppmem | MEM_PER_CPU;

	info("ALPS RESERVATION #%u, JobId %u: BASIL -n %d -N %d -d %d -m %d",
	     resv_id, job_ptr->job_id, mppwidth, mppnppn, mppdepth, mppmem);

	return SLURM_SUCCESS;
}
int main(int argc, char *argv[]) { note("Testing static decl"); { bitstr_t bit_decl(bs, 65); /*bitstr_t *bsp = bs;*/ bit_set(bs,9); bit_set(bs,14); TEST(bit_test(bs,9), "bit 9 set"); TEST(!bit_test(bs,12), "bit 12 not set"); TEST(bit_test(bs,14), "bit 14 set" ); /*bit_free(bsp);*/ /* triggers TEST in bit_free - OK */ } note("Testing basic vixie functions"); { bitstr_t *bs = bit_alloc(16), *bs2; /*bit_set(bs, 42);*/ /* triggers TEST in bit_set - OK */ bit_set(bs,9); bit_set(bs,14); TEST(bit_test(bs,9), "bit 9 set"); TEST(!bit_test(bs,12), "bit 12 not set" ); TEST(bit_test(bs,14), "bit 14 set"); bs2 = bit_copy(bs); bit_fill_gaps(bs2); TEST(bit_ffs(bs2) == 9, "first bit set = 9 "); TEST(bit_fls(bs2) == 14, "last bit set = 14"); TEST(bit_set_count(bs2) == 6, "bitstring"); TEST(bit_test(bs2,12), "bitstring"); TEST(bit_super_set(bs,bs2) == 1, "bitstring"); TEST(bit_super_set(bs2,bs) == 0, "bitstring"); bit_clear(bs,14); TEST(!bit_test(bs,14), "bitstring"); bit_nclear(bs,9,14); TEST(!bit_test(bs,9), "bitstring"); TEST(!bit_test(bs,12), "bitstring"); TEST(!bit_test(bs,14), "bitstring"); bit_nset(bs,9,14); TEST(bit_test(bs,9), "bitstring"); TEST(bit_test(bs,12), "bitstring"); TEST(bit_test(bs,14), "bitstring"); TEST(bit_ffs(bs) == 9, "ffs"); TEST(bit_ffc(bs) == 0, "ffc"); bit_nset(bs,0,8); TEST(bit_ffc(bs) == 15, "ffc"); bit_free(bs); /*bit_set(bs,9); */ /* triggers TEST in bit_set - OK */ } note("Testing and/or/not"); { bitstr_t *bs1 = bit_alloc(128); bitstr_t *bs2 = bit_alloc(128); bit_set(bs1, 100); bit_set(bs1, 104); bit_set(bs2, 100); bit_and(bs1, bs2); TEST(bit_test(bs1, 100), "and"); TEST(!bit_test(bs1, 104), "and"); bit_set(bs2, 110); bit_set(bs2, 111); bit_set(bs2, 112); bit_or(bs1, bs2); TEST(bit_test(bs1, 100), "or"); TEST(bit_test(bs1, 110), "or"); TEST(bit_test(bs1, 111), "or"); TEST(bit_test(bs1, 112), "or"); bit_not(bs1); TEST(!bit_test(bs1, 100), "not"); TEST(bit_test(bs1, 12), "not"); bit_free(bs1); bit_free(bs2); } note("testing bit selection"); { bitstr_t 
*bs1 = bit_alloc(128), *bs2; bit_set(bs1, 21); bit_set(bs1, 100); bit_fill_gaps(bs1); bs2 = bit_pick_cnt(bs1,20); if (bs2) { TEST(bit_set_count(bs2) == 20, "pick"); TEST(bit_ffs(bs2) == 21, "pick"); TEST(bit_fls(bs2) == 40, "pick"); bit_free(bs2); } else TEST(0, "alloc fail"); bit_free(bs1); } note("Testing realloc"); { bitstr_t *bs = bit_alloc(1); TEST(bit_ffs(bs) == -1, "bitstring"); bit_set(bs,0); /*bit_set(bs, 1000);*/ /* triggers TEST in bit_set - OK */ bs = bit_realloc(bs,1048576); bit_set(bs,1000); bit_set(bs,1048575); TEST(bit_test(bs, 0), "bitstring"); TEST(bit_test(bs, 1000), "bitstring"); TEST(bit_test(bs, 1048575), "bitstring"); TEST(bit_set_count(bs) == 3, "bitstring"); bit_clear(bs,0); bit_clear(bs,1000); TEST(bit_set_count(bs) == 1, "bitstring"); TEST(bit_ffs(bs) == 1048575, "bitstring"); bit_free(bs); } note("Testing bit_fmt"); { char tmpstr[1024]; bitstr_t *bs = bit_alloc(1024); TEST(!strcmp(bit_fmt(tmpstr,sizeof(tmpstr),bs), ""), "bitstring"); bit_set(bs,42); TEST(!strcmp(bit_fmt(tmpstr,sizeof(tmpstr),bs), "42"), "bitstring"); bit_set(bs,102); TEST(!strcmp(bit_fmt(tmpstr,sizeof(tmpstr),bs), "42,102"), "bitstring"); bit_nset(bs,9,14); TEST(!strcmp(bit_fmt(tmpstr,sizeof(tmpstr), bs), "9-14,42,102"), "bitstring"); } note("Testing bit_nffc/bit_nffs"); { bitstr_t *bs = bit_alloc(1024); bit_set(bs, 2); bit_set(bs, 6); bit_set(bs, 7); bit_nset(bs,12,1018); TEST(bit_nffc(bs, 2) == 0, "bitstring"); TEST(bit_nffc(bs, 3) == 3, "bitstring"); TEST(bit_nffc(bs, 4) == 8, "bitstring"); TEST(bit_nffc(bs, 5) == 1019, "bitstring"); TEST(bit_nffc(bs, 6) == -1, "bitstring"); TEST(bit_nffs(bs, 1) == 2, "bitstring"); TEST(bit_nffs(bs, 2) == 6, "bitstring"); TEST(bit_nffs(bs, 100) == 12, "bitstring"); TEST(bit_nffs(bs, 1023) == -1, "bitstring"); bit_free(bs); } note("Testing bit_unfmt"); { bitstr_t *bs = bit_alloc(1024); bitstr_t *bs2 = bit_alloc(1024); char tmpstr[4096]; bit_set(bs,1); bit_set(bs,3); bit_set(bs,30); bit_nset(bs,42,64); bit_nset(bs,97,1000); 
bit_fmt(tmpstr, sizeof(tmpstr), bs); TEST(bit_unfmt(bs2, tmpstr) != -1, "bitstring"); TEST(bit_equal(bs, bs2), "bitstring"); } totals(); return failed; }
/*
 * slurm_sprint_job_info - output information about a specific Slurm
 *	job based upon message as loaded using slurm_load_jobs
 * IN job_ptr - an individual job information record pointer
 * IN one_liner - print as a single line if true
 * RET out - char * containing formatted output (must be freed after call)
 *           NULL is returned on failure.
 */
extern char *
slurm_sprint_job_info ( job_info_t * job_ptr, int one_liner )
{
	int i, j, k;
	char time_str[32], *group_name, *user_name;
	char *gres_last = "", tmp1[128], tmp2[128];
	char *tmp6_ptr;
	char tmp_line[1024 * 128];
	char tmp_path[MAXPATHLEN];
	char *ionodes = NULL;
	uint16_t exit_status = 0, term_sig = 0;
	job_resources_t *job_resrcs = job_ptr->job_resrcs;
	char *out = NULL;
	time_t run_time;
	uint32_t min_nodes, max_nodes = 0;
	char *nodelist = "NodeList";
	bitstr_t *cpu_bitmap;
	char *host;
	int sock_inx, sock_reps, last;
	int abs_node_inx, rel_node_inx;
	int64_t nice;
	int bit_inx, bit_reps;
	uint64_t *last_mem_alloc_ptr = NULL;
	uint64_t last_mem_alloc = NO_VAL64;
	char *last_hosts;
	hostlist_t hl, hl_last;
	char select_buf[122];
	uint32_t cluster_flags = slurmdb_setup_cluster_flags();
	uint32_t threads;
	/* Record separator: a space for one-line output, otherwise a
	 * newline plus indent for the multi-line report */
	char *line_end = (one_liner) ? " " : "\n ";

	/* BlueGene systems report midplanes rather than nodes */
	if (cluster_flags & CLUSTER_FLAG_BG) {
		nodelist = "MidplaneList";
		select_g_select_jobinfo_get(job_ptr->select_jobinfo,
					    SELECT_JOBDATA_IONODES,
					    &ionodes);
	}

	/****** Line 1 ******/
	xstrfmtcat(out, "JobId=%u ", job_ptr->job_id);

	if (job_ptr->array_job_id) {
		if (job_ptr->array_task_str) {
			xstrfmtcat(out, "ArrayJobId=%u ArrayTaskId=%s ",
				   job_ptr->array_job_id,
				   job_ptr->array_task_str);
		} else {
			xstrfmtcat(out, "ArrayJobId=%u ArrayTaskId=%u ",
				   job_ptr->array_job_id,
				   job_ptr->array_task_id);
		}
	}
	xstrfmtcat(out, "JobName=%s", job_ptr->name);
	xstrcat(out, line_end);

	/****** Line 2 ******/
	/* uid_to_string/gid_to_string return xmalloc'd strings */
	user_name = uid_to_string((uid_t) job_ptr->user_id);
	group_name = gid_to_string((gid_t) job_ptr->group_id);
	xstrfmtcat(out, "UserId=%s(%u) GroupId=%s(%u) MCS_label=%s",
		   user_name, job_ptr->user_id, group_name,
		   job_ptr->group_id,
		   (job_ptr->mcs_label==NULL) ? "N/A" : job_ptr->mcs_label);
	xfree(user_name);
	xfree(group_name);
	xstrcat(out, line_end);

	/****** Line 3 ******/
	nice = ((int64_t)job_ptr->nice) - NICE_OFFSET;
	xstrfmtcat(out, "Priority=%u Nice=%"PRIi64" Account=%s QOS=%s",
		   job_ptr->priority, nice, job_ptr->account, job_ptr->qos);
	if (slurm_get_track_wckey())
		xstrfmtcat(out, " WCKey=%s", job_ptr->wckey);
	xstrcat(out, line_end);

	/****** Line 4 ******/
	xstrfmtcat(out, "JobState=%s ",
		   job_state_string(job_ptr->job_state));

	if (job_ptr->state_desc) {
		/* Replace white space with underscore for easier parsing */
		for (j=0; job_ptr->state_desc[j]; j++) {
			if (isspace((int)job_ptr->state_desc[j]))
				job_ptr->state_desc[j] = '_';
		}
		xstrfmtcat(out, "Reason=%s ", job_ptr->state_desc);
	} else
		xstrfmtcat(out, "Reason=%s ",
			   job_reason_string(job_ptr->state_reason));

	xstrfmtcat(out, "Dependency=%s", job_ptr->dependency);
	xstrcat(out, line_end);

	/****** Line 5 ******/
	xstrfmtcat(out, "Requeue=%u Restarts=%u BatchFlag=%u Reboot=%u ",
		   job_ptr->requeue, job_ptr->restart_cnt,
		   job_ptr->batch_flag, job_ptr->reboot);
	/* term_sig stays 0 unless the job was killed by a signal */
	if (WIFSIGNALED(job_ptr->exit_code))
		term_sig = WTERMSIG(job_ptr->exit_code);
	exit_status = WEXITSTATUS(job_ptr->exit_code);
	xstrfmtcat(out, "ExitCode=%u:%u", exit_status, term_sig);
	xstrcat(out, line_end);

	/****** Line 5a (optional) ******/
	if (job_ptr->show_flags & SHOW_DETAIL) {
		if (WIFSIGNALED(job_ptr->derived_ec))
			term_sig = WTERMSIG(job_ptr->derived_ec);
		else
			term_sig = 0;
		exit_status = WEXITSTATUS(job_ptr->derived_ec);
		xstrfmtcat(out, "DerivedExitCode=%u:%u",
			   exit_status, term_sig);
		xstrcat(out, line_end);
	}

	/****** Line 6 ******/
	/* Run time: 0 if pending, accumulated pre-suspend time if
	 * suspended, otherwise elapsed time (accounting for any earlier
	 * suspension) */
	if (IS_JOB_PENDING(job_ptr))
		run_time = 0;
	else if (IS_JOB_SUSPENDED(job_ptr))
		run_time = job_ptr->pre_sus_time;
	else {
		time_t end_time;
		if (IS_JOB_RUNNING(job_ptr) || (job_ptr->end_time == 0))
			end_time = time(NULL);
		else
			end_time = job_ptr->end_time;
		if (job_ptr->suspend_time) {
			run_time = (time_t)
				(difftime(end_time, job_ptr->suspend_time)
				 + job_ptr->pre_sus_time);
		} else
			run_time = (time_t)
				difftime(end_time, job_ptr->start_time);
	}
	secs2time_str(run_time, time_str, sizeof(time_str));
	xstrfmtcat(out, "RunTime=%s ", time_str);

	if (job_ptr->time_limit == NO_VAL)
		xstrcat(out, "TimeLimit=Partition_Limit ");
	else {
		mins2time_str(job_ptr->time_limit, time_str,
			      sizeof(time_str));
		xstrfmtcat(out, "TimeLimit=%s ", time_str);
	}

	if (job_ptr->time_min == 0)
		xstrcat(out, "TimeMin=N/A");
	else {
		mins2time_str(job_ptr->time_min, time_str,
			      sizeof(time_str));
		xstrfmtcat(out, "TimeMin=%s", time_str);
	}
	xstrcat(out, line_end);

	/****** Line 7 ******/
	slurm_make_time_str(&job_ptr->submit_time, time_str,
			    sizeof(time_str));
	xstrfmtcat(out, "SubmitTime=%s ", time_str);
	slurm_make_time_str(&job_ptr->eligible_time, time_str,
			    sizeof(time_str));
	xstrfmtcat(out, "EligibleTime=%s", time_str);
	xstrcat(out, line_end);

	/****** Line 8 (optional) ******/
	if (job_ptr->resize_time) {
		slurm_make_time_str(&job_ptr->resize_time, time_str,
				    sizeof(time_str));
		xstrfmtcat(out, "ResizeTime=%s", time_str);
		xstrcat(out, line_end);
	}

	/****** Line 9 ******/
	slurm_make_time_str(&job_ptr->start_time, time_str,
			    sizeof(time_str));
	xstrfmtcat(out, "StartTime=%s ", time_str);

	/* An end time in the future with no time limit is only a guess */
	if ((job_ptr->time_limit == INFINITE) &&
	    (job_ptr->end_time > time(NULL)))
		xstrcat(out, "EndTime=Unknown ");
	else {
		slurm_make_time_str(&job_ptr->end_time, time_str,
				    sizeof(time_str));
		xstrfmtcat(out, "EndTime=%s ", time_str);
	}

	if (job_ptr->deadline) {
		slurm_make_time_str(&job_ptr->deadline, time_str,
				    sizeof(time_str));
		xstrfmtcat(out, "Deadline=%s", time_str);
	} else {
		xstrcat(out, "Deadline=N/A");
	}
	xstrcat(out, line_end);

	/****** Line 10 ******/
	if (job_ptr->preempt_time == 0)
		xstrcat(out, "PreemptTime=None ");
	else {
		slurm_make_time_str(&job_ptr->preempt_time, time_str,
				    sizeof(time_str));
		xstrfmtcat(out, "PreemptTime=%s ", time_str);
	}

	if (job_ptr->suspend_time) {
		slurm_make_time_str(&job_ptr->suspend_time, time_str,
				    sizeof(time_str));
		xstrfmtcat(out, "SuspendTime=%s ", time_str);
	} else
		xstrcat(out, "SuspendTime=None ");

	xstrfmtcat(out, "SecsPreSuspend=%ld",
		   (long int)job_ptr->pre_sus_time);
	xstrcat(out, line_end);

	/****** Line 11 ******/
	xstrfmtcat(out, "Partition=%s AllocNode:Sid=%s:%u",
		   job_ptr->partition, job_ptr->alloc_node,
		   job_ptr->alloc_sid);
	xstrcat(out, line_end);

	/****** Line 12 ******/
	xstrfmtcat(out, "Req%s=%s Exc%s=%s", nodelist, job_ptr->req_nodes,
		   nodelist, job_ptr->exc_nodes);
	xstrcat(out, line_end);

	/****** Line 13 ******/
	xstrfmtcat(out, "%s=%s", nodelist, job_ptr->nodes);
	if (job_ptr->nodes && ionodes) {
		xstrfmtcat(out, "[%s]", ionodes);
		xfree(ionodes);
	}
	if (job_ptr->sched_nodes)
		xstrfmtcat(out, " Sched%s=%s", nodelist,
			   job_ptr->sched_nodes);
	xstrcat(out, line_end);

	/****** Line 14 (optional) ******/
	if (job_ptr->batch_host) {
		xstrfmtcat(out, "BatchHost=%s", job_ptr->batch_host);
		xstrcat(out, line_end);
	}

	/****** Line 14a (optional) ******/
	if (job_ptr->fed_siblings) {
		xstrfmtcat(out, "FedOrigin=%s FedSiblings=%s",
			   job_ptr->fed_origin_str,
			   job_ptr->fed_siblings_str);
		xstrcat(out, line_end);
	}

	/****** Line 15 ******/
	/* Node-count range depends on system type and job state */
	if (cluster_flags & CLUSTER_FLAG_BG) {
		select_g_select_jobinfo_get(job_ptr->select_jobinfo,
					    SELECT_JOBDATA_NODE_CNT,
					    &min_nodes);
		if ((min_nodes == 0) || (min_nodes == NO_VAL)) {
			min_nodes = job_ptr->num_nodes;
			max_nodes = job_ptr->max_nodes;
		} else if (job_ptr->max_nodes)
			max_nodes = min_nodes;
	} else if (IS_JOB_PENDING(job_ptr)) {
		min_nodes = job_ptr->num_nodes;
		max_nodes = job_ptr->max_nodes;
		if (max_nodes && (max_nodes < min_nodes))
			min_nodes = max_nodes;
	} else {
		min_nodes = job_ptr->num_nodes;
		max_nodes = 0;
	}

	_sprint_range(tmp_line, sizeof(tmp_line), min_nodes, max_nodes);
	xstrfmtcat(out, "NumNodes=%s ", tmp_line);
	_sprint_range(tmp_line, sizeof(tmp_line), job_ptr->num_cpus,
		      job_ptr->max_cpus);
	xstrfmtcat(out, "NumCPUs=%s ", tmp_line);

	xstrfmtcat(out, "NumTasks=%u ", job_ptr->num_tasks);
	xstrfmtcat(out, "CPUs/Task=%u ", job_ptr->cpus_per_task);

	/* Requested boards:sockets:cores:threads; "*" means unspecified */
	if (job_ptr->boards_per_node == (uint16_t) NO_VAL)
		xstrcat(out, "ReqB:S:C:T=*:");
	else
		xstrfmtcat(out, "ReqB:S:C:T=%u:", job_ptr->boards_per_node);

	if (job_ptr->sockets_per_board == (uint16_t) NO_VAL)
		xstrcat(out, "*:");
	else
		xstrfmtcat(out, "%u:", job_ptr->sockets_per_board);

	if (job_ptr->cores_per_socket == (uint16_t) NO_VAL)
		xstrcat(out, "*:");
	else
		xstrfmtcat(out, "%u:", job_ptr->cores_per_socket);

	if (job_ptr->threads_per_core == (uint16_t) NO_VAL)
		xstrcat(out, "*");
	else
		xstrfmtcat(out, "%u", job_ptr->threads_per_core);
	xstrcat(out, line_end);

	/****** Line 16 ******/
	/* TRES should already have been converted at this point from
	 * simple (id-based) form */
	xstrfmtcat(out, "TRES=%s",
		   job_ptr->tres_alloc_str ? job_ptr->tres_alloc_str
					   : job_ptr->tres_req_str);
	xstrcat(out, line_end);

	/****** Line 17 ******/
	if (job_ptr->sockets_per_node == (uint16_t) NO_VAL)
		xstrcat(out, "Socks/Node=* ");
	else
		xstrfmtcat(out, "Socks/Node=%u ",
			   job_ptr->sockets_per_node);

	if (job_ptr->ntasks_per_node == (uint16_t) NO_VAL)
		xstrcat(out, "NtasksPerN:B:S:C=*:");
	else
		xstrfmtcat(out, "NtasksPerN:B:S:C=%u:",
			   job_ptr->ntasks_per_node);

	if (job_ptr->ntasks_per_board == (uint16_t) NO_VAL)
		xstrcat(out, "*:");
	else
		xstrfmtcat(out, "%u:", job_ptr->ntasks_per_board);

	if ((job_ptr->ntasks_per_socket == (uint16_t) NO_VAL) ||
	    (job_ptr->ntasks_per_socket == (uint16_t) INFINITE))
		xstrcat(out, "*:");
	else
		xstrfmtcat(out, "%u:", job_ptr->ntasks_per_socket);

	if ((job_ptr->ntasks_per_core == (uint16_t) NO_VAL) ||
	    (job_ptr->ntasks_per_core == (uint16_t) INFINITE))
		xstrcat(out, "* ");
	else
		xstrfmtcat(out, "%u ", job_ptr->ntasks_per_core);

	if (job_ptr->core_spec == (uint16_t) NO_VAL)
		xstrcat(out, "CoreSpec=*");
	else if (job_ptr->core_spec & CORE_SPEC_THREAD)
		/* High bit flags the count as threads rather than cores */
		xstrfmtcat(out, "ThreadSpec=%d",
			   (job_ptr->core_spec & (~CORE_SPEC_THREAD)));
	else
		xstrfmtcat(out, "CoreSpec=%u", job_ptr->core_spec);
	xstrcat(out, line_end);

	if (job_resrcs && cluster_flags & CLUSTER_FLAG_BG) {
		if ((job_resrcs->cpu_array_cnt > 0) &&
		    (job_resrcs->cpu_array_value) &&
		    (job_resrcs->cpu_array_reps)) {
			int length = 0;
			xstrcat(out, "CPUs=");
			for (i = 0; i < job_resrcs->cpu_array_cnt; i++) {
				/* only print 60 characters worth of this
				 * record */
				if (length > 60) {
					/* skip to last CPU group entry */
					if (i < job_resrcs->cpu_array_cnt
					    - 1) {
						continue;
					}
					/* add ellipsis before last entry */
					xstrcat(out, "...,");
				}
				/* NOTE(review): assumes xstrfmtcat()
				 * returns the number of characters added
				 * -- verify against the xstring API in
				 * this tree */
				length += xstrfmtcat(out, "%d",
						     job_resrcs->cpus[i]);
				if (job_resrcs->cpu_array_reps[i] > 1) {
					length += xstrfmtcat(out, "*%d",
						job_resrcs->
						cpu_array_reps[i]);
				}
				if (i < job_resrcs->cpu_array_cnt - 1) {
					xstrcat(out, ",");
					length++;
				}
			}
			xstrcat(out, line_end);
		}
	} else if (job_resrcs && job_resrcs->core_bitmap &&
		   ((last = bit_fls(job_resrcs->core_bitmap)) != -1)) {
		/* Per-host allocation report: walk each allocated host,
		 * expand its core bitmap to CPU ids, and coalesce runs of
		 * hosts with identical CPU/memory/GRES values into one
		 * output line */
		hl = hostlist_create(job_resrcs->nodes);
		if (!hl) {
			error("slurm_sprint_job_info: hostlist_create: %s",
			      job_resrcs->nodes);
			return NULL;
		}
		hl_last = hostlist_create(NULL);
		if (!hl_last) {
			error("slurm_sprint_job_info: hostlist_create: NULL");
			hostlist_destroy(hl);
			return NULL;
		}

		bit_inx = 0;
		i = sock_inx = sock_reps = 0;
		abs_node_inx = job_ptr->node_inx[i];

		gres_last = "";
		/* tmp1[] stores the current cpu(s) allocated */
		tmp2[0] = '\0';	/* stores last cpu(s) allocated */
		for (rel_node_inx=0; rel_node_inx < job_resrcs->nhosts;
		     rel_node_inx++) {

			if (sock_reps >=
			    job_resrcs->sock_core_rep_count[sock_inx]) {
				sock_inx++;
				sock_reps = 0;
			}
			sock_reps++;

			bit_reps = job_resrcs->sockets_per_node[sock_inx] *
				   job_resrcs->cores_per_socket[sock_inx];
			host = hostlist_shift(hl);
			threads = _threads_per_core(host);
			cpu_bitmap = bit_alloc(bit_reps * threads);
			/* expand each allocated core into its CPU ids */
			for (j = 0; j < bit_reps; j++) {
				if (bit_test(job_resrcs->core_bitmap,
					     bit_inx)){
					for (k = 0; k < threads; k++)
						bit_set(cpu_bitmap,
							(j * threads) + k);
				}
				bit_inx++;
			}
			bit_fmt(tmp1, sizeof(tmp1), cpu_bitmap);
			FREE_NULL_BITMAP(cpu_bitmap);
			/*
			 * If the allocation values for this host are not
			 * the same as the last host, print the report of
			 * the last group of hosts that had identical
			 * allocation values.
			 */
			if (xstrcmp(tmp1, tmp2) ||
			    ((rel_node_inx < job_ptr->gres_detail_cnt) &&
			     xstrcmp(job_ptr->gres_detail_str[rel_node_inx],
				     gres_last)) ||
			    (last_mem_alloc_ptr !=
			     job_resrcs->memory_allocated) ||
			    (job_resrcs->memory_allocated &&
			     (last_mem_alloc !=
			      job_resrcs->memory_allocated[rel_node_inx]))) {
				if (hostlist_count(hl_last)) {
					last_hosts =
						hostlist_ranged_string_xmalloc(
						hl_last);
					xstrfmtcat(out,
						   " Nodes=%s CPU_IDs=%s "
						   "Mem=%"PRIu64" GRES_IDX=%s",
						   last_hosts, tmp2,
						   last_mem_alloc_ptr ?
						   last_mem_alloc : 0,
						   gres_last);
					xfree(last_hosts);
					xstrcat(out, line_end);

					hostlist_destroy(hl_last);
					hl_last = hostlist_create(NULL);
				}
				strcpy(tmp2, tmp1);
				if (rel_node_inx <
				    job_ptr->gres_detail_cnt) {
					gres_last = job_ptr->
						gres_detail_str[rel_node_inx];
				} else {
					gres_last = "";
				}
				last_mem_alloc_ptr =
					job_resrcs->memory_allocated;
				if (last_mem_alloc_ptr)
					last_mem_alloc = job_resrcs->
						memory_allocated[rel_node_inx];
				else
					last_mem_alloc = NO_VAL64;
			}
			hostlist_push_host(hl_last, host);
			free(host);

			/* stop once past the highest allocated core */
			if (bit_inx > last)
				break;

			if (abs_node_inx > job_ptr->node_inx[i+1]) {
				i += 2;
				abs_node_inx = job_ptr->node_inx[i];
			} else {
				abs_node_inx++;
			}
		}

		/* flush the final group of identical hosts */
		if (hostlist_count(hl_last)) {
			last_hosts = hostlist_ranged_string_xmalloc(hl_last);
			xstrfmtcat(out,
				   " Nodes=%s CPU_IDs=%s Mem=%"PRIu64
				   " GRES_IDX=%s",
				   last_hosts, tmp2,
				   last_mem_alloc_ptr ? last_mem_alloc : 0,
				   gres_last);
			xfree(last_hosts);
			xstrcat(out, line_end);
		}
		hostlist_destroy(hl);
		hostlist_destroy(hl_last);
	}
	/****** Line 18 ******/
	/* High bit of pn_min_memory selects per-CPU vs per-node memory */
	if (job_ptr->pn_min_memory & MEM_PER_CPU) {
		job_ptr->pn_min_memory &= (~MEM_PER_CPU);
		tmp6_ptr = "CPU";
	} else
		tmp6_ptr = "Node";

	if (cluster_flags & CLUSTER_FLAG_BG) {
		convert_num_unit((float)job_ptr->pn_min_cpus, tmp1,
				 sizeof(tmp1), UNIT_NONE, NO_VAL,
				 CONVERT_NUM_UNIT_EXACT);
		xstrfmtcat(out, "MinCPUsNode=%s ", tmp1);
	} else {
		xstrfmtcat(out, "MinCPUsNode=%u ", job_ptr->pn_min_cpus);
	}

	convert_num_unit((float)job_ptr->pn_min_memory, tmp1, sizeof(tmp1),
			 UNIT_MEGA, NO_VAL, CONVERT_NUM_UNIT_EXACT);
	convert_num_unit((float)job_ptr->pn_min_tmp_disk, tmp2,
			 sizeof(tmp2), UNIT_MEGA, NO_VAL,
			 CONVERT_NUM_UNIT_EXACT);
	xstrfmtcat(out, "MinMemory%s=%s MinTmpDiskNode=%s",
		   tmp6_ptr, tmp1, tmp2);
	xstrcat(out, line_end);

	/****** Line ******/
	secs2time_str((time_t)job_ptr->delay_boot, tmp1, sizeof(tmp1));
	xstrfmtcat(out, "Features=%s DelayBoot=%s",
		   job_ptr->features, tmp1);
	xstrcat(out, line_end);

	/****** Line ******/
	xstrfmtcat(out, "Gres=%s Reservation=%s",
		   job_ptr->gres, job_ptr->resv_name);
	xstrcat(out, line_end);

	/****** Line 20 ******/
	xstrfmtcat(out,
		   "OverSubscribe=%s Contiguous=%d Licenses=%s Network=%s",
		   job_share_string(job_ptr->shared), job_ptr->contiguous,
		   job_ptr->licenses, job_ptr->network);
	xstrcat(out, line_end);

	/****** Line 21 ******/
	xstrfmtcat(out, "Command=%s", job_ptr->command);
	xstrcat(out, line_end);

	/****** Line 22 ******/
	xstrfmtcat(out, "WorkDir=%s", job_ptr->work_dir);

	if (cluster_flags & CLUSTER_FLAG_BG) {
		/****** Line 23 (optional) ******/
		select_g_select_jobinfo_sprint(job_ptr->select_jobinfo,
					       select_buf,
					       sizeof(select_buf),
					       SELECT_PRINT_BG_ID);
		if (select_buf[0] != '\0') {
			xstrcat(out, line_end);
			xstrfmtcat(out, "Block_ID=%s", select_buf);
		}

		/****** Line 24 (optional) ******/
		select_g_select_jobinfo_sprint(job_ptr->select_jobinfo,
					       select_buf,
					       sizeof(select_buf),
					       SELECT_PRINT_MIXED_SHORT);
		if (select_buf[0] != '\0') {
			xstrcat(out, line_end);
			xstrcat(out, select_buf);
		}

		/****** Line 26 (optional) ******/
		select_g_select_jobinfo_sprint(job_ptr->select_jobinfo,
					       select_buf,
					       sizeof(select_buf),
					       SELECT_PRINT_LINUX_IMAGE);
		if (select_buf[0] != '\0') {
			xstrcat(out, line_end);
			xstrfmtcat(out, "CnloadImage=%s", select_buf);
		}

		/****** Line 27 (optional) ******/
		select_g_select_jobinfo_sprint(job_ptr->select_jobinfo,
					       select_buf,
					       sizeof(select_buf),
					       SELECT_PRINT_MLOADER_IMAGE);
		if (select_buf[0] != '\0') {
			xstrcat(out, line_end);
			xstrfmtcat(out, "MloaderImage=%s", select_buf);
		}

		/****** Line 28 (optional) ******/
		select_g_select_jobinfo_sprint(job_ptr->select_jobinfo,
					       select_buf,
					       sizeof(select_buf),
					       SELECT_PRINT_RAMDISK_IMAGE);
		if (select_buf[0] != '\0') {
			xstrcat(out, line_end);
			xstrfmtcat(out, "IoloadImage=%s", select_buf);
		}
	}

	/****** Line (optional) ******/
	if (job_ptr->admin_comment) {
		xstrcat(out, line_end);
		xstrfmtcat(out, "AdminComment=%s ", job_ptr->admin_comment);
	}

	/****** Line (optional) ******/
	if (job_ptr->comment) {
		xstrcat(out, line_end);
		xstrfmtcat(out, "Comment=%s ", job_ptr->comment);
	}

	/****** Line 30 (optional) ******/
	if (job_ptr->batch_flag) {
		xstrcat(out, line_end);
		slurm_get_job_stderr(tmp_path, sizeof(tmp_path), job_ptr);
		xstrfmtcat(out, "StdErr=%s", tmp_path);
	}

	/****** Line 31 (optional) ******/
	if (job_ptr->batch_flag) {
		xstrcat(out, line_end);
		slurm_get_job_stdin(tmp_path, sizeof(tmp_path), job_ptr);
		xstrfmtcat(out, "StdIn=%s", tmp_path);
	}

	/****** Line 32 (optional) ******/
	if (job_ptr->batch_flag) {
		xstrcat(out, line_end);
		slurm_get_job_stdout(tmp_path, sizeof(tmp_path), job_ptr);
		xstrfmtcat(out, "StdOut=%s", tmp_path);
	}

	/****** Line 33 (optional) ******/
	if (job_ptr->batch_script) {
		xstrcat(out, line_end);
		xstrcat(out, "BatchScript=\n");
		xstrcat(out, job_ptr->batch_script);
	}

	/****** Line 34 (optional) ******/
	if (job_ptr->req_switch) {
		char time_buf[32];
		xstrcat(out, line_end);
		secs2time_str((time_t) job_ptr->wait4switch, time_buf,
			      sizeof(time_buf));
		xstrfmtcat(out, "Switches=%u@%s\n", job_ptr->req_switch,
			   time_buf);
	}

	/****** Line 35 (optional) ******/
	if (job_ptr->burst_buffer) {
		xstrcat(out, line_end);
		xstrfmtcat(out, "BurstBuffer=%s", job_ptr->burst_buffer);
	}

	/****** Line (optional) ******/
	if (job_ptr->burst_buffer_state) {
		xstrcat(out, line_end);
		xstrfmtcat(out, "BurstBufferState=%s",
			   job_ptr->burst_buffer_state);
	}

	/****** Line 36 (optional) ******/
	if (cpu_freq_debug(NULL, NULL, tmp1, sizeof(tmp1),
			   job_ptr->cpu_freq_gov, job_ptr->cpu_freq_min,
			   job_ptr->cpu_freq_max, NO_VAL) != 0) {
		xstrcat(out, line_end);
		xstrcat(out, tmp1);
	}

	/****** Line 37 ******/
	xstrcat(out, line_end);
	xstrfmtcat(out, "Power=%s", power_flags_str(job_ptr->power_flags));

	/****** Line 38 (optional) ******/
	/* NOTE(review): no separator is emitted between these flags, and
	 * KILL_INV_DEP / NO_KILL_INV_DEP both print the (apparently
	 * misspelled) key "KillOInInvalidDependent" -- left unchanged here
	 * since downstream parsers may depend on the exact text; confirm
	 * intended output before altering */
	if (job_ptr->bitflags) {
		xstrcat(out, line_end);
		if (job_ptr->bitflags & GRES_ENFORCE_BIND)
			xstrcat(out, "GresEnforceBind=Yes");
		if (job_ptr->bitflags & KILL_INV_DEP)
			xstrcat(out, "KillOInInvalidDependent=Yes");
		if (job_ptr->bitflags & NO_KILL_INV_DEP)
			xstrcat(out, "KillOInInvalidDependent=No");
		if (job_ptr->bitflags & SPREAD_JOB)
			xstrcat(out, "SpreadJob=Yes");
	}

	/****** END OF JOB RECORD ******/
	if (one_liner)
		xstrcat(out, "\n");
	else
		xstrcat(out, "\n\n");

	return out;
}
/* * slurm_sprint_job_info - output information about a specific Slurm * job based upon message as loaded using slurm_load_jobs * IN job_ptr - an individual job information record pointer * IN one_liner - print as a single line if true * RET out - char * containing formatted output (must be freed after call) * NULL is returned on failure. */ extern char * slurm_sprint_job_info ( job_info_t * job_ptr, int one_liner ) { int i, j; char time_str[32], *group_name, *user_name; char tmp1[128], tmp2[128], tmp3[128], tmp4[128], tmp5[128], *tmp6_ptr; char tmp_line[512]; char *ionodes = NULL; uint16_t exit_status = 0, term_sig = 0; job_resources_t *job_resrcs = job_ptr->job_resrcs; char *out = NULL; time_t run_time; uint32_t min_nodes, max_nodes = 0; char *nodelist = "NodeList"; bitstr_t *core_bitmap; char *host; int sock_inx, sock_reps, last; int abs_node_inx, rel_node_inx; int bit_inx, bit_reps; uint32_t *last_mem_alloc_ptr = NULL; uint32_t last_mem_alloc = NO_VAL; char *last_hosts; hostlist_t hl, hl_last; char select_buf[122]; uint32_t cluster_flags = slurmdb_setup_cluster_flags(); if (cluster_flags & CLUSTER_FLAG_BG) { nodelist = "MidplaneList"; select_g_select_jobinfo_get(job_ptr->select_jobinfo, SELECT_JOBDATA_IONODES, &ionodes); } /****** Line 1 ******/ snprintf(tmp_line, sizeof(tmp_line), "JobId=%u ", job_ptr->job_id); out = xstrdup(tmp_line); if (job_ptr->array_job_id) { snprintf(tmp_line, sizeof(tmp_line), "ArrayJobId=%u ArrayTaskId=%u ", job_ptr->array_job_id, job_ptr->array_task_id); xstrcat(out, tmp_line); } snprintf(tmp_line, sizeof(tmp_line), "Name=%s", job_ptr->name); xstrcat(out, tmp_line); if (one_liner) xstrcat(out, " "); else xstrcat(out, "\n "); /****** Line 2 ******/ user_name = uid_to_string((uid_t) job_ptr->user_id); group_name = gid_to_string((gid_t) job_ptr->group_id); snprintf(tmp_line, sizeof(tmp_line), "UserId=%s(%u) GroupId=%s(%u)", user_name, job_ptr->user_id, group_name, job_ptr->group_id); xfree(user_name); xfree(group_name); xstrcat(out, 
tmp_line); if (one_liner) xstrcat(out, " "); else xstrcat(out, "\n "); /****** Line 3 ******/ snprintf(tmp_line, sizeof(tmp_line), "Priority=%u Account=%s QOS=%s", job_ptr->priority, job_ptr->account, job_ptr->qos); xstrcat(out, tmp_line); if (slurm_get_track_wckey()) { snprintf(tmp_line, sizeof(tmp_line), " WCKey=%s", job_ptr->wckey); xstrcat(out, tmp_line); } if (one_liner) xstrcat(out, " "); else xstrcat(out, "\n "); /****** Line 4 ******/ if (job_ptr->state_desc) { /* Replace white space with underscore for easier parsing */ for (j=0; job_ptr->state_desc[j]; j++) { if (isspace((int)job_ptr->state_desc[j])) job_ptr->state_desc[j] = '_'; } tmp6_ptr = job_ptr->state_desc; } else tmp6_ptr = job_reason_string(job_ptr->state_reason); snprintf(tmp_line, sizeof(tmp_line), "JobState=%s Reason=%s Dependency=%s", job_state_string(job_ptr->job_state), tmp6_ptr, job_ptr->dependency); xstrcat(out, tmp_line); if (one_liner) xstrcat(out, " "); else xstrcat(out, "\n "); /****** Line 5 ******/ snprintf(tmp_line, sizeof(tmp_line), "Requeue=%u Restarts=%u BatchFlag=%u ", job_ptr->requeue, job_ptr->restart_cnt, job_ptr->batch_flag); xstrcat(out, tmp_line); if (WIFSIGNALED(job_ptr->exit_code)) term_sig = WTERMSIG(job_ptr->exit_code); exit_status = WEXITSTATUS(job_ptr->exit_code); snprintf(tmp_line, sizeof(tmp_line), "ExitCode=%u:%u", exit_status, term_sig); xstrcat(out, tmp_line); if (one_liner) xstrcat(out, " "); else xstrcat(out, "\n "); /****** Line 5a (optional) ******/ if (!(job_ptr->show_flags & SHOW_DETAIL)) goto line6; if (WIFSIGNALED(job_ptr->derived_ec)) term_sig = WTERMSIG(job_ptr->derived_ec); else term_sig = 0; exit_status = WEXITSTATUS(job_ptr->derived_ec); snprintf(tmp_line, sizeof(tmp_line), "DerivedExitCode=%u:%u", exit_status, term_sig); xstrcat(out, tmp_line); if (one_liner) xstrcat(out, " "); else xstrcat(out, "\n "); /****** Line 6 ******/ line6: snprintf(tmp_line, sizeof(tmp_line), "RunTime="); xstrcat(out, tmp_line); if (IS_JOB_PENDING(job_ptr)) run_time = 0; 
else if (IS_JOB_SUSPENDED(job_ptr)) run_time = job_ptr->pre_sus_time; else { time_t end_time; if (IS_JOB_RUNNING(job_ptr) || (job_ptr->end_time == 0)) end_time = time(NULL); else end_time = job_ptr->end_time; if (job_ptr->suspend_time) { run_time = (time_t) (difftime(end_time, job_ptr->suspend_time) + job_ptr->pre_sus_time); } else run_time = (time_t) difftime(end_time, job_ptr->start_time); } secs2time_str(run_time, tmp1, sizeof(tmp1)); sprintf(tmp_line, "%s ", tmp1); xstrcat(out, tmp_line); snprintf(tmp_line, sizeof(tmp_line), "TimeLimit="); xstrcat(out, tmp_line); if (job_ptr->time_limit == NO_VAL) sprintf(tmp_line, "Partition_Limit"); else { mins2time_str(job_ptr->time_limit, tmp_line, sizeof(tmp_line)); } xstrcat(out, tmp_line); snprintf(tmp_line, sizeof(tmp_line), " TimeMin="); xstrcat(out, tmp_line); if (job_ptr->time_min == 0) sprintf(tmp_line, "N/A"); else { mins2time_str(job_ptr->time_min, tmp_line, sizeof(tmp_line)); } xstrcat(out, tmp_line); if (one_liner) xstrcat(out, " "); else xstrcat(out, "\n "); /****** Line 7 ******/ slurm_make_time_str((time_t *)&job_ptr->submit_time, time_str, sizeof(time_str)); snprintf(tmp_line, sizeof(tmp_line), "SubmitTime=%s ", time_str); xstrcat(out, tmp_line); slurm_make_time_str((time_t *)&job_ptr->eligible_time, time_str, sizeof(time_str)); snprintf(tmp_line, sizeof(tmp_line), "EligibleTime=%s", time_str); xstrcat(out, tmp_line); if (one_liner) xstrcat(out, " "); else xstrcat(out, "\n "); /****** Line 8 (optional) ******/ if (job_ptr->resize_time) { slurm_make_time_str((time_t *)&job_ptr->resize_time, time_str, sizeof(time_str)); snprintf(tmp_line, sizeof(tmp_line), "ResizeTime=%s", time_str); xstrcat(out, tmp_line); if (one_liner) xstrcat(out, " "); else xstrcat(out, "\n "); } /****** Line 9 ******/ slurm_make_time_str((time_t *)&job_ptr->start_time, time_str, sizeof(time_str)); snprintf(tmp_line, sizeof(tmp_line), "StartTime=%s ", time_str); xstrcat(out, tmp_line); snprintf(tmp_line, sizeof(tmp_line), "EndTime="); 
xstrcat(out, tmp_line); if ((job_ptr->time_limit == INFINITE) && (job_ptr->end_time > time(NULL))) sprintf(tmp_line, "Unknown"); else { slurm_make_time_str ((time_t *)&job_ptr->end_time, time_str, sizeof(time_str)); sprintf(tmp_line, "%s", time_str); } xstrcat(out, tmp_line); if (one_liner) xstrcat(out, " "); else xstrcat(out, "\n "); /****** Line 10 ******/ if (job_ptr->preempt_time == 0) sprintf(tmp_line, "PreemptTime=None "); else { slurm_make_time_str((time_t *)&job_ptr->preempt_time, time_str, sizeof(time_str)); snprintf(tmp_line, sizeof(tmp_line), "PreemptTime=%s ", time_str); } xstrcat(out, tmp_line); if (job_ptr->suspend_time) { slurm_make_time_str ((time_t *)&job_ptr->suspend_time, time_str, sizeof(time_str)); } else { strncpy(time_str, "None", sizeof(time_str)); } snprintf(tmp_line, sizeof(tmp_line), "SuspendTime=%s SecsPreSuspend=%ld", time_str, (long int)job_ptr->pre_sus_time); xstrcat(out, tmp_line); if (one_liner) xstrcat(out, " "); else xstrcat(out, "\n "); /****** Line 11 ******/ snprintf(tmp_line, sizeof(tmp_line), "Partition=%s AllocNode:Sid=%s:%u", job_ptr->partition, job_ptr->alloc_node, job_ptr->alloc_sid); xstrcat(out, tmp_line); if (one_liner) xstrcat(out, " "); else xstrcat(out, "\n "); /****** Line 12 ******/ snprintf(tmp_line, sizeof(tmp_line), "Req%s=%s Exc%s=%s", nodelist, job_ptr->req_nodes, nodelist, job_ptr->exc_nodes); xstrcat(out, tmp_line); if (one_liner) xstrcat(out, " "); else xstrcat(out, "\n "); /****** Line 13 ******/ xstrfmtcat(out, "%s=", nodelist); xstrcat(out, job_ptr->nodes); if (job_ptr->nodes && ionodes) { snprintf(tmp_line, sizeof(tmp_line), "[%s]", ionodes); xstrcat(out, tmp_line); xfree(ionodes); } if (one_liner) xstrcat(out, " "); else xstrcat(out, "\n "); /****** Line 14 (optional) ******/ if (job_ptr->batch_host) { snprintf(tmp_line, sizeof(tmp_line), "BatchHost=%s", job_ptr->batch_host); xstrcat(out, tmp_line); if (one_liner) xstrcat(out, " "); else xstrcat(out, "\n "); } /****** Line 15 ******/ if (cluster_flags 
& CLUSTER_FLAG_BG) { select_g_select_jobinfo_get(job_ptr->select_jobinfo, SELECT_JOBDATA_NODE_CNT, &min_nodes); if ((min_nodes == 0) || (min_nodes == NO_VAL)) { min_nodes = job_ptr->num_nodes; max_nodes = job_ptr->max_nodes; } else if (job_ptr->max_nodes) max_nodes = min_nodes; } else { min_nodes = job_ptr->num_nodes; max_nodes = job_ptr->max_nodes; } _sprint_range(tmp1, sizeof(tmp1), job_ptr->num_cpus, job_ptr->max_cpus); _sprint_range(tmp2, sizeof(tmp2), min_nodes, max_nodes); if (job_ptr->sockets_per_node == (uint16_t) NO_VAL) strcpy(tmp3, "*"); else snprintf(tmp3, sizeof(tmp3), "%u", job_ptr->sockets_per_node); if (job_ptr->cores_per_socket == (uint16_t) NO_VAL) strcpy(tmp4, "*"); else snprintf(tmp4, sizeof(tmp4), "%u", job_ptr->cores_per_socket); if (job_ptr->threads_per_core == (uint16_t) NO_VAL) strcpy(tmp5, "*"); else snprintf(tmp5, sizeof(tmp5), "%u", job_ptr->threads_per_core); snprintf(tmp_line, sizeof(tmp_line), "NumNodes=%s NumCPUs=%s CPUs/Task=%u ReqS:C:T=%s:%s:%s", tmp2, tmp1, job_ptr->cpus_per_task, tmp3, tmp4, tmp5); xstrcat(out, tmp_line); if (one_liner) xstrcat(out, " "); else xstrcat(out, "\n "); if (!job_resrcs) goto line15; if (cluster_flags & CLUSTER_FLAG_BG) { if ((job_resrcs->cpu_array_cnt > 0) && (job_resrcs->cpu_array_value) && (job_resrcs->cpu_array_reps)) { int length = 0; xstrcat(out, "CPUs="); length += 10; for (i = 0; i < job_resrcs->cpu_array_cnt; i++) { if (length > 70) { /* skip to last CPU group entry */ if (i < job_resrcs->cpu_array_cnt - 1) { continue; } /* add ellipsis before last entry */ xstrcat(out, "...,"); length += 4; } snprintf(tmp_line, sizeof(tmp_line), "%d", job_resrcs->cpus[i]); xstrcat(out, tmp_line); length += strlen(tmp_line); if (job_resrcs->cpu_array_reps[i] > 1) { snprintf(tmp_line, sizeof(tmp_line), "*%d", job_resrcs->cpu_array_reps[i]); xstrcat(out, tmp_line); length += strlen(tmp_line); } if (i < job_resrcs->cpu_array_cnt - 1) { xstrcat(out, ","); length++; } } if (one_liner) xstrcat(out, " "); else 
xstrcat(out, "\n "); } } else { if (!job_resrcs->core_bitmap) goto line15; last = bit_fls(job_resrcs->core_bitmap); if (last == -1) goto line15; hl = hostlist_create(job_ptr->nodes); if (!hl) { error("slurm_sprint_job_info: hostlist_create: %s", job_ptr->nodes); return NULL; } hl_last = hostlist_create(NULL); if (!hl_last) { error("slurm_sprint_job_info: hostlist_create: NULL"); hostlist_destroy(hl); return NULL; } bit_inx = 0; i = sock_inx = sock_reps = 0; abs_node_inx = job_ptr->node_inx[i]; /* tmp1[] stores the current cpu(s) allocated */ tmp2[0] = '\0'; /* stores last cpu(s) allocated */ for (rel_node_inx=0; rel_node_inx < job_resrcs->nhosts; rel_node_inx++) { if (sock_reps >= job_resrcs->sock_core_rep_count[sock_inx]) { sock_inx++; sock_reps = 0; } sock_reps++; bit_reps = job_resrcs->sockets_per_node[sock_inx] * job_resrcs->cores_per_socket[sock_inx]; core_bitmap = bit_alloc(bit_reps); for (j=0; j < bit_reps; j++) { if (bit_test(job_resrcs->core_bitmap, bit_inx)) bit_set(core_bitmap, j); bit_inx++; } bit_fmt(tmp1, sizeof(tmp1), core_bitmap); FREE_NULL_BITMAP(core_bitmap); host = hostlist_shift(hl); /* * If the allocation values for this host are not the same as the * last host, print the report of the last group of hosts that had * identical allocation values. */ if (strcmp(tmp1, tmp2) || (last_mem_alloc_ptr != job_resrcs->memory_allocated) || (job_resrcs->memory_allocated && (last_mem_alloc != job_resrcs->memory_allocated[rel_node_inx]))) { if (hostlist_count(hl_last)) { last_hosts = hostlist_ranged_string_xmalloc( hl_last); snprintf(tmp_line, sizeof(tmp_line), " Nodes=%s CPU_IDs=%s Mem=%u", last_hosts, tmp2, last_mem_alloc_ptr ? 
last_mem_alloc : 0); xfree(last_hosts); xstrcat(out, tmp_line); if (one_liner) xstrcat(out, " "); else xstrcat(out, "\n "); hostlist_destroy(hl_last); hl_last = hostlist_create(NULL); } strcpy(tmp2, tmp1); last_mem_alloc_ptr = job_resrcs->memory_allocated; if (last_mem_alloc_ptr) last_mem_alloc = job_resrcs-> memory_allocated[rel_node_inx]; else last_mem_alloc = NO_VAL; } hostlist_push_host(hl_last, host); free(host); if (bit_inx > last) break; if (abs_node_inx > job_ptr->node_inx[i+1]) { i += 2; abs_node_inx = job_ptr->node_inx[i]; } else { abs_node_inx++; } } if (hostlist_count(hl_last)) { last_hosts = hostlist_ranged_string_xmalloc(hl_last); snprintf(tmp_line, sizeof(tmp_line), " Nodes=%s CPU_IDs=%s Mem=%u", last_hosts, tmp2, last_mem_alloc_ptr ? last_mem_alloc : 0); xfree(last_hosts); xstrcat(out, tmp_line); if (one_liner) xstrcat(out, " "); else xstrcat(out, "\n "); } hostlist_destroy(hl); hostlist_destroy(hl_last); } /****** Line 15 ******/ line15: if (job_ptr->pn_min_memory & MEM_PER_CPU) { job_ptr->pn_min_memory &= (~MEM_PER_CPU); tmp6_ptr = "CPU"; } else tmp6_ptr = "Node"; if (cluster_flags & CLUSTER_FLAG_BG) { convert_num_unit((float)job_ptr->pn_min_cpus, tmp1, sizeof(tmp1), UNIT_NONE); snprintf(tmp_line, sizeof(tmp_line), "MinCPUsNode=%s", tmp1); } else { snprintf(tmp_line, sizeof(tmp_line), "MinCPUsNode=%u", job_ptr->pn_min_cpus); } xstrcat(out, tmp_line); convert_num_unit((float)job_ptr->pn_min_memory, tmp1, sizeof(tmp1), UNIT_MEGA); convert_num_unit((float)job_ptr->pn_min_tmp_disk, tmp2, sizeof(tmp2), UNIT_MEGA); snprintf(tmp_line, sizeof(tmp_line), " MinMemory%s=%s MinTmpDiskNode=%s", tmp6_ptr, tmp1, tmp2); xstrcat(out, tmp_line); if (one_liner) xstrcat(out, " "); else xstrcat(out, "\n "); /****** Line 16 ******/ snprintf(tmp_line, sizeof(tmp_line), "Features=%s Gres=%s Reservation=%s", job_ptr->features, job_ptr->gres, job_ptr->resv_name); xstrcat(out, tmp_line); if (one_liner) xstrcat(out, " "); else xstrcat(out, "\n "); /****** Line 17 ******/ 
snprintf(tmp_line, sizeof(tmp_line), "Shared=%s Contiguous=%d Licenses=%s Network=%s", (job_ptr->shared == 0 ? "0" : job_ptr->shared == 1 ? "1" : "OK"), job_ptr->contiguous, job_ptr->licenses, job_ptr->network); xstrcat(out, tmp_line); if (one_liner) xstrcat(out, " "); else xstrcat(out, "\n "); /****** Line 18 ******/ snprintf(tmp_line, sizeof(tmp_line), "Command=%s", job_ptr->command); xstrcat(out, tmp_line); if (one_liner) xstrcat(out, " "); else xstrcat(out, "\n "); /****** Line 19 ******/ snprintf(tmp_line, sizeof(tmp_line), "WorkDir=%s", job_ptr->work_dir); xstrcat(out, tmp_line); if (cluster_flags & CLUSTER_FLAG_BG) { /****** Line 20 (optional) ******/ select_g_select_jobinfo_sprint(job_ptr->select_jobinfo, select_buf, sizeof(select_buf), SELECT_PRINT_BG_ID); if (select_buf[0] != '\0') { if (one_liner) xstrcat(out, " "); else xstrcat(out, "\n "); snprintf(tmp_line, sizeof(tmp_line), "Block_ID=%s", select_buf); xstrcat(out, tmp_line); } /****** Line 21 (optional) ******/ select_g_select_jobinfo_sprint(job_ptr->select_jobinfo, select_buf, sizeof(select_buf), SELECT_PRINT_MIXED_SHORT); if (select_buf[0] != '\0') { if (one_liner) xstrcat(out, " "); else xstrcat(out, "\n "); xstrcat(out, select_buf); } if (cluster_flags & CLUSTER_FLAG_BGL) { /****** Line 22 (optional) ******/ select_g_select_jobinfo_sprint( job_ptr->select_jobinfo, select_buf, sizeof(select_buf), SELECT_PRINT_BLRTS_IMAGE); if (select_buf[0] != '\0') { if (one_liner) xstrcat(out, " "); else xstrcat(out, "\n "); snprintf(tmp_line, sizeof(tmp_line), "BlrtsImage=%s", select_buf); xstrcat(out, tmp_line); } } /****** Line 23 (optional) ******/ select_g_select_jobinfo_sprint(job_ptr->select_jobinfo, select_buf, sizeof(select_buf), SELECT_PRINT_LINUX_IMAGE); if (select_buf[0] != '\0') { if (one_liner) xstrcat(out, " "); else xstrcat(out, "\n "); if (cluster_flags & CLUSTER_FLAG_BGL) snprintf(tmp_line, sizeof(tmp_line), "LinuxImage=%s", select_buf); else snprintf(tmp_line, sizeof(tmp_line), 
"CnloadImage=%s", select_buf); xstrcat(out, tmp_line); } /****** Line 24 (optional) ******/ select_g_select_jobinfo_sprint(job_ptr->select_jobinfo, select_buf, sizeof(select_buf), SELECT_PRINT_MLOADER_IMAGE); if (select_buf[0] != '\0') { if (one_liner) xstrcat(out, " "); else xstrcat(out, "\n "); snprintf(tmp_line, sizeof(tmp_line), "MloaderImage=%s", select_buf); xstrcat(out, tmp_line); } /****** Line 25 (optional) ******/ select_g_select_jobinfo_sprint(job_ptr->select_jobinfo, select_buf, sizeof(select_buf), SELECT_PRINT_RAMDISK_IMAGE); if (select_buf[0] != '\0') { if (one_liner) xstrcat(out, " "); else xstrcat(out, "\n "); if (cluster_flags & CLUSTER_FLAG_BGL) snprintf(tmp_line, sizeof(tmp_line), "RamDiskImage=%s", select_buf); else snprintf(tmp_line, sizeof(tmp_line), "IoloadImage=%s", select_buf); xstrcat(out, tmp_line); } } /****** Line 26 (optional) ******/ if (job_ptr->comment) { if (one_liner) xstrcat(out, " "); else xstrcat(out, "\n "); snprintf(tmp_line, sizeof(tmp_line), "Comment=%s ", job_ptr->comment); xstrcat(out, tmp_line); } /****** Line 27 (optional) ******/ if (job_ptr->batch_script) { if (one_liner) xstrcat(out, " "); else xstrcat(out, "\n "); xstrcat(out, "BatchScript=\n"); xstrcat(out, job_ptr->batch_script); } /****** Line 28 (optional) ******/ if (job_ptr->req_switch) { char time_buf[32]; if (one_liner) xstrcat(out, " "); else xstrcat(out, "\n "); secs2time_str((time_t) job_ptr->wait4switch, time_buf, sizeof(time_buf)); snprintf(tmp_line, sizeof(tmp_line), "Switches=%u@%s\n", job_ptr->req_switch, time_buf); xstrcat(out, tmp_line); } /****** Line 29 (optional) ******/ if (one_liner) xstrcat(out, "\n"); else xstrcat(out, "\n\n"); return out; }
/* Translate bitmap representation from hex to decimal format, replacing
 * array_task_str in place (e.g. "0x5555..." -> "0-100:2" or "1,3,5-9").
 * No-op if the string is absent or not of the "0x..." hex form.
 * Ownership: the old array_task_str is xfree()d and replaced with a
 * newly allocated string. */
static void _xlate_task_str(slurmdb_job_rec_t *job_ptr)
{
	static int bitstr_len = -1;
	int buf_size, len;
	int i, i_first, i_last, i_prev, i_step = 0;
	bitstr_t *task_bitmap;
	char *in_buf = job_ptr->array_task_str;
	char *out_buf = NULL;

	if (!in_buf)
		return;

	/* Only translate hex-mask strings of the form "0x..." */
	if (strlen(in_buf) < 3 || in_buf[1] != 'x')
		return;

	/* Each hex digit encodes 4 bits */
	i = strlen(in_buf);
	task_bitmap = bit_alloc(i * 4);
	bit_unfmt_hexmask(task_bitmap, in_buf);

	/* Check first for a step function (i.e. a constant stride between
	 * set bits, rendered compactly as "first-last:step") */
	i_first = bit_ffs(task_bitmap);
	i_last  = bit_fls(task_bitmap);
	if (((i_last - i_first) > 10) && !bit_test(task_bitmap, i_first + 1)) {
		bool is_step = true;
		i_prev = i_first;
		for (i = i_first + 1; i <= i_last; i++) {
			if (!bit_test(task_bitmap, i))
				continue;
			if (i_step == 0) {
				i_step = i - i_prev;
			} else if ((i - i_prev) != i_step) {
				is_step = false;
				break;
			}
			i_prev = i;
		}
		if (is_step) {
			xstrfmtcat(out_buf, "%d-%d:%d",
				   i_first, i_last, i_step);
			/* BUG FIX: previously fell through to the bit_fmt()
			 * logic below, which leaked this string and replaced
			 * it with the verbose representation */
			goto out;
		}
	}

	if (bitstr_len > 0) {
		/* Print the first bitstr_len bytes of the bitmap string */
		buf_size = bitstr_len;
		out_buf = xmalloc(buf_size);
		bit_fmt(out_buf, buf_size, task_bitmap);
		len = strlen(out_buf);
		if (len > (buf_size - 3)) {
			/* Overwrite the tail with "..." to flag truncation */
			for (i = 0; i < 3; i++)
				out_buf[buf_size - 2 - i] = '.';
		}
	} else {
		/* Print the full bitmap's string representation.
		 * For huge bitmaps this can take roughly one minute,
		 * so let the client do the work */
		buf_size = bit_size(task_bitmap) * 8;
		while (1) {
			out_buf = xmalloc(buf_size);
			bit_fmt(out_buf, buf_size, task_bitmap);
			len = strlen(out_buf);
			if ((len > 0) && (len < (buf_size - 32)))
				break;
			/* Buffer too small (or empty result) -- retry with
			 * twice the space */
			xfree(out_buf);
			buf_size *= 2;
		}
	}

out:
	/* Append the max-running-tasks throttle, e.g. "%4" */
	if (job_ptr->array_max_tasks)
		xstrfmtcat(out_buf, "%c%u", '%', job_ptr->array_max_tasks);

	xfree(job_ptr->array_task_str);
	job_ptr->array_task_str = out_buf;
	FREE_NULL_BITMAP(task_bitmap);	/* BUG FIX: was leaked on every call */
}
/*
 * Determine which of these nodes are usable by this job
 *
 * Remove nodes from the bitmap that don't have enough memory or gres to
 * support the job.
 *
 * Return SLURM_ERROR if a required node can't be used.
 *
 * if node_state = NODE_CR_RESERVED, clear bitmap (if node is required
 * then should we return NODE_BUSY!?!)
 *
 * if node_state = NODE_CR_ONE_ROW, then this node can only be used by
 * another NODE_CR_ONE_ROW job
 *
 * if node_state = NODE_CR_AVAILABLE AND:
 * - job_node_req = NODE_CR_RESERVED, then we need idle nodes
 * - job_node_req = NODE_CR_ONE_ROW, then we need idle or non-sharing nodes
 *
 * IN cr_part_ptr   - partition/row usage records consulted by _is_node_busy()
 * IN job_ptr       - job being scheduled (details, gres_list, req_node_bitmap)
 * IN/OUT bitmap    - candidate node bitmap; unusable nodes are cleared here
 * IN cr_type       - consumable-resource flags (CR_MEMORY enables mem check)
 * IN node_usage    - per-node allocated memory, gres and sharing state
 * IN job_node_req  - the job's node sharing requirement
 */
static int _verify_node_state(struct part_res_record *cr_part_ptr,
			      struct job_record *job_ptr,
			      bitstr_t * bitmap, uint16_t cr_type,
			      struct node_use_record *node_usage,
			      enum node_cr_state job_node_req)
{
	struct node_record *node_ptr;
	uint32_t i, free_mem, gres_cpus, gres_cores, min_mem;
	int i_first, i_last;
	int core_start_bit, core_end_bit, cpus_per_core;
	List gres_list;

	/* Strip the per-CPU flag so min_mem is a plain MB quantity; the
	 * comparison below treats it as a per-node requirement either way */
	if (job_ptr->details->pn_min_memory & MEM_PER_CPU)
		min_mem = job_ptr->details->pn_min_memory & (~MEM_PER_CPU);
	else
		min_mem = job_ptr->details->pn_min_memory;
	i_first = bit_ffs(bitmap);
	if (i_first >= 0)
		i_last = bit_fls(bitmap);
	else
		i_last = -2;	/* empty bitmap: chosen so the loop below
				 * never executes (note i is unsigned, so
				 * the i <= i_last comparison is performed
				 * as unsigned and still skips the body) */
	for (i = i_first; i <= i_last; i++) {
		if (!bit_test(bitmap, i))
			continue;
		node_ptr = select_node_record[i].node_ptr;
		/* Derive cpus-per-core from this node's span in the
		 * global core bitmap */
		core_start_bit = cr_get_coremap_offset(i);
		core_end_bit = cr_get_coremap_offset(i+1) - 1;
		cpus_per_core = select_node_record[i].cpus /
				(core_end_bit - core_start_bit + 1);

		/* node-level memory check */
		if ((job_ptr->details->pn_min_memory) &&
		    (cr_type & CR_MEMORY)) {
			free_mem = select_node_record[i].real_memory;
			free_mem -= node_usage[i].alloc_memory;
			if (free_mem < min_mem) {
				debug3("select/serial: node %s no mem %u < %u",
				       select_node_record[i].node_ptr->name,
				       free_mem, min_mem);
				goto clear_bit;
			}
		}

		/* node-level gres check: prefer the live usage record's
		 * gres list over the node's static one when present */
		if (node_usage[i].gres_list)
			gres_list = node_usage[i].gres_list;
		else
			gres_list = node_ptr->gres_list;
		gres_cores = gres_plugin_job_test(job_ptr->gres_list,
						  gres_list, true,
						  NULL, 0, 0, job_ptr->job_id,
						  node_ptr->name);
		/* NO_VAL from gres_plugin_job_test means "no gres limit";
		 * only scale cores to cpus when a real count came back */
		gres_cpus = gres_cores;
		if (gres_cpus != NO_VAL)
			gres_cpus *= cpus_per_core;
		if (gres_cpus == 0) {
			debug3("select/serial: node %s lacks gres",
			       node_ptr->name);
			goto clear_bit;
		}

		/* exclusive node check */
		if (node_usage[i].node_state >= NODE_CR_RESERVED) {
			debug3("select/serial: node %s in exclusive use",
			       node_ptr->name);
			goto clear_bit;

		/* non-resource-sharing node check */
		} else if (node_usage[i].node_state >= NODE_CR_ONE_ROW) {
			if ((job_node_req == NODE_CR_RESERVED) ||
			    (job_node_req == NODE_CR_AVAILABLE)) {
				debug3("select/serial: node %s non-sharing",
				       node_ptr->name);
				goto clear_bit;
			}
			/* cannot use this node if it is running jobs
			 * in sharing partitions */
			if (_is_node_busy(cr_part_ptr, i, 1,
					  job_ptr->part_ptr)) {
				debug3("select/serial: node %s sharing?",
				       node_ptr->name);
				goto clear_bit;
			}

		/* node is NODE_CR_AVAILABLE - check job request */
		} else {
			if (job_node_req == NODE_CR_RESERVED) {
				if (_is_node_busy(cr_part_ptr, i, 0,
						  job_ptr->part_ptr)) {
					debug3("select/serial: node %s busy",
					       node_ptr->name);
					goto clear_bit;
				}
			} else if (job_node_req == NODE_CR_ONE_ROW) {
				/* cannot use this node if it is running jobs
				 * in sharing partitions */
				if (_is_node_busy(cr_part_ptr, i, 1,
						  job_ptr->part_ptr)) {
					debug3("select/serial: node %s vbusy",
					       node_ptr->name);
					goto clear_bit;
				}
			}
		}
		continue;	/* node is usable, test next node */

clear_bit:	/* This node is not usable by this job */
		bit_clear(bitmap, i);
		/* If this unusable node was explicitly required by the job,
		 * the whole placement fails */
		if (job_ptr->details->req_node_bitmap &&
		    bit_test(job_ptr->details->req_node_bitmap, i)) {
			return SLURM_ERROR;
		}
	}

	return SLURM_SUCCESS;
}