/*
 * Allocate and initialize a Cell able to hold an ncolor-bit bitmap.
 *
 * The allocation is extended past sizeof(Cell) by the difference between
 * the storage needed for ncolor bits and for 1 bit — presumably the Cell
 * struct embeds one bit's worth of bitmap storage in "bit" (TODO confirm
 * against the Cell declaration).
 *
 * Returns the new Cell, or NULL on allocation failure.
 */
Cell *new_cell(color_t ncolor)
{
	/* no cast on malloc (C idiom); sizeof via the type the struct needs */
	Cell *cell = malloc(sizeof(Cell) + bit_size(ncolor) - bit_size(1));

	if (cell == NULL)
		return NULL;	/* BUG FIX: was dereferenced unconditionally */
	bit_clearall(cell->bit, ncolor);	/* BUG FIX: missing ';' here */
	cell->n = ncolor;
	return cell;
}
/* helper function for _expand_masks()
 * for each task, consider which other bits are set in avail_map
 * on the same socket, and fold them into that task's mask */
static void _blot_mask_sockets(const uint32_t maxtasks, const uint32_t task,
			       bitstr_t **masks, uint16_t hw_sockets,
			       uint16_t hw_cores, uint16_t hw_threads,
			       bitstr_t *avail_map)
{
	uint16_t i, j, size = 0;
	int blot;

	if (!masks[task])
		return;

	/* "blot" = bits per socket, assuming avail_map is evenly divided
	 * among hw_sockets; clamp to 1 to avoid a zero-width blot */
	blot = bit_size(avail_map) / hw_sockets;
	if (blot <= 0)
		blot = 1;
	size = bit_size(masks[task]);
	for (i = 0; i < size; i++) {
		if (bit_test(masks[task], i)) {
			/* check if other bits are set in avail_map on this
			 * socket and set each corresponding bit in masks.
			 * NOTE(review): start+blot can exceed
			 * bit_size(avail_map) if the mask is larger than
			 * avail_map — presumably the two are always the same
			 * size; TODO confirm at the caller */
			uint16_t start = (i / blot) * blot;
			for (j = start; j < start+blot; j++) {
				if (bit_test(avail_map, j))
					bit_set(masks[task], j);
			}
		}
	}
}
/*
 * Reset environment variables as appropriate for a job (i.e. this one tasks)
 * based upon the job step's GRES state and assigned CPUs.
 *
 * Builds a comma-separated "mlx4_N" device list from the NICs allocated to
 * the step (gres_bit_alloc[0]) that are also usable by this task
 * (usable_gres), then overwrites OMPI_MCA_btl_openib_if_include with it.
 * If no allocated NIC is usable, falls back to the first allocated NIC so
 * the variable still names a real device.  Only the single-node case
 * (node_cnt == 1) is handled.
 */
extern void step_reset_env(char ***job_env_ptr, void *gres_ptr,
			   bitstr_t *usable_gres)
{
	int i, len, first_match = -1;
	char *dev_list = NULL;
	gres_step_state_t *gres_step_ptr = (gres_step_state_t *) gres_ptr;

	if ((gres_step_ptr != NULL) &&
	    (gres_step_ptr->node_cnt == 1) &&
	    (gres_step_ptr->gres_bit_alloc != NULL) &&
	    (gres_step_ptr->gres_bit_alloc[0] != NULL) &&
	    (usable_gres != NULL)) {
		len = MIN(bit_size(gres_step_ptr->gres_bit_alloc[0]),
			  bit_size(usable_gres));
		for (i = 0; i < len; i++) {
			if (!bit_test(gres_step_ptr->gres_bit_alloc[0], i))
				continue;
			/* remember first allocated NIC for the fallback */
			if (first_match == -1)
				first_match = i;
			if (!bit_test(usable_gres, i))
				continue;
			if (!dev_list)
				dev_list = xmalloc(128);
			else
				xstrcat(dev_list, ",");
			/* prefer the device number found in gres.conf,
			 * otherwise fall back to the bit index */
			if (nic_devices && (i < nb_available_files) &&
			    (nic_devices[i] >= 0)) {
				xstrfmtcat(dev_list, "mlx4_%d",
					   nic_devices[i]);
			} else {
				xstrfmtcat(dev_list, "mlx4_%d", i);
			}
		}
		if (!dev_list && (first_match != -1)) {
			/* nothing usable: fall back to first allocated NIC */
			i = first_match;
			dev_list = xmalloc(128);
			if (nic_devices && (i < nb_available_files) &&
			    (nic_devices[i] >= 0)) {
				xstrfmtcat(dev_list, "mlx4_%d",
					   nic_devices[i]);
			} else {
				xstrfmtcat(dev_list, "mlx4_%d", i);
			}
		}
	}
	if (dev_list) {
		/* we assume mellanox cards and OpenMPI programm */
		env_array_overwrite(job_env_ptr,
				    "OMPI_MCA_btl_openib_if_include",
				    dev_list);
		xfree(dev_list);
	}
}
/*
 * Given an "avail" node_bitmap, return a corresponding "avail" core_bitmap:
 * a bitmap over all cores in the system in which exactly the cores belonging
 * to the selected nodes are set.
 */
bitstr_t *_make_core_bitmap(bitstr_t *node_map)
{
	uint32_t n, c, nodes, size;
	uint32_t coff;
	int i_first, i_last;

	nodes = bit_size(node_map);
	size = cr_get_coremap_offset(nodes);
	bitstr_t *core_map = bit_alloc(size);

	i_first = bit_ffs(node_map);
	if (i_first < 0)	/* no nodes selected: empty core map */
		return core_map;
	i_last = bit_fls(node_map);

	for (n = i_first; n <= (uint32_t) i_last; n++) {
		if (!bit_test(node_map, n))
			continue;
		/* BUG FIX: start at this node's own core offset rather than
		 * carrying c over from 0/previous iterations; the old code
		 * also set the cores of every unselected node preceding a
		 * selected one. */
		c = cr_get_coremap_offset(n);
		coff = cr_get_coremap_offset(n + 1);
		while (c < coff)
			bit_set(core_map, c++);
	}
	return core_map;
}
/* * Set environment variables as appropriate for a job (i.e. all tasks) based * upon the job step's GRES state. */ extern void step_set_env(char ***job_env_ptr, void *gres_ptr) { int i, len; char *dev_list = NULL; gres_step_state_t *gres_step_ptr = (gres_step_state_t *) gres_ptr; if ((gres_step_ptr != NULL) && (gres_step_ptr->node_cnt == 1) && (gres_step_ptr->gres_bit_alloc != NULL) && (gres_step_ptr->gres_bit_alloc[0] != NULL)) { len = bit_size(gres_step_ptr->gres_bit_alloc[0]); for (i=0; i<len; i++) { if (!bit_test(gres_step_ptr->gres_bit_alloc[0], i)) continue; if (!dev_list) dev_list = xmalloc(128); else xstrcat(dev_list, ","); xstrfmtcat(dev_list, "%d", i); } } if (dev_list) { env_array_overwrite(job_env_ptr,"CUDA_VISIBLE_DEVICES", dev_list); xfree(dev_list); } else { /* The gres.conf file must identify specific device files * in order to set the CUDA_VISIBLE_DEVICES env var */ env_array_overwrite(job_env_ptr,"CUDA_VISIBLE_DEVICES", "NoDevFiles"); } }
/*
 * Dump a gang-scheduling partition's shadow jobs, regular jobs and active
 * resource map via info(); emits nothing unless DEBUG_FLAG_GANG is enabled.
 */
static void _print_jobs(struct gs_part *p_ptr)
{
	int idx;

	if (!(slurmctld_conf.debug_flags & DEBUG_FLAG_GANG))
		return;

	info("gang: part %s has %u jobs, %u shadows:",
	     p_ptr->part_name, p_ptr->num_jobs, p_ptr->num_shadows);
	for (idx = 0; idx < p_ptr->num_shadows; idx++) {
		info("gang: shadow job %u row_s %s, sig_s %s",
		     p_ptr->shadow[idx]->job_ptr->job_id,
		     _print_flag(p_ptr->shadow[idx]->row_state),
		     _print_flag(p_ptr->shadow[idx]->sig_state));
	}
	for (idx = 0; idx < p_ptr->num_jobs; idx++) {
		info("gang: job %u row_s %s, sig_s %s",
		     p_ptr->job_list[idx]->job_ptr->job_id,
		     _print_flag(p_ptr->job_list[idx]->row_state),
		     _print_flag(p_ptr->job_list[idx]->sig_state));
	}
	if (p_ptr->active_resmap) {
		int total = bit_size(p_ptr->active_resmap);
		int used = bit_set_count(p_ptr->active_resmap);
		info("gang: active resmap has %d of %d bits set",
		     used, total);
	}
}
/* Return 1 if job "cpu count" fits in this row, else return 0 */
static int _can_cpus_fit(struct job_record *job_ptr, struct gs_part *p_ptr)
{
	int i, j, size;
	uint16_t *p_cpus, *j_cpus;
	job_resources_t *job_res = job_ptr->job_resrcs;

	/* only meaningful when gang scheduling at CPU granularity */
	if (gr_type != GS_CPU)
		return 0;

	size = bit_size(job_res->node_bitmap);
	p_cpus = p_ptr->active_cpus;
	j_cpus = job_res->cpus;

	if (!p_cpus || !j_cpus)
		return 0;

	/* i walks every node index; j walks only the job's allocated nodes,
	 * since job_res->cpus is indexed in allocated-node order */
	for (j = 0, i = 0; i < size; i++) {
		if (bit_test(job_res->node_bitmap, i)) {
			/* reject if adding this job's CPUs would exceed the
			 * node's physical CPU count */
			if (p_cpus[i]+j_cpus[j] > _get_phys_bit_cnt(i))
				return 0;
			j++;
		}
	}
	return 1;
}
/* Set the socket and core counts associated with a set of selected
 * nodes of a job_resources data structure based upon slurmctld state.
 * (sets cores_per_socket, sockets_per_node, and sock_core_rep_count based
 * upon the value of node_bitmap, also creates core_bitmap based upon
 * the total number of cores in the allocation). Call this ONLY from
 * slurmctld. Example of use:
 *
 * job_resources_t *job_resrcs_ptr = create_job_resources();
 * node_name2bitmap("dummy[2,5,12,16]", true, &(job_res_ptr->node_bitmap));
 * rc = build_job_resources(job_resrcs_ptr, node_record_table_ptr,
 *			    slurmctld_conf.fast_schedule);
 *
 * Identical consecutive (sockets, cores) node shapes are run-length
 * encoded into the sock_core_rep_count array.
 * Returns SLURM_SUCCESS or SLURM_ERROR (node_bitmap missing).
 */
extern int build_job_resources(job_resources_t *job_resrcs,
			       void *node_rec_table, uint16_t fast_schedule)
{
	int i, bitmap_len;
	int core_cnt = 0, sock_inx = -1;
	uint32_t cores, socks;
	struct node_record *node_ptr, *node_record_table;

	if (job_resrcs->node_bitmap == NULL) {
		error("build_job_resources: node_bitmap is NULL");
		return SLURM_ERROR;
	}

	node_record_table = (struct node_record *) node_rec_table;
	/* rebuild the arrays from scratch (xfree tolerates NULL) */
	xfree(job_resrcs->sockets_per_node);
	xfree(job_resrcs->cores_per_socket);
	xfree(job_resrcs->sock_core_rep_count);
	job_resrcs->sockets_per_node =
		xmalloc(sizeof(uint16_t) * job_resrcs->nhosts);
	job_resrcs->cores_per_socket =
		xmalloc(sizeof(uint16_t) * job_resrcs->nhosts);
	job_resrcs->sock_core_rep_count =
		xmalloc(sizeof(uint32_t) * job_resrcs->nhosts);

	bitmap_len = bit_size(job_resrcs->node_bitmap);
	for (i=0; i<bitmap_len; i++) {
		if (!bit_test(job_resrcs->node_bitmap, i))
			continue;
		node_ptr = node_record_table + i;
		/* fast_schedule uses the configured (not detected) layout */
		if (fast_schedule) {
			socks = node_ptr->config_ptr->sockets;
			cores = node_ptr->config_ptr->cores;
		} else {
			socks = node_ptr->sockets;
			cores = node_ptr->cores;
		}
		/* start a new rep-count group when the shape changes */
		if ((sock_inx < 0) ||
		    (socks != job_resrcs->sockets_per_node[sock_inx]) ||
		    (cores != job_resrcs->cores_per_socket[sock_inx])) {
			sock_inx++;
			job_resrcs->sockets_per_node[sock_inx] = socks;
			job_resrcs->cores_per_socket[sock_inx] = cores;
		}
		job_resrcs->sock_core_rep_count[sock_inx]++;
		core_cnt += (cores * socks);
	}
#ifndef HAVE_BG
	job_resrcs->core_bitmap      = bit_alloc(core_cnt);
	job_resrcs->core_bitmap_used = bit_alloc(core_cnt);
	if ((job_resrcs->core_bitmap == NULL) ||
	    (job_resrcs->core_bitmap_used == NULL))
		fatal("bit_alloc malloc failure");
#endif
	return SLURM_SUCCESS;
}
/*
 * Debug helper: print a bitstring object's address, bit count (size) and
 * number of set bits (count) to stdout in a key=>value layout.
 */
static void bit_object_show(void *B)
{
	printf("\t__bit__ -> {\n"
	       "\t object=>0x%p,\n"
	       "\t size=>%d,\n"
	       "\t count=>%d,\n"
	       "\t}\n",
	       B, bit_size(B), bit_count(B));
}
/*
 * _lllp_map_abstract_mask
 *
 * Map one abstract block mask to a physical machine mask
 *
 * IN - mask to map
 * OUT - mapped mask (storage allocated in this routine; caller frees)
 */
static bitstr_t *_lllp_map_abstract_mask(bitstr_t *bitmask)
{
	int i, bit;
	int num_bits = bit_size(bitmask);
	bitstr_t *newmask = NULL;
	newmask = (bitstr_t *) bit_alloc(num_bits);

	/* remap to physical machine: BLOCK_MAP translates an abstract
	 * (block-ordered) CPU index into its machine index */
	for (i = 0; i < num_bits; i++) {
		if (bit_test(bitmask,i)) {
			bit = BLOCK_MAP(i);
			if (bit < bit_size(newmask))
				bit_set(newmask, bit);
			else
				/* out-of-range mapping: log and drop the bit
				 * rather than corrupt memory */
				error("%s: can't go from %d -> %d since we "
				      "only have %"BITSTR_FMT" bits",
				      __func__, i, bit, bit_size(newmask));
		}
	}
	return newmask;
}
/*
 * Validate a job_resources structure against current slurmctld node state:
 * every selected node's (sockets, cores) shape must match the run-length
 * encoded sockets_per_node/cores_per_socket/sock_core_rep_count arrays.
 * Returns SLURM_SUCCESS, or SLURM_ERROR on missing data or a mismatch.
 */
extern int valid_job_resources(job_resources_t *job_resrcs,
			       void *node_rec_table, uint16_t fast_schedule)
{
	int i, bitmap_len;
	int sock_inx = 0, sock_cnt = 0;
	uint32_t cores, socks;
	struct node_record *node_ptr, *node_record_table;

	if (job_resrcs->node_bitmap == NULL) {
		error("valid_job_resources: node_bitmap is NULL");
		return SLURM_ERROR;
	}
	if ((job_resrcs->sockets_per_node == NULL) ||
	    (job_resrcs->cores_per_socket == NULL) ||
	    (job_resrcs->sock_core_rep_count == NULL)) {
		error("valid_job_resources: socket/core array is NULL");
		return SLURM_ERROR;
	}

	node_record_table = (struct node_record *) node_rec_table;
	bitmap_len = bit_size(job_resrcs->node_bitmap);
	for (i=0; i<bitmap_len; i++) {
		if (!bit_test(job_resrcs->node_bitmap, i))
			continue;
		node_ptr = node_record_table + i;
		/* fast_schedule validates against the configured layout */
		if (fast_schedule) {
			socks = node_ptr->config_ptr->sockets;
			cores = node_ptr->config_ptr->cores;
		} else {
			socks = node_ptr->sockets;
			cores = node_ptr->cores;
		}
		/* advance to the next group once this group's repetition
		 * count has been consumed */
		if (sock_cnt >= job_resrcs->sock_core_rep_count[sock_inx]) {
			sock_inx++;
			sock_cnt = 0;
		}
		if ((socks != job_resrcs->sockets_per_node[sock_inx]) ||
		    (cores != job_resrcs->cores_per_socket[sock_inx])) {
			error("valid_job_resources: "
			      "%s sockets:%u,%u, cores %u,%u",
			      node_ptr->name,
			      socks, job_resrcs->sockets_per_node[sock_inx],
			      cores, job_resrcs->cores_per_socket[sock_inx]);
			return SLURM_ERROR;
		}
		sock_cnt++;
	}
	return SLURM_SUCCESS;
}
/*
 * Return true if array task "array_id" belongs to this job record, either
 * as the job's own array_task_id or as a set bit in its array_bitmap.
 */
static bool _is_task_in_job(job_info_t *job_ptr, int array_id)
{
	bitstr_t *task_bitmap;
	int nbits;

	if (job_ptr->array_task_id == array_id)
		return true;

	task_bitmap = (bitstr_t *) job_ptr->array_bitmap;
	if (task_bitmap == NULL)
		return false;

	/* an id beyond the bitmap can not be a member */
	nbits = bit_size(task_bitmap);
	if (array_id >= nbits)
		return false;

	return bit_test(task_bitmap, array_id);
}
/*
 * Set environment variables as appropriate for a job (i.e. all tasks) based
 * upon the job step's GRES state.
 *
 * gres/nic variant: builds a comma-separated "mlx4_N" list for the NICs
 * allocated to the step (single-node case only) and writes it to
 * OMPI_MCA_btl_openib_if_include.
 */
extern void step_set_env(char ***job_env_ptr, void *gres_ptr)
{
	int i, len, local_inx = 0;
	char *dev_list = NULL;
	gres_step_state_t *gres_step_ptr = (gres_step_state_t *) gres_ptr;
	bool use_local_dev_index = _use_local_device_index();

	if ((gres_step_ptr != NULL) &&
	    (gres_step_ptr->node_cnt == 1) &&
	    (gres_step_ptr->gres_bit_alloc != NULL) &&
	    (gres_step_ptr->gres_bit_alloc[0] != NULL)) {
		len = bit_size(gres_step_ptr->gres_bit_alloc[0]);
		for (i = 0; i < len; i++) {
			if (!bit_test(gres_step_ptr->gres_bit_alloc[0], i))
				continue;
			if (!dev_list)
				dev_list = xmalloc(128);
			else
				xstrcat(dev_list, ",");
			if (use_local_dev_index) {
				/* number devices 0..N-1 as the step sees
				 * them rather than by global index */
				xstrfmtcat(dev_list, "mlx4_%d", local_inx++);
			} else if (nic_devices && (i < nb_available_files) &&
				   (nic_devices[i] >= 0)) {
				/* device number from gres.conf */
				xstrfmtcat(dev_list, "mlx4_%d",
					   nic_devices[i]);
			} else {
				xstrfmtcat(dev_list, "mlx4_%d", i);
			}
		}
	} else if (gres_step_ptr && (gres_step_ptr->gres_cnt_alloc > 0)) {
		/* The gres.conf file must identify specific device files
		 * in order to set the OMPI_MCA_btl_openib_if_include env var */
		error("gres/nic unable to set OMPI_MCA_btl_openib_if_include, "
		      "no device files configured");
	} else {
		/* presumably xstrcat allocates when dev_list is NULL
		 * (slurm xstring semantics) — yields the sentinel value */
		xstrcat(dev_list, "NoDevFiles");
	}

	if (dev_list) {
		/* we assume mellanox cards and OpenMPI programm */
		env_array_overwrite(job_env_ptr,
				    "OMPI_MCA_btl_openib_if_include",
				    dev_list);
		xfree(dev_list);
	}
}
/* Return a copy of core_bitmap only for the specific node.
 * node_id is relative to the job's allocated nodes; the loop walks the
 * run-length encoded socket/core groups to find the node's first core bit
 * (bit_inx) and its core count.  Returns a newly allocated bitmap the
 * caller must free, or NULL on error. */
extern bitstr_t * copy_job_resources_node(job_resources_t *job_resrcs_ptr,
					  uint32_t node_id)
{
	int i, bit_inx = 0, core_cnt = 0;
	bitstr_t *core_bitmap;

	xassert(job_resrcs_ptr);

	for (i = 0; i < job_resrcs_ptr->nhosts; i++) {
		if (job_resrcs_ptr->sock_core_rep_count[i] <= node_id) {
			/* node is past this group: skip the whole group */
			bit_inx += job_resrcs_ptr->sockets_per_node[i] *
				job_resrcs_ptr->cores_per_socket[i] *
				job_resrcs_ptr->sock_core_rep_count[i];
			node_id -= job_resrcs_ptr->sock_core_rep_count[i];
		} else {
			/* node is inside this group */
			bit_inx += job_resrcs_ptr->sockets_per_node[i] *
				job_resrcs_ptr->cores_per_socket[i] *
				node_id;
			core_cnt = job_resrcs_ptr->sockets_per_node[i] *
				job_resrcs_ptr->cores_per_socket[i];
			break;
		}
	}
	if (core_cnt < 1) {
		error("copy_job_resources_node: core_cnt=0");
		return NULL;
	}
	i = bit_size(job_resrcs_ptr->core_bitmap);
	if ((bit_inx + core_cnt) > i) {
		error("copy_job_resources_node: offset > bitmap size "
		      "(%d >= %d)", (bit_inx + core_cnt), i);
		return NULL;
	}

	core_bitmap = bit_alloc(core_cnt);
	if (!core_bitmap)
		fatal("copy_job_resources_node: bit_alloc(%d): %m", core_cnt);
	/* copy this node's slice of the job-wide core bitmap */
	for (i = 0; i < core_cnt; i++) {
		if (bit_test(job_resrcs_ptr->core_bitmap, bit_inx++))
			bit_set(core_bitmap, i);
	}

	return core_bitmap;
}
/*
 * Translate (node_id, socket_id, core_id) into an offset within the job's
 * core_bitmap by walking the run-length encoded socket/core groups.
 * Returns the bit offset, or -1 on an out-of-range argument.
 * NOTE(review): error messages say "get_job_resrcs_bit" /
 * "get_job_resources_bit" rather than this function's name — presumably a
 * historical rename; left as-is since log scrapers may match on them.
 */
extern int get_job_resources_offset(job_resources_t *job_resrcs_ptr,
				    uint32_t node_id, uint16_t socket_id,
				    uint16_t core_id)
{
	int i, bit_inx = 0;

	xassert(job_resrcs_ptr);

	for (i=0; i<job_resrcs_ptr->nhosts; i++) {
		if (job_resrcs_ptr->sock_core_rep_count[i] <= node_id) {
			/* node is past this group: skip the whole group */
			bit_inx += job_resrcs_ptr->sockets_per_node[i] *
				job_resrcs_ptr->cores_per_socket[i] *
				job_resrcs_ptr->sock_core_rep_count[i];
			node_id -= job_resrcs_ptr->sock_core_rep_count[i];
		} else if (socket_id >= job_resrcs_ptr->
			   sockets_per_node[i]) {
			error("get_job_resrcs_bit: socket_id >= socket_cnt "
			      "(%u >= %u)", socket_id,
			      job_resrcs_ptr->sockets_per_node[i]);
			return -1;
		} else if (core_id >= job_resrcs_ptr->cores_per_socket[i]) {
			error("get_job_resrcs_bit: core_id >= core_cnt "
			      "(%u >= %u)", core_id,
			      job_resrcs_ptr->cores_per_socket[i]);
			return -1;
		} else {
			/* node is inside this group: add node, socket and
			 * core components of the offset */
			bit_inx += job_resrcs_ptr->sockets_per_node[i] *
				job_resrcs_ptr->cores_per_socket[i] *
				node_id;
			bit_inx += job_resrcs_ptr->cores_per_socket[i] *
				socket_id;
			bit_inx += core_id;
			break;
		}
	}
	i = bit_size(job_resrcs_ptr->core_bitmap);
	if (bit_inx >= i) {
		error("get_job_resources_bit: offset >= bitmap size "
		      "(%d >= %d)", bit_inx, i);
		return -1;
	}

	return bit_inx;
}
/*
 * Initialize a complex-number table (linked list of {a,b} nodes).
 * First pass zeroes every node's a/b fields; second pass assigns each node
 * a reordered index via bit_order() — presumably a bit-reversal permutation
 * for an FFT-style butterfly, with bit_size(p1->number-1) giving the number
 * of index bits (a different bit_size than the bitstring one elsewhere in
 * this project) — TODO confirm.
 * NOTE(review): assumes the list reachable via ->next from header p1 holds
 * at least p1->number nodes.  Always returns 0.
 */
int init_complex_table(struct complex_table *p1)
{
	int i;
	struct complex_table *p2=p1;

	for(i=0;i<p1->number;i++) {
		p2=p2->next;
		p2->a=0;
		p2->b=0;
	}
	p2=p1;
	for(i=0;i<p1->number;i++) {
		p2=p2->next;
		p2->number=bit_order(i,bit_size(p1->number-1));
	}
	return 0;
}
/* Remove any specialized cores from those allocated to the job.
 * Starts by marking every bit of the job's core_bitmap set, then clears the
 * bits whose cores are absent from avail_core_bitmap, recomputing each
 * node's CPU count as usable-cores * threads-per-core along the way. */
static void _clear_spec_cores(struct job_record *job_ptr,
			      bitstr_t *avail_core_bitmap)
{
	int first_node, last_node, i_node;
	int first_core, last_core, i_core;
	int alloc_node = -1, alloc_core = -1, size;
	job_resources_t *job_res = job_ptr->job_resrcs;
	multi_core_data_t *mc_ptr = NULL;

	if (job_ptr->details && job_ptr->details->mc_ptr)
		mc_ptr = job_ptr->details->mc_ptr;

	/* assume all cores allocated, then clear the unavailable ones */
	size = bit_size(job_res->core_bitmap);
	bit_nset(job_res->core_bitmap, 0, size - 1);

	first_node = bit_ffs(job_res->node_bitmap);
	if (first_node >= 0)
		last_node = bit_fls(job_res->node_bitmap);
	else
		last_node = first_node - 1;	/* empty loop below */
	for (i_node = first_node; i_node <= last_node; i_node++) {
		if (!bit_test(job_res->node_bitmap, i_node))
			continue;
		job_res->cpus[++alloc_node] = 0;
		/* i_core indexes the system-wide core map;
		 * alloc_core indexes the job's (allocated-nodes-only) map */
		first_core = cr_get_coremap_offset(i_node);
		last_core = cr_get_coremap_offset(i_node + 1) - 1;
		for (i_core = first_core; i_core <= last_core; i_core++) {
			alloc_core++;
			if (bit_test(avail_core_bitmap, i_core)) {
				uint16_t tpc =
					select_node_record[i_node].vpus;
				/* honor a job-requested threads_per_core
				 * limit if lower than the hardware's */
				if (mc_ptr &&
				    (mc_ptr->threads_per_core != NO_VAL16) &&
				    (mc_ptr->threads_per_core < tpc))
					tpc = mc_ptr->threads_per_core;
				job_res->cpus[alloc_node] += tpc;
			} else {
				bit_clear(job_res->core_bitmap, alloc_core);
			}
		}
	}
}
/*
 * Set (new_value == true) or clear (false) every core_bitmap bit belonging
 * to the given allocated node, located by walking the run-length encoded
 * socket/core groups.  Returns SLURM_SUCCESS or SLURM_ERROR.
 */
static int _change_job_resources_node(job_resources_t *job_resrcs_ptr,
				      uint32_t node_id, bool new_value)
{
	int i, bit_inx = 0, core_cnt = 0;

	xassert(job_resrcs_ptr);

	for (i=0; i<job_resrcs_ptr->nhosts; i++) {
		if (job_resrcs_ptr->sock_core_rep_count[i] <= node_id) {
			/* node is past this group: skip the whole group */
			bit_inx += job_resrcs_ptr->sockets_per_node[i] *
				job_resrcs_ptr->cores_per_socket[i] *
				job_resrcs_ptr->sock_core_rep_count[i];
			node_id -= job_resrcs_ptr->sock_core_rep_count[i];
		} else {
			/* node is inside this group */
			bit_inx += job_resrcs_ptr->sockets_per_node[i] *
				job_resrcs_ptr->cores_per_socket[i] *
				node_id;
			core_cnt = job_resrcs_ptr->sockets_per_node[i] *
				job_resrcs_ptr->cores_per_socket[i];
			break;
		}
	}
	if (core_cnt < 1) {
		error("_change_job_resources_node: core_cnt=0");
		return SLURM_ERROR;
	}
	i = bit_size(job_resrcs_ptr->core_bitmap);
	if ((bit_inx + core_cnt) > i) {
		error("_change_job_resources_node: offset > bitmap size "
		      "(%d >= %d)", (bit_inx + core_cnt), i);
		return SLURM_ERROR;
	}

	for (i=0; i<core_cnt; i++) {
		if (new_value)
			bit_set(job_resrcs_ptr->core_bitmap, bit_inx++);
		else
			bit_clear(job_resrcs_ptr->core_bitmap, bit_inx++);
	}

	return SLURM_SUCCESS;
}
/*
 * Return true if array task "array_id" is part of this job: either NO_VAL
 * (wildcard), the job's own array_task_id, or a set bit in array_bitmap.
 */
static bool _task_id_in_job(job_info_t *job_ptr, uint32_t array_id)
{
	bitstr_t *task_map;
	uint32_t nbits;

	if ((array_id == NO_VAL) || (array_id == job_ptr->array_task_id))
		return true;

	task_map = (bitstr_t *) job_ptr->array_bitmap;
	if (!task_map)
		return false;

	nbits = bit_size(task_map);
	if (array_id >= nbits)
		return false;

	return bit_test(task_map, array_id) ? true : false;
}
/* helper function for _expand_masks()
 * Expand every set bit in "mask" to cover its whole "blot" (an aligned run
 * of blot consecutive bits).
 * NOTE(review): bit_nset's upper bound blot_start+blot-1 can exceed the
 * last bit when bit_size(mask) is not a multiple of blot — presumably the
 * caller guarantees alignment; TODO confirm. */
static void _blot_mask(bitstr_t *mask, uint16_t blot)
{
	uint16_t b, nbits;
	int last_start = -1;

	if (!mask)
		return;

	nbits = bit_size(mask);
	for (b = 0; b < nbits; b++) {
		if (!bit_test(mask, b))
			continue;
		/* fill in the whole blot containing bit b, once per blot */
		uint16_t blot_start = (b / blot) * blot;
		if (blot_start != last_start) {
			bit_nset(mask, blot_start, blot_start + blot - 1);
			last_start = blot_start;
		}
	}
}
/* Return the count of core bitmaps set for the specific node.
 * node_id is relative to the job's allocated nodes; the loop walks the
 * run-length encoded socket/core groups to find the node's slice of
 * core_bitmap.  Returns 0 on any error. */
extern int count_job_resources_node(job_resources_t *job_resrcs_ptr,
				    uint32_t node_id)
{
	int i, bit_inx = 0, core_cnt = 0;
	int set_cnt = 0;

	xassert(job_resrcs_ptr);

	for (i=0; i<job_resrcs_ptr->nhosts; i++) {
		if (job_resrcs_ptr->sock_core_rep_count[i] <= node_id) {
			/* node is past this group: skip the whole group */
			bit_inx += job_resrcs_ptr->sockets_per_node[i] *
				job_resrcs_ptr->cores_per_socket[i] *
				job_resrcs_ptr->sock_core_rep_count[i];
			node_id -= job_resrcs_ptr->sock_core_rep_count[i];
		} else {
			/* node is inside this group */
			bit_inx += job_resrcs_ptr->sockets_per_node[i] *
				job_resrcs_ptr->cores_per_socket[i] *
				node_id;
			core_cnt = job_resrcs_ptr->sockets_per_node[i] *
				job_resrcs_ptr->cores_per_socket[i];
			break;
		}
	}
	if (core_cnt < 1) {
		error("count_job_resources_node: core_cnt=0");
		return set_cnt;
	}
	i = bit_size(job_resrcs_ptr->core_bitmap);
	if ((bit_inx + core_cnt) > i) {
		error("count_job_resources_node: offset > bitmap size "
		      "(%d >= %d)", (bit_inx + core_cnt), i);
		return set_cnt;
	}

	for (i=0; i<core_cnt; i++) {
		if (bit_test(job_resrcs_ptr->core_bitmap, bit_inx++))
			set_cnt++;
	}

	return set_cnt;
}
/*
 * Set environment variables as appropriate for a job (i.e. all tasks) based
 * upon the job's GRES state.
 *
 * gres/gpu variant: builds a comma-separated device-number list for the
 * GPUs allocated to the job (single-node case only) and writes it to
 * CUDA_VISIBLE_DEVICES.
 */
extern void job_set_env(char ***job_env_ptr, void *gres_ptr)
{
	int i, len;
	char *dev_list = NULL;
	gres_job_state_t *gres_job_ptr = (gres_job_state_t *) gres_ptr;

	if ((gres_job_ptr != NULL) &&
	    (gres_job_ptr->node_cnt == 1) &&
	    (gres_job_ptr->gres_bit_alloc != NULL) &&
	    (gres_job_ptr->gres_bit_alloc[0] != NULL)) {
		len = bit_size(gres_job_ptr->gres_bit_alloc[0]);
		for (i=0; i<len; i++) {
			if (!bit_test(gres_job_ptr->gres_bit_alloc[0], i))
				continue;
			if (!dev_list)
				dev_list = xmalloc(128);
			else
				xstrcat(dev_list, ",");
			/* prefer the device number from gres.conf, else
			 * fall back to the bit index */
			if (gpu_devices && (i < nb_available_files) &&
			    (gpu_devices[i] >= 0))
				xstrfmtcat(dev_list, "%d", gpu_devices[i]);
			else
				xstrfmtcat(dev_list, "%d", i);
		}
	} else if (gres_job_ptr && (gres_job_ptr->gres_cnt_alloc > 0)) {
		/* The gres.conf file must identify specific device files
		 * in order to set the CUDA_VISIBLE_DEVICES env var */
		error("gres/gpu unable to set CUDA_VISIBLE_DEVICES, "
		      "no device files configured");
	} else {
		/* presumably xstrcat allocates when dev_list is NULL
		 * (slurm xstring semantics) — yields the sentinel value */
		xstrcat(dev_list, "NoDevFiles");
	}

	if (dev_list) {
		env_array_overwrite(job_env_ptr,"CUDA_VISIBLE_DEVICES",
				    dev_list);
		xfree(dev_list);
	}
}
/*
 * Tree-walk callback: accumulate per-type size statistics (smallest,
 * largest, total, number) for XCODE objects, optionally reporting each
 * object to cheat_player.  Always returns 1 so traversal continues.
 * NOTE(review): "osize" holds the static datasize but is never read
 * afterwards — presumably kept for debugger inspection.
 */
static int debug_check_stuff(void *key, void *data, int depth, void *arg)
{
	const dbref key_val = (dbref)key;
	XCODE *const xcode_obj = data;
	int osize, size;
	MAP *map;

	osize = size = SpecialObjects[xcode_obj->type].datasize;
	switch (xcode_obj->type) {
	case GTYPE_MAP:
		map = (MAP *)xcode_obj;
		if(map->map) {
			/* add dynamic storage: the 2-D tile array plus
			 * whatever bit_size/obj_size/mech_size report for
			 * this map — presumably the byte sizes of its
			 * bitfields, object list and mech list; TODO
			 * confirm against their definitions */
			size += sizeof(map->map[0][0]) * map->map_width *
				map->map_height;
			size += bit_size(map);
			size += obj_size(map);
			size += mech_size(map);
		}
		break;
	default:
		break;
	}
	/* track min/max/total/count per object type */
	if(smallest[xcode_obj->type] < 0 || size < smallest[xcode_obj->type])
		smallest[xcode_obj->type] = size;
	if(largest[xcode_obj->type] < 0 || size > largest[xcode_obj->type])
		largest[xcode_obj->type] = size;
	total[xcode_obj->type] += size;
	number[xcode_obj->type]++;
	if(cheat_player > 0)
		notify_printf(cheat_player, "#%5d: %10s %5d", key_val,
			      SpecialObjects[xcode_obj->type].type,
			      xcode_obj->type == GTYPE_AUTO ?
			      ((AUTO *)xcode_obj)->mymechnum : 0
			      );
	return 1;
}
/* helper function for _expand_masks()
 * For each set bit in "mask", set every bit of avail_map that lies in the
 * same aligned "blot" (run of blot consecutive bits), processing each blot
 * at most once. */
static void _blot_mask(bitstr_t *mask, bitstr_t *avail_map, uint16_t blot)
{
	uint16_t i, j, size = 0;
	int prev = -1;

	if (!mask)
		return;
	size = bit_size(mask);
	for (i = 0; i < size; i++) {
		if (bit_test(mask, i)) {
			/* fill in this blot */
			uint16_t start = (i / blot) * blot;
			if (start != prev) {
				/* NOTE(review): start+blot can exceed
				 * bit_size(avail_map) if sizes differ or
				 * size is not a multiple of blot —
				 * presumably callers guarantee alignment;
				 * TODO confirm */
				for (j = start; j < start + blot; j++) {
					if (bit_test(avail_map, j))
						bit_set(mask, j);
				}
				prev = start;
			}
		}
	}
}
/*
 * Initialize a data table (linked list of value nodes) from array "a".
 * Pass 1 zeroes every node's value; pass 2 assigns each node a reordered
 * index via bit_order() — presumably a bit-reversal permutation (as in
 * init_complex_table; this bit_size counts index bits, not bitstring
 * bits); pass 3 loads each node's value from a[] at its reordered index.
 * NOTE(review): assumes the list holds at least p1->number nodes and that
 * a[] is indexable by every reordered index — TODO confirm.
 * Always returns 0.
 */
int init_data_table(struct data *p1,double *a)
{
	int i;
	struct data *p2=p1;

	for(i=0;i<p1->number;i++) {
		p2=p2->next;
		p2->value=0;
	}
	p2=p1;
	for(i=0;i<p1->number;i++) {
		p2=p2->next;
		p2->number=bit_order(i,bit_size(p1->number-1));
	}
	p2=p1;
	for(i=0;i<p1->number;i++) {
		p2=p2->next;
		p2->value=a[p2->number];
	}
	return 0;
}
/* _match_mask_to_ldom
 *
 * expand each mask to encompass the whole locality domain
 * within which it currently exists
 * NOTE: this assumes that the masks are already in logical
 * (and not abstract) CPU order.
 * NOTE(review): only masks[0] is NULL-checked; presumably masks[i] is
 * non-NULL for all i < maxtasks — TODO confirm at the caller.
 */
static void _match_masks_to_ldom(const uint32_t maxtasks, bitstr_t **masks)
{
	uint32_t i, b, size;

	if (!masks || !masks[0])
		return;
	size = bit_size(masks[0]);
	for(i = 0; i < maxtasks; i++) {
		for (b = 0; b < size; b++) {
			if (bit_test(masks[i], b)) {
				/* get the NUMA node for this CPU, and then
				 * set every CPU in the mask that lives on
				 * the same NUMA node */
				int c;
				uint16_t nnid = slurm_get_numa_node(b);
				for (c = 0; c < size; c++) {
					if (slurm_get_numa_node(c) == nnid)
						bit_set(masks[i], c);
				}
			}
		}
	}
}
/*
 * Recursively test whether any QOS reachable through preempt_bitstr
 * preempts begin_qosid (i.e. the preemption graph contains a cycle through
 * begin_qosid).  Returns 1 if a loop is found, else 0.
 * NOTE(review): there is no visited-set — a cycle in the preemption graph
 * that does NOT pass through begin_qosid would recurse without bound;
 * presumably such cycles are rejected before reaching here — TODO confirm.
 */
static int _preemption_loop(mysql_conn_t *mysql_conn, int begin_qosid,
			    bitstr_t *preempt_bitstr)
{
	slurmdb_qos_rec_t qos_rec;
	int rc = 0, i=0;

	xassert(preempt_bitstr);

	/* check in the preempt list for all qos's preempted */
	for(i=0; i<bit_size(preempt_bitstr); i++) {
		if (!bit_test(preempt_bitstr, i))
			continue;
		/* look up the QOS record for bit/id i */
		memset(&qos_rec, 0, sizeof(qos_rec));
		qos_rec.id = i;
		assoc_mgr_fill_in_qos(mysql_conn, &qos_rec,
				      ACCOUNTING_ENFORCE_QOS, NULL);
		/* check if the begin_qosid is preempted by this qos
		 * if so we have a loop */
		if (qos_rec.preempt_bitstr
		    && bit_test(qos_rec.preempt_bitstr, begin_qosid)) {
			error("QOS id %d has a loop at QOS %s",
			      begin_qosid, qos_rec.name);
			rc = 1;
			break;
		} else if (qos_rec.preempt_bitstr) {
			/* check this qos' preempt list and make sure
			 * no loops exist there either */
			if ((rc = _preemption_loop(mysql_conn, begin_qosid,
						   qos_rec.preempt_bitstr)))
				break;
		}
	}
	return rc;
}
/*
 * Free (deallocate) a BlueGene block, retrying up to MAX_FREE_RETRIES
 * times with FREE_SLEEP_INTERVAL-second sleeps when "wait" is set.
 *
 * IN bg_record - block to free
 * IN wait      - poll until the block reaches FREE (or error) state
 * IN locked    - caller already holds block_state_mutex; when false this
 *                function takes and releases the mutex itself.  The mutex
 *                is always dropped around sleep() to avoid blocking other
 *                state changes.
 * RET SLURM_SUCCESS, or SLURM_ERROR if the block would not deallocate
 *     (in which case it is marked in an error state via
 *     select_g_update_block).
 */
extern int bg_free_block(bg_record_t *bg_record, bool wait, bool locked)
{
	int rc = SLURM_SUCCESS;
	int count = 0;

	if (!bg_record) {
		error("bg_free_block: there was no bg_record");
		return SLURM_ERROR;
	}

	if (!locked)
		slurm_mutex_lock(&block_state_mutex);

	while (count < MAX_FREE_RETRIES) {
		/* block was removed */
		if (bg_record->magic != BLOCK_MAGIC) {
			error("block was removed while freeing it here");
			xassert(0);
			if (!locked)
				slurm_mutex_unlock(&block_state_mutex);
			return SLURM_SUCCESS;
		}
		/* Reset these here so we don't try to reboot it
		   when the state goes to free. */
		bg_record->boot_state = 0;
		bg_record->boot_count = 0;
		/* Here we don't need to check if the block is still
		 * in exsistance since this function can't be called on
		 * the same block twice.  It may
		 * had already been removed at this point also.
		 */
#ifdef HAVE_BG_FILES
		if (bg_record->state != BG_BLOCK_FREE
		    && bg_record->state != BG_BLOCK_TERM) {
			if (bg_conf->slurm_debug_flags
			    & DEBUG_FLAG_SELECT_TYPE)
				info("bridge_destroy %s",
				     bg_record->bg_block_id);
			rc = bridge_block_free(bg_record);
			if (rc != SLURM_SUCCESS) {
				if (rc == BG_ERROR_BLOCK_NOT_FOUND) {
					/* already gone: treat as freed */
					debug("block %s is not found",
					      bg_record->bg_block_id);
					bg_record->state = BG_BLOCK_FREE;
					break;
				} else if (rc == BG_ERROR_FREE) {
					if (bg_conf->slurm_debug_flags
					    & DEBUG_FLAG_SELECT_TYPE)
						info("bridge_block_free"
						     "(%s): %s State = %s",
						     bg_record->bg_block_id,
						     bg_err_str(rc),
						     bg_block_state_string(
							     bg_record->state));
				} else if (rc == BG_ERROR_INVALID_STATE) {
#ifndef HAVE_BGL
					/* If the state is error and
					   we get an incompatible
					   state back here, it means
					   we set it ourselves so
					   break out.
					*/
					if (bg_record->state
					    & BG_BLOCK_ERROR_FLAG)
						break;
#endif
					if (bg_conf->slurm_debug_flags
					    & DEBUG_FLAG_SELECT_TYPE)
						info("bridge_block_free"
						     "(%s): %s State = %s",
						     bg_record->bg_block_id,
						     bg_err_str(rc),
						     bg_block_state_string(
							     bg_record->state));
#ifdef HAVE_BGQ
					if (bg_record->state != BG_BLOCK_FREE
					    && bg_record->state
					    != BG_BLOCK_TERM)
						bg_record->state =
							BG_BLOCK_TERM;
#endif
				} else {
					error("bridge_block_free"
					      "(%s): %s State = %s",
					      bg_record->bg_block_id,
					      bg_err_str(rc),
					      bg_block_state_string(
						      bg_record->state));
				}
			}
		}
#else
		/* Fake a free since we are not deallocating
		   state before this. */
		if (bg_record->state & BG_BLOCK_ERROR_FLAG) {
			/* This will set the state to ERROR(Free)
			 * just incase the state was ERROR(SOMETHING ELSE) */
			bg_record->state = BG_BLOCK_ERROR_FLAG;
			break;
		} else if (!wait || (count >= 3))
			bg_record->state = BG_BLOCK_FREE;
		else if (bg_record->state != BG_BLOCK_FREE)
			bg_record->state = BG_BLOCK_TERM;
#endif

		if (!wait || (bg_record->state == BG_BLOCK_FREE)
#ifndef HAVE_BGL
		    || (bg_record->state & BG_BLOCK_ERROR_FLAG)
#endif
			) {
			break;
		}
		/* If we were locked outside of this we need to unlock
		   to not cause deadlock on this mutex until we are
		   done.
		*/
		slurm_mutex_unlock(&block_state_mutex);
		sleep(FREE_SLEEP_INTERVAL);
		count++;
		slurm_mutex_lock(&block_state_mutex);
	}

	rc = SLURM_SUCCESS;
	if ((bg_record->state == BG_BLOCK_FREE)
	    || (bg_record->state & BG_BLOCK_ERROR_FLAG)) {
		if (bg_record->err_ratio
		    && (bg_record->state == BG_BLOCK_FREE)) {
			/* Sometime the realtime server can report
			   software error on cnodes even though the
			   block is free.  If this is the case we need
			   to manually clear them.
			*/
			ba_mp_t *found_ba_mp;
			ListIterator itr = list_iterator_create(
				bg_record->ba_mp_list);
			debug("Block %s is free, but has %u cnodes in "
			      "error. "
			      "This can happen if a large block goes into "
			      "error and then is freed and the state of "
			      "the block changes before the "
			      "database informs all the cnodes are back to "
			      "normal. This is no big deal.",
			      bg_record->bg_block_id,
			      bg_record->cnode_err_cnt);
			while ((found_ba_mp = list_next(itr))) {
				if (!found_ba_mp->used)
					continue;
				if (!found_ba_mp->cnode_err_bitmap)
					found_ba_mp->cnode_err_bitmap =
						bit_alloc(
							bg_conf->mp_cnode_cnt);
				/* clear every cnode-error bit */
				bit_nclear(found_ba_mp->cnode_err_bitmap, 0,
					   bit_size(found_ba_mp->
						    cnode_err_bitmap)-1);
			}
			list_iterator_destroy(itr);
			bg_record->cnode_err_cnt = 0;
			bg_record->err_ratio = 0;
		}
		remove_from_bg_list(bg_lists->booted, bg_record);
	} else if (count >= MAX_FREE_RETRIES) {
		/* Something isn't right, go mark this one in an
		   error state. */
		update_block_msg_t block_msg;
		if (bg_conf->slurm_debug_flags & DEBUG_FLAG_SELECT_TYPE)
			info("bg_free_block: block %s is not in state "
			     "free (%s), putting it in error state.",
			     bg_record->bg_block_id,
			     bg_block_state_string(bg_record->state));
		slurm_init_update_block_msg(&block_msg);
		block_msg.bg_block_id = bg_record->bg_block_id;
		block_msg.state = BG_BLOCK_ERROR_FLAG;
		block_msg.reason = "Block would not deallocate";
		/* drop the state mutex around the plugin call to avoid
		   deadlock, then reacquire it */
		slurm_mutex_unlock(&block_state_mutex);
		select_g_update_block(&block_msg);
		slurm_mutex_lock(&block_state_mutex);
		rc = SLURM_ERROR;
	}
	if (!locked)
		slurm_mutex_unlock(&block_state_mutex);

	return rc;
}
/* Log the contents of a job_resources data structure using info().
 * Prints the per-node memory/socket/core/CPU summary, the allocation (and
 * in-use) status of every core, and the cpu_array run-length encoding.
 * Returns early with an error() if any required member is NULL. */
extern void log_job_resources(uint32_t job_id,
			      job_resources_t *job_resrcs_ptr)
{
	int bit_inx = 0, bit_reps, i;
	int array_size, node_inx;
	int sock_inx = 0, sock_reps = 0;

	if (job_resrcs_ptr == NULL) {
		error("log_job_resources: job_resrcs_ptr is NULL");
		return;
	}

	info("====================");
	info("job_id:%u nhosts:%u ncpus:%u node_req:%u nodes=%s",
	     job_id, job_resrcs_ptr->nhosts, job_resrcs_ptr->ncpus,
	     job_resrcs_ptr->node_req, job_resrcs_ptr->nodes);

	if (job_resrcs_ptr->cpus == NULL) {
		error("log_job_resources: cpus array is NULL");
		return;
	}
	if (job_resrcs_ptr->memory_allocated == NULL) {
		error("log_job_resources: memory array is NULL");
		return;
	}
	if ((job_resrcs_ptr->cores_per_socket == NULL) ||
	    (job_resrcs_ptr->sockets_per_node == NULL) ||
	    (job_resrcs_ptr->sock_core_rep_count == NULL)) {
		error("log_job_resources: socket/core array is NULL");
		return;
	}
	if (job_resrcs_ptr->core_bitmap == NULL) {
		error("log_job_resources: core_bitmap is NULL");
		return;
	}
	if (job_resrcs_ptr->core_bitmap_used == NULL) {
		error("log_job_resources: core_bitmap_used is NULL");
		return;
	}
	array_size = bit_size(job_resrcs_ptr->core_bitmap);

	/* Can only log node_bitmap from slurmctld, so don't bother here */
	for (node_inx=0; node_inx<job_resrcs_ptr->nhosts; node_inx++) {
		uint32_t cpus_used = 0, memory_allocated = 0, memory_used = 0;
		info("Node[%d]:", node_inx);

		/* advance through the run-length encoded socket/core
		 * groups as nodes are consumed */
		if (sock_reps >=
		    job_resrcs_ptr->sock_core_rep_count[sock_inx]) {
			sock_inx++;
			sock_reps = 0;
		}
		sock_reps++;

		if (job_resrcs_ptr->cpus_used)
			cpus_used = job_resrcs_ptr->cpus_used[node_inx];
		if (job_resrcs_ptr->memory_used)
			memory_used = job_resrcs_ptr->memory_used[node_inx];
		if (job_resrcs_ptr->memory_allocated)
			memory_allocated = job_resrcs_ptr->
				memory_allocated[node_inx];

		info(" Mem(MB):%u:%u Sockets:%u Cores:%u CPUs:%u:%u",
		     memory_allocated, memory_used,
		     job_resrcs_ptr->sockets_per_node[sock_inx],
		     job_resrcs_ptr->cores_per_socket[sock_inx],
		     job_resrcs_ptr->cpus[node_inx],
		     cpus_used);

		/* one core_bitmap bit per (socket, core) on this node */
		bit_reps = job_resrcs_ptr->sockets_per_node[sock_inx] *
			job_resrcs_ptr->cores_per_socket[sock_inx];
		for (i=0; i<bit_reps; i++) {
			if (bit_inx >= array_size) {
				error("log_job_resources: array size wrong");
				break;
			}
			if (bit_test(job_resrcs_ptr->core_bitmap,
				     bit_inx)) {
				char *core_used = "";
				if (bit_test(job_resrcs_ptr->
					     core_bitmap_used, bit_inx))
					core_used = " and in use";
				info(" Socket[%d] Core[%d] is allocated%s",
				     (i / job_resrcs_ptr->
				      cores_per_socket[sock_inx]),
				     (i % job_resrcs_ptr->
				      cores_per_socket[sock_inx]),
				     core_used);
			}
			bit_inx++;
		}
	}
	for (node_inx=0; node_inx<job_resrcs_ptr->cpu_array_cnt;
	     node_inx++) {
		if (node_inx == 0)
			info("--------------------");
		info("cpu_array_value[%d]:%u reps:%u", node_inx,
		     job_resrcs_ptr->cpu_array_value[node_inx],
		     job_resrcs_ptr->cpu_array_reps[node_inx]);
	}
	info("====================");
}
/* To effectively deal with heterogeneous nodes, we fake a cyclic
 * distribution to figure out how many cpus are needed on each node.
 *
 * This routine is a slightly modified "version" of the routine
 * _task_layout_block in src/common/dist_tasks.c. We do not need to
 * assign tasks to job->hostid[] and job->tids[][] at this point so
 * the cpu allocation is the same for cyclic and block.
 *
 * For the consumable resources support we need to determine what
 * "node/CPU/Core/thread"-tuplets will be allocated for a given job.
 * In the past we assumed that we only allocated one task per CPU (at
 * that point the lowest level of logical processor) and didn't allow
 * the use of overcommit. We have changed this philosophy and are now
 * allowing people to overcommit their resources and expect the system
 * administrator to enable the task/affinity plug-in which will then
 * bind all of a job's tasks to its allocated resources thereby
 * avoiding interference between co-allocated running jobs.
 *
 * In the consumable resources environment we need to determine the
 * layout schema within slurmctld.
 *
 * We have a core_bitmap of all available cores. All we're doing here
 * is removing cores that are not needed based on the task count, and
 * the choice of cores to remove is based on the distribution:
 * - "cyclic" removes cores "evenly", starting from the last socket,
 * - "block" removes cores from the "last" socket(s)
 * - "plane" removes cores "in chunks"
 */
extern int cr_dist(struct job_record *job_ptr, const uint16_t cr_type)
{
	int error_code, cr_cpu = 1;

	if (((job_ptr->job_resrcs->node_req == NODE_CR_RESERVED) ||
	     (job_ptr->details->whole_node != 0)) &&
	    (job_ptr->details->core_spec == 0)) {
		/* the job has been allocated an EXCLUSIVE set of nodes,
		 * so it gets all of the bits in the core_bitmap and
		 * all of the available CPUs in the cpus array */
		int size = bit_size(job_ptr->job_resrcs->core_bitmap);
		bit_nset(job_ptr->job_resrcs->core_bitmap, 0, size-1);
		return SLURM_SUCCESS;
	}

	_log_select_maps("cr_dist/start", job_ptr->job_resrcs->node_bitmap,
			 job_ptr->job_resrcs->core_bitmap);

	if (job_ptr->details->task_dist == SLURM_DIST_PLANE) {
		/* perform a plane distribution on the 'cpus' array */
		error_code = _compute_plane_dist(job_ptr);
		if (error_code != SLURM_SUCCESS) {
			error("cons_res: cr_dist: Error in "
			      "_compute_plane_dist");
			return error_code;
		}
	} else {
		/* perform a cyclic distribution on the 'cpus' array */
		error_code = _compute_c_b_task_dist(job_ptr);
		if (error_code != SLURM_SUCCESS) {
			error("cons_res: cr_dist: Error in "
			      "_compute_c_b_task_dist");
			return error_code;
		}
	}

	/* now sync up the core_bitmap with the allocated 'cpus' array
	 * based on the given distribution AND resource setting */
	if ((cr_type & CR_CORE) || (cr_type & CR_SOCKET))
		cr_cpu = 0;

	/* CPU-level allocation: block sync is always sufficient */
	if (cr_cpu) {
		_block_sync_core_bitmap(job_ptr, cr_type);
		return SLURM_SUCCESS;
	}

	/*
	 * If SelectTypeParameters mentions to use a block distribution for
	 * cores by default, use that kind of distribution if no particular
	 * cores distribution specified.
	 * Note : cyclic cores distribution, which is the default, is
	 * treated by the next code block
	 */
	if ( slurmctld_conf.select_type_param & CR_CORE_DEFAULT_DIST_BLOCK ) {
		switch(job_ptr->details->task_dist) {
		case SLURM_DIST_ARBITRARY:
		case SLURM_DIST_BLOCK:
		case SLURM_DIST_CYCLIC:
		case SLURM_DIST_UNKNOWN:
			_block_sync_core_bitmap(job_ptr, cr_type);
			return SLURM_SUCCESS;
		}
	}

	/* Determine the number of logical processors per node needed
	 * for this job. Make sure below matches the layouts in
	 * lllp_distribution in plugins/task/affinity/dist_task.c (FIXME) */
	switch(job_ptr->details->task_dist) {
	case SLURM_DIST_BLOCK_BLOCK:
	case SLURM_DIST_CYCLIC_BLOCK:
	case SLURM_DIST_PLANE:
		/* tasks are distributed over cores in blocks */
		_block_sync_core_bitmap(job_ptr, cr_type);
		break;
	case SLURM_DIST_ARBITRARY:
	case SLURM_DIST_BLOCK:
	case SLURM_DIST_CYCLIC:
	case SLURM_DIST_BLOCK_CYCLIC:
	case SLURM_DIST_CYCLIC_CYCLIC:
	case SLURM_DIST_BLOCK_CFULL:
	case SLURM_DIST_CYCLIC_CFULL:
	case SLURM_DIST_UNKNOWN:
		/* cyclic (default) core distribution */
		error_code = _cyclic_sync_core_bitmap(job_ptr, cr_type);
		break;
	default:
		error("select/cons_res: invalid task_dist entry");
		return SLURM_ERROR;
	}

	_log_select_maps("cr_dist/fini", job_ptr->job_resrcs->node_bitmap,
			 job_ptr->job_resrcs->core_bitmap);
	return error_code;
}