Example #1
Cell *new_cell(color_t ncolor)
{
    /* one allocation: the Cell header plus enough trailing storage for
     * ncolor bits (the struct itself already holds one bit's worth) */
    Cell *cell = (Cell *)malloc(sizeof(Cell) + bit_size(ncolor) - bit_size(1));

    if (cell == NULL)
        return NULL;
    bit_clearall(cell->bit, ncolor);
    cell->n = ncolor;
    return cell;
}
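
The allocation above packs the Cell header and a variable-length bit array into a single malloc. A minimal self-contained sketch of the same idiom using a C99 flexible array member (all names here are hypothetical, not from the original codebase):

#include <stdlib.h>
#include <string.h>

typedef struct {
    size_t nbits;            /* number of bits stored */
    unsigned char bits[];    /* C99 flexible array member */
} BitCell;

/* bytes needed to hold n bits */
static size_t bytes_for_bits(size_t n)
{
    return (n + 7) / 8;
}

static BitCell *bitcell_new(size_t nbits)
{
    BitCell *c = malloc(sizeof(BitCell) + bytes_for_bits(nbits));
    if (c == NULL)
        return NULL;
    c->nbits = nbits;
    memset(c->bits, 0, bytes_for_bits(nbits));  /* clear all bits */
    return c;
}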
Example #2
/* helper function for _expand_masks()
 * for each task, consider which other bits are set in avail_map
 * on the same socket */
static void _blot_mask_sockets(const uint32_t maxtasks, const uint32_t task,
			       bitstr_t **masks, uint16_t hw_sockets,
			       uint16_t hw_cores, uint16_t hw_threads,
			       bitstr_t *avail_map)
{
	uint16_t i, j, size = 0;
	int blot;

	if (!masks[task])
		return;

	blot = bit_size(avail_map) / hw_sockets;
	if (blot <= 0)
		blot = 1;
	size = bit_size(masks[task]);
	for (i = 0; i < size; i++) {
		if (bit_test(masks[task], i)) {
			/* check if other bits are set in avail_map on this
			 * socket and set each corresponding bit in masks */
			uint16_t start = (i / blot) * blot;
			for (j = start; j < start+blot; j++) {
				if (bit_test(avail_map, j))
					bit_set(masks[task], j);
			}
		}
	}
}
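
The "blot" rounding above expands each set bit of a task mask to cover its whole socket-sized block of the availability map. A self-contained sketch of the same idea on a plain 64-bit mask, assuming at most 64 CPUs (names are illustrative, not Slurm's):

#include <stdint.h>

/* Expand each set bit of `mask` to every bit of `avail` that lies in the
 * same `blot`-sized block, mirroring _blot_mask_sockets() above. */
static uint64_t blot_expand(uint64_t mask, uint64_t avail, unsigned blot)
{
    uint64_t out = mask;
    for (unsigned i = 0; i < 64; i++) {
        if (!(mask & (1ULL << i)))
            continue;
        unsigned start = (i / blot) * blot;
        for (unsigned j = start; j < start + blot && j < 64; j++) {
            if (avail & (1ULL << j))
                out |= 1ULL << j;
        }
    }
    return out;
}

For example, with blot = 4 (four CPUs per socket), mask 0x2 and avail 0xF expand to 0xF: one bound CPU pulls in its whole socket.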
Example #3
/*
 * Reset environment variables as appropriate for a job (i.e. this one task)
 * based upon the job step's GRES state and assigned CPUs.
 */
extern void step_reset_env(char ***job_env_ptr, void *gres_ptr,
			   bitstr_t *usable_gres)
{
	int i, len, first_match = -1;
	char *dev_list = NULL;
	gres_step_state_t *gres_step_ptr = (gres_step_state_t *) gres_ptr;

	if ((gres_step_ptr != NULL) &&
	    (gres_step_ptr->node_cnt == 1) &&
	    (gres_step_ptr->gres_bit_alloc != NULL) &&
	    (gres_step_ptr->gres_bit_alloc[0] != NULL) &&
	    (usable_gres != NULL)) {
		len = MIN(bit_size(gres_step_ptr->gres_bit_alloc[0]),
			  bit_size(usable_gres));
		for (i = 0; i < len; i++) {
			if (!bit_test(gres_step_ptr->gres_bit_alloc[0], i))
				continue;
			if (first_match == -1)
				first_match = i;
			if (!bit_test(usable_gres, i))
				continue;
			if (!dev_list)
				dev_list = xmalloc(128);
			else
				xstrcat(dev_list, ",");
			if (nic_devices && (i < nb_available_files) &&
			    (nic_devices[i] >= 0)) {
				xstrfmtcat(dev_list, "mlx4_%d", nic_devices[i]);
			} else {
				xstrfmtcat(dev_list, "mlx4_%d", i);
			}
		}
		if (!dev_list && (first_match != -1)) {
			i = first_match;
			dev_list = xmalloc(128);
			if (nic_devices && (i < nb_available_files) &&
			    (nic_devices[i] >= 0)) {
				xstrfmtcat(dev_list, "mlx4_%d", nic_devices[i]);
			} else {
				xstrfmtcat(dev_list, "mlx4_%d", i);
			}
		}
	}

	if (dev_list) {
		/* we assume Mellanox cards and an OpenMPI program */
		env_array_overwrite(job_env_ptr,
				    "OMPI_MCA_btl_openib_if_include",
				    dev_list);
		xfree(dev_list);
	}
}
Example #4
/* given an "avail" node_bitmap, return a corresponding "avail" core_bitmap */
bitstr_t *_make_core_bitmap(bitstr_t *node_map)
{
	uint32_t n, c, nodes, size;
	uint32_t coff;
	int i_first, i_last;

	nodes = bit_size(node_map);
	size = cr_get_coremap_offset(nodes);
	bitstr_t *core_map = bit_alloc(size);

	i_first = bit_ffs(node_map);
	if (i_first >= 0)
		i_last = bit_fls(node_map);
	else
		i_last = -2;	/* empty map: keeps the loop from running */
	for (n = i_first; n <= i_last; n++) {
		if (!bit_test(node_map, n))
			continue;
		/* mark only this node's cores: [offset(n), offset(n + 1)) */
		c = cr_get_coremap_offset(n);
		coff = cr_get_coremap_offset(n + 1);
		while (c < coff)
			bit_set(core_map, c++);
	}
	return core_map;
}
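
The loop above converts a node-level bitmap to a core-level bitmap via prefix offsets (cr_get_coremap_offset(n) being the number of cores on nodes 0..n-1). A standalone sketch with an explicit offset table in place of that call; node counts and names are made up for illustration:

#include <stdbool.h>

#define NODES 4

/* cores_before[n] = total cores on nodes 0..n-1; the nodes here have
 * 4, 8, 4, and 8 cores, so the core map holds 24 bits */
static const int cores_before[NODES + 1] = { 0, 4, 12, 16, 24 };

/* Mark every core of every node selected in node_sel[]. */
static void nodes_to_cores(const bool node_sel[NODES], bool core_sel[24])
{
    for (int n = 0; n < NODES; n++) {
        if (!node_sel[n])
            continue;
        for (int c = cores_before[n]; c < cores_before[n + 1]; c++)
            core_sel[c] = true;
    }
}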
Example #5
/*
 * Set environment variables as appropriate for a job (i.e. all tasks) based
 * upon the job step's GRES state.
 */
extern void step_set_env(char ***job_env_ptr, void *gres_ptr)
{
	int i, len;
	char *dev_list = NULL;
	gres_step_state_t *gres_step_ptr = (gres_step_state_t *) gres_ptr;

	if ((gres_step_ptr != NULL) &&
	    (gres_step_ptr->node_cnt == 1) &&
	    (gres_step_ptr->gres_bit_alloc != NULL) &&
	    (gres_step_ptr->gres_bit_alloc[0] != NULL)) {
		len = bit_size(gres_step_ptr->gres_bit_alloc[0]);
		for (i=0; i<len; i++) {
			if (!bit_test(gres_step_ptr->gres_bit_alloc[0], i))
				continue;
			if (!dev_list)
				dev_list = xmalloc(128);
			else
				xstrcat(dev_list, ",");
			xstrfmtcat(dev_list, "%d", i);
		}
	}
	if (dev_list) {
		env_array_overwrite(job_env_ptr,"CUDA_VISIBLE_DEVICES",
				    dev_list);
		xfree(dev_list);
	} else {
		/* The gres.conf file must identify specific device files
		 * in order to set the CUDA_VISIBLE_DEVICES env var */
		env_array_overwrite(job_env_ptr,"CUDA_VISIBLE_DEVICES",
				    "NoDevFiles");
	}
}
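
This routine and the others like it build a comma-separated device list from the set bits of an allocation bitmap, then export it through env_array_overwrite(). A standalone sketch of the string-building loop using only the standard library (Slurm's xmalloc/xstrcat/xstrfmtcat helpers are replaced by snprintf; the flat mask type is an assumption):

#include <stdint.h>
#include <stdio.h>

/* Write the indices of the set bits of `alloc` into `buf` as a
 * comma-separated list, e.g. binary 1011 -> "0,1,3". */
static char *dev_list_from_mask(uint64_t alloc, char *buf, size_t buflen)
{
    size_t off = 0;
    buf[0] = '\0';
    for (int i = 0; i < 64 && off + 1 < buflen; i++) {
        if (alloc & (1ULL << i))
            off += snprintf(buf + off, buflen - off,
                            off ? ",%d" : "%d", i);
    }
    return buf;
}

The result would then be exported much as above, e.g. setenv("CUDA_VISIBLE_DEVICES", dev_list_from_mask(mask, buf, sizeof(buf)), 1).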
Example #6
File: gang.c Project: corburn/slurm
static void _print_jobs(struct gs_part *p_ptr)
{
	int i;

	if (slurmctld_conf.debug_flags & DEBUG_FLAG_GANG) {
		info("gang:  part %s has %u jobs, %u shadows:",
		     p_ptr->part_name, p_ptr->num_jobs, p_ptr->num_shadows);
		for (i = 0; i < p_ptr->num_shadows; i++) {
			info("gang:   shadow job %u row_s %s, sig_s %s",
			     p_ptr->shadow[i]->job_ptr->job_id,
			     _print_flag(p_ptr->shadow[i]->row_state),
			     _print_flag(p_ptr->shadow[i]->sig_state));
		}
		for (i = 0; i < p_ptr->num_jobs; i++) {
			info("gang:   job %u row_s %s, sig_s %s",
			     p_ptr->job_list[i]->job_ptr->job_id,
			     _print_flag(p_ptr->job_list[i]->row_state),
			     _print_flag(p_ptr->job_list[i]->sig_state));
		}
		if (p_ptr->active_resmap) {
			int s = bit_size(p_ptr->active_resmap);
			i = bit_set_count(p_ptr->active_resmap);
			info("gang:  active resmap has %d of %d bits set",
			     i, s);
		}
	}
}
Example #7
File: gang.c Project: corburn/slurm
/* Return 1 if job "cpu count" fits in this row, else return 0 */
static int _can_cpus_fit(struct job_record *job_ptr, struct gs_part *p_ptr)
{
	int i, j, size;
	uint16_t *p_cpus, *j_cpus;
	job_resources_t *job_res = job_ptr->job_resrcs;

	if (gr_type != GS_CPU)
		return 0;

	size = bit_size(job_res->node_bitmap);
	p_cpus = p_ptr->active_cpus;
	j_cpus = job_res->cpus;

	if (!p_cpus || !j_cpus)
		return 0;

	for (j = 0, i = 0; i < size; i++) {
		if (bit_test(job_res->node_bitmap, i)) {
			if (p_cpus[i]+j_cpus[j] > _get_phys_bit_cnt(i))
				return 0;
			j++;
		}
	}
	return 1;
}
Example #8
/* Set the socket and core counts associated with a set of selected
 * nodes of a job_resources data structure based upon slurmctld state.
 * (sets cores_per_socket, sockets_per_node, and sock_core_rep_count based
 * upon the value of node_bitmap, also creates core_bitmap based upon
 * the total number of cores in the allocation). Call this ONLY from
 * slurmctld. Example of use:
 *
 * job_resources_t *job_resrcs_ptr = create_job_resources();
 * node_name2bitmap("dummy[2,5,12,16]", true, &(job_res_ptr->node_bitmap));
 * rc = build_job_resources(job_resrcs_ptr, node_record_table_ptr,
 *			     slurmctld_conf.fast_schedule);
 */
extern int build_job_resources(job_resources_t *job_resrcs,
			       void *node_rec_table, uint16_t fast_schedule)
{
	int i, bitmap_len;
	int core_cnt = 0, sock_inx = -1;
	uint32_t cores, socks;
	struct node_record *node_ptr, *node_record_table;

	if (job_resrcs->node_bitmap == NULL) {
		error("build_job_resources: node_bitmap is NULL");
		return SLURM_ERROR;
	}

	node_record_table = (struct node_record *) node_rec_table;
	xfree(job_resrcs->sockets_per_node);
	xfree(job_resrcs->cores_per_socket);
	xfree(job_resrcs->sock_core_rep_count);
	job_resrcs->sockets_per_node = xmalloc(sizeof(uint16_t) *
					       job_resrcs->nhosts);
	job_resrcs->cores_per_socket = xmalloc(sizeof(uint16_t) *
					       job_resrcs->nhosts);
	job_resrcs->sock_core_rep_count = xmalloc(sizeof(uint32_t) *
						  job_resrcs->nhosts);

	bitmap_len = bit_size(job_resrcs->node_bitmap);
	for (i=0; i<bitmap_len; i++) {
		if (!bit_test(job_resrcs->node_bitmap, i))
			continue;
		node_ptr = node_record_table + i;
		if (fast_schedule) {
			socks = node_ptr->config_ptr->sockets;
			cores = node_ptr->config_ptr->cores;
		} else {
			socks = node_ptr->sockets;
			cores = node_ptr->cores;
		}
		if ((sock_inx < 0) ||
		    (socks != job_resrcs->sockets_per_node[sock_inx]) ||
		    (cores != job_resrcs->cores_per_socket[sock_inx])) {
			sock_inx++;
			job_resrcs->sockets_per_node[sock_inx] = socks;
			job_resrcs->cores_per_socket[sock_inx] = cores;
		}
		job_resrcs->sock_core_rep_count[sock_inx]++;
		core_cnt += (cores * socks);
	}
#ifndef HAVE_BG
	job_resrcs->core_bitmap      = bit_alloc(core_cnt);
	job_resrcs->core_bitmap_used = bit_alloc(core_cnt);
	if ((job_resrcs->core_bitmap == NULL) ||
	    (job_resrcs->core_bitmap_used == NULL))
		fatal("bit_alloc malloc failure");
#endif
	return SLURM_SUCCESS;
}
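
Note how sockets_per_node/cores_per_socket/sock_core_rep_count form a run-length encoding: identical consecutive (sockets, cores) pairs are stored once with a repetition count. A toy version of the same compression (types and names are illustrative):

#include <stdint.h>

/* Run-length encode per-node (socks, cores) pairs; returns the number
 * of runs written to r_socks/r_cores/r_count (each sized >= nnodes). */
static int rle_topology(const uint16_t socks[], const uint16_t cores[],
                        int nnodes, uint16_t r_socks[], uint16_t r_cores[],
                        uint32_t r_count[])
{
    int runs = -1;
    for (int i = 0; i < nnodes; i++) {
        if (runs < 0 || socks[i] != r_socks[runs] ||
            cores[i] != r_cores[runs]) {
            runs++;
            r_socks[runs] = socks[i];
            r_cores[runs] = cores[i];
            r_count[runs] = 0;
        }
        r_count[runs]++;
    }
    return runs + 1;
}

Four nodes shaped (2,8), (2,8), (1,4), (2,8) encode as three runs: (2,8) x 2, (1,4) x 1, (2,8) x 1 — exactly how the loop above advances sock_inx.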
Example #9
static void bit_object_show(void* B)
{
  fprintf(stdout, 
      "\t__bit__ -> {\n"
      "\t  object=>0x%p,\n"
      "\t  size=>%d,\n"
      "\t  count=>%d,\n"
      "\t}\n", 
      B, bit_size(B), bit_count(B)
      );
}
Example #10
/*
 * _lllp_map_abstract_mask
 *
 * Map one abstract block mask to a physical machine mask
 *
 * IN - mask to map
 * OUT - mapped mask (storage allocated in this routine)
 */
static bitstr_t *_lllp_map_abstract_mask(bitstr_t *bitmask)
{
	int i, bit;
	int num_bits = bit_size(bitmask);
	bitstr_t *newmask = bit_alloc(num_bits);

	/* remap to physical machine */
	for (i = 0; i < num_bits; i++) {
		if (bit_test(bitmask,i)) {
			bit = BLOCK_MAP(i);
			if (bit < bit_size(newmask))
				bit_set(newmask, bit);
			else
				error("%s: can't go from %d -> %d since we "
				      "only have %"BITSTR_FMT" bits",
				      __func__, i, bit, bit_size(newmask));
		}
	}
	return newmask;
}
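
BLOCK_MAP() translates an abstract (logical) CPU index to the machine's physical index. A minimal permutation-table version of the same remapping on an 8-CPU mask; the table contents are purely illustrative:

#include <stdint.h>

#define NCPUS 8

/* logical -> physical CPU id, e.g. physical cores first, then their
 * hyperthread siblings (values made up for the example) */
static const int block_map[NCPUS] = { 0, 2, 4, 6, 1, 3, 5, 7 };

/* Remap each set bit of an abstract mask through block_map[]. */
static uint8_t map_abstract_mask(uint8_t abstract)
{
    uint8_t physical = 0;
    for (int i = 0; i < NCPUS; i++) {
        if (abstract & (1u << i))
            physical |= (uint8_t)(1u << block_map[i]);
    }
    return physical;
}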
Example #11
extern int valid_job_resources(job_resources_t *job_resrcs,
			       void *node_rec_table,
			       uint16_t fast_schedule)
{
	int i, bitmap_len;
	int sock_inx = 0, sock_cnt = 0;
	uint32_t cores, socks;
	struct node_record *node_ptr, *node_record_table;

	if (job_resrcs->node_bitmap == NULL) {
		error("valid_job_resources: node_bitmap is NULL");
		return SLURM_ERROR;
	}
	if ((job_resrcs->sockets_per_node == NULL) ||
	    (job_resrcs->cores_per_socket == NULL) ||
	    (job_resrcs->sock_core_rep_count == NULL)) {
		error("valid_job_resources: socket/core array is NULL");
		return SLURM_ERROR;
	}

	node_record_table = (struct node_record *) node_rec_table;
	bitmap_len = bit_size(job_resrcs->node_bitmap);
	for (i=0; i<bitmap_len; i++) {
		if (!bit_test(job_resrcs->node_bitmap, i))
			continue;
		node_ptr = node_record_table + i;
		if (fast_schedule) {
			socks = node_ptr->config_ptr->sockets;
			cores = node_ptr->config_ptr->cores;
		} else {
			socks = node_ptr->sockets;
			cores = node_ptr->cores;
		}
		if (sock_cnt >= job_resrcs->sock_core_rep_count[sock_inx]) {
			sock_inx++;
			sock_cnt = 0;
		}
		if ((socks != job_resrcs->sockets_per_node[sock_inx]) ||
		    (cores != job_resrcs->cores_per_socket[sock_inx])) {
			error("valid_job_resources: "
			      "%s sockets:%u,%u, cores %u,%u",
			      node_ptr->name,
			      socks,
			      job_resrcs->sockets_per_node[sock_inx],
			      cores,
			      job_resrcs->cores_per_socket[sock_inx]);
			return SLURM_ERROR;
		}
		sock_cnt++;
	}
	return SLURM_SUCCESS;
}
Example #12
static bool _is_task_in_job(job_info_t *job_ptr, int array_id)
{
	int len;

	if (job_ptr->array_task_id == array_id)
		return true;

	if (!job_ptr->array_bitmap)
		return false;
	len = bit_size((bitstr_t *)job_ptr->array_bitmap);
	if (len <= array_id)
		return false;
	return (bit_test((bitstr_t *)job_ptr->array_bitmap, array_id));
}
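
The guard sequence here (optional bitmap, bounds check, then bit test) is worth keeping in exactly that order. The same pattern in a self-contained form, with a 64-bit mask standing in for the bitstring (names hypothetical):

#include <stdbool.h>
#include <stdint.h>

typedef struct {
    int      task_id;    /* single array task id, or -1 */
    uint64_t task_mask;  /* bitmap of task ids, 0 when absent */
} array_job;

static bool task_in_job(const array_job *job, int id)
{
    if (job->task_id == id)
        return true;
    if (!job->task_mask || id < 0 || id >= 64)  /* no bitmap or out of range */
        return false;
    return (job->task_mask >> id) & 1;
}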
Example #13
/*
 * Set environment variables as appropriate for a job (i.e. all tasks) based
 * upon the job step's GRES state.
 */
extern void step_set_env(char ***job_env_ptr, void *gres_ptr)
{
	int i, len, local_inx = 0;
	char *dev_list = NULL;
	gres_step_state_t *gres_step_ptr = (gres_step_state_t *) gres_ptr;
	bool use_local_dev_index = _use_local_device_index();

	if ((gres_step_ptr != NULL) &&
	    (gres_step_ptr->node_cnt == 1) &&
	    (gres_step_ptr->gres_bit_alloc != NULL) &&
	    (gres_step_ptr->gres_bit_alloc[0] != NULL)) {
		len = bit_size(gres_step_ptr->gres_bit_alloc[0]);
		for (i = 0; i < len; i++) {
			if (!bit_test(gres_step_ptr->gres_bit_alloc[0], i))
				continue;
			if (!dev_list)
				dev_list = xmalloc(128);
			else
				xstrcat(dev_list, ",");
			if (use_local_dev_index) {
				xstrfmtcat(dev_list, "mlx4_%d", local_inx++);
			} else if (nic_devices && (i < nb_available_files) &&
				   (nic_devices[i] >= 0)) {
				xstrfmtcat(dev_list, "mlx4_%d", nic_devices[i]);
			} else {
				xstrfmtcat(dev_list, "mlx4_%d", i);
			}
		}
	} else if (gres_step_ptr && (gres_step_ptr->gres_cnt_alloc > 0)) {
		/* The gres.conf file must identify specific device files
		 * in order to set the OMPI_MCA_btl_openib_if_include env var */
		error("gres/nic unable to set OMPI_MCA_btl_openib_if_include, "
		      "no device files configured");
	} else {
		xstrcat(dev_list, "NoDevFiles");
	}

	if (dev_list) {
	/* we assume Mellanox cards and an OpenMPI program */
		env_array_overwrite(job_env_ptr,
				    "OMPI_MCA_btl_openib_if_include",
				    dev_list);
		xfree(dev_list);
	}
}
Example #14
/* Return a copy of core_bitmap only for the specific node */
extern bitstr_t * copy_job_resources_node(job_resources_t *job_resrcs_ptr,
					  uint32_t node_id)
{
	int i, bit_inx = 0, core_cnt = 0;
	bitstr_t *core_bitmap;

	xassert(job_resrcs_ptr);

	for (i = 0; i < job_resrcs_ptr->nhosts; i++) {
		if (job_resrcs_ptr->sock_core_rep_count[i] <= node_id) {
			bit_inx += job_resrcs_ptr->sockets_per_node[i] *
				   job_resrcs_ptr->cores_per_socket[i] *
				   job_resrcs_ptr->sock_core_rep_count[i];
			node_id -= job_resrcs_ptr->sock_core_rep_count[i];
		} else {
			bit_inx += job_resrcs_ptr->sockets_per_node[i] *
				   job_resrcs_ptr->cores_per_socket[i] *
				   node_id;
			core_cnt = job_resrcs_ptr->sockets_per_node[i] *
				   job_resrcs_ptr->cores_per_socket[i];
			break;
		}
	}
	if (core_cnt < 1) {
		error("copy_job_resources_node: core_cnt=0");
		return NULL;
	}

	i = bit_size(job_resrcs_ptr->core_bitmap);
	if ((bit_inx + core_cnt) > i) {
		error("copy_job_resources_node: offset > bitmap size "
		      "(%d >= %d)", (bit_inx + core_cnt), i);
		return NULL;
	}

	core_bitmap = bit_alloc(core_cnt);
	if (!core_bitmap)
		fatal("copy_job_resources_node: bit_alloc(%d): %m", core_cnt);
	for (i = 0; i < core_cnt; i++) {
		if (bit_test(job_resrcs_ptr->core_bitmap, bit_inx++))
			bit_set(core_bitmap, i);
	}

	return core_bitmap;
}
Example #15
extern int get_job_resources_offset(job_resources_t *job_resrcs_ptr,
				    uint32_t node_id, uint16_t socket_id,
				    uint16_t core_id)
{
	int i, bit_inx = 0;

	xassert(job_resrcs_ptr);

	for (i=0; i<job_resrcs_ptr->nhosts; i++) {
		if (job_resrcs_ptr->sock_core_rep_count[i] <= node_id) {
			bit_inx += job_resrcs_ptr->sockets_per_node[i] *
				job_resrcs_ptr->cores_per_socket[i] *
				job_resrcs_ptr->sock_core_rep_count[i];
			node_id -= job_resrcs_ptr->sock_core_rep_count[i];
		} else if (socket_id >= job_resrcs_ptr->
			   sockets_per_node[i]) {
			error("get_job_resrcs_bit: socket_id >= socket_cnt "
			      "(%u >= %u)", socket_id,
			      job_resrcs_ptr->sockets_per_node[i]);
			return -1;
		} else if (core_id >= job_resrcs_ptr->cores_per_socket[i]) {
			error("get_job_resrcs_bit: core_id >= core_cnt "
			      "(%u >= %u)", core_id,
			      job_resrcs_ptr->cores_per_socket[i]);
			return -1;
		} else {
			bit_inx += job_resrcs_ptr->sockets_per_node[i] *
				job_resrcs_ptr->cores_per_socket[i] *
				node_id;
			bit_inx += job_resrcs_ptr->cores_per_socket[i] *
				socket_id;
			bit_inx += core_id;
			break;
		}
	}
	i = bit_size(job_resrcs_ptr->core_bitmap);
	if (bit_inx >= i) {
		error("get_job_resources_bit: offset >= bitmap size "
		      "(%d >= %d)", bit_inx, i);
		return -1;
	}

	return bit_inx;
}
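
When every node has the same shape, the walk above collapses to closed-form arithmetic: the bit index is nested base conversion over (node, socket, core). A sketch under that homogeneous-cluster assumption:

/* Flatten (node, socket, core) to a global core-bitmap index when all
 * nodes share one topology; returns -1 on out-of-range input. */
static int core_bit_index(int node, int socket, int core,
                          int sockets_per_node, int cores_per_socket)
{
    if (socket >= sockets_per_node || core >= cores_per_socket)
        return -1;
    return (node * sockets_per_node + socket) * cores_per_socket + core;
}

With 2 sockets of 4 cores each, (node 1, socket 1, core 2) lands at (1*2 + 1)*4 + 2 = 14, matching what the general loop would compute.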
Example #16
int init_complex_table(struct complex_table *p1)
{
    int i;
    struct complex_table *p2 = p1;

    /* zero every entry */
    for (i = 0; i < p1->number; i++)
    {
        p2 = p2->next;
        p2->a = 0;
        p2->b = 0;
    }
    /* relabel each entry with bit_order(), apparently a bit-reversed index */
    p2 = p1;
    for (i = 0; i < p1->number; i++)
    {
        p2 = p2->next;
        p2->number = bit_order(i, bit_size(p1->number - 1));
    }

    return 0;
}
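
Here bit_order(i, bit_size(p1->number - 1)) appears to relabel the i-th entry with its bit-reversed index, the classic input permutation of a radix-2 FFT. A self-contained version of that reversal, under that assumption:

/* Reverse the lowest `width` bits of i: for width = 3,
 * 1 (001) -> 4 (100) and 3 (011) -> 6 (110). */
static unsigned bit_reverse(unsigned i, unsigned width)
{
    unsigned r = 0;
    for (unsigned b = 0; b < width; b++) {
        r = (r << 1) | (i & 1u);
        i >>= 1;
    }
    return r;
}

/* Bit width needed to index a table of n entries (n a power of two). */
static unsigned index_width(unsigned n)
{
    unsigned w = 0;
    while (n > 1) {
        n >>= 1;
        w++;
    }
    return w;
}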
Example #17
/* Remove any specialized cores from those allocated to the job */
static void _clear_spec_cores(struct job_record *job_ptr,
			      bitstr_t *avail_core_bitmap)
{
	int first_node, last_node, i_node;
	int first_core, last_core, i_core;
	int alloc_node = -1, alloc_core = -1, size;
	job_resources_t *job_res = job_ptr->job_resrcs;
	multi_core_data_t *mc_ptr = NULL;

	if (job_ptr->details && job_ptr->details->mc_ptr)
		mc_ptr = job_ptr->details->mc_ptr;

	size = bit_size(job_res->core_bitmap);
	bit_nset(job_res->core_bitmap, 0, size - 1);

	first_node = bit_ffs(job_res->node_bitmap);
	if (first_node >= 0)
		last_node = bit_fls(job_res->node_bitmap);
	else
		last_node = first_node - 1;

	for (i_node = first_node; i_node <= last_node; i_node++) {
		if (!bit_test(job_res->node_bitmap, i_node))
			continue;
		job_res->cpus[++alloc_node] = 0;
		first_core = cr_get_coremap_offset(i_node);
		last_core  = cr_get_coremap_offset(i_node + 1) - 1;
		for (i_core = first_core; i_core <= last_core; i_core++) {
			alloc_core++;
			if (bit_test(avail_core_bitmap, i_core)) {
				uint16_t tpc = select_node_record[i_node].vpus;
				if (mc_ptr &&
				    (mc_ptr->threads_per_core != NO_VAL16) &&
				    (mc_ptr->threads_per_core < tpc))
					tpc = mc_ptr->threads_per_core;

				job_res->cpus[alloc_node] += tpc;
			} else {
				bit_clear(job_res->core_bitmap, alloc_core);
			}
		}
	}
}
Example #18
static int _change_job_resources_node(job_resources_t *job_resrcs_ptr,
				      uint32_t node_id, bool new_value)
{
	int i, bit_inx = 0, core_cnt = 0;

	xassert(job_resrcs_ptr);

	for (i=0; i<job_resrcs_ptr->nhosts; i++) {
		if (job_resrcs_ptr->sock_core_rep_count[i] <= node_id) {
			bit_inx += job_resrcs_ptr->sockets_per_node[i] *
				job_resrcs_ptr->cores_per_socket[i] *
				job_resrcs_ptr->sock_core_rep_count[i];
			node_id -= job_resrcs_ptr->sock_core_rep_count[i];
		} else {
			bit_inx += job_resrcs_ptr->sockets_per_node[i] *
				job_resrcs_ptr->cores_per_socket[i] *
				node_id;
			core_cnt = job_resrcs_ptr->sockets_per_node[i] *
				job_resrcs_ptr->cores_per_socket[i];
			break;
		}
	}
	if (core_cnt < 1) {
		error("_change_job_resources_node: core_cnt=0");
		return SLURM_ERROR;
	}

	i = bit_size(job_resrcs_ptr->core_bitmap);
	if ((bit_inx + core_cnt) > i) {
		error("_change_job_resources_node: offset > bitmap size "
		      "(%d >= %d)", (bit_inx + core_cnt), i);
		return SLURM_ERROR;
	}

	for (i=0; i<core_cnt; i++) {
		if (new_value)
			bit_set(job_resrcs_ptr->core_bitmap, bit_inx++);
		else
			bit_clear(job_resrcs_ptr->core_bitmap, bit_inx++);
	}

	return SLURM_SUCCESS;
}
Example #19
static bool _task_id_in_job(job_info_t *job_ptr, uint32_t array_id)
{
	bitstr_t *array_bitmap;
	uint32_t array_len;

	if ((array_id == NO_VAL) ||
	    (array_id == job_ptr->array_task_id))
		return true;

	array_bitmap = (bitstr_t *) job_ptr->array_bitmap;
	if (array_bitmap == NULL)
		return false;
	array_len = bit_size(array_bitmap);
	if (array_id >= array_len)
		return false;
	if (bit_test(array_bitmap, array_id))
		return true;
	return false;
}
Example #20
/* helper function for _expand_masks() */
static void _blot_mask(bitstr_t *mask, uint16_t blot)
{
	uint16_t i, size = 0;
	int prev = -1;

	if (!mask)
		return;
	size = bit_size(mask);
	for (i = 0; i < size; i++) {
		if (bit_test(mask, i)) {
			/* fill in this blot */
			uint16_t start = (i / blot) * blot;
			if (start != prev) {
				bit_nset(mask, start, start+blot-1);
				prev = start;
			}
		}
	}
}
Example #21
/* Return the count of core bitmaps set for the specific node */
extern int count_job_resources_node(job_resources_t *job_resrcs_ptr,
				    uint32_t node_id)
{
	int i, bit_inx = 0, core_cnt = 0;
	int set_cnt = 0;

	xassert(job_resrcs_ptr);

	for (i=0; i<job_resrcs_ptr->nhosts; i++) {
		if (job_resrcs_ptr->sock_core_rep_count[i] <= node_id) {
			bit_inx += job_resrcs_ptr->sockets_per_node[i] *
				job_resrcs_ptr->cores_per_socket[i] *
				job_resrcs_ptr->sock_core_rep_count[i];
			node_id -= job_resrcs_ptr->sock_core_rep_count[i];
		} else {
			bit_inx += job_resrcs_ptr->sockets_per_node[i] *
				job_resrcs_ptr->cores_per_socket[i] *
				node_id;
			core_cnt = job_resrcs_ptr->sockets_per_node[i] *
				job_resrcs_ptr->cores_per_socket[i];
			break;
		}
	}
	if (core_cnt < 1) {
		error("count_job_resources_node: core_cnt=0");
		return set_cnt;
	}

	i = bit_size(job_resrcs_ptr->core_bitmap);
	if ((bit_inx + core_cnt) > i) {
		error("count_job_resources_node: offset > bitmap size "
		      "(%d >= %d)", (bit_inx + core_cnt), i);
		return set_cnt;
	}

	for (i=0; i<core_cnt; i++) {
		if (bit_test(job_resrcs_ptr->core_bitmap, bit_inx++))
			set_cnt++;
	}

	return set_cnt;
}
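
This is a population count restricted to one node's slice of the global core bitmap. On a flat word-sized mask the same count reduces to a shift, a mask, and a popcount (a sketch assuming offset < 64 and the GCC/Clang builtin):

#include <stdint.h>

/* Count the set bits of core_map within [offset, offset + count). */
static int count_slice(uint64_t core_map, unsigned offset, unsigned count)
{
    uint64_t lowmask = (count >= 64) ? ~0ULL : ((1ULL << count) - 1);
    return __builtin_popcountll((core_map >> offset) & lowmask);
}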
Example #22
/*
 * Set environment variables as appropriate for a job (i.e. all tasks) based
 * upon the job's GRES state.
 */
extern void job_set_env(char ***job_env_ptr, void *gres_ptr)
{
	int i, len;
	char *dev_list = NULL;
	gres_job_state_t *gres_job_ptr = (gres_job_state_t *) gres_ptr;

	if ((gres_job_ptr != NULL) &&
	    (gres_job_ptr->node_cnt == 1) &&
	    (gres_job_ptr->gres_bit_alloc != NULL) &&
	    (gres_job_ptr->gres_bit_alloc[0] != NULL)) {
		len = bit_size(gres_job_ptr->gres_bit_alloc[0]);
		for (i=0; i<len; i++) {
			if (!bit_test(gres_job_ptr->gres_bit_alloc[0], i))
				continue;
			if (!dev_list)
				dev_list = xmalloc(128);
			else
				xstrcat(dev_list, ",");
			if (gpu_devices && (i < nb_available_files) &&
			    (gpu_devices[i] >= 0))
				xstrfmtcat(dev_list, "%d", gpu_devices[i]);
			else
				xstrfmtcat(dev_list, "%d", i);
		}
	} else if (gres_job_ptr && (gres_job_ptr->gres_cnt_alloc > 0)) {
		/* The gres.conf file must identify specific device files
		 * in order to set the CUDA_VISIBLE_DEVICES env var */
		error("gres/gpu unable to set CUDA_VISIBLE_DEVICES, "
		      "no device files configured");
	} else {
		xstrcat(dev_list, "NoDevFiles");
	}

	if (dev_list) {
		env_array_overwrite(job_env_ptr,"CUDA_VISIBLE_DEVICES",
				    dev_list);
		xfree(dev_list);
	}
}
Example #23
File: debug.c Project: chazu/btmux
static int
debug_check_stuff(void *key, void *data, int depth, void *arg)
{
	const dbref key_val = (dbref)key;
	XCODE *const xcode_obj = data;

	int osize, size;
	MAP *map;

	osize = size = SpecialObjects[xcode_obj->type].datasize;

	switch (xcode_obj->type) {
	case GTYPE_MAP:
		map = (MAP *)xcode_obj;
		if(map->map) {
			size += sizeof(map->map[0][0]) * map->map_width * map->map_height;
			size += bit_size(map);
			size += obj_size(map);
			size += mech_size(map);
		}
		break;

	default:
		break;
	}

	if(smallest[xcode_obj->type] < 0 || size < smallest[xcode_obj->type])
		smallest[xcode_obj->type] = size;
	if(largest[xcode_obj->type] < 0 || size > largest[xcode_obj->type])
		largest[xcode_obj->type] = size;
	total[xcode_obj->type] += size;
	number[xcode_obj->type]++;

	if(cheat_player > 0)
		notify_printf(cheat_player, "#%5d: %10s %5d", key_val,
		              SpecialObjects[xcode_obj->type].type, xcode_obj->type == GTYPE_AUTO ? ((AUTO *)xcode_obj)->mymechnum : 0 );

	return 1;
}
Example #24
/* helper function for _expand_masks() */
static void _blot_mask(bitstr_t *mask, bitstr_t *avail_map, uint16_t blot)
{
	uint16_t i, j, size = 0;
	int prev = -1;

	if (!mask)
		return;
	size = bit_size(mask);
	for (i = 0; i < size; i++) {
		if (bit_test(mask, i)) {
			/* fill in this blot */
			uint16_t start = (i / blot) * blot;
			if (start != prev) {
				for (j = start; j < start + blot; j++) {
					if (bit_test(avail_map, j))
						bit_set(mask, j);
				}
				prev = start;
			}
		}
	}
}
Example #25
int init_data_table(struct data *p1, double *a)
{
    int i;
    struct data *p2 = p1;

    /* zero every entry's value */
    for (i = 0; i < p1->number; i++)
    {
        p2 = p2->next;
        p2->value = 0;
    }
    /* relabel each entry with its bit_order() (bit-reversed) index */
    p2 = p1;
    for (i = 0; i < p1->number; i++)
    {
        p2 = p2->next;
        p2->number = bit_order(i, bit_size(p1->number - 1));
    }
    /* fetch each entry's value from the input array in permuted order */
    p2 = p1;
    for (i = 0; i < p1->number; i++)
    {
        p2 = p2->next;
        p2->value = a[p2->number];
    }
    return 0;
}
Example #26
/* _match_masks_to_ldom
 *
 * expand each mask to encompass the whole locality domain
 * within which it currently exists
 * NOTE: this assumes that the masks are already in logical
 * (and not abstract) CPU order.
 */
static void _match_masks_to_ldom(const uint32_t maxtasks, bitstr_t **masks)
{
	uint32_t i, b, size;

	if (!masks || !masks[0])
		return;
	size = bit_size(masks[0]);
	for (i = 0; i < maxtasks; i++) {
		for (b = 0; b < size; b++) {
			if (bit_test(masks[i], b)) {
				/* get the NUMA node for this CPU, and then
				 * set all CPUs in the mask that live on
				 * the same NUMA node */
				int c;
				uint16_t nnid = slurm_get_numa_node(b);
				for (c = 0; c < size; c++) {
					if (slurm_get_numa_node(c) == nnid)
						bit_set(masks[i], c);
				}
			}
		}
	}
}
Example #27
static int _preemption_loop(mysql_conn_t *mysql_conn, int begin_qosid,
			    bitstr_t *preempt_bitstr)
{
	slurmdb_qos_rec_t qos_rec;
	int rc = 0, i=0;

	xassert(preempt_bitstr);

	/* check in the preempt list for all qos's preempted */
	for (i = 0; i < bit_size(preempt_bitstr); i++) {
		if (!bit_test(preempt_bitstr, i))
			continue;

		memset(&qos_rec, 0, sizeof(qos_rec));
		qos_rec.id = i;
		assoc_mgr_fill_in_qos(mysql_conn, &qos_rec,
				      ACCOUNTING_ENFORCE_QOS,
				      NULL);
		/* check if the begin_qosid is preempted by this qos
		 * if so we have a loop */
		if (qos_rec.preempt_bitstr
		    && bit_test(qos_rec.preempt_bitstr, begin_qosid)) {
			error("QOS id %d has a loop at QOS %s",
			      begin_qosid, qos_rec.name);
			rc = 1;
			break;
		} else if (qos_rec.preempt_bitstr) {
			/* check this qos' preempt list and make sure
			   no loops exist there either */
			if ((rc = _preemption_loop(mysql_conn, begin_qosid,
						   qos_rec.preempt_bitstr)))
				break;
		}
	}
	return rc;
}
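
The recursion above follows preemption edges and reports a cycle as soon as it returns to the starting QOS id. The same check on a plain adjacency-bitmask graph; unlike the original, this sketch also carries a visited set so that cycles not involving the start cannot recurse forever:

#include <stdbool.h>
#include <stdint.h>

#define MAX_QOS 64

/* preempts[q] = bitmask of QOS ids that q may preempt.  Returns true if
 * following edges from `cur` can lead back to `start`. */
static bool preemption_loop(const uint64_t preempts[], int start, int cur,
                            uint64_t visited)
{
    if (visited & (1ULL << cur))
        return false;            /* already explored on this path */
    visited |= 1ULL << cur;
    for (int q = 0; q < MAX_QOS; q++) {
        if (!(preempts[cur] & (1ULL << q)))
            continue;
        if (q == start || preemption_loop(preempts, start, q, visited))
            return true;
    }
    return false;
}

A QOS id q sits in a loop exactly when preemption_loop(preempts, q, q, 0) returns true.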
Example #28
extern int bg_free_block(bg_record_t *bg_record, bool wait, bool locked)
{
	int rc = SLURM_SUCCESS;
	int count = 0;

	if (!bg_record) {
		error("bg_free_block: there was no bg_record");
		return SLURM_ERROR;
	}

	if (!locked)
		slurm_mutex_lock(&block_state_mutex);

	while (count < MAX_FREE_RETRIES) {
		/* block was removed */
		if (bg_record->magic != BLOCK_MAGIC) {
			error("block was removed while freeing it here");
			xassert(0);
			if (!locked)
				slurm_mutex_unlock(&block_state_mutex);
			return SLURM_SUCCESS;
		}
		/* Reset these here so we don't try to reboot it
		   when the state goes to free.
		*/
		bg_record->boot_state = 0;
		bg_record->boot_count = 0;
		/* Here we don't need to check whether the block still
		 * exists, since this function can't be called on the
		 * same block twice.  It may have already been removed
		 * at this point as well.
		 */
#ifdef HAVE_BG_FILES
		if (bg_record->state != BG_BLOCK_FREE
		    && bg_record->state != BG_BLOCK_TERM) {
			if (bg_conf->slurm_debug_flags & DEBUG_FLAG_SELECT_TYPE)
				info("bridge_destroy %s",
				     bg_record->bg_block_id);
			rc = bridge_block_free(bg_record);
			if (rc != SLURM_SUCCESS) {
				if (rc == BG_ERROR_BLOCK_NOT_FOUND) {
					debug("block %s is not found",
					      bg_record->bg_block_id);
					bg_record->state = BG_BLOCK_FREE;
					break;
				} else if (rc == BG_ERROR_FREE) {
					if (bg_conf->slurm_debug_flags
					    & DEBUG_FLAG_SELECT_TYPE)
						info("bridge_block_free"
						     "(%s): %s State = %s",
						     bg_record->bg_block_id,
						     bg_err_str(rc),
						     bg_block_state_string(
							     bg_record->state));
				} else if (rc == BG_ERROR_INVALID_STATE) {
#ifndef HAVE_BGL
					/* If the state is error and
					   we get an incompatible
					   state back here, it means
					   we set it ourselves so
					   break out.
					*/
					if (bg_record->state
					    & BG_BLOCK_ERROR_FLAG)
						break;
#endif
					if (bg_conf->slurm_debug_flags
					    & DEBUG_FLAG_SELECT_TYPE)
						info("bridge_block_free"
						     "(%s): %s State = %s",
						     bg_record->bg_block_id,
						     bg_err_str(rc),
						     bg_block_state_string(
							     bg_record->state));
#ifdef HAVE_BGQ
					if (bg_record->state != BG_BLOCK_FREE
					    && bg_record->state
					    != BG_BLOCK_TERM)
						bg_record->state =
							BG_BLOCK_TERM;
#endif
				} else {
					error("bridge_block_free"
					      "(%s): %s State = %s",
					      bg_record->bg_block_id,
					      bg_err_str(rc),
					      bg_block_state_string(
						      bg_record->state));
				}
			}
		}
#else
		/* Fake a free since we are in deallocating
		   state before this.
		*/
		if (bg_record->state & BG_BLOCK_ERROR_FLAG) {
			/* This will set the state to ERROR(Free)
			 * just in case the state was ERROR(SOMETHING ELSE) */
			bg_record->state = BG_BLOCK_ERROR_FLAG;
			break;
		} else if (!wait || (count >= 3))
			bg_record->state = BG_BLOCK_FREE;
		else if (bg_record->state != BG_BLOCK_FREE)
			bg_record->state = BG_BLOCK_TERM;
#endif

		if (!wait || (bg_record->state == BG_BLOCK_FREE)
#ifndef HAVE_BGL
		    ||  (bg_record->state & BG_BLOCK_ERROR_FLAG)
#endif
			) {
			break;
		}
		/* If we were locked outside of this we need to unlock
		   to not cause deadlock on this mutex until we are
		   done.
		*/
		slurm_mutex_unlock(&block_state_mutex);
		sleep(FREE_SLEEP_INTERVAL);
		count++;
		slurm_mutex_lock(&block_state_mutex);
	}

	rc = SLURM_SUCCESS;
	if ((bg_record->state == BG_BLOCK_FREE)
	    || (bg_record->state & BG_BLOCK_ERROR_FLAG)) {

		if (bg_record->err_ratio
		    && (bg_record->state == BG_BLOCK_FREE)) {
			/* Sometimes the realtime server can report
			   software error on cnodes even though the
			   block is free.  If this is the case we need
			   to manually clear them.
			*/
			ba_mp_t *found_ba_mp;
			ListIterator itr =
				list_iterator_create(bg_record->ba_mp_list);
			debug("Block %s is free, but has %u cnodes in error.  "
			      "This can happen if a large block goes into "
			      "error and then is freed and the state of "
			      "the block changes before the "
			      "database informs all the cnodes are back to "
			      "normal.  This is no big deal.",
			      bg_record->bg_block_id, bg_record->cnode_err_cnt);
			while ((found_ba_mp = list_next(itr))) {
				if (!found_ba_mp->used)
					continue;

				if (!found_ba_mp->cnode_err_bitmap)
					found_ba_mp->cnode_err_bitmap =
						bit_alloc(
							bg_conf->mp_cnode_cnt);

				bit_nclear(found_ba_mp->cnode_err_bitmap, 0,
					   bit_size(found_ba_mp->
						    cnode_err_bitmap)-1);
			}
			list_iterator_destroy(itr);
			bg_record->cnode_err_cnt = 0;
			bg_record->err_ratio = 0;
		}

		remove_from_bg_list(bg_lists->booted, bg_record);
	} else if (count >= MAX_FREE_RETRIES) {
		/* Something isn't right, go mark this one in an error
		   state. */
		update_block_msg_t block_msg;
		if (bg_conf->slurm_debug_flags & DEBUG_FLAG_SELECT_TYPE)
			info("bg_free_block: block %s is not in state "
			     "free (%s), putting it in error state.",
			     bg_record->bg_block_id,
			     bg_block_state_string(bg_record->state));
		slurm_init_update_block_msg(&block_msg);
		block_msg.bg_block_id = bg_record->bg_block_id;
		block_msg.state = BG_BLOCK_ERROR_FLAG;
		block_msg.reason = "Block would not deallocate";
		slurm_mutex_unlock(&block_state_mutex);
		select_g_update_block(&block_msg);
		slurm_mutex_lock(&block_state_mutex);
		rc = SLURM_ERROR;
	}
	if (!locked)
		slurm_mutex_unlock(&block_state_mutex);

	return rc;
}
Example #29
/* Log the contents of a job_resources data structure using info() */
extern void log_job_resources(uint32_t job_id,
			      job_resources_t *job_resrcs_ptr)
{
	int bit_inx = 0, bit_reps, i;
	int array_size, node_inx;
	int sock_inx = 0, sock_reps = 0;

	if (job_resrcs_ptr == NULL) {
		error("log_job_resources: job_resrcs_ptr is NULL");
		return;
	}

	info("====================");
	info("job_id:%u nhosts:%u ncpus:%u node_req:%u nodes=%s",
	     job_id, job_resrcs_ptr->nhosts, job_resrcs_ptr->ncpus,
	     job_resrcs_ptr->node_req, job_resrcs_ptr->nodes);

	if (job_resrcs_ptr->cpus == NULL) {
		error("log_job_resources: cpus array is NULL");
		return;
	}
	if (job_resrcs_ptr->memory_allocated == NULL) {
		error("log_job_resources: memory array is NULL");
		return;
	}
	if ((job_resrcs_ptr->cores_per_socket == NULL) ||
	    (job_resrcs_ptr->sockets_per_node == NULL) ||
	    (job_resrcs_ptr->sock_core_rep_count == NULL)) {
		error("log_job_resources: socket/core array is NULL");
		return;
	}
	if (job_resrcs_ptr->core_bitmap == NULL) {
		error("log_job_resources: core_bitmap is NULL");
		return;
	}
	if (job_resrcs_ptr->core_bitmap_used == NULL) {
		error("log_job_resources: core_bitmap_used is NULL");
		return;
	}
	array_size = bit_size(job_resrcs_ptr->core_bitmap);

	/* Can only log node_bitmap from slurmctld, so don't bother here */
	for (node_inx=0; node_inx<job_resrcs_ptr->nhosts; node_inx++) {
		uint32_t cpus_used = 0, memory_allocated = 0, memory_used = 0;
		info("Node[%d]:", node_inx);

		if (sock_reps >=
		    job_resrcs_ptr->sock_core_rep_count[sock_inx]) {
			sock_inx++;
			sock_reps = 0;
		}
		sock_reps++;

		if (job_resrcs_ptr->cpus_used)
			cpus_used = job_resrcs_ptr->cpus_used[node_inx];
		if (job_resrcs_ptr->memory_used)
			memory_used = job_resrcs_ptr->memory_used[node_inx];
		if (job_resrcs_ptr->memory_allocated)
			memory_allocated = job_resrcs_ptr->
				memory_allocated[node_inx];

		info("  Mem(MB):%u:%u  Sockets:%u  Cores:%u  CPUs:%u:%u",
		     memory_allocated, memory_used,
		     job_resrcs_ptr->sockets_per_node[sock_inx],
		     job_resrcs_ptr->cores_per_socket[sock_inx],
		     job_resrcs_ptr->cpus[node_inx],
		     cpus_used);

		bit_reps = job_resrcs_ptr->sockets_per_node[sock_inx] *
			job_resrcs_ptr->cores_per_socket[sock_inx];
		for (i=0; i<bit_reps; i++) {
			if (bit_inx >= array_size) {
				error("log_job_resources: array size wrong");
				break;
			}
			if (bit_test(job_resrcs_ptr->core_bitmap,
				     bit_inx)) {
				char *core_used = "";
				if (bit_test(job_resrcs_ptr->
					     core_bitmap_used, bit_inx))
					core_used = " and in use";
				info("  Socket[%d] Core[%d] is allocated%s",
				     (i / job_resrcs_ptr->
				      cores_per_socket[sock_inx]),
				     (i % job_resrcs_ptr->
				      cores_per_socket[sock_inx]),
				     core_used);
			}
			bit_inx++;
		}
	}
	for (node_inx=0; node_inx<job_resrcs_ptr->cpu_array_cnt;
	     node_inx++) {
		if (node_inx == 0)
			info("--------------------");
		info("cpu_array_value[%d]:%u reps:%u", node_inx,
		     job_resrcs_ptr->cpu_array_value[node_inx],
		     job_resrcs_ptr->cpu_array_reps[node_inx]);
	}
	info("====================");
}
Example #30
/* To effectively deal with heterogeneous nodes, we fake a cyclic
 * distribution to figure out how many cpus are needed on each node.
 *
 * This routine is a slightly modified "version" of the routine
 * _task_layout_block in src/common/dist_tasks.c. We do not need to
 * assign tasks to job->hostid[] and job->tids[][] at this point so
 * the cpu allocation is the same for cyclic and block.
 *
 * For the consumable resources support we need to determine what
 * "node/CPU/Core/thread"-tuplets will be allocated for a given job.
 * In the past we assumed that we only allocated one task per CPU (at
 * that point the lowest level of logical processor) and didn't allow
 * the use of overcommit. We have changed this philosophy and are now
 * allowing people to overcommit their resources and expect the system
 * administrator to enable the task/affinity plug-in which will then
 * bind all of a job's tasks to its allocated resources thereby
 * avoiding interference between co-allocated running jobs.
 *
 * In the consumable resources environment we need to determine the
 * layout schema within slurmctld.
 *
 * We have a core_bitmap of all available cores. All we're doing here
 * is removing cores that are not needed based on the task count, and
 * the choice of cores to remove is based on the distribution:
 * - "cyclic" removes cores "evenly", starting from the last socket,
 * - "block" removes cores from the "last" socket(s)
 * - "plane" removes cores "in chunks"
 */
extern int cr_dist(struct job_record *job_ptr, const uint16_t cr_type)
{
	int error_code, cr_cpu = 1;

	if (((job_ptr->job_resrcs->node_req == NODE_CR_RESERVED) ||
	     (job_ptr->details->whole_node != 0)) &&
	    (job_ptr->details->core_spec == 0)) {
		/* the job has been allocated an EXCLUSIVE set of nodes,
		 * so it gets all of the bits in the core_bitmap and
		 * all of the available CPUs in the cpus array */
		int size = bit_size(job_ptr->job_resrcs->core_bitmap);
		bit_nset(job_ptr->job_resrcs->core_bitmap, 0, size-1);
		return SLURM_SUCCESS;
	}

	_log_select_maps("cr_dist/start", job_ptr->job_resrcs->node_bitmap,
			 job_ptr->job_resrcs->core_bitmap);
	if (job_ptr->details->task_dist == SLURM_DIST_PLANE) {
		/* perform a plane distribution on the 'cpus' array */
		error_code = _compute_plane_dist(job_ptr);
		if (error_code != SLURM_SUCCESS) {
			error("cons_res: cr_dist: Error in "
			      "_compute_plane_dist");
			return error_code;
		}
	} else {
		/* perform a cyclic distribution on the 'cpus' array */
		error_code = _compute_c_b_task_dist(job_ptr);
		if (error_code != SLURM_SUCCESS) {
			error("cons_res: cr_dist: Error in "
			      "_compute_c_b_task_dist");
			return error_code;
		}
	}

	/* now sync up the core_bitmap with the allocated 'cpus' array
	 * based on the given distribution AND resource setting */
	if ((cr_type & CR_CORE) || (cr_type & CR_SOCKET))
		cr_cpu = 0;

	if (cr_cpu) {
		_block_sync_core_bitmap(job_ptr, cr_type);
		return SLURM_SUCCESS;
	}

	/*
	 * If SelectTypeParameters mentions to use a block distribution for
	 * cores by default, use that kind of distribution if no particular
	 * cores distribution specified.
	 * Note : cyclic cores distribution, which is the default, is treated
	 * by the next code block
	 */
	if ( slurmctld_conf.select_type_param & CR_CORE_DEFAULT_DIST_BLOCK ) {
		switch(job_ptr->details->task_dist) {
		case SLURM_DIST_ARBITRARY:
		case SLURM_DIST_BLOCK:
		case SLURM_DIST_CYCLIC:
		case SLURM_DIST_UNKNOWN:
			_block_sync_core_bitmap(job_ptr, cr_type);
			return SLURM_SUCCESS;
		}
	}

	/* Determine the number of logical processors per node needed
	 * for this job. Make sure below matches the layouts in
	 * lllp_distribution in plugins/task/affinity/dist_task.c (FIXME) */
	switch(job_ptr->details->task_dist) {
	case SLURM_DIST_BLOCK_BLOCK:
	case SLURM_DIST_CYCLIC_BLOCK:
	case SLURM_DIST_PLANE:
		_block_sync_core_bitmap(job_ptr, cr_type);
		break;
	case SLURM_DIST_ARBITRARY:
	case SLURM_DIST_BLOCK:
	case SLURM_DIST_CYCLIC:
	case SLURM_DIST_BLOCK_CYCLIC:
	case SLURM_DIST_CYCLIC_CYCLIC:
	case SLURM_DIST_BLOCK_CFULL:
	case SLURM_DIST_CYCLIC_CFULL:
	case SLURM_DIST_UNKNOWN:
		error_code = _cyclic_sync_core_bitmap(job_ptr, cr_type);
		break;
	default:
		error("select/cons_res: invalid task_dist entry");
		return SLURM_ERROR;
	}

	_log_select_maps("cr_dist/fini", job_ptr->job_resrcs->node_bitmap,
			 job_ptr->job_resrcs->core_bitmap);
	return error_code;
}
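
The distribution names in the switch above decide which cores are pruned when a job needs fewer than a full node. A toy contrast of the two basic policies on one 2-socket, 4-cores-per-socket node (bit i*4+j = socket i, core j; everything here is illustrative):

#include <stdint.h>

#define SOCKETS 2
#define CORES   4  /* per socket */

/* Block: keep the first `need` cores, filling socket 0 before socket 1. */
static uint8_t keep_block(int need)
{
    uint8_t mask = 0;
    for (int c = 0; c < SOCKETS * CORES && need > 0; c++, need--)
        mask |= (uint8_t)(1u << c);
    return mask;
}

/* Cyclic: take one core from each socket in turn. */
static uint8_t keep_cyclic(int need)
{
    uint8_t mask = 0;
    for (int i = 0; i < SOCKETS * CORES && need > 0; i++, need--) {
        int sock = i % SOCKETS;
        int core = i / SOCKETS;
        mask |= (uint8_t)(1u << (sock * CORES + core));
    }
    return mask;
}

keep_block(3) keeps cores {0,1,2}, all on socket 0; keep_cyclic(3) keeps {0,4,1}, spreading the job across both sockets — the "evenly" spreading described in the comment block above, here run from the first socket for simplicity.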