Exemplo n.º 1
0
static int _job_resrcs_to_hv(job_info_t *job_info, HV *hv)
{
	AV *av;
	HV *nr_hv;
	bitstr_t *cpu_bitmap;
	int sock_inx, sock_reps, last, cnt = 0, i, j, k;
	char tmp1[128], tmp2[128];
	char *host;
	job_resources_t *job_resrcs = job_info->job_resrcs;
	int bit_inx, bit_reps;
	int abs_node_inx, rel_node_inx;
	uint64_t *last_mem_alloc_ptr = NULL;
	uint64_t last_mem_alloc = NO_VAL64;
	char *last_hosts;
	hostlist_t hl, hl_last;
	uint32_t threads;

	if (!job_resrcs || !job_resrcs->core_bitmap
	    || ((last = slurm_bit_fls(job_resrcs->core_bitmap)) == -1))
		return 0;

	if (!(hl = slurm_hostlist_create(job_resrcs->nodes)))
		return 1;

	if (!(hl_last = slurm_hostlist_create(NULL)))
		return 1;
	av = newAV();

	bit_inx = 0;
	i = sock_inx = sock_reps = 0;
	abs_node_inx = job_info->node_inx[i];

/*	tmp1[] stores the current cpu(s) allocated	*/
	tmp2[0] = '\0';	/* stores last cpu(s) allocated */
	for (rel_node_inx=0; rel_node_inx < job_resrcs->nhosts;
	     rel_node_inx++) {

		if (sock_reps >= job_resrcs->sock_core_rep_count[sock_inx]) {
			sock_inx++;
			sock_reps = 0;
		}
		sock_reps++;

		bit_reps = job_resrcs->sockets_per_node[sock_inx] *
			job_resrcs->cores_per_socket[sock_inx];
		host = slurm_hostlist_shift(hl);
		threads = _threads_per_core(host);
		cpu_bitmap = slurm_bit_alloc(bit_reps * threads);
		for (j = 0; j < bit_reps; j++) {
			if (slurm_bit_test(job_resrcs->core_bitmap, bit_inx)){
				for (k = 0; k < threads; k++)
					slurm_bit_set(cpu_bitmap,
						      (j * threads) + k);
			}
			bit_inx++;
		}
		slurm_bit_fmt(tmp1, sizeof(tmp1), cpu_bitmap);
		FREE_NULL_BITMAP(cpu_bitmap);
/*
 *		If the allocation values for this host are not the same as the
 *		last host, print the report of the last group of hosts that had
 *		identical allocation values.
 */
		if (strcmp(tmp1, tmp2) ||
		    (last_mem_alloc_ptr != job_resrcs->memory_allocated) ||
		    (job_resrcs->memory_allocated &&
		     (last_mem_alloc !=
		      job_resrcs->memory_allocated[rel_node_inx]))) {
			if (slurm_hostlist_count(hl_last)) {
				last_hosts =
					slurm_hostlist_ranged_string_xmalloc(
						hl_last);
				nr_hv = newHV();
				hv_store_charp(nr_hv, "nodes", last_hosts);
				hv_store_charp(nr_hv, "cpu_ids", tmp2);
				hv_store_uint64_t(nr_hv, "mem",
						  last_mem_alloc_ptr ?
						  last_mem_alloc : 0);
				av_store(av, cnt++, newRV_noinc((SV*)nr_hv));
				xfree(last_hosts);
				slurm_hostlist_destroy(hl_last);
				hl_last = slurm_hostlist_create(NULL);
			}
			strcpy(tmp2, tmp1);
			last_mem_alloc_ptr = job_resrcs->memory_allocated;
			if (last_mem_alloc_ptr)
				last_mem_alloc = job_resrcs->
					memory_allocated[rel_node_inx];
			else
				last_mem_alloc = NO_VAL64;
		}
		slurm_hostlist_push_host(hl_last, host);
		free(host);

		if (bit_inx > last)
			break;

		if (abs_node_inx > job_info->node_inx[i+1]) {
			i += 2;
			abs_node_inx = job_info->node_inx[i];
		} else {
			abs_node_inx++;
		}
	}

	if (slurm_hostlist_count(hl_last)) {
		last_hosts = slurm_hostlist_ranged_string_xmalloc(hl_last);
		nr_hv = newHV();
		hv_store_charp(nr_hv, "nodes", last_hosts);
		hv_store_charp(nr_hv, "cpu_ids", tmp2);
		hv_store_uint64_t(nr_hv, "mem",
				  last_mem_alloc_ptr ?
				  last_mem_alloc : 0);
		av_store(av, cnt++, newRV_noinc((SV*)nr_hv));
		xfree(last_hosts);
	}
	slurm_hostlist_destroy(hl);
	slurm_hostlist_destroy(hl_last);
	hv_store_sv(hv, "node_resrcs", newRV_noinc((SV*)av));

	return 0;
}
Exemplo n.º 2
0
/*
 * pmixp_coll_ring_log - dump the state of a ring collective via PMIXP_ERROR.
 *
 * Logs the collective type and sequence number, the local peer id with its
 * host name, the next/previous ring neighbors, and then one entry per ring
 * context. For a context that is in use it additionally logs the
 * contribution counters, the hosts split by their contrib_map[] flag
 * ("done" = flag set, "wait" = flag clear), the context state and the
 * buffer usage.
 *
 * IN coll - collective to report on
 */
void pmixp_coll_ring_log(pmixp_coll_t *coll)
{
	int i;
	int next_id, prev_id;
	pmixp_coll_ring_t *ring = &coll->state.ring;
	char *nodename, *next, *prev;

	PMIXP_ERROR("%p: %s state seq=%d",
		    (void *)coll, pmixp_coll_type2str(coll->type), coll->seq);
	nodename = pmixp_info_job_host(coll->my_peerid);
	PMIXP_ERROR("my peerid: %d:%s", coll->my_peerid, nodename);
	xfree(nodename);

	/*
	 * Look up each neighbor id once so that the id and the host name
	 * printed for it always come from the same value.
	 */
	next_id = _ring_next_id(coll);
	next = pmixp_info_job_host(next_id);
	prev_id = _ring_prev_id(coll);
	prev = pmixp_info_job_host(prev_id);
	PMIXP_ERROR("neighbor id: next %d:%s, prev %d:%s",
		    next_id, next, prev_id, prev);
	xfree(next);
	xfree(prev);

	for (i = 0; i < PMIXP_COLL_RING_CTX_NUM; i++) {
		pmixp_coll_ring_ctx_t *coll_ctx = &ring->ctx_array[i];
		/*
		 * NOTE(review): the failure contract of
		 * pmixp_hostset_from_ranges() is not visible here. Starting
		 * from NULL guarantees that an indeterminate handle is never
		 * used if it returns without filling its output argument.
		 */
		hostlist_t hl_done_contrib = NULL, hl_wait_contrib = NULL;

		PMIXP_ERROR("Context ptr=%p, #%d, in-use=%d",
			    (void *)coll_ctx, i, coll_ctx->in_use);

		if (!coll_ctx->in_use)
			continue;

		pmixp_hostset_from_ranges(coll->pset.procs,
					  coll->pset.nprocs,
					  &hl_done_contrib);
		if (hl_done_contrib)
			hl_wait_contrib = hostlist_copy(hl_done_contrib);

		PMIXP_ERROR("\t seq=%d contribs: loc=%d/prev=%d/fwd=%d",
			    coll_ctx->seq, coll_ctx->contrib_local,
			    coll_ctx->contrib_prev,
			    coll_ctx->forward_cnt);
		PMIXP_ERROR("\t neighbor contribs [%d]:",
			    coll->peers_cnt);

		/* The per-host report needs both lists; skip it otherwise. */
		if (hl_done_contrib && hl_wait_contrib) {
			int id;
			char *done_contrib, *wait_contrib;

			/*
			 * Both lists start with every host. A peer whose
			 * contrib_map[] flag is set is removed from the
			 * "wait" list, any other peer from the "done" list.
			 */
			for (id = 0; id < coll->peers_cnt; id++) {
				char *peer_host = pmixp_info_job_host(id);

				if (coll_ctx->contrib_map[id]) {
					hostlist_delete_host(hl_wait_contrib,
							     peer_host);
				} else {
					hostlist_delete_host(hl_done_contrib,
							     peer_host);
				}
				xfree(peer_host);
			}
			done_contrib = slurm_hostlist_ranged_string_xmalloc(
				hl_done_contrib);
			wait_contrib = slurm_hostlist_ranged_string_xmalloc(
				hl_wait_contrib);
			/* Print "-" for an empty (or missing) host string. */
			PMIXP_ERROR("\t done contrib: %s",
				    (done_contrib && done_contrib[0]) ?
				    done_contrib : "-");
			PMIXP_ERROR("\t wait contrib: %s",
				    (wait_contrib && wait_contrib[0]) ?
				    wait_contrib : "-");
			xfree(done_contrib);
			xfree(wait_contrib);
		}

		PMIXP_ERROR("\t status=%s",
			    pmixp_coll_ring_state2str(coll_ctx->state));
		PMIXP_ERROR("\t buf size=%u, remain=%u",
			    size_buf(coll_ctx->ring_buf),
			    remaining_buf(coll_ctx->ring_buf));

		if (hl_done_contrib)
			hostlist_destroy(hl_done_contrib);
		if (hl_wait_contrib)
			hostlist_destroy(hl_wait_contrib);
	}
}