/*
 * Initialize the ring-collective part of a collective descriptor.
 *
 * coll - collective being set up; coll->peers_cnt and coll->seq are read
 *        here, so the caller must have filled them in already.
 * hl   - pointer to the hostlist of the nodes taking part in the collective.
 *
 * Determines the job-level host id of the next neighbor in the ring, creates
 * the two buffer pools and resets every ring context.
 *
 * Returns SLURM_SUCCESS on success. Returns SLURM_ERROR (before anything is
 * allocated) if the collective has no peers, if the local host is not part
 * of "hl", or if the neighbor's hostname cannot be extracted from "hl".
 */
int pmixp_coll_ring_init(pmixp_coll_t *coll, hostlist_t *hl)
{
#ifdef PMIXP_COLL_DEBUG
	PMIXP_DEBUG("called");
#endif
	int i;
	pmixp_coll_ring_ctx_t *coll_ctx = NULL;
	pmixp_coll_ring_t *ring = &coll->state.ring;
	char *p;
	int rel_id;

	/* peers_cnt is used as a modulus below: zero would be undefined */
	if (coll->peers_cnt < 1) {
		PMIXP_ERROR("Ring collective has no peers");
		return SLURM_ERROR;
	}

	/* position of this node inside the participants list */
	rel_id = hostlist_find(*hl, pmixp_info_hostname());
	if (rel_id < 0) {
		/*
		 * Without this check (-1 + 1) % peers_cnt would silently
		 * select the first host as our neighbor.
		 */
		PMIXP_ERROR("Local host is not in the ring hostlist");
		return SLURM_ERROR;
	}

	/* compute the next absolute id of the neighbor */
	p = hostlist_nth(*hl, (rel_id + 1) % coll->peers_cnt);
	if (!p) {
		PMIXP_ERROR("Cannot find the next peer in the ring hostlist");
		return SLURM_ERROR;
	}
	ring->next_peerid = pmixp_info_job_hostid(p);
	/* the string handed out by hostlist_nth() is released with free() */
	free(p);

	ring->fwrd_buf_pool = list_create(pmixp_free_buf);
	ring->ring_buf_pool = list_create(pmixp_free_buf);

	for (i = 0; i < PMIXP_COLL_RING_CTX_NUM; i++) {
		coll_ctx = &ring->ctx_array[i];
		coll_ctx->coll = coll;
		coll_ctx->in_use = false;
		coll_ctx->seq = coll->seq;
		coll_ctx->contrib_local = false;
		coll_ctx->contrib_prev = 0;
		coll_ctx->state = PMIXP_COLL_RING_SYNC;
		// TODO bit vector
		/* one flag per peer */
		coll_ctx->contrib_map = xmalloc(sizeof(bool) * coll->peers_cnt);
	}

	return SLURM_SUCCESS;
}
/*
 * Based on ideas provided by Hongjia Cao <*****@*****.**> in PMI2 plugin
 */

/*
 * Initialize a tree-based collective descriptor.
 *
 * coll   - descriptor to fill in.
 * procs  - array of process ranges taking part in the collective; it is
 *          copied into coll->pset, the caller keeps ownership of "procs".
 * nprocs - number of elements in "procs".
 * type   - collective type, stored as is.
 *
 * The participating hosts are derived from "procs" and arranged in a reverse
 * tree whose width is slurm_get_tree_width(). Parent, root and direct
 * children ids are converted from positions in that hostlist to job-level
 * host ids via pmixp_info_job_hostid().
 *
 * Returns SLURM_SUCCESS on success. Returns SLURM_ERROR if the ranges cannot
 * be converted to a hostlist or if the local host is not one of the
 * participants; in that case the copy of "procs" is released and
 * coll->pset is left empty.
 */
int pmixp_coll_init(pmixp_coll_t *coll, const pmix_proc_t *procs,
		    size_t nprocs, pmixp_coll_type_t type)
{
	hostlist_t hl;
	int max_depth, width, depth, i;
	char *p;

#ifndef NDEBUG
	coll->magic = PMIXP_COLL_STATE_MAGIC;
#endif
	coll->type = type;
	coll->state = PMIXP_COLL_SYNC;

	/* keep a private copy of the participants description */
	coll->pset.procs = xmalloc(sizeof(*procs) * nprocs);
	coll->pset.nprocs = nprocs;
	memcpy(coll->pset.procs, procs, sizeof(*procs) * nprocs);

	if (SLURM_SUCCESS != _hostset_from_ranges(procs, nprocs, &hl)) {
		/* TODO: provide ranges output routine */
		PMIXP_ERROR("Bad ranges information");
		goto err_exit;
	}

	coll->peers_cnt = hostlist_count(hl);
	coll->my_peerid = hostlist_find(hl, pmixp_info_hostname());
	if (coll->my_peerid < 0) {
		/*
		 * The local host is not in the list: the tree position
		 * computed below would be meaningless.
		 */
		PMIXP_ERROR("Local host is not a collective participant");
		hostlist_destroy(hl);
		goto err_exit;
	}

#ifdef PMIXP_COLL_DEBUG
	/* if we debug collectives - store a copy of a full
	 * hostlist to resolve participant id to the hostname
	 */
	coll->peers_hl = hostlist_copy(hl);
#endif

	width = slurm_get_tree_width();
	reverse_tree_info(coll->my_peerid, coll->peers_cnt, width,
			  &coll->prnt_peerid, &coll->chldrn_cnt, &depth,
			  &max_depth);

	coll->seq = 0;
	coll->contrib_children = 0;
	coll->contrib_local = false;
	coll->chldrn_ids = xmalloc(sizeof(int) * width);
	coll->contrib_chld = xmalloc(sizeof(int) * width);
	/*
	 * We are interested in the number of direct children only, so the
	 * count stored by reverse_tree_info() above is overwritten here.
	 */
	coll->chldrn_cnt = reverse_tree_direct_children(coll->my_peerid,
							coll->peers_cnt,
							width, depth,
							coll->chldrn_ids);

	if (coll->prnt_peerid == -1) {
		/* if we are the root of the tree:
		 * - we don't have a parent;
		 * - we have a large list of all children (we don't want
		 *   ourselves there)
		 *
		 * NOTE(review): root_host/root_peerid are not assigned on
		 * this path - presumably unused on the root or zeroed by
		 * the caller; confirm.
		 */
		coll->prnt_host = NULL;
		coll->all_chldrn_hl = hostlist_copy(hl);
		hostlist_delete_host(coll->all_chldrn_hl,
				     pmixp_info_hostname());
		coll->chldrn_str =
			hostlist_ranged_string_xmalloc(coll->all_chldrn_hl);
	} else {
		/* for all other nodes in the tree we need to know:
		 * - nodename of our parent;
		 * - we don't need a list of all children and hl anymore
		 */

		/*
		 * setup parent id's
		 */
		p = hostlist_nth(hl, coll->prnt_peerid);
		coll->prnt_host = xstrdup(p);
		free(p);
		/* reset prnt_peerid to the global peer */
		coll->prnt_peerid = pmixp_info_job_hostid(coll->prnt_host);

		/*
		 * setup root id's
		 * (we need this for the SLURM API communication case)
		 */
		p = hostlist_nth(hl, 0);
		coll->root_host = xstrdup(p);
		free(p);
		/* set root_peerid to the global peer */
		coll->root_peerid = pmixp_info_job_hostid(coll->root_host);

		/* use empty hostlist here */
		coll->all_chldrn_hl = hostlist_create("");
		coll->chldrn_str = NULL;
	}

	/* fixup children peer ids to the global ones */
	for (i = 0; i < coll->chldrn_cnt; i++) {
		p = hostlist_nth(hl, coll->chldrn_ids[i]);
		coll->chldrn_ids[i] = pmixp_info_job_hostid(p);
		free(p);
	}
	hostlist_destroy(hl);

	/* Collective state */
	coll->ufwd_buf = pmixp_server_buf_new();
	coll->dfwd_buf = pmixp_server_buf_new();
	_reset_coll_ufwd(coll);
	_reset_coll_dfwd(coll);
	coll->cbdata = NULL;
	coll->cbfunc = NULL;

	/* init fine grained lock */
	slurm_mutex_init(&coll->lock);

	return SLURM_SUCCESS;

err_exit:
	/*
	 * Release the copy of "procs" made above so that a failed init does
	 * not leak it. NOTE(review): xfree() presumably also resets the
	 * pointer to NULL, which keeps a later release by the caller
	 * harmless - confirm.
	 */
	xfree(coll->pset.procs);
	coll->pset.nprocs = 0;
	return SLURM_ERROR;
}