int pmixp_server_pp_send(int nodeid, int size) { Buf buf = pmixp_server_buf_new(); int rc; pmixp_ep_t ep; struct pp_cbdata *cbdata = xmalloc(sizeof(*cbdata)); grow_buf(buf, size); ep.type = PMIXP_EP_NOIDEID; ep.ep.nodeid = nodeid; cbdata->buf = buf; cbdata->size = size; set_buf_offset(buf,get_buf_offset(buf) + size); rc = pmixp_server_send_nb(&ep, PMIXP_MSG_PINGPONG, _pmixp_pp_count, buf, pingpong_complete, (void*)cbdata); if (SLURM_SUCCESS != rc) { char *nodename = pmixp_info_job_host(nodeid); PMIXP_ERROR("Was unable to wait for the parent %s to " "become alive", nodename); xfree(nodename); } return rc; }
/*
 * Return a forward buffer for the ring collective: reuse one from the
 * ring's forward-buffer pool when available, otherwise allocate a
 * fresh server buffer.
 */
static Buf _get_fwd_buf(pmixp_coll_ring_ctx_t *coll_ctx)
{
	pmixp_coll_ring_t *ring = _ctx_get_coll_ring(coll_ctx);
	Buf reused = list_pop(ring->fwrd_buf_pool);

	if (reused) {
		return reused;
	}
	return pmixp_server_buf_new();
}
/*
 * Process the extended part of an incoming header: extract the remote
 * endpoint information carried in @buf and establish (or progress) a
 * direct connection to the sending node identified by hdr->nodeid.
 *
 * Returns SLURM_SUCCESS, or SLURM_ERROR if the connection could not be
 * established or the software-progress engine could not be set up.
 */
static int _process_extended_hdr(pmixp_base_hdr_t *hdr, Buf buf)
{
	char nhdr[PMIXP_BASE_HDR_MAX];
	bool send_init = false;
	size_t dsize = 0, hsize = 0;
	pmixp_dconn_t *dconn;
	_direct_proto_message_t *init_msg = NULL;
	int rc = SLURM_SUCCESS;
	char *ep_data = NULL;
	uint32_t ep_len = 0;

	/* Lock the per-node direct-connection descriptor; it is held
	 * until the `unlock` label below */
	dconn = pmixp_dconn_lock(hdr->nodeid);
	if (!dconn) {
		/* Should not happen */
		xassert( dconn );
		abort();
	}

	/* Retrieve endpoint information */
	_base_hdr_unpack_ext(buf, &ep_data, &ep_len);

	/* Check if init message is required to establish
	 * the connection */
	if (!pmixp_dconn_require_connect(dconn, &send_init)) {
		/* already connected (or connecting) - nothing to do */
		goto unlock;
	}
	if (send_init) {
		/* Build the one-shot INIT_DIRECT message that introduces
		 * us to the peer; ownership of buf_init/init_msg passes
		 * to pmixp_dconn_connect() on success */
		Buf buf_init = pmixp_server_buf_new();
		pmixp_base_hdr_t bhdr;
		init_msg = xmalloc(sizeof(*init_msg));
		PMIXP_BASE_HDR_SETUP(bhdr, PMIXP_MSG_INIT_DIRECT, 0, buf_init);
		bhdr.ext_flag = 1;
		hsize = _direct_hdr_pack(&bhdr, nhdr);
		init_msg->sent_cb = pmixp_server_sent_buf_cb;
		init_msg->cbdata = buf_init;
		init_msg->hdr = bhdr;
		init_msg->buffer = _buf_finalize(buf_init, nhdr, hsize,
						 &dsize);
		init_msg->buf_ptr = buf_init;
	}
	rc = pmixp_dconn_connect(dconn, ep_data, ep_len, init_msg);
	if (rc) {
		PMIXP_ERROR("Unable to connect to %d", dconn->nodeid);
		if (init_msg) {
			/* need to release `init_msg` here */
			free_buf(init_msg->buf_ptr);
			xfree(init_msg);
		}
		goto unlock;
	}

	switch (pmixp_dconn_progress_type(dconn)) {
	case PMIXP_DCONN_PROGRESS_SW:{
		/* this direct connection has fd that needs to be
		 * polled to progress, use connection interface for that */
		pmixp_io_engine_t *eng = pmixp_dconn_engine(dconn);
		pmixp_conn_t *conn;
		conn = pmixp_conn_new_persist(PMIXP_PROTO_DIRECT, eng,
					      _direct_new_msg_conn,
					      _direct_return_connection,
					      dconn);
		if (conn) {
			/* Register the engine's fd with the plugin's eio
			 * loop and wake it so polling starts immediately */
			eio_obj_t *obj;
			obj = eio_obj_create(pmixp_io_fd(eng),
					     &direct_peer_ops, (void *)conn);
			eio_new_obj(pmixp_info_io(), obj);
			eio_signal_wakeup(pmixp_info_io());
		} else {
			/* TODO: handle this error */
			rc = SLURM_ERROR;
			goto unlock;
		}
		break;
	}
	case PMIXP_DCONN_PROGRESS_HW: {
		/* hardware-progressed connection: no fd polling needed */
		break;
	}
	default:
		/* Should not happen */
		xassert(0 && pmixp_dconn_progress_type(dconn));
		/* TODO: handle this error */
	}
unlock:
	pmixp_dconn_unlock(dconn);
	return rc;
}
/*
 * Based on ideas provided by Hongjia Cao <*****@*****.**> in PMI2 plugin
 */
/*
 * Initialize the collective descriptor @coll for the process set
 * @procs[0..nprocs-1] and collective type @type: copy the process set,
 * build the reverse-tree topology (parent, children, root), and set up
 * the forwarding buffers and the descriptor lock.
 *
 * Returns SLURM_SUCCESS, or SLURM_ERROR if the hostlist could not be
 * derived from the provided ranges.
 */
int pmixp_coll_init(pmixp_coll_t *coll, const pmix_proc_t *procs,
		    size_t nprocs, pmixp_coll_type_t type)
{
	hostlist_t hl;
	int max_depth, width, depth, i;
	char *p;

#ifndef NDEBUG
	coll->magic = PMIXP_COLL_STATE_MAGIC;
#endif
	coll->type = type;
	coll->state = PMIXP_COLL_SYNC;
	/* Keep a private copy of the participating process set */
	coll->pset.procs = xmalloc(sizeof(*procs) * nprocs);
	coll->pset.nprocs = nprocs;
	memcpy(coll->pset.procs, procs, sizeof(*procs) * nprocs);

	if (SLURM_SUCCESS != _hostset_from_ranges(procs, nprocs, &hl)) {
		/* TODO: provide ranges output routine */
		PMIXP_ERROR("Bad ranges information");
		goto err_exit;
	}
#ifdef PMIXP_COLL_DEBUG
	/* if we debug collectives - store a copy of a full
	 * hostlist to resolve participant id to the hostname */
	coll->peers_hl = hostlist_copy(hl);
#endif

	width = slurm_get_tree_width();
	coll->peers_cnt = hostlist_count(hl);
	coll->my_peerid = hostlist_find(hl, pmixp_info_hostname());
	reverse_tree_info(coll->my_peerid, coll->peers_cnt, width,
			  &coll->prnt_peerid, &coll->chldrn_cnt, &depth,
			  &max_depth);

	/* We are interested in the number of direct children */
	coll->seq = 0;
	coll->contrib_children = 0;
	coll->contrib_local = false;
	coll->chldrn_ids = xmalloc(sizeof(int) * width);
	coll->contrib_chld = xmalloc(sizeof(int) * width);
	coll->chldrn_cnt = reverse_tree_direct_children(coll->my_peerid,
							coll->peers_cnt,
							width, depth,
							coll->chldrn_ids);
	if (coll->prnt_peerid == -1) {
		/* if we are the root of the tree:
		 * - we don't have a parent;
		 * - we have large list of all_childrens (we don't want
		 * ourselfs there)
		 */
		coll->prnt_host = NULL;
		coll->all_chldrn_hl = hostlist_copy(hl);
		hostlist_delete_host(coll->all_chldrn_hl,
				     pmixp_info_hostname());
		coll->chldrn_str =
			hostlist_ranged_string_xmalloc(coll->all_chldrn_hl);
	} else {
		/* for all other nodes in the tree we need to know:
		 * - nodename of our parent;
		 * - we don't need a list of all_childrens and hl anymore
		 */

		/*
		 * setup parent id's
		 */
		p = hostlist_nth(hl, coll->prnt_peerid);
		coll->prnt_host = xstrdup(p);
		free(p);
		/* reset prnt_peerid to the global peer */
		coll->prnt_peerid = pmixp_info_job_hostid(coll->prnt_host);

		/*
		 * setup root id's
		 * (we need this for the SLURM API communication case)
		 */
		p = hostlist_nth(hl, 0);
		coll->root_host = xstrdup(p);
		free(p);
		/* reset root_peerid to the global peer */
		coll->root_peerid = pmixp_info_job_hostid(coll->root_host);

		/* use empty hostlist here */
		coll->all_chldrn_hl = hostlist_create("");
		coll->chldrn_str = NULL;
	}

	/* fixup children peer ids to the global ones */
	for (i = 0; i < coll->chldrn_cnt; i++) {
		p = hostlist_nth(hl, coll->chldrn_ids[i]);
		coll->chldrn_ids[i] = pmixp_info_job_hostid(p);
		free(p);
	}
	hostlist_destroy(hl);

	/* Collective state */
	coll->ufwd_buf = pmixp_server_buf_new();
	coll->dfwd_buf = pmixp_server_buf_new();
	_reset_coll_ufwd(coll);
	_reset_coll_dfwd(coll);
	coll->cbdata = NULL;
	coll->cbfunc = NULL;

	/* init fine grained lock */
	slurm_mutex_init(&coll->lock);

	return SLURM_SUCCESS;
err_exit:
	/* Leak fix: release the process-set copy allocated above
	 * (xfree also NULLs the pointer on most Slurm versions) */
	xfree(coll->pset.procs);
	coll->pset.nprocs = 0;
	return SLURM_ERROR;
}