/*
 * Validate an incoming ring-collective message header.
 *
 * Checks that (a) the sender is our ring predecessor and (b) the message
 * sequence number matches the current collective state.
 *
 * Returns SLURM_SUCCESS when the message should be processed, SLURM_ERROR
 * when it must be dropped. Note: on PMIXP_COLL_REQ_FAILURE the job step is
 * killed but SLURM_SUCCESS is returned — the caller proceeds while the step
 * is being torn down (matches pre-existing behavior).
 */
int pmixp_coll_ring_check(pmixp_coll_t *coll, pmixp_coll_ring_msg_hdr_t *hdr)
{
	char *nodename = NULL;
	int rc;

	if (hdr->nodeid != _ring_prev_id(coll)) {
		/* Only the previous peer in the ring may contribute here */
		nodename = pmixp_info_job_host(hdr->nodeid);
		PMIXP_ERROR("%p: unexpected contrib from %s:%u, expected is %d",
			    coll, nodename, hdr->nodeid, _ring_prev_id(coll));
		/* fix: nodename was leaked on this path */
		xfree(nodename);
		return SLURM_ERROR;
	}
	rc = pmixp_coll_check(coll, hdr->seq);
	if (PMIXP_COLL_REQ_FAILURE == rc) {
		/* this is an unacceptable event: either something went
		 * really wrong or the state machine is incorrect.
		 * This will 100% lead to application hang.
		 */
		nodename = pmixp_info_job_host(hdr->nodeid);
		PMIXP_ERROR("Bad collective seq. #%d from %s:%u, current is %d",
			    hdr->seq, nodename, hdr->nodeid, coll->seq);
		pmixp_debug_hang(0); /* enable hang to debug this! */
		slurm_kill_job_step(pmixp_info_jobid(), pmixp_info_stepid(),
				    SIGKILL);
		xfree(nodename);
		return SLURM_SUCCESS;
	} else if (PMIXP_COLL_REQ_SKIP == rc) {
#ifdef PMIXP_COLL_DEBUG
		/* fix: dropped an unused pmixp_info_job_host() allocation
		 * that was never freed (the message only uses the nodeid) */
		PMIXP_ERROR("Wrong collective seq. #%d from nodeid %u, current is %d, skip this message",
			    hdr->seq, hdr->nodeid, coll->seq);
#endif
		return SLURM_ERROR;
	}
	return SLURM_SUCCESS;
}
int pmixp_server_pp_send(int nodeid, int size) { Buf buf = pmixp_server_buf_new(); int rc; pmixp_ep_t ep; struct pp_cbdata *cbdata = xmalloc(sizeof(*cbdata)); grow_buf(buf, size); ep.type = PMIXP_EP_NOIDEID; ep.ep.nodeid = nodeid; cbdata->buf = buf; cbdata->size = size; set_buf_offset(buf,get_buf_offset(buf) + size); rc = pmixp_server_send_nb(&ep, PMIXP_MSG_PINGPONG, _pmixp_pp_count, buf, pingpong_complete, (void*)cbdata); if (SLURM_SUCCESS != rc) { char *nodename = pmixp_info_job_host(nodeid); PMIXP_ERROR("Was unable to wait for the parent %s to " "become alive", nodename); xfree(nodename); } return rc; }
/*
 * Receive the first message identifying initiator
 */
static void _direct_conn_establish(pmixp_conn_t *conn, void *_hdr, void *msg)
{
	pmixp_io_engine_t *eng = pmixp_conn_get_eng(conn);
	pmixp_base_hdr_t *hdr = (pmixp_base_hdr_t *)_hdr;
	pmixp_dconn_t *dconn = NULL;
	pmixp_conn_t *new_conn;
	eio_obj_t *obj;
	int fd;

	/* Take ownership of the fd away from the temporary engine so the
	 * transient connection machinery won't close it behind our back */
	fd = pmixp_io_detach(eng);
	dconn = pmixp_dconn_accept(hdr->nodeid, fd);
	if (!dconn) {
		/* connection was refused because we already
		 * have established connection
		 * It seems that some sort of race condition occured
		 */
		char *nodename = pmixp_info_job_host(hdr->nodeid);
		close(fd);
		PMIXP_ERROR("Failed to accept direct connection from %s",
			    nodename);
		xfree(nodename);
		return;
	}
	/* Promote to a persistent direct-protocol connection bound to
	 * the accepted dconn */
	new_conn = pmixp_conn_new_persist(PMIXP_PROTO_DIRECT,
					  pmixp_dconn_engine(dconn),
					  _direct_new_msg_conn,
					  _direct_return_connection, dconn);
	/* pmixp_dconn_accept() returned dconn locked; release it before
	 * handing the fd to the eio loop */
	pmixp_dconn_unlock(dconn);
	obj = eio_obj_create(fd, &direct_peer_ops, (void *)new_conn);
	eio_new_obj(pmixp_info_io(), obj);
	/* wakeup this connection to get processed */
	eio_signal_wakeup(pmixp_info_io());
}
/*
 * Send a finalized message buffer through the Slurm (SAPI) transport.
 *
 * `ep` selects the destination: a hostlist (tree fan-out) or a single
 * nodeid (point-to-point). `bhdr` is packed in front of `buf`'s payload.
 *
 * Returns the transport send result (SLURM_SUCCESS on success).
 */
static int _slurm_send(pmixp_ep_t *ep, pmixp_base_hdr_t bhdr, Buf buf)
{
	const char *addr = NULL, *data = NULL, *hostlist = NULL;
	char nhdr[PMIXP_BASE_HDR_MAX];
	size_t hsize = 0, dsize = 0;
	int rc;

	/* setup the header */
	addr = pmixp_info_srv_usock_path();

	bhdr.ext_flag = 0;
	/* request a direct-connection upgrade only for point-to-point sends */
	if (pmixp_info_srv_direct_conn() && PMIXP_EP_NOIDEID == ep->type) {
		bhdr.ext_flag = 1;
	}

	hsize = _slurm_pack_hdr(&bhdr, nhdr);
	data = _buf_finalize(buf, nhdr, hsize, &dsize);

	switch (ep->type) {
	case PMIXP_EP_HLIST:
		hostlist = ep->ep.hostlist;
		rc = pmixp_stepd_send(ep->ep.hostlist, addr, data, dsize,
				      500, 7, 0);
		break;
	case PMIXP_EP_NOIDEID: {
		char *nodename = pmixp_info_job_host(ep->ep.nodeid);
		rc = pmixp_p2p_send(nodename, addr, data, dsize, 500, 7, 0);
		xfree(nodename);
		break;
	}
	default:
		PMIXP_ERROR("Bad value of the EP type: %d", (int)ep->type);
		abort();
	}

	if (SLURM_SUCCESS != rc) {
		/* fix: `hostlist` is NULL for PMIXP_EP_NOIDEID; passing NULL
		 * to %s is undefined behavior — substitute a placeholder */
		PMIXP_ERROR("Cannot send message to %s, size = %u, "
			    "hostlist:\n%s", addr, (uint32_t)dsize,
			    hostlist ? hostlist : "(none)");
	}
	return rc;
}
/*
 * Dispatch one received server-to-server message by type.
 *
 * Buffer ownership: `buf` is freed here at `exit` unless a handler takes
 * ownership (DMDX hands it to a PMIx callback, see the in-branch comment).
 */
static void _process_server_request(pmixp_base_hdr_t *hdr, Buf buf)
{
	int rc;

	switch (hdr->type) {
	case PMIXP_MSG_FAN_IN:
	case PMIXP_MSG_FAN_OUT: {
		pmixp_coll_t *coll;
		pmixp_proc_t *procs = NULL;
		size_t nprocs = 0;
		pmixp_coll_type_t type = 0;
		int c_nodeid;

		rc = pmixp_coll_unpack_info(buf, &type, &c_nodeid,
					    &procs, &nprocs);
		if (SLURM_SUCCESS != rc) {
			char *nodename = pmixp_info_job_host(hdr->nodeid);
			PMIXP_ERROR("Bad message header from node %s",
				    nodename);
			xfree(nodename);
			goto exit;
		}
		coll = pmixp_state_coll_get(type, procs, nprocs);
		xfree(procs);

		PMIXP_DEBUG("FENCE collective message from nodeid = %u, "
			    "type = %s, seq = %d",
			    hdr->nodeid,
			    ((PMIXP_MSG_FAN_IN == hdr->type) ?
				     "fan-in" : "fan-out"),
			    hdr->seq);
		rc = pmixp_coll_check_seq(coll, hdr->seq);
		if (PMIXP_COLL_REQ_FAILURE == rc) {
			/* this is an unacceptable event: either something
			 * went really wrong or the state machine is
			 * incorrect. This will 100% lead to application hang.
			 */
			char *nodename = pmixp_info_job_host(hdr->nodeid);
			PMIXP_ERROR("Bad collective seq. #%d from %s, current"
				    " is %d",
				    hdr->seq, nodename, coll->seq);
			pmixp_debug_hang(0); /* enable hang to debug this! */
			slurm_kill_job_step(pmixp_info_jobid(),
					    pmixp_info_stepid(), SIGKILL);
			xfree(nodename);
			break;
		} else if (PMIXP_COLL_REQ_SKIP == rc) {
			/* stale retransmission — drop silently */
			PMIXP_DEBUG("Wrong collective seq. #%d from"
				    " nodeid %u, current is %d, skip "
				    "this message",
				    hdr->seq, hdr->nodeid, coll->seq);
			goto exit;
		}

		if (PMIXP_MSG_FAN_IN == hdr->type) {
			pmixp_coll_contrib_child(coll, hdr->nodeid,
						 hdr->seq, buf);
		} else {
			pmixp_coll_contrib_parent(coll, hdr->nodeid,
						  hdr->seq, buf);
		}

		break;
	}
	case PMIXP_MSG_DMDX: {
		pmixp_dmdx_process(buf, hdr->nodeid, hdr->seq);
		/* buf will be free'd by the PMIx callback so
		 * protect the data by voiding the buffer.
		 * Use the statement below instead of (buf = NULL)
		 * to maintain encapsulation - in general `buf` is
		 * not a pointer, but an opaque type.
		 */
		buf = create_buf(NULL, 0);
		break;
	}
	case PMIXP_MSG_INIT_DIRECT:
		PMIXP_DEBUG("Direct connection init from %d", hdr->nodeid);
		break;
#ifndef NDEBUG
	case PMIXP_MSG_PINGPONG: {
		/* if the pingpong mode was activated -
		 * node 0 sends ping requests
		 * and receiver assumed to respond back to node 0
		 */
		int msize = remaining_buf(buf);

		if (pmixp_info_nodeid()) {
			pmixp_server_pp_send(0, msize);
		} else {
			if (pmixp_server_pp_same_thread()) {
				if (pmixp_server_pp_count() ==
				    pmixp_server_pp_warmups()) {
					pmixp_server_pp_start();
				}
				if (!pmixp_server_pp_check_fini(msize)) {
					pmixp_server_pp_send(1, msize);
				}
			}
		}
		pmixp_server_pp_inc();
		break;
	}
#endif
	default:
		PMIXP_ERROR("Unknown message type %d", hdr->type);
		break;
	}

exit:
	free_buf(buf);
}
/*
 * Progress the PMIXP_COLL_UPFWD stage of a tree collective.
 *
 * Waits for the upward send to complete and the parent contribution to
 * arrive, then switches to PMIXP_COLL_DOWNFWD and fans the result out to
 * the children (hostlist via SAPI, or per-node direct connections) and to
 * the local libpmix callback.
 *
 * Returns true when the state advanced (caller should iterate again),
 * false when still waiting (note: declared int, used as bool).
 */
static int _progress_ufwd(pmixp_coll_t *coll)
{
	pmixp_ep_t ep[coll->chldrn_cnt];
	int ep_cnt = 0;
	int rc, i;
	char *nodename = NULL;
	pmixp_coll_cbdata_t *cbdata = NULL;

	xassert(PMIXP_COLL_UPFWD == coll->state);

	/* for some reasons doesnt switch to downfwd */
	switch (coll->ufwd_status) {
	case PMIXP_COLL_SND_FAILED:
		/* something went wrong with upward send.
		 * notify libpmix about that and abort
		 * collective
		 */
		if (coll->cbfunc) {
			coll->cbfunc(PMIX_ERROR, NULL, 0, coll->cbdata,
				     NULL, NULL);
		}
		_reset_coll(coll);
		/* Don't need to do anything else */
		return false;
	case PMIXP_COLL_SND_ACTIVE:
		/* still waiting for the send completion */
		return false;
	case PMIXP_COLL_SND_DONE:
		if (coll->contrib_prnt) {
			/* all-set to go to the next stage */
			break;
		}
		return false;
	default:
		/* Should not happen, fatal error */
		abort();
	}

	/* We now can upward part for the next collective */
	_reset_coll_ufwd(coll);

	/* move to the next state */
	coll->state = PMIXP_COLL_DOWNFWD;
	coll->dfwd_status = PMIXP_COLL_SND_ACTIVE;
	if (!pmixp_info_srv_direct_conn()) {
		/* only root of the tree should get here */
		xassert(0 > coll->prnt_peerid);
		if (coll->chldrn_cnt) {
			/* We can run on just one node */
			ep[ep_cnt].type = PMIXP_EP_HLIST;
			ep[ep_cnt].ep.hostlist = coll->chldrn_str;
			ep_cnt++;
		}
	} else {
		/* direct-connect mode: one endpoint per child nodeid */
		for (i = 0; i < coll->chldrn_cnt; i++) {
			ep[i].type = PMIXP_EP_NOIDEID;
			ep[i].ep.nodeid = coll->chldrn_ids[i];
			ep_cnt++;
		}
	}

	/* We need to wait for ep_cnt send completions + the local callback */
	coll->dfwd_cb_wait = ep_cnt;

	if (ep_cnt || coll->cbfunc) {
		/* allocate the callback data shared (refcounted) by all
		 * downward sends plus the optional local delivery */
		cbdata = xmalloc(sizeof(pmixp_coll_cbdata_t));
		cbdata->coll = coll;
		cbdata->seq = coll->seq;
		cbdata->refcntr = ep_cnt;
		if (coll->cbfunc) {
			cbdata->refcntr++;
		}
	}

	for (i = 0; i < ep_cnt; i++) {
		rc = pmixp_server_send_nb(&ep[i], PMIXP_MSG_FAN_OUT,
					  coll->seq, coll->dfwd_buf,
					  _dfwd_sent_cb, cbdata);

		if (SLURM_SUCCESS != rc) {
			if (PMIXP_EP_NOIDEID == ep[i].type) {
				nodename = pmixp_info_job_host(
					ep[i].ep.nodeid);
				PMIXP_ERROR("Cannot send data (size = %lu), "
					    "to %s:%d",
					    (uint64_t) get_buf_offset(
						    coll->dfwd_buf),
					    nodename, ep[i].ep.nodeid);
				xfree(nodename);
			} else {
				PMIXP_ERROR("Cannot send data (size = %lu), "
					    "to %s",
					    (uint64_t) get_buf_offset(
						    coll->dfwd_buf),
					    ep[i].ep.hostlist);
			}
			coll->dfwd_status = PMIXP_COLL_SND_FAILED;
		}
#ifdef PMIXP_COLL_DEBUG
		if (PMIXP_EP_NOIDEID == ep[i].type) {
			nodename = pmixp_info_job_host(ep[i].ep.nodeid);
			PMIXP_DEBUG("%p: fwd to %s:%d, size = %lu",
				    coll, nodename, ep[i].ep.nodeid,
				    (uint64_t) get_buf_offset(coll->dfwd_buf));
			xfree(nodename);
		} else {
			PMIXP_DEBUG("%p: fwd to %s, size = %lu",
				    coll, ep[i].ep.hostlist,
				    (uint64_t) get_buf_offset(coll->dfwd_buf));
		}
#endif
	}

	if (coll->cbfunc) {
		/* deliver only the payload portion (skip the packed header
		 * prefix of dfwd_buf) to the local libpmix callback */
		char *data = get_buf_data(coll->dfwd_buf) + coll->dfwd_offset;
		size_t size = get_buf_offset(coll->dfwd_buf) -
			coll->dfwd_offset;
		coll->dfwd_cb_wait++;
		coll->cbfunc(PMIX_SUCCESS, data, size, coll->cbdata,
			     _libpmix_cb, (void *)cbdata);
#ifdef PMIXP_COLL_DEBUG
		PMIXP_DEBUG("%p: local delivery, size = %lu",
			    coll, (uint64_t)size);
#endif
	}

	/* events observed - need another iteration */
	return true;
}
/*
 * Accept a downward (parent -> this node) contribution for a tree
 * collective.
 *
 * Validates the sender against the expected parent peer, checks the
 * sequence number against the collective state machine, appends the
 * payload to `coll->dfwd_buf`, and progresses the collective.
 *
 * Takes/releases coll->lock. Always returns SLURM_SUCCESS (bad senders
 * and duplicates are logged and skipped; sequence corruption aborts).
 */
int pmixp_coll_contrib_parent(pmixp_coll_t *coll, uint32_t peerid,
			      uint32_t seq, Buf buf)
{
#ifdef PMIXP_COLL_DEBUG
	char *nodename = NULL;
	int lpeerid = -1;
#endif
	char *data_src = NULL, *data_dst = NULL;
	uint32_t size;
	int expected_peerid;

	/* lock the structure */
	slurm_mutex_lock(&coll->lock);

	if (pmixp_info_srv_direct_conn()) {
		expected_peerid = coll->prnt_peerid;
	} else {
		expected_peerid = coll->root_peerid;
	}

	/* Sanity check */
	pmixp_coll_sanity_check(coll);

	if (expected_peerid != peerid) {
		char *nodename = pmixp_info_job_host(peerid);
		/* protect ourselfs if we are running with no asserts */
		PMIXP_ERROR("%p: parent contrib from bad nodeid=%s:%u, "
			    "expect=%d",
			    coll, nodename, peerid, expected_peerid);
		xfree(nodename);
		goto proceed;
	}

#ifdef PMIXP_COLL_DEBUG
	nodename = pmixp_info_job_host(peerid);
	lpeerid = hostlist_find(coll->peers_hl, nodename);
	/* Mark this event */
	PMIXP_DEBUG("%p: contrib/rem from %s:%d(%d): state=%s, size=%u",
		    coll, nodename, peerid, lpeerid,
		    pmixp_coll_state2str(coll->state), remaining_buf(buf));
#endif

	switch (coll->state) {
	case PMIXP_COLL_SYNC:
	case PMIXP_COLL_COLLECT:
		/* It looks like a retransmission attempt when remote side
		 * identified transmission failure, but we actually successfuly
		 * received the message */
#ifdef PMIXP_COLL_DEBUG
		PMIXP_DEBUG("%p: prev contrib from %s:%d(%d): "
			    "seq=%u, cur_seq=%u, state=%s",
			    coll, nodename, peerid, lpeerid,
			    seq, coll->seq,
			    pmixp_coll_state2str(coll->state));
#endif
		/* sanity check */
		if ((coll->seq - 1) != seq) {
			/* FATAL: should not happen in normal workflow */
			char *nodename = pmixp_info_job_host(peerid);
			PMIXP_ERROR("%p: unexpected contrib from %s:%d: "
				    "contrib_seq = %d, coll->seq = %d, "
				    "state=%s",
				    coll, nodename, peerid,
				    seq, coll->seq,
				    pmixp_coll_state2str(coll->state));
			xfree(nodename);
			xassert((coll->seq - 1) == seq);
			abort();
		}
		goto proceed;
	case PMIXP_COLL_UPFWD_WSC: {
		/* we are not actually ready to receive this contribution as
		 * the upward portion of the collective wasn't received yet.
		 * This should not happen as SAPI (SLURM API) is blocking and
		 * we chould transit to PMIXP_COLL_UPFWD_WPC immediately */
		/* FATAL: should not happen in normal workflow */
		char *nodename = pmixp_info_job_host(peerid);
		PMIXP_ERROR("%p: unexpected contrib from %s:%d: "
			    "contrib_seq = %d, coll->seq = %d, "
			    "state=%s",
			    coll, nodename, peerid,
			    seq, coll->seq,
			    pmixp_coll_state2str(coll->state));
		xfree(nodename);
		xassert((coll->seq - 1) == seq);
		abort();
	}
	case PMIXP_COLL_UPFWD:
	case PMIXP_COLL_UPFWD_WPC:
		/* we were waiting for this */
		break;
	case PMIXP_COLL_DOWNFWD:
		/* It looks like a retransmission attempt when remote side
		 * identified transmission failure, but we actually successfuly
		 * received the message */
#ifdef PMIXP_COLL_DEBUG
		PMIXP_DEBUG("%p: double contrib from %s:%d(%d) "
			    "seq=%u, cur_seq=%u, state=%s",
			    coll, nodename, peerid, lpeerid,
			    seq, coll->seq,
			    pmixp_coll_state2str(coll->state));
#endif
		/* sanity check */
		if (coll->seq != seq) {
			char *nodename = pmixp_info_job_host(peerid);
			/* FATAL: should not happen in normal workflow */
			PMIXP_ERROR("%p: unexpected contrib from %s:%d: "
				    "seq = %d, coll->seq = %d, state=%s",
				    coll, nodename, peerid,
				    seq, coll->seq,
				    pmixp_coll_state2str(coll->state));
			/* fix: was xassert((coll->seq - 1) == seq), which
			 * can pass inside this (coll->seq != seq) branch and
			 * abort with no diagnostic; in DOWNFWD the expected
			 * sequence is the current one */
			xassert(coll->seq == seq);
			xfree(nodename);
			abort();
		}
		goto proceed;
	default:
		/* should not happen in normal workflow */
		PMIXP_ERROR("%p: unknown collective state %s",
			    coll, pmixp_coll_state2str(coll->state));
		abort();
	}

	/* Because of possible timeouts/delays in transmission we
	 * can receive a contribution second time. Avoid duplications
	 * by checking our records. */
	if (coll->contrib_prnt) {
		char *nodename = pmixp_info_job_host(peerid);
		/* May be 0 or 1. If grater - transmission skew, ignore.
		 * NOTE: this output is not on the critical path -
		 * don't preprocess it out */
		PMIXP_DEBUG("%p: multiple contributions from parent %s:%d",
			    coll, nodename, peerid);
		xfree(nodename);
		/* this is duplication, skip. */
		goto proceed;
	}
	coll->contrib_prnt = true;

	data_src = get_buf_data(buf) + get_buf_offset(buf);
	size = remaining_buf(buf);
	pmixp_server_buf_reserve(coll->dfwd_buf, size);

	data_dst = get_buf_data(coll->dfwd_buf) +
		get_buf_offset(coll->dfwd_buf);
	memcpy(data_dst, data_src, size);
	set_buf_offset(coll->dfwd_buf,
		       get_buf_offset(coll->dfwd_buf) + size);
proceed:
	_progress_coll(coll);
#ifdef PMIXP_COLL_DEBUG
	if (nodename) {
		PMIXP_DEBUG("%p: finish: node=%s:%d(%d), state=%s",
			    coll, nodename, peerid, lpeerid,
			    pmixp_coll_state2str(coll->state));
		xfree(nodename);
	}
#endif
	/* unlock the structure */
	slurm_mutex_unlock(&coll->lock);

	return SLURM_SUCCESS;
}
/*
 * Accept an upward (child -> this node) contribution for a tree collective.
 *
 * Validates that the sender is one of our children, checks the sequence
 * number against the collective state machine, appends the payload to
 * `coll->ufwd_buf`, marks the child contribution, and progresses the
 * collective.
 *
 * Takes/releases coll->lock. Returns SLURM_SUCCESS on success or
 * SLURM_ERROR when the sender is not a child of this node.
 */
int pmixp_coll_contrib_child(pmixp_coll_t *coll, uint32_t peerid,
			     uint32_t seq, Buf buf)
{
	char *data_src = NULL, *data_dst = NULL;
	uint32_t size;
	int chld_id;

	/* lock the structure */
	slurm_mutex_lock(&coll->lock);
	pmixp_coll_sanity_check(coll);
	if (0 > (chld_id = _chld_id(coll, peerid))) {
		char *nodename = pmixp_info_job_host(peerid);
		char *avail_ids = _chld_ids_str(coll);
		PMIXP_DEBUG("%p: contribution from the non-child node "
			    "%s:%d, acceptable ids: %s",
			    coll, nodename, peerid, avail_ids);
		xfree(nodename);
		xfree(avail_ids);
		/* fix: previously fell through with a negative chld_id and
		 * later indexed coll->contrib_chld[chld_id] out of bounds;
		 * reject the contribution instead */
		slurm_mutex_unlock(&coll->lock);
		return SLURM_ERROR;
	}

#ifdef PMIXP_COLL_DEBUG
	char *nodename = pmixp_info_job_host(peerid);
	int lpeerid = hostlist_find(coll->peers_hl, nodename);
	PMIXP_DEBUG("%p: contrib/rem from %s:%d(%d:%d):, state=%s, size=%u",
		    coll, nodename, peerid, lpeerid, chld_id,
		    pmixp_coll_state2str(coll->state),
		    remaining_buf(buf));
#endif

	switch (coll->state) {
	case PMIXP_COLL_SYNC:
		/* change the state */
		coll->ts = time(NULL);
		/* fall-thru */
	case PMIXP_COLL_COLLECT:
		/* sanity check */
		if (coll->seq != seq) {
			char *nodename = pmixp_info_job_host(peerid);
			/* FATAL: should not happen in normal workflow */
			PMIXP_ERROR("%p: unexpected contrib from %s:%d "
				    "(child #%d) seq = %d, coll->seq = %d, "
				    "state=%s",
				    coll, nodename, peerid, chld_id,
				    seq, coll->seq,
				    pmixp_coll_state2str(coll->state));
			xassert(coll->seq == seq);
			abort();
		}
		break;
	case PMIXP_COLL_UPFWD:
	case PMIXP_COLL_UPFWD_WSC: {
		/* FATAL: should not happen in normal workflow */
		/* fix: this branch used `nodename`, which only exists when
		 * PMIXP_COLL_DEBUG is defined — allocate locally so the
		 * non-debug build compiles */
		char *nodename2 = pmixp_info_job_host(peerid);
		PMIXP_ERROR("%p: unexpected contrib from %s:%d, state = %s",
			    coll, nodename2, peerid,
			    pmixp_coll_state2str(coll->state));
		xfree(nodename2);
		xassert(0);
		abort();
	}
	case PMIXP_COLL_UPFWD_WPC:
	case PMIXP_COLL_DOWNFWD:
#ifdef PMIXP_COLL_DEBUG
		/* It looks like a retransmission attempt when remote side
		 * identified transmission failure, but we actually successfuly
		 * received the message */
		PMIXP_DEBUG("%p: contrib for the next collective "
			    "from=%s:%d(%d:%d) contrib_seq=%u, coll->seq=%u, "
			    "state=%s",
			    coll, nodename, peerid, lpeerid, chld_id,
			    seq, coll->seq,
			    pmixp_coll_state2str(coll->state));
#endif
		if ((coll->seq + 1) != seq) {
			char *nodename = pmixp_info_job_host(peerid);
			/* should not happen in normal workflow */
			PMIXP_ERROR("%p: unexpected contrib from %s:%d(x:%d) "
				    "seq = %d, coll->seq = %d, "
				    "state=%s",
				    coll, nodename, peerid, chld_id,
				    seq, coll->seq,
				    pmixp_coll_state2str(coll->state));
			xfree(nodename);
			xassert((coll->seq + 1) == seq);
			abort();
		}
		break;
	default:
		/* should not happen in normal workflow */
		PMIXP_ERROR("%p: unknown collective state %s",
			    coll, pmixp_coll_state2str(coll->state));
		abort();
	}

	/* Because of possible timeouts/delays in transmission we
	 * can receive a contribution second time. Avoid duplications
	 * by checking our records. */
	if (coll->contrib_chld[chld_id]) {
		char *nodename = pmixp_info_job_host(peerid);
		/* May be 0 or 1. If grater - transmission skew, ignore.
		 * NOTE: this output is not on the critical path -
		 * don't preprocess it out */
		PMIXP_DEBUG("%p: multiple contribs from %s:%d(x:%d)",
			    coll, nodename, peerid, chld_id);
		/* this is duplication, skip. */
		xfree(nodename);
		goto proceed;
	}

	data_src = get_buf_data(buf) + get_buf_offset(buf);
	size = remaining_buf(buf);
	pmixp_server_buf_reserve(coll->ufwd_buf, size);
	data_dst = get_buf_data(coll->ufwd_buf) +
		get_buf_offset(coll->ufwd_buf);
	memcpy(data_dst, data_src, size);
	set_buf_offset(coll->ufwd_buf, get_buf_offset(coll->ufwd_buf) + size);

	/* increase number of individual contributions */
	coll->contrib_chld[chld_id] = true;
	/* increase number of total contributions */
	coll->contrib_children++;

proceed:
	_progress_coll(coll);
#ifdef PMIXP_COLL_DEBUG
	PMIXP_DEBUG("%p: finish: node=%s:%d(%d:%d), state=%s",
		    coll, nodename, peerid, lpeerid, chld_id,
		    pmixp_coll_state2str(coll->state));
	xfree(nodename);
#endif
	/* unlock the structure */
	slurm_mutex_unlock(&coll->lock);

	return SLURM_SUCCESS;
}
/*
 * Dispatch one received server-to-server message by type (SAPI variant).
 *
 * Buffer ownership: `payload` is wrapped into `buf`; each branch either
 * frees `buf` or hands it to a handler that does (bcast / dmdx callbacks).
 * `nodename` is always released at `exit`.
 */
static void _process_server_request(recv_header_t *_hdr, void *payload)
{
	send_header_t *hdr = &_hdr->send_hdr;
	char *nodename = pmixp_info_job_host(hdr->nodeid);
	Buf buf;
	int rc;

	buf = create_buf(payload, hdr->msgsize);

	switch (hdr->type) {
	case PMIXP_MSG_FAN_IN:
	case PMIXP_MSG_FAN_OUT: {
		pmixp_coll_t *coll;
		pmix_proc_t *procs = NULL;
		size_t nprocs = 0;
		pmixp_coll_type_t type = 0;

		rc = pmixp_coll_unpack_ranges(buf, &type, &procs, &nprocs);
		if (SLURM_SUCCESS != rc) {
			PMIXP_ERROR("Bad message header from node %s",
				    nodename);
			/* fix: early `return` here leaked both buf and
			 * nodename */
			free_buf(buf);
			goto exit;
		}
		coll = pmixp_state_coll_get(type, procs, nprocs);
		xfree(procs);

		PMIXP_DEBUG("FENCE collective message from node \"%s\", type = %s, seq = %d",
			    nodename,
			    (PMIXP_MSG_FAN_IN == hdr->type) ?
				    "fan-in" : "fan-out",
			    hdr->seq);

		rc = pmixp_coll_check_seq(coll, hdr->seq, nodename);
		if (PMIXP_COLL_REQ_FAILURE == rc) {
			/* this is an unacceptable event: either something
			 * went really wrong or the state machine is
			 * incorrect. This will 100% lead to application hang.
			 */
			PMIXP_ERROR("Bad collective seq. #%d from %s, current is %d",
				    hdr->seq, nodename, coll->seq);
			pmixp_debug_hang(0); /* enable hang to debug this! */
			slurm_kill_job_step(pmixp_info_jobid(),
					    pmixp_info_stepid(), SIGKILL);
			free_buf(buf); /* fix: was leaked on this path */
			break;
		} else if (PMIXP_COLL_REQ_SKIP == rc) {
			PMIXP_DEBUG("Wrong collective seq. #%d from %s, current is %d, skip this message",
				    hdr->seq, nodename, coll->seq);
			free_buf(buf);
			break;
		}

		if (PMIXP_MSG_FAN_IN == hdr->type) {
			pmixp_coll_contrib_node(coll, nodename, buf);
			/* we don't need this buffer anymore */
			free_buf(buf);
		} else {
			pmixp_coll_bcast(coll, buf);
			/* buf will be free'd by the PMIx callback */
		}
		break;
	}
	case PMIXP_MSG_DMDX: {
		/* buf ownership passes to the dmdx machinery */
		pmixp_dmdx_process(buf, nodename, hdr->seq);
		break;
	}
	case PMIXP_MSG_HEALTH_CHK: {
		/* this is just health ping.
		 * TODO: can we do something more sophisticated?
		 */
		free_buf(buf);
		break;
	}
	default:
		PMIXP_ERROR("Unknown message type %d", hdr->type);
		free_buf(buf); /* fix: unknown messages leaked the buffer */
		break;
	}

exit:
	xfree(nodename);
}
/*
 * Dump the full state of a ring collective to the error log.
 *
 * For each in-use ring context, prints the contribution flags and splits
 * the peer set into "done" (contributed) and "wait" (still pending)
 * hostlists. Diagnostic-only; no state is modified.
 */
void pmixp_coll_ring_log(pmixp_coll_t *coll)
{
	int i;
	pmixp_coll_ring_t *ring = &coll->state.ring;
	char *nodename, *next, *prev;
	char *out_str = NULL;

	PMIXP_ERROR("%p: %s state seq=%d",
		    coll, pmixp_coll_type2str(coll->type), coll->seq);
	nodename = pmixp_info_job_host(coll->my_peerid);
	PMIXP_ERROR("my peerid: %d:%s", coll->my_peerid, nodename);
	xfree(nodename);
	next = pmixp_info_job_host(_ring_next_id(coll));
	prev = pmixp_info_job_host(_ring_prev_id(coll));
	xstrfmtcat(out_str, "neighbor id: next %d:%s, prev %d:%s",
		   _ring_next_id(coll), next, _ring_prev_id(coll), prev);
	PMIXP_ERROR("%s", out_str);
	xfree(next);
	xfree(prev);
	xfree(out_str);

	for (i = 0; i < PMIXP_COLL_RING_CTX_NUM; i++) {
		pmixp_coll_ring_ctx_t *coll_ctx = &ring->ctx_array[i];

		PMIXP_ERROR("Context ptr=%p, #%d, in-use=%d",
			    coll_ctx, i, coll_ctx->in_use);

		if (coll_ctx->in_use) {
			int id;
			char *done_contrib, *wait_contrib;
			hostlist_t hl_done_contrib, hl_wait_contrib;

			/* start both lists with the full peer set and
			 * delete hosts from the appropriate one below */
			pmixp_hostset_from_ranges(coll->pset.procs,
						  coll->pset.nprocs,
						  &hl_done_contrib);
			hl_wait_contrib = hostlist_copy(hl_done_contrib);

			PMIXP_ERROR("\t seq=%d contribs: loc=%d/prev=%d/fwd=%d",
				    coll_ctx->seq, coll_ctx->contrib_local,
				    coll_ctx->contrib_prev,
				    coll_ctx->forward_cnt);
			PMIXP_ERROR("\t neighbor contribs [%d]:",
				    coll->peers_cnt);

			for (id = 0; id < coll->peers_cnt; id++) {
				char *nodename = pmixp_info_job_host(id);

				if (coll_ctx->contrib_map[id]) {
					/* contributed: remove from "wait" */
					hostlist_delete_host(hl_wait_contrib,
							     nodename);
				} else {
					/* pending: remove from "done" */
					hostlist_delete_host(hl_done_contrib,
							     nodename);
				}
				xfree(nodename);
			}
			done_contrib = slurm_hostlist_ranged_string_xmalloc(
				hl_done_contrib);
			wait_contrib = slurm_hostlist_ranged_string_xmalloc(
				hl_wait_contrib);
			PMIXP_ERROR("\t done contrib: %s",
				    strlen(done_contrib) ? done_contrib : "-");
			PMIXP_ERROR("\t wait contrib: %s",
				    strlen(wait_contrib) ? wait_contrib : "-");
			PMIXP_ERROR("\t status=%s",
				    pmixp_coll_ring_state2str(coll_ctx->state));
			PMIXP_ERROR("\t buf size=%u, remain=%u",
				    size_buf(coll_ctx->ring_buf),
				    remaining_buf(coll_ctx->ring_buf));
			xfree(done_contrib);
			xfree(wait_contrib);
			hostlist_destroy(hl_done_contrib);
			hostlist_destroy(hl_wait_contrib);
		}
	}
}