int pmixp_server_send(char *hostlist, pmixp_srv_cmd_t type, uint32_t seq,
		      const char *addr, void *data, size_t size, int p2p)
{
	send_header_t hdr;
	char nhdr[sizeof(send_header_t)];
	size_t hsize;
	int rc;

	hdr.magic = PMIX_SERVER_MSG_MAGIC;
	hdr.type = type;
	hdr.msgsize = size - SEND_HDR_SIZE;
	hdr.seq = seq;
	/* Store global nodeid that is
	 * independent from exact collective */
	hdr.nodeid = pmixp_info_nodeid_job();
	hsize = _send_pack_hdr(&hdr, nhdr);
	memcpy(data, nhdr, hsize);

	if (!p2p) {
		rc = pmixp_stepd_send(hostlist, addr, data, size, 500, 7, 0);
	} else {
		rc = pmixp_p2p_send(hostlist, addr, data, size, 500, 7, 0);
	}
	if (SLURM_SUCCESS != rc) {
		PMIXP_ERROR("Cannot send message to %s, size = %u, "
			    "hostlist:\n%s",
			    addr, (uint32_t) size, hostlist);
	}
	return rc;
}

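/*
 * The send header above is serialized into nhdr by _send_pack_hdr() and
 * then copied over the space reserved at the head of the outgoing
 * buffer. A minimal sketch of what that packing step might look like is
 * given below; the field order and the temporary Buf wrapper are
 * assumptions for illustration, not the actual implementation.
 */
static size_t _send_pack_hdr_sketch(send_header_t *hdr, void *net)
{
	/* wrap the caller-provided storage so the regular pack helpers
	 * can be used */
	Buf packbuf = create_buf(net, sizeof(send_header_t));
	size_t size;

	pack32(hdr->magic, packbuf);
	pack32((uint32_t)hdr->type, packbuf);
	pack32(hdr->seq, packbuf);
	pack32(hdr->nodeid, packbuf);
	pack32(hdr->msgsize, packbuf);
	size = get_buf_offset(packbuf);

	/* detach the storage before freeing the wrapper so the caller's
	 * memory is not released */
	packbuf->head = NULL;
	free_buf(packbuf);
	return size;
}
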
void pmixp_coll_ring_reset_if_to(pmixp_coll_t *coll, time_t ts)
{
	pmixp_coll_ring_ctx_t *coll_ctx;
	int i;

	/* lock the structure */
	slurm_mutex_lock(&coll->lock);
	for (i = 0; i < PMIXP_COLL_RING_CTX_NUM; i++) {
		coll_ctx = &coll->state.ring.ctx_array[i];
		if (!coll_ctx->in_use ||
		    (PMIXP_COLL_RING_SYNC == coll_ctx->state)) {
			continue;
		}
		if (ts - coll->ts > pmixp_info_timeout()) {
			/* respond to the libpmix */
			if (coll_ctx->contrib_local && coll->cbfunc) {
				pmixp_lib_modex_invoke(coll->cbfunc,
						       PMIXP_ERR_TIMEOUT, NULL,
						       0, coll->cbdata, NULL,
						       NULL);
			}
			/* report the timeout event */
			PMIXP_ERROR("%p: collective timeout seq=%d",
				    coll, coll_ctx->seq);
			pmixp_coll_log(coll);
			/* drop the collective */
			_reset_coll_ring(coll_ctx);
		}
	}
	/* unlock the structure */
	slurm_mutex_unlock(&coll->lock);
}

int pmixp_dconn_init(int node_cnt, pmixp_p2p_data_t direct_hdr)
{
	int i;

	memset(&_pmixp_dconn_h, 0, sizeof(_pmixp_dconn_h));
#ifdef HAVE_UCX
	if (pmixp_info_srv_direct_conn_ucx()) {
		_poll_fd = pmixp_dconn_ucx_prepare(&_pmixp_dconn_h,
						   &ep_data, &ep_len);
		_progress_type = PMIXP_DCONN_PROGRESS_HW;
		_conn_type = PMIXP_DCONN_CONN_TYPE_ONESIDE;
	} else
#endif
	{
		_poll_fd = pmixp_dconn_tcp_prepare(&_pmixp_dconn_h,
						   &ep_data, &ep_len);
		_progress_type = PMIXP_DCONN_PROGRESS_SW;
		_conn_type = PMIXP_DCONN_CONN_TYPE_TWOSIDE;
	}

	if (SLURM_ERROR == _poll_fd) {
		PMIXP_ERROR("Cannot get polling fd");
		return SLURM_ERROR;
	}
	_pmixp_dconn_conns = xmalloc(sizeof(*_pmixp_dconn_conns) * node_cnt);
	_pmixp_dconn_conn_cnt = node_cnt;
	for (i = 0; i < _pmixp_dconn_conn_cnt; i++) {
		slurm_mutex_init(&_pmixp_dconn_conns[i].lock);
		_pmixp_dconn_conns[i].nodeid = i;
		_pmixp_dconn_conns[i].state = PMIXP_DIRECT_INIT;
		_pmixp_dconn_conns[i].priv = _pmixp_dconn_h.init(i, direct_hdr);
	}
	return SLURM_SUCCESS;
}

int pmixp_coll_unpack_ranges(Buf buf, pmixp_coll_type_t *type,
			     pmix_proc_t **r, size_t *nr)
{
	pmix_proc_t *procs = NULL;
	uint32_t nprocs = 0;
	uint32_t tmp;
	int i, rc;

	/* 1. extract the type of collective */
	if (SLURM_SUCCESS != (rc = unpack32(&tmp, buf))) {
		PMIXP_ERROR("Cannot unpack collective type");
		return rc;
	}
	*type = tmp;

	/* 2. get the number of ranges */
	if (SLURM_SUCCESS != (rc = unpack32(&nprocs, buf))) {
		PMIXP_ERROR("Cannot unpack number of ranges");
		return rc;
	}
	*nr = nprocs;

	procs = xmalloc(sizeof(pmix_proc_t) * nprocs);
	*r = procs;

	for (i = 0; i < (int)nprocs; i++) {
		/* 3. get namespace/rank of particular process */
		rc = unpackmem(procs[i].nspace, &tmp, buf);
		if (SLURM_SUCCESS != rc) {
			PMIXP_ERROR("Cannot unpack namespace for process #%d",
				    i);
			return rc;
		}
		procs[i].nspace[tmp] = '\0';

		rc = unpack32(&tmp, buf);
		if (SLURM_SUCCESS != rc) {
			PMIXP_ERROR("Cannot unpack ranks for process #%d, nsp=%s",
				    i, procs[i].nspace);
			return rc;
		}
		procs[i].rank = tmp;
	}
	return SLURM_SUCCESS;
}

static int _slurm_send(pmixp_ep_t *ep, pmixp_base_hdr_t bhdr, Buf buf)
{
	const char *addr = NULL, *data = NULL, *hostlist = NULL;
	char nhdr[PMIXP_BASE_HDR_MAX];
	size_t hsize = 0, dsize = 0;
	int rc;

	/* setup the header */
	addr = pmixp_info_srv_usock_path();

	bhdr.ext_flag = 0;
	if (pmixp_info_srv_direct_conn() && PMIXP_EP_NOIDEID == ep->type) {
		bhdr.ext_flag = 1;
	}

	hsize = _slurm_pack_hdr(&bhdr, nhdr);
	data = _buf_finalize(buf, nhdr, hsize, &dsize);

	switch (ep->type) {
	case PMIXP_EP_HLIST:
		hostlist = ep->ep.hostlist;
		rc = pmixp_stepd_send(ep->ep.hostlist, addr, data, dsize,
				      500, 7, 0);
		break;
	case PMIXP_EP_NOIDEID: {
		char *nodename = pmixp_info_job_host(ep->ep.nodeid);
		rc = pmixp_p2p_send(nodename, addr, data, dsize, 500, 7, 0);
		xfree(nodename);
		break;
	}
	default:
		PMIXP_ERROR("Bad value of the EP type: %d", (int)ep->type);
		abort();
	}

	if (SLURM_SUCCESS != rc) {
		PMIXP_ERROR("Cannot send message to %s, size = %u, "
			    "hostlist:\n%s",
			    addr, (uint32_t) dsize, hostlist);
	}
	return rc;
}

int pmixp_dmdx_get(const char *nspace, int rank,
		   pmix_modex_cbfunc_t cbfunc, void *cbdata)
{
	dmdx_req_info_t *req;
	char *addr, *host;
	Buf buf;
	int rc;
	uint32_t seq;

	/* need to send the request */
	host = pmixp_nspace_resolve(nspace, rank);
	xassert(NULL != host);
	if (NULL == host) {
		return SLURM_ERROR;
	}

	buf = pmixp_server_new_buf();

	/* setup message header */
	_setup_header(buf, DMDX_REQUEST, nspace, rank, SLURM_SUCCESS);
	/* generate namespace usocket name */
	addr = pmixp_info_nspace_usock(nspace);
	/* store cur seq. num and move to the next request */
	seq = _dmdx_seq_num++;

	/* track this request */
	req = xmalloc(sizeof(dmdx_req_info_t));
	req->seq_num = seq;
	req->cbfunc = cbfunc;
	req->cbdata = cbdata;
	req->ts = time(NULL);
#ifndef NDEBUG
	strncpy(req->nspace, nspace, PMIX_MAX_NSLEN);
	req->rank = rank;
#endif
	list_append(_dmdx_requests, req);

	/* send the request */
	rc = pmixp_server_send(host, PMIXP_MSG_DMDX, seq, addr,
			       get_buf_data(buf), get_buf_offset(buf), 1);

	/* cleanup the resources */
	xfree(addr);
	free_buf(buf);

	/* check the return status */
	if (SLURM_SUCCESS != rc) {
		PMIXP_ERROR("Cannot send direct modex request to %s", host);
		cbfunc(PMIX_ERROR, NULL, 0, cbdata, NULL, NULL);
		return SLURM_ERROR;
	}

	return rc;
}

static void _dmdx_resp(Buf buf, char *sender_host, uint32_t seq_num)
{
	dmdx_req_info_t *req;
	int rank, rc = SLURM_SUCCESS;
	int status;
	char *ns = NULL, *sender_ns = NULL;
	char *data = NULL;
	uint32_t size = 0;

	/* find the request tracker */
	ListIterator it = list_iterator_create(_dmdx_requests);
	req = (dmdx_req_info_t *)list_find(it, _dmdx_req_cmp, &seq_num);
	if (NULL == req) {
		/* We haven't sent this request! */
		PMIXP_ERROR("Received DMDX response with bad seq_num=%d from %s!",
			    seq_num, sender_host);
		list_iterator_destroy(it);
		rc = SLURM_ERROR;
		goto exit;
	}

	/* get the service data */
	rc = _read_info(buf, &ns, &rank, &sender_ns, &status);
	if (SLURM_SUCCESS != rc) {
		/* notify libpmix about an error */
		req->cbfunc(PMIX_ERROR, NULL, 0, req->cbdata, NULL, NULL);
		goto exit;
	}

	/* get the modex blob */
	if (SLURM_SUCCESS != (rc = unpackmem_ptr(&data, &size, buf))) {
		/* notify libpmix about an error */
		req->cbfunc(PMIX_ERROR, NULL, 0, req->cbdata, NULL, NULL);
		goto exit;
	}

	/* call back to libpmix-server */
	req->cbfunc(status, data, size, req->cbdata, pmixp_free_Buf,
		    (void *)buf);

	/* release tracker & list iterator */
	req = NULL;
	list_delete_item(it);
	list_iterator_destroy(it);
exit:
	if (SLURM_SUCCESS != rc) {
		/* we do not expect libpmix to call the callback
		 * to clean up this buffer */
		free_buf(buf);
	}
	/* No sense returning errors here: the engine can't do anything
	 * about them anyway. We've already notified libpmix, and that's
	 * enough. */
}

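/*
 * The tracker lookup in _dmdx_resp() relies on a list_find() predicate,
 * _dmdx_req_cmp(), whose body is not shown in this listing. A minimal
 * sketch, assuming it simply matches the stored sequence number against
 * the key; the upstream implementation may differ in details.
 */
static int _dmdx_req_cmp(void *x, void *key)
{
	dmdx_req_info_t *req = (dmdx_req_info_t *)x;
	uint32_t seq_num = *((uint32_t *)key);

	return (req->seq_num == seq_num);
}
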
bool pmixp_server_pp_check_fini(int size)
{
	if ((pmixp_server_pp_count() + 1) >=
	    (_pmixp_pp_warmup + _pmixp_pp_iters)) {
		slurm_mutex_lock(&_pmixp_pp_lock);
		PMIXP_ERROR("latency: %d - %.9lf", size,
			    (GET_TS() - _pmixp_pp_start) / _pmixp_pp_iters);
		slurm_mutex_unlock(&_pmixp_pp_lock);
		return true;
	}
	return false;
}

static int _read_type(Buf buf, dmdx_type_t *type)
{
	unsigned char t;
	int rc;

	/* 1. unpack message type */
	if (SLURM_SUCCESS != (rc = unpack8(&t, buf))) {
		PMIXP_ERROR("Cannot unpack message type!");
		return SLURM_ERROR;
	}
	*type = (dmdx_type_t)t;
	return SLURM_SUCCESS;
}

static void _fan_in_finished(pmixp_coll_t *coll)
{
	xassert(PMIXP_COLL_FAN_IN == coll->state);
	coll->state = PMIXP_COLL_FAN_OUT;
	memset(coll->ch_contribs, 0, sizeof(int) * coll->children_cnt);
	coll->contrib_cntr = 0;
	coll->contrib_local = 0;
	set_buf_offset(coll->buf, coll->serv_offs);
	if (SLURM_SUCCESS != _pack_ranges(coll)) {
		PMIXP_ERROR("Cannot pack ranges to coll message header!");
	}
}

static int _read_info(Buf buf, char **ns, int *rank, char **sender_ns,
		      int *status)
{
	uint32_t cnt, uint32_tmp;
	int rc;

	*ns = NULL;
	*sender_ns = NULL;

	/* 1. unpack namespace */
	if (SLURM_SUCCESS != (rc = unpackmem_ptr(ns, &cnt, buf))) {
		PMIXP_ERROR("Cannot unpack requested namespace!");
		return rc;
	}
	/* We are supposed to unpack a whole null-terminated string
	 * (including '\0'), so no explicit termination is needed here:
	 * (*ns)[cnt] = '\0';
	 */

	/* 2. unpack rank */
	if (SLURM_SUCCESS != (rc = unpack32(&uint32_tmp, buf))) {
		PMIXP_ERROR("Cannot unpack requested rank!");
		return rc;
	}
	*rank = uint32_tmp;

	/* 3. unpack sender namespace */
	if (SLURM_SUCCESS != (rc = unpackmem_ptr(sender_ns, &cnt, buf))) {
		PMIXP_ERROR("Cannot unpack sender namespace!");
		return rc;
	}
	/* We are supposed to unpack a whole null-terminated string
	 * (including '\0'), so no explicit termination is needed here:
	 * (*sender_ns)[cnt] = '\0';
	 */

	/* 4. unpack status */
	if (SLURM_SUCCESS != (rc = unpack32(&uint32_tmp, buf))) {
		PMIXP_ERROR("Cannot unpack status!");
		return rc;
	}
	*status = uint32_tmp;

	return SLURM_SUCCESS;
}

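/*
 * _read_type() and _read_info() above consume a header produced by
 * _setup_header(), which is referenced in this listing but not shown.
 * Below is a minimal sketch of the assumed pack counterpart, mirroring
 * the unpack order (type, namespace, rank, sender namespace, status)
 * and packing strings with their trailing '\0' so the readers need no
 * explicit termination. It illustrates the assumed wire layout, not
 * necessarily the upstream implementation.
 */
static void _setup_header_sketch(Buf buf, dmdx_type_t t,
				 const char *nspace, int rank, int status)
{
	/* message type */
	pack8((uint8_t)t, buf);
	/* requested namespace, including the terminating '\0' */
	packmem((char *)nspace, strlen(nspace) + 1, buf);
	/* requested rank */
	pack32((uint32_t)rank, buf);
	/* sender namespace (assumed to be the local one) */
	packmem((char *)pmixp_info_namespace(),
		strlen(pmixp_info_namespace()) + 1, buf);
	/* status */
	pack32((uint32_t)status, buf);
}
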
static void _reset_coll_dfwd(pmixp_coll_t *coll)
{
	/* downwards status */
	(void)pmixp_server_buf_reset(coll->dfwd_buf);
	if (SLURM_SUCCESS != _pack_coll_info(coll, coll->dfwd_buf)) {
		PMIXP_ERROR("Cannot pack ranges to message header!");
	}
	coll->dfwd_cb_cnt = 0;
	coll->dfwd_cb_wait = 0;
	coll->dfwd_status = PMIXP_COLL_SND_NONE;
	coll->contrib_prnt = false;
	/* Save the total service offset */
	coll->dfwd_offset = get_buf_offset(coll->dfwd_buf);
}

static void _reset_coll_ufwd(pmixp_coll_t *coll)
{
	/* upward status */
	coll->contrib_children = 0;
	coll->contrib_local = false;
	memset(coll->contrib_chld, 0,
	       sizeof(coll->contrib_chld[0]) * coll->chldrn_cnt);
	coll->serv_offs = pmixp_server_buf_reset(coll->ufwd_buf);
	if (SLURM_SUCCESS != _pack_coll_info(coll, coll->ufwd_buf)) {
		PMIXP_ERROR("Cannot pack ranges to message header!");
	}
	coll->ufwd_offset = get_buf_offset(coll->ufwd_buf);
	coll->ufwd_status = PMIXP_COLL_SND_NONE;
}

void pmixp_coll_log(pmixp_coll_t *coll)
{
	PMIXP_ERROR("Dumping collective state");
	switch (coll->type) {
	case PMIXP_COLL_TYPE_FENCE_RING:
		pmixp_coll_ring_log(coll);
		break;
	case PMIXP_COLL_TYPE_FENCE_TREE:
		pmixp_coll_tree_log(coll);
		break;
	default:
		break;
	}
}

/*
 * For this to work the following conditions are supposed to be
 * satisfied:
 * - SLURM has to be configured with the `--enable-debug` option
 * - the job step needs to span at least two nodes
 * In this case the communication exchange will be done between
 * the first two nodes.
 */
void pmixp_server_run_cperf()
{
	int size;
	size_t start, end, bound;

	pmixp_debug_hang(0);

	start = 1 << _pmixp_cperf_low;
	end = 1 << _pmixp_cperf_up;
	bound = 1 << _pmixp_cperf_bound;

	for (size = start; size <= end; size *= 2) {
		int j, iters = _pmixp_cperf_siter;
		struct timeval tv1, tv2;
		if (size >= bound) {
			iters = _pmixp_cperf_liter;
		}
		double times[iters];
		char *data = xmalloc(size);

		PMIXP_ERROR("coll perf %d", size);
		for (j = 0; j < iters; j++) {
			gettimeofday(&tv1, NULL);
			_pmixp_server_cperf_iter(data, size);
			gettimeofday(&tv2, NULL);
			times[j] = tv2.tv_sec + 1E-6 * tv2.tv_usec -
				(tv1.tv_sec + 1E-6 * tv1.tv_usec);
		}

		for (j = 0; j < iters; j++) {
			/* Output measurements to the slurmd.log */
			PMIXP_ERROR("\t%d %d: %.9lf", j, size, times[j]);
		}
		xfree(data);
	}
}

int pmixp_fixrights(char *path, uid_t uid, mode_t mode)
{
	char nested_path[PATH_MAX];
	DIR *dp;
	struct dirent *ent;
	int rc;

	/*
	 * Make sure that "directory" exists and is a directory.
	 */
	if (1 != (rc = _is_dir(path))) {
		PMIXP_ERROR("path=\"%s\" is not a directory", path);
		return (rc == 0) ? -1 : rc;
	}

	if ((dp = opendir(path)) == NULL) {
		PMIXP_ERROR_STD("cannot open path=\"%s\"", path);
		return -1;
	}

	while ((ent = readdir(dp)) != NULL) {
		if (0 == xstrcmp(ent->d_name, ".") ||
		    0 == xstrcmp(ent->d_name, "..")) {
			/* skip special dir's */
			continue;
		}
		snprintf(nested_path, sizeof(nested_path), "%s/%s", path,
			 ent->d_name);
		if (_is_dir(nested_path)) {
			if ((rc = _file_fix_rights(nested_path, uid, mode))) {
				PMIXP_ERROR_STD("cannot fix permissions for \"%s\"",
						nested_path);
				closedir(dp);
				return -1;
			}
			/* descend into the subdirectory and fix its
			 * contents as well */
			pmixp_fixrights(nested_path, uid, mode);
		} else {
			if ((rc = _file_fix_rights(nested_path, uid, mode))) {
				PMIXP_ERROR_STD("cannot fix permissions for \"%s\"",
						nested_path);
				closedir(dp);
				return -1;
			}
		}
	}
	closedir(dp);

	return 0;
}

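/*
 * Both pmixp_fixrights() and pmixp_rmdir_recursively() rely on an
 * _is_dir() helper that is not part of this listing. A minimal sketch
 * of the assumed semantics, matching how its return value is
 * interpreted above (negative on stat() failure, 0 for a non-directory,
 * 1 for a directory); it assumes <sys/stat.h> is included.
 */
static int _is_dir(char *path)
{
	struct stat stat_buf;
	int rc;

	if (0 > (rc = stat(path, &stat_buf))) {
		PMIXP_ERROR_STD("Cannot stat path=\"%s\"", path);
		return rc;
	} else if (!S_ISDIR(stat_buf.st_mode)) {
		return 0;
	}
	return 1;
}
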
static void _reset_coll(pmixp_coll_t *coll)
{
	switch (coll->state) {
	case PMIXP_COLL_SYNC:
		/* already reset */
		xassert(!coll->contrib_local && !coll->contrib_children &&
			!coll->contrib_prnt);
		break;
	case PMIXP_COLL_COLLECT:
	case PMIXP_COLL_UPFWD:
	case PMIXP_COLL_UPFWD_WSC:
		coll->seq++;
		coll->state = PMIXP_COLL_SYNC;
		_reset_coll_ufwd(coll);
		_reset_coll_dfwd(coll);
		coll->cbdata = NULL;
		coll->cbfunc = NULL;
		break;
	case PMIXP_COLL_UPFWD_WPC:
		/* If we were waiting for the parent contrib,
		 * upward portion is already reset, and may contain
		 * next collective's data */
	case PMIXP_COLL_DOWNFWD:
		/* same with downward state */
		coll->seq++;
		_reset_coll_dfwd(coll);
		if (coll->contrib_local || coll->contrib_children) {
			/* next collective was already started */
			coll->state = PMIXP_COLL_COLLECT;
		} else {
			coll->state = PMIXP_COLL_SYNC;
		}

		if (!coll->contrib_local) {
			/* drop the callback info if we haven't started
			 * next collective locally */
			coll->cbdata = NULL;
			coll->cbfunc = NULL;
		}
		break;
	default:
		PMIXP_ERROR("Bad collective state = %d", (int)coll->state);
		abort();
	}
}

void pmixp_dmdx_process(Buf buf, char *host, uint32_t seq)
{
	dmdx_type_t type;

	if (SLURM_SUCCESS != _read_type(buf, &type)) {
		/* _read_type() already logged the error;
		 * don't dispatch on an uninitialized type */
		return;
	}

	switch (type) {
	case DMDX_REQUEST:
		_dmdx_req(buf, host, seq);
		break;
	case DMDX_RESPONSE:
		_dmdx_resp(buf, host, seq);
		break;
	default:
		PMIXP_ERROR("Bad request from host %s. Skip", host);
		break;
	}
}

int pmixp_coll_belong_chk(const pmixp_proc_t *procs, size_t nprocs)
{
	int i;
	pmixp_namespace_t *nsptr = pmixp_nspaces_local();

	/* Find my namespace in the range */
	for (i = 0; i < (int)nprocs; i++) {
		if (0 != xstrcmp(procs[i].nspace, nsptr->name)) {
			continue;
		}
		if (pmixp_lib_is_wildcard(procs[i].rank))
			return 0;
		if (0 <= pmixp_info_taskid2localid(procs[i].rank)) {
			return 0;
		}
	}
	/* we don't participate in this collective! */
	PMIXP_ERROR("No process controlled by this slurmstepd is involved in this collective.");
	return -1;
}

int pmixp_coll_ring_local(pmixp_coll_t *coll, char *data, size_t size,
			  void *cbfunc, void *cbdata)
{
	int ret = SLURM_SUCCESS;
	pmixp_coll_ring_ctx_t *coll_ctx = NULL;

	/* lock the structure */
	slurm_mutex_lock(&coll->lock);

	/* sanity check */
	pmixp_coll_sanity_check(coll);

	/* setup callback info */
	coll->cbfunc = cbfunc;
	coll->cbdata = cbdata;

	coll_ctx = pmixp_coll_ring_ctx_new(coll);
	if (!coll_ctx) {
		PMIXP_ERROR("Can not get new ring collective context, seq=%u",
			    coll->seq);
		ret = SLURM_ERROR;
		goto exit;
	}

#ifdef PMIXP_COLL_DEBUG
	PMIXP_DEBUG("%p: contrib/loc: seqnum=%u, state=%d, size=%lu",
		    coll_ctx, coll_ctx->seq, coll_ctx->state, size);
#endif

	if (_pmixp_coll_contrib(coll_ctx, coll->my_peerid, 0, data, size)) {
		ret = SLURM_ERROR;
		goto exit;
	}

	/* mark local contribution */
	coll_ctx->contrib_local = true;
	_progress_coll_ring(coll_ctx);

exit:
	/* unlock the structure */
	slurm_mutex_unlock(&coll->lock);

	return ret;
}

static void _fan_out_finished(pmixp_coll_t *coll)
{
	coll->seq++; /* move to the next collective */
	switch (coll->state) {
	case PMIXP_COLL_FAN_OUT:
		coll->state = PMIXP_COLL_SYNC;
		break;
	case PMIXP_COLL_FAN_OUT_IN:
		/* we started to receive data for the new collective
		 * switch to the fan-in stage */
		coll->state = PMIXP_COLL_FAN_IN;
		/* set the right timestamp */
		coll->ts = coll->ts_next;
		break;
	default:
		PMIXP_ERROR("Bad collective state = %d", coll->state);
		xassert(PMIXP_COLL_FAN_OUT == coll->state ||
			PMIXP_COLL_FAN_OUT_IN == coll->state);
	}
}

int pmixp_coll_belong_chk(pmixp_coll_type_t type,
			  const pmix_proc_t *procs, size_t nprocs)
{
	int i;
	pmixp_namespace_t *nsptr = pmixp_nspaces_local();

	/* Find my namespace in the range */
	for (i = 0; i < (int)nprocs; i++) {
		if (0 != xstrcmp(procs[i].nspace, nsptr->name)) {
			continue;
		}
		if (procs[i].rank == PMIX_RANK_WILDCARD)
			return 0;
		if (0 <= pmixp_info_taskid2localid(procs[i].rank)) {
			return 0;
		}
	}
	/* we don't participate in this collective! */
	PMIXP_ERROR("Have collective that doesn't include this job's namespace");
	return -1;
}

int pmixp_rmdir_recursively(char *path)
{
	char nested_path[PATH_MAX];
	DIR *dp;
	struct dirent *ent;
	int rc;

	/*
	 * Make sure that "directory" exists and is a directory.
	 */
	if (1 != (rc = _is_dir(path))) {
		PMIXP_ERROR("path=\"%s\" is not a directory", path);
		return (rc == 0) ? -1 : rc;
	}

	if ((dp = opendir(path)) == NULL) {
		PMIXP_ERROR_STD("cannot open path=\"%s\"", path);
		return -1;
	}

	while ((ent = readdir(dp)) != NULL) {
		if (0 == strcmp(ent->d_name, ".") ||
		    0 == strcmp(ent->d_name, "..")) {
			/* skip special dir's */
			continue;
		}
		snprintf(nested_path, sizeof(nested_path), "%s/%s", path,
			 ent->d_name);
		if (_is_dir(nested_path)) {
			pmixp_rmdir_recursively(nested_path);
		} else {
			unlink(nested_path);
		}
	}
	closedir(dp);

	if ((rc = rmdir(path))) {
		PMIXP_ERROR_STD("Cannot remove path=\"%s\"", path);
	}
	return rc;
}

static void _respond_with_error(int seq_num, char *sender_host,
				char *sender_ns, int status)
{
	Buf buf = create_buf(NULL, 0);
	char *addr;
	int rc;

	/* rank doesn't matter here, don't send it */
	_setup_header(buf, DMDX_RESPONSE, pmixp_info_namespace(), -1, status);

	/* generate namespace usocket name */
	addr = pmixp_info_nspace_usock(sender_ns);

	/* send response */
	rc = pmixp_server_send(sender_host, PMIXP_MSG_DMDX, seq_num, addr,
			       get_buf_data(buf), get_buf_offset(buf), 1);
	if (SLURM_SUCCESS != rc) {
		PMIXP_ERROR("Cannot send direct modex error response to %s",
			    sender_host);
	}
	xfree(addr);
	free_buf(buf);
}

void pmixp_coll_reset_if_to(pmixp_coll_t *coll, time_t ts)
{
	/* lock the structure */
	slurm_mutex_lock(&coll->lock);
	if (PMIXP_COLL_SYNC == coll->state) {
		goto unlock;
	}
	if (ts - coll->ts > pmixp_info_timeout()) {
		/* respond to the libpmix */
		coll->cbfunc(PMIX_ERR_TIMEOUT, NULL, 0, coll->cbdata, NULL,
			     NULL);
		/* drop the collective */
		_reset_coll(coll);
		/* report the timeout event */
		PMIXP_ERROR("Collective timeout!");
	}
unlock:
	/* unlock the structure */
	slurm_mutex_unlock(&coll->lock);
}

inline static int _pmixp_coll_contrib(pmixp_coll_ring_ctx_t *coll_ctx,
				      int contrib_id, uint32_t hop,
				      char *data, size_t size)
{
	pmixp_coll_t *coll = _ctx_get_coll(coll_ctx);
	char *data_ptr = NULL;
	int ret;

	/* change the state */
	coll->ts = time(NULL);

	/* save contribution */
	if (!size_buf(coll_ctx->ring_buf)) {
		grow_buf(coll_ctx->ring_buf, size * coll->peers_cnt);
	} else if (remaining_buf(coll_ctx->ring_buf) < size) {
		uint32_t new_size = size_buf(coll_ctx->ring_buf) +
			size * _ring_remain_contrib(coll_ctx);
		grow_buf(coll_ctx->ring_buf, new_size);
	}
	grow_buf(coll_ctx->ring_buf, size);
	data_ptr = get_buf_data(coll_ctx->ring_buf) +
		get_buf_offset(coll_ctx->ring_buf);
	memcpy(data_ptr, data, size);
	set_buf_offset(coll_ctx->ring_buf,
		       get_buf_offset(coll_ctx->ring_buf) + size);

	/* check whether the ring is complete */
	if (contrib_id != _ring_next_id(coll)) {
		/* forward data to the next node */
		ret = _ring_forward_data(coll_ctx, contrib_id, hop,
					 data_ptr, size);
		if (ret) {
			PMIXP_ERROR("Cannot forward ring data");
			return SLURM_ERROR;
		}
	}

	return SLURM_SUCCESS;
}

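/*
 * The "ring is complete" test above compares the contribution's origin
 * against _ring_next_id(), which is not shown in this listing. A
 * minimal sketch of the assumed ring-neighbor arithmetic: a simple
 * modular wrap-around over peers_cnt, given for illustration only.
 */
static inline int _ring_next_id(pmixp_coll_t *coll)
{
	/* the node that sits after us in the ring */
	return (coll->my_peerid + 1) % coll->peers_cnt;
}
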
static void _libpmix_cb(void *_vcbdata)
{
	pmixp_coll_cbdata_t *cbdata = (pmixp_coll_cbdata_t *)_vcbdata;
	pmixp_coll_t *coll = cbdata->coll;

	/* lock the collective */
	slurm_mutex_lock(&coll->lock);
	if (cbdata->seq != coll->seq) {
		/* it seems like this collective was reset since the time
		 * we initiated this send.
		 * Just exit to avoid data corruption.
		 */
		PMIXP_ERROR("%p: collective was reset: myseq=%u, curseq=%u",
			    coll, cbdata->seq, coll->seq);
		goto exit;
	}

	xassert(PMIXP_COLL_DOWNFWD == coll->state);

	coll->dfwd_cb_cnt++;
#ifdef PMIXP_COLL_DEBUG
	PMIXP_DEBUG("%p: state: %s, snd_status=%s, compl_cnt=%d/%d",
		    coll, pmixp_coll_state2str(coll->state),
		    pmixp_coll_sndstatus2str(coll->dfwd_status),
		    coll->dfwd_cb_cnt, coll->dfwd_cb_wait);
#endif
	_progress_coll(coll);

exit:
	xassert(0 < cbdata->refcntr);
	cbdata->refcntr--;
	if (!cbdata->refcntr) {
		xfree(cbdata);
	}

	/* unlock the collective */
	slurm_mutex_unlock(&coll->lock);
}

void pmixp_coll_free(pmixp_coll_t *coll)
{
	pmixp_coll_sanity_check(coll);

	if (NULL != coll->pset.procs) {
		xfree(coll->pset.procs);
	}
#ifdef PMIXP_COLL_DEBUG
	hostlist_destroy(coll->peers_hl);
#endif
	/* check for collective in a not-SYNC state - something went wrong */
	switch (coll->type) {
	case PMIXP_COLL_TYPE_FENCE_TREE:
		if (PMIXP_COLL_TREE_SYNC != coll->state.tree.state)
			pmixp_coll_log(coll);
		pmixp_coll_tree_free(&coll->state.tree);
		break;
	case PMIXP_COLL_TYPE_FENCE_RING:
	{
		int i, ctx_in_use = 0;
		for (i = 0; i < PMIXP_COLL_RING_CTX_NUM; i++) {
			pmixp_coll_ring_ctx_t *coll_ctx =
				&coll->state.ring.ctx_array[i];
			if (coll_ctx->in_use)
				ctx_in_use++;
		}
		if (ctx_in_use)
			pmixp_coll_log(coll);
		pmixp_coll_ring_free(&coll->state.ring);
		break;
	}
	default:
		PMIXP_ERROR("Unknown coll type");
		break;
	}
	xfree(coll);
}

int pmixp_stepd_send(const char *nodelist, const char *address,
		     const char *data, uint32_t len,
		     unsigned int start_delay,
		     unsigned int retry_cnt, int silent)
{
	int retry = 0, rc;
	unsigned int delay = start_delay; /* in milliseconds */
	char *copy_of_nodelist = xstrdup(nodelist);

	while (1) {
		if (!silent && retry >= 1) {
			PMIXP_DEBUG("send failed, rc=%d, try #%d", rc, retry);
		}

		rc = slurm_forward_data(&copy_of_nodelist, (char *)address,
					len, data);

		if (rc == SLURM_SUCCESS)
			break;

		retry++;
		if (retry >= retry_cnt) {
			PMIXP_ERROR("send failed, rc=%d, exceeded the retry limit",
				    rc);
			break;
		}

		/* wait, doubling the delay on each retry */
		struct timespec ts = {(delay / 1000),
				      ((delay % 1000) * 1000000)};
		nanosleep(&ts, NULL);
		delay *= 2;
	}
	xfree(copy_of_nodelist);

	return rc;
}

static int _progress_dfwd(pmixp_coll_t *coll)
{
	xassert(PMIXP_COLL_DOWNFWD == coll->state);

	/* if all children's and the local callbacks were invoked */
	if (coll->dfwd_cb_wait == coll->dfwd_cb_cnt) {
		coll->dfwd_status = PMIXP_COLL_SND_DONE;
	}

	switch (coll->dfwd_status) {
	case PMIXP_COLL_SND_ACTIVE:
		return false;
	case PMIXP_COLL_SND_FAILED:
		/* something went wrong with the downward send.
		 * notify libpmix about that and abort
		 * the collective */
		PMIXP_ERROR("%p: failed to send, abort collective", coll);
		if (coll->cbfunc) {
			coll->cbfunc(PMIX_ERROR, NULL, 0, coll->cbdata,
				     NULL, NULL);
		}
		_reset_coll(coll);
		/* Don't need to do anything else */
		return false;
	case PMIXP_COLL_SND_DONE:
		break;
	default:
		/* Should not happen, fatal error */
		abort();
	}
#ifdef PMIXP_COLL_DEBUG
	PMIXP_DEBUG("%p: collective is DONE", coll);
#endif
	_reset_coll(coll);
	return true;
}