/*
 * eio read handler: process every message currently available on the
 * I/O engine attached to this object (obj->arg).
 *
 * Result codes of _process_message() as handled here:
 *   2 - the connection is done: mark the object as shut down, remove it
 *       from objs, release the engine and stop reading
 *   0 - stop reading
 *   1 (or any other value) - call _process_message() again
 *
 * Always returns 0.
 */
static int _serv_read(eio_obj_t *obj, List objs)
{
	PMIXP_DEBUG("fd = %d", obj->fd);
	pmixp_io_engine_t *eng = (pmixp_io_engine_t *)obj->arg;
	bool keep_reading = true;

	pmixp_debug_hang(0);

	while (keep_reading) {
		int rc = _process_message(eng);

		if (2 == rc) {
			/* connection is finished: tear everything down */
			obj->shutdown = true;
			PMIXP_DEBUG("Connection finalized fd = %d", obj->fd);
			eio_remove_obj(obj, objs);
			xfree(eng);
			keep_reading = false;
		} else if (0 == rc) {
			/* nothing more to do for now */
			keep_reading = false;
		}
	}
	return 0;
}
Example #2
0
/*
 * eio read handler for a connection object (obj->arg is a pmixp_conn_t).
 *
 * Objects already flagged as shut down are skipped. Otherwise receive
 * progress is driven until pmixp_conn_progress_rcv() reports false or
 * the connection stops being alive. In the latter case the object is
 * flagged, removed from objs and the connection is handed back with
 * pmixp_conn_return().
 *
 * Always returns 0.
 */
static int _serv_read(eio_obj_t *obj, List objs)
{
	pmixp_conn_t *conn;
	bool keep_reading;

	xassert(NULL != obj );
	if (obj->shutdown) {
		/* nothing to do here; per the original note the connection
		 * is cleaned up during plugin finalize
		 */
		return 0;
	}

	PMIXP_DEBUG("fd = %d", obj->fd);
	conn = (pmixp_conn_t *)obj->arg;

	/* debug stub */
	pmixp_debug_hang(0);

	/* the body runs at least once, exactly like the original loop */
	do {
		keep_reading = pmixp_conn_progress_rcv(conn) ? true : false;

		if (!pmixp_conn_is_alive(conn)) {
			/* connection is gone: release everything tied to it */
			obj->shutdown = true;
			PMIXP_DEBUG("Connection closed fd = %d", obj->fd);
			eio_remove_obj(obj, objs);
			pmixp_conn_return(conn);
			keep_reading = false;
		}
	} while (keep_reading);

	return 0;
}
Example #3
0
/*
 * eio write handler for a connection object (obj->arg is a pmixp_conn_t).
 *
 * Objects already flagged as shut down are skipped. Otherwise send
 * progress is driven once; if the connection is no longer alive
 * afterwards, the object is flagged, removed from objs and the
 * connection is handed back with pmixp_conn_return().
 *
 * Always returns 0.
 */
static int _serv_write(eio_obj_t *obj, List objs)
{
	pmixp_conn_t *conn;

	xassert(NULL != obj );
	if (obj->shutdown) {
		/* nothing to do here; per the original note the connection
		 * is cleaned up during plugin finalize
		 */
		return 0;
	}

	PMIXP_DEBUG("fd = %d", obj->fd);
	conn = (pmixp_conn_t *)obj->arg;

	/* debug stub */
	pmixp_debug_hang(0);

	/* push pending sends forward */
	pmixp_conn_progress_snd(conn);

	if (pmixp_conn_is_alive(conn)) {
		/* still in use: keep the object registered */
		return 0;
	}

	/* the connection is finished: drop it */
	obj->shutdown = true;
	PMIXP_DEBUG("Connection finalized fd = %d", obj->fd);
	eio_remove_obj(obj, objs);
	pmixp_conn_return(conn);
	return 0;
}
Example #4
0
/*
 * Completion callback for a "down-forward" send of a collective.
 *
 * rc       - send result; SLURM_SUCCESS bumps the completion counter,
 *            anything else marks the down-forward status as failed
 * ctx      - calling context; for PMIXP_P2P_REGULAR this function takes
 *            the collective lock itself and runs _progress_coll(),
 *            for other contexts it does neither
 * _vcbdata - pmixp_coll_cbdata_t carrying the collective, the sequence
 *            number recorded when the send was started and a reference
 *            counter; the structure is freed when the counter hits zero
 *
 * A sequence mismatch between the callback data and the collective means
 * the collective was reset after the send started; its state is then left
 * untouched.
 */
static void _dfwd_sent_cb(int rc, pmixp_p2p_ctx_t ctx, void *_vcbdata)
{
	pmixp_coll_cbdata_t *cbdata = (pmixp_coll_cbdata_t*)_vcbdata;
	pmixp_coll_t *coll = cbdata->coll;
	bool regular = (PMIXP_P2P_REGULAR == ctx);

	if (regular) {
		/* lock the collective */
		slurm_mutex_lock(&coll->lock);
	}

	if (cbdata->seq == coll->seq) {
		xassert(PMIXP_COLL_DOWNFWD == coll->state);

		/* record the outcome of this send */
		if (SLURM_SUCCESS != rc) {
			coll->dfwd_status = PMIXP_COLL_SND_FAILED;
		} else {
			coll->dfwd_cb_cnt++;
		}

#ifdef PMIXP_COLL_DEBUG
		PMIXP_DEBUG("%p: state: %s, snd_status=%s, compl_cnt=%d/%d",
			    coll, pmixp_coll_state2str(coll->state),
			    pmixp_coll_sndstatus2str(coll->dfwd_status),
			    coll->dfwd_cb_cnt, coll->dfwd_cb_wait);
#endif
	} else {
		/* stale callback: the collective moved on, don't touch it */
		PMIXP_DEBUG("Collective was reset!");
	}

	/* drop our reference to the callback data */
	xassert(0 < cbdata->refcntr);
	cbdata->refcntr--;
	if (0 == cbdata->refcntr) {
		xfree(cbdata);
	}

	if (regular) {
		/* progress; in the inline case the caller is expected
		 * to invoke progress itself */
		_progress_coll(coll);

		/* unlock the collective */
		slurm_mutex_unlock(&coll->lock);
	}
}
Example #5
0
/*
 * Lookup handler stub: logs the call and reports that the operation
 * is not implemented. None of the arguments are inspected and cbfunc
 * is never invoked.
 */
static pmix_status_t lookup_fn(const pmix_proc_t *proc, char **keys,
		const pmix_info_t info[], size_t ninfo,
		pmix_lookup_cbfunc_t cbfunc, void *cbdata)
{
	const pmix_status_t rc = PMIX_ERR_NOT_IMPLEMENTED;

	PMIXP_DEBUG("called");
	return rc;
}
Example #6
0
/*
 * Disconnect handler stub: logs the call and reports that the operation
 * is not supported. None of the arguments are inspected and cbfunc is
 * never invoked.
 */
static pmix_status_t _disconnect_fn(const pmix_proc_t procs[], size_t nprocs,
				    const pmix_info_t info[], size_t ninfo,
				    pmix_op_cbfunc_t cbfunc, void *cbdata)
{
	const pmix_status_t rc = PMIX_ERR_NOT_SUPPORTED;

	PMIXP_DEBUG("called");
	return rc;
}
Example #7
0
/*
 * Accept a new direct-protocol connection on descriptor fd.
 *
 * The descriptor is switched to non-blocking, close-on-exec and no-delay
 * mode and wrapped into a temporary PMIXP_PROTO_DIRECT connection. One
 * receive attempt is made right away:
 *  - if the connection is no longer alive afterwards it is returned
 *    immediately;
 *  - otherwise an eio object is created for it, registered with the
 *    plugin I/O engine, and the engine is woken up.
 */
void pmixp_server_direct_conn(int fd)
{
	pmixp_conn_t *conn;

	PMIXP_DEBUG("Request from fd = %d", fd);

	/* prepare the descriptor */
	fd_set_nonblocking(fd);
	fd_set_close_on_exec(fd);
	pmixp_fd_set_nodelay(fd);

	conn = pmixp_conn_new_temp(PMIXP_PROTO_DIRECT, fd,
				   _direct_conn_establish);

	/* try to process right here */
	pmixp_conn_progress_rcv(conn);

	if (pmixp_conn_is_alive(conn)) {
		/* more I/O is needed: let the eio engine take over */
		eio_obj_t *obj = eio_obj_create(fd, &direct_peer_ops,
						(void *)conn);

		eio_new_obj(pmixp_info_io(), obj);
		/* wakeup this connection to get processed */
		eio_signal_wakeup(pmixp_info_io());
	} else {
		/* done already, the connection is not needed anymore */
		pmixp_conn_return(conn);
	}
}
Example #8
0
/*
 * Accept a new Slurm-protocol connection on descriptor fd.
 *
 * The descriptor is switched to non-blocking and close-on-exec mode and
 * wrapped into a temporary PMIXP_PROTO_SLURM connection. One receive
 * attempt is made right away:
 *  - if the connection is no longer alive afterwards it is returned
 *    immediately;
 *  - otherwise an eio object is created for it and registered with the
 *    plugin I/O engine.
 *
 * TODO (carried over from the original note): the structures created
 * here need to be tracked so that they can be freed in
 * "pmixp_stepd_finalize".
 */
void pmixp_server_slurm_conn(int fd)
{
	pmixp_conn_t *conn = NULL;

	PMIXP_DEBUG("Request from fd = %d", fd);
	pmixp_debug_hang(0);

	/* prepare the descriptor */
	fd_set_nonblocking(fd);
	fd_set_close_on_exec(fd);

	conn = pmixp_conn_new_temp(PMIXP_PROTO_SLURM, fd, _slurm_new_msg);

	/* try to process right here */
	pmixp_conn_progress_rcv(conn);

	if (pmixp_conn_is_alive(conn)) {
		/* more I/O is needed: let the eio engine take over */
		eio_obj_t *obj = eio_obj_create(fd, &slurm_peer_ops,
						(void *)conn);

		eio_new_obj(pmixp_info_io(), obj);
	} else {
		/* done already, the connection is not needed anymore */
		pmixp_conn_return(conn);
	}
}
Example #9
0
/*
 * Initialize the ring-specific part of a collective.
 *
 * coll - collective whose state.ring member is set up
 * hl   - hostlist of the participants; the local host (as reported by
 *        pmixp_info_hostname()) is looked up in it to find the next
 *        neighbor in the ring
 *
 * The neighbor is the host that follows the local one in hl, wrapping
 * around after coll->peers_cnt entries. Both buffer pools are created
 * and each of the PMIXP_COLL_RING_CTX_NUM contexts is reset to the
 * PMIXP_COLL_RING_SYNC state with a freshly allocated contribution map.
 *
 * Returns SLURM_SUCCESS, or SLURM_ERROR when the local host is not part
 * of hl or the neighbor's name cannot be fetched. Nothing is allocated
 * on the error paths.
 */
int pmixp_coll_ring_init(pmixp_coll_t *coll, hostlist_t *hl)
{
#ifdef PMIXP_COLL_DEBUG
	PMIXP_DEBUG("called");
#endif
	int i;
	pmixp_coll_ring_ctx_t *coll_ctx = NULL;
	pmixp_coll_ring_t *ring = &coll->state.ring;
	char *p;
	int rel_id = hostlist_find(*hl, pmixp_info_hostname());

	/* a negative index means "not found"; without this check the
	 * "+ 1" below would silently turn it into index 0 and pick the
	 * wrong neighbor
	 */
	if (rel_id < 0) {
		PMIXP_ERROR("Cannot find local host %s in the hostlist",
			    pmixp_info_hostname());
		return SLURM_ERROR;
	}

	/* compute the next absolute id of the neighbor */
	p = hostlist_nth(*hl, (rel_id + 1) % coll->peers_cnt);
	if (!p) {
		PMIXP_ERROR("Cannot get the ring neighbor of host %s",
			    pmixp_info_hostname());
		return SLURM_ERROR;
	}
	ring->next_peerid = pmixp_info_job_hostid(p);
	/* the name is released with plain free(), as in the original */
	free(p);

	ring->fwrd_buf_pool = list_create(pmixp_free_buf);
	ring->ring_buf_pool = list_create(pmixp_free_buf);

	for (i = 0; i < PMIXP_COLL_RING_CTX_NUM; i++) {
		coll_ctx = &ring->ctx_array[i];
		coll_ctx->coll = coll;
		coll_ctx->in_use = false;
		coll_ctx->seq = coll->seq;
		coll_ctx->contrib_local = false;
		coll_ctx->contrib_prev = 0;
		coll_ctx->state = PMIXP_COLL_RING_SYNC;
		// TODO bit vector
		coll_ctx->contrib_map = xmalloc(sizeof(bool) * coll->peers_cnt);
	}

	return SLURM_SUCCESS;
}
Example #10
0
/*
 * Tear down the stepd side of the plugin.
 *
 * Does nothing (and returns 0) when _was_initialized is not set.
 * Otherwise the subsystems are finalized in a fixed order, the server
 * UNIX socket is closed and its filesystem entry removed, and the
 * plugin info is released.
 *
 * Returns SLURM_SUCCESS after a full teardown.
 */
int pmixp_stepd_finalize(void)
{
	char *usock_path;

	if (!_was_initialized) {
		/* nothing to do */
		return 0;
	}

	/* PMIx library and direct modex */
	pmixp_libpmix_finalize();
	pmixp_dmdx_finalize();

	/* connection pools */
	pmixp_conn_fini();
	pmixp_dconn_fini();

	/* state and namespace tables */
	pmixp_state_finalize();
	pmixp_nspaces_finalize();

	/* cleanup the UNIX socket */
	PMIXP_DEBUG("Remove PMIx plugin usock");
	close(pmixp_info_srv_usock_fd());
	usock_path = pmixp_info_nspace_usock(pmixp_info_namespace());
	unlink(usock_path);
	xfree(usock_path);

	/* free the information */
	pmixp_info_free();

	return SLURM_SUCCESS;
}
Example #11
0
/*
 * Hand a local contribution to the collective implementation selected
 * by type.
 *
 * coll           - target collective
 * type           - PMIXP_COLL_TYPE_FENCE_TREE or PMIXP_COLL_TYPE_FENCE_RING
 * data, ndata    - contribution payload and its size
 * cbfunc, cbdata - forwarded unchanged to the selected implementation
 *
 * Returns whatever the tree/ring implementation returns, or SLURM_ERROR
 * for any other type.
 */
int pmixp_coll_contrib_local(pmixp_coll_t *coll, pmixp_coll_type_t type,
			     char *data, size_t ndata,
			     void *cbfunc, void *cbdata) {
	int rc;

#ifdef PMIXP_COLL_DEBUG
	PMIXP_DEBUG("%p: %s seq=%d, size=%lu", coll, pmixp_coll_type2str(type),
		    coll->seq, ndata);
#endif

	if (PMIXP_COLL_TYPE_FENCE_TREE == type) {
		rc = pmixp_coll_tree_local(coll, data, ndata, cbfunc, cbdata);
	} else if (PMIXP_COLL_TYPE_FENCE_RING == type) {
		rc = pmixp_coll_ring_local(coll, data, ndata, cbfunc, cbdata);
	} else {
		/* unknown collective type */
		rc = SLURM_ERROR;
	}

	return rc;
}
/*
 * Accept a new connection on descriptor fd.
 *
 * The descriptor is switched to non-blocking and close-on-exec mode and
 * a new I/O engine ("me") is allocated for it. One processing attempt is
 * made right away:
 *  - if _process_message() returns 2 the engine is freed immediately;
 *  - otherwise an eio object owning the engine is registered with the
 *    plugin I/O engine.
 *
 * TODO (carried over from the original note): the "me" structures
 * created here need to be tracked so that they can be freed in
 * "pmixp_stepd_finalize".
 */
void pmix_server_new_conn(int fd)
{
	pmixp_io_engine_t *me;

	PMIXP_DEBUG("Request from fd = %d", fd);

	/* prepare the descriptor */
	fd_set_nonblocking(fd);
	fd_set_close_on_exec(fd);

	me = xmalloc(sizeof(*me));
	pmix_io_init(me, fd, srv_rcvd_header);
	/* Messages reach the stepd through slurm_forward_data, and SLURM
	 * puts the user ID in front of them. Skip it.
	 */
	pmix_io_rcvd_padding(me, sizeof(uint32_t));

	if (2 != _process_message(me)) {
		/* not finished yet: create an eio object to handle it */
		eio_obj_t *obj = eio_obj_create(fd, &peer_ops, (void *)me);

		eio_new_obj(pmixp_info_io(), obj);
	} else {
		/* connection was fully processed here */
		xfree(me);
	}
}
Example #13
0
/*
 * Send a message to a single node, retrying with a growing delay.
 *
 * nodename, address, data, len - passed unchanged to _pmix_p2p_send_core()
 * start_delay - delay before the first retry, in milliseconds; it is
 *               doubled after every sleep
 * retry_cnt   - the loop gives up once the number of failed attempts
 *               reaches this value
 * silent      - when non-zero the per-retry debug message is suppressed
 *               (the final error message is always printed)
 *
 * Returns the result of the last send attempt.
 */
int pmixp_p2p_send(const char *nodename, const char *address, const char *data,
		   uint32_t len, unsigned int start_delay,
		   unsigned int retry_cnt, int silent)
{
	int rc;
	int attempts_failed = 0;
	unsigned int delay_ms = start_delay;

	pmixp_debug_hang(0);

	for (;;) {
		/* report the previous failure right before retrying */
		if (attempts_failed >= 1 && !silent) {
			PMIXP_DEBUG("send failed, rc=%d, try #%d", rc, attempts_failed);
		}

		rc = _pmix_p2p_send_core(nodename, address, data, len);
		if (SLURM_SUCCESS == rc) {
			break;
		}

		if (++attempts_failed >= retry_cnt) {
			PMIXP_ERROR("send failed, rc=%d, exceeded the retry limit", rc);
			break;
		}

		/* back off: sleep, then double the delay for the next round */
		struct timespec pause = {
			.tv_sec = (delay_ms / 1000),
			.tv_nsec = ((delay_ms % 1000) * 1000000),
		};
		nanosleep(&pause, NULL);
		delay_ms *= 2;
	}

	return rc;
}
Example #14
0
/*
 * Unpublish handler stub: logs the call and reports that the operation
 * is not supported. None of the arguments are inspected and cbfunc is
 * never invoked.
 */
static pmix_status_t _unpublish_fn(const pmix_proc_t *proc, char **keys,
				   const pmix_info_t info[], size_t ninfo,
				   pmix_op_cbfunc_t cbfunc, void *cbdata)
{
	const pmix_status_t rc = PMIX_ERR_NOT_SUPPORTED;

	PMIXP_DEBUG("called");
	return rc;
}
Example #15
0
/*
 * Non-blocking fence handler.
 *
 * procs_v2, nprocs - participants of the fence; they are copied into a
 *                    temporary pmixp_proc_t array used to look up the
 *                    collective
 * info, ninfo      - not used here
 * data, ndata      - local contribution
 * cbfunc, cbdata   - completion callback, passed on to the collective
 *
 * Returns PMIX_SUCCESS when the local contribution was accepted. When no
 * collective could be obtained or the contribution fails, cbfunc is
 * invoked with PMIX_ERROR and PMIX_ERROR is returned.
 */
static pmix_status_t _fencenb_fn(const pmix_proc_t procs_v2[], size_t nprocs,
				 const pmix_info_t info[], size_t ninfo,
				 char *data, size_t ndata,
				 pmix_modex_cbfunc_t cbfunc, void *cbdata)
{
	PMIXP_DEBUG("called");
	pmixp_coll_t *coll;
	pmixp_coll_type_t type = PMIXP_COLL_TYPE_FENCE;
	pmix_status_t status = PMIX_SUCCESS;
	int ret = SLURM_ERROR;
	size_t i;
	pmixp_proc_t *procs = xmalloc(sizeof(*procs) * nprocs);

	for (i = 0; i < nprocs; i++) {
		procs[i].rank = procs_v2[i].rank;
		/* NOTE(review): strncpy() leaves the destination without a
		 * terminator when the source is PMIXP_MAX_NSLEN bytes or
		 * longer - confirm that procs[i].nspace has room for one.
		 */
		strncpy(procs[i].nspace, procs_v2[i].nspace, PMIXP_MAX_NSLEN);
	}

	coll = pmixp_state_coll_get(type, procs, nprocs);
	if (coll) {
		ret = pmixp_coll_contrib_local(coll, data, ndata, cbfunc, cbdata);
	}
	/* else: no collective to contribute to - ret stays SLURM_ERROR
	 * instead of dereferencing a NULL pointer
	 */
	xfree(procs);

	if (SLURM_SUCCESS != ret) {
		status = PMIX_ERROR;
		goto error;
	}
	return PMIX_SUCCESS;
error:
	cbfunc(status, NULL, 0, cbdata, NULL, NULL);

	return status;
}
Example #16
0
/*
 * Perform one attempt to deliver a data blob to a node using a
 * REQUEST_FORWARD_DATA message.
 *
 * nodename - target node; resolved with slurm_conf_get_addr()
 * address  - stored in the request's "address" field
 * data,len - payload (the const qualifier is cast away because the
 *            request structure takes a non-const pointer)
 *
 * Returns SLURM_SUCCESS when every returned response carries a success
 * code; otherwise SLURM_ERROR or the last non-success response code.
 * The response list is released on every path that obtained one.
 */
static int _pmix_p2p_send_core(const char *nodename, const char *address,
			       const char *data, uint32_t len)
{
	int rc, timeout;
	slurm_msg_t msg;
	forward_data_msg_t req;
	List ret_list;
	ret_data_info_t *ret_data_info = NULL;

	pmixp_debug_hang(0);

	slurm_msg_t_init(&msg);

	PMIXP_DEBUG("nodelist=%s, address=%s, len=%u", nodename, address, len);
	req.address = (char *)address;
	req.len = len;
	/* there is not much we can do - just cast) */
	req.data = (char*)data;

	msg.msg_type = REQUEST_FORWARD_DATA;
	msg.data = &req;

	if (slurm_conf_get_addr(nodename, &msg.address) == SLURM_ERROR) {
		PMIXP_ERROR("Can't find address for host "
			    "%s, check slurm.conf", nodename);
		return SLURM_ERROR;
	}

	/* timeout is scaled by 1000 (presumably seconds to milliseconds) */
	timeout = slurm_get_msg_timeout() * 1000;
	msg.forward.timeout = timeout;
	msg.forward.cnt = 0;
	msg.forward.nodelist = NULL;
	ret_list = slurm_send_addr_recv_msgs(&msg, (char*)nodename, timeout);
	if (!ret_list) {
		/* This should never happen (when this was
		 * written slurm_send_addr_recv_msgs always
		 * returned a list */
		PMIXP_ERROR("No return list given from "
			    "slurm_send_addr_recv_msgs spawned for %s",
			    nodename);
		return SLURM_ERROR;
	} else if ((errno != SLURM_COMMUNICATIONS_CONNECTION_ERROR) &&
		   !list_count(ret_list)) {
		PMIXP_ERROR("failed to send to %s, errno=%d", nodename, errno);
		/* the (empty) list is still ours: release it before
		 * bailing out, otherwise it is leaked
		 */
		FREE_NULL_LIST(ret_list);
		return SLURM_ERROR;
	}

	/* any non-success response overrides the overall result */
	rc = SLURM_SUCCESS;
	while ((ret_data_info = list_pop(ret_list))) {
		int temp_rc = slurm_get_return_code(ret_data_info->type,
						    ret_data_info->data);
		if (temp_rc != SLURM_SUCCESS)
			rc = temp_rc;
		destroy_data_info(ret_data_info);
	}

	FREE_NULL_LIST(ret_list);

	return rc;
}
Example #17
0
/*
 * Spawn handler stub: logs the call and reports that the operation is
 * not supported. None of the arguments are inspected and cbfunc is
 * never invoked.
 */
static pmix_status_t _spawn_fn(const pmix_proc_t *proc,
			       const pmix_info_t job_info[], size_t ninfo,
			       const pmix_app_t apps[], size_t napps,
			       pmix_spawn_cbfunc_t cbfunc, void *cbdata)
{
	const pmix_status_t rc = PMIX_ERR_NOT_SUPPORTED;

	PMIXP_DEBUG("called");
	return rc;
}
Example #18
0
/*
 * Direct modex handler: requests the data of process proc (namespace
 * and rank) through pmixp_dmdx_get(), handing over cbfunc/cbdata.
 *
 * info and ninfo are not used. Returns PMIX_SUCCESS when the request
 * was accepted (SLURM_SUCCESS) and PMIX_ERROR otherwise.
 */
static pmix_status_t _dmodex_fn(const pmix_proc_t *proc,
				const pmix_info_t info[], size_t ninfo,
				pmix_modex_cbfunc_t cbfunc, void *cbdata)
{
	PMIXP_DEBUG("called");

	if (SLURM_SUCCESS != pmixp_dmdx_get(proc->nspace, proc->rank,
					    cbfunc, cbdata)) {
		return PMIX_ERROR;
	}
	return PMIX_SUCCESS;
}
Example #19
0
/*
 * Abort handler: logs the request and kills the current job step with
 * SIGKILL. proc, server_object, procs and nprocs are not used.
 *
 * When a callback is supplied it is invoked with PMIX_SUCCESS.
 * Always returns PMIX_SUCCESS.
 */
static pmix_status_t abort_fn(const pmix_proc_t *proc, void *server_object,
			      int status, const char msg[], pmix_proc_t procs[],
			      size_t nprocs, pmix_op_cbfunc_t cbfunc, void *cbdata)
{
	/* For now the whole step is simply killed; the original author
	 * left fault-tolerance handling as an open question. */
	PMIXP_DEBUG("called: status = %d, msg = %s", status, msg);
	slurm_kill_job_step(pmixp_info_jobid(), pmixp_info_stepid(), SIGKILL);

	if (cbfunc) {
		cbfunc(PMIX_SUCCESS, cbdata);
	}

	return PMIX_SUCCESS;
}
Example #20
0
/*
 * Forward one contribution to the next peer of the ring.
 *
 * coll_ctx   - ring context the contribution belongs to
 * contrib_id - stored in the message header as the contributor id
 * hop_seq    - stored in the message header as the hop sequence number
 * data, size - payload appended after the packed header
 *
 * A forward buffer is taken with _get_fwd_buf(); the header and payload
 * are written into it and the buffer is sent non-blocking with
 * _ring_sent_cb as the completion callback.
 *
 * Returns SLURM_ERROR when no forward buffer is available, otherwise the
 * result of pmixp_server_send_nb().
 */
static int _ring_forward_data(pmixp_coll_ring_ctx_t *coll_ctx, uint32_t contrib_id,
			      uint32_t hop_seq, void *data, size_t size)
{
	pmixp_coll_ring_msg_hdr_t hdr;
	pmixp_coll_t *coll = _ctx_get_coll(coll_ctx);
	pmixp_coll_ring_t *ring = &coll->state.ring;
	hdr.nodeid = coll->my_peerid;
	hdr.msgsize = size;
	hdr.seq = coll_ctx->seq;
	hdr.hop_seq = hop_seq;
	hdr.contrib_id = contrib_id;
	pmixp_ep_t *ep = NULL;
	pmixp_coll_ring_cbdata_t *cbdata = NULL;
	uint32_t offset = 0;
	Buf buf = _get_fwd_buf(coll_ctx);
	int rc = SLURM_SUCCESS;


	pmixp_coll_ring_ctx_sanity_check(coll_ctx);

#ifdef PMIXP_COLL_DEBUG
	PMIXP_DEBUG("%p: transit data to nodeid=%d, seq=%d, hop=%d, size=%lu, contrib=%d",
		    coll_ctx, _ring_next_id(coll), hdr.seq,
		    hdr.hop_seq, hdr.msgsize, hdr.contrib_id);
#endif
	if (!buf) {
		rc = SLURM_ERROR;
		goto exit;
	}

	/* The endpoint is allocated only once the buffer is known to be
	 * available, so the early exit above has nothing to release.
	 * NOTE(review): ep is not freed after the send either; confirm
	 * whether pmixp_server_send_nb() keeps a reference to it.
	 */
	ep = xmalloc(sizeof(*ep));
	ep->type = PMIXP_EP_NOIDEID;
	ep->ep.nodeid = ring->next_peerid;

	/* pack ring info */
	_pack_coll_ring_info(coll, &hdr, buf);

	/* insert payload to buf */
	offset = get_buf_offset(buf);
	pmixp_server_buf_reserve(buf, size);
	memcpy(get_buf_data(buf) + offset, data, size);
	set_buf_offset(buf, offset + size);

	/* context handed to _ring_sent_cb when the send completes */
	cbdata = xmalloc(sizeof(pmixp_coll_ring_cbdata_t));
	cbdata->buf = buf;
	cbdata->coll = coll;
	cbdata->coll_ctx = coll_ctx;
	cbdata->seq = coll_ctx->seq;
	rc = pmixp_server_send_nb(ep, PMIXP_MSG_RING, coll_ctx->seq, buf,
				  _ring_sent_cb, cbdata);
exit:
	return rc;
}
Example #21
0
/*
 * Record the local contribution of this node in a collective.
 *
 * coll       - target collective
 * data, size - contribution payload, appended to coll->buf
 *
 * Under the collective lock: a collective in PMIXP_COLL_SYNC state is
 * moved to PMIXP_COLL_FAN_IN (and its timestamp refreshed), the local
 * contribution flag is set and the payload is appended to the buffer.
 * After the lock is released _progress_fan_in() is invoked.
 *
 * Always returns SLURM_SUCCESS.
 */
int pmixp_coll_contrib_local(pmixp_coll_t *coll, char *data, size_t size)
{
	char *dst;

	PMIXP_DEBUG("%s:%d: get local contribution", pmixp_info_namespace(),
			pmixp_info_nodeid());

	/* sanity check */
	pmixp_coll_sanity_check(coll);

	slurm_mutex_lock(&coll->lock);

	/* the first contribution starts the fan-in phase */
	if (PMIXP_COLL_SYNC == coll->state) {
		PMIXP_DEBUG(
				"%s:%d: get local contribution: switch to PMIXP_COLL_FAN_IN",
				pmixp_info_namespace(), pmixp_info_nodeid());
		coll->state = PMIXP_COLL_FAN_IN;
		coll->ts = time(NULL);
	}
	xassert(PMIXP_COLL_FAN_IN == coll->state);

	/* mark the contribution and append the payload to the buffer;
	 * the destination is computed only after the buffer was grown
	 */
	coll->contrib_local = true;
	grow_buf(coll->buf, size);
	dst = get_buf_data(coll->buf) + get_buf_offset(coll->buf);
	memcpy(dst, data, size);
	set_buf_offset(coll->buf, get_buf_offset(coll->buf) + size);

	slurm_mutex_unlock(&coll->lock);

	/* check if the collective is ready to progress */
	_progress_fan_in(coll);

	PMIXP_DEBUG("%s:%d: get local contribution: finish",
			pmixp_info_namespace(), pmixp_info_nodeid());

	return SLURM_SUCCESS;
}
Example #22
0
/*
 * Fan-out step of a collective: deliver the result held in buf to the
 * registered callback (if there is one) and prepare the collective for
 * its next use.
 *
 * The collective is expected to be in PMIXP_COLL_FAN_OUT or
 * PMIXP_COLL_FAN_OUT_IN state. The callback receives the unread part of
 * buf together with pmixp_free_Buf and buf itself as the release
 * function/argument pair.
 */
void _progres_fan_out(pmixp_coll_t *coll, Buf buf)
{
	PMIXP_DEBUG("%s:%d: start", pmixp_info_namespace(), pmixp_info_nodeid());

	pmixp_coll_sanity_check(coll);

	xassert(PMIXP_COLL_FAN_OUT == coll->state || PMIXP_COLL_FAN_OUT_IN == coll->state);

	/* update the database */
	if (coll->cbfunc) {
		void *payload = get_buf_data(buf) + get_buf_offset(buf);
		size_t payload_len = remaining_buf(buf);

		PMIXP_DEBUG("%s:%d: use the callback", pmixp_info_namespace(),
				pmixp_info_nodeid());
		coll->cbfunc(PMIX_SUCCESS, payload, payload_len, coll->cbdata,
				pmixp_free_Buf, (void *)buf);
	}

	/* get ready for the next collective operation */
	_fan_out_finished(coll);

	PMIXP_DEBUG("%s:%d: collective is prepared for the next use",
			pmixp_info_namespace(), pmixp_info_nodeid());
}
Example #23
0
/*
 * Completion callback for a ring forward send.
 *
 * rc      - not inspected here
 * ctx     - calling context; for PMIXP_P2P_REGULAR the collective lock
 *           is taken for the duration of the callback
 * _cbdata - pmixp_coll_ring_cbdata_t with the collective, the ring
 *           context, the buffer that was sent and the sequence number
 *           recorded at send time; it is freed before returning
 *
 * When the sequence number still matches the ring context, the forward
 * counter is bumped and the ring is progressed. In every case the buffer
 * is reset and pushed back to the forward buffer pool.
 */
static void _ring_sent_cb(int rc, pmixp_p2p_ctx_t ctx, void *_cbdata)
{
	pmixp_coll_ring_cbdata_t *cbdata = (pmixp_coll_ring_cbdata_t*)_cbdata;
	pmixp_coll_ring_ctx_t *coll_ctx = cbdata->coll_ctx;
	pmixp_coll_t *coll = cbdata->coll;
	Buf sent_buf = cbdata->buf;
	bool regular = (PMIXP_P2P_REGULAR == ctx);

	pmixp_coll_sanity_check(coll);

	if (regular) {
		/* lock the collective */
		slurm_mutex_lock(&coll->lock);
	}
#ifdef PMIXP_COLL_DEBUG
	PMIXP_DEBUG("%p: called %d", coll_ctx, coll_ctx->seq);
#endif

	if (cbdata->seq == coll_ctx->seq) {
		coll_ctx->forward_cnt++;
		_progress_coll_ring(coll_ctx);
	} else {
		/* stale callback: the context was reset after the send
		 * was started, so its state must not be touched
		 */
		PMIXP_DEBUG("%p: collective was reset!", coll_ctx);
	}

	/* recycle the buffer */
	pmixp_server_buf_reset(sent_buf);
	list_push(coll->state.ring.fwrd_buf_pool, sent_buf);

	if (regular) {
		/* unlock the collective */
		slurm_mutex_unlock(&coll->lock);
	}
	xfree(cbdata);
}
Example #24
0
/*
 * Return a ring context to its idle state: not in use,
 * PMIXP_COLL_RING_SYNC, no local/previous contributions, no forwards,
 * an all-zero contribution map and no ring buffer attached. The
 * timestamp of the owning collective is refreshed as well.
 */
static void _reset_coll_ring(pmixp_coll_ring_ctx_t *coll_ctx)
{
	pmixp_coll_t *owner = _ctx_get_coll(coll_ctx);
#ifdef PMIXP_COLL_DEBUG
	PMIXP_DEBUG("%p: called", coll_ctx);
#endif
	pmixp_coll_ring_ctx_sanity_check(coll_ctx);

	/* status flags */
	coll_ctx->in_use = false;
	coll_ctx->state = PMIXP_COLL_RING_SYNC;

	/* contribution accounting */
	coll_ctx->contrib_local = false;
	coll_ctx->contrib_prev = 0;
	coll_ctx->forward_cnt = 0;

	owner->ts = time(NULL);

	/* one bool per peer */
	memset(coll_ctx->contrib_map, 0, sizeof(bool) * owner->peers_cnt);
	coll_ctx->ring_buf = NULL;
}
Example #25
0
/*
 * Handle the broadcast (fan-out) data of a collective.
 *
 * coll - target collective
 * buf  - buffer holding the result; passed on to _progres_fan_out()
 *
 * The fan-out step runs under the collective lock. Once the lock is
 * released, fan-in progress is attempted.
 */
void pmixp_coll_bcast(pmixp_coll_t *coll, Buf buf)
{
	PMIXP_DEBUG("%s:%d: start", pmixp_info_namespace(), pmixp_info_nodeid());

	/* the fan-out step must run with the collective locked */
	slurm_mutex_lock(&coll->lock);
	_progres_fan_out(coll, buf);
	slurm_mutex_unlock(&coll->lock);

	/* The next collective may already have started, so try to
	 * progress it. According to the original note this is a no-op
	 * when the collective is in SYNC.
	 */
	_progress_fan_in(coll);
}
Example #26
0
/*
 * Submit the local contribution to a ring collective.
 *
 * coll           - target collective
 * data, size     - local contribution payload
 * cbfunc, cbdata - completion callback and its argument; stored in the
 *                  collective before anything else is done
 *
 * The whole operation runs under the collective lock: a ring context is
 * obtained, the contribution is recorded for the local peer id with hop
 * 0, the context is marked as having the local contribution and the
 * ring is progressed.
 *
 * Returns SLURM_SUCCESS on success and SLURM_ERROR when no ring context
 * is available or when the contribution cannot be recorded.
 */
int pmixp_coll_ring_local(pmixp_coll_t *coll, char *data, size_t size,
			  void *cbfunc, void *cbdata)
{
	int ret = SLURM_SUCCESS;
	pmixp_coll_ring_ctx_t *coll_ctx = NULL;

	/* lock the structure */
	slurm_mutex_lock(&coll->lock);

	/* sanity check */
	pmixp_coll_sanity_check(coll);

	/* setup callback info */
	coll->cbfunc = cbfunc;
	coll->cbdata = cbdata;

	coll_ctx = pmixp_coll_ring_ctx_new(coll);
	if (!coll_ctx) {
		PMIXP_ERROR("Can not get new ring collective context, seq=%u",
			    coll->seq);
		ret = SLURM_ERROR;
		goto exit;
	}

#ifdef PMIXP_COLL_DEBUG
	PMIXP_DEBUG("%p: contrib/loc: seqnum=%u, state=%d, size=%lu",
		    coll_ctx, coll_ctx->seq, coll_ctx->state, size);
#endif

	if (_pmixp_coll_contrib(coll_ctx, coll->my_peerid, 0, data, size)) {
		/* report the failure: returning success here would make the
		 * caller believe the contribution was accepted
		 */
		ret = SLURM_ERROR;
		goto exit;
	}

	/* mark local contribution */
	coll_ctx->contrib_local = true;
	_progress_coll_ring(coll_ctx);

exit:
	/* unlock the structure */
	slurm_mutex_unlock(&coll->lock);

	return ret;
}
Example #27
0
/*
 * Release callback handed to the local delivery callback of a
 * collective (see the cbfunc invocation that passes _libpmix_cb).
 *
 * _vcbdata - pmixp_coll_cbdata_t with the collective, the sequence
 *            number recorded at delivery time and a reference counter;
 *            the structure is freed when the counter reaches zero
 *
 * Runs under the collective lock. When the sequence number still
 * matches, the down-forward completion counter is bumped and the
 * collective is progressed; otherwise the collective was reset in the
 * meantime and is left untouched.
 */
static void _libpmix_cb(void *_vcbdata)
{
	pmixp_coll_cbdata_t *cbdata = (pmixp_coll_cbdata_t*)_vcbdata;
	pmixp_coll_t *coll = cbdata->coll;

	/* lock the collective */
	slurm_mutex_lock(&coll->lock);

	if (cbdata->seq == coll->seq) {
		xassert(PMIXP_COLL_DOWNFWD == coll->state);

		coll->dfwd_cb_cnt++;
#ifdef PMIXP_COLL_DEBUG
		PMIXP_DEBUG("%p: state: %s, snd_status=%s, compl_cnt=%d/%d",
			    coll, pmixp_coll_state2str(coll->state),
			    pmixp_coll_sndstatus2str(coll->dfwd_status),
			    coll->dfwd_cb_cnt, coll->dfwd_cb_wait);
#endif
		_progress_coll(coll);
	} else {
		/* stale callback: skip the update to avoid corrupting
		 * the state of the current operation
		 */
		PMIXP_ERROR("%p: collective was reset: myseq=%u, curseq=%u",
			    coll, cbdata->seq, coll->seq);
	}

	/* drop our reference to the callback data */
	xassert(0 < cbdata->refcntr);
	cbdata->refcntr--;
	if (0 == cbdata->refcntr) {
		xfree(cbdata);
	}

	/* unlock the collective */
	slurm_mutex_unlock(&coll->lock);
}
Example #28
0
/*
 * Non-blocking fence handler.
 *
 * procs, nprocs  - participants of the fence, used to look up the
 *                  collective
 * info, ninfo    - not used here
 * data, ndata    - local contribution
 * cbfunc, cbdata - completion callback, registered on the collective
 *
 * Returns PMIX_SUCCESS when the local contribution was accepted. When
 * the contribution fails, cbfunc is invoked with PMIX_ERROR and
 * PMIX_ERROR is returned.
 */
pmix_status_t fencenb_fn(const pmix_proc_t procs[], size_t nprocs,
			 const pmix_info_t info[], size_t ninfo,
			 char *data, size_t ndata,
			 pmix_modex_cbfunc_t cbfunc, void *cbdata)
{
	PMIXP_DEBUG("called");
	pmixp_coll_t *coll;
	pmixp_coll_type_t type = PMIXP_COLL_TYPE_FENCE;
	pmix_status_t status = PMIX_SUCCESS;

	pmixp_debug_hang(0);

	coll = pmixp_state_coll_get(type, procs, nprocs);
	pmixp_coll_set_callback(coll, cbfunc, cbdata);
	if (SLURM_SUCCESS != pmixp_coll_contrib_local(coll, data, ndata)) {
		/* the failure must be visible to both the callback and the
		 * caller; leaving status at PMIX_SUCCESS would report a
		 * failed fence as a successful one
		 */
		status = PMIX_ERROR;
		goto error;
	}
	return PMIX_SUCCESS;
error:
	cbfunc(status, NULL, 0, cbdata, NULL, NULL);
	return status;
}
Example #29
0
/*
 * Progress step for a collective in the PMIXP_COLL_UPFWD_WPC state.
 *
 * Returns false (no progress) while coll->contrib_prnt is not set.
 * Otherwise the collective moves to PMIXP_COLL_DOWNFWD with an active
 * send status and a zeroed wait counter. If a callback is installed,
 * the part of dfwd_buf between dfwd_offset and the current buffer
 * offset is delivered to it and the wait counter is incremented.
 * Returns true in that case to request another progress iteration.
 */
static int _progress_ufwd_wpc(pmixp_coll_t *coll)
{
	xassert(PMIXP_COLL_UPFWD_WPC == coll->state);

	if (!coll->contrib_prnt) {
		return false;
	}

	/* only the local completion callback (if installed) is waited for */
	coll->dfwd_status = PMIXP_COLL_SND_ACTIVE;
	coll->dfwd_cb_wait = 0;

	/* move to the next state */
	coll->state = PMIXP_COLL_DOWNFWD;

	/* local delivery */
	if (coll->cbfunc) {
		pmixp_coll_cbdata_t *cbdata = xmalloc(sizeof(*cbdata));
		char *payload;
		size_t payload_len;

		/* context handed to _libpmix_cb by the callback */
		cbdata->coll = coll;
		cbdata->seq = coll->seq;
		cbdata->refcntr = 1;

		payload = get_buf_data(coll->dfwd_buf) + coll->dfwd_offset;
		payload_len = get_buf_offset(coll->dfwd_buf) -
				coll->dfwd_offset;
		coll->cbfunc(PMIX_SUCCESS, payload, payload_len, coll->cbdata,
			     _libpmix_cb, (void *)cbdata);
		coll->dfwd_cb_wait++;
#ifdef PMIXP_COLL_DEBUG
		PMIXP_DEBUG("%p: local delivery, size = %lu",
			    coll, (uint64_t)payload_len);
#endif
	}

	/* events observed - need another iteration */
	return true;
}
Example #30
0
/*
 * Forward a data blob to the stepds of a node list, retrying with a
 * growing delay.
 *
 * nodelist    - target nodes; a private copy is handed to
 *               slurm_forward_data() and freed before returning
 * address, data, len - passed on to slurm_forward_data()
 * start_delay - delay before the first retry, in milliseconds; it is
 *               doubled after every sleep
 * retry_cnt   - the loop gives up once the number of failed attempts
 *               reaches this value
 * silent      - when non-zero the per-retry debug message is suppressed
 *               (the final error message is always printed)
 *
 * Returns the result of the last forwarding attempt.
 */
int pmixp_stepd_send(const char *nodelist, const char *address,
		     const char *data, uint32_t len,
		     unsigned int start_delay,
		     unsigned int retry_cnt, int silent)
{
	int rc;
	int attempts_failed = 0;
	unsigned int delay_ms = start_delay;
	char *nodes = xstrdup(nodelist);

	for (;;) {
		/* report the previous failure right before retrying */
		if (attempts_failed >= 1 && !silent) {
			PMIXP_DEBUG("send failed, rc=%d, try #%d", rc, attempts_failed);
		}

		rc = slurm_forward_data(&nodes, (char *)address, len, data);
		if (SLURM_SUCCESS == rc) {
			break;
		}

		if (++attempts_failed >= retry_cnt) {
			PMIXP_ERROR("send failed, rc=%d, exceeded the retry limit", rc);
			break;
		}

		/* back off: sleep, then double the delay for the next round */
		struct timespec pause = {
			.tv_sec = (delay_ms / 1000),
			.tv_nsec = ((delay_ms % 1000) * 1000000),
		};
		nanosleep(&pause, NULL);
		delay_ms *= 2;
	}
	xfree(nodes);

	return rc;
}