Example #1
0
void do_multirecv(int len)
{
	int i, ret;
	ssize_t sz;
	struct fi_cq_tagged_entry s_cqe, d_cqe;
	struct iovec iov;
	struct fi_msg msg;
	uint64_t s[NUMEPS] = {0}, r[NUMEPS] = {0}, s_e[NUMEPS] = {0};
	uint64_t r_e[NUMEPS] = {0};
	uint64_t flags;
	int nrecvs = 3;

	rdm_sr_init_data(source, len, 0xab);
	rdm_sr_init_data(target, len, 0);

	/* Post receives first to force matching in SMSG callback. */
	iov.iov_base = target;
	iov.iov_len = len * nrecvs + 63;

	msg.msg_iov = &iov;
	msg.desc = (void **)rem_mr;
	msg.iov_count = 1;
	msg.addr = gni_addr[0];
	msg.context = source;
	msg.data = (uint64_t)source;

	sz = fi_recvmsg(ep[1], &msg, FI_MULTI_RECV);
	cr_assert_eq(sz, 0);

	for (i = 0; i < nrecvs; i++) {
		sz = fi_send(ep[0], source, len, loc_mr[0], gni_addr[1],
			     target);
		cr_assert_eq(sz, 0);
	}

	/* need to progress both CQs simultaneously for rendezvous */
	do {
		ret = fi_cq_read(msg_cq[0], &s_cqe, 1);
		if (ret == 1) {
			rdm_sr_check_cqe(&s_cqe, target, (FI_MSG|FI_SEND),
					 0, 0, 0, false);
			s[0]++;
		}
		ret = fi_cq_read(msg_cq[1], &d_cqe, 1);
		flags = (r[1] < (nrecvs -1 )) ? FI_MSG | FI_RECV :
				FI_MSG | FI_RECV | FI_MULTI_RECV;
		if (ret == 1) {
			rdm_sr_check_cqe(&d_cqe, source,
					 flags,
					 target + (r[1] * len), len, 0, true);
			cr_assert(rdm_sr_check_data(source, d_cqe.buf, len),
				  "Data mismatch");
			r[1]++;
		}
	} while (s[0] < nrecvs || r[1] < nrecvs);

	rdm_sr_check_cntrs(s, r, s_e, r_e);

	dbg_printf("got context events!\n");
}
Example #2
0
/*
 * Post a receive described by msg: the request is forwarded unchanged to
 * the srx_ctx endpoint of the rxm_ep that contains ep_fid.
 *
 * Returns whatever fi_recvmsg() returns for the forwarded request.
 */
ssize_t rxm_recvmsg(struct fid_ep *ep_fid, const struct fi_msg *msg,
		uint64_t flags)
{
	struct rxm_ep *owner = container_of(ep_fid, struct rxm_ep,
					    util_ep.ep_fid.fid);

	return fi_recvmsg(owner->srx_ctx, msg, flags);
}
Example #3
0
// Post the receive request described by wr on this connection's endpoint,
// requesting a completion (FI_COMPLETION).  On success the receive-request
// counter is bumped; on failure the error is logged and a
// LibfabricException is thrown.
void Connection::post_recv_msg(const struct fi_msg* wr) {
  const int rc = fi_recvmsg(ep_, wr, FI_COMPLETION);
  if (rc == 0) {
    ++total_recv_requests_;
    return;
  }

  L_(fatal) << "fi_recvmsg failed: " << rc << "=" << fi_strerror(-rc);
  throw LibfabricException("fi_recvmsg failed");
}
Example #4
0
/*
ssize_t (*recvmsg)(struct fid_ep *ep, const struct fi_msg *msg,
		   uint64_t flags);
*/
void do_recvmsg(int len)
{
	int ret;
	ssize_t sz;
	int source_done = 0, dest_done = 0;
	struct fi_cq_tagged_entry s_cqe, d_cqe;
	struct iovec iov;
	struct fi_msg msg;
	uint64_t s[NUMEPS] = {0}, r[NUMEPS] = {0}, s_e[NUMEPS] = {0};
	uint64_t r_e[NUMEPS] = {0};

	rdm_sr_init_data(source, len, 0xab);
	rdm_sr_init_data(target, len, 0);

	sz = fi_send(ep[0], source, len, loc_mr[0], gni_addr[1], target);
	cr_assert_eq(sz, 0);

	iov.iov_base = target;
	iov.iov_len = len;

	msg.msg_iov = &iov;
	msg.desc = (void **)rem_mr;
	msg.iov_count = 1;
	msg.addr = gni_addr[0];
	msg.context = source;
	msg.data = (uint64_t)source;

	sz = fi_recvmsg(ep[1], &msg, 0);
	cr_assert_eq(sz, 0);

	/* need to progress both CQs simultaneously for rendezvous */
	do {
		ret = fi_cq_read(msg_cq[0], &s_cqe, 1);
		if (ret == 1) {
			source_done = 1;
		}
		ret = fi_cq_read(msg_cq[1], &d_cqe, 1);
		if (ret == 1) {
			dest_done = 1;
		}
	} while (!(source_done && dest_done));

	rdm_sr_check_cqe(&s_cqe, target, (FI_MSG|FI_SEND), 0, 0, 0, false);
	rdm_sr_check_cqe(&d_cqe, source, (FI_MSG|FI_RECV), target, len, 0,
			false);

	s[0] = 1; r[1] = 1;
	rdm_sr_check_cntrs(s, r, s_e, r_e);

	dbg_printf("got context events!\n");

	cr_assert(rdm_sr_check_data(source, target, len), "Data mismatch");
}
Example #5
0
/*
ssize_t (*recvmsg)(struct fid_ep *ep, const struct fi_msg *msg,
		uint64_t flags);
 */
void do_recvmsg(int len)
{
	int ret;
	ssize_t sz;
	struct fi_cq_entry cqe;
	struct fi_msg msg;
	struct iovec iov;

	rdm_sr_init_data(source, len, 0xab);
	rdm_sr_init_data(target, len, 0);

	sz = fi_send(ep[0], source, len, loc_mr, gni_addr[1], target);
	cr_assert_eq(sz, 0);

	iov.iov_base = target;
	iov.iov_len = len;

	msg.msg_iov = &iov;
	msg.desc = (void **)&rem_mr;
	msg.iov_count = 1;
	msg.addr = gni_addr[0];
	msg.context = source;
	msg.data = (uint64_t)source;

	sz = fi_recvmsg(ep[1], &msg, 0);
	cr_assert_eq(sz, 0);

	while ((ret = fi_cq_read(msg_cq[0], &cqe, 1)) == -FI_EAGAIN) {
		pthread_yield();
	}

	cr_assert_eq(ret, 1);
	cr_assert_eq((uint64_t)cqe.op_context, (uint64_t)target);

	dbg_printf("got send context event!\n");

	while ((ret = fi_cq_read(msg_cq[1], &cqe, 1)) == -FI_EAGAIN) {
		pthread_yield();
	}

	cr_assert_eq(ret, 1);
	cr_assert_eq((uint64_t)cqe.op_context, (uint64_t)source);

	dbg_printf("got recv context event!\n");

	cr_assert(rdm_sr_check_data(source, target, len), "Data mismatch");
}
Example #6
0
/*
 * Progress thread body.
 *
 * Posts num_rbufs multi-receive buffers on ofi.am_rx_ep[id], registers
 * itself in progress_thread_count (signalling progress_thread_enter_cond
 * if it is the first thread to do so), and then polls ofi.am_rx_cq[id],
 * handing every completion to chpl_comm_ofi_am_handler(), until
 * progress_threads_please_exit is set.  On the way out it un-registers
 * itself and signals progress_thread_exit_cond if it was the last thread.
 *
 * args: pointer to this thread's struct progress_thread_info; only its id
 *       field is read here.
 *
 * NOTE(review): the dst_buf allocations are not released in this function.
 */
static void progress_thread(void *args) {
  struct progress_thread_info* pti = args;
  const int id = pti->id;
  const int num_rbufs = 2;
  struct iovec iov[num_rbufs];
  struct fi_msg msg[num_rbufs];
  struct ofi_am_info* dst_buf[num_rbufs];
  const int rbuf_len = 10;
  const size_t rbuf_size = rbuf_len*sizeof(dst_buf[0][0]);
  const int num_cqes = rbuf_len;
  struct fi_cq_data_entry cqes[num_cqes];
  int num_read;

  // fi_msg.desc is an array with one descriptor per iov entry, so it must
  // point AT a descriptor; the descriptor value itself must not be cast to
  // the array pointer.  Each message has a single iov, so one slot suffices.
  void* mr_desc = fi_mr_desc(ofi.mr);

  int i;

  for (i = 0; i < num_rbufs; i++) {
    dst_buf[i] = chpl_mem_allocMany(rbuf_len, sizeof(dst_buf[i][0]),
                                    CHPL_RT_MD_COMM_PER_LOC_INFO, 0, 0);
    iov[i].iov_base = dst_buf[i];
    iov[i].iov_len = rbuf_size;
    msg[i].msg_iov = &iov[i];
    msg[i].desc = &mr_desc;
    msg[i].iov_count = 1;
    msg[i].addr = FI_ADDR_UNSPEC;
    // The buffer index doubles as the operation context.
    msg[i].context = (void *) (uintptr_t) i;
    msg[i].data = 0x0;
    OFICHKERR(fi_recvmsg(ofi.am_rx_ep[id], &msg[i], FI_MULTI_RECV));
  }

  // Count this progress thread as running.  The creator thread wants to
  // be released as soon as at least one progress thread is running, so
  // if we're the first, do that.
  if (atomic_fetch_add_uint_least32_t(&progress_thread_count, 1) == 0) {
    CALL_CHECK_ZERO(pthread_mutex_lock(&progress_thread_entEx_cond_mutex));
    CALL_CHECK_ZERO(pthread_cond_signal(&progress_thread_enter_cond));
    CALL_CHECK_ZERO(pthread_mutex_unlock(&progress_thread_entEx_cond_mutex));
  }

  // Wait for events
  while (!atomic_load_bool(&progress_threads_please_exit)) {
    num_read = fi_cq_read(ofi.am_rx_cq[id], cqes, num_cqes);
    if (num_read > 0) {
      for (i = 0; i < num_read; i++) {
        chpl_comm_ofi_am_handler(&cqes[i]);
        // send ack
      }
    } else {
      // -FI_EAGAIN just means nothing is ready yet; anything else is fatal.
      if (num_read != -FI_EAGAIN) {
        chpl_internal_error(fi_strerror(-num_read));
      }
    }
  }

  // Un-count this progress thread.  Whoever told us to exit wants to
  // be released once all the progress threads are done, so if we're
  // the last, do that.
  if (atomic_fetch_sub_uint_least32_t(&progress_thread_count, 1) == 1) {
    CALL_CHECK_ZERO(pthread_mutex_lock(&progress_thread_entEx_cond_mutex));
    CALL_CHECK_ZERO(pthread_cond_signal(&progress_thread_exit_cond));
    CALL_CHECK_ZERO(pthread_mutex_unlock(&progress_thread_entEx_cond_mutex));
  }
}
Example #7
0
int mrail_cq_process_buf_recv(struct fi_cq_tagged_entry *comp,
			      struct mrail_recv *recv)
{
	struct fi_recv_context *recv_ctx = comp->op_context;
	struct fi_msg msg = {
		.context = recv_ctx,
	};
	struct mrail_ep *mrail_ep;
	struct mrail_pkt *mrail_pkt;
	size_t size, len;
	int ret, retv = 0;

	if (comp->flags & FI_MORE) {
		msg.msg_iov	= recv->iov;
		msg.iov_count	= recv->count;
		msg.addr	= recv->addr;

		recv_ctx->context = recv;

		ret = fi_recvmsg(recv_ctx->ep, &msg, FI_CLAIM);
		if (ret) {
			FI_WARN(&mrail_prov, FI_LOG_CQ,
				"Unable to claim buffered recv\n");
			assert(0);
			// TODO write cq error entry
		}
		return ret;
	}

	mrail_ep = recv_ctx->ep->fid.context;
	mrail_pkt = (struct mrail_pkt *)comp->buf;

	len = comp->len - sizeof(*mrail_pkt);

	size = ofi_copy_to_iov(&recv->iov[1], recv->count - 1, 0,
			       mrail_pkt->data, len);

	if (size < len) {
		FI_WARN(&mrail_prov, FI_LOG_CQ, "Message truncated recv buf "
			"size: %zu message length: %zu\n", size, len);
		ret = ofi_cq_write_error_trunc(
			mrail_ep->util_ep.rx_cq, recv->context,
			recv->comp_flags | (comp->flags & FI_REMOTE_CQ_DATA),
			0, NULL, comp->data, mrail_pkt->hdr.tag, comp->len - size);
		if (ret) {
			FI_WARN(&mrail_prov, FI_LOG_CQ,
				"Unable to write truncation error to util cq\n");
			retv = ret;
		}
		goto out;
	}
	ret = mrail_cq_write_recv_comp(mrail_ep, &mrail_pkt->hdr, comp, recv);
	if (ret)
		retv = ret;
out:
	ret = fi_recvmsg(recv_ctx->ep, &msg, FI_DISCARD);
	if (ret) {
		FI_WARN(&mrail_prov, FI_LOG_CQ,
			"Unable to discard buffered recv\n");
		retv = ret;
	}
	mrail_push_recv(recv);
	return retv;
}