Exemple #1
0
/*
 * Obtain an rx entry for operation `op` and populate it from the caller's
 * receive parameters.
 *
 * Returns the populated entry, or NULL (after logging a warning) when
 * rxd_get_rx_entry() does not hand one out.
 */
struct rxd_x_entry *rxd_rx_entry_init(struct rxd_ep *ep,
			const struct iovec *iov, size_t iov_count, uint64_t tag,
			uint64_t ignore, void *context, fi_addr_t addr,
			uint32_t op, uint32_t flags)
{
	struct rxd_x_entry *entry = rxd_get_rx_entry(ep, op);

	if (entry == NULL) {
		FI_WARN(&rxd_prov, FI_LOG_EP_CTRL, "could not get rx entry\n");
		return NULL;
	}

	/* Operation identity and matching information. */
	entry->op = op;
	entry->peer = addr;
	entry->flags = flags;
	entry->ignore = ignore;

	/* Progress counters start from zero. */
	entry->bytes_done = 0;
	entry->offset = 0;
	entry->next_seg_no = 0;

	/* Keep a private copy of the caller's iovec array. */
	entry->iov_count = iov_count;
	memcpy(entry->iov, iov, iov_count * sizeof(*entry->iov));

	/* Pre-fill the completion queue entry. */
	entry->cq_entry.op_context = context;
	entry->cq_entry.len = ofi_total_iov_len(iov, iov_count);
	entry->cq_entry.buf = iov[0].iov_base;
	entry->cq_entry.tag = tag;
	entry->cq_entry.flags = ofi_rx_cq_flags(op);

	dlist_init(&entry->entry);
	return entry;
}
Exemple #2
0
/*
 * Build match_iov, a view of iov that skips the first `offset` bytes and
 * covers exactly `match_len` bytes after that.
 *
 * iov, count - source iovec array and its entry count (<= RXM_IOV_LIMIT)
 * desc       - optional per-entry descriptors (may be NULL); when given, the
 *              descriptor of each source entry used is copied alongside it
 * offset     - number of bytes to skip from the start of iov
 * match_len  - number of bytes the resulting view must cover
 * match_iov  - output; entries are packed starting at index 0 and
 *              match_iov->count is the number of entries written
 *
 * Returns FI_SUCCESS, or -FI_ETOOSMALL (after logging a warning) when iov
 * cannot supply match_len bytes past offset.
 */
static int rxm_match_iov(const struct iovec *iov, void **desc,
			 uint8_t count, uint64_t offset, size_t match_len,
			 struct rxm_iov *match_iov)
{
	uint8_t i;
	/* Output index: kept separate from the source index so that source
	 * entries consumed entirely by `offset` leave no uninitialized holes
	 * in match_iov and are not included in its count. */
	uint8_t out = 0;

	assert(count <= RXM_IOV_LIMIT);

	for (i = 0; i < count; i++) {
		if (offset >= iov[i].iov_len) {
			offset -= iov[i].iov_len;
			continue;
		}

		match_iov->iov[out].iov_base = (char *)iov[i].iov_base + offset;
		match_iov->iov[out].iov_len = MIN(iov[i].iov_len - offset, match_len);
		if (desc)
			match_iov->desc[out] = desc[i];

		match_len -= match_iov->iov[out].iov_len;
		out++;
		if (!match_len)
			break;
		/* Only the first entry used is entered part-way through. */
		offset = 0;
	}

	if (match_len) {
		FI_WARN(&rxm_prov, FI_LOG_CQ,
			"Given iov size (%zu) < match_len (remained match_len = %zu)!\n",
			ofi_total_iov_len(iov, count), match_len);
		return -FI_ETOOSMALL;
	}

	match_iov->count = out;
	return FI_SUCCESS;
}
Exemple #3
0
/*
 * fi_sendmsg entry point: unpack `msg` and forward it to
 * mrail_send_common() together with the total payload length and the
 * caller's flags OR-ed with mrail_comp_flag().
 */
static ssize_t mrail_sendmsg(struct fid_ep *ep_fid, const struct fi_msg *msg,
			     uint64_t flags)
{
	size_t total_len = ofi_total_iov_len(msg->msg_iov, msg->iov_count);
	uint64_t send_flags = flags | mrail_comp_flag(ep_fid);

	return mrail_send_common(ep_fid, msg->msg_iov, msg->desc,
				 msg->iov_count, total_len, msg->addr,
				 msg->data, msg->context, send_flags);
}
Exemple #4
0
// TODO go for separate recv functions (recvmsg, recvv, etc) to be optimal
/*
 * Post a receive on `recv_queue`.
 *
 * A receive descriptor is taken with mrail_pop_recv() and filled from the
 * arguments; the caller's iovecs are stored starting at slot 1 of
 * recv->iov, and recv->count is count + 1 to include slot 0. The unexpected
 * message list is then searched under the endpoint lock: if nothing
 * matches, the receive is appended to recv_queue->recv_list; otherwise the
 * matched unexpected message is handed to mrail_cq_process_buf_recv().
 *
 * Returns -FI_EAGAIN when no receive descriptor is available, 0 when the
 * receive was queued, or the result of mrail_cq_process_buf_recv().
 */
static ssize_t
mrail_recv_common(struct mrail_ep *mrail_ep, struct mrail_recv_queue *recv_queue,
		  struct iovec *iov, size_t count, void *context,
		  fi_addr_t src_addr, uint64_t tag, uint64_t ignore,
		  uint64_t flags, uint64_t comp_flags)
{
	struct mrail_recv *recv;
	struct mrail_unexp_msg_entry *unexp_msg_entry;
	struct dlist_entry *match;

	recv = mrail_pop_recv(mrail_ep);
	if (!recv)
		return -FI_EAGAIN;

	recv->count 		= count + 1;
	recv->context 		= context;
	recv->flags 		= flags;
	recv->comp_flags 	|= comp_flags;
	recv->addr	 	= src_addr;
	recv->tag 		= tag;
	recv->ignore 		= ignore;

	memcpy(&recv->iov[1], iov, sizeof(*iov) * count);

	FI_DBG(&mrail_prov, FI_LOG_EP_DATA, "Posting recv of length: %zu "
	       "src_addr: 0x%" PRIx64 " tag: 0x%" PRIx64 " ignore: 0x%" PRIx64
	       "\n", ofi_total_iov_len(iov, count), recv->addr,
	       recv->tag, recv->ignore);

	ofi_ep_lock_acquire(&mrail_ep->util_ep);
	match = dlist_remove_first_match(&recv_queue->unexp_msg_list,
					 recv_queue->match_unexp, recv);
	/* Test the raw list entry: container_of() on a NULL pointer only
	 * yields NULL when the member sits at offset 0 of its container. */
	if (!match) {
		dlist_insert_tail(&recv->entry, &recv_queue->recv_list);
		ofi_ep_lock_release(&mrail_ep->util_ep);
		return 0;
	}
	ofi_ep_lock_release(&mrail_ep->util_ep);

	unexp_msg_entry = container_of(match, struct mrail_unexp_msg_entry,
				       entry);

	FI_DBG(recv_queue->prov, FI_LOG_EP_DATA, "Match for posted recv"
	       " with addr: 0x%" PRIx64 ", tag: 0x%" PRIx64 " ignore: "
	       "0x%" PRIx64 " found in unexpected msg queue\n",
	       recv->addr, recv->tag, recv->ignore);

	return mrail_cq_process_buf_recv((struct fi_cq_tagged_entry *)
					 unexp_msg_entry->data, recv);
}
Exemple #5
0
/*
 * fi_writev entry point: describe the remote target as one fi_rma_iov that
 * spans the total length of `iov`, then issue an ofi_op_write through
 * rxd_generic_rma() with the endpoint's default tx flags and no data.
 */
ssize_t rxd_writev(struct fid_ep *ep_fid, const struct iovec *iov, void **desc,
		size_t count, fi_addr_t dest_addr, uint64_t addr, uint64_t key,
		void *context)
{
	struct rxd_ep *rxd_ep = container_of(ep_fid, struct rxd_ep,
					     util_ep.ep_fid.fid);
	struct fi_rma_iov target;

	target.addr = addr;
	target.key = key;
	target.len = ofi_total_iov_len(iov, count);

	return rxd_generic_rma(rxd_ep, iov, count, &target, 1, desc,
			       dest_addr, context, ofi_op_write, 0,
			       rxd_ep_tx_flags(rxd_ep));
}
Exemple #6
0
/*
 * Build and send an injected RMA write.
 *
 * With the endpoint lock and the tx CQ lock held, this checks that the tx
 * CQ has room, calls rxd_send_rts_if_needed() for the peer, creates a tx
 * entry and passes it to rxd_ep_send_op(). The tx entry is freed again if
 * the send fails.
 *
 * Returns 0 on success, -FI_EAGAIN when the tx CQ is full or no tx entry
 * could be obtained, or the error returned by rxd_send_rts_if_needed() or
 * rxd_ep_send_op().
 */
static ssize_t rxd_generic_write_inject(struct rxd_ep *rxd_ep,
		const struct iovec *iov, size_t iov_count,
		const struct fi_rma_iov *rma_iov, size_t rma_count,
		fi_addr_t addr, void *context, uint32_t op, uint64_t data,
		uint32_t rxd_flags)
{
	struct rxd_x_entry *tx_entry;
	fi_addr_t rxd_addr;
	ssize_t ret = -FI_EAGAIN;

	assert(iov_count <= RXD_IOV_LIMIT && rma_count <= RXD_IOV_LIMIT);
	assert(ofi_total_iov_len(iov, iov_count) <= rxd_ep_domain(rxd_ep)->max_inline_rma);

	fastlock_acquire(&rxd_ep->util_ep.lock);
	fastlock_acquire(&rxd_ep->util_ep.tx_cq->cq_lock);

	if (ofi_cirque_isfull(rxd_ep->util_ep.tx_cq->cirq))
		goto out;

	rxd_addr = rxd_ep_av(rxd_ep)->fi_addr_table[addr];
	ret = rxd_send_rts_if_needed(rxd_ep, rxd_addr);
	if (ret)
		goto out;

	tx_entry = rxd_tx_entry_init(rxd_ep, iov, iov_count, NULL, 0, rma_count, data,
				     0, context, rxd_addr, op, rxd_flags);
	if (!tx_entry) {
		/* ret is 0 at this point; without resetting it the caller
		 * would be told the write was queued. */
		ret = -FI_EAGAIN;
		goto out;
	}

	ret = rxd_ep_send_op(rxd_ep, tx_entry, rma_iov, rma_count, NULL, 0, 0, 0);
	if (ret)
		rxd_tx_entry_free(rxd_ep, tx_entry);

out:
	fastlock_release(&rxd_ep->util_ep.tx_cq->cq_lock);
	fastlock_release(&rxd_ep->util_ep.lock);
	return ret;
}
Exemple #7
0
/*
 * Format `cmd` as an inject-sourced atomic command and stage its operands
 * in `tx_buf`.
 *
 * The header is filled by smr_generic_format(); op_src is set to
 * smr_src_inject and src_data to the distance between tx_buf and smr,
 * computed as a difference of char ** pointers.
 *
 * - ofi_op_atomic / ofi_op_atomic_fetch: for FI_ATOMIC_READ the size is the
 *   total length of resultv and nothing is copied; otherwise iov is copied
 *   into tx_buf->data (at most SMR_INJECT_SIZE bytes).
 * - ofi_op_atomic_compare: iov is copied into tx_buf->buf and compv into
 *   tx_buf->comp (at most SMR_COMP_INJECT_SIZE bytes each); a warning is
 *   logged if the two copied sizes differ.
 * - any other op: only the header fields above are set.
 */
static void smr_format_inject_atomic(struct smr_cmd *cmd, fi_addr_t peer_id,
				     const struct iovec *iov, size_t count,
				     const struct iovec *resultv,
				     size_t result_count,
				     const struct iovec *compv,
				     size_t comp_count,
				     uint32_t op, enum fi_datatype datatype,
				     enum fi_op atomic_op,
				     struct smr_region *smr,
				     struct smr_inject_buf *tx_buf)
{
	smr_generic_format(cmd, peer_id, op, 0, datatype,
			   atomic_op, 0, 0);
	cmd->msg.hdr.op_src = smr_src_inject;
	cmd->msg.hdr.src_data = (char **) tx_buf - (char **) smr;

	if (op == ofi_op_atomic || op == ofi_op_atomic_fetch) {
		if (atomic_op != FI_ATOMIC_READ) {
			cmd->msg.hdr.size = ofi_copy_from_iov(tx_buf->data,
						SMR_INJECT_SIZE, iov, count, 0);
		} else {
			cmd->msg.hdr.size = ofi_total_iov_len(resultv,
							      result_count);
		}
	} else if (op == ofi_op_atomic_compare) {
		size_t compare_len;

		cmd->msg.hdr.size = ofi_copy_from_iov(tx_buf->buf,
						SMR_COMP_INJECT_SIZE, iov, count, 0);
		compare_len = ofi_copy_from_iov(tx_buf->comp,
						SMR_COMP_INJECT_SIZE,
						compv, comp_count, 0);
		if (compare_len != cmd->msg.hdr.size) {
			FI_WARN(&smr_prov, FI_LOG_EP_CTRL,
				"atomic and compare buffer size mimatch\n");
		}
	}
}
Exemple #8
0
/*
 * fi_readv entry point: wrap the arguments in an fi_msg_rma with a single
 * remote segment spanning the total length of `iov`, and issue it through
 * rxm_ep_rma_common() as an FI_READ using the endpoint's default tx flags.
 */
static ssize_t rxm_ep_readv(struct fid_ep *ep_fid, const struct iovec *iov,
			    void **desc, size_t count, fi_addr_t src_addr,
			    uint64_t addr, uint64_t key, void *context)
{
	struct rxm_ep *rxm_ep;
	struct fi_rma_iov remote = { 0 };
	struct fi_msg_rma rma_msg = { 0 };

	rxm_ep = container_of(ep_fid, struct rxm_ep, util_ep.ep_fid.fid);

	remote.addr = addr;
	remote.len = ofi_total_iov_len(iov, count);
	remote.key = key;

	rma_msg.msg_iov = iov;
	rma_msg.desc = desc;
	rma_msg.iov_count = count;
	rma_msg.addr = src_addr;
	rma_msg.rma_iov = &remote;
	rma_msg.rma_iov_count = 1;
	rma_msg.context = context;
	rma_msg.data = 0;

	return rxm_ep_rma_common(rxm_ep, &rma_msg, rxm_ep_tx_flags(rxm_ep), fi_readmsg, FI_READ);
}

/*
 * fi_read entry point: describe the single local buffer and the single
 * remote segment of length `len`, then issue the read through
 * rxm_ep_rma_common() as an FI_READ using the endpoint's default tx flags.
 */
static ssize_t rxm_ep_read(struct fid_ep *ep_fid, void *buf, size_t len,
			   void *desc, fi_addr_t src_addr, uint64_t addr,
			   uint64_t key, void *context)
{
	struct rxm_ep *rxm_ep;
	struct iovec local = { 0 };
	struct fi_rma_iov remote = { 0 };
	struct fi_msg_rma rma_msg = { 0 };

	rxm_ep = container_of(ep_fid, struct rxm_ep, util_ep.ep_fid.fid);

	local.iov_base = (void*)buf;
	local.iov_len = len;

	remote.addr = addr;
	remote.len = len;
	remote.key = key;

	rma_msg.msg_iov = &local;
	rma_msg.desc = &desc;
	rma_msg.iov_count = 1;
	rma_msg.addr = src_addr;
	rma_msg.rma_iov = &remote;
	rma_msg.rma_iov_count = 1;
	rma_msg.context = context;
	rma_msg.data = 0;

	return rxm_ep_rma_common(rxm_ep, &rma_msg, rxm_ep_tx_flags(rxm_ep), fi_readmsg, FI_READ);
}

/*
 * Fill rxm_msg so that it sends the payload of orig_msg out of rma_buf.
 *
 * The source iovecs of orig_msg are copied into rma_buf->pkt.data (bounded
 * by rma_buf->pkt.hdr.size, which the caller must have set). rxm_iov is
 * pointed at that copy and becomes the single local segment of rxm_msg;
 * the remote segments, address and data are taken from orig_msg, and
 * rma_buf itself becomes the context.
 */
static inline void
rxm_ep_format_rma_msg(struct rxm_rma_buf *rma_buf, const struct fi_msg_rma *orig_msg,
		      struct iovec *rxm_iov, struct fi_msg_rma *rxm_msg)
{
	/* Stage the payload in the RMA buffer. */
	ofi_copy_from_iov(rma_buf->pkt.data, rma_buf->pkt.hdr.size,
			  orig_msg->msg_iov, orig_msg->iov_count, 0);

	/* One local segment covering the staged copy. */
	rxm_iov->iov_base = &rma_buf->pkt.data;
	rxm_iov->iov_len = rma_buf->pkt.hdr.size;

	rxm_msg->msg_iov = rxm_iov;
	rxm_msg->desc = &rma_buf->hdr.desc;
	rxm_msg->iov_count = 1;

	/* Everything else is inherited from the original message. */
	rxm_msg->addr = orig_msg->addr;
	rxm_msg->rma_iov = orig_msg->rma_iov;
	rxm_msg->rma_iov_count = orig_msg->rma_iov_count;
	rxm_msg->data = orig_msg->data;

	rxm_msg->context = rma_buf;
}

static inline ssize_t
rxm_ep_rma_emulate_inject_msg(struct rxm_ep *rxm_ep, struct rxm_conn *rxm_conn, size_t total_size,
			      const struct fi_msg_rma *msg, uint64_t flags)
{
	struct rxm_rma_buf *rma_buf;
	ssize_t ret;
	struct iovec rxm_msg_iov = { 0 };
	struct fi_msg_rma rxm_rma_msg = { 0 };

	assert(msg->rma_iov_count <= rxm_ep->rxm_info->tx_attr->rma_iov_limit);

	ofi_ep_lock_acquire(&rxm_ep->util_ep);
	rma_buf = rxm_rma_buf_alloc(rxm_ep);
	if (OFI_UNLIKELY(!rma_buf)) {
		FI_WARN(&rxm_prov, FI_LOG_EP_DATA,
			"Ran out of buffers from RMA buffer pool\n");
		ret = -FI_ENOMEM;
		goto unlock;
	}

	rma_buf->pkt.hdr.size = total_size;
	rma_buf->app_context = msg->context;
	rma_buf->flags = flags;
	rxm_ep_format_rma_msg(rma_buf, msg, &rxm_msg_iov, &rxm_rma_msg);

	flags = (flags & ~FI_INJECT) | FI_COMPLETION;

	ret = fi_writemsg(rxm_conn->msg_ep, &rxm_rma_msg, flags);
	if (OFI_UNLIKELY(ret)) {
		if (ret == -FI_EAGAIN)
			rxm_ep_do_progress(&rxm_ep->util_ep);
		ofi_buf_free(rma_buf);
	}
unlock:
	ofi_ep_lock_release(&rxm_ep->util_ep);
	return ret;
}

/*
 * Single-buffer convenience wrapper around rxm_ep_rma_emulate_inject_msg():
 * packages `buf`/`len` and the remote addr/key into an fi_msg_rma with no
 * descriptor and no context, then forwards it with `len` as the total size.
 */
static inline ssize_t
rxm_ep_rma_emulate_inject(struct rxm_ep *rxm_ep, struct rxm_conn *rxm_conn,
			  const void *buf, size_t len, uint64_t data,
			  fi_addr_t dest_addr, uint64_t addr, uint64_t key,
			  uint64_t flags)
{
	struct iovec local = { 0 };
	struct fi_rma_iov remote = { 0 };
	struct fi_msg_rma rma_msg = { 0 };

	local.iov_base = (void*)buf;
	local.iov_len = len;

	remote.addr = addr;
	remote.len = len;
	remote.key = key;

	rma_msg.msg_iov = &local;
	rma_msg.desc = NULL;
	rma_msg.iov_count = 1;
	rma_msg.addr = dest_addr;
	rma_msg.rma_iov = &remote;
	rma_msg.rma_iov_count = 1;
	rma_msg.context = NULL;
	rma_msg.data = data;

	return rxm_ep_rma_emulate_inject_msg(rxm_ep, rxm_conn, len, &rma_msg, flags);
}

static inline ssize_t
rxm_ep_rma_inject_common(struct rxm_ep *rxm_ep, const struct fi_msg_rma *msg, uint64_t flags)
{
	struct rxm_conn *rxm_conn;
	size_t total_size = ofi_total_iov_len(msg->msg_iov, msg->iov_count);
	ssize_t ret;

	assert(total_size <= rxm_ep->rxm_info->tx_attr->inject_size);

	ret = rxm_ep_prepare_tx(rxm_ep, msg->addr, &rxm_conn);
	if (OFI_UNLIKELY(ret))
		return ret;

	if ((total_size <= rxm_ep->msg_info->tx_attr->inject_size) &&
	    !(flags & FI_COMPLETION) &&
	    (msg->iov_count == 1) && (msg->rma_iov_count == 1)) {
		if (flags & FI_REMOTE_CQ_DATA) {
			ret = fi_inject_writedata(rxm_conn->msg_ep,
						  msg->msg_iov->iov_base,
						  msg->msg_iov->iov_len, msg->data,
						  msg->addr, msg->rma_iov->addr,
						  msg->rma_iov->key);
		} else {
			ret = fi_inject_write(rxm_conn->msg_ep,
					      msg->msg_iov->iov_base,
					      msg->msg_iov->iov_len, msg->addr,
					      msg->rma_iov->addr,
					      msg->rma_iov->key);
		}
		if (OFI_LIKELY(!ret)) {
			ofi_ep_wr_cntr_inc(&rxm_ep->util_ep);
		} else {
			FI_DBG(&rxm_prov, FI_LOG_EP_DATA,
			       "fi_inject_write* for MSG provider failed with ret - %"
			       PRId64"\n", ret);
			if (OFI_LIKELY(ret == -FI_EAGAIN))
				rxm_ep_progress(&rxm_ep->util_ep);
		}
		return ret;
	} else {
		return rxm_ep_rma_emulate_inject_msg(rxm_ep, rxm_conn, total_size, msg, flags);
	}
}

/*
 * Route an RMA write: messages flagged FI_INJECT take the inject path,
 * everything else is issued through rxm_ep_rma_common() as an FI_WRITE
 * using fi_writemsg.
 */
static inline ssize_t
rxm_ep_generic_writemsg(struct fid_ep *ep_fid, const struct fi_msg_rma *msg,
			uint64_t flags)
{
	struct rxm_ep *rxm_ep;

	rxm_ep = container_of(ep_fid, struct rxm_ep, util_ep.ep_fid.fid);

	if (!(flags & FI_INJECT)) {
		return rxm_ep_rma_common(rxm_ep, msg, flags,
					 fi_writemsg, FI_WRITE);
	}

	return rxm_ep_rma_inject_common(rxm_ep, msg, flags);
}

/*
 * fi_writemsg entry point: add the endpoint's tx_msg_flags to the caller's
 * flags and hand the message to rxm_ep_generic_writemsg().
 */
static inline ssize_t
rxm_ep_writemsg(struct fid_ep *ep_fid, const struct fi_msg_rma *msg, uint64_t flags)
{
	struct rxm_ep *rxm_ep;

	rxm_ep = container_of(ep_fid, struct rxm_ep, util_ep.ep_fid.fid);
	flags |= rxm_ep->util_ep.tx_msg_flags;

	return rxm_ep_generic_writemsg(ep_fid, msg, flags);
}

/*
 * fi_writev entry point: wrap the arguments in an fi_msg_rma with a single
 * remote segment spanning the total length of `iov`, and pass it to
 * rxm_ep_generic_writemsg() with the endpoint's default tx flags.
 */
static ssize_t rxm_ep_writev(struct fid_ep *ep_fid, const struct iovec *iov,
			     void **desc, size_t count, fi_addr_t dest_addr,
			     uint64_t addr, uint64_t key, void *context)
{
	struct rxm_ep *rxm_ep;
	struct fi_rma_iov remote = { 0 };
	struct fi_msg_rma rma_msg = { 0 };

	remote.addr = addr;
	remote.len = ofi_total_iov_len(iov, count);
	remote.key = key;

	rma_msg.msg_iov = iov;
	rma_msg.desc = desc;
	rma_msg.iov_count = count;
	rma_msg.addr = dest_addr;
	rma_msg.rma_iov = &remote;
	rma_msg.rma_iov_count = 1;
	rma_msg.context = context;
	rma_msg.data = 0;

	rxm_ep = container_of(ep_fid, struct rxm_ep, util_ep.ep_fid.fid);

	return rxm_ep_generic_writemsg(ep_fid, &rma_msg, rxm_ep_tx_flags(rxm_ep));
}

/*
 * fi_writedata entry point: single-buffer write carrying remote CQ data.
 * The message is passed to rxm_ep_generic_writemsg() with the endpoint's
 * default tx flags plus FI_REMOTE_CQ_DATA.
 */
static ssize_t rxm_ep_writedata(struct fid_ep *ep_fid, const void *buf,
				size_t len, void *desc, uint64_t data,
				fi_addr_t dest_addr, uint64_t addr,
				uint64_t key, void *context)
{
	struct rxm_ep *rxm_ep;
	struct iovec local = { 0 };
	struct fi_rma_iov remote = { 0 };
	struct fi_msg_rma rma_msg = { 0 };

	local.iov_base = (void*)buf;
	local.iov_len = len;

	remote.addr = addr;
	remote.len = len;
	remote.key = key;

	rma_msg.msg_iov = &local;
	rma_msg.desc = &desc;
	rma_msg.iov_count = 1;
	rma_msg.addr = dest_addr;
	rma_msg.rma_iov = &remote;
	rma_msg.rma_iov_count = 1;
	rma_msg.context = context;
	rma_msg.data = data;

	rxm_ep = container_of(ep_fid, struct rxm_ep, util_ep.ep_fid.fid);

	return rxm_ep_generic_writemsg(ep_fid, &rma_msg, rxm_ep_tx_flags(rxm_ep) |
				       FI_REMOTE_CQ_DATA);
}

/*
 * fi_write entry point: single-buffer write without remote CQ data,
 * passed to rxm_ep_generic_writemsg() with the endpoint's default tx flags.
 */
static ssize_t rxm_ep_write(struct fid_ep *ep_fid, const void *buf,
			    size_t len, void *desc, fi_addr_t dest_addr,
			    uint64_t addr, uint64_t key, void *context)
{
	struct rxm_ep *rxm_ep;
	struct iovec local = { 0 };
	struct fi_rma_iov remote = { 0 };
	struct fi_msg_rma rma_msg = { 0 };

	local.iov_base = (void*)buf;
	local.iov_len = len;

	remote.addr = addr;
	remote.len = len;
	remote.key = key;

	rma_msg.msg_iov = &local;
	rma_msg.desc = &desc;
	rma_msg.iov_count = 1;
	rma_msg.addr = dest_addr;
	rma_msg.rma_iov = &remote;
	rma_msg.rma_iov_count = 1;
	rma_msg.context = context;
	rma_msg.data = 0;

	rxm_ep = container_of(ep_fid, struct rxm_ep, util_ep.ep_fid.fid);

	return rxm_ep_generic_writemsg(ep_fid, &rma_msg, rxm_ep_tx_flags(rxm_ep));
}

/*
 * fi_inject_write entry point.
 *
 * After rxm_ep_prepare_tx() yields the connection for dest_addr, a payload
 * that fits the MSG provider's inject size is sent with fi_inject_write()
 * on the MSG endpoint: on success the write counter is bumped, on
 * -FI_EAGAIN progress is driven before returning. A larger payload is
 * handed to rxm_ep_rma_emulate_inject() with FI_INJECT.
 *
 * Returns 0 on success or a negative fabric error code.
 */
static ssize_t rxm_ep_inject_write(struct fid_ep *ep_fid, const void *buf,
				   size_t len, fi_addr_t dest_addr,
				   uint64_t addr, uint64_t key)
{
	ssize_t ret;
	struct rxm_conn *rxm_conn;
	struct rxm_ep *rxm_ep = container_of(ep_fid, struct rxm_ep,
					     util_ep.ep_fid.fid);

	ret = rxm_ep_prepare_tx(rxm_ep, dest_addr, &rxm_conn);
	if (OFI_UNLIKELY(ret))
		return ret;

	if (len <= rxm_ep->msg_info->tx_attr->inject_size) {
		ret = fi_inject_write(rxm_conn->msg_ep, buf, len,
				      dest_addr, addr, key);
		if (OFI_LIKELY(!ret)) {
			ofi_ep_wr_cntr_inc(&rxm_ep->util_ep);
		} else {
			/* ret is ssize_t, so it is printed with %zd rather
			 * than a fixed-width 64-bit conversion. */
			FI_DBG(&rxm_prov, FI_LOG_EP_DATA,
			       "fi_inject_write for MSG provider failed with ret - %zd\n",
			       ret);
			if (OFI_LIKELY(ret == -FI_EAGAIN))
				rxm_ep_progress(&rxm_ep->util_ep);
		}
		return ret;
	} else {
		return rxm_ep_rma_emulate_inject(rxm_ep, rxm_conn, buf, len,
						 0, dest_addr, addr, key, FI_INJECT);
	}
}

/*
 * fi_inject_writedata entry point.
 *
 * After rxm_ep_prepare_tx() yields the connection for dest_addr, a payload
 * that fits the MSG provider's inject size is sent with
 * fi_inject_writedata() on the MSG endpoint: on success the write counter
 * is bumped, on -FI_EAGAIN progress is driven before returning. A larger
 * payload is handed to rxm_ep_rma_emulate_inject() with
 * FI_REMOTE_CQ_DATA | FI_INJECT.
 *
 * Returns 0 on success or a negative fabric error code.
 */
static ssize_t rxm_ep_inject_writedata(struct fid_ep *ep_fid, const void *buf,
				       size_t len, uint64_t data,
				       fi_addr_t dest_addr, uint64_t addr,
				       uint64_t key)
{
	ssize_t ret;
	struct rxm_conn *rxm_conn;
	struct rxm_ep *rxm_ep = container_of(ep_fid, struct rxm_ep,
					     util_ep.ep_fid.fid);
	ret = rxm_ep_prepare_tx(rxm_ep, dest_addr, &rxm_conn);
	if (OFI_UNLIKELY(ret))
		return ret;

	if (len <= rxm_ep->msg_info->tx_attr->inject_size) {
		ret = fi_inject_writedata(rxm_conn->msg_ep, buf, len,
					  data, dest_addr, addr, key);
		if (OFI_LIKELY(!ret)) {
			ofi_ep_wr_cntr_inc(&rxm_ep->util_ep);
		} else {
			/* ret is ssize_t, so it is printed with %zd rather
			 * than a fixed-width 64-bit conversion. */
			FI_DBG(&rxm_prov, FI_LOG_EP_DATA,
			       "fi_inject_writedata for MSG provider failed with ret - %zd\n",
			       ret);
			if (OFI_LIKELY(ret == -FI_EAGAIN))
				rxm_ep_progress(&rxm_ep->util_ep);
		}
		return ret;
	} else {
		return rxm_ep_rma_emulate_inject(rxm_ep, rxm_conn, buf, len,
						 data, dest_addr, addr, key,
						 FI_REMOTE_CQ_DATA | FI_INJECT);
	}
}

/*
 * RMA operation table for rxm endpoints: maps each fi_ops_rma slot to the
 * corresponding rxm_ep_* implementation.
 */
struct fi_ops_rma rxm_ops_rma = {
	.size = sizeof (struct fi_ops_rma),
	.read = rxm_ep_read,
	.readv = rxm_ep_readv,
	.readmsg = rxm_ep_readmsg,
	.write = rxm_ep_write,
	.writev = rxm_ep_writev,
	.writemsg = rxm_ep_writemsg,
	.inject = rxm_ep_inject_write,
	.writedata = rxm_ep_writedata,
	.injectdata = rxm_ep_inject_writedata,
};
Exemple #9
0
/*
 * Obtain a tx entry for operation `op`, populate it from the caller's
 * parameters, work out how many segments the transfer needs, and queue the
 * entry on one of the peer's lists.
 *
 * Returns the populated entry, or NULL (after logging a warning) when
 * rxd_get_tx_entry() does not hand one out.
 */
struct rxd_x_entry *rxd_tx_entry_init(struct rxd_ep *ep, const struct iovec *iov,
				      size_t iov_count, const struct iovec *res_iov,
				      size_t res_count, size_t rma_count,
				      uint64_t data, uint64_t tag, void *context,
				      fi_addr_t addr, uint32_t op, uint32_t flags)
{
	struct rxd_x_entry *tx_entry;
	struct rxd_domain *rxd_domain = rxd_ep_domain(ep);
	size_t max_inline;

	tx_entry = rxd_get_tx_entry(ep, op);
	if (!tx_entry) {
		FI_WARN(&rxd_prov, FI_LOG_EP_CTRL, "could not get tx entry\n");
		return NULL;
	}

	tx_entry->op = op;
	tx_entry->peer = addr;
	tx_entry->flags = flags;
	tx_entry->bytes_done = 0;
	tx_entry->offset = 0;
	tx_entry->next_seg_no = 0;
	tx_entry->iov_count = iov_count;
	memcpy(&tx_entry->iov[0], iov, sizeof(*iov) * iov_count);
	/* res_count and res_iov are only written when result iovecs are given;
	 * NOTE(review): with res_count == 0 the entry keeps whatever value it
	 * already held - confirm rxd_get_tx_entry() resets it. */
	if (res_count) {
		tx_entry->res_count = res_count;
		memcpy(&tx_entry->res_iov[0], res_iov, sizeof(*res_iov) * res_count);
	}

	tx_entry->cq_entry.op_context = context;
	tx_entry->cq_entry.len = ofi_total_iov_len(iov, iov_count);
	tx_entry->cq_entry.buf = iov[0].iov_base;
	tx_entry->cq_entry.flags = ofi_tx_cq_flags(op);
	tx_entry->cq_entry.tag = tag;

	tx_entry->pkt = NULL;

	/* Start from the domain's inline message limit and subtract the size
	 * of each optional header field this operation uses. */
	max_inline = rxd_domain->max_inline_msg;
	if (tx_entry->cq_entry.flags & FI_RMA)
		max_inline -= sizeof(struct ofi_rma_iov) * rma_count;

	if (tx_entry->flags & RXD_TAG_HDR)
		max_inline -= sizeof(tx_entry->cq_entry.tag);
	if (tx_entry->flags & RXD_REMOTE_CQ_DATA) {
		max_inline -= sizeof(tx_entry->cq_entry.data);
		tx_entry->cq_entry.data = data;
	}

	/* Multiple RMA iovs, reads, and payloads larger than max_inline also
	 * give up room for a rxd_sar_hdr; every other transfer is flagged
	 * RXD_INLINE instead. */
	if (rma_count > 1 || tx_entry->cq_entry.flags & FI_READ ||
	    tx_entry->cq_entry.len > max_inline)
		max_inline -= sizeof(struct rxd_sar_hdr);
	else
		tx_entry->flags |= RXD_INLINE;

	/* Segment count: atomics and payloads within max_inline use a single
	 * segment; reads divide the whole length by max_seg_sz; anything else
	 * divides the part beyond max_inline by max_seg_sz and adds one. */
	if (tx_entry->cq_entry.flags & FI_ATOMIC || tx_entry->cq_entry.len <= max_inline)
		tx_entry->num_segs = 1;
	else if (tx_entry->cq_entry.flags & FI_READ)
		tx_entry->num_segs = ofi_div_ceil(tx_entry->cq_entry.len,
						  rxd_domain->max_seg_sz);
	else
		tx_entry->num_segs = ofi_div_ceil(tx_entry->cq_entry.len - max_inline,
						  rxd_domain->max_seg_sz) + 1;

	/* Read requests and fetching/comparing atomics go on the peer's
	 * rma_rx_list when unacked_cnt is below tx_window and peer_addr is not
	 * FI_ADDR_UNSPEC; everything else goes on the peer's tx_list. */
	if ((tx_entry->op == RXD_READ_REQ || tx_entry->op == RXD_ATOMIC_FETCH ||
	     tx_entry->op == RXD_ATOMIC_COMPARE) &&
	    ep->peers[tx_entry->peer].unacked_cnt < ep->peers[tx_entry->peer].tx_window &&
	    ep->peers[tx_entry->peer].peer_addr != FI_ADDR_UNSPEC)
		dlist_insert_tail(&tx_entry->entry,
				  &ep->peers[tx_entry->peer].rma_rx_list);
	else
		dlist_insert_tail(&tx_entry->entry,
				  &ep->peers[tx_entry->peer].tx_list);

	return tx_entry;
}
Exemple #10
0
/*
 * fi_readv entry point: wrap the arguments in an fi_msg_rma with a single
 * remote segment spanning the total length of `iov`, and forward it to
 * rxm_ep_readmsg() with the endpoint's default tx flags.
 */
static ssize_t rxm_ep_readv(struct fid_ep *ep_fid, const struct iovec *iov,
			    void **desc, size_t count, fi_addr_t src_addr,
			    uint64_t addr, uint64_t key, void *context)
{
	struct rxm_ep *rxm_ep;
	struct fi_rma_iov remote = { 0 };
	struct fi_msg_rma rma_msg = { 0 };

	remote.addr = addr;
	remote.len = ofi_total_iov_len(iov, count);
	remote.key = key;

	rma_msg.msg_iov = iov;
	rma_msg.desc = desc;
	rma_msg.iov_count = count;
	rma_msg.addr = src_addr;
	rma_msg.rma_iov = &remote;
	rma_msg.rma_iov_count = 1;
	rma_msg.context = context;
	rma_msg.data = 0;

	rxm_ep = container_of(ep_fid, struct rxm_ep, util_ep.ep_fid.fid);

	return rxm_ep_readmsg(ep_fid, &rma_msg, rxm_ep_tx_flags(rxm_ep));
}

/*
 * fi_read entry point: describe the single local buffer and the single
 * remote segment of length `len`, then forward the message to
 * rxm_ep_readmsg() with the endpoint's default tx flags.
 */
static ssize_t rxm_ep_read(struct fid_ep *ep_fid, void *buf, size_t len,
			   void *desc, fi_addr_t src_addr, uint64_t addr,
			   uint64_t key, void *context)
{
	struct rxm_ep *rxm_ep;
	struct iovec local = { 0 };
	struct fi_rma_iov remote = { 0 };
	struct fi_msg_rma rma_msg = { 0 };

	local.iov_base = (void*)buf;
	local.iov_len = len;

	remote.addr = addr;
	remote.len = len;
	remote.key = key;

	rma_msg.msg_iov = &local;
	rma_msg.desc = &desc;
	rma_msg.iov_count = 1;
	rma_msg.addr = src_addr;
	rma_msg.rma_iov = &remote;
	rma_msg.rma_iov_count = 1;
	rma_msg.context = context;
	rma_msg.data = 0;

	rxm_ep = container_of(ep_fid, struct rxm_ep, util_ep.ep_fid.fid);

	return rxm_ep_readmsg(ep_fid, &rma_msg, rxm_ep_tx_flags(rxm_ep));
}

static ssize_t rxm_ep_rma_inject(struct fid_ep *msg_ep, struct rxm_ep *rxm_ep,
				 const struct fi_msg_rma *msg, uint64_t flags)
{
	struct rxm_tx_entry *tx_entry;
	struct rxm_tx_buf *tx_buf;
	struct fi_msg_rma msg_rma;
	struct iovec iov;
	size_t size;
	ssize_t ret;

	size = ofi_total_iov_len(msg->msg_iov, msg->iov_count);

	if (size > rxm_ep->rxm_info->tx_attr->inject_size)
		return -FI_EMSGSIZE;

	/* Use fi_inject_write instead of fi_writemsg since the latter generates
	 * completion by default */
	if (size <= rxm_ep->msg_info->tx_attr->inject_size &&
	    !(flags & FI_COMPLETION)) {
		if (flags & FI_REMOTE_CQ_DATA)
			return fi_inject_writedata(msg_ep, msg->msg_iov->iov_base,
					       msg->msg_iov->iov_len, msg->data,
					       msg->addr, msg->rma_iov->addr,
					       msg->rma_iov->key);
		else
			return fi_inject_write(msg_ep, msg->msg_iov->iov_base,
					       msg->msg_iov->iov_len, msg->addr,
					       msg->rma_iov->addr,
					       msg->rma_iov->key);
	}

	tx_buf = rxm_tx_buf_get(rxm_ep, RXM_BUF_POOL_TX_MSG);
	if (!tx_buf) {
		FI_WARN(&rxm_prov, FI_LOG_CQ, "TX queue full!\n");
		rxm_ep_progress_multi(&rxm_ep->util_ep);
		return -FI_EAGAIN;
	}

	tx_entry = rxm_tx_entry_get(&rxm_ep->send_queue);
	if (!tx_entry) {
		rxm_ep_progress_multi(&rxm_ep->util_ep);
		ret = -FI_EAGAIN;
		goto err1;
	}

	tx_entry->state = RXM_TX;
	tx_entry->flags = flags;
	tx_entry->comp_flags = FI_RMA | FI_WRITE;
	tx_entry->tx_buf = tx_buf;

	ofi_copy_from_iov(tx_buf->pkt.data, size, msg->msg_iov, msg->iov_count, 0);

	iov.iov_base = &tx_buf->pkt.data;
	iov.iov_len = size;

	msg_rma.msg_iov = &iov;
	msg_rma.desc = &tx_buf->hdr.desc;
	msg_rma.iov_count = 1;
	msg_rma.addr = msg->addr;
	msg_rma.rma_iov = msg->rma_iov;
	msg_rma.rma_iov_count = msg->rma_iov_count;
	msg_rma.context = tx_entry;
	msg_rma.data = msg->data;
	flags = (flags & ~FI_INJECT) | FI_COMPLETION;

	ret = fi_writemsg(msg_ep, &msg_rma, flags);
	if (ret) {
		if (ret == -FI_EAGAIN)
			rxm_ep_progress_multi(&rxm_ep->util_ep);
		goto err2;
	}
	return 0;
err2:
	rxm_tx_entry_release(&rxm_ep->send_queue, tx_entry);
err1:
	rxm_tx_buf_release(rxm_ep, tx_buf);
	return ret;
}

/*
 * fi_writemsg entry point: look up the connection handle for msg->addr in
 * the endpoint's connection map, then route the write either to the inject
 * path (FI_INJECT set) or to rxm_ep_rma_common() as an FI_WRITE.
 *
 * Returns the lookup error if ofi_cmap_get_handle() fails, otherwise the
 * result of the chosen path.
 */
static ssize_t rxm_ep_writemsg(struct fid_ep *ep_fid, const struct fi_msg_rma *msg,
			       uint64_t flags)
{
	struct rxm_ep *rxm_ep = container_of(ep_fid, struct rxm_ep,
					     util_ep.ep_fid.fid);
	struct util_cmap_handle *handle;
	struct rxm_conn *rxm_conn;
	int ret;

	ret = ofi_cmap_get_handle(rxm_ep->util_ep.cmap, msg->addr, &handle);
	if (OFI_UNLIKELY(ret))
		return ret;

	rxm_conn = container_of(handle, struct rxm_conn, handle);

	if (!(flags & FI_INJECT)) {
		return rxm_ep_rma_common(rxm_conn->msg_ep, rxm_ep, msg, flags,
					 fi_writemsg, FI_WRITE);
	}

	return rxm_ep_rma_inject(rxm_conn->msg_ep, rxm_ep, msg, flags);
}

/*
 * fi_writev entry point: wrap the arguments in an fi_msg_rma with a single
 * remote segment spanning the total length of `iov`, and forward it to
 * rxm_ep_writemsg() with the endpoint's default tx flags.
 */
static ssize_t rxm_ep_writev(struct fid_ep *ep_fid, const struct iovec *iov,
			     void **desc, size_t count, fi_addr_t dest_addr,
			     uint64_t addr, uint64_t key, void *context)
{
	struct rxm_ep *rxm_ep;
	struct fi_rma_iov remote = { 0 };
	struct fi_msg_rma rma_msg = { 0 };

	remote.addr = addr;
	remote.len = ofi_total_iov_len(iov, count);
	remote.key = key;

	rma_msg.msg_iov = iov;
	rma_msg.desc = desc;
	rma_msg.iov_count = count;
	rma_msg.addr = dest_addr;
	rma_msg.rma_iov = &remote;
	rma_msg.rma_iov_count = 1;
	rma_msg.context = context;
	rma_msg.data = 0;

	rxm_ep = container_of(ep_fid, struct rxm_ep, util_ep.ep_fid.fid);

	return rxm_ep_writemsg(ep_fid, &rma_msg, rxm_ep_tx_flags(rxm_ep));
}

/*
 * fi_writedata entry point: single-buffer write carrying remote CQ data,
 * forwarded to rxm_ep_writemsg() with the endpoint's default tx flags plus
 * FI_REMOTE_CQ_DATA.
 */
static ssize_t rxm_ep_writedata(struct fid_ep *ep_fid, const void *buf,
				size_t len, void *desc, uint64_t data,
				fi_addr_t dest_addr, uint64_t addr,
				uint64_t key, void *context)
{
	struct rxm_ep *rxm_ep;
	struct iovec local = { 0 };
	struct fi_rma_iov remote = { 0 };
	struct fi_msg_rma rma_msg = { 0 };

	local.iov_base = (void*)buf;
	local.iov_len = len;

	remote.addr = addr;
	remote.len = len;
	remote.key = key;

	rma_msg.msg_iov = &local;
	rma_msg.desc = &desc;
	rma_msg.iov_count = 1;
	rma_msg.addr = dest_addr;
	rma_msg.rma_iov = &remote;
	rma_msg.rma_iov_count = 1;
	rma_msg.context = context;
	rma_msg.data = data;

	rxm_ep = container_of(ep_fid, struct rxm_ep, util_ep.ep_fid.fid);

	return rxm_ep_writemsg(ep_fid, &rma_msg, rxm_ep_tx_flags(rxm_ep) |
			       FI_REMOTE_CQ_DATA);
}

/*
 * fi_write entry point: single-buffer write without remote CQ data,
 * forwarded to rxm_ep_writemsg() with the endpoint's default tx flags.
 */
static ssize_t rxm_ep_write(struct fid_ep *ep_fid, const void *buf,
			    size_t len, void *desc, fi_addr_t dest_addr,
			    uint64_t addr, uint64_t key, void *context)
{
	struct rxm_ep *rxm_ep;
	struct iovec local = { 0 };
	struct fi_rma_iov remote = { 0 };
	struct fi_msg_rma rma_msg = { 0 };

	local.iov_base = (void*)buf;
	local.iov_len = len;

	remote.addr = addr;
	remote.len = len;
	remote.key = key;

	rma_msg.msg_iov = &local;
	rma_msg.desc = &desc;
	rma_msg.iov_count = 1;
	rma_msg.addr = dest_addr;
	rma_msg.rma_iov = &remote;
	rma_msg.rma_iov_count = 1;
	rma_msg.context = context;
	rma_msg.data = 0;

	rxm_ep = container_of(ep_fid, struct rxm_ep, util_ep.ep_fid.fid);

	return rxm_ep_writemsg(ep_fid, &rma_msg, rxm_ep_tx_flags(rxm_ep));
}

/*
 * fi_inject_write entry point: single-buffer write with no descriptor and
 * no context, forwarded to rxm_ep_writemsg() with the endpoint's default tx
 * flags, FI_COMPLETION cleared and FI_INJECT set.
 */
static ssize_t rxm_ep_inject_write(struct fid_ep *ep_fid, const void *buf,
			     size_t len, fi_addr_t dest_addr, uint64_t addr,
			     uint64_t key)
{
	struct rxm_ep *rxm_ep;
	struct iovec local = { 0 };
	struct fi_rma_iov remote = { 0 };
	struct fi_msg_rma rma_msg = { 0 };
	uint64_t inject_flags;

	local.iov_base = (void*)buf;
	local.iov_len = len;

	remote.addr = addr;
	remote.len = len;
	remote.key = key;

	rma_msg.msg_iov = &local;
	rma_msg.desc = NULL;
	rma_msg.iov_count = 1;
	rma_msg.addr = dest_addr;
	rma_msg.rma_iov = &remote;
	rma_msg.rma_iov_count = 1;
	rma_msg.context = NULL;
	rma_msg.data = 0;

	rxm_ep = container_of(ep_fid, struct rxm_ep, util_ep.ep_fid.fid);
	inject_flags = (rxm_ep_tx_flags(rxm_ep) & ~FI_COMPLETION) | FI_INJECT;

	return rxm_ep_writemsg(ep_fid, &rma_msg, inject_flags);
}

/*
 * fi_inject_writedata entry point: single-buffer write with remote CQ data
 * and no descriptor or context, forwarded to rxm_ep_writemsg() with the
 * endpoint's default tx flags, FI_COMPLETION cleared, and
 * FI_INJECT | FI_REMOTE_CQ_DATA set.
 */
static ssize_t rxm_ep_inject_writedata(struct fid_ep *ep_fid, const void *buf,
				       size_t len, uint64_t data,
				       fi_addr_t dest_addr, uint64_t addr,
				       uint64_t key)
{
	struct rxm_ep *rxm_ep;
	struct iovec local = { 0 };
	struct fi_rma_iov remote = { 0 };
	struct fi_msg_rma rma_msg = { 0 };
	uint64_t inject_flags;

	local.iov_base = (void*)buf;
	local.iov_len = len;

	remote.addr = addr;
	remote.len = len;
	remote.key = key;

	rma_msg.msg_iov = &local;
	rma_msg.desc = NULL;
	rma_msg.iov_count = 1;
	rma_msg.addr = dest_addr;
	rma_msg.rma_iov = &remote;
	rma_msg.rma_iov_count = 1;
	rma_msg.context = NULL;
	rma_msg.data = data;

	rxm_ep = container_of(ep_fid, struct rxm_ep, util_ep.ep_fid.fid);
	inject_flags = (rxm_ep_tx_flags(rxm_ep) & ~FI_COMPLETION) |
		       FI_INJECT | FI_REMOTE_CQ_DATA;

	return rxm_ep_writemsg(ep_fid, &rma_msg, inject_flags);
}

/*
 * RMA operation table for rxm endpoints: maps each fi_ops_rma slot to the
 * corresponding rxm_ep_* implementation.
 */
struct fi_ops_rma rxm_ops_rma = {
	.size = sizeof (struct fi_ops_rma),
	.read = rxm_ep_read,
	.readv = rxm_ep_readv,
	.readmsg = rxm_ep_readmsg,
	.write = rxm_ep_write,
	.writev = rxm_ep_writev,
	.writemsg = rxm_ep_writemsg,
	.inject = rxm_ep_inject_write,
	.writedata = rxm_ep_writedata,
	.injectdata = rxm_ep_inject_writedata,
};
Exemple #11
0
/*
 * fi_send entry point: wrap the single buffer in an iovec and forward it to
 * mrail_send_common() with no remote CQ data and the flags returned by
 * mrail_comp_flag().
 */
static ssize_t mrail_send(struct fid_ep *ep_fid, const void *buf, size_t len,
			  void *desc, fi_addr_t dest_addr, void *context)
{
	struct iovec vec;

	vec.iov_base = (void *)buf;
	vec.iov_len = len;

	return mrail_send_common(ep_fid, &vec, &desc, 1, len, dest_addr, 0,
				 context, mrail_comp_flag(ep_fid));
}

/*
 * fi_inject entry point: wrap the single buffer in an iovec and forward it
 * to mrail_send_common() with no descriptor, no context, no remote CQ data
 * and the flags returned by mrail_inject_flags().
 */
static ssize_t mrail_inject(struct fid_ep *ep_fid, const void *buf, size_t len,
			    fi_addr_t dest_addr)
{
	struct iovec vec;

	vec.iov_base = (void *)buf;
	vec.iov_len = len;

	return mrail_send_common(ep_fid, &vec, NULL, 1, len, dest_addr, 0,
				 NULL, mrail_inject_flags(ep_fid));
}

/*
 * fi_injectdata entry point: like mrail_inject() but carries `data` and
 * adds FI_REMOTE_CQ_DATA to the inject flags.
 */
static ssize_t mrail_injectdata(struct fid_ep *ep_fid, const void *buf,
				size_t len, uint64_t data, fi_addr_t dest_addr)
{
	struct iovec vec;

	vec.iov_base = (void *)buf;
	vec.iov_len = len;

	return mrail_send_common(ep_fid, &vec, NULL, 1, len, dest_addr, data,
				 NULL,
				 mrail_inject_flags(ep_fid) | FI_REMOTE_CQ_DATA);
}

/*
 * fi_tsendmsg entry point: unpack `msg` and forward it to
 * mrail_tsend_common() together with the total payload length and the
 * caller's flags OR-ed with mrail_comp_flag().
 */
static ssize_t
mrail_tsendmsg(struct fid_ep *ep_fid, const struct fi_msg_tagged *msg,
	       uint64_t flags)
{
	size_t total_len = ofi_total_iov_len(msg->msg_iov, msg->iov_count);
	uint64_t send_flags = flags | mrail_comp_flag(ep_fid);

	return mrail_tsend_common(ep_fid, msg->msg_iov, msg->desc,
				  msg->iov_count, total_len, msg->addr,
				  msg->tag, msg->data, msg->context,
				  send_flags);
}

/*
 * fi_tsend entry point: wrap the single buffer in an iovec and forward it
 * to mrail_tsend_common() with the given tag, no remote CQ data and the
 * flags returned by mrail_comp_flag().
 */
static ssize_t mrail_tsend(struct fid_ep *ep_fid, const void *buf, size_t len,
			   void *desc, fi_addr_t dest_addr, uint64_t tag,
			   void *context)
{
	struct iovec vec;

	vec.iov_base = (void *)buf;
	vec.iov_len = len;

	return mrail_tsend_common(ep_fid, &vec, &desc, 1, len, dest_addr, tag,
				  0, context, mrail_comp_flag(ep_fid));
}

/*
 * fi_tsenddata entry point: like mrail_tsend() but carries `data` and adds
 * FI_REMOTE_CQ_DATA to the completion flags.
 */
static ssize_t mrail_tsenddata(struct fid_ep *ep_fid, const void *buf, size_t len,
			       void *desc, uint64_t data, fi_addr_t dest_addr,
			       uint64_t tag, void *context)
{
	struct iovec vec;

	vec.iov_base = (void *)buf;
	vec.iov_len = len;

	return mrail_tsend_common(ep_fid, &vec, &desc, 1, len, dest_addr, tag,
				  data, context,
				  mrail_comp_flag(ep_fid) | FI_REMOTE_CQ_DATA);
}

/*
 * fi_tinject entry point: wrap the single buffer in an iovec and forward it
 * to mrail_tsend_common() with the given tag, no descriptor, no context, no
 * remote CQ data and the flags returned by mrail_inject_flags().
 */
static ssize_t mrail_tinject(struct fid_ep *ep_fid, const void *buf, size_t len,
			     fi_addr_t dest_addr, uint64_t tag)
{
	struct iovec vec;

	vec.iov_base = (void *)buf;
	vec.iov_len = len;

	return mrail_tsend_common(ep_fid, &vec, NULL, 1, len, dest_addr, tag,
				  0, NULL, mrail_inject_flags(ep_fid));
}

/*
 * fi_tinjectdata entry point: like mrail_tinject() but carries `data` and
 * adds FI_REMOTE_CQ_DATA to the inject flags.
 */
static ssize_t mrail_tinjectdata(struct fid_ep *ep_fid, const void *buf,
				 size_t len, uint64_t data, fi_addr_t dest_addr,
				 uint64_t tag)
{
	struct iovec vec;

	vec.iov_base = (void *)buf;
	vec.iov_len = len;

	return mrail_tsend_common(ep_fid, &vec, NULL, 1, len, dest_addr, tag,
				  data, NULL,
				  mrail_inject_flags(ep_fid) | FI_REMOTE_CQ_DATA);
}

/*
 * Allocate an unexpected-message entry with room for one
 * fi_cq_tagged_entry after it. Neither argument is used at present.
 *
 * Returns the malloc() result unchanged, so it is NULL on allocation
 * failure and the contents are uninitialized.
 */
static struct mrail_unexp_msg_entry *
mrail_get_unexp_msg_entry(struct mrail_recv_queue *recv_queue, void *context)
{
	// TODO use buf pool
	// context would be mrail_ep from which u can get the buf pool
	size_t alloc_size = sizeof(struct mrail_unexp_msg_entry) +
			    sizeof(struct fi_cq_tagged_entry);

	return malloc(alloc_size);
}

/*
 * fi_getname implementation: the multi-rail address is the concatenation
 * of the addresses of all rails, written into `addr` in rail order.
 *
 * addr    - output buffer
 * addrlen - on input, the size of `addr`; on return it holds the required
 *           size (when the buffer is too small) or the number of bytes
 *           written (on success)
 *
 * Returns 0 on success, -FI_ETOOSMALL when *addrlen is smaller than the
 * domain's address length, or the error from a rail's fi_getname().
 */
static int mrail_getname(fid_t fid, void *addr, size_t *addrlen)
{
	struct mrail_ep *mrail_ep =
		container_of(fid, struct mrail_ep, util_ep.ep_fid.fid);
	struct mrail_domain *mrail_domain =
		container_of(mrail_ep->util_ep.domain, struct mrail_domain,
			     util_domain);
	size_t i, offset = 0, rail_addrlen;
	int ret;

	if (*addrlen < mrail_domain->addrlen) {
		/* Tell the caller how large the buffer has to be. */
		*addrlen = mrail_domain->addrlen;
		return -FI_ETOOSMALL;
	}

	for (i = 0; i < mrail_ep->num_eps; i++) {
		/* Each rail gets whatever space is left in the buffer. */
		rail_addrlen = *addrlen - offset;
		ret = fi_getname(&mrail_ep->rails[i].ep->fid,
				 (char *)addr + offset, &rail_addrlen);
		if (ret) {
			FI_WARN(&mrail_prov, FI_LOG_EP_CTRL,
				"Unable to get name for rail: %zu\n", i);
			return ret;
		}
		offset += rail_addrlen;
	}

	*addrlen = offset;
	return 0;
}


/*
 * Buffer-pool init callback for tx buffers: records the owning endpoint
 * (passed as the pool context) and stamps the header version.
 */
static void mrail_tx_buf_init(void *pool_ctx, void *buf)
{
	struct mrail_tx_buf *tx_buf = (struct mrail_tx_buf *)buf;
	struct mrail_ep *owner = (struct mrail_ep *)pool_ctx;

	tx_buf->ep = owner;
	tx_buf->hdr.version = MRAIL_HDR_VERSION;
}

/*
 * Release the endpoint's buffer resources. Each resource is released only
 * if its pointer is non-NULL; the pointers themselves are left unchanged.
 */
static void mrail_ep_free_bufs(struct mrail_ep *mrail_ep)
{
	if (mrail_ep->req_pool != NULL) {
		util_buf_pool_destroy(mrail_ep->req_pool);
	}

	if (mrail_ep->ooo_recv_pool != NULL) {
		util_buf_pool_destroy(mrail_ep->ooo_recv_pool);
	}

	if (mrail_ep->tx_buf_pool != NULL) {
		util_buf_pool_destroy(mrail_ep->tx_buf_pool);
	}

	if (mrail_ep->recv_fs != NULL) {
		mrail_recv_fs_free(mrail_ep->recv_fs);
	}
}

static int mrail_ep_alloc_bufs(struct mrail_ep *mrail_ep)
{
	struct util_buf_attr attr = {
		.size		= sizeof(struct mrail_tx_buf),
		.alignment	= sizeof(void *),
		.max_cnt	= 0,
		.chunk_cnt	= 64,
		.alloc_hndlr	= NULL,
		.free_hndlr	= NULL,
		.init		= mrail_tx_buf_init,
		.ctx		= mrail_ep,
	};
	size_t buf_size, rxq_total_size = 0;
	struct fi_info *fi;
	int ret;

	for (fi = mrail_ep->info->next; fi; fi = fi->next)
		rxq_total_size += fi->rx_attr->size;

	mrail_ep->recv_fs = mrail_recv_fs_create(rxq_total_size, mrail_init_recv,
						 mrail_ep);
	if (!mrail_ep->recv_fs)
		return -FI_ENOMEM;

	ret = util_buf_pool_create(&mrail_ep->ooo_recv_pool,
				   sizeof(struct mrail_ooo_recv),
				   sizeof(void *), 0, 64);
	if (!mrail_ep->ooo_recv_pool)
		goto err;

	ret = util_buf_pool_create_attr(&attr, &mrail_ep->tx_buf_pool);
	if (!mrail_ep->tx_buf_pool)
		goto err;

	buf_size = (sizeof(struct mrail_req) +
		    (mrail_ep->num_eps * sizeof(struct mrail_subreq)));

	ret = util_buf_pool_create(&mrail_ep->req_pool, buf_size,
				   sizeof(void *), 0, 64);
	if (ret)
		goto err;
	return 0;
err:
	mrail_ep_free_bufs(mrail_ep);
	return ret;
}

/*
 * Close the multi-rail endpoint and free all of its resources.
 *
 * Releases the buffer pools, closes every rail endpoint, frees the rail
 * array, closes the util endpoint and finally frees mrail_ep itself.
 *
 * This is also used to tear down a partially constructed endpoint from
 * mrail_ep_open()'s error path, so it tolerates a rail array that was never
 * allocated and rail slots whose endpoint was never opened (the array is
 * calloc'ed, so such slots hold NULL).
 *
 * Teardown always runs to completion; the return value is 0 on success or
 * the last error reported by fi_close()/ofi_endpoint_close().
 */
static int mrail_ep_close(fid_t fid)
{
	struct mrail_ep *mrail_ep =
		container_of(fid, struct mrail_ep, util_ep.ep_fid.fid);
	int ret, retv = 0;
	size_t i;

	mrail_ep_free_bufs(mrail_ep);

	if (mrail_ep->rails) {
		for (i = 0; i < mrail_ep->num_eps; i++) {
			/* Skip rails that were never opened. */
			if (!mrail_ep->rails[i].ep)
				continue;

			ret = fi_close(&mrail_ep->rails[i].ep->fid);
			if (ret)
				retv = ret;
		}
		free(mrail_ep->rails);
	}

	ret = ofi_endpoint_close(&mrail_ep->util_ep);
	if (ret)
		retv = ret;
	free(mrail_ep);
	return retv;
}

static int mrail_ep_bind(struct fid *ep_fid, struct fid *bfid, uint64_t flags)
{
	struct mrail_ep *mrail_ep =
		container_of(ep_fid, struct mrail_ep, util_ep.ep_fid.fid);
	struct mrail_cq *mrail_cq;
	struct mrail_av *mrail_av;
	struct util_cntr *cntr;
	int ret = 0;
	size_t i;

	switch (bfid->fclass) {
	case FI_CLASS_AV:
		mrail_av = container_of(bfid, struct mrail_av,
					util_av.av_fid.fid);
		ret = ofi_ep_bind_av(&mrail_ep->util_ep, &mrail_av->util_av);
		if (ret)
			return ret;
		for (i = 0; i < mrail_ep->num_eps; i++) {
			ret = fi_ep_bind(mrail_ep->rails[i].ep,
					 &mrail_av->avs[i]->fid, flags);
			if (ret)
				return ret;
		}
		break;
	case FI_CLASS_CQ:
		mrail_cq = container_of(bfid, struct mrail_cq,
					util_cq.cq_fid.fid);

		ret = ofi_ep_bind_cq(&mrail_ep->util_ep, &mrail_cq->util_cq,
				     flags);
		if (ret)
			return ret;
		for (i = 0; i < mrail_ep->num_eps; i++) {
			ret = fi_ep_bind(mrail_ep->rails[i].ep,
					 &mrail_cq->cqs[i]->fid, flags);
			if (ret)
				return ret;
		}
		break;
	case FI_CLASS_CNTR:
		cntr = container_of(bfid, struct util_cntr, cntr_fid.fid);

		ret = ofi_ep_bind_cntr(&mrail_ep->util_ep, cntr, flags);
		if (ret)
			return ret;
		break;
	case FI_CLASS_EQ:
		ret = -FI_ENOSYS;
		break;
	default:
		FI_WARN(&mrail_prov, FI_LOG_EP_CTRL, "invalid fid class\n");
		ret = -FI_EINVAL;
		break;
	}
	return ret;
}

/*
 * Control operation for the multi-rail endpoint.
 *
 * Only FI_ENABLE is handled; every other command yields -FI_ENOSYS.
 *
 * Enabling requires that both an RX and a TX CQ (-FI_ENOCQ otherwise) and an
 * AV (-FI_ENOAV otherwise) are bound. Each rail endpoint then gets
 * FI_OPT_BUFFERED_MIN set to sizeof(struct mrail_hdr) and is enabled. The
 * first failing rail aborts the sequence and its error is returned.
 *
 * arg is unused.
 */
static int mrail_ep_ctrl(struct fid *fid, int command, void *arg)
{
	struct mrail_ep *mrail_ep =
		container_of(fid, struct mrail_ep, util_ep.ep_fid.fid);
	size_t min_buffered = sizeof(struct mrail_hdr);
	size_t rail;
	int rc;

	if (command != FI_ENABLE)
		return -FI_ENOSYS;

	if (!(mrail_ep->util_ep.rx_cq && mrail_ep->util_ep.tx_cq))
		return -FI_ENOCQ;

	if (!mrail_ep->util_ep.av)
		return -FI_ENOAV;

	for (rail = 0; rail < mrail_ep->num_eps; rail++) {
		rc = fi_setopt(&mrail_ep->rails[rail].ep->fid,
			       FI_OPT_ENDPOINT, FI_OPT_BUFFERED_MIN,
			       &min_buffered, sizeof(min_buffered));
		if (rc)
			return rc;

		rc = fi_enable(mrail_ep->rails[rail].ep);
		if (rc)
			return rc;
	}

	return 0;
}

/*
 * Base fid operations of the mrail endpoint; installed as fid.ops by
 * mrail_ep_open(). close, bind and control are implemented in this file.
 * ops_open uses fi_no_ops_open (presumably the generic "not supported" stub).
 */
static struct fi_ops mrail_ep_fi_ops = {
	.size = sizeof(struct fi_ops),
	.close = mrail_ep_close,
	.bind = mrail_ep_bind,
	.control = mrail_ep_ctrl,
	.ops_open = fi_no_ops_open,
};

/*
 * Set an endpoint option by forwarding it unchanged to every rail endpoint.
 *
 * Stops at the first rail whose fi_setopt() fails and returns that error;
 * rails processed before the failure keep the new value. Returns 0 when all
 * rails accept the option (or when there are no rails).
 */
static int mrail_ep_setopt(fid_t fid, int level, int optname,
		const void *optval, size_t optlen)
{
	struct mrail_ep *mrail_ep =
		container_of(fid, struct mrail_ep, util_ep.ep_fid.fid);
	size_t rail;
	int rc = 0;

	for (rail = 0; !rc && rail < mrail_ep->num_eps; rail++)
		rc = fi_setopt(&mrail_ep->rails[rail].ep->fid, level, optname,
			       optval, optlen);

	return rc;
}

/*
 * Endpoint operations table; installed as ->ops by mrail_ep_open().
 * Only setopt is implemented by mrail; every other entry points at an
 * fi_no_* function (presumably the generic "not supported" stubs).
 */
static struct fi_ops_ep mrail_ops_ep = {
	.size = sizeof(struct fi_ops_ep),
	.cancel = fi_no_cancel,
	.getopt = fi_no_getopt,
	.setopt = mrail_ep_setopt,
	.tx_ctx = fi_no_tx_ctx,
	.rx_ctx = fi_no_rx_ctx,
	.rx_size_left = fi_no_rx_size_left,
	.tx_size_left = fi_no_tx_size_left,
};

/*
 * Connection-management operations table; installed as ->cm by
 * mrail_ep_open(). Only getname is implemented by mrail; every other entry
 * points at an fi_no_* function (presumably the generic "not supported"
 * stubs).
 */
static struct fi_ops_cm mrail_ops_cm = {
	.size = sizeof(struct fi_ops_cm),
	.setname = fi_no_setname,
	.getname = mrail_getname,
	.getpeer = fi_no_getpeer,
	.connect = fi_no_connect,
	.listen = fi_no_listen,
	.accept = fi_no_accept,
	.reject = fi_no_reject,
	.shutdown = fi_no_shutdown,
	.join = fi_no_join,
};

/*
 * Untagged message operations table; installed as ->msg by mrail_ep_open().
 * recv, recvmsg, send, sendmsg, inject and injectdata are provided by mrail;
 * recvv, sendv and senddata point at fi_no_* functions (presumably the
 * generic "not supported" stubs).
 */
static struct fi_ops_msg mrail_ops_msg = {
	.size = sizeof(struct fi_ops_msg),
	.recv = mrail_recv,
	.recvv = fi_no_msg_recvv,
	.recvmsg = mrail_recvmsg,
	.send = mrail_send,
	.sendv = fi_no_msg_sendv,
	.sendmsg = mrail_sendmsg,
	.inject = mrail_inject,
	.senddata = fi_no_msg_senddata,
	.injectdata = mrail_injectdata,
};

/*
 * Tagged message operations table; installed as ->tagged by mrail_ep_open().
 * Unlike the other operation tables in this file it is not declared static.
 * recvv and sendv point at fi_no_* functions (presumably the generic
 * "not supported" stubs); every other entry is provided by mrail.
 */
struct fi_ops_tagged mrail_ops_tagged = {
	.size = sizeof(struct fi_ops_tagged),
	.recv = mrail_trecv,
	.recvv = fi_no_tagged_recvv,
	.recvmsg = mrail_trecvmsg,
	.send = mrail_tsend,
	.sendv = fi_no_tagged_sendv,
	.sendmsg = mrail_tsendmsg,
	.inject = mrail_tinject,
	.senddata = mrail_tsenddata,
	.injectdata = mrail_tinjectdata,
};

/*
 * Progress callback registered with ofi_endpoint_init() in mrail_ep_open().
 * Maps the util endpoint back to its enclosing mrail endpoint and drives that
 * endpoint's deferred requests.
 */
void mrail_ep_progress(struct util_ep *ep)
{
	mrail_progress_deferred_reqs(container_of(ep, struct mrail_ep,
						  util_ep));
}

/*
 * Open a multi-rail endpoint on the given mrail domain.
 *
 * domain_fid  mrail domain the endpoint belongs to.
 * info        endpoint attributes; its domain name must match the domain's.
 * ep_fid      out: the new endpoint on success.
 * context     user context stored with the util endpoint.
 *
 * One rail endpoint is opened per fi_info following mrail_domain->info (the
 * i-th one on the i-th rail domain), the buffer pools and receive queues are
 * set up, and the mrail operation tables are installed on the new endpoint.
 *
 * Returns 0 on success, -FI_EINVAL on a domain name mismatch, -FI_ENOMEM on
 * allocation failure, or the error of the failing sub-operation. On failure
 * nothing is left allocated and *ep_fid is not written.
 */
int mrail_ep_open(struct fid_domain *domain_fid, struct fi_info *info,
		  struct fid_ep **ep_fid, void *context)
{
	struct mrail_domain *mrail_domain =
		container_of(domain_fid, struct mrail_domain,
			     util_domain.domain_fid);
	struct mrail_ep *mrail_ep;
	struct fi_info *fi;
	size_t i;
	int ret;

	if (strcmp(mrail_domain->info->domain_attr->name,
		    info->domain_attr->name)) {
		FI_WARN(&mrail_prov, FI_LOG_EP_CTRL, "info domain name: %s "
			"doesn't match fid_domain name: %s!\n",
			info->domain_attr->name,
			mrail_domain->info->domain_attr->name);
		return -FI_EINVAL;
	}

	mrail_ep = calloc(1, sizeof(*mrail_ep));
	if (!mrail_ep)
		return -FI_ENOMEM;

	// TODO detect changes b/w mrail_domain->info and info arg
	// this may be difficult and we may not support such changes
	mrail_ep->info = mrail_domain->info;
	mrail_ep->num_eps = mrail_domain->num_domains;

	ret = ofi_endpoint_init(domain_fid, &mrail_util_prov, info, &mrail_ep->util_ep,
				context, &mrail_ep_progress);
	if (ret) {
		goto free_ep;
	}

	mrail_ep->rails = calloc(mrail_ep->num_eps, sizeof(*mrail_ep->rails));
	if (!mrail_ep->rails) {
		/* No rail exists, so teardown must not walk the rail array. */
		mrail_ep->num_eps = 0;
		ret = -FI_ENOMEM;
		goto err;
	}

	for (i = 0, fi = mrail_ep->info->next; fi; fi = fi->next, i++) {
		/* Rail endpoints are opened without FI_COMPLETION as a TX default. */
		fi->tx_attr->op_flags &= ~FI_COMPLETION;
		ret = fi_endpoint(mrail_domain->domains[i], fi,
				  &mrail_ep->rails[i].ep, mrail_ep);
		if (ret) {
			FI_WARN(&mrail_prov, FI_LOG_EP_CTRL,
				"Unable to open EP\n");
			/* Only rails [0, i) were opened; limit teardown to them. */
			mrail_ep->num_eps = i;
			goto err;
		}
		mrail_ep->rails[i].info = fi;
	}

	/*
	 * NOTE(review): on failure mrail_ep_alloc_bufs() has already called
	 * mrail_ep_free_bufs(), and mrail_ep_close() below calls it again;
	 * mrail_ep_free_bufs() must tolerate being invoked twice.
	 */
	ret = mrail_ep_alloc_bufs(mrail_ep);
	if (ret)
		goto err;

	slist_init(&mrail_ep->deferred_reqs);

	/* Source-address matching is only needed with FI_DIRECTED_RECV. */
	if (mrail_ep->info->caps & FI_DIRECTED_RECV) {
		mrail_recv_queue_init(&mrail_prov, &mrail_ep->recv_queue,
				      mrail_match_recv_addr,
				      mrail_match_unexp_addr,
				      mrail_get_unexp_msg_entry);
		mrail_recv_queue_init(&mrail_prov, &mrail_ep->trecv_queue,
				      mrail_match_recv_addr_tag,
				      mrail_match_unexp_addr_tag,
				      mrail_get_unexp_msg_entry);
	} else {
		mrail_recv_queue_init(&mrail_prov, &mrail_ep->recv_queue,
				      mrail_match_recv_any,
				      mrail_match_unexp_any,
				      mrail_get_unexp_msg_entry);
		mrail_recv_queue_init(&mrail_prov, &mrail_ep->trecv_queue,
				      mrail_match_recv_tag,
				      mrail_match_unexp_tag,
				      mrail_get_unexp_msg_entry);
	}

	ofi_atomic_initialize32(&mrail_ep->tx_rail, 0);
	ofi_atomic_initialize32(&mrail_ep->rx_rail, 0);

	*ep_fid = &mrail_ep->util_ep.ep_fid;
	(*ep_fid)->fid.ops = &mrail_ep_fi_ops;
	(*ep_fid)->ops = &mrail_ops_ep;
	(*ep_fid)->cm = &mrail_ops_cm;
	(*ep_fid)->msg = &mrail_ops_msg;
	(*ep_fid)->tagged = &mrail_ops_tagged;
	(*ep_fid)->rma = &mrail_ops_rma;

	return 0;
err:
	/*
	 * mrail_ep_close() closes the util endpoint and frees mrail_ep itself,
	 * so return here instead of falling through to free_ep, which would
	 * free the endpoint a second time.
	 */
	mrail_ep_close(&mrail_ep->util_ep.ep_fid.fid);
	return ret;
free_ep:
	free(mrail_ep);
	return ret;
}
Exemple #12
0
/*
 * Build and send one atomic request on an rxm connection.
 *
 * rxm_ep             endpoint issuing the operation; its util_ep lock is held
 *                    while the TX buffer is allocated, filled and sent.
 * rxm_conn           connection the request is sent on.
 * msg                atomic message (operand ioc, RMA ioc, datatype, op,
 *                    context, data).
 * comparev,
 * compare_iov_count  compare operand; read only when op is
 *                    ofi_op_atomic_compare.
 * resultv,
 * result_iov_count   result buffers; recorded in the TX buffer (converted
 *                    only when resultv is non-NULL).
 * compare_desc,
 * result_desc        not referenced by this function.
 * op                 internal atomic operation class.
 * flags              operation flags; FI_REMOTE_CQ_DATA is rejected.
 *
 * Returns -FI_EINVAL if FI_REMOTE_CQ_DATA is set or the packed request
 * exceeds rxm_eager_limit, -FI_EAGAIN if no atomic TX buffer is available,
 * otherwise the result of rxm_ep_send_atomic_req().
 *
 * NOTE(review): only msg->iov_count and msg->rma_iov_count are asserted
 * against RXM_IOV_LIMIT; compare_iov_count (which indexes cmp_iov) and
 * result_iov_count are not checked here - confirm callers bound them.
 */
static ssize_t
rxm_ep_atomic_common(struct rxm_ep *rxm_ep, struct rxm_conn *rxm_conn,
		const struct fi_msg_atomic *msg, const struct fi_ioc *comparev,
		void **compare_desc, size_t compare_iov_count,
		struct fi_ioc *resultv, void **result_desc,
		size_t result_iov_count, uint32_t op, uint64_t flags)
{
	struct rxm_tx_atomic_buf *tx_buf;
	struct rxm_atomic_hdr *atomic_hdr;
	struct iovec buf_iov[RXM_IOV_LIMIT];
	struct iovec cmp_iov[RXM_IOV_LIMIT];
	size_t datatype_sz = ofi_datatype_size(msg->datatype);
	size_t buf_len = 0;
	size_t cmp_len = 0;
	size_t tot_len;
	ssize_t ret;

	assert(msg->iov_count <= RXM_IOV_LIMIT &&
	       msg->rma_iov_count <= RXM_IOV_LIMIT);

	if (flags & FI_REMOTE_CQ_DATA) {
		FI_WARN(&rxm_prov, FI_LOG_EP_DATA,
			"atomic with remote CQ data not supported\n");
		return -FI_EINVAL;
	}

	/*
	 * FI_ATOMIC_READ sends no operand: buf_len stays 0 and buf_iov is
	 * left unconverted.
	 */
	if (msg->op != FI_ATOMIC_READ) {
		assert(msg->msg_iov);
		ofi_ioc_to_iov(msg->msg_iov, buf_iov, msg->iov_count,
			       datatype_sz);
		buf_len = ofi_total_iov_len(buf_iov, msg->iov_count);
	}

	/* Compare operations carry a second operand of the same total size. */
	if (op == ofi_op_atomic_compare) {
		assert(comparev);
		ofi_ioc_to_iov(comparev, cmp_iov, compare_iov_count,
			       datatype_sz);
		cmp_len = ofi_total_iov_len(cmp_iov, compare_iov_count);
		assert(buf_len == cmp_len);
	}

	/* Full request size: packet header, atomic header and both operands. */
	tot_len = buf_len + cmp_len + sizeof(struct rxm_atomic_hdr) +
			sizeof(struct rxm_pkt);

	if (tot_len > rxm_eager_limit) {
		FI_WARN(&rxm_prov, FI_LOG_EP_DATA,
			"atomic data too large %zu\n", tot_len);
		return -FI_EINVAL;
	}

	/* Everything from here to the unlock label runs under the ep lock. */
	ofi_ep_lock_acquire(&rxm_ep->util_ep);
	tx_buf = (struct rxm_tx_atomic_buf *)
		 rxm_tx_buf_alloc(rxm_ep, RXM_BUF_POOL_TX_ATOMIC);
	if (OFI_UNLIKELY(!tx_buf)) {
		FI_WARN(&rxm_prov, FI_LOG_EP_DATA,
			"Ran out of buffers from Atomic buffer pool\n");
		ret = -FI_EAGAIN;
		goto unlock;
	}

	rxm_ep_format_atomic_pkt_hdr(rxm_conn, tx_buf, tot_len, op,
				msg->datatype, msg->op, flags, msg->data,
				msg->rma_iov, msg->rma_iov_count);
	/* The buffer's pool index doubles as the message id of the request. */
	tx_buf->pkt.ctrl_hdr.msg_id = ofi_buf_index(tx_buf);
	tx_buf->app_context = msg->context;

	atomic_hdr = (struct rxm_atomic_hdr *) tx_buf->pkt.data;

	/* Payload layout: operand bytes first, compare bytes right after. */
	ofi_copy_from_iov(atomic_hdr->data, buf_len, buf_iov,
			  msg->iov_count, 0);
	if (cmp_len)
		ofi_copy_from_iov(atomic_hdr->data + buf_len, cmp_len,
				  cmp_iov, compare_iov_count, 0);

	/* Record the result buffers in the TX buffer alongside the request. */
	tx_buf->result_iov_count = result_iov_count;
	if (resultv)
		ofi_ioc_to_iov(resultv, tx_buf->result_iov, result_iov_count,
			       datatype_sz);

	ret = rxm_ep_send_atomic_req(rxm_ep, rxm_conn, tx_buf, tot_len);
	/* A failed send hands the buffer back to its pool. */
	if (ret)
		ofi_buf_free(tx_buf);
unlock:
	ofi_ep_lock_release(&rxm_ep->util_ep);
	return ret;
}