Example #1
0
/*
 * Post an atomic request packet to the peer over the connection's MSG
 * endpoint.
 *
 * The buffer is moved to RXM_ATOMIC_RESP_WAIT before posting. Requests of at
 * most rxm_ep->inject_limit bytes go out via fi_inject(); anything larger is
 * posted with fi_send() using tx_buf as the operation context.
 *
 * Returns 0 on success, otherwise the error reported by the send call. On
 * -FI_EAGAIN the endpoint is progressed once and the error is returned to the
 * caller without logging a warning.
 */
static inline int
rxm_ep_send_atomic_req(struct rxm_ep *rxm_ep, struct rxm_conn *rxm_conn,
		       struct rxm_tx_atomic_buf *tx_buf, uint64_t len)
{
	int rc;

	/* The TX completion for an atomic request is only processed once the
	 * software generated atomic response message arrives, so the buffer
	 * starts out waiting for that response. */
	tx_buf->hdr.state = RXM_ATOMIC_RESP_WAIT;

	if (len > rxm_ep->inject_limit) {
		rc = fi_send(rxm_conn->msg_ep, &tx_buf->pkt, len,
			     tx_buf->hdr.desc, 0, tx_buf);
	} else {
		rc = fi_inject(rxm_conn->msg_ep, &tx_buf->pkt, len, 0);
	}

	if (OFI_LIKELY(!rc)) {
		FI_DBG(&rxm_prov, FI_LOG_EP_DATA, "sent atomic request: op: %"
		       PRIu8 " msg_id: 0x%" PRIx64 "\n", tx_buf->pkt.hdr.op,
		       tx_buf->pkt.ctrl_hdr.msg_id);
		return rc;
	}

	if (rc == -FI_EAGAIN) {
		/* Out of resources for now: drive progress, let caller retry. */
		rxm_ep_do_progress(&rxm_ep->util_ep);
		return rc;
	}

	FI_WARN(&rxm_prov, FI_LOG_EP_DATA, "unable to send atomic "
		"request: op: %" PRIu8 " msg_id: 0x%" PRIx64 "\n",
		tx_buf->pkt.hdr.op, tx_buf->pkt.ctrl_hdr.msg_id);
	return rc;
}
Example #2
0
fi_addr_t efa_ah_qpn_to_addr(struct efa_ep *ep, uint16_t ah, uint16_t qpn)
{
	struct efa_reverse_av *reverse_av;
	struct efa_av *av = ep->av;
	struct efa_ah_qpn key = {
		.efa_ah = ah,
		.qpn = qpn,
	};

	HASH_FIND(hh, av->reverse_av, &key, sizeof(key), reverse_av);

	return OFI_LIKELY(!!reverse_av) ? reverse_av->fi_addr : FI_ADDR_NOTAVAIL;
}
Example #3
0
/*
 * Remove `count` addresses listed in fi_addr[] from the address vector.
 *
 * Entries equal to FI_ADDR_NOTAVAIL, and entries that resolve to a NULL
 * connection, are skipped. For FI_AV_MAP the fi_addr value is itself the
 * connection pointer; for FI_AV_TABLE it is an index into av->conn_table,
 * whose slot is cleared and av->next lowered to that index if smaller.
 *
 * For every connection found, the matching reverse-lookup hash entry (if any)
 * is deleted, the address handle is destroyed, the connection is freed and
 * av->used is decremented.
 *
 * Returns 0 on success, -FI_EINVAL for a NULL fi_addr array or an unsupported
 * AV type, or the error from efa_cmd_destroy_ah(). `flags` is not used.
 *
 * NOTE(review): the FI_AV_TABLE index is not range-checked here - confirm that
 * callers only pass indices previously handed out by this AV.
 * NOTE(review): when efa_cmd_destroy_ah() fails the function returns at once;
 * the table slot / reverse entry are already gone while `conn` is not freed
 * and the remaining addresses are not processed - confirm this is intended.
 */
static int efa_av_remove(struct fid_av *av_fid, fi_addr_t *fi_addr,
			 size_t count, uint64_t flags)
{
	struct efa_av *av = container_of(av_fid, struct efa_av, av_fid);
	struct efa_conn *conn = NULL;
	char str[INET6_ADDRSTRLEN];
	int ret = 0;
	size_t i;	/* same type as `count`: no signed/unsigned comparison */

	if (!fi_addr || (av->type != FI_AV_MAP && av->type != FI_AV_TABLE))
		return -FI_EINVAL;

	for (i = 0; i < count; i++) {
		struct efa_reverse_av *reverse_av;
		struct efa_ah_qpn key;

		if (fi_addr[i] == FI_ADDR_NOTAVAIL)
			continue;

		if (av->type == FI_AV_MAP) {
			conn = (struct efa_conn *)fi_addr[i];
		} else { /* (av->type == FI_AV_TABLE) */
			conn = av->conn_table[fi_addr[i]];
			av->conn_table[fi_addr[i]] = NULL;
			av->next = MIN(av->next, fi_addr[i]);
		}
		if (!conn)
			continue;

		/* The hash lookup covers all sizeof(key) bytes of the key, so
		 * clear the whole object before filling in the fields; this
		 * keeps any padding bytes deterministic. */
		memset(&key, 0, sizeof(key));
		key.efa_ah = conn->ah->efa_address_handle;
		key.qpn = conn->ep_addr.qpn;
		HASH_FIND(hh, av->reverse_av, &key, sizeof(key), reverse_av);
		if (OFI_LIKELY(!!reverse_av)) {
			HASH_DEL(av->reverse_av, reverse_av);
			free(reverse_av);
		}

		ret = efa_cmd_destroy_ah(conn->ah);
		if (ret)
			return ret;

		/* Render the raw address for the log line only. */
		memset(str, 0, sizeof(str));
		inet_ntop(AF_INET6, conn->ep_addr.raw, str, INET6_ADDRSTRLEN);
		EFA_INFO(FI_LOG_AV, "av_remove conn[%p] with GID[%s] QP[%u]\n", conn,
			 str, conn->ep_addr.qpn);

		free(conn);
		av->used--;
	}
	return ret;
}
Example #4
0
/*
 * Shared implementation of the non-inject RMA read/write paths.
 *
 * Resolves the connection for msg->addr, then, under the endpoint lock,
 * takes an RMA buffer, records the application context and flags in it,
 * registers/collects descriptors for the local iov via rxm_ep_rma_reg_iov()
 * and finally hands a copy of the message (with the new descriptors and the
 * RMA buffer as context) to `rma_msg` on the connection's MSG endpoint.
 *
 * Returns 0 on success. On failure the RMA buffer is released again and the
 * error is returned: the rxm_ep_prepare_tx() error, -FI_ENOMEM when the
 * buffer pool is exhausted, or whatever registration / `rma_msg` reported.
 */
static inline ssize_t
rxm_ep_rma_common(struct rxm_ep *rxm_ep, const struct fi_msg_rma *msg, uint64_t flags,
		  rxm_rma_msg_fn rma_msg, uint64_t comp_flags)
{
	void *mr_desc[RXM_IOV_LIMIT] = { 0 };
	struct fi_msg_rma lower_msg = *msg;
	struct rxm_rma_buf *buf;
	struct rxm_conn *rxm_conn;
	int ret;

	assert(msg->rma_iov_count <= rxm_ep->rxm_info->tx_attr->rma_iov_limit);

	ret = rxm_ep_prepare_tx(rxm_ep, msg->addr, &rxm_conn);
	if (OFI_UNLIKELY(ret))
		return ret;

	ofi_ep_lock_acquire(&rxm_ep->util_ep);

	buf = rxm_rma_buf_alloc(rxm_ep);
	if (OFI_UNLIKELY(!buf)) {
		FI_WARN(&rxm_prov, FI_LOG_EP_DATA,
			"Ran out of buffers from RMA buffer pool\n");
		ret = -FI_ENOMEM;
		goto unlock;
	}

	buf->app_context = msg->context;
	buf->flags = flags;

	ret = rxm_ep_rma_reg_iov(rxm_ep, lower_msg.msg_iov, lower_msg.desc,
				 mr_desc, lower_msg.iov_count,
				 comp_flags & (FI_WRITE | FI_READ), buf);
	if (OFI_LIKELY(!ret)) {
		/* Post with our descriptors and track the op via the buffer. */
		lower_msg.desc = mr_desc;
		lower_msg.context = buf;

		ret = rma_msg(rxm_conn->msg_ep, &lower_msg, flags);
		if (OFI_LIKELY(!ret))
			goto unlock;

		/* Posting failed: drop the registrations made for this op. */
		if ((rxm_ep->msg_mr_local) && (!rxm_ep->rxm_mr_local))
			rxm_ep_msg_mr_closev(buf->mr.mr, buf->mr.count);
	}

	/* Any failure after allocation gives the buffer back to the pool. */
	ofi_buf_free(buf);
unlock:
	ofi_ep_lock_release(&rxm_ep->util_ep);
	return ret;
}
Example #5
0
/*
 * Handle the server's connection response on the client side.
 *
 * The endpoint's connection fd is removed from `wait` and the response is
 * processed with proc_conn_resp(). On success cm_ctx is freed and the
 * function returns. On any failure an error entry is written to the
 * endpoint's EQ; it carries a heap copy of cm_ctx->cm_data when
 * cm_ctx->cm_data_sz is non-zero. cm_ctx is freed on every path.
 */
static void client_recv_connresp(struct util_wait *wait,
				 struct tcpx_cm_context *cm_ctx)
{
	struct fi_eq_err_entry err_entry = { 0 };
	struct tcpx_ep *ep;
	ssize_t ret;

	assert(cm_ctx->fid->fclass == FI_CLASS_EP);
	ep = container_of(cm_ctx->fid, struct tcpx_ep, util_ep.ep_fid.fid);

	ret = ofi_wait_fd_del(wait, ep->conn_fd);
	if (ret) {
		FI_WARN(&tcpx_prov, FI_LOG_EP_CTRL,
			"Could not remove fd from wait\n");
		goto err;
	}

	ret = proc_conn_resp(cm_ctx, ep);
	if (ret)
		goto err;

	FI_DBG(&tcpx_prov, FI_LOG_EP_CTRL, "Received Accept from server\n");
	free(cm_ctx);
	return;
err:
	/* Everything needed from cm_ctx is copied out before it is freed. */
	err_entry.fid = cm_ctx->fid;
	err_entry.context = cm_ctx->fid->context;
	err_entry.err = -ret;
	if (cm_ctx->cm_data_sz) {
		err_entry.err_data = calloc(1, cm_ctx->cm_data_sz);
		/* Best effort: without memory the entry goes out without data. */
		if (OFI_LIKELY(err_entry.err_data != NULL)) {
			memcpy(err_entry.err_data, cm_ctx->cm_data,
			       cm_ctx->cm_data_sz);
			err_entry.err_data_size = cm_ctx->cm_data_sz;
		}
	}
	/* PRId64 requires an int64_t argument; ssize_t may be a different
	 * type, so convert explicitly. */
	FI_DBG(&tcpx_prov, FI_LOG_EP_CTRL,
	       "fi_eq_write the conn refused %"PRId64"\n", (int64_t) ret);
	free(cm_ctx);
	/* `err_entry.err_data` must live until it is passed to user */
	ret = fi_eq_write(&ep->util_ep.eq->eq_fid, FI_NOTIFY,
			  &err_entry, sizeof(err_entry), UTIL_FLAG_ERROR);
	if (OFI_UNLIKELY(ret < 0)) {
		/* The entry never reached the EQ, so the copy is still ours. */
		free(err_entry.err_data);
	}
}
Example #6
0
/*
 * Vectored RMA read: builds a single-segment remote descriptor covering the
 * total length of `iov` and forwards the request to rxm_ep_rma_common() with
 * fi_readmsg as the posting function and the endpoint's default TX flags.
 *
 * Returns whatever rxm_ep_rma_common() returns.
 */
static ssize_t rxm_ep_readv(struct fid_ep *ep_fid, const struct iovec *iov,
			    void **desc, size_t count, fi_addr_t src_addr,
			    uint64_t addr, uint64_t key, void *context)
{
	struct rxm_ep *ep = container_of(ep_fid, struct rxm_ep,
					 util_ep.ep_fid.fid);
	struct fi_rma_iov remote = { 0 };
	struct fi_msg_rma req = { 0 };

	/* Remote side: one contiguous region as large as the local iov. */
	remote.addr = addr;
	remote.len = ofi_total_iov_len(iov, count);
	remote.key = key;

	/* Local side: the caller's iov and descriptors are used as given. */
	req.msg_iov = iov;
	req.desc = desc;
	req.iov_count = count;
	req.addr = src_addr;
	req.rma_iov = &remote;
	req.rma_iov_count = 1;
	req.context = context;
	req.data = 0;

	return rxm_ep_rma_common(ep, &req, rxm_ep_tx_flags(ep), fi_readmsg, FI_READ);
}

/*
 * Single-buffer RMA read: wraps `buf`/`len` in a one-element iov, describes
 * the remote region with the same length, and forwards the request to
 * rxm_ep_rma_common() with fi_readmsg and the endpoint's default TX flags.
 *
 * Returns whatever rxm_ep_rma_common() returns.
 */
static ssize_t rxm_ep_read(struct fid_ep *ep_fid, void *buf, size_t len,
			   void *desc, fi_addr_t src_addr, uint64_t addr,
			   uint64_t key, void *context)
{
	struct rxm_ep *ep = container_of(ep_fid, struct rxm_ep,
					 util_ep.ep_fid.fid);
	struct iovec local = { 0 };
	struct fi_rma_iov remote = { 0 };
	struct fi_msg_rma req = { 0 };

	local.iov_base = buf;
	local.iov_len = len;

	remote.addr = addr;
	remote.len = len;
	remote.key = key;

	req.msg_iov = &local;
	req.desc = &desc;	/* one descriptor for the one segment */
	req.iov_count = 1;
	req.addr = src_addr;
	req.rma_iov = &remote;
	req.rma_iov_count = 1;
	req.context = context;
	req.data = 0;

	return rxm_ep_rma_common(ep, &req, rxm_ep_tx_flags(ep), fi_readmsg, FI_READ);
}

/*
 * Build the message that is actually posted for an emulated inject write.
 *
 * The payload described by orig_msg's iov is copied into rma_buf->pkt.data
 * (at most rma_buf->pkt.hdr.size bytes, which the caller must have set), and
 * rxm_msg is filled in so that it sends that single staged segment with the
 * buffer's descriptor. Destination address, immediate data and the remote
 * iov are taken over from orig_msg; rma_buf becomes the operation context.
 */
static inline void
rxm_ep_format_rma_msg(struct rxm_rma_buf *rma_buf, const struct fi_msg_rma *orig_msg,
		      struct iovec *rxm_iov, struct fi_msg_rma *rxm_msg)
{
	/* Stage the user's data inside the RMA buffer. */
	ofi_copy_from_iov(rma_buf->pkt.data, rma_buf->pkt.hdr.size,
			  orig_msg->msg_iov, orig_msg->iov_count, 0);

	/* Local side: exactly one segment pointing at the staged copy. */
	rxm_iov->iov_base = &rma_buf->pkt.data;
	rxm_iov->iov_len = rma_buf->pkt.hdr.size;

	rxm_msg->msg_iov = rxm_iov;
	rxm_msg->iov_count = 1;
	rxm_msg->desc = &rma_buf->hdr.desc;

	/* Remote side and addressing are inherited unchanged. */
	rxm_msg->rma_iov = orig_msg->rma_iov;
	rxm_msg->rma_iov_count = orig_msg->rma_iov_count;
	rxm_msg->addr = orig_msg->addr;
	rxm_msg->data = orig_msg->data;

	rxm_msg->context = rma_buf;
}

static inline ssize_t
rxm_ep_rma_emulate_inject_msg(struct rxm_ep *rxm_ep, struct rxm_conn *rxm_conn, size_t total_size,
			      const struct fi_msg_rma *msg, uint64_t flags)
{
	struct rxm_rma_buf *rma_buf;
	ssize_t ret;
	struct iovec rxm_msg_iov = { 0 };
	struct fi_msg_rma rxm_rma_msg = { 0 };

	assert(msg->rma_iov_count <= rxm_ep->rxm_info->tx_attr->rma_iov_limit);

	ofi_ep_lock_acquire(&rxm_ep->util_ep);
	rma_buf = rxm_rma_buf_alloc(rxm_ep);
	if (OFI_UNLIKELY(!rma_buf)) {
		FI_WARN(&rxm_prov, FI_LOG_EP_DATA,
			"Ran out of buffers from RMA buffer pool\n");
		ret = -FI_ENOMEM;
		goto unlock;
	}

	rma_buf->pkt.hdr.size = total_size;
	rma_buf->app_context = msg->context;
	rma_buf->flags = flags;
	rxm_ep_format_rma_msg(rma_buf, msg, &rxm_msg_iov, &rxm_rma_msg);

	flags = (flags & ~FI_INJECT) | FI_COMPLETION;

	ret = fi_writemsg(rxm_conn->msg_ep, &rxm_rma_msg, flags);
	if (OFI_UNLIKELY(ret)) {
		if (ret == -FI_EAGAIN)
			rxm_ep_do_progress(&rxm_ep->util_ep);
		ofi_buf_free(rma_buf);
	}
unlock:
	ofi_ep_lock_release(&rxm_ep->util_ep);
	return ret;
}

/*
 * Convenience wrapper around rxm_ep_rma_emulate_inject_msg() for a single
 * contiguous buffer: describes `buf`/`len` and the remote region as
 * one-element iovs, with no descriptor and no application context, and
 * passes `len` as the total size.
 *
 * Returns whatever rxm_ep_rma_emulate_inject_msg() returns.
 */
static inline ssize_t
rxm_ep_rma_emulate_inject(struct rxm_ep *rxm_ep, struct rxm_conn *rxm_conn,
			  const void *buf, size_t len, uint64_t data,
			  fi_addr_t dest_addr, uint64_t addr, uint64_t key,
			  uint64_t flags)
{
	struct iovec local = { 0 };
	struct fi_rma_iov remote = { 0 };
	struct fi_msg_rma req = { 0 };

	local.iov_base = (void *) buf;
	local.iov_len = len;

	remote.addr = addr;
	remote.len = len;
	remote.key = key;

	req.msg_iov = &local;
	req.desc = NULL;
	req.iov_count = 1;
	req.addr = dest_addr;
	req.rma_iov = &remote;
	req.rma_iov_count = 1;
	req.context = NULL;
	req.data = data;

	return rxm_ep_rma_emulate_inject_msg(rxm_ep, rxm_conn, len, &req, flags);
}

/*
 * Inject-flavoured RMA write for a fully described message.
 *
 * After resolving the connection for msg->addr, the write is handed straight
 * to the MSG provider's inject call when all of the following hold: the
 * total payload fits the MSG provider's inject_size, FI_COMPLETION is not
 * requested, and both the local and the remote iov have exactly one element.
 * FI_REMOTE_CQ_DATA selects fi_inject_writedata() over fi_inject_write().
 * Every other case goes through rxm_ep_rma_emulate_inject_msg().
 *
 * Returns 0 on success (the endpoint's write counter is incremented on the
 * direct path), or the error from connection setup / the posting call. On
 * -FI_EAGAIN from the direct path the endpoint is progressed once.
 */
static inline ssize_t
rxm_ep_rma_inject_common(struct rxm_ep *rxm_ep, const struct fi_msg_rma *msg, uint64_t flags)
{
	struct rxm_conn *rxm_conn;
	size_t total_size = ofi_total_iov_len(msg->msg_iov, msg->iov_count);
	ssize_t ret;

	assert(total_size <= rxm_ep->rxm_info->tx_attr->inject_size);

	ret = rxm_ep_prepare_tx(rxm_ep, msg->addr, &rxm_conn);
	if (OFI_UNLIKELY(ret))
		return ret;

	if ((total_size <= rxm_ep->msg_info->tx_attr->inject_size) &&
	    !(flags & FI_COMPLETION) &&
	    (msg->iov_count == 1) && (msg->rma_iov_count == 1)) {
		if (flags & FI_REMOTE_CQ_DATA) {
			ret = fi_inject_writedata(rxm_conn->msg_ep,
						  msg->msg_iov->iov_base,
						  msg->msg_iov->iov_len, msg->data,
						  msg->addr, msg->rma_iov->addr,
						  msg->rma_iov->key);
		} else {
			ret = fi_inject_write(rxm_conn->msg_ep,
					      msg->msg_iov->iov_base,
					      msg->msg_iov->iov_len, msg->addr,
					      msg->rma_iov->addr,
					      msg->rma_iov->key);
		}
		if (OFI_LIKELY(!ret)) {
			ofi_ep_wr_cntr_inc(&rxm_ep->util_ep);
		} else {
			/* PRId64 requires an int64_t argument; ssize_t may be
			 * a different type, so convert explicitly. */
			FI_DBG(&rxm_prov, FI_LOG_EP_DATA,
			       "fi_inject_write* for MSG provider failed with ret - %"
			       PRId64"\n", (int64_t) ret);
			if (OFI_LIKELY(ret == -FI_EAGAIN))
				rxm_ep_progress(&rxm_ep->util_ep);
		}
		return ret;
	} else {
		return rxm_ep_rma_emulate_inject_msg(rxm_ep, rxm_conn, total_size, msg, flags);
	}
}

/*
 * Dispatch an RMA write according to `flags`: requests carrying FI_INJECT go
 * through rxm_ep_rma_inject_common(), all others through rxm_ep_rma_common()
 * with fi_writemsg as the posting function.
 *
 * Returns the result of the selected path.
 */
static inline ssize_t
rxm_ep_generic_writemsg(struct fid_ep *ep_fid, const struct fi_msg_rma *msg,
			uint64_t flags)
{
	struct rxm_ep *ep = container_of(ep_fid, struct rxm_ep,
					 util_ep.ep_fid.fid);

	if (!(flags & FI_INJECT))
		return rxm_ep_rma_common(ep, msg, flags,
					 fi_writemsg, FI_WRITE);

	return rxm_ep_rma_inject_common(ep, msg, flags);
}

/*
 * fi_writemsg entry point: ORs the endpoint's tx_msg_flags into the
 * caller-supplied flags and forwards to rxm_ep_generic_writemsg().
 */
static inline ssize_t
rxm_ep_writemsg(struct fid_ep *ep_fid, const struct fi_msg_rma *msg, uint64_t flags)
{
	struct rxm_ep *ep = container_of(ep_fid, struct rxm_ep,
					 util_ep.ep_fid.fid);
	uint64_t op_flags = flags | ep->util_ep.tx_msg_flags;

	return rxm_ep_generic_writemsg(ep_fid, msg, op_flags);
}

/*
 * Vectored RMA write: describes the remote region as one segment spanning
 * the total length of `iov` and forwards the request to
 * rxm_ep_generic_writemsg() with the endpoint's default TX flags.
 *
 * Returns whatever rxm_ep_generic_writemsg() returns.
 */
static ssize_t rxm_ep_writev(struct fid_ep *ep_fid, const struct iovec *iov,
			     void **desc, size_t count, fi_addr_t dest_addr,
			     uint64_t addr, uint64_t key, void *context)
{
	struct rxm_ep *ep = container_of(ep_fid, struct rxm_ep,
					 util_ep.ep_fid.fid);
	struct fi_rma_iov remote = { 0 };
	struct fi_msg_rma req = { 0 };

	remote.addr = addr;
	remote.len = ofi_total_iov_len(iov, count);
	remote.key = key;

	req.msg_iov = iov;
	req.desc = desc;
	req.iov_count = count;
	req.addr = dest_addr;
	req.rma_iov = &remote;
	req.rma_iov_count = 1;
	req.context = context;
	req.data = 0;

	return rxm_ep_generic_writemsg(ep_fid, &req, rxm_ep_tx_flags(ep));
}

/*
 * Single-buffer RMA write carrying remote CQ data: wraps `buf`/`len` in a
 * one-element iov, stores `data` in the message, and forwards to
 * rxm_ep_generic_writemsg() with the endpoint's default TX flags plus
 * FI_REMOTE_CQ_DATA.
 *
 * Returns whatever rxm_ep_generic_writemsg() returns.
 */
static ssize_t rxm_ep_writedata(struct fid_ep *ep_fid, const void *buf,
				size_t len, void *desc, uint64_t data,
				fi_addr_t dest_addr, uint64_t addr,
				uint64_t key, void *context)
{
	struct rxm_ep *ep = container_of(ep_fid, struct rxm_ep,
					 util_ep.ep_fid.fid);
	struct iovec local = { 0 };
	struct fi_rma_iov remote = { 0 };
	struct fi_msg_rma req = { 0 };

	local.iov_base = (void *) buf;
	local.iov_len = len;

	remote.addr = addr;
	remote.len = len;
	remote.key = key;

	req.msg_iov = &local;
	req.desc = &desc;	/* one descriptor for the one segment */
	req.iov_count = 1;
	req.addr = dest_addr;
	req.rma_iov = &remote;
	req.rma_iov_count = 1;
	req.context = context;
	req.data = data;

	return rxm_ep_generic_writemsg(ep_fid, &req,
				       rxm_ep_tx_flags(ep) | FI_REMOTE_CQ_DATA);
}

/*
 * Single-buffer RMA write: wraps `buf`/`len` in a one-element iov, describes
 * the remote region with the same length, and forwards to
 * rxm_ep_generic_writemsg() with the endpoint's default TX flags.
 *
 * Returns whatever rxm_ep_generic_writemsg() returns.
 */
static ssize_t rxm_ep_write(struct fid_ep *ep_fid, const void *buf,
			    size_t len, void *desc, fi_addr_t dest_addr,
			    uint64_t addr, uint64_t key, void *context)
{
	struct rxm_ep *ep = container_of(ep_fid, struct rxm_ep,
					 util_ep.ep_fid.fid);
	struct iovec local = { 0 };
	struct fi_rma_iov remote = { 0 };
	struct fi_msg_rma req = { 0 };

	local.iov_base = (void *) buf;
	local.iov_len = len;

	remote.addr = addr;
	remote.len = len;
	remote.key = key;

	req.msg_iov = &local;
	req.desc = &desc;	/* one descriptor for the one segment */
	req.iov_count = 1;
	req.addr = dest_addr;
	req.rma_iov = &remote;
	req.rma_iov_count = 1;
	req.context = context;
	req.data = 0;

	return rxm_ep_generic_writemsg(ep_fid, &req, rxm_ep_tx_flags(ep));
}

/*
 * fi_inject_write entry point.
 *
 * After resolving the connection for dest_addr, payloads that fit the MSG
 * provider's inject_size are passed directly to fi_inject_write() on the
 * connection's MSG endpoint; larger payloads are staged and sent through
 * rxm_ep_rma_emulate_inject() with FI_INJECT.
 *
 * Returns 0 on success (the endpoint's write counter is incremented on the
 * direct path), or the error from connection setup / the posting call. On
 * -FI_EAGAIN from the direct path the endpoint is progressed once.
 */
static ssize_t rxm_ep_inject_write(struct fid_ep *ep_fid, const void *buf,
				   size_t len, fi_addr_t dest_addr,
				   uint64_t addr, uint64_t key)
{
	ssize_t ret;
	struct rxm_conn *rxm_conn;
	struct rxm_ep *rxm_ep = container_of(ep_fid, struct rxm_ep,
					     util_ep.ep_fid.fid);

	ret = rxm_ep_prepare_tx(rxm_ep, dest_addr, &rxm_conn);
	if (OFI_UNLIKELY(ret))
		return ret;

	if (len <= rxm_ep->msg_info->tx_attr->inject_size) {
		ret = fi_inject_write(rxm_conn->msg_ep, buf, len,
				      dest_addr, addr, key);
		if (OFI_LIKELY(!ret)) {
			ofi_ep_wr_cntr_inc(&rxm_ep->util_ep);
		} else {
			/* PRId64 requires an int64_t argument; ssize_t may be
			 * a different type, so convert explicitly. */
			FI_DBG(&rxm_prov, FI_LOG_EP_DATA,
			       "fi_inject_write for MSG provider failed with ret - %"
			       PRId64"\n", (int64_t) ret);
			if (OFI_LIKELY(ret == -FI_EAGAIN))
				rxm_ep_progress(&rxm_ep->util_ep);
		}
		return ret;
	} else {
		return rxm_ep_rma_emulate_inject(rxm_ep, rxm_conn, buf, len,
						 0, dest_addr, addr, key, FI_INJECT);
	}
}

/*
 * fi_inject_writedata entry point.
 *
 * After resolving the connection for dest_addr, payloads that fit the MSG
 * provider's inject_size are passed directly to fi_inject_writedata() on the
 * connection's MSG endpoint; larger payloads are staged and sent through
 * rxm_ep_rma_emulate_inject() with FI_REMOTE_CQ_DATA | FI_INJECT.
 *
 * Returns 0 on success (the endpoint's write counter is incremented on the
 * direct path), or the error from connection setup / the posting call. On
 * -FI_EAGAIN from the direct path the endpoint is progressed once.
 */
static ssize_t rxm_ep_inject_writedata(struct fid_ep *ep_fid, const void *buf,
				       size_t len, uint64_t data,
				       fi_addr_t dest_addr, uint64_t addr,
				       uint64_t key)
{
	ssize_t ret;
	struct rxm_conn *rxm_conn;
	struct rxm_ep *rxm_ep = container_of(ep_fid, struct rxm_ep,
					     util_ep.ep_fid.fid);
	ret = rxm_ep_prepare_tx(rxm_ep, dest_addr, &rxm_conn);
	if (OFI_UNLIKELY(ret))
		return ret;

	if (len <= rxm_ep->msg_info->tx_attr->inject_size) {
		ret = fi_inject_writedata(rxm_conn->msg_ep, buf, len,
					  data, dest_addr, addr, key);
		if (OFI_LIKELY(!ret)) {
			ofi_ep_wr_cntr_inc(&rxm_ep->util_ep);
		} else {
			/* PRId64 requires an int64_t argument; ssize_t may be
			 * a different type, so convert explicitly. */
			FI_DBG(&rxm_prov, FI_LOG_EP_DATA,
			       "fi_inject_writedata for MSG provider failed with ret - %"
			       PRId64"\n", (int64_t) ret);
			if (OFI_LIKELY(ret == -FI_EAGAIN))
				rxm_ep_progress(&rxm_ep->util_ep);
		}
		return ret;
	} else {
		return rxm_ep_rma_emulate_inject(rxm_ep, rxm_conn, buf, len,
						 data, dest_addr, addr, key,
						 FI_REMOTE_CQ_DATA | FI_INJECT);
	}
}

/* RMA operation table that binds the fi_ops_rma entry points to the rxm
 * implementations. */
struct fi_ops_rma rxm_ops_rma = {
	.size = sizeof(struct fi_ops_rma),

	/* Reads */
	.read = rxm_ep_read,
	.readv = rxm_ep_readv,
	.readmsg = rxm_ep_readmsg,

	/* Writes */
	.write = rxm_ep_write,
	.writev = rxm_ep_writev,
	.writemsg = rxm_ep_writemsg,
	.writedata = rxm_ep_writedata,

	/* Inject writes */
	.inject = rxm_ep_inject_write,
	.injectdata = rxm_ep_inject_writedata,
};