Example #1
static inline ssize_t
rxm_ep_rma_common(struct rxm_ep *rxm_ep, const struct fi_msg_rma *msg, uint64_t flags,
		  rxm_rma_msg_fn rma_msg, uint64_t comp_flags)
{
	struct rxm_rma_buf *rma_buf;
	struct fi_msg_rma msg_rma = *msg;
	struct rxm_conn *rxm_conn;
	void *mr_desc[RXM_IOV_LIMIT] = { 0 };
	int ret;

	assert(msg->rma_iov_count <= rxm_ep->rxm_info->tx_attr->rma_iov_limit);

	ret = rxm_ep_prepare_tx(rxm_ep, msg->addr, &rxm_conn);
	if (OFI_UNLIKELY(ret))
		return ret;

	ofi_ep_lock_acquire(&rxm_ep->util_ep);
	rma_buf = rxm_rma_buf_alloc(rxm_ep);
	if (OFI_UNLIKELY(!rma_buf)) {
		FI_WARN(&rxm_prov, FI_LOG_EP_DATA,
			"Ran out of buffers from RMA buffer pool\n");
		ret = -FI_ENOMEM;
		goto unlock;
	}

	rma_buf->app_context = msg->context;
	rma_buf->flags = flags;

	ret = rxm_ep_rma_reg_iov(rxm_ep, msg_rma.msg_iov, msg_rma.desc, mr_desc,
				 msg_rma.iov_count, comp_flags & (FI_WRITE | FI_READ),
				 rma_buf);
	if (OFI_UNLIKELY(ret))
		goto release;

	msg_rma.desc = mr_desc;
	msg_rma.context = rma_buf;

	ret = rma_msg(rxm_conn->msg_ep, &msg_rma, flags);
	if (OFI_LIKELY(!ret))
		goto unlock;

	if ((rxm_ep->msg_mr_local) && (!rxm_ep->rxm_mr_local))
		rxm_ep_msg_mr_closev(rma_buf->mr.mr, rma_buf->mr.count);
release:
	ofi_buf_free(rma_buf);
unlock:
	ofi_ep_lock_release(&rxm_ep->util_ep);
	return ret;
}
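
Every snippet collected here hinges on the OFI_LIKELY()/OFI_UNLIKELY() branch hints. For reference, a minimal sketch of these macros, assuming a GCC/Clang-style compiler (libfabric's real definitions live in its internal headers and also cover compilers without __builtin_expect):

/* Minimal sketch of the branch-hint macros used throughout these
 * examples, assuming a GCC/Clang-compatible compiler. Compilers
 * without __builtin_expect would fall back to plain (x). */
#if defined(__GNUC__)
#define OFI_LIKELY(x)   __builtin_expect(!!(x), 1)
#define OFI_UNLIKELY(x) __builtin_expect(!!(x), 0)
#else
#define OFI_LIKELY(x)   (x)
#define OFI_UNLIKELY(x) (x)
#endif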
Example #2
/* SMSG callback for AMO remote counter control message. */
int __smsg_amo_cntr(void *data, void *msg)
{
	int ret = FI_SUCCESS;
	struct gnix_vc *vc = (struct gnix_vc *)data;
	struct gnix_smsg_amo_cntr_hdr *hdr =
			(struct gnix_smsg_amo_cntr_hdr *)msg;
	struct gnix_fid_ep *ep = vc->ep;
	gni_return_t status;

	if (hdr->flags & FI_REMOTE_WRITE && ep->rwrite_cntr) {
		ret = _gnix_cntr_inc(ep->rwrite_cntr);
		if (ret != FI_SUCCESS)
			GNIX_WARN(FI_LOG_EP_DATA,
				  "_gnix_cntr_inc() failed: %d\n",
				  ret);
	}

	if (hdr->flags & FI_REMOTE_READ && ep->rread_cntr) {
		ret = _gnix_cntr_inc(ep->rread_cntr);
		if (ret != FI_SUCCESS)
			GNIX_WARN(FI_LOG_EP_DATA,
				  "_gnix_cntr_inc() failed: %d\n",
				  ret);
	}

	status = GNI_SmsgRelease(vc->gni_ep);
	if (OFI_UNLIKELY(status != GNI_RC_SUCCESS)) {
		GNIX_WARN(FI_LOG_EP_DATA,
			  "GNI_SmsgRelease returned %s\n",
			  gni_err_str[status]);
		ret = gnixu_to_fi_errno(status);
	}

	return ret;
}
Example #3
/* Must call with cq->lock held */
static inline int fi_ibv_poll_outstanding_cq(struct fi_ibv_ep *ep,
					     struct fi_ibv_cq *cq)
{
	struct fi_ibv_wce *wce;
	struct ibv_wc wc;
	ssize_t ret;

	ret = ibv_poll_cq(cq->cq, 1, &wc);
	if (ret <= 0)
		return ret;

	/* Handle WR entry when user doesn't request the completion */
	if (wc.wr_id == VERBS_INJECT_FLAG) {
		/* To ensure the new iteration */
		return 1;
	}

	if (wc.status != IBV_WC_WR_FLUSH_ERR) {
		ret = fi_ibv_wc_2_wce(cq, &wc, &wce);
		if (OFI_UNLIKELY(ret)) {
			ret = -FI_EAGAIN;
			goto fn;
		}
		slist_insert_tail(&wce->entry, &cq->wcq);
	}
	ret = 1;
fn:
	return ret;
}
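
A hypothetical caller could drain the CQ with the helper above; the loop contract (1 = an entry was consumed, 0 = empty, negative = error) comes from the snippet itself, and the locking assumes cq->lock is the util-CQ fastlock seen in Example #9:

/* Hypothetical helper (not from libfabric): drain all outstanding
 * completions for an endpoint. fi_ibv_poll_outstanding_cq() requires
 * cq->lock to be held, so the CQ fastlock is taken around the loop. */
static ssize_t fi_ibv_drain_outstanding(struct fi_ibv_ep *ep,
					struct fi_ibv_cq *cq)
{
	ssize_t ret;

	cq->util_cq.cq_fastlock_acquire(&cq->util_cq.cq_lock);
	do {
		ret = fi_ibv_poll_outstanding_cq(ep, cq);
	} while (ret > 0);
	cq->util_cq.cq_fastlock_release(&cq->util_cq.cq_lock);

	return ret;
}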
Example #4
static inline ssize_t
rxm_ep_rma_reg_iov(struct rxm_ep *rxm_ep, const struct iovec *msg_iov,
		   void **desc, void **desc_storage, size_t iov_count,
		   uint64_t comp_flags, struct rxm_rma_buf *rma_buf)
{
	size_t i;

	if (rxm_ep->msg_mr_local) {
		if (!rxm_ep->rxm_mr_local) {
			ssize_t ret =
				rxm_ep_msg_mr_regv(rxm_ep, msg_iov, iov_count,
						   comp_flags & (FI_WRITE | FI_READ),
						   rma_buf->mr.mr);
			if (OFI_UNLIKELY(ret))
				return ret;

			for (i = 0; i < iov_count; i++)
				desc_storage[i] = fi_mr_desc(rma_buf->mr.mr[i]);
			rma_buf->mr.count = iov_count;
		} else {
			for (i = 0; i < iov_count; i++)
				desc_storage[i] = fi_mr_desc(desc[i]);
		}
	}
	return FI_SUCCESS;
}
Example #5
static inline int
rxm_ep_send_atomic_req(struct rxm_ep *rxm_ep, struct rxm_conn *rxm_conn,
		       struct rxm_tx_atomic_buf *tx_buf, uint64_t len)
{
	int ret;

	/* Atomic request TX completion processing is performed when the
	 * software generated atomic response message is received. */
	tx_buf->hdr.state = RXM_ATOMIC_RESP_WAIT;
	if (len <= rxm_ep->inject_limit)
		ret = fi_inject(rxm_conn->msg_ep, &tx_buf->pkt, len, 0);
	else
		ret = fi_send(rxm_conn->msg_ep, &tx_buf->pkt, len,
			      tx_buf->hdr.desc, 0, tx_buf);
	if (ret == -FI_EAGAIN)
		rxm_ep_do_progress(&rxm_ep->util_ep);

	if (OFI_LIKELY(!ret))
		FI_DBG(&rxm_prov, FI_LOG_EP_DATA, "sent atomic request: op: %"
		       PRIu8 " msg_id: 0x%" PRIx64 "\n", tx_buf->pkt.hdr.op,
		       tx_buf->pkt.ctrl_hdr.msg_id);
	else if (OFI_UNLIKELY(ret != -FI_EAGAIN))
		FI_WARN(&rxm_prov, FI_LOG_EP_DATA, "unable to send atomic "
			"request: op: %" PRIu8 " msg_id: 0x%" PRIx64 "\n",
			tx_buf->pkt.hdr.op, tx_buf->pkt.ctrl_hdr.msg_id);
	return ret;
}
Example #6
File: rxm_cq.c Project: ofiwg/libfabric
static int rxm_cq_write_error_trunc(struct rxm_rx_buf *rx_buf, size_t done_len)
{
	int ret;

	if (rx_buf->ep->util_ep.flags & OFI_CNTR_ENABLED)
		rxm_cntr_incerr(rx_buf->ep->util_ep.rx_cntr);

	FI_WARN(&rxm_prov, FI_LOG_CQ, "Message truncated: "
		"recv buf length: %zu message length: %" PRIu64 "\n",
		done_len, rx_buf->pkt.hdr.size);
	ret = ofi_cq_write_error_trunc(rx_buf->ep->util_ep.rx_cq,
				       rx_buf->recv_entry->context,
				       rx_buf->recv_entry->comp_flags |
				       rxm_cq_get_rx_comp_flags(rx_buf),
				       rx_buf->pkt.hdr.size,
				       rx_buf->recv_entry->rxm_iov.iov[0].iov_base,
				       rx_buf->pkt.hdr.data, rx_buf->pkt.hdr.tag,
				       rx_buf->pkt.hdr.size - done_len);
	if (OFI_UNLIKELY(ret)) {
		FI_WARN(&rxm_prov, FI_LOG_CQ,
			"Unable to write recv error CQ\n");
		return ret;
	}
	return 0;
}
Example #7
int ofi_monitor_subscribe(struct ofi_notification_queue *nq,
			  void *addr, size_t len,
			  struct ofi_subscription *subscription)
{
	int ret;

	FI_DBG(&core_prov, FI_LOG_MR,
	       "subscribing addr=%p len=%zu subscription=%p nq=%p\n",
	       addr, len, subscription, nq);

	/* Ensure the subscription is initialized before we can get events */
	dlist_init(&subscription->entry);

	subscription->nq = nq;
	subscription->addr = addr;
	subscription->len = len;
	fastlock_acquire(&nq->lock);
	nq->refcnt++;
	fastlock_release(&nq->lock);

	ret = nq->monitor->subscribe(nq->monitor, addr, len, subscription);
	if (OFI_UNLIKELY(ret)) {
		FI_WARN(&core_prov, FI_LOG_MR,
			"Failed (ret = %d) to monitor addr=%p len=%zu",
			ret, addr, len);
		fastlock_acquire(&nq->lock);
		nq->refcnt--;
		fastlock_release(&nq->lock);
	}
	return ret;
}
Example #8
static ssize_t
mrail_tsend_common(struct fid_ep *ep_fid, const struct iovec *iov, void **desc,
		   size_t count, size_t len, fi_addr_t dest_addr, uint64_t tag,
		   uint64_t data, void *context, uint64_t flags)
{
	struct mrail_ep *mrail_ep = container_of(ep_fid, struct mrail_ep,
						 util_ep.ep_fid.fid);
	struct mrail_peer_info *peer_info;
	struct iovec *iov_dest = alloca(sizeof(*iov_dest) * (count + 1));
	struct mrail_tx_buf *tx_buf;
	uint32_t i = mrail_get_tx_rail(mrail_ep);
	struct fi_msg msg;
	ssize_t ret;

	peer_info = ofi_av_get_addr(mrail_ep->util_ep.av, (int) dest_addr);

	ofi_ep_lock_acquire(&mrail_ep->util_ep);

	tx_buf = mrail_get_tx_buf(mrail_ep, context, peer_info->seq_no++,
				  ofi_op_tagged, flags | FI_TAGGED);
	if (OFI_UNLIKELY(!tx_buf)) {
		ret = -FI_ENOMEM;
		goto err1;
	}
	tx_buf->hdr.tag = tag;
	mrail_copy_iov_hdr(&tx_buf->hdr, iov_dest, iov, count);

	msg.msg_iov 	= iov_dest;
	msg.desc    	= desc;
	msg.iov_count	= count + 1;
	msg.addr	= dest_addr;
	msg.context	= tx_buf;
	msg.data	= data;

	if (len < mrail_ep->rails[i].info->tx_attr->inject_size)
		flags |= FI_INJECT;

	FI_DBG(&mrail_prov, FI_LOG_EP_DATA, "Posting tsend of length: %" PRIu64
	       " dest_addr: 0x%" PRIx64 " tag: 0x%" PRIx64 " seq: %d"
	       " on rail: %d\n", len, dest_addr, tag, peer_info->seq_no - 1, i);

	ret = fi_sendmsg(mrail_ep->rails[i].ep, &msg, flags);
	if (ret) {
		FI_WARN(&mrail_prov, FI_LOG_EP_DATA,
			"Unable to fi_sendmsg on rail: %" PRIu32 "\n", i);
		goto err2;
	} else if (!(flags & FI_COMPLETION)) {
		ofi_ep_tx_cntr_inc(&mrail_ep->util_ep);
	}
	ofi_ep_lock_release(&mrail_ep->util_ep);
	return ret;
err2:
	util_buf_release(mrail_ep->tx_buf_pool, tx_buf);
err1:
	peer_info->seq_no--;
	ofi_ep_lock_release(&mrail_ep->util_ep);
	return ret;
}
Example #9
static ssize_t fi_ibv_cq_read(struct fid_cq *cq_fid, void *buf, size_t count)
{
	struct fi_ibv_cq *cq;
	struct fi_ibv_wce *wce;
	struct slist_entry *entry;
	struct ibv_wc wc;
	ssize_t ret = 0, i;

	cq = container_of(cq_fid, struct fi_ibv_cq, util_cq.cq_fid);

	cq->util_cq.cq_fastlock_acquire(&cq->util_cq.cq_lock);

	for (i = 0; i < count; i++) {
		if (!slist_empty(&cq->wcq)) {
			wce = container_of(cq->wcq.head, struct fi_ibv_wce, entry);
			if (wce->wc.status) {
				ret = -FI_EAVAIL;
				break;
			}
			entry = slist_remove_head(&cq->wcq);
			wce = container_of(entry, struct fi_ibv_wce, entry);
			cq->read_entry(&wce->wc, (char *)buf + i * cq->entry_size);
			util_buf_release(cq->wce_pool, wce);
			continue;
		}

		ret = fi_ibv_poll_cq(cq, &wc);
		if (ret <= 0)
			break;

		/* Insert error entry into wcq */
		if (OFI_UNLIKELY(wc.status)) {
			if (wc.status == IBV_WC_WR_FLUSH_ERR) {
				/* Handle case when remote side destroys
				 * the connection, but local side isn't aware
				 * about that yet */
				VERBS_DBG(FI_LOG_CQ,
					  "Ignoring WC with status "
					  "IBV_WC_WR_FLUSH_ERR(%d)\n",
					  wc.status);
				i--;
				continue;
			}
			wce = util_buf_alloc(cq->wce_pool);
			if (!wce) {
				cq->util_cq.cq_fastlock_release(&cq->util_cq.cq_lock);
				return -FI_ENOMEM;
			}
			memset(wce, 0, sizeof(*wce));
			memcpy(&wce->wc, &wc, sizeof wc);
			slist_insert_tail(&wce->entry, &cq->wcq);
			ret = -FI_EAVAIL;
			break;
		}

		cq->read_entry(&wc, (char *)buf + i * cq->entry_size);
	}

	cq->util_cq.cq_fastlock_release(&cq->util_cq.cq_lock);

	/* Return the number of entries read; with nothing read, surface the
	 * error, or -FI_EAGAIN when the CQ was simply empty. */
	return i ? i : (ret ? ret : -FI_EAGAIN);
}
Example #10
static int
util_mr_cache_create(struct ofi_mr_cache *cache, const struct iovec *iov,
		     uint64_t access, struct ofi_mr_entry **entry)
{
	int ret;

	FI_DBG(cache->domain->prov, FI_LOG_MR, "create %p (len: %" PRIu64 ")\n",
	       iov->iov_base, iov->iov_len);

	util_mr_cache_process_events(cache);

	*entry = util_buf_alloc(cache->entry_pool);
	if (OFI_UNLIKELY(!*entry))
		return -FI_ENOMEM;

	(*entry)->iov = *iov;
	(*entry)->use_cnt = 1;

	ret = cache->add_region(cache, *entry);
	if (ret) {
		while (ret && ofi_mr_cache_flush(cache)) {
			ret = cache->add_region(cache, *entry);
		}
		if (ret) {
			assert(!ofi_mr_cache_flush(cache));
			util_buf_release(cache->entry_pool, *entry);
			return ret;
		}
	}

	cache->cached_size += iov->iov_len;
	if ((++cache->cached_cnt > cache->max_cached_cnt) ||
	    (cache->cached_size > cache->max_cached_size)) {
		(*entry)->cached = 0;
	} else {
		if (cache->mr_storage.insert(&cache->mr_storage,
					     &(*entry)->iov, *entry)) {
			ret = -FI_ENOMEM;
			goto err;
		}
		(*entry)->cached = 1;

		ret = ofi_monitor_subscribe(&cache->nq, iov->iov_base, iov->iov_len,
					    &(*entry)->subscription);
		if (ret)
			goto err;
		(*entry)->subscribed = 1;
	}

	return 0;

err:
	util_mr_free_entry(cache, *entry);
	return ret;
}
Example #11
static struct ofi_mr_entry *ofi_mr_rbt_storage_find(struct ofi_mr_storage *storage,
						    const struct iovec *key)
{
	struct ofi_mr_entry *entry;
	RbtIterator iter = rbtFind((RbtHandle)storage->storage, (void *)key);
	if (OFI_UNLIKELY(!iter))
		return iter;

	rbtKeyValue(storage->storage, iter, (void *)&key, (void *)&entry);
	return entry;
}
Example #12
static ssize_t
rxm_ep_generic_atomic_writemsg(struct rxm_ep *rxm_ep, const struct fi_msg_atomic *msg,
			       uint64_t flags)
{
	int ret;
	struct rxm_conn *rxm_conn;

	ret = rxm_ep_prepare_tx(rxm_ep, msg->addr, &rxm_conn);
	if (OFI_UNLIKELY(ret))
		return ret;

	return rxm_ep_atomic_common(rxm_ep, rxm_conn, msg, NULL, NULL, 0,
				    NULL, NULL, 0, ofi_op_atomic, flags);
}
Example #13
File: rxm_cq.c Project: ofiwg/libfabric
static inline int
rxm_cq_tx_comp_write(struct rxm_ep *rxm_ep, uint64_t comp_flags,
		     void *app_context,  uint64_t flags)
{
	if (flags & FI_COMPLETION) {
		int ret = ofi_cq_write(rxm_ep->util_ep.tx_cq, app_context,
				       comp_flags, 0, NULL, 0, 0);
		if (OFI_UNLIKELY(ret)) {
			FI_WARN(&rxm_prov, FI_LOG_CQ,
				"Unable to report completion\n");
			return ret;
		}
		rxm_cq_log_comp(comp_flags);
	}
	return 0;
}
Example #14
int ofi_monitor_subscribe(struct ofi_mem_monitor *monitor,
			  const void *addr, size_t len)
{
	int ret;

	FI_DBG(&core_prov, FI_LOG_MR,
	       "subscribing addr=%p len=%zu\n", addr, len);

	ret = monitor->subscribe(monitor, addr, len);
	if (OFI_UNLIKELY(ret)) {
		FI_WARN(&core_prov, FI_LOG_MR,
			"Failed (ret = %d) to monitor addr=%p len=%zu\n",
			ret, addr, len);
	}
	return ret;
}
Example #15
static void client_recv_connresp(struct util_wait *wait,
				 struct tcpx_cm_context *cm_ctx)
{
	struct fi_eq_err_entry err_entry = { 0 };
	struct tcpx_ep *ep;
	ssize_t ret;

	assert(cm_ctx->fid->fclass == FI_CLASS_EP);
	ep = container_of(cm_ctx->fid, struct tcpx_ep, util_ep.ep_fid.fid);

	ret = ofi_wait_fd_del(wait, ep->conn_fd);
	if (ret) {
		FI_WARN(&tcpx_prov, FI_LOG_EP_CTRL,
			"Could not remove fd from wait\n");
		goto err;
	}

	ret = proc_conn_resp(cm_ctx, ep);
	if (ret)
		goto err;

	FI_DBG(&tcpx_prov, FI_LOG_EP_CTRL, "Received Accept from server\n");
	free(cm_ctx);
	return;
err:
	err_entry.fid = cm_ctx->fid;
	err_entry.context = cm_ctx->fid->context;
	err_entry.err = -ret;
	if (cm_ctx->cm_data_sz) {
		err_entry.err_data = calloc(1, cm_ctx->cm_data_sz);
		if (OFI_LIKELY(err_entry.err_data != NULL)) {
			memcpy(err_entry.err_data, cm_ctx->cm_data,
			       cm_ctx->cm_data_sz);
			err_entry.err_data_size = cm_ctx->cm_data_sz;
		}
	}
	FI_DBG(&tcpx_prov, FI_LOG_EP_CTRL,
	       "fi_eq_write the conn refused %"PRId64"\n", ret);
	free(cm_ctx);
	/* `err_entry.err_data` must live until it is passed to user */
	ret = fi_eq_write(&ep->util_ep.eq->eq_fid, FI_NOTIFY,
			  &err_entry, sizeof(err_entry), UTIL_FLAG_ERROR);
	if (OFI_UNLIKELY(ret < 0)) {
		free(err_entry.err_data);
	}
}
Example #16
static struct mrail_tx_buf *mrail_get_tx_buf(struct mrail_ep *mrail_ep,
					     void *context, uint32_t seq,
					     uint8_t op, uint64_t flags)
{
	struct mrail_tx_buf *tx_buf = util_buf_alloc(mrail_ep->tx_buf_pool);
	if (OFI_UNLIKELY(!tx_buf))
		return NULL;

	assert(tx_buf->ep == mrail_ep);
	assert(tx_buf->hdr.version == MRAIL_HDR_VERSION);

	tx_buf->context		= context;
	tx_buf->flags		= flags;
	tx_buf->hdr.op		= op;
	tx_buf->hdr.seq		= htonl(seq);
	return tx_buf;
}
Example #17
static ssize_t rxm_ep_readmsg(struct fid_ep *ep_fid, const struct fi_msg_rma *msg,
			      uint64_t flags)
{
	struct util_cmap_handle *handle;
	struct rxm_conn *rxm_conn;
	struct rxm_ep *rxm_ep;
	int ret;

	rxm_ep = container_of(ep_fid, struct rxm_ep, util_ep.ep_fid.fid);

	ret = ofi_cmap_get_handle(rxm_ep->util_ep.cmap, msg->addr, &handle);
	if (OFI_UNLIKELY(ret))
		return ret;
	rxm_conn = container_of(handle, struct rxm_conn, handle);

	return rxm_ep_rma_common(rxm_conn->msg_ep, rxm_ep, msg, flags,
				 fi_readmsg, FI_READ);
}
Example #18
struct mrail_recv *
mrail_match_recv_handle_unexp(struct mrail_recv_queue *recv_queue, uint64_t tag,
			      uint64_t addr, char *data, size_t len, void *context)
{
	struct dlist_entry *entry;
	struct mrail_unexp_msg_entry *unexp_msg_entry;
	struct mrail_match_attr match_attr = {
		.tag	= tag,
		.addr	= addr,
	};

	entry = dlist_remove_first_match(&recv_queue->recv_list,
					 recv_queue->match_recv, &match_attr);
	if (OFI_UNLIKELY(!entry)) {
		unexp_msg_entry = recv_queue->get_unexp_msg_entry(recv_queue,
								  context);
		if (!unexp_msg_entry) {
			FI_WARN(recv_queue->prov, FI_LOG_CQ,
				"Unable to get unexp_msg_entry!");
			assert(0);
			return NULL;
		}

		unexp_msg_entry->addr		= addr;
		unexp_msg_entry->tag		= tag;
		unexp_msg_entry->context	= context;
		memcpy(unexp_msg_entry->data, data, len);

		FI_DBG(recv_queue->prov, FI_LOG_CQ, "No matching recv found for"
		       " incoming msg with addr: 0x%" PRIx64 " tag: 0x%" PRIx64
		       "\n", unexp_msg_entry->addr, unexp_msg_entry->tag);

		FI_DBG(recv_queue->prov, FI_LOG_CQ, "Enqueueing unexp_msg_entry to "
		       "unexpected msg list\n");

		dlist_insert_tail(&unexp_msg_entry->entry,
				  &recv_queue->unexp_msg_list);
		return NULL;
	}
	return container_of(entry, struct mrail_recv, entry);
}
Example #19
static int read_cm_data(SOCKET fd, struct tcpx_cm_context *cm_ctx,
			struct ofi_ctrl_hdr *hdr)
{
	cm_ctx->cm_data_sz = ntohs(hdr->seg_size);
	if (cm_ctx->cm_data_sz) {
		size_t data_sz = MIN(cm_ctx->cm_data_sz,
				     TCPX_MAX_CM_DATA_SIZE);
		ssize_t ret = ofi_recv_socket(fd, cm_ctx->cm_data,
					      data_sz, MSG_WAITALL);
		if ((size_t) ret != data_sz)
			return -FI_EIO;

		/* Drain any excess bytes the peer sent beyond the CM data
		 * buffer, then record how much was actually kept. Checking
		 * before clamping keeps the discard branch reachable. */
		if (OFI_UNLIKELY(cm_ctx->cm_data_sz >
					TCPX_MAX_CM_DATA_SIZE)) {
			discard_cm_data(fd, cm_ctx->cm_data_sz -
					TCPX_MAX_CM_DATA_SIZE);
		}
		cm_ctx->cm_data_sz = data_sz;
	}
	return FI_SUCCESS;
}
Example #20
File: rxm_cq.c Project: ofiwg/libfabric
static int rxm_finish_buf_recv(struct rxm_rx_buf *rx_buf)
{
	uint64_t flags;
	char *data;

	if (rx_buf->pkt.ctrl_hdr.type == ofi_ctrl_seg_data &&
	    rxm_sar_get_seg_type(&rx_buf->pkt.ctrl_hdr) != RXM_SAR_SEG_FIRST) {
		dlist_insert_tail(&rx_buf->unexp_msg.entry,
				  &rx_buf->conn->sar_deferred_rx_msg_list);
		rx_buf = rxm_rx_buf_alloc(rx_buf->ep, rx_buf->msg_ep, 1);
		if (OFI_UNLIKELY(!rx_buf)) {
			FI_WARN(&rxm_prov, FI_LOG_EP_DATA,
				"ran out of buffers from RX buffer pool\n");
			return -FI_ENOMEM;
		}
		dlist_insert_tail(&rx_buf->repost_entry,
				  &rx_buf->ep->repost_ready_list);

		return 0;
	}

	flags = rxm_cq_get_rx_comp_and_op_flags(rx_buf);

	if (rx_buf->pkt.ctrl_hdr.type != ofi_ctrl_data)
		flags |= FI_MORE;

	if (rx_buf->pkt.ctrl_hdr.type == ofi_ctrl_large_data)
		data = rxm_pkt_rndv_data(&rx_buf->pkt);
	else
		data = rx_buf->pkt.data;

	FI_DBG(&rxm_prov, FI_LOG_CQ, "writing buffered recv completion: "
	       "length: %" PRIu64 "\n", rx_buf->pkt.hdr.size);
	rx_buf->recv_context.ep = &rx_buf->ep->util_ep.ep_fid;

	return rxm_cq_write_recv_comp(rx_buf, &rx_buf->recv_context, flags,
				      rx_buf->pkt.hdr.size, data);
}
Example #21
static ssize_t
rxm_ep_atomic_writev(struct fid_ep *ep_fid, const struct fi_ioc *iov,
		     void **desc, size_t count, fi_addr_t dest_addr,
		     uint64_t addr, uint64_t key, enum fi_datatype datatype,
		     enum fi_op op, void *context)
{
	struct rxm_ep *rxm_ep = container_of(ep_fid, struct rxm_ep,
					     util_ep.ep_fid.fid);
	struct fi_rma_ioc rma_iov = {
		.addr = addr,
		.count = ofi_total_ioc_cnt(iov, count),
		.key = key,
	};
	struct fi_msg_atomic msg = {
		.msg_iov = iov,
		.desc = desc,
		.iov_count = count,
		.addr = dest_addr,
		.rma_iov = &rma_iov,
		.rma_iov_count = 1,
		.datatype = datatype,
		.op = op,
		.context = context,
		.data = 0,
	};

	return rxm_ep_generic_atomic_writemsg(rxm_ep, &msg, rxm_ep_tx_flags(rxm_ep));
}

static ssize_t
rxm_ep_atomic_write(struct fid_ep *ep_fid, const void *buf, size_t count,
		    void *desc, fi_addr_t dest_addr, uint64_t addr,
		    uint64_t key, enum fi_datatype datatype, enum fi_op op,
		    void *context)
{
	const struct fi_ioc iov = {
		.addr = (void *) buf,
		.count = count,
	};

	return rxm_ep_atomic_writev(ep_fid, &iov, &desc, 1, dest_addr, addr,
				    key, datatype, op, context);
}

static ssize_t
rxm_ep_atomic_inject(struct fid_ep *ep_fid, const void *buf, size_t count,
		     fi_addr_t dest_addr, uint64_t addr, uint64_t key,
		     enum fi_datatype datatype, enum fi_op op)
{
	struct rxm_ep *rxm_ep = container_of(ep_fid, struct rxm_ep,
					     util_ep.ep_fid.fid);
	struct fi_ioc msg_iov = {
		.addr = (void *) buf,
		.count = count,
	};
	struct fi_rma_ioc rma_iov = {
		.addr = addr,
		.count = count,
		.key = key,
	};
	struct fi_msg_atomic msg = {
		.msg_iov = &msg_iov,
		.desc = NULL,
		.iov_count = 1,
		.addr = dest_addr,
		.rma_iov = &rma_iov,
		.rma_iov_count = 1,
		.datatype = datatype,
		.op = op,
		.context = NULL,
		.data = 0,
	};

	return rxm_ep_generic_atomic_writemsg(rxm_ep, &msg, FI_INJECT);
}

static ssize_t
rxm_ep_generic_atomic_readwritemsg(struct rxm_ep *rxm_ep,
				   const struct fi_msg_atomic *msg,
				   struct fi_ioc *resultv, void **result_desc,
				   size_t result_count, uint64_t flags)
{
	int ret;
	struct rxm_conn *rxm_conn;

	ret = rxm_ep_prepare_tx(rxm_ep, msg->addr, &rxm_conn);
	if (OFI_UNLIKELY(ret))
		return ret;

	return rxm_ep_atomic_common(rxm_ep, rxm_conn, msg, NULL, NULL, 0,
				    resultv, result_desc, result_count,
				    ofi_op_atomic_fetch, flags);
}

static ssize_t
rxm_ep_atomic_readwritemsg(struct fid_ep *ep_fid,
			   const struct fi_msg_atomic *msg,
			   struct fi_ioc *resultv, void **result_desc,
			   size_t result_count, uint64_t flags)
{

	struct rxm_ep *rxm_ep = container_of(ep_fid, struct rxm_ep,
					     util_ep.ep_fid.fid);

	return rxm_ep_generic_atomic_readwritemsg(rxm_ep, msg,
			resultv, result_desc, result_count,
			flags | rxm_ep->util_ep.tx_msg_flags);
}

static ssize_t
rxm_ep_atomic_readwritev(struct fid_ep *ep_fid, const struct fi_ioc *iov,
		 void **desc, size_t count, struct fi_ioc *resultv,
		 void **result_desc, size_t result_count, fi_addr_t dest_addr,
		 uint64_t addr, uint64_t key, enum fi_datatype datatype,
		 enum fi_op op, void *context)
{
	struct rxm_ep *rxm_ep = container_of(ep_fid, struct rxm_ep,
					     util_ep.ep_fid.fid);
	struct fi_rma_ioc rma_iov = {
		.addr = addr,
		.count = ofi_total_ioc_cnt(iov, count),
		.key = key,
	};
	struct fi_msg_atomic msg = {
		.msg_iov = iov,
		.desc = desc,
		.iov_count = count,
		.addr = dest_addr,
		.rma_iov = &rma_iov,
		.rma_iov_count = 1,
		.datatype = datatype,
		.op = op,
		.context = context,
		.data = 0,
	};

	return rxm_ep_generic_atomic_readwritemsg(rxm_ep, &msg, resultv,
			result_desc, result_count, rxm_ep_tx_flags(rxm_ep));
}

static ssize_t
rxm_ep_atomic_readwrite(struct fid_ep *ep_fid, const void *buf, size_t count,
			void *desc, void *result, void *result_desc,
			fi_addr_t dest_addr, uint64_t addr, uint64_t key,
			enum fi_datatype datatype, enum fi_op op, void *context)
{
	struct fi_ioc iov = {
		.addr = (op == FI_ATOMIC_READ) ? NULL : (void *) buf,
		.count = count,
	};
	struct fi_ioc result_iov = {
		.addr = result,
		.count = count,
	};

	if (!buf && op != FI_ATOMIC_READ)
		return -FI_EINVAL;

	return rxm_ep_atomic_readwritev(ep_fid, &iov, &desc, 1, &result_iov,
					&result_desc, 1, dest_addr, addr, key,
					datatype, op, context);
}

static ssize_t
rxm_ep_generic_atomic_compwritemsg(struct rxm_ep *rxm_ep,
				   const struct fi_msg_atomic *msg,
				   const struct fi_ioc *comparev, void **compare_desc,
				   size_t compare_count, struct fi_ioc *resultv,
				   void **result_desc, size_t result_count,
				   uint64_t flags)
{
	int ret;
	struct rxm_conn *rxm_conn;

	ret = rxm_ep_prepare_tx(rxm_ep, msg->addr, &rxm_conn);
	if (OFI_UNLIKELY(ret))
		return ret;

	return rxm_ep_atomic_common(rxm_ep, rxm_conn, msg, comparev,
				    compare_desc, compare_count, resultv,
				    result_desc, result_count,
				    ofi_op_atomic_compare, flags);
}

static ssize_t
rxm_ep_atomic_compwritemsg(struct fid_ep *ep_fid,
			   const struct fi_msg_atomic *msg,
			   const struct fi_ioc *comparev, void **compare_desc,
			   size_t compare_count, struct fi_ioc *resultv,
			   void **result_desc, size_t result_count,
			   uint64_t flags)
{
	struct rxm_ep *rxm_ep = container_of(ep_fid, struct rxm_ep,
					     util_ep.ep_fid.fid);

	return rxm_ep_generic_atomic_compwritemsg(rxm_ep, msg, comparev,
				    compare_desc, compare_count, resultv,
				    result_desc, result_count,
				    flags | rxm_ep->util_ep.tx_msg_flags);
}

static ssize_t
rxm_ep_atomic_compwritev(struct fid_ep *ep_fid, const struct fi_ioc *iov,
		 void **desc, size_t count, const struct fi_ioc *comparev,
		 void **compare_desc, size_t compare_count,
		 struct fi_ioc *resultv, void **result_desc,
		 size_t result_count, fi_addr_t dest_addr, uint64_t addr,
		 uint64_t key, enum fi_datatype datatype, enum fi_op op,
		 void *context)
{
	struct rxm_ep *rxm_ep = container_of(ep_fid, struct rxm_ep,
					     util_ep.ep_fid.fid);
	struct fi_rma_ioc rma_iov = {
		.addr = addr,
		.count = ofi_total_ioc_cnt(iov, count),
		.key = key,
	};
	struct fi_msg_atomic msg = {
		.msg_iov = iov,
		.desc = desc,
		.iov_count = count,
		.addr = dest_addr,
		.rma_iov = &rma_iov,
		.rma_iov_count = 1,
		.datatype = datatype,
		.op = op,
		.context = context,
		.data = 0,
	};

	return rxm_ep_generic_atomic_compwritemsg(rxm_ep, &msg, comparev,
			compare_desc, compare_count, resultv, result_desc,
			result_count, rxm_ep_tx_flags(rxm_ep));
}

static ssize_t
rxm_ep_atomic_compwrite(struct fid_ep *ep_fid, const void *buf, size_t count,
			void *desc, const void *compare, void *compare_desc,
			void *result, void *result_desc, fi_addr_t dest_addr,
			uint64_t addr, uint64_t key, enum fi_datatype datatype,
			enum fi_op op, void *context)
{
	struct fi_ioc iov = {
		.addr = (void *) buf,
		.count = count,
	};
	struct fi_ioc resultv = {
		.addr = result,
		.count = count,
	};
	struct fi_ioc comparev = {
		.addr = (void *) compare,
		.count = count,
	};

	return rxm_ep_atomic_compwritev(ep_fid, &iov, &desc, 1,
					&comparev, &compare_desc, 1,
					&resultv, &result_desc, 1,
					dest_addr, addr, key,
					datatype, op, context);
}

int rxm_ep_query_atomic(struct fid_domain *domain, enum fi_datatype datatype,
			enum fi_op op, struct fi_atomic_attr *attr,
			uint64_t flags)
{
	struct rxm_domain *rxm_domain = container_of(domain,
						     struct rxm_domain,
						     util_domain.domain_fid);
	size_t tot_size;
	int ret;

	if (flags & FI_TAGGED) {
		FI_WARN(&rxm_prov, FI_LOG_EP_DATA,
			"tagged atomic op not supported\n");
		return -FI_EINVAL;
	}

	ret = ofi_atomic_valid(&rxm_prov, datatype, op, flags);
	if (ret || !attr)
		return ret;

	tot_size = flags & FI_COMPARE_ATOMIC ?
		rxm_domain->max_atomic_size / 2 : rxm_domain->max_atomic_size;
	attr->size = ofi_datatype_size(datatype);
	attr->count = tot_size / attr->size;

	return FI_SUCCESS;
}

static int rxm_ep_atomic_valid(struct fid_ep *ep_fid, enum fi_datatype datatype,
			       enum fi_op op, size_t *count)
{
	struct rxm_ep *rxm_ep = container_of(ep_fid, struct rxm_ep,
					     util_ep.ep_fid);
	struct fi_atomic_attr attr;
	int ret;

	ret = rxm_ep_query_atomic(&rxm_ep->util_ep.domain->domain_fid,
				  datatype, op, &attr, 0);
	if (!ret)
		*count = attr.count;

	return ret;
}

static int rxm_ep_atomic_fetch_valid(struct fid_ep *ep_fid,
				     enum fi_datatype datatype, enum fi_op op,
				     size_t *count)
{
	struct rxm_ep *rxm_ep = container_of(ep_fid, struct rxm_ep,
					     util_ep.ep_fid);
	struct fi_atomic_attr attr;
	int ret;

	ret = rxm_ep_query_atomic(&rxm_ep->util_ep.domain->domain_fid,
				  datatype, op, &attr, FI_FETCH_ATOMIC);
	if (!ret)
		*count = attr.count;

	return ret;
}

static int rxm_ep_atomic_cswap_valid(struct fid_ep *ep_fid,
				     enum fi_datatype datatype, enum fi_op op,
				     size_t *count)
{
	struct rxm_ep *rxm_ep = container_of(ep_fid, struct rxm_ep,
					     util_ep.ep_fid);
	struct fi_atomic_attr attr;
	int ret;

	ret = rxm_ep_query_atomic(&rxm_ep->util_ep.domain->domain_fid,
				  datatype, op, &attr, FI_COMPARE_ATOMIC);
	if (!ret)
		*count = attr.count;

	return ret;
}

struct fi_ops_atomic rxm_ops_atomic = {
	.size = sizeof(struct fi_ops_atomic),
	.write = rxm_ep_atomic_write,
	.writev = rxm_ep_atomic_writev,
	.writemsg = rxm_ep_atomic_writemsg,
	.inject = rxm_ep_atomic_inject,
	.readwrite = rxm_ep_atomic_readwrite,
	.readwritev = rxm_ep_atomic_readwritev,
	.readwritemsg = rxm_ep_atomic_readwritemsg,
	.compwrite = rxm_ep_atomic_compwrite,
	.compwritev = rxm_ep_atomic_compwritev,
	.compwritemsg = rxm_ep_atomic_compwritemsg,
	.writevalid = rxm_ep_atomic_valid,
	.readwritevalid = rxm_ep_atomic_fetch_valid,
	.compwritevalid = rxm_ep_atomic_cswap_valid,
};
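
From the application side, the ops table above is reached through libfabric's fi_atomic() call family. A minimal sketch of a single 64-bit atomic sum, assuming an endpoint, registered buffer descriptor, and remote address/key that the application has already set up:

#include <rdma/fi_atomic.h>

/* Application-side sketch: one 64-bit FI_SUM lands in
 * rxm_ops_atomic.write (rxm_ep_atomic_write) via the fi_atomic()
 * inline wrapper. ep, desc, dest, raddr, and rkey are assumed to be
 * initialized by the application. */
static ssize_t app_atomic_add64(struct fid_ep *ep, const uint64_t *val,
				void *desc, fi_addr_t dest, uint64_t raddr,
				uint64_t rkey, void *ctx)
{
	return fi_atomic(ep, val, 1, desc, dest, raddr, rkey,
			 FI_UINT64, FI_SUM, ctx);
}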
Example #22
static ssize_t
rxm_ep_atomic_common(struct rxm_ep *rxm_ep, struct rxm_conn *rxm_conn,
		const struct fi_msg_atomic *msg, const struct fi_ioc *comparev,
		void **compare_desc, size_t compare_iov_count,
		struct fi_ioc *resultv, void **result_desc,
		size_t result_iov_count, uint32_t op, uint64_t flags)
{
	struct rxm_tx_atomic_buf *tx_buf;
	struct rxm_atomic_hdr *atomic_hdr;
	struct iovec buf_iov[RXM_IOV_LIMIT];
	struct iovec cmp_iov[RXM_IOV_LIMIT];
	size_t datatype_sz = ofi_datatype_size(msg->datatype);
	size_t buf_len = 0;
	size_t cmp_len = 0;
	size_t tot_len;
	ssize_t ret;

	assert(msg->iov_count <= RXM_IOV_LIMIT &&
	       msg->rma_iov_count <= RXM_IOV_LIMIT);

	if (flags & FI_REMOTE_CQ_DATA) {
		FI_WARN(&rxm_prov, FI_LOG_EP_DATA,
			"atomic with remote CQ data not supported\n");
		return -FI_EINVAL;
	}

	if (msg->op != FI_ATOMIC_READ) {
		assert(msg->msg_iov);
		ofi_ioc_to_iov(msg->msg_iov, buf_iov, msg->iov_count,
			       datatype_sz);
		buf_len = ofi_total_iov_len(buf_iov, msg->iov_count);
	}

	if (op == ofi_op_atomic_compare) {
		assert(comparev);
		ofi_ioc_to_iov(comparev, cmp_iov, compare_iov_count,
			       datatype_sz);
		cmp_len = ofi_total_iov_len(cmp_iov, compare_iov_count);
		assert(buf_len == cmp_len);
	}

	tot_len = buf_len + cmp_len + sizeof(struct rxm_atomic_hdr) +
			sizeof(struct rxm_pkt);

	if (tot_len > rxm_eager_limit) {
		FI_WARN(&rxm_prov, FI_LOG_EP_DATA,
			"atomic data too large %zu\n", tot_len);
		return -FI_EINVAL;
	}

	ofi_ep_lock_acquire(&rxm_ep->util_ep);
	tx_buf = (struct rxm_tx_atomic_buf *)
		 rxm_tx_buf_alloc(rxm_ep, RXM_BUF_POOL_TX_ATOMIC);
	if (OFI_UNLIKELY(!tx_buf)) {
		FI_WARN(&rxm_prov, FI_LOG_EP_DATA,
			"Ran out of buffers from Atomic buffer pool\n");
		ret = -FI_EAGAIN;
		goto unlock;
	}

	rxm_ep_format_atomic_pkt_hdr(rxm_conn, tx_buf, tot_len, op,
				msg->datatype, msg->op, flags, msg->data,
				msg->rma_iov, msg->rma_iov_count);
	tx_buf->pkt.ctrl_hdr.msg_id = ofi_buf_index(tx_buf);
	tx_buf->app_context = msg->context;

	atomic_hdr = (struct rxm_atomic_hdr *) tx_buf->pkt.data;

	ofi_copy_from_iov(atomic_hdr->data, buf_len, buf_iov,
			  msg->iov_count, 0);
	if (cmp_len)
		ofi_copy_from_iov(atomic_hdr->data + buf_len, cmp_len,
				  cmp_iov, compare_iov_count, 0);

	tx_buf->result_iov_count = result_iov_count;
	if (resultv)
		ofi_ioc_to_iov(resultv, tx_buf->result_iov, result_iov_count,
			       datatype_sz);

	ret = rxm_ep_send_atomic_req(rxm_ep, rxm_conn, tx_buf, tot_len);
	if (ret)
		ofi_buf_free(tx_buf);
unlock:
	ofi_ep_lock_release(&rxm_ep->util_ep);
	return ret;
}
Example #23
ssize_t psmx2_recv_generic(struct fid_ep *ep, void *buf, size_t len,
			   void *desc, fi_addr_t src_addr, void *context,
			   uint64_t flags)
{
	struct psmx2_fid_ep *ep_priv;
	struct psmx2_fid_av *av;
	psm2_epaddr_t psm2_epaddr;
	psm2_mq_req_t psm2_req;
	psm2_mq_tag_t psm2_tag, psm2_tagsel;
	struct fi_context *fi_context;
	int recv_flag = 0;
	int err;
	int enable_completion;

	ep_priv = container_of(ep, struct psmx2_fid_ep, ep);

	if (flags & FI_TRIGGER)
		return psmx2_trigger_queue_recv(ep, buf, len, desc, src_addr,
						context, flags);

	if ((ep_priv->caps & FI_DIRECTED_RECV) && src_addr != FI_ADDR_UNSPEC) {
		av = ep_priv->av;
		assert(av);
		psm2_epaddr = psmx2_av_translate_addr(av, ep_priv->rx, src_addr, av->type);
	} else {
		psm2_epaddr = 0;
	}

	PSMX2_SET_TAG(psm2_tag, 0ULL, 0, PSMX2_TYPE_MSG);
	PSMX2_SET_MASK(psm2_tagsel, PSMX2_MATCH_NONE, PSMX2_TYPE_MASK);

	enable_completion = !ep_priv->recv_selective_completion ||
			    (flags & FI_COMPLETION);
	if (enable_completion) {
		assert(context);
		fi_context = context;
		if (flags & FI_MULTI_RECV) {
			struct psmx2_multi_recv *req;

			req = calloc(1, sizeof(*req));
			if (!req)
				return -FI_ENOMEM;

			req->src_addr = psm2_epaddr;
			req->tag = psm2_tag;
			req->tagsel = psm2_tagsel;
			req->flag = recv_flag;
			req->buf = buf;
			req->len = len;
			req->offset = 0;
			req->min_buf_size = ep_priv->min_multi_recv;
			req->context = fi_context; 
			PSMX2_CTXT_TYPE(fi_context) = PSMX2_MULTI_RECV_CONTEXT;
			PSMX2_CTXT_USER(fi_context) = req;
			if (len > PSMX2_MAX_MSG_SIZE)
				len = PSMX2_MAX_MSG_SIZE;
		} else {
			PSMX2_CTXT_TYPE(fi_context) = PSMX2_RECV_CONTEXT;
			PSMX2_CTXT_USER(fi_context) = buf;
		}
		PSMX2_CTXT_EP(fi_context) = ep_priv;
		PSMX2_CTXT_SIZE(fi_context) = len;
	} else {
		PSMX2_EP_GET_OP_CONTEXT(ep_priv, fi_context);
		#if !PSMX2_USE_REQ_CONTEXT
		PSMX2_CTXT_TYPE(fi_context) = PSMX2_NOCOMP_RECV_CONTEXT;
		PSMX2_CTXT_EP(fi_context) = ep_priv;
		PSMX2_CTXT_USER(fi_context) = buf;
		PSMX2_CTXT_SIZE(fi_context) = len;
		#endif
	}

	err = psm2_mq_irecv2(ep_priv->rx->psm2_mq, psm2_epaddr,
			     &psm2_tag, &psm2_tagsel, recv_flag, buf, len,
			     (void *)fi_context, &psm2_req);
	if (OFI_UNLIKELY(err != PSM2_OK))
		return psmx2_errno(err);

	if (enable_completion) {
		PSMX2_CTXT_REQ(fi_context) = psm2_req;
	} else {
		#if PSMX2_USE_REQ_CONTEXT
		PSMX2_REQ_GET_OP_CONTEXT(psm2_req, fi_context);
		PSMX2_CTXT_TYPE(fi_context) = PSMX2_NOCOMP_RECV_CONTEXT;
		PSMX2_CTXT_EP(fi_context) = ep_priv;
		PSMX2_CTXT_USER(fi_context) = buf;
		PSMX2_CTXT_SIZE(fi_context) = len;
		#endif
	}

	return 0;
}
Example #24
static ssize_t rxm_ep_readv(struct fid_ep *ep_fid, const struct iovec *iov,
			    void **desc, size_t count, fi_addr_t src_addr,
			    uint64_t addr, uint64_t key, void *context)
{
	struct rxm_ep *rxm_ep = container_of(ep_fid, struct rxm_ep,
					     util_ep.ep_fid.fid);
	struct fi_rma_iov rma_iov = {
		.addr = addr,
		.len = ofi_total_iov_len(iov, count),
		.key = key,
	};
	struct fi_msg_rma msg = {
		.msg_iov = iov,
		.desc = desc,
		.iov_count = count,
		.addr = src_addr,
		.rma_iov = &rma_iov,
		.rma_iov_count = 1,
		.context = context,
		.data = 0,
	};

	return rxm_ep_rma_common(rxm_ep, &msg, rxm_ep_tx_flags(rxm_ep), fi_readmsg, FI_READ);
}

static ssize_t rxm_ep_read(struct fid_ep *ep_fid, void *buf, size_t len,
			   void *desc, fi_addr_t src_addr, uint64_t addr,
			   uint64_t key, void *context)
{
	struct iovec iov = {
		.iov_base = (void*)buf,
		.iov_len = len,
	};
	struct fi_rma_iov rma_iov = {
		.addr = addr,
		.len = len,
		.key = key,
	};
	struct fi_msg_rma msg = {
		.msg_iov = &iov,
		.desc = &desc,
		.iov_count = 1,
		.addr = src_addr,
		.rma_iov = &rma_iov,
		.rma_iov_count = 1,
		.context = context,
		.data = 0,
	};
	struct rxm_ep *rxm_ep = container_of(ep_fid, struct rxm_ep,
					     util_ep.ep_fid.fid);

	return rxm_ep_rma_common(rxm_ep, &msg, rxm_ep_tx_flags(rxm_ep), fi_readmsg, FI_READ);
}

static inline void
rxm_ep_format_rma_msg(struct rxm_rma_buf *rma_buf, const struct fi_msg_rma *orig_msg,
		      struct iovec *rxm_iov, struct fi_msg_rma *rxm_msg)
{
	rxm_msg->context = rma_buf;
	rxm_msg->addr = orig_msg->addr;
	rxm_msg->data = orig_msg->data;

	ofi_copy_from_iov(rma_buf->pkt.data, rma_buf->pkt.hdr.size,
			  orig_msg->msg_iov, orig_msg->iov_count, 0);
	rxm_iov->iov_base = &rma_buf->pkt.data;
	rxm_iov->iov_len = rma_buf->pkt.hdr.size;
	rxm_msg->msg_iov = rxm_iov;
	rxm_msg->desc = &rma_buf->hdr.desc;
	rxm_msg->iov_count = 1;

	rxm_msg->rma_iov = orig_msg->rma_iov;
	rxm_msg->rma_iov_count = orig_msg->rma_iov_count;
}

static inline ssize_t
rxm_ep_rma_emulate_inject_msg(struct rxm_ep *rxm_ep, struct rxm_conn *rxm_conn, size_t total_size,
			      const struct fi_msg_rma *msg, uint64_t flags)
{
	struct rxm_rma_buf *rma_buf;
	ssize_t ret;
	struct iovec rxm_msg_iov = { 0 };
	struct fi_msg_rma rxm_rma_msg = { 0 };

	assert(msg->rma_iov_count <= rxm_ep->rxm_info->tx_attr->rma_iov_limit);

	ofi_ep_lock_acquire(&rxm_ep->util_ep);
	rma_buf = rxm_rma_buf_alloc(rxm_ep);
	if (OFI_UNLIKELY(!rma_buf)) {
		FI_WARN(&rxm_prov, FI_LOG_EP_DATA,
			"Ran out of buffers from RMA buffer pool\n");
		ret = -FI_ENOMEM;
		goto unlock;
	}

	rma_buf->pkt.hdr.size = total_size;
	rma_buf->app_context = msg->context;
	rma_buf->flags = flags;
	rxm_ep_format_rma_msg(rma_buf, msg, &rxm_msg_iov, &rxm_rma_msg);

	flags = (flags & ~FI_INJECT) | FI_COMPLETION;

	ret = fi_writemsg(rxm_conn->msg_ep, &rxm_rma_msg, flags);
	if (OFI_UNLIKELY(ret)) {
		if (ret == -FI_EAGAIN)
			rxm_ep_do_progress(&rxm_ep->util_ep);
		ofi_buf_free(rma_buf);
	}
unlock:
	ofi_ep_lock_release(&rxm_ep->util_ep);
	return ret;
}

static inline ssize_t
rxm_ep_rma_emulate_inject(struct rxm_ep *rxm_ep, struct rxm_conn *rxm_conn,
			  const void *buf, size_t len, uint64_t data,
			  fi_addr_t dest_addr, uint64_t addr, uint64_t key,
			  uint64_t flags)
{
	struct fi_rma_iov rma_iov = {
		.addr = addr,
		.len = len,
		.key = key,
	};
	struct iovec iov = {
		.iov_base = (void*)buf,
		.iov_len = len,
	};
	struct fi_msg_rma msg = {
		.msg_iov = &iov,
		.desc = NULL,
		.iov_count = 1,
		.addr = dest_addr,
		.rma_iov = &rma_iov,
		.rma_iov_count = 1,
		.context = NULL,
		.data = data,
	};

	return rxm_ep_rma_emulate_inject_msg(rxm_ep, rxm_conn, len, &msg, flags);
}

static inline ssize_t
rxm_ep_rma_inject_common(struct rxm_ep *rxm_ep, const struct fi_msg_rma *msg, uint64_t flags)
{
	struct rxm_conn *rxm_conn;
	size_t total_size = ofi_total_iov_len(msg->msg_iov, msg->iov_count);
	ssize_t ret;

	assert(total_size <= rxm_ep->rxm_info->tx_attr->inject_size);

	ret = rxm_ep_prepare_tx(rxm_ep, msg->addr, &rxm_conn);
	if (OFI_UNLIKELY(ret))
		return ret;

	if ((total_size <= rxm_ep->msg_info->tx_attr->inject_size) &&
	    !(flags & FI_COMPLETION) &&
	    (msg->iov_count == 1) && (msg->rma_iov_count == 1)) {
		if (flags & FI_REMOTE_CQ_DATA) {
			ret = fi_inject_writedata(rxm_conn->msg_ep,
						  msg->msg_iov->iov_base,
						  msg->msg_iov->iov_len, msg->data,
						  msg->addr, msg->rma_iov->addr,
						  msg->rma_iov->key);
		} else {
			ret = fi_inject_write(rxm_conn->msg_ep,
					      msg->msg_iov->iov_base,
					      msg->msg_iov->iov_len, msg->addr,
					      msg->rma_iov->addr,
					      msg->rma_iov->key);
		}
		if (OFI_LIKELY(!ret)) {
			ofi_ep_wr_cntr_inc(&rxm_ep->util_ep);
		} else {
			FI_DBG(&rxm_prov, FI_LOG_EP_DATA,
			       "fi_inject_write* for MSG provider failed with ret - %"
			       PRId64"\n", ret);
			if (OFI_LIKELY(ret == -FI_EAGAIN))
				rxm_ep_progress(&rxm_ep->util_ep);
		}
		return ret;
	} else {
		return rxm_ep_rma_emulate_inject_msg(rxm_ep, rxm_conn, total_size, msg, flags);
	}
}

static inline ssize_t
rxm_ep_generic_writemsg(struct fid_ep *ep_fid, const struct fi_msg_rma *msg,
			uint64_t flags)
{
	struct rxm_ep *rxm_ep =
		container_of(ep_fid, struct rxm_ep, util_ep.ep_fid.fid);

	if (flags & FI_INJECT)
		return rxm_ep_rma_inject_common(rxm_ep, msg, flags);
	else
		return rxm_ep_rma_common(rxm_ep, msg, flags,
					 fi_writemsg, FI_WRITE);
}

static inline ssize_t
rxm_ep_writemsg(struct fid_ep *ep_fid, const struct fi_msg_rma *msg, uint64_t flags)
{
	struct rxm_ep *rxm_ep =
		container_of(ep_fid, struct rxm_ep, util_ep.ep_fid.fid);

	return rxm_ep_generic_writemsg(ep_fid, msg, flags | rxm_ep->util_ep.tx_msg_flags);
}

static ssize_t rxm_ep_writev(struct fid_ep *ep_fid, const struct iovec *iov,
			     void **desc, size_t count, fi_addr_t dest_addr,
			     uint64_t addr, uint64_t key, void *context)
{
	struct fi_rma_iov rma_iov = {
		.addr = addr,
		.len = ofi_total_iov_len(iov, count),
		.key = key,
	};
	struct fi_msg_rma msg = {
		.msg_iov = iov,
		.desc = desc,
		.iov_count = count,
		.addr = dest_addr,
		.rma_iov = &rma_iov,
		.rma_iov_count = 1,
		.context = context,
		.data = 0,
	};
	struct rxm_ep *rxm_ep = container_of(ep_fid, struct rxm_ep,
					     util_ep.ep_fid.fid);

	return rxm_ep_generic_writemsg(ep_fid, &msg, rxm_ep_tx_flags(rxm_ep));
}

static ssize_t rxm_ep_writedata(struct fid_ep *ep_fid, const void *buf,
				size_t len, void *desc, uint64_t data,
				fi_addr_t dest_addr, uint64_t addr,
				uint64_t key, void *context)
{
	struct fi_rma_iov rma_iov = {
		.addr = addr,
		.len = len,
		.key = key,
	};
	struct iovec iov = {
		.iov_base = (void*)buf,
		.iov_len = len,
	};
	struct fi_msg_rma msg = {
		.msg_iov = &iov,
		.desc = &desc,
		.iov_count = 1,
		.addr = dest_addr,
		.rma_iov = &rma_iov,
		.rma_iov_count = 1,
		.context = context,
		.data = data,
	};
	struct rxm_ep *rxm_ep = container_of(ep_fid, struct rxm_ep,
					     util_ep.ep_fid.fid);

	return rxm_ep_generic_writemsg(ep_fid, &msg, rxm_ep_tx_flags(rxm_ep) |
				       FI_REMOTE_CQ_DATA);
}

static ssize_t rxm_ep_write(struct fid_ep *ep_fid, const void *buf,
			    size_t len, void *desc, fi_addr_t dest_addr,
			    uint64_t addr, uint64_t key, void *context)
{
	struct fi_rma_iov rma_iov = {
		.addr = addr,
		.len = len,
		.key = key,
	};
	struct iovec iov = {
		.iov_base = (void*)buf,
		.iov_len = len,
	};
	struct fi_msg_rma msg = {
		.msg_iov = &iov,
		.desc = &desc,
		.iov_count = 1,
		.addr = dest_addr,
		.rma_iov = &rma_iov,
		.rma_iov_count = 1,
		.context = context,
		.data = 0,
	};
	struct rxm_ep *rxm_ep = container_of(ep_fid, struct rxm_ep,
					     util_ep.ep_fid.fid);

	return rxm_ep_generic_writemsg(ep_fid, &msg, rxm_ep_tx_flags(rxm_ep));
}

static ssize_t rxm_ep_inject_write(struct fid_ep *ep_fid, const void *buf,
				   size_t len, fi_addr_t dest_addr,
				   uint64_t addr, uint64_t key)
{
	ssize_t ret;
	struct rxm_conn *rxm_conn;
	struct rxm_ep *rxm_ep = container_of(ep_fid, struct rxm_ep,
					     util_ep.ep_fid.fid);

	ret = rxm_ep_prepare_tx(rxm_ep, dest_addr, &rxm_conn);
	if (OFI_UNLIKELY(ret))
		return ret;

	if (len <= rxm_ep->msg_info->tx_attr->inject_size) {
		ret = fi_inject_write(rxm_conn->msg_ep, buf, len,
				      dest_addr, addr, key);
		if (OFI_LIKELY(!ret)) {
			ofi_ep_wr_cntr_inc(&rxm_ep->util_ep);
		} else {
			FI_DBG(&rxm_prov, FI_LOG_EP_DATA,
			       "fi_inject_write for MSG provider failed with ret - %"
			       PRId64"\n", ret);
			if (OFI_LIKELY(ret == -FI_EAGAIN))
				rxm_ep_progress(&rxm_ep->util_ep);
		}
		return ret;
	} else {
		return rxm_ep_rma_emulate_inject(rxm_ep, rxm_conn, buf, len,
						 0, dest_addr, addr, key, FI_INJECT);
	}
}

static ssize_t rxm_ep_inject_writedata(struct fid_ep *ep_fid, const void *buf,
				       size_t len, uint64_t data,
				       fi_addr_t dest_addr, uint64_t addr,
				       uint64_t key)
{
	ssize_t ret;
	struct rxm_conn *rxm_conn;
	struct rxm_ep *rxm_ep = container_of(ep_fid, struct rxm_ep,
					     util_ep.ep_fid.fid);
	ret = rxm_ep_prepare_tx(rxm_ep, dest_addr, &rxm_conn);
	if (OFI_UNLIKELY(ret))
		return ret;

	if (len <= rxm_ep->msg_info->tx_attr->inject_size) {
		ret = fi_inject_writedata(rxm_conn->msg_ep, buf, len,
					  data, dest_addr, addr, key);
		if (OFI_LIKELY(!ret)) {
			ofi_ep_wr_cntr_inc(&rxm_ep->util_ep);
		} else {
			FI_DBG(&rxm_prov, FI_LOG_EP_DATA,
			       "fi_inject_writedata for MSG provider failed with ret - %"
			       PRId64"\n", ret);
			if (OFI_LIKELY(ret == -FI_EAGAIN))
				rxm_ep_progress(&rxm_ep->util_ep);
		}
		return ret;
	} else {
		return rxm_ep_rma_emulate_inject(rxm_ep, rxm_conn, buf, len,
						 data, dest_addr, addr, key,
						 FI_REMOTE_CQ_DATA | FI_INJECT);
	}
}

struct fi_ops_rma rxm_ops_rma = {
	.size = sizeof (struct fi_ops_rma),
	.read = rxm_ep_read,
	.readv = rxm_ep_readv,
	.readmsg = rxm_ep_readmsg,
	.write = rxm_ep_write,
	.writev = rxm_ep_writev,
	.writemsg = rxm_ep_writemsg,
	.inject = rxm_ep_inject_write,
	.writedata = rxm_ep_writedata,
	.injectdata = rxm_ep_inject_writedata,
};
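
Applications reach rxm_ops_rma the same way, via the fi_rma call family. A minimal sketch of the inject path, under the same assumptions about endpoint and remote-key setup; payloads larger than the provider's inject_size fall back to the emulated path in rxm_ep_rma_emulate_inject() above:

#include <rdma/fi_rma.h>

/* Application-side sketch: a small fire-and-forget write. This resolves
 * to rxm_ops_rma.inject (rxm_ep_inject_write); no completion is
 * generated and buf may be reused as soon as the call returns. */
static ssize_t app_put_inject(struct fid_ep *ep, const void *buf, size_t len,
			      fi_addr_t dest, uint64_t raddr, uint64_t rkey)
{
	return fi_inject_write(ep, buf, len, dest, raddr, rkey);
}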
Example #25
static inline struct efa_conn *efa_av_tbl_idx_to_conn(struct efa_av *av, fi_addr_t addr)
{
	if (OFI_UNLIKELY(addr == FI_ADDR_UNSPEC))
		return NULL;
	return av->conn_table[addr];
}
Example #26
static ssize_t rxm_ep_readv(struct fid_ep *ep_fid, const struct iovec *iov,
			    void **desc, size_t count, fi_addr_t src_addr,
			    uint64_t addr, uint64_t key, void *context)
{
	struct fi_rma_iov rma_iov = {
		.addr = addr,
		.len = ofi_total_iov_len(iov, count),
		.key = key,
	};
	struct fi_msg_rma msg = {
		.msg_iov = iov,
		.desc = desc,
		.iov_count = count,
		.addr = src_addr,
		.rma_iov = &rma_iov,
		.rma_iov_count = 1,
		.context = context,
		.data = 0,
	};
	struct rxm_ep *rxm_ep = container_of(ep_fid, struct rxm_ep,
					     util_ep.ep_fid.fid);

	return rxm_ep_readmsg(ep_fid, &msg, rxm_ep_tx_flags(rxm_ep));
}

static ssize_t rxm_ep_read(struct fid_ep *ep_fid, void *buf, size_t len,
			   void *desc, fi_addr_t src_addr, uint64_t addr,
			   uint64_t key, void *context)
{
	struct iovec iov = {
		.iov_base = (void*)buf,
		.iov_len = len,
	};
	struct fi_rma_iov rma_iov = {
		.addr = addr,
		.len = len,
		.key = key,
	};
	struct fi_msg_rma msg = {
		.msg_iov = &iov,
		.desc = &desc,
		.iov_count = 1,
		.addr = src_addr,
		.rma_iov = &rma_iov,
		.rma_iov_count = 1,
		.context = context,
		.data = 0,
	};
	struct rxm_ep *rxm_ep = container_of(ep_fid, struct rxm_ep,
					     util_ep.ep_fid.fid);

	return rxm_ep_readmsg(ep_fid, &msg, rxm_ep_tx_flags(rxm_ep));
}

static ssize_t rxm_ep_rma_inject(struct fid_ep *msg_ep, struct rxm_ep *rxm_ep,
				 const struct fi_msg_rma *msg, uint64_t flags)
{
	struct rxm_tx_entry *tx_entry;
	struct rxm_tx_buf *tx_buf;
	struct fi_msg_rma msg_rma;
	struct iovec iov;
	size_t size;
	ssize_t ret;

	size = ofi_total_iov_len(msg->msg_iov, msg->iov_count);

	if (size > rxm_ep->rxm_info->tx_attr->inject_size)
		return -FI_EMSGSIZE;

	/* Use fi_inject_write instead of fi_writemsg since the latter generates
	 * completion by default */
	if (size <= rxm_ep->msg_info->tx_attr->inject_size &&
	    !(flags & FI_COMPLETION)) {
		if (flags & FI_REMOTE_CQ_DATA)
			return fi_inject_writedata(msg_ep, msg->msg_iov->iov_base,
					       msg->msg_iov->iov_len, msg->data,
					       msg->addr, msg->rma_iov->addr,
					       msg->rma_iov->key);
		else
			return fi_inject_write(msg_ep, msg->msg_iov->iov_base,
					       msg->msg_iov->iov_len, msg->addr,
					       msg->rma_iov->addr,
					       msg->rma_iov->key);
	}

	tx_buf = rxm_tx_buf_get(rxm_ep, RXM_BUF_POOL_TX_MSG);
	if (!tx_buf) {
		FI_WARN(&rxm_prov, FI_LOG_CQ, "TX queue full!\n");
		rxm_ep_progress_multi(&rxm_ep->util_ep);
		return -FI_EAGAIN;
	}

	tx_entry = rxm_tx_entry_get(&rxm_ep->send_queue);
	if (!tx_entry) {
		rxm_ep_progress_multi(&rxm_ep->util_ep);
		ret = -FI_EAGAIN;
		goto err1;
	}

	tx_entry->state = RXM_TX;
	tx_entry->flags = flags;
	tx_entry->comp_flags = FI_RMA | FI_WRITE;
	tx_entry->tx_buf = tx_buf;

	ofi_copy_from_iov(tx_buf->pkt.data, size, msg->msg_iov, msg->iov_count, 0);

	iov.iov_base = &tx_buf->pkt.data;
	iov.iov_len = size;

	msg_rma.msg_iov = &iov;
	msg_rma.desc = &tx_buf->hdr.desc;
	msg_rma.iov_count = 1;
	msg_rma.addr = msg->addr;
	msg_rma.rma_iov = msg->rma_iov;
	msg_rma.rma_iov_count = msg->rma_iov_count;
	msg_rma.context = tx_entry;
	msg_rma.data = msg->data;
	flags = (flags & ~FI_INJECT) | FI_COMPLETION;

	ret = fi_writemsg(msg_ep, &msg_rma, flags);
	if (ret) {
		if (ret == -FI_EAGAIN)
			rxm_ep_progress_multi(&rxm_ep->util_ep);
		goto err2;
	}
	return 0;
err2:
	rxm_tx_entry_release(&rxm_ep->send_queue, tx_entry);
err1:
	rxm_tx_buf_release(rxm_ep, tx_buf);
	return ret;
}

static ssize_t rxm_ep_writemsg(struct fid_ep *ep_fid, const struct fi_msg_rma *msg,
			       uint64_t flags)
{
	struct util_cmap_handle *handle;
	struct rxm_conn *rxm_conn;
	struct rxm_ep *rxm_ep;
	int ret;

	rxm_ep = container_of(ep_fid, struct rxm_ep, util_ep.ep_fid.fid);

	ret = ofi_cmap_get_handle(rxm_ep->util_ep.cmap, msg->addr, &handle);
	if (OFI_UNLIKELY(ret))
		return ret;
	rxm_conn = container_of(handle, struct rxm_conn, handle);

	if (flags & FI_INJECT)
		return rxm_ep_rma_inject(rxm_conn->msg_ep, rxm_ep, msg, flags);
	else
		return rxm_ep_rma_common(rxm_conn->msg_ep, rxm_ep, msg, flags,
					 fi_writemsg, FI_WRITE);
}

static ssize_t rxm_ep_writev(struct fid_ep *ep_fid, const struct iovec *iov,
			     void **desc, size_t count, fi_addr_t dest_addr,
			     uint64_t addr, uint64_t key, void *context)
{
	struct fi_rma_iov rma_iov = {
		.addr = addr,
		.len = ofi_total_iov_len(iov, count),
		.key = key,
	};
	struct fi_msg_rma msg = {
		.msg_iov = iov,
		.desc = desc,
		.iov_count = count,
		.addr = dest_addr,
		.rma_iov = &rma_iov,
		.rma_iov_count = 1,
		.context = context,
		.data = 0,
	};
	struct rxm_ep *rxm_ep = container_of(ep_fid, struct rxm_ep,
					     util_ep.ep_fid.fid);

	return rxm_ep_writemsg(ep_fid, &msg, rxm_ep_tx_flags(rxm_ep));
}

static ssize_t rxm_ep_writedata(struct fid_ep *ep_fid, const void *buf,
				size_t len, void *desc, uint64_t data,
				fi_addr_t dest_addr, uint64_t addr,
				uint64_t key, void *context)
{
	struct fi_rma_iov rma_iov = {
		.addr = addr,
		.len = len,
		.key = key,
	};
	struct iovec iov = {
		.iov_base = (void*)buf,
		.iov_len = len,
	};
	struct fi_msg_rma msg = {
		.msg_iov = &iov,
		.desc = &desc,
		.iov_count = 1,
		.addr = dest_addr,
		.rma_iov = &rma_iov,
		.rma_iov_count = 1,
		.context = context,
		.data = data,
	};
	struct rxm_ep *rxm_ep = container_of(ep_fid, struct rxm_ep,
					     util_ep.ep_fid.fid);

	return rxm_ep_writemsg(ep_fid, &msg, rxm_ep_tx_flags(rxm_ep) |
			       FI_REMOTE_CQ_DATA);
}

static ssize_t rxm_ep_write(struct fid_ep *ep_fid, const void *buf,
			    size_t len, void *desc, fi_addr_t dest_addr,
			    uint64_t addr, uint64_t key, void *context)
{
	struct fi_rma_iov rma_iov = {
		.addr = addr,
		.len = len,
		.key = key,
	};
	struct iovec iov = {
		.iov_base = (void*)buf,
		.iov_len = len,
	};
	struct fi_msg_rma msg = {
		.msg_iov = &iov,
		.desc = &desc,
		.iov_count = 1,
		.addr = dest_addr,
		.rma_iov = &rma_iov,
		.rma_iov_count = 1,
		.context = context,
		.data = 0,
	};
	struct rxm_ep *rxm_ep = container_of(ep_fid, struct rxm_ep,
					     util_ep.ep_fid.fid);

	return rxm_ep_writemsg(ep_fid, &msg, rxm_ep_tx_flags(rxm_ep));
}

static ssize_t rxm_ep_inject_write(struct fid_ep *ep_fid, const void *buf,
			     size_t len, fi_addr_t dest_addr, uint64_t addr,
			     uint64_t key)
{
	struct fi_rma_iov rma_iov = {
		.addr = addr,
		.len = len,
		.key = key,
	};
	struct iovec iov = {
		.iov_base = (void*)buf,
		.iov_len = len,
	};
	struct fi_msg_rma msg = {
		.msg_iov = &iov,
		.desc = NULL,
		.iov_count = 1,
		.addr = dest_addr,
		.rma_iov = &rma_iov,
		.rma_iov_count = 1,
		.context = NULL,
		.data = 0,
	};
	struct rxm_ep *rxm_ep = container_of(ep_fid, struct rxm_ep,
					     util_ep.ep_fid.fid);

	return rxm_ep_writemsg(ep_fid, &msg,
			       (rxm_ep_tx_flags(rxm_ep) & ~FI_COMPLETION) |
			       FI_INJECT);
}

static ssize_t rxm_ep_inject_writedata(struct fid_ep *ep_fid, const void *buf,
				       size_t len, uint64_t data,
				       fi_addr_t dest_addr, uint64_t addr,
				       uint64_t key)
{
	struct fi_rma_iov rma_iov = {
		.addr = addr,
		.len = len,
		.key = key,
	};
	struct iovec iov = {
		.iov_base = (void*)buf,
		.iov_len = len,
	};
	struct fi_msg_rma msg = {
		.msg_iov = &iov,
		.desc = NULL,
		.iov_count = 1,
		.addr = dest_addr,
		.rma_iov = &rma_iov,
		.rma_iov_count = 1,
		.context = NULL,
		.data = data,
	};
	struct rxm_ep *rxm_ep = container_of(ep_fid, struct rxm_ep,
					     util_ep.ep_fid.fid);

	return rxm_ep_writemsg(ep_fid, &msg,
			       (rxm_ep_tx_flags(rxm_ep) & ~FI_COMPLETION) |
			       FI_INJECT | FI_REMOTE_CQ_DATA);
}

struct fi_ops_rma rxm_ops_rma = {
	.size = sizeof (struct fi_ops_rma),
	.read = rxm_ep_read,
	.readv = rxm_ep_readv,
	.readmsg = rxm_ep_readmsg,
	.write = rxm_ep_write,
	.writev = rxm_ep_writev,
	.writemsg = rxm_ep_writemsg,
	.inject = rxm_ep_inject_write,
	.writedata = rxm_ep_writedata,
	.injectdata = rxm_ep_inject_writedata,
};
Example #27
int _gnix_amo_post_req(void *data)
{
	struct gnix_fab_req *fab_req = (struct gnix_fab_req *)data;
	struct gnix_fid_ep *ep = fab_req->gnix_ep;
	struct gnix_nic *nic = ep->nic;
	struct gnix_fid_mem_desc *loc_md;
	struct gnix_tx_descriptor *txd;
	gni_mem_handle_t mdh;
	gni_return_t status;
	int rc;
	int inject_err = _gnix_req_inject_err(fab_req);

	if (!gnix_ops_allowed(ep, fab_req->vc->peer_caps, fab_req->flags)) {
		GNIX_DEBUG(FI_LOG_EP_DATA, "flags:0x%llx, %s\n", fab_req->flags,
			   fi_tostr(&fab_req->flags, FI_TYPE_OP_FLAGS));
		GNIX_DEBUG(FI_LOG_EP_DATA, "caps:0x%llx, %s\n",
			   ep->caps, fi_tostr(&ep->caps, FI_TYPE_CAPS));
		GNIX_DEBUG(FI_LOG_EP_DATA, "peer_caps:0x%llx, %s\n",
			   fab_req->vc->peer_caps,
			   fi_tostr(&fab_req->vc->peer_caps, FI_TYPE_OP_FLAGS));

		rc = __gnix_amo_post_err(fab_req, FI_EOPNOTSUPP);
		if (rc != FI_SUCCESS)
			GNIX_WARN(FI_LOG_EP_DATA,
				  "__gnix_amo_post_err() failed: %d\n", rc);
		return -FI_ECANCELED;
	}

	rc = _gnix_nic_tx_alloc(nic, &txd);
	if (rc) {
		GNIX_INFO(FI_LOG_EP_DATA, "_gnix_nic_tx_alloc() failed: %d\n",
			 rc);
		return -FI_ENOSPC;
	}

	txd->completer_fn = __gnix_amo_txd_complete;
	txd->req = fab_req;

	/* Mem handle CRC is not validated during FMA operations.  Skip this
	 * costly calculation. */
	_gnix_convert_key_to_mhdl_no_crc(
			(gnix_mr_key_t *)&fab_req->amo.rem_mr_key,
			&mdh);
	loc_md = (struct gnix_fid_mem_desc *)fab_req->amo.loc_md;

	txd->gni_desc.type = GNI_POST_AMO;
	txd->gni_desc.cq_mode = GNI_CQMODE_GLOBAL_EVENT; /* check flags */
	txd->gni_desc.dlvr_mode = GNI_DLVMODE_PERFORMANCE; /* check flags */
	txd->gni_desc.local_addr = (uint64_t)fab_req->amo.loc_addr;
	if (loc_md) {
		txd->gni_desc.local_mem_hndl = loc_md->mem_hndl;
	}
	txd->gni_desc.remote_addr = (uint64_t)fab_req->amo.rem_addr;
	txd->gni_desc.remote_mem_hndl = mdh;
	txd->gni_desc.length = fab_req->amo.len;
	txd->gni_desc.rdma_mode = 0; /* check flags */
	txd->gni_desc.src_cq_hndl = nic->tx_cq; /* check flags */

	txd->gni_desc.amo_cmd = _gnix_atomic_cmd(fab_req->amo.datatype,
						 fab_req->amo.op,
						 fab_req->type);
	txd->gni_desc.first_operand = fab_req->amo.first_operand;
	txd->gni_desc.second_operand = fab_req->amo.second_operand;

	GNIX_DEBUG(FI_LOG_EP_DATA, "fo:%016lx so:%016lx\n",
		   txd->gni_desc.first_operand, txd->gni_desc.second_operand);
	GNIX_DEBUG(FI_LOG_EP_DATA, "amo_cmd:%x\n",
		   txd->gni_desc.amo_cmd);
	GNIX_LOG_DUMP_TXD(txd);

	COND_ACQUIRE(nic->requires_lock, &nic->lock);

	if (OFI_UNLIKELY(inject_err)) {
		_gnix_nic_txd_err_inject(nic, txd);
		status = GNI_RC_SUCCESS;
	} else {
		status = GNI_PostFma(fab_req->vc->gni_ep, &txd->gni_desc);
	}

	COND_RELEASE(nic->requires_lock, &nic->lock);

	if (status != GNI_RC_SUCCESS) {
		_gnix_nic_tx_free(nic, txd);
		GNIX_INFO(FI_LOG_EP_DATA, "GNI_Post*() failed: %s\n",
			  gni_err_str[status]);
	}

	return gnixu_to_fi_errno(status);
}
Example #28
File: rxm_cq.c Project: ofiwg/libfabric
static int rxm_finish_recv(struct rxm_rx_buf *rx_buf, size_t done_len)
{
	int ret;
	struct rxm_recv_entry *recv_entry = rx_buf->recv_entry;

	if (OFI_UNLIKELY(done_len < rx_buf->pkt.hdr.size)) {
		ret = rxm_cq_write_error_trunc(rx_buf, done_len);
		if (ret)
			return ret;
	} else {
		if (rx_buf->recv_entry->flags & FI_COMPLETION ||
		    rx_buf->ep->rxm_info->mode & FI_BUFFERED_RECV) {
			ret = rxm_cq_write_recv_comp(
					rx_buf, rx_buf->recv_entry->context,
					rx_buf->recv_entry->comp_flags |
					rxm_cq_get_rx_comp_flags(rx_buf),
					rx_buf->pkt.hdr.size,
					rx_buf->recv_entry->rxm_iov.iov[0].iov_base);
			if (ret)
				return ret;
		}
		ofi_ep_rx_cntr_inc(&rx_buf->ep->util_ep);
	}

	if (rx_buf->recv_entry->flags & FI_MULTI_RECV) {
		struct rxm_iov rxm_iov;
		size_t recv_size = rx_buf->pkt.hdr.size;
		struct rxm_ep *rxm_ep = rx_buf->ep;

		rxm_rx_buf_release(rxm_ep, rx_buf);

		recv_entry->total_len -= recv_size;

		if (recv_entry->total_len <= rxm_ep->min_multi_recv_size) {
			FI_DBG(&rxm_prov, FI_LOG_CQ,
			       "Buffer %p has been completely consumed. "
			       "Reporting Multi-Recv completion\n",
			       recv_entry->multi_recv.buf);
			ret = rxm_cq_write_multi_recv_comp(rxm_ep, recv_entry);
			if (OFI_UNLIKELY(ret)) {
				FI_WARN(&rxm_prov, FI_LOG_CQ,
					"Unable to write FI_MULTI_RECV completion\n");
				return ret;
			}
			/* Since buffer is elapsed, release recv_entry */
			rxm_recv_entry_release(recv_entry->recv_queue,
					       recv_entry);
			return ret;
		}

		FI_DBG(&rxm_prov, FI_LOG_CQ,
		       "Repost Multi-Recv entry: %p "
		       "consumed len = %zu, remain len = %zu\n",
		       recv_entry, recv_size, recv_entry->total_len);

		rxm_iov = recv_entry->rxm_iov;
		ret = rxm_match_iov(/* prev iovecs */
				    rxm_iov.iov, rxm_iov.desc, rxm_iov.count,
				    recv_size,			/* offset */
				    recv_entry->total_len,	/* match_len */
				    &recv_entry->rxm_iov);	/* match_iov */
		if (OFI_UNLIKELY(ret))
			return ret;

		return rxm_process_recv_entry(recv_entry->recv_queue, recv_entry);
	} else {
		rxm_rx_buf_release(rx_buf->ep, rx_buf);
		rxm_recv_entry_release(recv_entry->recv_queue, recv_entry);
	}

	return FI_SUCCESS;
}
Example #29
static inline struct efa_conn *efa_av_map_addr_to_conn(struct efa_av *av, fi_addr_t addr)
{
	if (OFI_UNLIKELY(addr == FI_ADDR_UNSPEC))
		return NULL;
	return (struct efa_conn *)(void *)addr;
}
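
Examples #25 and #29 are the two halves of the same lookup: with FI_AV_TABLE the fi_addr_t is an index into conn_table, while with FI_AV_MAP it encodes the connection pointer itself. A hypothetical dispatcher tying them together (the av-type argument is assumed, not taken from the EFA source):

/* Hypothetical dispatcher combining Examples #25 and #29; the caller
 * supplies the address-vector type rather than reading it from a
 * (here unknown) field of struct efa_av. */
static inline struct efa_conn *
efa_av_addr_to_conn(struct efa_av *av, enum fi_av_type type, fi_addr_t addr)
{
	return (type == FI_AV_TABLE) ?
		efa_av_tbl_idx_to_conn(av, addr) :
		efa_av_map_addr_to_conn(av, addr);
}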
Example #30
static ssize_t
fi_ibv_rdm_ep_rma_readmsg(struct fid_ep *ep_fid, const struct fi_msg_rma *msg,
		uint64_t flags)
{
	struct fi_ibv_rdm_ep *ep =
		container_of(ep_fid, struct fi_ibv_rdm_ep, ep_fid);
	struct fi_ibv_rdm_conn *conn = ep->av->addr_to_conn(ep, msg->addr);
	struct fi_ibv_rdm_rma_start_data start_data = {
		.ep_rdm = ep,
		.conn = conn,
		.context = msg->context,
		.flags = FI_RMA | FI_READ | GET_TX_COMP_FLAG(ep, flags),
		.data_len = (uint64_t)msg->msg_iov[0].iov_len,
		.rbuf = (uintptr_t)msg->rma_iov[0].addr,
		.lbuf = (uintptr_t)msg->msg_iov[0].iov_base,
		.rkey = (uint64_t)(uintptr_t)(msg->rma_iov[0].key),
		.lkey = (uint64_t)(uintptr_t)(msg->desc ? msg->desc[0] : NULL),
		.op_code = IBV_WR_RDMA_READ
	};
	struct fi_ibv_rma_post_ready_data post_ready_data = { .ep_rdm = ep };

	struct fi_ibv_rdm_buf *rdm_buf = NULL;
	ssize_t ret = FI_SUCCESS;
	struct fi_ibv_rdm_request *request;

	if (msg->iov_count != 1 || msg->rma_iov_count != 1) {
		assert(0);
		return -FI_EMSGSIZE;
	}

	ret = fi_ibv_rdm_ep_rma_preinit((void**)&start_data.lkey, &rdm_buf,
					msg->msg_iov[0].iov_len,
					conn, ep);
	if (ret) {
		return ret;
	}

	request = util_buf_alloc(fi_ibv_rdm_request_pool);
	if (OFI_UNLIKELY(!request))
		return -FI_EAGAIN;

	FI_IBV_RDM_DBG_REQUEST("get_from_pool: ", request, FI_LOG_DEBUG);

	/* Initial state */
	request->state.eager = FI_IBV_STATE_EAGER_BEGIN;
	request->state.rndv  = FI_IBV_STATE_RNDV_NOT_USED;
	request->state.err   = FI_SUCCESS;

	request->minfo.is_tagged = 0;
	request->rmabuf = rdm_buf;

	fi_ibv_rdm_req_hndl(request, FI_IBV_EVENT_RMA_START, &start_data);

	return fi_ibv_rdm_req_hndl(request, FI_IBV_EVENT_POST_READY,
				   &post_ready_data);
}

static ssize_t
fi_ibv_rdm_ep_rma_readv(struct fid_ep *ep, const struct iovec *iov, void **desc,
		size_t count, fi_addr_t src_addr, uint64_t addr, uint64_t key,
		void *context)
{
	struct fi_ibv_rdm_ep *ep_rdm = 
		container_of(ep, struct fi_ibv_rdm_ep, ep_fid);

	struct fi_rma_iov rma_iov = {
		.addr = addr,
		.len = 0,
		.key = key
	};

	struct fi_msg_rma msg = {
		.msg_iov = iov,
		.desc = desc,
		.iov_count = count,
		.addr = src_addr,
		.rma_iov = &rma_iov,
		.rma_iov_count = 1,
		.context = context,
		.data = 0
	};

	size_t i;
	for (i = 0; i < count; i++) {
		rma_iov.len += iov[i].iov_len;
	}

	return fi_ibv_rdm_ep_rma_readmsg(ep, &msg, GET_TX_COMP(ep_rdm));
}

static ssize_t
fi_ibv_rdm_ep_rma_read(struct fid_ep *ep_fid, void *buf, size_t len,
		    void *desc, fi_addr_t src_addr,
		    uint64_t addr, uint64_t key, void *context)
{
	const struct iovec iov = {
		.iov_base = buf,
		.iov_len = len
	};

	return fi_ibv_rdm_ep_rma_readv(ep_fid, &iov, &desc, 1, src_addr, addr,
					key, context);
}

static ssize_t
fi_ibv_rdm_ep_rma_writemsg(struct fid_ep *ep_fid, const struct fi_msg_rma *msg,
		uint64_t flags)
{
	struct fi_ibv_rdm_ep *ep = container_of(ep_fid, struct fi_ibv_rdm_ep,
						ep_fid);
	struct fi_ibv_rdm_conn *conn = ep->av->addr_to_conn(ep, msg->addr);
	struct fi_ibv_rdm_request *request = NULL;
	struct fi_ibv_rdm_buf *rdm_buf = NULL;
	ssize_t ret = FI_SUCCESS;

	struct fi_ibv_rdm_rma_start_data start_data = {
		.conn = conn,
		.ep_rdm = ep,
		.context = msg->context,
		.flags = FI_RMA | FI_WRITE | (ep->tx_selective_completion ?
			(flags & FI_COMPLETION) : FI_COMPLETION),
		.data_len = (uint64_t)msg->msg_iov[0].iov_len,
		.rbuf = msg->rma_iov[0].addr,
		.lbuf = (uintptr_t)msg->msg_iov[0].iov_base,
		.rkey = msg->rma_iov[0].key,
		.lkey = (uint64_t)(uintptr_t)(msg->desc ? msg->desc[0] : NULL),
		.op_code = IBV_WR_RDMA_WRITE
	};

	if (msg->iov_count != 1 || msg->rma_iov_count != 1) {
		assert(0);
		return -FI_EMSGSIZE;
	}

	ret = fi_ibv_rdm_ep_rma_preinit((void**)&start_data.lkey, &rdm_buf,
					msg->msg_iov[0].iov_len,
					conn, ep);
	if (ret) {
		return ret;
	}

	request = util_buf_alloc(fi_ibv_rdm_request_pool);
	if (OFI_UNLIKELY(!request))
		return -FI_EAGAIN;

	/* Initial state */
	request->state.eager = FI_IBV_STATE_EAGER_BEGIN;
	request->state.rndv  = FI_IBV_STATE_RNDV_NOT_USED;
	request->state.err   = FI_SUCCESS;
	request->minfo.is_tagged = 0;
	request->rmabuf = rdm_buf;

	FI_IBV_RDM_DBG_REQUEST("get_from_pool: ", request, FI_LOG_DEBUG);

	fi_ibv_rdm_req_hndl(request, FI_IBV_EVENT_RMA_START, &start_data);

	struct fi_ibv_rma_post_ready_data post_ready_data = { .ep_rdm = ep };

	return fi_ibv_rdm_req_hndl(request, FI_IBV_EVENT_POST_READY,
				   &post_ready_data);
}

static ssize_t
fi_ibv_rdm_ep_rma_writev(struct fid_ep *ep_fid, const struct iovec *iov, void **desc,
		size_t count, fi_addr_t dest_addr, uint64_t addr, uint64_t key,
		void *context)
{
	struct fi_rma_iov rma_iov = {
		.addr = addr,
		.len = 0,
		.key = key
	};

	struct fi_msg_rma msg = {
		.msg_iov = iov,
		.desc = desc,
		.iov_count = count,
		.addr = dest_addr,
		.rma_iov = &rma_iov,
		.rma_iov_count = 1,
		.context = context,
		.data = 0
	};

	size_t i;
	for (i = 0; i < count; i++) {
		rma_iov.len += iov[i].iov_len;
	}

	struct fi_ibv_rdm_ep *ep_rdm =
		container_of(ep_fid, struct fi_ibv_rdm_ep, ep_fid);

	return fi_ibv_rdm_ep_rma_writemsg(ep_fid, &msg, GET_TX_COMP(ep_rdm));
}

static ssize_t
fi_ibv_rdm_ep_rma_write(struct fid_ep *ep_fid, const void *buf, size_t len,
			void *desc, fi_addr_t dest_addr, uint64_t addr,
			uint64_t key, void *context)
{
	const struct iovec iov = {
		.iov_base = (void *)buf,
		.iov_len = len
	};

	return fi_ibv_rdm_ep_rma_writev(ep_fid, &iov, &desc, 1, dest_addr, addr,
					key, context);
}

static ssize_t fi_ibv_rdm_ep_rma_inject_write(struct fid_ep *ep,
					      const void *buf, size_t len,
					      fi_addr_t dest_addr,
					      uint64_t addr, uint64_t key)
{
	struct fi_ibv_rdm_ep *ep_rdm = container_of(ep, struct fi_ibv_rdm_ep,
						    ep_fid);
	struct fi_ibv_rdm_conn *conn = ep_rdm->av->addr_to_conn(ep_rdm, dest_addr);
	struct fi_ibv_rdm_rma_start_data start_data = {
		.conn = conn,
		.ep_rdm = ep_rdm,
		.flags = 0, /* inject does not generate completion */
		.data_len = (uint64_t)len,
		.rbuf = addr,
		.lbuf = (uintptr_t)buf,
		.rkey = (uint64_t)key,
		.lkey = 0
	};
	ssize_t ret;
	struct fi_ibv_rdm_request *request =
		util_buf_alloc(fi_ibv_rdm_request_pool);

	if (OFI_UNLIKELY(!request))
		return -FI_EAGAIN;

	FI_IBV_RDM_DBG_REQUEST("get_from_pool: ", request, FI_LOG_DEBUG);

	/* Initial state */
	request->state.eager = FI_IBV_STATE_EAGER_RMA_INJECT;
	request->state.rndv  = FI_IBV_STATE_RNDV_NOT_USED;
	request->state.err   = FI_SUCCESS;

	request->minfo.is_tagged = 0;
	ret = fi_ibv_rdm_req_hndl(request, FI_IBV_EVENT_RMA_START, &start_data);

	switch (ret)
	{
	case FI_SUCCESS:
		return ret;
	case -FI_EAGAIN:
		break;
	default:
		ret = -errno;
		break;
	}

	FI_IBV_RDM_DBG_REQUEST("to_pool: ", request, FI_LOG_DEBUG);
	util_buf_release(fi_ibv_rdm_request_pool, request);

	fi_ibv_rdm_tagged_poll(ep_rdm);

	return ret;
}

struct fi_ops_rma fi_ibv_rdm_ep_rma_ops = {
	.size		= sizeof(struct fi_ops_rma),
	.read		= fi_ibv_rdm_ep_rma_read,
	.readv		= fi_ibv_rdm_ep_rma_readv,
	.readmsg	= fi_ibv_rdm_ep_rma_readmsg,
	.write		= fi_ibv_rdm_ep_rma_write,
	.writev		= fi_ibv_rdm_ep_rma_writev,
	.writemsg	= fi_ibv_rdm_ep_rma_writemsg,
	.inject		= fi_ibv_rdm_ep_rma_inject_write,
	.writedata	= fi_no_rma_writedata,
	.injectdata	= fi_no_rma_injectdata,
};

struct fi_ops_rma *fi_ibv_rdm_ep_ops_rma()
{
	return &fi_ibv_rdm_ep_rma_ops;
}