Example #1
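/*
 * Common pre-checks for an RMA operation: if the caller did not supply a
 * local descriptor and the transfer fits under the rendezvous threshold,
 * grab an internal RMA bounce buffer and use the connection's MR lkey as
 * the descriptor.  Otherwise verify the connection and that RMA resources
 * are free.  On any shortage, poll the EP and return -FI_EAGAIN so the
 * caller can retry.
 */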
static inline ssize_t
fi_ibv_rdm_ep_rma_preinit(void **desc, struct fi_ibv_rdm_buf **rdm_buf,
			  size_t len, struct fi_ibv_rdm_conn *conn,
			  struct fi_ibv_rdm_ep *ep)
{
	assert(desc && rdm_buf);

	if (*desc == NULL && len < ep->rndv_threshold) {
		*rdm_buf = fi_ibv_rdm_rma_prepare_resources(conn, ep);
		if (*rdm_buf) {
			*desc = (void*)(uintptr_t)conn->rma_mr->lkey;
		} else {
			goto again;
		}
	} else if (!fi_ibv_rdm_check_connection(conn, ep) ||
		RMA_RESOURCES_IS_BUSY(conn, ep) || conn->postponed_entry) {
		goto again;
	}

	return FI_SUCCESS;
again:
	fi_ibv_rdm_tagged_poll(ep);
	return -FI_EAGAIN;
}
Example #2
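/*
 * RDMA read: fetch 'len' bytes from the remote buffer described by
 * (addr, key) into 'buf'.  Without a caller-supplied descriptor the
 * transfer must fit under the rendezvous threshold and lands in an
 * internal bounce buffer registered with the connection's RMA MR.
 * The request is driven through the eager state machine with
 * FI_IBV_EVENT_RMA_START followed by FI_IBV_EVENT_SEND_READY.
 */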
static ssize_t
fi_ibv_rdm_ep_rma_read(struct fid_ep *ep_fid, void *buf, size_t len,
		    void *desc, fi_addr_t src_addr,
		    uint64_t addr, uint64_t key, void *context)
{
	ssize_t ret = FI_SUCCESS;
	struct fi_ibv_rdm_ep *ep = container_of(ep_fid, struct fi_ibv_rdm_ep,
						ep_fid);

	if (desc == NULL && len >= ep->rndv_threshold) {
		goto out_errinput;
	}

	struct fi_ibv_rdm_tagged_conn *conn =
		(struct fi_ibv_rdm_tagged_conn *) src_addr;
	void *raw_buf = NULL;

	if (desc == NULL) {
		int again = 1;

		if (!conn->postponed_entry) {
			raw_buf = fi_ibv_rdm_rma_prepare_resources(conn, ep);

			if (raw_buf) {
				desc = (void*)(uintptr_t)conn->rma_mr->lkey;
				again = 0;
			}
		}

		if (again) {
			goto out_again;
		}
	} else if (!fi_ibv_rdm_check_connection(conn, ep) ||
		   RMA_RESOURCES_IS_BUSY(conn, ep)) {
		/*
		 * TODO: Should the postponed queue flow be implemented for RMA?
		 */
		goto out_again;
	}

	struct fi_ibv_rdm_tagged_request *request =
		util_buf_alloc(fi_ibv_rdm_tagged_request_pool);
	FI_IBV_RDM_TAGGED_DBG_REQUEST("get_from_pool: ", request, FI_LOG_DEBUG);

	/* Initial state */
	request->state.eager = FI_IBV_STATE_EAGER_BEGIN;
	request->state.rndv  = FI_IBV_STATE_RNDV_NOT_USED;
	request->rmabuf = raw_buf;

	struct fi_ibv_rdm_rma_start_data start_data = {
		.ep_rdm = ep,
		.conn = conn,
		.context = context,
		.data_len = (uint32_t)len,
		.rbuf = addr,
		.lbuf = (uintptr_t)buf,
		.rkey = (uint32_t)key,
		.lkey = (uint32_t)(uintptr_t)desc,
		.op_code = IBV_WR_RDMA_READ
	};

	fi_ibv_rdm_tagged_req_hndl(request, FI_IBV_EVENT_RMA_START, &start_data);

	struct fi_ibv_rma_post_ready_data post_ready_data = { .ep_rdm = ep };

	ret = fi_ibv_rdm_tagged_req_hndl(request, FI_IBV_EVENT_SEND_READY,
					 &post_ready_data);
	ret = (ret == FI_EP_RDM_HNDL_SUCCESS) ? FI_SUCCESS : -FI_EOTHER;

out:
	return ret;

out_again:
	fi_ibv_rdm_tagged_poll(ep);
	ret = -FI_EAGAIN;
	goto out;

out_errinput:
	ret = -FI_EINVAL;
	goto out;
}

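/*
 * Message-based read entry point.  Only the single local IOV / single
 * RMA IOV case is supported; it is forwarded to fi_ibv_rdm_ep_rma_read.
 */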
static ssize_t
fi_ibv_rdm_ep_rma_readmsg(struct fid_ep *ep, const struct fi_msg_rma *msg,
		uint64_t flags)
{
	if (msg->iov_count == 1 && msg->rma_iov_count == 1) {
		return fi_ibv_rdm_ep_rma_read(ep,
					      msg->msg_iov[0].iov_base,
					      msg->msg_iov[0].iov_len,
					      msg->desc[0],
					      msg->addr,
					      msg->rma_iov[0].addr,
					      msg->rma_iov[0].key,
					      msg->context);
	}

	assert(0);
	return -FI_EMSGSIZE;
}

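/*
 * Vector read entry point.  The local IOV list is folded into one
 * fi_msg_rma with a single remote IOV covering the summed length and
 * handed to fi_ibv_rdm_ep_rma_readmsg.
 */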
static ssize_t
fi_ibv_rdm_ep_rma_readv(struct fid_ep *ep, const struct iovec *iov, void **desc,
		size_t count, fi_addr_t src_addr, uint64_t addr, uint64_t key,
		void *context)
{
	struct fi_rma_iov rma_iov = {
		.addr = addr,
		.len = 0,
		.key = key
	};

	size_t i;
	for (i = 0; i < count; i++) {
		rma_iov.len += iov[i].iov_len;
	}

	struct fi_msg_rma msg = {
		.msg_iov = iov,
		.desc = desc,
		.iov_count = count,
		.addr = src_addr,
		.rma_iov = &rma_iov,
		.rma_iov_count = 1,
		.context = context,
		.data = 0
	};

	return fi_ibv_rdm_ep_rma_readmsg(ep, &msg, 0);
}

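/*
 * RDMA write: push 'len' bytes from 'buf' to the remote buffer described
 * by (addr, key).  Without a caller-supplied descriptor the data is first
 * copied into an internal bounce buffer registered with the connection's
 * RMA MR, which requires the transfer to fit under the rendezvous
 * threshold.
 */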
static ssize_t
fi_ibv_rdm_ep_rma_write(struct fid_ep *ep_fid, const void *buf, size_t len,
		     void *desc, fi_addr_t dest_addr,
		     uint64_t addr, uint64_t key, void *context)
{
	ssize_t ret = FI_SUCCESS;
	struct fi_ibv_rdm_ep *ep = container_of(ep_fid, struct fi_ibv_rdm_ep,
						ep_fid);

	if (desc == NULL && len >= ep->rndv_threshold) {
		goto out_errinput;
	}

	struct fi_ibv_rdm_tagged_conn *conn =
		(struct fi_ibv_rdm_tagged_conn *) dest_addr;
	void *raw_buf = NULL;

	if (desc == NULL) {
		int again = 1;

		if (!conn->postponed_entry) {
			raw_buf = fi_ibv_rdm_rma_prepare_resources(conn, ep);

			if (raw_buf) {
				memcpy(raw_buf, buf, len);
				desc = (void*)(uintptr_t)conn->rma_mr->lkey;
				again = 0;
			}
		}

		if (again) {
			goto out_again;
		}
	} else if (!fi_ibv_rdm_check_connection(conn, ep) ||
		   SEND_RESOURCES_IS_BUSY(conn, ep)) {
		/*
		 * TODO: Should the postponed queue flow be implemented for RMA?
		 */
		goto out_again;
	}

	struct fi_ibv_rdm_tagged_request *request =
		util_buf_alloc(fi_ibv_rdm_tagged_request_pool);
	FI_IBV_RDM_TAGGED_DBG_REQUEST("get_from_pool: ", request, FI_LOG_DEBUG);

	/* Initial state */
	request->state.eager = FI_IBV_STATE_EAGER_BEGIN;
	request->state.rndv  = FI_IBV_STATE_RNDV_NOT_USED;
	request->rmabuf = raw_buf;

	struct fi_ibv_rdm_rma_start_data start_data = {
		.conn = conn,
		.ep_rdm = ep,
		.context = context,
		.data_len = (uint32_t)len,
		.rbuf = addr,
		.lbuf = (uintptr_t)buf,
		.rkey = (uint32_t)key,
		.lkey = (uint32_t)(uintptr_t)desc,
		.op_code = IBV_WR_RDMA_WRITE
	};

	fi_ibv_rdm_tagged_req_hndl(request, FI_IBV_EVENT_RMA_START, &start_data);

	struct fi_ibv_rma_post_ready_data post_ready_data = { .ep_rdm = ep };

	ret = fi_ibv_rdm_tagged_req_hndl(request, FI_IBV_EVENT_SEND_READY,
					 &post_ready_data);
	ret = (ret == FI_EP_RDM_HNDL_SUCCESS) ? FI_SUCCESS : -FI_EOTHER;
out:
	return ret;

out_again:
	fi_ibv_rdm_tagged_poll(ep);
	ret = -FI_EAGAIN;
	goto out;

out_errinput:
	ret = -FI_EINVAL;
	goto out;
}

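/*
 * Message-based write entry point.  Only the single local IOV / single
 * RMA IOV case is supported; it is forwarded to fi_ibv_rdm_ep_rma_write.
 */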
static ssize_t
fi_ibv_rdm_ep_rma_writemsg(struct fid_ep *ep, const struct fi_msg_rma *msg,
		uint64_t flags)
{
	if (msg->iov_count == 1 && msg->rma_iov_count == 1) {
		return fi_ibv_rdm_ep_rma_write(ep,
					       msg->msg_iov[0].iov_base,
					       msg->msg_iov[0].iov_len,
					       msg->desc[0],
					       msg->addr,
					       msg->rma_iov[0].addr,
					       msg->rma_iov[0].key,
					       msg->context);
	}

	assert(0);
	return -FI_EMSGSIZE;
}

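/*
 * Vector write entry point.  The local IOV list is folded into one
 * fi_msg_rma with a single remote IOV covering the summed length and
 * handed to fi_ibv_rdm_ep_rma_writemsg.
 */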
static ssize_t
fi_ibv_rdm_ep_rma_writev(struct fid_ep *ep, const struct iovec *iov, void **desc,
		size_t count, fi_addr_t dest_addr, uint64_t addr, uint64_t key,
		void *context)
{
	struct fi_rma_iov rma_iov = {
		.addr = addr,
		.len = 0,
		.key = key
	};

	size_t i;
	for (i = 0; i < count; i++) {
		rma_iov.len += iov[i].iov_len;
	}

	struct fi_msg_rma msg = {
		.msg_iov = iov,
		.desc = desc,
		.iov_count = count,
		.addr = dest_addr,
		.rma_iov = &rma_iov,
		.rma_iov_count = 1,
		.context = context,
		.data = 0
	};

	return fi_ibv_rdm_ep_rma_writemsg(ep, &msg, 0);
}

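/*
 * Inject path: small writes (below the rendezvous threshold) posted
 * without a user completion context.  The request starts in the
 * EAGER_RMA_INJECT state; if the connection or RMA resources are not
 * ready, the request is released, the EP is polled and -FI_EAGAIN is
 * returned.
 */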
static ssize_t fi_ibv_rdm_ep_rma_inject_write(struct fid_ep *ep,
					      const void *buf, size_t len,
					      fi_addr_t dest_addr,
					      uint64_t addr, uint64_t key)
{
	struct fi_ibv_rdm_ep *ep_rdm = container_of(ep, struct fi_ibv_rdm_ep,
						    ep_fid);
	struct fi_ibv_rdm_tagged_conn *conn =
		(struct fi_ibv_rdm_tagged_conn *) dest_addr;
	struct fi_ibv_rdm_tagged_request *request = NULL;
	int ret = FI_EP_RDM_HNDL_AGAIN;

	if (len >= ep_rdm->rndv_threshold) {
		return -FI_EMSGSIZE;
	}

	if (fi_ibv_rdm_check_connection(conn, ep_rdm) &&
	    !RMA_RESOURCES_IS_BUSY(conn, ep_rdm) &&
	    !conn->postponed_entry)
	{
		request = util_buf_alloc(fi_ibv_rdm_tagged_request_pool);

		FI_IBV_RDM_TAGGED_DBG_REQUEST("get_from_pool: ",
			request, FI_LOG_DEBUG);

		/* Initial state */
		request->state.eager = FI_IBV_STATE_EAGER_RMA_INJECT;
		request->state.rndv  = FI_IBV_STATE_RNDV_NOT_USED;

		struct fi_ibv_rdm_rma_start_data start_data = {
			.conn = conn,
			.ep_rdm = ep_rdm,
			.data_len = (uint32_t)len,
			.rbuf = addr,
			.lbuf = (uintptr_t)buf,
			.rkey = (uint32_t)key,
			.lkey = 0
		};

		ret = fi_ibv_rdm_tagged_req_hndl(request,
						 FI_IBV_EVENT_RMA_START,
						 &start_data);
	}

	switch (ret) {
	case FI_EP_RDM_HNDL_SUCCESS:
		return FI_SUCCESS;
	case FI_EP_RDM_HNDL_AGAIN:
		ret = -FI_EAGAIN;
		break;
	default:
		ret = -errno;
		break;
	}

	if (request) {
		FI_IBV_RDM_TAGGED_DBG_REQUEST("to_pool: ", request,
					      FI_LOG_DEBUG);
		util_buf_release(fi_ibv_rdm_tagged_request_pool, request);
	}

	fi_ibv_rdm_tagged_poll(ep_rdm);

	return ret;
}

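/*
 * RMA operations table registered for the verbs RDM endpoint;
 * writedata/injectdata are not supported.
 */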
static struct fi_ops_rma fi_ibv_rdm_ep_rma_ops = {
	.size		= sizeof(struct fi_ops_rma),
	.read		= fi_ibv_rdm_ep_rma_read,
	.readv		= fi_ibv_rdm_ep_rma_readv,
	.readmsg	= fi_ibv_rdm_ep_rma_readmsg,
	.write		= fi_ibv_rdm_ep_rma_write,
	.writev		= fi_ibv_rdm_ep_rma_writev,
	.writemsg	= fi_ibv_rdm_ep_rma_writemsg,
	.inject		= fi_ibv_rdm_ep_rma_inject_write,
	.writedata	= fi_no_rma_writedata,
	.injectdata	= fi_no_rma_injectdata,
};

struct fi_ops_rma *fi_ibv_rdm_ep_ops_rma(struct fi_ibv_rdm_ep *ep)
{
	return &fi_ibv_rdm_ep_rma_ops;
}
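
Usage note: applications reach these entry points through the generic libfabric RMA calls, which dispatch via the ops table above. The sketch below is illustrative only; it assumes an already-connected RDM endpoint, a registered local descriptor, a resolved peer address, and remote address/key values obtained out of band (the helper name read_with_retry and all of its parameters are hypothetical). It shows the -FI_EAGAIN retry behaviour these functions expect from the caller.

#include <rdma/fabric.h>
#include <rdma/fi_errno.h>
#include <rdma/fi_rma.h>

/* Illustrative sketch: retry an RDMA read until the provider accepts it.
 * 'ep', 'desc', 'peer', 'raddr' and 'rkey' are assumed to have been set
 * up elsewhere (endpoint creation, memory registration, address vector). */
static ssize_t read_with_retry(struct fid_ep *ep, void *buf, size_t len,
			       void *desc, fi_addr_t peer,
			       uint64_t raddr, uint64_t rkey, void *ctx)
{
	ssize_t ret;

	do {
		/* Dispatches to fi_ibv_rdm_ep_rma_read() through
		 * fi_ibv_rdm_ep_rma_ops for this provider. */
		ret = fi_read(ep, buf, len, desc, peer, raddr, rkey, ctx);
	} while (ret == -FI_EAGAIN);

	return ret;
}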