Example #1
static ssize_t
fi_ibv_rdm_ep_rma_readmsg(struct fid_ep *ep_fid, const struct fi_msg_rma *msg,
		uint64_t flags)
{
	struct fi_ibv_rdm_ep *ep =
		container_of(ep_fid, struct fi_ibv_rdm_ep, ep_fid);

	struct fi_ibv_rdm_conn *conn = ep->av->addr_to_conn(ep, msg->addr);

	struct fi_ibv_rdm_rma_start_data start_data = {
		.ep_rdm = ep,
		.conn = conn,
		.context = msg->context,
		.flags = FI_RMA | FI_READ | (ep->tx_selective_completion ?
			(flags & FI_COMPLETION) : FI_COMPLETION),
		.data_len = (uint64_t)msg->msg_iov[0].iov_len,
		.rbuf = (uintptr_t)msg->rma_iov[0].addr,
		.lbuf = (uintptr_t)msg->msg_iov[0].iov_base,
		.rkey = (uint64_t)(uintptr_t)(msg->rma_iov[0].key),
		.lkey = (uint64_t)(uintptr_t)(msg->desc ? msg->desc[0] : NULL),
		.op_code = IBV_WR_RDMA_READ
	};

	struct fi_ibv_rma_post_ready_data post_ready_data = { .ep_rdm = ep };

	struct fi_ibv_rdm_buf *rdm_buf = NULL;
	ssize_t ret = FI_SUCCESS;

	if (msg->iov_count != 1 || msg->rma_iov_count != 1) {
		assert(0);
		return -FI_EMSGSIZE;
	}

	ret = fi_ibv_rdm_ep_rma_preinit((void**)&start_data.lkey, &rdm_buf,
					msg->msg_iov[0].iov_len,
					conn, ep);
	if (ret) {
		return ret;
	}

	struct fi_ibv_rdm_request *request =
		util_buf_alloc(fi_ibv_rdm_request_pool);
	FI_IBV_RDM_DBG_REQUEST("get_from_pool: ", request, FI_LOG_DEBUG);

	/* Initial state */
	request->state.eager = FI_IBV_STATE_EAGER_BEGIN;
	request->state.rndv  = FI_IBV_STATE_RNDV_NOT_USED;
	request->state.err   = FI_SUCCESS;

	request->minfo.is_tagged = 0;
	request->rmabuf = rdm_buf;

	fi_ibv_rdm_req_hndl(request, FI_IBV_EVENT_RMA_START, &start_data);

	return fi_ibv_rdm_req_hndl(request, FI_IBV_EVENT_POST_READY,
				   &post_ready_data);
}

static ssize_t
fi_ibv_rdm_ep_rma_readv(struct fid_ep *ep, const struct iovec *iov, void **desc,
		size_t count, fi_addr_t src_addr, uint64_t addr, uint64_t key,
		void *context)
{
	struct fi_ibv_rdm_ep *ep_rdm = 
		container_of(ep, struct fi_ibv_rdm_ep, ep_fid);

	struct fi_rma_iov rma_iov = {
		.addr = addr,
		.len = 0,
		.key = key
	};

	struct fi_msg_rma msg = {
		.msg_iov = iov,
		.desc = desc,
		.iov_count = count,
		.addr = src_addr,
		.rma_iov = &rma_iov,
		.rma_iov_count = 1,
		.context = context,
		.data = 0
	};

	size_t i;
	for (i = 0; i < count; i++) {
		rma_iov.len += iov[i].iov_len;
	}

	return fi_ibv_rdm_ep_rma_readmsg(ep, &msg,
		(ep_rdm->tx_selective_completion ? 0ULL : FI_COMPLETION));
}

static ssize_t
fi_ibv_rdm_ep_rma_read(struct fid_ep *ep_fid, void *buf, size_t len,
		    void *desc, fi_addr_t src_addr,
		    uint64_t addr, uint64_t key, void *context)
{
	const struct iovec iov = {
		.iov_base = buf,
		.iov_len = len
	};

	return fi_ibv_rdm_ep_rma_readv(ep_fid, &iov, &desc, 1, src_addr, addr,
					key, context);
}

static ssize_t
fi_ibv_rdm_ep_rma_writemsg(struct fid_ep *ep_fid, const struct fi_msg_rma *msg,
		uint64_t flags)
{
	struct fi_ibv_rdm_ep *ep = container_of(ep_fid, struct fi_ibv_rdm_ep,
						ep_fid);
	struct fi_ibv_rdm_conn *conn = ep->av->addr_to_conn(ep, msg->addr);
	struct fi_ibv_rdm_request *request = NULL;
	struct fi_ibv_rdm_buf *rdm_buf = NULL;
	ssize_t ret = FI_SUCCESS;

	struct fi_ibv_rdm_rma_start_data start_data = {
		.conn = conn,
		.ep_rdm = ep,
		.context = msg->context,
		.flags = FI_RMA | FI_WRITE | (ep->tx_selective_completion ?
			(flags & FI_COMPLETION) : FI_COMPLETION),
		.data_len = (uint64_t)msg->msg_iov[0].iov_len,
		.rbuf = msg->rma_iov[0].addr,
		.lbuf = (uintptr_t)msg->msg_iov[0].iov_base,
		.rkey = msg->rma_iov[0].key,
		.lkey = (uint64_t)(uintptr_t)(msg->desc ? msg->desc[0] : NULL),
		.op_code = IBV_WR_RDMA_WRITE
	};

	if (msg->iov_count != 1 || msg->rma_iov_count != 1) {
		assert(0);
		return -FI_EMSGSIZE;
	}

	ret = fi_ibv_rdm_ep_rma_preinit((void**)&start_data.lkey, &rdm_buf,
					msg->msg_iov[0].iov_len,
					conn, ep);
	if (ret) {
		return ret;
	}

	request = util_buf_alloc(fi_ibv_rdm_request_pool);
	FI_IBV_RDM_DBG_REQUEST("get_from_pool: ", request, FI_LOG_DEBUG);

	/* Initial state */
	request->state.eager = FI_IBV_STATE_EAGER_BEGIN;
	request->state.rndv  = FI_IBV_STATE_RNDV_NOT_USED;
	request->state.err   = FI_SUCCESS;

	request->minfo.is_tagged = 0;
	request->rmabuf = rdm_buf;

	fi_ibv_rdm_req_hndl(request, FI_IBV_EVENT_RMA_START, &start_data);

	struct fi_ibv_rma_post_ready_data post_ready_data = { .ep_rdm = ep };

	return fi_ibv_rdm_req_hndl(request, FI_IBV_EVENT_POST_READY,
				   &post_ready_data);
}

static ssize_t
fi_ibv_rdm_ep_rma_writev(struct fid_ep *ep_fid, const struct iovec *iov, void **desc,
		size_t count, fi_addr_t dest_addr, uint64_t addr, uint64_t key,
		void *context)
{
	struct fi_rma_iov rma_iov = {
		.addr = addr,
		.len = 0,
		.key = key
	};

	struct fi_msg_rma msg = {
		.msg_iov = iov,
		.desc = desc,
		.iov_count = count,
		.addr = dest_addr,
		.rma_iov = &rma_iov,
		.rma_iov_count = 1,
		.context = context,
		.data = 0
	};

	size_t i;
	for (i = 0; i < count; i++) {
		rma_iov.len += iov[i].iov_len;
	}

	struct fi_ibv_rdm_ep *ep_rdm =
		container_of(ep_fid, struct fi_ibv_rdm_ep, ep_fid);

	return fi_ibv_rdm_ep_rma_writemsg(ep_fid, &msg,
		(ep_rdm->tx_selective_completion ? 0ULL : FI_COMPLETION));
}

static ssize_t
fi_ibv_rdm_ep_rma_write(struct fid_ep *ep_fid, const void *buf, size_t len,
			void *desc, fi_addr_t dest_addr, uint64_t addr,
			uint64_t key, void *context)
{
	const struct iovec iov = {
		.iov_base = (void *)buf,
		.iov_len = len
	};

	return fi_ibv_rdm_ep_rma_writev(ep_fid, &iov, &desc, 1, dest_addr, addr,
					key, context);
}

static ssize_t fi_ibv_rdm_ep_rma_inject_write(struct fid_ep *ep,
					      const void *buf, size_t len,
					      fi_addr_t dest_addr,
					      uint64_t addr, uint64_t key)
{
	struct fi_ibv_rdm_ep *ep_rdm = container_of(ep, struct fi_ibv_rdm_ep,
						    ep_fid);
	struct fi_ibv_rdm_conn *conn = ep_rdm->av->addr_to_conn(ep_rdm, dest_addr);
	struct fi_ibv_rdm_rma_start_data start_data = {
		.conn = conn,
		.ep_rdm = ep_rdm,
		.flags = 0, /* inject does not generate completion */
		.data_len = (uint64_t)len,
		.rbuf = addr,
		.lbuf = (uintptr_t)buf,
		.rkey = (uint64_t)key,
		.lkey = 0
	};
	struct fi_ibv_rdm_request *request =
		util_buf_alloc(fi_ibv_rdm_request_pool);
	ssize_t ret;

	FI_IBV_RDM_DBG_REQUEST("get_from_pool: ", request, FI_LOG_DEBUG);

	/* Initial state */
	request->state.eager = FI_IBV_STATE_EAGER_RMA_INJECT;
	request->state.rndv  = FI_IBV_STATE_RNDV_NOT_USED;
	request->state.err   = FI_SUCCESS;

	request->minfo.is_tagged = 0;
	ret = fi_ibv_rdm_req_hndl(request, FI_IBV_EVENT_RMA_START, &start_data);

	switch (ret)
	{
	case FI_SUCCESS:
		return ret;
	case -FI_EAGAIN:
		break;
	default:
		ret = -errno;
		break;
	}

	FI_IBV_RDM_DBG_REQUEST("to_pool: ", request, FI_LOG_DEBUG);
	util_buf_release(fi_ibv_rdm_request_pool, request);

	fi_ibv_rdm_tagged_poll(ep_rdm);

	return ret;
}

static struct fi_ops_rma fi_ibv_rdm_ep_rma_ops = {
	.size		= sizeof(struct fi_ops_rma),
	.read		= fi_ibv_rdm_ep_rma_read,
	.readv		= fi_ibv_rdm_ep_rma_readv,
	.readmsg	= fi_ibv_rdm_ep_rma_readmsg,
	.write		= fi_ibv_rdm_ep_rma_write,
	.writev		= fi_ibv_rdm_ep_rma_writev,
	.writemsg	= fi_ibv_rdm_ep_rma_writemsg,
	.inject		= fi_ibv_rdm_ep_rma_inject_write,
	.writedata	= fi_no_rma_writedata,
	.injectdata	= fi_no_rma_injectdata,
};

struct fi_ops_rma *fi_ibv_rdm_ep_ops_rma(void)
{
	return &fi_ibv_rdm_ep_rma_ops;
}
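
For reference, a minimal caller-side sketch of how an application might drive the ops table above through libfabric's public RMA API. The endpoint `ep`, transmit CQ `txcq`, registered descriptor `ldesc`, peer address, and remote key are assumed to have been set up elsewhere (all names here are hypothetical). `fi_read()` dispatches to fi_ibv_rdm_ep_rma_read() through ep->rma.

#include <rdma/fabric.h>
#include <rdma/fi_rma.h>
#include <rdma/fi_cq.h>
#include <rdma/fi_errno.h>

/* Post one RMA read and spin on the bound transmit CQ until it completes. */
static int rma_read_sync(struct fid_ep *ep, struct fid_cq *txcq,
			 void *buf, size_t len, void *ldesc,
			 fi_addr_t peer, uint64_t raddr, uint64_t rkey)
{
	struct fi_cq_entry comp;
	ssize_t ret;

	/* Retry while the provider reports a transient lack of resources;
	 * reading the CQ drives progress, and nothing else is in flight
	 * in this sketch, so no completion can be lost here. */
	do {
		ret = fi_read(ep, buf, len, ldesc, peer, raddr, rkey, NULL);
		if (ret == -FI_EAGAIN)
			(void) fi_cq_read(txcq, &comp, 1);
	} while (ret == -FI_EAGAIN);
	if (ret)
		return (int) ret;

	/* Reap exactly one completion for the posted read. */
	do {
		ret = fi_cq_read(txcq, &comp, 1);
	} while (ret == -FI_EAGAIN);

	return (ret == 1) ? 0 : (int) ret;
}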
Example #2
static ssize_t
fi_ibv_rdm_ep_rma_read(struct fid_ep *ep_fid, void *buf, size_t len,
		    void *desc, fi_addr_t src_addr,
		    uint64_t addr, uint64_t key, void *context)
{
	ssize_t ret = FI_SUCCESS;
	struct fi_ibv_rdm_ep *ep = container_of(ep_fid, struct fi_ibv_rdm_ep,
						ep_fid);

	if (desc == NULL && len >= ep->rndv_threshold) {
		goto out_errinput;
	}

	struct fi_ibv_rdm_tagged_conn *conn =
		(struct fi_ibv_rdm_tagged_conn *) src_addr;
	void *raw_buf = NULL;

	if (desc == NULL) {
		int again = 1;

		if (!conn->postponed_entry) {
			raw_buf = fi_ibv_rdm_rma_prepare_resources(conn, ep);

			if (raw_buf) {
				desc = (void*)(uintptr_t)conn->rma_mr->lkey;
				again = 0;
			}
		}

		if (again) {
			goto out_again;
		}
	} else if (!fi_ibv_rdm_check_connection(conn, ep) ||
		   RMA_RESOURCES_IS_BUSY(conn, ep)) {
		/*
		 * TODO: Should the postponed queue flow be implemented for RMA?
		 */
		goto out_again;
	}

	struct fi_ibv_rdm_tagged_request *request = 
		util_buf_alloc(fi_ibv_rdm_tagged_request_pool);
	FI_IBV_RDM_TAGGED_DBG_REQUEST("get_from_pool: ", request, FI_LOG_DEBUG);

	/* Initial state */
	request->state.eager = FI_IBV_STATE_EAGER_BEGIN;
	request->state.rndv  = FI_IBV_STATE_RNDV_NOT_USED;
	request->rmabuf = raw_buf;

	struct fi_ibv_rdm_rma_start_data start_data = {
		.ep_rdm = ep,
		.conn = conn,
		.context = context,
		.data_len = (uint32_t)len,
		.rbuf = addr,
		.lbuf = (uintptr_t)buf,
		.rkey = (uint32_t)key,
		.lkey = (uint32_t)(uintptr_t)desc,
		.op_code = IBV_WR_RDMA_READ
	};

	fi_ibv_rdm_tagged_req_hndl(request, FI_IBV_EVENT_RMA_START, &start_data);

	struct fi_ibv_rma_post_ready_data post_ready_data = { .ep_rdm = ep };

	ret = fi_ibv_rdm_tagged_req_hndl(request, FI_IBV_EVENT_SEND_READY,
					 &post_ready_data);

out:
	return ret;

out_again:
	fi_ibv_rdm_tagged_poll(ep);
	ret = -FI_EAGAIN;
	goto out;

out_errinput:
	ret = -FI_EINVAL;
	goto out;
}

static ssize_t
fi_ibv_rdm_ep_rma_readmsg(struct fid_ep *ep, const struct fi_msg_rma *msg,
		uint64_t flags)
{
	if (msg->iov_count == 1 && msg->rma_iov_count == 1) {
		return fi_ibv_rdm_ep_rma_read(ep,
					      msg->msg_iov[0].iov_base,
					      msg->msg_iov[0].iov_len,
					      msg->desc ? msg->desc[0] : NULL,
					      msg->addr,
					      msg->rma_iov[0].addr,
					      msg->rma_iov[0].key,
					      msg->context);
	}

	assert(0);
	return -FI_EMSGSIZE;
}

static ssize_t
fi_ibv_rdm_ep_rma_readv(struct fid_ep *ep, const struct iovec *iov, void **desc,
		size_t count, fi_addr_t src_addr, uint64_t addr, uint64_t key,
		void *context)
{
	struct fi_rma_iov rma_iov = {
		.addr = addr,
		.len = 0,
		.key = key
	};

	size_t i;
	for (i = 0; i < count; i++) {
		rma_iov.len += iov[i].iov_len;
	}

	struct fi_msg_rma msg = {
		.msg_iov = iov,
		.desc = desc,
		.iov_count = count,
		.addr = src_addr,
		.rma_iov = &rma_iov,
		.rma_iov_count = 1,
		.context = context,
		.data = 0
	};

	return fi_ibv_rdm_ep_rma_readmsg(ep, &msg, 0);
}

static ssize_t
fi_ibv_rdm_ep_rma_write(struct fid_ep *ep_fid, const void *buf, size_t len,
		     void *desc, fi_addr_t dest_addr,
		     uint64_t addr, uint64_t key, void *context)
{
	ssize_t ret = FI_SUCCESS;
	struct fi_ibv_rdm_ep *ep = container_of(ep_fid, struct fi_ibv_rdm_ep,
						ep_fid);

	if (desc == NULL && len >= ep->rndv_threshold) {
		goto out_errinput;
	}

	struct fi_ibv_rdm_tagged_conn *conn =
		(struct fi_ibv_rdm_tagged_conn *) dest_addr;
	void *raw_buf = NULL;

	if (desc == NULL) {
		int again = 1;

		if (!conn->postponed_entry) {
			raw_buf = fi_ibv_rdm_rma_prepare_resources(conn, ep);

			if (raw_buf) {
				memcpy(raw_buf, buf, len);
				desc = (void*)(uintptr_t)conn->rma_mr->lkey;
				again = 0;
			}
		}

		if (again) {
			goto out_again;
		}
	} else if (!fi_ibv_rdm_check_connection(conn, ep) ||
		   SEND_RESOURCES_IS_BUSY(conn, ep)) {
		/*
		 * TODO: Should the postponed queue flow be implemented for RMA?
		 */
		goto out_again;
	}

	struct fi_ibv_rdm_tagged_request *request = 
		util_buf_alloc(fi_ibv_rdm_tagged_request_pool);
	FI_IBV_RDM_TAGGED_DBG_REQUEST("get_from_pool: ", request, FI_LOG_DEBUG);

	/* Initial state */
	request->state.eager = FI_IBV_STATE_EAGER_BEGIN;
	request->state.rndv  = FI_IBV_STATE_RNDV_NOT_USED;
	request->rmabuf = raw_buf;

	struct fi_ibv_rdm_rma_start_data start_data = {
		.conn = conn,
		.ep_rdm = ep,
		.context = context,
		.data_len = (uint32_t)len,
		.rbuf = addr,
		.lbuf = (uintptr_t)buf,
		.rkey = (uint32_t)key,
		.lkey = (uint32_t)(uintptr_t)desc,
		.op_code = IBV_WR_RDMA_WRITE
	};

	fi_ibv_rdm_tagged_req_hndl(request, FI_IBV_EVENT_RMA_START, &start_data);

	struct fi_ibv_rma_post_ready_data post_ready_data = { .ep_rdm = ep };

	ret = fi_ibv_rdm_tagged_req_hndl(request, FI_IBV_EVENT_SEND_READY,
					 &post_ready_data);
	ret = (ret == FI_EP_RDM_HNDL_SUCCESS) ? FI_SUCCESS : -FI_EOTHER;
out:
	return ret;

out_again:
	fi_ibv_rdm_tagged_poll(ep);
	ret = -FI_EAGAIN;
	goto out;

out_errinput:
	ret = -FI_EINVAL;
	goto out;
}

static ssize_t
fi_ibv_rdm_ep_rma_writemsg(struct fid_ep *ep, const struct fi_msg_rma *msg,
		uint64_t flags)
{
	if (msg->iov_count == 1 && msg->rma_iov_count == 1) {
		return fi_ibv_rdm_ep_rma_write(ep,
					       msg->msg_iov[0].iov_base,
					       msg->msg_iov[0].iov_len,
					       msg->desc ? msg->desc[0] : NULL,
					       msg->addr,
					       msg->rma_iov[0].addr,
					       msg->rma_iov[0].key,
					       msg->context);
	}

	assert(0);
	return -FI_EMSGSIZE;
}

static ssize_t
fi_ibv_rdm_ep_rma_writev(struct fid_ep *ep, const struct iovec *iov, void **desc,
		size_t count, fi_addr_t dest_addr, uint64_t addr, uint64_t key,
		void *context)
{
	struct fi_rma_iov rma_iov = {
		.addr = addr,
		.len = 0,
		.key = key
	};

	size_t i;
	for (i = 0; i < count; i++) {
		rma_iov.len += iov[i].iov_len;
	}

	struct fi_msg_rma msg = {
		.msg_iov = iov,
		.desc = desc,
		.iov_count = count,
		.addr = dest_addr,
		.rma_iov = &rma_iov,
		.rma_iov_count = 1,
		.context = context,
		.data = 0
	};

	return fi_ibv_rdm_ep_rma_writemsg(ep, &msg, 0);
}

static ssize_t fi_ibv_rdm_ep_rma_inject_write(struct fid_ep *ep,
					      const void *buf, size_t len,
					      fi_addr_t dest_addr,
					      uint64_t addr, uint64_t key)
{
	struct fi_ibv_rdm_ep *ep_rdm = container_of(ep, struct fi_ibv_rdm_ep,
						    ep_fid);
	struct fi_ibv_rdm_tagged_conn *conn =
		(struct fi_ibv_rdm_tagged_conn *) dest_addr;
	struct fi_ibv_rdm_tagged_request *request = NULL;
	int ret = FI_EP_RDM_HNDL_AGAIN;

	if (len >= ep_rdm->rndv_threshold) {
		return -FI_EMSGSIZE;
	}

	if (fi_ibv_rdm_check_connection(conn, ep_rdm) &&
	    !RMA_RESOURCES_IS_BUSY(conn, ep_rdm) &&
	    !conn->postponed_entry)
	{
		request = util_buf_alloc(fi_ibv_rdm_tagged_request_pool);

		FI_IBV_RDM_TAGGED_DBG_REQUEST("get_from_pool: ",
			request, FI_LOG_DEBUG);

		/* Initial state */
		request->state.eager = FI_IBV_STATE_EAGER_RMA_INJECT;
		request->state.rndv  = FI_IBV_STATE_RNDV_NOT_USED;

		struct fi_ibv_rdm_rma_start_data start_data = {
			.conn = conn,
			.ep_rdm = ep_rdm,
			.data_len = (uint32_t)len,
			.rbuf = addr,
			.lbuf = (uintptr_t)buf,
			.rkey = (uint32_t)key,
			.lkey = 0
		};

		ret =  fi_ibv_rdm_tagged_req_hndl(request,
						  FI_IBV_EVENT_RMA_START,
						  &start_data);
	}

	switch (ret)
	{
	case FI_EP_RDM_HNDL_SUCCESS:
		return ret;
	case FI_EP_RDM_HNDL_AGAIN:
		ret = -FI_EAGAIN;
		break;
	default:
		ret = -errno;
		break;
	}

	if (request) {
		FI_IBV_RDM_TAGGED_DBG_REQUEST("to_pool: ", request,
					      FI_LOG_DEBUG);
		util_buf_release(fi_ibv_rdm_tagged_request_pool, request);
	}

	fi_ibv_rdm_tagged_poll(ep_rdm);

	return ret;
}

static struct fi_ops_rma fi_ibv_rdm_ep_rma_ops = {
	.size		= sizeof(struct fi_ops_rma),
	.read		= fi_ibv_rdm_ep_rma_read,
	.readv		= fi_ibv_rdm_ep_rma_readv,
	.readmsg	= fi_ibv_rdm_ep_rma_readmsg,
	.write		= fi_ibv_rdm_ep_rma_write,
	.writev		= fi_ibv_rdm_ep_rma_writev,
	.writemsg	= fi_ibv_rdm_ep_rma_writemsg,
	.inject		= fi_ibv_rdm_ep_rma_inject_write,
	.writedata	= fi_no_rma_writedata,
	.injectdata	= fi_no_rma_injectdata,
};

struct fi_ops_rma *fi_ibv_rdm_ep_ops_rma(struct fi_ibv_rdm_ep *ep)
{
	return &fi_ibv_rdm_ep_rma_ops;
}
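
As a companion sketch (again with hypothetical setup names), small payloads can use the completion-less inject path implemented by fi_ibv_rdm_ep_rma_inject_write(), while larger transfers fall back to fi_write(), which reports a completion on the bound CQ. The inject limit would normally come from the endpoint's transmit attributes (fi->tx_attr->inject_size).

#include <rdma/fabric.h>
#include <rdma/fi_rma.h>
#include <rdma/fi_errno.h>

/* Choose between the inject and regular write paths based on payload size. */
static ssize_t rma_put(struct fid_ep *ep, const void *buf, size_t len,
		       void *ldesc, fi_addr_t peer, uint64_t raddr,
		       uint64_t rkey, size_t inject_limit)
{
	ssize_t ret;

	if (len <= inject_limit) {
		/* Data is copied by the provider and no completion is
		 * generated; the inject paths above poll internally before
		 * returning -FI_EAGAIN, so a bare retry is acceptable here. */
		do {
			ret = fi_inject_write(ep, buf, len, peer, raddr, rkey);
		} while (ret == -FI_EAGAIN);
		return ret;
	}

	/* Regular write: buf and ldesc must stay valid until the
	 * corresponding completion is reaped from the bound CQ. */
	do {
		ret = fi_write(ep, buf, len, ldesc, peer, raddr, rkey, NULL);
	} while (ret == -FI_EAGAIN);
	return ret;
}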