Пример #1
0
static ssize_t fi_ibv_rdm_tagged_recvfrom(struct fid_ep *ep_fid, void *buf,
					  size_t len, void *desc,
					  fi_addr_t src_addr, uint64_t tag,
					  uint64_t ignore, void *context)
{
	int ret = 0;

	struct fi_ibv_rdm_tagged_request *request =
		util_buf_alloc(fi_ibv_rdm_tagged_request_pool);
	fi_ibv_rdm_tagged_zero_request(request);
	FI_IBV_RDM_TAGGED_DBG_REQUEST("get_from_pool: ", request, FI_LOG_DEBUG);

	struct fi_ibv_rdm_tagged_conn *conn = (src_addr == FI_ADDR_UNSPEC)
		? NULL : (struct fi_ibv_rdm_tagged_conn *) src_addr;
	struct fi_ibv_rdm_ep *ep = container_of(ep_fid,
						struct fi_ibv_rdm_ep, ep_fid);

	{
		struct fi_ibv_rdm_tagged_recv_start_data recv_data = {
			.peek_data = {
				.minfo = {
					.conn = conn,
					.tag = tag,
					.tagmask = ~ignore
				},
				.context = context,
				.flags = 0
			},
			.dest_addr = buf,
			.data_len = len,
			.ep = ep
		};
Пример #2
0
/*
 * Prepare a send request for posting: in debug builds first verify that
 * neither the per-connection outgoing-send limit nor the endpoint's
 * pending-send limit has been hit, then acquire a send buffer.
 * Returns non-zero on success, 0 when resources are exhausted/limited.
 */
int fi_ibv_rdm_tagged_prepare_send_request(
	struct fi_ibv_rdm_tagged_request *request, struct fi_ibv_rdm_ep *ep)
{
#if ENABLE_DEBUG
	int limited =
		FI_IBV_RDM_TAGGED_SENDS_OUTGOING_ARE_LIMITED(request->minfo.conn, ep);
	if (limited) {
		FI_IBV_RDM_TAGGED_DBG_REQUEST
			("failed because SENDS_OUTGOING_ARE_LIMITED", request,
			FI_LOG_DEBUG);
		/* limited != 0 here, so this is a 0 ("failed") return */
		return !limited;
	}

	limited = PEND_SEND_IS_LIMITED(ep);
	if (limited) {
		FI_IBV_RDM_TAGGED_DBG_REQUEST
			("failed because PEND_SEND_IS_LIMITED", request,
			FI_LOG_DEBUG);
		return !limited;
	}
#endif // ENABLE_DEBUG

	/* NULL sbuf means no send resources are currently available */
	request->sbuf = fi_ibv_rdm_prepare_send_resources(request->minfo.conn, ep);
	return !!request->sbuf;
}
Пример #3
0
/*
 * Cancel a previously posted receive identified by its fi_context.
 * Only a request still sitting in the posted-receive queue can be
 * cancelled; a request that has already matched/progressed is left alone.
 *
 * Returns 0 on successful cancellation (or when the context carries no
 * request), -FI_ENOENT when no matching posted request is found, and a
 * negative fi_errno code on invalid arguments.
 */
static ssize_t fi_ibv_rdm_tagged_ep_cancel(fid_t fid, void *ctx)
{
	struct fi_ibv_rdm_ep *fid_ep;
	struct fi_context *context = (struct fi_context *)ctx;
	/* fi_cancel() must return 0 or a negative fi_errno value; the
	 * previous code returned a positive 1 on "not found". */
	ssize_t err = -FI_ENOENT;

	fid_ep = container_of(fid, struct fi_ibv_rdm_ep, ep_fid);
	if (!fid_ep->domain)
		return -FI_EBADF;

	if (!context)
		return -FI_EINVAL;

	/* internal[0] holds the provider request; NULL means nothing to do */
	if (context->internal[0] == NULL)
		return 0;

	struct fi_ibv_rdm_tagged_request *request = context->internal[0];

	VERBS_DBG(FI_LOG_EP_DATA,
		  "ep_cancel, match %p, tag 0x%llx, len %d, ctx %p\n",
		  request, (long long unsigned)request->tag,
		  request->len, request->context);

	struct dlist_entry *found =
	    dlist_find_first_match(&fi_ibv_rdm_tagged_recv_posted_queue,
				   fi_ibv_rdm_tagged_match_requests, request);

	if (found) {
		assert(container_of(found, struct fi_ibv_rdm_tagged_request,
				    queue_entry) == request);

		fi_ibv_rdm_tagged_remove_from_posted_queue(request, fid_ep);

		/* an unmatched posted recv cannot have sends in flight */
		assert(request->send_completions_wait == 0);
		FI_IBV_RDM_TAGGED_DBG_REQUEST("to_pool: ", request,
					      FI_LOG_DEBUG);

		fi_ibv_mem_pool_return(&request->mpe,
				       &fi_ibv_rdm_tagged_request_pool);

		VERBS_DBG(FI_LOG_EP_DATA,
			  "\t\t-> SUCCESS, pend recv %d\n", fid_ep->pend_recv);

		err = 0;
	}

	return err;
}
Пример #4
0
/*
 * RMA read: fetch 'len' bytes from remote address 'addr' (protected by
 * 'key') on peer 'src_addr' into local buffer 'buf'. When no local MR
 * descriptor is supplied the transfer goes through a pre-registered
 * bounce buffer (eager path) and must be below the rendezvous threshold.
 *
 * Returns FI_SUCCESS, -FI_EAGAIN when resources are busy (after driving
 * progress), -FI_EINVAL on invalid input, or -FI_EOTHER on handler failure.
 */
static ssize_t
fi_ibv_rdm_ep_rma_read(struct fid_ep *ep_fid, void *buf, size_t len,
		    void *desc, fi_addr_t src_addr,
		    uint64_t addr, uint64_t key, void *context)
{
	ssize_t ret = FI_SUCCESS;
	struct fi_ibv_rdm_ep *ep = container_of(ep_fid, struct fi_ibv_rdm_ep,
						ep_fid);

	/* without a registered descriptor the transfer must fit the
	 * bounce-buffer (eager) path */
	if (desc == NULL && len >= ep->rndv_threshold) {
		goto out_errinput;
	}

	struct fi_ibv_rdm_tagged_conn *conn =
		(struct fi_ibv_rdm_tagged_conn *) src_addr;
	void *raw_buf = NULL;

	if (desc == NULL) {
		int again = 1;

		if (!conn->postponed_entry) {
			raw_buf = fi_ibv_rdm_rma_prepare_resources(conn, ep);

			if (raw_buf) {
				/* use the connection's registered RMA buffer */
				desc = (void*)(uintptr_t)conn->rma_mr->lkey;
				again = 0;
			}
		}

		if (again) {
			goto out_again;
		}
	} else if (!fi_ibv_rdm_check_connection(conn, ep) ||
		   RMA_RESOURCES_IS_BUSY(conn, ep)) {
		/*
		 * TODO: Should be postponed queue flow for RMA be implemented?
		 */
		goto out_again;
	}

	struct fi_ibv_rdm_tagged_request *request = 
		util_buf_alloc(fi_ibv_rdm_tagged_request_pool);
	FI_IBV_RDM_TAGGED_DBG_REQUEST("get_from_pool: ", request, FI_LOG_DEBUG);

	/* Initial state */
	request->state.eager = FI_IBV_STATE_EAGER_BEGIN;
	request->state.rndv  = FI_IBV_STATE_RNDV_NOT_USED;
	request->rmabuf = raw_buf;

	struct fi_ibv_rdm_rma_start_data start_data = {
		.ep_rdm = ep,	/* was a redundant second container_of() */
		.conn = conn,	/* was a redundant second cast of src_addr */
		.context = context,
		.data_len = (uint32_t)len,
		.rbuf = addr,
		.lbuf = (uintptr_t)buf,
		.rkey = (uint32_t)key,
		.lkey = (uint32_t)(uintptr_t)desc,
		.op_code = IBV_WR_RDMA_READ
	};

	fi_ibv_rdm_tagged_req_hndl(request, FI_IBV_EVENT_RMA_START, &start_data);

	struct fi_ibv_rma_post_ready_data post_ready_data = { .ep_rdm = ep };

	ret = fi_ibv_rdm_tagged_req_hndl(request, FI_IBV_EVENT_SEND_READY,
					 &post_ready_data);
	/* map the internal handler code to a fabric return code, matching
	 * fi_ibv_rdm_ep_rma_write(); previously the raw handler code was
	 * returned to the caller */
	ret = (ret == FI_EP_RDM_HNDL_SUCCESS) ? FI_SUCCESS : -FI_EOTHER;

out:
	return ret;

out_again:
	fi_ibv_rdm_tagged_poll(ep);
	ret = -FI_EAGAIN;
	goto out;

out_errinput:
	ret = -FI_EINVAL;
	goto out;
}

/*
 * fi_readmsg entry point: only the single-iov / single-rma-iov case is
 * supported; anything else asserts in debug builds and returns
 * -FI_EMSGSIZE in release builds. 'flags' are currently ignored.
 */
static ssize_t
fi_ibv_rdm_ep_rma_readmsg(struct fid_ep *ep, const struct fi_msg_rma *msg,
		uint64_t flags)
{
	if (msg->iov_count == 1 && msg->rma_iov_count == 1) {
		/* msg->desc may legitimately be NULL (no local MR);
		 * the previous code dereferenced it unconditionally */
		return fi_ibv_rdm_ep_rma_read(ep,
					      msg->msg_iov[0].iov_base,
					      msg->msg_iov[0].iov_len,
					      msg->desc ? msg->desc[0] : NULL,
					      msg->addr,
					      msg->rma_iov[0].addr,
					      msg->rma_iov[0].key,
					      msg->context);
	}

	assert(0);
	return -FI_EMSGSIZE;
}

/*
 * fi_readv entry point: collapse the local iov into a single fi_msg_rma
 * and forward to readmsg (which only supports iov_count == 1 — the total
 * length is accumulated for that single-descriptor case).
 */
static ssize_t
fi_ibv_rdm_ep_rma_readv(struct fid_ep *ep, const struct iovec *iov, void **desc,
		size_t count, fi_addr_t src_addr, uint64_t addr, uint64_t key,
		void *context)
{
	/* BUGFIX: the remote memory address 'addr' belongs in rma_iov.addr
	 * and the peer address 'src_addr' in msg.addr; they were swapped. */
	struct fi_rma_iov rma_iov = {
		.addr = addr,
		.len = 0,
		.key = key
	};

	size_t i;
	for (i = 0; i < count; i++) {
		rma_iov.len += iov[i].iov_len;
	}

	struct fi_msg_rma msg = {
		.msg_iov = iov,
		.desc = desc,
		.iov_count = count,
		.addr = src_addr,
		.rma_iov = &rma_iov,
		.rma_iov_count = 1,
		.context = context,
		.data = 0
	};

	return fi_ibv_rdm_ep_rma_readmsg(ep, &msg, 0);
}

/*
 * RMA write: push 'len' bytes from local 'buf' to remote address 'addr'
 * (protected by 'key') on peer 'dest_addr'. Without a local MR
 * descriptor the payload is staged in a pre-registered bounce buffer
 * (eager path) and must be below the rendezvous threshold.
 *
 * Returns FI_SUCCESS, -FI_EAGAIN when resources are busy (after driving
 * progress), -FI_EINVAL on invalid input, or -FI_EOTHER on handler failure.
 */
static ssize_t
fi_ibv_rdm_ep_rma_write(struct fid_ep *ep_fid, const void *buf, size_t len,
		     void *desc, fi_addr_t dest_addr,
		     uint64_t addr, uint64_t key, void *context)
{
	ssize_t ret = FI_SUCCESS;
	struct fi_ibv_rdm_ep *ep = container_of(ep_fid, struct fi_ibv_rdm_ep,
						ep_fid);

	/* unregistered payloads must fit the bounce-buffer path */
	if (desc == NULL && len >= ep->rndv_threshold) {
		goto out_errinput;
	}

	struct fi_ibv_rdm_tagged_conn *conn =
		(struct fi_ibv_rdm_tagged_conn *) dest_addr;
	void *bounce_buf = NULL;

	if (desc == NULL) {
		int resources_ready = 0;

		if (!conn->postponed_entry) {
			bounce_buf = fi_ibv_rdm_rma_prepare_resources(conn, ep);
			if (bounce_buf) {
				/* stage the payload in the registered RMA
				 * buffer and use its lkey as the descriptor */
				memcpy(bounce_buf, buf, len);
				desc = (void *)(uintptr_t)conn->rma_mr->lkey;
				resources_ready = 1;
			}
		}

		if (!resources_ready) {
			goto out_again;
		}
	} else if (!fi_ibv_rdm_check_connection(conn, ep) ||
		   SEND_RESOURCES_IS_BUSY(conn, ep)) {
		/*
		 * TODO: Should be postponed queue flow for RMA be implemented?
		 */
		goto out_again;
	}

	struct fi_ibv_rdm_tagged_request *request =
		util_buf_alloc(fi_ibv_rdm_tagged_request_pool);
	FI_IBV_RDM_TAGGED_DBG_REQUEST("get_from_pool: ", request, FI_LOG_DEBUG);

	/* Initial state */
	request->state.eager = FI_IBV_STATE_EAGER_BEGIN;
	request->state.rndv = FI_IBV_STATE_RNDV_NOT_USED;
	request->rmabuf = bounce_buf;

	struct fi_ibv_rdm_rma_start_data start_data = {
		.conn = conn,
		.ep_rdm = ep,
		.context = context,
		.data_len = (uint32_t)len,
		.rbuf = addr,
		.lbuf = (uintptr_t)buf,
		.rkey = (uint32_t)key,
		.lkey = (uint32_t)(uintptr_t)desc,
		.op_code = IBV_WR_RDMA_WRITE
	};

	fi_ibv_rdm_tagged_req_hndl(request, FI_IBV_EVENT_RMA_START, &start_data);

	struct fi_ibv_rma_post_ready_data post_ready_data = { .ep_rdm = ep };

	ret = fi_ibv_rdm_tagged_req_hndl(request, FI_IBV_EVENT_SEND_READY,
					 &post_ready_data);
	/* translate the internal handler code into a fabric return code */
	ret = (ret == FI_EP_RDM_HNDL_SUCCESS) ? FI_SUCCESS : -FI_EOTHER;
out:
	return ret;

out_again:
	fi_ibv_rdm_tagged_poll(ep);
	ret = -FI_EAGAIN;
	goto out;

out_errinput:
	ret = -FI_EINVAL;
	goto out;
}

/*
 * fi_writemsg entry point: only the single-iov / single-rma-iov case is
 * supported; anything else asserts in debug builds and returns
 * -FI_EMSGSIZE in release builds. 'flags' are currently ignored.
 */
static ssize_t
fi_ibv_rdm_ep_rma_writemsg(struct fid_ep *ep, const struct fi_msg_rma *msg,
		uint64_t flags)
{
	if (msg->iov_count == 1 && msg->rma_iov_count == 1) {
		/* msg->desc may legitimately be NULL (no local MR);
		 * the previous code dereferenced it unconditionally */
		return fi_ibv_rdm_ep_rma_write(ep,
					       msg->msg_iov[0].iov_base,
					       msg->msg_iov[0].iov_len,
					       msg->desc ? msg->desc[0] : NULL,
					       msg->addr,
					       msg->rma_iov[0].addr,
					       msg->rma_iov[0].key,
					       msg->context);
	}

	assert(0);
	return -FI_EMSGSIZE;
}

/*
 * fi_writev entry point: collapse the local iov into a single fi_msg_rma
 * and forward to writemsg (which only supports iov_count == 1 — the
 * total length is accumulated for that single-descriptor case).
 */
static ssize_t
fi_ibv_rdm_ep_rma_writev(struct fid_ep *ep, const struct iovec *iov, void **desc,
		size_t count, fi_addr_t dest_addr, uint64_t addr, uint64_t key,
		void *context)
{
	/* BUGFIX: the remote memory address 'addr' belongs in rma_iov.addr
	 * and the peer address 'dest_addr' in msg.addr; they were swapped. */
	struct fi_rma_iov rma_iov = {
		.addr = addr,
		.len = 0,
		.key = key
	};

	size_t i;
	for (i = 0; i < count; i++) {
		rma_iov.len += iov[i].iov_len;
	}

	struct fi_msg_rma msg = {
		.msg_iov = iov,
		.desc = desc,
		.iov_count = count,
		.addr = dest_addr,
		.rma_iov = &rma_iov,
		.rma_iov_count = 1,
		.context = context,
		.data = 0
	};

	return fi_ibv_rdm_ep_rma_writemsg(ep, &msg, 0);
}

/*
 * fi_inject write: eager-only RMA write with no completion context.
 * Payloads at/above the rendezvous threshold cannot be injected. On any
 * failure the request (if allocated) is returned to the pool and progress
 * is driven before reporting the error.
 */
static ssize_t fi_ibv_rdm_ep_rma_inject_write(struct fid_ep *ep,
					      const void *buf, size_t len,
					      fi_addr_t dest_addr,
					      uint64_t addr, uint64_t key)
{
	struct fi_ibv_rdm_ep *ep_rdm = container_of(ep, struct fi_ibv_rdm_ep,
						    ep_fid);
	struct fi_ibv_rdm_tagged_conn *conn =
		(struct fi_ibv_rdm_tagged_conn *) dest_addr;
	struct fi_ibv_rdm_tagged_request *request = NULL;
	int status = FI_EP_RDM_HNDL_AGAIN;

	if (len >= ep_rdm->rndv_threshold) {
		return -FI_EMSGSIZE;
	}

	/* only start the operation when the connection is up, RMA resources
	 * are free, and nothing is queued on the postponed list */
	if (fi_ibv_rdm_check_connection(conn, ep_rdm) &&
	    !RMA_RESOURCES_IS_BUSY(conn, ep_rdm) &&
	    !conn->postponed_entry) {
		request = util_buf_alloc(fi_ibv_rdm_tagged_request_pool);

		FI_IBV_RDM_TAGGED_DBG_REQUEST("get_from_pool: ",
			request, FI_LOG_DEBUG);

		/* Initial state */
		request->state.eager = FI_IBV_STATE_EAGER_RMA_INJECT;
		request->state.rndv = FI_IBV_STATE_RNDV_NOT_USED;

		struct fi_ibv_rdm_rma_start_data start_data = {
			.conn = conn,
			.ep_rdm = ep_rdm,
			.data_len = (uint32_t)len,
			.rbuf = addr,
			.lbuf = (uintptr_t)buf,
			.rkey = (uint32_t)key,
			.lkey = 0
		};

		status = fi_ibv_rdm_tagged_req_hndl(request,
						    FI_IBV_EVENT_RMA_START,
						    &start_data);
	}

	if (status == FI_EP_RDM_HNDL_SUCCESS) {
		return status;
	}

	ssize_t ret = (status == FI_EP_RDM_HNDL_AGAIN) ? -FI_EAGAIN : -errno;

	/* failure path: recycle the request and make progress */
	if (request) {
		FI_IBV_RDM_TAGGED_DBG_REQUEST("to_pool: ", request,
					      FI_LOG_DEBUG);
		util_buf_release(fi_ibv_rdm_tagged_request_pool, request);
	}

	fi_ibv_rdm_tagged_poll(ep_rdm);

	return ret;
}

/*
 * RMA operation dispatch table for the RDM endpoint. Operations without
 * a provider implementation (writedata/injectdata) map to fi_no_* stubs.
 */
static struct fi_ops_rma fi_ibv_rdm_ep_rma_ops = {
	.size		= sizeof(struct fi_ops_rma),
	.read		= fi_ibv_rdm_ep_rma_read,
	.readv		= fi_ibv_rdm_ep_rma_readv,
	.readmsg	= fi_ibv_rdm_ep_rma_readmsg,
	.write		= fi_ibv_rdm_ep_rma_write,
	.writev		= fi_ibv_rdm_ep_rma_writev,
	.writemsg	= fi_ibv_rdm_ep_rma_writemsg,
	.inject		= fi_ibv_rdm_ep_rma_inject_write,
	.writedata	= fi_no_rma_writedata,
	.injectdata	= fi_no_rma_injectdata,
};

/* Return the RMA ops table for this provider; 'ep' is currently unused. */
struct fi_ops_rma *fi_ibv_rdm_ep_ops_rma(struct fi_ibv_rdm_ep *ep)
{
	return &fi_ibv_rdm_ep_rma_ops;
}
Пример #5
0
/*
 * RMA read (eager-only variant): fetch 'len' bytes from remote address
 * 'addr' (protected by 'key') on peer 'src_addr' into local 'buf'.
 * Without a local MR descriptor the operation routes through the
 * connection's registered send buffer and must be below the rendezvous
 * threshold.
 */
static ssize_t
fi_ibv_rdm_ep_rma_read(struct fid_ep *ep_fid, void *buf, size_t len,
		    void *desc, fi_addr_t src_addr,
		    uint64_t addr, uint64_t key, void *context)
{
	ssize_t ret = FI_SUCCESS;
	struct fi_ibv_rdm_ep *ep = container_of(ep_fid, struct fi_ibv_rdm_ep,
						ep_fid);

	/* unregistered transfers must fit the eager/bounce path */
	if (desc == NULL && len >= ep->rndv_threshold) {
		goto out_errinput;
	}

	struct fi_ibv_rdm_tagged_conn *conn =
		(struct fi_ibv_rdm_tagged_conn *) src_addr;

	if (desc == NULL) {
		int again = 1;

		if (!conn->postponed_entry) {
			void *raw_sbuf =
				fi_ibv_rdm_tagged_prepare_send_resources(conn,
									 ep);

			/* NOTE(review): for a READ (remote -> local), copying
			 * the user buffer into the send buffer looks
			 * pointless, and the 'buf = raw_sbuf' reassignment
			 * below discards the caller's destination pointer —
			 * nothing visible here copies the result back out.
			 * TODO confirm against the completion path. */
			if (raw_sbuf) {
				memcpy (raw_sbuf, buf, len);
				buf = raw_sbuf;
				desc = (void*)(uintptr_t)conn->s_mr->lkey;
				again = 0;
			}
		}

		if (again) {
			goto out_again;
		}
	}

	struct fi_ibv_rdm_tagged_request *request =
	    (struct fi_ibv_rdm_tagged_request *)
	    fi_verbs_mem_pool_get(&fi_ibv_rdm_tagged_request_pool);
	FI_IBV_RDM_TAGGED_DBG_REQUEST("get_from_pool: ", request, FI_LOG_DEBUG);

	/* Initial state */
	request->state.eager = FI_IBV_STATE_EAGER_BEGIN;
	request->state.rndv  = FI_IBV_STATE_RNDV_NOT_USED;

	struct fi_ibv_rdm_rma_start_data data = {
		.ep_rdm = container_of(ep_fid, struct fi_ibv_rdm_ep, ep_fid),
		.conn = (struct fi_ibv_rdm_tagged_conn *) src_addr,
		.context = context,
		.data_len = (uint32_t)len,
		.rbuf = addr,
		.lbuf = (uintptr_t)buf,
		.rkey = (uint32_t)key,
		.lkey = (uint32_t)(uintptr_t)desc,
		.op_code = IBV_WR_RDMA_READ
	};

	/* hand the request to the state machine and translate its internal
	 * result code into a fabric return code */
	ret = fi_ibv_rdm_tagged_req_hndl(request,
		FI_IBV_EVENT_RMA_START, &data);
	ret = (ret == FI_EP_RDM_HNDL_SUCCESS) ? FI_SUCCESS : -FI_EOTHER;

out:
	return ret;

out_again:
	/* drive progress before asking the caller to retry */
	fi_ibv_rdm_tagged_poll(ep);
	ret = -FI_EAGAIN;
	goto out;

out_errinput:
	ret = -FI_EINVAL;
	goto out;
}

/*
 * RMA write (eager-only variant): push 'len' bytes from local 'buf' to
 * remote address 'addr' (protected by 'key') on peer 'dest_addr'.
 * Without a local MR descriptor the payload is staged in the
 * connection's registered send buffer and must be below the rendezvous
 * threshold.
 *
 * Returns FI_SUCCESS, -FI_EAGAIN when resources are busy (after driving
 * progress), -FI_EINVAL on invalid input, or -FI_EOTHER on handler failure.
 */
static ssize_t
fi_ibv_rdm_ep_rma_write(struct fid_ep *ep_fid, const void *buf, size_t len,
		     void *desc, fi_addr_t dest_addr,
		     uint64_t addr, uint64_t key, void *context)
{
	ssize_t ret = FI_SUCCESS;
	struct fi_ibv_rdm_ep *ep = container_of(ep_fid, struct fi_ibv_rdm_ep,
						ep_fid);

	/* unregistered payloads must fit the eager/bounce path */
	if (desc == NULL && len >= ep->rndv_threshold) {
		goto out_errinput;
	}

	struct fi_ibv_rdm_tagged_conn *conn =
		(struct fi_ibv_rdm_tagged_conn *) dest_addr;

	if (desc == NULL) {
		int staged = 0;

		if (!conn->postponed_entry) {
			void *sbuf =
				fi_ibv_rdm_tagged_prepare_send_resources(conn,
									 ep);
			if (sbuf) {
				/* stage the payload into the registered send
				 * buffer and substitute it for the user's */
				memcpy(sbuf, buf, len);
				buf = sbuf;
				desc = (void *)(uintptr_t)conn->s_mr->lkey;
				staged = 1;
			}
		}

		if (!staged) {
			goto out_again;
		}
	}

	struct fi_ibv_rdm_tagged_request *request =
	    (struct fi_ibv_rdm_tagged_request *)
	    fi_verbs_mem_pool_get(&fi_ibv_rdm_tagged_request_pool);
	FI_IBV_RDM_TAGGED_DBG_REQUEST("get_from_pool: ", request, FI_LOG_DEBUG);

	/* Initial state */
	request->state.eager = FI_IBV_STATE_EAGER_BEGIN;
	request->state.rndv = FI_IBV_STATE_RNDV_NOT_USED;

	struct fi_ibv_rdm_rma_start_data data = {
		.conn = conn,
		.ep_rdm = ep,
		.context = context,
		.data_len = (uint32_t)len,
		.rbuf = addr,
		.lbuf = (uintptr_t)buf,
		.rkey = (uint32_t)key,
		.lkey = (uint32_t)(uintptr_t)desc,
		.op_code = IBV_WR_RDMA_WRITE
	};

	/* hand off to the state machine, then translate its internal result
	 * code into a fabric return code */
	ret = fi_ibv_rdm_tagged_req_hndl(request,
		FI_IBV_EVENT_RMA_START, &data);
	ret = (ret == FI_EP_RDM_HNDL_SUCCESS) ? FI_SUCCESS : -FI_EOTHER;
out:
	return ret;

out_again:
	fi_ibv_rdm_tagged_poll(ep);
	ret = -FI_EAGAIN;
	goto out;

out_errinput:
	ret = -FI_EINVAL;
	goto out;
}

/*
 * fi_inject write (direct-post variant): stage the payload in the
 * connection's registered send buffer and post an RDMA WRITE work
 * request immediately, using IBV_SEND_INLINE when the payload fits.
 * Injects are eager-only; payloads at/above the rendezvous threshold
 * are rejected with -FI_EMSGSIZE. Returns -FI_EAGAIN when no send
 * resources are available or a postponed entry exists.
 */
static ssize_t fi_ibv_rdm_ep_rma_inject_write(struct fid_ep *ep,
					      const void *buf, size_t len,
					      fi_addr_t dest_addr,
					      uint64_t addr, uint64_t key)
{
	struct fi_ibv_rdm_ep *ep_rdm = container_of(ep, struct fi_ibv_rdm_ep,
						    ep_fid);

	if (len >= ep_rdm->rndv_threshold) {
		return -FI_EMSGSIZE;
	}

	struct fi_ibv_rdm_tagged_conn *conn =
		(struct fi_ibv_rdm_tagged_conn *) dest_addr;

	if (!conn->postponed_entry) {
		void *raw_sbuf =
			fi_ibv_rdm_tagged_prepare_send_resources(conn, ep_rdm);

		if (raw_sbuf) {
			memcpy(raw_sbuf, buf, len);

			struct ibv_sge sge = { 0 };
			struct ibv_send_wr wr = { 0 };
			struct ibv_send_wr *bad_wr = NULL;
			wr.wr_id = FI_IBV_RDM_PACK_SERVICE_WR(conn);
			wr.sg_list = &sge;
			wr.num_sge = 1;
			wr.wr.rdma.remote_addr = addr;
			wr.wr.rdma.rkey = (uint32_t)key;
			/* small payloads can be inlined into the WQE */
			wr.send_flags = (len < ep_rdm->max_inline_rc)
					? IBV_SEND_INLINE : 0;
			wr.opcode = IBV_WR_RDMA_WRITE;
			/* round-trip through uintptr_t for portable
			 * pointer-to-integer conversion */
			sge.addr = (uint64_t)(uintptr_t)raw_sbuf;
			sge.length = len;
			sge.lkey = conn->s_mr->lkey;

			FI_IBV_RDM_TAGGED_INC_SEND_COUNTERS(conn, ep_rdm,
							    wr.send_flags);
			/* BUGFIX: ibv_post_send() returns 0 or an errno
			 * VALUE directly — it does not set errno, so the old
			 * "-errno" reported garbage (and "-FI_SUCCESS" was a
			 * confusing spelling of 0). */
			int ret = ibv_post_send(conn->qp, &wr, &bad_wr);
			return (ret == 0) ? FI_SUCCESS : -ret;
		}
	}

	return -FI_EAGAIN;
}

/*
 * RMA operation dispatch table for the RDM endpoint. Only single-buffer
 * read/write and inject are implemented; vector/msg/data variants map to
 * the fi_no_* stubs.
 */
static struct fi_ops_rma fi_ibv_rdm_ep_rma_ops = {
	.size		= sizeof(struct fi_ops_rma),
	.read		= fi_ibv_rdm_ep_rma_read,
	.readv		= fi_no_rma_readv,
	.readmsg	= fi_no_rma_readmsg,
	.write		= fi_ibv_rdm_ep_rma_write,
	.writev		= fi_no_rma_writev,
	.writemsg	= fi_no_rma_writemsg,
	.inject		= fi_ibv_rdm_ep_rma_inject_write,
	.writedata	= fi_no_rma_writedata,
	.injectdata	= fi_no_rma_injectdata,
};

/* Return the RMA ops table for this provider; 'ep' is currently unused. */
struct fi_ops_rma *fi_ibv_rdm_ep_ops_rma(struct fi_ibv_rdm_ep *ep)
{
	return &fi_ibv_rdm_ep_rma_ops;
}