Example #1
static void rxd_close_peer(struct rxd_ep *ep, struct rxd_peer *peer)
{
	struct rxd_pkt_entry *pkt_entry;
	struct rxd_x_entry *x_entry;

	while (!dlist_empty(&peer->unacked)) {
		dlist_pop_front(&peer->unacked, struct rxd_pkt_entry,
				pkt_entry, d_entry);
		ofi_buf_free(pkt_entry);
		peer->unacked_cnt--;
	}

	while (!dlist_empty(&peer->tx_list)) {
		dlist_pop_front(&peer->tx_list, struct rxd_x_entry,
				x_entry, entry);
		rxd_tx_entry_free(ep, x_entry);
	}

	while (!dlist_empty(&peer->rx_list)) {
		dlist_pop_front(&peer->rx_list, struct rxd_x_entry,
				x_entry, entry);
		rxd_rx_entry_free(ep, x_entry);
	}

	while (!dlist_empty(&peer->rma_rx_list)) {
		dlist_pop_front(&peer->rma_rx_list, struct rxd_x_entry,
				x_entry, entry);
		rxd_tx_entry_free(ep, x_entry);
	}

	dlist_remove(&peer->entry);
	peer->active = 0;
}
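
Example #1 drains four intrusive lists with the same pop-and-free loop. Below is a minimal, self-contained sketch of that pattern, using a hand-rolled container_of in place of libfabric's dlist helpers; the node/pkt types here are illustrative, not rxd's.

#include <stddef.h>
#include <stdlib.h>

/* Intrusive list node, standing in for libfabric's struct dlist_entry. */
struct node {
	struct node *next;
	struct node *prev;
};

/* The link lives inside the object, so popping recovers the container. */
struct pkt {
	int seq;
	struct node entry;
};

#define container_of(ptr, type, member) \
	((type *) ((char *) (ptr) - offsetof(type, member)))

static int list_empty(const struct node *head)
{
	return head->next == head; /* circular list: empty head points at itself */
}

/* Unlink the first element and return its container, as dlist_pop_front does. */
static struct pkt *pop_front_pkt(struct node *head)
{
	struct node *first = head->next;

	first->prev->next = first->next;
	first->next->prev = first->prev;
	return container_of(first, struct pkt, entry);
}

/* Same drain shape as the loops in rxd_close_peer. */
static void drain(struct node *head)
{
	while (!list_empty(head))
		free(pop_front_pkt(head));
}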
Example #2
static void rxd_peer_timeout(struct rxd_ep *rxd_ep, struct rxd_peer *peer)
{
	struct fi_cq_err_entry err_entry;
	struct rxd_x_entry *tx_entry;
	struct rxd_pkt_entry *pkt_entry;
	int ret;

	while (!dlist_empty(&peer->tx_list)) {
		dlist_pop_front(&peer->tx_list, struct rxd_x_entry, tx_entry, entry);
		memset(&err_entry, 0, sizeof(struct fi_cq_err_entry));
		err_entry.op_context = tx_entry->cq_entry.op_context;
		err_entry.flags = tx_entry->cq_entry.flags;
		err_entry.err = FI_ECONNREFUSED;
		err_entry.prov_errno = 0;
		rxd_tx_entry_free(rxd_ep, tx_entry);
		ret = ofi_cq_write_error(&rxd_ep_tx_cq(rxd_ep)->util_cq, &err_entry);
		if (ret)
			FI_WARN(&rxd_prov, FI_LOG_EP_CTRL, "could not write error entry\n");
	}

	while (!dlist_empty(&peer->unacked)) {
		dlist_pop_front(&peer->unacked, struct rxd_pkt_entry, pkt_entry,
				d_entry);
		ofi_buf_free(pkt_entry);
		peer->unacked_cnt--;
	}

	dlist_remove(&peer->entry);
}
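
Note the ordering in the tx_list loop above: the completion fields are copied out of tx_entry before rxd_tx_entry_free releases it; freeing first and then reading tx_entry->cq_entry would be a use-after-free. A minimal sketch of that snapshot-then-free discipline, with stand-in types rather than the real rxd structures:

#include <stdlib.h>
#include <string.h>

/* Stand-in types: only the fields this sketch touches. */
struct cq_entry { void *op_context; unsigned long flags; };
struct xfer { struct cq_entry cq_entry; };
struct err_report { void *op_context; unsigned long flags; int err; };

/* Snapshot everything the error report needs, then release the entry. */
static void report_timeout(struct xfer *xfer, struct err_report *out)
{
	memset(out, 0, sizeof(*out));
	out->op_context = xfer->cq_entry.op_context;
	out->flags = xfer->cq_entry.flags;
	out->err = 111; /* stands in for FI_ECONNREFUSED */
	free(xfer);     /* safe: the snapshot is complete */
}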
Example #3
void rxd_tx_entry_done(struct rxd_ep *ep, struct rxd_tx_entry *tx_entry)
{
	struct rxd_pkt_meta *pkt_meta;

	while (!dlist_empty(&tx_entry->pkt_list)) {
		pkt_meta = container_of(tx_entry->pkt_list.next,
					struct rxd_pkt_meta, entry);
		dlist_remove(&pkt_meta->entry);
		if (pkt_meta->flags & RXD_LOCAL_COMP)
			rxd_tx_pkt_free(pkt_meta);
		else
			pkt_meta->flags |= RXD_REMOTE_ACK;
	}
	rxd_tx_entry_free(ep, tx_entry);
}
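
Example #3 frees a packet only when both sides are done with it: whichever event arrives last (local send completion or remote ack) performs the free, and the earlier one just leaves a flag. A self-contained sketch of that two-flag handoff, with LOCAL_COMP/REMOTE_ACK standing in for RXD_LOCAL_COMP/RXD_REMOTE_ACK:

#include <stdlib.h>

#define LOCAL_COMP (1u << 0) /* local send completion seen */
#define REMOTE_ACK (1u << 1) /* remote ack seen */

struct pkt_meta { unsigned int flags; };

/* Ack path, mirroring the loop in rxd_tx_entry_done: free only if the
 * local completion already happened, otherwise leave a marker. */
static void on_remote_ack(struct pkt_meta *meta)
{
	if (meta->flags & LOCAL_COMP)
		free(meta);
	else
		meta->flags |= REMOTE_ACK;
}

/* Local send-completion path: the mirror image of the above. */
static void on_local_comp(struct pkt_meta *meta)
{
	if (meta->flags & REMOTE_ACK)
		free(meta);
	else
		meta->flags |= LOCAL_COMP;
}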
Example #4
ssize_t rxd_ep_readmsg(struct fid_ep *ep, const struct fi_msg_rma *msg,
		       uint64_t flags)
{
	struct rxd_ep *rxd_ep;
	struct rxd_peer *peer;
	struct rxd_tx_entry *tx_entry;
	uint64_t peer_addr;
	ssize_t ret;

	rxd_ep = container_of(ep, struct rxd_ep, util_ep.ep_fid);

	peer_addr = rxd_av_dg_addr(rxd_ep_av(rxd_ep), msg->addr);
	peer = rxd_ep_getpeer_info(rxd_ep, peer_addr);

	fastlock_acquire(&rxd_ep->lock);
	if (peer->state != CMAP_CONNECTED) {
		ret = rxd_ep_connect(rxd_ep, peer, peer_addr);
		fastlock_release(&rxd_ep->lock);
		if (ret == -FI_EALREADY) {
			rxd_ep->util_ep.progress(&rxd_ep->util_ep);
			ret = -FI_EAGAIN;
		}
		return ret ? ret : -FI_EAGAIN;
	}

	tx_entry = rxd_tx_entry_alloc(rxd_ep, peer, peer_addr, flags,
				      RXD_TX_READ_REQ);
	if (!tx_entry) {
		ret = -FI_EAGAIN;
		goto out;
	}

	tx_entry->read_req.msg = *msg;
	memcpy(&tx_entry->read_req.dst_iov[0], msg->msg_iov,
	       sizeof(*msg->msg_iov) * msg->iov_count);
	memcpy(&tx_entry->read_req.src_iov[0], msg->rma_iov,
	       sizeof(*msg->rma_iov) * msg->rma_iov_count);
	ret = rxd_ep_start_xfer(rxd_ep, peer, ofi_op_read_req, tx_entry);
	if (ret)
		rxd_tx_entry_free(rxd_ep, tx_entry);

out:
	fastlock_release(&rxd_ep->lock);
	return ret;
}
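
The connect handling at the top of Example #4 encodes a retry contract: an unconnected peer triggers a connect attempt, -FI_EALREADY (handshake already in flight) is converted to -FI_EAGAIN after one progress call, and even a successfully started connect still reports -FI_EAGAIN so the caller retries the operation later. A condensed, self-contained sketch of that state machine; names and error values here are illustrative:

enum peer_state { PEER_IDLE, PEER_CONNECTING, PEER_CONNECTED };

#define ERR_AGAIN   11  /* stands in for FI_EAGAIN */
#define ERR_ALREADY 114 /* stands in for FI_EALREADY */

static int start_connect(enum peer_state *state)
{
	if (*state == PEER_CONNECTING)
		return -ERR_ALREADY; /* handshake already in flight */
	*state = PEER_CONNECTING;
	return 0;
}

/* Returns 0 only when the peer is connected; every other outcome tells
 * the caller to retry, exactly like the block in rxd_ep_readmsg. */
static int ensure_connected(enum peer_state *state, void (*progress)(void))
{
	int ret;

	if (*state == PEER_CONNECTED)
		return 0;

	ret = start_connect(state);
	if (ret == -ERR_ALREADY) {
		progress();        /* give the pending handshake a chance */
		return -ERR_AGAIN;
	}
	return ret ? ret : -ERR_AGAIN; /* connect started: still retry the op */
}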
Example #5
static ssize_t rxd_generic_write_inject(struct rxd_ep *rxd_ep,
		const struct iovec *iov, size_t iov_count,
		const struct fi_rma_iov *rma_iov, size_t rma_count,
		fi_addr_t addr, void *context, uint32_t op, uint64_t data,
		uint32_t rxd_flags)
{
	struct rxd_x_entry *tx_entry;
	fi_addr_t rxd_addr;
	ssize_t ret = -FI_EAGAIN;

	assert(iov_count <= RXD_IOV_LIMIT && rma_count <= RXD_IOV_LIMIT);
	assert(ofi_total_iov_len(iov, iov_count) <= rxd_ep_domain(rxd_ep)->max_inline_rma);

	fastlock_acquire(&rxd_ep->util_ep.lock);
	fastlock_acquire(&rxd_ep->util_ep.tx_cq->cq_lock);

	if (ofi_cirque_isfull(rxd_ep->util_ep.tx_cq->cirq))
		goto out;

	rxd_addr = rxd_ep_av(rxd_ep)->fi_addr_table[addr];
	ret = rxd_send_rts_if_needed(rxd_ep, rxd_addr);
	if (ret)
		goto out;

	tx_entry = rxd_tx_entry_init(rxd_ep, iov, iov_count, NULL, 0, rma_count, data,
				     0, context, rxd_addr, op, rxd_flags);
	if (!tx_entry) {
		ret = -FI_EAGAIN;
		goto out;
	}

	ret = rxd_ep_send_op(rxd_ep, tx_entry, rma_iov, rma_count, NULL, 0, 0, 0);
	if (ret) {
		rxd_tx_entry_free(rxd_ep, tx_entry);
		goto out;
	}

	if (tx_entry->op == RXD_READ_REQ)
		goto out;

	ret = 0;

out:
	fastlock_release(&rxd_ep->util_ep.tx_cq->cq_lock);
	fastlock_release(&rxd_ep->util_ep.lock);
	return ret;
}
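
Both RMA paths start by rejecting the operation with -FI_EAGAIN when the tx completion ring is full (the ofi_cirque_isfull check), before any state is touched. A minimal power-of-two ring fullness test in that spirit; this layout is an assumption for illustration, not ofi_cirque's actual implementation:

#include <stdint.h>
#include <stdbool.h>

#define RING_SIZE 64u /* must be a power of two */

/* Free-running counters; their difference is the number of queued slots. */
struct ring {
	uint32_t wcnt; /* total writes */
	uint32_t rcnt; /* total reads */
};

static bool ring_isfull(const struct ring *ring)
{
	/* Unsigned subtraction stays correct across counter wraparound. */
	return ring->wcnt - ring->rcnt >= RING_SIZE;
}

static int try_post(struct ring *tx_cq_ring)
{
	if (ring_isfull(tx_cq_ring))
		return -11; /* FI_EAGAIN-style result: caller retries later */
	tx_cq_ring->wcnt++; /* reserve the completion slot up front */
	return 0;
}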
Example #6
ssize_t rxd_generic_rma(struct rxd_ep *rxd_ep, const struct iovec *iov,
	size_t iov_count, const struct fi_rma_iov *rma_iov, size_t rma_count,
	void **desc, fi_addr_t addr, void *context, uint32_t op, uint64_t data,
	uint32_t rxd_flags)
{
	struct rxd_x_entry *tx_entry;
	fi_addr_t rxd_addr;
	ssize_t ret = -FI_EAGAIN;

	if (rxd_flags & RXD_INJECT)
		return rxd_generic_write_inject(rxd_ep, iov, iov_count, rma_iov,
						rma_count, addr, context, op,
						data, rxd_flags);

	assert(iov_count <= RXD_IOV_LIMIT && rma_count <= RXD_IOV_LIMIT);

	fastlock_acquire(&rxd_ep->util_ep.lock);
	fastlock_acquire(&rxd_ep->util_ep.tx_cq->cq_lock);

	if (ofi_cirque_isfull(rxd_ep->util_ep.tx_cq->cirq))
		goto out;

	rxd_addr = rxd_ep_av(rxd_ep)->fi_addr_table[addr];
	ret = rxd_send_rts_if_needed(rxd_ep, rxd_addr);
	if (ret)
		goto out;

	tx_entry = rxd_tx_entry_init(rxd_ep, iov, iov_count, NULL, 0, rma_count,
				     data, 0, context, rxd_addr, op, rxd_flags);
	if (!tx_entry) {
		ret = -FI_EAGAIN;
		goto out;
	}

	ret = rxd_ep_send_op(rxd_ep, tx_entry, rma_iov, rma_count, NULL, 0, 0, 0);
	if (ret)
		rxd_tx_entry_free(rxd_ep, tx_entry);

out:
	fastlock_release(&rxd_ep->util_ep.tx_cq->cq_lock);
	fastlock_release(&rxd_ep->util_ep.lock);
	return ret;
}
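
Examples #5 and #6 share a strict lock hierarchy: the endpoint lock (util_ep.lock) is always taken before the CQ lock (tx_cq->cq_lock), and the two are released in reverse order, which rules out deadlock between these paths. A small pthread sketch of the same discipline, with hypothetical types:

#include <pthread.h>

struct ep {
	pthread_mutex_t ep_lock; /* outer lock, like util_ep.lock */
	pthread_mutex_t cq_lock; /* inner lock, like tx_cq->cq_lock */
};

/* Every path acquires ep_lock before cq_lock; a consistent order means
 * no two threads can each hold one lock while waiting on the other. */
static int do_locked_op(struct ep *ep, int (*op)(struct ep *))
{
	int ret;

	pthread_mutex_lock(&ep->ep_lock);
	pthread_mutex_lock(&ep->cq_lock);
	ret = op(ep);
	pthread_mutex_unlock(&ep->cq_lock); /* inner lock released first */
	pthread_mutex_unlock(&ep->ep_lock);
	return ret;
}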
Example #7
static void rxd_handle_ack(struct rxd_ep *ep, struct ofi_ctrl_hdr *ctrl,
			   struct rxd_rx_buf *rx_buf)
{
	struct rxd_tx_entry *tx_entry;
	uint64_t idx;

	FI_DBG(&rxd_prov, FI_LOG_EP_CTRL,
	       "ack- msg_id: %" PRIu64 ", segno: %d, segsz: %d, buf: %p\n",
	       ctrl->msg_id, ctrl->seg_no, ctrl->seg_size, rx_buf);

	idx = ctrl->msg_id & RXD_TX_IDX_BITS;
	tx_entry = &ep->tx_entry_fs->buf[idx];
	if (tx_entry->msg_id != ctrl->msg_id)
		goto out;

	rxd_ep_free_acked_pkts(ep, tx_entry, ctrl->seg_no);
	if ((tx_entry->bytes_sent == tx_entry->op_hdr.size) &&
	    dlist_empty(&tx_entry->pkt_list)) {
		FI_DBG(&rxd_prov, FI_LOG_EP_CTRL,
			"reporting TX completion : %p\n", tx_entry);
		if (tx_entry->op_type != RXD_TX_READ_REQ) {
			rxd_cq_report_tx_comp(rxd_ep_tx_cq(ep), tx_entry);
			rxd_cntr_report_tx_comp(ep, tx_entry);
			rxd_tx_entry_free(ep, tx_entry);
		}
	} else {
		tx_entry->rx_key = ctrl->rx_key;
		/* do not allow the window size to shrink (on duplicate acks) */
		tx_entry->window = MAX(tx_entry->window, ctrl->seg_no + ctrl->seg_size);
		FI_DBG(&rxd_prov, FI_LOG_EP_CTRL,
		       "ack- msg_id: %" PRIu64 ", window: %d\n",
		       ctrl->msg_id, tx_entry->window);
	}
out:
	rxd_ep_repost_buff(rx_buf);
}
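
The lookup in Example #7 relies on how msg_id is built: the low bits (RXD_TX_IDX_BITS) index a fixed tx-entry array, and the remaining high bits act as a generation tag, so comparing the full id rejects acks aimed at an entry that has since been recycled. A self-contained sketch of that index-plus-generation scheme; the 10-bit width is an assumption:

#include <stddef.h>
#include <stdint.h>

#define IDX_BITS 0x3ffULL /* low 10 bits index the array (width assumed) */

struct entry { uint64_t msg_id; }; /* full id: generation | index */

static struct entry tx_table[IDX_BITS + 1];

/* The low bits locate the slot; the full-id comparison rejects a stale
 * ack whose entry has been freed and reissued with a new generation. */
static struct entry *lookup(uint64_t msg_id)
{
	struct entry *entry = &tx_table[msg_id & IDX_BITS];

	return entry->msg_id == msg_id ? entry : NULL;
}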
Example #8
int rxd_process_start_data(struct rxd_ep *ep, struct rxd_rx_entry *rx_entry,
			   struct rxd_peer *peer, struct ofi_ctrl_hdr *ctrl,
			   struct fi_cq_msg_entry *comp,
			   struct rxd_rx_buf *rx_buf)
{
	uint64_t idx;
	int i, offset, ret;
	struct ofi_rma_iov *rma_iov;
	struct rxd_pkt_data_start *pkt_start;
	struct rxd_tx_entry *tx_entry;

	pkt_start = (struct rxd_pkt_data_start *) ctrl;

	switch (rx_entry->op_hdr.op) {
	case ofi_op_msg:
		rx_entry->recv = rxd_get_recv_entry(ep, rx_entry);
		if (!rx_entry->recv) {
			if (ep->num_unexp_msg < RXD_EP_MAX_UNEXP_MSG) {
				dlist_insert_tail(&rx_entry->unexp_entry, &ep->unexp_msg_list);
				rx_entry->unexp_buf = rx_buf;
				ep->num_unexp_msg++;
				return -FI_ENOENT;
			} else {
				FI_WARN(&rxd_prov, FI_LOG_EP_CTRL, "dropping msg\n");
				return -FI_ENOMEM;
			}
		}

		rxd_ep_handle_data_msg(ep, peer, rx_entry, rx_entry->recv->iov,
				     rx_entry->recv->msg.iov_count, ctrl,
				     pkt_start->data, rx_buf);
		break;
	case ofi_op_tagged:
		rx_entry->trecv = rxd_get_trecv_entry(ep, rx_entry);
		if (!rx_entry->trecv) {
			if (ep->num_unexp_msg < RXD_EP_MAX_UNEXP_MSG) {
				dlist_insert_tail(&rx_entry->unexp_entry, &ep->unexp_tag_list);
				rx_entry->unexp_buf = rx_buf;
				ep->num_unexp_msg++;
				return -FI_ENOENT;
			} else {
				FI_WARN(&rxd_prov, FI_LOG_EP_CTRL, "dropping msg\n");
				return -FI_ENOMEM;
			}
		}

		rxd_ep_handle_data_msg(ep, peer, rx_entry, rx_entry->trecv->iov,
				     rx_entry->trecv->msg.iov_count, ctrl,
				     pkt_start->data, rx_buf);
		break;
	case ofi_op_write:
		rma_iov = (struct ofi_rma_iov *) pkt_start->data;
		for (i = 0; i < rx_entry->op_hdr.iov_count; i++) {
			ret = rxd_mr_verify(rxd_ep_domain(ep),
					    rma_iov[i].len,
					    (uintptr_t *) &rma_iov[i].addr,
					    rma_iov[i].key, FI_REMOTE_WRITE);
			if (ret) {
				/* todo: handle invalid key case */
				FI_WARN(&rxd_prov, FI_LOG_EP_CTRL, "invalid key/access permissions\n");
				return -FI_EACCES;
			}

			rx_entry->write.iov[i].iov_base = (void *) (uintptr_t) rma_iov[i].addr;
			rx_entry->write.iov[i].iov_len = rma_iov[i].len;
		}

		offset = sizeof(struct ofi_rma_iov) * rx_entry->op_hdr.iov_count;
		ctrl->seg_size -= offset;
		rxd_ep_handle_data_msg(ep, peer, rx_entry, rx_entry->write.iov,
				       rx_entry->op_hdr.iov_count, ctrl,
				       pkt_start->data + offset, rx_buf);
		break;
	case ofi_op_read_req:
		rma_iov = (struct ofi_rma_iov *) pkt_start->data;
		tx_entry = rxd_tx_entry_alloc(ep, peer, rx_entry->peer, 0,
						RXD_TX_READ_RSP);
		if (!tx_entry) {
			FI_WARN(&rxd_prov, FI_LOG_EP_CTRL, "no free tx-entry\n");
			return -FI_ENOMEM;
		}

		tx_entry->peer = rx_entry->peer;
		tx_entry->read_rsp.iov_count = rx_entry->op_hdr.iov_count;
		for (i = 0; i < rx_entry->op_hdr.iov_count; i++) {
			ret = rxd_mr_verify(rxd_ep_domain(ep),
					    rma_iov[i].len,
					    (uintptr_t *) &rma_iov[i].addr,
					    rma_iov[i].key, FI_REMOTE_READ);
			if (ret) {
				/* todo: handle invalid key case */
				FI_WARN(&rxd_prov, FI_LOG_EP_CTRL, "invalid key/access permissions\n");
				return -FI_EACCES;
			}

			tx_entry->read_rsp.src_iov[i].iov_base = (void *) (uintptr_t)
								rma_iov[i].addr;
			tx_entry->read_rsp.src_iov[i].iov_len = rma_iov[i].len;
		}
		tx_entry->read_rsp.peer_msg_id = ctrl->msg_id;
		ret = rxd_ep_start_xfer(ep, peer, ofi_op_read_rsp, tx_entry);
		if (ret)
			rxd_tx_entry_free(ep, tx_entry);
		rxd_rx_entry_free(ep, rx_entry);
		break;
	case ofi_op_read_rsp:
		idx = rx_entry->op_hdr.remote_idx & RXD_TX_IDX_BITS;
		tx_entry = &ep->tx_entry_fs->buf[idx];
		if (tx_entry->msg_id != rx_entry->op_hdr.remote_idx)
			return -FI_ENOMEM;

		rx_entry->read_rsp.tx_entry = tx_entry;
		rxd_ep_handle_data_msg(ep, peer, rx_entry, tx_entry->read_req.dst_iov,
				       tx_entry->read_req.msg.iov_count, ctrl,
				       pkt_start->data, rx_buf);
		break;
	case ofi_op_atomic:
	default:
		FI_WARN(&rxd_prov, FI_LOG_EP_CTRL, "invalid op type\n");
		return -FI_EINVAL;
	}
	return 0;
}
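
The ofi_op_msg and ofi_op_tagged branches above implement a bounded unexpected-message queue: when no matching receive is posted, the message is parked (up to RXD_EP_MAX_UNEXP_MSG) and -FI_ENOENT is returned; beyond the cap it is dropped with -FI_ENOMEM. A condensed sketch of that policy, with illustrative names and error values:

#include <stddef.h>

#define MAX_UNEXP 128 /* cap, like RXD_EP_MAX_UNEXP_MSG */
#define ERR_NOENT 2   /* stands in for FI_ENOENT */
#define ERR_NOMEM 12  /* stands in for FI_ENOMEM */

struct msg { struct msg *next; };

struct endpoint {
	struct msg *unexp_head;
	int num_unexp;
};

static int queue_unexpected(struct endpoint *ep, struct msg *msg)
{
	if (ep->num_unexp >= MAX_UNEXP)
		return -ERR_NOMEM; /* over the cap: drop the message */

	msg->next = ep->unexp_head; /* LIFO for brevity; rxd inserts at the tail */
	ep->unexp_head = msg;
	ep->num_unexp++;
	return -ERR_NOENT; /* parked: no matching receive was posted */
}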