Exemple #1
0
/*
 * Release everything still queued on a peer and mark the peer inactive.
 *
 * The peer's lists are drained in a fixed order (unacked, tx_list, rx_list,
 * rma_rx_list); every entry popped is handed to its release routine.  Once
 * the lists are empty the peer is unlinked via peer->entry and peer->active
 * is cleared.
 *
 * ep:   endpoint passed through to the tx/rx entry release routines
 * peer: peer whose queued state is discarded
 */
static void rxd_close_peer(struct rxd_ep *ep, struct rxd_peer *peer)
{
	struct rxd_pkt_entry *pkt;
	struct rxd_x_entry *xfer;

	/* Packet buffers on the unacked list; unacked_cnt tracks the list length. */
	while (!dlist_empty(&peer->unacked)) {
		dlist_pop_front(&peer->unacked, struct rxd_pkt_entry, pkt, d_entry);
		ofi_buf_free(pkt);
		--peer->unacked_cnt;
	}

	/* Transfer entries queued on tx_list. */
	while (!dlist_empty(&peer->tx_list)) {
		dlist_pop_front(&peer->tx_list, struct rxd_x_entry, xfer, entry);
		rxd_tx_entry_free(ep, xfer);
	}

	/* Transfer entries queued on rx_list. */
	while (!dlist_empty(&peer->rx_list)) {
		dlist_pop_front(&peer->rx_list, struct rxd_x_entry, xfer, entry);
		rxd_rx_entry_free(ep, xfer);
	}

	/*
	 * NOTE(review): rma_rx_list entries are released with
	 * rxd_tx_entry_free(), not rxd_rx_entry_free() -- presumably they
	 * come from the tx pool; confirm against the allocation site.
	 */
	while (!dlist_empty(&peer->rma_rx_list)) {
		dlist_pop_front(&peer->rma_rx_list, struct rxd_x_entry, xfer, entry);
		rxd_tx_entry_free(ep, xfer);
	}

	/* Unlink the peer itself and flag it as no longer active. */
	dlist_remove(&peer->entry);
	peer->active = 0;
}
Exemple #2
0
int rxd_process_start_data(struct rxd_ep *ep, struct rxd_rx_entry *rx_entry,
			   struct rxd_peer *peer, struct ofi_ctrl_hdr *ctrl,
			   struct fi_cq_msg_entry *comp,
			   struct rxd_rx_buf *rx_buf)
{
	uint64_t idx;
	int i, offset, ret;
	struct ofi_rma_iov *rma_iov;
	struct rxd_pkt_data_start *pkt_start;
	struct rxd_tx_entry *tx_entry;
	pkt_start = (struct rxd_pkt_data_start *) ctrl;

	switch (rx_entry->op_hdr.op) {
	case ofi_op_msg:
		rx_entry->recv = rxd_get_recv_entry(ep, rx_entry);
		if (!rx_entry->recv) {
			if (ep->num_unexp_msg < RXD_EP_MAX_UNEXP_MSG) {
				dlist_insert_tail(&rx_entry->unexp_entry, &ep->unexp_msg_list);
				rx_entry->unexp_buf = rx_buf;
				ep->num_unexp_msg++;
				return -FI_ENOENT;
			} else {
				FI_WARN(&rxd_prov, FI_LOG_EP_CTRL, "dropping msg\n");
				return -FI_ENOMEM;
			}
		}

		rxd_ep_handle_data_msg(ep, peer, rx_entry, rx_entry->recv->iov,
				     rx_entry->recv->msg.iov_count, ctrl,
				     pkt_start->data, rx_buf);
		break;
	case ofi_op_tagged:
		rx_entry->trecv = rxd_get_trecv_entry(ep, rx_entry);
		if (!rx_entry->trecv) {
			if (ep->num_unexp_msg < RXD_EP_MAX_UNEXP_MSG) {
				dlist_insert_tail(&rx_entry->unexp_entry, &ep->unexp_tag_list);
				rx_entry->unexp_buf = rx_buf;
				ep->num_unexp_msg++;
				return -FI_ENOENT;
			} else {
				FI_WARN(&rxd_prov, FI_LOG_EP_CTRL, "dropping msg\n");
				return -FI_ENOMEM;
			}
		}

		rxd_ep_handle_data_msg(ep, peer, rx_entry, rx_entry->trecv->iov,
				     rx_entry->trecv->msg.iov_count, ctrl,
				     pkt_start->data, rx_buf);
		break;
	case ofi_op_write:
		rma_iov = (struct ofi_rma_iov *) pkt_start->data;
		for (i = 0; i < rx_entry->op_hdr.iov_count; i++) {
			ret = rxd_mr_verify(rxd_ep_domain(ep),
					    rma_iov[i].len,
					    (uintptr_t *) &rma_iov[i].addr,
					    rma_iov[i].key, FI_REMOTE_WRITE);
			if (ret) {
				/* todo: handle invalid key case */
				FI_WARN(&rxd_prov, FI_LOG_EP_CTRL, "invalid key/access permissions\n");
				return -FI_EACCES;
			}

			rx_entry->write.iov[i].iov_base = (void *) (uintptr_t) rma_iov[i].addr;
			rx_entry->write.iov[i].iov_len = rma_iov[i].len;
		}

		offset = sizeof(struct ofi_rma_iov) * rx_entry->op_hdr.iov_count;
		ctrl->seg_size -= offset;
		rxd_ep_handle_data_msg(ep, peer, rx_entry, rx_entry->write.iov,
				       rx_entry->op_hdr.iov_count, ctrl,
				       pkt_start->data + offset, rx_buf);
		break;
	case ofi_op_read_req:
		rma_iov = (struct ofi_rma_iov *) pkt_start->data;
		tx_entry = rxd_tx_entry_alloc(ep, peer, rx_entry->peer, 0,
						RXD_TX_READ_RSP);
		if (!tx_entry) {
			FI_WARN(&rxd_prov, FI_LOG_EP_CTRL, "no free tx-entry\n");
			return -FI_ENOMEM;
		}

		tx_entry->peer = rx_entry->peer;
		tx_entry->read_rsp.iov_count = rx_entry->op_hdr.iov_count;
		for (i = 0; i < rx_entry->op_hdr.iov_count; i++) {
			ret = rxd_mr_verify(rxd_ep_domain(ep),
					    rma_iov[i].len,
					    (uintptr_t *) &rma_iov[i].addr,
					    rma_iov[i].key, FI_REMOTE_READ);
			if (ret) {
				/* todo: handle invalid key case */
				FI_WARN(&rxd_prov, FI_LOG_EP_CTRL, "invalid key/access permissions\n");
				return -FI_EACCES;
			}

			tx_entry->read_rsp.src_iov[i].iov_base = (void *) (uintptr_t)
								rma_iov[i].addr;
			tx_entry->read_rsp.src_iov[i].iov_len = rma_iov[i].len;
		}
		tx_entry->read_rsp.peer_msg_id = ctrl->msg_id;
		ret = rxd_ep_start_xfer(ep, peer, ofi_op_read_rsp, tx_entry);
		if (ret)
			rxd_tx_entry_free(ep, tx_entry);
		rxd_rx_entry_free(ep, rx_entry);
		break;
	case ofi_op_read_rsp:
		idx = rx_entry->op_hdr.remote_idx & RXD_TX_IDX_BITS;
		tx_entry = &ep->tx_entry_fs->buf[idx];
		if (tx_entry->msg_id != rx_entry->op_hdr.remote_idx)
			return -FI_ENOMEM;

		rx_entry->read_rsp.tx_entry = tx_entry;
		rxd_ep_handle_data_msg(ep, peer, rx_entry, tx_entry->read_req.dst_iov,
				       tx_entry->read_req.msg.iov_count, ctrl,
				       pkt_start->data, rx_buf);
		break;
	case ofi_op_atomic:
	default:
		FI_WARN(&rxd_prov, FI_LOG_EP_CTRL, "invalid op type\n");
		return -FI_EINVAL;
	}
	return 0;
}
Exemple #3
0
/*
 * Handle an incoming "start data" packet: validate it, allocate and fill an
 * rx entry for the new message, and hand it to rxd_process_start_data().
 *
 * ep:     endpoint the packet arrived on
 * peer:   peer state for the sender
 * ctrl:   control header of the packet (start of a struct rxd_pkt_data_start)
 * comp:   completion that delivered the packet
 * rx_buf: buffer holding the packet
 *
 * The buffer is reposted on every path except when the message was queued
 * as unexpected (-FI_ENOENT); in that case rxd_process_start_data() has
 * stored rx_buf in the rx entry.
 */
static void rxd_handle_start_data(struct rxd_ep *ep, struct rxd_peer *peer,
				  struct ofi_ctrl_hdr *ctrl,
				  struct fi_cq_msg_entry *comp,
				  struct rxd_rx_buf *rx_buf)
{
	struct rxd_rx_entry *rx_entry;
	struct rxd_pkt_data_start *pkt_start;
	int ret;

	FI_DBG(&rxd_prov, FI_LOG_EP_CTRL,
	       "start data- msg_id: %" PRIu64 ", segno: %d, buf: %p\n",
	       ctrl->msg_id, ctrl->seg_no, rx_buf);

	pkt_start = (struct rxd_pkt_data_start *) ctrl;
	if (pkt_start->op.version != OFI_OP_VERSION) {
		FI_WARN(&rxd_prov, FI_LOG_EP_CTRL, "op version mismatch\n");
		goto repost;
	}

	ret = rxd_check_start_pkt_order(ep, peer, ctrl, comp);
	if (ret) {
		if (ret == -FI_EALREADY) {
			FI_DBG(&rxd_prov, FI_LOG_EP_CTRL, "duplicate pkt: %d\n",
				ctrl->seg_no);
			rxd_handle_dup_datastart(ep, ctrl, rx_buf);
			goto repost;
		} else {
			FI_DBG(&rxd_prov, FI_LOG_EP_CTRL, "unexpected pkt: %d\n",
				ctrl->seg_no);
			goto repost;
		}
	}

	rx_entry = rxd_rx_entry_alloc(ep);
	if (!rx_entry)
		goto repost;

	rx_entry->peer_info = peer;
	rx_entry->op_hdr = pkt_start->op;
	rx_entry->exp_seg_no = 0;
	rx_entry->msg_id = ctrl->msg_id;
	rx_entry->done = 0;
	rx_entry->peer = ctrl->conn_id;
	/* The source address is only resolved when directed receive is enabled. */
	rx_entry->source = (ep->util_ep.caps & FI_DIRECTED_RECV) ?
		rxd_av_fi_addr(rxd_ep_av(ep), ctrl->conn_id) : FI_ADDR_UNSPEC;
	rx_entry->credits = 1;
	rx_entry->last_win_seg = 1;

	/* msg_id is an integer id (see the first trace above), not a pointer. */
	FI_DBG(&rxd_prov, FI_LOG_EP_CTRL, "Assign rx_entry :%d for  %" PRIu64 "\n",
	       rx_entry->key, (uint64_t) rx_entry->msg_id);

	/*
	 * NOTE(review): this credit is given back by rxd_ep_handle_data_msg()
	 * on the data paths; the error paths below do not restore it --
	 * confirm that is intended.
	 */
	ep->credits--;
	ret = rxd_process_start_data(ep, rx_entry, peer, ctrl, comp, rx_buf);
	if (ret == -FI_ENOENT) {
		/* Queued as unexpected: the rx entry now holds rx_buf, so do not repost. */
		peer->exp_msg_id++;

		/* reply ack, with no window = 0 */
		FI_DBG(&rxd_prov, FI_LOG_EP_CTRL, "Sending wait-ACK [%" PRIu64 "] - %d\n",
			ctrl->msg_id, ctrl->seg_no);
		goto out;
	}

	/*
	 * Any other failure leaves the rx entry unused and still owned here
	 * (-FI_ENOMEM, -FI_EACCES, -FI_EINVAL), so release it rather than
	 * leak it.
	 */
	if (ret)
		rxd_rx_entry_free(ep, rx_entry);

	/* -FI_ENOMEM drops the packet without advancing the expected message id. */
	if (ret != -FI_ENOMEM)
		peer->exp_msg_id++;

repost:
	rxd_ep_repost_buff(rx_buf);
out:
	assert(rxd_reposted_bufs);
	return;
}
Exemple #4
0
/*
 * Copy one received data segment into the destination iov and, when the
 * whole message has arrived, report the completion and release rx_entry.
 *
 * ep:        endpoint the segment arrived on
 * peer:      peer state for the sender (its conn_data is echoed in the ack)
 * rx_entry:  receive-side entry tracking the message
 * iov:       destination scatter list
 * iov_count: number of elements in iov
 * ctrl:      control header of the segment (seg_size bytes of payload)
 * data:      start of the payload inside the packet
 * rx_buf:    buffer holding the packet (not used here)
 *
 * Side effects: increments ep->credits, advances rx_entry->done and
 * rx_entry->exp_seg_no, consumes one rx_entry credit and sends an ack when
 * the entry runs out of credits.  An incomplete message with no credits is
 * appended to ep->wait_rx_list.
 */
void rxd_ep_handle_data_msg(struct rxd_ep *ep, struct rxd_peer *peer,
			   struct rxd_rx_entry *rx_entry,
			   struct iovec *iov, size_t iov_count,
			   struct ofi_ctrl_hdr *ctrl, void *data,
			   struct rxd_rx_buf *rx_buf)
{
	struct fi_cq_tagged_entry cq_entry = {0};
	struct util_cntr *cntr = NULL;
	uint64_t done;
	struct rxd_cq *rxd_rx_cq = rxd_ep_rx_cq(ep);

	ep->credits++;
	/* rx_entry->done doubles as the byte offset into the destination iov. */
	done = ofi_copy_to_iov(iov, iov_count, rx_entry->done, data,
				ctrl->seg_size);
	rx_entry->done += done;
	rx_entry->credits--;
	rx_entry->exp_seg_no++;

	if (done != ctrl->seg_size) {
		/* todo: generate truncation error */
		/* inform peer */
		FI_WARN(&rxd_prov, FI_LOG_EP_CTRL, "TODO: message truncated\n");
	}

	if (rx_entry->credits == 0) {
		/* Window exhausted: compute a new one and advertise it in the ack. */
		rxd_set_rx_credits(ep, rx_entry);

		FI_DBG(&rxd_prov, FI_LOG_EP_CTRL, "replying ack [%" PRIu64 "] - %d\n",
			(uint64_t) ctrl->msg_id, ctrl->seg_no);

		rxd_ep_reply_ack(ep, ctrl, ofi_ctrl_ack, rx_entry->credits,
			       rx_entry->key, peer->conn_data, ctrl->conn_id);
	}

	if (rx_entry->op_hdr.size != rx_entry->done) {
		/* More segments to come. */
		if (rx_entry->credits == 0) {
			/* No window could be granted; queue the entry until one can. */
			dlist_init(&rx_entry->wait_entry);
			dlist_insert_tail(&rx_entry->wait_entry, &ep->wait_rx_list);
			FI_WARN(&rxd_prov, FI_LOG_EP_CTRL,
				"rx-entry %" PRIu64 " - %d enqueued\n",
				(uint64_t) ctrl->msg_id, ctrl->seg_no);
		} else {
			FI_DBG(&rxd_prov, FI_LOG_EP_CTRL,
				"rx_entry->op_hdr.size: %" PRIu64
				", rx_entry->done: %" PRIu64 "\n",
				(uint64_t) rx_entry->op_hdr.size,
				(uint64_t) rx_entry->done);
		}
		return;
	}

	/* todo: handle FI_COMPLETION for RX CQ comp */
	switch(rx_entry->op_hdr.op) {
	case ofi_op_msg:
		/* Handle cntr */
		cntr = ep->util_ep.rx_cntr;
		/* Handle CQ comp */
		cq_entry.flags |= FI_RECV;
		cq_entry.op_context = rx_entry->recv->msg.context;
		cq_entry.len = rx_entry->done;
		cq_entry.buf = rx_entry->recv->iov[0].iov_base;
		cq_entry.data = rx_entry->op_hdr.data;
		/*
		 * Give the posted-receive entry back only after every field
		 * needed from it has been copied out.
		 */
		freestack_push(ep->recv_fs, rx_entry->recv);
		rxd_rx_cq->write_fn(rxd_rx_cq, &cq_entry);
		break;
	case ofi_op_tagged:
		/* Handle cntr */
		cntr = ep->util_ep.rx_cntr;
		/* Handle CQ comp */
		cq_entry.flags |= (FI_RECV | FI_TAGGED);
		cq_entry.op_context = rx_entry->trecv->msg.context;
		cq_entry.len = rx_entry->done;
		cq_entry.buf = rx_entry->trecv->iov[0].iov_base;
		cq_entry.data = rx_entry->op_hdr.data;
		cq_entry.tag = rx_entry->trecv->msg.tag;
		/* As above: release the entry once it is no longer read. */
		freestack_push(ep->trecv_fs, rx_entry->trecv);
		rxd_rx_cq->write_fn(rxd_rx_cq, &cq_entry);
		break;
	case ofi_op_atomic:
		/* Handle cntr */
		cntr = ep->util_ep.rem_wr_cntr;
		/* Handle CQ comp */
		cq_entry.flags |= FI_ATOMIC;
		rxd_rx_cq->write_fn(rxd_rx_cq, &cq_entry);
		break;
	case ofi_op_write:
		/* Handle cntr */
		cntr = ep->util_ep.rem_wr_cntr;
		/* Handle CQ comp: only writes carrying remote CQ data are reported. */
		if (rx_entry->op_hdr.flags & OFI_REMOTE_CQ_DATA) {
			cq_entry.flags |= (FI_RMA | FI_REMOTE_WRITE);
			/*
			 * A remote write is not matched to a posted receive,
			 * so there is no local context to report.
			 * NOTE(review): this used to read
			 * rx_entry->trecv->msg.context; confirm nothing sets
			 * trecv for writes.
			 */
			cq_entry.op_context = NULL;
			cq_entry.len = rx_entry->done;
			cq_entry.buf = rx_entry->write.iov[0].iov_base;
			cq_entry.data = rx_entry->op_hdr.data;
			rxd_rx_cq->write_fn(rxd_rx_cq, &cq_entry);
		}
		break;
	case ofi_op_read_rsp:
		/* The read is complete from the initiator's side: finish its tx entry. */
		rxd_cq_report_tx_comp(rxd_ep_tx_cq(ep), rx_entry->read_rsp.tx_entry);
		rxd_cntr_report_tx_comp(ep, rx_entry->read_rsp.tx_entry);
		rxd_tx_entry_done(ep, rx_entry->read_rsp.tx_entry);
		break;
	default:
		FI_WARN(&rxd_prov, FI_LOG_EP_CTRL, "invalid op type: %d\n",
			rx_entry->op_hdr.op);
		break;
	}

	if (cntr)
		cntr->cntr_fid.ops->add(&cntr->cntr_fid, 1);

	rxd_rx_entry_free(ep, rx_entry);
}