Exemplo n.º 1
0
/*
 * Answer a start-data packet that has already been seen.
 *
 * The rx_entry matching ctrl is searched on ep->rx_entry_list.  If one is
 * found, an ack carrying its current credits and key is sent.  If none is
 * found, an ack with zero credits and a UINT64_MAX key is sent instead.
 * rx_buf is not used by this function.
 */
static void rxd_handle_dup_datastart(struct rxd_ep *ep, struct ofi_ctrl_hdr *ctrl,
				      struct rxd_rx_buf *rx_buf)
{
	struct rxd_peer *sender = rxd_ep_getpeer_info(ep, ctrl->conn_id);
	struct dlist_entry *match;
	struct rxd_rx_entry *dup_rx;

	match = dlist_find_first_match(&ep->rx_entry_list,
				       rxd_rx_entry_match, ctrl);
	if (match) {
		FI_INFO(&rxd_prov, FI_LOG_EP_CTRL,
			"duplicate start-data: msg_id: %" PRIu64 ", seg_no: %d\n",
			ctrl->msg_id, ctrl->seg_no);

		dup_rx = container_of(match, struct rxd_rx_entry, entry);
		rxd_ep_reply_ack(ep, ctrl, ofi_ctrl_ack, dup_rx->credits,
				 dup_rx->key, sender->conn_data, ctrl->conn_id);
	} else {
		/*
		 * A small (single-packet) message can be completed and its
		 * rx_entry destroyed on the receiver while the ack never
		 * reaches the sender.  Re-ack with a zero window so the
		 * sender can finish the operation on its side.
		 */
		rxd_ep_reply_ack(ep, ctrl, ofi_ctrl_ack, 0, UINT64_MAX,
				 sender->conn_data, ctrl->conn_id);
	}
}
Exemplo n.º 2
0
/*
 * Handle an incoming connection request.
 *
 * The sender's datagram address is carried in the packet payload
 * (pkt_data->data, ctrl->seg_size bytes).  The address is resolved to a
 * peer index through the AV, inserting it first when the reverse lookup
 * reports -FI_ENODATA.  On first contact the peer is marked as published
 * and initiated, its conn_data is set from ctrl->conn_id and exp_msg_id is
 * advanced.  A connresp ack is then sent back.
 *
 * The receive buffer is always reposted and the endpoint lock (taken via
 * rxd_ep_lock_if_required) is always released before returning.
 *
 * Returns 0 on success, or a negative error code when the peer address
 * could not be resolved or inserted; no response is sent in that case.
 * comp is not used by this function.
 */
int rxd_handle_conn_req(struct rxd_ep *ep, struct ofi_ctrl_hdr *ctrl,
			 struct fi_cq_msg_entry *comp,
			 struct rxd_rx_buf *rx_buf)
{
	int ret;
	void *addr;
	size_t addrlen;
	uint64_t peer;
	struct rxd_pkt_data *pkt_data;
	struct rxd_peer *peer_info;

	rxd_ep_lock_if_required(ep);

	pkt_data = (struct rxd_pkt_data *) ctrl;
	addr = pkt_data->data;
	/* NOTE(review): seg_size comes from the wire and is not bounded
	 * here -- confirm the lookup validates addrlen. */
	addrlen = ctrl->seg_size;

	ret = rxd_av_dg_reverse_lookup(ep->av, ctrl->rx_key, addr, addrlen, &peer);
	if (ret == -FI_ENODATA) {
		/* Unknown sender: add its address, then resolve it again. */
		ret = rxd_av_insert_dg_av(ep->av, addr);
		if (ret != 1) {
			/* Exactly one inserted address is the success value;
			 * map a non-negative miss to an error code. */
			ret = (ret < 0) ? ret : -FI_ENODATA;
			goto out;
		}

		ret = rxd_av_dg_reverse_lookup(ep->av, ctrl->rx_key, addr, addrlen, &peer);
	}

	/* Any remaining failure leaves 'peer' unset; do not use it. */
	if (ret)
		goto out;

	peer_info = rxd_ep_getpeer_info(ep, peer);
	if (!peer_info->addr_published) {
		peer_info->addr_published = 1;
		peer_info->conn_initiated = 1;
		peer_info->conn_data = ctrl->conn_id;
		peer_info->exp_msg_id++;
	}

	rxd_ep_reply_ack(ep, ctrl, ofi_ctrl_connresp, 0, ctrl->conn_id, peer, peer);
out:
	rxd_ep_repost_buff(rx_buf);
	rxd_ep_unlock_if_required(ep);
	return ret;
}
Exemplo n.º 3
0
/*
 * Answer a start-data packet that has already been seen.
 *
 * If an rx_entry matching ctrl is still on ep->rx_entry_list, its current
 * window and key are acked back to the sender.  When no entry matches,
 * nothing is sent.  rx_buf is not used by this function.
 */
static void rxd_handle_dup_datastart(struct rxd_ep *ep, struct ofi_ctrl_hdr *ctrl,
				      struct rxd_rx_buf *rx_buf)
{
	struct dlist_entry *match;
	struct rxd_rx_entry *dup_rx;
	struct rxd_peer *sender;

	match = dlist_find_first_match(&ep->rx_entry_list,
				       rxd_rx_entry_match, ctrl);
	if (match) {
		FI_INFO(&rxd_prov, FI_LOG_EP_CTRL,
			"duplicate start-data: msg_id: %" PRIu64 ", seg_no: %d\n",
			ctrl->msg_id, ctrl->seg_no);

		dup_rx = container_of(match, struct rxd_rx_entry, entry);
		sender = rxd_ep_getpeer_info(ep, ctrl->conn_id);
		rxd_ep_reply_ack(ep, ctrl, ofi_ctrl_ack, dup_rx->window,
				 dup_rx->key, sender->conn_data, ctrl->conn_id);
	}
}
Exemplo n.º 4
0
static void rxd_progress_wait_rx(struct rxd_ep *ep,
				 struct rxd_rx_entry *rx_entry)
{
	struct ofi_ctrl_hdr ctrl;

	rxd_set_rx_credits(ep, rx_entry);
	if (!rx_entry->credits)
		return;

	dlist_remove(&rx_entry->wait_entry);

	ctrl.msg_id = rx_entry->msg_id;
	ctrl.seg_no = rx_entry->exp_seg_no - 1;
	ctrl.conn_id = rx_entry->peer;

	FI_DBG(&rxd_prov, FI_LOG_EP_CTRL,
		"rx-entry wait over [%p], credits: %d\n",
		rx_entry->msg_id, rx_entry->credits);
	rxd_ep_reply_ack(ep, &ctrl, ofi_ctrl_ack, rx_entry->credits,
		       rx_entry->key, rx_entry->peer_info->conn_data,
		       ctrl.conn_id);
}
Exemplo n.º 5
0
/*
 * Handle an incoming connection request.
 *
 * The packet payload holds the sender's datagram address.  Requests whose
 * seg_size exceeds RXD_MAX_DGRAM_ADDR are dropped with a warning, as are
 * requests whose address cannot be inserted into the AV.  Otherwise the
 * peer is moved to CMAP_CONNECTED if it was not already (recording
 * conn_data and advancing exp_msg_id) and a connresp ack is sent.
 *
 * The receive buffer is reposted on every path.  comp is not used.
 */
static void rxd_handle_conn_req(struct rxd_ep *ep, struct ofi_ctrl_hdr *ctrl,
				struct fi_cq_msg_entry *comp,
				struct rxd_rx_buf *rx_buf)
{
	struct rxd_pkt_data *pkt = (struct rxd_pkt_data *) ctrl;
	struct rxd_peer *remote;
	fi_addr_t remote_addr;

	FI_INFO(&rxd_prov, FI_LOG_EP_DATA,
	       "conn req - rx_key: %" PRIu64 "\n", ctrl->rx_key);

	if (ctrl->seg_size > RXD_MAX_DGRAM_ADDR) {
		FI_WARN(&rxd_prov, FI_LOG_EP_DATA, "addr too large\n");
	} else if (rxd_av_insert_dg_addr(rxd_ep_av(ep), ctrl->rx_key,
					 pkt->data, &remote_addr)) {
		FI_WARN(&rxd_prov, FI_LOG_EP_DATA, "failed to insert peer address\n");
	} else {
		remote = rxd_ep_getpeer_info(ep, remote_addr);
		if (remote->state != CMAP_CONNECTED) {
			/* First request from this peer: record its connection data. */
			remote->state = CMAP_CONNECTED;
			remote->conn_data = ctrl->conn_id;
			remote->exp_msg_id++;
		}

		rxd_ep_reply_ack(ep, ctrl, ofi_ctrl_connresp, 0, ctrl->conn_id,
				 remote_addr, remote_addr);
	}

	rxd_ep_repost_buff(rx_buf);
}
Exemplo n.º 6
0
static void rxd_progress_wait_rx(struct rxd_ep *ep, struct rxd_rx_entry *rx_entry)
{
	struct ofi_ctrl_hdr ctrl;

	rx_entry->window = rxd_get_window_sz(ep, rx_entry->op_hdr.size - rx_entry->done);

	if (!rx_entry->window)
		return;

	rx_entry->last_win_seg += rx_entry->window;
	dlist_remove(&rx_entry->wait_entry);

	ep->credits -= rx_entry->window;

	ctrl.msg_id = rx_entry->msg_id;
	ctrl.seg_no = rx_entry->exp_seg_no - 1;
	ctrl.conn_id = rx_entry->peer;

	FI_DBG(&rxd_prov, FI_LOG_EP_CTRL, "rx-entry wait over [%p], window: %d\n",
		rx_entry->msg_id, rx_entry->window);
	rxd_ep_reply_ack(ep, &ctrl, ofi_ctrl_ack, rx_entry->window,
		       rx_entry->key, rx_entry->peer_info->conn_data,
		       ctrl.conn_id);
}
Exemplo n.º 7
0
/*
 * Process an incoming data packet.
 *
 * The rx_entry is selected by ctrl->rx_key and the packet is classified by
 * rxd_check_data_pkt_order():
 *   - RXD_PKT_ORDR_DUP: re-ack (advertising the entry's window only when
 *     the packet is the last segment of the current window of the same
 *     message, otherwise 0) and repost the buffer.
 *   - RXD_PKT_ORDR_UNEXP: unless the packet is already a queued
 *     unexpected entry, send a NACK (rate-limited by RXD_RETRY_TIMEOUT via
 *     nack_stamp) and enqueue the packet; the buffer is NOT reposted.
 *   - otherwise: hand the payload to rxd_ep_handle_data_msg() with the iov
 *     that belongs to the operation type, then repost the buffer.
 *
 * The endpoint lock is held (if required) for the whole call.
 */
void rxd_handle_data(struct rxd_ep *ep, struct rxd_peer *peer,
		      struct ofi_ctrl_hdr *ctrl, struct fi_cq_msg_entry *comp,
		      struct rxd_rx_buf *rx_buf)
{
	int ret;
	struct rxd_rx_entry *rx_entry;
	struct rxd_tx_entry *tx_entry;
	struct rxd_pkt_data *pkt_data = (struct rxd_pkt_data *) ctrl;
	uint16_t win_sz;
	uint64_t curr_stamp;

	rxd_ep_lock_if_required(ep);
	/* NOTE(review): rx_key comes from the wire and is used as an index
	 * without a bounds check here -- confirm it is validated upstream. */
	rx_entry = &ep->rx_entry_fs->buf[ctrl->rx_key];

	ret = rxd_check_data_pkt_order(ep, peer, ctrl, rx_entry);
	if (ret == RXD_PKT_ORDR_DUP) {
		/* rx_key and msg_id are 64-bit integers, not int/pointer. */
		FI_DBG(&rxd_prov, FI_LOG_EP_CTRL,
			"duplicate pkt: %d expected:%d, rx-key:%" PRIu64
			", ctrl_msg_id: %" PRIu64 "\n",
			ctrl->seg_no, rx_entry->exp_seg_no, ctrl->rx_key, ctrl->msg_id);

		win_sz = (rx_entry->msg_id == ctrl->msg_id &&
			  rx_entry->last_win_seg == ctrl->seg_no) ? rx_entry->window : 0;
		rxd_ep_reply_ack(ep, ctrl, ofi_ctrl_ack, win_sz,
			       ctrl->rx_key, peer->conn_data, ctrl->conn_id);

		goto repost;
	} else if (ret == RXD_PKT_ORDR_UNEXP) {
		if (!(comp->flags & RXD_UNEXP_ENTRY)) {
			curr_stamp = fi_gettime_us();
			/* NACK at most once per RXD_RETRY_TIMEOUT. */
			if (rx_entry->nack_stamp == 0 ||
			    (curr_stamp > rx_entry->nack_stamp &&
			     curr_stamp - rx_entry->nack_stamp > RXD_RETRY_TIMEOUT)) {

				FI_DBG(&rxd_prov, FI_LOG_EP_CTRL,
				       "unexpected pkt, sending NACK: %d\n", ctrl->seg_no);

				rx_entry->nack_stamp = curr_stamp;
				rxd_ep_reply_nack(ep, ctrl, rx_entry->exp_seg_no,
						ctrl->rx_key, peer->conn_data,
						ctrl->conn_id);
			}
			rxd_ep_enqueue_pkt(ep, ctrl, comp);
		}
		/* The packet stays queued, so its buffer is not reposted. */
		goto out;
	}

	rx_entry->nack_stamp = 0;
	FI_DBG(&rxd_prov, FI_LOG_EP_CTRL, "expected pkt: %d\n", ctrl->seg_no);
	switch (rx_entry->op_hdr.op) {
	case ofi_op_msg:
		rxd_ep_handle_data_msg(ep, peer, rx_entry, rx_entry->recv->iov,
				     rx_entry->recv->msg.iov_count, ctrl,
				     pkt_data->data, rx_buf);
		break;

	case ofi_op_tagged:
		rxd_ep_handle_data_msg(ep, peer, rx_entry, rx_entry->trecv->iov,
				     rx_entry->trecv->msg.iov_count, ctrl,
				     pkt_data->data, rx_buf);
		break;

	case ofi_op_write:
		rxd_ep_handle_data_msg(ep, peer, rx_entry, rx_entry->write.iov,
				       rx_entry->op_hdr.iov_count, ctrl,
				       pkt_data->data, rx_buf);
		break;

	case ofi_op_read_rsp:
		tx_entry = rx_entry->read_rsp.tx_entry;
		rxd_ep_handle_data_msg(ep, peer, rx_entry, tx_entry->read_req.dst_iov,
				       tx_entry->read_req.msg.iov_count, ctrl,
				       pkt_data->data, rx_buf);
		break;

	case ofi_op_atomic:
	default:
		FI_WARN(&rxd_prov, FI_LOG_EP_CTRL, "invalid op type\n");
	}

repost:
	/* A packet replayed from the unexpected queue releases its slot. */
	if (comp->flags & RXD_UNEXP_ENTRY) {
		rxd_release_unexp_entry(ep->rx_cq, comp);
		ep->num_unexp_pkt--;
	}

	rxd_ep_repost_buff(rx_buf);
out:
	rxd_ep_unlock_if_required(ep);
}
Exemplo n.º 8
0
/*
 * Consume one in-order data segment for rx_entry.
 *
 * The payload (ctrl->seg_size bytes at data) is copied into iov at offset
 * rx_entry->done.  One endpoint credit is returned, the entry's window is
 * decremented and exp_seg_no advanced.  When the window is used up a new
 * one is requested and advertised to the peer in an ack.
 *
 * If the message is not complete yet, the entry is parked on
 * ep->wait_rx_list when no window could be granted, and the function
 * returns.  Once op_hdr.size bytes have been received, the RX completion
 * is reported, operation-specific resources are released and the rx_entry
 * itself is released.
 *
 * rx_buf is not used by this function.
 */
void rxd_ep_handle_data_msg(struct rxd_ep *ep, struct rxd_peer *peer,
			   struct rxd_rx_entry *rx_entry,
			   struct iovec *iov, size_t iov_count,
			   struct ofi_ctrl_hdr *ctrl, void *data,
			   struct rxd_rx_buf *rx_buf)
{

	uint64_t done;

	ep->credits++;
	done = ofi_copy_to_iov(iov, iov_count, rx_entry->done, data, ctrl->seg_size);
	rx_entry->done += done;
	rx_entry->window--;
	rx_entry->exp_seg_no++;

	if (done != ctrl->seg_size) {
		/* todo: generate truncation error */
		/* inform peer */
		FI_WARN(&rxd_prov, FI_LOG_EP_CTRL, "TODO: message truncated\n");
	}

	if (rx_entry->window == 0) {
		rx_entry->window = rxd_get_window_sz(ep, rx_entry->op_hdr.size - rx_entry->done);

		rx_entry->last_win_seg += rx_entry->window;
		ep->credits -= rx_entry->window;
		/* msg_id is an integer id, not a pointer. */
		FI_DBG(&rxd_prov, FI_LOG_EP_CTRL,
			"replying ack [%" PRIu64 "] - %d\n",
			ctrl->msg_id, ctrl->seg_no);

		rxd_ep_reply_ack(ep, ctrl, ofi_ctrl_ack, rx_entry->window,
			       rx_entry->key, peer->conn_data, ctrl->conn_id);
	}

	if (rx_entry->op_hdr.size != rx_entry->done) {
		if (rx_entry->window == 0) {
			/* No window available: wait until credits free up. */
			dlist_init(&rx_entry->wait_entry);
			dlist_insert_tail(&rx_entry->wait_entry, &ep->wait_rx_list);
			FI_WARN(&rxd_prov, FI_LOG_EP_CTRL,
				"rx-entry %" PRIu64 " - %d enqueued\n",
				ctrl->msg_id, ctrl->seg_no);
		} else {
			/* Byte counters are cast so the format matches
			 * regardless of their declared width. */
			FI_DBG(&rxd_prov, FI_LOG_EP_CTRL,
				"rx_entry->op_hdr.size: %" PRIu64
				", rx_entry->done: %" PRIu64 "\n",
				(uint64_t) rx_entry->op_hdr.size,
				(uint64_t) rx_entry->done);
		}
		return;
	}

	FI_DBG(&rxd_prov, FI_LOG_EP_CTRL, "reporting RX completion event\n");
	rxd_report_rx_comp(ep->rx_cq, rx_entry);

	switch(rx_entry->op_hdr.op) {
	case ofi_op_msg:
		freestack_push(ep->recv_fs, rx_entry->recv);
		break;

	case ofi_op_tagged:
		freestack_push(ep->trecv_fs, rx_entry->trecv);
		break;

	case ofi_op_read_rsp:
		/* A completed read response also completes the read request. */
		rxd_cq_report_tx_comp(ep->tx_cq, rx_entry->read_rsp.tx_entry);
		rxd_tx_entry_done(ep, rx_entry->read_rsp.tx_entry);
		break;

	default:
		break;
	}
	rxd_rx_entry_release(ep, rx_entry);
}
Exemplo n.º 9
0
/*
 * Process an incoming data packet.
 *
 * The rx_entry is selected by ctrl->rx_key and the packet is checked with
 * rxd_check_data_pkt_order():
 *   - -FI_EALREADY (duplicate): re-ack, advertising the entry's credits
 *     only when the packet is the last segment of the current window of
 *     the same message, otherwise 0.
 *   - any other non-zero result (invalid/out of order): ack with the
 *     number of segments still open in the current window
 *     (last_win_seg - exp_seg_no) for the same message, otherwise 0.
 *   - 0: hand the payload to rxd_ep_handle_data_msg() with the iov that
 *     belongs to the operation type.
 *
 * The receive buffer is reposted on every path.  comp is not used.
 */
static void rxd_handle_data(struct rxd_ep *ep, struct rxd_peer *peer,
			    struct ofi_ctrl_hdr *ctrl, struct fi_cq_msg_entry *comp,
			    struct rxd_rx_buf *rx_buf)
{
	struct rxd_rx_entry *rx_entry;
	struct rxd_tx_entry *tx_entry;
	struct rxd_pkt_data *pkt_data = (struct rxd_pkt_data *) ctrl;
	uint16_t credits;
	int ret;

	FI_DBG(&rxd_prov, FI_LOG_EP_CTRL,
	       "data pkt- msg_id: %" PRIu64 ", segno: %d, buf: %p\n",
	       ctrl->msg_id, ctrl->seg_no, rx_buf);

	/* NOTE(review): rx_key comes from the wire and is used as an index
	 * without a bounds check here -- confirm it is validated upstream. */
	rx_entry = &ep->rx_entry_fs->buf[ctrl->rx_key];

	ret = rxd_check_data_pkt_order(ep, peer, ctrl, rx_entry);
	if (ret) {
		if (ret == -FI_EALREADY) {
			/* rx_key and msg_id are 64-bit integers, not
			 * int/pointer. */
			FI_DBG(&rxd_prov, FI_LOG_EP_CTRL, "duplicate pkt: %d "
				"expected:%d, rx-key:%" PRIu64
				", ctrl_msg_id: %" PRIu64 "\n",
				ctrl->seg_no, rx_entry->exp_seg_no, ctrl->rx_key,
				ctrl->msg_id);

			credits = ((rx_entry->msg_id == ctrl->msg_id) &&
				  (rx_entry->last_win_seg == ctrl->seg_no)) ?
				  rx_entry->credits : 0;
			rxd_ep_reply_ack(ep, ctrl, ofi_ctrl_ack, credits,
				       ctrl->rx_key, peer->conn_data,
				       ctrl->conn_id);
			goto repost;
		} else {
			FI_DBG(&rxd_prov, FI_LOG_EP_CTRL, "invalid pkt: segno: %d "
			       "expected:%d, rx-key:%" PRIu64
			       ", ctrl_msg_id: %" PRIu64 ", "
			       "rx_entry_msg_id: %" PRIu64 "\n",
			       ctrl->seg_no, rx_entry->exp_seg_no, ctrl->rx_key,
			       ctrl->msg_id, (uint64_t) rx_entry->msg_id);
			FI_DBG(&rxd_prov, FI_LOG_EP_CTRL, "invalid pkt: "
			       "credits: %d, last win: %d\n",
			       rx_entry->credits, rx_entry->last_win_seg);
			/* Re-advertise what is left of the current window. */
			credits = (rx_entry->msg_id == ctrl->msg_id) ?
				  rx_entry->last_win_seg - rx_entry->exp_seg_no : 0;
			rxd_ep_reply_ack(ep, ctrl, ofi_ctrl_ack, credits,
				       ctrl->rx_key, peer->conn_data,
				       ctrl->conn_id);
			goto repost;
		}
	}

	rx_entry->nack_stamp = 0;
	FI_DBG(&rxd_prov, FI_LOG_EP_CTRL, "expected pkt: %d\n", ctrl->seg_no);
	switch (rx_entry->op_hdr.op) {
	case ofi_op_msg:
		rxd_ep_handle_data_msg(ep, peer, rx_entry, rx_entry->recv->iov,
				     rx_entry->recv->msg.iov_count, ctrl,
				     pkt_data->data, rx_buf);
		break;
	case ofi_op_tagged:
		rxd_ep_handle_data_msg(ep, peer, rx_entry, rx_entry->trecv->iov,
				     rx_entry->trecv->msg.iov_count, ctrl,
				     pkt_data->data, rx_buf);
		break;
	case ofi_op_write:
		rxd_ep_handle_data_msg(ep, peer, rx_entry, rx_entry->write.iov,
				       rx_entry->op_hdr.iov_count, ctrl,
				       pkt_data->data, rx_buf);
		break;
	case ofi_op_read_rsp:
		tx_entry = rx_entry->read_rsp.tx_entry;
		rxd_ep_handle_data_msg(ep, peer, rx_entry, tx_entry->read_req.dst_iov,
				       tx_entry->read_req.msg.iov_count, ctrl,
				       pkt_data->data, rx_buf);
		break;
	case ofi_op_atomic:
	default:
		FI_WARN(&rxd_prov, FI_LOG_EP_CTRL, "invalid op type\n");
	}

repost:
	rxd_ep_repost_buff(rx_buf);
}
Exemplo n.º 10
0
/*
 * Consume one in-order data segment for rx_entry.
 *
 * The payload (ctrl->seg_size bytes at data) is copied into iov at offset
 * rx_entry->done.  One endpoint credit is returned, the entry's credits
 * are decremented and exp_seg_no advanced.  When the entry's credits are
 * used up they are recomputed with rxd_set_rx_credits() and advertised to
 * the peer in an ack.
 *
 * If the message is not complete yet, the entry is parked on
 * ep->wait_rx_list when no credits could be granted, and the function
 * returns.  Once op_hdr.size bytes have been received, a completion is
 * written to the RX CQ (or the TX side for read responses), the matching
 * counter is incremented, and the rx_entry is freed.
 *
 * rx_buf is not used by this function.
 */
void rxd_ep_handle_data_msg(struct rxd_ep *ep, struct rxd_peer *peer,
			   struct rxd_rx_entry *rx_entry,
			   struct iovec *iov, size_t iov_count,
			   struct ofi_ctrl_hdr *ctrl, void *data,
			   struct rxd_rx_buf *rx_buf)
{
	struct fi_cq_tagged_entry cq_entry = {0};
	struct util_cntr *cntr = NULL;
	uint64_t done;
	struct rxd_cq *rxd_rx_cq = rxd_ep_rx_cq(ep);

	ep->credits++;
	done = ofi_copy_to_iov(iov, iov_count, rx_entry->done, data,
				ctrl->seg_size);
	rx_entry->done += done;
	rx_entry->credits--;
	rx_entry->exp_seg_no++;

	if (done != ctrl->seg_size) {
		/* todo: generate truncation error */
		/* inform peer */
		FI_WARN(&rxd_prov, FI_LOG_EP_CTRL, "TODO: message truncated\n");
	}

	if (rx_entry->credits == 0) {
		rxd_set_rx_credits(ep, rx_entry);

		/* msg_id is an integer id, not a pointer. */
		FI_DBG(&rxd_prov, FI_LOG_EP_CTRL,
			"replying ack [%" PRIu64 "] - %d\n",
			ctrl->msg_id, ctrl->seg_no);

		rxd_ep_reply_ack(ep, ctrl, ofi_ctrl_ack, rx_entry->credits,
			       rx_entry->key, peer->conn_data, ctrl->conn_id);
	}

	if (rx_entry->op_hdr.size != rx_entry->done) {
		if (rx_entry->credits == 0) {
			/* No credits available: wait until some free up. */
			dlist_init(&rx_entry->wait_entry);
			dlist_insert_tail(&rx_entry->wait_entry, &ep->wait_rx_list);
			FI_WARN(&rxd_prov, FI_LOG_EP_CTRL,
				"rx-entry %" PRIu64 " - %d enqueued\n",
				ctrl->msg_id, ctrl->seg_no);
		} else {
			/* Byte counters are cast so the format matches
			 * regardless of their declared width. */
			FI_DBG(&rxd_prov, FI_LOG_EP_CTRL,
				"rx_entry->op_hdr.size: %" PRIu64
				", rx_entry->done: %" PRIu64 "\n",
				(uint64_t) rx_entry->op_hdr.size,
				(uint64_t) rx_entry->done);
		}
		return;
	}

	/* todo: handle FI_COMPLETION for RX CQ comp */
	switch(rx_entry->op_hdr.op) {
	case ofi_op_msg:
		/* Handle cntr */
		cntr = ep->util_ep.rx_cntr;
		/* Handle CQ comp */
		cq_entry.flags |= FI_RECV;
		cq_entry.op_context = rx_entry->recv->msg.context;
		cq_entry.len = rx_entry->done;
		cq_entry.buf = rx_entry->recv->iov[0].iov_base;
		cq_entry.data = rx_entry->op_hdr.data;
		/* Return the recv entry to its pool only after its fields
		 * have been read; it must not be touched once released. */
		freestack_push(ep->recv_fs, rx_entry->recv);
		rxd_rx_cq->write_fn(rxd_rx_cq, &cq_entry);
		break;
	case ofi_op_tagged:
		/* Handle cntr */
		cntr = ep->util_ep.rx_cntr;
		/* Handle CQ comp */
		cq_entry.flags |= (FI_RECV | FI_TAGGED);
		cq_entry.op_context = rx_entry->trecv->msg.context;
		cq_entry.len = rx_entry->done;
		cq_entry.buf = rx_entry->trecv->iov[0].iov_base;
		cq_entry.data = rx_entry->op_hdr.data;
		cq_entry.tag = rx_entry->trecv->msg.tag;
		/* Release the trecv entry only after its last read. */
		freestack_push(ep->trecv_fs, rx_entry->trecv);
		rxd_rx_cq->write_fn(rxd_rx_cq, &cq_entry);
		break;
	case ofi_op_atomic:
		/* Handle cntr */
		cntr = ep->util_ep.rem_wr_cntr;
		/* Handle CQ comp */
		cq_entry.flags |= FI_ATOMIC;
		rxd_rx_cq->write_fn(rxd_rx_cq, &cq_entry);
		break;
	case ofi_op_write:
		/* Handle cntr */
		cntr = ep->util_ep.rem_wr_cntr;
		/* Handle CQ comp */
		if (rx_entry->op_hdr.flags & OFI_REMOTE_CQ_DATA) {
			cq_entry.flags |= (FI_RMA | FI_REMOTE_WRITE);
			/* NOTE(review): this reads the trecv member while
			 * the entry describes a write (rx_entry->write is
			 * used just below) -- confirm trecv is valid for
			 * this op type. */
			cq_entry.op_context = rx_entry->trecv->msg.context;
			cq_entry.len = rx_entry->done;
			cq_entry.buf = rx_entry->write.iov[0].iov_base;
			cq_entry.data = rx_entry->op_hdr.data;
			rxd_rx_cq->write_fn(rxd_rx_cq, &cq_entry);
		}
		break;
	case ofi_op_read_rsp:
		/* A completed read response completes the read request. */
		rxd_cq_report_tx_comp(rxd_ep_tx_cq(ep), rx_entry->read_rsp.tx_entry);
		rxd_cntr_report_tx_comp(ep, rx_entry->read_rsp.tx_entry);
		rxd_tx_entry_done(ep, rx_entry->read_rsp.tx_entry);
		break;
	default:
		FI_WARN(&rxd_prov, FI_LOG_EP_CTRL, "invalid op type: %d\n",
			rx_entry->op_hdr.op);
		break;
	}

	if (cntr)
		cntr->cntr_fid.ops->add(&cntr->cntr_fid, 1);

	rxd_rx_entry_free(ep, rx_entry);
}