static void rxd_handle_dup_datastart(struct rxd_ep *ep, struct ofi_ctrl_hdr *ctrl, struct rxd_rx_buf *rx_buf) { struct dlist_entry *item; struct rxd_rx_entry *rx_entry; struct rxd_peer *peer; peer = rxd_ep_getpeer_info(ep, ctrl->conn_id); item = dlist_find_first_match(&ep->rx_entry_list, rxd_rx_entry_match, ctrl); if (!item) { /* for small (1-packet) messages we may have situation * when receiver completed operation and destroyed * rx_entry, but ack is lost (not delivered to sender). * in this case just send ack with zero window to * allow sender complete operation on sender side */ rxd_ep_reply_ack(ep, ctrl, ofi_ctrl_ack, 0, UINT64_MAX, peer->conn_data, ctrl->conn_id); return; } FI_INFO(&rxd_prov, FI_LOG_EP_CTRL, "duplicate start-data: msg_id: %" PRIu64 ", seg_no: %d\n", ctrl->msg_id, ctrl->seg_no); rx_entry = container_of(item, struct rxd_rx_entry, entry); rxd_ep_reply_ack(ep, ctrl, ofi_ctrl_ack, rx_entry->credits, rx_entry->key, peer->conn_data, ctrl->conn_id); return; }
int rxd_handle_conn_req(struct rxd_ep *ep, struct ofi_ctrl_hdr *ctrl, struct fi_cq_msg_entry *comp, struct rxd_rx_buf *rx_buf) { int ret; void *addr; size_t addrlen; uint64_t peer; struct rxd_pkt_data *pkt_data; struct rxd_peer *peer_info; rxd_ep_lock_if_required(ep); pkt_data = (struct rxd_pkt_data *) ctrl; addr = pkt_data->data; addrlen = ctrl->seg_size; ret = rxd_av_dg_reverse_lookup(ep->av, ctrl->rx_key, addr, addrlen, &peer); if (ret == -FI_ENODATA) { ret = rxd_av_insert_dg_av(ep->av, addr); assert(ret == 1); ret = rxd_av_dg_reverse_lookup(ep->av, ctrl->rx_key, addr, addrlen, &peer); assert(ret == 0); } peer_info = rxd_ep_getpeer_info(ep, peer); if (!peer_info->addr_published) { peer_info->addr_published = 1; peer_info->conn_initiated = 1; peer_info->conn_data = ctrl->conn_id; peer_info->exp_msg_id++; } rxd_ep_reply_ack(ep, ctrl, ofi_ctrl_connresp, 0, ctrl->conn_id, peer, peer); rxd_ep_repost_buff(rx_buf); rxd_ep_unlock_if_required(ep); return ret; }
static void rxd_handle_dup_datastart(struct rxd_ep *ep, struct ofi_ctrl_hdr *ctrl, struct rxd_rx_buf *rx_buf) { struct dlist_entry *item; struct rxd_rx_entry *rx_entry; struct rxd_peer *peer; item = dlist_find_first_match(&ep->rx_entry_list, rxd_rx_entry_match, ctrl); if (!item) return; FI_INFO(&rxd_prov, FI_LOG_EP_CTRL, "duplicate start-data: msg_id: %" PRIu64 ", seg_no: %d\n", ctrl->msg_id, ctrl->seg_no); rx_entry = container_of(item, struct rxd_rx_entry, entry); peer = rxd_ep_getpeer_info(ep, ctrl->conn_id); rxd_ep_reply_ack(ep, ctrl, ofi_ctrl_ack, rx_entry->window, rx_entry->key, peer->conn_data, ctrl->conn_id); return; }
static void rxd_progress_wait_rx(struct rxd_ep *ep, struct rxd_rx_entry *rx_entry) { struct ofi_ctrl_hdr ctrl; rxd_set_rx_credits(ep, rx_entry); if (!rx_entry->credits) return; dlist_remove(&rx_entry->wait_entry); ctrl.msg_id = rx_entry->msg_id; ctrl.seg_no = rx_entry->exp_seg_no - 1; ctrl.conn_id = rx_entry->peer; FI_DBG(&rxd_prov, FI_LOG_EP_CTRL, "rx-entry wait over [%p], credits: %d\n", rx_entry->msg_id, rx_entry->credits); rxd_ep_reply_ack(ep, &ctrl, ofi_ctrl_ack, rx_entry->credits, rx_entry->key, rx_entry->peer_info->conn_data, ctrl.conn_id); }
static void rxd_handle_conn_req(struct rxd_ep *ep, struct ofi_ctrl_hdr *ctrl, struct fi_cq_msg_entry *comp, struct rxd_rx_buf *rx_buf) { struct rxd_pkt_data *pkt_data; struct rxd_peer *peer_info; fi_addr_t dg_fiaddr; void *addr; int ret; FI_INFO(&rxd_prov, FI_LOG_EP_DATA, "conn req - rx_key: %" PRIu64 "\n", ctrl->rx_key); pkt_data = (struct rxd_pkt_data *) ctrl; addr = pkt_data->data; if (ctrl->seg_size > RXD_MAX_DGRAM_ADDR) { FI_WARN(&rxd_prov, FI_LOG_EP_DATA, "addr too large\n"); goto repost; } ret = rxd_av_insert_dg_addr(rxd_ep_av(ep), ctrl->rx_key, addr, &dg_fiaddr); if (ret) { FI_WARN(&rxd_prov, FI_LOG_EP_DATA, "failed to insert peer address\n"); goto repost; } peer_info = rxd_ep_getpeer_info(ep, dg_fiaddr); if (peer_info->state != CMAP_CONNECTED) { peer_info->state = CMAP_CONNECTED; peer_info->conn_data = ctrl->conn_id; peer_info->exp_msg_id++; } rxd_ep_reply_ack(ep, ctrl, ofi_ctrl_connresp, 0, ctrl->conn_id, dg_fiaddr, dg_fiaddr); repost: rxd_ep_repost_buff(rx_buf); }
static void rxd_progress_wait_rx(struct rxd_ep *ep, struct rxd_rx_entry *rx_entry) { struct ofi_ctrl_hdr ctrl; rx_entry->window = rxd_get_window_sz(ep, rx_entry->op_hdr.size - rx_entry->done); if (!rx_entry->window) return; rx_entry->last_win_seg += rx_entry->window; dlist_remove(&rx_entry->wait_entry); ep->credits -= rx_entry->window; ctrl.msg_id = rx_entry->msg_id; ctrl.seg_no = rx_entry->exp_seg_no - 1; ctrl.conn_id = rx_entry->peer; FI_DBG(&rxd_prov, FI_LOG_EP_CTRL, "rx-entry wait over [%p], window: %d\n", rx_entry->msg_id, rx_entry->window); rxd_ep_reply_ack(ep, &ctrl, ofi_ctrl_ack, rx_entry->window, rx_entry->key, rx_entry->peer_info->conn_data, ctrl.conn_id); }
void rxd_handle_data(struct rxd_ep *ep, struct rxd_peer *peer, struct ofi_ctrl_hdr *ctrl, struct fi_cq_msg_entry *comp, struct rxd_rx_buf *rx_buf) { int ret; struct rxd_rx_entry *rx_entry; struct rxd_tx_entry *tx_entry; struct rxd_pkt_data *pkt_data = (struct rxd_pkt_data *) ctrl; uint16_t win_sz; uint64_t curr_stamp; rxd_ep_lock_if_required(ep); rx_entry = &ep->rx_entry_fs->buf[ctrl->rx_key]; ret = rxd_check_data_pkt_order(ep, peer, ctrl, rx_entry); if (ret == RXD_PKT_ORDR_DUP) { FI_DBG(&rxd_prov, FI_LOG_EP_CTRL, "duplicate pkt: %d expected:%d, rx-key:%d, ctrl_msg_id: %p\n", ctrl->seg_no, rx_entry->exp_seg_no, ctrl->rx_key, ctrl->msg_id); win_sz = (rx_entry->msg_id == ctrl->msg_id && rx_entry->last_win_seg == ctrl->seg_no) ? rx_entry->window : 0; rxd_ep_reply_ack(ep, ctrl, ofi_ctrl_ack, win_sz, ctrl->rx_key, peer->conn_data, ctrl->conn_id); goto repost; } else if (ret == RXD_PKT_ORDR_UNEXP) { if (!(comp->flags & RXD_UNEXP_ENTRY)) { curr_stamp = fi_gettime_us(); if (rx_entry->nack_stamp == 0 || (curr_stamp > rx_entry->nack_stamp && curr_stamp - rx_entry->nack_stamp > RXD_RETRY_TIMEOUT)) { FI_DBG(&rxd_prov, FI_LOG_EP_CTRL, "unexpected pkt, sending NACK: %d\n", ctrl->seg_no); rx_entry->nack_stamp = curr_stamp; rxd_ep_reply_nack(ep, ctrl, rx_entry->exp_seg_no, ctrl->rx_key, peer->conn_data, ctrl->conn_id); } rxd_ep_enqueue_pkt(ep, ctrl, comp); } goto out; } rx_entry->nack_stamp = 0; FI_DBG(&rxd_prov, FI_LOG_EP_CTRL, "expected pkt: %d\n", ctrl->seg_no); switch (rx_entry->op_hdr.op) { case ofi_op_msg: rxd_ep_handle_data_msg(ep, peer, rx_entry, rx_entry->recv->iov, rx_entry->recv->msg.iov_count, ctrl, pkt_data->data, rx_buf); break; case ofi_op_tagged: rxd_ep_handle_data_msg(ep, peer, rx_entry, rx_entry->trecv->iov, rx_entry->trecv->msg.iov_count, ctrl, pkt_data->data, rx_buf); break; case ofi_op_write: rxd_ep_handle_data_msg(ep, peer, rx_entry, rx_entry->write.iov, rx_entry->op_hdr.iov_count, ctrl, pkt_data->data, rx_buf); break; case ofi_op_read_rsp: tx_entry = rx_entry->read_rsp.tx_entry; rxd_ep_handle_data_msg(ep, peer, rx_entry, tx_entry->read_req.dst_iov, tx_entry->read_req.msg.iov_count, ctrl, pkt_data->data, rx_buf); break; case ofi_op_atomic: default: FI_WARN(&rxd_prov, FI_LOG_EP_CTRL, "invalid op type\n"); } repost: if (comp->flags & RXD_UNEXP_ENTRY) { rxd_release_unexp_entry(ep->rx_cq, comp); ep->num_unexp_pkt--; } rxd_ep_repost_buff(rx_buf); out: rxd_ep_unlock_if_required(ep); }
void rxd_ep_handle_data_msg(struct rxd_ep *ep, struct rxd_peer *peer, struct rxd_rx_entry *rx_entry, struct iovec *iov, size_t iov_count, struct ofi_ctrl_hdr *ctrl, void *data, struct rxd_rx_buf *rx_buf) { uint64_t done; ep->credits++; done = ofi_copy_to_iov(iov, iov_count, rx_entry->done, data, ctrl->seg_size); rx_entry->done += done; rx_entry->window--; rx_entry->exp_seg_no++; if (done != ctrl->seg_size) { /* todo: generate truncation error */ /* inform peer */ FI_WARN(&rxd_prov, FI_LOG_EP_CTRL, "TODO: message truncated\n"); } if (rx_entry->window == 0) { rx_entry->window = rxd_get_window_sz(ep, rx_entry->op_hdr.size - rx_entry->done); rx_entry->last_win_seg += rx_entry->window; ep->credits -= rx_entry->window; FI_DBG(&rxd_prov, FI_LOG_EP_CTRL, "replying ack [%p] - %d\n", ctrl->msg_id, ctrl->seg_no); rxd_ep_reply_ack(ep, ctrl, ofi_ctrl_ack, rx_entry->window, rx_entry->key, peer->conn_data, ctrl->conn_id); } if (rx_entry->op_hdr.size != rx_entry->done) { if (rx_entry->window == 0) { dlist_init(&rx_entry->wait_entry); dlist_insert_tail(&rx_entry->wait_entry, &ep->wait_rx_list); FI_WARN(&rxd_prov, FI_LOG_EP_CTRL, "rx-entry %p - %d enqueued\n", ctrl->msg_id, ctrl->seg_no); } else { FI_DBG(&rxd_prov, FI_LOG_EP_CTRL, "rx_entry->op_hdr.size: %d, rx_entry->done: %d\n", rx_entry->op_hdr.size, rx_entry->done); } return; } FI_DBG(&rxd_prov, FI_LOG_EP_CTRL, "reporting RX completion event\n"); rxd_report_rx_comp(ep->rx_cq, rx_entry); switch(rx_entry->op_hdr.op) { case ofi_op_msg: freestack_push(ep->recv_fs, rx_entry->recv); break; case ofi_op_tagged: freestack_push(ep->trecv_fs, rx_entry->trecv); break; case ofi_op_read_rsp: rxd_cq_report_tx_comp(ep->tx_cq, rx_entry->read_rsp.tx_entry); rxd_tx_entry_done(ep, rx_entry->read_rsp.tx_entry); break; default: break; } rxd_rx_entry_release(ep, rx_entry); }
static void rxd_handle_data(struct rxd_ep *ep, struct rxd_peer *peer, struct ofi_ctrl_hdr *ctrl, struct fi_cq_msg_entry *comp, struct rxd_rx_buf *rx_buf) { struct rxd_rx_entry *rx_entry; struct rxd_tx_entry *tx_entry; struct rxd_pkt_data *pkt_data = (struct rxd_pkt_data *) ctrl; uint16_t credits; int ret; FI_DBG(&rxd_prov, FI_LOG_EP_CTRL, "data pkt- msg_id: %" PRIu64 ", segno: %d, buf: %p\n", ctrl->msg_id, ctrl->seg_no, rx_buf); rx_entry = &ep->rx_entry_fs->buf[ctrl->rx_key]; ret = rxd_check_data_pkt_order(ep, peer, ctrl, rx_entry); if (ret) { if (ret == -FI_EALREADY) { FI_DBG(&rxd_prov, FI_LOG_EP_CTRL, "duplicate pkt: %d " "expected:%d, rx-key:%d, ctrl_msg_id: %p\n", ctrl->seg_no, rx_entry->exp_seg_no, ctrl->rx_key, ctrl->msg_id); credits = ((rx_entry->msg_id == ctrl->msg_id) && (rx_entry->last_win_seg == ctrl->seg_no)) ? rx_entry->credits : 0; rxd_ep_reply_ack(ep, ctrl, ofi_ctrl_ack, credits, ctrl->rx_key, peer->conn_data, ctrl->conn_id); goto repost; } else { FI_DBG(&rxd_prov, FI_LOG_EP_CTRL, "invalid pkt: segno: %d " "expected:%d, rx-key:%d, ctrl_msg_id: %ld, " "rx_entry_msg_id: %ld\n", ctrl->seg_no, rx_entry->exp_seg_no, ctrl->rx_key, ctrl->msg_id, rx_entry->msg_id); FI_DBG(&rxd_prov, FI_LOG_EP_CTRL, "invalid pkt: " "credits: %d, last win: %d\n", rx_entry->credits, rx_entry->last_win_seg); credits = (rx_entry->msg_id == ctrl->msg_id) ? rx_entry->last_win_seg - rx_entry->exp_seg_no : 0; rxd_ep_reply_ack(ep, ctrl, ofi_ctrl_ack, credits, ctrl->rx_key, peer->conn_data, ctrl->conn_id); goto repost; } } rx_entry->nack_stamp = 0; FI_DBG(&rxd_prov, FI_LOG_EP_CTRL, "expected pkt: %d\n", ctrl->seg_no); switch (rx_entry->op_hdr.op) { case ofi_op_msg: rxd_ep_handle_data_msg(ep, peer, rx_entry, rx_entry->recv->iov, rx_entry->recv->msg.iov_count, ctrl, pkt_data->data, rx_buf); break; case ofi_op_tagged: rxd_ep_handle_data_msg(ep, peer, rx_entry, rx_entry->trecv->iov, rx_entry->trecv->msg.iov_count, ctrl, pkt_data->data, rx_buf); break; case ofi_op_write: rxd_ep_handle_data_msg(ep, peer, rx_entry, rx_entry->write.iov, rx_entry->op_hdr.iov_count, ctrl, pkt_data->data, rx_buf); break; case ofi_op_read_rsp: tx_entry = rx_entry->read_rsp.tx_entry; rxd_ep_handle_data_msg(ep, peer, rx_entry, tx_entry->read_req.dst_iov, tx_entry->read_req.msg.iov_count, ctrl, pkt_data->data, rx_buf); break; case ofi_op_atomic: default: FI_WARN(&rxd_prov, FI_LOG_EP_CTRL, "invalid op type\n"); } repost: rxd_ep_repost_buff(rx_buf); }
void rxd_ep_handle_data_msg(struct rxd_ep *ep, struct rxd_peer *peer, struct rxd_rx_entry *rx_entry, struct iovec *iov, size_t iov_count, struct ofi_ctrl_hdr *ctrl, void *data, struct rxd_rx_buf *rx_buf) { struct fi_cq_tagged_entry cq_entry = {0}; struct util_cntr *cntr = NULL; uint64_t done; struct rxd_cq *rxd_rx_cq = rxd_ep_rx_cq(ep); ep->credits++; done = ofi_copy_to_iov(iov, iov_count, rx_entry->done, data, ctrl->seg_size); rx_entry->done += done; rx_entry->credits--; rx_entry->exp_seg_no++; if (done != ctrl->seg_size) { /* todo: generate truncation error */ /* inform peer */ FI_WARN(&rxd_prov, FI_LOG_EP_CTRL, "TODO: message truncated\n"); } if (rx_entry->credits == 0) { rxd_set_rx_credits(ep, rx_entry); FI_DBG(&rxd_prov, FI_LOG_EP_CTRL, "replying ack [%p] - %d\n", ctrl->msg_id, ctrl->seg_no); rxd_ep_reply_ack(ep, ctrl, ofi_ctrl_ack, rx_entry->credits, rx_entry->key, peer->conn_data, ctrl->conn_id); } if (rx_entry->op_hdr.size != rx_entry->done) { if (rx_entry->credits == 0) { dlist_init(&rx_entry->wait_entry); dlist_insert_tail(&rx_entry->wait_entry, &ep->wait_rx_list); FI_WARN(&rxd_prov, FI_LOG_EP_CTRL, "rx-entry %p - %d enqueued\n", ctrl->msg_id, ctrl->seg_no); } else { FI_DBG(&rxd_prov, FI_LOG_EP_CTRL, "rx_entry->op_hdr.size: %d, rx_entry->done: %d\n", rx_entry->op_hdr.size, rx_entry->done); } return; } /* todo: handle FI_COMPLETION for RX CQ comp */ switch(rx_entry->op_hdr.op) { case ofi_op_msg: freestack_push(ep->recv_fs, rx_entry->recv); /* Handle cntr */ cntr = ep->util_ep.rx_cntr; /* Handle CQ comp */ cq_entry.flags |= FI_RECV; cq_entry.op_context = rx_entry->recv->msg.context; cq_entry.len = rx_entry->done; cq_entry.buf = rx_entry->recv->iov[0].iov_base; cq_entry.data = rx_entry->op_hdr.data; rxd_rx_cq->write_fn(rxd_rx_cq, &cq_entry); break; case ofi_op_tagged: freestack_push(ep->trecv_fs, rx_entry->trecv); /* Handle cntr */ cntr = ep->util_ep.rx_cntr; /* Handle CQ comp */ cq_entry.flags |= (FI_RECV | FI_TAGGED); cq_entry.op_context = rx_entry->trecv->msg.context; cq_entry.len = rx_entry->done; cq_entry.buf = rx_entry->trecv->iov[0].iov_base; cq_entry.data = rx_entry->op_hdr.data; cq_entry.tag = rx_entry->trecv->msg.tag;\ rxd_rx_cq->write_fn(rxd_rx_cq, &cq_entry); break; case ofi_op_atomic: /* Handle cntr */ cntr = ep->util_ep.rem_wr_cntr; /* Handle CQ comp */ cq_entry.flags |= FI_ATOMIC; rxd_rx_cq->write_fn(rxd_rx_cq, &cq_entry); break; case ofi_op_write: /* Handle cntr */ cntr = ep->util_ep.rem_wr_cntr; /* Handle CQ comp */ if (rx_entry->op_hdr.flags & OFI_REMOTE_CQ_DATA) { cq_entry.flags |= (FI_RMA | FI_REMOTE_WRITE); cq_entry.op_context = rx_entry->trecv->msg.context; cq_entry.len = rx_entry->done; cq_entry.buf = rx_entry->write.iov[0].iov_base; cq_entry.data = rx_entry->op_hdr.data; rxd_rx_cq->write_fn(rxd_rx_cq, &cq_entry); } break; case ofi_op_read_rsp: rxd_cq_report_tx_comp(rxd_ep_tx_cq(ep), rx_entry->read_rsp.tx_entry); rxd_cntr_report_tx_comp(ep, rx_entry->read_rsp.tx_entry); rxd_tx_entry_done(ep, rx_entry->read_rsp.tx_entry); break; default: FI_WARN(&rxd_prov, FI_LOG_EP_CTRL, "invalid op type: %d\n", rx_entry->op_hdr.op); break; } if (cntr) cntr->cntr_fid.ops->add(&cntr->cntr_fid, 1); rxd_rx_entry_free(ep, rx_entry); }