/*
 * Tear down all per-peer state: free unacked packets and abort every
 * in-flight transfer queued to this peer, then remove the peer from the
 * active list and mark it inactive.  Called when a peer connection is
 * being shut down.
 */
static void rxd_close_peer(struct rxd_ep *ep, struct rxd_peer *peer)
{
	struct rxd_pkt_entry *pkt;
	struct rxd_x_entry *entry;

	/* Release packets still awaiting an ACK from this peer. */
	while (!dlist_empty(&peer->unacked)) {
		dlist_pop_front(&peer->unacked, struct rxd_pkt_entry,
				pkt, d_entry);
		ofi_buf_free(pkt);
		peer->unacked_cnt--;
	}

	/* Abort pending transmit entries. */
	while (!dlist_empty(&peer->tx_list)) {
		dlist_pop_front(&peer->tx_list, struct rxd_x_entry,
				entry, entry);
		rxd_tx_entry_free(ep, entry);
	}

	/* Abort pending receive entries. */
	while (!dlist_empty(&peer->rx_list)) {
		dlist_pop_front(&peer->rx_list, struct rxd_x_entry,
				entry, entry);
		rxd_rx_entry_free(ep, entry);
	}

	/*
	 * RMA "rx" entries are locally generated tx entries (e.g. read
	 * responses), hence the tx free routine here.
	 */
	while (!dlist_empty(&peer->rma_rx_list)) {
		dlist_pop_front(&peer->rma_rx_list, struct rxd_x_entry,
				entry, entry);
		rxd_tx_entry_free(ep, entry);
	}

	dlist_remove(&peer->entry);
	peer->active = 0;
}
int rxd_process_start_data(struct rxd_ep *ep, struct rxd_rx_entry *rx_entry, struct rxd_peer *peer, struct ofi_ctrl_hdr *ctrl, struct fi_cq_msg_entry *comp, struct rxd_rx_buf *rx_buf) { uint64_t idx; int i, offset, ret; struct ofi_rma_iov *rma_iov; struct rxd_pkt_data_start *pkt_start; struct rxd_tx_entry *tx_entry; pkt_start = (struct rxd_pkt_data_start *) ctrl; switch (rx_entry->op_hdr.op) { case ofi_op_msg: rx_entry->recv = rxd_get_recv_entry(ep, rx_entry); if (!rx_entry->recv) { if (ep->num_unexp_msg < RXD_EP_MAX_UNEXP_MSG) { dlist_insert_tail(&rx_entry->unexp_entry, &ep->unexp_msg_list); rx_entry->unexp_buf = rx_buf; ep->num_unexp_msg++; return -FI_ENOENT; } else { FI_WARN(&rxd_prov, FI_LOG_EP_CTRL, "dropping msg\n"); return -FI_ENOMEM; } } rxd_ep_handle_data_msg(ep, peer, rx_entry, rx_entry->recv->iov, rx_entry->recv->msg.iov_count, ctrl, pkt_start->data, rx_buf); break; case ofi_op_tagged: rx_entry->trecv = rxd_get_trecv_entry(ep, rx_entry); if (!rx_entry->trecv) { if (ep->num_unexp_msg < RXD_EP_MAX_UNEXP_MSG) { dlist_insert_tail(&rx_entry->unexp_entry, &ep->unexp_tag_list); rx_entry->unexp_buf = rx_buf; ep->num_unexp_msg++; return -FI_ENOENT; } else { FI_WARN(&rxd_prov, FI_LOG_EP_CTRL, "dropping msg\n"); return -FI_ENOMEM; } } rxd_ep_handle_data_msg(ep, peer, rx_entry, rx_entry->trecv->iov, rx_entry->trecv->msg.iov_count, ctrl, pkt_start->data, rx_buf); break; case ofi_op_write: rma_iov = (struct ofi_rma_iov *) pkt_start->data; for (i = 0; i < rx_entry->op_hdr.iov_count; i++) { ret = rxd_mr_verify(rxd_ep_domain(ep), rma_iov[i].len, (uintptr_t *) &rma_iov[i].addr, rma_iov[i].key, FI_REMOTE_WRITE); if (ret) { /* todo: handle invalid key case */ FI_WARN(&rxd_prov, FI_LOG_EP_CTRL, "invalid key/access permissions\n"); return -FI_EACCES; } rx_entry->write.iov[i].iov_base = (void *) (uintptr_t) rma_iov[i].addr; rx_entry->write.iov[i].iov_len = rma_iov[i].len; } offset = sizeof(struct ofi_rma_iov) * rx_entry->op_hdr.iov_count; ctrl->seg_size -= offset; 
rxd_ep_handle_data_msg(ep, peer, rx_entry, rx_entry->write.iov, rx_entry->op_hdr.iov_count, ctrl, pkt_start->data + offset, rx_buf); break; case ofi_op_read_req: rma_iov = (struct ofi_rma_iov *) pkt_start->data; tx_entry = rxd_tx_entry_alloc(ep, peer, rx_entry->peer, 0, RXD_TX_READ_RSP); if (!tx_entry) { FI_WARN(&rxd_prov, FI_LOG_EP_CTRL, "no free tx-entry\n"); return -FI_ENOMEM; } tx_entry->peer = rx_entry->peer; tx_entry->read_rsp.iov_count = rx_entry->op_hdr.iov_count; for (i = 0; i < rx_entry->op_hdr.iov_count; i++) { ret = rxd_mr_verify(rxd_ep_domain(ep), rma_iov[i].len, (uintptr_t *) &rma_iov[i].addr, rma_iov[i].key, FI_REMOTE_READ); if (ret) { /* todo: handle invalid key case */ FI_WARN(&rxd_prov, FI_LOG_EP_CTRL, "invalid key/access permissions\n"); return -FI_EACCES; } tx_entry->read_rsp.src_iov[i].iov_base = (void *) (uintptr_t) rma_iov[i].addr; tx_entry->read_rsp.src_iov[i].iov_len = rma_iov[i].len; } tx_entry->read_rsp.peer_msg_id = ctrl->msg_id; ret = rxd_ep_start_xfer(ep, peer, ofi_op_read_rsp, tx_entry); if (ret) rxd_tx_entry_free(ep, tx_entry); rxd_rx_entry_free(ep, rx_entry); break; case ofi_op_read_rsp: idx = rx_entry->op_hdr.remote_idx & RXD_TX_IDX_BITS; tx_entry = &ep->tx_entry_fs->buf[idx]; if (tx_entry->msg_id != rx_entry->op_hdr.remote_idx) return -FI_ENOMEM; rx_entry->read_rsp.tx_entry = tx_entry; rxd_ep_handle_data_msg(ep, peer, rx_entry, tx_entry->read_req.dst_iov, tx_entry->read_req.msg.iov_count, ctrl, pkt_start->data, rx_buf); break; case ofi_op_atomic: default: FI_WARN(&rxd_prov, FI_LOG_EP_CTRL, "invalid op type\n"); return -FI_EINVAL; } return 0; }
static void rxd_handle_start_data(struct rxd_ep *ep, struct rxd_peer *peer, struct ofi_ctrl_hdr *ctrl, struct fi_cq_msg_entry *comp, struct rxd_rx_buf *rx_buf) { struct rxd_rx_entry *rx_entry; struct rxd_pkt_data_start *pkt_start; int ret; FI_DBG(&rxd_prov, FI_LOG_EP_CTRL, "start data- msg_id: %" PRIu64 ", segno: %d, buf: %p\n", ctrl->msg_id, ctrl->seg_no, rx_buf); pkt_start = (struct rxd_pkt_data_start *) ctrl; if (pkt_start->op.version != OFI_OP_VERSION) { FI_WARN(&rxd_prov, FI_LOG_EP_CTRL, "op version mismatch\n"); goto repost; } ret = rxd_check_start_pkt_order(ep, peer, ctrl, comp); if (ret) { if (ret == -FI_EALREADY) { FI_DBG(&rxd_prov, FI_LOG_EP_CTRL, "duplicate pkt: %d\n", ctrl->seg_no); rxd_handle_dup_datastart(ep, ctrl, rx_buf); goto repost; } else { FI_DBG(&rxd_prov, FI_LOG_EP_CTRL, "unexpected pkt: %d\n", ctrl->seg_no); goto repost; } } rx_entry = rxd_rx_entry_alloc(ep); if (!rx_entry) goto repost; rx_entry->peer_info = peer; rx_entry->op_hdr = pkt_start->op; rx_entry->exp_seg_no = 0; rx_entry->msg_id = ctrl->msg_id; rx_entry->done = 0; rx_entry->peer = ctrl->conn_id; rx_entry->source = (ep->util_ep.caps & FI_DIRECTED_RECV) ? rxd_av_fi_addr(rxd_ep_av(ep), ctrl->conn_id) : FI_ADDR_UNSPEC; rx_entry->credits = 1; rx_entry->last_win_seg = 1; FI_DBG(&rxd_prov, FI_LOG_EP_CTRL, "Assign rx_entry :%d for %p\n", rx_entry->key, rx_entry->msg_id); ep->credits--; ret = rxd_process_start_data(ep, rx_entry, peer, ctrl, comp, rx_buf); if (ret == -FI_ENOMEM) rxd_rx_entry_free(ep, rx_entry); else if (ret == -FI_ENOENT) { peer->exp_msg_id++; /* reply ack, with no window = 0 */ FI_DBG(&rxd_prov, FI_LOG_EP_CTRL, "Sending wait-ACK [%p] - %d\n", ctrl->msg_id, ctrl->seg_no); goto out; } else { peer->exp_msg_id++; } repost: rxd_ep_repost_buff(rx_buf); out: assert(rxd_reposted_bufs); return; }
void rxd_ep_handle_data_msg(struct rxd_ep *ep, struct rxd_peer *peer, struct rxd_rx_entry *rx_entry, struct iovec *iov, size_t iov_count, struct ofi_ctrl_hdr *ctrl, void *data, struct rxd_rx_buf *rx_buf) { struct fi_cq_tagged_entry cq_entry = {0}; struct util_cntr *cntr = NULL; uint64_t done; struct rxd_cq *rxd_rx_cq = rxd_ep_rx_cq(ep); ep->credits++; done = ofi_copy_to_iov(iov, iov_count, rx_entry->done, data, ctrl->seg_size); rx_entry->done += done; rx_entry->credits--; rx_entry->exp_seg_no++; if (done != ctrl->seg_size) { /* todo: generate truncation error */ /* inform peer */ FI_WARN(&rxd_prov, FI_LOG_EP_CTRL, "TODO: message truncated\n"); } if (rx_entry->credits == 0) { rxd_set_rx_credits(ep, rx_entry); FI_DBG(&rxd_prov, FI_LOG_EP_CTRL, "replying ack [%p] - %d\n", ctrl->msg_id, ctrl->seg_no); rxd_ep_reply_ack(ep, ctrl, ofi_ctrl_ack, rx_entry->credits, rx_entry->key, peer->conn_data, ctrl->conn_id); } if (rx_entry->op_hdr.size != rx_entry->done) { if (rx_entry->credits == 0) { dlist_init(&rx_entry->wait_entry); dlist_insert_tail(&rx_entry->wait_entry, &ep->wait_rx_list); FI_WARN(&rxd_prov, FI_LOG_EP_CTRL, "rx-entry %p - %d enqueued\n", ctrl->msg_id, ctrl->seg_no); } else { FI_DBG(&rxd_prov, FI_LOG_EP_CTRL, "rx_entry->op_hdr.size: %d, rx_entry->done: %d\n", rx_entry->op_hdr.size, rx_entry->done); } return; } /* todo: handle FI_COMPLETION for RX CQ comp */ switch(rx_entry->op_hdr.op) { case ofi_op_msg: freestack_push(ep->recv_fs, rx_entry->recv); /* Handle cntr */ cntr = ep->util_ep.rx_cntr; /* Handle CQ comp */ cq_entry.flags |= FI_RECV; cq_entry.op_context = rx_entry->recv->msg.context; cq_entry.len = rx_entry->done; cq_entry.buf = rx_entry->recv->iov[0].iov_base; cq_entry.data = rx_entry->op_hdr.data; rxd_rx_cq->write_fn(rxd_rx_cq, &cq_entry); break; case ofi_op_tagged: freestack_push(ep->trecv_fs, rx_entry->trecv); /* Handle cntr */ cntr = ep->util_ep.rx_cntr; /* Handle CQ comp */ cq_entry.flags |= (FI_RECV | FI_TAGGED); cq_entry.op_context = 
rx_entry->trecv->msg.context; cq_entry.len = rx_entry->done; cq_entry.buf = rx_entry->trecv->iov[0].iov_base; cq_entry.data = rx_entry->op_hdr.data; cq_entry.tag = rx_entry->trecv->msg.tag;\ rxd_rx_cq->write_fn(rxd_rx_cq, &cq_entry); break; case ofi_op_atomic: /* Handle cntr */ cntr = ep->util_ep.rem_wr_cntr; /* Handle CQ comp */ cq_entry.flags |= FI_ATOMIC; rxd_rx_cq->write_fn(rxd_rx_cq, &cq_entry); break; case ofi_op_write: /* Handle cntr */ cntr = ep->util_ep.rem_wr_cntr; /* Handle CQ comp */ if (rx_entry->op_hdr.flags & OFI_REMOTE_CQ_DATA) { cq_entry.flags |= (FI_RMA | FI_REMOTE_WRITE); cq_entry.op_context = rx_entry->trecv->msg.context; cq_entry.len = rx_entry->done; cq_entry.buf = rx_entry->write.iov[0].iov_base; cq_entry.data = rx_entry->op_hdr.data; rxd_rx_cq->write_fn(rxd_rx_cq, &cq_entry); } break; case ofi_op_read_rsp: rxd_cq_report_tx_comp(rxd_ep_tx_cq(ep), rx_entry->read_rsp.tx_entry); rxd_cntr_report_tx_comp(ep, rx_entry->read_rsp.tx_entry); rxd_tx_entry_done(ep, rx_entry->read_rsp.tx_entry); break; default: FI_WARN(&rxd_prov, FI_LOG_EP_CTRL, "invalid op type: %d\n", rx_entry->op_hdr.op); break; } if (cntr) cntr->cntr_fid.ops->add(&cntr->cntr_fid, 1); rxd_rx_entry_free(ep, rx_entry); }