/*
 * Return an rx_entry to the endpoint's free stack.
 *
 * The entry's key is invalidated so a stale lookup can no longer match
 * it, and it is unlinked from whatever list currently holds it.  If the
 * endpoint has credits and receives are parked on the wait list, kick
 * them so they can make progress with the freed resources.
 */
void rxd_rx_entry_release(struct rxd_ep *ep, struct rxd_rx_entry *rx_entry)
{
	rx_entry->key = -1;
	dlist_remove(&rx_entry->entry);
	freestack_push(ep->rx_entry_fs, rx_entry);

	if (!ep->credits)
		return;

	if (!dlist_empty(&ep->wait_rx_list))
		rxd_check_waiting_rx(ep);
}
static int smr_ep_cancel_recv(struct smr_ep *ep, struct smr_queue *queue, void *context) { struct smr_ep_entry *recv_entry; struct dlist_entry *entry; int ret = 0; fastlock_acquire(&ep->util_ep.rx_cq->cq_lock); entry = dlist_remove_first_match(&queue->list, smr_match_recv_ctx, context); if (entry) { recv_entry = container_of(entry, struct smr_ep_entry, entry); ret = ep->rx_comp(ep, (void *) recv_entry->context, recv_entry->flags | FI_RECV, 0, NULL, (void *) recv_entry->addr, recv_entry->tag, 0, FI_ECANCELED); freestack_push(ep->recv_fs, recv_entry); ret = ret ? ret : 1; } fastlock_release(&ep->util_ep.rx_cq->cq_lock); return ret; }
/*
 * Handle one incoming data segment for an in-progress receive
 * (window-based flow control variant).
 *
 * Copies the segment payload into the user's iov at the current offset,
 * advances the transfer bookkeeping, replenishes and acks the receive
 * window when it is exhausted, and — once the whole message has
 * arrived — reports the RX completion and releases per-op resources.
 */
void rxd_ep_handle_data_msg(struct rxd_ep *ep, struct rxd_peer *peer,
			    struct rxd_rx_entry *rx_entry,
			    struct iovec *iov, size_t iov_count,
			    struct ofi_ctrl_hdr *ctrl, void *data,
			    struct rxd_rx_buf *rx_buf)
{
	uint64_t done;

	/* The rx buffer consumed by this segment is available again. */
	ep->credits++;
	done = ofi_copy_to_iov(iov, iov_count, rx_entry->done,
				data, ctrl->seg_size);

	rx_entry->done += done;
	rx_entry->window--;
	rx_entry->exp_seg_no++;

	/* Short copy means the user buffer could not hold the segment. */
	if (done != ctrl->seg_size) {
		/* todo: generate truncation error */
		/* inform peer */
		FI_WARN(&rxd_prov, FI_LOG_EP_CTRL, "TODO: message truncated\n");
	}

	if (rx_entry->window == 0) {
		/* Window exhausted: size a new one from the bytes still
		 * outstanding and the endpoint credit pool, then ack the
		 * received run so the peer may keep sending. */
		rx_entry->window = rxd_get_window_sz(ep, rx_entry->op_hdr.size -
						     rx_entry->done);

		rx_entry->last_win_seg += rx_entry->window;
		ep->credits -= rx_entry->window;
		FI_DBG(&rxd_prov, FI_LOG_EP_CTRL, "replying ack [%p] - %d\n",
			ctrl->msg_id, ctrl->seg_no);

		rxd_ep_reply_ack(ep, ctrl, ofi_ctrl_ack, rx_entry->window,
			      rx_entry->key, peer->conn_data,
			      ctrl->conn_id);
	}

	if (rx_entry->op_hdr.size != rx_entry->done) {
		/* Message not yet complete. */
		if (rx_entry->window == 0) {
			/* No credits were available to open a new window;
			 * park this entry on the wait list until credits
			 * are released. */
			dlist_init(&rx_entry->wait_entry);
			dlist_insert_tail(&rx_entry->wait_entry,
					  &ep->wait_rx_list);
			FI_WARN(&rxd_prov, FI_LOG_EP_CTRL,
				"rx-entry %p - %d enqueued\n",
				ctrl->msg_id, ctrl->seg_no);
		} else {
			FI_DBG(&rxd_prov, FI_LOG_EP_CTRL,
				"rx_entry->op_hdr.size: %d, rx_entry->done: %d\n",
				rx_entry->op_hdr.size, rx_entry->done);
		}
		return;
	}

	/* Entire message received: report completion and recycle state. */
	FI_DBG(&rxd_prov, FI_LOG_EP_CTRL, "reporting RX completion event\n");
	rxd_report_rx_comp(ep->rx_cq, rx_entry);

	switch(rx_entry->op_hdr.op) {
	case ofi_op_msg:
		freestack_push(ep->recv_fs, rx_entry->recv);
		break;
	case ofi_op_tagged:
		freestack_push(ep->trecv_fs, rx_entry->trecv);
		break;
	case ofi_op_read_rsp:
		/* A read response completes the local read's tx_entry. */
		rxd_cq_report_tx_comp(ep->tx_cq, rx_entry->read_rsp.tx_entry);
		rxd_tx_entry_done(ep, rx_entry->read_rsp.tx_entry);
		break;
	default:
		break;
	}
	rxd_rx_entry_release(ep, rx_entry);
}
// TODO handle all flags static ssize_t rxm_ep_send_common(struct fid_ep *ep_fid, const struct iovec *iov, void **desc, size_t count, fi_addr_t dest_addr, void *context, uint64_t data, uint64_t tag, uint64_t flags, int op) { struct rxm_ep *rxm_ep; struct rxm_conn *rxm_conn; struct rxm_tx_entry *tx_entry; struct rxm_pkt *pkt; struct fid_mr *mr; void *desc_tx_buf = NULL; struct rxm_rma_iov *rma_iov; int pkt_size = 0; int i, ret; rxm_ep = container_of(ep_fid, struct rxm_ep, util_ep.ep_fid.fid); ret = rxm_get_conn(rxm_ep, dest_addr, &rxm_conn); if (ret) return ret; if (freestack_isempty(rxm_ep->txe_fs)) { FI_DBG(&rxm_prov, FI_LOG_CQ, "Exhaused tx_entry freestack\n"); return -FI_ENOMEM; } tx_entry = freestack_pop(rxm_ep->txe_fs); tx_entry->ctx_type = RXM_TX_ENTRY; tx_entry->ep = rxm_ep; tx_entry->context = context; tx_entry->flags = flags; if (rxm_ep->msg_info->mode & FI_LOCAL_MR) { pkt = util_buf_get_ex(rxm_ep->tx_pool, (void **)&mr); desc_tx_buf = fi_mr_desc(mr); } else { pkt = util_buf_get(rxm_ep->tx_pool); } assert(pkt); tx_entry->pkt = pkt; rxm_pkt_init(pkt); pkt->ctrl_hdr.conn_id = rxm_conn->handle.remote_key; pkt->hdr.op = op; pkt->hdr.size = ofi_get_iov_len(iov, count); rxm_op_hdr_process_flags(&pkt->hdr, flags, data); if (op == ofi_op_tagged) pkt->hdr.tag = tag; if (pkt->hdr.size > RXM_TX_DATA_SIZE) { if (flags & FI_INJECT) { FI_WARN(&rxm_prov, FI_LOG_EP_DATA, "inject size supported: %d, msg size: %d\n", rxm_tx_attr.inject_size, pkt->hdr.size); ret = -FI_EMSGSIZE; goto err; } tx_entry->msg_id = ofi_idx2key(&rxm_ep->tx_key_idx, rxm_txe_fs_index(rxm_ep->txe_fs, tx_entry)); pkt->ctrl_hdr.msg_id = tx_entry->msg_id; pkt->ctrl_hdr.type = ofi_ctrl_large_data; rma_iov = (struct rxm_rma_iov *)pkt->data; rma_iov->count = count; for (i = 0; i < count; i++) { rma_iov->iov[i].addr = rxm_ep->msg_info->domain_attr->mr_mode == FI_MR_SCALABLE ? 
0 : (uintptr_t)iov->iov_base; rma_iov->iov[i].len = (uint64_t)iov->iov_len; rma_iov->iov[i].key = fi_mr_key(desc[i]); } pkt_size = sizeof(*pkt) + sizeof(*rma_iov) + sizeof(*rma_iov->iov) * count; FI_DBG(&rxm_prov, FI_LOG_CQ, "Sending large msg. msg_id: 0x%" PRIx64 "\n", tx_entry->msg_id); FI_DBG(&rxm_prov, FI_LOG_CQ, "tx_entry->state -> RXM_LMT_START\n"); tx_entry->state = RXM_LMT_START; } else { pkt->ctrl_hdr.type = ofi_ctrl_data; ofi_copy_iov_buf(iov, count, pkt->data, pkt->hdr.size, 0, OFI_COPY_IOV_TO_BUF); pkt_size = sizeof(*pkt) + pkt->hdr.size; } ret = fi_send(rxm_conn->msg_ep, pkt, pkt_size, desc_tx_buf, 0, tx_entry); if (ret) { FI_WARN(&rxm_prov, FI_LOG_EP_DATA, "fi_send for MSG provider failed\n"); goto err; } return 0; err: util_buf_release(rxm_ep->tx_pool, pkt); freestack_push(rxm_ep->txe_fs, tx_entry); return ret; }
void rxd_ep_handle_data_msg(struct rxd_ep *ep, struct rxd_peer *peer, struct rxd_rx_entry *rx_entry, struct iovec *iov, size_t iov_count, struct ofi_ctrl_hdr *ctrl, void *data, struct rxd_rx_buf *rx_buf) { struct fi_cq_tagged_entry cq_entry = {0}; struct util_cntr *cntr = NULL; uint64_t done; struct rxd_cq *rxd_rx_cq = rxd_ep_rx_cq(ep); ep->credits++; done = ofi_copy_to_iov(iov, iov_count, rx_entry->done, data, ctrl->seg_size); rx_entry->done += done; rx_entry->credits--; rx_entry->exp_seg_no++; if (done != ctrl->seg_size) { /* todo: generate truncation error */ /* inform peer */ FI_WARN(&rxd_prov, FI_LOG_EP_CTRL, "TODO: message truncated\n"); } if (rx_entry->credits == 0) { rxd_set_rx_credits(ep, rx_entry); FI_DBG(&rxd_prov, FI_LOG_EP_CTRL, "replying ack [%p] - %d\n", ctrl->msg_id, ctrl->seg_no); rxd_ep_reply_ack(ep, ctrl, ofi_ctrl_ack, rx_entry->credits, rx_entry->key, peer->conn_data, ctrl->conn_id); } if (rx_entry->op_hdr.size != rx_entry->done) { if (rx_entry->credits == 0) { dlist_init(&rx_entry->wait_entry); dlist_insert_tail(&rx_entry->wait_entry, &ep->wait_rx_list); FI_WARN(&rxd_prov, FI_LOG_EP_CTRL, "rx-entry %p - %d enqueued\n", ctrl->msg_id, ctrl->seg_no); } else { FI_DBG(&rxd_prov, FI_LOG_EP_CTRL, "rx_entry->op_hdr.size: %d, rx_entry->done: %d\n", rx_entry->op_hdr.size, rx_entry->done); } return; } /* todo: handle FI_COMPLETION for RX CQ comp */ switch(rx_entry->op_hdr.op) { case ofi_op_msg: freestack_push(ep->recv_fs, rx_entry->recv); /* Handle cntr */ cntr = ep->util_ep.rx_cntr; /* Handle CQ comp */ cq_entry.flags |= FI_RECV; cq_entry.op_context = rx_entry->recv->msg.context; cq_entry.len = rx_entry->done; cq_entry.buf = rx_entry->recv->iov[0].iov_base; cq_entry.data = rx_entry->op_hdr.data; rxd_rx_cq->write_fn(rxd_rx_cq, &cq_entry); break; case ofi_op_tagged: freestack_push(ep->trecv_fs, rx_entry->trecv); /* Handle cntr */ cntr = ep->util_ep.rx_cntr; /* Handle CQ comp */ cq_entry.flags |= (FI_RECV | FI_TAGGED); cq_entry.op_context = 
rx_entry->trecv->msg.context; cq_entry.len = rx_entry->done; cq_entry.buf = rx_entry->trecv->iov[0].iov_base; cq_entry.data = rx_entry->op_hdr.data; cq_entry.tag = rx_entry->trecv->msg.tag;\ rxd_rx_cq->write_fn(rxd_rx_cq, &cq_entry); break; case ofi_op_atomic: /* Handle cntr */ cntr = ep->util_ep.rem_wr_cntr; /* Handle CQ comp */ cq_entry.flags |= FI_ATOMIC; rxd_rx_cq->write_fn(rxd_rx_cq, &cq_entry); break; case ofi_op_write: /* Handle cntr */ cntr = ep->util_ep.rem_wr_cntr; /* Handle CQ comp */ if (rx_entry->op_hdr.flags & OFI_REMOTE_CQ_DATA) { cq_entry.flags |= (FI_RMA | FI_REMOTE_WRITE); cq_entry.op_context = rx_entry->trecv->msg.context; cq_entry.len = rx_entry->done; cq_entry.buf = rx_entry->write.iov[0].iov_base; cq_entry.data = rx_entry->op_hdr.data; rxd_rx_cq->write_fn(rxd_rx_cq, &cq_entry); } break; case ofi_op_read_rsp: rxd_cq_report_tx_comp(rxd_ep_tx_cq(ep), rx_entry->read_rsp.tx_entry); rxd_cntr_report_tx_comp(ep, rx_entry->read_rsp.tx_entry); rxd_tx_entry_done(ep, rx_entry->read_rsp.tx_entry); break; default: FI_WARN(&rxd_prov, FI_LOG_EP_CTRL, "invalid op type: %d\n", rx_entry->op_hdr.op); break; } if (cntr) cntr->cntr_fid.ops->add(&cntr->cntr_fid, 1); rxd_rx_entry_free(ep, rx_entry); }