/* Tear down all state associated with a peer: free unacked packets and
 * every pending tx/rx/rma-rx entry, then remove the peer from the active
 * list and mark it inactive. */
static void rxd_close_peer(struct rxd_ep *ep, struct rxd_peer *peer)
{
	struct rxd_pkt_entry *pkt;
	struct rxd_x_entry *xfer;

	/* Drop packets still awaiting acknowledgment, keeping the
	 * outstanding-packet counter in sync. */
	while (!dlist_empty(&peer->unacked)) {
		dlist_pop_front(&peer->unacked, struct rxd_pkt_entry,
				pkt, d_entry);
		ofi_buf_free(pkt);
		peer->unacked_cnt--;
	}

	/* Release every in-flight transmit entry. */
	while (!dlist_empty(&peer->tx_list)) {
		dlist_pop_front(&peer->tx_list, struct rxd_x_entry,
				xfer, entry);
		rxd_tx_entry_free(ep, xfer);
	}

	/* Release every in-flight receive entry. */
	while (!dlist_empty(&peer->rx_list)) {
		dlist_pop_front(&peer->rx_list, struct rxd_x_entry,
				xfer, entry);
		rxd_rx_entry_free(ep, xfer);
	}

	/* RMA-rx entries are tracked as tx entries, so they go back
	 * through the tx free path. */
	while (!dlist_empty(&peer->rma_rx_list)) {
		dlist_pop_front(&peer->rma_rx_list, struct rxd_x_entry,
				xfer, entry);
		rxd_tx_entry_free(ep, xfer);
	}

	dlist_remove(&peer->entry);
	peer->active = 0;
}
/* Handle a peer that timed out: report FI_ECONNREFUSED for every pending
 * transmit, discard unacked packets, and unlink the peer.
 *
 * Bug fix: the original code called rxd_tx_entry_free() and THEN read
 * tx_entry->cq_entry to fill in the error entry — a use-after-free.
 * The completion fields are now captured before the entry is freed. */
static void rxd_peer_timeout(struct rxd_ep *rxd_ep, struct rxd_peer *peer)
{
	struct fi_cq_err_entry err_entry;
	struct rxd_x_entry *tx_entry;
	struct rxd_pkt_entry *pkt_entry;
	int ret;

	while (!dlist_empty(&peer->tx_list)) {
		dlist_pop_front(&peer->tx_list, struct rxd_x_entry,
				tx_entry, entry);
		memset(&err_entry, 0, sizeof(struct fi_cq_err_entry));
		/* Read the completion context/flags while tx_entry is
		 * still valid, then free it. */
		err_entry.op_context = tx_entry->cq_entry.op_context;
		err_entry.flags = tx_entry->cq_entry.flags;
		err_entry.err = FI_ECONNREFUSED;
		err_entry.prov_errno = 0;
		rxd_tx_entry_free(rxd_ep, tx_entry);
		ret = ofi_cq_write_error(&rxd_ep_tx_cq(rxd_ep)->util_cq,
					 &err_entry);
		if (ret)
			FI_WARN(&rxd_prov, FI_LOG_EP_CTRL,
				"could not write error entry\n");
	}

	/* Free packets that will never be acknowledged. */
	while (!dlist_empty(&peer->unacked)) {
		dlist_pop_front(&peer->unacked, struct rxd_pkt_entry,
				pkt_entry, d_entry);
		ofi_buf_free(pkt_entry);
		peer->unacked_cnt--;
	}

	dlist_remove(&peer->entry);
}
void rxd_tx_entry_done(struct rxd_ep *ep, struct rxd_tx_entry *tx_entry) { struct rxd_pkt_meta *pkt_meta; while (!dlist_empty(&tx_entry->pkt_list)) { pkt_meta = container_of(tx_entry->pkt_list.next, struct rxd_pkt_meta, entry); dlist_remove(&pkt_meta->entry); if (pkt_meta->flags & RXD_LOCAL_COMP) rxd_tx_pkt_free(pkt_meta); else pkt_meta->flags |= RXD_REMOTE_ACK; } rxd_tx_entry_free(ep, tx_entry); }
/* fi_readmsg handler: issue an RMA read request to a remote peer.
 * Connects to the peer first if needed; returns -FI_EAGAIN while the
 * connection is being established so the caller retries. */
ssize_t rxd_ep_readmsg(struct fid_ep *ep, const struct fi_msg_rma *msg,
	uint64_t flags)
{
	struct rxd_ep *rxd_ep;
	struct rxd_peer *peer;
	struct rxd_tx_entry *tx_entry;
	uint64_t peer_addr;
	ssize_t ret;

	rxd_ep = container_of(ep, struct rxd_ep, util_ep.ep_fid);
	/* Resolve the fi_addr to the underlying datagram address. */
	peer_addr = rxd_av_dg_addr(rxd_ep_av(rxd_ep), msg->addr);
	peer = rxd_ep_getpeer_info(rxd_ep, peer_addr);

	fastlock_acquire(&rxd_ep->lock);
	if (peer->state != CMAP_CONNECTED) {
		/* Kick off (or continue) the connection handshake; the
		 * lock is released before any progress/return below. */
		ret = rxd_ep_connect(rxd_ep, peer, peer_addr);
		fastlock_release(&rxd_ep->lock);
		if (ret == -FI_EALREADY) {
			/* Handshake in flight: drive progress once, then
			 * tell the caller to retry. */
			rxd_ep->util_ep.progress(&rxd_ep->util_ep);
			ret = -FI_EAGAIN;
		}
		/* Connect succeeded (ret == 0) still means "retry":
		 * the transfer can only start once connected. */
		return ret ? ret : -FI_EAGAIN;
	}

	tx_entry = rxd_tx_entry_alloc(rxd_ep, peer, peer_addr, flags,
				      RXD_TX_READ_REQ);
	if (!tx_entry) {
		ret = -FI_EAGAIN;
		goto out;
	}

	/* Record the request: local destination iovs and remote source
	 * rma iovs are copied into the tx entry. */
	tx_entry->read_req.msg = *msg;
	memcpy(&tx_entry->read_req.dst_iov[0], msg->msg_iov,
	       sizeof(*msg->msg_iov) * msg->iov_count);
	memcpy(&tx_entry->read_req.src_iov[0], msg->rma_iov,
	       sizeof(*msg->rma_iov) * msg->rma_iov_count);

	ret = rxd_ep_start_xfer(rxd_ep, peer, ofi_op_read_req, tx_entry);
	if (ret)
		rxd_tx_entry_free(rxd_ep, tx_entry);

out:
	fastlock_release(&rxd_ep->lock);
	return ret;
}
/* Inject-path RMA write: the payload is small enough (asserted below)
 * to be copied into the packet, so no source buffers are retained.
 * Returns 0 on success or -FI_EAGAIN when resources are unavailable. */
static ssize_t rxd_generic_write_inject(struct rxd_ep *rxd_ep,
		const struct iovec *iov, size_t iov_count,
		const struct fi_rma_iov *rma_iov, size_t rma_count,
		fi_addr_t addr, void *context, uint32_t op,
		uint64_t data, uint32_t rxd_flags)
{
	struct rxd_x_entry *tx;
	fi_addr_t rxd_addr;
	ssize_t ret = -FI_EAGAIN;

	assert(iov_count <= RXD_IOV_LIMIT && rma_count <= RXD_IOV_LIMIT);
	assert(ofi_total_iov_len(iov, iov_count) <=
	       rxd_ep_domain(rxd_ep)->max_inline_rma);

	/* Endpoint lock first, then CQ lock — released in reverse order. */
	fastlock_acquire(&rxd_ep->util_ep.lock);
	fastlock_acquire(&rxd_ep->util_ep.tx_cq->cq_lock);

	/* No room to report a completion later: back off now. */
	if (ofi_cirque_isfull(rxd_ep->util_ep.tx_cq->cirq))
		goto unlock;

	rxd_addr = rxd_ep_av(rxd_ep)->fi_addr_table[addr];
	ret = rxd_send_rts_if_needed(rxd_ep, rxd_addr);
	if (ret)
		goto unlock;

	tx = rxd_tx_entry_init(rxd_ep, iov, iov_count, NULL, 0, rma_count,
			       data, 0, context, rxd_addr, op, rxd_flags);
	if (!tx)
		goto unlock;	/* ret is still -FI_EAGAIN */

	ret = rxd_ep_send_op(rxd_ep, tx, rma_iov, rma_count, NULL, 0, 0, 0);
	if (ret) {
		rxd_tx_entry_free(rxd_ep, tx);
		goto unlock;
	}

	/* ret is 0 here either way; the READ_REQ check mirrors the
	 * generic path's structure. */
	if (tx->op == RXD_READ_REQ)
		goto unlock;

	ret = 0;
unlock:
	fastlock_release(&rxd_ep->util_ep.tx_cq->cq_lock);
	fastlock_release(&rxd_ep->util_ep.lock);
	return ret;
}
/* Generic RMA entry point shared by read/write paths.  Inject-flagged
 * requests are diverted to the copy-based inject path; everything else
 * allocates a tx entry and starts the transfer.  Returns 0 on success
 * or -FI_EAGAIN when a resource is temporarily unavailable. */
ssize_t rxd_generic_rma(struct rxd_ep *rxd_ep, const struct iovec *iov,
	size_t iov_count, const struct fi_rma_iov *rma_iov, size_t rma_count,
	void **desc, fi_addr_t addr, void *context, uint32_t op,
	uint64_t data, uint32_t rxd_flags)
{
	struct rxd_x_entry *tx;
	fi_addr_t rxd_addr;
	ssize_t ret = -FI_EAGAIN;

	/* Inject requests copy the data immediately; hand them off. */
	if (rxd_flags & RXD_INJECT)
		return rxd_generic_write_inject(rxd_ep, iov, iov_count,
						rma_iov, rma_count, addr,
						context, op, data, rxd_flags);

	assert(iov_count <= RXD_IOV_LIMIT && rma_count <= RXD_IOV_LIMIT);

	/* Endpoint lock first, then CQ lock — released in reverse order. */
	fastlock_acquire(&rxd_ep->util_ep.lock);
	fastlock_acquire(&rxd_ep->util_ep.tx_cq->cq_lock);

	/* No room to report a completion later: back off now. */
	if (ofi_cirque_isfull(rxd_ep->util_ep.tx_cq->cirq))
		goto unlock;

	rxd_addr = rxd_ep_av(rxd_ep)->fi_addr_table[addr];
	ret = rxd_send_rts_if_needed(rxd_ep, rxd_addr);
	if (ret)
		goto unlock;

	tx = rxd_tx_entry_init(rxd_ep, iov, iov_count, NULL, 0, rma_count,
			       data, 0, context, rxd_addr, op, rxd_flags);
	if (!tx)
		goto unlock;	/* ret is still -FI_EAGAIN */

	ret = rxd_ep_send_op(rxd_ep, tx, rma_iov, rma_count, NULL, 0, 0, 0);
	if (ret)
		rxd_tx_entry_free(rxd_ep, tx);

unlock:
	fastlock_release(&rxd_ep->util_ep.tx_cq->cq_lock);
	fastlock_release(&rxd_ep->util_ep.lock);
	return ret;
}
/* Process an incoming ACK control packet: release acknowledged packets
 * from the matching tx entry, report completion when the transfer has
 * fully drained, otherwise advance the sender's window. */
static void rxd_handle_ack(struct rxd_ep *ep, struct ofi_ctrl_hdr *ctrl,
	struct rxd_rx_buf *rx_buf)
{
	struct rxd_tx_entry *tx_entry;
	uint64_t idx;

	FI_DBG(&rxd_prov, FI_LOG_EP_CTRL,
	       "ack- msg_id: %" PRIu64 ", segno: %d, segsz: %d, buf: %p\n",
	       ctrl->msg_id, ctrl->seg_no, ctrl->seg_size, rx_buf);

	/* The low bits of msg_id index directly into the tx entry pool. */
	idx = ctrl->msg_id & RXD_TX_IDX_BITS;
	tx_entry = &ep->tx_entry_fs->buf[idx];
	/* Stale/recycled entry: the full msg_id no longer matches, so
	 * this ACK refers to an already-completed transfer — drop it. */
	if (tx_entry->msg_id != ctrl->msg_id)
		goto out;

	rxd_ep_free_acked_pkts(ep, tx_entry, ctrl->seg_no);

	if ((tx_entry->bytes_sent == tx_entry->op_hdr.size) &&
	    dlist_empty(&tx_entry->pkt_list)) {
		/* Everything sent and everything acked: transfer done. */
		FI_DBG(&rxd_prov, FI_LOG_EP_CTRL,
		       "reporting TX completion : %p\n", tx_entry);
		/* Read requests complete when the response data arrives,
		 * not when the request itself is acked. */
		if (tx_entry->op_type != RXD_TX_READ_REQ) {
			rxd_cq_report_tx_comp(rxd_ep_tx_cq(ep), tx_entry);
			rxd_cntr_report_tx_comp(ep, tx_entry);
			rxd_tx_entry_free(ep, tx_entry);
		}
	} else {
		tx_entry->rx_key = ctrl->rx_key;

		/* do not allow reduce window size (on duplicate acks) */
		tx_entry->window = MAX(tx_entry->window,
				       ctrl->seg_no + ctrl->seg_size);

		FI_DBG(&rxd_prov, FI_LOG_EP_CTRL,
		       "ack- msg_id: %" PRIu64 ", window: %d\n",
		       ctrl->msg_id, tx_entry->window);
	}
out:
	rxd_ep_repost_buff(rx_buf);
}
int rxd_process_start_data(struct rxd_ep *ep, struct rxd_rx_entry *rx_entry, struct rxd_peer *peer, struct ofi_ctrl_hdr *ctrl, struct fi_cq_msg_entry *comp, struct rxd_rx_buf *rx_buf) { uint64_t idx; int i, offset, ret; struct ofi_rma_iov *rma_iov; struct rxd_pkt_data_start *pkt_start; struct rxd_tx_entry *tx_entry; pkt_start = (struct rxd_pkt_data_start *) ctrl; switch (rx_entry->op_hdr.op) { case ofi_op_msg: rx_entry->recv = rxd_get_recv_entry(ep, rx_entry); if (!rx_entry->recv) { if (ep->num_unexp_msg < RXD_EP_MAX_UNEXP_MSG) { dlist_insert_tail(&rx_entry->unexp_entry, &ep->unexp_msg_list); rx_entry->unexp_buf = rx_buf; ep->num_unexp_msg++; return -FI_ENOENT; } else { FI_WARN(&rxd_prov, FI_LOG_EP_CTRL, "dropping msg\n"); return -FI_ENOMEM; } } rxd_ep_handle_data_msg(ep, peer, rx_entry, rx_entry->recv->iov, rx_entry->recv->msg.iov_count, ctrl, pkt_start->data, rx_buf); break; case ofi_op_tagged: rx_entry->trecv = rxd_get_trecv_entry(ep, rx_entry); if (!rx_entry->trecv) { if (ep->num_unexp_msg < RXD_EP_MAX_UNEXP_MSG) { dlist_insert_tail(&rx_entry->unexp_entry, &ep->unexp_tag_list); rx_entry->unexp_buf = rx_buf; ep->num_unexp_msg++; return -FI_ENOENT; } else { FI_WARN(&rxd_prov, FI_LOG_EP_CTRL, "dropping msg\n"); return -FI_ENOMEM; } } rxd_ep_handle_data_msg(ep, peer, rx_entry, rx_entry->trecv->iov, rx_entry->trecv->msg.iov_count, ctrl, pkt_start->data, rx_buf); break; case ofi_op_write: rma_iov = (struct ofi_rma_iov *) pkt_start->data; for (i = 0; i < rx_entry->op_hdr.iov_count; i++) { ret = rxd_mr_verify(rxd_ep_domain(ep), rma_iov[i].len, (uintptr_t *) &rma_iov[i].addr, rma_iov[i].key, FI_REMOTE_WRITE); if (ret) { /* todo: handle invalid key case */ FI_WARN(&rxd_prov, FI_LOG_EP_CTRL, "invalid key/access permissions\n"); return -FI_EACCES; } rx_entry->write.iov[i].iov_base = (void *) (uintptr_t) rma_iov[i].addr; rx_entry->write.iov[i].iov_len = rma_iov[i].len; } offset = sizeof(struct ofi_rma_iov) * rx_entry->op_hdr.iov_count; ctrl->seg_size -= offset; 
rxd_ep_handle_data_msg(ep, peer, rx_entry, rx_entry->write.iov, rx_entry->op_hdr.iov_count, ctrl, pkt_start->data + offset, rx_buf); break; case ofi_op_read_req: rma_iov = (struct ofi_rma_iov *) pkt_start->data; tx_entry = rxd_tx_entry_alloc(ep, peer, rx_entry->peer, 0, RXD_TX_READ_RSP); if (!tx_entry) { FI_WARN(&rxd_prov, FI_LOG_EP_CTRL, "no free tx-entry\n"); return -FI_ENOMEM; } tx_entry->peer = rx_entry->peer; tx_entry->read_rsp.iov_count = rx_entry->op_hdr.iov_count; for (i = 0; i < rx_entry->op_hdr.iov_count; i++) { ret = rxd_mr_verify(rxd_ep_domain(ep), rma_iov[i].len, (uintptr_t *) &rma_iov[i].addr, rma_iov[i].key, FI_REMOTE_READ); if (ret) { /* todo: handle invalid key case */ FI_WARN(&rxd_prov, FI_LOG_EP_CTRL, "invalid key/access permissions\n"); return -FI_EACCES; } tx_entry->read_rsp.src_iov[i].iov_base = (void *) (uintptr_t) rma_iov[i].addr; tx_entry->read_rsp.src_iov[i].iov_len = rma_iov[i].len; } tx_entry->read_rsp.peer_msg_id = ctrl->msg_id; ret = rxd_ep_start_xfer(ep, peer, ofi_op_read_rsp, tx_entry); if (ret) rxd_tx_entry_free(ep, tx_entry); rxd_rx_entry_free(ep, rx_entry); break; case ofi_op_read_rsp: idx = rx_entry->op_hdr.remote_idx & RXD_TX_IDX_BITS; tx_entry = &ep->tx_entry_fs->buf[idx]; if (tx_entry->msg_id != rx_entry->op_hdr.remote_idx) return -FI_ENOMEM; rx_entry->read_rsp.tx_entry = tx_entry; rxd_ep_handle_data_msg(ep, peer, rx_entry, tx_entry->read_req.dst_iov, tx_entry->read_req.msg.iov_count, ctrl, pkt_start->data, rx_buf); break; case ofi_op_atomic: default: FI_WARN(&rxd_prov, FI_LOG_EP_CTRL, "invalid op type\n"); return -FI_EINVAL; } return 0; }