void rxd_ep_check_unexp_tag_list(struct rxd_ep *ep, struct rxd_trecv_entry *trecv_entry) { struct dlist_entry *match; struct rxd_rx_entry *rx_entry; struct rxd_pkt_data_start *pkt_start; FI_DBG(&rxd_prov, FI_LOG_EP_CTRL, "ep->num_unexp_msg: %d\n", ep->num_unexp_msg); FI_DBG(&rxd_prov, FI_LOG_EP_CTRL, "ep->num_unexp_pkt: %d\n", ep->num_unexp_pkt); match = dlist_find_first_match(&ep->unexp_tag_list, &rxd_match_unexp_tag, (void *) trecv_entry); if (match) { dlist_remove(match); dlist_remove(&trecv_entry->entry); ep->num_unexp_msg--; rx_entry = container_of(match, struct rxd_rx_entry, unexp_entry); rx_entry->trecv = trecv_entry; FI_DBG(&rxd_prov, FI_LOG_EP_CTRL, "progressing unexp tagged recv [%p]\n", rx_entry->msg_id); pkt_start = (struct rxd_pkt_data_start *) rx_entry->unexp_buf->buf; rxd_ep_handle_data_msg(ep, rx_entry->peer_info, rx_entry, rx_entry->trecv->iov, rx_entry->trecv->msg.iov_count, &pkt_start->ctrl, pkt_start->data, rx_entry->unexp_buf); rxd_ep_repost_buff(rx_entry->unexp_buf); } }
/*
 * Process the start (first) packet of an incoming transfer, dispatching on
 * the operation type recorded in rx_entry->op_hdr.op.
 *
 * Returns 0 on success; -FI_ENOENT when a msg/tagged packet is parked as
 * unexpected (rx_buf is retained in rx_entry->unexp_buf until a matching
 * receive is posted); -FI_ENOMEM when the unexpected queue is full, no tx
 * entry is available, or a read-response msg_id mismatch occurs; -FI_EACCES
 * on an MR key/permission failure; -FI_EINVAL for an unknown op.
 *
 * 'comp' is accepted for signature symmetry with the other handlers but is
 * not used here.
 */
int rxd_process_start_data(struct rxd_ep *ep, struct rxd_rx_entry *rx_entry,
			   struct rxd_peer *peer, struct ofi_ctrl_hdr *ctrl,
			   struct fi_cq_msg_entry *comp,
			   struct rxd_rx_buf *rx_buf)
{
	uint64_t idx;
	int i, offset, ret;
	struct ofi_rma_iov *rma_iov;
	struct rxd_pkt_data_start *pkt_start;
	struct rxd_tx_entry *tx_entry;

	/* The start packet's payload immediately follows the control header. */
	pkt_start = (struct rxd_pkt_data_start *) ctrl;
	switch (rx_entry->op_hdr.op) {
	case ofi_op_msg:
		/* Try to match a posted (untagged) receive. */
		rx_entry->recv = rxd_get_recv_entry(ep, rx_entry);
		if (!rx_entry->recv) {
			if (ep->num_unexp_msg < RXD_EP_MAX_UNEXP_MSG) {
				/* Park on the unexpected-message list and
				 * keep the rx buffer for later replay. */
				dlist_insert_tail(&rx_entry->unexp_entry,
						  &ep->unexp_msg_list);
				rx_entry->unexp_buf = rx_buf;
				ep->num_unexp_msg++;
				return -FI_ENOENT;
			} else {
				FI_WARN(&rxd_prov, FI_LOG_EP_CTRL,
					"dropping msg\n");
				return -FI_ENOMEM;
			}
		}
		rxd_ep_handle_data_msg(ep, peer, rx_entry, rx_entry->recv->iov,
				       rx_entry->recv->msg.iov_count, ctrl,
				       pkt_start->data, rx_buf);
		break;
	case ofi_op_tagged:
		/* Try to match a posted tagged receive. */
		rx_entry->trecv = rxd_get_trecv_entry(ep, rx_entry);
		if (!rx_entry->trecv) {
			/* NOTE(review): shares the num_unexp_msg counter with
			 * the untagged path even though it queues on the tag
			 * list — presumably one combined limit; confirm. */
			if (ep->num_unexp_msg < RXD_EP_MAX_UNEXP_MSG) {
				dlist_insert_tail(&rx_entry->unexp_entry,
						  &ep->unexp_tag_list);
				rx_entry->unexp_buf = rx_buf;
				ep->num_unexp_msg++;
				return -FI_ENOENT;
			} else {
				FI_WARN(&rxd_prov, FI_LOG_EP_CTRL,
					"dropping msg\n");
				return -FI_ENOMEM;
			}
		}
		rxd_ep_handle_data_msg(ep, peer, rx_entry, rx_entry->trecv->iov,
				       rx_entry->trecv->msg.iov_count, ctrl,
				       pkt_start->data, rx_buf);
		break;
	case ofi_op_write:
		/* Payload begins with the remote RMA iov table; validate
		 * every target region against the local MR map before
		 * accepting any data. */
		rma_iov = (struct ofi_rma_iov *) pkt_start->data;
		for (i = 0; i < rx_entry->op_hdr.iov_count; i++) {
			ret = rxd_mr_verify(ep->domain, rma_iov[i].len,
					    (uintptr_t *) &rma_iov[i].addr,
					    rma_iov[i].key, FI_REMOTE_WRITE);
			if (ret) {
				/* todo: handle invalid key case */
				FI_WARN(&rxd_prov, FI_LOG_EP_CTRL,
					"invalid key/access permissions\n");
				return -FI_EACCES;
			}
			rx_entry->write.iov[i].iov_base =
				(void *) (uintptr_t) rma_iov[i].addr;
			rx_entry->write.iov[i].iov_len = rma_iov[i].len;
		}
		/* Skip past the iov table; the write data follows it, so the
		 * effective segment size shrinks by the same amount. */
		offset = sizeof(struct ofi_rma_iov) * rx_entry->op_hdr.iov_count;
		ctrl->seg_size -= offset;
		rxd_ep_handle_data_msg(ep, peer, rx_entry, rx_entry->write.iov,
				       rx_entry->op_hdr.iov_count, ctrl,
				       pkt_start->data + offset, rx_buf);
		break;
	case ofi_op_read_req:
		rma_iov = (struct ofi_rma_iov *) pkt_start->data;
		/* A read request generates a response transfer; acquire a tx
		 * entry to carry it. */
		tx_entry = rxd_tx_entry_acquire_fast(ep, peer);
		if (!tx_entry) {
			FI_WARN(&rxd_prov, FI_LOG_EP_CTRL, "no free tx-entry\n");
			return -FI_ENOMEM;
		}
		tx_entry->peer = rx_entry->peer;
		tx_entry->read_rsp.iov_count = rx_entry->op_hdr.iov_count;
		for (i = 0; i < rx_entry->op_hdr.iov_count; i++) {
			ret = rxd_mr_verify(ep->domain, rma_iov[i].len,
					    (uintptr_t *) &rma_iov[i].addr,
					    rma_iov[i].key, FI_REMOTE_READ);
			if (ret) {
				/* todo: handle invalid key case */
				FI_WARN(&rxd_prov, FI_LOG_EP_CTRL,
					"invalid key/access permissions\n");
				return -FI_EACCES;
			}
			tx_entry->read_rsp.src_iov[i].iov_base =
				(void *) (uintptr_t) rma_iov[i].addr;
			tx_entry->read_rsp.src_iov[i].iov_len = rma_iov[i].len;
		}
		/* Echo the requester's msg_id so the response can be matched
		 * on the other side. */
		tx_entry->read_rsp.peer_msg_id = ctrl->msg_id;
		rxd_ep_handle_read_req(ep, tx_entry, peer);
		/* The rx entry is finished once the response is kicked off. */
		rxd_rx_entry_release(ep, rx_entry);
		break;
	case ofi_op_read_rsp:
		/* Locate the originating tx entry from the index we sent in
		 * the request. */
		idx = rx_entry->op_hdr.remote_idx & RXD_TX_IDX_BITS;
		tx_entry = &ep->tx_entry_fs->buf[idx];
		/* NOTE(review): a stale/mismatched msg_id is reported as
		 * -FI_ENOMEM, which reads more like a drop/invalid case —
		 * confirm the intended error code. */
		if (tx_entry->msg_id != rx_entry->op_hdr.remote_idx)
			return -FI_ENOMEM;
		rx_entry->read_rsp.tx_entry = tx_entry;
		rxd_ep_handle_data_msg(ep, peer, rx_entry,
				       tx_entry->read_req.dst_iov,
				       tx_entry->read_req.msg.iov_count,
				       ctrl, pkt_start->data, rx_buf);
		break;
	case ofi_op_atomic:
		/* fallthrough: atomics are not implemented */
	default:
		FI_WARN(&rxd_prov, FI_LOG_EP_CTRL, "invalid op type\n");
		return -FI_EINVAL;
	}
	return 0;
}
void rxd_handle_data(struct rxd_ep *ep, struct rxd_peer *peer, struct ofi_ctrl_hdr *ctrl, struct fi_cq_msg_entry *comp, struct rxd_rx_buf *rx_buf) { int ret; struct rxd_rx_entry *rx_entry; struct rxd_tx_entry *tx_entry; struct rxd_pkt_data *pkt_data = (struct rxd_pkt_data *) ctrl; uint16_t win_sz; uint64_t curr_stamp; rxd_ep_lock_if_required(ep); rx_entry = &ep->rx_entry_fs->buf[ctrl->rx_key]; ret = rxd_check_data_pkt_order(ep, peer, ctrl, rx_entry); if (ret == RXD_PKT_ORDR_DUP) { FI_DBG(&rxd_prov, FI_LOG_EP_CTRL, "duplicate pkt: %d expected:%d, rx-key:%d, ctrl_msg_id: %p\n", ctrl->seg_no, rx_entry->exp_seg_no, ctrl->rx_key, ctrl->msg_id); win_sz = (rx_entry->msg_id == ctrl->msg_id && rx_entry->last_win_seg == ctrl->seg_no) ? rx_entry->window : 0; rxd_ep_reply_ack(ep, ctrl, ofi_ctrl_ack, win_sz, ctrl->rx_key, peer->conn_data, ctrl->conn_id); goto repost; } else if (ret == RXD_PKT_ORDR_UNEXP) { if (!(comp->flags & RXD_UNEXP_ENTRY)) { curr_stamp = fi_gettime_us(); if (rx_entry->nack_stamp == 0 || (curr_stamp > rx_entry->nack_stamp && curr_stamp - rx_entry->nack_stamp > RXD_RETRY_TIMEOUT)) { FI_DBG(&rxd_prov, FI_LOG_EP_CTRL, "unexpected pkt, sending NACK: %d\n", ctrl->seg_no); rx_entry->nack_stamp = curr_stamp; rxd_ep_reply_nack(ep, ctrl, rx_entry->exp_seg_no, ctrl->rx_key, peer->conn_data, ctrl->conn_id); } rxd_ep_enqueue_pkt(ep, ctrl, comp); } goto out; } rx_entry->nack_stamp = 0; FI_DBG(&rxd_prov, FI_LOG_EP_CTRL, "expected pkt: %d\n", ctrl->seg_no); switch (rx_entry->op_hdr.op) { case ofi_op_msg: rxd_ep_handle_data_msg(ep, peer, rx_entry, rx_entry->recv->iov, rx_entry->recv->msg.iov_count, ctrl, pkt_data->data, rx_buf); break; case ofi_op_tagged: rxd_ep_handle_data_msg(ep, peer, rx_entry, rx_entry->trecv->iov, rx_entry->trecv->msg.iov_count, ctrl, pkt_data->data, rx_buf); break; case ofi_op_write: rxd_ep_handle_data_msg(ep, peer, rx_entry, rx_entry->write.iov, rx_entry->op_hdr.iov_count, ctrl, pkt_data->data, rx_buf); break; case ofi_op_read_rsp: tx_entry = 
rx_entry->read_rsp.tx_entry; rxd_ep_handle_data_msg(ep, peer, rx_entry, tx_entry->read_req.dst_iov, tx_entry->read_req.msg.iov_count, ctrl, pkt_data->data, rx_buf); break; case ofi_op_atomic: default: FI_WARN(&rxd_prov, FI_LOG_EP_CTRL, "invalid op type\n"); } repost: if (comp->flags & RXD_UNEXP_ENTRY) { rxd_release_unexp_entry(ep->rx_cq, comp); ep->num_unexp_pkt--; } rxd_ep_repost_buff(rx_buf); out: rxd_ep_unlock_if_required(ep); }
static void rxd_handle_data(struct rxd_ep *ep, struct rxd_peer *peer, struct ofi_ctrl_hdr *ctrl, struct fi_cq_msg_entry *comp, struct rxd_rx_buf *rx_buf) { struct rxd_rx_entry *rx_entry; struct rxd_tx_entry *tx_entry; struct rxd_pkt_data *pkt_data = (struct rxd_pkt_data *) ctrl; uint16_t credits; int ret; FI_DBG(&rxd_prov, FI_LOG_EP_CTRL, "data pkt- msg_id: %" PRIu64 ", segno: %d, buf: %p\n", ctrl->msg_id, ctrl->seg_no, rx_buf); rx_entry = &ep->rx_entry_fs->buf[ctrl->rx_key]; ret = rxd_check_data_pkt_order(ep, peer, ctrl, rx_entry); if (ret) { if (ret == -FI_EALREADY) { FI_DBG(&rxd_prov, FI_LOG_EP_CTRL, "duplicate pkt: %d " "expected:%d, rx-key:%d, ctrl_msg_id: %p\n", ctrl->seg_no, rx_entry->exp_seg_no, ctrl->rx_key, ctrl->msg_id); credits = ((rx_entry->msg_id == ctrl->msg_id) && (rx_entry->last_win_seg == ctrl->seg_no)) ? rx_entry->credits : 0; rxd_ep_reply_ack(ep, ctrl, ofi_ctrl_ack, credits, ctrl->rx_key, peer->conn_data, ctrl->conn_id); goto repost; } else { FI_DBG(&rxd_prov, FI_LOG_EP_CTRL, "invalid pkt: segno: %d " "expected:%d, rx-key:%d, ctrl_msg_id: %ld, " "rx_entry_msg_id: %ld\n", ctrl->seg_no, rx_entry->exp_seg_no, ctrl->rx_key, ctrl->msg_id, rx_entry->msg_id); FI_DBG(&rxd_prov, FI_LOG_EP_CTRL, "invalid pkt: " "credits: %d, last win: %d\n", rx_entry->credits, rx_entry->last_win_seg); credits = (rx_entry->msg_id == ctrl->msg_id) ? 
rx_entry->last_win_seg - rx_entry->exp_seg_no : 0; rxd_ep_reply_ack(ep, ctrl, ofi_ctrl_ack, credits, ctrl->rx_key, peer->conn_data, ctrl->conn_id); goto repost; } } rx_entry->nack_stamp = 0; FI_DBG(&rxd_prov, FI_LOG_EP_CTRL, "expected pkt: %d\n", ctrl->seg_no); switch (rx_entry->op_hdr.op) { case ofi_op_msg: rxd_ep_handle_data_msg(ep, peer, rx_entry, rx_entry->recv->iov, rx_entry->recv->msg.iov_count, ctrl, pkt_data->data, rx_buf); break; case ofi_op_tagged: rxd_ep_handle_data_msg(ep, peer, rx_entry, rx_entry->trecv->iov, rx_entry->trecv->msg.iov_count, ctrl, pkt_data->data, rx_buf); break; case ofi_op_write: rxd_ep_handle_data_msg(ep, peer, rx_entry, rx_entry->write.iov, rx_entry->op_hdr.iov_count, ctrl, pkt_data->data, rx_buf); break; case ofi_op_read_rsp: tx_entry = rx_entry->read_rsp.tx_entry; rxd_ep_handle_data_msg(ep, peer, rx_entry, tx_entry->read_req.dst_iov, tx_entry->read_req.msg.iov_count, ctrl, pkt_data->data, rx_buf); break; case ofi_op_atomic: default: FI_WARN(&rxd_prov, FI_LOG_EP_CTRL, "invalid op type\n"); } repost: rxd_ep_repost_buff(rx_buf); }