static inline int get_mtu(struct rxe_qp *qp) { struct rxe_dev *rxe = to_rdev(qp->ibqp.device); if ((qp_type(qp) == IB_QPT_RC) || (qp_type(qp) == IB_QPT_UC)) return qp->mtu; return rxe->port.mtu_cap; }
/* Executes a new request. A retried request never reach that function (send * and writes are discarded, and reads and atomics are retried elsewhere. */ static enum resp_states execute(struct rxe_qp *qp, struct rxe_pkt_info *pkt) { enum resp_states err; if (pkt->mask & RXE_SEND_MASK) { if (qp_type(qp) == IB_QPT_UD || qp_type(qp) == IB_QPT_SMI || qp_type(qp) == IB_QPT_GSI) { union rdma_network_hdr hdr; struct sk_buff *skb = PKT_TO_SKB(pkt); memset(&hdr, 0, sizeof(hdr)); if (skb->protocol == htons(ETH_P_IP)) memcpy(&hdr.roce4grh, ip_hdr(skb), sizeof(hdr.roce4grh)); else if (skb->protocol == htons(ETH_P_IPV6)) memcpy(&hdr.ibgrh, ipv6_hdr(skb), sizeof(hdr.ibgrh)); err = send_data_in(qp, &hdr, sizeof(hdr)); if (err) return err; } err = send_data_in(qp, payload_addr(pkt), payload_size(pkt)); if (err) return err; } else if (pkt->mask & RXE_WRITE_MASK) { err = write_data_in(qp, pkt); if (err) return err; } else if (pkt->mask & RXE_READ_MASK) { /* For RDMA Read we can increment the msn now. See C9-148. */ qp->resp.msn++; return RESPST_READ_REPLY; } else if (pkt->mask & RXE_ATOMIC_MASK) { err = process_atomic(qp, pkt); if (err) return err; } else /* Unreachable */ WARN_ON(1); /* We successfully processed this new request. */ qp->resp.msn++; /* next expected psn, read handles this separately */ qp->resp.psn = (pkt->psn + 1) & BTH_PSN_MASK; qp->resp.opcode = pkt->opcode; qp->resp.status = IB_WC_SUCCESS; if (pkt->mask & RXE_COMP_MASK) return RESPST_COMPLETE; else if (qp_type(qp) == IB_QPT_RC) return RESPST_ACKNOWLEDGE; else return RESPST_CLEANUP; }
struct rxe_av *rxe_get_av(struct rxe_pkt_info *pkt) { if (!pkt || !pkt->qp) return NULL; if (qp_type(pkt->qp) == IB_QPT_RC || qp_type(pkt->qp) == IB_QPT_UC) return &pkt->qp->pri_av; return (pkt->wqe) ? &pkt->wqe->av : NULL; }
/* move the qp to the reset state */ static void rxe_qp_reset(struct rxe_qp *qp) { /* stop tasks from running */ rxe_disable_task(&qp->resp.task); /* stop request/comp */ if (qp->sq.queue) { if (qp_type(qp) == IB_QPT_RC) rxe_disable_task(&qp->comp.task); rxe_disable_task(&qp->req.task); } /* move qp to the reset state */ qp->req.state = QP_STATE_RESET; qp->resp.state = QP_STATE_RESET; /* let state machines reset themselves drain work and packet queues * etc. */ __rxe_do_task(&qp->resp.task); if (qp->sq.queue) { __rxe_do_task(&qp->comp.task); __rxe_do_task(&qp->req.task); rxe_queue_reset(qp->sq.queue); } /* cleanup attributes */ atomic_set(&qp->ssn, 0); qp->req.opcode = -1; qp->req.need_retry = 0; qp->req.noack_pkts = 0; qp->resp.msn = 0; qp->resp.opcode = -1; qp->resp.drop_msg = 0; qp->resp.goto_error = 0; qp->resp.sent_psn_nak = 0; if (qp->resp.mr) { rxe_drop_ref(qp->resp.mr); qp->resp.mr = NULL; } cleanup_rd_atomic_resources(qp); /* reenable tasks */ rxe_enable_task(&qp->resp.task); if (qp->sq.queue) { if (qp_type(qp) == IB_QPT_RC) rxe_enable_task(&qp->comp.task); rxe_enable_task(&qp->req.task); } }
static void init_send_wr(struct rxe_qp *qp, struct rxe_send_wr *wr, const struct ib_send_wr *ibwr) { wr->wr_id = ibwr->wr_id; wr->num_sge = ibwr->num_sge; wr->opcode = ibwr->opcode; wr->send_flags = ibwr->send_flags; if (qp_type(qp) == IB_QPT_UD || qp_type(qp) == IB_QPT_SMI || qp_type(qp) == IB_QPT_GSI) { wr->wr.ud.remote_qpn = ud_wr(ibwr)->remote_qpn; wr->wr.ud.remote_qkey = ud_wr(ibwr)->remote_qkey; if (qp_type(qp) == IB_QPT_GSI) wr->wr.ud.pkey_index = ud_wr(ibwr)->pkey_index; if (wr->opcode == IB_WR_SEND_WITH_IMM) wr->ex.imm_data = ibwr->ex.imm_data; } else { switch (wr->opcode) { case IB_WR_RDMA_WRITE_WITH_IMM: wr->ex.imm_data = ibwr->ex.imm_data; /* fall through */ case IB_WR_RDMA_READ: case IB_WR_RDMA_WRITE: wr->wr.rdma.remote_addr = rdma_wr(ibwr)->remote_addr; wr->wr.rdma.rkey = rdma_wr(ibwr)->rkey; break; case IB_WR_SEND_WITH_IMM: wr->ex.imm_data = ibwr->ex.imm_data; break; case IB_WR_SEND_WITH_INV: wr->ex.invalidate_rkey = ibwr->ex.invalidate_rkey; break; case IB_WR_ATOMIC_CMP_AND_SWP: case IB_WR_ATOMIC_FETCH_AND_ADD: wr->wr.atomic.remote_addr = atomic_wr(ibwr)->remote_addr; wr->wr.atomic.compare_add = atomic_wr(ibwr)->compare_add; wr->wr.atomic.swap = atomic_wr(ibwr)->swap; wr->wr.atomic.rkey = atomic_wr(ibwr)->rkey; break; case IB_WR_LOCAL_INV: wr->ex.invalidate_rkey = ibwr->ex.invalidate_rkey; break; case IB_WR_REG_MR: wr->wr.reg.mr = reg_wr(ibwr)->mr; wr->wr.reg.key = reg_wr(ibwr)->key; wr->wr.reg.access = reg_wr(ibwr)->access; break; default: break; } } }
static int init_send_wqe(struct rxe_qp *qp, struct ib_send_wr *ibwr, unsigned int mask, unsigned int length, struct rxe_send_wqe *wqe) { int num_sge = ibwr->num_sge; struct ib_sge *sge; int i; u8 *p; init_send_wr(qp, &wqe->wr, ibwr); if (qp_type(qp) == IB_QPT_UD || qp_type(qp) == IB_QPT_SMI || qp_type(qp) == IB_QPT_GSI) memcpy(&wqe->av, &to_rah(ud_wr(ibwr)->ah)->av, sizeof(wqe->av)); if (unlikely(ibwr->send_flags & IB_SEND_INLINE)) { p = wqe->dma.inline_data; sge = ibwr->sg_list; for (i = 0; i < num_sge; i++, sge++) { if (qp->is_user && copy_from_user(p, (__user void *) (uintptr_t)sge->addr, sge->length)) return -EFAULT; else if (!qp->is_user) memcpy(p, (void *)(uintptr_t)sge->addr, sge->length); p += sge->length; } } else if (mask & WR_REG_MASK) { wqe->mask = mask; wqe->state = wqe_state_posted; return 0; } else memcpy(wqe->dma.sge, ibwr->sg_list, num_sge * sizeof(struct ib_sge)); wqe->iova = (mask & WR_ATOMIC_MASK) ? atomic_wr(ibwr)->remote_addr : rdma_wr(ibwr)->remote_addr; wqe->mask = mask; wqe->dma.length = length; wqe->dma.resid = length; wqe->dma.num_sge = num_sge; wqe->dma.cur_sge = 0; wqe->dma.sge_offset = 0; wqe->state = wqe_state_posted; wqe->ssn = atomic_add_return(1, &qp->ssn); return 0; }
/* Executes a new request. A retried request never reach that function (send * and writes are discarded, and reads and atomics are retried elsewhere. */ static enum resp_states execute(struct rxe_qp *qp, struct rxe_pkt_info *pkt) { enum resp_states err; if (pkt->mask & RXE_SEND_MASK) { if (qp_type(qp) == IB_QPT_UD || qp_type(qp) == IB_QPT_SMI || qp_type(qp) == IB_QPT_GSI) { union rdma_network_hdr hdr; build_rdma_network_hdr(&hdr, pkt); err = send_data_in(qp, &hdr, sizeof(hdr)); if (err) return err; } err = send_data_in(qp, payload_addr(pkt), payload_size(pkt)); if (err) return err; } else if (pkt->mask & RXE_WRITE_MASK) { err = write_data_in(qp, pkt); if (err) return err; } else if (pkt->mask & RXE_READ_MASK) { /* For RDMA Read we can increment the msn now. See C9-148. */ qp->resp.msn++; return RESPST_READ_REPLY; } else if (pkt->mask & RXE_ATOMIC_MASK) { err = process_atomic(qp, pkt); if (err) return err; } else { /* Unreachable */ WARN_ON_ONCE(1); } /* next expected psn, read handles this separately */ qp->resp.psn = (pkt->psn + 1) & BTH_PSN_MASK; qp->resp.ack_psn = qp->resp.psn; qp->resp.opcode = pkt->opcode; qp->resp.status = IB_WC_SUCCESS; if (pkt->mask & RXE_COMP_MASK) { /* We successfully processed this new request. */ qp->resp.msn++; return RESPST_COMPLETE; } else if (qp_type(qp) == IB_QPT_RC) return RESPST_ACKNOWLEDGE; else return RESPST_CLEANUP; }
/* called when the last reference to the qp is dropped */ static void rxe_qp_do_cleanup(struct work_struct *work) { struct rxe_qp *qp = container_of(work, typeof(*qp), cleanup_work.work); rxe_drop_all_mcast_groups(qp); if (qp->sq.queue) rxe_queue_cleanup(qp->sq.queue); if (qp->srq) rxe_drop_ref(qp->srq); if (qp->rq.queue) rxe_queue_cleanup(qp->rq.queue); if (qp->scq) rxe_drop_ref(qp->scq); if (qp->rcq) rxe_drop_ref(qp->rcq); if (qp->pd) rxe_drop_ref(qp->pd); if (qp->resp.mr) { rxe_drop_ref(qp->resp.mr); qp->resp.mr = NULL; } if (qp_type(qp) == IB_QPT_RC) sk_dst_reset(qp->sk->sk); free_rd_atomic_resources(qp); kernel_sock_shutdown(qp->sk, SHUT_RDWR); sock_release(qp->sk); }
/* called by the destroy qp verb */ void rxe_qp_destroy(struct rxe_qp *qp) { struct rxe_dev *rxe = to_rdev(qp->ibqp.device); qp->valid = 0; qp->qp_timeout_jiffies = 0; rxe_cleanup_task(&qp->resp.task); del_timer_sync(&qp->retrans_timer); del_timer_sync(&qp->rnr_nak_timer); rxe_cleanup_task(&qp->req.task); if (qp_type(qp) == IB_QPT_RC) rxe_cleanup_task(&qp->comp.task); /* flush out any receive wr's or pending requests */ __rxe_do_task(&qp->req.task); if (qp->sq.queue) { __rxe_do_task(&qp->comp.task); __rxe_do_task(&qp->req.task); } /* drain the output queue */ while (!list_empty(&qp->arbiter_list)) __rxe_do_task(&rxe->arbiter.task); }
static int next_opcode(struct rxe_qp *qp, struct rxe_send_wqe *wqe, u32 opcode) { int fits = (wqe->dma.resid <= qp->mtu); switch (qp_type(qp)) { case IB_QPT_RC: return next_opcode_rc(qp, opcode, fits); case IB_QPT_UC: return next_opcode_uc(qp, opcode, fits); case IB_QPT_SMI: case IB_QPT_UD: case IB_QPT_GSI: switch (opcode) { case IB_WR_SEND: return IB_OPCODE_UD_SEND_ONLY; case IB_WR_SEND_WITH_IMM: return IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE; } break; default: break; } return -EINVAL; }
static int rxe_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, struct ib_send_wr **bad_wr) { int err = 0; struct rxe_qp *qp = to_rqp(ibqp); unsigned int mask; unsigned int length = 0; int i; int must_sched; if (unlikely(!qp->valid)) { *bad_wr = wr; return -EINVAL; } if (unlikely(qp->req.state < QP_STATE_READY)) { *bad_wr = wr; return -EINVAL; } while (wr) { mask = wr_opcode_mask(wr->opcode, qp); if (unlikely(!mask)) { err = -EINVAL; *bad_wr = wr; break; } if (unlikely((wr->send_flags & IB_SEND_INLINE) && !(mask & WR_INLINE_MASK))) { err = -EINVAL; *bad_wr = wr; break; } length = 0; for (i = 0; i < wr->num_sge; i++) length += wr->sg_list[i].length; err = post_one_send(qp, wr, mask, length); if (err) { *bad_wr = wr; break; } wr = wr->next; } /* * Must sched in case of GSI QP because ib_send_mad() hold irq lock, * and the requester call ip_local_out_sk() that takes spin_lock_bh. */ must_sched = (qp_type(qp) == IB_QPT_GSI) || (queue_count(qp->sq.queue) > 1); rxe_run_task(&qp->req.task, must_sched); return err; }
static void update_wqe_state(struct rxe_qp *qp, struct rxe_send_wqe *wqe, struct rxe_pkt_info *pkt) { if (pkt->mask & RXE_END_MASK) { if (qp_type(qp) == IB_QPT_RC) wqe->state = wqe_state_pending; } else { wqe->state = wqe_state_processing; } }
static struct dst_entry *rxe_find_route(struct net_device *ndev, struct rxe_qp *qp, struct rxe_av *av) { struct dst_entry *dst = NULL; if (qp_type(qp) == IB_QPT_RC) dst = sk_dst_get(qp->sk->sk); if (!dst || !dst_check(dst, qp->dst_cookie)) { if (dst) dst_release(dst); if (av->network_type == RDMA_NETWORK_IPV4) { struct in_addr *saddr; struct in_addr *daddr; saddr = &av->sgid_addr._sockaddr_in.sin_addr; daddr = &av->dgid_addr._sockaddr_in.sin_addr; dst = rxe_find_route4(ndev, saddr, daddr); } else if (av->network_type == RDMA_NETWORK_IPV6) { struct in6_addr *saddr6; struct in6_addr *daddr6; saddr6 = &av->sgid_addr._sockaddr_in6.sin6_addr; daddr6 = &av->dgid_addr._sockaddr_in6.sin6_addr; dst = rxe_find_route6(ndev, saddr6, daddr6); #if IS_ENABLED(CONFIG_IPV6) if (dst) qp->dst_cookie = rt6_get_cookie((struct rt6_info *)dst); #endif } if (dst && (qp_type(qp) == IB_QPT_RC)) { dst_hold(dst); sk_dst_set(qp->sk->sk, dst); } } return dst; }
/* drain the send queue */ static void rxe_qp_drain(struct rxe_qp *qp) { if (qp->sq.queue) { if (qp->req.state != QP_STATE_DRAINED) { qp->req.state = QP_STATE_DRAIN; if (qp_type(qp) == IB_QPT_RC) rxe_run_task(&qp->comp.task, 1); else __rxe_do_task(&qp->comp.task); rxe_run_task(&qp->req.task, 1); } } }
static enum resp_states check_length(struct rxe_qp *qp, struct rxe_pkt_info *pkt) { switch (qp_type(qp)) { case IB_QPT_RC: return RESPST_CHK_RKEY; case IB_QPT_UC: return RESPST_CHK_RKEY; default: return RESPST_CHK_RKEY; } }
/* move the qp to the error state */ void rxe_qp_error(struct rxe_qp *qp) { qp->req.state = QP_STATE_ERROR; qp->resp.state = QP_STATE_ERROR; /* drain work and packet queues */ rxe_run_task(&qp->resp.task, 1); if (qp_type(qp) == IB_QPT_RC) rxe_run_task(&qp->comp.task, 1); else __rxe_do_task(&qp->comp.task); rxe_run_task(&qp->req.task, 1); }
static enum resp_states check_psn(struct rxe_qp *qp, struct rxe_pkt_info *pkt) { int diff = psn_compare(pkt->psn, qp->resp.psn); struct rxe_dev *rxe = to_rdev(qp->ibqp.device); switch (qp_type(qp)) { case IB_QPT_RC: if (diff > 0) { if (qp->resp.sent_psn_nak) return RESPST_CLEANUP; qp->resp.sent_psn_nak = 1; rxe_counter_inc(rxe, RXE_CNT_OUT_OF_SEQ_REQ); return RESPST_ERR_PSN_OUT_OF_SEQ; } else if (diff < 0) { rxe_counter_inc(rxe, RXE_CNT_DUP_REQ); return RESPST_DUPLICATE_REQUEST; } if (qp->resp.sent_psn_nak) qp->resp.sent_psn_nak = 0; break; case IB_QPT_UC: if (qp->resp.drop_msg || diff != 0) { if (pkt->mask & RXE_START_MASK) { qp->resp.drop_msg = 0; return RESPST_CHK_OP_SEQ; } qp->resp.drop_msg = 1; return RESPST_CLEANUP; } break; default: break; } return RESPST_CHK_OP_SEQ; }
/* called by the destroy qp verb */ void rxe_qp_destroy(struct rxe_qp *qp) { qp->valid = 0; qp->qp_timeout_jiffies = 0; rxe_cleanup_task(&qp->resp.task); del_timer_sync(&qp->retrans_timer); del_timer_sync(&qp->rnr_nak_timer); rxe_cleanup_task(&qp->req.task); if (qp_type(qp) == IB_QPT_RC) rxe_cleanup_task(&qp->comp.task); /* flush out any receive wr's or pending requests */ __rxe_do_task(&qp->req.task); if (qp->sq.queue) { __rxe_do_task(&qp->comp.task); __rxe_do_task(&qp->req.task); } }
static enum resp_states check_op_valid(struct rxe_qp *qp, struct rxe_pkt_info *pkt) { switch (qp_type(qp)) { case IB_QPT_RC: if (((pkt->mask & RXE_READ_MASK) && !(qp->attr.qp_access_flags & IB_ACCESS_REMOTE_READ)) || ((pkt->mask & RXE_WRITE_MASK) && !(qp->attr.qp_access_flags & IB_ACCESS_REMOTE_WRITE)) || ((pkt->mask & RXE_ATOMIC_MASK) && !(qp->attr.qp_access_flags & IB_ACCESS_REMOTE_ATOMIC))) { return RESPST_ERR_UNSUPPORTED_OPCODE; } break; case IB_QPT_UC: if ((pkt->mask & RXE_WRITE_MASK) && !(qp->attr.qp_access_flags & IB_ACCESS_REMOTE_WRITE)) { qp->resp.drop_msg = 1; return RESPST_CLEANUP; } break; case IB_QPT_UD: case IB_QPT_SMI: case IB_QPT_GSI: break; default: WARN_ON_ONCE(1); break; } return RESPST_CHK_RESOURCE; }
/* called by the modify qp verb, this routine checks all the parameters before * making any changes */ int rxe_qp_chk_attr(struct rxe_dev *rxe, struct rxe_qp *qp, struct ib_qp_attr *attr, int mask) { enum ib_qp_state cur_state = (mask & IB_QP_CUR_STATE) ? attr->cur_qp_state : qp->attr.qp_state; enum ib_qp_state new_state = (mask & IB_QP_STATE) ? attr->qp_state : cur_state; if (!ib_modify_qp_is_ok(cur_state, new_state, qp_type(qp), mask, IB_LINK_LAYER_ETHERNET)) { pr_warn("invalid mask or state for qp\n"); goto err1; } if (mask & IB_QP_STATE) { if (cur_state == IB_QPS_SQD) { if (qp->req.state == QP_STATE_DRAIN && new_state != IB_QPS_ERR) goto err1; } } if (mask & IB_QP_PORT) { if (attr->port_num != 1) { pr_warn("invalid port %d\n", attr->port_num); goto err1; } } if (mask & IB_QP_CAP && rxe_qp_chk_cap(rxe, &attr->cap, !!qp->srq)) goto err1; if (mask & IB_QP_AV && rxe_av_chk_attr(rxe, &attr->ah_attr)) goto err1; if (mask & IB_QP_ALT_PATH) { if (rxe_av_chk_attr(rxe, &attr->alt_ah_attr)) goto err1; if (attr->alt_port_num != 1) { pr_warn("invalid alt port %d\n", attr->alt_port_num); goto err1; } if (attr->alt_timeout > 31) { pr_warn("invalid QP alt timeout %d > 31\n", attr->alt_timeout); goto err1; } } if (mask & IB_QP_PATH_MTU) { struct rxe_port *port = &rxe->port; enum ib_mtu max_mtu = port->attr.max_mtu; enum ib_mtu mtu = attr->path_mtu; if (mtu > max_mtu) { pr_debug("invalid mtu (%d) > (%d)\n", ib_mtu_enum_to_int(mtu), ib_mtu_enum_to_int(max_mtu)); goto err1; } } if (mask & IB_QP_MAX_QP_RD_ATOMIC) { if (attr->max_rd_atomic > rxe->attr.max_qp_rd_atom) { pr_warn("invalid max_rd_atomic %d > %d\n", attr->max_rd_atomic, rxe->attr.max_qp_rd_atom); goto err1; } } if (mask & IB_QP_TIMEOUT) { if (attr->timeout > 31) { pr_warn("invalid QP timeout %d > 31\n", attr->timeout); goto err1; } } return 0; err1: return -EINVAL; }
queued_event (qp_type _type = qp_type(), blackbox _bb = blackbox()) : pair<qp_type, blackbox>(_type, _bb) { }
static enum resp_states check_op_seq(struct rxe_qp *qp, struct rxe_pkt_info *pkt) { switch (qp_type(qp)) { case IB_QPT_RC: switch (qp->resp.opcode) { case IB_OPCODE_RC_SEND_FIRST: case IB_OPCODE_RC_SEND_MIDDLE: switch (pkt->opcode) { case IB_OPCODE_RC_SEND_MIDDLE: case IB_OPCODE_RC_SEND_LAST: case IB_OPCODE_RC_SEND_LAST_WITH_IMMEDIATE: case IB_OPCODE_RC_SEND_LAST_WITH_INVALIDATE: return RESPST_CHK_OP_VALID; default: return RESPST_ERR_MISSING_OPCODE_LAST_C; } case IB_OPCODE_RC_RDMA_WRITE_FIRST: case IB_OPCODE_RC_RDMA_WRITE_MIDDLE: switch (pkt->opcode) { case IB_OPCODE_RC_RDMA_WRITE_MIDDLE: case IB_OPCODE_RC_RDMA_WRITE_LAST: case IB_OPCODE_RC_RDMA_WRITE_LAST_WITH_IMMEDIATE: return RESPST_CHK_OP_VALID; default: return RESPST_ERR_MISSING_OPCODE_LAST_C; } default: switch (pkt->opcode) { case IB_OPCODE_RC_SEND_MIDDLE: case IB_OPCODE_RC_SEND_LAST: case IB_OPCODE_RC_SEND_LAST_WITH_IMMEDIATE: case IB_OPCODE_RC_SEND_LAST_WITH_INVALIDATE: case IB_OPCODE_RC_RDMA_WRITE_MIDDLE: case IB_OPCODE_RC_RDMA_WRITE_LAST: case IB_OPCODE_RC_RDMA_WRITE_LAST_WITH_IMMEDIATE: return RESPST_ERR_MISSING_OPCODE_FIRST; default: return RESPST_CHK_OP_VALID; } } break; case IB_QPT_UC: switch (qp->resp.opcode) { case IB_OPCODE_UC_SEND_FIRST: case IB_OPCODE_UC_SEND_MIDDLE: switch (pkt->opcode) { case IB_OPCODE_UC_SEND_MIDDLE: case IB_OPCODE_UC_SEND_LAST: case IB_OPCODE_UC_SEND_LAST_WITH_IMMEDIATE: return RESPST_CHK_OP_VALID; default: return RESPST_ERR_MISSING_OPCODE_LAST_D1E; } case IB_OPCODE_UC_RDMA_WRITE_FIRST: case IB_OPCODE_UC_RDMA_WRITE_MIDDLE: switch (pkt->opcode) { case IB_OPCODE_UC_RDMA_WRITE_MIDDLE: case IB_OPCODE_UC_RDMA_WRITE_LAST: case IB_OPCODE_UC_RDMA_WRITE_LAST_WITH_IMMEDIATE: return RESPST_CHK_OP_VALID; default: return RESPST_ERR_MISSING_OPCODE_LAST_D1E; } default: switch (pkt->opcode) { case IB_OPCODE_UC_SEND_MIDDLE: case IB_OPCODE_UC_SEND_LAST: case IB_OPCODE_UC_SEND_LAST_WITH_IMMEDIATE: case IB_OPCODE_UC_RDMA_WRITE_MIDDLE: case IB_OPCODE_UC_RDMA_WRITE_LAST: case IB_OPCODE_UC_RDMA_WRITE_LAST_WITH_IMMEDIATE: qp->resp.drop_msg = 1; return RESPST_CLEANUP; default: return RESPST_CHK_OP_VALID; } } break; default: return RESPST_CHK_OP_VALID; } }
static struct sk_buff *init_req_packet(struct rxe_qp *qp, struct rxe_send_wqe *wqe, int opcode, int payload, struct rxe_pkt_info *pkt) { struct rxe_dev *rxe = to_rdev(qp->ibqp.device); struct rxe_port *port = &rxe->port; struct sk_buff *skb; struct rxe_send_wr *ibwr = &wqe->wr; struct rxe_av *av; int pad = (-payload) & 0x3; int paylen; int solicited; u16 pkey; u32 qp_num; int ack_req; /* length from start of bth to end of icrc */ paylen = rxe_opcode[opcode].length + payload + pad + RXE_ICRC_SIZE; /* pkt->hdr, rxe, port_num and mask are initialized in ifc * layer */ pkt->opcode = opcode; pkt->qp = qp; pkt->psn = qp->req.psn; pkt->mask = rxe_opcode[opcode].mask; pkt->paylen = paylen; pkt->offset = 0; pkt->wqe = wqe; /* init skb */ av = rxe_get_av(pkt); skb = rxe_init_packet(rxe, av, paylen, pkt); if (unlikely(!skb)) return NULL; /* init bth */ solicited = (ibwr->send_flags & IB_SEND_SOLICITED) && (pkt->mask & RXE_END_MASK) && ((pkt->mask & (RXE_SEND_MASK)) || (pkt->mask & (RXE_WRITE_MASK | RXE_IMMDT_MASK)) == (RXE_WRITE_MASK | RXE_IMMDT_MASK)); pkey = (qp_type(qp) == IB_QPT_GSI) ? port->pkey_tbl[ibwr->wr.ud.pkey_index] : port->pkey_tbl[qp->attr.pkey_index]; qp_num = (pkt->mask & RXE_DETH_MASK) ? ibwr->wr.ud.remote_qpn : qp->attr.dest_qp_num; ack_req = ((pkt->mask & RXE_END_MASK) || (qp->req.noack_pkts++ > RXE_MAX_PKT_PER_ACK)); if (ack_req) qp->req.noack_pkts = 0; bth_init(pkt, pkt->opcode, solicited, 0, pad, pkey, qp_num, ack_req, pkt->psn); /* init optional headers */ if (pkt->mask & RXE_RETH_MASK) { reth_set_rkey(pkt, ibwr->wr.rdma.rkey); reth_set_va(pkt, wqe->iova); reth_set_len(pkt, wqe->dma.length); } if (pkt->mask & RXE_IMMDT_MASK) immdt_set_imm(pkt, ibwr->ex.imm_data); if (pkt->mask & RXE_IETH_MASK) ieth_set_rkey(pkt, ibwr->ex.invalidate_rkey); if (pkt->mask & RXE_ATMETH_MASK) { atmeth_set_va(pkt, wqe->iova); if (opcode == IB_OPCODE_RC_COMPARE_SWAP || opcode == IB_OPCODE_RD_COMPARE_SWAP) { atmeth_set_swap_add(pkt, ibwr->wr.atomic.swap); atmeth_set_comp(pkt, ibwr->wr.atomic.compare_add); } else { atmeth_set_swap_add(pkt, ibwr->wr.atomic.compare_add); } atmeth_set_rkey(pkt, ibwr->wr.atomic.rkey); } if (pkt->mask & RXE_DETH_MASK) { if (qp->ibqp.qp_num == 1) deth_set_qkey(pkt, GSI_QKEY); else deth_set_qkey(pkt, ibwr->wr.ud.remote_qkey); deth_set_sqp(pkt, qp->ibqp.qp_num); } return skb; }
int rxe_completer(void *arg) { struct rxe_qp *qp = (struct rxe_qp *)arg; struct rxe_dev *rxe = to_rdev(qp->ibqp.device); struct rxe_send_wqe *wqe = wqe; struct sk_buff *skb = NULL; struct rxe_pkt_info *pkt = NULL; enum comp_state state; rxe_add_ref(qp); if (!qp->valid || qp->req.state == QP_STATE_ERROR || qp->req.state == QP_STATE_RESET) { rxe_drain_resp_pkts(qp, qp->valid && qp->req.state == QP_STATE_ERROR); goto exit; } if (qp->comp.timeout) { qp->comp.timeout_retry = 1; qp->comp.timeout = 0; } else { qp->comp.timeout_retry = 0; } if (qp->req.need_retry) goto exit; state = COMPST_GET_ACK; while (1) { pr_debug("qp#%d state = %s\n", qp_num(qp), comp_state_name[state]); switch (state) { case COMPST_GET_ACK: skb = skb_dequeue(&qp->resp_pkts); if (skb) { pkt = SKB_TO_PKT(skb); qp->comp.timeout_retry = 0; } state = COMPST_GET_WQE; break; case COMPST_GET_WQE: state = get_wqe(qp, pkt, &wqe); break; case COMPST_CHECK_PSN: state = check_psn(qp, pkt, wqe); break; case COMPST_CHECK_ACK: state = check_ack(qp, pkt, wqe); break; case COMPST_READ: state = do_read(qp, pkt, wqe); break; case COMPST_ATOMIC: state = do_atomic(qp, pkt, wqe); break; case COMPST_WRITE_SEND: if (wqe->state == wqe_state_pending && wqe->last_psn == pkt->psn) state = COMPST_COMP_ACK; else state = COMPST_UPDATE_COMP; break; case COMPST_COMP_ACK: state = complete_ack(qp, pkt, wqe); break; case COMPST_COMP_WQE: state = complete_wqe(qp, pkt, wqe); break; case COMPST_UPDATE_COMP: if (pkt->mask & RXE_END_MASK) qp->comp.opcode = -1; else qp->comp.opcode = pkt->opcode; if (psn_compare(pkt->psn, qp->comp.psn) >= 0) qp->comp.psn = (pkt->psn + 1) & BTH_PSN_MASK; if (qp->req.wait_psn) { qp->req.wait_psn = 0; rxe_run_task(&qp->req.task, 1); } state = COMPST_DONE; break; case COMPST_DONE: if (pkt) { rxe_drop_ref(pkt->qp); kfree_skb(skb); skb = NULL; } goto done; case COMPST_EXIT: if (qp->comp.timeout_retry && wqe) { state = COMPST_ERROR_RETRY; break; } /* re reset the timeout counter if * (1) QP is type RC * (2) the QP is alive * (3) there is a packet sent by the requester that * might be acked (we still might get spurious * timeouts but try to keep them as few as possible) * (4) the timeout parameter is set */ if ((qp_type(qp) == IB_QPT_RC) && (qp->req.state == QP_STATE_READY) && (psn_compare(qp->req.psn, qp->comp.psn) > 0) && qp->qp_timeout_jiffies) mod_timer(&qp->retrans_timer, jiffies + qp->qp_timeout_jiffies); WARN_ON_ONCE(skb); goto exit; case COMPST_ERROR_RETRY: /* we come here if the retry timer fired and we did * not receive a response packet. try to retry the send * queue if that makes sense and the limits have not * been exceeded. remember that some timeouts are * spurious since we do not reset the timer but kick * it down the road or let it expire */ /* there is nothing to retry in this case */ if (!wqe || (wqe->state == wqe_state_posted)) { WARN_ON_ONCE(skb); goto exit; } if (qp->comp.retry_cnt > 0) { if (qp->comp.retry_cnt != 7) qp->comp.retry_cnt--; /* no point in retrying if we have already * seen the last ack that the requester could * have caused */ if (psn_compare(qp->req.psn, qp->comp.psn) > 0) { /* tell the requester to retry the * send queue next time around */ rxe_counter_inc(rxe, RXE_CNT_COMP_RETRY); qp->req.need_retry = 1; rxe_run_task(&qp->req.task, 1); } if (pkt) { rxe_drop_ref(pkt->qp); kfree_skb(skb); skb = NULL; } WARN_ON_ONCE(skb); goto exit; } else { rxe_counter_inc(rxe, RXE_CNT_RETRY_EXCEEDED); wqe->status = IB_WC_RETRY_EXC_ERR; state = COMPST_ERROR; } break; case COMPST_RNR_RETRY: if (qp->comp.rnr_retry > 0) { if (qp->comp.rnr_retry != 7) qp->comp.rnr_retry--; qp->req.need_retry = 1; pr_debug("qp#%d set rnr nak timer\n", qp_num(qp)); mod_timer(&qp->rnr_nak_timer, jiffies + rnrnak_jiffies(aeth_syn(pkt) & ~AETH_TYPE_MASK)); rxe_drop_ref(pkt->qp); kfree_skb(skb); skb = NULL; goto exit; } else { rxe_counter_inc(rxe, RXE_CNT_RNR_RETRY_EXCEEDED); wqe->status = IB_WC_RNR_RETRY_EXC_ERR; state = COMPST_ERROR; } break; case COMPST_ERROR: WARN_ON_ONCE(wqe->status == IB_WC_SUCCESS); do_complete(qp, wqe); rxe_qp_error(qp); if (pkt) { rxe_drop_ref(pkt->qp); kfree_skb(skb); skb = NULL; } WARN_ON_ONCE(skb); goto exit; } } exit: /* we come here if we are done with processing and want the task to * exit from the loop calling us */ WARN_ON_ONCE(skb); rxe_drop_ref(qp); return -EAGAIN; done: /* we come here if we have processed a packet we want the task to call * us again to see if there is anything else to do */ WARN_ON_ONCE(skb); rxe_drop_ref(qp); return 0; }