static int mlx4_poll_one(struct mlx4_cq *cq,
			 struct mlx4_qp **cur_qp,
			 struct ibv_wc *wc)
{
	struct mlx4_wq *wq;
	struct mlx4_cqe *cqe;
	struct mlx4_srq *srq = NULL;
	uint32_t qpn;
	uint32_t srqn;
	uint32_t g_mlpath_rqpn;
	uint16_t wqe_index;
	int is_error;
	int is_send;

	cqe = next_cqe_sw(cq);
	if (!cqe)
		return CQ_EMPTY;

	++cq->cons_index;

	VALGRIND_MAKE_MEM_DEFINED(cqe, sizeof *cqe);

	/*
	 * Make sure we read CQ entry contents after we've checked the
	 * ownership bit.
	 */
	rmb();

	qpn = ntohl(cqe->my_qpn);

	is_send  = cqe->owner_sr_opcode & MLX4_CQE_IS_SEND_MASK;
	is_error = (cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) ==
		MLX4_CQE_OPCODE_ERROR;

	if ((qpn & MLX4_XRC_QPN_BIT) && !is_send) {
		srqn = ntohl(cqe->g_mlpath_rqpn) & 0xffffff;
		/*
		 * We do not have to take the XRC SRQ table lock here,
		 * because CQs will be locked while XRC SRQs are removed
		 * from the table.
		 */
		srq = mlx4_find_xrc_srq(to_mctx(cq->ibv_cq.context), srqn);
		if (!srq)
			return CQ_POLL_ERR;
	} else if (!*cur_qp || (qpn & 0xffffff) != (*cur_qp)->ibv_qp.qp_num) {
		/*
		 * We do not have to take the QP table lock here,
		 * because CQs will be locked while QPs are removed
		 * from the table.
		 */
		*cur_qp = mlx4_find_qp(to_mctx(cq->ibv_cq.context),
				       qpn & 0xffffff);
		if (!*cur_qp)
			return CQ_POLL_ERR;
	}

	wc->qp_num = qpn & 0xffffff;

	if (is_send) {
		wq = &(*cur_qp)->sq;
		wqe_index = ntohs(cqe->wqe_index);
		wq->tail += (uint16_t) (wqe_index - (uint16_t) wq->tail);
		wc->wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)];
		++wq->tail;
	} else if (srq) {
		wqe_index = ntohs(cqe->wqe_index);
		wc->wr_id = srq->wrid[wqe_index];
		mlx4_free_srq_wqe(srq, wqe_index);
	} else if ((*cur_qp)->ibv_qp.srq) {
		srq = to_msrq((*cur_qp)->ibv_qp.srq);
		wqe_index = ntohs(cqe->wqe_index);
		wc->wr_id = srq->wrid[wqe_index];
		mlx4_free_srq_wqe(srq, wqe_index);
	} else {
		wq = &(*cur_qp)->rq;
		wc->wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)];
		++wq->tail;
	}

	if (is_error) {
		mlx4_handle_error_cqe((struct mlx4_err_cqe *) cqe, wc);
		return CQ_OK;
	}

	wc->status = IBV_WC_SUCCESS;

	if (is_send) {
		wc->wc_flags = 0;
		switch (cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) {
		case MLX4_OPCODE_RDMA_WRITE_IMM:
			wc->wc_flags |= IBV_WC_WITH_IMM;
			/* fall through */
		case MLX4_OPCODE_RDMA_WRITE:
			wc->opcode = IBV_WC_RDMA_WRITE;
			break;
		case MLX4_OPCODE_SEND_IMM:
			wc->wc_flags |= IBV_WC_WITH_IMM;
			/* fall through */
		case MLX4_OPCODE_SEND:
			wc->opcode = IBV_WC_SEND;
			break;
		case MLX4_OPCODE_RDMA_READ:
			wc->opcode   = IBV_WC_RDMA_READ;
			wc->byte_len = ntohl(cqe->byte_cnt);
			break;
		case MLX4_OPCODE_ATOMIC_CS:
			wc->opcode   = IBV_WC_COMP_SWAP;
			wc->byte_len = 8;
			break;
		case MLX4_OPCODE_ATOMIC_FA:
			wc->opcode   = IBV_WC_FETCH_ADD;
			wc->byte_len = 8;
			break;
		case MLX4_OPCODE_BIND_MW:
			wc->opcode = IBV_WC_BIND_MW;
			break;
		default:
			/* assume it's a send completion */
			wc->opcode = IBV_WC_SEND;
			break;
		}
	} else {
		wc->byte_len = ntohl(cqe->byte_cnt);

		switch (cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) {
		case MLX4_RECV_OPCODE_RDMA_WRITE_IMM:
			wc->opcode   = IBV_WC_RECV_RDMA_WITH_IMM;
			wc->wc_flags = IBV_WC_WITH_IMM;
			wc->imm_data = cqe->immed_rss_invalid;
			break;
		case MLX4_RECV_OPCODE_SEND:
			wc->opcode   = IBV_WC_RECV;
			wc->wc_flags = 0;
			break;
		case MLX4_RECV_OPCODE_SEND_IMM:
			wc->opcode   = IBV_WC_RECV;
			wc->wc_flags = IBV_WC_WITH_IMM;
			wc->imm_data = cqe->immed_rss_invalid;
			break;
		}

		wc->slid	   = ntohs(cqe->rlid);
		wc->sl		   = cqe->sl >> 4;
		g_mlpath_rqpn	   = ntohl(cqe->g_mlpath_rqpn);
		wc->src_qp	   = g_mlpath_rqpn & 0xffffff;
		wc->dlid_path_bits = (g_mlpath_rqpn >> 24) & 0x7f;
		wc->wc_flags	  |= g_mlpath_rqpn & 0x80000000 ? IBV_WC_GRH : 0;
		wc->pkey_index	   = ntohl(cqe->immed_rss_invalid) & 0x7f;
	}

	return CQ_OK;
}
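/*
 * Context, not part of the excerpt above: mlx4_poll_one() depends on
 * next_cqe_sw() handing back a CQE only once hardware has passed ownership
 * to software. A minimal sketch of that check follows, assuming the usual
 * libmlx4 helpers get_cqe() and MLX4_CQE_OWNER_MASK. The idea is that
 * hardware flips the CQE owner bit each time it wraps the ring, so a CQE
 * belongs to software only when its owner bit matches the wrap parity of
 * the consumer index.
 */
static struct mlx4_cqe *get_sw_cqe(struct mlx4_cq *cq, int n)
{
	struct mlx4_cqe *cqe = get_cqe(cq, n & cq->ibv_cq.cqe);

	/*
	 * Return the CQE only when the owner bit matches the expected
	 * wrap parity of n; otherwise hardware still owns the entry.
	 */
	return (!!(cqe->owner_sr_opcode & MLX4_CQE_OWNER_MASK) ^
		!!(n & (cq->ibv_cq.cqe + 1))) ? NULL : cqe;
}

static struct mlx4_cqe *next_cqe_sw(struct mlx4_cq *cq)
{
	return get_sw_cqe(cq, cq->cons_index);
}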
static int mlx4_ib_poll_one(struct mlx4_ib_cq *cq,
			    struct mlx4_ib_qp **cur_qp,
			    struct ib_wc *wc)
{
	struct mlx4_cqe *cqe;
	struct mlx4_qp *mqp;
	struct mlx4_ib_wq *wq;
	struct mlx4_ib_srq *srq;
	int is_send;
	int is_error;
	u32 g_mlpath_rqpn;
	u16 wqe_ctr;

repoll:
	cqe = next_cqe_sw(cq);
	if (!cqe)
		return -EAGAIN;

	++cq->mcq.cons_index;

	/*
	 * Make sure we read CQ entry contents after we've checked the
	 * ownership bit.
	 */
	rmb();

	is_send  = cqe->owner_sr_opcode & MLX4_CQE_IS_SEND_MASK;
	is_error = (cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) ==
		MLX4_CQE_OPCODE_ERROR;

	if (unlikely((cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) == MLX4_OPCODE_NOP &&
		     is_send)) {
		printk(KERN_WARNING "Completion for NOP opcode detected!\n");
		return -EINVAL;
	}

	/* Resize CQ in progress */
	if (unlikely((cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) == MLX4_CQE_OPCODE_RESIZE)) {
		if (cq->resize_buf) {
			struct mlx4_ib_dev *dev = to_mdev(cq->ibcq.device);

			mlx4_ib_free_cq_buf(dev, &cq->buf, cq->ibcq.cqe);
			cq->buf      = cq->resize_buf->buf;
			cq->ibcq.cqe = cq->resize_buf->cqe;

			kfree(cq->resize_buf);
			cq->resize_buf = NULL;
		}

		goto repoll;
	}

	if (!*cur_qp ||
	    (be32_to_cpu(cqe->vlan_my_qpn) & MLX4_CQE_QPN_MASK) != (*cur_qp)->mqp.qpn) {
		/*
		 * We do not have to take the QP table lock here,
		 * because CQs will be locked while QPs are removed
		 * from the table.
		 */
		mqp = __mlx4_qp_lookup(to_mdev(cq->ibcq.device)->dev,
				       be32_to_cpu(cqe->vlan_my_qpn));
		if (unlikely(!mqp)) {
			printk(KERN_WARNING "CQ %06x with entry for unknown QPN %06x\n",
			       cq->mcq.cqn,
			       be32_to_cpu(cqe->vlan_my_qpn) & MLX4_CQE_QPN_MASK);
			return -EINVAL;
		}

		*cur_qp = to_mibqp(mqp);
	}

	wc->qp = &(*cur_qp)->ibqp;

	if (is_send) {
		wq = &(*cur_qp)->sq;
		if (!(*cur_qp)->sq_signal_bits) {
			wqe_ctr = be16_to_cpu(cqe->wqe_index);
			wq->tail += (u16) (wqe_ctr - (u16) wq->tail);
		}
		wc->wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)];
		++wq->tail;
	} else if ((*cur_qp)->ibqp.srq) {
		srq = to_msrq((*cur_qp)->ibqp.srq);
		wqe_ctr = be16_to_cpu(cqe->wqe_index);
		wc->wr_id = srq->wrid[wqe_ctr];
		mlx4_ib_free_srq_wqe(srq, wqe_ctr);
	} else {
		wq = &(*cur_qp)->rq;
		wc->wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)];
		++wq->tail;
	}

	if (unlikely(is_error)) {
		mlx4_ib_handle_error_cqe((struct mlx4_err_cqe *) cqe, wc);
		return 0;
	}

	wc->status = IB_WC_SUCCESS;

	if (is_send) {
		wc->wc_flags = 0;
		switch (cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) {
		case MLX4_OPCODE_RDMA_WRITE_IMM:
			wc->wc_flags |= IB_WC_WITH_IMM;
			/* fall through */
		case MLX4_OPCODE_RDMA_WRITE:
			wc->opcode = IB_WC_RDMA_WRITE;
			break;
		case MLX4_OPCODE_SEND_IMM:
			wc->wc_flags |= IB_WC_WITH_IMM;
			/* fall through */
		case MLX4_OPCODE_SEND:
		case MLX4_OPCODE_SEND_INVAL:
			wc->opcode = IB_WC_SEND;
			break;
		case MLX4_OPCODE_RDMA_READ:
			wc->opcode   = IB_WC_RDMA_READ;
			wc->byte_len = be32_to_cpu(cqe->byte_cnt);
			break;
		case MLX4_OPCODE_ATOMIC_CS:
			wc->opcode   = IB_WC_COMP_SWAP;
			wc->byte_len = 8;
			break;
		case MLX4_OPCODE_ATOMIC_FA:
			wc->opcode   = IB_WC_FETCH_ADD;
			wc->byte_len = 8;
			break;
		case MLX4_OPCODE_MASKED_ATOMIC_CS:
			wc->opcode   = IB_WC_MASKED_COMP_SWAP;
			wc->byte_len = 8;
			break;
		case MLX4_OPCODE_MASKED_ATOMIC_FA:
			wc->opcode   = IB_WC_MASKED_FETCH_ADD;
			wc->byte_len = 8;
			break;
		case MLX4_OPCODE_BIND_MW:
			wc->opcode = IB_WC_BIND_MW;
			break;
		case MLX4_OPCODE_LSO:
			wc->opcode = IB_WC_LSO;
			break;
		case MLX4_OPCODE_FMR:
			wc->opcode = IB_WC_FAST_REG_MR;
			break;
		case MLX4_OPCODE_LOCAL_INVAL:
			wc->opcode = IB_WC_LOCAL_INV;
			break;
		}
	} else {
		wc->byte_len = be32_to_cpu(cqe->byte_cnt);

		switch (cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) {
		case MLX4_RECV_OPCODE_RDMA_WRITE_IMM:
			wc->opcode	= IB_WC_RECV_RDMA_WITH_IMM;
			wc->wc_flags	= IB_WC_WITH_IMM;
			wc->ex.imm_data = cqe->immed_rss_invalid;
			break;
		case MLX4_RECV_OPCODE_SEND_INVAL:
			wc->opcode	= IB_WC_RECV;
			wc->wc_flags	= IB_WC_WITH_INVALIDATE;
			wc->ex.invalidate_rkey = be32_to_cpu(cqe->immed_rss_invalid);
			break;
		case MLX4_RECV_OPCODE_SEND:
			wc->opcode   = IB_WC_RECV;
			wc->wc_flags = 0;
			break;
		case MLX4_RECV_OPCODE_SEND_IMM:
			wc->opcode	= IB_WC_RECV;
			wc->wc_flags	= IB_WC_WITH_IMM;
			wc->ex.imm_data = cqe->immed_rss_invalid;
			break;
		}

		wc->slid	   = be16_to_cpu(cqe->rlid);
		g_mlpath_rqpn	   = be32_to_cpu(cqe->g_mlpath_rqpn);
		wc->src_qp	   = g_mlpath_rqpn & 0xffffff;
		wc->dlid_path_bits = (g_mlpath_rqpn >> 24) & 0x7f;
		wc->wc_flags	  |= g_mlpath_rqpn & 0x80000000 ? IB_WC_GRH : 0;
		wc->pkey_index	   = be32_to_cpu(cqe->immed_rss_invalid) & 0x7f;
		wc->csum_ok	   = mlx4_ib_ipoib_csum_ok(cqe->status, cqe->checksum);

		if (rdma_port_get_link_layer(wc->qp->device,
					     (*cur_qp)->port) == IB_LINK_LAYER_ETHERNET)
			wc->sl = be16_to_cpu(cqe->sl_vid) >> 13;
		else
			wc->sl = be16_to_cpu(cqe->sl_vid) >> 12;
	}

	return 0;
}
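/*
 * Sketch, not part of the excerpt: mlx4_ib_poll_one() is conventionally
 * driven from the ib_poll_cq entry point, which serializes pollers on the
 * CQ lock and publishes the new consumer index to hardware once per batch
 * via mlx4_cq_set_ci(). -EAGAIN from poll_one simply means the CQ is empty,
 * so it is not reported as an error to the caller.
 */
int mlx4_ib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
{
	struct mlx4_ib_cq *cq = to_mcq(ibcq);
	struct mlx4_ib_qp *cur_qp = NULL;
	unsigned long flags;
	int npolled;
	int err = 0;

	spin_lock_irqsave(&cq->lock, flags);

	for (npolled = 0; npolled < num_entries; ++npolled) {
		err = mlx4_ib_poll_one(cq, &cur_qp, wc + npolled);
		if (err)
			break;
	}

	/* Ring the CQ doorbell once for the whole batch. */
	if (npolled)
		mlx4_cq_set_ci(&cq->mcq);

	spin_unlock_irqrestore(&cq->lock, flags);

	if (err == 0 || err == -EAGAIN)
		return npolled;
	else
		return err;
}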
static int hns_roce_v1_poll_one(struct hns_roce_cq *cq,
				struct hns_roce_qp **cur_qp, struct ibv_wc *wc)
{
	uint32_t qpn;
	int is_send;
	uint16_t wqe_ctr;
	uint32_t local_qpn;
	struct hns_roce_wq *wq = NULL;
	struct hns_roce_cqe *cqe = NULL;
	struct hns_roce_wqe_ctrl_seg *sq_wqe = NULL;

	/* Find the CQE at the current consumer index (CI) */
	cqe = next_cqe_sw(cq);
	if (!cqe)
		return CQ_EMPTY;

	/* Consume this CQE; the CI advances one entry at a time */
	++cq->cons_index;

	udma_from_device_barrier();

	qpn = roce_get_field(cqe->cqe_byte_16, CQE_BYTE_16_LOCAL_QPN_M,
			     CQE_BYTE_16_LOCAL_QPN_S);

	is_send = (roce_get_bit(cqe->cqe_byte_4, CQE_BYTE_4_SQ_RQ_FLAG_S) ==
		   HNS_ROCE_CQE_IS_SQ);

	local_qpn = roce_get_field(cqe->cqe_byte_16, CQE_BYTE_16_LOCAL_QPN_M,
				   CQE_BYTE_16_LOCAL_QPN_S);

	/* If *cur_qp is NULL, the cached QP cannot match this CQE's QPN */
	if (!*cur_qp ||
	    (local_qpn & HNS_ROCE_CQE_QPN_MASK) != (*cur_qp)->ibv_qp.qp_num) {
		*cur_qp = hns_roce_find_qp(to_hr_ctx(cq->ibv_cq.context),
					   qpn & 0xffffff);
		if (!*cur_qp) {
			fprintf(stderr, PFX "can't find qp!\n");
			return CQ_POLL_ERR;
		}
	}

	wc->qp_num = qpn & 0xffffff;

	if (is_send) {
		wq = &(*cur_qp)->sq;
		/*
		 * If sq_signal_bits is set, first move the tail up to the
		 * WQE indexed by the current CQE.
		 */
		if ((*cur_qp)->sq_signal_bits) {
			wqe_ctr = (uint16_t)(roce_get_field(cqe->cqe_byte_4,
						CQE_BYTE_4_WQE_INDEX_M,
						CQE_BYTE_4_WQE_INDEX_S));
			/*
			 * wq->tail only ever grows; masking the difference
			 * with (wqe_cnt - 1) makes the index wrap correctly.
			 */
			wq->tail += (wqe_ctr - (uint16_t) wq->tail) &
				    (wq->wqe_cnt - 1);
		}
		/* Report the wr_id of the completed send WQE */
		wc->wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)];
		++wq->tail;
	} else {
		wq = &(*cur_qp)->rq;
		wc->wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)];
		++wq->tail;
	}

	/*
	 * Hardware maintains the completion status; on an error CQE, set
	 * the error type in the wc and return immediately.
	 */
	if (roce_get_field(cqe->cqe_byte_4,
			   CQE_BYTE_4_STATUS_OF_THE_OPERATION_M,
			   CQE_BYTE_4_STATUS_OF_THE_OPERATION_S) !=
	    HNS_ROCE_CQE_SUCCESS) {
		hns_roce_handle_error_cqe(cqe, wc);
		return CQ_OK;
	}

	wc->status = IBV_WC_SUCCESS;

	/*
	 * Fill in the opcode and related fields of the work completion
	 * according to the opcode type of the CQE.
	 */
	if (is_send) {
		/* For sends, fetch opcode and flags from the WQE itself */
		sq_wqe = (struct hns_roce_wqe_ctrl_seg *)
			 get_send_wqe(*cur_qp,
				      roce_get_field(cqe->cqe_byte_4,
						     CQE_BYTE_4_WQE_INDEX_M,
						     CQE_BYTE_4_WQE_INDEX_S));
		switch (le32toh(sq_wqe->flag) & HNS_ROCE_WQE_OPCODE_MASK) {
		case HNS_ROCE_WQE_OPCODE_SEND:
			wc->opcode = IBV_WC_SEND;
			break;
		case HNS_ROCE_WQE_OPCODE_RDMA_READ:
			wc->opcode = IBV_WC_RDMA_READ;
			wc->byte_len = le32toh(cqe->byte_cnt);
			break;
		case HNS_ROCE_WQE_OPCODE_RDMA_WRITE:
			wc->opcode = IBV_WC_RDMA_WRITE;
			break;
		case HNS_ROCE_WQE_OPCODE_BIND_MW2:
			wc->opcode = IBV_WC_BIND_MW;
			break;
		default:
			wc->status = IBV_WC_GENERAL_ERR;
			break;
		}
		wc->wc_flags = (le32toh(sq_wqe->flag) & HNS_ROCE_WQE_IMM ?
				IBV_WC_WITH_IMM : 0);
	} else {
		/* For receives (rq & srq), fetch opcode from the CQE */
		wc->byte_len = le32toh(cqe->byte_cnt);
		switch (roce_get_field(cqe->cqe_byte_4,
				       CQE_BYTE_4_OPERATION_TYPE_M,
				       CQE_BYTE_4_OPERATION_TYPE_S) &
			HNS_ROCE_CQE_OPCODE_MASK) {
		case HNS_ROCE_OPCODE_RDMA_WITH_IMM_RECEIVE:
			wc->opcode   = IBV_WC_RECV_RDMA_WITH_IMM;
			wc->wc_flags = IBV_WC_WITH_IMM;
			wc->imm_data = htobe32(le32toh(cqe->immediate_data));
			break;
		case HNS_ROCE_OPCODE_SEND_DATA_RECEIVE:
			if (roce_get_bit(cqe->cqe_byte_4,
					 CQE_BYTE_4_IMMEDIATE_DATA_FLAG_S)) {
				wc->opcode   = IBV_WC_RECV;
				wc->wc_flags = IBV_WC_WITH_IMM;
				wc->imm_data =
					htobe32(le32toh(cqe->immediate_data));
			} else {
				wc->opcode   = IBV_WC_RECV;
				wc->wc_flags = 0;
			}
			break;
		default:
			wc->status = IBV_WC_GENERAL_ERR;
			break;
		}
	}

	return CQ_OK;
}
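/*
 * Sketch, with stated assumptions: like the two mlx4 variants above,
 * hns_roce_v1_poll_one() would be called from the provider's ibv_poll_cq
 * hook under the CQ lock, with the consumer index written back to the
 * hardware doorbell only after the batch completes. This assumes a
 * to_hr_cq() conversion helper and a pthread spinlock in struct
 * hns_roce_cq, as in the upstream hns provider;
 * hns_roce_v1_update_cq_cons_index() is a hypothetical name for the
 * doorbell write-back.
 */
static int hns_roce_v1_poll_cq(struct ibv_cq *ibvcq, int ne,
			       struct ibv_wc *wc)
{
	int npolled;
	int err = CQ_OK;
	struct hns_roce_qp *qp = NULL;
	struct hns_roce_cq *cq = to_hr_cq(ibvcq);

	pthread_spin_lock(&cq->lock);

	for (npolled = 0; npolled < ne; ++npolled) {
		err = hns_roce_v1_poll_one(cq, &qp, wc + npolled);
		if (err != CQ_OK)
			break;
	}

	/* Publish the consumer index once for the whole batch (hypothetical helper). */
	if (npolled)
		hns_roce_v1_update_cq_cons_index(cq);

	pthread_spin_unlock(&cq->lock);

	/* CQ_EMPTY just ends the batch; only CQ_POLL_ERR is an error. */
	return err == CQ_POLL_ERR ? err : npolled;
}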