static void set_datagram_seg(struct mlx4_wqe_datagram_seg *dseg,
			     struct ibv_send_wr *wr)
{
	memcpy(dseg->av, &to_mah(wr->wr.ud.ah)->av, sizeof (struct mlx4_av));
	dseg->dqpn = htonl(wr->wr.ud.remote_qpn);
	dseg->qkey = htonl(wr->wr.ud.remote_qkey);
	dseg->vlan = htons(to_mah(wr->wr.ud.ah)->vlan);
	memcpy(dseg->mac, to_mah(wr->wr.ud.ah)->mac, 6);
}
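For context, a minimal sketch (not part of libmlx4) of the kind of UD work request that reaches set_datagram_seg() through ibv_post_send(); qp, ah, buf, lkey, remote_qpn and remote_qkey are assumed to have been set up elsewhere.

/*
 * Hypothetical caller: a plain libibverbs UD send. The driver's
 * set_datagram_seg() copies the address vector held by 'ah' and the
 * remote QPN/Q_Key below into the datagram segment of the WQE.
 */
#include <string.h>
#include <infiniband/verbs.h>

static int post_ud_send(struct ibv_qp *qp, struct ibv_ah *ah,
			void *buf, uint32_t len, uint32_t lkey,
			uint32_t remote_qpn, uint32_t remote_qkey)
{
	struct ibv_sge sge = {
		.addr   = (uintptr_t) buf,
		.length = len,
		.lkey   = lkey,
	};
	struct ibv_send_wr wr, *bad_wr;

	memset(&wr, 0, sizeof wr);
	wr.sg_list           = &sge;
	wr.num_sge           = 1;
	wr.opcode            = IBV_WR_SEND;
	wr.send_flags        = IBV_SEND_SIGNALED;
	wr.wr.ud.ah          = ah;	/* address handle wrapping the mlx4_av */
	wr.wr.ud.remote_qpn  = remote_qpn;
	wr.wr.ud.remote_qkey = remote_qkey;

	return ibv_post_send(qp, &wr, &bad_wr);
}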
int mlx4_ib_query_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr)
{
	struct mlx4_ib_ah *ah = to_mah(ibah);

	memset(ah_attr, 0, sizeof *ah_attr);
	ah_attr->dlid          = be16_to_cpu(ah->av.dlid);
	ah_attr->sl            = be32_to_cpu(ah->av.sl_tclass_flowlabel) >> 28;
	ah_attr->port_num      = be32_to_cpu(ah->av.port_pd) >> 24;
	if (ah->av.stat_rate)
		ah_attr->static_rate = ah->av.stat_rate - MLX4_STAT_RATE_OFFSET;
	ah_attr->src_path_bits = ah->av.g_slid & 0x7F;

	if (mlx4_ib_ah_grh_present(ah)) {
		ah_attr->ah_flags = IB_AH_GRH;

		ah_attr->grh.traffic_class =
			be32_to_cpu(ah->av.sl_tclass_flowlabel) >> 20;
		ah_attr->grh.flow_label =
			be32_to_cpu(ah->av.sl_tclass_flowlabel) & 0xfffff;
		ah_attr->grh.hop_limit  = ah->av.hop_limit;
		ah_attr->grh.sgid_index = ah->av.gid_index;
		memcpy(ah_attr->grh.dgid.raw, ah->av.dgid, 16);
	}

	return 0;
}
int mlx4_ib_query_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr)
{
	struct mlx4_ib_ah *ah = to_mah(ibah);
	enum rdma_link_layer ll;

	memset(ah_attr, 0, sizeof *ah_attr);
	ah_attr->sl       = be32_to_cpu(ah->av.ib.sl_tclass_flowlabel) >> 28;
	ah_attr->port_num = be32_to_cpu(ah->av.ib.port_pd) >> 24;
	ll = rdma_port_get_link_layer(ibah->device, ah_attr->port_num);
	ah_attr->dlid = ll == IB_LINK_LAYER_INFINIBAND ?
		be16_to_cpu(ah->av.ib.dlid) : 0;
	if (ah->av.ib.stat_rate)
		ah_attr->static_rate = ah->av.ib.stat_rate - MLX4_STAT_RATE_OFFSET;
	ah_attr->src_path_bits = ah->av.ib.g_slid & 0x7F;

	if (mlx4_ib_ah_grh_present(ah)) {
		ah_attr->ah_flags = IB_AH_GRH;

		ah_attr->grh.traffic_class =
			be32_to_cpu(ah->av.ib.sl_tclass_flowlabel) >> 20;
		ah_attr->grh.flow_label =
			be32_to_cpu(ah->av.ib.sl_tclass_flowlabel) & 0xfffff;
		ah_attr->grh.hop_limit  = ah->av.ib.hop_limit;
		ah_attr->grh.sgid_index = ah->av.ib.gid_index;
		memcpy(ah_attr->grh.dgid.raw, ah->av.ib.dgid, 16);
	}

	return 0;
}
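A minimal sketch of how a kernel consumer would reach this driver method, assuming the ib_ah_attr-era core verbs API (ib_create_ah/ib_query_ah/ib_destroy_ah) that matches the generation of these snippets; the function and its parameters are hypothetical, not taken from the driver.

/*
 * Hypothetical kernel-side caller. The verbs core dispatches
 * ib_query_ah() to the device's query_ah method, i.e.
 * mlx4_ib_query_ah() on mlx4 hardware.
 */
#include <linux/err.h>
#include <linux/printk.h>
#include <rdma/ib_verbs.h>

static int check_ah_roundtrip(struct ib_pd *pd, struct ib_ah_attr *attr)
{
	struct ib_ah_attr out;
	struct ib_ah *ah;
	int err;

	ah = ib_create_ah(pd, attr);
	if (IS_ERR(ah))
		return PTR_ERR(ah);

	err = ib_query_ah(ah, &out);	/* ends up in the driver's query_ah */
	if (!err)
		pr_info("AH: dlid 0x%x sl %u port %u\n",
			out.dlid, out.sl, out.port_num);

	ib_destroy_ah(ah);
	return err;
}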
static int mlx5i_xmit(struct net_device *dev, struct sk_buff *skb,
		      struct ib_ah *address, u32 dqpn)
{
	struct mlx5e_priv *epriv = mlx5i_epriv(dev);
	struct mlx5e_txqsq *sq = epriv->txq2sq[skb_get_queue_mapping(skb)];
	struct mlx5_ib_ah *mah = to_mah(address);
	struct mlx5i_priv *ipriv = epriv->ppriv;

	return mlx5i_sq_xmit(sq, skb, &mah->av, dqpn, ipriv->qkey);
}
int mlx4_ib_query_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr)
{
	struct mlx4_ib_ah *ah = to_mah(ibah);
	enum rdma_link_layer ll;

	memset(ah_attr, 0, sizeof *ah_attr);
	ah_attr->port_num = be32_to_cpu(ah->av.ib.port_pd) >> 24;
	ll = rdma_port_get_link_layer(ibah->device, ah_attr->port_num);
	if (ll == IB_LINK_LAYER_ETHERNET)
		ah_attr->sl = be32_to_cpu(ah->av.eth.sl_tclass_flowlabel) >> 29;
	else
int mlx5_ib_query_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr)
{
	struct mlx5_ib_ah *ah = to_mah(ibah);
	u32 tmp;

	memset(ah_attr, 0, sizeof(*ah_attr));

	tmp = be32_to_cpu(ah->av.grh_gid_fl);
	if (tmp & (1 << 30)) {
		ah_attr->ah_flags = IB_AH_GRH;
		ah_attr->grh.sgid_index = (tmp >> 20) & 0xff;
		ah_attr->grh.flow_label = tmp & 0xfffff;
		memcpy(&ah_attr->grh.dgid, ah->av.rgid, 16);
		ah_attr->grh.hop_limit = ah->av.hop_limit;
		ah_attr->grh.traffic_class = ah->av.tclass;
	}
int mlx5_ib_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr)
{
	struct mlx5_ib_ah *ah = to_mah(ibah);
	u32 tmp;

	memset(ah_attr, 0, sizeof(*ah_attr));
	ah_attr->type = ibah->type;

	tmp = be32_to_cpu(ah->av.grh_gid_fl);
	if (tmp & (1 << 30)) {
		rdma_ah_set_grh(ah_attr, NULL,
				tmp & 0xfffff,
				(tmp >> 20) & 0xff,
				ah->av.hop_limit,
				ah->av.tclass);
		rdma_ah_set_dgid_raw(ah_attr, ah->av.rgid);
	}
/* Create UD header for an MLX send and build a data segment for it */
static int build_mlx_header(struct mthca_dev *dev, struct mthca_sqp *sqp,
			    int ind, struct ib_send_wr *wr,
			    struct mthca_mlx_seg *mlx,
			    struct mthca_data_seg *data)
{
	int header_size;
	int err;

	ib_ud_header_init(256, /* assume a MAD */
			  sqp->ud_header.grh_present,
			  &sqp->ud_header);

	err = mthca_read_ah(dev, to_mah(wr->wr.ud.ah), &sqp->ud_header);
	if (err)
		return err;
	mlx->flags &= ~cpu_to_be32(MTHCA_NEXT_SOLICIT | 1);
	mlx->flags |= cpu_to_be32((!sqp->qp.ibqp.qp_num ? MTHCA_MLX_VL15 : 0) |
				  (sqp->ud_header.lrh.destination_lid ==
				   0xffff ? MTHCA_MLX_SLR : 0) |
				  (sqp->ud_header.lrh.service_level << 8));
	mlx->rlid = sqp->ud_header.lrh.destination_lid;
	mlx->vcrc = 0;

	switch (wr->opcode) {
	case IB_WR_SEND:
		sqp->ud_header.bth.opcode = IB_OPCODE_UD_SEND_ONLY;
		sqp->ud_header.immediate_present = 0;
		break;
	case IB_WR_SEND_WITH_IMM:
		sqp->ud_header.bth.opcode = IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE;
		sqp->ud_header.immediate_present = 1;
		sqp->ud_header.immediate_data = wr->imm_data;
		break;
	default:
		return -EINVAL;
	}

	sqp->ud_header.lrh.virtual_lane = !sqp->qp.ibqp.qp_num ? 15 : 0;
	if (sqp->ud_header.lrh.destination_lid == 0xffff)
		sqp->ud_header.lrh.source_lid = 0xffff;
	sqp->ud_header.bth.solicited_event = !!(wr->send_flags & IB_SEND_SOLICITED);
	if (!sqp->qp.ibqp.qp_num)
		ib_get_cached_pkey(&dev->ib_dev, sqp->port, sqp->pkey_index,
				   &sqp->ud_header.bth.pkey);
	else
		ib_get_cached_pkey(&dev->ib_dev, sqp->port, wr->wr.ud.pkey_index,
				   &sqp->ud_header.bth.pkey);
	cpu_to_be16s(&sqp->ud_header.bth.pkey);
	sqp->ud_header.bth.destination_qpn = cpu_to_be32(wr->wr.ud.remote_qpn);
	sqp->ud_header.bth.psn = cpu_to_be32((sqp->send_psn++) & ((1 << 24) - 1));
	sqp->ud_header.deth.qkey = cpu_to_be32(wr->wr.ud.remote_qkey & 0x80000000 ?
					       sqp->qkey : wr->wr.ud.remote_qkey);
	sqp->ud_header.deth.source_qpn = cpu_to_be32(sqp->qp.ibqp.qp_num);

	header_size = ib_ud_header_pack(&sqp->ud_header,
					sqp->header_buf +
					ind * MTHCA_UD_HEADER_SIZE);

	data->byte_count = cpu_to_be32(header_size);
	data->lkey       = cpu_to_be32(to_mpd(sqp->qp.ibqp.pd)->ntmr.ibmr.lkey);
	data->addr       = cpu_to_be64(sqp->header_dma +
				       ind * MTHCA_UD_HEADER_SIZE);

	return 0;
}
int mlx4_post_send(struct ibv_qp *ibqp, struct ibv_send_wr *wr,
		   struct ibv_send_wr **bad_wr)
{
	struct mlx4_context *ctx;
	struct mlx4_qp *qp = to_mqp(ibqp);
	void *wqe;
	struct mlx4_wqe_ctrl_seg *ctrl;
	int ind;
	int nreq;
	int inl = 0;
	int ret = 0;
	int size;
	int i;

	pthread_spin_lock(&qp->sq.lock);

	/* XXX check that state is OK to post send */

	ind = qp->sq.head;

	for (nreq = 0; wr; ++nreq, wr = wr->next) {
		if (wq_overflow(&qp->sq, nreq, to_mcq(qp->ibv_qp.send_cq))) {
			ret = -1;
			*bad_wr = wr;
			goto out;
		}

		if (wr->num_sge > qp->sq.max_gs) {
			ret = -1;
			*bad_wr = wr;
			goto out;
		}

		if (wr->opcode >= sizeof mlx4_ib_opcode / sizeof mlx4_ib_opcode[0]) {
			ret = -1;
			*bad_wr = wr;
			goto out;
		}

		ctrl = wqe = get_send_wqe(qp, ind & (qp->sq.wqe_cnt - 1));
		qp->sq.wrid[ind & (qp->sq.wqe_cnt - 1)] = wr->wr_id;

		ctrl->xrcrb_flags =
			(wr->send_flags & IBV_SEND_SIGNALED ?
			 htonl(MLX4_WQE_CTRL_CQ_UPDATE) : 0) |
			(wr->send_flags & IBV_SEND_SOLICITED ?
			 htonl(MLX4_WQE_CTRL_SOLICIT) : 0)   |
			qp->sq_signal_bits;

		if (wr->opcode == IBV_WR_SEND_WITH_IMM ||
		    wr->opcode == IBV_WR_RDMA_WRITE_WITH_IMM)
			ctrl->imm = wr->imm_data;
		else
			ctrl->imm = 0;

		wqe += sizeof *ctrl;
		size = sizeof *ctrl / 16;

		switch (ibqp->qp_type) {
		case IBV_QPT_XRC:
			ctrl->xrcrb_flags |= htonl(wr->xrc_remote_srq_num << 8);
			/* fall thru */
		case IBV_QPT_RC:
		case IBV_QPT_UC:
			switch (wr->opcode) {
			case IBV_WR_ATOMIC_CMP_AND_SWP:
			case IBV_WR_ATOMIC_FETCH_AND_ADD:
				set_raddr_seg(wqe, wr->wr.atomic.remote_addr,
					      wr->wr.atomic.rkey);
				wqe  += sizeof (struct mlx4_wqe_raddr_seg);

				set_atomic_seg(wqe, wr);
				wqe  += sizeof (struct mlx4_wqe_atomic_seg);
				size += (sizeof (struct mlx4_wqe_raddr_seg) +
					 sizeof (struct mlx4_wqe_atomic_seg)) / 16;
				break;

			case IBV_WR_RDMA_READ:
				inl = 1;
				/* fall through */
			case IBV_WR_RDMA_WRITE:
			case IBV_WR_RDMA_WRITE_WITH_IMM:
				set_raddr_seg(wqe, wr->wr.rdma.remote_addr,
					      wr->wr.rdma.rkey);
				wqe  += sizeof (struct mlx4_wqe_raddr_seg);
				size += sizeof (struct mlx4_wqe_raddr_seg) / 16;
				break;

			default:
				/* No extra segments required for sends */
				break;
			}
			break;

		case IBV_QPT_UD:
			set_datagram_seg(wqe, wr);
			wqe  += sizeof (struct mlx4_wqe_datagram_seg);
			size += sizeof (struct mlx4_wqe_datagram_seg) / 16;
			if (to_mah(wr->wr.ud.ah)->tagged) {
				ctrl->ins_vlan = 1 << 6;
				ctrl->vlan_tag = htons(to_mah(wr->wr.ud.ah)->vlan);
			}
			break;

		default:
			break;
		}

		if (wr->send_flags & IBV_SEND_INLINE && wr->num_sge) {
			struct mlx4_wqe_inline_seg *seg;
			void *addr;
			int len, seg_len;
			int num_seg;
			int off, to_copy;

			inl = 0;

			seg = wqe;
			wqe += sizeof *seg;
			off = ((uintptr_t) wqe) & (MLX4_INLINE_ALIGN - 1);
			num_seg = 0;
			seg_len = 0;

			for (i = 0; i < wr->num_sge; ++i) {
				addr = (void *) (uintptr_t) wr->sg_list[i].addr;
				len  = wr->sg_list[i].length;
				inl += len;

				if (inl > qp->max_inline_data) {
					inl = 0;
					ret = -1;
					*bad_wr = wr;
					goto out;
				}

				while (len >= MLX4_INLINE_ALIGN - off) {
					to_copy = MLX4_INLINE_ALIGN - off;
					memcpy(wqe, addr, to_copy);
					len -= to_copy;
					wqe += to_copy;
					addr += to_copy;
					seg_len += to_copy;

					wmb(); /* see comment below */
					seg->byte_count = htonl(MLX4_INLINE_SEG | seg_len);
					seg_len = 0;
					seg = wqe;
					wqe += sizeof *seg;
					off = sizeof *seg;
					++num_seg;
				}

				memcpy(wqe, addr, len);
				wqe += len;
				seg_len += len;
				off += len;
			}

			if (seg_len) {
				++num_seg;
				/*
				 * Need a barrier here to make sure
				 * all the data is visible before the
				 * byte_count field is set.  Otherwise
				 * the HCA prefetcher could grab the
				 * 64-byte chunk with this inline
				 * segment and get a valid (!=
				 * 0xffffffff) byte count but stale
				 * data, and end up sending the wrong
				 * data.
				 */
				wmb();
				seg->byte_count = htonl(MLX4_INLINE_SEG | seg_len);
			}

			size += (inl + num_seg * sizeof *seg + 15) / 16;
		} else {
			struct mlx4_wqe_data_seg *seg = wqe;

			for (i = wr->num_sge - 1; i >= 0 ; --i)
				set_data_seg(seg + i, wr->sg_list + i);

			size += wr->num_sge * (sizeof *seg / 16);
		}

		ctrl->fence_size = (wr->send_flags & IBV_SEND_FENCE ?
				    MLX4_WQE_CTRL_FENCE : 0) | size;

		/*
		 * Make sure descriptor is fully written before
		 * setting ownership bit (because HW can start
		 * executing as soon as we do).
		 */
		wmb();

		ctrl->owner_opcode = htonl(mlx4_ib_opcode[wr->opcode]) |
			(ind & qp->sq.wqe_cnt ? htonl(1 << 31) : 0);

		/*
		 * We can improve latency by not stamping the last
		 * send queue WQE until after ringing the doorbell, so
		 * only stamp here if there are still more WQEs to post.
		 */
		if (wr->next)
			stamp_send_wqe(qp, (ind + qp->sq_spare_wqes) &
				       (qp->sq.wqe_cnt - 1));

		++ind;
	}

out:
	ctx = to_mctx(ibqp->context);

	if (nreq == 1 && inl && size > 1 && size < ctx->bf_buf_size / 16) {
		ctrl->owner_opcode |= htonl((qp->sq.head & 0xffff) << 8);
		*(uint32_t *) (&ctrl->vlan_tag) |= qp->doorbell_qpn;
		/*
		 * Make sure that descriptor is written to memory
		 * before writing to BlueFlame page.
		 */
		wmb();

		++qp->sq.head;

		pthread_spin_lock(&ctx->bf_lock);

		mlx4_bf_copy(ctx->bf_page + ctx->bf_offset, (unsigned long *) ctrl,
			     align(size * 16, 64));
		wc_wmb();

		ctx->bf_offset ^= ctx->bf_buf_size;

		pthread_spin_unlock(&ctx->bf_lock);
	} else if (nreq) {
		qp->sq.head += nreq;

		/*
		 * Make sure that descriptors are written before
		 * doorbell record.
		 */
		wmb();

		*(uint32_t *) (ctx->uar + MLX4_SEND_DOORBELL) = qp->doorbell_qpn;
	}

	if (nreq)
		stamp_send_wqe(qp, (ind + qp->sq_spare_wqes - 1) &
			       (qp->sq.wqe_cnt - 1));

	pthread_spin_unlock(&qp->sq.lock);

	return ret;
}
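A minimal sketch (not part of libmlx4) of the kind of request that exercises the BlueFlame path in mlx4_post_send() above: a single small inline send, where the whole WQE is copied to the write-combining BlueFlame page instead of only ringing the doorbell. qp and buf are assumed to exist, and len must fit within the QP's max_inline_data.

/*
 * Hypothetical caller: single inline RC send posted via libibverbs.
 * With one WR, IBV_SEND_INLINE set and a small resulting descriptor,
 * the post path above takes the BlueFlame branch (nreq == 1 && inl &&
 * size < bf_buf_size / 16).
 */
#include <string.h>
#include <infiniband/verbs.h>

static int post_small_inline_send(struct ibv_qp *qp, const void *buf,
				  uint32_t len)
{
	struct ibv_sge sge = {
		.addr   = (uintptr_t) buf,
		.length = len,
		/* lkey is not needed: inline data is copied into the WQE */
	};
	struct ibv_send_wr wr, *bad_wr;

	memset(&wr, 0, sizeof wr);
	wr.sg_list    = &sge;
	wr.num_sge    = 1;
	wr.opcode     = IBV_WR_SEND;
	wr.send_flags = IBV_SEND_INLINE | IBV_SEND_SIGNALED;

	return ibv_post_send(qp, &wr, &bad_wr);
}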
int mlx4_ib_destroy_ah(struct ib_ah *ah)
{
	kfree(to_mah(ah));

	return 0;
}