static int _roce_gid_cache_find_gid(struct ib_device *ib_dev, union ib_gid *gid,
				    const struct ib_gid_attr *val,
				    unsigned long mask,
				    u8 *port, u16 *index)
{
	struct ib_roce_gid_cache *cache;
	u8 p;
	int local_index;

	if (!ib_dev->cache.roce_gid_cache)
		return -ENOENT;

	for (p = 0; p < ib_dev->phys_port_cnt; p++) {
		if (rdma_port_get_link_layer(ib_dev, p + start_port(ib_dev)) !=
		    IB_LINK_LAYER_ETHERNET)
			continue;
		cache = ib_dev->cache.roce_gid_cache[p];
		if (!cache || !cache->active)
			continue;
		local_index = find_gid(cache, gid, val, mask);
		if (local_index >= 0) {
			if (index)
				*index = local_index;
			if (port)
				*port = p + start_port(ib_dev);
			return 0;
		}
	}

	return -ENOENT;
}
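start_port() above is the usual IB core helper (switches number ports from 0, HCAs and routers from 1); a minimal definition consistent with its uses on this page:

/* Switches expose port 0; CAs and routers start at port 1. */
static inline u8 start_port(struct ib_device *ib_dev)
{
	return ib_dev->node_type == RDMA_NODE_IB_SWITCH ? 0 : 1;
}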
Example #2
File: ah.c  Project: jgroen/rtt_tests
struct ib_ah *mlx4_ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr)
{
    struct mlx4_ib_ah *ah;
    struct ib_ah *ret;

    ah = kzalloc(sizeof *ah, GFP_ATOMIC);
    if (!ah)
        return ERR_PTR(-ENOMEM);

    if (rdma_port_get_link_layer(pd->device, ah_attr->port_num) == IB_LINK_LAYER_ETHERNET) {
        if (!(ah_attr->ah_flags & IB_AH_GRH)) {
            ret = ERR_PTR(-EINVAL);
        } else {
            /*
             * TBD: need to handle the case when we get
             * called in an atomic context and there we
             * might sleep.  We don't expect this
             * currently since we're working with link
             * local addresses which we can translate
             * without going to sleep.
             */
            ret = create_iboe_ah(pd, ah_attr, ah);
        }

        if (IS_ERR(ret))
            kfree(ah);

        return ret;
    } else
        return create_ib_ah(pd, ah_attr, ah); /* never fails */
}
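As the GRH check above implies, an address handle on an Ethernet (RoCE) port is only valid when a GRH is supplied. A minimal caller sketch through the verbs API (the helper name and hop limit are illustrative assumptions):

static struct ib_ah *make_roce_ah(struct ib_pd *pd, u8 port,
				  const union ib_gid *dgid)
{
	struct ib_ah_attr attr = {
		.port_num = port,
		.ah_flags = IB_AH_GRH,	/* mandatory on RoCE ports */
		.grh = { .hop_limit = 0xff },
	};

	memcpy(attr.grh.dgid.raw, dgid->raw, sizeof(attr.grh.dgid.raw));
	return ib_create_ah(pd, &attr);	/* dispatches to mlx4_ib_create_ah */
}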
Example #3
File: mad.c  Project: 710leo/LVS
int mlx4_ib_mad_init(struct mlx4_ib_dev *dev)
{
	struct ib_mad_agent *agent;
	int p, q;
	int ret;
	enum rdma_link_layer ll;

	for (p = 0; p < dev->num_ports; ++p) {
		ll = rdma_port_get_link_layer(&dev->ib_dev, p + 1);
		for (q = 0; q <= 1; ++q) {
			if (ll == IB_LINK_LAYER_INFINIBAND) {
				agent = ib_register_mad_agent(&dev->ib_dev, p + 1,
							      q ? IB_QPT_GSI : IB_QPT_SMI,
							      NULL, 0, send_handler,
							      NULL, NULL);
				if (IS_ERR(agent)) {
					ret = PTR_ERR(agent);
					goto err;
				}
				dev->send_agent[p][q] = agent;
			} else
				dev->send_agent[p][q] = NULL;
		}
	}

	return 0;

err:
	for (p = 0; p < dev->num_ports; ++p)
		for (q = 0; q <= 1; ++q)
			if (dev->send_agent[p][q])
				ib_unregister_mad_agent(dev->send_agent[p][q]);

	return ret;
}
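The matching teardown mirrors the err: rollback above; a minimal sketch (the function name is illustrative):

/* Sketch of the matching cleanup (mirrors the err: path above). */
static void mad_cleanup(struct mlx4_ib_dev *dev)
{
	int p, q;

	for (p = 0; p < dev->num_ports; ++p)
		for (q = 0; q <= 1; ++q)
			if (dev->send_agent[p][q]) {
				ib_unregister_mad_agent(dev->send_agent[p][q]);
				dev->send_agent[p][q] = NULL;
			}
}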
Example #4
int mlx4_ib_query_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr)
{
	struct mlx4_ib_ah *ah = to_mah(ibah);
	enum rdma_link_layer ll;

	memset(ah_attr, 0, sizeof *ah_attr);
	ah_attr->sl = be32_to_cpu(ah->av.ib.sl_tclass_flowlabel) >> 28;
	ah_attr->port_num = be32_to_cpu(ah->av.ib.port_pd) >> 24;
	ll = rdma_port_get_link_layer(ibah->device, ah_attr->port_num);
	ah_attr->dlid = ll == IB_LINK_LAYER_INFINIBAND ? be16_to_cpu(ah->av.ib.dlid) : 0;
	if (ah->av.ib.stat_rate)
		ah_attr->static_rate = ah->av.ib.stat_rate - MLX4_STAT_RATE_OFFSET;
	ah_attr->src_path_bits = ah->av.ib.g_slid & 0x7F;

	if (mlx4_ib_ah_grh_present(ah)) {
		ah_attr->ah_flags = IB_AH_GRH;

		ah_attr->grh.traffic_class =
			be32_to_cpu(ah->av.ib.sl_tclass_flowlabel) >> 20;
		ah_attr->grh.flow_label =
			be32_to_cpu(ah->av.ib.sl_tclass_flowlabel) & 0xfffff;
		ah_attr->grh.hop_limit  = ah->av.ib.hop_limit;
		ah_attr->grh.sgid_index = ah->av.ib.gid_index;
		memcpy(ah_attr->grh.dgid.raw, ah->av.ib.dgid, 16);
	}

	return 0;
}
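The shift constants above unpack the 32-bit sl_tclass_flowlabel word; its layout, as implied by the code:

/*
 * sl_tclass_flowlabel layout (after be32_to_cpu), as implied above:
 *   bits [31:28]  SL         -> w >> 28
 *   bits [27:20]  TClass     -> w >> 20 (the u8 assignment drops the SL bits)
 *   bits [19:0]   FlowLabel  -> w & 0xfffff
 */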
Example #5
File: ah.c  Project: jgroen/rtt_tests
int mlx4_ib_query_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr)
{
    struct mlx4_ib_ah *ah = to_mah(ibah);
    enum rdma_link_layer ll;

    memset(ah_attr, 0, sizeof *ah_attr);
    ah_attr->port_num = be32_to_cpu(ah->av.ib.port_pd) >> 24;
    ll = rdma_port_get_link_layer(ibah->device, ah_attr->port_num);
    if (ll == IB_LINK_LAYER_ETHERNET)
        ah_attr->sl = be32_to_cpu(ah->av.eth.sl_tclass_flowlabel) >> 29;
    else
        ah_attr->sl = be32_to_cpu(ah->av.ib.sl_tclass_flowlabel) >> 28;

    /* the remaining fields (dlid, static_rate, src_path_bits, GRH) are
     * filled as in the IB-only variant shown in Example #4 */
    return 0;
}
Example #6
int ib_init_ah_from_wc(struct ib_device *device, u8 port_num, struct ib_wc *wc,
		       struct ib_grh *grh, struct ib_ah_attr *ah_attr)
{
	u32 flow_class;
	u16 gid_index;
	int ret;
	u8 if_index;
	int is_eth = (rdma_port_get_link_layer(device, port_num) ==
			IB_LINK_LAYER_ETHERNET);

	memset(ah_attr, 0, sizeof *ah_attr);
	if (is_eth) {
		if (!(wc->wc_flags & IB_WC_GRH))
			return -EPROTOTYPE;

		if (wc->wc_flags & IB_WC_WITH_SMAC &&
		    wc->wc_flags & IB_WC_WITH_VLAN) {
			memcpy(ah_attr->dmac, wc->smac, ETH_ALEN);
			ah_attr->vlan_id = wc->vlan_id;
		} else {
			if_index = device->get_netdev(device, port_num);
			ret = rdma_addr_find_dmac_by_grh(&grh->dgid, &grh->sgid,
					ah_attr->dmac, &ah_attr->vlan_id, if_index);
			if (ret)
				return ret;
		}
	} else {
		ah_attr->vlan_id = 0xffff;
	}


	ah_attr->dlid = wc->slid;
	ah_attr->sl = wc->sl;
	ah_attr->src_path_bits = wc->dlid_path_bits;
	ah_attr->port_num = port_num;

	if (wc->wc_flags & IB_WC_GRH) {
		ah_attr->ah_flags = IB_AH_GRH;
		ah_attr->grh.dgid = grh->sgid;

		ret = ib_find_cached_gid(device, &grh->dgid, &port_num,
					 &gid_index);
		if (ret)
			return ret;

		ah_attr->grh.sgid_index = (u8) gid_index;
		flow_class = be32_to_cpu(grh->version_tclass_flow);
		ah_attr->grh.flow_label = flow_class & 0xFFFFF;
		ah_attr->grh.hop_limit = 0xFF;
		ah_attr->grh.traffic_class = (flow_class >> 20) & 0xFF;
	}

	return 0;
}
Example #7
File: verbs.c  Project: Cai900205/test
int ib_init_ah_from_wc(struct ib_device *device, u8 port_num, struct ib_wc *wc,
		       struct ib_grh *grh, struct ib_ah_attr *ah_attr)
{
	u32 flow_class;
	u16 gid_index;
	int ret;
	int is_eth = (rdma_port_get_link_layer(device, port_num) ==
			IB_LINK_LAYER_ETHERNET);
	union ib_gid sgid;
	int version;
	struct iphdr *iph = (struct iphdr *)((void *)grh + 20);

	memset(ah_attr, 0, sizeof *ah_attr);

	if (wc->wc_flags & IB_WC_GRH) {
		ah_attr->ah_flags = IB_AH_GRH;
		version = is_eth ? get_grh_header_version(grh) : 6;
		if (version == 4) {
			ipv6_addr_set_v4mapped(iph->saddr,
					       (struct in6_addr *)&ah_attr->grh.dgid);
			ipv6_addr_set_v4mapped(iph->daddr,
					       (struct in6_addr *)&sgid);
			ret = ib_find_cached_gid(device, &sgid, &port_num,
						 &gid_index);
			if (ret)
				return ret;

			ah_attr->grh.sgid_index = (u8) gid_index;
			ah_attr->grh.flow_label = iph->id & 0xfffff;
			ah_attr->grh.hop_limit = iph->ttl;
			ah_attr->grh.traffic_class = iph->tos;
		} else if (version == 6) {
			ah_attr->grh.dgid = grh->sgid;

			ret = ib_find_cached_gid(device, &grh->dgid, &port_num,
						 &gid_index);
			if (ret)
				return ret;

			ah_attr->grh.sgid_index = (u8) gid_index;
			flow_class = be32_to_cpu(grh->version_tclass_flow);
			ah_attr->grh.flow_label = flow_class & 0xFFFFF;
			ah_attr->grh.hop_limit = 0xFF;
			ah_attr->grh.traffic_class = (flow_class >> 20) & 0xFF;
		} else {
			/* unrecognized GRH header version; assumed handling */
			return -EINVAL;
		}
	}

	/* non-GRH address fields, as in the variant in Example #6 */
	ah_attr->dlid = wc->slid;
	ah_attr->sl = wc->sl;
	ah_attr->src_path_bits = wc->dlid_path_bits;
	ah_attr->port_num = port_num;

	return 0;
}
Example #8
static int roce_gid_cache_setup_one(struct ib_device *ib_dev)
{
	u8 port;
	int err = 0;

	if (!ib_dev->modify_gid)
		return -ENOSYS;

	ib_dev->cache.roce_gid_cache =
		kcalloc(ib_dev->phys_port_cnt,
			sizeof(*ib_dev->cache.roce_gid_cache), GFP_KERNEL);

	if (!ib_dev->cache.roce_gid_cache) {
		pr_warn("failed to allocate roce addr cache for %s\n",
			ib_dev->name);
		return -ENOMEM;
	}

	for (port = 0; port < ib_dev->phys_port_cnt; port++) {
		if (rdma_port_get_link_layer(ib_dev, port + start_port(ib_dev))
		    != IB_LINK_LAYER_ETHERNET)
			continue;
		ib_dev->cache.roce_gid_cache[port] =
			alloc_roce_gid_cache(ib_dev->gid_tbl_len[port]);
		if (!ib_dev->cache.roce_gid_cache[port]) {
			err = -ENOMEM;
			goto rollback_cache_setup;
		}
	}
	return 0;

rollback_cache_setup:
	for (port = 1; port <= ib_dev->phys_port_cnt; port++)
		free_roce_gid_cache(ib_dev, port);

	kfree(ib_dev->cache.roce_gid_cache);
	ib_dev->cache.roce_gid_cache = NULL;
	return err;
}
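alloc_roce_gid_cache() and free_roce_gid_cache() are not shown on this page; a plausible shape for the allocator, inferred from the fields the other snippets touch (sz, data_vec, active) — an assumption, not the patch's actual code:

static struct ib_roce_gid_cache *alloc_roce_gid_cache(int sz)
{
	/* Inferred from usage: ->data_vec[i].{gid,attr,seq}, ->sz, ->active */
	struct ib_roce_gid_cache *cache = kzalloc(sizeof(*cache), GFP_KERNEL);

	if (!cache)
		return NULL;
	cache->data_vec = kcalloc(sz, sizeof(*cache->data_vec), GFP_KERNEL);
	if (!cache->data_vec) {
		kfree(cache);
		return NULL;
	}
	cache->sz = sz;
	return cache;
}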
Example #9
File: cq.c  Project: bond-os/linux
static int mlx4_ib_poll_one(struct mlx4_ib_cq *cq,
			    struct mlx4_ib_qp **cur_qp,
			    struct ib_wc *wc)
{
	struct mlx4_cqe *cqe;
	struct mlx4_qp *mqp;
	struct mlx4_ib_wq *wq;
	struct mlx4_ib_srq *srq;
	int is_send;
	int is_error;
	u32 g_mlpath_rqpn;
	u16 wqe_ctr;

repoll:
	cqe = next_cqe_sw(cq);
	if (!cqe)
		return -EAGAIN;

	++cq->mcq.cons_index;

	/*
	 * Make sure we read CQ entry contents after we've checked the
	 * ownership bit.
	 */
	rmb();

	is_send  = cqe->owner_sr_opcode & MLX4_CQE_IS_SEND_MASK;
	is_error = (cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) ==
		MLX4_CQE_OPCODE_ERROR;

	if (unlikely((cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) == MLX4_OPCODE_NOP &&
		     is_send)) {
		printk(KERN_WARNING "Completion for NOP opcode detected!\n");
		return -EINVAL;
	}

	/* Resize CQ in progress */
	if (unlikely((cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) == MLX4_CQE_OPCODE_RESIZE)) {
		if (cq->resize_buf) {
			struct mlx4_ib_dev *dev = to_mdev(cq->ibcq.device);

			mlx4_ib_free_cq_buf(dev, &cq->buf, cq->ibcq.cqe);
			cq->buf      = cq->resize_buf->buf;
			cq->ibcq.cqe = cq->resize_buf->cqe;

			kfree(cq->resize_buf);
			cq->resize_buf = NULL;
		}

		goto repoll;
	}

	if (!*cur_qp ||
	    (be32_to_cpu(cqe->vlan_my_qpn) & MLX4_CQE_QPN_MASK) != (*cur_qp)->mqp.qpn) {
		/*
		 * We do not have to take the QP table lock here,
		 * because CQs will be locked while QPs are removed
		 * from the table.
		 */
		mqp = __mlx4_qp_lookup(to_mdev(cq->ibcq.device)->dev,
				       be32_to_cpu(cqe->vlan_my_qpn));
		if (unlikely(!mqp)) {
			printk(KERN_WARNING "CQ %06x with entry for unknown QPN %06x\n",
			       cq->mcq.cqn, be32_to_cpu(cqe->vlan_my_qpn) & MLX4_CQE_QPN_MASK);
			return -EINVAL;
		}

		*cur_qp = to_mibqp(mqp);
	}

	wc->qp = &(*cur_qp)->ibqp;

	if (is_send) {
		wq = &(*cur_qp)->sq;
		if (!(*cur_qp)->sq_signal_bits) {
			wqe_ctr = be16_to_cpu(cqe->wqe_index);
			wq->tail += (u16) (wqe_ctr - (u16) wq->tail);
		}
		wc->wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)];
		++wq->tail;
	} else if ((*cur_qp)->ibqp.srq) {
		srq = to_msrq((*cur_qp)->ibqp.srq);
		wqe_ctr = be16_to_cpu(cqe->wqe_index);
		wc->wr_id = srq->wrid[wqe_ctr];
		mlx4_ib_free_srq_wqe(srq, wqe_ctr);
	} else {
		wq	  = &(*cur_qp)->rq;
		wc->wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)];
		++wq->tail;
	}

	if (unlikely(is_error)) {
		mlx4_ib_handle_error_cqe((struct mlx4_err_cqe *) cqe, wc);
		return 0;
	}

	wc->status = IB_WC_SUCCESS;

	if (is_send) {
		wc->wc_flags = 0;
		switch (cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) {
		case MLX4_OPCODE_RDMA_WRITE_IMM:
			wc->wc_flags |= IB_WC_WITH_IMM;
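			/* fall through */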
		case MLX4_OPCODE_RDMA_WRITE:
			wc->opcode    = IB_WC_RDMA_WRITE;
			break;
		case MLX4_OPCODE_SEND_IMM:
			wc->wc_flags |= IB_WC_WITH_IMM;
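			/* fall through */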
		case MLX4_OPCODE_SEND:
		case MLX4_OPCODE_SEND_INVAL:
			wc->opcode    = IB_WC_SEND;
			break;
		case MLX4_OPCODE_RDMA_READ:
			wc->opcode    = IB_WC_RDMA_READ;
			wc->byte_len  = be32_to_cpu(cqe->byte_cnt);
			break;
		case MLX4_OPCODE_ATOMIC_CS:
			wc->opcode    = IB_WC_COMP_SWAP;
			wc->byte_len  = 8;
			break;
		case MLX4_OPCODE_ATOMIC_FA:
			wc->opcode    = IB_WC_FETCH_ADD;
			wc->byte_len  = 8;
			break;
		case MLX4_OPCODE_MASKED_ATOMIC_CS:
			wc->opcode    = IB_WC_MASKED_COMP_SWAP;
			wc->byte_len  = 8;
			break;
		case MLX4_OPCODE_MASKED_ATOMIC_FA:
			wc->opcode    = IB_WC_MASKED_FETCH_ADD;
			wc->byte_len  = 8;
			break;
		case MLX4_OPCODE_BIND_MW:
			wc->opcode    = IB_WC_BIND_MW;
			break;
		case MLX4_OPCODE_LSO:
			wc->opcode    = IB_WC_LSO;
			break;
		case MLX4_OPCODE_FMR:
			wc->opcode    = IB_WC_FAST_REG_MR;
			break;
		case MLX4_OPCODE_LOCAL_INVAL:
			wc->opcode    = IB_WC_LOCAL_INV;
			break;
		}
	} else {
		wc->byte_len = be32_to_cpu(cqe->byte_cnt);

		switch (cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) {
		case MLX4_RECV_OPCODE_RDMA_WRITE_IMM:
			wc->opcode	= IB_WC_RECV_RDMA_WITH_IMM;
			wc->wc_flags	= IB_WC_WITH_IMM;
			wc->ex.imm_data = cqe->immed_rss_invalid;
			break;
		case MLX4_RECV_OPCODE_SEND_INVAL:
			wc->opcode	= IB_WC_RECV;
			wc->wc_flags	= IB_WC_WITH_INVALIDATE;
			wc->ex.invalidate_rkey = be32_to_cpu(cqe->immed_rss_invalid);
			break;
		case MLX4_RECV_OPCODE_SEND:
			wc->opcode   = IB_WC_RECV;
			wc->wc_flags = 0;
			break;
		case MLX4_RECV_OPCODE_SEND_IMM:
			wc->opcode	= IB_WC_RECV;
			wc->wc_flags	= IB_WC_WITH_IMM;
			wc->ex.imm_data = cqe->immed_rss_invalid;
			break;
		}

		wc->slid	   = be16_to_cpu(cqe->rlid);
		g_mlpath_rqpn	   = be32_to_cpu(cqe->g_mlpath_rqpn);
		wc->src_qp	   = g_mlpath_rqpn & 0xffffff;
		wc->dlid_path_bits = (g_mlpath_rqpn >> 24) & 0x7f;
		wc->wc_flags	  |= g_mlpath_rqpn & 0x80000000 ? IB_WC_GRH : 0;
		wc->pkey_index     = be32_to_cpu(cqe->immed_rss_invalid) & 0x7f;
		wc->csum_ok	   = mlx4_ib_ipoib_csum_ok(cqe->status, cqe->checksum);
		if (rdma_port_get_link_layer(wc->qp->device,
				(*cur_qp)->port) == IB_LINK_LAYER_ETHERNET)
			wc->sl  = be16_to_cpu(cqe->sl_vid) >> 13;
		else
			wc->sl  = be16_to_cpu(cqe->sl_vid) >> 12;
	}

	return 0;
}
Example #10
static int sdp_init_qp(struct sock *sk, struct rdma_cm_id *id)
{
	struct ib_qp_init_attr qp_init_attr = {
		.event_handler = sdp_qp_event_handler,
		.cap.max_send_wr = SDP_TX_SIZE,
		.cap.max_recv_wr = sdp_rx_size,
		.cap.max_inline_data = sdp_inline_thresh,
		.sq_sig_type = IB_SIGNAL_REQ_WR,
		.qp_type = IB_QPT_RC,
	};
	struct ib_device *device = id->device;
	int rc;

	sdp_dbg(sk, "%s\n", __func__);

	sdp_sk(sk)->max_sge = sdp_get_max_dev_sge(device);
	sdp_dbg(sk, "Max sges: %d\n", sdp_sk(sk)->max_sge);

	qp_init_attr.cap.max_send_sge = MIN(sdp_sk(sk)->max_sge, SDP_MAX_SEND_SGES);
	sdp_dbg(sk, "Setting max send sge to: %d\n", qp_init_attr.cap.max_send_sge);

	qp_init_attr.cap.max_recv_sge = MIN(sdp_sk(sk)->max_sge, SDP_MAX_RECV_SGES);
	sdp_dbg(sk, "Setting max recv sge to: %d\n", qp_init_attr.cap.max_recv_sge);

	sdp_sk(sk)->sdp_dev = ib_get_client_data(device, &sdp_client);
	if (!sdp_sk(sk)->sdp_dev) {
		sdp_warn(sk, "SDP not available on device %s\n", device->name);
		rc = -ENODEV;
		goto err_rx;
	}

	rc = sdp_rx_ring_create(sdp_sk(sk), device);
	if (rc)
		goto err_rx;

	rc = sdp_tx_ring_create(sdp_sk(sk), device);
	if (rc)
		goto err_tx;

	qp_init_attr.recv_cq = sdp_sk(sk)->rx_ring.cq;
	qp_init_attr.send_cq = sdp_sk(sk)->tx_ring.cq;

	rc = rdma_create_qp(id, sdp_sk(sk)->sdp_dev->pd, &qp_init_attr);
	if (rc) {
		sdp_warn(sk, "Unable to create QP: %d.\n", rc);
		goto err_qp;
	}
	sdp_sk(sk)->qp = id->qp;
	sdp_sk(sk)->ib_device = device;
	sdp_sk(sk)->qp_active = 1;
	sdp_sk(sk)->context.device = device;
	sdp_sk(sk)->inline_thresh = qp_init_attr.cap.max_inline_data;

	sdp_dbg(sk, "%s done\n", __func__);
	return 0;

err_qp:
	sdp_tx_ring_destroy(sdp_sk(sk));
err_tx:
	sdp_rx_ring_destroy(sdp_sk(sk));
err_rx:
	return rc;
}

static int sdp_get_max_send_frags(u32 buf_size)
{
	return MIN(
		/* +1 to compensate for non-aligned buffers */
		(PAGE_ALIGN(buf_size) >> PAGE_SHIFT) + 1,
		SDP_MAX_SEND_SGES - 1);
}
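MIN() here is a project-local helper rather than a kernel macro; the usual definition, assumed for reference:

#define MIN(a, b)	((a) < (b) ? (a) : (b))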

static int sdp_connect_handler(struct sock *sk, struct rdma_cm_id *id,
		       	struct rdma_cm_event *event)
{
	struct sockaddr_in *dst_addr;
	struct sock *child;
	const struct sdp_hh *h;
	int rc = 0;

	sdp_dbg(sk, "%s %p -> %p\n", __func__, sdp_sk(sk)->id, id);

	h = event->param.conn.private_data;
	SDP_DUMP_PACKET(sk, "RX", NULL, &h->bsdh);

	if (h->ipv_cap & HH_IPV_MASK & ~(HH_IPV4 | HH_IPV6)) {
		sdp_warn(sk, "Bad IPV field in SDP Hello header: 0x%x\n",
				h->ipv_cap & HH_IPV_MASK);
		return -EINVAL;
	}

	if (!h->max_adverts)
		return -EINVAL;

#if (LINUX_VERSION_CODE < KERNEL_VERSION(3, 3, 0))
	child = sk_clone(sk, GFP_KERNEL);
#else
	child = sk_clone_lock(sk, GFP_KERNEL);
#endif
	if (!child)
		return -ENOMEM;

	sdp_init_sock(child);

	dst_addr = (struct sockaddr_in *)&id->route.addr.dst_addr;
	sdp_inet_dport(child) = dst_addr->sin_port;

#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
	if (inet6_sk(sk)) {
		struct ipv6_pinfo *newnp;

		newnp = inet_sk(child)->pinet6 = sdp_inet6_sk_generic(child);

		memcpy(newnp, inet6_sk(sk), sizeof(struct ipv6_pinfo));
		if ((h->ipv_cap & HH_IPV_MASK) == HH_IPV4) {
			/* V6 mapped */
			sdp_inet_daddr(child) = dst_addr->sin_addr.s_addr;
			ipv6_addr_set(&child->sk_v6_daddr, 0, 0, htonl(0x0000FFFF),
					h->src_addr.ip4.addr);

			ipv6_addr_set(&child->sk_v6_rcv_saddr, 0, 0, htonl(0x0000FFFF),
					h->dst_addr.ip4.addr);

			ipv6_addr_copy(&child->sk_v6_rcv_saddr, &child->sk_v6_daddr);
		} else if ((h->ipv_cap & HH_IPV_MASK) == HH_IPV6) {
			struct sockaddr_in6 *dst_addr6 = (struct sockaddr_in6 *)dst_addr;
			struct sockaddr_in6 *src_addr6 =
				(struct sockaddr_in6 *)&id->route.addr.src_addr;

			ipv6_addr_copy(&child->sk_v6_daddr, &dst_addr6->sin6_addr);
			ipv6_addr_copy(&child->sk_v6_rcv_saddr, &src_addr6->sin6_addr);
			ipv6_addr_copy(&newnp->saddr, &src_addr6->sin6_addr);
		} else {
			sdp_warn(child, "Bad IPV field: 0x%x\n", h->ipv_cap & HH_IPV_MASK);
		}

		sdp_inet_daddr(child) = sdp_inet_saddr(child) =
			sdp_inet_rcv_saddr(child) = LOOPBACK4_IPV6;
	} else
#endif
	{
		sdp_inet_daddr(child) = dst_addr->sin_addr.s_addr;
	}

#ifdef SDP_SOCK_HISTORY
	sdp_ssk_hist_rename(sk);
#endif
	__sock_put(child, SOCK_REF_CLONE);

	down_read(&device_removal_lock);

	rc = sdp_init_qp(child, id);
	if (rc) {
		bh_unlock_sock(child);
		up_read(&device_removal_lock);
		sdp_sk(child)->destructed_already = 1;
#ifdef SDP_SOCK_HISTORY
		sdp_ssk_hist_close(child);
#endif
		sk_free(child);
		return rc;
	}

	sdp_sk(child)->max_bufs = ntohs(h->bsdh.bufs);
	atomic_set(&sdp_sk(child)->tx_ring.credits, sdp_sk(child)->max_bufs);

	sdp_sk(child)->min_bufs = tx_credits(sdp_sk(child)) / 4;
	sdp_sk(child)->xmit_size_goal = ntohl(h->localrcvsz) -
		sizeof(struct sdp_bsdh);

	sdp_sk(child)->send_frags = sdp_get_max_send_frags(sdp_sk(child)->xmit_size_goal);
	sdp_init_buffers(sdp_sk(child), rcvbuf_initial_size);

	id->context = child;
	sdp_sk(child)->id = id;

	list_add_tail(&sdp_sk(child)->backlog_queue,
			&sdp_sk(sk)->backlog_queue);
	sdp_sk(child)->parent = sk;

	bh_unlock_sock(child);
	sdp_add_sock(sdp_sk(child));
	up_read(&device_removal_lock);

	sdp_exch_state(child, TCPF_LISTEN | TCPF_CLOSE, TCP_SYN_RECV);

	/* child->sk_write_space(child); */
	/* child->sk_data_ready(child, 0); */
	sk->sk_data_ready(sk);

	return 0;
}

static int sdp_response_handler(struct sock *sk, struct rdma_cm_id *id,
				struct rdma_cm_event *event)
{
	const struct sdp_hah *h;
	struct sockaddr_in *dst_addr;
	sdp_dbg(sk, "%s\n", __func__);

	sdp_exch_state(sk, TCPF_SYN_SENT, TCP_ESTABLISHED);
	sdp_set_default_moderation(sdp_sk(sk));

	if (sock_flag(sk, SOCK_KEEPOPEN))
		sdp_start_keepalive_timer(sk);

	if (sock_flag(sk, SOCK_DEAD))
		return 0;

	h = event->param.conn.private_data;
	SDP_DUMP_PACKET(sk, "RX", NULL, &h->bsdh);
	sdp_sk(sk)->max_bufs = ntohs(h->bsdh.bufs);
	atomic_set(&sdp_sk(sk)->tx_ring.credits, sdp_sk(sk)->max_bufs);
	sdp_sk(sk)->min_bufs = tx_credits(sdp_sk(sk)) / 4;
	sdp_sk(sk)->xmit_size_goal =
		ntohl(h->actrcvsz) - sizeof(struct sdp_bsdh);
	sdp_sk(sk)->send_frags = sdp_get_max_send_frags(sdp_sk(sk)->xmit_size_goal);
	sdp_sk(sk)->xmit_size_goal = MIN(sdp_sk(sk)->xmit_size_goal,
		sdp_sk(sk)->send_frags * PAGE_SIZE);

	sdp_sk(sk)->poll_cq = 1;

	sk->sk_state_change(sk);
	sk_wake_async(sk, 0, POLL_OUT);

	dst_addr = (struct sockaddr_in *)&id->route.addr.dst_addr;
	sdp_inet_dport(sk) = dst_addr->sin_port;
	sdp_inet_daddr(sk) = dst_addr->sin_addr.s_addr;

#ifdef SDP_SOCK_HISTORY
	sdp_ssk_hist_rename(sk);
#endif
	return 0;
}

static int sdp_connected_handler(struct sock *sk)
{
	struct sock *parent;
	sdp_dbg(sk, "%s\n", __func__);

	parent = sdp_sk(sk)->parent;
	BUG_ON(!parent);

	sdp_exch_state(sk, TCPF_SYN_RECV, TCP_ESTABLISHED);

#ifdef SDP_SOCK_HISTORY
	sdp_ssk_hist_rename(sk);
#endif
	sdp_set_default_moderation(sdp_sk(sk));

	if (sock_flag(sk, SOCK_KEEPOPEN))
		sdp_start_keepalive_timer(sk);

	if (sock_flag(sk, SOCK_DEAD))
		return 0;

	lock_sock(parent);
	if (!sdp_sk(parent)->id) { /* TODO: look at SOCK_DEAD? */
		sdp_dbg(sk, "parent is going away.\n");
		goto done;
	}

	sk_acceptq_added(parent);
	sdp_dbg(parent, "%s child connection established\n", __func__);
	list_del_init(&sdp_sk(sk)->backlog_queue);
	list_add_tail(&sdp_sk(sk)->accept_queue,
			&sdp_sk(parent)->accept_queue);

	parent->sk_state_change(parent);
	sk_wake_async(parent, 0, POLL_OUT);
done:
	release_sock(parent);

	return 0;
}

static int sdp_disconnected_handler(struct sock *sk)
{
	struct sdp_sock *ssk = sdp_sk(sk);

	sdp_dbg(sk, "%s\n", __func__);

	if (ssk->tx_ring.cq)
		if (sdp_xmit_poll(ssk, 1))
			sdp_post_sends(ssk, 0);

	if (sk->sk_state == TCP_SYN_RECV) {
		sdp_connected_handler(sk);

		if (rcv_nxt(ssk))
			return 0;
	}

	return -ECONNRESET;
}

int sdp_cma_handler(struct rdma_cm_id *id, struct rdma_cm_event *event)
{
	struct rdma_conn_param conn_param;
	struct sock *parent = NULL;
	struct sock *child = NULL;
	struct sock *sk;
	struct sdp_hah hah;
	struct sdp_hh hh;

	int rc = 0, rc2;

	sk = id->context;
	if (!sk) {
		sdp_dbg(NULL, "cm_id is being torn down, event %s\n",
		       	rdma_cm_event_str(event->event));
		return event->event == RDMA_CM_EVENT_CONNECT_REQUEST ?
			-EINVAL : 0;
	}

	sdp_add_to_history(sk, rdma_cm_event_str(event->event));

	lock_sock_nested(sk, SINGLE_DEPTH_NESTING);
	sdp_dbg(sk, "event: %s\n", rdma_cm_event_str(event->event));
	if (!sdp_sk(sk)->id) {
		sdp_dbg(sk, "socket is being torn down\n");
		rc = event->event == RDMA_CM_EVENT_CONNECT_REQUEST ?
			-EINVAL : 0;
		release_sock(sk);
		return rc;
	}

	switch (event->event) {
	case RDMA_CM_EVENT_ADDR_RESOLVED:
		if (sdp_link_layer_ib_only &&
			rdma_node_get_transport(id->device->node_type) ==
				RDMA_TRANSPORT_IB &&
			rdma_port_get_link_layer(id->device, id->port_num) !=
				IB_LINK_LAYER_INFINIBAND) {
			sdp_dbg(sk, "Link layer is: %d. Only IB link layer "
				"is allowed\n",
				rdma_port_get_link_layer(id->device,
					id->port_num));
			rc = -ENETUNREACH;
			break;
		}

		rc = rdma_resolve_route(id, SDP_ROUTE_TIMEOUT);
		break;
	case RDMA_CM_EVENT_ADDR_ERROR:
		rc = -ENETUNREACH;
		break;
	case RDMA_CM_EVENT_ROUTE_RESOLVED:
		rc = sdp_init_qp(sk, id);
		if (rc)
			break;
		memset(&hh, 0, sizeof hh);
		hh.bsdh.mid = SDP_MID_HELLO;
		hh.bsdh.len = htonl(sizeof(struct sdp_hh));
		hh.max_adverts = 1;

		hh.majv_minv = SDP_MAJV_MINV;
		sdp_init_buffers(sdp_sk(sk), rcvbuf_initial_size);
		hh.bsdh.bufs = htons(rx_ring_posted(sdp_sk(sk)));
		atomic_set(&sdp_sk(sk)->remote_credits,
				rx_ring_posted(sdp_sk(sk)));
		hh.localrcvsz = hh.desremrcvsz = htonl(sdp_sk(sk)->recv_frags *
				PAGE_SIZE + sizeof(struct sdp_bsdh));
#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
		if (inet6_sk(sk)) {
			struct sockaddr *src_addr = (struct sockaddr *)&id->route.addr.src_addr;
			struct sockaddr_in *addr4 = (struct sockaddr_in *)src_addr;
			struct sockaddr_in6 *addr6 = (struct sockaddr_in6 *)src_addr;

			if (src_addr->sa_family == AF_INET) {
				/* IPv4 over IPv6 */
				ipv6_addr_set(&sk->sk_v6_rcv_saddr, 0, 0, htonl(0xFFFF),
						addr4->sin_addr.s_addr);
			} else {
				sk->sk_v6_rcv_saddr = addr6->sin6_addr;
			}
			inet6_sk(sk)->saddr = sk->sk_v6_rcv_saddr;
		} else
#endif
		{
			sdp_inet_saddr(sk) = sdp_inet_rcv_saddr(sk) =
				((struct sockaddr_in *)&id->route.addr.src_addr)->sin_addr.s_addr;
		}
		memset(&conn_param, 0, sizeof conn_param);
		conn_param.private_data_len = sizeof hh;
		conn_param.private_data = &hh;
		conn_param.responder_resources = 4 /* TODO */;
		conn_param.initiator_depth = 4 /* TODO */;
		conn_param.retry_count = sdp_retry_count;
		SDP_DUMP_PACKET(sk, "TX", NULL, &hh.bsdh);

		if (sdp_apm_enable) {
			rc = rdma_enable_apm(id, RDMA_ALT_PATH_BEST);
			if (rc)
				sdp_warn(sk, "APM couldn't be enabled for active side: %d\n", rc);
		}

		rc = rdma_connect(id, &conn_param);
		break;

	case RDMA_CM_EVENT_ALT_ROUTE_RESOLVED:
		sdp_dbg(sk, "alt route was resolved slid=%d, dlid=%d\n",
				id->route.path_rec[1].slid, id->route.path_rec[1].dlid);
		break;

	case RDMA_CM_EVENT_ALT_PATH_LOADED:
		sdp_dbg(sk, "alt route path loaded\n");
		break;

	case RDMA_CM_EVENT_ALT_ROUTE_ERROR:
		sdp_warn(sk, "alt route resolve error\n");
		break;

	case RDMA_CM_EVENT_ROUTE_ERROR:
		rc = -ETIMEDOUT;
		break;
	case RDMA_CM_EVENT_CONNECT_REQUEST:
		rc = sdp_connect_handler(sk, id, event);
		if (rc) {
			sdp_dbg(sk, "Destroying qp\n");
			rdma_reject(id, NULL, 0);
			break;
		}
		child = id->context;
		atomic_set(&sdp_sk(child)->remote_credits,
				rx_ring_posted(sdp_sk(child)));
		memset(&hah, 0, sizeof hah);
		hah.bsdh.mid = SDP_MID_HELLO_ACK;
		hah.bsdh.bufs = htons(rx_ring_posted(sdp_sk(child)));
		hah.bsdh.len = htonl(sizeof(struct sdp_hah));
		hah.majv_minv = SDP_MAJV_MINV;
		hah.ext_max_adverts = 1; /* Doesn't seem to be mandated by spec,
					    but just in case */
		hah.actrcvsz = htonl(sdp_sk(child)->recv_frags * PAGE_SIZE +
			sizeof(struct sdp_bsdh));
		memset(&conn_param, 0, sizeof conn_param);
		conn_param.private_data_len = sizeof hah;
		conn_param.private_data = &hah;
		conn_param.responder_resources = 4 /* TODO */;
		conn_param.initiator_depth = 4 /* TODO */;
		conn_param.retry_count = sdp_retry_count;
		SDP_DUMP_PACKET(sk, "TX", NULL, &hah.bsdh);
		rc = rdma_accept(id, &conn_param);
		if (rc) {
			sdp_sk(child)->id = NULL;
			id->qp = NULL;
			id->context = NULL;
			parent = sdp_sk(child)->parent; /* TODO: hold ? */
		} else if (sdp_apm_enable) {
				rc2 = rdma_enable_apm(id, RDMA_ALT_PATH_BEST);
				if (rc2)
					sdp_warn(sk, "APM couldn't be enabled for passive side: %d\n", rc2);
		}
		break;
	case RDMA_CM_EVENT_CONNECT_RESPONSE:
		rc = sdp_response_handler(sk, id, event);
		if (rc) {
			sdp_dbg(sk, "Destroying qp\n");
			rdma_reject(id, NULL, 0);
		} else {
			rc = rdma_accept(id, NULL);
			if (!rc && sdp_apm_enable) {
				rc2 = rdma_enable_apm(id, RDMA_ALT_PATH_BEST);
				if (rc2)
					sdp_warn(sk, "APM couldn't be enabled for passive side:%d \n", rc2);
			}
		}
		break;
	case RDMA_CM_EVENT_CONNECT_ERROR:
		rc = -ETIMEDOUT;
		break;
	case RDMA_CM_EVENT_UNREACHABLE:
		rc = -ENETUNREACH;
		break;
	case RDMA_CM_EVENT_REJECTED:
		rc = -ECONNREFUSED;
		break;
	case RDMA_CM_EVENT_ESTABLISHED:
		sdp_inet_saddr(sk) = sdp_inet_rcv_saddr(sk) =
			((struct sockaddr_in *)&id->route.addr.src_addr)->sin_addr.s_addr;
		rc = sdp_connected_handler(sk);
		break;
	case RDMA_CM_EVENT_DISCONNECTED: /* This means DREQ/DREP received */
		if (sk->sk_state == TCP_LAST_ACK) {
			sdp_cancel_dreq_wait_timeout(sdp_sk(sk));

			sdp_exch_state(sk, TCPF_LAST_ACK, TCP_TIME_WAIT);

			sdp_dbg(sk, "%s: waiting for Infiniband tear down\n",
				__func__);
		}

		sdp_sk(sk)->qp_active = 0;
		rdma_disconnect(id);

		if (sk->sk_state != TCP_TIME_WAIT) {
			if (sk->sk_state == TCP_CLOSE_WAIT) {
				sdp_dbg(sk, "IB teardown while in "
					"TCP_CLOSE_WAIT taking reference to "
					"let close() finish the work\n");
				sock_hold(sk, SOCK_REF_CMA);
				sdp_start_cma_timewait_timeout(sdp_sk(sk),
						SDP_CMA_TIMEWAIT_TIMEOUT);

			}
			sdp_set_error(sk, -EPIPE);
			rc = sdp_disconnected_handler(sk);
		}
		break;
	case RDMA_CM_EVENT_TIMEWAIT_EXIT:
		rc = sdp_disconnected_handler(sk);
		break;
	case RDMA_CM_EVENT_DEVICE_REMOVAL:
		rc = -ENETRESET;
		break;

	case RDMA_CM_EVENT_ADDR_CHANGE:
		sdp_dbg(sk, "Got Address change event\n");
		rc = 0;
		break;
	default:
		printk(KERN_ERR "SDP: Unexpected CMA event: %d\n",
		       event->event);
		rc = -ECONNABORTED;
		break;
	}

	sdp_dbg(sk, "event: %s handled\n", rdma_cm_event_str(event->event));

	if (rc && sdp_sk(sk)->id == id) {
		child = sk;
		sdp_sk(sk)->id = NULL;
		id->qp = NULL;
		id->context = NULL;
		parent = sdp_sk(sk)->parent;
		sdp_reset_sk(sk, rc);
	}

	release_sock(sk);

	sdp_dbg(sk, "event: %s done. status %d\n",
			rdma_cm_event_str(event->event), rc);

	if (parent) {
		lock_sock(parent);
		if (!sdp_sk(parent)->id) { /* TODO: look at SOCK_DEAD? */
			sdp_dbg(sk, "parent is going away.\n");
			child = NULL;
			goto done;
		}
		if (!list_empty(&sdp_sk(child)->backlog_queue))
			list_del_init(&sdp_sk(child)->backlog_queue);
		else
			child = NULL;
done:
		release_sock(parent);
		if (child)
			sdp_common_release(child);
	}
	return rc;
}
Example #11
File: qp.c  Project: 020gzh/linux
/**
 * rvt_modify_qp - modify the attributes of a queue pair
 * @ibqp: the queue pair whose attributes we're modifying
 * @attr: the new attributes
 * @attr_mask: the mask of attributes to modify
 * @udata: user data for libibverbs.so
 *
 * Return: 0 on success, otherwise returns an errno.
 */
int rvt_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
		  int attr_mask, struct ib_udata *udata)
{
	struct rvt_dev_info *rdi = ib_to_rvt(ibqp->device);
	struct rvt_qp *qp = ibqp_to_rvtqp(ibqp);
	enum ib_qp_state cur_state, new_state;
	struct ib_event ev;
	int lastwqe = 0;
	int mig = 0;
	int pmtu = 0; /* for gcc warning only */
	enum rdma_link_layer link;

	link = rdma_port_get_link_layer(ibqp->device, qp->port_num);

	spin_lock_irq(&qp->r_lock);
	spin_lock(&qp->s_hlock);
	spin_lock(&qp->s_lock);

	cur_state = attr_mask & IB_QP_CUR_STATE ?
		attr->cur_qp_state : qp->state;
	new_state = attr_mask & IB_QP_STATE ? attr->qp_state : cur_state;

	if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type,
				attr_mask, link))
		goto inval;

	if (rdi->driver_f.check_modify_qp &&
	    rdi->driver_f.check_modify_qp(qp, attr, attr_mask, udata))
		goto inval;

	if (attr_mask & IB_QP_AV) {
		if (attr->ah_attr.dlid >= be16_to_cpu(IB_MULTICAST_LID_BASE))
			goto inval;
		if (rvt_check_ah(qp->ibqp.device, &attr->ah_attr))
			goto inval;
	}

	if (attr_mask & IB_QP_ALT_PATH) {
		if (attr->alt_ah_attr.dlid >=
		    be16_to_cpu(IB_MULTICAST_LID_BASE))
			goto inval;
		if (rvt_check_ah(qp->ibqp.device, &attr->alt_ah_attr))
			goto inval;
		if (attr->alt_pkey_index >= rvt_get_npkeys(rdi))
			goto inval;
	}

	if (attr_mask & IB_QP_PKEY_INDEX)
		if (attr->pkey_index >= rvt_get_npkeys(rdi))
			goto inval;

	if (attr_mask & IB_QP_MIN_RNR_TIMER)
		if (attr->min_rnr_timer > 31)
			goto inval;

	if (attr_mask & IB_QP_PORT)
		if (qp->ibqp.qp_type == IB_QPT_SMI ||
		    qp->ibqp.qp_type == IB_QPT_GSI ||
		    attr->port_num == 0 ||
		    attr->port_num > ibqp->device->phys_port_cnt)
			goto inval;

	if (attr_mask & IB_QP_DEST_QPN)
		if (attr->dest_qp_num > RVT_QPN_MASK)
			goto inval;

	if (attr_mask & IB_QP_RETRY_CNT)
		if (attr->retry_cnt > 7)
			goto inval;

	if (attr_mask & IB_QP_RNR_RETRY)
		if (attr->rnr_retry > 7)
			goto inval;

	/*
	 * Don't allow invalid path_mtu values.  OK to set greater
	 * than the active mtu (or even the max_cap, if we have tuned
	 * that to a small mtu.  We'll set qp->path_mtu
	 * to the lesser of requested attribute mtu and active,
	 * for packetizing messages.
	 * Note that the QP port has to be set in INIT and MTU in RTR.
	 */
	if (attr_mask & IB_QP_PATH_MTU) {
		pmtu = rdi->driver_f.get_pmtu_from_attr(rdi, qp, attr);
		if (pmtu < 0)
			goto inval;
	}

	if (attr_mask & IB_QP_PATH_MIG_STATE) {
		if (attr->path_mig_state == IB_MIG_REARM) {
			if (qp->s_mig_state == IB_MIG_ARMED)
				goto inval;
			if (new_state != IB_QPS_RTS)
				goto inval;
		} else if (attr->path_mig_state == IB_MIG_MIGRATED) {
			if (qp->s_mig_state == IB_MIG_REARM)
				goto inval;
			if (new_state != IB_QPS_RTS && new_state != IB_QPS_SQD)
				goto inval;
			if (qp->s_mig_state == IB_MIG_ARMED)
				mig = 1;
		} else {
			goto inval;
		}
	}

	if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC)
		if (attr->max_dest_rd_atomic > rdi->dparms.max_rdma_atomic)
			goto inval;

	switch (new_state) {
	case IB_QPS_RESET:
		if (qp->state != IB_QPS_RESET)
			rvt_reset_qp(rdi, qp, ibqp->qp_type);
		break;

	case IB_QPS_RTR:
		/* Allow event to re-trigger if QP set to RTR more than once */
		qp->r_flags &= ~RVT_R_COMM_EST;
		qp->state = new_state;
		break;

	case IB_QPS_SQD:
		qp->s_draining = qp->s_last != qp->s_cur;
		qp->state = new_state;
		break;

	case IB_QPS_SQE:
		if (qp->ibqp.qp_type == IB_QPT_RC)
			goto inval;
		qp->state = new_state;
		break;

	case IB_QPS_ERR:
		lastwqe = rvt_error_qp(qp, IB_WC_WR_FLUSH_ERR);
		break;

	default:
		qp->state = new_state;
		break;
	}

	if (attr_mask & IB_QP_PKEY_INDEX)
		qp->s_pkey_index = attr->pkey_index;

	if (attr_mask & IB_QP_PORT)
		qp->port_num = attr->port_num;

	if (attr_mask & IB_QP_DEST_QPN)
		qp->remote_qpn = attr->dest_qp_num;

	if (attr_mask & IB_QP_SQ_PSN) {
		qp->s_next_psn = attr->sq_psn & rdi->dparms.psn_modify_mask;
		qp->s_psn = qp->s_next_psn;
		qp->s_sending_psn = qp->s_next_psn;
		qp->s_last_psn = qp->s_next_psn - 1;
		qp->s_sending_hpsn = qp->s_last_psn;
	}

	if (attr_mask & IB_QP_RQ_PSN)
		qp->r_psn = attr->rq_psn & rdi->dparms.psn_modify_mask;

	if (attr_mask & IB_QP_ACCESS_FLAGS)
		qp->qp_access_flags = attr->qp_access_flags;

	if (attr_mask & IB_QP_AV) {
		qp->remote_ah_attr = attr->ah_attr;
		qp->s_srate = attr->ah_attr.static_rate;
		qp->srate_mbps = ib_rate_to_mbps(qp->s_srate);
	}

	if (attr_mask & IB_QP_ALT_PATH) {
		qp->alt_ah_attr = attr->alt_ah_attr;
		qp->s_alt_pkey_index = attr->alt_pkey_index;
	}

	if (attr_mask & IB_QP_PATH_MIG_STATE) {
		qp->s_mig_state = attr->path_mig_state;
		if (mig) {
			qp->remote_ah_attr = qp->alt_ah_attr;
			qp->port_num = qp->alt_ah_attr.port_num;
			qp->s_pkey_index = qp->s_alt_pkey_index;
		}
	}

	if (attr_mask & IB_QP_PATH_MTU) {
		qp->pmtu = rdi->driver_f.mtu_from_qp(rdi, qp, pmtu);
		qp->path_mtu = rdi->driver_f.mtu_to_path_mtu(qp->pmtu);
		qp->log_pmtu = ilog2(qp->pmtu);
	}

	if (attr_mask & IB_QP_RETRY_CNT) {
		qp->s_retry_cnt = attr->retry_cnt;
		qp->s_retry = attr->retry_cnt;
	}

	if (attr_mask & IB_QP_RNR_RETRY) {
		qp->s_rnr_retry_cnt = attr->rnr_retry;
		qp->s_rnr_retry = attr->rnr_retry;
	}

	if (attr_mask & IB_QP_MIN_RNR_TIMER)
		qp->r_min_rnr_timer = attr->min_rnr_timer;

	if (attr_mask & IB_QP_TIMEOUT) {
		qp->timeout = attr->timeout;
		qp->timeout_jiffies =
			usecs_to_jiffies((4096UL * (1UL << qp->timeout)) /
				1000UL);
	}

	if (attr_mask & IB_QP_QKEY)
		qp->qkey = attr->qkey;

	if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC)
		qp->r_max_rd_atomic = attr->max_dest_rd_atomic;

	if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC)
		qp->s_max_rd_atomic = attr->max_rd_atomic;

	if (rdi->driver_f.modify_qp)
		rdi->driver_f.modify_qp(qp, attr, attr_mask, udata);

	spin_unlock(&qp->s_lock);
	spin_unlock(&qp->s_hlock);
	spin_unlock_irq(&qp->r_lock);

	if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT)
		rvt_insert_qp(rdi, qp);

	if (lastwqe) {
		ev.device = qp->ibqp.device;
		ev.element.qp = &qp->ibqp;
		ev.event = IB_EVENT_QP_LAST_WQE_REACHED;
		qp->ibqp.event_handler(&ev, qp->ibqp.qp_context);
	}
	if (mig) {
		ev.device = qp->ibqp.device;
		ev.element.qp = &qp->ibqp;
		ev.event = IB_EVENT_PATH_MIG;
		qp->ibqp.event_handler(&ev, qp->ibqp.qp_context);
	}
	return 0;

inval:
	spin_unlock(&qp->s_lock);
	spin_unlock(&qp->s_hlock);
	spin_unlock_irq(&qp->r_lock);
	return -EINVAL;
}
Example #12
int roce_gid_cache_find_gid_by_filter(struct ib_device *ib_dev,
				      union ib_gid *gid,
				      u8 port,
				      bool (*filter)(const union ib_gid *,
						     const struct ib_gid_attr *,
						     void *),
				      void *context,
				      u16 *index)
{
	struct ib_roce_gid_cache *cache;
	unsigned int i;
	bool found = false;

	if (!ib_dev->cache.roce_gid_cache)
		return -ENOSYS;

	if (port < start_port(ib_dev) ||
	    port >= start_port(ib_dev) + ib_dev->phys_port_cnt ||
	    rdma_port_get_link_layer(ib_dev, port) !=
		IB_LINK_LAYER_ETHERNET)
		return -ENOSYS;

	cache = ib_dev->cache.roce_gid_cache[port - start_port(ib_dev)];

	if (!cache || !cache->active)
		return -ENOENT;

	for (i = 0; i < cache->sz; i++) {
		unsigned int orig_seq;
		struct ib_gid_attr attr;

		orig_seq = cache->data_vec[i].seq;
		if (orig_seq == -1)
			continue;
		/* Make sure the sequence number we remember was read
		 * before the gid cache entry content is read.
		 */
		smp_rmb();

		if (memcmp(gid, &cache->data_vec[i].gid, sizeof(*gid)))
			continue;

		memcpy(&attr, &cache->data_vec[i].attr, sizeof(attr));

		rcu_read_lock();

		/* Make sure we finished reading the attribute */
		smp_rmb();
		if (orig_seq == ACCESS_ONCE(cache->data_vec[i].seq))
			if (!filter || filter(gid, &attr, context))
				found = true;

		rcu_read_unlock();

		if (found)
			break;
	}

	if (!found)
		return -ENOENT;

	if (index)
		*index = i;
	return 0;
}
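The seq/smp_rmb() dance above is the read side of a seqcount-style protocol: the writer invalidates the entry, updates it, then publishes a new sequence number. A sketch of that pairing (names and the new_seq parameter are illustrative, not the patch's actual code):

static void write_gid_entry(struct ib_roce_gid_cache *cache, int ix,
			    const union ib_gid *gid,
			    const struct ib_gid_attr *attr,
			    unsigned int new_seq)
{
	cache->data_vec[ix].seq = -1;	/* readers skip/retry this slot */
	smp_wmb();			/* order invalidation before payload */
	memcpy(&cache->data_vec[ix].gid, gid, sizeof(*gid));
	memcpy(&cache->data_vec[ix].attr, attr, sizeof(*attr));
	smp_wmb();			/* payload visible before publication */
	cache->data_vec[ix].seq = new_seq;
}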