Example #1
static void mlx5_free_context(struct verbs_device *device,
			      struct ibv_context *ibctx)
{
	struct mlx5_context *context = to_mctx(ibctx);
	int page_size = to_mdev(ibctx->device)->page_size;
	int i;
	struct mlx5_wc_uar *wc_uar;

	if (context->hca_core_clock)
		munmap(context->hca_core_clock - context->core_clock.offset,
		       to_mdev(&device->device)->page_size);

	if (context->cc.buf)
		munmap(context->cc.buf, 4096 * context->num_ports);

	free(context->bfs);
	for (i = 0; i < MLX5_MAX_UAR_PAGES; ++i) {
		if (context->uar[i].regs)
			munmap(context->uar[i].regs, page_size);
	}

	if (context->max_ctx_res_domain) {
		mlx5_spin_lock(&context->send_db_lock);
		while (!list_empty(&context->wc_uar_list)) {
			wc_uar = list_entry(context->wc_uar_list.next,
					    struct mlx5_wc_uar, list);
			list_del(&wc_uar->list);
			free(wc_uar);
		}
		mlx5_spin_unlock(&context->send_db_lock);
	}
}
Example #2
int mlx4_destroy_xrc_srq(struct ibv_srq *srq)
{
	struct mlx4_context *mctx = to_mctx(srq->context);
	struct mlx4_srq *msrq = to_msrq(srq);
	struct mlx4_cq *mcq;
	int ret;

	mcq = to_mcq(msrq->verbs_srq.cq);
	mlx4_cq_clean(mcq, 0, msrq);
	pthread_spin_lock(&mcq->lock);
	mlx4_clear_xsrq(&mctx->xsrq_table, msrq->verbs_srq.srq_num);
	pthread_spin_unlock(&mcq->lock);

	ret = ibv_cmd_destroy_srq(srq);
	if (ret) {
		pthread_spin_lock(&mcq->lock);
		mlx4_store_xsrq(&mctx->xsrq_table, msrq->verbs_srq.srq_num, msrq);
		pthread_spin_unlock(&mcq->lock);
		return ret;
	}

	mlx4_free_db(mctx, MLX4_DB_TYPE_RQ, msrq->db);
	mlx4_free_buf(&msrq->buf);
	free(msrq->wrid);
	free(msrq);

	return 0;
}
Example #3
int mlx4_arm_cq(struct ibv_cq *ibvcq, int solicited)
{
	struct mlx4_cq *cq = to_mcq(ibvcq);
	uint32_t doorbell[2];
	uint32_t sn;
	uint32_t ci;
	uint32_t cmd;

	sn  = cq->arm_sn & 3;
	ci  = cq->cons_index & 0xffffff;
	cmd = solicited ? MLX4_CQ_DB_REQ_NOT_SOL : MLX4_CQ_DB_REQ_NOT;

	*cq->arm_db = htonl(sn << 28 | cmd | ci);

	/*
	 * Make sure that the doorbell record in host memory is
	 * written before ringing the doorbell via PCI MMIO.
	 */
	wmb();

	doorbell[0] = htonl(sn << 28 | cmd | cq->cqn);
	doorbell[1] = htonl(ci);

	mlx4_write64(doorbell, to_mctx(ibvcq->context), MLX4_CQ_DOORBELL);

	return 0;
}
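This routine implements the driver side of ibv_req_notify_cq(). A hedged sketch of the typical application-side pattern around it, assuming the CQ was created on a completion channel (cq and channel are placeholders):

#include <infiniband/verbs.h>

/* Hedged sketch: wait for a completion event, then drain the CQ.
 * Assumes cq was created with ibv_create_cq(ctx, ..., channel, 0). */
static int wait_and_poll(struct ibv_comp_channel *channel, struct ibv_cq *cq)
{
	struct ibv_cq *ev_cq;
	void *ev_ctx;
	struct ibv_wc wc;
	int n;

	if (ibv_req_notify_cq(cq, 0))		/* arm: next completion raises an event */
		return -1;
	if (ibv_get_cq_event(channel, &ev_cq, &ev_ctx))
		return -1;
	ibv_ack_cq_events(ev_cq, 1);
	if (ibv_req_notify_cq(ev_cq, 0))	/* re-arm before polling to avoid races */
		return -1;
	while ((n = ibv_poll_cq(ev_cq, 1, &wc)) > 0)
		; /* handle wc here */
	return n < 0 ? -1 : 0;
}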
Example #4
int mlx5_alloc_srq_buf(struct ibv_context *context, struct mlx5_srq *srq)
{
	struct mlx5_wqe_srq_next_seg *next;
	int size;
	int buf_size;
	int i;
	struct mlx5_context	   *ctx;

	ctx = to_mctx(context);

	if (srq->max_gs < 0) {
		errno = EINVAL;
		return -1;
	}

	srq->wrid = malloc(srq->max * sizeof *srq->wrid);
	if (!srq->wrid)
		return -1;

	size = sizeof(struct mlx5_wqe_srq_next_seg) +
		srq->max_gs * sizeof(struct mlx5_wqe_data_seg);
	size = max(32, size);

	size = mlx5_round_up_power_of_two(size);

	if (size > ctx->max_recv_wr) {
		free(srq->wrid);
		errno = EINVAL;
		return -1;
	}
	srq->max_gs = (size - sizeof(struct mlx5_wqe_srq_next_seg)) /
		sizeof(struct mlx5_wqe_data_seg);

	srq->wqe_shift = mlx5_ilog2(size);

	buf_size = srq->max * size;

	if (mlx5_alloc_buf(&srq->buf, buf_size,
			   to_mdev(context->device)->page_size)) {
		free(srq->wrid);
		return -1;
	}

	memset(srq->buf.buf, 0, buf_size);

	/*
	 * Now initialize the SRQ buffer so that all of the WQEs are
	 * linked into the list of free WQEs.
	 */

	for (i = 0; i < srq->max; ++i) {
		next = get_wqe(srq, i);
		next->next_wqe_index = htobe16((i + 1) & (srq->max - 1));
	}

	srq->head = 0;
	srq->tail = srq->max - 1;

	return 0;
}
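The function above relies on mlx5_round_up_power_of_two() and mlx5_ilog2(), which are not shown here. A minimal sketch of what such helpers typically compute (not necessarily the driver's exact implementation):

/* Hedged sketch of the two size helpers used above; the driver's own
 * versions may differ, this only illustrates the intended arithmetic. */
static int round_up_power_of_two(int v)
{
	int p = 1;

	while (p < v)
		p <<= 1;
	return p;
}

static int ilog2_of(int v)		/* v is assumed to be a power of two */
{
	int n = 0;

	while (v > 1) {
		v >>= 1;
		++n;
	}
	return n;
}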
Example #5
static void mlx4_free_context(struct ibv_context *ibctx)
{
    struct mlx4_context *context = to_mctx(ibctx);

    munmap(context->uar, to_mdev(ibctx->device)->page_size);
    if (context->bf_page)
        munmap(context->bf_page, to_mdev(ibctx->device)->page_size);
    free(context);
}
Example #6
static void mthca_free_context(struct ibv_context *ibctx)
{
	struct mthca_context *context = to_mctx(ibctx);

	mthca_free_pd(context->pd);
	munmap(context->uar, to_mdev(ibctx->device)->page_size);
	mthca_free_db_tab(context->db_tab);
	free(context);
}
Example #7
static void mlx4_uninit_context(struct verbs_device *v_device,
				struct ibv_context *ibv_ctx)
{
	struct mlx4_context *context = to_mctx(ibv_ctx);

	munmap(context->uar, to_mdev(&v_device->device)->page_size);
	if (context->bfs.page)
		munmap(context->bfs.page,
		       to_mdev(&v_device->device)->page_size);
	if (context->hca_core_clock)
		munmap((context->hca_core_clock - context->core_clk.offset),
		       context->core_clk.offset + sizeof(context->core_clk.mask));
}
Example #8
static void mlx5_free_context(struct verbs_device *device,
			      struct ibv_context *ibctx)
{
	struct mlx5_context *context = to_mctx(ibctx);
	int page_size = to_mdev(ibctx->device)->page_size;
	int i;

	free(context->bfs);
	for (i = 0; i < MLX5_MAX_UAR_PAGES; ++i) {
		if (context->uar[i])
			munmap(context->uar[i], page_size);
	}
	close_debug_file(context);
}
Example #9
File: qp.c  Project: CJacky/virt-ib
int mlx4_alloc_qp_buf(struct ibv_pd *pd, struct ibv_qp_cap *cap,
		       enum ibv_qp_type type, struct mlx4_qp *qp)
{
	qp->rq.max_gs	 = cap->max_recv_sge;

	qp->sq.wrid = malloc(qp->sq.wqe_cnt * sizeof (uint64_t));
	if (!qp->sq.wrid)
		return -1;

	if (qp->rq.wqe_cnt) {
		qp->rq.wrid = malloc(qp->rq.wqe_cnt * sizeof (uint64_t));
		if (!qp->rq.wrid) {
			free(qp->sq.wrid);
			return -1;
		}
	}

	for (qp->rq.wqe_shift = 4;
	     1 << qp->rq.wqe_shift < qp->rq.max_gs * sizeof (struct mlx4_wqe_data_seg);
	     qp->rq.wqe_shift++)
		; /* nothing */

	qp->buf_size = (qp->rq.wqe_cnt << qp->rq.wqe_shift) +
		(qp->sq.wqe_cnt << qp->sq.wqe_shift);
	if (qp->rq.wqe_shift > qp->sq.wqe_shift) {
		qp->rq.offset = 0;
		qp->sq.offset = qp->rq.wqe_cnt << qp->rq.wqe_shift;
	} else {
		qp->rq.offset = qp->sq.wqe_cnt << qp->sq.wqe_shift;
		qp->sq.offset = 0;
	}

	if (vib_alloc_buf(to_mctx(pd->context), &qp->buf,
			    align(qp->buf_size, to_mdev(pd->context->device)->page_size),
			    to_mdev(pd->context->device)->page_size)) {
		free(qp->sq.wrid);
		free(qp->rq.wrid);
		return -1;
	}

	memset(qp->buf.buf, 0, qp->buf_size);

	return 0;
}
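The rq.wqe_shift loop above picks the smallest power-of-two stride that can hold max_gs scatter entries. A minimal, self-contained illustration of that loop, assuming the usual 16-byte mlx4_wqe_data_seg:

/* Hedged illustration of the rq.wqe_shift loop above, assuming a
 * 16-byte data segment: for max_gs = 3 the loop needs 48 bytes and
 * settles on wqe_shift = 6 (a 64-byte stride). */
#include <stdio.h>

int main(void)
{
	int max_gs = 3, seg = 16, shift;

	for (shift = 4; (1 << shift) < max_gs * seg; shift++)
		; /* nothing */
	printf("wqe_shift=%d stride=%d\n", shift, 1 << shift);	/* prints 6, 64 */
	return 0;
}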
Example #10
void mlx4_set_sq_sizes(struct mlx4_qp *qp, struct ibv_qp_cap *cap,
		       enum ibv_qp_type type)
{
	int wqe_size;
	struct mlx4_context *ctx = to_mctx(qp->ibv_qp.context);

	wqe_size = min((1 << qp->sq.wqe_shift), MLX4_MAX_WQE_SIZE) -
		sizeof (struct mlx4_wqe_ctrl_seg);
	switch (type) {
	case IBV_QPT_UD:
		wqe_size -= sizeof (struct mlx4_wqe_datagram_seg);
		break;

	case IBV_QPT_UC:
	case IBV_QPT_RC:
	case IBV_QPT_XRC:
		wqe_size -= sizeof (struct mlx4_wqe_raddr_seg);
		break;

	default:
		break;
	}

	qp->sq.max_gs	     = wqe_size / sizeof (struct mlx4_wqe_data_seg);
	cap->max_send_sge    = min(ctx->max_sge, qp->sq.max_gs);
	qp->sq.max_post	     = min(ctx->max_qp_wr,
				   qp->sq.wqe_cnt - qp->sq_spare_wqes);
	cap->max_send_wr     = qp->sq.max_post;

	/*
	 * Inline data segments can't cross a 64 byte boundary.  So
	 * subtract off one segment header for each 64-byte chunk,
	 * taking into account the fact that wqe_size will be 32 mod
	 * 64 for non-UD QPs.
	 */
	qp->max_inline_data  = wqe_size -
		sizeof (struct mlx4_wqe_inline_seg) *
		(align(wqe_size, MLX4_INLINE_ALIGN) / MLX4_INLINE_ALIGN);
	cap->max_inline_data = qp->max_inline_data;
}
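A small, hedged numeric illustration of the max_inline_data formula from the comment above, assuming a 4-byte inline-segment header and MLX4_INLINE_ALIGN == 64:

/* Hedged illustration: with wqe_size = 480, one 4-byte inline header is
 * charged per 64-byte chunk, align(480, 64) = 512 gives 8 chunks, so
 * max_inline_data = 480 - 8 * 4 = 448. */
#include <stdio.h>

#define ALIGN(x, a) (((x) + (a) - 1) & ~((a) - 1))

int main(void)
{
	int wqe_size = 480, hdr = 4, chunk = 64;

	printf("%d\n", wqe_size - hdr * (ALIGN(wqe_size, chunk) / chunk)); /* 448 */
	return 0;
}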
Example #11
static int mlx5_map_internal_clock(struct mlx5_device *mdev,
				   struct ibv_context *ibv_ctx)
{
	struct mlx5_context *context = to_mctx(ibv_ctx);
	void *hca_clock_page;
	off_t offset = 0;

	set_command(MLX5_EXP_MMAP_GET_CORE_CLOCK_CMD, &offset);
	hca_clock_page = mmap(NULL, mdev->page_size,
			      PROT_READ, MAP_SHARED, ibv_ctx->cmd_fd,
			      offset * mdev->page_size);

	if (hca_clock_page == MAP_FAILED) {
		fprintf(stderr, PFX
			"Warning: Timestamp available,\n"
			"but failed to mmap() hca core clock page.\n");
		return -1;
	}

	context->hca_core_clock = hca_clock_page + context->core_clock.offset;

	return 0;
}
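A hedged sketch, not taken from the driver, of how the mapped core-clock counter is typically consumed: the 64-bit big-endian counter is read as two 32-bit halves with a wraparound check on the high word, and the core_clock.mult/shift fields set up in the init examples convert cycles to nanoseconds as (cycles * mult) >> shift:

#include <stdint.h>
#include <endian.h>

/* Hedged sketch only: read the big-endian counter exposed by the mapped
 * page, re-reading the high half to detect wraparound between the reads. */
static uint64_t read_core_clock_cycles(const void *hca_core_clock)
{
	uint32_t hi, lo, hi2;

	do {
		hi  = be32toh(*(const volatile uint32_t *)hca_core_clock);
		lo  = be32toh(*(const volatile uint32_t *)((const char *)hca_core_clock + 4));
		hi2 = be32toh(*(const volatile uint32_t *)hca_core_clock);
	} while (hi != hi2);

	return ((uint64_t)hi << 32) | lo;
}

static uint64_t cycles_to_ns(uint64_t cycles, uint64_t mult, int shift)
{
	return (cycles * mult) >> shift;	/* matches core_clock.mult/shift usage */
}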
Example #12
struct ibv_srq *mlx4_create_xrc_srq(struct ibv_context *context,
				    struct ibv_srq_init_attr_ex *attr_ex)
{
	struct mlx4_create_xsrq cmd;
	struct mlx4_create_xsrq_resp resp;
	struct mlx4_srq *srq;
	int ret;

	/* Sanity check SRQ size before proceeding */
	if (attr_ex->attr.max_wr > 1 << 16 || attr_ex->attr.max_sge > 64)
		return NULL;

	srq = calloc(1, sizeof *srq);
	if (!srq)
		return NULL;

	if (pthread_spin_init(&srq->lock, PTHREAD_PROCESS_PRIVATE))
		goto err;

	srq->max     = align_queue_size(attr_ex->attr.max_wr + 1);
	srq->max_gs  = attr_ex->attr.max_sge;
	srq->counter = 0;
	srq->ext_srq = 1;

	if (mlx4_alloc_srq_buf(attr_ex->pd, &attr_ex->attr, srq))
		goto err;

	srq->db = mlx4_alloc_db(to_mctx(context), MLX4_DB_TYPE_RQ);
	if (!srq->db)
		goto err_free;

	*srq->db = 0;

	cmd.buf_addr = (uintptr_t) srq->buf.buf;
	cmd.db_addr  = (uintptr_t) srq->db;

	ret = ibv_cmd_create_srq_ex(context, &srq->verbs_srq,
				    sizeof(srq->verbs_srq),
				    attr_ex,
				    &cmd.ibv_cmd, sizeof cmd,
				    &resp.ibv_resp, sizeof resp);
	if (ret)
		goto err_db;

	ret = mlx4_store_xsrq(&to_mctx(context)->xsrq_table,
			      srq->verbs_srq.srq_num, srq);
	if (ret)
		goto err_destroy;

	return &srq->verbs_srq.srq;

err_destroy:
	ibv_cmd_destroy_srq(&srq->verbs_srq.srq);
err_db:
	mlx4_free_db(to_mctx(context), MLX4_DB_TYPE_RQ, srq->db);
err_free:
	free(srq->wrid);
	mlx4_free_buf(&srq->buf);
err:
	free(srq);
	return NULL;
}
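A hedged usage sketch showing how an application would reach the routine above through the generic ibv_create_srq_ex() entry point; ctx, pd, xrcd and cq are assumed to exist already, and the sizes are arbitrary:

#include <string.h>
#include <infiniband/verbs.h>

/* Hedged usage sketch: create an XRC SRQ via the extended verbs API. */
static struct ibv_srq *create_xrc_srq(struct ibv_context *ctx,
				      struct ibv_pd *pd,
				      struct ibv_xrcd *xrcd,
				      struct ibv_cq *cq)
{
	struct ibv_srq_init_attr_ex attr;

	memset(&attr, 0, sizeof(attr));
	attr.attr.max_wr  = 1024;
	attr.attr.max_sge = 1;
	attr.srq_type     = IBV_SRQT_XRC;
	attr.pd           = pd;
	attr.xrcd         = xrcd;
	attr.cq           = cq;
	attr.comp_mask    = IBV_SRQ_INIT_ATTR_TYPE | IBV_SRQ_INIT_ATTR_PD |
			    IBV_SRQ_INIT_ATTR_XRCD | IBV_SRQ_INIT_ATTR_CQ;

	return ibv_create_srq_ex(ctx, &attr);
}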
Example #13
static int mlx4_poll_one(struct mlx4_cq *cq,
			 struct mlx4_qp **cur_qp,
			 struct ibv_wc *wc)
{
	struct mlx4_wq *wq;
	struct mlx4_cqe *cqe;
	struct mlx4_srq *srq = NULL;
	uint32_t qpn;
	uint32_t srqn;
	uint32_t g_mlpath_rqpn;
	uint16_t wqe_index;
	int is_error;
	int is_send;

	cqe = next_cqe_sw(cq);
	if (!cqe)
		return CQ_EMPTY;

	++cq->cons_index;

	VALGRIND_MAKE_MEM_DEFINED(cqe, sizeof *cqe);

	/*
	 * Make sure we read CQ entry contents after we've checked the
	 * ownership bit.
	 */
	rmb();

	qpn = ntohl(cqe->my_qpn);

	is_send  = cqe->owner_sr_opcode & MLX4_CQE_IS_SEND_MASK;
	is_error = (cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) ==
		MLX4_CQE_OPCODE_ERROR;

	if (qpn & MLX4_XRC_QPN_BIT && !is_send) {
		srqn = ntohl(cqe->g_mlpath_rqpn) & 0xffffff;
		/*
		 * We do not have to take the XRC SRQ table lock here,
		 * because CQs will be locked while XRC SRQs are removed
		 * from the table.
		 */
		srq = mlx4_find_xrc_srq(to_mctx(cq->ibv_cq.context), srqn);
		if (!srq)
			return CQ_POLL_ERR;
	} else if (!*cur_qp || (qpn & 0xffffff) != (*cur_qp)->ibv_qp.qp_num) {
		/*
		 * We do not have to take the QP table lock here,
		 * because CQs will be locked while QPs are removed
		 * from the table.
		 */
		*cur_qp = mlx4_find_qp(to_mctx(cq->ibv_cq.context),
				       qpn & 0xffffff);
		if (!*cur_qp)
			return CQ_POLL_ERR;
	}

	wc->qp_num = qpn & 0xffffff;

	if (is_send) {
		wq = &(*cur_qp)->sq;
		wqe_index = ntohs(cqe->wqe_index);
		wq->tail += (uint16_t) (wqe_index - (uint16_t) wq->tail);
		wc->wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)];
		++wq->tail;
	} else if (srq) {
		wqe_index = htons(cqe->wqe_index);
		wc->wr_id = srq->wrid[wqe_index];
		mlx4_free_srq_wqe(srq, wqe_index);
	} else if ((*cur_qp)->ibv_qp.srq) {
		srq = to_msrq((*cur_qp)->ibv_qp.srq);
		wqe_index = htons(cqe->wqe_index);
		wc->wr_id = srq->wrid[wqe_index];
		mlx4_free_srq_wqe(srq, wqe_index);
	} else {
		wq = &(*cur_qp)->rq;
		wc->wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)];
		++wq->tail;
	}

	if (is_error) {
		mlx4_handle_error_cqe((struct mlx4_err_cqe *) cqe, wc);
		return CQ_OK;
	}

	wc->status = IBV_WC_SUCCESS;

	if (is_send) {
		wc->wc_flags = 0;
		switch (cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) {
		case MLX4_OPCODE_RDMA_WRITE_IMM:
			wc->wc_flags |= IBV_WC_WITH_IMM;
			/* fall through */
		case MLX4_OPCODE_RDMA_WRITE:
			wc->opcode    = IBV_WC_RDMA_WRITE;
			break;
		case MLX4_OPCODE_SEND_IMM:
			wc->wc_flags |= IBV_WC_WITH_IMM;
			/* fall through */
		case MLX4_OPCODE_SEND:
			wc->opcode    = IBV_WC_SEND;
			break;
		case MLX4_OPCODE_RDMA_READ:
			wc->opcode    = IBV_WC_RDMA_READ;
			wc->byte_len  = ntohl(cqe->byte_cnt);
			break;
		case MLX4_OPCODE_ATOMIC_CS:
			wc->opcode    = IBV_WC_COMP_SWAP;
			wc->byte_len  = 8;
			break;
		case MLX4_OPCODE_ATOMIC_FA:
			wc->opcode    = IBV_WC_FETCH_ADD;
			wc->byte_len  = 8;
			break;
		case MLX4_OPCODE_BIND_MW:
			wc->opcode    = IBV_WC_BIND_MW;
			break;
		default:
			/* assume it's a send completion */
			wc->opcode    = IBV_WC_SEND;
			break;
		}
	} else {
		wc->byte_len = ntohl(cqe->byte_cnt);

		switch (cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) {
		case MLX4_RECV_OPCODE_RDMA_WRITE_IMM:
			wc->opcode   = IBV_WC_RECV_RDMA_WITH_IMM;
			wc->wc_flags = IBV_WC_WITH_IMM;
			wc->imm_data = cqe->immed_rss_invalid;
			break;
		case MLX4_RECV_OPCODE_SEND:
			wc->opcode   = IBV_WC_RECV;
			wc->wc_flags = 0;
			break;
		case MLX4_RECV_OPCODE_SEND_IMM:
			wc->opcode   = IBV_WC_RECV;
			wc->wc_flags = IBV_WC_WITH_IMM;
			wc->imm_data = cqe->immed_rss_invalid;
			break;
		}

		wc->slid	   = ntohs(cqe->rlid);
		wc->sl		   = cqe->sl >> 4;
		g_mlpath_rqpn	   = ntohl(cqe->g_mlpath_rqpn);
		wc->src_qp	   = g_mlpath_rqpn & 0xffffff;
		wc->dlid_path_bits = (g_mlpath_rqpn >> 24) & 0x7f;
		wc->wc_flags	  |= g_mlpath_rqpn & 0x80000000 ? IBV_WC_GRH : 0;
		wc->pkey_index     = ntohl(cqe->immed_rss_invalid) & 0x7f;
	}

	return CQ_OK;
}
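A hedged application-level sketch of the polling loop that ends up in mlx4_poll_one() via ibv_poll_cq():

#include <stdio.h>
#include <infiniband/verbs.h>

/* Hedged usage sketch: drain the CQ in batches and report failed WRs. */
static int drain_cq(struct ibv_cq *cq)
{
	struct ibv_wc wc[16];
	int n, i, done = 0;

	while ((n = ibv_poll_cq(cq, 16, wc)) > 0) {
		for (i = 0; i < n; ++i) {
			if (wc[i].status != IBV_WC_SUCCESS)
				fprintf(stderr, "wr_id %llu failed: %s\n",
					(unsigned long long)wc[i].wr_id,
					ibv_wc_status_str(wc[i].status));
			++done;
		}
	}
	return n < 0 ? -1 : done;
}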
Example #14
int mlx4_post_send(struct ibv_qp *ibqp, struct ibv_send_wr *wr,
			  struct ibv_send_wr **bad_wr)
{
	struct mlx4_context *ctx;
	struct mlx4_qp *qp = to_mqp(ibqp);
	void *wqe;
	struct mlx4_wqe_ctrl_seg *ctrl;
	int ind;
	int nreq;
	int inl = 0;
	int ret = 0;
	int size;
	int i;

	pthread_spin_lock(&qp->sq.lock);

	/* XXX check that state is OK to post send */

	ind = qp->sq.head;

	for (nreq = 0; wr; ++nreq, wr = wr->next) {
		if (wq_overflow(&qp->sq, nreq, to_mcq(qp->ibv_qp.send_cq))) {
			ret = -1;
			*bad_wr = wr;
			goto out;
		}

		if (wr->num_sge > qp->sq.max_gs) {
			ret = -1;
			*bad_wr = wr;
			goto out;
		}

		if (wr->opcode >= sizeof mlx4_ib_opcode / sizeof mlx4_ib_opcode[0]) {
			ret = -1;
			*bad_wr = wr;
			goto out;
		}

		ctrl = wqe = get_send_wqe(qp, ind & (qp->sq.wqe_cnt - 1));
		qp->sq.wrid[ind & (qp->sq.wqe_cnt - 1)] = wr->wr_id;

		ctrl->xrcrb_flags =
			(wr->send_flags & IBV_SEND_SIGNALED ?
			 htonl(MLX4_WQE_CTRL_CQ_UPDATE) : 0) |
			(wr->send_flags & IBV_SEND_SOLICITED ?
			 htonl(MLX4_WQE_CTRL_SOLICIT) : 0)   |
			qp->sq_signal_bits;

		if (wr->opcode == IBV_WR_SEND_WITH_IMM ||
		    wr->opcode == IBV_WR_RDMA_WRITE_WITH_IMM)
			ctrl->imm = wr->imm_data;
		else
			ctrl->imm = 0;

		wqe += sizeof *ctrl;
		size = sizeof *ctrl / 16;

		switch (ibqp->qp_type) {
		case IBV_QPT_XRC:
			ctrl->xrcrb_flags |= htonl(wr->xrc_remote_srq_num << 8);
			/* fall thru */
		case IBV_QPT_RC:
		case IBV_QPT_UC:
			switch (wr->opcode) {
			case IBV_WR_ATOMIC_CMP_AND_SWP:
			case IBV_WR_ATOMIC_FETCH_AND_ADD:
				set_raddr_seg(wqe, wr->wr.atomic.remote_addr,
					      wr->wr.atomic.rkey);
				wqe  += sizeof (struct mlx4_wqe_raddr_seg);

				set_atomic_seg(wqe, wr);
				wqe  += sizeof (struct mlx4_wqe_atomic_seg);
				size += (sizeof (struct mlx4_wqe_raddr_seg) +
					 sizeof (struct mlx4_wqe_atomic_seg)) / 16;

				break;

			case IBV_WR_RDMA_READ:
				inl = 1;
				/* fall through */
			case IBV_WR_RDMA_WRITE:
			case IBV_WR_RDMA_WRITE_WITH_IMM:
				set_raddr_seg(wqe, wr->wr.rdma.remote_addr,
					      wr->wr.rdma.rkey);
				wqe  += sizeof (struct mlx4_wqe_raddr_seg);
				size += sizeof (struct mlx4_wqe_raddr_seg) / 16;

				break;

			default:
				/* No extra segments required for sends */
				break;
			}
			break;

		case IBV_QPT_UD:
			set_datagram_seg(wqe, wr);
			wqe  += sizeof (struct mlx4_wqe_datagram_seg);
			size += sizeof (struct mlx4_wqe_datagram_seg) / 16;
			if (to_mah(wr->wr.ud.ah)->tagged) {
				ctrl->ins_vlan = 1 << 6;
				ctrl->vlan_tag = htons(to_mah(wr->wr.ud.ah)->vlan);
			}

			break;

		default:
			break;
		}

		if (wr->send_flags & IBV_SEND_INLINE && wr->num_sge) {
			struct mlx4_wqe_inline_seg *seg;
			void *addr;
			int len, seg_len;
			int num_seg;
			int off, to_copy;

			inl = 0;

			seg = wqe;
			wqe += sizeof *seg;
			off = ((uintptr_t) wqe) & (MLX4_INLINE_ALIGN - 1);
			num_seg = 0;
			seg_len = 0;

			for (i = 0; i < wr->num_sge; ++i) {
				addr = (void *) (uintptr_t) wr->sg_list[i].addr;
				len  = wr->sg_list[i].length;
				inl += len;

				if (inl > qp->max_inline_data) {
					inl = 0;
					ret = -1;
					*bad_wr = wr;
					goto out;
				}

				while (len >= MLX4_INLINE_ALIGN - off) {
					to_copy = MLX4_INLINE_ALIGN - off;
					memcpy(wqe, addr, to_copy);
					len -= to_copy;
					wqe += to_copy;
					addr += to_copy;
					seg_len += to_copy;
					wmb(); /* see comment below */
					seg->byte_count = htonl(MLX4_INLINE_SEG | seg_len);
					seg_len = 0;
					seg = wqe;
					wqe += sizeof *seg;
					off = sizeof *seg;
					++num_seg;
				}

				memcpy(wqe, addr, len);
				wqe += len;
				seg_len += len;
				off += len;
			}

			if (seg_len) {
				++num_seg;
				/*
				 * Need a barrier here to make sure
				 * all the data is visible before the
				 * byte_count field is set.  Otherwise
				 * the HCA prefetcher could grab the
				 * 64-byte chunk with this inline
				 * segment and get a valid (!=
				 * 0xffffffff) byte count but stale
				 * data, and end up sending the wrong
				 * data.
				 */
				wmb();
				seg->byte_count = htonl(MLX4_INLINE_SEG | seg_len);
			}

			size += (inl + num_seg * sizeof * seg + 15) / 16;
		} else {
			struct mlx4_wqe_data_seg *seg = wqe;

			for (i = wr->num_sge - 1; i >= 0 ; --i)
				set_data_seg(seg + i, wr->sg_list + i);

			size += wr->num_sge * (sizeof *seg / 16);
		}

		ctrl->fence_size = (wr->send_flags & IBV_SEND_FENCE ?
				    MLX4_WQE_CTRL_FENCE : 0) | size;

		/*
		 * Make sure descriptor is fully written before
		 * setting ownership bit (because HW can start
		 * executing as soon as we do).
		 */
		wmb();

		ctrl->owner_opcode = htonl(mlx4_ib_opcode[wr->opcode]) |
			(ind & qp->sq.wqe_cnt ? htonl(1 << 31) : 0);

		/*
		 * We can improve latency by not stamping the last
		 * send queue WQE until after ringing the doorbell, so
		 * only stamp here if there are still more WQEs to post.
		 */
		if (wr->next)
			stamp_send_wqe(qp, (ind + qp->sq_spare_wqes) &
				       (qp->sq.wqe_cnt - 1));

		++ind;
	}

out:
	ctx = to_mctx(ibqp->context);

	if (nreq == 1 && inl && size > 1 && size < ctx->bf_buf_size / 16) {
		ctrl->owner_opcode |= htonl((qp->sq.head & 0xffff) << 8);
		*(uint32_t *) (&ctrl->vlan_tag) |= qp->doorbell_qpn;
		/*
		 * Make sure that descriptor is written to memory
		 * before writing to BlueFlame page.
		 */
		wmb();

		++qp->sq.head;

		pthread_spin_lock(&ctx->bf_lock);

		mlx4_bf_copy(ctx->bf_page + ctx->bf_offset, (unsigned long *) ctrl,
			     align(size * 16, 64));
		wc_wmb();

		ctx->bf_offset ^= ctx->bf_buf_size;

		pthread_spin_unlock(&ctx->bf_lock);
	} else if (nreq) {
		qp->sq.head += nreq;

		/*
		 * Make sure that descriptors are written before
		 * doorbell record.
		 */
		wmb();

		*(uint32_t *) (ctx->uar + MLX4_SEND_DOORBELL) = qp->doorbell_qpn;
	}

	if (nreq)
		stamp_send_wqe(qp, (ind + qp->sq_spare_wqes - 1) &
			       (qp->sq.wqe_cnt - 1));

	pthread_spin_unlock(&qp->sq.lock);

	return ret;
}
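A hedged usage sketch of posting a single signaled SEND through ibv_post_send(), which dispatches to the driver routine above; buf, len and lkey are assumed to come from a registered memory region:

#include <string.h>
#include <stdint.h>
#include <infiniband/verbs.h>

/* Hedged usage sketch: one signaled SEND with a single scatter/gather entry. */
static int post_one_send(struct ibv_qp *qp, void *buf, uint32_t len,
			 uint32_t lkey, uint64_t wr_id)
{
	struct ibv_sge sge = {
		.addr   = (uintptr_t)buf,
		.length = len,
		.lkey   = lkey,
	};
	struct ibv_send_wr wr, *bad_wr = NULL;

	memset(&wr, 0, sizeof(wr));
	wr.wr_id      = wr_id;
	wr.sg_list    = &sge;
	wr.num_sge    = 1;
	wr.opcode     = IBV_WR_SEND;
	wr.send_flags = IBV_SEND_SIGNALED;

	return ibv_post_send(qp, &wr, &bad_wr);
}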
Example #15
static int mlx4_init_context(struct verbs_device *v_device,
			     struct ibv_context *ibv_ctx, int cmd_fd)
{
	struct mlx4_context	        *context;
	struct mlx4_alloc_ucontext_req  req;
	struct mlx4_alloc_ucontext_resp resp;
	struct mlx4_alloc_ucontext_resp_v3 resp_v3;
	int				i;
	struct ibv_exp_device_attr	dev_attrs;
	struct ibv_device_attr	           dev_legacy_attrs;
	struct mlx4_device		*dev = to_mdev(&v_device->device);
	unsigned int			qp_tab_size;
	unsigned int			bf_reg_size;
	unsigned int			cqe_size;
	int				hca_clock_offset;
	void				*hca_clock_page = NULL;

	/* verbs_context should be used for new verbs.
	 * memory footprint of mlx4_context and verbs_context share
	 * struct ibv_context.
	 */
	struct verbs_context *verbs_ctx = verbs_get_ctx(ibv_ctx);
	struct verbs_context_exp *verbs_exp_ctx = verbs_get_exp_ctx(ibv_ctx);

	memset(&req, 0, sizeof(req));
	context = to_mctx(ibv_ctx);
	ibv_ctx->cmd_fd = cmd_fd;
	ibv_ctx->device = &v_device->device;

	if (pthread_mutex_init(&context->env_mtx, NULL))
		return EIO;

	if (dev->driver_abi_ver > 3) {
#ifdef MLX4_WQE_FORMAT
		req.lib_caps = MLX4_USER_DEV_CAP_WQE_FORMAT;
#endif
		if (ibv_cmd_get_context(ibv_ctx, &req.cmd, sizeof(req),
					&resp.ibv_resp, sizeof(resp)))
			return errno;

		VALGRIND_MAKE_MEM_DEFINED(&resp, sizeof(resp));
		qp_tab_size			= resp.qp_tab_size;
		bf_reg_size			= resp.bf_reg_size;
		context->bf_regs_per_page	= resp.bf_regs_per_page;
		cqe_size			= resp.cqe_size;
	} else {
		if (ibv_cmd_get_context(ibv_ctx, &req.cmd, sizeof(req.cmd),
					&resp_v3.ibv_resp, sizeof(resp_v3)))
			return errno;

		VALGRIND_MAKE_MEM_DEFINED(&resp_v3, sizeof(resp_v3));
		qp_tab_size			= resp_v3.qp_tab_size;
		bf_reg_size			= resp_v3.bf_reg_size;
		context->bf_regs_per_page	= resp_v3.bf_regs_per_page;
		cqe_size			= 32;
	}

	context->num_qps	= qp_tab_size;
	context->qp_table_shift = ffs(context->num_qps) - 1 - MLX4_QP_TABLE_BITS;
	context->qp_table_mask	= (1 << context->qp_table_shift) - 1;
	context->cqe_size = cqe_size;
	for (i = 0; i < MLX4_PORTS_NUM; ++i)
		context->port_query_cache[i].valid = 0;

	pthread_mutex_init(&context->qp_table_mutex, NULL);
	for (i = 0; i < MLX4_QP_TABLE_SIZE; ++i)
		context->qp_table[i].refcnt = 0;

	for (i = 0; i < MLX4_NUM_DB_TYPE; ++i)
		context->db_list[i] = NULL;

	mlx4_init_xsrq_table(&context->xsrq_table, qp_tab_size);
	pthread_mutex_init(&context->db_list_mutex, NULL);

	context->uar = mmap(NULL, dev->page_size, PROT_WRITE,
			    MAP_SHARED, cmd_fd, 0);
	if (context->uar == MAP_FAILED)
		return errno;

	if (bf_reg_size) {
		context->bfs.page = mmap(NULL, dev->page_size,
					 PROT_WRITE, MAP_SHARED, cmd_fd,
					 dev->page_size);
		if (context->bfs.page == MAP_FAILED) {
			fprintf(stderr, PFX "Warning: BlueFlame available, "
				"but failed to mmap() BlueFlame page.\n");
			context->bfs.page		= NULL;
			context->bfs.buf_size		= 0;
			context->bfs.num_dedic_bfs	= 0;
		} else {
			context->bfs.num_dedic_bfs = min(context->bf_regs_per_page - 1,
							 MLX4_MAX_BFS_IN_PAGE - 1);
			context->bfs.buf_size = bf_reg_size / 2;
			mlx4_spinlock_init(&context->bfs.dedic_bf_lock, !mlx4_single_threaded);
			context->bfs.cmn_bf.address = context->bfs.page;

			mlx4_lock_init(&context->bfs.cmn_bf.lock,
				       !mlx4_single_threaded,
				       mlx4_get_locktype());

			context->bfs.dedic_bf_free = context->bfs.num_dedic_bfs;
			for (i = 0; i < context->bfs.num_dedic_bfs; i++) {
				context->bfs.dedic_bf[i].address   = context->bfs.page + (i + 1) * MLX4_BFS_STRIDE;
				context->bfs.dedic_bf_used[i] = 0;
			}
		}
	} else {
		context->bfs.page		= NULL;
		context->bfs.buf_size		= 0;
		context->bfs.num_dedic_bfs	= 0;
	}

	mlx4_spinlock_init(&context->uar_lock, !mlx4_single_threaded);

	mlx4_spinlock_init(&context->send_db_lock, !mlx4_single_threaded);
	INIT_LIST_HEAD(&context->send_db_list);

	mlx4_spinlock_init(&context->hugetlb_lock, !mlx4_single_threaded);
	INIT_LIST_HEAD(&context->hugetlb_list);

	pthread_mutex_init(&context->task_mutex, NULL);

	memset(&dev_attrs, 0, sizeof(dev_attrs));
	dev_attrs.comp_mask = IBV_EXP_DEVICE_ATTR_WITH_TIMESTAMP_MASK |
			      IBV_EXP_DEVICE_ATTR_WITH_HCA_CORE_CLOCK |
			      IBV_EXP_DEVICE_ATTR_EXP_CAP_FLAGS |
			      IBV_EXP_DEVICE_ATTR_MAX_CTX_RES_DOMAIN;

	if (mlx4_exp_query_device(ibv_ctx, &dev_attrs)) {
		if (mlx4_query_device(ibv_ctx, &dev_legacy_attrs))
			goto query_free;

		memcpy(&dev_attrs, &dev_legacy_attrs, sizeof(dev_legacy_attrs));
	}

	context->max_qp_wr = dev_attrs.max_qp_wr;
	context->max_sge = dev_attrs.max_sge;
	context->max_cqe = dev_attrs.max_cqe;
	context->exp_device_cap_flags = dev_attrs.exp_device_cap_flags;
	if (dev_attrs.comp_mask & IBV_EXP_DEVICE_ATTR_MAX_CTX_RES_DOMAIN)
		context->max_ctx_res_domain = dev_attrs.max_ctx_res_domain;

	VALGRIND_MAKE_MEM_DEFINED(&context->hca_core_clock, sizeof(context->hca_core_clock));
	if (dev_attrs.comp_mask & IBV_EXP_DEVICE_ATTR_WITH_HCA_CORE_CLOCK) {
		if (dev_attrs.hca_core_clock)
			context->core_clk.mult = ((1ull * 1000) << 29) /
						dev_attrs.hca_core_clock;
		else
			context->core_clk.mult = 0;

		context->core_clk.shift = 29;
		context->core_clk.mask = dev_attrs.timestamp_mask;

		if (ioctl(cmd_fd, MLX4_IOCHWCLOCKOFFSET,
			  &hca_clock_offset) >= 0) {
			VALGRIND_MAKE_MEM_DEFINED(&hca_clock_offset, sizeof(hca_clock_offset));
			context->core_clk.offset = hca_clock_offset;
			hca_clock_page = mmap(NULL, hca_clock_offset +
					sizeof(context->core_clk.mask),
					PROT_READ, MAP_SHARED, cmd_fd,
					dev->page_size *
					(MLX4_IB_MMAP_GET_HW_CLOCK));

			if (hca_clock_page == MAP_FAILED) {
				fprintf(stderr, PFX
					"Warning: Timestamp available,\n"
					"but failed to mmap() hca core  "
					"clock page.\n");
			} else {
				context->hca_core_clock = hca_clock_page +
					context->core_clk.offset;
			}
		}
	}

	ibv_ctx->ops = mlx4_ctx_ops;

	verbs_ctx->has_comp_mask |= VERBS_CONTEXT_XRCD | VERBS_CONTEXT_SRQ |
				    VERBS_CONTEXT_QP;

	verbs_set_ctx_op(verbs_ctx, close_xrcd, mlx4_close_xrcd);
	verbs_set_ctx_op(verbs_ctx, open_xrcd, mlx4_open_xrcd);
	verbs_set_ctx_op(verbs_ctx, create_srq_ex, mlx4_create_srq_ex);
	verbs_set_ctx_op(verbs_ctx, get_srq_num, verbs_get_srq_num);
	verbs_set_ctx_op(verbs_ctx, create_qp_ex, mlx4_create_qp_ex);
	verbs_set_ctx_op(verbs_ctx, open_qp, mlx4_open_qp);
	verbs_set_ctx_op(verbs_ctx, create_flow, ibv_cmd_create_flow);
	verbs_set_ctx_op(verbs_ctx, destroy_flow, ibv_cmd_destroy_flow);

	/*
	 * Set experimental verbs
	 */
	verbs_set_exp_ctx_op(verbs_exp_ctx, drv_exp_ibv_reg_shared_mr, mlx4_reg_shared_mr);
	verbs_set_exp_ctx_op(verbs_exp_ctx, drv_exp_ibv_create_flow, ibv_exp_cmd_create_flow);
	verbs_set_exp_ctx_op(verbs_exp_ctx, drv_exp_ibv_destroy_flow, ibv_exp_cmd_destroy_flow);
	verbs_set_exp_ctx_op(verbs_exp_ctx, drv_exp_ibv_create_ah, mlx4_exp_create_ah);
	verbs_set_exp_ctx_op(verbs_exp_ctx, drv_exp_query_device, mlx4_exp_query_device);
	verbs_set_exp_ctx_op(verbs_exp_ctx, drv_exp_create_qp, mlx4_exp_create_qp);
	verbs_set_exp_ctx_op(verbs_exp_ctx, drv_exp_modify_qp, mlx4_exp_modify_qp);
	verbs_set_exp_ctx_op(verbs_exp_ctx, drv_exp_query_port, mlx4_exp_query_port);
	verbs_set_exp_ctx_op(verbs_exp_ctx, drv_exp_modify_cq, mlx4_modify_cq);
	verbs_set_exp_ctx_op(verbs_exp_ctx, drv_exp_post_task, mlx4_post_task);
	verbs_set_exp_ctx_op(verbs_exp_ctx, drv_exp_set_legacy_xrc, mlx4_set_legacy_xrc);
	verbs_set_exp_ctx_op(verbs_exp_ctx, drv_exp_get_legacy_xrc, mlx4_get_legacy_xrc);
	verbs_set_exp_ctx_op(verbs_exp_ctx, drv_exp_ibv_poll_cq, mlx4_exp_poll_cq);
	verbs_set_exp_ctx_op(verbs_exp_ctx, exp_create_cq, mlx4_create_cq_ex);
	verbs_set_exp_ctx_op(verbs_exp_ctx, drv_exp_query_values, mlx4_query_values);
	verbs_set_exp_ctx_op(verbs_exp_ctx, drv_exp_reg_mr, mlx4_exp_reg_mr);
	verbs_set_exp_ctx_op(verbs_exp_ctx, drv_exp_post_send, mlx4_exp_post_send);
	verbs_set_exp_ctx_op(verbs_exp_ctx, drv_exp_bind_mw, mlx4_exp_bind_mw);
	verbs_set_exp_ctx_op(verbs_exp_ctx, drv_exp_rereg_mr, mlx4_exp_rereg_mr);
	verbs_set_exp_ctx_op(verbs_exp_ctx, drv_exp_dereg_mr, mlx4_exp_dereg_mr);
	verbs_set_exp_ctx_op(verbs_exp_ctx, exp_create_res_domain, mlx4_exp_create_res_domain);
	verbs_set_exp_ctx_op(verbs_exp_ctx, exp_destroy_res_domain, mlx4_exp_destroy_res_domain);
	verbs_set_exp_ctx_op(verbs_exp_ctx, exp_query_intf, mlx4_exp_query_intf);
	verbs_set_exp_ctx_op(verbs_exp_ctx, exp_release_intf, mlx4_exp_release_intf);

	return 0;

query_free:
	munmap(context->uar, dev->page_size);
	if (context->bfs.page)
		munmap(context->bfs.page, dev->page_size);
	if (hca_clock_page)
		munmap(hca_clock_page, hca_clock_offset +
		       sizeof(context->core_clk.mask));

	return errno;
}
Example #16
int mthca_tavor_post_srq_recv(struct ibv_srq *ibsrq,
			      struct ibv_recv_wr *wr,
			      struct ibv_recv_wr **bad_wr)
{
	struct mthca_srq *srq = to_msrq(ibsrq);
	uint32_t doorbell[2];
	int err = 0;
	int first_ind;
	int ind;
	int next_ind;
	int nreq;
	int i;
	void *wqe;
	void *prev_wqe;

	pthread_spin_lock(&srq->lock);

	first_ind = srq->first_free;

	for (nreq = 0; wr; wr = wr->next) {
		ind	  = srq->first_free;
		wqe       = get_wqe(srq, ind);
		next_ind  = *wqe_to_link(wqe);

		if (next_ind < 0) {
			err = -1;
			*bad_wr = wr;
			break;
		}

		prev_wqe  = srq->last;
		srq->last = wqe;

		((struct mthca_next_seg *) wqe)->ee_nds = 0;
		/* flags field will always remain 0 */

		wqe += sizeof (struct mthca_next_seg);

		if (wr->num_sge > srq->max_gs) {
			err = -1;
			*bad_wr = wr;
			srq->last = prev_wqe;
			break;
		}

		for (i = 0; i < wr->num_sge; ++i) {
			((struct mthca_data_seg *) wqe)->byte_count =
				htonl(wr->sg_list[i].length);
			((struct mthca_data_seg *) wqe)->lkey =
				htonl(wr->sg_list[i].lkey);
			((struct mthca_data_seg *) wqe)->addr =
				htonll(wr->sg_list[i].addr);
			wqe += sizeof (struct mthca_data_seg);
		}

		if (i < srq->max_gs) {
			((struct mthca_data_seg *) wqe)->byte_count = 0;
			((struct mthca_data_seg *) wqe)->lkey = htonl(MTHCA_INVAL_LKEY);
			((struct mthca_data_seg *) wqe)->addr = 0;
		}

		((struct mthca_next_seg *) prev_wqe)->ee_nds =
			htonl(MTHCA_NEXT_DBD);

		srq->wrid[ind]  = wr->wr_id;
		srq->first_free = next_ind;

		if (++nreq == MTHCA_TAVOR_MAX_WQES_PER_RECV_DB) {
			nreq = 0;

			doorbell[0] = htonl(first_ind << srq->wqe_shift);
			doorbell[1] = htonl(srq->srqn << 8);

			/*
			 * Make sure that descriptors are written
			 * before doorbell is rung.
			 */
			wmb();

			mthca_write64(doorbell, to_mctx(ibsrq->context), MTHCA_RECV_DOORBELL);

			first_ind = srq->first_free;
		}
	}

	if (nreq) {
		doorbell[0] = htonl(first_ind << srq->wqe_shift);
		doorbell[1] = htonl((srq->srqn << 8) | nreq);

		/*
		 * Make sure that descriptors are written before
		 * doorbell is rung.
		 */
		wmb();

		mthca_write64(doorbell, to_mctx(ibsrq->context), MTHCA_RECV_DOORBELL);
	}

	pthread_spin_unlock(&srq->lock);
	return err;
}
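A hedged usage sketch of replenishing the SRQ through ibv_post_srq_recv(), which reaches the routine above on Tavor hardware; the buffer is assumed to belong to a registered memory region:

#include <string.h>
#include <stdint.h>
#include <infiniband/verbs.h>

/* Hedged usage sketch: post one receive buffer to a shared receive queue. */
static int post_one_srq_recv(struct ibv_srq *srq, void *buf, uint32_t len,
			     uint32_t lkey, uint64_t wr_id)
{
	struct ibv_sge sge = {
		.addr   = (uintptr_t)buf,
		.length = len,
		.lkey   = lkey,
	};
	struct ibv_recv_wr wr, *bad_wr = NULL;

	memset(&wr, 0, sizeof(wr));
	wr.wr_id   = wr_id;
	wr.sg_list = &sge;
	wr.num_sge = 1;

	return ibv_post_srq_recv(srq, &wr, &bad_wr);
}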
Example #17
static int mlx5_alloc_context(struct verbs_device *vdev,
			      struct ibv_context *ctx, int cmd_fd)
{
	struct mlx5_context	       *context;
	struct mlx5_alloc_ucontext	req;
	struct mlx5_alloc_ucontext_resp resp;
	struct ibv_device		*ibdev = &vdev->device;
	struct verbs_context *verbs_ctx = verbs_get_ctx(ctx);
	int	i;
	int	page_size = to_mdev(ibdev)->page_size;
	int	tot_uuars;
	int	low_lat_uuars;
	int 	gross_uuars;
	int	j;
	off_t	offset;

	mlx5_single_threaded = single_threaded_app();

	context = to_mctx(ctx);
	context->ibv_ctx.cmd_fd = cmd_fd;

	memset(&resp, 0, sizeof(resp));
	open_debug_file(context);
	set_debug_mask();
	set_freeze_on_error();
	if (gethostname(context->hostname, sizeof(context->hostname)))
		strcpy(context->hostname, "host_unknown");

	tot_uuars = get_total_uuars();
	if (tot_uuars <= 0) {
		if (tot_uuars == 0)
			errno = EINVAL;
		else
			errno = -tot_uuars;
		goto err_free;
	}

	gross_uuars = tot_uuars / MLX5_NUM_UUARS_PER_PAGE * 4;
	context->bfs = calloc(gross_uuars, sizeof *context->bfs);
	if (!context->bfs) {
		errno = ENOMEM;
		goto err_free;
	}

	low_lat_uuars = get_num_low_lat_uuars();
	if (low_lat_uuars < 0) {
		errno = ENOMEM;
		goto err_free_bf;
	}

	if (low_lat_uuars > tot_uuars - 1) {
		errno = ENOMEM;
		goto err_free_bf;
	}

	memset(&req, 0, sizeof(req));
	req.total_num_uuars = tot_uuars;
	req.num_low_latency_uuars = low_lat_uuars;
	if (ibv_cmd_get_context(&context->ibv_ctx, &req.ibv_req, sizeof req,
				&resp.ibv_resp, sizeof resp))
		goto err_free_bf;

	context->max_num_qps		= resp.qp_tab_size;
	context->bf_reg_size		= resp.bf_reg_size;
	context->tot_uuars		= resp.tot_uuars;
	context->low_lat_uuars		= low_lat_uuars;
	context->cache_line_size	= resp.cache_line_size;
	context->max_sq_desc_sz = resp.max_sq_desc_sz;
	context->max_rq_desc_sz = resp.max_rq_desc_sz;
	context->max_send_wqebb	= resp.max_send_wqebb;
	context->num_ports	= resp.num_ports;
	context->max_recv_wr	= resp.max_recv_wr;
	context->max_srq_recv_wr = resp.max_srq_recv_wr;
	context->max_desc_sz_sq_dc = resp.max_desc_sz_sq_dc;
	context->atomic_sizes_dc = resp.atomic_sizes_dc;
	pthread_mutex_init(&context->rsc_table_mutex, NULL);
	pthread_mutex_init(&context->srq_table_mutex, NULL);
	for (i = 0; i < MLX5_QP_TABLE_SIZE; ++i)
		context->rsc_table[i].refcnt = 0;

	context->db_list = NULL;

	pthread_mutex_init(&context->db_list_mutex, NULL);


	for (i = 0; i < resp.tot_uuars / MLX5_NUM_UUARS_PER_PAGE; ++i) {
		offset = 0;
		set_command(MLX5_MMAP_GET_REGULAR_PAGES_CMD, &offset);
		set_index(i, &offset);
		context->uar[i] = mmap(NULL, to_mdev(ibdev)->page_size, PROT_WRITE,
				       MAP_SHARED, cmd_fd,
				       page_size * offset);
		if (context->uar[i] == MAP_FAILED) {
			context->uar[i] = NULL;
			goto err_free_bf;
		}
	}

	for (j = 0; j < gross_uuars; ++j) {
		context->bfs[j].reg = context->uar[j / 4] +
			MLX5_BF_OFFSET + (j % 4) * context->bf_reg_size;
		context->bfs[j].need_lock = need_uuar_lock(context, j);
		mlx5_spinlock_init(&context->bfs[j].lock);
		context->bfs[j].offset = 0;
		if (j)
			context->bfs[j].buf_size = context->bf_reg_size / 2;

		context->bfs[j].uuarn = j;
	}

	mlx5_spinlock_init(&context->lock32);

	context->prefer_bf = get_always_bf();
	context->shut_up_bf = get_shut_up_bf();
	mlx5_read_env(ibdev, context);

	mlx5_spinlock_init(&context->hugetlb_lock);
	INIT_LIST_HEAD(&context->hugetlb_list);

	pthread_mutex_init(&context->task_mutex, NULL);

	ctx->ops = mlx5_ctx_ops;
	set_extended(verbs_ctx);
	set_experimental(ctx);

	return 0;

err_free_bf:
	free(context->bfs);

err_free:
	for (i = 0; i < MLX5_MAX_UAR_PAGES; ++i) {
		if (context->uar[i])
			munmap(context->uar[i], page_size);
	}
	close_debug_file(context);
	return errno;
}
Example #18
static void set_experimental(struct ibv_context *ctx)
{
	struct verbs_context_exp *verbs_exp_ctx = verbs_get_exp_ctx(ctx);
	struct mlx5_context *mctx = to_mctx(ctx);

	verbs_set_exp_ctx_op(verbs_exp_ctx, create_dct, mlx5_create_dct);
	verbs_set_exp_ctx_op(verbs_exp_ctx, destroy_dct, mlx5_destroy_dct);
	verbs_set_exp_ctx_op(verbs_exp_ctx, query_dct, mlx5_query_dct);
	verbs_set_exp_ctx_op(verbs_exp_ctx, drv_exp_arm_dct, mlx5_arm_dct);
	verbs_set_exp_ctx_op(verbs_exp_ctx, drv_exp_query_device, mlx5_query_device_ex);
	verbs_set_exp_ctx_op(verbs_exp_ctx, drv_exp_create_qp, mlx5_exp_create_qp);
	verbs_set_exp_ctx_op(verbs_exp_ctx, drv_exp_modify_qp, mlx5_modify_qp_ex);
	verbs_set_exp_ctx_op(verbs_exp_ctx, drv_exp_get_legacy_xrc, mlx5_get_legacy_xrc);
	verbs_set_exp_ctx_op(verbs_exp_ctx, drv_exp_set_legacy_xrc, mlx5_set_legacy_xrc);
	verbs_set_exp_ctx_op(verbs_exp_ctx, drv_exp_modify_cq, mlx5_modify_cq);
	verbs_set_exp_ctx_op(verbs_exp_ctx, exp_create_cq, mlx5_create_cq_ex);
	verbs_set_exp_ctx_op(verbs_exp_ctx, drv_exp_ibv_poll_cq, mlx5_poll_cq_ex);
	verbs_set_exp_ctx_op(verbs_exp_ctx, drv_exp_post_task, mlx5_post_task);
	verbs_set_exp_ctx_op(verbs_exp_ctx, drv_exp_reg_mr, mlx5_exp_reg_mr);
	verbs_set_exp_ctx_op(verbs_exp_ctx, drv_exp_post_send, mlx5_exp_post_send);
	verbs_set_exp_ctx_op(verbs_exp_ctx, drv_exp_alloc_mkey_list_memory, mlx5_alloc_mkey_mem);
	verbs_set_exp_ctx_op(verbs_exp_ctx, drv_exp_dealloc_mkey_list_memory, mlx5_free_mkey_mem);
	verbs_set_exp_ctx_op(verbs_exp_ctx, drv_exp_query_mkey, mlx5_query_mkey);
	verbs_set_exp_ctx_op(verbs_exp_ctx, drv_exp_create_mr, mlx5_create_mr);
	verbs_set_exp_ctx_op(verbs_exp_ctx, drv_exp_prefetch_mr,
			     mlx5_prefetch_mr);
	verbs_set_exp_ctx_op(verbs_exp_ctx, drv_exp_dereg_mr, mlx5_exp_dereg_mr);
	verbs_set_exp_ctx_op(verbs_exp_ctx, drv_exp_poll_dc_info, mlx5_poll_dc_info);
	verbs_set_exp_ctx_op(verbs_exp_ctx, exp_create_wq, mlx5_exp_create_wq);
	verbs_set_exp_ctx_op(verbs_exp_ctx, exp_modify_wq, mlx5_exp_modify_wq);
	verbs_set_exp_ctx_op(verbs_exp_ctx, exp_destroy_wq, mlx5_exp_destroy_wq);
	verbs_set_exp_ctx_op(verbs_exp_ctx, drv_exp_ibv_create_flow, ibv_exp_cmd_create_flow);
	verbs_set_exp_ctx_op(verbs_exp_ctx, drv_exp_ibv_destroy_flow, ibv_exp_cmd_destroy_flow);
	verbs_set_exp_ctx_op(verbs_exp_ctx, exp_create_rwq_ind_table, mlx5_exp_create_rwq_ind_table);
	verbs_set_exp_ctx_op(verbs_exp_ctx, exp_destroy_rwq_ind_table, mlx5_exp_destroy_rwq_ind_table);
	verbs_set_exp_ctx_op(verbs_exp_ctx, exp_create_res_domain, mlx5_exp_create_res_domain);
	verbs_set_exp_ctx_op(verbs_exp_ctx, exp_destroy_res_domain, mlx5_exp_destroy_res_domain);
	verbs_set_exp_ctx_op(verbs_exp_ctx, exp_query_intf, mlx5_exp_query_intf);
	verbs_set_exp_ctx_op(verbs_exp_ctx, exp_release_intf, mlx5_exp_release_intf);
	verbs_set_exp_ctx_op(verbs_exp_ctx, drv_exp_query_port, mlx5_exp_query_port);
	verbs_set_exp_ctx_op(verbs_exp_ctx, drv_exp_ibv_create_ah, mlx5_exp_create_ah);
	verbs_set_exp_ctx_op(verbs_exp_ctx, drv_exp_query_values, mlx5_exp_query_values);
	verbs_set_exp_ctx_op(verbs_exp_ctx, alloc_ec_calc, mlx5_alloc_ec_calc);
	verbs_set_exp_ctx_op(verbs_exp_ctx, dealloc_ec_calc, mlx5_dealloc_ec_calc);
	verbs_set_exp_ctx_op(verbs_exp_ctx, ec_encode_async, mlx5_ec_encode_async);
	verbs_set_exp_ctx_op(verbs_exp_ctx, ec_encode_sync, mlx5_ec_encode_sync);
	verbs_set_exp_ctx_op(verbs_exp_ctx, ec_decode_async, mlx5_ec_decode_async);
	verbs_set_exp_ctx_op(verbs_exp_ctx, ec_decode_sync, mlx5_ec_decode_sync);
	verbs_set_exp_ctx_op(verbs_exp_ctx, ec_poll, mlx5_ec_poll);
	verbs_set_exp_ctx_op(verbs_exp_ctx, ec_encode_send, mlx5_ec_encode_send);
	verbs_set_exp_ctx_op(verbs_exp_ctx, ec_update_async, mlx5_ec_update_async);
	verbs_set_exp_ctx_op(verbs_exp_ctx, ec_update_sync, mlx5_ec_update_sync);
	if (mctx->cqe_version == 1)
		verbs_set_exp_ctx_op(verbs_exp_ctx, drv_exp_ibv_poll_cq,
				     mlx5_poll_cq_ex_1);
	else
		verbs_set_exp_ctx_op(verbs_exp_ctx, drv_exp_ibv_poll_cq,
				     mlx5_poll_cq_ex);

	verbs_set_exp_ctx_op(verbs_exp_ctx, exp_peer_commit_qp, mlx5_exp_peer_commit_qp);
	verbs_set_exp_ctx_op(verbs_exp_ctx, exp_rollback_send, mlx5_exp_rollback_send);
	verbs_set_exp_ctx_op(verbs_exp_ctx, exp_peer_peek_cq, mlx5_exp_peer_peek_cq);
	verbs_set_exp_ctx_op(verbs_exp_ctx, exp_peer_abort_peek_cq, mlx5_exp_peer_abort_peek_cq);
}
Example #19
static int mlx5_alloc_context(struct verbs_device *vdev,
			      struct ibv_context *ctx, int cmd_fd)
{
	struct mlx5_context	       *context;
	struct mlx5_alloc_ucontext	req;
	struct mlx5_exp_alloc_ucontext_resp resp;
	struct ibv_device		*ibdev = &vdev->device;
	struct verbs_context *verbs_ctx = verbs_get_ctx(ctx);
	struct ibv_exp_device_attr attr;
	int	i;
	int	page_size = to_mdev(ibdev)->page_size;
	int	tot_uuars;
	int	low_lat_uuars;
	int	gross_uuars;
	int	j;
	int	uar_mapped;
	off_t	offset;
	int	err;

	context = to_mctx(ctx);
	if (pthread_mutex_init(&context->env_mtx, NULL))
		return -1;

	context->ibv_ctx.cmd_fd = cmd_fd;

	memset(&resp, 0, sizeof(resp));
	if (gethostname(context->hostname, sizeof(context->hostname)))
		strcpy(context->hostname, "host_unknown");

	tot_uuars = get_total_uuars();
	gross_uuars = tot_uuars / MLX5_NUM_UUARS_PER_PAGE * 4;
	context->bfs = calloc(gross_uuars, sizeof *context->bfs);
	if (!context->bfs) {
		errno = ENOMEM;
		goto err_free;
	}

	low_lat_uuars = get_num_low_lat_uuars();
	if (low_lat_uuars > tot_uuars - 1) {
		errno = ENOMEM;
		goto err_free_bf;
	}

	memset(&req, 0, sizeof(req));
	req.total_num_uuars = tot_uuars;
	req.num_low_latency_uuars = low_lat_uuars;
	if (ibv_cmd_get_context(&context->ibv_ctx, &req.ibv_req, sizeof req,
				&resp.ibv_resp, sizeof resp))
		goto err_free_bf;

	context->max_num_qps		= resp.qp_tab_size;
	context->bf_reg_size		= resp.bf_reg_size;
	context->tot_uuars		= resp.tot_uuars;
	context->low_lat_uuars		= low_lat_uuars;
	context->cache_line_size	= resp.cache_line_size;
	context->max_sq_desc_sz = resp.max_sq_desc_sz;
	context->max_rq_desc_sz = resp.max_rq_desc_sz;
	context->max_send_wqebb	= resp.max_send_wqebb;
	context->num_ports	= resp.num_ports;
	context->max_recv_wr	= resp.max_recv_wr;
	context->max_srq_recv_wr = resp.max_srq_recv_wr;
	context->max_desc_sz_sq_dc = resp.max_desc_sz_sq_dc;
	context->atomic_sizes_dc = resp.atomic_sizes_dc;
	context->compact_av = resp.flags & MLX5_CAP_COMPACT_AV;

	if (resp.exp_data.comp_mask & MLX5_EXP_ALLOC_CTX_RESP_MASK_CQE_COMP_MAX_NUM)
		context->cqe_comp_max_num = resp.exp_data.cqe_comp_max_num;

	if (resp.exp_data.comp_mask & MLX5_EXP_ALLOC_CTX_RESP_MASK_CQE_VERSION)
		context->cqe_version = resp.exp_data.cqe_version;

	if (resp.exp_data.comp_mask & MLX5_EXP_ALLOC_CTX_RESP_MASK_RROCE_UDP_SPORT_MIN)
		context->rroce_udp_sport_min = resp.exp_data.rroce_udp_sport_min;

	if (resp.exp_data.comp_mask & MLX5_EXP_ALLOC_CTX_RESP_MASK_RROCE_UDP_SPORT_MAX)
		context->rroce_udp_sport_max = resp.exp_data.rroce_udp_sport_max;

	ctx->ops = mlx5_ctx_ops;
	if (context->cqe_version) {
		if (context->cqe_version == 1) {
			ctx->ops.poll_cq = mlx5_poll_cq_1;
		} else {
			printf("Unsupported cqe_vesion = %d, stay on  cqe version 0\n",
			       context->cqe_version);
			context->cqe_version = 0;
		}
	}

	attr.comp_mask = IBV_EXP_DEVICE_ATTR_RESERVED - 1;
	err = mlx5_query_device_ex(ctx, &attr);
	if (!err) {
		if (attr.comp_mask & IBV_EXP_DEVICE_ATTR_EXP_CAP_FLAGS)
			context->exp_device_cap_flags = attr.exp_device_cap_flags;

		if (attr.comp_mask & IBV_EXP_DEVICE_ATTR_MAX_CTX_RES_DOMAIN) {
			context->max_ctx_res_domain = attr.max_ctx_res_domain;
			mlx5_spinlock_init(&context->send_db_lock, !mlx5_single_threaded);
			INIT_LIST_HEAD(&context->send_wc_db_list);
			INIT_LIST_HEAD(&context->wc_uar_list);
		}
		if (resp.exp_data.comp_mask & MLX5_EXP_ALLOC_CTX_RESP_MASK_HCA_CORE_CLOCK_OFFSET) {
			context->core_clock.offset =
				resp.exp_data.hca_core_clock_offset &
				(to_mdev(ibdev)->page_size - 1);
			mlx5_map_internal_clock(to_mdev(ibdev), ctx);
			if (attr.hca_core_clock)
				context->core_clock.mult = ((1ull * 1000) << 21) /
					attr.hca_core_clock;
			else
				context->core_clock.mult = 0;

			/* ConnectX-4 supports 64bit timestamp. We choose these numbers
			 * in order to make sure that after arithmetic operations,
			 * we don't overflow a 64bit variable.
			 */
			context->core_clock.shift = 21;
			context->core_clock.mask = (1ULL << 49) - 1;
		}
	}

	pthread_mutex_init(&context->rsc_table_mutex, NULL);
	pthread_mutex_init(&context->srq_table_mutex, NULL);
	for (i = 0; i < MLX5_QP_TABLE_SIZE; ++i)
		context->rsc_table[i].refcnt = 0;

	for (i = 0; i < MLX5_QP_TABLE_SIZE; ++i)
		context->uidx_table[i].refcnt = 0;

	context->db_list = NULL;

	pthread_mutex_init(&context->db_list_mutex, NULL);

	context->prefer_bf = get_always_bf(&context->ibv_ctx);
	context->shut_up_bf = get_shut_up_bf(&context->ibv_ctx);
	context->enable_cqe_comp = get_cqe_comp(&context->ibv_ctx);
	mlx5_use_mutex = get_use_mutex(&context->ibv_ctx);

	offset = 0;
	set_command(MLX5_MMAP_MAP_DC_INFO_PAGE, &offset);
	context->cc.buf = mmap(NULL, 4096 * context->num_ports, PROT_READ,
			       MAP_PRIVATE, cmd_fd, page_size * offset);
	if (context->cc.buf == MAP_FAILED)
		context->cc.buf = NULL;

	mlx5_single_threaded = single_threaded_app(&context->ibv_ctx);
	for (i = 0; i < resp.tot_uuars / MLX5_NUM_UUARS_PER_PAGE; ++i) {
		uar_mapped = 0;

		/* Don't map UAR to WC if BF is not used */
		if (!context->shut_up_bf) {
			context->uar[i].regs = mlx5_uar_mmap(i, MLX5_MMAP_GET_WC_PAGES_CMD, page_size, cmd_fd);
			if (context->uar[i].regs != MAP_FAILED) {
				context->uar[i].map_type = MLX5_UAR_MAP_WC;
				uar_mapped = 1;
			}
		}

		if (!uar_mapped) {
			context->uar[i].regs = mlx5_uar_mmap(i, MLX5_MMAP_GET_NC_PAGES_CMD, page_size, cmd_fd);
			if (context->uar[i].regs != MAP_FAILED) {
				context->uar[i].map_type = MLX5_UAR_MAP_NC;
				uar_mapped = 1;
			}
		}

		if (!uar_mapped) {
			/* for backward compatibility with old kernel driver */
			context->uar[i].regs = mlx5_uar_mmap(i, MLX5_MMAP_GET_REGULAR_PAGES_CMD, page_size, cmd_fd);
			if (context->uar[i].regs != MAP_FAILED) {
				context->uar[i].map_type = MLX5_UAR_MAP_WC;
				uar_mapped = 1;
			}
		}

		if (!uar_mapped) {
			context->uar[i].regs = NULL;
			goto err_free_cc;
		}
	}

	for (j = 0; j < gross_uuars; ++j) {
		context->bfs[j].reg = context->uar[j / 4].regs +
			MLX5_BF_OFFSET + (j % 4) * context->bf_reg_size;
		context->bfs[j].need_lock = need_uuar_lock(context, j) &&
					    context->uar[j / 4].map_type == MLX5_UAR_MAP_WC;
		mlx5_lock_init(&context->bfs[j].lock,
			       !mlx5_single_threaded,
			       mlx5_get_locktype());
		context->bfs[j].offset = 0;
		if (context->uar[j / 4].map_type == MLX5_UAR_MAP_WC) {
			context->bfs[j].buf_size = context->bf_reg_size / 2;
			context->bfs[j].db_method = (context->bfs[j].need_lock &&  !mlx5_single_threaded) ?
						    MLX5_DB_METHOD_BF :
						    (mlx5_single_threaded && wc_auto_evict_size() == 64 ?
						     MLX5_DB_METHOD_DEDIC_BF_1_THREAD :
						     MLX5_DB_METHOD_DEDIC_BF);

		} else {
			context->bfs[j].db_method = MLX5_DB_METHOD_DB;
		}

		context->bfs[j].uuarn = j;
	}

	mlx5_lock_init(&context->lock32,
		       !mlx5_single_threaded,
		       mlx5_get_locktype());

	mlx5_spinlock_init(&context->hugetlb_lock, !mlx5_single_threaded);
	INIT_LIST_HEAD(&context->hugetlb_list);

	pthread_mutex_init(&context->task_mutex, NULL);

	set_extended(verbs_ctx);
	set_experimental(ctx);

	for (i = 0; i < MLX5_MAX_PORTS_NUM; ++i)
		context->port_query_cache[i].valid = 0;

	return 0;

err_free_cc:
	if (context->cc.buf)
		munmap(context->cc.buf, 4096 * context->num_ports);

	if (context->hca_core_clock)
		munmap(context->hca_core_clock - context->core_clock.offset,
		       to_mdev(ibdev)->page_size);

err_free_bf:
	free(context->bfs);

err_free:
	for (i = 0; i < MLX5_MAX_UAR_PAGES; ++i) {
		if (context->uar[i].regs)
			munmap(context->uar[i].regs, page_size);
	}
	close_debug_file(context);

	return errno;
}