Example #1
0
/*
 * mlx4_init_context() - initialize the per-process driver context for an
 * mlx4 device.
 *
 * Negotiates a user context with the kernel (ibv_cmd_get_context), maps the
 * UAR doorbell page, optionally maps the BlueFlame page and the HCA core
 * clock page, queries device capabilities, and installs the standard,
 * extended, and experimental verbs op tables.
 *
 * @v_device: verbs device being opened; provides the mlx4 device and ABI
 *            version via to_mdev().
 * @ibv_ctx:  embedded ibv_context inside the caller-allocated mlx4_context
 *            (recovered with to_mctx()).
 * @cmd_fd:   open command file descriptor used for kernel commands, mmap()
 *            and ioctl().
 *
 * Returns 0 on success, or a positive errno value on failure.
 */
static int mlx4_init_context(struct verbs_device *v_device,
			     struct ibv_context *ibv_ctx, int cmd_fd)
{
	struct mlx4_context	        *context;
	struct mlx4_alloc_ucontext_req  req;
	struct mlx4_alloc_ucontext_resp resp;
	struct mlx4_alloc_ucontext_resp_v3 resp_v3;
	int				i;
	struct ibv_exp_device_attr	dev_attrs;
	struct ibv_device_attr	           dev_legacy_attrs;
	struct mlx4_device		*dev = to_mdev(&v_device->device);
	unsigned int			qp_tab_size;
	unsigned int			bf_reg_size;
	unsigned int			cqe_size;
	int				hca_clock_offset;
	void				*hca_clock_page = NULL;

	/* verbs_context should be used for new verbs.
	 * memory footprint of mlx4_context and verbs_context share
	 * struct ibv_context.
	 */
	struct verbs_context *verbs_ctx = verbs_get_ctx(ibv_ctx);
	struct verbs_context_exp *verbs_exp_ctx = verbs_get_exp_ctx(ibv_ctx);

	memset(&req, 0, sizeof(req));
	context = to_mctx(ibv_ctx);
	ibv_ctx->cmd_fd = cmd_fd;
	ibv_ctx->device = &v_device->device;

	if (pthread_mutex_init(&context->env_mtx, NULL))
		return EIO;

	if (dev->driver_abi_ver > 3) {
		/* ABI v4+: full request/response exchange; the kernel reports
		 * the CQE size and may honor lib capability flags.
		 */
#ifdef MLX4_WQE_FORMAT
		req.lib_caps = MLX4_USER_DEV_CAP_WQE_FORMAT;
#endif
		if (ibv_cmd_get_context(ibv_ctx, &req.cmd, sizeof(req),
					&resp.ibv_resp, sizeof(resp)))
			return errno;

		VALGRIND_MAKE_MEM_DEFINED(&resp, sizeof(resp));
		qp_tab_size			= resp.qp_tab_size;
		bf_reg_size			= resp.bf_reg_size;
		context->bf_regs_per_page	= resp.bf_regs_per_page;
		cqe_size			= resp.cqe_size;
	} else {
		/* Legacy ABI (<= 3): shorter response layout; CQE size is not
		 * reported and is fixed at 32 bytes.
		 */
		if (ibv_cmd_get_context(ibv_ctx, &req.cmd, sizeof(req.cmd),
					&resp_v3.ibv_resp, sizeof(resp_v3)))
			return errno;

		VALGRIND_MAKE_MEM_DEFINED(&resp_v3, sizeof(resp_v3));
		qp_tab_size			= resp_v3.qp_tab_size;
		bf_reg_size			= resp_v3.bf_reg_size;
		context->bf_regs_per_page	= resp_v3.bf_regs_per_page;
		cqe_size			= 32;
	}

	/* Derive the QP hash-table split from the QP count; the ffs()-based
	 * shift assumes qp_tab_size is a power of two — TODO confirm the
	 * kernel guarantees this.
	 */
	context->num_qps	= qp_tab_size;
	context->qp_table_shift = ffs(context->num_qps) - 1 - MLX4_QP_TABLE_BITS;
	context->qp_table_mask	= (1 << context->qp_table_shift) - 1;
	context->cqe_size = cqe_size;
	for (i = 0; i < MLX4_PORTS_NUM; ++i)
		context->port_query_cache[i].valid = 0;

	pthread_mutex_init(&context->qp_table_mutex, NULL);
	for (i = 0; i < MLX4_QP_TABLE_SIZE; ++i)
		context->qp_table[i].refcnt = 0;

	for (i = 0; i < MLX4_NUM_DB_TYPE; ++i)
		context->db_list[i] = NULL;

	mlx4_init_xsrq_table(&context->xsrq_table, qp_tab_size);
	pthread_mutex_init(&context->db_list_mutex, NULL);

	/* Map the UAR (doorbell) page: file offset 0 on the command fd. */
	context->uar = mmap(NULL, dev->page_size, PROT_WRITE,
			    MAP_SHARED, cmd_fd, 0);
	if (context->uar == MAP_FAILED)
		return errno;

	if (bf_reg_size) {
		/* BlueFlame page lives at file offset page_size. Failure to
		 * map it is non-fatal: the context simply runs without
		 * BlueFlame (page/buf_size/num_dedic_bfs zeroed).
		 */
		context->bfs.page = mmap(NULL, dev->page_size,
					 PROT_WRITE, MAP_SHARED, cmd_fd,
					 dev->page_size);
		if (context->bfs.page == MAP_FAILED) {
			fprintf(stderr, PFX "Warning: BlueFlame available, "
				"but failed to mmap() BlueFlame page.\n");
			context->bfs.page		= NULL;
			context->bfs.buf_size		= 0;
			context->bfs.num_dedic_bfs	= 0;
		} else {
			/* One register is reserved as the shared (cmn_bf)
			 * register at the start of the page, hence the "- 1"
			 * on both limits. NOTE(review): buf_size is half the
			 * register size — presumably each register is split
			 * into two alternating buffers; confirm against the
			 * BF post path.
			 */
			context->bfs.num_dedic_bfs = min(context->bf_regs_per_page - 1,
							 MLX4_MAX_BFS_IN_PAGE - 1);
			context->bfs.buf_size = bf_reg_size / 2;
			mlx4_spinlock_init(&context->bfs.dedic_bf_lock, !mlx4_single_threaded);
			context->bfs.cmn_bf.address = context->bfs.page;

			mlx4_lock_init(&context->bfs.cmn_bf.lock,
				       !mlx4_single_threaded,
				       mlx4_get_locktype());

			/* Dedicated registers follow the common one at a
			 * fixed stride within the BF page.
			 */
			context->bfs.dedic_bf_free = context->bfs.num_dedic_bfs;
			for (i = 0; i < context->bfs.num_dedic_bfs; i++) {
				context->bfs.dedic_bf[i].address   = context->bfs.page + (i + 1) * MLX4_BFS_STRIDE;
				context->bfs.dedic_bf_used[i] = 0;
			}
		}
	} else {
		context->bfs.page		= NULL;
		context->bfs.buf_size		= 0;
		context->bfs.num_dedic_bfs	= 0;
	}

	mlx4_spinlock_init(&context->uar_lock, !mlx4_single_threaded);

	mlx4_spinlock_init(&context->send_db_lock, !mlx4_single_threaded);
	INIT_LIST_HEAD(&context->send_db_list);

	mlx4_spinlock_init(&context->hugetlb_lock, !mlx4_single_threaded);
	INIT_LIST_HEAD(&context->hugetlb_list);

	pthread_mutex_init(&context->task_mutex, NULL);

	/* Prefer the experimental query (timestamps, core clock, exp caps,
	 * res domains); fall back to the legacy query if it fails.
	 */
	memset(&dev_attrs, 0, sizeof(dev_attrs));
	dev_attrs.comp_mask = IBV_EXP_DEVICE_ATTR_WITH_TIMESTAMP_MASK |
			      IBV_EXP_DEVICE_ATTR_WITH_HCA_CORE_CLOCK |
			      IBV_EXP_DEVICE_ATTR_EXP_CAP_FLAGS |
			      IBV_EXP_DEVICE_ATTR_MAX_CTX_RES_DOMAIN;

	if (mlx4_exp_query_device(ibv_ctx, &dev_attrs)) {
		if (mlx4_query_device(ibv_ctx, &dev_legacy_attrs))
			goto query_free;

		/* NOTE(review): copies the legacy attrs over the front of the
		 * exp struct — assumes the leading members of
		 * ibv_exp_device_attr are layout-compatible with
		 * struct ibv_device_attr; confirm against the headers.
		 */
		memcpy(&dev_attrs, &dev_legacy_attrs, sizeof(dev_legacy_attrs));
	}

	context->max_qp_wr = dev_attrs.max_qp_wr;
	context->max_sge = dev_attrs.max_sge;
	context->max_cqe = dev_attrs.max_cqe;
	context->exp_device_cap_flags = dev_attrs.exp_device_cap_flags;
	if (dev_attrs.comp_mask & IBV_EXP_DEVICE_ATTR_MAX_CTX_RES_DOMAIN)
		context->max_ctx_res_domain = dev_attrs.max_ctx_res_domain;

	VALGRIND_MAKE_MEM_DEFINED(&context->hca_core_clock, sizeof(context->hca_core_clock));
	if (dev_attrs.comp_mask & IBV_EXP_DEVICE_ATTR_WITH_HCA_CORE_CLOCK) {
		/* Precompute a fixed-point (mult, shift=29) scale factor for
		 * converting core-clock cycles; mult is 0 when the clock
		 * frequency is unknown.
		 */
		if (dev_attrs.hca_core_clock)
			context->core_clk.mult = ((1ull * 1000) << 29) /
						dev_attrs.hca_core_clock;
		else
			context->core_clk.mult = 0;

		context->core_clk.shift = 29;
		context->core_clk.mask = dev_attrs.timestamp_mask;

		/* The ioctl reports the byte offset of the clock register
		 * within its mapped page; the page itself is mapped read-only
		 * at a device-defined mmap offset. Mapping failure is
		 * non-fatal (timestamping is simply unavailable).
		 */
		if (ioctl(cmd_fd, MLX4_IOCHWCLOCKOFFSET,
			  &hca_clock_offset) >= 0) {
			VALGRIND_MAKE_MEM_DEFINED(&hca_clock_offset, sizeof(hca_clock_offset));
			context->core_clk.offset = hca_clock_offset;
			hca_clock_page = mmap(NULL, hca_clock_offset +
					sizeof(context->core_clk.mask),
					PROT_READ, MAP_SHARED, cmd_fd,
					dev->page_size *
					(MLX4_IB_MMAP_GET_HW_CLOCK));

			if (hca_clock_page == MAP_FAILED) {
				fprintf(stderr, PFX
					"Warning: Timestamp available,\n"
					"but failed to mmap() hca core  "
					"clock page.\n");
			} else {
				context->hca_core_clock = hca_clock_page +
					context->core_clk.offset;
			}
		}
	}

	/* Install the op tables: standard verbs, extended verbs, then the
	 * experimental verbs.
	 */
	ibv_ctx->ops = mlx4_ctx_ops;

	verbs_ctx->has_comp_mask |= VERBS_CONTEXT_XRCD | VERBS_CONTEXT_SRQ |
				    VERBS_CONTEXT_QP;

	verbs_set_ctx_op(verbs_ctx, close_xrcd, mlx4_close_xrcd);
	verbs_set_ctx_op(verbs_ctx, open_xrcd, mlx4_open_xrcd);
	verbs_set_ctx_op(verbs_ctx, create_srq_ex, mlx4_create_srq_ex);
	verbs_set_ctx_op(verbs_ctx, get_srq_num, verbs_get_srq_num);
	verbs_set_ctx_op(verbs_ctx, create_qp_ex, mlx4_create_qp_ex);
	verbs_set_ctx_op(verbs_ctx, open_qp, mlx4_open_qp);
	verbs_set_ctx_op(verbs_ctx, create_flow, ibv_cmd_create_flow);
	verbs_set_ctx_op(verbs_ctx, destroy_flow, ibv_cmd_destroy_flow);

	/*
	 * Set experimental verbs
	 */
	verbs_set_exp_ctx_op(verbs_exp_ctx, drv_exp_ibv_reg_shared_mr, mlx4_reg_shared_mr);
	verbs_set_exp_ctx_op(verbs_exp_ctx, drv_exp_ibv_create_flow, ibv_exp_cmd_create_flow);
	verbs_set_exp_ctx_op(verbs_exp_ctx, drv_exp_ibv_destroy_flow, ibv_exp_cmd_destroy_flow);
	verbs_set_exp_ctx_op(verbs_exp_ctx, drv_exp_ibv_create_ah, mlx4_exp_create_ah);
	verbs_set_exp_ctx_op(verbs_exp_ctx, drv_exp_query_device, mlx4_exp_query_device);
	verbs_set_exp_ctx_op(verbs_exp_ctx, drv_exp_create_qp, mlx4_exp_create_qp);
	verbs_set_exp_ctx_op(verbs_exp_ctx, drv_exp_modify_qp, mlx4_exp_modify_qp);
	verbs_set_exp_ctx_op(verbs_exp_ctx, drv_exp_query_port, mlx4_exp_query_port);
	verbs_set_exp_ctx_op(verbs_exp_ctx, drv_exp_modify_cq, mlx4_modify_cq);
	verbs_set_exp_ctx_op(verbs_exp_ctx, drv_exp_post_task, mlx4_post_task);
	verbs_set_exp_ctx_op(verbs_exp_ctx, drv_exp_set_legacy_xrc, mlx4_set_legacy_xrc);
	verbs_set_exp_ctx_op(verbs_exp_ctx, drv_exp_get_legacy_xrc, mlx4_get_legacy_xrc);
	verbs_set_exp_ctx_op(verbs_exp_ctx, drv_exp_ibv_poll_cq, mlx4_exp_poll_cq);
	verbs_set_exp_ctx_op(verbs_exp_ctx, exp_create_cq, mlx4_create_cq_ex);
	verbs_set_exp_ctx_op(verbs_exp_ctx, drv_exp_query_values, mlx4_query_values);
	verbs_set_exp_ctx_op(verbs_exp_ctx, drv_exp_reg_mr, mlx4_exp_reg_mr);
	verbs_set_exp_ctx_op(verbs_exp_ctx, drv_exp_post_send, mlx4_exp_post_send);
	verbs_set_exp_ctx_op(verbs_exp_ctx, drv_exp_bind_mw, mlx4_exp_bind_mw);
	verbs_set_exp_ctx_op(verbs_exp_ctx, drv_exp_rereg_mr, mlx4_exp_rereg_mr);
	verbs_set_exp_ctx_op(verbs_exp_ctx, drv_exp_dereg_mr, mlx4_exp_dereg_mr);
	verbs_set_exp_ctx_op(verbs_exp_ctx, exp_create_res_domain, mlx4_exp_create_res_domain);
	verbs_set_exp_ctx_op(verbs_exp_ctx, exp_destroy_res_domain, mlx4_exp_destroy_res_domain);
	verbs_set_exp_ctx_op(verbs_exp_ctx, exp_query_intf, mlx4_exp_query_intf);
	verbs_set_exp_ctx_op(verbs_exp_ctx, exp_release_intf, mlx4_exp_release_intf);

	return 0;

query_free:
	/* Unwind the mappings made above. The hca_clock_page unmap is
	 * currently unreachable (the only goto precedes the clock mmap) but
	 * kept for safety. NOTE(review): the return relies on errno still
	 * holding the failure code from the query call — confirm the
	 * intervening munmap() calls cannot clobber it on this path.
	 */
	munmap(context->uar, dev->page_size);
	if (context->bfs.page)
		munmap(context->bfs.page, dev->page_size);
	if (hca_clock_page)
		munmap(hca_clock_page, hca_clock_offset +
		       sizeof(context->core_clk.mask));

	return errno;
}
Example #2
0
/*
 * mlx4_alloc_context() - allocate and initialize an mlx4 device context
 * (legacy, pre-verbs_context provider entry point).
 *
 * Allocates the mlx4_context, negotiates a user context with the kernel,
 * sets up the QP / XRC SRQ lookup tables, maps the UAR doorbell page and
 * (optionally) the BlueFlame page, installs the op tables, and verifies the
 * kernel supports XRC.
 *
 * @ibdev:  device being opened; page size obtained via to_mdev().
 * @cmd_fd: open command file descriptor used for kernel commands and mmap().
 *
 * Returns the embedded ibv_context on success (ownership passes to the
 * caller), or NULL on any failure (all partially acquired resources are
 * released via the goto cleanup chain).
 */
static struct ibv_context *mlx4_alloc_context(struct ibv_device *ibdev, int cmd_fd)
{
    struct mlx4_context	       *context;
    struct ibv_get_context		cmd;
    struct mlx4_alloc_ucontext_resp resp;
    int				i;
    struct ibv_device_attr		dev_attrs;

    context = calloc(1, sizeof *context);
    if (!context)
        return NULL;

    context->ibv_ctx.cmd_fd = cmd_fd;

    if (ibv_cmd_get_context(&context->ibv_ctx, &cmd, sizeof cmd,
                            &resp.ibv_resp, sizeof resp))
        goto err_free;

    /* Derive the QP hash-table split from the QP count; the ffs()-based
     * shift assumes qp_tab_size is a power of two — TODO confirm the kernel
     * guarantees this.
     */
    context->num_qps	= resp.qp_tab_size;
    context->qp_table_shift = ffs(context->num_qps) - 1 - MLX4_QP_TABLE_BITS;
    context->qp_table_mask	= (1 << context->qp_table_shift) - 1;

    pthread_mutex_init(&context->qp_table_mutex, NULL);
    for (i = 0; i < MLX4_QP_TABLE_SIZE; ++i)
        context->qp_table[i].refcnt = 0;

    /* XRC SRQ table is sized from the same qp_tab_size value, with its own
     * shift/mask split.
     */
    context->num_xrc_srqs = resp.qp_tab_size;
    context->xrc_srq_table_shift = ffs(context->num_xrc_srqs) - 1
                                   - MLX4_XRC_SRQ_TABLE_BITS;
    context->xrc_srq_table_mask = (1 << context->xrc_srq_table_shift) - 1;

    pthread_mutex_init(&context->xrc_srq_table_mutex, NULL);
    for (i = 0; i < MLX4_XRC_SRQ_TABLE_SIZE; ++i)
        context->xrc_srq_table[i].refcnt = 0;

    for (i = 0; i < MLX4_NUM_DB_TYPE; ++i)
        context->db_list[i] = NULL;

    pthread_mutex_init(&context->db_list_mutex, NULL);

    /* Map the UAR (doorbell) page: file offset 0 on the command fd. */
    context->uar = mmap(NULL, to_mdev(ibdev)->page_size, PROT_WRITE,
                        MAP_SHARED, cmd_fd, 0);
    if (context->uar == MAP_FAILED)
        goto err_free;

    if (resp.bf_reg_size) {
        /* BlueFlame page lives at file offset page_size. Failure to map it
         * is non-fatal: the context simply runs without BlueFlame.
         */
        context->bf_page = mmap(NULL, to_mdev(ibdev)->page_size,
                                PROT_WRITE, MAP_SHARED, cmd_fd,
                                to_mdev(ibdev)->page_size);
        if (context->bf_page == MAP_FAILED) {
            fprintf(stderr, PFX "Warning: BlueFlame available, "
                    "but failed to mmap() BlueFlame page.\n");
            context->bf_page     = NULL;
            context->bf_buf_size = 0;
        } else {
            /* NOTE(review): buf_size is half the register size — presumably
             * each register is split into two alternating buffers; confirm
             * against the BF post path.
             */
            context->bf_buf_size = resp.bf_reg_size / 2;
            context->bf_offset   = 0;
            pthread_spin_init(&context->bf_lock, PTHREAD_PROCESS_PRIVATE);
        }
    } else {
        context->bf_page     = NULL;
        context->bf_buf_size = 0;
    }

    context->cqe_size = resp.cqe_size;
    pthread_spin_init(&context->uar_lock, PTHREAD_PROCESS_PRIVATE);

    context->ibv_ctx.ops = mlx4_ctx_ops;
#ifdef HAVE_IBV_XRC_OPS
    context->ibv_ctx.more_ops = &mlx4_more_ops;
#endif

    if (mlx4_query_device(&context->ibv_ctx, &dev_attrs))
        goto query_free;

    context->max_qp_wr = dev_attrs.max_qp_wr;
    context->max_sge = dev_attrs.max_sge;
    context->max_cqe = dev_attrs.max_cqe;
    /* This build treats kernel XRC support as mandatory: refuse to return a
     * context when the kernel lacks it.
     */
    if (!(dev_attrs.device_cap_flags & IBV_DEVICE_XRC)) {
        fprintf(stderr, PFX "There is a mismatch between "
                "the kernel and the userspace libraries: "
                "Kernel does not support XRC. Exiting.\n");
        goto query_free;
    }

    return &context->ibv_ctx;

query_free:
    /* Unwind in reverse acquisition order: mappings first, then the
     * context allocation.
     */
    munmap(context->uar, to_mdev(ibdev)->page_size);
    if (context->bf_page)
        munmap(context->bf_page, to_mdev(ibdev)->page_size);

err_free:
    free(context);
    return NULL;
}