static int mlx4_init_context(struct verbs_device *v_device, struct ibv_context *ibv_ctx, int cmd_fd) { struct mlx4_context *context; struct mlx4_alloc_ucontext_req req; struct mlx4_alloc_ucontext_resp resp; struct mlx4_alloc_ucontext_resp_v3 resp_v3; int i; struct ibv_exp_device_attr dev_attrs; struct ibv_device_attr dev_legacy_attrs; struct mlx4_device *dev = to_mdev(&v_device->device); unsigned int qp_tab_size; unsigned int bf_reg_size; unsigned int cqe_size; int hca_clock_offset; void *hca_clock_page = NULL; /* verbs_context should be used for new verbs. * memory footprint of mlx4_context and verbs_context share * struct ibv_context. */ struct verbs_context *verbs_ctx = verbs_get_ctx(ibv_ctx); struct verbs_context_exp *verbs_exp_ctx = verbs_get_exp_ctx(ibv_ctx); memset(&req, 0, sizeof(req)); context = to_mctx(ibv_ctx); ibv_ctx->cmd_fd = cmd_fd; ibv_ctx->device = &v_device->device; if (pthread_mutex_init(&context->env_mtx, NULL)) return EIO; if (dev->driver_abi_ver > 3) { #ifdef MLX4_WQE_FORMAT req.lib_caps = MLX4_USER_DEV_CAP_WQE_FORMAT; #endif if (ibv_cmd_get_context(ibv_ctx, &req.cmd, sizeof(req), &resp.ibv_resp, sizeof(resp))) return errno; VALGRIND_MAKE_MEM_DEFINED(&resp, sizeof(resp)); qp_tab_size = resp.qp_tab_size; bf_reg_size = resp.bf_reg_size; context->bf_regs_per_page = resp.bf_regs_per_page; cqe_size = resp.cqe_size; } else { if (ibv_cmd_get_context(ibv_ctx, &req.cmd, sizeof(req.cmd), &resp_v3.ibv_resp, sizeof(resp_v3))) return errno; VALGRIND_MAKE_MEM_DEFINED(&resp_v3, sizeof(resp_v3)); qp_tab_size = resp_v3.qp_tab_size; bf_reg_size = resp_v3.bf_reg_size; context->bf_regs_per_page = resp_v3.bf_regs_per_page; cqe_size = 32; } context->num_qps = qp_tab_size; context->qp_table_shift = ffs(context->num_qps) - 1 - MLX4_QP_TABLE_BITS; context->qp_table_mask = (1 << context->qp_table_shift) - 1; context->cqe_size = cqe_size; for (i = 0; i < MLX4_PORTS_NUM; ++i) context->port_query_cache[i].valid = 0; pthread_mutex_init(&context->qp_table_mutex, NULL); for 
(i = 0; i < MLX4_QP_TABLE_SIZE; ++i) context->qp_table[i].refcnt = 0; for (i = 0; i < MLX4_NUM_DB_TYPE; ++i) context->db_list[i] = NULL; mlx4_init_xsrq_table(&context->xsrq_table, qp_tab_size); pthread_mutex_init(&context->db_list_mutex, NULL); context->uar = mmap(NULL, dev->page_size, PROT_WRITE, MAP_SHARED, cmd_fd, 0); if (context->uar == MAP_FAILED) return errno; if (bf_reg_size) { context->bfs.page = mmap(NULL, dev->page_size, PROT_WRITE, MAP_SHARED, cmd_fd, dev->page_size); if (context->bfs.page == MAP_FAILED) { fprintf(stderr, PFX "Warning: BlueFlame available, " "but failed to mmap() BlueFlame page.\n"); context->bfs.page = NULL; context->bfs.buf_size = 0; context->bfs.num_dedic_bfs = 0; } else { context->bfs.num_dedic_bfs = min(context->bf_regs_per_page - 1, MLX4_MAX_BFS_IN_PAGE - 1); context->bfs.buf_size = bf_reg_size / 2; mlx4_spinlock_init(&context->bfs.dedic_bf_lock, !mlx4_single_threaded); context->bfs.cmn_bf.address = context->bfs.page; mlx4_lock_init(&context->bfs.cmn_bf.lock, !mlx4_single_threaded, mlx4_get_locktype()); context->bfs.dedic_bf_free = context->bfs.num_dedic_bfs; for (i = 0; i < context->bfs.num_dedic_bfs; i++) { context->bfs.dedic_bf[i].address = context->bfs.page + (i + 1) * MLX4_BFS_STRIDE; context->bfs.dedic_bf_used[i] = 0; } } } else { context->bfs.page = NULL; context->bfs.buf_size = 0; context->bfs.num_dedic_bfs = 0; } mlx4_spinlock_init(&context->uar_lock, !mlx4_single_threaded); mlx4_spinlock_init(&context->send_db_lock, !mlx4_single_threaded); INIT_LIST_HEAD(&context->send_db_list); mlx4_spinlock_init(&context->hugetlb_lock, !mlx4_single_threaded); INIT_LIST_HEAD(&context->hugetlb_list); pthread_mutex_init(&context->task_mutex, NULL); memset(&dev_attrs, 0, sizeof(dev_attrs)); dev_attrs.comp_mask = IBV_EXP_DEVICE_ATTR_WITH_TIMESTAMP_MASK | IBV_EXP_DEVICE_ATTR_WITH_HCA_CORE_CLOCK | IBV_EXP_DEVICE_ATTR_EXP_CAP_FLAGS | IBV_EXP_DEVICE_ATTR_MAX_CTX_RES_DOMAIN; if (mlx4_exp_query_device(ibv_ctx, &dev_attrs)) { if 
(mlx4_query_device(ibv_ctx, &dev_legacy_attrs)) goto query_free; memcpy(&dev_attrs, &dev_legacy_attrs, sizeof(dev_legacy_attrs)); } context->max_qp_wr = dev_attrs.max_qp_wr; context->max_sge = dev_attrs.max_sge; context->max_cqe = dev_attrs.max_cqe; context->exp_device_cap_flags = dev_attrs.exp_device_cap_flags; if (dev_attrs.comp_mask & IBV_EXP_DEVICE_ATTR_MAX_CTX_RES_DOMAIN) context->max_ctx_res_domain = dev_attrs.max_ctx_res_domain; VALGRIND_MAKE_MEM_DEFINED(&context->hca_core_clock, sizeof(context->hca_core_clock)); if (dev_attrs.comp_mask & IBV_EXP_DEVICE_ATTR_WITH_HCA_CORE_CLOCK) { if (dev_attrs.hca_core_clock) context->core_clk.mult = ((1ull * 1000) << 29) / dev_attrs.hca_core_clock; else context->core_clk.mult = 0; context->core_clk.shift = 29; context->core_clk.mask = dev_attrs.timestamp_mask; if (ioctl(cmd_fd, MLX4_IOCHWCLOCKOFFSET, &hca_clock_offset) >= 0) { VALGRIND_MAKE_MEM_DEFINED(&hca_clock_offset, sizeof(hca_clock_offset)); context->core_clk.offset = hca_clock_offset; hca_clock_page = mmap(NULL, hca_clock_offset + sizeof(context->core_clk.mask), PROT_READ, MAP_SHARED, cmd_fd, dev->page_size * (MLX4_IB_MMAP_GET_HW_CLOCK)); if (hca_clock_page == MAP_FAILED) { fprintf(stderr, PFX "Warning: Timestamp available,\n" "but failed to mmap() hca core " "clock page.\n"); } else { context->hca_core_clock = hca_clock_page + context->core_clk.offset; } } } ibv_ctx->ops = mlx4_ctx_ops; verbs_ctx->has_comp_mask |= VERBS_CONTEXT_XRCD | VERBS_CONTEXT_SRQ | VERBS_CONTEXT_QP; verbs_set_ctx_op(verbs_ctx, close_xrcd, mlx4_close_xrcd); verbs_set_ctx_op(verbs_ctx, open_xrcd, mlx4_open_xrcd); verbs_set_ctx_op(verbs_ctx, create_srq_ex, mlx4_create_srq_ex); verbs_set_ctx_op(verbs_ctx, get_srq_num, verbs_get_srq_num); verbs_set_ctx_op(verbs_ctx, create_qp_ex, mlx4_create_qp_ex); verbs_set_ctx_op(verbs_ctx, open_qp, mlx4_open_qp); verbs_set_ctx_op(verbs_ctx, create_flow, ibv_cmd_create_flow); verbs_set_ctx_op(verbs_ctx, destroy_flow, ibv_cmd_destroy_flow); /* * Set experimental 
verbs */ verbs_set_exp_ctx_op(verbs_exp_ctx, drv_exp_ibv_reg_shared_mr, mlx4_reg_shared_mr); verbs_set_exp_ctx_op(verbs_exp_ctx, drv_exp_ibv_create_flow, ibv_exp_cmd_create_flow); verbs_set_exp_ctx_op(verbs_exp_ctx, drv_exp_ibv_destroy_flow, ibv_exp_cmd_destroy_flow); verbs_set_exp_ctx_op(verbs_exp_ctx, drv_exp_ibv_create_ah, mlx4_exp_create_ah); verbs_set_exp_ctx_op(verbs_exp_ctx, drv_exp_query_device, mlx4_exp_query_device); verbs_set_exp_ctx_op(verbs_exp_ctx, drv_exp_create_qp, mlx4_exp_create_qp); verbs_set_exp_ctx_op(verbs_exp_ctx, drv_exp_modify_qp, mlx4_exp_modify_qp); verbs_set_exp_ctx_op(verbs_exp_ctx, drv_exp_query_port, mlx4_exp_query_port); verbs_set_exp_ctx_op(verbs_exp_ctx, drv_exp_modify_cq, mlx4_modify_cq); verbs_set_exp_ctx_op(verbs_exp_ctx, drv_exp_post_task, mlx4_post_task); verbs_set_exp_ctx_op(verbs_exp_ctx, drv_exp_set_legacy_xrc, mlx4_set_legacy_xrc); verbs_set_exp_ctx_op(verbs_exp_ctx, drv_exp_get_legacy_xrc, mlx4_get_legacy_xrc); verbs_set_exp_ctx_op(verbs_exp_ctx, drv_exp_ibv_poll_cq, mlx4_exp_poll_cq); verbs_set_exp_ctx_op(verbs_exp_ctx, exp_create_cq, mlx4_create_cq_ex); verbs_set_exp_ctx_op(verbs_exp_ctx, drv_exp_query_values, mlx4_query_values); verbs_set_exp_ctx_op(verbs_exp_ctx, drv_exp_reg_mr, mlx4_exp_reg_mr); verbs_set_exp_ctx_op(verbs_exp_ctx, drv_exp_post_send, mlx4_exp_post_send); verbs_set_exp_ctx_op(verbs_exp_ctx, drv_exp_bind_mw, mlx4_exp_bind_mw); verbs_set_exp_ctx_op(verbs_exp_ctx, drv_exp_rereg_mr, mlx4_exp_rereg_mr); verbs_set_exp_ctx_op(verbs_exp_ctx, drv_exp_dereg_mr, mlx4_exp_dereg_mr); verbs_set_exp_ctx_op(verbs_exp_ctx, exp_create_res_domain, mlx4_exp_create_res_domain); verbs_set_exp_ctx_op(verbs_exp_ctx, exp_destroy_res_domain, mlx4_exp_destroy_res_domain); verbs_set_exp_ctx_op(verbs_exp_ctx, exp_query_intf, mlx4_exp_query_intf); verbs_set_exp_ctx_op(verbs_exp_ctx, exp_release_intf, mlx4_exp_release_intf); return 0; query_free: munmap(context->uar, dev->page_size); if (context->bfs.page) munmap(context->bfs.page, 
dev->page_size); if (hca_clock_page) munmap(hca_clock_page, hca_clock_offset + sizeof(context->core_clk.mask)); return errno; }
/*
 * Allocate and initialize an mlx4 verbs context (legacy, pre-verbs-extension
 * path).  Negotiates the ucontext with the kernel, sizes the QP and XRC SRQ
 * lookup tables, maps the UAR and optional BlueFlame pages, and verifies the
 * kernel supports XRC.
 *
 * Returns the embedded ibv_context on success, NULL on any failure (all
 * partially acquired resources are released via the goto cleanup ladder).
 */
static struct ibv_context *mlx4_alloc_context(struct ibv_device *ibdev, int cmd_fd)
{
	struct mlx4_context *context;
	struct ibv_get_context cmd;
	struct mlx4_alloc_ucontext_resp resp;
	int i;
	struct ibv_device_attr dev_attrs;

	context = calloc(1, sizeof *context);
	if (!context)
		return NULL;

	context->ibv_ctx.cmd_fd = cmd_fd;

	if (ibv_cmd_get_context(&context->ibv_ctx, &cmd, sizeof cmd,
				&resp.ibv_resp, sizeof resp))
		goto err_free;

	/* qp_tab_size is presumably a power of two (kernel-provided) — the
	 * shift/mask pair splits a QPN into table index + in-bucket offset.
	 */
	context->num_qps = resp.qp_tab_size;
	context->qp_table_shift = ffs(context->num_qps) - 1 - MLX4_QP_TABLE_BITS;
	context->qp_table_mask = (1 << context->qp_table_shift) - 1;

	pthread_mutex_init(&context->qp_table_mutex, NULL);
	for (i = 0; i < MLX4_QP_TABLE_SIZE; ++i)
		context->qp_table[i].refcnt = 0;

	/* XRC SRQ table mirrors the QP table sizing (same kernel value). */
	context->num_xrc_srqs = resp.qp_tab_size;
	context->xrc_srq_table_shift = ffs(context->num_xrc_srqs) - 1
				       - MLX4_XRC_SRQ_TABLE_BITS;
	context->xrc_srq_table_mask = (1 << context->xrc_srq_table_shift) - 1;

	pthread_mutex_init(&context->xrc_srq_table_mutex, NULL);
	for (i = 0; i < MLX4_XRC_SRQ_TABLE_SIZE; ++i)
		context->xrc_srq_table[i].refcnt = 0;

	for (i = 0; i < MLX4_NUM_DB_TYPE; ++i)
		context->db_list[i] = NULL;

	pthread_mutex_init(&context->db_list_mutex, NULL);

	/* Page 0 of the device file is the UAR (doorbell) page. */
	context->uar = mmap(NULL, to_mdev(ibdev)->page_size, PROT_WRITE,
			    MAP_SHARED, cmd_fd, 0);
	if (context->uar == MAP_FAILED)
		goto err_free;

	if (resp.bf_reg_size) {
		/* Page 1 is the BlueFlame page; failure to map it is not
		 * fatal — the context simply runs without BlueFlame.
		 */
		context->bf_page = mmap(NULL, to_mdev(ibdev)->page_size,
					PROT_WRITE, MAP_SHARED, cmd_fd,
					to_mdev(ibdev)->page_size);
		if (context->bf_page == MAP_FAILED) {
			fprintf(stderr, PFX "Warning: BlueFlame available, "
				"but failed to mmap() BlueFlame page.\n");
			context->bf_page = NULL;
			context->bf_buf_size = 0;
		} else {
			/* Half a BF register per post, ping-pong via offset. */
			context->bf_buf_size = resp.bf_reg_size / 2;
			context->bf_offset = 0;
			pthread_spin_init(&context->bf_lock,
					  PTHREAD_PROCESS_PRIVATE);
		}
	} else {
		context->bf_page = NULL;
		context->bf_buf_size = 0;
	}

	context->cqe_size = resp.cqe_size;
	pthread_spin_init(&context->uar_lock, PTHREAD_PROCESS_PRIVATE);

	context->ibv_ctx.ops = mlx4_ctx_ops;
#ifdef HAVE_IBV_XRC_OPS
	context->ibv_ctx.more_ops = &mlx4_more_ops;
#endif

	if (mlx4_query_device(&context->ibv_ctx, &dev_attrs))
		goto query_free;

	context->max_qp_wr = dev_attrs.max_qp_wr;
	context->max_sge = dev_attrs.max_sge;
	context->max_cqe = dev_attrs.max_cqe;
	/* This library requires XRC support in the kernel driver. */
	if (!(dev_attrs.device_cap_flags & IBV_DEVICE_XRC)) {
		fprintf(stderr, PFX "There is a mismatch between "
			"the kernel and the userspace libraries: "
			"Kernel does not support XRC. Exiting.\n");
		goto query_free;
	}

	return &context->ibv_ctx;

query_free:
	/* Release mappings acquired above; bf_page is NULL if unmapped. */
	munmap(context->uar, to_mdev(ibdev)->page_size);
	if (context->bf_page)
		munmap(context->bf_page, to_mdev(ibdev)->page_size);

err_free:
	free(context);
	return NULL;
}