struct pingpong_context *pp_init_ctx(struct ibv_device *ib_dev, int size, int rx_depth, int port, int use_event, enum pp_wr_calc_op calc_op, enum pp_wr_data_type calc_data_type, char *calc_operands_str) { struct pingpong_context *ctx; int rc; ctx = malloc(sizeof *ctx); if (!ctx) return NULL; memset(ctx, 0, sizeof *ctx); ctx->size = size; ctx->rx_depth = rx_depth; ctx->calc_op.opcode = IBV_EXP_CALC_OP_NUMBER; ctx->calc_op.data_type = IBV_EXP_CALC_DATA_TYPE_NUMBER; ctx->calc_op.data_size = IBV_EXP_CALC_DATA_SIZE_NUMBER; ctx->buf = memalign(page_size, size); if (!ctx->buf) { fprintf(stderr, "Couldn't allocate work buf.\n"); goto clean_ctx; } memset(ctx->buf, 0, size); ctx->net_buf = memalign(page_size, size); if (!ctx->net_buf) { fprintf(stderr, "Couldn't allocate work buf.\n"); goto clean_buffer; } memset(ctx->net_buf, 0, size); ctx->context = ibv_open_device(ib_dev); if (!ctx->context) { fprintf(stderr, "Couldn't get context for %s\n", ibv_get_device_name(ib_dev)); goto clean_net_buf; } if (use_event) { ctx->channel = ibv_create_comp_channel(ctx->context); if (!ctx->channel) { fprintf(stderr, "Couldn't create completion channel\n"); goto clean_device; } } else ctx->channel = NULL; ctx->pd = ibv_alloc_pd(ctx->context); if (!ctx->pd) { fprintf(stderr, "Couldn't allocate PD\n"); goto clean_comp_channel; } ctx->mr = ibv_reg_mr(ctx->pd, ctx->net_buf, size, IBV_ACCESS_LOCAL_WRITE); if (!ctx->mr) { fprintf(stderr, "Couldn't register MR\n"); goto clean_pd; } if (calc_op != PP_CALC_INVALID) { int op_per_gather, num_op, max_num_op; ctx->calc_op.opcode = IBV_EXP_CALC_OP_NUMBER; ctx->calc_op.data_type = IBV_EXP_CALC_DATA_TYPE_NUMBER; ctx->calc_op.data_size = IBV_EXP_CALC_DATA_SIZE_NUMBER; num_op = pp_parse_calc_to_gather(calc_operands_str, calc_op, calc_data_type, &ctx->calc_op, ctx->context, ctx->buf, ctx->net_buf); if (num_op < 0) { fprintf(stderr, "-E- failed parsing calc operators\n"); goto clean_mr; } rc = pp_query_calc_cap(ctx->context, ctx->calc_op.opcode, 
ctx->calc_op.data_type, ctx->calc_op.data_size, &op_per_gather, &max_num_op); if (rc) { fprintf(stderr, "-E- operation not supported on %s. valid ops are:\n", ibv_get_device_name(ib_dev)); pp_print_dev_calc_ops(ctx->context); goto clean_mr; } if (pp_prepare_sg_list(op_per_gather, num_op, ctx->mr->lkey, &ctx->calc_op, ctx->net_buf)) { fprintf(stderr, "-failed to prepare the sg list\n"); goto clean_mr; } } ctx->cq = ibv_create_cq(ctx->context, rx_depth + 1, NULL, ctx->channel, 0); if (!ctx->cq) { fprintf(stderr, "Couldn't create CQ\n"); goto clean_mr; } { struct ibv_exp_qp_init_attr attr = { .send_cq = ctx->cq, .recv_cq = ctx->cq, .cap = { .max_send_wr = 16, .max_recv_wr = rx_depth, .max_send_sge = 16, .max_recv_sge = 16 }, .qp_type = IBV_QPT_RC, .pd = ctx->pd }; attr.comp_mask |= IBV_EXP_QP_INIT_ATTR_CREATE_FLAGS | IBV_EXP_QP_INIT_ATTR_PD; attr.exp_create_flags = IBV_EXP_QP_CREATE_CROSS_CHANNEL; ctx->qp = ibv_exp_create_qp(ctx->context, &attr); if (!ctx->qp) { fprintf(stderr, "Couldn't create QP\n"); goto clean_cq; } } { struct ibv_qp_attr attr = { .qp_state = IBV_QPS_INIT, .pkey_index = 0, .port_num = port, .qp_access_flags = 0 }; if (ibv_modify_qp(ctx->qp, &attr, IBV_QP_STATE | IBV_QP_PKEY_INDEX | IBV_QP_PORT | IBV_QP_ACCESS_FLAGS)) { fprintf(stderr, "Failed to modify QP to INIT\n"); goto clean_qp; } } ctx->mcq = ibv_create_cq(ctx->context, rx_depth + 1, NULL, ctx->channel, 0); if (!ctx->mcq) { fprintf(stderr, "Couldn't create CQ for MQP\n"); goto clean_qp; } { struct ibv_exp_qp_init_attr mattr = { .send_cq = ctx->mcq, .recv_cq = ctx->mcq, .cap = { .max_send_wr = 1, .max_recv_wr = rx_depth, .max_send_sge = 16, .max_recv_sge = 16 }, .qp_type = IBV_QPT_RC, .pd = ctx->pd }; mattr.comp_mask |= IBV_EXP_QP_INIT_ATTR_CREATE_FLAGS | IBV_EXP_QP_INIT_ATTR_PD; mattr.exp_create_flags = IBV_EXP_QP_CREATE_CROSS_CHANNEL; ctx->mqp = ibv_exp_create_qp(ctx->context, &mattr); if (!ctx->qp) { fprintf(stderr, "Couldn't create MQP\n"); goto clean_mcq; } } { struct ibv_qp_attr mattr = { 
.qp_state = IBV_QPS_INIT, .pkey_index = 0, .port_num = port, .qp_access_flags = 0 }; if (ibv_modify_qp(ctx->mqp, &mattr, IBV_QP_STATE | IBV_QP_PKEY_INDEX | IBV_QP_PORT | IBV_QP_ACCESS_FLAGS)) { fprintf(stderr, "Failed to modify MQP to INIT\n"); goto clean_mqp; } } return ctx; clean_mqp: ibv_destroy_qp(ctx->mqp); clean_mcq: ibv_destroy_cq(ctx->mcq); clean_qp: ibv_destroy_qp(ctx->qp); clean_cq: ibv_destroy_cq(ctx->cq); clean_mr: ibv_dereg_mr(ctx->mr); clean_pd: ibv_dealloc_pd(ctx->pd); clean_comp_channel: if (ctx->channel) ibv_destroy_comp_channel(ctx->channel); clean_device: ibv_close_device(ctx->context); clean_net_buf: free(ctx->net_buf); clean_buffer: free(ctx->buf); clean_ctx: free(ctx); return NULL; } int pp_close_ctx(struct pingpong_context *ctx) { if (ibv_destroy_qp(ctx->qp)) { fprintf(stderr, "Couldn't destroy QP\n"); return 1; } if (ibv_destroy_qp(ctx->mqp)) { fprintf(stderr, "Couldn't destroy MQP\n"); return 1; } if (ibv_destroy_cq(ctx->cq)) { fprintf(stderr, "Couldn't destroy CQ\n"); return 1; } if (ibv_destroy_cq(ctx->mcq)) { fprintf(stderr, "Couldn't destroy MCQ\n"); return 1; } if (ibv_dereg_mr(ctx->mr)) { fprintf(stderr, "Couldn't deregister MR\n"); return 1; } if (ibv_dealloc_pd(ctx->pd)) { fprintf(stderr, "Couldn't deallocate PD\n"); return 1; } if (ctx->channel) { if (ibv_destroy_comp_channel(ctx->channel)) { fprintf(stderr, "Couldn't destroy completion channel\n"); return 1; } } if (ibv_close_device(ctx->context)) { fprintf(stderr, "Couldn't release context\n"); return 1; } free(ctx->buf); free(ctx->net_buf); free(ctx); return 0; } static int pp_post_recv(struct pingpong_context *ctx, int n) { int rc; struct ibv_sge list = { .addr = (uintptr_t) ctx->net_buf, .length = ctx->size, .lkey = ctx->mr->lkey }; struct ibv_recv_wr wr = { .wr_id = PP_RECV_WRID, .sg_list = &list, .num_sge = 1, }; struct ibv_recv_wr *bad_wr; int i; for (i = 0; i < n; ++i) { rc = ibv_post_recv(ctx->qp, &wr, &bad_wr); if (rc) return rc; } return i; } static int 
pp_post_send(struct pingpong_context *ctx) { int ret; struct ibv_sge list = { .addr = (uintptr_t) ctx->net_buf, .length = ctx->size, .lkey = ctx->mr->lkey }; struct ibv_exp_send_wr wr = { .wr_id = PP_SEND_WRID, .sg_list = &list, .num_sge = 1, .exp_opcode = IBV_EXP_WR_SEND, .exp_send_flags = IBV_EXP_SEND_SIGNALED, }; struct ibv_exp_send_wr *bad_wr; /* If this is a calc operation - set the required params in the wr */ if (ctx->calc_op.opcode != IBV_EXP_CALC_OP_NUMBER) { wr.exp_opcode = IBV_EXP_WR_SEND; wr.exp_send_flags |= IBV_EXP_SEND_WITH_CALC; wr.sg_list = ctx->calc_op.gather_list; wr.num_sge = ctx->calc_op.gather_list_size; wr.op.calc.calc_op = ctx->calc_op.opcode; wr.op.calc.data_type = ctx->calc_op.data_type; wr.op.calc.data_size = ctx->calc_op.data_size; } ret = ibv_exp_post_send(ctx->qp, &wr, &bad_wr); return ret; } int pp_post_ext_wqe(struct pingpong_context *ctx, enum ibv_exp_wr_opcode op) { int ret; struct ibv_exp_send_wr wr = { .wr_id = PP_CQE_WAIT, .sg_list = NULL, .num_sge = 0, .exp_opcode = op, .exp_send_flags = IBV_EXP_SEND_SIGNALED, }; struct ibv_exp_send_wr *bad_wr; switch (op) { case IBV_EXP_WR_RECV_ENABLE: case IBV_EXP_WR_SEND_ENABLE: wr.task.wqe_enable.qp = ctx->qp; wr.task.wqe_enable.wqe_count = 0; wr.exp_send_flags |= IBV_EXP_SEND_WAIT_EN_LAST; break; case IBV_EXP_WR_CQE_WAIT: wr.task.cqe_wait.cq = ctx->cq; wr.task.cqe_wait.cq_count = 1; wr.exp_send_flags |= IBV_EXP_SEND_WAIT_EN_LAST; break; default: fprintf(stderr, "-E- unsupported m_wqe opcode %d\n", op); return -1; } ret = ibv_exp_post_send(ctx->mqp, &wr, &bad_wr); return ret; } int pp_poll_mcq(struct ibv_cq *cq, int num_cqe) { int ne; int i; struct ibv_wc wc[2]; if (num_cqe > 2) { fprintf(stderr, "-E- max num cqe exceeded\n"); return -1; } do { ne = ibv_poll_cq(cq, num_cqe, wc); if (ne < 0) { fprintf(stderr, "poll CQ failed %d\n", ne); return 1; } } while (ne < 1); for (i = 0; i < ne; ++i) { if (wc[i].status != IBV_WC_SUCCESS) { fprintf(stderr, "Failed %s status %s (%d)\n", 
wr_id_str[(int)wc[i].wr_id], ibv_wc_status_str(wc[i].status), wc[i].status); return 1; } if ((int) wc[i].wr_id != PP_CQE_WAIT) { fprintf(stderr, "invalid wr_id %" PRIx64 "\n", wc[i].wr_id); return -1; } } return 0; } static int pp_calc_verify(struct pingpong_context *ctx, enum pp_wr_data_type calc_data_type, enum pp_wr_calc_op calc_opcode) { uint64_t *op1 = &(ctx->last_result); uint64_t *op2 = (uint64_t *)ctx->buf + 2; uint64_t *res = (uint64_t *)ctx->buf; return !EXEC_VERIFY(calc_data_type, calc_opcode, 1, op1, op2, res); } static int pp_update_last_result(struct pingpong_context *ctx, enum pp_wr_data_type calc_data_type, enum pp_wr_calc_op calc_opcode) { /* EXEC_VERIFY derefence result parameter */ uint64_t *dummy; uint64_t *op1 = (uint64_t *)ctx->buf; uint64_t *op2 = (uint64_t *)ctx->buf + 2; uint64_t res = (uint64_t)EXEC_VERIFY(calc_data_type, calc_opcode, 0, op1, op2, dummy); ctx->last_result = res; return 0; } static void usage(const char *argv0) { printf("Usage:\n"); printf(" %s start a server and wait for connection\n", argv0); printf(" %s <host> connect to server at <host>\n", argv0); printf("\n"); printf("Options:\n"); printf(" -p, --port=<port> listen on/connect to port <port> (default 18515)\n"); printf(" -d, --ib-dev=<dev> use IB device <dev> (default first device found)\n"); printf(" -i, --ib-port=<port> use port <port> of IB device (default 1)\n"); printf(" -s, --size=<size> size of message to exchange (default 4096 minimum 16)\n"); printf(" -m, --mtu=<size> path MTU (default 1024)\n"); printf(" -r, --rx-depth=<dep> number of receives to post at a time (default 500)\n"); printf(" -n, --iters=<iters> number of exchanges (default 1000)\n"); printf(" -l, --sl=<sl> service level value\n"); printf(" -e, --events sleep on CQ events (default poll)\n"); printf(" -c, --calc=<operation> calc operation\n"); printf(" -t, --op_type=<type> calc operands type\n"); printf(" -o, --operands=<o1,o2,...> comma separated list of operands\n"); printf(" -w, --wait_cq=cqn 
wait for entries on cq\n"); printf(" -v, --verbose print verbose information\n"); printf(" -V, --verify verify calc operations\n"); }
/*
 * uct_ib_md_umr_qp_create - create the loopback QP used to post UMR
 * (user-mode memory registration) work requests, and drive it through
 * INIT -> RTR -> RTS connected to itself.
 *
 * Returns UCS_OK on success, UCS_ERR_UNSUPPORTED when the device (or the
 * build) lacks UMR support, and UCS_ERR_IO_ERROR on any verbs failure.
 */
static ucs_status_t uct_ib_md_umr_qp_create(uct_ib_md_t *md)
{
#if HAVE_EXP_UMR
    struct ibv_exp_qp_init_attr qp_init_attr;
    struct ibv_qp_attr qp_attr;
    uint8_t port_num;
    int ret;
    uct_ib_device_t *ibdev;
    struct ibv_exp_port_attr *port_attr;

    ibdev = &md->dev;

    if (!(ibdev->dev_attr.exp_device_cap_flags & IBV_EXP_DEVICE_UMR)) {
        return UCS_ERR_UNSUPPORTED;
    }

    /* TODO: fix port selection. It looks like active port should be used */
    port_num  = ibdev->first_port;
    port_attr = uct_ib_device_port_attr(ibdev, port_num);

    memset(&qp_init_attr, 0, sizeof(qp_init_attr));

    md->umr_cq = ibv_create_cq(ibdev->ibv_context, 1, NULL, NULL, 0);
    if (md->umr_cq == NULL) {
        ucs_error("failed to create UMR CQ: %m");
        goto err;
    }

    qp_init_attr.qp_type             = IBV_QPT_RC;
    qp_init_attr.send_cq             = md->umr_cq;
    qp_init_attr.recv_cq             = md->umr_cq;
    qp_init_attr.cap.max_inline_data = 0;
    qp_init_attr.cap.max_recv_sge    = 1;
    qp_init_attr.cap.max_send_sge    = 1;
    qp_init_attr.srq                 = NULL;
    qp_init_attr.cap.max_recv_wr     = 16;
    qp_init_attr.cap.max_send_wr     = 16;
    qp_init_attr.pd                  = md->pd;
    qp_init_attr.comp_mask           = IBV_EXP_QP_INIT_ATTR_PD |
                                       IBV_EXP_QP_INIT_ATTR_MAX_INL_KLMS;
    qp_init_attr.max_inl_recv        = 0;
#if (HAVE_IBV_EXP_QP_CREATE_UMR_CAPS || HAVE_EXP_UMR_NEW_API)
    qp_init_attr.max_inl_send_klms   = ibdev->dev_attr.umr_caps.max_send_wqe_inline_klms;
#else
    qp_init_attr.max_inl_send_klms   = ibdev->dev_attr.max_send_wqe_inline_klms;
#endif
#if HAVE_IBV_EXP_QP_CREATE_UMR
    qp_init_attr.comp_mask          |= IBV_EXP_QP_INIT_ATTR_CREATE_FLAGS;
    qp_init_attr.exp_create_flags    = IBV_EXP_QP_CREATE_UMR;
#endif

    md->umr_qp = ibv_exp_create_qp(ibdev->ibv_context, &qp_init_attr);
    if (md->umr_qp == NULL) {
        ucs_error("failed to create UMR QP: %m");
        goto err_destroy_cq;
    }

    memset(&qp_attr, 0, sizeof(qp_attr));

    /* Modify QP to INIT state */
    qp_attr.qp_state        = IBV_QPS_INIT;
    qp_attr.pkey_index      = 0;
    qp_attr.port_num        = port_num;
    qp_attr.qp_access_flags = UCT_IB_MEM_ACCESS_FLAGS;
    ret = ibv_modify_qp(md->umr_qp, &qp_attr,
                        IBV_QP_STATE | IBV_QP_PKEY_INDEX | IBV_QP_PORT |
                        IBV_QP_ACCESS_FLAGS);
    if (ret) {
        ucs_error("Failed to modify UMR QP to INIT: %m");
        goto err_destroy_qp;
    }

    /* Modify to RTR: connect the QP to itself (loopback). */
    qp_attr.qp_state        = IBV_QPS_RTR;
    qp_attr.dest_qp_num     = md->umr_qp->qp_num;

    memset(&qp_attr.ah_attr, 0, sizeof(qp_attr.ah_attr));
    qp_attr.ah_attr.port_num  = port_num;
    qp_attr.ah_attr.dlid      = port_attr->lid;
    qp_attr.ah_attr.is_global = 1;
    if (uct_ib_device_query_gid(ibdev, port_num, 0,
                                &qp_attr.ah_attr.grh.dgid) != UCS_OK) {
        goto err_destroy_qp;
    }

    qp_attr.rq_psn              = 0;
    qp_attr.path_mtu            = IBV_MTU_512;
    qp_attr.min_rnr_timer       = 7;
    qp_attr.max_dest_rd_atomic  = 1;
    ret = ibv_modify_qp(md->umr_qp, &qp_attr,
                        IBV_QP_STATE | IBV_QP_AV | IBV_QP_PATH_MTU |
                        IBV_QP_DEST_QPN | IBV_QP_RQ_PSN |
                        IBV_QP_MAX_DEST_RD_ATOMIC | IBV_QP_MIN_RNR_TIMER);
    if (ret) {
        ucs_error("Failed to modify UMR QP to RTR: %m");
        goto err_destroy_qp;
    }

    /* Modify to RTS */
    qp_attr.qp_state       = IBV_QPS_RTS;
    qp_attr.sq_psn         = 0;
    qp_attr.timeout        = 7;
    qp_attr.rnr_retry      = 7;
    qp_attr.retry_cnt      = 7;
    qp_attr.max_rd_atomic  = 1;
    /* FIX: the mask previously OR-ed IBV_QP_RETRY_CNT twice (copy-paste);
     * harmless at runtime but misleading — deduplicated. */
    ret = ibv_modify_qp(md->umr_qp, &qp_attr,
                        IBV_QP_STATE | IBV_QP_TIMEOUT | IBV_QP_RETRY_CNT |
                        IBV_QP_RNR_RETRY | IBV_QP_SQ_PSN |
                        IBV_QP_MAX_QP_RD_ATOMIC);
    if (ret) {
        ucs_error("Failed to modify UMR QP to RTS: %m");
        goto err_destroy_qp;
    }

    return UCS_OK;

err_destroy_qp:
    ibv_destroy_qp(md->umr_qp);
err_destroy_cq:
    ibv_destroy_cq(md->umr_cq);
err:
    return UCS_ERR_IO_ERROR;
#else
    return UCS_ERR_UNSUPPORTED;
#endif
}