/*
 * c2_free_qp - release a kernel QP and all resources it owns.
 *
 * Ordering matters here: the QPN is removed under the CQ locks so the
 * CQ polling path can no longer look this QP up, the QP is destroyed in
 * the adapter, leftover CQEs are invalidated, and only then are the
 * MQ mappings and shared pointers returned.  Finally we drop our
 * reference and wait for any concurrent users to finish.
 */
void c2_free_qp(struct c2_dev *c2dev, struct c2_qp *qp)
{
	struct c2_cq *send_cq;
	struct c2_cq *recv_cq;

	send_cq = to_c2cq(qp->ibqp.send_cq);
	recv_cq = to_c2cq(qp->ibqp.recv_cq);

	/*
	 * Lock CQs here, so that CQ polling code can do QP lookup
	 * without taking a lock.
	 */
	c2_lock_cqs(send_cq, recv_cq);
	c2_free_qpn(c2dev, qp->qpn);
	c2_unlock_cqs(send_cq, recv_cq);

	/*
	 * Destroy qp in the rnic...
	 */
	destroy_qp(c2dev, qp);

	/*
	 * Mark any unreaped CQEs as null and void.
	 */
	c2_cq_clean(c2dev, qp, send_cq->cqn);
	/* Send and receive may share one CQ; only clean it once. */
	if (send_cq != recv_cq)
		c2_cq_clean(c2dev, qp, recv_cq->cqn);
	/*
	 * Unmap the MQs and return the shared pointers
	 * to the message pool.
	 */
	iounmap(qp->sq_mq.peer);
	iounmap(qp->rq_mq.peer);
	c2_free_mqsp(qp->sq_mq.shared);
	c2_free_mqsp(qp->rq_mq.shared);

	/* Drop our reference and block until all other holders are gone. */
	atomic_dec(&qp->refcount);
	wait_event(qp->wait, !atomic_read(&qp->refcount));
}
/** * hinic_io_destroy_qps - Destroy the IO Queue Pairs * @func_to_io: func to io channel that holds the IO components * @num_qps: number queue pairs to destroy **/ void hinic_io_destroy_qps(struct hinic_func_to_io *func_to_io, int num_qps) { struct hinic_hwif *hwif = func_to_io->hwif; struct pci_dev *pdev = hwif->pdev; size_t ci_table_size; int i; ci_table_size = CI_TABLE_SIZE(num_qps); for (i = 0; i < num_qps; i++) destroy_qp(func_to_io, &func_to_io->qps[i]); dma_free_coherent(&pdev->dev, ci_table_size, func_to_io->ci_addr_base, func_to_io->ci_dma_base); devm_kfree(&pdev->dev, func_to_io->sq_db); devm_kfree(&pdev->dev, func_to_io->rq_wq); devm_kfree(&pdev->dev, func_to_io->sq_wq); devm_kfree(&pdev->dev, func_to_io->qps); }
/*
 * c2_alloc_qp - allocate and initialize a kernel QP on the c2 adapter.
 *
 * Reserves a QPN, allocates SQ/RQ shared pointers from the kernel MQSP
 * pool, sends a CCWR_QP_CREATE verbs request to the adapter, and on
 * success maps the adapter-resident SQ/RQ message queues into the
 * kernel.  Errors unwind through the bail labels in strict reverse
 * order of acquisition.
 *
 * Returns 0 on success or a negative errno.
 */
int c2_alloc_qp(struct c2_dev *c2dev,
		struct c2_pd *pd,
		struct ib_qp_init_attr *qp_attrs, struct c2_qp *qp)
{
	struct c2wr_qp_create_req wr;
	struct c2wr_qp_create_rep *reply;
	struct c2_vq_req *vq_req;
	struct c2_cq *send_cq = to_c2cq(qp_attrs->send_cq);
	struct c2_cq *recv_cq = to_c2cq(qp_attrs->recv_cq);
	unsigned long peer_pa;
	u32 q_size, msg_size, mmap_size;
	void __iomem *mmap;
	int err;

	err = c2_alloc_qpn(c2dev, qp);
	if (err)
		return err;
	qp->ibqp.qp_num = qp->qpn;
	qp->ibqp.qp_type = IB_QPT_RC;

	/* Allocate the SQ and RQ shared pointers */
	qp->sq_mq.shared = c2_alloc_mqsp(c2dev, c2dev->kern_mqsp_pool,
					 &qp->sq_mq.shared_dma, GFP_KERNEL);
	if (!qp->sq_mq.shared) {
		err = -ENOMEM;
		goto bail0;
	}

	qp->rq_mq.shared = c2_alloc_mqsp(c2dev, c2dev->kern_mqsp_pool,
					 &qp->rq_mq.shared_dma, GFP_KERNEL);
	if (!qp->rq_mq.shared) {
		err = -ENOMEM;
		goto bail1;
	}

	/* Allocate the verbs request */
	vq_req = vq_req_alloc(c2dev);
	if (vq_req == NULL) {
		err = -ENOMEM;
		goto bail2;
	}

	/* Initialize the work request */
	memset(&wr, 0, sizeof(wr));
	c2_wr_set_id(&wr, CCWR_QP_CREATE);
	wr.hdr.context = (unsigned long) vq_req;
	wr.rnic_handle = c2dev->adapter_handle;
	wr.sq_cq_handle = send_cq->adapter_handle;
	wr.rq_cq_handle = recv_cq->adapter_handle;
	/* +1: the adapter consumes one slot beyond the requested depth. */
	wr.sq_depth = cpu_to_be32(qp_attrs->cap.max_send_wr + 1);
	wr.rq_depth = cpu_to_be32(qp_attrs->cap.max_recv_wr + 1);
	wr.srq_handle = 0;
	wr.flags = cpu_to_be32(QP_RDMA_READ | QP_RDMA_WRITE | QP_MW_BIND |
			       QP_ZERO_STAG | QP_RDMA_READ_RESPONSE);
	wr.send_sgl_depth = cpu_to_be32(qp_attrs->cap.max_send_sge);
	wr.recv_sgl_depth = cpu_to_be32(qp_attrs->cap.max_recv_sge);
	wr.rdma_write_sgl_depth = cpu_to_be32(qp_attrs->cap.max_send_sge);
	wr.shared_sq_ht = cpu_to_be64(qp->sq_mq.shared_dma);
	wr.shared_rq_ht = cpu_to_be64(qp->rq_mq.shared_dma);
	wr.ord = cpu_to_be32(C2_MAX_ORD_PER_QP);
	wr.ird = cpu_to_be32(C2_MAX_IRD_PER_QP);
	wr.pd_id = pd->pd_id;
	wr.user_context = (unsigned long) qp;

	vq_req_get(c2dev, vq_req);

	/* Send the WR to the adapter */
	err = vq_send_wr(c2dev, (union c2wr *) & wr);
	if (err) {
		/* Send failed: drop the reference vq_req_get() took. */
		vq_req_put(c2dev, vq_req);
		goto bail3;
	}

	/* Wait for the verb reply */
	err = vq_wait_for_reply(c2dev, vq_req);
	if (err) {
		goto bail3;
	}

	/* Process the reply */
	reply = (struct c2wr_qp_create_rep *) (unsigned long) (vq_req->reply_msg);
	if (!reply) {
		err = -ENOMEM;
		goto bail3;
	}

	if ((err = c2_wr_get_result(reply)) != 0) {
		goto bail4;
	}

	/* Fill in the kernel QP struct */
	atomic_set(&qp->refcount, 1);
	qp->adapter_handle = reply->qp_handle;
	qp->state = IB_QPS_RESET;
	qp->send_sgl_depth = qp_attrs->cap.max_send_sge;
	qp->rdma_write_sgl_depth = qp_attrs->cap.max_send_sge;
	qp->recv_sgl_depth = qp_attrs->cap.max_recv_sge;
	init_waitqueue_head(&qp->wait);

	/* Initialize the SQ MQ: map the adapter-side queue memory. */
	q_size = be32_to_cpu(reply->sq_depth);
	msg_size = be32_to_cpu(reply->sq_msg_size);
	peer_pa = c2dev->pa + be32_to_cpu(reply->sq_mq_start);
	mmap_size = PAGE_ALIGN(sizeof(struct c2_mq_shared) + msg_size * q_size);
	mmap = ioremap_nocache(peer_pa, mmap_size);
	if (!mmap) {
		err = -ENOMEM;
		goto bail5;
	}

	c2_mq_req_init(&qp->sq_mq,
		       be32_to_cpu(reply->sq_mq_index),
		       q_size,
		       msg_size,
		       mmap + sizeof(struct c2_mq_shared),	/* pool start */
		       mmap,				/* peer */
		       C2_MQ_ADAPTER_TARGET);

	/* Initialize the RQ mq */
	q_size = be32_to_cpu(reply->rq_depth);
	msg_size = be32_to_cpu(reply->rq_msg_size);
	peer_pa = c2dev->pa + be32_to_cpu(reply->rq_mq_start);
	mmap_size = PAGE_ALIGN(sizeof(struct c2_mq_shared) + msg_size * q_size);
	mmap = ioremap_nocache(peer_pa, mmap_size);
	if (!mmap) {
		err = -ENOMEM;
		goto bail6;
	}

	c2_mq_req_init(&qp->rq_mq,
		       be32_to_cpu(reply->rq_mq_index),
		       q_size,
		       msg_size,
		       mmap + sizeof(struct c2_mq_shared),	/* pool start */
		       mmap,				/* peer */
		       C2_MQ_ADAPTER_TARGET);

	vq_repbuf_free(c2dev, reply);
	vq_req_free(c2dev, vq_req);

	return 0;

	/* Error unwinding: reverse order of acquisition. */
bail6:
	iounmap(qp->sq_mq.peer);
bail5:
	destroy_qp(c2dev, qp);
bail4:
	vq_repbuf_free(c2dev, reply);
bail3:
	vq_req_free(c2dev, vq_req);
bail2:
	c2_free_mqsp(qp->rq_mq.shared);
bail1:
	c2_free_mqsp(qp->sq_mq.shared);
bail0:
	c2_free_qpn(c2dev, qp->qpn);
	return err;
}
/**
 * hinic_io_create_qps - Create Queue Pairs
 * @func_to_io: func to io channel that holds the IO components
 * @base_qpn: base qp number
 * @num_qps: number queue pairs to create
 * @sq_msix_entries: msix entries for sq
 * @rq_msix_entries: msix entries for rq
 *
 * Return 0 - Success, negative - Failure
 **/
int hinic_io_create_qps(struct hinic_func_to_io *func_to_io,
			u16 base_qpn, int num_qps,
			struct msix_entry *sq_msix_entries,
			struct msix_entry *rq_msix_entries)
{
	struct hinic_hwif *hwif = func_to_io->hwif;
	struct pci_dev *pdev = hwif->pdev;
	size_t qps_size, wq_size, db_size;
	void *ci_addr_base;
	int i, j, err;

	/* Per-QP bookkeeping arrays, allocated up front. */
	qps_size = num_qps * sizeof(*func_to_io->qps);
	func_to_io->qps = devm_kzalloc(&pdev->dev, qps_size, GFP_KERNEL);
	if (!func_to_io->qps)
		return -ENOMEM;

	wq_size = num_qps * sizeof(*func_to_io->sq_wq);
	func_to_io->sq_wq = devm_kzalloc(&pdev->dev, wq_size, GFP_KERNEL);
	if (!func_to_io->sq_wq) {
		err = -ENOMEM;
		goto err_sq_wq;
	}

	wq_size = num_qps * sizeof(*func_to_io->rq_wq);
	func_to_io->rq_wq = devm_kzalloc(&pdev->dev, wq_size, GFP_KERNEL);
	if (!func_to_io->rq_wq) {
		err = -ENOMEM;
		goto err_rq_wq;
	}

	db_size = num_qps * sizeof(*func_to_io->sq_db);
	func_to_io->sq_db = devm_kzalloc(&pdev->dev, db_size, GFP_KERNEL);
	if (!func_to_io->sq_db) {
		err = -ENOMEM;
		goto err_sq_db;
	}

	/* Completion-index table shared by all QPs (DMA-coherent). */
	ci_addr_base = dma_zalloc_coherent(&pdev->dev, CI_TABLE_SIZE(num_qps),
					   &func_to_io->ci_dma_base,
					   GFP_KERNEL);
	if (!ci_addr_base) {
		dev_err(&pdev->dev, "Failed to allocate CI area\n");
		err = -ENOMEM;
		goto err_ci_base;
	}

	func_to_io->ci_addr_base = ci_addr_base;

	for (i = 0; i < num_qps; i++) {
		err = init_qp(func_to_io, &func_to_io->qps[i], i,
			      &sq_msix_entries[i], &rq_msix_entries[i]);
		if (err) {
			dev_err(&pdev->dev, "Failed to create QP %d\n", i);
			goto err_init_qp;
		}
	}

	err = write_qp_ctxts(func_to_io, base_qpn, num_qps);
	if (err) {
		dev_err(&pdev->dev, "Failed to init QP ctxts\n");
		goto err_write_qp_ctxts;
	}

	return 0;

err_write_qp_ctxts:
	/*
	 * Falls through: on ctxts failure i == num_qps, so the loop
	 * below destroys every QP; on init_qp failure it destroys the
	 * first i (successfully created) QPs.
	 */
err_init_qp:
	for (j = 0; j < i; j++)
		destroy_qp(func_to_io, &func_to_io->qps[j]);

	dma_free_coherent(&pdev->dev, CI_TABLE_SIZE(num_qps),
			  func_to_io->ci_addr_base, func_to_io->ci_dma_base);

err_ci_base:
	devm_kfree(&pdev->dev, func_to_io->sq_db);

err_sq_db:
	devm_kfree(&pdev->dev, func_to_io->rq_wq);

err_rq_wq:
	devm_kfree(&pdev->dev, func_to_io->sq_wq);

err_sq_wq:
	devm_kfree(&pdev->dev, func_to_io->qps);
	return err;
}
static void* rdma_thread(void *ptr) { int i, j, rc; struct rdma_resource_t *rdma_resource; struct user_param_t *user_param; struct thread_context_t *t_ctx; struct rdma_req_t rdma_req; double lat; t_ctx = (struct thread_context_t*)ptr; rdma_resource = t_ctx->rdma_resource; user_param = &(rdma_resource->user_param); t_ctx->thread_id = pthread_self(); t_ctx->num_of_iter = user_param->num_of_iter; if (create_rdma_buf_pool(t_ctx)) { ERROR("Failed to create MR pool.\n"); return NULL; } { uint32_t qp_type; if (user_param->server_ip != NULL) { qp_type = htonl(user_param->qp_type); } sock_c2d(&(t_ctx->sock), sizeof(qp_type), &qp_type); if (user_param->server_ip == NULL) { user_param->qp_type = ntohl(qp_type); } t_ctx->qp_type = user_param->qp_type; /// redesign } if (create_qp(t_ctx)) { ERROR("Failed to create QP.\n"); return NULL; } { struct thread_sync_info_t { uint32_t qp_num; uint32_t direction; uint32_t opcode; uint32_t qkey; uint32_t psn; uint32_t num_of_iter; uint16_t lid; } ATTR_PACKED; struct thread_sync_info_t local_info; struct thread_sync_info_t remote_info; local_info.lid = htons(rdma_resource->port_attr.lid); local_info.qp_num = htonl(t_ctx->qp->qp_num); local_info.direction = htonl(user_param->direction); local_info.opcode = htonl(user_param->opcode); /// enum ibv_wr_opcode local_info.qkey = htonl(0); local_info.psn = htonl(0); local_info.num_of_iter = htonl(t_ctx->num_of_iter); rc = sock_sync_data(&(t_ctx->sock), sizeof(local_info), &local_info, &remote_info); if (rc) { ERROR("failed to sync data.\n"); return NULL; } t_ctx->remote_lid = ntohs(remote_info.lid); t_ctx->remote_qpn = ntohl(remote_info.qp_num); t_ctx->remote_qkey = ntohl(remote_info.qkey); t_ctx->remote_psn = ntohl(remote_info.psn); if (user_param->server_ip == NULL) { user_param->direction = ntohl(remote_info.direction); user_param->opcode = ntohl(remote_info.opcode); t_ctx->num_of_iter = ntohl(remote_info.num_of_iter); if (user_param->direction == 0 || user_param->direction == 1) { 
t_ctx->is_requestor = 0; } else if (user_param->direction == 2) { t_ctx->is_requestor = 1; } } else { if (user_param->direction == 0 || user_param->direction == 1) { t_ctx->is_requestor = 1; } else if (user_param->direction == 2) { t_ctx->is_requestor = 0; } } } t_ctx->t_a = (cycles_t*)malloc(t_ctx->num_of_iter * sizeof(cycles_t)); if (t_ctx->t_a == NULL) { ERROR("Failed to allocate memory.\n"); return NULL; } t_ctx->t_b = (cycles_t*)malloc(t_ctx->num_of_iter * sizeof(cycles_t)); if (t_ctx->t_b == NULL) { free(t_ctx->t_a); ERROR("Failed to allocate memory.\n"); return NULL; } t_ctx->t_c = (cycles_t*)malloc(t_ctx->num_of_iter * sizeof(cycles_t)); if (t_ctx->t_c == NULL) { free(t_ctx->t_b); free(t_ctx->t_a); ERROR("Failed to allocate memory.\n"); return NULL; } for (i = 0; i < LAT_LEVEL; i++) { t_ctx->lat[i] = 0; } if (connect_qp(t_ctx)) { ERROR("Failed to connect QP.\n"); return NULL; } for(i = 0; i < user_param->num_of_oust; i++) { rdma_req.rdma_buf = get_rdma_buf(t_ctx); rdma_req.num_of_oust = 1; rdma_req.data_size = DEF_BUF_SIZE; rc = post_receive(t_ctx, &rdma_req); if (rc) { ERROR("Failed to post_receive, i:%d.\n", i); return NULL; } } sock_sync_ready(&t_ctx->sock); for (i = 0; i < t_ctx->num_of_iter; i++) { t_ctx->t_a[i] = get_cycles(); DEBUG("do_rdma_transaction, t_ctx->num_of_iter=%d, i=%d.\n", t_ctx->num_of_iter, i); rc = do_rdma_transaction(t_ctx, i); if (rc) { ERROR("Failed to do_rdma_transaction, i:%d.\n", i); return NULL; } t_ctx->t_c[i] = get_cycles(); if (user_param->direction == 0 || (!t_ctx->is_requestor)) { rdma_req.rdma_buf = get_rdma_buf(t_ctx); if (rdma_req.rdma_buf == NULL) { ERROR("Failed to get RDMA buffer.\n"); return NULL; /// Memory Leak and remove hung RX buffers } rdma_req.num_of_oust = 1; post_receive(t_ctx, &rdma_req); } if (user_param->interval) { usleep(user_param->interval); } } /// Memory leak, release the hung RX rdma_buf; destroy_qp(t_ctx); t_ctx->min_lat = 0x7fffffff; t_ctx->max_lat = 0; for (i = 0; i < t_ctx->num_of_iter; i++) { 
lat = (t_ctx->t_c[i] - t_ctx->t_a[i]) / rdma_resource->freq_mhz; if (lat < t_ctx->min_lat) { t_ctx->min_lat = lat; t_ctx->min_lat_iter_num = i; } if (lat > t_ctx->max_lat) { t_ctx->max_lat = lat; t_ctx->max_lat_iter_num = i; } for (j = 0; j < LAT_LEVEL; j++) { if (j < 7) { if (lat < (1 + j)) { t_ctx->lat[j]++; break; } } else { if (lat < (1 << (j - 4))) { t_ctx->lat[j]++; break; } } } if (j == LAT_LEVEL) { t_ctx->lat[LAT_LEVEL - 1]++; } } free(t_ctx->t_a); free(t_ctx->t_b); free(t_ctx->t_c); if (!user_param->server_ip) { /// sock_close_multi(&(t_ctx->sock), sock_bind); // how to close sock_fd. free(t_ctx); /// Need to improve. } INFO("RDMA testing thread successfully exited.\n"); return NULL; }