static int rping_create_qp(struct rping_cb *cb)
{
    struct ibv_qp_init_attr init_attr;
    int ret;

    memset(&init_attr, 0, sizeof(init_attr));
    init_attr.cap.max_send_wr = RPING_SQ_DEPTH;
    init_attr.cap.max_recv_wr = 2;
    init_attr.cap.max_recv_sge = 1;
    init_attr.cap.max_send_sge = 1;
    init_attr.qp_type = IBV_QPT_RC;
    init_attr.send_cq = cb->cq;
    init_attr.recv_cq = cb->cq;

    if (cb->server) {
        ret = rdma_create_qp(cb->child_cm_id, cb->pd, &init_attr);
        if (!ret)
            cb->qp = cb->child_cm_id->qp;
    } else {
        ret = rdma_create_qp(cb->cm_id, cb->pd, &init_attr);
        if (!ret)
            cb->qp = cb->cm_id->qp;
    }

    return ret;
}
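/*
 * Optional sanity check (illustrative, not part of rping): verify that the
 * compile-time RPING_SQ_DEPTH fits within what the device reports before
 * asking rdma_create_qp() for that many send work requests.
 */
struct ibv_device_attr dev_attr;

if (!ibv_query_device(cb->cm_id->verbs, &dev_attr) &&
    RPING_SQ_DEPTH > dev_attr.max_qp_wr)
    fprintf(stderr, "requested SQ depth %d exceeds device max %d\n",
            RPING_SQ_DEPTH, dev_attr.max_qp_wr);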
int on_addr_resolved(struct rdma_cm_id *id)
{
    struct ibv_qp_init_attr qp_attr;
    struct connection *conn;

    printf("address resolved.\n");

    build_context(id->verbs);
    build_qp_attr(&qp_attr);

    TEST_NZ(rdma_create_qp(id, s_ctx->pd, &qp_attr));

    id->context = conn = (struct connection *)malloc(sizeof(struct connection));
    conn->id = id;
    conn->qp = id->qp;
    conn->num_completions = 0;

    register_memory(conn);
    post_receives(conn);

    TEST_NZ(rdma_resolve_route(id, TIMEOUT_IN_MS));

    return 0;
}
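/*
 * Several of these tutorial-style snippets call a build_qp_attr() helper
 * that is not reproduced here. Below is a minimal sketch of what such a
 * helper typically fills in; the s_ctx global with its shared CQ is taken
 * from the surrounding snippets, and the queue depths are illustrative
 * assumptions rather than any particular project's exact values.
 */
static void build_qp_attr(struct ibv_qp_init_attr *qp_attr)
{
    memset(qp_attr, 0, sizeof(*qp_attr));

    qp_attr->send_cq = s_ctx->cq;      /* shared completion queue */
    qp_attr->recv_cq = s_ctx->cq;
    qp_attr->qp_type = IBV_QPT_RC;     /* reliable connected */

    qp_attr->cap.max_send_wr = 10;     /* illustrative depths */
    qp_attr->cap.max_recv_wr = 10;
    qp_attr->cap.max_send_sge = 1;
    qp_attr->cap.max_recv_sge = 1;
}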
static int init_node(struct cmatest_node *node)
{
    struct ibv_qp_init_attr init_qp_attr;
    int cqe, ret;
    int i;
    struct ibv_cq **cqs[] = {&node->cq[SEND_CQ_INDEX],
                             &node->cq[RECV_CQ_INDEX]};

    node->pd = ibv_alloc_pd(node->cma_id->verbs);
    if (!node->pd) {
        ret = -ENOMEM;
        printf("cmatose: unable to allocate PD\n");
        goto out;
    }

    cqe = message_count ? message_count : 1;
    for (i = 0; i < sizeof(cqs) / sizeof(cqs[0]); i++) {
        if (set_ts) {
            struct ibv_exp_cq_init_attr cq_init_attr;

            memset(&cq_init_attr, 0, sizeof(cq_init_attr));
            cq_init_attr.flags = IBV_EXP_CQ_TIMESTAMP;
            cq_init_attr.comp_mask = IBV_EXP_CQ_INIT_ATTR_FLAGS;
            *cqs[i] = (struct ibv_cq *)ibv_exp_create_cq(
                    node->cma_id->verbs, cqe, node,
                    NULL, 0, &cq_init_attr);
        } else {
            *cqs[i] = ibv_create_cq(node->cma_id->verbs, cqe,
                                    node, 0, 0);
        }
    }
    if (!node->cq[SEND_CQ_INDEX] || !node->cq[RECV_CQ_INDEX]) {
        ret = -ENOMEM;
        printf("cmatose: unable to create CQ\n");
        goto out;
    }

    memset(&init_qp_attr, 0, sizeof init_qp_attr);
    init_qp_attr.cap.max_send_wr = cqe;
    init_qp_attr.cap.max_recv_wr = cqe;
    init_qp_attr.cap.max_send_sge = 1;
    init_qp_attr.cap.max_recv_sge = 1;
    init_qp_attr.qp_context = node;
    init_qp_attr.sq_sig_all = 1;
    init_qp_attr.qp_type = IBV_QPT_RC;
    init_qp_attr.send_cq = node->cq[SEND_CQ_INDEX];
    init_qp_attr.recv_cq = node->cq[RECV_CQ_INDEX];
    ret = rdma_create_qp(node->cma_id, node->pd, &init_qp_attr);
    if (ret) {
        perror("cmatose: unable to create QP");
        goto out;
    }

    ret = create_message(node);
    if (ret) {
        printf("cmatose: failed to create messages: %d\n", ret);
        goto out;
    }
out:
    return ret;
}
void build_connection(struct rdma_cm_id *id)
{
    struct connection *conn;
    struct ibv_qp_init_attr qp_attr;

    /* init semaphores */
    sem_init(&read_ops, 0, 0);
    sem_init(&done_ops, 0, 0);
    sem_init(&write_ops, 0, 1);

    build_context(id->verbs);
    build_qp_attr(&qp_attr);

    TEST_NZ(rdma_create_qp(id, s_ctx->pd, &qp_attr));

    id->context = conn = (struct connection *)malloc(sizeof(struct connection));
    conn->id = id;
    conn->qp = id->qp;
    conn->send_state = SS_INIT;
    conn->recv_state = RS_INIT;
    conn->connected = 0;

    register_memory(conn);
    post_receives(conn);
}
void build_connection(struct rdma_cm_id *id)
{
    rdma_conn_t *conn;
    struct ibv_qp_init_attr qp_attr;

    build_context(id->verbs);
    build_qp_attr(&qp_attr);

    TEST_NZ(rdma_create_qp(id, s_ctx->pd, &qp_attr));

    conn = malloc(sizeof(rdma_conn_t));
    id->context = conn;
    rdma_conn = conn;
    conn->id = id;
    conn->qp = id->qp;
    conn->send_state = SS_INIT;
    conn->recv_state = RS_INIT;
    conn->connected = 0;

    register_memory(conn);
    post_receives(conn);
}
void RDMAServerSocket::accept(client_t client_id) const {
  ibv_qp_init_attr qp_attr = {};
  qp_attr.qp_type = IBV_QPT_RC;
  qp_attr.cap.max_send_wr = 256;
  qp_attr.cap.max_recv_wr = 0;
  qp_attr.cap.max_send_sge = 1;
  qp_attr.cap.max_recv_sge = 0;
  qp_attr.cap.max_inline_data = 72;
  qp_attr.recv_cq = cq;
  qp_attr.send_cq = cq;
  qp_attr.srq = id->srq;
  qp_attr.sq_sig_all = 1;
  check_zero(rdma_create_qp(client_id.get(), NULL, &qp_attr));
  check_zero(rdma_accept(client_id.get(), nullptr));

  clients([client_id = std::move(client_id)](auto &&clients) mutable {
    auto pos = std::lower_bound(
        std::begin(clients), std::end(clients), client_id->qp->qp_num,
        [](const auto &client, const qp_t &qp_num) {
          return client->qp->qp_num < qp_num;
        });
    clients.insert(pos, std::move(client_id));
  });
}
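/*
 * The accept() above sets cap.max_recv_wr and cap.max_recv_sge to 0 because
 * receives are posted to a shared receive queue (qp_attr.srq) rather than to
 * each per-client QP. A minimal sketch of creating such an SRQ follows; the
 * pd variable and the depth of 1024 are illustrative assumptions.
 */
struct ibv_srq_init_attr srq_attr;
memset(&srq_attr, 0, sizeof(srq_attr));
srq_attr.attr.max_wr = 1024;    /* receive slots shared by all clients */
srq_attr.attr.max_sge = 1;

struct ibv_srq *srq = ibv_create_srq(pd, &srq_attr);
if (!srq)
    perror("ibv_create_srq");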
static int isert_conn_qp_create(struct isert_connection *isert_conn)
{
    struct rdma_cm_id *cm_id = isert_conn->cm_id;
    struct isert_device *isert_dev = isert_conn->isert_dev;
    struct ib_qp_init_attr qp_attr;
    int err;
    int cq_idx;
    int max_wr = ISER_MAX_WCE;

    TRACE_ENTRY();

    cq_idx = isert_get_cq_idx(isert_dev);

    memset(&qp_attr, 0, sizeof(qp_attr));
    qp_attr.event_handler = isert_async_evt_handler;
    qp_attr.qp_context = isert_conn;
    qp_attr.send_cq = isert_dev->cq_desc[cq_idx].cq;
    qp_attr.recv_cq = isert_dev->cq_desc[cq_idx].cq;
    isert_conn->cq_desc = &isert_dev->cq_desc[cq_idx];
    qp_attr.cap.max_send_sge = isert_conn->max_sge;
    qp_attr.cap.max_recv_sge = 3;
    qp_attr.sq_sig_type = IB_SIGNAL_REQ_WR;
    qp_attr.qp_type = IB_QPT_RC;

    do {
        if (max_wr < ISER_MIN_SQ_SIZE) {
            pr_err("Failed to create qp, not enough memory\n");
            err = -ENOMEM; /* make sure err is set before bailing out */
            goto fail_create_qp;
        }
        qp_attr.cap.max_send_wr = max_wr;
        qp_attr.cap.max_recv_wr = max_wr;
        err = rdma_create_qp(cm_id, isert_dev->pd, &qp_attr);
        if (err && err != -ENOMEM) {
            pr_err("Failed to create qp, err:%d\n", err);
            goto fail_create_qp;
        }
        max_wr /= 2; /* retry with half the depth on -ENOMEM */
    } while (err == -ENOMEM);

    isert_conn->qp = cm_id->qp;

    pr_info("iser created cm_id:%p qp:0x%X\n", cm_id, cm_id->qp->qp_num);

out:
    TRACE_EXIT_RES(err);
    return err;

fail_create_qp:
    mutex_lock(&dev_list_mutex);
    isert_dev->cq_qps[cq_idx]--;
    mutex_unlock(&dev_list_mutex);
    goto out;
}
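/*
 * The loop above retries rdma_create_qp() with half the work-request count
 * whenever the driver runs out of resources. Below is a sketch of the same
 * back-off idea with the userspace librdmacm API; the function name and the
 * min_depth parameter are illustrative assumptions. Note that the userspace
 * rdma_create_qp() reports failure via errno rather than a -ENOMEM return.
 */
#include <errno.h>
#include <stdint.h>
#include <rdma/rdma_cma.h>

static int create_qp_with_backoff(struct rdma_cm_id *id, struct ibv_pd *pd,
                                  struct ibv_qp_init_attr *attr,
                                  uint32_t min_depth)
{
    int ret;

    for (;;) {
        ret = rdma_create_qp(id, pd, attr);
        if (!ret || errno != ENOMEM)
            return ret;
        if (attr->cap.max_send_wr / 2 < min_depth)
            return ret;                /* give up below the floor */
        attr->cap.max_send_wr /= 2;    /* halve both depths and retry */
        attr->cap.max_recv_wr /= 2;
    }
}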
void Connector::build_conn(struct rdma_cm_id* id_)
{
    struct ibv_qp_init_attr qp_attr;

    build_context(id_->verbs);
    build_qp_attr(&qp_attr);

    TEST_NZ(rdma_create_qp(id_, s_ctx_->pd_, &qp_attr));
}
static ssize_t fi_ibv_rdm_process_addr_resolved(struct rdma_cm_id *id,
                                                struct fi_ibv_rdm_ep *ep)
{
    ssize_t ret = FI_SUCCESS;
    struct ibv_qp_init_attr qp_attr;
    struct fi_ibv_rdm_tagged_conn *conn = id->context;

    VERBS_INFO(FI_LOG_AV, "ADDR_RESOLVED conn %p, addr %s:%u\n",
               conn, inet_ntoa(conn->addr.sin_addr),
               ntohs(conn->addr.sin_port));

    assert(id->verbs == ep->domain->verbs);

    do {
        fi_ibv_rdm_tagged_init_qp_attributes(&qp_attr, ep);
        if (rdma_create_qp(id, ep->domain->pd, &qp_attr)) {
            VERBS_INFO_ERRNO(FI_LOG_AV, "rdma_create_qp failed\n", errno);
            return -errno;
        }

        if (conn->cm_role == FI_VERBS_CM_PASSIVE)
            break;

        conn->qp[0] = id->qp;
        assert(conn->id[0] == id);

        if (conn->cm_role == FI_VERBS_CM_SELF)
            break;

        ret = fi_ibv_rdm_prepare_conn_memory(ep, conn);
        if (ret != FI_SUCCESS)
            goto err;

        ret = fi_ibv_rdm_repost_receives(conn, ep, ep->rq_wr_depth);
        if (ret < 0) {
            VERBS_INFO(FI_LOG_AV, "repost receives failed\n");
            goto err;
        } else {
            ret = FI_SUCCESS;
        }
    } while (0);

    if (rdma_resolve_route(id, FI_IBV_RDM_CM_RESOLVEADDR_TIMEOUT)) {
        VERBS_INFO(FI_LOG_AV, "rdma_resolve_route failed\n");
        ret = -FI_EHOSTUNREACH;
        goto err;
    }

    return ret;
err:
    rdma_destroy_qp(id);
    return ret;
}
/**
 * @param[in] ni
 * @param[in] conn
 * @param[in] event
 *
 * @return status
 *
 * conn is locked
 */
static int accept_connection_request(ni_t *ni, conn_t *conn,
                                     struct rdma_cm_event *event)
{
    struct rdma_conn_param conn_param;
    struct ibv_qp_init_attr init_attr;
    struct cm_priv_accept priv;

    conn->state = CONN_STATE_CONNECTING;

    memset(&init_attr, 0, sizeof(init_attr));
    init_attr.qp_type = IBV_QPT_RC;
    init_attr.cap.max_send_wr = ni->iface->cap.max_send_wr;
    init_attr.send_cq = ni->rdma.cq;
    init_attr.recv_cq = ni->rdma.cq;
    init_attr.srq = ni->rdma.srq;
    init_attr.cap.max_send_sge = ni->iface->cap.max_send_sge;

    if (rdma_create_qp(event->id, ni->iface->pd, &init_attr)) {
        conn->state = CONN_STATE_DISCONNECTED;
        pthread_cond_broadcast(&conn->move_wait);
        return PTL_FAIL;
    }

    /* If we were already trying to connect ourselves, cancel it. */
    if (conn->rdma.cm_id != NULL) {
        assert(conn->rdma.cm_id->context == conn);
        conn->rdma.cm_id->context = NULL;
    }

    event->id->context = conn;
    conn->rdma.cm_id = event->id;

    memset(&conn_param, 0, sizeof conn_param);
    conn_param.responder_resources = 1;
    conn_param.initiator_depth = 1;
    conn_param.retry_count = 7;
    conn_param.rnr_retry_count = 7;

    if (ni->options & PTL_NI_LOGICAL) {
        conn_param.private_data = &priv;
        conn_param.private_data_len = sizeof(priv);
    }

    if (rdma_accept(event->id, &conn_param)) {
        rdma_destroy_qp(event->id);
        conn->rdma.cm_id = NULL;
        conn->state = CONN_STATE_DISCONNECTED;
        pthread_cond_broadcast(&conn->move_wait);
        return PTL_FAIL;
    }

    return PTL_OK;
}
/*
 * Connect unconnected endpoint.
 */
int
rpcrdma_ep_connect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
{
    struct rdma_cm_id *id, *old;
    int rc = 0;
    int retry_count = 0;

    if (ep->rep_connected != 0) {
        struct rpcrdma_xprt *xprt;
retry:
        dprintk("RPC: %s: reconnecting...\n", __func__);

        rpcrdma_ep_disconnect(ep, ia);
        rpcrdma_flush_cqs(ep);

        xprt = container_of(ia, struct rpcrdma_xprt, rx_ia);
        ia->ri_ops->ro_reset(xprt);

        id = rpcrdma_create_id(xprt, ia,
                (struct sockaddr *)&xprt->rx_data.addr);
        if (IS_ERR(id)) {
            rc = -EHOSTUNREACH;
            goto out;
        }
        /* TEMP TEMP TEMP - fail if new device:
         * Deregister/remarshal *all* requests!
         * Close and recreate adapter, pd, etc!
         * Re-determine all attributes still sane!
         * More stuff I haven't thought of!
         * Rrrgh!
         */
        if (ia->ri_id->device != id->device) {
            printk("RPC: %s: can't reconnect on "
                   "different device!\n", __func__);
            rdma_destroy_id(id);
            rc = -ENETUNREACH;
            goto out;
        }
        /* END TEMP */
        rc = rdma_create_qp(id, ia->ri_pd, &ep->rep_attr);
        if (rc) {
            dprintk("RPC: %s: rdma_create_qp failed %i\n",
                    __func__, rc);
            rdma_destroy_id(id);
            rc = -ENETUNREACH;
            goto out;
        }

        write_lock(&ia->ri_qplock);
        old = ia->ri_id;
        ia->ri_id = id;
        write_unlock(&ia->ri_qplock);

        rdma_destroy_qp(old);
        rdma_destroy_id(old);
    } else {
void IBConnection::create_qp(struct ibv_pd* pd, struct ibv_cq* cq)
{
    struct ibv_qp_init_attr qp_attr;
    memset(&qp_attr, 0, sizeof qp_attr);
    qp_attr.cap = qp_cap_;
    qp_attr.send_cq = cq;
    qp_attr.recv_cq = cq;
    qp_attr.qp_type = IBV_QPT_RC;

    int err = rdma_create_qp(cm_id_, pd, &qp_attr);
    if (err)
        throw InfinibandException("creation of QP failed");
}
void build_connection(struct rdma_cm_id *id)
{
    struct ibv_qp_init_attr qp_attr;
    struct connection *conn;

    build_qp_attr(&qp_attr);

    TEST_NZ(rdma_create_qp(id, s_ctx->pd, &qp_attr));

    id->context = conn = (struct connection *)malloc(sizeof(struct connection));
    conn->id = id;
    conn->qp = id->qp;
}
static int init_node(struct cmatest_node *node)
{
    struct ibv_qp_init_attr init_qp_attr;
    int cqe, ret;

    node->pd = ibv_alloc_pd(node->cma_id->verbs);
    if (!node->pd) {
        ret = -ENOMEM;
        printf("cmatose: unable to allocate PD\n");
        goto out;
    }

    cqe = message_count ? message_count : 1;
    node->cq[SEND_CQ_INDEX] = ibv_create_cq(node->cma_id->verbs, cqe,
                                            node, NULL, 0);
    node->cq[RECV_CQ_INDEX] = ibv_create_cq(node->cma_id->verbs, cqe,
                                            node, NULL, 0);
    if (!node->cq[SEND_CQ_INDEX] || !node->cq[RECV_CQ_INDEX]) {
        ret = -ENOMEM;
        printf("cmatose: unable to create CQ\n");
        goto out;
    }

    memset(&init_qp_attr, 0, sizeof init_qp_attr);
    init_qp_attr.cap.max_send_wr = cqe;
    init_qp_attr.cap.max_recv_wr = cqe;
    init_qp_attr.cap.max_send_sge = 1;
    init_qp_attr.cap.max_recv_sge = 1;
    init_qp_attr.qp_context = node;
    init_qp_attr.sq_sig_all = 1;
    init_qp_attr.qp_type = IBV_QPT_RC;
    init_qp_attr.send_cq = node->cq[SEND_CQ_INDEX];
    init_qp_attr.recv_cq = node->cq[RECV_CQ_INDEX];
    ret = rdma_create_qp(node->cma_id, node->pd, &init_qp_attr);
    if (ret) {
        perror("cmatose: unable to create QP");
        goto out;
    }

    ret = create_message(node);
    if (ret) {
        printf("cmatose: failed to create messages: %d\n", ret);
        goto out;
    }
out:
    return ret;
}
/**
 * Accept an RC connection request to self.
 *
 * called while holding connect->mutex
 * only used for physical NIs
 *
 * @param[in] ni
 * @param[in] conn
 * @param[in] event
 *
 * @return status
 */
static int accept_connection_self(ni_t *ni, conn_t *conn,
                                  struct rdma_cm_event *event)
{
    struct rdma_conn_param conn_param;
    struct ibv_qp_init_attr init_attr;

    conn->state = CONN_STATE_CONNECTING;

    memset(&init_attr, 0, sizeof(init_attr));
    init_attr.qp_type = IBV_QPT_RC;
    init_attr.send_cq = ni->rdma.cq;
    init_attr.recv_cq = ni->rdma.cq;
    init_attr.srq = ni->rdma.srq;
    init_attr.cap.max_send_wr = ni->iface->cap.max_send_wr;
    init_attr.cap.max_send_sge = ni->iface->cap.max_send_sge;

    if (rdma_create_qp(event->id, ni->iface->pd, &init_attr)) {
        conn->state = CONN_STATE_DISCONNECTED;
        pthread_cond_broadcast(&conn->move_wait);
        return PTL_FAIL;
    }

    ni->rdma.self_cm_id = event->id;

    /* The lower 2 bits (on 32-bit hosts) or 3 bits (on 64-bit hosts) of a
     * pointer are always 0. Use one of them to store the type of context:
     * 0=conn; 1=NI. */
    event->id->context = (void *)((uintptr_t)ni | 1);

    memset(&conn_param, 0, sizeof conn_param);
    conn_param.responder_resources = 1;
    conn_param.initiator_depth = 1;
    conn_param.rnr_retry_count = 7;

    if (rdma_accept(event->id, &conn_param)) {
        rdma_destroy_qp(event->id);
        conn->state = CONN_STATE_DISCONNECTED;
        pthread_cond_broadcast(&conn->move_wait);
        return PTL_FAIL;
    }

    return PTL_OK;
}
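/*
 * Sketch of the low-bit tagging used above: because allocations are aligned,
 * the bottom bit of a context pointer is free to record whether it refers to
 * a conn or to an NI. The helper names below are illustrative and not part
 * of the original source.
 */
#include <stdint.h>

static inline void *ctx_tag_ni(void *ni)   /* mark context as an NI */
{
    return (void *)((uintptr_t)ni | 1);
}

static inline int ctx_is_ni(void *ctx)     /* test the tag bit */
{
    return (uintptr_t)ctx & 1;
}

static inline void *ctx_untag(void *ctx)   /* recover the real pointer */
{
    return (void *)((uintptr_t)ctx & ~(uintptr_t)1);
}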
static int init_node(struct cmatest_node *node)
{
    struct ibv_qp_init_attr init_qp_attr;
    int cqe, ret;

    node->pd = ibv_alloc_pd(node->cma_id->verbs);
    if (!node->pd) {
        ret = -ENOMEM;
        printf("rxe_send_mc: unable to allocate PD\n");
        goto out;
    }

    cqe = message_buffer ? message_buffer * 2 : 2;
    node->cq = ibv_create_cq(node->cma_id->verbs, cqe, node, 0, 0);
    if (!node->cq) {
        ret = -ENOMEM;
        printf("rxe_send_mc: unable to create CQ\n");
        goto out;
    }

    memset(&init_qp_attr, 0, sizeof init_qp_attr);
    init_qp_attr.cap.max_send_wr = message_buffer ? message_buffer : 1;
    init_qp_attr.cap.max_recv_wr = message_buffer ? message_buffer : 1;
    init_qp_attr.cap.max_send_sge = 1;
    init_qp_attr.cap.max_recv_sge = 1;
    init_qp_attr.qp_context = node;
    init_qp_attr.sq_sig_all = 1; /* signal all sends */
    init_qp_attr.qp_type = IBV_QPT_UD;
    init_qp_attr.send_cq = node->cq;
    init_qp_attr.recv_cq = node->cq;
    ret = rdma_create_qp(node->cma_id, node->pd, &init_qp_attr);
    if (ret) {
        perror("rxe_send_mc: unable to create QP");
        goto out;
    }

    ret = create_message(node);
    if (ret) {
        printf("rxe_send_mc: failed to create messages: %d\n", ret);
        goto out;
    }
out:
    return ret;
}
void IBConnection::on_addr_resolved(struct ibv_pd* pd, struct ibv_cq* cq)
{
    L_(debug) << "address resolved";

    struct ibv_qp_init_attr qp_attr;
    memset(&qp_attr, 0, sizeof qp_attr);
    qp_attr.cap = qp_cap_;
    qp_attr.send_cq = cq;
    qp_attr.recv_cq = cq;
    qp_attr.qp_type = IBV_QPT_RC;
    int err = rdma_create_qp(cm_id_, pd, &qp_attr);
    if (err)
        throw InfinibandException("creation of QP failed");

    err = rdma_resolve_route(cm_id_, RESOLVE_TIMEOUT_MS);
    if (err)
        throw InfinibandException("rdma_resolve_route failed");

    setup(pd);
}
void build_connection(struct rdma_cm_id *id)
{
    IbvConnection *conn;
    struct ibv_qp_init_attr qp_attr;

    id->context = conn = (IbvConnection *)malloc(sizeof(IbvConnection));

    build_verbs(conn, id->verbs);
    build_qp_attr(conn, &qp_attr);

    TEST_NZ(rdma_create_qp(id, conn->pd, &qp_attr));

    conn->id = id;
    conn->qp = id->qp;
    conn->connected = 0;

    register_memory(conn);
    post_receives(conn);
}
void build_connection(struct rdma_cm_id *id)
{
    struct ibv_qp_init_attr qp_attr;
    struct connection *conn;
    struct timeval start, end, dt;

    build_qp_attr(&qp_attr);

    TEST_NZ(rdma_create_qp(id, s_ctx->pd, &qp_attr));

    id->context = conn = (struct connection *)malloc(sizeof(struct connection));
    conn->id = id;
    conn->qp = id->qp;

    gettimeofday(&start, NULL);
    register_memory(conn);
    gettimeofday(&end, NULL);

    timersub(&end, &start, &dt);
    long usec = dt.tv_usec + 1000000 * dt.tv_sec;
    printf("[Register] takes %ld micro_secs.\n", usec);
}
int on_connect_request(struct rdma_cm_id *id)
{
    struct ibv_qp_init_attr qp_attr;
    struct rdma_conn_param cm_params;
    struct connection *conn;

    printf("received connection request.\n");

    build_context(id->verbs);
    build_qp_attr(&qp_attr);

    TEST_NZ(rdma_create_qp(id, s_ctx->pd, &qp_attr));

    id->context = conn = (struct connection *)malloc(sizeof(struct connection));
    conn->qp = id->qp;

    register_memory(conn);
    post_receives(conn);

    memset(&cm_params, 0, sizeof(cm_params));
    TEST_NZ(rdma_accept(id, &cm_params));

    return 0;
}
    }
    /* END TEMP */

    rc = rdma_create_qp(id, ia->ri_pd, &ep->rep_attr);
    if (rc) {
        dprintk("RPC: %s: rdma_create_qp failed %i\n",
                __func__, rc);
        rdma_destroy_id(id);
        rc = -ENETUNREACH;
        goto out;
    }

    rdma_destroy_qp(ia->ri_id);
    rdma_destroy_id(ia->ri_id);
    ia->ri_id = id;
} else {
    dprintk("RPC: %s: connecting...\n", __func__);
    rc = rdma_create_qp(ia->ri_id, ia->ri_pd, &ep->rep_attr);
    if (rc) {
        dprintk("RPC: %s: rdma_create_qp failed %i\n",
                __func__, rc);
        /* do not update ep->rep_connected */
        return -ENETUNREACH;
    }
}

ep->rep_connected = 0;

rc = rdma_connect(ia->ri_id, &ep->rep_remote_cma);
if (rc) {
    dprintk("RPC: %s: rdma_connect() failed with %i\n",
            __func__, rc);
    goto out;
/**
 * iser_create_ib_conn_res - Creates FMR pool and Queue-Pair (QP)
 *
 * returns 0 on success, -1 on failure
 */
static int iser_create_ib_conn_res(struct iser_conn *ib_conn)
{
    struct iser_device *device;
    struct ib_qp_init_attr init_attr;
    int ret = -ENOMEM;
    struct ib_fmr_pool_param params;

    BUG_ON(ib_conn->device == NULL);

    device = ib_conn->device;

    ib_conn->login_buf = kmalloc(ISER_RX_LOGIN_SIZE, GFP_KERNEL);
    if (!ib_conn->login_buf)
        goto alloc_err;

    ib_conn->login_dma = ib_dma_map_single(ib_conn->device->ib_device,
                                           (void *)ib_conn->login_buf,
                                           ISER_RX_LOGIN_SIZE,
                                           DMA_FROM_DEVICE);

    ib_conn->page_vec = kmalloc(sizeof(struct iser_page_vec) +
                                (sizeof(u64) * (ISCSI_ISER_SG_TABLESIZE + 1)),
                                GFP_KERNEL);
    if (!ib_conn->page_vec) {
        ret = -ENOMEM;
        goto alloc_err;
    }
    ib_conn->page_vec->pages = (u64 *)(ib_conn->page_vec + 1);

    params.page_shift = SHIFT_4K;
    /* when the first/last SG element are not start/end
     * page aligned, the map would be of N+1 pages */
    params.max_pages_per_fmr = ISCSI_ISER_SG_TABLESIZE + 1;
    /* make the pool size twice the max number of SCSI commands
     * the ML is expected to queue, watermark for unmap at 50% */
    params.pool_size = ISCSI_DEF_XMIT_CMDS_MAX * 2;
    params.dirty_watermark = ISCSI_DEF_XMIT_CMDS_MAX;
    params.cache = 0;
    params.flush_function = NULL;
    params.access = (IB_ACCESS_LOCAL_WRITE |
                     IB_ACCESS_REMOTE_WRITE |
                     IB_ACCESS_REMOTE_READ);

    ib_conn->fmr_pool = ib_create_fmr_pool(device->pd, &params);
    if (IS_ERR(ib_conn->fmr_pool)) {
        ret = PTR_ERR(ib_conn->fmr_pool);
        goto fmr_pool_err;
    }

    memset(&init_attr, 0, sizeof init_attr);

    init_attr.event_handler = iser_qp_event_callback;
    init_attr.qp_context = (void *)ib_conn;
    init_attr.send_cq = device->tx_cq;
    init_attr.recv_cq = device->rx_cq;
    init_attr.cap.max_send_wr = ISER_QP_MAX_REQ_DTOS;
    init_attr.cap.max_recv_wr = ISER_QP_MAX_RECV_DTOS;
    init_attr.cap.max_send_sge = 2;
    init_attr.cap.max_recv_sge = 1;
    init_attr.sq_sig_type = IB_SIGNAL_REQ_WR;
    init_attr.qp_type = IB_QPT_RC;

    ret = rdma_create_qp(ib_conn->cma_id, device->pd, &init_attr);
    if (ret)
        goto qp_err;

    ib_conn->qp = ib_conn->cma_id->qp;
    iser_err("setting conn %p cma_id %p: fmr_pool %p qp %p\n",
             ib_conn, ib_conn->cma_id,
             ib_conn->fmr_pool, ib_conn->cma_id->qp);
    return ret;

qp_err:
    (void)ib_destroy_fmr_pool(ib_conn->fmr_pool);
fmr_pool_err:
    kfree(ib_conn->page_vec);
    kfree(ib_conn->login_buf);
alloc_err:
    iser_err("unable to alloc mem or create resource, err %d\n", ret);
    return ret;
}
/*
 * This needs to be very careful to not leave IS_ERR pointers around for
 * cleanup to trip over.
 */
static int rds_iw_setup_qp(struct rds_connection *conn)
{
    struct rds_iw_connection *ic = conn->c_transport_data;
    struct ib_device *dev = ic->i_cm_id->device;
    struct ib_qp_init_attr attr;
    struct rds_iw_device *rds_iwdev;
    int ret;

    /* rds_iw_add_one creates a rds_iw_device object per IB device,
     * and allocates a protection domain, memory range and MR pool
     * for each. If that fails for any reason, it will not register
     * the rds_iwdev at all.
     */
    rds_iwdev = ib_get_client_data(dev, &rds_iw_client);
    if (!rds_iwdev) {
        if (printk_ratelimit())
            printk(KERN_NOTICE "RDS/IW: No client_data for device %s\n",
                   dev->name);
        return -EOPNOTSUPP;
    }

    /* Protection domain and memory range */
    ic->i_pd = rds_iwdev->pd;
    ic->i_mr = rds_iwdev->mr;

    ret = rds_iw_init_qp_attrs(&attr, rds_iwdev,
            &ic->i_send_ring, rds_iw_send_cq_comp_handler,
            &ic->i_recv_ring, rds_iw_recv_cq_comp_handler,
            conn);
    if (ret < 0)
        goto out;

    ic->i_send_cq = attr.send_cq;
    ic->i_recv_cq = attr.recv_cq;

    /*
     * XXX this can fail if max_*_wr is too large? Are we supposed
     * to back off until we get a value that the hardware can support?
     */
    ret = rdma_create_qp(ic->i_cm_id, ic->i_pd, &attr);
    if (ret) {
        rdsdebug("rdma_create_qp failed: %d\n", ret);
        goto out;
    }

    ic->i_send_hdrs = ib_dma_alloc_coherent(dev,
                    ic->i_send_ring.w_nr * sizeof(struct rds_header),
                    &ic->i_send_hdrs_dma, GFP_KERNEL);
    if (!ic->i_send_hdrs) {
        ret = -ENOMEM;
        rdsdebug("ib_dma_alloc_coherent send failed\n");
        goto out;
    }

    ic->i_recv_hdrs = ib_dma_alloc_coherent(dev,
                    ic->i_recv_ring.w_nr * sizeof(struct rds_header),
                    &ic->i_recv_hdrs_dma, GFP_KERNEL);
    if (!ic->i_recv_hdrs) {
        ret = -ENOMEM;
        rdsdebug("ib_dma_alloc_coherent recv failed\n");
        goto out;
    }

    ic->i_ack = ib_dma_alloc_coherent(dev, sizeof(struct rds_header),
                                      &ic->i_ack_dma, GFP_KERNEL);
    if (!ic->i_ack) {
        ret = -ENOMEM;
        rdsdebug("ib_dma_alloc_coherent ack failed\n");
        goto out;
    }

    ic->i_sends = vmalloc(ic->i_send_ring.w_nr *
                          sizeof(struct rds_iw_send_work));
    if (!ic->i_sends) {
        ret = -ENOMEM;
        rdsdebug("send allocation failed\n");
        goto out;
    }
    rds_iw_send_init_ring(ic);

    ic->i_recvs = vmalloc(ic->i_recv_ring.w_nr *
                          sizeof(struct rds_iw_recv_work));
    if (!ic->i_recvs) {
        ret = -ENOMEM;
        rdsdebug("recv allocation failed\n");
        goto out;
    }
    rds_iw_recv_init_ring(ic);
    rds_iw_recv_init_ack(ic);

    /* Post receive buffers - as a side effect, this will update
     * the posted credit count. */
    rds_iw_recv_refill(conn, GFP_KERNEL, GFP_HIGHUSER, 1);

    rdsdebug("conn %p pd %p mr %p cq %p %p\n", conn, ic->i_pd, ic->i_mr,
             ic->i_send_cq, ic->i_recv_cq);

out:
    return ret;
}
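/*
 * The XXX comment above asks whether the caller should back off when
 * max_*_wr is too large. One common answer (an illustrative sketch, not the
 * RDS code) is to clamp the requested depths against the device attributes
 * before calling rdma_create_qp(); dev and attr below are the local
 * variables of a function like the one above.
 */
struct ib_device_attr dev_attr;

if (!ib_query_device(dev, &dev_attr)) {
    attr.cap.max_send_wr = min_t(u32, attr.cap.max_send_wr,
                                 dev_attr.max_qp_wr);
    attr.cap.max_recv_wr = min_t(u32, attr.cap.max_recv_wr,
                                 dev_attr.max_qp_wr);
}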
static int iser_create_ib_conn_res(struct iser_conn *ib_conn)
{
    struct iser_device *device;
    struct ib_qp_init_attr init_attr;
    int req_err, resp_err, ret = -ENOMEM;
    struct ib_fmr_pool_param params;

    BUG_ON(ib_conn->device == NULL);

    device = ib_conn->device;

    ib_conn->login_buf = kmalloc(ISCSI_DEF_MAX_RECV_SEG_LEN +
                                 ISER_RX_LOGIN_SIZE, GFP_KERNEL);
    if (!ib_conn->login_buf)
        goto out_err;

    ib_conn->login_req_buf  = ib_conn->login_buf;
    ib_conn->login_resp_buf = ib_conn->login_buf + ISCSI_DEF_MAX_RECV_SEG_LEN;

    ib_conn->login_req_dma = ib_dma_map_single(ib_conn->device->ib_device,
                                (void *)ib_conn->login_req_buf,
                                ISCSI_DEF_MAX_RECV_SEG_LEN, DMA_TO_DEVICE);

    ib_conn->login_resp_dma = ib_dma_map_single(ib_conn->device->ib_device,
                                (void *)ib_conn->login_resp_buf,
                                ISER_RX_LOGIN_SIZE, DMA_FROM_DEVICE);

    req_err  = ib_dma_mapping_error(device->ib_device,
                                    ib_conn->login_req_dma);
    resp_err = ib_dma_mapping_error(device->ib_device,
                                    ib_conn->login_resp_dma);

    if (req_err || resp_err) {
        if (req_err)
            ib_conn->login_req_dma = 0;
        if (resp_err)
            ib_conn->login_resp_dma = 0;
        goto out_err;
    }

    ib_conn->page_vec = kmalloc(sizeof(struct iser_page_vec) +
                                (sizeof(u64) * (ISCSI_ISER_SG_TABLESIZE + 1)),
                                GFP_KERNEL);
    if (!ib_conn->page_vec)
        goto out_err;

    ib_conn->page_vec->pages = (u64 *)(ib_conn->page_vec + 1);

    params.page_shift = SHIFT_4K;
    params.max_pages_per_fmr = ISCSI_ISER_SG_TABLESIZE + 1;
    params.pool_size = ISCSI_DEF_XMIT_CMDS_MAX * 2;
    params.dirty_watermark = ISCSI_DEF_XMIT_CMDS_MAX;
    params.cache = 0;
    params.flush_function = NULL;
    params.access = (IB_ACCESS_LOCAL_WRITE |
                     IB_ACCESS_REMOTE_WRITE |
                     IB_ACCESS_REMOTE_READ);

    ib_conn->fmr_pool = ib_create_fmr_pool(device->pd, &params);
    if (IS_ERR(ib_conn->fmr_pool)) {
        ret = PTR_ERR(ib_conn->fmr_pool);
        ib_conn->fmr_pool = NULL;
        goto out_err;
    }

    memset(&init_attr, 0, sizeof init_attr);

    init_attr.event_handler = iser_qp_event_callback;
    init_attr.qp_context = (void *)ib_conn;
    init_attr.send_cq = device->tx_cq;
    init_attr.recv_cq = device->rx_cq;
    init_attr.cap.max_send_wr = ISER_QP_MAX_REQ_DTOS;
    init_attr.cap.max_recv_wr = ISER_QP_MAX_RECV_DTOS;
    init_attr.cap.max_send_sge = 2;
    init_attr.cap.max_recv_sge = 1;
    init_attr.sq_sig_type = IB_SIGNAL_REQ_WR;
    init_attr.qp_type = IB_QPT_RC;

    ret = rdma_create_qp(ib_conn->cma_id, device->pd, &init_attr);
    if (ret)
        goto out_err;

    ib_conn->qp = ib_conn->cma_id->qp;
    iser_err("setting conn %p cma_id %p: fmr_pool %p qp %p\n",
             ib_conn, ib_conn->cma_id,
             ib_conn->fmr_pool, ib_conn->cma_id->qp);
    return ret;

out_err:
    iser_err("unable to alloc mem or create resource, err %d\n", ret);
    return ret;
}
static int ibw_setup_cq_qp(struct ibw_conn *conn)
{
    struct ibw_ctx_priv *pctx = talloc_get_type(conn->ctx->internal,
                                                struct ibw_ctx_priv);
    struct ibw_conn_priv *pconn = talloc_get_type(conn->internal,
                                                  struct ibw_conn_priv);
    struct ibv_qp_init_attr init_attr;
    struct ibv_qp_attr attr;
    int rc;

    DEBUG(DEBUG_DEBUG, ("ibw_setup_cq_qp(cmid: %p)\n", pconn->cm_id));

    /* init verbs */
    pconn->verbs_channel = ibv_create_comp_channel(pconn->cm_id->verbs);
    if (!pconn->verbs_channel) {
        sprintf(ibw_lasterr, "ibv_create_comp_channel failed %d\n", errno);
        return -1;
    }
    DEBUG(DEBUG_DEBUG, ("created channel %p\n", pconn->verbs_channel));

    pconn->verbs_channel_event = tevent_add_fd(pctx->ectx,
        NULL, /* not pconn or conn */
        pconn->verbs_channel->fd, TEVENT_FD_READ,
        ibw_event_handler_verbs, conn);

    pconn->pd = ibv_alloc_pd(pconn->cm_id->verbs);
    if (!pconn->pd) {
        sprintf(ibw_lasterr, "ibv_alloc_pd failed %d\n", errno);
        return -1;
    }
    DEBUG(DEBUG_DEBUG, ("created pd %p\n", pconn->pd));

    /* init mr */
    if (ibw_init_memory(conn))
        return -1;

    /* init cq */
    pconn->cq = ibv_create_cq(pconn->cm_id->verbs,
                              pctx->opts.max_recv_wr + pctx->opts.max_send_wr,
                              conn, pconn->verbs_channel, 0);
    if (pconn->cq == NULL) {
        sprintf(ibw_lasterr, "ibv_create_cq failed\n");
        return -1;
    }

    rc = ibv_req_notify_cq(pconn->cq, 0);
    if (rc) {
        sprintf(ibw_lasterr, "ibv_req_notify_cq failed with %d\n", rc);
        return rc;
    }

    /* init qp */
    memset(&init_attr, 0, sizeof(init_attr));
    init_attr.cap.max_send_wr = pctx->opts.max_send_wr;
    init_attr.cap.max_recv_wr = pctx->opts.max_recv_wr;
    init_attr.cap.max_recv_sge = 1;
    init_attr.cap.max_send_sge = 1;
    init_attr.qp_type = IBV_QPT_RC;
    init_attr.send_cq = pconn->cq;
    init_attr.recv_cq = pconn->cq;

    rc = rdma_create_qp(pconn->cm_id, pconn->pd, &init_attr);
    if (rc) {
        sprintf(ibw_lasterr, "rdma_create_qp failed with %d\n", rc);
        return rc;
    }
    /* else result is in pconn->cm_id->qp */

    rc = ibv_query_qp(pconn->cm_id->qp, &attr, IBV_QP_PATH_MTU, &init_attr);
    if (rc) {
        sprintf(ibw_lasterr, "ibv_query_qp failed with %d\n", rc);
        return rc;
    }

    return ibw_fill_cq(conn);
}
static int p9_rdma_bind_privport(struct p9_trans_rdma *rdma)
{
    struct sockaddr_in cl = {
        .sin_family = AF_INET,
        .sin_addr.s_addr = htonl(INADDR_ANY),
    };
    int port, err = -EINVAL;

    for (port = P9_DEF_MAX_RESVPORT; port >= P9_DEF_MIN_RESVPORT; port--) {
        cl.sin_port = htons((ushort)port);
        err = rdma_bind_addr(rdma->cm_id, (struct sockaddr *)&cl);
        if (err != -EADDRINUSE)
            break;
    }
    return err;
}

/**
 * trans_create_rdma - Transport method for creating a transport instance
 * @client: client instance
 * @addr: IP address string
 * @args: Mount options string
 */
static int
rdma_create_trans(struct p9_client *client, const char *addr, char *args)
{
    int err;
    struct p9_rdma_opts opts;
    struct p9_trans_rdma *rdma;
    struct rdma_conn_param conn_param;
    struct ib_qp_init_attr qp_attr;
    struct ib_device_attr devattr;
    struct ib_cq_init_attr cq_attr = {};

    /* Parse the transport specific mount options */
    err = parse_opts(args, &opts);
    if (err < 0)
        return err;

    /* Create and initialize the RDMA transport structure */
    rdma = alloc_rdma(&opts);
    if (!rdma)
        return -ENOMEM;

    /* Create the RDMA CM ID */
    rdma->cm_id = rdma_create_id(p9_cm_event_handler, client, RDMA_PS_TCP,
                                 IB_QPT_RC);
    if (IS_ERR(rdma->cm_id))
        goto error;

    /* Associate the client with the transport */
    client->trans = rdma;

    /* Bind to a privileged port if we need to */
    if (opts.privport) {
        err = p9_rdma_bind_privport(rdma);
        if (err < 0) {
            pr_err("%s (%d): problem binding to privport: %d\n",
                   __func__, task_pid_nr(current), -err);
            goto error;
        }
    }

    /* Resolve the server's address */
    rdma->addr.sin_family = AF_INET;
    rdma->addr.sin_addr.s_addr = in_aton(addr);
    rdma->addr.sin_port = htons(opts.port);
    err = rdma_resolve_addr(rdma->cm_id, NULL,
                            (struct sockaddr *)&rdma->addr,
                            rdma->timeout);
    if (err)
        goto error;
    err = wait_for_completion_interruptible(&rdma->cm_done);
    if (err || (rdma->state != P9_RDMA_ADDR_RESOLVED))
        goto error;

    /* Resolve the route to the server */
    err = rdma_resolve_route(rdma->cm_id, rdma->timeout);
    if (err)
        goto error;
    err = wait_for_completion_interruptible(&rdma->cm_done);
    if (err || (rdma->state != P9_RDMA_ROUTE_RESOLVED))
        goto error;

    /* Query the device attributes */
    err = ib_query_device(rdma->cm_id->device, &devattr);
    if (err)
        goto error;

    /* Create the Completion Queue */
    cq_attr.cqe = opts.sq_depth + opts.rq_depth + 1;
    rdma->cq = ib_create_cq(rdma->cm_id->device, cq_comp_handler,
                            cq_event_handler, client, &cq_attr);
    if (IS_ERR(rdma->cq))
        goto error;
    ib_req_notify_cq(rdma->cq, IB_CQ_NEXT_COMP);

    /* Create the Protection Domain */
    rdma->pd = ib_alloc_pd(rdma->cm_id->device);
    if (IS_ERR(rdma->pd))
        goto error;

    /* Cache the DMA lkey in the transport */
    rdma->dma_mr = NULL;
    if (devattr.device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY)
        rdma->lkey = rdma->cm_id->device->local_dma_lkey;
    else {
        rdma->dma_mr = ib_get_dma_mr(rdma->pd, IB_ACCESS_LOCAL_WRITE);
        if (IS_ERR(rdma->dma_mr))
            goto error;
        rdma->lkey = rdma->dma_mr->lkey;
    }

    /* Create the Queue Pair */
    memset(&qp_attr, 0, sizeof qp_attr);
    qp_attr.event_handler = qp_event_handler;
    qp_attr.qp_context = client;
    qp_attr.cap.max_send_wr = opts.sq_depth;
    qp_attr.cap.max_recv_wr = opts.rq_depth;
    qp_attr.cap.max_send_sge = P9_RDMA_SEND_SGE;
    qp_attr.cap.max_recv_sge = P9_RDMA_RECV_SGE;
    qp_attr.sq_sig_type = IB_SIGNAL_REQ_WR;
    qp_attr.qp_type = IB_QPT_RC;
    qp_attr.send_cq = rdma->cq;
    qp_attr.recv_cq = rdma->cq;
    err = rdma_create_qp(rdma->cm_id, rdma->pd, &qp_attr);
    if (err)
        goto error;
    rdma->qp = rdma->cm_id->qp;

    /* Request a connection */
    memset(&conn_param, 0, sizeof(conn_param));
    conn_param.private_data = NULL;
    conn_param.private_data_len = 0;
    conn_param.responder_resources = P9_RDMA_IRD;
    conn_param.initiator_depth = P9_RDMA_ORD;
    err = rdma_connect(rdma->cm_id, &conn_param);
    if (err)
        goto error;
    err = wait_for_completion_interruptible(&rdma->cm_done);
    if (err || (rdma->state != P9_RDMA_CONNECTED))
        goto error;
    client->status = Connected;

    return 0;

error:
    rdma_destroy_trans(rdma);
    return -ENOTCONN;
}
static int sdp_init_qp(struct sock *sk, struct rdma_cm_id *id)
{
    struct ib_qp_init_attr qp_init_attr = {
        .event_handler = sdp_qp_event_handler,
        .cap.max_send_wr = SDP_TX_SIZE,
        .cap.max_recv_wr = sdp_rx_size,
        .cap.max_inline_data = sdp_inline_thresh,
        .sq_sig_type = IB_SIGNAL_REQ_WR,
        .qp_type = IB_QPT_RC,
    };
    struct ib_device *device = id->device;
    int rc;

    sdp_dbg(sk, "%s\n", __func__);

    sdp_sk(sk)->max_sge = sdp_get_max_dev_sge(device);
    sdp_dbg(sk, "Max sges: %d\n", sdp_sk(sk)->max_sge);

    qp_init_attr.cap.max_send_sge = MIN(sdp_sk(sk)->max_sge,
                                        SDP_MAX_SEND_SGES);
    sdp_dbg(sk, "Setting max send sge to: %d\n",
            qp_init_attr.cap.max_send_sge);

    qp_init_attr.cap.max_recv_sge = MIN(sdp_sk(sk)->max_sge,
                                        SDP_MAX_RECV_SGES);
    sdp_dbg(sk, "Setting max recv sge to: %d\n",
            qp_init_attr.cap.max_recv_sge);

    sdp_sk(sk)->sdp_dev = ib_get_client_data(device, &sdp_client);
    if (!sdp_sk(sk)->sdp_dev) {
        sdp_warn(sk, "SDP not available on device %s\n", device->name);
        rc = -ENODEV;
        goto err_rx;
    }

    rc = sdp_rx_ring_create(sdp_sk(sk), device);
    if (rc)
        goto err_rx;

    rc = sdp_tx_ring_create(sdp_sk(sk), device);
    if (rc)
        goto err_tx;

    qp_init_attr.recv_cq = sdp_sk(sk)->rx_ring.cq;
    qp_init_attr.send_cq = sdp_sk(sk)->tx_ring.cq;

    rc = rdma_create_qp(id, sdp_sk(sk)->sdp_dev->pd, &qp_init_attr);
    if (rc) {
        sdp_warn(sk, "Unable to create QP: %d.\n", rc);
        goto err_qp;
    }
    sdp_sk(sk)->qp = id->qp;
    sdp_sk(sk)->ib_device = device;
    sdp_sk(sk)->qp_active = 1;
    sdp_sk(sk)->context.device = device;
    sdp_sk(sk)->inline_thresh = qp_init_attr.cap.max_inline_data;

    sdp_dbg(sk, "%s done\n", __func__);
    return 0;

err_qp:
    sdp_tx_ring_destroy(sdp_sk(sk));
err_tx:
    sdp_rx_ring_destroy(sdp_sk(sk));
err_rx:
    return rc;
}

static int sdp_get_max_send_frags(u32 buf_size)
{
    return MIN(
        /* +1 to compensate for unaligned buffers */
        (PAGE_ALIGN(buf_size) >> PAGE_SHIFT) + 1,
        SDP_MAX_SEND_SGES - 1);
}

static int sdp_connect_handler(struct sock *sk, struct rdma_cm_id *id,
                               struct rdma_cm_event *event)
{
    struct sockaddr_in *dst_addr;
    struct sock *child;
    const struct sdp_hh *h;
    int rc = 0;

    sdp_dbg(sk, "%s %p -> %p\n", __func__, sdp_sk(sk)->id, id);

    h = event->param.conn.private_data;
    SDP_DUMP_PACKET(sk, "RX", NULL, &h->bsdh);

    if (h->ipv_cap & HH_IPV_MASK & ~(HH_IPV4 | HH_IPV6)) {
        sdp_warn(sk, "Bad IPV field in SDP Hello header: 0x%x\n",
                 h->ipv_cap & HH_IPV_MASK);
        return -EINVAL;
    }

    if (!h->max_adverts)
        return -EINVAL;

#if (LINUX_VERSION_CODE < KERNEL_VERSION(3, 3, 0))
    child = sk_clone(sk, GFP_KERNEL);
#else
    child = sk_clone_lock(sk, GFP_KERNEL);
#endif
    if (!child)
        return -ENOMEM;

    sdp_init_sock(child);

    dst_addr = (struct sockaddr_in *)&id->route.addr.dst_addr;
    sdp_inet_dport(child) = dst_addr->sin_port;

#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
    if (inet6_sk(sk)) {
        struct ipv6_pinfo *newnp;

        newnp = inet_sk(child)->pinet6 = sdp_inet6_sk_generic(child);
        memcpy(newnp, inet6_sk(sk), sizeof(struct ipv6_pinfo));

        if ((h->ipv_cap & HH_IPV_MASK) == HH_IPV4) {
            /* V6 mapped */
            sdp_inet_daddr(child) = dst_addr->sin_addr.s_addr;
            ipv6_addr_set(&child->sk_v6_daddr, 0, 0,
                          htonl(0x0000FFFF), h->src_addr.ip4.addr);
            ipv6_addr_set(&child->sk_v6_rcv_saddr, 0, 0,
                          htonl(0x0000FFFF), h->dst_addr.ip4.addr);
            ipv6_addr_copy(&child->sk_v6_rcv_saddr, &child->sk_v6_daddr);
        } else if ((h->ipv_cap & HH_IPV_MASK) == HH_IPV6) {
            struct sockaddr_in6 *dst_addr6 =
                (struct sockaddr_in6 *)dst_addr;
            struct sockaddr_in6 *src_addr6 =
                (struct sockaddr_in6 *)&id->route.addr.src_addr;

            ipv6_addr_copy(&child->sk_v6_daddr, &dst_addr6->sin6_addr);
            ipv6_addr_copy(&child->sk_v6_rcv_saddr, &src_addr6->sin6_addr);
            ipv6_addr_copy(&newnp->saddr, &src_addr6->sin6_addr);
        } else {
            sdp_warn(child, "Bad IPV field: 0x%x\n",
                     h->ipv_cap & HH_IPV_MASK);
        }

        sdp_inet_daddr(child) = sdp_inet_saddr(child) =
            sdp_inet_rcv_saddr(child) = LOOPBACK4_IPV6;
    } else
#endif
    {
        sdp_inet_daddr(child) = dst_addr->sin_addr.s_addr;
    }

#ifdef SDP_SOCK_HISTORY
    sdp_ssk_hist_rename(sk);
#endif
    __sock_put(child, SOCK_REF_CLONE);

    down_read(&device_removal_lock);

    rc = sdp_init_qp(child, id);
    if (rc) {
        bh_unlock_sock(child);
        up_read(&device_removal_lock);
        sdp_sk(child)->destructed_already = 1;
#ifdef SDP_SOCK_HISTORY
        sdp_ssk_hist_close(child);
#endif
        sk_free(child);
        return rc;
    }

    sdp_sk(child)->max_bufs = ntohs(h->bsdh.bufs);
    atomic_set(&sdp_sk(child)->tx_ring.credits, sdp_sk(child)->max_bufs);

    sdp_sk(child)->min_bufs = tx_credits(sdp_sk(child)) / 4;
    sdp_sk(child)->xmit_size_goal =
        ntohl(h->localrcvsz) - sizeof(struct sdp_bsdh);

    sdp_sk(child)->send_frags =
        sdp_get_max_send_frags(sdp_sk(child)->xmit_size_goal);
    sdp_init_buffers(sdp_sk(child), rcvbuf_initial_size);

    id->context = child;
    sdp_sk(child)->id = id;

    list_add_tail(&sdp_sk(child)->backlog_queue,
                  &sdp_sk(sk)->backlog_queue);
    sdp_sk(child)->parent = sk;

    bh_unlock_sock(child);
    sdp_add_sock(sdp_sk(child));
    up_read(&device_removal_lock);

    sdp_exch_state(child, TCPF_LISTEN | TCPF_CLOSE, TCP_SYN_RECV);

    /* child->sk_write_space(child); */
    /* child->sk_data_ready(child, 0); */
    sk->sk_data_ready(sk);

    return 0;
}

static int sdp_response_handler(struct sock *sk, struct rdma_cm_id *id,
                                struct rdma_cm_event *event)
{
    const struct sdp_hah *h;
    struct sockaddr_in *dst_addr;

    sdp_dbg(sk, "%s\n", __func__);

    sdp_exch_state(sk, TCPF_SYN_SENT, TCP_ESTABLISHED);
    sdp_set_default_moderation(sdp_sk(sk));

    if (sock_flag(sk, SOCK_KEEPOPEN))
        sdp_start_keepalive_timer(sk);

    if (sock_flag(sk, SOCK_DEAD))
        return 0;

    h = event->param.conn.private_data;
    SDP_DUMP_PACKET(sk, "RX", NULL, &h->bsdh);
    sdp_sk(sk)->max_bufs = ntohs(h->bsdh.bufs);
    atomic_set(&sdp_sk(sk)->tx_ring.credits, sdp_sk(sk)->max_bufs);
    sdp_sk(sk)->min_bufs = tx_credits(sdp_sk(sk)) / 4;
    sdp_sk(sk)->xmit_size_goal = ntohl(h->actrcvsz) -
        sizeof(struct sdp_bsdh);
    sdp_sk(sk)->send_frags =
        sdp_get_max_send_frags(sdp_sk(sk)->xmit_size_goal);
    sdp_sk(sk)->xmit_size_goal = MIN(sdp_sk(sk)->xmit_size_goal,
                                     sdp_sk(sk)->send_frags * PAGE_SIZE);

    sdp_sk(sk)->poll_cq = 1;

    sk->sk_state_change(sk);
    sk_wake_async(sk, 0, POLL_OUT);

    dst_addr = (struct sockaddr_in *)&id->route.addr.dst_addr;
    sdp_inet_dport(sk) = dst_addr->sin_port;
    sdp_inet_daddr(sk) = dst_addr->sin_addr.s_addr;

#ifdef SDP_SOCK_HISTORY
    sdp_ssk_hist_rename(sk);
#endif
    return 0;
}

static int sdp_connected_handler(struct sock *sk)
{
    struct sock *parent;

    sdp_dbg(sk, "%s\n", __func__);

    parent = sdp_sk(sk)->parent;
    BUG_ON(!parent);

    sdp_exch_state(sk, TCPF_SYN_RECV, TCP_ESTABLISHED);

#ifdef SDP_SOCK_HISTORY
    sdp_ssk_hist_rename(sk);
#endif
    sdp_set_default_moderation(sdp_sk(sk));

    if (sock_flag(sk, SOCK_KEEPOPEN))
        sdp_start_keepalive_timer(sk);

    if (sock_flag(sk, SOCK_DEAD))
        return 0;

    lock_sock(parent);
    if (!sdp_sk(parent)->id) { /* TODO: look at SOCK_DEAD? */
        sdp_dbg(sk, "parent is going away.\n");
        goto done;
    }

    sk_acceptq_added(parent);
    sdp_dbg(parent, "%s child connection established\n", __func__);
    list_del_init(&sdp_sk(sk)->backlog_queue);
    list_add_tail(&sdp_sk(sk)->accept_queue,
                  &sdp_sk(parent)->accept_queue);

    parent->sk_state_change(parent);
    sk_wake_async(parent, 0, POLL_OUT);
done:
    release_sock(parent);

    return 0;
}

static int sdp_disconnected_handler(struct sock *sk)
{
    struct sdp_sock *ssk = sdp_sk(sk);

    sdp_dbg(sk, "%s\n", __func__);

    if (ssk->tx_ring.cq)
        if (sdp_xmit_poll(ssk, 1))
            sdp_post_sends(ssk, 0);

    if (sk->sk_state == TCP_SYN_RECV) {
        sdp_connected_handler(sk);

        if (rcv_nxt(ssk))
            return 0;
    }

    return -ECONNRESET;
}

int sdp_cma_handler(struct rdma_cm_id *id, struct rdma_cm_event *event)
{
    struct rdma_conn_param conn_param;
    struct sock *parent = NULL;
    struct sock *child = NULL;
    struct sock *sk;
    struct sdp_hah hah;
    struct sdp_hh hh;
    int rc = 0, rc2;

    sk = id->context;
    if (!sk) {
        sdp_dbg(NULL, "cm_id is being torn down, event %s\n",
                rdma_cm_event_str(event->event));
        return event->event == RDMA_CM_EVENT_CONNECT_REQUEST ?
            -EINVAL : 0;
    }

    sdp_add_to_history(sk, rdma_cm_event_str(event->event));
    lock_sock_nested(sk, SINGLE_DEPTH_NESTING);
    sdp_dbg(sk, "event: %s\n", rdma_cm_event_str(event->event));
    if (!sdp_sk(sk)->id) {
        sdp_dbg(sk, "socket is being torn down\n");
        rc = event->event == RDMA_CM_EVENT_CONNECT_REQUEST ?
            -EINVAL : 0;
        release_sock(sk);
        return rc;
    }

    switch (event->event) {
    case RDMA_CM_EVENT_ADDR_RESOLVED:
        if (sdp_link_layer_ib_only &&
            rdma_node_get_transport(id->device->node_type) ==
                RDMA_TRANSPORT_IB &&
            rdma_port_get_link_layer(id->device, id->port_num) !=
                IB_LINK_LAYER_INFINIBAND) {
            sdp_dbg(sk, "Link layer is: %d. Only IB link layer "
                    "is allowed\n",
                    rdma_port_get_link_layer(id->device, id->port_num));
            rc = -ENETUNREACH;
            break;
        }

        rc = rdma_resolve_route(id, SDP_ROUTE_TIMEOUT);
        break;
    case RDMA_CM_EVENT_ADDR_ERROR:
        rc = -ENETUNREACH;
        break;
    case RDMA_CM_EVENT_ROUTE_RESOLVED:
        rc = sdp_init_qp(sk, id);
        if (rc)
            break;

        memset(&hh, 0, sizeof hh);
        hh.bsdh.mid = SDP_MID_HELLO;
        hh.bsdh.len = htonl(sizeof(struct sdp_hh));
        hh.max_adverts = 1;
        hh.majv_minv = SDP_MAJV_MINV;
        sdp_init_buffers(sdp_sk(sk), rcvbuf_initial_size);
        hh.bsdh.bufs = htons(rx_ring_posted(sdp_sk(sk)));
        atomic_set(&sdp_sk(sk)->remote_credits,
                   rx_ring_posted(sdp_sk(sk)));
        hh.localrcvsz = hh.desremrcvsz =
            htonl(sdp_sk(sk)->recv_frags * PAGE_SIZE +
                  sizeof(struct sdp_bsdh));
#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
        if (inet6_sk(sk)) {
            struct sockaddr *src_addr =
                (struct sockaddr *)&id->route.addr.src_addr;
            struct sockaddr_in *addr4 =
                (struct sockaddr_in *)src_addr;
            struct sockaddr_in6 *addr6 =
                (struct sockaddr_in6 *)src_addr;

            if (src_addr->sa_family == AF_INET) {
                /* IPv4 over IPv6 */
                ipv6_addr_set(&sk->sk_v6_rcv_saddr, 0, 0,
                              htonl(0xFFFF),
                              addr4->sin_addr.s_addr);
            } else {
                sk->sk_v6_rcv_saddr = addr6->sin6_addr;
            }
            inet6_sk(sk)->saddr = sk->sk_v6_rcv_saddr;
        } else
#endif
        {
            sdp_inet_saddr(sk) = sdp_inet_rcv_saddr(sk) =
                ((struct sockaddr_in *)
                 &id->route.addr.src_addr)->sin_addr.s_addr;
        }

        memset(&conn_param, 0, sizeof conn_param);
        conn_param.private_data_len = sizeof hh;
        conn_param.private_data = &hh;
        conn_param.responder_resources = 4 /* TODO */;
        conn_param.initiator_depth = 4 /* TODO */;
        conn_param.retry_count = sdp_retry_count;
        SDP_DUMP_PACKET(sk, "TX", NULL, &hh.bsdh);

        if (sdp_apm_enable) {
            rc = rdma_enable_apm(id, RDMA_ALT_PATH_BEST);
            if (rc)
                sdp_warn(sk, "APM couldn't be enabled for "
                         "active side: %d\n", rc);
        }

        rc = rdma_connect(id, &conn_param);
        break;
    case RDMA_CM_EVENT_ALT_ROUTE_RESOLVED:
        sdp_dbg(sk, "alt route was resolved slid=%d, dlid=%d\n",
                id->route.path_rec[1].slid,
                id->route.path_rec[1].dlid);
        break;
    case RDMA_CM_EVENT_ALT_PATH_LOADED:
        sdp_dbg(sk, "alt route path loaded\n");
        break;
    case RDMA_CM_EVENT_ALT_ROUTE_ERROR:
        sdp_warn(sk, "alt route resolve error\n");
        break;
    case RDMA_CM_EVENT_ROUTE_ERROR:
        rc = -ETIMEDOUT;
        break;
    case RDMA_CM_EVENT_CONNECT_REQUEST:
        rc = sdp_connect_handler(sk, id, event);
        if (rc) {
            sdp_dbg(sk, "Destroying qp\n");
            rdma_reject(id, NULL, 0);
            break;
        }
        child = id->context;
        atomic_set(&sdp_sk(child)->remote_credits,
                   rx_ring_posted(sdp_sk(child)));
        memset(&hah, 0, sizeof hah);
        hah.bsdh.mid = SDP_MID_HELLO_ACK;
        hah.bsdh.bufs = htons(rx_ring_posted(sdp_sk(child)));
        hah.bsdh.len = htonl(sizeof(struct sdp_hah));
        hah.majv_minv = SDP_MAJV_MINV;
        hah.ext_max_adverts = 1; /* Doesn't seem to be mandated by
                                  * spec, but just in case */
        hah.actrcvsz = htonl(sdp_sk(child)->recv_frags * PAGE_SIZE +
                             sizeof(struct sdp_bsdh));
        memset(&conn_param, 0, sizeof conn_param);
        conn_param.private_data_len = sizeof hah;
        conn_param.private_data = &hah;
        conn_param.responder_resources = 4 /* TODO */;
        conn_param.initiator_depth = 4 /* TODO */;
        conn_param.retry_count = sdp_retry_count;
        SDP_DUMP_PACKET(sk, "TX", NULL, &hah.bsdh);
        rc = rdma_accept(id, &conn_param);
        if (rc) {
            sdp_sk(child)->id = NULL;
            id->qp = NULL;
            id->context = NULL;
            parent = sdp_sk(child)->parent; /* TODO: hold ? */
        } else if (sdp_apm_enable) {
            rc2 = rdma_enable_apm(id, RDMA_ALT_PATH_BEST);
            if (rc2)
                sdp_warn(sk, "APM couldn't be enabled for "
                         "passive side: %d\n", rc2);
        }
        break;
    case RDMA_CM_EVENT_CONNECT_RESPONSE:
        rc = sdp_response_handler(sk, id, event);
        if (rc) {
            sdp_dbg(sk, "Destroying qp\n");
            rdma_reject(id, NULL, 0);
        } else {
            rc = rdma_accept(id, NULL);
            if (!rc && sdp_apm_enable) {
                rc2 = rdma_enable_apm(id, RDMA_ALT_PATH_BEST);
                if (rc2)
                    sdp_warn(sk, "APM couldn't be enabled for "
                             "passive side: %d\n", rc2);
            }
        }
        break;
    case RDMA_CM_EVENT_CONNECT_ERROR:
        rc = -ETIMEDOUT;
        break;
    case RDMA_CM_EVENT_UNREACHABLE:
        rc = -ENETUNREACH;
        break;
    case RDMA_CM_EVENT_REJECTED:
        rc = -ECONNREFUSED;
        break;
    case RDMA_CM_EVENT_ESTABLISHED:
        sdp_inet_saddr(sk) = sdp_inet_rcv_saddr(sk) =
            ((struct sockaddr_in *)
             &id->route.addr.src_addr)->sin_addr.s_addr;
        rc = sdp_connected_handler(sk);
        break;
    case RDMA_CM_EVENT_DISCONNECTED: /* This means DREQ/DREP received */
        if (sk->sk_state == TCP_LAST_ACK) {
            sdp_cancel_dreq_wait_timeout(sdp_sk(sk));
            sdp_exch_state(sk, TCPF_LAST_ACK, TCP_TIME_WAIT);
            sdp_dbg(sk, "%s: waiting for Infiniband tear down\n",
                    __func__);
        }

        sdp_sk(sk)->qp_active = 0;
        rdma_disconnect(id);

        if (sk->sk_state != TCP_TIME_WAIT) {
            if (sk->sk_state == TCP_CLOSE_WAIT) {
                sdp_dbg(sk, "IB teardown while in "
                        "TCP_CLOSE_WAIT taking reference to "
                        "let close() finish the work\n");
                sock_hold(sk, SOCK_REF_CMA);
                sdp_start_cma_timewait_timeout(sdp_sk(sk),
                        SDP_CMA_TIMEWAIT_TIMEOUT);
            }
            sdp_set_error(sk, -EPIPE);
            rc = sdp_disconnected_handler(sk);
        }
        break;
    case RDMA_CM_EVENT_TIMEWAIT_EXIT:
        rc = sdp_disconnected_handler(sk);
        break;
    case RDMA_CM_EVENT_DEVICE_REMOVAL:
        rc = -ENETRESET;
        break;
    case RDMA_CM_EVENT_ADDR_CHANGE:
        sdp_dbg(sk, "Got Address change event\n");
        rc = 0;
        break;
    default:
        printk(KERN_ERR "SDP: Unexpected CMA event: %d\n",
               event->event);
        rc = -ECONNABORTED;
        break;
    }

    sdp_dbg(sk, "event: %s handled\n", rdma_cm_event_str(event->event));

    if (rc && sdp_sk(sk)->id == id) {
        child = sk;
        sdp_sk(sk)->id = NULL;
        id->qp = NULL;
        id->context = NULL;
        parent = sdp_sk(sk)->parent;
        sdp_reset_sk(sk, rc);
    }

    release_sock(sk);

    sdp_dbg(sk, "event: %s done. status %d\n",
            rdma_cm_event_str(event->event), rc);

    if (parent) {
        lock_sock(parent);
        if (!sdp_sk(parent)->id) { /* TODO: look at SOCK_DEAD? */
            sdp_dbg(sk, "parent is going away.\n");
            child = NULL;
            goto done;
        }
        if (!list_empty(&sdp_sk(child)->backlog_queue))
            list_del_init(&sdp_sk(child)->backlog_queue);
        else
            child = NULL;
done:
        release_sock(parent);
        if (child)
            sdp_common_release(child);
    }
    return rc;
}
static int iser_create_ib_conn_res(struct iser_conn *ib_conn)
{
    struct iser_device *device;
    struct ib_qp_init_attr init_attr;
    int ret;
    struct ib_fmr_pool_param params;

    BUG_ON(ib_conn->device == NULL);

    device = ib_conn->device;

    ib_conn->page_vec = kmalloc(sizeof(struct iser_page_vec) +
                                (sizeof(u64) * (ISCSI_ISER_SG_TABLESIZE + 1)),
                                GFP_KERNEL);
    if (!ib_conn->page_vec) {
        ret = -ENOMEM;
        goto alloc_err;
    }
    ib_conn->page_vec->pages = (u64 *)(ib_conn->page_vec + 1);

    params.page_shift = SHIFT_4K;
    params.max_pages_per_fmr = ISCSI_ISER_SG_TABLESIZE + 1;
    params.pool_size = ISCSI_DEF_XMIT_CMDS_MAX * 2;
    params.dirty_watermark = ISCSI_DEF_XMIT_CMDS_MAX;
    params.cache = 0;
    params.flush_function = NULL;
    params.access = (IB_ACCESS_LOCAL_WRITE |
                     IB_ACCESS_REMOTE_WRITE |
                     IB_ACCESS_REMOTE_READ);

    ib_conn->fmr_pool = ib_create_fmr_pool(device->pd, &params);
    if (IS_ERR(ib_conn->fmr_pool)) {
        ret = PTR_ERR(ib_conn->fmr_pool);
        goto fmr_pool_err;
    }

    memset(&init_attr, 0, sizeof init_attr);

    init_attr.event_handler = iser_qp_event_callback;
    init_attr.qp_context = (void *)ib_conn;
    init_attr.send_cq = device->cq;
    init_attr.recv_cq = device->cq;
    init_attr.cap.max_send_wr = ISER_QP_MAX_REQ_DTOS;
    init_attr.cap.max_recv_wr = ISER_QP_MAX_RECV_DTOS;
    init_attr.cap.max_send_sge = MAX_REGD_BUF_VECTOR_LEN;
    init_attr.cap.max_recv_sge = 2;
    init_attr.sq_sig_type = IB_SIGNAL_REQ_WR;
    init_attr.qp_type = IB_QPT_RC;

    ret = rdma_create_qp(ib_conn->cma_id, device->pd, &init_attr);
    if (ret)
        goto qp_err;

    ib_conn->qp = ib_conn->cma_id->qp;
    iser_err("setting conn %p cma_id %p: fmr_pool %p qp %p\n",
             ib_conn, ib_conn->cma_id,
             ib_conn->fmr_pool, ib_conn->cma_id->qp);
    return ret;

qp_err:
    (void)ib_destroy_fmr_pool(ib_conn->fmr_pool);
fmr_pool_err:
    kfree(ib_conn->page_vec);
alloc_err:
    iser_err("unable to alloc mem or create resource, err %d\n", ret);
    return ret;
}
/**
 * trans_create_rdma - Transport method for creating a transport instance
 * @client: client instance
 * @addr: IP address string
 * @args: Mount options string
 */
static int
rdma_create_trans(struct p9_client *client, const char *addr, char *args)
{
    int err;
    struct p9_rdma_opts opts;
    struct p9_trans_rdma *rdma;
    struct rdma_conn_param conn_param;
    struct ib_qp_init_attr qp_attr;
    struct ib_device_attr devattr;

    /* Parse the transport specific mount options */
    err = parse_opts(args, &opts);
    if (err < 0)
        return err;

    /* Create and initialize the RDMA transport structure */
    rdma = alloc_rdma(&opts);
    if (!rdma)
        return -ENOMEM;

    /* Create the RDMA CM ID */
    rdma->cm_id = rdma_create_id(p9_cm_event_handler, client, RDMA_PS_TCP);
    if (IS_ERR(rdma->cm_id))
        goto error;

    /* Associate the client with the transport */
    client->trans = rdma;

    /* Resolve the server's address */
    rdma->addr.sin_family = AF_INET;
    rdma->addr.sin_addr.s_addr = in_aton(addr);
    rdma->addr.sin_port = htons(opts.port);
    err = rdma_resolve_addr(rdma->cm_id, NULL,
                            (struct sockaddr *)&rdma->addr,
                            rdma->timeout);
    if (err)
        goto error;
    err = wait_for_completion_interruptible(&rdma->cm_done);
    if (err || (rdma->state != P9_RDMA_ADDR_RESOLVED))
        goto error;

    /* Resolve the route to the server */
    err = rdma_resolve_route(rdma->cm_id, rdma->timeout);
    if (err)
        goto error;
    err = wait_for_completion_interruptible(&rdma->cm_done);
    if (err || (rdma->state != P9_RDMA_ROUTE_RESOLVED))
        goto error;

    /* Query the device attributes */
    err = ib_query_device(rdma->cm_id->device, &devattr);
    if (err)
        goto error;

    /* Create the Completion Queue */
    rdma->cq = ib_create_cq(rdma->cm_id->device, cq_comp_handler,
                            cq_event_handler, client,
                            opts.sq_depth + opts.rq_depth + 1, 0);
    if (IS_ERR(rdma->cq))
        goto error;
    ib_req_notify_cq(rdma->cq, IB_CQ_NEXT_COMP);

    /* Create the Protection Domain */
    rdma->pd = ib_alloc_pd(rdma->cm_id->device);
    if (IS_ERR(rdma->pd))
        goto error;

    /* Cache the DMA lkey in the transport */
    rdma->dma_mr = NULL;
    if (devattr.device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY)
        rdma->lkey = rdma->cm_id->device->local_dma_lkey;
    else {
        rdma->dma_mr = ib_get_dma_mr(rdma->pd, IB_ACCESS_LOCAL_WRITE);
        if (IS_ERR(rdma->dma_mr))
            goto error;
        rdma->lkey = rdma->dma_mr->lkey;
    }

    /* Create the Queue Pair */
    memset(&qp_attr, 0, sizeof qp_attr);
    qp_attr.event_handler = qp_event_handler;
    qp_attr.qp_context = client;
    qp_attr.cap.max_send_wr = opts.sq_depth;
    qp_attr.cap.max_recv_wr = opts.rq_depth;
    qp_attr.cap.max_send_sge = P9_RDMA_SEND_SGE;
    qp_attr.cap.max_recv_sge = P9_RDMA_RECV_SGE;
    qp_attr.sq_sig_type = IB_SIGNAL_REQ_WR;
    qp_attr.qp_type = IB_QPT_RC;
    qp_attr.send_cq = rdma->cq;
    qp_attr.recv_cq = rdma->cq;
    err = rdma_create_qp(rdma->cm_id, rdma->pd, &qp_attr);
    if (err)
        goto error;
    rdma->qp = rdma->cm_id->qp;

    /* Request a connection */
    memset(&conn_param, 0, sizeof(conn_param));
    conn_param.private_data = NULL;
    conn_param.private_data_len = 0;
    conn_param.responder_resources = P9_RDMA_IRD;
    conn_param.initiator_depth = P9_RDMA_ORD;
    err = rdma_connect(rdma->cm_id, &conn_param);
    if (err)
        goto error;
    err = wait_for_completion_interruptible(&rdma->cm_done);
    if (err || (rdma->state != P9_RDMA_CONNECTED))
        goto error;
    client->status = Connected;

    return 0;

error:
    rdma_destroy_trans(rdma);
    return -ENOTCONN;
}
int main(int argc, char *argv[])
{
    struct pdata rep_pdata;

    struct rdma_event_channel *cm_channel;
    struct rdma_cm_id *listen_id;
    struct rdma_cm_id *cm_id;
    struct rdma_cm_event *event;
    struct rdma_conn_param conn_param = { };

    struct ibv_pd *pd;
    struct ibv_comp_channel *comp_chan;
    struct ibv_cq *cq;
    struct ibv_cq *evt_cq;
    struct ibv_mr *mr;
    struct ibv_qp_init_attr qp_attr = { };
    struct ibv_sge sge;
    struct ibv_send_wr send_wr = { };
    struct ibv_send_wr *bad_send_wr;
    struct ibv_recv_wr recv_wr = { };
    struct ibv_recv_wr *bad_recv_wr;
    struct ibv_wc wc;
    void *cq_context;

    struct sockaddr_in sin;
    uint32_t *buf;
    int err;

    /* Set up RDMA CM structures */
    cm_channel = rdma_create_event_channel();
    if (!cm_channel)
        return 1;

    err = rdma_create_id(cm_channel, &listen_id, NULL, RDMA_PS_TCP);
    if (err)
        return err;

    sin.sin_family = AF_INET;
    sin.sin_port = htons(20079);
    sin.sin_addr.s_addr = INADDR_ANY;

    /* Bind to local port and listen for connection request */
    err = rdma_bind_addr(listen_id, (struct sockaddr *)&sin);
    if (err)
        return 1;

    err = rdma_listen(listen_id, 1);
    if (err)
        return 1;

    err = rdma_get_cm_event(cm_channel, &event);
    if (err)
        return err;
    printf("after get_cm_event\n");

    if (event->event != RDMA_CM_EVENT_CONNECT_REQUEST)
        return 1;

    cm_id = event->id;
    rdma_ack_cm_event(event);

    /* Create verbs objects now that we know which device to use */
    pd = ibv_alloc_pd(cm_id->verbs);
    if (!pd)
        return 1;

    comp_chan = ibv_create_comp_channel(cm_id->verbs);
    if (!comp_chan)
        return 1;

    cq = ibv_create_cq(cm_id->verbs, 2, NULL, comp_chan, 0);
    if (!cq)
        return 1;

    if (ibv_req_notify_cq(cq, 0))
        return 1;

    buf = calloc(2, sizeof(uint32_t));
    if (!buf)
        return 1;

    mr = ibv_reg_mr(pd, buf, 2 * sizeof(uint32_t),
                    IBV_ACCESS_LOCAL_WRITE |
                    IBV_ACCESS_REMOTE_READ |
                    IBV_ACCESS_REMOTE_WRITE);
    if (!mr)
        return 1;

    qp_attr.cap.max_send_wr = 1;
    qp_attr.cap.max_send_sge = 1;
    qp_attr.cap.max_recv_wr = 1;
    qp_attr.cap.max_recv_sge = 1;

    qp_attr.send_cq = cq;
    qp_attr.recv_cq = cq;
    qp_attr.qp_type = IBV_QPT_RC;

    err = rdma_create_qp(cm_id, pd, &qp_attr);
    if (err)
        return err;

    /* Post receive before accepting connection */
    sge.addr = (uintptr_t)buf + sizeof(uint32_t);
    sge.length = sizeof(uint32_t);
    sge.lkey = mr->lkey;

    recv_wr.sg_list = &sge;
    recv_wr.num_sge = 1;

    if (ibv_post_recv(cm_id->qp, &recv_wr, &bad_recv_wr))
        return 1;

    rep_pdata.buf_va = htonll((uintptr_t)buf);
    rep_pdata.buf_rkey = htonl(mr->rkey);

    conn_param.responder_resources = 1;
    conn_param.private_data = &rep_pdata;
    conn_param.private_data_len = sizeof rep_pdata;

    /* Accept connection */
    printf("before accept\n");
    err = rdma_accept(cm_id, &conn_param);
    if (err)
        return 1;
    printf("after accept\n");

    err = rdma_get_cm_event(cm_channel, &event);
    if (err)
        return err;
    if (event->event != RDMA_CM_EVENT_ESTABLISHED)
        return 1;
    rdma_ack_cm_event(event);

    /* Wait for receive completion */
    if (ibv_get_cq_event(comp_chan, &evt_cq, &cq_context))
        return 1;
    if (ibv_req_notify_cq(cq, 0))
        return 1;
    if (ibv_poll_cq(cq, 1, &wc) < 1)
        return 1;
    if (wc.status != IBV_WC_SUCCESS)
        return 1;

    /* Add two integers and send reply back */
    buf[0] = htonl(ntohl(buf[0]) + ntohl(buf[1]));

    sge.addr = (uintptr_t)buf;
    sge.length = sizeof(uint32_t);
    sge.lkey = mr->lkey;

    send_wr.opcode = IBV_WR_SEND;
    send_wr.send_flags = IBV_SEND_SIGNALED;
    send_wr.sg_list = &sge;
    send_wr.num_sge = 1;

    if (ibv_post_send(cm_id->qp, &send_wr, &bad_send_wr))
        return 1;

    /* Wait for send completion */
    if (ibv_get_cq_event(comp_chan, &evt_cq, &cq_context))
        return 1;
    if (ibv_poll_cq(cq, 1, &wc) < 1)
        return 1;
    if (wc.status != IBV_WC_SUCCESS)
        return 1;

    printf("before ack cq 2\n");
    ibv_ack_cq_events(cq, 2);

    return 0;
}