Example #1
static int rping_create_qp(struct rping_cb *cb)
{
	struct ibv_qp_init_attr init_attr;
	int ret;

	memset(&init_attr, 0, sizeof(init_attr));
	init_attr.cap.max_send_wr = RPING_SQ_DEPTH;
	init_attr.cap.max_recv_wr = 2;
	init_attr.cap.max_recv_sge = 1;
	init_attr.cap.max_send_sge = 1;
	init_attr.qp_type = IBV_QPT_RC;
	init_attr.send_cq = cb->cq;
	init_attr.recv_cq = cb->cq;

	if (cb->server) {
		ret = rdma_create_qp(cb->child_cm_id, cb->pd, &init_attr);
		if (!ret)
			cb->qp = cb->child_cm_id->qp;
	} else {
		ret = rdma_create_qp(cb->cm_id, cb->pd, &init_attr);
		if (!ret)
			cb->qp = cb->cm_id->qp;
	}

	return ret;
}
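Note: on success rdma_create_qp() stores the new queue pair in the cm_id's qp field, which is why the code above copies cb->child_cm_id->qp (server side) or cb->cm_id->qp (client side) into the control block only when the call returned 0.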
Example #2
int on_addr_resolved(struct rdma_cm_id *id)
{
  struct ibv_qp_init_attr qp_attr;
  struct connection *conn;

  printf("address resolved.\n");

  build_context(id->verbs);
  build_qp_attr(&qp_attr);

  TEST_NZ(rdma_create_qp(id, s_ctx->pd, &qp_attr));

  id->context = conn = (struct connection *)malloc(sizeof(struct connection));

  conn->id = id;
  conn->qp = id->qp;
  conn->num_completions = 0;

  register_memory(conn);
  post_receives(conn);

  TEST_NZ(rdma_resolve_route(id, TIMEOUT_IN_MS));

  return 0;
}
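Several of these snippets call a build_qp_attr() helper that is never shown. A minimal sketch of what it plausibly looks like, assuming the shared CQ and global s_ctx context implied by the rdma_create_qp(id, s_ctx->pd, ...) calls (the queue depths are illustrative, not taken from the source):

static void build_qp_attr(struct ibv_qp_init_attr *qp_attr)
{
  /* Hypothetical helper: zero the attributes, point both CQs at the
   * context's single completion queue, and request a reliable
   * connected (RC) QP with modest queue depths. */
  memset(qp_attr, 0, sizeof(*qp_attr));

  qp_attr->send_cq = s_ctx->cq;
  qp_attr->recv_cq = s_ctx->cq;
  qp_attr->qp_type = IBV_QPT_RC;

  qp_attr->cap.max_send_wr = 10;
  qp_attr->cap.max_recv_wr = 10;
  qp_attr->cap.max_send_sge = 1;
  qp_attr->cap.max_recv_sge = 1;
}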
Example #3
static int init_node(struct cmatest_node *node)
{
	struct ibv_qp_init_attr init_qp_attr;
	int cqe, ret;
	int i;
	struct ibv_cq **cqs[] = {&node->cq[SEND_CQ_INDEX],
				 &node->cq[RECV_CQ_INDEX]};

	node->pd = ibv_alloc_pd(node->cma_id->verbs);
	if (!node->pd) {
		ret = -ENOMEM;
		printf("cmatose: unable to allocate PD\n");
		goto out;
	}

	cqe = message_count ? message_count : 1;
	for (i = 0; i < sizeof(cqs)/sizeof(cqs[0]); i++) {
		if (set_ts) {
			struct ibv_exp_cq_init_attr cq_init_attr;
			memset(&cq_init_attr, 0, sizeof(cq_init_attr));
			cq_init_attr.flags = IBV_EXP_CQ_TIMESTAMP;
			cq_init_attr.comp_mask = IBV_EXP_CQ_INIT_ATTR_FLAGS;
			*cqs[i] = (struct ibv_cq *)ibv_exp_create_cq(
					node->cma_id->verbs, cqe, node,
					NULL, 0, &cq_init_attr);
		} else {
			*cqs[i] = ibv_create_cq(node->cma_id->verbs, cqe, node,
					       0, 0);
		}
	}
	if (!node->cq[SEND_CQ_INDEX] || !node->cq[RECV_CQ_INDEX]) {
		ret = -ENOMEM;
		printf("cmatose: unable to create CQ\n");
		goto out;
	}

	memset(&init_qp_attr, 0, sizeof init_qp_attr);
	init_qp_attr.cap.max_send_wr = cqe;
	init_qp_attr.cap.max_recv_wr = cqe;
	init_qp_attr.cap.max_send_sge = 1;
	init_qp_attr.cap.max_recv_sge = 1;
	init_qp_attr.qp_context = node;
	init_qp_attr.sq_sig_all = 1;
	init_qp_attr.qp_type = IBV_QPT_RC;
	init_qp_attr.send_cq = node->cq[SEND_CQ_INDEX];
	init_qp_attr.recv_cq = node->cq[RECV_CQ_INDEX];
	ret = rdma_create_qp(node->cma_id, node->pd, &init_qp_attr);
	if (ret) {
		perror("cmatose: unable to create QP");
		goto out;
	}

	ret = create_message(node);
	if (ret) {
		printf("cmatose: failed to create messages: %d\n", ret);
		goto out;
	}
out:
	return ret;
}
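Note: the ibv_exp_* calls in this example come from Mellanox's experimental verbs extension; when set_ts is enabled the CQs are created with IBV_EXP_CQ_TIMESTAMP so completions carry hardware timestamps, while the plain ibv_create_cq() branch is the portable fallback.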
Example #4
void build_connection(struct rdma_cm_id *id)
{
  struct connection *conn;
  struct ibv_qp_init_attr qp_attr;

  //init semaphores
  sem_init(&read_ops, 0, 0);
  sem_init(&done_ops, 0, 0);
  sem_init(&write_ops, 0, 1);

  build_context(id->verbs);
  build_qp_attr(&qp_attr);

  TEST_NZ(rdma_create_qp(id, s_ctx->pd, &qp_attr));

  id->context = conn = (struct connection *)malloc(sizeof(struct connection));

  conn->id = id;
  conn->qp = id->qp;

  conn->send_state = SS_INIT;
  conn->recv_state = RS_INIT;

  conn->connected = 0;

  register_memory(conn);
  post_receives(conn);
}
Example #5
void build_connection(struct rdma_cm_id *id)
{
    rdma_conn_t *conn;
    struct ibv_qp_init_attr qp_attr;

    build_context(id->verbs);
    build_qp_attr(&qp_attr);

    TEST_NZ(rdma_create_qp(id, s_ctx->pd, &qp_attr));

    conn = malloc(sizeof(rdma_conn_t));
    id->context = conn; 
    rdma_conn = conn;

    conn->id = id;
    conn->qp = id->qp;

    conn->send_state = SS_INIT;
    conn->recv_state = RS_INIT;

    conn->connected = 0;

    register_memory(conn);

    post_receives(conn);
}
Example #6
void RDMAServerSocket::accept(client_t client_id) const {
  ibv_qp_init_attr qp_attr = {};
  qp_attr.qp_type = IBV_QPT_RC;
  qp_attr.cap.max_send_wr = 256;
  qp_attr.cap.max_recv_wr = 0;
  qp_attr.cap.max_send_sge = 1;
  qp_attr.cap.max_recv_sge = 0;
  qp_attr.cap.max_inline_data = 72;
  qp_attr.recv_cq = cq;
  qp_attr.send_cq = cq;
  qp_attr.srq = id->srq;
  qp_attr.sq_sig_all = 1;

  check_zero(rdma_create_qp(client_id.get(), NULL, &qp_attr));

  check_zero(rdma_accept(client_id.get(), nullptr));

  clients([client_id = std::move(client_id)](auto && clients) mutable {
    auto pos = std::lower_bound(std::begin(clients), std::end(clients),
                                client_id->qp->qp_num,
                                [](const auto &client, const qp_t &qp_num) {
      return client->qp->qp_num < qp_num;
    });
    clients.insert(pos, std::move(client_id));
  });
}
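Note: because this QP attaches to a shared receive queue (qp_attr.srq), max_recv_wr and max_recv_sge are zero and receive buffers are posted to the SRQ rather than to the QP. Passing NULL as the protection domain makes rdma_create_qp() fall back to the device's default PD.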
Example #7
static int isert_conn_qp_create(struct isert_connection *isert_conn)
{
	struct rdma_cm_id *cm_id = isert_conn->cm_id;
	struct isert_device *isert_dev = isert_conn->isert_dev;
	struct ib_qp_init_attr qp_attr;
	int err;
	int cq_idx;
	int max_wr = ISER_MAX_WCE;

	TRACE_ENTRY();

	cq_idx = isert_get_cq_idx(isert_dev);

	memset(&qp_attr, 0, sizeof(qp_attr));

	qp_attr.event_handler = isert_async_evt_handler;
	qp_attr.qp_context = isert_conn;
	qp_attr.send_cq = isert_dev->cq_desc[cq_idx].cq;
	qp_attr.recv_cq = isert_dev->cq_desc[cq_idx].cq;

	isert_conn->cq_desc = &isert_dev->cq_desc[cq_idx];

	qp_attr.cap.max_send_sge = isert_conn->max_sge;
	qp_attr.cap.max_recv_sge = 3;
	qp_attr.sq_sig_type = IB_SIGNAL_REQ_WR;
	qp_attr.qp_type = IB_QPT_RC;

	do {
		if (max_wr < ISER_MIN_SQ_SIZE) {
			pr_err("Failed to create qp, not enough memory\n");
			goto fail_create_qp;
		}

		qp_attr.cap.max_send_wr = max_wr;
		qp_attr.cap.max_recv_wr = max_wr;

		err = rdma_create_qp(cm_id, isert_dev->pd, &qp_attr);
		if (err && err != -ENOMEM) {
			pr_err("Failed to create qp, err:%d\n", err);
			goto fail_create_qp;
		}

		max_wr /= 2;
	} while (err == -ENOMEM);

	isert_conn->qp = cm_id->qp;

	pr_info("iser created cm_id:%p qp:0x%X\n", cm_id, cm_id->qp->qp_num);

out:
	TRACE_EXIT_RES(err);
	return err;

fail_create_qp:
	mutex_lock(&dev_list_mutex);
	isert_dev->cq_qps[cq_idx]--;
	mutex_unlock(&dev_list_mutex);
	goto out;
}
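Note the sizing strategy above: the loop starts at ISER_MAX_WCE work requests and halves max_wr every time rdma_create_qp() fails with -ENOMEM, giving up once the depth would fall below ISER_MIN_SQ_SIZE. This is a common way to adapt to devices that cannot satisfy the largest requested queue depth.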
Example #8
void Connector::build_conn(struct rdma_cm_id* id_)
{
  struct ibv_qp_init_attr qp_attr;

  build_context(id_->verbs);
  build_qp_attr(&qp_attr);

  TEST_NZ(rdma_create_qp(id_, s_ctx_->pd_, &qp_attr));
}
Example #9
static ssize_t
fi_ibv_rdm_process_addr_resolved(struct rdma_cm_id *id,
				 struct fi_ibv_rdm_ep *ep)
{
	ssize_t ret = FI_SUCCESS;
	struct ibv_qp_init_attr qp_attr;
	struct fi_ibv_rdm_tagged_conn *conn = id->context;

	VERBS_INFO(FI_LOG_AV, "ADDR_RESOLVED conn %p, addr %s:%u\n",
		   conn, inet_ntoa(conn->addr.sin_addr),
		   ntohs(conn->addr.sin_port));

	assert(id->verbs == ep->domain->verbs);

	do {
		fi_ibv_rdm_tagged_init_qp_attributes(&qp_attr, ep);
		if (rdma_create_qp(id, ep->domain->pd, &qp_attr)) {
			VERBS_INFO_ERRNO(FI_LOG_AV,
					 "rdma_create_qp failed\n", errno);
			return -errno;
		}

		if (conn->cm_role == FI_VERBS_CM_PASSIVE) {
			break;
		}

		conn->qp[0] = id->qp;
		assert(conn->id[0] == id);
		if (conn->cm_role == FI_VERBS_CM_SELF) {
			break;
		}

		ret = fi_ibv_rdm_prepare_conn_memory(ep, conn);
		if (ret != FI_SUCCESS) {
			goto err;
		}

		ret = fi_ibv_rdm_repost_receives(conn, ep, ep->rq_wr_depth);
		if (ret < 0) {
			VERBS_INFO(FI_LOG_AV, "repost receives failed\n");
			goto err;
		} else {
			ret = FI_SUCCESS;
		}
	} while (0);

	if (rdma_resolve_route(id, FI_IBV_RDM_CM_RESOLVEADDR_TIMEOUT)) {
		VERBS_INFO(FI_LOG_AV, "rdma_resolve_route failed\n");
		ret = -FI_EHOSTUNREACH;
		goto err;
	}

	return ret;
err:
	rdma_destroy_qp(id);
	return ret;
}
Example #10
/**
 * @param[in] ni
 * @param[in] conn
 * @param[in] event
 *
 * @return status
 *
 * conn is locked
 */
static int accept_connection_request(ni_t *ni, conn_t *conn,
                                     struct rdma_cm_event *event)
{
    struct rdma_conn_param conn_param;
    struct ibv_qp_init_attr init_attr;
    struct cm_priv_accept priv;

    conn->state = CONN_STATE_CONNECTING;

    memset(&init_attr, 0, sizeof(init_attr));

    init_attr.qp_type = IBV_QPT_RC;
    init_attr.cap.max_send_wr = ni->iface->cap.max_send_wr;
    init_attr.send_cq = ni->rdma.cq;
    init_attr.recv_cq = ni->rdma.cq;
    init_attr.srq = ni->rdma.srq;
    init_attr.cap.max_send_sge = ni->iface->cap.max_send_sge;

    if (rdma_create_qp(event->id, ni->iface->pd, &init_attr)) {
        conn->state = CONN_STATE_DISCONNECTED;
        pthread_cond_broadcast(&conn->move_wait);

        return PTL_FAIL;
    }

    /* If we were already trying to connect ourselves, cancel it. */
    if (conn->rdma.cm_id != NULL) {
        assert(conn->rdma.cm_id->context == conn);
        conn->rdma.cm_id->context = NULL;
    }

    event->id->context = conn;
    conn->rdma.cm_id = event->id;

    memset(&conn_param, 0, sizeof conn_param);
    conn_param.responder_resources = 1;
    conn_param.initiator_depth = 1;
    conn_param.retry_count = 7;
    conn_param.rnr_retry_count = 7;

    if (ni->options & PTL_NI_LOGICAL) {
        conn_param.private_data = &priv;
        conn_param.private_data_len = sizeof(priv);
    }

    if (rdma_accept(event->id, &conn_param)) {
        rdma_destroy_qp(event->id);
        conn->rdma.cm_id = NULL;
        conn->state = CONN_STATE_DISCONNECTED;
        pthread_cond_broadcast(&conn->move_wait);

        return PTL_FAIL;
    }

    return PTL_OK;
}
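Note: retry_count = 7 is simply the maximum the 3-bit field allows, but rnr_retry_count = 7 is special and means "retry indefinitely" when the peer has no receive buffer posted.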
Example #11
/*
 * Connect unconnected endpoint.
 */
int
rpcrdma_ep_connect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
{
	struct rdma_cm_id *id, *old;
	int rc = 0;
	int retry_count = 0;

	if (ep->rep_connected != 0) {
		struct rpcrdma_xprt *xprt;
retry:
		dprintk("RPC:       %s: reconnecting...\n", __func__);

		rpcrdma_ep_disconnect(ep, ia);
		rpcrdma_flush_cqs(ep);

		xprt = container_of(ia, struct rpcrdma_xprt, rx_ia);
		ia->ri_ops->ro_reset(xprt);

		id = rpcrdma_create_id(xprt, ia,
				(struct sockaddr *)&xprt->rx_data.addr);
		if (IS_ERR(id)) {
			rc = -EHOSTUNREACH;
			goto out;
		}
		/* TEMP TEMP TEMP - fail if new device:
		 * Deregister/remarshal *all* requests!
		 * Close and recreate adapter, pd, etc!
		 * Re-determine all attributes still sane!
		 * More stuff I haven't thought of!
		 * Rrrgh!
		 */
		if (ia->ri_id->device != id->device) {
			printk("RPC:       %s: can't reconnect on "
				"different device!\n", __func__);
			rdma_destroy_id(id);
			rc = -ENETUNREACH;
			goto out;
		}
		/* END TEMP */
		rc = rdma_create_qp(id, ia->ri_pd, &ep->rep_attr);
		if (rc) {
			dprintk("RPC:       %s: rdma_create_qp failed %i\n",
				__func__, rc);
			rdma_destroy_id(id);
			rc = -ENETUNREACH;
			goto out;
		}

		write_lock(&ia->ri_qplock);
		old = ia->ri_id;
		ia->ri_id = id;
		write_unlock(&ia->ri_qplock);

		rdma_destroy_qp(old);
		rdma_destroy_id(old);
	} else {
Example #12
void IBConnection::create_qp(struct ibv_pd* pd, struct ibv_cq* cq)
{
    struct ibv_qp_init_attr qp_attr;
    memset(&qp_attr, 0, sizeof qp_attr);
    qp_attr.cap = qp_cap_;
    qp_attr.send_cq = cq;
    qp_attr.recv_cq = cq;
    qp_attr.qp_type = IBV_QPT_RC;
    int err = rdma_create_qp(cm_id_, pd, &qp_attr);
    if (err)
        throw InfinibandException("creation of QP failed");
}
Example #13
void build_connection(struct rdma_cm_id *id){

  struct ibv_qp_init_attr qp_attr;
  struct connection *conn;
  build_qp_attr(&qp_attr);

  TEST_NZ(rdma_create_qp(id, s_ctx->pd, &qp_attr));

  id->context = conn = (struct connection *)malloc(sizeof(struct connection));

  conn->id = id;
  conn->qp = id->qp;

}
Example #14
static int init_node(struct cmatest_node *node)
{
	struct ibv_qp_init_attr init_qp_attr;
	int cqe, ret;

	node->pd = ibv_alloc_pd(node->cma_id->verbs);
	if (!node->pd) {
		ret = -ENOMEM;
		printf("cmatose: unable to allocate PD\n");
		goto out;
	}

	cqe = message_count ? message_count : 1;
	node->cq[SEND_CQ_INDEX] = ibv_create_cq(node->cma_id->verbs, cqe, node, NULL, 0);
	node->cq[RECV_CQ_INDEX] = ibv_create_cq(node->cma_id->verbs, cqe, node, NULL, 0);
	if (!node->cq[SEND_CQ_INDEX] || !node->cq[RECV_CQ_INDEX]) {
		ret = -ENOMEM;
		printf("cmatose: unable to create CQ\n");
		goto out;
	}

	memset(&init_qp_attr, 0, sizeof init_qp_attr);
	init_qp_attr.cap.max_send_wr = cqe;
	init_qp_attr.cap.max_recv_wr = cqe;
	init_qp_attr.cap.max_send_sge = 1;
	init_qp_attr.cap.max_recv_sge = 1;
	init_qp_attr.qp_context = node;
	init_qp_attr.sq_sig_all = 1;
	init_qp_attr.qp_type = IBV_QPT_RC;
	init_qp_attr.send_cq = node->cq[SEND_CQ_INDEX];
	init_qp_attr.recv_cq = node->cq[RECV_CQ_INDEX];
	ret = rdma_create_qp(node->cma_id, node->pd, &init_qp_attr);
	if (ret) {
		perror("cmatose: unable to create QP");
		goto out;
	}

	ret = create_message(node);
	if (ret) {
		printf("cmatose: failed to create messages: %d\n", ret);
		goto out;
	}
out:
	return ret;
}
Example #15
/**
 * Accept an RC connection request to self.
 *
 * called while holding connect->mutex
 * only used for physical NIs
 *
 * @param[in] ni
 * @param[in] conn
 * @param[in] event
 *
 * @return status
 */
static int accept_connection_self(ni_t *ni, conn_t *conn,
                                  struct rdma_cm_event *event)
{
    struct rdma_conn_param conn_param;
    struct ibv_qp_init_attr init_attr;

    conn->state = CONN_STATE_CONNECTING;

    memset(&init_attr, 0, sizeof(init_attr));
    init_attr.qp_type = IBV_QPT_RC;
    init_attr.send_cq = ni->rdma.cq;
    init_attr.recv_cq = ni->rdma.cq;
    init_attr.srq = ni->rdma.srq;
    init_attr.cap.max_send_wr = ni->iface->cap.max_send_wr;
    init_attr.cap.max_send_sge = ni->iface->cap.max_send_sge;

    if (rdma_create_qp(event->id, ni->iface->pd, &init_attr)) {
        conn->state = CONN_STATE_DISCONNECTED;
        pthread_cond_broadcast(&conn->move_wait);

        return PTL_FAIL;
    }

    ni->rdma.self_cm_id = event->id;

    /* The lower 2 bits (on 32-bit hosts) or 3 bits (on 64-bit
     * hosts) of a pointer are always 0. Use them to store the type of
     * context. 0=conn; 1=NI. */
    event->id->context = (void *)((uintptr_t) ni | 1);

    memset(&conn_param, 0, sizeof conn_param);
    conn_param.responder_resources = 1;
    conn_param.initiator_depth = 1;
    conn_param.rnr_retry_count = 7;

    if (rdma_accept(event->id, &conn_param)) {
        rdma_destroy_qp(event->id);
        conn->state = CONN_STATE_DISCONNECTED;
        pthread_cond_broadcast(&conn->move_wait);

        return PTL_FAIL;
    }

    return PTL_OK;
}
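The tagged-pointer trick above relies on context pointers being at least 2-byte aligned. A hypothetical decoder on the event side (not part of the original snippet) would look like:

static void *untag_cm_context(void *ctx, int *is_ni)
{
    /* Low bit set means the context is the NI itself; clear means it
     * is a conn_t. Mask the tag off before dereferencing. */
    uintptr_t v = (uintptr_t)ctx;

    *is_ni = (int)(v & 1);
    return (void *)(v & ~(uintptr_t)1);
}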
Example #16
static int init_node(struct cmatest_node *node)
{
    struct ibv_qp_init_attr init_qp_attr;
    int cqe, ret;

    node->pd = ibv_alloc_pd(node->cma_id->verbs);
    if (!node->pd) {
        ret = -ENOMEM;
        printf("rxe_send_mc: unable to allocate PD\n");
        goto out;
    }

    cqe = message_buffer ? message_buffer * 2 : 2;
    node->cq = ibv_create_cq(node->cma_id->verbs, cqe, node, 0, 0);
    if (!node->cq) {
        ret = -ENOMEM;
        printf("rxe_send_mc: unable to create CQ\n");
        goto out;
    }

    memset(&init_qp_attr, 0, sizeof init_qp_attr);
    init_qp_attr.cap.max_send_wr = message_buffer ? message_buffer : 1;
    init_qp_attr.cap.max_recv_wr = message_buffer ? message_buffer : 1;
    init_qp_attr.cap.max_send_sge = 1;
    init_qp_attr.cap.max_recv_sge = 1;
    init_qp_attr.qp_context = node;
    init_qp_attr.sq_sig_all = 1; // signal all send WRs
    init_qp_attr.qp_type = IBV_QPT_UD;
    init_qp_attr.send_cq = node->cq;
    init_qp_attr.recv_cq = node->cq;
    ret = rdma_create_qp(node->cma_id, node->pd, &init_qp_attr);
    if (ret) {
        perror("rxe_send_mc: unable to create QP");
        goto out;
    }

    ret = create_message(node);
    if (ret) {
        printf("rxe_send_mc: failed to create messages: %d\n", ret);
        goto out;
    }
out:
    return ret;
}
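Note: this is the one datagram example in the set; qp_type is IBV_QPT_UD because rxe_send_mc exercises multicast, and a single CQ sized at twice the message count absorbs both send and receive completions.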
Example #17
void IBConnection::on_addr_resolved(struct ibv_pd* pd, struct ibv_cq* cq)
{
    L_(debug) << "address resolved";

    struct ibv_qp_init_attr qp_attr;
    memset(&qp_attr, 0, sizeof qp_attr);
    qp_attr.cap = qp_cap_;
    qp_attr.send_cq = cq;
    qp_attr.recv_cq = cq;
    qp_attr.qp_type = IBV_QPT_RC;
    int err = rdma_create_qp(cm_id_, pd, &qp_attr);
    if (err)
        throw InfinibandException("creation of QP failed");

    err = rdma_resolve_route(cm_id_, RESOLVE_TIMEOUT_MS);
    if (err)
        throw InfinibandException("rdma_resolve_route failed");

    setup(pd);
}
Example #18
void build_connection(struct rdma_cm_id *id)
{
    IbvConnection *conn;
    struct ibv_qp_init_attr qp_attr;

    id->context = conn = (IbvConnection *)malloc(sizeof(IbvConnection));

    build_verbs(conn, id->verbs);
    build_qp_attr(conn, &qp_attr);

    TEST_NZ(rdma_create_qp(id, conn->pd, &qp_attr));

    conn->id = id;
    conn->qp = id->qp;

    conn->connected = 0;

    register_memory(conn);
    post_receives(conn);
}
Example #19
void build_connection(struct rdma_cm_id *id){

  struct ibv_qp_init_attr qp_attr;
  struct connection *conn;
  struct timeval start, end, dt;
  build_qp_attr(&qp_attr);

  TEST_NZ(rdma_create_qp(id, s_ctx->pd, &qp_attr));

  id->context = conn = (struct connection *)malloc(sizeof(struct connection));

  conn->id = id;
  conn->qp = id->qp;

  gettimeofday(&start, NULL);
  register_memory(conn);
  gettimeofday(&end, NULL);
  timersub(&end, &start, &dt);
  long usec = dt.tv_usec + 1000000 * dt.tv_sec;
  printf("[Register] takes %ld micro_secs.\n", usec);

}
Example #20
int on_connect_request(struct rdma_cm_id *id)
{
	struct ibv_qp_init_attr qp_attr;
	struct rdma_conn_param cm_params;
	struct connection *conn;

	printf("received connection request.\n");

	build_context(id->verbs);
	build_qp_attr(&qp_attr);

	TEST_NZ(rdma_create_qp(id, s_ctx->pd, &qp_attr));

	id->context = conn = (struct connection *)malloc(sizeof(struct connection));
	conn->qp = id->qp;

	register_memory(conn);
	post_receives(conn);

	memset(&cm_params, 0, sizeof(cm_params));
	TEST_NZ(rdma_accept(id, &cm_params));

	return 0;
}
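Note the ordering on the passive side: the QP is created and receive buffers are posted before rdma_accept() is called, so the peer's first send cannot land on an empty receive queue.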
Example #21
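(This fragment resumes rpcrdma_ep_connect() from Example #11 at the /* END TEMP */ marker, showing the rest of the reconnect path plus the initial-connect else branch.)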
		}
		/* END TEMP */
		rc = rdma_create_qp(id, ia->ri_pd, &ep->rep_attr);
		if (rc) {
			dprintk("RPC:       %s: rdma_create_qp failed %i\n",
				__func__, rc);
			rdma_destroy_id(id);
			rc = -ENETUNREACH;
			goto out;
		}
		rdma_destroy_qp(ia->ri_id);
		rdma_destroy_id(ia->ri_id);
		ia->ri_id = id;
	} else {
		dprintk("RPC:       %s: connecting...\n", __func__);
		rc = rdma_create_qp(ia->ri_id, ia->ri_pd, &ep->rep_attr);
		if (rc) {
			dprintk("RPC:       %s: rdma_create_qp failed %i\n",
				__func__, rc);
			/* do not update ep->rep_connected */
			return -ENETUNREACH;
		}
	}

	ep->rep_connected = 0;

	rc = rdma_connect(ia->ri_id, &ep->rep_remote_cma);
	if (rc) {
		dprintk("RPC:       %s: rdma_connect() failed with %i\n",
				__func__, rc);
		goto out;
Example #22
/**
 * iser_create_ib_conn_res - Creates FMR pool and Queue-Pair (QP)
 *
 * returns 0 on success, -1 on failure
 */
static int iser_create_ib_conn_res(struct iser_conn *ib_conn)
{
	struct iser_device	*device;
	struct ib_qp_init_attr	init_attr;
	int			ret = -ENOMEM;
	struct ib_fmr_pool_param params;

	BUG_ON(ib_conn->device == NULL);

	device = ib_conn->device;

	ib_conn->login_buf = kmalloc(ISER_RX_LOGIN_SIZE, GFP_KERNEL);
	if (!ib_conn->login_buf) {
		ret = -ENOMEM;
		goto alloc_err;
	}

	ib_conn->login_dma = ib_dma_map_single(ib_conn->device->ib_device,
				(void *)ib_conn->login_buf, ISER_RX_LOGIN_SIZE,
				DMA_FROM_DEVICE);

	ib_conn->page_vec = kmalloc(sizeof(struct iser_page_vec) +
				    (sizeof(u64) * (ISCSI_ISER_SG_TABLESIZE +1)),
				    GFP_KERNEL);
	if (!ib_conn->page_vec) {
		ret = -ENOMEM;
		goto alloc_err;
	}
	ib_conn->page_vec->pages = (u64 *) (ib_conn->page_vec + 1);

	params.page_shift        = SHIFT_4K;
	/* when the first/last SG elements are not start/end *
	 * page aligned, the map would be of N+1 pages      */
	params.max_pages_per_fmr = ISCSI_ISER_SG_TABLESIZE + 1;
	/* make the pool size twice the max number of SCSI commands *
	 * the ML is expected to queue, watermark for unmap at 50%  */
	params.pool_size	 = ISCSI_DEF_XMIT_CMDS_MAX * 2;
	params.dirty_watermark	 = ISCSI_DEF_XMIT_CMDS_MAX;
	params.cache		 = 0;
	params.flush_function	 = NULL;
	params.access		 = (IB_ACCESS_LOCAL_WRITE  |
				    IB_ACCESS_REMOTE_WRITE |
				    IB_ACCESS_REMOTE_READ);

	ib_conn->fmr_pool = ib_create_fmr_pool(device->pd, &params);
	if (IS_ERR(ib_conn->fmr_pool)) {
		ret = PTR_ERR(ib_conn->fmr_pool);
		goto fmr_pool_err;
	}

	memset(&init_attr, 0, sizeof init_attr);

	init_attr.event_handler = iser_qp_event_callback;
	init_attr.qp_context	= (void *)ib_conn;
	init_attr.send_cq	= device->tx_cq;
	init_attr.recv_cq	= device->rx_cq;
	init_attr.cap.max_send_wr  = ISER_QP_MAX_REQ_DTOS;
	init_attr.cap.max_recv_wr  = ISER_QP_MAX_RECV_DTOS;
	init_attr.cap.max_send_sge = 2;
	init_attr.cap.max_recv_sge = 1;
	init_attr.sq_sig_type	= IB_SIGNAL_REQ_WR;
	init_attr.qp_type	= IB_QPT_RC;

	ret = rdma_create_qp(ib_conn->cma_id, device->pd, &init_attr);
	if (ret)
		goto qp_err;

	ib_conn->qp = ib_conn->cma_id->qp;
	iser_err("setting conn %p cma_id %p: fmr_pool %p qp %p\n",
		 ib_conn, ib_conn->cma_id,
		 ib_conn->fmr_pool, ib_conn->cma_id->qp);
	return ret;

qp_err:
	(void)ib_destroy_fmr_pool(ib_conn->fmr_pool);
fmr_pool_err:
	kfree(ib_conn->page_vec);
	kfree(ib_conn->login_buf);
alloc_err:
	iser_err("unable to alloc mem or create resource, err %d\n", ret);
	return ret;
}
Example #23
/*
 * This needs to be very careful to not leave IS_ERR pointers around for
 * cleanup to trip over.
 */
static int rds_iw_setup_qp(struct rds_connection *conn)
{
	struct rds_iw_connection *ic = conn->c_transport_data;
	struct ib_device *dev = ic->i_cm_id->device;
	struct ib_qp_init_attr attr;
	struct rds_iw_device *rds_iwdev;
	int ret;

	/* rds_iw_add_one creates a rds_iw_device object per IB device,
	 * and allocates a protection domain, memory range and MR pool
	 * for each.  If that fails for any reason, it will not register
	 * the rds_iwdev at all.
	 */
	rds_iwdev = ib_get_client_data(dev, &rds_iw_client);
	if (!rds_iwdev) {
		if (printk_ratelimit())
			printk(KERN_NOTICE "RDS/IW: No client_data for device %s\n",
					dev->name);
		return -EOPNOTSUPP;
	}

	/* Protection domain and memory range */
	ic->i_pd = rds_iwdev->pd;
	ic->i_mr = rds_iwdev->mr;

	ret = rds_iw_init_qp_attrs(&attr, rds_iwdev,
			&ic->i_send_ring, rds_iw_send_cq_comp_handler,
			&ic->i_recv_ring, rds_iw_recv_cq_comp_handler,
			conn);
	if (ret < 0)
		goto out;

	ic->i_send_cq = attr.send_cq;
	ic->i_recv_cq = attr.recv_cq;

	/*
	 * XXX this can fail if max_*_wr is too large?  Are we supposed
	 * to back off until we get a value that the hardware can support?
	 */
	ret = rdma_create_qp(ic->i_cm_id, ic->i_pd, &attr);
	if (ret) {
		rdsdebug("rdma_create_qp failed: %d\n", ret);
		goto out;
	}

	ic->i_send_hdrs = ib_dma_alloc_coherent(dev,
					   ic->i_send_ring.w_nr *
						sizeof(struct rds_header),
					   &ic->i_send_hdrs_dma, GFP_KERNEL);
	if (!ic->i_send_hdrs) {
		ret = -ENOMEM;
		rdsdebug("ib_dma_alloc_coherent send failed\n");
		goto out;
	}

	ic->i_recv_hdrs = ib_dma_alloc_coherent(dev,
					   ic->i_recv_ring.w_nr *
						sizeof(struct rds_header),
					   &ic->i_recv_hdrs_dma, GFP_KERNEL);
	if (!ic->i_recv_hdrs) {
		ret = -ENOMEM;
		rdsdebug("ib_dma_alloc_coherent recv failed\n");
		goto out;
	}

	ic->i_ack = ib_dma_alloc_coherent(dev, sizeof(struct rds_header),
				       &ic->i_ack_dma, GFP_KERNEL);
	if (!ic->i_ack) {
		ret = -ENOMEM;
		rdsdebug("ib_dma_alloc_coherent ack failed\n");
		goto out;
	}

	ic->i_sends = vmalloc(ic->i_send_ring.w_nr * sizeof(struct rds_iw_send_work));
	if (!ic->i_sends) {
		ret = -ENOMEM;
		rdsdebug("send allocation failed\n");
		goto out;
	}
	rds_iw_send_init_ring(ic);

	ic->i_recvs = vmalloc(ic->i_recv_ring.w_nr * sizeof(struct rds_iw_recv_work));
	if (!ic->i_recvs) {
		ret = -ENOMEM;
		rdsdebug("recv allocation failed\n");
		goto out;
	}

	rds_iw_recv_init_ring(ic);
	rds_iw_recv_init_ack(ic);

	/* Post receive buffers - as a side effect, this will update
	 * the posted credit count. */
	rds_iw_recv_refill(conn, GFP_KERNEL, GFP_HIGHUSER, 1);

	rdsdebug("conn %p pd %p mr %p cq %p %p\n", conn, ic->i_pd, ic->i_mr,
		 ic->i_send_cq, ic->i_recv_cq);

out:
	return ret;
}
Example #24
static int iser_create_ib_conn_res(struct iser_conn *ib_conn)
{
	struct iser_device	*device;
	struct ib_qp_init_attr	init_attr;
	int			req_err, resp_err, ret = -ENOMEM;
	struct ib_fmr_pool_param params;

	BUG_ON(ib_conn->device == NULL);

	device = ib_conn->device;

	ib_conn->login_buf = kmalloc(ISCSI_DEF_MAX_RECV_SEG_LEN +
					ISER_RX_LOGIN_SIZE, GFP_KERNEL);
	if (!ib_conn->login_buf)
		goto out_err;

	ib_conn->login_req_buf  = ib_conn->login_buf;
	ib_conn->login_resp_buf = ib_conn->login_buf + ISCSI_DEF_MAX_RECV_SEG_LEN;

	ib_conn->login_req_dma = ib_dma_map_single(ib_conn->device->ib_device,
				(void *)ib_conn->login_req_buf,
				ISCSI_DEF_MAX_RECV_SEG_LEN, DMA_TO_DEVICE);

	ib_conn->login_resp_dma = ib_dma_map_single(ib_conn->device->ib_device,
				(void *)ib_conn->login_resp_buf,
				ISER_RX_LOGIN_SIZE, DMA_FROM_DEVICE);

	req_err  = ib_dma_mapping_error(device->ib_device, ib_conn->login_req_dma);
	resp_err = ib_dma_mapping_error(device->ib_device, ib_conn->login_resp_dma);

	if (req_err || resp_err) {
		if (req_err)
			ib_conn->login_req_dma = 0;
		if (resp_err)
			ib_conn->login_resp_dma = 0;
		goto out_err;
	}

	ib_conn->page_vec = kmalloc(sizeof(struct iser_page_vec) +
				    (sizeof(u64) * (ISCSI_ISER_SG_TABLESIZE +1)),
				    GFP_KERNEL);
	if (!ib_conn->page_vec)
		goto out_err;

	ib_conn->page_vec->pages = (u64 *) (ib_conn->page_vec + 1);

	params.page_shift        = SHIFT_4K;
	params.max_pages_per_fmr = ISCSI_ISER_SG_TABLESIZE + 1;
	params.pool_size	 = ISCSI_DEF_XMIT_CMDS_MAX * 2;
	params.dirty_watermark	 = ISCSI_DEF_XMIT_CMDS_MAX;
	params.cache		 = 0;
	params.flush_function	 = NULL;
	params.access		 = (IB_ACCESS_LOCAL_WRITE  |
				    IB_ACCESS_REMOTE_WRITE |
				    IB_ACCESS_REMOTE_READ);

	ib_conn->fmr_pool = ib_create_fmr_pool(device->pd, &params);
	if (IS_ERR(ib_conn->fmr_pool)) {
		ret = PTR_ERR(ib_conn->fmr_pool);
		ib_conn->fmr_pool = NULL;
		goto out_err;
	}

	memset(&init_attr, 0, sizeof init_attr);

	init_attr.event_handler = iser_qp_event_callback;
	init_attr.qp_context	= (void *)ib_conn;
	init_attr.send_cq	= device->tx_cq;
	init_attr.recv_cq	= device->rx_cq;
	init_attr.cap.max_send_wr  = ISER_QP_MAX_REQ_DTOS;
	init_attr.cap.max_recv_wr  = ISER_QP_MAX_RECV_DTOS;
	init_attr.cap.max_send_sge = 2;
	init_attr.cap.max_recv_sge = 1;
	init_attr.sq_sig_type	= IB_SIGNAL_REQ_WR;
	init_attr.qp_type	= IB_QPT_RC;

	ret = rdma_create_qp(ib_conn->cma_id, device->pd, &init_attr);
	if (ret)
		goto out_err;

	ib_conn->qp = ib_conn->cma_id->qp;
	iser_err("setting conn %p cma_id %p: fmr_pool %p qp %p\n",
		 ib_conn, ib_conn->cma_id,
		 ib_conn->fmr_pool, ib_conn->cma_id->qp);
	return ret;

out_err:
	iser_err("unable to alloc mem or create resource, err %d\n", ret);
	return ret;
}
Example #25
static int ibw_setup_cq_qp(struct ibw_conn *conn)
{
	struct ibw_ctx_priv *pctx = talloc_get_type(conn->ctx->internal, struct ibw_ctx_priv);
	struct ibw_conn_priv *pconn = talloc_get_type(conn->internal, struct ibw_conn_priv);
	struct ibv_qp_init_attr init_attr;
	struct ibv_qp_attr attr;
	int rc;

	DEBUG(DEBUG_DEBUG, ("ibw_setup_cq_qp(cmid: %p)\n", pconn->cm_id));

	/* init verbs */
	pconn->verbs_channel = ibv_create_comp_channel(pconn->cm_id->verbs);
	if (!pconn->verbs_channel) {
		sprintf(ibw_lasterr, "ibv_create_comp_channel failed %d\n", errno);
		return -1;
	}
	DEBUG(DEBUG_DEBUG, ("created channel %p\n", pconn->verbs_channel));

	pconn->verbs_channel_event = tevent_add_fd(pctx->ectx, NULL, /* not pconn or conn */
		pconn->verbs_channel->fd, TEVENT_FD_READ, ibw_event_handler_verbs, conn);

	pconn->pd = ibv_alloc_pd(pconn->cm_id->verbs);
	if (!pconn->pd) {
		sprintf(ibw_lasterr, "ibv_alloc_pd failed %d\n", errno);
		return -1;
	}
	DEBUG(DEBUG_DEBUG, ("created pd %p\n", pconn->pd));

	/* init mr */
	if (ibw_init_memory(conn))
		return -1;

	/* init cq */
	pconn->cq = ibv_create_cq(pconn->cm_id->verbs,
		pctx->opts.max_recv_wr + pctx->opts.max_send_wr,
		conn, pconn->verbs_channel, 0);
	if (pconn->cq==NULL) {
		sprintf(ibw_lasterr, "ibv_create_cq failed\n");
		return -1;
	}

	rc = ibv_req_notify_cq(pconn->cq, 0);
	if (rc) {
		sprintf(ibw_lasterr, "ibv_req_notify_cq failed with %d\n", rc);
		return rc;
	}

	/* init qp */
	memset(&init_attr, 0, sizeof(init_attr));
	init_attr.cap.max_send_wr = pctx->opts.max_send_wr;
	init_attr.cap.max_recv_wr = pctx->opts.max_recv_wr;
	init_attr.cap.max_recv_sge = 1;
	init_attr.cap.max_send_sge = 1;
	init_attr.qp_type = IBV_QPT_RC;
	init_attr.send_cq = pconn->cq;
	init_attr.recv_cq = pconn->cq;

	rc = rdma_create_qp(pconn->cm_id, pconn->pd, &init_attr);
	if (rc) {
		sprintf(ibw_lasterr, "rdma_create_qp failed with %d\n", rc);
		return rc;
	}
	/* else the result is in pconn->cm_id->qp */

	rc = ibv_query_qp(pconn->cm_id->qp, &attr, IBV_QP_PATH_MTU, &init_attr);
	if (rc) {
		sprintf(ibw_lasterr, "ibv_query_qp failed with %d\n", rc);
		return rc;
	}

	return ibw_fill_cq(conn);
}
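Note: after rdma_create_qp() succeeds the QP lives in pconn->cm_id->qp; the ibv_query_qp() call is a sanity check on the freshly created QP (retrieving the path MTU) before ibw_fill_cq() presumably pre-posts receive buffers.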
Example #26
static int p9_rdma_bind_privport(struct p9_trans_rdma *rdma)
{
	struct sockaddr_in cl = {
		.sin_family = AF_INET,
		.sin_addr.s_addr = htonl(INADDR_ANY),
	};
	int port, err = -EINVAL;

	for (port = P9_DEF_MAX_RESVPORT; port >= P9_DEF_MIN_RESVPORT; port--) {
		cl.sin_port = htons((ushort)port);
		err = rdma_bind_addr(rdma->cm_id, (struct sockaddr *)&cl);
		if (err != -EADDRINUSE)
			break;
	}
	return err;
}

/**
 * trans_create_rdma - Transport method for creating a transport instance
 * @client: client instance
 * @addr: IP address string
 * @args: Mount options string
 */
static int
rdma_create_trans(struct p9_client *client, const char *addr, char *args)
{
	int err;
	struct p9_rdma_opts opts;
	struct p9_trans_rdma *rdma;
	struct rdma_conn_param conn_param;
	struct ib_qp_init_attr qp_attr;
	struct ib_device_attr devattr;
	struct ib_cq_init_attr cq_attr = {};

	/* Parse the transport specific mount options */
	err = parse_opts(args, &opts);
	if (err < 0)
		return err;

	/* Create and initialize the RDMA transport structure */
	rdma = alloc_rdma(&opts);
	if (!rdma)
		return -ENOMEM;

	/* Create the RDMA CM ID */
	rdma->cm_id = rdma_create_id(p9_cm_event_handler, client, RDMA_PS_TCP,
				     IB_QPT_RC);
	if (IS_ERR(rdma->cm_id))
		goto error;

	/* Associate the client with the transport */
	client->trans = rdma;

	/* Bind to a privileged port if we need to */
	if (opts.privport) {
		err = p9_rdma_bind_privport(rdma);
		if (err < 0) {
			pr_err("%s (%d): problem binding to privport: %d\n",
			       __func__, task_pid_nr(current), -err);
			goto error;
		}
	}

	/* Resolve the server's address */
	rdma->addr.sin_family = AF_INET;
	rdma->addr.sin_addr.s_addr = in_aton(addr);
	rdma->addr.sin_port = htons(opts.port);
	err = rdma_resolve_addr(rdma->cm_id, NULL,
				(struct sockaddr *)&rdma->addr,
				rdma->timeout);
	if (err)
		goto error;
	err = wait_for_completion_interruptible(&rdma->cm_done);
	if (err || (rdma->state != P9_RDMA_ADDR_RESOLVED))
		goto error;

	/* Resolve the route to the server */
	err = rdma_resolve_route(rdma->cm_id, rdma->timeout);
	if (err)
		goto error;
	err = wait_for_completion_interruptible(&rdma->cm_done);
	if (err || (rdma->state != P9_RDMA_ROUTE_RESOLVED))
		goto error;

	/* Query the device attributes */
	err = ib_query_device(rdma->cm_id->device, &devattr);
	if (err)
		goto error;

	/* Create the Completion Queue */
	cq_attr.cqe = opts.sq_depth + opts.rq_depth + 1;
	rdma->cq = ib_create_cq(rdma->cm_id->device, cq_comp_handler,
				cq_event_handler, client,
				&cq_attr);
	if (IS_ERR(rdma->cq))
		goto error;
	ib_req_notify_cq(rdma->cq, IB_CQ_NEXT_COMP);

	/* Create the Protection Domain */
	rdma->pd = ib_alloc_pd(rdma->cm_id->device);
	if (IS_ERR(rdma->pd))
		goto error;

	/* Cache the DMA lkey in the transport */
	rdma->dma_mr = NULL;
	if (devattr.device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY)
		rdma->lkey = rdma->cm_id->device->local_dma_lkey;
	else {
		rdma->dma_mr = ib_get_dma_mr(rdma->pd, IB_ACCESS_LOCAL_WRITE);
		if (IS_ERR(rdma->dma_mr))
			goto error;
		rdma->lkey = rdma->dma_mr->lkey;
	}

	/* Create the Queue Pair */
	memset(&qp_attr, 0, sizeof qp_attr);
	qp_attr.event_handler = qp_event_handler;
	qp_attr.qp_context = client;
	qp_attr.cap.max_send_wr = opts.sq_depth;
	qp_attr.cap.max_recv_wr = opts.rq_depth;
	qp_attr.cap.max_send_sge = P9_RDMA_SEND_SGE;
	qp_attr.cap.max_recv_sge = P9_RDMA_RECV_SGE;
	qp_attr.sq_sig_type = IB_SIGNAL_REQ_WR;
	qp_attr.qp_type = IB_QPT_RC;
	qp_attr.send_cq = rdma->cq;
	qp_attr.recv_cq = rdma->cq;
	err = rdma_create_qp(rdma->cm_id, rdma->pd, &qp_attr);
	if (err)
		goto error;
	rdma->qp = rdma->cm_id->qp;

	/* Request a connection */
	memset(&conn_param, 0, sizeof(conn_param));
	conn_param.private_data = NULL;
	conn_param.private_data_len = 0;
	conn_param.responder_resources = P9_RDMA_IRD;
	conn_param.initiator_depth = P9_RDMA_ORD;
	err = rdma_connect(rdma->cm_id, &conn_param);
	if (err)
		goto error;
	err = wait_for_completion_interruptible(&rdma->cm_done);
	if (err || (rdma->state != P9_RDMA_CONNECTED))
		goto error;

	client->status = Connected;

	return 0;

error:
	rdma_destroy_trans(rdma);
	return -ENOTCONN;
}
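Note: this 9P transport shows the full bring-up order around rdma_create_qp(): resolve address and route, create the completion queue and protection domain, create the QP, then rdma_connect(). Each wait_for_completion_interruptible() blocks until the CM event handler reports the corresponding state transition.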
Example #27
static int sdp_init_qp(struct sock *sk, struct rdma_cm_id *id)
{
	struct ib_qp_init_attr qp_init_attr = {
		.event_handler = sdp_qp_event_handler,
		.cap.max_send_wr = SDP_TX_SIZE,
		.cap.max_recv_wr = sdp_rx_size,
		.cap.max_inline_data = sdp_inline_thresh,
		.sq_sig_type = IB_SIGNAL_REQ_WR,
		.qp_type = IB_QPT_RC,
	};
	struct ib_device *device = id->device;
	int rc;

	sdp_dbg(sk, "%s\n", __func__);

	sdp_sk(sk)->max_sge = sdp_get_max_dev_sge(device);
	sdp_dbg(sk, "Max sges: %d\n", sdp_sk(sk)->max_sge);

	qp_init_attr.cap.max_send_sge = MIN(sdp_sk(sk)->max_sge, SDP_MAX_SEND_SGES);
	sdp_dbg(sk, "Setting max send sge to: %d\n", qp_init_attr.cap.max_send_sge);

	qp_init_attr.cap.max_recv_sge = MIN(sdp_sk(sk)->max_sge, SDP_MAX_RECV_SGES);
	sdp_dbg(sk, "Setting max recv sge to: %d\n", qp_init_attr.cap.max_recv_sge);

	sdp_sk(sk)->sdp_dev = ib_get_client_data(device, &sdp_client);
	if (!sdp_sk(sk)->sdp_dev) {
		sdp_warn(sk, "SDP not available on device %s\n", device->name);
		rc = -ENODEV;
		goto err_rx;
	}

	rc = sdp_rx_ring_create(sdp_sk(sk), device);
	if (rc)
		goto err_rx;

	rc = sdp_tx_ring_create(sdp_sk(sk), device);
	if (rc)
		goto err_tx;

	qp_init_attr.recv_cq = sdp_sk(sk)->rx_ring.cq;
	qp_init_attr.send_cq = sdp_sk(sk)->tx_ring.cq;

	rc = rdma_create_qp(id, sdp_sk(sk)->sdp_dev->pd, &qp_init_attr);
	if (rc) {
		sdp_warn(sk, "Unable to create QP: %d.\n", rc);
		goto err_qp;
	}
	sdp_sk(sk)->qp = id->qp;
	sdp_sk(sk)->ib_device = device;
	sdp_sk(sk)->qp_active = 1;
	sdp_sk(sk)->context.device = device;
	sdp_sk(sk)->inline_thresh = qp_init_attr.cap.max_inline_data;

	sdp_dbg(sk, "%s done\n", __func__);
	return 0;

err_qp:
	sdp_tx_ring_destroy(sdp_sk(sk));
err_tx:
	sdp_rx_ring_destroy(sdp_sk(sk));
err_rx:
	return rc;
}
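Note: unlike the simpler examples, sdp_init_qp() derives its SGE limits from the device via sdp_get_max_dev_sge(), clamped to SDP's own maxima, and takes its send and receive CQs from the tx/rx rings created just beforehand.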

static int sdp_get_max_send_frags(u32 buf_size)
{
	return MIN(
		/* +1 to compensate for unaligned buffers */
		(PAGE_ALIGN(buf_size) >> PAGE_SHIFT) + 1,
		SDP_MAX_SEND_SGES - 1);
}

static int sdp_connect_handler(struct sock *sk, struct rdma_cm_id *id,
		       	struct rdma_cm_event *event)
{
	struct sockaddr_in *dst_addr;
	struct sock *child;
	const struct sdp_hh *h;
	int rc = 0;

	sdp_dbg(sk, "%s %p -> %p\n", __func__, sdp_sk(sk)->id, id);

	h = event->param.conn.private_data;
	SDP_DUMP_PACKET(sk, "RX", NULL, &h->bsdh);

	if (h->ipv_cap & HH_IPV_MASK & ~(HH_IPV4 | HH_IPV6)) {
		sdp_warn(sk, "Bad IPV field in SDP Hello header: 0x%x\n",
				h->ipv_cap & HH_IPV_MASK);
		return -EINVAL;
	}

	if (!h->max_adverts)
		return -EINVAL;

#if (LINUX_VERSION_CODE < KERNEL_VERSION(3, 3, 0))
	child = sk_clone(sk, GFP_KERNEL);
#else
	child = sk_clone_lock(sk, GFP_KERNEL);
#endif
	if (!child)
		return -ENOMEM;

	sdp_init_sock(child);

	dst_addr = (struct sockaddr_in *)&id->route.addr.dst_addr;
	sdp_inet_dport(child) = dst_addr->sin_port;

#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
	if (inet6_sk(sk)) {
		struct ipv6_pinfo *newnp;

		newnp = inet_sk(child)->pinet6 = sdp_inet6_sk_generic(child);

		memcpy(newnp, inet6_sk(sk), sizeof(struct ipv6_pinfo));
		if ((h->ipv_cap & HH_IPV_MASK) == HH_IPV4) {
			/* V6 mapped */
			sdp_inet_daddr(child) = dst_addr->sin_addr.s_addr;
			ipv6_addr_set(&child->sk_v6_daddr, 0, 0, htonl(0x0000FFFF),
					h->src_addr.ip4.addr);

			ipv6_addr_set(&child->sk_v6_rcv_saddr, 0, 0, htonl(0x0000FFFF),
					h->dst_addr.ip4.addr);

			ipv6_addr_copy(&child->sk_v6_rcv_saddr, &child->sk_v6_daddr);
		} else if ((h->ipv_cap & HH_IPV_MASK) == HH_IPV6) {
			struct sockaddr_in6 *dst_addr6 = (struct sockaddr_in6 *)dst_addr;
			struct sockaddr_in6 *src_addr6 =
				(struct sockaddr_in6 *)&id->route.addr.src_addr;

			ipv6_addr_copy(&child->sk_v6_daddr, &dst_addr6->sin6_addr);
			ipv6_addr_copy(&child->sk_v6_rcv_saddr, &src_addr6->sin6_addr);
			ipv6_addr_copy(&newnp->saddr, &src_addr6->sin6_addr);
		} else {
			sdp_warn(child, "Bad IPV field: 0x%x\n", h->ipv_cap & HH_IPV_MASK);
		}

		sdp_inet_daddr(child) = sdp_inet_saddr(child) =
			sdp_inet_rcv_saddr(child) = LOOPBACK4_IPV6;
	} else
#endif
	{
		sdp_inet_daddr(child) = dst_addr->sin_addr.s_addr;
	}

#ifdef SDP_SOCK_HISTORY
	sdp_ssk_hist_rename(sk);
#endif
	__sock_put(child, SOCK_REF_CLONE);

	down_read(&device_removal_lock);

	rc = sdp_init_qp(child, id);
	if (rc) {
		bh_unlock_sock(child);
		up_read(&device_removal_lock);
		sdp_sk(child)->destructed_already = 1;
#ifdef SDP_SOCK_HISTORY
		sdp_ssk_hist_close(child);
#endif
		sk_free(child);
		return rc;
	}

	sdp_sk(child)->max_bufs = ntohs(h->bsdh.bufs);
	atomic_set(&sdp_sk(child)->tx_ring.credits, sdp_sk(child)->max_bufs);

	sdp_sk(child)->min_bufs = tx_credits(sdp_sk(child)) / 4;
	sdp_sk(child)->xmit_size_goal = ntohl(h->localrcvsz) -
		sizeof(struct sdp_bsdh);

	sdp_sk(child)->send_frags = sdp_get_max_send_frags(sdp_sk(child)->xmit_size_goal);
	sdp_init_buffers(sdp_sk(child), rcvbuf_initial_size);

	id->context = child;
	sdp_sk(child)->id = id;

	list_add_tail(&sdp_sk(child)->backlog_queue,
			&sdp_sk(sk)->backlog_queue);
	sdp_sk(child)->parent = sk;

	bh_unlock_sock(child);
	sdp_add_sock(sdp_sk(child));
	up_read(&device_removal_lock);

	sdp_exch_state(child, TCPF_LISTEN | TCPF_CLOSE, TCP_SYN_RECV);

	/* child->sk_write_space(child); */
	/* child->sk_data_ready(child, 0); */
	sk->sk_data_ready(sk);

	return 0;
}

static int sdp_response_handler(struct sock *sk, struct rdma_cm_id *id,
				struct rdma_cm_event *event)
{
	const struct sdp_hah *h;
	struct sockaddr_in *dst_addr;
	sdp_dbg(sk, "%s\n", __func__);

	sdp_exch_state(sk, TCPF_SYN_SENT, TCP_ESTABLISHED);
	sdp_set_default_moderation(sdp_sk(sk));

	if (sock_flag(sk, SOCK_KEEPOPEN))
		sdp_start_keepalive_timer(sk);

	if (sock_flag(sk, SOCK_DEAD))
		return 0;

	h = event->param.conn.private_data;
	SDP_DUMP_PACKET(sk, "RX", NULL, &h->bsdh);
	sdp_sk(sk)->max_bufs = ntohs(h->bsdh.bufs);
	atomic_set(&sdp_sk(sk)->tx_ring.credits, sdp_sk(sk)->max_bufs);
	sdp_sk(sk)->min_bufs = tx_credits(sdp_sk(sk)) / 4;
	sdp_sk(sk)->xmit_size_goal =
		ntohl(h->actrcvsz) - sizeof(struct sdp_bsdh);
	sdp_sk(sk)->send_frags = sdp_get_max_send_frags(sdp_sk(sk)->xmit_size_goal);
	sdp_sk(sk)->xmit_size_goal = MIN(sdp_sk(sk)->xmit_size_goal,
		sdp_sk(sk)->send_frags * PAGE_SIZE);

	sdp_sk(sk)->poll_cq = 1;

	sk->sk_state_change(sk);
	sk_wake_async(sk, 0, POLL_OUT);

	dst_addr = (struct sockaddr_in *)&id->route.addr.dst_addr;
	sdp_inet_dport(sk) = dst_addr->sin_port;
	sdp_inet_daddr(sk) = dst_addr->sin_addr.s_addr;

#ifdef SDP_SOCK_HISTORY
	sdp_ssk_hist_rename(sk);
#endif
	return 0;
}

static int sdp_connected_handler(struct sock *sk)
{
	struct sock *parent;
	sdp_dbg(sk, "%s\n", __func__);

	parent = sdp_sk(sk)->parent;
	BUG_ON(!parent);

	sdp_exch_state(sk, TCPF_SYN_RECV, TCP_ESTABLISHED);

#ifdef SDP_SOCK_HISTORY
	sdp_ssk_hist_rename(sk);
#endif
	sdp_set_default_moderation(sdp_sk(sk));

	if (sock_flag(sk, SOCK_KEEPOPEN))
		sdp_start_keepalive_timer(sk);

	if (sock_flag(sk, SOCK_DEAD))
		return 0;

	lock_sock(parent);
	if (!sdp_sk(parent)->id) { /* TODO: look at SOCK_DEAD? */
		sdp_dbg(sk, "parent is going away.\n");
		goto done;
	}

	sk_acceptq_added(parent);
	sdp_dbg(parent, "%s child connection established\n", __func__);
	list_del_init(&sdp_sk(sk)->backlog_queue);
	list_add_tail(&sdp_sk(sk)->accept_queue,
			&sdp_sk(parent)->accept_queue);

	parent->sk_state_change(parent);
	sk_wake_async(parent, 0, POLL_OUT);
done:
	release_sock(parent);

	return 0;
}

static int sdp_disconnected_handler(struct sock *sk)
{
	struct sdp_sock *ssk = sdp_sk(sk);

	sdp_dbg(sk, "%s\n", __func__);

	if (ssk->tx_ring.cq)
		if (sdp_xmit_poll(ssk, 1))
			sdp_post_sends(ssk, 0);

	if (sk->sk_state == TCP_SYN_RECV) {
		sdp_connected_handler(sk);

		if (rcv_nxt(ssk))
			return 0;
	}

	return -ECONNRESET;
}

int sdp_cma_handler(struct rdma_cm_id *id, struct rdma_cm_event *event)
{
	struct rdma_conn_param conn_param;
	struct sock *parent = NULL;
	struct sock *child = NULL;
	struct sock *sk;
	struct sdp_hah hah;
	struct sdp_hh hh;

	int rc = 0, rc2;

	sk = id->context;
	if (!sk) {
		sdp_dbg(NULL, "cm_id is being torn down, event %s\n",
		       	rdma_cm_event_str(event->event));
		return event->event == RDMA_CM_EVENT_CONNECT_REQUEST ?
			-EINVAL : 0;
	}

	sdp_add_to_history(sk, rdma_cm_event_str(event->event));

	lock_sock_nested(sk, SINGLE_DEPTH_NESTING);
	sdp_dbg(sk, "event: %s\n", rdma_cm_event_str(event->event));
	if (!sdp_sk(sk)->id) {
		sdp_dbg(sk, "socket is being torn down\n");
		rc = event->event == RDMA_CM_EVENT_CONNECT_REQUEST ?
			-EINVAL : 0;
		release_sock(sk);
		return rc;
	}

	switch (event->event) {
	case RDMA_CM_EVENT_ADDR_RESOLVED:
		if (sdp_link_layer_ib_only &&
			rdma_node_get_transport(id->device->node_type) ==
				RDMA_TRANSPORT_IB &&
			rdma_port_get_link_layer(id->device, id->port_num) !=
				IB_LINK_LAYER_INFINIBAND) {
			sdp_dbg(sk, "Link layer is: %d. Only IB link layer "
				"is allowed\n",
				rdma_port_get_link_layer(id->device,
					id->port_num));
			rc = -ENETUNREACH;
			break;
		}

		rc = rdma_resolve_route(id, SDP_ROUTE_TIMEOUT);
		break;
	case RDMA_CM_EVENT_ADDR_ERROR:
		rc = -ENETUNREACH;
		break;
	case RDMA_CM_EVENT_ROUTE_RESOLVED:
		rc = sdp_init_qp(sk, id);
		if (rc)
			break;
		memset(&hh, 0, sizeof hh);
		hh.bsdh.mid = SDP_MID_HELLO;
		hh.bsdh.len = htonl(sizeof(struct sdp_hh));
		hh.max_adverts = 1;

		hh.majv_minv = SDP_MAJV_MINV;
		sdp_init_buffers(sdp_sk(sk), rcvbuf_initial_size);
		hh.bsdh.bufs = htons(rx_ring_posted(sdp_sk(sk)));
		atomic_set(&sdp_sk(sk)->remote_credits,
				rx_ring_posted(sdp_sk(sk)));
		hh.localrcvsz = hh.desremrcvsz = htonl(sdp_sk(sk)->recv_frags *
				PAGE_SIZE + sizeof(struct sdp_bsdh));
#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
		if (inet6_sk(sk)) {
			struct sockaddr *src_addr = (struct sockaddr *)&id->route.addr.src_addr;
			struct sockaddr_in *addr4 = (struct sockaddr_in *)src_addr;
			struct sockaddr_in6 *addr6 = (struct sockaddr_in6 *)src_addr;

			if (src_addr->sa_family == AF_INET) {
				/* IPv4 over IPv6 */
				ipv6_addr_set(&sk->sk_v6_rcv_saddr, 0, 0, htonl(0xFFFF),
						addr4->sin_addr.s_addr);
			} else {
				sk->sk_v6_rcv_saddr = addr6->sin6_addr;
			}
			inet6_sk(sk)->saddr = sk->sk_v6_rcv_saddr;
		}
			else
#endif
		{
			sdp_inet_saddr(sk) = sdp_inet_rcv_saddr(sk) =
				((struct sockaddr_in *)&id->route.addr.src_addr)->sin_addr.s_addr;
		}
		memset(&conn_param, 0, sizeof conn_param);
		conn_param.private_data_len = sizeof hh;
		conn_param.private_data = &hh;
		conn_param.responder_resources = 4 /* TODO */;
		conn_param.initiator_depth = 4 /* TODO */;
		conn_param.retry_count = sdp_retry_count;
		SDP_DUMP_PACKET(sk, "TX", NULL, &hh.bsdh);

		if (sdp_apm_enable) {
			rc = rdma_enable_apm(id, RDMA_ALT_PATH_BEST);
			if (rc)
				sdp_warn(sk, "APM couldn't be enabled for active side: %d\n", rc);
		}

		rc = rdma_connect(id, &conn_param);
		break;

	case RDMA_CM_EVENT_ALT_ROUTE_RESOLVED:
		sdp_dbg(sk, "alt route was resolved slid=%d, dlid=%d\n",
				id->route.path_rec[1].slid, id->route.path_rec[1].dlid);
		break;

	case RDMA_CM_EVENT_ALT_PATH_LOADED:
		sdp_dbg(sk, "alt route path loaded\n");
		break;

	case RDMA_CM_EVENT_ALT_ROUTE_ERROR:
		sdp_warn(sk, "alt route resolve error\n");
		break;

	case RDMA_CM_EVENT_ROUTE_ERROR:
		rc = -ETIMEDOUT;
		break;
	case RDMA_CM_EVENT_CONNECT_REQUEST:
		rc = sdp_connect_handler(sk, id, event);
		if (rc) {
			sdp_dbg(sk, "Destroying qp\n");
			rdma_reject(id, NULL, 0);
			break;
		}
		child = id->context;
		atomic_set(&sdp_sk(child)->remote_credits,
				rx_ring_posted(sdp_sk(child)));
		memset(&hah, 0, sizeof hah);
		hah.bsdh.mid = SDP_MID_HELLO_ACK;
		hah.bsdh.bufs = htons(rx_ring_posted(sdp_sk(child)));
		hah.bsdh.len = htonl(sizeof(struct sdp_hah));
		hah.majv_minv = SDP_MAJV_MINV;
		hah.ext_max_adverts = 1; /* Doesn't seem to be mandated by spec,
					    but just in case */
		hah.actrcvsz = htonl(sdp_sk(child)->recv_frags * PAGE_SIZE +
			sizeof(struct sdp_bsdh));
		memset(&conn_param, 0, sizeof conn_param);
		conn_param.private_data_len = sizeof hah;
		conn_param.private_data = &hah;
		conn_param.responder_resources = 4 /* TODO */;
		conn_param.initiator_depth = 4 /* TODO */;
		conn_param.retry_count = sdp_retry_count;
		SDP_DUMP_PACKET(sk, "TX", NULL, &hah.bsdh);
		rc = rdma_accept(id, &conn_param);
		if (rc) {
			sdp_sk(child)->id = NULL;
			id->qp = NULL;
			id->context = NULL;
			parent = sdp_sk(child)->parent; /* TODO: hold ? */
		} else if (sdp_apm_enable) {
				rc2 = rdma_enable_apm(id, RDMA_ALT_PATH_BEST);
				if (rc2)
					sdp_warn(sk, "APM couldn't be enabled for passive side: %d\n", rc2);
		}
		break;
	case RDMA_CM_EVENT_CONNECT_RESPONSE:
		rc = sdp_response_handler(sk, id, event);
		if (rc) {
			sdp_dbg(sk, "Destroying qp\n");
			rdma_reject(id, NULL, 0);
		} else {
			rc = rdma_accept(id, NULL);
			if (!rc && sdp_apm_enable) {
				rc2 = rdma_enable_apm(id, RDMA_ALT_PATH_BEST);
				if (rc2)
					sdp_warn(sk, "APM couldn't be enabled for passive side:%d \n", rc2);
			}
		}
		break;
	case RDMA_CM_EVENT_CONNECT_ERROR:
		rc = -ETIMEDOUT;
		break;
	case RDMA_CM_EVENT_UNREACHABLE:
		rc = -ENETUNREACH;
		break;
	case RDMA_CM_EVENT_REJECTED:
		rc = -ECONNREFUSED;
		break;
	case RDMA_CM_EVENT_ESTABLISHED:
		sdp_inet_saddr(sk) = sdp_inet_rcv_saddr(sk) =
			((struct sockaddr_in *)&id->route.addr.src_addr)->sin_addr.s_addr;
		rc = sdp_connected_handler(sk);
		break;
	case RDMA_CM_EVENT_DISCONNECTED: /* This means DREQ/DREP received */
		if (sk->sk_state == TCP_LAST_ACK) {
			sdp_cancel_dreq_wait_timeout(sdp_sk(sk));

			sdp_exch_state(sk, TCPF_LAST_ACK, TCP_TIME_WAIT);

			sdp_dbg(sk, "%s: waiting for Infiniband tear down\n",
				__func__);
		}

		sdp_sk(sk)->qp_active = 0;
		rdma_disconnect(id);

		if (sk->sk_state != TCP_TIME_WAIT) {
			if (sk->sk_state == TCP_CLOSE_WAIT) {
				sdp_dbg(sk, "IB teardown while in "
					"TCP_CLOSE_WAIT taking reference to "
					"let close() finish the work\n");
				sock_hold(sk, SOCK_REF_CMA);
				sdp_start_cma_timewait_timeout(sdp_sk(sk),
						SDP_CMA_TIMEWAIT_TIMEOUT);

			}
			sdp_set_error(sk, -EPIPE);
			rc = sdp_disconnected_handler(sk);
		}
		break;
	case RDMA_CM_EVENT_TIMEWAIT_EXIT:
		rc = sdp_disconnected_handler(sk);
		break;
	case RDMA_CM_EVENT_DEVICE_REMOVAL:
		rc = -ENETRESET;
		break;

	case RDMA_CM_EVENT_ADDR_CHANGE:
		sdp_dbg(sk, "Got Address change event\n");
		rc = 0;
		break;
	default:
		printk(KERN_ERR "SDP: Unexpected CMA event: %d\n",
		       event->event);
		rc = -ECONNABORTED;
		break;
	}

	sdp_dbg(sk, "event: %s handled\n", rdma_cm_event_str(event->event));

	if (rc && sdp_sk(sk)->id == id) {
		child = sk;
		sdp_sk(sk)->id = NULL;
		id->qp = NULL;
		id->context = NULL;
		parent = sdp_sk(sk)->parent;
		sdp_reset_sk(sk, rc);
	}

	release_sock(sk);

	sdp_dbg(sk, "event: %s done. status %d\n",
			rdma_cm_event_str(event->event), rc);

	if (parent) {
		lock_sock(parent);
		if (!sdp_sk(parent)->id) { /* TODO: look at SOCK_DEAD? */
			sdp_dbg(sk, "parent is going away.\n");
			child = NULL;
			goto done;
		}
		if (!list_empty(&sdp_sk(child)->backlog_queue))
			list_del_init(&sdp_sk(child)->backlog_queue);
		else
			child = NULL;
done:
		release_sock(parent);
		if (child)
			sdp_common_release(child);
	}
	return rc;
}
Example #28
static int iser_create_ib_conn_res(struct iser_conn *ib_conn)
{
	struct iser_device	*device;
	struct ib_qp_init_attr	init_attr;
	int			ret;
	struct ib_fmr_pool_param params;

	BUG_ON(ib_conn->device == NULL);

	device = ib_conn->device;

	ib_conn->page_vec = kmalloc(sizeof(struct iser_page_vec) +
				    (sizeof(u64) * (ISCSI_ISER_SG_TABLESIZE +1)),
				    GFP_KERNEL);
	if (!ib_conn->page_vec) {
		ret = -ENOMEM;
		goto alloc_err;
	}
	ib_conn->page_vec->pages = (u64 *) (ib_conn->page_vec + 1);

	params.page_shift        = SHIFT_4K;
	
	params.max_pages_per_fmr = ISCSI_ISER_SG_TABLESIZE + 1;
	
	params.pool_size	 = ISCSI_DEF_XMIT_CMDS_MAX * 2;
	params.dirty_watermark	 = ISCSI_DEF_XMIT_CMDS_MAX;
	params.cache		 = 0;
	params.flush_function	 = NULL;
	params.access		 = (IB_ACCESS_LOCAL_WRITE  |
				    IB_ACCESS_REMOTE_WRITE |
				    IB_ACCESS_REMOTE_READ);

	ib_conn->fmr_pool = ib_create_fmr_pool(device->pd, &params);
	if (IS_ERR(ib_conn->fmr_pool)) {
		ret = PTR_ERR(ib_conn->fmr_pool);
		goto fmr_pool_err;
	}

	memset(&init_attr, 0, sizeof init_attr);

	init_attr.event_handler = iser_qp_event_callback;
	init_attr.qp_context	= (void *)ib_conn;
	init_attr.send_cq	= device->cq;
	init_attr.recv_cq	= device->cq;
	init_attr.cap.max_send_wr  = ISER_QP_MAX_REQ_DTOS;
	init_attr.cap.max_recv_wr  = ISER_QP_MAX_RECV_DTOS;
	init_attr.cap.max_send_sge = MAX_REGD_BUF_VECTOR_LEN;
	init_attr.cap.max_recv_sge = 2;
	init_attr.sq_sig_type	= IB_SIGNAL_REQ_WR;
	init_attr.qp_type	= IB_QPT_RC;

	ret = rdma_create_qp(ib_conn->cma_id, device->pd, &init_attr);
	if (ret)
		goto qp_err;

	ib_conn->qp = ib_conn->cma_id->qp;
	iser_err("setting conn %p cma_id %p: fmr_pool %p qp %p\n",
		 ib_conn, ib_conn->cma_id,
		 ib_conn->fmr_pool, ib_conn->cma_id->qp);
	return ret;

qp_err:
	(void)ib_destroy_fmr_pool(ib_conn->fmr_pool);
fmr_pool_err:
	kfree(ib_conn->page_vec);
alloc_err:
	iser_err("unable to alloc mem or create resource, err %d\n", ret);
	return ret;
}
Example #29
/**
 * trans_create_rdma - Transport method for creating a transport instance
 * @client: client instance
 * @addr: IP address string
 * @args: Mount options string
 */
static int
rdma_create_trans(struct p9_client *client, const char *addr, char *args)
{
	int err;
	struct p9_rdma_opts opts;
	struct p9_trans_rdma *rdma;
	struct rdma_conn_param conn_param;
	struct ib_qp_init_attr qp_attr;
	struct ib_device_attr devattr;

	/* Parse the transport specific mount options */
	err = parse_opts(args, &opts);
	if (err < 0)
		return err;

	/* Create and initialize the RDMA transport structure */
	rdma = alloc_rdma(&opts);
	if (!rdma)
		return -ENOMEM;

	/* Create the RDMA CM ID */
	rdma->cm_id = rdma_create_id(p9_cm_event_handler, client, RDMA_PS_TCP);
	if (IS_ERR(rdma->cm_id))
		goto error;

	/* Associate the client with the transport */
	client->trans = rdma;

	/* Resolve the server's address */
	rdma->addr.sin_family = AF_INET;
	rdma->addr.sin_addr.s_addr = in_aton(addr);
	rdma->addr.sin_port = htons(opts.port);
	err = rdma_resolve_addr(rdma->cm_id, NULL,
				(struct sockaddr *)&rdma->addr,
				rdma->timeout);
	if (err)
		goto error;
	err = wait_for_completion_interruptible(&rdma->cm_done);
	if (err || (rdma->state != P9_RDMA_ADDR_RESOLVED))
		goto error;

	/* Resolve the route to the server */
	err = rdma_resolve_route(rdma->cm_id, rdma->timeout);
	if (err)
		goto error;
	err = wait_for_completion_interruptible(&rdma->cm_done);
	if (err || (rdma->state != P9_RDMA_ROUTE_RESOLVED))
		goto error;

	/* Query the device attributes */
	err = ib_query_device(rdma->cm_id->device, &devattr);
	if (err)
		goto error;

	/* Create the Completion Queue */
	rdma->cq = ib_create_cq(rdma->cm_id->device, cq_comp_handler,
				cq_event_handler, client,
				opts.sq_depth + opts.rq_depth + 1, 0);
	if (IS_ERR(rdma->cq))
		goto error;
	ib_req_notify_cq(rdma->cq, IB_CQ_NEXT_COMP);

	/* Create the Protection Domain */
	rdma->pd = ib_alloc_pd(rdma->cm_id->device);
	if (IS_ERR(rdma->pd))
		goto error;

	/* Cache the DMA lkey in the transport */
	rdma->dma_mr = NULL;
	if (devattr.device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY)
		rdma->lkey = rdma->cm_id->device->local_dma_lkey;
	else {
		rdma->dma_mr = ib_get_dma_mr(rdma->pd, IB_ACCESS_LOCAL_WRITE);
		if (IS_ERR(rdma->dma_mr))
			goto error;
		rdma->lkey = rdma->dma_mr->lkey;
	}

	/* Create the Queue Pair */
	memset(&qp_attr, 0, sizeof qp_attr);
	qp_attr.event_handler = qp_event_handler;
	qp_attr.qp_context = client;
	qp_attr.cap.max_send_wr = opts.sq_depth;
	qp_attr.cap.max_recv_wr = opts.rq_depth;
	qp_attr.cap.max_send_sge = P9_RDMA_SEND_SGE;
	qp_attr.cap.max_recv_sge = P9_RDMA_RECV_SGE;
	qp_attr.sq_sig_type = IB_SIGNAL_REQ_WR;
	qp_attr.qp_type = IB_QPT_RC;
	qp_attr.send_cq = rdma->cq;
	qp_attr.recv_cq = rdma->cq;
	err = rdma_create_qp(rdma->cm_id, rdma->pd, &qp_attr);
	if (err)
		goto error;
	rdma->qp = rdma->cm_id->qp;

	/* Request a connection */
	memset(&conn_param, 0, sizeof(conn_param));
	conn_param.private_data = NULL;
	conn_param.private_data_len = 0;
	conn_param.responder_resources = P9_RDMA_IRD;
	conn_param.initiator_depth = P9_RDMA_ORD;
	err = rdma_connect(rdma->cm_id, &conn_param);
	if (err)
		goto error;
	err = wait_for_completion_interruptible(&rdma->cm_done);
	if (err || (rdma->state != P9_RDMA_CONNECTED))
		goto error;

	client->status = Connected;

	return 0;

error:
	rdma_destroy_trans(rdma);
	return -ENOTCONN;
}
Example #30
int main(int argc, char *argv[]) {
	struct pdata rep_pdata;

	struct rdma_event_channel *cm_channel;
	struct rdma_cm_id *listen_id;
	struct rdma_cm_id *cm_id;
	struct rdma_cm_event *event;
	struct rdma_conn_param conn_param = { };

	struct ibv_pd *pd;
	struct ibv_comp_channel *comp_chan;
	struct ibv_cq *cq;
	struct ibv_cq *evt_cq;
	struct ibv_mr *mr;
	struct ibv_qp_init_attr qp_attr = { };
	struct ibv_sge sge;
	struct ibv_send_wr send_wr = { };
	struct ibv_send_wr *bad_send_wr;
	struct ibv_recv_wr recv_wr = { };
	struct ibv_recv_wr *bad_recv_wr;
	struct ibv_wc wc;
	void *cq_context;

	struct sockaddr_in sin;

	uint32_t *buf;

	int err;

	/* Set up RDMA CM structures */

	cm_channel = rdma_create_event_channel();
	if (!cm_channel)
		return 1;

	err = rdma_create_id(cm_channel, &listen_id, NULL, RDMA_PS_TCP);
	if (err)
		return err;

	sin.sin_family = AF_INET;
	sin.sin_port = htons(20079);
	sin.sin_addr.s_addr = INADDR_ANY;

	/* Bind to local port and listen for connection request */

	err = rdma_bind_addr(listen_id, (struct sockaddr *) &sin);
	if (err)
		return 1;


	err = rdma_listen(listen_id, 1);
	if (err)
		return 1;

	err = rdma_get_cm_event(cm_channel, &event);
	if (err)
		return err;
	printf("after get_cm_event\n");

	if (event->event != RDMA_CM_EVENT_CONNECT_REQUEST)
		return 1;

	cm_id = event->id;

	rdma_ack_cm_event(event);

	/* Create verbs objects now that we know which device to use */

	pd = ibv_alloc_pd(cm_id->verbs);
	if (!pd)
		return 1;

	comp_chan = ibv_create_comp_channel(cm_id->verbs);
	if (!comp_chan)
		return 1;

	cq = ibv_create_cq(cm_id->verbs, 2, NULL, comp_chan, 0);
	if (!cq)
		return 1;

	if (ibv_req_notify_cq(cq, 0))
		return 1;

	buf = calloc(2, sizeof(uint32_t));
	if (!buf)
		return 1;

	mr = ibv_reg_mr(pd, buf, 2 * sizeof(uint32_t),
			IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_READ
					| IBV_ACCESS_REMOTE_WRITE);
	if (!mr)
		return 1;

	qp_attr.cap.max_send_wr = 1;
	qp_attr.cap.max_send_sge = 1;
	qp_attr.cap.max_recv_wr = 1;
	qp_attr.cap.max_recv_sge = 1;

	qp_attr.send_cq = cq;
	qp_attr.recv_cq = cq;

	qp_attr.qp_type = IBV_QPT_RC;

	err = rdma_create_qp(cm_id, pd, &qp_attr);
	if (err)
		return err;

	/* Post receive before accepting connection */

	sge.addr = (uintptr_t) buf + sizeof(uint32_t);
	sge.length = sizeof(uint32_t);
	sge.lkey = mr->lkey;

	recv_wr.sg_list = &sge;
	recv_wr.num_sge = 1;

	if (ibv_post_recv(cm_id->qp, &recv_wr, &bad_recv_wr))
		return 1;

	rep_pdata.buf_va = htonll((uintptr_t) buf);
	rep_pdata.buf_rkey = htonl(mr->rkey);

	conn_param.responder_resources = 1;
	conn_param.private_data = &rep_pdata;
	conn_param.private_data_len = sizeof rep_pdata;

	/* Accept connection */
	printf("before accept\n");
	err = rdma_accept(cm_id, &conn_param);
	if (err)
		return 1;
	printf("after accept\n");
	err = rdma_get_cm_event(cm_channel, &event);
	if (err)
		return err;

	if (event->event != RDMA_CM_EVENT_ESTABLISHED)
		return 1;

	rdma_ack_cm_event(event);

	/* Wait for receive completion */

	if (ibv_get_cq_event(comp_chan, &evt_cq, &cq_context))
		return 1;

	if (ibv_req_notify_cq(cq, 0))
		return 1;

	if (ibv_poll_cq(cq, 1, &wc) < 1)
		return 1;

	if (wc.status != IBV_WC_SUCCESS)
		return 1;

	/* Add two integers and send reply back */

	buf[0] = htonl(ntohl(buf[0]) + ntohl(buf[1]));

	sge.addr = (uintptr_t) buf;
	sge.length = sizeof(uint32_t);
	sge.lkey = mr->lkey;

	send_wr.opcode = IBV_WR_SEND;
	send_wr.send_flags = IBV_SEND_SIGNALED;
	send_wr.sg_list = &sge;
	send_wr.num_sge = 1;

	if (ibv_post_send(cm_id->qp, &send_wr, &bad_send_wr))
		return 1;

	/* Wait for send completion */

	if (ibv_get_cq_event(comp_chan, &evt_cq, &cq_context))
		return 1;

	if (ibv_poll_cq(cq, 1, &wc) < 1)
		return 1;

	if (wc.status != IBV_WC_SUCCESS)
		return 1;

	printf("before ack cq 2\n");
	ibv_ack_cq_events(cq, 2);

	return 0;
}
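Note: this final example is a complete passive-side program: bind and listen, wait for RDMA_CM_EVENT_CONNECT_REQUEST, allocate the PD, CQ and MR, create the QP, post a receive before accepting, then exchange one request and reply. The closing ibv_ack_cq_events(cq, 2) acknowledges both events taken with ibv_get_cq_event(); the error paths leak resources, which is tolerable only in a demo this small.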