/*
 * Invoked once a connection pseudo-component (CPC) has finished
 * establishing the connection for an endpoint.
 *
 * If the endpoint's local CPC does not use the CTS protocol, the
 * endpoint is marked connected immediately.  Otherwise receive
 * buffers are posted first, and the CTS is then sent (and the endpoint
 * possibly marked connected) depending on the transport type, on
 * which side initiated the connection, and on whether the peer's CTS
 * has already been received.
 *
 * On failure to post receives, the endpoint error handler is invoked
 * and the function returns without sending anything.
 */
void mca_btl_openib_endpoint_cpc_complete(mca_btl_openib_endpoint_t *endpoint)
{
    int transport_type_ib_p = 0;
    int send_cts_now;

    /* CPCs that do not speak CTS: just flip the endpoint to
       "connected" and we are done */
    if (!endpoint->endpoint_local_cpc->cbm_uses_cts) {
        mca_btl_openib_endpoint_connected(endpoint);
        return;
    }

    /* The CTS protocol is in use.  Receives go up first, which keeps
       credit management happy (i.e., rd_credits will be 0) */
    if (OMPI_SUCCESS != mca_btl_openib_endpoint_post_recvs(endpoint)) {
        BTL_ERROR(("Failed to post receive buffers"));
        mca_btl_openib_endpoint_invoke_error(endpoint);
        return;
    }
    endpoint->endpoint_posted_recvs = true;

    /* Rules for sending the CTS:
       - IB: send it right away.
       - iWARP: only the connection initiator sends first; the other
         side answers with its own CTS once ours arrives.
       - Either transport: send it if the peer's CTS already showed
         up (e.g., this process was slow to call cpc_complete()). */
#if defined(HAVE_STRUCT_IBV_DEVICE_TRANSPORT_TYPE)
    transport_type_ib_p = (IBV_TRANSPORT_IB == endpoint->endpoint_btl->device->ib_dev->transport_type);
#endif
    OPAL_OUTPUT((-1, "cpc_complete to peer %s: is IB %d, initiatior %d, cts received: %d",
                 (NULL == endpoint->endpoint_proc->proc_ompi->proc_hostname) ?
                 "unknown" : endpoint->endpoint_proc->proc_ompi->proc_hostname,
                 transport_type_ib_p,
                 endpoint->endpoint_initiator,
                 endpoint->endpoint_cts_received));

    send_cts_now = (transport_type_ib_p ||
                    endpoint->endpoint_initiator ||
                    endpoint->endpoint_cts_received);
    if (send_cts_now) {
        mca_btl_openib_endpoint_send_cts(endpoint);

        /* The flag is re-read after the send: if the peer's CTS is
           in hand, both directions are done and the endpoint can be
           marked as connected */
        if (endpoint->endpoint_cts_received) {
            OPAL_OUTPUT((-1, "cpc_complete to %s -- already got CTS, so marking endpoint as complete",
                         (NULL == endpoint->endpoint_proc->proc_ompi->proc_hostname) ?
                         "unknown" : endpoint->endpoint_proc->proc_ompi->proc_hostname));
            mca_btl_openib_endpoint_connected(endpoint);
        }
    }

    OPAL_OUTPUT((-1, "cpc_complete to %s -- done",
                 (NULL == endpoint->endpoint_proc->proc_ompi->proc_hostname) ?
                 "unknown" : endpoint->endpoint_proc->proc_ompi->proc_hostname));
}
/*
 * Build the local half of every QP on the endpoint; the remote halves
 * get connected later.
 *
 * Work-queue depths are derived from the component-wide QP
 * configuration.  The QP designated for credit messages is given
 * extra room: receive slots equal to the summed rd_rsv of all per-peer
 * (PP) QPs, and one extra send slot per PP QP (or a single one when
 * there are no PP QPs but eager RDMA is enabled on the endpoint).
 *
 * Returns the first non-success code from qp_create_one(), otherwise
 * whatever mca_btl_openib_endpoint_post_recvs() returns.
 */
static int qp_create_all(mca_btl_base_endpoint_t* endpoint)
{
    int idx, status, num_pp_qps = 0;
    int32_t reserved_total = 0;

    /* Pass 1: count the PP QPs and add up their reserved receives */
    for (idx = 0; idx < mca_btl_openib_component.num_qps; ++idx) {
        if (!BTL_OPENIB_QP_TYPE_PP(idx)) {
            continue;
        }
        reserved_total +=
            mca_btl_openib_component.qp_infos[idx].u.pp_qp.rd_rsv;
        num_pp_qps++;
    }

    /* Even without any PP QP, eager rdma flow control still needs a
     * reserved WQE */
    if (0 == num_pp_qps && true == endpoint->use_eager_rdma) {
        num_pp_qps = 1;
    }

    /* Pass 2: size and create each QP */
    for (idx = 0; idx < mca_btl_openib_component.num_qps; ++idx) {
        struct ibv_srq *shared_rq = NULL;
        uint32_t recv_depth, send_depth;
        int32_t extra_recv = 0, extra_send = 0;

        /* Only the QP carrying SW flow control traffic gets the
           additional resources */
        if (idx == mca_btl_openib_component.credits_qp) {
            extra_recv = reserved_total;
            extra_send = num_pp_qps;
        }

        if (BTL_OPENIB_QP_TYPE_PP(idx)) {
            recv_depth = mca_btl_openib_component.qp_infos[idx].rd_num + extra_recv;
            send_depth = mca_btl_openib_component.qp_infos[idx].rd_num +
                extra_send;
        } else {
            /* SRQ-backed QP: receives live on the shared queue, so
               the QP itself posts none */
            shared_rq = endpoint->endpoint_btl->qps[idx].u.srq_qp.srq;
            recv_depth = 0;
            send_depth = mca_btl_openib_component.qp_infos[idx].u.srq_qp.sd_max
                + extra_send;
        }

        status = qp_create_one(endpoint, idx, shared_rq, recv_depth, send_depth);
        if (OMPI_SUCCESS != status) {
            return status;
        }
    }

    /* Every local QP exists now; post receive buffers, set up
       credits, etc. */
    return mca_btl_openib_endpoint_post_recvs(endpoint);
}
/*
 * Non blocking RML recv callback.  Read incoming QP and other info,
 * and if this endpoint is trying to connect, reply with our QP info,
 * otherwise try to modify QP's and establish reliable connection.
 *
 * Parameters:
 *   status       - unused by this callback
 *   process_name - name of the sending peer; used to look up the endpoint
 *   buffer       - incoming message, handed to xoob_receive_connect_data()
 *   tag, cbdata  - unused by this callback
 *
 * The scratch rem_info set up by init_rem_info() is released with
 * free_rem_info() on every exit path after a successful init,
 * including the error paths.
 */
static void xoob_rml_recv_cb(int status, ompi_process_name_t* process_name,
                        opal_buffer_t* buffer, ompi_rml_tag_t tag,
                        void* cbdata)
{
    int rc;
    uint8_t message_type;
    uint16_t requested_lid = 0;
    mca_btl_openib_rem_info_t rem_info;
    mca_btl_openib_endpoint_t *ib_endpoint = NULL;

    if ( OMPI_SUCCESS != init_rem_info(&rem_info)) {
        /* Nothing is released here, matching the original behavior */
        return;
    }

    /* Get data. */
    if ( OMPI_SUCCESS != xoob_receive_connect_data(&rem_info, &requested_lid, &message_type, buffer)) {
        BTL_ERROR(("Failed to read data\n"));
        mca_btl_openib_endpoint_invoke_error(NULL);
        goto cleanup;
    }

    /* Processing message */
    switch (message_type) {
        case ENDPOINT_XOOB_CONNECT_REQUEST:
            BTL_VERBOSE(("Received ENDPOINT_XOOB_CONNECT_REQUEST: lid %d, sid %" PRIx64 ", rlid %d\n",
                        rem_info.rem_lid,
                        rem_info.rem_subnet_id,
                        requested_lid));
            ib_endpoint = xoob_find_endpoint(process_name,rem_info.rem_subnet_id,
                    requested_lid, message_type);
            if ( NULL == ib_endpoint) {
                BTL_ERROR(("Got ENDPOINT_XOOB_CONNECT_REQUEST."
                           " Failed to find endpoint with subnet %" PRIx64
                           " and LID %d",
                           rem_info.rem_subnet_id,requested_lid));
                mca_btl_openib_endpoint_invoke_error(NULL);
                goto cleanup;
            }
            OPAL_THREAD_LOCK(&ib_endpoint->endpoint_lock);
            /* prepost data on receiver site */
            if (OMPI_SUCCESS != mca_btl_openib_endpoint_post_recvs(ib_endpoint)) {
                BTL_ERROR(("Failed to post on XRC SRQs"));
                mca_btl_openib_endpoint_invoke_error(NULL);
                OPAL_THREAD_UNLOCK(&ib_endpoint->endpoint_lock);
                goto cleanup;
            }
            /* we should create qp and send the info + srq to requestor */
            rc = xoob_reply_first_connect(ib_endpoint, &rem_info);
            if (OMPI_SUCCESS != rc) {
                BTL_ERROR(("error in endpoint reply start connect"));
                mca_btl_openib_endpoint_invoke_error(NULL);
                OPAL_THREAD_UNLOCK(&ib_endpoint->endpoint_lock);
                goto cleanup;
            }
            OPAL_THREAD_UNLOCK(&ib_endpoint->endpoint_lock);
            break;
        case ENDPOINT_XOOB_CONNECT_XRC_REQUEST:
            /* pasha we don't need the remote lid here ??*/
            BTL_VERBOSE(("Received ENDPOINT_XOOB_CONNECT_XRC_REQUEST: lid %d, sid %" PRIx64 "\n",
                        rem_info.rem_lid,
                        rem_info.rem_subnet_id));
            ib_endpoint = xoob_find_endpoint(process_name,rem_info.rem_subnet_id,
                    requested_lid, message_type);
            if ( NULL == ib_endpoint) {
                BTL_ERROR(("Got ENDPOINT_XOOB_CONNECT_XRC_REQUEST."
                            " Failed to find endpoint with subnet %" PRIx64 " and LID %d",
                            rem_info.rem_subnet_id,requested_lid));
                mca_btl_openib_endpoint_invoke_error(NULL);
                goto cleanup;
            }
            if (OMPI_SUCCESS == xoob_recv_qp_connect(ib_endpoint, &rem_info)) {
                /* NOTE(review): unlike the CONNECT_REQUEST case, the
                   receives are posted before endpoint_lock is taken --
                   confirm this is intentional */
                if (OMPI_SUCCESS != mca_btl_openib_endpoint_post_recvs(ib_endpoint)) {
                    BTL_ERROR(("Failed to post on XRC SRQs"));
                    mca_btl_openib_endpoint_invoke_error(ib_endpoint);
                    goto cleanup;
                }
                OPAL_THREAD_LOCK(&ib_endpoint->endpoint_lock);
                rc = xoob_send_connect_data(ib_endpoint, ENDPOINT_XOOB_CONNECT_XRC_RESPONSE);
                if (OMPI_SUCCESS != rc) {
                    BTL_ERROR(("error in endpoint reply start connect"));
                    mca_btl_openib_endpoint_invoke_error(ib_endpoint);
                    OPAL_THREAD_UNLOCK(&ib_endpoint->endpoint_lock);
                    goto cleanup;
                }
                OPAL_THREAD_UNLOCK(&ib_endpoint->endpoint_lock);
            } else {
                /* The XRC recv qp was destroyed */
                OPAL_THREAD_LOCK(&ib_endpoint->endpoint_lock);
                rc = xoob_send_connect_data(ib_endpoint, ENDPOINT_XOOB_CONNECT_XRC_NR_RESPONSE);
                if (OMPI_SUCCESS != rc) {
                    BTL_ERROR(("error in endpoint reply start connect"));
                    mca_btl_openib_endpoint_invoke_error(ib_endpoint);
                    OPAL_THREAD_UNLOCK(&ib_endpoint->endpoint_lock);
                    goto cleanup;
                }
                OPAL_THREAD_UNLOCK(&ib_endpoint->endpoint_lock);
            }
            break;
        case ENDPOINT_XOOB_CONNECT_RESPONSE:
            BTL_VERBOSE(("Received ENDPOINT_XOOB_CONNECT_RESPONSE: lid %d, sid %" PRIx64 "\n",
                        rem_info.rem_lid,
                        rem_info.rem_subnet_id));
            ib_endpoint = xoob_find_endpoint(process_name, rem_info.rem_subnet_id,
                    rem_info.rem_lid, message_type);
            if ( NULL == ib_endpoint) {
                BTL_ERROR(("Got ENDPOINT_XOOB_CONNECT_RESPONSE."
                            " Failed to find endpoint with subnet %" PRIx64 " and LID %d",
                            rem_info.rem_subnet_id,rem_info.rem_lid));
                mca_btl_openib_endpoint_invoke_error(NULL);
                goto cleanup;
            }
            OPAL_THREAD_LOCK(&ib_endpoint->endpoint_lock);
            /* we got all the data srq. switch the endpoint to connect mode */
            XOOB_SET_REMOTE_INFO(ib_endpoint->rem_info, rem_info);
            /* update ib_addr with remote qp number */
            ib_endpoint->ib_addr->remote_xrc_rcv_qp_num =
                ib_endpoint->rem_info.rem_qps->rem_qp_num;
            BTL_VERBOSE(("rem_info: lid %d, sid %" PRIx64
                         " ep %d %" PRIx64 "\n",
                         rem_info.rem_lid,
                         rem_info.rem_subnet_id,
                         ib_endpoint->rem_info.rem_lid,
                         ib_endpoint->rem_info.rem_subnet_id));
            if (OMPI_SUCCESS != xoob_send_qp_connect(ib_endpoint, &rem_info)) {
                BTL_ERROR(("Failed to connect  endpoint\n"));
                mca_btl_openib_endpoint_invoke_error(NULL);
                OPAL_THREAD_UNLOCK(&ib_endpoint->endpoint_lock);
                goto cleanup;
            }
            /* NOTE(review): no explicit unlock on this path; cpc_complete
               is expected to release endpoint_lock -- confirm */
            mca_btl_openib_endpoint_cpc_complete(ib_endpoint);
            break;
        case ENDPOINT_XOOB_CONNECT_XRC_RESPONSE:
            BTL_VERBOSE(("Received ENDPOINT_XOOB_CONNECT_XRC_RESPONSE: lid %d, sid %" PRIx64 "\n",
                        rem_info.rem_lid,
                        rem_info.rem_subnet_id));
            ib_endpoint = xoob_find_endpoint(process_name, rem_info.rem_subnet_id,
                    rem_info.rem_lid, message_type);
            if ( NULL == ib_endpoint) {
                BTL_ERROR(("Got ENDPOINT_XOOB_CONNECT_XRC_RESPONSE."
                            " Failed to find endpoint with subnet %" PRIx64 " and LID %d",
                            rem_info.rem_subnet_id,rem_info.rem_lid));
                mca_btl_openib_endpoint_invoke_error(NULL);
                goto cleanup;
            }
            OPAL_THREAD_LOCK(&ib_endpoint->endpoint_lock);
            /* we got srq numbers on our request */
            XOOB_SET_REMOTE_INFO(ib_endpoint->rem_info, rem_info);
            /* NOTE(review): no explicit unlock on this path; cpc_complete
               is expected to release endpoint_lock -- confirm */
            mca_btl_openib_endpoint_cpc_complete(ib_endpoint);
            break;
        case ENDPOINT_XOOB_CONNECT_XRC_NR_RESPONSE:
            /* The XRC recv site already was destroyed so we need
             * start to bringup the connection from scratch  */
            BTL_VERBOSE(("Received ENDPOINT_XOOB_CONNECT_XRC_NR_RESPONSE: lid %d, sid %" PRIx64 "\n",
                        rem_info.rem_lid,
                        rem_info.rem_subnet_id));
            ib_endpoint = xoob_find_endpoint(process_name, rem_info.rem_subnet_id,
                    rem_info.rem_lid, message_type);
            if ( NULL == ib_endpoint) {
                BTL_ERROR(("Got ENDPOINT_XOOB_CONNECT_XRC_NR_RESPONSE."
                            " Failed to find endpoint with subnet %" PRIx64 " and LID %d",
                            rem_info.rem_subnet_id,rem_info.rem_lid));
                mca_btl_openib_endpoint_invoke_error(NULL);
                goto cleanup;
            }
            xoob_restart_connect(ib_endpoint);
            break;
        default :
            BTL_ERROR(("Invalid message type %d", message_type));
            break;
    }

cleanup:
    /* Single release point for the scratch remote info, shared by the
       success and error paths */
    free_rem_info(&rem_info);
}
/*
 * Create the XRC send QP for an endpoint (stored in endpoint->qps[0])
 * and move it to the INIT state.
 *
 * The send queue is sized from the shared ib_addr QP's sd_wqe plus,
 * when eager RDMA is enabled on the component, max_eager_rdma extra
 * entries.  The inline-data limit actually granted by the device is
 * recorded in endpoint->qps[0].ib_inline_max; a help message is shown
 * if it is smaller than what was requested.
 *
 * Returns OMPI_ERROR if the QP cannot be created or modified,
 * otherwise the result of mca_btl_openib_endpoint_post_recvs().
 */
static int xoob_send_qp_create (mca_btl_base_endpoint_t* endpoint)
{
    int prio = BTL_OPENIB_LP_CQ; /* all send completions go to low prio CQ */
    uint32_t send_wr;
    struct ibv_qp **qp;
    uint32_t *psn;
    struct ibv_qp_init_attr qp_init_attr;
    struct ibv_qp_attr attr;
    int ret;
    size_t req_inline;

    mca_btl_openib_module_t *openib_btl =
        (mca_btl_openib_module_t*)endpoint->endpoint_btl;

    /* Prepare QP structs */
    BTL_VERBOSE(("Creating Send QP\n"));
    qp = &endpoint->qps[0].qp->lcl_qp;
    psn = &endpoint->qps[0].qp->lcl_psn;
    /* reserve additional wr for eager rdma credit management */
    send_wr = endpoint->ib_addr->qp->sd_wqe +
        (mca_btl_openib_component.use_eager_rdma ?
         mca_btl_openib_component.max_eager_rdma : 0);
    memset(&qp_init_attr, 0, sizeof(struct ibv_qp_init_attr));
    memset(&attr, 0, sizeof(struct ibv_qp_attr));

    qp_init_attr.send_cq = qp_init_attr.recv_cq = openib_btl->device->ib_cq[prio];

    /* no need recv queue; receives are posted to srq */
    qp_init_attr.cap.max_recv_wr = 0;
    qp_init_attr.cap.max_send_wr = send_wr;
    qp_init_attr.cap.max_inline_data = req_inline =
        openib_btl->device->max_inline_data;
    qp_init_attr.cap.max_send_sge = 1;
    /* this one is ignored by driver */
    qp_init_attr.cap.max_recv_sge = 1; /* we do not use SG list */
    qp_init_attr.qp_type = IBV_QPT_XRC;
    qp_init_attr.xrc_domain = openib_btl->device->xrc_domain;
    *qp = ibv_create_qp(openib_btl->device->ib_pd, &qp_init_attr);
    if (NULL == *qp) {
        opal_show_help("help-mpi-btl-openib-cpc-base.txt",
                       "ibv_create_qp failed", true,
                       ompi_process_info.nodename,
                       ibv_get_device_name(openib_btl->device->ib_dev),
                       "Reliable connected (XRC)");
        return OMPI_ERROR;
    }

    /* qp_init_attr.cap is compared again after creation; if the
       inline size it now holds is below the request, record the
       smaller value and tell the user */
    if (qp_init_attr.cap.max_inline_data < req_inline) {
        endpoint->qps[0].ib_inline_max = qp_init_attr.cap.max_inline_data;
        /* The third argument is the error-header flag, as in the
           ibv_create_qp failure message above; without it the node
           name would be taken as the flag and every following
           argument would be shifted by one.
           NOTE(review): req_inline is a size_t passed through varargs;
           confirm it matches the conversion used by the
           "inline truncated" help text. */
        opal_show_help("help-mpi-btl-openib-cpc-base.txt",
                       "inline truncated", true,
                       ompi_process_info.nodename,
                       ibv_get_device_name(openib_btl->device->ib_dev),
                       openib_btl->port_num,
                       req_inline, qp_init_attr.cap.max_inline_data);
    } else {
        endpoint->qps[0].ib_inline_max = req_inline;
    }

    attr.qp_state = IBV_QPS_INIT;
    attr.pkey_index = openib_btl->pkey_index;
    attr.port_num = openib_btl->port_num;
    attr.qp_access_flags = IBV_ACCESS_REMOTE_WRITE | IBV_ACCESS_REMOTE_READ;
    ret = ibv_modify_qp(*qp, &attr,
                      IBV_QP_STATE |
                      IBV_QP_PKEY_INDEX |
                      IBV_QP_PORT |
                      IBV_QP_ACCESS_FLAGS );
    if (ret) {
        /* NOTE(review): the QP created above is not destroyed on this
           path -- confirm that the caller's teardown releases it */
        BTL_ERROR(("Error modifying QP[%x] to IBV_QPS_INIT errno says: %s [%d]",
                    (*qp)->qp_num, strerror(ret), ret));
        return OMPI_ERROR;
    }

    /* Setup meta data on the endpoint: the local PSN is a random
       value masked to 24 bits */
    *psn = lrand48() & 0xffffff;

    /* Now that the send qp is created locally, post some receive
       buffers, setup credits, etc. */
    return mca_btl_openib_endpoint_post_recvs(endpoint);
}