示例#1
0
/**
 * Create a uGNI endpoint on the device owning @p cep and bind it to the
 * remote address/id stored in @p cep.
 *
 * Each uGNI call is issued with the device lock held; the lock is dropped
 * between calls.
 *
 * @param cep        common endpoint supplying the device and remote address/id
 * @param cq         completion queue handed to GNI_EpCreate()
 * @param ep_handle  location that receives the new endpoint handle
 *
 * @return OPAL_SUCCESS when the endpoint was created and bound,
 *         OPAL_ERR_BAD_PARAM when @p cep is NULL (after assert(0)),
 *         otherwise the OPAL translation of the failing uGNI return code.
 *         If the bind fails the freshly created endpoint is destroyed
 *         before returning.
 */
int opal_common_ugni_ep_create (opal_common_ugni_endpoint_t *cep, gni_cq_handle_t cq,
                                gni_ep_handle_t *ep_handle)
{
    gni_return_t status;

    if (OPAL_UNLIKELY(NULL == cep)) {
        assert (0);
        return OPAL_ERR_BAD_PARAM;
    }

    /* step 1: allocate the endpoint handle */
    OPAL_THREAD_LOCK(&cep->dev->dev_lock);
    status = GNI_EpCreate (cep->dev->dev_handle, cq, ep_handle);
    OPAL_THREAD_UNLOCK(&cep->dev->dev_lock);

    if (OPAL_UNLIKELY(GNI_RC_SUCCESS != status)) {
        return opal_common_rc_ugni_to_opal (status);
    }

    /* step 2: bind the new handle to the remote peer */
    OPAL_THREAD_LOCK(&cep->dev->dev_lock);
    status = GNI_EpBind (*ep_handle, cep->ep_rem_addr, cep->ep_rem_id);
    OPAL_THREAD_UNLOCK(&cep->dev->dev_lock);

    if (GNI_RC_SUCCESS == status) {
        return OPAL_SUCCESS;
    }

    /* the bind failed: release the handle created in step 1 */
    OPAL_THREAD_LOCK(&cep->dev->dev_lock);
    GNI_EpDestroy (*ep_handle);
    OPAL_THREAD_UNLOCK(&cep->dev->dev_lock);

    return opal_common_rc_ugni_to_opal (status);
}
示例#2
0
/**
 * Post a directed datagram on the endpoint's smsg endpoint handle.
 *
 * The local mailbox attributes and ep->remote_attr are passed as the two
 * data buffers of GNI_EpPostDataWId(). The datagram id is the directed-connect
 * tag OR-ed with the endpoint's index.
 *
 * @param ep  endpoint to post the datagram for
 *
 * @return the OPAL translation of the GNI_EpPostDataWId() return code
 */
static inline int mca_btl_ugni_directed_ep_post (mca_btl_base_endpoint_t *ep) {
    uint64_t datagram_id;
    gni_return_t grc;

    BTL_VERBOSE(("posting directed datagram to remote id: %d for endpoint %p", ep->common->ep_rem_id, (void *)ep));

    /* record the first module's smsg irq memory handle in the outgoing attributes */
    ep->mailbox->attr.rmt_irq_mem_hndl = mca_btl_ugni_component.modules[0].device->smsg_irq_mhndl;

    datagram_id = MCA_BTL_UGNI_CONNECT_DIRECTED_ID | ep->index;

    grc = GNI_EpPostDataWId (ep->smsg_ep_handle,
                             &ep->mailbox->attr, sizeof (ep->mailbox->attr),
                             &ep->remote_attr, sizeof (ep->remote_attr),
                             datagram_id);

    return opal_common_rc_ugni_to_opal (grc);
}
示例#3
0
/**
 * Finish connecting an endpoint once both local and remote SMSG attributes
 * are available.
 *
 * Initializes SMSG on the endpoint's smsg handle, sets the event data on the
 * rdma and smsg handles, marks the endpoint connected, bumps the module's
 * connected peer count and then tries to send everything queued on the
 * endpoint. If that send pass fails the endpoint is placed on the module's
 * endpoint wait list (unless it is already there).
 *
 * @param ep  endpoint to finish connecting
 *
 * @return the OPAL translation of the uGNI error if GNI_SmsgInit() fails,
 *         OPAL_SUCCESS otherwise (including when the pending sends could
 *         not all be progressed).
 */
static inline int mca_btl_ugni_ep_connect_finish (mca_btl_base_endpoint_t *ep) {
    gni_smsg_attr_t *remote_smsg = &ep->remote_attr.smsg_attr;
    gni_smsg_attr_t *local_smsg = &ep->mailbox->attr.smsg_attr;
    gni_return_t gni_rc;
    int send_rc;

    BTL_VERBOSE(("finishing connection. remote attributes: msg_type = %d, msg_buffer = %p, buff_size = %d, "
                 "mem_hndl = {qword1 = %" PRIu64 ", qword2 = %" PRIu64 "}, mbox = %d, mbox_maxcredit = %d, "
                 "msg_maxsize = %d", remote_smsg->msg_type, remote_smsg->msg_buffer,
                 remote_smsg->buff_size, remote_smsg->mem_hndl.qword1,
                 remote_smsg->mem_hndl.qword2, remote_smsg->mbox_offset,
                 remote_smsg->mbox_maxcredit, remote_smsg->msg_maxsize));

    BTL_VERBOSE(("finishing connection. local attributes: msg_type = %d, msg_buffer = %p, buff_size = %d, "
                 "mem_hndl = {qword1 = %" PRIu64 ", qword2 = %" PRIu64 "}, mbox = %d, mbox_maxcredit = %d, "
                 "msg_maxsize = %d", local_smsg->msg_type, local_smsg->msg_buffer,
                 local_smsg->buff_size, local_smsg->mem_hndl.qword1,
                 local_smsg->mem_hndl.qword2, local_smsg->mbox_offset,
                 local_smsg->mbox_maxcredit, local_smsg->msg_maxsize));

    gni_rc = GNI_SmsgInit (ep->smsg_ep_handle, local_smsg, remote_smsg);
    if (OPAL_UNLIKELY(GNI_RC_SUCCESS != gni_rc)) {
        BTL_ERROR(("error initializing SMSG protocol. rc = %d", gni_rc));

        return opal_common_rc_ugni_to_opal (gni_rc);
    }

    /* local event data is our index for this endpoint; remote event data is
     * the index the peer uses for us. completions can then find the endpoint
     * with a single lookup in the endpoints array, which also means the
     * peer's index can no longer change after this point. */
    GNI_EpSetEventData (ep->rdma_ep_handle, ep->index, ep->remote_attr.index);
    GNI_EpSetEventData (ep->smsg_ep_handle, ep->index, ep->remote_attr.index);

    ep->rmt_irq_mem_hndl = ep->remote_attr.rmt_irq_mem_hndl;
    ep->state = MCA_BTL_UGNI_EP_STATE_CONNECTED;
    (void) opal_atomic_add_64 (&ep->btl->connected_peer_count, 1);

    /* flush whatever was queued while the connection was being set up */
    BTL_VERBOSE(("endpoint connected. posting %u sends", (unsigned int) opal_list_get_size (&ep->frag_wait_list)));

    send_rc = mca_btl_ugni_progress_send_wait_list (ep);
    if (OPAL_UNLIKELY(OPAL_SUCCESS != send_rc)) {
        /* not everything went out: queue the endpoint on the module's wait list */
        OPAL_THREAD_LOCK(&ep->btl->ep_wait_list_lock);
        if (!ep->wait_listed) {
            opal_list_append (&ep->btl->ep_wait_list, &ep->super);
            ep->wait_listed = true;
        }
        OPAL_THREAD_UNLOCK(&ep->btl->ep_wait_list_lock);
    }

    return OPAL_SUCCESS;
}
示例#4
0
文件: common_ugni.c 项目: AT95/ompi
/**
 * Initialize a uGNI device descriptor and attach it to the communication
 * domain.
 *
 * Stores the device id, looks up the NIC address for it, constructs the
 * device lock and attaches the device to the module-wide communication
 * domain handle (opal_common_ugni_module.cd_handle).
 *
 * @param device     device structure to fill in
 * @param device_id  id passed to opal_common_ugni_get_nic_address() and
 *                   GNI_CdmAttach()
 *
 * @return OPAL_SUCCESS on success, otherwise the OPAL translation of the
 *         uGNI error returned by GNI_CdmAttach().
 */
static int opal_common_ugni_device_init (opal_common_ugni_device_t *device,
                                         int device_id)
{
    int rc;

    /* Create a NIC Address */
    device->dev_id = device_id; /* Minor number of the Gemini NIC */

    device->dev_addr = opal_common_ugni_get_nic_address (device->dev_id);

    OPAL_OUTPUT((-1, "Got NIC Addr: 0x%08x, CPU ID: %d", device->dev_addr, device->dev_id));

    OBJ_CONSTRUCT(&device->dev_lock,opal_mutex_t);

    /* Attach device to the communication domain */
    rc = GNI_CdmAttach (opal_common_ugni_module.cd_handle, device->dev_id,
                        &device->dev_pe_addr, &device->dev_handle);
    if (GNI_RC_SUCCESS != rc) {
        /* report the operation that actually failed (attach, not create) */
        OPAL_OUTPUT((0, "Error: Attaching device to communication domain %d\n", rc));
        return opal_common_rc_ugni_to_opal (rc);
    }

    return OPAL_SUCCESS;
}
示例#5
0
/**
 * Poll one of the module's local RDMA completion queues and handle at most
 * one event.
 *
 * The device lock is held from the event poll through GNI_GetCompleted().
 * A descriptor that completed in error is reposted unless the error is not
 * recoverable or its retry count has reached
 * mca_btl_ugni_component.rdma_max_retries, in which case it is completed
 * with OPAL_ERROR.
 *
 * @param ugni_module  module whose completion queue is polled
 * @param which_cq     0 selects rdma_local_cq, any other value selects
 *                     rdma_local_irq_cq
 *
 * @return 1 when a descriptor completed successfully, 0 when no event was
 *         ready or a failed descriptor was reposted, OPAL_ERROR when a
 *         descriptor was given up on, otherwise the OPAL translation of the
 *         uGNI error reported by GNI_CqGetEvent()/GNI_GetCompleted().
 */
static inline int mca_btl_ugni_progress_rdma (mca_btl_ugni_module_t *ugni_module, int which_cq)
{
    mca_btl_ugni_post_descriptor_t *pdesc = NULL;
    gni_post_descriptor_t *gni_desc;
    gni_cq_entry_t cq_event = 0;
    gni_cq_handle_t cq;
    uint32_t is_recoverable = 1;
    gni_return_t status;
    int completion_rc;

    if (0 == which_cq) {
        cq = ugni_module->rdma_local_cq;
    } else {
        cq = ugni_module->rdma_local_irq_cq;
    }

    OPAL_THREAD_LOCK(&ugni_module->device->dev_lock);

    status = GNI_CqGetEvent (cq, &cq_event);
    if (GNI_RC_NOT_DONE == status) {
        /* no event is ready on this queue */
        OPAL_THREAD_UNLOCK(&ugni_module->device->dev_lock);
        return 0;
    }

    if (OPAL_UNLIKELY((GNI_RC_SUCCESS != status && !cq_event) || GNI_CQ_OVERRUN(cq_event))) {
        /* TODO -- need to handle overrun -- how do we do this without an event?
           will the event eventually come back? Ask Cray */
        BTL_ERROR(("unhandled post error! ugni rc = %d %s", status, gni_err_str[status]));
        OPAL_THREAD_UNLOCK(&ugni_module->device->dev_lock);

        return opal_common_rc_ugni_to_opal (status);
    }

    /* map the event back to the descriptor that was posted */
    status = GNI_GetCompleted (cq, cq_event, &gni_desc);
    OPAL_THREAD_UNLOCK(&ugni_module->device->dev_lock);

    if (OPAL_UNLIKELY(GNI_RC_SUCCESS != status && GNI_RC_TRANSACTION_ERROR != status)) {
        BTL_ERROR(("Error in GNI_GetComplete %s", gni_err_str[status]));
        return opal_common_rc_ugni_to_opal (status);
    }

    pdesc = MCA_BTL_UGNI_DESC_TO_PDESC(gni_desc);

    if (OPAL_UNLIKELY(GNI_RC_SUCCESS != status || !GNI_CQ_STATUS_OK(cq_event))) {
        /* the transaction failed: retry it unless we have to give up */
        (void) GNI_CqErrorRecoverable (cq_event, &is_recoverable);

        if (OPAL_UNLIKELY(++pdesc->desc.tries >= mca_btl_ugni_component.rdma_max_retries ||
                          !is_recoverable)) {
            char reason[1024];

            GNI_CqErrorStr (cq_event, reason, sizeof (reason));
            /* give up */
            BTL_ERROR(("giving up on desciptor %p, recoverable %d: %s", (void *) pdesc,
                       is_recoverable, reason));
#if OPAL_ENABLE_DEBUG
            btl_ugni_dump_post_desc (pdesc);
#endif
            mca_btl_ugni_post_desc_complete (ugni_module, pdesc, OPAL_ERROR);

            return OPAL_ERROR;
        }

        mca_btl_ugni_repost (ugni_module, pdesc);

        return 0;
    }

    completion_rc = opal_common_rc_ugni_to_opal (status);
    mca_btl_ugni_post_desc_complete (ugni_module, pdesc, completion_rc);

    return 1;
}
示例#6
0
/**
 * Check for one completed connection datagram and process it.
 *
 * Probes the device for a completed datagram id. If one is found the
 * function waits on it, resolves the endpoint it belongs to (by index for a
 * directed datagram, by remote process id for the wildcard datagram), runs
 * mca_btl_ugni_ep_connect_progress() on that endpoint under the endpoint
 * lock, processes the endpoint's smsg mailbox if its state is connected, and
 * reposts the wildcard datagram when the wildcard endpoint was the one that
 * completed.
 *
 * @param ugni_module  module whose device is probed
 *
 * @return 0 when the probe does not return GNI_RC_SUCCESS; the OPAL
 *         translation of the uGNI error when the wait fails; the hash-table
 *         lookup rc when no endpoint could be found or created for a
 *         wildcard peer; otherwise the value returned by
 *         mca_btl_ugni_smsg_process() (0 if the endpoint is not connected).
 */
static inline int
mca_btl_ugni_progress_datagram (mca_btl_ugni_module_t *ugni_module)
{
    uint64_t datagram_id, data, proc_id;
    uint32_t remote_addr, remote_id;
    mca_btl_base_endpoint_t *ep;
    gni_post_state_t post_state;
    gni_ep_handle_t handle;
    gni_return_t grc;
    int count = 0, rc;

    /* check for datagram completion */
    OPAL_THREAD_LOCK(&ugni_module->device->dev_lock);  /* TODO: may not need lock for this function */
    grc = GNI_PostDataProbeById (ugni_module->device->dev_handle, &datagram_id);
    if (OPAL_LIKELY(GNI_RC_SUCCESS != grc)) {
        /* any non-success probe result is treated as "nothing to do" */
        OPAL_THREAD_UNLOCK(&ugni_module->device->dev_lock);
        return 0;
    }

    /* strip the datagram-type bits; for directed datagrams the remainder is
     * used below as the index into the module's endpoints array */
    data = datagram_id & ~(MCA_BTL_UGNI_DATAGRAM_MASK);

    BTL_VERBOSE(("datgram_id: %" PRIx64 ", mask: %" PRIx64, datagram_id, (uint64_t) (datagram_id & MCA_BTL_UGNI_DATAGRAM_MASK)));

    if ((datagram_id & MCA_BTL_UGNI_DATAGRAM_MASK) == MCA_BTL_UGNI_CONNECT_DIRECTED_ID) {
        /* NOTE(review): the looked-up endpoint is dereferenced without a NULL
         * check -- confirm the index always refers to a live endpoint */
        ep = (mca_btl_base_endpoint_t *) opal_pointer_array_get_item (&ugni_module->endpoints, data);
        handle = ep->smsg_ep_handle;
    } else {
        /* ep is left unset here; it is resolved after the wait below */
        handle = ugni_module->wildcard_ep;
    }

    /* wait for the incoming datagram to complete (in case it isn't) */
    /* the device lock is still held during this call; the timeout argument is
     * -1 (presumably "no timeout" -- confirm against the uGNI documentation) */
    grc = GNI_EpPostDataWaitById (handle, datagram_id, -1, &post_state,
                                  &remote_addr, &remote_id);
    OPAL_THREAD_UNLOCK(&ugni_module->device->dev_lock);
    if (GNI_RC_SUCCESS != grc) {
        BTL_ERROR(("GNI_EpPostDataWaitById failed with rc = %d", grc));
        return opal_common_rc_ugni_to_opal (grc);
    }

    /* if this is a wildcard endpoint lookup the remote peer by the proc id we received */
    if (handle == ugni_module->wildcard_ep) {
        /* presumably wc_remote_attr was filled in by the completed wildcard
         * datagram -- verify against mca_btl_ugni_wildcard_ep_post() */
        proc_id = mca_btl_ugni_proc_name_to_id (ugni_module->wc_remote_attr.proc_name);

        BTL_VERBOSE(("received connection attempt on wildcard endpoint from proc id: %" PRIx64,
                     proc_id));

        OPAL_THREAD_LOCK(&ugni_module->endpoint_lock);
        rc = opal_hash_table_get_value_uint64 (&ugni_module->id_to_endpoint, proc_id, (void **) &ep);
        OPAL_THREAD_UNLOCK(&ugni_module->endpoint_lock);

        /* check if the endpoint is known */
        if (OPAL_UNLIKELY(OPAL_SUCCESS != rc || NULL == ep)) {
            struct opal_proc_t *remote_proc = opal_proc_for_name (ugni_module->wc_remote_attr.proc_name);
            BTL_VERBOSE(("Got connection request from an unknown peer {jobid = 0x%x, vid = 0x%x}",
                         ugni_module->wc_remote_attr.proc_name.jobid, ugni_module->wc_remote_attr.proc_name.vpid));
            /* fall back to asking the module for an endpoint for this proc */
            ep = mca_btl_ugni_get_ep (&ugni_module->super, remote_proc);
            if (OPAL_UNLIKELY(NULL == ep)) {
                /* NOTE(review): rc is the hash lookup result, which is
                 * OPAL_SUCCESS when the lookup succeeded but yielded NULL; the
                 * wildcard datagram is also not reposted on this path --
                 * confirm both are intended */
                return rc;
            }
        }
    } else {
        BTL_VERBOSE(("directed datagram complete for endpoint %p", (void *) ep));
    }

    /* should not have gotten a NULL endpoint */
    assert (NULL != ep);

    BTL_VERBOSE(("got a datagram completion: id = %" PRIx64 ", state = %d, "
                 "data = 0x%" PRIx64 ", ep = %p, remote id: %d", datagram_id, post_state,
                 data, (void *) ep, remote_id));

    /* NTH: TODO -- error handling */
    opal_mutex_lock (&ep->lock);
    if (handle != ugni_module->wildcard_ep) {
        /* directed post complete */
        ep->dg_posted = false;
    }

    /* the result of the connection progress call is deliberately ignored */
    (void) mca_btl_ugni_ep_connect_progress (ep);
    opal_mutex_unlock (&ep->lock);

    /* ep->state is read here after the endpoint lock has been released */
    if (MCA_BTL_UGNI_EP_STATE_CONNECTED == ep->state) {
        /*  process messages waiting in the endpoint's smsg mailbox */
        count = mca_btl_ugni_smsg_process (ep);
    }

    /* repost the wildcard datagram */
    if (handle == ugni_module->wildcard_ep) {
        mca_btl_ugni_wildcard_ep_post (ugni_module);
    }

    return count;
}
示例#7
0
文件: common_ugni.c 项目: AT95/ompi
int opal_common_ugni_init (void)
{
    int modes, rc, i;
    uint32_t my_cdm_id;

    opal_common_ugni_module_ref_count ++;

    if (opal_common_ugni_module_ref_count > 1) {
        return OPAL_SUCCESS;
    }

    /* use pid for my_cdm_id.  Although its not stated in the uGNI
       documentation, the cdm_id only needs to be unique
       within a node for a given ptag/cookie tuple */

    my_cdm_id = getpid();   /*TODO: eventually need something else for thread-hot support */

    /* pull settings from ugni btl */
    opal_common_ugni_module.rdma_max_retries =
        mca_btl_ugni_component.rdma_max_retries;

    /* Create a communication domain */

    modes = GNI_CDM_MODE_FORK_FULLCOPY | GNI_CDM_MODE_CACHED_AMO_ENABLED |
            GNI_CDM_MODE_ERR_NO_KILL | GNI_CDM_MODE_FAST_DATAGRAM_POLL;

    /* collect uGNI information */
    rc = get_ptag(&opal_common_ugni_module.ptag);
    if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
        return rc;
    }

    rc = get_cookie(&opal_common_ugni_module.cookie);
    if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
        return rc;
    }

    /* create a communication domain */
    rc = GNI_CdmCreate (my_cdm_id, opal_common_ugni_module.ptag,
                        opal_common_ugni_module.cookie, modes,
                        &opal_common_ugni_module.cd_handle);
    if (OPAL_UNLIKELY(GNI_RC_SUCCESS != rc)) {
        OPAL_OUTPUT((0, "Error: Creating communication domain %d\n",rc));
        return opal_common_rc_ugni_to_opal (rc);
    }

    /* setup uGNI devices. we only support one device atm */
    opal_common_ugni_module.device_count = 1;
    opal_common_ugni_module.devices = calloc (opal_common_ugni_module.device_count,
                                              sizeof (opal_common_ugni_device_t));

    for (i = 0 ; i < opal_common_ugni_module.device_count ; ++i) {
        rc = opal_common_ugni_device_init (opal_common_ugni_module.devices + i, i);
        if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
            OPAL_OUTPUT((-1, "error initializing uGNI device"));
            return rc;
        }
    }

    /* send ugni modex */
    opal_common_ugni_send_modex (my_cdm_id);

    return OPAL_SUCCESS;
}