示例#1
0
/**
 * Disconnect an endpoint and put it back into the INIT state.
 *
 * Outstanding local SMSG operations on the endpoint's device are progressed
 * first. If the endpoint is connected and the caller asked for it, a disconnect
 * message is sent to the peer and progressed as well. Afterwards the SMSG
 * endpoint handle is cleaned up and the mailbox (if any) is returned to the
 * module's mailbox free list.
 *
 * @param ep               endpoint to disconnect
 * @param send_disconnect  send a disconnect message to the peer when the
 *                         endpoint is currently connected
 *
 * @returns OPAL_SUCCESS always; failures of intermediate steps are not
 *          reported to the caller.
 */
int mca_btl_ugni_ep_disconnect (mca_btl_base_endpoint_t *ep, bool send_disconnect)
{
    mca_btl_ugni_module_t *module = mca_btl_ugni_ep_btl (ep);
    mca_btl_ugni_device_t *dev;
    int status;

    /* an endpoint that is still in the INIT state has nothing to tear down */
    if (MCA_BTL_UGNI_EP_STATE_INIT == ep->state) {
        return OPAL_SUCCESS;
    }

    dev = ep->smsg_ep_handle.device;

    /* make sure all sends are complete before removing anything. the loop
     * stops as soon as the progress call returns something other than
     * OPAL_SUCCESS.
     * NOTE(review): the result of this same call is added to an event count
     * by the component progress function -- confirm that ending the drain on
     * any non-OPAL_SUCCESS value is intended. */
    while (dev->dev_smsg_local_cq.active_operations) {
        if (OPAL_SUCCESS != mca_btl_ugni_progress_local_smsg (module, dev)) {
            break;
        }
    }

    if (MCA_BTL_UGNI_EP_STATE_CONNECTED == ep->state && send_disconnect) {
        status = mca_btl_ugni_ep_send_disconnect (ep);
        if (OPAL_UNLIKELY(OPAL_SUCCESS != status)) {
            BTL_VERBOSE(("could not send disconnect message to peer"));
        }

        /* wait for the disconnect message to go out: progress at least once,
         * then keep going while progress succeeds and operations remain */
        do {
            status = mca_btl_ugni_progress_local_smsg (module, dev);
        } while (OPAL_SUCCESS == status && dev->dev_smsg_local_cq.active_operations);

        /* this endpoint no longer counts as an SMSG connection on its device */
        (void) opal_atomic_add_fetch_32 (&ep->smsg_ep_handle.device->smsg_connections, -1);
    }

    /* NTH: this call may not need the device lock. seems to work without it but
     * the lock is here to be safe. */
    mca_btl_ugni_device_lock (dev);
    (void) mca_btl_ugni_ep_handle_cleanup (&ep->smsg_ep_handle);
    mca_btl_ugni_device_unlock (dev);

    /* hand the mailbox back to the module's free list */
    if (NULL != ep->mailbox) {
        opal_free_list_item_t *mbox_item = (opal_free_list_item_t *) ep->mailbox;

        opal_free_list_return (&module->smsg_mboxes, mbox_item);
        ep->mailbox = NULL;
    }

    ep->state = MCA_BTL_UGNI_EP_STATE_INIT;

    return OPAL_SUCCESS;
}
示例#2
0
/**
 * Component progress function: makes one progress pass over every uGNI BTL
 * module.
 *
 * @returns the sum of the values returned by the datagram, local/remote SMSG
 *          and RDMA progress calls across all modules
 */
static int mca_btl_ugni_component_progress (void)
{
    int events = 0;

    for (unsigned int idx = 0 ; idx < mca_btl_ugni_component.ugni_num_btls ; ++idx) {
        mca_btl_ugni_module_t *module = &mca_btl_ugni_component.modules[idx];

        /* the wait list is serviced first; its result is not counted */
        mca_btl_ugni_progress_wait_list (module);

        events += mca_btl_ugni_progress_datagram (module);
        events += mca_btl_ugni_progress_local_smsg (module);
        events += mca_btl_ugni_progress_remote_smsg (module);
        events += mca_btl_ugni_progress_rdma (module, 0);

        /* the second rdma pass (argument 1) only happens when the progress
         * thread is enabled */
        if (mca_btl_ugni_component.progress_thread_enabled) {
            events += mca_btl_ugni_progress_rdma (module, 1);
        }

        /* post pending after progressing rdma */
        mca_btl_ugni_post_pending (module);
    }

    return events;
}
示例#3
0
/**
 * Component progress function: progresses remote SMSG and datagrams once,
 * then walks the virtual devices of the module and progresses only the
 * queues that report activity.
 *
 * @returns the sum of the values returned by the individual progress calls
 */
static int mca_btl_ugni_component_progress (void)
{
    mca_btl_ugni_module_t *module = mca_btl_ugni_component.modules;
    int events = mca_btl_ugni_progress_remote_smsg (module);

    /* datagram progress is skipped while no datagrams are active */
    if (module->active_datagrams) {
        events += mca_btl_ugni_progress_datagram (module->devices);
    }

    for (int dev_idx = 0 ; dev_idx < mca_btl_ugni_component.virtual_device_count ; ++dev_idx) {
        mca_btl_ugni_device_t *dev = &module->devices[dev_idx];

        /* local SMSG and the wait list only matter when this device has
         * SMSG connections */
        if (dev->smsg_connections) {
            events += mca_btl_ugni_progress_local_smsg (module, dev);
            mca_btl_ugni_progress_wait_list (module);
        }

        /* regular rdma completion queue */
        if (dev->dev_rdma_local_cq.active_operations) {
            events += mca_btl_ugni_progress_rdma (module, dev, &dev->dev_rdma_local_cq);
        }

        /* irq rdma completion queue; checked only with the progress thread enabled */
        if (mca_btl_ugni_component.progress_thread_enabled &&
            dev->dev_rdma_local_irq_cq.active_operations) {
            events += mca_btl_ugni_progress_rdma (module, dev, &dev->dev_rdma_local_irq_cq);
        }
    }

    return events;
}
/**
 * Component progress function with throttled datagram polling.
 *
 * A call counter shared by all invocations is bumped on entry. Datagram
 * progress is attempted only when the low three bits of the value obtained
 * from that bump are zero; SMSG and RDMA progress are attempted only when the
 * module reports connected peers / active RDMA operations.
 *
 * @returns the sum of the values returned by the individual progress calls
 */
static int mca_btl_ugni_component_progress (void)
{
    static int64_t invocations = 0;
    const int64_t tick = OPAL_THREAD_ADD64(&invocations, 1);
    /* true when (tick mod 8) is zero */
    const int poll_datagrams = (0 == (tick & 0x7));
    int events = 0;

    for (unsigned int idx = 0 ; idx < mca_btl_ugni_component.ugni_num_btls ; ++idx) {
        mca_btl_ugni_module_t *module = &mca_btl_ugni_component.modules[idx];

        if (poll_datagrams) {
            events += mca_btl_ugni_progress_datagram (module);
        }

        /* SMSG traffic and the wait list are only serviced with connected peers */
        if (module->connected_peer_count) {
            mca_btl_ugni_progress_wait_list (module);
            events += mca_btl_ugni_progress_local_smsg (module);
            events += mca_btl_ugni_progress_remote_smsg (module);
        }

        if (module->active_rdma_count) {
            events += mca_btl_ugni_progress_rdma (module, 0);
        }

        /* the second rdma pass (argument 1) runs whenever the progress thread
         * is enabled, independent of active_rdma_count */
        if (mca_btl_ugni_component.progress_thread_enabled) {
            events += mca_btl_ugni_progress_rdma (module, 1);
        }

        /* post pending after progressing rdma */
        mca_btl_ugni_post_pending (module);
    }

    return events;
}
示例#5
0
/**
 * Component progress function: for every uGNI BTL module, retries failed
 * fragments, services the wait list and then progresses datagrams, SMSG and
 * RDMA.
 *
 * @returns the sum of the values returned by the datagram, local/remote SMSG
 *          and RDMA progress calls across all modules
 */
static int mca_btl_ugni_component_progress (void)
{
    int events = 0;

    for (unsigned int idx = 0 ; idx < mca_btl_ugni_component.ugni_num_btls ; ++idx) {
        mca_btl_ugni_module_t *module = &mca_btl_ugni_component.modules[idx];

        /* housekeeping first; neither call contributes to the returned count */
        mca_btl_ugni_retry_failed (module);
        mca_btl_ugni_progress_wait_list (module);

        events += mca_btl_ugni_progress_datagram (module);
        events += mca_btl_ugni_progress_local_smsg (module);
        events += mca_btl_ugni_progress_remote_smsg (module);
        events += mca_btl_ugni_progress_rdma (module);
    }

    return events;
}
/**
 * Finalize a uGNI BTL module.
 *
 * Progresses local SMSG until the module reports no active sends (or the
 * progress call returns something other than OMPI_SUCCESS), destructs the
 * module's fragment and wait-list objects and, if the module was initialized,
 * releases every endpoint stored in the id_to_endpoint hash table, destroys
 * the three completion queues, cancels and destroys the wildcard endpoint and
 * destroys the memory pools. The remaining module objects are destructed last
 * and the module is marked uninitialized.
 *
 * @param btl  module to finalize; cast to mca_btl_ugni_module_t
 *
 * @returns OMPI_SUCCESS always; teardown failures are only logged.
 */
static int
mca_btl_ugni_module_finalize (struct mca_btl_base_module_t *btl)
{
    mca_btl_ugni_module_t *ugni_module = (mca_btl_ugni_module_t *)btl;
    mca_btl_base_endpoint_t *ep;
    uint64_t key;
    void *node;
    /* NOTE(review): rc holds OMPI/OPAL codes as well as the results of the
     * GNI_* calls, which are compared against GNI_RC_SUCCESS below */
    int rc;

    while (ugni_module->active_send_count) {
        /* ensure all sends are complete before closing the module */
        rc = mca_btl_ugni_progress_local_smsg (ugni_module);
        if (OMPI_SUCCESS != rc) {
            /* stop draining as soon as progress does not report OMPI_SUCCESS */
            break;
        }
    }

    /* NOTE(review): these objects are destructed before the endpoints are
     * released below -- confirm mca_btl_ugni_release_ep does not use them */
    OBJ_DESTRUCT(&ugni_module->eager_frags_send);
    OBJ_DESTRUCT(&ugni_module->eager_frags_recv);
    OBJ_DESTRUCT(&ugni_module->smsg_frags);
    OBJ_DESTRUCT(&ugni_module->rdma_frags);
    OBJ_DESTRUCT(&ugni_module->rdma_int_frags);
    OBJ_DESTRUCT(&ugni_module->ep_wait_list);

    /* close all open connections and release endpoints */
    if (ugni_module->initialized) {
        /* walk the id -> endpoint hash table, releasing every non-NULL entry */
        rc = opal_hash_table_get_first_key_uint64 (&ugni_module->id_to_endpoint, &key, (void **) &ep, &node);
        while (OPAL_SUCCESS == rc) {
            if (NULL != ep) {
                mca_btl_ugni_release_ep (ep);
            }

            rc = opal_hash_table_get_next_key_uint64 (&ugni_module->id_to_endpoint, &key, (void **) &ep, node, &node);
        }

        /* destroy all cqs; a failure is logged and teardown continues */
        rc = GNI_CqDestroy (ugni_module->rdma_local_cq);
        if (GNI_RC_SUCCESS != rc) {
            BTL_ERROR(("error tearing down local BTE/FMA CQ"));
        }

        rc = GNI_CqDestroy (ugni_module->smsg_local_cq);
        if (GNI_RC_SUCCESS != rc) {
            BTL_ERROR(("error tearing down local SMSG CQ"));
        }

        rc = GNI_CqDestroy (ugni_module->smsg_remote_cq);
        if (GNI_RC_SUCCESS != rc) {
            BTL_ERROR(("error tearing down remote SMSG CQ"));
        }

        /* cancel wildcard post; the id is the wildcard id or'ed with this
         * process's vpid */
        rc = GNI_EpPostDataCancelById (ugni_module->wildcard_ep,
                                       MCA_BTL_UGNI_CONNECT_WILDCARD_ID |
                                       OMPI_PROC_MY_NAME->vpid);
        if (GNI_RC_SUCCESS != rc) {
            BTL_VERBOSE(("btl/ugni error cancelling wildcard post"));
        }

        /* tear down wildcard endpoint */
        rc = GNI_EpDestroy (ugni_module->wildcard_ep);
        if (GNI_RC_SUCCESS != rc) {
            BTL_VERBOSE(("btl/ugni error destroying endpoint"));
        }

        /* destroy the memory pools; the pointers are cleared afterwards and
         * the return values are ignored */
        if (NULL != ugni_module->smsg_mpool) {
            (void) mca_mpool_base_module_destroy (ugni_module->smsg_mpool);
            ugni_module->smsg_mpool  = NULL;
        }

        if (NULL != ugni_module->super.btl_mpool) {
            (void) mca_mpool_base_module_destroy (ugni_module->super.btl_mpool);
            ugni_module->super.btl_mpool = NULL;
        }
    }

    /* these objects are destructed whether or not the module was initialized */
    OBJ_DESTRUCT(&ugni_module->pending_smsg_frags_bb);
    OBJ_DESTRUCT(&ugni_module->id_to_endpoint);
    OBJ_DESTRUCT(&ugni_module->endpoints);
    OBJ_DESTRUCT(&ugni_module->failed_frags);

    ugni_module->initialized = false;

    return OMPI_SUCCESS;
}