示例#1
0
/*
 * Component progress entry point.
 *
 * Visits each of the mca_btl_ugni_component.ugni_num_btls modules in order
 * and runs its progress stages: wait list, datagrams, local SMSG, remote
 * SMSG and RDMA, followed by an attempt to post pending work.
 *
 * Returns the sum of the values returned by the datagram, SMSG and RDMA
 * progress calls over all modules. The wait-list and post-pending calls do
 * not contribute to the result.
 */
static int mca_btl_ugni_component_progress (void)
{
    int total = 0;

    for (unsigned int idx = 0 ; idx < mca_btl_ugni_component.ugni_num_btls ; ++idx) {
        mca_btl_ugni_module_t *module = &mca_btl_ugni_component.modules[idx];

        /* the wait list is serviced before anything is counted */
        mca_btl_ugni_progress_wait_list (module);

        total += mca_btl_ugni_progress_datagram (module);
        total += mca_btl_ugni_progress_local_smsg (module);
        total += mca_btl_ugni_progress_remote_smsg (module);

        /* RDMA progress with argument 0 always runs; the argument-1 variant
         * only runs when the component has its progress thread enabled */
        total += mca_btl_ugni_progress_rdma (module, 0);
        if (mca_btl_ugni_component.progress_thread_enabled) {
            total += mca_btl_ugni_progress_rdma (module, 1);
        }

        /* pending posts are attempted only after RDMA has been progressed */
        mca_btl_ugni_post_pending (module);
    }

    return total;
}
/*
 * Component progress entry point (throttled variant).
 *
 * A function-static counter is bumped through OPAL_THREAD_ADD64 on every
 * call. Datagram progress is attempted only on calls where the low three
 * bits of the value returned by that add are zero. The remaining stages are
 * gated on per-module counters so that idle modules are skipped cheaply:
 *   - wait list and SMSG progress run only if connected_peer_count is
 *     non-zero;
 *   - RDMA progress with argument 0 runs only if active_rdma_count is
 *     non-zero;
 *   - RDMA progress with argument 1 runs whenever the component has its
 *     progress thread enabled.
 * Pending posts are attempted for every module on every call.
 *
 * Returns the sum of the values returned by the datagram, SMSG and RDMA
 * progress calls that were actually made.
 */
static int mca_btl_ugni_component_progress (void)
{
    static int64_t invocations = 0;
    const int64_t this_call = OPAL_THREAD_ADD64(&invocations, 1);
    /* same for every module in this call, so evaluate it once */
    const int poll_datagrams = (0 == (this_call & 0x7));
    int total = 0;

    for (unsigned int idx = 0 ; idx < mca_btl_ugni_component.ugni_num_btls ; ++idx) {
        mca_btl_ugni_module_t *module = &mca_btl_ugni_component.modules[idx];

        if (poll_datagrams) {
            total += mca_btl_ugni_progress_datagram (module);
        }

        if (module->connected_peer_count) {
            mca_btl_ugni_progress_wait_list (module);
            total += mca_btl_ugni_progress_local_smsg (module);
            total += mca_btl_ugni_progress_remote_smsg (module);
        }

        if (module->active_rdma_count) {
            total += mca_btl_ugni_progress_rdma (module, 0);
        }

        if (mca_btl_ugni_component.progress_thread_enabled) {
            total += mca_btl_ugni_progress_rdma (module, 1);
        }

        /* pending posts are attempted only after RDMA has been progressed */
        mca_btl_ugni_post_pending (module);
    }

    return total;
}
示例#3
0
/*
 * Handle completed RDMA post descriptors from one completion queue.
 *
 * Up to MCA_BTL_UGNI_COMPLETIONS_PER_LOOP entries are fetched with
 * mca_btl_ugni_cq_get_completed_desc() and EVERY fetched entry is handled:
 *   - status OK: the descriptor is completed with OPAL_SUCCESS;
 *   - error reported as recoverable while the descriptor's retry count is
 *     still below mca_btl_ugni_component.rdma_max_retries: the descriptor
 *     is reposted;
 *   - any other error: the error is logged and the descriptor is completed
 *     with OPAL_ERROR.
 *
 * A failing entry does not stop the loop. The remaining entries exist only
 * in the local arrays filled by the fetch, so returning early would leave
 * them neither completed nor reposted.
 *
 * ugni_module  module the descriptors are completed/reposted against
 * device       device handed to the CQ fetch and to
 *              mca_btl_ugni_post_pending()
 * cq           completion queue to fetch from
 *
 * Returns:
 *   - the (non-positive) value from mca_btl_ugni_cq_get_completed_desc()
 *     when nothing was fetched;
 *   - OPAL_ERROR if at least one descriptor was given up on;
 *   - otherwise the number of descriptors completed successfully, which is
 *     0 when every fetched descriptor had to be reposted.
 */
static inline int mca_btl_ugni_progress_rdma (mca_btl_ugni_module_t *ugni_module, mca_btl_ugni_device_t *device,
                                              mca_btl_ugni_cq_t *cq)
{
    mca_btl_ugni_post_descriptor_t *post_desc[MCA_BTL_UGNI_COMPLETIONS_PER_LOOP];
    gni_cq_entry_t event_data[MCA_BTL_UGNI_COMPLETIONS_PER_LOOP];
    int completed = 0;
    int status = OPAL_SUCCESS;
    int rc;

    rc = mca_btl_ugni_cq_get_completed_desc (device, cq, event_data, post_desc, MCA_BTL_UGNI_COMPLETIONS_PER_LOOP);
    if (0 >= rc) {
        return rc;
    }

    BTL_VERBOSE(("got %d completed rdma descriptors", rc));

    for (int i = 0 ; i < rc ; ++i) {
        /* %p requires a void pointer argument */
        BTL_VERBOSE(("post descriptor %p complete. GNI_CQ_STATUS_OK(): %d", (void *) post_desc[i],
                     GNI_CQ_STATUS_OK(event_data[i])));

        if (OPAL_UNLIKELY(!GNI_CQ_STATUS_OK(event_data[i]))) {
            /* treated as recoverable unless GNI_CqErrorRecoverable says otherwise */
            uint32_t recoverable = 1;

            (void) GNI_CqErrorRecoverable (event_data[i], &recoverable);

            if (OPAL_UNLIKELY(++post_desc[i]->tries >= mca_btl_ugni_component.rdma_max_retries ||
                              !recoverable)) {
                /* start with an empty string so the log below stays well
                 * defined even if GNI_CqErrorStr writes nothing */
                char char_buffer[1024] = "";

                (void) GNI_CqErrorStr (event_data[i], char_buffer, sizeof (char_buffer));
                /* give up */
                BTL_ERROR(("giving up on desciptor %p, recoverable %d: %s", (void *) post_desc[i],
                           (int) recoverable, char_buffer));
#if OPAL_ENABLE_DEBUG
                btl_ugni_dump_post_desc (post_desc[i]);
#endif
                mca_btl_ugni_post_desc_complete (ugni_module, post_desc[i], OPAL_ERROR);

                /* remember the failure but keep handling the other entries */
                status = OPAL_ERROR;
                continue;
            }

            mca_btl_ugni_repost (ugni_module, post_desc[i]);
            continue;
        }

        mca_btl_ugni_post_desc_complete (ugni_module, post_desc[i], OPAL_SUCCESS);
        ++completed;
    }

    if (OPAL_SUCCESS != status) {
        return status;
    }

    if (completed > 0) {
        /* should be resources to progress the pending post list */
        (void) mca_btl_ugni_post_pending (ugni_module, device);
    }

    return completed;
}