/**
 * Component-wide progress entry point for the uGNI BTL.
 *
 * Visits each of the mca_btl_ugni_component.ugni_num_btls modules in
 * mca_btl_ugni_component.modules and, per module, runs in this order:
 * the wait-list progress, datagram progress, local SMSG progress, remote
 * SMSG progress and RDMA progress (argument 0). When
 * mca_btl_ugni_component.progress_thread_enabled is set, RDMA progress is
 * run a second time with argument 1. Pending posts are issued last.
 *
 * @returns the sum of the values returned by the datagram, SMSG and RDMA
 *          progress routines over all modules (the wait-list and
 *          post-pending calls do not contribute)
 */
static int mca_btl_ugni_component_progress (void)
{
    unsigned int btl_index;
    int events = 0;

    for (btl_index = 0 ; btl_index < mca_btl_ugni_component.ugni_num_btls ; ++btl_index) {
        mca_btl_ugni_module_t *module = &mca_btl_ugni_component.modules[btl_index];

        /* the wait list is serviced first; it does not add to the event count */
        mca_btl_ugni_progress_wait_list (module);

        events += mca_btl_ugni_progress_datagram (module);
        events += mca_btl_ugni_progress_local_smsg (module);
        events += mca_btl_ugni_progress_remote_smsg (module);

        /* NOTE(review): the 0/1 argument presumably selects which completion
         * queue is polled -- confirm against mca_btl_ugni_progress_rdma() */
        events += mca_btl_ugni_progress_rdma (module, 0);
        if (mca_btl_ugni_component.progress_thread_enabled) {
            events += mca_btl_ugni_progress_rdma (module, 1);
        }

        /* pending posts are issued only after rdma has been progressed */
        mca_btl_ugni_post_pending (module);
    }

    return events;
}
static int mca_btl_ugni_component_progress (void) { mca_btl_ugni_module_t *ugni_module; static int64_t call_count = 0; int64_t cur_call_count = OPAL_THREAD_ADD64(&call_count, 1); unsigned int i; int count = 0; for (i = 0 ; i < mca_btl_ugni_component.ugni_num_btls ; ++i) { ugni_module = mca_btl_ugni_component.modules + i; if ((cur_call_count & 0x7) == 0) { count += mca_btl_ugni_progress_datagram (ugni_module); } if (ugni_module->connected_peer_count) { mca_btl_ugni_progress_wait_list (ugni_module); count += mca_btl_ugni_progress_local_smsg (ugni_module); count += mca_btl_ugni_progress_remote_smsg (ugni_module); } if (ugni_module->active_rdma_count) { count += mca_btl_ugni_progress_rdma (ugni_module, 0); } if (mca_btl_ugni_component.progress_thread_enabled) { count += mca_btl_ugni_progress_rdma (ugni_module, 1); } /* post pending after progressing rdma */ mca_btl_ugni_post_pending (ugni_module); } return count; }
/**
 * Progress completed post (RDMA) descriptors on one completion queue.
 *
 * Asks mca_btl_ugni_cq_get_completed_desc() for up to
 * MCA_BTL_UGNI_COMPLETIONS_PER_LOOP completed descriptors and handles every
 * one of them:
 *   - status OK: the descriptor is completed with OPAL_SUCCESS;
 *   - failed, recoverable and under rdma_max_retries: it is reposted;
 *   - failed and either unrecoverable or out of retries: the error is
 *     logged and the descriptor is completed with OPAL_ERROR.
 *
 * The whole batch is always processed. Returning from the middle of the loop
 * would abandon the descriptors that follow the failed one even though they
 * were already handed to this function (NOTE(review): this assumes
 * mca_btl_ugni_cq_get_completed_desc() removes the events from the CQ --
 * confirm against its implementation).
 *
 * @param[in] ugni_module  module the descriptors are completed/reposted on
 * @param[in] device       device handed to the CQ query and to the
 *                         pending-post routine
 * @param[in] cq           completion queue to query
 *
 * @returns the (<= 0) value from mca_btl_ugni_cq_get_completed_desc() when
 *          nothing was retrieved; OPAL_ERROR when at least one descriptor
 *          was given up on; otherwise the number of descriptors completed
 *          successfully (reposted descriptors are not counted)
 */
static inline int mca_btl_ugni_progress_rdma (mca_btl_ugni_module_t *ugni_module, mca_btl_ugni_device_t *device,
                                              mca_btl_ugni_cq_t *cq)
{
    mca_btl_ugni_post_descriptor_t *post_desc[MCA_BTL_UGNI_COMPLETIONS_PER_LOOP];
    gni_cq_entry_t event_data[MCA_BTL_UGNI_COMPLETIONS_PER_LOOP];
    int completed = 0;
    int gave_up = 0;
    int rc;

    rc = mca_btl_ugni_cq_get_completed_desc (device, cq, event_data, post_desc, MCA_BTL_UGNI_COMPLETIONS_PER_LOOP);
    if (0 >= rc) {
        return rc;
    }

    BTL_VERBOSE(("got %d completed rdma descriptors", rc));

    for (int i = 0 ; i < rc ; ++i) {
        /* %p requires a void pointer argument */
        BTL_VERBOSE(("post descriptor %p complete. GNI_CQ_STATUS_OK(): %d", (void *) post_desc[i],
                     GNI_CQ_STATUS_OK(event_data[i])));

        if (OPAL_LIKELY(GNI_CQ_STATUS_OK(event_data[i]))) {
            mca_btl_ugni_post_desc_complete (ugni_module, post_desc[i], OPAL_SUCCESS);
            ++completed;
            continue;
        }

        /* treated as recoverable unless GNI_CqErrorRecoverable() says otherwise */
        uint32_t recoverable = 1;
        (void) GNI_CqErrorRecoverable (event_data[i], &recoverable);

        if (OPAL_UNLIKELY(++post_desc[i]->tries >= mca_btl_ugni_component.rdma_max_retries ||
                          !recoverable)) {
            char char_buffer[1024];
            GNI_CqErrorStr (event_data[i], char_buffer, sizeof (char_buffer));
            /* give up */
            BTL_ERROR(("giving up on desciptor %p, recoverable %d: %s", (void *) post_desc[i],
                       recoverable, char_buffer));
#if OPAL_ENABLE_DEBUG
            btl_ugni_dump_post_desc (post_desc[i]);
#endif
            mca_btl_ugni_post_desc_complete (ugni_module, post_desc[i], OPAL_ERROR);
            /* remember the failure but keep going: the remaining descriptors
             * in this batch still have to be completed or reposted */
            gave_up = 1;
            continue;
        }

        mca_btl_ugni_repost (ugni_module, post_desc[i]);
    }

    /* should be resources to progress the pending post list */
    (void) mca_btl_ugni_post_pending (ugni_module, device);

    return gave_up ? OPAL_ERROR : completed;
}