/**
 * Disconnect an endpoint and put it back into the INIT state.
 *
 * Outstanding local SMSG operations on the endpoint's device are progressed
 * first. If the endpoint is connected and the caller asked for it, a
 * disconnect message is sent to the peer and progressed as well. Afterwards
 * the SMSG endpoint handle is cleaned up and the mailbox (if any) is handed
 * back to the module's mailbox free list.
 *
 * @param ep               endpoint to disconnect
 * @param send_disconnect  true to notify the peer (only done when the
 *                         endpoint is in the CONNECTED state)
 *
 * @return OPAL_SUCCESS in all cases; intermediate failures are not
 *         propagated to the caller.
 */
int mca_btl_ugni_ep_disconnect (mca_btl_base_endpoint_t *ep, bool send_disconnect)
{
    mca_btl_ugni_module_t *module = mca_btl_ugni_ep_btl (ep);
    mca_btl_ugni_device_t *dev;

    /* an endpoint that never left the INIT state has nothing to tear down */
    if (MCA_BTL_UGNI_EP_STATE_INIT == ep->state) {
        return OPAL_SUCCESS;
    }

    dev = ep->smsg_ep_handle.device;

    /* Progress pending sends before the endpoint goes away.
     * NOTE(review): any result other than OPAL_SUCCESS ends the loop. If the
     * progress helper reports a positive count when it completes an event,
     * this stops after the first completion -- confirm that is intended. */
    while (dev->dev_smsg_local_cq.active_operations) {
        if (OPAL_SUCCESS != mca_btl_ugni_progress_local_smsg (module, dev)) {
            break;
        }
    }

    if (MCA_BTL_UGNI_EP_STATE_CONNECTED == ep->state && send_disconnect) {
        const int send_status = mca_btl_ugni_ep_send_disconnect (ep);

        if (OPAL_UNLIKELY(OPAL_SUCCESS != send_status)) {
            BTL_VERBOSE(("could not send disconnect message to peer"));
        }

        /* give the disconnect message a chance to leave: progress at least
         * once, then keep going while operations remain outstanding */
        for (;;) {
            if (OPAL_SUCCESS != mca_btl_ugni_progress_local_smsg (module, dev)) {
                break;
            }

            if (!dev->dev_smsg_local_cq.active_operations) {
                break;
            }
        }

        /* one fewer SMSG connection on this device */
        (void) opal_atomic_add_fetch_32 (&ep->smsg_ep_handle.device->smsg_connections, -1);
    }

    /* NTH: the device lock may not be required around the handle cleanup (it
     * appears to work without it) but it is taken to be safe. */
    mca_btl_ugni_device_lock (dev);
    (void) mca_btl_ugni_ep_handle_cleanup (&ep->smsg_ep_handle);
    mca_btl_ugni_device_unlock (dev);

    /* hand the mailbox back to the module's free list */
    if (NULL != ep->mailbox) {
        opal_free_list_item_t *mbox_item = (opal_free_list_item_t *) ep->mailbox;

        opal_free_list_return (&module->smsg_mboxes, mbox_item);
        ep->mailbox = NULL;
    }

    ep->state = MCA_BTL_UGNI_EP_STATE_INIT;

    return OPAL_SUCCESS;
}
/**
 * Component progress function: drive every uGNI BTL module once.
 *
 * For each module the endpoint wait list is processed, then datagram,
 * local SMSG, remote SMSG and RDMA progress are run, and finally any
 * pending work is posted.
 *
 * @return sum of the values returned by the individual progress helpers
 */
static int mca_btl_ugni_component_progress (void)
{
    int events = 0;

    for (unsigned int idx = 0 ; idx < mca_btl_ugni_component.ugni_num_btls ; ++idx) {
        mca_btl_ugni_module_t *module = &mca_btl_ugni_component.modules[idx];

        mca_btl_ugni_progress_wait_list (module);

        events += mca_btl_ugni_progress_datagram (module);
        events += mca_btl_ugni_progress_local_smsg (module);
        events += mca_btl_ugni_progress_remote_smsg (module);
        events += mca_btl_ugni_progress_rdma (module, 0);

        /* second RDMA pass (selector 1) only when the progress thread is enabled */
        if (mca_btl_ugni_component.progress_thread_enabled) {
            events += mca_btl_ugni_progress_rdma (module, 1);
        }

        /* pending work is posted only after RDMA has been progressed */
        mca_btl_ugni_post_pending (module);
    }

    return events;
}
static int mca_btl_ugni_component_progress (void) { mca_btl_ugni_module_t *ugni_module = mca_btl_ugni_component.modules; int count = 0; count += mca_btl_ugni_progress_remote_smsg (ugni_module); if (ugni_module->active_datagrams) { count += mca_btl_ugni_progress_datagram (ugni_module->devices); } for (int i = 0 ; i < mca_btl_ugni_component.virtual_device_count ; ++i) { mca_btl_ugni_device_t *device = ugni_module->devices + i; if (device->smsg_connections) { count += mca_btl_ugni_progress_local_smsg (ugni_module, device); mca_btl_ugni_progress_wait_list (ugni_module); } if (device->dev_rdma_local_cq.active_operations) { count += mca_btl_ugni_progress_rdma (ugni_module, device, &device->dev_rdma_local_cq); } if (mca_btl_ugni_component.progress_thread_enabled && device->dev_rdma_local_irq_cq.active_operations) { count += mca_btl_ugni_progress_rdma (ugni_module, device, &device->dev_rdma_local_irq_cq); } } return count; }
static int mca_btl_ugni_component_progress (void) { mca_btl_ugni_module_t *ugni_module; static int64_t call_count = 0; int64_t cur_call_count = OPAL_THREAD_ADD64(&call_count, 1); unsigned int i; int count = 0; for (i = 0 ; i < mca_btl_ugni_component.ugni_num_btls ; ++i) { ugni_module = mca_btl_ugni_component.modules + i; if ((cur_call_count & 0x7) == 0) { count += mca_btl_ugni_progress_datagram (ugni_module); } if (ugni_module->connected_peer_count) { mca_btl_ugni_progress_wait_list (ugni_module); count += mca_btl_ugni_progress_local_smsg (ugni_module); count += mca_btl_ugni_progress_remote_smsg (ugni_module); } if (ugni_module->active_rdma_count) { count += mca_btl_ugni_progress_rdma (ugni_module, 0); } if (mca_btl_ugni_component.progress_thread_enabled) { count += mca_btl_ugni_progress_rdma (ugni_module, 1); } /* post pending after progressing rdma */ mca_btl_ugni_post_pending (ugni_module); } return count; }
/**
 * Component progress function: drive every uGNI BTL module once.
 *
 * Per module, failed operations are retried and the endpoint wait list is
 * processed before datagram, local SMSG, remote SMSG and RDMA progress run.
 *
 * @return sum of the values returned by the individual progress helpers
 */
static int mca_btl_ugni_component_progress (void)
{
    int events = 0;

    for (unsigned int idx = 0 ; idx < mca_btl_ugni_component.ugni_num_btls ; ++idx) {
        mca_btl_ugni_module_t *module = &mca_btl_ugni_component.modules[idx];

        /* housekeeping whose results are not counted as progress */
        mca_btl_ugni_retry_failed (module);
        mca_btl_ugni_progress_wait_list (module);

        events += mca_btl_ugni_progress_datagram (module);
        events += mca_btl_ugni_progress_local_smsg (module);
        events += mca_btl_ugni_progress_remote_smsg (module);
        events += mca_btl_ugni_progress_rdma (module);
    }

    return events;
}
static int mca_btl_ugni_module_finalize (struct mca_btl_base_module_t *btl) { mca_btl_ugni_module_t *ugni_module = (mca_btl_ugni_module_t *)btl; mca_btl_base_endpoint_t *ep; uint64_t key; void *node; int rc; while (ugni_module->active_send_count) { /* ensure all sends are complete before closing the module */ rc = mca_btl_ugni_progress_local_smsg (ugni_module); if (OMPI_SUCCESS != rc) { break; } } OBJ_DESTRUCT(&ugni_module->eager_frags_send); OBJ_DESTRUCT(&ugni_module->eager_frags_recv); OBJ_DESTRUCT(&ugni_module->smsg_frags); OBJ_DESTRUCT(&ugni_module->rdma_frags); OBJ_DESTRUCT(&ugni_module->rdma_int_frags); OBJ_DESTRUCT(&ugni_module->ep_wait_list); /* close all open connections and release endpoints */ if (ugni_module->initialized) { rc = opal_hash_table_get_first_key_uint64 (&ugni_module->id_to_endpoint, &key, (void **) &ep, &node); while (OPAL_SUCCESS == rc) { if (NULL != ep) { mca_btl_ugni_release_ep (ep); } rc = opal_hash_table_get_next_key_uint64 (&ugni_module->id_to_endpoint, &key, (void **) &ep, node, &node); } /* destroy all cqs */ rc = GNI_CqDestroy (ugni_module->rdma_local_cq); if (GNI_RC_SUCCESS != rc) { BTL_ERROR(("error tearing down local BTE/FMA CQ")); } rc = GNI_CqDestroy (ugni_module->smsg_local_cq); if (GNI_RC_SUCCESS != rc) { BTL_ERROR(("error tearing down local SMSG CQ")); } rc = GNI_CqDestroy (ugni_module->smsg_remote_cq); if (GNI_RC_SUCCESS != rc) { BTL_ERROR(("error tearing down remote SMSG CQ")); } /* cancel wildcard post */ rc = GNI_EpPostDataCancelById (ugni_module->wildcard_ep, MCA_BTL_UGNI_CONNECT_WILDCARD_ID | OMPI_PROC_MY_NAME->vpid); if (GNI_RC_SUCCESS != rc) { BTL_VERBOSE(("btl/ugni error cancelling wildcard post")); } /* tear down wildcard endpoint */ rc = GNI_EpDestroy (ugni_module->wildcard_ep); if (GNI_RC_SUCCESS != rc) { BTL_VERBOSE(("btl/ugni error destroying endpoint")); } if (NULL != ugni_module->smsg_mpool) { (void) mca_mpool_base_module_destroy (ugni_module->smsg_mpool); ugni_module->smsg_mpool = NULL; } if (NULL != 
ugni_module->super.btl_mpool) { (void) mca_mpool_base_module_destroy (ugni_module->super.btl_mpool); ugni_module->super.btl_mpool = NULL; } } OBJ_DESTRUCT(&ugni_module->pending_smsg_frags_bb); OBJ_DESTRUCT(&ugni_module->id_to_endpoint); OBJ_DESTRUCT(&ugni_module->endpoints); OBJ_DESTRUCT(&ugni_module->failed_frags); ugni_module->initialized = false; return OMPI_SUCCESS; }