int mca_mpool_sm_ft_event(int state)
{
    mca_mpool_base_module_t *self_module = NULL;
    mca_mpool_sm_module_t *self_sm_module = NULL;
    char *file_name = NULL;

    if (OPAL_CRS_CHECKPOINT == state) {
        /* Record the shared memory filename */
        asprintf(&file_name, "%s"OPAL_PATH_SEP"shared_mem_pool.%s",
                 opal_process_info.job_session_dir,
                 opal_proc_local_get()->proc_hostname);
        /* Disabled to get FT code compiled again
         * TODO: FIXIT soon
        orte_sstore.set_attr(orte_sstore_handle_current,
                             SSTORE_METADATA_LOCAL_TOUCH,
                             file_name);
        */
        free(file_name);
        file_name = NULL;
    }
    else if (OPAL_CRS_CONTINUE == state) {
        if (opal_cr_continue_like_restart) {
            /* Find the sm module */
            self_module = mca_mpool_base_module_lookup("sm");
            self_sm_module = (mca_mpool_sm_module_t *) self_module;

            /* Mark the old sm file for eventual removal via CRS */
            if (NULL != self_sm_module->sm_common_module) {
                opal_crs_base_cleanup_append(self_sm_module->sm_common_module->shmem_ds.seg_name,
                                             false);
            }

            /* Remove self from the list of all modules */
            mca_mpool_base_module_destroy(self_module);
        }
    }
    else if (OPAL_CRS_RESTART == state ||
             OPAL_CRS_RESTART_PRE == state) {
        /* Find the sm module */
        self_module = mca_mpool_base_module_lookup("sm");
        self_sm_module = (mca_mpool_sm_module_t *) self_module;

        /* Mark the old sm file for eventual removal via CRS */
        if (NULL != self_sm_module->sm_common_module) {
            opal_crs_base_cleanup_append(self_sm_module->sm_common_module->shmem_ds.seg_name,
                                         false);
        }

        /* Remove self from the list of all modules */
        mca_mpool_base_module_destroy(self_module);
    }
    else if (OPAL_CRS_TERM == state) {
        ; /* Nothing to do */
    }
    else {
        ; /* Nothing to do for other states */
    }

    return OPAL_SUCCESS;
}
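/*
 * A minimal standalone sketch of the filename handling in the
 * CHECKPOINT branch above: asprintf() allocates the buffer for
 * "<session_dir>/shared_mem_pool.<hostname>" and the caller owns (and
 * must free) it. The session_dir and hostname values below are
 * placeholders, not the real opal_process_info fields, and checking
 * the asprintf() return value is an addition the original omits.
 */
#define _GNU_SOURCE
#include <stdio.h>
#include <stdlib.h>

int main(void)
{
    const char *session_dir = "/tmp/ompi.job42"; /* placeholder value */
    const char *hostname    = "node001";         /* placeholder value */
    char *file_name = NULL;

    /* asprintf() returns a negative value on failure and leaves
     * file_name undefined, so bail out before touching it. */
    if (asprintf(&file_name, "%s/shared_mem_pool.%s",
                 session_dir, hostname) < 0) {
        return EXIT_FAILURE;
    }

    printf("metadata would record: %s\n", file_name);
    free(file_name);
    return EXIT_SUCCESS;
}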
int mca_btl_udapl_finalize(struct mca_btl_base_module_t* base_btl)
{
    mca_btl_udapl_module_t* udapl_btl = (mca_btl_udapl_module_t*) base_btl;
    int32_t i;

    /*
     * Cleaning up the endpoints here because mca_btl_udapl_del_procs
     * is never called by upper layers.
     * Note: this is only looking at those endpoints which are available
     * off of the btl module rdma list.
     */
    for (i = 0; i < udapl_btl->udapl_eager_rdma_endpoint_count; i++) {
        mca_btl_udapl_endpoint_t* endpoint =
            opal_pointer_array_get_item(udapl_btl->udapl_eager_rdma_endpoints, i);
        OBJ_DESTRUCT(endpoint);
    }

    /* release uDAPL resources */
    dat_evd_free(udapl_btl->udapl_evd_dto);
    dat_evd_free(udapl_btl->udapl_evd_conn);
    dat_pz_free(udapl_btl->udapl_pz);
    dat_ia_close(udapl_btl->udapl_ia, DAT_CLOSE_GRACEFUL_FLAG);

    /* destroy objects */
    OBJ_DESTRUCT(&udapl_btl->udapl_lock);
    OBJ_DESTRUCT(&udapl_btl->udapl_frag_eager);
    OBJ_DESTRUCT(&udapl_btl->udapl_frag_eager_recv);
    OBJ_DESTRUCT(&udapl_btl->udapl_frag_max);
    OBJ_DESTRUCT(&udapl_btl->udapl_frag_max_recv);
    OBJ_DESTRUCT(&udapl_btl->udapl_frag_user);
    OBJ_DESTRUCT(&udapl_btl->udapl_frag_control);
    OBJ_DESTRUCT(&udapl_btl->udapl_eager_rdma_lock);

    /* destroy mpool */
    if (OMPI_SUCCESS !=
        mca_mpool_base_module_destroy(udapl_btl->super.btl_mpool)) {
        BTL_UDAPL_VERBOSE_OUTPUT(VERBOSE_INFORM,
            ("WARNING: Failed to release mpool"));
        return OMPI_ERROR;
    }

    free(udapl_btl);

    return OMPI_SUCCESS;
}
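/*
 * A sketch of the teardown ordering mca_btl_udapl_finalize() follows:
 * consumers first (endpoints, fragment lists), the transport handles
 * next, and the memory pool that backed them last, so nothing is
 * released while an object that references it is still alive. Every
 * type and function below is a hypothetical stand-in, not the
 * OMPI/uDAPL API.
 */
#include <stdio.h>
#include <stdlib.h>

typedef struct { int id; } endpoint_t;    /* stand-in for the BTL endpoint */
typedef struct { char *backing; } pool_t; /* stand-in for the mpool */
typedef struct {
    endpoint_t **endpoints;
    int          endpoint_count;
    pool_t      *pool;
} module_t;

static int pool_destroy(pool_t *pool)
{
    free(pool->backing);
    free(pool);
    return 0; /* 0 means success, mirroring OMPI_SUCCESS */
}

static int module_finalize(module_t *m)
{
    /* Endpoints go first: they may hold memory carved out of the pool. */
    for (int i = 0; i < m->endpoint_count; i++) {
        free(m->endpoints[i]);
    }
    free(m->endpoints);

    /* The pool goes last; a failure here is reported to the caller,
     * just as the finalizer above returns OMPI_ERROR. */
    if (0 != pool_destroy(m->pool)) {
        fprintf(stderr, "WARNING: failed to release pool\n");
        return -1;
    }

    free(m);
    return 0;
}

int main(void)
{
    module_t *m = malloc(sizeof(*m));
    m->endpoint_count = 2;
    m->endpoints = malloc(2 * sizeof(endpoint_t *));
    m->endpoints[0] = malloc(sizeof(endpoint_t));
    m->endpoints[1] = malloc(sizeof(endpoint_t));
    m->pool = malloc(sizeof(pool_t));
    m->pool->backing = malloc(64);
    return (0 == module_finalize(m)) ? EXIT_SUCCESS : EXIT_FAILURE;
}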
int ompi_common_mx_finalize(void)
{
    mx_return_t mx_return;

    ompi_common_mx_initialize_ref_cnt--;
    if (0 == ompi_common_mx_initialize_ref_cnt) {
        if (ompi_common_mx_fake_mpool) {
            mca_mpool_base_module_destroy(ompi_common_mx_fake_mpool);
        }
        mx_return = mx_finalize();
        if (mx_return != MX_SUCCESS) {
            opal_output(0, "Error in mx_finalize (error %s)\n",
                        mx_strerror(mx_return));
            return OMPI_ERROR;
        }
    }
    return OMPI_SUCCESS;
}
int ompi_common_mx_initialize(void)
{
    mx_return_t mx_return;
    struct mca_mpool_base_resources_t mpool_resources;
    int index, value, ret = OMPI_SUCCESS;

    ompi_common_mx_initialize_ref_cnt++;

    if (ompi_common_mx_initialize_ref_cnt == 1) {
        /* set the MX error handle to always return. This function is the
         * only MX function allowed to be called before mx_init in order
         * to make sure that if the MX is not up and running the MX
         * library does not exit the application. */
        mx_set_error_handler(MX_ERRORS_RETURN);

        /* If we have a memory manager available, and
           mpi_leave_pinned == -1, then set mpi_leave_pinned to 1.

           We have a memory manager if:
           - we have both FREE and MUNMAP support
           - we have MUNMAP support and the linux mallopt */
        value = opal_mem_hooks_support_level();
        if ((value & (OPAL_MEMORY_FREE_SUPPORT | OPAL_MEMORY_MUNMAP_SUPPORT))
            == (OPAL_MEMORY_FREE_SUPPORT | OPAL_MEMORY_MUNMAP_SUPPORT)) {
            index = mca_base_param_find("mpi", NULL, "leave_pinned");
            if (index >= 0) {
                if ((mca_base_param_lookup_int(index, &value) == OPAL_SUCCESS)
                    && (value == -1)) {
                    ompi_mpi_leave_pinned = 1;
                    setenv("MX_RCACHE", "2", 1);
                    /* Fill in the mpool resources (the registration-cache
                     * cleanup hook) before creating the mpool that will
                     * consume them. */
                    mpool_resources.regcache_clean = mx__regcache_clean;
                    ompi_common_mx_fake_mpool =
                        mca_mpool_base_module_create("fake", NULL,
                                                     &mpool_resources);
                    if (!ompi_common_mx_fake_mpool) {
                        ompi_mpi_leave_pinned = 0;
                        setenv("MX_RCACHE", "0", 1);
                        opal_output(0, "Error creating fake mpool (error %s)\n",
                                    strerror(errno));
                    }
                }
            }
        }

        /* initialize the mx library */
        mx_return = mx_init();
        if (MX_SUCCESS != mx_return) {
            ompi_common_mx_available = -1;
            if (ompi_common_mx_fake_mpool) {
                mca_mpool_base_module_destroy(ompi_common_mx_fake_mpool);
                ompi_common_mx_fake_mpool = NULL;
            }
            opal_output(0, "Error in mx_init (error %s)\n",
                        mx_strerror(mx_return));
            /* We did not succeed to initialize the MX device */
            ompi_common_mx_initialize_ref_cnt = 0;
            return OMPI_ERR_NOT_AVAILABLE;
        }
        ompi_common_mx_available = 1;
    } else if (ompi_common_mx_available < 0) {
        ret = OMPI_ERR_NOT_AVAILABLE;
    }
    return ret;
}
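/*
 * ompi_common_mx_finalize() and ompi_common_mx_initialize() together
 * implement a reference-counted singleton: every initialize bumps a
 * counter and only the first call does real work; every finalize
 * decrements it and only the last call tears the library down, with a
 * sticky "available" flag so later callers learn about an earlier
 * failed attempt. A minimal standalone sketch of that pattern (all
 * names hypothetical):
 */
#include <stdio.h>

static int ref_cnt   = 0;
static int available = 0; /* 1 = up, -1 = earlier init failed */

static int lib_initialize(void)
{
    if (1 == ++ref_cnt) {
        /* first caller in: do the expensive one-time setup */
        available = 1;
        puts("real init");
    } else if (available < 0) {
        return -1; /* earlier attempt failed; report it to this caller */
    }
    return 0;
}

static int lib_finalize(void)
{
    if (0 == --ref_cnt) {
        /* last caller out: release the shared resources */
        available = 0;
        puts("real finalize");
    }
    return 0;
}

int main(void)
{
    lib_initialize(); /* does the real initialization */
    lib_initialize(); /* only bumps the count */
    lib_finalize();   /* only drops the count */
    lib_finalize();   /* does the real finalization */
    return 0;
}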
static int mca_btl_ugni_module_finalize (struct mca_btl_base_module_t *btl)
{
    mca_btl_ugni_module_t *ugni_module = (mca_btl_ugni_module_t *) btl;
    mca_btl_base_endpoint_t *ep;
    uint64_t key;
    void *node;
    int rc;

    while (ugni_module->active_send_count) {
        /* ensure all sends are complete before closing the module */
        rc = mca_btl_ugni_progress_local_smsg (ugni_module);
        if (OMPI_SUCCESS != rc) {
            break;
        }
    }

    OBJ_DESTRUCT(&ugni_module->eager_frags_send);
    OBJ_DESTRUCT(&ugni_module->eager_frags_recv);
    OBJ_DESTRUCT(&ugni_module->smsg_frags);
    OBJ_DESTRUCT(&ugni_module->rdma_frags);
    OBJ_DESTRUCT(&ugni_module->rdma_int_frags);
    OBJ_DESTRUCT(&ugni_module->ep_wait_list);

    /* close all open connections and release endpoints */
    if (ugni_module->initialized) {
        rc = opal_hash_table_get_first_key_uint64 (&ugni_module->id_to_endpoint,
                                                   &key, (void **) &ep, &node);
        while (OPAL_SUCCESS == rc) {
            if (NULL != ep) {
                mca_btl_ugni_release_ep (ep);
            }

            rc = opal_hash_table_get_next_key_uint64 (&ugni_module->id_to_endpoint,
                                                      &key, (void **) &ep,
                                                      node, &node);
        }

        /* destroy all cqs */
        rc = GNI_CqDestroy (ugni_module->rdma_local_cq);
        if (GNI_RC_SUCCESS != rc) {
            BTL_ERROR(("error tearing down local BTE/FMA CQ"));
        }

        rc = GNI_CqDestroy (ugni_module->smsg_local_cq);
        if (GNI_RC_SUCCESS != rc) {
            BTL_ERROR(("error tearing down local SMSG CQ"));
        }

        rc = GNI_CqDestroy (ugni_module->smsg_remote_cq);
        if (GNI_RC_SUCCESS != rc) {
            BTL_ERROR(("error tearing down remote SMSG CQ"));
        }

        /* cancel wildcard post */
        rc = GNI_EpPostDataCancelById (ugni_module->wildcard_ep,
                                       MCA_BTL_UGNI_CONNECT_WILDCARD_ID |
                                       OMPI_PROC_MY_NAME->vpid);
        if (GNI_RC_SUCCESS != rc) {
            BTL_VERBOSE(("btl/ugni error cancelling wildcard post"));
        }

        /* tear down wildcard endpoint */
        rc = GNI_EpDestroy (ugni_module->wildcard_ep);
        if (GNI_RC_SUCCESS != rc) {
            BTL_VERBOSE(("btl/ugni error destroying endpoint"));
        }

        if (NULL != ugni_module->smsg_mpool) {
            (void) mca_mpool_base_module_destroy (ugni_module->smsg_mpool);
            ugni_module->smsg_mpool = NULL;
        }

        if (NULL != ugni_module->super.btl_mpool) {
            (void) mca_mpool_base_module_destroy (ugni_module->super.btl_mpool);
            ugni_module->super.btl_mpool = NULL;
        }
    }

    OBJ_DESTRUCT(&ugni_module->pending_smsg_frags_bb);
    OBJ_DESTRUCT(&ugni_module->id_to_endpoint);
    OBJ_DESTRUCT(&ugni_module->endpoints);
    OBJ_DESTRUCT(&ugni_module->failed_frags);

    ugni_module->initialized = false;

    return OMPI_SUCCESS;
}
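/*
 * A sketch of the drain-before-teardown loop at the top of
 * mca_btl_ugni_module_finalize(): keep driving the progress engine
 * until every outstanding send completes (or progress itself fails),
 * so no completion can fire against a CQ that has already been
 * destroyed. The counter and progress function are hypothetical
 * stand-ins for the module's active_send_count and
 * mca_btl_ugni_progress_local_smsg().
 */
#include <stdio.h>

static int active_send_count = 3;

static int progress_local(void)
{
    /* pretend each progress call completes one pending send */
    if (active_send_count > 0) {
        active_send_count--;
    }
    return 0; /* 0 means success */
}

int main(void)
{
    while (active_send_count) {
        if (0 != progress_local()) {
            break; /* give up draining but still proceed to teardown */
        }
    }
    printf("%d sends left; safe to destroy CQs\n", active_send_count);
    return 0;
}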
static void mca_bcol_iboffload_device_destructor(mca_bcol_iboffload_device_t *device)
{
    int qp_index, num_qps = mca_bcol_iboffload_component.num_qps;

    IBOFFLOAD_VERBOSE(10, ("Device %s will be destroyed.\n",
                           ibv_get_device_name(device->dev.ib_dev)));

    if (NULL != device->frags_free) {
        for (qp_index = 0; qp_index < num_qps; ++qp_index) {
            mca_bcol_iboffload_dealloc_qps_resource_fn_t dealloc_resource =
                mca_bcol_iboffload_component.qp_infos[qp_index].dealloc_resource;
            if (NULL != dealloc_resource) {
                dealloc_resource(qp_index, device);
            }
        }
        free(device->frags_free);
    }

    if (NULL != device->mpool) {
        IBOFFLOAD_VERBOSE(10, ("Mpool destroy - %p.\n", device->mpool));
        if (OMPI_SUCCESS != mca_mpool_base_module_destroy(device->mpool)) {
            IBOFFLOAD_ERROR(("Device %s, failed to destroy mpool",
                             ibv_get_device_name(device->dev.ib_dev)));
        }
    }

    if (NULL != device->dummy_reg.mr) {
        IBOFFLOAD_VERBOSE(10, ("Dummy memory MR unregister - %p.\n",
                               device->dummy_reg.mr));
        if (OMPI_SUCCESS !=
            mca_bcol_iboffload_deregister_mr((void *) device,
                                             &device->dummy_reg.base)) {
            IBOFFLOAD_ERROR(("Device %s: failed to unregister dummy memory MR.",
                             ibv_get_device_name(device->dev.ib_dev)));
        }
    }

    if (NULL != device->ib_cq) {
        if (ibv_destroy_cq(device->ib_cq)) {
            IBOFFLOAD_ERROR(("Device %s, failed to destroy CQ, errno says %s",
                             ibv_get_device_name(device->dev.ib_dev),
                             strerror(errno)));
        }
    }

    if (NULL != device->ib_mq_cq) {
        if (ibv_destroy_cq(device->ib_mq_cq)) {
            IBOFFLOAD_ERROR(("Device %s, failed to destroy mq CQ, errno says %s",
                             ibv_get_device_name(device->dev.ib_dev),
                             strerror(errno)));
        }
    }

    /* Release IB PD if we have one */
    if (NULL != device->ib_pd) {
        if (ibv_dealloc_pd(device->ib_pd)) {
            IBOFFLOAD_ERROR(("Device %s, failed to release PD, errno says %s",
                             ibv_get_device_name(device->dev.ib_dev),
                             strerror(errno)));
        }
    }

    /* close the device */
    if (NULL != device->dev.ib_dev_context) {
        if (ibv_close_device(device->dev.ib_dev_context)) {
            IBOFFLOAD_ERROR(("Device %s, failed to close the device, errno says %s",
                             ibv_get_device_name(device->dev.ib_dev),
                             strerror(errno)));
        }
    }

    /* release memory */
    if (NULL != device->ports) {
        free(device->ports);
    }
}
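/*
 * The destructor above follows a best-effort teardown discipline: every
 * handle is NULL-checked before release (construction may have failed
 * part-way), and a failure is logged but never aborts the remaining
 * cleanup, because a destructor has no error path to its caller. A
 * standalone sketch of that discipline with a hypothetical resource
 * type, not the verbs API:
 */
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

typedef struct {
    void *cq;    /* stand-ins for the ibv_cq / ibv_pd handles above */
    void *pd;
    char *ports;
} device_t;

static int release(void *handle)
{
    free(handle);
    return 0; /* nonzero would signal failure, like ibv_destroy_cq() */
}

static void device_destructor(device_t *device)
{
    /* Guard every release and keep going on error. */
    if (NULL != device->cq && 0 != release(device->cq)) {
        fprintf(stderr, "failed to destroy CQ: %s\n", strerror(errno));
    }
    if (NULL != device->pd && 0 != release(device->pd)) {
        fprintf(stderr, "failed to release PD: %s\n", strerror(errno));
    }
    if (NULL != device->ports) {
        free(device->ports);
    }
}

int main(void)
{
    device_t dev = { malloc(8), malloc(8), malloc(8) };
    device_destructor(&dev);
    return 0;
}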