Esempio n. 1
0
int mca_mpool_sm_ft_event(int state) {
    mca_mpool_base_module_t *self_module = NULL;
    mca_mpool_sm_module_t   *self_sm_module = NULL;
    char * file_name = NULL;

    if(OPAL_CRS_CHECKPOINT == state) {
        /* Record the shared memory filename */
        asprintf( &file_name, "%s"OPAL_PATH_SEP"shared_mem_pool.%s",
                  opal_process_info.job_session_dir,
                  opal_proc_local_get()->proc_hostname );
        /* Disabled to get FT code compiled again
         * TODO: FIXIT soon
        orte_sstore.set_attr(orte_sstore_handle_current, SSTORE_METADATA_LOCAL_TOUCH, file_name);
         */
        free(file_name);
        file_name = NULL;
    }
    else if(OPAL_CRS_CONTINUE == state) {
        if (opal_cr_continue_like_restart) {
            /* Find the sm module */
            self_module = mca_mpool_base_module_lookup("sm");
            self_sm_module = (mca_mpool_sm_module_t*) self_module;

            /* Mark the old sm file for eventual removal via CRS */
            if (NULL != self_sm_module->sm_common_module) {
                opal_crs_base_cleanup_append(self_sm_module->sm_common_module->shmem_ds.seg_name, false);
            }

            /* Remove self from the list of all modules */
            mca_mpool_base_module_destroy(self_module);
        }
    }
    else if(OPAL_CRS_RESTART == state ||
            OPAL_CRS_RESTART_PRE == state) {
        /* Find the sm module */
        self_module = mca_mpool_base_module_lookup("sm");
        self_sm_module = (mca_mpool_sm_module_t*) self_module;

        /* Mark the old sm file for eventual removal via CRS */
        if (NULL != self_sm_module->sm_common_module) {
            opal_crs_base_cleanup_append(self_sm_module->sm_common_module->shmem_ds.seg_name, false);
        }

        /* Remove self from the list of all modules */
        mca_mpool_base_module_destroy(self_module);
    }
    else if(OPAL_CRS_TERM == state ) {
        ;
    }
    else {
        ;
    }

    return OPAL_SUCCESS;
}
Esempio n. 2
0
int mca_btl_udapl_finalize(struct mca_btl_base_module_t* base_btl)
{
    mca_btl_udapl_module_t* udapl_btl = (mca_btl_udapl_module_t*) base_btl; 
    int32_t i;
    
    /*
     * Cleaning up the endpoints here because mca_btl_udapl_del_procs
     * is never called by upper layers.
     * Note: this is only looking at those endpoints which are available
     * off of the btl module rdma list. 
     */
    for (i=0; i < udapl_btl->udapl_eager_rdma_endpoint_count; i++) {
        mca_btl_udapl_endpoint_t* endpoint =
            opal_pointer_array_get_item(udapl_btl->udapl_eager_rdma_endpoints,
                i);

        OBJ_DESTRUCT(endpoint);
    }

    /* release uDAPL resources */
    dat_evd_free(udapl_btl->udapl_evd_dto);
    dat_evd_free(udapl_btl->udapl_evd_conn);
    dat_pz_free(udapl_btl->udapl_pz);
    dat_ia_close(udapl_btl->udapl_ia, DAT_CLOSE_GRACEFUL_FLAG);

    /* destroy objects */
    OBJ_DESTRUCT(&udapl_btl->udapl_lock);
    OBJ_DESTRUCT(&udapl_btl->udapl_frag_eager);
    OBJ_DESTRUCT(&udapl_btl->udapl_frag_eager_recv);
    OBJ_DESTRUCT(&udapl_btl->udapl_frag_max);
    OBJ_DESTRUCT(&udapl_btl->udapl_frag_max_recv);
    OBJ_DESTRUCT(&udapl_btl->udapl_frag_user);
    OBJ_DESTRUCT(&udapl_btl->udapl_frag_control);
    OBJ_DESTRUCT(&udapl_btl->udapl_eager_rdma_lock);
    
    /* destroy mpool */
    if (OMPI_SUCCESS !=
        mca_mpool_base_module_destroy(udapl_btl->super.btl_mpool)) {
        BTL_UDAPL_VERBOSE_OUTPUT(VERBOSE_INFORM,
            ("WARNING: Failed to release mpool"));
        return OMPI_ERROR;
    }

    free(udapl_btl);
    return OMPI_SUCCESS;
}
Esempio n. 3
0
int
ompi_common_mx_finalize(void)
{
    mx_return_t mx_return;
    ompi_common_mx_initialize_ref_cnt--;
    if( 0 == ompi_common_mx_initialize_ref_cnt ) { 

        if (ompi_common_mx_fake_mpool) 
	  mca_mpool_base_module_destroy(ompi_common_mx_fake_mpool);
        
        mx_return = mx_finalize(); 
        if(mx_return != MX_SUCCESS){ 
            opal_output(0, "Error in mx_finalize (error %s)\n", mx_strerror(mx_return));
            return OMPI_ERROR;
        } 
    }
    return OMPI_SUCCESS;
}
Esempio n. 4
0
int
ompi_common_mx_initialize(void)
{
    mx_return_t mx_return;
    struct mca_mpool_base_resources_t mpool_resources;
    int index, value, ret = OMPI_SUCCESS;
    
    ompi_common_mx_initialize_ref_cnt++;
    
    if(ompi_common_mx_initialize_ref_cnt == 1) { 
        /* set the MX error handle to always return. This function is the
         * only MX function allowed to be called before mx_init in order
         * to make sure that if the MX is not up and running the MX
         * library does not exit the application.
         */
        mx_set_error_handler(MX_ERRORS_RETURN);
	
	/* If we have a memory manager available, and
	   mpi_leave_pinned == -1, then set mpi_leave_pinned to 1.

	   We have a memory manager if:
	   - we have both FREE and MUNMAP support
	   - we have MUNMAP support and the linux mallopt */
	value = opal_mem_hooks_support_level();
	if ((value & (OPAL_MEMORY_FREE_SUPPORT | OPAL_MEMORY_MUNMAP_SUPPORT))
            == (OPAL_MEMORY_FREE_SUPPORT | OPAL_MEMORY_MUNMAP_SUPPORT)) {
	  index = mca_base_param_find("mpi", NULL, "leave_pinned");
	  if (index >= 0)
            if ((mca_base_param_lookup_int(index, &value) == OPAL_SUCCESS) 
		&& (value == -1)) {
	      
	      ompi_mpi_leave_pinned = 1;
	      setenv("MX_RCACHE", "2", 1);
	      ompi_common_mx_fake_mpool = 
		mca_mpool_base_module_create("fake", NULL, &mpool_resources);
	      if (!ompi_common_mx_fake_mpool) {
		ompi_mpi_leave_pinned = 0;
		setenv("MX_RCACHE", "0", 1);
		opal_output(0, "Error creating fake mpool (error %s)\n",
			    strerror(errno));
	      }
	    }
	}
	
        /* initialize the mx library */
        mx_return = mx_init(); 
        
        if(MX_SUCCESS != mx_return) {
            ompi_common_mx_available = -1;
            if (ompi_common_mx_fake_mpool) {
                mca_mpool_base_module_destroy(ompi_common_mx_fake_mpool);
	        ompi_common_mx_fake_mpool = NULL;
            }
            opal_output(0,
                        "Error in mx_init (error %s)\n",
                        mx_strerror(mx_return));
            /* We did not succeed to initialize the MX device */
            ompi_common_mx_initialize_ref_cnt = 0;
            return OMPI_ERR_NOT_AVAILABLE;
        }
        ompi_common_mx_available = 1;
        mpool_resources.regcache_clean = mx__regcache_clean;
    } else if (ompi_common_mx_available < 0) {
        ret = OMPI_ERR_NOT_AVAILABLE;
    }

    return ret;
} 
static int
mca_btl_ugni_module_finalize (struct mca_btl_base_module_t *btl)
{
    mca_btl_ugni_module_t *ugni_module = (mca_btl_ugni_module_t *)btl;
    mca_btl_base_endpoint_t *ep;
    uint64_t key;
    void *node;
    int rc;

    while (ugni_module->active_send_count) {
        /* ensure all sends are complete before closing the module */
        rc = mca_btl_ugni_progress_local_smsg (ugni_module);
        if (OMPI_SUCCESS != rc) {
            break;
        }
    }

    OBJ_DESTRUCT(&ugni_module->eager_frags_send);
    OBJ_DESTRUCT(&ugni_module->eager_frags_recv);
    OBJ_DESTRUCT(&ugni_module->smsg_frags);
    OBJ_DESTRUCT(&ugni_module->rdma_frags);
    OBJ_DESTRUCT(&ugni_module->rdma_int_frags);
    OBJ_DESTRUCT(&ugni_module->ep_wait_list);

    /* close all open connections and release endpoints */
    if (ugni_module->initialized) {
        rc = opal_hash_table_get_first_key_uint64 (&ugni_module->id_to_endpoint, &key, (void **) &ep, &node);
        while (OPAL_SUCCESS == rc) {
            if (NULL != ep) {
                mca_btl_ugni_release_ep (ep);
            }

            rc = opal_hash_table_get_next_key_uint64 (&ugni_module->id_to_endpoint, &key, (void **) &ep, node, &node);
        }

        /* destroy all cqs */
        rc = GNI_CqDestroy (ugni_module->rdma_local_cq);
        if (GNI_RC_SUCCESS != rc) {
            BTL_ERROR(("error tearing down local BTE/FMA CQ"));
        }

        rc = GNI_CqDestroy (ugni_module->smsg_local_cq);
        if (GNI_RC_SUCCESS != rc) {
            BTL_ERROR(("error tearing down local SMSG CQ"));
        }

        rc = GNI_CqDestroy (ugni_module->smsg_remote_cq);
        if (GNI_RC_SUCCESS != rc) {
            BTL_ERROR(("error tearing down remote SMSG CQ"));
        }

        /* cancel wildcard post */
        rc = GNI_EpPostDataCancelById (ugni_module->wildcard_ep,
                                       MCA_BTL_UGNI_CONNECT_WILDCARD_ID |
                                       OMPI_PROC_MY_NAME->vpid);
        if (GNI_RC_SUCCESS != rc) {
            BTL_VERBOSE(("btl/ugni error cancelling wildcard post"));
        }

        /* tear down wildcard endpoint */
        rc = GNI_EpDestroy (ugni_module->wildcard_ep);
        if (GNI_RC_SUCCESS != rc) {
            BTL_VERBOSE(("btl/ugni error destroying endpoint"));
        }

        if (NULL != ugni_module->smsg_mpool) {
            (void) mca_mpool_base_module_destroy (ugni_module->smsg_mpool);
            ugni_module->smsg_mpool  = NULL;
        }

        if (NULL != ugni_module->super.btl_mpool) {
            (void) mca_mpool_base_module_destroy (ugni_module->super.btl_mpool);
            ugni_module->super.btl_mpool = NULL;
        }
    }

    OBJ_DESTRUCT(&ugni_module->pending_smsg_frags_bb);
    OBJ_DESTRUCT(&ugni_module->id_to_endpoint);
    OBJ_DESTRUCT(&ugni_module->endpoints);
    OBJ_DESTRUCT(&ugni_module->failed_frags);

    ugni_module->initialized = false;

    return OMPI_SUCCESS;
}
static void mca_bcol_iboffload_device_destructor
(mca_bcol_iboffload_device_t *device)
{
    int qp_index, num_qps = mca_bcol_iboffload_component.num_qps;

    IBOFFLOAD_VERBOSE(10, ("Device %s will be destroyed.\n",
                           ibv_get_device_name(device->dev.ib_dev)));

    if (NULL != device->frags_free) {
        for (qp_index = 0; qp_index < num_qps; ++qp_index) {
            mca_bcol_iboffload_dealloc_qps_resource_fn_t dealloc_resource =
                mca_bcol_iboffload_component.qp_infos[qp_index].dealloc_resource;
            if (NULL != dealloc_resource) {
                dealloc_resource(qp_index, device);
            }
        }

        free(device->frags_free);
    }

    if (NULL != device->mpool) {
        IBOFFLOAD_VERBOSE(10, ("Mpool destroy - %p.\n", device->mpool));
        if (OMPI_SUCCESS != mca_mpool_base_module_destroy(device->mpool)) {
            IBOFFLOAD_ERROR(("Device %s, failed to destroy mpool",
                             ibv_get_device_name(device->dev.ib_dev)));
        }
    }

    if (NULL != device->dummy_reg.mr) {
        IBOFFLOAD_VERBOSE(10, ("Dummy memory MR unregister - %p.\n", device->dummy_reg.mr));
        if (OMPI_SUCCESS !=
                mca_bcol_iboffload_deregister_mr((void *) device, &device->dummy_reg.base)) {
            IBOFFLOAD_ERROR(("Device %s: failed to unregister dummy memory MR.",
                             ibv_get_device_name(device->dev.ib_dev)));
        }
    }

    if (NULL != device->ib_cq) {
        if (ibv_destroy_cq(device->ib_cq)) {
            IBOFFLOAD_ERROR(("Device %s, failed to destroy CQ, errno says %s",
                             ibv_get_device_name(device->dev.ib_dev), strerror(errno)));
        }
    }

    if (NULL != device->ib_mq_cq) {
        if (ibv_destroy_cq(device->ib_mq_cq)) {
            IBOFFLOAD_ERROR(("Device %s, failed to destroy mq CQ, errno says %s",
                             ibv_get_device_name(device->dev.ib_dev), strerror(errno)));
        }
    }

    /* Release IB PD if we have one */
    if (NULL != device->ib_pd) {
        if(ibv_dealloc_pd(device->ib_pd)) {
            IBOFFLOAD_ERROR(("Device %s, failed to release PD, errno says %s",
                             ibv_get_device_name(device->dev.ib_dev), strerror(errno)));
        }
    }

    /* close the device */
    if (NULL != device->dev.ib_dev_context) {
        if (ibv_close_device(device->dev.ib_dev_context)) {
            IBOFFLOAD_ERROR(("Device %s "
                             ", failed to close the device, errno says %s",
                             ibv_get_device_name(device->dev.ib_dev), strerror(errno)));
        }
    }

    /* release memory */
    if (NULL != device->ports) {
        free(device->ports);
    }
}