Пример #1
0
int mca_pml_base_close(void)
{
    /* turn off the progress code for the pml */
    if( NULL != mca_pml.pml_progress ) {
        opal_progress_unregister(mca_pml.pml_progress);
    }

    /* Blatently ignore the return code (what would we do to recover,
       anyway?  This module is going away, so errors don't matter
       anymore) */

    /**
     * Destruct the send and receive queues. The ompi_free_list_t destructor
     * will return the memory to the mpool, so this has to be done before the
     * mpool get released by the PML close function.
     */
    OBJ_DESTRUCT(&mca_pml_base_send_requests);
    OBJ_DESTRUCT(&mca_pml_base_recv_requests);

    mca_pml.pml_progress = mca_pml_base_progress;

    OBJ_DESTRUCT(&mca_pml_base_pml);

    /* Close all remaining available modules (may be one if this is a
       OMPI RTE program, or [possibly] multiple if this is ompi_info) */

    mca_base_components_close(mca_pml_base_output,
                              &mca_pml_base_components_available, NULL, true);

    /* All done */

    return OMPI_SUCCESS;
}
Пример #2
0
static int mca_pml_base_close(void)
{
    int i, j;

    /* turn off the progress code for the pml */
    if( NULL != mca_pml.pml_progress ) {
        opal_progress_unregister(mca_pml.pml_progress);
    }

    /* Blatently ignore the return code (what would we do to recover,
       anyway?  This module is going away, so errors don't matter
       anymore) */

    /**
     * Destruct the send and receive queues. The ompi_free_list_t destructor
     * will return the memory to the mpool, so this has to be done before the
     * mpool get released by the PML close function.
     */
    OBJ_DESTRUCT(&mca_pml_base_send_requests);
    OBJ_DESTRUCT(&mca_pml_base_recv_requests);

    mca_pml.pml_progress = mca_pml_base_progress;
    
    /* Free all the strings in the array */
    j = opal_pointer_array_get_size(&mca_pml_base_pml);
    for (i = 0; i < j; ++i) {
        char *str;
        str = (char*) opal_pointer_array_get_item(&mca_pml_base_pml, i);
        free(str);
    }
    OBJ_DESTRUCT(&mca_pml_base_pml);

    /* Close all remaining available components */
    return mca_base_framework_components_close(&ompi_pml_base_framework, NULL);
}
Пример #3
0
/*
 * Invoked when there's a new communicator that has been created.
 * Look at the communicator and decide which set of functions and
 * priority we want to return.
 */
mca_coll_base_module_t *
mca_coll_hcoll_comm_query(struct ompi_communicator_t *comm, int *priority)
{
    mca_coll_base_module_t *module;
    mca_coll_hcoll_module_t *hcoll_module;
    static bool libhcoll_initialized = false;
    *priority = 0;
    module = NULL;

    if (!mca_coll_hcoll_component.hcoll_enable){
        goto exit;
    }

    if (!libhcoll_initialized)
    {
        /* libhcoll should be initialized here since current implmentation of
           mxm bcol in libhcoll needs world_group fully functional during init

           world_group, i.e. ompi_comm_world, is not ready at hcoll component open
           call */
        opal_progress_register(hcoll_progress_fn);
        int rc = hcoll_init();

        if (HCOLL_SUCCESS != rc){
            mca_coll_hcoll_component.hcoll_enable = 0;
            opal_progress_unregister(hcoll_progress_fn);
            HCOL_VERBOSE(0,"Hcol library init failed");
            return NULL;
        }
        libhcoll_initialized = true;
    }
    hcoll_module = OBJ_NEW(mca_coll_hcoll_module_t);
    if (!hcoll_module){
        goto exit;
    }

    if (ompi_comm_size(comm) < 2 || OMPI_COMM_IS_INTER(comm)){
        goto exit;
    }



    hcoll_module->super.coll_module_enable = mca_coll_hcoll_module_enable;
    hcoll_module->super.coll_barrier = hcoll_collectives.coll_barrier ? mca_coll_hcoll_barrier : NULL;
    hcoll_module->super.coll_bcast = hcoll_collectives.coll_bcast ? mca_coll_hcoll_bcast : NULL;
    hcoll_module->super.coll_allgather = hcoll_collectives.coll_allgather ? mca_coll_hcoll_allgather : NULL;
    hcoll_module->super.coll_allreduce = hcoll_collectives.coll_allreduce ? mca_coll_hcoll_allreduce : NULL;
    hcoll_module->super.coll_alltoall = /*hcoll_collectives.coll_alltoall ? mca_coll_hcoll_alltoall : */  NULL;
    hcoll_module->super.coll_ibarrier = hcoll_collectives.coll_ibarrier ? mca_coll_hcoll_ibarrier : NULL;
    hcoll_module->super.coll_ibcast = hcoll_collectives.coll_ibcast ? mca_coll_hcoll_ibcast : NULL;
    hcoll_module->super.coll_iallgather = hcoll_collectives.coll_iallgather ? mca_coll_hcoll_iallgather : NULL;
    hcoll_module->super.coll_iallreduce = hcoll_collectives.coll_iallreduce ? mca_coll_hcoll_iallreduce : NULL;

    *priority = mca_coll_hcoll_component.hcoll_priority;
    module = &hcoll_module->super;


exit:
    return module;
}
Пример #4
0
static int hcoll_close(void)
{
    int rc;
    mca_coll_hcoll_component_t *cm;
    cm = &mca_coll_hcoll_component;

    if (false == cm->libhcoll_initialized) {
        return OMPI_SUCCESS;
    }

    if (cm->using_mem_hooks) {
        opal_mem_hooks_unregister_release(mca_coll_hcoll_mem_release_cb);
    }

#if HCOLL_API >= HCOLL_VERSION(3,2)
    hcoll_free_init_opts(cm->init_opts);
#endif

    HCOL_VERBOSE(5,"HCOLL FINALIZE");
    rc = hcoll_finalize();

    opal_progress_unregister(mca_coll_hcoll_progress);
    if (HCOLL_SUCCESS != rc){
        HCOL_VERBOSE(1,"Hcol library finalize failed");
        return OMPI_ERROR;
    }
    return OMPI_SUCCESS;
}
Пример #5
0
int
ompi_mtl_psm_finalize(struct mca_mtl_base_module_t* mtl) { 
    psm_error_t err;

    opal_progress_unregister(ompi_mtl_psm_progress);

    /* free resources */
    err = psm_mq_finalize(ompi_mtl_psm.mq);
    if (err) {
        opal_output(0, "Error in psm_mq_finalize (error %s)\n", 
		    psm_error_get_string(err));
        return OMPI_ERROR;
    }

    err = psm_ep_close(ompi_mtl_psm.ep, PSM_EP_CLOSE_GRACEFUL, 1*1e9);
    if (err) {
        opal_output(0, "Error in psm_ep_close (error %s)\n", 
		    psm_error_get_string(err));
        return OMPI_ERROR;
    }

    err = psm_finalize();
    if (err) {
        opal_output(0, "Error in psm_finalize (error %s)\n", 
		    psm_error_get_string(err));
        return OMPI_ERROR;
    }

    return OMPI_SUCCESS;
}
Пример #6
0
int ompi_mtl_mxm_finalize(struct mca_mtl_base_module_t* mtl)
{
#if MXM_API >= MXM_VERSION(2,0)
    if (ompi_mtl_mxm.using_mem_hooks) {
        opal_mem_hooks_unregister_release(ompi_mtl_mxm_mem_release_cb);
    }
#endif
    opal_progress_unregister(ompi_mtl_mxm_progress);
    mxm_ep_destroy(ompi_mtl_mxm.ep);
    return OMPI_SUCCESS;
}
static int hcoll_close(void)
{
    int rc;
    rc = hcoll_finalize();
    opal_progress_unregister(hcoll_progress_fn);
    if (HCOLL_SUCCESS != rc){
        HCOL_VERBOSE(1,"Hcol library finalize failed");
        return OMPI_ERROR;
    }
    return OMPI_SUCCESS;
}
/*
 * Close the component
 */
static int basesmuma_close(void)
{
    int ret;
    bcol_basesmuma_registration_data_t *net_ctx;
    bcol_base_network_context_t *net_reg;
	mca_bcol_basesmuma_component_t *cs = &mca_bcol_basesmuma_component;

	/* gvm Leak FIX */
    while(!opal_list_is_empty(&(cs->ctl_structures))) {
        opal_list_item_t *item;
        item = opal_list_remove_first(&(cs->ctl_structures));
		OBJ_DESTRUCT(item);
	}
    OBJ_DESTRUCT(&(cs->ctl_structures));


    /* deregister the progress function */
    ret=opal_progress_unregister(bcol_basesmuma_progress);
    if (MPI_SUCCESS != ret) {
        opal_output(0, "failed to unregister the progress function\n");
    }

    /* remove the control structure backing file */
    ret=mca_bcol_basesmuma_deregister_ctl_sm(&mca_bcol_basesmuma_component);
    if (MPI_SUCCESS != ret) {
        opal_output(0, "failed to remove control structure backing file\n");
    }

    /* remove the network contexts - only one network context defined for
     * this component.
     */
    /* file_name returne by asprintf, so need to free the resource */
    if(mca_bcol_basesmuma_component.super.network_contexts ) {
        net_reg=(bcol_base_network_context_t *)
            mca_bcol_basesmuma_component.super.network_contexts[0];
        if(net_reg) {
            net_ctx=(bcol_basesmuma_registration_data_t *)net_reg->context_data;
            if( net_ctx) {
                if(net_ctx->file_name) {
                    free(net_ctx->file_name);
                }
                free(net_ctx);
            }
            free(net_reg);
        }
        free(mca_bcol_basesmuma_component.super.network_contexts);
        mca_bcol_basesmuma_component.super.network_contexts=NULL;
    }

    /* normal return */
    return OMPI_SUCCESS;
}
Пример #9
0
static int mca_spml_ucx_component_fini(void)
{
    opal_progress_unregister(spml_ucx_progress);
        
    if (mca_spml_ucx.ucp_worker) {
        ucp_worker_destroy(mca_spml_ucx.ucp_worker);
    }
    if(!mca_spml_ucx.enabled)
        return OSHMEM_SUCCESS; /* never selected.. return success.. */

    mca_spml_ucx.enabled = false;  /* not anymore */
    return OSHMEM_SUCCESS;
}
Пример #10
0
void mca_spml_ucx_ctx_destroy(shmem_ctx_t ctx)
{
    MCA_SPML_CALL(quiet(ctx));

    SHMEM_MUTEX_LOCK(mca_spml_ucx.internal_mutex);
    _ctx_remove(&mca_spml_ucx.active_array, (mca_spml_ucx_ctx_t *)ctx);
    _ctx_add(&mca_spml_ucx.idle_array, (mca_spml_ucx_ctx_t *)ctx);
    SHMEM_MUTEX_UNLOCK(mca_spml_ucx.internal_mutex);

    if (!mca_spml_ucx.active_array.ctxs_count) {
        opal_progress_unregister(spml_ucx_ctx_progress);
    }
}
Пример #11
0
static int hcoll_close(void)
{
    int rc;
    HCOL_VERBOSE(5,"HCOLL FINALIZE");
    rc = hcoll_finalize();
    opal_progress_unregister(mca_coll_hcoll_progress);
    OBJ_DESTRUCT(&mca_coll_hcoll_component.active_modules);
    memset(&mca_coll_hcoll_component.active_modules,0,sizeof(mca_coll_hcoll_component.active_modules));
    if (HCOLL_SUCCESS != rc){
        HCOL_VERBOSE(1,"Hcol library finalize failed");
        return OMPI_ERROR;
    }
    return OMPI_SUCCESS;
}
Пример #12
0
int
ompi_mtl_mx_finalize(struct mca_mtl_base_module_t* mtl) { 
    mx_return_t mx_return;
    
    opal_progress_unregister(ompi_mtl_mx_progress);
    
    /* free resources */
    mx_return = mx_close_endpoint(ompi_mtl_mx.mx_endpoint);
    if(mx_return != MX_SUCCESS){ 
        opal_output(ompi_mtl_base_framework.framework_output, "Error in mx_close_endpoint (error %s)\n", mx_strerror(mx_return));
        return OMPI_ERROR;
    }
    
    return ompi_common_mx_finalize();
    
}
Пример #13
0
void memheap_oob_destruct(void)
{
    int i;
    oob_comm_request_t *r;

    opal_progress_unregister(oshmem_mkey_recv_cb);

    for (i = 0; i < MEMHEAP_RECV_REQS_MAX; i++) { 
        r = &memheap_oob.req_pool[i];
        MPI_Cancel(&r->recv_req);
        MPI_Request_free(&r->recv_req);
    }

    OBJ_DESTRUCT(&memheap_oob.req_list);
    OBJ_DESTRUCT(&memheap_oob.lck);
    OBJ_DESTRUCT(&memheap_oob.cond);
}
Пример #14
0
int
ompi_mtl_portals4_finalize(struct mca_mtl_base_module_t *mtl)
{
    opal_progress_unregister(ompi_mtl_portals4_progress);
    while (0 != ompi_mtl_portals4_progress()) { }

    ompi_mtl_portals4_recv_short_fini(&ompi_mtl_portals4);

    PtlMEUnlink(ompi_mtl_portals4.long_overflow_me_h);
    PtlMDRelease(ompi_mtl_portals4.zero_md_h);
    PtlPTFree(ompi_mtl_portals4.ni_h, ompi_mtl_portals4.read_idx);
    PtlPTFree(ompi_mtl_portals4.ni_h, ompi_mtl_portals4.send_idx);
    PtlEQFree(ompi_mtl_portals4.eq_h);
    PtlNIFini(ompi_mtl_portals4.ni_h);
    PtlFini();

    return OMPI_SUCCESS;
}
Пример #15
0
static inline int bml_r2_remove_btl_progress(mca_btl_base_module_t* btl)
{
    unsigned int p;

    if(NULL == btl->btl_component->btl_progress) {
        return OMPI_SUCCESS;
    }
    for(p = 0; p < mca_bml_r2.num_btl_progress; p++) {
        if(btl->btl_component->btl_progress != mca_bml_r2.btl_progress[p])
            continue;
        opal_progress_unregister( btl->btl_component->btl_progress );
        if( p < (mca_bml_r2.num_btl_progress-1) ) { 
            mca_bml_r2.btl_progress[p] = mca_bml_r2.btl_progress[mca_bml_r2.num_btl_progress-1];
        }
        mca_bml_r2.num_btl_progress--;
        return OMPI_SUCCESS;
    }
    return OMPI_ERR_NOT_FOUND;
}
static int hcoll_open(void)
{
    int rc;

    mca_coll_hcoll_output = opal_output_open(NULL);
    opal_output_set_verbosity(mca_coll_hcoll_output, mca_coll_hcoll_component.hcoll_verbose);

    hcoll_rte_fns_setup();

    opal_progress_register(hcoll_progress_fn);
    rc = hcoll_init();

    if (HCOLL_SUCCESS != rc){
        opal_progress_unregister(hcoll_progress_fn);
        HCOL_VERBOSE(1,"Hcol library init failed");
        return OMPI_ERROR;
    }
    return OMPI_SUCCESS;
}
Пример #17
0
static int component_finalize(void) {
    int i;
    for (i = 0; i < ompi_proc_world_size(); i++) {
        ucp_ep_h ep = OSC_UCX_GET_EP(&(ompi_mpi_comm_world.comm), i);
        if (ep != NULL) {
            ucp_ep_destroy(ep);
        }
    }

    if (mca_osc_ucx_component.ucp_worker != NULL) {
        ucp_worker_destroy(mca_osc_ucx_component.ucp_worker);
    }

    assert(mca_osc_ucx_component.num_incomplete_req_ops == 0);
    OBJ_DESTRUCT(&mca_osc_ucx_component.requests);
    opal_progress_unregister(progress_callback);
    ucp_cleanup(mca_osc_ucx_component.ucp_context);
    return OMPI_SUCCESS;
}
Пример #18
0
int mca_pml_ucx_cleanup(void)
{
    PML_UCX_VERBOSE(1, "mca_pml_ucx_cleanup");

    opal_progress_unregister(mca_pml_ucx_progress);

    ompi_pml_ucx.completed_send_req.req_state = OMPI_REQUEST_INVALID;
    OMPI_REQUEST_FINI(&ompi_pml_ucx.completed_send_req);
    OBJ_DESTRUCT(&ompi_pml_ucx.completed_send_req);

    OBJ_DESTRUCT(&ompi_pml_ucx.convs);
    OBJ_DESTRUCT(&ompi_pml_ucx.persistent_reqs);

    if (ompi_pml_ucx.ucp_worker) {
        ucp_worker_destroy(ompi_pml_ucx.ucp_worker);
        ompi_pml_ucx.ucp_worker = NULL;
    }

    return OMPI_SUCCESS;
}
Пример #19
0
int mca_pml_yalla_cleanup(void)
{
    PML_YALLA_VERBOSE(1, "mca_pml_yalla_cleanup");

    opal_progress_unregister(mca_pml_yalla_progress);

    OBJ_DESTRUCT(&ompi_pml_yalla.convs);
    OBJ_DESTRUCT(&ompi_pml_yalla.recv_reqs);
    OBJ_DESTRUCT(&ompi_pml_yalla.bsend_reqs);
    OBJ_DESTRUCT(&ompi_pml_yalla.send_reqs);

    if (ompi_pml_yalla.mxm_ep) {
        mxm_ep_destroy(ompi_pml_yalla.mxm_ep);
        ompi_pml_yalla.mxm_ep = NULL;
    }
    if (ompi_pml_yalla.using_mem_hooks) {
        opal_mem_hooks_unregister_release(mca_pml_yalla_mem_release_cb);
    }

    return OMPI_SUCCESS;
}
Пример #20
0
void memheap_oob_destruct(void)
{
    int i;
    oob_comm_request_t *r;

    opal_progress_unregister(oshmem_mkey_recv_cb);

    for (i = 0; i < MEMHEAP_RECV_REQS_MAX; i++) {
        r = &memheap_oob.req_pool[i];
        PMPI_Cancel(&r->recv_req);
        PMPI_Request_free(&r->recv_req);
    }

    /*clear these list object as they don't belong here */
    while (NULL != opal_list_remove_first(&memheap_oob.req_list)) {
        continue;
    }

    OBJ_DESTRUCT(&memheap_oob.req_list);
    OBJ_DESTRUCT(&memheap_oob.lck);
    OBJ_DESTRUCT(&memheap_oob.cond);
}
/*
 * Close the component
 */
static int iboffload_close(void)
{
    int rc;

    mca_bcol_iboffload_component_t *cm = &mca_bcol_iboffload_component;

    IBOFFLOAD_VERBOSE(10, ("Destroy component free lists.\n"));

    if (true == cm->init_done) {
        OBJ_DESTRUCT(&cm->tasks_free);
        OBJ_DESTRUCT(&cm->collreqs_free);
        OBJ_DESTRUCT(&cm->collfrags_free);
        OBJ_DESTRUCT(&cm->calc_tasks_free);
    }

    /* Unregister the progress function */
    rc = opal_progress_unregister(mca_bcol_iboffload_component_progress);
    if (OMPI_SUCCESS != rc) {
        IBOFFLOAD_ERROR(("Failed to unregister the progress function"
                         " for iboffload component.\n"));
    }

    rc = iboffload_release_devices();
    if (OMPI_SUCCESS != rc) {
        return rc;
    }

    if (NULL != cm->receive_queues) {
        free(cm->receive_queues);
    }

    OBJ_DESTRUCT(&cm->recv_wrs.lock);

    IBOFFLOAD_VERBOSE(10, ("The component closed.\n"));

    return OMPI_SUCCESS;
}
Пример #22
0
int
ompi_mtl_portals4_finalize(struct mca_mtl_base_module_t *mtl)
{
    opal_progress_unregister(ompi_mtl_portals4_progress);
    while (0 != ompi_mtl_portals4_progress()) { }

#if OMPI_MTL_PORTALS4_FLOW_CONTROL
    ompi_mtl_portals4_flowctl_fini();
#endif
    ompi_mtl_portals4_recv_short_fini();

    PtlMEUnlink(ompi_mtl_portals4.long_overflow_me_h);
    PtlMDRelease(ompi_mtl_portals4.zero_md_h);
#if OPAL_PORTALS4_MAX_MD_SIZE < OPAL_PORTALS4_MAX_VA_SIZE
    {
        int i;
        int num_mds = ompi_mtl_portals4_get_num_mds();

        for (i = 0 ; i < num_mds ; ++i) {
            PtlMDRelease(ompi_mtl_portals4.send_md_hs[i]);
        }

        free(ompi_mtl_portals4.send_md_hs);
    }
#else
    PtlMDRelease(ompi_mtl_portals4.send_md_h);
#endif

    PtlPTFree(ompi_mtl_portals4.ni_h, ompi_mtl_portals4.read_idx);
    PtlPTFree(ompi_mtl_portals4.ni_h, ompi_mtl_portals4.recv_idx);
    PtlEQFree(ompi_mtl_portals4.send_eq_h);
    PtlEQFree(ompi_mtl_portals4.recv_eq_h);
    PtlNIFini(ompi_mtl_portals4.ni_h);
    PtlFini();

    return OMPI_SUCCESS;
}
Пример #23
0
int
ompi_mtl_ofi_finalize(struct mca_mtl_base_module_t *mtl)
{
    opal_progress_unregister(ompi_mtl_ofi_progress_no_inline);

    /**
 *      * Close all the OFI objects
 *           */
    if (fi_close((fid_t)ompi_mtl_ofi.ep)) {
        opal_output(ompi_mtl_base_framework.framework_output,
                "fi_close failed: %s", strerror(errno));
        abort();
    }
    if (fi_close((fid_t)ompi_mtl_ofi.cq)) {
        opal_output(ompi_mtl_base_framework.framework_output,
                "fi_close failed: %s", strerror(errno));
        abort();
    }
    if (fi_close((fid_t)ompi_mtl_ofi.av)) {
        opal_output(ompi_mtl_base_framework.framework_output,
                "fi_close failed: %s", strerror(errno));
        abort();
    }
    if (fi_close((fid_t)ompi_mtl_ofi.domain)) {
        opal_output(ompi_mtl_base_framework.framework_output,
                "fi_close failed: %s", strerror(errno));
        abort();
    }
    if (fi_close((fid_t)ompi_mtl_ofi.fabric)) {
        opal_output(ompi_mtl_base_framework.framework_output,
                "fi_close failed: %s", strerror(errno));
        abort();
    }

    return OMPI_SUCCESS;
}
Пример #24
0
/*
 * Invoked when there's a new communicator that has been created.
 * Look at the communicator and decide which set of functions and
 * priority we want to return.
 */
mca_coll_base_module_t *
mca_coll_hcoll_comm_query(struct ompi_communicator_t *comm, int *priority)
{
    mca_coll_base_module_t *module;
    mca_coll_hcoll_module_t *hcoll_module;
    ompi_attribute_fn_ptr_union_t del_fn;
    ompi_attribute_fn_ptr_union_t copy_fn;
    mca_coll_hcoll_component_t *cm;
    int err;
    int rc;
    cm = &mca_coll_hcoll_component;
    *priority = 0;
    module = NULL;

    if (!cm->hcoll_enable) {
        return NULL;
    }

    if (OMPI_COMM_IS_INTER(comm) || ompi_comm_size(comm) < cm->hcoll_np
            || ompi_comm_size(comm) < 2) {
        return NULL;
    }


    if (!cm->libhcoll_initialized)
    {
        /* libhcoll should be initialized here since current implmentation of
           mxm bcol in libhcoll needs world_group fully functional during init
           world_group, i.e. ompi_comm_world, is not ready at hcoll component open
           call */
        opal_progress_register(mca_coll_hcoll_progress);

        HCOL_VERBOSE(10,"Calling hcoll_init();");
#if HCOLL_API >= HCOLL_VERSION(3,2)
        hcoll_read_init_opts(&cm->init_opts);
        cm->init_opts->base_tag = MCA_COLL_BASE_TAG_HCOLL_BASE;
        cm->init_opts->max_tag = mca_pml.pml_max_tag;
        cm->init_opts->enable_thread_support = ompi_mpi_thread_multiple;

        rc = hcoll_init_with_opts(&cm->init_opts);
#else
        hcoll_set_runtime_tag_offset(MCA_COLL_BASE_TAG_HCOLL_BASE, mca_pml.pml_max_tag);
        rc = hcoll_init();
#endif

        if (HCOLL_SUCCESS != rc) {
            cm->hcoll_enable = 0;
            opal_progress_unregister(mca_coll_hcoll_progress);
            HCOL_ERROR("Hcol library init failed");
            return NULL;
        }

#if HCOLL_API >= HCOLL_VERSION(3,2)
        if (cm->using_mem_hooks && cm->init_opts->mem_hook_needed) {
#else
        if (cm->using_mem_hooks && hcoll_check_mem_release_cb_needed()) {
#endif
            opal_mem_hooks_register_release(mca_coll_hcoll_mem_release_cb, NULL);
        } else {
            cm->using_mem_hooks = 0;
        }

        copy_fn.attr_communicator_copy_fn = (MPI_Comm_internal_copy_attr_function*) MPI_COMM_NULL_COPY_FN;
        del_fn.attr_communicator_delete_fn = hcoll_comm_attr_del_fn;
        err = ompi_attr_create_keyval(COMM_ATTR, copy_fn, del_fn, &hcoll_comm_attr_keyval, NULL ,0, NULL);
        if (OMPI_SUCCESS != err) {
            cm->hcoll_enable = 0;
            hcoll_finalize();
            opal_progress_unregister(mca_coll_hcoll_progress);
            HCOL_ERROR("Hcol comm keyval create failed");
            return NULL;
        }

        if (mca_coll_hcoll_component.derived_types_support_enabled) {
            copy_fn.attr_datatype_copy_fn = (MPI_Type_internal_copy_attr_function *) MPI_TYPE_NULL_COPY_FN;
            del_fn.attr_datatype_delete_fn = hcoll_type_attr_del_fn;
            err = ompi_attr_create_keyval(TYPE_ATTR, copy_fn, del_fn, &hcoll_type_attr_keyval, NULL ,0, NULL);
            if (OMPI_SUCCESS != err) {
                cm->hcoll_enable = 0;
                hcoll_finalize();
                opal_progress_unregister(mca_coll_hcoll_progress);
                HCOL_ERROR("Hcol type keyval create failed");
                return NULL;
            }
        }
        OBJ_CONSTRUCT(&cm->dtypes, opal_free_list_t);
        opal_free_list_init(&cm->dtypes, sizeof(mca_coll_hcoll_dtype_t),
                            8, OBJ_CLASS(mca_coll_hcoll_dtype_t), 0, 0,
                            32, -1, 32, NULL, 0, NULL, NULL, NULL);

    }

    hcoll_module = OBJ_NEW(mca_coll_hcoll_module_t);
    if (!hcoll_module) {
        if (!cm->libhcoll_initialized) {
            cm->hcoll_enable = 0;
            hcoll_finalize();
            opal_progress_unregister(mca_coll_hcoll_progress);
        }
        return NULL;
    }

    hcoll_module->comm = comm;

    HCOL_VERBOSE(10,"Creating hcoll_context for comm %p, comm_id %d, comm_size %d",
                 (void*)comm,comm->c_contextid,ompi_comm_size(comm));

    hcoll_module->hcoll_context =
        hcoll_create_context((rte_grp_handle_t)comm);

    if (NULL == hcoll_module->hcoll_context) {
        HCOL_VERBOSE(1,"hcoll_create_context returned NULL");
        OBJ_RELEASE(hcoll_module);
        if (!cm->libhcoll_initialized) {
            cm->hcoll_enable = 0;
            hcoll_finalize();
            opal_progress_unregister(mca_coll_hcoll_progress);
        }
        return NULL;
    }

    hcoll_module->super.coll_module_enable = mca_coll_hcoll_module_enable;
    hcoll_module->super.coll_barrier = hcoll_collectives.coll_barrier ? mca_coll_hcoll_barrier : NULL;
    hcoll_module->super.coll_bcast = hcoll_collectives.coll_bcast ? mca_coll_hcoll_bcast : NULL;
    hcoll_module->super.coll_allgather = hcoll_collectives.coll_allgather ? mca_coll_hcoll_allgather : NULL;
    hcoll_module->super.coll_allgatherv = hcoll_collectives.coll_allgatherv ? mca_coll_hcoll_allgatherv : NULL;
    hcoll_module->super.coll_allreduce = hcoll_collectives.coll_allreduce ? mca_coll_hcoll_allreduce : NULL;
    hcoll_module->super.coll_alltoall = hcoll_collectives.coll_alltoall ? mca_coll_hcoll_alltoall : NULL;
    hcoll_module->super.coll_alltoallv = hcoll_collectives.coll_alltoallv ? mca_coll_hcoll_alltoallv : NULL;
    hcoll_module->super.coll_gatherv = hcoll_collectives.coll_gatherv ? mca_coll_hcoll_gatherv : NULL;
    hcoll_module->super.coll_reduce = hcoll_collectives.coll_reduce ? mca_coll_hcoll_reduce : NULL;
    hcoll_module->super.coll_ibarrier = hcoll_collectives.coll_ibarrier ? mca_coll_hcoll_ibarrier : NULL;
    hcoll_module->super.coll_ibcast = hcoll_collectives.coll_ibcast ? mca_coll_hcoll_ibcast : NULL;
    hcoll_module->super.coll_iallgather = hcoll_collectives.coll_iallgather ? mca_coll_hcoll_iallgather : NULL;
#if HCOLL_API >= HCOLL_VERSION(3,5)
    hcoll_module->super.coll_iallgatherv = hcoll_collectives.coll_iallgatherv ? mca_coll_hcoll_iallgatherv : NULL;
#else
    hcoll_module->super.coll_iallgatherv = NULL;
#endif
    hcoll_module->super.coll_iallreduce = hcoll_collectives.coll_iallreduce ? mca_coll_hcoll_iallreduce : NULL;
#if HCOLL_API >= HCOLL_VERSION(3,5)
    hcoll_module->super.coll_ireduce = hcoll_collectives.coll_ireduce ? mca_coll_hcoll_ireduce : NULL;
#else
    hcoll_module->super.coll_ireduce = NULL;
#endif
    hcoll_module->super.coll_gather = /*hcoll_collectives.coll_gather ? mca_coll_hcoll_gather :*/ NULL;
    hcoll_module->super.coll_igatherv = hcoll_collectives.coll_igatherv ? mca_coll_hcoll_igatherv : NULL;
    hcoll_module->super.coll_ialltoall = /*hcoll_collectives.coll_ialltoall ? mca_coll_hcoll_ialltoall : */ NULL;
#if HCOLL_API >= HCOLL_VERSION(3,7)
    hcoll_module->super.coll_ialltoallv = hcoll_collectives.coll_ialltoallv ? mca_coll_hcoll_ialltoallv : NULL;
#else
    hcoll_module->super.coll_ialltoallv = NULL;
#endif
    *priority = cm->hcoll_priority;
    module = &hcoll_module->super;

    if (!cm->libhcoll_initialized) {
        cm->libhcoll_initialized = true;
    }

    return module;
}


OBJ_CLASS_INSTANCE(mca_coll_hcoll_module_t,
                   mca_coll_base_module_t,
                   mca_coll_hcoll_module_construct,
                   mca_coll_hcoll_module_destruct);
Пример #25
0
static int component_init(bool enable_progress_threads, bool enable_mpi_threads) {
    ucp_config_t *config = NULL;
    ucp_params_t context_params;
    bool progress_registered = false, requests_created = false;
    int ret = OMPI_SUCCESS;
    ucs_status_t status;

    mca_osc_ucx_component.ucp_context = NULL;
    mca_osc_ucx_component.ucp_worker = NULL;
    mca_osc_ucx_component.enable_mpi_threads = enable_mpi_threads;

    status = ucp_config_read("MPI", NULL, &config);
    if (UCS_OK != status) {
        opal_output_verbose(1, ompi_osc_base_framework.framework_output,
                            "%s:%d: ucp_config_read failed: %d\n",
                            __FILE__, __LINE__, status);
        return OMPI_ERROR;
    }

    OBJ_CONSTRUCT(&mca_osc_ucx_component.requests, opal_free_list_t);
    requests_created = true;
    ret = opal_free_list_init (&mca_osc_ucx_component.requests,
                               sizeof(ompi_osc_ucx_request_t),
                               opal_cache_line_size,
                               OBJ_CLASS(ompi_osc_ucx_request_t),
                               0, 0, 8, 0, 8, NULL, 0, NULL, NULL, NULL);
    if (OMPI_SUCCESS != ret) {
        opal_output_verbose(1, ompi_osc_base_framework.framework_output,
                            "%s:%d: opal_free_list_init failed: %d\n",
                            __FILE__, __LINE__, ret);
        goto error;
    }

    mca_osc_ucx_component.num_incomplete_req_ops = 0;

    ret = opal_progress_register(progress_callback);
    progress_registered = true;
    if (OMPI_SUCCESS != ret) {
        opal_output_verbose(1, ompi_osc_base_framework.framework_output,
                            "%s:%d: opal_progress_register failed: %d\n",
                            __FILE__, __LINE__, ret);
        goto error;
    }

    /* initialize UCP context */

    memset(&context_params, 0, sizeof(ucp_context_h));
    context_params.field_mask = UCP_PARAM_FIELD_FEATURES |
                                UCP_PARAM_FIELD_MT_WORKERS_SHARED |
                                UCP_PARAM_FIELD_ESTIMATED_NUM_EPS |
                                UCP_PARAM_FIELD_REQUEST_INIT |
                                UCP_PARAM_FIELD_REQUEST_SIZE;
    context_params.features = UCP_FEATURE_RMA | UCP_FEATURE_AMO32 | UCP_FEATURE_AMO64;
    context_params.mt_workers_shared = 0;
    context_params.estimated_num_eps = ompi_proc_world_size();
    context_params.request_init = internal_req_init;
    context_params.request_size = sizeof(ompi_osc_ucx_internal_request_t);

    status = ucp_init(&context_params, config, &mca_osc_ucx_component.ucp_context);
    ucp_config_release(config);
    if (UCS_OK != status) {
        opal_output_verbose(1, ompi_osc_base_framework.framework_output,
                            "%s:%d: ucp_init failed: %d\n",
                            __FILE__, __LINE__, status);
        ret = OMPI_ERROR;
        goto error;
    }

    return ret;
 error:
    if (progress_registered) opal_progress_unregister(progress_callback);
    if (requests_created) OBJ_DESTRUCT(&mca_osc_ucx_component.requests);
    if (mca_osc_ucx_component.ucp_context) ucp_cleanup(mca_osc_ucx_component.ucp_context);
    return ret;
}
Пример #26
0
int
ompi_osc_rdma_module_free(ompi_win_t *win)
{
    int ret = OMPI_SUCCESS;
    int tmp, i;
    ompi_osc_rdma_module_t *module = GET_MODULE(win);

    opal_output_verbose(1, ompi_osc_base_output,
                        "rdma component destroying window with id %d",
                        ompi_comm_get_cid(module->m_comm));

    /* finish with a barrier */
    if (ompi_group_size(win->w_group) > 1) {
        ret = module->m_comm->c_coll.coll_barrier(module->m_comm,
                                                  module->m_comm->c_coll.coll_barrier_module);
    }

    /* remove from component information */
    OPAL_THREAD_LOCK(&mca_osc_rdma_component.c_lock);
    tmp = opal_hash_table_remove_value_uint32(&mca_osc_rdma_component.c_modules,
                                              ompi_comm_get_cid(module->m_comm));
    /* only take the output of hast_table_remove if there wasn't already an error */
    ret = (ret != OMPI_SUCCESS) ? ret : tmp;

    if (0 == opal_hash_table_get_size(&mca_osc_rdma_component.c_modules)) {
#if OPAL_ENABLE_PROGRESS_THREADS
        void *foo;

        mca_osc_rdma_component.c_thread_run = false;
        opal_condition_broadcast(&ompi_request_cond);
        opal_thread_join(&mca_osc_rdma_component.c_thread, &foo);
#else
        opal_progress_unregister(ompi_osc_rdma_component_progress);
#endif
    }
    OPAL_THREAD_UNLOCK(&mca_osc_rdma_component.c_lock);

    win->w_osc_module = NULL;

    OBJ_DESTRUCT(&module->m_unlocks_pending);
    OBJ_DESTRUCT(&module->m_locks_pending);
    OBJ_DESTRUCT(&module->m_queued_sendreqs);
    OBJ_DESTRUCT(&module->m_copy_pending_sendreqs);
    OBJ_DESTRUCT(&module->m_pending_sendreqs);
    OBJ_DESTRUCT(&module->m_acc_lock);
    OBJ_DESTRUCT(&module->m_cond);
    OBJ_DESTRUCT(&module->m_lock);

    if (NULL != module->m_sc_remote_ranks) {
        free(module->m_sc_remote_ranks);
    }
    if (NULL != module->m_sc_remote_active_ranks) {
        free(module->m_sc_remote_active_ranks);
    }
    if (NULL != module->m_pending_buffers) {
        free(module->m_pending_buffers);
    }
    if (NULL != module->m_fence_coll_counts) {
        free(module->m_fence_coll_counts);
    }
    if (NULL != module->m_copy_num_pending_sendreqs) {
        free(module->m_copy_num_pending_sendreqs);
    }
    if (NULL != module->m_num_pending_sendreqs) {
        free(module->m_num_pending_sendreqs);
    }
    if (NULL != module->m_peer_info) {
        for (i = 0 ; i < ompi_comm_size(module->m_comm) ; ++i) {
            ompi_osc_rdma_peer_info_free(&module->m_peer_info[i]);
        }
        free(module->m_peer_info);
    }
    if (NULL != module->m_comm) ompi_comm_free(&module->m_comm);
    if (NULL != module) free(module);

    return ret;
}
Пример #27
0
int mca_pml_ob1_progress(void)
{
    int i, queue_length = opal_list_get_size(&mca_pml_ob1.send_pending);
    int j, completed_requests = 0;
    bool send_succedded;

#if OPAL_CUDA_SUPPORT
    if (opal_cuda_support)
        completed_requests += mca_pml_ob1_process_pending_cuda_async_copies();
#endif /* OPAL_CUDA_SUPPORT */

    for( i = 0; i < queue_length; i++ ) {
        mca_pml_ob1_send_pending_t pending_type = MCA_PML_OB1_SEND_PENDING_NONE;
        mca_pml_ob1_send_request_t* sendreq;
        mca_bml_base_endpoint_t* endpoint;

        sendreq = get_request_from_send_pending(&pending_type);
        if(OPAL_UNLIKELY(NULL == sendreq))
            break;

        switch(pending_type) {
        case MCA_PML_OB1_SEND_PENDING_NONE:
            assert(0);
            return 0;
        case MCA_PML_OB1_SEND_PENDING_SCHEDULE:
            if( mca_pml_ob1_send_request_schedule_exclusive(sendreq) ==
                OMPI_ERR_OUT_OF_RESOURCE ) {
                return 0;
            }
            completed_requests++;
            break;
        case MCA_PML_OB1_SEND_PENDING_START:
            MCA_PML_OB1_SEND_REQUEST_RESET(sendreq);
            endpoint = sendreq->req_endpoint;
            send_succedded = false;
            for(j = 0; j < (int)mca_bml_base_btl_array_get_size(&endpoint->btl_eager); j++) {
                mca_bml_base_btl_t* bml_btl;
                int rc;

                /* select a btl */
                bml_btl = mca_bml_base_btl_array_get_next(&endpoint->btl_eager);
                rc = mca_pml_ob1_send_request_start_btl(sendreq, bml_btl);
                if( OPAL_LIKELY(OMPI_SUCCESS == rc) ) {
                    send_succedded = true;
                    completed_requests++;
                    break;
                }
            }
            if( false == send_succedded ) {
                add_request_to_send_pending(sendreq, MCA_PML_OB1_SEND_PENDING_START, true);
            }
        }
    }

    if( 0 != completed_requests ) {
        j = OPAL_ATOMIC_ADD_FETCH32(&mca_pml_ob1_progress_needed, -completed_requests);
        if( 0 == j ) {
            opal_progress_unregister(mca_pml_ob1_progress);
        }
    }

    return completed_requests;
}
Пример #28
0
static int
portals4_close(void)
{
    int ret;

    OBJ_DESTRUCT(&mca_coll_portals4_component.requests);

    if (!PtlHandleIsEqual(mca_coll_portals4_component.zero_md_h, PTL_INVALID_HANDLE)) {
        ret = PtlMDRelease(mca_coll_portals4_component.zero_md_h);
        if (PTL_OK != ret) {
            opal_output_verbose(1, ompi_coll_base_framework.framework_output,
                    "%s:%d: PtlMDRelease failed: %d\n",
                    __FILE__, __LINE__, ret);
        }
    }
    mca_coll_portals4_component.zero_md_h = PTL_INVALID_HANDLE;

    if (!PtlHandleIsEqual(mca_coll_portals4_component.data_md_h, PTL_INVALID_HANDLE)) {
        ret = PtlMDRelease(mca_coll_portals4_component.data_md_h);
        if (PTL_OK != ret) {
            opal_output_verbose(1, ompi_coll_base_framework.framework_output,
                    "%s:%d: PtlMDRelease failed: %d\n",
                    __FILE__, __LINE__, ret);
        }
    }
    mca_coll_portals4_component.data_md_h = PTL_INVALID_HANDLE;

    if (!PtlHandleIsEqual(mca_coll_portals4_component.finish_me_h, PTL_INVALID_HANDLE)) {
        ret = PtlMEUnlink(mca_coll_portals4_component.finish_me_h);
        if (PTL_OK != ret) {
            opal_output_verbose(1, ompi_coll_base_framework.framework_output,
                    "%s:%d: PtlMEUnlink failed: %d\n",
                    __FILE__, __LINE__, ret);
        }
    }
    if (!PtlHandleIsEqual(mca_coll_portals4_component.unex_me_h, PTL_INVALID_HANDLE)) {
        ret = PtlMEUnlink(mca_coll_portals4_component.unex_me_h);
        if (PTL_OK != ret) {
            opal_output_verbose(1, ompi_coll_base_framework.framework_output,
                    "%s:%d: PtlMEUnlink failed: %d\n",
                    __FILE__, __LINE__, ret);
        }
    }
    if (mca_coll_portals4_component.finish_pt_idx >= 0) {
        ret = PtlPTFree(mca_coll_portals4_component.ni_h, mca_coll_portals4_component.finish_pt_idx);
        if (PTL_OK != ret) {
            opal_output_verbose(1, ompi_coll_base_framework.framework_output,
                    "%s:%d: PtlPTFree failed: %d\n",
                    __FILE__, __LINE__, ret);
        }
    }
    if (mca_coll_portals4_component.pt_idx >= 0) {
        ret = PtlPTFree(mca_coll_portals4_component.ni_h, mca_coll_portals4_component.pt_idx);
        if (PTL_OK != ret) {
            opal_output_verbose(1, ompi_coll_base_framework.framework_output,
                    "%s:%d: PtlPTFree failed: %d\n",
                    __FILE__, __LINE__, ret);
        }
    }
    if (!PtlHandleIsEqual(mca_coll_portals4_component.eq_h, PTL_INVALID_HANDLE)) {
        ret = PtlEQFree(mca_coll_portals4_component.eq_h);
        if (PTL_OK != ret) {
            opal_output_verbose(1, ompi_coll_base_framework.framework_output,
                    "%s:%d: PtlEQFree failed: %d\n",
                    __FILE__, __LINE__, ret);
        }
    }
    if (!PtlHandleIsEqual(mca_coll_portals4_component.ni_h, PTL_INVALID_HANDLE)) {
        ret = PtlNIFini(mca_coll_portals4_component.ni_h);
        if (PTL_OK != ret) {
            opal_output_verbose(1, ompi_coll_base_framework.framework_output,
                    "%s:%d: PtlNIFini failed: %d\n",
                    __FILE__, __LINE__, ret);
        }

        PtlFini();
    }

    opal_progress_unregister(portals4_progress);

    return OMPI_SUCCESS;
}