Example #1
0
int
ompi_osc_portals4_free(struct ompi_win_t *win)
{
    ompi_osc_portals4_module_t *module =
        (ompi_osc_portals4_module_t*) win->w_osc_module;
    int ret = OMPI_SUCCESS;

    /* synchronize */
    module->comm->c_coll.coll_barrier(module->comm,
                                      module->comm->c_coll.coll_barrier_module);

    /* cleanup */
    PtlMEUnlink(module->data_me_h);
    PtlMDRelease(module->md_h);
    PtlMDRelease(module->req_md_h);
    PtlCTFree(module->ct_h);
    if (NULL != module->disp_units) free(module->disp_units);
    ompi_comm_free(&module->comm);
    if (NULL != module->free_after) free(module->free_after);

    if (!opal_list_is_empty(&module->outstanding_locks)) {
        ret = OMPI_ERR_RMA_SYNC;
    }
    OBJ_DESTRUCT(&module->outstanding_locks);

    free(module);

    return ret;
}
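For context: ompi_osc_portals4_free (like the other osc free routines later in this section) is the back end of the collective MPI_Win_free call; every rank enters a barrier, the one-sided resources are torn down, and the communicator duplicated at window creation is released with ompi_comm_free before the module itself is freed. Below is a minimal user-level sketch of that public API, written against standard MPI rather than Open MPI internals (illustrative only):

#include <mpi.h>

int main(int argc, char **argv)
{
    MPI_Win win;
    int *buf;

    MPI_Init(&argc, &argv);

    /* Collectively create a window backed by MPI-allocated memory. */
    MPI_Win_allocate(1024 * sizeof(int), sizeof(int), MPI_INFO_NULL,
                     MPI_COMM_WORLD, &buf, &win);

    /* ... RMA epochs (e.g. MPI_Win_fence / MPI_Put / MPI_Get) go here ... */

    /* MPI_Win_free is collective: it synchronizes all ranks and releases the
       window's communicator and network state, which is the work performed
       by osc free functions such as the one above. */
    MPI_Win_free(&win);

    MPI_Finalize();
    return 0;
}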
Example #2
0
static void mca_scoll_mpi_module_destruct(mca_scoll_mpi_module_t *mpi_module)
{

    OBJ_RELEASE(mpi_module->previous_barrier_module);
    OBJ_RELEASE(mpi_module->previous_broadcast_module);
    OBJ_RELEASE(mpi_module->previous_reduce_module);
    OBJ_RELEASE(mpi_module->previous_collect_module);

    mca_scoll_mpi_module_clear(mpi_module);
    /* Free ompi_comm */
    if (mpi_module->comm != &(ompi_mpi_comm_world.comm) && (NULL != mpi_module->comm)) {
        ompi_comm_free(&mpi_module->comm);
    }
}
int mca_sharedfp_addproc_file_close (mca_io_ompio_file_t *fh)
{
    struct mca_sharedfp_base_data_t *sh=NULL;
    int err = OMPI_SUCCESS;
    long sendBuff = 0;
    int count = 1;
    int rank;
    struct mca_sharedfp_addproc_data * addproc_data = NULL;

    if ( NULL == fh->f_sharedfp_data){
	/* Can happen with lazy initialization of the sharedfp structures */
	if ( mca_sharedfp_addproc_verbose ) {
	    opal_output(0, "sharedfp_addproc_file_close - shared file pointer structure not initialized\n");
	}
        return OMPI_SUCCESS;
    }
    sh = fh->f_sharedfp_data;

    rank = ompi_comm_rank ( sh->comm );

    /* Make sure that all processes are ready to release the
    ** shared file pointer resources
    */
    sh->comm->c_coll.coll_barrier(sh->comm, sh->comm->c_coll.coll_barrier_module );

    addproc_data = (struct mca_sharedfp_addproc_data*)(sh->selected_module_data);

    if (addproc_data) {
        /*tell additional proc to stop listening*/
        if(0 == rank){
            MCA_PML_CALL(send( &sendBuff, count, OMPI_OFFSET_DATATYPE, 0, END_TAG,
			       MCA_PML_BASE_SEND_STANDARD, addproc_data->intercom));
        }

        /* Free  intercommunicator */
        if(addproc_data->intercom){
            ompi_comm_free(&(addproc_data->intercom));
        }
        free(addproc_data);
    }

    /* Close the main file opened by this component*/
    err = mca_common_ompio_file_close(sh->sharedfh);

    /*free shared file pointer data struct*/
    free(sh);
    return err;
}
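mca_sharedfp_addproc_file_close is reached from the collective MPI_File_close when the addproc shared-file-pointer component was selected for the file. A hedged user-level sketch of the corresponding public MPI-IO calls (standard API only; the file name is a placeholder):

#include <mpi.h>

int main(int argc, char **argv)
{
    MPI_File fh;
    int rank;

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);

    /* Open a file collectively; "data.out" is only an example path. */
    MPI_File_open(MPI_COMM_WORLD, "data.out",
                  MPI_MODE_CREATE | MPI_MODE_WRONLY, MPI_INFO_NULL, &fh);

    /* Writes through the shared file pointer exercise the sharedfp component
       whose state (including the intercommunicator above) is released at close. */
    MPI_File_write_shared(fh, &rank, 1, MPI_INT, MPI_STATUS_IGNORE);

    /* Collective close: frees the shared file pointer structures. */
    MPI_File_close(&fh);

    MPI_Finalize();
    return 0;
}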
Example #4
0
int ompi_osc_ucx_free(struct ompi_win_t *win) {
    ompi_osc_ucx_module_t *module = (ompi_osc_ucx_module_t*) win->w_osc_module;
    int i, ret = OMPI_SUCCESS;

    if ((module->epoch_type.access != NONE_EPOCH && module->epoch_type.access != FENCE_EPOCH)
        || module->epoch_type.exposure != NONE_EPOCH) {
        ret = OMPI_ERR_RMA_SYNC;
    }

    if (module->start_group != NULL || module->post_group != NULL) {
        ret = OMPI_ERR_RMA_SYNC;
    }

    assert(module->global_ops_num == 0);
    assert(module->lock_count == 0);
    assert(opal_list_is_empty(&module->pending_posts) == true);
    OBJ_DESTRUCT(&module->outstanding_locks);
    OBJ_DESTRUCT(&module->pending_posts);

    while (module->state.lock != TARGET_LOCK_UNLOCKED) {
        /* not sure if this is required */
        ucp_worker_progress(mca_osc_ucx_component.ucp_worker);
    }

    ret = module->comm->c_coll->coll_barrier(module->comm,
                                             module->comm->c_coll->coll_barrier_module);

    for (i = 0; i < ompi_comm_size(module->comm); i++) {
        ucp_rkey_destroy((module->win_info_array[i]).rkey);
        ucp_rkey_destroy((module->state_info_array[i]).rkey);
    }
    free(module->win_info_array);
    free(module->state_info_array);

    free(module->per_target_ops_nums);

    ucp_mem_unmap(mca_osc_ucx_component.ucp_context, module->memh);
    ucp_mem_unmap(mca_osc_ucx_component.ucp_context, module->state_memh);

    if (module->disp_units) free(module->disp_units);
    ompi_comm_free(&module->comm);

    free(module);

    return ret;
}
Example #5
0
static int
component_select(struct ompi_win_t *win, void **base, size_t size, int disp_unit,
                 struct ompi_communicator_t *comm, struct ompi_info_t *info,
                 int flavor, int *model)
{
    ompi_osc_portals4_module_t *module = NULL;
    int ret = OMPI_ERROR;
    int tmp;
    ptl_md_t md;
    ptl_me_t me;
    char *name;

    if (MPI_WIN_FLAVOR_SHARED == flavor) return OMPI_ERR_NOT_SUPPORTED;

    /* create module structure */
    module = (ompi_osc_portals4_module_t*)
        calloc(1, sizeof(ompi_osc_portals4_module_t));
    if (NULL == module) return OMPI_ERR_TEMP_OUT_OF_RESOURCE;

    /* fill in the function pointer part */
    memcpy(module, &ompi_osc_portals4_module_template,
           sizeof(ompi_osc_base_module_t));

    /* fill in our part */
    if (MPI_WIN_FLAVOR_ALLOCATE == flavor) {
        module->free_after = *base = malloc(size);
        if (NULL == *base) goto error;
    } else {
        module->free_after = NULL;
    }

    ret = ompi_comm_dup(comm, &module->comm);
    if (OMPI_SUCCESS != ret) goto error;

    opal_output_verbose(1, ompi_osc_base_framework.framework_output,
                        "portals4 component creating window with id %d",
                        ompi_comm_get_cid(module->comm));

    asprintf(&name, "portals4 window %d", ompi_comm_get_cid(module->comm));
    ompi_win_set_name(win, name);
    free(name);

    /* share everyone's displacement units. Only do an allgather if
       strictly necessary, since it requires O(p) state. */
    tmp = disp_unit;
    ret = module->comm->c_coll.coll_bcast(&tmp, 1, MPI_INT, 0,
                                          module->comm,
                                          module->comm->c_coll.coll_bcast_module);
    if (OMPI_SUCCESS != ret) {
        opal_output_verbose(1, ompi_osc_base_framework.framework_output,
                            "%s:%d: MPI_Bcast failed: %d\n",
                            __FILE__, __LINE__, ret);
        goto error;
    }
    tmp = (tmp == disp_unit) ? 1 : 0;
    ret = module->comm->c_coll.coll_allreduce(MPI_IN_PLACE, &tmp, 1, MPI_INT, MPI_LAND,
                                              module->comm, module->comm->c_coll.coll_allreduce_module);
    if (OMPI_SUCCESS != ret) goto error;
    if (tmp == 1) {
        module->disp_unit = disp_unit;
        module->disp_units = NULL;
    } else {
        module->disp_unit = -1;
        module->disp_units = malloc(sizeof(int) * ompi_comm_size(module->comm));
        ret = module->comm->c_coll.coll_allgather(&disp_unit, 1, MPI_INT,
                                                  module->disp_units, 1, MPI_INT,
                                                  module->comm,
                                                  module->comm->c_coll.coll_allgather_module);
        if (OMPI_SUCCESS != ret) goto error;
    }

    module->ni_h = mca_osc_portals4_component.matching_ni_h;
    module->pt_idx = mca_osc_portals4_component.matching_pt_idx;

    ret = PtlCTAlloc(module->ni_h, &(module->ct_h));
    if (PTL_OK != ret) {
        opal_output_verbose(1, ompi_osc_base_framework.framework_output,
                            "%s:%d: PtlCTAlloc failed: %d\n",
                            __FILE__, __LINE__, ret);
        goto error;
    }

    md.start = 0;
    md.length = PTL_SIZE_MAX;
    md.options = PTL_MD_EVENT_SUCCESS_DISABLE | PTL_MD_EVENT_CT_REPLY | PTL_MD_EVENT_CT_ACK;
    md.eq_handle = mca_osc_portals4_component.matching_eq_h;
    md.ct_handle = module->ct_h;
    ret = PtlMDBind(module->ni_h, &md, &module->md_h);
    if (PTL_OK != ret) {
        opal_output_verbose(1, ompi_osc_base_framework.framework_output,
                            "%s:%d: PtlMDBind failed: %d\n",
                            __FILE__, __LINE__, ret);
        goto error;
    }

    md.start = 0;
    md.length = PTL_SIZE_MAX;
    md.options = PTL_MD_EVENT_CT_REPLY | PTL_MD_EVENT_CT_ACK;
    md.eq_handle = mca_osc_portals4_component.matching_eq_h;
    md.ct_handle = module->ct_h;
    ret = PtlMDBind(module->ni_h, &md, &module->req_md_h);
    if (PTL_OK != ret) {
        opal_output_verbose(1, ompi_osc_base_framework.framework_output,
                            "%s:%d: PtlMDBind failed: %d\n",
                            __FILE__, __LINE__, ret);
        goto error;
    }

    if (MPI_WIN_FLAVOR_DYNAMIC == flavor) {
        me.start = 0;
        me.length = PTL_SIZE_MAX;
    } else {
        me.start = *base;
        me.length = size;
    }
    me.ct_handle = PTL_CT_NONE;
    me.uid = PTL_UID_ANY;
    me.options = PTL_ME_OP_PUT | PTL_ME_OP_GET | PTL_ME_NO_TRUNCATE | PTL_ME_EVENT_SUCCESS_DISABLE;
    me.match_id.phys.nid = PTL_NID_ANY;
    me.match_id.phys.pid = PTL_PID_ANY;
    me.match_bits = module->comm->c_contextid;
    me.ignore_bits = 0;

    ret = PtlMEAppend(module->ni_h,
                      module->pt_idx,
                      &me,
                      PTL_PRIORITY_LIST,
                      NULL,
                      &module->data_me_h);
    if (PTL_OK != ret) {
        opal_output_verbose(1, ompi_osc_base_framework.framework_output,
                            "%s:%d: PtlMEAppend failed: %d\n",
                            __FILE__, __LINE__, ret);
        goto error;
    }

    me.start = &module->state;
    me.length = sizeof(module->state);
    me.ct_handle = PTL_CT_NONE;
    me.uid = PTL_UID_ANY;
    me.options = PTL_ME_OP_PUT | PTL_ME_OP_GET | PTL_ME_NO_TRUNCATE | PTL_ME_EVENT_SUCCESS_DISABLE;
    me.match_id.phys.nid = PTL_NID_ANY;
    me.match_id.phys.pid = PTL_PID_ANY;
    me.match_bits = module->comm->c_contextid | OSC_PORTALS4_MB_CONTROL;
    me.ignore_bits = 0;

    ret = PtlMEAppend(module->ni_h,
                      module->pt_idx,
                      &me,
                      PTL_PRIORITY_LIST,
                      NULL,
                      &module->control_me_h);
    if (PTL_OK != ret) {
        opal_output_verbose(1, ompi_osc_base_framework.framework_output,
                            "%s:%d: PtlMEAppend failed: %d\n",
                            __FILE__, __LINE__, ret);
        goto error;
    }

    module->opcount = 0;
    module->match_bits = module->comm->c_contextid;
    module->atomic_max = (check_config_value_equal("accumulate_ordering", info, "none")) ?
        mca_osc_portals4_component.matching_atomic_max :
        MIN(mca_osc_portals4_component.matching_atomic_max,
            mca_osc_portals4_component.matching_atomic_ordered_size);
    module->fetch_atomic_max = (check_config_value_equal("accumulate_ordering", info, "none")) ?
        mca_osc_portals4_component.matching_fetch_atomic_max :
        MIN(mca_osc_portals4_component.matching_fetch_atomic_max,
            mca_osc_portals4_component.matching_atomic_ordered_size);

    module->zero = 0;
    module->one = 1;
    module->start_group = NULL;
    module->post_group = NULL;

    module->state.post_count = 0;
    module->state.complete_count = 0;
    if (check_config_value_bool("no_locks", info)) {
        module->state.lock = LOCK_ILLEGAL;
    } else {
        module->state.lock = LOCK_UNLOCKED;
    }

    OBJ_CONSTRUCT(&module->outstanding_locks, opal_list_t);

    module->passive_target_access_epoch = false;

#if OPAL_ASSEMBLY_ARCH == OPAL_AMD64 || OPAL_ASSEMBLY_ARCH == OPAL_IA32
    *model = MPI_WIN_UNIFIED;
#else
    *model = MPI_WIN_SEPARATE;
#endif

    win->w_osc_module = &module->super;

    PtlAtomicSync();

    /* Make sure that everyone's ready to receive. */
    module->comm->c_coll.coll_barrier(module->comm,
                                      module->comm->c_coll.coll_barrier_module);

    return OMPI_SUCCESS;

 error:
    /* BWB: FIX ME: This is all wrong... */
    if (0 != module->ct_h) PtlCTFree(module->ct_h);
    if (0 != module->data_me_h) PtlMEUnlink(module->data_me_h);
    if (0 != module->req_md_h) PtlMDRelease(module->req_md_h);
    if (0 != module->md_h) PtlMDRelease(module->md_h);
    if (NULL != module->comm) ompi_comm_free(&module->comm);
    if (NULL != module) free(module);

    return ret;
}
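component_select above runs once per window on the MPI_Win_create / MPI_Win_allocate path (the flavor argument distinguishes the variants): it duplicates the communicator, agrees on displacement units, and registers memory with Portals4. A minimal sketch of the user-facing side with a caller-supplied buffer, assuming only the standard MPI API (not the internal c_coll interface):

#include <mpi.h>
#include <stdlib.h>

int main(int argc, char **argv)
{
    MPI_Win win;
    MPI_Aint size = 512 * sizeof(double);
    double *base;

    MPI_Init(&argc, &argv);

    /* Caller-owned memory corresponds to MPI_WIN_FLAVOR_CREATE, where
       free_after stays NULL in the module above. */
    base = malloc(size);

    /* If ranks pass different disp_unit values, the component falls back to
       the O(p) disp_units array exchanged with allgather. */
    MPI_Win_create(base, size, sizeof(double), MPI_INFO_NULL,
                   MPI_COMM_WORLD, &win);

    MPI_Win_free(&win);
    free(base);
    MPI_Finalize();
    return 0;
}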
Example #6
0
/*
 * function - partitions a communicator into subgroups which
 *            form lower-dimensional cartesian subgrids
 *
 * @param comm communicator with cartesian structure (handle)
 * @param remain_dims the 'i'th entry of 'remain_dims' specifies whether
 *                the 'i'th dimension is kept in the subgrid (true)
 *                or is dropped (false) (logical vector)
 * @param new_comm communicator containing the subgrid that includes the
 *                 calling process (handle)
 *
 * @retval MPI_SUCCESS
 * @retval MPI_ERR_TOPOLOGY
 * @retval MPI_ERR_COMM
 */
int mca_topo_base_cart_sub (ompi_communicator_t* comm,
                            const int *remain_dims,
                            ompi_communicator_t** new_comm)
{
    struct ompi_communicator_t *temp_comm;
    mca_topo_base_comm_cart_2_2_0_t *old_cart;
    int errcode, colour, key, colfactor, keyfactor;
    int ndim, dim, i;
    int *d, *dorig = NULL, *dold, *c, *p, *porig = NULL, *pold;
    mca_topo_base_module_t* topo;
    mca_topo_base_comm_cart_2_2_0_t* cart;

    *new_comm = MPI_COMM_NULL;
    old_cart = comm->c_topo->mtc.cart;

    /*
     * Compute colour and key used in splitting the communicator.
     */
    colour = key = 0;
    colfactor = keyfactor = 1;
    ndim = 0;

    i = old_cart->ndims - 1;
    d = old_cart->dims + i;
    c = comm->c_topo->mtc.cart->coords + i;

    for (; i >= 0; --i, --d, --c) {
        dim = *d;
        if (remain_dims[i] == 0) {
            colour += colfactor * (*c);
            colfactor *= dim;
        } else {
            ++ndim;
            key += keyfactor * (*c);
            keyfactor *= dim;
        }
    }
    /* Special case: if all of remain_dims were false, we need to make
       a 0-dimension cartesian communicator with just ourselves in it
       (you can't have a communicator unless you're in it). */
    if (0 == ndim) {
        colour = ompi_comm_rank (comm);
    }
    /* Split the communicator. */
    errcode = ompi_comm_split(comm, colour, key, &temp_comm, false);
    if (errcode != OMPI_SUCCESS) {
        return errcode;
    }

    /* Fill the communicator with topology information. */
    if (temp_comm != MPI_COMM_NULL) {

        assert( NULL == temp_comm->c_topo );
        if (OMPI_SUCCESS != (errcode = mca_topo_base_comm_select(temp_comm,
                                                                 comm->c_topo,
                                                                 &topo,
                                                                 OMPI_COMM_CART))) {
            ompi_comm_free(&temp_comm);
            return OMPI_ERR_OUT_OF_RESOURCE;
        }
        if (ndim >= 1) {
            /* Copy the dimensions */
            dorig = d = (int*)malloc(ndim * sizeof(int));
            dold = old_cart->dims;
            /* Copy the periods */
            porig = p = (int*)malloc(ndim * sizeof(int));
            pold = old_cart->periods;
            for (i = 0; i < old_cart->ndims; ++i, ++dold, ++pold) {
                if (remain_dims[i]) {
                    *d++ = *dold;
                    *p++ = *pold;
                }
            }
        }
        cart = OBJ_NEW(mca_topo_base_comm_cart_2_2_0_t);
        if( NULL == cart ) {
            ompi_comm_free(&temp_comm);
            if (NULL != dorig) {
                free(dorig);
            }
            if (NULL != porig) {
                free(porig);
            }
            return OMPI_ERR_OUT_OF_RESOURCE;
        }
        cart->ndims = ndim;
        cart->dims = dorig;
        cart->periods = porig;

        /* NTH: protect against a 0-byte alloc in the ndims = 0 case */
        if (ndim > 0) {
            cart->coords = (int*)malloc(sizeof(int) * ndim);
            if (NULL == cart->coords) {
                free(cart->periods);
                if(NULL != cart->dims) free(cart->dims);
                OBJ_RELEASE(cart);
                return OMPI_ERR_OUT_OF_RESOURCE;
            }
            {  /* setup the cartesian topology */
                int nprocs = temp_comm->c_local_group->grp_proc_count,
                    rank   = temp_comm->c_local_group->grp_my_rank;

                for (i = 0; i < ndim; ++i) {
                    nprocs /= cart->dims[i];
                    cart->coords[i] = rank / nprocs;
                    rank %= nprocs;
                }
            }
        }

        temp_comm->c_topo           = topo;
        temp_comm->c_topo->mtc.cart = cart;
        temp_comm->c_topo->reorder  = false;
        temp_comm->c_flags         |= OMPI_COMM_CART;
    }

    *new_comm = temp_comm;

    return MPI_SUCCESS;
}
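mca_topo_base_cart_sub implements MPI_Cart_sub: remain_dims selects the dimensions that survive, ranks that share the coordinates of all dropped dimensions receive the same colour, and ompi_comm_split builds the sub-communicators. A small user-level sketch that keeps only the second dimension of a 2-D grid (standard MPI API; works for any process count):

#include <mpi.h>

int main(int argc, char **argv)
{
    MPI_Comm grid, rows;
    int dims[2] = {0, 0}, periods[2] = {0, 0};
    int remain_dims[2] = {0, 1};   /* drop dim 0, keep dim 1 */
    int world_size;

    MPI_Init(&argc, &argv);
    MPI_Comm_size(MPI_COMM_WORLD, &world_size);

    /* Let MPI factor the processes into a 2-D grid. */
    MPI_Dims_create(world_size, 2, dims);
    MPI_Cart_create(MPI_COMM_WORLD, 2, dims, periods, 0, &grid);

    /* Each "rows" communicator contains the processes sharing the same
       coordinate in the dropped dimension, exactly the colour computation
       performed in mca_topo_base_cart_sub. */
    MPI_Cart_sub(grid, remain_dims, &rows);

    MPI_Comm_free(&rows);
    MPI_Comm_free(&grid);
    MPI_Finalize();
    return 0;
}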
Example #7
0
int mca_topo_base_cart_create(mca_topo_base_module_t *topo,
                              ompi_communicator_t* old_comm,
                              int ndims,
                              const int *dims,
                              const int *periods,
                              bool reorder,
                              ompi_communicator_t** comm_topo)
{
    int nprocs = 1, i, new_rank, num_procs, ret;
    ompi_communicator_t *new_comm;
    ompi_proc_t **topo_procs = NULL;
    mca_topo_base_comm_cart_2_2_0_t* cart;

    num_procs = old_comm->c_local_group->grp_proc_count;
    new_rank = old_comm->c_local_group->grp_my_rank;
    assert(topo->type == OMPI_COMM_CART);

    /* Calculate the number of processes in this grid */
    for (i = 0; i < ndims; ++i) {
        if(dims[i] <= 0) {
            return OMPI_ERROR;
        }
        nprocs *= dims[i];
    }

    /* check for the error condition */
    if (num_procs < nprocs) {
        return MPI_ERR_DIMS;
    }

    /* check if we have to trim the list of processes */
    if (nprocs < num_procs) {
        num_procs = nprocs;
    }

    if (new_rank > (nprocs-1)) {
        ndims = 0;
        new_rank = MPI_UNDEFINED;
        num_procs = 0;
    }

    cart = OBJ_NEW(mca_topo_base_comm_cart_2_2_0_t);
    if( NULL == cart ) {
        return OMPI_ERR_OUT_OF_RESOURCE;
    }
    cart->ndims = ndims;

    /* MPI-2.1 allows 0-dimension cartesian communicators, so prevent
       a 0-byte malloc -- leave dims as NULL */
    if( ndims > 0 ) {
        cart->dims = (int*)malloc(sizeof(int) * ndims);
        if (NULL == cart->dims) {
            OBJ_RELEASE(cart);
            return OMPI_ERROR;
        }
        memcpy(cart->dims, dims, ndims * sizeof(int));

        /* Cartesian communicator; copy the right data to the common information */
        cart->periods = (int*)malloc(sizeof(int) * ndims);
        if (NULL == cart->periods) {
            OBJ_RELEASE(cart);
            return OMPI_ERR_OUT_OF_RESOURCE;
        }
        memcpy(cart->periods, periods, ndims * sizeof(int));

        cart->coords = (int*)malloc(sizeof(int) * ndims);
        if (NULL == cart->coords) {
            OBJ_RELEASE(cart);
            return OMPI_ERR_OUT_OF_RESOURCE;
        }
        {  /* setup the cartesian topology */
            int nprocs = num_procs, rank = new_rank;

            for (i = 0; i < ndims; ++i) {
                nprocs /= cart->dims[i];
                cart->coords[i] = rank / nprocs;
                rank %= nprocs;
            }
        }
    }

    /* JMS: This should really be refactored to use
       comm_create_group(), because ompi_comm_allocate() still
       complains about 0-byte mallocs in debug builds for 0-member
       groups. */
    if (num_procs > 0) {
        /* Copy the proc structure from the previous communicator over to
           the new one.  The topology module is then able to work on this
           copy and rearrange it as it deems fit. */
        topo_procs = (ompi_proc_t**)malloc(num_procs * sizeof(ompi_proc_t *));
        if (NULL == topo_procs) {
            OBJ_RELEASE(cart);
            return OMPI_ERR_OUT_OF_RESOURCE;
        }
        if(OMPI_GROUP_IS_DENSE(old_comm->c_local_group)) {
            memcpy(topo_procs,
                   old_comm->c_local_group->grp_proc_pointers,
                   num_procs * sizeof(ompi_proc_t *));
        } else {
            for(i = 0 ; i < num_procs; i++) {
                topo_procs[i] = ompi_group_peer_lookup(old_comm->c_local_group,i);
            }
        }
    }

    /* allocate a new communicator */
    new_comm = ompi_comm_allocate(num_procs, 0);
    if (NULL == new_comm) {
        free(topo_procs);
        OBJ_RELEASE(cart);
        return MPI_ERR_INTERN;
    }

    assert(NULL == new_comm->c_topo);
    assert(!(new_comm->c_flags & OMPI_COMM_CART));
    new_comm->c_topo           = topo;
    new_comm->c_topo->mtc.cart = cart;
    new_comm->c_topo->reorder  = reorder;
    new_comm->c_flags         |= OMPI_COMM_CART;
    ret = ompi_comm_enable(old_comm, new_comm,
                           new_rank, num_procs, topo_procs);
    if (OMPI_SUCCESS != ret) {
        /* something wrong happened during setting the communicator */
        free(topo_procs);
        OBJ_RELEASE(cart);
        if (MPI_COMM_NULL != new_comm) {
            new_comm->c_topo = NULL;
            new_comm->c_flags &= ~OMPI_COMM_CART;
            ompi_comm_free (&new_comm);
        }
        return ret;
    }

    *comm_topo = new_comm;

    if( MPI_UNDEFINED == new_rank ) {
        ompi_comm_free(&new_comm);
        *comm_topo = MPI_COMM_NULL;
    }

    /* end here */
    return OMPI_SUCCESS;
}
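The loop near the end of mca_topo_base_cart_create ("setup the cartesian topology") converts a linear rank into row-major Cartesian coordinates by repeated division. A standalone sketch of just that arithmetic, plain C with no MPI required (illustrative only):

#include <stdio.h>

/* Row-major rank -> coordinates, mirroring the loop above: nprocs starts at
   the grid size and is divided by each dimension in turn. */
static void rank_to_coords(int rank, int ndims, const int *dims, int *coords)
{
    int nprocs = 1, i;

    for (i = 0; i < ndims; ++i) nprocs *= dims[i];

    for (i = 0; i < ndims; ++i) {
        nprocs /= dims[i];
        coords[i] = rank / nprocs;
        rank %= nprocs;
    }
}

int main(void)
{
    int dims[2] = {3, 4};   /* a 3 x 4 grid with 12 ranks */
    int coords[2];

    /* rank 7 -> (1, 3): 7 / 4 = 1, 7 % 4 = 3 */
    rank_to_coords(7, 2, dims, coords);
    printf("(%d, %d)\n", coords[0], coords[1]);
    return 0;
}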
Example #8
0
int
ompio_io_ompio_file_close (mca_io_ompio_file_t *ompio_fh)
{
    int ret = OMPI_SUCCESS;
    int delete_flag = 0;
    char name[256];

    if(mca_io_ompio_coll_timing_info){
        strcpy (name, "WRITE");
        if (!ompi_io_ompio_empty_print_queue(WRITE_PRINT_QUEUE)){
            ret = ompi_io_ompio_print_time_info(WRITE_PRINT_QUEUE,
                                                name,
                                                ompio_fh);
            if (OMPI_SUCCESS != ret){
                printf("Error in print_time_info ");
            }

        }
        strcpy (name, "READ");
        if (!ompi_io_ompio_empty_print_queue(READ_PRINT_QUEUE)){
            ret = ompi_io_ompio_print_time_info(READ_PRINT_QUEUE,
                                                name,
                                                ompio_fh);
            if (OMPI_SUCCESS != ret){
                printf("Error in print_time_info ");
            }
        }
    }
    if ( ompio_fh->f_amode & MPI_MODE_DELETE_ON_CLOSE ) {
        delete_flag = 1;
    }

    /*close the sharedfp file*/
    if( NULL != ompio_fh->f_sharedfp ){
        ret = ompio_fh->f_sharedfp->sharedfp_file_close(ompio_fh);
    }
    if ( NULL != ompio_fh->f_fs ) {
	/* The pointer might not be set if file_close() is
	** called from the file destructor in case of an error
	** during file_open()
	*/
	ret = ompio_fh->f_fs->fs_file_close (ompio_fh);
    }
    if ( delete_flag && 0 == ompio_fh->f_rank ) {
        mca_io_ompio_file_delete ( ompio_fh->f_filename, MPI_INFO_NULL );
    }

    if ( NULL != ompio_fh->f_fs ) {
	mca_fs_base_file_unselect (ompio_fh);
    }
    if ( NULL != ompio_fh->f_fbtl ) {
	mca_fbtl_base_file_unselect (ompio_fh);
    }

    if ( NULL != ompio_fh->f_fcoll ) {
	mca_fcoll_base_file_unselect (ompio_fh);
    }
    if ( NULL != ompio_fh->f_sharedfp)  {
	mca_sharedfp_base_file_unselect (ompio_fh);
    }

    if (NULL != ompio_fh->f_io_array) {
        free (ompio_fh->f_io_array);
        ompio_fh->f_io_array = NULL;
    }

    if (NULL != ompio_fh->f_init_procs_in_group) {
        free (ompio_fh->f_init_procs_in_group);
        ompio_fh->f_init_procs_in_group = NULL;
    }
    if (NULL != ompio_fh->f_procs_in_group) {
        free (ompio_fh->f_procs_in_group);
        ompio_fh->f_procs_in_group = NULL;
    }

    if (NULL != ompio_fh->f_decoded_iov) {
        free (ompio_fh->f_decoded_iov);
        ompio_fh->f_decoded_iov = NULL;
    }

    if (NULL != ompio_fh->f_convertor) {
        free (ompio_fh->f_convertor);
        ompio_fh->f_convertor = NULL;
    }

    if (NULL != ompio_fh->f_datarep) {
        free (ompio_fh->f_datarep);
        ompio_fh->f_datarep = NULL;
    }


    if (MPI_DATATYPE_NULL != ompio_fh->f_iov_type) {
        ompi_datatype_destroy (&ompio_fh->f_iov_type);
    }

    if ( MPI_DATATYPE_NULL != ompio_fh->f_etype ) {
	ompi_datatype_destroy (&ompio_fh->f_etype);
    }
    if ( MPI_DATATYPE_NULL != ompio_fh->f_filetype ){
	ompi_datatype_destroy (&ompio_fh->f_filetype);
    }

    if ( MPI_DATATYPE_NULL != ompio_fh->f_orig_filetype ){
	ompi_datatype_destroy (&ompio_fh->f_orig_filetype);
    }


    if (MPI_COMM_NULL != ompio_fh->f_comm && (ompio_fh->f_flags & OMPIO_SHAREDFP_IS_SET) )  {
        ompi_comm_free (&ompio_fh->f_comm);
    }

    return ret;
}
int
ompio_io_ompio_file_close (mca_io_ompio_file_t *ompio_fh)
{
    int ret = OMPI_SUCCESS;
    int delete_flag = 0;
    char name[256];

    if(mca_io_ompio_coll_timing_info) {
        strcpy (name, "WRITE");
        if (!ompi_io_ompio_empty_print_queue(WRITE_PRINT_QUEUE)) {
            ret = ompi_io_ompio_print_time_info(WRITE_PRINT_QUEUE,
                                                name,
                                                ompio_fh);
            if (OMPI_SUCCESS != ret) {
                printf("Error in print_time_info ");
            }

        }
        strcpy (name, "READ");
        if (!ompi_io_ompio_empty_print_queue(READ_PRINT_QUEUE)) {
            ret = ompi_io_ompio_print_time_info(READ_PRINT_QUEUE,
                                                name,
                                                ompio_fh);
            if (OMPI_SUCCESS != ret) {
                printf("Error in print_time_info ");
            }
        }
    }
    if ( ompio_fh->f_amode & MPI_MODE_DELETE_ON_CLOSE ) {
        delete_flag = 1;
    }

    /*close the sharedfp file*/
    if(ompio_fh->f_sharedfp != NULL) {
        ret = ompio_fh->f_sharedfp->sharedfp_file_close(ompio_fh);
    }
    ret = ompio_fh->f_fs->fs_file_close (ompio_fh);
    if ( delete_flag && 0 == ompio_fh->f_rank ) {
        mca_io_ompio_file_delete ( ompio_fh->f_filename, MPI_INFO_NULL );
    }

    mca_fs_base_file_unselect (ompio_fh);
    mca_fbtl_base_file_unselect (ompio_fh);
    mca_fcoll_base_file_unselect (ompio_fh);
    /* mca_sharedfp_base_file_unselect (ompio_fh) ; EG?*/

    if (NULL != ompio_fh->f_io_array) {
        free (ompio_fh->f_io_array);
        ompio_fh->f_io_array = NULL;
    }

    if (NULL != ompio_fh->f_init_procs_in_group) {
        free (ompio_fh->f_init_procs_in_group);
        ompio_fh->f_init_procs_in_group = NULL;
    }
    if (NULL != ompio_fh->f_procs_in_group) {
        free (ompio_fh->f_procs_in_group);
        ompio_fh->f_procs_in_group = NULL;
    }

    if (NULL != ompio_fh->f_decoded_iov) {
        free (ompio_fh->f_decoded_iov);
        ompio_fh->f_decoded_iov = NULL;
    }

    if (NULL != ompio_fh->f_convertor) {
        free (ompio_fh->f_convertor);
        ompio_fh->f_convertor = NULL;
    }

    if (NULL != ompio_fh->f_datarep) {
        free (ompio_fh->f_datarep);
        ompio_fh->f_datarep = NULL;
    }


    if (MPI_DATATYPE_NULL != ompio_fh->f_iov_type) {
        ompi_datatype_destroy (&ompio_fh->f_iov_type);
    }

    if (MPI_COMM_NULL != ompio_fh->f_comm)  {
        ompi_comm_free (&ompio_fh->f_comm);
    }


    /*
    if (MPI_INFO_NULL != ompio_fh->f_info)
    {
        ompi_info_free (&ompio_fh->f_info);
    }
    */


    return ret;
}
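Both file-close variants above check MPI_MODE_DELETE_ON_CLOSE and let rank 0 remove the file once the underlying close has completed. A user-level sketch of the open mode that triggers that path (standard MPI-IO; the file name is a placeholder):

#include <mpi.h>

int main(int argc, char **argv)
{
    MPI_File fh;

    MPI_Init(&argc, &argv);

    /* The file is removed automatically when it is closed; inside the close
       routines above this sets delete_flag and rank 0 performs the delete. */
    MPI_File_open(MPI_COMM_WORLD, "scratch.tmp",
                  MPI_MODE_CREATE | MPI_MODE_RDWR | MPI_MODE_DELETE_ON_CLOSE,
                  MPI_INFO_NULL, &fh);

    MPI_File_close(&fh);

    MPI_Finalize();
    return 0;
}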
Example #10
0
int
ompi_osc_pt2pt_free(ompi_win_t *win)
{
    int ret = OMPI_SUCCESS;
    ompi_osc_pt2pt_module_t *module = GET_MODULE(win);

    if (NULL == module) {
        return OMPI_SUCCESS;
    }

    if (NULL != module->comm) {
        opal_output_verbose(1, ompi_osc_base_framework.framework_output,
                            "pt2pt component destroying window with id %d",
                            ompi_comm_get_cid(module->comm));

        /* finish with a barrier */
        if (ompi_group_size(win->w_group) > 1) {
            ret = module->comm->c_coll.coll_barrier(module->comm,
                                                    module->comm->c_coll.coll_barrier_module);
        }

        /* remove from component information */
        OPAL_THREAD_SCOPED_LOCK(&mca_osc_pt2pt_component.lock,
                                opal_hash_table_remove_value_uint32(&mca_osc_pt2pt_component.modules,
                                        ompi_comm_get_cid(module->comm)));
    }

    win->w_osc_module = NULL;

    OBJ_DESTRUCT(&module->outstanding_locks);
    OBJ_DESTRUCT(&module->locks_pending);
    OBJ_DESTRUCT(&module->locks_pending_lock);
    OBJ_DESTRUCT(&module->acc_lock);
    OBJ_DESTRUCT(&module->cond);
    OBJ_DESTRUCT(&module->lock);

    /* it is erroneous to close a window with active operations on it so we should
     * probably produce an error here instead of cleaning up */
    OPAL_LIST_DESTRUCT(&module->pending_acc);
    OPAL_LIST_DESTRUCT(&module->pending_posts);

    osc_pt2pt_gc_clean (module);
    OPAL_LIST_DESTRUCT(&module->request_gc);
    OPAL_LIST_DESTRUCT(&module->buffer_gc);
    OBJ_DESTRUCT(&module->gc_lock);

    if (NULL != module->peers) {
        for (int i = 0 ; i < ompi_comm_size (module->comm) ; ++i) {
            OBJ_DESTRUCT(module->peers + i);
        }

        free(module->peers);
    }

    if (NULL != module->epoch_outgoing_frag_count) free(module->epoch_outgoing_frag_count);

    if (NULL != module->frag_request) {
        module->frag_request->req_complete_cb = NULL;
        ompi_request_cancel (module->frag_request);
        ompi_request_free (&module->frag_request);
    }
    if (NULL != module->comm) {
        ompi_comm_free(&module->comm);
    }
    if (NULL != module->incoming_buffer) free (module->incoming_buffer);
    if (NULL != module->free_after) free(module->free_after);

    free (module);

    return ret;
}
int mca_topo_base_graph_create(mca_topo_base_module_t *topo,
                               ompi_communicator_t* old_comm,
                               int nnodes,
                               int *index,
                               int *edges,
                               bool reorder,
                               ompi_communicator_t** comm_topo)
{
    ompi_communicator_t *new_comm;
    int new_rank, num_procs, ret, i;
    ompi_proc_t **topo_procs = NULL;
    mca_topo_base_comm_graph_2_2_0_t* graph;

    num_procs = old_comm->c_local_group->grp_proc_count;
    new_rank = old_comm->c_local_group->grp_my_rank;
    assert(topo->type == OMPI_COMM_GRAPH);

    if( num_procs < nnodes ) {
        return MPI_ERR_DIMS;
    }
    if( num_procs > nnodes ) {
        num_procs = nnodes;
    }
    if( new_rank > (nnodes - 1) ) {
        new_rank = MPI_UNDEFINED;
        num_procs = 0;
        nnodes = 0;
    }

    graph = OBJ_NEW(mca_topo_base_comm_graph_2_2_0_t);
    if( NULL == graph ) {
        return OMPI_ERR_OUT_OF_RESOURCE;
    }
    graph->nnodes = nnodes;

    /* Don't do any of the other initialization if we're not supposed
       to be part of the new communicator (because nnodes has been
       reset to 0, making things like index[nnodes-1] be junk).

       JMS: This should really be refactored to use
       comm_create_group(), because ompi_comm_allocate() still
       complains about 0-byte mallocs in debug builds for 0-member
       groups. */
    if (MPI_UNDEFINED != new_rank) {
        graph->index = (int*)malloc(sizeof(int) * nnodes);
        if (NULL == graph->index) {
            OBJ_RELEASE(graph);
            return OMPI_ERR_OUT_OF_RESOURCE;
        }
        memcpy(graph->index, index, nnodes * sizeof(int));

        /* Graph communicator; copy the right data to the common information */
        graph->edges = (int*)malloc(sizeof(int) * index[nnodes-1]);
        if (NULL == graph->edges) {
            OBJ_RELEASE(graph);
            return OMPI_ERR_OUT_OF_RESOURCE;
        }
        memcpy(graph->edges, edges, index[nnodes-1] * sizeof(int));

        topo_procs = (ompi_proc_t**)malloc(num_procs * sizeof(ompi_proc_t *));
        if (NULL == topo_procs) {
           OBJ_RELEASE(graph);
           return OMPI_ERR_OUT_OF_RESOURCE;
        }
        if(OMPI_GROUP_IS_DENSE(old_comm->c_local_group)) {
            memcpy(topo_procs, 
                   old_comm->c_local_group->grp_proc_pointers,
                   num_procs * sizeof(ompi_proc_t *));
        } else {
            for(i = 0 ; i < num_procs; i++) {
                topo_procs[i] = ompi_group_peer_lookup(old_comm->c_local_group,i);
            }
        }
    }

    /* allocate a new communicator */
    new_comm = ompi_comm_allocate(nnodes, 0);
    if (NULL == new_comm) {
        free(topo_procs);
        OBJ_RELEASE(graph);
        return OMPI_ERR_OUT_OF_RESOURCE;
    }

    ret = ompi_comm_enable(old_comm, new_comm,
                           new_rank, num_procs, topo_procs);
    if (OMPI_SUCCESS != ret) {
        free(topo_procs);
        OBJ_RELEASE(graph);
        ompi_comm_free (&new_comm);
        return ret;
    }
    
    new_comm->c_topo            = topo;
    new_comm->c_topo->mtc.graph = graph;
    new_comm->c_flags          |= OMPI_COMM_GRAPH;
    new_comm->c_topo->reorder   = reorder;
    *comm_topo = new_comm;

    if( MPI_UNDEFINED == new_rank ) {
        ompi_comm_free(&new_comm);
        *comm_topo = MPI_COMM_NULL;
    }

    return OMPI_SUCCESS;
}
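mca_topo_base_graph_create backs MPI_Graph_create: index[] holds cumulative neighbour counts and edges[] the flattened adjacency lists, which is why index[nnodes-1] is the total number of edges copied above. A user-level sketch for a 4-node ring, assuming at least 4 processes (standard MPI API only):

#include <mpi.h>

int main(int argc, char **argv)
{
    /* 4-node ring: node i is connected to (i-1) and (i+1) mod 4.
       index[] is cumulative: node 0 owns edges[0..1], node 1 edges[2..3], ... */
    int index[4] = {2, 4, 6, 8};
    int edges[8] = {1, 3, 0, 2, 1, 3, 0, 2};
    MPI_Comm ring;

    MPI_Init(&argc, &argv);

    MPI_Graph_create(MPI_COMM_WORLD, 4, index, edges, 0, &ring);

    /* Ranks beyond the 4 graph nodes receive MPI_COMM_NULL, mirroring the
       new_rank = MPI_UNDEFINED handling above. */
    if (MPI_COMM_NULL != ring) {
        MPI_Comm_free(&ring);
    }

    MPI_Finalize();
    return 0;
}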
int mca_topo_base_dist_graph_create_adjacent(mca_topo_base_module_t* module,
        ompi_communicator_t *comm_old,
        int indegree, int sources[],
        int sourceweights[],
        int outdegree,
        int destinations[],
        int destweights[],
        ompi_info_t *info, int reorder,
        ompi_communicator_t **newcomm)
{
    mca_topo_base_comm_dist_graph_2_2_0_t *topo = NULL;
    int err;

    if( OMPI_SUCCESS != (err = ompi_comm_create(comm_old,
                               comm_old->c_local_group,
                               newcomm)) ) {
        return err;
    }
    err = OMPI_ERR_OUT_OF_RESOURCE;  /* assume by default that something bad will happen */

    assert( NULL == (*newcomm)->c_topo );

    topo = OBJ_NEW(mca_topo_base_comm_dist_graph_2_2_0_t);
    if( NULL == topo ) {
        goto bail_out;
    }
    topo->in = topo->inw = NULL;
    topo->out = topo->outw = NULL;
    topo->indegree = indegree;
    topo->outdegree = outdegree;
    topo->weighted = !((MPI_UNWEIGHTED == sourceweights) && (MPI_UNWEIGHTED == destweights));

    if (topo->indegree > 0) {
        topo->in = (int*)malloc(sizeof(int) * topo->indegree);
        if (NULL == topo->in) {
            goto bail_out;
        }
        memcpy(topo->in, sources, sizeof(int) * topo->indegree);
        if (MPI_UNWEIGHTED != sourceweights) {
            topo->inw = (int*)malloc(sizeof(int) * topo->indegree);
            if( NULL == topo->inw ) {
                goto bail_out;
            }
            memcpy( topo->inw, sourceweights, sizeof(int) * topo->indegree );
        }
    }

    if (topo->outdegree > 0) {
        topo->out = (int*)malloc(sizeof(int) * topo->outdegree);
        if (NULL == topo->out) {
            goto bail_out;
        }
        memcpy(topo->out, destinations, sizeof(int) * topo->outdegree);
        topo->outw = NULL;
        if (MPI_UNWEIGHTED != destweights) {
            if (topo->outdegree > 0) {
                topo->outw = (int*)malloc(sizeof(int) * topo->outdegree);
                if (NULL == topo->outw) {
                    goto bail_out;
                }
                memcpy(topo->outw, destweights, sizeof(int) * topo->outdegree);
            }
        }
    }

    (*newcomm)->c_topo                 = module;
    (*newcomm)->c_topo->mtc.dist_graph = topo;
    (*newcomm)->c_topo->reorder        = reorder;
    (*newcomm)->c_flags               |= OMPI_COMM_DIST_GRAPH;

    return OMPI_SUCCESS;

bail_out:
    if( NULL != topo->in ) free(topo->in);
    if( MPI_UNWEIGHTED != sourceweights ) {
        if( NULL != topo->inw ) free(topo->inw);
    }
    if( NULL != topo->out ) free(topo->out);
    if( MPI_UNWEIGHTED != destweights ) {
        if( NULL != topo->outw ) free(topo->outw);
    }
    if( NULL != topo ) {
        free(topo);
    }
    ompi_comm_free(newcomm);
    return err;
}
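In the adjacent variant above each rank hands in its own neighbour lists, so the topology can be attached locally once ompi_comm_create has produced the new communicator. A user-level sketch in which every rank names its left and right ring neighbours (standard MPI API; works for any number of processes):

#include <mpi.h>

int main(int argc, char **argv)
{
    MPI_Comm ring;
    int rank, size, sources[2], destinations[2];

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &size);

    /* Each rank receives from and sends to its ring neighbours. */
    sources[0] = destinations[0] = (rank + size - 1) % size;
    sources[1] = destinations[1] = (rank + 1) % size;

    /* MPI_UNWEIGHTED matches the unweighted path in the code above, where
       topo->inw and topo->outw stay NULL. */
    MPI_Dist_graph_create_adjacent(MPI_COMM_WORLD, 2, sources, MPI_UNWEIGHTED,
                                   2, destinations, MPI_UNWEIGHTED,
                                   MPI_INFO_NULL, 0, &ring);

    MPI_Comm_free(&ring);
    MPI_Finalize();
    return 0;
}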
Example #13
0
int
ompi_osc_rdma_module_free(ompi_win_t *win)
{
    int ret = OMPI_SUCCESS;
    int tmp, i;
    ompi_osc_rdma_module_t *module = GET_MODULE(win);

    opal_output_verbose(1, ompi_osc_base_output,
                        "rdma component destroying window with id %d",
                        ompi_comm_get_cid(module->m_comm));

    /* finish with a barrier */
    if (ompi_group_size(win->w_group) > 1) {
        ret = module->m_comm->c_coll.coll_barrier(module->m_comm,
                                                  module->m_comm->c_coll.coll_barrier_module);
    }

    /* remove from component information */
    OPAL_THREAD_LOCK(&mca_osc_rdma_component.c_lock);
    tmp = opal_hash_table_remove_value_uint32(&mca_osc_rdma_component.c_modules,
                                              ompi_comm_get_cid(module->m_comm));
    /* only take the output of hash_table_remove if there wasn't already an error */
    ret = (ret != OMPI_SUCCESS) ? ret : tmp;

    if (0 == opal_hash_table_get_size(&mca_osc_rdma_component.c_modules)) {
#if OPAL_ENABLE_PROGRESS_THREADS
        void *foo;

        mca_osc_rdma_component.c_thread_run = false;
        opal_condition_broadcast(&ompi_request_cond);
        opal_thread_join(&mca_osc_rdma_component.c_thread, &foo);
#else
        opal_progress_unregister(ompi_osc_rdma_component_progress);
#endif
    }
    OPAL_THREAD_UNLOCK(&mca_osc_rdma_component.c_lock);

    win->w_osc_module = NULL;

    OBJ_DESTRUCT(&module->m_unlocks_pending);
    OBJ_DESTRUCT(&module->m_locks_pending);
    OBJ_DESTRUCT(&module->m_queued_sendreqs);
    OBJ_DESTRUCT(&module->m_copy_pending_sendreqs);
    OBJ_DESTRUCT(&module->m_pending_sendreqs);
    OBJ_DESTRUCT(&module->m_acc_lock);
    OBJ_DESTRUCT(&module->m_cond);
    OBJ_DESTRUCT(&module->m_lock);

    if (NULL != module->m_sc_remote_ranks) {
        free(module->m_sc_remote_ranks);
    }
    if (NULL != module->m_sc_remote_active_ranks) {
        free(module->m_sc_remote_active_ranks);
    }
    if (NULL != module->m_pending_buffers) {
        free(module->m_pending_buffers);
    }
    if (NULL != module->m_fence_coll_counts) {
        free(module->m_fence_coll_counts);
    }
    if (NULL != module->m_copy_num_pending_sendreqs) {
        free(module->m_copy_num_pending_sendreqs);
    }
    if (NULL != module->m_num_pending_sendreqs) {
        free(module->m_num_pending_sendreqs);
    }
    if (NULL != module->m_peer_info) {
        for (i = 0 ; i < ompi_comm_size(module->m_comm) ; ++i) {
            ompi_osc_rdma_peer_info_free(&module->m_peer_info[i]);
        }
        free(module->m_peer_info);
    }
    if (NULL != module->m_comm) ompi_comm_free(&module->m_comm);
    if (NULL != module) free(module);

    return ret;
}
Example #14
0
static int component_select(struct ompi_win_t *win, void **base, size_t size, int disp_unit,
                            struct ompi_communicator_t *comm, struct opal_info_t *info,
                            int flavor, int *model) {
    ompi_osc_ucx_module_t *module = NULL;
    char *name = NULL;
    long values[2];
    int ret = OMPI_SUCCESS;
    ucs_status_t status;
    int i, comm_size = ompi_comm_size(comm);
    int is_eps_ready;
    bool eps_created = false, worker_created = false;
    ucp_address_t *my_addr = NULL;
    size_t my_addr_len;
    char *recv_buf = NULL;
    void *rkey_buffer = NULL, *state_rkey_buffer = NULL;
    size_t rkey_buffer_size, state_rkey_buffer_size;
    void *state_base = NULL;
    void * my_info = NULL;
    size_t my_info_len;
    int disps[comm_size];
    int rkey_sizes[comm_size];

    /* the osc/sm component is the exclusive provider for support for
     * shared memory windows */
    if (flavor == MPI_WIN_FLAVOR_SHARED) {
        return OMPI_ERR_NOT_SUPPORTED;
    }

    /* if UCP worker has never been initialized before, init it first */
    if (mca_osc_ucx_component.ucp_worker == NULL) {
        ucp_worker_params_t worker_params;
        ucp_worker_attr_t worker_attr;

        memset(&worker_params, 0, sizeof(ucp_worker_h));
        worker_params.field_mask = UCP_WORKER_PARAM_FIELD_THREAD_MODE;
        worker_params.thread_mode = (mca_osc_ucx_component.enable_mpi_threads == true)
                                    ? UCS_THREAD_MODE_MULTI : UCS_THREAD_MODE_SINGLE;
        status = ucp_worker_create(mca_osc_ucx_component.ucp_context, &worker_params,
                                   &(mca_osc_ucx_component.ucp_worker));
        if (UCS_OK != status) {
            opal_output_verbose(1, ompi_osc_base_framework.framework_output,
                                "%s:%d: ucp_worker_create failed: %d\n",
                                __FILE__, __LINE__, status);
            ret = OMPI_ERROR;
            goto error;
        }

        /* query UCP worker attributes */
        worker_attr.field_mask = UCP_WORKER_ATTR_FIELD_THREAD_MODE;
        status = ucp_worker_query(mca_osc_ucx_component.ucp_worker, &worker_attr);
        if (UCS_OK != status) {
            opal_output_verbose(1, ompi_osc_base_framework.framework_output,
                                "%s:%d: ucp_worker_query failed: %d\n",
                                __FILE__, __LINE__, status);
            ret = OMPI_ERROR;
            goto error;
        }

        if (mca_osc_ucx_component.enable_mpi_threads == true &&
            worker_attr.thread_mode != UCS_THREAD_MODE_MULTI) {
            opal_output_verbose(1, ompi_osc_base_framework.framework_output,
                                "%s:%d: ucx does not support multithreading\n",
                                __FILE__, __LINE__);
            ret = OMPI_ERROR;
            goto error;
        }

        worker_created = true;
    }

    /* create module structure */
    module = (ompi_osc_ucx_module_t *)calloc(1, sizeof(ompi_osc_ucx_module_t));
    if (module == NULL) {
        ret = OMPI_ERR_TEMP_OUT_OF_RESOURCE;
        goto error;
    }

    /* fill in the function pointer part */
    memcpy(module, &ompi_osc_ucx_module_template, sizeof(ompi_osc_base_module_t));

    ret = ompi_comm_dup(comm, &module->comm);
    if (ret != OMPI_SUCCESS) {
        goto error;
    }

    asprintf(&name, "ucx window %d", ompi_comm_get_cid(module->comm));
    ompi_win_set_name(win, name);
    free(name);

    /* share everyone's displacement units. Only do an allgather if
       strictly necessary, since it requires O(p) state. */
    values[0] = disp_unit;
    values[1] = -disp_unit;

    ret = module->comm->c_coll->coll_allreduce(MPI_IN_PLACE, values, 2, MPI_LONG,
                                               MPI_MIN, module->comm,
                                               module->comm->c_coll->coll_allreduce_module);
    if (OMPI_SUCCESS != ret) {
        goto error;
    }

    if (values[0] == -values[1]) { /* everyone has the same disp_unit, we do not need O(p) space */
        module->disp_unit = disp_unit;
    } else { /* different disp_unit sizes, allocate O(p) space to store them */
        module->disp_unit = -1;
        module->disp_units = calloc(comm_size, sizeof(int));
        if (module->disp_units == NULL) {
            ret = OMPI_ERR_TEMP_OUT_OF_RESOURCE;
            goto error;
        }

        ret = module->comm->c_coll->coll_allgather(&disp_unit, 1, MPI_INT,
                                                   module->disp_units, 1, MPI_INT,
                                                   module->comm,
                                                   module->comm->c_coll->coll_allgather_module);
        if (OMPI_SUCCESS != ret) {
            goto error;
        }
    }

    /* exchange endpoints if necessary */
    is_eps_ready = 1;
    for (i = 0; i < comm_size; i++) {
        if (OSC_UCX_GET_EP(module->comm, i) == NULL) {
            is_eps_ready = 0;
            break;
        }
    }

    ret = module->comm->c_coll->coll_allreduce(MPI_IN_PLACE, &is_eps_ready, 1, MPI_INT,
                                               MPI_LAND,
                                               module->comm,
                                               module->comm->c_coll->coll_allreduce_module);
    if (OMPI_SUCCESS != ret) {
        goto error;
    }

    if (!is_eps_ready) {
        status = ucp_worker_get_address(mca_osc_ucx_component.ucp_worker,
                                        &my_addr, &my_addr_len);
        if (status != UCS_OK) {
            opal_output_verbose(1, ompi_osc_base_framework.framework_output,
                                "%s:%d: ucp_worker_get_address failed: %d\n",
                                __FILE__, __LINE__, status);
            ret = OMPI_ERROR;
            goto error;
        }

        ret = allgather_len_and_info(my_addr, (int)my_addr_len,
                                     &recv_buf, disps, module->comm);
        if (ret != OMPI_SUCCESS) {
            goto error;
        }

        for (i = 0; i < comm_size; i++) {
            if (OSC_UCX_GET_EP(module->comm, i) == NULL) {
                ucp_ep_params_t ep_params;
                ucp_ep_h ep;
                memset(&ep_params, 0, sizeof(ucp_ep_params_t));
                ep_params.field_mask = UCP_EP_PARAM_FIELD_REMOTE_ADDRESS;
                ep_params.address = (ucp_address_t *)&(recv_buf[disps[i]]);
                status = ucp_ep_create(mca_osc_ucx_component.ucp_worker, &ep_params, &ep);
                if (status != UCS_OK) {
                    opal_output_verbose(1, ompi_osc_base_framework.framework_output,
                                        "%s:%d: ucp_ep_create failed: %d\n",
                                        __FILE__, __LINE__, status);
                    ret = OMPI_ERROR;
                    goto error;
                }

                ompi_comm_peer_lookup(module->comm, i)->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_UCX] = ep;
            }
        }

        ucp_worker_release_address(mca_osc_ucx_component.ucp_worker, my_addr);
        my_addr = NULL;
        free(recv_buf);
        recv_buf = NULL;

        eps_created = true;
    }

    ret = mem_map(base, size, &(module->memh), module, flavor);
    if (ret != OMPI_SUCCESS) {
        goto error;
    }

    state_base = (void *)&(module->state);
    ret = mem_map(&state_base, sizeof(ompi_osc_ucx_state_t), &(module->state_memh),
                  module, MPI_WIN_FLAVOR_CREATE);
    if (ret != OMPI_SUCCESS) {
        goto error;
    }

    module->win_info_array = calloc(comm_size, sizeof(ompi_osc_ucx_win_info_t));
    if (module->win_info_array == NULL) {
        ret = OMPI_ERR_TEMP_OUT_OF_RESOURCE;
        goto error;
    }

    module->state_info_array = calloc(comm_size, sizeof(ompi_osc_ucx_win_info_t));
    if (module->state_info_array == NULL) {
        ret = OMPI_ERR_TEMP_OUT_OF_RESOURCE;
        goto error;
    }

    status = ucp_rkey_pack(mca_osc_ucx_component.ucp_context, module->memh,
                           &rkey_buffer, &rkey_buffer_size);
    if (status != UCS_OK) {
        opal_output_verbose(1, ompi_osc_base_framework.framework_output,
                            "%s:%d: ucp_rkey_pack failed: %d\n",
                            __FILE__, __LINE__, status);
        ret = OMPI_ERROR;
        goto error;
    }

    status = ucp_rkey_pack(mca_osc_ucx_component.ucp_context, module->state_memh,
                           &state_rkey_buffer, &state_rkey_buffer_size);
    if (status != UCS_OK) {
        opal_output_verbose(1, ompi_osc_base_framework.framework_output,
                            "%s:%d: ucp_rkey_pack failed: %d\n",
                            __FILE__, __LINE__, status);
        ret = OMPI_ERROR;
        goto error;
    }

    my_info_len = 2 * sizeof(uint64_t) + rkey_buffer_size + state_rkey_buffer_size;
    my_info = malloc(my_info_len);
    if (my_info == NULL) {
        ret = OMPI_ERR_TEMP_OUT_OF_RESOURCE;
        goto error;
    }

    memcpy(my_info, base, sizeof(uint64_t));
    memcpy((void *)((char *)my_info + sizeof(uint64_t)), &state_base, sizeof(uint64_t));
    memcpy((void *)((char *)my_info + 2 * sizeof(uint64_t)), rkey_buffer, rkey_buffer_size);
    memcpy((void *)((char *)my_info + 2 * sizeof(uint64_t) + rkey_buffer_size),
           state_rkey_buffer, state_rkey_buffer_size);

    ret = allgather_len_and_info(my_info, (int)my_info_len, &recv_buf, disps, module->comm);
    if (ret != OMPI_SUCCESS) {
        goto error;
    }

    ret = comm->c_coll->coll_allgather((void *)&rkey_buffer_size, 1, MPI_INT,
                                       rkey_sizes, 1, MPI_INT, comm,
                                       comm->c_coll->coll_allgather_module);
    if (OMPI_SUCCESS != ret) {
        goto error;
    }

    for (i = 0; i < comm_size; i++) {
        ucp_ep_h ep = OSC_UCX_GET_EP(module->comm, i);
        assert(ep != NULL);

        memcpy(&(module->win_info_array[i]).addr, &recv_buf[disps[i]], sizeof(uint64_t));
        memcpy(&(module->state_info_array[i]).addr, &recv_buf[disps[i] + sizeof(uint64_t)], 
               sizeof(uint64_t));

        status = ucp_ep_rkey_unpack(ep, &(recv_buf[disps[i] + 2 * sizeof(uint64_t)]),
                                    &((module->win_info_array[i]).rkey));
        if (status != UCS_OK) {
            opal_output_verbose(1, ompi_osc_base_framework.framework_output,
                                "%s:%d: ucp_ep_rkey_unpack failed: %d\n",
                                __FILE__, __LINE__, status);
            ret = OMPI_ERROR;
            goto error;
        }

        status = ucp_ep_rkey_unpack(ep, &(recv_buf[disps[i] + 2 * sizeof(uint64_t) + rkey_sizes[i]]),
                                    &((module->state_info_array[i]).rkey));
        if (status != UCS_OK) {
            opal_output_verbose(1, ompi_osc_base_framework.framework_output,
                                "%s:%d: ucp_ep_rkey_unpack failed: %d\n",
                                __FILE__, __LINE__, status);
            ret = OMPI_ERROR;
            goto error;
        }
    }

    free(my_info);
    free(recv_buf);

    ucp_rkey_buffer_release(rkey_buffer);
    ucp_rkey_buffer_release(state_rkey_buffer);

    module->state.lock = TARGET_LOCK_UNLOCKED;
    module->state.post_index = 0;
    memset((void *)module->state.post_state, 0, sizeof(uint64_t) * OMPI_OSC_UCX_POST_PEER_MAX);
    module->state.complete_count = 0;
    module->state.req_flag = 0;
    module->state.acc_lock = TARGET_LOCK_UNLOCKED;
    module->epoch_type.access = NONE_EPOCH;
    module->epoch_type.exposure = NONE_EPOCH;
    module->lock_count = 0;
    module->post_count = 0;
    module->start_group = NULL;
    module->post_group = NULL;
    OBJ_CONSTRUCT(&module->outstanding_locks, opal_hash_table_t);
    OBJ_CONSTRUCT(&module->pending_posts, opal_list_t);
    module->global_ops_num = 0;
    module->per_target_ops_nums = calloc(comm_size, sizeof(int));
    module->start_grp_ranks = NULL;
    module->lock_all_is_nocheck = false;

    ret = opal_hash_table_init(&module->outstanding_locks, comm_size);
    if (ret != OPAL_SUCCESS) {
        goto error;
    }

    win->w_osc_module = &module->super;

    /* sync with everyone */

    ret = module->comm->c_coll->coll_barrier(module->comm,
                                             module->comm->c_coll->coll_barrier_module);
    if (ret != OMPI_SUCCESS) {
        goto error;
    }

    return ret;

 error:
    if (my_addr) ucp_worker_release_address(mca_osc_ucx_component.ucp_worker, my_addr);
    if (recv_buf) free(recv_buf);
    if (my_info) free(my_info);
    for (i = 0; i < comm_size; i++) {
        if ((module->win_info_array[i]).rkey != NULL) {
            ucp_rkey_destroy((module->win_info_array[i]).rkey);
        }
        if ((module->state_info_array[i]).rkey != NULL) {
            ucp_rkey_destroy((module->state_info_array[i]).rkey);
        }
    }
    if (rkey_buffer) ucp_rkey_buffer_release(rkey_buffer);
    if (state_rkey_buffer) ucp_rkey_buffer_release(state_rkey_buffer);
    if (module->win_info_array) free(module->win_info_array);
    if (module->state_info_array) free(module->state_info_array);
    if (module->disp_units) free(module->disp_units);
    if (module->comm) ompi_comm_free(&module->comm);
    if (module->per_target_ops_nums) free(module->per_target_ops_nums);
    if (eps_created) {
        for (i = 0; i < comm_size; i++) {
            ucp_ep_h ep = OSC_UCX_GET_EP(module->comm, i);
            ucp_ep_destroy(ep);
        }
    }
    if (worker_created) ucp_worker_destroy(mca_osc_ucx_component.ucp_worker);
    if (module) free(module);
    return ret;
}
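One detail of the UCX component_select above is worth spelling out: instead of always allgathering every rank's disp_unit, it allreduces the pair (disp_unit, -disp_unit) with MPI_MIN; the two results cancel exactly when all ranks passed the same value, so the O(p) disp_units array is only allocated when it is really needed. A standalone sketch of that check using the public API (illustrative, not the internal c_coll path):

#include <mpi.h>
#include <stdio.h>

int main(int argc, char **argv)
{
    int disp_unit = sizeof(double);   /* per-rank value to compare */
    long values[2];

    MPI_Init(&argc, &argv);

    values[0] = disp_unit;
    values[1] = -disp_unit;
    /* After MPI_MIN, values[0] is the global minimum and values[1] is minus
       the global maximum; they cancel only if min == max, i.e. all ranks agree. */
    MPI_Allreduce(MPI_IN_PLACE, values, 2, MPI_LONG, MPI_MIN, MPI_COMM_WORLD);

    if (values[0] == -values[1]) {
        printf("all ranks share disp_unit = %ld\n", values[0]);
    } else {
        printf("disp_units differ; an allgather is needed\n");
    }

    MPI_Finalize();
    return 0;
}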
int 
ompi_osc_pt2pt_component_select(ompi_win_t *win,
                               ompi_info_t *info,
                               ompi_communicator_t *comm)
{
    ompi_osc_pt2pt_module_t *module = NULL;
    int ret, i;
    ompi_osc_pt2pt_buffer_t *buffer = NULL;
    opal_free_list_item_t *item = NULL;
    char *tmp = NULL;

    /* create module structure */
    module = (ompi_osc_pt2pt_module_t*)
        calloc(1, sizeof(ompi_osc_pt2pt_module_t));
    if (NULL == module) return OMPI_ERR_TEMP_OUT_OF_RESOURCE;

    /* fill in the function pointer part */
    memcpy(module, &ompi_osc_pt2pt_module_template, 
           sizeof(ompi_osc_base_module_t));

    /* initialize the p2p part */
    OBJ_CONSTRUCT(&(module->p2p_lock), opal_mutex_t);
    OBJ_CONSTRUCT(&(module->p2p_cond), opal_condition_t);
    OBJ_CONSTRUCT(&(module->p2p_acc_lock), opal_mutex_t);
    OBJ_CONSTRUCT(&module->p2p_pending_sendreqs, opal_list_t);
    OBJ_CONSTRUCT(&(module->p2p_copy_pending_sendreqs), opal_list_t);
    OBJ_CONSTRUCT(&(module->p2p_locks_pending), opal_list_t);
    OBJ_CONSTRUCT(&(module->p2p_unlocks_pending), opal_list_t);

    module->p2p_win = win;

    ret = ompi_comm_dup(comm, &(module->p2p_comm));
    if (ret != OMPI_SUCCESS) goto cleanup;

    opal_output_verbose(1, ompi_osc_base_framework.framework_output,
                        "pt2pt component creating window with id %d",
                        ompi_comm_get_cid(module->p2p_comm));

    asprintf(&tmp, "%d", ompi_comm_get_cid(module->p2p_comm));
    ompi_win_set_name(win, tmp);
    free(tmp);

    module->p2p_num_pending_sendreqs = (unsigned int*)
        malloc(sizeof(unsigned int) * ompi_comm_size(module->p2p_comm));
    if (NULL == module->p2p_num_pending_sendreqs) {
        ret = OMPI_ERR_TEMP_OUT_OF_RESOURCE;
        goto cleanup;
    }
    memset(module->p2p_num_pending_sendreqs, 0, 
           sizeof(unsigned int) * ompi_comm_size(module->p2p_comm));

    module->p2p_num_pending_out = 0;
    module->p2p_num_pending_in = 0;
    module->p2p_num_post_msgs = 0;
    module->p2p_num_complete_msgs = 0;
    module->p2p_tag_counter = 0;

    module->p2p_copy_num_pending_sendreqs = (unsigned int*)
        malloc(sizeof(unsigned int) * ompi_comm_size(module->p2p_comm));
    if (NULL == module->p2p_copy_num_pending_sendreqs) {
        ret = OMPI_ERR_TEMP_OUT_OF_RESOURCE;
        goto cleanup;
    }
    memset(module->p2p_num_pending_sendreqs, 0, 
           sizeof(unsigned int) * ompi_comm_size(module->p2p_comm));

    /* fence data */
    module->p2p_fence_coll_counts = (int*)
        malloc(sizeof(int) * ompi_comm_size(module->p2p_comm));
    if (NULL == module->p2p_fence_coll_counts) {
        ret = OMPI_ERR_TEMP_OUT_OF_RESOURCE;
        goto cleanup;
    }
    for (i = 0 ; i < ompi_comm_size(module->p2p_comm) ; ++i) {
        module->p2p_fence_coll_counts[i] = 1;
    }

    /* pwsc data */
    module->p2p_pw_group = NULL;
    module->p2p_sc_group = NULL;
    module->p2p_sc_remote_active_ranks = (bool*)
        malloc(sizeof(bool) * ompi_comm_size(module->p2p_comm));
    if (NULL == module->p2p_sc_remote_active_ranks) {
        ret = OMPI_ERR_TEMP_OUT_OF_RESOURCE;
        goto cleanup;
    }

    module->p2p_sc_remote_ranks = (int*)
        malloc(sizeof(int) * ompi_comm_size(module->p2p_comm));
    if (NULL == module->p2p_sc_remote_ranks) {
        ret = OMPI_ERR_TEMP_OUT_OF_RESOURCE;
        goto cleanup;
    }

    /* lock data */
    module->p2p_lock_status = 0;
    module->p2p_shared_count = 0;
    module->p2p_lock_received_ack = 0;

    /* fill in window information */
    win->w_osc_module = (ompi_osc_base_module_t*) module;

    /* sync memory - make sure all initialization completed */
    opal_atomic_mb();

    /* start up receive for protocol headers */
    OPAL_FREE_LIST_GET(&mca_osc_pt2pt_component.p2p_c_buffers,
                        item, ret);
    if (OMPI_SUCCESS != ret) goto cleanup;
    buffer = (ompi_osc_pt2pt_buffer_t*) item;
    buffer->data = (void*) module;

    ret = ompi_osc_pt2pt_component_irecv(buffer->payload,
                                         mca_osc_pt2pt_component.p2p_c_eager_size,
                                         MPI_BYTE,
                                         MPI_ANY_SOURCE,
                                         CONTROL_MSG_TAG,
                                         module->p2p_comm,
                                         &(buffer->request),
                                         component_fragment_cb,
                                         buffer);
    if (OMPI_SUCCESS != ret) goto cleanup;

    return OMPI_SUCCESS;

 cleanup:
    OBJ_DESTRUCT(&module->p2p_unlocks_pending);
    OBJ_DESTRUCT(&module->p2p_locks_pending);
    OBJ_DESTRUCT(&module->p2p_copy_pending_sendreqs);
    OBJ_DESTRUCT(&module->p2p_pending_sendreqs);
    OBJ_DESTRUCT(&module->p2p_acc_lock);
    OBJ_DESTRUCT(&module->p2p_cond);
    OBJ_DESTRUCT(&module->p2p_lock);

    if (NULL != buffer) {
        OPAL_FREE_LIST_RETURN(&mca_osc_pt2pt_component.p2p_c_buffers, item);
    }
    if (NULL != module->p2p_sc_remote_ranks) {
        free(module->p2p_sc_remote_ranks);
    }
    if (NULL != module->p2p_sc_remote_active_ranks) {
        free(module->p2p_sc_remote_active_ranks);
    }
    if (NULL != module->p2p_fence_coll_counts) {
        free(module->p2p_fence_coll_counts);
    }
    if (NULL != module->p2p_copy_num_pending_sendreqs) {
        free(module->p2p_copy_num_pending_sendreqs);
    }
    if (NULL != module->p2p_num_pending_sendreqs) {
        free(module->p2p_num_pending_sendreqs);
    }
    if (NULL != module->p2p_comm) ompi_comm_free(&module->p2p_comm);

#if OPAL_ENABLE_DEBUG
    memset(module, 0, sizeof(ompi_osc_base_module_t));
#endif
    if (NULL != module) free(module);

    return ret;
}
Example #16
0
/*
 * Init module on the communicator
 */
int mca_coll_hierarch_module_enable (mca_coll_base_module_t *module,
				     struct ompi_communicator_t *comm)
{
    int color;
    int size, rank, ret=OMPI_SUCCESS;
    
    struct ompi_communicator_t *lcomm=NULL;
    struct ompi_communicator_t *llcomm=NULL;
    struct mca_coll_hierarch_llead_t *llead=NULL;
    mca_coll_hierarch_module_t *hierarch_module = (mca_coll_hierarch_module_t *) module;

    rank = ompi_comm_rank(comm);
    size = ompi_comm_size(comm);
    
    color = hierarch_module->hier_colorarr[rank];
    
    /* Generate the subcommunicator based on the color returned by
       the previous function. */
    ret = ompi_comm_split ( comm, color, rank, &lcomm, 0 );
    if ( OMPI_SUCCESS != ret ) {
        goto exit;
    }
    if ( OMPI_COMM_CID_IS_LOWER ( lcomm, comm ) ) {
        /* Mark the communicator as 'extra retain' and increase the
           reference count by one more. See ompi_comm_activate
           for detailed comments
	*/
        OMPI_COMM_SET_EXTRA_RETAIN (lcomm);
        OBJ_RETAIN(lcomm);
    }
    
    hierarch_module->hier_comm     = comm;
    hierarch_module->hier_lcomm    = lcomm;
    hierarch_module->hier_num_reqs = 2 * size;
    hierarch_module->hier_reqs     = (ompi_request_t **) malloc (sizeof(ompi_request_t)*size*2);
    if ( NULL == hierarch_module->hier_reqs ) {
        goto exit;
    }
    
    /* allocate a certain number of the hierarch_llead structures, which store
       information about the local leaders and the corresponding subcommunicators
    */
    llead = (struct mca_coll_hierarch_llead_t * ) malloc ( 
                                                          sizeof(struct mca_coll_hierarch_llead_t));
    if ( NULL == llead ) {
        goto exit;
    }

    /* These two routines set all relevant entries in the mca_coll_base_comm_t 
     * structure. The first one sets the entries which are independent of the
     * offset (and therefore have to be done only once per module). The second
     * one depends on the offset, and therefore has to be called every time
     * we need a new llcomm.
     */
    mca_coll_hierarch_get_llr ( hierarch_module );
    mca_coll_hierarch_get_all_lleaders ( rank, hierarch_module, llead, 1 );        
    
    /* Generate the lleader communicator assuming that all lleaders are the first
       process in the list of processes with the same color. A function generating 
       other lleader-comms will follow soon. */
    color = MPI_UNDEFINED;
    if ( llead->am_lleader ) {
	color = 1;
    }
    ret = ompi_comm_split ( comm, color, rank, &llcomm, 0);
    if ( OMPI_SUCCESS != ret ) {
        goto exit;
    }
    if ( OMPI_COMM_CID_IS_LOWER ( llcomm, comm ) ) {
        /* Mark the communicator as 'extra retain' and increase the
           reference count by one more. See ompi_comm_activate
	   for detailed explanation. 
	*/
        OMPI_COMM_SET_EXTRA_RETAIN (llcomm);
        OBJ_RETAIN(llcomm);
    }

    
    llead->llcomm = llcomm;
    
    /* Store it now on the data structure */
    OBJ_CONSTRUCT(&(hierarch_module->hier_llead), opal_pointer_array_t);
    opal_pointer_array_add ( &(hierarch_module->hier_llead), llead);
    
    if ( mca_coll_hierarch_verbose_param ) {
        mca_coll_hierarch_dump_struct (hierarch_module);
    }
    
 exit:
    if ( OMPI_SUCCESS != ret ) {
        if (NULL != llead) {
            free(llead);
        }
        ompi_comm_free ( &lcomm );
	return OMPI_ERROR;
    }

    return OMPI_SUCCESS;
}
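mca_coll_hierarch_module_enable builds its hierarchy with two ompi_comm_split calls: one groups ranks by colour, and one collects the local leaders while everyone else passes MPI_UNDEFINED and gets no communicator. A user-level sketch of the same two-level split; the colour here is a hypothetical stand-in (simply rank / 4) for the module's hier_colorarr:

#include <mpi.h>

int main(int argc, char **argv)
{
    MPI_Comm local, leaders;
    int rank, local_rank, color;

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);

    /* First split: group ranks that share a "node"; rank / 4 stands in for
       the colour array used by the hierarch module. */
    MPI_Comm_split(MPI_COMM_WORLD, rank / 4, rank, &local);
    MPI_Comm_rank(local, &local_rank);

    /* Second split: only the first rank of each group (the local leader)
       joins the leader communicator; all others pass MPI_UNDEFINED and
       receive MPI_COMM_NULL. */
    color = (0 == local_rank) ? 1 : MPI_UNDEFINED;
    MPI_Comm_split(MPI_COMM_WORLD, color, rank, &leaders);

    if (MPI_COMM_NULL != leaders) {
        MPI_Comm_free(&leaders);
    }
    MPI_Comm_free(&local);
    MPI_Finalize();
    return 0;
}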