Ejemplo n.º 1
0
/*
 *  Allreduce
 *
 *  Function:   - allreduce
 *  Accepts:    - same as MPI_Allreduce()
 *  Returns:    - MPI_SUCCESS or error code
 */
int mca_coll_fca_allreduce(void *sbuf, void *rbuf, int count,
                           struct ompi_datatype_t *dtype, struct ompi_op_t *op,
                           struct ompi_communicator_t *comm,
                           mca_coll_base_module_t *module)
{
    mca_coll_fca_module_t *fca_module = (mca_coll_fca_module_t*)module;
    fca_reduce_spec_t spec;
    int ret;

    spec.sbuf = sbuf;
    spec.rbuf = rbuf;
    if (mca_coll_fca_fill_reduce_spec(count, dtype, op, &spec,
                                      fca_module->fca_comm_caps.max_payload)
            != OMPI_SUCCESS) {
        FCA_VERBOSE(5, "Unsupported allreduce operation %s, using fallback\n", op->o_name);
        goto orig_allreduce;
    }

    FCA_VERBOSE(5,"Using FCA Allreduce");
    ret = mca_coll_fca_component.fca_ops.do_all_reduce(fca_module->fca_comm, &spec);
    if (ret < 0) {
        if (ret == -EUSEMPI) {
            goto orig_allreduce;
        }
        FCA_ERROR("Allreduce failed: %s", mca_coll_fca_component.fca_ops.strerror(ret));
        return OMPI_ERROR;
    }
    return OMPI_SUCCESS;

orig_allreduce:
    return fca_module->previous_allreduce(sbuf, rbuf, count, dtype, op, comm,
                                          fca_module->previous_allreduce_module);
}
Ejemplo n.º 2
0
static int _create_fca_comm(mca_scoll_fca_module_t *fca_module)
{
    int comm_size;
    int rc, ret;

    rc = _fca_comm_new(fca_module);
    if (rc != OSHMEM_SUCCESS)
        return rc;

    /* allocate comm_init_spec */
    FCA_MODULE_VERBOSE(fca_module,
                       1,
                       "Starting COMM_INIT comm_id %d proc_idx %d num_procs %d",
                       fca_module->fca_comm_desc.comm_id, fca_module->local_proc_idx, fca_module->num_local_procs);

    comm_size = fca_module->comm->proc_count;
    ret = mca_scoll_fca_comm_init(mca_scoll_fca_component.fca_context,
                                  oshmem_proc_group_find_id(fca_module->comm,
                                                            fca_module->rank),
                                  comm_size,
                                  fca_module->local_proc_idx,
                                  fca_module->num_local_procs,
                                  &fca_module->fca_comm_desc,
                                  &fca_module->fca_comm);
    if (ret < 0) {
        FCA_ERROR("COMM_INIT failed: %s", fca_strerror(ret));
        return OSHMEM_ERROR;
    }

    /* get communicator capabilities */
    ret = fca_comm_get_caps(fca_module->fca_comm, &fca_module->fca_comm_caps);
    if (ret < 0) {
        FCA_ERROR("GET_COMM_CAPS failed: %s", fca_strerror(ret));
        return OSHMEM_ERROR;
    }

    /* by this point every rank in the communicator is set up */
    FCA_MODULE_VERBOSE(fca_module,
                       1,
                       "Initialized FCA communicator, comm_id %d",
                       fca_module->fca_comm_desc.comm_id);

    return OSHMEM_SUCCESS;
}
Ejemplo n.º 3
0
static void __destroy_fca_comm(mca_coll_fca_module_t *fca_module)
{
    int ret;

    fca_comm_destroy(fca_module->fca_comm);
    if (fca_module->rank == 0) {
        ret = fca_comm_end(mca_coll_fca_component.fca_context,
                                                      fca_module->fca_comm_desc.comm_id);
        if (ret < 0) {
            FCA_ERROR("COMM_END failed: %s", fca_strerror(ret));
        }
    }

    FCA_MODULE_VERBOSE(fca_module, 1, "Destroyed FCA communicator, comm_id %d",
                       fca_module->fca_comm_desc.comm_id);
}
Ejemplo n.º 4
0
static void mca_coll_fca_comm_wrap_destruct(mca_coll_fca_comm_wrap_t *item) {

    int ret;

    if(item->fca_comm != NULL)
    {
        fca_comm_destroy(item->fca_comm);
        if (item->rank == 0) {
            ret = fca_comm_end(mca_coll_fca_component.fca_context,
                    item->comm_id);
            if (ret < 0) {
                FCA_ERROR("COMM_END failed: %s", fca_strerror(ret));
            }
        }

    }

}
Ejemplo n.º 5
0
static void _destroy_fca_comm(mca_scoll_fca_module_t *fca_module)
{
    int ret;
    struct oshmem_group_t *comm = fca_module->comm;
    const int root_pe = oshmem_proc_pe(comm->proc_array[root_id]);

    fca_comm_destroy(fca_module->fca_comm);
    if (comm->my_pe == root_pe && mca_scoll_fca_component.fca_context) {
        ret = fca_comm_end(mca_scoll_fca_component.fca_context,
                           fca_module->fca_comm_desc.comm_id);
        if (ret < 0) {
            FCA_ERROR("COMM_END failed: %s", fca_strerror(ret));
        }
    }

    FCA_MODULE_VERBOSE(fca_module,
                       1,
                       "Destroyed FCA communicator, comm_id %d",
                       fca_module->fca_comm_desc.comm_id);
}
Ejemplo n.º 6
0
/*
 *  * Initialize module on the communicator
 *   */
static int mca_scoll_fca_module_enable(mca_scoll_base_module_t *module,
                                       struct oshmem_group_t *comm)
{

    mca_scoll_fca_module_t *fca_module = (mca_scoll_fca_module_t*) module;
    int rc;

    fca_module->comm = comm;
    fca_module->rank = comm->my_pe;

    rc = mca_scoll_fca_get_fca_lib(comm);
    if (rc != OSHMEM_SUCCESS)
        goto exit_fatal;

    rc = _save_coll_handlers(fca_module);
    if (rc != OSHMEM_SUCCESS)
        goto exit_fatal;

    rc = _get_local_ranks(fca_module);
    if (rc != OSHMEM_SUCCESS)
        goto exit_fatal;

    rc = _create_fca_comm(fca_module);
    if (rc != OSHMEM_SUCCESS)
        goto exit_fatal;

    FCA_MODULE_VERBOSE(fca_module, 1, "FCA Module initialized");
    return OMPI_SUCCESS;

    exit_fatal:
    /* it is possible that other pe(s) succesfully enabled fca.
     * So differnt frameworks will be used for collective ops
     */
    FCA_ERROR("FCA module enable failed - aborting to prevent inconsistent application state");
    /* There's no modules available */
    opal_show_help("help-oshmem-scoll-fca.txt",
                   "module_enable:fatal", true,
		   "FCA module enable failed - aborting to prevent inconsistent application state");
    oshmem_shmem_abort(-1);
    return OMPI_ERROR;
}
Ejemplo n.º 7
0
static int _get_local_ranks(mca_scoll_fca_module_t *fca_module)
{
    struct oshmem_group_t *comm = fca_module->comm;
    oshmem_proc_t* proc;
    int i, rank;

    /* Count the local ranks */
    fca_module->num_local_procs = 0;
    for (rank = 0; rank < comm->proc_count; ++rank) {
        proc = comm->proc_array[rank];
        if (OPAL_PROC_ON_LOCAL_NODE(proc->super.proc_flags)) {
            if (proc->super.proc_name.vpid == (uint32_t) fca_module->rank) {
                fca_module->local_proc_idx = fca_module->num_local_procs;
            }
            ++fca_module->num_local_procs;
        }
    }
    /* Make a list of local ranks */
    fca_module->local_ranks = calloc(fca_module->num_local_procs,
                                     sizeof *fca_module->local_ranks);
    if (!fca_module->local_ranks) {
        FCA_ERROR("Failed to allocate memory for %d local ranks",
                  fca_module->num_local_procs);
        return OSHMEM_ERROR;
    }

    i = 0;
    for (rank = 0; rank < comm->proc_count; ++rank) {
        proc = comm->proc_array[rank];
        if (OPAL_PROC_ON_LOCAL_NODE(proc->super.proc_flags)) {
            fca_module->local_ranks[i++] = rank;
        }
    }

    FCA_MODULE_VERBOSE(fca_module,
                       3,
                       "i am %d/%d",
                       fca_module->local_proc_idx, fca_module->num_local_procs);

    return OSHMEM_SUCCESS;
}
Ejemplo n.º 8
0
/*
 *  Function:   - barrier
 *  Returns:    - MPI_SUCCESS or error code
 */
int mca_coll_fca_barrier(struct ompi_communicator_t *comm,
                         mca_coll_base_module_t *module)
{
    mca_coll_fca_module_t *fca_module = (mca_coll_fca_module_t*)module;
    int ret;

    FCA_VERBOSE(5,"Using FCA Barrier");
    ret = mca_coll_fca_component.fca_ops.do_barrier(fca_module->fca_comm);
    if (ret < 0) {
        if (ret == -EUSEMPI) {
            goto orig_barrier;
        }
        FCA_ERROR("Barrier failed: %s", mca_coll_fca_component.fca_ops.strerror(ret));
        return OMPI_ERROR;
    }
    return OMPI_SUCCESS;

orig_barrier:
    return fca_module->previous_barrier(comm, fca_module->previous_barrier_module);

}
Ejemplo n.º 9
0
/**
 * Fills local rank information in fca_module.
 */
static int __get_local_ranks(mca_coll_fca_module_t *fca_module)
{
    ompi_communicator_t *comm = fca_module->comm;
    ompi_proc_t* proc;
    int i, rank;

    /* Count the local ranks */
    fca_module->num_local_procs = 0;
    for (rank = 0; rank < ompi_comm_size(comm); ++rank) {
        proc = __local_rank_lookup(comm, rank);
        if (OPAL_PROC_ON_LOCAL_NODE(proc->super.proc_flags)) {
            if (rank == fca_module->rank) {
                fca_module->local_proc_idx = fca_module->num_local_procs;
            }
            ++fca_module->num_local_procs;
        }
    }

    /* Make a list of local ranks */
    fca_module->local_ranks = calloc(fca_module->num_local_procs,
                                     sizeof *fca_module->local_ranks);
    if (!fca_module->local_ranks) {
        FCA_ERROR("Failed to allocate memory for %d local ranks",
                  fca_module->num_local_procs);
        return OMPI_ERROR;
    }

    i = 0;
    for (rank = 0; rank < ompi_comm_size(comm); ++rank) {
        proc = __local_rank_lookup(comm, rank);
        if (OPAL_PROC_ON_LOCAL_NODE(proc->super.proc_flags)) {
            fca_module->local_ranks[i++] = rank;
        }
    }

    FCA_MODULE_VERBOSE(fca_module, 3, "i am %d/%d", fca_module->local_proc_idx,
                       fca_module->num_local_procs);
    return OMPI_SUCCESS;
}
Ejemplo n.º 10
0
static int __fca_comm_new(mca_coll_fca_module_t *fca_module)
{
    ompi_communicator_t *comm = fca_module->comm;
    fca_comm_new_spec_t spec = {0,};
    int info_size, all_info_size;
    void *all_info = NULL;
    void *my_info = NULL;
    int *rcounts = NULL;
    int *displs = NULL;
    int i, rc, ret, comm_size = ompi_comm_size(fca_module->comm);

    /* call fca_get_rank_info() on node managers only*/
    if (fca_module->local_proc_idx == 0) {
#if OMPI_FCA_VERSION >= 30
        my_info = fca_get_rank_info(mca_coll_fca_component.fca_context,
                                    fca_module->rank, &info_size);
#else
        my_info = fca_get_rank_info(mca_coll_fca_component.fca_context,
                                    &info_size);
#endif
        if (!my_info) {
            FCA_ERROR("fca_get_rank_info returned NULL");
            return OMPI_ERROR;
        }
    } else {
        info_size = 0;
    }
    FCA_MODULE_VERBOSE(fca_module, 1, "Info size: %d", info_size);

    /* Allocate gather buffer on the root rank */
    if (fca_module->rank == 0) {
        rcounts = calloc(comm_size, sizeof *rcounts);
    }

    /* Get all rank info sizes using MPI_Gather */
    rc = comm->c_coll.coll_gather(&info_size, 1, MPI_INT, rcounts, 1, MPI_INT, 0,
                                  comm, comm->c_coll.coll_gather_module);
    if (rc != OMPI_SUCCESS)
        return rc;

    /* Allocate buffer for gathering rank information on rank0 */
    if (fca_module->rank == 0) {
        all_info_size = 0;
        displs = calloc(comm_size, sizeof *displs);

        for (i = 0; i < comm_size; ++i) {
            displs[i] = all_info_size;
            all_info_size += rcounts[i];

            if (rcounts[i] > 0)
                ++spec.rank_count;

            FCA_MODULE_VERBOSE(fca_module, 1, "gatherv: rcounts[%d]=%d displs[%d]=%d",
                               i, rcounts[i], i, displs[i]);
        }

        FCA_MODULE_VERBOSE(fca_module, 1, "Total rank_info size: %d", all_info_size);
        all_info = calloc(all_info_size, 1);
    }

    /* Send all node managers information to rank0 using MPI_Gatherv */
    rc = comm->c_coll.coll_gatherv(my_info, info_size, MPI_BYTE,
                                   all_info, rcounts, displs, MPI_BYTE, 0,
                                   comm, comm->c_coll.coll_gather_module);
    if (rc != OMPI_SUCCESS) {
        FCA_ERROR("Failed to gather rank information to rank0: %d", rc);
        return rc;
    }

    /* Rank0 calls fca_comm_new() and fills fca_comm_spec filed */
    if (fca_module->rank == 0) {
        spec.rank_info  = all_info;
        spec.is_comm_world = comm == MPI_COMM_WORLD;

        free(displs);
        free(rcounts);
#if OMPI_FCA_VERSION >= 30
        spec.comm_size = comm_size;

        spec.comm_init_data = NULL;
        spec.comm_init_data_size = 0;
#endif
        FCA_MODULE_VERBOSE(fca_module, 1, "starting fca_comm_new(), rank_count: %d",
                           spec.rank_count);

        ret = fca_comm_new(mca_coll_fca_component.fca_context,
                           &spec, &fca_module->fca_comm_desc);

        free(all_info);
    }

    /* Broadcast return value from rank0 to all other ranks */
    rc = fca_module->previous_bcast(&ret, 1, MPI_INT, 0, comm,
                                    fca_module->previous_bcast_module);
    if (rc != OMPI_SUCCESS) {
        FCA_ERROR("Failed to broadcast comm_new return value from rank0: %d", rc);
        return rc;
    }

    /* Examine comm_new return value */
    if (ret < 0) {
        FCA_ERROR("COMM_NEW failed: %s", fca_strerror(ret));
        return OMPI_ERROR;
    }
#if OMPI_FCA_VERSION >= 30
    /* Send comm_ini_data_size to all ranks in comm */
    rc = fca_module->previous_bcast(&spec.comm_init_data_size, 1, MPI_INT,
                                    0, comm, fca_module->previous_bcast_module);
    if (OMPI_SUCCESS != rc) {
        FCA_ERROR("Failed to broadcast comm init data size value from rank0: %d", rc);
        return rc;
    }

    if (0 != fca_module->rank &&
        NULL == (spec.comm_init_data = calloc(1, spec.comm_init_data_size))) {
        FCA_ERROR("Failed to allocate memory for comm init data.");
        return OMPI_ERROR;
    }

    /* Send comm_ini_data to all ranks in comm */
    rc = fca_module->previous_scatter(spec.comm_init_data, spec.comm_init_data_size, MPI_BYTE,
                                      spec.comm_init_data, spec.comm_init_data_size, MPI_BYTE,
                                      0, comm, fca_module->previous_scatter_module);
    if (OMPI_SUCCESS != rc) {
        FCA_ERROR("Failed to scatter comm_init sizes return value from rank0: %d", rc);
        return rc;
    }
#endif
    /* Release allocate rank_info on node managers */
    if (fca_module->local_proc_idx == 0) {
        fca_free_rank_info(my_info);
    }

    /* Pass fca_comm_desc to all ranks using MPI_Bcast */
    rc = fca_module->previous_bcast(&fca_module->fca_comm_desc,
                                    sizeof(fca_module->fca_comm_desc), MPI_BYTE, 0,
                                    comm, fca_module->previous_bcast_module);
    if (rc != OMPI_SUCCESS) {
        FCA_ERROR("Failed to broadcast comm_desc from rank0: %d", rc);
        return rc;
    }

    FCA_MODULE_VERBOSE(fca_module, 1, "Received FCA communicator spec, comm_id %d",
                       fca_module->fca_comm_desc.comm_id);

#if OMPI_FCA_VERSION >= 30
    /* allocate comm_init_spec */
    FCA_MODULE_VERBOSE(fca_module, 1, "Starting COMM_INIT comm_id %d proc_idx %d num_procs %d",
                       fca_module->fca_comm_desc.comm_id, fca_module->local_proc_idx,
                       fca_module->num_local_procs);

    ret = mca_coll_fca_comm_init(mca_coll_fca_component.fca_context,
                                 fca_module->rank, comm_size,
                                 fca_module->local_proc_idx, fca_module->num_local_procs,
                                 &fca_module->fca_comm_desc, &fca_module->fca_comm,
                                 spec.comm_init_data);
    if (ret < 0) {
        FCA_ERROR("COMM_INIT failed: %s", fca_strerror(ret));
        return OMPI_ERROR;
    }

    if (0 != fca_module->rank) {
        free(spec.comm_init_data);
    }
#endif
    return OMPI_SUCCESS;
}
Ejemplo n.º 11
0
static int __create_fca_comm(mca_coll_fca_module_t *fca_module)
{
    int rc, ret;
    int result = MPI_UNEQUAL;
    mca_coll_fca_c_cache_item_t *c_item = NULL, *c_item_new = NULL;
    mca_coll_fca_component_t *cm = &mca_coll_fca_component;
    int comm_size = ompi_comm_size(fca_module->comm);
    ompi_communicator_t *comm = fca_module->comm;
    opal_list_t *c_cache;
    struct timeval start, end, seq_start, seq_end, par_start, par_end;
    int act_deep;


    if(mca_coll_fca_component.fca_verbose == 10) {
        gettimeofday(&start, NULL);
    }

    if(mca_coll_fca_component.fca_enable_cache) {

        c_cache = &cm->c_cache;

        if(mca_coll_fca_component.fca_enable_hash){

            int  grank = ORTE_PROC_MY_NAME->vpid;
            int hash_index, part_of_hash_index;

            if(mca_coll_fca_component.fca_parallel_hash_calc == 1) {

                if(mca_coll_fca_component.fca_verbose == 10){
                    gettimeofday(&par_start, NULL);
                }

                part_of_hash_index = modular_pow(cm->fca_primes[grank % cm->fca_number_of_primes], grank, cm->fca_hash_size);
                rc = comm->c_coll.coll_allreduce(&part_of_hash_index, &hash_index, 1, MPI_INT, MPI_SUM, comm, comm->c_coll.coll_allreduce_module);
                if (rc != OMPI_SUCCESS) {
                    FCA_ERROR("Failed to reduce hash_index: %d", rc);
                    return rc;
                }

                if(mca_coll_fca_component.fca_verbose == 10){
                    gettimeofday(&par_end, NULL);
                    mca_coll_fca_component.fca_work_time_parallel =+
                        par_end.tv_sec - par_start.tv_sec + 1e-6 * (par_end.tv_usec - par_start.tv_usec);
                }
            }else{

                if(mca_coll_fca_component.fca_verbose == 10){
                    gettimeofday(&seq_start, NULL);
                }

                hash_index = 0;
                int q_counter = 0;
                int q_comm_size = ompi_comm_size (comm);

                for(q_counter = 0; q_counter < q_comm_size; q_counter++)
                {
                    hash_index += modular_pow(cm->fca_primes[q_counter % cm->fca_number_of_primes], q_counter, cm->fca_hash_size);
                }

                if(mca_coll_fca_component.fca_verbose == 10){
                    gettimeofday(&seq_end, NULL);
                    mca_coll_fca_component.fca_work_time_sequency =+
                        seq_end.tv_sec - seq_start.tv_sec + 1e-6 * (seq_end.tv_usec - seq_start.tv_usec);
                }

            }

            if(cm->fca_hash[hash_index % cm->fca_hash_size] != NULL)
            {
                c_cache = cm->fca_hash[hash_index % cm->fca_hash_size];
                if(mca_coll_fca_component.fca_verbose == 10) {
                    gettimeofday(&end, NULL);
                    mca_coll_fca_component.fca_total_work_time =+
                        end.tv_sec - start.tv_sec + 1e-6 * (end.tv_usec - start.tv_usec);
                    mca_coll_fca_component.fca_hash_hit += 1;
                }
            }else
            {
                if(mca_coll_fca_component.fca_verbose == 10) {
                    mca_coll_fca_component.fca_hash_miss += 1;
                }
                c_cache = OBJ_NEW(opal_list_t);
                cm->fca_hash[hash_index % cm->fca_hash_size] = c_cache;
            }

        }

        act_deep = 0;
        for( c_item = (mca_coll_fca_c_cache_item_t *)opal_list_get_first(c_cache);
                c_item != (mca_coll_fca_c_cache_item_t *)opal_list_get_end(c_cache);
                c_item = (mca_coll_fca_c_cache_item_t *)opal_list_get_next((opal_list_item_t *) c_item)){
            act_deep++;
            /* first check the size */
            if( c_item && (comm_size == c_item->size)) {
                /* then we have a potential cache hit */
                ompi_comm_compare(comm, c_item->comm, &result);
                if( MPI_CONGRUENT == result) {
                    /* cache hit! Return the context and be done with it */
                    /* first bump the score */
                    ret = fca_comm_get_caps(c_item->fca_comm_wrap->fca_comm, &fca_module->fca_comm_caps);
                    if (ret < 0) {
                        FCA_ERROR("GET_COMM_CAPS failed: %s", fca_strerror(ret));
                        return OMPI_ERROR;
                    }
                    fca_module->fca_comm = c_item->fca_comm_wrap->fca_comm;

                    if(mca_coll_fca_component.fca_verbose == 10) {
                        gettimeofday(&end, NULL);

                        mca_coll_fca_component.fca_total_work_time =+
                            end.tv_sec - start.tv_sec + 1e-6 * (end.tv_usec - start.tv_usec);

                        mca_coll_fca_component.fca_cache_hit += 1;

                        if(act_deep>mca_coll_fca_component.fca_max_deep_in_cache)
                            mca_coll_fca_component.fca_max_deep_in_cache = act_deep;
                    }
                    return OMPI_SUCCESS;
                }
            }
        }
    }
    rc = __fca_comm_new(fca_module);
    if (OMPI_SUCCESS != rc) {
        return rc;
    }
#if OMPI_FCA_VERSION < 30
    /* allocate comm_init_spec */
    FCA_MODULE_VERBOSE(fca_module, 1, "Starting COMM_INIT comm_id %d proc_idx %d num_procs %d",
                       fca_module->fca_comm_desc.comm_id, fca_module->local_proc_idx,
                       fca_module->num_local_procs);

    ret = mca_coll_fca_comm_init(mca_coll_fca_component.fca_context,
                                 fca_module->rank, ompi_comm_size(fca_module->comm),
                                 fca_module->local_proc_idx, fca_module->num_local_procs,
                                 &fca_module->fca_comm_desc, &fca_module->fca_comm);
    if (ret < 0) {
        FCA_ERROR("COMM_INIT failed: %s", fca_strerror(ret));
        return OMPI_ERROR;
     }
#endif
    /* get communicator capabilities */
    ret = fca_comm_get_caps(fca_module->fca_comm, &fca_module->fca_comm_caps);
    if (ret < 0) {
        FCA_ERROR("GET_COMM_CAPS failed: %s", fca_strerror(ret));
        return OMPI_ERROR;
    }

    /* by this point every rank in the communicator is set up */
    FCA_MODULE_VERBOSE(fca_module, 1, "Initialized FCA communicator, comm_id %d",
            fca_module->fca_comm_desc.comm_id);
    if(mca_coll_fca_component.fca_enable_cache) {

        c_item_new = OBJ_NEW(mca_coll_fca_c_cache_item_t);
        c_item_new->fca_comm_wrap = OBJ_NEW(mca_coll_fca_comm_wrap_t);

        OBJ_RETAIN(comm);

        c_item_new->size = comm_size;
        c_item_new->comm = comm;
        c_item_new->fca_comm_wrap->fca_comm = fca_module->fca_comm;
        c_item_new->fca_comm_wrap->rank = fca_module->rank;
        c_item_new->fca_comm_wrap->comm_id = fca_module->fca_comm_desc.comm_id;

        opal_list_append(c_cache,(opal_list_item_t *) c_item_new);
    }

    if(mca_coll_fca_component.fca_verbose == 10) {

        gettimeofday(&end, NULL);

        mca_coll_fca_component.fca_total_work_time =+
            end.tv_sec - start.tv_sec + 1e-6 * (end.tv_usec - start.tv_usec);

        mca_coll_fca_component.fca_cache_miss += 1;
    }
    return OMPI_SUCCESS;
}
Ejemplo n.º 12
0
/*
 *  * Invoked when there's a new communicator that has been created.
 *   * Look at the communicator and decide which set of functions and
 *    * priority we want to return.
 *     */
mca_scoll_base_module_t *
mca_scoll_fca_comm_query(struct oshmem_group_t *comm, int *priority)
{
    mca_scoll_base_module_t *module;
    int size = comm->proc_count;
    int local_peers = 0;

    mca_scoll_fca_module_t *fca_module;

    *priority = 0;
    module = NULL;

    if (!mca_scoll_fca_component.fca_enable) {
        FCA_VERBOSE(20, "FCA is disable on user request => exiting");
        goto exit;
    }

    if (mca_memheap.memheap_component == NULL ) {
        FCA_VERBOSE(20, "No memheap => exiting");
        goto exit;
    }

    if (NULL == mca_scoll_fca_component.ret) {
        MCA_MEMHEAP_CALL(private_alloc(sizeof(int),(void **)&mca_scoll_fca_component.ret));
        MCA_MEMHEAP_CALL(private_alloc(oshmem_group_all->proc_count*sizeof(*mca_scoll_fca_component.rcounts), (void **)&mca_scoll_fca_component.rcounts ));
        MCA_MEMHEAP_CALL(private_alloc(/*info_size*/20,&mca_scoll_fca_component.my_info_exchangeable));
        MCA_MEMHEAP_CALL(private_alloc(sizeof(fca_comm_desc_t), &mca_scoll_fca_component.fca_comm_desc_exchangeable));
    }
    if (size < mca_scoll_fca_component.fca_np) {
        FCA_VERBOSE(20,
                    "size(%d) < fca_np(%d)",
                    size, mca_scoll_fca_component.fca_np);
        goto exit;
    }

    if (size < 2) {
        FCA_VERBOSE(20, "size(%d) < 2", size);
        goto exit;
    }

    if (!have_remote_peers(comm,
                           size,
                           &local_peers) /* || OMPI_COMM_IS_INTER(comm)*/) {
        FCA_VERBOSE(1,
                    "all peers in group are on the same node, fca disabled\n");
        goto exit;
    }

    fca_module = OBJ_NEW(mca_scoll_fca_module_t);
    if (!fca_module) {
        goto exit_fatal;
    }
    fca_module->super.scoll_module_enable = mca_scoll_fca_module_enable;
    fca_module->super.scoll_collect =
            mca_scoll_fca_component.fca_enable_allgather ?
                    mca_scoll_fca_collect : NULL;
    fca_module->super.scoll_reduce =
            mca_scoll_fca_component.fca_enable_allreduce ?
                    mca_scoll_fca_reduce : NULL;
    fca_module->super.scoll_barrier =
            mca_scoll_fca_component.fca_enable_barrier ? mca_scoll_fca_barrier :
                                                         NULL;
    fca_module->super.scoll_broadcast =
            mca_scoll_fca_component.fca_enable_bcast ? mca_scoll_fca_broadcast :
                                                       NULL;

    *priority = mca_scoll_fca_component.fca_priority;
    module = &fca_module->super;

    exit:
    FCA_VERBOSE(4,
                "Query FCA module for comm %p size %d rank %d local_peers=%d: priority=%d %s",
                (void *)comm, size, comm->my_pe, local_peers, *priority, module ? "enabled" : "disabled");
    return module;

    exit_fatal:
    /* it is possible that other pe(s) succesfully initialized fca.
     * So differnt frameworks will be used for collective ops
     */
    FCA_ERROR("FCA module query failed - aborting");
    oshmem_shmem_abort(-1);
    return NULL ;
}
Ejemplo n.º 13
0
static int _fca_comm_new(mca_scoll_fca_module_t *fca_module)
{
    struct oshmem_group_t *comm = fca_module->comm;
    fca_comm_new_spec_t spec;
    int info_size = 0, all_info_size = 0;
    void *all_info = NULL, *my_info = NULL;
    int *disps = NULL;
    int i;
    const int root_pe = oshmem_proc_pe(comm->proc_array[root_id]);
    const int my_id = oshmem_proc_group_find_id(comm, comm->my_pe);
    /* call fca_get_rank_info() on node managers only*/

    if (fca_module->local_proc_idx == 0) {
        my_info = fca_get_rank_info(mca_scoll_fca_component.fca_context,
                                    &info_size);
        if (!my_info) {
            FCA_ERROR("fca_get_rank_info returned NULL");
            return OSHMEM_ERROR;
        }

    } else {
        info_size = 0;
    }

    FCA_MODULE_VERBOSE(fca_module, 1, "Info size: %d", info_size);
    for (i = 0; i < comm->proc_count; i++) {
        mca_scoll_fca_component.rcounts[i] = -1;
    }
    _internal_barrier(fca_module);
    MCA_SPML_CALL(put((void *)&mca_scoll_fca_component.rcounts[my_id], (size_t)sizeof(info_size), (void *)&info_size, root_pe));

    if (root_pe == comm->my_pe) {
        int value = -1;
        for (i = 0; i < comm->proc_count; i++) {
            MCA_SPML_CALL(wait((void *)&mca_scoll_fca_component.rcounts[i], SHMEM_CMP_NE, &value, SHMEM_INT));
        }
    }

    /* Allocate buffer for gathering rank information on rank0 */
    if (root_pe == comm->my_pe) {
        all_info_size = 0;
        disps = calloc(comm->proc_count, sizeof *disps);
        for (i = 0; i < comm->proc_count; ++i) {
            disps[i] = all_info_size;
            all_info_size += mca_scoll_fca_component.rcounts[i];
        }
        all_info = NULL;
        FCA_MODULE_VERBOSE(fca_module,
                           1,
                           "Total rank_info size: %d",
                           all_info_size);
        all_info = malloc(all_info_size);
        memset(all_info, 0, all_info_size);
    }

    if (my_info) {
        memcpy(mca_scoll_fca_component.my_info_exchangeable,
               my_info,
               info_size);
    }
    _internal_barrier(fca_module);
    if (root_pe == comm->my_pe) {
        for (i = 0; i < comm->proc_count; i++) {
            if (mca_scoll_fca_component.rcounts[i] > 0) {
                MCA_SPML_CALL(get((void *)mca_scoll_fca_component.my_info_exchangeable, mca_scoll_fca_component.rcounts[i], (void*)(((char*)all_info)+disps[i]),comm->proc_array[i]->super.proc_name.vpid));
            }
        }
    }

    /* Rank0 calls fca_comm_new() and fills fca_comm_spec filed */
    if (root_pe == comm->my_pe) {
        spec.rank_info = all_info;
        spec.is_comm_world = comm == oshmem_group_all;
        spec.rank_count = 0;
        for (i = 0; i < comm->proc_count; ++i) {
            FCA_MODULE_VERBOSE(fca_module,
                               1,
                               "rcounts[%d]=%d disps[%d]=%d",
                               i, mca_scoll_fca_component.rcounts[i], i, disps[i]);
            if (mca_scoll_fca_component.rcounts[i] > 0)
                ++spec.rank_count;
        }

        FCA_MODULE_VERBOSE(fca_module,
                           1,
                           "starting fca_comm_new(), rank_count: %d",
                           spec.rank_count);

        *mca_scoll_fca_component.ret =
                fca_comm_new(mca_scoll_fca_component.fca_context,
                             &spec,
                             &fca_module->fca_comm_desc);

        free(disps);
        free(all_info);
    }

    _internal_barrier(fca_module);

    if (root_pe != comm->my_pe) {
        MCA_SPML_CALL(get((void *)mca_scoll_fca_component.ret,sizeof(int), (void *)mca_scoll_fca_component.ret, root_pe));
    }

    /* Examine comm_new return value */
    _internal_barrier(fca_module);
    if (*mca_scoll_fca_component.ret < 0) {
        FCA_ERROR("rank %i: COMM_NEW failed: %s",
                  fca_module->rank, fca_strerror(*mca_scoll_fca_component.ret));
        return OSHMEM_ERROR;
    }

    /* Release allocate rank_info on node managers */
    if (fca_module->local_proc_idx == 0) {
        fca_free_rank_info(my_info);
    }

    {
        if (root_pe == comm->my_pe) {
            memcpy(mca_scoll_fca_component.fca_comm_desc_exchangeable,
                   &fca_module->fca_comm_desc,
                   sizeof(fca_module->fca_comm_desc));
        }

        _internal_barrier(fca_module);
        if (root_pe != comm->my_pe) {
            MCA_SPML_CALL(get((void *)mca_scoll_fca_component.fca_comm_desc_exchangeable, sizeof(fca_module->fca_comm_desc), (void *)&fca_module->fca_comm_desc, root_pe));
        }

        _internal_barrier(fca_module);

    }
    FCA_MODULE_VERBOSE(fca_module,
                       1,
                       "Received FCA communicator spec, comm_id %d",
                       fca_module->fca_comm_desc.comm_id);
    return OSHMEM_SUCCESS;
}
Ejemplo n.º 14
0
int mca_coll_fca_allgatherv(void *sbuf, int scount,
                           struct ompi_datatype_t *sdtype,
                           void *rbuf, int *rcounts, int *disps,
                           struct ompi_datatype_t *rdtype,
                           struct ompi_communicator_t *comm,
                           mca_coll_base_module_t *module)
{
    mca_coll_fca_module_t *fca_module = (mca_coll_fca_module_t*)module;
#if OMPI_FCA_ALLGATHER == 1
    MCA_COLL_FCA_DECLARE_CONVERTOR(sconv);
    MCA_COLL_FCA_DECLARE_CONVERTOR(rconv);
    fca_gatherv_spec_t spec;
    size_t rgap, rsize;
    int sum_rcounts;
    ptrdiff_t rdtype_extent;
    int comm_size;
    int relemsize;
    size_t displ;
    int i, ret;

    comm_size = ompi_comm_size(fca_module->comm);
    FCA_DT_EXTENT(rdtype, &rdtype_extent);

    /* Setup send buffer */
    spec.sendsize =
            __setup_gather_sendbuf(sbuf, (char *)rbuf + disps[fca_module->rank] * rdtype_extent,
                                   scount, sdtype, &sconv, &spec.sbuf);

    /* Allocate alternative recvsizes/displs on the stack, which will be in bytes */
    spec.recvsizes = alloca(sizeof *spec.recvsizes * comm_size);
    spec.displs = alloca(sizeof *spec.displs * comm_size);

    /* Calculate the size of receive buffer */
    sum_rcounts = 0;
    for (i = 0; i < comm_size; ++i) {
        sum_rcounts += rcounts[i];
    }

    /* convert MPI counts which depend on dtype) to FCA sizes (which are in bytes) */
    if (mca_coll_fca_array_size(rdtype, sum_rcounts, &rgap, &rsize) && rgap == 0) {
        spec.rbuf = rbuf;
        for (i = 0; i < comm_size; ++i) {
            spec.recvsizes[i] = rcounts[i] * rdtype_extent;
            spec.displs[i] = disps[i] * rdtype_extent;
        }
    } else {
        /*
         * Reorder and remove gaps in displs - we want to allocate as little memory
         * as possible, and we should unpack one-by-one anyway.
         */
        FCA_VERBOSE(5, "Reordering AllgatherV displacements");
        mca_coll_fca_convertor_create(&rconv, rdtype, sum_rcounts, rbuf,
                                      MCA_COLL_FCA_CONV_RECV, &spec.rbuf, &rsize);
        assert(rsize % sum_rcounts == 0);
        relemsize = rsize / sum_rcounts;

        displ = 0;
        for (i = 0; i < comm_size; ++i) {
            spec.recvsizes[i] = rcounts[i] * relemsize;
            spec.displs[i] = displ;
            displ += spec.recvsizes[i];
        }
        assert(displ == rsize);
    }

    /* Call FCA AllgatherV */
    FCA_VERBOSE(5,"Using FCA Allgatherv");
    ret = mca_coll_fca_component.fca_ops.do_allgatherv(fca_module->fca_comm, &spec);

    /* Destroy convertors if operation failed */
    if (ret < 0) {
        mca_coll_fca_convertor_destroy(&sconv);
        mca_coll_fca_convertor_destroy(&rconv);
        if (ret == -EUSEMPI) {
            goto orig_allgatherv;
        }
        FCA_ERROR("Allgatherv failed: %s", mca_coll_fca_component.fca_ops.strerror(ret));
        return OMPI_ERROR;
    }

    /* Unpack data and clean up convertor */
    mca_coll_fca_convertor_destroy(&sconv);
    if (mca_coll_fca_convertor_valid(&rconv)) {
        FCA_VERBOSE(5, "Unpacking AllgatherV receive buffer rdtype_extent=%ld",
                    rdtype_extent);
        for (i = 0; i < comm_size; ++i) {
            mca_coll_fca_convertor_set(&rconv, rdtype,
                                       (char*)rbuf + disps[i] * rdtype_extent,
                                       rcounts[i]);
            mca_coll_fca_convertor_process(&rconv, spec.displs[i]);
        }
        mca_coll_fca_convertor_destroy(&rconv);
    }
    return OMPI_SUCCESS;

orig_allgatherv:
#endif
    return fca_module->previous_allgatherv(sbuf, scount, sdtype, rbuf, rcounts,
                                           disps, rdtype, comm,
                                           fca_module->previous_allgatherv_module);
}
Ejemplo n.º 15
0
/*
 *  Allgather
 *
 *  Function:   - allgather
 *  Accepts:    - same as MPI_Allgather()
 *  Returns:    - MPI_SUCCESS or error code
 */
int mca_coll_fca_allgather(void *sbuf, int scount, struct ompi_datatype_t *sdtype,
                           void *rbuf, int rcount, struct ompi_datatype_t *rdtype,
                           struct ompi_communicator_t *comm,
                           mca_coll_base_module_t *module)
{
    mca_coll_fca_module_t *fca_module = (mca_coll_fca_module_t*)module;
#if OMPI_FCA_ALLGATHER == 1
    MCA_COLL_FCA_DECLARE_CONVERTOR(sconv);
    MCA_COLL_FCA_DECLARE_CONVERTOR(rconv);
    fca_gather_spec_t spec = {0,};
    size_t rgap, rsize;
    ptrdiff_t rdtype_extent;
    ssize_t total_rcount;
    int ret;

    FCA_DT_EXTENT(rdtype, &rdtype_extent);

    /* Setup send buffer */
    spec.size =
            __setup_gather_sendbuf(sbuf, (char *)rbuf + rcount * fca_module->rank * rdtype_extent,
                                   scount, sdtype, &sconv, &spec.sbuf);

    /* Setup recv buffer */
    total_rcount = ompi_comm_size(comm) * rcount;
    if (mca_coll_fca_array_size(rdtype, total_rcount, &rgap, &rsize) && rgap == 0) {
        spec.rbuf = rbuf;
    } else {
        mca_coll_fca_convertor_create(&rconv, rdtype, total_rcount, rbuf,
                                      MCA_COLL_FCA_CONV_RECV, &spec.rbuf, &rsize);
    }


    /* Call FCA Allgather */
    FCA_VERBOSE(5,"Using FCA Allgather size");
    ret = mca_coll_fca_component.fca_ops.do_allgather(fca_module->fca_comm, &spec);

    /* Destroy convertors if operation failed */
    if (ret < 0) {
        mca_coll_fca_convertor_destroy(&sconv);
        mca_coll_fca_convertor_destroy(&rconv);
        if (ret == -EUSEMPI) {
            goto orig_allgather;
        }
        FCA_ERROR("Allgather failed: %s", mca_coll_fca_component.fca_ops.strerror(ret));
        return OMPI_ERROR;
    }

    /* Unpack data and clean up convertor */
    mca_coll_fca_convertor_destroy(&sconv);
    if (mca_coll_fca_convertor_valid(&rconv)) {
        FCA_VERBOSE(5, "Unpacking Allgather receive buffer");
        mca_coll_fca_convertor_process(&rconv, 0);
        mca_coll_fca_convertor_destroy(&rconv);
    }
    return OMPI_SUCCESS;

orig_allgather:
#endif
    return fca_module->previous_allgather(sbuf, scount, sdtype, rbuf, rcount, rdtype,
                                          comm, fca_module->previous_allgather_module);
}
Ejemplo n.º 16
0
/*
 *  Function:   - broadcast
 *  Accepts:    - same arguments as MPI_Bcast()
 *  Returns:    - MPI_SUCCESS or error code
 */
int mca_coll_fca_bcast(void *buff, int count, struct ompi_datatype_t *datatype,
                       int root, struct ompi_communicator_t *comm,
                       mca_coll_base_module_t *module)
{
    mca_coll_fca_module_t *fca_module = (mca_coll_fca_module_t*)module;
    MCA_COLL_FCA_DECLARE_CONVERTOR(conv);
    fca_bcast_spec_t spec;
    size_t gap, size;
    int ret;

    FCA_VERBOSE(5, "[%d] Calling mca_coll_fca_bcast, root=%d, count=%d",
                ompi_comm_rank(comm), root, count);

    /* Setup exchange buffer */
    spec.root = root;
    if (mca_coll_fca_array_size(datatype, count, &gap, &size)) {
        spec.buf = buff + gap;
    } else {
        mca_coll_fca_convertor_create(&conv, datatype, count, buff,
                                      (root == fca_module->rank)
                                                    ? MCA_COLL_FCA_CONV_SEND
                                                    : MCA_COLL_FCA_CONV_RECV,
                                      &spec.buf, &size);
    }

    /* Check that operation size does not exceed limit */
    spec.size = size;
    if (spec.size > fca_module->fca_comm_caps.max_payload) {
         FCA_VERBOSE(5, "Unsupported bcast operation size %d, using fallback",
                     spec.size);
         if (spec.buf != buff) {
             mca_coll_fca_convertor_destroy(&conv);
         }
         goto orig_bcast;
    }

    /* Sender may pack data */
    if (spec.buf != buff && root == fca_module->rank) {
        mca_coll_fca_convertor_process(&conv, 0);
    }

    /* Call FCA Bcast */
    FCA_VERBOSE(5, "Using FCA Bcast");
    ret = mca_coll_fca_component.fca_ops.do_bcast(fca_module->fca_comm, &spec);

    /* Destroy convertor if operation failed */
    if (ret < 0) {
        mca_coll_fca_convertor_destroy(&conv);
        if (ret == -EUSEMPI) {
            goto orig_bcast;
        }
        FCA_ERROR("Bcast failed: %s", mca_coll_fca_component.fca_ops.strerror(ret));
        return OMPI_ERROR;
    }

    /* Unpack data and clean up convertor */
    if (mca_coll_fca_convertor_valid(&conv)) {
        if (root != fca_module->rank) {
            mca_coll_fca_convertor_process(&conv, 0);
        }
        mca_coll_fca_convertor_destroy(&conv);
    }
    return OMPI_SUCCESS;

orig_bcast:
    return fca_module->previous_bcast(buff, count, datatype, root, comm,
                                      fca_module->previous_bcast_module);
}