int
ompi_osc_base_get_primitive_type_info(ompi_datatype_t *datatype,
                                      ompi_datatype_t **prim_datatype, 
                                      uint32_t *prim_count)
{
    ompi_datatype_t *primitive_datatype = NULL;
    size_t datatype_size, primitive_size, primitive_count;

    primitive_datatype = ompi_datatype_get_single_predefined_type_from_args(datatype);
    if( NULL == primitive_datatype ) {
        *prim_count = 0;
        return OMPI_SUCCESS;
    }
    ompi_datatype_type_size( datatype, &datatype_size );
    ompi_datatype_type_size( primitive_datatype, &primitive_size );
    primitive_count = datatype_size / primitive_size;
#if OPAL_ENABLE_DEBUG
    assert( 0 == (datatype_size % primitive_size) );
#endif  /* OPAL_ENABLE_DEBUG */

    /* We now have the count as a size_t, convert it to an uint32_t */
    *prim_datatype = primitive_datatype;
    *prim_count = (uint32_t)primitive_count;

    return OMPI_SUCCESS;
}
Beispiel #2
0
int ompi_coll_base_sendrecv_actual( const void* sendbuf, size_t scount,
                                    ompi_datatype_t* sdatatype,
                                    int dest, int stag,
                                    void* recvbuf, size_t rcount,
                                    ompi_datatype_t* rdatatype,
                                    int source, int rtag,
                                    struct ompi_communicator_t* comm,
                                    ompi_status_public_t* status )

{ /* post receive first, then send, then wait... should be fast (I hope) */
    int err, line = 0;
    size_t rtypesize, stypesize;
    ompi_request_t *req;
    ompi_status_public_t rstatus;

    /* post new irecv */
    ompi_datatype_type_size(rdatatype, &rtypesize);
    err = MCA_PML_CALL(irecv( recvbuf, rcount, rdatatype, source, rtag,
                              comm, &req));
    if (err != MPI_SUCCESS) { line = __LINE__; goto error_handler; }

    /* send data to children */
    ompi_datatype_type_size(sdatatype, &stypesize);
    err = MCA_PML_CALL(send( sendbuf, scount, sdatatype, dest, stag,
                             MCA_PML_BASE_SEND_STANDARD, comm));
    if (err != MPI_SUCCESS) { line = __LINE__; goto error_handler; }

    err = ompi_request_wait( &req, &rstatus);
    if (err != MPI_SUCCESS) { line = __LINE__; goto error_handler; }

    if (MPI_STATUS_IGNORE != status) {
        *status = rstatus;
    }

    return (MPI_SUCCESS);

 error_handler:
    /* Error discovered during the posting of the irecv or send,
     * and no status is available.
     */
    OPAL_OUTPUT ((ompi_coll_base_framework.framework_output, "%s:%d: Error %d occurred\n",
                  __FILE__, line, err));
    (void)line;  // silence compiler warning
    if (MPI_STATUS_IGNORE != status) {
        status->MPI_ERROR = err;
    }
    return (err);
}
int ompi_coll_tuned_reduce_intra_binary( void *sendbuf, void *recvbuf,
                                         int count, ompi_datatype_t* datatype,
                                         ompi_op_t* op, int root,
                                         ompi_communicator_t* comm, 
                                         mca_coll_base_module_t *module,
                                         uint32_t segsize, 
                                         int max_outstanding_reqs  )
{
    int segcount = count;
    size_t typelng;
    mca_coll_tuned_module_t *tuned_module = (mca_coll_tuned_module_t*) module;
    mca_coll_tuned_comm_t *data = tuned_module->tuned_data;

    OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:reduce_intra_binary rank %d ss %5d",
                 ompi_comm_rank(comm), segsize));

    COLL_TUNED_UPDATE_BINTREE( comm, tuned_module, root );

    /**
     * Determine number of segments and number of elements
     * sent per operation
     */
    ompi_datatype_type_size( datatype, &typelng );
    COLL_TUNED_COMPUTED_SEGCOUNT( segsize, typelng, segcount );

    return ompi_coll_tuned_reduce_generic( sendbuf, recvbuf, count, datatype, 
                                           op, root, comm, module,
                                           data->cached_bintree, 
                                           segcount, max_outstanding_reqs );
}
Beispiel #4
0
int mca_coll_monitoring_ibcast(void *buff, int count,
                               struct ompi_datatype_t *datatype,
                               int root,
                               struct ompi_communicator_t *comm,
                               ompi_request_t ** request,
                               mca_coll_base_module_t *module)
{
    mca_coll_monitoring_module_t*monitoring_module = (mca_coll_monitoring_module_t*) module;
    size_t type_size, data_size;
    const int comm_size = ompi_comm_size(comm);
    ompi_datatype_type_size(datatype, &type_size);
    data_size = count * type_size;
    if( root == ompi_comm_rank(comm) ) {
        int i, rank;
        mca_common_monitoring_coll_o2a(data_size * (comm_size - 1), monitoring_module->data);
        for( i = 0; i < comm_size; ++i ) {
            if( i == root ) continue; /* No self sending */
            /**
             * If this fails the destination is not part of my MPI_COM_WORLD
             * Lookup its name in the rank hastable to get its MPI_COMM_WORLD rank
             */
            if( OPAL_SUCCESS == mca_common_monitoring_get_world_rank(i, comm, &rank) ) {
                mca_common_monitoring_record_coll(rank, data_size);
            }
        }
    }
    return monitoring_module->real.coll_ibcast(buff, count, datatype, root, comm, request, monitoring_module->real.coll_ibcast_module);
}
Beispiel #5
0
int mca_coll_monitoring_ireduce(const void *sbuf, void *rbuf, int count,
                                struct ompi_datatype_t *dtype,
                                struct ompi_op_t *op,
                                int root,
                                struct ompi_communicator_t *comm,
                                ompi_request_t ** request,
                                mca_coll_base_module_t *module)
{
    mca_coll_monitoring_module_t*monitoring_module = (mca_coll_monitoring_module_t*) module;
    if( root == ompi_comm_rank(comm) ) {
        int i, rank;
        size_t type_size, data_size;
        const int comm_size = ompi_comm_size(comm);
        ompi_datatype_type_size(dtype, &type_size);
        data_size = count * type_size;
        for( i = 0; i < comm_size; ++i ) {
            if( root == i ) continue; /* No communication for self */
            /**
             * If this fails the destination is not part of my MPI_COM_WORLD
             * Lookup its name in the rank hastable to get its MPI_COMM_WORLD rank
             */
            if( OPAL_SUCCESS == mca_common_monitoring_get_world_rank(i, comm->c_remote_group, &rank) ) {
                mca_common_monitoring_record_coll(rank, data_size);
            }
        }
        mca_common_monitoring_coll_a2o(data_size * (comm_size - 1), monitoring_module->data);
    }
    return monitoring_module->real.coll_ireduce(sbuf, rbuf, count, dtype, op, root, comm, request, monitoring_module->real.coll_ireduce_module);
}
Beispiel #6
0
int ompi_coll_base_reduce_intra_binomial( const void *sendbuf, void *recvbuf,
                                           int count, ompi_datatype_t* datatype,
                                           ompi_op_t* op, int root,
                                           ompi_communicator_t* comm,
                                           mca_coll_base_module_t *module,
                                           uint32_t segsize,
                                           int max_outstanding_reqs  )
{
    int segcount = count;
    size_t typelng;
    mca_coll_base_module_t *base_module = (mca_coll_base_module_t*) module;
    mca_coll_base_comm_t *data = base_module->base_data;

    OPAL_OUTPUT((ompi_coll_base_framework.framework_output,"coll:base:reduce_intra_binomial rank %d ss %5d",
                 ompi_comm_rank(comm), segsize));

    COLL_BASE_UPDATE_IN_ORDER_BMTREE( comm, base_module, root );

    /**
     * Determine number of segments and number of elements
     * sent per operation
     */
    ompi_datatype_type_size( datatype, &typelng );
    COLL_BASE_COMPUTED_SEGCOUNT( segsize, typelng, segcount );

    return ompi_coll_base_reduce_generic( sendbuf, recvbuf, count, datatype,
                                           op, root, comm, module,
                                           data->cached_in_order_bmtree,
                                           segcount, max_outstanding_reqs );
}
int mca_coll_monitoring_ialltoallv(const void *sbuf, const int *scounts,
                                   const int *sdisps,
                                   struct ompi_datatype_t *sdtype,
                                   void *rbuf, const int *rcounts,
                                   const int *rdisps,
                                   struct ompi_datatype_t *rdtype,
                                   struct ompi_communicator_t *comm,
                                   ompi_request_t ** request,
                                   mca_coll_base_module_t *module)
{
    mca_coll_monitoring_module_t*monitoring_module = (mca_coll_monitoring_module_t*) module;
    size_t type_size, data_size, data_size_aggreg = 0;
    const int comm_size = ompi_comm_size(comm);
    const int my_rank = ompi_comm_rank(comm);
    int i, rank;
    ompi_datatype_type_size(sdtype, &type_size);
    for( i = 0; i < comm_size; ++i ) {
        if( my_rank == i ) continue; /* No communication for self */
        data_size = scounts[i] * type_size;
        /**
         * If this fails the destination is not part of my MPI_COM_WORLD
         * Lookup its name in the rank hastable to get its MPI_COMM_WORLD rank
         */
        if( OPAL_SUCCESS == mca_common_monitoring_get_world_rank(i, comm->c_remote_group, &rank) ) {
            mca_common_monitoring_record_coll(rank, data_size);
            data_size_aggreg += data_size;
        }
    }
    mca_common_monitoring_coll_a2a(data_size_aggreg, monitoring_module->data);
    return monitoring_module->real.coll_ialltoallv(sbuf, scounts, sdisps, sdtype, rbuf, rcounts, rdisps, rdtype, comm, request, monitoring_module->real.coll_ialltoallv_module);
}
int
ompi_coll_tuned_bcast_intra_binomial( void* buffer,
                                      int count,
                                      struct ompi_datatype_t* datatype,
                                      int root,
                                      struct ompi_communicator_t* comm,
                                      mca_coll_base_module_t *module,
                                      uint32_t segsize )
{
    int segcount = count;
    size_t typelng;
    mca_coll_tuned_module_t *tuned_module = (mca_coll_tuned_module_t*) module;
    mca_coll_tuned_comm_t *data = tuned_module->tuned_data;

    COLL_TUNED_UPDATE_BMTREE( comm, tuned_module, root );

    /**
     * Determine number of elements sent per operation.
     */
    ompi_datatype_type_size( datatype, &typelng );
    COLL_TUNED_COMPUTED_SEGCOUNT( segsize, typelng, segcount );

    OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:bcast_intra_binomial rank %d ss %5d typelng %lu segcount %d",
                 ompi_comm_rank(comm), segsize, (unsigned long)typelng, segcount));

    return ompi_coll_tuned_bcast_intra_generic( buffer, count, datatype, root, comm, module,
            segcount, data->cached_bmtree );
}
Beispiel #9
0
int
ompi_osc_portals4_put(void *origin_addr,
                      int origin_count,
                      struct ompi_datatype_t *origin_dt,
                      int target,
                      OPAL_PTRDIFF_TYPE target_disp,
                      int target_count,
                      struct ompi_datatype_t *target_dt,
                      struct ompi_win_t *win)
{
    int ret;
    ompi_osc_portals4_module_t *module =
        (ompi_osc_portals4_module_t*) win->w_osc_module;
    ptl_process_t peer = ompi_osc_portals4_get_peer(module, target);
    size_t length;
    size_t offset;
    ptl_handle_md_t md_h;
    void *md_base;

    OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
                         "put: 0x%lx, %d, %s, %d, %d, %d, %s, 0x%lx",
                         (unsigned long) origin_addr, origin_count,
                         origin_dt->name, target, (int) target_disp,
                         target_count, target_dt->name,
                         (unsigned long) win));

    offset = get_displacement(module, target) * target_disp;

    if (!ompi_datatype_is_contiguous_memory_layout(origin_dt, origin_count) ||
        !ompi_datatype_is_contiguous_memory_layout(target_dt, target_count)) {
        opal_output(ompi_osc_base_framework.framework_output,
                    "MPI_Put: transfer of non-contiguous memory is not currently supported.\n");
        return OMPI_ERR_NOT_SUPPORTED;
    } else {
        (void)opal_atomic_add_64(&module->opcount, 1);
        ret = ompi_datatype_type_size(origin_dt, &length);
        if (OMPI_SUCCESS != ret) {
            return ret;
        }
        length *= origin_count;
        ompi_osc_portals4_get_md(origin_addr, module->md_h, &md_h, &md_base);
        ret = PtlPut(md_h,
                     (ptl_size_t) ((char*) origin_addr - (char*) md_base),
                     length,
                     PTL_ACK_REQ,
                     peer,
                     module->pt_idx,
                     module->match_bits,
                     offset,
                     NULL,
                     0);
        if (OMPI_SUCCESS != ret) {
            return ret;
        }
    }

    return OMPI_SUCCESS;
}
Beispiel #10
0
int ompi_osc_ucx_get(void *origin_addr, int origin_count,
                     struct ompi_datatype_t *origin_dt,
                     int target, ptrdiff_t target_disp, int target_count,
                     struct ompi_datatype_t *target_dt, struct ompi_win_t *win) {
    ompi_osc_ucx_module_t *module = (ompi_osc_ucx_module_t*) win->w_osc_module;
    ucp_ep_h ep = OSC_UCX_GET_EP(module->comm, target);
    uint64_t remote_addr = (module->win_info_array[target]).addr + target_disp * OSC_UCX_GET_DISP(module, target);
    ucp_rkey_h rkey;
    ptrdiff_t origin_lb, origin_extent, target_lb, target_extent;
    bool is_origin_contig = false, is_target_contig = false;
    ucs_status_t status;
    int ret = OMPI_SUCCESS;

    ret = check_sync_state(module, target, false);
    if (ret != OMPI_SUCCESS) {
        return ret;
    }

    if (module->flavor == MPI_WIN_FLAVOR_DYNAMIC) {
        status = get_dynamic_win_info(remote_addr, module, ep, target);
        if (status != UCS_OK) {
            return OMPI_ERROR;
        }
    }

    rkey = (module->win_info_array[target]).rkey;

    ompi_datatype_get_true_extent(origin_dt, &origin_lb, &origin_extent);
    ompi_datatype_get_true_extent(target_dt, &target_lb, &target_extent);

    is_origin_contig = ompi_datatype_is_contiguous_memory_layout(origin_dt, origin_count);
    is_target_contig = ompi_datatype_is_contiguous_memory_layout(target_dt, target_count);

    if (is_origin_contig && is_target_contig) {
        /* fast path */
        size_t origin_len;

        ompi_datatype_type_size(origin_dt, &origin_len);
        origin_len *= origin_count;

        status = ucp_get_nbi(ep, (void *)((intptr_t)origin_addr + origin_lb), origin_len,
                             remote_addr + target_lb, rkey);
        if (status != UCS_OK && status != UCS_INPROGRESS) {
            opal_output_verbose(1, ompi_osc_base_framework.framework_output,
                                "%s:%d: ucp_get_nbi failed: %d\n",
                                __FILE__, __LINE__, status);
            return OMPI_ERROR;
        }

        return incr_and_check_ops_num(module, target, ep);
    } else {
        return ddt_put_get(module, origin_addr, origin_count, origin_dt, is_origin_contig,
                           origin_lb, target, ep, remote_addr, rkey, target_count, target_dt,
                           is_target_contig, target_lb, true);
    }
}
Beispiel #11
0
void shmem_integer_get_f(FORTRAN_POINTER_T target, FORTRAN_POINTER_T source, MPI_Fint *len, MPI_Fint *pe)
{
    size_t integer_type_size = 0;
    ompi_datatype_type_size(&ompi_mpi_integer.dt, &integer_type_size);

    MCA_SPML_CALL(get(oshmem_ctx_default, FPTR_2_VOID_PTR(source),
        OMPI_FINT_2_INT(*len) * integer_type_size,
        FPTR_2_VOID_PTR(target),
        OMPI_FINT_2_INT(*pe)));
}
void shmem_character_get_f(FORTRAN_POINTER_T target, FORTRAN_POINTER_T source, MPI_Fint *len, MPI_Fint *pe)
{
    size_t character_type_size = 0;
    ompi_datatype_type_size(&ompi_mpi_character.dt, &character_type_size);

    MCA_SPML_CALL(get(FPTR_2_VOID_PTR(source), 
        OMPI_FINT_2_INT(*len) * character_type_size, 
        FPTR_2_VOID_PTR(target), 
        OMPI_FINT_2_INT(*pe)));
}
Beispiel #13
0
void shmem_logical_put_f(FORTRAN_POINTER_T target, FORTRAN_POINTER_T source, MPI_Fint *length, MPI_Fint *pe)
{
    size_t logical_type_size = 0;
    ompi_datatype_type_size(&ompi_mpi_logical.dt, &logical_type_size);

    MCA_SPML_CALL(put(oshmem_ctx_default, FPTR_2_VOID_PTR(target),
        OMPI_FINT_2_INT(*length) * logical_type_size,
        FPTR_2_VOID_PTR(source),
        OMPI_FINT_2_INT(*pe)));
}
void shmem_complex_put_nbi_f(FORTRAN_POINTER_T target, FORTRAN_POINTER_T source, MPI_Fint *length, MPI_Fint *pe)
{
    size_t complex_type_size = 0;
    ompi_datatype_type_size(&ompi_mpi_cplex.dt, &complex_type_size);

    MCA_SPML_CALL(put_nb(FPTR_2_VOID_PTR(target),
        OMPI_FINT_2_INT(*length) * complex_type_size,
        FPTR_2_VOID_PTR(source),
        OMPI_FINT_2_INT(*pe), NULL));
}
void shmem_double_put_nbi_f(FORTRAN_POINTER_T target, FORTRAN_POINTER_T source, MPI_Fint *length, MPI_Fint *pe)
{
    size_t double_precision_type_size = 0;
    ompi_datatype_type_size(&ompi_mpi_dblprec.dt, &double_precision_type_size);

    MCA_SPML_CALL(put_nb(FPTR_2_VOID_PTR(target),
        OMPI_FINT_2_INT(*length) * double_precision_type_size,
        FPTR_2_VOID_PTR(source),
        OMPI_FINT_2_INT(*pe), NULL));
}
int ompi_coll_tuned_allgatherv_intra_dec_dynamic(const void *sbuf, int scount,
                                                 struct ompi_datatype_t *sdtype,
                                                 void* rbuf, const int *rcounts,
                                                 const int *rdispls,
                                                 struct ompi_datatype_t *rdtype,
                                                 struct ompi_communicator_t *comm,
                                                 mca_coll_base_module_t *module)
{
    mca_coll_tuned_module_t *tuned_module = (mca_coll_tuned_module_t*) module;

    OPAL_OUTPUT((ompi_coll_tuned_stream,
                 "ompi_coll_tuned_allgatherv_intra_dec_dynamic"));

    if (tuned_module->com_rules[ALLGATHERV]) {
        /* We have file based rules:
           - calculate message size and other necessary information */
        int comsize, i;
        int alg, faninout, segsize, ignoreme;
        size_t dsize, total_size;

        comsize = ompi_comm_size(comm);
        ompi_datatype_type_size (sdtype, &dsize);
        total_size = 0;
        for (i = 0; i < comsize; i++) { total_size += dsize * rcounts[i]; }

        alg = ompi_coll_tuned_get_target_method_params (tuned_module->com_rules[ALLGATHERV],
                                                        total_size, &faninout, &segsize, &ignoreme);
        if (alg) {
            /* we have found a valid choice from the file based rules for
               this message size */
            return ompi_coll_tuned_allgatherv_intra_do_this (sbuf, scount, sdtype,
                                                             rbuf, rcounts,
                                                             rdispls, rdtype,
                                                             comm, module,
                                                             alg, faninout, segsize);
        }
    }

    /* We do not have file based rules */
    if (tuned_module->user_forced[ALLGATHERV].algorithm) {
        /* User-forced algorithm */
        return ompi_coll_tuned_allgatherv_intra_do_this(sbuf, scount, sdtype,
                                                        rbuf, rcounts, rdispls, rdtype,
                                                        comm, module,
                                                        tuned_module->user_forced[ALLGATHERV].algorithm,
                                                        tuned_module->user_forced[ALLGATHERV].tree_fanout,
                                                        tuned_module->user_forced[ALLGATHERV].segsize);
    }

    /* Use default decision */
    return ompi_coll_tuned_allgatherv_intra_dec_fixed (sbuf, scount, sdtype,
                                                       rbuf, rcounts,
                                                       rdispls, rdtype,
                                                       comm, module);
}
Beispiel #17
0
/* try to get a small message out on to the wire quickly */
static inline int mca_pml_ob1_send_inline (const void *buf, size_t count,
                                           ompi_datatype_t * datatype,
                                           int dst, int tag, int16_t seqn,
                                           ompi_proc_t *dst_proc, mca_bml_base_endpoint_t* endpoint,
                                           ompi_communicator_t * comm)
{
    mca_pml_ob1_match_hdr_t match;
    mca_bml_base_btl_t *bml_btl;
    opal_convertor_t convertor;
    size_t size;
    int rc;

    bml_btl = mca_bml_base_btl_array_get_next(&endpoint->btl_eager);
    if( NULL == bml_btl->btl->btl_sendi)
        return OMPI_ERR_NOT_AVAILABLE;

    ompi_datatype_type_size (datatype, &size);
    if ((size * count) > 256) {  /* some random number */
        return OMPI_ERR_NOT_AVAILABLE;
    }

    if (count > 0) {
        /* initialize just enough of the convertor to avoid a SEGV in opal_convertor_cleanup */
        OBJ_CONSTRUCT(&convertor, opal_convertor_t);

        /* We will create a convertor specialized for the        */
        /* remote architecture and prepared with the datatype.   */
        opal_convertor_copy_and_prepare_for_send (dst_proc->super.proc_convertor,
                                                  (const struct opal_datatype_t *) datatype,
                                                  count, buf, 0, &convertor);
        opal_convertor_get_packed_size (&convertor, &size);
    } else {
        size = 0;
    }

    mca_pml_ob1_match_hdr_prepare (&match, MCA_PML_OB1_HDR_TYPE_MATCH, 0,
                                   comm->c_contextid, comm->c_my_rank,
                                   tag, seqn);

    ob1_hdr_hton(&match, MCA_PML_OB1_HDR_TYPE_MATCH, dst_proc);

    /* try to send immediately */
    rc = mca_bml_base_sendi (bml_btl, &convertor, &match, OMPI_PML_OB1_MATCH_HDR_LEN,
                             size, MCA_BTL_NO_ORDER, MCA_BTL_DES_FLAGS_PRIORITY | MCA_BTL_DES_FLAGS_BTL_OWNERSHIP,
                             MCA_PML_OB1_HDR_TYPE_MATCH, NULL);
    if (count > 0) {
        opal_convertor_cleanup (&convertor);
    }

    if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) {
	return rc;
    }

    return (int) size;
}
int mca_coll_monitoring_ineighbor_alltoall(const void *sbuf, int scount,
                                           struct ompi_datatype_t *sdtype,
                                           void *rbuf, int rcount,
                                           struct ompi_datatype_t *rdtype,
                                           struct ompi_communicator_t *comm,
                                           ompi_request_t ** request,
                                           mca_coll_base_module_t *module)
{
    mca_coll_monitoring_module_t*monitoring_module = (mca_coll_monitoring_module_t*) module;
    size_t type_size, data_size, data_size_aggreg = 0;
    const mca_topo_base_comm_cart_t *cart = comm->c_topo->mtc.cart;
    int dim, srank, drank, world_rank;

    ompi_datatype_type_size(sdtype, &type_size);
    data_size = scount * type_size;

    for( dim = 0; dim < cart->ndims; ++dim ) {
        srank = MPI_PROC_NULL, drank = MPI_PROC_NULL;

        if (cart->dims[dim] > 1) {
            mca_topo_base_cart_shift (comm, dim, 1, &srank, &drank);
        } else if (1 == cart->dims[dim] && cart->periods[dim]) {
            /* Don't record exchanges with self */
            continue;
        }

        if (MPI_PROC_NULL != srank) {
            /**
             * If this fails the destination is not part of my MPI_COM_WORLD
             * Lookup its name in the rank hastable to get its MPI_COMM_WORLD rank
             */
            if( OPAL_SUCCESS == mca_common_monitoring_get_world_rank(srank, comm, &world_rank) ) {
                mca_common_monitoring_record_coll(world_rank, data_size);
                data_size_aggreg += data_size;
            }
        }

        if (MPI_PROC_NULL != drank) {
            /**
             * If this fails the destination is not part of my MPI_COM_WORLD
             * Lookup its name in the rank hastable to get its MPI_COMM_WORLD rank
             */
            if( OPAL_SUCCESS == mca_common_monitoring_get_world_rank(drank, comm, &world_rank) ) {
                mca_common_monitoring_record_coll(world_rank, data_size);
                data_size_aggreg += data_size;
            }
        }
    }

    mca_common_monitoring_coll_a2a(data_size_aggreg, monitoring_module->data);

    return monitoring_module->real.coll_ineighbor_alltoall(sbuf, scount, sdtype, rbuf, rcount, rdtype, comm, request, monitoring_module->real.coll_ineighbor_alltoall_module);
}
/*
 *	reduce_scatter_intra_dec 
 *
 *	Function:	- seletects reduce_scatter algorithm to use
 *	Accepts:	- same arguments as MPI_Reduce_scatter()
 *	Returns:	- MPI_SUCCESS or error code (passed from 
 *                        the reduce scatter implementation)
 *      Note: If we detect zero valued counts in the rcounts array, we
 *      fall back to the nonoverlapping algorithm because the other
 *      algorithms do not currently handle it.
 */
int ompi_coll_tuned_reduce_scatter_intra_dec_fixed( void *sbuf, void *rbuf,
                                                    int *rcounts,
                                                    struct ompi_datatype_t *dtype,
                                                    struct ompi_op_t *op,
                                                    struct ompi_communicator_t *comm,
                                                    mca_coll_base_module_t *module)
{
    int comm_size, i, pow2;
    size_t total_message_size, dsize;
    const double a = 0.0012;
    const double b = 8.0;
    const size_t small_message_size = 12 * 1024;
    const size_t large_message_size = 256 * 1024;
    bool zerocounts = false;

    OPAL_OUTPUT((ompi_coll_tuned_stream, "ompi_coll_tuned_reduce_scatter_intra_dec_fixed"));

    comm_size = ompi_comm_size(comm);
    /* We need data size for decision function */
    ompi_datatype_type_size(dtype, &dsize);
    total_message_size = 0;
    for (i = 0; i < comm_size; i++) { 
        total_message_size += rcounts[i];
        if (0 == rcounts[i]) {
            zerocounts = true;
        }
    }

    if( !ompi_op_is_commute(op) || (zerocounts)) {
        return ompi_coll_tuned_reduce_scatter_intra_nonoverlapping (sbuf, rbuf, rcounts, 
                                                                    dtype, op, 
                                                                    comm, module); 
    }
   
    total_message_size *= dsize;

    /* compute the nearest power of 2 */
    pow2 = opal_next_poweroftwo_inclusive (comm_size);

    if ((total_message_size <= small_message_size) ||
        ((total_message_size <= large_message_size) && (pow2 == comm_size)) ||
        (comm_size >= a * total_message_size + b)) {
        return 
            ompi_coll_tuned_reduce_scatter_intra_basic_recursivehalving(sbuf, rbuf, rcounts,
                                                                        dtype, op,
                                                                        comm, module);
    } 
    return ompi_coll_tuned_reduce_scatter_intra_ring(sbuf, rbuf, rcounts,
                                                     dtype, op,
                                                     comm, module);
}
int ompi_coll_tuned_allgatherv_intra_dec_fixed(void *sbuf, int scount, 
                                               struct ompi_datatype_t *sdtype,
                                               void* rbuf, int *rcounts, 
                                               int *rdispls,
                                               struct ompi_datatype_t *rdtype, 
                                               struct ompi_communicator_t *comm,
                                               mca_coll_base_module_t *module)
{
    int i;
    int communicator_size;
    size_t dsize, total_dsize;
    
    communicator_size = ompi_comm_size(comm);
    
    /* Special case for 2 processes */
    if (communicator_size == 2) {
        return ompi_coll_tuned_allgatherv_intra_two_procs (sbuf, scount, sdtype,
                                                           rbuf, rcounts, rdispls, rdtype, 
                                                           comm, module);
    }
    
    /* Determine complete data size */
    ompi_datatype_type_size(sdtype, &dsize);
    total_dsize = 0;
    for (i = 0; i < communicator_size; i++) {
        total_dsize += dsize * rcounts[i];
    }
    
    OPAL_OUTPUT((ompi_coll_tuned_stream, 
                 "ompi_coll_tuned_allgatherv_intra_dec_fixed"
                 " rank %d com_size %d msg_length %lu",
                 ompi_comm_rank(comm), communicator_size, (unsigned long)total_dsize));
    
    /* Decision based on allgather decision.   */
    if (total_dsize < 50000) {
        return ompi_coll_tuned_allgatherv_intra_bruck(sbuf, scount, sdtype, 
                                                      rbuf, rcounts, rdispls, rdtype, 
                                                      comm, module);
    } else {
        if (communicator_size % 2) {
            return ompi_coll_tuned_allgatherv_intra_ring(sbuf, scount, sdtype, 
                                                         rbuf, rcounts, rdispls, rdtype, 
                                                         comm, module);
        } else {
            return  ompi_coll_tuned_allgatherv_intra_neighborexchange(sbuf, scount, sdtype,
                                                                      rbuf, rcounts, rdispls, rdtype, 
                                                                      comm, module);
        }
    }
}
int ompi_coll_tuned_allgather_intra_dec_dynamic(void *sbuf, int scount, 
                                                struct ompi_datatype_t *sdtype,
                                                void* rbuf, int rcount, 
                                                struct ompi_datatype_t *rdtype, 
                                                struct ompi_communicator_t *comm,
                                                mca_coll_base_module_t *module)
{
    mca_coll_tuned_module_t *tuned_module = (mca_coll_tuned_module_t*) module;
    mca_coll_tuned_comm_t *data = tuned_module->tuned_data;

    OPAL_OUTPUT((ompi_coll_tuned_stream, 
                 "ompi_coll_tuned_allgather_intra_dec_dynamic"));
   
    if (data->com_rules[ALLGATHER]) {
        /* We have file based rules:
           - calculate message size and other necessary information */
        int comsize;
        int alg, faninout, segsize, ignoreme;
        size_t dsize;
      
        ompi_datatype_type_size (sdtype, &dsize);
        comsize = ompi_comm_size(comm);
        dsize *= (ptrdiff_t)comsize * (ptrdiff_t)scount;
      
        alg = ompi_coll_tuned_get_target_method_params (data->com_rules[ALLGATHER], 
                                                        dsize, &faninout, &segsize, &ignoreme);
        if (alg) { 
            /* we have found a valid choice from the file based rules for 
               this message size */
            return ompi_coll_tuned_allgather_intra_do_this (sbuf, scount, sdtype,
                                                            rbuf, rcount, rdtype,
                                                            comm, module,
                                                            alg, faninout, segsize);
        }
    } 

    /* We do not have file based rules */
    if (data->user_forced[ALLGATHER].algorithm) {
        /* User-forced algorithm */
        return ompi_coll_tuned_allgather_intra_do_forced (sbuf, scount, sdtype, 
                                                          rbuf, rcount, rdtype, 
                                                          comm, module);
    }

    /* Use default decision */
    return ompi_coll_tuned_allgather_intra_dec_fixed (sbuf, scount, sdtype, 
                                                      rbuf, rcount, rdtype, 
                                                      comm, module);
}
int ompi_coll_tuned_gather_intra_dec_dynamic(const void *sbuf, int scount,
                                             struct ompi_datatype_t *sdtype,
                                             void* rbuf, int rcount,
                                             struct ompi_datatype_t *rdtype,
                                             int root,
                                             struct ompi_communicator_t *comm,
                                             mca_coll_base_module_t *module)
{
    mca_coll_tuned_module_t *tuned_module = (mca_coll_tuned_module_t*) module;

    OPAL_OUTPUT((ompi_coll_tuned_stream,
                 "ompi_coll_tuned_gather_intra_dec_dynamic"));

    /**
     * check to see if we have some filebased rules.
     */
    if (tuned_module->com_rules[GATHER]) {
        int comsize, alg, faninout, segsize, max_requests;
        size_t dsize;

        comsize = ompi_comm_size(comm);
        ompi_datatype_type_size (sdtype, &dsize);
        dsize *= comsize;

        alg = ompi_coll_tuned_get_target_method_params (tuned_module->com_rules[GATHER],
                                                        dsize, &faninout, &segsize, &max_requests);

        if (alg) {
            /* we have found a valid choice from the file based rules for this message size */
            return ompi_coll_tuned_gather_intra_do_this (sbuf, scount, sdtype,
                                                         rbuf, rcount, rdtype,
                                                         root, comm, module,
                                                         alg, faninout, segsize);
        } /* found a method */
    } /*end if any com rules to check */

    if (tuned_module->user_forced[GATHER].algorithm) {
        return ompi_coll_tuned_gather_intra_do_this(sbuf, scount, sdtype,
                                                    rbuf, rcount, rdtype,
                                                    root, comm, module,
                                                    tuned_module->user_forced[GATHER].algorithm,
                                                    tuned_module->user_forced[GATHER].tree_fanout,
                                                    tuned_module->user_forced[GATHER].segsize);
    }

    return ompi_coll_tuned_gather_intra_dec_fixed (sbuf, scount, sdtype,
                                                   rbuf, rcount, rdtype,
                                                   root, comm, module);
}
Beispiel #23
0
MPI_Fint shmem_swap_f(FORTRAN_POINTER_T target, FORTRAN_POINTER_T value, MPI_Fint *pe)
{
    size_t integer_type_size = 0;
    MPI_Fint out_value = 0;
    ompi_datatype_type_size(&ompi_mpi_integer.dt, &integer_type_size);

    MCA_ATOMIC_CALL(cswap(FPTR_2_VOID_PTR(target),
        (void *)&out_value,
        NULL,
        FPTR_2_VOID_PTR(value),
        integer_type_size,
        OMPI_FINT_2_INT(*pe)));

    return out_value;
}
int ompi_coll_tuned_scatter_intra_dec_fixed(void *sbuf, int scount, 
                                            struct ompi_datatype_t *sdtype,
                                            void* rbuf, int rcount, 
                                            struct ompi_datatype_t *rdtype, 
                                            int root, struct ompi_communicator_t *comm,
                                            mca_coll_base_module_t *module)
{
    const size_t small_block_size = 300;
    const int small_comm_size = 10;
    int communicator_size, rank;
    size_t dsize, block_size;

    OPAL_OUTPUT((ompi_coll_tuned_stream, 
                 "ompi_coll_tuned_scatter_intra_dec_fixed"));

    communicator_size = ompi_comm_size(comm);
    rank = ompi_comm_rank(comm);
    /* Determine block size */
    if (root == rank) {
        ompi_datatype_type_size(sdtype, &dsize);
        block_size = dsize * scount;
    } else {
        ompi_datatype_type_size(rdtype, &dsize);
        block_size = dsize * rcount;
    } 

    if ((communicator_size > small_comm_size) &&
        (block_size < small_block_size)) {
        return ompi_coll_tuned_scatter_intra_binomial (sbuf, scount, sdtype, 
                                                       rbuf, rcount, rdtype, 
                                                       root, comm, module);
    }
    return ompi_coll_tuned_scatter_intra_basic_linear (sbuf, scount, sdtype, 
                                                       rbuf, rcount, rdtype, 
                                                       root, comm, module);
}
Beispiel #25
0
int ompi_osc_ucx_compare_and_swap(const void *origin_addr, const void *compare_addr,
                                  void *result_addr, struct ompi_datatype_t *dt,
                                  int target, ptrdiff_t target_disp,
                                  struct ompi_win_t *win) {
    ompi_osc_ucx_module_t *module = (ompi_osc_ucx_module_t *)win->w_osc_module;
    ucp_ep_h ep = OSC_UCX_GET_EP(module->comm, target);
    uint64_t remote_addr = (module->win_info_array[target]).addr + target_disp * OSC_UCX_GET_DISP(module, target);
    ucp_rkey_h rkey;
    size_t dt_bytes;
    ompi_osc_ucx_internal_request_t *req = NULL;
    int ret = OMPI_SUCCESS;
    ucs_status_t status;

    ret = check_sync_state(module, target, false);
    if (ret != OMPI_SUCCESS) {
        return ret;
    }

    ret = start_atomicity(module, ep, target);
    if (ret != OMPI_SUCCESS) {
        return ret;
    }

    if (module->flavor == MPI_WIN_FLAVOR_DYNAMIC) {
        status = get_dynamic_win_info(remote_addr, module, ep, target);
        if (status != UCS_OK) {
            return OMPI_ERROR;
        }
    }

    rkey = (module->win_info_array[target]).rkey;

    ompi_datatype_type_size(dt, &dt_bytes);
    memcpy(result_addr, origin_addr, dt_bytes);
    req = ucp_atomic_fetch_nb(ep, UCP_ATOMIC_FETCH_OP_CSWAP, *(uint64_t *)compare_addr,
                              result_addr, dt_bytes, remote_addr, rkey, req_completion);
    if (UCS_PTR_IS_PTR(req)) {
        ucp_request_release(req);
    }

    ret = incr_and_check_ops_num(module, target, ep);
    if (ret != OMPI_SUCCESS) {
        return ret;
    }

    return end_atomicity(module, ep, target);
}
int ompi_coll_tuned_alltoall_intra_dec_dynamic(const void *sbuf, int scount,
                                               struct ompi_datatype_t *sdtype,
                                               void* rbuf, int rcount,
                                               struct ompi_datatype_t *rdtype,
                                               struct ompi_communicator_t *comm,
                                               mca_coll_base_module_t *module)
{
    mca_coll_tuned_module_t *tuned_module = (mca_coll_tuned_module_t*) module;

    OPAL_OUTPUT((ompi_coll_tuned_stream, "ompi_coll_tuned_alltoall_intra_dec_dynamic"));

    /* check to see if we have some filebased rules */
    if (tuned_module->com_rules[ALLTOALL]) {
        /* we do, so calc the message size or what ever we need and use this for the evaluation */
        int comsize;
        int alg, faninout, segsize, max_requests;
        size_t dsize;

        ompi_datatype_type_size (sdtype, &dsize);
        comsize = ompi_comm_size(comm);
        dsize *= (ptrdiff_t)comsize * (ptrdiff_t)scount;

        alg = ompi_coll_tuned_get_target_method_params (tuned_module->com_rules[ALLTOALL],
                                                        dsize, &faninout, &segsize, &max_requests);

        if (alg) {
            /* we have found a valid choice from the file based rules for this message size */
            return ompi_coll_tuned_alltoall_intra_do_this (sbuf, scount, sdtype,
                                                           rbuf, rcount, rdtype,
                                                           comm, module,
                                                           alg, faninout, segsize, max_requests);
        } /* found a method */
    } /*end if any com rules to check */

    if (tuned_module->user_forced[ALLTOALL].algorithm) {
        return ompi_coll_tuned_alltoall_intra_do_this(sbuf, scount, sdtype,
                                                      rbuf, rcount, rdtype,
                                                      comm, module,
                                                      tuned_module->user_forced[ALLTOALL].algorithm,
                                                      tuned_module->user_forced[ALLTOALL].tree_fanout,
                                                      tuned_module->user_forced[ALLTOALL].segsize,
                                                      tuned_module->user_forced[ALLTOALL].max_requests);
    }
    return ompi_coll_tuned_alltoall_intra_dec_fixed (sbuf, scount, sdtype,
                                                     rbuf, rcount, rdtype,
                                                     comm, module);
}
Beispiel #27
0
static int
create_segments( ompi_datatype_t* datatype, int count,
                 size_t segment_length,
                 ddt_segment_t** segments, int* seg_count )
{
    size_t data_size, total_length, position;
    opal_convertor_t* convertor;
    int i;
    ddt_segment_t* segment;

    ompi_datatype_type_size( datatype, &data_size );
    data_size *= count;
    *seg_count = data_size / segment_length;
    if( ((*seg_count) * segment_length) != data_size )
        *seg_count += 1;
 allocate_segments:
    *segments = (ddt_segment_t*)malloc( (*seg_count) * sizeof(ddt_segment_t) );

    convertor = opal_convertor_create( opal_local_arch, 0 );
    opal_convertor_prepare_for_send( convertor, &(datatype->super), count, NULL );

    position = 0;
    total_length = 0;
    for( i = 0; i < (*seg_count); i++ ) {
        segment = &((*segments)[i]);
        segment->buffer = malloc(segment_length);
        segment->position = position;

        /* Find the end of the segment */
        position += segment_length;
        opal_convertor_set_position( convertor, &position );
        segment->size = position - segment->position;
        total_length += segment->size;
    }
    OBJ_RELEASE(convertor);
    if( total_length != data_size ) {
        for( i = 0; i < (*seg_count); i++ ) {
            segment = &((*segments)[i]);
            free(segment->buffer);
        }
        free( *segments );
        (*seg_count) += 1;
        goto allocate_segments;
    }
    return 0;
}
/*
 *    reduce_scatter_intra_dec 
 *
 *    Function:   - seletects reduce_scatter algorithm to use
 *    Accepts:    - same arguments as MPI_Reduce_scatter()
 *    Returns:    - MPI_SUCCESS or error code (passed from
 *                  the reduce_scatter implementation)
 *                                        
 */
int ompi_coll_tuned_reduce_scatter_intra_dec_dynamic(void *sbuf, void *rbuf, 
                                                     int *rcounts,
                                                     struct ompi_datatype_t *dtype,
                                                     struct ompi_op_t *op,
                                                     struct ompi_communicator_t *comm,
                                                     mca_coll_base_module_t *module)
{
    mca_coll_tuned_module_t *tuned_module = (mca_coll_tuned_module_t*) module;
    mca_coll_tuned_comm_t *data = tuned_module->tuned_data;

    OPAL_OUTPUT((ompi_coll_tuned_stream, "coll:tuned:reduce_scatter_intra_dec_dynamic"));

    /* check to see if we have some filebased rules */
    if (data->com_rules[REDUCESCATTER]) {
        /* we do, so calc the message size or what ever we need and use 
           this for the evaluation */
        int alg, faninout, segsize, ignoreme, i, count, size;
        size_t dsize;
        size = ompi_comm_size(comm);
        for (i = 0, count = 0; i < size; i++) { count += rcounts[i];}
        ompi_datatype_type_size (dtype, &dsize);
        dsize *= count;

        alg = ompi_coll_tuned_get_target_method_params (data->com_rules[REDUCESCATTER], 
                                                        dsize, &faninout, 
                                                        &segsize, &ignoreme);
        if (alg) { 
            /* we have found a valid choice from the file based rules for this message size */
            return  ompi_coll_tuned_reduce_scatter_intra_do_this (sbuf, rbuf, rcounts,
                                                                  dtype, op,
                                                                  comm, module,
                                                                  alg, faninout, 
                                                                  segsize);
        } /* found a method */
    } /*end if any com rules to check */
    
    if (data->user_forced[REDUCESCATTER].algorithm) {
        return ompi_coll_tuned_reduce_scatter_intra_do_forced (sbuf, rbuf, rcounts, 
                                                               dtype, op,
                                                               comm, module);
    }
    return ompi_coll_tuned_reduce_scatter_intra_dec_fixed (sbuf, rbuf, rcounts,
                                                           dtype, op,
                                                           comm, module);
}
Beispiel #29
0
void shmem_integer_iget_f(FORTRAN_POINTER_T target, FORTRAN_POINTER_T source, MPI_Fint *tst, MPI_Fint *sst, MPI_Fint *len, MPI_Fint *pe)
{
    int i;
    int length = OMPI_FINT_2_INT(*len);
    int tst_c = OMPI_FINT_2_INT(*tst);
    int sst_c = OMPI_FINT_2_INT(*sst);

    size_t integer_type_size = 0;
    ompi_datatype_type_size(&ompi_mpi_integer.dt, &integer_type_size);

    for (i=0; i<length; i++)
    {  
        MCA_SPML_CALL(get((uint8_t *)FPTR_2_VOID_PTR(source) + i * sst_c * integer_type_size, 
            integer_type_size, 
            (uint8_t *)FPTR_2_VOID_PTR(target) + i * tst_c * integer_type_size, 
            OMPI_FINT_2_INT(*pe)));
    }
}
/*
 *  allreduce_intra
 *
 *  Function:   - allreduce using other MPI collectives
 *  Accepts:    - same as MPI_Allreduce()
 *  Returns:    - MPI_SUCCESS or error code
 */
int
ompi_coll_tuned_allreduce_intra_dec_fixed (void *sbuf, void *rbuf, int count,
                                           struct ompi_datatype_t *dtype,
                                           struct ompi_op_t *op,
                                           struct ompi_communicator_t *comm,
                                           mca_coll_base_module_t *module)
{
    size_t dsize, block_dsize;
    int comm_size = ompi_comm_size(comm);
    const size_t intermediate_message = 10000;
    OPAL_OUTPUT((ompi_coll_tuned_stream, "ompi_coll_tuned_allreduce_intra_dec_fixed"));

    /**
     * Decision function based on MX results from the Grig cluster at UTK.
     * 
     * Currently, linear, recursive doubling, and nonoverlapping algorithms 
     * can handle both commutative and non-commutative operations.
     * Ring algorithm does not support non-commutative operations.
     */
    ompi_datatype_type_size(dtype, &dsize);
    block_dsize = dsize * count;

    if (block_dsize < intermediate_message) {
        return (ompi_coll_tuned_allreduce_intra_recursivedoubling (sbuf, rbuf, 
                                                                   count, dtype,
                                                                   op, comm, module));
    } 

    if( ompi_op_is_commute(op) && (count > comm_size) ) {
        const size_t segment_size = 1 << 20; /* 1 MB */
        if ((comm_size * segment_size >= block_dsize)) {
            return (ompi_coll_tuned_allreduce_intra_ring (sbuf, rbuf, count, dtype, 
                                                          op, comm, module));
        } else {
            return (ompi_coll_tuned_allreduce_intra_ring_segmented (sbuf, rbuf, 
                                                                    count, dtype, 
                                                                    op, comm, module,
                                                                    segment_size));
        }
    }

    return (ompi_coll_tuned_allreduce_intra_nonoverlapping (sbuf, rbuf, count, 
                                                            dtype, op, comm, module));
}