/*
 *	barrier_intra_tree
 *
 *	Function:	- barrier built from zero-byte point-to-point messages
 *	Accepts:	- comm:   communicator whose ranks synchronize
 *			- module: collective module handle (not referenced
 *			          inside this routine)
 *	Returns:	- MPI_SUCCESS, or the first error code returned by
 *			  one of the send/recv calls
 *
 * Another recursive doubling type algorithm, but in this case we go up
 * the tree and back down the tree:
 *   - going up, the distance between partners doubles each round and
 *     the higher rank of every pair sends to the lower rank;
 *   - going down, the same distances are visited in reverse order and
 *     the lower rank of every pair sends to the higher rank.
 * At a given distance only ranks whose bits below that distance are all
 * zero take part, and a pair is skipped when the partner does not exist
 * in the communicator.
 */
int ompi_coll_tuned_barrier_intra_tree(struct ompi_communicator_t *comm,
                                       mca_coll_base_module_t *module)
{
    const int me = ompi_comm_rank(comm);
    const int nprocs = ompi_comm_size(comm);
    int span, step, peer, rc;

    OPAL_OUTPUT((ompi_coll_tuned_stream,
                 "ompi_coll_tuned_barrier_intra_tree %d", 
                 me));

    /* Upper bound for the partner distance, derived from the
       communicator size (a power of two per the helper's name). */
    span = opal_next_poweroftwo_inclusive(nprocs);

    /* Up the tree: lower rank receives, higher rank sends. */
    for (step = 1; step < span; step <<= 1) {
        peer = me ^ step;

        /* peer differs from me only in the `step` bit, so testing the
           low bits of peer is the same as testing the low bits of me. */
        if ((peer & (step - 1)) || peer >= nprocs) {
            continue;
        }

        if (me < peer) {
            rc = MCA_PML_CALL(recv(NULL, 0, MPI_BYTE, peer,
                                   MCA_COLL_BASE_TAG_BARRIER, comm,
                                   MPI_STATUS_IGNORE));
            if (MPI_SUCCESS != rc) {
                return rc;
            }
        } else if (me > peer) {
            rc = MCA_PML_CALL(send(NULL, 0, MPI_BYTE, peer,
                                   MCA_COLL_BASE_TAG_BARRIER,
                                   MCA_PML_BASE_SEND_STANDARD, comm));
            if (MPI_SUCCESS != rc) {
                return rc;
            }
        }
    }

    /* Down the tree: same pairs in reverse order, directions swapped. */
    for (step = span >> 1; step > 0; step >>= 1) {
        peer = me ^ step;

        if ((peer & (step - 1)) || peer >= nprocs) {
            continue;
        }

        if (me < peer) {
            rc = MCA_PML_CALL(send(NULL, 0, MPI_BYTE, peer,
                                   MCA_COLL_BASE_TAG_BARRIER,
                                   MCA_PML_BASE_SEND_STANDARD, comm));
            if (MPI_SUCCESS != rc) {
                return rc;
            }
        } else if (me > peer) {
            rc = MCA_PML_CALL(recv(NULL, 0, MPI_BYTE, peer,
                                   MCA_COLL_BASE_TAG_BARRIER, comm,
                                   MPI_STATUS_IGNORE));
            if (MPI_SUCCESS != rc) {
                return rc;
            }
        }
    }

    return MPI_SUCCESS;
}
/* Example #2 */
/*
 *	reduce_scatter_intra_dec 
 *
 *	Function:	- selects reduce_scatter algorithm to use
 *	Accepts:	- same arguments as MPI_Reduce_scatter()
 *	Returns:	- MPI_SUCCESS or error code (passed from 
 *                        the reduce scatter implementation)
 *
 *      Selection rules, in order:
 *        1. non-commutative op, or any zero entry in rcounts
 *           -> nonoverlapping
 *        2. total bytes <= 12 KiB, or
 *           total bytes <= 256 KiB and the communicator size equals
 *           the value returned by opal_next_poweroftwo_inclusive(), or
 *           communicator size >= 0.0012 * total bytes + 8
 *           -> basic recursive halving
 *        3. everything else -> ring
 *
 *      Note: If we detect zero valued counts in the rcounts array, we
 *      fall back to the nonoverlapping algorithm because the other
 *      algorithms do not currently handle it.
 */
int ompi_coll_tuned_reduce_scatter_intra_dec_fixed( void *sbuf, void *rbuf,
                                                    int *rcounts,
                                                    struct ompi_datatype_t *dtype,
                                                    struct ompi_op_t *op,
                                                    struct ompi_communicator_t *comm,
                                                    mca_coll_base_module_t *module)
{
    /* Linear cut-off: halving is preferred when
       nprocs >= slope * total_bytes + intercept. */
    const double slope = 0.0012;
    const double intercept = 8.0;
    const size_t small_limit = 12 * 1024;
    const size_t large_limit = 256 * 1024;
    size_t total_bytes = 0;
    size_t type_bytes;
    bool has_zero_count = false;
    bool prefer_halving;
    int nprocs, nearest_pow2, idx;

    OPAL_OUTPUT((ompi_coll_tuned_stream, "ompi_coll_tuned_reduce_scatter_intra_dec_fixed"));

    nprocs = ompi_comm_size(comm);

    /* We need data size for decision function */
    ompi_datatype_type_size(dtype, &type_bytes);

    /* Sum the element counts (scaled to bytes further down) and note
       whether any rank contributes a zero count. */
    for (idx = 0; idx < nprocs; ++idx) {
        total_bytes += rcounts[idx];
        has_zero_count = has_zero_count || (0 == rcounts[idx]);
    }

    if (!ompi_op_is_commute(op) || has_zero_count) {
        return ompi_coll_tuned_reduce_scatter_intra_nonoverlapping(sbuf, rbuf, rcounts,
                                                                   dtype, op,
                                                                   comm, module);
    }

    /* Element count -> byte count. */
    total_bytes *= type_bytes;

    /* compute the nearest power of 2 */
    nearest_pow2 = opal_next_poweroftwo_inclusive(nprocs);

    prefer_halving = (total_bytes <= small_limit) ||
                     ((total_bytes <= large_limit) && (nearest_pow2 == nprocs)) ||
                     (nprocs >= slope * total_bytes + intercept);

    if (!prefer_halving) {
        return ompi_coll_tuned_reduce_scatter_intra_ring(sbuf, rbuf, rcounts,
                                                         dtype, op,
                                                         comm, module);
    }

    return ompi_coll_tuned_reduce_scatter_intra_basic_recursivehalving(sbuf, rbuf, rcounts,
                                                                       dtype, op,
                                                                       comm, module);
}
/* Example #3 */
/*
 *	allgather_intra_dec_fixed
 *
 *	Function:	- selects allgather algorithm to use
 *	Accepts:	- the MPI_Allgather() arguments plus the collective
 *			  module handle
 *	Returns:	- MPI_SUCCESS or error code (passed from the
 *			  allgather implementation that was selected)
 *
 *	A two-process communicator always uses the dedicated two_procs
 *	algorithm.  Otherwise the choice depends on the total message size
 *	(datatype size * scount * communicator size) and on the communicator
 *	size, using one of two mutually exclusive rule sets chosen at
 *	compile time:
 *	  - default:               rules measured on the Grig cluster
 *	  - USE_MPICH2_DECISION:   rules from Thakur et al. (MPICH-2)
 */
int ompi_coll_tuned_allgather_intra_dec_fixed(void *sbuf, int scount, 
                                              struct ompi_datatype_t *sdtype,
                                              void* rbuf, int rcount, 
                                              struct ompi_datatype_t *rdtype, 
                                              struct ompi_communicator_t *comm,
                                              mca_coll_base_module_t *module)
{
    int communicator_size, pow2_size;
    size_t dsize, total_dsize;

    communicator_size = ompi_comm_size(comm);

    /* Special case for 2 processes */
    if (communicator_size == 2) {
        return ompi_coll_tuned_allgather_intra_two_procs (sbuf, scount, sdtype, 
                                                          rbuf, rcount, rdtype, 
                                                          comm, module);
    }

    /* Determine complete data size */
    ompi_datatype_type_size(sdtype, &dsize);
    total_dsize = dsize * scount * communicator_size;   
   
    OPAL_OUTPUT((ompi_coll_tuned_stream, "ompi_coll_tuned_allgather_intra_dec_fixed"
                 " rank %d com_size %d msg_length %lu",
                 ompi_comm_rank(comm), communicator_size, (unsigned long)total_dsize));

    /* pow2_size == communicator_size is used below as the
       "communicator size is a power of two" test. */
    pow2_size = opal_next_poweroftwo_inclusive (communicator_size);

    /* Exactly one of the two rule sets below is compiled in.  Every path
       of either set returns, so they must be alternatives rather than
       placed one after the other (the second would never be reached). */
#if defined(USE_MPICH2_DECISION)
    /* Decision as in MPICH-2 
       presented in Thakur et.al. "Optimization of Collective Communication 
       Operations in MPICH", International Journal of High Performance Computing 
       Applications, Vol. 19, No. 1, 49-66 (2005)
       - for power-of-two processes and small and medium size messages 
       (up to 512KB) use recursive doubling
       - for non-power-of-two processes and small messages (80KB) use bruck,
       - for everything else use ring.
    */
    if ((pow2_size == communicator_size) && (total_dsize < 524288)) {
        return ompi_coll_tuned_allgather_intra_recursivedoubling(sbuf, scount, sdtype, 
                                                                 rbuf, rcount, rdtype, 
                                                                 comm, module);
    } else if (total_dsize <= 81920) { 
        return ompi_coll_tuned_allgather_intra_bruck(sbuf, scount, sdtype, 
                                                     rbuf, rcount, rdtype,
                                                     comm, module);
    } 
    return ompi_coll_tuned_allgather_intra_ring(sbuf, scount, sdtype, 
                                                rbuf, rcount, rdtype,
                                                comm, module);
#else
    /* Decision based on MX 2Gb results from Grig cluster at 
       The University of Tennessee, Knoxville 
       - if total message size is less than 50KB use either bruck or 
       recursive doubling for non-power of two and power of two nodes, 
       respectively.
       - else use ring and neighbor exchange algorithms for odd and even 
       number of nodes, respectively.
    */
    if (total_dsize < 50000) {
        if (pow2_size == communicator_size) {
            return ompi_coll_tuned_allgather_intra_recursivedoubling(sbuf, scount, sdtype, 
                                                                     rbuf, rcount, rdtype,
                                                                     comm, module);
        }
        return ompi_coll_tuned_allgather_intra_bruck(sbuf, scount, sdtype, 
                                                     rbuf, rcount, rdtype, 
                                                     comm, module);
    }

    if (communicator_size % 2) {
        return ompi_coll_tuned_allgather_intra_ring(sbuf, scount, sdtype, 
                                                    rbuf, rcount, rdtype, 
                                                    comm, module);
    }
    return ompi_coll_tuned_allgather_intra_neighborexchange(sbuf, scount, sdtype,
                                                            rbuf, rcount, rdtype,
                                                            comm, module);
#endif  /* defined(USE_MPICH2_DECISION) */
}