Пример #1
0
/*
 * Run the broadcast implementation identified by `algorithm`.
 *
 * Selector values handled here:
 *   0 - fixed decision function    1 - basic linear
 *   2 - chain                      3 - pipeline
 *   4 - split binary tree          5 - binary tree
 *   6 - binomial tree
 *
 * `segsize` is forwarded to algorithms 2-6; `faninout` is forwarded to the
 * chain algorithm only.  Algorithms 0 and 1 receive neither.
 *
 * Returns the value of the selected implementation, or MPI_ERR_ARG when
 * `algorithm` is none of the values listed above.
 */
int ompi_coll_tuned_bcast_intra_do_this(void *buf, int count,
                                        struct ompi_datatype_t *dtype,
                                        int root,
                                        struct ompi_communicator_t *comm,
                                        mca_coll_base_module_t *module,
                                        int algorithm, int faninout, int segsize)

{
    OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:bcast_intra_do_this algorithm %d topo faninout %d segsize %d",
                 algorithm, faninout, segsize));

    if (0 == algorithm) {
        return ompi_coll_tuned_bcast_intra_dec_fixed(buf, count, dtype, root, comm, module);
    }
    if (1 == algorithm) {
        return ompi_coll_tuned_bcast_intra_basic_linear(buf, count, dtype, root, comm, module);
    }
    if (2 == algorithm) {
        return ompi_coll_tuned_bcast_intra_chain(buf, count, dtype, root, comm, module,
                                                 segsize, faninout);
    }
    if (3 == algorithm) {
        return ompi_coll_tuned_bcast_intra_pipeline(buf, count, dtype, root, comm, module,
                                                    segsize);
    }
    if (4 == algorithm) {
        return ompi_coll_tuned_bcast_intra_split_bintree(buf, count, dtype, root, comm, module,
                                                         segsize);
    }
    if (5 == algorithm) {
        return ompi_coll_tuned_bcast_intra_bintree(buf, count, dtype, root, comm, module,
                                                   segsize);
    }
    if (6 == algorithm) {
        return ompi_coll_tuned_bcast_intra_binomial(buf, count, dtype, root, comm, module,
                                                    segsize);
    }

    /* No selector matched: report the offending value and fail. */
    OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:bcast_intra_do_this attempt to select algorithm %d when only 0-%d is valid?",
                 algorithm, ompi_coll_tuned_forced_max_algorithms[BCAST]));
    return MPI_ERR_ARG;
}
Пример #2
0
/*
 * Run the broadcast implementation selected by the forced settings stored
 * with the module.
 *
 * `module` is cast to mca_coll_tuned_module_t and its tuned_data member is
 * consulted: user_forced[BCAST].algorithm picks the implementation (same
 * 0-6 numbering as the cases below), user_forced[BCAST].segsize is passed to
 * algorithms 2-6, and user_forced[BCAST].chain_fanout is passed to the chain
 * algorithm only.
 *
 * Returns the value of the selected implementation, or MPI_ERR_ARG when the
 * stored selector matches none of the cases.
 */
int ompi_coll_tuned_bcast_intra_do_forced(void *buf, int count,
        struct ompi_datatype_t *dtype,
        int root,
        struct ompi_communicator_t *comm,
        mca_coll_base_module_t *module)
{
    mca_coll_tuned_comm_t *cfg = ((mca_coll_tuned_module_t*) module)->tuned_data;
    int rc = MPI_ERR_ARG;   /* result when no case below matches */

    OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:bcast_intra_do_forced algorithm %d",
                 cfg->user_forced[BCAST].algorithm));

    switch (cfg->user_forced[BCAST].algorithm) {
    case 0:
        rc = ompi_coll_tuned_bcast_intra_dec_fixed(buf, count, dtype, root, comm, module);
        break;
    case 1:
        rc = ompi_coll_tuned_bcast_intra_basic_linear(buf, count, dtype, root, comm, module);
        break;
    case 2:
        rc = ompi_coll_tuned_bcast_intra_chain(buf, count, dtype, root, comm, module,
                                               cfg->user_forced[BCAST].segsize,
                                               cfg->user_forced[BCAST].chain_fanout);
        break;
    case 3:
        rc = ompi_coll_tuned_bcast_intra_pipeline(buf, count, dtype, root, comm, module,
                                                  cfg->user_forced[BCAST].segsize);
        break;
    case 4:
        rc = ompi_coll_tuned_bcast_intra_split_bintree(buf, count, dtype, root, comm, module,
                                                       cfg->user_forced[BCAST].segsize);
        break;
    case 5:
        rc = ompi_coll_tuned_bcast_intra_bintree(buf, count, dtype, root, comm, module,
                                                 cfg->user_forced[BCAST].segsize);
        break;
    case 6:
        rc = ompi_coll_tuned_bcast_intra_binomial(buf, count, dtype, root, comm, module,
                                                  cfg->user_forced[BCAST].segsize);
        break;
    default:
        /* Unknown selector: log it and leave rc at MPI_ERR_ARG. */
        OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:bcast_intra_do_forced attempt to select algorithm %d when only 0-%d is valid?",
                     cfg->user_forced[BCAST].algorithm, ompi_coll_tuned_forced_max_algorithms[BCAST]));
        break;
    }

    return rc;
}
/*
 *	bcast_intra_dec_fixed
 *
 *	Function:	- selects the broadcast algorithm and segment size to use,
 *			  then runs it
 *	Accepts:	- same arguments as MPI_Bcast(), plus the coll module
 *	Returns:	- the return value of the selected bcast implementation
 */
int ompi_coll_tuned_bcast_intra_dec_fixed(void *buff, int count,
                                          struct ompi_datatype_t *datatype, int root,
                                          struct ompi_communicator_t *comm,
                                          mca_coll_base_module_t *module)
{
    /* Thresholds below come from MX measurements covering messages up to
       36MB and communicators of up to 64 nodes. */
    const size_t small_message_size = 2048;
    const size_t intermediate_message_size = 370728;

    /* Coefficients of the linear bounds (a * message_size + b) that the
       communicator size is compared against for large messages. */
    const double a_p16  = 3.2118e-6; /* [1 / byte] */
    const double b_p16  = 8.7936;
    const double a_p64  = 2.3679e-6; /* [1 / byte] */
    const double b_p64  = 1.1787;
    const double a_p128 = 1.6134e-6; /* [1 / byte] */
    const double b_p128 = 2.1102;

    int communicator_size = ompi_comm_size(comm);
    int segsize = 0;
    size_t message_size, type_size;

    /* The decision is driven by the total payload: type size times count. */
    ompi_ddt_type_size(datatype, &type_size);
    message_size = type_size * (unsigned long)count;

    OPAL_OUTPUT((ompi_coll_tuned_stream, "ompi_coll_tuned_bcast_intra_dec_fixed"
                 " root %d rank %d com_size %d msg_length %lu",
                 root, ompi_comm_rank(comm), communicator_size, (unsigned long)message_size));

    /* Single-element broadcasts and small messages: binomial tree without
       segmentation (segsize is still 0 at this point). */
    if ((count <= 1) || (message_size < small_message_size)) {
        return ompi_coll_tuned_bcast_intra_binomial(buff, count, datatype,
                                                    root, comm, module,
                                                    segsize);
    }

    /* Intermediate messages: split binary tree with 1KB segments. */
    if (message_size < intermediate_message_size) {
        segsize = 1024;
        return ompi_coll_tuned_bcast_intra_split_bintree(buff, count, datatype,
                                                         root, comm, module,
                                                         segsize);
    }

    /* Everything from here on is a large message. */

    /* Pipeline with 128KB segments. */
    if (communicator_size < (a_p128 * message_size + b_p128)) {
        segsize = 128 * 1024;
        return ompi_coll_tuned_bcast_intra_pipeline(buff, count, datatype,
                                                    root, comm, module,
                                                    segsize);
    }

    /* Split binary tree with 8KB segments. */
    if (communicator_size < 13) {
        segsize = 8 * 1024;
        return ompi_coll_tuned_bcast_intra_split_bintree(buff, count, datatype,
                                                         root, comm, module,
                                                         segsize);
    }

    /* Pipeline with 64KB segments. */
    if (communicator_size < (a_p64 * message_size + b_p64)) {
        segsize = 64 * 1024;
        return ompi_coll_tuned_bcast_intra_pipeline(buff, count, datatype,
                                                    root, comm, module,
                                                    segsize);
    }

    /* Pipeline with 16KB segments. */
    if (communicator_size < (a_p16 * message_size + b_p16)) {
        segsize = 16 * 1024;
        return ompi_coll_tuned_bcast_intra_pipeline(buff, count, datatype,
                                                    root, comm, module,
                                                    segsize);
    }

    /* Fallback: pipeline with 8KB segments. */
    segsize = 8 * 1024;
    return ompi_coll_tuned_bcast_intra_pipeline(buff, count, datatype,
                                                root, comm, module,
                                                segsize);
    /* The block below is compiled out by "#if 0" and sits after an
       unconditional return; it is retained unchanged. */
#if 0
    /* this is based on gige measurements */

    if (communicator_size  < 4) {
        return ompi_coll_tuned_bcast_intra_basic_linear (buff, count, datatype, root, comm, module);
    }
    if (communicator_size == 4) {
        if (message_size < 524288) segsize = 0;
        else segsize = 16384;
        return ompi_coll_tuned_bcast_intra_bintree (buff, count, datatype, root, comm, module, segsize);
    }
    if (communicator_size <= 8 && message_size < 4096) {
        return ompi_coll_tuned_bcast_intra_basic_linear (buff, count, datatype, root, comm, module);
    }
    if (communicator_size > 8 && message_size >= 32768 && message_size < 524288) {
        segsize = 16384;
        return  ompi_coll_tuned_bcast_intra_bintree (buff, count, datatype, root, comm, module, segsize);
    }
    if (message_size >= 524288) {
        segsize = 16384;
        return ompi_coll_tuned_bcast_intra_pipeline (buff, count, datatype, root, comm, module, segsize);
    }
    segsize = 0;
    /* once tested can swap this back in */
    /* return ompi_coll_tuned_bcast_intra_bmtree (buff, count, datatype, root, comm, segsize); */
    return ompi_coll_tuned_bcast_intra_bintree (buff, count, datatype, root, comm, module, segsize);
#endif  /* 0 */
}