int ompi_coll_tuned_reduce_intra_do_this(void *sbuf, void* rbuf, int count, struct ompi_datatype_t *dtype, struct ompi_op_t *op, int root, struct ompi_communicator_t *comm, int algorithm, int faninout, int segsize) { OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:reduce_intra_do_this selected algorithm %d topo faninout %d segsize %d", algorithm, faninout, segsize)); switch (algorithm) { case (0): return ompi_coll_tuned_reduce_intra_dec_fixed (sbuf, rbuf, count, dtype, op, root, comm); case (1): return ompi_coll_tuned_reduce_intra_basic_linear (sbuf, rbuf, count, dtype, op, root, comm); case (2): return ompi_coll_tuned_reduce_intra_chain (sbuf, rbuf, count, dtype, op, root, comm, segsize, faninout); case (3): return ompi_coll_tuned_reduce_intra_pipeline (sbuf, rbuf, count, dtype, op, root, comm, segsize); case (4): return ompi_coll_tuned_reduce_intra_binary (sbuf, rbuf, count, dtype, op, root, comm, segsize); case (5): return ompi_coll_tuned_reduce_intra_binomial (sbuf, rbuf, count, dtype, op, root, comm, segsize); default: OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:reduce_intra_do_this attempt to select algorithm %d when only 0-%d is valid?", algorithm, ompi_coll_tuned_forced_max_algorithms[REDUCE])); return (MPI_ERR_ARG); } /* switch */ }
/*
 * reduce_intra_dec_dynamic
 *
 * Function:  - selects the reduce algorithm to use
 * Accepts:   - same arguments as MPI_Reduce(), plus the collective module
 * Returns:   - MPI_SUCCESS or error code (passed from the reduce
 *              implementation)
 *
 * Selection order:
 *   1. file-based rules stored for REDUCE, when they yield a non-zero
 *      algorithm for this message size;
 *   2. the user-forced algorithm, when one is set (non-zero);
 *   3. the fixed decision routine otherwise.
 */
int ompi_coll_tuned_reduce_intra_dec_dynamic(void *sendbuf, void *recvbuf,
                                             int count,
                                             struct ompi_datatype_t* datatype,
                                             struct ompi_op_t* op, int root,
                                             struct ompi_communicator_t* comm,
                                             mca_coll_base_module_t *module)
{
    mca_coll_tuned_module_t *tuned = (mca_coll_tuned_module_t*) module;
    mca_coll_tuned_comm_t *comm_data = tuned->tuned_data;

    OPAL_OUTPUT((ompi_coll_tuned_stream, "coll:tuned:reduce_intra_dec_dynamic"));

    /* Step 1: consult the file-based rules, if any were loaded. */
    if (comm_data->com_rules[REDUCE]) {
        size_t type_size;
        size_t msg_size;
        int method;
        int fanout, seg, max_reqs;

        /* the rules are keyed on datatype size times element count */
        ompi_datatype_type_size(datatype, &type_size);
        msg_size = type_size * count;

        method = ompi_coll_tuned_get_target_method_params(
                     comm_data->com_rules[REDUCE], msg_size,
                     &fanout, &seg, &max_reqs);

        if (0 != method) {
            /* the rules named an algorithm for this message size */
            return ompi_coll_tuned_reduce_intra_do_this(sendbuf, recvbuf, count,
                                                        datatype, op, root,
                                                        comm, module, method,
                                                        fanout, seg, max_reqs);
        }
    }

    /* Step 3 (checked first): nothing forced, so use the fixed decision. */
    if (0 == comm_data->user_forced[REDUCE].algorithm) {
        return ompi_coll_tuned_reduce_intra_dec_fixed(sendbuf, recvbuf, count,
                                                      datatype, op, root,
                                                      comm, module);
    }

    /* Step 2: honour the user-forced algorithm. */
    return ompi_coll_tuned_reduce_intra_do_forced(sendbuf, recvbuf, count,
                                                  datatype, op, root,
                                                  comm, module);
}
/*
 * reduce_intra_do_forced
 *
 * Function:  - runs the reduce implementation selected by the user-forced
 *              settings stored in the module's tuned data
 * Accepts:   - same arguments as MPI_Reduce(), plus the collective module
 * Returns:   - the value returned by the selected implementation, or
 *              MPI_ERR_ARG when the forced algorithm index matches none of
 *              the cases
 *
 * The segsize, chain_fanout and max_requests values are read from the same
 * user_forced[REDUCE] entry as the algorithm index and passed on to the
 * implementations that take them.
 */
int ompi_coll_tuned_reduce_intra_do_forced(void *sbuf, void* rbuf, int count,
                                           struct ompi_datatype_t *dtype,
                                           struct ompi_op_t *op, int root,
                                           struct ompi_communicator_t *comm,
                                           mca_coll_base_module_t *module)
{
    mca_coll_tuned_module_t *tuned = (mca_coll_tuned_module_t*) module;
    mca_coll_tuned_comm_t *comm_data = tuned->tuned_data;

    const int forced_alg      = comm_data->user_forced[REDUCE].algorithm;
    const int forced_segsize  = comm_data->user_forced[REDUCE].segsize;
    const int forced_fanout   = comm_data->user_forced[REDUCE].chain_fanout;
    const int forced_max_reqs = comm_data->user_forced[REDUCE].max_requests;
    int rc;

    OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:reduce_intra_do_forced selected algorithm %d", forced_alg));

    switch (forced_alg) {
    case 0:
        /* index 0 hands the choice over to the fixed decision routine */
        rc = ompi_coll_tuned_reduce_intra_dec_fixed(sbuf, rbuf, count, dtype,
                                                    op, root, comm, module);
        break;
    case 1:
        rc = ompi_coll_tuned_reduce_intra_basic_linear(sbuf, rbuf, count, dtype,
                                                       op, root, comm, module);
        break;
    case 2:
        /* chain is the only algorithm that also takes the fanout */
        rc = ompi_coll_tuned_reduce_intra_chain(sbuf, rbuf, count, dtype,
                                                op, root, comm, module,
                                                forced_segsize, forced_fanout,
                                                forced_max_reqs);
        break;
    case 3:
        rc = ompi_coll_tuned_reduce_intra_pipeline(sbuf, rbuf, count, dtype,
                                                   op, root, comm, module,
                                                   forced_segsize,
                                                   forced_max_reqs);
        break;
    case 4:
        rc = ompi_coll_tuned_reduce_intra_binary(sbuf, rbuf, count, dtype,
                                                 op, root, comm, module,
                                                 forced_segsize,
                                                 forced_max_reqs);
        break;
    case 5:
        rc = ompi_coll_tuned_reduce_intra_binomial(sbuf, rbuf, count, dtype,
                                                   op, root, comm, module,
                                                   forced_segsize,
                                                   forced_max_reqs);
        break;
    case 6:
        rc = ompi_coll_tuned_reduce_intra_in_order_binary(sbuf, rbuf, count,
                                                          dtype, op, root,
                                                          comm, module,
                                                          forced_segsize,
                                                          forced_max_reqs);
        break;
    default:
        /* unknown index: log the upper bound from the forced-algorithm table */
        OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:reduce_intra_do_forced attempt to select algorithm %d when only 0-%d is valid?", forced_alg, ompi_coll_tuned_forced_max_algorithms[REDUCE]));
        rc = MPI_ERR_ARG;
        break;
    }

    return rc;
}