Example #1
int MPIR_Igather_sched_inter_auto(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, int root, MPIR_Comm *comm_ptr, MPIR_Sched_t s)
{
    int mpi_errno = MPI_SUCCESS;
    MPI_Aint local_size, remote_size;
    MPI_Aint recvtype_size, sendtype_size, nbytes;

    if (root == MPI_PROC_NULL) {
        /* local processes other than root do nothing */
        goto fn_exit;
    }

    remote_size = comm_ptr->remote_size;
    local_size = comm_ptr->local_size;

    if (root == MPI_ROOT) {
        MPIR_Datatype_get_size_macro(recvtype, recvtype_size);
        nbytes = recvtype_size * recvcount * remote_size;
    }
    else {
        /* remote side */
        MPIR_Datatype_get_size_macro(sendtype, sendtype_size);
        nbytes = sendtype_size * sendcount * local_size;
    }

    if (nbytes < MPIR_CVAR_GATHER_INTER_SHORT_MSG_SIZE) {
        mpi_errno = MPIR_Igather_sched_inter_short(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, root, comm_ptr, s);
    } else {
        mpi_errno = MPIR_Igather_sched_inter_long(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, root, comm_ptr, s);
    }

fn_exit:
    return mpi_errno;
}
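Example #1 dispatches on the three values `root` can take on an intercommunicator: `MPI_ROOT` on the actual root, `MPI_PROC_NULL` on the other processes in the root's group, and a remote-group rank everywhere else. Below is a minimal sketch (not MPICH code) of how a caller drives a gather on an intercommunicator with those values; the split into two halves and the leader ranks are illustrative assumptions.

/* Sketch: gather on an intercommunicator built from two halves of
 * MPI_COMM_WORLD.  The group split and leader choice are assumptions for
 * illustration only.  Run with at least two processes. */
#include <mpi.h>
#include <stdio.h>
#include <stdlib.h>

int main(int argc, char **argv)
{
    MPI_Init(&argc, &argv);

    int wrank, wsize;
    MPI_Comm_rank(MPI_COMM_WORLD, &wrank);
    MPI_Comm_size(MPI_COMM_WORLD, &wsize);

    /* group 0 holds the root, group 1 is the remote (sending) group */
    int color = (wrank < wsize / 2) ? 0 : 1;
    MPI_Comm local;
    MPI_Comm_split(MPI_COMM_WORLD, color, wrank, &local);

    int remote_leader = (color == 0) ? wsize / 2 : 0;
    MPI_Comm inter;
    MPI_Intercomm_create(local, 0, MPI_COMM_WORLD, remote_leader, 0, &inter);

    if (color == 0) {
        int lrank;
        MPI_Comm_rank(local, &lrank);
        if (lrank == 0) {
            /* the actual root passes MPI_ROOT and receives one int per
             * remote process */
            int rsize;
            MPI_Comm_remote_size(inter, &rsize);
            int *gathered = malloc(rsize * sizeof(int));
            MPI_Gather(NULL, 0, MPI_INT, gathered, 1, MPI_INT, MPI_ROOT, inter);
            printf("root gathered %d values\n", rsize);
            free(gathered);
        } else {
            /* the rest of the root's group passes MPI_PROC_NULL and does
             * nothing, matching the early exit in the code above */
            MPI_Gather(NULL, 0, MPI_INT, NULL, 0, MPI_INT, MPI_PROC_NULL, inter);
        }
    } else {
        /* the remote group passes the root's rank within the other group */
        MPI_Gather(&wrank, 1, MPI_INT, NULL, 0, MPI_INT, 0, inter);
    }

    MPI_Comm_free(&inter);
    MPI_Comm_free(&local);
    MPI_Finalize();
    return 0;
}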
Example #2
int MPIR_Iscatter_sched_intra_binomial(const void *sendbuf, int sendcount, MPI_Datatype sendtype,
                                       void *recvbuf, int recvcount, MPI_Datatype recvtype,
                                       int root, MPIR_Comm * comm_ptr, MPIR_Sched_t s)
{
    int mpi_errno = MPI_SUCCESS;
    MPI_Aint extent = 0;
    int rank, comm_size, sendtype_size;
    int relative_rank;
    int mask, recvtype_size = 0, src, dst;
    int tmp_buf_size = 0;
    void *tmp_buf = NULL;
    struct shared_state *ss = NULL;
    MPIR_SCHED_CHKPMEM_DECL(4);

    comm_size = comm_ptr->local_size;
    rank = comm_ptr->rank;

    if (((rank == root) && (sendcount == 0)) || ((rank != root) && (recvcount == 0)))
        goto fn_exit;

    MPIR_SCHED_CHKPMEM_MALLOC(ss, struct shared_state *, sizeof(struct shared_state), mpi_errno,
                              "shared_state", MPL_MEM_BUFFER);
    ss->sendcount = sendcount;

    if (rank == root)
        MPIR_Datatype_get_extent_macro(sendtype, extent);

    relative_rank = (rank >= root) ? rank - root : rank - root + comm_size;

    if (rank == root) {
        /* We separate the two cases (root and non-root) because
         * in the event of recvbuf=MPI_IN_PLACE on the root,
         * recvcount and recvtype are not valid */
        MPIR_Datatype_get_size_macro(sendtype, sendtype_size);
        MPIR_Ensure_Aint_fits_in_pointer(MPIR_VOID_PTR_CAST_TO_MPI_AINT sendbuf +
                                         extent * sendcount * comm_size);

        ss->nbytes = sendtype_size * sendcount;
    } else {
        MPIR_Datatype_get_size_macro(recvtype, recvtype_size);
        MPIR_Ensure_Aint_fits_in_pointer(extent * recvcount * comm_size);
        ss->nbytes = recvtype_size * recvcount;
    }

    ss->curr_count = 0;

    /* all even nodes other than root need a temporary buffer to
     * receive data of max size (ss->nbytes*comm_size)/2 */
    if (relative_rank && !(relative_rank % 2)) {
        tmp_buf_size = (ss->nbytes * comm_size) / 2;
        MPIR_SCHED_CHKPMEM_MALLOC(tmp_buf, void *, tmp_buf_size, mpi_errno, "tmp_buf",
                                  MPL_MEM_BUFFER);
    }
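The `relative_rank` computation above rotates the ranks so that the binomial tree behaves as if it were rooted at rank 0 regardless of the actual root. A standalone sketch (illustrative, not MPICH code) of the mapping and its inverse:

/* Sketch: the rank rotation used by the binomial tree so that `root`
 * behaves like rank 0, and the inverse mapping back to real ranks. */
#include <stdio.h>

static int relative_rank(int rank, int root, int comm_size)
{
    return (rank >= root) ? rank - root : rank - root + comm_size;
}

static int absolute_rank(int rel, int root, int comm_size)
{
    return (rel + root) % comm_size;
}

int main(void)
{
    int comm_size = 6, root = 4;
    for (int rank = 0; rank < comm_size; rank++) {
        int rel = relative_rank(rank, root, comm_size);
        printf("rank %d -> relative %d -> back %d\n",
               rank, rel, absolute_rank(rel, root, comm_size));
    }
    return 0;
}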
Example #3
int MPIR_Iallgather_sched_intra_auto(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, MPIR_Comm *comm_ptr, MPIR_Sched_t s)
{
    int mpi_errno = MPI_SUCCESS;
    int comm_size, recvtype_size;
    MPI_Aint tot_bytes;

    if (((sendcount == 0) && (sendbuf != MPI_IN_PLACE)) || (recvcount == 0))
        return MPI_SUCCESS;

    comm_size = comm_ptr->local_size;

    MPIR_Datatype_get_size_macro(recvtype, recvtype_size);
    tot_bytes = (MPI_Aint)recvcount * comm_size * recvtype_size;

    if ((tot_bytes < MPIR_CVAR_ALLGATHER_LONG_MSG_SIZE) && !(comm_size & (comm_size - 1))) {
        mpi_errno = MPIR_Iallgather_sched_intra_recursive_doubling(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, comm_ptr, s);
    } else if (tot_bytes < MPIR_CVAR_ALLGATHER_SHORT_MSG_SIZE) {
        mpi_errno = MPIR_Iallgather_sched_intra_brucks(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, comm_ptr, s);
    } else {
        mpi_errno = MPIR_Iallgather_sched_intra_ring(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, comm_ptr, s);
    }
    if (mpi_errno) MPIR_ERR_POP(mpi_errno);

fn_exit:
    return mpi_errno;
fn_fail:
    goto fn_exit;
}
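The selection above uses `!(comm_size & (comm_size - 1))` to test for a power-of-two communicator size. A short sketch of the bit trick: a power of two has exactly one set bit, so clearing the lowest set bit with `n & (n - 1)` leaves zero, while any other nonzero value keeps at least one bit.

/* Sketch of the power-of-two bit trick used in the dispatch above. */
#include <stdio.h>

static int is_pof2(unsigned n)
{
    return n != 0 && (n & (n - 1)) == 0;
}

int main(void)
{
    for (unsigned n = 1; n <= 9; n++)
        printf("%u: %s\n", n, is_pof2(n) ? "power of two" : "not");
    return 0;
}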
Example #4
/* Algorithm: Short Linear Gather
 *
 * This linear gather algorithm is tuned for short messages. The remote group
 * does a local intracommunicator gather to rank 0. Rank 0 then sends data to
 * root.
 *
 * Cost: (lgp+1).alpha + n.((p-1)/p).beta + n.beta
 */
int MPIR_Igather_sched_inter_short(const void *sendbuf, int sendcount, MPI_Datatype sendtype,
                                   void *recvbuf, int recvcount, MPI_Datatype recvtype, int root,
                                   MPIR_Comm * comm_ptr, MPIR_Sched_t s)
{
    int mpi_errno = MPI_SUCCESS;
    int rank;
    MPI_Aint local_size, remote_size;
    MPIR_Comm *newcomm_ptr = NULL;
    MPIR_SCHED_CHKPMEM_DECL(1);

    remote_size = comm_ptr->remote_size;
    local_size = comm_ptr->local_size;

    if (root == MPI_ROOT) {
        /* root receives data from rank 0 on remote group */
        mpi_errno = MPIR_Sched_recv(recvbuf, recvcount * remote_size, recvtype, 0, comm_ptr, s);
        if (mpi_errno)
            MPIR_ERR_POP(mpi_errno);
    } else {
        /* remote group. Rank 0 allocates temporary buffer, does
         * local intracommunicator gather, and then sends the data
         * to root. */
        MPI_Aint sendtype_sz;
        void *tmp_buf = NULL;

        rank = comm_ptr->rank;

        if (rank == 0) {
            MPIR_Datatype_get_size_macro(sendtype, sendtype_sz);
            MPIR_SCHED_CHKPMEM_MALLOC(tmp_buf, void *,
                                      sendcount * local_size * sendtype_sz,
                                      mpi_errno, "tmp_buf", MPL_MEM_BUFFER);
        } else {
Example #5
int MPIR_Type_size_x_impl(MPI_Datatype datatype, MPI_Count * size)
{
    int mpi_errno = MPI_SUCCESS;

    MPIR_Datatype_get_size_macro(datatype, *size);

    return mpi_errno;
}
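`MPIR_Type_size_x_impl` backs the public `MPI_Type_size_x`, which reports a datatype's size as an `MPI_Count` rather than an `int`. A minimal usage sketch against the public API:

/* Usage sketch for MPI_Type_size_x: query the size of a committed derived
 * type as an MPI_Count.  A vector of 4 blocks of 2 doubles holds 8 doubles
 * of actual data, independent of the stride. */
#include <mpi.h>
#include <stdio.h>

int main(int argc, char **argv)
{
    MPI_Init(&argc, &argv);

    MPI_Datatype vec;
    MPI_Type_vector(4, 2, 3, MPI_DOUBLE, &vec);
    MPI_Type_commit(&vec);

    MPI_Count size;
    MPI_Type_size_x(vec, &size);
    printf("size = %lld bytes\n", (long long) size);    /* 8 * sizeof(double) */

    MPI_Type_free(&vec);
    MPI_Finalize();
    return 0;
}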
Example #6
/*
 DLOOP_Named_type_alignsize - calculate alignment in bytes for a struct
                              based on constituent elements.

 Returns alignment in bytes.
*/
static int DLOOP_Named_type_alignsize(MPI_Datatype type, MPI_Aint disp)
{
    int alignsize = 0;

    static int havent_tested_align_rules = 1;
    static int max_intalign = 0, max_fpalign = 0;
    static int have_double_pos_align = 0, have_llint_pos_align = 0;
    static int max_doublealign = 0, max_longdoublealign = 0;

    if (havent_tested_align_rules) {
	max_intalign          = DLOOP_Structalign_integer_max();
	max_fpalign           = DLOOP_Structalign_float_max();
	max_doublealign       = DLOOP_Structalign_double_max();
	max_longdoublealign   = DLOOP_Structalign_long_double_max();
	have_double_pos_align = DLOOP_Structalign_double_position();
	have_llint_pos_align  = DLOOP_Structalign_llint_position();

	havent_tested_align_rules = 0;
    }

    /* skip LBs, UBs, and elements with zero block length */
    if (type == MPI_LB || type == MPI_UB)
	return 0;

    MPIR_Datatype_get_size_macro(type, alignsize);

    switch(type)
    {
	case MPI_FLOAT:
	    if (alignsize > max_fpalign)
		alignsize = max_fpalign;
	    break;
	case MPI_DOUBLE:
	    if (alignsize > max_doublealign)
		alignsize = max_doublealign;

	    if (have_double_pos_align && disp != (MPI_Aint) 0)
		alignsize = 4; /* would be better to test */
	    break;
	case MPI_LONG_DOUBLE:
	    if (alignsize > max_longdoublealign)
		alignsize = max_longdoublealign;
	    break;
	default:
	    if (alignsize > max_intalign)
		alignsize = max_intalign;

	    if (have_llint_pos_align &&
		type == MPI_LONG_LONG_INT &&
		disp != (MPI_Aint) 0)
	    {
		alignsize = 4; /* would be better to test */
	    }
	    break;
    }

    return alignsize;
}
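The `DLOOP_Structalign_*` helpers probe the platform's struct alignment rules once and cache the results in the static flags above. A sketch of how such a probe can be written with `offsetof`; this is an assumption about what those helpers measure, not their actual code:

/* Sketch (an assumption about what the DLOOP_Structalign_* probes measure,
 * not their implementation): the offset of the second member after a
 * leading char reveals the alignment the compiler gives that type inside
 * a struct. */
#include <stdio.h>
#include <stddef.h>

struct probe_double { char c; double d; };
struct probe_int    { char c; int i; };

int main(void)
{
    printf("double aligns to %zu bytes in a struct\n",
           offsetof(struct probe_double, d));
    printf("int aligns to %zu bytes in a struct\n",
           offsetof(struct probe_int, i));
    return 0;
}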
Example #7
int MPIR_Ireduce_scatter_block_sched_intra_auto(const void *sendbuf, void *recvbuf, int recvcount,
                                                MPI_Datatype datatype, MPI_Op op,
                                                MPIR_Comm * comm_ptr, MPIR_Sched_t s)
{
    int mpi_errno = MPI_SUCCESS;
    int is_commutative;
    int total_count, type_size, nbytes;
    int comm_size;

    is_commutative = MPIR_Op_is_commutative(op);

    comm_size = comm_ptr->local_size;
    total_count = recvcount * comm_size;
    if (total_count == 0) {
        goto fn_exit;
    }
    MPIR_Datatype_get_size_macro(datatype, type_size);
    nbytes = total_count * type_size;

    /* select an appropriate algorithm based on commutativity and message size */
    if (is_commutative && (nbytes < MPIR_CVAR_REDUCE_SCATTER_COMMUTATIVE_LONG_MSG_SIZE)) {
        mpi_errno =
            MPIR_Ireduce_scatter_block_sched_intra_recursive_halving(sendbuf, recvbuf, recvcount,
                                                                     datatype, op, comm_ptr, s);
        if (mpi_errno)
            MPIR_ERR_POP(mpi_errno);
    } else if (is_commutative && (nbytes >= MPIR_CVAR_REDUCE_SCATTER_COMMUTATIVE_LONG_MSG_SIZE)) {
        mpi_errno =
            MPIR_Ireduce_scatter_block_sched_intra_pairwise(sendbuf, recvbuf, recvcount, datatype,
                                                            op, comm_ptr, s);
        if (mpi_errno)
            MPIR_ERR_POP(mpi_errno);
    } else {    /* (!is_commutative) */

        if (MPL_is_pof2(comm_size, NULL)) {
            /* noncommutative, pof2 size */
            mpi_errno =
                MPIR_Ireduce_scatter_block_sched_intra_noncommutative(sendbuf, recvbuf, recvcount,
                                                                      datatype, op, comm_ptr, s);
            if (mpi_errno)
                MPIR_ERR_POP(mpi_errno);
        } else {
            /* noncommutative and non-pof2, use recursive doubling. */
            mpi_errno =
                MPIR_Ireduce_scatter_block_sched_intra_recursive_doubling(sendbuf, recvbuf,
                                                                          recvcount, datatype, op,
                                                                          comm_ptr, s);
            if (mpi_errno)
                MPIR_ERR_POP(mpi_errno);
        }
    }

  fn_exit:
    return mpi_errno;
  fn_fail:
    goto fn_exit;
}
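`MPIR_Op_is_commutative` is the internal counterpart of the public `MPI_Op_commutative` query; for user-defined operations the flag is declared when the op is created. A minimal sketch:

/* Sketch of the public counterpart of MPIR_Op_is_commutative: a
 * user-defined op declares its commutativity at MPI_Op_create time and
 * the flag can be queried back with MPI_Op_commutative. */
#include <mpi.h>
#include <stdio.h>

static void my_sum(void *in, void *inout, int *len, MPI_Datatype *dt)
{
    int *a = in, *b = inout;
    for (int i = 0; i < *len; i++)
        b[i] += a[i];
}

int main(int argc, char **argv)
{
    MPI_Init(&argc, &argv);

    MPI_Op op;
    MPI_Op_create(my_sum, 1 /* commute */, &op);

    int commute;
    MPI_Op_commutative(op, &commute);
    printf("commutative: %d\n", commute);   /* prints 1 */

    MPI_Op_free(&op);
    MPI_Finalize();
    return 0;
}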
Example #8
static int create_basic_all_bytes_struct(MPI_Aint count,
                                         const int *blklens,
                                         const MPI_Aint * disps,
                                         const MPI_Datatype * oldtypes,
                                         MPIR_Dataloop ** dlp_p, MPI_Aint * dlsz_p)
{
    int i, err, cur_pos = 0;
    MPI_Aint *tmp_blklens;
    MPI_Aint *tmp_disps;

    /* count is an upper bound on number of type instances */
    tmp_blklens = (MPI_Aint *) MPL_malloc(count * sizeof(MPI_Aint), MPL_MEM_DATATYPE);

    /* --BEGIN ERROR HANDLING-- */
    if (!tmp_blklens) {
        return create_struct_memory_error();
    }
    /* --END ERROR HANDLING-- */

    tmp_disps = (MPI_Aint *) MPL_malloc(count * sizeof(MPI_Aint), MPL_MEM_DATATYPE);

    /* --BEGIN ERROR HANDLING-- */
    if (!tmp_disps) {
        MPL_free(tmp_blklens);
        return create_struct_memory_error();
    }
    /* --END ERROR HANDLING-- */

    for (i = 0; i < count; i++) {
        if (oldtypes[i] != MPI_LB && oldtypes[i] != MPI_UB && blklens[i] != 0) {
            MPI_Aint sz;

            MPIR_Datatype_get_size_macro(oldtypes[i], sz);
            tmp_blklens[cur_pos] = (int) sz * blklens[i];
            tmp_disps[cur_pos] = disps[i];
            cur_pos++;
        }
    }
    err = MPII_Dataloop_create_indexed(cur_pos, tmp_blklens, tmp_disps, 1,      /* disp in bytes */
                                       MPI_BYTE, dlp_p, dlsz_p);

    MPL_free(tmp_blklens);
    MPL_free(tmp_disps);

    return err;
}
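The routine above flattens a struct of builtin types into an indexed layout of `MPI_BYTE` by scaling each block length by the element size. A public-API sketch of the same flattening, with made-up block lengths and displacements:

/* Sketch of the flattening idea above using the public API (illustrative
 * layout): a struct of builtins becomes an hindexed type of MPI_BYTE once
 * each blklen is scaled by the element size. */
#include <mpi.h>
#include <stdio.h>

int main(int argc, char **argv)
{
    MPI_Init(&argc, &argv);

    int          blklens[2] = { 2, 1 };             /* 2 ints, 1 double */
    MPI_Aint     disps[2]   = { 0, 16 };
    MPI_Datatype types[2]   = { MPI_INT, MPI_DOUBLE };

    /* scale the block lengths to bytes, as create_basic_all_bytes_struct does */
    int byte_blklens[2];
    for (int i = 0; i < 2; i++) {
        int sz;
        MPI_Type_size(types[i], &sz);
        byte_blklens[i] = sz * blklens[i];
    }

    MPI_Datatype flat;
    MPI_Type_create_hindexed(2, byte_blklens, disps, MPI_BYTE, &flat);
    MPI_Type_commit(&flat);

    int total;
    MPI_Type_size(flat, &total);
    printf("flattened size = %d bytes\n", total);   /* 2*sizeof(int) + sizeof(double) */

    MPI_Type_free(&flat);
    MPI_Finalize();
    return 0;
}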
Example #9
int MPIR_Bcast_intra_auto(void *buffer,
                          int count,
                          MPI_Datatype datatype,
                          int root, MPIR_Comm * comm_ptr, MPIR_Errflag_t * errflag)
{
    int mpi_errno = MPI_SUCCESS;
    int mpi_errno_ret = MPI_SUCCESS;
    int comm_size;
    MPI_Aint nbytes = 0;
    MPI_Aint type_size;
    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPIR_BCAST);

    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPIR_BCAST);

    if (count == 0)
        goto fn_exit;

    MPIR_Datatype_get_size_macro(datatype, type_size);
    nbytes = MPIR_CVAR_MAX_SMP_BCAST_MSG_SIZE ? type_size * count : 0;
    if (MPIR_CVAR_ENABLE_SMP_COLLECTIVES && MPIR_CVAR_ENABLE_SMP_BCAST &&
        nbytes <= MPIR_CVAR_MAX_SMP_BCAST_MSG_SIZE && MPIR_Comm_is_node_aware(comm_ptr)) {
        mpi_errno = MPIR_Bcast_intra_smp(buffer, count, datatype, root, comm_ptr, errflag);
        if (mpi_errno) {
            /* for communication errors, just record the error but continue */
            *errflag =
                MPIX_ERR_PROC_FAILED ==
                MPIR_ERR_GET_CLASS(mpi_errno) ? MPIR_ERR_PROC_FAILED : MPIR_ERR_OTHER;
            MPIR_ERR_SET(mpi_errno, *errflag, "**fail");
            MPIR_ERR_ADD(mpi_errno_ret, mpi_errno);
        }
        goto fn_exit;
    }

    comm_size = comm_ptr->local_size;

    MPIR_Datatype_get_size_macro(datatype, type_size);

    nbytes = type_size * count;
    if (nbytes == 0)
        goto fn_exit;   /* nothing to do */

    if ((nbytes < MPIR_CVAR_BCAST_SHORT_MSG_SIZE) || (comm_size < MPIR_CVAR_BCAST_MIN_PROCS)) {
        mpi_errno = MPIR_Bcast_intra_binomial(buffer, count, datatype, root, comm_ptr, errflag);
    } else {    /* (nbytes >= MPIR_CVAR_BCAST_SHORT_MSG_SIZE) && (comm_size >= MPIR_CVAR_BCAST_MIN_PROCS) */

        if ((nbytes < MPIR_CVAR_BCAST_LONG_MSG_SIZE) && (MPL_is_pof2(comm_size, NULL))) {
            mpi_errno =
                MPIR_Bcast_intra_scatter_recursive_doubling_allgather(buffer, count, datatype, root,
                                                                      comm_ptr, errflag);
        } else {        /* (nbytes >= MPIR_CVAR_BCAST_LONG_MSG_SIZE) || !(comm_size_is_pof2) */

            mpi_errno =
                MPIR_Bcast_intra_scatter_ring_allgather(buffer, count, datatype, root, comm_ptr,
                                                        errflag);
        }
    }
    if (mpi_errno) {
        /* for communication errors, just record the error but continue */
        *errflag =
            MPIX_ERR_PROC_FAILED ==
            MPIR_ERR_GET_CLASS(mpi_errno) ? MPIR_ERR_PROC_FAILED : MPIR_ERR_OTHER;
        MPIR_ERR_SET(mpi_errno, *errflag, "**fail");
        MPIR_ERR_ADD(mpi_errno_ret, mpi_errno);
    }

  fn_exit:
    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPIR_BCAST);

    /* --BEGIN ERROR HANDLING-- */
    if (mpi_errno_ret)
        mpi_errno = mpi_errno_ret;
    else if (*errflag != MPIR_ERR_NONE)
        MPIR_ERR_SET(mpi_errno, *errflag, "**coll_fail");
    /* --END ERROR HANDLING-- */
    return mpi_errno;
}
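The branching above reduces to a small decision table over `nbytes` and `comm_size`. A distilled sketch of that table as a pure function; the threshold values passed in `main` are illustrative, not MPICH's defaults:

/* Sketch: the non-SMP algorithm selection of MPIR_Bcast_intra_auto
 * distilled into a pure function over message size and communicator size. */
#include <stdio.h>

enum algo { BINOMIAL, SCATTER_RDBL_ALLGATHER, SCATTER_RING_ALLGATHER };

static enum algo pick_bcast(long nbytes, int comm_size,
                            long short_msg, long long_msg, int min_procs)
{
    int is_pof2 = comm_size > 0 && (comm_size & (comm_size - 1)) == 0;

    if (nbytes < short_msg || comm_size < min_procs)
        return BINOMIAL;
    if (nbytes < long_msg && is_pof2)
        return SCATTER_RDBL_ALLGATHER;
    return SCATTER_RING_ALLGATHER;
}

int main(void)
{
    static const char *name[] = {
        "binomial", "scatter + recursive-doubling allgather",
        "scatter + ring allgather"
    };
    /* illustrative threshold values, not MPICH's defaults */
    printf("%s\n", name[pick_bcast(1L << 20, 64, 12288L, 524288L, 8)]);
    return 0;
}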
Example #10
/* Algorithm: Nonblocking all-to-all for sendbuf==MPI_IN_PLACE.
 *
 * Restrictions: Only for MPI_IN_PLACE
 *
 * We use nonblocking equivalent of pair-wise sendrecv_replace in order to
 * conserve memory usage, which is in keeping with the spirit of the MPI-2.2
 * Standard.  But because of this approach all processes must agree on the
 * global schedule of "sendrecv_replace" operations to avoid deadlock.
 *
 * Note that this is not an especially efficient algorithm in terms of time.
 * Something like MADRE is probably the best solution for the MPI_IN_PLACE
 * scenario. */
int MPIR_Ialltoall_sched_intra_inplace(const void *sendbuf, int sendcount, MPI_Datatype sendtype,
                                       void *recvbuf, int recvcount, MPI_Datatype recvtype,
                                       MPIR_Comm * comm_ptr, MPIR_Sched_t s)
{
    int mpi_errno = MPI_SUCCESS;
    void *tmp_buf = NULL;
    int i, j;
    int rank, comm_size;
    int nbytes, recvtype_size;
    MPI_Aint recvtype_extent;
    int peer;
    MPIR_SCHED_CHKPMEM_DECL(1);

#ifdef HAVE_ERROR_CHECKING
    MPIR_Assert(sendbuf == MPI_IN_PLACE);
#endif

    if (recvcount == 0)
        goto fn_exit;

    comm_size = comm_ptr->local_size;
    rank = comm_ptr->rank;
    MPIR_Datatype_get_size_macro(recvtype, recvtype_size);
    MPIR_Datatype_get_extent_macro(recvtype, recvtype_extent);
    nbytes = recvtype_size * recvcount;

    MPIR_SCHED_CHKPMEM_MALLOC(tmp_buf, void *, nbytes, mpi_errno, "tmp_buf", MPL_MEM_BUFFER);

    for (i = 0; i < comm_size; ++i) {
        /* start inner loop at i to avoid re-exchanging data */
        for (j = i; j < comm_size; ++j) {
            if (rank == i && rank == j) {
                /* no need to "sendrecv_replace" for ourselves */
            } else if (rank == i || rank == j) {
                if (rank == i)
                    peer = j;
                else
                    peer = i;

                /* pack to tmp_buf */
                mpi_errno = MPIR_Sched_copy(((char *) recvbuf + peer * recvcount * recvtype_extent),
                                            recvcount, recvtype, tmp_buf, nbytes, MPI_BYTE, s);
                if (mpi_errno)
                    MPIR_ERR_POP(mpi_errno);
                MPIR_SCHED_BARRIER(s);

                /* now simultaneously send from tmp_buf and recv to recvbuf */
                mpi_errno = MPIR_Sched_send(tmp_buf, nbytes, MPI_BYTE, peer, comm_ptr, s);
                if (mpi_errno)
                    MPIR_ERR_POP(mpi_errno);
                mpi_errno = MPIR_Sched_recv(((char *) recvbuf + peer * recvcount * recvtype_extent),
                                            recvcount, recvtype, peer, comm_ptr, s);
                if (mpi_errno)
                    MPIR_ERR_POP(mpi_errno);
                MPIR_SCHED_BARRIER(s);
            }
        }
    }

    MPIR_SCHED_CHKPMEM_COMMIT(s);
  fn_exit:
    return mpi_errno;
  fn_fail:
    MPIR_SCHED_CHKPMEM_REAP(s);
    goto fn_exit;
}
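The header comment calls this schedule the nonblocking equivalent of pairwise `sendrecv_replace`. A blocking sketch of the same global (i, j) schedule using the public `MPI_Sendrecv_replace`; every process walks the pairs in the same order, which is what avoids deadlock:

/* Blocking sketch of the pairwise in-place exchange schedule above,
 * using one int per peer for simplicity. */
#include <mpi.h>
#include <stdio.h>
#include <stdlib.h>

int main(int argc, char **argv)
{
    MPI_Init(&argc, &argv);

    int rank, size;
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &size);

    /* recvbuf starts out holding this rank's block for every destination */
    int *recvbuf = malloc(size * sizeof(int));
    for (int i = 0; i < size; i++)
        recvbuf[i] = rank * 100 + i;

    for (int i = 0; i < size; i++) {
        for (int j = i + 1; j < size; j++) {    /* i == j needs no exchange */
            if (rank == i || rank == j) {
                int peer = (rank == i) ? j : i;
                /* all processes agree on the (i, j) order, so the pairs
                 * always match up */
                MPI_Sendrecv_replace(&recvbuf[peer], 1, MPI_INT,
                                     peer, 0, peer, 0,
                                     MPI_COMM_WORLD, MPI_STATUS_IGNORE);
            }
        }
    }

    /* recvbuf[p] now holds the block process p had destined for this rank */
    printf("rank %d: recvbuf[0] = %d\n", rank, recvbuf[0]);
    free(recvbuf);
    MPI_Finalize();
    return 0;
}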
Example #11
/*@

MPI_Gatherv - Gathers into specified locations from all processes in a group

Input Parameters:
+ sendbuf - starting address of send buffer (choice)
. sendcount - number of elements in send buffer (integer)
. sendtype - data type of send buffer elements (handle)
. recvcounts - integer array (of length group size)
containing the number of elements that are received from each process
(significant only at 'root')
. displs - integer array (of length group size). Entry
 'i'  specifies the displacement relative to recvbuf  at
which to place the incoming data from process  'i'  (significant only
at root)
. recvtype - data type of recv buffer elements
(significant only at 'root') (handle)
. root - rank of receiving process (integer)
- comm - communicator (handle)

Output Parameters:
. recvbuf - address of receive buffer (choice, significant only at 'root')

.N ThreadSafe

.N Fortran

.N Errors
.N MPI_SUCCESS
.N MPI_ERR_COMM
.N MPI_ERR_TYPE
.N MPI_ERR_BUFFER
@*/
int MPI_Gatherv(const void *sendbuf, int sendcount, MPI_Datatype sendtype,
                void *recvbuf, const int *recvcounts, const int *displs,
                MPI_Datatype recvtype, int root, MPI_Comm comm)
{
    int mpi_errno = MPI_SUCCESS;
    MPIR_Comm *comm_ptr = NULL;
    MPIR_Errflag_t errflag = MPIR_ERR_NONE;
    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_GATHERV);

    MPIR_ERRTEST_INITIALIZED_ORDIE();

    MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
    MPIR_FUNC_TERSE_COLL_ENTER(MPID_STATE_MPI_GATHERV);

    /* Validate parameters, especially handles needing to be converted */
#ifdef HAVE_ERROR_CHECKING
    {
        MPID_BEGIN_ERROR_CHECKS;
        {
            MPIR_ERRTEST_COMM(comm, mpi_errno);
        }
        MPID_END_ERROR_CHECKS;
    }
#endif /* HAVE_ERROR_CHECKING */

    /* Convert MPI object handles to object pointers */
    MPIR_Comm_get_ptr(comm, comm_ptr);

    /* Validate parameters and objects (post conversion) */
#ifdef HAVE_ERROR_CHECKING
    {
        MPID_BEGIN_ERROR_CHECKS;
        {
            MPIR_Datatype *sendtype_ptr = NULL, *recvtype_ptr = NULL;
            int i, rank, comm_size;

            MPIR_Comm_valid_ptr(comm_ptr, mpi_errno, FALSE);
            if (mpi_errno != MPI_SUCCESS)
                goto fn_fail;

            if (comm_ptr->comm_kind == MPIR_COMM_KIND__INTRACOMM) {
                MPIR_ERRTEST_INTRA_ROOT(comm_ptr, root, mpi_errno);

                if (sendbuf != MPI_IN_PLACE) {
                    MPIR_ERRTEST_COUNT(sendcount, mpi_errno);
                    MPIR_ERRTEST_DATATYPE(sendtype, "sendtype", mpi_errno);
                    if (HANDLE_GET_KIND(sendtype) != HANDLE_KIND_BUILTIN) {
                        MPIR_Datatype_get_ptr(sendtype, sendtype_ptr);
                        MPIR_Datatype_valid_ptr(sendtype_ptr, mpi_errno);
                        if (mpi_errno != MPI_SUCCESS)
                            goto fn_fail;
                        MPIR_Datatype_committed_ptr(sendtype_ptr, mpi_errno);
                        if (mpi_errno != MPI_SUCCESS)
                            goto fn_fail;
                    }
                    MPIR_ERRTEST_USERBUFFER(sendbuf, sendcount, sendtype, mpi_errno);
                }

                rank = comm_ptr->rank;
                if (rank == root) {
                    comm_size = comm_ptr->local_size;
                    for (i = 0; i < comm_size; i++) {
                        MPIR_ERRTEST_COUNT(recvcounts[i], mpi_errno);
                        MPIR_ERRTEST_DATATYPE(recvtype, "recvtype", mpi_errno);
                    }
                    if (HANDLE_GET_KIND(recvtype) != HANDLE_KIND_BUILTIN) {
                        MPIR_Datatype_get_ptr(recvtype, recvtype_ptr);
                        MPIR_Datatype_valid_ptr(recvtype_ptr, mpi_errno);
                        if (mpi_errno != MPI_SUCCESS)
                            goto fn_fail;
                        MPIR_Datatype_committed_ptr(recvtype_ptr, mpi_errno);
                        if (mpi_errno != MPI_SUCCESS)
                            goto fn_fail;
                    }

                    for (i = 0; i < comm_size; i++) {
                        if (recvcounts[i] > 0) {
                            MPIR_ERRTEST_RECVBUF_INPLACE(recvbuf, recvcounts[i], mpi_errno);
                            MPIR_ERRTEST_USERBUFFER(recvbuf, recvcounts[i], recvtype, mpi_errno);
                            break;
                        }
                    }

                    /* catch common aliasing cases */
                    if (sendbuf != MPI_IN_PLACE && sendtype == recvtype &&
                        recvcounts[comm_ptr->rank] != 0 && sendcount != 0) {
                        int recvtype_size;
                        MPIR_Datatype_get_size_macro(recvtype, recvtype_size);
                        MPIR_ERRTEST_ALIAS_COLL(sendbuf,
                                                (char *) recvbuf +
                                                displs[comm_ptr->rank] * recvtype_size, mpi_errno);
                    }
                } else
                    MPIR_ERRTEST_SENDBUF_INPLACE(sendbuf, sendcount, mpi_errno);
            }

            if (comm_ptr->comm_kind == MPIR_COMM_KIND__INTERCOMM) {
                MPIR_ERRTEST_INTER_ROOT(comm_ptr, root, mpi_errno);

                if (root == MPI_ROOT) {
                    comm_size = comm_ptr->remote_size;
                    for (i = 0; i < comm_size; i++) {
                        MPIR_ERRTEST_COUNT(recvcounts[i], mpi_errno);
                        MPIR_ERRTEST_DATATYPE(recvtype, "recvtype", mpi_errno);
                    }
                    if (HANDLE_GET_KIND(recvtype) != HANDLE_KIND_BUILTIN) {
                        MPIR_Datatype_get_ptr(recvtype, recvtype_ptr);
                        MPIR_Datatype_valid_ptr(recvtype_ptr, mpi_errno);
                        if (mpi_errno != MPI_SUCCESS)
                            goto fn_fail;
                        MPIR_Datatype_committed_ptr(recvtype_ptr, mpi_errno);
                        if (mpi_errno != MPI_SUCCESS)
                            goto fn_fail;
                    }
                    for (i = 0; i < comm_size; i++) {
                        if (recvcounts[i] > 0) {
                            MPIR_ERRTEST_RECVBUF_INPLACE(recvbuf, recvcounts[i], mpi_errno);
                            MPIR_ERRTEST_USERBUFFER(recvbuf, recvcounts[i], recvtype, mpi_errno);
                            break;
                        }
                    }
                } else if (root != MPI_PROC_NULL) {
                    MPIR_ERRTEST_COUNT(sendcount, mpi_errno);
                    MPIR_ERRTEST_DATATYPE(sendtype, "sendtype", mpi_errno);
                    if (HANDLE_GET_KIND(sendtype) != HANDLE_KIND_BUILTIN) {
                        MPIR_Datatype_get_ptr(sendtype, sendtype_ptr);
                        MPIR_Datatype_valid_ptr(sendtype_ptr, mpi_errno);
                        if (mpi_errno != MPI_SUCCESS)
                            goto fn_fail;
                        MPIR_Datatype_committed_ptr(sendtype_ptr, mpi_errno);
                        if (mpi_errno != MPI_SUCCESS)
                            goto fn_fail;
                    }
                    MPIR_ERRTEST_SENDBUF_INPLACE(sendbuf, sendcount, mpi_errno);
                    MPIR_ERRTEST_USERBUFFER(sendbuf, sendcount, sendtype, mpi_errno);
                }
            }
        }
        MPID_END_ERROR_CHECKS;
    }
#endif /* HAVE_ERROR_CHECKING */

    /* ... body of routine ...  */

    mpi_errno = MPIR_Gatherv(sendbuf, sendcount, sendtype,
                             recvbuf, recvcounts, displs, recvtype, root, comm_ptr, &errflag);
    if (mpi_errno)
        goto fn_fail;

    /* ... end of body of routine ... */

  fn_exit:
    MPIR_FUNC_TERSE_COLL_EXIT(MPID_STATE_MPI_GATHERV);
    MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
    return mpi_errno;

  fn_fail:
    /* --BEGIN ERROR HANDLING-- */
#ifdef HAVE_ERROR_CHECKING
    {
        mpi_errno =
            MPIR_Err_create_code(mpi_errno, MPIR_ERR_RECOVERABLE, __func__, __LINE__, MPI_ERR_OTHER,
                                 "**mpi_gatherv", "**mpi_gatherv %p %d %D %p %p %p %D %d %C",
                                 sendbuf, sendcount, sendtype, recvbuf, recvcounts, displs,
                                 recvtype, root, comm);
    }
#endif
    mpi_errno = MPIR_Err_return_comm(comm_ptr, __func__, mpi_errno);
    goto fn_exit;
    /* --END ERROR HANDLING-- */
}
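A minimal usage sketch for the routine documented above: each rank contributes a different number of elements and root 0 lays the ragged blocks out back to back via `recvcounts` and `displs`.

/* Usage sketch for MPI_Gatherv: rank r sends r+1 ints; root 0 packs the
 * contributions contiguously. */
#include <mpi.h>
#include <stdlib.h>

int main(int argc, char **argv)
{
    MPI_Init(&argc, &argv);

    int rank, size;
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &size);

    int sendcount = rank + 1;               /* ragged contribution sizes */
    int *sendbuf = malloc(sendcount * sizeof(int));
    for (int i = 0; i < sendcount; i++)
        sendbuf[i] = rank;

    int *recvcounts = NULL, *displs = NULL, *recvbuf = NULL;
    if (rank == 0) {
        recvcounts = malloc(size * sizeof(int));
        displs = malloc(size * sizeof(int));
        int total = 0;
        for (int i = 0; i < size; i++) {
            recvcounts[i] = i + 1;
            displs[i] = total;              /* place blocks back to back */
            total += recvcounts[i];
        }
        recvbuf = malloc(total * sizeof(int));
    }

    MPI_Gatherv(sendbuf, sendcount, MPI_INT,
                recvbuf, recvcounts, displs, MPI_INT, 0, MPI_COMM_WORLD);

    if (rank == 0) {
        free(recvcounts); free(displs); free(recvbuf);
    }
    free(sendbuf);
    MPI_Finalize();
    return 0;
}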
Example #12
int MPIR_Reduce_intra_auto (
    const void *sendbuf,
    void *recvbuf,
    int count,
    MPI_Datatype datatype,
    MPI_Op op,
    int root,
    MPIR_Comm *comm_ptr,
    MPIR_Errflag_t *errflag )
{
    int mpi_errno = MPI_SUCCESS;
    int mpi_errno_ret = MPI_SUCCESS;
    int is_commutative, type_size, pof2;
    int nbytes = 0;

    if (count == 0) return MPI_SUCCESS;

    /* is the op commutative? We do SMP optimizations only if it is. */
    is_commutative = MPIR_Op_is_commutative(op);

    MPIR_Datatype_get_size_macro(datatype, type_size);
    nbytes = MPIR_CVAR_MAX_SMP_REDUCE_MSG_SIZE ? type_size*count : 0;

    if (MPIR_CVAR_ENABLE_SMP_COLLECTIVES &&
            MPIR_CVAR_ENABLE_SMP_REDUCE &&
            MPIR_Comm_is_node_aware(comm_ptr) &&
            is_commutative &&
            nbytes <= MPIR_CVAR_MAX_SMP_REDUCE_MSG_SIZE) {
        mpi_errno = MPIR_Reduce_intra_smp(sendbuf, recvbuf, count, datatype,
                op, root, comm_ptr, errflag);

        if (mpi_errno) {
            /* for communication errors, just record the error but continue */
            *errflag = MPIR_ERR_GET_CLASS(mpi_errno);
            MPIR_ERR_SET(mpi_errno, *errflag, "**fail");
            MPIR_ERR_ADD(mpi_errno_ret, mpi_errno);
        }

        goto fn_exit;
    }

    MPIR_Datatype_get_size_macro(datatype, type_size);

    /* get nearest power-of-two less than or equal to comm_size */
    pof2 = comm_ptr->pof2;

    if ((count*type_size > MPIR_CVAR_REDUCE_SHORT_MSG_SIZE) &&
        (HANDLE_GET_KIND(op) == HANDLE_KIND_BUILTIN) && (count >= pof2)) {
        /* do a reduce-scatter followed by gather to root. */
        mpi_errno = MPIR_Reduce_intra_reduce_scatter_gather(sendbuf, recvbuf, count, datatype, op, root, comm_ptr, errflag);
    }
    else {
        /* use a binomial tree algorithm */ 
        mpi_errno = MPIR_Reduce_intra_binomial(sendbuf, recvbuf, count, datatype, op, root, comm_ptr, errflag);
    }
    if (mpi_errno) {
        /* for communication errors, just record the error but continue */
        *errflag = MPIR_ERR_GET_CLASS(mpi_errno);
        MPIR_ERR_SET(mpi_errno, *errflag, "**fail");
        MPIR_ERR_ADD(mpi_errno_ret, mpi_errno);
    }

  fn_exit:
    if (mpi_errno_ret)
        mpi_errno = mpi_errno_ret;
    else if (*errflag != MPIR_ERR_NONE)
        MPIR_ERR_SET(mpi_errno, *errflag, "**coll_fail");
    return mpi_errno;
}
Example #13
/*@
   Dataloop_create_blockindexed - create blockindexed dataloop

   Arguments:
+  MPI_Aint count
.  void *displacement_array (array of either MPI_Aints or ints)
.  int displacement_in_bytes (boolean)
.  MPI_Datatype old_type
.  MPIR_Dataloop **output_dataloop_ptr
.  int output_dataloop_size

.N Errors
.N Returns 0 on success, -1 on failure.
@*/
int MPII_Dataloop_create_blockindexed(MPI_Aint icount,
                                      MPI_Aint iblklen,
                                      const void *disp_array,
                                      int dispinbytes,
                                      MPI_Datatype oldtype,
                                      MPIR_Dataloop ** dlp_p, MPI_Aint * dlsz_p)
{
    int err, is_builtin, is_vectorizable = 1;
    int i;
    MPI_Aint new_loop_sz;

    MPI_Aint contig_count, count, blklen;
    MPI_Aint old_extent, eff_disp0, eff_disp1, last_stride;
    MPIR_Dataloop *new_dlp;

    count = (MPI_Aint) icount;  /* avoid subsequent casting */
    blklen = (MPI_Aint) iblklen;

    /* if count or blklen are zero, handle with contig code, call it an int */
    if (count == 0 || blklen == 0) {
        err = MPII_Dataloop_create_contiguous(0, MPI_INT, dlp_p, dlsz_p);
        return err;
    }

    is_builtin = (MPII_DATALOOP_HANDLE_HASLOOP(oldtype)) ? 0 : 1;

    if (is_builtin) {
        MPIR_Datatype_get_size_macro(oldtype, old_extent);
    } else {
        MPIR_Datatype_get_extent_macro(oldtype, old_extent);
    }

    contig_count = MPII_Datatype_blockindexed_count_contig(count,
                                                           blklen, disp_array, dispinbytes,
                                                           old_extent);

    /* optimization:
     *
     * if contig_count == 1 and block starts at displacement 0,
     * store it as a contiguous rather than a blockindexed dataloop.
     */
    if ((contig_count == 1) &&
        ((!dispinbytes && ((int *) disp_array)[0] == 0) ||
         (dispinbytes && ((MPI_Aint *) disp_array)[0] == 0))) {
        err = MPII_Dataloop_create_contiguous(icount * iblklen, oldtype, dlp_p, dlsz_p);
        return err;
    }

    /* optimization:
     *
     * if contig_count == 1 store it as a blockindexed with one
     * element rather than as a lot of individual blocks.
     */
    if (contig_count == 1) {
        /* adjust count and blklen and drop through */
        blklen *= count;
        count = 1;
        iblklen *= icount;
        icount = 1;
    }

    /* optimization:
     *
     * if displacements start at zero and result in a fixed stride,
     * store it as a vector rather than a blockindexed dataloop.
     */
    eff_disp0 = (dispinbytes) ? ((MPI_Aint) ((MPI_Aint *) disp_array)[0]) :
        (((MPI_Aint) ((int *) disp_array)[0]) * old_extent);

    if (count > 1 && eff_disp0 == (MPI_Aint) 0) {
        eff_disp1 = (dispinbytes) ?
            ((MPI_Aint) ((MPI_Aint *) disp_array)[1]) :
            (((MPI_Aint) ((int *) disp_array)[1]) * old_extent);
        last_stride = eff_disp1 - eff_disp0;

        for (i = 2; i < count; i++) {
            eff_disp0 = eff_disp1;
            eff_disp1 = (dispinbytes) ?
                ((MPI_Aint) ((MPI_Aint *) disp_array)[i]) :
                (((MPI_Aint) ((int *) disp_array)[i]) * old_extent);
            if (eff_disp1 - eff_disp0 != last_stride) {
                is_vectorizable = 0;
                break;
            }
        }
        if (is_vectorizable) {
            err = MPII_Dataloop_create_vector(count, blklen, last_stride, 1,    /* strideinbytes */
                                              oldtype, dlp_p, dlsz_p);
            return err;
        }
    }

    /* TODO: optimization:
     *
     * if displacements result in a fixed stride, but first displacement
     * is not zero, store it as a blockindexed (blklen == 1) of a vector.
     */

    /* TODO: optimization:
     *
     * if a blockindexed of a contig, absorb the contig into the blocklen
     * parameter and keep the same overall depth
     */

    /* otherwise storing as a blockindexed dataloop */

    /* Q: HOW CAN WE TELL IF IT IS WORTH IT TO STORE AS AN
     * INDEXED WITH FEWER CONTIG BLOCKS (IF CONTIG_COUNT IS SMALL)?
     */

    if (is_builtin) {
        MPII_Dataloop_alloc(MPII_DATALOOP_KIND_BLOCKINDEXED, count, &new_dlp, &new_loop_sz);
        /* --BEGIN ERROR HANDLING-- */
        if (!new_dlp)
            return -1;
        /* --END ERROR HANDLING-- */

        new_dlp->kind = MPII_DATALOOP_KIND_BLOCKINDEXED | MPII_DATALOOP_FINAL_MASK;

        new_dlp->el_size = old_extent;
        new_dlp->el_extent = old_extent;
        new_dlp->el_type = oldtype;
    } else {
        MPIR_Dataloop *old_loop_ptr = NULL;
        MPI_Aint old_loop_sz = 0;

        MPII_DATALOOP_GET_LOOPPTR(oldtype, old_loop_ptr);
        MPII_DATALOOP_GET_LOOPSIZE(oldtype, old_loop_sz);

        MPII_Dataloop_alloc_and_copy(MPII_DATALOOP_KIND_BLOCKINDEXED,
                                     count, old_loop_ptr, old_loop_sz, &new_dlp, &new_loop_sz);
        /* --BEGIN ERROR HANDLING-- */
        if (!new_dlp)
            return -1;
        /* --END ERROR HANDLING-- */

        new_dlp->kind = MPII_DATALOOP_KIND_BLOCKINDEXED;

        MPIR_Datatype_get_size_macro(oldtype, new_dlp->el_size);
        MPIR_Datatype_get_extent_macro(oldtype, new_dlp->el_extent);
        MPIR_Datatype_get_basic_type(oldtype, new_dlp->el_type);
    }

    new_dlp->loop_params.bi_t.count = count;
    new_dlp->loop_params.bi_t.blocksize = blklen;

    /* copy in displacement parameters
     *
     * regardless of dispinbytes, we store displacements in bytes in loop.
     */
    blockindexed_array_copy(count,
                            disp_array,
                            new_dlp->loop_params.bi_t.offset_array, dispinbytes, old_extent);

    *dlp_p = new_dlp;
    *dlsz_p = new_loop_sz;

    return 0;
}
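The vectorization optimization above recognizes displacements with a fixed stride starting at zero and stores the layout as a vector. A public-API sketch showing that a block-indexed description with such displacements matches the equivalent vector type:

/* Sketch of the vectorization case above: block-indexed displacements
 * {0, 4, 8} with blklen 2 describe the same layout as vector(3, 2, 4), so
 * both types report the same size and extent. */
#include <mpi.h>
#include <stdio.h>

int main(int argc, char **argv)
{
    MPI_Init(&argc, &argv);

    int disps[3] = { 0, 4, 8 };
    MPI_Datatype bi, vec;
    MPI_Type_create_indexed_block(3, 2, disps, MPI_INT, &bi);
    MPI_Type_vector(3, 2, 4, MPI_INT, &vec);
    MPI_Type_commit(&bi);
    MPI_Type_commit(&vec);

    MPI_Aint lb, ext_bi, ext_vec;
    int sz_bi, sz_vec;
    MPI_Type_size(bi, &sz_bi);
    MPI_Type_size(vec, &sz_vec);
    MPI_Type_get_extent(bi, &lb, &ext_bi);
    MPI_Type_get_extent(vec, &lb, &ext_vec);
    printf("size %d vs %d, extent %ld vs %ld\n",
           sz_bi, sz_vec, (long) ext_bi, (long) ext_vec);

    MPI_Type_free(&bi);
    MPI_Type_free(&vec);
    MPI_Finalize();
    return 0;
}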
Example #14
int MPIR_Ireduce_scatter_sched_intra_auto(const void *sendbuf, void *recvbuf,
                                          const int recvcounts[], MPI_Datatype datatype, MPI_Op op,
                                          MPIR_Comm * comm_ptr, MPIR_Sched_t s)
{
    int mpi_errno = MPI_SUCCESS;
    int i;
    int is_commutative;
    int total_count, type_size, nbytes;
    int comm_size;

    is_commutative = MPIR_Op_is_commutative(op);

    comm_size = comm_ptr->local_size;
    total_count = 0;
    for (i = 0; i < comm_size; i++) {
        total_count += recvcounts[i];
    }
    if (total_count == 0) {
        goto fn_exit;
    }
    MPIR_Datatype_get_size_macro(datatype, type_size);
    nbytes = total_count * type_size;

    /* select an appropriate algorithm based on commutativity and message size */
    if (is_commutative && (nbytes < MPIR_CVAR_REDUCE_SCATTER_COMMUTATIVE_LONG_MSG_SIZE)) {
        mpi_errno =
            MPIR_Ireduce_scatter_sched_intra_recursive_halving(sendbuf, recvbuf, recvcounts,
                                                               datatype, op, comm_ptr, s);
        if (mpi_errno)
            MPIR_ERR_POP(mpi_errno);
    } else if (is_commutative && (nbytes >= MPIR_CVAR_REDUCE_SCATTER_COMMUTATIVE_LONG_MSG_SIZE)) {
        mpi_errno =
            MPIR_Ireduce_scatter_sched_intra_pairwise(sendbuf, recvbuf, recvcounts, datatype, op,
                                                      comm_ptr, s);
        if (mpi_errno)
            MPIR_ERR_POP(mpi_errno);
    } else {    /* (!is_commutative) */

        int is_block_regular = TRUE;
        for (i = 0; i < (comm_size - 1); ++i) {
            if (recvcounts[i] != recvcounts[i + 1]) {
                is_block_regular = FALSE;
                break;
            }
        }

        if (MPL_is_pof2(comm_size, NULL) && is_block_regular) {
            /* noncommutative, pof2 size, and block regular */
            mpi_errno =
                MPIR_Ireduce_scatter_sched_intra_noncommutative(sendbuf, recvbuf, recvcounts,
                                                                datatype, op, comm_ptr, s);
            if (mpi_errno)
                MPIR_ERR_POP(mpi_errno);
        } else {
            /* noncommutative and (non-pof2 or block irregular), use recursive doubling. */
            mpi_errno =
                MPIR_Ireduce_scatter_sched_intra_recursive_doubling(sendbuf, recvbuf, recvcounts,
                                                                    datatype, op, comm_ptr, s);
            if (mpi_errno)
                MPIR_ERR_POP(mpi_errno);
        }
    }

  fn_exit:
    return mpi_errno;
  fn_fail:
    goto fn_exit;
}
Example #15
/*@
MPI_Iallgather - Gathers data from all tasks and distributes the combined data
                 to all tasks in a nonblocking way

Input Parameters:
+ sendbuf - starting address of the send buffer (choice)
. sendcount - number of elements in send buffer (non-negative integer)
. sendtype - data type of send buffer elements (handle)
. recvcount - number of elements in receive buffer (non-negative integer)
. recvtype - data type of receive buffer elements (handle)
- comm - communicator (handle)

Output Parameters:
+ recvbuf - starting address of the receive buffer (choice)
- request - communication request (handle)

.N ThreadSafe

.N Fortran

.N Errors
@*/
int MPI_Iallgather(const void *sendbuf, int sendcount, MPI_Datatype sendtype,
                   void *recvbuf, int recvcount, MPI_Datatype recvtype,
                   MPI_Comm comm, MPI_Request *request)
{
    int mpi_errno = MPI_SUCCESS;
    MPIR_Comm *comm_ptr = NULL;
    MPIR_Request *request_ptr = NULL;
    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_IALLGATHER);

    MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_IALLGATHER);

    /* Validate parameters, especially handles needing to be converted */
#   ifdef HAVE_ERROR_CHECKING
    {
        MPID_BEGIN_ERROR_CHECKS
        {
            if (sendbuf != MPI_IN_PLACE) {
                MPIR_ERRTEST_DATATYPE(sendtype, "sendtype", mpi_errno);
                MPIR_ERRTEST_COUNT(sendcount, mpi_errno);
            }
            MPIR_ERRTEST_DATATYPE(recvtype, "recvtype", mpi_errno);
            MPIR_ERRTEST_COMM(comm, mpi_errno);

            /* TODO more checks may be appropriate */
        }
        MPID_END_ERROR_CHECKS
    }
#   endif /* HAVE_ERROR_CHECKING */

    /* Convert MPI object handles to object pointers */
    MPIR_Comm_get_ptr(comm, comm_ptr);

    /* Validate parameters and objects (post conversion) */
#   ifdef HAVE_ERROR_CHECKING
    {
        MPID_BEGIN_ERROR_CHECKS
        {
            MPIR_Comm_valid_ptr( comm_ptr, mpi_errno, FALSE );
            if (sendbuf != MPI_IN_PLACE && HANDLE_GET_KIND(sendtype) != HANDLE_KIND_BUILTIN) {
                MPIR_Datatype *sendtype_ptr = NULL;
                MPIR_Datatype_get_ptr(sendtype, sendtype_ptr);
                MPIR_Datatype_valid_ptr(sendtype_ptr, mpi_errno);
                if (mpi_errno != MPI_SUCCESS) goto fn_fail;
                MPIR_Datatype_committed_ptr(sendtype_ptr, mpi_errno);
                if (mpi_errno != MPI_SUCCESS) goto fn_fail;
            }

            if (HANDLE_GET_KIND(recvtype) != HANDLE_KIND_BUILTIN) {
                MPIR_Datatype *recvtype_ptr = NULL;
                MPIR_Datatype_get_ptr(recvtype, recvtype_ptr);
                MPIR_Datatype_valid_ptr(recvtype_ptr, mpi_errno);
                if (mpi_errno != MPI_SUCCESS) goto fn_fail;
                MPIR_Datatype_committed_ptr(recvtype_ptr, mpi_errno);
                if (mpi_errno != MPI_SUCCESS) goto fn_fail;
            }

            MPIR_ERRTEST_ARGNULL(request,"request", mpi_errno);

            /* catch common aliasing cases */
            if (comm_ptr->comm_kind == MPIR_COMM_KIND__INTRACOMM && recvbuf != MPI_IN_PLACE && sendtype == recvtype && sendcount == recvcount && sendcount != 0) {
                int recvtype_size;
                MPIR_Datatype_get_size_macro(recvtype, recvtype_size);
                MPIR_ERRTEST_ALIAS_COLL(sendbuf, (char*)recvbuf + comm_ptr->rank*recvcount*recvtype_size, mpi_errno);
            }

            /* TODO more checks may be appropriate (counts, in_place, etc) */
        }
        MPID_END_ERROR_CHECKS
    }
#   endif /* HAVE_ERROR_CHECKING */

    /* ... body of routine ...  */

    mpi_errno = MPIR_Iallgather(sendbuf, sendcount, sendtype, recvbuf, recvcount,
                                recvtype, comm_ptr, &request_ptr);
    if (mpi_errno) MPIR_ERR_POP(mpi_errno);

    /* return the handle of the request to the user */
    if (request_ptr)
        *request = request_ptr->handle;
    else
        *request = MPI_REQUEST_NULL;

    /* ... end of body of routine ... */

fn_exit:
    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_IALLGATHER);
    MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
    return mpi_errno;

fn_fail:
    /* --BEGIN ERROR HANDLING-- */
#   ifdef HAVE_ERROR_CHECKING
    {
        mpi_errno = MPIR_Err_create_code(
            mpi_errno, MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPI_ERR_OTHER,
            "**mpi_iallgather", "**mpi_iallgather %p %d %D %p %d %D %C %p", sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, comm, request);
    }
#   endif
    mpi_errno = MPIR_Err_return_comm(comm_ptr, FCNAME, mpi_errno);
    goto fn_exit;
    /* --END ERROR HANDLING-- */
}
Example #16
int MPIR_Ireduce_scatter_sched_intra_recursive_halving(const void *sendbuf, void *recvbuf,
                                                       const int recvcounts[],
                                                       MPI_Datatype datatype, MPI_Op op,
                                                       MPIR_Comm * comm_ptr, MPIR_Sched_t s)
{
    int mpi_errno = MPI_SUCCESS;
    int rank, comm_size, i;
    MPI_Aint extent, true_extent, true_lb;
    int *disps;
    void *tmp_recvbuf, *tmp_results;
    int type_size ATTRIBUTE((unused)), total_count, dst;
    int mask;
    int *newcnts, *newdisps, rem, newdst, send_idx, recv_idx, last_idx, send_cnt, recv_cnt;
    int pof2, old_i, newrank;
    MPIR_SCHED_CHKPMEM_DECL(5);

    comm_size = comm_ptr->local_size;
    rank = comm_ptr->rank;

    MPIR_Datatype_get_extent_macro(datatype, extent);
    MPIR_Type_get_true_extent_impl(datatype, &true_lb, &true_extent);

#ifdef HAVE_ERROR_CHECKING
    MPIR_Assert(MPIR_Op_is_commutative(op));
#endif

    MPIR_SCHED_CHKPMEM_MALLOC(disps, int *, comm_size * sizeof(int), mpi_errno, "disps",
                              MPL_MEM_BUFFER);

    total_count = 0;
    for (i = 0; i < comm_size; i++) {
        disps[i] = total_count;
        total_count += recvcounts[i];
    }

    if (total_count == 0) {
        goto fn_exit;
    }

    MPIR_Datatype_get_size_macro(datatype, type_size);

    /* allocate temp. buffer to receive incoming data */
    MPIR_SCHED_CHKPMEM_MALLOC(tmp_recvbuf, void *, total_count * (MPL_MAX(true_extent, extent)),
                              mpi_errno, "tmp_recvbuf", MPL_MEM_BUFFER);
    /* adjust for potential negative lower bound in datatype */
    tmp_recvbuf = (void *) ((char *) tmp_recvbuf - true_lb);

    /* need to allocate another temporary buffer to accumulate
     * results because recvbuf may not be big enough */
    MPIR_SCHED_CHKPMEM_MALLOC(tmp_results, void *, total_count * (MPL_MAX(true_extent, extent)),
                              mpi_errno, "tmp_results", MPL_MEM_BUFFER);
    /* adjust for potential negative lower bound in datatype */
    tmp_results = (void *) ((char *) tmp_results - true_lb);

    /* copy sendbuf into tmp_results */
    if (sendbuf != MPI_IN_PLACE)
        mpi_errno = MPIR_Sched_copy(sendbuf, total_count, datatype,
                                    tmp_results, total_count, datatype, s);
    else
        mpi_errno = MPIR_Sched_copy(recvbuf, total_count, datatype,
                                    tmp_results, total_count, datatype, s);
    if (mpi_errno)
        MPIR_ERR_POP(mpi_errno);
    MPIR_SCHED_BARRIER(s);

    pof2 = comm_ptr->pof2;

    rem = comm_size - pof2;

    /* In the non-power-of-two case, all even-numbered
     * processes of rank < 2*rem send their data to
     * (rank+1). These even-numbered processes no longer
     * participate in the algorithm until the very end. The
     * remaining processes form a nice power-of-two. */

    if (rank < 2 * rem) {
        if (rank % 2 == 0) {    /* even */
            mpi_errno = MPIR_Sched_send(tmp_results, total_count, datatype, rank + 1, comm_ptr, s);
            if (mpi_errno)
                MPIR_ERR_POP(mpi_errno);
            MPIR_SCHED_BARRIER(s);

            /* temporarily set the rank to -1 so that this
             * process does not participate in recursive
             * doubling */
            newrank = -1;
        } else {        /* odd */
            mpi_errno = MPIR_Sched_recv(tmp_recvbuf, total_count, datatype, rank - 1, comm_ptr, s);
            if (mpi_errno)
                MPIR_ERR_POP(mpi_errno);
            MPIR_SCHED_BARRIER(s);

            /* do the reduction on received data. since the
             * ordering is right, it doesn't matter whether
             * the operation is commutative or not. */
            mpi_errno = MPIR_Sched_reduce(tmp_recvbuf, tmp_results, total_count, datatype, op, s);
            if (mpi_errno)
                MPIR_ERR_POP(mpi_errno);
            MPIR_SCHED_BARRIER(s);

            /* change the rank */
            newrank = rank / 2;
        }
    } else      /* rank >= 2*rem */
        newrank = rank - rem;

    if (newrank != -1) {
        /* recalculate the recvcounts and disps arrays because the
         * even-numbered processes who no longer participate will
         * have their result calculated by the process to their
         * right (rank+1). */

        MPIR_SCHED_CHKPMEM_MALLOC(newcnts, int *, pof2 * sizeof(int), mpi_errno, "newcnts",
                                  MPL_MEM_BUFFER);
        MPIR_SCHED_CHKPMEM_MALLOC(newdisps, int *, pof2 * sizeof(int), mpi_errno, "newdisps",
                                  MPL_MEM_BUFFER);

        for (i = 0; i < pof2; i++) {
            /* what does i map to in the old ranking? */
            old_i = (i < rem) ? i * 2 + 1 : i + rem;
            if (old_i < 2 * rem) {
                /* This process has to also do its left neighbor's
                 * work */
                newcnts[i] = recvcounts[old_i] + recvcounts[old_i - 1];
            } else
                newcnts[i] = recvcounts[old_i];
        }

        newdisps[0] = 0;
        for (i = 1; i < pof2; i++)
            newdisps[i] = newdisps[i - 1] + newcnts[i - 1];

        mask = pof2 >> 1;
        send_idx = recv_idx = 0;
        last_idx = pof2;
        while (mask > 0) {
            newdst = newrank ^ mask;
            /* find real rank of dest */
            dst = (newdst < rem) ? newdst * 2 + 1 : newdst + rem;

            send_cnt = recv_cnt = 0;
            if (newrank < newdst) {
                send_idx = recv_idx + mask;
                for (i = send_idx; i < last_idx; i++)
                    send_cnt += newcnts[i];
                for (i = recv_idx; i < send_idx; i++)
                    recv_cnt += newcnts[i];
            } else {
                recv_idx = send_idx + mask;
                for (i = send_idx; i < recv_idx; i++)
                    send_cnt += newcnts[i];
                for (i = recv_idx; i < last_idx; i++)
                    recv_cnt += newcnts[i];
            }

            /* Send data from tmp_results. Recv into tmp_recvbuf */
            {
                /* avoid sending and receiving pointless 0-byte messages */
                int send_dst = (send_cnt ? dst : MPI_PROC_NULL);
                int recv_dst = (recv_cnt ? dst : MPI_PROC_NULL);

                mpi_errno = MPIR_Sched_send(((char *) tmp_results + newdisps[send_idx] * extent),
                                            send_cnt, datatype, send_dst, comm_ptr, s);
                if (mpi_errno)
                    MPIR_ERR_POP(mpi_errno);
                mpi_errno = MPIR_Sched_recv(((char *) tmp_recvbuf + newdisps[recv_idx] * extent),
                                            recv_cnt, datatype, recv_dst, comm_ptr, s);
                if (mpi_errno)
                    MPIR_ERR_POP(mpi_errno);
                MPIR_SCHED_BARRIER(s);
            }

            /* tmp_recvbuf contains data received in this step.
             * tmp_results contains data accumulated so far */
            if (recv_cnt) {
                mpi_errno = MPIR_Sched_reduce(((char *) tmp_recvbuf + newdisps[recv_idx] * extent),
                                              ((char *) tmp_results + newdisps[recv_idx] * extent),
                                              recv_cnt, datatype, op, s);
                MPIR_SCHED_BARRIER(s);
            }

            /* update send_idx for next iteration */
            send_idx = recv_idx;
            last_idx = recv_idx + mask;
            mask >>= 1;
        }

        /* copy this process's result from tmp_results to recvbuf */
        if (recvcounts[rank]) {
            mpi_errno = MPIR_Sched_copy(((char *) tmp_results + disps[rank] * extent),
                                        recvcounts[rank], datatype,
                                        recvbuf, recvcounts[rank], datatype, s);
            if (mpi_errno)
                MPIR_ERR_POP(mpi_errno);
            MPIR_SCHED_BARRIER(s);
        }

    }
Example #17
int MPIR_Ireduce_scatter_sched_intra_recursive_doubling(const void *sendbuf, void *recvbuf, const int recvcounts[],
                                 MPI_Datatype datatype, MPI_Op op, MPIR_Comm *comm_ptr,
                                 MPIR_Sched_t s)
{
    int mpi_errno = MPI_SUCCESS;
    int rank, comm_size, i;
    MPI_Aint extent, true_extent, true_lb;
    int  *disps;
    void *tmp_recvbuf, *tmp_results;
    int type_size ATTRIBUTE((unused)), dis[2], blklens[2], total_count, dst;
    int mask, dst_tree_root, my_tree_root, j, k;
    int received;
    MPI_Datatype sendtype, recvtype;
    int nprocs_completed, tmp_mask, tree_root, is_commutative;
    MPIR_SCHED_CHKPMEM_DECL(5);

    comm_size = comm_ptr->local_size;
    rank = comm_ptr->rank;

    MPIR_Datatype_get_extent_macro(datatype, extent);
    MPIR_Type_get_true_extent_impl(datatype, &true_lb, &true_extent);
    is_commutative = MPIR_Op_is_commutative(op);

    MPIR_SCHED_CHKPMEM_MALLOC(disps, int *, comm_size * sizeof(int), mpi_errno, "disps", MPL_MEM_BUFFER);

    total_count = 0;
    for (i=0; i<comm_size; i++) {
        disps[i] = total_count;
        total_count += recvcounts[i];
    }

    if (total_count == 0) {
        goto fn_exit;
    }

    MPIR_Datatype_get_size_macro(datatype, type_size);

    /* total_count*extent eventually gets malloced. it isn't added to
     * a user-passed in buffer */
    MPIR_Ensure_Aint_fits_in_pointer(total_count * MPL_MAX(true_extent, extent));


    /* need to allocate temporary buffer to receive incoming data*/
    MPIR_SCHED_CHKPMEM_MALLOC(tmp_recvbuf, void *, total_count*(MPL_MAX(true_extent,extent)), mpi_errno, "tmp_recvbuf", MPL_MEM_BUFFER);
    /* adjust for potential negative lower bound in datatype */
    tmp_recvbuf = (void *)((char*)tmp_recvbuf - true_lb);

    /* need to allocate another temporary buffer to accumulate
       results */
    MPIR_SCHED_CHKPMEM_MALLOC(tmp_results, void *, total_count*(MPL_MAX(true_extent,extent)), mpi_errno, "tmp_results", MPL_MEM_BUFFER);
    /* adjust for potential negative lower bound in datatype */
    tmp_results = (void *)((char*)tmp_results - true_lb);

    /* copy sendbuf into tmp_results */
    if (sendbuf != MPI_IN_PLACE)
        mpi_errno = MPIR_Sched_copy(sendbuf, total_count, datatype,
                                    tmp_results, total_count, datatype, s);
    else
        mpi_errno = MPIR_Sched_copy(recvbuf, total_count, datatype,
                                    tmp_results, total_count, datatype, s);

    if (mpi_errno) MPIR_ERR_POP(mpi_errno);
    MPIR_SCHED_BARRIER(s);

    mask = 0x1;
    i = 0;
    while (mask < comm_size) {
        dst = rank ^ mask;

        dst_tree_root = dst >> i;
        dst_tree_root <<= i;

        my_tree_root = rank >> i;
        my_tree_root <<= i;

        /* At step 1, processes exchange (n-n/p) amount of
           data; at step 2, (n-2n/p) amount of data; at step 3, (n-4n/p)
           amount of data, and so forth. We use derived datatypes for this.

           At each step, a process does not need to send data
           indexed from my_tree_root to
           my_tree_root+mask-1. Similarly, a process won't receive
           data indexed from dst_tree_root to dst_tree_root+mask-1. */

        /* calculate sendtype */
        blklens[0] = blklens[1] = 0;
        for (j=0; j<my_tree_root; j++)
            blklens[0] += recvcounts[j];
        for (j=my_tree_root+mask; j<comm_size; j++)
            blklens[1] += recvcounts[j];

        dis[0] = 0;
        dis[1] = blklens[0];
        for (j=my_tree_root; (j<my_tree_root+mask) && (j<comm_size); j++)
            dis[1] += recvcounts[j];

        mpi_errno = MPIR_Type_indexed_impl(2, blklens, dis, datatype, &sendtype);
        if (mpi_errno) MPIR_ERR_POP(mpi_errno);

        mpi_errno = MPIR_Type_commit_impl(&sendtype);
        if (mpi_errno) MPIR_ERR_POP(mpi_errno);

        /* calculate recvtype */
        blklens[0] = blklens[1] = 0;
        for (j=0; j<dst_tree_root && j<comm_size; j++)
            blklens[0] += recvcounts[j];
        for (j=dst_tree_root+mask; j<comm_size; j++)
            blklens[1] += recvcounts[j];

        dis[0] = 0;
        dis[1] = blklens[0];
        for (j=dst_tree_root; (j<dst_tree_root+mask) && (j<comm_size); j++)
            dis[1] += recvcounts[j];

        mpi_errno = MPIR_Type_indexed_impl(2, blklens, dis, datatype, &recvtype);
        if (mpi_errno) MPIR_ERR_POP(mpi_errno);

        mpi_errno = MPIR_Type_commit_impl(&recvtype);
        if (mpi_errno) MPIR_ERR_POP(mpi_errno);

        received = 0;
        if (dst < comm_size) {
            /* tmp_results contains data to be sent in each step. Data is
               received in tmp_recvbuf and then accumulated into
               tmp_results. accumulation is done later below.   */

            mpi_errno = MPIR_Sched_send(tmp_results, 1, sendtype, dst, comm_ptr, s);
            if (mpi_errno) MPIR_ERR_POP(mpi_errno);
            mpi_errno = MPIR_Sched_recv(tmp_recvbuf, 1, recvtype, dst, comm_ptr, s);
            if (mpi_errno) MPIR_ERR_POP(mpi_errno);
            MPIR_SCHED_BARRIER(s);
            received = 1;
        }

        /* if some processes in this process's subtree in this step
           did not have any destination process to communicate with
           because of non-power-of-two, we need to send them the
           result. We use a logarithmic recursive-halving algorithm
           for this. */

        if (dst_tree_root + mask > comm_size) {
            nprocs_completed = comm_size - my_tree_root - mask;
            /* nprocs_completed is the number of processes in this
               subtree that have all the data. Send data to others
               in a tree fashion. First find root of current tree
               that is being divided into two. k is the number of
               least-significant bits in this process's rank that
               must be zeroed out to find the rank of the root */
            j = mask;
            k = 0;
            while (j) {
                j >>= 1;
                k++;
            }
            k--;

            tmp_mask = mask >> 1;
            while (tmp_mask) {
                dst = rank ^ tmp_mask;

                tree_root = rank >> k;
                tree_root <<= k;

                /* send only if this proc has data and destination
                   doesn't have data. at any step, multiple processes
                   can send if they have the data */
                if ((dst > rank) &&
                    (rank < tree_root + nprocs_completed)
                    && (dst >= tree_root + nprocs_completed))
                {
                    /* send the current result */
                    mpi_errno = MPIR_Sched_send(tmp_recvbuf, 1, recvtype, dst, comm_ptr, s);
                    if (mpi_errno) MPIR_ERR_POP(mpi_errno);
                    MPIR_SCHED_BARRIER(s);
                }
                /* recv only if this proc. doesn't have data and sender
                   has data */
                else if ((dst < rank) &&
                         (dst < tree_root + nprocs_completed) &&
                         (rank >= tree_root + nprocs_completed))
                {
                    mpi_errno = MPIR_Sched_recv(tmp_recvbuf, 1, recvtype, dst, comm_ptr, s);
                    if (mpi_errno) MPIR_ERR_POP(mpi_errno);
                    MPIR_SCHED_BARRIER(s);
                    received = 1;
                }
                tmp_mask >>= 1;
                k--;
            }
        }

        /* N.B. The following comment comes from the FT version of
         * MPI_Reduce_scatter.  It does not currently apply to this code, but
         * will in the future when we update the NBC code to be fault-tolerant
         * in roughly the same fashion. [goodell@ 2011-03-03] */
        /* The following reduction is done here instead of after
           the MPIC_Sendrecv or MPIC_Recv above. Doing it above
           in the noncommutative case would require an extra temp
           buffer so as not to overwrite tmp_recvbuf, because
           tmp_recvbuf may have to be communicated to other
           processes in the non-power-of-two case. To avoid that
           extra allocation, we do the reduce here. */
        if (received) {
            if (is_commutative || (dst_tree_root < my_tree_root)) {
                mpi_errno = MPIR_Sched_reduce(tmp_recvbuf, tmp_results, blklens[0], datatype, op, s);
                if (mpi_errno) MPIR_ERR_POP(mpi_errno);
                mpi_errno = MPIR_Sched_reduce(((char *)tmp_recvbuf + dis[1]*extent),
                                              ((char *)tmp_results + dis[1]*extent),
                                              blklens[1], datatype, op, s);
                if (mpi_errno) MPIR_ERR_POP(mpi_errno);
                MPIR_SCHED_BARRIER(s);
            }
            else {
                mpi_errno = MPIR_Sched_reduce(tmp_results, tmp_recvbuf, blklens[0], datatype, op, s);
                if (mpi_errno) MPIR_ERR_POP(mpi_errno);
                mpi_errno = MPIR_Sched_reduce(((char *)tmp_results + dis[1]*extent),
                                              ((char *)tmp_recvbuf + dis[1]*extent),
                                              blklens[1], datatype, op, s);
                if (mpi_errno) MPIR_ERR_POP(mpi_errno);
                MPIR_SCHED_BARRIER(s);

                /* copy result back into tmp_results */
                mpi_errno = MPIR_Sched_copy(tmp_recvbuf, 1, recvtype,
                                            tmp_results, 1, recvtype, s);
                if (mpi_errno) MPIR_ERR_POP(mpi_errno);
                MPIR_SCHED_BARRIER(s);
            }
        }

        MPIR_Type_free_impl(&sendtype);
        MPIR_Type_free_impl(&recvtype);

        mask <<= 1;
        i++;
    }
Example #18
0
int MPIR_Igather_sched_intra_binomial(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, int root, MPIR_Comm *comm_ptr, MPIR_Sched_t s)
{
    int mpi_errno = MPI_SUCCESS;
    int comm_size, rank;
    int relative_rank, is_homogeneous;
    int mask, src, dst, relative_src;
    MPI_Aint recvtype_size, sendtype_size, curr_cnt=0, nbytes;
    int recvblks;
    int tmp_buf_size, missing;
    void *tmp_buf = NULL;
    int blocks[2];
    int displs[2];
    MPI_Aint struct_displs[2];
    MPI_Aint extent=0;
    int copy_offset = 0, copy_blks = 0;
    MPI_Datatype types[2], tmp_type;
    MPIR_SCHED_CHKPMEM_DECL(1);

    comm_size = comm_ptr->local_size;
    rank = comm_ptr->rank;

    if (((rank == root) && (recvcount == 0)) || ((rank != root) && (sendcount == 0)))
        goto fn_exit;

    is_homogeneous = TRUE;
#ifdef MPID_HAS_HETERO
    is_homogeneous = !comm_ptr->is_hetero;
#endif

    MPIR_Assert(comm_ptr->comm_kind == MPIR_COMM_KIND__INTRACOMM);

    /* Use binomial tree algorithm. */

    relative_rank = (rank >= root) ? rank - root : rank - root + comm_size;

    if (rank == root)
    {
        MPIR_Datatype_get_extent_macro(recvtype, extent);
        MPIR_Ensure_Aint_fits_in_pointer(MPIR_VOID_PTR_CAST_TO_MPI_AINT recvbuf+
                                         (extent*recvcount*comm_size));
    }

    if (is_homogeneous)
    {
        /* communicator is homogeneous. no need to pack buffer. */
        if (rank == root)
        {
            MPIR_Datatype_get_size_macro(recvtype, recvtype_size);
            nbytes = recvtype_size * recvcount;
        }
        else
        {
            MPIR_Datatype_get_size_macro(sendtype, sendtype_size);
            nbytes = sendtype_size * sendcount;
        }

        /* Find the number of missing nodes in my sub-tree compared to
         * a balanced tree */
        for (mask = 1; mask < comm_size; mask <<= 1);
        --mask;
        while (relative_rank & mask) mask >>= 1;
        missing = (relative_rank | mask) - comm_size + 1;
        if (missing < 0) missing = 0;
        tmp_buf_size = (mask - missing);
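        /* Illustrative: with comm_size = 6 and relative_rank = 4, the
         * balanced subtree would be ranks 4..7 (mask = 3), but ranks 6
         * and 7 do not exist, so missing = 2 and only mask - missing = 1
         * incoming block needs buffering. */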

        /* If the message is smaller than the threshold, we will copy
         * our message in there too */
        if (nbytes < MPIR_CVAR_GATHER_VSMALL_MSG_SIZE) tmp_buf_size++;

        tmp_buf_size *= nbytes;

        /* The root needs no temporary buffer if it is rank 0 or if
         * the message is large enough to be received directly */
        if ((rank == root) && (!root || (nbytes >= MPIR_CVAR_GATHER_VSMALL_MSG_SIZE)))
            tmp_buf_size = 0;

        if (tmp_buf_size) {
            MPIR_SCHED_CHKPMEM_MALLOC(tmp_buf, void *, tmp_buf_size, mpi_errno, "tmp_buf", MPL_MEM_BUFFER);
        }

        if (rank == root) {
            if (sendbuf != MPI_IN_PLACE) {
                mpi_errno = MPIR_Localcopy(sendbuf, sendcount, sendtype,
                                           ((char *) recvbuf + extent*recvcount*rank), recvcount, recvtype);
                if (mpi_errno) MPIR_ERR_POP(mpi_errno);
            }
        }
        else if (tmp_buf_size && (nbytes < MPIR_CVAR_GATHER_VSMALL_MSG_SIZE)) {
            /* copy from sendbuf into tmp_buf */
            mpi_errno = MPIR_Localcopy(sendbuf, sendcount, sendtype,
                                       tmp_buf, nbytes, MPI_BYTE);
            if (mpi_errno) MPIR_ERR_POP(mpi_errno);
        }
        curr_cnt = nbytes;

        mask = 0x1;
        while (mask < comm_size) {
            if ((mask & relative_rank) == 0) {
                src = relative_rank | mask;
                if (src < comm_size) {
                    src = (src + root) % comm_size;

                    if (rank == root) {
                        recvblks = mask;
                        if ((2 * recvblks) > comm_size)
                            recvblks = comm_size - recvblks;

                        if ((rank + mask + recvblks == comm_size) ||
                            (((rank + mask) % comm_size) < ((rank + mask + recvblks) % comm_size)))
                        {
                            /* If the data contiguously fits into the
                             * receive buffer, place it directly. This
                             * should cover the case where the root is
                             * rank 0. */
                            char *rp = (char *)recvbuf + (((rank + mask) % comm_size)*recvcount*extent);
                            mpi_errno = MPIR_Sched_recv(rp, (recvblks * recvcount), recvtype, src, comm_ptr, s);
                            if (mpi_errno) MPIR_ERR_POP(mpi_errno);
                            mpi_errno = MPIR_Sched_barrier(s);
                            if (mpi_errno) MPIR_ERR_POP(mpi_errno);
                        }
                        else if (nbytes < MPIR_CVAR_GATHER_VSMALL_MSG_SIZE) {
                            mpi_errno = MPIR_Sched_recv(tmp_buf, (recvblks * nbytes), MPI_BYTE, src, comm_ptr, s);
                            if (mpi_errno) MPIR_ERR_POP(mpi_errno);
                            mpi_errno = MPIR_Sched_barrier(s);
                            if (mpi_errno) MPIR_ERR_POP(mpi_errno);
                            copy_offset = rank + mask;
                            copy_blks = recvblks;
                        }
                        else {
                            blocks[0] = recvcount * (comm_size - root - mask);
                            displs[0] = recvcount * (root + mask);
                            blocks[1] = (recvcount * recvblks) - blocks[0];
                            displs[1] = 0;

                            mpi_errno = MPIR_Type_indexed_impl(2, blocks, displs, recvtype, &tmp_type);
                            if (mpi_errno) MPIR_ERR_POP(mpi_errno);
                            mpi_errno = MPIR_Type_commit_impl(&tmp_type);
                            if (mpi_errno) MPIR_ERR_POP(mpi_errno);

                            mpi_errno = MPIR_Sched_recv(recvbuf, 1, tmp_type, src, comm_ptr, s);
                            if (mpi_errno) MPIR_ERR_POP(mpi_errno);
                            mpi_errno = MPIR_Sched_barrier(s);
                            if (mpi_errno) MPIR_ERR_POP(mpi_errno);

                            /* this "premature" free is safe b/c the sched holds an actual ref to keep it alive */
                            MPIR_Type_free_impl(&tmp_type);
                        }
                    }
                    else { /* Intermediate nodes store in temporary buffer */
                        MPI_Aint offset;

                        /* Estimate the amount of data that is going to come in */
                        recvblks = mask;
                        relative_src = ((src - root) < 0) ? (src - root + comm_size) : (src - root);
                        if (relative_src + mask > comm_size)
                            recvblks -= (relative_src + mask - comm_size);

                        if (nbytes < MPIR_CVAR_GATHER_VSMALL_MSG_SIZE)
                            offset = mask * nbytes;
                        else
                            offset = (mask - 1) * nbytes;
                        mpi_errno = MPIR_Sched_recv(((char *)tmp_buf + offset), (recvblks * nbytes),
                                                    MPI_BYTE, src, comm_ptr, s);
                        if (mpi_errno) MPIR_ERR_POP(mpi_errno);
                        mpi_errno = MPIR_Sched_barrier(s);
                        if (mpi_errno) MPIR_ERR_POP(mpi_errno);
                        curr_cnt += (recvblks * nbytes);
                    }
                }
            }
            else {
                dst = relative_rank ^ mask;
                dst = (dst + root) % comm_size;

                if (!tmp_buf_size) {
                    /* leaf nodes send directly from sendbuf */
                    mpi_errno = MPIR_Sched_send(sendbuf, sendcount, sendtype, dst, comm_ptr, s);
                    if (mpi_errno) MPIR_ERR_POP(mpi_errno);
                    mpi_errno = MPIR_Sched_barrier(s);
                    if (mpi_errno) MPIR_ERR_POP(mpi_errno);
                }
                else if (nbytes < MPIR_CVAR_GATHER_VSMALL_MSG_SIZE) {
                    mpi_errno = MPIR_Sched_send(tmp_buf, curr_cnt, MPI_BYTE, dst, comm_ptr, s);
                    if (mpi_errno) MPIR_ERR_POP(mpi_errno);
                    mpi_errno = MPIR_Sched_barrier(s);
                    if (mpi_errno) MPIR_ERR_POP(mpi_errno);
                }
                else {
                    blocks[0] = sendcount;
                    struct_displs[0] = MPIR_VOID_PTR_CAST_TO_MPI_AINT sendbuf;
                    types[0] = sendtype;
                    /* check for overflow; work around int limits if needed */
                    if (curr_cnt - nbytes != (int)(curr_cnt - nbytes)) {
                        blocks[1] = 1;
                        MPIR_Type_contiguous_x_impl(curr_cnt - nbytes,
                                                    MPI_BYTE, &(types[1]));
                    } else {
                        MPIR_Assign_trunc(blocks[1], curr_cnt - nbytes, int);
                        types[1] = MPI_BYTE;
                    }
                    struct_displs[1] = MPIR_VOID_PTR_CAST_TO_MPI_AINT tmp_buf;

                    mpi_errno = MPIR_Type_create_struct_impl(2, blocks, struct_displs, types, &tmp_type);
                    if (mpi_errno) MPIR_ERR_POP(mpi_errno);
                    mpi_errno = MPIR_Type_commit_impl(&tmp_type);
                    if (mpi_errno) MPIR_ERR_POP(mpi_errno);

                    mpi_errno = MPIR_Sched_send(MPI_BOTTOM, 1, tmp_type, dst, comm_ptr, s);
                    if (mpi_errno) MPIR_ERR_POP(mpi_errno);
                    MPIR_SCHED_BARRIER(s);

                    /* this "premature" free is safe b/c the sched holds an actual ref to keep it alive */
                    MPIR_Type_free_impl(&tmp_type);
                }

                break;
            }
            mask <<= 1;
        }

        if ((rank == root) && root && (nbytes < MPIR_CVAR_GATHER_VSMALL_MSG_SIZE) && copy_blks) {
            /* reorder and copy from tmp_buf into recvbuf */
            /* FIXME why are there two copies here? */
            mpi_errno = MPIR_Sched_copy(tmp_buf, nbytes * (comm_size - copy_offset), MPI_BYTE,
                                       ((char *)recvbuf + extent * recvcount * copy_offset),
                                       recvcount * (comm_size - copy_offset), recvtype, s);
            if (mpi_errno) MPIR_ERR_POP(mpi_errno);
            mpi_errno = MPIR_Sched_copy((char *)tmp_buf + nbytes * (comm_size - copy_offset),
                                        nbytes * (copy_blks - comm_size + copy_offset), MPI_BYTE,
                                        recvbuf, recvcount * (copy_blks - comm_size + copy_offset),
                                        recvtype, s);
            if (mpi_errno) MPIR_ERR_POP(mpi_errno);
        }
    }
#ifdef MPID_HAS_HETERO
    else {
Example #19
0
/* segment_init
 *
 * buf    - datatype buffer location
 * count  - number of instances of the datatype in the buffer
 * handle - handle for datatype (could be derived or not)
 * segp   - pointer to previously allocated segment structure
 *
 * Notes:
 * - Assumes that the segment has been allocated.
 *
 */
static inline void segment_init(const void *buf,
                                MPI_Aint count, MPI_Datatype handle, struct MPIR_Segment *segp)
{
    MPI_Aint elmsize = 0;
    int i, depth = 0;
    int branch_detected = 0;

    struct MPII_Dataloop_stackelm *elmp;
    struct MPIR_Dataloop *dlp = 0, *sblp = &segp->builtin_loop;

#ifdef MPII_DATALOOP_DEBUG_MANIPULATE
    MPL_DBG_MSG_FMT(MPIR_DBG_DATATYPE, VERBOSE,
                    (MPL_DBG_FDEST, "segment_init: count = %ld, buf = %p\n",
                     (long) count, (void *) buf));
#endif

    if (!MPII_DATALOOP_HANDLE_HASLOOP(handle)) {
        /* simplest case; datatype has no loop (basic) */

        MPIR_Datatype_get_size_macro(handle, elmsize);

        sblp->kind = MPII_DATALOOP_KIND_CONTIG | MPII_DATALOOP_FINAL_MASK;
        sblp->loop_params.c_t.count = count;
        sblp->loop_params.c_t.dataloop = 0;
        sblp->el_size = elmsize;
        MPIR_Datatype_get_basic_type(handle, sblp->el_type);
        MPIR_Datatype_get_extent_macro(handle, sblp->el_extent);

        dlp = sblp;
        depth = 1;
    } else if (count == 0) {
        /* only use the builtin */
        sblp->kind = MPII_DATALOOP_KIND_CONTIG | MPII_DATALOOP_FINAL_MASK;
        sblp->loop_params.c_t.count = 0;
        sblp->loop_params.c_t.dataloop = 0;
        sblp->el_size = 0;
        sblp->el_extent = 0;

        dlp = sblp;
        depth = 1;
    } else if (count == 1) {
        /* don't use the builtin */
        MPII_DATALOOP_GET_LOOPPTR(handle, dlp);
    } else {
        /* default: need to use builtin to handle contig; must check
         * loop depth first
         */
        MPIR_Dataloop *oldloop; /* loop from original type, before new count */
        MPI_Aint type_size, type_extent;
        MPI_Datatype el_type;

        MPII_DATALOOP_GET_LOOPPTR(handle, oldloop);
        MPIR_Assert(oldloop != NULL);
        MPIR_Datatype_get_size_macro(handle, type_size);
        MPIR_Datatype_get_extent_macro(handle, type_extent);
        MPIR_Datatype_get_basic_type(handle, el_type);

        if (depth == 1 && ((oldloop->kind & MPII_DATALOOP_KIND_MASK) == MPII_DATALOOP_KIND_CONTIG)) {
            if (type_size == type_extent) {
                /* use a contig */
                sblp->kind = MPII_DATALOOP_KIND_CONTIG | MPII_DATALOOP_FINAL_MASK;
                sblp->loop_params.c_t.count = count * oldloop->loop_params.c_t.count;
                sblp->loop_params.c_t.dataloop = NULL;
                sblp->el_size = oldloop->el_size;
                sblp->el_extent = oldloop->el_extent;
                sblp->el_type = oldloop->el_type;
            } else {
                /* use a vector, with extent of original type becoming the stride */
                sblp->kind = MPII_DATALOOP_KIND_VECTOR | MPII_DATALOOP_FINAL_MASK;
                sblp->loop_params.v_t.count = count;
                sblp->loop_params.v_t.blocksize = oldloop->loop_params.c_t.count;
                sblp->loop_params.v_t.stride = type_extent;
                sblp->loop_params.v_t.dataloop = NULL;
                sblp->el_size = oldloop->el_size;
                sblp->el_extent = oldloop->el_extent;
                sblp->el_type = oldloop->el_type;
            }
        } else {
            /* general case */
            sblp->kind = MPII_DATALOOP_KIND_CONTIG;
            sblp->loop_params.c_t.count = count;
            sblp->loop_params.c_t.dataloop = oldloop;
            sblp->el_size = type_size;
            sblp->el_extent = type_extent;
            sblp->el_type = el_type;

            depth++;    /* we're adding to the depth with the builtin */
            MPIR_Assert(depth < (MPII_DATALOOP_MAX_DATATYPE_DEPTH));
        }

        dlp = sblp;
    }

    /* assert instead of return b/c dtype/dloop errorhandling code is inconsistent */
    MPIR_Assert(depth < (MPII_DATALOOP_MAX_DATATYPE_DEPTH));

    /* initialize the rest of the segment values */
    segp->handle = handle;
    segp->ptr = (void *) buf;
    segp->stream_off = 0;
    segp->cur_sp = 0;
    segp->valid_sp = 0;

    /* initialize the first stackelm in its entirety */
    elmp = &(segp->stackelm[0]);
    MPII_Dataloop_stackelm_load(elmp, dlp, 0);
    branch_detected = elmp->may_require_reloading;

    /* Fill in parameters not set by MPII_Dataloop_stackelm_load */
    elmp->orig_offset = 0;
    elmp->curblock = elmp->orig_block;
    /* MPII_Dataloop_stackelm_offset assumes correct orig_count, curcount, loop_p */
    elmp->curoffset = /* elmp->orig_offset + */ MPII_Dataloop_stackelm_offset(elmp);

    i = 1;
    while (!(dlp->kind & MPII_DATALOOP_FINAL_MASK)) {
        /* get pointer to next dataloop */
        switch (dlp->kind & MPII_DATALOOP_KIND_MASK) {
            case MPII_DATALOOP_KIND_CONTIG:
            case MPII_DATALOOP_KIND_VECTOR:
            case MPII_DATALOOP_KIND_BLOCKINDEXED:
            case MPII_DATALOOP_KIND_INDEXED:
                dlp = dlp->loop_params.cm_t.dataloop;
                break;
            case MPII_DATALOOP_KIND_STRUCT:
                dlp = dlp->loop_params.s_t.dataloop_array[0];
                break;
            default:
                /* --BEGIN ERROR HANDLING-- */
                MPIR_Assert(0);
                break;
                /* --END ERROR HANDLING-- */
        }

        MPIR_Assert(i < MPII_DATALOOP_MAX_DATATYPE_DEPTH);

        /* loop_p, orig_count, orig_block, and curcount are all filled by us now.
         * the rest are filled in at processing time.
         */
        elmp = &(segp->stackelm[i]);

        MPII_Dataloop_stackelm_load(elmp, dlp, branch_detected);
        branch_detected = elmp->may_require_reloading;
        i++;

    }

    segp->valid_sp = depth - 1;
}
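
The initializer above is static; in this codebase it is reached through the
segment wrappers that appear later in Example #26. A minimal sketch of that
path, assuming MPIR_Segment_alloc() invokes segment_init() internally and
that buf, count, and type are already defined:

    MPI_Aint nblocks;
    MPI_Aint bytes = MPIR_SEGMENT_IGNORE_LAST;
    MPIR_Segment *segp = MPIR_Segment_alloc(buf, count, type);

    /* walk the dataloop stack built by segment_init() to count
     * contiguous regions, then release the segment */
    MPIR_Segment_count_contig_blocks(segp, 0, &bytes, &nblocks);
    MPIR_Segment_free(segp);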
Example #20
0
/*@
MPI_Igather - Gathers together values from a group of processes in
              a nonblocking way

Input Parameters:
+ sendbuf - starting address of the send buffer (choice)
. sendcount - number of elements in send buffer (non-negative integer)
. sendtype - data type of send buffer elements (handle)
. recvcount - number of elements for any single receive (non-negative integer, significant only at root)
. recvtype - data type of receive buffer elements (significant only at root) (handle)
. root - rank of receiving process (integer)
- comm - communicator (handle)

Output Parameters:
+ recvbuf - starting address of the receive buffer (significant only at root) (choice)
- request - communication request (handle)

.N ThreadSafe

.N Fortran

.N Errors
@*/
int MPI_Igather(const void *sendbuf, int sendcount, MPI_Datatype sendtype,
                void *recvbuf, int recvcount, MPI_Datatype recvtype,
                int root, MPI_Comm comm, MPI_Request * request)
{
    int mpi_errno = MPI_SUCCESS;
    MPIR_Comm *comm_ptr = NULL;
    MPIR_Request *request_ptr = NULL;
    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_IGATHER);

    MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_IGATHER);

    /* Validate parameters, especially handles needing to be converted */
#ifdef HAVE_ERROR_CHECKING
    {
        MPID_BEGIN_ERROR_CHECKS;
        {
            MPIR_ERRTEST_COMM(comm, mpi_errno);
        }
        MPID_END_ERROR_CHECKS;
    }
#endif /* HAVE_ERROR_CHECKING */

    /* Convert MPI object handles to object pointers */
    MPIR_Comm_get_ptr(comm, comm_ptr);

    /* Validate parameters and objects (post conversion) */
#ifdef HAVE_ERROR_CHECKING
    {
        MPID_BEGIN_ERROR_CHECKS;
        {
            MPIR_Datatype *sendtype_ptr = NULL, *recvtype_ptr = NULL;
            int rank;

            MPIR_Comm_valid_ptr(comm_ptr, mpi_errno, FALSE);
            if (mpi_errno != MPI_SUCCESS)
                goto fn_fail;

            if (comm_ptr->comm_kind == MPIR_COMM_KIND__INTRACOMM) {
                MPIR_ERRTEST_INTRA_ROOT(comm_ptr, root, mpi_errno);

                if (sendbuf != MPI_IN_PLACE) {
                    MPIR_ERRTEST_COUNT(sendcount, mpi_errno);
                    MPIR_ERRTEST_DATATYPE(sendtype, "sendtype", mpi_errno);
                    if (HANDLE_GET_KIND(sendtype) != HANDLE_KIND_BUILTIN) {
                        MPIR_Datatype_get_ptr(sendtype, sendtype_ptr);
                        MPIR_Datatype_valid_ptr(sendtype_ptr, mpi_errno);
                        if (mpi_errno != MPI_SUCCESS)
                            goto fn_fail;
                        MPIR_Datatype_committed_ptr(sendtype_ptr, mpi_errno);
                        if (mpi_errno != MPI_SUCCESS)
                            goto fn_fail;
                    }
                    MPIR_ERRTEST_USERBUFFER(sendbuf, sendcount, sendtype, mpi_errno);
                }

                rank = comm_ptr->rank;
                if (rank == root) {
                    MPIR_ERRTEST_COUNT(recvcount, mpi_errno);
                    MPIR_ERRTEST_DATATYPE(recvtype, "recvtype", mpi_errno);
                    if (HANDLE_GET_KIND(recvtype) != HANDLE_KIND_BUILTIN) {
                        MPIR_Datatype_get_ptr(recvtype, recvtype_ptr);
                        MPIR_Datatype_valid_ptr(recvtype_ptr, mpi_errno);
                        if (mpi_errno != MPI_SUCCESS)
                            goto fn_fail;
                        MPIR_Datatype_committed_ptr(recvtype_ptr, mpi_errno);
                        if (mpi_errno != MPI_SUCCESS)
                            goto fn_fail;
                    }
                    MPIR_ERRTEST_RECVBUF_INPLACE(recvbuf, recvcount, mpi_errno);
                    MPIR_ERRTEST_USERBUFFER(recvbuf, recvcount, recvtype, mpi_errno);

                    /* catch common aliasing cases */
                    if (recvbuf != MPI_IN_PLACE && sendtype == recvtype && sendcount == recvcount &&
                        sendcount != 0) {
                        MPI_Aint recvtype_size;
                        MPIR_Datatype_get_size_macro(recvtype, recvtype_size);
                        MPIR_ERRTEST_ALIAS_COLL(sendbuf,
                                                (char *) recvbuf +
                                                comm_ptr->rank * recvcount * recvtype_size,
                                                mpi_errno);
                    }
                } else
                    MPIR_ERRTEST_SENDBUF_INPLACE(sendbuf, sendcount, mpi_errno);
            }

            if (comm_ptr->comm_kind == MPIR_COMM_KIND__INTERCOMM) {
                MPIR_ERRTEST_INTER_ROOT(comm_ptr, root, mpi_errno);

                if (root == MPI_ROOT) {
                    MPIR_ERRTEST_COUNT(recvcount, mpi_errno);
                    MPIR_ERRTEST_DATATYPE(recvtype, "recvtype", mpi_errno);
                    if (HANDLE_GET_KIND(recvtype) != HANDLE_KIND_BUILTIN) {
                        MPIR_Datatype_get_ptr(recvtype, recvtype_ptr);
                        MPIR_Datatype_valid_ptr(recvtype_ptr, mpi_errno);
                        if (mpi_errno != MPI_SUCCESS)
                            goto fn_fail;
                        MPIR_Datatype_committed_ptr(recvtype_ptr, mpi_errno);
                        if (mpi_errno != MPI_SUCCESS)
                            goto fn_fail;
                    }
                    MPIR_ERRTEST_RECVBUF_INPLACE(recvbuf, recvcount, mpi_errno);
                    MPIR_ERRTEST_USERBUFFER(recvbuf, recvcount, recvtype, mpi_errno);
                }

                else if (root != MPI_PROC_NULL) {
                    MPIR_ERRTEST_COUNT(sendcount, mpi_errno);
                    MPIR_ERRTEST_DATATYPE(sendtype, "sendtype", mpi_errno);
                    if (HANDLE_GET_KIND(sendtype) != HANDLE_KIND_BUILTIN) {
                        MPIR_Datatype_get_ptr(sendtype, sendtype_ptr);
                        MPIR_Datatype_valid_ptr(sendtype_ptr, mpi_errno);
                        if (mpi_errno != MPI_SUCCESS)
                            goto fn_fail;
                        MPIR_Datatype_committed_ptr(sendtype_ptr, mpi_errno);
                        if (mpi_errno != MPI_SUCCESS)
                            goto fn_fail;
                    }
                    MPIR_ERRTEST_SENDBUF_INPLACE(sendbuf, sendcount, mpi_errno);
                    MPIR_ERRTEST_USERBUFFER(sendbuf, sendcount, sendtype, mpi_errno);
                }
            }
        }
        MPID_END_ERROR_CHECKS;
    }
#endif /* HAVE_ERROR_CHECKING */

    /* ... body of routine ...  */

    mpi_errno = MPIR_Igather(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype,
                             root, comm_ptr, &request_ptr);
    if (mpi_errno)
        MPIR_ERR_POP(mpi_errno);

    /* create a complete request, if needed */
    if (!request_ptr)
        request_ptr = MPIR_Request_create_complete(MPIR_REQUEST_KIND__COLL);
    /* return the handle of the request to the user */
    *request = request_ptr->handle;

    /* ... end of body of routine ... */

  fn_exit:
    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_IGATHER);
    MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
    return mpi_errno;

  fn_fail:
    /* --BEGIN ERROR HANDLING-- */
#ifdef HAVE_ERROR_CHECKING
    {
        mpi_errno =
            MPIR_Err_create_code(mpi_errno, MPIR_ERR_RECOVERABLE, __func__, __LINE__, MPI_ERR_OTHER,
                                 "**mpi_igather", "**mpi_igather %p %d %D %p %d %D %d %C %p",
                                 sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, root,
                                 comm, request);
    }
#endif
    mpi_errno = MPIR_Err_return_comm(comm_ptr, __func__, mpi_errno);
    goto fn_exit;
    /* --END ERROR HANDLING-- */
}
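
A minimal usage sketch for the routine above: gather one int from every rank
onto rank 0, overlap the collective with local work, and complete it with
MPI_Wait (the root rank and buffer sizes here are illustrative choices):

    #include <mpi.h>
    #include <stdlib.h>

    void igather_example(MPI_Comm comm)
    {
        int rank, size, sendval;
        int *recvbuf = NULL;
        MPI_Request req;

        MPI_Comm_rank(comm, &rank);
        MPI_Comm_size(comm, &size);
        sendval = rank;
        if (rank == 0)
            recvbuf = malloc(size * sizeof(int));

        MPI_Igather(&sendval, 1, MPI_INT, recvbuf, 1, MPI_INT,
                    0 /* root */, comm, &req);

        /* independent local computation may run here */

        MPI_Wait(&req, MPI_STATUS_IGNORE);
        if (rank == 0)
            free(recvbuf);
    }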
Example #21
0
/* MPIR_Get_elements_x_impl
 *
 * Arguments:
 * - byte_count - input/output byte count
 * - datatype - input datatype
 * - elements - Number of basic elements this byte_count would contain
 *
 * Returns number of elements available given the two constraints of number of
 * bytes and count of types.  Also reduces the byte count by the amount taken
 * up by the types.
 */
int MPIR_Get_elements_x_impl(MPI_Count *byte_count, MPI_Datatype datatype, MPI_Count *elements)
{
    int mpi_errno = MPI_SUCCESS;
    MPIR_Datatype *datatype_ptr = NULL;

    if (HANDLE_GET_KIND(datatype) != HANDLE_KIND_BUILTIN) {
        MPIR_Datatype_get_ptr(datatype, datatype_ptr);
    }

    /* three cases:
     * - nice, simple, single element type
     * - derived type with a zero size
     * - type with multiple element types (nastiest)
     */
    if (HANDLE_GET_KIND(datatype) == HANDLE_KIND_BUILTIN ||
        (datatype_ptr->builtin_element_size != -1 && datatype_ptr->size > 0))
    {
        /* QUESTION: WHAT IF SOMEONE GAVE US AN MPI_UB OR MPI_LB???
         */

        /* in both cases we do not limit the number of types that might
         * be in bytes
         */
        if (HANDLE_GET_KIND(datatype) != HANDLE_KIND_BUILTIN) {
            MPI_Datatype basic_type = MPI_DATATYPE_NULL;
            MPIR_Datatype_get_basic_type(datatype_ptr->basic_type, basic_type);
            *elements = MPIR_Type_get_basic_type_elements(byte_count,
                                                          -1,
                                                          basic_type);
        }
        else {
            /* Behaves just like MPI_Get_count in the predefined case */
            MPI_Count size;
            MPIR_Datatype_get_size_macro(datatype, size);
            if ((*byte_count % size) != 0)
                *elements = MPI_UNDEFINED;
            else
                *elements = MPIR_Type_get_basic_type_elements(byte_count,
                                                              -1,
                                                              datatype);
        }
        MPIR_Assert(*byte_count >= 0);
    }
    else if (datatype_ptr->size == 0) {
        if (*byte_count > 0) {
            /* --BEGIN ERROR HANDLING-- */

            /* datatype size of zero and count > 0 should never happen. */

            (*elements) = MPI_UNDEFINED;
            /* --END ERROR HANDLING-- */
        }
        else {
            /* This is ambiguous. However, discussions on the MPI
             * Forum reached a consensus that this is the correct
             * return value
             */
            (*elements) = 0;
        }
    }
    else /* derived type with weird element type or weird size */ {
        MPIR_Assert(datatype_ptr->builtin_element_size == -1);

        *elements = MPIR_Type_get_elements(byte_count, -1, datatype);
    }

    return mpi_errno;
}
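
MPIR_Get_elements_x_impl backs the public MPI_Get_elements_x query; a minimal
sketch of the user-facing call (the receive itself is assumed to have
completed already):

    MPI_Count n;
    MPI_Status status;

    /* ... MPI_Recv(..., &status) completed above ... */
    MPI_Get_elements_x(&status, datatype, &n);
    /* n == MPI_UNDEFINED if the received bytes do not form a whole
     * number of basic elements of 'datatype' */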
Example #22
0
int MPIR_Bcast_intra_binomial(
    void *buffer, 
    int count, 
    MPI_Datatype datatype, 
    int root, 
    MPIR_Comm *comm_ptr,
    MPIR_Errflag_t *errflag)
{
    int        rank, comm_size, src, dst;
    int        relative_rank, mask;
    int mpi_errno = MPI_SUCCESS;
    int mpi_errno_ret = MPI_SUCCESS;
    int nbytes=0;
    int recvd_size;
    MPI_Status status;
    int is_contig, is_homogeneous;
    MPI_Aint type_size;
    MPI_Aint position;
    void *tmp_buf=NULL;
    MPIR_CHKLMEM_DECL(1);

    comm_size = comm_ptr->local_size;
    rank = comm_ptr->rank;

    /* If there is only one process, return */
    if (comm_size == 1) goto fn_exit;

    if (HANDLE_GET_KIND(datatype) == HANDLE_KIND_BUILTIN)
        is_contig = 1;
    else {
        MPIR_Datatype_is_contig(datatype, &is_contig);
    }

    is_homogeneous = 1;
#ifdef MPID_HAS_HETERO
    if (comm_ptr->is_hetero)
        is_homogeneous = 0;
#endif

    /* MPI_Type_size() might not give the accurate size of the packed
     * datatype for heterogeneous systems (because of padding, encoding,
     * etc). On the other hand, MPI_Pack_size() can become very
     * expensive, depending on the implementation, especially for
     * heterogeneous systems. We want to use MPI_Type_size() wherever
     * possible, and MPI_Pack_size() in other places.
     */
    if (is_homogeneous)
        MPIR_Datatype_get_size_macro(datatype, type_size);
    else
	/* --BEGIN HETEROGENEOUS-- */
        MPIR_Pack_size_impl(1, datatype, &type_size);
        /* --END HETEROGENEOUS-- */

    nbytes = type_size * count;
    if (nbytes == 0)
        goto fn_exit; /* nothing to do */

    if (!is_contig || !is_homogeneous)
    {
        MPIR_CHKLMEM_MALLOC(tmp_buf, void *, nbytes, mpi_errno, "tmp_buf", MPL_MEM_BUFFER);

        /* TODO: Pipeline the packing and communication */
        position = 0;
        if (rank == root) {
            mpi_errno = MPIR_Pack_impl(buffer, count, datatype, tmp_buf, nbytes,
                                       &position);
            if (mpi_errno) MPIR_ERR_POP(mpi_errno);
        }
    }
Example #23
0
/*@
   MPI_Type_match_size - Find an MPI datatype matching a specified size

Input Parameters:
+ typeclass - generic type specifier (integer)
- size - size, in bytes, of representation (integer)

Output Parameters:
. datatype - datatype with correct type, size (handle)

Notes:
'typeclass' is one of 'MPI_TYPECLASS_REAL', 'MPI_TYPECLASS_INTEGER' and
'MPI_TYPECLASS_COMPLEX', corresponding to the desired typeclass.
The function returns an MPI datatype matching a local variable of type
'(typeclass, size)'.

.N ThreadSafe

.N Fortran

.N Errors
.N MPI_SUCCESS
.N MPI_ERR_ARG
@*/
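/* Usage sketch (illustrative): ask for an 8-byte floating-point type.
 *
 *     MPI_Datatype dt;
 *     MPI_Type_match_size(MPI_TYPECLASS_REAL, 8, &dt);
 *     // dt is a predefined handle such as MPI_REAL8 or MPI_DOUBLE
 */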
int MPI_Type_match_size(int typeclass, int size, MPI_Datatype * datatype)
{
    int mpi_errno = MPI_SUCCESS;
#ifdef HAVE_ERROR_CHECKING
    static const char *tname = 0;
#endif
    /* Note that all of the datatypes have values, even if a type is
     * not available. We test for that case separately. We also
     * prefer the Fortran types to the C types, when available */
    static MPI_Datatype real_types[] = {
        MPI_REAL4, MPI_REAL8, MPI_REAL16,
        MPI_REAL, MPI_DOUBLE_PRECISION,
        MPI_FLOAT, MPI_DOUBLE, MPI_LONG_DOUBLE
    };
    static MPI_Datatype int_types[] = {
        MPI_INTEGER1, MPI_INTEGER2, MPI_INTEGER4, MPI_INTEGER8, MPI_INTEGER16,
        MPI_INTEGER,
        MPI_CHAR, MPI_SHORT, MPI_INT,
        MPI_LONG, MPI_LONG_LONG
    };
    static MPI_Datatype complex_types[] = {
        MPI_COMPLEX8, MPI_COMPLEX16, MPI_COMPLEX32,
        MPI_COMPLEX, MPI_DOUBLE_COMPLEX,
        MPI_C_COMPLEX, MPI_C_DOUBLE_COMPLEX, MPI_C_LONG_DOUBLE_COMPLEX,
    };
    MPI_Datatype matched_datatype = MPI_DATATYPE_NULL;
    int i;
    MPI_Aint tsize;
    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_TYPE_MATCH_SIZE);

    MPIR_ERRTEST_INITIALIZED_ORDIE();

    /* FIXME: This routine does not require the global critical section */
    MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_TYPE_MATCH_SIZE);

    /* Validate parameters and objects (post conversion) */
#ifdef HAVE_ERROR_CHECKING
    {
        MPID_BEGIN_ERROR_CHECKS;
        {
            MPIR_ERRTEST_ARGNULL(datatype, "datatype", mpi_errno);
        }
        MPID_END_ERROR_CHECKS;
    }
#endif /* HAVE_ERROR_CHECKING */

    /* ... body of routine ...  */

    /* The following implementation follows the suggestion in the
     * MPI-2 standard.
     * The version in the MPI-2 spec makes use of the Fortran optional types;
     * currently, we don't support these from C (see mpi.h.in).
     * Thus, we look at the candidate types and make use of the first fit.
     * Note that the standard doesn't require that this routine return
     * any particular choice of MPI datatype; e.g., it is not required
     * to return MPI_INTEGER4 if a 4-byte integer is requested.
     */
    switch (typeclass) {
        case MPI_TYPECLASS_REAL:
            {
                int nRealTypes = sizeof(real_types) / sizeof(MPI_Datatype);
#ifdef HAVE_ERROR_CHECKING
                tname = "MPI_TYPECLASS_REAL";
#endif
                for (i = 0; i < nRealTypes; i++) {
                    if (real_types[i] == MPI_DATATYPE_NULL) {
                        continue;
                    }
                    MPIR_Datatype_get_size_macro(real_types[i], tsize);
                    if (tsize == size) {
                        matched_datatype = real_types[i];
                        break;
                    }
                }
            }
            break;
        case MPI_TYPECLASS_INTEGER:
            {
                int nIntTypes = sizeof(int_types) / sizeof(MPI_Datatype);
#ifdef HAVE_ERROR_CHECKING
                tname = "MPI_TYPECLASS_INTEGER";
#endif
                for (i = 0; i < nIntTypes; i++) {
                    if (int_types[i] == MPI_DATATYPE_NULL) {
                        continue;
                    }
                    MPIR_Datatype_get_size_macro(int_types[i], tsize);
                    if (tsize == size) {
                        matched_datatype = int_types[i];
                        break;
                    }
                }
            }
            break;
        case MPI_TYPECLASS_COMPLEX:
            {
                int nComplexTypes = sizeof(complex_types) / sizeof(MPI_Datatype);
#ifdef HAVE_ERROR_CHECKING
                tname = "MPI_TYPECLASS_COMPLEX";
#endif
                for (i = 0; i < nComplexTypes; i++) {
                    if (complex_types[i] == MPI_DATATYPE_NULL) {
                        continue;
                    }
                    MPIR_Datatype_get_size_macro(complex_types[i], tsize);
                    if (tsize == size) {
                        matched_datatype = complex_types[i];
                        break;
                    }
                }
            }
            break;
        default:
            /* --BEGIN ERROR HANDLING-- */
            MPIR_ERR_SETANDSTMT(mpi_errno, MPI_ERR_ARG, break, "**typematchnoclass");
            /* --END ERROR HANDLING-- */
    }

    if (mpi_errno == MPI_SUCCESS) {
        if (matched_datatype == MPI_DATATYPE_NULL) {
            /* --BEGIN ERROR HANDLING-- */
            MPIR_ERR_SETANDSTMT2(mpi_errno, MPI_ERR_ARG,;, "**typematchsize",
                                 "**typematchsize %s %d", tname, size);
            /* --END ERROR HANDLING-- */
        } else {
Example #24
0
int MPIR_Iallgatherv_sched_intra_recursive_doubling(const void *sendbuf, int sendcount,
                                                    MPI_Datatype sendtype, void *recvbuf,
                                                    const int recvcounts[], const int displs[],
                                                    MPI_Datatype recvtype, MPIR_Comm * comm_ptr,
                                                    MPIR_Sched_t s)
{
    int mpi_errno = MPI_SUCCESS;
    int comm_size, rank, i, j, k;
    int curr_count, send_offset, incoming_count, recv_offset;
    int mask, dst, total_count, position, offset, my_tree_root, dst_tree_root;
    MPI_Aint recvtype_extent, recvtype_sz;
    void *tmp_buf = NULL;
    MPIR_SCHED_CHKPMEM_DECL(1);

    comm_size = comm_ptr->local_size;
    rank = comm_ptr->rank;

#ifdef HAVE_ERROR_CHECKING
    /* Currently this algorithm can only handle power-of-2 comm_size.
     * Non power-of-2 comm_size is still experimental */
    MPIR_Assert(!(comm_size & (comm_size - 1)));
#endif /* HAVE_ERROR_CHECKING */

    /* need to receive contiguously into tmp_buf because
     * displs could make the recvbuf noncontiguous */
    MPIR_Datatype_get_size_macro(recvtype, recvtype_sz);
    MPIR_Datatype_get_extent_macro(recvtype, recvtype_extent);

    total_count = 0;
    for (i = 0; i < comm_size; i++)
        total_count += recvcounts[i];

    if (total_count == 0)
        goto fn_exit;

    MPIR_SCHED_CHKPMEM_MALLOC(tmp_buf, void *, total_count * recvtype_sz,
                              mpi_errno, "tmp_buf", MPL_MEM_BUFFER);

    /* copy local data into right location in tmp_buf */
    position = 0;
    for (i = 0; i < rank; i++)
        position += recvcounts[i];
    if (sendbuf != MPI_IN_PLACE) {
        mpi_errno = MPIR_Sched_copy(sendbuf, sendcount, sendtype,
                                    ((char *) tmp_buf + position * recvtype_sz),
                                    recvcounts[rank] * recvtype_sz, MPI_BYTE, s);
        if (mpi_errno)
            MPIR_ERR_POP(mpi_errno);
    } else {
        /* if in_place specified, local data is found in recvbuf */
        mpi_errno = MPIR_Sched_copy(((char *) recvbuf + displs[rank] * recvtype_extent),
                                    recvcounts[rank], recvtype,
                                    ((char *) tmp_buf + position * recvtype_sz),
                                    recvcounts[rank] * recvtype_sz, MPI_BYTE, s);
        if (mpi_errno)
            MPIR_ERR_POP(mpi_errno);
    }

    curr_count = recvcounts[rank];

    /* never used uninitialized w/o this, but compiler can't tell that */
    incoming_count = -1;

    /* [goodell@] random notes that help slightly when deciphering this code:
     * - mask is also equal to the number of blocks that we are going to recv
     *   (less if comm_size is non-pof2)
     * - FOO_tree_root is the leftmost (lowest-ranked) process with whom FOO has
     *   communicated, directly or indirectly, at the beginning of the
     *   round.  FOO is either "dst" or "my", where "my" means use my rank.
     * - in each round we are going to recv the blocks
     *   B[dst_tree_root],B[dst_tree_root+1],...,B[min(dst_tree_root+mask,comm_size)]
     */
    mask = 0x1;
    i = 0;
    while (mask < comm_size) {
        dst = rank ^ mask;

        /* find offset into send and recv buffers. zero out
         * the least significant "i" bits of rank and dst to
         * find root of src and dst subtrees. Use ranks of
         * roots as index to send from and recv into buffer */

        dst_tree_root = dst >> i;
        dst_tree_root <<= i;

        my_tree_root = rank >> i;
        my_tree_root <<= i;
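        /* e.g., with i = 2 and rank = 6 (0b110), clearing the two low
         * bits gives my_tree_root = 4, the lowest rank in this
         * process's current subtree */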

        if (dst < comm_size) {
            send_offset = 0;
            for (j = 0; j < my_tree_root; j++)
                send_offset += recvcounts[j];

            recv_offset = 0;
            for (j = 0; j < dst_tree_root; j++)
                recv_offset += recvcounts[j];

            incoming_count = 0;
            for (j = dst_tree_root; j < (dst_tree_root + mask) && j < comm_size; ++j)
                incoming_count += recvcounts[j];

            mpi_errno = MPIR_Sched_send(((char *) tmp_buf + send_offset * recvtype_sz),
                                        curr_count * recvtype_sz, MPI_BYTE, dst, comm_ptr, s);
            if (mpi_errno)
                MPIR_ERR_POP(mpi_errno);
            /* sendrecv, no barrier here */
            mpi_errno = MPIR_Sched_recv(((char *) tmp_buf + recv_offset * recvtype_sz),
                                        incoming_count * recvtype_sz, MPI_BYTE, dst, comm_ptr, s);
            if (mpi_errno)
                MPIR_ERR_POP(mpi_errno);
            MPIR_SCHED_BARRIER(s);

            curr_count += incoming_count;
        }

        /* if some processes in this process's subtree in this step
         * did not have any destination process to communicate with
         * because of non-power-of-two, we need to send them the
         * data that they would normally have received from those
         * processes. That is, the haves in this subtree must send to
         * the have-nots. We use a logarithmic
         * recursive-halving algorithm for this. */

        /* This part of the code will not currently be
         * executed because we are not using recursive
         * doubling for non power of two. Mark it as experimental
         * so that it doesn't show up as red in the coverage
         * tests. */

        /* --BEGIN EXPERIMENTAL-- */
        if (dst_tree_root + mask > comm_size) {
            int tmp_mask, tree_root;
            int nprocs_completed = comm_size - my_tree_root - mask;
            /* nprocs_completed is the number of processes in this
             * subtree that have all the data. Send data to others
             * in a tree fashion. First find root of current tree
             * that is being divided into two. k is the number of
             * least-significant bits in this process's rank that
             * must be zeroed out to find the rank of the root */
            /* [goodell@] it looks like (k==i) is always true, could possibly
             * skip the loop below */
            j = mask;
            k = 0;
            while (j) {
                j >>= 1;
                k++;
            }
            k--;

            tmp_mask = mask >> 1;

            while (tmp_mask) {
                dst = rank ^ tmp_mask;

                tree_root = rank >> k;
                tree_root <<= k;

                /* send only if this proc has data and destination
                 * doesn't have data. at any step, multiple processes
                 * can send if they have the data */
                if ((dst > rank) &&
                    (rank < tree_root + nprocs_completed) &&
                    (dst >= tree_root + nprocs_completed)) {
                    offset = 0;
                    for (j = 0; j < (my_tree_root + mask); j++)
                        offset += recvcounts[j];
                    offset *= recvtype_sz;

                    /* incoming_count was set in the previous
                     * receive. that's the amount of data to be
                     * sent now. */
                    mpi_errno = MPIR_Sched_send(((char *) tmp_buf + offset),
                                                incoming_count * recvtype_sz, MPI_BYTE,
                                                dst, comm_ptr, s);
                    if (mpi_errno)
                        MPIR_ERR_POP(mpi_errno);
                    MPIR_SCHED_BARRIER(s);
                }
                /* recv only if this proc. doesn't have data and sender
                 * has data */
                else if ((dst < rank) &&
                         (dst < tree_root + nprocs_completed) &&
                         (rank >= tree_root + nprocs_completed)) {

                    offset = 0;
                    for (j = 0; j < (my_tree_root + mask); j++)
                        offset += recvcounts[j];

                    /* recalculate incoming_count, since not all processes will have
                     * this value */
                    incoming_count = 0;
                    for (j = dst_tree_root; j < (dst_tree_root + mask) && j < comm_size; ++j)
                        incoming_count += recvcounts[j];

                    mpi_errno = MPIR_Sched_recv(((char *) tmp_buf + offset * recvtype_sz),
                                                incoming_count * recvtype_sz, MPI_BYTE,
                                                dst, comm_ptr, s);
                    if (mpi_errno)
                        MPIR_ERR_POP(mpi_errno);
                    MPIR_SCHED_BARRIER(s);
                    curr_count += incoming_count;
                }
                tmp_mask >>= 1;
                k--;
            }
        }
        /* --END EXPERIMENTAL-- */

        mask <<= 1;
        i++;
    }
Example #25
0
void MPIDU_Type_calc_footprint(MPI_Datatype type, DLOOP_Type_footprint *tfp)
{
    int mpi_errno;
    int nr_ints, nr_aints, nr_types, combiner;
    int *ints;
    MPI_Aint *aints;
    MPI_Datatype *types;

    /* used to store parameters for constituent types */
    DLOOP_Offset size = 0, lb = 0, ub = 0, true_lb = 0, true_ub = 0;
    DLOOP_Offset extent = 0, alignsz;
    int has_sticky_lb, has_sticky_ub;

    /* used for vector/hvector/hvector_integer calculations */
    DLOOP_Offset stride;

    /* used for indexed/hindexed calculations */
    DLOOP_Offset disp;

    /* used for calculations on types with more than one block of data */
    DLOOP_Offset i, min_lb, max_ub, ntypes, tmp_lb, tmp_ub;

    /* used for processing subarray and darray types */
    int ndims;
    MPI_Datatype tmptype;

    MPIR_Type_get_envelope(type, &nr_ints, &nr_aints, &nr_types, &combiner);

    if (combiner == MPI_COMBINER_NAMED) {
	int mpisize;
	MPI_Aint mpiextent;

	MPIR_Datatype_get_size_macro(type, mpisize);
	MPIR_Datatype_get_extent_macro(type, mpiextent);
	tfp->size    = (DLOOP_Offset) mpisize;
	tfp->lb      = 0;
	tfp->ub      = (DLOOP_Offset) mpiextent;
	tfp->true_lb = 0;
	tfp->true_ub = (DLOOP_Offset) mpiextent;
	tfp->extent  = (DLOOP_Offset) mpiextent;
	tfp->alignsz = DLOOP_Named_type_alignsize(type, (MPI_Aint) 0);
	tfp->has_sticky_lb = (type == MPI_LB) ? 1 : 0;
	tfp->has_sticky_ub = (type == MPI_UB) ? 1 : 0;

	goto clean_exit;
    }

    /* get access to contents; need it immediately to check for zero count */
    MPIDU_Type_access_contents(type, &ints, &aints, &types);

    /* knock out all the zero count cases */
    if ((combiner == MPI_COMBINER_CONTIGUOUS ||
	 combiner == MPI_COMBINER_VECTOR ||
	 combiner == MPI_COMBINER_HVECTOR_INTEGER ||
	 combiner == MPI_COMBINER_HVECTOR ||
	 combiner == MPI_COMBINER_INDEXED_BLOCK ||
	 combiner == MPI_COMBINER_HINDEXED_BLOCK ||
	 combiner == MPI_COMBINER_INDEXED ||
	 combiner == MPI_COMBINER_HINDEXED_INTEGER ||
	 combiner == MPI_COMBINER_STRUCT_INTEGER ||
	 combiner == MPI_COMBINER_STRUCT) && ints[0] == 0)
    {
	tfp->size = tfp->lb = tfp->ub = tfp->extent = tfp->alignsz = 0;
	tfp->true_lb = tfp->true_ub = 0;
	tfp->has_sticky_lb = tfp->has_sticky_ub = 0;
	goto clean_exit;
    }

    if (combiner != MPI_COMBINER_STRUCT &&
	combiner != MPI_COMBINER_STRUCT_INTEGER)
    {
	DLOOP_Type_footprint cfp;

	MPIDU_Type_calc_footprint(types[0], &cfp);
	size    = cfp.size;
	lb      = cfp.lb;
	ub      = cfp.ub;
	true_lb = cfp.true_lb;
	true_ub = cfp.true_ub;
	extent  = cfp.extent;
	alignsz = cfp.alignsz;
	has_sticky_lb = cfp.has_sticky_lb;
	has_sticky_ub = cfp.has_sticky_ub;

	/* initialize some common values so we don't have to assign
	 * them in every case below.
	 */
	tfp->alignsz = alignsz;
	tfp->has_sticky_lb = has_sticky_lb;
	tfp->has_sticky_ub = has_sticky_ub;

    }

    switch(combiner)
    {
	case MPI_COMBINER_DUP:
	    tfp->size    = size;
	    tfp->lb      = lb;
	    tfp->ub      = ub;
	    tfp->true_lb = true_lb;
	    tfp->true_ub = true_ub;
	    tfp->extent  = extent;
	    break;
	case MPI_COMBINER_RESIZED:
	    tfp->size    = size;
	    tfp->lb      = aints[0]; /* lb */
	    tfp->ub      = aints[0] + aints[1];
	    tfp->true_lb = true_lb;
	    tfp->true_ub = true_ub;
	    tfp->extent  = aints[1]; /* extent */
	    tfp->has_sticky_lb = 1;
	    tfp->has_sticky_ub = 1;
	    break;
	case MPI_COMBINER_CONTIGUOUS:
	    DLOOP_DATATYPE_CONTIG_LB_UB(ints[0] /* count */,
					lb, ub, extent,
					tfp->lb, tfp->ub);
	    tfp->true_lb = tfp->lb + (true_lb - lb);
	    tfp->true_ub = tfp->ub + (true_ub - ub);
	    tfp->size    = (DLOOP_Offset) ints[0] * size;
	    tfp->extent  = tfp->ub - tfp->lb;
	    break;
	case MPI_COMBINER_VECTOR:
	case MPI_COMBINER_HVECTOR:
	case MPI_COMBINER_HVECTOR_INTEGER:
	    if (combiner == MPI_COMBINER_VECTOR) stride = (DLOOP_Offset) ints[2] * extent;
	    else if (combiner == MPI_COMBINER_HVECTOR) stride = aints[0];
	    else /* HVECTOR_INTEGER */ stride = (DLOOP_Offset) ints[2];
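	    /* For MPI_COMBINER_VECTOR the stored stride (ints[2]) is in
	     * elements of the old type, hence the scale by its extent;
	     * the two HVECTOR forms already carry a byte stride. */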

	    DLOOP_DATATYPE_VECTOR_LB_UB(ints[0] /* count */,
					stride /* stride in bytes */,
					ints[1] /* blklen */,
					lb, ub, extent,
					tfp->lb, tfp->ub);
	    tfp->true_lb = tfp->lb + (true_lb - lb);
	    tfp->true_ub = tfp->ub + (true_ub - ub);
	    tfp->size    = (DLOOP_Offset) ints[0] * (DLOOP_Offset) ints[1] * size;
	    tfp->extent  = tfp->ub - tfp->lb;
	    break;
	case MPI_COMBINER_INDEXED_BLOCK:
	    /* prime min_lb and max_ub */
	    DLOOP_DATATYPE_BLOCK_LB_UB(ints[1] /* blklen */,
				       (DLOOP_Offset) ints[2] * extent /* disp */,
				       lb, ub, extent,
				       min_lb, max_ub);

	    for (i=1; i < ints[0]; i++) {
		DLOOP_DATATYPE_BLOCK_LB_UB(ints[1] /* blklen */,
					   (DLOOP_Offset) ints[i+2] * extent /* disp */,
					   lb, ub, extent,
					   tmp_lb, tmp_ub);
		if (tmp_lb < min_lb) min_lb = tmp_lb;
		if (tmp_ub > max_ub) max_ub = tmp_ub;
	    }
	    tfp->size    = (DLOOP_Offset) ints[0] * (DLOOP_Offset) ints[1] * size;
	    tfp->lb      = min_lb;
	    tfp->ub      = max_ub;
	    tfp->true_lb = min_lb + (true_lb - lb);
	    tfp->true_ub = max_ub + (true_ub - ub);
	    tfp->extent  = tfp->ub - tfp->lb;
	    break;
	case MPI_COMBINER_HINDEXED_BLOCK:
	    /* prime min_lb and max_ub */
	    DLOOP_DATATYPE_BLOCK_LB_UB(ints[1] /* blklen */,
				       (DLOOP_Offset) ints[2] /* disp */,
				       lb, ub, extent,
				       min_lb, max_ub);

	    for (i=1; i < ints[0]; i++) {
		DLOOP_DATATYPE_BLOCK_LB_UB(ints[1] /* blklen */,
					   (DLOOP_Offset) ints[i+2] /* disp */,
					   lb, ub, extent,
					   tmp_lb, tmp_ub);
		if (tmp_lb < min_lb) min_lb = tmp_lb;
		if (tmp_ub > max_ub) max_ub = tmp_ub;
	    }
	    tfp->size    = (DLOOP_Offset) ints[0] * (DLOOP_Offset) ints[1] * size;
	    tfp->lb      = min_lb;
	    tfp->ub      = max_ub;
	    tfp->true_lb = min_lb + (true_lb - lb);
	    tfp->true_ub = max_ub + (true_ub - ub);
	    tfp->extent  = tfp->ub - tfp->lb;
	    break;
	case MPI_COMBINER_INDEXED:
	case MPI_COMBINER_HINDEXED_INTEGER:
	case MPI_COMBINER_HINDEXED:
	    /* find first non-zero blocklength element */
	    for (i=0; i < ints[0] && ints[i+1] == 0; i++);
	    if (i == ints[0]) {
		/* all zero blocklengths */
		tfp->size = tfp->lb = tfp->ub = tfp->extent = tfp->alignsz = 0;
		tfp->has_sticky_lb = tfp->has_sticky_ub = 0;
	    }
	    else {
		/* prime min_lb, max_ub, count */
		ntypes = ints[i+1];
		if (combiner == MPI_COMBINER_INDEXED)
		    disp = (DLOOP_Offset) ints[ints[0]+i+1] * extent;
		else if (combiner == MPI_COMBINER_HINDEXED_INTEGER)
		    disp = (DLOOP_Offset) ints[ints[0]+i+1];
		else /* MPI_COMBINER_HINDEXED */
		    disp = aints[i];

		DLOOP_DATATYPE_BLOCK_LB_UB(ints[i+1] /* blklen */,
					   disp,
					   lb, ub, extent,
					   min_lb, max_ub);

		for (i++; i < ints[0]; i++) {
		    /* skip zero blocklength elements */
		    if (ints[i+1] == 0) continue;

		    ntypes += ints[i+1];
		    if (combiner == MPI_COMBINER_INDEXED)
			disp = (DLOOP_Offset) ints[ints[0]+i+1] * extent;
		    else if (combiner == MPI_COMBINER_HINDEXED_INTEGER)
			disp = (DLOOP_Offset) ints[ints[0]+i+1];
		    else /* MPI_COMBINER_HINDEXED */
			disp = aints[i];

		    DLOOP_DATATYPE_BLOCK_LB_UB(ints[i+1],
					       disp,
					       lb, ub, extent,
					       tmp_lb, tmp_ub);
		    if (tmp_lb < min_lb) min_lb = tmp_lb;
		    if (tmp_ub > max_ub) max_ub = tmp_ub;
		}
		tfp->size    = ntypes * size;
		tfp->lb      = min_lb;
		tfp->ub      = max_ub;
		tfp->true_lb = min_lb + (true_lb - lb);
		tfp->true_ub = max_ub + (true_ub - ub);
		tfp->extent  = tfp->ub - tfp->lb;
	    }
	    break;
	case MPI_COMBINER_STRUCT_INTEGER:
	    DLOOP_Assert(combiner != MPI_COMBINER_STRUCT_INTEGER);
	    break;
	case MPI_COMBINER_STRUCT:
	    /* sufficiently complicated to pull out into separate fn */
	    DLOOP_Type_calc_footprint_struct(type,
					     combiner, ints, aints, types,
					     tfp);
	    break;
	case MPI_COMBINER_SUBARRAY:
	    ndims = ints[0];
	    MPIDU_Type_convert_subarray(ndims,
						  &ints[1] /* sizes */,
						  &ints[1+ndims] /* subsz */,
						  &ints[1+2*ndims] /* strts */,
						  ints[1+3*ndims] /* order */,
						  types[0],
						  &tmptype);
	    MPIDU_Type_calc_footprint(tmptype, tfp);
	    MPIR_Type_free_impl(&tmptype);
	    break;
	case MPI_COMBINER_DARRAY:
	    ndims = ints[2];

	    MPIDU_Type_convert_darray(ints[0] /* size */,
						ints[1] /* rank */,
						ndims,
						&ints[3] /* gsizes */,
						&ints[3+ndims] /*distribs */,
						&ints[3+2*ndims] /* dargs */,
						&ints[3+3*ndims] /* psizes */,
						ints[3+4*ndims] /* order */,
						types[0],
						&tmptype);

	    MPIDU_Type_calc_footprint(tmptype, tfp);
	    MPIR_Type_free_impl(&tmptype);
	    break;
	case MPI_COMBINER_F90_REAL:
	case MPI_COMBINER_F90_COMPLEX:
	case MPI_COMBINER_F90_INTEGER:
	default:
	    DLOOP_Assert(0);
	    break;
    }

 clean_exit:
    MPIDU_Type_release_contents(type, &ints, &aints, &types);
    return;
}
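
A minimal caller sketch for the routine above (the vector type and its
parameters are illustrative; the field names match the assignments shown in
the code):

    DLOOP_Type_footprint tfp;
    MPI_Datatype vec;

    /* 4 blocks of 2 doubles, stride of 8 doubles between block starts */
    MPI_Type_vector(4, 2, 8, MPI_DOUBLE, &vec);
    MPI_Type_commit(&vec);

    MPIDU_Type_calc_footprint(vec, &tfp);
    /* tfp.size  == 4 * 2 * sizeof(double) = 64 bytes of actual data;
     * tfp.extent covers the full strided span, which is larger */

    MPI_Type_free(&vec);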
Example #26
0
static int create_flattened_struct(MPI_Aint count,
                                   const int *blklens,
                                   const MPI_Aint * disps,
                                   const MPI_Datatype * oldtypes,
                                   MPIR_Dataloop ** dlp_p, MPI_Aint * dlsz_p)
{
    /* arbitrary types, convert to bytes and use indexed */
    int i, err, nr_blks = 0;
    MPI_Aint *tmp_blklens;
    MPI_Aint *tmp_disps;        /* since we're calling another fn that takes
                                 * this type as an input parameter */
    MPI_Aint bytes;
    MPIR_Segment *segp;

    int first_ind;
    MPI_Aint last_ind;

    /* use segment code once to count contiguous regions */
    for (i = 0; i < count; i++) {
        int is_basic;

        /* ignore type elements with a zero blklen */
        if (blklens[i] == 0)
            continue;

        is_basic = (MPII_DATALOOP_HANDLE_HASLOOP(oldtypes[i])) ? 0 : 1;

        if (is_basic && (oldtypes[i] != MPI_LB && oldtypes[i] != MPI_UB)) {
            nr_blks++;
        } else {        /* derived type; get a count of contig blocks */

            MPI_Aint tmp_nr_blks, sz;

            MPIR_Datatype_get_size_macro(oldtypes[i], sz);

            /* if the derived type has some data to contribute,
             * add to flattened representation */
            if (sz > 0) {
                segp = MPIR_Segment_alloc(NULL, (MPI_Aint) blklens[i], oldtypes[i]);

                bytes = MPIR_SEGMENT_IGNORE_LAST;

                MPIR_Segment_count_contig_blocks(segp, 0, &bytes, &tmp_nr_blks);
                MPIR_Segment_free(segp);

                nr_blks += tmp_nr_blks;
            }
        }
    }

    /* it's possible for us to get to this point only to realize that
     * there isn't any data in this type. in that case do what we always
     * do: store a simple contig of zero ints and call it done.
     */
    if (nr_blks == 0) {
        err = MPII_Dataloop_create_contiguous(0, MPI_INT, dlp_p, dlsz_p);
        return err;
    }

    nr_blks += 2;       /* safety measure */

    tmp_blklens = (MPI_Aint *) MPL_malloc(nr_blks * sizeof(MPI_Aint), MPL_MEM_DATATYPE);
    /* --BEGIN ERROR HANDLING-- */
    if (!tmp_blklens) {
        return create_struct_memory_error();
    }
    /* --END ERROR HANDLING-- */


    tmp_disps = (MPI_Aint *) MPL_malloc(nr_blks * sizeof(MPI_Aint), MPL_MEM_DATATYPE);
    /* --BEGIN ERROR HANDLING-- */
    if (!tmp_disps) {
        MPL_free(tmp_blklens);
        return create_struct_memory_error();
    }
    /* --END ERROR HANDLING-- */

    /* use segment code again to flatten the type */
    first_ind = 0;
    for (i = 0; i < count; i++) {
        int is_basic;
        MPI_Aint sz = -1;

        is_basic = (MPII_DATALOOP_HANDLE_HASLOOP(oldtypes[i])) ? 0 : 1;
        if (!is_basic)
            MPIR_Datatype_get_size_macro(oldtypes[i], sz);

        /* we're going to use the segment code to flatten the type.
         * we put in our displacement as the buffer location, and use
         * the blocklength as the count value to get N contiguous copies
         * of the type.
         *
         * Note that we're going to get back values in bytes, so that will
         * be our new element type.
         */
        if (oldtypes[i] != MPI_UB &&
            oldtypes[i] != MPI_LB && blklens[i] != 0 && (is_basic || sz > 0)) {
            segp = MPIR_Segment_alloc((char *) disps[i], (MPI_Aint) blklens[i], oldtypes[i]);

            last_ind = nr_blks - first_ind;
            bytes = MPIR_SEGMENT_IGNORE_LAST;
            MPII_Dataloop_segment_flatten(segp,
                                          0,
                                          &bytes,
                                          &tmp_blklens[first_ind], &tmp_disps[first_ind],
                                          &last_ind);
            MPIR_Segment_free(segp);
            first_ind += last_ind;
        }
    }
    nr_blks = first_ind;

#if 0
    if (MPL_DBG_SELECTED(MPIR_DBG_DATATYPE, VERBOSE)) {
        MPL_DBG_OUT(MPIR_DBG_DATATYPE, "--- start of flattened type ---");
        for (i = 0; i < nr_blks; i++) {
            MPL_DBG_OUT_FMT(MPIR_DBG_DATATYPE, (MPL_DBG_FDEST,
                                                "a[%d] = (" MPI_AINT_FMT_DEC_SPEC ", " MPI_AINT_FMT_DEC_SPEC ")",
                                                i, tmp_blklens[i], tmp_disps[i]));
        }
        MPL_DBG_OUT(MPIR_DBG_DATATYPE, "--- end of flattened type ---");
    }
#endif

    err = MPII_Dataloop_create_indexed(nr_blks, tmp_blklens, tmp_disps, 1,      /* disp in bytes */
                                       MPI_BYTE, dlp_p, dlsz_p);

    MPL_free(tmp_blklens);
    MPL_free(tmp_disps);

    return err;
}
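To make the flattening concrete: the function above reduces an arbitrary struct to (blocklength, byte displacement) pairs over MPI_BYTE. A hand-built sketch of that byte-indexed form for a hypothetical struct { int tag; double val[3]; }, assuming the usual 8-byte alignment of double:

#include <mpi.h>

/* Hypothetical helper: the byte-flattened equivalent of
 * struct { int tag; double val[3]; } -- two contiguous byte regions,
 * so a 2-block hindexed type of MPI_BYTE. */
static void build_flattened(MPI_Datatype * flat)
{
    int blklens[2] = { (int) sizeof(int), (int) (3 * sizeof(double)) };
    MPI_Aint disps[2] = { 0, 8 };       /* assumes double aligned to 8 bytes */

    MPI_Type_create_hindexed(2, blklens, disps, MPI_BYTE, flat);
    MPI_Type_commit(flat);
}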
Example #27
/*@
    MPI_Type_size - Return the number of bytes occupied by entries
                    in the datatype

Input Parameters:
. datatype - datatype (handle) 

Output Parameters:
. size - datatype size (integer) 

.N SignalSafe

.N Fortran

.N Errors
.N MPI_SUCCESS
.N MPI_ERR_TYPE
.N MPI_ERR_ARG
@*/
int MPI_Type_size(MPI_Datatype datatype, int *size)
{
    int mpi_errno = MPI_SUCCESS;
    MPI_Count size_x = MPI_UNDEFINED;
    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_TYPE_SIZE);

    MPIR_ERRTEST_INITIALIZED_ORDIE();
    
    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_TYPE_SIZE);

    /* Validate parameters, especially handles needing to be converted */
#   ifdef HAVE_ERROR_CHECKING
    {
        MPID_BEGIN_ERROR_CHECKS;
        {
	    MPIR_ERRTEST_DATATYPE(datatype, "datatype", mpi_errno);
	}
        MPID_END_ERROR_CHECKS;
    }
#   endif /* HAVE_ERROR_CHECKING */
	    
    /* If this is a built-in datatype, then get the size out of the handle */
    if (HANDLE_GET_KIND(datatype) == HANDLE_KIND_BUILTIN)
    {
	MPIR_Datatype_get_size_macro(datatype, *size);
	goto fn_exit;
    }

    /* Validate parameters and objects (post conversion) */
#   ifdef HAVE_ERROR_CHECKING
    {
        MPID_BEGIN_ERROR_CHECKS;
        {
            MPIR_Datatype *datatype_ptr = NULL;

            /* Convert MPI object handles to object pointers */
            MPIR_Datatype_get_ptr( datatype, datatype_ptr );

            /* Validate datatype_ptr */
            MPIR_Datatype_valid_ptr( datatype_ptr, mpi_errno );
            if (mpi_errno) goto fn_fail;
        }
        MPID_END_ERROR_CHECKS;
    }
#   endif /* HAVE_ERROR_CHECKING */

    /* ... body of routine ...  */

    mpi_errno = MPIR_Type_size_x_impl(datatype, &size_x);
    if (mpi_errno) MPIR_ERR_POP(mpi_errno);

    MPIR_Assert(size_x >= 0);
    /* handle overflow: see MPI-3 p.104 */
    *size = (size_x > INT_MAX) ? MPI_UNDEFINED : (int)size_x;

    /* ... end of body of routine ... */

  fn_exit:
    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_TYPE_SIZE);
    return mpi_errno;

    /* --BEGIN ERROR HANDLING-- */
  fn_fail:
    {
	mpi_errno = MPIR_Err_create_code(
	    mpi_errno, MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPI_ERR_OTHER, 
	    "**mpi_type_size", 
	    "**mpi_type_size %D %p", datatype, size);
    }
    mpi_errno = MPIR_Err_return_comm( NULL, FCNAME, mpi_errno );
    goto fn_exit;
    /* --END ERROR HANDLING-- */
}
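A minimal usage sketch for MPI_Type_size, handling the MPI_UNDEFINED overflow case above by falling back to the MPI-3 MPI_Type_size_x variant:

#include <mpi.h>
#include <stdio.h>

static void print_type_size(MPI_Datatype dt)
{
    int size;
    MPI_Type_size(dt, &size);
    if (size == MPI_UNDEFINED) {
        /* byte count did not fit in an int; query the MPI_Count variant */
        MPI_Count size_x;
        MPI_Type_size_x(dt, &size_x);
        printf("size = %lld bytes\n", (long long) size_x);
    } else {
        printf("size = %d bytes\n", size);
    }
}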
Example #28
/* Algorithm: Blocked Alltoallw
 *
 * Since each process sends/receives different amounts of data to every other
 * process, we don't know the total message size for all processes without
 * additional communication. Therefore we simply use the "middle of the road"
 * isend/irecv algorithm that works reasonably well in all cases.
 *
 * We post all irecvs and isends and then do a waitall. We stagger the
 * order of sources and destinations across processes so that not all
 * processes try to send to / receive from the same peer at the same time.
 *
 * *** Modification: We post only a small number of isends and irecvs at a time
 * and wait on them as suggested by Tony Ladd. ***
 */
int MPIR_Ialltoallw_sched_intra_blocked(const void *sendbuf, const int sendcounts[],
                                        const int sdispls[], const MPI_Datatype sendtypes[],
                                        void *recvbuf, const int recvcounts[], const int rdispls[],
                                        const MPI_Datatype recvtypes[], MPIR_Comm * comm_ptr,
                                        MPIR_Sched_t s)
{
    int mpi_errno = MPI_SUCCESS;
    int comm_size, i;
    int dst, rank;
    int ii, ss, bblock;
    int type_size;

#ifdef HAVE_ERROR_CHECKING
    MPIR_Assert(sendbuf != MPI_IN_PLACE);
#endif /* HAVE_ERROR_CHECKING */

    comm_size = comm_ptr->local_size;
    rank = comm_ptr->rank;

    bblock = MPIR_CVAR_ALLTOALL_THROTTLE;
    if (bblock == 0)
        bblock = comm_size;

    /* post only bblock isends/irecvs at a time as suggested by Tony Ladd */
    for (ii = 0; ii < comm_size; ii += bblock) {
        ss = comm_size - ii < bblock ? comm_size - ii : bblock;

        /* do the communication -- post ss sends and receives: */
        for (i = 0; i < ss; i++) {
            dst = (rank + i + ii) % comm_size;
            if (recvcounts[dst]) {
                MPIR_Datatype_get_size_macro(recvtypes[dst], type_size);
                if (type_size) {
                    mpi_errno = MPIR_Sched_recv((char *) recvbuf + rdispls[dst],
                                                recvcounts[dst], recvtypes[dst], dst, comm_ptr, s);
                    if (mpi_errno)
                        MPIR_ERR_POP(mpi_errno);
                }
            }
        }

        for (i = 0; i < ss; i++) {
            dst = (rank - i - ii + comm_size) % comm_size;
            if (sendcounts[dst]) {
                MPIR_Datatype_get_size_macro(sendtypes[dst], type_size);
                if (type_size) {
                    mpi_errno = MPIR_Sched_send((char *) sendbuf + sdispls[dst],
                                                sendcounts[dst], sendtypes[dst], dst, comm_ptr, s);
                    if (mpi_errno)
                        MPIR_ERR_POP(mpi_errno);
                }
            }
        }

        /* force our block of sends/recvs to complete before starting the next block */
        MPIR_SCHED_BARRIER(s);
    }
  fn_exit:
    return mpi_errno;
  fn_fail:
    goto fn_exit;
}
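The same throttling pattern, outside the scheduler: a standalone sketch (not MPICH internals) that posts at most bblock isend/irecv pairs before waiting, assuming a payload of one int per peer for brevity:

#include <mpi.h>
#include <stdlib.h>

/* Hypothetical demo: throttled all-to-all of one int per peer. */
static void throttled_alltoall(const int *sendbuf, int *recvbuf,
                               int bblock, MPI_Comm comm)
{
    int rank, size, ii, i, ss;
    MPI_Comm_rank(comm, &rank);
    MPI_Comm_size(comm, &size);
    if (bblock <= 0)
        bblock = size;

    MPI_Request *reqs = malloc(2 * (size_t) bblock * sizeof(MPI_Request));

    for (ii = 0; ii < size; ii += bblock) {
        int n = 0;
        ss = (size - ii < bblock) ? size - ii : bblock;

        /* stagger partners so all ranks do not target the same peer at once */
        for (i = 0; i < ss; i++) {
            int src = (rank + i + ii) % size;
            MPI_Irecv(&recvbuf[src], 1, MPI_INT, src, 0, comm, &reqs[n++]);
        }
        for (i = 0; i < ss; i++) {
            int dst = (rank - i - ii + size) % size;
            MPI_Isend(&sendbuf[dst], 1, MPI_INT, dst, 0, comm, &reqs[n++]);
        }
        /* complete this block before posting the next */
        MPI_Waitall(n, reqs, MPI_STATUSES_IGNORE);
    }
    free(reqs);
}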