Example #1
static int MPIR_FreeF90Datatypes(void *d)
{
    int i;

    for (i = 0; i < nAlloc; i++) {
        MPIR_Type_free_impl(&f90Types[i].d);
    }
    return 0;
}
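The cleanup hook above assumes a module-level cache of F90 datatypes. A minimal self-contained sketch of such a cache is shown below; the f90Types/nAlloc declarations and the cacheF90Type helper are hypothetical stand-ins (the real MPICH definitions differ), using only the public MPI API.

#include <mpi.h>

/* Hypothetical cache entry; the real MPICH struct also records the
 * parameters that identify the F90 type. */
typedef struct {
    int r, p;          /* range/precision that selected this type */
    MPI_Datatype d;    /* the committed datatype to free at finalize */
} F90TypeEntry;

#define MAX_F90_TYPES 64
static F90TypeEntry f90Types[MAX_F90_TYPES];
static int nAlloc = 0;

/* Record a type so it is released exactly once at finalization. */
static void cacheF90Type(int r, int p, MPI_Datatype d)
{
    if (nAlloc < MAX_F90_TYPES) {
        f90Types[nAlloc].r = r;
        f90Types[nAlloc].p = p;
        f90Types[nAlloc].d = d;
        nAlloc++;
    }
}

/* Analogue of MPIR_FreeF90Datatypes, using the public API. */
static int freeF90Types(void)
{
    int i;
    for (i = 0; i < nAlloc; i++)
        MPI_Type_free(&f90Types[i].d);
    nAlloc = 0;
    return 0;
}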
Example #2
int MPIR_Type_contiguous_x_impl(MPI_Count count,
                                MPI_Datatype oldtype,
                                MPI_Datatype *newtype)
{
    /* to make 'count' fit MPI-3 type processing routines (which take integer
     * counts), we construct a type consisting of N INT_MAX chunks followed by
     * a remainder.  e.g. for a count of 4000000000 bytes you would end up with
     * one 2147483647-byte chunk followed immediately by a 1852516353-byte
     * chunk */
    MPI_Datatype chunks, remainder;
    MPI_Aint lb, extent, disps[2];
    int blocklens[2];
    MPI_Datatype types[2];
    int mpi_errno = MPI_SUCCESS;

    /* truly stupendously large counts will overflow an integer with this math,
     * but that is a problem for a few decades from now.  Sorry, few decades
     * from now! */
    MPIR_Assert(count/INT_MAX == (int)(count/INT_MAX));
    int c = (int)(count/INT_MAX); /* OK to cast until 'count' is 256 bits */
    int r = (int)(count % INT_MAX);

    mpi_errno = MPIR_Type_vector_impl(c, INT_MAX, INT_MAX, oldtype, &chunks);
    if (mpi_errno != MPI_SUCCESS) goto fn_fail;
    mpi_errno = MPIR_Type_contiguous_impl(r, oldtype, &remainder);
    if (mpi_errno != MPI_SUCCESS) goto fn_fail;

    MPIR_Type_get_extent_impl(oldtype, &lb, &extent);

    blocklens[0] = 1;      blocklens[1] = 1;
    disps[0]     = 0;      disps[1]     = c*extent*INT_MAX;
    types[0]     = chunks; types[1]     = remainder;

    mpi_errno = MPIR_Type_create_struct_impl(2, blocklens, disps, types, newtype);

    MPIR_Type_free_impl(&chunks);
    MPIR_Type_free_impl(&remainder);

fn_exit:
    return mpi_errno;
fn_fail:
    goto fn_exit;
}
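The chunk/remainder decomposition described in the comment above is easy to verify in isolation. A minimal standalone check of the same arithmetic (plain C, no MPI calls; int64_t stands in for MPI_Count):

#include <assert.h>
#include <limits.h>
#include <stdint.h>
#include <stdio.h>

int main(void)
{
    int64_t count = 4000000000;       /* the example count from the comment */

    int c = (int)(count / INT_MAX);   /* number of INT_MAX-sized chunks */
    int r = (int)(count % INT_MAX);   /* leftover elements */

    /* The struct type built above covers exactly 'count' elements. */
    assert((int64_t)c * INT_MAX + r == count);
    printf("%d chunk(s) of %d plus a remainder of %d\n", c, INT_MAX, r);
    return 0;
}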
Example #3
int MPIR_Alltoall_intra_scattered(
    const void *sendbuf,
    int sendcount, 
    MPI_Datatype sendtype, 
    void *recvbuf, 
    int recvcount, 
    MPI_Datatype recvtype, 
    MPIR_Comm *comm_ptr,
    MPIR_Errflag_t *errflag )
{
    int          comm_size, i, j;
    MPI_Aint     sendtype_extent, recvtype_extent;
    int mpi_errno=MPI_SUCCESS, dst, rank;
    int mpi_errno_ret = MPI_SUCCESS;
    MPIR_Request **reqarray;
    MPI_Status *starray;
    MPIR_CHKLMEM_DECL(2);

    if (recvcount == 0) return MPI_SUCCESS;

    comm_size = comm_ptr->local_size;
    rank = comm_ptr->rank;

#ifdef HAVE_ERROR_CHECKING
    MPIR_Assert(sendbuf != MPI_IN_PLACE);
#endif /* HAVE_ERROR_CHECKING */

    /* Get extent of send and recv types */
    MPIR_Datatype_get_extent_macro(recvtype, recvtype_extent);
    MPIR_Datatype_get_extent_macro(sendtype, sendtype_extent);
    int ii, ss, bblock;

    bblock = MPIR_CVAR_ALLTOALL_THROTTLE;
    if (bblock == 0) bblock = comm_size;

    MPIR_CHKLMEM_MALLOC(reqarray, MPIR_Request **, 2*bblock*sizeof(MPIR_Request*), mpi_errno, "reqarray", MPL_MEM_BUFFER);

    MPIR_CHKLMEM_MALLOC(starray, MPI_Status *, 2*bblock*sizeof(MPI_Status), mpi_errno, "starray", MPL_MEM_BUFFER);

    for (ii=0; ii<comm_size; ii+=bblock) {
        ss = comm_size-ii < bblock ? comm_size-ii : bblock;
        /* do the communication -- post ss sends and receives: */
        for ( i=0; i<ss; i++ ) { 
            dst = (rank+i+ii) % comm_size;
            mpi_errno = MPIC_Irecv((char *)recvbuf +
                                      dst*recvcount*recvtype_extent, 
                                      recvcount, recvtype, dst,
                                      MPIR_ALLTOALL_TAG, comm_ptr,
                                      &reqarray[i]);
            if (mpi_errno) MPIR_ERR_POP(mpi_errno);
        }

        for ( i=0; i<ss; i++ ) { 
            dst = (rank-i-ii+comm_size) % comm_size;
            mpi_errno = MPIC_Isend((char *)sendbuf +
                                      dst*sendcount*sendtype_extent, 
                                      sendcount, sendtype, dst,
                                      MPIR_ALLTOALL_TAG, comm_ptr,
                                      &reqarray[i+ss], errflag);
            if (mpi_errno) MPIR_ERR_POP(mpi_errno);
        }

        /* ... then wait for them to finish: */
        mpi_errno = MPIC_Waitall(2*ss,reqarray,starray, errflag);
        if (mpi_errno && mpi_errno != MPI_ERR_IN_STATUS) MPIR_ERR_POP(mpi_errno);
        
        /* --BEGIN ERROR HANDLING-- */
        if (mpi_errno == MPI_ERR_IN_STATUS) {
            for (j=0; j<2*ss; j++) {
                if (starray[j].MPI_ERROR != MPI_SUCCESS) {
                    mpi_errno = starray[j].MPI_ERROR;
                    if (mpi_errno) {
                        /* for communication errors, just record the error but continue */
                        *errflag = MPIR_ERR_GET_CLASS(mpi_errno);
                        MPIR_ERR_SET(mpi_errno, *errflag, "**fail");
                        MPIR_ERR_ADD(mpi_errno_ret, mpi_errno);
                    }
                }
            }
        }
        /* --END ERROR HANDLING-- */
    }

 fn_exit:
    MPIR_CHKLMEM_FREEALL();
    if (mpi_errno_ret)
        mpi_errno = mpi_errno_ret;
    else if (*errflag != MPIR_ERR_NONE)
        MPIR_ERR_SET(mpi_errno, *errflag, "**coll_fail");

    return mpi_errno;
 fn_fail:
    goto fn_exit;
}
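The throttled post/wait pattern above can be expressed with the public point-to-point API. A sketch under stated assumptions: the helper name, MPI_INT payloads, and the omitted error handling are all illustrative, not MPICH's implementation.

#include <mpi.h>
#include <stdlib.h>

/* Post at most 'bblock' receive/send pairs at a time, then wait,
 * mirroring the blocking structure of MPIR_Alltoall_intra_scattered. */
static int throttled_alltoall(const int *sendbuf, int *recvbuf,
                              int count, int bblock, MPI_Comm comm)
{
    int rank, size, i, ii, ss, dst;
    MPI_Request *reqs;

    MPI_Comm_rank(comm, &rank);
    MPI_Comm_size(comm, &size);
    if (bblock <= 0 || bblock > size)
        bblock = size;
    reqs = malloc(2 * bblock * sizeof(MPI_Request));

    for (ii = 0; ii < size; ii += bblock) {
        ss = (size - ii < bblock) ? size - ii : bblock;
        /* receives from ranks "ahead" of us ... */
        for (i = 0; i < ss; i++) {
            dst = (rank + i + ii) % size;
            MPI_Irecv(recvbuf + (size_t)dst * count, count, MPI_INT,
                      dst, 0, comm, &reqs[i]);
        }
        /* ... and sends to ranks "behind" us, to spread the load */
        for (i = 0; i < ss; i++) {
            dst = (rank - i - ii + size) % size;
            MPI_Isend(sendbuf + (size_t)dst * count, count, MPI_INT,
                      dst, 0, comm, &reqs[ss + i]);
        }
        MPI_Waitall(2 * ss, reqs, MPI_STATUSES_IGNORE);
    }
    free(reqs);
    return MPI_SUCCESS;
}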
Example #4
int MPII_Dataloop_convert_subarray(int ndims,
                                   int *array_of_sizes,
                                   int *array_of_subsizes,
                                   int *array_of_starts,
                                   int order, MPI_Datatype oldtype, MPI_Datatype * newtype)
{
    int mpi_errno = MPI_SUCCESS;
    MPI_Aint extent, disps[3], size;
    int i, blklens[3];
    MPI_Datatype tmp1, tmp2, types[3];

    MPIR_Datatype_get_extent_macro(oldtype, extent);

    if (order == MPI_ORDER_FORTRAN) {
        /* dimension 0 changes fastest */
        if (ndims == 1) {
            mpi_errno = MPIR_Type_contiguous_impl(array_of_subsizes[0], oldtype, &tmp1);
            if (mpi_errno)
                MPIR_ERR_POP(mpi_errno);
        } else {
            mpi_errno = MPIR_Type_vector_impl(array_of_subsizes[1],
                                              array_of_subsizes[0],
                                              array_of_sizes[0], oldtype, &tmp1);
            if (mpi_errno)
                MPIR_ERR_POP(mpi_errno);

            size = (MPI_Aint) (array_of_sizes[0]) * extent;
            for (i = 2; i < ndims; i++) {
                size *= (MPI_Aint) (array_of_sizes[i - 1]);
                mpi_errno = MPIR_Type_hvector_impl(array_of_subsizes[i], 1, size, tmp1, &tmp2);
                if (mpi_errno)
                    MPIR_ERR_POP(mpi_errno);
                MPIR_Type_free_impl(&tmp1);
                tmp1 = tmp2;
            }
        }

        /* add displacement and UB */
        disps[1] = (MPI_Aint) (array_of_starts[0]);
        size = 1;
        for (i = 1; i < ndims; i++) {
            size *= (MPI_Aint) (array_of_sizes[i - 1]);
            disps[1] += size * (MPI_Aint) (array_of_starts[i]);
        }
        /* rest done below for both Fortran and C order */
    }

    else {      /* order == MPI_ORDER_C */

        /* dimension ndims-1 changes fastest */
        if (ndims == 1) {
            mpi_errno = MPIR_Type_contiguous_impl(array_of_subsizes[0], oldtype, &tmp1);
            if (mpi_errno)
                MPIR_ERR_POP(mpi_errno);
        } else {
            mpi_errno = MPIR_Type_vector_impl(array_of_subsizes[ndims - 2],
                                              array_of_subsizes[ndims - 1],
                                              array_of_sizes[ndims - 1], oldtype, &tmp1);
            if (mpi_errno)
                MPIR_ERR_POP(mpi_errno);

            size = (MPI_Aint) (array_of_sizes[ndims - 1]) * extent;
            for (i = ndims - 3; i >= 0; i--) {
                size *= (MPI_Aint) (array_of_sizes[i + 1]);
                mpi_errno = MPIR_Type_hvector_impl(array_of_subsizes[i], 1, size, tmp1, &tmp2);
                if (mpi_errno)
                    MPIR_ERR_POP(mpi_errno);
                MPIR_Type_free_impl(&tmp1);
                tmp1 = tmp2;
            }
        }

        /* add displacement and UB */
        disps[1] = (MPI_Aint) (array_of_starts[ndims - 1]);
        size = 1;
        for (i = ndims - 2; i >= 0; i--) {
            size *= (MPI_Aint) (array_of_sizes[i + 1]);
            disps[1] += size * (MPI_Aint) (array_of_starts[i]);
        }
    }

    disps[1] *= extent;

    disps[2] = extent;
    for (i = 0; i < ndims; i++)
        disps[2] *= (MPI_Aint) (array_of_sizes[i]);

    disps[0] = 0;
    blklens[0] = blklens[1] = blklens[2] = 1;
    types[0] = MPI_LB;
    types[1] = tmp1;
    types[2] = MPI_UB;

    mpi_errno = MPIR_Type_struct_impl(3, blklens, disps, types, newtype);
    if (mpi_errno)
        MPIR_ERR_POP(mpi_errno);

    MPIR_Type_free_impl(&tmp1);

  fn_exit:
    return mpi_errno;
  fn_fail:
    goto fn_exit;
}
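The displacement accumulated into disps[1] above is just the linear index of the start coordinate in row-major (C) or column-major (Fortran) layout. A small self-contained check of the C-order formula (plain C, standard library only):

#include <assert.h>
#include <stdio.h>

/* Linear offset (in elements) of 'starts' within a C-order array of
 * the given 'sizes', matching the disps[1] loop above. */
static long start_offset_c_order(int ndims, const int *sizes, const int *starts)
{
    long disp = starts[ndims - 1];
    long size = 1;
    for (int i = ndims - 2; i >= 0; i--) {
        size *= sizes[i + 1];
        disp += size * starts[i];
    }
    return disp;
}

int main(void)
{
    /* 4x6 array, subarray starting at row 1, column 2:
     * element (1,2) lives at linear index 1*6 + 2 = 8. */
    int sizes[2] = { 4, 6 }, starts[2] = { 1, 2 };
    assert(start_offset_c_order(2, sizes, starts) == 8);
    printf("offset = %ld\n", start_offset_c_order(2, sizes, starts));
    return 0;
}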
Example #5
/*@
   MPI_Type_create_darray - Create a datatype representing a distributed array

Input Parameters:
+ size - size of process group (positive integer)
. rank - rank in process group (nonnegative integer)
. ndims - number of array dimensions as well as process grid dimensions (positive integer)
. array_of_gsizes - number of elements of type oldtype in each dimension of global array (array of positive integers)
. array_of_distribs - distribution of array in each dimension (array of state)
. array_of_dargs - distribution argument in each dimension (array of positive integers)
. array_of_psizes - size of process grid in each dimension (array of positive integers)
. order - array storage order flag (state)
- oldtype - old datatype (handle)

Output Parameters:
. newtype - new datatype (handle)

.N ThreadSafe

.N Fortran

.N Errors
.N MPI_SUCCESS
.N MPI_ERR_TYPE
.N MPI_ERR_ARG
@*/
int MPI_Type_create_darray(int size,
                           int rank,
                           int ndims,
                           const int array_of_gsizes[],
                           const int array_of_distribs[],
                           const int array_of_dargs[],
                           const int array_of_psizes[],
                           int order, MPI_Datatype oldtype, MPI_Datatype * newtype)
{
    int mpi_errno = MPI_SUCCESS, i;
    MPI_Datatype new_handle;

    int procs, tmp_rank, tmp_size, *coords;
    MPI_Aint *st_offsets, orig_extent, disps[3];
    MPI_Datatype type_old, type_new = MPI_DATATYPE_NULL, tmp_type;

#ifdef HAVE_ERROR_CHECKING
    MPI_Aint size_with_aint;
    MPI_Offset size_with_offset;
#endif

    int *ints;
    MPIR_Datatype *datatype_ptr = NULL;
    MPIR_CHKLMEM_DECL(3);
    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_TYPE_CREATE_DARRAY);

    MPIR_ERRTEST_INITIALIZED_ORDIE();

    MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_TYPE_CREATE_DARRAY);

    /* Validate parameters, especially handles needing to be converted */
#ifdef HAVE_ERROR_CHECKING
    {
        MPID_BEGIN_ERROR_CHECKS;
        {
            MPIR_ERRTEST_DATATYPE(oldtype, "datatype", mpi_errno);
        }
        MPID_END_ERROR_CHECKS;
    }
#endif

    /* Convert MPI object handles to object pointers */
    MPIR_Datatype_get_ptr(oldtype, datatype_ptr);
    MPIR_Datatype_get_extent_macro(oldtype, orig_extent);

    /* Validate parameters and objects (post conversion) */
#ifdef HAVE_ERROR_CHECKING
    {
        MPID_BEGIN_ERROR_CHECKS;
        {
            /* Check parameters */
            MPIR_ERRTEST_ARGNONPOS(size, "size", mpi_errno, MPI_ERR_ARG);
            /* use MPI_ERR_RANK class for PE-MPI compatibility */
            MPIR_ERR_CHKANDJUMP3((rank < 0 || rank >= size), mpi_errno, MPI_ERR_RANK,
                                 "**argrange", "**argrange %s %d %d", "rank", rank, (size - 1));
            MPIR_ERRTEST_ARGNONPOS(ndims, "ndims", mpi_errno, MPI_ERR_DIMS);

            MPIR_ERRTEST_ARGNULL(array_of_gsizes, "array_of_gsizes", mpi_errno);
            MPIR_ERRTEST_ARGNULL(array_of_distribs, "array_of_distribs", mpi_errno);
            MPIR_ERRTEST_ARGNULL(array_of_dargs, "array_of_dargs", mpi_errno);
            MPIR_ERRTEST_ARGNULL(array_of_psizes, "array_of_psizes", mpi_errno);
            if (order != MPI_ORDER_C && order != MPI_ORDER_FORTRAN) {
                mpi_errno = MPIR_Err_create_code(MPI_SUCCESS,
                                                 MPIR_ERR_RECOVERABLE,
                                                 __func__,
                                                 __LINE__,
                                                 MPI_ERR_ARG, "**arg", "**arg %s", "order");
                goto fn_fail;
            }

            tmp_size = 1;
            for (i = 0; mpi_errno == MPI_SUCCESS && i < ndims; i++) {
                MPIR_ERRTEST_ARGNONPOS(array_of_gsizes[i], "gsize", mpi_errno, MPI_ERR_ARG);
                MPIR_ERRTEST_ARGNONPOS(array_of_psizes[i], "psize", mpi_errno, MPI_ERR_ARG);

                if ((array_of_distribs[i] != MPI_DISTRIBUTE_NONE) &&
                    (array_of_distribs[i] != MPI_DISTRIBUTE_BLOCK) &&
                    (array_of_distribs[i] != MPI_DISTRIBUTE_CYCLIC)) {
                    mpi_errno = MPIR_Err_create_code(MPI_SUCCESS,
                                                     MPIR_ERR_RECOVERABLE,
                                                     __func__,
                                                     __LINE__, MPI_ERR_ARG, "**darrayunknown", 0);
                    goto fn_fail;
                }

                if ((array_of_dargs[i] != MPI_DISTRIBUTE_DFLT_DARG) && (array_of_dargs[i] <= 0)) {
                    mpi_errno = MPIR_Err_create_code(MPI_SUCCESS,
                                                     MPIR_ERR_RECOVERABLE,
                                                     __func__,
                                                     __LINE__,
                                                     MPI_ERR_ARG,
                                                     "**arg", "**arg %s", "array_of_dargs");
                    goto fn_fail;
                }

                if ((array_of_distribs[i] == MPI_DISTRIBUTE_NONE) && (array_of_psizes[i] != 1)) {
                    mpi_errno = MPIR_Err_create_code(MPI_SUCCESS,
                                                     MPIR_ERR_RECOVERABLE,
                                                     __func__,
                                                     __LINE__,
                                                     MPI_ERR_ARG,
                                                     "**darraydist",
                                                     "**darraydist %d %d", i, array_of_psizes[i]);
                    goto fn_fail;
                }

                tmp_size *= array_of_psizes[i];
            }

            MPIR_ERR_CHKANDJUMP1((tmp_size != size), mpi_errno, MPI_ERR_ARG,
                                 "**arg", "**arg %s", "array_of_psizes");

            /* TODO: GET THIS CHECK IN ALSO */

            /* check if MPI_Aint is large enough for size of global array.
             * if not, complain. */

            size_with_aint = orig_extent;
            for (i = 0; i < ndims; i++)
                size_with_aint *= array_of_gsizes[i];
            size_with_offset = orig_extent;
            for (i = 0; i < ndims; i++)
                size_with_offset *= array_of_gsizes[i];
            if (size_with_aint != size_with_offset) {
                mpi_errno = MPIR_Err_create_code(MPI_SUCCESS,
                                                 MPIR_ERR_FATAL,
                                                 __func__,
                                                 __LINE__,
                                                 MPI_ERR_ARG,
                                                 "**darrayoverflow",
                                                 "**darrayoverflow %L", size_with_offset);
                goto fn_fail;
            }

            /* Validate datatype_ptr */
            MPIR_Datatype_valid_ptr(datatype_ptr, mpi_errno);
            /* If datatype_ptr is not valid, it will be reset to null */
            /* --BEGIN ERROR HANDLING-- */
            if (mpi_errno)
                goto fn_fail;
            /* --END ERROR HANDLING-- */
        }
        MPID_END_ERROR_CHECKS;
    }
#endif /* HAVE_ERROR_CHECKING */

    /* ... body of routine ... */

    /* calculate position in Cartesian grid as MPI would (row-major
     * ordering) */
    MPIR_CHKLMEM_MALLOC_ORJUMP(coords, int *, ndims * sizeof(int), mpi_errno,
                               "position in Cartesian grid", MPL_MEM_COMM);

    procs = size;
    tmp_rank = rank;
    for (i = 0; i < ndims; i++) {
        procs = procs / array_of_psizes[i];
        coords[i] = tmp_rank / procs;
        tmp_rank = tmp_rank % procs;
    }

    MPIR_CHKLMEM_MALLOC_ORJUMP(st_offsets, MPI_Aint *, ndims * sizeof(MPI_Aint), mpi_errno,
                               "st_offsets", MPL_MEM_COMM);

    type_old = oldtype;

    if (order == MPI_ORDER_FORTRAN) {
        /* dimension 0 changes fastest */
        for (i = 0; i < ndims; i++) {
            switch (array_of_distribs[i]) {
                case MPI_DISTRIBUTE_BLOCK:
                    mpi_errno = MPIR_Type_block(array_of_gsizes,
                                                i,
                                                ndims,
                                                array_of_psizes[i],
                                                coords[i],
                                                array_of_dargs[i],
                                                order,
                                                orig_extent, type_old, &type_new, st_offsets + i);
                    break;
                case MPI_DISTRIBUTE_CYCLIC:
                    mpi_errno = MPIR_Type_cyclic(array_of_gsizes,
                                                 i,
                                                 ndims,
                                                 array_of_psizes[i],
                                                 coords[i],
                                                 array_of_dargs[i],
                                                 order,
                                                 orig_extent, type_old, &type_new, st_offsets + i);
                    break;
                case MPI_DISTRIBUTE_NONE:
                    /* treat it as a block distribution on 1 process */
                    mpi_errno = MPIR_Type_block(array_of_gsizes,
                                                i,
                                                ndims,
                                                1,
                                                0,
                                                MPI_DISTRIBUTE_DFLT_DARG,
                                                order,
                                                orig_extent, type_old, &type_new, st_offsets + i);
                    break;
            }
            if (i) {
                MPIR_Type_free_impl(&type_old);
            }
            type_old = type_new;

            /* --BEGIN ERROR HANDLING-- */
            if (mpi_errno != MPI_SUCCESS)
                goto fn_fail;
            /* --END ERROR HANDLING-- */
        }

        /* add displacement and UB */
        disps[1] = st_offsets[0];
        tmp_size = 1;
        for (i = 1; i < ndims; i++) {
            tmp_size *= array_of_gsizes[i - 1];
            disps[1] += (MPI_Aint) tmp_size * st_offsets[i];
        }
        /* rest done below for both Fortran and C order */
    }

    else {      /* order == MPI_ORDER_C */

        /* dimension ndims-1 changes fastest */
        for (i = ndims - 1; i >= 0; i--) {
            switch (array_of_distribs[i]) {
                case MPI_DISTRIBUTE_BLOCK:
                    mpi_errno = MPIR_Type_block(array_of_gsizes,
                                                i,
                                                ndims,
                                                array_of_psizes[i],
                                                coords[i],
                                                array_of_dargs[i],
                                                order,
                                                orig_extent, type_old, &type_new, st_offsets + i);
                    break;
                case MPI_DISTRIBUTE_CYCLIC:
                    mpi_errno = MPIR_Type_cyclic(array_of_gsizes,
                                                 i,
                                                 ndims,
                                                 array_of_psizes[i],
                                                 coords[i],
                                                 array_of_dargs[i],
                                                 order,
                                                 orig_extent, type_old, &type_new, st_offsets + i);
                    break;
                case MPI_DISTRIBUTE_NONE:
                    /* treat it as a block distribution on 1 process */
                    mpi_errno = MPIR_Type_block(array_of_gsizes,
                                                i,
                                                ndims,
                                                array_of_psizes[i],
                                                coords[i],
                                                MPI_DISTRIBUTE_DFLT_DARG,
                                                order,
                                                orig_extent, type_old, &type_new, st_offsets + i);
                    break;
            }
            if (i != ndims - 1) {
                MPIR_Type_free_impl(&type_old);
            }
            type_old = type_new;

            /* --BEGIN ERROR HANDLING-- */
            if (mpi_errno != MPI_SUCCESS)
                goto fn_fail;
            /* --END ERROR HANDLING-- */
        }

        /* add displacement and UB */
        disps[1] = st_offsets[ndims - 1];
        tmp_size = 1;
        for (i = ndims - 2; i >= 0; i--) {
            tmp_size *= array_of_gsizes[i + 1];
            disps[1] += (MPI_Aint) tmp_size * st_offsets[i];
        }
    }

    disps[1] *= orig_extent;

    disps[2] = orig_extent;
    for (i = 0; i < ndims; i++)
        disps[2] *= (MPI_Aint) (array_of_gsizes[i]);

    disps[0] = 0;

/* Instead of using MPI_LB/MPI_UB, which have been removed from MPI in MPI-3,
   use MPI_Type_create_resized. Use hindexed_block to set the starting displacement
   of the datatype (disps[1]) and type_create_resized to set lb to 0 (disps[0])
   and extent to disps[2], which makes ub = disps[2].
 */
    mpi_errno = MPIR_Type_blockindexed(1, 1, &disps[1], 1,      /* 1 means disp is in bytes */
                                       type_new, &tmp_type);

    /* --BEGIN ERROR HANDLING-- */
    if (mpi_errno != MPI_SUCCESS)
        goto fn_fail;
    /* --END ERROR HANDLING-- */

    mpi_errno = MPIR_Type_create_resized(tmp_type, 0, disps[2], &new_handle);

    /* --BEGIN ERROR HANDLING-- */
    if (mpi_errno != MPI_SUCCESS)
        goto fn_fail;
    /* --END ERROR HANDLING-- */

    MPIR_Type_free_impl(&tmp_type);
    MPIR_Type_free_impl(&type_new);

    /* at this point we have the new type, and we've cleaned up any
     * intermediate types created in the process.  we just need to save
     * all our contents/envelope information.
     */

    /* Save contents */
    MPIR_CHKLMEM_MALLOC_ORJUMP(ints, int *, (4 * ndims + 4) * sizeof(int), mpi_errno,
                               "content description", MPL_MEM_BUFFER);

    ints[0] = size;
    ints[1] = rank;
    ints[2] = ndims;

    for (i = 0; i < ndims; i++) {
        ints[i + 3] = array_of_gsizes[i];
    }
    for (i = 0; i < ndims; i++) {
        ints[i + ndims + 3] = array_of_distribs[i];
    }
    for (i = 0; i < ndims; i++) {
        ints[i + 2 * ndims + 3] = array_of_dargs[i];
    }
    for (i = 0; i < ndims; i++) {
        ints[i + 3 * ndims + 3] = array_of_psizes[i];
    }
    ints[4 * ndims + 3] = order;
    MPIR_Datatype_get_ptr(new_handle, datatype_ptr);
    mpi_errno = MPIR_Datatype_set_contents(datatype_ptr,
                                           MPI_COMBINER_DARRAY,
                                           4 * ndims + 4, 0, 1, ints, NULL, &oldtype);
    /* --BEGIN ERROR HANDLING-- */
    if (mpi_errno != MPI_SUCCESS)
        goto fn_fail;
    /* --END ERROR HANDLING-- */

    MPIR_OBJ_PUBLISH_HANDLE(*newtype, new_handle);
    /* ... end of body of routine ... */

  fn_exit:
    MPIR_CHKLMEM_FREEALL();
    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_TYPE_CREATE_DARRAY);
    MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
    return mpi_errno;

  fn_fail:
    /* --BEGIN ERROR HANDLING-- */
#ifdef HAVE_ERROR_CHECKING
    {
        mpi_errno =
            MPIR_Err_create_code(mpi_errno, MPIR_ERR_RECOVERABLE, __func__, __LINE__, MPI_ERR_OTHER,
                                 "**mpi_type_create_darray",
                                 "**mpi_type_create_darray %d %d %d %p %p %p %p %d %D %p", size,
                                 rank, ndims, array_of_gsizes, array_of_distribs, array_of_dargs,
                                 array_of_psizes, order, oldtype, newtype);
    }
#endif
    mpi_errno = MPIR_Err_return_comm(NULL, __func__, mpi_errno);
    goto fn_exit;
    /* --END ERROR HANDLING-- */
}
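The Cartesian decomposition near the top of the body (the procs/coords loop) matches MPI's row-major rank ordering. A standalone sketch of that loop, assuming nothing beyond the C standard library:

#include <assert.h>
#include <stdio.h>

/* Row-major rank -> grid coordinates, as in MPI_Type_create_darray. */
static void rank_to_coords(int rank, int ndims, const int *psizes, int *coords)
{
    int procs = 1, i;
    for (i = 0; i < ndims; i++)
        procs *= psizes[i];
    for (i = 0; i < ndims; i++) {
        procs /= psizes[i];
        coords[i] = rank / procs;
        rank %= procs;
    }
}

int main(void)
{
    /* 2x3 process grid: rank 4 = 1*3 + 1 sits at row 1, column 1. */
    int psizes[2] = { 2, 3 }, coords[2];
    rank_to_coords(4, 2, psizes, coords);
    assert(coords[0] == 1 && coords[1] == 1);
    printf("rank 4 -> (%d, %d)\n", coords[0], coords[1]);
    return 0;
}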
Example #6
PMPI_LOCAL int MPIR_Type_cyclic(const int *array_of_gsizes,
                                int dim,
                                int ndims,
                                int nprocs,
                                int rank,
                                int darg,
                                int order,
                                MPI_Aint orig_extent,
                                MPI_Datatype type_old,
                                MPI_Datatype * type_new, MPI_Aint * st_offset)
{
/* nprocs = no. of processes in dimension dim of grid
   rank = coordinate of this process in dimension dim */
    int mpi_errno, blksize, i, blklens[3], st_index, end_index, local_size, rem, count;
    MPI_Aint stride, disps[3];
    MPI_Datatype type_tmp, type_indexed, types[3];

    if (darg == MPI_DISTRIBUTE_DFLT_DARG)
        blksize = 1;
    else
        blksize = darg;

#ifdef HAVE_ERROR_CHECKING
    if (blksize <= 0) {
        mpi_errno = MPIR_Err_create_code(MPI_SUCCESS,
                                         MPIR_ERR_RECOVERABLE,
                                         __func__,
                                         __LINE__,
                                         MPI_ERR_ARG,
                                         "**darraycyclic", "**darraycyclic %d", blksize);
        return mpi_errno;
    }
#endif

    st_index = rank * blksize;
    end_index = array_of_gsizes[dim] - 1;

    if (end_index < st_index)
        local_size = 0;
    else {
        local_size = ((end_index - st_index + 1) / (nprocs * blksize)) * blksize;
        rem = (end_index - st_index + 1) % (nprocs * blksize);
        local_size += MPL_MIN(rem, blksize);
    }

    count = local_size / blksize;
    rem = local_size % blksize;

    stride = (MPI_Aint) nprocs * (MPI_Aint) blksize * orig_extent;
    if (order == MPI_ORDER_FORTRAN)
        for (i = 0; i < dim; i++)
            stride *= (MPI_Aint) (array_of_gsizes[i]);
    else
        for (i = ndims - 1; i > dim; i--)
            stride *= (MPI_Aint) (array_of_gsizes[i]);

    mpi_errno = MPIR_Type_vector(count, blksize, stride, 1,     /* stride in bytes */
                                 type_old, type_new);
    /* --BEGIN ERROR HANDLING-- */
    if (mpi_errno != MPI_SUCCESS) {
        mpi_errno =
            MPIR_Err_create_code(mpi_errno, MPIR_ERR_RECOVERABLE, __func__, __LINE__, MPI_ERR_OTHER,
                                 "**fail", 0);
        return mpi_errno;
    }
    /* --END ERROR HANDLING-- */

    if (rem) {
        /* if the last block is of size less than blksize, include
         * it separately using MPI_Type_struct */

        types[0] = *type_new;
        types[1] = type_old;
        disps[0] = 0;
        disps[1] = (MPI_Aint) count * stride;
        blklens[0] = 1;
        blklens[1] = rem;

        mpi_errno = MPIR_Type_struct(2, blklens, disps, types, &type_tmp);
        MPIR_Type_free_impl(type_new);
        *type_new = type_tmp;

        /* --BEGIN ERROR HANDLING-- */
        if (mpi_errno != MPI_SUCCESS) {
            mpi_errno =
                MPIR_Err_create_code(mpi_errno, MPIR_ERR_RECOVERABLE, __func__, __LINE__,
                                     MPI_ERR_OTHER, "**fail", 0);
            return mpi_errno;
        }
        /* --END ERROR HANDLING-- */
    }

    /* In the first iteration, we need to set the displacement in that
     * dimension correctly. */
    if (((order == MPI_ORDER_FORTRAN) && (dim == 0)) ||
        ((order == MPI_ORDER_C) && (dim == ndims - 1))) {
        disps[0] = 0;
        disps[1] = (MPI_Aint) rank * (MPI_Aint) blksize * orig_extent;
        disps[2] = orig_extent * (MPI_Aint) (array_of_gsizes[dim]);

/* Instead of using MPI_LB/MPI_UB, which have been removed from MPI in MPI-3,
   use MPI_Type_create_resized. Use hindexed_block to set the starting displacement
   of the datatype (disps[1]) and type_create_resized to set lb to 0 (disps[0])
   and extent to disps[2], which makes ub = disps[2].
 */
        mpi_errno = MPIR_Type_blockindexed(1, 1, &disps[1], 1,  /* 1 means disp is in bytes */
                                           *type_new, &type_indexed);

        /* --BEGIN ERROR HANDLING-- */
        if (mpi_errno != MPI_SUCCESS) {
            mpi_errno =
                MPIR_Err_create_code(mpi_errno, MPIR_ERR_RECOVERABLE, __func__, __LINE__,
                                     MPI_ERR_OTHER, "**fail", 0);
            return mpi_errno;
        }
        /* --END ERROR HANDLING-- */

        mpi_errno = MPIR_Type_create_resized(type_indexed, 0, disps[2], &type_tmp);

        MPIR_Type_free_impl(&type_indexed);
        MPIR_Type_free_impl(type_new);
        *type_new = type_tmp;

        /* --BEGIN ERROR HANDLING-- */
        if (mpi_errno != MPI_SUCCESS) {
            mpi_errno =
                MPIR_Err_create_code(mpi_errno, MPIR_ERR_RECOVERABLE, __func__, __LINE__,
                                     MPI_ERR_OTHER, "**fail", 0);
            return mpi_errno;
        }
        /* --END ERROR HANDLING-- */

        *st_offset = 0; /* set it to 0 because it is taken care of in
                         * the struct above */
    } else {
        *st_offset = (MPI_Aint) rank * (MPI_Aint) blksize;
        /* st_offset is in terms of no. of elements of type oldtype in
         * this dimension */
    }

    if (local_size == 0)
        *st_offset = 0;

    return MPI_SUCCESS;
}
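The local_size arithmetic above implements block-cyclic ownership counting. A quick standalone check (plain C; the gsize/nprocs/blksize parameters are arbitrary illustrations) that every process's share of a cyclic(blksize) dimension sums to the global size:

#include <assert.h>
#include <stdio.h>

/* Number of elements owned by 'rank' in a cyclic(blksize) distribution
 * of 'gsize' elements over 'nprocs' processes, as in MPIR_Type_cyclic. */
static int cyclic_local_size(int gsize, int nprocs, int rank, int blksize)
{
    int st_index = rank * blksize;
    int end_index = gsize - 1;
    int local_size, rem;

    if (end_index < st_index)
        return 0;
    local_size = ((end_index - st_index + 1) / (nprocs * blksize)) * blksize;
    rem = (end_index - st_index + 1) % (nprocs * blksize);
    return local_size + (rem < blksize ? rem : blksize);
}

int main(void)
{
    /* 10 elements, 3 processes, block size 2:
     * rank 0 owns {0,1,6,7}, rank 1 owns {2,3,8,9}, rank 2 owns {4,5}. */
    int total = 0, rank;
    for (rank = 0; rank < 3; rank++)
        total += cyclic_local_size(10, 3, rank, 2);
    assert(total == 10);
    printf("shares sum to %d\n", total);
    return 0;
}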
Example #7
int MPIR_Ialltoall_bruck(const void *sendbuf, int sendcount, MPI_Datatype sendtype,
                         void *recvbuf, int recvcount, MPI_Datatype recvtype,
                         MPID_Comm *comm_ptr, MPID_Sched_t s)
{
    int mpi_errno = MPI_SUCCESS;
    int i;
    int nbytes, recvtype_size, newtype_size;
    int rank, comm_size;
    void *tmp_buf = NULL;
    MPI_Aint sendtype_extent, recvtype_extent, recvbuf_extent, recvtype_true_lb, recvtype_true_extent;
    int pof2, dst, src;
    int count, block;
    MPI_Datatype newtype;
    int *displs;
    MPIU_CHKLMEM_DECL(1); /* displs */
    MPIR_SCHED_CHKPMEM_DECL(2); /* tmp_buf (2x) */

    MPIU_Assert(sendbuf != MPI_IN_PLACE); /* we do not handle in-place */

    comm_size = comm_ptr->local_size;
    rank = comm_ptr->rank;

    MPID_Datatype_get_extent_macro(sendtype, sendtype_extent);
    MPID_Datatype_get_size_macro(recvtype, recvtype_size);
    MPID_Datatype_get_extent_macro(recvtype, recvtype_extent);

    /* allocate temporary buffer */
    /* must be same size as entire recvbuf for Phase 3 */
    nbytes = recvtype_size * recvcount * comm_size;
    MPIR_SCHED_CHKPMEM_MALLOC(tmp_buf, void *, nbytes, mpi_errno, "tmp_buf");

    /* Do Phase 1 of the algorithm. Shift the data blocks on process i
     * upwards by a distance of i blocks. Store the result in recvbuf. */
    mpi_errno = MPID_Sched_copy(((char *) sendbuf + rank*sendcount*sendtype_extent),
                                (comm_size - rank)*sendcount, sendtype,
                                recvbuf, (comm_size - rank)*recvcount, recvtype, s);
    if (mpi_errno) MPIU_ERR_POP(mpi_errno);
    mpi_errno = MPID_Sched_copy(sendbuf, rank*sendcount, sendtype,
                                ((char *) recvbuf + (comm_size-rank)*recvcount*recvtype_extent),
                                rank*recvcount, recvtype, s);
    if (mpi_errno) MPIU_ERR_POP(mpi_errno);
    MPID_SCHED_BARRIER(s);
    /* Input data is now stored in recvbuf with datatype recvtype */

    /* Now do Phase 2, the communication phase. It takes
       ceiling(lg p) steps. In each step i, each process sends to rank+2^i
       and receives from rank-2^i, and exchanges all data blocks
       whose ith bit is 1. */

    /* allocate displacements array for indexed datatype used in
       communication */

    MPIU_CHKLMEM_MALLOC(displs, int *, comm_size * sizeof(int), mpi_errno, "displs");

    pof2 = 1;
    while (pof2 < comm_size) {
        dst = (rank + pof2) % comm_size;
        src = (rank - pof2 + comm_size) % comm_size;

        /* Exchange all data blocks whose ith bit is 1 */
        /* Create an indexed datatype for the purpose */

        count = 0;
        for (block=1; block<comm_size; block++) {
            if (block & pof2) {
                displs[count] = block * recvcount;
                count++;
            }
        }

        mpi_errno = MPIR_Type_create_indexed_block_impl(count, recvcount,
                                                        displs, recvtype, &newtype);
        if (mpi_errno) MPIU_ERR_POP(mpi_errno);

        mpi_errno = MPIR_Type_commit_impl(&newtype);
        if (mpi_errno) MPIU_ERR_POP(mpi_errno);
        MPID_Datatype_get_size_macro(newtype, newtype_size);

        /* we will usually copy much less than nbytes */
        mpi_errno = MPID_Sched_copy(recvbuf, 1, newtype, tmp_buf, newtype_size, MPI_BYTE, s);
        if (mpi_errno) MPIU_ERR_POP(mpi_errno);
        MPID_SCHED_BARRIER(s);

        /* now send and recv in parallel */
        mpi_errno = MPID_Sched_send(tmp_buf, newtype_size, MPI_BYTE, dst, comm_ptr, s);
        if (mpi_errno) MPIU_ERR_POP(mpi_errno);
        mpi_errno = MPID_Sched_recv(recvbuf, 1, newtype, src, comm_ptr, s);
        if (mpi_errno) MPIU_ERR_POP(mpi_errno);
        MPID_SCHED_BARRIER(s);

        MPIR_Type_free_impl(&newtype);

        pof2 *= 2;
    }

    /* Phase 3: Rotate blocks in recvbuf upwards by (rank + 1) blocks. Need
     * a temporary buffer of the same size as recvbuf. */

    /* get true extent of recvtype */
    MPIR_Type_get_true_extent_impl(recvtype, &recvtype_true_lb, &recvtype_true_extent);

    recvbuf_extent = recvcount * comm_size * (MPIR_MAX(recvtype_true_extent, recvtype_extent));
    /* not a leak, old tmp_buf value is still tracked by CHKPMEM macros */
    MPIR_SCHED_CHKPMEM_MALLOC(tmp_buf, void *, recvbuf_extent, mpi_errno, "tmp_buf");
    /* adjust for potential negative lower bound in datatype */
    tmp_buf = (void *)((char*)tmp_buf - recvtype_true_lb);

    mpi_errno = MPID_Sched_copy(((char *) recvbuf + (rank+1)*recvcount*recvtype_extent),
                                (comm_size - rank - 1)*recvcount, recvtype,
                                tmp_buf, (comm_size - rank - 1)*recvcount, recvtype, s);
    if (mpi_errno) MPIU_ERR_POP(mpi_errno);
    mpi_errno = MPID_Sched_copy(recvbuf, (rank+1)*recvcount, recvtype,
                                ((char *) tmp_buf + (comm_size-rank-1)*recvcount*recvtype_extent),
                                (rank+1)*recvcount, recvtype, s);
    if (mpi_errno) MPIU_ERR_POP(mpi_errno);
    MPID_SCHED_BARRIER(s);

    /* Blocks are in the reverse order now (comm_size-1 to 0).
     * Reorder them to (0 to comm_size-1) and store them in recvbuf. */

    for (i = 0; i < comm_size; i++){
        mpi_errno = MPID_Sched_copy(((char *) tmp_buf + i*recvcount*recvtype_extent),
                                    recvcount, recvtype,
                                    ((char *) recvbuf + (comm_size-i-1)*recvcount*recvtype_extent),
                                    recvcount, recvtype, s);
        if (mpi_errno) MPIU_ERR_POP(mpi_errno);
    }

    MPIR_SCHED_CHKPMEM_COMMIT(s);
fn_exit:
    MPIU_CHKLMEM_FREEALL();
    return mpi_errno;
fn_fail:
    MPIR_SCHED_CHKPMEM_REAP(s);
    goto fn_exit;
}
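In Phase 2 above, the blocks exchanged at step pof2 are exactly those whose index has that bit set, which is what the displs loop constructs. A small sketch of that selection (plain C; comm_size and recvcount are arbitrary illustrations):

#include <stdio.h>

/* For each Bruck step, print which blocks (and their displacements,
 * in units of recvcount elements) go into the indexed datatype. */
int main(void)
{
    int comm_size = 8, recvcount = 4;   /* ceiling(lg 8) = 3 steps */
    int pof2, block;

    for (pof2 = 1; pof2 < comm_size; pof2 *= 2) {
        printf("step pof2=%d:", pof2);
        for (block = 1; block < comm_size; block++)
            if (block & pof2)
                printf(" block %d (disp %d)", block, block * recvcount);
        printf("\n");
    }
    return 0;
}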
Example #8
/*@
   MPI_Type_create_subarray - Create a datatype for a subarray of a regular,
    multidimensional array

Input Parameters:
+ ndims - number of array dimensions (positive integer)
. array_of_sizes - number of elements of type oldtype in each dimension of the
  full array (array of positive integers)
. array_of_subsizes - number of elements of type oldtype in each dimension of
  the subarray (array of positive integers)
. array_of_starts - starting coordinates of the subarray in each dimension
  (array of nonnegative integers)
. order - array storage order flag (state)
- oldtype - array element datatype (handle)

Output Parameters:
. newtype - new datatype (handle)

.N ThreadSafe

.N Fortran

.N Errors
.N MPI_SUCCESS
.N MPI_ERR_TYPE
.N MPI_ERR_ARG
@*/
int MPI_Type_create_subarray(int ndims,
			     const int array_of_sizes[],
			     const int array_of_subsizes[],
			     const int array_of_starts[],
			     int order,
			     MPI_Datatype oldtype,
			     MPI_Datatype *newtype)
{
    static const char FCNAME[] = "MPI_Type_create_subarray";
    int mpi_errno = MPI_SUCCESS, i;
    MPI_Datatype new_handle;

    /* these variables are from the original version in ROMIO */
    MPI_Aint size, extent, disps[3];
    MPI_Datatype tmp1, tmp2;

#   ifdef HAVE_ERROR_CHECKING
    MPI_Aint   size_with_aint;
    MPI_Offset size_with_offset;
#   endif

    /* for saving contents */
    int *ints;
    MPID_Datatype *new_dtp;

    MPIU_CHKLMEM_DECL(1);
    MPID_MPI_STATE_DECL(MPID_STATE_MPI_TYPE_CREATE_SUBARRAY);

    MPIR_ERRTEST_INITIALIZED_ORDIE();

    MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_TYPE_CREATE_SUBARRAY);

#   ifdef HAVE_ERROR_CHECKING
    {
        MPID_BEGIN_ERROR_CHECKS;
        {
            MPID_Datatype *datatype_ptr = NULL;

	    /* Check parameters */
	    MPIR_ERRTEST_ARGNONPOS(ndims, "ndims", mpi_errno, MPI_ERR_DIMS);
	    MPIR_ERRTEST_ARGNULL(array_of_sizes, "array_of_sizes", mpi_errno);
	    MPIR_ERRTEST_ARGNULL(array_of_subsizes, "array_of_subsizes", mpi_errno);
	    MPIR_ERRTEST_ARGNULL(array_of_starts, "array_of_starts", mpi_errno);
	    for (i=0; mpi_errno == MPI_SUCCESS && i < ndims; i++) {
		MPIR_ERRTEST_ARGNONPOS(array_of_sizes[i], "size", mpi_errno, MPI_ERR_ARG);
		MPIR_ERRTEST_ARGNONPOS(array_of_subsizes[i], "subsize", mpi_errno, MPI_ERR_ARG);
		MPIR_ERRTEST_ARGNEG(array_of_starts[i], "start", mpi_errno);
		if (array_of_subsizes[i] > array_of_sizes[i]) {
		    mpi_errno = MPIR_Err_create_code(MPI_SUCCESS,
						     MPIR_ERR_RECOVERABLE,
						     FCNAME,
						     __LINE__,
						     MPI_ERR_ARG,
						     "**argrange",
						     "**argrange %s %d %d",
						     "array_of_subsizes",
						     array_of_subsizes[i],
						     array_of_sizes[i]);
                    goto fn_fail;
		}
		if (array_of_starts[i] > (array_of_sizes[i] - array_of_subsizes[i]))
		{
		    mpi_errno = MPIR_Err_create_code(MPI_SUCCESS,
						     MPIR_ERR_RECOVERABLE,
						     FCNAME,
						     __LINE__,
						     MPI_ERR_ARG,
						     "**argrange",
						     "**argrange %s %d %d",
						     "array_of_starts",
						     array_of_starts[i],
						     array_of_sizes[i] -
						     array_of_subsizes[i]);
                    goto fn_fail;
		}
	    }
	    if (order != MPI_ORDER_FORTRAN && order != MPI_ORDER_C) {
		mpi_errno = MPIR_Err_create_code(MPI_SUCCESS,
						 MPIR_ERR_RECOVERABLE,
						 FCNAME,
						 __LINE__,
						 MPI_ERR_ARG,
						 "**arg",
						 "**arg %s",
						 "order");
                goto fn_fail;
	    }

	    MPIR_Type_extent_impl(oldtype, &extent);

	    /* check if MPI_Aint is large enough for size of global array.
	       if not, complain. */

	    size_with_aint = extent;
	    for (i=0; i<ndims; i++) size_with_aint *= array_of_sizes[i];
	    size_with_offset = extent;
	    for (i=0; i<ndims; i++) size_with_offset *= array_of_sizes[i];
	    if (size_with_aint != size_with_offset) {
		mpi_errno = MPIR_Err_create_code(MPI_SUCCESS,
						 MPIR_ERR_FATAL,
						 FCNAME,
						 __LINE__,
						 MPI_ERR_ARG,
						 "**subarrayoflow",
						 "**subarrayoflow %L",
						 size_with_offset);
                goto fn_fail;
            }

            /* Get handles to MPI objects. */
            MPID_Datatype_get_ptr(oldtype, datatype_ptr);

            /* Validate datatype_ptr */
            MPID_Datatype_valid_ptr(datatype_ptr, mpi_errno);
	    /* If datatype_ptr is not valid, it will be reset to null */
            if (mpi_errno != MPI_SUCCESS) goto fn_fail;
        }
        MPID_END_ERROR_CHECKS;
    }
#   endif /* HAVE_ERROR_CHECKING */

    /* ... body of routine ... */

    /* TODO: CHECK THE ERROR RETURNS FROM ALL THESE!!! */

    /* TODO: GRAB EXTENT WITH A MACRO OR SOMETHING FASTER */
    MPIR_Type_extent_impl(oldtype, &extent);

    if (order == MPI_ORDER_FORTRAN) {
	if (ndims == 1)
	    mpi_errno = MPID_Type_contiguous(array_of_subsizes[0],
					     oldtype,
					     &tmp1);
	else {
	    mpi_errno = MPID_Type_vector(array_of_subsizes[1],
					 array_of_subsizes[0],
					 (MPI_Aint)(array_of_sizes[0]),
					 0, /* stride in types */
					 oldtype,
					 &tmp1);
            if (mpi_errno) MPIR_ERR_POP(mpi_errno);

	    size = ((MPI_Aint)(array_of_sizes[0])) * extent;
	    for (i=2; i<ndims; i++) {
		size *= (MPI_Aint)(array_of_sizes[i-1]);
		mpi_errno = MPID_Type_vector(array_of_subsizes[i],
					     1,
					     size,
					     1, /* stride in bytes */
					     tmp1,
					     &tmp2);
                if (mpi_errno) MPIR_ERR_POP(mpi_errno);
		MPIR_Type_free_impl(&tmp1);
		tmp1 = tmp2;
	    }
	}
        if (mpi_errno) MPIR_ERR_POP(mpi_errno);
	
	/* add displacement and UB */
	
	disps[1] = (MPI_Aint)(array_of_starts[0]);
	size = 1;
	for (i=1; i<ndims; i++) {
	    size *= (MPI_Aint)(array_of_sizes[i-1]);
	    disps[1] += size * (MPI_Aint)(array_of_starts[i]);
	}
        /* rest done below for both Fortran and C order */
    }
    else /* MPI_ORDER_C */ {
	/* dimension ndims-1 changes fastest */
	if (ndims == 1) {
	    mpi_errno = MPID_Type_contiguous(array_of_subsizes[0],
					     oldtype,
					     &tmp1);
            if (mpi_errno) MPIR_ERR_POP(mpi_errno);

	}
	else {
	    mpi_errno = MPID_Type_vector(array_of_subsizes[ndims-2],
					 array_of_subsizes[ndims-1],
					 (MPI_Aint)(array_of_sizes[ndims-1]),
					 0, /* stride in types */
					 oldtype,
					 &tmp1);
            if (mpi_errno) MPIR_ERR_POP(mpi_errno);

	    size = (MPI_Aint)(array_of_sizes[ndims-1]) * extent;
	    for (i=ndims-3; i>=0; i--) {
		size *= (MPI_Aint)(array_of_sizes[i+1]);
		mpi_errno = MPID_Type_vector(array_of_subsizes[i],
					     1,    /* blocklen */
					     size, /* stride */
					     1,    /* stride in bytes */
					     tmp1, /* old type */
					     &tmp2);
                if (mpi_errno) MPIR_ERR_POP(mpi_errno);

		MPIR_Type_free_impl(&tmp1);
		tmp1 = tmp2;
	    }
	}
	
	/* add displacement and UB */
	
	disps[1] = (MPI_Aint)(array_of_starts[ndims-1]);
	size = 1;
	for (i=ndims-2; i>=0; i--) {
	    size *= (MPI_Aint)(array_of_sizes[i+1]);
	    disps[1] += size * (MPI_Aint)(array_of_starts[i]);
	}
    }

    disps[1] *= extent;

    disps[2] = extent;
    for (i=0; i<ndims; i++) disps[2] *= (MPI_Aint)(array_of_sizes[i]);

    disps[0] = 0;

/* Instead of using MPI_LB/MPI_UB, which have been removed from MPI in MPI-3,
   use MPI_Type_create_resized. Use hindexed_block to set the starting displacement
   of the datatype (disps[1]) and type_create_resized to set lb to 0 (disps[0])
   and extent to disps[2], which makes ub = disps[2].
 */

    mpi_errno = MPID_Type_blockindexed(1, 1, &disps[1],
                                       1, /* 1 means disp is in bytes */
                                       tmp1, &tmp2);
    if (mpi_errno) MPIR_ERR_POP(mpi_errno);

    mpi_errno = MPID_Type_create_resized(tmp2, 0, disps[2], &new_handle);
    if (mpi_errno) MPIR_ERR_POP(mpi_errno);

    MPIR_Type_free_impl(&tmp1);
    MPIR_Type_free_impl(&tmp2);

    /* at this point we have the new type, and we've cleaned up any
     * intermediate types created in the process.  we just need to save
     * all our contents/envelope information.
     */

    /* Save contents */
    MPIU_CHKLMEM_MALLOC_ORJUMP(ints, int *, (3 * ndims + 2) * sizeof(int), mpi_errno, "content description");

    ints[0] = ndims;
    for (i=0; i < ndims; i++) {
	ints[i + 1] = array_of_sizes[i];
    }
    for(i=0; i < ndims; i++) {
	ints[i + ndims + 1] = array_of_subsizes[i];
    }
    for(i=0; i < ndims; i++) {
	ints[i + 2*ndims + 1] = array_of_starts[i];
    }
    ints[3*ndims + 1] = order;

    MPID_Datatype_get_ptr(new_handle, new_dtp);
    mpi_errno = MPID_Datatype_set_contents(new_dtp,
					   MPI_COMBINER_SUBARRAY,
					   3 * ndims + 2, /* ints */
					   0, /* aints */
					   1, /* types */
					   ints,
					   NULL,
					   &oldtype);
    if (mpi_errno) MPIR_ERR_POP(mpi_errno);


    MPIR_OBJ_PUBLISH_HANDLE(*newtype, new_handle);
    /* ... end of body of routine ... */

  fn_exit:
    MPIU_CHKLMEM_FREEALL();
    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_TYPE_CREATE_SUBARRAY);
    MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
    return mpi_errno;

  fn_fail:
    /* --BEGIN ERROR HANDLING-- */
#   ifdef HAVE_ERROR_CHECKING
    {
	mpi_errno = MPIR_Err_create_code(
	    mpi_errno, MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPI_ERR_OTHER, "**mpi_type_create_subarray",
	    "**mpi_type_create_subarray %d %p %p %p %d %D %p", ndims, array_of_sizes, array_of_subsizes,
	    array_of_starts, order, oldtype, newtype);
    }
#   endif
    mpi_errno = MPIR_Err_return_comm(NULL, FCNAME, mpi_errno);
    goto fn_exit;
    /* --END ERROR HANDLING-- */
}
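A minimal usage example for the routine above, extracting an interior 2x3 block of a 4x6 int array using only the standard MPI API:

#include <mpi.h>

int main(int argc, char **argv)
{
    int sizes[2]    = { 4, 6 };   /* full array:   4 rows, 6 columns */
    int subsizes[2] = { 2, 3 };   /* subarray:     2 rows, 3 columns */
    int starts[2]   = { 1, 2 };   /* starting at row 1, column 2     */
    MPI_Datatype subarray;

    MPI_Init(&argc, &argv);
    MPI_Type_create_subarray(2, sizes, subsizes, starts,
                             MPI_ORDER_C, MPI_INT, &subarray);
    MPI_Type_commit(&subarray);

    /* 'subarray' now describes the 6 selected ints inside a 24-int
     * buffer and can be used in sends, receives, or file views. */

    MPI_Type_free(&subarray);
    MPI_Finalize();
    return 0;
}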
Example #9
void PREPEND_PREFIX(Dataloop_create)(MPI_Datatype type,
				     DLOOP_Dataloop **dlp_p,
				     int *dlsz_p,
				     int *dldepth_p,
				     int flag)
{
    int i;
    int err;

    int nr_ints, nr_aints, nr_types, combiner;
    MPI_Datatype *types;
    int *ints;
    MPI_Aint *aints;

    DLOOP_Dataloop *old_dlp;
    int old_dlsz, old_dldepth;

    int dummy1, dummy2, dummy3, type0_combiner, ndims;
    MPI_Datatype tmptype;

    MPI_Aint stride;
    MPI_Aint *disps;

    MPIR_Type_get_envelope_impl(type, &nr_ints, &nr_aints, &nr_types, &combiner);

    /* some named types do need dataloops; handle separately. */
    if (combiner == MPI_COMBINER_NAMED) {
	DLOOP_Dataloop_create_named(type, dlp_p, dlsz_p, dldepth_p, flag);
	return;
    }
    else if (combiner == MPI_COMBINER_F90_REAL ||
             combiner == MPI_COMBINER_F90_COMPLEX ||
             combiner == MPI_COMBINER_F90_INTEGER)
    {
        MPI_Datatype f90basetype;
        DLOOP_Handle_get_basic_type_macro(type, f90basetype);
        PREPEND_PREFIX(Dataloop_create_contiguous)(1 /* count */,
                                                   f90basetype,
                                                   dlp_p, dlsz_p,
                                                   dldepth_p,
                                                   flag);
        return;
    }

    /* Q: should we also check for "hasloop", or is the COMBINER
     *    check above enough to weed out everything that wouldn't
     *    have a loop?
     */
    DLOOP_Handle_get_loopptr_macro(type, old_dlp, flag);
    if (old_dlp != NULL) {
	/* dataloop already created; just return it. */
	*dlp_p = old_dlp;
	DLOOP_Handle_get_loopsize_macro(type, *dlsz_p, flag);
	DLOOP_Handle_get_loopdepth_macro(type, *dldepth_p, flag);
	return;
    }

    PREPEND_PREFIX(Type_access_contents)(type, &ints, &aints, &types);

    /* first check for zero count on types where that makes sense */
    switch(combiner) {
	case MPI_COMBINER_CONTIGUOUS:
	case MPI_COMBINER_VECTOR:
	case MPI_COMBINER_HVECTOR_INTEGER:
	case MPI_COMBINER_HVECTOR:
	case MPI_COMBINER_INDEXED_BLOCK:
	case MPI_COMBINER_HINDEXED_BLOCK:
	case MPI_COMBINER_INDEXED:
	case MPI_COMBINER_HINDEXED_INTEGER:
	case MPI_COMBINER_HINDEXED:
	case MPI_COMBINER_STRUCT_INTEGER:
	case MPI_COMBINER_STRUCT:
	    if (ints[0] == 0) {
		PREPEND_PREFIX(Dataloop_create_contiguous)(0,
							   MPI_INT,
							   dlp_p,
							   dlsz_p,
							   dldepth_p,
							   flag);
		goto clean_exit;
	    }
	    break;
	default:
	    break;
    }

    /* recurse, processing types "below" this one before processing
     * this one, if those types don't already have dataloops.
     *
     * note: in the struct case below we'll handle any additional
     *       types "below" the current one.
     */
    MPIR_Type_get_envelope_impl(types[0], &dummy1, &dummy2, &dummy3, &type0_combiner);
    if (type0_combiner != MPI_COMBINER_NAMED)
    {
	DLOOP_Handle_get_loopptr_macro(types[0], old_dlp, flag);
	if (old_dlp == NULL)
	{
	    /* no dataloop already present; create and store one */
	    PREPEND_PREFIX(Dataloop_create)(types[0],
					    &old_dlp,
					    &old_dlsz,
					    &old_dldepth,
					    flag);

	    DLOOP_Handle_set_loopptr_macro(types[0], old_dlp, flag);
	    DLOOP_Handle_set_loopsize_macro(types[0], old_dlsz, flag);
	    DLOOP_Handle_set_loopdepth_macro(types[0], old_dldepth, flag);
	}
	else {
	    DLOOP_Handle_get_loopsize_macro(types[0], old_dlsz, flag);
	    DLOOP_Handle_get_loopdepth_macro(types[0], old_dldepth, flag);
	}
    }
       
    switch(combiner)
    {
	case MPI_COMBINER_DUP:
	    if (type0_combiner != MPI_COMBINER_NAMED) {
		PREPEND_PREFIX(Dataloop_dup)(old_dlp, old_dlsz, dlp_p);
		*dlsz_p    = old_dlsz;
		*dldepth_p = old_dldepth;
	    }
	    else {
		PREPEND_PREFIX(Dataloop_create_contiguous)(1,
							   types[0], 
							   dlp_p, dlsz_p,
							   dldepth_p,
							   flag);
	    }
	    break;
	case MPI_COMBINER_RESIZED:
	    if (type0_combiner != MPI_COMBINER_NAMED) {
		PREPEND_PREFIX(Dataloop_dup)(old_dlp, old_dlsz, dlp_p);
		*dlsz_p    = old_dlsz;
		*dldepth_p = old_dldepth;
	    }
	    else {
		PREPEND_PREFIX(Dataloop_create_contiguous)(1,
							   types[0], 
							   dlp_p, dlsz_p,
							   dldepth_p,
							   flag);

		(*dlp_p)->el_extent = aints[1]; /* extent */
	    }
	    break;
	case MPI_COMBINER_CONTIGUOUS:
	    PREPEND_PREFIX(Dataloop_create_contiguous)(ints[0] /* count */,
						       types[0] /* oldtype */,
						       dlp_p, dlsz_p,
						       dldepth_p,
						       flag);
	    break;
	case MPI_COMBINER_VECTOR:
	    PREPEND_PREFIX(Dataloop_create_vector)(ints[0] /* count */,
						   ints[1] /* blklen */,
						   ints[2] /* stride */,
						   0 /* stride not bytes */,
						   types[0] /* oldtype */,
						   dlp_p, dlsz_p, dldepth_p,
						   flag);
	    break;
	case MPI_COMBINER_HVECTOR_INTEGER:
	case MPI_COMBINER_HVECTOR:
	    /* fortran hvector has integer stride in bytes */
	    if (combiner == MPI_COMBINER_HVECTOR_INTEGER) {
		stride = (MPI_Aint) ints[2];
	    }
	    else {
		stride = aints[0];
	    }

	    PREPEND_PREFIX(Dataloop_create_vector)(ints[0] /* count */,
						   ints[1] /* blklen */,
						   stride,
						   1 /* stride in bytes */,
						   types[0] /* oldtype */,
						   dlp_p, dlsz_p, dldepth_p,
						   flag);
	    break;
	case MPI_COMBINER_INDEXED_BLOCK:
	    PREPEND_PREFIX(Dataloop_create_blockindexed)(ints[0] /* count */,
							 ints[1] /* blklen */,
							 &ints[2] /* disps */,
							 0 /* disp not bytes */,
							 types[0] /* oldtype */,
							 dlp_p, dlsz_p,
							 dldepth_p,
							 flag);
	    break;
	case MPI_COMBINER_HINDEXED_BLOCK:
            disps = (MPI_Aint *) DLOOP_Malloc(ints[0] * sizeof(MPI_Aint));
            for (i = 0; i < ints[0]; i++)
                disps[i] = aints[i];
	    PREPEND_PREFIX(Dataloop_create_blockindexed)(ints[0] /* count */,
							 ints[1] /* blklen */,
							 disps /* disps */,
							 1 /* disp is bytes */,
							 types[0] /* oldtype */,
							 dlp_p, dlsz_p,
							 dldepth_p,
							 flag);
            DLOOP_Free(disps);
	    break;
	case MPI_COMBINER_INDEXED:
	    PREPEND_PREFIX(Dataloop_create_indexed)(ints[0] /* count */,
						    &ints[1] /* blklens */,
						    &ints[ints[0]+1] /* disp */,
						    0 /* disp not in bytes */,
						    types[0] /* oldtype */,
						    dlp_p, dlsz_p, dldepth_p,
						    flag);
	    break;
	case MPI_COMBINER_HINDEXED_INTEGER:
	case MPI_COMBINER_HINDEXED:
	    if (combiner == MPI_COMBINER_HINDEXED_INTEGER) {
		disps = (MPI_Aint *) DLOOP_Malloc(ints[0] * sizeof(MPI_Aint));

		for (i=0; i < ints[0]; i++) {
		    disps[i] = (MPI_Aint) ints[ints[0] + 1 + i];
		}
	    }
	    else {
		disps = aints;
	    }

	    PREPEND_PREFIX(Dataloop_create_indexed)(ints[0] /* count */,
						    &ints[1] /* blklens */,
						    disps,
						    1 /* disp in bytes */,
						    types[0] /* oldtype */,
						    dlp_p, dlsz_p, dldepth_p,
						    flag);

	    if (combiner == MPI_COMBINER_HINDEXED_INTEGER) {
		DLOOP_Free(disps);
	    }

	    break;
	case MPI_COMBINER_STRUCT_INTEGER:
	case MPI_COMBINER_STRUCT:
	    for (i = 1; i < ints[0]; i++) {
		int type_combiner;
		MPIR_Type_get_envelope_impl(types[i], &dummy1, &dummy2, &dummy3, &type_combiner);

		if (type_combiner != MPI_COMBINER_NAMED) {
		    DLOOP_Handle_get_loopptr_macro(types[i], old_dlp, flag);
		    if (old_dlp == NULL)
		    {
			PREPEND_PREFIX(Dataloop_create)(types[i],
							&old_dlp,
							&old_dlsz,
							&old_dldepth,
							flag);
			
			DLOOP_Handle_set_loopptr_macro(types[i], old_dlp,
						       flag);
			DLOOP_Handle_set_loopsize_macro(types[i], old_dlsz,
							flag);
			DLOOP_Handle_set_loopdepth_macro(types[i], old_dldepth,
							 flag);
		    }
		}
	    }
	    if (combiner == MPI_COMBINER_STRUCT_INTEGER) {
		disps = (MPI_Aint *) DLOOP_Malloc(ints[0] * sizeof(MPI_Aint));

		for (i=0; i < ints[0]; i++) {
		    disps[i] = (MPI_Aint) ints[ints[0] + 1 + i];
		}
	    }
	    else {
		disps = aints;
	    }

            err = PREPEND_PREFIX(Dataloop_create_struct)(ints[0] /* count */,
                                                         &ints[1] /* blklens */,
                                                         disps,
                                                         types /* oldtype array */,
                                                         dlp_p, dlsz_p, dldepth_p,
                                                         flag);
            /* TODO if/when this function returns error codes, propagate this failure instead */
            DLOOP_Assert(0 == err);
            /* if (err) return err; */

	    if (combiner == MPI_COMBINER_STRUCT_INTEGER) {
		DLOOP_Free(disps);
	    }
	    break;
	case MPI_COMBINER_SUBARRAY:
	    ndims = ints[0];
	    PREPEND_PREFIX(Type_convert_subarray)(ndims,
						  &ints[1] /* sizes */,
						  &ints[1+ndims] /* subsizes */,
						  &ints[1+2*ndims] /* starts */,
						  ints[1+3*ndims] /* order */,
						  types[0],
						  &tmptype);

	    PREPEND_PREFIX(Dataloop_create)(tmptype,
					    dlp_p,
					    dlsz_p,
					    dldepth_p,
					    flag);
	    
	    MPIR_Type_free_impl(&tmptype);
	    break;
	case MPI_COMBINER_DARRAY:
	    ndims = ints[2];
	    PREPEND_PREFIX(Type_convert_darray)(ints[0] /* size */,
						ints[1] /* rank */,
						ndims,
						&ints[3] /* gsizes */,
						&ints[3+ndims] /*distribs */,
						&ints[3+2*ndims] /* dargs */,
						&ints[3+3*ndims] /* psizes */,
						ints[3+4*ndims] /* order */,
						types[0],
						&tmptype);

	    PREPEND_PREFIX(Dataloop_create)(tmptype,
					    dlp_p,
					    dlsz_p,
					    dldepth_p,
					    flag);

	    MPIR_Type_free_impl(&tmptype);
	    break;
	default:
	    DLOOP_Assert(0);
	    break;
    }

 clean_exit:

    PREPEND_PREFIX(Type_release_contents)(type, &ints, &aints, &types);

    /* for now we just leave the intermediate dataloops in place.
     * could remove them to save space if we wanted.
     */

    return;
}
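The recursion above is driven by the type's envelope. The same walk can be sketched with the public introspection API (MPI_Type_get_envelope / MPI_Type_get_contents); the printer below is a simplified illustration and makes no attempt to decode every combiner's ints/aints layout.

#include <mpi.h>
#include <stdio.h>
#include <stdlib.h>

/* Recursively visit a derived type's input types, as Dataloop_create does. */
static void walk_type(MPI_Datatype type, int depth)
{
    int ni, na, nt, combiner, i;

    MPI_Type_get_envelope(type, &ni, &na, &nt, &combiner);
    if (combiner == MPI_COMBINER_NAMED) {
        printf("%*snamed type (leaf)\n", 2 * depth, "");
        return;
    }
    printf("%*scombiner %d: %d ints, %d aints, %d types\n",
           2 * depth, "", combiner, ni, na, nt);

    int *ints = malloc((ni ? ni : 1) * sizeof *ints);
    MPI_Aint *aints = malloc((na ? na : 1) * sizeof *aints);
    MPI_Datatype *types = malloc((nt ? nt : 1) * sizeof *types);
    MPI_Type_get_contents(type, ni, na, nt, ints, aints, types);

    for (i = 0; i < nt; i++) {
        int d1, d2, d3, sub_combiner;
        walk_type(types[i], depth + 1);
        /* get_contents returns references; free the derived ones */
        MPI_Type_get_envelope(types[i], &d1, &d2, &d3, &sub_combiner);
        if (sub_combiner != MPI_COMBINER_NAMED)
            MPI_Type_free(&types[i]);
    }
    free(ints); free(aints); free(types);
}

int main(int argc, char **argv)
{
    MPI_Datatype vec;
    MPI_Init(&argc, &argv);
    MPI_Type_vector(3, 2, 4, MPI_DOUBLE, &vec);  /* one level deep */
    MPI_Type_commit(&vec);
    walk_type(vec, 0);
    MPI_Type_free(&vec);
    MPI_Finalize();
    return 0;
}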
Example #10
int MPIR_Alltoall_intra( 
    const void *sendbuf,
    int sendcount, 
    MPI_Datatype sendtype, 
    void *recvbuf, 
    int recvcount, 
    MPI_Datatype recvtype, 
    MPIR_Comm *comm_ptr,
    MPIR_Errflag_t *errflag )
{
    int          comm_size, i, j, pof2;
    MPI_Aint     sendtype_extent, recvtype_extent;
    MPI_Aint recvtype_true_extent, recvbuf_extent, recvtype_true_lb;
    int mpi_errno=MPI_SUCCESS, src, dst, rank, nbytes;
    int mpi_errno_ret = MPI_SUCCESS;
    MPI_Status status;
    int sendtype_size, block, *displs, count;
    MPI_Aint pack_size, position;
    MPI_Datatype newtype = MPI_DATATYPE_NULL;
    void *tmp_buf;
    MPIR_Request **reqarray;
    MPI_Status *starray;
    MPIR_CHKLMEM_DECL(6);

    if (recvcount == 0) return MPI_SUCCESS;

    comm_size = comm_ptr->local_size;
    rank = comm_ptr->rank;

    /* Get extent of send and recv types */
    MPID_Datatype_get_extent_macro(recvtype, recvtype_extent);
    MPID_Datatype_get_extent_macro(sendtype, sendtype_extent);

    MPID_Datatype_get_size_macro(sendtype, sendtype_size);
    nbytes = sendtype_size * sendcount;

    if (sendbuf == MPI_IN_PLACE) {
        /* We use pair-wise sendrecv_replace in order to conserve memory usage,
         * which is in keeping with the spirit of the MPI-2.2 Standard.  But
         * because of this approach all processes must agree on the global
         * schedule of sendrecv_replace operations to avoid deadlock.
         *
         * Note that this is not an especially efficient algorithm in terms of
         * time and there will be multiple repeated malloc/free's rather than
         * maintaining a single buffer across the whole loop.  Something like
         * MADRE is probably the best solution for the MPI_IN_PLACE scenario. */
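        /* Illustration: with comm_size == 3 the (i,j) schedule visits
         * (0,0),(0,1),(0,2),(1,1),(1,2),(2,2); at step (i,j) exactly ranks
         * i and j call sendrecv_replace with each other, so all processes
         * agree on the order and no pair can deadlock. */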
        for (i = 0; i < comm_size; ++i) {
            /* start inner loop at i to avoid re-exchanging data */
            for (j = i; j < comm_size; ++j) {
                if (rank == i) {
                    /* also covers the (rank == i && rank == j) case */
                    mpi_errno = MPIC_Sendrecv_replace(((char *)recvbuf + j*recvcount*recvtype_extent),
                                                         recvcount, recvtype,
                                                         j, MPIR_ALLTOALL_TAG,
                                                         j, MPIR_ALLTOALL_TAG,
                                                         comm_ptr, &status, errflag);
                    if (mpi_errno) {
                        /* for communication errors, just record the error but continue */
                        *errflag = MPIR_ERR_GET_CLASS(mpi_errno);
                        MPIR_ERR_SET(mpi_errno, *errflag, "**fail");
                        MPIR_ERR_ADD(mpi_errno_ret, mpi_errno);
                    }
                }
                else if (rank == j) {
                    /* same as above with i/j args reversed */
                    mpi_errno = MPIC_Sendrecv_replace(((char *)recvbuf + i*recvcount*recvtype_extent),
                                                         recvcount, recvtype,
                                                         i, MPIR_ALLTOALL_TAG,
                                                         i, MPIR_ALLTOALL_TAG,
                                                         comm_ptr, &status, errflag);
                    if (mpi_errno) {
                        /* for communication errors, just record the error but continue */
                        *errflag = MPIR_ERR_GET_CLASS(mpi_errno);
                        MPIR_ERR_SET(mpi_errno, *errflag, "**fail");
                        MPIR_ERR_ADD(mpi_errno_ret, mpi_errno);
                    }
                }
            }
        }
    }
    else if ((nbytes <= MPIR_CVAR_ALLTOALL_SHORT_MSG_SIZE) && (comm_size >= 8)) {

        /* use the indexing algorithm by Jehoshua Bruck et al,
         * IEEE TPDS, Nov. 97 */ 

        /* allocate temporary buffer */
        MPIR_Pack_size_impl(recvcount*comm_size, recvtype, &pack_size);
        MPIR_CHKLMEM_MALLOC(tmp_buf, void *, pack_size, mpi_errno, "tmp_buf");

        /* Do Phase 1 of the algorithm. Shift the data blocks on process i
         * upwards by a distance of i blocks. Store the result in recvbuf. */
        mpi_errno = MPIR_Localcopy((char *) sendbuf + 
			   rank*sendcount*sendtype_extent, 
                           (comm_size - rank)*sendcount, sendtype, recvbuf, 
                           (comm_size - rank)*recvcount, recvtype);
	if (mpi_errno) { MPIR_ERR_POP(mpi_errno); }
        mpi_errno = MPIR_Localcopy(sendbuf, rank*sendcount, sendtype, 
                        (char *) recvbuf + 
				   (comm_size-rank)*recvcount*recvtype_extent, 
                                   rank*recvcount, recvtype);
	if (mpi_errno) { MPIR_ERR_POP(mpi_errno); }
        /* Input data is now stored in recvbuf with datatype recvtype */

        /* Now do Phase 2, the communication phase. It takes
           ceiling(lg p) steps. In each step i, each process sends to rank+2^i
           and receives from rank-2^i, and exchanges all data blocks
           whose ith bit is 1. */

        /* allocate displacements array for indexed datatype used in
           communication */

        MPIR_CHKLMEM_MALLOC(displs, int *, comm_size * sizeof(int), mpi_errno, "displs");

        pof2 = 1;
        while (pof2 < comm_size) {
            dst = (rank + pof2) % comm_size;
            src = (rank - pof2 + comm_size) % comm_size;

            /* Exchange all data blocks whose ith bit is 1 */
            /* Create an indexed datatype for the purpose */

            count = 0;
            for (block=1; block<comm_size; block++) {
                if (block & pof2) {
                    displs[count] = block * recvcount;
                    count++;
                }
            }
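            /* e.g., comm_size = 8, pof2 = 2: blocks 2,3,6,7 have bit 1 set,
             * so displs = {2,3,6,7} * recvcount and count = 4 */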

            mpi_errno = MPIR_Type_create_indexed_block_impl(count, recvcount,
                                                            displs, recvtype, &newtype);
	    if (mpi_errno) MPIR_ERR_POP(mpi_errno);

            mpi_errno = MPIR_Type_commit_impl(&newtype);
	    if (mpi_errno) MPIR_ERR_POP(mpi_errno);

            position = 0;
            mpi_errno = MPIR_Pack_impl(recvbuf, 1, newtype, tmp_buf, pack_size, &position);
            if (mpi_errno) MPIR_ERR_POP(mpi_errno);

            mpi_errno = MPIC_Sendrecv(tmp_buf, position, MPI_PACKED, dst,
                                         MPIR_ALLTOALL_TAG, recvbuf, 1, newtype,
                                         src, MPIR_ALLTOALL_TAG, comm_ptr,
                                         MPI_STATUS_IGNORE, errflag);
            if (mpi_errno) {
                /* for communication errors, just record the error but continue */
                *errflag = MPIR_ERR_GET_CLASS(mpi_errno);
                MPIR_ERR_SET(mpi_errno, *errflag, "**fail");
                MPIR_ERR_ADD(mpi_errno_ret, mpi_errno);
            }

            MPIR_Type_free_impl(&newtype);

            pof2 *= 2;
        }

        /* Rotate blocks in recvbuf upwards by (rank + 1) blocks. Need
         * a temporary buffer of the same size as recvbuf. */
        
        /* get true extent of recvtype */
        MPIR_Type_get_true_extent_impl(recvtype, &recvtype_true_lb, &recvtype_true_extent);

        recvbuf_extent = recvcount * comm_size *
            (MPL_MAX(recvtype_true_extent, recvtype_extent));
        MPIR_CHKLMEM_MALLOC(tmp_buf, void *, recvbuf_extent, mpi_errno, "tmp_buf");
        /* adjust for potential negative lower bound in datatype */
        tmp_buf = (void *)((char*)tmp_buf - recvtype_true_lb);

        mpi_errno = MPIR_Localcopy((char *) recvbuf + (rank+1)*recvcount*recvtype_extent, 
                       (comm_size - rank - 1)*recvcount, recvtype, tmp_buf, 
                       (comm_size - rank - 1)*recvcount, recvtype);
	if (mpi_errno) { MPIR_ERR_POP(mpi_errno); }
        mpi_errno = MPIR_Localcopy(recvbuf, (rank+1)*recvcount, recvtype, 
                       (char *) tmp_buf + (comm_size-rank-1)*recvcount*recvtype_extent, 
                       (rank+1)*recvcount, recvtype);
	if (mpi_errno) { MPIR_ERR_POP(mpi_errno); }

        /* Blocks are in the reverse order now (comm_size-1 to 0). 
         * Reorder them to (0 to comm_size-1) and store them in recvbuf. */

        for (i=0; i<comm_size; i++){
            mpi_errno = MPIR_Localcopy((char *) tmp_buf + i*recvcount*recvtype_extent,
                                       recvcount, recvtype, 
                                       (char *) recvbuf + (comm_size-i-1)*recvcount*recvtype_extent, 
                                       recvcount, recvtype);
            if (mpi_errno) MPIR_ERR_POP(mpi_errno);
        }
    }
Example #11
int MPIR_Igather_sched_intra_binomial(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, int root, MPIR_Comm *comm_ptr, MPIR_Sched_t s)
{
    int mpi_errno = MPI_SUCCESS;
    int comm_size, rank;
    int relative_rank, is_homogeneous;
    int mask, src, dst, relative_src;
    MPI_Aint recvtype_size, sendtype_size, curr_cnt=0, nbytes;
    int recvblks;
    int tmp_buf_size, missing;
    void *tmp_buf = NULL;
    int blocks[2];
    int displs[2];
    MPI_Aint struct_displs[2];
    MPI_Aint extent=0;
    int copy_offset = 0, copy_blks = 0;
    MPI_Datatype types[2], tmp_type;
    MPIR_SCHED_CHKPMEM_DECL(1);

    comm_size = comm_ptr->local_size;
    rank = comm_ptr->rank;

    if (((rank == root) && (recvcount == 0)) || ((rank != root) && (sendcount == 0)))
        goto fn_exit;

    is_homogeneous = TRUE;
#ifdef MPID_HAS_HETERO
    is_homogeneous = !comm_ptr->is_hetero;
#endif

    MPIR_Assert(comm_ptr->comm_kind == MPIR_COMM_KIND__INTRACOMM);

    /* Use binomial tree algorithm. */

    relative_rank = (rank >= root) ? rank - root : rank - root + comm_size;

    if (rank == root)
    {
        MPIR_Datatype_get_extent_macro(recvtype, extent);
        MPIR_Ensure_Aint_fits_in_pointer(MPIR_VOID_PTR_CAST_TO_MPI_AINT recvbuf+
                                         (extent*recvcount*comm_size));
    }

    if (is_homogeneous)
    {
        /* communicator is homogeneous. no need to pack buffer. */
        if (rank == root)
        {
            MPIR_Datatype_get_size_macro(recvtype, recvtype_size);
            nbytes = recvtype_size * recvcount;
        }
        else
        {
            MPIR_Datatype_get_size_macro(sendtype, sendtype_size);
            nbytes = sendtype_size * sendcount;
        }

        /* Find the number of missing nodes in my sub-tree compared to
         * a balanced tree */
        for (mask = 1; mask < comm_size; mask <<= 1);
        --mask;
        while (relative_rank & mask) mask >>= 1;
        missing = (relative_rank | mask) - comm_size + 1;
        if (missing < 0) missing = 0;
        tmp_buf_size = (mask - missing);

        /* If the message is smaller than the threshold, we will copy
         * our message in there too */
        if (nbytes < MPIR_CVAR_GATHER_VSMALL_MSG_SIZE) tmp_buf_size++;

        tmp_buf_size *= nbytes;

        /* For zero-ranked root, we don't need any temporary buffer */
        if ((rank == root) && (!root || (nbytes >= MPIR_CVAR_GATHER_VSMALL_MSG_SIZE)))
            tmp_buf_size = 0;
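        /* Worked example (illustration): comm_size = 6, relative_rank = 2.
         * The first loop leaves mask = 7, the while loop shrinks it to 1
         * (subtree {2,3}), missing = max(0, (2|1) - 6 + 1) = 0, so
         * tmp_buf_size is 1 block, or 2 once our own small message is
         * staged in it as well. */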

        if (tmp_buf_size) {
            MPIR_SCHED_CHKPMEM_MALLOC(tmp_buf, void *, tmp_buf_size, mpi_errno, "tmp_buf", MPL_MEM_BUFFER);
        }

        if (rank == root) {
            if (sendbuf != MPI_IN_PLACE) {
                mpi_errno = MPIR_Localcopy(sendbuf, sendcount, sendtype,
                                           ((char *) recvbuf + extent*recvcount*rank), recvcount, recvtype);
                if (mpi_errno) MPIR_ERR_POP(mpi_errno);
            }
        }
        else if (tmp_buf_size && (nbytes < MPIR_CVAR_GATHER_VSMALL_MSG_SIZE)) {
            /* copy from sendbuf into tmp_buf */
            mpi_errno = MPIR_Localcopy(sendbuf, sendcount, sendtype,
                                       tmp_buf, nbytes, MPI_BYTE);
            if (mpi_errno) MPIR_ERR_POP(mpi_errno);
        }
        curr_cnt = nbytes;

        mask = 0x1;
        while (mask < comm_size) {
            if ((mask & relative_rank) == 0) {
                src = relative_rank | mask;
                if (src < comm_size) {
                    src = (src + root) % comm_size;

                    if (rank == root) {
                        recvblks = mask;
                        if ((2 * recvblks) > comm_size)
                            recvblks = comm_size - recvblks;

                        if ((rank + mask + recvblks == comm_size) ||
                            (((rank + mask) % comm_size) < ((rank + mask + recvblks) % comm_size)))
                        {
                            /* If the data contiguously fits into the
                             * receive buffer, place it directly. This
                             * should cover the case where the root is
                             * rank 0. */
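                            /* e.g., comm_size = 8, root = 0, mask = 4:
                             * recvblks = 4 land contiguously in blocks
                             * 4..7; with root = 2 the same four blocks
                             * would wrap around (6,7,0,1) and one of the
                             * branches below handles that instead */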
                            char *rp = (char *)recvbuf + (((rank + mask) % comm_size)*recvcount*extent);
                            mpi_errno = MPIR_Sched_recv(rp, (recvblks * recvcount), recvtype, src, comm_ptr, s);
                            if (mpi_errno) MPIR_ERR_POP(mpi_errno);
                            mpi_errno = MPIR_Sched_barrier(s);
                            if (mpi_errno) MPIR_ERR_POP(mpi_errno);
                        }
                        else if (nbytes < MPIR_CVAR_GATHER_VSMALL_MSG_SIZE) {
                            mpi_errno = MPIR_Sched_recv(tmp_buf, (recvblks * nbytes), MPI_BYTE, src, comm_ptr, s);
                            if (mpi_errno) MPIR_ERR_POP(mpi_errno);
                            mpi_errno = MPIR_Sched_barrier(s);
                            if (mpi_errno) MPIR_ERR_POP(mpi_errno);
                            copy_offset = rank + mask;
                            copy_blks = recvblks;
                        }
                        else {
                            blocks[0] = recvcount * (comm_size - root - mask);
                            displs[0] = recvcount * (root + mask);
                            blocks[1] = (recvcount * recvblks) - blocks[0];
                            displs[1] = 0;

                            mpi_errno = MPIR_Type_indexed_impl(2, blocks, displs, recvtype, &tmp_type);
                            if (mpi_errno) MPIR_ERR_POP(mpi_errno);
                            mpi_errno = MPIR_Type_commit_impl(&tmp_type);
                            if (mpi_errno) MPIR_ERR_POP(mpi_errno);

                            mpi_errno = MPIR_Sched_recv(recvbuf, 1, tmp_type, src, comm_ptr, s);
                            if (mpi_errno) MPIR_ERR_POP(mpi_errno);
                            mpi_errno = MPIR_Sched_barrier(s);
                            if (mpi_errno) MPIR_ERR_POP(mpi_errno);

                            /* this "premature" free is safe b/c the sched holds an actual ref to keep it alive */
                            MPIR_Type_free_impl(&tmp_type);
                        }
                    }
                    else { /* Intermediate nodes store in temporary buffer */
                        MPI_Aint offset;

                        /* Estimate the amount of data that is going to come in */
                        recvblks = mask;
                        relative_src = ((src - root) < 0) ? (src - root + comm_size) : (src - root);
                        if (relative_src + mask > comm_size)
                            recvblks -= (relative_src + mask - comm_size);

                        if (nbytes < MPIR_CVAR_GATHER_VSMALL_MSG_SIZE)
                            offset = mask * nbytes;
                        else
                            offset = (mask - 1) * nbytes;
                        mpi_errno = MPIR_Sched_recv(((char *)tmp_buf + offset), (recvblks * nbytes),
                                                    MPI_BYTE, src, comm_ptr, s);
                        if (mpi_errno) MPIR_ERR_POP(mpi_errno);
                        mpi_errno = MPIR_Sched_barrier(s);
                        if (mpi_errno) MPIR_ERR_POP(mpi_errno);
                        curr_cnt += (recvblks * nbytes);
                    }
                }
            }
            else {
                dst = relative_rank ^ mask;
                dst = (dst + root) % comm_size;

                if (!tmp_buf_size) {
                    /* leaf nodes send directly from sendbuf */
                    mpi_errno = MPIR_Sched_send(sendbuf, sendcount, sendtype, dst, comm_ptr, s);
                    if (mpi_errno) MPIR_ERR_POP(mpi_errno);
                    mpi_errno = MPIR_Sched_barrier(s);
                    if (mpi_errno) MPIR_ERR_POP(mpi_errno);
                }
                else if (nbytes < MPIR_CVAR_GATHER_VSMALL_MSG_SIZE) {
                    mpi_errno = MPIR_Sched_send(tmp_buf, curr_cnt, MPI_BYTE, dst, comm_ptr, s);
                    if (mpi_errno) MPIR_ERR_POP(mpi_errno);
                    mpi_errno = MPIR_Sched_barrier(s);
                    if (mpi_errno) MPIR_ERR_POP(mpi_errno);
                }
                else {
                    blocks[0] = sendcount;
                    struct_displs[0] = MPIR_VOID_PTR_CAST_TO_MPI_AINT sendbuf;
                    types[0] = sendtype;
		    /* check for overflow.  work around int limits if needed*/
		    if (curr_cnt - nbytes != (int)(curr_cnt-nbytes)) {
			blocks[1] = 1;
			MPIR_Type_contiguous_x_impl(curr_cnt - nbytes,
				MPI_BYTE, &(types[1]));
		    } else {
			MPIR_Assign_trunc(blocks[1], curr_cnt - nbytes, int);
			types[1] = MPI_BYTE;
		    }
                    struct_displs[1] = MPIR_VOID_PTR_CAST_TO_MPI_AINT tmp_buf;

                    mpi_errno = MPIR_Type_create_struct_impl(2, blocks, struct_displs, types, &tmp_type);
                    if (mpi_errno) MPIR_ERR_POP(mpi_errno);
                    mpi_errno = MPIR_Type_commit_impl(&tmp_type);
                    if (mpi_errno) MPIR_ERR_POP(mpi_errno);

                    mpi_errno = MPIR_Sched_send(MPI_BOTTOM, 1, tmp_type, dst, comm_ptr, s);
                    if (mpi_errno) MPIR_ERR_POP(mpi_errno);
                    MPIR_SCHED_BARRIER(s);

                    /* this "premature" free is safe b/c the sched holds an actual ref to keep it alive */
                    MPIR_Type_free_impl(&tmp_type);
                }

                break;
            }
            mask <<= 1;
        }

        if ((rank == root) && root && (nbytes < MPIR_CVAR_GATHER_VSMALL_MSG_SIZE) && copy_blks) {
            /* reorder and copy from tmp_buf into recvbuf */
            /* FIXME why are there two copies here? */
            mpi_errno = MPIR_Sched_copy(tmp_buf, nbytes * (comm_size - copy_offset), MPI_BYTE,
                                       ((char *)recvbuf + extent * recvcount * copy_offset),
                                       recvcount * (comm_size - copy_offset), recvtype, s);
            if (mpi_errno) MPIR_ERR_POP(mpi_errno);
            mpi_errno = MPIR_Sched_copy((char *)tmp_buf + nbytes * (comm_size - copy_offset),
                                        nbytes * (copy_blks - comm_size + copy_offset), MPI_BYTE,
                                        recvbuf, recvcount * (copy_blks - comm_size + copy_offset),
                                        recvtype, s);
            if (mpi_errno) MPIR_ERR_POP(mpi_errno);
        }
    }
#ifdef MPID_HAS_HETERO
    else {
Example #12
/* Returns MPI_SUCCESS on success, an MPI error code on failure.  Code above
 * needs to call MPIO_Err_return_xxx.
 */
static int MPII_Type_cyclic(int *array_of_gsizes, int dim, int ndims, int nprocs,
                            int rank, int darg, int order, MPI_Aint orig_extent,
                            MPI_Datatype type_old, MPI_Datatype * type_new, MPI_Aint * st_offset)
{
/* nprocs = no. of processes in dimension dim of grid
   rank = coordinate of this process in dimension dim */
    int mpi_errno = MPI_SUCCESS;
    int blksize, i, blklens[3], st_index, end_index, local_size, rem, count;
    MPI_Aint stride, disps[3];
    MPI_Datatype type_tmp, types[3];

    if (darg == MPI_DISTRIBUTE_DFLT_DARG)
        blksize = 1;
    else
        blksize = darg;

    MPIR_ERR_CHKINTERNAL(blksize <= 0, mpi_errno, "blksize must be > 0");

    st_index = rank * blksize;
    end_index = array_of_gsizes[dim] - 1;

    if (end_index < st_index)
        local_size = 0;
    else {
        local_size = ((end_index - st_index + 1) / (nprocs * blksize)) * blksize;
        rem = (end_index - st_index + 1) % (nprocs * blksize);
        local_size += (rem < blksize) ? rem : blksize;
    }
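    /* e.g., gsize = 10, nprocs = 3, blksize = 2, rank = 1: st_index = 2,
     * local_size = (8/6)*2 + min(8%6, 2) = 2 + 2 = 4 -- global elements
     * {2,3,8,9} land on this process */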

    count = local_size / blksize;
    rem = local_size % blksize;

    stride = ((MPI_Aint) nprocs) * ((MPI_Aint) blksize) * orig_extent;
    if (order == MPI_ORDER_FORTRAN)
        for (i = 0; i < dim; i++)
            stride *= (MPI_Aint) (array_of_gsizes[i]);
    else
        for (i = ndims - 1; i > dim; i--)
            stride *= (MPI_Aint) (array_of_gsizes[i]);

    mpi_errno = MPIR_Type_hvector_impl(count, blksize, stride, type_old, type_new);
    if (mpi_errno)
        MPIR_ERR_POP(mpi_errno);

    if (rem) {
        /* if the last block is of size less than blksize, include
         * it separately using MPI_Type_struct */

        types[0] = *type_new;
        types[1] = type_old;
        disps[0] = 0;
        disps[1] = ((MPI_Aint) count) * stride;
        blklens[0] = 1;
        blklens[1] = rem;

        mpi_errno = MPIR_Type_struct_impl(2, blklens, disps, types, &type_tmp);
        if (mpi_errno)
            MPIR_ERR_POP(mpi_errno);

        MPIR_Type_free_impl(type_new);
        *type_new = type_tmp;
    }

    /* In the first iteration, we need to set the displacement in that
     * dimension correctly. */
    if (((order == MPI_ORDER_FORTRAN) && (dim == 0)) ||
        ((order == MPI_ORDER_C) && (dim == ndims - 1))) {
        types[0] = MPI_LB;
        disps[0] = 0;
        types[1] = *type_new;
        disps[1] = ((MPI_Aint) rank) * ((MPI_Aint) blksize) * orig_extent;
        types[2] = MPI_UB;
        disps[2] = orig_extent * ((MPI_Aint) (array_of_gsizes[dim]));
        blklens[0] = blklens[1] = blklens[2] = 1;

        mpi_errno = MPIR_Type_struct_impl(3, blklens, disps, types, &type_tmp);
        if (mpi_errno)
            MPIR_ERR_POP(mpi_errno);

        MPIR_Type_free_impl(type_new);
        *type_new = type_tmp;

        *st_offset = 0; /* set it to 0 because it is taken care of in
                         * the struct above */
    } else {
        *st_offset = ((MPI_Aint) rank) * ((MPI_Aint) blksize);
        /* st_offset is in terms of no. of elements of type oldtype in
         * this dimension */
    }

    if (local_size == 0)
        *st_offset = 0;

  fn_exit:
    return mpi_errno;
  fn_fail:
    goto fn_exit;
}
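The local-size arithmetic above is the standard block-cyclic ownership count (the quantity ScaLAPACK's numroc computes). A self-contained sketch under that reading, with a helper name of our own:

/* number of elements of a gsize-long dimension owned by `rank` when the
 * dimension is dealt out cyclically in blocks of blksize over nprocs */
static int cyclic_local_size(int gsize, int nprocs, int rank, int blksize)
{
    int st_index = rank * blksize;          /* first global index we own */
    int span = gsize - st_index;            /* elements from there onward */
    int local, rem;
    if (span <= 0)
        return 0;
    local = (span / (nprocs * blksize)) * blksize;  /* whole rounds */
    rem = span % (nprocs * blksize);                /* partial round */
    local += (rem < blksize) ? rem : blksize;
    return local;
}

For instance, cyclic_local_size(10, 3, 1, 2) returns 4, matching the {2,3,8,9} example noted in the code above.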
Example #13
int MPIR_Type_convert_darray(int size,
                             int rank,
                             int ndims,
                             int *array_of_gsizes,
                             int *array_of_distribs,
                             int *array_of_dargs,
                             int *array_of_psizes,
                             int order, MPI_Datatype oldtype, MPI_Datatype * newtype)
{
    int mpi_errno = MPI_SUCCESS;
    MPI_Datatype type_old, type_new = MPI_DATATYPE_NULL, types[3];
    int procs, tmp_rank, i, tmp_size, blklens[3], *coords;
    MPI_Aint *st_offsets, orig_extent, disps[3];

    MPIR_Datatype_get_extent_macro(oldtype, orig_extent);

/* calculate position in Cartesian grid as MPI would (row-major
   ordering) */
    coords = (int *) DLOOP_Malloc(ndims * sizeof(int), MPL_MEM_DATATYPE);
    procs = size;
    tmp_rank = rank;
    for (i = 0; i < ndims; i++) {
        procs = procs / array_of_psizes[i];
        coords[i] = tmp_rank / procs;
        tmp_rank = tmp_rank % procs;
    }

    st_offsets = (MPI_Aint *) DLOOP_Malloc(ndims * sizeof(MPI_Aint), MPL_MEM_DATATYPE);
    type_old = oldtype;

    if (order == MPI_ORDER_FORTRAN) {
        /* dimension 0 changes fastest */
        for (i = 0; i < ndims; i++) {
            switch (array_of_distribs[i]) {
                case MPI_DISTRIBUTE_BLOCK:
                    mpi_errno = MPII_Type_block(array_of_gsizes, i, ndims,
                                                array_of_psizes[i],
                                                coords[i], array_of_dargs[i],
                                                order, orig_extent,
                                                type_old, &type_new, st_offsets + i);
                    if (mpi_errno)
                        MPIR_ERR_POP(mpi_errno);
                    break;
                case MPI_DISTRIBUTE_CYCLIC:
                    mpi_errno = MPII_Type_cyclic(array_of_gsizes, i, ndims,
                                                 array_of_psizes[i], coords[i],
                                                 array_of_dargs[i], order,
                                                 orig_extent, type_old, &type_new, st_offsets + i);
                    if (mpi_errno)
                        MPIR_ERR_POP(mpi_errno);
                    break;
                case MPI_DISTRIBUTE_NONE:
                    /* treat it as a block distribution on 1 process */
                    mpi_errno = MPII_Type_block(array_of_gsizes, i, ndims, 1, 0,
                                                MPI_DISTRIBUTE_DFLT_DARG, order,
                                                orig_extent, type_old, &type_new, st_offsets + i);
                    if (mpi_errno)
                        MPIR_ERR_POP(mpi_errno);
                    break;
            }
            if (i)
                MPIR_Type_free_impl(&type_old);
            type_old = type_new;
        }

        /* add displacement and UB */
        disps[1] = st_offsets[0];
        tmp_size = 1;
        for (i = 1; i < ndims; i++) {
            tmp_size *= array_of_gsizes[i - 1];
            disps[1] += ((MPI_Aint) tmp_size) * st_offsets[i];
        }
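        /* i.e., disps[1] = st_offsets[0] + gsizes[0]*st_offsets[1]
         *                + gsizes[0]*gsizes[1]*st_offsets[2] + ...
         * -- the Fortran-order linearization of the per-dimension starts */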
        /* rest done below for both Fortran and C order */
    }

    else {      /* order == MPI_ORDER_C */

        /* dimension ndims-1 changes fastest */
        for (i = ndims - 1; i >= 0; i--) {
            switch (array_of_distribs[i]) {
                case MPI_DISTRIBUTE_BLOCK:
                    mpi_errno = MPII_Type_block(array_of_gsizes, i, ndims, array_of_psizes[i],
                                                coords[i], array_of_dargs[i], order,
                                                orig_extent, type_old, &type_new, st_offsets + i);
                    if (mpi_errno)
                        MPIR_ERR_POP(mpi_errno);
                    break;
                case MPI_DISTRIBUTE_CYCLIC:
                    mpi_errno = MPII_Type_cyclic(array_of_gsizes, i, ndims,
                                                 array_of_psizes[i], coords[i],
                                                 array_of_dargs[i], order,
                                                 orig_extent, type_old, &type_new, st_offsets + i);
                    if (mpi_errno)
                        MPIR_ERR_POP(mpi_errno);
                    break;
                case MPI_DISTRIBUTE_NONE:
                    /* treat it as a block distribution on 1 process */
                    mpi_errno = MPII_Type_block(array_of_gsizes, i, ndims, array_of_psizes[i],
                                                coords[i], MPI_DISTRIBUTE_DFLT_DARG, order,
                                                orig_extent, type_old, &type_new, st_offsets + i);
                    if (mpi_errno)
                        MPIR_ERR_POP(mpi_errno);
                    break;
            }
            if (i != ndims - 1)
                MPIR_Type_free_impl(&type_old);
            type_old = type_new;
        }

        /* add displacement and UB */
        disps[1] = st_offsets[ndims - 1];
        tmp_size = 1;
        for (i = ndims - 2; i >= 0; i--) {
            tmp_size *= array_of_gsizes[i + 1];
            disps[1] += ((MPI_Aint) tmp_size) * st_offsets[i];
        }
    }

    disps[1] *= orig_extent;

    disps[2] = orig_extent;
    for (i = 0; i < ndims; i++)
        disps[2] *= (MPI_Aint) (array_of_gsizes[i]);

    disps[0] = 0;
    blklens[0] = blklens[1] = blklens[2] = 1;
    types[0] = MPI_LB;
    types[1] = type_new;
    types[2] = MPI_UB;

    DLOOP_Free(st_offsets);
    DLOOP_Free(coords);

    mpi_errno = MPIR_Type_struct_impl(3, blklens, disps, types, newtype);
    if (mpi_errno)
        MPIR_ERR_POP(mpi_errno);

    MPIR_Type_free_impl(&type_new);


  fn_exit:
    return mpi_errno;
  fn_fail:
    goto fn_exit;
}
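The coordinate computation at the top of this function is a row-major decode of the linear rank into grid coordinates, as the comment says MPI would do it. A standalone sketch of just that step (the helper name is ours):

static void rank_to_coords(int rank, int ndims, const int *psizes, int *coords)
{
    int i, procs = 1;
    for (i = 0; i < ndims; i++)
        procs *= psizes[i];                 /* total processes in the grid */
    for (i = 0; i < ndims; i++) {
        procs /= psizes[i];                 /* rank stride of dimension i */
        coords[i] = rank / procs;
        rank %= procs;
    }
}

For example, rank 5 in a 2 x 3 grid decodes to coordinates (1, 2).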
Example #14
void MPIDU_Type_calc_footprint(MPI_Datatype type, DLOOP_Type_footprint *tfp)
{
    int mpi_errno;
    int nr_ints, nr_aints, nr_types, combiner;
    int *ints;
    MPI_Aint *aints;
    MPI_Datatype *types;

    /* used to store parameters for constituent types */
    DLOOP_Offset size = 0, lb = 0, ub = 0, true_lb = 0, true_ub = 0;
    DLOOP_Offset extent = 0, alignsz;
    int has_sticky_lb, has_sticky_ub;

    /* used for vector/hvector/hvector_integer calculations */
    DLOOP_Offset stride;

    /* used for indexed/hindexed calculations */
    DLOOP_Offset disp;

    /* used for calculations on types with more than one block of data */
    DLOOP_Offset i, min_lb, max_ub, ntypes, tmp_lb, tmp_ub;

    /* used for processing subarray and darray types */
    int ndims;
    MPI_Datatype tmptype;

    MPIR_Type_get_envelope(type, &nr_ints, &nr_aints, &nr_types, &combiner);

    if (combiner == MPI_COMBINER_NAMED) {
	int mpisize;
	MPI_Aint mpiextent;

	MPIR_Datatype_get_size_macro(type, mpisize);
	MPIR_Datatype_get_extent_macro(type, mpiextent);
	tfp->size    = (DLOOP_Offset) mpisize;
	tfp->lb      = 0;
	tfp->ub      = (DLOOP_Offset) mpiextent;
	tfp->true_lb = 0;
	tfp->true_ub = (DLOOP_Offset) mpiextent;
	tfp->extent  = (DLOOP_Offset) mpiextent;
	tfp->alignsz = DLOOP_Named_type_alignsize(type, (MPI_Aint) 0);
	tfp->has_sticky_lb = (type == MPI_LB) ? 1 : 0;
	tfp->has_sticky_ub = (type == MPI_UB) ? 1 : 0;

	goto clean_exit;
    }

    /* get access to contents; need it immediately to check for zero count */
    MPIDU_Type_access_contents(type, &ints, &aints, &types);

    /* knock out all the zero count cases */
    if ((combiner == MPI_COMBINER_CONTIGUOUS ||
	 combiner == MPI_COMBINER_VECTOR ||
	 combiner == MPI_COMBINER_HVECTOR_INTEGER ||
	 combiner == MPI_COMBINER_HVECTOR ||
	 combiner == MPI_COMBINER_INDEXED_BLOCK ||
	 combiner == MPI_COMBINER_HINDEXED_BLOCK ||
	 combiner == MPI_COMBINER_INDEXED ||
	 combiner == MPI_COMBINER_HINDEXED_INTEGER ||
	 combiner == MPI_COMBINER_STRUCT_INTEGER ||
	 combiner == MPI_COMBINER_STRUCT) && ints[0] == 0)
    {
	tfp->size = tfp->lb = tfp->ub = tfp->extent = tfp->alignsz = 0;
	tfp->true_lb = tfp->true_ub = 0;
	tfp->has_sticky_lb = tfp->has_sticky_ub = 0;
	goto clean_exit;
    }

    if (combiner != MPI_COMBINER_STRUCT &&
	combiner != MPI_COMBINER_STRUCT_INTEGER)
    {
	DLOOP_Type_footprint cfp;

	MPIDU_Type_calc_footprint(types[0], &cfp);
	size    = cfp.size;
	lb      = cfp.lb;
	ub      = cfp.ub;
	true_lb = cfp.true_lb;
	true_ub = cfp.true_ub;
	extent  = cfp.extent;
	alignsz = cfp.alignsz;
	has_sticky_lb = cfp.has_sticky_lb;
	has_sticky_ub = cfp.has_sticky_ub;

	/* initialize some common values so we don't have to assign
	 * them in every case below.
	 */
	tfp->alignsz = alignsz;
	tfp->has_sticky_lb = has_sticky_lb;
	tfp->has_sticky_ub = has_sticky_ub;

    }

    switch(combiner)
    {
	case MPI_COMBINER_DUP:
	    tfp->size    = size;
	    tfp->lb      = lb;
	    tfp->ub      = ub;
	    tfp->true_lb = true_lb;
	    tfp->true_ub = true_ub;
	    tfp->extent  = extent;
	    break;
	case MPI_COMBINER_RESIZED:
	    tfp->size    = size;
	    tfp->lb      = aints[0]; /* lb */
	    tfp->ub      = aints[0] + aints[1];
	    tfp->true_lb = true_lb;
	    tfp->true_ub = true_ub;
	    tfp->extent  = aints[1]; /* extent */
	    tfp->has_sticky_lb = 1;
	    tfp->has_sticky_ub = 1;
	    break;
	case MPI_COMBINER_CONTIGUOUS:
	    DLOOP_DATATYPE_CONTIG_LB_UB(ints[0] /* count */,
					lb, ub, extent,
					tfp->lb, tfp->ub);
	    tfp->true_lb = tfp->lb + (true_lb - lb);
	    tfp->true_ub = tfp->ub + (true_ub - ub);
	    tfp->size    = (DLOOP_Offset) ints[0] * size;
	    tfp->extent  = tfp->ub - tfp->lb;
	    break;
	case MPI_COMBINER_VECTOR:
	case MPI_COMBINER_HVECTOR:
	case MPI_COMBINER_HVECTOR_INTEGER:
	    if (combiner == MPI_COMBINER_VECTOR) stride = (DLOOP_Offset) ints[2] * extent;
	    else if (combiner == MPI_COMBINER_HVECTOR) stride = aints[0];
	    else /* HVECTOR_INTEGER */ stride = (DLOOP_Offset) ints[2];

	    DLOOP_DATATYPE_VECTOR_LB_UB(ints[0] /* count */,
					stride /* stride in bytes */,
					ints[1] /* blklen */,
					lb, ub, extent,
					tfp->lb, tfp->ub);
	    tfp->true_lb = tfp->lb + (true_lb - lb);
	    tfp->true_ub = tfp->ub + (true_ub - ub);
	    tfp->size    = (DLOOP_Offset) ints[0] * (DLOOP_Offset) ints[1] * size;
	    tfp->extent  = tfp->ub - tfp->lb;
	    break;
	case MPI_COMBINER_INDEXED_BLOCK:
	    /* prime min_lb and max_ub */
	    DLOOP_DATATYPE_BLOCK_LB_UB(ints[1] /* blklen */,
				       (DLOOP_Offset) ints[2] * extent /* disp */,
				       lb, ub, extent,
				       min_lb, max_ub);

	    for (i=1; i < ints[0]; i++) {
		DLOOP_DATATYPE_BLOCK_LB_UB(ints[1] /* blklen */,
					   (DLOOP_Offset) ints[i+2] * extent /* disp */,
					   lb, ub, extent,
					   tmp_lb, tmp_ub);
		if (tmp_lb < min_lb) min_lb = tmp_lb;
		if (tmp_ub > max_ub) max_ub = tmp_ub;
	    }
	    tfp->size    = (DLOOP_Offset) ints[0] * (DLOOP_Offset) ints[1] * size;
	    tfp->lb      = min_lb;
	    tfp->ub      = max_ub;
	    tfp->true_lb = min_lb + (true_lb - lb);
	    tfp->true_ub = max_ub + (true_ub - ub);
	    tfp->extent  = tfp->ub - tfp->lb;
	    break;
	case MPI_COMBINER_HINDEXED_BLOCK:
	    /* prime min_lb and max_ub */
	    DLOOP_DATATYPE_BLOCK_LB_UB(ints[1] /* blklen */,
				       (DLOOP_Offset) ints[2] /* disp */,
				       lb, ub, extent,
				       min_lb, max_ub);

	    for (i=1; i < ints[0]; i++) {
		DLOOP_DATATYPE_BLOCK_LB_UB(ints[1] /* blklen */,
					   (DLOOP_Offset) ints[i+2] /* disp */,
					   lb, ub, extent,
					   tmp_lb, tmp_ub);
		if (tmp_lb < min_lb) min_lb = tmp_lb;
		if (tmp_ub > max_ub) max_ub = tmp_ub;
	    }
	    tfp->size    = (DLOOP_Offset) ints[0] * (DLOOP_Offset) ints[1] * size;
	    tfp->lb      = min_lb;
	    tfp->ub      = max_ub;
	    tfp->true_lb = min_lb + (true_lb - lb);
	    tfp->true_ub = max_ub + (true_ub - ub);
	    tfp->extent  = tfp->ub - tfp->lb;
	    break;
	case MPI_COMBINER_INDEXED:
	case MPI_COMBINER_HINDEXED_INTEGER:
	case MPI_COMBINER_HINDEXED:
	    /* find first non-zero blocklength element */
	    for (i=0; i < ints[0] && ints[i+1] == 0; i++);
	    if (i == ints[0]) {
		/* all zero blocklengths */
		tfp->size = tfp->lb = tfp->ub = tfp->extent = tfp->alignsz = 0;
		tfp->has_sticky_lb = tfp->has_sticky_ub = 0;
	    }
	    else {
		/* prime min_lb, max_ub, count */
		ntypes = ints[i+1];
		if (combiner == MPI_COMBINER_INDEXED)
		    disp = (DLOOP_Offset) ints[ints[0]+i+1] * extent;
		else if (combiner == MPI_COMBINER_HINDEXED_INTEGER)
		    disp = (DLOOP_Offset) ints[ints[0]+i+1];
		else /* MPI_COMBINER_HINDEXED */
		    disp = aints[i];

		DLOOP_DATATYPE_BLOCK_LB_UB(ints[i+1] /* blklen */,
					   disp,
					   lb, ub, extent,
					   min_lb, max_ub);

		for (i++; i < ints[0]; i++) {
		    /* skip zero blocklength elements */
		    if (ints[i+1] == 0) continue;

		    ntypes += ints[i+1];
		    if (combiner == MPI_COMBINER_INDEXED)
			disp = (DLOOP_Offset) ints[ints[0]+i+1] * extent;
		    else if (combiner == MPI_COMBINER_HINDEXED_INTEGER)
			disp = (DLOOP_Offset) ints[ints[0]+i+1];
		    else /* MPI_COMBINER_HINDEXED */
			disp = aints[i];

		    DLOOP_DATATYPE_BLOCK_LB_UB(ints[i+1],
					       disp,
					       lb, ub, extent,
					       tmp_lb, tmp_ub);
		    if (tmp_lb < min_lb) min_lb = tmp_lb;
		    if (tmp_ub > max_ub) max_ub = tmp_ub;
		}
		tfp->size    = ntypes * size;
		tfp->lb      = min_lb;
		tfp->ub      = max_ub;
		tfp->true_lb = min_lb + (true_lb - lb);
		tfp->true_ub = max_ub + (true_ub - ub);
		tfp->extent  = tfp->ub - tfp->lb;
	    }
	    break;
	case MPI_COMBINER_STRUCT_INTEGER:
	    /* this case should be unreachable; the assert fires if a
	     * struct-integer type ever arrives here */
	    DLOOP_Assert(combiner != MPI_COMBINER_STRUCT_INTEGER);
	    break;
	case MPI_COMBINER_STRUCT:
	    /* sufficiently complicated to pull out into separate fn */
	    DLOOP_Type_calc_footprint_struct(type,
					     combiner, ints, aints, types,
					     tfp);
	    break;
	case MPI_COMBINER_SUBARRAY:
	    ndims = ints[0];
	    MPIDU_Type_convert_subarray(ndims,
						  &ints[1] /* sizes */,
						  &ints[1+ndims] /* subsz */,
						  &ints[1+2*ndims] /* strts */,
						  ints[1+3*ndims] /* order */,
						  types[0],
						  &tmptype);
	    MPIDU_Type_calc_footprint(tmptype, tfp);
	    MPIR_Type_free_impl(&tmptype);
	    break;
	case MPI_COMBINER_DARRAY:
	    ndims = ints[2];

	    MPIDU_Type_convert_darray(ints[0] /* size */,
						ints[1] /* rank */,
						ndims,
						&ints[3] /* gsizes */,
						&ints[3+ndims] /*distribs */,
						&ints[3+2*ndims] /* dargs */,
						&ints[3+3*ndims] /* psizes */,
						ints[3+4*ndims] /* order */,
						types[0],
						&tmptype);

	    MPIDU_Type_calc_footprint(tmptype, tfp);
	    MPIR_Type_free_impl(&tmptype);
	    break;
	case MPI_COMBINER_F90_REAL:
	case MPI_COMBINER_F90_COMPLEX:
	case MPI_COMBINER_F90_INTEGER:
	default:
	    DLOOP_Assert(0);
	    break;
    }

 clean_exit:
    MPIDU_Type_release_contents(type, &ints, &aints, &types);
    return;
}
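The indexed/hindexed cases above all reduce to a min/max scan of per-block bounds. A minimal sketch of that scan, assuming a non-negative extent and ignoring the sticky-LB/UB and zero-count subtleties the real DLOOP macros handle:

/* bounds of an (h)indexed type over a base type with the given lb/ub/extent;
 * a non-empty block spans [disp + lb, disp + ub + (blklen - 1) * extent] */
static void indexed_bounds(int count, const int *blklens, const MPI_Aint *disps,
                           MPI_Aint lb, MPI_Aint ub, MPI_Aint extent,
                           MPI_Aint *min_lb, MPI_Aint *max_ub)
{
    int i, primed = 0;
    for (i = 0; i < count; i++) {
        MPI_Aint blk_lb, blk_ub;
        if (blklens[i] == 0)
            continue;           /* zero-length blocks contribute nothing */
        blk_lb = disps[i] + lb;
        blk_ub = disps[i] + ub + (MPI_Aint) (blklens[i] - 1) * extent;
        if (!primed || blk_lb < *min_lb)
            *min_lb = blk_lb;
        if (!primed || blk_ub > *max_ub)
            *max_ub = blk_ub;
        primed = 1;
    }
}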
Example #15
int MPIR_Ireduce_scatter_sched_intra_recursive_doubling(const void *sendbuf, void *recvbuf, const int recvcounts[],
                                 MPI_Datatype datatype, MPI_Op op, MPIR_Comm *comm_ptr,
                                 MPIR_Sched_t s)
{
    int mpi_errno = MPI_SUCCESS;
    int rank, comm_size, i;
    MPI_Aint extent, true_extent, true_lb;
    int  *disps;
    void *tmp_recvbuf, *tmp_results;
    int type_size ATTRIBUTE((unused)), dis[2], blklens[2], total_count, dst;
    int mask, dst_tree_root, my_tree_root, j, k;
    int received;
    MPI_Datatype sendtype, recvtype;
    int nprocs_completed, tmp_mask, tree_root, is_commutative;
    MPIR_SCHED_CHKPMEM_DECL(5);

    comm_size = comm_ptr->local_size;
    rank = comm_ptr->rank;

    MPIR_Datatype_get_extent_macro(datatype, extent);
    MPIR_Type_get_true_extent_impl(datatype, &true_lb, &true_extent);
    is_commutative = MPIR_Op_is_commutative(op);

    MPIR_SCHED_CHKPMEM_MALLOC(disps, int *, comm_size * sizeof(int), mpi_errno, "disps", MPL_MEM_BUFFER);

    total_count = 0;
    for (i=0; i<comm_size; i++) {
        disps[i] = total_count;
        total_count += recvcounts[i];
    }

    if (total_count == 0) {
        goto fn_exit;
    }

    MPIR_Datatype_get_size_macro(datatype, type_size);

    /* total_count*extent eventually gets malloc'ed; it isn't added to
     * a user-passed-in buffer */
    MPIR_Ensure_Aint_fits_in_pointer(total_count * MPL_MAX(true_extent, extent));


    /* need to allocate temporary buffer to receive incoming data*/
    MPIR_SCHED_CHKPMEM_MALLOC(tmp_recvbuf, void *, total_count*(MPL_MAX(true_extent,extent)), mpi_errno, "tmp_recvbuf", MPL_MEM_BUFFER);
    /* adjust for potential negative lower bound in datatype */
    tmp_recvbuf = (void *)((char*)tmp_recvbuf - true_lb);

    /* need to allocate another temporary buffer to accumulate
       results */
    MPIR_SCHED_CHKPMEM_MALLOC(tmp_results, void *, total_count*(MPL_MAX(true_extent,extent)), mpi_errno, "tmp_results", MPL_MEM_BUFFER);
    /* adjust for potential negative lower bound in datatype */
    tmp_results = (void *)((char*)tmp_results - true_lb);

    /* copy sendbuf into tmp_results */
    if (sendbuf != MPI_IN_PLACE)
        mpi_errno = MPIR_Sched_copy(sendbuf, total_count, datatype,
                                    tmp_results, total_count, datatype, s);
    else
        mpi_errno = MPIR_Sched_copy(recvbuf, total_count, datatype,
                                    tmp_results, total_count, datatype, s);

    if (mpi_errno) MPIR_ERR_POP(mpi_errno);
    MPIR_SCHED_BARRIER(s);

    mask = 0x1;
    i = 0;
    while (mask < comm_size) {
        dst = rank ^ mask;

        dst_tree_root = dst >> i;
        dst_tree_root <<= i;

        my_tree_root = rank >> i;
        my_tree_root <<= i;
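        /* e.g., rank = 5, mask = 4 (i = 2): dst = 1, dst_tree_root = 0,
         * my_tree_root = 4 -- subtrees {0..3} and {4..7} pair up */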

        /* At step 1, processes exchange (n-n/p) amount of
           data; at step 2, (n-2n/p) amount of data; at step 3, (n-4n/p)
           amount of data, and so forth. We use derived datatypes for this.

           At each step, a process does not need to send data
           indexed from my_tree_root to
           my_tree_root+mask-1. Similarly, a process won't receive
           data indexed from dst_tree_root to dst_tree_root+mask-1. */

        /* calculate sendtype */
        blklens[0] = blklens[1] = 0;
        for (j=0; j<my_tree_root; j++)
            blklens[0] += recvcounts[j];
        for (j=my_tree_root+mask; j<comm_size; j++)
            blklens[1] += recvcounts[j];

        dis[0] = 0;
        dis[1] = blklens[0];
        for (j=my_tree_root; (j<my_tree_root+mask) && (j<comm_size); j++)
            dis[1] += recvcounts[j];

        mpi_errno = MPIR_Type_indexed_impl(2, blklens, dis, datatype, &sendtype);
        if (mpi_errno) MPIR_ERR_POP(mpi_errno);

        mpi_errno = MPIR_Type_commit_impl(&sendtype);
        if (mpi_errno) MPIR_ERR_POP(mpi_errno);

        /* calculate recvtype */
        blklens[0] = blklens[1] = 0;
        for (j=0; j<dst_tree_root && j<comm_size; j++)
            blklens[0] += recvcounts[j];
        for (j=dst_tree_root+mask; j<comm_size; j++)
            blklens[1] += recvcounts[j];

        dis[0] = 0;
        dis[1] = blklens[0];
        for (j=dst_tree_root; (j<dst_tree_root+mask) && (j<comm_size); j++)
            dis[1] += recvcounts[j];

        mpi_errno = MPIR_Type_indexed_impl(2, blklens, dis, datatype, &recvtype);
        if (mpi_errno) MPIR_ERR_POP(mpi_errno);

        mpi_errno = MPIR_Type_commit_impl(&recvtype);
        if (mpi_errno) MPIR_ERR_POP(mpi_errno);

        received = 0;
        if (dst < comm_size) {
            /* tmp_results contains data to be sent in each step. Data is
               received in tmp_recvbuf and then accumulated into
               tmp_results. accumulation is done later below.   */

            mpi_errno = MPIR_Sched_send(tmp_results, 1, sendtype, dst, comm_ptr, s);
            if (mpi_errno) MPIR_ERR_POP(mpi_errno);
            mpi_errno = MPIR_Sched_recv(tmp_recvbuf, 1, recvtype, dst, comm_ptr, s);
            if (mpi_errno) MPIR_ERR_POP(mpi_errno);
            MPIR_SCHED_BARRIER(s);
            received = 1;
        }

        /* if some processes in this process's subtree in this step
           did not have any destination process to communicate with
           because of non-power-of-two, we need to send them the
           result. We use a logarithmic recursive-halving algorithm
           for this. */

        if (dst_tree_root + mask > comm_size) {
            nprocs_completed = comm_size - my_tree_root - mask;
            /* nprocs_completed is the number of processes in this
               subtree that have all the data. Send data to others
               in a tree fashion. First find root of current tree
               that is being divided into two. k is the number of
               least-significant bits in this process's rank that
               must be zeroed out to find the rank of the root */
            j = mask;
            k = 0;
            while (j) {
                j >>= 1;
                k++;
            }
            k--;

            tmp_mask = mask >> 1;
            while (tmp_mask) {
                dst = rank ^ tmp_mask;

                tree_root = rank >> k;
                tree_root <<= k;

                /* send only if this proc has data and destination
                   doesn't have data. at any step, multiple processes
                   can send if they have the data */
                if ((dst > rank) &&
                    (rank < tree_root + nprocs_completed)
                    && (dst >= tree_root + nprocs_completed))
                {
                    /* send the current result */
                    mpi_errno = MPIR_Sched_send(tmp_recvbuf, 1, recvtype, dst, comm_ptr, s);
                    if (mpi_errno) MPIR_ERR_POP(mpi_errno);
                    MPIR_SCHED_BARRIER(s);
                }
                /* recv only if this proc. doesn't have data and sender
                   has data */
                else if ((dst < rank) &&
                         (dst < tree_root + nprocs_completed) &&
                         (rank >= tree_root + nprocs_completed))
                {
                    mpi_errno = MPIR_Sched_recv(tmp_recvbuf, 1, recvtype, dst, comm_ptr, s);
                    if (mpi_errno) MPIR_ERR_POP(mpi_errno);
                    MPIR_SCHED_BARRIER(s);
                    received = 1;
                }
                tmp_mask >>= 1;
                k--;
            }
        }

        /* N.B. The following comment comes from the FT version of
         * MPI_Reduce_scatter.  It does not currently apply to this code, but
         * will in the future when we update the NBC code to be fault-tolerant
         * in roughly the same fashion. [goodell@ 2011-03-03] */
        /* The following reduction is done here instead of after
           the MPIC_Sendrecv or MPIC_Recv above. This is
           because to do it above, in the noncommutative
           case, we would need an extra temp buffer so as not to
           overwrite temp_recvbuf, because temp_recvbuf may have
           to be communicated to other processes in the
           non-power-of-two case. To avoid that extra allocation,
           we do the reduce here. */
        if (received) {
            if (is_commutative || (dst_tree_root < my_tree_root)) {
                mpi_errno = MPIR_Sched_reduce(tmp_recvbuf, tmp_results, blklens[0], datatype, op, s);
                if (mpi_errno) MPIR_ERR_POP(mpi_errno);
                mpi_errno = MPIR_Sched_reduce(((char *)tmp_recvbuf + dis[1]*extent),
                                              ((char *)tmp_results + dis[1]*extent),
                                              blklens[1], datatype, op, s);
                if (mpi_errno) MPIR_ERR_POP(mpi_errno);
                MPIR_SCHED_BARRIER(s);
            }
            else {
                mpi_errno = MPIR_Sched_reduce(tmp_results, tmp_recvbuf, blklens[0], datatype, op, s);
                if (mpi_errno) MPIR_ERR_POP(mpi_errno);
                mpi_errno = MPIR_Sched_reduce(((char *)tmp_results + dis[1]*extent),
                                              ((char *)tmp_recvbuf + dis[1]*extent),
                                              blklens[1], datatype, op, s);
                if (mpi_errno) MPIR_ERR_POP(mpi_errno);
                MPIR_SCHED_BARRIER(s);

                /* copy result back into tmp_results */
                mpi_errno = MPIR_Sched_copy(tmp_recvbuf, 1, recvtype,
                                            tmp_results, 1, recvtype, s);
                if (mpi_errno) MPIR_ERR_POP(mpi_errno);
                MPIR_SCHED_BARRIER(s);
            }
        }

        MPIR_Type_free_impl(&sendtype);
        MPIR_Type_free_impl(&recvtype);

        mask <<= 1;
        i++;
    }
Example #16
int MPIR_Alltoall_intra( 
    const void *sendbuf,
    int sendcount, 
    MPI_Datatype sendtype, 
    void *recvbuf, 
    int recvcount, 
    MPI_Datatype recvtype, 
    MPID_Comm *comm_ptr,
    int *errflag )
{
    int          comm_size, i, j, pof2;
    MPI_Aint     sendtype_extent, recvtype_extent;
    MPI_Aint recvtype_true_extent, recvbuf_extent, recvtype_true_lb;
    int mpi_errno=MPI_SUCCESS, src, dst, rank, nbytes;
    int mpi_errno_ret = MPI_SUCCESS;
    MPI_Status status;
    int sendtype_size, pack_size, block, position, *displs, count;
    MPI_Datatype newtype = MPI_DATATYPE_NULL;
    void *tmp_buf;
    MPI_Comm comm;
    MPI_Request *reqarray;
    MPI_Status *starray;
    MPIU_CHKLMEM_DECL(6);
#ifdef MPIR_OLD_SHORT_ALLTOALL_ALG
    MPI_Aint sendtype_true_extent, sendbuf_extent, sendtype_true_lb;
    int k, p, curr_cnt, dst_tree_root, my_tree_root;
    int last_recv_cnt, mask, tmp_mask, tree_root, nprocs_completed;
#endif

    if (recvcount == 0) return MPI_SUCCESS;

    comm = comm_ptr->handle;
    comm_size = comm_ptr->local_size;
    rank = comm_ptr->rank;

    /* Get extent of send and recv types */
    MPID_Datatype_get_extent_macro(recvtype, recvtype_extent);
    MPID_Datatype_get_extent_macro(sendtype, sendtype_extent);

    MPID_Datatype_get_size_macro(sendtype, sendtype_size);
    nbytes = sendtype_size * sendcount;

    /* check if multiple threads are calling this collective function */
    MPIDU_ERR_CHECK_MULTIPLE_THREADS_ENTER( comm_ptr );

    if (sendbuf == MPI_IN_PLACE) {
        /* We use pair-wise sendrecv_replace in order to conserve memory usage,
         * which is in keeping with the spirit of the MPI-2.2 Standard.  But
         * because of this approach all processes must agree on the global
         * schedule of sendrecv_replace operations to avoid deadlock.
         *
         * Note that this is not an especially efficient algorithm in terms of
         * time and there will be multiple repeated malloc/free's rather than
         * maintaining a single buffer across the whole loop.  Something like
         * MADRE is probably the best solution for the MPI_IN_PLACE scenario. */
        for (i = 0; i < comm_size; ++i) {
            /* start inner loop at i to avoid re-exchanging data */
            for (j = i; j < comm_size; ++j) {
                if (rank == i) {
                    /* also covers the (rank == i && rank == j) case */
                    mpi_errno = MPIC_Sendrecv_replace_ft(((char *)recvbuf + j*recvcount*recvtype_extent),
                                                         recvcount, recvtype,
                                                         j, MPIR_ALLTOALL_TAG,
                                                         j, MPIR_ALLTOALL_TAG,
                                                         comm, &status, errflag);
                    if (mpi_errno) {
                        /* for communication errors, just record the error but continue */
                        *errflag = TRUE;
                        MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
                        MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
                    }
                }
                else if (rank == j) {
                    /* same as above with i/j args reversed */
                    mpi_errno = MPIC_Sendrecv_replace_ft(((char *)recvbuf + i*recvcount*recvtype_extent),
                                                         recvcount, recvtype,
                                                         i, MPIR_ALLTOALL_TAG,
                                                         i, MPIR_ALLTOALL_TAG,
                                                         comm, &status, errflag);
                    if (mpi_errno) {
                        /* for communication errors, just record the error but continue */
                        *errflag = TRUE;
                        MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
                        MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
                    }
                }
            }
        }
    }
    else if ((nbytes <= MPIR_PARAM_ALLTOALL_SHORT_MSG_SIZE) && (comm_size >= 8)) {

        /* use the indexing algorithm by Jehoshua Bruck et al,
         * IEEE TPDS, Nov. 97 */ 

        /* allocate temporary buffer */
        MPIR_Pack_size_impl(recvcount*comm_size, recvtype, &pack_size);
        MPIU_CHKLMEM_MALLOC(tmp_buf, void *, pack_size, mpi_errno, "tmp_buf");

        /* Do Phase 1 of the algorithm. Shift the data blocks on process i
         * upwards by a distance of i blocks. Store the result in recvbuf. */
        mpi_errno = MPIR_Localcopy((char *) sendbuf + 
			   rank*sendcount*sendtype_extent, 
                           (comm_size - rank)*sendcount, sendtype, recvbuf, 
                           (comm_size - rank)*recvcount, recvtype);
	if (mpi_errno) { MPIU_ERR_POP(mpi_errno); }
        mpi_errno = MPIR_Localcopy(sendbuf, rank*sendcount, sendtype, 
                        (char *) recvbuf + 
				   (comm_size-rank)*recvcount*recvtype_extent, 
                                   rank*recvcount, recvtype);
	if (mpi_errno) { MPIU_ERR_POP(mpi_errno); }
        /* Input data is now stored in recvbuf with datatype recvtype */

        /* Now do Phase 2, the communication phase. It takes
           ceiling(lg p) steps. In each step i, each process sends to rank+2^i
           and receives from rank-2^i, and exchanges all data blocks
           whose ith bit is 1. */

        /* allocate displacements array for indexed datatype used in
           communication */

        MPIU_CHKLMEM_MALLOC(displs, int *, comm_size * sizeof(int), mpi_errno, "displs");

        pof2 = 1;
        while (pof2 < comm_size) {
            dst = (rank + pof2) % comm_size;
            src = (rank - pof2 + comm_size) % comm_size;

            /* Exchange all data blocks whose ith bit is 1 */
            /* Create an indexed datatype for the purpose */

            count = 0;
            for (block=1; block<comm_size; block++) {
                if (block & pof2) {
                    displs[count] = block * recvcount;
                    count++;
                }
            }

            mpi_errno = MPIR_Type_create_indexed_block_impl(count, recvcount,
                                                            displs, recvtype, &newtype);
	    if (mpi_errno) MPIU_ERR_POP(mpi_errno);

            mpi_errno = MPIR_Type_commit_impl(&newtype);
	    if (mpi_errno) MPIU_ERR_POP(mpi_errno);

            position = 0;
            mpi_errno = MPIR_Pack_impl(recvbuf, 1, newtype, tmp_buf, pack_size, &position);
            if (mpi_errno) MPIU_ERR_POP(mpi_errno);

            mpi_errno = MPIC_Sendrecv_ft(tmp_buf, position, MPI_PACKED, dst,
                                         MPIR_ALLTOALL_TAG, recvbuf, 1, newtype,
                                         src, MPIR_ALLTOALL_TAG, comm,
                                         MPI_STATUS_IGNORE, errflag);
            if (mpi_errno) {
                /* for communication errors, just record the error but continue */
                *errflag = TRUE;
                MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
                MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
            }

            MPIR_Type_free_impl(&newtype);

            pof2 *= 2;
        }

        /* Rotate blocks in recvbuf upwards by (rank + 1) blocks. Need
         * a temporary buffer of the same size as recvbuf. */
        
        /* get true extent of recvtype */
        MPIR_Type_get_true_extent_impl(recvtype, &recvtype_true_lb, &recvtype_true_extent);

        recvbuf_extent = recvcount * comm_size *
            (MPIR_MAX(recvtype_true_extent, recvtype_extent));
        MPIU_CHKLMEM_MALLOC(tmp_buf, void *, recvbuf_extent, mpi_errno, "tmp_buf");
        /* adjust for potential negative lower bound in datatype */
        tmp_buf = (void *)((char*)tmp_buf - recvtype_true_lb);

        mpi_errno = MPIR_Localcopy((char *) recvbuf + (rank+1)*recvcount*recvtype_extent, 
                       (comm_size - rank - 1)*recvcount, recvtype, tmp_buf, 
                       (comm_size - rank - 1)*recvcount, recvtype);
	if (mpi_errno) { MPIU_ERR_POP(mpi_errno); }
        mpi_errno = MPIR_Localcopy(recvbuf, (rank+1)*recvcount, recvtype, 
                       (char *) tmp_buf + (comm_size-rank-1)*recvcount*recvtype_extent, 
                       (rank+1)*recvcount, recvtype);
	if (mpi_errno) { MPIU_ERR_POP(mpi_errno); }

        /* Blocks are in the reverse order now (comm_size-1 to 0). 
         * Reorder them to (0 to comm_size-1) and store them in recvbuf. */

        for (i=0; i<comm_size; i++){
            mpi_errno = MPIR_Localcopy((char *) tmp_buf + i*recvcount*recvtype_extent,
                                       recvcount, recvtype, 
                                       (char *) recvbuf + (comm_size-i-1)*recvcount*recvtype_extent, 
                                       recvcount, recvtype);
            if (mpi_errno) MPIU_ERR_POP(mpi_errno);
        }


#ifdef MPIR_OLD_SHORT_ALLTOALL_ALG
        /* Short message. Use recursive doubling. Each process sends all
           its data at each step along with all data it received in
           previous steps. */
        
        /* need to allocate temporary buffer of size
           sendbuf_extent*comm_size */
        
        /* get true extent of sendtype */
        MPIR_Type_get_true_extent_impl(sendtype, &sendtype_true_lb, &sendtype_true_extent);

        sendbuf_extent = sendcount * comm_size *
            (MPIR_MAX(sendtype_true_extent, sendtype_extent));
        MPIU_CHKLMEM_MALLOC(tmp_buf, void *, sendbuf_extent*comm_size, mpi_errno, "tmp_buf");
        
        /* adjust for potential negative lower bound in datatype */
        tmp_buf = (void *)((char*)tmp_buf - sendtype_true_lb);
        
        /* copy local sendbuf into tmp_buf at location indexed by rank */
        curr_cnt = sendcount*comm_size;
        mpi_errno = MPIR_Localcopy(sendbuf, curr_cnt, sendtype,
                                   ((char *)tmp_buf + rank*sendbuf_extent),
                                   curr_cnt, sendtype);
        if (mpi_errno) { MPIU_ERR_POP(mpi_errno); }
        
        mask = 0x1;
        i = 0;
        while (mask < comm_size) {
            dst = rank ^ mask;
            
            dst_tree_root = dst >> i;
            dst_tree_root <<= i;
            
            my_tree_root = rank >> i;
            my_tree_root <<= i;
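            /* e.g. with rank=5 and i=2 (mask=4): dst = 5^4 = 1,
             * my_tree_root = (5>>2)<<2 = 4, dst_tree_root = (1>>2)<<2 = 0 */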
            
            if (dst < comm_size) {
                mpi_errno = MPIC_Sendrecv_ft(((char *)tmp_buf +
                                              my_tree_root*sendbuf_extent),
                                             curr_cnt, sendtype,
                                             dst, MPIR_ALLTOALL_TAG, 
                                             ((char *)tmp_buf +
                                              dst_tree_root*sendbuf_extent),
                                             sendbuf_extent*(comm_size-dst_tree_root),
                                             sendtype, dst, MPIR_ALLTOALL_TAG, 
                                             comm, &status, errflag);
                if (mpi_errno) {
                    /* for communication errors, just record the error but continue */
                    *errflag = TRUE;
                    MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
                    MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
                    last_recv_cnt = 0;
                } else
                    /* with a non-power-of-two number of processes, less
                       data may be received than the count specified */
                    MPIR_Get_count_impl(&status, sendtype, &last_recv_cnt);
                curr_cnt += last_recv_cnt;
            }
            
            /* if some processes in this process's subtree did not have a
               destination process to communicate with in this step
               because the number of processes is not a power of two, we
               need to send them the result. We use a logarithmic
               recursive-halving algorithm for this. */
            
            if (dst_tree_root + mask > comm_size) {
                nprocs_completed = comm_size - my_tree_root - mask;
                /* nprocs_completed is the number of processes in this
                   subtree that have all the data. Send the data to the
                   others in a tree fashion. First find the root of the
                   current tree that is being divided into two. k is the
                   number of least-significant bits in this process's
                   rank that must be zeroed out to find the rank of the
                   root. */
                j = mask;
                k = 0;
                while (j) {
                    j >>= 1;
                    k++;
                }
                k--;
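                /* k is now log2(mask), e.g. k=3 when mask=8 */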
                
                tmp_mask = mask >> 1;
                while (tmp_mask) {
                    dst = rank ^ tmp_mask;
                    
                    tree_root = rank >> k;
                    tree_root <<= k;
                    
                    /* Send only if this process has the data and the
                       destination doesn't. At any given step, multiple
                       processes can send if they have the data. */
                    if ((dst > rank) && 
                        (rank < tree_root + nprocs_completed)
                        && (dst >= tree_root + nprocs_completed)) {
                        /* send the data received in this step above */
                        mpi_errno = MPIC_Send_ft(((char *)tmp_buf +
                                                  dst_tree_root*sendbuf_extent),
                                                 last_recv_cnt, sendtype,
                                                 dst, MPIR_ALLTOALL_TAG,
                                                 comm, errflag);  
                        if (mpi_errno) {
                            /* for communication errors, just record the error but continue */
                            *errflag = TRUE;
                            MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
                            MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
                        }
                    }
                    /* Receive only if this process doesn't have the data
                       and the sender does. */
                    else if ((dst < rank) && 
                             (dst < tree_root + nprocs_completed) &&
                             (rank >= tree_root + nprocs_completed)) {
                        mpi_errno = MPIC_Recv_ft(((char *)tmp_buf +
                                                  dst_tree_root*sendbuf_extent),
                                                 sendbuf_extent*(comm_size-dst_tree_root),
                                                 sendtype,   
                                                 dst, MPIR_ALLTOALL_TAG,
                                                 comm, &status, errflag); 
                        if (mpi_errno) {
                            /* for communication errors, just record the error but continue */
                            *errflag = TRUE;
                            MPIU_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
                            MPIU_ERR_ADD(mpi_errno_ret, mpi_errno);
                            last_recv_cnt = 0;
                        } else
                            MPIR_Get_count_impl(&status, sendtype, &last_recv_cnt);
                        curr_cnt += last_recv_cnt;
                    }
                    tmp_mask >>= 1;
                    k--;
                }
            }
            
            mask <<= 1;
            i++;
        }
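
/* A standalone sketch of the rotate-and-reverse bookkeeping referenced
 * above. This is not MPICH code: COMM_SIZE, BLOCKLEN, and the hard-coded
 * rank are made-up illustration values, and plain memcpy stands in for
 * MPIR_Localcopy. */
#include <stdio.h>
#include <string.h>

#define COMM_SIZE 4
#define BLOCKLEN  2     /* ints per block; stands in for recvcount*extent */

int main(void)
{
    int rank = 1;       /* pretend we are rank 1 */
    int recvbuf[COMM_SIZE * BLOCKLEN];
    int tmp_buf[COMM_SIZE * BLOCKLEN];
    int i;

    /* fill block i with the value i so the permutation is visible */
    for (i = 0; i < COMM_SIZE * BLOCKLEN; i++)
        recvbuf[i] = i / BLOCKLEN;

    /* step 1: rotate the blocks upwards by (rank+1) positions, exactly
     * as the two MPIR_Localcopy calls do */
    memcpy(tmp_buf, recvbuf + (rank + 1) * BLOCKLEN,
           (COMM_SIZE - rank - 1) * BLOCKLEN * sizeof(int));
    memcpy(tmp_buf + (COMM_SIZE - rank - 1) * BLOCKLEN, recvbuf,
           (rank + 1) * BLOCKLEN * sizeof(int));

    /* step 2: reverse the block order back into recvbuf */
    for (i = 0; i < COMM_SIZE; i++)
        memcpy(recvbuf + (COMM_SIZE - i - 1) * BLOCKLEN,
               tmp_buf + i * BLOCKLEN, BLOCKLEN * sizeof(int));

    /* prints 1 0 3 2 for rank 1 and COMM_SIZE 4 */
    for (i = 0; i < COMM_SIZE; i++)
        printf("block %d holds data from block %d\n", i, recvbuf[i * BLOCKLEN]);
    return 0;
}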
Example #17
0
/*@
    MPI_Type_free - Frees the datatype

Input Parameters:
. datatype - datatype that is freed (handle) 

Predefined types:

The MPI standard states (in its section on Opaque Objects):
.Bqs
MPI provides certain predefined opaque objects and predefined, static handles
to these objects. Such objects may not be destroyed.
.Bqe

Thus, it is an error to free a predefined datatype.  The same section makes
it clear that it is an error to free a null datatype.

.N ThreadSafe

.N Fortran

.N Errors
.N MPI_SUCCESS
.N MPI_ERR_TYPE
.N MPI_ERR_ARG
@*/
int MPI_Type_free(MPI_Datatype *datatype)
{
    int mpi_errno = MPI_SUCCESS;
    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_TYPE_FREE);

    MPIR_ERRTEST_INITIALIZED_ORDIE();
    
    MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_TYPE_FREE);
    
    /* Validate parameters, especially handles needing to be converted */
#   ifdef HAVE_ERROR_CHECKING
    {
        MPID_BEGIN_ERROR_CHECKS;
        {
	    MPIR_ERRTEST_ARGNULL(datatype, "datatype", mpi_errno);
	    MPIR_ERRTEST_DATATYPE(*datatype, "datatype", mpi_errno);
        }
        MPID_END_ERROR_CHECKS;
    }
#   endif
    
    /* Convert MPI object handles to object pointers */
#   ifdef HAVE_ERROR_CHECKING
    {
        MPID_BEGIN_ERROR_CHECKS;
        {
            MPIR_Datatype *datatype_ptr = NULL;

	    /* Check for built-in type */
	    if (HANDLE_GET_KIND(*datatype) == HANDLE_KIND_BUILTIN) {
		mpi_errno = MPIR_Err_create_code(MPI_SUCCESS,
						 MPIR_ERR_RECOVERABLE,
						 FCNAME, __LINE__,
						 MPI_ERR_TYPE,
						 "**dtypeperm", 0);
		goto fn_fail;
	    }

            /* All of the pair types except MPI_2INT are stored as derived,
             * not built-in, handles, but they must be treated like
             * built-ins here: freeing them is equally erroneous.
             */
	    if (*datatype == MPI_FLOAT_INT ||
		*datatype == MPI_DOUBLE_INT ||
		*datatype == MPI_LONG_INT ||
		*datatype == MPI_SHORT_INT ||
		*datatype == MPI_LONG_DOUBLE_INT)
	    {
		mpi_errno = MPIR_Err_create_code(MPI_SUCCESS,
						 MPIR_ERR_RECOVERABLE,
						 FCNAME, __LINE__,
						 MPI_ERR_TYPE,
						  "**dtypeperm", 0);
		goto fn_fail;
	    }
            /* Convert the MPI object handle to an object pointer */
            MPIR_Datatype_get_ptr( *datatype, datatype_ptr );

            /* Validate datatype_ptr */
            MPIR_Datatype_valid_ptr(datatype_ptr, mpi_errno);
            if (mpi_errno) goto fn_fail;
        }
        MPID_END_ERROR_CHECKS;
    }
#   endif /* HAVE_ERROR_CHECKING */

    /* ... body of routine ...  */

    MPIR_Type_free_impl(datatype);

    /* ... end of body of routine ... */

#ifdef HAVE_ERROR_CHECKING
  fn_exit:
#endif
    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_TYPE_FREE);
    MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
    return mpi_errno;

    /* --BEGIN ERROR HANDLING-- */
#   ifdef HAVE_ERROR_CHECKING
  fn_fail:
    {
	mpi_errno = MPIR_Err_create_code(
	    mpi_errno, MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPI_ERR_OTHER, 
	    "**mpi_type_free",
	    "**mpi_type_free %p", datatype);
    }
    mpi_errno = MPIR_Err_return_comm( NULL, FCNAME, mpi_errno );
    goto fn_exit;
#   endif
    /* --END ERROR HANDLING-- */
}
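
/* For reference, a minimal hypothetical caller showing the intended usage
 * of MPI_Type_free: derived datatypes are created, committed, and
 * eventually freed, while predefined handles such as MPI_INT must never
 * be passed to this routine. */
#include <mpi.h>
#include <stdio.h>

int main(int argc, char **argv)
{
    MPI_Datatype ctype;

    MPI_Init(&argc, &argv);

    /* create and commit a derived datatype: four contiguous ints */
    MPI_Type_contiguous(4, MPI_INT, &ctype);
    MPI_Type_commit(&ctype);

    /* ... communication using ctype would go here; freeing is legal even
     * while operations using the type are pending, since the
     * implementation defers the actual destruction ... */

    /* on return the handle is set to MPI_DATATYPE_NULL */
    MPI_Type_free(&ctype);
    if (ctype == MPI_DATATYPE_NULL)
        printf("ctype freed\n");

    /* erroneous by the rules quoted in the man page above: calling
     * MPI_Type_free(&ctype) again (it is now MPI_DATATYPE_NULL), or
     * passing a predefined handle such as MPI_INT */

    MPI_Finalize();
    return 0;
}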